From 8e65d9ff963e707c51216db24252ebd608758e99 Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Wed, 11 Sep 2024 13:21:05 +0200 Subject: [PATCH] Fix unique key checks in JSON object constructors When building a JSON object, the code builds a hash table of keys, to allow checking if the keys are unique. The uniqueness check and adding the new key happens in json_unique_check_key(), but this assumes the pointer to the key remains valid. Unfortunately, two places passed pointers to keys in a buffer, while also appending more data (additional key/value pairs) to the buffer. With enough data the buffer is resized by enlargeStringInfo(), which calls repalloc(), invalidating the earlier key pointers. Due to this the uniqueness check may fail with both false negatives and false positives, producing JSON objects with duplicate keys or failing to produce a perfectly valid JSON object. This affects multiple functions that enforce uniqueness of keys, all introduced in PG16 with the new SQL/JSON: - json_object_agg_unique / jsonb_object_agg_unique - json_object / jsonb_objectagg Existing regression tests did not detect the issue, simply because the initial buffer size is 1024 and the objects were small enough not to require the repalloc. With a sufficiently large object, AddressSanitizer reported the access to invalid memory immediately. So would valgrind, of course. Fixed by copying the key into the hash table memory context, and adding regression tests with enough data to repalloc the buffer. Backpatch to 16, where the functions were introduced. Reported by Alexander Lakhin. Investigation and initial fix by Junwang Zhao, with various improvements and tests by me. Reported-by: Alexander Lakhin Author: Junwang Zhao, Tomas Vondra Backpatch-through: 16 Discussion: https://postgr.es/m/18598-3279ed972a2347c7@postgresql.org Discussion: https://postgr.es/m/CAEG8a3JjH0ReJF2_O7-8LuEbO69BxPhYeXs95_x7+H9AMWF1gw@mail.gmail.com --- src/backend/utils/adt/json.c | 20 +++++++++++++++++--- src/test/regress/expected/json.out | 3 +++ src/test/regress/expected/sqljson.out | 5 +++++ src/test/regress/sql/json.sql | 2 ++ src/test/regress/sql/sqljson.sql | 5 +++++ 5 files changed, 32 insertions(+), 3 deletions(-) diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c index 7205f4adca8..fab747789ab 100644 --- a/src/backend/utils/adt/json.c +++ b/src/backend/utils/adt/json.c @@ -1181,7 +1181,14 @@ json_object_agg_transfn_worker(FunctionCallInfo fcinfo, if (unique_keys) { - const char *key = &out->data[key_offset]; + /* + * Copy the key first, instead of pointing into the buffer. It will be + * added to the hash table, but the buffer may get reallocated as + * we're appending more data to it. That would invalidate pointers to + * keys in the current buffer. + */ + const char *key = MemoryContextStrdup(aggcontext, + &out->data[key_offset]); if (!json_unique_check_key(&state->unique_check.check, key, 0)) ereport(ERROR, @@ -1343,8 +1350,15 @@ json_build_object_worker(int nargs, Datum *args, bool *nulls, Oid *types, if (unique_keys) { - /* check key uniqueness after key appending */ - const char *key = &out->data[key_offset]; + /* + * check key uniqueness after key appending + * + * Copy the key first, instead of pointing into the buffer. It + * will be added to the hash table, but the buffer may get + * reallocated as we're appending more data to it. That would + * invalidate pointers to keys in the current buffer. + */ + const char *key = pstrdup(&out->data[key_offset]); if (!json_unique_check_key(&unique_check.check, key, 0)) ereport(ERROR, diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out index aa29bc597bd..d16c067b22f 100644 --- a/src/test/regress/expected/json.out +++ b/src/test/regress/expected/json.out @@ -2282,6 +2282,9 @@ select json_object('{a,b,"","d e f"}','{1,2,3,"a b c"}'); {"a" : "1", "b" : "2", "" : "3", "d e f" : "a b c"} (1 row) +-- json_object_agg_unique requires unique keys +select json_object_agg_unique(mod(i,100), i) from generate_series(0, 199) i; +ERROR: duplicate JSON object key value: "0" -- json_to_record and json_to_recordset select * from json_to_record('{"a":1,"b":"foo","c":"bar"}') as x(a int, b text, d text); diff --git a/src/test/regress/expected/sqljson.out b/src/test/regress/expected/sqljson.out index fa2abdb4a7f..ff75ea78ee6 100644 --- a/src/test/regress/expected/sqljson.out +++ b/src/test/regress/expected/sqljson.out @@ -247,6 +247,8 @@ SELECT JSON_OBJECT('a': '1', 'b': NULL, 'c': 2 ABSENT ON NULL); {"a" : "1", "c" : 2} (1 row) +SELECT JSON_OBJECT(1: 1, '2': NULL, '3': 1, repeat('x', 1000): 1, 2: repeat('a', 100) WITH UNIQUE); +ERROR: duplicate JSON object key value: "2" SELECT JSON_OBJECT(1: 1, '1': NULL WITH UNIQUE); ERROR: duplicate JSON object key value: "1" SELECT JSON_OBJECT(1: 1, '1': NULL ABSENT ON NULL WITH UNIQUE); @@ -624,6 +626,9 @@ ERROR: duplicate JSON object key value SELECT JSON_OBJECTAGG(k: v ABSENT ON NULL WITH UNIQUE KEYS RETURNING jsonb) FROM (VALUES (1, 1), (1, NULL), (2, 2)) foo(k, v); ERROR: duplicate JSON object key value +SELECT JSON_OBJECTAGG(mod(i,100): (i)::text FORMAT JSON WITH UNIQUE) +FROM generate_series(0, 199) i; +ERROR: duplicate JSON object key value: "0" -- Test JSON_OBJECT deparsing EXPLAIN (VERBOSE, COSTS OFF) SELECT JSON_OBJECT('foo' : '1' FORMAT JSON, 'bar' : 'baz' RETURNING json); diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql index ec57dfe7070..50b4ed67435 100644 --- a/src/test/regress/sql/json.sql +++ b/src/test/regress/sql/json.sql @@ -748,6 +748,8 @@ select json_object('{a,b,NULL,"d e f"}','{1,2,3,"a b c"}'); select json_object('{a,b,"","d e f"}','{1,2,3,"a b c"}'); +-- json_object_agg_unique requires unique keys +select json_object_agg_unique(mod(i,100), i) from generate_series(0, 199) i; -- json_to_record and json_to_recordset diff --git a/src/test/regress/sql/sqljson.sql b/src/test/regress/sql/sqljson.sql index 4fd820fd515..6fbcdeed61c 100644 --- a/src/test/regress/sql/sqljson.sql +++ b/src/test/regress/sql/sqljson.sql @@ -74,6 +74,8 @@ SELECT JSON_OBJECT('a': '1', 'b': NULL, 'c': 2); SELECT JSON_OBJECT('a': '1', 'b': NULL, 'c': 2 NULL ON NULL); SELECT JSON_OBJECT('a': '1', 'b': NULL, 'c': 2 ABSENT ON NULL); +SELECT JSON_OBJECT(1: 1, '2': NULL, '3': 1, repeat('x', 1000): 1, 2: repeat('a', 100) WITH UNIQUE); + SELECT JSON_OBJECT(1: 1, '1': NULL WITH UNIQUE); SELECT JSON_OBJECT(1: 1, '1': NULL ABSENT ON NULL WITH UNIQUE); SELECT JSON_OBJECT(1: 1, '1': NULL NULL ON NULL WITH UNIQUE RETURNING jsonb); @@ -216,6 +218,9 @@ FROM (VALUES (1, 1), (1, NULL), (2, 2)) foo(k, v); SELECT JSON_OBJECTAGG(k: v ABSENT ON NULL WITH UNIQUE KEYS RETURNING jsonb) FROM (VALUES (1, 1), (1, NULL), (2, 2)) foo(k, v); +SELECT JSON_OBJECTAGG(mod(i,100): (i)::text FORMAT JSON WITH UNIQUE) +FROM generate_series(0, 199) i; + -- Test JSON_OBJECT deparsing EXPLAIN (VERBOSE, COSTS OFF) SELECT JSON_OBJECT('foo' : '1' FORMAT JSON, 'bar' : 'baz' RETURNING json);