1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-28 23:42:10 +03:00

Fix unescaping of JSON Unicode escapes, especially for non-UTF8.

Per discussion  on -hackers. We treat Unicode escapes when unescaping
them similarly to the way we treat them in PostgreSQL string literals.
Escapes in the ASCII range are always accepted, no matter what the
database encoding. Escapes for higher code points are only processed in
UTF8 databases, and attempts to process them in other databases will
result in an error. \u0000 is never unescaped, since it would result in
an impermissible null byte.
This commit is contained in:
Andrew Dunstan
2013-06-12 13:35:24 -04:00
parent c1d729b419
commit 78ed8e03c6
5 changed files with 1033 additions and 11 deletions

View File

@ -299,8 +299,14 @@ select * from json_populate_recordset(row('def',99,null)::jpop,'[{"c":[100,200,3
-- handling of unicode surrogate pairs
select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct;
select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct_in_utf8;
select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
select json '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
select json '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
--handling of simple unicode escapes
select json '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
select json '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
select json '{ "a": "null \u0000 escape" }' ->> 'a' as not_unescaped;