mirror of
https://github.com/postgres/postgres.git
synced 2025-07-28 23:42:10 +03:00
Fix unescaping of JSON Unicode escapes, especially for non-UTF8.
Per discussion on -hackers. When unescaping JSON Unicode escapes, we treat them much the same way we treat Unicode escapes in PostgreSQL string literals. Escapes in the ASCII range are always accepted, regardless of the database encoding. Escapes for higher code points are processed only in UTF8 databases; attempting to process them in a database with any other encoding results in an error. \u0000 is never unescaped, since doing so would produce an impermissible null byte.
This commit is contained in:
@ -299,8 +299,14 @@ select * from json_populate_recordset(row('def',99,null)::jpop,'[{"c":[100,200,3
|
||||
|
||||
-- handling of unicode surrogate pairs
|
||||
|
||||
-- Two consecutive surrogate pairs (\ud83d\ude04 and \ud83d\udc36), with member "a" extracted via ->.
-- NOTE(review): identical to the following statement except for the column alias; the diff
-- markers were lost in this view, so this is presumably the pre-change line -- confirm.
select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct;
|
||||
-- Two well-formed surrogate pairs (high surrogate followed by low surrogate, twice).
-- These escapes denote code points above the ASCII range, so the alias marks the
-- result as expected to be correct only in a UTF8 database.
select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct_in_utf8;
|
||||
-- Malformed input: the high surrogate \ud83d is followed by another high surrogate instead of a low one.
select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
|
||||
-- Malformed input: the low surrogate \ude04 comes before the high surrogate \ud83d.
select json '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
|
||||
-- Malformed input: the high surrogate \ud83d is followed by the plain character X, not a low surrogate.
select json '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
|
||||
-- Malformed input: the low surrogate \ude04 appears with no preceding high surrogate.
select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
|
||||
|
||||
-- handling of simple unicode escapes
|
||||
|
||||
-- \u00a9 lies above the ASCII range, so (per the alias) it is expected to be
-- unescaped correctly only in a UTF8 database. ->> extracts the member as text.
select json '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
|
||||
-- \u0024 is within the ASCII range, so (per the alias) it is expected to be
-- unescaped correctly regardless of the database encoding.
select json '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
|
||||
-- \u0000 is intentionally never unescaped: doing so would place a null byte in the
-- text result, which is not permitted. The alias records that expectation.
select json '{ "a": "null \u0000 escape" }' ->> 'a' as not_unescaped;
|
||||
|
Reference in New Issue
Block a user