1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-03 20:02:46 +03:00

Speed up lexing of long JSON strings

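Use the optimized linear search routines from port/pg_lfind.h (pg_lfind8
and pg_lfind8_le) when looking ahead for end quotes, backslashes, and
non-printable characters, checking sizeof(Vector8) bytes at a time before
falling back to the byte-by-byte loop. This results in nearly 40% faster
JSON parsing on x86-64 when most values are long strings, and all
platforms should see some improvement.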

Reviewed by Andres Freund and Nathan Bossart
Discussion: https://www.postgresql.org/message-id/CAFBsxsGhaR2KQ5eisaK%3D6Vm60t%3DaxhD8Ckj1qFoCH1pktZi%2B2w%40mail.gmail.com
Discussion: https://www.postgresql.org/message-id/CAFBsxsESLUyJ5spfOSyPrOvKUEYYNqsBosue9SV1j8ecgNXSKA%40mail.gmail.com
This commit is contained in:
John Naylor
2022-08-31 10:39:17 +07:00
parent 05519126a0
commit 0a8de93a48
3 changed files with 28 additions and 3 deletions

View File

@ -19,6 +19,7 @@
#include "common/jsonapi.h"
#include "mb/pg_wchar.h"
#include "port/pg_lfind.h"
#ifndef FRONTEND
#include "miscadmin.h"
@ -844,7 +845,7 @@ json_lex_string(JsonLexContext *lex)
}
else
{
char *p;
char *p = s;
if (hi_surrogate != -1)
return JSON_UNICODE_LOW_SURROGATE;
@ -853,11 +854,17 @@ json_lex_string(JsonLexContext *lex)
* Skip to the first byte that requires special handling, so we
* can batch calls to appendBinaryStringInfo.
*/
for (p = s; p < end; p++)
while (p < end - sizeof(Vector8) &&
!pg_lfind8('\\', (uint8 *) p, sizeof(Vector8)) &&
!pg_lfind8('"', (uint8 *) p, sizeof(Vector8)) &&
!pg_lfind8_le(31, (uint8 *) p, sizeof(Vector8)))
p += sizeof(Vector8);
for (; p < end; p++)
{
if (*p == '\\' || *p == '"')
break;
else if ((unsigned char) *p < 32)
else if ((unsigned char) *p <= 31)
{
/* Per RFC4627, these characters MUST be escaped. */
/*