1
0
mirror of https://github.com/postgres/postgres.git synced 2025-10-15 05:46:52 +03:00

test_json_parser: Speed up 002_inline.pl

Some macOS machines are having trouble with 002_inline, which executes
the JSON parser test executables hundreds of times in a nested loop.
Both developer machines and buildfarm critters have shown excessive test
durations, upwards of 20 seconds.

Push the innermost loop of 002_inline, which iterates through differing
chunk sizes, down into the test executable. (I'd eventually like to push
all of the JSON unit tests down into C, but this is an easy win in the
short term.) Testers have reported a speedup between 4-9x.

Reported-by: Robert Haas <robertmhaas@gmail.com>
Suggested-by: Andres Freund <andres@anarazel.de>
Tested-by: Andrew Dunstan <andrew@dunslane.net>
Tested-by: Tom Lane <tgl@sss.pgh.pa.us>
Tested-by: Robert Haas <robertmhaas@gmail.com>
Discussion: https://postgr.es/m/CA%2BTgmobKoG%2BgKzH9qB7uE4MFo-z1hn7UngqAe9b0UqNbn3_XGQ%40mail.gmail.com
Backpatch-through: 17
This commit is contained in:
Jacob Champion
2025-10-01 09:48:57 -07:00
parent 3e908fb54f
commit fd726b8379
3 changed files with 107 additions and 58 deletions

View File

@@ -6,10 +6,12 @@ This module contains two programs for testing the json parsers.
- `test_json_parser_incremental` is for testing the incremental parser. It - `test_json_parser_incremental` is for testing the incremental parser. It
reads in a file and passes it in very small chunks (default is 60 bytes at a reads in a file and passes it in very small chunks (default is 60 bytes at a
time) to the incremental parser. It's not meant to be a speed test but to time) to the incremental parser. It's not meant to be a speed test but to
test the accuracy of the incremental parser. There are two option arguments, test the accuracy of the incremental parser. The option "-c nn" specifies an
"-c nn" specifies an alternative chunk size, and "-s" specifies using alternative chunk size, "-r nn" runs a range of chunk sizes down to one byte
semantic routines. The semantic routines re-output the json, although not in on the same input (with output separated by null bytes), and "-s" specifies
a very pretty form. The required non-option argument is the input file name. using semantic routines. The semantic routines re-output the json, although
not in a very pretty form. The required non-option argument is the input file
name.
- `test_json_parser_perf` is for speed testing both the standard - `test_json_parser_perf` is for speed testing both the standard
recursive descent parser and the non-recursive incremental recursive descent parser and the non-recursive incremental
parser. If given the `-i` flag it uses the non-recursive parser, parser. If given the `-i` flag it uses the non-recursive parser,

View File

@@ -33,23 +33,37 @@ sub test
print $fh "$json"; print $fh "$json";
close($fh); close($fh);
# The -r mode runs the parser in a loop, with output separated by nulls.
# Unpack that as a list of null-terminated ASCII strings (Z*) and check that
# each run produces the same result.
my ($all_stdout, $all_stderr) =
run_command([ @exe, "-r", $chunk, $fname ]);
my @stdout = unpack("(Z*)*", $all_stdout);
my @stderr = unpack("(Z*)*", $all_stderr);
is(scalar @stdout, $chunk, "$name: stdout has correct number of entries");
is(scalar @stderr, $chunk, "$name: stderr has correct number of entries");
my $i = 0;
foreach my $size (reverse(1 .. $chunk)) foreach my $size (reverse(1 .. $chunk))
{ {
my ($stdout, $stderr) = run_command([ @exe, "-c", $size, $fname ]);
if (defined($params{error})) if (defined($params{error}))
{ {
unlike($stdout, qr/SUCCESS/, unlike($stdout[$i], qr/SUCCESS/,
"$name, chunk size $size: test fails"); "$name, chunk size $size: test fails");
like($stderr, $params{error}, like($stderr[$i], $params{error},
"$name, chunk size $size: correct error output"); "$name, chunk size $size: correct error output");
} }
else else
{ {
like($stdout, qr/SUCCESS/, like($stdout[$i], qr/SUCCESS/,
"$name, chunk size $size: test succeeds"); "$name, chunk size $size: test succeeds");
is($stderr, "", "$name, chunk size $size: no error output"); is($stderr[$i], "", "$name, chunk size $size: no error output");
} }
$i++;
} }
} }

View File

@@ -12,9 +12,14 @@
* the parser in very small chunks. In practice you would normally use * the parser in very small chunks. In practice you would normally use
* much larger chunks, but doing this makes it more likely that the * much larger chunks, but doing this makes it more likely that the
* full range of increment handling, especially in the lexer, is exercised. * full range of increment handling, especially in the lexer, is exercised.
*
* If the "-c SIZE" option is provided, that chunk size is used instead * If the "-c SIZE" option is provided, that chunk size is used instead
* of the default of 60. * of the default of 60.
* *
* If the "-r SIZE" option is provided, a range of chunk sizes from SIZE down to
* 1 are run sequentially. A null byte is printed to the streams after each
* iteration.
*
* If the -s flag is given, the program does semantic processing. This should * If the -s flag is given, the program does semantic processing. This should
* just mirror back the json, albeit with white space changes. * just mirror back the json, albeit with white space changes.
* *
@@ -88,8 +93,8 @@ main(int argc, char **argv)
StringInfoData json; StringInfoData json;
int n_read; int n_read;
size_t chunk_size = DEFAULT_CHUNK_SIZE; size_t chunk_size = DEFAULT_CHUNK_SIZE;
bool run_chunk_ranges = false;
struct stat statbuf; struct stat statbuf;
off_t bytes_left;
const JsonSemAction *testsem = &nullSemAction; const JsonSemAction *testsem = &nullSemAction;
char *testfile; char *testfile;
int c; int c;
@@ -102,10 +107,13 @@ main(int argc, char **argv)
if (!lex) if (!lex)
pg_fatal("out of memory"); pg_fatal("out of memory");
while ((c = getopt(argc, argv, "c:os")) != -1) while ((c = getopt(argc, argv, "r:c:os")) != -1)
{ {
switch (c) switch (c)
{ {
case 'r': /* chunk range */
run_chunk_ranges = true;
/* fall through */
case 'c': /* chunk size */ case 'c': /* chunk size */
chunk_size = strtou64(optarg, NULL, 10); chunk_size = strtou64(optarg, NULL, 10);
if (chunk_size > BUFSIZE) if (chunk_size > BUFSIZE)
@@ -135,8 +143,6 @@ main(int argc, char **argv)
exit(1); exit(1);
} }
makeJsonLexContextIncremental(lex, PG_UTF8, need_strings);
setJsonLexContextOwnsTokens(lex, lex_owns_tokens);
initStringInfo(&json); initStringInfo(&json);
if ((json_file = fopen(testfile, PG_BINARY_R)) == NULL) if ((json_file = fopen(testfile, PG_BINARY_R)) == NULL)
@@ -145,25 +151,39 @@ main(int argc, char **argv)
if (fstat(fileno(json_file), &statbuf) != 0) if (fstat(fileno(json_file), &statbuf) != 0)
pg_fatal("error statting input: %m"); pg_fatal("error statting input: %m");
bytes_left = statbuf.st_size; do
{
/*
* This outer loop only repeats in -r mode. Reset the parse state and
* our position in the input file for the inner loop, which performs
* the incremental parsing.
*/
off_t bytes_left = statbuf.st_size;
size_t to_read = chunk_size;
makeJsonLexContextIncremental(lex, PG_UTF8, need_strings);
setJsonLexContextOwnsTokens(lex, lex_owns_tokens);
rewind(json_file);
resetStringInfo(&json);
for (;;) for (;;)
{ {
/* We will break when there's nothing left to read */ /* We will break when there's nothing left to read */
if (bytes_left < chunk_size) if (bytes_left < to_read)
chunk_size = bytes_left; to_read = bytes_left;
n_read = fread(buff, 1, chunk_size, json_file); n_read = fread(buff, 1, to_read, json_file);
if (n_read < chunk_size) if (n_read < to_read)
pg_fatal("error reading input file: %d", ferror(json_file)); pg_fatal("error reading input file: %d", ferror(json_file));
appendBinaryStringInfo(&json, buff, n_read); appendBinaryStringInfo(&json, buff, n_read);
/* /*
* Append some trailing junk to the buffer passed to the parser. This * Append some trailing junk to the buffer passed to the parser.
* helps us ensure that the parser does the right thing even if the * This helps us ensure that the parser does the right thing even
* chunk isn't terminated with a '\0'. * if the chunk isn't terminated with a '\0'.
*/ */
appendStringInfoString(&json, "1+23 trailing junk"); appendStringInfoString(&json, "1+23 trailing junk");
bytes_left -= n_read; bytes_left -= n_read;
@@ -198,8 +218,21 @@ main(int argc, char **argv)
} }
cleanup: cleanup:
fclose(json_file);
freeJsonLexContext(lex); freeJsonLexContext(lex);
/*
* In -r mode, separate output with nulls so that the calling test can
* split it up, decrement the chunk size, and loop back to the top.
* All other modes immediately fall out of the loop and exit.
*/
if (run_chunk_ranges)
{
fputc('\0', stdout);
fputc('\0', stderr);
}
} while (run_chunk_ranges && (--chunk_size > 0));
fclose(json_file);
free(json.data); free(json.data);
free(lex); free(lex);