mirror of
https://github.com/postgres/postgres.git
synced 2025-04-29 13:56:47 +03:00
Assorted minor cleanups in the test_json_parser module
Per gripes from Michael Paquier Discussion: https://postgr.es/m/ZhTQ6_w1vwOhqTQI@paquier.xyz Along the way, also clean up a handful of typos in 3311ea86ed and ea7b4e9a2a, found by Alexander Lakhin, and a couple of stylistic snafus noted by Daniel Westermann and Daniel Gustafsson.
This commit is contained in:
parent
b8a7bfa333
commit
42fa4b6601
@ -34,7 +34,7 @@
|
|||||||
#define BLOCKS_PER_READ 512
|
#define BLOCKS_PER_READ 512
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* we expect the find the last lines of the manifest, including the checksum,
|
* We expect to find the last lines of the manifest, including the checksum,
|
||||||
* in the last MIN_CHUNK bytes of the manifest. We trigger an incremental
|
* in the last MIN_CHUNK bytes of the manifest. We trigger an incremental
|
||||||
* parse step if we are about to overflow MAX_CHUNK bytes.
|
* parse step if we are about to overflow MAX_CHUNK bytes.
|
||||||
*/
|
*/
|
||||||
@ -88,8 +88,8 @@ struct IncrementalBackupInfo
|
|||||||
* Files extracted from the backup manifest.
|
* Files extracted from the backup manifest.
|
||||||
*
|
*
|
||||||
* We don't really need this information, because we use WAL summaries to
|
* We don't really need this information, because we use WAL summaries to
|
||||||
* figure what's changed. It would be unsafe to just rely on the list of
|
* figure out what's changed. It would be unsafe to just rely on the list
|
||||||
* files that existed before, because it's possible for a file to be
|
* of files that existed before, because it's possible for a file to be
|
||||||
* removed and a new one created with the same name and different
|
* removed and a new one created with the same name and different
|
||||||
* contents. In such cases, the whole file must still be sent. We can tell
|
* contents. In such cases, the whole file must still be sent. We can tell
|
||||||
* from the WAL summaries whether that happened, but not from the file
|
* from the WAL summaries whether that happened, but not from the file
|
||||||
|
@ -356,7 +356,7 @@ makeJsonLexContextCstringLen(JsonLexContext *lex, char *json,
|
|||||||
* need explicit stacks for predictions, field names and null indicators, but
|
* need explicit stacks for predictions, field names and null indicators, but
|
||||||
* we don't need the input, that will be handed in bit by bit to the
|
* we don't need the input, that will be handed in bit by bit to the
|
||||||
* parse routine. We also need an accumulator for partial tokens in case
|
* parse routine. We also need an accumulator for partial tokens in case
|
||||||
* the boundary between chunks happns to fall in the middle of a token.
|
* the boundary between chunks happens to fall in the middle of a token.
|
||||||
*/
|
*/
|
||||||
#define JS_STACK_CHUNK_SIZE 64
|
#define JS_STACK_CHUNK_SIZE 64
|
||||||
#define JS_MAX_PROD_LEN 10 /* more than we need */
|
#define JS_MAX_PROD_LEN 10 /* more than we need */
|
||||||
@ -1414,9 +1414,9 @@ json_lex(JsonLexContext *lex)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Add any remaining alpha_numeric chars. This takes care of the
|
* Add any remaining alphanumeric chars. This takes care of the
|
||||||
* {null, false, true} literals as well as any trailing
|
* {null, false, true} literals as well as any trailing
|
||||||
* alpha-numeric junk on non-string tokens.
|
* alphanumeric junk on non-string tokens.
|
||||||
*/
|
*/
|
||||||
for (int i = added; i < lex->input_length; i++)
|
for (int i = added; i < lex->input_length; i++)
|
||||||
{
|
{
|
||||||
|
@ -806,7 +806,7 @@ json_manifest_finalize_wal_range(JsonManifestParseState *parse)
|
|||||||
* the rest of the file.
|
* the rest of the file.
|
||||||
*
|
*
|
||||||
* For an incremental parse, this will just be called on the last chunk of the
|
* For an incremental parse, this will just be called on the last chunk of the
|
||||||
* manifest, and the cryptohash context paswed in. For a non-incremental
|
* manifest, and the cryptohash context passed in. For a non-incremental
|
||||||
* parse incr_ctx will be NULL.
|
* parse incr_ctx will be NULL.
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
|
@ -4,22 +4,21 @@ Module `test_json_parser`
|
|||||||
This module contains two programs for testing the json parsers.
|
This module contains two programs for testing the json parsers.
|
||||||
|
|
||||||
- `test_json_parser_incremental` is for testing the incremental parser, It
|
- `test_json_parser_incremental` is for testing the incremental parser, It
|
||||||
reads in a file and pases it in very small chunks (60 bytes at a time) to
|
reads in a file and passes it in very small chunks (default is 60 bytes at a
|
||||||
the incremental parser. It's not meant to be a speed test but to test the
|
time) to the incremental parser. It's not meant to be a speed test but to
|
||||||
accuracy of the incremental parser. It takes one argument: the name of the
|
test the accuracy of the incremental parser. There are two option arguments,
|
||||||
input file.
|
"-c nn" specifies an alternative chunk size, and "-s" specifies using
|
||||||
|
semantic routines. The semantic routines re-output the json, although not in
|
||||||
|
a very pretty form. The required non-option argument is the input file name.
|
||||||
- `test_json_parser_perf` is for speed testing both the standard
|
- `test_json_parser_perf` is for speed testing both the standard
|
||||||
recursive descent parser and the non-recursive incremental
|
recursive descent parser and the non-recursive incremental
|
||||||
parser. If given the `-i` flag it uses the non-recursive parser,
|
parser. If given the `-i` flag it uses the non-recursive parser,
|
||||||
otherwise the stardard parser. The remaining flags are the number of
|
otherwise the standard parser. The remaining flags are the number of
|
||||||
parsing iterations and the file containing the input. Even when
|
parsing iterations and the file containing the input. Even when
|
||||||
using the non-recursive parser, the input is passed to the parser in a
|
using the non-recursive parser, the input is passed to the parser in a
|
||||||
single chunk. The results are thus comparable to those of the
|
single chunk. The results are thus comparable to those of the
|
||||||
standard parser.
|
standard parser.
|
||||||
|
|
||||||
The easiest way to use these is to run `make check` and `make speed-check`
|
The sample input file is a small, sanitized extract from a list of `delicious`
|
||||||
|
|
||||||
The sample input file is a small extract from a list of `delicious`
|
|
||||||
bookmarks taken some years ago, all wrapped in a single json
|
bookmarks taken some years ago, all wrapped in a single json
|
||||||
array. 10,000 iterations of parsing this file gives a reasonable
|
array.
|
||||||
benchmark, and that is what the `speed-check` target does.
|
|
||||||
|
@ -3,16 +3,17 @@
|
|||||||
* test_json_parser_incremental.c
|
* test_json_parser_incremental.c
|
||||||
* Test program for incremental JSON parser
|
* Test program for incremental JSON parser
|
||||||
*
|
*
|
||||||
* Copyright (c) 2023, PostgreSQL Global Development Group
|
* Copyright (c) 2024, PostgreSQL Global Development Group
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* src/test/modules/test_json_parser/test_json_parser_incremental.c
|
* src/test/modules/test_json_parser/test_json_parser_incremental.c
|
||||||
*
|
*
|
||||||
* This progam tests incremental parsing of json. The input is fed into
|
* This program tests incremental parsing of json. The input is fed into
|
||||||
* the parser in very small chunks. In practice you would normally use
|
* the parser in very small chunks. In practice you would normally use
|
||||||
* much larger chunks, but doing this makes it more likely that the
|
* much larger chunks, but doing this makes it more likely that the
|
||||||
* full range of incement handling, especially in the lexer, is exercised.
|
* full range of increment handling, especially in the lexer, is exercised.
|
||||||
* If the "-c SIZE" option is provided, that chunk size is used instead.
|
* If the "-c SIZE" option is provided, that chunk size is used instead
|
||||||
|
* of the default of 60.
|
||||||
*
|
*
|
||||||
* The argument specifies the file containing the JSON input.
|
* The argument specifies the file containing the JSON input.
|
||||||
*
|
*
|
||||||
@ -31,6 +32,9 @@
|
|||||||
#include "mb/pg_wchar.h"
|
#include "mb/pg_wchar.h"
|
||||||
#include "pg_getopt.h"
|
#include "pg_getopt.h"
|
||||||
|
|
||||||
|
#define BUFSIZE 6000
|
||||||
|
#define DEFAULT_CHUNK_SIZE 60
|
||||||
|
|
||||||
typedef struct DoState
|
typedef struct DoState
|
||||||
{
|
{
|
||||||
JsonLexContext *lex;
|
JsonLexContext *lex;
|
||||||
@ -67,14 +71,13 @@ JsonSemAction sem = {
|
|||||||
int
|
int
|
||||||
main(int argc, char **argv)
|
main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
/* max delicious line length is less than this */
|
char buff[BUFSIZE];
|
||||||
char buff[6001];
|
|
||||||
FILE *json_file;
|
FILE *json_file;
|
||||||
JsonParseErrorType result;
|
JsonParseErrorType result;
|
||||||
JsonLexContext lex;
|
JsonLexContext lex;
|
||||||
StringInfoData json;
|
StringInfoData json;
|
||||||
int n_read;
|
int n_read;
|
||||||
size_t chunk_size = 60;
|
size_t chunk_size = DEFAULT_CHUNK_SIZE;
|
||||||
struct stat statbuf;
|
struct stat statbuf;
|
||||||
off_t bytes_left;
|
off_t bytes_left;
|
||||||
JsonSemAction *testsem = &nullSemAction;
|
JsonSemAction *testsem = &nullSemAction;
|
||||||
@ -88,6 +91,11 @@ main(int argc, char **argv)
|
|||||||
{
|
{
|
||||||
case 'c': /* chunksize */
|
case 'c': /* chunksize */
|
||||||
sscanf(optarg, "%zu", &chunk_size);
|
sscanf(optarg, "%zu", &chunk_size);
|
||||||
|
if (chunk_size > BUFSIZE)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "chunk size cannot exceed %d\n", BUFSIZE);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case 's': /* do semantic processing */
|
case 's': /* do semantic processing */
|
||||||
testsem = &sem;
|
testsem = &sem;
|
||||||
@ -121,6 +129,12 @@ main(int argc, char **argv)
|
|||||||
{
|
{
|
||||||
n_read = fread(buff, 1, chunk_size, json_file);
|
n_read = fread(buff, 1, chunk_size, json_file);
|
||||||
appendBinaryStringInfo(&json, buff, n_read);
|
appendBinaryStringInfo(&json, buff, n_read);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Append some trailing junk to the buffer passed to the parser. This
|
||||||
|
* helps us ensure that the parser does the right thing even if the
|
||||||
|
* chunk isn't terminated with a '\0'.
|
||||||
|
*/
|
||||||
appendStringInfoString(&json, "1+23 trailing junk");
|
appendStringInfoString(&json, "1+23 trailing junk");
|
||||||
bytes_left -= n_read;
|
bytes_left -= n_read;
|
||||||
if (bytes_left > 0)
|
if (bytes_left > 0)
|
||||||
|
@ -1,14 +1,14 @@
|
|||||||
/*-------------------------------------------------------------------------
|
/*-------------------------------------------------------------------------
|
||||||
*
|
*
|
||||||
* test_json_parser_perf.c
|
* test_json_parser_perf.c
|
||||||
* Performancet est program for both flavors of the JSON parser
|
* Performance test program for both flavors of the JSON parser
|
||||||
*
|
*
|
||||||
* Copyright (c) 2023, PostgreSQL Global Development Group
|
* Copyright (c) 2024, PostgreSQL Global Development Group
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* src/test/modules/test_json_parser/test_json_parser_perf.c
|
* src/test/modules/test_json_parser/test_json_parser_perf.c
|
||||||
*
|
*
|
||||||
* This progam tests either the standard (recursive descent) JSON parser
|
* This program tests either the standard (recursive descent) JSON parser
|
||||||
* or the incremental (table driven) parser, but without breaking the input
|
* or the incremental (table driven) parser, but without breaking the input
|
||||||
* into chunks in the latter case. Thus it can be used to compare the pure
|
* into chunks in the latter case. Thus it can be used to compare the pure
|
||||||
* parsing speed of the two parsers. If the "-i" option is used, then the
|
* parsing speed of the two parsers. If the "-i" option is used, then the
|
||||||
@ -28,11 +28,12 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
|
#define BUFSIZE 6000
|
||||||
|
|
||||||
int
|
int
|
||||||
main(int argc, char **argv)
|
main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
/* max delicious line length is less than this */
|
char buff[BUFSIZE];
|
||||||
char buff[6001];
|
|
||||||
FILE *json_file;
|
FILE *json_file;
|
||||||
JsonParseErrorType result;
|
JsonParseErrorType result;
|
||||||
JsonLexContext *lex;
|
JsonLexContext *lex;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user