1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-17 17:02:08 +03:00

Use incremental parsing of backup manifests.

This changes the three callers to json_parse_manifest() to use
json_parse_manifest_incremental_chunk() if appropriate. In the case of
the backend caller, since we don't know the size of the manifest in
advance we always call the incremental parser.

Author: Andrew Dunstan
Reviewed-By: Jacob Champion

Discussion: https://postgr.es/m/7b0a51d6-0d9d-7366-3a1a-f74397a02f55@dunslane.net
This commit is contained in:
Andrew Dunstan
2024-03-11 02:31:51 -04:00
parent ea7b4e9a2a
commit 222e11a10a
3 changed files with 178 additions and 62 deletions

View File

@ -33,6 +33,14 @@
#define BLOCKS_PER_READ 512
/*
* we expect the find the last lines of the manifest, including the checksum,
* in the last MIN_CHUNK bytes of the manifest. We trigger an incremental
* parse step if we are about to overflow MAX_CHUNK bytes.
*/
#define MIN_CHUNK 1024
#define MAX_CHUNK (128 * 1024)
/*
* Details extracted from the WAL ranges present in the supplied backup manifest.
*/
@ -112,6 +120,11 @@ struct IncrementalBackupInfo
* turns out to be a problem in practice, we'll need to be more clever.
*/
BlockRefTable *brtab;
/*
* State object for incremental JSON parsing
*/
JsonManifestParseIncrementalState *inc_state;
};
static void manifest_process_version(JsonManifestParseContext *context,
@ -142,6 +155,7 @@ CreateIncrementalBackupInfo(MemoryContext mcxt)
{
IncrementalBackupInfo *ib;
MemoryContext oldcontext;
JsonManifestParseContext *context;
oldcontext = MemoryContextSwitchTo(mcxt);
@ -157,6 +171,17 @@ CreateIncrementalBackupInfo(MemoryContext mcxt)
*/
ib->manifest_files = backup_file_create(mcxt, 10000, NULL);
context = palloc0(sizeof(JsonManifestParseContext));
/* Parse the manifest. */
context->private_data = ib;
context->version_cb = manifest_process_version;
context->system_identifier_cb = manifest_process_system_identifier;
context->per_file_cb = manifest_process_file;
context->per_wal_range_cb = manifest_process_wal_range;
context->error_cb = manifest_report_error;
ib->inc_state = json_parse_manifest_incremental_init(context);
MemoryContextSwitchTo(oldcontext);
return ib;
@ -176,13 +201,20 @@ AppendIncrementalManifestData(IncrementalBackupInfo *ib, const char *data,
/* Switch to our memory context. */
oldcontext = MemoryContextSwitchTo(ib->mcxt);
/*
* XXX. Our json parser is at present incapable of parsing json blobs
* incrementally, so we have to accumulate the entire backup manifest
* before we can do anything with it. This should really be fixed, since
* some users might have very large numbers of files in the data
* directory.
*/
if (ib->buf.len > MIN_CHUNK && ib->buf.len + len > MAX_CHUNK)
{
/*
* time for an incremental parse. We'll do all but the last MIN_CHUNK
* so that we have enough left for the final piece.
*/
json_parse_manifest_incremental_chunk(
ib->inc_state, ib->buf.data, ib->buf.len - MIN_CHUNK, false);
/* now remove what we just parsed */
memmove(ib->buf.data, ib->buf.data + (ib->buf.len - MIN_CHUNK),
MIN_CHUNK + 1);
ib->buf.len = MIN_CHUNK;
}
appendBinaryStringInfo(&ib->buf, data, len);
/* Switch back to previous memory context. */
@ -196,20 +228,14 @@ AppendIncrementalManifestData(IncrementalBackupInfo *ib, const char *data,
void
FinalizeIncrementalManifest(IncrementalBackupInfo *ib)
{
JsonManifestParseContext context;
MemoryContext oldcontext;
/* Switch to our memory context. */
oldcontext = MemoryContextSwitchTo(ib->mcxt);
/* Parse the manifest. */
context.private_data = ib;
context.version_cb = manifest_process_version;
context.system_identifier_cb = manifest_process_system_identifier;
context.per_file_cb = manifest_process_file;
context.per_wal_range_cb = manifest_process_wal_range;
context.error_cb = manifest_report_error;
json_parse_manifest(&context, ib->buf.data, ib->buf.len);
/* Parse the last chunk of the manifest */
json_parse_manifest_incremental_chunk(
ib->inc_state, ib->buf.data, ib->buf.len, true);
/* Done with the buffer, so release memory. */
pfree(ib->buf.data);