mirror of
https://github.com/postgres/postgres.git
synced 2025-10-15 05:46:52 +03:00
pg_dump: Retrieve attribute statistics in batches.
Currently, pg_dump gathers attribute statistics with a query per
relation, which can cause pg_dump to take significantly longer,
especially when there are many relations. This commit addresses
this by teaching pg_dump to gather attribute statistics for 64
relations at a time. Some simple tests showed this was the optimal
batch size, but performance may vary depending on the workload.
Our lookahead code determines the next batch of relations by
searching the TOC sequentially for relevant entries. This approach
assumes that we will dump all such entries in TOC order, which
unfortunately isn't true for dump formats that use
RestoreArchive(). RestoreArchive() does multiple passes through
the TOC and selectively dumps certain groups of entries each time.
This is particularly problematic for index stats and a subset of
matview stats; both are in SECTION_POST_DATA, but matview stats
that depend on matview data are dumped in RESTORE_PASS_POST_ACL,
while all other stats are dumped in RESTORE_PASS_MAIN. To handle
this, this commit moves all statistics data entries in
SECTION_POST_DATA to RESTORE_PASS_POST_ACL, which ensures that we
always dump them in TOC order. A convenient side effect of this
change is that we can revert a decent chunk of commit a0a4601765,
but that is left for a follow-up commit.
Author: Corey Huinker <corey.huinker@gmail.com>
Co-authored-by: Nathan Bossart <nathandbossart@gmail.com>
Reviewed-by: Jeff Davis <pgsql@j-davis.com>
Discussion: https://postgr.es/m/CADkLM%3Dc%2Br05srPy9w%2B-%2BnbmLEo15dKXYQ03Q_xyK%2BriJerigLQ%40mail.gmail.com
This commit is contained in:
@@ -2655,7 +2655,7 @@ WriteToc(ArchiveHandle *AH)
|
||||
}
|
||||
else if (te->defnDumper)
|
||||
{
|
||||
char *defn = te->defnDumper((Archive *) AH, te->defnDumperArg);
|
||||
char *defn = te->defnDumper((Archive *) AH, te->defnDumperArg, te);
|
||||
|
||||
te->defnLen = WriteStr(AH, defn);
|
||||
pg_free(defn);
|
||||
@@ -3284,23 +3284,16 @@ _tocEntryRestorePass(ArchiveHandle *AH, TocEntry *te)
|
||||
|
||||
/*
|
||||
* If statistics data is dependent on materialized view data, it must be
|
||||
* deferred to RESTORE_PASS_POST_ACL.
|
||||
* deferred to RESTORE_PASS_POST_ACL. Those entries are already marked as
|
||||
* SECTION_POST_DATA, and some other stats entries (e.g., index stats)
|
||||
* will also be marked as SECTION_POST_DATA. Additionally, our lookahead
|
||||
* code in fetchAttributeStats() assumes that we dump all statistics data
|
||||
* entries in TOC order. To ensure this assumption holds, we move all
|
||||
* statistics data entries in SECTION_POST_DATA to RESTORE_PASS_POST_ACL.
|
||||
*/
|
||||
if (strcmp(te->desc, "STATISTICS DATA") == 0)
|
||||
{
|
||||
for (int i = 0; i < te->nDeps; i++)
|
||||
{
|
||||
DumpId depid = te->dependencies[i];
|
||||
|
||||
if (depid <= AH->maxDumpId && AH->tocsByDumpId[depid] != NULL)
|
||||
{
|
||||
TocEntry *otherte = AH->tocsByDumpId[depid];
|
||||
|
||||
if (strcmp(otherte->desc, "MATERIALIZED VIEW DATA") == 0)
|
||||
return RESTORE_PASS_POST_ACL;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (strcmp(te->desc, "STATISTICS DATA") == 0 &&
|
||||
te->section == SECTION_POST_DATA)
|
||||
return RESTORE_PASS_POST_ACL;
|
||||
|
||||
/* All else can be handled in the main pass. */
|
||||
return RESTORE_PASS_MAIN;
|
||||
@@ -3951,7 +3944,7 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, const char *pfx)
|
||||
}
|
||||
else if (te->defnDumper)
|
||||
{
|
||||
char *defn = te->defnDumper((Archive *) AH, te->defnDumperArg);
|
||||
char *defn = te->defnDumper((Archive *) AH, te->defnDumperArg, te);
|
||||
|
||||
te->defnLen = ahprintf(AH, "%s\n\n", defn);
|
||||
pg_free(defn);
|
||||
|
Reference in New Issue
Block a user