1
0
mirror of https://github.com/postgres/postgres.git synced 2025-04-25 21:42:33 +03:00

Account for TOAST data while scheduling parallel dumps.

In parallel mode, pg_dump tries to order the table-data-dumping
jobs with the largest tables first.  However, it was only
consulting the pg_class.relpages value to determine table size.
This ignores TOAST data, and so we could make poor scheduling
decisions in cases where some large tables are mostly TOASTed
data while others have very little.  To fix, add in the relpages
value for the TOAST table as well.

This patch also fixes a potential integer-overflow issue that
could result in poor scheduling on machines where off_t is
only 32 bits wide.  Such platforms are probably extinct in the
wild, but we do still nominally support them, so repair that too.

Per complaint from Hans Buschmann.

Discussion: https://postgr.es/m/7d7eb6128f40401d81b3b7a898b6b4de@W2012-02.nidsa.loc
This commit is contained in:
Tom Lane 2021-12-06 13:23:07 -05:00
parent be85727a3d
commit 65aaed22a8
2 changed files with 27 additions and 9 deletions

View File

@ -2467,13 +2467,26 @@ dumpTableData(Archive *fout, const TableDataInfo *tdinfo)
/*
* Set the TocEntry's dataLength in case we are doing a parallel dump
* and want to order dump jobs by table size. We choose to measure
* dataLength in table pages during dump, so no scaling is needed.
* dataLength in table pages (including TOAST pages) during dump, so
* no scaling is needed.
*
* However, relpages is declared as "integer" in pg_class, and hence
* also in TableInfo, but it's really BlockNumber a/k/a unsigned int.
* Cast so that we get the right interpretation of table sizes
* exceeding INT_MAX pages.
*/
te->dataLength = (BlockNumber) tbinfo->relpages;
te->dataLength += (BlockNumber) tbinfo->toastpages;
/*
* If pgoff_t is only 32 bits wide, the above refinement is useless,
* and instead we'd better worry about integer overflow. Clamp to
* INT_MAX if the correct result exceeds that.
*/
if (sizeof(te->dataLength) == 4 &&
(tbinfo->relpages < 0 || tbinfo->toastpages < 0 ||
te->dataLength < 0))
te->dataLength = INT_MAX;
}
destroyPQExpBuffer(copyBuf);
@ -6254,6 +6267,7 @@ getTables(Archive *fout, int *numTables)
int i_relhasindex;
int i_relhasrules;
int i_relpages;
int i_toastpages;
int i_owning_tab;
int i_owning_col;
int i_reltablespace;
@ -6303,6 +6317,7 @@ getTables(Archive *fout, int *numTables)
"(%s c.relowner) AS rolname, "
"c.relchecks, "
"c.relhasindex, c.relhasrules, c.relpages, "
"tc.relpages AS toastpages, "
"d.refobjid AS owning_tab, "
"d.refobjsubid AS owning_col, "
"tsp.spcname AS reltablespace, ",
@ -6459,17 +6474,14 @@ getTables(Archive *fout, int *numTables)
"LEFT JOIN pg_am am ON (c.relam = am.oid)\n");
/*
* We don't need any data from the TOAST table before 8.2.
*
* We purposefully ignore toast OIDs for partitioned tables; the reason is
* that versions 10 and 11 have them, but later versions do not, so
* emitting them causes the upgrade to fail.
*/
if (fout->remoteVersion >= 80200)
appendPQExpBufferStr(query,
"LEFT JOIN pg_class tc ON (c.reltoastrelid = tc.oid"
" AND tc.relkind = " CppAsString2(RELKIND_TOASTVALUE)
" AND c.relkind <> " CppAsString2(RELKIND_PARTITIONED_TABLE) ")\n");
appendPQExpBufferStr(query,
"LEFT JOIN pg_class tc ON (c.reltoastrelid = tc.oid"
" AND tc.relkind = " CppAsString2(RELKIND_TOASTVALUE)
" AND c.relkind <> " CppAsString2(RELKIND_PARTITIONED_TABLE) ")\n");
/*
* Restrict to interesting relkinds (in particular, not indexes). Not all
@ -6520,6 +6532,7 @@ getTables(Archive *fout, int *numTables)
i_relhasindex = PQfnumber(res, "relhasindex");
i_relhasrules = PQfnumber(res, "relhasrules");
i_relpages = PQfnumber(res, "relpages");
i_toastpages = PQfnumber(res, "toastpages");
i_owning_tab = PQfnumber(res, "owning_tab");
i_owning_col = PQfnumber(res, "owning_col");
i_reltablespace = PQfnumber(res, "reltablespace");
@ -6581,6 +6594,10 @@ getTables(Archive *fout, int *numTables)
tblinfo[i].hasindex = (strcmp(PQgetvalue(res, i, i_relhasindex), "t") == 0);
tblinfo[i].hasrules = (strcmp(PQgetvalue(res, i, i_relhasrules), "t") == 0);
tblinfo[i].relpages = atoi(PQgetvalue(res, i, i_relpages));
if (PQgetisnull(res, i, i_toastpages))
tblinfo[i].toastpages = 0;
else
tblinfo[i].toastpages = atoi(PQgetvalue(res, i, i_toastpages));
if (PQgetisnull(res, i, i_owning_tab))
{
tblinfo[i].owning_tab = InvalidOid;
@ -10407,7 +10424,7 @@ dumpDumpableObject(Archive *fout, DumpableObject *dobj)
* about is allowing blob dumping to be parallelized, not just
* getting a smarter estimate for the single TOC entry.)
*/
te->dataLength = MaxBlockNumber;
te->dataLength = INT_MAX;
}
break;
case DO_POLICY:

View File

@ -315,6 +315,7 @@ typedef struct _tableInfo
int owning_col; /* attr # of column owning sequence */
bool is_identity_sequence;
int relpages; /* table's size in pages (from pg_class) */
int toastpages; /* toast table's size in pages, if any */
bool interesting; /* true if need to collect more data */
bool dummy_view; /* view's real definition must be postponed */