1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-26 12:21:12 +03:00

Add cost-based vacuum delay time to progress views.

This commit adds the amount of time spent sleeping due to
cost-based delay to the pg_stat_progress_vacuum and
pg_stat_progress_analyze system views.  A new configuration
parameter named track_cost_delay_timing, which is off by default,
controls whether this information is gathered.  For vacuum, the
reported value includes the sleep time of any associated parallel
workers.  However, parallel workers only report their sleep time
once per second to avoid overloading the leader process.

Bumps catversion.

Author: Bertrand Drouvot <bertranddrouvot.pg@gmail.com>
Co-authored-by: Nathan Bossart <nathandbossart@gmail.com>
Reviewed-by: Sami Imseih <samimseih@gmail.com>
Reviewed-by: Robert Haas <robertmhaas@gmail.com>
Reviewed-by: Masahiko Sawada <sawada.mshk@gmail.com>
Reviewed-by: Masahiro Ikeda <ikedamsh@oss.nttdata.com>
Reviewed-by: Dilip Kumar <dilipbalaut@gmail.com>
Reviewed-by: Sergei Kornilov <sk@zsrv.org>
Discussion: https://postgr.es/m/ZmaXmWDL829fzAVX%40ip-10-97-1-34.eu-west-3.compute.internal
This commit is contained in:
Nathan Bossart
2025-02-11 16:38:14 -06:00
parent e5b0b0ce15
commit bb8dff9995
11 changed files with 144 additions and 5 deletions

View File

@ -8252,6 +8252,30 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
</listitem> </listitem>
</varlistentry> </varlistentry>
<varlistentry id="guc-track-cost-delay-timing" xreflabel="track_cost_delay_timing">
<term><varname>track_cost_delay_timing</varname> (<type>boolean</type>)
<indexterm>
<primary><varname>track_cost_delay_timing</varname> configuration parameter</primary>
</indexterm>
</term>
<listitem>
<para>
Enables timing of cost-based vacuum delay (see
<xref linkend="runtime-config-resource-vacuum-cost"/>). This parameter
is off by default, as it will repeatedly query the operating system for
the current time, which may cause significant overhead on some
platforms. You can use the <xref linkend="pgtesttiming"/> tool to
measure the overhead of timing on your system. Cost-based vacuum delay
timing information is displayed in
<link linkend="vacuum-progress-reporting"><structname>pg_stat_progress_vacuum</structname></link>
and
<link linkend="analyze-progress-reporting"><structname>pg_stat_progress_analyze</structname></link>.
Only superusers and users with the appropriate <literal>SET</literal>
privilege can change this setting.
</para>
</listitem>
</varlistentry>
<varlistentry id="guc-track-io-timing" xreflabel="track_io_timing"> <varlistentry id="guc-track-io-timing" xreflabel="track_io_timing">
<term><varname>track_io_timing</varname> (<type>boolean</type>) <term><varname>track_io_timing</varname> (<type>boolean</type>)
<indexterm> <indexterm>

View File

@ -5606,6 +5606,18 @@ FROM pg_stat_get_backend_idset() AS backendid;
<literal>acquiring inherited sample rows</literal>. <literal>acquiring inherited sample rows</literal>.
</para></entry> </para></entry>
</row> </row>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>delay_time</structfield> <type>double precision</type>
</para>
<para>
Total time spent sleeping due to cost-based delay (see
<xref linkend="runtime-config-resource-vacuum-cost"/>, in milliseconds
(if <xref linkend="guc-track-cost-delay-timing"/> is enabled, otherwise
zero).
</para></entry>
</row>
</tbody> </tbody>
</tgroup> </tgroup>
</table> </table>
@ -6531,6 +6543,21 @@ FROM pg_stat_get_backend_idset() AS backendid;
<literal>cleaning up indexes</literal>. <literal>cleaning up indexes</literal>.
</para></entry> </para></entry>
</row> </row>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>delay_time</structfield> <type>double precision</type>
</para>
<para>
Total time spent sleeping due to cost-based delay (see
<xref linkend="runtime-config-resource-vacuum-cost"/>), in milliseconds
(if <xref linkend="guc-track-cost-delay-timing"/> is enabled, otherwise
zero). This includes the time that any associated parallel workers have
slept. However, parallel workers report their sleep time no more
frequently than once per second, so the reported value may be slightly
stale.
</para></entry>
</row>
</tbody> </tbody>
</tgroup> </tgroup>
</table> </table>

View File

@ -1213,7 +1213,8 @@ CREATE VIEW pg_stat_progress_analyze AS
S.param5 AS ext_stats_computed, S.param5 AS ext_stats_computed,
S.param6 AS child_tables_total, S.param6 AS child_tables_total,
S.param7 AS child_tables_done, S.param7 AS child_tables_done,
CAST(S.param8 AS oid) AS current_child_table_relid CAST(S.param8 AS oid) AS current_child_table_relid,
S.param9 / 1000000::double precision AS delay_time
FROM pg_stat_get_progress_info('ANALYZE') AS S FROM pg_stat_get_progress_info('ANALYZE') AS S
LEFT JOIN pg_database D ON S.datid = D.oid; LEFT JOIN pg_database D ON S.datid = D.oid;
@ -1233,7 +1234,8 @@ CREATE VIEW pg_stat_progress_vacuum AS
S.param4 AS heap_blks_vacuumed, S.param5 AS index_vacuum_count, S.param4 AS heap_blks_vacuumed, S.param5 AS index_vacuum_count,
S.param6 AS max_dead_tuple_bytes, S.param7 AS dead_tuple_bytes, S.param6 AS max_dead_tuple_bytes, S.param7 AS dead_tuple_bytes,
S.param8 AS num_dead_item_ids, S.param9 AS indexes_total, S.param8 AS num_dead_item_ids, S.param9 AS indexes_total,
S.param10 AS indexes_processed S.param10 AS indexes_processed,
S.param11 / 1000000::double precision AS delay_time
FROM pg_stat_get_progress_info('VACUUM') AS S FROM pg_stat_get_progress_info('VACUUM') AS S
LEFT JOIN pg_database D ON S.datid = D.oid; LEFT JOIN pg_database D ON S.datid = D.oid;

View File

@ -39,6 +39,7 @@
#include "catalog/pg_inherits.h" #include "catalog/pg_inherits.h"
#include "commands/cluster.h" #include "commands/cluster.h"
#include "commands/defrem.h" #include "commands/defrem.h"
#include "commands/progress.h"
#include "commands/vacuum.h" #include "commands/vacuum.h"
#include "miscadmin.h" #include "miscadmin.h"
#include "nodes/makefuncs.h" #include "nodes/makefuncs.h"
@ -59,6 +60,12 @@
#include "utils/snapmgr.h" #include "utils/snapmgr.h"
#include "utils/syscache.h" #include "utils/syscache.h"
/*
* Minimum interval for cost-based vacuum delay reports from a parallel worker.
* This aims to avoid sending too many messages and waking up the leader too
* frequently.
*/
#define PARALLEL_VACUUM_DELAY_REPORT_INTERVAL_NS (NS_PER_S)
/* /*
* GUC parameters * GUC parameters
@ -70,6 +77,7 @@ int vacuum_multixact_freeze_table_age;
int vacuum_failsafe_age; int vacuum_failsafe_age;
int vacuum_multixact_failsafe_age; int vacuum_multixact_failsafe_age;
double vacuum_max_eager_freeze_failure_rate; double vacuum_max_eager_freeze_failure_rate;
bool track_cost_delay_timing;
/* /*
* Variables for cost-based vacuum delay. The defaults differ between * Variables for cost-based vacuum delay. The defaults differ between
@ -80,6 +88,9 @@ double vacuum_max_eager_freeze_failure_rate;
double vacuum_cost_delay = 0; double vacuum_cost_delay = 0;
int vacuum_cost_limit = 200; int vacuum_cost_limit = 200;
/* Variable for reporting cost-based vacuum delay from parallel workers. */
int64 parallel_vacuum_worker_delay_ns = 0;
/* /*
* VacuumFailsafeActive is a defined as a global so that we can determine * VacuumFailsafeActive is a defined as a global so that we can determine
* whether or not to re-enable cost-based vacuum delay when vacuuming a table. * whether or not to re-enable cost-based vacuum delay when vacuuming a table.
@ -2416,13 +2427,66 @@ vacuum_delay_point(bool is_analyze)
/* Nap if appropriate */ /* Nap if appropriate */
if (msec > 0) if (msec > 0)
{ {
instr_time delay_start;
if (msec > vacuum_cost_delay * 4) if (msec > vacuum_cost_delay * 4)
msec = vacuum_cost_delay * 4; msec = vacuum_cost_delay * 4;
if (track_cost_delay_timing)
INSTR_TIME_SET_CURRENT(delay_start);
pgstat_report_wait_start(WAIT_EVENT_VACUUM_DELAY); pgstat_report_wait_start(WAIT_EVENT_VACUUM_DELAY);
pg_usleep(msec * 1000); pg_usleep(msec * 1000);
pgstat_report_wait_end(); pgstat_report_wait_end();
if (track_cost_delay_timing)
{
instr_time delay_end;
instr_time delay;
INSTR_TIME_SET_CURRENT(delay_end);
INSTR_TIME_SET_ZERO(delay);
INSTR_TIME_ACCUM_DIFF(delay, delay_end, delay_start);
/*
* For parallel workers, we only report the delay time every once
* in a while to avoid overloading the leader with messages and
* interrupts.
*/
if (IsParallelWorker())
{
static instr_time last_report_time;
instr_time time_since_last_report;
Assert(!is_analyze);
/* Accumulate the delay time */
parallel_vacuum_worker_delay_ns += INSTR_TIME_GET_NANOSEC(delay);
/* Calculate interval since last report */
INSTR_TIME_SET_ZERO(time_since_last_report);
INSTR_TIME_ACCUM_DIFF(time_since_last_report, delay_end, last_report_time);
/* If we haven't reported in a while, do so now */
if (INSTR_TIME_GET_NANOSEC(time_since_last_report) >=
PARALLEL_VACUUM_DELAY_REPORT_INTERVAL_NS)
{
pgstat_progress_parallel_incr_param(PROGRESS_VACUUM_DELAY_TIME,
parallel_vacuum_worker_delay_ns);
/* Reset variables */
last_report_time = delay_end;
parallel_vacuum_worker_delay_ns = 0;
}
}
else if (is_analyze)
pgstat_progress_incr_param(PROGRESS_ANALYZE_DELAY_TIME,
INSTR_TIME_GET_NANOSEC(delay));
else
pgstat_progress_incr_param(PROGRESS_VACUUM_DELAY_TIME,
INSTR_TIME_GET_NANOSEC(delay));
}
/* /*
* We don't want to ignore postmaster death during very long vacuums * We don't want to ignore postmaster death during very long vacuums
* with vacuum_cost_delay configured. We can't use the usual * with vacuum_cost_delay configured. We can't use the usual

View File

@ -1094,6 +1094,11 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
InstrEndParallelQuery(&buffer_usage[ParallelWorkerNumber], InstrEndParallelQuery(&buffer_usage[ParallelWorkerNumber],
&wal_usage[ParallelWorkerNumber]); &wal_usage[ParallelWorkerNumber]);
/* Report any remaining cost-based vacuum delay time */
if (track_cost_delay_timing)
pgstat_progress_parallel_incr_param(PROGRESS_VACUUM_DELAY_TIME,
parallel_vacuum_worker_delay_ns);
TidStoreDetach(dead_items); TidStoreDetach(dead_items);
/* Pop the error context stack */ /* Pop the error context stack */

View File

@ -1471,6 +1471,15 @@ struct config_bool ConfigureNamesBool[] =
true, true,
NULL, NULL, NULL NULL, NULL, NULL
}, },
{
{"track_cost_delay_timing", PGC_SUSET, STATS_CUMULATIVE,
gettext_noop("Collects timing statistics for cost-based vacuum delay."),
NULL
},
&track_cost_delay_timing,
false,
NULL, NULL, NULL
},
{ {
{"track_io_timing", PGC_SUSET, STATS_CUMULATIVE, {"track_io_timing", PGC_SUSET, STATS_CUMULATIVE,
gettext_noop("Collects timing statistics for database I/O activity."), gettext_noop("Collects timing statistics for database I/O activity."),

View File

@ -635,6 +635,7 @@
#track_activities = on #track_activities = on
#track_activity_query_size = 1024 # (change requires restart) #track_activity_query_size = 1024 # (change requires restart)
#track_counts = on #track_counts = on
#track_cost_delay_timing = off
#track_io_timing = off #track_io_timing = off
#track_wal_io_timing = off #track_wal_io_timing = off
#track_functions = none # none, pl, all #track_functions = none # none, pl, all

View File

@ -57,6 +57,6 @@
*/ */
/* yyyymmddN */ /* yyyymmddN */
#define CATALOG_VERSION_NO 202502111 #define CATALOG_VERSION_NO 202502112
#endif #endif

View File

@ -28,6 +28,7 @@
#define PROGRESS_VACUUM_NUM_DEAD_ITEM_IDS 7 #define PROGRESS_VACUUM_NUM_DEAD_ITEM_IDS 7
#define PROGRESS_VACUUM_INDEXES_TOTAL 8 #define PROGRESS_VACUUM_INDEXES_TOTAL 8
#define PROGRESS_VACUUM_INDEXES_PROCESSED 9 #define PROGRESS_VACUUM_INDEXES_PROCESSED 9
#define PROGRESS_VACUUM_DELAY_TIME 10
/* Phases of vacuum (as advertised via PROGRESS_VACUUM_PHASE) */ /* Phases of vacuum (as advertised via PROGRESS_VACUUM_PHASE) */
#define PROGRESS_VACUUM_PHASE_SCAN_HEAP 1 #define PROGRESS_VACUUM_PHASE_SCAN_HEAP 1
@ -46,6 +47,7 @@
#define PROGRESS_ANALYZE_CHILD_TABLES_TOTAL 5 #define PROGRESS_ANALYZE_CHILD_TABLES_TOTAL 5
#define PROGRESS_ANALYZE_CHILD_TABLES_DONE 6 #define PROGRESS_ANALYZE_CHILD_TABLES_DONE 6
#define PROGRESS_ANALYZE_CURRENT_CHILD_TABLE_RELID 7 #define PROGRESS_ANALYZE_CURRENT_CHILD_TABLE_RELID 7
#define PROGRESS_ANALYZE_DELAY_TIME 8
/* Phases of analyze (as advertised via PROGRESS_ANALYZE_PHASE) */ /* Phases of analyze (as advertised via PROGRESS_ANALYZE_PHASE) */
#define PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS 1 #define PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS 1

View File

@ -303,6 +303,7 @@ extern PGDLLIMPORT int vacuum_multixact_freeze_min_age;
extern PGDLLIMPORT int vacuum_multixact_freeze_table_age; extern PGDLLIMPORT int vacuum_multixact_freeze_table_age;
extern PGDLLIMPORT int vacuum_failsafe_age; extern PGDLLIMPORT int vacuum_failsafe_age;
extern PGDLLIMPORT int vacuum_multixact_failsafe_age; extern PGDLLIMPORT int vacuum_multixact_failsafe_age;
extern PGDLLIMPORT bool track_cost_delay_timing;
/* /*
* Relevant for vacuums implementing eager scanning. Normal vacuums may * Relevant for vacuums implementing eager scanning. Normal vacuums may
@ -330,6 +331,8 @@ extern PGDLLIMPORT bool VacuumFailsafeActive;
extern PGDLLIMPORT double vacuum_cost_delay; extern PGDLLIMPORT double vacuum_cost_delay;
extern PGDLLIMPORT int vacuum_cost_limit; extern PGDLLIMPORT int vacuum_cost_limit;
extern PGDLLIMPORT int64 parallel_vacuum_worker_delay_ns;
/* in commands/vacuum.c */ /* in commands/vacuum.c */
extern void ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel); extern void ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel);
extern void vacuum(List *relations, VacuumParams *params, extern void vacuum(List *relations, VacuumParams *params,

View File

@ -1932,7 +1932,8 @@ pg_stat_progress_analyze| SELECT s.pid,
s.param5 AS ext_stats_computed, s.param5 AS ext_stats_computed,
s.param6 AS child_tables_total, s.param6 AS child_tables_total,
s.param7 AS child_tables_done, s.param7 AS child_tables_done,
(s.param8)::oid AS current_child_table_relid (s.param8)::oid AS current_child_table_relid,
((s.param9)::double precision / (1000000)::double precision) AS delay_time
FROM (pg_stat_get_progress_info('ANALYZE'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20) FROM (pg_stat_get_progress_info('ANALYZE'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20)
LEFT JOIN pg_database d ON ((s.datid = d.oid))); LEFT JOIN pg_database d ON ((s.datid = d.oid)));
pg_stat_progress_basebackup| SELECT pid, pg_stat_progress_basebackup| SELECT pid,
@ -2062,7 +2063,8 @@ pg_stat_progress_vacuum| SELECT s.pid,
s.param7 AS dead_tuple_bytes, s.param7 AS dead_tuple_bytes,
s.param8 AS num_dead_item_ids, s.param8 AS num_dead_item_ids,
s.param9 AS indexes_total, s.param9 AS indexes_total,
s.param10 AS indexes_processed s.param10 AS indexes_processed,
((s.param11)::double precision / (1000000)::double precision) AS delay_time
FROM (pg_stat_get_progress_info('VACUUM'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20) FROM (pg_stat_get_progress_info('VACUUM'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20)
LEFT JOIN pg_database d ON ((s.datid = d.oid))); LEFT JOIN pg_database d ON ((s.datid = d.oid)));
pg_stat_recovery_prefetch| SELECT stats_reset, pg_stat_recovery_prefetch| SELECT stats_reset,