
Display the leader apply worker's PID for parallel apply workers.

Add leader_pid to pg_stat_subscription. leader_pid is the process ID of
the leader apply worker if this process is a parallel apply worker. If
this field is NULL, it indicates that the process is a leader apply
worker or a synchronization worker. The new column makes it easier to
distinguish parallel apply workers from other kinds of workers and helps
to identify the leader for the parallel workers corresponding to a
particular subscription.
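
For example, once the column is in place, a query along these lines (a sketch; the worker_kind labels are illustrative, not part of the view) can classify the workers serving each subscription:

    SELECT subname, pid, leader_pid,
           CASE WHEN leader_pid IS NULL THEN 'leader apply or synchronization worker'
                ELSE 'parallel apply worker'
           END AS worker_kind
    FROM pg_stat_subscription;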

Additionally, update the leader_pid column in pg_stat_activity as well to
display the PID of the leader apply worker for parallel apply workers.
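
A rough way to see the effect there (a sketch; note that pg_stat_activity.leader_pid is also set for parallel query workers, so this join matches those as well) is to pair each worker backend with its leader:

    SELECT w.pid AS worker_pid,
           w.backend_type,
           l.pid AS leader_pid
    FROM pg_stat_activity w
    JOIN pg_stat_activity l ON l.pid = w.leader_pid;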

Author: Hou Zhijie
Reviewed-by: Peter Smith, Sawada Masahiko, Amit Kapila, Shveta Mallik
Discussion: https://postgr.es/m/CAA4eK1+wyN6zpaHUkCLorEWNx75MG0xhMwcFhvjqm2KURZEAGw@mail.gmail.com
Amit Kapila committed on 2023-01-18 09:03:12 +05:30
parent 14bdb3f13d
commit d540a02a72
11 changed files with 106 additions and 44 deletions

View File

@@ -948,6 +948,7 @@ CREATE VIEW pg_stat_subscription AS
             su.oid AS subid,
             su.subname,
             st.pid,
+            st.leader_pid,
             st.relid,
             st.received_lsn,
             st.last_msg_send_time,

View File

@@ -849,7 +849,7 @@ LogicalParallelApplyLoop(shm_mq_handle *mqh)
 static void
 pa_shutdown(int code, Datum arg)
 {
-    SendProcSignal(MyLogicalRepWorker->apply_leader_pid,
+    SendProcSignal(MyLogicalRepWorker->leader_pid,
                    PROCSIG_PARALLEL_APPLY_MESSAGE,
                    InvalidBackendId);
@@ -932,7 +932,7 @@ ParallelApplyWorkerMain(Datum main_arg)
     error_mqh = shm_mq_attach(mq, seg, NULL);
     pq_redirect_to_shm_mq(seg, error_mqh);
-    pq_set_parallel_leader(MyLogicalRepWorker->apply_leader_pid,
+    pq_set_parallel_leader(MyLogicalRepWorker->leader_pid,
                            InvalidBackendId);
     MyLogicalRepWorker->last_send_time = MyLogicalRepWorker->last_recv_time =
@@ -950,7 +950,7 @@ ParallelApplyWorkerMain(Datum main_arg)
      * The parallel apply worker doesn't need to monopolize this replication
      * origin which was already acquired by its leader process.
      */
-    replorigin_session_setup(originid, MyLogicalRepWorker->apply_leader_pid);
+    replorigin_session_setup(originid, MyLogicalRepWorker->leader_pid);
     replorigin_session_origin = originid;
     CommitTransactionCommand();

View File

@@ -410,7 +410,7 @@ retry:
     worker->relstate = SUBREL_STATE_UNKNOWN;
     worker->relstate_lsn = InvalidXLogRecPtr;
     worker->stream_fileset = NULL;
-    worker->apply_leader_pid = is_parallel_apply_worker ? MyProcPid : InvalidPid;
+    worker->leader_pid = is_parallel_apply_worker ? MyProcPid : InvalidPid;
     worker->parallel_apply = is_parallel_apply_worker;
     worker->last_lsn = InvalidXLogRecPtr;
     TIMESTAMP_NOBEGIN(worker->last_send_time);
@@ -732,7 +732,7 @@ logicalrep_worker_cleanup(LogicalRepWorker *worker)
     worker->userid = InvalidOid;
     worker->subid = InvalidOid;
     worker->relid = InvalidOid;
-    worker->apply_leader_pid = InvalidPid;
+    worker->leader_pid = InvalidPid;
     worker->parallel_apply = false;
 }
@@ -1066,13 +1066,41 @@ IsLogicalLauncher(void)
     return LogicalRepCtx->launcher_pid == MyProcPid;
 }
 
+/*
+ * Return the pid of the leader apply worker if the given pid is the pid of a
+ * parallel apply worker, otherwise, return InvalidPid.
+ */
+pid_t
+GetLeaderApplyWorkerPid(pid_t pid)
+{
+    int leader_pid = InvalidPid;
+    int i;
+
+    LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
+
+    for (i = 0; i < max_logical_replication_workers; i++)
+    {
+        LogicalRepWorker *w = &LogicalRepCtx->workers[i];
+
+        if (isParallelApplyWorker(w) && w->proc && pid == w->proc->pid)
+        {
+            leader_pid = w->leader_pid;
+            break;
+        }
+    }
+
+    LWLockRelease(LogicalRepWorkerLock);
+
+    return leader_pid;
+}
+
 /*
  * Returns state of the subscriptions.
  */
 Datum
 pg_stat_get_subscription(PG_FUNCTION_ARGS)
 {
-#define PG_STAT_GET_SUBSCRIPTION_COLS	8
+#define PG_STAT_GET_SUBSCRIPTION_COLS	9
     Oid subid = PG_ARGISNULL(0) ? InvalidOid : PG_GETARG_OID(0);
     int i;
     ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
@@ -1098,10 +1126,6 @@ pg_stat_get_subscription(PG_FUNCTION_ARGS)
         if (OidIsValid(subid) && worker.subid != subid)
             continue;
 
-        /* Skip if this is a parallel apply worker */
-        if (isParallelApplyWorker(&worker))
-            continue;
-
         worker_pid = worker.proc->pid;
 
         values[0] = ObjectIdGetDatum(worker.subid);
@@ -1110,26 +1134,32 @@ pg_stat_get_subscription(PG_FUNCTION_ARGS)
         else
             nulls[1] = true;
         values[2] = Int32GetDatum(worker_pid);
-        if (XLogRecPtrIsInvalid(worker.last_lsn))
-            nulls[3] = true;
+
+        if (isParallelApplyWorker(&worker))
+            values[3] = Int32GetDatum(worker.leader_pid);
         else
-            values[3] = LSNGetDatum(worker.last_lsn);
-        if (worker.last_send_time == 0)
+            nulls[3] = true;
+
+        if (XLogRecPtrIsInvalid(worker.last_lsn))
             nulls[4] = true;
         else
-            values[4] = TimestampTzGetDatum(worker.last_send_time);
-        if (worker.last_recv_time == 0)
+            values[4] = LSNGetDatum(worker.last_lsn);
+        if (worker.last_send_time == 0)
             nulls[5] = true;
         else
-            values[5] = TimestampTzGetDatum(worker.last_recv_time);
-        if (XLogRecPtrIsInvalid(worker.reply_lsn))
+            values[5] = TimestampTzGetDatum(worker.last_send_time);
+        if (worker.last_recv_time == 0)
             nulls[6] = true;
         else
-            values[6] = LSNGetDatum(worker.reply_lsn);
-        if (worker.reply_time == 0)
+            values[6] = TimestampTzGetDatum(worker.last_recv_time);
+        if (XLogRecPtrIsInvalid(worker.reply_lsn))
             nulls[7] = true;
         else
-            values[7] = TimestampTzGetDatum(worker.reply_time);
+            values[7] = LSNGetDatum(worker.reply_lsn);
+        if (worker.reply_time == 0)
+            nulls[8] = true;
+        else
+            values[8] = TimestampTzGetDatum(worker.reply_time);
 
         tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
                              values, nulls);

View File

@@ -25,6 +25,7 @@
#include "pgstat.h"
#include "postmaster/bgworker_internals.h"
#include "postmaster/postmaster.h"
#include "replication/logicallauncher.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "utils/acl.h"
@@ -409,9 +410,9 @@ pg_stat_get_activity(PG_FUNCTION_ARGS)
         /*
          * If a PGPROC entry was retrieved, display wait events and lock
-         * group leader information if any. To avoid extra overhead, no
-         * extra lock is being held, so there is no guarantee of
-         * consistency across multiple rows.
+         * group leader or apply leader information if any. To avoid
+         * extra overhead, no extra lock is being held, so there is no
+         * guarantee of consistency across multiple rows.
          */
         if (proc != NULL)
         {
@@ -426,14 +427,24 @@ pg_stat_get_activity(PG_FUNCTION_ARGS)
                 /*
                  * Show the leader only for active parallel workers. This
-                 * leaves the field as NULL for the leader of a parallel
-                 * group.
+                 * leaves the field as NULL for the leader of a parallel group
+                 * or the leader of parallel apply workers.
                  */
                 if (leader && leader->pid != beentry->st_procpid)
                 {
                     values[28] = Int32GetDatum(leader->pid);
                     nulls[28] = false;
                 }
+                else if (beentry->st_backendType == B_BG_WORKER)
+                {
+                    int leader_pid = GetLeaderApplyWorkerPid(beentry->st_procpid);
+
+                    if (leader_pid != InvalidPid)
+                    {
+                        values[28] = Int32GetDatum(leader_pid);
+                        nulls[28] = false;
+                    }
+                }
             }
 
             if (wait_event_type)