diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 9ab070adffb..1c5cfee25d1 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -28663,6 +28663,144 @@ acl | {postgres=arwdDxtm/postgres,foo=r/postgres}
+
+
+
+ pg_get_process_memory_contexts
+
+ pg_get_process_memory_contexts ( pid integer, summary boolean, timeout float )
+ setof record
+ ( name text,
+ ident text,
+ type text,
+ path integer[],
+ level integer,
+ total_bytes bigint,
+ total_nblocks bigint,
+ free_bytes bigint,
+ free_chunks bigint,
+ used_bytes bigint,
+ num_agg_contexts integer,
+ stats_timestamp timestamptz )
+
+
+ This function handles requests to display the memory contexts of a
+ PostgreSQL process with the specified
+ process ID. The function can be used to send requests to backends as
+ well as auxiliary processes.
+
+
+ The returned record contains extended statistics for each memory
+ context:
+
+
+
+ name - The name of the memory context.
+
+
+
+
+ ident - Memory context ID (if any).
+
+
+
+
+ type - The type of memory context, possible
+ values are: AllocSet, Generation, Slab and Bump.
+
+
+
+
+ path - Memory contexts are organized in a
+ tree model with TopMemoryContext as the root, and all other memory
+ contexts as nodes in the tree. The path
+ displays the path from the root to the current memory context. The
+ path is limited to 100 children per node, with each node limited
+ to a max depth of 100, to preserve memory during reporting. The
+ printed path will also be limited to 100 nodes counting from the
+ TopMemoryContext.
+
+
+
+
+ level - The level in the tree of the current
+ memory context.
+
+
+
+
+ total_bytes - The total number of bytes
+ allocated to this memory context.
+
+
+
+
+ total_nblocks - The total number of blocks
+ used for the allocated memory.
+
+
+
+
+ free_bytes - The amount of free memory in
+ this memory context.
+
+
+
+
+ free_chunks - The number of chunks that
+ free_bytes corresponds to.
+
+
+
+
+ used_bytes - The total number of bytes
+ currently occupied.
+
+
+
+
+ num_agg_contexts - The number of memory
+ contexts aggregated in the displayed statistics.
+
+
+
+
+ stats_timestamp - When the statistics were
+ extracted from the process.
+
+
+
+
+
+ When summary is true, statistics
+ for memory contexts at levels 1 and 2 are displayed, with level 1
+ representing the root node (i.e., TopMemoryContext).
+ Statistics for contexts on level 2 and below are aggregates of all
+ child contexts' statistics, where num_agg_contexts
+ indicates the number of aggregated child contexts. When
+ summary is false,
+ the num_agg_contexts value is 1,
+ indicating that individual statistics are being displayed. The levels
+ are limited to the first 100 contexts.
+
+
+ Busy processes can delay reporting memory context statistics;
+ timeout specifies the number of seconds
+ to wait for updated statistics. timeout can be
+ specified in fractions of a second.
+
+
+ After receiving memory context statistics from the target process, it
+ returns the results as one row per context. If all the contexts don't
+ fit within the pre-determined size limit, the remaining context
+ statistics are aggregated and a cumulative total is displayed. The
+ num_agg_contexts column indicates the number of
+ contexts aggregated in the displayed statistics. When
+ num_agg_contexts is 1 it means
+ that the context statistics are displayed separately.
+
+
+
@@ -28802,6 +28940,40 @@ LOG: Grand total: 1651920 bytes in 201 blocks; 622360 free (88 chunks); 1029560
because it may generate a large number of log messages.
+
+ pg_get_process_memory_contexts can be used to request
+ memory context statistics of any PostgreSQL
+ process. For example:
+
+postgres=# SELECT * FROM pg_get_process_memory_contexts(
+ (SELECT pid FROM pg_stat_activity
+ WHERE backend_type = 'checkpointer'),
+ false, 0.5) LIMIT 1;
+-[ RECORD 1 ]----+------------------------------
+name | TopMemoryContext
+ident |
+type | AllocSet
+path | {1}
+level | 1
+total_bytes | 90304
+total_nblocks | 3
+free_bytes | 2880
+free_chunks | 1
+used_bytes | 87424
+num_agg_contexts | 1
+stats_timestamp | 2025-03-24 13:55:47.796698+01
+
+
+
+ While pg_get_process_memory_contexts can be used to
+ query memory contexts of the local backend,
+ pg_backend_memory_contexts
+ (see for more details)
+ will be less resource intensive when only the local backend is of interest.
+
+
+
+
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 08f780a2e63..15efb02badb 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -674,6 +674,11 @@ GRANT SELECT ON pg_backend_memory_contexts TO pg_read_all_stats;
REVOKE EXECUTE ON FUNCTION pg_get_backend_memory_contexts() FROM PUBLIC;
GRANT EXECUTE ON FUNCTION pg_get_backend_memory_contexts() TO pg_read_all_stats;
+REVOKE EXECUTE ON FUNCTION
+ pg_get_process_memory_contexts(integer, boolean, float) FROM PUBLIC;
+GRANT EXECUTE ON FUNCTION
+ pg_get_process_memory_contexts(integer, boolean, float) TO pg_read_all_stats;
+
-- Statistics views
CREATE VIEW pg_stat_all_tables AS
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 2513a8ef8a6..16756152b71 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -781,6 +781,10 @@ ProcessAutoVacLauncherInterrupts(void)
if (LogMemoryContextPending)
ProcessLogMemoryContextInterrupt();
+ /* Publish memory contexts of this process */
+ if (PublishMemoryContextPending)
+ ProcessGetMemoryContextInterrupt();
+
/* Process sinval catchup interrupts that happened while sleeping */
ProcessCatchupInterrupt();
}
diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index fda91ffd1ce..d3cb3f1891c 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -663,6 +663,10 @@ ProcessCheckpointerInterrupts(void)
/* Perform logging of memory contexts of this process */
if (LogMemoryContextPending)
ProcessLogMemoryContextInterrupt();
+
+ /* Publish memory contexts of this process */
+ if (PublishMemoryContextPending)
+ ProcessGetMemoryContextInterrupt();
}
/*
diff --git a/src/backend/postmaster/interrupt.c b/src/backend/postmaster/interrupt.c
index 0ae9bf906ec..f24f574e748 100644
--- a/src/backend/postmaster/interrupt.c
+++ b/src/backend/postmaster/interrupt.c
@@ -48,6 +48,10 @@ ProcessMainLoopInterrupts(void)
/* Perform logging of memory contexts of this process */
if (LogMemoryContextPending)
ProcessLogMemoryContextInterrupt();
+
+ /* Publish memory contexts of this process */
+ if (PublishMemoryContextPending)
+ ProcessGetMemoryContextInterrupt();
}
/*
diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c
index 7e622ae4bd2..cb7408acf4c 100644
--- a/src/backend/postmaster/pgarch.c
+++ b/src/backend/postmaster/pgarch.c
@@ -867,6 +867,10 @@ ProcessPgArchInterrupts(void)
if (LogMemoryContextPending)
ProcessLogMemoryContextInterrupt();
+ /* Publish memory contexts of this process */
+ if (PublishMemoryContextPending)
+ ProcessGetMemoryContextInterrupt();
+
if (ConfigReloadPending)
{
char *archiveLib = pstrdup(XLogArchiveLibrary);
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index 27e86cf393f..7149a67fcbc 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -192,6 +192,10 @@ ProcessStartupProcInterrupts(void)
/* Perform logging of memory contexts of this process */
if (LogMemoryContextPending)
ProcessLogMemoryContextInterrupt();
+
+ /* Publish memory contexts of this process */
+ if (PublishMemoryContextPending)
+ ProcessGetMemoryContextInterrupt();
}
diff --git a/src/backend/postmaster/walsummarizer.c b/src/backend/postmaster/walsummarizer.c
index 0fec4f1f871..c7a76711cc5 100644
--- a/src/backend/postmaster/walsummarizer.c
+++ b/src/backend/postmaster/walsummarizer.c
@@ -879,6 +879,10 @@ ProcessWalSummarizerInterrupts(void)
/* Perform logging of memory contexts of this process */
if (LogMemoryContextPending)
ProcessLogMemoryContextInterrupt();
+
+ /* Publish memory contexts of this process */
+ if (PublishMemoryContextPending)
+ ProcessGetMemoryContextInterrupt();
}
/*
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index 2fa045e6b0f..00c76d05356 100644
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -51,6 +51,7 @@
#include "storage/sinvaladt.h"
#include "utils/guc.h"
#include "utils/injection_point.h"
+#include "utils/memutils.h"
/* GUCs */
int shared_memory_type = DEFAULT_SHARED_MEMORY_TYPE;
@@ -150,6 +151,7 @@ CalculateShmemSize(int *num_semaphores)
size = add_size(size, InjectionPointShmemSize());
size = add_size(size, SlotSyncShmemSize());
size = add_size(size, AioShmemSize());
+ size = add_size(size, MemoryContextReportingShmemSize());
/* include additional requested shmem from preload libraries */
size = add_size(size, total_addin_request);
@@ -343,6 +345,7 @@ CreateOrAttachShmemStructs(void)
WaitEventCustomShmemInit();
InjectionPointShmemInit();
AioShmemInit();
+ MemoryContextReportingShmemInit();
}
/*
diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c
index b7c39a4c5f0..a3c2cd12277 100644
--- a/src/backend/storage/ipc/procsignal.c
+++ b/src/backend/storage/ipc/procsignal.c
@@ -690,6 +690,9 @@ procsignal_sigusr1_handler(SIGNAL_ARGS)
if (CheckProcSignal(PROCSIG_LOG_MEMORY_CONTEXT))
HandleLogMemoryContextInterrupt();
+ if (CheckProcSignal(PROCSIG_GET_MEMORY_CONTEXT))
+ HandleGetMemoryContextInterrupt();
+
if (CheckProcSignal(PROCSIG_PARALLEL_APPLY_MESSAGE))
HandleParallelApplyMessageInterrupt();
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 3df29658f18..dc4d96c16af 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -178,6 +178,8 @@ static const char *const BuiltinTrancheNames[] = {
[LWTRANCHE_XACT_SLRU] = "XactSLRU",
[LWTRANCHE_PARALLEL_VACUUM_DSA] = "ParallelVacuumDSA",
[LWTRANCHE_AIO_URING_COMPLETION] = "AioUringCompletion",
+ [LWTRANCHE_MEMORY_CONTEXT_REPORTING_STATE] = "MemoryContextReportingState",
+ [LWTRANCHE_MEMORY_CONTEXT_REPORTING_PROC] = "MemoryContextReportingPerProcess",
};
StaticAssertDecl(lengthof(BuiltinTrancheNames) ==
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index e9ef0fbfe32..f194e6b3dcc 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -50,6 +50,7 @@
#include "storage/procsignal.h"
#include "storage/spin.h"
#include "storage/standby.h"
+#include "utils/memutils.h"
#include "utils/timeout.h"
#include "utils/timestamp.h"
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 6ae9f38f0c8..dc4c600922d 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -3535,6 +3535,9 @@ ProcessInterrupts(void)
if (LogMemoryContextPending)
ProcessLogMemoryContextInterrupt();
+ if (PublishMemoryContextPending)
+ ProcessGetMemoryContextInterrupt();
+
if (ParallelApplyMessagePending)
ProcessParallelApplyMessages();
}
diff --git a/src/backend/utils/activity/wait_event_names.txt b/src/backend/utils/activity/wait_event_names.txt
index 8bce14c38fd..23eaf559c8d 100644
--- a/src/backend/utils/activity/wait_event_names.txt
+++ b/src/backend/utils/activity/wait_event_names.txt
@@ -161,6 +161,7 @@ WAL_RECEIVER_EXIT "Waiting for the WAL receiver to exit."
WAL_RECEIVER_WAIT_START "Waiting for startup process to send initial data for streaming replication."
WAL_SUMMARY_READY "Waiting for a new WAL summary to be generated."
XACT_GROUP_UPDATE "Waiting for the group leader to update transaction status at transaction end."
+MEM_CXT_PUBLISH "Waiting for a process to publish memory information."
ABI_compatibility:
diff --git a/src/backend/utils/adt/mcxtfuncs.c b/src/backend/utils/adt/mcxtfuncs.c
index 396c2f223b4..3ede88e5036 100644
--- a/src/backend/utils/adt/mcxtfuncs.c
+++ b/src/backend/utils/adt/mcxtfuncs.c
@@ -17,28 +17,25 @@
#include "funcapi.h"
#include "mb/pg_wchar.h"
+#include "miscadmin.h"
+#include "access/twophase.h"
+#include "catalog/pg_authid_d.h"
#include "storage/proc.h"
#include "storage/procarray.h"
+#include "utils/acl.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/hsearch.h"
+#include "utils/memutils.h"
+#include "utils/wait_event_types.h"
/* ----------
* The max bytes for showing identifiers of MemoryContext.
* ----------
*/
#define MEMORY_CONTEXT_IDENT_DISPLAY_SIZE 1024
-
-/*
- * MemoryContextId
- * Used for storage of transient identifiers for
- * pg_get_backend_memory_contexts.
- */
-typedef struct MemoryContextId
-{
- MemoryContext context;
- int context_id;
-} MemoryContextId;
+struct MemoryStatsBackendState *memCxtState = NULL;
+struct MemoryStatsCtl *memCxtArea = NULL;
/*
* int_list_to_array
@@ -89,7 +86,7 @@ PutMemoryContextsStatsTupleStore(Tuplestorestate *tupstore,
*/
for (MemoryContext cur = context; cur != NULL; cur = cur->parent)
{
- MemoryContextId *entry;
+ MemoryStatsContextId *entry;
bool found;
entry = hash_search(context_id_lookup, &cur, HASH_FIND, &found);
@@ -143,24 +140,7 @@ PutMemoryContextsStatsTupleStore(Tuplestorestate *tupstore,
else
nulls[1] = true;
- switch (context->type)
- {
- case T_AllocSetContext:
- type = "AllocSet";
- break;
- case T_GenerationContext:
- type = "Generation";
- break;
- case T_SlabContext:
- type = "Slab";
- break;
- case T_BumpContext:
- type = "Bump";
- break;
- default:
- type = "???";
- break;
- }
+ type = ContextTypeToString(context->type);
values[2] = CStringGetTextDatum(type);
values[3] = Int32GetDatum(list_length(path)); /* level */
@@ -175,6 +155,38 @@ PutMemoryContextsStatsTupleStore(Tuplestorestate *tupstore,
list_free(path);
}
+/*
+ * ContextTypeToString
+ *		Map a memory context node tag to its human-readable type name.
+ *
+ * Tags that are not one of the known memory context types yield "???".
+ * This should cover the same types as MemoryContextIsValid.
+ */
+const char *
+ContextTypeToString(NodeTag type)
+{
+	switch (type)
+	{
+		case T_AllocSetContext:
+			return "AllocSet";
+
+		case T_GenerationContext:
+			return "Generation";
+
+		case T_SlabContext:
+			return "Slab";
+
+		case T_BumpContext:
+			return "Bump";
+
+		default:
+			break;
+	}
+
+	/* Not a memory context type we know how to name */
+	return "???";
+}
+
/*
* pg_get_backend_memory_contexts
* SQL SRF showing backend memory context.
@@ -189,7 +201,7 @@ pg_get_backend_memory_contexts(PG_FUNCTION_ARGS)
HTAB *context_id_lookup;
ctl.keysize = sizeof(MemoryContext);
- ctl.entrysize = sizeof(MemoryContextId);
+ ctl.entrysize = sizeof(MemoryStatsContextId);
ctl.hcxt = CurrentMemoryContext;
context_id_lookup = hash_create("pg_get_backend_memory_contexts",
@@ -216,7 +228,7 @@ pg_get_backend_memory_contexts(PG_FUNCTION_ARGS)
foreach_ptr(MemoryContextData, cur, contexts)
{
- MemoryContextId *entry;
+ MemoryStatsContextId *entry;
bool found;
/*
@@ -224,8 +236,8 @@ pg_get_backend_memory_contexts(PG_FUNCTION_ARGS)
* PutMemoryContextsStatsTupleStore needs this to populate the "path"
* column with the parent context_ids.
*/
- entry = (MemoryContextId *) hash_search(context_id_lookup, &cur,
- HASH_ENTER, &found);
+ entry = (MemoryStatsContextId *) hash_search(context_id_lookup, &cur,
+ HASH_ENTER, &found);
entry->context_id = context_id++;
Assert(!found);
@@ -305,3 +317,349 @@ pg_log_backend_memory_contexts(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(true);
}
+
+/*
+ * pg_get_process_memory_contexts
+ *		Signal a backend or an auxiliary process to send its memory contexts,
+ *		wait for the results and display them.
+ *
+ * By default, only superusers or users with PG_READ_ALL_STATS are allowed to
+ * signal a process to return the memory contexts. This is because allowing
+ * any users to issue this request at an unbounded rate would cause lots of
+ * requests to be sent, which can lead to denial of service. Additional roles
+ * can be permitted with GRANT.
+ *
+ * On receipt of this signal, a backend or an auxiliary process sets the flag
+ * in the signal handler, which causes the next CHECK_FOR_INTERRUPTS()
+ * or process-specific interrupt handler to copy the memory context details
+ * to a dynamic shared memory space.
+ *
+ * We have defined a limit on DSA memory that could be allocated per process -
+ * if the process has more memory contexts than what can fit in the allocated
+ * size, the excess contexts are summarized and represented as cumulative total
+ * at the end of the buffer.
+ *
+ * After sending the signal, wait on a condition variable. The publishing
+ * backend, after copying the data to shared memory, sends signal on that
+ * condition variable. There is one condition variable per publishing backend.
+ * Once the condition variable is signalled, check if the latest memory context
+ * information is available and display.
+ *
+ * If the publishing backend does not respond before the condition variable
+ * sleep times out, which is set to the time remaining of the timeout
+ * specified by the user, give up and return previously published statistics,
+ * if any. If no previous statistics exist, return NULL. NOTE(review):
+ * MEMSTATS_WAIT_TIMEOUT is defined below but is not used by this function.
+ */
+#define MEMSTATS_WAIT_TIMEOUT 100
+Datum
+pg_get_process_memory_contexts(PG_FUNCTION_ARGS)
+{
+	int			pid = PG_GETARG_INT32(0);
+	bool		summary = PG_GETARG_BOOL(1);
+	double		timeout = PG_GETARG_FLOAT8(2);
+	PGPROC	   *proc;
+	ProcNumber	procNumber = INVALID_PROC_NUMBER;
+	bool		proc_is_aux = false;
+	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+	MemoryStatsEntry *memcxt_info;
+	TimestampTz start_timestamp;
+
+	/*
+	 * See if the process with given pid is a backend or an auxiliary process
+	 * and remember the type for when we requery the process later.
+	 */
+	proc = BackendPidGetProc(pid);
+	if (proc == NULL)
+	{
+		proc = AuxiliaryPidGetProc(pid);
+		proc_is_aux = true;
+	}
+
+	/*
+	 * BackendPidGetProc() and AuxiliaryPidGetProc() return NULL if the pid
+	 * isn't valid; this is however not a problem, so leave with a WARNING.
+	 * See comment in pg_log_backend_memory_contexts for a discussion on this.
+	 */
+	if (proc == NULL)
+	{
+		/*
+		 * This is just a warning so a loop-through-resultset will not abort
+		 * if one backend terminated on its own during the run.
+		 */
+		ereport(WARNING,
+				errmsg("PID %d is not a PostgreSQL server process", pid));
+		PG_RETURN_NULL();
+	}
+
+	InitMaterializedSRF(fcinfo, 0);
+
+	procNumber = GetNumberFromPGProc(proc);
+
+	LWLockAcquire(&memCxtState[procNumber].lw_lock, LW_EXCLUSIVE);
+	memCxtState[procNumber].summary = summary;
+	LWLockRelease(&memCxtState[procNumber].lw_lock);
+
+	start_timestamp = GetCurrentTimestamp();
+
+	/*
+	 * Send a signal to a PostgreSQL process, informing it we want it to
+	 * produce information about its memory contexts.
+	 */
+	if (SendProcSignal(pid, PROCSIG_GET_MEMORY_CONTEXT, procNumber) < 0)
+	{
+		ereport(WARNING,
+				errmsg("could not send signal to process %d: %m", pid));
+		PG_RETURN_NULL();
+	}
+
+	/*
+	 * Even if the proc has published statistics, they may not be due to the
+	 * current request, but previously published stats. Check if the stats
+	 * are updated by comparing the timestamp, if the stats are newer than our
+	 * previously recorded timestamp from before sending the procsignal, they
+	 * must by definition be updated. Wait for the timeout specified by the
+	 * user, following which display old statistics if available or return
+	 * NULL.
+	 */
+	while (1)
+	{
+		long		msecs;
+
+		/*
+		 * We expect to come out of sleep when the requested process has
+		 * finished publishing the statistics, verified using the valid DSA
+		 * pointer.
+		 *
+		 * Make sure that the information belongs to the pid we requested
+		 * information for; otherwise loop back and wait for the server
+		 * process to finish publishing statistics.
+		 */
+		LWLockAcquire(&memCxtState[procNumber].lw_lock, LW_EXCLUSIVE);
+
+		/*
+		 * A note in procnumber.h says that a procNumber can be re-used for
+		 * a different backend immediately after a backend exits. In case an
+		 * old process' data was there and not updated by the current process
+		 * in the slot identified by the procNumber, the pid of the requested
+		 * process and the proc_id might not match.
+		 */
+		if (memCxtState[procNumber].proc_id == pid)
+		{
+			/*
+			 * Break if the latest stats have been read, indicated by
+			 * statistics timestamp being newer than the current request
+			 * timestamp.
+			 */
+			msecs = TimestampDifferenceMilliseconds(start_timestamp,
+													memCxtState[procNumber].stats_timestamp);
+
+			if (DsaPointerIsValid(memCxtState[procNumber].memstats_dsa_pointer)
+				&& msecs > 0)
+				break;
+		}
+		LWLockRelease(&memCxtState[procNumber].lw_lock);
+
+		/*
+		 * Recheck the state of the backend before sleeping on the condition
+		 * variable to ensure the process is still alive. Only check the
+		 * relevant process type based on the earlier PID check.
+		 */
+		if (proc_is_aux)
+			proc = AuxiliaryPidGetProc(pid);
+		else
+			proc = BackendPidGetProc(pid);
+
+		/*
+		 * The process ending during memory context processing is not an
+		 * error.
+		 */
+		if (proc == NULL)
+		{
+			ereport(WARNING,
+					errmsg("PID %d is no longer a PostgreSQL server process",
+						   pid));
+			PG_RETURN_NULL();
+		}
+
+		msecs = TimestampDifferenceMilliseconds(start_timestamp, GetCurrentTimestamp());
+
+		/*
+		 * Sleep on the condition variable for the rest of the timeout, unless
+		 * it has expired; zero elapsed time (coarse clock) must still wait.
+		 */
+		if (msecs < (timeout * 1000))
+		{
+			/*
+			 * Wait for the timeout as defined by the user. If no updated
+			 * statistics are available within the allowed time then display
+			 * previously published statistics if there are any. If no
+			 * previous statistics are available then return NULL. The timer
+			 * is defined in milliseconds since that's what the condition
+			 * variable sleep uses.
+			 */
+			if (ConditionVariableTimedSleep(&memCxtState[procNumber].memcxt_cv,
+											((timeout * 1000) - msecs), WAIT_EVENT_MEM_CXT_PUBLISH))
+			{
+				LWLockAcquire(&memCxtState[procNumber].lw_lock, LW_EXCLUSIVE);
+				/* Displaying previously published statistics if available */
+				if (DsaPointerIsValid(memCxtState[procNumber].memstats_dsa_pointer))
+					break;
+				else
+				{
+					LWLockRelease(&memCxtState[procNumber].lw_lock);
+					PG_RETURN_NULL();
+				}
+			}
+		}
+		else
+		{
+			LWLockAcquire(&memCxtState[procNumber].lw_lock, LW_EXCLUSIVE);
+			/* Displaying previously published statistics if available */
+			if (DsaPointerIsValid(memCxtState[procNumber].memstats_dsa_pointer))
+				break;
+			else
+			{
+				LWLockRelease(&memCxtState[procNumber].lw_lock);
+				PG_RETURN_NULL();
+			}
+		}
+	}
+
+	/*
+	 * We should only reach here with a valid DSA handle, either containing
+	 * updated statistics or previously published statistics (identified by
+	 * the timestamp).
+	 */
+	Assert(memCxtArea->memstats_dsa_handle != DSA_HANDLE_INVALID);
+	/* Attach to the dsa area if we have not already done so */
+	if (area == NULL)
+	{
+		MemoryContext oldcontext = CurrentMemoryContext;
+
+		MemoryContextSwitchTo(TopMemoryContext);
+		area = dsa_attach(memCxtArea->memstats_dsa_handle);
+		MemoryContextSwitchTo(oldcontext);
+		dsa_pin_mapping(area);
+	}
+
+	/*
+	 * Backend has finished publishing the stats, project them.
+	 */
+	memcxt_info = (MemoryStatsEntry *)
+		dsa_get_address(area, memCxtState[procNumber].memstats_dsa_pointer);
+
+#define PG_GET_PROCESS_MEMORY_CONTEXTS_COLS	12
+	for (int i = 0; i < memCxtState[procNumber].total_stats; i++)
+	{
+		ArrayType  *path_array;
+		int			path_length;
+		Datum		values[PG_GET_PROCESS_MEMORY_CONTEXTS_COLS];
+		bool		nulls[PG_GET_PROCESS_MEMORY_CONTEXTS_COLS];
+		char	   *name;
+		char	   *ident;
+		Datum	   *path_datum = NULL;
+		int		   *path_int = NULL;
+
+		memset(values, 0, sizeof(values));
+		memset(nulls, 0, sizeof(nulls));
+
+		if (DsaPointerIsValid(memcxt_info[i].name))
+		{
+			name = (char *) dsa_get_address(area, memcxt_info[i].name);
+			values[0] = CStringGetTextDatum(name);
+		}
+		else
+			nulls[0] = true;
+
+		if (DsaPointerIsValid(memcxt_info[i].ident))
+		{
+			ident = (char *) dsa_get_address(area, memcxt_info[i].ident);
+			values[1] = CStringGetTextDatum(ident);
+		}
+		else
+			nulls[1] = true;
+
+		values[2] = CStringGetTextDatum(ContextTypeToString(memcxt_info[i].type));
+
+		path_length = memcxt_info[i].path_length;
+		path_datum = (Datum *) palloc(path_length * sizeof(Datum));
+		if (DsaPointerIsValid(memcxt_info[i].path))
+		{
+			path_int = (int *) dsa_get_address(area, memcxt_info[i].path);
+			for (int j = 0; j < path_length; j++)
+				path_datum[j] = Int32GetDatum(path_int[j]);
+			path_array = construct_array_builtin(path_datum, path_length, INT4OID);
+			values[3] = PointerGetDatum(path_array);
+		}
+		else
+			nulls[3] = true;
+
+		values[4] = Int32GetDatum(memcxt_info[i].levels);
+		values[5] = Int64GetDatum(memcxt_info[i].totalspace);
+		values[6] = Int64GetDatum(memcxt_info[i].nblocks);
+		values[7] = Int64GetDatum(memcxt_info[i].freespace);
+		values[8] = Int64GetDatum(memcxt_info[i].freechunks);
+		values[9] = Int64GetDatum(memcxt_info[i].totalspace -
+								  memcxt_info[i].freespace);
+		values[10] = Int32GetDatum(memcxt_info[i].num_agg_stats);
+		values[11] = TimestampTzGetDatum(memCxtState[procNumber].stats_timestamp);
+
+		tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
+							 values, nulls);
+	}
+	LWLockRelease(&memCxtState[procNumber].lw_lock);
+
+	ConditionVariableCancelSleep();
+
+	PG_RETURN_NULL();
+}
+
+/*
+ * Compute the shared memory needed for memory context statistics reporting
+ */
+Size
+MemoryContextReportingShmemSize(void)
+{
+	Size		nslots = add_size(NUM_AUXILIARY_PROCS, MaxBackends);
+	Size		total = sizeof(MemoryStatsCtl);
+
+	/* one MemoryStatsBackendState per backend and auxiliary process slot */
+	total = add_size(total, mul_size(nslots, sizeof(MemoryStatsBackendState)));
+
+	return total;
+}
+
+/*
+ * MemoryContextReportingShmemInit
+ *		Set up the shared structures used for memory context reporting.
+ */
+void
+MemoryContextReportingShmemInit(void)
+{
+	int			nslots = MaxBackends + NUM_AUXILIARY_PROCS;
+	bool		found;
+
+	memCxtArea = (MemoryStatsCtl *)
+		ShmemInitStruct("MemoryStatsCtl", sizeof(MemoryStatsCtl), &found);
+
+	if (!found)
+	{
+		memCxtArea->memstats_dsa_handle = DSA_HANDLE_INVALID;
+		LWLockInitialize(&memCxtArea->lw_lock, LWTRANCHE_MEMORY_CONTEXT_REPORTING_STATE);
+	}
+
+	memCxtState = (MemoryStatsBackendState *)
+		ShmemInitStruct("MemoryStatsBackendState",
+						nslots * sizeof(MemoryStatsBackendState), &found);
+
+	if (found)
+		return;
+
+	for (int slot = 0; slot < nslots; slot++)
+	{
+		memCxtState[slot].memstats_dsa_pointer = InvalidDsaPointer;
+		LWLockInitialize(&memCxtState[slot].lw_lock, LWTRANCHE_MEMORY_CONTEXT_REPORTING_PROC);
+		ConditionVariableInit(&memCxtState[slot].memcxt_cv);
+	}
+}
diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c
index 2152aad97d9..92304a1f124 100644
--- a/src/backend/utils/init/globals.c
+++ b/src/backend/utils/init/globals.c
@@ -39,6 +39,7 @@ volatile sig_atomic_t TransactionTimeoutPending = false;
volatile sig_atomic_t IdleSessionTimeoutPending = false;
volatile sig_atomic_t ProcSignalBarrierPending = false;
volatile sig_atomic_t LogMemoryContextPending = false;
+volatile sig_atomic_t PublishMemoryContextPending = false;
volatile sig_atomic_t IdleStatsUpdateTimeoutPending = false;
volatile uint32 InterruptHoldoffCount = 0;
volatile uint32 QueryCancelHoldoffCount = 0;
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index c09c4d404ba..01309ef3f86 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -667,6 +667,13 @@ BaseInit(void)
* drop ephemeral slots, which in turn triggers stats reporting.
*/
ReplicationSlotInitialize();
+
+ /*
+ * The before shmem exit callback frees the DSA memory occupied by the
+ * latest memory context statistics that could be published by this proc
+ * if requested.
+ */
+ before_shmem_exit(AtProcExit_memstats_cleanup, 0);
}
diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c
index d98ae9db6be..cf4e22bf1cc 100644
--- a/src/backend/utils/mmgr/mcxt.c
+++ b/src/backend/utils/mmgr/mcxt.c
@@ -23,6 +23,11 @@
#include "mb/pg_wchar.h"
#include "miscadmin.h"
+#include "nodes/pg_list.h"
+#include "storage/lwlock.h"
+#include "storage/ipc.h"
+#include "utils/dsa.h"
+#include "utils/hsearch.h"
#include "utils/memdebug.h"
#include "utils/memutils.h"
#include "utils/memutils_internal.h"
@@ -135,6 +140,17 @@ static const MemoryContextMethods mcxt_methods[] = {
};
#undef BOGUS_MCTX
+/*
+ * This is passed to MemoryContextStatsInternal to determine whether
+ * to print context statistics or not and, if so, where to print them:
+ * to the server log or to stderr.
+ */
+typedef enum PrintDestination
+{
+ PRINT_STATS_TO_STDERR = 0,
+ PRINT_STATS_TO_LOGS,
+ PRINT_STATS_NONE
+} PrintDestination;
/*
* CurrentMemoryContext
@@ -156,16 +172,31 @@ MemoryContext CurTransactionContext = NULL;
/* This is a transient link to the active portal's memory context: */
MemoryContext PortalContext = NULL;
+dsa_area *area = NULL;
static void MemoryContextDeleteOnly(MemoryContext context);
static void MemoryContextCallResetCallbacks(MemoryContext context);
static void MemoryContextStatsInternal(MemoryContext context, int level,
int max_level, int max_children,
MemoryContextCounters *totals,
- bool print_to_stderr);
+ PrintDestination print_location,
+ int *num_contexts);
static void MemoryContextStatsPrint(MemoryContext context, void *passthru,
const char *stats_string,
bool print_to_stderr);
+static void PublishMemoryContext(MemoryStatsEntry *memcxt_infos,
+ int curr_id, MemoryContext context,
+ List *path,
+ MemoryContextCounters stat,
+ int num_contexts, dsa_area *area,
+ int max_levels);
+static void compute_contexts_count_and_ids(List *contexts, HTAB *context_id_lookup,
+ int *stats_count,
+ bool summary);
+static List *compute_context_path(MemoryContext c, HTAB *context_id_lookup);
+static void free_memorycontextstate_dsa(dsa_area *area, int total_stats,
+ dsa_pointer prev_dsa_pointer);
+static void end_memorycontext_reporting(void);
/*
* You should not do memory allocations within a critical section, because
@@ -831,11 +862,19 @@ MemoryContextStatsDetail(MemoryContext context,
bool print_to_stderr)
{
MemoryContextCounters grand_totals;
+ int num_contexts = 0; /* MemoryContextStatsInternal increments this */
+ PrintDestination print_location;
memset(&grand_totals, 0, sizeof(grand_totals));
+ if (print_to_stderr)
+ print_location = PRINT_STATS_TO_STDERR;
+ else
+ print_location = PRINT_STATS_TO_LOGS;
+
+ /* num_contexts reports the number of contexts aggregated in the output */
MemoryContextStatsInternal(context, 0, max_level, max_children,
- &grand_totals, print_to_stderr);
+ &grand_totals, print_location, &num_contexts);
if (print_to_stderr)
fprintf(stderr,
@@ -870,13 +909,14 @@ MemoryContextStatsDetail(MemoryContext context,
* One recursion level for MemoryContextStats
*
* Print stats for this context if possible, but in any case accumulate counts
- * into *totals (if not NULL).
+ * into *totals (if not NULL). The callers should make sure that print_location
+ * is set to PRINT_STATS_TO_STDERR, PRINT_STATS_TO_LOGS or PRINT_STATS_NONE.
*/
static void
MemoryContextStatsInternal(MemoryContext context, int level,
int max_level, int max_children,
MemoryContextCounters *totals,
- bool print_to_stderr)
+ PrintDestination print_location, int *num_contexts)
{
MemoryContext child;
int ichild;
@@ -884,10 +924,39 @@ MemoryContextStatsInternal(MemoryContext context, int level,
Assert(MemoryContextIsValid(context));
/* Examine the context itself */
- context->methods->stats(context,
- MemoryContextStatsPrint,
- &level,
- totals, print_to_stderr);
+ switch (print_location)
+ {
+ case PRINT_STATS_TO_STDERR:
+ context->methods->stats(context,
+ MemoryContextStatsPrint,
+ &level,
+ totals, true);
+ break;
+
+ case PRINT_STATS_TO_LOGS:
+ context->methods->stats(context,
+ MemoryContextStatsPrint,
+ &level,
+ totals, false);
+ break;
+
+ case PRINT_STATS_NONE:
+
+ /*
+ * Do not print the statistics if print_location is
+ * PRINT_STATS_NONE, only compute totals. This is used in
+ * reporting of memory context statistics via a sql function. Last
+ * parameter is not relevant.
+ */
+ context->methods->stats(context,
+ NULL,
+ NULL,
+ totals, false);
+ break;
+ }
+
+ /* Increment the context count for each recursive call */
+ *num_contexts = *num_contexts + 1;
/*
* Examine children.
@@ -907,7 +976,7 @@ MemoryContextStatsInternal(MemoryContext context, int level,
MemoryContextStatsInternal(child, level + 1,
max_level, max_children,
totals,
- print_to_stderr);
+ print_location, num_contexts);
}
}
@@ -926,7 +995,13 @@ MemoryContextStatsInternal(MemoryContext context, int level,
child = MemoryContextTraverseNext(child, context);
}
- if (print_to_stderr)
+ /*
+ * Add the count of children contexts which are traversed in the
+ * non-recursive manner.
+ */
+ *num_contexts = *num_contexts + ichild;
+
+ if (print_location == PRINT_STATS_TO_STDERR)
{
for (int i = 0; i <= level; i++)
fprintf(stderr, " ");
@@ -939,7 +1014,7 @@ MemoryContextStatsInternal(MemoryContext context, int level,
local_totals.freechunks,
local_totals.totalspace - local_totals.freespace);
}
- else
+ else if (print_location == PRINT_STATS_TO_LOGS)
ereport(LOG_SERVER_ONLY,
(errhidestmt(true),
errhidecontext(true),
@@ -1276,6 +1351,22 @@ HandleLogMemoryContextInterrupt(void)
/* latch will be set by procsignal_sigusr1_handler */
}
+/*
+ * HandleGetMemoryContextInterrupt
+ * Handle receipt of an interrupt indicating a request to publish memory
+ * context statistics.
+ *
+ * All the actual work is deferred to ProcessGetMemoryContextInterrupt() as
+ * this cannot be performed in a signal handler.
+ */
+void
+HandleGetMemoryContextInterrupt(void)
+{
+ InterruptPending = true;
+ PublishMemoryContextPending = true; /* reset by ProcessGetMemoryContextInterrupt() */
+ /* latch will be set by procsignal_sigusr1_handler */
+}
+
/*
* ProcessLogMemoryContextInterrupt
* Perform logging of memory contexts of this backend process.
@@ -1313,6 +1404,538 @@ ProcessLogMemoryContextInterrupt(void)
MemoryContextStatsDetail(TopMemoryContext, 100, 100, false);
}
+/*
+ * ProcessGetMemoryContextInterrupt
+ *		Generate information about memory contexts used by the process.
+ *
+ * Performs a breadth first search on the memory context tree, thus parents
+ * statistics are reported before their children in the monitoring function
+ * output.
+ *
+ * Statistics for all the processes are shared via the same dynamic shared
+ * area. Statistics written by each process are tracked independently in
+ * per-process DSA pointers. These pointers are stored in static shared memory.
+ *
+ * The maximum number of contexts whose statistics can be reported individually
+ * is computed from a pre-determined limit on the memory available per process
+ * for this utility and the maximum size of statistics for each context. The
+ * remaining contexts, if any, are captured as a cumulative total stored after
+ * the individual context statistics.
+ *
+ * If summary is true, we capture the level 1 and level 2 contexts
+ * statistics. For that we traverse the memory context tree recursively in
+ * depth first search manner to cover all the children of a parent context, to
+ * be able to display a cumulative total of memory consumption by a parent at
+ * level 2 and all its children.
+ */
+void
+ProcessGetMemoryContextInterrupt(void)
+{
+	List	   *contexts;
+	HASHCTL		ctl;
+	HTAB	   *context_id_lookup;
+	int			context_id = 0;
+	MemoryStatsEntry *meminfo;
+	bool		summary = false;
+	int			max_stats;
+	int			idx = MyProcNumber;
+	int			stats_count = 0;
+	int			stats_num = 0;
+	MemoryContextCounters stat;
+	int			num_individual_stats = 0;
+
+	PublishMemoryContextPending = false;
+
+	/*
+	 * The hash table is used for constructing "path" column of the view,
+	 * similar to its local backend counterpart.
+	 */
+	ctl.keysize = sizeof(MemoryContext);
+	ctl.entrysize = sizeof(MemoryStatsContextId);
+	ctl.hcxt = CurrentMemoryContext;
+
+	context_id_lookup = hash_create("pg_get_remote_backend_memory_contexts",
+									256,
+									&ctl,
+									HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+
+	/* List of contexts to process in the next round - start at the top. */
+	contexts = list_make1(TopMemoryContext);
+
+	/* Compute the number of stats that can fit in the defined limit */
+	max_stats =
+		MEMORY_CONTEXT_REPORT_MAX_PER_BACKEND / MAX_MEMORY_CONTEXT_STATS_SIZE;
+	LWLockAcquire(&memCxtState[idx].lw_lock, LW_EXCLUSIVE);
+	summary = memCxtState[idx].summary;
+	LWLockRelease(&memCxtState[idx].lw_lock);
+
+	/*
+	 * Traverse the memory context tree to find total number of contexts. If
+	 * summary is requested report the total number of contexts at level 1 and
+	 * 2 from the top. Also, populate the hash table of context ids.
+	 */
+	compute_contexts_count_and_ids(contexts, context_id_lookup, &stats_count,
+								   summary);
+
+	/*
+	 * Allocate memory in this process's DSA for storing statistics of the
+	 * memory contexts up to max_stats, for contexts that don't fit within a
+	 * limit, a cumulative total is written as the last record in the DSA
+	 * segment.
+	 */
+	stats_num = Min(stats_count, max_stats);
+
+	LWLockAcquire(&memCxtArea->lw_lock, LW_EXCLUSIVE);
+
+	/*
+	 * Create a DSA and send handle to the client process after storing the
+	 * context statistics. If the contexts do not fit in the predefined limit
+	 * (MEMORY_CONTEXT_REPORT_MAX_PER_BACKEND), a cumulative total is stored.
+	 */
+	if (memCxtArea->memstats_dsa_handle == DSA_HANDLE_INVALID)
+	{
+		MemoryContext oldcontext = CurrentMemoryContext;
+		dsa_handle	handle;
+
+		MemoryContextSwitchTo(TopMemoryContext);
+
+		area = dsa_create(memCxtArea->lw_lock.tranche);
+
+		handle = dsa_get_handle(area);
+		MemoryContextSwitchTo(oldcontext);
+
+		dsa_pin_mapping(area);
+
+		/*
+		 * Pin the DSA area, this is to make sure the area remains attachable
+		 * even if current backend exits. This is done so that the statistics
+		 * are published even if the process exits while a client is waiting.
+		 */
+		dsa_pin(area);
+
+		/* Set the handle in shared memory */
+		memCxtArea->memstats_dsa_handle = handle;
+	}
+
+	/*
+	 * If DSA exists, created by another process publishing statistics, attach
+	 * to it.
+	 */
+	else if (area == NULL)
+	{
+		MemoryContext oldcontext = CurrentMemoryContext;
+
+		MemoryContextSwitchTo(TopMemoryContext);
+		area = dsa_attach(memCxtArea->memstats_dsa_handle);
+		MemoryContextSwitchTo(oldcontext);
+		dsa_pin_mapping(area);
+	}
+	LWLockRelease(&memCxtArea->lw_lock);
+
+	/*
+	 * Hold the process lock to protect writes to process specific memory. Two
+	 * processes publishing statistics do not block each other.
+	 */
+	LWLockAcquire(&memCxtState[idx].lw_lock, LW_EXCLUSIVE);
+	memCxtState[idx].proc_id = MyProcPid;
+
+	if (DsaPointerIsValid(memCxtState[idx].memstats_dsa_pointer))
+	{
+		/*
+		 * Free any previous allocations, free the name, ident and path
+		 * pointers before freeing the pointer that contains them.
+		 */
+		free_memorycontextstate_dsa(area, memCxtState[idx].total_stats,
+									memCxtState[idx].memstats_dsa_pointer);
+	}
+
+	/*
+	 * Assigning total stats before allocating memory so that memory cleanup
+	 * can run if any subsequent dsa_allocate call to allocate name/ident/path
+	 * fails.
+	 */
+	memCxtState[idx].total_stats = stats_num;
+	memCxtState[idx].memstats_dsa_pointer =
+		dsa_allocate0(area, stats_num * sizeof(MemoryStatsEntry));
+
+	meminfo = (MemoryStatsEntry *)
+		dsa_get_address(area, memCxtState[idx].memstats_dsa_pointer);
+
+	if (summary)
+	{
+		int			cxt_id = 0;
+		List	   *path = NIL;
+
+		/* Copy TopMemoryContext statistics to DSA */
+		memset(&stat, 0, sizeof(stat));
+		(*TopMemoryContext->methods->stats) (TopMemoryContext, NULL, NULL,
+											 &stat, true);
+		path = lcons_int(1, path);
+		PublishMemoryContext(meminfo, cxt_id, TopMemoryContext, path, stat,
+							 1, area, 100);
+		cxt_id = cxt_id + 1;
+
+		/*
+		 * Copy statistics for each of TopMemoryContext's children, aggregated
+		 * over at most 100 children per node and a depth of 100 per subtree.
+		 * Stop at stats_num entries so we never write past the allocation.
+		 */
+		for (MemoryContext c = TopMemoryContext->firstchild;
+			 c != NULL && cxt_id < stats_num; c = c->nextchild)
+		{
+			MemoryContextCounters grand_totals;
+			int			num_contexts = 0;
+			int			level = 0;
+
+			path = NIL;
+			memset(&grand_totals, 0, sizeof(grand_totals));
+
+			MemoryContextStatsInternal(c, level, 100, 100, &grand_totals,
+									   PRINT_STATS_NONE, &num_contexts);
+
+			path = compute_context_path(c, context_id_lookup);
+
+			/*
+			 * Register the stats entry first, that way the cleanup handler
+			 * can reach it in case of allocation failures of one or more
+			 * members. Entry cxt_id is filled next, so cxt_id + 1 are in use.
+			 */
+			memCxtState[idx].total_stats = cxt_id + 1;
+			PublishMemoryContext(meminfo, cxt_id++, c, path,
+								 grand_totals, num_contexts, area, 100);
+		}
+		memCxtState[idx].total_stats = cxt_id;
+
+		/* Notify waiting backends and return */
+		end_memorycontext_reporting();
+
+		hash_destroy(context_id_lookup);
+
+		return;
+	}
+
+	foreach_ptr(MemoryContextData, cur, contexts)
+	{
+		List	   *path = NIL;
+
+		/*
+		 * Figure out the transient context_id of this context and each of its
+		 * ancestors, to compute a path for this context.
+		 */
+		path = compute_context_path(cur, context_id_lookup);
+
+		/* Examine the context stats */
+		memset(&stat, 0, sizeof(stat));
+		(*cur->methods->stats) (cur, NULL, NULL, &stat, true);
+
+		/* Account for saving one statistics slot for cumulative reporting */
+		if (context_id < (max_stats - 1) || stats_count <= max_stats)
+		{
+			/* Copy statistics to DSA memory */
+			PublishMemoryContext(meminfo, context_id, cur, path, stat, 1, area, 100);
+		}
+		else
+		{
+			meminfo[max_stats - 1].totalspace += stat.totalspace;
+			meminfo[max_stats - 1].nblocks += stat.nblocks;
+			meminfo[max_stats - 1].freespace += stat.freespace;
+			meminfo[max_stats - 1].freechunks += stat.freechunks;
+		}
+
+		/*
+		 * DSA max limit per process is reached, write aggregate of the
+		 * remaining statistics.
+		 *
+		 * We can store contexts from 0 to max_stats - 1. When stats_count is
+		 * greater than max_stats, we stop reporting individual statistics
+		 * when context_id equals max_stats - 2. As we use max_stats - 1 array
+		 * slot for reporting cumulative statistics or "Remaining Totals".
+		 */
+		if (stats_count > max_stats && context_id == (max_stats - 2))
+		{
+			char	   *nameptr;
+			int			namelen = strlen("Remaining Totals");
+
+			num_individual_stats = context_id + 1;
+			meminfo[max_stats - 1].name = dsa_allocate(area, namelen + 1);
+			nameptr = dsa_get_address(area, meminfo[max_stats - 1].name);
+			strlcpy(nameptr, "Remaining Totals", namelen + 1);	/* NUL-terminates */
+			meminfo[max_stats - 1].ident = InvalidDsaPointer;
+			meminfo[max_stats - 1].path = InvalidDsaPointer;
+			meminfo[max_stats - 1].type = 0;
+		}
+		context_id++;
+	}
+
+	/*
+	 * Statistics are not aggregated, i.e individual statistics reported when
+	 * stats_count <= max_stats.
+	 */
+	if (stats_count <= max_stats)
+	{
+		memCxtState[idx].total_stats = context_id;
+	}
+	/* Report number of aggregated memory contexts */
+	else
+	{
+		meminfo[max_stats - 1].num_agg_stats = context_id -
+			num_individual_stats;
+
+		/*
+		 * Total stats equals num_individual_stats + 1 record for cumulative
+		 * statistics.
+		 */
+		memCxtState[idx].total_stats = num_individual_stats + 1;
+	}
+
+	/* Notify waiting backends and return */
+	end_memorycontext_reporting();
+
+	hash_destroy(context_id_lookup);
+}
+
+/*
+ * Stamp this process's statistics with the current time, release the
+ * per-process lock (which the caller must hold) and wake all waiting clients.
+ */
+static void
+end_memorycontext_reporting(void)
+{
+ memCxtState[MyProcNumber].stats_timestamp = GetCurrentTimestamp();
+ LWLockRelease(&memCxtState[MyProcNumber].lw_lock);
+ ConditionVariableBroadcast(&memCxtState[MyProcNumber].memcxt_cv);
+}
+
+/*
+ * compute_context_path
+ *
+ * Build the list of transient context_ids leading to context c, root first,
+ * by prepending the id of c and of each ancestor; an unknown context is an error.
+ */
+static List *
+compute_context_path(MemoryContext c, HTAB *context_id_lookup)
+{
+ bool found;
+ List *path = NIL;
+ MemoryContext cur_context;
+
+ for (cur_context = c; cur_context != NULL; cur_context = cur_context->parent)
+ {
+ MemoryStatsContextId *cur_entry;
+
+ cur_entry = hash_search(context_id_lookup, &cur_context, HASH_FIND, &found);
+
+ if (!found)
+ elog(ERROR, "hash table corrupted, can't construct path value");
+
+ path = lcons_int(cur_entry->context_id, path);
+ }
+
+ return path;
+}
+
+/*
+ * Assign a transient context id to each visited context, entering it in
+ * context_id_lookup and counting it in *stats_count; nothing is returned.
+ */
+static void
+compute_contexts_count_and_ids(List *contexts, HTAB *context_id_lookup,
+ int *stats_count, bool summary)
+{
+ foreach_ptr(MemoryContextData, cur, contexts)
+ {
+ MemoryStatsContextId *entry;
+ bool found;
+
+ entry = (MemoryStatsContextId *) hash_search(context_id_lookup, &cur,
+ HASH_ENTER, &found);
+ Assert(!found);
+
+ /*
+ * context id starts with 1 so increment the stats_count before
+ * assigning.
+ */
+ entry->context_id = ++(*stats_count);
+
+ /* Append the children of the current context to the main list. */
+ for (MemoryContext c = cur->firstchild; c != NULL; c = c->nextchild)
+ {
+ if (summary)
+ {
+ entry = (MemoryStatsContextId *) hash_search(context_id_lookup, &c,
+ HASH_ENTER, &found);
+ Assert(!found);
+
+ entry->context_id = ++(*stats_count);
+ }
+
+ contexts = lappend(contexts, c);
+ }
+
+ /*
+ * In summary mode only the first two levels (from top) of contexts are
+ * displayed, so stop after the first context and its direct children.
+ */
+ if (summary)
+ break;
+ }
+}
+
+/*
+ * PublishMemoryContext
+ *		Copy name, ident, path and counters of one context into entry curr_id
+ *		of memcxt_info; name and ident are clipped to the shmem size limit.
+ */
+static void
+PublishMemoryContext(MemoryStatsEntry *memcxt_info, int curr_id,
+					 MemoryContext context, List *path,
+					 MemoryContextCounters stat, int num_contexts,
+					 dsa_area *area, int max_levels)
+{
+	const char *ident = context->ident;
+	const char *name = context->name;
+	int		   *path_list;
+
+	/*
+	 * To be consistent with logging output, we label dynahash contexts with
+	 * just the hash table name as with MemoryContextStatsPrint().
+	 */
+	if (context->ident && strncmp(context->name, "dynahash", 8) == 0)
+	{
+		name = context->ident;
+		ident = NULL;
+	}
+
+	if (name != NULL)
+	{
+		int			namelen = strlen(name);
+		char	   *nameptr;
+
+		if (namelen >= MEMORY_CONTEXT_IDENT_SHMEM_SIZE)
+			namelen = pg_mbcliplen(name, namelen,
+								   MEMORY_CONTEXT_IDENT_SHMEM_SIZE - 1);
+
+		memcxt_info[curr_id].name = dsa_allocate(area, namelen + 1);
+		nameptr = (char *) dsa_get_address(area, memcxt_info[curr_id].name);
+		strlcpy(nameptr, name, namelen + 1);
+	}
+	else
+		memcxt_info[curr_id].name = InvalidDsaPointer;
+
+	/* Trim and copy the identifier if it is not set to NULL */
+	if (ident != NULL)
+	{
+		int			idlen = strlen(ident);
+		char	   *identptr;
+
+		/*
+		 * Some identifiers such as SQL query string can be very long,
+		 * truncate oversize identifiers.
+		 */
+		if (idlen >= MEMORY_CONTEXT_IDENT_SHMEM_SIZE)
+			idlen = pg_mbcliplen(ident, idlen,
+								 MEMORY_CONTEXT_IDENT_SHMEM_SIZE - 1);
+
+		memcxt_info[curr_id].ident = dsa_allocate(area, idlen + 1);
+		identptr = (char *) dsa_get_address(area, memcxt_info[curr_id].ident);
+		strlcpy(identptr, ident, idlen + 1);
+	}
+	else
+		memcxt_info[curr_id].ident = InvalidDsaPointer;
+
+	/* Allocate DSA memory for storing path information */
+	if (path == NIL)
+		memcxt_info[curr_id].path = InvalidDsaPointer;
+	else
+	{
+		int			levels = Min(list_length(path), max_levels);
+
+		memcxt_info[curr_id].path_length = levels;
+		memcxt_info[curr_id].path = dsa_allocate0(area, levels * sizeof(int));
+		memcxt_info[curr_id].levels = list_length(path);
+		path_list = (int *) dsa_get_address(area, memcxt_info[curr_id].path);
+
+		foreach_int(i, path)
+		{
+			path_list[foreach_current_index(i)] = i;
+			if (--levels == 0)
+				break;
+		}
+	}
+	memcxt_info[curr_id].type = context->type;
+	memcxt_info[curr_id].totalspace = stat.totalspace;
+	memcxt_info[curr_id].nblocks = stat.nblocks;
+	memcxt_info[curr_id].freespace = stat.freespace;
+	memcxt_info[curr_id].freechunks = stat.freechunks;
+	memcxt_info[curr_id].num_agg_stats = num_contexts;
+}
+
+/*
+ * free_memorycontextstate_dsa
+ *
+ * Free the array of total_stats MemoryStatsEntry items at prev_dsa_pointer,
+ * members first, then reset this process's pointer. The pointer must be valid.
+ */
+static void
+free_memorycontextstate_dsa(dsa_area *area, int total_stats,
+							dsa_pointer prev_dsa_pointer)
+{
+	MemoryStatsEntry *meminfo;
+
+	meminfo = (MemoryStatsEntry *) dsa_get_address(area, prev_dsa_pointer);
+	Assert(meminfo != NULL);
+	for (int i = 0; i < total_stats; i++)
+	{
+		if (DsaPointerIsValid(meminfo[i].name))
+			dsa_free(area, meminfo[i].name);
+
+		if (DsaPointerIsValid(meminfo[i].ident))
+			dsa_free(area, meminfo[i].ident);
+
+		if (DsaPointerIsValid(meminfo[i].path))
+			dsa_free(area, meminfo[i].path);
+	}
+
+	dsa_free(area, prev_dsa_pointer);	/* free what was walked above */
+	memCxtState[MyProcNumber].memstats_dsa_pointer = InvalidDsaPointer;
+}
+
+/*
+ * Free the memory context statistics this process stored in the DSA area,
+ * if any, then detach from the area.
+ */
+void
+AtProcExit_memstats_cleanup(int code, Datum arg)
+{
+ int idx = MyProcNumber;
+
+ if (memCxtArea->memstats_dsa_handle == DSA_HANDLE_INVALID)
+ return; /* no DSA handle has been published, nothing to free */
+
+ LWLockAcquire(&memCxtState[idx].lw_lock, LW_EXCLUSIVE);
+
+ if (!DsaPointerIsValid(memCxtState[idx].memstats_dsa_pointer))
+ {
+ LWLockRelease(&memCxtState[idx].lw_lock);
+ return; /* this process has no statistics stored */
+ }
+
+ /* If the dsa mapping could not be found, attach to the area */
+ if (area == NULL)
+ area = dsa_attach(memCxtArea->memstats_dsa_handle);
+
+ /*
+ * Free the memory context statistics, free the name, ident and path
+ * pointers before freeing the pointer that contains these pointers and
+ * integer statistics.
+ */
+ free_memorycontextstate_dsa(area, memCxtState[idx].total_stats,
+ memCxtState[idx].memstats_dsa_pointer);
+
+ dsa_detach(area);
+ LWLockRelease(&memCxtState[idx].lw_lock);
+}
+
void *
palloc(Size size)
{
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 37a484147a8..4708f55be18 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -8571,6 +8571,16 @@
prorettype => 'bool', proargtypes => 'int4',
prosrc => 'pg_log_backend_memory_contexts' },
+# publishing memory contexts of the specified postgres process
+{ oid => '2173', descr => 'publish memory contexts of the specified backend',
+ proname => 'pg_get_process_memory_contexts', provolatile => 'v',
+ prorows => '100', proretset => 't', proparallel => 'r',
+ prorettype => 'record', proargtypes => 'int4 bool float8',
+ proallargtypes => '{int4,bool,float8,text,text,text,_int4,int4,int8,int8,int8,int8,int8,int4,timestamptz}',
+ proargmodes => '{i,i,i,o,o,o,o,o,o,o,o,o,o,o,o}',
+ proargnames => '{pid, summary, retries, name, ident, type, path, level, total_bytes, total_nblocks, free_bytes, free_chunks, used_bytes, num_agg_contexts, stats_timestamp}',
+ prosrc => 'pg_get_process_memory_contexts' },
+
# non-persistent series generator
{ oid => '1066', descr => 'non-persistent series generator',
proname => 'generate_series', prorows => '1000',
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index 0d8528b2875..58b2496a9cb 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -96,6 +96,7 @@ extern PGDLLIMPORT volatile sig_atomic_t IdleSessionTimeoutPending;
extern PGDLLIMPORT volatile sig_atomic_t ProcSignalBarrierPending;
extern PGDLLIMPORT volatile sig_atomic_t LogMemoryContextPending;
extern PGDLLIMPORT volatile sig_atomic_t IdleStatsUpdateTimeoutPending;
+extern PGDLLIMPORT volatile sig_atomic_t PublishMemoryContextPending;
extern PGDLLIMPORT volatile sig_atomic_t CheckClientConnectionPending;
extern PGDLLIMPORT volatile sig_atomic_t ClientConnectionLost;
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index 4df1d25c045..d333f338ebb 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -219,6 +219,8 @@ typedef enum BuiltinTrancheIds
LWTRANCHE_XACT_SLRU,
LWTRANCHE_PARALLEL_VACUUM_DSA,
LWTRANCHE_AIO_URING_COMPLETION,
+ LWTRANCHE_MEMORY_CONTEXT_REPORTING_STATE,
+ LWTRANCHE_MEMORY_CONTEXT_REPORTING_PROC,
LWTRANCHE_FIRST_USER_DEFINED,
} BuiltinTrancheIds;
diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h
index 016dfd9b3f6..cfe14631445 100644
--- a/src/include/storage/procsignal.h
+++ b/src/include/storage/procsignal.h
@@ -35,6 +35,7 @@ typedef enum
PROCSIG_WALSND_INIT_STOPPING, /* ask walsenders to prepare for shutdown */
PROCSIG_BARRIER, /* global barrier interrupt */
PROCSIG_LOG_MEMORY_CONTEXT, /* ask backend to log the memory contexts */
+ PROCSIG_GET_MEMORY_CONTEXT, /* ask backend to send the memory contexts */
PROCSIG_PARALLEL_APPLY_MESSAGE, /* Message from parallel apply workers */
/* Recovery conflict reasons */
diff --git a/src/include/utils/memutils.h b/src/include/utils/memutils.h
index 8abc26abce2..d328270fafc 100644
--- a/src/include/utils/memutils.h
+++ b/src/include/utils/memutils.h
@@ -18,6 +18,9 @@
#define MEMUTILS_H
#include "nodes/memnodes.h"
+#include "storage/condition_variable.h"
+#include "storage/lmgr.h"
+#include "utils/dsa.h"
/*
@@ -48,6 +51,23 @@
#define AllocHugeSizeIsValid(size) ((Size) (size) <= MaxAllocHugeSize)
+/*
+ * Memory Context reporting size limits.
+ */
+
+/* Max length of context name and ident */
+#define MEMORY_CONTEXT_IDENT_SHMEM_SIZE 64
+/* Maximum size (in bytes) of DSA area per process */
+#define MEMORY_CONTEXT_REPORT_MAX_PER_BACKEND ((size_t) (1 * 1024 * 1024))
+
+/*
+ * Maximum size per context. Actual size may be lower as this assumes the worst
+ * case of deepest path and longest identifiers (name and ident, thus the
+ * multiplication by 2). The path depth is limited to 100 like for memory
+ * context logging.
+ */
+#define MAX_MEMORY_CONTEXT_STATS_SIZE (sizeof(MemoryStatsEntry) + \
+ (100 * sizeof(int)) + (2 * MEMORY_CONTEXT_IDENT_SHMEM_SIZE))
/*
* Standard top-level memory contexts.
@@ -319,4 +339,66 @@ pg_memory_is_all_zeros(const void *ptr, size_t len)
return true;
}
+/* Dynamic shared memory state for statistics per context */
+typedef struct MemoryStatsEntry
+{
+ dsa_pointer name; /* DSA copy of the context name, or InvalidDsaPointer */
+ dsa_pointer ident; /* DSA copy of the context ident, or InvalidDsaPointer */
+ dsa_pointer path; /* DSA int array of context ids, root first */
+ NodeTag type; /* copied from the context's type field */
+ int path_length; /* number of ids stored in path */
+ int levels; /* length of the full, untruncated path */
+ int64 totalspace; /* the four counters below are copied from */
+ int64 nblocks; /* the same-named MemoryContextCounters */
+ int64 freespace; /* fields, or summed for an aggregate */
+ int64 freechunks; /* entry */
+ int num_agg_stats; /* number of contexts this entry accounts for */
+} MemoryStatsEntry;
+
+/*
+ * Static shared memory state representing the DSA area created for memory
+ * context statistics reporting. A single DSA area is created and used by all
+ * the processes, each having its specific DSA allocations for sharing memory
+ * statistics, tracked by per backend static shared memory state.
+ */
+typedef struct MemoryStatsCtl
+{
+ dsa_handle memstats_dsa_handle; /* DSA_HANDLE_INVALID until the area exists */
+ LWLock lw_lock; /* held while creating or attaching to the area */
+} MemoryStatsCtl;
+
+/*
+ * Per backend static shared memory state for memory context statistics
+ * reporting.
+ */
+typedef struct MemoryStatsBackendState
+{
+ ConditionVariable memcxt_cv; /* broadcast once statistics are published */
+ LWLock lw_lock; /* protects the fields of this struct */
+ int proc_id; /* set to MyProcPid by the publishing process */
+ int total_stats; /* number of entries at memstats_dsa_pointer */
+ bool summary; /* read by the publisher to pick summary mode */
+ dsa_pointer memstats_dsa_pointer; /* array of MemoryStatsEntry in the DSA */
+ TimestampTz stats_timestamp; /* time the statistics were last published */
+} MemoryStatsBackendState;
+
+
+/*
+ * Used for storage of transient identifiers for pg_get_backend_memory_contexts
+ */
+typedef struct MemoryStatsContextId
+{
+ MemoryContext context; /* hash key: the memory context pointer */
+ int context_id; /* transient id, numbered from 1 */
+} MemoryStatsContextId;
+
+extern PGDLLIMPORT MemoryStatsBackendState *memCxtState;
+extern PGDLLIMPORT MemoryStatsCtl *memCxtArea;
+extern void ProcessGetMemoryContextInterrupt(void);
+extern const char *ContextTypeToString(NodeTag type);
+extern void HandleGetMemoryContextInterrupt(void);
+extern Size MemoryContextReportingShmemSize(void);
+extern void MemoryContextReportingShmemInit(void);
+extern void AtProcExit_memstats_cleanup(int code, Datum arg);
+extern dsa_area *area;
#endif /* MEMUTILS_H */
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out
index 83228cfca29..ae17d028ed3 100644
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -232,3 +232,22 @@ select * from pg_timezone_abbrevs where abbrev = 'LMT';
LMT | @ 7 hours 52 mins 58 secs ago | f
(1 row)
+DO $$
+DECLARE
+ bg_writer_pid int;
+ r RECORD;
+BEGIN
+ SELECT pid from pg_stat_activity where backend_type='background writer'
+ INTO bg_writer_pid;
+
+ select type, name, ident
+ from pg_get_process_memory_contexts(bg_writer_pid, false, 20)
+ where path = '{1}' into r;
+ RAISE NOTICE '%', r;
+ select type, name, ident
+ from pg_get_process_memory_contexts(pg_backend_pid(), false, 20)
+ where path = '{1}' into r;
+ RAISE NOTICE '%', r;
+END $$;
+NOTICE: (AllocSet,TopMemoryContext,)
+NOTICE: (AllocSet,TopMemoryContext,)
diff --git a/src/test/regress/sql/sysviews.sql b/src/test/regress/sql/sysviews.sql
index 66179f026b3..d0917b6868e 100644
--- a/src/test/regress/sql/sysviews.sql
+++ b/src/test/regress/sql/sysviews.sql
@@ -101,3 +101,21 @@ select count(distinct utc_offset) >= 24 as ok from pg_timezone_abbrevs;
-- One specific case we can check without much fear of breakage
-- is the historical local-mean-time value used for America/Los_Angeles.
select * from pg_timezone_abbrevs where abbrev = 'LMT';
+
+DO $$
+DECLARE
+ bg_writer_pid int;
+ r RECORD;
+BEGIN
+ SELECT pid from pg_stat_activity where backend_type='background writer'
+ INTO bg_writer_pid;
+
+ select type, name, ident
+ from pg_get_process_memory_contexts(bg_writer_pid, false, 20)
+ where path = '{1}' into r;
+ RAISE NOTICE '%', r;
+ select type, name, ident
+ from pg_get_process_memory_contexts(pg_backend_pid(), false, 20)
+ where path = '{1}' into r;
+ RAISE NOTICE '%', r;
+END $$;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 87e6da8d25e..780e4c4fc07 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1671,6 +1671,10 @@ MemoryContextCounters
MemoryContextData
MemoryContextMethodID
MemoryContextMethods
+MemoryStatsBackendState
+MemoryStatsContextId
+MemoryStatsCtl
+MemoryStatsEntry
MemoryStatsPrintFunc
MergeAction
MergeActionState