diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 9ab070adffb..1c5cfee25d1 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -28663,6 +28663,144 @@ acl | {postgres=arwdDxtm/postgres,foo=r/postgres} + + + + pg_get_process_memory_contexts + + pg_get_process_memory_contexts ( pid integer, summary boolean, timeout float ) + setof record + ( name text, + ident text, + type text, + path integer[], + level integer, + total_bytes bigint, + total_nblocks bigint, + free_bytes bigint, + free_chunks bigint, + used_bytes bigint, + num_agg_contexts integer, + stats_timestamp timestamptz ) + + + This function handles requests to display the memory contexts of a + PostgreSQL process with the specified + process ID. The function can be used to send requests to backends as + well as auxiliary processes. + + + The returned record contains extended statistics for each memory + context: + + + + name - The name of the memory context. + + + + + ident - Memory context ID (if any). + + + + + type - The type of memory context; possible + values are: AllocSet, Generation, Slab and Bump. + + + + + path - Memory contexts are organized in a + tree model with TopMemoryContext as the root, and all other memory + contexts as nodes in the tree. The path + displays the path from the root to the current memory context. The + path is limited to 100 children per node, with each node limited + to a max depth of 100, to preserve memory during reporting. The + printed path will also be limited to 100 nodes counting from the + TopMemoryContext. + + + + + level - The level in the tree of the current + memory context. + + + + + total_bytes - The total number of bytes + allocated to this memory context. + + + + + total_nblocks - The total number of blocks + used for the allocated memory. + + + + + free_bytes - The amount of free memory in + this memory context. + + + + + free_chunks - The number of chunks that + free_bytes corresponds to. 
+ + + + + used_bytes - The total number of bytes + currently occupied. + + + + + num_agg_contexts - The number of memory + contexts aggregated in the displayed statistics. + + + + + stats_timestamp - When the statistics were + extracted from the process. + + + + + + When summary is true, statistics + for memory contexts at levels 1 and 2 are displayed, with level 1 + representing the root node (i.e., TopMemoryContext). + Statistics for contexts on level 2 and below are aggregates of all + child contexts' statistics, where num_agg_contexts + indicates the number of aggregated child contexts. When + summary is false, + the num_agg_contexts value is 1, + indicating that individual statistics are being displayed. The levels + are limited to the first 100 contexts. + + + Busy processes can delay reporting memory context statistics; + timeout specifies the number of seconds + to wait for updated statistics. timeout can be + specified in fractions of a second. + + + After receiving memory context statistics from the target process, the function + returns the results as one row per context. If all the contexts don't + fit within the pre-determined size limit, the remaining context + statistics are aggregated and a cumulative total is displayed. The + num_agg_contexts column indicates the number of + contexts aggregated in the displayed statistics. When + num_agg_contexts is 1 it means + that the context statistics are displayed separately. + + + @@ -28802,6 +28940,40 @@ LOG: Grand total: 1651920 bytes in 201 blocks; 622360 free (88 chunks); 1029560 because it may generate a large number of log messages. + + pg_get_process_memory_contexts can be used to request + memory context statistics of any PostgreSQL + process. 
For example: + +postgres=# SELECT * FROM pg_get_process_memory_contexts( + (SELECT pid FROM pg_stat_activity + WHERE backend_type = 'checkpointer'), + false, 0.5) LIMIT 1; +-[ RECORD 1 ]----+------------------------------ +name | TopMemoryContext +ident | +type | AllocSet +path | {1} +level | 1 +total_bytes | 90304 +total_nblocks | 3 +free_bytes | 2880 +free_chunks | 1 +used_bytes | 87424 +num_agg_contexts | 1 +stats_timestamp | 2025-03-24 13:55:47.796698+01 + + + + While pg_get_process_memory_contexts can be used to + query memory contexts of the local backend, + pg_backend_memory_contexts + (see for more details) + will be less resource intensive when only the local backend is of interest. + + + + diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 08f780a2e63..15efb02badb 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -674,6 +674,11 @@ GRANT SELECT ON pg_backend_memory_contexts TO pg_read_all_stats; REVOKE EXECUTE ON FUNCTION pg_get_backend_memory_contexts() FROM PUBLIC; GRANT EXECUTE ON FUNCTION pg_get_backend_memory_contexts() TO pg_read_all_stats; +REVOKE EXECUTE ON FUNCTION + pg_get_process_memory_contexts(integer, boolean, float) FROM PUBLIC; +GRANT EXECUTE ON FUNCTION + pg_get_process_memory_contexts(integer, boolean, float) TO pg_read_all_stats; + -- Statistics views CREATE VIEW pg_stat_all_tables AS diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 2513a8ef8a6..16756152b71 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -781,6 +781,10 @@ ProcessAutoVacLauncherInterrupts(void) if (LogMemoryContextPending) ProcessLogMemoryContextInterrupt(); + /* Publish memory contexts of this process */ + if (PublishMemoryContextPending) + ProcessGetMemoryContextInterrupt(); + /* Process sinval catchup interrupts that happened while sleeping */ ProcessCatchupInterrupt(); } diff --git 
a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index fda91ffd1ce..d3cb3f1891c 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -663,6 +663,10 @@ ProcessCheckpointerInterrupts(void) /* Perform logging of memory contexts of this process */ if (LogMemoryContextPending) ProcessLogMemoryContextInterrupt(); + + /* Publish memory contexts of this process */ + if (PublishMemoryContextPending) + ProcessGetMemoryContextInterrupt(); } /* diff --git a/src/backend/postmaster/interrupt.c b/src/backend/postmaster/interrupt.c index 0ae9bf906ec..f24f574e748 100644 --- a/src/backend/postmaster/interrupt.c +++ b/src/backend/postmaster/interrupt.c @@ -48,6 +48,10 @@ ProcessMainLoopInterrupts(void) /* Perform logging of memory contexts of this process */ if (LogMemoryContextPending) ProcessLogMemoryContextInterrupt(); + + /* Publish memory contexts of this process */ + if (PublishMemoryContextPending) + ProcessGetMemoryContextInterrupt(); } /* diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c index 7e622ae4bd2..cb7408acf4c 100644 --- a/src/backend/postmaster/pgarch.c +++ b/src/backend/postmaster/pgarch.c @@ -867,6 +867,10 @@ ProcessPgArchInterrupts(void) if (LogMemoryContextPending) ProcessLogMemoryContextInterrupt(); + /* Publish memory contexts of this process */ + if (PublishMemoryContextPending) + ProcessGetMemoryContextInterrupt(); + if (ConfigReloadPending) { char *archiveLib = pstrdup(XLogArchiveLibrary); diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c index 27e86cf393f..7149a67fcbc 100644 --- a/src/backend/postmaster/startup.c +++ b/src/backend/postmaster/startup.c @@ -192,6 +192,10 @@ ProcessStartupProcInterrupts(void) /* Perform logging of memory contexts of this process */ if (LogMemoryContextPending) ProcessLogMemoryContextInterrupt(); + + /* Publish memory contexts of this process */ + if (PublishMemoryContextPending) + 
ProcessGetMemoryContextInterrupt(); } diff --git a/src/backend/postmaster/walsummarizer.c b/src/backend/postmaster/walsummarizer.c index 0fec4f1f871..c7a76711cc5 100644 --- a/src/backend/postmaster/walsummarizer.c +++ b/src/backend/postmaster/walsummarizer.c @@ -879,6 +879,10 @@ ProcessWalSummarizerInterrupts(void) /* Perform logging of memory contexts of this process */ if (LogMemoryContextPending) ProcessLogMemoryContextInterrupt(); + + /* Publish memory contexts of this process */ + if (PublishMemoryContextPending) + ProcessGetMemoryContextInterrupt(); } /* diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c index 2fa045e6b0f..00c76d05356 100644 --- a/src/backend/storage/ipc/ipci.c +++ b/src/backend/storage/ipc/ipci.c @@ -51,6 +51,7 @@ #include "storage/sinvaladt.h" #include "utils/guc.h" #include "utils/injection_point.h" +#include "utils/memutils.h" /* GUCs */ int shared_memory_type = DEFAULT_SHARED_MEMORY_TYPE; @@ -150,6 +151,7 @@ CalculateShmemSize(int *num_semaphores) size = add_size(size, InjectionPointShmemSize()); size = add_size(size, SlotSyncShmemSize()); size = add_size(size, AioShmemSize()); + size = add_size(size, MemoryContextReportingShmemSize()); /* include additional requested shmem from preload libraries */ size = add_size(size, total_addin_request); @@ -343,6 +345,7 @@ CreateOrAttachShmemStructs(void) WaitEventCustomShmemInit(); InjectionPointShmemInit(); AioShmemInit(); + MemoryContextReportingShmemInit(); } /* diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c index b7c39a4c5f0..a3c2cd12277 100644 --- a/src/backend/storage/ipc/procsignal.c +++ b/src/backend/storage/ipc/procsignal.c @@ -690,6 +690,9 @@ procsignal_sigusr1_handler(SIGNAL_ARGS) if (CheckProcSignal(PROCSIG_LOG_MEMORY_CONTEXT)) HandleLogMemoryContextInterrupt(); + if (CheckProcSignal(PROCSIG_GET_MEMORY_CONTEXT)) + HandleGetMemoryContextInterrupt(); + if (CheckProcSignal(PROCSIG_PARALLEL_APPLY_MESSAGE)) 
HandleParallelApplyMessageInterrupt(); diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index 3df29658f18..dc4d96c16af 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -178,6 +178,8 @@ static const char *const BuiltinTrancheNames[] = { [LWTRANCHE_XACT_SLRU] = "XactSLRU", [LWTRANCHE_PARALLEL_VACUUM_DSA] = "ParallelVacuumDSA", [LWTRANCHE_AIO_URING_COMPLETION] = "AioUringCompletion", + [LWTRANCHE_MEMORY_CONTEXT_REPORTING_STATE] = "MemoryContextReportingState", + [LWTRANCHE_MEMORY_CONTEXT_REPORTING_PROC] = "MemoryContextReportingPerProcess", }; StaticAssertDecl(lengthof(BuiltinTrancheNames) == diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index e9ef0fbfe32..f194e6b3dcc 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -50,6 +50,7 @@ #include "storage/procsignal.h" #include "storage/spin.h" #include "storage/standby.h" +#include "utils/memutils.h" #include "utils/timeout.h" #include "utils/timestamp.h" diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 6ae9f38f0c8..dc4c600922d 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -3535,6 +3535,9 @@ ProcessInterrupts(void) if (LogMemoryContextPending) ProcessLogMemoryContextInterrupt(); + if (PublishMemoryContextPending) + ProcessGetMemoryContextInterrupt(); + if (ParallelApplyMessagePending) ProcessParallelApplyMessages(); } diff --git a/src/backend/utils/activity/wait_event_names.txt b/src/backend/utils/activity/wait_event_names.txt index 8bce14c38fd..23eaf559c8d 100644 --- a/src/backend/utils/activity/wait_event_names.txt +++ b/src/backend/utils/activity/wait_event_names.txt @@ -161,6 +161,7 @@ WAL_RECEIVER_EXIT "Waiting for the WAL receiver to exit." WAL_RECEIVER_WAIT_START "Waiting for startup process to send initial data for streaming replication." WAL_SUMMARY_READY "Waiting for a new WAL summary to be generated." 
XACT_GROUP_UPDATE "Waiting for the group leader to update transaction status at transaction end." +MEM_CXT_PUBLISH "Waiting for a process to publish memory information." ABI_compatibility: diff --git a/src/backend/utils/adt/mcxtfuncs.c b/src/backend/utils/adt/mcxtfuncs.c index 396c2f223b4..3ede88e5036 100644 --- a/src/backend/utils/adt/mcxtfuncs.c +++ b/src/backend/utils/adt/mcxtfuncs.c @@ -17,28 +17,25 @@ #include "funcapi.h" #include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "access/twophase.h" +#include "catalog/pg_authid_d.h" #include "storage/proc.h" #include "storage/procarray.h" +#include "utils/acl.h" #include "utils/array.h" #include "utils/builtins.h" #include "utils/hsearch.h" +#include "utils/memutils.h" +#include "utils/wait_event_types.h" /* ---------- * The max bytes for showing identifiers of MemoryContext. * ---------- */ #define MEMORY_CONTEXT_IDENT_DISPLAY_SIZE 1024 - -/* - * MemoryContextId - * Used for storage of transient identifiers for - * pg_get_backend_memory_contexts. 
- */ -typedef struct MemoryContextId -{ - MemoryContext context; - int context_id; -} MemoryContextId; +struct MemoryStatsBackendState *memCxtState = NULL; +struct MemoryStatsCtl *memCxtArea = NULL; /* * int_list_to_array @@ -89,7 +86,7 @@ PutMemoryContextsStatsTupleStore(Tuplestorestate *tupstore, */ for (MemoryContext cur = context; cur != NULL; cur = cur->parent) { - MemoryContextId *entry; + MemoryStatsContextId *entry; bool found; entry = hash_search(context_id_lookup, &cur, HASH_FIND, &found); @@ -143,24 +140,7 @@ PutMemoryContextsStatsTupleStore(Tuplestorestate *tupstore, else nulls[1] = true; - switch (context->type) - { - case T_AllocSetContext: - type = "AllocSet"; - break; - case T_GenerationContext: - type = "Generation"; - break; - case T_SlabContext: - type = "Slab"; - break; - case T_BumpContext: - type = "Bump"; - break; - default: - type = "???"; - break; - } + type = ContextTypeToString(context->type); values[2] = CStringGetTextDatum(type); values[3] = Int32GetDatum(list_length(path)); /* level */ @@ -175,6 +155,38 @@ PutMemoryContextsStatsTupleStore(Tuplestorestate *tupstore, list_free(path); } +/* + * ContextTypeToString + * Returns a textual representation of a context type + * + * This should cover the same types as MemoryContextIsValid. + */ +const char * +ContextTypeToString(NodeTag type) +{ + const char *context_type; + + switch (type) + { + case T_AllocSetContext: + context_type = "AllocSet"; + break; + case T_GenerationContext: + context_type = "Generation"; + break; + case T_SlabContext: + context_type = "Slab"; + break; + case T_BumpContext: + context_type = "Bump"; + break; + default: + context_type = "???"; + break; + } + return context_type; +} + /* * pg_get_backend_memory_contexts * SQL SRF showing backend memory context. 
@@ -189,7 +201,7 @@ pg_get_backend_memory_contexts(PG_FUNCTION_ARGS) HTAB *context_id_lookup; ctl.keysize = sizeof(MemoryContext); - ctl.entrysize = sizeof(MemoryContextId); + ctl.entrysize = sizeof(MemoryStatsContextId); ctl.hcxt = CurrentMemoryContext; context_id_lookup = hash_create("pg_get_backend_memory_contexts", @@ -216,7 +228,7 @@ pg_get_backend_memory_contexts(PG_FUNCTION_ARGS) foreach_ptr(MemoryContextData, cur, contexts) { - MemoryContextId *entry; + MemoryStatsContextId *entry; bool found; /* @@ -224,8 +236,8 @@ pg_get_backend_memory_contexts(PG_FUNCTION_ARGS) * PutMemoryContextsStatsTupleStore needs this to populate the "path" * column with the parent context_ids. */ - entry = (MemoryContextId *) hash_search(context_id_lookup, &cur, - HASH_ENTER, &found); + entry = (MemoryStatsContextId *) hash_search(context_id_lookup, &cur, + HASH_ENTER, &found); entry->context_id = context_id++; Assert(!found); @@ -305,3 +317,349 @@ pg_log_backend_memory_contexts(PG_FUNCTION_ARGS) PG_RETURN_BOOL(true); } + +/* + * pg_get_process_memory_contexts + * Signal a backend or an auxiliary process to send its memory contexts, + * wait for the results and display them. + * + * By default, only superusers or users with PG_READ_ALL_STATS are allowed to + * signal a process to return the memory contexts. This is because allowing + * any users to issue this request at an unbounded rate would cause lots of + * requests to be sent, which can lead to denial of service. Additional roles + * can be permitted with GRANT. + * + * On receipt of this signal, a backend or an auxiliary process sets the flag + * in the signal handler, which causes the next CHECK_FOR_INTERRUPTS() + * or process-specific interrupt handler to copy the memory context details + * to a dynamic shared memory space. 
+ * + * We have defined a limit on DSA memory that could be allocated per process - + * if the process has more memory contexts than what can fit in the allocated + * size, the excess contexts are summarized and represented as cumulative total + * at the end of the buffer. + * + * After sending the signal, wait on a condition variable. The publishing + * backend, after copying the data to shared memory, sends signal on that + * condition variable. There is one condition variable per publishing backend. + * Once the condition variable is signalled, check if the latest memory context + * information is available and display. + * + * If the publishing backend does not respond before the condition variable + * times out, which is set to MEMSTATS_WAIT_TIMEOUT, retry given that there is + * time left within the timeout specified by the user, before giving up and + * returning previously published statistics, if any. If no previous statistics + * exist, return NULL. + */ +#define MEMSTATS_WAIT_TIMEOUT 100 +Datum +pg_get_process_memory_contexts(PG_FUNCTION_ARGS) +{ + int pid = PG_GETARG_INT32(0); + bool summary = PG_GETARG_BOOL(1); + double timeout = PG_GETARG_FLOAT8(2); + PGPROC *proc; + ProcNumber procNumber = INVALID_PROC_NUMBER; + bool proc_is_aux = false; + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + MemoryStatsEntry *memcxt_info; + TimestampTz start_timestamp; + + /* + * See if the process with given pid is a backend or an auxiliary process + * and remember the type for when we requery the process later. + */ + proc = BackendPidGetProc(pid); + if (proc == NULL) + { + proc = AuxiliaryPidGetProc(pid); + proc_is_aux = true; + } + + /* + * BackendPidGetProc() and AuxiliaryPidGetProc() return NULL if the pid + * isn't valid; this is however not a problem and leave with a WARNING. + * See comment in pg_log_backend_memory_contexts for a discussion on this. 
+ */ + if (proc == NULL) + { + /* + * This is just a warning so a loop-through-resultset will not abort + * if one backend terminated on its own during the run. + */ + ereport(WARNING, + errmsg("PID %d is not a PostgreSQL server process", pid)); + PG_RETURN_NULL(); + } + + InitMaterializedSRF(fcinfo, 0); + + procNumber = GetNumberFromPGProc(proc); + + LWLockAcquire(&memCxtState[procNumber].lw_lock, LW_EXCLUSIVE); + memCxtState[procNumber].summary = summary; + LWLockRelease(&memCxtState[procNumber].lw_lock); + + start_timestamp = GetCurrentTimestamp(); + + /* + * Send a signal to a PostgreSQL process, informing it we want it to + * produce information about its memory contexts. + */ + if (SendProcSignal(pid, PROCSIG_GET_MEMORY_CONTEXT, procNumber) < 0) + { + ereport(WARNING, + errmsg("could not send signal to process %d: %m", pid)); + PG_RETURN_NULL(); + } + + /* + * Even if the proc has published statistics, they may not be due to the + * current request, but previously published stats. Check if the stats + * are updated by comparing the timestamp; if the stats are newer than our + * previously recorded timestamp from before sending the procsignal, they + * must by definition be updated. Wait for the timeout specified by the + * user, following which display old statistics if available or return + * NULL. + */ + while (1) + { + long msecs; + + /* + * We expect to come out of sleep when the requested process has + * finished publishing the statistics, verified using the valid DSA + * pointer. + * + * Make sure that the information belongs to the pid we requested + * information for; otherwise loop back and wait for the server + * process to finish publishing statistics. + */ + LWLockAcquire(&memCxtState[procNumber].lw_lock, LW_EXCLUSIVE); + + /* + * A note in procnumber.h says that a procNumber can be re-used for + * a different backend immediately after a backend exits. 
In case an + * old process' data was there and not updated by the current process + * in the slot identified by the procNumber, the pid of the requested + * process and the proc_id might not match. + */ + if (memCxtState[procNumber].proc_id == pid) + { + /* + * Break if the latest stats have been read, indicated by + * statistics timestamp being newer than the current request + * timestamp. + */ + msecs = TimestampDifferenceMilliseconds(start_timestamp, + memCxtState[procNumber].stats_timestamp); + + if (DsaPointerIsValid(memCxtState[procNumber].memstats_dsa_pointer) + && msecs > 0) + break; + } + LWLockRelease(&memCxtState[procNumber].lw_lock); + + /* + * Recheck the state of the backend before sleeping on the condition + * variable to ensure the process is still alive. Only check the + * relevant process type based on the earlier PID check. + */ + if (proc_is_aux) + proc = AuxiliaryPidGetProc(pid); + else + proc = BackendPidGetProc(pid); + + /* + * The process ending during memory context processing is not an + * error. + */ + if (proc == NULL) + { + ereport(WARNING, + errmsg("PID %d is no longer a PostgreSQL server process", + pid)); + PG_RETURN_NULL(); + } + + msecs = TimestampDifferenceMilliseconds(start_timestamp, GetCurrentTimestamp()); + + /* + * If we haven't already exceeded the timeout value, sleep for the + * remainder of the timeout on the condition variable. + */ + if (msecs > 0 && msecs < (timeout * 1000)) + { + /* + * Wait for the timeout as defined by the user. If no updated + * statistics are available within the allowed time then display + * previously published statistics if there are any. If no + * previous statistics are available then return NULL. The timer + * is defined in milliseconds since that's what the condition + * variable sleep uses. 
+ */ + if (ConditionVariableTimedSleep(&memCxtState[procNumber].memcxt_cv, + ((timeout * 1000) - msecs), WAIT_EVENT_MEM_CXT_PUBLISH)) + { + LWLockAcquire(&memCxtState[procNumber].lw_lock, LW_EXCLUSIVE); + /* Display previously published statistics if available */ + if (DsaPointerIsValid(memCxtState[procNumber].memstats_dsa_pointer)) + break; + else + { + LWLockRelease(&memCxtState[procNumber].lw_lock); + PG_RETURN_NULL(); + } + } + } + else + { + LWLockAcquire(&memCxtState[procNumber].lw_lock, LW_EXCLUSIVE); + /* Display previously published statistics if available */ + if (DsaPointerIsValid(memCxtState[procNumber].memstats_dsa_pointer)) + break; + else + { + LWLockRelease(&memCxtState[procNumber].lw_lock); + PG_RETURN_NULL(); + } + } + } + + /* + * We should only reach here with a valid DSA handle, either containing + * updated statistics or previously published statistics (identified by + * the timestamp). + */ + Assert(memCxtArea->memstats_dsa_handle != DSA_HANDLE_INVALID); + /* Attach to the dsa area if we have not already done so */ + if (area == NULL) + { + MemoryContext oldcontext = CurrentMemoryContext; + + MemoryContextSwitchTo(TopMemoryContext); + area = dsa_attach(memCxtArea->memstats_dsa_handle); + MemoryContextSwitchTo(oldcontext); + dsa_pin_mapping(area); + } + + /* + * Backend has finished publishing the stats, project them. 
+ */ + memcxt_info = (MemoryStatsEntry *) + dsa_get_address(area, memCxtState[procNumber].memstats_dsa_pointer); + +#define PG_GET_PROCESS_MEMORY_CONTEXTS_COLS 12 + for (int i = 0; i < memCxtState[procNumber].total_stats; i++) + { + ArrayType *path_array; + int path_length; + Datum values[PG_GET_PROCESS_MEMORY_CONTEXTS_COLS]; + bool nulls[PG_GET_PROCESS_MEMORY_CONTEXTS_COLS]; + char *name; + char *ident; + Datum *path_datum = NULL; + int *path_int = NULL; + + memset(values, 0, sizeof(values)); + memset(nulls, 0, sizeof(nulls)); + + if (DsaPointerIsValid(memcxt_info[i].name)) + { + name = (char *) dsa_get_address(area, memcxt_info[i].name); + values[0] = CStringGetTextDatum(name); + } + else + nulls[0] = true; + + if (DsaPointerIsValid(memcxt_info[i].ident)) + { + ident = (char *) dsa_get_address(area, memcxt_info[i].ident); + values[1] = CStringGetTextDatum(ident); + } + else + nulls[1] = true; + + values[2] = CStringGetTextDatum(ContextTypeToString(memcxt_info[i].type)); + + path_length = memcxt_info[i].path_length; + path_datum = (Datum *) palloc(path_length * sizeof(Datum)); + if (DsaPointerIsValid(memcxt_info[i].path)) + { + path_int = (int *) dsa_get_address(area, memcxt_info[i].path); + for (int j = 0; j < path_length; j++) + path_datum[j] = Int32GetDatum(path_int[j]); + path_array = construct_array_builtin(path_datum, path_length, INT4OID); + values[3] = PointerGetDatum(path_array); + } + else + nulls[3] = true; + + values[4] = Int32GetDatum(memcxt_info[i].levels); + values[5] = Int64GetDatum(memcxt_info[i].totalspace); + values[6] = Int64GetDatum(memcxt_info[i].nblocks); + values[7] = Int64GetDatum(memcxt_info[i].freespace); + values[8] = Int64GetDatum(memcxt_info[i].freechunks); + values[9] = Int64GetDatum(memcxt_info[i].totalspace - + memcxt_info[i].freespace); + values[10] = Int32GetDatum(memcxt_info[i].num_agg_stats); + values[11] = TimestampTzGetDatum(memCxtState[procNumber].stats_timestamp); + + tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, 
+ values, nulls); + } + LWLockRelease(&memCxtState[procNumber].lw_lock); + + ConditionVariableCancelSleep(); + + PG_RETURN_NULL(); +} + +Size +MemoryContextReportingShmemSize(void) +{ + Size sz = 0; + Size TotalProcs = 0; + + TotalProcs = add_size(TotalProcs, NUM_AUXILIARY_PROCS); + TotalProcs = add_size(TotalProcs, MaxBackends); + sz = add_size(sz, mul_size(TotalProcs, sizeof(MemoryStatsBackendState))); + + sz = add_size(sz, sizeof(MemoryStatsCtl)); + + return sz; +} + +/* + * Initialize shared memory for displaying memory context statistics + */ +void +MemoryContextReportingShmemInit(void) +{ + bool found; + + memCxtArea = (MemoryStatsCtl *) + ShmemInitStruct("MemoryStatsCtl", + sizeof(MemoryStatsCtl), &found); + + if (!found) + { + LWLockInitialize(&memCxtArea->lw_lock, LWTRANCHE_MEMORY_CONTEXT_REPORTING_STATE); + memCxtArea->memstats_dsa_handle = DSA_HANDLE_INVALID; + } + + memCxtState = (MemoryStatsBackendState *) + ShmemInitStruct("MemoryStatsBackendState", + ((MaxBackends + NUM_AUXILIARY_PROCS) * sizeof(MemoryStatsBackendState)), + &found); + + if (found) + return; + + for (int i = 0; i < (MaxBackends + NUM_AUXILIARY_PROCS); i++) + { + ConditionVariableInit(&memCxtState[i].memcxt_cv); + LWLockInitialize(&memCxtState[i].lw_lock, LWTRANCHE_MEMORY_CONTEXT_REPORTING_PROC); + memCxtState[i].memstats_dsa_pointer = InvalidDsaPointer; + } +} diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c index 2152aad97d9..92304a1f124 100644 --- a/src/backend/utils/init/globals.c +++ b/src/backend/utils/init/globals.c @@ -39,6 +39,7 @@ volatile sig_atomic_t TransactionTimeoutPending = false; volatile sig_atomic_t IdleSessionTimeoutPending = false; volatile sig_atomic_t ProcSignalBarrierPending = false; volatile sig_atomic_t LogMemoryContextPending = false; +volatile sig_atomic_t PublishMemoryContextPending = false; volatile sig_atomic_t IdleStatsUpdateTimeoutPending = false; volatile uint32 InterruptHoldoffCount = 0; volatile uint32 
QueryCancelHoldoffCount = 0; diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index c09c4d404ba..01309ef3f86 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -667,6 +667,13 @@ BaseInit(void) * drop ephemeral slots, which in turn triggers stats reporting. */ ReplicationSlotInitialize(); + + /* + * The before shmem exit callback frees the DSA memory occupied by the + * latest memory context statistics that could be published by this proc + * if requested. + */ + before_shmem_exit(AtProcExit_memstats_cleanup, 0); } diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c index d98ae9db6be..cf4e22bf1cc 100644 --- a/src/backend/utils/mmgr/mcxt.c +++ b/src/backend/utils/mmgr/mcxt.c @@ -23,6 +23,11 @@ #include "mb/pg_wchar.h" #include "miscadmin.h" +#include "nodes/pg_list.h" +#include "storage/lwlock.h" +#include "storage/ipc.h" +#include "utils/dsa.h" +#include "utils/hsearch.h" #include "utils/memdebug.h" #include "utils/memutils.h" #include "utils/memutils_internal.h" @@ -135,6 +140,17 @@ static const MemoryContextMethods mcxt_methods[] = { }; #undef BOGUS_MCTX +/* + * This is passed to MemoryContextStatsInternal to determine whether + * to print context statistics or not and where to print them logs or + * stderr. 
+ */ +typedef enum PrintDestination +{ + PRINT_STATS_TO_STDERR = 0, + PRINT_STATS_TO_LOGS, + PRINT_STATS_NONE +} PrintDestination; /* * CurrentMemoryContext @@ -156,16 +172,31 @@ MemoryContext CurTransactionContext = NULL; /* This is a transient link to the active portal's memory context: */ MemoryContext PortalContext = NULL; +dsa_area *area = NULL; static void MemoryContextDeleteOnly(MemoryContext context); static void MemoryContextCallResetCallbacks(MemoryContext context); static void MemoryContextStatsInternal(MemoryContext context, int level, int max_level, int max_children, MemoryContextCounters *totals, - bool print_to_stderr); + PrintDestination print_location, + int *num_contexts); static void MemoryContextStatsPrint(MemoryContext context, void *passthru, const char *stats_string, bool print_to_stderr); +static void PublishMemoryContext(MemoryStatsEntry *memcxt_infos, + int curr_id, MemoryContext context, + List *path, + MemoryContextCounters stat, + int num_contexts, dsa_area *area, + int max_levels); +static void compute_contexts_count_and_ids(List *contexts, HTAB *context_id_lookup, + int *stats_count, + bool summary); +static List *compute_context_path(MemoryContext c, HTAB *context_id_lookup); +static void free_memorycontextstate_dsa(dsa_area *area, int total_stats, + dsa_pointer prev_dsa_pointer); +static void end_memorycontext_reporting(void); /* * You should not do memory allocations within a critical section, because @@ -831,11 +862,19 @@ MemoryContextStatsDetail(MemoryContext context, bool print_to_stderr) { MemoryContextCounters grand_totals; + int num_contexts = 0; + PrintDestination print_location; memset(&grand_totals, 0, sizeof(grand_totals)); + if (print_to_stderr) + print_location = PRINT_STATS_TO_STDERR; + else + print_location = PRINT_STATS_TO_LOGS; + + /* num_contexts counts the contexts aggregated in the output; it starts at zero because MemoryContextStatsInternal only increments it */ MemoryContextStatsInternal(context, 0, max_level, 
max_children, - &grand_totals, print_to_stderr); + &grand_totals, 
print_location, &num_contexts); if (print_to_stderr) fprintf(stderr, @@ -870,13 +909,14 @@ MemoryContextStatsDetail(MemoryContext context, * One recursion level for MemoryContextStats * * Print stats for this context if possible, but in any case accumulate counts - * into *totals (if not NULL). + * into *totals (if not NULL). The callers should make sure that print_location + * is set to PRINT_STATS_TO_STDERR or PRINT_STATS_TO_LOGS or PRINT_STATS_NONE. */ static void MemoryContextStatsInternal(MemoryContext context, int level, int max_level, int max_children, MemoryContextCounters *totals, - bool print_to_stderr) + PrintDestination print_location, int *num_contexts) { MemoryContext child; int ichild; @@ -884,10 +924,39 @@ MemoryContextStatsInternal(MemoryContext context, int level, Assert(MemoryContextIsValid(context)); /* Examine the context itself */ - context->methods->stats(context, - MemoryContextStatsPrint, - &level, - totals, print_to_stderr); + switch (print_location) + { + case PRINT_STATS_TO_STDERR: + context->methods->stats(context, + MemoryContextStatsPrint, + &level, + totals, true); + break; + + case PRINT_STATS_TO_LOGS: + context->methods->stats(context, + MemoryContextStatsPrint, + &level, + totals, false); + break; + + case PRINT_STATS_NONE: + + /* + * Do not print the statistics if print_location is + * PRINT_STATS_NONE, only compute totals. This is used in + * reporting of memory context statistics via an SQL function. Last + * parameter is not relevant. + */ + context->methods->stats(context, + NULL, + NULL, + totals, false); + break; + } + + /* Increment the context count for each recursive call */ + *num_contexts = *num_contexts + 1; /* * Examine children. 
@@ -907,7 +976,7 @@ MemoryContextStatsInternal(MemoryContext context, int level, MemoryContextStatsInternal(child, level + 1, max_level, max_children, totals, - print_to_stderr); + print_location, num_contexts); } } @@ -926,7 +995,13 @@ MemoryContextStatsInternal(MemoryContext context, int level, child = MemoryContextTraverseNext(child, context); } - if (print_to_stderr) + /* + * Add the count of children contexts which are traversed in the + * non-recursive manner. + */ + *num_contexts = *num_contexts + ichild; + + if (print_location == PRINT_STATS_TO_STDERR) { for (int i = 0; i <= level; i++) fprintf(stderr, " "); @@ -939,7 +1014,7 @@ MemoryContextStatsInternal(MemoryContext context, int level, local_totals.freechunks, local_totals.totalspace - local_totals.freespace); } - else + else if (print_location == PRINT_STATS_TO_LOGS) ereport(LOG_SERVER_ONLY, (errhidestmt(true), errhidecontext(true), @@ -1276,6 +1351,22 @@ HandleLogMemoryContextInterrupt(void) /* latch will be set by procsignal_sigusr1_handler */ } +/* + * HandleGetMemoryContextInterrupt + * Handle receipt of an interrupt indicating a request to publish memory + * contexts statistics. + * + * All the actual work is deferred to ProcessGetMemoryContextInterrupt() as + * this cannot be performed in a signal handler. + */ +void +HandleGetMemoryContextInterrupt(void) +{ + InterruptPending = true; + PublishMemoryContextPending = true; + /* latch will be set by procsignal_sigusr1_handler */ +} + /* * ProcessLogMemoryContextInterrupt * Perform logging of memory contexts of this backend process. @@ -1313,6 +1404,538 @@ ProcessLogMemoryContextInterrupt(void) MemoryContextStatsDetail(TopMemoryContext, 100, 100, false); } +/* + * ProcessGetMemoryContextInterrupt + * Generate information about memory contexts used by the process. + * + * Performs a breadth first search on the memory context tree, thus parents + * statistics are reported before their children in the monitoring function + * output. 
+ * + * Statistics for all the processes are shared via the same dynamic shared + * area. Statistics written by each process are tracked independently in + * per-process DSA pointers. These pointers are stored in static shared memory. + * + * We calculate the maximum number of contexts' statistics that can be displayed + * using a pre-determined limit for memory available per process for this + * utility and the maximum size of statistics for each context. The remaining + * context statistics, if any, are captured as a cumulative total at the end of + * the individual contexts' statistics. + * + * If summary is true, we capture the level 1 and level 2 contexts + * statistics. For that we traverse the memory context tree recursively in + * depth first search manner to cover all the children of a parent context, to + * be able to display a cumulative total of memory consumption by a parent at + * level 2 and all its children. + */ +void +ProcessGetMemoryContextInterrupt(void) +{ + List *contexts; + HASHCTL ctl; + HTAB *context_id_lookup; + int context_id = 0; + MemoryStatsEntry *meminfo; + bool summary = false; + int max_stats; + int idx = MyProcNumber; + int stats_count = 0; + int stats_num = 0; + MemoryContextCounters stat; + int num_individual_stats = 0; + + PublishMemoryContextPending = false; + + /* + * The hash table is used for constructing "path" column of the view, + * similar to its local backend counterpart. + */ + ctl.keysize = sizeof(MemoryContext); + ctl.entrysize = sizeof(MemoryStatsContextId); + ctl.hcxt = CurrentMemoryContext; + + context_id_lookup = hash_create("pg_get_remote_backend_memory_contexts", + 256, + &ctl, + HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); + + /* List of contexts to process in the next round - start at the top.
*/ + contexts = list_make1(TopMemoryContext); + + /* Compute the number of stats that can fit in the defined limit */ + max_stats = + MEMORY_CONTEXT_REPORT_MAX_PER_BACKEND / MAX_MEMORY_CONTEXT_STATS_SIZE; + LWLockAcquire(&memCxtState[idx].lw_lock, LW_EXCLUSIVE); + summary = memCxtState[idx].summary; + LWLockRelease(&memCxtState[idx].lw_lock); + + /* + * Traverse the memory context tree to find total number of contexts. If + * summary is requested report the total number of contexts at level 1 and + * 2 from the top. Also, populate the hash table of context ids. + */ + compute_contexts_count_and_ids(contexts, context_id_lookup, &stats_count, + summary); + + /* + * Allocate memory in this process's DSA for storing statistics of the + * memory contexts up to max_stats; for contexts that don't fit within the + * limit, a cumulative total is written as the last record in the DSA + * segment. + */ + stats_num = Min(stats_count, max_stats); + + LWLockAcquire(&memCxtArea->lw_lock, LW_EXCLUSIVE); + + /* + * Create a DSA and send the handle to the client process after storing + * the context statistics. If the number of contexts exceeds the limit set by + * MEMORY_CONTEXT_REPORT_MAX_PER_BACKEND, a cumulative total is stored for the rest. + */ + if (memCxtArea->memstats_dsa_handle == DSA_HANDLE_INVALID) + { + MemoryContext oldcontext = CurrentMemoryContext; + dsa_handle handle; + + MemoryContextSwitchTo(TopMemoryContext); + + area = dsa_create(memCxtArea->lw_lock.tranche); + + handle = dsa_get_handle(area); + MemoryContextSwitchTo(oldcontext); + + dsa_pin_mapping(area); + + /* + * Pin the DSA area, this is to make sure the area remains attachable + * even if current backend exits. This is done so that the statistics + * are published even if the process exits while a client is waiting. + */ + dsa_pin(area); + + /* Set the handle in shared memory */ + memCxtArea->memstats_dsa_handle = handle; + } + + /* + * If DSA exists, created by another process publishing statistics, attach + * to it.
+ */ + else if (area == NULL) + { + MemoryContext oldcontext = CurrentMemoryContext; + + MemoryContextSwitchTo(TopMemoryContext); + area = dsa_attach(memCxtArea->memstats_dsa_handle); + MemoryContextSwitchTo(oldcontext); + dsa_pin_mapping(area); + } + LWLockRelease(&memCxtArea->lw_lock); + + /* + * Hold the process lock to protect writes to process specific memory. Two + * processes publishing statistics do not block each other. + */ + LWLockAcquire(&memCxtState[idx].lw_lock, LW_EXCLUSIVE); + memCxtState[idx].proc_id = MyProcPid; + + if (DsaPointerIsValid(memCxtState[idx].memstats_dsa_pointer)) + { + /* + * Free any previous allocations, free the name, ident and path + * pointers before freeing the pointer that contains them. + */ + free_memorycontextstate_dsa(area, memCxtState[idx].total_stats, + memCxtState[idx].memstats_dsa_pointer); + } + + /* + * Assigning total stats before allocating memory so that memory cleanup + * can run if any subsequent dsa_allocate call to allocate name/ident/path + * fails. + */ + memCxtState[idx].total_stats = stats_num; + memCxtState[idx].memstats_dsa_pointer = + dsa_allocate0(area, stats_num * sizeof(MemoryStatsEntry)); + + meminfo = (MemoryStatsEntry *) + dsa_get_address(area, memCxtState[idx].memstats_dsa_pointer); + + if (summary) + { + int cxt_id = 0; + List *path = NIL; + + /* Copy TopMemoryContext statistics to DSA */ + memset(&stat, 0, sizeof(stat)); + (*TopMemoryContext->methods->stats) (TopMemoryContext, NULL, NULL, + &stat, true); + path = lcons_int(1, path); + PublishMemoryContext(meminfo, cxt_id, TopMemoryContext, path, stat, + 1, area, 100); + cxt_id = cxt_id + 1; + + /* + * Copy statistics for each of TopMemoryContexts children. This + * includes statistics of at most 100 children per node, with each + * child node limited to a depth of 100 in its subtree. 
+ */ + for (MemoryContext c = TopMemoryContext->firstchild; c != NULL && cxt_id < stats_num; + c = c->nextchild) + { + MemoryContextCounters grand_totals; + int num_contexts = 0; + int level = 0; + + path = NIL; + memset(&grand_totals, 0, sizeof(grand_totals)); + + MemoryContextStatsInternal(c, level, 100, 100, &grand_totals, + PRINT_STATS_NONE, &num_contexts); + + path = compute_context_path(c, context_id_lookup); + + /* + * Count slot cxt_id in total_stats before filling it, so that the + * cleanup handler can free a partially published entry if a later + * allocation fails; the loop condition bounds cxt_id by stats_num. + */ + memCxtState[idx].total_stats = cxt_id + 1; + PublishMemoryContext(meminfo, cxt_id++, c, path, + grand_totals, num_contexts, area, 100); + } + memCxtState[idx].total_stats = cxt_id; + + end_memorycontext_reporting(); + + /* Notify waiting backends and return */ + hash_destroy(context_id_lookup); + + return; + } + + foreach_ptr(MemoryContextData, cur, contexts) + { + List *path = NIL; + + /* + * Figure out the transient context_id of this context and each of its + * ancestors, to compute a path for this context. + */ + path = compute_context_path(cur, context_id_lookup); + + /* Examine the context stats */ + memset(&stat, 0, sizeof(stat)); + (*cur->methods->stats) (cur, NULL, NULL, &stat, true); + + /* Account for saving one statistics slot for cumulative reporting */ + if (context_id < (max_stats - 1) || stats_count <= max_stats) + { + /* Copy statistics to DSA memory */ + PublishMemoryContext(meminfo, context_id, cur, path, stat, 1, area, 100); + } + else + { + meminfo[max_stats - 1].totalspace += stat.totalspace; + meminfo[max_stats - 1].nblocks += stat.nblocks; + meminfo[max_stats - 1].freespace += stat.freespace; + meminfo[max_stats - 1].freechunks += stat.freechunks; + } + + /* + * DSA max limit per process is reached, write aggregate of the + * remaining statistics. + * + * We can store contexts from 0 to max_stats - 1.
When stats_count is + * greater than max_stats, we stop reporting individual statistics + * when context_id equals max_stats - 2. As we use max_stats - 1 array + * slot for reporting cumulative statistics or "Remaining Totals". + */ + if (stats_count > max_stats && context_id == (max_stats - 2)) + { + char *nameptr; + int namelen = strlen("Remaining Totals"); + + num_individual_stats = context_id + 1; + meminfo[max_stats - 1].name = dsa_allocate(area, namelen + 1); + nameptr = dsa_get_address(area, meminfo[max_stats - 1].name); + strlcpy(nameptr, "Remaining Totals", namelen + 1); + meminfo[max_stats - 1].ident = InvalidDsaPointer; + meminfo[max_stats - 1].path = InvalidDsaPointer; + meminfo[max_stats - 1].type = 0; + } + context_id++; + } + + /* + * Statistics are not aggregated, i.e. individual statistics are reported, when + * stats_count <= max_stats. + */ + if (stats_count <= max_stats) + { + memCxtState[idx].total_stats = context_id; + } + /* Report number of aggregated memory contexts */ + else + { + meminfo[max_stats - 1].num_agg_stats = context_id - + num_individual_stats; + + /* + * Total stats equals num_individual_stats + 1 record for cumulative + * statistics. + */ + memCxtState[idx].total_stats = num_individual_stats + 1; + } + + /* Notify waiting backends and return */ + end_memorycontext_reporting(); + + hash_destroy(context_id_lookup); +} + +/* + * Update timestamp and signal all the waiting client backends after copying + * all the statistics. + */ +static void +end_memorycontext_reporting(void) +{ + memCxtState[MyProcNumber].stats_timestamp = GetCurrentTimestamp(); + LWLockRelease(&memCxtState[MyProcNumber].lw_lock); + ConditionVariableBroadcast(&memCxtState[MyProcNumber].memcxt_cv); +} + +/* + * compute_context_path + * + * Append the transient context_id of this context and each of its ancestors + * to a list, in order to compute a path.
+ */ +static List * +compute_context_path(MemoryContext c, HTAB *context_id_lookup) +{ + bool found; + List *path = NIL; + MemoryContext cur_context; + + for (cur_context = c; cur_context != NULL; cur_context = cur_context->parent) + { + MemoryStatsContextId *cur_entry; + + cur_entry = hash_search(context_id_lookup, &cur_context, HASH_FIND, &found); + + if (!found) + elog(ERROR, "hash table corrupted, can't construct path value"); + + path = lcons_int(cur_entry->context_id, path); + } + + return path; +} + +/* + * Return the number of contexts allocated currently by the backend + * Assign context ids to each of the contexts. + */ +static void +compute_contexts_count_and_ids(List *contexts, HTAB *context_id_lookup, + int *stats_count, bool summary) +{ + foreach_ptr(MemoryContextData, cur, contexts) + { + MemoryStatsContextId *entry; + bool found; + + entry = (MemoryStatsContextId *) hash_search(context_id_lookup, &cur, + HASH_ENTER, &found); + Assert(!found); + + /* + * context id starts with 1 so increment the stats_count before + * assigning. + */ + entry->context_id = ++(*stats_count); + + /* Append the children of the current context to the main list. */ + for (MemoryContext c = cur->firstchild; c != NULL; c = c->nextchild) + { + if (summary) + { + entry = (MemoryStatsContextId *) hash_search(context_id_lookup, &c, + HASH_ENTER, &found); + Assert(!found); + + entry->context_id = ++(*stats_count); + } + + contexts = lappend(contexts, c); + } + + /* + * In summary mode only the first two level (from top) contexts are + * displayed. 
+ */ + if (summary) + break; + } +} + +/* + * PublishMemoryContext + * + * Copy the memory context statistics of a single context to a DSA memory + */ +static void +PublishMemoryContext(MemoryStatsEntry *memcxt_info, int curr_id, + MemoryContext context, List *path, + MemoryContextCounters stat, int num_contexts, + dsa_area *area, int max_levels) +{ + const char *ident = context->ident; + const char *name = context->name; + int *path_list; + + /* + * To be consistent with logging output, we label dynahash contexts with + * just the hash table name as with MemoryContextStatsPrint(). + */ + if (context->ident && strncmp(context->name, "dynahash", 8) == 0) + { + name = context->ident; + ident = NULL; + } + + if (name != NULL) + { + int namelen = strlen(name); + char *nameptr; + + if (strlen(name) >= MEMORY_CONTEXT_IDENT_SHMEM_SIZE) + namelen = pg_mbcliplen(name, namelen, + MEMORY_CONTEXT_IDENT_SHMEM_SIZE - 1); + + memcxt_info[curr_id].name = dsa_allocate(area, namelen + 1); + nameptr = (char *) dsa_get_address(area, memcxt_info[curr_id].name); + strlcpy(nameptr, name, namelen + 1); + } + else + memcxt_info[curr_id].name = InvalidDsaPointer; + + /* Trim and copy the identifier if it is not set to NULL */ + if (ident != NULL) + { + int idlen = strlen(context->ident); + char *identptr; + + /* + * Some identifiers such as SQL query string can be very long, + * truncate oversize identifiers. 
+ */ + if (idlen >= MEMORY_CONTEXT_IDENT_SHMEM_SIZE) + idlen = pg_mbcliplen(ident, idlen, + MEMORY_CONTEXT_IDENT_SHMEM_SIZE - 1); + + memcxt_info[curr_id].ident = dsa_allocate(area, idlen + 1); + identptr = (char *) dsa_get_address(area, memcxt_info[curr_id].ident); + strlcpy(identptr, ident, idlen + 1); + } + else + memcxt_info[curr_id].ident = InvalidDsaPointer; + + /* Allocate DSA memory for storing path information */ + if (path == NIL) + memcxt_info[curr_id].path = InvalidDsaPointer; + else + { + int levels = Min(list_length(path), max_levels); + + memcxt_info[curr_id].path_length = levels; + memcxt_info[curr_id].path = dsa_allocate0(area, levels * sizeof(int)); + memcxt_info[curr_id].levels = list_length(path); + path_list = (int *) dsa_get_address(area, memcxt_info[curr_id].path); + + foreach_int(i, path) + { + path_list[foreach_current_index(i)] = i; + if (--levels == 0) + break; + } + } + memcxt_info[curr_id].type = context->type; + memcxt_info[curr_id].totalspace = stat.totalspace; + memcxt_info[curr_id].nblocks = stat.nblocks; + memcxt_info[curr_id].freespace = stat.freespace; + memcxt_info[curr_id].freechunks = stat.freechunks; + memcxt_info[curr_id].num_agg_stats = num_contexts; +} + +/* + * free_memorycontextstate_dsa + * + * Worker for freeing resources from a MemoryStatsEntry. Callers are + * responsible for ensuring that the DSA pointer is valid. 
+ */ +static void +free_memorycontextstate_dsa(dsa_area *area, int total_stats, + dsa_pointer prev_dsa_pointer) +{ + MemoryStatsEntry *meminfo; + + meminfo = (MemoryStatsEntry *) dsa_get_address(area, prev_dsa_pointer); + Assert(meminfo != NULL); + for (int i = 0; i < total_stats; i++) + { + if (DsaPointerIsValid(meminfo[i].name)) + dsa_free(area, meminfo[i].name); + + if (DsaPointerIsValid(meminfo[i].ident)) + dsa_free(area, meminfo[i].ident); + + if (DsaPointerIsValid(meminfo[i].path)) + dsa_free(area, meminfo[i].path); + } + + dsa_free(area, prev_dsa_pointer); + memCxtState[MyProcNumber].memstats_dsa_pointer = InvalidDsaPointer; +} + +/* + * Free the memory context statistics stored by this process + * in DSA area. + */ +void +AtProcExit_memstats_cleanup(int code, Datum arg) +{ + int idx = MyProcNumber; + + if (memCxtArea->memstats_dsa_handle == DSA_HANDLE_INVALID) + return; + + LWLockAcquire(&memCxtState[idx].lw_lock, LW_EXCLUSIVE); + + if (!DsaPointerIsValid(memCxtState[idx].memstats_dsa_pointer)) + { + LWLockRelease(&memCxtState[idx].lw_lock); + return; + } + + /* If the dsa mapping could not be found, attach to the area */ + if (area == NULL) + area = dsa_attach(memCxtArea->memstats_dsa_handle); + + /* + * Free the memory context statistics, free the name, ident and path + * pointers before freeing the pointer that contains these pointers and + * integer statistics.
+ */ + free_memorycontextstate_dsa(area, memCxtState[idx].total_stats, + memCxtState[idx].memstats_dsa_pointer); + + dsa_detach(area); + LWLockRelease(&memCxtState[idx].lw_lock); +} + void * palloc(Size size) { diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 37a484147a8..4708f55be18 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -8571,6 +8571,16 @@ prorettype => 'bool', proargtypes => 'int4', prosrc => 'pg_log_backend_memory_contexts' }, +# publishing memory contexts of the specified postgres process +{ oid => '2173', descr => 'publish memory contexts of the specified backend', + proname => 'pg_get_process_memory_contexts', provolatile => 'v', + prorows => '100', proretset => 't', proparallel => 'r', + prorettype => 'record', proargtypes => 'int4 bool float8', + proallargtypes => '{int4,bool,float8,text,text,text,_int4,int4,int8,int8,int8,int8,int8,int4,timestamptz}', + proargmodes => '{i,i,i,o,o,o,o,o,o,o,o,o,o,o,o}', + proargnames => '{pid, summary, timeout, name, ident, type, path, level, total_bytes, total_nblocks, free_bytes, free_chunks, used_bytes, num_agg_contexts, stats_timestamp}', + prosrc => 'pg_get_process_memory_contexts' }, + # non-persistent series generator { oid => '1066', descr => 'non-persistent series generator', proname => 'generate_series', prorows => '1000', diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 0d8528b2875..58b2496a9cb 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -96,6 +96,7 @@ extern PGDLLIMPORT volatile sig_atomic_t IdleSessionTimeoutPending; extern PGDLLIMPORT volatile sig_atomic_t ProcSignalBarrierPending; extern PGDLLIMPORT volatile sig_atomic_t LogMemoryContextPending; extern PGDLLIMPORT volatile sig_atomic_t IdleStatsUpdateTimeoutPending; +extern PGDLLIMPORT volatile sig_atomic_t PublishMemoryContextPending; extern PGDLLIMPORT volatile sig_atomic_t CheckClientConnectionPending; extern PGDLLIMPORT
volatile sig_atomic_t ClientConnectionLost; diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index 4df1d25c045..d333f338ebb 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -219,6 +219,8 @@ typedef enum BuiltinTrancheIds LWTRANCHE_XACT_SLRU, LWTRANCHE_PARALLEL_VACUUM_DSA, LWTRANCHE_AIO_URING_COMPLETION, + LWTRANCHE_MEMORY_CONTEXT_REPORTING_STATE, + LWTRANCHE_MEMORY_CONTEXT_REPORTING_PROC, LWTRANCHE_FIRST_USER_DEFINED, } BuiltinTrancheIds; diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h index 016dfd9b3f6..cfe14631445 100644 --- a/src/include/storage/procsignal.h +++ b/src/include/storage/procsignal.h @@ -35,6 +35,7 @@ typedef enum PROCSIG_WALSND_INIT_STOPPING, /* ask walsenders to prepare for shutdown */ PROCSIG_BARRIER, /* global barrier interrupt */ PROCSIG_LOG_MEMORY_CONTEXT, /* ask backend to log the memory contexts */ + PROCSIG_GET_MEMORY_CONTEXT, /* ask backend to send the memory contexts */ PROCSIG_PARALLEL_APPLY_MESSAGE, /* Message from parallel apply workers */ /* Recovery conflict reasons */ diff --git a/src/include/utils/memutils.h b/src/include/utils/memutils.h index 8abc26abce2..d328270fafc 100644 --- a/src/include/utils/memutils.h +++ b/src/include/utils/memutils.h @@ -18,6 +18,9 @@ #define MEMUTILS_H #include "nodes/memnodes.h" +#include "storage/condition_variable.h" +#include "storage/lmgr.h" +#include "utils/dsa.h" /* @@ -48,6 +51,23 @@ #define AllocHugeSizeIsValid(size) ((Size) (size) <= MaxAllocHugeSize) +/* + * Memory Context reporting size limits. + */ + +/* Max length of context name and ident */ +#define MEMORY_CONTEXT_IDENT_SHMEM_SIZE 64 +/* Maximum size (in bytes) of DSA area per process */ +#define MEMORY_CONTEXT_REPORT_MAX_PER_BACKEND ((size_t) (1 * 1024 * 1024)) + +/* + * Maximum size per context. Actual size may be lower as this assumes the worst + * case of deepest path and longest identifiers (name and ident, thus the + * multiplication by 2). 
The path depth is limited to 100 like for memory + * context logging. + */ +#define MAX_MEMORY_CONTEXT_STATS_SIZE (sizeof(MemoryStatsEntry) + \ + (100 * sizeof(int)) + (2 * MEMORY_CONTEXT_IDENT_SHMEM_SIZE)) /* * Standard top-level memory contexts. @@ -319,4 +339,66 @@ pg_memory_is_all_zeros(const void *ptr, size_t len) return true; } +/* Dynamic shared memory state for statistics per context */ +typedef struct MemoryStatsEntry +{ + dsa_pointer name; + dsa_pointer ident; + dsa_pointer path; + NodeTag type; + int path_length; + int levels; + int64 totalspace; + int64 nblocks; + int64 freespace; + int64 freechunks; + int num_agg_stats; +} MemoryStatsEntry; + +/* + * Static shared memory state representing the DSA area created for memory + * context statistics reporting. A single DSA area is created and used by all + * the processes, each having its specific DSA allocations for sharing memory + * statistics, tracked by per backend static shared memory state. + */ +typedef struct MemoryStatsCtl +{ + dsa_handle memstats_dsa_handle; + LWLock lw_lock; +} MemoryStatsCtl; + +/* + * Per backend static shared memory state for memory context statistics + * reporting. 
+ */ +typedef struct MemoryStatsBackendState +{ + ConditionVariable memcxt_cv; + LWLock lw_lock; + int proc_id; + int total_stats; + bool summary; + dsa_pointer memstats_dsa_pointer; + TimestampTz stats_timestamp; +} MemoryStatsBackendState; + + +/* + * Used for storage of transient identifiers for pg_get_backend_memory_contexts + */ +typedef struct MemoryStatsContextId +{ + MemoryContext context; + int context_id; +} MemoryStatsContextId; + +extern PGDLLIMPORT MemoryStatsBackendState *memCxtState; +extern PGDLLIMPORT MemoryStatsCtl *memCxtArea; +extern void ProcessGetMemoryContextInterrupt(void); +extern const char *ContextTypeToString(NodeTag type); +extern void HandleGetMemoryContextInterrupt(void); +extern Size MemoryContextReportingShmemSize(void); +extern void MemoryContextReportingShmemInit(void); +extern void AtProcExit_memstats_cleanup(int code, Datum arg); +extern dsa_area *area; #endif /* MEMUTILS_H */ diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out index 83228cfca29..ae17d028ed3 100644 --- a/src/test/regress/expected/sysviews.out +++ b/src/test/regress/expected/sysviews.out @@ -232,3 +232,22 @@ select * from pg_timezone_abbrevs where abbrev = 'LMT'; LMT | @ 7 hours 52 mins 58 secs ago | f (1 row) +DO $$ +DECLARE + bg_writer_pid int; + r RECORD; +BEGIN + SELECT pid from pg_stat_activity where backend_type='background writer' + INTO bg_writer_pid; + + select type, name, ident + from pg_get_process_memory_contexts(bg_writer_pid, false, 20) + where path = '{1}' into r; + RAISE NOTICE '%', r; + select type, name, ident + from pg_get_process_memory_contexts(pg_backend_pid(), false, 20) + where path = '{1}' into r; + RAISE NOTICE '%', r; +END $$; +NOTICE: (AllocSet,TopMemoryContext,) +NOTICE: (AllocSet,TopMemoryContext,) diff --git a/src/test/regress/sql/sysviews.sql b/src/test/regress/sql/sysviews.sql index 66179f026b3..d0917b6868e 100644 --- a/src/test/regress/sql/sysviews.sql +++ 
b/src/test/regress/sql/sysviews.sql @@ -101,3 +101,21 @@ select count(distinct utc_offset) >= 24 as ok from pg_timezone_abbrevs; -- One specific case we can check without much fear of breakage -- is the historical local-mean-time value used for America/Los_Angeles. select * from pg_timezone_abbrevs where abbrev = 'LMT'; + +DO $$ +DECLARE + bg_writer_pid int; + r RECORD; +BEGIN + SELECT pid from pg_stat_activity where backend_type='background writer' + INTO bg_writer_pid; + + select type, name, ident + from pg_get_process_memory_contexts(bg_writer_pid, false, 20) + where path = '{1}' into r; + RAISE NOTICE '%', r; + select type, name, ident + from pg_get_process_memory_contexts(pg_backend_pid(), false, 20) + where path = '{1}' into r; + RAISE NOTICE '%', r; +END $$; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 87e6da8d25e..780e4c4fc07 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1671,6 +1671,10 @@ MemoryContextCounters MemoryContextData MemoryContextMethodID MemoryContextMethods +MemoryStatsBackendState +MemoryStatsContextId +MemoryStatsCtl +MemoryStatsEntry MemoryStatsPrintFunc MergeAction MergeActionState