/*-------------------------------------------------------------------------
*
* pg_buffercache_pages.c
* display some contents of the buffer cache
*
* contrib/pg_buffercache/pg_buffercache_pages.c
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/htup_details.h"
#include "access/relation.h"
#include "catalog/pg_type.h"
#include "funcapi.h"
#include "port/pg_numa.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"
#define NUM_BUFFERCACHE_PAGES_MIN_ELEM 8
#define NUM_BUFFERCACHE_PAGES_ELEM 9
#define NUM_BUFFERCACHE_SUMMARY_ELEM 5
#define NUM_BUFFERCACHE_USAGE_COUNTS_ELEM 4
#define NUM_BUFFERCACHE_EVICT_ELEM 2
#define NUM_BUFFERCACHE_EVICT_RELATION_ELEM 3
#define NUM_BUFFERCACHE_EVICT_ALL_ELEM 3
#define NUM_BUFFERCACHE_NUMA_ELEM 3
PG_MODULE_MAGIC_EXT(
.name = "pg_buffercache",
.version = PG_VERSION
);
/*
* Record structure holding the buffer cache data to be exposed.
*/
typedef struct
{
uint32 bufferid;
RelFileNumber relfilenumber;
Oid reltablespace;
Oid reldatabase;
ForkNumber forknum;
BlockNumber blocknum;
bool isvalid;
bool isdirty;
uint16 usagecount;
/*
* An int32 is sufficiently large, as MAX_BACKENDS prevents a buffer from
* being pinned by too many backends and each backend will only pin once
* because of bufmgr.c's PrivateRefCount infrastructure.
*/
int32 pinning_backends;
} BufferCachePagesRec;
/*
* Function context for data persisting over repeated calls.
*/
typedef struct
{
TupleDesc tupdesc;
BufferCachePagesRec *record;
} BufferCachePagesContext;
/*
* Record structure holding the buffer cache data to be exposed.
*/
typedef struct
{
uint32 bufferid;
int64 page_num;
int32 numa_node;
} BufferCacheNumaRec;
/*
* Function context for data persisting over repeated calls.
*/
typedef struct
{
TupleDesc tupdesc;
int buffers_per_page;
int pages_per_buffer;
int os_page_size;
BufferCacheNumaRec *record;
} BufferCacheNumaContext;
/*
* Function returning data from the shared buffer cache - buffer number,
* relation node/tablespace/database, block number, dirty indicator,
* usage count and number of pinning backends.
*/
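/*
 * Example usage, as a sketch (the pg_buffercache view installed by the
 * extension's SQL script wraps this function):
 *
 *     SELECT bufferid, relblocknumber, isdirty, usage_count
 *     FROM pg_buffercache
 *     WHERE reldatabase = (SELECT oid FROM pg_database
 *                          WHERE datname = current_database());
 */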
PG_FUNCTION_INFO_V1(pg_buffercache_pages);
PG_FUNCTION_INFO_V1(pg_buffercache_numa_pages);
PG_FUNCTION_INFO_V1(pg_buffercache_summary);
PG_FUNCTION_INFO_V1(pg_buffercache_usage_counts);
PG_FUNCTION_INFO_V1(pg_buffercache_evict);
PG_FUNCTION_INFO_V1(pg_buffercache_evict_relation);
PG_FUNCTION_INFO_V1(pg_buffercache_evict_all);
/* Only need to touch memory once per backend process lifetime */
static bool firstNumaTouch = true;
Datum
pg_buffercache_pages(PG_FUNCTION_ARGS)
{
FuncCallContext *funcctx;
Datum result;
MemoryContext oldcontext;
BufferCachePagesContext *fctx; /* User function context. */
TupleDesc tupledesc;
TupleDesc expected_tupledesc;
HeapTuple tuple;
if (SRF_IS_FIRSTCALL())
{
int i;
funcctx = SRF_FIRSTCALL_INIT();
/* Switch context when allocating stuff to be used in later calls */
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
/* Create a user function context for cross-call persistence */
fctx = (BufferCachePagesContext *) palloc(sizeof(BufferCachePagesContext));
/*
* To smoothly support upgrades from version 1.0 of this extension,
* transparently handle the (non-)existence of the pinning_backends
* column. Unfortunately, we have to look up the actual result type
* for that: we can't rely on the result type implied by the current
* function definition, since somebody might still be using an old
* (or even wrong) definition, which could otherwise cause a crash.
*/
if (get_call_result_type(fcinfo, NULL, &expected_tupledesc) != TYPEFUNC_COMPOSITE)
elog(ERROR, "return type must be a row type");
if (expected_tupledesc->natts < NUM_BUFFERCACHE_PAGES_MIN_ELEM ||
expected_tupledesc->natts > NUM_BUFFERCACHE_PAGES_ELEM)
elog(ERROR, "incorrect number of output arguments");
/* Construct a tuple descriptor for the result rows. */
tupledesc = CreateTemplateTupleDesc(expected_tupledesc->natts);
TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid",
INT4OID, -1, 0);
TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenode",
OIDOID, -1, 0);
TupleDescInitEntry(tupledesc, (AttrNumber) 3, "reltablespace",
OIDOID, -1, 0);
TupleDescInitEntry(tupledesc, (AttrNumber) 4, "reldatabase",
OIDOID, -1, 0);
TupleDescInitEntry(tupledesc, (AttrNumber) 5, "relforknumber",
INT2OID, -1, 0);
TupleDescInitEntry(tupledesc, (AttrNumber) 6, "relblocknumber",
INT8OID, -1, 0);
TupleDescInitEntry(tupledesc, (AttrNumber) 7, "isdirty",
BOOLOID, -1, 0);
TupleDescInitEntry(tupledesc, (AttrNumber) 8, "usage_count",
INT2OID, -1, 0);
if (expected_tupledesc->natts == NUM_BUFFERCACHE_PAGES_ELEM)
TupleDescInitEntry(tupledesc, (AttrNumber) 9, "pinning_backends",
INT4OID, -1, 0);
fctx->tupdesc = BlessTupleDesc(tupledesc);
/* Allocate NBuffers worth of BufferCachePagesRec records. */
fctx->record = (BufferCachePagesRec *)
MemoryContextAllocHuge(CurrentMemoryContext,
sizeof(BufferCachePagesRec) * NBuffers);
/* Set max calls and remember the user function context. */
funcctx->max_calls = NBuffers;
funcctx->user_fctx = fctx;
/* Return to original context when allocating transient memory */
MemoryContextSwitchTo(oldcontext);
/*
* Scan through all the buffers, saving the relevant fields in the
* fctx->record structure.
*
* We don't hold the partition locks, so we don't get a consistent
* snapshot across all buffers, but we do grab the buffer header
* locks, so the information of each buffer is self-consistent.
*/
for (i = 0; i < NBuffers; i++)
{
BufferDesc *bufHdr;
uint32 buf_state;
bufHdr = GetBufferDescriptor(i);
/* Lock each buffer header before inspecting. */
buf_state = LockBufHdr(bufHdr);
fctx->record[i].bufferid = BufferDescriptorGetBuffer(bufHdr);
fctx->record[i].relfilenumber = BufTagGetRelNumber(&bufHdr->tag);
fctx->record[i].reltablespace = bufHdr->tag.spcOid;
fctx->record[i].reldatabase = bufHdr->tag.dbOid;
fctx->record[i].forknum = BufTagGetForkNum(&bufHdr->tag);
fctx->record[i].blocknum = bufHdr->tag.blockNum;
fctx->record[i].usagecount = BUF_STATE_GET_USAGECOUNT(buf_state);
fctx->record[i].pinning_backends = BUF_STATE_GET_REFCOUNT(buf_state);
if (buf_state & BM_DIRTY)
fctx->record[i].isdirty = true;
else
fctx->record[i].isdirty = false;
/* Note if the buffer is valid, and has storage created */
if ((buf_state & BM_VALID) && (buf_state & BM_TAG_VALID))
fctx->record[i].isvalid = true;
else
fctx->record[i].isvalid = false;
UnlockBufHdr(bufHdr, buf_state);
}
}
funcctx = SRF_PERCALL_SETUP();
/* Get the saved state */
fctx = funcctx->user_fctx;
if (funcctx->call_cntr < funcctx->max_calls)
{
uint32 i = funcctx->call_cntr;
Datum values[NUM_BUFFERCACHE_PAGES_ELEM];
bool nulls[NUM_BUFFERCACHE_PAGES_ELEM];
values[0] = Int32GetDatum(fctx->record[i].bufferid);
nulls[0] = false;
/*
* Set all fields except the bufferid to null if the buffer is unused
* or not valid.
*/
if (fctx->record[i].blocknum == InvalidBlockNumber ||
fctx->record[i].isvalid == false)
{
nulls[1] = true;
nulls[2] = true;
nulls[3] = true;
nulls[4] = true;
nulls[5] = true;
nulls[6] = true;
nulls[7] = true;
/* unused for v1.0 callers, but the array is always long enough */
nulls[8] = true;
}
else
{
values[1] = ObjectIdGetDatum(fctx->record[i].relfilenumber);
nulls[1] = false;
values[2] = ObjectIdGetDatum(fctx->record[i].reltablespace);
nulls[2] = false;
values[3] = ObjectIdGetDatum(fctx->record[i].reldatabase);
nulls[3] = false;
values[4] = ObjectIdGetDatum(fctx->record[i].forknum);
nulls[4] = false;
values[5] = Int64GetDatum((int64) fctx->record[i].blocknum);
nulls[5] = false;
values[6] = BoolGetDatum(fctx->record[i].isdirty);
nulls[6] = false;
values[7] = Int16GetDatum(fctx->record[i].usagecount);
nulls[7] = false;
/* unused for v1.0 callers, but the array is always long enough */
values[8] = Int32GetDatum(fctx->record[i].pinning_backends);
nulls[8] = false;
}
/* Build and return the tuple. */
tuple = heap_form_tuple(fctx->tupdesc, values, nulls);
result = HeapTupleGetDatum(tuple);
SRF_RETURN_NEXT(funcctx, result);
}
else
SRF_RETURN_DONE(funcctx);
}
/*
* Inquire about NUMA memory mappings for shared buffers.
*
* Returns the NUMA node ID for each memory page used by each buffer.
* Buffers may be smaller or larger than OS memory pages. For each buffer
* we return one entry per memory page used by that buffer (if the buffer
* is smaller, it uses only part of one memory page).
*
* We expect both sizes (for buffers and memory pages) to be powers of 2,
* so one is always a multiple of the other.
*
* In order to get reliable results we also need to touch the memory
* pages, so that the inquiry about the NUMA memory node doesn't return
* -2 (which indicates unmapped/unallocated pages).
*/
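/*
 * For illustration, a typical query against the pg_buffercache_numa
 * view (a sketch; the view wraps this function):
 *
 *     SELECT numa_node, count(*)
 *     FROM pg_buffercache_numa
 *     GROUP BY numa_node
 *     ORDER BY numa_node;
 */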
Datum
pg_buffercache_numa_pages(PG_FUNCTION_ARGS)
{
FuncCallContext *funcctx;
MemoryContext oldcontext;
BufferCacheNumaContext *fctx; /* User function context. */
TupleDesc tupledesc;
TupleDesc expected_tupledesc;
HeapTuple tuple;
Datum result;
if (SRF_IS_FIRSTCALL())
{
int i,
idx;
Size os_page_size;
void **os_page_ptrs;
int *os_page_status;
uint64 os_page_count;
int pages_per_buffer;
int max_entries;
volatile uint64 touch pg_attribute_unused();
char *startptr,
*endptr;
if (pg_numa_init() == -1)
elog(ERROR, "libnuma initialization failed or NUMA is not supported on this platform");
/*
* The database block size and OS memory page size are unlikely to be
* the same. The block size is between 1KB and 32KB, while the memory
* page size depends on the platform: on x86 it's usually 4KB, on ARM
* it's 4KB or 64KB, and features like THP complicate things further.
* Moreover, we don't quite know how the pages and buffers "align" in
* memory - the buffers may be shifted in some way, using more memory
* pages than strictly necessary.
*
* So we need to be careful about mapping buffers to memory pages. We
* calculate the maximum number of pages a buffer might use, so that
* we allocate enough space for the entries. And then we count the
* actual number of entries as we scan the buffers.
*
* This information is needed before calling move_pages() for NUMA
* node id inquiry.
*/
os_page_size = pg_get_shmem_pagesize();
/*
* The page and block sizes are both expected to be powers of 2, so one
* always divides the other (we just don't know in which direction).
* This says nothing about the relative alignment of pages and buffers.
*/
Assert((os_page_size % BLCKSZ == 0) || (BLCKSZ % os_page_size == 0));
/*
* How many addresses are we going to query? Simply take the page
* containing the first buffer and the first page after the last buffer,
* and count the pages in between.
*/
startptr = (char *) TYPEALIGN_DOWN(os_page_size,
BufferGetBlock(1));
endptr = (char *) TYPEALIGN(os_page_size,
(char *) BufferGetBlock(NBuffers) + BLCKSZ);
os_page_count = (endptr - startptr) / os_page_size;
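/*
 * A worked example, with illustrative numbers only: NBuffers = 16384
 * and BLCKSZ = 8192 give a 128MB buffer pool, which on 4KB OS pages
 * yields os_page_count = 32768 (one page more if the pool does not
 * start exactly on a page boundary).
 */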
/* Used to determine the NUMA node for all OS pages at once */
os_page_ptrs = palloc0(sizeof(void *) * os_page_count);
os_page_status = palloc(sizeof(int) * os_page_count);
/* Fill pointers for all the memory pages. */
idx = 0;
for (char *ptr = startptr; ptr < endptr; ptr += os_page_size)
{
os_page_ptrs[idx++] = ptr;
/* Only need to touch memory once per backend process lifetime */
if (firstNumaTouch)
pg_numa_touch_mem_if_required(touch, ptr);
}
Assert(idx == os_page_count);
elog(DEBUG1, "NUMA: NBuffers=%d os_page_count=" UINT64_FORMAT " "
"os_page_size=%zu", NBuffers, os_page_count, os_page_size);
/*
* If we ever get 0xff back from the kernel inquiry, then we probably
* have a bug in our buffer-to-OS-page mapping code here.
*/
memset(os_page_status, 0xff, sizeof(int) * os_page_count);
/* Query NUMA status for all the pointers */
if (pg_numa_query_pages(0, os_page_count, os_page_ptrs, os_page_status) == -1)
elog(ERROR, "failed NUMA pages inquiry: %m");
/* Initialize the multi-call context, load entries about buffers */
funcctx = SRF_FIRSTCALL_INIT();
/* Switch context when allocating stuff to be used in later calls */
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
/* Create a user function context for cross-call persistence */
fctx = (BufferCacheNumaContext *) palloc(sizeof(BufferCacheNumaContext));
if (get_call_result_type(fcinfo, NULL, &expected_tupledesc) != TYPEFUNC_COMPOSITE)
elog(ERROR, "return type must be a row type");
if (expected_tupledesc->natts != NUM_BUFFERCACHE_NUMA_ELEM)
elog(ERROR, "incorrect number of output arguments");
/* Construct a tuple descriptor for the result rows. */
tupledesc = CreateTemplateTupleDesc(expected_tupledesc->natts);
TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid",
INT4OID, -1, 0);
TupleDescInitEntry(tupledesc, (AttrNumber) 2, "os_page_num",
INT8OID, -1, 0);
TupleDescInitEntry(tupledesc, (AttrNumber) 3, "numa_node",
INT4OID, -1, 0);
fctx->tupdesc = BlessTupleDesc(tupledesc);
/*
* Each buffer needs at least one entry, but if it is not aligned to the
* OS page size it may straddle one extra page and thus need one extra
* entry. So we allocate space for the maximum number of entries we
* might need, and then count the exact number as we're walking the
* buffers. That way we can do it in one pass, without reallocating
* memory.
*/
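/*
 * For example, with BLCKSZ = 8192 on 4KB OS pages this allocates
 * Max(1, 8192 / 4096) + 1 = 3 entries per buffer, enough for a
 * misaligned buffer straddling three pages; with 2MB huge pages it
 * allocates Max(1, 0) + 1 = 2, since a buffer can then cross at most
 * one page boundary.
 */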
pages_per_buffer = Max(1, BLCKSZ / os_page_size) + 1;
max_entries = NBuffers * pages_per_buffer;
/* Allocate entries for BufferCacheNumaRec records. */
fctx->record = (BufferCacheNumaRec *)
MemoryContextAllocHuge(CurrentMemoryContext,
sizeof(BufferCacheNumaRec) * max_entries);
/* Return to original context when allocating transient memory */
MemoryContextSwitchTo(oldcontext);
if (firstNumaTouch)
elog(DEBUG1, "NUMA: page-faulting the buffercache for proper NUMA readouts");
/*
* Scan through all the buffers, saving the relevant fields in the
* fctx->record structure.
*
* We don't hold the partition locks, so we don't get a consistent
* snapshot across all buffers, but we do grab the buffer header
* locks, so the information of each buffer is self-consistent.
*
* This loop touches and stores addresses into os_page_ptrs[] as input
* to one big move_pages(2) inquiry system call. Basically we ask for
* all memory pages for NBuffers.
*/
startptr = (char *) TYPEALIGN_DOWN(os_page_size, (char *) BufferGetBlock(1));
idx = 0;
for (i = 0; i < NBuffers; i++)
{
char *buffptr = (char *) BufferGetBlock(i + 1);
BufferDesc *bufHdr;
uint32 buf_state;
uint32 bufferid;
int32 page_num;
char *startptr_buff,
*endptr_buff;
CHECK_FOR_INTERRUPTS();
bufHdr = GetBufferDescriptor(i);
/* Lock each buffer header before inspecting. */
buf_state = LockBufHdr(bufHdr);
bufferid = BufferDescriptorGetBuffer(bufHdr);
UnlockBufHdr(bufHdr, buf_state);
/* start of the first page of this buffer */
startptr_buff = (char *) TYPEALIGN_DOWN(os_page_size, buffptr);
/* end of the buffer (no need to align to memory page) */
endptr_buff = buffptr + BLCKSZ;
Assert(startptr_buff < endptr_buff);
/* calculate ID of the first page for this buffer */
page_num = (startptr_buff - startptr) / os_page_size;
/* Add an entry for each OS page overlapping with this buffer. */
for (char *ptr = startptr_buff; ptr < endptr_buff; ptr += os_page_size)
{
fctx->record[idx].bufferid = bufferid;
fctx->record[idx].page_num = page_num;
fctx->record[idx].numa_node = os_page_status[page_num];
/* advance to the next entry/page */
++idx;
++page_num;
}
}
Assert((idx >= os_page_count) && (idx <= max_entries));
/* Set max calls and remember the user function context. */
funcctx->max_calls = idx;
funcctx->user_fctx = fctx;
/* Remember this backend touched the pages */
firstNumaTouch = false;
}
funcctx = SRF_PERCALL_SETUP();
/* Get the saved state */
fctx = funcctx->user_fctx;
if (funcctx->call_cntr < funcctx->max_calls)
{
uint32 i = funcctx->call_cntr;
Datum values[NUM_BUFFERCACHE_NUMA_ELEM];
bool nulls[NUM_BUFFERCACHE_NUMA_ELEM];
values[0] = Int32GetDatum(fctx->record[i].bufferid);
nulls[0] = false;
values[1] = Int64GetDatum(fctx->record[i].page_num);
nulls[1] = false;
values[2] = Int32GetDatum(fctx->record[i].numa_node);
nulls[2] = false;
/* Build and return the tuple. */
tuple = heap_form_tuple(fctx->tupdesc, values, nulls);
result = HeapTupleGetDatum(tuple);
SRF_RETURN_NEXT(funcctx, result);
}
else
SRF_RETURN_DONE(funcctx);
}
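/*
 * Return a single row summarizing the state of all shared buffers:
 * the number of used, unused, dirty, and pinned buffers, plus the
 * average usage count of valid buffers.
 */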
Datum
pg_buffercache_summary(PG_FUNCTION_ARGS)
{
Datum result;
TupleDesc tupledesc;
HeapTuple tuple;
Datum values[NUM_BUFFERCACHE_SUMMARY_ELEM];
bool nulls[NUM_BUFFERCACHE_SUMMARY_ELEM];
int32 buffers_used = 0;
int32 buffers_unused = 0;
int32 buffers_dirty = 0;
int32 buffers_pinned = 0;
int64 usagecount_total = 0;
if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
elog(ERROR, "return type must be a row type");
for (int i = 0; i < NBuffers; i++)
{
BufferDesc *bufHdr;
uint32 buf_state;
/*
* This function summarizes the state of all headers. Locking the
* buffer headers wouldn't provide an improved result as the state of
* the buffer can still change after we release the lock and it'd
* noticeably increase the cost of the function.
*/
bufHdr = GetBufferDescriptor(i);
buf_state = pg_atomic_read_u32(&bufHdr->state);
if (buf_state & BM_VALID)
{
buffers_used++;
usagecount_total += BUF_STATE_GET_USAGECOUNT(buf_state);
if (buf_state & BM_DIRTY)
buffers_dirty++;
}
else
buffers_unused++;
if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
buffers_pinned++;
}
memset(nulls, 0, sizeof(nulls));
values[0] = Int32GetDatum(buffers_used);
values[1] = Int32GetDatum(buffers_unused);
values[2] = Int32GetDatum(buffers_dirty);
values[3] = Int32GetDatum(buffers_pinned);
if (buffers_used != 0)
values[4] = Float8GetDatum((double) usagecount_total / buffers_used);
else
nulls[4] = true;
/* Build and return the tuple. */
tuple = heap_form_tuple(tupledesc, values, nulls);
result = HeapTupleGetDatum(tuple);
PG_RETURN_DATUM(result);
}
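/*
 * Return one row for each possible usage count (0 to BM_MAX_USAGE_COUNT),
 * with the number of buffers, dirty buffers, and pinned buffers at that
 * usage count.
 */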
Datum
pg_buffercache_usage_counts(PG_FUNCTION_ARGS)
{
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
int usage_counts[BM_MAX_USAGE_COUNT + 1] = {0};
int dirty[BM_MAX_USAGE_COUNT + 1] = {0};
int pinned[BM_MAX_USAGE_COUNT + 1] = {0};
Datum values[NUM_BUFFERCACHE_USAGE_COUNTS_ELEM];
bool nulls[NUM_BUFFERCACHE_USAGE_COUNTS_ELEM] = {0};
InitMaterializedSRF(fcinfo, 0);
for (int i = 0; i < NBuffers; i++)
{
BufferDesc *bufHdr = GetBufferDescriptor(i);
uint32 buf_state = pg_atomic_read_u32(&bufHdr->state);
int usage_count;
usage_count = BUF_STATE_GET_USAGECOUNT(buf_state);
usage_counts[usage_count]++;
if (buf_state & BM_DIRTY)
dirty[usage_count]++;
if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
pinned[usage_count]++;
}
for (int i = 0; i < BM_MAX_USAGE_COUNT + 1; i++)
{
values[0] = Int32GetDatum(i);
values[1] = Int32GetDatum(usage_counts[i]);
values[2] = Int32GetDatum(dirty[i]);
values[3] = Int32GetDatum(pinned[i]);
tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
}
return (Datum) 0;
}
/*
* Helper function to check that the user has superuser privileges;
* errors out if not.
*/
static void
pg_buffercache_superuser_check(char *func_name)
{
if (!superuser())
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("must be superuser to use %s()",
func_name)));
}
/*
* Try to evict a shared buffer.
*/
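/*
 * Example (a sketch; requires superuser):
 *
 *     SELECT * FROM pg_buffercache_evict(1);
 */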
Datum
pg_buffercache_evict(PG_FUNCTION_ARGS)
{
Datum result;
TupleDesc tupledesc;
HeapTuple tuple;
Datum values[NUM_BUFFERCACHE_EVICT_ELEM];
bool nulls[NUM_BUFFERCACHE_EVICT_ELEM] = {0};
Buffer buf = PG_GETARG_INT32(0);
bool buffer_flushed;
if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
elog(ERROR, "return type must be a row type");
pg_buffercache_superuser_check("pg_buffercache_evict");
if (buf < 1 || buf > NBuffers)
elog(ERROR, "bad buffer ID: %d", buf);
values[0] = BoolGetDatum(EvictUnpinnedBuffer(buf, &buffer_flushed));
values[1] = BoolGetDatum(buffer_flushed);
tuple = heap_form_tuple(tupledesc, values, nulls);
result = HeapTupleGetDatum(tuple);
PG_RETURN_DATUM(result);
}
/*
* Try to evict all unpinned shared buffers belonging to the specified
* relation.
*/
Datum
pg_buffercache_evict_relation(PG_FUNCTION_ARGS)
{
Datum result;
TupleDesc tupledesc;
HeapTuple tuple;
Datum values[NUM_BUFFERCACHE_EVICT_RELATION_ELEM];
bool nulls[NUM_BUFFERCACHE_EVICT_RELATION_ELEM] = {0};
Oid relOid;
Relation rel;
int32 buffers_evicted = 0;
int32 buffers_flushed = 0;
int32 buffers_skipped = 0;
if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
elog(ERROR, "return type must be a row type");
pg_buffercache_superuser_check("pg_buffercache_evict_relation");
relOid = PG_GETARG_OID(0);
rel = relation_open(relOid, AccessShareLock);
if (RelationUsesLocalBuffers(rel))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("relation uses local buffers, %s() is intended to be used for shared buffers only",
"pg_buffercache_evict_relation")));
EvictRelUnpinnedBuffers(rel, &buffers_evicted, &buffers_flushed,
&buffers_skipped);
relation_close(rel, AccessShareLock);
values[0] = Int32GetDatum(buffers_evicted);
values[1] = Int32GetDatum(buffers_flushed);
values[2] = Int32GetDatum(buffers_skipped);
tuple = heap_form_tuple(tupledesc, values, nulls);
result = HeapTupleGetDatum(tuple);
PG_RETURN_DATUM(result);
}
/*
* Try to evict all unpinned shared buffers.
*/
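/*
 * Example (a sketch; requires superuser):
 *
 *     SELECT buffers_evicted, buffers_flushed, buffers_skipped
 *     FROM pg_buffercache_evict_all();
 */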
Datum
pg_buffercache_evict_all(PG_FUNCTION_ARGS)
{
Datum result;
TupleDesc tupledesc;
HeapTuple tuple;
Datum values[NUM_BUFFERCACHE_EVICT_ALL_ELEM];
bool nulls[NUM_BUFFERCACHE_EVICT_ALL_ELEM] = {0};
int32 buffers_evicted = 0;
int32 buffers_flushed = 0;
int32 buffers_skipped = 0;
if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
elog(ERROR, "return type must be a row type");
pg_buffercache_superuser_check("pg_buffercache_evict_all");
EvictAllUnpinnedBuffers(&buffers_evicted, &buffers_flushed,
&buffers_skipped);
values[0] = Int32GetDatum(buffers_evicted);
values[1] = Int32GetDatum(buffers_flushed);
values[2] = Int32GetDatum(buffers_skipped);
tuple = heap_form_tuple(tupledesc, values, nulls);
result = HeapTupleGetDatum(tuple);
PG_RETURN_DATUM(result);
}