mirror of
https://github.com/postgres/postgres.git
synced 2025-05-02 11:44:50 +03:00
We have been using the term RelFileNode to refer to either (1) the integer that is used to name the sequence of files for a certain relation within the directory set aside for that tablespace/database combination; or (2) that value plus the OIDs of the tablespace and database; or occasionally (3) the whole series of files created for a relation based on those values. Using the same name for more than one thing is confusing. Replace RelFileNode with RelFileNumber when we're talking about just the single number, i.e. (1) from above, and with RelFileLocator when we're talking about all the things that are needed to locate a relation's files on disk, i.e. (2) from above. In the places where we refer to (3) as a relfilenode, instead refer to "relation storage". Since there is a ton of SQL code in the world that knows about pg_class.relfilenode, don't change the name of that column, or of other SQL-facing things that derive their name from it. On the other hand, do adjust closely-related internal terminology. For example, the structure member names dbNode and spcNode appear to be derived from the fact that the structure itself was called RelFileNode, so change those to dbOid and spcOid. Likewise, various variables with names like rnode and relnode get renamed appropriately, according to how they're being used in context. Hopefully, this is clearer than before. It is also preparation for future patches that intend to widen the relfilenumber fields from its current width of 32 bits. Variables that store a relfilenumber are now declared as type RelFileNumber rather than type Oid; right now, these are the same, but that can now more easily be changed. Dilip Kumar, per an idea from me. Reviewed also by Andres Freund. I fixed some whitespace issues, changed a couple of words in a comment, and made one other minor correction. Discussion: http://postgr.es/m/CA+TgmoamOtXbVAQf9hWFzonUo6bhhjS6toZQd7HZ-pmojtAmag@mail.gmail.com Discussion: http://postgr.es/m/CA+Tgmobp7+7kmi4gkq7Y+4AM9fTvL+O1oQ4-5gFTT+6Ng-dQ=g@mail.gmail.com Discussion: http://postgr.es/m/CAFiTN-vTe79M8uDH1yprOU64MNFE+R3ODRuA+JWf27JbhY4hJw@mail.gmail.com
240 lines
7.2 KiB
C
240 lines
7.2 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* pg_buffercache_pages.c
|
|
* display some contents of the buffer cache
|
|
*
|
|
* contrib/pg_buffercache/pg_buffercache_pages.c
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
#include "postgres.h"
|
|
|
|
#include "access/htup_details.h"
|
|
#include "catalog/pg_type.h"
|
|
#include "funcapi.h"
|
|
#include "storage/buf_internals.h"
|
|
#include "storage/bufmgr.h"
|
|
|
|
|
|
#define NUM_BUFFERCACHE_PAGES_MIN_ELEM 8
|
|
#define NUM_BUFFERCACHE_PAGES_ELEM 9
|
|
|
|
PG_MODULE_MAGIC;
|
|
|
|
/*
|
|
* Record structure holding the to be exposed cache data.
|
|
*/
|
|
typedef struct
|
|
{
|
|
uint32 bufferid;
|
|
RelFileNumber relfilenumber;
|
|
Oid reltablespace;
|
|
Oid reldatabase;
|
|
ForkNumber forknum;
|
|
BlockNumber blocknum;
|
|
bool isvalid;
|
|
bool isdirty;
|
|
uint16 usagecount;
|
|
|
|
/*
|
|
* An int32 is sufficiently large, as MAX_BACKENDS prevents a buffer from
|
|
* being pinned by too many backends and each backend will only pin once
|
|
* because of bufmgr.c's PrivateRefCount infrastructure.
|
|
*/
|
|
int32 pinning_backends;
|
|
} BufferCachePagesRec;
|
|
|
|
|
|
/*
|
|
* Function context for data persisting over repeated calls.
|
|
*/
|
|
typedef struct
|
|
{
|
|
TupleDesc tupdesc;
|
|
BufferCachePagesRec *record;
|
|
} BufferCachePagesContext;
|
|
|
|
|
|
/*
|
|
* Function returning data from the shared buffer cache - buffer number,
|
|
* relation node/tablespace/database/blocknum and dirty indicator.
|
|
*/
|
|
PG_FUNCTION_INFO_V1(pg_buffercache_pages);
|
|
|
|
Datum
|
|
pg_buffercache_pages(PG_FUNCTION_ARGS)
|
|
{
|
|
FuncCallContext *funcctx;
|
|
Datum result;
|
|
MemoryContext oldcontext;
|
|
BufferCachePagesContext *fctx; /* User function context. */
|
|
TupleDesc tupledesc;
|
|
TupleDesc expected_tupledesc;
|
|
HeapTuple tuple;
|
|
|
|
if (SRF_IS_FIRSTCALL())
|
|
{
|
|
int i;
|
|
|
|
funcctx = SRF_FIRSTCALL_INIT();
|
|
|
|
/* Switch context when allocating stuff to be used in later calls */
|
|
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
|
|
|
|
/* Create a user function context for cross-call persistence */
|
|
fctx = (BufferCachePagesContext *) palloc(sizeof(BufferCachePagesContext));
|
|
|
|
/*
|
|
* To smoothly support upgrades from version 1.0 of this extension
|
|
* transparently handle the (non-)existence of the pinning_backends
|
|
* column. We unfortunately have to get the result type for that... -
|
|
* we can't use the result type determined by the function definition
|
|
* without potentially crashing when somebody uses the old (or even
|
|
* wrong) function definition though.
|
|
*/
|
|
if (get_call_result_type(fcinfo, NULL, &expected_tupledesc) != TYPEFUNC_COMPOSITE)
|
|
elog(ERROR, "return type must be a row type");
|
|
|
|
if (expected_tupledesc->natts < NUM_BUFFERCACHE_PAGES_MIN_ELEM ||
|
|
expected_tupledesc->natts > NUM_BUFFERCACHE_PAGES_ELEM)
|
|
elog(ERROR, "incorrect number of output arguments");
|
|
|
|
/* Construct a tuple descriptor for the result rows. */
|
|
tupledesc = CreateTemplateTupleDesc(expected_tupledesc->natts);
|
|
TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid",
|
|
INT4OID, -1, 0);
|
|
TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenode",
|
|
OIDOID, -1, 0);
|
|
TupleDescInitEntry(tupledesc, (AttrNumber) 3, "reltablespace",
|
|
OIDOID, -1, 0);
|
|
TupleDescInitEntry(tupledesc, (AttrNumber) 4, "reldatabase",
|
|
OIDOID, -1, 0);
|
|
TupleDescInitEntry(tupledesc, (AttrNumber) 5, "relforknumber",
|
|
INT2OID, -1, 0);
|
|
TupleDescInitEntry(tupledesc, (AttrNumber) 6, "relblocknumber",
|
|
INT8OID, -1, 0);
|
|
TupleDescInitEntry(tupledesc, (AttrNumber) 7, "isdirty",
|
|
BOOLOID, -1, 0);
|
|
TupleDescInitEntry(tupledesc, (AttrNumber) 8, "usage_count",
|
|
INT2OID, -1, 0);
|
|
|
|
if (expected_tupledesc->natts == NUM_BUFFERCACHE_PAGES_ELEM)
|
|
TupleDescInitEntry(tupledesc, (AttrNumber) 9, "pinning_backends",
|
|
INT4OID, -1, 0);
|
|
|
|
fctx->tupdesc = BlessTupleDesc(tupledesc);
|
|
|
|
/* Allocate NBuffers worth of BufferCachePagesRec records. */
|
|
fctx->record = (BufferCachePagesRec *)
|
|
MemoryContextAllocHuge(CurrentMemoryContext,
|
|
sizeof(BufferCachePagesRec) * NBuffers);
|
|
|
|
/* Set max calls and remember the user function context. */
|
|
funcctx->max_calls = NBuffers;
|
|
funcctx->user_fctx = fctx;
|
|
|
|
/* Return to original context when allocating transient memory */
|
|
MemoryContextSwitchTo(oldcontext);
|
|
|
|
/*
|
|
* Scan through all the buffers, saving the relevant fields in the
|
|
* fctx->record structure.
|
|
*
|
|
* We don't hold the partition locks, so we don't get a consistent
|
|
* snapshot across all buffers, but we do grab the buffer header
|
|
* locks, so the information of each buffer is self-consistent.
|
|
*/
|
|
for (i = 0; i < NBuffers; i++)
|
|
{
|
|
BufferDesc *bufHdr;
|
|
uint32 buf_state;
|
|
|
|
bufHdr = GetBufferDescriptor(i);
|
|
/* Lock each buffer header before inspecting. */
|
|
buf_state = LockBufHdr(bufHdr);
|
|
|
|
fctx->record[i].bufferid = BufferDescriptorGetBuffer(bufHdr);
|
|
fctx->record[i].relfilenumber = bufHdr->tag.rlocator.relNumber;
|
|
fctx->record[i].reltablespace = bufHdr->tag.rlocator.spcOid;
|
|
fctx->record[i].reldatabase = bufHdr->tag.rlocator.dbOid;
|
|
fctx->record[i].forknum = bufHdr->tag.forkNum;
|
|
fctx->record[i].blocknum = bufHdr->tag.blockNum;
|
|
fctx->record[i].usagecount = BUF_STATE_GET_USAGECOUNT(buf_state);
|
|
fctx->record[i].pinning_backends = BUF_STATE_GET_REFCOUNT(buf_state);
|
|
|
|
if (buf_state & BM_DIRTY)
|
|
fctx->record[i].isdirty = true;
|
|
else
|
|
fctx->record[i].isdirty = false;
|
|
|
|
/* Note if the buffer is valid, and has storage created */
|
|
if ((buf_state & BM_VALID) && (buf_state & BM_TAG_VALID))
|
|
fctx->record[i].isvalid = true;
|
|
else
|
|
fctx->record[i].isvalid = false;
|
|
|
|
UnlockBufHdr(bufHdr, buf_state);
|
|
}
|
|
}
|
|
|
|
funcctx = SRF_PERCALL_SETUP();
|
|
|
|
/* Get the saved state */
|
|
fctx = funcctx->user_fctx;
|
|
|
|
if (funcctx->call_cntr < funcctx->max_calls)
|
|
{
|
|
uint32 i = funcctx->call_cntr;
|
|
Datum values[NUM_BUFFERCACHE_PAGES_ELEM];
|
|
bool nulls[NUM_BUFFERCACHE_PAGES_ELEM];
|
|
|
|
values[0] = Int32GetDatum(fctx->record[i].bufferid);
|
|
nulls[0] = false;
|
|
|
|
/*
|
|
* Set all fields except the bufferid to null if the buffer is unused
|
|
* or not valid.
|
|
*/
|
|
if (fctx->record[i].blocknum == InvalidBlockNumber ||
|
|
fctx->record[i].isvalid == false)
|
|
{
|
|
nulls[1] = true;
|
|
nulls[2] = true;
|
|
nulls[3] = true;
|
|
nulls[4] = true;
|
|
nulls[5] = true;
|
|
nulls[6] = true;
|
|
nulls[7] = true;
|
|
/* unused for v1.0 callers, but the array is always long enough */
|
|
nulls[8] = true;
|
|
}
|
|
else
|
|
{
|
|
values[1] = ObjectIdGetDatum(fctx->record[i].relfilenumber);
|
|
nulls[1] = false;
|
|
values[2] = ObjectIdGetDatum(fctx->record[i].reltablespace);
|
|
nulls[2] = false;
|
|
values[3] = ObjectIdGetDatum(fctx->record[i].reldatabase);
|
|
nulls[3] = false;
|
|
values[4] = ObjectIdGetDatum(fctx->record[i].forknum);
|
|
nulls[4] = false;
|
|
values[5] = Int64GetDatum((int64) fctx->record[i].blocknum);
|
|
nulls[5] = false;
|
|
values[6] = BoolGetDatum(fctx->record[i].isdirty);
|
|
nulls[6] = false;
|
|
values[7] = Int16GetDatum(fctx->record[i].usagecount);
|
|
nulls[7] = false;
|
|
/* unused for v1.0 callers, but the array is always long enough */
|
|
values[8] = Int32GetDatum(fctx->record[i].pinning_backends);
|
|
nulls[8] = false;
|
|
}
|
|
|
|
/* Build and return the tuple. */
|
|
tuple = heap_form_tuple(fctx->tupdesc, values, nulls);
|
|
result = HeapTupleGetDatum(tuple);
|
|
|
|
SRF_RETURN_NEXT(funcctx, result);
|
|
}
|
|
else
|
|
SRF_RETURN_DONE(funcctx);
|
|
}
|