diff --git a/contrib/pg_buffercache/pg_buffercache_pages.c b/contrib/pg_buffercache/pg_buffercache_pages.c index d00f9859088..98016fc365e 100644 --- a/contrib/pg_buffercache/pg_buffercache_pages.c +++ b/contrib/pg_buffercache/pg_buffercache_pages.c @@ -73,7 +73,6 @@ pg_buffercache_pages(PG_FUNCTION_ARGS) if (SRF_IS_FIRSTCALL()) { int i; - volatile BufferDesc *bufHdr; funcctx = SRF_FIRSTCALL_INIT(); @@ -146,8 +145,11 @@ pg_buffercache_pages(PG_FUNCTION_ARGS) * Scan though all the buffers, saving the relevant fields in the * fctx->record structure. */ - for (i = 0, bufHdr = BufferDescriptors; i < NBuffers; i++, bufHdr++) + for (i = 0; i < NBuffers; i++) { + volatile BufferDesc *bufHdr; + + bufHdr = GetBufferDescriptor(i); /* Lock each buffer header before inspecting. */ LockBufHdr(bufHdr); diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c index 4434bc36f18..0cd2530be99 100644 --- a/src/backend/storage/buffer/buf_init.c +++ b/src/backend/storage/buffer/buf_init.c @@ -18,7 +18,7 @@ #include "storage/buf_internals.h" -BufferDesc *BufferDescriptors; +BufferDescPadded *BufferDescriptors; char *BufferBlocks; @@ -67,9 +67,11 @@ InitBufferPool(void) bool foundBufs, foundDescs; - BufferDescriptors = (BufferDesc *) + /* Align descriptors to a cacheline boundary. */ + BufferDescriptors = (BufferDescPadded *) CACHELINEALIGN( ShmemInitStruct("Buffer Descriptors", - NBuffers * sizeof(BufferDesc), &foundDescs); + NBuffers * sizeof(BufferDescPadded) + PG_CACHE_LINE_SIZE, + &foundDescs)); BufferBlocks = (char *) ShmemInitStruct("Buffer Blocks", @@ -83,16 +85,15 @@ InitBufferPool(void) } else { - BufferDesc *buf; int i; - buf = BufferDescriptors; - /* * Initialize all the buffer headers. */ - for (i = 0; i < NBuffers; buf++, i++) + for (i = 0; i < NBuffers; i++) { + BufferDesc *buf = GetBufferDescriptor(i); + CLEAR_BUFFERTAG(buf->tag); buf->flags = 0; buf->usage_count = 0; @@ -114,7 +115,7 @@ InitBufferPool(void) } /* Correct last entry of linked list */ - BufferDescriptors[NBuffers - 1].freeNext = FREENEXT_END_OF_LIST; + GetBufferDescriptor(NBuffers - 1)->freeNext = FREENEXT_END_OF_LIST; } /* Init other shared buffer-management stuff */ @@ -133,7 +134,9 @@ BufferShmemSize(void) Size size = 0; /* size of buffer descriptors */ - size = add_size(size, mul_size(NBuffers, sizeof(BufferDesc))); + size = add_size(size, mul_size(NBuffers, sizeof(BufferDescPadded))); + /* to allow aligning buffer descriptors */ + size = add_size(size, PG_CACHE_LINE_SIZE); /* size of data pages */ size = add_size(size, mul_size(NBuffers, BLCKSZ)); diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 7430407788b..e1e6240fe3e 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -898,7 +898,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, * buffer pool, and check to see if the correct data has been loaded * into the buffer. */ - buf = &BufferDescriptors[buf_id]; + buf = GetBufferDescriptor(buf_id); valid = PinBuffer(buf, strategy); @@ -1105,7 +1105,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, /* remaining code should match code at top of routine */ - buf = &BufferDescriptors[buf_id]; + buf = GetBufferDescriptor(buf_id); valid = PinBuffer(buf, strategy); @@ -1328,7 +1328,7 @@ MarkBufferDirty(Buffer buffer) return; } - bufHdr = &BufferDescriptors[buffer - 1]; + bufHdr = GetBufferDescriptor(buffer - 1); Assert(BufferIsPinned(buffer)); /* unfortunately we can't check if the lock is held exclusively */ @@ -1380,7 +1380,7 @@ ReleaseAndReadBuffer(Buffer buffer, Assert(BufferIsPinned(buffer)); if (BufferIsLocal(buffer)) { - bufHdr = &LocalBufferDescriptors[-buffer - 1]; + bufHdr = GetLocalBufferDescriptor(-buffer - 1); if (bufHdr->tag.blockNum == blockNum && RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node) && bufHdr->tag.forkNum == forkNum) @@ -1390,7 +1390,7 @@ ReleaseAndReadBuffer(Buffer buffer, } else { - bufHdr = &BufferDescriptors[buffer - 1]; + bufHdr = GetBufferDescriptor(buffer - 1); /* we have pin, so it's ok to examine tag without spinlock */ if (bufHdr->tag.blockNum == blockNum && RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node) && @@ -1609,7 +1609,7 @@ BufferSync(int flags) num_to_write = 0; for (buf_id = 0; buf_id < NBuffers; buf_id++) { - volatile BufferDesc *bufHdr = &BufferDescriptors[buf_id]; + volatile BufferDesc *bufHdr = GetBufferDescriptor(buf_id); /* * Header spinlock is enough to examine BM_DIRTY, see comment in @@ -1644,7 +1644,7 @@ BufferSync(int flags) num_written = 0; while (num_to_scan-- > 0) { - volatile BufferDesc *bufHdr = &BufferDescriptors[buf_id]; + volatile BufferDesc *bufHdr = GetBufferDescriptor(buf_id); /* * We don't need to acquire the lock here, because we're only looking @@ -2016,7 +2016,7 @@ BgBufferSync(void) static int SyncOneBuffer(int buf_id, bool skip_recently_used) { - volatile BufferDesc *bufHdr = &BufferDescriptors[buf_id]; + volatile BufferDesc *bufHdr = GetBufferDescriptor(buf_id); int result = 0; ReservePrivateRefCountEntry(); @@ -2196,13 +2196,13 @@ PrintBufferLeakWarning(Buffer buffer) Assert(BufferIsValid(buffer)); if (BufferIsLocal(buffer)) { - buf = &LocalBufferDescriptors[-buffer - 1]; + buf = GetLocalBufferDescriptor(-buffer - 1); loccount = LocalRefCount[-buffer - 1]; backend = MyBackendId; } else { - buf = &BufferDescriptors[buffer - 1]; + buf = GetBufferDescriptor(buffer - 1); loccount = GetPrivateRefCount(buffer); backend = InvalidBackendId; } @@ -2265,9 +2265,9 @@ BufferGetBlockNumber(Buffer buffer) Assert(BufferIsPinned(buffer)); if (BufferIsLocal(buffer)) - bufHdr = &(LocalBufferDescriptors[-buffer - 1]); + bufHdr = GetLocalBufferDescriptor(-buffer - 1); else - bufHdr = &BufferDescriptors[buffer - 1]; + bufHdr = GetBufferDescriptor(buffer - 1); /* pinned, so OK to read tag without spinlock */ return bufHdr->tag.blockNum; @@ -2288,9 +2288,9 @@ BufferGetTag(Buffer buffer, RelFileNode *rnode, ForkNumber *forknum, Assert(BufferIsPinned(buffer)); if (BufferIsLocal(buffer)) - bufHdr = &(LocalBufferDescriptors[-buffer - 1]); + bufHdr = GetLocalBufferDescriptor(-buffer - 1); else - bufHdr = &BufferDescriptors[buffer - 1]; + bufHdr = GetBufferDescriptor(buffer - 1); /* pinned, so OK to read tag without spinlock */ *rnode = bufHdr->tag.rnode; @@ -2473,7 +2473,7 @@ BufferIsPermanent(Buffer buffer) * changing an aligned 2-byte BufFlags value is atomic, so we'll read the * old value or the new value, but not random garbage. */ - bufHdr = &BufferDescriptors[buffer - 1]; + bufHdr = GetBufferDescriptor(buffer - 1); return (bufHdr->flags & BM_PERMANENT) != 0; } @@ -2486,7 +2486,7 @@ BufferIsPermanent(Buffer buffer) XLogRecPtr BufferGetLSNAtomic(Buffer buffer) { - volatile BufferDesc *bufHdr = &BufferDescriptors[buffer - 1]; + volatile BufferDesc *bufHdr = GetBufferDescriptor(buffer - 1); char *page = BufferGetPage(buffer); XLogRecPtr lsn; @@ -2549,7 +2549,7 @@ DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber forkNum, for (i = 0; i < NBuffers; i++) { - volatile BufferDesc *bufHdr = &BufferDescriptors[i]; + volatile BufferDesc *bufHdr = GetBufferDescriptor(i); /* * We can make this a tad faster by prechecking the buffer tag before @@ -2639,7 +2639,7 @@ DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes) for (i = 0; i < NBuffers; i++) { RelFileNode *rnode = NULL; - volatile BufferDesc *bufHdr = &BufferDescriptors[i]; + volatile BufferDesc *bufHdr = GetBufferDescriptor(i); /* * As in DropRelFileNodeBuffers, an unlocked precheck should be safe @@ -2703,7 +2703,7 @@ DropDatabaseBuffers(Oid dbid) for (i = 0; i < NBuffers; i++) { - volatile BufferDesc *bufHdr = &BufferDescriptors[i]; + volatile BufferDesc *bufHdr = GetBufferDescriptor(i); /* * As in DropRelFileNodeBuffers, an unlocked precheck should be safe @@ -2732,10 +2732,11 @@ void PrintBufferDescs(void) { int i; - volatile BufferDesc *buf = BufferDescriptors; - for (i = 0; i < NBuffers; ++i, ++buf) + for (i = 0; i < NBuffers; ++i) { + volatile BufferDesc *buf = GetBufferDescriptor(i); + /* theoretically we should lock the bufhdr here */ elog(LOG, "[%02d] (freeNext=%d, rel=%s, " @@ -2753,10 +2754,11 @@ void PrintPinnedBufs(void) { int i; - volatile BufferDesc *buf = BufferDescriptors; - for (i = 0; i < NBuffers; ++i, ++buf) + for (i = 0; i < NBuffers; ++i) { + volatile BufferDesc *buf = GetBufferDescriptor(i); + if (GetPrivateRefCount(i + 1) > 0) { /* theoretically we should lock the bufhdr here */ @@ -2804,7 +2806,7 @@ FlushRelationBuffers(Relation rel) { for (i = 0; i < NLocBuffer; i++) { - bufHdr = &LocalBufferDescriptors[i]; + bufHdr = GetLocalBufferDescriptor(i); if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) && (bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY)) { @@ -2842,7 +2844,7 @@ FlushRelationBuffers(Relation rel) for (i = 0; i < NBuffers; i++) { - bufHdr = &BufferDescriptors[i]; + bufHdr = GetBufferDescriptor(i); /* * As in DropRelFileNodeBuffers, an unlocked precheck should be safe @@ -2894,7 +2896,7 @@ FlushDatabaseBuffers(Oid dbid) for (i = 0; i < NBuffers; i++) { - bufHdr = &BufferDescriptors[i]; + bufHdr = GetBufferDescriptor(i); /* * As in DropRelFileNodeBuffers, an unlocked precheck should be safe @@ -2938,7 +2940,7 @@ ReleaseBuffer(Buffer buffer) return; } - UnpinBuffer(&BufferDescriptors[buffer - 1], true); + UnpinBuffer(GetBufferDescriptor(buffer - 1), true); } /* @@ -3007,7 +3009,7 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std) return; } - bufHdr = &BufferDescriptors[buffer - 1]; + bufHdr = GetBufferDescriptor(buffer - 1); Assert(GetPrivateRefCount(buffer) > 0); /* here, either share or exclusive lock is OK */ @@ -3161,7 +3163,7 @@ LockBuffer(Buffer buffer, int mode) if (BufferIsLocal(buffer)) return; /* local buffers need no lock */ - buf = &(BufferDescriptors[buffer - 1]); + buf = GetBufferDescriptor(buffer - 1); if (mode == BUFFER_LOCK_UNLOCK) LWLockRelease(buf->content_lock); @@ -3187,7 +3189,7 @@ ConditionalLockBuffer(Buffer buffer) if (BufferIsLocal(buffer)) return true; /* act as though we got it */ - buf = &(BufferDescriptors[buffer - 1]); + buf = GetBufferDescriptor(buffer - 1); return LWLockConditionalAcquire(buf->content_lock, LW_EXCLUSIVE); } @@ -3231,7 +3233,7 @@ LockBufferForCleanup(Buffer buffer) elog(ERROR, "incorrect local pin count: %d", GetPrivateRefCount(buffer)); - bufHdr = &BufferDescriptors[buffer - 1]; + bufHdr = GetBufferDescriptor(buffer - 1); for (;;) { @@ -3332,7 +3334,7 @@ ConditionalLockBufferForCleanup(Buffer buffer) if (!ConditionalLockBuffer(buffer)) return false; - bufHdr = &BufferDescriptors[buffer - 1]; + bufHdr = GetBufferDescriptor(buffer - 1); LockBufHdr(bufHdr); Assert(bufHdr->refcount > 0); if (bufHdr->refcount == 1) diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c index 3add619b5da..0d1cbd15531 100644 --- a/src/backend/storage/buffer/freelist.c +++ b/src/backend/storage/buffer/freelist.c @@ -259,7 +259,7 @@ StrategyGetBuffer(BufferAccessStrategy strategy) break; } - buf = &BufferDescriptors[StrategyControl->firstFreeBuffer]; + buf = GetBufferDescriptor(StrategyControl->firstFreeBuffer); Assert(buf->freeNext != FREENEXT_NOT_IN_LIST); /* Unconditionally remove buffer from freelist */ @@ -296,7 +296,7 @@ StrategyGetBuffer(BufferAccessStrategy strategy) for (;;) { - buf = &BufferDescriptors[ClockSweepTick()]; + buf = GetBufferDescriptor(ClockSweepTick()); /* * If the buffer is pinned or has a nonzero usage_count, we cannot use @@ -614,7 +614,7 @@ GetBufferFromRing(BufferAccessStrategy strategy) * higher usage_count indicates someone else has touched the buffer, so we * shouldn't re-use it. */ - buf = &BufferDescriptors[bufnum - 1]; + buf = GetBufferDescriptor(bufnum - 1); LockBufHdr(buf); if (buf->refcount == 0 && buf->usage_count <= 1) { diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index 1fc0af386cb..3144afe37bc 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -122,7 +122,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, if (hresult) { b = hresult->id; - bufHdr = &LocalBufferDescriptors[b]; + bufHdr = GetLocalBufferDescriptor(b); Assert(BUFFERTAGS_EQUAL(bufHdr->tag, newTag)); #ifdef LBDEBUG fprintf(stderr, "LB ALLOC (%u,%d,%d) %d\n", @@ -165,7 +165,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, if (++nextFreeLocalBuf >= NLocBuffer) nextFreeLocalBuf = 0; - bufHdr = &LocalBufferDescriptors[b]; + bufHdr = GetLocalBufferDescriptor(b); if (LocalRefCount[b] == 0) { @@ -278,7 +278,7 @@ MarkLocalBufferDirty(Buffer buffer) Assert(LocalRefCount[bufid] > 0); - bufHdr = &LocalBufferDescriptors[bufid]; + bufHdr = GetLocalBufferDescriptor(bufid); if (!(bufHdr->flags & BM_DIRTY)) pgBufferUsage.local_blks_dirtied++; @@ -305,7 +305,7 @@ DropRelFileNodeLocalBuffers(RelFileNode rnode, ForkNumber forkNum, for (i = 0; i < NLocBuffer; i++) { - BufferDesc *bufHdr = &LocalBufferDescriptors[i]; + BufferDesc *bufHdr = GetLocalBufferDescriptor(i); LocalBufferLookupEnt *hresult; if ((bufHdr->flags & BM_TAG_VALID) && @@ -347,7 +347,7 @@ DropRelFileNodeAllLocalBuffers(RelFileNode rnode) for (i = 0; i < NLocBuffer; i++) { - BufferDesc *bufHdr = &LocalBufferDescriptors[i]; + BufferDesc *bufHdr = GetLocalBufferDescriptor(i); LocalBufferLookupEnt *hresult; if ((bufHdr->flags & BM_TAG_VALID) && @@ -400,7 +400,7 @@ InitLocalBuffers(void) /* initialize fields that need to start off nonzero */ for (i = 0; i < nbufs; i++) { - BufferDesc *buf = &LocalBufferDescriptors[i]; + BufferDesc *buf = GetLocalBufferDescriptor(i); /* * negative to indicate local buffer. This is tricky: shared buffers diff --git a/src/include/c.h b/src/include/c.h index c929d3d6aec..b1875209736 100644 --- a/src/include/c.h +++ b/src/include/c.h @@ -537,6 +537,7 @@ typedef NameData *Name; #define MAXALIGN(LEN) TYPEALIGN(MAXIMUM_ALIGNOF, (LEN)) /* MAXALIGN covers only built-in types, not buffers */ #define BUFFERALIGN(LEN) TYPEALIGN(ALIGNOF_BUFFER, (LEN)) +#define CACHELINEALIGN(LEN) TYPEALIGN(PG_CACHE_LINE_SIZE, (LEN)) #define TYPEALIGN_DOWN(ALIGNVAL,LEN) \ (((uintptr_t) (LEN)) & ~((uintptr_t) ((ALIGNVAL) - 1))) diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index 9b8ace54da8..521ee1cfaea 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -134,7 +134,7 @@ typedef struct buftag * We use this same struct for local buffer headers, but the lock fields * are not used and not all of the flag bits are useful either. */ -typedef struct sbufdesc +typedef struct BufferDesc { BufferTag tag; /* ID of page contained in buffer */ BufFlags flags; /* see bit definitions above */ @@ -151,6 +151,37 @@ typedef struct sbufdesc LWLock *content_lock; /* to lock access to buffer contents */ } BufferDesc; +/* + * Concurrent access to buffer headers has proven to be more efficient if + * they're cache line aligned. So we force the start of the BufferDescriptors + * array to be on a cache line boundary and force the elements to be cache + * line sized. + * + * XXX: As this is primarily matters in highly concurrent workloads which + * probably all are 64bit these days, and the space wastage would be a bit + * more noticeable on 32bit systems, we don't force the stride to be cache + * line sized on those. If somebody does actual performance testing, we can + * reevaluate. + * + * Note that local buffer descriptors aren't forced to be aligned - as there's + * no concurrent access to those it's unlikely to be beneficial. + * + * We use 64bit as the cache line size here, because that's the most common + * size. Making it bigger would be a waste of memory. Even if running on a + * platform with either 32 or 128 byte line sizes, it's good to align to + * boundaries and avoid false sharing. + */ +#define BUFFERDESC_PAD_TO_SIZE (SIZEOF_VOID_P == 8 ? 64 : 1) + +typedef union BufferDescPadded +{ + BufferDesc bufferdesc; + char pad[BUFFERDESC_PAD_TO_SIZE]; +} BufferDescPadded; + +#define GetBufferDescriptor(id) (&BufferDescriptors[(id)].bufferdesc) +#define GetLocalBufferDescriptor(id) (&LocalBufferDescriptors[(id)]) + #define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1) /* @@ -174,7 +205,7 @@ typedef struct sbufdesc /* in buf_init.c */ -extern PGDLLIMPORT BufferDesc *BufferDescriptors; +extern PGDLLIMPORT BufferDescPadded *BufferDescriptors; /* in localbuf.c */ extern BufferDesc *LocalBufferDescriptors;