mirror of
https://github.com/postgres/postgres.git
synced 2025-09-02 04:21:28 +03:00
Align buffer descriptors to cache line boundaries.
Benchmarks have shown that aligning the buffer descriptor array to cache lines is important for scalability; especially on bigger, multi-socket, machines. Currently the array sometimes already happens to be aligned by happenstance, depending on how large previous shared memory allocations were. That can lead to wildly varying performance results after minor configuration changes. In addition to aligning the start of the descriptor array, also force the size of individual descriptors to be of a common cache line size (64 bytes). That happens to already be the case on 64bit platforms, but this way we can change the struct BufferDesc more easily. As the alignment primarily matters in highly concurrent workloads which probably all are 64bit these days, and the space wastage of element alignment would be a bit more noticeable on 32bit systems, we don't force the stride to be cacheline sized on 32bit platforms for now. If somebody does actual performance testing, we can reevaluate that decision by changing the definition of BUFFERDESC_PADDED_SIZE. Discussion: 20140202151319.GD32123@awork2.anarazel.de Per discussion with Bruce Momjan, Tom Lane, Robert Haas, and Peter Geoghegan.
This commit is contained in:
@@ -134,7 +134,7 @@ typedef struct buftag
|
||||
* We use this same struct for local buffer headers, but the lock fields
|
||||
* are not used and not all of the flag bits are useful either.
|
||||
*/
|
||||
typedef struct sbufdesc
|
||||
typedef struct BufferDesc
|
||||
{
|
||||
BufferTag tag; /* ID of page contained in buffer */
|
||||
BufFlags flags; /* see bit definitions above */
|
||||
@@ -151,6 +151,37 @@ typedef struct sbufdesc
|
||||
LWLock *content_lock; /* to lock access to buffer contents */
|
||||
} BufferDesc;
|
||||
|
||||
/*
|
||||
* Concurrent access to buffer headers has proven to be more efficient if
|
||||
* they're cache line aligned. So we force the start of the BufferDescriptors
|
||||
* array to be on a cache line boundary and force the elements to be cache
|
||||
* line sized.
|
||||
*
|
||||
* XXX: As this primarily matters in highly concurrent workloads which
|
||||
* probably all are 64bit these days, and the space wastage would be a bit
|
||||
* more noticeable on 32bit systems, we don't force the stride to be cache
|
||||
* line sized on those. If somebody does actual performance testing, we can
|
||||
* reevaluate.
|
||||
*
|
||||
* Note that local buffer descriptors aren't forced to be aligned - as there's
|
||||
* no concurrent access to those it's unlikely to be beneficial.
|
||||
*
|
||||
* We use 64 bytes as the cache line size here, because that's the most common
|
||||
* size. Making it bigger would be a waste of memory. Even if running on a
|
||||
* platform with either 32 or 128 byte line sizes, it's good to align to
|
||||
* boundaries and avoid false sharing.
|
||||
*/
|
||||
/* Minimum size in bytes of a padded descriptor: 64 when pointers are 8 bytes wide, otherwise 1 (no effective padding). */
#define BUFFERDESC_PAD_TO_SIZE (SIZEOF_VOID_P == 8 ? 64 : 1)
|
||||
|
||||
/*
 * Overlays a BufferDesc with a char array of BUFFERDESC_PAD_TO_SIZE bytes,
 * so the union occupies at least that many bytes. Callers reach the real
 * descriptor through the bufferdesc member; pad exists only for its size.
 */
typedef union BufferDescPadded
|
||||
{
|
||||
BufferDesc bufferdesc;
|
||||
char pad[BUFFERDESC_PAD_TO_SIZE];
|
||||
} BufferDescPadded;
|
||||
|
||||
/* Address of the BufferDesc embedded in element (id) of the BufferDescriptors array. */
#define GetBufferDescriptor(id) (&BufferDescriptors[(id)].bufferdesc)
|
||||
/* Address of element (id) of the LocalBufferDescriptors array. */
#define GetLocalBufferDescriptor(id) (&LocalBufferDescriptors[(id)])
|
||||
|
||||
/* Yields the descriptor's buf_id plus one. */
#define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1)
|
||||
|
||||
/*
|
||||
@@ -174,7 +205,7 @@ typedef struct sbufdesc
|
||||
|
||||
|
||||
/* in buf_init.c */
|
||||
extern PGDLLIMPORT BufferDesc *BufferDescriptors;
|
||||
extern PGDLLIMPORT BufferDescPadded *BufferDescriptors;
|
||||
|
||||
/* in localbuf.c */
|
||||
extern BufferDesc *LocalBufferDescriptors;
|
||||
|
Reference in New Issue
Block a user