1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-09 06:21:09 +03:00

Index SLRUs by 64-bit integers rather than by 32-bit integers

We've had repeated bugs in the area of handling SLRU wraparound in the past,
some of which have caused data loss. Switching to an indexing system for SLRUs
that does not wrap around should allow us to get rid of a whole bunch
of problems and improve the overall reliability of the system.

This particular patch however only changes the indexing and doesn't address
the wraparound per se. This is going to be done in the following patches.

Author: Maxim Orlov, Aleksander Alekseev, Alexander Korotkov, Teodor Sigaev
Author: Nikita Glukhov, Pavel Borisov, Yura Sokolov
Reviewed-by: Jacob Champion, Heikki Linnakangas, Alexander Korotkov
Reviewed-by: Japin Li, Pavel Borisov, Tom Lane, Peter Eisentraut, Andres Freund
Reviewed-by: Andrey Borodin, Dilip Kumar, Aleksander Alekseev
Discussion: https://postgr.es/m/CACG%3DezZe1NQSCnfHOr78AtAZxJZeCvxrts0ygrxYwe%3DpyyjVWA%40mail.gmail.com
Discussion: https://postgr.es/m/CAJ7c6TPDOYBYrnCAeyndkBktO0WG2xSdYduTF0nxq%2BvfkmTF5Q%40mail.gmail.com
This commit is contained in:
Alexander Korotkov
2023-11-29 01:39:55 +02:00
parent a916b47e23
commit 4ed8f0913b
18 changed files with 303 additions and 202 deletions

View File

@@ -60,8 +60,33 @@
#include "storage/fd.h"
#include "storage/shmem.h"
#define SlruFileName(ctl, path, seg) \
snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir, seg)
static int inline
SlruFileName(SlruCtl ctl, char *path, int64 segno)
{
if (ctl->long_segment_names)
{
/*
* We could use 16 characters here but the disadvantage would be that
* the SLRU segments will be hard to distinguish from WAL segments.
*
* For this reason we use 15 characters. It is enough but also means
* that in the future we can't decrease SLRU_PAGES_PER_SEGMENT easily.
*/
Assert(segno >= 0 && segno <= INT64CONST(0xFFFFFFFFFFFFFFF));
return snprintf(path, MAXPGPATH, "%s/%015llX", ctl->Dir,
(long long) segno);
}
else
{
/*
* Despite the fact that %04X format string is used up to 24 bit
* integers are allowed. See SlruCorrectSegmentFilenameLength()
*/
Assert(segno >= 0 && segno <= INT64CONST(0xFFFFFF));
return snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir,
(unsigned int) segno);
}
}
/*
* During SimpleLruWriteAll(), we will usually not need to write more than one
@@ -75,7 +100,7 @@ typedef struct SlruWriteAllData
{
int num_files; /* # files actually open */
int fd[MAX_WRITEALL_BUFFERS]; /* their FD's */
int segno[MAX_WRITEALL_BUFFERS]; /* their log seg#s */
int64 segno[MAX_WRITEALL_BUFFERS]; /* their log seg#s */
} SlruWriteAllData;
typedef struct SlruWriteAllData *SlruWriteAll;
@@ -138,15 +163,16 @@ static int slru_errno;
static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno);
static void SimpleLruWaitIO(SlruCtl ctl, int slotno);
static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruWriteAll fdata);
static bool SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno);
static bool SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno,
static bool SlruPhysicalReadPage(SlruCtl ctl, int64 pageno, int slotno);
static bool SlruPhysicalWritePage(SlruCtl ctl, int64 pageno, int slotno,
SlruWriteAll fdata);
static void SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid);
static int SlruSelectLRUPage(SlruCtl ctl, int pageno);
static void SlruReportIOError(SlruCtl ctl, int64 pageno, TransactionId xid);
static int SlruSelectLRUPage(SlruCtl ctl, int64 pageno);
static bool SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename,
int segpage, void *data);
static void SlruInternalDeleteSegment(SlruCtl ctl, int segno);
int64 segpage, void *data);
static void SlruInternalDeleteSegment(SlruCtl ctl, int64 segno);
/*
* Initialization of shared memory
@@ -162,7 +188,7 @@ SimpleLruShmemSize(int nslots, int nlsns)
sz += MAXALIGN(nslots * sizeof(char *)); /* page_buffer[] */
sz += MAXALIGN(nslots * sizeof(SlruPageStatus)); /* page_status[] */
sz += MAXALIGN(nslots * sizeof(bool)); /* page_dirty[] */
sz += MAXALIGN(nslots * sizeof(int)); /* page_number[] */
sz += MAXALIGN(nslots * sizeof(int64)); /* page_number[] */
sz += MAXALIGN(nslots * sizeof(int)); /* page_lru_count[] */
sz += MAXALIGN(nslots * sizeof(LWLockPadded)); /* buffer_locks[] */
@@ -187,7 +213,7 @@ SimpleLruShmemSize(int nslots, int nlsns)
void
SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
LWLock *ctllock, const char *subdir, int tranche_id,
SyncRequestHandler sync_handler)
SyncRequestHandler sync_handler, bool long_segment_names)
{
SlruShared shared;
bool found;
@@ -226,8 +252,8 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
offset += MAXALIGN(nslots * sizeof(SlruPageStatus));
shared->page_dirty = (bool *) (ptr + offset);
offset += MAXALIGN(nslots * sizeof(bool));
shared->page_number = (int *) (ptr + offset);
offset += MAXALIGN(nslots * sizeof(int));
shared->page_number = (int64 *) (ptr + offset);
offset += MAXALIGN(nslots * sizeof(int64));
shared->page_lru_count = (int *) (ptr + offset);
offset += MAXALIGN(nslots * sizeof(int));
@@ -266,6 +292,7 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
*/
ctl->shared = shared;
ctl->sync_handler = sync_handler;
ctl->long_segment_names = long_segment_names;
strlcpy(ctl->Dir, subdir, sizeof(ctl->Dir));
}
@@ -278,7 +305,7 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
* Control lock must be held at entry, and will be held at exit.
*/
int
SimpleLruZeroPage(SlruCtl ctl, int pageno)
SimpleLruZeroPage(SlruCtl ctl, int64 pageno)
{
SlruShared shared = ctl->shared;
int slotno;
@@ -393,7 +420,7 @@ SimpleLruWaitIO(SlruCtl ctl, int slotno)
* Control lock must be held at entry, and will be held at exit.
*/
int
SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok,
SimpleLruReadPage(SlruCtl ctl, int64 pageno, bool write_ok,
TransactionId xid)
{
SlruShared shared = ctl->shared;
@@ -493,7 +520,7 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok,
* It is unspecified whether the lock will be shared or exclusive.
*/
int
SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid)
SimpleLruReadPage_ReadOnly(SlruCtl ctl, int64 pageno, TransactionId xid)
{
SlruShared shared = ctl->shared;
int slotno;
@@ -540,7 +567,7 @@ static void
SlruInternalWritePage(SlruCtl ctl, int slotno, SlruWriteAll fdata)
{
SlruShared shared = ctl->shared;
int pageno = shared->page_number[slotno];
int64 pageno = shared->page_number[slotno];
bool ok;
/* If a write is in progress, wait for it to finish */
@@ -624,9 +651,9 @@ SimpleLruWritePage(SlruCtl ctl, int slotno)
* large enough to contain the given page.
*/
bool
SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno)
SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int64 pageno)
{
int segno = pageno / SLRU_PAGES_PER_SEGMENT;
int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
int offset = rpageno * BLCKSZ;
char path[MAXPGPATH];
@@ -682,10 +709,10 @@ SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno)
* read/write operations. We could cache one virtual file pointer ...
*/
static bool
SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
SlruPhysicalReadPage(SlruCtl ctl, int64 pageno, int slotno)
{
SlruShared shared = ctl->shared;
int segno = pageno / SLRU_PAGES_PER_SEGMENT;
int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
off_t offset = rpageno * BLCKSZ;
char path[MAXPGPATH];
@@ -754,10 +781,10 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
* SimpleLruWriteAll.
*/
static bool
SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruWriteAll fdata)
SlruPhysicalWritePage(SlruCtl ctl, int64 pageno, int slotno, SlruWriteAll fdata)
{
SlruShared shared = ctl->shared;
int segno = pageno / SLRU_PAGES_PER_SEGMENT;
int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
off_t offset = rpageno * BLCKSZ;
char path[MAXPGPATH];
@@ -929,9 +956,9 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruWriteAll fdata)
* SlruPhysicalWritePage. Call this after cleaning up shared-memory state.
*/
static void
SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid)
SlruReportIOError(SlruCtl ctl, int64 pageno, TransactionId xid)
{
int segno = pageno / SLRU_PAGES_PER_SEGMENT;
int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
int offset = rpageno * BLCKSZ;
char path[MAXPGPATH];
@@ -1014,7 +1041,7 @@ SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid)
* Control lock must be held at entry, and will be held at exit.
*/
static int
SlruSelectLRUPage(SlruCtl ctl, int pageno)
SlruSelectLRUPage(SlruCtl ctl, int64 pageno)
{
SlruShared shared = ctl->shared;
@@ -1025,10 +1052,10 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
int cur_count;
int bestvalidslot = 0; /* keep compiler quiet */
int best_valid_delta = -1;
int best_valid_page_number = 0; /* keep compiler quiet */
int64 best_valid_page_number = 0; /* keep compiler quiet */
int bestinvalidslot = 0; /* keep compiler quiet */
int best_invalid_delta = -1;
int best_invalid_page_number = 0; /* keep compiler quiet */
int64 best_invalid_page_number = 0; /* keep compiler quiet */
/* See if page already has a buffer assigned */
for (slotno = 0; slotno < shared->num_slots; slotno++)
@@ -1069,7 +1096,7 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
for (slotno = 0; slotno < shared->num_slots; slotno++)
{
int this_delta;
int this_page_number;
int64 this_page_number;
if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
return slotno;
@@ -1159,7 +1186,7 @@ SimpleLruWriteAll(SlruCtl ctl, bool allow_redirtied)
SlruShared shared = ctl->shared;
SlruWriteAllData fdata;
int slotno;
int pageno = 0;
int64 pageno = 0;
int i;
bool ok;
@@ -1224,7 +1251,7 @@ SimpleLruWriteAll(SlruCtl ctl, bool allow_redirtied)
* after it has accrued freshly-written data.
*/
void
SimpleLruTruncate(SlruCtl ctl, int cutoffPage)
SimpleLruTruncate(SlruCtl ctl, int64 cutoffPage)
{
SlruShared shared = ctl->shared;
int slotno;
@@ -1302,7 +1329,7 @@ restart:
* they either can't yet contain anything, or have already been cleaned out.
*/
static void
SlruInternalDeleteSegment(SlruCtl ctl, int segno)
SlruInternalDeleteSegment(SlruCtl ctl, int64 segno)
{
char path[MAXPGPATH];
@@ -1325,7 +1352,7 @@ SlruInternalDeleteSegment(SlruCtl ctl, int segno)
* Delete an individual SLRU segment, identified by the segment number.
*/
void
SlruDeleteSegment(SlruCtl ctl, int segno)
SlruDeleteSegment(SlruCtl ctl, int64 segno)
{
SlruShared shared = ctl->shared;
int slotno;
@@ -1389,9 +1416,9 @@ restart:
* first>=cutoff && last>=cutoff: no; every page of this segment is too young
*/
static bool
SlruMayDeleteSegment(SlruCtl ctl, int segpage, int cutoffPage)
SlruMayDeleteSegment(SlruCtl ctl, int64 segpage, int64 cutoffPage)
{
int seg_last_page = segpage + SLRU_PAGES_PER_SEGMENT - 1;
int64 seg_last_page = segpage + SLRU_PAGES_PER_SEGMENT - 1;
Assert(segpage % SLRU_PAGES_PER_SEGMENT == 0);
@@ -1405,7 +1432,7 @@ SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset)
{
TransactionId lhs,
rhs;
int newestPage,
int64 newestPage,
oldestPage;
TransactionId newestXact,
oldestXact;
@@ -1498,9 +1525,10 @@ SlruPagePrecedesUnitTests(SlruCtl ctl, int per_page)
* one containing the page passed as "data".
*/
bool
SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int segpage, void *data)
SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int64 segpage,
void *data)
{
int cutoffPage = *(int *) data;
int64 cutoffPage = *(int64 *) data;
if (SlruMayDeleteSegment(ctl, segpage, cutoffPage))
return true; /* found one; don't iterate any more */
@@ -1513,9 +1541,10 @@ SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int segpage, void *data
* This callback deletes segments prior to the one passed in as "data".
*/
static bool
SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data)
SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int64 segpage,
void *data)
{
int cutoffPage = *(int *) data;
int64 cutoffPage = *(int64 *) data;
if (SlruMayDeleteSegment(ctl, segpage, cutoffPage))
SlruInternalDeleteSegment(ctl, segpage / SLRU_PAGES_PER_SEGMENT);
@@ -1528,13 +1557,37 @@ SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data)
* This callback deletes all segments.
*/
bool
SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int segpage, void *data)
SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int64 segpage, void *data)
{
SlruInternalDeleteSegment(ctl, segpage / SLRU_PAGES_PER_SEGMENT);
return false; /* keep going */
}
/*
* An internal function used by SlruScanDirectory().
*
* Returns true if a file with a name of a given length may be a correct
* SLRU segment.
*/
static inline bool
SlruCorrectSegmentFilenameLength(SlruCtl ctl, size_t len)
{
if (ctl->long_segment_names)
return (len == 15); /* see SlruFileName() */
else
/*
* Commit 638cf09e76d allowed 5-character lengths. Later commit
* 73c986adde5 allowed 6-character length.
*
* Note: There is an ongoing plan to migrate all SLRUs to 64-bit page
* numbers, and the corresponding 15-character file names, which may
* eventually deprecate the support for 4, 5, and 6-character names.
*/
return (len == 4 || len == 5 || len == 6);
}
/*
* Scan the SimpleLru directory and apply a callback to each file found in it.
*
@@ -1556,8 +1609,8 @@ SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data)
bool retval = false;
DIR *cldir;
struct dirent *clde;
int segno;
int segpage;
int64 segno;
int64 segpage;
cldir = AllocateDir(ctl->Dir);
while ((clde = ReadDir(cldir, ctl->Dir)) != NULL)
@@ -1566,10 +1619,10 @@ SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data)
len = strlen(clde->d_name);
if ((len == 4 || len == 5 || len == 6) &&
if (SlruCorrectSegmentFilenameLength(ctl, len) &&
strspn(clde->d_name, "0123456789ABCDEF") == len)
{
segno = (int) strtol(clde->d_name, NULL, 16);
segno = strtoi64(clde->d_name, NULL, 16);
segpage = segno * SLRU_PAGES_PER_SEGMENT;
elog(DEBUG2, "SlruScanDirectory invoking callback on %s/%s",