mirror of
https://github.com/postgres/postgres.git
synced 2025-10-21 02:52:47 +03:00
Increase width of RelFileNumbers from 32 bits to 56 bits.
RelFileNumbers are now assigned using a separate counter, instead of being assigned from the OID counter. This counter never wraps around: if all 2^56 possible RelFileNumbers are used, an internal error occurs. As the cluster is limited to 2^64 total bytes of WAL, this limitation should not cause a problem in practice. If the counter were 64 bits wide rather than 56 bits wide, we would need to increase the width of the BufferTag, which might adversely impact buffer lookup performance. Also, this lets us use bigint for pg_class.relfilenode and other places where these values are exposed at the SQL level without worrying about overflow. This should remove the need to keep "tombstone" files around until the next checkpoint when relations are removed. We do that to keep RelFileNumbers from being recycled, but now that won't happen anyway. However, this patch doesn't actually change anything in this area; it just makes it possible for a future patch to do so. Dilip Kumar, based on an idea from Andres Freund, who also reviewed some earlier versions of the patch. Further review and some wordsmithing by me. Also reviewed at various points by Ashutosh Sharma, Vignesh C, Amul Sul, Álvaro Herrera, and Tom Lane. Discussion: http://postgr.es/m/CA+Tgmobp7+7kmi4gkq7Y+4AM9fTvL+O1oQ4-5gFTT+6Ng-dQ=g@mail.gmail.com
This commit is contained in:
@@ -15,6 +15,7 @@
|
||||
#define TRANSAM_H
|
||||
|
||||
#include "access/xlogdefs.h"
|
||||
#include "common/relpath.h"
|
||||
|
||||
|
||||
/* ----------------
|
||||
@@ -196,6 +197,33 @@ FullTransactionIdAdvance(FullTransactionId *dest)
|
||||
#define FirstUnpinnedObjectId 12000
|
||||
#define FirstNormalObjectId 16384
|
||||
|
||||
/* ----------
|
||||
* RelFileNumbers are normally assigned sequentially beginning with
|
||||
* FirstNormalRelFileNumber, but for system tables the initial RelFileNumber
|
||||
* is equal to the table OID. This scheme allows pg_upgrade to work: we expect
|
||||
* that the new cluster will contain only system tables, and that none of those
|
||||
* will have previously been rewritten, so any RelFileNumber which is in use
|
||||
* in both the old and new clusters will be used for the same relation in both
|
||||
* places.
|
||||
*
|
||||
* This is important because pg_upgrade can't reactively move conflicting
|
||||
* relations out of the way. If it tries to set the RelFileNumber for a
|
||||
* relation to some value that's already in use by a different relation, the
|
||||
* upgrade will just fail. It's OK if the same RelFileNumber is used for the
|
||||
* same relation, though, since then nothing needs to be changed.
|
||||
* ----------
|
||||
*/
|
||||
#define FirstNormalRelFileNumber ((RelFileNumber) 100000)	/* start of sequential assignment; system tables begin at their OID */
|
||||
|
||||
/*
 * CHECK_RELFILENUMBER_RANGE
 *		Raise an ERROR (ERRCODE_INVALID_PARAMETER_VALUE) unless the given
 *		value lies in the range 0 .. MAX_RELFILENUMBER.
 *
 * The argument is evaluated exactly once and copied into a signed 64-bit
 * local.  That makes the macro safe to use with expressions that have side
 * effects, and it keeps the "< 0" test meaningful whether the caller passes
 * a signed or an unsigned value: an unsigned value too large for int64 ends
 * up negative and is rejected just like any other value above
 * MAX_RELFILENUMBER.  The value is cast back to unsigned for the message, so
 * the reported text is the same as before.
 */
#define CHECK_RELFILENUMBER_RANGE(relfilenumber) \
do { \
	int64		relfilenumber_checked_ = (int64) (relfilenumber); \
	if (relfilenumber_checked_ < 0 || \
		relfilenumber_checked_ > MAX_RELFILENUMBER) \
		ereport(ERROR, \
				errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
				errmsg("relfilenumber %llu is out of range", \
					   (unsigned long long) relfilenumber_checked_)); \
} while (0)
|
||||
|
||||
/*
|
||||
* VariableCache is a data structure in shared memory that is used to track
|
||||
* OID and XID assignment state. For largely historical reasons, there is
|
||||
@@ -214,6 +242,15 @@ typedef struct VariableCacheData
|
||||
Oid nextOid; /* next OID to assign */
|
||||
uint32 oidCount; /* OIDs available before must do XLOG work */
|
||||
|
||||
/*
|
||||
* These fields are protected by RelFileNumberGenLock.
|
||||
*/
|
||||
RelFileNumber nextRelFileNumber; /* next relfilenumber to assign */
|
||||
RelFileNumber loggedRelFileNumber; /* last logged relfilenumber */
|
||||
RelFileNumber flushedRelFileNumber; /* last flushed relfilenumber */
|
||||
XLogRecPtr loggedRelFileNumberRecPtr; /* xlog record pointer w.r.t.
|
||||
* loggedRelFileNumber */
|
||||
|
||||
/*
|
||||
* These fields are protected by XidGenLock.
|
||||
*/
|
||||
@@ -293,6 +330,9 @@ extern void SetTransactionIdLimit(TransactionId oldest_datfrozenxid,
|
||||
extern void AdvanceOldestClogXid(TransactionId oldest_datfrozenxid);
|
||||
extern bool ForceTransactionIdLimitUpdate(void);
|
||||
extern Oid GetNewObjectId(void);
|
||||
/*
 * Obtain a new RelFileNumber.  Per the accompanying commit message, these
 * values come from a dedicated counter rather than from the OID counter.
 * NOTE(review): how reltablespace and relpersistence are used is not visible
 * from this declaration -- confirm against the implementation.
 */
extern RelFileNumber GetNewRelFileNumber(Oid reltablespace, char relpersistence);
|
||||
/* NOTE(review): presumably sets the next RelFileNumber to be assigned -- confirm against the implementation. */
extern void SetNextRelFileNumber(RelFileNumber relnumber);
|
||||
extern void StopGeneratingPinnedObjectIds(void);
|
||||
|
||||
#ifdef USE_ASSERT_CHECKING
|
||||
|
@@ -236,6 +236,7 @@ extern void CreateCheckPoint(int flags);
|
||||
extern bool CreateRestartPoint(int flags);
|
||||
extern WALAvailability GetWALAvailability(XLogRecPtr targetLSN);
|
||||
extern void XLogPutNextOid(Oid nextOid);
|
||||
/* NOTE(review): presumably WAL-logs nextrelnumber and returns that record's location -- confirm against the implementation. */
extern XLogRecPtr LogNextRelFileNumber(RelFileNumber nextrelnumber);
|
||||
extern XLogRecPtr XLogRestorePoint(const char *rpName);
|
||||
extern void UpdateFullPageWrites(void);
|
||||
extern void GetFullPageWriteInfo(XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p);
|
||||
|
Reference in New Issue
Block a user