1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-29 23:43:17 +03:00

Include the backend ID in the relpath of temporary relations.

This allows us to reliably remove all leftover temporary relation
files on cluster startup without reference to system catalogs or WAL;
therefore, we no longer include temporary relations in XLOG_XACT_COMMIT
and XLOG_XACT_ABORT WAL records.

Since these changes require including a backend ID in each
SharedInvalSmgrMsg, the size of the SharedInvalidationMessage.id
field has been reduced from two bytes to one, and the maximum number
of connections has been reduced from INT_MAX / 4 to 2^23-1.  It would
be possible to remove these restrictions by increasing the size of
SharedInvalidationMessage by 4 bytes, but right now that doesn't seem
like a good trade-off.

Review by Jaime Casanova and Tom Lane.
This commit is contained in:
Robert Haas
2010-08-13 20:10:54 +00:00
parent 3f9479ef3f
commit debcec7dc3
37 changed files with 669 additions and 343 deletions

View File

@@ -5,7 +5,7 @@
* Copyright (c) 2002-2010, PostgreSQL Global Development Group
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/dbsize.c,v 1.32 2010/08/05 14:45:04 rhaas Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/dbsize.c,v 1.33 2010/08/13 20:10:52 rhaas Exp $
*
*/
@@ -244,14 +244,14 @@ pg_tablespace_size_name(PG_FUNCTION_ARGS)
* calculate size of (one fork of) a relation
*/
static int64
calculate_relation_size(RelFileNode *rfn, ForkNumber forknum)
calculate_relation_size(RelFileNode *rfn, BackendId backend, ForkNumber forknum)
{
int64 totalsize = 0;
char *relationpath;
char pathname[MAXPGPATH];
unsigned int segcount = 0;
relationpath = relpath(*rfn, forknum);
relationpath = relpathbackend(*rfn, backend, forknum);
for (segcount = 0;; segcount++)
{
@@ -291,7 +291,7 @@ pg_relation_size(PG_FUNCTION_ARGS)
rel = relation_open(relOid, AccessShareLock);
size = calculate_relation_size(&(rel->rd_node),
size = calculate_relation_size(&(rel->rd_node), rel->rd_backend,
forkname_to_number(text_to_cstring(forkName)));
relation_close(rel, AccessShareLock);
@@ -315,12 +315,14 @@ calculate_toast_table_size(Oid toastrelid)
/* toast heap size, including FSM and VM size */
for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++)
size += calculate_relation_size(&(toastRel->rd_node), forkNum);
size += calculate_relation_size(&(toastRel->rd_node),
toastRel->rd_backend, forkNum);
/* toast index size, including FSM and VM size */
toastIdxRel = relation_open(toastRel->rd_rel->reltoastidxid, AccessShareLock);
for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++)
size += calculate_relation_size(&(toastIdxRel->rd_node), forkNum);
size += calculate_relation_size(&(toastIdxRel->rd_node),
toastIdxRel->rd_backend, forkNum);
relation_close(toastIdxRel, AccessShareLock);
relation_close(toastRel, AccessShareLock);
@@ -349,7 +351,8 @@ calculate_table_size(Oid relOid)
* heap size, including FSM and VM
*/
for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++)
size += calculate_relation_size(&(rel->rd_node), forkNum);
size += calculate_relation_size(&(rel->rd_node), rel->rd_backend,
forkNum);
/*
* Size of toast relation
@@ -392,7 +395,9 @@ calculate_indexes_size(Oid relOid)
idxRel = relation_open(idxOid, AccessShareLock);
for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++)
size += calculate_relation_size(&(idxRel->rd_node), forkNum);
size += calculate_relation_size(&(idxRel->rd_node),
idxRel->rd_backend,
forkNum);
relation_close(idxRel, AccessShareLock);
}
@@ -563,6 +568,7 @@ pg_relation_filepath(PG_FUNCTION_ARGS)
HeapTuple tuple;
Form_pg_class relform;
RelFileNode rnode;
BackendId backend;
char *path;
tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
@@ -600,12 +606,27 @@ pg_relation_filepath(PG_FUNCTION_ARGS)
break;
}
if (!OidIsValid(rnode.relNode))
{
ReleaseSysCache(tuple);
PG_RETURN_NULL();
}
/* If temporary, determine owning backend. */
if (!relform->relistemp)
backend = InvalidBackendId;
else if (isTempOrToastNamespace(relform->relnamespace))
backend = MyBackendId;
else
{
/* Do it the hard way. */
backend = GetTempNamespaceBackendId(relform->relnamespace);
Assert(backend != InvalidBackendId);
}
ReleaseSysCache(tuple);
if (!OidIsValid(rnode.relNode))
PG_RETURN_NULL();
path = relpath(rnode, MAIN_FORKNUM);
path = relpathbackend(rnode, backend, MAIN_FORKNUM);
PG_RETURN_TEXT_P(cstring_to_text(path));
}

View File

@@ -80,7 +80,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.98 2010/02/26 02:01:11 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.99 2010/08/13 20:10:52 rhaas Exp $
*
*-------------------------------------------------------------------------
*/
@@ -319,7 +319,8 @@ AddCatcacheInvalidationMessage(InvalidationListHeader *hdr,
{
SharedInvalidationMessage msg;
msg.cc.id = (int16) id;
Assert(id < CHAR_MAX);
msg.cc.id = (int8) id;
msg.cc.tuplePtr = *tuplePtr;
msg.cc.dbId = dbId;
msg.cc.hashValue = hashValue;
@@ -513,7 +514,10 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
* We could have smgr entries for relations of other databases, so no
* short-circuit test is possible here.
*/
smgrclosenode(msg->sm.rnode);
RelFileNodeBackend rnode;
rnode.node = msg->sm.rnode;
rnode.backend = (msg->sm.backend_hi << 16) | (int) msg->sm.backend_lo;
smgrclosenode(rnode);
}
else if (msg->id == SHAREDINVALRELMAP_ID)
{
@@ -1163,14 +1167,20 @@ CacheInvalidateRelcacheByRelid(Oid relid)
* in commit/abort WAL entries. Instead, calls to CacheInvalidateSmgr()
* should happen in low-level smgr.c routines, which are executed while
* replaying WAL as well as when creating it.
*
* Note: In order to avoid bloating SharedInvalidationMessage, we store only
* three bytes of the backend ID using what would otherwise be padding space.
* Thus, the maximum possible backend ID is 2^23-1.
*/
void
CacheInvalidateSmgr(RelFileNode rnode)
CacheInvalidateSmgr(RelFileNodeBackend rnode)
{
SharedInvalidationMessage msg;
msg.sm.id = SHAREDINVALSMGR_ID;
msg.sm.rnode = rnode;
msg.sm.backend_hi = rnode.backend >> 16;
msg.sm.backend_lo = rnode.backend & 0xffff;
msg.sm.rnode = rnode.node;
SendSharedInvalidMessages(&msg, 1);
}

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.311 2010/07/06 19:18:58 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.312 2010/08/13 20:10:52 rhaas Exp $
*
*-------------------------------------------------------------------------
*/
@@ -858,10 +858,20 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
relation->rd_createSubid = InvalidSubTransactionId;
relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
relation->rd_istemp = relation->rd_rel->relistemp;
if (relation->rd_istemp)
relation->rd_islocaltemp = isTempOrToastNamespace(relation->rd_rel->relnamespace);
if (!relation->rd_istemp)
relation->rd_backend = InvalidBackendId;
else if (isTempOrToastNamespace(relation->rd_rel->relnamespace))
relation->rd_backend = MyBackendId;
else
relation->rd_islocaltemp = false;
{
/*
* If it's a temporary table, but not one of ours, we have to use
* the slow, grotty method to figure out the owning backend.
*/
relation->rd_backend =
GetTempNamespaceBackendId(relation->rd_rel->relnamespace);
Assert(relation->rd_backend != InvalidBackendId);
}
/*
* initialize the tuple descriptor (relation->rd_att).
@@ -1424,7 +1434,7 @@ formrdesc(const char *relationName, Oid relationReltype,
relation->rd_createSubid = InvalidSubTransactionId;
relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
relation->rd_istemp = false;
relation->rd_islocaltemp = false;
relation->rd_backend = InvalidBackendId;
/*
* initialize relation tuple form
@@ -2515,7 +2525,7 @@ RelationBuildLocalRelation(const char *relname,
/* it is temporary if and only if it is in my temp-table namespace */
rel->rd_istemp = isTempOrToastNamespace(relnamespace);
rel->rd_islocaltemp = rel->rd_istemp;
rel->rd_backend = rel->rd_istemp ? MyBackendId : InvalidBackendId;
/*
* create a new tuple descriptor from the one passed in. We do this
@@ -2629,7 +2639,7 @@ void
RelationSetNewRelfilenode(Relation relation, TransactionId freezeXid)
{
Oid newrelfilenode;
RelFileNode newrnode;
RelFileNodeBackend newrnode;
Relation pg_class;
HeapTuple tuple;
Form_pg_class classform;
@@ -2640,7 +2650,8 @@ RelationSetNewRelfilenode(Relation relation, TransactionId freezeXid)
TransactionIdIsNormal(freezeXid));
/* Allocate a new relfilenode */
newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace, NULL);
newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace, NULL,
relation->rd_backend);
/*
* Get a writable copy of the pg_class tuple for the given relation.
@@ -2660,9 +2671,10 @@ RelationSetNewRelfilenode(Relation relation, TransactionId freezeXid)
* NOTE: any conflict in relfilenode value will be caught here, if
* GetNewRelFileNode messes up for any reason.
*/
newrnode = relation->rd_node;
newrnode.relNode = newrelfilenode;
RelationCreateStorage(newrnode, relation->rd_istemp);
newrnode.node = relation->rd_node;
newrnode.node.relNode = newrelfilenode;
newrnode.backend = relation->rd_backend;
RelationCreateStorage(newrnode.node, relation->rd_istemp);
smgrclosenode(newrnode);
/*

View File

@@ -10,7 +10,7 @@
* Written by Peter Eisentraut <peter_e@gmx.net>.
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.566 2010/08/06 14:51:33 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.567 2010/08/13 20:10:53 rhaas Exp $
*
*--------------------------------------------------------------------
*/
@@ -96,6 +96,16 @@
#define MAX_KILOBYTES (INT_MAX / 1024)
#endif
/*
* Note: MAX_BACKENDS is limited to 2^23-1 because inval.c stores the
* backend ID as a 3-byte signed integer. Even if that limitation were
* removed, we still could not exceed INT_MAX/4 because some places compute
* 4*MaxBackends without any overflow check. This is rechecked in
* assign_maxconnections, since MaxBackends is computed as MaxConnections
* plus autovacuum_max_workers plus one (for the autovacuum launcher).
*/
#define MAX_BACKENDS 0x7fffff
#define KB_PER_MB (1024)
#define KB_PER_GB (1024*1024)
@@ -1414,23 +1424,13 @@ static struct config_int ConfigureNamesInt[] =
30 * 1000, -1, INT_MAX / 1000, NULL, NULL
},
/*
* Note: MaxBackends is limited to INT_MAX/4 because some places compute
* 4*MaxBackends without any overflow check. This check is made in
* assign_maxconnections, since MaxBackends is computed as MaxConnections
* plus autovacuum_max_workers plus one (for the autovacuum launcher).
*
* Likewise we have to limit NBuffers to INT_MAX/2.
*
* See also CheckRequiredParameterValues() if this parameter changes
*/
{
{"max_connections", PGC_POSTMASTER, CONN_AUTH_SETTINGS,
gettext_noop("Sets the maximum number of concurrent connections."),
NULL
},
&MaxConnections,
100, 1, INT_MAX / 4, assign_maxconnections, NULL
100, 1, MAX_BACKENDS, assign_maxconnections, NULL
},
{
@@ -1439,9 +1439,13 @@ static struct config_int ConfigureNamesInt[] =
NULL
},
&ReservedBackends,
3, 0, INT_MAX / 4, NULL, NULL
3, 0, MAX_BACKENDS, NULL, NULL
},
/*
* We sometimes multiply the number of shared buffers by two without
* checking for overflow, so we mustn't allow more than INT_MAX / 2.
*/
{
{"shared_buffers", PGC_POSTMASTER, RESOURCES_MEM,
gettext_noop("Sets the number of shared memory buffers used by the server."),
@@ -1618,7 +1622,7 @@ static struct config_int ConfigureNamesInt[] =
NULL
},
&max_prepared_xacts,
0, 0, INT_MAX / 4, NULL, NULL
0, 0, MAX_BACKENDS, NULL, NULL
},
#ifdef LOCK_DEBUG
@@ -1782,7 +1786,7 @@ static struct config_int ConfigureNamesInt[] =
NULL
},
&max_wal_senders,
0, 0, INT_MAX / 4, NULL, NULL
0, 0, MAX_BACKENDS, NULL, NULL
},
{
@@ -2022,7 +2026,7 @@ static struct config_int ConfigureNamesInt[] =
NULL
},
&autovacuum_max_workers,
3, 1, INT_MAX / 4, assign_autovacuum_max_workers, NULL
3, 1, MAX_BACKENDS, assign_autovacuum_max_workers, NULL
},
{
@@ -7995,7 +7999,7 @@ show_tcp_keepalives_count(void)
static bool
assign_maxconnections(int newval, bool doit, GucSource source)
{
if (newval + autovacuum_max_workers + 1 > INT_MAX / 4)
if (newval + autovacuum_max_workers + 1 > MAX_BACKENDS)
return false;
if (doit)
@@ -8007,7 +8011,7 @@ assign_maxconnections(int newval, bool doit, GucSource source)
static bool
assign_autovacuum_max_workers(int newval, bool doit, GucSource source)
{
if (MaxConnections + newval + 1 > INT_MAX / 4)
if (MaxConnections + newval + 1 > MAX_BACKENDS)
return false;
if (doit)

View File

@@ -3,7 +3,7 @@
*
* Copyright (c) 2006-2010, PostgreSQL Global Development Group
*
* $PostgreSQL: pgsql/src/backend/utils/probes.d,v 1.12 2010/01/02 16:57:53 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/probes.d,v 1.13 2010/08/13 20:10:52 rhaas Exp $
* ----------
*/
@@ -55,7 +55,7 @@ provider postgresql {
probe sort__done(bool, long);
probe buffer__read__start(ForkNumber, BlockNumber, Oid, Oid, Oid, bool, bool);
probe buffer__read__done(ForkNumber, BlockNumber, Oid, Oid, Oid, bool, bool, bool);
probe buffer__read__done(ForkNumber, BlockNumber, Oid, Oid, Oid, int, bool, bool);
probe buffer__flush__start(ForkNumber, BlockNumber, Oid, Oid, Oid);
probe buffer__flush__done(ForkNumber, BlockNumber, Oid, Oid, Oid);
@@ -81,10 +81,10 @@ provider postgresql {
probe twophase__checkpoint__start();
probe twophase__checkpoint__done();
probe smgr__md__read__start(ForkNumber, BlockNumber, Oid, Oid, Oid);
probe smgr__md__read__done(ForkNumber, BlockNumber, Oid, Oid, Oid, int, int);
probe smgr__md__write__start(ForkNumber, BlockNumber, Oid, Oid, Oid);
probe smgr__md__write__done(ForkNumber, BlockNumber, Oid, Oid, Oid, int, int);
probe smgr__md__read__start(ForkNumber, BlockNumber, Oid, Oid, Oid, int);
probe smgr__md__read__done(ForkNumber, BlockNumber, Oid, Oid, Oid, int, int, int);
probe smgr__md__write__start(ForkNumber, BlockNumber, Oid, Oid, Oid, int);
probe smgr__md__write__done(ForkNumber, BlockNumber, Oid, Oid, Oid, int, int, int);
probe xlog__insert(unsigned char, unsigned char);
probe xlog__switch();