1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-10 17:42:29 +03:00

Fix subtransaction behavior for large objects, temp namespace, files,

password/group files.  Also allow read-only subtransactions of a read-write
parent, but not vice versa.  These are the reasonably noncontroversial
parts of Alvaro's recent mop-up patch, plus further work on large objects
to minimize use of the TopTransactionResourceOwner.
This commit is contained in:
Tom Lane
2004-07-28 14:23:31 +00:00
parent cc813fc2b8
commit 1bf3d61504
17 changed files with 572 additions and 205 deletions

View File

@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.109 2004/05/31 03:48:04 tgl Exp $
* $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.110 2004/07/28 14:23:28 tgl Exp $
*
* NOTES:
*
@@ -47,6 +47,7 @@
#include <fcntl.h>
#include "miscadmin.h"
#include "access/xact.h"
#include "storage/fd.h"
#include "storage/ipc.h"
@@ -122,6 +123,7 @@ typedef struct vfd
{
signed short fd; /* current FD, or VFD_CLOSED if none */
unsigned short fdstate; /* bitflags for VFD's state */
TransactionId create_xid; /* for XACT_TEMPORARY fds, creating Xid */
File nextFree; /* link to next free VFD, if in freelist */
File lruMoreRecently; /* doubly linked recency-of-use list */
File lruLessRecently;
@@ -146,27 +148,31 @@ static Size SizeVfdCache = 0;
static int nfile = 0;
/*
* List of stdio FILEs opened with AllocateFile.
* List of stdio FILEs and <dirent.h> DIRs opened with AllocateFile
* and AllocateDir.
*
* Since we don't want to encourage heavy use of AllocateFile, it seems
* OK to put a pretty small maximum limit on the number of simultaneously
* allocated files.
* Since we don't want to encourage heavy use of AllocateFile or AllocateDir,
* it seems OK to put a pretty small maximum limit on the number of
* simultaneously allocated descs.
*/
#define MAX_ALLOCATED_FILES 32
#define MAX_ALLOCATED_DESCS 32
static int numAllocatedFiles = 0;
static FILE *allocatedFiles[MAX_ALLOCATED_FILES];
/*
 * Discriminator for AllocateDesc: says which kind of handle an entry of
 * allocatedDescs[] is tracking.
 */
typedef enum {
AllocateDescFile,	/* stdio FILE obtained through AllocateFile */
AllocateDescDir		/* <dirent.h> DIR obtained through AllocateDir */
} AllocateDescKind;
/*
* List of <dirent.h> DIRs opened with AllocateDir.
*
* Since we don't have heavy use of AllocateDir, it seems OK to put a pretty
* small maximum limit on the number of simultaneously allocated dirs.
*/
#define MAX_ALLOCATED_DIRS 10
/*
 * One entry of allocatedDescs[]: a FILE or DIR handed out by AllocateFile
 * or AllocateDir, together with the transaction that allocated it.
 */
typedef struct {
AllocateDescKind kind;	/* selects which member of "desc" is valid */
union {
FILE *file;	/* valid when kind == AllocateDescFile */
DIR *dir;	/* valid when kind == AllocateDescDir */
} desc;
/*
 * Set from GetCurrentTransactionId() when the entry is allocated;
 * AtEOSubXact_Files compares it against the ending subtransaction's Xid.
 */
TransactionId create_xid;
} AllocateDesc;
static int numAllocatedDirs = 0;
static DIR *allocatedDirs[MAX_ALLOCATED_DIRS];
static int numAllocatedDescs = 0;
static AllocateDesc allocatedDescs[MAX_ALLOCATED_DESCS];
/*
* Number of temporary files opened during the current session;
@@ -499,7 +505,7 @@ LruInsert(File file)
if (FileIsNotOpen(file))
{
while (nfile + numAllocatedFiles + numAllocatedDirs >= max_safe_fds)
while (nfile + numAllocatedDescs >= max_safe_fds)
{
if (!ReleaseLruFile())
break;
@@ -759,7 +765,7 @@ fileNameOpenFile(FileName fileName,
file = AllocateVfd();
vfdP = &VfdCache[file];
while (nfile + numAllocatedFiles + numAllocatedDirs >= max_safe_fds)
while (nfile + numAllocatedDescs >= max_safe_fds)
{
if (!ReleaseLruFile())
break;
@@ -876,7 +882,10 @@ OpenTemporaryFile(bool interXact)
/* Mark it for deletion at EOXact */
if (!interXact)
{
VfdCache[file].fdstate |= FD_XACT_TEMPORARY;
VfdCache[file].create_xid = GetCurrentTransactionId();
}
return file;
}
@@ -1134,24 +1143,29 @@ AllocateFile(char *name, char *mode)
{
FILE *file;
DO_DB(elog(LOG, "AllocateFile: Allocated %d", numAllocatedFiles));
DO_DB(elog(LOG, "AllocateFile: Allocated %d (%s)",
numAllocatedDescs, name));
/*
* The test against MAX_ALLOCATED_FILES prevents us from overflowing
* The test against MAX_ALLOCATED_DESCS prevents us from overflowing
* allocatedDescs[]; the test against max_safe_fds prevents AllocateFile
* from hogging every one of the available FDs, which'd lead to infinite
* looping.
*/
if (numAllocatedFiles >= MAX_ALLOCATED_FILES ||
numAllocatedFiles + numAllocatedDirs >= max_safe_fds - 1)
if (numAllocatedDescs >= MAX_ALLOCATED_DESCS ||
numAllocatedDescs >= max_safe_fds - 1)
elog(ERROR, "too many private files demanded");
TryAgain:
if ((file = fopen(name, mode)) != NULL)
{
allocatedFiles[numAllocatedFiles] = file;
numAllocatedFiles++;
return file;
AllocateDesc *desc = &allocatedDescs[numAllocatedDescs];
desc->kind = AllocateDescFile;
desc->desc.file = file;
desc->create_xid = GetCurrentTransactionId();
numAllocatedDescs++;
return desc->desc.file;
}
if (errno == EMFILE || errno == ENFILE)
@@ -1170,6 +1184,38 @@ TryAgain:
return NULL;
}
/*
 * FreeDesc
 *
 * Close the FILE or DIR tracked by an AllocateDesc and drop the entry
 * from allocatedDescs[].
 *
 * "desc" *must* point at an element of allocatedDescs[]: the slot is
 * reused in place for whatever entry is currently last in the array.
 *
 * Returns the result of fclose() or closedir(), as appropriate.
 */
static int
FreeDesc(AllocateDesc *desc)
{
	int			closeResult;

	/* Release the underlying object according to its kind */
	if (desc->kind == AllocateDescFile)
		closeResult = fclose(desc->desc.file);
	else if (desc->kind == AllocateDescDir)
		closeResult = closedir(desc->desc.dir);
	else
	{
		elog(ERROR, "AllocateDesc kind not recognized");
		closeResult = 0;		/* keep compiler quiet */
	}

	/*
	 * Keep the array dense: shrink it by one and copy the former last
	 * entry over the slot just vacated.  (When desc was itself the last
	 * entry this is a harmless self-assignment.)
	 */
	*desc = allocatedDescs[--numAllocatedDescs];

	return closeResult;
}
/*
* Close a file returned by AllocateFile.
*
@@ -1181,20 +1227,19 @@ FreeFile(FILE *file)
{
int i;
DO_DB(elog(LOG, "FreeFile: Allocated %d", numAllocatedFiles));
DO_DB(elog(LOG, "FreeFile: Allocated %d", numAllocatedDescs));
/* Remove file from list of allocated files, if it's present */
for (i = numAllocatedFiles; --i >= 0;)
for (i = numAllocatedDescs; --i >= 0;)
{
if (allocatedFiles[i] == file)
{
numAllocatedFiles--;
allocatedFiles[i] = allocatedFiles[numAllocatedFiles];
break;
}
AllocateDesc *desc = &allocatedDescs[i];
if (desc->kind == AllocateDescFile && desc->desc.file == file)
return FreeDesc(desc);
}
if (i < 0)
elog(WARNING, "file passed to FreeFile was not obtained from AllocateFile");
/* Only get here if someone passes us a file not in allocatedDescs */
elog(WARNING, "file passed to FreeFile was not obtained from AllocateFile");
return fclose(file);
}
@@ -1213,24 +1258,29 @@ AllocateDir(const char *dirname)
{
DIR *dir;
DO_DB(elog(LOG, "AllocateDir: Allocated %d", numAllocatedDirs));
DO_DB(elog(LOG, "AllocateDir: Allocated %d (%s)",
numAllocatedDescs, dirname));
/*
* The test against MAX_ALLOCATED_DIRS prevents us from overflowing
* allocatedDirs[]; the test against max_safe_fds prevents AllocateDir
* The test against MAX_ALLOCATED_DESCS prevents us from overflowing
* allocatedDescs[]; the test against max_safe_fds prevents AllocateDir
* from hogging every one of the available FDs, which'd lead to infinite
* looping.
*/
if (numAllocatedDirs >= MAX_ALLOCATED_DIRS ||
numAllocatedDirs + numAllocatedFiles >= max_safe_fds - 1)
if (numAllocatedDescs >= MAX_ALLOCATED_DESCS ||
numAllocatedDescs >= max_safe_fds - 1)
elog(ERROR, "too many private dirs demanded");
TryAgain:
if ((dir = opendir(dirname)) != NULL)
{
allocatedDirs[numAllocatedDirs] = dir;
numAllocatedDirs++;
return dir;
AllocateDesc *desc = &allocatedDescs[numAllocatedDescs];
desc->kind = AllocateDescDir;
desc->desc.dir = dir;
desc->create_xid = GetCurrentTransactionId();
numAllocatedDescs++;
return desc->desc.dir;
}
if (errno == EMFILE || errno == ENFILE)
@@ -1260,20 +1310,19 @@ FreeDir(DIR *dir)
{
int i;
DO_DB(elog(LOG, "FreeDir: Allocated %d", numAllocatedDirs));
DO_DB(elog(LOG, "FreeDir: Allocated %d", numAllocatedDescs));
/* Remove dir from list of allocated dirs, if it's present */
for (i = numAllocatedDirs; --i >= 0;)
for (i = numAllocatedDescs; --i >= 0;)
{
if (allocatedDirs[i] == dir)
{
numAllocatedDirs--;
allocatedDirs[i] = allocatedDirs[numAllocatedDirs];
break;
}
AllocateDesc *desc = &allocatedDescs[i];
if (desc->kind == AllocateDescDir && desc->desc.dir == dir)
return FreeDesc(desc);
}
if (i < 0)
elog(WARNING, "dir passed to FreeDir was not obtained from AllocateDir");
/* Only get here if someone passes us a dir not in allocatedDescs */
elog(WARNING, "dir passed to FreeDir was not obtained from AllocateDir");
return closedir(dir);
}
@@ -1302,6 +1351,51 @@ closeAllVfds(void)
}
}
/*
 * AtEOSubXact_Files
 *
 * Subtransaction commit/abort processing for file resources.
 *
 * Every transaction-temporary VFD and every allocatedDescs[] entry whose
 * create_xid equals myXid is handled as follows:
 *	- on commit, it is re-labelled with parentXid, so that the parent
 *	  becomes responsible for it;
 *	- on abort, it is closed (VFDs only if they have a fileName).
 */
void
AtEOSubXact_Files(bool isCommit, TransactionId myXid, TransactionId parentXid)
{
	int			slot;

	if (SizeVfdCache > 0)
	{
		Index		vfd;

		Assert(FileIsNotOpen(0));	/* Make sure ring not corrupted */

		/* Entry 0 is the ring header, so start scanning at 1 */
		for (vfd = 1; vfd < SizeVfdCache; vfd++)
		{
			/* Only xact-temporary files made by this subxact matter */
			if (!(VfdCache[vfd].fdstate & FD_XACT_TEMPORARY))
				continue;
			if (VfdCache[vfd].create_xid != myXid)
				continue;

			if (isCommit)
				VfdCache[vfd].create_xid = parentXid;
			else if (VfdCache[vfd].fileName != NULL)
				FileClose(vfd);
		}
	}

	/*
	 * Now the AllocateFile/AllocateDir descriptors.  FreeDesc refills the
	 * freed slot with the last array entry, so after freeing we look at
	 * the same slot again instead of advancing.
	 */
	slot = 0;
	while (slot < numAllocatedDescs)
	{
		AllocateDesc *desc = &allocatedDescs[slot];

		if (desc->create_xid != myXid)
		{
			slot++;
			continue;
		}

		if (isCommit)
		{
			desc->create_xid = parentXid;
			slot++;
		}
		else
			(void) FreeDesc(desc);
	}
}
/*
* AtEOXact_Files
*
@@ -1362,11 +1456,8 @@ CleanupTempFiles(bool isProcExit)
}
}
while (numAllocatedFiles > 0)
FreeFile(allocatedFiles[0]);
while (numAllocatedDirs > 0)
FreeDir(allocatedDirs[0]);
while (numAllocatedDescs > 0)
FreeDesc(&allocatedDescs[0]);
}

View File

@@ -9,36 +9,92 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/large_object/inv_api.c,v 1.102 2003/11/29 19:51:56 pgsql Exp $
* $PostgreSQL: pgsql/src/backend/storage/large_object/inv_api.c,v 1.103 2004/07/28 14:23:29 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <errno.h>
#include <sys/file.h>
#include <sys/stat.h>
#include "access/genam.h"
#include "access/heapam.h"
#include "access/htup.h"
#include "access/tuptoaster.h"
#include "catalog/catalog.h"
#include "catalog/catname.h"
#include "catalog/heap.h"
#include "catalog/index.h"
#include "catalog/indexing.h"
#include "catalog/pg_opclass.h"
#include "catalog/pg_largeobject.h"
#include "catalog/pg_type.h"
#include "commands/comment.h"
#include "libpq/libpq-fs.h"
#include "miscadmin.h"
#include "storage/large_object.h"
#include "storage/smgr.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/resowner.h"
/*
 * All accesses to pg_largeobject and its index make use of a single Relation
 * reference, so that we only need to open pg_largeobject once per transaction.
 * To avoid problems when the first such reference occurs inside a
 * subtransaction, we execute a slightly klugy maneuver to assign ownership of
 * the Relation reference to TopTransactionResourceOwner.
 *
 * Both pointers are NULL whenever the relations are not currently open;
 * open_lo_relation() fills them in and close_lo_relation() resets them.
 */
static Relation lo_heap_r = NULL;
static Relation lo_index_r = NULL;
/*
 * open_lo_relation
 *
 * Make sure lo_heap_r and lo_index_r reference pg_largeobject and its
 * index.  Does nothing if both are already set; otherwise opens whichever
 * one is missing, with TopTransactionResourceOwner temporarily installed
 * as CurrentResourceOwner so that the references belong to the top
 * transaction.
 */
static void
open_lo_relation(void)
{
	ResourceOwner savedOwner;

	/* Fast path: both references were already obtained in this xact */
	if (lo_heap_r != NULL && lo_index_r != NULL)
		return;

	/* Temporarily make the top transaction the owner of what we open */
	savedOwner = CurrentResourceOwner;
	CurrentResourceOwner = TopTransactionResourceOwner;

	/* RowExclusiveLock covers both the read and the write paths */
	if (!lo_heap_r)
		lo_heap_r = heap_openr(LargeObjectRelationName, RowExclusiveLock);
	if (!lo_index_r)
		lo_index_r = index_openr(LargeObjectLOidPNIndex);

	CurrentResourceOwner = savedOwner;
}
/*
 * close_lo_relation
 *
 * Main-transaction-end cleanup for the cached pg_largeobject references.
 *
 * isCommit: true at commit, false at abort.  The relations are closed
 * explicitly only at commit; at abort we just forget the pointers and
 * leave the actual release to abort cleanup.
 */
void
close_lo_relation(bool isCommit)
{
	/* Nothing was opened in this transaction */
	if (lo_heap_r == NULL && lo_index_r == NULL)
		return;

	if (isCommit)
	{
		/*
		 * Close under TopTransactionResourceOwner, the owner that was
		 * in effect when the references were opened.
		 */
		ResourceOwner savedOwner = CurrentResourceOwner;

		CurrentResourceOwner = TopTransactionResourceOwner;

		if (lo_index_r != NULL)
			index_close(lo_index_r);
		if (lo_heap_r != NULL)
			heap_close(lo_heap_r, NoLock);

		CurrentResourceOwner = savedOwner;
	}

	/* Either way, the cached pointers are no longer usable */
	lo_heap_r = NULL;
	lo_index_r = NULL;
}
static int32
@@ -50,6 +106,7 @@ getbytealen(bytea *data)
return (VARSIZE(data) - VARHDRSZ);
}
/*
* inv_create -- create a new large object.
*
@@ -92,23 +149,20 @@ inv_create(int flags)
retval = (LargeObjectDesc *) palloc(sizeof(LargeObjectDesc));
retval->id = file_oid;
retval->xid = GetCurrentTransactionId();
retval->offset = 0;
if (flags & INV_WRITE)
{
retval->flags = IFS_WRLOCK | IFS_RDLOCK;
retval->heap_r = heap_openr(LargeObjectRelationName, RowExclusiveLock);
}
else if (flags & INV_READ)
{
retval->flags = IFS_RDLOCK;
retval->heap_r = heap_openr(LargeObjectRelationName, AccessShareLock);
}
else
elog(ERROR, "invalid flags: %d", flags);
retval->index_r = index_openr(LargeObjectLOidPNIndex);
return retval;
}
@@ -131,23 +185,20 @@ inv_open(Oid lobjId, int flags)
retval = (LargeObjectDesc *) palloc(sizeof(LargeObjectDesc));
retval->id = lobjId;
retval->xid = GetCurrentTransactionId();
retval->offset = 0;
if (flags & INV_WRITE)
{
retval->flags = IFS_WRLOCK | IFS_RDLOCK;
retval->heap_r = heap_openr(LargeObjectRelationName, RowExclusiveLock);
}
else if (flags & INV_READ)
{
retval->flags = IFS_RDLOCK;
retval->heap_r = heap_openr(LargeObjectRelationName, AccessShareLock);
}
else
elog(ERROR, "invalid flags: %d", flags);
retval->index_r = index_openr(LargeObjectLOidPNIndex);
return retval;
}
@@ -158,13 +209,6 @@ void
inv_close(LargeObjectDesc *obj_desc)
{
Assert(PointerIsValid(obj_desc));
if (obj_desc->flags & IFS_WRLOCK)
heap_close(obj_desc->heap_r, RowExclusiveLock);
else if (obj_desc->flags & IFS_RDLOCK)
heap_close(obj_desc->heap_r, AccessShareLock);
index_close(obj_desc->index_r);
pfree(obj_desc);
}
@@ -212,12 +256,14 @@ inv_getsize(LargeObjectDesc *obj_desc)
Assert(PointerIsValid(obj_desc));
open_lo_relation();
ScanKeyInit(&skey[0],
Anum_pg_largeobject_loid,
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(obj_desc->id));
sd = index_beginscan(obj_desc->heap_r, obj_desc->index_r,
sd = index_beginscan(lo_heap_r, lo_index_r,
SnapshotNow, 1, skey);
/*
@@ -316,6 +362,8 @@ inv_read(LargeObjectDesc *obj_desc, char *buf, int nbytes)
if (nbytes <= 0)
return 0;
open_lo_relation();
ScanKeyInit(&skey[0],
Anum_pg_largeobject_loid,
BTEqualStrategyNumber, F_OIDEQ,
@@ -326,7 +374,7 @@ inv_read(LargeObjectDesc *obj_desc, char *buf, int nbytes)
BTGreaterEqualStrategyNumber, F_INT4GE,
Int32GetDatum(pageno));
sd = index_beginscan(obj_desc->heap_r, obj_desc->index_r,
sd = index_beginscan(lo_heap_r, lo_index_r,
SnapshotNow, 2, skey);
while ((tuple = index_getnext(sd, ForwardScanDirection)) != NULL)
@@ -421,7 +469,9 @@ inv_write(LargeObjectDesc *obj_desc, char *buf, int nbytes)
if (nbytes <= 0)
return 0;
indstate = CatalogOpenIndexes(obj_desc->heap_r);
open_lo_relation();
indstate = CatalogOpenIndexes(lo_heap_r);
ScanKeyInit(&skey[0],
Anum_pg_largeobject_loid,
@@ -433,7 +483,7 @@ inv_write(LargeObjectDesc *obj_desc, char *buf, int nbytes)
BTGreaterEqualStrategyNumber, F_INT4GE,
Int32GetDatum(pageno));
sd = index_beginscan(obj_desc->heap_r, obj_desc->index_r,
sd = index_beginscan(lo_heap_r, lo_index_r,
SnapshotNow, 2, skey);
oldtuple = NULL;
@@ -510,9 +560,9 @@ inv_write(LargeObjectDesc *obj_desc, char *buf, int nbytes)
memset(replace, ' ', sizeof(replace));
values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
replace[Anum_pg_largeobject_data - 1] = 'r';
newtup = heap_modifytuple(oldtuple, obj_desc->heap_r,
newtup = heap_modifytuple(oldtuple, lo_heap_r,
values, nulls, replace);
simple_heap_update(obj_desc->heap_r, &newtup->t_self, newtup);
simple_heap_update(lo_heap_r, &newtup->t_self, newtup);
CatalogIndexInsert(indstate, newtup);
heap_freetuple(newtup);
@@ -554,8 +604,8 @@ inv_write(LargeObjectDesc *obj_desc, char *buf, int nbytes)
values[Anum_pg_largeobject_loid - 1] = ObjectIdGetDatum(obj_desc->id);
values[Anum_pg_largeobject_pageno - 1] = Int32GetDatum(pageno);
values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
newtup = heap_formtuple(obj_desc->heap_r->rd_att, values, nulls);
simple_heap_insert(obj_desc->heap_r, newtup);
newtup = heap_formtuple(lo_heap_r->rd_att, values, nulls);
simple_heap_insert(lo_heap_r, newtup);
CatalogIndexInsert(indstate, newtup);
heap_freetuple(newtup);
}

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/lmgr/lmgr.c,v 1.65 2004/07/27 05:10:58 tgl Exp $
* $PostgreSQL: pgsql/src/backend/storage/lmgr/lmgr.c,v 1.66 2004/07/28 14:23:29 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -137,7 +137,7 @@ LockRelation(Relation relation, LOCKMODE lockmode)
tag.dbId = relation->rd_lockInfo.lockRelId.dbId;
tag.objId.blkno = InvalidBlockNumber;
if (!LockAcquire(LockTableId, &tag, GetCurrentTransactionId(),
if (!LockAcquire(LockTableId, &tag, GetTopTransactionId(),
lockmode, false))
elog(ERROR, "LockAcquire failed");
@@ -171,7 +171,7 @@ ConditionalLockRelation(Relation relation, LOCKMODE lockmode)
tag.dbId = relation->rd_lockInfo.lockRelId.dbId;
tag.objId.blkno = InvalidBlockNumber;
if (!LockAcquire(LockTableId, &tag, GetCurrentTransactionId(),
if (!LockAcquire(LockTableId, &tag, GetTopTransactionId(),
lockmode, true))
return false;
@@ -201,7 +201,7 @@ UnlockRelation(Relation relation, LOCKMODE lockmode)
tag.dbId = relation->rd_lockInfo.lockRelId.dbId;
tag.objId.blkno = InvalidBlockNumber;
LockRelease(LockTableId, &tag, GetCurrentTransactionId(), lockmode);
LockRelease(LockTableId, &tag, GetTopTransactionId(), lockmode);
}
/*
@@ -264,7 +264,7 @@ LockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode)
tag.dbId = relation->rd_lockInfo.lockRelId.dbId;
tag.objId.blkno = blkno;
if (!LockAcquire(LockTableId, &tag, GetCurrentTransactionId(),
if (!LockAcquire(LockTableId, &tag, GetTopTransactionId(),
lockmode, false))
elog(ERROR, "LockAcquire failed");
}
@@ -285,7 +285,7 @@ ConditionalLockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode)
tag.dbId = relation->rd_lockInfo.lockRelId.dbId;
tag.objId.blkno = blkno;
return LockAcquire(LockTableId, &tag, GetCurrentTransactionId(),
return LockAcquire(LockTableId, &tag, GetTopTransactionId(),
lockmode, true);
}
@@ -302,7 +302,7 @@ UnlockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode)
tag.dbId = relation->rd_lockInfo.lockRelId.dbId;
tag.objId.blkno = blkno;
LockRelease(LockTableId, &tag, GetCurrentTransactionId(), lockmode);
LockRelease(LockTableId, &tag, GetTopTransactionId(), lockmode);
}
/*
@@ -343,7 +343,7 @@ void
XactLockTableWait(TransactionId xid)
{
LOCKTAG tag;
TransactionId myxid = GetCurrentTransactionId();
TransactionId myxid = GetTopTransactionId();
Assert(!SubTransXidsHaveCommonAncestor(xid, myxid));