1
0
mirror of https://github.com/postgres/postgres.git synced 2025-08-09 17:03:00 +03:00

I'm gonna stick my neck out a little and back-patch these changes into
REL6_5 ... they could use some more testing before we release 6.5.2, though.
This commit is contained in:
Tom Lane
1999-09-02 04:07:18 +00:00
parent 632b56946b
commit aa291955a8
3 changed files with 158 additions and 127 deletions

View File

@@ -7,25 +7,19 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.46 1999/06/18 16:47:23 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.46.2.1 1999/09/02 04:07:16 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
#include <unistd.h> #include <unistd.h>
#include <stdio.h> /* for sprintf() */ #include <fcntl.h>
#include <string.h>
#include <fcntl.h> /* for open() flags */
#include <sys/file.h> #include <sys/file.h>
#include "postgres.h" #include "postgres.h"
#include "miscadmin.h" /* for DataDir */
#include "catalog/catalog.h" #include "catalog/catalog.h"
#include "storage/block.h" #include "miscadmin.h"
#include "storage/fd.h" #include "storage/smgr.h"
#include "storage/smgr.h" /* where the declarations go */
#include "utils/mcxt.h"
#include "utils/rel.h"
#undef DIAGNOSTIC #undef DIAGNOSTIC
@@ -38,6 +32,15 @@
* In order to do that, we break relations up into chunks of < 2GBytes * In order to do that, we break relations up into chunks of < 2GBytes
* and store one chunk in each of several files that represent the relation. * and store one chunk in each of several files that represent the relation.
* See the BLCKSZ and RELSEG_SIZE configuration constants in include/config.h. * See the BLCKSZ and RELSEG_SIZE configuration constants in include/config.h.
*
* The file descriptor stored in the relation cache (see RelationGetFile())
* is actually an index into the Md_fdvec array. -1 indicates not open.
*
* When a relation is broken into multiple chunks, only the first chunk
* has its own entry in the Md_fdvec array; the remaining chunks have
* palloc'd MdfdVec objects that are chained onto the first chunk via the
* mdfd_chain links. All chunks except the last MUST have size exactly
* equal to RELSEG_SIZE blocks --- see mdnblocks() and mdtruncate().
*/ */
typedef struct _MdfdVec typedef struct _MdfdVec
@@ -51,18 +54,19 @@ typedef struct _MdfdVec
#endif #endif
} MdfdVec; } MdfdVec;
static int Nfds = 100; static int Nfds = 100; /* initial/current size of Md_fdvec array */
static MdfdVec *Md_fdvec = (MdfdVec *) NULL; static MdfdVec *Md_fdvec = (MdfdVec *) NULL;
static int Md_Free = -1; static int Md_Free = -1; /* head of freelist of unused fdvec entries */
static int CurFd = 0; static int CurFd = 0; /* first never-used fdvec index */
static MemoryContext MdCxt; static MemoryContext MdCxt; /* context for all my allocations */
#define MDFD_DIRTY (uint16) 0x01 #define MDFD_DIRTY (uint16) 0x01
#define MDFD_FREE (uint16) 0x02 #define MDFD_FREE (uint16) 0x02
/* routines declared here */ /* routines declared here */
static int _mdfd_getrelnfd(Relation reln);
static MdfdVec *_mdfd_openseg(Relation reln, int segno, int oflags); static MdfdVec *_mdfd_openseg(Relation reln, int segno, int oflags);
static MdfdVec *_mdfd_getseg(Relation reln, int blkno, int oflag); static MdfdVec *_mdfd_getseg(Relation reln, int blkno);
static int _fdvec_alloc(void); static int _fdvec_alloc(void);
static void _fdvec_free(int); static void _fdvec_free(int);
static BlockNumber _mdnblocks(File file, Size blcksz); static BlockNumber _mdnblocks(File file, Size blcksz);
@@ -167,43 +171,39 @@ mdcreate(Relation reln)
int int
mdunlink(Relation reln) mdunlink(Relation reln)
{ {
int nblocks;
int fd; int fd;
int i; MdfdVec *v;
MdfdVec *v,
*ov;
MemoryContext oldcxt; MemoryContext oldcxt;
char fname[NAMEDATALEN];
char tname[NAMEDATALEN + 10]; /* leave room for overflow
* suffixes */
/* /*
* On Windows NT you can't unlink a file if it is open so we have * to * Force all segments of the relation to be opened, so that we
* do this. * won't miss deleting any of them.
*/ */
nblocks = mdnblocks(reln);
StrNCpy(fname, RelationGetRelationName(reln)->data, NAMEDATALEN); /*
* Clean out the mdfd vector, letting fd.c unlink the physical files.
if (FileNameUnlink(fname) < 0) *
return SM_FAIL; * NOTE: We truncate the file(s) before deleting 'em, because if other
* backends are holding the files open, the unlink will fail on some
/* unlink all the overflow files for large relations */ * platforms (think Microsoft). Better a zero-size file gets left around
for (i = 1;; i++) * than a big file. Those other backends will be forced to close the
{ * relation by cache invalidation, but that probably hasn't happened yet.
sprintf(tname, "%s.%d", fname, i); */
if (FileNameUnlink(tname) < 0)
break;
}
/* finally, clean out the mdfd vector */
fd = RelationGetFile(reln); fd = RelationGetFile(reln);
if (fd < 0) /* should not happen */
elog(ERROR, "mdunlink: mdnblocks didn't open relation");
Md_fdvec[fd].mdfd_flags = (uint16) 0; Md_fdvec[fd].mdfd_flags = (uint16) 0;
oldcxt = MemoryContextSwitchTo(MdCxt); oldcxt = MemoryContextSwitchTo(MdCxt);
#ifndef LET_OS_MANAGE_FILESIZE #ifndef LET_OS_MANAGE_FILESIZE
for (v = &Md_fdvec[fd]; v != (MdfdVec *) NULL;) for (v = &Md_fdvec[fd]; v != (MdfdVec *) NULL;)
{ {
MdfdVec *ov = v;
FileTruncate(v->mdfd_vfd, 0);
FileUnlink(v->mdfd_vfd); FileUnlink(v->mdfd_vfd);
ov = v;
v = v->mdfd_chain; v = v->mdfd_chain;
if (ov != &Md_fdvec[fd]) if (ov != &Md_fdvec[fd])
pfree(ov); pfree(ov);
@@ -211,13 +211,16 @@ mdunlink(Relation reln)
Md_fdvec[fd].mdfd_chain = (MdfdVec *) NULL; Md_fdvec[fd].mdfd_chain = (MdfdVec *) NULL;
#else #else
v = &Md_fdvec[fd]; v = &Md_fdvec[fd];
if (v != (MdfdVec *) NULL) FileTruncate(v->mdfd_vfd, 0);
FileUnlink(v->mdfd_vfd); FileUnlink(v->mdfd_vfd);
#endif #endif
MemoryContextSwitchTo(oldcxt); MemoryContextSwitchTo(oldcxt);
_fdvec_free(fd); _fdvec_free(fd);
/* be sure to mark relation closed */
reln->rd_fd = -1;
return SM_SUCCESS; return SM_SUCCESS;
} }
@@ -235,7 +238,7 @@ mdextend(Relation reln, char *buffer)
MdfdVec *v; MdfdVec *v;
nblocks = mdnblocks(reln); nblocks = mdnblocks(reln);
v = _mdfd_getseg(reln, nblocks, O_CREAT); v = _mdfd_getseg(reln, nblocks);
if ((pos = FileSeek(v->mdfd_vfd, 0L, SEEK_END)) < 0) if ((pos = FileSeek(v->mdfd_vfd, 0L, SEEK_END)) < 0)
return SM_FAIL; return SM_FAIL;
@@ -309,7 +312,7 @@ mdopen(Relation reln)
} }
/* /*
* mdclose() -- Close the specified relation * mdclose() -- Close the specified relation, if it isn't closed already.
* *
* AND FREE fd vector! It may be re-used for other relation! * AND FREE fd vector! It may be re-used for other relation!
* reln should be flushed from cache after closing !.. * reln should be flushed from cache after closing !..
@@ -320,16 +323,19 @@ int
mdclose(Relation reln) mdclose(Relation reln)
{ {
int fd; int fd;
MdfdVec *v, MdfdVec *v;
*ov;
MemoryContext oldcxt; MemoryContext oldcxt;
fd = RelationGetFile(reln); fd = RelationGetFile(reln);
if (fd < 0)
return SM_SUCCESS; /* already closed, so no work */
oldcxt = MemoryContextSwitchTo(MdCxt); oldcxt = MemoryContextSwitchTo(MdCxt);
#ifndef LET_OS_MANAGE_FILESIZE #ifndef LET_OS_MANAGE_FILESIZE
for (v = &Md_fdvec[fd]; v != (MdfdVec *) NULL;) for (v = &Md_fdvec[fd]; v != (MdfdVec *) NULL;)
{ {
MdfdVec *ov = v;
/* if not closed already */ /* if not closed already */
if (v->mdfd_vfd >= 0) if (v->mdfd_vfd >= 0)
{ {
@@ -346,7 +352,6 @@ mdclose(Relation reln)
v->mdfd_flags &= ~MDFD_DIRTY; v->mdfd_flags &= ~MDFD_DIRTY;
} }
/* Now free vector */ /* Now free vector */
ov = v;
v = v->mdfd_chain; v = v->mdfd_chain;
if (ov != &Md_fdvec[fd]) if (ov != &Md_fdvec[fd])
pfree(ov); pfree(ov);
@@ -377,6 +382,9 @@ mdclose(Relation reln)
_fdvec_free(fd); _fdvec_free(fd);
/* be sure to mark relation closed */
reln->rd_fd = -1;
return SM_SUCCESS; return SM_SUCCESS;
} }
@@ -393,7 +401,7 @@ mdread(Relation reln, BlockNumber blocknum, char *buffer)
int nbytes; int nbytes;
MdfdVec *v; MdfdVec *v;
v = _mdfd_getseg(reln, blocknum, 0); v = _mdfd_getseg(reln, blocknum);
#ifndef LET_OS_MANAGE_FILESIZE #ifndef LET_OS_MANAGE_FILESIZE
seekpos = (long) (BLCKSZ * (blocknum % RELSEG_SIZE)); seekpos = (long) (BLCKSZ * (blocknum % RELSEG_SIZE));
@@ -433,7 +441,7 @@ mdwrite(Relation reln, BlockNumber blocknum, char *buffer)
long seekpos; long seekpos;
MdfdVec *v; MdfdVec *v;
v = _mdfd_getseg(reln, blocknum, 0); v = _mdfd_getseg(reln, blocknum);
#ifndef LET_OS_MANAGE_FILESIZE #ifndef LET_OS_MANAGE_FILESIZE
seekpos = (long) (BLCKSZ * (blocknum % RELSEG_SIZE)); seekpos = (long) (BLCKSZ * (blocknum % RELSEG_SIZE));
@@ -470,7 +478,7 @@ mdflush(Relation reln, BlockNumber blocknum, char *buffer)
long seekpos; long seekpos;
MdfdVec *v; MdfdVec *v;
v = _mdfd_getseg(reln, blocknum, 0); v = _mdfd_getseg(reln, blocknum);
#ifndef LET_OS_MANAGE_FILESIZE #ifndef LET_OS_MANAGE_FILESIZE
seekpos = (long) (BLCKSZ * (blocknum % RELSEG_SIZE)); seekpos = (long) (BLCKSZ * (blocknum % RELSEG_SIZE));
@@ -652,25 +660,27 @@ mdblindwrt(char *dbstr,
/* /*
* mdnblocks() -- Get the number of blocks stored in a relation. * mdnblocks() -- Get the number of blocks stored in a relation.
* *
* Returns # of blocks or -1 on error. * Important side effect: all segments of the relation are opened
* and added to the mdfd_chain list. If this routine has not been
* called, then only segments up to the last one actually touched
* are present in the chain...
*
* Returns # of blocks, elog's on error.
*/ */
int int
mdnblocks(Relation reln) mdnblocks(Relation reln)
{ {
int fd; int fd;
MdfdVec *v; MdfdVec *v;
#ifndef LET_OS_MANAGE_FILESIZE
int nblocks; int nblocks;
int segno; int segno;
#endif
fd = RelationGetFile(reln); fd = _mdfd_getrelnfd(reln);
v = &Md_fdvec[fd]; v = &Md_fdvec[fd];
#ifndef LET_OS_MANAGE_FILESIZE #ifndef LET_OS_MANAGE_FILESIZE
#ifdef DIAGNOSTIC
if (_mdnblocks(v->mdfd_vfd, BLCKSZ) > RELSEG_SIZE)
elog(FATAL, "segment too big in getseg!");
#endif
segno = 0; segno = 0;
for (;;) for (;;)
{ {
@@ -708,54 +718,74 @@ mdnblocks(Relation reln)
int int
mdtruncate(Relation reln, int nblocks) mdtruncate(Relation reln, int nblocks)
{ {
int curnblk;
int fd; int fd;
MdfdVec *v; MdfdVec *v;
#ifndef LET_OS_MANAGE_FILESIZE #ifndef LET_OS_MANAGE_FILESIZE
int curnblk, MemoryContext oldcxt;
i, int priorblocks;
oldsegno,
newsegno,
lastsegblocks;
MdfdVec **varray;
curnblk = mdnblocks(reln);
if (nblocks > curnblk)
return -1;
oldsegno = curnblk / RELSEG_SIZE;
newsegno = nblocks / RELSEG_SIZE;
#endif #endif
fd = RelationGetFile(reln); /* NOTE: mdnblocks makes sure we have opened all existing segments,
* so that truncate/delete loop will get them all!
*/
curnblk = mdnblocks(reln);
if (nblocks < 0 || nblocks > curnblk)
return -1; /* bogus request */
if (nblocks == curnblk)
return nblocks; /* no work */
fd = _mdfd_getrelnfd(reln);
v = &Md_fdvec[fd]; v = &Md_fdvec[fd];
#ifndef LET_OS_MANAGE_FILESIZE #ifndef LET_OS_MANAGE_FILESIZE
varray = (MdfdVec **)palloc((oldsegno + 1) * sizeof(MdfdVec *)); oldcxt = MemoryContextSwitchTo(MdCxt);
for (i = 0; i <= oldsegno; i++) priorblocks = 0;
while (v != (MdfdVec *) NULL)
{ {
if (!v) MdfdVec *ov = v;
elog(ERROR,"segment isn't open in mdtruncate!");
varray[i] = v; if (priorblocks > nblocks)
{
/* This segment is no longer wanted at all (and has already been
* unlinked from the mdfd_chain).
* We truncate the file before deleting it because if other
* backends are holding the file open, the unlink will fail on
* some platforms. Better a zero-size file gets left around than
* a big file...
*/
FileTruncate(v->mdfd_vfd, 0);
FileUnlink(v->mdfd_vfd);
v = v->mdfd_chain; v = v->mdfd_chain;
Assert(ov != &Md_fdvec[fd]); /* we never drop the 1st segment */
pfree(ov);
} }
for (i = oldsegno; i > newsegno; i--) else if (priorblocks + RELSEG_SIZE > nblocks)
{ {
v = varray[i]; /* This is the last segment we want to keep.
if (FileTruncate(v->mdfd_vfd, 0) < 0) * Truncate the file to the right length, and clear chain link
{ * that points to any remaining segments (which we shall zap).
pfree(varray); * NOTE: if nblocks is exactly a multiple K of RELSEG_SIZE,
return -1; * we will truncate the K+1st segment to 0 length but keep it.
} * This is mainly so that the right thing happens if nblocks=0.
v->mdfd_lstbcnt = 0; */
} int lastsegblocks = nblocks - priorblocks;
/* Calculate the # of blocks in the last segment */
lastsegblocks = nblocks - (newsegno * RELSEG_SIZE);
v = varray[i];
pfree(varray);
if (FileTruncate(v->mdfd_vfd, lastsegblocks * BLCKSZ) < 0) if (FileTruncate(v->mdfd_vfd, lastsegblocks * BLCKSZ) < 0)
return -1; return -1;
v->mdfd_lstbcnt = lastsegblocks; v->mdfd_lstbcnt = lastsegblocks;
v = v->mdfd_chain;
ov->mdfd_chain = (MdfdVec *) NULL;
}
else
{
/* We still need this segment and 0 or more blocks beyond it,
* so nothing to do here.
*/
v = v->mdfd_chain;
}
priorblocks += RELSEG_SIZE;
}
MemoryContextSwitchTo(oldcxt);
#else #else
if (FileTruncate(v->mdfd_vfd, nblocks * BLCKSZ) < 0) if (FileTruncate(v->mdfd_vfd, nblocks * BLCKSZ) < 0)
return -1; return -1;
@@ -820,11 +850,11 @@ mdabort()
{ {
#ifndef LET_OS_MANAGE_FILESIZE #ifndef LET_OS_MANAGE_FILESIZE
for (v = &Md_fdvec[i]; v != (MdfdVec *) NULL; v = v->mdfd_chain) for (v = &Md_fdvec[i]; v != (MdfdVec *) NULL; v = v->mdfd_chain)
v->mdfd_flags &= ~MDFD_DIRTY;
#else #else
v = &Md_fdvec[i]; v = &Md_fdvec[i];
if (v != (MdfdVec *) NULL)
#endif
v->mdfd_flags &= ~MDFD_DIRTY; v->mdfd_flags &= ~MDFD_DIRTY;
#endif
} }
return SM_SUCCESS; return SM_SUCCESS;
@@ -901,6 +931,7 @@ _fdvec_free(int fdvec)
{ {
Assert(Md_Free < 0 || Md_fdvec[Md_Free].mdfd_flags == MDFD_FREE); Assert(Md_Free < 0 || Md_fdvec[Md_Free].mdfd_flags == MDFD_FREE);
Assert(Md_fdvec[fdvec].mdfd_flags != MDFD_FREE);
Md_fdvec[fdvec].mdfd_nextFree = Md_Free; Md_fdvec[fdvec].mdfd_nextFree = Md_Free;
Md_fdvec[fdvec].mdfd_flags = MDFD_FREE; Md_fdvec[fdvec].mdfd_flags = MDFD_FREE;
Md_Free = fdvec; Md_Free = fdvec;
@@ -932,9 +963,9 @@ _mdfd_openseg(Relation reln, int segno, int oflags)
/* open the file */ /* open the file */
#ifndef __CYGWIN32__ #ifndef __CYGWIN32__
fd = PathNameOpenFile(fullpath, O_RDWR | oflags, 0600); fd = FileNameOpenFile(fullpath, O_RDWR | oflags, 0600);
#else #else
fd = PathNameOpenFile(fullpath, O_RDWR | O_BINARY | oflags, 0600); fd = FileNameOpenFile(fullpath, O_RDWR | O_BINARY | oflags, 0600);
#endif #endif
if (dofree) if (dofree)
@@ -965,13 +996,12 @@ _mdfd_openseg(Relation reln, int segno, int oflags)
return v; return v;
} }
static MdfdVec * /* Get the fd for the relation, opening it if it's not already open */
_mdfd_getseg(Relation reln, int blkno, int oflag)
static int
_mdfd_getrelnfd(Relation reln)
{ {
MdfdVec *v;
int segno;
int fd; int fd;
int i;
fd = RelationGetFile(reln); fd = RelationGetFile(reln);
if (fd < 0) if (fd < 0)
@@ -981,6 +1011,20 @@ _mdfd_getseg(Relation reln, int blkno, int oflag)
RelationGetRelationName(reln)); RelationGetRelationName(reln));
reln->rd_fd = fd; reln->rd_fd = fd;
} }
return fd;
}
/* Find the segment of the relation holding the specified block */
static MdfdVec *
_mdfd_getseg(Relation reln, int blkno)
{
MdfdVec *v;
int segno;
int fd;
int i;
fd = _mdfd_getrelnfd(reln);
#ifndef LET_OS_MANAGE_FILESIZE #ifndef LET_OS_MANAGE_FILESIZE
for (v = &Md_fdvec[fd], segno = blkno / RELSEG_SIZE, i = 1; for (v = &Md_fdvec[fd], segno = blkno / RELSEG_SIZE, i = 1;
@@ -990,7 +1034,7 @@ _mdfd_getseg(Relation reln, int blkno, int oflag)
if (v->mdfd_chain == (MdfdVec *) NULL) if (v->mdfd_chain == (MdfdVec *) NULL)
{ {
v->mdfd_chain = _mdfd_openseg(reln, i, oflag); v->mdfd_chain = _mdfd_openseg(reln, i, O_CREAT);
if (v->mdfd_chain == (MdfdVec *) NULL) if (v->mdfd_chain == (MdfdVec *) NULL)
elog(ERROR, "cannot open segment %d of relation %s", elog(ERROR, "cannot open segment %d of relation %s",

View File

@@ -10,18 +10,13 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.24 1999/05/25 16:11:34 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.24.2.1 1999/09/02 04:07:17 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
#include <string.h>
#include "postgres.h" #include "postgres.h"
#include "storage/ipc.h"
#include "storage/block.h"
#include "storage/smgr.h" #include "storage/smgr.h"
#include "utils/rel.h"
#include "utils/palloc.h"
static void smgrshutdown(int dummy); static void smgrshutdown(int dummy);
@@ -196,12 +191,11 @@ smgropen(int16 which, Relation reln)
/* /*
* smgrclose() -- Close a relation. * smgrclose() -- Close a relation.
* *
* NOTE: mdclose frees fd vector! It may be re-used for other relation! * NOTE: underlying manager should allow case where relation is
* reln should be flushed from cache after closing !.. * already closed. Indeed relation may have been unlinked!
* Currently, smgrclose is calling by * This is currently called only from RelationFlushRelation() when
* relcache.c:RelationPurgeLocalRelation() only. * the relation cache entry is about to be dropped; could be doing
* It would be nice to have smgrfree(), but because of * simple relation cache clear, or finishing up DROP TABLE.
* smgrclose is called from single place... - vadim 05/22/97
* *
* Returns SM_SUCCESS on success, aborts on failure. * Returns SM_SUCCESS on success, aborts on failure.
*/ */

View File

@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/cache/relcache.c,v 1.62.2.1 1999/08/02 05:25:01 scrappy Exp $ * $Header: /cvsroot/pgsql/src/backend/utils/cache/relcache.c,v 1.62.2.2 1999/09/02 04:07:18 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@@ -1256,9 +1256,13 @@ RelationFlushRelation(Relation *relationPtr,
if (!onlyFlushReferenceCountZero || if (!onlyFlushReferenceCountZero ||
RelationHasReferenceCountZero(relation)) RelationHasReferenceCountZero(relation))
{ {
oldcxt = MemoryContextSwitchTo((MemoryContext) CacheCxt); oldcxt = MemoryContextSwitchTo((MemoryContext) CacheCxt);
/* make sure smgr and lower levels close the relation's files,
* if they weren't closed already
*/
smgrclose(DEFAULT_SMGR, relation);
RelationCacheDelete(relation); RelationCacheDelete(relation);
FreeTupleDesc(relation->rd_att); FreeTupleDesc(relation->rd_att);
@@ -1515,17 +1519,6 @@ RelationPurgeLocalRelation(bool xactCommitted)
else else
smgrunlink(DEFAULT_SMGR, reln); smgrunlink(DEFAULT_SMGR, reln);
} }
else if (!IsBootstrapProcessingMode() && !(reln->rd_isnoname))
/*
* RelationFlushRelation () below will flush relation
* information from the cache. We must call smgrclose to flush
* relation information from SMGR & FMGR, too. We assume that
* for temp relations smgrunlink is already called by
* heap_destroyr and we skip smgrclose for them. -
* vadim 05/22/97
*/
smgrclose(DEFAULT_SMGR, reln);
reln->rd_myxactonly = FALSE; reln->rd_myxactonly = FALSE;