mirror of
https://github.com/postgres/postgres.git
synced 2025-04-25 21:42:33 +03:00
Add #include "postgres.h" as required Remove #include "c.h" *and* "utils/elog.h" as applicable
700 lines
16 KiB
C
700 lines
16 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* md.c--
|
|
* This code manages relations that reside on magnetic disk.
|
|
*
|
|
* Copyright (c) 1994, Regents of the University of California
|
|
*
|
|
*
|
|
* IDENTIFICATION
|
|
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.6 1996/11/03 05:07:55 scrappy Exp $
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
#include <stdio.h> /* for sprintf() */
|
|
#include <sys/file.h>
|
|
|
|
#include "postgres.h"
|
|
#include "miscadmin.h" /* for DataDir */
|
|
|
|
#include "storage/block.h"
|
|
#include "storage/smgr.h" /* where the declarations go */
|
|
#include "storage/fd.h"
|
|
#include "utils/mcxt.h"
|
|
#include "utils/rel.h"
|
|
#include "utils/palloc.h"
|
|
#include "catalog/catalog.h"
|
|
|
|
#undef DIAGNOSTIC
|
|
|
|
/*
|
|
* The magnetic disk storage manager keeps track of open file descriptors
|
|
* in its own descriptor pool. This happens for two reasons. First, at
|
|
* transaction boundaries, we walk the list of descriptors and flush
|
|
* anything that we've dirtied in the current transaction. Second, we
|
|
* have to support relations of > 4GBytes. In order to do this, we break
|
|
* relations up into chunks of < 2GBytes and store one chunk in each of
|
|
* several files that represent the relation.
|
|
*/
|
|
|
|
typedef struct _MdfdVec {
|
|
int mdfd_vfd; /* fd number in vfd pool */
|
|
uint16 mdfd_flags; /* clean, dirty */
|
|
int mdfd_lstbcnt; /* most recent block count */
|
|
struct _MdfdVec *mdfd_chain; /* for large relations */
|
|
} MdfdVec;
|
|
|
|
static int Nfds = 100;
|
|
static MdfdVec *Md_fdvec = (MdfdVec *) NULL;
|
|
static int CurFd = 0;
|
|
static MemoryContext MdCxt;
|
|
|
|
#define MDFD_DIRTY (uint16) 0x01
|
|
|
|
#define RELSEG_SIZE 262144 /* (2 ** 31) / 8192 -- 2GB file */
|
|
|
|
/* routines declared here */
|
|
static MdfdVec *_mdfd_openseg(Relation reln, int segno, int oflags);
|
|
static MdfdVec *_mdfd_getseg(Relation reln, int blkno, int oflag);
|
|
static int _fdvec_ext(void);
|
|
static BlockNumber _mdnblocks(File file, Size blcksz);
|
|
|
|
/*
|
|
* mdinit() -- Initialize private state for magnetic disk storage manager.
|
|
*
|
|
* We keep a private table of all file descriptors. Whenever we do
|
|
* a write to one, we mark it dirty in our table. Whenever we force
|
|
* changes to disk, we mark the file descriptor clean. At transaction
|
|
* commit, we force changes to disk for all dirty file descriptors.
|
|
* This routine allocates and initializes the table.
|
|
*
|
|
* Returns SM_SUCCESS or SM_FAIL with errno set as appropriate.
|
|
*/
|
|
int
|
|
mdinit()
|
|
{
|
|
MemoryContext oldcxt;
|
|
|
|
MdCxt = (MemoryContext) CreateGlobalMemory("MdSmgr");
|
|
if (MdCxt == (MemoryContext) NULL)
|
|
return (SM_FAIL);
|
|
|
|
oldcxt = MemoryContextSwitchTo(MdCxt);
|
|
Md_fdvec = (MdfdVec *) palloc(Nfds * sizeof(MdfdVec));
|
|
(void) MemoryContextSwitchTo(oldcxt);
|
|
|
|
if (Md_fdvec == (MdfdVec *) NULL)
|
|
return (SM_FAIL);
|
|
|
|
memset(Md_fdvec, 0, Nfds * sizeof(MdfdVec));
|
|
|
|
return (SM_SUCCESS);
|
|
}
|
|
|
|
int
|
|
mdcreate(Relation reln)
|
|
{
|
|
int fd, vfd;
|
|
int tmp;
|
|
char *path;
|
|
extern bool IsBootstrapProcessingMode();
|
|
|
|
path = relpath(&(reln->rd_rel->relname.data[0]));
|
|
fd = FileNameOpenFile(path, O_RDWR|O_CREAT|O_EXCL, 0600);
|
|
|
|
/*
|
|
* If the file already exists and is empty, we pretend that the
|
|
* create succeeded. During bootstrap processing, we skip that check,
|
|
* because pg_time, pg_variable, and pg_log get created before their
|
|
* .bki file entries are processed.
|
|
*/
|
|
|
|
if (fd < 0) {
|
|
if ((fd = FileNameOpenFile(path, O_RDWR, 0600)) >= 0) {
|
|
if (!IsBootstrapProcessingMode() &&
|
|
FileRead(fd, (char *) &tmp, sizeof(tmp)) != 0) {
|
|
FileClose(fd);
|
|
return (-1);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (CurFd >= Nfds) {
|
|
if (_fdvec_ext() == SM_FAIL)
|
|
return (-1);
|
|
}
|
|
|
|
Md_fdvec[CurFd].mdfd_vfd = fd;
|
|
Md_fdvec[CurFd].mdfd_flags = (uint16) 0;
|
|
Md_fdvec[CurFd].mdfd_chain = (MdfdVec *) NULL;
|
|
Md_fdvec[CurFd].mdfd_lstbcnt = 0;
|
|
|
|
vfd = CurFd++;
|
|
|
|
return (vfd);
|
|
}
|
|
|
|
/*
|
|
* mdunlink() -- Unlink a relation.
|
|
*/
|
|
int
|
|
mdunlink(Relation reln)
|
|
{
|
|
int fd;
|
|
int i;
|
|
MdfdVec *v, *ov;
|
|
MemoryContext oldcxt;
|
|
char fname[NAMEDATALEN];
|
|
char tname[NAMEDATALEN+10]; /* leave room for overflow suffixes*/
|
|
|
|
/* On Windows NT you can't unlink a file if it is open so we have
|
|
** to do this.
|
|
*/
|
|
#ifdef WIN32
|
|
(void) mdclose(reln);
|
|
#endif /* WIN32 */
|
|
|
|
|
|
memset(fname,0, NAMEDATALEN);
|
|
strncpy(fname, RelationGetRelationName(reln)->data, NAMEDATALEN);
|
|
|
|
if (FileNameUnlink(fname) < 0)
|
|
return (SM_FAIL);
|
|
|
|
/* unlink all the overflow files for large relations */
|
|
for (i = 1; ; i++) {
|
|
#ifdef WIN32
|
|
(void) mdclose(reln);
|
|
#endif /* WIN32 */
|
|
sprintf(tname, "%s.%d", fname, i);
|
|
if (FileNameUnlink(tname) < 0)
|
|
break;
|
|
}
|
|
|
|
/* finally, clean out the mdfd vector */
|
|
fd = RelationGetFile(reln);
|
|
Md_fdvec[fd].mdfd_flags = (uint16) 0;
|
|
|
|
oldcxt = MemoryContextSwitchTo(MdCxt);
|
|
for (v = &Md_fdvec[fd]; v != (MdfdVec *) NULL; ) {
|
|
ov = v;
|
|
v = v->mdfd_chain;
|
|
if (ov != &Md_fdvec[fd])
|
|
pfree(ov);
|
|
}
|
|
Md_fdvec[fd].mdfd_chain = (MdfdVec *) NULL;
|
|
(void) MemoryContextSwitchTo(oldcxt);
|
|
|
|
return (SM_SUCCESS);
|
|
}
|
|
|
|
/*
|
|
* mdextend() -- Add a block to the specified relation.
|
|
*
|
|
* This routine returns SM_FAIL or SM_SUCCESS, with errno set as
|
|
* appropriate.
|
|
*/
|
|
int
|
|
mdextend(Relation reln, char *buffer)
|
|
{
|
|
long pos;
|
|
int nblocks;
|
|
MdfdVec *v;
|
|
|
|
nblocks = mdnblocks(reln);
|
|
v = _mdfd_getseg(reln, nblocks, O_CREAT);
|
|
|
|
if ((pos = FileSeek(v->mdfd_vfd, 0L, SEEK_END)) < 0)
|
|
return (SM_FAIL);
|
|
|
|
if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ)
|
|
return (SM_FAIL);
|
|
|
|
/* remember that we did a write, so we can sync at xact commit */
|
|
v->mdfd_flags |= MDFD_DIRTY;
|
|
|
|
/* try to keep the last block count current, though it's just a hint */
|
|
if ((v->mdfd_lstbcnt = (++nblocks % RELSEG_SIZE)) == 0)
|
|
v->mdfd_lstbcnt = RELSEG_SIZE;
|
|
|
|
#ifdef DIAGNOSTIC
|
|
if (_mdnblocks(v->mdfd_vfd, BLCKSZ) > RELSEG_SIZE
|
|
|| v->mdfd_lstbcnt > RELSEG_SIZE)
|
|
elog(FATAL, "segment too big!");
|
|
#endif
|
|
|
|
return (SM_SUCCESS);
|
|
}
|
|
|
|
/*
|
|
* mdopen() -- Open the specified relation.
|
|
*/
|
|
int
|
|
mdopen(Relation reln)
|
|
{
|
|
char *path;
|
|
int fd;
|
|
int vfd;
|
|
|
|
if (CurFd >= Nfds) {
|
|
if (_fdvec_ext() == SM_FAIL)
|
|
return (-1);
|
|
}
|
|
|
|
path = relpath(&(reln->rd_rel->relname.data[0]));
|
|
|
|
fd = FileNameOpenFile(path, O_RDWR, 0600);
|
|
|
|
/* this should only happen during bootstrap processing */
|
|
if (fd < 0)
|
|
fd = FileNameOpenFile(path, O_RDWR|O_CREAT|O_EXCL, 0600);
|
|
|
|
Md_fdvec[CurFd].mdfd_vfd = fd;
|
|
Md_fdvec[CurFd].mdfd_flags = (uint16) 0;
|
|
Md_fdvec[CurFd].mdfd_chain = (MdfdVec *) NULL;
|
|
Md_fdvec[CurFd].mdfd_lstbcnt = _mdnblocks(fd, BLCKSZ);
|
|
|
|
#ifdef DIAGNOSTIC
|
|
if (Md_fdvec[CurFd].mdfd_lstbcnt > RELSEG_SIZE)
|
|
elog(FATAL, "segment too big on relopen!");
|
|
#endif
|
|
|
|
vfd = CurFd++;
|
|
|
|
return (vfd);
|
|
}
|
|
|
|
/*
|
|
* mdclose() -- Close the specified relation.
|
|
*
|
|
* Returns SM_SUCCESS or SM_FAIL with errno set as appropriate.
|
|
*/
|
|
int
|
|
mdclose(Relation reln)
|
|
{
|
|
int fd;
|
|
MdfdVec *v;
|
|
|
|
fd = RelationGetFile(reln);
|
|
|
|
for (v = &Md_fdvec[fd]; v != (MdfdVec *) NULL; v = v->mdfd_chain) {
|
|
|
|
/* may be closed already */
|
|
if (v->mdfd_vfd < 0)
|
|
continue;
|
|
|
|
/*
|
|
* We sync the file descriptor so that we don't need to reopen it at
|
|
* transaction commit to force changes to disk.
|
|
*/
|
|
|
|
FileSync(v->mdfd_vfd);
|
|
FileClose(v->mdfd_vfd);
|
|
|
|
/* mark this file descriptor as clean in our private table */
|
|
v->mdfd_flags &= ~MDFD_DIRTY;
|
|
}
|
|
|
|
return (SM_SUCCESS);
|
|
}
|
|
|
|
/*
|
|
* mdread() -- Read the specified block from a relation.
|
|
*
|
|
* Returns SM_SUCCESS or SM_FAIL.
|
|
*/
|
|
int
|
|
mdread(Relation reln, BlockNumber blocknum, char *buffer)
|
|
{
|
|
int status;
|
|
long seekpos;
|
|
int nbytes;
|
|
MdfdVec *v;
|
|
|
|
v = _mdfd_getseg(reln, blocknum, 0);
|
|
|
|
seekpos = (long) (BLCKSZ * (blocknum % RELSEG_SIZE));
|
|
|
|
#ifdef DIAGNOSTIC
|
|
if (seekpos >= BLCKSZ * RELSEG_SIZE)
|
|
elog(FATAL, "seekpos too big!");
|
|
#endif
|
|
|
|
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) {
|
|
return (SM_FAIL);
|
|
}
|
|
|
|
status = SM_SUCCESS;
|
|
if ((nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ) {
|
|
if (nbytes == 0) {
|
|
memset(buffer, 0, BLCKSZ);
|
|
} else {
|
|
status = SM_FAIL;
|
|
}
|
|
}
|
|
|
|
return (status);
|
|
}
|
|
|
|
/*
|
|
* mdwrite() -- Write the supplied block at the appropriate location.
|
|
*
|
|
* Returns SM_SUCCESS or SM_FAIL.
|
|
*/
|
|
int
|
|
mdwrite(Relation reln, BlockNumber blocknum, char *buffer)
|
|
{
|
|
int status;
|
|
long seekpos;
|
|
MdfdVec *v;
|
|
|
|
v = _mdfd_getseg(reln, blocknum, 0);
|
|
|
|
seekpos = (long) (BLCKSZ * (blocknum % RELSEG_SIZE));
|
|
#ifdef DIAGNOSTIC
|
|
if (seekpos >= BLCKSZ * RELSEG_SIZE)
|
|
elog(FATAL, "seekpos too big!");
|
|
#endif
|
|
|
|
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) {
|
|
return (SM_FAIL);
|
|
}
|
|
|
|
status = SM_SUCCESS;
|
|
if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ)
|
|
status = SM_FAIL;
|
|
|
|
v->mdfd_flags |= MDFD_DIRTY;
|
|
|
|
return (status);
|
|
}
|
|
|
|
/*
|
|
* mdflush() -- Synchronously write a block to disk.
|
|
*
|
|
* This is exactly like mdwrite(), but doesn't return until the file
|
|
* system buffer cache has been flushed.
|
|
*/
|
|
int
|
|
mdflush(Relation reln, BlockNumber blocknum, char *buffer)
|
|
{
|
|
int status;
|
|
long seekpos;
|
|
MdfdVec *v;
|
|
|
|
v = _mdfd_getseg(reln, blocknum, 0);
|
|
|
|
seekpos = (long) (BLCKSZ * (blocknum % RELSEG_SIZE));
|
|
#ifdef DIAGNOSTIC
|
|
if (seekpos >= BLCKSZ * RELSEG_SIZE)
|
|
elog(FATAL, "seekpos too big!");
|
|
#endif
|
|
|
|
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) {
|
|
return (SM_FAIL);
|
|
}
|
|
|
|
/* write and sync the block */
|
|
status = SM_SUCCESS;
|
|
if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ
|
|
|| FileSync(v->mdfd_vfd) < 0)
|
|
status = SM_FAIL;
|
|
|
|
/*
|
|
* By here, the block is written and changes have been forced to stable
|
|
* storage. Mark the descriptor as clean until the next write, so we
|
|
* don't sync it again unnecessarily at transaction commit.
|
|
*/
|
|
|
|
v->mdfd_flags &= ~MDFD_DIRTY;
|
|
|
|
return (status);
|
|
}
|
|
|
|
/*
|
|
* mdblindwrt() -- Write a block to disk blind.
|
|
*
|
|
* We have to be able to do this using only the name and OID of
|
|
* the database and relation in which the block belongs. This
|
|
* is a synchronous write.
|
|
*/
|
|
int
|
|
mdblindwrt(char *dbstr,
|
|
char *relstr,
|
|
Oid dbid,
|
|
Oid relid,
|
|
BlockNumber blkno,
|
|
char *buffer)
|
|
{
|
|
int fd;
|
|
int segno;
|
|
long seekpos;
|
|
int status;
|
|
char *path;
|
|
int nchars;
|
|
|
|
/* be sure we have enough space for the '.segno', if any */
|
|
segno = blkno / RELSEG_SIZE;
|
|
if (segno > 0)
|
|
nchars = 10;
|
|
else
|
|
nchars = 0;
|
|
|
|
/* construct the path to the file and open it */
|
|
if (dbid == (Oid) 0) {
|
|
path = (char *) palloc(strlen(DataDir) + sizeof(NameData) + 2 + nchars);
|
|
if (segno == 0)
|
|
sprintf(path, "%s/%.*s", DataDir, NAMEDATALEN, relstr);
|
|
else
|
|
sprintf(path, "%s/%.*s.%d", DataDir, NAMEDATALEN, relstr, segno);
|
|
} else {
|
|
path = (char *) palloc(strlen(DataDir) + strlen("/base/") + 2 * sizeof(NameData) + 2 + nchars);
|
|
if (segno == 0)
|
|
sprintf(path, "%s/base/%.*s/%.*s", DataDir, NAMEDATALEN,
|
|
dbstr, NAMEDATALEN, relstr);
|
|
else
|
|
sprintf(path, "%s/base/%.*s/%.*s.%d", DataDir, NAMEDATALEN, dbstr,
|
|
NAMEDATALEN, relstr, segno);
|
|
}
|
|
|
|
if ((fd = open(path, O_RDWR, 0600)) < 0)
|
|
return (SM_FAIL);
|
|
|
|
/* seek to the right spot */
|
|
seekpos = (long) (BLCKSZ * (blkno % RELSEG_SIZE));
|
|
if (lseek(fd, seekpos, SEEK_SET) != seekpos) {
|
|
(void) close(fd);
|
|
return (SM_FAIL);
|
|
}
|
|
|
|
status = SM_SUCCESS;
|
|
|
|
/* write and sync the block */
|
|
#ifdef OPENLINK_PATCHES
|
|
if (write(fd, buffer, BLCKSZ) != BLCKSZ || (pg_fsync(fd) < 0))
|
|
#else
|
|
if (write(fd, buffer, BLCKSZ) != BLCKSZ || fsync(fd) < 0)
|
|
#endif
|
|
status = SM_FAIL;
|
|
|
|
if (close(fd) < 0)
|
|
status = SM_FAIL;
|
|
|
|
pfree(path);
|
|
|
|
return (status);
|
|
}
|
|
|
|
/*
|
|
* mdnblocks() -- Get the number of blocks stored in a relation.
|
|
*
|
|
* Returns # of blocks or -1 on error.
|
|
*/
|
|
int
|
|
mdnblocks(Relation reln)
|
|
{
|
|
int fd;
|
|
MdfdVec *v;
|
|
int nblocks;
|
|
int segno;
|
|
|
|
fd = RelationGetFile(reln);
|
|
v = &Md_fdvec[fd];
|
|
|
|
#ifdef DIAGNOSTIC
|
|
if (_mdnblocks(v->mdfd_vfd, BLCKSZ) > RELSEG_SIZE)
|
|
elog(FATAL, "segment too big in getseg!");
|
|
#endif
|
|
|
|
segno = 0;
|
|
for (;;) {
|
|
if (v->mdfd_lstbcnt == RELSEG_SIZE
|
|
|| (nblocks = _mdnblocks(v->mdfd_vfd, BLCKSZ)) == RELSEG_SIZE) {
|
|
|
|
v->mdfd_lstbcnt = RELSEG_SIZE;
|
|
segno++;
|
|
|
|
if (v->mdfd_chain == (MdfdVec *) NULL) {
|
|
v->mdfd_chain = _mdfd_openseg(reln, segno, O_CREAT);
|
|
if (v->mdfd_chain == (MdfdVec *) NULL)
|
|
elog(WARN, "cannot count blocks for %.16s -- open failed",
|
|
RelationGetRelationName(reln));
|
|
}
|
|
|
|
v = v->mdfd_chain;
|
|
} else {
|
|
return ((segno * RELSEG_SIZE) + nblocks);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* mdcommit() -- Commit a transaction.
|
|
*
|
|
* All changes to magnetic disk relations must be forced to stable
|
|
* storage. This routine makes a pass over the private table of
|
|
* file descriptors. Any descriptors to which we have done writes,
|
|
* but not synced, are synced here.
|
|
*
|
|
* Returns SM_SUCCESS or SM_FAIL with errno set as appropriate.
|
|
*/
|
|
int
|
|
mdcommit()
|
|
{
|
|
int i;
|
|
MdfdVec *v;
|
|
|
|
for (i = 0; i < CurFd; i++) {
|
|
for (v = &Md_fdvec[i]; v != (MdfdVec *) NULL; v = v->mdfd_chain) {
|
|
if (v->mdfd_flags & MDFD_DIRTY) {
|
|
if (FileSync(v->mdfd_vfd) < 0)
|
|
return (SM_FAIL);
|
|
|
|
v->mdfd_flags &= ~MDFD_DIRTY;
|
|
}
|
|
}
|
|
}
|
|
|
|
return (SM_SUCCESS);
|
|
}
|
|
|
|
/*
|
|
* mdabort() -- Abort a transaction.
|
|
*
|
|
* Changes need not be forced to disk at transaction abort. We mark
|
|
* all file descriptors as clean here. Always returns SM_SUCCESS.
|
|
*/
|
|
int
|
|
mdabort()
|
|
{
|
|
int i;
|
|
MdfdVec *v;
|
|
|
|
for (i = 0; i < CurFd; i++) {
|
|
for (v = &Md_fdvec[i]; v != (MdfdVec *) NULL; v = v->mdfd_chain) {
|
|
v->mdfd_flags &= ~MDFD_DIRTY;
|
|
}
|
|
}
|
|
|
|
return (SM_SUCCESS);
|
|
}
|
|
|
|
/*
|
|
* _fdvec_ext() -- Extend the md file descriptor vector.
|
|
*
|
|
* The file descriptor vector must be large enough to hold at least
|
|
* 'fd' entries.
|
|
*/
|
|
static
|
|
int _fdvec_ext()
|
|
{
|
|
MdfdVec *nvec;
|
|
MemoryContext oldcxt;
|
|
|
|
Nfds *= 2;
|
|
|
|
oldcxt = MemoryContextSwitchTo(MdCxt);
|
|
|
|
nvec = (MdfdVec *) palloc(Nfds * sizeof(MdfdVec));
|
|
memset(nvec, 0, Nfds * sizeof(MdfdVec));
|
|
memmove(nvec, (char *) Md_fdvec, (Nfds / 2) * sizeof(MdfdVec));
|
|
pfree(Md_fdvec);
|
|
|
|
(void) MemoryContextSwitchTo(oldcxt);
|
|
|
|
Md_fdvec = nvec;
|
|
|
|
return (SM_SUCCESS);
|
|
}
|
|
|
|
static MdfdVec *
|
|
_mdfd_openseg(Relation reln, int segno, int oflags)
|
|
{
|
|
MemoryContext oldcxt;
|
|
MdfdVec *v;
|
|
int fd;
|
|
bool dofree;
|
|
char *path, *fullpath;
|
|
|
|
/* be sure we have enough space for the '.segno', if any */
|
|
path = relpath(RelationGetRelationName(reln)->data);
|
|
|
|
dofree = false;
|
|
if (segno > 0) {
|
|
dofree = true;
|
|
fullpath = (char *) palloc(strlen(path) + 12);
|
|
sprintf(fullpath, "%s.%d", path, segno);
|
|
} else
|
|
fullpath = path;
|
|
|
|
/* open the file */
|
|
fd = PathNameOpenFile(fullpath, O_RDWR|oflags, 0600);
|
|
|
|
if (dofree)
|
|
pfree(fullpath);
|
|
|
|
if (fd < 0)
|
|
return ((MdfdVec *) NULL);
|
|
|
|
/* allocate an mdfdvec entry for it */
|
|
oldcxt = MemoryContextSwitchTo(MdCxt);
|
|
v = (MdfdVec *) palloc(sizeof(MdfdVec));
|
|
(void) MemoryContextSwitchTo(oldcxt);
|
|
|
|
/* fill the entry */
|
|
v->mdfd_vfd = fd;
|
|
v->mdfd_flags = (uint16) 0;
|
|
v->mdfd_chain = (MdfdVec *) NULL;
|
|
v->mdfd_lstbcnt = _mdnblocks(fd, BLCKSZ);
|
|
|
|
#ifdef DIAGNOSTIC
|
|
if (v->mdfd_lstbcnt > RELSEG_SIZE)
|
|
elog(FATAL, "segment too big on open!");
|
|
#endif
|
|
|
|
/* all done */
|
|
return (v);
|
|
}
|
|
|
|
static MdfdVec *
|
|
_mdfd_getseg(Relation reln, int blkno, int oflag)
|
|
{
|
|
MdfdVec *v;
|
|
int segno;
|
|
int fd;
|
|
int i;
|
|
|
|
fd = RelationGetFile(reln);
|
|
if (fd < 0) {
|
|
if ((fd = mdopen(reln)) < 0)
|
|
elog(WARN, "cannot open relation %.16s",
|
|
RelationGetRelationName(reln));
|
|
reln->rd_fd = fd;
|
|
}
|
|
|
|
for (v = &Md_fdvec[fd], segno = blkno / RELSEG_SIZE, i = 1;
|
|
segno > 0;
|
|
i++, segno--) {
|
|
|
|
if (v->mdfd_chain == (MdfdVec *) NULL) {
|
|
v->mdfd_chain = _mdfd_openseg(reln, i, oflag);
|
|
|
|
if (v->mdfd_chain == (MdfdVec *) NULL)
|
|
elog(WARN, "cannot open segment %d of relation %.16s",
|
|
i, RelationGetRelationName(reln));
|
|
}
|
|
v = v->mdfd_chain;
|
|
}
|
|
|
|
return (v);
|
|
}
|
|
|
|
static BlockNumber
|
|
_mdnblocks(File file, Size blcksz)
|
|
{
|
|
long len;
|
|
|
|
len = FileSeek(file, 0L, SEEK_END) - 1;
|
|
return((BlockNumber)((len < 0) ? 0 : 1 + len / blcksz));
|
|
}
|