mirror of
https://github.com/postgres/postgres.git
synced 2025-11-12 05:01:15 +03:00
Postgres95 1.01 Distribution - Virgin Sources
This commit is contained in:
31
src/backend/storage/Makefile.inc
Normal file
31
src/backend/storage/Makefile.inc
Normal file
@@ -0,0 +1,31 @@
|
||||
#-------------------------------------------------------------------------
|
||||
#
|
||||
# Makefile.inc--
|
||||
# Makefile for the storage modules
|
||||
#
|
||||
# Copyright (c) 1994, Regents of the University of California
|
||||
#
|
||||
#
|
||||
# IDENTIFICATION
|
||||
# $Header: /cvsroot/pgsql/src/backend/storage/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:52 scrappy Exp $
|
||||
#
|
||||
#-------------------------------------------------------------------------
|
||||
|
||||
stordir= $(CURDIR)/storage
|
||||
VPATH:= $(VPATH):$(stordir):$(stordir)/buffer:$(stordir)/file:$(stordir)/ipc:\
|
||||
$(stordir)/large_object:$(stordir)/lmgr:$(stordir)/page:$(stordir)/smgr
|
||||
|
||||
SUBSRCS=
|
||||
include $(stordir)/buffer/Makefile.inc
|
||||
include $(stordir)/file/Makefile.inc
|
||||
include $(stordir)/ipc/Makefile.inc
|
||||
include $(stordir)/large_object/Makefile.inc
|
||||
include $(stordir)/lmgr/Makefile.inc
|
||||
include $(stordir)/page/Makefile.inc
|
||||
include $(stordir)/smgr/Makefile.inc
|
||||
SRCS_STORAGE:= $(SUBSRCS)
|
||||
|
||||
HEADERS+= backendid.h block.h buf.h buf_internals.h bufmgr.h bufpage.h \
|
||||
fd.h ipc.h item.h itemid.h itempos.h \
|
||||
itemptr.h large_object.h lmgr.h lock.h multilev.h off.h page.h \
|
||||
pagenum.h pos.h proc.h shmem.h sinval.h sinvaladt.h smgr.h spin.h
|
||||
32
src/backend/storage/backendid.h
Normal file
32
src/backend/storage/backendid.h
Normal file
@@ -0,0 +1,32 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* backendid.h--
|
||||
* POSTGRES backend id communication definitions
|
||||
*
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: backendid.h,v 1.1.1.1 1996/07/09 06:21:52 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef BACKENDID_H
|
||||
#define BACKENDID_H
|
||||
|
||||
/* ----------------
|
||||
* pulled out of sinval.h to temporarily reduce #include nesting.
|
||||
* -cim 8/17/90
|
||||
* ----------------
|
||||
*/
|
||||
typedef int16 BackendId; /* unique currently active backend identifier */
|
||||
|
||||
#define InvalidBackendId (-1)
|
||||
|
||||
typedef int32 BackendTag; /* unique backend identifier */
|
||||
|
||||
#define InvalidBackendTag (-1)
|
||||
|
||||
extern BackendId MyBackendId; /* backend id of this backend */
|
||||
extern BackendTag MyBackendTag; /* backend tag of this backend */
|
||||
|
||||
#endif /* BACKENDID_H */
|
||||
114
src/backend/storage/block.h
Normal file
114
src/backend/storage/block.h
Normal file
@@ -0,0 +1,114 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* block.h--
|
||||
* POSTGRES disk block definitions.
|
||||
*
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: block.h,v 1.1.1.1 1996/07/09 06:21:52 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef BLOCK_H
|
||||
#define BLOCK_H
|
||||
|
||||
#include "c.h"
|
||||
|
||||
/*
|
||||
* BlockNumber:
|
||||
*
|
||||
* each data file (heap or index) is divided into postgres disk blocks
|
||||
* (which may be thought of as the unit of i/o -- a postgres buffer
|
||||
* contains exactly one disk block). the blocks are numbered
|
||||
* sequentially, 0 to 0xFFFFFFFE.
|
||||
*
|
||||
* InvalidBlockNumber is the same thing as P_NEW in buf.h.
|
||||
*
|
||||
* the access methods, the buffer manager and the storage manager are
|
||||
* more or less the only pieces of code that should be accessing disk
|
||||
* blocks directly.
|
||||
*/
|
||||
typedef uint32 BlockNumber;
|
||||
|
||||
#define InvalidBlockNumber ((BlockNumber) 0xFFFFFFFF)
|
||||
|
||||
/*
|
||||
* BlockId:
|
||||
*
|
||||
* this is a storage type for BlockNumber. in other words, this type
|
||||
* is used for on-disk structures (e.g., in HeapTupleData) whereas
|
||||
* BlockNumber is the type on which calculations are performed (e.g.,
|
||||
* in access method code).
|
||||
*
|
||||
* there doesn't appear to be any reason to have separate types except
|
||||
* for the fact that BlockIds can be SHORTALIGN'd (and therefore any
|
||||
* structures that contains them, such as ItemPointerData, can also be
|
||||
* SHORTALIGN'd). this is an important consideration for reducing the
|
||||
* space requirements of the line pointer (ItemIdData) array on each
|
||||
* page and the header of each heap or index tuple, so it doesn't seem
|
||||
* wise to change this without good reason.
|
||||
*/
|
||||
typedef struct BlockIdData {
|
||||
uint16 bi_hi;
|
||||
uint16 bi_lo;
|
||||
} BlockIdData;
|
||||
|
||||
typedef BlockIdData *BlockId; /* block identifier */
|
||||
|
||||
/* ----------------
|
||||
* support macros
|
||||
* ----------------
|
||||
*/
|
||||
|
||||
/*
|
||||
* BlockNumberIsValid --
|
||||
* True iff blockNumber is valid.
|
||||
*/
|
||||
#define BlockNumberIsValid(blockNumber) \
|
||||
((bool) ((int32) (blockNumber) != InvalidBlockNumber))
|
||||
|
||||
/*
|
||||
* BlockIdIsValid --
|
||||
* True iff the block identifier is valid.
|
||||
*/
|
||||
#define BlockIdIsValid(blockId) \
|
||||
((bool) PointerIsValid(blockId))
|
||||
|
||||
/*
|
||||
* BlockIdSet --
|
||||
* Sets a block identifier to the specified value.
|
||||
*/
|
||||
#define BlockIdSet(blockId, blockNumber) \
|
||||
Assert(PointerIsValid(blockId)); \
|
||||
(blockId)->bi_hi = (blockNumber) >> 16; \
|
||||
(blockId)->bi_lo = (blockNumber) & 0xffff
|
||||
|
||||
/*
|
||||
* BlockIdCopy --
|
||||
* Copy a block identifier.
|
||||
*/
|
||||
#define BlockIdCopy(toBlockId, fromBlockId) \
|
||||
Assert(PointerIsValid(toBlockId)); \
|
||||
Assert(PointerIsValid(fromBlockId)); \
|
||||
(toBlockId)->bi_hi = (fromBlockId)->bi_hi; \
|
||||
(toBlockId)->bi_lo = (fromBlockId)->bi_lo
|
||||
|
||||
/*
|
||||
* BlockIdEquals --
|
||||
* Check for block number equality.
|
||||
*/
|
||||
#define BlockIdEquals(blockId1, blockId2) \
|
||||
((blockId1)->bi_hi == (blockId2)->bi_hi && \
|
||||
(blockId1)->bi_lo == (blockId2)->bi_lo)
|
||||
|
||||
/*
|
||||
* BlockIdGetBlockNumber --
|
||||
* Retrieve the block number from a block identifier.
|
||||
*/
|
||||
#define BlockIdGetBlockNumber(blockId) \
|
||||
(AssertMacro(BlockIdIsValid(blockId)) ? \
|
||||
(BlockNumber) (((blockId)->bi_hi << 16) | ((uint16) (blockId)->bi_lo)) : \
|
||||
(BlockNumber) InvalidBlockNumber)
|
||||
|
||||
#endif /* BLOCK_H */
|
||||
47
src/backend/storage/buf.h
Normal file
47
src/backend/storage/buf.h
Normal file
@@ -0,0 +1,47 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* buf.h--
|
||||
* Basic buffer manager data types.
|
||||
*
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: buf.h,v 1.1.1.1 1996/07/09 06:21:52 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef BUF_H
|
||||
#define BUF_H
|
||||
|
||||
#define InvalidBuffer (0)
|
||||
#define UnknownBuffer (-99999)
|
||||
|
||||
typedef long Buffer;
|
||||
|
||||
/*
|
||||
* BufferIsInvalid --
|
||||
* True iff the buffer is invalid.
|
||||
*/
|
||||
#define BufferIsInvalid(buffer) ((buffer) == InvalidBuffer)
|
||||
|
||||
/*
|
||||
* BufferIsUnknown --
|
||||
* True iff the buffer is unknown.
|
||||
*/
|
||||
#define BufferIsUnknown(buffer) ((buffer) == UnknownBuffer)
|
||||
|
||||
/*
|
||||
* BufferIsLocal --
|
||||
* True iff the buffer is local (not visible to other servers).
|
||||
*/
|
||||
#define BufferIsLocal(buffer) ((buffer) < 0)
|
||||
|
||||
/*
|
||||
* If NO_BUFFERISVALID is defined, all error checking using BufferIsValid()
|
||||
* are suppressed. Decision-making using BufferIsValid is not affected.
|
||||
* This should be set only if one is sure there will be no errors.
|
||||
* - plai 9/10/90
|
||||
*/
|
||||
#undef NO_BUFFERISVALID
|
||||
|
||||
#endif /* BUF_H */
|
||||
220
src/backend/storage/buf_internals.h
Normal file
220
src/backend/storage/buf_internals.h
Normal file
@@ -0,0 +1,220 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* buf_internals.h--
|
||||
* Internal definitions.
|
||||
*
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: buf_internals.h,v 1.1.1.1 1996/07/09 06:21:52 scrappy Exp $
|
||||
*
|
||||
* NOTE
|
||||
* If BUFFERPAGE0 is defined, then 0 will be used as a
|
||||
* valid buffer page number.
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef BUFMGR_INTERNALS_H
|
||||
#define BUFMGR_INTERNALS_H
|
||||
|
||||
#include "postgres.h"
|
||||
#include "storage/buf.h"
|
||||
#include "storage/ipc.h"
|
||||
#include "storage/shmem.h"
|
||||
#include "miscadmin.h"
|
||||
#include "storage/lmgr.h"
|
||||
#include "utils/rel.h"
|
||||
#include "utils/relcache.h"
|
||||
|
||||
/* Buf Mgr constants */
|
||||
/* in bufmgr.c */
|
||||
extern int NBuffers;
|
||||
extern int Data_Descriptors;
|
||||
extern int Free_List_Descriptor;
|
||||
extern int Lookup_List_Descriptor;
|
||||
extern int Num_Descriptors;
|
||||
|
||||
/*
|
||||
* Flags for buffer descriptors
|
||||
*/
|
||||
#define BM_DIRTY (1 << 0)
|
||||
#define BM_PRIVATE (1 << 1)
|
||||
#define BM_VALID (1 << 2)
|
||||
#define BM_DELETED (1 << 3)
|
||||
#define BM_FREE (1 << 4)
|
||||
#define BM_IO_IN_PROGRESS (1 << 5)
|
||||
#define BM_IO_ERROR (1 << 6)
|
||||
|
||||
typedef bits16 BufFlags;
|
||||
|
||||
typedef struct sbufdesc BufferDesc;
|
||||
typedef struct sbufdesc BufferHdr;
|
||||
typedef struct buftag BufferTag;
|
||||
/* long * so alignment will be correct */
|
||||
typedef long **BufferBlock;
|
||||
|
||||
struct buftag{
|
||||
LRelId relId;
|
||||
BlockNumber blockNum; /* blknum relative to begin of reln */
|
||||
};
|
||||
|
||||
#define CLEAR_BUFFERTAG(a)\
|
||||
(a)->relId.dbId = InvalidOid; \
|
||||
(a)->relId.relId = InvalidOid; \
|
||||
(a)->blockNum = InvalidBlockNumber
|
||||
|
||||
#define INIT_BUFFERTAG(a,xx_reln,xx_blockNum) \
|
||||
{ \
|
||||
(a)->blockNum = xx_blockNum;\
|
||||
(a)->relId = RelationGetLRelId(xx_reln); \
|
||||
}
|
||||
|
||||
#define COPY_BUFFERTAG(a,b)\
|
||||
{ \
|
||||
(a)->blockNum = (b)->blockNum;\
|
||||
LRelIdAssign(*(a),*(b));\
|
||||
}
|
||||
|
||||
#define EQUAL_BUFFERTAG(a,b) \
|
||||
(((a)->blockNum == (b)->blockNum) &&\
|
||||
(OID_Equal((a)->relId.relId,(b)->relId.relId)))
|
||||
|
||||
|
||||
#define BAD_BUFFER_ID(bid) ((bid<1) || (bid>(NBuffers)))
|
||||
#define INVALID_DESCRIPTOR (-3)
|
||||
|
||||
/*
|
||||
* bletch hack -- anyplace that we declare space for relation or
|
||||
* database names, we just use '16', not a symbolic constant, to
|
||||
* specify their lengths. BM_NAMESIZE is the length of these names,
|
||||
* and is used in the buffer manager code. somebody with lots of
|
||||
* spare time should do this for all the other modules, too.
|
||||
*/
|
||||
#define BM_NAMESIZE 16
|
||||
|
||||
/*
|
||||
* struct sbufdesc -- shared buffer cache metadata for a single
|
||||
* shared buffer descriptor.
|
||||
*
|
||||
* We keep the name of the database and relation in which this
|
||||
* buffer appears in order to avoid a catalog lookup on cache
|
||||
* flush if we don't have the reldesc in the cache. It is also
|
||||
* possible that the relation to which this buffer belongs is
|
||||
* not visible to all backends at the time that it gets flushed.
|
||||
* Dbname, relname, dbid, and relid are enough to determine where
|
||||
* to put the buffer, for all storage managers.
|
||||
*/
|
||||
|
||||
struct sbufdesc {
|
||||
Buffer freeNext; /* link for freelist chain */
|
||||
Buffer freePrev;
|
||||
SHMEM_OFFSET data; /* pointer to data in buf pool */
|
||||
|
||||
/* tag and id must be together for table lookup to work */
|
||||
BufferTag tag; /* file/block identifier */
|
||||
int buf_id; /* maps global desc to local desc */
|
||||
|
||||
BufFlags flags; /* described below */
|
||||
int16 bufsmgr; /* storage manager id for buffer */
|
||||
unsigned refcount; /* # of times buffer is pinned */
|
||||
|
||||
char *sb_dbname; /* name of db in which buf belongs */
|
||||
char *sb_relname; /* name of reln */
|
||||
#ifdef HAS_TEST_AND_SET
|
||||
/* can afford a dedicated lock if test-and-set locks are available */
|
||||
slock_t io_in_progress_lock;
|
||||
#endif /* HAS_TEST_AND_SET */
|
||||
|
||||
/*
|
||||
* I padded this structure to a power of 2 (128 bytes on a MIPS) because
|
||||
* BufferDescriptorGetBuffer is called a billion times and it does an
|
||||
* C pointer subtraction (i.e., "x - y" -> array index of x relative
|
||||
* to y, which is calculated using division by struct size). Integer
|
||||
* ".div" hits you for 35 cycles, as opposed to a 1-cycle "sra" ...
|
||||
* this hack cut 10% off of the time to create the Wisconsin database!
|
||||
* It eats up more shared memory, of course, but we're (allegedly)
|
||||
* going to make some of these types bigger soon anyway... -pma 1/2/93
|
||||
*/
|
||||
#if defined(PORTNAME_ultrix4)
|
||||
char sb_pad[60]; /* no slock_t */
|
||||
#endif /* mips */
|
||||
#if defined(PORTNAME_sparc) || defined(PORTNAME_sparc_solaris) || defined(PORTNAME_irix5)
|
||||
char sb_pad[56]; /* has slock_t */
|
||||
#endif /* sparc || irix5 */
|
||||
#if defined(PORTNAME_hpux)
|
||||
char sb_pad[44]; /* has slock_t */
|
||||
#endif /* alpha */
|
||||
#if defined(PORTNAME_alpha)
|
||||
char sb_pad[40]; /* has slock_t */
|
||||
#endif /* alpha */
|
||||
};
|
||||
|
||||
/*
|
||||
* mao tracing buffer allocation
|
||||
*/
|
||||
|
||||
/*#define BMTRACE*/
|
||||
#ifdef BMTRACE
|
||||
|
||||
typedef struct _bmtrace {
|
||||
int bmt_pid;
|
||||
long bmt_buf;
|
||||
long bmt_dbid;
|
||||
long bmt_relid;
|
||||
int bmt_blkno;
|
||||
int bmt_op;
|
||||
|
||||
#define BMT_NOTUSED 0
|
||||
#define BMT_ALLOCFND 1
|
||||
#define BMT_ALLOCNOTFND 2
|
||||
#define BMT_DEALLOC 3
|
||||
|
||||
} bmtrace;
|
||||
|
||||
#endif /* BMTRACE */
|
||||
|
||||
|
||||
/*
|
||||
* Bufmgr Interface:
|
||||
*/
|
||||
|
||||
/* Internal routines: only called by buf.c */
|
||||
|
||||
/*freelist.c*/
|
||||
extern void AddBufferToFreelist(BufferDesc *bf);
|
||||
extern void PinBuffer(BufferDesc *buf);
|
||||
extern void PinBuffer_Debug(char *file, int line, BufferDesc *buf);
|
||||
extern void UnpinBuffer(BufferDesc *buf);
|
||||
extern void UnpinBuffer_Debug(char *file, int line, BufferDesc *buf);
|
||||
extern BufferDesc *GetFreeBuffer(void);
|
||||
extern void InitFreeList(bool init);
|
||||
extern void DBG_FreeListCheck(int nfree);
|
||||
|
||||
/* buf_table.c */
|
||||
extern void InitBufTable(void);
|
||||
extern BufferDesc *BufTableLookup(BufferTag *tagPtr);
|
||||
extern bool BufTableDelete(BufferDesc *buf);
|
||||
extern bool BufTableInsert(BufferDesc *buf);
|
||||
extern void DBG_LookupListCheck(int nlookup);
|
||||
|
||||
/* bufmgr.c */
|
||||
extern BufferDesc *BufferDescriptors;
|
||||
extern BufferBlock BufferBlocks;
|
||||
extern long *PrivateRefCount;
|
||||
extern long *LastRefCount;
|
||||
extern SPINLOCK BufMgrLock;
|
||||
|
||||
/* localbuf.c */
|
||||
extern long *LocalRefCount;
|
||||
extern BufferDesc *LocalBufferDescriptors;
|
||||
extern int NLocBuffer;
|
||||
|
||||
extern BufferDesc *LocalBufferAlloc(Relation reln, BlockNumber blockNum,
|
||||
bool *foundPtr);
|
||||
extern int WriteLocalBuffer(Buffer buffer, bool release);
|
||||
extern int FlushLocalBuffer(Buffer buffer);
|
||||
extern void InitLocalBuffer();
|
||||
extern void LocalBufferSync();
|
||||
extern void ResetLocalBufferPool();
|
||||
|
||||
#endif /* BUFMGR_INTERNALS_H */
|
||||
16
src/backend/storage/buffer/Makefile.inc
Normal file
16
src/backend/storage/buffer/Makefile.inc
Normal file
@@ -0,0 +1,16 @@
|
||||
#-------------------------------------------------------------------------
|
||||
#
|
||||
# Makefile.inc--
|
||||
# Makefile for storage/buffer
|
||||
#
|
||||
# Copyright (c) 1994, Regents of the University of California
|
||||
#
|
||||
#
|
||||
# IDENTIFICATION
|
||||
# $Header: /cvsroot/pgsql/src/backend/storage/buffer/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:53 scrappy Exp $
|
||||
#
|
||||
#-------------------------------------------------------------------------
|
||||
|
||||
SUBSRCS+= buf_table.c buf_init.c bufmgr.c freelist.c localbuf.c
|
||||
|
||||
SRCS_SITEMGR+= buf_table.c buf_init.c freelist.c
|
||||
280
src/backend/storage/buffer/buf_init.c
Normal file
280
src/backend/storage/buffer/buf_init.c
Normal file
@@ -0,0 +1,280 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* buf_init.c--
|
||||
* buffer manager initialization routines
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.1.1.1 1996/07/09 06:21:53 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include <sys/file.h>
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <signal.h>
|
||||
|
||||
/* declarations split between these three files */
|
||||
#include "storage/buf.h"
|
||||
#include "storage/buf_internals.h"
|
||||
#include "storage/bufmgr.h"
|
||||
|
||||
#include "storage/fd.h"
|
||||
#include "storage/ipc.h"
|
||||
#include "storage/shmem.h"
|
||||
#include "storage/spin.h"
|
||||
#include "storage/smgr.h"
|
||||
#include "storage/lmgr.h"
|
||||
#include "miscadmin.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/hsearch.h"
|
||||
#include "utils/elog.h"
|
||||
#include "utils/memutils.h"
|
||||
#include "executor/execdebug.h" /* for NDirectFileRead */
|
||||
#include "catalog/catalog.h"
|
||||
|
||||
/*
|
||||
* if BMTRACE is defined, we trace the last 200 buffer allocations and
|
||||
* deallocations in a circular buffer in shared memory.
|
||||
*/
|
||||
#ifdef BMTRACE
|
||||
bmtrace *TraceBuf;
|
||||
long *CurTraceBuf;
|
||||
#define BMT_LIMIT 200
|
||||
#endif /* BMTRACE */
|
||||
int ShowPinTrace = 0;
|
||||
|
||||
int NBuffers = NDBUFS; /* NDBUFS defined in miscadmin.h */
|
||||
int Data_Descriptors;
|
||||
int Free_List_Descriptor;
|
||||
int Lookup_List_Descriptor;
|
||||
int Num_Descriptors;
|
||||
|
||||
BufferDesc *BufferDescriptors;
|
||||
BufferBlock BufferBlocks;
|
||||
#ifndef HAS_TEST_AND_SET
|
||||
long *NWaitIOBackendP;
|
||||
#endif
|
||||
|
||||
extern IpcSemaphoreId WaitIOSemId;
|
||||
|
||||
long *PrivateRefCount; /* also used in freelist.c */
|
||||
long *LastRefCount; /* refcounts of last ExecMain level */
|
||||
|
||||
/*
|
||||
* Data Structures:
|
||||
* buffers live in a freelist and a lookup data structure.
|
||||
*
|
||||
*
|
||||
* Buffer Lookup:
|
||||
* Two important notes. First, the buffer has to be
|
||||
* available for lookup BEFORE an IO begins. Otherwise
|
||||
* a second process trying to read the buffer will
|
||||
* allocate its own copy and the buffeer pool will
|
||||
* become inconsistent.
|
||||
*
|
||||
* Buffer Replacement:
|
||||
* see freelist.c. A buffer cannot be replaced while in
|
||||
* use either by data manager or during IO.
|
||||
*
|
||||
* WriteBufferBack:
|
||||
* currently, a buffer is only written back at the time
|
||||
* it is selected for replacement. It should
|
||||
* be done sooner if possible to reduce latency of
|
||||
* BufferAlloc(). Maybe there should be a daemon process.
|
||||
*
|
||||
* Synchronization/Locking:
|
||||
*
|
||||
* BufMgrLock lock -- must be acquired before manipulating the
|
||||
* buffer queues (lookup/freelist). Must be released
|
||||
* before exit and before doing any IO.
|
||||
*
|
||||
* IO_IN_PROGRESS -- this is a flag in the buffer descriptor.
|
||||
* It must be set when an IO is initiated and cleared at
|
||||
* the end of the IO. It is there to make sure that one
|
||||
* process doesn't start to use a buffer while another is
|
||||
* faulting it in. see IOWait/IOSignal.
|
||||
*
|
||||
* refcount -- A buffer is pinned during IO and immediately
|
||||
* after a BufferAlloc(). A buffer is always either pinned
|
||||
* or on the freelist but never both. The buffer must be
|
||||
* released, written, or flushed before the end of
|
||||
* transaction.
|
||||
*
|
||||
* PrivateRefCount -- Each buffer also has a private refcount the keeps
|
||||
* track of the number of times the buffer is pinned in the current
|
||||
* processes. This is used for two purposes, first, if we pin a
|
||||
* a buffer more than once, we only need to change the shared refcount
|
||||
* once, thus only lock the buffer pool once, second, when a transaction
|
||||
* aborts, it should only unpin the buffers exactly the number of times it
|
||||
* has pinned them, so that it will not blow away buffers of another
|
||||
* backend.
|
||||
*
|
||||
*/
|
||||
|
||||
SPINLOCK BufMgrLock;
|
||||
|
||||
/* delayed write: TRUE on, FALSE off */
|
||||
int LateWrite = TRUE;
|
||||
|
||||
int ReadBufferCount;
|
||||
int BufferHitCount;
|
||||
int BufferFlushCount;
|
||||
|
||||
|
||||
/*
|
||||
* Initialize module:
|
||||
*
|
||||
* should calculate size of pool dynamically based on the
|
||||
* amount of available memory.
|
||||
*/
|
||||
void
|
||||
InitBufferPool(IPCKey key)
|
||||
{
|
||||
bool foundBufs,foundDescs;
|
||||
int i;
|
||||
|
||||
Data_Descriptors = NBuffers;
|
||||
Free_List_Descriptor = Data_Descriptors;
|
||||
Lookup_List_Descriptor = Data_Descriptors + 1;
|
||||
Num_Descriptors = Data_Descriptors + 1;
|
||||
|
||||
SpinAcquire(BufMgrLock);
|
||||
|
||||
#ifdef BMTRACE
|
||||
CurTraceBuf = (long *) ShmemInitStruct("Buffer trace",
|
||||
(BMT_LIMIT * sizeof(bmtrace)) + sizeof(long),
|
||||
&foundDescs);
|
||||
if (!foundDescs)
|
||||
memset(CurTraceBuf, 0, (BMT_LIMIT * sizeof(bmtrace)) + sizeof(long));
|
||||
|
||||
TraceBuf = (bmtrace *) &(CurTraceBuf[1]);
|
||||
#endif
|
||||
|
||||
BufferDescriptors = (BufferDesc *)
|
||||
ShmemInitStruct("Buffer Descriptors",
|
||||
Num_Descriptors*sizeof(BufferDesc),&foundDescs);
|
||||
|
||||
BufferBlocks = (BufferBlock)
|
||||
ShmemInitStruct("Buffer Blocks",
|
||||
NBuffers*BLCKSZ,&foundBufs);
|
||||
|
||||
#ifndef HAS_TEST_AND_SET
|
||||
{
|
||||
bool foundNWaitIO;
|
||||
|
||||
NWaitIOBackendP = (long *)ShmemInitStruct("#Backends Waiting IO",
|
||||
sizeof(long),
|
||||
&foundNWaitIO);
|
||||
if (!foundNWaitIO)
|
||||
*NWaitIOBackendP = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (foundDescs || foundBufs) {
|
||||
|
||||
/* both should be present or neither */
|
||||
Assert(foundDescs && foundBufs);
|
||||
|
||||
} else {
|
||||
BufferDesc *buf;
|
||||
unsigned long block;
|
||||
|
||||
buf = BufferDescriptors;
|
||||
block = (unsigned long) BufferBlocks;
|
||||
|
||||
/*
|
||||
* link the buffers into a circular, doubly-linked list to
|
||||
* initialize free list. Still don't know anything about
|
||||
* replacement strategy in this file.
|
||||
*/
|
||||
for (i = 0; i < Data_Descriptors; block+=BLCKSZ,buf++,i++) {
|
||||
Assert(ShmemIsValid((unsigned long)block));
|
||||
|
||||
buf->freeNext = i+1;
|
||||
buf->freePrev = i-1;
|
||||
|
||||
CLEAR_BUFFERTAG(&(buf->tag));
|
||||
buf->data = MAKE_OFFSET(block);
|
||||
buf->flags = (BM_DELETED | BM_FREE | BM_VALID);
|
||||
buf->refcount = 0;
|
||||
buf->buf_id = i;
|
||||
#ifdef HAS_TEST_AND_SET
|
||||
S_INIT_LOCK(&(buf->io_in_progress_lock));
|
||||
#endif
|
||||
}
|
||||
|
||||
/* close the circular queue */
|
||||
BufferDescriptors[0].freePrev = Data_Descriptors-1;
|
||||
BufferDescriptors[Data_Descriptors-1].freeNext = 0;
|
||||
}
|
||||
|
||||
/* Init the rest of the module */
|
||||
InitBufTable();
|
||||
InitFreeList(!foundDescs);
|
||||
|
||||
SpinRelease(BufMgrLock);
|
||||
|
||||
#ifndef HAS_TEST_AND_SET
|
||||
{
|
||||
int status;
|
||||
WaitIOSemId = IpcSemaphoreCreate(IPCKeyGetWaitIOSemaphoreKey(key),
|
||||
1, IPCProtection, 0, 1, &status);
|
||||
}
|
||||
#endif
|
||||
PrivateRefCount = (long *) calloc(NBuffers, sizeof(long));
|
||||
LastRefCount = (long *) calloc(NBuffers, sizeof(long));
|
||||
}
|
||||
|
||||
/* -----------------------------------------------------
|
||||
* BufferShmemSize
|
||||
*
|
||||
* compute the size of shared memory for the buffer pool including
|
||||
* data pages, buffer descriptors, hash tables, etc.
|
||||
* ----------------------------------------------------
|
||||
*/
|
||||
int
|
||||
BufferShmemSize()
|
||||
{
|
||||
int size = 0;
|
||||
int nbuckets;
|
||||
int nsegs;
|
||||
int tmp;
|
||||
|
||||
nbuckets = 1 << (int)my_log2((NBuffers - 1) / DEF_FFACTOR + 1);
|
||||
nsegs = 1 << (int)my_log2((nbuckets - 1) / DEF_SEGSIZE + 1);
|
||||
|
||||
/* size of shmem binding table */
|
||||
size += MAXALIGN(my_log2(BTABLE_SIZE) * sizeof(void *)); /* HTAB->dir */
|
||||
size += MAXALIGN(sizeof(HHDR)); /* HTAB->hctl */
|
||||
size += MAXALIGN(DEF_SEGSIZE * sizeof(SEGMENT));
|
||||
size += BUCKET_ALLOC_INCR *
|
||||
(MAXALIGN(sizeof(BUCKET_INDEX)) +
|
||||
MAXALIGN(BTABLE_KEYSIZE) +
|
||||
MAXALIGN(BTABLE_DATASIZE));
|
||||
|
||||
/* size of buffer descriptors */
|
||||
size += MAXALIGN((NBuffers + 1) * sizeof(BufferDesc));
|
||||
|
||||
/* size of data pages */
|
||||
size += NBuffers * MAXALIGN(BLCKSZ);
|
||||
|
||||
/* size of buffer hash table */
|
||||
size += MAXALIGN(my_log2(NBuffers) * sizeof(void *)); /* HTAB->dir */
|
||||
size += MAXALIGN(sizeof(HHDR)); /* HTAB->hctl */
|
||||
size += nsegs * MAXALIGN(DEF_SEGSIZE * sizeof(SEGMENT));
|
||||
tmp = (int)ceil((double)NBuffers/BUCKET_ALLOC_INCR);
|
||||
size += tmp * BUCKET_ALLOC_INCR *
|
||||
(MAXALIGN(sizeof(BUCKET_INDEX)) +
|
||||
MAXALIGN(sizeof(BufferTag)) +
|
||||
MAXALIGN(sizeof(Buffer)));
|
||||
|
||||
#ifdef BMTRACE
|
||||
size += (BMT_LIMIT * sizeof(bmtrace)) + sizeof(long);
|
||||
#endif
|
||||
return size;
|
||||
}
|
||||
|
||||
|
||||
162
src/backend/storage/buffer/buf_table.c
Normal file
162
src/backend/storage/buffer/buf_table.c
Normal file
@@ -0,0 +1,162 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* buf_table.c--
|
||||
* routines for finding buffers in the buffer pool.
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_table.c,v 1.1.1.1 1996/07/09 06:21:53 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
/*
|
||||
* OLD COMMENTS
|
||||
*
|
||||
* Data Structures:
|
||||
*
|
||||
* Buffers are identified by their BufferTag (buf.h). This
|
||||
* file contains routines for allocating a shmem hash table to
|
||||
* map buffer tags to buffer descriptors.
|
||||
*
|
||||
* Synchronization:
|
||||
*
|
||||
* All routines in this file assume buffer manager spinlock is
|
||||
* held by their caller.
|
||||
*/
|
||||
#include "storage/bufmgr.h"
|
||||
#include "storage/buf_internals.h" /* where the declarations go */
|
||||
#include "storage/shmem.h"
|
||||
#include "storage/spin.h"
|
||||
#include "utils/hsearch.h"
|
||||
#include "utils/elog.h"
|
||||
|
||||
static HTAB *SharedBufHash;
|
||||
|
||||
extern HTAB *ShmemInitHash();
|
||||
|
||||
typedef struct lookup {
|
||||
BufferTag key;
|
||||
Buffer id;
|
||||
} LookupEnt;
|
||||
|
||||
/*
|
||||
* Initialize shmem hash table for mapping buffers
|
||||
*/
|
||||
void
|
||||
InitBufTable()
|
||||
{
|
||||
HASHCTL info;
|
||||
int hash_flags;
|
||||
|
||||
/* assume lock is held */
|
||||
|
||||
/* BufferTag maps to Buffer */
|
||||
info.keysize = sizeof(BufferTag);
|
||||
info.datasize = sizeof(Buffer);
|
||||
info.hash = tag_hash;
|
||||
|
||||
hash_flags = (HASH_ELEM | HASH_FUNCTION);
|
||||
|
||||
|
||||
SharedBufHash = (HTAB *) ShmemInitHash("Shared Buf Lookup Table",
|
||||
NBuffers,NBuffers,
|
||||
&info,hash_flags);
|
||||
|
||||
if (! SharedBufHash) {
|
||||
elog(FATAL,"couldn't initialize shared buffer pool Hash Tbl");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
BufferDesc *
|
||||
BufTableLookup(BufferTag *tagPtr)
|
||||
{
|
||||
LookupEnt * result;
|
||||
bool found;
|
||||
|
||||
if (tagPtr->blockNum == P_NEW)
|
||||
return(NULL);
|
||||
|
||||
result = (LookupEnt *)
|
||||
hash_search(SharedBufHash,(char *) tagPtr,HASH_FIND,&found);
|
||||
|
||||
if (! result){
|
||||
elog(WARN,"BufTableLookup: BufferLookup table corrupted");
|
||||
return(NULL);
|
||||
}
|
||||
if (! found) {
|
||||
return(NULL);
|
||||
}
|
||||
return(&(BufferDescriptors[result->id]));
|
||||
}
|
||||
|
||||
/*
|
||||
* BufTableDelete
|
||||
*/
|
||||
bool
|
||||
BufTableDelete(BufferDesc *buf)
|
||||
{
|
||||
LookupEnt * result;
|
||||
bool found;
|
||||
|
||||
/* buffer not initialized or has been removed from
|
||||
* table already. BM_DELETED keeps us from removing
|
||||
* buffer twice.
|
||||
*/
|
||||
if (buf->flags & BM_DELETED) {
|
||||
return(TRUE);
|
||||
}
|
||||
|
||||
buf->flags |= BM_DELETED;
|
||||
|
||||
result = (LookupEnt *)
|
||||
hash_search(SharedBufHash,(char *) &(buf->tag),HASH_REMOVE,&found);
|
||||
|
||||
if (! (result && found)) {
|
||||
elog(WARN,"BufTableDelete: BufferLookup table corrupted");
|
||||
return(FALSE);
|
||||
}
|
||||
|
||||
return(TRUE);
|
||||
}
|
||||
|
||||
bool
|
||||
BufTableInsert(BufferDesc *buf)
|
||||
{
|
||||
LookupEnt * result;
|
||||
bool found;
|
||||
|
||||
/* cannot insert it twice */
|
||||
Assert (buf->flags & BM_DELETED);
|
||||
buf->flags &= ~(BM_DELETED);
|
||||
|
||||
result = (LookupEnt *)
|
||||
hash_search(SharedBufHash,(char *) &(buf->tag),HASH_ENTER,&found);
|
||||
|
||||
if (! result) {
|
||||
Assert(0);
|
||||
elog(WARN,"BufTableInsert: BufferLookup table corrupted");
|
||||
return(FALSE);
|
||||
}
|
||||
/* found something else in the table ! */
|
||||
if (found) {
|
||||
Assert(0);
|
||||
elog(WARN,"BufTableInsert: BufferLookup table corrupted");
|
||||
return(FALSE);
|
||||
}
|
||||
|
||||
result->id = buf->buf_id;
|
||||
return(TRUE);
|
||||
}
|
||||
|
||||
/* prints out collision stats for the buf table */
|
||||
void
|
||||
DBG_LookupListCheck(int nlookup)
|
||||
{
|
||||
nlookup = 10;
|
||||
|
||||
hash_stats("Shared",SharedBufHash);
|
||||
}
|
||||
1581
src/backend/storage/buffer/bufmgr.c
Normal file
1581
src/backend/storage/buffer/bufmgr.c
Normal file
File diff suppressed because it is too large
Load Diff
285
src/backend/storage/buffer/freelist.c
Normal file
285
src/backend/storage/buffer/freelist.c
Normal file
@@ -0,0 +1,285 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* freelist.c--
|
||||
* routines for manipulating the buffer pool's replacement strategy
|
||||
* freelist.
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/freelist.c,v 1.1.1.1 1996/07/09 06:21:54 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
/*
|
||||
* OLD COMMENTS
|
||||
*
|
||||
* Data Structures:
|
||||
* SharedFreeList is a circular queue. Notice that this
|
||||
* is a shared memory queue so the next/prev "ptrs" are
|
||||
* buffer ids, not addresses.
|
||||
*
|
||||
* Sync: all routines in this file assume that the buffer
|
||||
* semaphore has been acquired by the caller.
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include "storage/bufmgr.h"
|
||||
#include "storage/buf_internals.h" /* where declarations go */
|
||||
#include "storage/spin.h"
|
||||
#include "utils/elog.h"
|
||||
|
||||
|
||||
static BufferDesc *SharedFreeList;
|
||||
|
||||
/* only actually used in debugging. The lock
|
||||
* should be acquired before calling the freelist manager.
|
||||
*/
|
||||
extern SPINLOCK BufMgrLock;
|
||||
|
||||
#define IsInQueue(bf) \
|
||||
Assert((bf->freeNext != INVALID_DESCRIPTOR));\
|
||||
Assert((bf->freePrev != INVALID_DESCRIPTOR));\
|
||||
Assert((bf->flags & BM_FREE))
|
||||
|
||||
#define NotInQueue(bf) \
|
||||
Assert((bf->freeNext == INVALID_DESCRIPTOR));\
|
||||
Assert((bf->freePrev == INVALID_DESCRIPTOR));\
|
||||
Assert(! (bf->flags & BM_FREE))
|
||||
|
||||
|
||||
/*
|
||||
* AddBufferToFreelist --
|
||||
*
|
||||
* In theory, this is the only routine that needs to be changed
|
||||
* if the buffer replacement strategy changes. Just change
|
||||
* the manner in which buffers are added to the freelist queue.
|
||||
* Currently, they are added on an LRU basis.
|
||||
*/
|
||||
void
|
||||
AddBufferToFreelist(BufferDesc *bf)
|
||||
{
|
||||
#ifdef BMTRACE
|
||||
_bm_trace(bf->tag.relId.dbId, bf->tag.relId.relId, bf->tag.blockNum,
|
||||
BufferDescriptorGetBuffer(bf), BMT_DEALLOC);
|
||||
#endif /* BMTRACE */
|
||||
NotInQueue(bf);
|
||||
|
||||
/* change bf so it points to inFrontOfNew and its successor */
|
||||
bf->freePrev = SharedFreeList->freePrev;
|
||||
bf->freeNext = Free_List_Descriptor;
|
||||
|
||||
/* insert new into chain */
|
||||
BufferDescriptors[bf->freeNext].freePrev = bf->buf_id;
|
||||
BufferDescriptors[bf->freePrev].freeNext = bf->buf_id;
|
||||
}
|
||||
|
||||
#undef PinBuffer
|
||||
|
||||
/*
|
||||
* PinBuffer -- make buffer unavailable for replacement.
|
||||
*/
|
||||
void
|
||||
PinBuffer(BufferDesc *buf)
|
||||
{
|
||||
long b;
|
||||
|
||||
/* Assert (buf->refcount < 25); */
|
||||
|
||||
if (buf->refcount == 0) {
|
||||
IsInQueue(buf);
|
||||
|
||||
/* remove from freelist queue */
|
||||
BufferDescriptors[buf->freeNext].freePrev = buf->freePrev;
|
||||
BufferDescriptors[buf->freePrev].freeNext = buf->freeNext;
|
||||
buf->freeNext = buf->freePrev = INVALID_DESCRIPTOR;
|
||||
|
||||
/* mark buffer as no longer free */
|
||||
buf->flags &= ~BM_FREE;
|
||||
} else {
|
||||
NotInQueue(buf);
|
||||
}
|
||||
|
||||
b = BufferDescriptorGetBuffer(buf) - 1;
|
||||
Assert(PrivateRefCount[b] >= 0);
|
||||
if (PrivateRefCount[b] == 0 && LastRefCount[b] == 0)
|
||||
buf->refcount++;
|
||||
PrivateRefCount[b]++;
|
||||
}
|
||||
|
||||
void
|
||||
PinBuffer_Debug(char *file, int line, BufferDesc *buf)
|
||||
{
|
||||
PinBuffer(buf);
|
||||
if (ShowPinTrace) {
|
||||
Buffer buffer = BufferDescriptorGetBuffer(buf);
|
||||
|
||||
fprintf(stderr, "PIN(Pin) %ld relname = %s, blockNum = %d, \
|
||||
refcount = %ld, file: %s, line: %d\n",
|
||||
buffer, buf->sb_relname, buf->tag.blockNum,
|
||||
PrivateRefCount[buffer - 1], file, line);
|
||||
}
|
||||
}
|
||||
|
||||
#undef UnpinBuffer
|
||||
|
||||
/*
|
||||
* UnpinBuffer -- make buffer available for replacement.
|
||||
*/
|
||||
void
|
||||
UnpinBuffer(BufferDesc *buf)
|
||||
{
|
||||
long b = BufferDescriptorGetBuffer(buf) - 1;
|
||||
|
||||
Assert(buf->refcount);
|
||||
Assert(PrivateRefCount[b] > 0);
|
||||
PrivateRefCount[b]--;
|
||||
if (PrivateRefCount[b] == 0 && LastRefCount[b] == 0)
|
||||
buf->refcount--;
|
||||
NotInQueue(buf);
|
||||
|
||||
if (buf->refcount == 0) {
|
||||
AddBufferToFreelist(buf);
|
||||
buf->flags |= BM_FREE;
|
||||
} else {
|
||||
/* do nothing */
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
UnpinBuffer_Debug(char *file, int line, BufferDesc *buf)
|
||||
{
|
||||
UnpinBuffer(buf);
|
||||
if (ShowPinTrace) {
|
||||
Buffer buffer = BufferDescriptorGetBuffer(buf);
|
||||
|
||||
fprintf(stderr, "UNPIN(Unpin) %ld relname = %s, blockNum = %d, \
|
||||
refcount = %ld, file: %s, line: %d\n",
|
||||
buffer, buf->sb_relname, buf->tag.blockNum,
|
||||
PrivateRefCount[buffer - 1], file, line);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* GetFreeBuffer() -- get the 'next' buffer from the freelist.
|
||||
*
|
||||
*/
|
||||
BufferDesc *
|
||||
GetFreeBuffer()
|
||||
{
|
||||
BufferDesc *buf;
|
||||
|
||||
if (Free_List_Descriptor == SharedFreeList->freeNext) {
|
||||
|
||||
/* queue is empty. All buffers in the buffer pool are pinned. */
|
||||
elog(WARN,"out of free buffers: time to abort !\n");
|
||||
return(NULL);
|
||||
}
|
||||
buf = &(BufferDescriptors[SharedFreeList->freeNext]);
|
||||
|
||||
/* remove from freelist queue */
|
||||
BufferDescriptors[buf->freeNext].freePrev = buf->freePrev;
|
||||
BufferDescriptors[buf->freePrev].freeNext = buf->freeNext;
|
||||
buf->freeNext = buf->freePrev = INVALID_DESCRIPTOR;
|
||||
|
||||
buf->flags &= ~(BM_FREE);
|
||||
|
||||
return(buf);
|
||||
}
|
||||
|
||||
/*
|
||||
* InitFreeList -- initialize the dummy buffer descriptor used
|
||||
* as a freelist head.
|
||||
*
|
||||
* Assume: All of the buffers are already linked in a circular
|
||||
* queue. Only called by postmaster and only during
|
||||
* initialization.
|
||||
*/
|
||||
void
|
||||
InitFreeList(bool init)
|
||||
{
|
||||
SharedFreeList = &(BufferDescriptors[Free_List_Descriptor]);
|
||||
|
||||
if (init) {
|
||||
/* we only do this once, normally the postmaster */
|
||||
SharedFreeList->data = INVALID_OFFSET;
|
||||
SharedFreeList->flags = 0;
|
||||
SharedFreeList->flags &= ~(BM_VALID | BM_DELETED | BM_FREE);
|
||||
SharedFreeList->buf_id = Free_List_Descriptor;
|
||||
|
||||
/* insert it into a random spot in the circular queue */
|
||||
SharedFreeList->freeNext = BufferDescriptors[0].freeNext;
|
||||
SharedFreeList->freePrev = 0;
|
||||
BufferDescriptors[SharedFreeList->freeNext].freePrev =
|
||||
BufferDescriptors[SharedFreeList->freePrev].freeNext =
|
||||
Free_List_Descriptor;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* print out the free list and check for breaks.
|
||||
*/
|
||||
void
|
||||
DBG_FreeListCheck(int nfree)
|
||||
{
|
||||
int i;
|
||||
BufferDesc *buf;
|
||||
|
||||
buf = &(BufferDescriptors[SharedFreeList->freeNext]);
|
||||
for (i=0;i<nfree;i++,buf = &(BufferDescriptors[buf->freeNext])) {
|
||||
|
||||
if (! (buf->flags & (BM_FREE))){
|
||||
if (buf != SharedFreeList) {
|
||||
printf("\tfree list corrupted: %d flags %x\n",
|
||||
buf->buf_id,buf->flags);
|
||||
} else {
|
||||
printf("\tfree list corrupted: too short -- %d not %d\n",
|
||||
i,nfree);
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
if ((BufferDescriptors[buf->freeNext].freePrev != buf->buf_id) ||
|
||||
(BufferDescriptors[buf->freePrev].freeNext != buf->buf_id)) {
|
||||
printf("\tfree list links corrupted: %d %ld %ld\n",
|
||||
buf->buf_id,buf->freePrev,buf->freeNext);
|
||||
}
|
||||
|
||||
}
|
||||
if (buf != SharedFreeList) {
|
||||
printf("\tfree list corrupted: %d-th buffer is %d\n",
|
||||
nfree,buf->buf_id);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* PrintBufferFreeList -
|
||||
* prints the buffer free list, for debugging
|
||||
*/
|
||||
void
|
||||
PrintBufferFreeList()
|
||||
{
|
||||
BufferDesc *buf;
|
||||
|
||||
if (SharedFreeList->freeNext == Free_List_Descriptor) {
|
||||
printf("free list is empty.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
buf = &(BufferDescriptors[SharedFreeList->freeNext]);
|
||||
for (;;) {
|
||||
int i = (buf - BufferDescriptors);
|
||||
printf("[%-2d] (%s, %d) flags=0x%x, refcnt=%d %ld, nxt=%ld prv=%ld)\n",
|
||||
i, buf->sb_relname, buf->tag.blockNum,
|
||||
buf->flags, buf->refcount, PrivateRefCount[i],
|
||||
buf->freeNext, buf->freePrev);
|
||||
|
||||
if (buf->freeNext == Free_List_Descriptor)
|
||||
break;
|
||||
|
||||
buf = &(BufferDescriptors[buf->freeNext]);
|
||||
}
|
||||
}
|
||||
284
src/backend/storage/buffer/localbuf.c
Normal file
284
src/backend/storage/buffer/localbuf.c
Normal file
@@ -0,0 +1,284 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* localbuf.c--
|
||||
* local buffer manager. Fast buffer manager for temporary tables
|
||||
* or special cases when the operation is not visible to other backends.
|
||||
*
|
||||
* When a relation is being created, the descriptor will have rd_islocal
|
||||
* set to indicate that the local buffer manager should be used. During
|
||||
* the same transaction the relation is being created, any inserts or
|
||||
* selects from the newly created relation will use the local buffer
|
||||
* pool. rd_islocal is reset at the end of a transaction (commit/abort).
|
||||
* This is useful for queries like SELECT INTO TABLE and create index.
|
||||
*
|
||||
* Copyright (c) 1994-5, Regents of the University of California
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/localbuf.c,v 1.1.1.1 1996/07/09 06:21:54 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include <sys/file.h>
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <signal.h>
|
||||
|
||||
/* declarations split between these three files */
|
||||
#include "storage/buf.h"
|
||||
#include "storage/buf_internals.h"
|
||||
#include "storage/bufmgr.h"
|
||||
|
||||
#include "storage/fd.h"
|
||||
#include "storage/ipc.h"
|
||||
#include "storage/shmem.h"
|
||||
#include "storage/spin.h"
|
||||
#include "storage/smgr.h"
|
||||
#include "storage/lmgr.h"
|
||||
#include "miscadmin.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/hsearch.h"
|
||||
#include "utils/elog.h"
|
||||
#include "utils/memutils.h"
|
||||
#include "executor/execdebug.h" /* for NDirectFileRead */
|
||||
#include "catalog/catalog.h"
|
||||
|
||||
int NLocBuffer = 64;
|
||||
BufferDesc *LocalBufferDescriptors = NULL;
|
||||
long *LocalRefCount = NULL;
|
||||
|
||||
static int nextFreeLocalBuf = 0;
|
||||
|
||||
/*#define LBDEBUG*/
|
||||
|
||||
/*
|
||||
* LocalBufferAlloc -
|
||||
* allocate a local buffer. We do round robin allocation for now.
|
||||
*/
|
||||
BufferDesc *
|
||||
LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
|
||||
{
|
||||
int i;
|
||||
BufferDesc *bufHdr = (BufferDesc *) NULL;
|
||||
|
||||
if (blockNum == P_NEW) {
|
||||
blockNum = reln->rd_nblocks;
|
||||
reln->rd_nblocks++;
|
||||
}
|
||||
|
||||
/* a low tech search for now -- not optimized for scans */
|
||||
for (i=0; i < NLocBuffer; i++) {
|
||||
if (LocalBufferDescriptors[i].tag.relId.relId == reln->rd_id &&
|
||||
LocalBufferDescriptors[i].tag.blockNum == blockNum) {
|
||||
|
||||
#ifdef LBDEBUG
|
||||
fprintf(stderr, "LB ALLOC (%d,%d) %d\n",
|
||||
reln->rd_id, blockNum, -i-1);
|
||||
#endif
|
||||
LocalRefCount[i]++;
|
||||
*foundPtr = TRUE;
|
||||
return &LocalBufferDescriptors[i];
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef LBDEBUG
|
||||
fprintf(stderr, "LB ALLOC (%d,%d) %d\n",
|
||||
reln->rd_id, blockNum, -nextFreeLocalBuf-1);
|
||||
#endif
|
||||
|
||||
/* need to get a new buffer (round robin for now) */
|
||||
for(i=0; i < NLocBuffer; i++) {
|
||||
int b = (nextFreeLocalBuf + i) % NLocBuffer;
|
||||
|
||||
if (LocalRefCount[b]==0) {
|
||||
bufHdr = &LocalBufferDescriptors[b];
|
||||
LocalRefCount[b]++;
|
||||
nextFreeLocalBuf = (b + 1) % NLocBuffer;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (bufHdr==NULL)
|
||||
elog(WARN, "no empty local buffer.");
|
||||
|
||||
/*
|
||||
* this buffer is not referenced but it might still be dirty (the
|
||||
* last transaction to touch it doesn't need its contents but has
|
||||
* not flushed it). if that's the case, write it out before
|
||||
* reusing it!
|
||||
*/
|
||||
if (bufHdr->flags & BM_DIRTY) {
|
||||
Relation bufrel = RelationIdCacheGetRelation(bufHdr->tag.relId.relId);
|
||||
|
||||
Assert(bufrel != NULL);
|
||||
|
||||
/* flush this page */
|
||||
smgrwrite(bufrel->rd_rel->relsmgr, bufrel, bufHdr->tag.blockNum,
|
||||
(char *) MAKE_PTR(bufHdr->data));
|
||||
}
|
||||
|
||||
/*
|
||||
* it's all ours now.
|
||||
*/
|
||||
bufHdr->tag.relId.relId = reln->rd_id;
|
||||
bufHdr->tag.blockNum = blockNum;
|
||||
bufHdr->flags &= ~BM_DIRTY;
|
||||
|
||||
/*
|
||||
* lazy memory allocation. (see MAKE_PTR for why we need to do
|
||||
* MAKE_OFFSET.)
|
||||
*/
|
||||
if (bufHdr->data == (SHMEM_OFFSET)0) {
|
||||
char *data = (char *)malloc(BLCKSZ);
|
||||
|
||||
bufHdr->data = MAKE_OFFSET(data);
|
||||
}
|
||||
|
||||
*foundPtr = FALSE;
|
||||
return bufHdr;
|
||||
}
|
||||
|
||||
/*
|
||||
* WriteLocalBuffer -
|
||||
* writes out a local buffer
|
||||
*/
|
||||
int
|
||||
WriteLocalBuffer(Buffer buffer, bool release)
|
||||
{
|
||||
int bufid;
|
||||
|
||||
Assert(BufferIsLocal(buffer));
|
||||
|
||||
#ifdef LBDEBUG
|
||||
fprintf(stderr, "LB WRITE %d\n", buffer);
|
||||
#endif
|
||||
|
||||
bufid = - (buffer + 1);
|
||||
LocalBufferDescriptors[bufid].flags |= BM_DIRTY;
|
||||
|
||||
if (release) {
|
||||
Assert(LocalRefCount[bufid] > 0);
|
||||
LocalRefCount[bufid]--;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* FlushLocalBuffer -
|
||||
* flushes a local buffer
|
||||
*/
|
||||
int
|
||||
FlushLocalBuffer(Buffer buffer)
|
||||
{
|
||||
int bufid;
|
||||
Relation bufrel;
|
||||
BufferDesc *bufHdr;
|
||||
|
||||
Assert(BufferIsLocal(buffer));
|
||||
|
||||
#ifdef LBDEBUG
|
||||
fprintf(stderr, "LB FLUSH %d\n", buffer);
|
||||
#endif
|
||||
|
||||
bufid = - (buffer + 1);
|
||||
bufHdr = &LocalBufferDescriptors[bufid];
|
||||
bufHdr->flags &= ~BM_DIRTY;
|
||||
bufrel = RelationIdCacheGetRelation(bufHdr->tag.relId.relId);
|
||||
|
||||
Assert(bufrel != NULL);
|
||||
smgrflush(bufrel->rd_rel->relsmgr, bufrel, bufHdr->tag.blockNum,
|
||||
(char *) MAKE_PTR(bufHdr->data));
|
||||
|
||||
Assert(LocalRefCount[bufid] > 0);
|
||||
LocalRefCount[bufid]--;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* InitLocalBuffer -
|
||||
* init the local buffer cache. Since most queries (esp. multi-user ones)
|
||||
* don't involve local buffers, we delay allocating memory for actual the
|
||||
* buffer until we need it.
|
||||
*/
|
||||
void
|
||||
InitLocalBuffer()
|
||||
{
|
||||
int i;
|
||||
|
||||
/*
|
||||
* these aren't going away. I'm not gonna use palloc.
|
||||
*/
|
||||
LocalBufferDescriptors =
|
||||
(BufferDesc *)malloc(sizeof(BufferDesc) * NLocBuffer);
|
||||
memset(LocalBufferDescriptors, 0, sizeof(BufferDesc) * NLocBuffer);
|
||||
nextFreeLocalBuf = 0;
|
||||
|
||||
for (i = 0; i < NLocBuffer; i++) {
|
||||
BufferDesc *buf = &LocalBufferDescriptors[i];
|
||||
|
||||
/*
|
||||
* negative to indicate local buffer. This is tricky: shared buffers
|
||||
* start with 0. We have to start with -2. (Note that the routine
|
||||
* BufferDescriptorGetBuffer adds 1 to buf_id so our first buffer id
|
||||
* is -1.)
|
||||
*/
|
||||
buf->buf_id = - i - 2;
|
||||
}
|
||||
|
||||
LocalRefCount =
|
||||
(long *)malloc(sizeof(long) * NLocBuffer);
|
||||
memset(LocalRefCount, 0, sizeof(long) * NLocBuffer);
|
||||
}
|
||||
|
||||
/*
|
||||
* LocalBufferSync -
|
||||
* flush all dirty buffers in the local buffer cache. Since the buffer
|
||||
* cache is only used for keeping relations visible during a transaction,
|
||||
* we will not need these buffers again.
|
||||
*/
|
||||
void
|
||||
LocalBufferSync()
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < NLocBuffer; i++) {
|
||||
BufferDesc *buf = &LocalBufferDescriptors[i];
|
||||
Relation bufrel;
|
||||
|
||||
if (buf->flags & BM_DIRTY) {
|
||||
#ifdef LBDEBUG
|
||||
fprintf(stderr, "LB SYNC %d\n", -i-1);
|
||||
#endif
|
||||
bufrel = RelationIdCacheGetRelation(buf->tag.relId.relId);
|
||||
|
||||
Assert(bufrel != NULL);
|
||||
|
||||
smgrwrite(bufrel->rd_rel->relsmgr, bufrel, buf->tag.blockNum,
|
||||
(char *) MAKE_PTR(buf->data));
|
||||
|
||||
buf->tag.relId.relId = InvalidOid;
|
||||
buf->flags &= ~BM_DIRTY;
|
||||
}
|
||||
}
|
||||
|
||||
memset(LocalRefCount, 0, sizeof(long) * NLocBuffer);
|
||||
}
|
||||
|
||||
void
|
||||
ResetLocalBufferPool()
|
||||
{
|
||||
int i;
|
||||
|
||||
memset(LocalBufferDescriptors, 0, sizeof(BufferDesc) * NLocBuffer);
|
||||
nextFreeLocalBuf = 0;
|
||||
|
||||
for (i = 0; i < NLocBuffer; i++) {
|
||||
BufferDesc *buf = &LocalBufferDescriptors[i];
|
||||
|
||||
/* just like InitLocalBuffer() */
|
||||
buf->buf_id = - i - 2;
|
||||
}
|
||||
|
||||
memset(LocalRefCount, 0, sizeof(long) * NLocBuffer);
|
||||
}
|
||||
112
src/backend/storage/bufmgr.h
Normal file
112
src/backend/storage/bufmgr.h
Normal file
@@ -0,0 +1,112 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* bufmgr.h--
|
||||
* POSTGRES buffer manager definitions.
|
||||
*
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: bufmgr.h,v 1.1.1.1 1996/07/09 06:21:52 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef BUFMGR_H
|
||||
#define BUFMGR_H
|
||||
|
||||
#include "c.h"
|
||||
|
||||
#include "machine.h" /* for BLCKSZ */
|
||||
#include "utils/rel.h"
|
||||
|
||||
#include "storage/buf_internals.h" /* UGLY! -- ay */
|
||||
|
||||
/*
|
||||
* the maximum size of a disk block for any possible installation.
|
||||
*
|
||||
* in theory this could be anything, but in practice this is actually
|
||||
* limited to 2^13 bytes because we have limited ItemIdData.lp_off and
|
||||
* ItemIdData.lp_len to 13 bits (see itemid.h).
|
||||
*/
|
||||
#define MAXBLCKSZ 8192
|
||||
|
||||
typedef void *Block;
|
||||
|
||||
|
||||
/* special pageno for bget */
|
||||
#define P_NEW InvalidBlockNumber /* grow the file to get a new page */
|
||||
|
||||
typedef bits16 BufferLock;
|
||||
|
||||
/**********************************************************************
|
||||
|
||||
the rest is function defns in the bufmgr that are externally callable
|
||||
|
||||
**********************************************************************/
|
||||
|
||||
/*
|
||||
* These routines are beaten on quite heavily, hence the macroization.
|
||||
* See buf_internals.h for a related comment.
|
||||
*/
|
||||
#define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1)
|
||||
|
||||
/*
|
||||
* BufferIsPinned --
|
||||
* True iff the buffer is pinned (and therefore valid)
|
||||
*
|
||||
* Note:
|
||||
* Smenatics are identical to BufferIsValid
|
||||
* XXX - need to remove either one eventually.
|
||||
*/
|
||||
#define BufferIsPinned BufferIsValid
|
||||
|
||||
|
||||
extern int ShowPinTrace;
|
||||
|
||||
/*
|
||||
* prototypes for functions in bufmgr.c
|
||||
*/
|
||||
extern Buffer RelationGetBufferWithBuffer(Relation relation,
|
||||
BlockNumber blockNumber, Buffer buffer);
|
||||
extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum);
|
||||
extern Buffer ReadBuffer_Debug(char *file, int line, Relation reln,
|
||||
BlockNumber blockNum);
|
||||
extern int WriteBuffer(Buffer buffer);
|
||||
extern void WriteBuffer_Debug(char *file, int line, Buffer buffer);
|
||||
extern void DirtyBufferCopy(Oid dbid, Oid relid, BlockNumber blkno,
|
||||
char *dest);
|
||||
extern int WriteNoReleaseBuffer(Buffer buffer);
|
||||
extern Buffer ReleaseAndReadBuffer(Buffer buffer, Relation relation,
|
||||
BlockNumber blockNum);
|
||||
|
||||
extern void InitBufferPool(IPCKey key);
|
||||
extern void PrintBufferUsage(FILE *statfp);
|
||||
extern void ResetBufferUsage(void);
|
||||
extern void ResetBufferPool(void);
|
||||
extern int BufferPoolCheckLeak(void);
|
||||
extern void FlushBufferPool(int StableMainMemoryFlag);
|
||||
extern bool BufferIsValid(Buffer bufnum);
|
||||
extern BlockNumber BufferGetBlockNumber(Buffer buffer);
|
||||
extern Relation BufferGetRelation(Buffer buffer);
|
||||
extern BlockNumber RelationGetNumberOfBlocks(Relation relation);
|
||||
extern Block BufferGetBlock(Buffer buffer);
|
||||
extern void ReleaseTmpRelBuffers(Relation tempreldesc);
|
||||
extern void DropBuffers(Oid dbid);
|
||||
extern void PrintBufferDescs(void);
|
||||
extern void PrintPinnedBufs(void);
|
||||
extern int BufferShmemSize(void);
|
||||
extern void BufferPoolBlowaway(void);
|
||||
extern void IncrBufferRefCount(Buffer buffer);
|
||||
extern int ReleaseBuffer(Buffer buffer);
|
||||
|
||||
extern void IncrBufferRefCount_Debug(char *file, int line, Buffer buffer);
|
||||
extern void ReleaseBuffer_Debug(char *file, int line, Buffer buffer);
|
||||
extern int ReleaseAndReadBuffer_Debug(char *file,
|
||||
int line,
|
||||
Buffer buffer,
|
||||
Relation relation,
|
||||
BlockNumber blockNum);
|
||||
extern void BufferRefCountReset(int *refcountsave);
|
||||
extern void BufferRefCountRestore(int *refcountsave);
|
||||
|
||||
#endif /* !defined(BufMgrIncluded) */
|
||||
|
||||
256
src/backend/storage/bufpage.h
Normal file
256
src/backend/storage/bufpage.h
Normal file
@@ -0,0 +1,256 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* bufpage.h--
|
||||
* Standard POSTGRES buffer page definitions.
|
||||
*
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: bufpage.h,v 1.1.1.1 1996/07/09 06:21:52 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef BUFPAGE_H
|
||||
#define BUFPAGE_H
|
||||
|
||||
#include "c.h"
|
||||
#include "machine.h" /* for BLCKSZ */
|
||||
|
||||
#include "storage/buf.h"
|
||||
#include "storage/item.h"
|
||||
#include "storage/itemid.h"
|
||||
#include "storage/itemptr.h"
|
||||
|
||||
/*
|
||||
* a postgres disk page is an abstraction layered on top of a postgres
|
||||
* disk block (which is simply a unit of i/o, see block.h).
|
||||
*
|
||||
* specifically, while a disk block can be unformatted, a postgres
|
||||
* disk page is always a slotted page of the form:
|
||||
*
|
||||
* +----------------+---------------------------------+
|
||||
* | PageHeaderData | linp0 linp1 linp2 ... |
|
||||
* +-----------+----+---------------------------------+
|
||||
* | ... linpN | |
|
||||
* +-----------+--------------------------------------+
|
||||
* | ^ pd_lower |
|
||||
* | |
|
||||
* | v pd_upper |
|
||||
* +-------------+------------------------------------+
|
||||
* | | tupleN ... |
|
||||
* +-------------+------------------+-----------------+
|
||||
* | ... tuple2 tuple1 tuple0 | "special space" |
|
||||
* +--------------------------------+-----------------+
|
||||
* ^ pd_special
|
||||
*
|
||||
* a page is full when nothing can be added between pd_lower and
|
||||
* pd_upper.
|
||||
*
|
||||
* all blocks written out by an access method must be disk pages.
|
||||
*
|
||||
* EXCEPTIONS:
|
||||
*
|
||||
* obviously, a page is not formatted before it is initialized with by
|
||||
* a call to PageInit.
|
||||
*
|
||||
* the contents of the special pg_variable/pg_time/pg_log tables are
|
||||
* raw disk blocks with special formats. these are the only "access
|
||||
* methods" that need not write disk pages.
|
||||
*
|
||||
* NOTES:
|
||||
*
|
||||
* linp0..N form an ItemId array. ItemPointers point into this array
|
||||
* rather than pointing directly to a tuple.
|
||||
*
|
||||
* tuple0..N are added "backwards" on the page. because a tuple's
|
||||
* ItemPointer points to its ItemId entry rather than its actual
|
||||
* byte-offset position, tuples can be physically shuffled on a page
|
||||
* whenever the need arises.
|
||||
*
|
||||
* AM-generic per-page information is kept in the pd_opaque field of
|
||||
* the PageHeaderData. (this is currently only the page size.)
|
||||
* AM-specific per-page data is kept in the area marked "special
|
||||
* space"; each AM has an "opaque" structure defined somewhere that is
|
||||
* stored as the page trailer. an access method should always
|
||||
* initialize its pages with PageInit and then set its own opaque
|
||||
* fields.
|
||||
*/
|
||||
typedef Pointer Page;
|
||||
|
||||
/*
|
||||
* PageIsValid --
|
||||
* True iff page is valid.
|
||||
*/
|
||||
#define PageIsValid(page) PointerIsValid(page)
|
||||
|
||||
|
||||
/*
|
||||
* location (byte offset) within a page.
|
||||
*
|
||||
* note that this is actually limited to 2^13 because we have limited
|
||||
* ItemIdData.lp_off and ItemIdData.lp_len to 13 bits (see itemid.h).
|
||||
*/
|
||||
typedef uint16 LocationIndex;
|
||||
|
||||
|
||||
/*
|
||||
* space management information generic to any page
|
||||
*
|
||||
* od_pagesize - size in bytes.
|
||||
* in reality, we need at least 64B to fit the
|
||||
* page header, opaque space and a minimal tuple;
|
||||
* on the high end, we can only support pages up
|
||||
* to 8KB because lp_off/lp_len are 13 bits.
|
||||
*/
|
||||
typedef struct OpaqueData {
|
||||
uint16 od_pagesize;
|
||||
} OpaqueData;
|
||||
|
||||
typedef OpaqueData *Opaque;
|
||||
|
||||
|
||||
/*
|
||||
* disk page organization
|
||||
*/
|
||||
typedef struct PageHeaderData {
|
||||
LocationIndex pd_lower; /* offset to start of free space */
|
||||
LocationIndex pd_upper; /* offset to end of free space */
|
||||
LocationIndex pd_special; /* offset to start of special space */
|
||||
OpaqueData pd_opaque; /* AM-generic information */
|
||||
ItemIdData pd_linp[1]; /* line pointers */
|
||||
} PageHeaderData;
|
||||
|
||||
typedef PageHeaderData *PageHeader;
|
||||
|
||||
typedef enum {
|
||||
ShufflePageManagerMode,
|
||||
OverwritePageManagerMode
|
||||
} PageManagerMode;
|
||||
|
||||
/* ----------------
|
||||
* misc support macros
|
||||
* ----------------
|
||||
*/
|
||||
|
||||
/*
|
||||
* XXX this is wrong -- ignores padding/alignment, variable page size,
|
||||
* AM-specific opaque space at the end of the page (as in btrees), ...
|
||||
* however, it at least serves as an upper bound for heap pages.
|
||||
*/
|
||||
#define MAXTUPLEN (BLCKSZ - sizeof (PageHeaderData))
|
||||
|
||||
/* ----------------------------------------------------------------
|
||||
* page support macros
|
||||
* ----------------------------------------------------------------
|
||||
*/
|
||||
/*
|
||||
* PageIsValid -- This is defined in page.h.
|
||||
*/
|
||||
|
||||
/*
|
||||
* PageIsUsed --
|
||||
* True iff the page size is used.
|
||||
*
|
||||
* Note:
|
||||
* Assumes page is valid.
|
||||
*/
|
||||
#define PageIsUsed(page) \
|
||||
(AssertMacro(PageIsValid(page)) ? \
|
||||
((bool) (((PageHeader) (page))->pd_lower != 0)) : false)
|
||||
|
||||
/*
|
||||
* PageIsEmpty --
|
||||
* returns true iff no itemid has been allocated on the page
|
||||
*/
|
||||
#define PageIsEmpty(page) \
|
||||
(((PageHeader) (page))->pd_lower == \
|
||||
(sizeof(PageHeaderData) - sizeof(ItemIdData)) ? true : false)
|
||||
|
||||
/*
|
||||
* PageGetItemId --
|
||||
* Returns an item identifier of a page.
|
||||
*/
|
||||
#define PageGetItemId(page, offsetNumber) \
|
||||
((ItemId) (&((PageHeader) (page))->pd_linp[(-1) + (offsetNumber)]))
|
||||
|
||||
/* ----------------
|
||||
* macros to access opaque space
|
||||
* ----------------
|
||||
*/
|
||||
|
||||
/*
|
||||
* PageSizeIsValid --
|
||||
* True iff the page size is valid.
|
||||
*
|
||||
* XXX currently all page sizes are "valid" but we only actually
|
||||
* use BLCKSZ.
|
||||
*/
|
||||
#define PageSizeIsValid(pageSize) 1
|
||||
|
||||
/*
|
||||
* PageGetPageSize --
|
||||
* Returns the page size of a page.
|
||||
*
|
||||
* this can only be called on a formatted page (unlike
|
||||
* BufferGetPageSize, which can be called on an unformatted page).
|
||||
* however, it can be called on a page for which there is no buffer.
|
||||
*/
|
||||
#define PageGetPageSize(page) \
|
||||
((Size) ((PageHeader) (page))->pd_opaque.od_pagesize)
|
||||
|
||||
/*
|
||||
* PageSetPageSize --
|
||||
* Sets the page size of a page.
|
||||
*/
|
||||
#define PageSetPageSize(page, size) \
|
||||
((PageHeader) (page))->pd_opaque.od_pagesize = (size)
|
||||
|
||||
/* ----------------
|
||||
* page special data macros
|
||||
* ----------------
|
||||
*/
|
||||
/*
|
||||
* PageGetSpecialSize --
|
||||
* Returns size of special space on a page.
|
||||
*
|
||||
* Note:
|
||||
* Assumes page is locked.
|
||||
*/
|
||||
#define PageGetSpecialSize(page) \
|
||||
((uint16) (PageGetPageSize(page) - ((PageHeader)page)->pd_special))
|
||||
|
||||
/*
|
||||
* PageGetSpecialPointer --
|
||||
* Returns pointer to special space on a page.
|
||||
*
|
||||
* Note:
|
||||
* Assumes page is locked.
|
||||
*/
|
||||
#define PageGetSpecialPointer(page) \
|
||||
(AssertMacro(PageIsValid(page)) ? \
|
||||
(char *) ((char *) (page) + ((PageHeader) (page))->pd_special) \
|
||||
: (char *) 0)
|
||||
|
||||
/* ----------------------------------------------------------------
|
||||
* extern declarations
|
||||
* ----------------------------------------------------------------
|
||||
*/
|
||||
|
||||
extern Size BufferGetPageSize(Buffer buffer);
|
||||
extern Page BufferGetPage(Buffer buffer);
|
||||
extern void PageInit(Page page, Size pageSize, Size specialSize);
|
||||
extern Item PageGetItem(Page page, ItemId itemId);
|
||||
extern OffsetNumber PageAddItem(Page page, Item item, Size size,
|
||||
OffsetNumber offsetNumber, ItemIdFlags flags);
|
||||
extern Page PageGetTempPage(Page page, Size specialSize);
|
||||
extern void PageRestoreTempPage(Page tempPage, Page oldPage);
|
||||
extern OffsetNumber PageGetMaxOffsetNumber(Page page);
|
||||
extern void PageRepairFragmentation(Page page);
|
||||
extern Size PageGetFreeSpace(Page page);
|
||||
extern void PageManagerModeSet(PageManagerMode mode);
|
||||
extern void PageIndexTupleDelete(Page page, OffsetNumber offset);
|
||||
extern void PageIndexTupleDeleteAdjustLinePointers(PageHeader phdr,
|
||||
char *location, Size size);
|
||||
|
||||
|
||||
#endif /* BUFPAGE_H */
|
||||
96
src/backend/storage/fd.h
Normal file
96
src/backend/storage/fd.h
Normal file
@@ -0,0 +1,96 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* fd.h--
|
||||
* Virtual file descriptor definitions.
|
||||
*
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: fd.h,v 1.1.1.1 1996/07/09 06:21:52 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
/*
|
||||
* calls:
|
||||
*
|
||||
* File {Close, Read, Write, Seek, Tell, Sync}
|
||||
* {File Name Open, Allocate, Free} File
|
||||
*
|
||||
* These are NOT JUST RENAMINGS OF THE UNIX ROUTINES.
|
||||
* use them for all file activity...
|
||||
*
|
||||
* fd = FilePathOpenFile("foo", O_RDONLY);
|
||||
* File fd;
|
||||
*
|
||||
* use AllocateFile if you need a file descriptor in some other context.
|
||||
* it will make sure that there is a file descriptor free
|
||||
*
|
||||
* use FreeFile to let the virtual file descriptor package know that
|
||||
* there is now a free fd (when you are done with it)
|
||||
*
|
||||
* AllocateFile();
|
||||
* FreeFile();
|
||||
*/
|
||||
#ifndef FD_H
|
||||
#define FD_H
|
||||
|
||||
/*
|
||||
* FileOpen uses the standard UNIX open(2) flags.
|
||||
*/
|
||||
#include <fcntl.h> /* for O_ on most */
|
||||
#ifndef O_RDONLY
|
||||
#include <sys/file.h> /* for O_ on the rest */
|
||||
#endif /* O_RDONLY */
|
||||
|
||||
/*
|
||||
* FileSeek uses the standard UNIX lseek(2) flags.
|
||||
*/
|
||||
#ifndef WIN32
|
||||
#include <unistd.h> /* for SEEK_ on most */
|
||||
#else
|
||||
#ifndef SEEK_SET
|
||||
#include <stdio.h> /* for SEEK_ on the rest */
|
||||
#endif /* SEEK_SET */
|
||||
#endif /* WIN32 */
|
||||
|
||||
#include "c.h"
|
||||
#include "storage/block.h"
|
||||
|
||||
typedef char *FileName;
|
||||
|
||||
typedef int File;
|
||||
|
||||
/* originally in libpq-fs.h */
|
||||
struct pgstat { /* just the fields we need from stat structure */
|
||||
int st_ino;
|
||||
int st_mode;
|
||||
unsigned int st_size;
|
||||
unsigned int st_sizehigh; /* high order bits */
|
||||
/* 2^64 == 1.8 x 10^20 bytes */
|
||||
int st_uid;
|
||||
int st_atime_s; /* just the seconds */
|
||||
int st_mtime_s; /* since SysV and the new BSD both have */
|
||||
int st_ctime_s; /* usec fields.. */
|
||||
};
|
||||
|
||||
/*
|
||||
* prototypes for functions in fd.c
|
||||
*/
|
||||
extern void FileInvalidate(File file);
|
||||
extern File FileNameOpenFile(FileName fileName, int fileFlags, int fileMode);
|
||||
extern File PathNameOpenFile(FileName fileName, int fileFlags, int fileMode);
|
||||
extern void FileClose(File file);
|
||||
extern void FileUnlink(File file);
|
||||
extern int FileRead(File file, char *buffer, int amount);
|
||||
extern int FileWrite(File file, char *buffer, int amount);
|
||||
extern long FileSeek(File file, long offset, int whence);
|
||||
extern long FileTell(File file);
|
||||
extern int FileTruncate(File file, int offset);
|
||||
extern int FileSync(File file);
|
||||
extern int FileNameUnlink(char *filename);
|
||||
extern void AllocateFile(void);
|
||||
extern void FreeFile(void);
|
||||
extern void closeAllVfds(void);
|
||||
extern void closeOneVfd(void);
|
||||
|
||||
#endif /* FD_H */
|
||||
14
src/backend/storage/file/Makefile.inc
Normal file
14
src/backend/storage/file/Makefile.inc
Normal file
@@ -0,0 +1,14 @@
|
||||
#-------------------------------------------------------------------------
|
||||
#
|
||||
# Makefile.inc--
|
||||
# Makefile for storage/file
|
||||
#
|
||||
# Copyright (c) 1994, Regents of the University of California
|
||||
#
|
||||
#
|
||||
# IDENTIFICATION
|
||||
# $Header: /cvsroot/pgsql/src/backend/storage/file/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:55 scrappy Exp $
|
||||
#
|
||||
#-------------------------------------------------------------------------
|
||||
|
||||
SUBSRCS+= fd.c
|
||||
888
src/backend/storage/file/fd.c
Normal file
888
src/backend/storage/file/fd.c
Normal file
@@ -0,0 +1,888 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* fd.c--
|
||||
* Virtual file descriptor code.
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Id: fd.c,v 1.1.1.1 1996/07/09 06:21:55 scrappy Exp $
|
||||
*
|
||||
* NOTES:
|
||||
*
|
||||
* This code manages a cache of 'virtual' file descriptors (VFDs).
|
||||
* The server opens many file descriptors for a variety of reasons,
|
||||
* including base tables, scratch files (e.g., sort and hash spool
|
||||
* files), and random calls to C library routines like system(3); it
|
||||
* is quite easy to exceed system limits on the number of open files a
|
||||
* single process can have. (This is around 256 on many modern
|
||||
* operating systems, but can be as low as 32 on others.)
|
||||
*
|
||||
* VFDs are managed as an LRU pool, with actual OS file descriptors
|
||||
* being opened and closed as needed. Obviously, if a routine is
|
||||
* opened using these interfaces, all subsequent operations must also
|
||||
* be through these interfaces (the File type is not a real file
|
||||
* descriptor).
|
||||
*
|
||||
* For this scheme to work, most (if not all) routines throughout the
|
||||
* server should use these interfaces instead of calling the C library
|
||||
* routines (e.g., open(2) and fopen(3)) themselves. Otherwise, we
|
||||
* may find ourselves short of real file descriptors anyway.
|
||||
*
|
||||
* This file used to contain a bunch of stuff to support RAID levels 0
|
||||
* (jbod), 1 (duplex) and 5 (xor parity). That stuff is all gone
|
||||
* because the parallel query processing code that called it is all
|
||||
* gone. If you really need it you could get it from the original
|
||||
* POSTGRES source.
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <sys/file.h>
|
||||
#include <sys/param.h>
|
||||
#include <errno.h>
|
||||
#include <sys/stat.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "c.h"
|
||||
#include "miscadmin.h" /* for DataDir */
|
||||
#include "utils/palloc.h"
|
||||
|
||||
#ifdef PORTNAME_sparc
|
||||
/*
|
||||
* the SunOS 4 NOFILE is a lie, because the default limit is *not* the
|
||||
* maximum number of file descriptors you can have open.
|
||||
*
|
||||
* we have to either use this number (the default dtablesize) or
|
||||
* explicitly call setrlimit(RLIMIT_NOFILE, NOFILE).
|
||||
*/
|
||||
#include <sys/user.h>
|
||||
#undef NOFILE
|
||||
#define NOFILE NOFILE_IN_U
|
||||
#endif /* PORTNAME_sparc */
|
||||
|
||||
/*
|
||||
* Problem: Postgres does a system(ld...) to do dynamic loading. This
|
||||
* will open several extra files in addition to those used by
|
||||
 * Postgres.  We need to do this hack to guarantee that there are file
|
||||
* descriptors free for ld to use.
|
||||
*
|
||||
 * The current solution is to limit the number of file descriptors
|
||||
* that this code will allocated at one time. (it leaves
|
||||
* RESERVE_FOR_LD free).
|
||||
*
|
||||
* (Even though most dynamic loaders now use dlopen(3) or the
|
||||
* equivalent, the OS must still open several files to perform the
|
||||
* dynamic loading. Keep this here.)
|
||||
*/
|
||||
#define RESERVE_FOR_LD 10
|
||||
|
||||
/*
|
||||
* If we are using weird storage managers, we may need to keep real
|
||||
* file descriptors open so that the jukebox server doesn't think we
|
||||
* have gone away (and no longer care about a platter or file that
|
||||
* we've been using). This might be an actual file descriptor for a
|
||||
* local jukebox interface that uses paths, or a socket connection for
|
||||
* a network jukebox server. Since we can't be opening and closing
|
||||
* these descriptors at whim, we must make allowances for them.
|
||||
*/
|
||||
#ifdef HP_JUKEBOX
|
||||
#define RESERVE_FOR_JB 25
|
||||
#define MAXFILES ((NOFILE - RESERVE_FOR_LD) - RESERVE_FOR_JB)
|
||||
#else /* HP_JUKEBOX */
|
||||
#define MAXFILES (NOFILE - RESERVE_FOR_LD)
|
||||
#endif /* HP_JUKEBOX */
|
||||
|
||||
/* Debugging.... */
|
||||
|
||||
#ifdef FDDEBUG
|
||||
# define DO_DB(A) A
|
||||
#else
|
||||
# define DO_DB(A) /* A */
|
||||
#endif
|
||||
|
||||
#define VFD_CLOSED -1
|
||||
|
||||
#include "storage/fd.h"
|
||||
#include "utils/elog.h"
|
||||
|
||||
#define FileIsNotOpen(file) (VfdCache[file].fd == VFD_CLOSED)
|
||||
|
||||
typedef struct vfd {
|
||||
signed short fd;
|
||||
unsigned short fdstate;
|
||||
|
||||
#define FD_DIRTY (1 << 0)
|
||||
|
||||
File nextFree;
|
||||
File lruMoreRecently;
|
||||
File lruLessRecently;
|
||||
long seekPos;
|
||||
char *fileName;
|
||||
int fileFlags;
|
||||
int fileMode;
|
||||
} Vfd;
|
||||
|
||||
/*
|
||||
* Virtual File Descriptor array pointer and size. This grows as
|
||||
* needed.
|
||||
*/
|
||||
static Vfd *VfdCache;
|
||||
static Size SizeVfdCache = 0;
|
||||
|
||||
/*
|
||||
* Minimum number of file descriptors known to be free.
|
||||
*/
|
||||
static int FreeFd = 0;
|
||||
|
||||
/*
|
||||
* Number of file descriptors known to be open.
|
||||
*/
|
||||
static int nfile = 0;
|
||||
|
||||
/*
|
||||
* we use the name of the null device in various places, mostly so
|
||||
* that we can open it and find out if we really have any descriptors
|
||||
* available or not.
|
||||
*/
|
||||
#ifndef WIN32
|
||||
static char *Nulldev = "/dev/null";
|
||||
static char Sep_char = '/';
|
||||
#else
|
||||
static char *Nulldev = "NUL";
|
||||
static char Sep_char = '\\';
|
||||
#endif /* WIN32 */
|
||||
|
||||
/*
|
||||
* Private Routines
|
||||
*
|
||||
* Delete - delete a file from the Lru ring
|
||||
* LruDelete - remove a file from the Lru ring and close
|
||||
* Insert - put a file at the front of the Lru ring
|
||||
* LruInsert - put a file at the front of the Lru ring and open
|
||||
* AssertLruRoom - make sure that there is a free fd.
|
||||
*
|
||||
* the Last Recently Used ring is a doubly linked list that begins and
|
||||
* ends on element zero.
|
||||
*
|
||||
* example:
|
||||
*
|
||||
* /--less----\ /---------\
|
||||
* v \ v \
|
||||
* #0 --more---> LeastRecentlyUsed --more-\ \
|
||||
* ^\ | |
|
||||
* \\less--> MostRecentlyUsedFile <---/ |
|
||||
* \more---/ \--less--/
|
||||
*
|
||||
* AllocateVfd - grab a free (or new) file record (from VfdArray)
|
||||
* FreeVfd - free a file record
|
||||
*
|
||||
*/
|
||||
static void Delete(File file);
|
||||
static void LruDelete(File file);
|
||||
static void Insert(File file);
|
||||
static int LruInsert (File file);
|
||||
static void AssertLruRoom(void);
|
||||
static File AllocateVfd(void);
|
||||
static void FreeVfd(File file);
|
||||
|
||||
static int FileAccess(File file);
|
||||
static File fileNameOpenFile(FileName fileName, int fileFlags, int fileMode);
|
||||
static char *filepath(char *filename);
|
||||
|
||||
#if defined(FDDEBUG)
|
||||
static void
|
||||
_dump_lru()
|
||||
{
|
||||
int mru = VfdCache[0].lruLessRecently;
|
||||
Vfd *vfdP = &VfdCache[mru];
|
||||
|
||||
printf("MOST %d ", mru);
|
||||
while (mru != 0)
|
||||
{
|
||||
mru = vfdP->lruLessRecently;
|
||||
vfdP = &VfdCache[mru];
|
||||
printf("%d ", mru);
|
||||
}
|
||||
printf("LEAST\n");
|
||||
}
|
||||
#endif /* FDDEBUG */
|
||||
|
||||
static void
|
||||
Delete(File file)
|
||||
{
|
||||
Vfd *fileP;
|
||||
|
||||
DO_DB(printf("DEBUG: Delete %d (%s)\n",
|
||||
file, VfdCache[file].fileName));
|
||||
DO_DB(_dump_lru());
|
||||
|
||||
Assert(file != 0);
|
||||
|
||||
fileP = &VfdCache[file];
|
||||
|
||||
VfdCache[fileP->lruLessRecently].lruMoreRecently =
|
||||
VfdCache[file].lruMoreRecently;
|
||||
VfdCache[fileP->lruMoreRecently].lruLessRecently =
|
||||
VfdCache[file].lruLessRecently;
|
||||
|
||||
DO_DB(_dump_lru());
|
||||
}
|
||||
|
||||
static void
|
||||
LruDelete(File file)
|
||||
{
|
||||
Vfd *fileP;
|
||||
int returnValue;
|
||||
|
||||
DO_DB(printf("DEBUG: LruDelete %d (%s)\n",
|
||||
file, VfdCache[file].fileName));
|
||||
|
||||
Assert(file != 0);
|
||||
|
||||
fileP = &VfdCache[file];
|
||||
|
||||
/* delete the vfd record from the LRU ring */
|
||||
Delete(file);
|
||||
|
||||
/* save the seek position */
|
||||
fileP->seekPos = lseek(fileP->fd, 0L, SEEK_CUR);
|
||||
Assert( fileP->seekPos != -1);
|
||||
|
||||
/* if we have written to the file, sync it */
|
||||
if (fileP->fdstate & FD_DIRTY) {
|
||||
returnValue = fsync(fileP->fd);
|
||||
Assert(returnValue != -1);
|
||||
fileP->fdstate &= ~FD_DIRTY;
|
||||
}
|
||||
|
||||
/* close the file */
|
||||
returnValue = close(fileP->fd);
|
||||
Assert(returnValue != -1);
|
||||
|
||||
--nfile;
|
||||
fileP->fd = VFD_CLOSED;
|
||||
|
||||
/* note that there is now one more free real file descriptor */
|
||||
FreeFd++;
|
||||
}
|
||||
|
||||
static void
|
||||
Insert(File file)
|
||||
{
|
||||
Vfd *vfdP;
|
||||
|
||||
DO_DB(printf("DEBUG: Insert %d (%s)\n",
|
||||
file, VfdCache[file].fileName));
|
||||
DO_DB(_dump_lru());
|
||||
|
||||
vfdP = &VfdCache[file];
|
||||
|
||||
vfdP->lruMoreRecently = 0;
|
||||
vfdP->lruLessRecently = VfdCache[0].lruLessRecently;
|
||||
VfdCache[0].lruLessRecently = file;
|
||||
VfdCache[vfdP->lruLessRecently].lruMoreRecently = file;
|
||||
|
||||
DO_DB(_dump_lru());
|
||||
}
|
||||
|
||||
static int
|
||||
LruInsert (File file)
|
||||
{
|
||||
Vfd *vfdP;
|
||||
int returnValue;
|
||||
|
||||
DO_DB(printf("DEBUG: LruInsert %d (%s)\n",
|
||||
file, VfdCache[file].fileName));
|
||||
|
||||
vfdP = &VfdCache[file];
|
||||
|
||||
if (FileIsNotOpen(file)) {
|
||||
int tmpfd;
|
||||
|
||||
/*
|
||||
* Note, we check to see if there's a free file descriptor
|
||||
* before attempting to open a file. One general way to do
|
||||
* this is to try to open the null device which everybody
|
||||
* should be able to open all the time. If this fails, we
|
||||
* assume this is because there's no free file descriptors.
|
||||
*/
|
||||
tryAgain:
|
||||
tmpfd = open(Nulldev, O_CREAT|O_RDWR, 0666);
|
||||
if (tmpfd < 0) {
|
||||
FreeFd = 0;
|
||||
errno = 0;
|
||||
AssertLruRoom();
|
||||
goto tryAgain;
|
||||
} else {
|
||||
close(tmpfd);
|
||||
}
|
||||
vfdP->fd = open(vfdP->fileName,vfdP->fileFlags,vfdP->fileMode);
|
||||
|
||||
if (vfdP->fd < 0) {
|
||||
DO_DB(printf("RE_OPEN FAILED: %d\n",
|
||||
errno));
|
||||
return (vfdP->fd);
|
||||
} else {
|
||||
DO_DB(printf("RE_OPEN SUCCESS\n"));
|
||||
++nfile;
|
||||
}
|
||||
|
||||
/* seek to the right position */
|
||||
if (vfdP->seekPos != 0L) {
|
||||
returnValue =
|
||||
lseek(vfdP->fd, vfdP->seekPos, SEEK_SET);
|
||||
Assert(returnValue != -1);
|
||||
}
|
||||
|
||||
/* init state on open */
|
||||
vfdP->fdstate = 0x0;
|
||||
|
||||
/* note that a file descriptor has been used up */
|
||||
if (FreeFd > 0)
|
||||
FreeFd--;
|
||||
}
|
||||
|
||||
/*
|
||||
* put it at the head of the Lru ring
|
||||
*/
|
||||
|
||||
Insert(file);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
AssertLruRoom()
|
||||
{
|
||||
DO_DB(printf("DEBUG: AssertLruRoom (FreeFd = %d)\n",
|
||||
FreeFd));
|
||||
|
||||
if (FreeFd <= 0 || nfile >= MAXFILES) {
|
||||
LruDelete(VfdCache[0].lruMoreRecently);
|
||||
}
|
||||
}
|
||||
|
||||
static File
|
||||
AllocateVfd()
|
||||
{
|
||||
Index i;
|
||||
File file;
|
||||
|
||||
DO_DB(printf("DEBUG: AllocateVfd\n"));
|
||||
|
||||
if (SizeVfdCache == 0) {
|
||||
|
||||
/* initialize */
|
||||
VfdCache = (Vfd *)malloc(sizeof(Vfd));
|
||||
|
||||
VfdCache->nextFree = 0;
|
||||
VfdCache->lruMoreRecently = 0;
|
||||
VfdCache->lruLessRecently = 0;
|
||||
VfdCache->fd = VFD_CLOSED;
|
||||
VfdCache->fdstate = 0x0;
|
||||
|
||||
SizeVfdCache = 1;
|
||||
}
|
||||
|
||||
if (VfdCache[0].nextFree == 0) {
|
||||
|
||||
/*
|
||||
* The free list is empty so it is time to increase the
|
||||
* size of the array
|
||||
*/
|
||||
|
||||
VfdCache =(Vfd *)realloc(VfdCache, sizeof(Vfd)*SizeVfdCache*2);
|
||||
Assert(VfdCache != NULL);
|
||||
|
||||
/*
|
||||
* Set up the free list for the new entries
|
||||
*/
|
||||
|
||||
for (i = SizeVfdCache; i < 2*SizeVfdCache; i++) {
|
||||
memset((char *) &(VfdCache[i]), 0, sizeof(VfdCache[0]));
|
||||
VfdCache[i].nextFree = i+1;
|
||||
VfdCache[i].fd = VFD_CLOSED;
|
||||
}
|
||||
|
||||
/*
|
||||
* Element 0 is the first and last element of the free
|
||||
* list
|
||||
*/
|
||||
|
||||
VfdCache[0].nextFree = SizeVfdCache;
|
||||
VfdCache[2*SizeVfdCache-1].nextFree = 0;
|
||||
|
||||
/*
|
||||
* Record the new size
|
||||
*/
|
||||
|
||||
SizeVfdCache *= 2;
|
||||
}
|
||||
file = VfdCache[0].nextFree;
|
||||
|
||||
VfdCache[0].nextFree = VfdCache[file].nextFree;
|
||||
|
||||
return file;
|
||||
}
|
||||
|
||||
static void
|
||||
FreeVfd(File file)
|
||||
{
|
||||
DO_DB(printf("DB: FreeVfd: %d (%s)\n",
|
||||
file, VfdCache[file].fileName));
|
||||
|
||||
VfdCache[file].nextFree = VfdCache[0].nextFree;
|
||||
VfdCache[0].nextFree = file;
|
||||
}
|
||||
|
||||
static char *
|
||||
filepath(char *filename)
|
||||
{
|
||||
char *buf;
|
||||
char basename[16];
|
||||
int len;
|
||||
|
||||
#ifndef WIN32
|
||||
if (*filename != Sep_char) {
|
||||
#else
|
||||
if (!(filename[1] == ':' && filename[2] == Sep_char)) {
|
||||
#endif /* WIN32 */
|
||||
|
||||
/* Either /base/ or \base\ */
|
||||
sprintf(basename, "%cbase%c", Sep_char, Sep_char);
|
||||
|
||||
len = strlen(DataDir) + strlen(basename) + strlen(GetDatabaseName())
|
||||
+ strlen(filename) + 2;
|
||||
buf = (char*) palloc(len);
|
||||
sprintf(buf, "%s%s%s%c%s",
|
||||
DataDir, basename, GetDatabaseName(), Sep_char, filename);
|
||||
} else {
|
||||
buf = (char *) palloc(strlen(filename) + 1);
|
||||
strcpy(buf, filename);
|
||||
}
|
||||
|
||||
return(buf);
|
||||
}
|
||||
|
||||
static int
|
||||
FileAccess(File file)
|
||||
{
|
||||
int returnValue;
|
||||
|
||||
DO_DB(printf("DB: FileAccess %d (%s)\n",
|
||||
file, VfdCache[file].fileName));
|
||||
|
||||
/*
|
||||
* Is the file open? If not, close the least recently used,
|
||||
* then open it and stick it at the head of the used ring
|
||||
*/
|
||||
|
||||
if (FileIsNotOpen(file)) {
|
||||
|
||||
AssertLruRoom();
|
||||
|
||||
returnValue = LruInsert(file);
|
||||
if (returnValue != 0)
|
||||
return returnValue;
|
||||
|
||||
} else {
|
||||
|
||||
/*
|
||||
* We now know that the file is open and that it is not the
|
||||
* last one accessed, so we need to more it to the head of
|
||||
* the Lru ring.
|
||||
*/
|
||||
|
||||
Delete(file);
|
||||
Insert(file);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called when we get a shared invalidation message on some relation.
|
||||
*/
|
||||
void
|
||||
FileInvalidate(File file)
|
||||
{
|
||||
if (!FileIsNotOpen(file)) {
|
||||
LruDelete(file);
|
||||
}
|
||||
}
|
||||
|
||||
/* VARARGS2 */
|
||||
static File
|
||||
fileNameOpenFile(FileName fileName,
|
||||
int fileFlags,
|
||||
int fileMode)
|
||||
{
|
||||
static int osRanOut = 0;
|
||||
File file;
|
||||
Vfd *vfdP;
|
||||
int tmpfd;
|
||||
|
||||
DO_DB(printf("DEBUG: FileNameOpenFile: %s %x %o\n",
|
||||
fileName, fileFlags, fileMode));
|
||||
|
||||
file = AllocateVfd();
|
||||
vfdP = &VfdCache[file];
|
||||
|
||||
if (nfile >= MAXFILES || (FreeFd == 0 && osRanOut)) {
|
||||
AssertLruRoom();
|
||||
}
|
||||
|
||||
tryAgain:
|
||||
tmpfd = open(Nulldev, O_CREAT|O_RDWR, 0666);
|
||||
if (tmpfd < 0) {
|
||||
DO_DB(printf("DB: not enough descs, retry, er= %d\n",
|
||||
errno));
|
||||
errno = 0;
|
||||
FreeFd = 0;
|
||||
osRanOut = 1;
|
||||
AssertLruRoom();
|
||||
goto tryAgain;
|
||||
} else {
|
||||
close(tmpfd);
|
||||
}
|
||||
|
||||
#ifdef WIN32
|
||||
fileFlags |= _O_BINARY;
|
||||
#endif /* WIN32 */
|
||||
vfdP->fd = open(fileName,fileFlags,fileMode);
|
||||
vfdP->fdstate = 0x0;
|
||||
|
||||
if (vfdP->fd < 0) {
|
||||
FreeVfd(file);
|
||||
return -1;
|
||||
}
|
||||
++nfile;
|
||||
DO_DB(printf("DB: FNOF success %d\n",
|
||||
vfdP->fd));
|
||||
|
||||
(void)LruInsert(file);
|
||||
|
||||
if (fileName==NULL) {
|
||||
elog(WARN, "fileNameOpenFile: NULL fname");
|
||||
}
|
||||
vfdP->fileName = malloc(strlen(fileName)+1);
|
||||
strcpy(vfdP->fileName,fileName);
|
||||
|
||||
vfdP->fileFlags = fileFlags & ~(O_TRUNC|O_EXCL);
|
||||
vfdP->fileMode = fileMode;
|
||||
vfdP->seekPos = 0;
|
||||
|
||||
return file;
|
||||
}
|
||||
|
||||
/*
|
||||
* open a file in the database directory ($PGDATA/base/...)
|
||||
*/
|
||||
File
|
||||
FileNameOpenFile(FileName fileName, int fileFlags, int fileMode)
|
||||
{
|
||||
File fd;
|
||||
char *fname;
|
||||
|
||||
fname = filepath(fileName);
|
||||
fd = fileNameOpenFile(fname, fileFlags, fileMode);
|
||||
pfree(fname);
|
||||
return(fd);
|
||||
}
|
||||
|
||||
/*
|
||||
* open a file in an arbitrary directory
|
||||
*/
|
||||
File
|
||||
PathNameOpenFile(FileName fileName, int fileFlags, int fileMode)
|
||||
{
|
||||
return(fileNameOpenFile(fileName, fileFlags, fileMode));
|
||||
}
|
||||
|
||||
void
|
||||
FileClose(File file)
|
||||
{
|
||||
int returnValue;
|
||||
|
||||
DO_DB(printf("DEBUG: FileClose: %d (%s)\n",
|
||||
file, VfdCache[file].fileName));
|
||||
|
||||
if (!FileIsNotOpen(file)) {
|
||||
|
||||
/* remove the file from the lru ring */
|
||||
Delete(file);
|
||||
|
||||
/* record the new free operating system file descriptor */
|
||||
FreeFd++;
|
||||
|
||||
/* if we did any writes, sync the file before closing */
|
||||
if (VfdCache[file].fdstate & FD_DIRTY) {
|
||||
returnValue = fsync(VfdCache[file].fd);
|
||||
Assert(returnValue != -1);
|
||||
VfdCache[file].fdstate &= ~FD_DIRTY;
|
||||
}
|
||||
|
||||
/* close the file */
|
||||
returnValue = close(VfdCache[file].fd);
|
||||
Assert(returnValue != -1);
|
||||
|
||||
--nfile;
|
||||
VfdCache[file].fd = VFD_CLOSED;
|
||||
}
|
||||
/*
|
||||
* Add the Vfd slot to the free list
|
||||
*/
|
||||
FreeVfd(file);
|
||||
/*
|
||||
* Free the filename string
|
||||
*/
|
||||
free(VfdCache[file].fileName);
|
||||
}
|
||||
|
||||
void
|
||||
FileUnlink(File file)
|
||||
{
|
||||
int returnValue;
|
||||
|
||||
DO_DB(printf("DB: FileClose: %d (%s)\n",
|
||||
file, VfdCache[file].fileName));
|
||||
|
||||
if (!FileIsNotOpen(file)) {
|
||||
|
||||
/* remove the file from the lru ring */
|
||||
Delete(file);
|
||||
|
||||
/* record the new free operating system file descriptor */
|
||||
FreeFd++;
|
||||
|
||||
/* if we did any writes, sync the file before closing */
|
||||
if (VfdCache[file].fdstate & FD_DIRTY) {
|
||||
returnValue = fsync(VfdCache[file].fd);
|
||||
Assert(returnValue != -1);
|
||||
VfdCache[file].fdstate &= ~FD_DIRTY;
|
||||
}
|
||||
|
||||
/* close the file */
|
||||
returnValue = close(VfdCache[file].fd);
|
||||
Assert(returnValue != -1);
|
||||
|
||||
--nfile;
|
||||
VfdCache[file].fd = VFD_CLOSED;
|
||||
}
|
||||
/* add the Vfd slot to the free list */
|
||||
FreeVfd(file);
|
||||
|
||||
/* free the filename string */
|
||||
unlink(VfdCache[file].fileName);
|
||||
free(VfdCache[file].fileName);
|
||||
}
|
||||
|
||||
int
|
||||
FileRead(File file, char *buffer, int amount)
|
||||
{
|
||||
int returnCode;
|
||||
|
||||
DO_DB(printf("DEBUG: FileRead: %d (%s) %d 0x%x\n",
|
||||
file, VfdCache[file].fileName, amount, buffer));
|
||||
|
||||
FileAccess(file);
|
||||
returnCode = read(VfdCache[file].fd, buffer, amount);
|
||||
if (returnCode > 0) {
|
||||
VfdCache[file].seekPos += returnCode;
|
||||
}
|
||||
|
||||
return returnCode;
|
||||
}
|
||||
|
||||
int
|
||||
FileWrite(File file, char *buffer, int amount)
|
||||
{
|
||||
int returnCode;
|
||||
|
||||
DO_DB(printf("DB: FileWrite: %d (%s) %d 0x%lx\n",
|
||||
file, VfdCache[file].fileName, amount, buffer));
|
||||
|
||||
FileAccess(file);
|
||||
returnCode = write(VfdCache[file].fd, buffer, amount);
|
||||
if (returnCode > 0) { /* changed by Boris with Mao's advice */
|
||||
VfdCache[file].seekPos += returnCode;
|
||||
}
|
||||
|
||||
/* record the write */
|
||||
VfdCache[file].fdstate |= FD_DIRTY;
|
||||
|
||||
return returnCode;
|
||||
}
|
||||
|
||||
long
|
||||
FileSeek(File file, long offset, int whence)
|
||||
{
|
||||
int returnCode;
|
||||
|
||||
DO_DB(printf("DEBUG: FileSeek: %d (%s) %d %d\n",
|
||||
file, VfdCache[file].fileName, offset, whence));
|
||||
|
||||
if (FileIsNotOpen(file)) {
|
||||
switch(whence) {
|
||||
case SEEK_SET:
|
||||
VfdCache[file].seekPos = offset;
|
||||
return offset;
|
||||
case SEEK_CUR:
|
||||
VfdCache[file].seekPos = VfdCache[file].seekPos +offset;
|
||||
return VfdCache[file].seekPos;
|
||||
case SEEK_END:
|
||||
FileAccess(file);
|
||||
returnCode = VfdCache[file].seekPos =
|
||||
lseek(VfdCache[file].fd, offset, whence);
|
||||
return returnCode;
|
||||
default:
|
||||
elog(WARN, "FileSeek: invalid whence: %d", whence);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
returnCode = VfdCache[file].seekPos =
|
||||
lseek(VfdCache[file].fd, offset, whence);
|
||||
return returnCode;
|
||||
}
|
||||
/*NOTREACHED*/
|
||||
return(-1L);
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX not actually used but here for completeness
|
||||
*/
|
||||
long
|
||||
FileTell(File file)
|
||||
{
|
||||
DO_DB(printf("DEBUG: FileTell %d (%s)\n",
|
||||
file, VfdCache[file].fileName));
|
||||
return VfdCache[file].seekPos;
|
||||
}
|
||||
|
||||
int
|
||||
FileTruncate(File file, int offset)
|
||||
{
|
||||
int returnCode;
|
||||
|
||||
DO_DB(printf("DEBUG: FileTruncate %d (%s)\n",
|
||||
file, VfdCache[file].fileName));
|
||||
|
||||
(void) FileSync(file);
|
||||
(void) FileAccess(file);
|
||||
returnCode = ftruncate(VfdCache[file].fd, offset);
|
||||
return(returnCode);
|
||||
}
|
||||
|
||||
int
|
||||
FileSync(File file)
|
||||
{
|
||||
int returnCode;
|
||||
|
||||
/*
|
||||
* If the file isn't open, then we don't need to sync it; we
|
||||
* always sync files when we close them. Also, if we haven't
|
||||
* done any writes that we haven't already synced, we can ignore
|
||||
* the request.
|
||||
*/
|
||||
|
||||
if (VfdCache[file].fd < 0 || !(VfdCache[file].fdstate & FD_DIRTY)) {
|
||||
returnCode = 0;
|
||||
} else {
|
||||
returnCode = fsync(VfdCache[file].fd);
|
||||
VfdCache[file].fdstate &= ~FD_DIRTY;
|
||||
}
|
||||
|
||||
return returnCode;
|
||||
}
|
||||
|
||||
int
|
||||
FileNameUnlink(char *filename)
|
||||
{
|
||||
int retval;
|
||||
char *fname;
|
||||
|
||||
fname = filepath(filename);
|
||||
retval = unlink(fname);
|
||||
pfree(fname);
|
||||
return(retval);
|
||||
}
|
||||
|
||||
/*
|
||||
* if we want to be sure that we have a real file descriptor available
|
||||
* (e.g., we want to know this in psort) we call AllocateFile to force
|
||||
* availability. when we are done we call FreeFile to deallocate the
|
||||
* descriptor.
|
||||
*
|
||||
* allocatedFiles keeps track of how many have been allocated so we
|
||||
* can give a warning if there are too few left.
|
||||
*/
|
||||
static int allocatedFiles = 0;
|
||||
|
||||
void
|
||||
AllocateFile()
|
||||
{
|
||||
int fd;
|
||||
int fdleft;
|
||||
|
||||
while ((fd = open(Nulldev,O_WRONLY,0)) < 0) {
|
||||
if (errno == EMFILE) {
|
||||
errno = 0;
|
||||
FreeFd = 0;
|
||||
AssertLruRoom();
|
||||
} else {
|
||||
elog(WARN,"Open: %s in %s line %d\n", Nulldev,
|
||||
__FILE__, __LINE__);
|
||||
}
|
||||
}
|
||||
close(fd);
|
||||
++allocatedFiles;
|
||||
fdleft = MAXFILES - allocatedFiles;
|
||||
if (fdleft < 6) {
|
||||
elog(DEBUG,"warning: few usable file descriptors left (%d)", fdleft);
|
||||
}
|
||||
|
||||
DO_DB(printf("DEBUG: AllocatedFile. FreeFd = %d\n",
|
||||
FreeFd));
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX What happens if FreeFile() is called without a previous
|
||||
* AllocateFile()?
|
||||
*/
|
||||
void
|
||||
FreeFile()
|
||||
{
|
||||
DO_DB(printf("DEBUG: FreeFile. FreeFd now %d\n",
|
||||
FreeFd));
|
||||
FreeFd++;
|
||||
nfile++; /* dangerous */
|
||||
Assert(allocatedFiles > 0);
|
||||
--allocatedFiles;
|
||||
}
|
||||
|
||||
void
|
||||
closeAllVfds()
|
||||
{
|
||||
int i;
|
||||
for (i=0; i<SizeVfdCache; i++) {
|
||||
if (!FileIsNotOpen(i))
|
||||
LruDelete(i);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
closeOneVfd()
|
||||
{
|
||||
int tmpfd;
|
||||
|
||||
tmpfd = open(Nulldev, O_CREAT | O_RDWR, 0666);
|
||||
if (tmpfd < 0) {
|
||||
FreeFd = 0;
|
||||
AssertLruRoom();
|
||||
FreeFd = 0;
|
||||
}
|
||||
else
|
||||
close(tmpfd);
|
||||
}
|
||||
285
src/backend/storage/ipc.h
Normal file
285
src/backend/storage/ipc.h
Normal file
@@ -0,0 +1,285 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* ipc.h--
|
||||
* POSTGRES inter-process communication definitions.
|
||||
*
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: ipc.h,v 1.1.1.1 1996/07/09 06:21:52 scrappy Exp $
|
||||
*
|
||||
* NOTES
|
||||
* This file is very architecture-specific. This stuff should actually
|
||||
* be factored into the port/ directories.
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef IPC_H
|
||||
#define IPC_H
|
||||
|
||||
#include <sys/types.h>
|
||||
#ifndef _IPC_
|
||||
#define _IPC_
|
||||
#include <sys/ipc.h>
|
||||
#endif
|
||||
|
||||
#include "c.h"
|
||||
|
||||
/*
|
||||
* Many architectures have support for user-level spinlocks (i.e., an
|
||||
* atomic test-and-set instruction). However, we have only written
|
||||
* spinlock code for the architectures listed.
|
||||
*/
|
||||
#if defined(PORTNAME_aix) || \
|
||||
defined(PORTNAME_alpha) || \
|
||||
defined(PORTNAME_hpux) || \
|
||||
defined(PORTNAME_irix5) || \
|
||||
defined(PORTNAME_next) || \
|
||||
defined(PORTNAME_sparc) || \
|
||||
defined(PORTNAME_sparc_solaris) || \
|
||||
(defined(__i386__) && defined(__GNUC__))
|
||||
#define HAS_TEST_AND_SET
|
||||
#endif
|
||||
|
||||
#if defined(HAS_TEST_AND_SET)
|
||||
|
||||
#if defined(PORTNAME_next)
|
||||
/*
|
||||
* Use Mach mutex routines since these are, in effect, test-and-set
|
||||
* spinlocks.
|
||||
*/
|
||||
#undef NEVER /* definition in cthreads.h conflicts with parse.h */
|
||||
#include <mach/cthreads.h>
|
||||
|
||||
typedef struct mutex slock_t;
|
||||
#else /* next */
|
||||
#if defined(PORTNAME_aix)
|
||||
/*
|
||||
* The AIX C library has the cs(3) builtin for compare-and-set that
|
||||
* operates on ints.
|
||||
*/
|
||||
typedef unsigned int slock_t;
|
||||
#else /* aix */
|
||||
#if defined(PORTNAME_alpha)
|
||||
#include <sys/mman.h>
|
||||
typedef msemaphore slock_t;
|
||||
#else /* alpha */
|
||||
#if defined(PORTNAME_hpux)
|
||||
/*
|
||||
* The PA-RISC "semaphore" for the LDWCX instruction is 4 bytes aligned
|
||||
* to a 16-byte boundary.
|
||||
*/
|
||||
typedef struct { int sem[4]; } slock_t;
|
||||
#else /* hpux */
|
||||
#if defined(PORTNAME_irix5)
|
||||
#include <abi_mutex.h>
|
||||
typedef abilock_t slock_t;
|
||||
#else /* irix5 */
|
||||
/*
|
||||
* On all other architectures spinlocks are a single byte.
|
||||
*/
|
||||
typedef unsigned char slock_t;
|
||||
#endif /* irix5 */
|
||||
#endif /* hpux */
|
||||
#endif /* alpha */
|
||||
#endif /* aix */
|
||||
#endif /* next */
|
||||
|
||||
extern void S_LOCK(slock_t *lock);
|
||||
extern void S_UNLOCK(slock_t *lock);
|
||||
extern void S_INIT_LOCK(slock_t *lock);
|
||||
|
||||
#if defined(PORTNAME_hpux) || defined(PORTNAME_alpha) || defined(PORTNAME_irix5) || defined(PORTNAME_next)
|
||||
extern int S_LOCK_FREE(slock_t *lock);
|
||||
#else /* PORTNAME_hpux */
|
||||
#define S_LOCK_FREE(lock) ((*lock) == 0)
|
||||
#endif /* PORTNAME_hpux */
|
||||
|
||||
#endif /* HAS_TEST_AND_SET */
|
||||
|
||||
/*
|
||||
* On architectures for which we have not implemented spinlocks (or
|
||||
* cannot do so), we use System V semaphores. We also use them for
|
||||
* long locks. For some reason union semun is never defined in the
|
||||
* System V header files so we must do it ourselves.
|
||||
*/
|
||||
#if defined(sequent) || \
|
||||
defined(PORTNAME_aix) || \
|
||||
defined(PORTNAME_alpha) || \
|
||||
defined(PORTNAME_hpux) || \
|
||||
defined(PORTNAME_sparc_solaris) || \
|
||||
defined(WIN32) || \
|
||||
defined(PORTNAME_ultrix4)
|
||||
union semun {
|
||||
int val;
|
||||
struct semid_ds *buf;
|
||||
unsigned short *array;
|
||||
};
|
||||
#endif
|
||||
|
||||
typedef uint16 SystemPortAddress;
|
||||
|
||||
/* semaphore definitions */
|
||||
|
||||
#define IPCProtection (0600) /* access/modify by user only */
|
||||
|
||||
#define IPC_NMAXSEM 25 /* maximum number of semaphores */
|
||||
#define IpcSemaphoreDefaultStartValue 255
|
||||
#define IpcSharedLock (-1)
|
||||
#define IpcExclusiveLock (-255)
|
||||
|
||||
#define IpcUnknownStatus (-1)
|
||||
#define IpcInvalidArgument (-2)
|
||||
#define IpcSemIdExist (-3)
|
||||
#define IpcSemIdNotExist (-4)
|
||||
|
||||
typedef uint32 IpcSemaphoreKey; /* semaphore key */
|
||||
typedef int IpcSemaphoreId;
|
||||
|
||||
/* shared memory definitions */
|
||||
|
||||
#define IpcMemCreationFailed (-1)
|
||||
#define IpcMemIdGetFailed (-2)
|
||||
#define IpcMemAttachFailed 0
|
||||
|
||||
typedef uint32 IPCKey;
|
||||
#define PrivateIPCKey IPC_PRIVATE
|
||||
#define DefaultIPCKey 17317
|
||||
|
||||
typedef uint32 IpcMemoryKey; /* shared memory key */
|
||||
typedef int IpcMemoryId;
|
||||
|
||||
|
||||
/* ipc.c */
|
||||
extern void exitpg(int code);
|
||||
extern void quasi_exitpg(void);
|
||||
extern on_exitpg(void (*function)(), caddr_t arg);
|
||||
|
||||
extern IpcSemaphoreId IpcSemaphoreCreate(IpcSemaphoreKey semKey,
|
||||
int semNum, int permission, int semStartValue,
|
||||
int removeOnExit, int *status);
|
||||
extern void IpcSemaphoreSet(int semId, int semno, int value);
|
||||
extern void IpcSemaphoreKill(IpcSemaphoreKey key);
|
||||
extern void IpcSemaphoreLock(IpcSemaphoreId semId, int sem, int lock);
|
||||
extern void IpcSemaphoreUnlock(IpcSemaphoreId semId, int sem, int lock);
|
||||
extern int IpcSemaphoreGetCount(IpcSemaphoreId semId, int sem);
|
||||
extern int IpcSemaphoreGetValue(IpcSemaphoreId semId, int sem);
|
||||
extern IpcMemoryId IpcMemoryCreate(IpcMemoryKey memKey, uint32 size,
|
||||
int permission);
|
||||
extern IpcMemoryId IpcMemoryIdGet(IpcMemoryKey memKey, uint32 size);
|
||||
extern void IpcMemoryDetach(int status, char *shmaddr);
|
||||
extern char *IpcMemoryAttach(IpcMemoryId memId);
|
||||
extern void IpcMemoryKill(IpcMemoryKey memKey);
|
||||
extern void CreateAndInitSLockMemory(IPCKey key);
|
||||
extern void AttachSLockMemory(IPCKey key);
|
||||
|
||||
|
||||
#ifdef HAS_TEST_AND_SET
|
||||
|
||||
#define NSLOCKS 2048
|
||||
#define NOLOCK 0
|
||||
#define SHAREDLOCK 1
|
||||
#define EXCLUSIVELOCK 2
|
||||
|
||||
typedef enum _LockId_ {
|
||||
BUFMGRLOCKID,
|
||||
LOCKLOCKID,
|
||||
OIDGENLOCKID,
|
||||
SHMEMLOCKID,
|
||||
BINDINGLOCKID,
|
||||
LOCKMGRLOCKID,
|
||||
SINVALLOCKID,
|
||||
|
||||
#ifdef MAIN_MEMORY
|
||||
MMCACHELOCKID,
|
||||
#endif /* MAIN_MEMORY */
|
||||
|
||||
PROCSTRUCTLOCKID,
|
||||
FIRSTFREELOCKID
|
||||
} _LockId_;
|
||||
|
||||
#define MAX_SPINS FIRSTFREELOCKID
|
||||
|
||||
typedef struct slock {
|
||||
slock_t locklock;
|
||||
unsigned char flag;
|
||||
short nshlocks;
|
||||
slock_t shlock;
|
||||
slock_t exlock;
|
||||
slock_t comlock;
|
||||
struct slock *next;
|
||||
} SLock;
|
||||
|
||||
extern void ExclusiveLock(int lockid);
|
||||
extern void ExclusiveUnlock(int lockid);
|
||||
extern bool LockIsFree(int lockid);
|
||||
#else /* HAS_TEST_AND_SET */
|
||||
|
||||
typedef enum _LockId_ {
|
||||
SHMEMLOCKID,
|
||||
BINDINGLOCKID,
|
||||
BUFMGRLOCKID,
|
||||
LOCKMGRLOCKID,
|
||||
SINVALLOCKID,
|
||||
|
||||
#ifdef MAIN_MEMORY
|
||||
MMCACHELOCKID,
|
||||
#endif /* MAIN_MEMORY */
|
||||
|
||||
PROCSTRUCTLOCKID,
|
||||
OIDGENLOCKID,
|
||||
FIRSTFREELOCKID
|
||||
} _LockId_;
|
||||
|
||||
#define MAX_SPINS FIRSTFREELOCKID
|
||||
|
||||
#endif /* HAS_TEST_AND_SET */
|
||||
|
||||
/*
|
||||
* the following are originally in ipci.h but the prototypes have circular
|
||||
* dependencies and most files include both ipci.h and ipc.h anyway, hence
|
||||
* combined.
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* Note:
|
||||
* These must not hash to DefaultIPCKey or PrivateIPCKey.
|
||||
*/
|
||||
#define SystemPortAddressGetIPCKey(address) \
|
||||
(28597 * (address) + 17491)
|
||||
|
||||
/*
|
||||
* these keys are originally numbered from 1 to 12 consecutively but not
|
||||
* all are used. The unused ones are removed. - ay 4/95.
|
||||
*/
|
||||
#define IPCKeyGetBufferMemoryKey(key) \
|
||||
((key == PrivateIPCKey) ? key : 1 + (key))
|
||||
|
||||
#define IPCKeyGetSIBufferMemoryBlock(key) \
|
||||
((key == PrivateIPCKey) ? key : 7 + (key))
|
||||
|
||||
#define IPCKeyGetSLockSharedMemoryKey(key) \
|
||||
((key == PrivateIPCKey) ? key : 10 + (key))
|
||||
|
||||
#define IPCKeyGetSpinLockSemaphoreKey(key) \
|
||||
((key == PrivateIPCKey) ? key : 11 + (key))
|
||||
#define IPCKeyGetWaitIOSemaphoreKey(key) \
|
||||
((key == PrivateIPCKey) ? key : 12 + (key))
|
||||
|
||||
/* --------------------------
|
||||
* NOTE: This macro must always give the highest numbered key as every backend
|
||||
* process forked off by the postmaster will be trying to acquire a semaphore
|
||||
* with a unique key value starting at key+14 and incrementing up. Each
|
||||
* backend uses the current key value then increments it by one.
|
||||
* --------------------------
|
||||
*/
|
||||
#define IPCGetProcessSemaphoreInitKey(key) \
|
||||
((key == PrivateIPCKey) ? key : 14 + (key))
|
||||
|
||||
/* ipci.c */
|
||||
extern IPCKey SystemPortAddressCreateIPCKey(SystemPortAddress address);
|
||||
extern void CreateSharedMemoryAndSemaphores(IPCKey key);
|
||||
extern void AttachSharedMemoryAndSemaphores(IPCKey key);
|
||||
|
||||
#endif /* IPC_H */
|
||||
15
src/backend/storage/ipc/Makefile.inc
Normal file
15
src/backend/storage/ipc/Makefile.inc
Normal file
@@ -0,0 +1,15 @@
|
||||
#-------------------------------------------------------------------------
|
||||
#
|
||||
# Makefile.inc--
|
||||
# Makefile for storage/ipc
|
||||
#
|
||||
# Copyright (c) 1994, Regents of the University of California
|
||||
#
|
||||
#
|
||||
# IDENTIFICATION
|
||||
# $Header: /cvsroot/pgsql/src/backend/storage/ipc/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:54 scrappy Exp $
|
||||
#
|
||||
#-------------------------------------------------------------------------
|
||||
|
||||
SUBSRCS+= ipc.c ipci.c s_lock.c shmem.c shmqueue.c sinval.c \
|
||||
sinvaladt.c spin.c
|
||||
31
src/backend/storage/ipc/README
Normal file
31
src/backend/storage/ipc/README
Normal file
@@ -0,0 +1,31 @@
|
||||
$Header: /cvsroot/pgsql/src/backend/storage/ipc/README,v 1.1.1.1 1996/07/09 06:21:54 scrappy Exp $
|
||||
Mon Jul 18 11:09:22 PDT 1988 W.KLAS
|
||||
|
||||
Cache invalidation synchronization routines:
|
||||
===========================================
|
||||
|
||||
The cache synchronization is done using a message queue. Every
|
||||
backend can register a message which then has to be read by
|
||||
all backends. A message read by all backends is removed from the
|
||||
queue automatically. If a message has been lost because the buffer
|
||||
was full, all backends that haven't read this message will be
|
||||
noticed that they have to reset their cache state. This is done
|
||||
at the time when they try to read the message queue.
|
||||
|
||||
The message queue is implemented as a shared buffer segment. Actually,
|
||||
the queue is a circle to allow fast inserting, reading (invalidate data) and
|
||||
maintaining the buffer.
|
||||
|
||||
Access to this shared message buffer is synchronized by the lock manager.
|
||||
The lock manager treats the buffer as a regular relation and sets
|
||||
relation level locks (with mode = LockWait) to block backends while
|
||||
another backend is writing or reading the buffer. The identifiers used
|
||||
for this special 'relation' are database id = 0 and relation id = 0.
|
||||
|
||||
The current implementation prints regular (e)log information
|
||||
when a message has been removed from the buffer because the buffer
|
||||
is full, and a backend has to reset its cache state. The elog level
|
||||
is NOTICE. This can be used to improve the behavior of backends
when invalidating or resetting their cache state.
|
||||
|
||||
|
||||
718
src/backend/storage/ipc/ipc.c
Normal file
718
src/backend/storage/ipc/ipc.c
Normal file
@@ -0,0 +1,718 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* ipc.c--
|
||||
* POSTGRES inter-process communication definitions.
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipc.c,v 1.1.1.1 1996/07/09 06:21:54 scrappy Exp $
|
||||
*
|
||||
* NOTES
|
||||
*
|
||||
* Currently, semaphores are used (my understanding anyway) in two
|
||||
* different ways:
|
||||
* 1. as mutexes on machines that don't have test-and-set (eg.
|
||||
* mips R3000).
|
||||
* 2. for putting processes to sleep when waiting on a lock
|
||||
* and waking them up when the lock is free.
|
||||
* The number of semaphores in (1) is fixed and those are shared
|
||||
* among all backends. In (2), there is 1 semaphore per process and those
|
||||
* are not shared with anyone else.
|
||||
* -ay 4/95
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include <sys/types.h>
|
||||
#include <sys/file.h>
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
|
||||
/* XXX - the following dependency should be moved into the defaults.mk file */
|
||||
#ifndef _IPC_
|
||||
#define _IPC_
|
||||
#include <sys/ipc.h>
|
||||
#include <sys/sem.h>
|
||||
#include <sys/shm.h>
|
||||
#endif
|
||||
|
||||
#include "storage/ipc.h"
|
||||
#include "utils/memutils.h"
|
||||
#include "utils/elog.h"
|
||||
|
||||
#if defined(PORTNAME_bsd44)
|
||||
int UsePrivateMemory = 1;
|
||||
#else
|
||||
int UsePrivateMemory = 0;
|
||||
#endif
|
||||
|
||||
#if defined(PORTNAME_bsdi)
|
||||
/* hacka, hacka, hacka (XXX) */
|
||||
union semun {
|
||||
int val; /* value for SETVAL */
|
||||
struct semid_ds *buf; /* buffer for IPC_STAT & IPC_SET */
|
||||
ushort *array; /* array for GETALL & SETALL */
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------
|
||||
* exit() handling stuff
|
||||
* ----------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#define MAX_ON_EXITS 20
|
||||
|
||||
static struct ONEXIT {
|
||||
void (*function)();
|
||||
caddr_t arg;
|
||||
} onexit_list[ MAX_ON_EXITS ];
|
||||
|
||||
static int onexit_index;
|
||||
|
||||
typedef struct _PrivateMemStruct {
|
||||
int id;
|
||||
char *memptr;
|
||||
} PrivateMem;
|
||||
|
||||
PrivateMem IpcPrivateMem[16];
|
||||
|
||||
static int
|
||||
PrivateMemoryCreate(IpcMemoryKey memKey,
|
||||
uint32 size)
|
||||
{
|
||||
static int memid = 0;
|
||||
|
||||
UsePrivateMemory = 1;
|
||||
|
||||
IpcPrivateMem[memid].id = memid;
|
||||
IpcPrivateMem[memid].memptr = malloc(size);
|
||||
if (IpcPrivateMem[memid].memptr == NULL)
|
||||
elog(WARN, "PrivateMemoryCreate: not enough memory to malloc");
|
||||
memset(IpcPrivateMem[memid].memptr, 0, size); /* XXX PURIFY */
|
||||
|
||||
return (memid++);
|
||||
}
|
||||
|
||||
static char *
|
||||
PrivateMemoryAttach(IpcMemoryId memid)
|
||||
{
|
||||
return ( IpcPrivateMem[memid].memptr );
|
||||
}
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------
|
||||
* exitpg
|
||||
*
|
||||
* this function calls all the callbacks registered
|
||||
* for it (to free resources) and then calls exit.
|
||||
* This should be the only function to call exit().
|
||||
* -cim 2/6/90
|
||||
* ----------------------------------------------------------------
|
||||
*/
|
||||
/* set while exitpg/quasi_exitpg runs, to suppress recursive invocation */
static int exitpg_inprogress = 0;

/*
 * exitpg --
 *	Runs every callback registered via on_exitpg(), newest first, then
 *	calls exit(code).  Intended to be the only place exit() is called.
 *	Each callback receives (code, arg) — arg as registered.
 */
void
exitpg(int code)
{
    int i;

    /*
     * If exitpg_inprogress is set we are being re-entered from within an
     * exit handler; return immediately to avoid infinite recursion.
     */
    if (exitpg_inprogress)
	return;

    exitpg_inprogress = 1;

    /* call the registered callbacks, most recently registered first */
    for (i = onexit_index - 1; i >= 0; --i)
	(*onexit_list[i].function)(code, onexit_list[i].arg);

    exit(code);
}
|
||||
|
||||
/* ------------------
|
||||
* Run all of the on_exitpg routines but don't exit in the end.
|
||||
* This is used by the postmaster to re-initialize shared memory and
|
||||
* semaphores after a backend dies horribly
|
||||
* ------------------
|
||||
*/
|
||||
/*
 * quasi_exitpg --
 *	Runs all of the on_exitpg callbacks (with exit code 0) but does NOT
 *	exit afterwards.  Used by the postmaster to re-initialize shared
 *	memory and semaphores after a backend dies horribly.  On completion
 *	the callback list is emptied and the recursion guard cleared so the
 *	exit machinery can be used again.
 */
void
quasi_exitpg()
{
    int i;

    /* guard against re-entry from within a running exit handler */
    if (exitpg_inprogress)
	return;

    exitpg_inprogress = 1;

    /* call the registered callbacks, most recently registered first */
    for (i = onexit_index - 1; i >= 0; --i)
	(*onexit_list[i].function)(0, onexit_list[i].arg);

    /* reset state: the callback list is consumed, not preserved */
    onexit_index = 0;
    exitpg_inprogress = 0;
}
|
||||
|
||||
/* ----------------------------------------------------------------
|
||||
* on_exitpg
|
||||
*
|
||||
* this function adds a callback function to the list of
|
||||
* functions invoked by exitpg(). -cim 2/6/90
|
||||
* ----------------------------------------------------------------
|
||||
*/
|
||||
int
|
||||
on_exitpg(void (*function)(), caddr_t arg)
|
||||
{
|
||||
if (onexit_index >= MAX_ON_EXITS)
|
||||
return(-1);
|
||||
|
||||
onexit_list[ onexit_index ].function = function;
|
||||
onexit_list[ onexit_index ].arg = arg;
|
||||
|
||||
++onexit_index;
|
||||
|
||||
return(0);
|
||||
}
|
||||
|
||||
/****************************************************************************/
|
||||
/* IPCPrivateSemaphoreKill(status, semId) */
|
||||
/* */
|
||||
/****************************************************************************/
|
||||
/*
 * IPCPrivateSemaphoreKill --
 *	exitpg() callback: removes the semaphore set identified by semId.
 *	The semun argument is required by semctl's signature but its value
 *	is ignored for IPC_RMID, so it is deliberately left uninitialized.
 */
static void
IPCPrivateSemaphoreKill(int status,
			int semId)	/* caddr_t */
{
    union semun semun;
    semctl(semId, 0, IPC_RMID, semun);
}
|
||||
|
||||
|
||||
/****************************************************************************/
|
||||
/* IPCPrivateMemoryKill(status, shmId) */
|
||||
/* */
|
||||
/****************************************************************************/
|
||||
/*
 * IPCPrivateMemoryKill --
 *	exitpg() callback: removes the shared memory segment shmId.
 *	When UsePrivateMemory is set the "segment" is really a malloc'd
 *	block (see PrivateMemoryCreate) and nothing is done — the free()
 *	is intentionally commented out since the process is exiting anyway.
 */
static void
IPCPrivateMemoryKill(int status,
		     int shmId)		/* caddr_t */
{
    if ( UsePrivateMemory ) {
	/* free ( IpcPrivateMem[shmId].memptr ); */
    } else {
	if (shmctl(shmId, IPC_RMID, (struct shmid_ds *) NULL) < 0) {
	    elog(NOTICE, "IPCPrivateMemoryKill: shmctl(%d, %d, 0) failed: %m",
		 shmId, IPC_RMID);
	}
    }
}
|
||||
|
||||
|
||||
/****************************************************************************/
|
||||
/* IpcSemaphoreCreate(semKey, semNum, permission, semStartValue) */
|
||||
/* */
|
||||
/* - returns a semaphore identifier: */
|
||||
/* */
|
||||
/* if key doesn't exist: return a new id, status:= IpcSemIdNotExist */
|
||||
/* if key exists: return the old id, status:= IpcSemIdExist */
|
||||
/* if semNum > MAX : return # of argument, status:=IpcInvalidArgument */
|
||||
/* */
|
||||
/****************************************************************************/
|
||||
|
||||
/*
|
||||
* Note:
|
||||
* XXX This should be split into two different calls. One should
|
||||
* XXX be used to create a semaphore set. The other to "attach" a
|
||||
* XXX existing set. It should be an error for the semaphore set
|
||||
* XXX to to already exist or for it not to, respectively.
|
||||
*
|
||||
* Currently, the semaphore sets are "attached" and an error
|
||||
* is detected only when a later shared memory attach fails.
|
||||
*/
|
||||
|
||||
/*
 * IpcSemaphoreCreate --
 *	Attaches to the semaphore set for semKey, creating it (with semNum
 *	semaphores, all initialized to semStartValue) if it does not exist.
 *
 *	*status reports what happened:
 *	  IpcSemIdExist      - set already existed; its id is returned
 *	  IpcSemIdNotExist   - set was newly created; its id is returned
 *	  IpcInvalidArgument - semNum out of range; returns 2 (the ordinal
 *	                       of the offending argument)
 *
 *	If removeOnExit is set, an exitpg() callback is registered to remove
 *	the newly created set.  On semget() failure this exits the process
 *	via exitpg(3).
 */
IpcSemaphoreId
IpcSemaphoreCreate(IpcSemaphoreKey semKey,
		   int semNum,
		   int permission,
		   int semStartValue,
		   int removeOnExit,
		   int *status)
{
    int		i;
    int		errStatus;
    int		semId;
    u_short	array[IPC_NMAXSEM];
    union semun	semun;

    /* get a semaphore if non-existent */
    /* check arguments */
    if (semNum > IPC_NMAXSEM || semNum <= 0) {
	*status = IpcInvalidArgument;
	return(2);	/* returns the number of the invalid argument */
    }

    /* probe for an existing set without creating one (nsems = 0) */
    semId = semget(semKey, 0, 0);

    if (semId == -1) {
	*status = IpcSemIdNotExist;	/* there doesn't exist a semaphore */
#ifdef DEBUG_IPC
	fprintf(stderr,"calling semget with %d, %d , %d\n",
		semKey,
		semNum,
		IPC_CREAT|permission );
#endif
	semId = semget(semKey, semNum, IPC_CREAT|permission);

	if (semId < 0) {
	    perror("semget");
	    exitpg(3);
	}
	/* initialize every semaphore in the new set to semStartValue */
	for (i = 0; i < semNum; i++) {
	    array[i] = semStartValue;
	}
	semun.array = array;
	errStatus = semctl(semId, 0, SETALL, semun);
	if (errStatus == -1) {
	    perror("semctl");
	}

	if (removeOnExit)
	    on_exitpg(IPCPrivateSemaphoreKill, (caddr_t)semId);

    } else {
	/* there is a semaphore id for this key */
	*status = IpcSemIdExist;
    }

#ifdef DEBUG_IPC
    fprintf(stderr,"\nIpcSemaphoreCreate, status %d, returns %d\n",
	    *status,
	    semId );
    fflush(stdout);
    fflush(stderr);
#endif
    return(semId);
}
|
||||
|
||||
|
||||
/****************************************************************************/
|
||||
/* IpcSemaphoreSet() - sets the initial value of the semaphore */
|
||||
/* */
|
||||
/* note: the xxx_return variables are only used for debugging. */
|
||||
/****************************************************************************/
|
||||
static int IpcSemaphoreSet_return;
|
||||
|
||||
void
|
||||
IpcSemaphoreSet(int semId, int semno, int value)
|
||||
{
|
||||
int errStatus;
|
||||
union semun semun;
|
||||
|
||||
semun.val = value;
|
||||
errStatus = semctl(semId, semno, SETVAL, semun);
|
||||
IpcSemaphoreSet_return = errStatus;
|
||||
|
||||
if (errStatus == -1)
|
||||
perror("semctl");
|
||||
}
|
||||
|
||||
/****************************************************************************/
|
||||
/* IpcSemaphoreKill(key) - removes a semaphore */
|
||||
/* */
|
||||
/****************************************************************************/
|
||||
void
|
||||
IpcSemaphoreKill(IpcSemaphoreKey key)
|
||||
{
|
||||
int semId;
|
||||
union semun semun;
|
||||
|
||||
/* kill semaphore if existent */
|
||||
|
||||
semId = semget(key, 0, 0);
|
||||
if (semId != -1)
|
||||
semctl(semId, 0, IPC_RMID, semun);
|
||||
}
|
||||
|
||||
/****************************************************************************/
|
||||
/* IpcSemaphoreLock(semId, sem, lock) - locks a semaphore */
|
||||
/* */
|
||||
/* note: the xxx_return variables are only used for debugging. */
|
||||
/****************************************************************************/
|
||||
static int IpcSemaphoreLock_return;
|
||||
|
||||
void
|
||||
IpcSemaphoreLock(IpcSemaphoreId semId, int sem, int lock)
|
||||
{
|
||||
extern int errno;
|
||||
int errStatus;
|
||||
struct sembuf sops;
|
||||
|
||||
sops.sem_op = lock;
|
||||
sops.sem_flg = 0;
|
||||
sops.sem_num = sem;
|
||||
|
||||
/* ----------------
|
||||
* Note: if errStatus is -1 and errno == EINTR then it means we
|
||||
* returned from the operation prematurely because we were
|
||||
* sent a signal. So we try and lock the semaphore again.
|
||||
* I am not certain this is correct, but the semantics aren't
|
||||
* clear it fixes problems with parallel abort synchronization,
|
||||
* namely that after processing an abort signal, the semaphore
|
||||
* call returns with -1 (and errno == EINTR) before it should.
|
||||
* -cim 3/28/90
|
||||
* ----------------
|
||||
*/
|
||||
do {
|
||||
errStatus = semop(semId, &sops, 1);
|
||||
} while (errStatus == -1 && errno == EINTR);
|
||||
|
||||
IpcSemaphoreLock_return = errStatus;
|
||||
|
||||
if (errStatus == -1) {
|
||||
perror("semop");
|
||||
exitpg(255);
|
||||
}
|
||||
}
|
||||
|
||||
/****************************************************************************/
|
||||
/* IpcSemaphoreUnlock(semId, sem, lock) - unlocks a semaphore */
|
||||
/* */
|
||||
/* note: the xxx_return variables are only used for debugging. */
|
||||
/****************************************************************************/
|
||||
static int IpcSemaphoreUnlock_return;
|
||||
|
||||
void
|
||||
IpcSemaphoreUnlock(IpcSemaphoreId semId, int sem, int lock)
|
||||
{
|
||||
extern int errno;
|
||||
int errStatus;
|
||||
struct sembuf sops;
|
||||
|
||||
sops.sem_op = -lock;
|
||||
sops.sem_flg = 0;
|
||||
sops.sem_num = sem;
|
||||
|
||||
|
||||
/* ----------------
|
||||
* Note: if errStatus is -1 and errno == EINTR then it means we
|
||||
* returned from the operation prematurely because we were
|
||||
* sent a signal. So we try and lock the semaphore again.
|
||||
* I am not certain this is correct, but the semantics aren't
|
||||
* clear it fixes problems with parallel abort synchronization,
|
||||
* namely that after processing an abort signal, the semaphore
|
||||
* call returns with -1 (and errno == EINTR) before it should.
|
||||
* -cim 3/28/90
|
||||
* ----------------
|
||||
*/
|
||||
do {
|
||||
errStatus = semop(semId, &sops, 1);
|
||||
} while (errStatus == -1 && errno == EINTR);
|
||||
|
||||
IpcSemaphoreUnlock_return = errStatus;
|
||||
|
||||
if (errStatus == -1) {
|
||||
perror("semop");
|
||||
exitpg(255);
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
IpcSemaphoreGetCount(IpcSemaphoreId semId, int sem)
|
||||
{
|
||||
int semncnt;
|
||||
union semun dummy; /* for Solaris */
|
||||
|
||||
semncnt = semctl(semId, sem, GETNCNT, dummy);
|
||||
return semncnt;
|
||||
}
|
||||
|
||||
int
|
||||
IpcSemaphoreGetValue(IpcSemaphoreId semId, int sem)
|
||||
{
|
||||
int semval;
|
||||
union semun dummy; /* for Solaris */
|
||||
|
||||
semval = semctl(semId, sem, GETVAL, dummy);
|
||||
return semval;
|
||||
}
|
||||
|
||||
/****************************************************************************/
|
||||
/* IpcMemoryCreate(memKey) */
|
||||
/* */
|
||||
/* - returns the memory identifier, if creation succeeds */
|
||||
/* returns IpcMemCreationFailed, if failure */
|
||||
/****************************************************************************/
|
||||
|
||||
IpcMemoryId
|
||||
IpcMemoryCreate(IpcMemoryKey memKey, uint32 size, int permission)
|
||||
{
|
||||
IpcMemoryId shmid;
|
||||
|
||||
if (memKey == PrivateIPCKey) {
|
||||
/* private */
|
||||
shmid = PrivateMemoryCreate(memKey, size);
|
||||
}else {
|
||||
shmid = shmget(memKey, size, IPC_CREAT|permission);
|
||||
}
|
||||
|
||||
if (shmid < 0) {
|
||||
fprintf(stderr,"IpcMemoryCreate: memKey=%d , size=%d , permission=%d",
|
||||
memKey, size , permission );
|
||||
perror("IpcMemoryCreate: shmget(..., create, ...) failed");
|
||||
return(IpcMemCreationFailed);
|
||||
}
|
||||
|
||||
/* if (memKey == PrivateIPCKey) */
|
||||
on_exitpg(IPCPrivateMemoryKill, (caddr_t)shmid);
|
||||
|
||||
return(shmid);
|
||||
}
|
||||
|
||||
/****************************************************************************/
|
||||
/* IpcMemoryIdGet(memKey, size) returns the shared memory Id */
|
||||
/* or IpcMemIdGetFailed */
|
||||
/****************************************************************************/
|
||||
IpcMemoryId
|
||||
IpcMemoryIdGet(IpcMemoryKey memKey, uint32 size)
|
||||
{
|
||||
IpcMemoryId shmid;
|
||||
|
||||
shmid = shmget(memKey, size, 0);
|
||||
|
||||
if (shmid < 0) {
|
||||
fprintf(stderr,"IpcMemoryIdGet: memKey=%d , size=%d , permission=%d",
|
||||
memKey, size , 0 );
|
||||
perror("IpcMemoryIdGet: shmget() failed");
|
||||
return(IpcMemIdGetFailed);
|
||||
}
|
||||
|
||||
return(shmid);
|
||||
}
|
||||
|
||||
/****************************************************************************/
|
||||
/* IpcMemoryDetach(status, shmaddr) removes a shared memory segment */
|
||||
/* from a backend address space */
|
||||
/* (only called by backends running under the postmaster) */
|
||||
/****************************************************************************/
|
||||
void
|
||||
IpcMemoryDetach(int status, char *shmaddr)
|
||||
{
|
||||
if (shmdt(shmaddr) < 0) {
|
||||
elog(NOTICE, "IpcMemoryDetach: shmdt(0x%x): %m", shmaddr);
|
||||
}
|
||||
}
|
||||
|
||||
/****************************************************************************/
|
||||
/* IpcMemoryAttach(memId) returns the adress of shared memory */
|
||||
/* or IpcMemAttachFailed */
|
||||
/* */
|
||||
/* CALL IT: addr = (struct <MemoryStructure> *) IpcMemoryAttach(memId); */
|
||||
/* */
|
||||
/****************************************************************************/
|
||||
/*
 * IpcMemoryAttach --
 *	Maps the shared memory segment memId into this process and returns
 *	its address, or IpcMemAttachFailed on failure.  When running with
 *	private memory the address comes from the malloc-backed table.
 *	For real segments an exitpg() callback is registered to detach.
 *
 *	CALL IT:  addr = (struct <MemoryStructure> *) IpcMemoryAttach(memId);
 */
char *
IpcMemoryAttach(IpcMemoryId memId)
{
    char *memAddress;

    if (UsePrivateMemory) {
	memAddress = (char *) PrivateMemoryAttach(memId);
    } else {
	memAddress = (char *) shmat(memId, 0, 0);
    }

    /* shmat signals failure by returning (char *) -1, not NULL */
    /* if ( *memAddress == -1) { XXX ??? */
    if ( memAddress == (char *)-1) {
	perror("IpcMemoryAttach: shmat() failed");
	return(IpcMemAttachFailed);
    }

    if (!UsePrivateMemory)
	on_exitpg(IpcMemoryDetach, (caddr_t) memAddress);

    return((char *) memAddress);
}
|
||||
|
||||
|
||||
/****************************************************************************/
|
||||
/* IpcMemoryKill(memKey) removes a shared memory segment */
|
||||
/* (only called by the postmaster and standalone backends) */
|
||||
/****************************************************************************/
|
||||
void
|
||||
IpcMemoryKill(IpcMemoryKey memKey)
|
||||
{
|
||||
IpcMemoryId shmid;
|
||||
|
||||
if (!UsePrivateMemory && (shmid = shmget(memKey, 0, 0)) >= 0) {
|
||||
if (shmctl(shmid, IPC_RMID, (struct shmid_ds *) NULL) < 0) {
|
||||
elog(NOTICE, "IpcMemoryKill: shmctl(%d, %d, 0) failed: %m",
|
||||
shmid, IPC_RMID);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef HAS_TEST_AND_SET
|
||||
/* ------------------
|
||||
* use hardware locks to replace semaphores for sequent machines
|
||||
* to avoid costs of swapping processes and to provide unlimited
|
||||
* supply of locks.
|
||||
* ------------------
|
||||
*/
|
||||
/* spinlock table: process-local pointers into the shared segment below */
static SLock *SLockArray = NULL;	/* array of NSLOCKS spinlocks */
static SLock **FreeSLockPP;		/* head of the free-slot list */
static int *UnusedSLockIP;		/* index of first never-used slot */
static slock_t *SLockMemoryLock;	/* guards the slock segment itself */
static IpcMemoryId SLockMemoryId = -1;	/* shared segment id; -1 = not attached */

/* layout of the shared slock segment */
struct ipcdummy {			/* to get alignment/size right */
    SLock	*free;
    int		unused;
    slock_t	memlock;
    SLock	slocks[NSLOCKS];
};
static int SLockMemorySize = sizeof(struct ipcdummy);
|
||||
|
||||
/*
 * CreateAndInitSLockMemory --
 *	Creates the shared slock segment for 'key', attaches it, and
 *	initializes the free list plus the fixed (pre-assigned) locks
 *	numbered 0 .. FIRSTFREELOCKID-1 to the unlocked state.
 *	Called once, by the postmaster.
 */
void
CreateAndInitSLockMemory(IPCKey key)
{
    int		id;
    SLock	*slckP;

    SLockMemoryId = IpcMemoryCreate(key,
				    SLockMemorySize,
				    0700);
    AttachSLockMemory(key);
    *FreeSLockPP = NULL;
    /* dynamic allocation starts just past the pre-assigned lock ids */
    *UnusedSLockIP = (int)FIRSTFREELOCKID;
    for (id=0; id<(int)FIRSTFREELOCKID; id++) {
	slckP = &(SLockArray[id]);
	S_INIT_LOCK(&(slckP->locklock));
	slckP->flag = NOLOCK;
	slckP->nshlocks = 0;
	S_INIT_LOCK(&(slckP->shlock));
	S_INIT_LOCK(&(slckP->exlock));
	S_INIT_LOCK(&(slckP->comlock));
	slckP->next = NULL;
    }
    return;
}
|
||||
|
||||
/*
 * AttachSLockMemory --
 *	Attaches the shared slock segment for 'key' (looking up its id if
 *	this process hasn't already) and points the process-local statics
 *	(FreeSLockPP, UnusedSLockIP, SLockMemoryLock, SLockArray) at the
 *	corresponding fields of the shared struct ipcdummy.
 *	elog(FATAL)s if the segment cannot be found or attached.
 */
void
AttachSLockMemory(IPCKey key)
{
    struct ipcdummy *slockM;

    if (SLockMemoryId == -1)
	SLockMemoryId = IpcMemoryIdGet(key,SLockMemorySize);
    if (SLockMemoryId == -1)
	elog(FATAL, "SLockMemory not in shared memory");
    slockM = (struct ipcdummy *) IpcMemoryAttach(SLockMemoryId);
    if (slockM == IpcMemAttachFailed)
	elog(FATAL, "AttachSLockMemory: could not attach segment");
    FreeSLockPP = (SLock **) &(slockM->free);
    UnusedSLockIP = (int *) &(slockM->unused);
    SLockMemoryLock = (slock_t *) &(slockM->memlock);
    /* NOTE(review): re-initializes the segment-wide lock on every attach,
     * not just on create — presumably safe here; verify against callers. */
    S_INIT_LOCK(SLockMemoryLock);
    SLockArray = (SLock *) &(slockM->slocks[0]);
    return;
}
|
||||
|
||||
|
||||
#ifdef LOCKDEBUG
|
||||
#define PRINT_LOCK(LOCK) printf("(locklock = %d, flag = %d, nshlocks = %d, \
|
||||
shlock = %d, exlock =%d)\n", LOCK->locklock, \
|
||||
LOCK->flag, LOCK->nshlocks, LOCK->shlock, \
|
||||
LOCK->exlock)
|
||||
#endif
|
||||
|
||||
/*
 * ExclusiveLock --
 *	Acquires spinlock 'lockid' exclusively.  If the lock is free it is
 *	claimed immediately (taking both exlock and shlock so that readers
 *	and writers block); otherwise the caller spins on exlock until the
 *	holder releases, then retries from the top.
 */
void
ExclusiveLock(int lockid)
{
    SLock *slckP;
    slckP = &(SLockArray[lockid]);
#ifdef LOCKDEBUG
    printf("ExclusiveLock(%d)\n", lockid);
    printf("IN: ");
    PRINT_LOCK(slckP);
#endif
 ex_try_again:
    /* locklock serializes inspection/update of the flag field */
    S_LOCK(&(slckP->locklock));
    switch (slckP->flag) {
    case NOLOCK:
	slckP->flag = EXCLUSIVELOCK;
	/* hold both sub-locks so shared and exclusive requesters block */
	S_LOCK(&(slckP->exlock));
	S_LOCK(&(slckP->shlock));
	S_UNLOCK(&(slckP->locklock));
#ifdef LOCKDEBUG
	printf("OUT: ");
	PRINT_LOCK(slckP);
#endif
	return;
    case SHAREDLOCK:
    case EXCLUSIVELOCK:
	/* busy: drop locklock, wait for the holder via exlock, retry */
	S_UNLOCK(&(slckP->locklock));
	S_LOCK(&(slckP->exlock));
	S_UNLOCK(&(slckP->exlock));
	goto ex_try_again;
    }
}
|
||||
|
||||
/*
 * ExclusiveUnlock --
 *	Releases an exclusive hold on spinlock 'lockid'.  Waiting readers
 *	are released first (one shlock release per waiter, handshaking on
 *	comlock), then exlock is dropped for any waiting writers.
 */
void
ExclusiveUnlock(int lockid)
{
    SLock *slckP;

    slckP = &(SLockArray[lockid]);
#ifdef LOCKDEBUG
    printf("ExclusiveUnlock(%d)\n", lockid);
    printf("IN: ");
    PRINT_LOCK(slckP);
#endif
    S_LOCK(&(slckP->locklock));
    /* -------------
     * give favor to read processes
     * -------------
     */
    slckP->flag = NOLOCK;
    if (slckP->nshlocks > 0) {
	/* release one queued reader at a time; comlock is the handshake */
	while (slckP->nshlocks > 0) {
	    S_UNLOCK(&(slckP->shlock));
	    S_LOCK(&(slckP->comlock));
	}
	S_UNLOCK(&(slckP->shlock));
    }
    else {
	S_UNLOCK(&(slckP->shlock));
    }
    S_UNLOCK(&(slckP->exlock));
    S_UNLOCK(&(slckP->locklock));
#ifdef LOCKDEBUG
    printf("OUT: ");
    PRINT_LOCK(slckP);
#endif
    return;
}
|
||||
|
||||
bool
|
||||
LockIsFree(int lockid)
|
||||
{
|
||||
return(SLockArray[lockid].flag == NOLOCK);
|
||||
}
|
||||
|
||||
#endif /* HAS_TEST_AND_SET */
|
||||
149
src/backend/storage/ipc/ipci.c
Normal file
149
src/backend/storage/ipc/ipci.c
Normal file
@@ -0,0 +1,149 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* ipci.c--
|
||||
* POSTGRES inter-process communication initialization code.
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipci.c,v 1.1.1.1 1996/07/09 06:21:54 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include "c.h"
|
||||
|
||||
#include "storage/ipc.h"
|
||||
#include "storage/multilev.h"
|
||||
#include "utils/elog.h"
|
||||
#include "storage/sinval.h"
|
||||
#include "storage/bufmgr.h"
|
||||
#include "storage/proc.h"
|
||||
#include "storage/smgr.h"
|
||||
#include "storage/lock.h"
|
||||
#include "miscadmin.h" /* for DebugLvl */
|
||||
|
||||
/*
|
||||
* SystemPortAddressCreateMemoryKey --
|
||||
* Returns a memory key given a port address.
|
||||
*/
|
||||
IPCKey
|
||||
SystemPortAddressCreateIPCKey(SystemPortAddress address)
|
||||
{
|
||||
Assert(address < 32768); /* XXX */
|
||||
|
||||
return (SystemPortAddressGetIPCKey(address));
|
||||
}
|
||||
|
||||
/*
|
||||
* CreateSharedMemoryAndSemaphores --
|
||||
* Creates and initializes shared memory and semaphores.
|
||||
*/
|
||||
/**************************************************
|
||||
|
||||
CreateSharedMemoryAndSemaphores
|
||||
is called exactly *ONCE* by the postmaster.
|
||||
It is *NEVER* called by the postgres backend
|
||||
|
||||
0) destroy any existing semaphores for both buffer
|
||||
and lock managers.
|
||||
1) create the appropriate *SHARED* memory segments
|
||||
for the two resource managers.
|
||||
|
||||
**************************************************/
|
||||
|
||||
void
|
||||
CreateSharedMemoryAndSemaphores(IPCKey key)
|
||||
{
|
||||
int size;
|
||||
|
||||
#ifdef HAS_TEST_AND_SET
|
||||
/* ---------------
|
||||
* create shared memory for slocks
|
||||
* --------------
|
||||
*/
|
||||
CreateAndInitSLockMemory(IPCKeyGetSLockSharedMemoryKey(key));
|
||||
#endif
|
||||
/* ----------------
|
||||
* kill and create the buffer manager buffer pool (and semaphore)
|
||||
* ----------------
|
||||
*/
|
||||
CreateSpinlocks(IPCKeyGetSpinLockSemaphoreKey(key));
|
||||
size = BufferShmemSize() + LockShmemSize();
|
||||
|
||||
#ifdef MAIN_MEMORY
|
||||
size += MMShmemSize();
|
||||
#endif /* MAIN_MEMORY */
|
||||
|
||||
if (DebugLvl > 1) {
|
||||
fprintf(stderr, "binding ShmemCreate(key=%x, size=%d)\n",
|
||||
IPCKeyGetBufferMemoryKey(key), size);
|
||||
}
|
||||
ShmemCreate(IPCKeyGetBufferMemoryKey(key), size);
|
||||
ShmemBindingTabReset();
|
||||
InitShmem(key, size);
|
||||
InitBufferPool(key);
|
||||
|
||||
/* ----------------
|
||||
* do the lock table stuff
|
||||
* ----------------
|
||||
*/
|
||||
InitLocks();
|
||||
InitMultiLevelLockm();
|
||||
if (InitMultiLevelLockm() == INVALID_TABLEID)
|
||||
elog(FATAL, "Couldn't create the lock table");
|
||||
|
||||
/* ----------------
|
||||
* do process table stuff
|
||||
* ----------------
|
||||
*/
|
||||
InitProcGlobal(key);
|
||||
on_exitpg(ProcFreeAllSemaphores, 0);
|
||||
|
||||
CreateSharedInvalidationState(key);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* AttachSharedMemoryAndSemaphores --
|
||||
* Attachs existant shared memory and semaphores.
|
||||
*/
|
||||
/*
 * AttachSharedMemoryAndSemaphores --
 *	Attaches this backend to the shared memory and semaphores created
 *	earlier by the postmaster.  With a private key there is nothing to
 *	attach to, so everything is created fresh instead.
 */
void
AttachSharedMemoryAndSemaphores(IPCKey key)
{
    int size;

    /* ----------------
     * create rather than attach if using private key
     * ----------------
     */
    if (key == PrivateIPCKey) {
	CreateSharedMemoryAndSemaphores(key);
	return;
    }

#ifdef HAS_TEST_AND_SET
    /* ----------------
     * attach the slock shared memory
     * ----------------
     */
    AttachSLockMemory(IPCKeyGetSLockSharedMemoryKey(key));
#endif
    /* ----------------
     * attach the buffer manager buffer pool (and semaphore)
     * ----------------
     */
    /* size must match what CreateSharedMemoryAndSemaphores computed */
    size = BufferShmemSize() + LockShmemSize();
    InitShmem(key, size);
    InitBufferPool(key);

    /* ----------------
     * initialize lock table stuff
     * ----------------
     */
    InitLocks();
    if (InitMultiLevelLockm() == INVALID_TABLEID)
	elog(FATAL, "Couldn't attach to the lock table");

    AttachSharedInvalidationState(key);
}
|
||||
440
src/backend/storage/ipc/s_lock.c
Normal file
440
src/backend/storage/ipc/s_lock.c
Normal file
@@ -0,0 +1,440 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* s_lock.c--
|
||||
* This file contains the implementation (if any) for spinlocks.
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/Attic/s_lock.c,v 1.1.1.1 1996/07/09 06:21:54 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
/*
|
||||
* DESCRIPTION
|
||||
* The following code fragment should be written (in assembly
|
||||
* language) on machines that have a native test-and-set instruction:
|
||||
*
|
||||
* void
|
||||
* S_LOCK(char_address)
|
||||
* char *char_address;
|
||||
* {
|
||||
* while (test_and_set(char_address))
|
||||
* ;
|
||||
* }
|
||||
*
|
||||
* If this is not done, POSTGRES will default to using System V
|
||||
* semaphores (and take a large performance hit -- around 40% of
|
||||
* its time on a DS5000/240 is spent in semop(3)...).
|
||||
*
|
||||
* NOTES
|
||||
* AIX has a test-and-set but the recommended interface is the cs(3)
|
||||
* system call. This provides an 8-instruction (plus system call
|
||||
* overhead) uninterruptible compare-and-set operation. True
|
||||
* spinlocks might be faster but using cs(3) still speeds up the
|
||||
* regression test suite by about 25%. I don't have an assembler
|
||||
* manual for POWER in any case.
|
||||
*
|
||||
*/
|
||||
#ifdef WIN32
|
||||
#include <windows.h>
|
||||
#endif /* WIN32 */
|
||||
#include "storage/ipc.h"
|
||||
|
||||
|
||||
#if defined(HAS_TEST_AND_SET)
|
||||
|
||||
#if defined (PORTNAME_next)
|
||||
/*
|
||||
* NEXTSTEP (mach)
|
||||
* slock_t is defined as a struct mutex.
|
||||
*/
|
||||
void
|
||||
S_LOCK(slock_t *lock)
|
||||
{
|
||||
mutex_lock(lock);
|
||||
}
|
||||
void
|
||||
S_UNLOCK(slock_t *lock)
|
||||
{
|
||||
mutex_unlock(lock);
|
||||
}
|
||||
void
|
||||
S_INIT_LOCK(slock_t *lock)
|
||||
{
|
||||
mutex_init(lock);
|
||||
}
|
||||
|
||||
/* S_LOCK_FREE should return 1 if lock is free; 0 if lock is locked */
|
||||
int
|
||||
S_LOCK_FREE(slock_t *lock)
|
||||
{
|
||||
/* For Mach, we have to delve inside the entrails of `struct
|
||||
mutex'. Ick! */
|
||||
return (lock->lock == 0);
|
||||
}
|
||||
|
||||
#endif /* PORTNAME_next */
|
||||
|
||||
|
||||
|
||||
#if defined(PORTNAME_irix5)
|
||||
/*
|
||||
* SGI IRIX 5
|
||||
* slock_t is defined as a struct abilock_t, which has a single unsigned long
|
||||
* member.
|
||||
*
|
||||
* This stuff may be supplemented in the future with Masato Kataoka's MIPS-II
|
||||
* assembly from his NECEWS SVR4 port, but we probably ought to retain this
|
||||
* for the R3000 chips out there.
|
||||
*/
|
||||
void
|
||||
S_LOCK(slock_t *lock)
|
||||
{
|
||||
/* spin_lock(lock); */
|
||||
while (!acquire_lock(lock))
|
||||
;
|
||||
}
|
||||
|
||||
void
|
||||
S_UNLOCK(slock_t *lock)
|
||||
{
|
||||
(void)release_lock(lock);
|
||||
}
|
||||
|
||||
void
|
||||
S_INIT_LOCK(slock_t *lock)
|
||||
{
|
||||
(void)init_lock(lock);
|
||||
}
|
||||
|
||||
/* S_LOCK_FREE should return 1 if lock is free; 0 if lock is locked */
|
||||
int
|
||||
S_LOCK_FREE(slock_t *lock)
|
||||
{
|
||||
return(stat_lock(lock)==UNLOCKED);
|
||||
}
|
||||
|
||||
#endif /* PORTNAME_irix5 */
|
||||
|
||||
|
||||
/*
 * OSF/1 (Alpha AXP)
 *
 * Note that slock_t on the Alpha AXP is msemaphore instead of char
 * (see storage/ipc.h), so the primitives delegate to msem_*(3).
 */

#if defined(PORTNAME_alpha)

/* acquire: spin on non-blocking msem_lock until it succeeds */
void
S_LOCK(slock_t *lock)
{
    while (msem_lock(lock, MSEM_IF_NOWAIT) < 0)
        ;
}

/* release the semaphore */
void
S_UNLOCK(slock_t *lock)
{
    (void) msem_unlock(lock, 0);
}

/* initialize to the unlocked state */
void
S_INIT_LOCK(slock_t *lock)
{
    (void) msem_init(lock, MSEM_UNLOCKED);
}

/* returns 1 if lock is free; 0 if locked (reads msem state directly) */
int
S_LOCK_FREE(slock_t *lock)
{
    return(lock->msem_state ? 0 : 1);
}

#endif /* PORTNAME_alpha */
|
||||
|
||||
/*
 * Solaris 2
 * Uses a hand-written test-and-set routine (see port/.../tas.s).
 */

#if defined(PORTNAME_sparc_solaris)

/* defined in port/.../tas.s */
extern int tas(slock_t *lock);

/* acquire: spin until test-and-set reports the lock was free */
void
S_LOCK(slock_t *lock)
{
    while (tas(lock))
        ;
}

/* release: clear the lock byte */
void
S_UNLOCK(slock_t *lock)
{
    *lock = 0;
}

/* initialize by releasing (lock starts out free) */
void
S_INIT_LOCK(slock_t *lock)
{
    S_UNLOCK(lock);
}

#endif /* PORTNAME_sparc_solaris */
|
||||
|
||||
/*
 * AIX (POWER)
 *
 * Note that slock_t on POWER/POWER2/PowerPC is int instead of char
 * (see storage/ipc.h).  Uses the cs(3) compare-and-swap system call
 * rather than native assembly (see file header NOTES).
 */

#if defined(PORTNAME_aix)

/* acquire: spin until cs() swaps 0 -> 1 successfully */
void
S_LOCK(slock_t *lock)
{
    while (cs((int *) lock, 0, 1))
        ;
}

/* release: clear the lock word */
void
S_UNLOCK(slock_t *lock)
{
    *lock = 0;
}

/* initialize by releasing (lock starts out free) */
void
S_INIT_LOCK(slock_t *lock)
{
    S_UNLOCK(lock);
}

#endif /* PORTNAME_aix */
|
||||
|
||||
/*
 * HP-UX (PA-RISC)
 *
 * Note that slock_t on PA-RISC is a structure instead of char
 * (see storage/ipc.h).  PA-RISC's LDCWX instruction requires a
 * 16-byte-aligned word, hence the multi-word slock_t and the
 * alignment arithmetic in S_LOCK_FREE.
 */

#if defined(PORTNAME_hpux)

/* defined in port/.../tas.s */
extern int tas(slock_t *lock);

/*
 * a "set" slock_t has a single word cleared.  a "clear" slock_t has
 * all words set to non-zero.
 */
static slock_t clear_lock = { -1, -1, -1, -1 };

/* acquire: spin until test-and-set reports the lock was free */
void
S_LOCK(slock_t *lock)
{
    while (tas(lock))
        ;
}

/* release: restore the all-non-zero "clear" pattern */
void
S_UNLOCK(slock_t *lock)
{
    *lock = clear_lock;		/* struct assignment */
}

/* initialize by releasing (lock starts out free) */
void
S_INIT_LOCK(slock_t *lock)
{
    S_UNLOCK(lock);
}

/* returns 1 if lock is free; 0 if locked */
int
S_LOCK_FREE(slock_t *lock)
{
    /* round up to the 16-byte-aligned word actually used by tas */
    register int *lock_word = (int *) (((long) lock + 15) & ~15);

    return(*lock_word != 0);
}

#endif /* PORTNAME_hpux */
|
||||
|
||||
/*
 * sun3 (m68k)
 * test-and-set is provided by the TAS instruction, emitted as inline
 * assembly inside tas_dummy() below.
 */

#if (defined(sun) && ! defined(sparc))

/* acquire: spin until tas reports the lock was free */
void
S_LOCK(slock_t *lock)
{
    while (tas(lock));
}

/* release: clear the lock byte */
void
S_UNLOCK(slock_t *lock)
{
    *lock = 0;
}

/* initialize by releasing (lock starts out free) */
void
S_INIT_LOCK(slock_t *lock)
{
    S_UNLOCK(lock);
}

/*
 * tas_dummy -- never called; it exists only as a container for the
 * asm() statements that define the global _tas routine (m68k TAS
 * instruction, returning nonzero if the lock was already set).
 */
static int
tas_dummy()
{
    asm("LLA0:");
    asm("	.data");
    asm("	.text");
    asm("|#PROC# 04");
    asm("	.globl	_tas");
    asm("_tas:");
    asm("|#PROLOGUE# 1");
    asm("	movel   sp@(0x4),a0");
    asm("	tas	a0@");
    asm("	beq	LLA1");
    asm("	moveq   #-128,d0");
    asm("	rts");
    asm("LLA1:");
    asm("	moveq   #0,d0");
    asm("	rts");
    asm("	.data");
}

#endif
|
||||
|
||||
/*
 * SPARC (SunOS 4)
 * test-and-set is built from the LDSTUB (atomic load-store) instruction,
 * emitted as inline assembly inside tas_dummy() below.
 */

#if defined(PORTNAME_sparc)

/* if we're using -ansi w/ gcc, use __asm__ instead of asm */
#if defined(__STRICT_ANSI__)
#define asm(x)	__asm__(x)
#endif

/*
 * tas_dummy -- never called; it exists only as a container for the
 * asm() statements that define the global _tas routine.
 */
static int
tas_dummy()
{
    asm(".seg \"data\"");
    asm(".seg \"text\"");
    asm(".global _tas");
    asm("_tas:");

    /*
     * Sparc atomic test and set (sparc calls it "atomic load-store")
     */

    asm("ldstub [%r8], %r8");

    /*
     * Did test and set actually do the set?
     */

    asm("tst %r8");

    asm("be,a ReturnZero");

    /*
     * otherwise, just return.
     *
     * NOTE(review): the "clr %r8" here is immediately overwritten by
     * "mov 0x1, %r8" — the clr looks dead; confirm against the
     * original port before touching the asm.
     */

    asm("clr %r8");
    asm("mov 0x1, %r8");
    asm("ReturnZero:");
    asm("retl");
    asm("nop");
}

/* acquire: spin until tas reports the lock byte was free */
void
S_LOCK(unsigned char *addr)
{
    while (tas(addr));
}


/*
 * addr should be as in the above S_LOCK routine
 */
/* release: clear the lock byte */
void
S_UNLOCK(unsigned char *addr)
{
    *addr = 0;
}

/* initialize: lock starts out free */
void
S_INIT_LOCK(unsigned char *addr)
{
    *addr = 0;
}

#endif /* PORTNAME_sparc */
|
||||
|
||||
/*
 * Linux and friends (i386)
 * test-and-set is implemented with the XCHG instruction, which performs
 * an atomic exchange between a register and memory.
 */

#if defined(PORTNAME_linux) || defined(PORTNAME_BSD44_derived)

/*
 * tas -- atomically swap 1 into *m; returns the previous value, so a
 * nonzero result means the lock was already held.
 *
 * NOTE(review): the asm carries no __volatile__ qualifier and no
 * "memory" clobber; an optimizing compiler could in principle reorder
 * it relative to surrounding accesses — confirm against the compilers
 * this tree targets.
 */
int
tas(slock_t *m)
{
    slock_t res;
    __asm__("xchgb %0,%1":"=q" (res),"=m" (*m):"0" (0x1));
    return(res);
}

/* acquire: spin until tas reports the lock was free */
void
S_LOCK(slock_t *lock)
{
    while (tas(lock))
        ;
}

/* release: clear the lock byte */
void
S_UNLOCK(slock_t *lock)
{
    *lock = 0;
}

/* initialize by releasing (lock starts out free) */
void
S_INIT_LOCK(slock_t *lock)
{
    S_UNLOCK(lock);
}

#endif /* PORTNAME_linux || PORTNAME_BSD44_derived */
|
||||
|
||||
|
||||
#endif /* HAS_TEST_AND_SET */
|
||||
|
||||
|
||||
#ifdef WIN32
|
||||
void
|
||||
S_LOCK(HANDLE *lock)
|
||||
{
|
||||
int x = 0;
|
||||
x = x / x;
|
||||
}
|
||||
|
||||
void
|
||||
S_UNLOCK(HANDLE *lock)
|
||||
{
|
||||
int x = 0;
|
||||
x = x / x;
|
||||
}
|
||||
|
||||
void
|
||||
S_INIT_LOCK(HANDLE *lock)
|
||||
{
|
||||
int x = 0;
|
||||
x = x / x;
|
||||
}
|
||||
#endif /*WIN32*/
|
||||
561
src/backend/storage/ipc/shmem.c
Normal file
561
src/backend/storage/ipc/shmem.c
Normal file
@@ -0,0 +1,561 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* shmem.c--
|
||||
* create shared memory and initialize shared memory data structures.
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/shmem.c,v 1.1.1.1 1996/07/09 06:21:54 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
/*
|
||||
* POSTGRES processes share one or more regions of shared memory.
|
||||
* The shared memory is created by a postmaster and is "attached to"
|
||||
* by each of the backends. The routines in this file are used for
|
||||
* allocating and binding to shared memory data structures.
|
||||
*
|
||||
* NOTES:
|
||||
* (a) There are three kinds of shared memory data structures
|
||||
* available to POSTGRES: fixed-size structures, queues and hash
|
||||
* tables. Fixed-size structures contain things like global variables
|
||||
* for a module and should never be allocated after the process
|
||||
* initialization phase. Hash tables have a fixed maximum size, but
|
||||
* their actual size can vary dynamically. When entries are added
|
||||
* to the table, more space is allocated. Queues link data structures
|
||||
* that have been allocated either as fixed size structures or as hash
|
||||
* buckets. Each shared data structure has a string name to identify
|
||||
* it (assigned in the module that declares it).
|
||||
*
|
||||
* (b) During initialization, each module looks for its
|
||||
* shared data structures in a hash table called the "Binding Table".
|
||||
* If the data structure is not present, the caller can allocate
|
||||
* a new one and initialize it. If the data structure is present,
|
||||
* the caller "attaches" to the structure by initializing a pointer
|
||||
* in the local address space.
|
||||
* The binding table has two purposes: first, it gives us
|
||||
* a simple model of how the world looks when a backend process
|
||||
* initializes. If something is present in the binding table,
|
||||
* it is initialized. If it is not, it is uninitialized. Second,
|
||||
* the binding table allows us to allocate shared memory on demand
|
||||
* instead of trying to preallocate structures and hard-wire the
|
||||
* sizes and locations in header files. If you are using a lot
|
||||
* of shared memory in a lot of different places (and changing
|
||||
* things during development), this is important.
|
||||
*
|
||||
* (c) memory allocation model: shared memory can never be
|
||||
* freed, once allocated. Each hash table has its own free list,
|
||||
* so hash buckets can be reused when an item is deleted. However,
|
||||
* if one hash table grows very large and then shrinks, its space
|
||||
* cannot be redistributed to other tables. We could build a simple
|
||||
* hash bucket garbage collector if need be. Right now, it seems
|
||||
* unnecessary.
|
||||
*
|
||||
* See InitSem() in sem.c for an example of how to use the
|
||||
* binding table.
|
||||
*
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "postgres.h"
|
||||
#include "storage/ipc.h"
|
||||
#include "storage/shmem.h"
|
||||
#include "storage/spin.h"
|
||||
#include "utils/hsearch.h"
|
||||
#include "utils/elog.h"
|
||||
|
||||
/* shared memory global variables */

unsigned long ShmemBase = 0;	/* start address of the attached shared
				 * memory region (0 until InitShmem runs) */

static unsigned long ShmemEnd = 0;	/* end address of the region */
static unsigned long ShmemSize = 0;	/* current size (and default) */

SPINLOCK ShmemLock;		/* lock for shared memory allocation */

SPINLOCK BindingLock;		/* lock for binding table access */

static unsigned long *ShmemFreeStart = NULL;	/* pointer to the OFFSET of
						 * first free shared memory
						 */
static unsigned long *ShmemBindingTabOffset = NULL;	/* start of the binding
							 * table (for bootstrap)
							 */
static int ShmemBootstrap = FALSE;	/* flag becomes true when shared mem
					 * is created by POSTMASTER
					 */

/* the binding table itself: maps structure names -> shmem offsets */
static HTAB *BindingTable = NULL;
|
||||
|
||||
/* ---------------------
|
||||
* ShmemBindingTabReset() - Resets the binding table to NULL....
|
||||
* useful when the postmaster destroys existing shared memory
|
||||
* and creates all new segments after a backend crash.
|
||||
* ----------------------
|
||||
*/
|
||||
void
|
||||
ShmemBindingTabReset()
|
||||
{
|
||||
BindingTable = (HTAB *)NULL;
|
||||
}
|
||||
|
||||
/*
 * ShmemCreate() --
 *
 *	This routine is called once by the postmaster to
 *	initialize the shared buffer pool.  Assume there is
 *	only one postmaster so no synchronization is necessary
 *	until after this routine completes successfully.
 *
 * key is a unique identifier for the shmem region.
 * size is the size of the region; 0 means "keep the current/default
 * ShmemSize".
 */
static IpcMemoryId ShmemId;	/* id of the region created below, consumed
				 * by InitShmem when running standalone */

void
ShmemCreate(unsigned int key, unsigned int size)
{
    if (size)
	ShmemSize = size;
    /* create shared mem region */
    if ((ShmemId=IpcMemoryCreate(key,ShmemSize,IPCProtection))
	==IpcMemCreationFailed) {
	elog(FATAL,"ShmemCreate: cannot create region");
	exit(1);
    }

    /* ShmemBootstrap is true if shared memory has been
     * created, but not yet initialized.  Only the
     * postmaster/creator-of-all-things should have
     * this flag set.
     */
    ShmemBootstrap = TRUE;
}
|
||||
|
||||
/*
 * InitShmem() -- map region into process address space
 *	and initialize shared data structures.
 *
 * Attaches the shared region identified by key (or the one created by
 * ShmemCreate when running standalone), sets up the allocator bookkeeping
 * at the front of the region, initializes spinlocks, and creates or
 * attaches to the binding table.
 *
 * Returns TRUE on success, FALSE on failure.
 */
int
InitShmem(unsigned int key, unsigned int size)
{
    Pointer sharedRegion;
    unsigned long currFreeSpace;

    HASHCTL info;
    int hash_flags;
    BindingEnt *	result,item;
    bool	found;
    IpcMemoryId shmid;

    /* if zero size, use default memory size */
    if (size)
	ShmemSize = size;

    /* default key is 0 */

    /* attach to shared memory region (SysV or BSD OS specific) */
    if (ShmemBootstrap && key == PrivateIPCKey)
	/* if we are running backend alone */
	shmid = ShmemId;
    else
	shmid = IpcMemoryIdGet(IPCKeyGetBufferMemoryKey(key), ShmemSize);
    sharedRegion = IpcMemoryAttach(shmid);
    if (sharedRegion == NULL) {
	elog(FATAL,"AttachSharedRegion: couldn't attach to shmem\n");
	return(FALSE);
    }

    /* get pointers to the dimensions of shared memory */
    ShmemBase = (unsigned long) sharedRegion;
    ShmemEnd  = (unsigned long) sharedRegion + ShmemSize;
    currFreeSpace = 0;

    /* First long in shared memory is the count of available space */
    ShmemFreeStart = (unsigned long *) ShmemBase;
    /* next is a shmem pointer to the binding table */
    ShmemBindingTabOffset = ShmemFreeStart + 1;

    /* the two bookkeeping words above are permanently allocated */
    currFreeSpace +=
	sizeof(ShmemFreeStart) + sizeof(ShmemBindingTabOffset);

    /* bootstrap initialize spin locks so we can start to use the
     * allocator and binding table.
     */
    if (! InitSpinLocks(ShmemBootstrap, IPCKeyGetSpinLockSemaphoreKey(key))) {
	return(FALSE);
    }

    /* We have just allocated additional space for two spinlocks.
     * Now setup the global free space count.  Only the creator
     * writes it; attachers inherit the existing value.
     */
    if (ShmemBootstrap) {
	*ShmemFreeStart = currFreeSpace;
    }

    /* if ShmemFreeStart is NULL, then the allocator won't work */
    Assert(*ShmemFreeStart);

    /* create OR attach to the shared memory binding table */
    info.keysize = BTABLE_KEYSIZE;
    info.datasize = BTABLE_DATASIZE;
    hash_flags = (HASH_ELEM);

    /* This will acquire the binding table lock, but not release it. */
    BindingTable = ShmemInitHash("BindingTable",
				 BTABLE_SIZE,BTABLE_SIZE,
				 &info,hash_flags);

    if (! BindingTable) {
	elog(FATAL,"InitShmem: couldn't initialize Binding Table");
	return(FALSE);
    }

    /* Now, check the binding table for an entry to the binding
     * table.  If there is an entry there, someone else created
     * the table.  Otherwise, we did and we have to initialize it.
     */
    memset(item.key, 0, BTABLE_KEYSIZE);
    strncpy(item.key,"BindingTable",BTABLE_KEYSIZE);

    result = (BindingEnt *)
	hash_search(BindingTable,(char *) &item,HASH_ENTER, &found);


    if (! result ) {
	elog(FATAL,"InitShmem: corrupted binding table");
	return(FALSE);
    }

    if (! found) {
	/* bootstrapping shmem: we have to initialize the
	 * binding table now.
	 */

	Assert(ShmemBootstrap);
	result->location = MAKE_OFFSET(BindingTable->hctl);
	*ShmemBindingTabOffset = result->location;
	result->size = BTABLE_SIZE;

	/* bootstrap is complete once the binding table self-entry exists */
	ShmemBootstrap = FALSE;

    } else {
	Assert(! ShmemBootstrap);
    }
    /* now release the lock acquired in ShmemHashInit */
    SpinRelease (BindingLock);

    Assert (result->location == MAKE_OFFSET(BindingTable->hctl));

    return(TRUE);
}
|
||||
|
||||
/*
 * ShmemAlloc -- allocate word-aligned byte string from
 *	shared memory
 *
 * Assumes ShmemLock and ShmemFreeStart are initialized.
 * Returns: real pointer to memory or NULL if we are out
 *	of space.  Has to return a real pointer in order
 *	to be compatible with malloc().
 *
 * Note: shared memory is never freed once allocated (see file header),
 * so this is a simple bump allocator protected by ShmemLock.
 */
long *
ShmemAlloc(unsigned long size)
{
    unsigned long tmpFree;
    long *newSpace;

    /*
     * ensure space is word aligned.
     *
     * Word-alignment is not good enough. We have to be more
     * conservative: doubles need 8-byte alignment. (We probably only need
     * this on RISC platforms but this is not a big waste of space.)
     * - ay 12/94
     */
    if (size % sizeof(double))
	size += sizeof(double) - (size % sizeof(double));

    Assert(*ShmemFreeStart);

    SpinAcquire(ShmemLock);

    tmpFree = *ShmemFreeStart + size;
    if (tmpFree <= ShmemSize) {
	newSpace = (long *)MAKE_PTR(*ShmemFreeStart);
	*ShmemFreeStart += size;
    } else {
	newSpace = NULL;
    }

    SpinRelease(ShmemLock);

    if (! newSpace) {
	elog(NOTICE,"ShmemAlloc: out of memory ");
    }
    return(newSpace);
}
|
||||
|
||||
/*
|
||||
* ShmemIsValid -- test if an offset refers to valid shared memory
|
||||
*
|
||||
* Returns TRUE if the pointer is valid.
|
||||
*/
|
||||
int
|
||||
ShmemIsValid(unsigned long addr)
|
||||
{
|
||||
return ((addr<ShmemEnd) && (addr>=ShmemBase));
|
||||
}
|
||||
|
||||
/*
|
||||
* ShmemInitHash -- Create/Attach to and initialize
|
||||
* shared memory hash table.
|
||||
*
|
||||
* Notes:
|
||||
*
|
||||
* assume caller is doing some kind of synchronization
|
||||
* so that two people dont try to create/initialize the
|
||||
* table at once. Use SpinAlloc() to create a spinlock
|
||||
* for the structure before creating the structure itself.
|
||||
*/
|
||||
HTAB *
|
||||
ShmemInitHash(char *name, /* table string name for binding */
|
||||
long init_size, /* initial size */
|
||||
long max_size, /* max size of the table */
|
||||
HASHCTL *infoP, /* info about key and bucket size */
|
||||
int hash_flags) /* info about infoP */
|
||||
{
|
||||
bool found;
|
||||
long * location;
|
||||
|
||||
/* shared memory hash tables have a fixed max size so that the
|
||||
* control structures don't try to grow. The segbase is for
|
||||
* calculating pointer values. The shared memory allocator
|
||||
* must be specified.
|
||||
*/
|
||||
infoP->segbase = (long *) ShmemBase;
|
||||
infoP->alloc = ShmemAlloc;
|
||||
infoP->max_size = max_size;
|
||||
hash_flags |= HASH_SHARED_MEM;
|
||||
|
||||
/* look it up in the binding table */
|
||||
location =
|
||||
ShmemInitStruct(name,my_log2(max_size) + sizeof(HHDR),&found);
|
||||
|
||||
/* binding table is corrupted. Let someone else give the
|
||||
* error message since they have more information
|
||||
*/
|
||||
if (location == NULL) {
|
||||
return(0);
|
||||
}
|
||||
|
||||
/* it already exists, attach to it rather than allocate and
|
||||
* initialize new space
|
||||
*/
|
||||
if (found) {
|
||||
hash_flags |= HASH_ATTACH;
|
||||
}
|
||||
|
||||
/* these structures were allocated or bound in ShmemInitStruct */
|
||||
/* control information and parameters */
|
||||
infoP->hctl = (long *) location;
|
||||
/* directory for hash lookup */
|
||||
infoP->dir = (long *) (location + sizeof(HHDR));
|
||||
|
||||
return(hash_create(init_size, infoP, hash_flags));;
|
||||
}
|
||||
|
||||
/*
 * ShmemPIDLookup -- lookup process data structure using process id
 *
 * Returns: TRUE if no error.  locationPtr is initialized if PID is
 *	found in the binding table.
 *
 * NOTES:
 *	only information about success or failure is the value of
 *	locationPtr.
 *
 *	Despite the name, this uses HASH_ENTER: if the PID is not yet in
 *	the table a new entry is created and seeded from *locationPtr, so
 *	the same routine both registers and looks up a process.
 */
bool
ShmemPIDLookup(int pid, SHMEM_OFFSET* locationPtr)
{
    BindingEnt *	result,item;
    bool	found;

    Assert (BindingTable);
    /* keys are fixed-size; zero-fill then write "PID <n>" */
    memset(item.key, 0, BTABLE_KEYSIZE);
    sprintf(item.key,"PID %d",pid);

    SpinAcquire(BindingLock);
    result = (BindingEnt *)
	hash_search(BindingTable,(char *) &item, HASH_ENTER, &found);

    if (! result) {

	SpinRelease(BindingLock);
	elog(WARN,"ShmemInitPID: BindingTable corrupted");
	return(FALSE);

    }

    if (found) {
	/* existing entry: report its location to the caller */
	*locationPtr = result->location;
    } else {
	/* new entry: record the caller-supplied location */
	result->location = *locationPtr;
    }

    SpinRelease(BindingLock);
    return (TRUE);
}
|
||||
|
||||
/*
|
||||
* ShmemPIDDestroy -- destroy binding table entry for process
|
||||
* using process id
|
||||
*
|
||||
* Returns: offset of the process struct in shared memory or
|
||||
* INVALID_OFFSET if not found.
|
||||
*
|
||||
* Side Effect: removes the entry from the binding table
|
||||
*/
|
||||
SHMEM_OFFSET
|
||||
ShmemPIDDestroy(int pid)
|
||||
{
|
||||
BindingEnt * result,item;
|
||||
bool found;
|
||||
SHMEM_OFFSET location;
|
||||
|
||||
Assert(BindingTable);
|
||||
|
||||
memset(item.key, 0, BTABLE_KEYSIZE);
|
||||
sprintf(item.key,"PID %d",pid);
|
||||
|
||||
SpinAcquire(BindingLock);
|
||||
result = (BindingEnt *)
|
||||
hash_search(BindingTable,(char *) &item, HASH_REMOVE, &found);
|
||||
|
||||
if (found)
|
||||
location = result->location;
|
||||
SpinRelease(BindingLock);
|
||||
|
||||
if (! result) {
|
||||
|
||||
elog(WARN,"ShmemPIDDestroy: PID table corrupted");
|
||||
return(INVALID_OFFSET);
|
||||
|
||||
}
|
||||
|
||||
if (found)
|
||||
return (location);
|
||||
else {
|
||||
return(INVALID_OFFSET);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * ShmemInitStruct -- Create/attach to a structure in shared
 *	memory.
 *
 * This is called during initialization to find or allocate
 *	a data structure in shared memory.  If no other processes
 *	have created the structure, this routine allocates space
 *	for it.  If it exists already, a pointer to the existing
 *	table is returned.
 *
 * Returns: real pointer to the object.  FoundPtr is TRUE if
 *	the object is already in the binding table (hence, already
 *	initialized).
 *
 * Locking: acquires BindingLock and releases it on every exit path
 * except the bootstrap case, which intentionally leaves it held until
 * InitShmem finishes setting up the binding table.
 */
long *
ShmemInitStruct(char *name, unsigned long size, bool *foundPtr)
{
    BindingEnt *	result,item;
    long * structPtr;

    /* NOTE(review): unlike the PID routines there is no memset of
     * item.key first; strncpy zero-pads the remainder, but if name is
     * ever >= BTABLE_KEYSIZE chars the key is not NUL-terminated —
     * confirm callers only pass short names.
     */
    strncpy(item.key,name,BTABLE_KEYSIZE);
    item.location = BAD_LOCATION;

    SpinAcquire(BindingLock);

    if (! BindingTable) {
	/* Assert() is a macro now. substitutes inside quotes. */
	char *strname = "BindingTable";

	/* If the binding table doesnt exist, we fake it.
	 *
	 * If we are creating the first binding table, then let
	 * shmemalloc() allocate the space for a new HTAB.  Otherwise,
	 * find the old one and return that.  Notice that the
	 * BindingLock is held until the binding table has been completely
	 * initialized.
	 */
	Assert (! strcmp(name,strname)) ;
	if (ShmemBootstrap) {
	    /* in POSTMASTER/Single process */

	    *foundPtr = FALSE;
	    return((long *)ShmemAlloc(size));

	} else {
	    Assert (ShmemBindingTabOffset);

	    *foundPtr = TRUE;
	    return((long *)MAKE_PTR(*ShmemBindingTabOffset));
	}


    } else {
	/* look it up in the binding table */
	result = (BindingEnt *)
	    hash_search(BindingTable,(char *) &item,HASH_ENTER, foundPtr);
    }

    if (! result) {

	SpinRelease(BindingLock);

	elog(WARN,"ShmemInitStruct: Binding Table corrupted");
	return(NULL);

    } else if (*foundPtr) {
	/*
	 * Structure is in the binding table so someone else has allocated
	 * it already.  The size better be the same as the size we are
	 * trying to initialize to or there is a name conflict (or worse).
	 */
	if (result->size != size) {
	    SpinRelease(BindingLock);

	    elog(NOTICE,"ShmemInitStruct: BindingTable entry size is wrong");
	    /* let caller print its message too */
	    return(NULL);
	}
	structPtr = (long *)MAKE_PTR(result->location);
    } else {

	/* It isn't in the table yet. allocate and initialize it */
	structPtr = ShmemAlloc((long)size);
	if (! structPtr) {
	    /* out of memory: undo the entry we just added */
	    Assert (BindingTable);
	    (void) hash_search(BindingTable,(char *) &item,HASH_REMOVE, foundPtr);
	    SpinRelease(BindingLock);
	    *foundPtr = FALSE;

	    elog(NOTICE,"ShmemInitStruct: cannot allocate '%s'",
		 name);
	    return(NULL);
	}
	result->size = size;
	result->location = MAKE_OFFSET(structPtr);
    }
    Assert (ShmemIsValid((unsigned long)structPtr));

    SpinRelease(BindingLock);
    return(structPtr);
}
|
||||
|
||||
|
||||
|
||||
251
src/backend/storage/ipc/shmqueue.c
Normal file
251
src/backend/storage/ipc/shmqueue.c
Normal file
@@ -0,0 +1,251 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* shmqueue.c--
|
||||
* shared memory linked lists
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/shmqueue.c,v 1.1.1.1 1996/07/09 06:21:54 scrappy Exp $
|
||||
*
|
||||
* NOTES
|
||||
*
|
||||
* Package for managing doubly-linked lists in shared memory.
|
||||
* The only tricky thing is that SHM_QUEUE will usually be a field
|
||||
* in a larger record. SHMQueueGetFirst has to return a pointer
|
||||
* to the record itself instead of a pointer to the SHMQueue field
|
||||
* of the record. It takes an extra pointer and does some extra
|
||||
* pointer arithmetic to do this correctly.
|
||||
*
|
||||
* NOTE: These are set up so they can be turned into macros some day.
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include <stdio.h> /* for sprintf() */
|
||||
#include "postgres.h"
|
||||
#include "storage/shmem.h" /* where the declarations go */
|
||||
#include "utils/elog.h"
|
||||
|
||||
/*#define SHMQUEUE_DEBUG*/
|
||||
#ifdef SHMQUEUE_DEBUG
|
||||
#define SHMQUEUE_DEBUG_DEL /* deletions */
|
||||
#define SHMQUEUE_DEBUG_HD /* head inserts */
|
||||
#define SHMQUEUE_DEBUG_TL /* tail inserts */
|
||||
#define SHMQUEUE_DEBUG_ELOG NOTICE
|
||||
#endif /* SHMQUEUE_DEBUG */
|
||||
|
||||
/*
|
||||
* ShmemQueueInit -- make the head of a new queue point
|
||||
* to itself
|
||||
*/
|
||||
void
|
||||
SHMQueueInit(SHM_QUEUE *queue)
|
||||
{
|
||||
Assert(SHM_PTR_VALID(queue));
|
||||
(queue)->prev = (queue)->next = MAKE_OFFSET(queue);
|
||||
}
|
||||
|
||||
/*
|
||||
* SHMQueueIsDetached -- TRUE if element is not currently
|
||||
* in a queue.
|
||||
*/
|
||||
bool
|
||||
SHMQueueIsDetached(SHM_QUEUE *queue)
|
||||
{
|
||||
Assert(SHM_PTR_VALID(queue));
|
||||
return ((queue)->prev == INVALID_OFFSET);
|
||||
}
|
||||
|
||||
/*
|
||||
* SHMQueueElemInit -- clear an element's links
|
||||
*/
|
||||
void
|
||||
SHMQueueElemInit(SHM_QUEUE *queue)
|
||||
{
|
||||
Assert(SHM_PTR_VALID(queue));
|
||||
(queue)->prev = (queue)->next = INVALID_OFFSET;
|
||||
}
|
||||
|
||||
/*
|
||||
* SHMQueueDelete -- remove an element from the queue and
|
||||
* close the links
|
||||
*/
|
||||
void
|
||||
SHMQueueDelete(SHM_QUEUE *queue)
|
||||
{
|
||||
SHM_QUEUE *nextElem = (SHM_QUEUE *) MAKE_PTR((queue)->next);
|
||||
SHM_QUEUE *prevElem = (SHM_QUEUE *) MAKE_PTR((queue)->prev);
|
||||
|
||||
Assert(SHM_PTR_VALID(queue));
|
||||
Assert(SHM_PTR_VALID(nextElem));
|
||||
Assert(SHM_PTR_VALID(prevElem));
|
||||
|
||||
#ifdef SHMQUEUE_DEBUG_DEL
|
||||
dumpQ(queue, "in SHMQueueDelete: begin");
|
||||
#endif /* SHMQUEUE_DEBUG_DEL */
|
||||
|
||||
prevElem->next = (queue)->next;
|
||||
nextElem->prev = (queue)->prev;
|
||||
|
||||
#ifdef SHMQUEUE_DEBUG_DEL
|
||||
dumpQ((SHM_QUEUE *)MAKE_PTR(queue->prev), "in SHMQueueDelete: end");
|
||||
#endif /* SHMQUEUE_DEBUG_DEL */
|
||||
}
|
||||
|
||||
#ifdef SHMQUEUE_DEBUG
|
||||
void
|
||||
dumpQ(SHM_QUEUE *q, char *s)
|
||||
{
|
||||
char elem[16];
|
||||
char buf[1024];
|
||||
SHM_QUEUE *start = q;
|
||||
int count = 0;
|
||||
|
||||
sprintf(buf, "q prevs: %x", MAKE_OFFSET(q));
|
||||
q = (SHM_QUEUE *)MAKE_PTR(q->prev);
|
||||
while (q != start)
|
||||
{
|
||||
sprintf(elem, "--->%x", MAKE_OFFSET(q));
|
||||
strcat(buf, elem);
|
||||
q = (SHM_QUEUE *)MAKE_PTR(q->prev);
|
||||
if (q->prev == MAKE_OFFSET(q))
|
||||
break;
|
||||
if (count++ > 40)
|
||||
{
|
||||
strcat(buf, "BAD PREV QUEUE!!");
|
||||
break;
|
||||
}
|
||||
}
|
||||
sprintf(elem, "--->%x", MAKE_OFFSET(q));
|
||||
strcat(buf, elem);
|
||||
elog(SHMQUEUE_DEBUG_ELOG, "%s: %s", s, buf);
|
||||
|
||||
sprintf(buf, "q nexts: %x", MAKE_OFFSET(q));
|
||||
count = 0;
|
||||
q = (SHM_QUEUE *)MAKE_PTR(q->next);
|
||||
while (q != start)
|
||||
{
|
||||
sprintf(elem, "--->%x", MAKE_OFFSET(q));
|
||||
strcat(buf, elem);
|
||||
q = (SHM_QUEUE *)MAKE_PTR(q->next);
|
||||
if (q->next == MAKE_OFFSET(q))
|
||||
break;
|
||||
if (count++ > 10)
|
||||
{
|
||||
strcat(buf, "BAD NEXT QUEUE!!");
|
||||
break;
|
||||
}
|
||||
}
|
||||
sprintf(elem, "--->%x", MAKE_OFFSET(q));
|
||||
strcat(buf, elem);
|
||||
elog(SHMQUEUE_DEBUG_ELOG, "%s: %s", s, buf);
|
||||
}
|
||||
#endif /* SHMQUEUE_DEBUG */
|
||||
|
||||
/*
|
||||
* SHMQueueInsertHD -- put elem in queue between the queue head
|
||||
* and its "prev" element.
|
||||
*/
|
||||
void
|
||||
SHMQueueInsertHD(SHM_QUEUE *queue, SHM_QUEUE *elem)
|
||||
{
|
||||
SHM_QUEUE *prevPtr = (SHM_QUEUE *) MAKE_PTR((queue)->prev);
|
||||
SHMEM_OFFSET elemOffset = MAKE_OFFSET(elem);
|
||||
|
||||
Assert(SHM_PTR_VALID(queue));
|
||||
Assert(SHM_PTR_VALID(elem));
|
||||
|
||||
#ifdef SHMQUEUE_DEBUG_HD
|
||||
dumpQ(queue, "in SHMQueueInsertHD: begin");
|
||||
#endif /* SHMQUEUE_DEBUG_HD */
|
||||
|
||||
(elem)->next = prevPtr->next;
|
||||
(elem)->prev = queue->prev;
|
||||
(queue)->prev = elemOffset;
|
||||
prevPtr->next = elemOffset;
|
||||
|
||||
#ifdef SHMQUEUE_DEBUG_HD
|
||||
dumpQ(queue, "in SHMQueueInsertHD: end");
|
||||
#endif /* SHMQUEUE_DEBUG_HD */
|
||||
}
|
||||
|
||||
void
|
||||
SHMQueueInsertTL(SHM_QUEUE *queue, SHM_QUEUE *elem)
|
||||
{
|
||||
SHM_QUEUE *nextPtr = (SHM_QUEUE *) MAKE_PTR((queue)->next);
|
||||
SHMEM_OFFSET elemOffset = MAKE_OFFSET(elem);
|
||||
|
||||
Assert(SHM_PTR_VALID(queue));
|
||||
Assert(SHM_PTR_VALID(elem));
|
||||
|
||||
#ifdef SHMQUEUE_DEBUG_TL
|
||||
dumpQ(queue, "in SHMQueueInsertTL: begin");
|
||||
#endif /* SHMQUEUE_DEBUG_TL */
|
||||
|
||||
(elem)->prev = nextPtr->prev;
|
||||
(elem)->next = queue->next;
|
||||
(queue)->next = elemOffset;
|
||||
nextPtr->prev = elemOffset;
|
||||
|
||||
#ifdef SHMQUEUE_DEBUG_TL
|
||||
dumpQ(queue, "in SHMQueueInsertTL: end");
|
||||
#endif /* SHMQUEUE_DEBUG_TL */
|
||||
}
|
||||
|
||||
/*
 * SHMQueueFirst -- Get the first element from a queue
 *
 * First element is queue->next.  If SHMQueue is part of
 * a larger structure, we want to return a pointer to the
 * whole structure rather than a pointer to its SHMQueue field.
 * I.E. struct {
 *	int		stuff;
 *	SHMQueue	elem;
 * } ELEMType;
 * when this element is in a queue (queue->next) is struct.elem.
 * nextQueue allows us to calculate the offset of the SHMQueue
 * field in the structure.
 *
 * call to SHMQueueFirst should take these parameters:
 *
 *	 &(queueHead), &firstElem, &(firstElem->next)
 *
 * Note that firstElem may well be uninitialized.  if firstElem
 * is initially K, &(firstElem->next) will be K + the offset to
 * next.
 */
void
SHMQueueFirst(SHM_QUEUE *queue, Pointer *nextPtrPtr, SHM_QUEUE *nextQueue)
{
    SHM_QUEUE *elemPtr = (SHM_QUEUE *) MAKE_PTR((queue)->next);

    Assert(SHM_PTR_VALID(queue));
    /*
     * (*nextPtrPtr - nextQueue) is minus the byte offset of the
     * embedded SHM_QUEUE field inside the caller's structure; adding
     * elemPtr re-bases that offset onto the actual first element, so
     * the result points at the start of the enclosing structure.
     */
    *nextPtrPtr = (Pointer) (((unsigned long) *nextPtrPtr) +
	    ((unsigned long) elemPtr) - ((unsigned long) nextQueue));

    /*
    nextPtrPtr a ptr to a structure linked in the queue
    nextQueue is the SHMQueue field of the structure
    *nextPtrPtr - nextQueue is 0 minus the offset of the queue
    field in the record
    elemPtr + (*nextPtrPtr - nextQueue) is the start of the
    structure containing elemPtr.
    */
}
|
||||
|
||||
/*
|
||||
* SHMQueueEmpty -- TRUE if queue head is only element, FALSE otherwise
|
||||
*/
|
||||
bool
|
||||
SHMQueueEmpty(SHM_QUEUE *queue)
|
||||
{
|
||||
Assert(SHM_PTR_VALID(queue));
|
||||
|
||||
if (queue->prev == MAKE_OFFSET(queue))
|
||||
{
|
||||
Assert(queue->next = MAKE_OFFSET(queue));
|
||||
return(TRUE);
|
||||
}
|
||||
return(FALSE);
|
||||
}
|
||||
169
src/backend/storage/ipc/sinval.c
Normal file
169
src/backend/storage/ipc/sinval.c
Normal file
@@ -0,0 +1,169 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* sinval.c--
|
||||
* POSTGRES shared cache invalidation communication code.
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinval.c,v 1.1.1.1 1996/07/09 06:21:54 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
/* #define INVALIDDEBUG 1 */
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include "storage/sinval.h"
|
||||
#include "storage/sinvaladt.h"
|
||||
#include "storage/spin.h"
|
||||
#include "utils/elog.h"
|
||||
|
||||
extern SISeg *shmInvalBuffer;/* the shared buffer segment, set by*/
|
||||
/* SISegmentAttach() */
|
||||
extern BackendId MyBackendId;
|
||||
extern BackendTag MyBackendTag;
|
||||
|
||||
SPINLOCK SInvalLock = (SPINLOCK) NULL;
|
||||
|
||||
/****************************************************************************/
|
||||
/* CreateSharedInvalidationState(key) Create a buffer segment */
|
||||
/* */
|
||||
/* should be called only by the POSTMASTER */
|
||||
/****************************************************************************/
|
||||
void
|
||||
CreateSharedInvalidationState(IPCKey key)
|
||||
{
|
||||
int status;
|
||||
|
||||
/* REMOVED
|
||||
SISyncKill(IPCKeyGetSIBufferMemorySemaphoreKey(key));
|
||||
SISyncInit(IPCKeyGetSIBufferMemorySemaphoreKey(key));
|
||||
*/
|
||||
|
||||
/* SInvalLock gets set in spin.c, during spinlock init */
|
||||
status = SISegmentInit(true, IPCKeyGetSIBufferMemoryBlock(key));
|
||||
|
||||
if (status == -1) {
|
||||
elog(FATAL, "CreateSharedInvalidationState: failed segment init");
|
||||
}
|
||||
}
|
||||
/****************************************************************************/
|
||||
/* AttachSharedInvalidationState(key) Attach a buffer segment */
|
||||
/* */
|
||||
/* should be called only by the POSTMASTER */
|
||||
/****************************************************************************/
|
||||
void
|
||||
AttachSharedInvalidationState(IPCKey key)
|
||||
{
|
||||
int status;
|
||||
|
||||
if (key == PrivateIPCKey) {
|
||||
CreateSharedInvalidationState(key);
|
||||
return;
|
||||
}
|
||||
/* SInvalLock gets set in spin.c, during spinlock init */
|
||||
status = SISegmentInit(false, IPCKeyGetSIBufferMemoryBlock(key));
|
||||
|
||||
if (status == -1) {
|
||||
elog(FATAL, "AttachSharedInvalidationState: failed segment init");
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
InitSharedInvalidationState()
|
||||
{
|
||||
SpinAcquire(SInvalLock);
|
||||
if (!SIBackendInit(shmInvalBuffer))
|
||||
{
|
||||
SpinRelease(SInvalLock);
|
||||
elog(FATAL, "Backend cache invalidation initialization failed");
|
||||
}
|
||||
SpinRelease(SInvalLock);
|
||||
}
|
||||
|
||||
/*
 * RegisterSharedInvalid --
 *  Returns a new local cache invalidation state containing a new entry.
 *
 * Note:
 *  Assumes hash index is valid.
 *  Assumes item pointer is valid.
 */
/****************************************************************************/
/* RegisterSharedInvalid(cacheId, hashIndex, pointer)			    */
/*									    */
/* register a message in the buffer					    */
/* should be called by a backend					    */
/****************************************************************************/
void
RegisterSharedInvalid(int cacheId,	/* XXX */
		      Index hashIndex,
		      ItemPointer pointer)
{
    SharedInvalidData newInvalid;

    /*
     * This code has been hacked to accept two types of messages.  This might
     * be treated more generally in the future.
     *
     * (1)
     *	cacheId= system cache id
     *	hashIndex= system cache hash index for a (possibly) cached tuple
     *	pointer= pointer of (possibly) cached tuple
     *
     * (2)
     *	cacheId= special non-syscache id
     *	hashIndex= object id contained in (possibly) cached relation descriptor
     *	pointer= null
     */

    newInvalid.cacheId = cacheId;
    newInvalid.hashIndex = hashIndex;

    /* message type (2) carries no tuple pointer */
    if (ItemPointerIsValid(pointer)) {
	ItemPointerCopy(pointer, &newInvalid.pointerData);
    } else {
	ItemPointerSetInvalid(&newInvalid.pointerData);
    }

    /* the buffer is shared by all backends: serialize all access to it */
    SpinAcquire(SInvalLock);
    if (!SISetDataEntry(shmInvalBuffer, &newInvalid)) {
	/* buffer full */
	/* release a message, mark process cache states to be invalid */
	SISetProcStateInvalid(shmInvalBuffer);

	if (!SIDelDataEntry(shmInvalBuffer)) {
	    /* inconsistent buffer state -- shd never happen */
	    SpinRelease(SInvalLock);
	    elog(FATAL, "RegisterSharedInvalid: inconsistent buffer state");
	}

	/* write again -- must succeed now that one slot was freed */
	(void) SISetDataEntry(shmInvalBuffer, &newInvalid);
    }
    SpinRelease(SInvalLock);
}
|
||||
|
||||
/*
 * InvalidateSharedInvalid --
 *	Processes all entries in a shared cache invalidation state.
 */
/****************************************************************************/
/* InvalidateSharedInvalid(invalFunction, resetFunction)		    */
/*									    */
/* invalidate a message in the buffer (read and clean up)		    */
/* should be called by a backend					    */
/****************************************************************************/
void
InvalidateSharedInvalid(void (*invalFunction)(),
			void (*resetFunction)())
{
    /*
     * Under the buffer spinlock: apply every unread message to this
     * backend (or resetFunction if it fell behind), then discard
     * messages every backend has now seen.
     */
    SpinAcquire(SInvalLock);
    SIReadEntryData(shmInvalBuffer, MyBackendId,
		    invalFunction, resetFunction);

    SIDelExpiredDataEntries(shmInvalBuffer);
    SpinRelease(SInvalLock);
}
|
||||
797
src/backend/storage/ipc/sinvaladt.c
Normal file
797
src/backend/storage/ipc/sinvaladt.c
Normal file
@@ -0,0 +1,797 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* sinvaladt.c--
|
||||
* POSTGRES shared cache invalidation segment definitions.
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinvaladt.c,v 1.1.1.1 1996/07/09 06:21:54 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include "storage/ipc.h"
|
||||
#include "storage/sinvaladt.h"
|
||||
#include "storage/lmgr.h"
|
||||
#include "utils/elog.h"
|
||||
#include "utils/palloc.h"
|
||||
|
||||
/* ----------------
|
||||
* global variable notes
|
||||
*
|
||||
* SharedInvalidationSemaphore
|
||||
*
|
||||
* shmInvalBuffer
|
||||
* the shared buffer segment, set by SISegmentAttach()
|
||||
*
|
||||
* MyBackendId
|
||||
* might be removed later, used only for
|
||||
* debugging in debug routines (end of file)
|
||||
*
|
||||
* SIDbId
|
||||
* identification of buffer (disappears)
|
||||
*
|
||||
* SIRelId \
|
||||
* SIDummyOid \ identification of buffer
|
||||
* SIXidData /
|
||||
* SIXid /
|
||||
*
|
||||
* XXX This file really needs to be cleaned up. We switched to using
|
||||
* spinlocks to protect critical sections (as opposed to using fake
|
||||
* relations and going through the lock manager) and some of the old
|
||||
* cruft was 'ifdef'ed out, while other parts (now unused) are still
|
||||
* compiled into the system. -mer 5/24/92
|
||||
* ----------------
|
||||
*/
|
||||
#ifdef HAS_TEST_AND_SET
|
||||
int SharedInvalidationLockId;
|
||||
#else
|
||||
IpcSemaphoreId SharedInvalidationSemaphore;
|
||||
#endif
|
||||
|
||||
SISeg *shmInvalBuffer;
|
||||
extern BackendId MyBackendId;
|
||||
|
||||
static void CleanupInvalidationState(int status, SISeg *segInOutP);
|
||||
static BackendId SIAssignBackendId(SISeg *segInOutP, BackendTag backendTag);
|
||||
static int SIGetNumEntries(SISeg *segP);
|
||||
|
||||
/************************************************************************/
|
||||
/* SISetActiveProcess(segP, backendId) set the backend status active */
|
||||
/* should be called only by the postmaster when creating a backend */
|
||||
/************************************************************************/
|
||||
/* XXX I suspect that the segP parameter is extraneous. -hirohama */
|
||||
static void
|
||||
SISetActiveProcess(SISeg *segInOutP, BackendId backendId)
|
||||
{
|
||||
/* mark all messages as read */
|
||||
|
||||
/* Assert(segP->procState[backendId - 1].tag == MyBackendTag); */
|
||||
|
||||
segInOutP->procState[backendId - 1].resetState = false;
|
||||
segInOutP->procState[backendId - 1].limit = SIGetNumEntries(segInOutP);
|
||||
}
|
||||
|
||||
/****************************************************************************/
|
||||
/* SIBackendInit() initializes a backend to operate on the buffer */
|
||||
/****************************************************************************/
|
||||
int
|
||||
SIBackendInit(SISeg *segInOutP)
|
||||
{
|
||||
LRelId LtCreateRelId();
|
||||
TransactionId LMITransactionIdCopy();
|
||||
|
||||
Assert(MyBackendTag > 0);
|
||||
|
||||
MyBackendId = SIAssignBackendId(segInOutP, MyBackendTag);
|
||||
if (MyBackendId == InvalidBackendTag)
|
||||
return 0;
|
||||
|
||||
#ifdef INVALIDDEBUG
|
||||
elog(DEBUG, "SIBackendInit: backend tag %d; backend id %d.",
|
||||
MyBackendTag, MyBackendId);
|
||||
#endif /* INVALIDDEBUG */
|
||||
|
||||
SISetActiveProcess(segInOutP, MyBackendId);
|
||||
on_exitpg(CleanupInvalidationState, (caddr_t)segInOutP);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* ----------------
 *	SIAssignBackendId
 *
 *	Find (or evict) a procState slot for this backend tag and return
 *	its 1-based backend id, or InvalidBackendTag on failure.
 * ----------------
 */
static BackendId
SIAssignBackendId(SISeg *segInOutP, BackendTag backendTag)
{
    Index index;
    ProcState *stateP;		/* best candidate slot found so far */

    stateP = NULL;

    for (index = 0; index < MaxBackendId; index += 1) {
	/* a free slot, or a slot already bearing our own tag, wins outright */
	if (segInOutP->procState[index].tag == InvalidBackendTag ||
	    segInOutP->procState[index].tag == backendTag)
	{
	    stateP = &segInOutP->procState[index];
	    break;
	}

	/*
	 * Otherwise track an eviction candidate: prefer slots already in
	 * reset state, then slots with the fewest messages read, breaking
	 * ties toward older (smaller) tags.  NOTE(review): the heuristic
	 * compares stateP->tag < backendTag rather than the candidate's
	 * tag -- looks suspicious; confirm intent before changing.
	 */
	if (!PointerIsValid(stateP) ||
	    (segInOutP->procState[index].resetState &&
	     (!stateP->resetState ||
	      stateP->tag < backendTag)) ||
	    (!stateP->resetState &&
	     (segInOutP->procState[index].limit <
	      stateP->limit ||
	      stateP->tag < backendTag)))
	{
	    stateP = &segInOutP->procState[index];
	}
    }

    /* verify that all "procState" entries checked for matching tags */

    for (index += 1; index < MaxBackendId; index += 1) {
	if (segInOutP->procState[index].tag == backendTag) {
	    elog (FATAL, "SIAssignBackendId: tag %d found twice",
		  backendTag);
	}
    }

    /* occupied slot: reuse our own tag, but refuse to evict another's */
    if (stateP->tag != InvalidBackendTag) {
	if (stateP->tag == backendTag) {
	    elog(NOTICE, "SIAssignBackendId: reusing tag %d",
		 backendTag);
	} else {
	    elog(NOTICE,
		 "SIAssignBackendId: discarding tag %d",
		 stateP->tag);
	    return InvalidBackendTag;
	}
    }

    stateP->tag = backendTag;

    /* backend ids are 1-based indexes into procState[] */
    return (1 + stateP - &segInOutP->procState[0]);
}
|
||||
|
||||
|
||||
/************************************************************************/
|
||||
/* The following function should be called only by the postmaster !! */
|
||||
/************************************************************************/
|
||||
|
||||
/************************************************************************/
|
||||
/* SISetDeadProcess(segP, backendId) set the backend status DEAD */
|
||||
/* should be called only by the postmaster when a backend died */
|
||||
/************************************************************************/
|
||||
static void
|
||||
SISetDeadProcess(SISeg *segP, int backendId)
|
||||
{
|
||||
/* XXX call me.... */
|
||||
|
||||
segP->procState[backendId - 1].resetState = false;
|
||||
segP->procState[backendId - 1].limit = -1;
|
||||
segP->procState[backendId - 1].tag = InvalidBackendTag;
|
||||
}
|
||||
|
||||
/*
 * CleanupInvalidationState --
 *	on_exitpg hook: hand this backend's procState slot back by
 *	marking it dead (see SISetDeadProcess).
 * Note:
 *	This is a temporary hack.  ExitBackend should call this instead
 *	of exit (via on_exitpg).
 */
static void
CleanupInvalidationState(int status,		/* XXX exit status, unused */
			 SISeg *segInOutP)	/* XXX style */
{
    Assert(PointerIsValid(segInOutP));

    SISetDeadProcess(segInOutP, MyBackendId);
}
|
||||
|
||||
|
||||
/************************************************************************/
|
||||
/* SIComputeSize() - retuns the size of a buffer segment */
|
||||
/************************************************************************/
|
||||
static SISegOffsets *
|
||||
SIComputeSize(int *segSize)
|
||||
{
|
||||
int A, B, a, b, totalSize;
|
||||
SISegOffsets *oP;
|
||||
|
||||
A = 0;
|
||||
a = SizeSISeg; /* offset to first data entry */
|
||||
b = SizeOfOneSISegEntry * MAXNUMMESSAGES;
|
||||
B = A + a + b;
|
||||
totalSize = B - A;
|
||||
*segSize = totalSize;
|
||||
|
||||
oP = (SISegOffsets *) palloc(sizeof(SISegOffsets));
|
||||
oP->startSegment = A;
|
||||
oP->offsetToFirstEntry = a; /* relatiove to A */
|
||||
oP->offsetToEndOfSegemnt = totalSize; /* relative to A */
|
||||
return(oP);
|
||||
}
|
||||
|
||||
|
||||
/************************************************************************/
/* SISetStartEntrySection(segP, offset) - sets the offset		*/
/*	(records where the entry section begins, segment-relative)	*/
/************************************************************************/
static void
SISetStartEntrySection(SISeg *segP, Offset offset)
{
    segP->startEntrySection = offset;
}
|
||||
|
||||
/************************************************************************/
/* SIGetStartEntrySection(segP) - returns the offset			*/
/*	(segment-relative start of the entry section)			*/
/************************************************************************/
static Offset
SIGetStartEntrySection(SISeg *segP)
{
    return(segP->startEntrySection);
}
|
||||
|
||||
|
||||
/************************************************************************/
/* SISetEndEntrySection(segP, offset) - sets the offset			*/
/*	(records where the entry section ends, segment-relative)	*/
/************************************************************************/
static void
SISetEndEntrySection(SISeg *segP, Offset offset)
{
    segP->endEntrySection = offset;
}
|
||||
|
||||
/************************************************************************/
/* SISetEndEntryChain(segP, offset) - sets the offset			*/
/*	(tail of the in-use message chain, entry-section-relative)	*/
/************************************************************************/
static void
SISetEndEntryChain(SISeg *segP, Offset offset)
{
    segP->endEntryChain = offset;
}
|
||||
|
||||
/************************************************************************/
/* SIGetEndEntryChain(segP) - returns the offset			*/
/*	(tail of the in-use message chain, entry-section-relative)	*/
/************************************************************************/
static Offset
SIGetEndEntryChain(SISeg *segP)
{
    return(segP->endEntryChain);
}
|
||||
|
||||
/************************************************************************/
/* SISetStartEntryChain(segP, offset) - sets the offset			*/
/*	(head of the in-use message chain, entry-section-relative)	*/
/************************************************************************/
static void
SISetStartEntryChain(SISeg *segP, Offset offset)
{
    segP->startEntryChain = offset;
}
|
||||
|
||||
/************************************************************************/
/* SIGetStartEntryChain(segP) - returns the offset			*/
/*	(head of the in-use message chain, entry-section-relative)	*/
/************************************************************************/
static Offset
SIGetStartEntryChain(SISeg *segP)
{
    return(segP->startEntryChain);
}
|
||||
|
||||
/************************************************************************/
|
||||
/* SISetNumEntries(segP, num) sets the current nuber of entries */
|
||||
/************************************************************************/
|
||||
static bool
|
||||
SISetNumEntries(SISeg *segP, int num)
|
||||
{
|
||||
if ( num <= MAXNUMMESSAGES) {
|
||||
segP->numEntries = num;
|
||||
return(true);
|
||||
} else {
|
||||
return(false); /* table full */
|
||||
}
|
||||
}
|
||||
|
||||
/************************************************************************/
/* SIGetNumEntries(segP) - returns the current number of entries	*/
/************************************************************************/
static int
SIGetNumEntries(SISeg *segP)
{
    return(segP->numEntries);
}
|
||||
|
||||
|
||||
/************************************************************************/
|
||||
/* SISetMaxNumEntries(segP, num) sets the maximal number of entries */
|
||||
/************************************************************************/
|
||||
static bool
|
||||
SISetMaxNumEntries(SISeg *segP, int num)
|
||||
{
|
||||
if ( num <= MAXNUMMESSAGES) {
|
||||
segP->maxNumEntries = num;
|
||||
return(true);
|
||||
} else {
|
||||
return(false); /* wrong number */
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/************************************************************************/
/* SIGetProcStateLimit(segP, i)  returns the limit of read messages	*/
/*	 for the backend in slot i (negative means a dead slot)		*/
/************************************************************************/
static int
SIGetProcStateLimit(SISeg *segP, int i)
{
    return(segP->procState[i].limit);
}
|
||||
|
||||
/************************************************************************/
|
||||
/* SIIncNumEntries(segP, num) increments the current nuber of entries */
|
||||
/************************************************************************/
|
||||
static bool
|
||||
SIIncNumEntries(SISeg *segP, int num)
|
||||
{
|
||||
if ((segP->numEntries + num) <= MAXNUMMESSAGES) {
|
||||
segP->numEntries = segP->numEntries + num;
|
||||
return(true);
|
||||
} else {
|
||||
return(false); /* table full */
|
||||
}
|
||||
}
|
||||
|
||||
/************************************************************************/
|
||||
/* SIDecNumEntries(segP, num) decrements the current nuber of entries */
|
||||
/************************************************************************/
|
||||
static bool
|
||||
SIDecNumEntries(SISeg *segP, int num)
|
||||
{
|
||||
if ((segP->numEntries - num) >= 0) {
|
||||
segP->numEntries = segP->numEntries - num;
|
||||
return(true);
|
||||
} else {
|
||||
return(false); /* not enough entries in table */
|
||||
}
|
||||
}
|
||||
|
||||
/************************************************************************/
/* SISetStartFreeSpace(segP, offset) - sets the offset			*/
/*	(head of the free-entry list, entry-section-relative)		*/
/************************************************************************/
static void
SISetStartFreeSpace(SISeg *segP, Offset offset)
{
    segP->startFreeSpace = offset;
}
|
||||
|
||||
/************************************************************************/
/* SIGetStartFreeSpace(segP) - returns the offset			*/
/*	(head of the free-entry list, entry-section-relative)		*/
/************************************************************************/
static Offset
SIGetStartFreeSpace(SISeg *segP)
{
    return(segP->startFreeSpace);
}
|
||||
|
||||
|
||||
|
||||
/************************************************************************/
|
||||
/* SIGetFirstDataEntry(segP) returns first data entry */
|
||||
/************************************************************************/
|
||||
static SISegEntry *
|
||||
SIGetFirstDataEntry(SISeg *segP)
|
||||
{
|
||||
SISegEntry *eP;
|
||||
Offset startChain;
|
||||
|
||||
startChain = SIGetStartEntryChain(segP);
|
||||
|
||||
if (startChain == InvalidOffset)
|
||||
return(NULL);
|
||||
|
||||
eP = (SISegEntry *) ((Pointer) segP +
|
||||
SIGetStartEntrySection(segP) +
|
||||
startChain );
|
||||
return(eP);
|
||||
}
|
||||
|
||||
|
||||
/************************************************************************/
|
||||
/* SIGetLastDataEntry(segP) returns last data entry in the chain */
|
||||
/************************************************************************/
|
||||
static SISegEntry *
|
||||
SIGetLastDataEntry(SISeg *segP)
|
||||
{
|
||||
SISegEntry *eP;
|
||||
Offset endChain;
|
||||
|
||||
endChain = SIGetEndEntryChain(segP);
|
||||
|
||||
if (endChain == InvalidOffset)
|
||||
return(NULL);
|
||||
|
||||
eP = (SISegEntry *) ((Pointer) segP +
|
||||
SIGetStartEntrySection(segP) +
|
||||
endChain );
|
||||
return(eP);
|
||||
}
|
||||
|
||||
/************************************************************************/
|
||||
/* SIGetNextDataEntry(segP, offset) returns next data entry */
|
||||
/************************************************************************/
|
||||
static SISegEntry *
|
||||
SIGetNextDataEntry(SISeg *segP, Offset offset)
|
||||
{
|
||||
SISegEntry *eP;
|
||||
|
||||
if (offset == InvalidOffset)
|
||||
return(NULL);
|
||||
|
||||
eP = (SISegEntry *) ((Pointer) segP +
|
||||
SIGetStartEntrySection(segP) +
|
||||
offset);
|
||||
return(eP);
|
||||
}
|
||||
|
||||
|
||||
/************************************************************************/
|
||||
/* SIGetNthDataEntry(segP, n) returns the n-th data entry in chain */
|
||||
/************************************************************************/
|
||||
static SISegEntry *
|
||||
SIGetNthDataEntry(SISeg *segP,
|
||||
int n) /* must range from 1 to MaxMessages */
|
||||
{
|
||||
SISegEntry *eP;
|
||||
int i;
|
||||
|
||||
if (n <= 0) return(NULL);
|
||||
|
||||
eP = SIGetFirstDataEntry(segP);
|
||||
for (i = 1; i < n; i++) {
|
||||
/* skip one and get the next */
|
||||
eP = SIGetNextDataEntry(segP, eP->next);
|
||||
}
|
||||
|
||||
return(eP);
|
||||
}
|
||||
|
||||
/************************************************************************/
|
||||
/* SIEntryOffset(segP, entryP) returns the offset for an pointer */
|
||||
/************************************************************************/
|
||||
static Offset
|
||||
SIEntryOffset(SISeg *segP, SISegEntry *entryP)
|
||||
{
|
||||
/* relative to B !! */
|
||||
return ((Offset) ((Pointer) entryP -
|
||||
(Pointer) segP -
|
||||
SIGetStartEntrySection(segP) ));
|
||||
}
|
||||
|
||||
|
||||
/************************************************************************/
|
||||
/* SISetDataEntry(segP, data) - sets a message in the segemnt */
|
||||
/************************************************************************/
|
||||
bool
|
||||
SISetDataEntry(SISeg *segP, SharedInvalidData *data)
|
||||
{
|
||||
Offset offsetToNewData;
|
||||
SISegEntry *eP, *lastP;
|
||||
bool SISegFull();
|
||||
Offset SIEntryOffset();
|
||||
Offset SIGetStartFreeSpace();
|
||||
SISegEntry *SIGetFirstDataEntry();
|
||||
SISegEntry *SIGetNextDataEntry();
|
||||
SISegEntry *SIGetLastDataEntry();
|
||||
|
||||
if (!SIIncNumEntries(segP, 1))
|
||||
return(false); /* no space */
|
||||
|
||||
/* get a free entry */
|
||||
offsetToNewData = SIGetStartFreeSpace(segP);
|
||||
eP = SIGetNextDataEntry(segP, offsetToNewData); /* it's a free one */
|
||||
SISetStartFreeSpace(segP, eP->next);
|
||||
/* fill it up */
|
||||
eP->entryData = *data;
|
||||
eP->isfree = false;
|
||||
eP->next = InvalidOffset;
|
||||
|
||||
/* handle insertion point at the end of the chain !!*/
|
||||
lastP = SIGetLastDataEntry(segP);
|
||||
if (lastP == NULL) {
|
||||
/* there is no chain, insert the first entry */
|
||||
SISetStartEntryChain(segP, SIEntryOffset(segP, eP));
|
||||
} else {
|
||||
/* there is a last entry in the chain */
|
||||
lastP->next = SIEntryOffset(segP, eP);
|
||||
}
|
||||
SISetEndEntryChain(segP, SIEntryOffset(segP, eP));
|
||||
return(true);
|
||||
}
|
||||
|
||||
|
||||
/************************************************************************/
|
||||
/* SIDecProcLimit(segP, num) decrements all process limits */
|
||||
/************************************************************************/
|
||||
static void
|
||||
SIDecProcLimit(SISeg *segP, int num)
|
||||
{
|
||||
int i;
|
||||
for (i=0; i < MaxBackendId; i++) {
|
||||
/* decrement only, if there is a limit > 0 */
|
||||
if (segP->procState[i].limit > 0) {
|
||||
segP->procState[i].limit = segP->procState[i].limit - num;
|
||||
if (segP->procState[i].limit < 0) {
|
||||
/* limit was not high enough, reset to zero */
|
||||
/* negative means it's a dead backend */
|
||||
segP->procState[i].limit = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/************************************************************************/
|
||||
/* SIDelDataEntry(segP) - free the FIRST entry */
|
||||
/************************************************************************/
|
||||
bool
|
||||
SIDelDataEntry(SISeg *segP)
|
||||
{
|
||||
SISegEntry *e1P;
|
||||
SISegEntry *SIGetFirstDataEntry();
|
||||
|
||||
if (!SIDecNumEntries(segP, 1)) {
|
||||
/* no entries in buffer */
|
||||
return(false);
|
||||
}
|
||||
|
||||
e1P = SIGetFirstDataEntry(segP);
|
||||
SISetStartEntryChain(segP, e1P->next);
|
||||
if (SIGetStartEntryChain(segP) == InvalidOffset) {
|
||||
/* it was the last entry */
|
||||
SISetEndEntryChain(segP, InvalidOffset);
|
||||
}
|
||||
/* free the entry */
|
||||
e1P->isfree = true;
|
||||
e1P->next = SIGetStartFreeSpace(segP);
|
||||
SISetStartFreeSpace(segP, SIEntryOffset(segP, e1P));
|
||||
SIDecProcLimit(segP, 1);
|
||||
return(true);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/************************************************************************/
|
||||
/* SISetProcStateInvalid(segP) checks and marks a backends state as */
|
||||
/* invalid */
|
||||
/************************************************************************/
|
||||
void
|
||||
SISetProcStateInvalid(SISeg *segP)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i=0; i < MaxBackendId; i++) {
|
||||
if (segP->procState[i].limit == 0) {
|
||||
/* backend i didn't read any message */
|
||||
segP->procState[i].resetState = true;
|
||||
/*XXX signal backend that it has to reset its internal cache ? */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/************************************************************************/
/* SIReadEntryData(segP, backendId, function)				*/
/*		      - marks messages to be read by id			*/
/*			and executes function				*/
/*									*/
/* Caller must hold SInvalLock (see InvalidateSharedInvalid).		*/
/* invalFunction is applied to each unread message; resetFunction is	*/
/* called instead when this backend's state was marked for reset.	*/
/************************************************************************/
void
SIReadEntryData(SISeg *segP,
		int backendId,
		void (*invalFunction)(),
		void (*resetFunction)())
{
    int i = 0;			/* count of messages consumed this call */
    SISegEntry *data;

    Assert(segP->procState[backendId - 1].tag == MyBackendTag);

    if (!segP->procState[backendId - 1].resetState) {
	/* invalidate data, but only those, you have not seen yet !!*/
	/* therefore skip read messages */
	data = SIGetNthDataEntry(segP,
				 SIGetProcStateLimit(segP, backendId - 1) + 1);
	while (data != NULL) {
	    i++;
	    segP->procState[backendId - 1].limit++;  /* one more message read */
	    invalFunction(data->entryData.cacheId,
			  data->entryData.hashIndex,
			  &data->entryData.pointerData);
	    data = SIGetNextDataEntry(segP, data->next);
	}
	/* SIDelExpiredDataEntries(segP); */
    } else {
	/*backend must not read messages, its own state has to be reset */
	elog(NOTICE, "SIMarkEntryData: cache state reset");
	resetFunction(); /* XXXX call it here, parameters? */

	/* new valid state--mark all messages "read" */
	segP->procState[backendId - 1].resetState = false;
	segP->procState[backendId - 1].limit = SIGetNumEntries(segP);
    }
    /* check whether we can remove dead messages */
    /* sanity: never more messages than the buffer can hold */
    if (i > MAXNUMMESSAGES) {
	elog(FATAL, "SIReadEntryData: Invalid segment state");
    }
}
|
||||
|
||||
/************************************************************************/
|
||||
/* SIDelExpiredDataEntries (segP) - removes irrelevant messages */
|
||||
/************************************************************************/
|
||||
void
|
||||
SIDelExpiredDataEntries(SISeg *segP)
|
||||
{
|
||||
int min, i, h;
|
||||
|
||||
min = 9999999;
|
||||
for (i = 0; i < MaxBackendId; i++) {
|
||||
h = SIGetProcStateLimit(segP, i);
|
||||
if (h >= 0) { /* backend active */
|
||||
if (h < min ) min = h;
|
||||
}
|
||||
}
|
||||
if (min != 9999999) {
|
||||
/* we can remove min messages */
|
||||
for (i = 1; i <= min; i++) {
|
||||
/* this adjusts also the state limits!*/
|
||||
if (!SIDelDataEntry(segP)) {
|
||||
elog(FATAL, "SIDelExpiredDataEntries: Invalid segment state");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/************************************************************************/
|
||||
/* SISegInit(segP) - initializes the segment */
|
||||
/************************************************************************/
|
||||
static void
|
||||
SISegInit(SISeg *segP)
|
||||
{
|
||||
SISegOffsets *oP;
|
||||
int segSize, i;
|
||||
SISegEntry *eP;
|
||||
|
||||
oP = SIComputeSize(&segSize);
|
||||
/* set sempahore ids in the segment */
|
||||
/* XXX */
|
||||
SISetStartEntrySection(segP, oP->offsetToFirstEntry);
|
||||
SISetEndEntrySection(segP, oP->offsetToEndOfSegemnt);
|
||||
SISetStartFreeSpace(segP, 0);
|
||||
SISetStartEntryChain(segP, InvalidOffset);
|
||||
SISetEndEntryChain(segP, InvalidOffset);
|
||||
(void) SISetNumEntries(segP, 0);
|
||||
(void) SISetMaxNumEntries(segP, MAXNUMMESSAGES);
|
||||
for (i = 0; i < MaxBackendId; i++) {
|
||||
segP->procState[i].limit = -1; /* no backend active !!*/
|
||||
segP->procState[i].resetState = false;
|
||||
segP->procState[i].tag = InvalidBackendTag;
|
||||
}
|
||||
/* construct a chain of free entries */
|
||||
for (i = 1; i < MAXNUMMESSAGES; i++) {
|
||||
eP = (SISegEntry *) ((Pointer) segP +
|
||||
SIGetStartEntrySection(segP) +
|
||||
(i - 1) * sizeof(SISegEntry));
|
||||
eP->isfree = true;
|
||||
eP->next = i * sizeof(SISegEntry); /* relative to B */
|
||||
}
|
||||
/* handle the last free entry separate */
|
||||
eP = (SISegEntry *) ((Pointer) segP +
|
||||
SIGetStartEntrySection(segP) +
|
||||
(MAXNUMMESSAGES - 1) * sizeof(SISegEntry));
|
||||
eP->isfree = true;
|
||||
eP->next = InvalidOffset; /* it's the end of the chain !! */
|
||||
/*
|
||||
* Be tidy
|
||||
*/
|
||||
pfree(oP);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
/************************************************************************/
/* SISegmentKill(key)   - kill any segment                              */
/************************************************************************/
static void
SISegmentKill(int key)          /* the corresponding key for the segment */
{
    /* destroy the shared memory segment registered under this key */
    IpcMemoryKill(key);
}
|
||||
|
||||
|
||||
/************************************************************************/
|
||||
/* SISegmentGet(key, size) - get a shared segment of size <size> */
|
||||
/* returns a segment id */
|
||||
/************************************************************************/
|
||||
static IpcMemoryId
|
||||
SISegmentGet(int key, /* the corresponding key for the segment */
|
||||
int size, /* size of segment in bytes */
|
||||
bool create)
|
||||
{
|
||||
IpcMemoryId shmid;
|
||||
|
||||
if (create) {
|
||||
shmid = IpcMemoryCreate(key, size, IPCProtection);
|
||||
} else {
|
||||
shmid = IpcMemoryIdGet(key, size);
|
||||
}
|
||||
return(shmid);
|
||||
}
|
||||
|
||||
/************************************************************************/
|
||||
/* SISegmentAttach(shmid) - attach a shared segment with id shmid */
|
||||
/************************************************************************/
|
||||
static void
|
||||
SISegmentAttach(IpcMemoryId shmid)
|
||||
{
|
||||
shmInvalBuffer = (struct SISeg *) IpcMemoryAttach(shmid);
|
||||
if (shmInvalBuffer == IpcMemAttachFailed) {
|
||||
/* XXX use validity function */
|
||||
elog(NOTICE, "SISegmentAttach: Could not attach segment");
|
||||
elog(FATAL, "SISegmentAttach: %m");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/************************************************************************/
|
||||
/* SISegmentInit(killExistingSegment, key) initialize segment */
|
||||
/************************************************************************/
|
||||
int
|
||||
SISegmentInit(bool killExistingSegment, IPCKey key)
|
||||
{
|
||||
SISegOffsets *oP;
|
||||
int segSize;
|
||||
IpcMemoryId shmId;
|
||||
bool create;
|
||||
|
||||
if (killExistingSegment) {
|
||||
/* Kill existing segment */
|
||||
/* set semaphore */
|
||||
SISegmentKill(key);
|
||||
|
||||
/* Get a shared segment */
|
||||
|
||||
oP = SIComputeSize(&segSize);
|
||||
/*
|
||||
* Be tidy
|
||||
*/
|
||||
pfree(oP);
|
||||
|
||||
create = true;
|
||||
shmId = SISegmentGet(key,segSize, create);
|
||||
if (shmId < 0) {
|
||||
perror("SISegmentGet: failed");
|
||||
return(-1); /* an error */
|
||||
}
|
||||
|
||||
/* Attach the shared cache invalidation segment */
|
||||
/* sets the global variable shmInvalBuffer */
|
||||
SISegmentAttach(shmId);
|
||||
|
||||
/* Init shared memory table */
|
||||
SISegInit(shmInvalBuffer);
|
||||
} else {
|
||||
/* use an existing segment */
|
||||
create = false;
|
||||
shmId = SISegmentGet(key, 0, create);
|
||||
if (shmId < 0) {
|
||||
perror("SISegmentGet: getting an existent segment failed");
|
||||
return(-1); /* an error */
|
||||
}
|
||||
/* Attach the shared cache invalidation segment */
|
||||
SISegmentAttach(shmId);
|
||||
}
|
||||
return(1);
|
||||
}
|
||||
|
||||
247
src/backend/storage/ipc/spin.c
Normal file
247
src/backend/storage/ipc/spin.c
Normal file
@@ -0,0 +1,247 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* spin.c--
|
||||
* routines for managing spin locks
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/Attic/spin.c,v 1.1.1.1 1996/07/09 06:21:55 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
/*
|
||||
* POSTGRES has two kinds of locks: semaphores (which put the
|
||||
* process to sleep) and spinlocks (which are supposed to be
|
||||
* short term locks). Currently both are implemented as SysV
|
||||
* semaphores, but presumably this can change if we move to
|
||||
* a machine with a test-and-set (TAS) instruction. Its probably
|
||||
* a good idea to think about (and allocate) short term and long
|
||||
* term semaphores separately anyway.
|
||||
*
|
||||
* NOTE: These routines are not supposed to be widely used in Postgres.
|
||||
* They are preserved solely for the purpose of porting Mark Sullivan's
|
||||
* buffer manager to Postgres.
|
||||
*/
|
||||
#include <errno.h>
|
||||
#include "postgres.h"
|
||||
#include "storage/ipc.h"
|
||||
#include "storage/shmem.h"
|
||||
#include "storage/spin.h"
|
||||
#include "storage/proc.h"
|
||||
#include "utils/elog.h"
|
||||
|
||||
/* globals used in this file */
|
||||
IpcSemaphoreId SpinLockId;
|
||||
|
||||
#ifdef HAS_TEST_AND_SET
|
||||
/* real spin lock implementations */
|
||||
|
||||
bool
|
||||
CreateSpinlocks(IPCKey key)
|
||||
{
|
||||
/* the spin lock shared memory must have been created by now */
|
||||
return(TRUE);
|
||||
}
|
||||
|
||||
bool
|
||||
AttachSpinLocks(IPCKey key)
|
||||
{
|
||||
/* the spin lock shared memory must have been attached by now */
|
||||
return(TRUE);
|
||||
}
|
||||
|
||||
bool
|
||||
InitSpinLocks(int init, IPCKey key)
|
||||
{
|
||||
extern SPINLOCK ShmemLock;
|
||||
extern SPINLOCK BindingLock;
|
||||
extern SPINLOCK BufMgrLock;
|
||||
extern SPINLOCK LockMgrLock;
|
||||
extern SPINLOCK ProcStructLock;
|
||||
extern SPINLOCK SInvalLock;
|
||||
extern SPINLOCK OidGenLockId;
|
||||
|
||||
#ifdef MAIN_MEMORY
|
||||
extern SPINLOCK MMCacheLock;
|
||||
#endif /* SONY_JUKEBOX */
|
||||
|
||||
/* These six spinlocks have fixed location is shmem */
|
||||
ShmemLock = (SPINLOCK) SHMEMLOCKID;
|
||||
BindingLock = (SPINLOCK) BINDINGLOCKID;
|
||||
BufMgrLock = (SPINLOCK) BUFMGRLOCKID;
|
||||
LockMgrLock = (SPINLOCK) LOCKMGRLOCKID;
|
||||
ProcStructLock = (SPINLOCK) PROCSTRUCTLOCKID;
|
||||
SInvalLock = (SPINLOCK) SINVALLOCKID;
|
||||
OidGenLockId = (SPINLOCK) OIDGENLOCKID;
|
||||
|
||||
#ifdef MAIN_MEMORY
|
||||
MMCacheLock = (SPINLOCK) MMCACHELOCKID;
|
||||
#endif /* MAIN_MEMORY */
|
||||
|
||||
return(TRUE);
|
||||
}
|
||||
|
||||
void
|
||||
SpinAcquire(SPINLOCK lock)
|
||||
{
|
||||
ExclusiveLock(lock);
|
||||
PROC_INCR_SLOCK(lock);
|
||||
}
|
||||
|
||||
void
|
||||
SpinRelease(SPINLOCK lock)
|
||||
{
|
||||
PROC_DECR_SLOCK(lock);
|
||||
ExclusiveUnlock(lock);
|
||||
}
|
||||
|
||||
bool
|
||||
SpinIsLocked(SPINLOCK lock)
|
||||
{
|
||||
return(!LockIsFree(lock));
|
||||
}
|
||||
|
||||
#else /* HAS_TEST_AND_SET */
|
||||
/* Spinlocks are implemented using SysV semaphores */
|
||||
|
||||
|
||||
/*
|
||||
* SpinAcquire -- try to grab a spinlock
|
||||
*
|
||||
* FAILS if the semaphore is corrupted.
|
||||
*/
|
||||
void
|
||||
SpinAcquire(SPINLOCK lock)
|
||||
{
|
||||
IpcSemaphoreLock(SpinLockId, lock, IpcExclusiveLock);
|
||||
PROC_INCR_SLOCK(lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* SpinRelease -- release a spin lock
|
||||
*
|
||||
* FAILS if the semaphore is corrupted
|
||||
*/
|
||||
void
|
||||
SpinRelease(SPINLOCK lock)
|
||||
{
|
||||
Assert(SpinIsLocked(lock))
|
||||
PROC_DECR_SLOCK(lock);
|
||||
IpcSemaphoreUnlock(SpinLockId, lock, IpcExclusiveLock);
|
||||
}
|
||||
|
||||
bool
|
||||
SpinIsLocked(SPINLOCK lock)
|
||||
{
|
||||
int semval;
|
||||
|
||||
semval = IpcSemaphoreGetValue(SpinLockId, lock);
|
||||
return(semval < IpcSemaphoreDefaultStartValue);
|
||||
}
|
||||
|
||||
/*
|
||||
* CreateSpinlocks -- Create a sysV semaphore array for
|
||||
* the spinlocks
|
||||
*
|
||||
*/
|
||||
bool
|
||||
CreateSpinlocks(IPCKey key)
|
||||
{
|
||||
|
||||
int status;
|
||||
IpcSemaphoreId semid;
|
||||
semid = IpcSemaphoreCreate(key, MAX_SPINS, IPCProtection,
|
||||
IpcSemaphoreDefaultStartValue, 1, &status);
|
||||
if (status == IpcSemIdExist) {
|
||||
IpcSemaphoreKill(key);
|
||||
elog(NOTICE,"Destroying old spinlock semaphore");
|
||||
semid = IpcSemaphoreCreate(key, MAX_SPINS, IPCProtection,
|
||||
IpcSemaphoreDefaultStartValue, 1, &status);
|
||||
}
|
||||
|
||||
if (semid >= 0) {
|
||||
SpinLockId = semid;
|
||||
return(TRUE);
|
||||
}
|
||||
/* cannot create spinlocks */
|
||||
elog(FATAL,"CreateSpinlocks: cannot create spin locks");
|
||||
return(FALSE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Attach to existing spinlock set
|
||||
*/
|
||||
bool
|
||||
AttachSpinLocks(IPCKey key)
|
||||
{
|
||||
IpcSemaphoreId id;
|
||||
|
||||
id = semget (key, MAX_SPINS, 0);
|
||||
if (id < 0) {
|
||||
if (errno == EEXIST) {
|
||||
/* key is the name of someone else's semaphore */
|
||||
elog (FATAL,"AttachSpinlocks: SPIN_KEY belongs to someone else");
|
||||
}
|
||||
/* cannot create spinlocks */
|
||||
elog(FATAL,"AttachSpinlocks: cannot create spin locks");
|
||||
return(FALSE);
|
||||
}
|
||||
SpinLockId = id;
|
||||
return(TRUE);
|
||||
}
|
||||
|
||||
/*
|
||||
* InitSpinLocks -- Spinlock bootstrapping
|
||||
*
|
||||
* We need several spinlocks for bootstrapping:
|
||||
* BindingLock (for the shmem binding table) and
|
||||
* ShmemLock (for the shmem allocator), BufMgrLock (for buffer
|
||||
* pool exclusive access), LockMgrLock (for the lock table), and
|
||||
* ProcStructLock (a spin lock for the shared process structure).
|
||||
* If there's a Sony WORM drive attached, we also have a spinlock
|
||||
* (SJCacheLock) for it. Same story for the main memory storage mgr.
|
||||
*
|
||||
*/
|
||||
bool
|
||||
InitSpinLocks(int init, IPCKey key)
|
||||
{
|
||||
extern SPINLOCK ShmemLock;
|
||||
extern SPINLOCK BindingLock;
|
||||
extern SPINLOCK BufMgrLock;
|
||||
extern SPINLOCK LockMgrLock;
|
||||
extern SPINLOCK ProcStructLock;
|
||||
extern SPINLOCK SInvalLock;
|
||||
extern SPINLOCK OidGenLockId;
|
||||
|
||||
#ifdef MAIN_MEMORY
|
||||
extern SPINLOCK MMCacheLock;
|
||||
#endif /* MAIN_MEMORY */
|
||||
|
||||
if (!init || key != IPC_PRIVATE) {
|
||||
/* if bootstrap and key is IPC_PRIVATE, it means that we are running
|
||||
* backend by itself. no need to attach spinlocks
|
||||
*/
|
||||
if (! AttachSpinLocks(key)) {
|
||||
elog(FATAL,"InitSpinLocks: couldnt attach spin locks");
|
||||
return(FALSE);
|
||||
}
|
||||
}
|
||||
|
||||
/* These five (or six) spinlocks have fixed location is shmem */
|
||||
ShmemLock = (SPINLOCK) SHMEMLOCKID;
|
||||
BindingLock = (SPINLOCK) BINDINGLOCKID;
|
||||
BufMgrLock = (SPINLOCK) BUFMGRLOCKID;
|
||||
LockMgrLock = (SPINLOCK) LOCKMGRLOCKID;
|
||||
ProcStructLock = (SPINLOCK) PROCSTRUCTLOCKID;
|
||||
SInvalLock = (SPINLOCK) SINVALLOCKID;
|
||||
OidGenLockId = (SPINLOCK) OIDGENLOCKID;
|
||||
|
||||
#ifdef MAIN_MEMORY
|
||||
MMCacheLock = (SPINLOCK) MMCACHELOCKID;
|
||||
#endif /* MAIN_MEMORY */
|
||||
|
||||
return(TRUE);
|
||||
}
|
||||
#endif /* HAS_TEST_AND_SET */
|
||||
20
src/backend/storage/item.h
Normal file
20
src/backend/storage/item.h
Normal file
@@ -0,0 +1,20 @@
|
||||
/*-------------------------------------------------------------------------
 *
 * item.h--
 *    POSTGRES disk item definitions.
 *
 *
 * Copyright (c) 1994, Regents of the University of California
 *
 * $Id: item.h,v 1.1.1.1 1996/07/09 06:21:52 scrappy Exp $
 *
 *-------------------------------------------------------------------------
 */
#ifndef ITEM_H
#define ITEM_H

#include "c.h"

/* An Item is simply an untyped pointer to data stored on a disk page. */
typedef Pointer Item;

#endif  /* ITEM_H */
|
||||
75
src/backend/storage/itemid.h
Normal file
75
src/backend/storage/itemid.h
Normal file
@@ -0,0 +1,75 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* itemid.h--
|
||||
* Standard POSTGRES buffer page item identifier definitions.
|
||||
*
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: itemid.h,v 1.1.1.1 1996/07/09 06:21:52 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef ITEMID_H
|
||||
#define ITEMID_H
|
||||
|
||||
typedef uint16 ItemOffset;
|
||||
typedef uint16 ItemLength;
|
||||
|
||||
typedef bits16 ItemIdFlags;
|
||||
|
||||
|
||||
|
||||
typedef struct ItemIdData { /* line pointers */
|
||||
unsigned lp_off:13, /* offset to find tup */
|
||||
/* can be reduced by 2 if necc. */
|
||||
lp_flags:6, /* flags on tuple */
|
||||
lp_len:13; /* length of tuple */
|
||||
} ItemIdData;
|
||||
|
||||
typedef struct ItemIdData *ItemId;
|
||||
|
||||
#ifndef LP_USED
|
||||
#define LP_USED 0x01 /* this line pointer is being used */
|
||||
#endif
|
||||
|
||||
/* ----------------
|
||||
* support macros
|
||||
* ----------------
|
||||
*/
|
||||
/*
|
||||
* ItemIdGetLength
|
||||
*/
|
||||
#define ItemIdGetLength(itemId) \
|
||||
((itemId)->lp_len)
|
||||
|
||||
/*
|
||||
* ItemIdGetOffset
|
||||
*/
|
||||
#define ItemIdGetOffset(itemId) \
|
||||
((itemId)->lp_off)
|
||||
|
||||
/*
|
||||
* ItemIdGetFlags
|
||||
*/
|
||||
#define ItemIdGetFlags(itemId) \
|
||||
((itemId)->lp_flags)
|
||||
|
||||
/*
|
||||
* ItemIdIsValid --
|
||||
* True iff disk item identifier is valid.
|
||||
*/
|
||||
#define ItemIdIsValid(itemId) PointerIsValid(itemId)
|
||||
|
||||
/*
|
||||
* ItemIdIsUsed --
|
||||
* True iff disk item identifier is in use.
|
||||
*
|
||||
* Note:
|
||||
* Assumes disk item identifier is valid.
|
||||
*/
|
||||
#define ItemIdIsUsed(itemId) \
|
||||
(AssertMacro(ItemIdIsValid(itemId)) ? \
|
||||
(bool) (((itemId)->lp_flags & LP_USED) != 0) : false)
|
||||
|
||||
#endif /* ITEMID_H */
|
||||
44
src/backend/storage/itempos.h
Normal file
44
src/backend/storage/itempos.h
Normal file
@@ -0,0 +1,44 @@
|
||||
/*-------------------------------------------------------------------------
 *
 * itempos.h--
 *    Standard POSTGRES buffer page long item subposition definitions.
 *
 *
 * Copyright (c) 1994, Regents of the University of California
 *
 * $Id: itempos.h,v 1.1.1.1 1996/07/09 06:21:53 scrappy Exp $
 *
 *-------------------------------------------------------------------------
 */
#ifndef ITEMPOS_H
#define ITEMPOS_H

#include "c.h"
#include "storage/buf.h"
#include "storage/itemid.h"

typedef struct ItemSubpositionData {
    Buffer op_db;
    ItemId op_lpp;
    char *op_cp;        /* XXX */
    uint32 op_len;
} ItemSubpositionData;

typedef ItemSubpositionData *ItemSubposition;

/*
 * PNOBREAK(OBJP, LEN)
 *      struct objpos *OBJP;
 *      unsigned LEN;
 *
 * True iff at least LEN bytes remain at the current position.
 * (fixed: LEN is now parenthesized in the expansion)
 */
#define PNOBREAK(OBJP, LEN)     ((OBJP)->op_len >= (LEN))

/*
 * PSKIP(OBJP, LEN)
 *      struct objpos *OBJP;
 *      unsigned LEN;
 *
 * Advance the position by LEN bytes.  Wrapped in do {} while (0) so the
 * multi-statement macro behaves as a single statement inside if/else
 * (the old bare-brace form broke "if (c) PSKIP(p, n); else ...").
 */
#define PSKIP(OBJP, LEN) \
    do { (OBJP)->op_cp += (LEN); (OBJP)->op_len -= (LEN); } while (0)

#endif  /* ITEMPOS_H */
|
||||
115
src/backend/storage/itemptr.h
Normal file
115
src/backend/storage/itemptr.h
Normal file
@@ -0,0 +1,115 @@
|
||||
/*-------------------------------------------------------------------------
 *
 * itemptr.h--
 *    POSTGRES disk item pointer definitions.
 *
 *
 * Copyright (c) 1994, Regents of the University of California
 *
 * $Id: itemptr.h,v 1.1.1.1 1996/07/09 06:21:53 scrappy Exp $
 *
 *-------------------------------------------------------------------------
 */
#ifndef ITEMPTR_H
#define ITEMPTR_H

#include "c.h"
#include "storage/block.h"
#include "storage/off.h"
#include "storage/itemid.h"

/*
 * ItemPointer:
 *
 * this is a pointer to an item on another disk page in the same file.
 * blkid tells us which block, posid tells us which entry in the linp
 * (ItemIdData) array we want.
 */
typedef struct ItemPointerData {
    BlockIdData ip_blkid;
    OffsetNumber ip_posid;
} ItemPointerData;

typedef ItemPointerData *ItemPointer;

/* ----------------
 *      support macros
 *
 * The multi-statement setter macros below are wrapped in do {} while (0)
 * so each behaves as a single statement inside if/else constructs (the
 * old unwrapped forms silently detached their trailing statements).
 * ----------------
 */

/*
 * ItemPointerIsValid --
 *      True iff the disk item pointer is not NULL.
 */
#define ItemPointerIsValid(pointer) \
    ((bool) (PointerIsValid(pointer) && ((pointer)->ip_posid != 0)))

/*
 * ItemPointerGetBlockNumber --
 *      Returns the block number of a disk item pointer.
 */
#define ItemPointerGetBlockNumber(pointer) \
    (AssertMacro(ItemPointerIsValid(pointer)) ? \
     BlockIdGetBlockNumber(&(pointer)->ip_blkid) : (BlockNumber) 0)

/*
 * ItemPointerGetOffsetNumber --
 *      Returns the offset number of a disk item pointer.
 */
#define ItemPointerGetOffsetNumber(pointer) \
    (AssertMacro(ItemPointerIsValid(pointer)) ? \
     (pointer)->ip_posid : \
     InvalidOffsetNumber)

/*
 * ItemPointerSet --
 *      Sets a disk item pointer to the specified block and offset.
 */
#define ItemPointerSet(pointer, blockNumber, offNum) \
    do { \
        Assert(PointerIsValid(pointer)); \
        BlockIdSet(&((pointer)->ip_blkid), blockNumber); \
        (pointer)->ip_posid = offNum; \
    } while (0)

/*
 * ItemPointerSetBlockNumber --
 *      Sets a disk item pointer to the specified block.
 */
#define ItemPointerSetBlockNumber(pointer, blockNumber) \
    do { \
        Assert(PointerIsValid(pointer)); \
        BlockIdSet(&((pointer)->ip_blkid), blockNumber); \
    } while (0)

/*
 * ItemPointerSetOffsetNumber --
 *      Sets a disk item pointer to the specified offset.
 */
#define ItemPointerSetOffsetNumber(pointer, offsetNumber) \
    do { \
        AssertMacro(PointerIsValid(pointer)); \
        (pointer)->ip_posid = (offsetNumber); \
    } while (0)

/*
 * ItemPointerCopy --
 *      Copies the contents of one disk item pointer to another.
 */
#define ItemPointerCopy(fromPointer, toPointer) \
    do { \
        Assert(PointerIsValid(toPointer)); \
        Assert(PointerIsValid(fromPointer)); \
        *(toPointer) = *(fromPointer); \
    } while (0)

/*
 * ItemPointerSetInvalid --
 *      Sets a disk item pointer to be invalid.
 */
#define ItemPointerSetInvalid(pointer) \
    do { \
        Assert(PointerIsValid(pointer)); \
        BlockIdSet(&((pointer)->ip_blkid), InvalidBlockNumber); \
        (pointer)->ip_posid = InvalidOffsetNumber; \
    } while (0)

/* ----------------
 *      externs
 * ----------------
 */

extern bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2);

#endif  /* ITEMPTR_H */
|
||||
|
||||
58
src/backend/storage/large_object.h
Normal file
58
src/backend/storage/large_object.h
Normal file
@@ -0,0 +1,58 @@
|
||||
/*-------------------------------------------------------------------------
 *
 * large_object.h--
 *    file of info for Postgres large objects.  POSTGRES 4.2 supports
 *    zillions of large objects (internal, external, jaquith, inversion).
 *    Now we only support inversion.
 *
 * Copyright (c) 1994, Regents of the University of California
 *
 * $Id: large_object.h,v 1.1.1.1 1996/07/09 06:21:53 scrappy Exp $
 *
 *-------------------------------------------------------------------------
 */
#ifndef LARGE_OBJECT_H
#define LARGE_OBJECT_H

#include "c.h"
#include "utils/rel.h"
#include "access/relscan.h"

/*
 * This structure will eventually have lots more stuff associated with it.
 */
typedef struct LargeObjectDesc
{
    Relation heap_r;            /* heap relation */
    Relation index_r;           /* index relation on seqno attribute */
    IndexScanDesc iscan;        /* index scan we're using */
    TupleDesc hdesc;            /* heap relation tuple desc */
    TupleDesc idesc;            /* index relation tuple desc */
    uint32 lowbyte;             /* low byte on the current page */
    uint32 highbyte;            /* high byte on the current page */
    uint32 offset;              /* current seek pointer */
    ItemPointerData htid;       /* tid of current heap tuple */

/* bit values stored in the "flags" field below */
#define IFS_RDLOCK      (1 << 0)
#define IFS_WRLOCK      (1 << 1)
#define IFS_ATEOF       (1 << 2)

    u_long flags;               /* locking info, etc */
} LargeObjectDesc;

/*
 * Function definitions...
 */

/* inversion stuff in inv_api.c */
extern LargeObjectDesc *inv_create(int flags);
extern LargeObjectDesc *inv_open(Oid lobjId, int flags);
extern void inv_close(LargeObjectDesc *obj_desc);
extern int inv_destroy(Oid lobjId);
extern int inv_stat(LargeObjectDesc *obj_desc, struct pgstat *stbuf);
extern int inv_seek(LargeObjectDesc *obj_desc, int offset, int whence);
extern int inv_tell(LargeObjectDesc *obj_desc);
extern int inv_read(LargeObjectDesc *obj_desc, char *buf, int nbytes);
extern int inv_write(LargeObjectDesc *obj_desc, char *buf, int nbytes);

#endif /* LARGE_OBJECT_H */
|
||||
14
src/backend/storage/large_object/Makefile.inc
Normal file
14
src/backend/storage/large_object/Makefile.inc
Normal file
@@ -0,0 +1,14 @@
|
||||
#-------------------------------------------------------------------------
|
||||
#
|
||||
# Makefile.inc--
|
||||
# Makefile for storage/large_object
|
||||
#
|
||||
# Copyright (c) 1994, Regents of the University of California
|
||||
#
|
||||
#
|
||||
# IDENTIFICATION
|
||||
# $Header: /cvsroot/pgsql/src/backend/storage/large_object/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:55 scrappy Exp $
|
||||
#
|
||||
#-------------------------------------------------------------------------
|
||||
|
||||
SUBSRCS+= inv_api.c
|
||||
1165
src/backend/storage/large_object/inv_api.c
Normal file
1165
src/backend/storage/large_object/inv_api.c
Normal file
File diff suppressed because it is too large
Load Diff
84
src/backend/storage/lmgr.h
Normal file
84
src/backend/storage/lmgr.h
Normal file
@@ -0,0 +1,84 @@
|
||||
/*-------------------------------------------------------------------------
 *
 * lmgr.h--
 *    POSTGRES lock manager definitions.
 *
 *
 * Copyright (c) 1994, Regents of the University of California
 *
 * $Id: lmgr.h,v 1.1.1.1 1996/07/09 06:21:53 scrappy Exp $
 *
 *-------------------------------------------------------------------------
 */
#ifndef LMGR_H
#define LMGR_H

#include "postgres.h"

#include "storage/itemptr.h"
#include "storage/lock.h"
#include "utils/rel.h"

/*
 * This was moved from pladt.h for the new lock manager.  Want to obsolete
 * all of the old code.
 */
typedef struct LRelId {
    Oid relId;          /* a relation identifier */
    Oid dbId;           /* a database identifier */
} LRelId;

typedef struct LockInfoData {
    bool initialized;
    LRelId lRelId;
    TransactionId transactionIdData;
    uint16 flags;
} LockInfoData;
typedef LockInfoData *LockInfo;

#define LockInfoIsValid(linfo) \
    ((PointerIsValid(linfo)) && ((LockInfo) linfo)->initialized)


extern LRelId RelationGetLRelId(Relation relation);
extern Oid LRelIdGetDatabaseId(LRelId lRelId);
extern Oid LRelIdGetRelationId(LRelId lRelId);
extern bool DatabaseIdIsMyDatabaseId(Oid databaseId);
extern bool LRelIdContainsMyDatabaseId(LRelId lRelId);
extern void RelationInitLockInfo(Relation relation);
extern void RelationDiscardLockInfo(Relation relation);
extern void RelationSetLockForDescriptorOpen(Relation relation);
extern void RelationSetLockForRead(Relation relation);
extern void RelationUnsetLockForRead(Relation relation);
extern void RelationSetLockForWrite(Relation relation);
extern void RelationUnsetLockForWrite(Relation relation);
extern void RelationSetLockForTupleRead(Relation relation,
                                        ItemPointer itemPointer);

/* used in vacuum.c */
extern void RelationSetLockForWritePage(Relation relation,
                                        ItemPointer itemPointer);

/* used in nbtpage.c, hashpage.c */
extern void RelationSetSingleWLockPage(Relation relation,
                                       ItemPointer itemPointer);
extern void RelationUnsetSingleWLockPage(Relation relation,
                                         ItemPointer itemPointer);
extern void RelationSetSingleRLockPage(Relation relation,
                                       ItemPointer itemPointer);
extern void RelationUnsetSingleRLockPage(Relation relation,
                                         ItemPointer itemPointer);
extern void RelationSetRIntentLock(Relation relation);
extern void RelationUnsetRIntentLock(Relation relation);
extern void RelationSetWIntentLock(Relation relation);
extern void RelationUnsetWIntentLock(Relation relation);
extern void RelationSetLockForExtend(Relation relation);
extern void RelationUnsetLockForExtend(Relation relation);
extern void LRelIdAssign(LRelId *lRelId, Oid dbId, Oid relId);

/* single.c */
extern bool SingleLockReln(LockInfo linfo, LOCKT lockt, int action);
extern bool SingleLockPage(LockInfo linfo, ItemPointer tidPtr,
                           LOCKT lockt, int action);

#endif /* LMGR_H */
|
||||
14
src/backend/storage/lmgr/Makefile.inc
Normal file
14
src/backend/storage/lmgr/Makefile.inc
Normal file
@@ -0,0 +1,14 @@
|
||||
#-------------------------------------------------------------------------
|
||||
#
|
||||
# Makefile.inc--
|
||||
# Makefile for storage/lmgr
|
||||
#
|
||||
# Copyright (c) 1994, Regents of the University of California
|
||||
#
|
||||
#
|
||||
# IDENTIFICATION
|
||||
# $Header: /cvsroot/pgsql/src/backend/storage/lmgr/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:55 scrappy Exp $
|
||||
#
|
||||
#-------------------------------------------------------------------------
|
||||
|
||||
SUBSRCS+= lmgr.c lock.c multi.c proc.c single.c
|
||||
93
src/backend/storage/lmgr/README
Normal file
93
src/backend/storage/lmgr/README
Normal file
@@ -0,0 +1,93 @@
|
||||
$Header: /cvsroot/pgsql/src/backend/storage/lmgr/README,v 1.1.1.1 1996/07/09 06:21:55 scrappy Exp $
|
||||
|
||||
This file is an attempt to save me (and future code maintainers) some
|
||||
time and a lot of headaches. The existing lock manager code at the time
|
||||
of this writing (June 16 1992) can best be described as confusing. The
|
||||
complexity seems inherent in lock manager functionality, but variable
|
||||
names chosen in the current implementation really confuse me every time
|
||||
I have to track down a bug. Also, what gets done where and by whom isn't
|
||||
always clear....
|
||||
|
||||
Starting with the data structures the lock manager relies upon...
|
||||
|
||||
(NOTE - these will undoubtedly change over time and it is likely
|
||||
that this file won't always be updated along with the structs.)
|
||||
|
||||
The lock manager's LOCK:
|
||||
|
||||
tag -
|
||||
The key fields that are used for hashing locks in the shared memory
|
||||
lock hash table. This is kept as a separate struct to ensure that we
|
||||
always zero out the correct number of bytes. This is a problem as
|
||||
part of the tag is an itempointer which is 6 bytes and causes 2
|
||||
additional bytes to be added as padding.
|
||||
|
||||
tag.relId -
|
||||
Uniquely identifies the relation that the lock corresponds to.
|
||||
|
||||
tag.dbId -
|
||||
Uniquely identifies the database in which the relation lives. If
|
||||
this is a shared system relation (e.g. pg_user) the dbId should be
|
||||
set to 0.
|
||||
|
||||
tag.tupleId -
|
||||
Uniquely identifies the block/page within the relation and the
|
||||
tuple within the block. If we are setting a table level lock
|
||||
both the blockId and tupleId (in an item pointer this is called
|
||||
the position) are set to invalid, if it is a page level lock the
|
||||
blockId is valid, while the tupleId is still invalid. Finally if
|
||||
this is a tuple level lock (we currently never do this) then both
|
||||
the blockId and tupleId are set to valid specifications. This is
|
||||
how we get the appearance of a multi-level lock table while using
|
||||
only a single table (see Gray's paper on 2 phase locking if
|
||||
you are puzzled about how multi-level lock tables work).
|
||||
|
||||
mask -
|
||||
This field indicates what types of locks are currently held in the
|
||||
given lock. It is used (against the lock table's conflict table)
|
||||
to determine if the new lock request will conflict with existing
|
||||
lock types held. Conflicts are determined by bitwise AND operations
|
||||
between the mask and the conflict table entry for the given lock type
|
||||
to be set. The current representation is that each bit (1 through 5)
|
||||
is set when that lock type (WRITE, READ, WRITE INTENT, READ INTENT, EXTEND)
|
||||
has been acquired for the lock.
|
||||
|
||||
waitProcs -
|
||||
This is a shared memory queue of all process structures corresponding to
|
||||
a backend that is waiting (sleeping) until another backend releases this
|
||||
lock. The process structure holds the information needed to determine
|
||||
if it should be woken up when this lock is released. If, for example,
|
||||
we are releasing a read lock and the process is sleeping trying to acquire
|
||||
a read lock then there is no point in waking it since the lock being
|
||||
released isn't what caused it to sleep in the first place. There will
|
||||
be more on this below (when I get to releasing locks and waking sleeping
|
||||
process routines).
|
||||
|
||||
nHolding -
|
||||
Keeps a count of how many times this lock has been attempted to be
|
||||
acquired. The count includes attempts by processes which were put
|
||||
to sleep due to conflicts. It also counts the same backend twice
|
||||
if, for example, a backend process first acquires a read and then
|
||||
acquires a write.
|
||||
|
||||
holders -
|
||||
Keeps a count of how many locks of each type have been attempted. Only
|
||||
elements 1 through MAX_LOCK_TYPES are used as they correspond to the lock
|
||||
type defined constants (WRITE through EXTEND). Summing the values of
|
||||
holders should come out equal to nHolding.
|
||||
|
||||
nActive -
|
||||
Keeps a count of how many times this lock has been successfully acquired.
|
||||
This count does not include attempts that were rejected due to conflicts,
|
||||
but can count the same backend twice (e.g. a read then a write -- since
|
||||
its the same transaction this won't cause a conflict)
|
||||
|
||||
activeHolders -
|
||||
Keeps a count of how many locks of each type are currently held.  Once again
|
||||
only elements 1 through MAX_LOCK_TYPES are used (0 is not). Also, like
|
||||
holders, summing the values of activeHolders should total to the value
|
||||
of nActive.
|
||||
|
||||
|
||||
This is all I had the stomach for right now..... I will get back to this
|
||||
someday. -mer 17 June 1992 12:00 am
|
||||
933
src/backend/storage/lmgr/lmgr.c
Normal file
933
src/backend/storage/lmgr/lmgr.c
Normal file
@@ -0,0 +1,933 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* lmgr.c--
|
||||
* POSTGRES lock manager code
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/lmgr.c,v 1.1.1.1 1996/07/09 06:21:56 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
/* #define LOCKDEBUGALL 1 */
|
||||
/* #define LOCKDEBUG 1 */
|
||||
|
||||
#ifdef LOCKDEBUGALL
|
||||
#define LOCKDEBUG 1
|
||||
#endif /* LOCKDEBUGALL */
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include "access/heapam.h"
|
||||
#include "access/htup.h"
|
||||
#include "access/relscan.h"
|
||||
#include "access/skey.h"
|
||||
#include "utils/tqual.h"
|
||||
#include "access/xact.h"
|
||||
|
||||
#include "storage/block.h"
|
||||
#include "storage/buf.h"
|
||||
#include "storage/itemptr.h"
|
||||
#include "storage/bufpage.h"
|
||||
#include "storage/multilev.h"
|
||||
#include "storage/lmgr.h"
|
||||
|
||||
#include "utils/elog.h"
|
||||
#include "utils/palloc.h"
|
||||
#include "utils/rel.h"
|
||||
|
||||
#include "catalog/catname.h"
|
||||
#include "catalog/catalog.h"
|
||||
#include "catalog/pg_class.h"
|
||||
|
||||
#include "nodes/memnodes.h"
|
||||
#include "storage/bufmgr.h"
|
||||
#include "access/transam.h" /* for AmiTransactionId */
|
||||
|
||||
/* ----------------
|
||||
*
|
||||
* ----------------
|
||||
*/
|
||||
#define MaxRetries 4 /* XXX about 1/4 minute--a hack */
|
||||
|
||||
#define IntentReadRelationLock 0x0100
|
||||
#define ReadRelationLock 0x0200
|
||||
#define IntentWriteRelationLock 0x0400
|
||||
#define WriteRelationLock 0x0800
|
||||
#define IntentReadPageLock 0x1000
|
||||
#define ReadTupleLock 0x2000
|
||||
|
||||
#define TupleLevelLockCountMask 0x000f
|
||||
|
||||
#define TupleLevelLockLimit 10
|
||||
|
||||
extern Oid MyDatabaseId;
|
||||
|
||||
static LRelId VariableRelationLRelId = {
|
||||
RelOid_pg_variable,
|
||||
InvalidOid
|
||||
};
|
||||
|
||||
/* ----------------
|
||||
* RelationGetLRelId
|
||||
* ----------------
|
||||
*/
|
||||
#ifdef LOCKDEBUG
|
||||
#define LOCKDEBUG_10 \
|
||||
elog(NOTICE, "RelationGetLRelId(%s) invalid lockInfo", \
|
||||
RelationGetRelationName(relation));
|
||||
#else
|
||||
#define LOCKDEBUG_10
|
||||
#endif /* LOCKDEBUG */
|
||||
|
||||
/*
|
||||
* RelationGetLRelId --
|
||||
* Returns "lock" relation identifier for a relation.
|
||||
*/
|
||||
/*
 * RelationGetLRelId --
 *	Returns the "lock" relation identifier (database OID + relation OID)
 *	for a relation, initializing the relation's lock info on demand.
 */
LRelId
RelationGetLRelId(Relation relation)
{
    LockInfo	linfo;

    /* ----------------
     *	sanity checks
     * ----------------
     */
    Assert(RelationIsValid(relation));
    linfo = (LockInfo) relation->lockInfo;

    /* ----------------
     *	initialize lock info if necessary
     * ----------------
     */
    if (! LockInfoIsValid(linfo)) {
	LOCKDEBUG_10;
	RelationInitLockInfo(relation);
	linfo = (LockInfo) relation->lockInfo;
    }

    /* ----------------
     *	XXX hack to prevent problems during
     *	VARIABLE relation initialization:
     *	pg_variable always gets a canned identifier rather than
     *	whatever is in its (possibly half-built) lock info.
     * ----------------
     */
    if (strcmp(RelationGetRelationName(relation)->data,
	       VariableRelationName) == 0) {
	return (VariableRelationLRelId);
    }

    return (linfo->lRelId);
}
|
||||
|
||||
/*
|
||||
* LRelIdGetDatabaseId --
|
||||
* Returns database identifier for a "lock" relation identifier.
|
||||
*/
|
||||
/* ----------------
|
||||
* LRelIdGetDatabaseId
|
||||
*
|
||||
* Note: The argument may not be correct, if it is not used soon
|
||||
* after it is created.
|
||||
* ----------------
|
||||
*/
|
||||
Oid
|
||||
LRelIdGetDatabaseId(LRelId lRelId)
|
||||
{
|
||||
return (lRelId.dbId);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* LRelIdGetRelationId --
|
||||
* Returns relation identifier for a "lock" relation identifier.
|
||||
*/
|
||||
Oid
|
||||
LRelIdGetRelationId(LRelId lRelId)
|
||||
{
|
||||
return (lRelId.relId);
|
||||
}
|
||||
|
||||
/*
|
||||
* DatabaseIdIsMyDatabaseId --
|
||||
* True iff database object identifier is valid in my present database.
|
||||
*/
|
||||
bool
|
||||
DatabaseIdIsMyDatabaseId(Oid databaseId)
|
||||
{
|
||||
return (bool)
|
||||
(!OidIsValid(databaseId) || databaseId == MyDatabaseId);
|
||||
}
|
||||
|
||||
/*
|
||||
* LRelIdContainsMyDatabaseId --
|
||||
* True iff "lock" relation identifier is valid in my present database.
|
||||
*/
|
||||
bool
|
||||
LRelIdContainsMyDatabaseId(LRelId lRelId)
|
||||
{
|
||||
return (bool)
|
||||
(!OidIsValid(lRelId.dbId) || lRelId.dbId == MyDatabaseId);
|
||||
}
|
||||
|
||||
/*
|
||||
* RelationInitLockInfo --
|
||||
* Initializes the lock information in a relation descriptor.
|
||||
*/
|
||||
/* ----------------
|
||||
* RelationInitLockInfo
|
||||
*
|
||||
* XXX processingVariable is a hack to prevent problems during
|
||||
* VARIABLE relation initialization.
|
||||
* ----------------
|
||||
*/
|
||||
/*
 * RelationInitLockInfo --
 *	Initializes the lock information in a relation descriptor:
 *	allocates LockInfoData in the cache memory context if absent,
 *	fills in the (dbId, relId) pair and the owning transaction id,
 *	and marks the info initialized.
 *
 *	XXX processingVariable is a hack to prevent problems during
 *	VARIABLE (pg_variable) relation initialization.
 */
void
RelationInitLockInfo(Relation relation)
{
    LockInfo		info;
    char		*relname;
    Oid			relationid;
    bool		processingVariable;
    extern Oid		MyDatabaseId;	/* XXX use include */
    extern GlobalMemory CacheCxt;

    /* ----------------
     *	sanity checks
     * ----------------
     */
    Assert(RelationIsValid(relation));
    Assert(OidIsValid(RelationGetRelationId(relation)));

    /* ----------------
     *	get information from relation descriptor
     * ----------------
     */
    info = (LockInfo) relation->lockInfo;
    relname = (char *) RelationGetRelationName(relation);
    relationid = RelationGetRelationId(relation);
    processingVariable = (strcmp(relname, VariableRelationName) == 0);

    /* ----------------
     *	create a new lockinfo if not already done.
     *	Allocation happens in CacheCxt so the data lives as long as
     *	the cached relation descriptor that points at it.
     * ----------------
     */
    if (! PointerIsValid(info))
    {
	MemoryContext oldcxt;

	oldcxt = MemoryContextSwitchTo((MemoryContext)CacheCxt);
	info = (LockInfo)palloc(sizeof(LockInfoData));
	MemoryContextSwitchTo(oldcxt);
    }
    else if (processingVariable) {
	/* pg_variable: just refresh the xact id and bail out early */
	if (IsTransactionState()) {
	    TransactionIdStore(GetCurrentTransactionId(),
			       &info->transactionIdData);
	}
	info->flags = 0x0;
	return;		/* prevent an infinite loop--still true? */
    }
    else if (info->initialized)
    {
	/* ------------
	 *  If we've already initialized we're done.
	 * ------------
	 */
	return;
    }

    /* ----------------
     *	initialize lockinfo.dbId and .relId appropriately.
     *	Shared system relations get InvalidOid for the database so
     *	their locks are visible across all databases.
     * ----------------
     */
    if (IsSharedSystemRelationName(relname))
	LRelIdAssign(&info->lRelId, InvalidOid, relationid);
    else
	LRelIdAssign(&info->lRelId, MyDatabaseId, relationid);

    /* ----------------
     *	store the transaction id in the lockInfo field
     * ----------------
     */
    if (processingVariable)
	TransactionIdStore(AmiTransactionId,
			   &info->transactionIdData);
    else if (IsTransactionState())
	TransactionIdStore(GetCurrentTransactionId(),
			   &info->transactionIdData);
    else
	StoreInvalidTransactionId(&(info->transactionIdData));

    /* ----------------
     *	initialize rest of lockinfo and attach it to the relation
     * ----------------
     */
    info->flags = 0x0;
    info->initialized = (bool)true;
    relation->lockInfo = (Pointer) info;
}
|
||||
|
||||
/* ----------------
|
||||
* RelationDiscardLockInfo
|
||||
* ----------------
|
||||
*/
|
||||
#ifdef LOCKDEBUG
|
||||
#define LOCKDEBUG_20 \
|
||||
elog(DEBUG, "DiscardLockInfo: NULL relation->lockInfo")
|
||||
#else
|
||||
#define LOCKDEBUG_20
|
||||
#endif /* LOCKDEBUG */
|
||||
|
||||
/*
|
||||
* RelationDiscardLockInfo --
|
||||
* Discards the lock information in a relation descriptor.
|
||||
*/
|
||||
void
|
||||
RelationDiscardLockInfo(Relation relation)
|
||||
{
|
||||
if (! LockInfoIsValid(relation->lockInfo)) {
|
||||
LOCKDEBUG_20;
|
||||
return;
|
||||
}
|
||||
|
||||
pfree(relation->lockInfo);
|
||||
relation->lockInfo = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* RelationSetLockForDescriptorOpen --
|
||||
* Sets read locks for a relation descriptor.
|
||||
*/
|
||||
#ifdef LOCKDEBUGALL
|
||||
#define LOCKDEBUGALL_30 \
|
||||
elog(DEBUG, "RelationSetLockForDescriptorOpen(%s[%d,%d]) called", \
|
||||
RelationGetRelationName(relation), lRelId.dbId, lRelId.relId)
|
||||
#else
|
||||
#define LOCKDEBUGALL_30
|
||||
#endif /* LOCKDEBUGALL*/
|
||||
|
||||
/*
 * RelationSetLockForDescriptorOpen --
 *	Intended to read-lock the catalog tuples composing a relation
 *	descriptor.  Currently a deliberate no-op after the sanity checks.
 */
void
RelationSetLockForDescriptorOpen(Relation relation)
{
    /* ----------------
     *	sanity checks
     * ----------------
     */
    Assert(RelationIsValid(relation));
    if (LockingDisabled())
	return;

    LOCKDEBUGALL_30;

    /* ----------------
     *	read lock catalog tuples which compose the relation descriptor
     *	XXX race condition? XXX For now, do nothing.
     * ----------------
     */
}
|
||||
|
||||
/* ----------------
|
||||
* RelationSetLockForRead
|
||||
* ----------------
|
||||
*/
|
||||
#ifdef LOCKDEBUG
|
||||
#define LOCKDEBUG_40 \
|
||||
elog(DEBUG, "RelationSetLockForRead(%s[%d,%d]) called", \
|
||||
RelationGetRelationName(relation), lRelId.dbId, lRelId.relId)
|
||||
#else
|
||||
#define LOCKDEBUG_40
|
||||
#endif /* LOCKDEBUG*/
|
||||
|
||||
/*
|
||||
* RelationSetLockForRead --
|
||||
* Sets relation level read lock.
|
||||
*/
|
||||
void
|
||||
RelationSetLockForRead(Relation relation)
|
||||
{
|
||||
LockInfo linfo;
|
||||
|
||||
/* ----------------
|
||||
* sanity checks
|
||||
* ----------------
|
||||
*/
|
||||
Assert(RelationIsValid(relation));
|
||||
if (LockingDisabled())
|
||||
return;
|
||||
|
||||
LOCKDEBUG_40;
|
||||
|
||||
/* ----------------
|
||||
* If we don't have lock info on the reln just go ahead and
|
||||
* lock it without trying to short circuit the lock manager.
|
||||
* ----------------
|
||||
*/
|
||||
if (!LockInfoIsValid(relation->lockInfo))
|
||||
{
|
||||
RelationInitLockInfo(relation);
|
||||
linfo = (LockInfo) relation->lockInfo;
|
||||
linfo->flags |= ReadRelationLock;
|
||||
MultiLockReln(linfo, READ_LOCK);
|
||||
return;
|
||||
}
|
||||
else
|
||||
linfo = (LockInfo) relation->lockInfo;
|
||||
|
||||
MultiLockReln(linfo, READ_LOCK);
|
||||
}
|
||||
|
||||
/* ----------------
|
||||
* RelationUnsetLockForRead
|
||||
* ----------------
|
||||
*/
|
||||
#ifdef LOCKDEBUG
|
||||
#define LOCKDEBUG_50 \
|
||||
elog(DEBUG, "RelationUnsetLockForRead(%s[%d,%d]) called", \
|
||||
RelationGetRelationName(relation), lRelId.dbId, lRelId.relId)
|
||||
#else
|
||||
#define LOCKDEBUG_50
|
||||
#endif /* LOCKDEBUG*/
|
||||
|
||||
/*
|
||||
* RelationUnsetLockForRead --
|
||||
* Unsets relation level read lock.
|
||||
*/
|
||||
/*
 * RelationUnsetLockForRead --
 *	Unsets a relation-level read lock.
 */
void
RelationUnsetLockForRead(Relation relation)
{
    LockInfo linfo;

    /* ----------------
     *	sanity check
     * ----------------
     */
    Assert(RelationIsValid(relation));
    if (LockingDisabled())
	return;

    linfo = (LockInfo) relation->lockInfo;

    /* ----------------
     *	If we don't have lock info on the reln just go ahead and
     *	release it.
     *
     *	NOTE(review): elog(WARN) presumably aborts via non-local exit,
     *	so MultiReleaseReln below is never reached with an invalid
     *	linfo -- confirm against elog's semantics.
     * ----------------
     */
    if (!LockInfoIsValid(linfo))
    {
	elog(WARN,
	     "Releasing a lock on %s with invalid lock information",
	     RelationGetRelationName(relation));
    }

    MultiReleaseReln(linfo, READ_LOCK);
}
|
||||
|
||||
/* ----------------
|
||||
* RelationSetLockForWrite(relation)
|
||||
* ----------------
|
||||
*/
|
||||
#ifdef LOCKDEBUG
|
||||
#define LOCKDEBUG_60 \
|
||||
elog(DEBUG, "RelationSetLockForWrite(%s[%d,%d]) called", \
|
||||
RelationGetRelationName(relation), lRelId.dbId, lRelId.relId)
|
||||
#else
|
||||
#define LOCKDEBUG_60
|
||||
#endif /* LOCKDEBUG*/
|
||||
|
||||
/*
|
||||
* RelationSetLockForWrite --
|
||||
* Sets relation level write lock.
|
||||
*/
|
||||
void
|
||||
RelationSetLockForWrite(Relation relation)
|
||||
{
|
||||
LockInfo linfo;
|
||||
|
||||
/* ----------------
|
||||
* sanity checks
|
||||
* ----------------
|
||||
*/
|
||||
Assert(RelationIsValid(relation));
|
||||
if (LockingDisabled())
|
||||
return;
|
||||
|
||||
LOCKDEBUG_60;
|
||||
|
||||
/* ----------------
|
||||
* If we don't have lock info on the reln just go ahead and
|
||||
* lock it without trying to short circuit the lock manager.
|
||||
* ----------------
|
||||
*/
|
||||
if (!LockInfoIsValid(relation->lockInfo))
|
||||
{
|
||||
RelationInitLockInfo(relation);
|
||||
linfo = (LockInfo) relation->lockInfo;
|
||||
linfo->flags |= WriteRelationLock;
|
||||
MultiLockReln(linfo, WRITE_LOCK);
|
||||
return;
|
||||
}
|
||||
else
|
||||
linfo = (LockInfo) relation->lockInfo;
|
||||
|
||||
MultiLockReln(linfo, WRITE_LOCK);
|
||||
}
|
||||
|
||||
/* ----------------
|
||||
* RelationUnsetLockForWrite
|
||||
* ----------------
|
||||
*/
|
||||
#ifdef LOCKDEBUG
|
||||
#define LOCKDEBUG_70 \
|
||||
elog(DEBUG, "RelationUnsetLockForWrite(%s[%d,%d]) called", \
|
||||
RelationGetRelationName(relation), lRelId.dbId, lRelId.relId);
|
||||
#else
|
||||
#define LOCKDEBUG_70
|
||||
#endif /* LOCKDEBUG */
|
||||
|
||||
/*
|
||||
* RelationUnsetLockForWrite --
|
||||
* Unsets relation level write lock.
|
||||
*/
|
||||
/*
 * RelationUnsetLockForWrite --
 *	Unsets a relation-level write lock.
 */
void
RelationUnsetLockForWrite(Relation relation)
{
    LockInfo linfo;

    /* ----------------
     *	sanity checks
     * ----------------
     */
    Assert(RelationIsValid(relation));
    if (LockingDisabled()) {
	return;
    }

    linfo = (LockInfo) relation->lockInfo;

    /*
     * NOTE(review): elog(WARN) presumably aborts via non-local exit, so
     * MultiReleaseReln below is never reached with an invalid linfo --
     * confirm against elog's semantics.
     */
    if (!LockInfoIsValid(linfo))
    {
	elog(WARN,
	     "Releasing a lock on %s with invalid lock information",
	     RelationGetRelationName(relation));
    }

    MultiReleaseReln(linfo, WRITE_LOCK);
}
|
||||
|
||||
/* ----------------
|
||||
* RelationSetLockForTupleRead
|
||||
* ----------------
|
||||
*/
|
||||
#ifdef LOCKDEBUG
|
||||
#define LOCKDEBUG_80 \
|
||||
elog(DEBUG, "RelationSetLockForTupleRead(%s[%d,%d], 0x%x) called", \
|
||||
RelationGetRelationName(relation), lRelId.dbId, lRelId.relId, \
|
||||
itemPointer)
|
||||
#define LOCKDEBUG_81 \
|
||||
elog(DEBUG, "RelationSetLockForTupleRead() escalating");
|
||||
#else
|
||||
#define LOCKDEBUG_80
|
||||
#define LOCKDEBUG_81
|
||||
#endif /* LOCKDEBUG */
|
||||
|
||||
/*
|
||||
* RelationSetLockForTupleRead --
|
||||
* Sets tuple level read lock.
|
||||
*/
|
||||
/*
 * RelationSetLockForTupleRead --
 *	Sets a tuple-level read lock, together with read-intent locks at
 *	the page and relation levels.  Keeps a per-relation count of tuple
 *	locks taken this transaction (in the low bits of linfo->flags) and
 *	escalates to a full relation read lock once TupleLevelLockLimit
 *	tuple locks have been taken.
 */
void
RelationSetLockForTupleRead(Relation relation, ItemPointer itemPointer)
{
    LockInfo	linfo;
    TransactionId curXact;

    /* ----------------
     *	sanity checks
     * ----------------
     */
    Assert(RelationIsValid(relation));
    if (LockingDisabled())
	return;

    LOCKDEBUG_80;

    /* ---------------------
     *	If our lock info is invalid don't bother trying to short circuit
     *	the lock manager.
     * ---------------------
     */
    if (!LockInfoIsValid(relation->lockInfo))
    {
	RelationInitLockInfo(relation);
	linfo = (LockInfo) relation->lockInfo;
	linfo->flags |=
	    IntentReadRelationLock |
		IntentReadPageLock |
		    ReadTupleLock;
	MultiLockTuple(linfo, itemPointer, READ_LOCK);
	return;
    }
    else
	linfo = (LockInfo) relation->lockInfo;

    /* ----------------
     *	no need to set a lower granularity lock: this transaction
     *	already holds a read lock on the whole relation.
     * ----------------
     */
    curXact = GetCurrentTransactionId();
    if ((linfo->flags & ReadRelationLock) &&
	TransactionIdEquals(curXact, linfo->transactionIdData))
    {
	return;
    }

    /* ----------------
     *	If we don't already have a tuple lock this transaction,
     *	record the intent flags and reset the tuple-lock counter.
     * ----------------
     */
    if (!( (linfo->flags & ReadTupleLock) &&
	  TransactionIdEquals(curXact, linfo->transactionIdData) )) {

	linfo->flags |=
	    IntentReadRelationLock |
		IntentReadPageLock |
		    ReadTupleLock;

	/* clear count */
	linfo->flags &= ~TupleLevelLockCountMask;

    } else {
	/* already holding tuple locks this xact: count, maybe escalate */
	if (TupleLevelLockLimit == (TupleLevelLockCountMask &
				    linfo->flags)) {
	    LOCKDEBUG_81;

	    /* escalate to a relation-level read lock */
	    MultiLockReln(linfo, READ_LOCK);

	    /* clear count */
	    linfo->flags &= ~TupleLevelLockCountMask;
	    return;
	}

	/* increment count (kept in the low bits of flags) */
	linfo->flags =
	    (linfo->flags & ~TupleLevelLockCountMask) |
		(1 + (TupleLevelLockCountMask & linfo->flags));
    }

    TransactionIdStore(curXact, &linfo->transactionIdData);

    /* ----------------
     *	Lock the tuple.
     * ----------------
     */
    MultiLockTuple(linfo, itemPointer, READ_LOCK);
}
|
||||
|
||||
/* ----------------
|
||||
* RelationSetLockForReadPage
|
||||
* ----------------
|
||||
*/
|
||||
#ifdef LOCKDEBUG
|
||||
#define LOCKDEBUG_90 \
|
||||
elog(DEBUG, "RelationSetLockForReadPage(%s[%d,%d], @%d) called", \
|
||||
RelationGetRelationName(relation), lRelId.dbId, lRelId.relId, page);
|
||||
#else
|
||||
#define LOCKDEBUG_90
|
||||
#endif /* LOCKDEBUG*/
|
||||
|
||||
/* ----------------
|
||||
* RelationSetLockForWritePage
|
||||
* ----------------
|
||||
*/
|
||||
#ifdef LOCKDEBUG
|
||||
#define LOCKDEBUG_100 \
|
||||
elog(DEBUG, "RelationSetLockForWritePage(%s[%d,%d], @%d) called", \
|
||||
RelationGetRelationName(relation), lRelId.dbId, lRelId.relId, page);
|
||||
#else
|
||||
#define LOCKDEBUG_100
|
||||
#endif /* LOCKDEBUG */
|
||||
|
||||
/*
|
||||
* RelationSetLockForWritePage --
|
||||
* Sets write lock on a page.
|
||||
*/
|
||||
void
|
||||
RelationSetLockForWritePage(Relation relation,
|
||||
ItemPointer itemPointer)
|
||||
{
|
||||
/* ----------------
|
||||
* sanity checks
|
||||
* ----------------
|
||||
*/
|
||||
Assert(RelationIsValid(relation));
|
||||
if (LockingDisabled())
|
||||
return;
|
||||
|
||||
/* ---------------
|
||||
* Make sure linfo is initialized
|
||||
* ---------------
|
||||
*/
|
||||
if (!LockInfoIsValid(relation->lockInfo))
|
||||
RelationInitLockInfo(relation);
|
||||
|
||||
/* ----------------
|
||||
* attempt to set lock
|
||||
* ----------------
|
||||
*/
|
||||
MultiLockPage((LockInfo) relation->lockInfo, itemPointer, WRITE_LOCK);
|
||||
}
|
||||
|
||||
/* ----------------
|
||||
* RelationUnsetLockForReadPage
|
||||
* ----------------
|
||||
*/
|
||||
#ifdef LOCKDEBUG
|
||||
#define LOCKDEBUG_110 \
|
||||
elog(DEBUG, "RelationUnsetLockForReadPage(%s[%d,%d], @%d) called", \
|
||||
RelationGetRelationName(relation), lRelId.dbId, lRelId.relId, page)
|
||||
#else
|
||||
#define LOCKDEBUG_110
|
||||
#endif /* LOCKDEBUG */
|
||||
|
||||
/* ----------------
|
||||
* RelationUnsetLockForWritePage
|
||||
* ----------------
|
||||
*/
|
||||
#ifdef LOCKDEBUG
|
||||
#define LOCKDEBUG_120 \
|
||||
elog(DEBUG, "RelationUnsetLockForWritePage(%s[%d,%d], @%d) called", \
|
||||
RelationGetRelationName(relation), lRelId.dbId, lRelId.relId, page)
|
||||
#else
|
||||
#define LOCKDEBUG_120
|
||||
#endif /* LOCKDEBUG */
|
||||
|
||||
/*
|
||||
* Set a single level write page lock. Assumes that you already
|
||||
* have a write intent lock on the relation.
|
||||
*/
|
||||
void
|
||||
RelationSetSingleWLockPage(Relation relation,
|
||||
ItemPointer itemPointer)
|
||||
{
|
||||
|
||||
/* ----------------
|
||||
* sanity checks
|
||||
* ----------------
|
||||
*/
|
||||
Assert(RelationIsValid(relation));
|
||||
if (LockingDisabled())
|
||||
return;
|
||||
|
||||
if (!LockInfoIsValid(relation->lockInfo))
|
||||
RelationInitLockInfo(relation);
|
||||
|
||||
SingleLockPage((LockInfo)relation->lockInfo, itemPointer, WRITE_LOCK, !UNLOCK);
|
||||
}
|
||||
|
||||
/*
|
||||
* Unset a single level write page lock
|
||||
*/
|
||||
/*
 * RelationUnsetSingleWLockPage --
 *	Releases a single-level page write lock.
 */
void
RelationUnsetSingleWLockPage(Relation relation,
			     ItemPointer itemPointer)
{

    /* ----------------
     *	sanity checks
     * ----------------
     */
    Assert(RelationIsValid(relation));
    if (LockingDisabled())
	return;

    /*
     * NOTE(review): elog(WARN) presumably aborts via non-local exit, so
     * SingleLockPage below is never reached with invalid lockInfo --
     * confirm against elog's semantics.
     */
    if (!LockInfoIsValid(relation->lockInfo))
	elog(WARN,
	     "Releasing a lock on %s with invalid lock information",
	     RelationGetRelationName(relation));

    SingleLockPage((LockInfo)relation->lockInfo, itemPointer, WRITE_LOCK, UNLOCK);
}
|
||||
|
||||
/*
|
||||
* Set a single level read page lock. Assumes you already have a read
|
||||
* intent lock set on the relation.
|
||||
*/
|
||||
void
|
||||
RelationSetSingleRLockPage(Relation relation,
|
||||
ItemPointer itemPointer)
|
||||
{
|
||||
|
||||
/* ----------------
|
||||
* sanity checks
|
||||
* ----------------
|
||||
*/
|
||||
Assert(RelationIsValid(relation));
|
||||
if (LockingDisabled())
|
||||
return;
|
||||
|
||||
if (!LockInfoIsValid(relation->lockInfo))
|
||||
RelationInitLockInfo(relation);
|
||||
|
||||
SingleLockPage((LockInfo)relation->lockInfo, itemPointer, READ_LOCK, !UNLOCK);
|
||||
}
|
||||
|
||||
/*
|
||||
* Unset a single level read page lock.
|
||||
*/
|
||||
/*
 * RelationUnsetSingleRLockPage --
 *	Releases a single-level page read lock.
 */
void
RelationUnsetSingleRLockPage(Relation relation,
			     ItemPointer itemPointer)
{

    /* ----------------
     *	sanity checks
     * ----------------
     */
    Assert(RelationIsValid(relation));
    if (LockingDisabled())
	return;

    /*
     * NOTE(review): elog(WARN) presumably aborts via non-local exit, so
     * SingleLockPage below is never reached with invalid lockInfo --
     * confirm against elog's semantics.
     */
    if (!LockInfoIsValid(relation->lockInfo))
	elog(WARN,
	     "Releasing a lock on %s with invalid lock information",
	     RelationGetRelationName(relation));

    SingleLockPage((LockInfo)relation->lockInfo, itemPointer, READ_LOCK, UNLOCK);
}
|
||||
|
||||
/*
|
||||
* Set a read intent lock on a relation.
|
||||
*
|
||||
* Usually these are set in a multi-level table when you acquiring a
|
||||
* page level lock. i.e. To acquire a lock on a page you first acquire
|
||||
* an intent lock on the entire relation. Acquiring an intent lock along
|
||||
* allows one to use the single level locking routines later. Good for
|
||||
* index scans that do a lot of page level locking.
|
||||
*/
|
||||
void
|
||||
RelationSetRIntentLock(Relation relation)
|
||||
{
|
||||
/* -----------------
|
||||
* Sanity check
|
||||
* -----------------
|
||||
*/
|
||||
Assert(RelationIsValid(relation));
|
||||
if (LockingDisabled())
|
||||
return;
|
||||
|
||||
if (!LockInfoIsValid(relation->lockInfo))
|
||||
RelationInitLockInfo(relation);
|
||||
|
||||
SingleLockReln((LockInfo)relation->lockInfo, READ_LOCK+INTENT, !UNLOCK);
|
||||
}
|
||||
|
||||
/*
|
||||
* Unset a read intent lock on a relation
|
||||
*/
|
||||
void
|
||||
RelationUnsetRIntentLock(Relation relation)
|
||||
{
|
||||
/* -----------------
|
||||
* Sanity check
|
||||
* -----------------
|
||||
*/
|
||||
Assert(RelationIsValid(relation));
|
||||
if (LockingDisabled())
|
||||
return;
|
||||
|
||||
if (!LockInfoIsValid(relation->lockInfo))
|
||||
RelationInitLockInfo(relation);
|
||||
|
||||
SingleLockReln((LockInfo)relation->lockInfo, READ_LOCK+INTENT, UNLOCK);
|
||||
}
|
||||
|
||||
/*
|
||||
* Set a write intent lock on a relation. For a more complete explanation
|
||||
* see RelationSetRIntentLock()
|
||||
*/
|
||||
void
|
||||
RelationSetWIntentLock(Relation relation)
|
||||
{
|
||||
/* -----------------
|
||||
* Sanity check
|
||||
* -----------------
|
||||
*/
|
||||
Assert(RelationIsValid(relation));
|
||||
if (LockingDisabled())
|
||||
return;
|
||||
|
||||
if (!LockInfoIsValid(relation->lockInfo))
|
||||
RelationInitLockInfo(relation);
|
||||
|
||||
SingleLockReln((LockInfo)relation->lockInfo, WRITE_LOCK+INTENT, !UNLOCK);
|
||||
}
|
||||
|
||||
/*
|
||||
* Unset a write intent lock.
|
||||
*/
|
||||
void
|
||||
RelationUnsetWIntentLock(Relation relation)
|
||||
{
|
||||
/* -----------------
|
||||
* Sanity check
|
||||
* -----------------
|
||||
*/
|
||||
Assert(RelationIsValid(relation));
|
||||
if (LockingDisabled())
|
||||
return;
|
||||
|
||||
if (!LockInfoIsValid(relation->lockInfo))
|
||||
RelationInitLockInfo(relation);
|
||||
|
||||
SingleLockReln((LockInfo)relation->lockInfo, WRITE_LOCK+INTENT, UNLOCK);
|
||||
}
|
||||
|
||||
/*
|
||||
* Extend locks are used primarily in tertiary storage devices such as
|
||||
* a WORM disk jukebox. Sometimes need exclusive access to extend a
|
||||
* file by a block.
|
||||
*/
|
||||
void
|
||||
RelationSetLockForExtend(Relation relation)
|
||||
{
|
||||
/* -----------------
|
||||
* Sanity check
|
||||
* -----------------
|
||||
*/
|
||||
Assert(RelationIsValid(relation));
|
||||
if (LockingDisabled())
|
||||
return;
|
||||
|
||||
if (!LockInfoIsValid(relation->lockInfo))
|
||||
RelationInitLockInfo(relation);
|
||||
|
||||
MultiLockReln((LockInfo) relation->lockInfo, EXTEND_LOCK);
|
||||
}
|
||||
|
||||
void
|
||||
RelationUnsetLockForExtend(Relation relation)
|
||||
{
|
||||
/* -----------------
|
||||
* Sanity check
|
||||
* -----------------
|
||||
*/
|
||||
Assert(RelationIsValid(relation));
|
||||
if (LockingDisabled())
|
||||
return;
|
||||
|
||||
if (!LockInfoIsValid(relation->lockInfo))
|
||||
RelationInitLockInfo(relation);
|
||||
|
||||
MultiReleaseReln((LockInfo) relation->lockInfo, EXTEND_LOCK);
|
||||
}
|
||||
|
||||
/*
|
||||
* Create an LRelid --- Why not just pass in a pointer to the storage?
|
||||
*/
|
||||
void
|
||||
LRelIdAssign(LRelId *lRelId, Oid dbId, Oid relId)
|
||||
{
|
||||
lRelId->dbId = dbId;
|
||||
lRelId->relId = relId;
|
||||
}
|
||||
1020
src/backend/storage/lmgr/lock.c
Normal file
1020
src/backend/storage/lmgr/lock.c
Normal file
File diff suppressed because it is too large
Load Diff
415
src/backend/storage/lmgr/multi.c
Normal file
415
src/backend/storage/lmgr/multi.c
Normal file
@@ -0,0 +1,415 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* multi.c--
|
||||
* multi level lock table manager
|
||||
*
|
||||
* Standard multi-level lock manager as per the Gray paper
|
||||
* (at least, that is what it is supposed to be). We implement
|
||||
* three levels -- RELN, PAGE, TUPLE. Tuple is actually TID
|
||||
* a physical record pointer. It isn't an object id.
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/Attic/multi.c,v 1.1.1.1 1996/07/09 06:21:56 scrappy Exp $
|
||||
*
|
||||
* NOTES:
|
||||
* (1) The lock.c module assumes that the caller here is doing
|
||||
* two phase locking.
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "storage/lmgr.h"
|
||||
#include "storage/multilev.h"
|
||||
|
||||
#include "utils/rel.h"
|
||||
#include "utils/elog.h"
|
||||
#include "miscadmin.h" /* MyDatabaseId */
|
||||
|
||||
|
||||
/*
|
||||
* INTENT indicates to higher level that a lower level lock has been
|
||||
* set. For example, a write lock on a tuple conflicts with a write
|
||||
* lock on a relation. This conflict is detected as a WRITE_INTENT/
|
||||
* WRITE conflict between the tuple's intent lock and the relation's
|
||||
* write lock.
|
||||
*/
|
||||
static int MultiConflicts[] = {
|
||||
(int)NULL,
|
||||
/* All reads and writes at any level conflict with a write lock */
|
||||
(1 << WRITE_LOCK)|(1 << WRITE_INTENT)|(1 << READ_LOCK)|(1 << READ_INTENT),
|
||||
/* read locks conflict with write locks at curr and lower levels */
|
||||
(1 << WRITE_LOCK)| (1 << WRITE_INTENT),
|
||||
/* write intent locks */
|
||||
(1 << READ_LOCK) | (1 << WRITE_LOCK),
|
||||
/* read intent locks*/
|
||||
(1 << WRITE_LOCK),
|
||||
/* extend locks for archive storage manager conflict only w/extend locks */
|
||||
(1 << EXTEND_LOCK)
|
||||
};
|
||||
|
||||
/*
|
||||
* write locks have higher priority than read locks and extend locks. May
|
||||
* want to treat INTENT locks differently.
|
||||
*/
|
||||
static int MultiPrios[] = {
|
||||
(int)NULL,
|
||||
2,
|
||||
1,
|
||||
2,
|
||||
1,
|
||||
1
|
||||
};
|
||||
|
||||
/*
|
||||
* Lock table identifier for this lock table. The multi-level
|
||||
* lock table is ONE lock table, not three.
|
||||
*/
|
||||
LockTableId MultiTableId = (LockTableId)NULL;
|
||||
LockTableId ShortTermTableId = (LockTableId)NULL;
|
||||
|
||||
/*
|
||||
* Create the lock table described by MultiConflicts and Multiprio.
|
||||
*/
|
||||
/*
 * InitMultiLevelLockm --
 *	Creates (once) the lock table described by MultiConflicts and
 *	MultiPrios, and returns its table id.  Idempotent: subsequent
 *	calls return the already-created table id.
 */
LockTableId
InitMultiLevelLockm()
{
    int tableId;

    /* -----------------------
     * If we're already initialized just return the table id.
     * -----------------------
     */
    if (MultiTableId)
	return MultiTableId;

    /* the trailing 5 is the number of lock types in the tables above */
    tableId = LockTabInit("LockTable", MultiConflicts, MultiPrios, 5);
    MultiTableId = tableId;
    if (! (MultiTableId)) {
	elog(WARN,"InitMultiLockm: couldnt initialize lock table");
    }
    /* -----------------------
     * No short term lock table for now.  -Jeff 15 July 1991
     *
     * ShortTermTableId = LockTabRename(tableId);
     * if (! (ShortTermTableId)) {
     *	  elog(WARN,"InitMultiLockm: couldnt rename lock table");
     * }
     * -----------------------
     */
    return MultiTableId;
}
|
||||
|
||||
/*
|
||||
* MultiLockReln -- lock a relation
|
||||
*
|
||||
* Returns: TRUE if the lock can be set, FALSE otherwise.
|
||||
*/
|
||||
bool
|
||||
MultiLockReln(LockInfo linfo, LOCKT lockt)
|
||||
{
|
||||
LOCKTAG tag;
|
||||
|
||||
/* LOCKTAG has two bytes of padding, unfortunately. The
|
||||
* hash function will return miss if the padding bytes aren't
|
||||
* zero'd.
|
||||
*/
|
||||
memset(&tag,0,sizeof(tag));
|
||||
tag.relId = linfo->lRelId.relId;
|
||||
tag.dbId = linfo->lRelId.dbId;
|
||||
return(MultiAcquire(MultiTableId, &tag, lockt, RELN_LEVEL));
|
||||
}
|
||||
|
||||
/*
|
||||
* MultiLockTuple -- Lock the TID associated with a tuple
|
||||
*
|
||||
* Returns: TRUE if lock is set, FALSE otherwise.
|
||||
*
|
||||
* Side Effects: causes intention level locks to be set
|
||||
* at the page and relation level.
|
||||
*/
|
||||
bool
|
||||
MultiLockTuple(LockInfo linfo, ItemPointer tidPtr, LOCKT lockt)
|
||||
{
|
||||
LOCKTAG tag;
|
||||
|
||||
/* LOCKTAG has two bytes of padding, unfortunately. The
|
||||
* hash function will return miss if the padding bytes aren't
|
||||
* zero'd.
|
||||
*/
|
||||
memset(&tag,0,sizeof(tag));
|
||||
|
||||
tag.relId = linfo->lRelId.relId;
|
||||
tag.dbId = linfo->lRelId.dbId;
|
||||
|
||||
/* not locking any valid Tuple, just the page */
|
||||
tag.tupleId = *tidPtr;
|
||||
return(MultiAcquire(MultiTableId, &tag, lockt, TUPLE_LEVEL));
|
||||
}
|
||||
|
||||
/*
|
||||
* same as above at page level
|
||||
*/
|
||||
/*
 * MultiLockPage -- lock the 8k block (page) containing tidPtr.
 *	Same as MultiLockTuple but at page level.
 */
bool
MultiLockPage(LockInfo linfo, ItemPointer tidPtr, LOCKT lockt)
{
    LOCKTAG tag;

    /* LOCKTAG has two bytes of padding, unfortunately.  The
     * hash function will return miss if the padding bytes aren't
     * zero'd.
     */
    memset(&tag,0,sizeof(tag));


    /* ----------------------------
     * Now we want to set the page offset to be invalid
     * and lock the block.  There is some confusion here as to what
     * a page is.  In Postgres a page is an 8k block, however this
     * block may be partitioned into many subpages which are sometimes
     * also called pages.  The term is overloaded, so don't be fooled
     * when we say lock the page we mean the 8k block. -Jeff 16 July 1991
     *
     * NOTE(review): only the block number is copied below; the offset
     * part of tag.tupleId is left as zeroed by the memset above --
     * presumably zero equals InvalidOffsetNumber here; confirm.
     * ----------------------------
     */
    tag.relId = linfo->lRelId.relId;
    tag.dbId = linfo->lRelId.dbId;
    BlockIdCopy(&(tag.tupleId.ip_blkid), &(tidPtr->ip_blkid));
    return(MultiAcquire(MultiTableId, &tag, lockt, PAGE_LEVEL));
}
|
||||
|
||||
/*
 * MultiAcquire -- acquire a multi-level lock at the requested level.
 *
 * Three levels are implemented (relation, page, tuple).  Setting a
 * low-level lock also sets INTENT locks on all higher levels, so that a
 * conflict between, say, a tuple write lock and an existing relation-wide
 * read lock is detected: the write sets WRITE+INTENT on the relation,
 * which conflicts with the read.
 *
 * tableId -- lock table to acquire in (normally MultiTableId)
 * tag     -- fully-specified lock tag for the lowest level requested
 * lockt   -- lock type (read/write/...)
 * level   -- RELN_LEVEL, PAGE_LEVEL, or TUPLE_LEVEL
 *
 * Returns: TRUE if every per-level lock was set, FALSE if any failed
 * (in which case all locks acquired on the way down are released again).
 */
bool
MultiAcquire(LockTableId tableId,
	     LOCKTAG *tag,
	     LOCKT lockt,
	     LOCK_LEVEL level)
{
    LOCKT locks[N_LEVELS];
    int i,status;
    LOCKTAG xxTag, *tmpTag = &xxTag;	/* scratch tag, rebuilt per level */
    int retStatus = TRUE;

    /*
     * Decide which lock type to take at each level: the requested type at
     * the target level, INTENT-flagged versions above it, NO_LOCK below.
     */
    switch (level) {
    case RELN_LEVEL:
	locks[0] = lockt;
	locks[1] = NO_LOCK;
	locks[2] = NO_LOCK;
	break;
    case PAGE_LEVEL:
	locks[0] = lockt + INTENT;
	locks[1] = lockt;
	locks[2] = NO_LOCK;
	break;
    case TUPLE_LEVEL:
	locks[0] = lockt + INTENT;
	locks[1] = lockt + INTENT;
	locks[2] = lockt;
	break;
    default:
	elog(WARN,"MultiAcquire: bad lock level");
	return(FALSE);
    }

    /*
     * Construct a new tag as we go.  Zero it first: LOCKTAG has two
     * padding bytes, and the hash-based lock table misses if the padding
     * isn't zeroed.  Always loop from the highest level (relation) to the
     * lowest; unused lower levels hold NO_LOCK and are skipped.
     */
    memset(tmpTag,0,sizeof(*tmpTag));
    tmpTag->relId = tag->relId;
    tmpTag->dbId = tag->dbId;

    for (i=0;i<N_LEVELS;i++) {
	if (locks[i] != NO_LOCK) {
	    switch (i) {
	    case RELN_LEVEL:
		/* relation level: block # and offset both invalid */
		BlockIdSet(&(tmpTag->tupleId.ip_blkid), InvalidBlockNumber);
		tmpTag->tupleId.ip_posid = InvalidOffsetNumber;
		break;
	    case PAGE_LEVEL:
		/* page level: real block #, offset invalid */
		BlockIdCopy(&(tmpTag->tupleId.ip_blkid),
			    &(tag->tupleId.ip_blkid));
		tmpTag->tupleId.ip_posid = InvalidOffsetNumber;
		break;
	    case TUPLE_LEVEL:
		/*
		 * tuple level: copy the entire tuple id.
		 *
		 * NOTE(review): ItemPointerCopy's conventional signature is
		 * (from, to); as written this would copy the scratch tag
		 * over the caller's tag rather than the reverse.  Verify
		 * the macro's argument order in itemptr.h before changing.
		 */
		ItemPointerCopy(&tmpTag->tupleId, &tag->tupleId);
		break;
	    }

	    status = LockAcquire(tableId, tmpTag, locks[i]);
	    if (! status) {
		/*
		 * Failed for some reason.  Before returning we must release
		 * everything acquired so far: MultiRelease(..., i) releases
		 * from level i back up to the top.
		 */
		retStatus = FALSE;
		(void) MultiRelease(tableId, tag, lockt, i);
		/* now leave the loop.  Don't try for any more locks */
		break;
	    }
	}
    }
    return(retStatus);
}
|
||||
|
||||
/* ------------------
|
||||
* Release a page in the multi-level lock table
|
||||
* ------------------
|
||||
*/
|
||||
bool
|
||||
MultiReleasePage(LockInfo linfo, ItemPointer tidPtr, LOCKT lockt)
|
||||
{
|
||||
LOCKTAG tag;
|
||||
|
||||
/* ------------------
|
||||
* LOCKTAG has two bytes of padding, unfortunately. The
|
||||
* hash function will return miss if the padding bytes aren't
|
||||
* zero'd.
|
||||
* ------------------
|
||||
*/
|
||||
memset(&tag, 0,sizeof(LOCKTAG));
|
||||
|
||||
tag.relId = linfo->lRelId.relId;
|
||||
tag.dbId = linfo->lRelId.dbId;
|
||||
BlockIdCopy(&(tag.tupleId.ip_blkid), &(tidPtr->ip_blkid));
|
||||
|
||||
return (MultiRelease(MultiTableId, &tag, lockt, PAGE_LEVEL));
|
||||
}
|
||||
|
||||
/* ------------------
|
||||
* Release a relation in the multi-level lock table
|
||||
* ------------------
|
||||
*/
|
||||
bool
|
||||
MultiReleaseReln(LockInfo linfo, LOCKT lockt)
|
||||
{
|
||||
LOCKTAG tag;
|
||||
|
||||
/* ------------------
|
||||
* LOCKTAG has two bytes of padding, unfortunately. The
|
||||
* hash function will return miss if the padding bytes aren't
|
||||
* zero'd.
|
||||
* ------------------
|
||||
*/
|
||||
memset(&tag, 0, sizeof(LOCKTAG));
|
||||
tag.relId = linfo->lRelId.relId;
|
||||
tag.dbId = linfo->lRelId.dbId;
|
||||
|
||||
return (MultiRelease(MultiTableId, &tag, lockt, RELN_LEVEL));
|
||||
}
|
||||
|
||||
/*
|
||||
* MultiRelease -- release a multi-level lock
|
||||
*
|
||||
* Returns: TRUE if successful, FALSE otherwise.
|
||||
*/
|
||||
bool
|
||||
MultiRelease(LockTableId tableId,
|
||||
LOCKTAG *tag,
|
||||
LOCKT lockt,
|
||||
LOCK_LEVEL level)
|
||||
{
|
||||
LOCKT locks[N_LEVELS];
|
||||
int i,status;
|
||||
LOCKTAG xxTag, *tmpTag = &xxTag;
|
||||
|
||||
/*
|
||||
* same level scheme as MultiAcquire().
|
||||
*/
|
||||
switch (level) {
|
||||
case RELN_LEVEL:
|
||||
locks[0] = lockt;
|
||||
locks[1] = NO_LOCK;
|
||||
locks[2] = NO_LOCK;
|
||||
break;
|
||||
case PAGE_LEVEL:
|
||||
locks[0] = lockt + INTENT;
|
||||
locks[1] = lockt;
|
||||
locks[2] = NO_LOCK;
|
||||
break;
|
||||
case TUPLE_LEVEL:
|
||||
locks[0] = lockt + INTENT;
|
||||
locks[1] = lockt + INTENT;
|
||||
locks[2] = lockt;
|
||||
break;
|
||||
default:
|
||||
elog(WARN,"MultiRelease: bad lockt");
|
||||
}
|
||||
|
||||
/*
|
||||
* again, construct the tag on the fly. This time, however,
|
||||
* we release the locks in the REVERSE order -- from lowest
|
||||
* level to highest level.
|
||||
*
|
||||
* Must zero out the tag to set padding byes to zero and ensure
|
||||
* hashing consistency.
|
||||
*/
|
||||
memset(tmpTag, 0, sizeof(*tmpTag));
|
||||
tmpTag->relId = tag->relId;
|
||||
tmpTag->dbId = tag->dbId;
|
||||
|
||||
for (i=(N_LEVELS-1); i>=0; i--) {
|
||||
if (locks[i] != NO_LOCK) {
|
||||
switch (i) {
|
||||
case RELN_LEVEL:
|
||||
/* -------------
|
||||
* Set the block # and offset to invalid
|
||||
* -------------
|
||||
*/
|
||||
BlockIdSet(&(tmpTag->tupleId.ip_blkid), InvalidBlockNumber);
|
||||
tmpTag->tupleId.ip_posid = InvalidOffsetNumber;
|
||||
break;
|
||||
case PAGE_LEVEL:
|
||||
/* -------------
|
||||
* Copy the block #, set the offset to invalid
|
||||
* -------------
|
||||
*/
|
||||
BlockIdCopy(&(tmpTag->tupleId.ip_blkid),
|
||||
&(tag->tupleId.ip_blkid));
|
||||
tmpTag->tupleId.ip_posid = InvalidOffsetNumber;
|
||||
break;
|
||||
case TUPLE_LEVEL:
|
||||
ItemPointerCopy(&tmpTag->tupleId, &tag->tupleId);
|
||||
break;
|
||||
}
|
||||
status = LockRelease(tableId, tmpTag, locks[i]);
|
||||
if (! status) {
|
||||
elog(WARN,"MultiRelease: couldn't release after error");
|
||||
}
|
||||
}
|
||||
}
|
||||
/* shouldn't reach here */
|
||||
return false;
|
||||
}
|
||||
826
src/backend/storage/lmgr/proc.c
Normal file
826
src/backend/storage/lmgr/proc.c
Normal file
@@ -0,0 +1,826 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* proc.c--
|
||||
* routines to manage per-process shared memory data structure
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.1.1.1 1996/07/09 06:21:57 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
/*
|
||||
* Each postgres backend gets one of these. We'll use it to
|
||||
* clean up after the process should the process suddenly die.
|
||||
*
|
||||
*
|
||||
* Interface (a):
|
||||
* ProcSleep(), ProcWakeup(), ProcWakeupNext(),
|
||||
* ProcQueueAlloc() -- create a shm queue for sleeping processes
|
||||
* ProcQueueInit() -- create a queue without allocing memory
|
||||
*
|
||||
* Locking and waiting for buffers can cause the backend to be
|
||||
* put to sleep. Whoever releases the lock, etc. wakes the
|
||||
* process up again (and gives it an error code so it knows
|
||||
* whether it was awoken on an error condition).
|
||||
*
|
||||
* Interface (b):
|
||||
*
|
||||
* ProcReleaseLocks -- frees the locks associated with this process,
|
||||
* ProcKill -- destroys the shared memory state (and locks)
|
||||
* associated with the process.
|
||||
*
|
||||
* 5/15/91 -- removed the buffer pool based lock chain in favor
|
||||
* of a shared memory lock chain. The write-protection is
|
||||
* more expensive if the lock chain is in the buffer pool.
|
||||
* The only reason I kept the lock chain in the buffer pool
|
||||
* in the first place was to allow the lock table to grow larger
|
||||
* than available shared memory and that isn't going to work
|
||||
* without a lot of unimplemented support anyway.
|
||||
*
|
||||
* 4/7/95 -- instead of allocating a set of 1 semaphore per process, we
|
||||
* allocate a semaphore from a set of PROC_NSEMS_PER_SET semaphores
|
||||
* shared among backends (we keep a few sets of semaphores around).
|
||||
* This is so that we can support more backends. (system-wide semaphore
|
||||
* sets run out pretty fast.) -ay 4/95
|
||||
*
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.1.1.1 1996/07/09 06:21:57 scrappy Exp $
|
||||
*/
|
||||
#include <sys/time.h>
|
||||
#ifndef WIN32
|
||||
#include <unistd.h>
|
||||
#endif /* WIN32 */
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include "libpq/pqsignal.h" /* substitute for <signal.h> */
|
||||
|
||||
#if defined(PORTNAME_bsdi)
|
||||
/* hacka, hacka, hacka (XXX) */
|
||||
union semun {
|
||||
int val; /* value for SETVAL */
|
||||
struct semid_ds *buf; /* buffer for IPC_STAT & IPC_SET */
|
||||
ushort *array; /* array for GETALL & SETALL */
|
||||
};
|
||||
#endif
|
||||
|
||||
#include "access/xact.h"
|
||||
#include "utils/hsearch.h"
|
||||
#include "utils/elog.h"
|
||||
|
||||
#include "storage/buf.h"
|
||||
#include "storage/lock.h"
|
||||
#include "storage/shmem.h"
|
||||
#include "storage/spin.h"
|
||||
#include "storage/proc.h"
|
||||
|
||||
/*
|
||||
* timeout (in seconds) for resolving possible deadlock
|
||||
*/
|
||||
#ifndef DEADLOCK_TIMEOUT
|
||||
#define DEADLOCK_TIMEOUT 60
|
||||
#endif
|
||||
|
||||
/* --------------------
|
||||
* Spin lock for manipulating the shared process data structure:
|
||||
* ProcGlobal.... Adding an extra spin lock seemed like the smallest
|
||||
* hack to get around reading and updating this structure in shared
|
||||
* memory. -mer 17 July 1991
|
||||
* --------------------
|
||||
*/
|
||||
SPINLOCK ProcStructLock;
|
||||
|
||||
/*
|
||||
* For cleanup routines. Don't cleanup if the initialization
|
||||
* has not happened.
|
||||
*/
|
||||
static bool ProcInitialized = FALSE;
|
||||
|
||||
static PROC_HDR *ProcGlobal = NULL;
|
||||
|
||||
PROC *MyProc = NULL;
|
||||
|
||||
static void ProcKill(int exitStatus, int pid);
|
||||
static void ProcGetNewSemKeyAndNum(IPCKey *key, int *semNum);
|
||||
static void ProcFreeSem(IpcSemaphoreKey semKey, int semNum);
|
||||
#if defined(PORTNAME_linux)
|
||||
extern int HandleDeadLock(int);
|
||||
#else
|
||||
extern int HandleDeadLock(void);
|
||||
#endif
|
||||
/*
|
||||
* InitProcGlobal -
|
||||
* initializes the global process table. We put it here so that
|
||||
* the postmaster can do this initialization. (ProcFreeAllSem needs
|
||||
* to read this table on exiting the postmaster. If we have the first
|
||||
* backend do this, starting up and killing the postmaster without
|
||||
* starting any backends will be a problem.)
|
||||
*/
|
||||
void
|
||||
InitProcGlobal(IPCKey key)
|
||||
{
|
||||
bool found = false;
|
||||
|
||||
/* attach to the free list */
|
||||
ProcGlobal = (PROC_HDR *)
|
||||
ShmemInitStruct("Proc Header",(unsigned)sizeof(PROC_HDR),&found);
|
||||
|
||||
/* --------------------
|
||||
* We're the first - initialize.
|
||||
* --------------------
|
||||
*/
|
||||
if (! found)
|
||||
{
|
||||
int i;
|
||||
|
||||
ProcGlobal->numProcs = 0;
|
||||
ProcGlobal->freeProcs = INVALID_OFFSET;
|
||||
ProcGlobal->currKey = IPCGetProcessSemaphoreInitKey(key);
|
||||
for (i=0; i < MAX_PROC_SEMS/PROC_NSEMS_PER_SET; i++)
|
||||
ProcGlobal->freeSemMap[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* ------------------------
 * InitProcess -- create a per-process data structure for this process
 *	used by the lock manager on semaphore queues.
 *
 * Grabs a PROC slot from the shared free list (or allocates a new one),
 * assigns this backend a semaphore for lock-wait sleeping, and registers
 * the slot in the shmem binding table under our OS pid so the postmaster
 * can clean up after us if we die.  Holds ProcStructLock throughout the
 * shared-state manipulation.
 * ------------------------
 */
void
InitProcess(IPCKey key)
{
    bool found = false;
    int pid;
    int semstat;
    unsigned long location, myOffset;

    /* ------------------
     * Routine called if deadlock timer goes off. See ProcSleep()
     * ------------------
     */
#ifndef WIN32
    signal(SIGALRM, HandleDeadLock);
#endif /* WIN32 we'll have to figure out how to handle this later */

    SpinAcquire(ProcStructLock);

    /* attach to the free list */
    ProcGlobal = (PROC_HDR *)
	ShmemInitStruct("Proc Header",(unsigned)sizeof(PROC_HDR),&found);
    if (!found) {
	/* this should not happen. InitProcGlobal() is called before this. */
	elog(WARN, "InitProcess: Proc Header uninitialized");
    }

    /* refuse double initialization of this backend */
    if (MyProc != NULL)
    {
	SpinRelease(ProcStructLock);
	elog(WARN,"ProcInit: you already exist");
	return;
    }

    /* try to get a proc from the free list first */

    myOffset = ProcGlobal->freeProcs;

    if (myOffset != INVALID_OFFSET)
    {
	/* recycle a slot left behind by a dead backend */
	MyProc = (PROC *) MAKE_PTR(myOffset);
	ProcGlobal->freeProcs = MyProc->links.next;
    }
    else
    {
	/* have to allocate one.  We can't use the normal binding
	 * table mechanism because the proc structure is stored
	 * by PID instead of by a global name (need to look it
	 * up by PID when we cleanup dead processes).
	 */

	MyProc = (PROC *) ShmemAlloc((unsigned)sizeof(PROC));
	if (! MyProc)
	{
	    SpinRelease(ProcStructLock);
	    elog (FATAL,"cannot create new proc: out of memory");
	}

	/* this cannot be initialized until after the buffer pool */
	SHMQueueInit(&(MyProc->lockQueue));
	MyProc->procId = ProcGlobal->numProcs;
	ProcGlobal->numProcs++;
    }

    /*
     * zero out the spin lock counts and set the sLocks field for
     * ProcStructLock to 1 as we have acquired this spinlock above but
     * didn't record it since we didn't have MyProc until now.
     */
    memset(MyProc->sLocks, 0, sizeof(MyProc->sLocks));
    MyProc->sLocks[ProcStructLock] = 1;


    if (IsUnderPostmaster) {
	IPCKey semKey;
	int semNum;
	int semId;
	union semun semun;

	/* pick a free semaphore slot from the shared bitmap */
	ProcGetNewSemKeyAndNum(&semKey, &semNum);

	semId = IpcSemaphoreCreate(semKey,
				   PROC_NSEMS_PER_SET,
				   IPCProtection,
				   IpcSemaphoreDefaultStartValue,
				   0,
				   &semstat);
	/*
	 * we might be reusing a semaphore that belongs to a dead
	 * backend. So be careful and reinitialize its value here.
	 */
	semun.val = IpcSemaphoreDefaultStartValue;
	semctl(semId, semNum, SETVAL, semun);

	/* start "locked" so our first ProcSleep P() actually blocks */
	IpcSemaphoreLock(semId, semNum, IpcExclusiveLock);
	MyProc->sem.semId = semId;
	MyProc->sem.semNum = semNum;
	MyProc->sem.semKey = semKey;
    } else {
	/* standalone backend: no lock-wait semaphore */
	MyProc->sem.semId = -1;
    }

    /* ----------------------
     * Release the lock.
     * ----------------------
     */
    SpinRelease(ProcStructLock);

    MyProc->pid = 0;
#if 0
    MyProc->pid = MyPid;
#endif

    /* ----------------
     * Start keeping spin lock stats from here on.  Any botch before
     * this initialization is forever botched.
     *
     * NOTE(review): this wipes the sLocks bookkeeping set above; by now
     * SpinRelease should have brought ProcStructLock's count back to 0,
     * making this redundant but harmless -- verify SpinRelease's
     * accounting before removing either memset.
     * ----------------
     */
    memset(MyProc->sLocks, 0, MAX_SPINS*sizeof(*MyProc->sLocks));

    /* -------------------------
     * Install ourselves in the binding table.  The name to
     * use is determined by the OS-assigned process id.  That
     * allows the cleanup process to find us after any untimely
     * exit.
     * -------------------------
     */
    pid = getpid();
    location = MAKE_OFFSET(MyProc);
    if ((! ShmemPIDLookup(pid,&location)) || (location != MAKE_OFFSET(MyProc)))
    {
	elog(FATAL,"InitProc: ShmemPID table broken");
    }

    MyProc->errType = NO_ERROR;
    SHMQueueElemInit(&(MyProc->links));

    /* arrange for cleanup of our shared state at process exit */
    on_exitpg(ProcKill, (caddr_t)pid);

    ProcInitialized = TRUE;
}
|
||||
|
||||
/*
|
||||
* ProcReleaseLocks() -- release all locks associated with this process
|
||||
*
|
||||
*/
|
||||
void
|
||||
ProcReleaseLocks()
|
||||
{
|
||||
if (!MyProc)
|
||||
return;
|
||||
LockReleaseAll(1,&MyProc->lockQueue);
|
||||
}
|
||||
|
||||
/*
|
||||
* ProcRemove -
|
||||
* used by the postmaster to clean up the global tables. This also frees
|
||||
* up the semaphore used for the lmgr of the process. (We have to do
|
||||
* this is the postmaster instead of doing a IpcSemaphoreKill on exiting
|
||||
* the process because the semaphore set is shared among backends and
|
||||
* we don't want to remove other's semaphores on exit.)
|
||||
*/
|
||||
bool
|
||||
ProcRemove(int pid)
|
||||
{
|
||||
SHMEM_OFFSET location;
|
||||
PROC *proc;
|
||||
|
||||
location = INVALID_OFFSET;
|
||||
|
||||
location = ShmemPIDDestroy(pid);
|
||||
if (location == INVALID_OFFSET)
|
||||
return(FALSE);
|
||||
proc = (PROC *) MAKE_PTR(location);
|
||||
|
||||
SpinAcquire(ProcStructLock);
|
||||
|
||||
ProcFreeSem(proc->sem.semKey, proc->sem.semNum);
|
||||
|
||||
proc->links.next = ProcGlobal->freeProcs;
|
||||
ProcGlobal->freeProcs = MAKE_OFFSET(proc);
|
||||
|
||||
SpinRelease(ProcStructLock);
|
||||
|
||||
return(TRUE);
|
||||
}
|
||||
|
||||
/*
 * ProcKill() -- Destroy the per-proc data structure for
 *	this process.  Release any of its held spin locks.
 *
 * Registered via on_exitpg() in InitProcess; exitStatus/pid come from the
 * exit machinery.  Releases the process's spinlocks and regular locks and
 * unhooks it from any lock wait queue it died on.
 */
static void
ProcKill(int exitStatus, int pid)
{
    PROC *proc;
    SHMEM_OFFSET location;

    /* --------------------
     * If this is a FATAL exit the postmaster will have to kill all the
     * existing backends and reinitialize shared memory, so we don't
     * need to do anything here.
     * --------------------
     */
    if (exitStatus != 0)
	return;

    /* pid 0 means "this process" */
    if (! pid)
    {
	pid = getpid();
    }

    /*
     * NOTE(review): `location` is only read after this call; if
     * ShmemPIDLookup fails without storing through its out-parameter,
     * the INVALID_OFFSET test below reads an uninitialized variable --
     * verify ShmemPIDLookup's contract.
     */
    ShmemPIDLookup(pid,&location);
    if (location == INVALID_OFFSET)
	return;

    proc = (PROC *) MAKE_PTR(location);

    /* only clear MyProc when we are cleaning up our own entry */
    if (proc != MyProc) {
	Assert( pid != getpid() );
    } else
	MyProc = NULL;

    /* ---------------
     * Assume one lock table.
     * ---------------
     */
    ProcReleaseSpins(proc);
    LockReleaseAll(1,&proc->lockQueue);

    /* ----------------
     * get off the wait queue
     * ----------------
     */
    LockLockTable();
    if (proc->links.next != INVALID_OFFSET) {
	/* process died while sleeping on a lock's wait queue */
	Assert(proc->waitLock->waitProcs.size > 0);
	SHMQueueDelete(&(proc->links));
	--proc->waitLock->waitProcs.size;
    }
    SHMQueueElemInit(&(proc->links));
    UnlockLockTable();

    return;
}
|
||||
|
||||
/*
|
||||
* ProcQueue package: routines for putting processes to sleep
|
||||
* and waking them up
|
||||
*/
|
||||
|
||||
/*
|
||||
* ProcQueueAlloc -- alloc/attach to a shared memory process queue
|
||||
*
|
||||
* Returns: a pointer to the queue or NULL
|
||||
* Side Effects: Initializes the queue if we allocated one
|
||||
*/
|
||||
PROC_QUEUE *
|
||||
ProcQueueAlloc(char *name)
|
||||
{
|
||||
bool found;
|
||||
PROC_QUEUE *queue = (PROC_QUEUE *)
|
||||
ShmemInitStruct(name,(unsigned)sizeof(PROC_QUEUE),&found);
|
||||
|
||||
if (! queue)
|
||||
{
|
||||
return(NULL);
|
||||
}
|
||||
if (! found)
|
||||
{
|
||||
ProcQueueInit(queue);
|
||||
}
|
||||
return(queue);
|
||||
}
|
||||
|
||||
/*
|
||||
* ProcQueueInit -- initialize a shared memory process queue
|
||||
*/
|
||||
void
|
||||
ProcQueueInit(PROC_QUEUE *queue)
|
||||
{
|
||||
SHMQueueInit(&(queue->links));
|
||||
queue->size = 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* ProcSleep -- put a process to sleep
|
||||
*
|
||||
* P() on the semaphore should put us to sleep. The process
|
||||
* semaphore is cleared by default, so the first time we try
|
||||
* to acquire it, we sleep.
|
||||
*
|
||||
* ASSUME: that no one will fiddle with the queue until after
|
||||
* we release the spin lock.
|
||||
*
|
||||
* NOTES: The process queue is now a priority queue for locking.
|
||||
*/
|
||||
int
|
||||
ProcSleep(PROC_QUEUE *queue,
|
||||
SPINLOCK spinlock,
|
||||
int token,
|
||||
int prio,
|
||||
LOCK *lock)
|
||||
{
|
||||
int i;
|
||||
PROC *proc;
|
||||
#ifndef WIN32 /* figure this out later */
|
||||
struct itimerval timeval, dummy;
|
||||
#endif /* WIN32 */
|
||||
|
||||
proc = (PROC *) MAKE_PTR(queue->links.prev);
|
||||
for (i=0;i<queue->size;i++)
|
||||
{
|
||||
if (proc->prio < prio)
|
||||
proc = (PROC *) MAKE_PTR(proc->links.prev);
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
MyProc->token = token;
|
||||
MyProc->waitLock = lock;
|
||||
|
||||
/* -------------------
|
||||
* currently, we only need this for the ProcWakeup routines
|
||||
* -------------------
|
||||
*/
|
||||
TransactionIdStore((TransactionId) GetCurrentTransactionId(), &MyProc->xid);
|
||||
|
||||
/* -------------------
|
||||
* assume that these two operations are atomic (because
|
||||
* of the spinlock).
|
||||
* -------------------
|
||||
*/
|
||||
SHMQueueInsertTL(&(proc->links),&(MyProc->links));
|
||||
queue->size++;
|
||||
|
||||
SpinRelease(spinlock);
|
||||
|
||||
/* --------------
|
||||
* Postgres does not have any deadlock detection code and for this
|
||||
* reason we must set a timer to wake up the process in the event of
|
||||
* a deadlock. For now the timer is set for 1 minute and we assume that
|
||||
* any process which sleeps for this amount of time is deadlocked and will
|
||||
* receive a SIGALRM signal. The handler should release the processes
|
||||
* semaphore and abort the current transaction.
|
||||
*
|
||||
* Need to zero out struct to set the interval and the micro seconds fields
|
||||
* to 0.
|
||||
* --------------
|
||||
*/
|
||||
#ifndef WIN32
|
||||
memset(&timeval, 0, sizeof(struct itimerval));
|
||||
timeval.it_value.tv_sec = DEADLOCK_TIMEOUT;
|
||||
|
||||
if (setitimer(ITIMER_REAL, &timeval, &dummy))
|
||||
elog(FATAL, "ProcSleep: Unable to set timer for process wakeup");
|
||||
#endif /* WIN32 */
|
||||
|
||||
/* --------------
|
||||
* if someone wakes us between SpinRelease and IpcSemaphoreLock,
|
||||
* IpcSemaphoreLock will not block. The wakeup is "saved" by
|
||||
* the semaphore implementation.
|
||||
* --------------
|
||||
*/
|
||||
IpcSemaphoreLock(MyProc->sem.semId, MyProc->sem.semNum, IpcExclusiveLock);
|
||||
|
||||
/* ---------------
|
||||
* We were awoken before a timeout - now disable the timer
|
||||
* ---------------
|
||||
*/
|
||||
#ifndef WIN32
|
||||
timeval.it_value.tv_sec = 0;
|
||||
|
||||
|
||||
if (setitimer(ITIMER_REAL, &timeval, &dummy))
|
||||
elog(FATAL, "ProcSleep: Unable to diable timer for process wakeup");
|
||||
#endif /* WIN32 */
|
||||
|
||||
/* ----------------
|
||||
* We were assumed to be in a critical section when we went
|
||||
* to sleep.
|
||||
* ----------------
|
||||
*/
|
||||
SpinAcquire(spinlock);
|
||||
|
||||
return(MyProc->errType);
|
||||
}
|
||||
|
||||
|
||||
/*
 * ProcWakeup -- wake up a process by releasing its private semaphore.
 *
 * Removes the process from the wait queue and marks its links invalid.
 * Caller must already hold the lock-table spinlock, and must adjust
 * waitLock->waitProcs.size itself.
 *
 * RETURN: the next process in the wait queue (queue predecessor of the
 * removed entry), or NULL if proc was not actually queued.
 */
PROC *
ProcWakeup(PROC *proc, int errType)
{
    PROC *retProc;
    /* assume that spinlock has been acquired */

    /* not on any queue -- nothing to wake */
    if (proc->links.prev == INVALID_OFFSET ||
	proc->links.next == INVALID_OFFSET)
	return((PROC *) NULL);

    /* grab the successor before unlinking destroys the chain */
    retProc = (PROC *) MAKE_PTR(proc->links.prev);

    /* you have to update waitLock->waitProcs.size yourself */
    SHMQueueDelete(&(proc->links));
    SHMQueueElemInit(&(proc->links));

    /* tell the waiter why it was awoken (NO_ERROR vs STATUS_ERROR) */
    proc->errType = errType;

    /* V() the waiter's semaphore: this is what actually unblocks it */
    IpcSemaphoreUnlock(proc->sem.semId, proc->sem.semNum, IpcExclusiveLock);

    return retProc;
}
|
||||
|
||||
|
||||
/*
|
||||
* ProcGetId --
|
||||
*/
|
||||
int
|
||||
ProcGetId()
|
||||
{
|
||||
return( MyProc->procId );
|
||||
}
|
||||
|
||||
/*
 * ProcLockWakeup -- routine for waking up processes when a lock is
 *	released.
 *
 * Walks the lock's wait queue from the front, granting the lock to and
 * waking every waiter whose request no longer conflicts, until the first
 * conflicting waiter (or an empty queue) stops the scan.
 *
 * ltable/lock are passed as char* and cast back; callers hand in the
 * lock table and the released lock.
 *
 * Returns STATUS_OK if at least one waiter was awoken, STATUS_NOT_FOUND
 * if nothing could be granted (possible deadlock).
 */
int
ProcLockWakeup(PROC_QUEUE *queue, char *ltable, char *lock)
{
    PROC *proc;
    int count;

    /* empty queue: no one to wake */
    if (! queue->size)
	return(STATUS_NOT_FOUND);

    proc = (PROC *) MAKE_PTR(queue->links.prev);
    count = 0;
    while ((LockResolveConflicts ((LOCKTAB *) ltable,
				  (LOCK *) lock,
				  proc->token,
				  proc->xid) == STATUS_OK))
    {
	/* there was a waiting process, grant it the lock before waking it
	 * up.  This will prevent another process from seizing the lock
	 * between the time we release the lock master (spinlock) and
	 * the time that the awoken process begins executing again.
	 */
	GrantLock((LOCK *) lock, proc->token);
	queue->size--;

	/*
	 * ProcWakeup removes proc from the lock waiting process queue and
	 * returns the next proc in chain.  If a writer just dropped
	 * its lock and there are several waiting readers, wake them all up.
	 */
	proc = ProcWakeup(proc, NO_ERROR);

	count++;
	if (!proc || queue->size == 0)
	    break;
    }

    if (count)
	return(STATUS_OK);
    else
	/* Something is still blocking us.  May have deadlocked. */
	return(STATUS_NOT_FOUND);
}
|
||||
|
||||
/*
 * ProcAddLock -- record a newly acquired lock on this backend's
 * lockQueue, so ProcReleaseLocks/ProcKill can release everything the
 * process holds in one pass.
 */
void
ProcAddLock(SHM_QUEUE *elem)
{
    SHMQueueInsertTL(&MyProc->lockQueue,elem);
}
|
||||
|
||||
/* --------------------
 * SIGALRM handler: we only get here if we got SIGALRM after
 * DEADLOCK_TIMEOUT while waiting for a lock to be released by some other
 * process.  After the one minute deadline we assume we have a deadlock
 * and must abort this transaction.  We must also indicate that we're no
 * longer waiting on a lock so that other processes don't try to wake us
 * up and corrupt our semaphore count.
 *
 * Returns 1 if we were in fact already awoken (false alarm), 0 after
 * marking ourselves for transaction abort.  The statement ordering here
 * is race-sensitive (see comments below); do not reorder casually.
 * --------------------
 */
int
#if defined(PORTNAME_linux)
HandleDeadLock(int i)
#else
HandleDeadLock()
#endif
{
    LOCK *lock;
    int size;

    LockLockTable();

    /* ---------------------
     * Check to see if we've been awoken by anyone in the interim.
     *
     * If we have we can return and resume our transaction -- happy day.
     * Before we are awoken the process releasing the lock grants it to
     * us so we know that we don't have to wait anymore.
     *
     * Damn these names are LONG! -mer
     * ---------------------
     */
    if (IpcSemaphoreGetCount(MyProc->sem.semId, MyProc->sem.semNum) ==
	IpcSemaphoreDefaultStartValue) {
	UnlockLockTable();
	return 1;
    }

    /*
     * you would think this would be unnecessary, but...
     *
     * this also means we've been removed already.  in some ports
     * (e.g., sparc and aix) the semop(2) implementation is such that
     * we can actually end up in this handler after someone has removed
     * us from the queue and bopped the semaphore *but the test above
     * fails to detect the semaphore update* (presumably something weird
     * having to do with the order in which the semaphore wakeup signal
     * and SIGALRM get handled).
     */
    if (MyProc->links.prev == INVALID_OFFSET ||
	MyProc->links.next == INVALID_OFFSET) {
	UnlockLockTable();
	return(1);
    }

    lock = MyProc->waitLock;
    size = lock->waitProcs.size; /* so we can look at this in the core */

    /* ------------------------
     * Get this process off the lock's wait queue
     * ------------------------
     */
    Assert(lock->waitProcs.size > 0);
    --lock->waitProcs.size;
    SHMQueueDelete(&(MyProc->links));
    SHMQueueElemInit(&(MyProc->links));

    /* ------------------
     * Unlock my semaphore so that the count is right for next time.
     * I was awoken by a signal, not by someone unlocking my semaphore.
     * ------------------
     */
    IpcSemaphoreUnlock(MyProc->sem.semId, MyProc->sem.semNum, IpcExclusiveLock);

    /* -------------
     * Set MyProc->errType to STATUS_ERROR so that we abort after
     * returning from this handler (ProcSleep returns this value).
     * -------------
     */
    MyProc->errType = STATUS_ERROR;

    /*
     * if this doesn't follow the IpcSemaphoreUnlock then we get lock
     * table corruption ("LockReplace: xid table corrupted") due to
     * race conditions.  i don't claim to understand this...
     */
    UnlockLockTable();

    elog(NOTICE, "Timeout -- possible deadlock");
    return 0;
}
|
||||
|
||||
void
|
||||
ProcReleaseSpins(PROC *proc)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (!proc)
|
||||
proc = MyProc;
|
||||
|
||||
if (!proc)
|
||||
return;
|
||||
for (i=0; i < (int)MAX_SPINS; i++)
|
||||
{
|
||||
if (proc->sLocks[i])
|
||||
{
|
||||
Assert(proc->sLocks[i] == 1);
|
||||
SpinRelease(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*****************************************************************************
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
/*
|
||||
* ProcGetNewSemKeyAndNum -
|
||||
* scan the free semaphore bitmap and allocate a single semaphore from
|
||||
* a semaphore set. (If the semaphore set doesn't exist yet,
|
||||
* IpcSemaphoreCreate will create it. Otherwise, we use the existing
|
||||
* semaphore set.)
|
||||
*/
|
||||
static void
|
||||
ProcGetNewSemKeyAndNum(IPCKey *key, int *semNum)
|
||||
{
|
||||
int i;
|
||||
int32 *freeSemMap = ProcGlobal->freeSemMap;
|
||||
unsigned int fullmask;
|
||||
|
||||
/*
|
||||
* we hold ProcStructLock when entering this routine. We scan through
|
||||
* the bitmap to look for a free semaphore.
|
||||
*/
|
||||
fullmask = ~0 >> (32 - PROC_NSEMS_PER_SET);
|
||||
for(i=0; i < MAX_PROC_SEMS/PROC_NSEMS_PER_SET; i++) {
|
||||
int mask = 1;
|
||||
int j;
|
||||
|
||||
if (freeSemMap[i] == fullmask)
|
||||
continue; /* none free for this set */
|
||||
|
||||
for(j = 0; j < PROC_NSEMS_PER_SET; j++) {
|
||||
if ((freeSemMap[i] & mask) == 0) {
|
||||
/*
|
||||
* a free semaphore found. Mark it as allocated.
|
||||
*/
|
||||
freeSemMap[i] |= mask;
|
||||
|
||||
*key = ProcGlobal->currKey + i;
|
||||
*semNum = j;
|
||||
return;
|
||||
}
|
||||
mask <<= 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* if we reach here, all the semaphores are in use. */
|
||||
elog(WARN, "InitProc: cannot allocate a free semaphore");
|
||||
}
|
||||
|
||||
/*
|
||||
* ProcFreeSem -
|
||||
* free up our semaphore in the semaphore set. If we're the last one
|
||||
* in the set, also remove the semaphore set.
|
||||
*/
|
||||
static void
|
||||
ProcFreeSem(IpcSemaphoreKey semKey, int semNum)
|
||||
{
|
||||
int mask;
|
||||
int i;
|
||||
int32 *freeSemMap = ProcGlobal->freeSemMap;
|
||||
|
||||
i = semKey - ProcGlobal->currKey;
|
||||
mask = ~(1 << semNum);
|
||||
freeSemMap[i] &= mask;
|
||||
|
||||
if (freeSemMap[i]==0)
|
||||
IpcSemaphoreKill(semKey);
|
||||
}
|
||||
|
||||
/*
|
||||
* ProcFreeAllSemaphores -
|
||||
* on exiting the postmaster, we free up all the semaphores allocated
|
||||
* to the lmgrs of the backends.
|
||||
*/
|
||||
void
|
||||
ProcFreeAllSemaphores()
|
||||
{
|
||||
int i;
|
||||
int32 *freeSemMap = ProcGlobal->freeSemMap;
|
||||
|
||||
for(i=0; i < MAX_PROC_SEMS/PROC_NSEMS_PER_SET; i++) {
|
||||
if (freeSemMap[i]!=0)
|
||||
IpcSemaphoreKill(ProcGlobal->currKey + i);
|
||||
}
|
||||
}
|
||||
86
src/backend/storage/lmgr/single.c
Normal file
86
src/backend/storage/lmgr/single.c
Normal file
@@ -0,0 +1,86 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* single.c--
|
||||
* set single locks in the multi-level lock hierarchy
|
||||
*
|
||||
* Sometimes we don't want to set all levels of the multi-level
|
||||
* lock hierarchy at once. This allows us to set and release
|
||||
* one level at a time. It's useful in index scans when
|
||||
* you can set an intent lock at the beginning and thereafter
|
||||
* only set page locks. Tends to speed things up.
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/Attic/single.c,v 1.1.1.1 1996/07/09 06:21:57 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include <string.h>
|
||||
#include "storage/lmgr.h" /* where the declarations go */
|
||||
#include "storage/lock.h"
|
||||
#include "storage/multilev.h"
|
||||
#include "utils/rel.h"
|
||||
|
||||
/*
|
||||
* SingleLockReln -- lock a relation
|
||||
*
|
||||
* Returns: TRUE if the lock can be set, FALSE otherwise.
|
||||
*/
|
||||
bool
|
||||
SingleLockReln(LockInfo linfo, LOCKT lockt, int action)
|
||||
{
|
||||
LOCKTAG tag;
|
||||
|
||||
/*
|
||||
* LOCKTAG has two bytes of padding, unfortunately. The
|
||||
* hash function will return miss if the padding bytes aren't
|
||||
* zero'd.
|
||||
*/
|
||||
memset(&tag,0,sizeof(tag));
|
||||
tag.relId = linfo->lRelId.relId;
|
||||
tag.dbId = linfo->lRelId.dbId;
|
||||
BlockIdSet(&(tag.tupleId.ip_blkid), InvalidBlockNumber);
|
||||
tag.tupleId.ip_posid = InvalidOffsetNumber;
|
||||
|
||||
if (action == UNLOCK)
|
||||
return(LockRelease(MultiTableId, &tag, lockt));
|
||||
else
|
||||
return(LockAcquire(MultiTableId, &tag, lockt));
|
||||
}
|
||||
|
||||
/*
|
||||
* SingleLockPage -- use multi-level lock table, but lock
|
||||
* only at the page level.
|
||||
*
|
||||
* Assumes that an INTENT lock has already been set in the
|
||||
* multi-level lock table.
|
||||
*
|
||||
*/
|
||||
bool
|
||||
SingleLockPage(LockInfo linfo,
|
||||
ItemPointer tidPtr,
|
||||
LOCKT lockt,
|
||||
int action)
|
||||
{
|
||||
LOCKTAG tag;
|
||||
|
||||
/*
|
||||
* LOCKTAG has two bytes of padding, unfortunately. The
|
||||
* hash function will return miss if the padding bytes aren't
|
||||
* zero'd.
|
||||
*/
|
||||
memset(&tag,0,sizeof(tag));
|
||||
tag.relId = linfo->lRelId.relId;
|
||||
tag.dbId = linfo->lRelId.dbId;
|
||||
BlockIdCopy(&(tag.tupleId.ip_blkid), &(tidPtr->ip_blkid));
|
||||
tag.tupleId.ip_posid = InvalidOffsetNumber;
|
||||
|
||||
|
||||
if (action == UNLOCK)
|
||||
return(LockRelease(MultiTableId, &tag, lockt));
|
||||
else
|
||||
return(LockAcquire(MultiTableId, &tag, lockt));
|
||||
}
|
||||
|
||||
218
src/backend/storage/lock.h
Normal file
218
src/backend/storage/lock.h
Normal file
@@ -0,0 +1,218 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* lock.h--
|
||||
*
|
||||
*
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: lock.h,v 1.1.1.1 1996/07/09 06:21:53 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef LOCK_H_
#define LOCK_H_

#include "postgres.h"
#include "storage/itemptr.h"
#include "storage/shmem.h"
#include "storage/spin.h"
#include "storage/backendid.h"
#include "utils/hsearch.h"

extern SPINLOCK LockMgrLock;
typedef int MASK;

#define INIT_TABLE_SIZE		100
#define MAX_TABLE_SIZE		1000


/* ----------------------
 * The following defines are used to estimate how much shared
 * memory the lock manager is going to require.
 *
 * NBACKENDS - The number of concurrently running backends
 * NLOCKS_PER_XACT - The number of unique locks acquired in a transaction
 * NLOCKENTS - The maximum number of lock entries in the lock table.
 * ----------------------
 */
#define NBACKENDS 50
#define NLOCKS_PER_XACT 40
/* parenthesized so the macro expands safely inside larger expressions */
#define NLOCKENTS (NLOCKS_PER_XACT*NBACKENDS)

typedef int LOCK_TYPE;
typedef int LOCKT;
typedef int LockTableId;

/* MAX_LOCKTYPES cannot be larger than the bits in MASK */
#define MAX_LOCKTYPES 6

/*
 * MAX_TABLES corresponds to the number of spin locks allocated in
 * CreateSpinLocks() or the number of shared memory locations allocated
 * for lock table spin locks in the case of machines with TAS instructions.
 */
#define MAX_TABLES 2

#define INVALID_TABLEID 0

/*typedef struct LOCK LOCK; */


typedef struct ltag {
    Oid			relId;
    Oid			dbId;
    ItemPointerData	tupleId;
} LOCKTAG;

#define TAGSIZE (sizeof(LOCKTAG))

/* This is the control structure for a lock table.  It
 * lives in shared memory:
 *
 * tableID -- the handle used by the lock table's clients to
 *	refer to the table.
 *
 * nLockTypes -- number of lock types (READ,WRITE,etc) that
 *	are defined on this lock table
 *
 * conflictTab -- this is an array of bitmasks showing lock
 *	type conflicts. conflictTab[i] is a mask with the j-th bit
 *	turned on if lock types i and j conflict.
 *
 * prio -- each locktype has a priority, so, for example, waiting
 *	writers can be given priority over readers (to avoid
 *	starvation).
 *
 * masterlock -- synchronizes access to the table
 */
typedef struct lockctl {
    LockTableId	tableId;
    int		nLockTypes;
    int		conflictTab[MAX_LOCKTYPES];
    int		prio[MAX_LOCKTYPES];
    SPINLOCK	masterLock;
} LOCKCTL;

/*
 * lockHash -- hash table on lock Ids,
 * xidHash -- hash on xid and lockId in case
 *	multiple processes are holding the lock
 * ctl - control structure described above.
 */
typedef struct ltable {
    HTAB	*lockHash;
    HTAB	*xidHash;
    LOCKCTL	*ctl;
} LOCKTAB;

/* -----------------------
 * A transaction never conflicts with its own locks.  Hence, if
 * multiple transactions hold non-conflicting locks on the same
 * data, private per-transaction information must be stored in the
 * XID table.  The tag is XID + shared memory lock address so that
 * all locks can use the same XID table.  The private information
 * we store is the number of locks of each type (holders) and the
 * total number of locks (nHolding) held by the transaction.
 *
 * NOTE: --
 * There were some problems with the fact that currently TransactionIdData
 * is a 5 byte entity and compilers long word aligning of structure fields.
 * If the 3 byte padding is put in front of the actual xid data then the
 * hash function (which uses XID_TAGSIZE when deciding how many bytes of a
 * struct to look at for the key) might only see the last two bytes of the xid.
 *
 * Clearly this is not good since its likely that these bytes will be the
 * same for many transactions and hence they will share the same entry in
 * hash table causing the entry to be corrupted.  For this long-winded
 * reason I have put the tag in a struct of its own to ensure that the
 * XID_TAGSIZE is computed correctly.  It used to be sizeof (SHMEM_OFFSET) +
 * sizeof(TransactionIdData) which != sizeof(XIDTAG).
 *
 * Finally since the hash function will now look at all 12 bytes of the tag
 * the padding bytes MUST be zero'd before use in hash_search() as they
 * will have random values otherwise.  Jeff 22 July 1991.
 * -----------------------
 */

typedef struct XIDTAG {
    SHMEM_OFFSET	lock;
    int			pid;
    TransactionId	xid;
} XIDTAG;

typedef struct XIDLookupEnt {
    /* tag */
    XIDTAG	tag;

    /* data */
    int		holders[MAX_LOCKTYPES];
    int		nHolding;
    SHM_QUEUE	queue;
} XIDLookupEnt;

#define XID_TAGSIZE (sizeof(XIDTAG))

/* originally in procq.h */
typedef struct procQueue {
    SHM_QUEUE	links;
    int		size;
} PROC_QUEUE;


/*
 * lock information:
 *
 * tag -- uniquely identifies the object being locked
 * mask -- union of the conflict masks of all lock types
 *	currently held on this object.
 * waitProcs -- queue of processes waiting for this lock
 * holders -- count of each lock type currently held on the
 *	lock.
 * nHolding -- total locks of all types.
 */
typedef struct Lock {
    /* hash key */
    LOCKTAG	tag;

    /* data */
    int		mask;
    PROC_QUEUE	waitProcs;
    int		holders[MAX_LOCKTYPES];
    int		nHolding;
    int		activeHolders[MAX_LOCKTYPES];
    int		nActive;
} LOCK;

/* fixed: struct Lock's field is nHolding; there is no nHolders member */
#define LockGetLock_nHolders(l) ((l)->nHolding)

/* wrapped in do/while(0) so the macro acts as one statement under if/else */
#define LockDecrWaitHolders(lock, lockt) \
do { \
    (lock)->nHolding--; \
    (lock)->holders[lockt]--; \
} while (0)

/*
 * NOTE: these two expand with a trailing semicolon already included;
 * kept as-is for compatibility with existing call sites.
 */
#define LockLockTable() SpinAcquire(LockMgrLock);
#define UnlockLockTable() SpinRelease(LockMgrLock);

/*
 * function prototypes
 */
extern void InitLocks(void);
extern void LockDisable(int status);
extern LockTableId LockTabInit(char *tabName, MASK *conflictsP, int *prioP,
			       int ntypes);
extern LockTableId LockTabRename(LockTableId tableId);
extern bool LockAcquire(LockTableId tableId, LOCKTAG *lockName, LOCKT lockt);
extern int LockResolveConflicts(LOCKTAB *ltable, LOCK *lock, LOCKT lockt,
				TransactionId xid);
extern int WaitOnLock(LOCKTAB *ltable, LockTableId tableId, LOCK *lock,
		      LOCKT lockt);
extern bool LockRelease(LockTableId tableId, LOCKTAG *lockName, LOCKT lockt);
extern void GrantLock(LOCK *lock, LOCKT lockt);
extern bool LockReleaseAll(LockTableId tableId, SHM_QUEUE *lockQueue);
extern int LockShmemSize(void);
extern bool LockingDisabled(void);

#endif /* LOCK_H_ */
|
||||
64
src/backend/storage/multilev.h
Normal file
64
src/backend/storage/multilev.h
Normal file
@@ -0,0 +1,64 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* multilev.h--
|
||||
* multi level lock table consts/defs for single.c and multi.c and their
|
||||
* clients
|
||||
*
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: multilev.h,v 1.1.1.1 1996/07/09 06:21:53 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef MULTILEV_H
#define MULTILEV_H

#include "storage/lock.h"
#include "storage/lmgr.h"

/* base lock modes */
#define READ_LOCK	2
#define WRITE_LOCK	1

/* any time a small granularity READ/WRITE lock is set,
 * higher granularity READ_INTENT/WRITE_INTENT locks must
 * also be set.  A read intent lock has value READ+INTENT
 * in this implementation.
 */
#define NO_LOCK		0
#define INTENT		2
#define READ_INTENT	(READ_LOCK+INTENT)
#define WRITE_INTENT	(WRITE_LOCK+INTENT)

#define EXTEND_LOCK	5

/* lock duration classes */
#define SHORT_TERM	1
#define LONG_TERM	2
#define UNLOCK		0

/* levels of the lock hierarchy: relation > page > tuple */
#define N_LEVELS	3
#define RELN_LEVEL	0
#define PAGE_LEVEL	1
#define TUPLE_LEVEL	2
typedef int LOCK_LEVEL;

/* multi.c */

extern LockTableId MultiTableId;
extern LockTableId ShortTermTableId;

/*
 * function prototypes
 */
extern LockTableId InitMultiLevelLockm(void);
extern bool MultiLockReln(LockInfo linfo, LOCKT lockt);
extern bool MultiLockTuple(LockInfo linfo, ItemPointer tidPtr, LOCKT lockt);
extern bool MultiLockPage(LockInfo linfo, ItemPointer tidPtr, LOCKT lockt);
extern bool MultiAcquire(LockTableId tableId, LOCKTAG *tag, LOCKT lockt,
			 LOCK_LEVEL level);
extern bool MultiReleasePage(LockInfo linfo, ItemPointer tidPtr, LOCKT lockt);
extern bool MultiReleaseReln(LockInfo linfo, LOCKT lockt);
extern bool MultiRelease(LockTableId tableId, LOCKTAG *tag, LOCKT lockt,
			 LOCK_LEVEL level);

#endif /* MULTILEV_H */
|
||||
60
src/backend/storage/off.h
Normal file
60
src/backend/storage/off.h
Normal file
@@ -0,0 +1,60 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* off.h--
|
||||
* POSTGRES disk "offset" definitions.
|
||||
*
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: off.h,v 1.1.1.1 1996/07/09 06:21:53 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef OFF_H
#define OFF_H

#include "c.h"
#include "machine.h"		/* for BLCKSZ */
#include "storage/itemid.h"

/*
 * OffsetNumber:
 *
 * this is a 1-based index into the linp (ItemIdData) array in the
 * header of each disk page.
 */
typedef uint16 OffsetNumber;

#define InvalidOffsetNumber	((OffsetNumber) 0)
#define FirstOffsetNumber	((OffsetNumber) 1)
#define MaxOffsetNumber		((OffsetNumber) (BLCKSZ / sizeof(ItemIdData)))
#define OffsetNumberMask	(0xffff)	/* valid uint16 bits */

/* ----------------
 *	support macros
 * ----------------
 */

/*
 * OffsetNumberIsValid --
 *	True iff the offset number is valid.
 *
 * Argument is parenthesized so an expression (e.g. `n + 1`) expands
 * correctly inside the comparisons.
 */
#define OffsetNumberIsValid(offsetNumber) \
    ((bool) (((offsetNumber) != InvalidOffsetNumber) && \
	     ((offsetNumber) <= MaxOffsetNumber)))

/*
 * OffsetNumberNext --
 * OffsetNumberPrev --
 *	Increments/decrements the argument.  These macros look pointless
 *	but they help us disambiguate the different manipulations on
 *	OffsetNumbers (e.g., sometimes we subtract one from an
 *	OffsetNumber to move back, and sometimes we do so to form a
 *	real C array index).
 */
#define OffsetNumberNext(offsetNumber) \
    ((OffsetNumber) (1 + (offsetNumber)))
#define OffsetNumberPrev(offsetNumber) \
    ((OffsetNumber) (-1 + (offsetNumber)))

#endif /* OFF_H */
|
||||
26
src/backend/storage/page.h
Normal file
26
src/backend/storage/page.h
Normal file
@@ -0,0 +1,26 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* page.h--
|
||||
* POSTGRES buffer page abstraction definitions.
|
||||
*
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: page.h,v 1.1.1.1 1996/07/09 06:21:53 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef PAGE_H
#define PAGE_H

#include "c.h"

/* A page is an opaque pointer to a disk block image in a buffer. */
typedef Pointer Page;

/*
 * PageIsValid --
 *	True iff page is valid (i.e. a non-null pointer).
 */
#define PageIsValid(page) PointerIsValid(page)

#endif /* PAGE_H */
|
||||
16
src/backend/storage/page/Makefile.inc
Normal file
16
src/backend/storage/page/Makefile.inc
Normal file
@@ -0,0 +1,16 @@
|
||||
#-------------------------------------------------------------------------
|
||||
#
|
||||
# Makefile.inc--
|
||||
# Makefile for storage/page
|
||||
#
|
||||
# Copyright (c) 1994, Regents of the University of California
|
||||
#
|
||||
#
|
||||
# IDENTIFICATION
|
||||
# $Header: /cvsroot/pgsql/src/backend/storage/page/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:58 scrappy Exp $
|
||||
#
|
||||
#-------------------------------------------------------------------------
|
||||
|
||||
SUBSRCS+= bufpage.c itemptr.c
|
||||
|
||||
|
||||
519
src/backend/storage/page/bufpage.c
Normal file
519
src/backend/storage/page/bufpage.c
Normal file
@@ -0,0 +1,519 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* bufpage.c--
|
||||
* POSTGRES standard buffer page code.
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/page/bufpage.c,v 1.1.1.1 1996/07/09 06:21:58 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include <sys/types.h>
|
||||
#include <sys/file.h>
|
||||
|
||||
#include "c.h"
|
||||
|
||||
#include "storage/item.h"
|
||||
#include "storage/buf.h"
|
||||
#include "storage/bufmgr.h"
|
||||
#include "utils/elog.h"
|
||||
#include "utils/palloc.h"
|
||||
#include "utils/memutils.h"
|
||||
#include "storage/bufpage.h"
|
||||
|
||||
#include "lib/qsort.h"
|
||||
|
||||
static bool PageManagerShuffle = true; /* default is shuffle mode */
|
||||
|
||||
/* ----------------------------------------------------------------
|
||||
* Buffer support functions
|
||||
* ----------------------------------------------------------------
|
||||
*/
|
||||
/*
 * BufferGetPageSize --
 *	Returns the page size within a buffer.
 *
 * Notes:
 *	Assumes buffer is valid.
 *
 *	The buffer can be a raw disk block and need not contain a valid
 *	(formatted) disk page.
 */
Size
BufferGetPageSize(Buffer buffer)
{
    Size pageSize;

    Assert(BufferIsValid(buffer));
    /* currently all buffers are one disk block */
    pageSize = BLCKSZ;	/* XXX dig out of buffer descriptor */

    Assert(PageSizeIsValid(pageSize));
    return (pageSize);
}
|
||||
|
||||
/*
|
||||
* BufferGetPage --
|
||||
* Returns the page associated with a buffer.
|
||||
*/
|
||||
Page
|
||||
BufferGetPage(Buffer buffer)
|
||||
{
|
||||
return (Page) BufferGetBlock(buffer);
|
||||
}
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------
|
||||
* Page support functions
|
||||
* ----------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*
|
||||
* PageInit --
|
||||
* Initializes the contents of a page.
|
||||
*/
|
||||
void
|
||||
PageInit(Page page, Size pageSize, Size specialSize)
|
||||
{
|
||||
PageHeader p = (PageHeader) page;
|
||||
|
||||
Assert(pageSize == BLCKSZ);
|
||||
Assert(pageSize >
|
||||
specialSize + sizeof(PageHeaderData) - sizeof(ItemIdData));
|
||||
|
||||
specialSize = DOUBLEALIGN(specialSize);
|
||||
|
||||
p->pd_lower = sizeof(PageHeaderData) - sizeof(ItemIdData);
|
||||
p->pd_upper = pageSize - specialSize;
|
||||
p->pd_special = pageSize - specialSize;
|
||||
PageSetPageSize(page, pageSize);
|
||||
}
|
||||
|
||||
/*
|
||||
* PageGetItem --
|
||||
* Retrieves an item on the given page.
|
||||
*
|
||||
* Note:
|
||||
* This does change the status of any of the resources passed.
|
||||
* The semantics may change in the future.
|
||||
*/
|
||||
Item
|
||||
PageGetItem(Page page, ItemId itemId)
|
||||
{
|
||||
Item item;
|
||||
|
||||
Assert(PageIsValid(page));
|
||||
Assert((*itemId).lp_flags & LP_USED);
|
||||
|
||||
item = (Item)(((char *)page) + (*itemId).lp_off);
|
||||
|
||||
return (item);
|
||||
}
|
||||
|
||||
/*
|
||||
* PageAddItem --
|
||||
* Adds item to the given page.
|
||||
*
|
||||
* Note:
|
||||
* This does not assume that the item resides on a single page.
|
||||
* It is the responsiblity of the caller to act appropriately
|
||||
* depending on this fact. The "pskip" routines provide a
|
||||
* friendlier interface, in this case.
|
||||
*
|
||||
* This does change the status of any of the resources passed.
|
||||
* The semantics may change in the future.
|
||||
*
|
||||
* This routine should probably be combined with others?
|
||||
*/
|
||||
/* ----------------
|
||||
* PageAddItem
|
||||
*
|
||||
* add an item to a page.
|
||||
*
|
||||
* Notes on interface:
|
||||
* If offsetNumber is valid, shuffle ItemId's down to make room
|
||||
* to use it, if PageManagerShuffle is true. If PageManagerShuffle is
|
||||
* false, then overwrite the specified ItemId. (PageManagerShuffle is
|
||||
* true by default, and is modified by calling PageManagerModeSet.)
|
||||
* If offsetNumber is not valid, then assign one by finding the first
|
||||
* one that is both unused and deallocated.
|
||||
*
|
||||
* NOTE: If offsetNumber is valid, and PageManagerShuffle is true, it
|
||||
* is assumed that there is room on the page to shuffle the ItemId's
|
||||
* down by one.
|
||||
* ----------------
|
||||
*/
|
||||
OffsetNumber
|
||||
PageAddItem(Page page,
|
||||
Item item,
|
||||
Size size,
|
||||
OffsetNumber offsetNumber,
|
||||
ItemIdFlags flags)
|
||||
{
|
||||
register i;
|
||||
Size alignedSize;
|
||||
Offset lower;
|
||||
Offset upper;
|
||||
ItemId itemId;
|
||||
ItemId fromitemId, toitemId;
|
||||
OffsetNumber limit;
|
||||
|
||||
bool shuffled = false;
|
||||
|
||||
/*
|
||||
* Find first unallocated offsetNumber
|
||||
*/
|
||||
limit = OffsetNumberNext(PageGetMaxOffsetNumber(page));
|
||||
|
||||
/* was offsetNumber passed in? */
|
||||
if (OffsetNumberIsValid(offsetNumber)) {
|
||||
if (PageManagerShuffle == true) {
|
||||
/* shuffle ItemId's (Do the PageManager Shuffle...) */
|
||||
for (i = (limit - 1); i >= offsetNumber; i--) {
|
||||
fromitemId = &((PageHeader)page)->pd_linp[i - 1];
|
||||
toitemId = &((PageHeader)page)->pd_linp[i];
|
||||
*toitemId = *fromitemId;
|
||||
}
|
||||
shuffled = true; /* need to increase "lower" */
|
||||
} else { /* overwrite mode */
|
||||
itemId = &((PageHeader)page)->pd_linp[offsetNumber - 1];
|
||||
if (((*itemId).lp_flags & LP_USED) ||
|
||||
((*itemId).lp_len != 0)) {
|
||||
elog(WARN, "PageAddItem: tried overwrite of used ItemId");
|
||||
return (InvalidOffsetNumber);
|
||||
}
|
||||
}
|
||||
} else { /* offsetNumber was not passed in, so find one */
|
||||
/* look for "recyclable" (unused & deallocated) ItemId */
|
||||
for (offsetNumber = 1; offsetNumber < limit; offsetNumber++) {
|
||||
itemId = &((PageHeader)page)->pd_linp[offsetNumber - 1];
|
||||
if ((((*itemId).lp_flags & LP_USED) == 0) &&
|
||||
((*itemId).lp_len == 0))
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (offsetNumber > limit)
|
||||
lower = (Offset) (((char *) (&((PageHeader)page)->pd_linp[offsetNumber])) - ((char *) page));
|
||||
else if (offsetNumber == limit || shuffled == true)
|
||||
lower = ((PageHeader)page)->pd_lower + sizeof (ItemIdData);
|
||||
else
|
||||
lower = ((PageHeader)page)->pd_lower;
|
||||
|
||||
alignedSize = DOUBLEALIGN(size);
|
||||
|
||||
upper = ((PageHeader)page)->pd_upper - alignedSize;
|
||||
|
||||
if (lower > upper) {
|
||||
return (InvalidOffsetNumber);
|
||||
}
|
||||
|
||||
itemId = &((PageHeader)page)->pd_linp[offsetNumber - 1];
|
||||
(*itemId).lp_off = upper;
|
||||
(*itemId).lp_len = size;
|
||||
(*itemId).lp_flags = flags;
|
||||
memmove((char *)page + upper, item, size);
|
||||
((PageHeader)page)->pd_lower = lower;
|
||||
((PageHeader)page)->pd_upper = upper;
|
||||
|
||||
return (offsetNumber);
|
||||
}
|
||||
|
||||
/*
 * PageGetTempPage --
 *	Get a temporary page in local memory for special processing
 *
 * Returns a palloc'd copy of `page` whose item area has been cleared
 * and whose free-space pointers have been reset, but whose header and
 * special space are preserved.  Caller owns the returned page and is
 * expected to free it (e.g. via PageRestoreTempPage).
 */
Page
PageGetTempPage(Page page, Size specialSize)
{
    Size pageSize;
    Size size;
    Page temp;
    PageHeader thdr;

    pageSize = PageGetPageSize(page);

    if ((temp = (Page) palloc(pageSize)) == (Page) NULL)
	elog(FATAL, "Cannot allocate %d bytes for temp page.", pageSize);
    thdr = (PageHeader) temp;

    /* copy old page in */
    memmove(temp, page, pageSize);

    /*
     * clear out the middle: everything from the start of the line
     * pointer array up to (but not including) the special space
     */
    size = (pageSize - sizeof(PageHeaderData)) + sizeof(ItemIdData);
    size -= DOUBLEALIGN(specialSize);
    memset((char *) &(thdr->pd_linp[0]), 0, size);

    /* set high, low water marks */
    thdr->pd_lower = sizeof (PageHeaderData) - sizeof (ItemIdData);
    thdr->pd_upper = pageSize - DOUBLEALIGN(specialSize);

    return (temp);
}
|
||||
|
||||
/*
|
||||
* PageRestoreTempPage --
|
||||
* Copy temporary page back to permanent page after special processing
|
||||
* and release the temporary page.
|
||||
*/
|
||||
void
|
||||
PageRestoreTempPage(Page tempPage, Page oldPage)
|
||||
{
|
||||
Size pageSize;
|
||||
|
||||
pageSize = PageGetPageSize(tempPage);
|
||||
memmove((char *) oldPage, (char *) tempPage, pageSize);
|
||||
|
||||
pfree(tempPage);
|
||||
}
|
||||
|
||||
/*
 * PageGetMaxOffsetNumber --
 *	Returns the maximum offset number used by the given page.
 *
 *	Computed from pd_lower: the number of ItemIdData slots between
 *	the end of the page header and the low-water mark.
 *
 * NOTE: The offset is invalid if the page is empty (the original
 *	comment said "non-empty", but an empty page yields 0, which is
 *	InvalidOffsetNumber).  Test whether PageIsEmpty before calling
 *	this routine and/or using its return value.
 */
OffsetNumber
PageGetMaxOffsetNumber(Page page)
{
    LocationIndex low;
    OffsetNumber i;

    low = ((PageHeader) page)->pd_lower;
    i = (low - (sizeof(PageHeaderData) - sizeof(ItemIdData)))
	/ sizeof(ItemIdData);

    return(i);
}
|
||||
|
||||
/* ----------------
 *	itemid stuff for PageRepairFragmentation
 * ----------------
 */
struct itemIdSortData {
    int offsetindex;	/* linp array index */
    ItemIdData itemiddata;	/* copy of the line pointer being sorted */
};

/*
 * qsort comparator: orders itemIdSortData entries by DESCENDING lp_off,
 * so that compaction in PageRepairFragmentation processes the items
 * nearest the end of the page first.
 */
static int
itemidcompare(struct itemIdSortData *itemidp1, struct itemIdSortData *itemidp2)
{
    if (itemidp1->itemiddata.lp_off == itemidp2->itemiddata.lp_off)
	return(0);
    else if (itemidp1->itemiddata.lp_off < itemidp2->itemiddata.lp_off)
	return(1);
    else
	return(-1);
}
|
||||
|
||||
/*
|
||||
* PageRepairFragmentation --
|
||||
* Frees fragmented space on a page.
|
||||
*/
|
||||
void
|
||||
PageRepairFragmentation(Page page)
|
||||
{
|
||||
int i;
|
||||
struct itemIdSortData *itemidbase, *itemidptr;
|
||||
ItemId lp;
|
||||
int nline, nused;
|
||||
int itemidcompare();
|
||||
Offset upper;
|
||||
Size alignedSize;
|
||||
|
||||
nline = (int16) PageGetMaxOffsetNumber(page);
|
||||
nused = 0;
|
||||
for (i=0; i<nline; i++) {
|
||||
lp = ((PageHeader)page)->pd_linp + i;
|
||||
if ((*lp).lp_flags & LP_USED)
|
||||
nused++;
|
||||
}
|
||||
|
||||
if (nused == 0) {
|
||||
for (i=0; i<nline; i++) {
|
||||
lp = ((PageHeader)page)->pd_linp + i;
|
||||
if ((*lp).lp_len > 0) /* unused, but allocated */
|
||||
(*lp).lp_len = 0; /* indicate unused & deallocated */
|
||||
}
|
||||
|
||||
((PageHeader)page)->pd_upper = ((PageHeader)page)->pd_special;
|
||||
} else { /* nused != 0 */
|
||||
itemidbase = (struct itemIdSortData *)
|
||||
palloc(sizeof(struct itemIdSortData) * nused);
|
||||
memset((char *) itemidbase, 0, sizeof(struct itemIdSortData) * nused);
|
||||
itemidptr = itemidbase;
|
||||
for (i=0; i<nline; i++) {
|
||||
lp = ((PageHeader)page)->pd_linp + i;
|
||||
if ((*lp).lp_flags & LP_USED) {
|
||||
itemidptr->offsetindex = i;
|
||||
itemidptr->itemiddata = *lp;
|
||||
itemidptr++;
|
||||
} else {
|
||||
if ((*lp).lp_len > 0) /* unused, but allocated */
|
||||
(*lp).lp_len = 0; /* indicate unused & deallocated */
|
||||
}
|
||||
}
|
||||
|
||||
/* sort itemIdSortData array...*/
|
||||
pg_qsort((char *) itemidbase, nused, sizeof(struct itemIdSortData),
|
||||
(void*) itemidcompare);
|
||||
|
||||
/* compactify page */
|
||||
((PageHeader)page)->pd_upper = ((PageHeader)page)->pd_special;
|
||||
|
||||
for (i=0, itemidptr = itemidbase; i<nused; i++, itemidptr++) {
|
||||
lp = ((PageHeader)page)->pd_linp + itemidptr->offsetindex;
|
||||
alignedSize = DOUBLEALIGN((*lp).lp_len);
|
||||
upper = ((PageHeader)page)->pd_upper - alignedSize;
|
||||
memmove((char *) page + upper,
|
||||
(char *)page + (*lp).lp_off,
|
||||
(*lp).lp_len);
|
||||
(*lp).lp_off = upper;
|
||||
((PageHeader)page)->pd_upper = upper;
|
||||
}
|
||||
|
||||
pfree(itemidbase);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* PageGetFreeSpace --
|
||||
* Returns the size of the free (allocatable) space on a page.
|
||||
*/
|
||||
Size
|
||||
PageGetFreeSpace(Page page)
|
||||
{
|
||||
Size space;
|
||||
|
||||
|
||||
space = ((PageHeader)page)->pd_upper - ((PageHeader)page)->pd_lower;
|
||||
|
||||
if (space < sizeof (ItemIdData)) {
|
||||
return (0);
|
||||
}
|
||||
space -= sizeof (ItemIdData); /* XXX not always true */
|
||||
|
||||
return (space);
|
||||
}
|
||||
|
||||
/*
|
||||
* PageManagerModeSet --
|
||||
*
|
||||
* Sets mode to either: ShufflePageManagerMode (the default) or
|
||||
* OverwritePageManagerMode. For use by access methods code
|
||||
* for determining semantics of PageAddItem when the offsetNumber
|
||||
* argument is passed in.
|
||||
*/
|
||||
void
|
||||
PageManagerModeSet(PageManagerMode mode)
|
||||
{
|
||||
if (mode == ShufflePageManagerMode)
|
||||
PageManagerShuffle = true;
|
||||
else if (mode == OverwritePageManagerMode)
|
||||
PageManagerShuffle = false;
|
||||
}
|
||||
|
||||
/*
 *----------------------------------------------------------------
 * PageIndexTupleDelete
 *----------------------------------------------------------------
 *
 * This routine does the work of removing a tuple from an index page:
 * the tuple's line pointer is removed from the pd_linp array, its data
 * is squeezed out of the tuple-data area, and the remaining line
 * pointers are re-adjusted to the shifted data locations.
 */
void
PageIndexTupleDelete(Page page, OffsetNumber offnum)
{
    PageHeader	phdr;
    char	*addr;
    ItemId	tup;
    Size	size;
    char	*locn;
    int		nbytes;
    int		offidx;

    phdr = (PageHeader) page;

    /* change offset number to offset index (0-based) */
    offidx = offnum - 1;

    tup = PageGetItemId(page, offnum);
    size = ItemIdGetLength(tup);
    size = DOUBLEALIGN(size);

    /* location of deleted tuple data */
    locn = (char *) (page + ItemIdGetOffset(tup));

    /*
     * First, we want to get rid of the pd_linp entry for the index
     * tuple.  We copy all subsequent linp's back one slot in the
     * array.
     */

    nbytes = phdr->pd_lower -
	((char *)&phdr->pd_linp[offidx + 1] - (char *) phdr);
    /* memmove: source and destination ranges overlap */
    memmove((char *) &(phdr->pd_linp[offidx]),
	    (char *) &(phdr->pd_linp[offidx + 1]),
	    nbytes);

    /*
     * Now move everything between the old upper bound (beginning of tuple
     * space) and the beginning of the deleted tuple forward, so that
     * space in the middle of the page is left free.  If we've just deleted
     * the tuple at the beginning of tuple space, then there's no need
     * to do the copy (and bcopy on some architectures SEGV's if asked
     * to move zero bytes).
     */

    /* beginning of tuple space */
    addr = (char *) (page + phdr->pd_upper);

    if (locn != addr)
	memmove(addr + size, addr, (int) (locn - addr));

    /* adjust free space boundary pointers */
    phdr->pd_upper += size;
    phdr->pd_lower -= sizeof (ItemIdData);

    /* finally, we need to adjust the linp entries that remain */
    if (!PageIsEmpty(page))
	PageIndexTupleDeleteAdjustLinePointers(phdr, locn, size);
}
|
||||
|
||||
/*
|
||||
*----------------------------------------------------------------
|
||||
* PageIndexTupleDeleteAdjustLinePointers
|
||||
*----------------------------------------------------------------
|
||||
*
|
||||
* Once the line pointers and tuple data have been shifted around
|
||||
* on the page, we need to go down the line pointer vector and
|
||||
* adjust pointers to reflect new locations. Anything that used
|
||||
* to be before the deleted tuple's data was moved forward by the
|
||||
* size of the deleted tuple.
|
||||
*
|
||||
* This routine does the work of adjusting the line pointers.
|
||||
* Location is where the tuple data used to lie; size is how
|
||||
* much space it occupied. We assume that size has been aligned
|
||||
* as required by the time we get here.
|
||||
*
|
||||
* This routine should never be called on an empty page.
|
||||
*/
|
||||
void
|
||||
PageIndexTupleDeleteAdjustLinePointers(PageHeader phdr,
|
||||
char *location,
|
||||
Size size)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* location is an index into the page... */
|
||||
location -= (int) phdr;
|
||||
|
||||
for (i = PageGetMaxOffsetNumber((Page) phdr) - 1; i >= 0; i--) {
|
||||
if (phdr->pd_linp[i].lp_off <= (unsigned) location) {
|
||||
phdr->pd_linp[i].lp_off += size;
|
||||
}
|
||||
}
|
||||
}
|
||||
40
src/backend/storage/page/itemptr.c
Normal file
40
src/backend/storage/page/itemptr.c
Normal file
@@ -0,0 +1,40 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* itemptr.c--
|
||||
* POSTGRES disk item pointer code.
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/page/itemptr.c,v 1.1.1.1 1996/07/09 06:21:59 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include "c.h"
|
||||
|
||||
#include "storage/block.h"
|
||||
#include "storage/off.h"
|
||||
#include "storage/itemptr.h"
|
||||
#include "storage/bufpage.h"
|
||||
|
||||
/*
|
||||
* ItemPointerEquals --
|
||||
* Returns true if both item pointers point to the same item,
|
||||
* otherwise returns false.
|
||||
*
|
||||
* Note:
|
||||
* Assumes that the disk item pointers are not NULL.
|
||||
*/
|
||||
bool
|
||||
ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2)
|
||||
{
|
||||
if (ItemPointerGetBlockNumber(pointer1) ==
|
||||
ItemPointerGetBlockNumber(pointer2) &&
|
||||
ItemPointerGetOffsetNumber(pointer1) ==
|
||||
ItemPointerGetOffsetNumber(pointer2))
|
||||
return(true);
|
||||
else
|
||||
return(false);
|
||||
}
|
||||
|
||||
33
src/backend/storage/pagenum.h
Normal file
33
src/backend/storage/pagenum.h
Normal file
@@ -0,0 +1,33 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* pagenum.h--
|
||||
* POSTGRES page number definitions.
|
||||
*
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: pagenum.h,v 1.1.1.1 1996/07/09 06:21:53 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef PAGENUM_H
|
||||
#define PAGENUM_H
|
||||
|
||||
#include "c.h"
|
||||
#include "storage/page.h"
|
||||
|
||||
typedef uint16 PageNumber;
|
||||
|
||||
typedef uint32 LogicalPageNumber;
|
||||
|
||||
#define InvalidLogicalPageNumber 0
|
||||
|
||||
/*
|
||||
* LogicalPageNumberIsValid --
|
||||
* True iff the logical page number is valid.
|
||||
*/
|
||||
#define LogicalPageNumberIsValid(pageNumber) \
|
||||
((bool)((pageNumber) != InvalidLogicalPageNumber))
|
||||
|
||||
|
||||
#endif /* PAGENUM_H */
|
||||
64
src/backend/storage/pos.h
Normal file
64
src/backend/storage/pos.h
Normal file
@@ -0,0 +1,64 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* pos.h--
|
||||
* POSTGRES "position" definitions.
|
||||
*
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: pos.h,v 1.1.1.1 1996/07/09 06:21:53 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef POS_H
|
||||
#define POS_H
|
||||
|
||||
#include "c.h"
|
||||
|
||||
/*
|
||||
* a 'position' used to be <pagenumber, offset> in postgres. this has
|
||||
* been changed to just <offset> as the notion of having multiple pages
|
||||
* within a block has been removed.
|
||||
*
|
||||
* the 'offset' abstraction is somewhat confusing. it is NOT a byte
|
||||
* offset within the page; instead, it is an offset into the line
|
||||
* pointer array contained on every page that store (heap or index)
|
||||
* tuples.
|
||||
*/
|
||||
typedef bits16 PositionIdData;
|
||||
typedef PositionIdData *PositionId;
|
||||
|
||||
/* ----------------
|
||||
* support macros
|
||||
* ----------------
|
||||
*/
|
||||
|
||||
/*
|
||||
* PositionIdIsValid --
|
||||
* True iff the position identifier is valid.
|
||||
*/
|
||||
#define PositionIdIsValid(positionId) \
|
||||
PointerIsValid(positionId)
|
||||
|
||||
/*
|
||||
* PositionIdSetInvalid --
|
||||
* Make an invalid position.
|
||||
*/
|
||||
#define PositionIdSetInvalid(positionId) \
|
||||
*(positionId) = (bits16) 0
|
||||
|
||||
/*
|
||||
* PositionIdSet --
|
||||
* Sets a position identifier to the specified value.
|
||||
*/
|
||||
#define PositionIdSet(positionId, offsetNumber) \
|
||||
*(positionId) = (offsetNumber)
|
||||
|
||||
/*
|
||||
* PositionIdGetOffsetNumber --
|
||||
* Retrieve the offset number from a position identifier.
|
||||
*/
|
||||
#define PositionIdGetOffsetNumber(positionId) \
|
||||
((OffsetNumber) *(positionId))
|
||||
|
||||
#endif /* POS_H */
|
||||
127
src/backend/storage/proc.h
Normal file
127
src/backend/storage/proc.h
Normal file
@@ -0,0 +1,127 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* proc.h--
|
||||
*
|
||||
*
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: proc.h,v 1.1.1.1 1996/07/09 06:21:53 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef _PROC_H_
|
||||
#define _PROC_H_
|
||||
|
||||
#include "storage/ipc.h"
|
||||
#include "storage/lock.h"
|
||||
#ifndef WIN32
|
||||
#include <sys/sem.h>
|
||||
#else
|
||||
/* This is because WIN32 already defines PROC */
|
||||
#define PROC PGL_PROC
|
||||
#endif /* WIN32 */
|
||||
#include "storage/shmem.h"
|
||||
|
||||
|
||||
typedef struct {
|
||||
int sleeplock;
|
||||
int semNum;
|
||||
IpcSemaphoreId semId;
|
||||
IpcSemaphoreKey semKey;
|
||||
} SEMA;
|
||||
|
||||
/*
|
||||
* Each backend has:
|
||||
*/
|
||||
typedef struct proc {
|
||||
|
||||
/* proc->links MUST BE THE FIRST ELEMENT OF STRUCT (see ProcWakeup()) */
|
||||
|
||||
SHM_QUEUE links; /* proc can be waiting for one event(lock) */
|
||||
SEMA sem; /* ONE semaphore to sleep on */
|
||||
int errType; /* error code tells why we woke up */
|
||||
|
||||
int procId; /* unique number for this structure
|
||||
* NOT unique per backend, these things
|
||||
* are reused after the backend dies.
|
||||
*/
|
||||
|
||||
int critSects; /* If critSects > 0, we are in sensitive
|
||||
* routines that cannot be recovered when
|
||||
* the process fails.
|
||||
*/
|
||||
|
||||
int prio; /* priority for sleep queue */
|
||||
|
||||
TransactionId xid; /* transaction currently being executed
|
||||
* by this proc
|
||||
*/
|
||||
|
||||
LOCK * waitLock; /* Lock we're sleeping on */
|
||||
int token; /* info for proc wakeup routines */
|
||||
int pid; /* This procs process id */
|
||||
short sLocks[MAX_SPINS]; /* Spin lock stats */
|
||||
SHM_QUEUE lockQueue; /* locks associated with current transaction */
|
||||
} PROC;
|
||||
|
||||
|
||||
/*
|
||||
* MAX_PROC_SEMS is the maximum number of per-process semaphores (those used
|
||||
* by the lock mgr) we can keep track of. PROC_NSEMS_PER_SET is the number
|
||||
* of semaphores in each (sys-V) semaphore set allocated. (Be careful not
|
||||
* to set it to greater 32. Otherwise, the bitmap will overflow.)
|
||||
*/
|
||||
#define MAX_PROC_SEMS 128
|
||||
#define PROC_NSEMS_PER_SET 16
|
||||
|
||||
typedef struct procglobal {
|
||||
SHMEM_OFFSET freeProcs;
|
||||
int numProcs;
|
||||
IPCKey currKey;
|
||||
int32 freeSemMap[MAX_PROC_SEMS/PROC_NSEMS_PER_SET];
|
||||
} PROC_HDR;
|
||||
|
||||
extern PROC *MyProc;
|
||||
|
||||
#define PROC_INCR_SLOCK(lock) if (MyProc) (MyProc->sLocks[(lock)])++
|
||||
#define PROC_DECR_SLOCK(lock) if (MyProc) (MyProc->sLocks[(lock)])--
|
||||
|
||||
/*
|
||||
* flags explaining why process woke up
|
||||
*/
|
||||
#define NO_ERROR 0
|
||||
#define ERR_TIMEOUT 1
|
||||
#define ERR_BUFFER_IO 2
|
||||
|
||||
#define MAX_PRIO 50
|
||||
#define MIN_PRIO (-1)
|
||||
|
||||
extern SPINLOCK ProcStructLock;
|
||||
|
||||
/*
|
||||
* Function Prototypes
|
||||
*/
|
||||
extern void InitProcess(IPCKey key);
|
||||
extern void ProcReleaseLocks(void);
|
||||
extern bool ProcRemove(int pid);
|
||||
/* extern bool ProcKill(int exitStatus, int pid); */
|
||||
/* make static in storage/lmgr/proc.c -- jolly */
|
||||
|
||||
extern PROC_QUEUE *ProcQueueAlloc(char *name);
|
||||
extern void ProcQueueInit(PROC_QUEUE *queue);
|
||||
extern int ProcSleep(PROC_QUEUE *queue, SPINLOCK spinlock, int token,
|
||||
int prio, LOCK *lock);
|
||||
extern PROC *ProcWakeup(PROC *proc, int errType);
|
||||
extern int ProcGetId(void);
|
||||
extern int ProcLockWakeup(PROC_QUEUE *queue, char * ltable, char * lock);
|
||||
extern void ProcAddLock(SHM_QUEUE *elem);
|
||||
#if defined(PORTNAME_linux)
|
||||
extern int HandleDeadLock(int);
|
||||
#else
|
||||
extern int HandleDeadLock(void);
|
||||
#endif
|
||||
extern void ProcReleaseSpins(PROC *proc);
|
||||
extern void ProcFreeAllSemaphores(void);
|
||||
|
||||
#endif /* PROC_H */
|
||||
104
src/backend/storage/shmem.h
Normal file
104
src/backend/storage/shmem.h
Normal file
@@ -0,0 +1,104 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* shmem.h--
|
||||
* shared memory management structures
|
||||
*
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: shmem.h,v 1.1.1.1 1996/07/09 06:21:53 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef SHMEM_H
|
||||
#define SHMEM_H
|
||||
|
||||
#include "storage/spin.h" /* for SPINLOCK */
|
||||
#include "utils/hsearch.h" /* for HTAB */
|
||||
|
||||
/* The shared memory region can start at a different address
|
||||
* in every process. Shared memory "pointers" are actually
|
||||
* offsets relative to the start of the shared memory region(s).
|
||||
*/
|
||||
typedef unsigned long SHMEM_OFFSET;
|
||||
#define INVALID_OFFSET (-1)
|
||||
#define BAD_LOCATION (-1)
|
||||
|
||||
/* start of the lowest shared memory region. For now, assume that
|
||||
* there is only one shared memory region
|
||||
*/
|
||||
extern SHMEM_OFFSET ShmemBase;
|
||||
|
||||
|
||||
/* coerce an offset into a pointer in this process's address space */
|
||||
#define MAKE_PTR(xx_offs)\
|
||||
(ShmemBase+((unsigned long)(xx_offs)))
|
||||
|
||||
/* coerce a pointer into a shmem offset */
|
||||
#define MAKE_OFFSET(xx_ptr)\
|
||||
(SHMEM_OFFSET) (((unsigned long)(xx_ptr))-ShmemBase)
|
||||
|
||||
#define SHM_PTR_VALID(xx_ptr)\
|
||||
(((unsigned long)xx_ptr) > ShmemBase)
|
||||
|
||||
/* cannot have an offset to ShmemFreeStart (offset 0) */
|
||||
#define SHM_OFFSET_VALID(xx_offs)\
|
||||
((xx_offs != 0) && (xx_offs != INVALID_OFFSET))
|
||||
|
||||
|
||||
extern SPINLOCK ShmemLock;
|
||||
extern SPINLOCK BindingLock;
|
||||
|
||||
/* shmemqueue.c */
|
||||
typedef struct SHM_QUEUE {
|
||||
SHMEM_OFFSET prev;
|
||||
SHMEM_OFFSET next;
|
||||
} SHM_QUEUE;
|
||||
|
||||
/* shmem.c */
|
||||
extern void ShmemBindingTabReset();
|
||||
extern void ShmemCreate(unsigned int key, unsigned int size);
|
||||
extern int InitShmem(unsigned int key, unsigned int size);
|
||||
extern long *ShmemAlloc(unsigned long size);
|
||||
extern int ShmemIsValid(unsigned long addr);
|
||||
extern HTAB *ShmemInitHash(char *name, long init_size, long max_size,
|
||||
HASHCTL *infoP, int hash_flags);
|
||||
extern bool ShmemPIDLookup(int pid, SHMEM_OFFSET* locationPtr);
|
||||
extern SHMEM_OFFSET ShmemPIDDestroy(int pid);
|
||||
extern long *ShmemInitStruct(char *name, unsigned long size,
|
||||
bool *foundPtr);
|
||||
|
||||
|
||||
typedef int TableID;
|
||||
|
||||
/* size constants for the binding table */
|
||||
/* max size of data structure string name */
|
||||
#define BTABLE_KEYSIZE (50)
|
||||
/* data in binding table hash bucket */
|
||||
#define BTABLE_DATASIZE (sizeof(BindingEnt) - BTABLE_KEYSIZE)
|
||||
/* maximum size of the binding table */
|
||||
#define BTABLE_SIZE (100)
|
||||
|
||||
/* this is a hash bucket in the binding table */
|
||||
typedef struct {
|
||||
char key[BTABLE_KEYSIZE]; /* string name */
|
||||
unsigned long location; /* location in shared mem */
|
||||
unsigned long size; /* numbytes allocated for the
|
||||
* structure
|
||||
*/
|
||||
} BindingEnt;
|
||||
|
||||
/*
|
||||
* prototypes for functions in shmqueue.c
|
||||
*/
|
||||
extern void SHMQueueInit(SHM_QUEUE *queue);
|
||||
extern bool SHMQueueIsDetached(SHM_QUEUE *queue);
|
||||
extern void SHMQueueElemInit(SHM_QUEUE *queue);
|
||||
extern void SHMQueueDelete(SHM_QUEUE *queue);
|
||||
extern void SHMQueueInsertHD(SHM_QUEUE *queue, SHM_QUEUE *elem);
|
||||
extern void SHMQueueInsertTL(SHM_QUEUE *queue, SHM_QUEUE *elem);
|
||||
extern void SHMQueueFirst(SHM_QUEUE *queue, Pointer *nextPtrPtr,
|
||||
SHM_QUEUE *nextQueue);
|
||||
extern bool SHMQueueEmpty(SHM_QUEUE *queue);
|
||||
|
||||
#endif /* SHMEM_H */
|
||||
33
src/backend/storage/sinval.h
Normal file
33
src/backend/storage/sinval.h
Normal file
@@ -0,0 +1,33 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* sinval.h--
|
||||
* POSTGRES shared cache invalidation communication definitions.
|
||||
*
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: sinval.h,v 1.1.1.1 1996/07/09 06:21:53 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef SINVAL_H
|
||||
#define SINVAL_H
|
||||
|
||||
#include "c.h"
|
||||
#include "storage/spin.h"
|
||||
#include "storage/ipc.h"
|
||||
#include "storage/itemptr.h"
|
||||
#include "storage/backendid.h"
|
||||
|
||||
extern SPINLOCK SInvalLock;
|
||||
|
||||
extern void CreateSharedInvalidationState(IPCKey key);
|
||||
extern void AttachSharedInvalidationState(IPCKey key);
|
||||
extern void InitSharedInvalidationState();
|
||||
extern void RegisterSharedInvalid(int cacheId, Index hashIndex,
|
||||
ItemPointer pointer);
|
||||
extern void InvalidateSharedInvalid(void (*invalFunction)(),
|
||||
void (*resetFunction)());
|
||||
|
||||
|
||||
#endif /* SINVAL_H */
|
||||
126
src/backend/storage/sinvaladt.h
Normal file
126
src/backend/storage/sinvaladt.h
Normal file
@@ -0,0 +1,126 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* sinvaladt.h--
|
||||
* POSTGRES shared cache invalidation segment definitions.
|
||||
*
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: sinvaladt.h,v 1.1.1.1 1996/07/09 06:21:53 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef SINVALADT_H
|
||||
#define SINVALADT_H
|
||||
|
||||
#include "postgres.h" /* XXX */
|
||||
|
||||
#include "storage/ipc.h"
|
||||
#include "storage/itemptr.h"
|
||||
#include "storage/sinval.h"
|
||||
|
||||
/*
|
||||
 * The structure of the shared cache invalidation segment
|
||||
*
|
||||
*/
|
||||
/*
|
||||
A------------- Header info --------------
|
||||
criticalSectionSemaphoreId
|
||||
generalSemaphoreId
|
||||
startEntrySection (offset a)
|
||||
endEntrySection (offset a + b)
|
||||
startFreeSpace (offset relative to B)
|
||||
   startEntryChain (offset relative to B)
|
||||
endEntryChain (offset relative to B)
|
||||
numEntries
|
||||
maxNumEntries
|
||||
procState[MaxBackendId] --> limit
|
||||
resetState (bool)
|
||||
a tag (POSTID)
|
||||
B------------- Start entry section -------
|
||||
SISegEntry --> entryData --> ... (see SharedInvalidData!)
|
||||
isfree (bool)
|
||||
next (offset to next entry in chain )
|
||||
b .... (dynamically growing down)
|
||||
C----------------End shared segment -------
|
||||
|
||||
*/
|
||||
|
||||
/* Parameters (configurable) *******************************************/
|
||||
#define MaxBackendId 32 /* maximum number of backends */
|
||||
#define MAXNUMMESSAGES 1000 /* maximum number of messages in seg*/
|
||||
|
||||
|
||||
#define InvalidOffset 1000000000 /* a invalid offset (End of chain) */
|
||||
|
||||
typedef struct ProcState {
|
||||
int limit; /* the number of read messages */
|
||||
bool resetState; /* true, if backend has to reset its state */
|
||||
int tag; /* special tag, recieved from the postmaster */
|
||||
} ProcState;
|
||||
|
||||
|
||||
typedef struct SISeg {
|
||||
IpcSemaphoreId criticalSectionSemaphoreId; /* semaphore id */
|
||||
IpcSemaphoreId generalSemaphoreId; /* semaphore id */
|
||||
Offset startEntrySection; /* (offset a) */
|
||||
Offset endEntrySection; /* (offset a + b) */
|
||||
Offset startFreeSpace; /* (offset relative to B) */
|
||||
Offset startEntryChain; /* (offset relative to B) */
|
||||
Offset endEntryChain; /* (offset relative to B) */
|
||||
int numEntries;
|
||||
int maxNumEntries;
|
||||
ProcState procState[MaxBackendId]; /* reflects the invalidation state */
|
||||
/* here starts the entry section, controlled by offsets */
|
||||
} SISeg;
|
||||
#define SizeSISeg sizeof(SISeg)
|
||||
|
||||
typedef struct SharedInvalidData {
|
||||
int cacheId; /* XXX */
|
||||
Index hashIndex;
|
||||
ItemPointerData pointerData;
|
||||
} SharedInvalidData;
|
||||
|
||||
typedef SharedInvalidData *SharedInvalid;
|
||||
|
||||
|
||||
typedef struct SISegEntry {
|
||||
SharedInvalidData entryData; /* the message data */
|
||||
bool isfree; /* entry free? */
|
||||
Offset next; /* offset to next entry*/
|
||||
} SISegEntry;
|
||||
|
||||
#define SizeOfOneSISegEntry sizeof(SISegEntry)
|
||||
|
||||
typedef struct SISegOffsets {
|
||||
Offset startSegment; /* always 0 (for now) */
|
||||
Offset offsetToFirstEntry; /* A + a = B */
|
||||
Offset offsetToEndOfSegemnt; /* A + a + b */
|
||||
} SISegOffsets;
|
||||
|
||||
|
||||
/****************************************************************************/
|
||||
/* synchronization of the shared buffer access */
|
||||
/* access to the buffer is synchronized by the lock manager !! */
|
||||
/****************************************************************************/
|
||||
|
||||
#define SI_LockStartValue 255
|
||||
#define SI_SharedLock (-1)
|
||||
#define SI_ExclusiveLock (-255)
|
||||
|
||||
extern SISeg *shmInvalBuffer;
|
||||
|
||||
/*
|
||||
* prototypes for functions in sinvaladt.c
|
||||
*/
|
||||
extern int SIBackendInit(SISeg *segInOutP);
|
||||
extern int SISegmentInit(bool killExistingSegment, IPCKey key);
|
||||
|
||||
extern bool SISetDataEntry(SISeg *segP, SharedInvalidData *data);
|
||||
extern void SISetProcStateInvalid(SISeg *segP);
|
||||
extern bool SIDelDataEntry(SISeg *segP);
|
||||
extern void SIReadEntryData(SISeg *segP, int backendId,
|
||||
void (*invalFunction)(), void (*resetFunction)());
|
||||
extern void SIDelExpiredDataEntries(SISeg *segP);
|
||||
|
||||
#endif /* SINVALADT_H */
|
||||
84
src/backend/storage/smgr.h
Normal file
84
src/backend/storage/smgr.h
Normal file
@@ -0,0 +1,84 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* smgr.h--
|
||||
* storage manager switch public interface declarations.
|
||||
*
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: smgr.h,v 1.1.1.1 1996/07/09 06:21:53 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef SMGR_H
|
||||
#define SMGR_H
|
||||
|
||||
#include "utils/rel.h"
|
||||
#include "storage/spin.h" /* for SPINLOCK */
|
||||
|
||||
#define SM_FAIL 0
|
||||
#define SM_SUCCESS 1
|
||||
|
||||
#define DEFAULT_SMGR 0
|
||||
|
||||
extern int smgrinit(void);
|
||||
extern void smgrshutdown(int dummy);
|
||||
extern int smgrcreate(int16 which, Relation reln);
|
||||
extern int smgrunlink(int16 which, Relation reln);
|
||||
extern int smgrextend(int16 which, Relation reln, char *buffer);
|
||||
extern int smgropen(int16 which, Relation reln);
|
||||
extern int smgrclose(int16 which, Relation reln);
|
||||
extern int smgrread(int16 which, Relation reln, BlockNumber blocknum,
|
||||
char *buffer);
|
||||
extern int smgrwrite(int16 which, Relation reln, BlockNumber blocknum,
|
||||
char *buffer);
|
||||
extern int smgrflush(int16 which, Relation reln, BlockNumber blocknum,
|
||||
char *buffer);
|
||||
extern int smgrblindwrt(int16 which, char *dbname, char *relname, Oid dbid,
|
||||
Oid relid, BlockNumber blkno, char *buffer);
|
||||
extern int smgrnblocks(int16 which, Relation reln);
|
||||
extern int smgrcommit(void);
|
||||
extern int smgrabort(void);
|
||||
extern bool smgriswo(int16 smgrno);
|
||||
|
||||
|
||||
|
||||
/* internals: move me elsewhere -- ay 7/94 */
|
||||
|
||||
/* in md.c */
|
||||
extern int mdinit(void);
|
||||
extern int mdcreate(Relation reln);
|
||||
extern int mdunlink(Relation reln);
|
||||
extern int mdextend(Relation reln, char *buffer);
|
||||
extern int mdopen(Relation reln);
|
||||
extern int mdclose(Relation reln);
|
||||
extern int mdread(Relation reln, BlockNumber blocknum, char *buffer);
|
||||
extern int mdwrite(Relation reln, BlockNumber blocknum, char *buffer);
|
||||
extern int mdflush(Relation reln, BlockNumber blocknum, char *buffer);
|
||||
extern int mdblindwrt(char *dbstr, char *relstr, Oid dbid, Oid relid,
|
||||
BlockNumber blkno, char *buffer);
|
||||
extern int mdnblocks(Relation reln);
|
||||
extern int mdcommit(void);
|
||||
extern int mdabort(void);
|
||||
|
||||
/* mm.c */
|
||||
extern SPINLOCK MMCacheLock;
|
||||
|
||||
extern int mminit(void);
|
||||
extern int mmshutdown(void);
|
||||
extern int mmcreate(Relation reln);
|
||||
extern int mmunlink(Relation reln);
|
||||
extern int mmextend(Relation reln, char *buffer);
|
||||
extern int mmopen(Relation reln);
|
||||
extern int mmclose(Relation reln);
|
||||
extern int mmread(Relation reln, BlockNumber blocknum, char *buffer);
|
||||
extern int mmwrite(Relation reln, BlockNumber blocknum, char *buffer);
|
||||
extern int mmflush(Relation reln, BlockNumber blocknum, char *buffer);
|
||||
extern int mmblindwrt(char *dbstr, char *relstr, Oid dbid, Oid relid,
|
||||
BlockNumber blkno, char *buffer);
|
||||
extern int mmnblocks(Relation reln);
|
||||
extern int mmcommit(void);
|
||||
extern int mmabort(void);
|
||||
extern int MMShmemSize(void);
|
||||
|
||||
#endif /* SMGR_H */
|
||||
14
src/backend/storage/smgr/Makefile.inc
Normal file
14
src/backend/storage/smgr/Makefile.inc
Normal file
@@ -0,0 +1,14 @@
|
||||
#-------------------------------------------------------------------------
|
||||
#
|
||||
# Makefile.inc--
|
||||
# Makefile for storage/smgr
|
||||
#
|
||||
# Copyright (c) 1994, Regents of the University of California
|
||||
#
|
||||
#
|
||||
# IDENTIFICATION
|
||||
# $Header: /cvsroot/pgsql/src/backend/storage/smgr/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:59 scrappy Exp $
|
||||
#
|
||||
#-------------------------------------------------------------------------
|
||||
|
||||
SUBSRCS+= md.c mm.c smgr.c smgrtype.c
|
||||
40
src/backend/storage/smgr/README
Normal file
40
src/backend/storage/smgr/README
Normal file
@@ -0,0 +1,40 @@
|
||||
# $Header: /cvsroot/pgsql/src/backend/storage/smgr/README,v 1.1.1.1 1996/07/09 06:21:59 scrappy Exp $
|
||||
|
||||
This directory contains the code that supports the Postgres storage manager
|
||||
switch and all of the installed storage managers. In released systems,
|
||||
the only supported storage manager is the magnetic disk manager. At UC
|
||||
Berkeley, the Sony WORM optical disk jukebox and persistent main memory are
|
||||
also supported.
|
||||
|
||||
As of Postgres Release 3.0, every relation in the system is tagged with the
|
||||
storage manager on which it resides. The storage manager switch code turns
|
||||
what used to be filesystem operations into operations on the correct store,
|
||||
for any given relation.
|
||||
|
||||
The files in this directory, and their contents, are
|
||||
|
||||
smgrtype.c Storage manager type -- maps string names to storage manager
|
||||
IDs and provides simple comparison operators. This is the
|
||||
regproc support for type 'smgr' in the system catalogs.
|
||||
|
||||
smgr.c The storage manager switch dispatch code. The routines in
|
||||
this file call the appropriate storage manager to do hardware
|
||||
accesses requested by the backend.
|
||||
|
||||
md.c The magnetic disk storage manager.
|
||||
|
||||
mm.c The persistent main memory storage manager (#undef'ed in
|
||||
tmp/c.h for all distributed systems).
|
||||
|
||||
sj.c The sony jukebox storage manager and cache management code
|
||||
(#undef'ed in tmp/c.h for all distributed systems). The
|
||||
routines in this file allocate extents, maintain block
|
||||
maps, and guarantee the persistence and coherency of a cache
|
||||
of jukebox blocks on magnetic disk.
|
||||
|
||||
pgjb.c The postgres jukebox interface routines. The routines here
|
||||
handle exclusion on the physical device and translate requests
|
||||
from the storage manager code (sj.c) into jbaccess calls.
|
||||
|
||||
jbaccess.c Access code for the physical Sony jukebox device. This code
|
||||
was swiped from Andy McFadden's jblib.a code at UC Berkeley.
|
||||
697
src/backend/storage/smgr/md.c
Normal file
697
src/backend/storage/smgr/md.c
Normal file
@@ -0,0 +1,697 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* md.c--
|
||||
* This code manages relations that reside on magnetic disk.
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.1.1.1 1996/07/09 06:21:59 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include <stdio.h> /* for sprintf() */
|
||||
#include <sys/file.h>
|
||||
|
||||
#include "postgres.h"
|
||||
#include "miscadmin.h" /* for DataDir */
|
||||
|
||||
#include "machine.h"
|
||||
#include "storage/smgr.h" /* where the declarations go */
|
||||
#include "storage/block.h"
|
||||
#include "storage/fd.h"
|
||||
#include "utils/mcxt.h"
|
||||
#include "utils/rel.h"
|
||||
#include "utils/elog.h"
|
||||
#include "utils/palloc.h"
|
||||
#include "catalog/catalog.h"
|
||||
|
||||
#undef DIAGNOSTIC
|
||||
|
||||
/*
|
||||
* The magnetic disk storage manager keeps track of open file descriptors
|
||||
* in its own descriptor pool. This happens for two reasons. First, at
|
||||
* transaction boundaries, we walk the list of descriptors and flush
|
||||
* anything that we've dirtied in the current transaction. Second, we
|
||||
* have to support relations of > 4GBytes. In order to do this, we break
|
||||
* relations up into chunks of < 2GBytes and store one chunk in each of
|
||||
* several files that represent the relation.
|
||||
*/
|
||||
|
||||
typedef struct _MdfdVec {
|
||||
int mdfd_vfd; /* fd number in vfd pool */
|
||||
uint16 mdfd_flags; /* clean, dirty */
|
||||
int mdfd_lstbcnt; /* most recent block count */
|
||||
struct _MdfdVec *mdfd_chain; /* for large relations */
|
||||
} MdfdVec;
|
||||
|
||||
static int Nfds = 100;
|
||||
static MdfdVec *Md_fdvec = (MdfdVec *) NULL;
|
||||
static int CurFd = 0;
|
||||
static MemoryContext MdCxt;
|
||||
|
||||
#define MDFD_DIRTY (uint16) 0x01
|
||||
|
||||
#define RELSEG_SIZE 262144 /* (2 ** 31) / 8192 -- 2GB file */
|
||||
|
||||
/* routines declared here */
|
||||
static MdfdVec *_mdfd_openseg(Relation reln, int segno, int oflags);
|
||||
static MdfdVec *_mdfd_getseg(Relation reln, int blkno, int oflag);
|
||||
static int _fdvec_ext(void);
|
||||
static BlockNumber _mdnblocks(File file, Size blcksz);
|
||||
|
||||
/*
|
||||
* mdinit() -- Initialize private state for magnetic disk storage manager.
|
||||
*
|
||||
* We keep a private table of all file descriptors. Whenever we do
|
||||
* a write to one, we mark it dirty in our table. Whenever we force
|
||||
* changes to disk, we mark the file descriptor clean. At transaction
|
||||
* commit, we force changes to disk for all dirty file descriptors.
|
||||
* This routine allocates and initializes the table.
|
||||
*
|
||||
* Returns SM_SUCCESS or SM_FAIL with errno set as appropriate.
|
||||
*/
|
||||
int
|
||||
mdinit()
|
||||
{
|
||||
MemoryContext oldcxt;
|
||||
|
||||
MdCxt = (MemoryContext) CreateGlobalMemory("MdSmgr");
|
||||
if (MdCxt == (MemoryContext) NULL)
|
||||
return (SM_FAIL);
|
||||
|
||||
oldcxt = MemoryContextSwitchTo(MdCxt);
|
||||
Md_fdvec = (MdfdVec *) palloc(Nfds * sizeof(MdfdVec));
|
||||
(void) MemoryContextSwitchTo(oldcxt);
|
||||
|
||||
if (Md_fdvec == (MdfdVec *) NULL)
|
||||
return (SM_FAIL);
|
||||
|
||||
memset(Md_fdvec, 0, Nfds * sizeof(MdfdVec));
|
||||
|
||||
return (SM_SUCCESS);
|
||||
}
|
||||
|
||||
/*
 * mdcreate() -- Create a new relation file on magnetic disk.
 *
 * Returns the index ("virtual fd") of the new entry in the private
 * Md_fdvec descriptor table, or -1 on failure.
 */
int
mdcreate(Relation reln)
{
    int fd, vfd;
    int tmp;		/* scratch byte(s) used only to probe for emptiness */
    char *path;
    extern bool IsBootstrapProcessingMode();

    path = relpath(&(reln->rd_rel->relname.data[0]));
    fd = FileNameOpenFile(path, O_RDWR|O_CREAT|O_EXCL, 0600);

    /*
     * If the file already exists and is empty, we pretend that the
     * create succeeded.  During bootstrap processing, we skip that check,
     * because pg_time, pg_variable, and pg_log get created before their
     * .bki file entries are processed.
     */

    if (fd < 0) {
	if ((fd = FileNameOpenFile(path, O_RDWR, 0600)) >= 0) {
	    /* a successful read means the file is non-empty: real conflict */
	    if (!IsBootstrapProcessingMode() &&
		FileRead(fd, (char *) &tmp, sizeof(tmp)) != 0) {
		FileClose(fd);
		return (-1);
	    }
	}
    }

    /* grow the private descriptor table if it is full */
    if (CurFd >= Nfds) {
	if (_fdvec_ext() == SM_FAIL)
	    return (-1);
    }

    /* register the new file: clean, unchained, block count unknown */
    Md_fdvec[CurFd].mdfd_vfd = fd;
    Md_fdvec[CurFd].mdfd_flags = (uint16) 0;
    Md_fdvec[CurFd].mdfd_chain = (MdfdVec *) NULL;
    Md_fdvec[CurFd].mdfd_lstbcnt = 0;

    vfd = CurFd++;

    return (vfd);
}
|
||||
|
||||
/*
|
||||
* mdunlink() -- Unlink a relation.
|
||||
*/
|
||||
/*
 * mdunlink() -- Unlink a relation.
 *
 * Removes the relation's main file and every overflow segment file
 * (<name>.1, <name>.2, ...), then resets the relation's entry in the
 * private descriptor table and frees its segment chain.
 *
 * Returns SM_SUCCESS or SM_FAIL.
 */
int
mdunlink(Relation reln)
{
    int fd;
    int i;
    MdfdVec *v, *ov;
    MemoryContext oldcxt;
    char fname[20]; /* XXX should have NAMESIZE defined */
    char tname[20];

    /* On Windows NT you can't unlink a file if it is open so we have
    ** to do this.
    */
#ifdef WIN32
    (void) mdclose(reln);
#endif /* WIN32 */


    memset(fname,0,20);
    strncpy(fname, RelationGetRelationName(reln)->data, 16);

    if (FileNameUnlink(fname) < 0)
	return (SM_FAIL);

    /* unlink all the overflow files for large relations */
    for (i = 1; ; i++) {
#ifdef WIN32
	(void) mdclose(reln);
#endif /* WIN32 */
	/* stop at the first missing segment: segments are contiguous */
	sprintf(tname, "%s.%d", fname, i);
	if (FileNameUnlink(tname) < 0)
	    break;
    }

    /* finally, clean out the mdfd vector */
    fd = RelationGetFile(reln);
    Md_fdvec[fd].mdfd_flags = (uint16) 0;

    /*
     * Free the chained per-segment entries, which were palloc'd in MdCxt.
     * The chain head lives inside the Md_fdvec array itself, so it must
     * not be pfree'd -- only the entries hanging off it.
     */
    oldcxt = MemoryContextSwitchTo(MdCxt);
    for (v = &Md_fdvec[fd]; v != (MdfdVec *) NULL; ) {
	ov = v;
	v = v->mdfd_chain;
	if (ov != &Md_fdvec[fd])
	    pfree(ov);
    }
    Md_fdvec[fd].mdfd_chain = (MdfdVec *) NULL;
    (void) MemoryContextSwitchTo(oldcxt);

    return (SM_SUCCESS);
}
|
||||
|
||||
/*
|
||||
* mdextend() -- Add a block to the specified relation.
|
||||
*
|
||||
* This routine returns SM_FAIL or SM_SUCCESS, with errno set as
|
||||
* appropriate.
|
||||
*/
|
||||
/*
 * mdextend() -- Add a block to the specified relation.
 *
 * This routine returns SM_FAIL or SM_SUCCESS, with errno set as
 * appropriate.
 */
int
mdextend(Relation reln, char *buffer)
{
    long pos;
    int nblocks;
    MdfdVec *v;

    /* locate (creating it if necessary) the segment that takes the block */
    nblocks = mdnblocks(reln);
    v = _mdfd_getseg(reln, nblocks, O_CREAT);

    /* append one page at the end of that segment */
    if ((pos = FileSeek(v->mdfd_vfd, 0L, SEEK_END)) < 0)
	return (SM_FAIL);

    if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ)
	return (SM_FAIL);

    /* remember that we did a write, so we can sync at xact commit */
    v->mdfd_flags |= MDFD_DIRTY;

    /* try to keep the last block count current, though it's just a hint */
    if ((v->mdfd_lstbcnt = (++nblocks % RELSEG_SIZE)) == 0)
	v->mdfd_lstbcnt = RELSEG_SIZE;	/* exactly full, not empty */

#ifdef DIAGNOSTIC
    if (_mdnblocks(v->mdfd_vfd, BLCKSZ) > RELSEG_SIZE
	|| v->mdfd_lstbcnt > RELSEG_SIZE)
	elog(FATAL, "segment too big!");
#endif

    return (SM_SUCCESS);
}
|
||||
|
||||
/*
|
||||
* mdopen() -- Open the specified relation.
|
||||
*/
|
||||
int
|
||||
mdopen(Relation reln)
|
||||
{
|
||||
char *path;
|
||||
int fd;
|
||||
int vfd;
|
||||
|
||||
if (CurFd >= Nfds) {
|
||||
if (_fdvec_ext() == SM_FAIL)
|
||||
return (-1);
|
||||
}
|
||||
|
||||
path = relpath(&(reln->rd_rel->relname.data[0]));
|
||||
|
||||
fd = FileNameOpenFile(path, O_RDWR, 0600);
|
||||
|
||||
/* this should only happen during bootstrap processing */
|
||||
if (fd < 0)
|
||||
fd = FileNameOpenFile(path, O_RDWR|O_CREAT|O_EXCL, 0600);
|
||||
|
||||
Md_fdvec[CurFd].mdfd_vfd = fd;
|
||||
Md_fdvec[CurFd].mdfd_flags = (uint16) 0;
|
||||
Md_fdvec[CurFd].mdfd_chain = (MdfdVec *) NULL;
|
||||
Md_fdvec[CurFd].mdfd_lstbcnt = _mdnblocks(fd, BLCKSZ);
|
||||
|
||||
#ifdef DIAGNOSTIC
|
||||
if (Md_fdvec[CurFd].mdfd_lstbcnt > RELSEG_SIZE)
|
||||
elog(FATAL, "segment too big on relopen!");
|
||||
#endif
|
||||
|
||||
vfd = CurFd++;
|
||||
|
||||
return (vfd);
|
||||
}
|
||||
|
||||
/*
|
||||
* mdclose() -- Close the specified relation.
|
||||
*
|
||||
* Returns SM_SUCCESS or SM_FAIL with errno set as appropriate.
|
||||
*/
|
||||
int
|
||||
mdclose(Relation reln)
|
||||
{
|
||||
int fd;
|
||||
MdfdVec *v;
|
||||
|
||||
fd = RelationGetFile(reln);
|
||||
|
||||
for (v = &Md_fdvec[fd]; v != (MdfdVec *) NULL; v = v->mdfd_chain) {
|
||||
|
||||
/* may be closed already */
|
||||
if (v->mdfd_vfd < 0)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* We sync the file descriptor so that we don't need to reopen it at
|
||||
* transaction commit to force changes to disk.
|
||||
*/
|
||||
|
||||
FileSync(v->mdfd_vfd);
|
||||
FileClose(v->mdfd_vfd);
|
||||
|
||||
/* mark this file descriptor as clean in our private table */
|
||||
v->mdfd_flags &= ~MDFD_DIRTY;
|
||||
}
|
||||
|
||||
return (SM_SUCCESS);
|
||||
}
|
||||
|
||||
/*
|
||||
* mdread() -- Read the specified block from a relation.
|
||||
*
|
||||
* Returns SM_SUCCESS or SM_FAIL.
|
||||
*/
|
||||
int
|
||||
mdread(Relation reln, BlockNumber blocknum, char *buffer)
|
||||
{
|
||||
int status;
|
||||
long seekpos;
|
||||
int nbytes;
|
||||
MdfdVec *v;
|
||||
|
||||
v = _mdfd_getseg(reln, blocknum, 0);
|
||||
|
||||
seekpos = (long) (BLCKSZ * (blocknum % RELSEG_SIZE));
|
||||
|
||||
#ifdef DIAGNOSTIC
|
||||
if (seekpos >= BLCKSZ * RELSEG_SIZE)
|
||||
elog(FATAL, "seekpos too big!");
|
||||
#endif
|
||||
|
||||
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) {
|
||||
return (SM_FAIL);
|
||||
}
|
||||
|
||||
status = SM_SUCCESS;
|
||||
if ((nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ) {
|
||||
if (nbytes == 0) {
|
||||
memset(buffer, 0, BLCKSZ);
|
||||
} else {
|
||||
status = SM_FAIL;
|
||||
}
|
||||
}
|
||||
|
||||
return (status);
|
||||
}
|
||||
|
||||
/*
|
||||
* mdwrite() -- Write the supplied block at the appropriate location.
|
||||
*
|
||||
* Returns SM_SUCCESS or SM_FAIL.
|
||||
*/
|
||||
int
|
||||
mdwrite(Relation reln, BlockNumber blocknum, char *buffer)
|
||||
{
|
||||
int status;
|
||||
long seekpos;
|
||||
MdfdVec *v;
|
||||
|
||||
v = _mdfd_getseg(reln, blocknum, 0);
|
||||
|
||||
seekpos = (long) (BLCKSZ * (blocknum % RELSEG_SIZE));
|
||||
#ifdef DIAGNOSTIC
|
||||
if (seekpos >= BLCKSZ * RELSEG_SIZE)
|
||||
elog(FATAL, "seekpos too big!");
|
||||
#endif
|
||||
|
||||
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) {
|
||||
return (SM_FAIL);
|
||||
}
|
||||
|
||||
status = SM_SUCCESS;
|
||||
if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ)
|
||||
status = SM_FAIL;
|
||||
|
||||
v->mdfd_flags |= MDFD_DIRTY;
|
||||
|
||||
return (status);
|
||||
}
|
||||
|
||||
/*
|
||||
* mdflush() -- Synchronously write a block to disk.
|
||||
*
|
||||
* This is exactly like mdwrite(), but doesn't return until the file
|
||||
* system buffer cache has been flushed.
|
||||
*/
|
||||
int
|
||||
mdflush(Relation reln, BlockNumber blocknum, char *buffer)
|
||||
{
|
||||
int status;
|
||||
long seekpos;
|
||||
MdfdVec *v;
|
||||
|
||||
v = _mdfd_getseg(reln, blocknum, 0);
|
||||
|
||||
seekpos = (long) (BLCKSZ * (blocknum % RELSEG_SIZE));
|
||||
#ifdef DIAGNOSTIC
|
||||
if (seekpos >= BLCKSZ * RELSEG_SIZE)
|
||||
elog(FATAL, "seekpos too big!");
|
||||
#endif
|
||||
|
||||
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) {
|
||||
return (SM_FAIL);
|
||||
}
|
||||
|
||||
/* write and sync the block */
|
||||
status = SM_SUCCESS;
|
||||
if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ
|
||||
|| FileSync(v->mdfd_vfd) < 0)
|
||||
status = SM_FAIL;
|
||||
|
||||
/*
|
||||
* By here, the block is written and changes have been forced to stable
|
||||
* storage. Mark the descriptor as clean until the next write, so we
|
||||
* don't sync it again unnecessarily at transaction commit.
|
||||
*/
|
||||
|
||||
v->mdfd_flags &= ~MDFD_DIRTY;
|
||||
|
||||
return (status);
|
||||
}
|
||||
|
||||
/*
|
||||
* mdblindwrt() -- Write a block to disk blind.
|
||||
*
|
||||
* We have to be able to do this using only the name and OID of
|
||||
* the database and relation in which the block belongs. This
|
||||
* is a synchronous write.
|
||||
*/
|
||||
int
|
||||
mdblindwrt(char *dbstr,
|
||||
char *relstr,
|
||||
Oid dbid,
|
||||
Oid relid,
|
||||
BlockNumber blkno,
|
||||
char *buffer)
|
||||
{
|
||||
int fd;
|
||||
int segno;
|
||||
long seekpos;
|
||||
int status;
|
||||
char *path;
|
||||
int nchars;
|
||||
|
||||
/* be sure we have enough space for the '.segno', if any */
|
||||
segno = blkno / RELSEG_SIZE;
|
||||
if (segno > 0)
|
||||
nchars = 10;
|
||||
else
|
||||
nchars = 0;
|
||||
|
||||
/* construct the path to the file and open it */
|
||||
if (dbid == (Oid) 0) {
|
||||
path = (char *) palloc(strlen(DataDir) + sizeof(NameData) + 2 + nchars);
|
||||
if (segno == 0)
|
||||
sprintf(path, "%s/%.*s", DataDir, NAMEDATALEN, relstr);
|
||||
else
|
||||
sprintf(path, "%s/%.*s.%d", DataDir, NAMEDATALEN, relstr, segno);
|
||||
} else {
|
||||
path = (char *) palloc(strlen(DataDir) + strlen("/base/") + 2 * sizeof(NameData) + 2 + nchars);
|
||||
if (segno == 0)
|
||||
sprintf(path, "%s/base/%.*s/%.*s", DataDir, NAMEDATALEN,
|
||||
dbstr, NAMEDATALEN, relstr);
|
||||
else
|
||||
sprintf(path, "%s/base/%.*s/%.*s.%d", DataDir, NAMEDATALEN, dbstr,
|
||||
NAMEDATALEN, relstr, segno);
|
||||
}
|
||||
|
||||
if ((fd = open(path, O_RDWR, 0600)) < 0)
|
||||
return (SM_FAIL);
|
||||
|
||||
/* seek to the right spot */
|
||||
seekpos = (long) (BLCKSZ * (blkno % RELSEG_SIZE));
|
||||
if (lseek(fd, seekpos, SEEK_SET) != seekpos) {
|
||||
(void) close(fd);
|
||||
return (SM_FAIL);
|
||||
}
|
||||
|
||||
status = SM_SUCCESS;
|
||||
|
||||
/* write and sync the block */
|
||||
if (write(fd, buffer, BLCKSZ) != BLCKSZ || fsync(fd) < 0)
|
||||
status = SM_FAIL;
|
||||
|
||||
if (close(fd) < 0)
|
||||
status = SM_FAIL;
|
||||
|
||||
pfree(path);
|
||||
|
||||
return (status);
|
||||
}
|
||||
|
||||
/*
 *	mdnblocks() -- Get the number of blocks stored in a relation.
 *
 *	Returns # of blocks or -1 on error.
 */
int
mdnblocks(Relation reln)
{
    int fd;
    MdfdVec *v;
    int nblocks;
    int segno;

    fd = RelationGetFile(reln);
    v = &Md_fdvec[fd];

#ifdef DIAGNOSTIC
    if (_mdnblocks(v->mdfd_vfd, BLCKSZ) > RELSEG_SIZE)
	elog(FATAL, "segment too big in getseg!");
#endif

    /*
     * Walk the segment chain: every full segment (RELSEG_SIZE blocks)
     * contributes RELSEG_SIZE to the total and we advance to the next
     * segment, opening it (O_CREAT) if it is not chained in yet.  The
     * first partially-filled segment terminates the scan.
     */
    segno = 0;
    for (;;) {
	if (v->mdfd_lstbcnt == RELSEG_SIZE
	    || (nblocks = _mdnblocks(v->mdfd_vfd, BLCKSZ)) == RELSEG_SIZE) {

	    /* this segment is full; cache that fact and move on */
	    v->mdfd_lstbcnt = RELSEG_SIZE;
	    segno++;

	    if (v->mdfd_chain == (MdfdVec *) NULL) {
		v->mdfd_chain = _mdfd_openseg(reln, segno, O_CREAT);
		if (v->mdfd_chain == (MdfdVec *) NULL)
		    elog(WARN, "cannot count blocks for %.16s -- open failed",
			 RelationGetRelationName(reln));
	    }

	    v = v->mdfd_chain;
	} else {
	    /* partial segment: total is the full segments plus its blocks */
	    return ((segno * RELSEG_SIZE) + nblocks);
	}
    }
}
|
||||
|
||||
/*
|
||||
* mdcommit() -- Commit a transaction.
|
||||
*
|
||||
* All changes to magnetic disk relations must be forced to stable
|
||||
* storage. This routine makes a pass over the private table of
|
||||
* file descriptors. Any descriptors to which we have done writes,
|
||||
* but not synced, are synced here.
|
||||
*
|
||||
* Returns SM_SUCCESS or SM_FAIL with errno set as appropriate.
|
||||
*/
|
||||
int
|
||||
mdcommit()
|
||||
{
|
||||
int i;
|
||||
MdfdVec *v;
|
||||
|
||||
for (i = 0; i < CurFd; i++) {
|
||||
for (v = &Md_fdvec[i]; v != (MdfdVec *) NULL; v = v->mdfd_chain) {
|
||||
if (v->mdfd_flags & MDFD_DIRTY) {
|
||||
if (FileSync(v->mdfd_vfd) < 0)
|
||||
return (SM_FAIL);
|
||||
|
||||
v->mdfd_flags &= ~MDFD_DIRTY;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return (SM_SUCCESS);
|
||||
}
|
||||
|
||||
/*
|
||||
* mdabort() -- Abort a transaction.
|
||||
*
|
||||
* Changes need not be forced to disk at transaction abort. We mark
|
||||
* all file descriptors as clean here. Always returns SM_SUCCESS.
|
||||
*/
|
||||
int
|
||||
mdabort()
|
||||
{
|
||||
int i;
|
||||
MdfdVec *v;
|
||||
|
||||
for (i = 0; i < CurFd; i++) {
|
||||
for (v = &Md_fdvec[i]; v != (MdfdVec *) NULL; v = v->mdfd_chain) {
|
||||
v->mdfd_flags &= ~MDFD_DIRTY;
|
||||
}
|
||||
}
|
||||
|
||||
return (SM_SUCCESS);
|
||||
}
|
||||
|
||||
/*
|
||||
* _fdvec_ext() -- Extend the md file descriptor vector.
|
||||
*
|
||||
* The file descriptor vector must be large enough to hold at least
|
||||
* 'fd' entries.
|
||||
*/
|
||||
static
|
||||
int _fdvec_ext()
|
||||
{
|
||||
MdfdVec *nvec;
|
||||
MemoryContext oldcxt;
|
||||
|
||||
Nfds *= 2;
|
||||
|
||||
oldcxt = MemoryContextSwitchTo(MdCxt);
|
||||
|
||||
nvec = (MdfdVec *) palloc(Nfds * sizeof(MdfdVec));
|
||||
memset(nvec, 0, Nfds * sizeof(MdfdVec));
|
||||
memmove(nvec, (char *) Md_fdvec, (Nfds / 2) * sizeof(MdfdVec));
|
||||
pfree(Md_fdvec);
|
||||
|
||||
(void) MemoryContextSwitchTo(oldcxt);
|
||||
|
||||
Md_fdvec = nvec;
|
||||
|
||||
return (SM_SUCCESS);
|
||||
}
|
||||
|
||||
static MdfdVec *
|
||||
_mdfd_openseg(Relation reln, int segno, int oflags)
|
||||
{
|
||||
MemoryContext oldcxt;
|
||||
MdfdVec *v;
|
||||
int fd;
|
||||
bool dofree;
|
||||
char *path, *fullpath;
|
||||
|
||||
/* be sure we have enough space for the '.segno', if any */
|
||||
path = relpath(RelationGetRelationName(reln)->data);
|
||||
|
||||
dofree = false;
|
||||
if (segno > 0) {
|
||||
dofree = true;
|
||||
fullpath = (char *) palloc(strlen(path) + 12);
|
||||
sprintf(fullpath, "%s.%d", path, segno);
|
||||
} else
|
||||
fullpath = path;
|
||||
|
||||
/* open the file */
|
||||
fd = PathNameOpenFile(fullpath, O_RDWR|oflags, 0600);
|
||||
|
||||
if (dofree)
|
||||
pfree(fullpath);
|
||||
|
||||
if (fd < 0)
|
||||
return ((MdfdVec *) NULL);
|
||||
|
||||
/* allocate an mdfdvec entry for it */
|
||||
oldcxt = MemoryContextSwitchTo(MdCxt);
|
||||
v = (MdfdVec *) palloc(sizeof(MdfdVec));
|
||||
(void) MemoryContextSwitchTo(oldcxt);
|
||||
|
||||
/* fill the entry */
|
||||
v->mdfd_vfd = fd;
|
||||
v->mdfd_flags = (uint16) 0;
|
||||
v->mdfd_chain = (MdfdVec *) NULL;
|
||||
v->mdfd_lstbcnt = _mdnblocks(fd, BLCKSZ);
|
||||
|
||||
#ifdef DIAGNOSTIC
|
||||
if (v->mdfd_lstbcnt > RELSEG_SIZE)
|
||||
elog(FATAL, "segment too big on open!");
|
||||
#endif
|
||||
|
||||
/* all done */
|
||||
return (v);
|
||||
}
|
||||
|
||||
static MdfdVec *
|
||||
_mdfd_getseg(Relation reln, int blkno, int oflag)
|
||||
{
|
||||
MdfdVec *v;
|
||||
int segno;
|
||||
int fd;
|
||||
int i;
|
||||
|
||||
fd = RelationGetFile(reln);
|
||||
if (fd < 0) {
|
||||
if ((fd = mdopen(reln)) < 0)
|
||||
elog(WARN, "cannot open relation %.16s",
|
||||
RelationGetRelationName(reln));
|
||||
reln->rd_fd = fd;
|
||||
}
|
||||
|
||||
for (v = &Md_fdvec[fd], segno = blkno / RELSEG_SIZE, i = 1;
|
||||
segno > 0;
|
||||
i++, segno--) {
|
||||
|
||||
if (v->mdfd_chain == (MdfdVec *) NULL) {
|
||||
v->mdfd_chain = _mdfd_openseg(reln, i, oflag);
|
||||
|
||||
if (v->mdfd_chain == (MdfdVec *) NULL)
|
||||
elog(WARN, "cannot open segment %d of relation %.16s",
|
||||
i, RelationGetRelationName(reln));
|
||||
}
|
||||
v = v->mdfd_chain;
|
||||
}
|
||||
|
||||
return (v);
|
||||
}
|
||||
|
||||
static BlockNumber
|
||||
_mdnblocks(File file, Size blcksz)
|
||||
{
|
||||
long len;
|
||||
|
||||
len = FileSeek(file, 0L, SEEK_END) - 1;
|
||||
return((BlockNumber)((len < 0) ? 0 : 1 + len / blcksz));
|
||||
}
|
||||
586
src/backend/storage/smgr/mm.c
Normal file
586
src/backend/storage/smgr/mm.c
Normal file
@@ -0,0 +1,586 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* mm.c--
|
||||
* main memory storage manager
|
||||
*
|
||||
* This code manages relations that reside in (presumably stable)
|
||||
* main memory.
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/Attic/mm.c,v 1.1.1.1 1996/07/09 06:21:59 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#ifdef MAIN_MEMORY
|
||||
|
||||
#include <math.h>
|
||||
#include "machine.h"
|
||||
#include "storage/ipc.h"
|
||||
#include "storage/smgr.h" /* where the declarations go */
|
||||
#include "storage/block.h"
|
||||
#include "storage/shmem.h"
|
||||
#include "storage/spin.h"
|
||||
|
||||
#include "utils/hsearch.h"
|
||||
#include "utils/rel.h"
|
||||
#include "utils/elog.h"
|
||||
#include "utils/memutils.h"
|
||||
|
||||
/*
 *  MMCacheTag -- Unique triplet for blocks stored by the main memory
 *		  storage manager.
 */

typedef struct MMCacheTag {
    Oid		mmct_dbid;	/* owning database (0 for shared relations) */
    Oid		mmct_relid;	/* owning relation */
    BlockNumber	mmct_blkno;	/* block number within the relation */
} MMCacheTag;

/*
 *  Shared-memory hash table for main memory relations contains
 *  entries of this form.
 */

typedef struct MMHashEntry {
    MMCacheTag	mmhe_tag;	/* hash key: (dbid, relid, blkno) */
    int		mmhe_bufno;	/* buffer slot index into MMBlockCache */
} MMHashEntry;

/*
 * MMRelTag -- Unique identifier for each relation that is stored in the
 *	       main-memory storage manager.
 */

typedef struct MMRelTag {
    Oid		mmrt_dbid;	/* owning database (0 for shared relations) */
    Oid		mmrt_relid;	/* relation OID */
} MMRelTag;

/*
 *  Shared-memory hash table for # blocks in main memory relations contains
 *  entries of this form.
 */

typedef struct MMRelHashEntry {
    MMRelTag	mmrhe_tag;	/* hash key: (dbid, relid) */
    int		mmrhe_nblocks;	/* number of blocks stored for the relation */
} MMRelHashEntry;
|
||||
|
||||
/* fixed capacities of the main-memory store */
#define MMNBUFFERS	10
#define MMNRELATIONS	2

/* spinlock guarding all of the shared state below */
SPINLOCK MMCacheLock;
extern bool IsPostmaster;
extern Oid MyDatabaseId;

/* pointers into the shared-memory block carved up by mminit() */
static int *MMCurTop;		/* next fresh buffer slot (high-water mark) */
static int *MMCurRelno;		/* number of main-memory relations in use */
static MMCacheTag *MMBlockTags;	/* per-slot identity tags */
static char *MMBlockCache;	/* the cached pages themselves */
static HTAB *MMCacheHT;		/* (dbid,relid,blkno) -> buffer slot */
static HTAB *MMRelCacheHT;	/* (dbid,relid) -> block count */
|
||||
|
||||
/*
 *	mminit() -- Attach to (and, in the postmaster, zero-fill) the
 *	shared-memory structures of the main memory storage manager.
 *
 *	Returns SM_SUCCESS, or SM_FAIL if any shared structure cannot
 *	be initialized.
 */
int
mminit()
{
    char *mmcacheblk;
    int mmsize = 0;
    bool found;
    HASHCTL info;

    SpinAcquire(MMCacheLock);

    /* total size of the raw shared block: pages, two counters, tags */
    mmsize += MAXALIGN(BLCKSZ * MMNBUFFERS);
    mmsize += MAXALIGN(sizeof(*MMCurTop));
    mmsize += MAXALIGN(sizeof(*MMCurRelno));
    mmsize += MAXALIGN((MMNBUFFERS * sizeof(MMCacheTag)));
    mmcacheblk = (char *) ShmemInitStruct("Main memory smgr", mmsize, &found);

    if (mmcacheblk == (char *) NULL) {
	SpinRelease(MMCacheLock);
	return (SM_FAIL);
    }

    /* (dbid,relid,blkno) -> buffer slot */
    info.keysize = sizeof(MMCacheTag);
    info.datasize = sizeof(int);
    info.hash = tag_hash;

    MMCacheHT = (HTAB *) ShmemInitHash("Main memory store HT",
				       MMNBUFFERS, MMNBUFFERS,
				       &info, (HASH_ELEM|HASH_FUNCTION));

    if (MMCacheHT == (HTAB *) NULL) {
	SpinRelease(MMCacheLock);
	return (SM_FAIL);
    }

    /* (dbid,relid) -> block count */
    info.keysize = sizeof(MMRelTag);
    info.datasize = sizeof(int);
    info.hash = tag_hash;

    MMRelCacheHT = (HTAB *) ShmemInitHash("Main memory rel HT",
					  MMNRELATIONS, MMNRELATIONS,
					  &info, (HASH_ELEM|HASH_FUNCTION));

    if (MMRelCacheHT == (HTAB *) NULL) {
	SpinRelease(MMCacheLock);
	return (SM_FAIL);
    }

    /* the postmaster only zero-fills the block; it never carves out the
     * backend-local pointers below */
    if (IsPostmaster) {
	memset(mmcacheblk, 0, mmsize);
	SpinRelease(MMCacheLock);
	return (SM_SUCCESS);
    }

    SpinRelease(MMCacheLock);

    /*
     * Carve the shared block into its pieces.  NOTE(review): the carve
     * order (counters, tags, pages) differs from the accumulation order
     * used to compute mmsize (pages first), and unlike the size
     * computation no MAXALIGN padding is applied here -- verify this
     * matches what other code expects of the layout.
     */
    MMCurTop = (int *) mmcacheblk;
    mmcacheblk += sizeof(int);
    MMCurRelno = (int *) mmcacheblk;
    mmcacheblk += sizeof(int);
    MMBlockTags = (MMCacheTag *) mmcacheblk;
    mmcacheblk += (MMNBUFFERS * sizeof(MMCacheTag));
    MMBlockCache = mmcacheblk;

    return (SM_SUCCESS);
}
|
||||
|
||||
int
|
||||
mmshutdown()
|
||||
{
|
||||
return (SM_SUCCESS);
|
||||
}
|
||||
|
||||
int
|
||||
mmcreate(Relation reln)
|
||||
{
|
||||
MMRelHashEntry *entry;
|
||||
bool found;
|
||||
MMRelTag tag;
|
||||
|
||||
SpinAcquire(MMCacheLock);
|
||||
|
||||
if (*MMCurRelno == MMNRELATIONS) {
|
||||
SpinRelease(MMCacheLock);
|
||||
return (SM_FAIL);
|
||||
}
|
||||
|
||||
(*MMCurRelno)++;
|
||||
|
||||
tag.mmrt_relid = reln->rd_id;
|
||||
if (reln->rd_rel->relisshared)
|
||||
tag.mmrt_dbid = (Oid) 0;
|
||||
else
|
||||
tag.mmrt_dbid = MyDatabaseId;
|
||||
|
||||
entry = (MMRelHashEntry *) hash_search(MMRelCacheHT,
|
||||
(char *) &tag, HASH_ENTER, &found);
|
||||
|
||||
if (entry == (MMRelHashEntry *) NULL) {
|
||||
SpinRelease(MMCacheLock);
|
||||
elog(FATAL, "main memory storage mgr rel cache hash table corrupt");
|
||||
}
|
||||
|
||||
if (found) {
|
||||
/* already exists */
|
||||
SpinRelease(MMCacheLock);
|
||||
return (SM_FAIL);
|
||||
}
|
||||
|
||||
entry->mmrhe_nblocks = 0;
|
||||
|
||||
SpinRelease(MMCacheLock);
|
||||
|
||||
return (SM_SUCCESS);
|
||||
}
|
||||
|
||||
/*
|
||||
* mmunlink() -- Unlink a relation.
|
||||
*/
|
||||
int
|
||||
mmunlink(Relation reln)
|
||||
{
|
||||
int i;
|
||||
Oid reldbid;
|
||||
MMHashEntry *entry;
|
||||
MMRelHashEntry *rentry;
|
||||
bool found;
|
||||
MMRelTag rtag;
|
||||
|
||||
if (reln->rd_rel->relisshared)
|
||||
reldbid = (Oid) 0;
|
||||
else
|
||||
reldbid = MyDatabaseId;
|
||||
|
||||
SpinAcquire(MMCacheLock);
|
||||
|
||||
for (i = 0; i < MMNBUFFERS; i++) {
|
||||
if (MMBlockTags[i].mmct_dbid == reldbid
|
||||
&& MMBlockTags[i].mmct_relid == reln->rd_id) {
|
||||
entry = (MMHashEntry *) hash_search(MMCacheHT,
|
||||
(char *) &MMBlockTags[i],
|
||||
HASH_REMOVE, &found);
|
||||
if (entry == (MMHashEntry *) NULL || !found) {
|
||||
SpinRelease(MMCacheLock);
|
||||
elog(FATAL, "mmunlink: cache hash table corrupted");
|
||||
}
|
||||
MMBlockTags[i].mmct_dbid = (Oid) 0;
|
||||
MMBlockTags[i].mmct_relid = (Oid) 0;
|
||||
MMBlockTags[i].mmct_blkno = (BlockNumber) 0;
|
||||
}
|
||||
}
|
||||
rtag.mmrt_dbid = reldbid;
|
||||
rtag.mmrt_relid = reln->rd_id;
|
||||
|
||||
rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT, (char *) &rtag,
|
||||
HASH_REMOVE, &found);
|
||||
|
||||
if (rentry == (MMRelHashEntry *) NULL || !found) {
|
||||
SpinRelease(MMCacheLock);
|
||||
elog(FATAL, "mmunlink: rel cache hash table corrupted");
|
||||
}
|
||||
|
||||
(*MMCurRelno)--;
|
||||
|
||||
SpinRelease(MMCacheLock);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 *	mmextend() -- Add a block to the specified relation.
 *
 *	This routine returns SM_FAIL or SM_SUCCESS, with errno set as
 *	appropriate.
 */
int
mmextend(Relation reln, char *buffer)
{
    MMRelHashEntry *rentry;
    MMHashEntry *entry;
    int i;
    Oid reldbid;
    int offset;
    bool found;
    MMRelTag rtag;
    MMCacheTag tag;

    /* shared relations are tagged with database id 0 */
    if (reln->rd_rel->relisshared)
	reldbid = (Oid) 0;
    else
	reldbid = MyDatabaseId;

    tag.mmct_dbid = rtag.mmrt_dbid = reldbid;
    tag.mmct_relid = rtag.mmrt_relid = reln->rd_id;

    SpinAcquire(MMCacheLock);

    /*
     * Pick a buffer slot: once the high-water mark has reached capacity,
     * scan for a slot freed by mmunlink() (zeroed tag); otherwise take
     * the next fresh slot.
     */
    if (*MMCurTop == MMNBUFFERS) {
	for (i = 0; i < MMNBUFFERS; i++) {
	    if (MMBlockTags[i].mmct_dbid == 0 &&
		MMBlockTags[i].mmct_relid == 0)
		break;
	}
	if (i == MMNBUFFERS) {
	    /* no free slot: the store is full */
	    SpinRelease(MMCacheLock);
	    return (SM_FAIL);
	}
    } else {
	i = *MMCurTop;
	(*MMCurTop)++;
    }

    rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT, (char *) &rtag,
					    HASH_FIND, &found);
    if (rentry == (MMRelHashEntry *) NULL || !found) {
	SpinRelease(MMCacheLock);
	elog(FATAL, "mmextend: rel cache hash table corrupt");
    }

    /* the new block goes at the end of the relation */
    tag.mmct_blkno = rentry->mmrhe_nblocks;

    entry = (MMHashEntry *) hash_search(MMCacheHT, (char *) &tag,
					HASH_ENTER, &found);
    /* 'found' here would mean the block already exists -- corruption */
    if (entry == (MMHashEntry *) NULL || found) {
	SpinRelease(MMCacheLock);
	elog(FATAL, "mmextend: cache hash table corrupt");
    }

    entry->mmhe_bufno = i;
    MMBlockTags[i].mmct_dbid = reldbid;
    MMBlockTags[i].mmct_relid = reln->rd_id;
    MMBlockTags[i].mmct_blkno = rentry->mmrhe_nblocks;

    /* page numbers are zero-based, so we increment this at the end */
    (rentry->mmrhe_nblocks)++;

    /* write the extended page */
    offset = (i * BLCKSZ);
    memmove(&(MMBlockCache[offset]), buffer, BLCKSZ);

    SpinRelease(MMCacheLock);

    return (SM_SUCCESS);
}
|
||||
|
||||
/*
|
||||
* mmopen() -- Open the specified relation.
|
||||
*/
|
||||
int
|
||||
mmopen(Relation reln)
|
||||
{
|
||||
/* automatically successful */
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* mmclose() -- Close the specified relation.
|
||||
*
|
||||
* Returns SM_SUCCESS or SM_FAIL with errno set as appropriate.
|
||||
*/
|
||||
int
|
||||
mmclose(Relation reln)
|
||||
{
|
||||
/* automatically successful */
|
||||
return (SM_SUCCESS);
|
||||
}
|
||||
|
||||
/*
|
||||
* mmread() -- Read the specified block from a relation.
|
||||
*
|
||||
* Returns SM_SUCCESS or SM_FAIL.
|
||||
*/
|
||||
int
|
||||
mmread(Relation reln, BlockNumber blocknum, char *buffer)
|
||||
{
|
||||
MMHashEntry *entry;
|
||||
bool found;
|
||||
int offset;
|
||||
MMCacheTag tag;
|
||||
|
||||
if (reln->rd_rel->relisshared)
|
||||
tag.mmct_dbid = (Oid) 0;
|
||||
else
|
||||
tag.mmct_dbid = MyDatabaseId;
|
||||
|
||||
tag.mmct_relid = reln->rd_id;
|
||||
tag.mmct_blkno = blocknum;
|
||||
|
||||
SpinAcquire(MMCacheLock);
|
||||
entry = (MMHashEntry *) hash_search(MMCacheHT, (char *) &tag,
|
||||
HASH_FIND, &found);
|
||||
|
||||
if (entry == (MMHashEntry *) NULL) {
|
||||
SpinRelease(MMCacheLock);
|
||||
elog(FATAL, "mmread: hash table corrupt");
|
||||
}
|
||||
|
||||
if (!found) {
|
||||
/* reading nonexistent pages is defined to fill them with zeroes */
|
||||
SpinRelease(MMCacheLock);
|
||||
memset(buffer, 0, BLCKSZ);
|
||||
return (SM_SUCCESS);
|
||||
}
|
||||
|
||||
offset = (entry->mmhe_bufno * BLCKSZ);
|
||||
memmove(buffer, &MMBlockCache[offset], BLCKSZ);
|
||||
|
||||
SpinRelease(MMCacheLock);
|
||||
|
||||
return (SM_SUCCESS);
|
||||
}
|
||||
|
||||
/*
|
||||
* mmwrite() -- Write the supplied block at the appropriate location.
|
||||
*
|
||||
* Returns SM_SUCCESS or SM_FAIL.
|
||||
*/
|
||||
int
|
||||
mmwrite(Relation reln, BlockNumber blocknum, char *buffer)
|
||||
{
|
||||
MMHashEntry *entry;
|
||||
bool found;
|
||||
int offset;
|
||||
MMCacheTag tag;
|
||||
|
||||
if (reln->rd_rel->relisshared)
|
||||
tag.mmct_dbid = (Oid) 0;
|
||||
else
|
||||
tag.mmct_dbid = MyDatabaseId;
|
||||
|
||||
tag.mmct_relid = reln->rd_id;
|
||||
tag.mmct_blkno = blocknum;
|
||||
|
||||
SpinAcquire(MMCacheLock);
|
||||
entry = (MMHashEntry *) hash_search(MMCacheHT, (char *) &tag,
|
||||
HASH_FIND, &found);
|
||||
|
||||
if (entry == (MMHashEntry *) NULL) {
|
||||
SpinRelease(MMCacheLock);
|
||||
elog(FATAL, "mmread: hash table corrupt");
|
||||
}
|
||||
|
||||
if (!found) {
|
||||
SpinRelease(MMCacheLock);
|
||||
elog(FATAL, "mmwrite: hash table missing requested page");
|
||||
}
|
||||
|
||||
offset = (entry->mmhe_bufno * BLCKSZ);
|
||||
memmove(&MMBlockCache[offset], buffer, BLCKSZ);
|
||||
|
||||
SpinRelease(MMCacheLock);
|
||||
|
||||
return (SM_SUCCESS);
|
||||
}
|
||||
|
||||
/*
|
||||
* mmflush() -- Synchronously write a block to stable storage.
|
||||
*
|
||||
* For main-memory relations, this is exactly equivalent to mmwrite().
|
||||
*/
|
||||
int
|
||||
mmflush(Relation reln, BlockNumber blocknum, char *buffer)
|
||||
{
|
||||
return (mmwrite(reln, blocknum, buffer));
|
||||
}
|
||||
|
||||
/*
|
||||
* mmblindwrt() -- Write a block to stable storage blind.
|
||||
*
|
||||
* We have to be able to do this using only the name and OID of
|
||||
* the database and relation in which the block belongs.
|
||||
*/
|
||||
int
|
||||
mmblindwrt(char *dbstr,
|
||||
char *relstr,
|
||||
Oid dbid,
|
||||
Oid relid,
|
||||
BlockNumber blkno,
|
||||
char *buffer)
|
||||
{
|
||||
return (SM_FAIL);
|
||||
}
|
||||
|
||||
/*
|
||||
* mmnblocks() -- Get the number of blocks stored in a relation.
|
||||
*
|
||||
* Returns # of blocks or -1 on error.
|
||||
*/
|
||||
int
|
||||
mmnblocks(Relation reln)
|
||||
{
|
||||
MMRelTag rtag;
|
||||
MMRelHashEntry *rentry;
|
||||
bool found;
|
||||
int nblocks;
|
||||
|
||||
if (reln->rd_rel->relisshared)
|
||||
rtag.mmrt_dbid = (Oid) 0;
|
||||
else
|
||||
rtag.mmrt_dbid = MyDatabaseId;
|
||||
|
||||
rtag.mmrt_relid = reln->rd_id;
|
||||
|
||||
SpinAcquire(MMCacheLock);
|
||||
|
||||
rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT, (char *) &rtag,
|
||||
HASH_FIND, &found);
|
||||
|
||||
if (rentry == (MMRelHashEntry *) NULL) {
|
||||
SpinRelease(MMCacheLock);
|
||||
elog(FATAL, "mmnblocks: rel cache hash table corrupt");
|
||||
}
|
||||
|
||||
if (found)
|
||||
nblocks = rentry->mmrhe_nblocks;
|
||||
else
|
||||
nblocks = -1;
|
||||
|
||||
SpinRelease(MMCacheLock);
|
||||
|
||||
return (nblocks);
|
||||
}
|
||||
|
||||
/*
|
||||
* mmcommit() -- Commit a transaction.
|
||||
*
|
||||
* Returns SM_SUCCESS or SM_FAIL with errno set as appropriate.
|
||||
*/
|
||||
int
|
||||
mmcommit()
|
||||
{
|
||||
return (SM_SUCCESS);
|
||||
}
|
||||
|
||||
/*
|
||||
* mmabort() -- Abort a transaction.
|
||||
*/
|
||||
|
||||
int
|
||||
mmabort()
|
||||
{
|
||||
return (SM_SUCCESS);
|
||||
}
|
||||
|
||||
/*
 *	MMShmemSize() -- Declare amount of shared memory we require.
 *
 *	The shared memory initialization code creates a block of shared
 *	memory exactly big enough to hold all the structures it needs to.
 *	This routine declares how much space the main memory storage
 *	manager will use.
 */
int
MMShmemSize()
{
    int size = 0;
    int nbuckets;
    int nsegs;
    int tmp;

    /*
     * first compute space occupied by the (dbid,relid,blkno) hash table
     */

    /* NOTE(review): bucket/segment rounding presumably mirrors the
     * dynahash allocator's internal layout -- verify against hsearch.h */
    nbuckets = 1 << (int)my_log2((MMNBUFFERS - 1) / DEF_FFACTOR + 1);
    nsegs = 1 << (int)my_log2((nbuckets - 1) / DEF_SEGSIZE + 1);

    size += MAXALIGN(my_log2(MMNBUFFERS) * sizeof(void *));
    size += MAXALIGN(sizeof(HHDR));
    size += nsegs * MAXALIGN(DEF_SEGSIZE * sizeof(SEGMENT));
    tmp = (int)ceil((double)MMNBUFFERS/BUCKET_ALLOC_INCR);
    size += tmp * BUCKET_ALLOC_INCR *
	(MAXALIGN(sizeof(BUCKET_INDEX)) +
	 MAXALIGN(sizeof(MMHashEntry)));	/* contains hash key */

    /*
     * now do the same for the rel hash table
     */

    size += MAXALIGN(my_log2(MMNRELATIONS) * sizeof(void *));
    size += MAXALIGN(sizeof(HHDR));
    size += nsegs * MAXALIGN(DEF_SEGSIZE * sizeof(SEGMENT));
    tmp = (int)ceil((double)MMNRELATIONS/BUCKET_ALLOC_INCR);
    size += tmp * BUCKET_ALLOC_INCR *
	(MAXALIGN(sizeof(BUCKET_INDEX)) +
	 MAXALIGN(sizeof(MMRelHashEntry)));	/* contains hash key */

    /*
     * finally, add in the memory block we use directly
     */

    size += MAXALIGN(BLCKSZ * MMNBUFFERS);
    size += MAXALIGN(sizeof(*MMCurTop));
    size += MAXALIGN(sizeof(*MMCurRelno));
    size += MAXALIGN(MMNBUFFERS * sizeof(MMCacheTag));

    return (size);
}
|
||||
|
||||
#endif /* MAIN_MEMORY */
|
||||
371
src/backend/storage/smgr/smgr.c
Normal file
371
src/backend/storage/smgr/smgr.c
Normal file
@@ -0,0 +1,371 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* smgr.c--
|
||||
* public interface routines to storage manager switch.
|
||||
*
|
||||
* All file system operations in POSTGRES dispatch through these
|
||||
* routines.
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.1.1.1 1996/07/09 06:21:59 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include <string.h>
|
||||
#include "postgres.h"
|
||||
|
||||
#include "machine.h"
|
||||
#include "storage/ipc.h"
|
||||
#include "storage/smgr.h"
|
||||
#include "storage/block.h"
|
||||
#include "utils/rel.h"
|
||||
#include "utils/elog.h"
|
||||
#include "utils/palloc.h"
|
||||
|
||||
/* dispatch record: one set of entry points per storage manager; hooks
 * marked "may be NULL" are skipped when absent */
typedef struct f_smgr {
    int (*smgr_init)();		/* may be NULL */
    int (*smgr_shutdown)();	/* may be NULL */
    int (*smgr_create)();
    int (*smgr_unlink)();
    int (*smgr_extend)();
    int (*smgr_open)();
    int (*smgr_close)();
    int (*smgr_read)();
    int (*smgr_write)();
    int (*smgr_flush)();
    int (*smgr_blindwrt)();
    int (*smgr_nblocks)();
    int (*smgr_commit)();	/* may be NULL */
    int (*smgr_abort)();	/* may be NULL */
} f_smgr;

/*
 *  The weird placement of commas in this init block is to keep the compiler
 *  happy, regardless of what storage managers we have (or don't have).
 */

static f_smgr smgrsw[] = {

    /* magnetic disk */
    { mdinit, NULL, mdcreate, mdunlink, mdextend, mdopen, mdclose,
      mdread, mdwrite, mdflush, mdblindwrt, mdnblocks, mdcommit, mdabort },

#ifdef MAIN_MEMORY
    /* main memory */
    { mminit, mmshutdown, mmcreate, mmunlink, mmextend, mmopen, mmclose,
      mmread, mmwrite, mmflush, mmblindwrt, mmnblocks, mmcommit, mmabort },

#endif /* MAIN_MEMORY */
};

/*
 *  This array records which storage managers are write-once, and which
 *  support overwrite.  A 'true' entry means that the storage manager is
 *  write-once.  In the best of all possible worlds, there would be no
 *  write-once storage managers.
 */

static bool smgrwo[] = {
    false,		/* magnetic disk */
#ifdef MAIN_MEMORY
    false,		/* main memory*/
#endif /* MAIN_MEMORY */
};

/* number of entries in smgrsw[]; the 'which' arguments index into it */
static int NSmgr = lengthof(smgrsw);
|
||||
|
||||
/*
|
||||
* smgrinit(), smgrshutdown() -- Initialize or shut down all storage
|
||||
* managers.
|
||||
*
|
||||
*/
|
||||
int
|
||||
smgrinit()
|
||||
{
|
||||
int i;
|
||||
extern char *smgrout();
|
||||
|
||||
for (i = 0; i < NSmgr; i++) {
|
||||
if (smgrsw[i].smgr_init) {
|
||||
if ((*(smgrsw[i].smgr_init))() == SM_FAIL)
|
||||
elog(FATAL, "initialization failed on %s", smgrout(i));
|
||||
}
|
||||
}
|
||||
|
||||
/* register the shutdown proc */
|
||||
on_exitpg(smgrshutdown, 0);
|
||||
|
||||
return (SM_SUCCESS);
|
||||
}
|
||||
|
||||
void
|
||||
smgrshutdown(int dummy)
|
||||
{
|
||||
int i;
|
||||
extern char *smgrout();
|
||||
|
||||
for (i = 0; i < NSmgr; i++) {
|
||||
if (smgrsw[i].smgr_shutdown) {
|
||||
if ((*(smgrsw[i].smgr_shutdown))() == SM_FAIL)
|
||||
elog(FATAL, "shutdown failed on %s", smgrout(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* smgrcreate() -- Create a new relation.
|
||||
*
|
||||
* This routine takes a reldesc, creates the relation on the appropriate
|
||||
* device, and returns a file descriptor for it.
|
||||
*/
|
||||
int
|
||||
smgrcreate(int16 which, Relation reln)
|
||||
{
|
||||
int fd;
|
||||
|
||||
if ((fd = (*(smgrsw[which].smgr_create))(reln)) < 0)
|
||||
elog(WARN, "cannot open %.*s",
|
||||
NAMEDATALEN, &(reln->rd_rel->relname.data[0]));
|
||||
|
||||
return (fd);
|
||||
}
|
||||
|
||||
/*
|
||||
* smgrunlink() -- Unlink a relation.
|
||||
*
|
||||
* The relation is removed from the store.
|
||||
*/
|
||||
int
|
||||
smgrunlink(int16 which, Relation reln)
|
||||
{
|
||||
int status;
|
||||
|
||||
if ((status = (*(smgrsw[which].smgr_unlink))(reln)) == SM_FAIL)
|
||||
elog(WARN, "cannot unlink %.*s",
|
||||
NAMEDATALEN, &(reln->rd_rel->relname.data[0]));
|
||||
|
||||
return (status);
|
||||
}
|
||||
|
||||
/*
|
||||
* smgrextend() -- Add a new block to a file.
|
||||
*
|
||||
* Returns SM_SUCCESS on success; aborts the current transaction on
|
||||
* failure.
|
||||
*/
|
||||
int
|
||||
smgrextend(int16 which, Relation reln, char *buffer)
|
||||
{
|
||||
int status;
|
||||
|
||||
status = (*(smgrsw[which].smgr_extend))(reln, buffer);
|
||||
|
||||
if (status == SM_FAIL)
|
||||
elog(WARN, "%.*s: cannot extend",
|
||||
NAMEDATALEN, &(reln->rd_rel->relname.data[0]));
|
||||
|
||||
return (status);
|
||||
}
|
||||
|
||||
/*
|
||||
* smgropen() -- Open a relation using a particular storage manager.
|
||||
*
|
||||
* Returns the fd for the open relation on success, aborts the
|
||||
* transaction on failure.
|
||||
*/
|
||||
int
|
||||
smgropen(int16 which, Relation reln)
|
||||
{
|
||||
int fd;
|
||||
|
||||
if ((fd = (*(smgrsw[which].smgr_open))(reln)) < 0)
|
||||
elog(WARN, "cannot open %.*s",
|
||||
NAMEDATALEN, &(reln->rd_rel->relname.data[0]));
|
||||
|
||||
return (fd);
|
||||
}
|
||||
|
||||
/*
|
||||
* smgrclose() -- Close a relation.
|
||||
*
|
||||
* Returns SM_SUCCESS on success, aborts on failure.
|
||||
*/
|
||||
int
|
||||
smgrclose(int16 which, Relation reln)
|
||||
{
|
||||
if ((*(smgrsw[which].smgr_close))(reln) == SM_FAIL)
|
||||
elog(WARN, "cannot close %.*s",
|
||||
NAMEDATALEN, &(reln->rd_rel->relname.data[0]));
|
||||
|
||||
return (SM_SUCCESS);
|
||||
}
|
||||
|
||||
/*
|
||||
* smgrread() -- read a particular block from a relation into the supplied
|
||||
* buffer.
|
||||
*
|
||||
* This routine is called from the buffer manager in order to
|
||||
* instantiate pages in the shared buffer cache. All storage managers
|
||||
* return pages in the format that POSTGRES expects. This routine
|
||||
* dispatches the read. On success, it returns SM_SUCCESS. On failure,
|
||||
* the current transaction is aborted.
|
||||
*/
|
||||
int
|
||||
smgrread(int16 which, Relation reln, BlockNumber blocknum, char *buffer)
|
||||
{
|
||||
int status;
|
||||
|
||||
status = (*(smgrsw[which].smgr_read))(reln, blocknum, buffer);
|
||||
|
||||
if (status == SM_FAIL)
|
||||
elog(WARN, "cannot read block %d of %.*s",
|
||||
blocknum, NAMEDATALEN, &(reln->rd_rel->relname.data[0]));
|
||||
|
||||
return (status);
|
||||
}
|
||||
|
||||
/*
|
||||
* smgrwrite() -- Write the supplied buffer out.
|
||||
*
|
||||
* This is not a synchronous write -- the interface for that is
|
||||
* smgrflush(). The buffer is written out via the appropriate
|
||||
* storage manager. This routine returns SM_SUCCESS or aborts
|
||||
* the current transaction.
|
||||
*/
|
||||
int
|
||||
smgrwrite(int16 which, Relation reln, BlockNumber blocknum, char *buffer)
|
||||
{
|
||||
int status;
|
||||
|
||||
status = (*(smgrsw[which].smgr_write))(reln, blocknum, buffer);
|
||||
|
||||
if (status == SM_FAIL)
|
||||
elog(WARN, "cannot write block %d of %.*s",
|
||||
blocknum, NAMEDATALEN, &(reln->rd_rel->relname.data[0]));
|
||||
|
||||
return (status);
|
||||
}
|
||||
|
||||
/*
|
||||
* smgrflush() -- A synchronous smgrwrite().
|
||||
*/
|
||||
int
|
||||
smgrflush(int16 which, Relation reln, BlockNumber blocknum, char *buffer)
|
||||
{
|
||||
int status;
|
||||
|
||||
status = (*(smgrsw[which].smgr_flush))(reln, blocknum, buffer);
|
||||
|
||||
if (status == SM_FAIL)
|
||||
elog(WARN, "cannot flush block %d of %.*s to stable store",
|
||||
blocknum, NAMEDATALEN, &(reln->rd_rel->relname.data[0]));
|
||||
|
||||
return (status);
|
||||
}
|
||||
|
||||
/*
|
||||
* smgrblindwrt() -- Write a page out blind.
|
||||
*
|
||||
* In some cases, we may find a page in the buffer cache that we
|
||||
* can't make a reldesc for. This happens, for example, when we
|
||||
* want to reuse a dirty page that was written by a transaction
|
||||
* that has not yet committed, which created a new relation. In
|
||||
* this case, the buffer manager will call smgrblindwrt() with
|
||||
* the name and OID of the database and the relation to which the
|
||||
* buffer belongs. Every storage manager must be able to force
|
||||
* this page down to stable storage in this circumstance.
|
||||
*/
|
||||
int
|
||||
smgrblindwrt(int16 which,
|
||||
char *dbname,
|
||||
char *relname,
|
||||
Oid dbid,
|
||||
Oid relid,
|
||||
BlockNumber blkno,
|
||||
char *buffer)
|
||||
{
|
||||
char *dbstr;
|
||||
char *relstr;
|
||||
int status;
|
||||
|
||||
dbstr = pstrdup(dbname);
|
||||
relstr = pstrdup(relname);
|
||||
|
||||
status = (*(smgrsw[which].smgr_blindwrt))(dbstr, relstr, dbid, relid,
|
||||
blkno, buffer);
|
||||
|
||||
if (status == SM_FAIL)
|
||||
elog(WARN, "cannot write block %d of %s [%s] blind",
|
||||
blkno, relstr, dbstr);
|
||||
|
||||
pfree(dbstr);
|
||||
pfree(relstr);
|
||||
|
||||
return (status);
|
||||
}
|
||||
|
||||
/*
|
||||
* smgrnblocks() -- Calculate the number of POSTGRES blocks in the
|
||||
* supplied relation.
|
||||
*
|
||||
* Returns the number of blocks on success, aborts the current
|
||||
* transaction on failure.
|
||||
*/
|
||||
int
|
||||
smgrnblocks(int16 which, Relation reln)
|
||||
{
|
||||
int nblocks;
|
||||
|
||||
if ((nblocks = (*(smgrsw[which].smgr_nblocks))(reln)) < 0)
|
||||
elog(WARN, "cannot count blocks for %.*s",
|
||||
NAMEDATALEN, &(reln->rd_rel->relname.data[0]));
|
||||
|
||||
return (nblocks);
|
||||
}
|
||||
|
||||
/*
|
||||
* smgrcommit(), smgrabort() -- Commit or abort changes made during the
|
||||
* current transaction.
|
||||
*/
|
||||
int
|
||||
smgrcommit()
|
||||
{
|
||||
int i;
|
||||
extern char *smgrout();
|
||||
|
||||
for (i = 0; i < NSmgr; i++) {
|
||||
if (smgrsw[i].smgr_commit) {
|
||||
if ((*(smgrsw[i].smgr_commit))() == SM_FAIL)
|
||||
elog(FATAL, "transaction commit failed on %s", smgrout(i));
|
||||
}
|
||||
}
|
||||
|
||||
return (SM_SUCCESS);
|
||||
}
|
||||
|
||||
int
|
||||
smgrabort()
|
||||
{
|
||||
int i;
|
||||
extern char *smgrout();
|
||||
|
||||
for (i = 0; i < NSmgr; i++) {
|
||||
if (smgrsw[i].smgr_abort) {
|
||||
if ((*(smgrsw[i].smgr_abort))() == SM_FAIL)
|
||||
elog(FATAL, "transaction abort failed on %s", smgrout(i));
|
||||
}
|
||||
}
|
||||
|
||||
return (SM_SUCCESS);
|
||||
}
|
||||
|
||||
bool
|
||||
smgriswo(int16 smgrno)
|
||||
{
|
||||
if (smgrno < 0 || smgrno >= NSmgr)
|
||||
elog(WARN, "illegal storage manager number %d", smgrno);
|
||||
|
||||
return (smgrwo[smgrno]);
|
||||
}
|
||||
82
src/backend/storage/smgr/smgrtype.c
Normal file
82
src/backend/storage/smgr/smgrtype.c
Normal file
@@ -0,0 +1,82 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* smgrtype.c--
|
||||
* storage manager type
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgrtype.c,v 1.1.1.1 1996/07/09 06:21:59 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include <string.h>
|
||||
#include "postgres.h"
|
||||
|
||||
#include "utils/builtins.h" /* where the declarations go */
|
||||
#include "utils/elog.h"
|
||||
#include "utils/palloc.h"
|
||||
#include "storage/smgr.h"
|
||||
|
||||
/*
 * smgrid -- the external (human-readable) name of one storage manager;
 * see the StorageManager[] table below.
 */
typedef struct smgrid {
    char *smgr_name;	/* name used by smgrin()/smgrout() */
} smgrid;
|
||||
|
||||
/*
 *  StorageManager[] -- List of defined storage managers.
 *
 *	The weird comma placement is to keep compilers happy no matter
 *	which of these is (or is not) defined.
 *
 *	The index of an entry is its storage manager number; this table
 *	must parallel the dispatch table in smgr.c.
 */

static smgrid StorageManager[] = {
    {"magnetic disk"},
#ifdef MAIN_MEMORY
    {"main memory"}
#endif /* MAIN_MEMORY */
};

/* number of defined storage managers (bound for name lookups) */
static int NStorageManagers = lengthof(StorageManager);
|
||||
|
||||
int2
|
||||
smgrin(char *s)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < NStorageManagers; i++) {
|
||||
if (strcmp(s, StorageManager[i].smgr_name) == 0)
|
||||
return((int2) i);
|
||||
}
|
||||
elog(WARN, "smgrin: illegal storage manager name %s", s);
|
||||
return 0;
|
||||
}
|
||||
|
||||
char *
|
||||
smgrout(int2 i)
|
||||
{
|
||||
char *s;
|
||||
|
||||
if (i >= NStorageManagers || i < 0)
|
||||
elog(WARN, "Illegal storage manager id %d", i);
|
||||
|
||||
s = (char *) palloc(strlen(StorageManager[i].smgr_name) + 1);
|
||||
strcpy(s, StorageManager[i].smgr_name);
|
||||
return (s);
|
||||
}
|
||||
|
||||
bool
|
||||
smgreq(int2 a, int2 b)
|
||||
{
|
||||
if (a == b)
|
||||
return (true);
|
||||
return (false);
|
||||
}
|
||||
|
||||
bool
|
||||
smgrne(int2 a, int2 b)
|
||||
{
|
||||
if (a == b)
|
||||
return (false);
|
||||
return (true);
|
||||
}
|
||||
38
src/backend/storage/spin.h
Normal file
38
src/backend/storage/spin.h
Normal file
@@ -0,0 +1,38 @@
|
||||
/*-------------------------------------------------------------------------
 *
 * spin.h--
 *    synchronization routines
 *
 *
 * Copyright (c) 1994, Regents of the University of California
 *
 * $Id: spin.h,v 1.1.1.1 1996/07/09 06:21:53 scrappy Exp $
 *
 *-------------------------------------------------------------------------
 */
#ifndef SPIN_H
#define SPIN_H

#include "ipc.h"

/*
 * two implementations of spin locks
 *
 * sequent, sparc, sun3: real spin locks. uses a TAS instruction; see
 * src/storage/ipc/s_lock.c for details.
 *
 * default: fake spin locks using semaphores.  see spin.c
 *
 */

/* A spinlock is identified by a small integer id. */
typedef int SPINLOCK;

/* creation/attachment of the spinlock set, keyed by IPC key */
extern bool CreateSpinlocks(IPCKey key);
extern bool AttachSpinLocks(IPCKey key);
extern bool InitSpinLocks(int init, IPCKey key);

/* acquire/release/query a single spinlock */
extern void SpinAcquire(SPINLOCK lock);
extern void SpinRelease(SPINLOCK lock);
extern bool SpinIsLocked(SPINLOCK lock);

#endif /* SPIN_H */
|
||||
Reference in New Issue
Block a user