
Massive commit to run PGINDENT on all *.c and *.h files.

Bruce Momjian committed 1997-09-07 05:04:48 +00:00
parent 8fecd4febf
commit 1ccd423235
687 changed files with 150775 additions and 136888 deletions
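
The reindent is mechanical, and its conventions are easiest to see on a toy fragment. A minimal before/after sketch (hypothetical code, not from the commit) of the three changes that dominate the diffs below: tab-aligned variable declarations, braces moved onto their own lines, and a space after casts.

Before pgindent:

    int    i;
    long *count = (long *)calloc(n, sizeof(long));
    if (found) {
        count[i]++;
    } else {
        count[i]--;
    }

After pgindent:

    int         i;
    long       *count = (long *) calloc(n, sizeof(long));

    if (found)
    {
        count[i]++;
    }
    else
    {
        count[i]--;
    }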

File: src/backend/storage/buffer/buf_init.c

@@ -1,13 +1,13 @@
/*-------------------------------------------------------------------------
*
* buf_init.c--
* buffer manager initialization routines
* buffer manager initialization routines
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.10 1997/07/28 00:54:33 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.11 1997/09/07 04:48:15 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -35,98 +35,103 @@
#include "utils/dynahash.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"
#include "executor/execdebug.h" /* for NDirectFileRead */
#include "executor/execdebug.h" /* for NDirectFileRead */
#include "catalog/catalog.h"
/*
* if BMTRACE is defined, we trace the last 200 buffer allocations and
* deallocations in a circular buffer in shared memory.
* if BMTRACE is defined, we trace the last 200 buffer allocations and
* deallocations in a circular buffer in shared memory.
*/
#ifdef BMTRACE
bmtrace *TraceBuf;
long *CurTraceBuf;
#define BMT_LIMIT 200
#endif /* BMTRACE */
int ShowPinTrace = 0;
bmtrace *TraceBuf;
long *CurTraceBuf;
int NBuffers = NDBUFS; /* NDBUFS defined in miscadmin.h */
int Data_Descriptors;
int Free_List_Descriptor;
int Lookup_List_Descriptor;
int Num_Descriptors;
#define BMT_LIMIT 200
#endif /* BMTRACE */
int ShowPinTrace = 0;
int NBuffers = NDBUFS; /* NDBUFS defined in miscadmin.h */
int Data_Descriptors;
int Free_List_Descriptor;
int Lookup_List_Descriptor;
int Num_Descriptors;
BufferDesc *BufferDescriptors;
BufferBlock BufferBlocks;
BufferDesc *BufferDescriptors;
BufferBlock BufferBlocks;
#ifndef HAS_TEST_AND_SET
long *NWaitIOBackendP;
long *NWaitIOBackendP;
#endif
extern IpcSemaphoreId WaitIOSemId;
extern IpcSemaphoreId WaitIOSemId;
long *PrivateRefCount; /* also used in freelist.c */
long *LastRefCount; /* refcounts of last ExecMain level */
long *CommitInfoNeedsSave; /* to write buffers where we have filled in */
/* t_tmin (or t_tmax) */
long *PrivateRefCount;/* also used in freelist.c */
long *LastRefCount; /* refcounts of last ExecMain level */
long *CommitInfoNeedsSave; /* to write buffers where we have
* filled in */
/* t_tmin (or t_tmax) */
/*
* Data Structures:
* buffers live in a freelist and a lookup data structure.
*
* buffers live in a freelist and a lookup data structure.
*
*
* Buffer Lookup:
* Two important notes. First, the buffer has to be
* available for lookup BEFORE an IO begins. Otherwise
* a second process trying to read the buffer will
* allocate its own copy and the buffeer pool will
* become inconsistent.
* Two important notes. First, the buffer has to be
* available for lookup BEFORE an IO begins. Otherwise
* a second process trying to read the buffer will
* allocate its own copy and the buffeer pool will
* become inconsistent.
*
* Buffer Replacement:
* see freelist.c. A buffer cannot be replaced while in
* use either by data manager or during IO.
* see freelist.c. A buffer cannot be replaced while in
* use either by data manager or during IO.
*
* WriteBufferBack:
* currently, a buffer is only written back at the time
* it is selected for replacement. It should
* be done sooner if possible to reduce latency of
* BufferAlloc(). Maybe there should be a daemon process.
* currently, a buffer is only written back at the time
* it is selected for replacement. It should
* be done sooner if possible to reduce latency of
* BufferAlloc(). Maybe there should be a daemon process.
*
* Synchronization/Locking:
*
* BufMgrLock lock -- must be acquired before manipulating the
* buffer queues (lookup/freelist). Must be released
* before exit and before doing any IO.
* BufMgrLock lock -- must be acquired before manipulating the
* buffer queues (lookup/freelist). Must be released
* before exit and before doing any IO.
*
* IO_IN_PROGRESS -- this is a flag in the buffer descriptor.
* It must be set when an IO is initiated and cleared at
* the end of the IO. It is there to make sure that one
* process doesn't start to use a buffer while another is
* faulting it in. see IOWait/IOSignal.
* It must be set when an IO is initiated and cleared at
* the end of the IO. It is there to make sure that one
* process doesn't start to use a buffer while another is
* faulting it in. see IOWait/IOSignal.
*
* refcount -- A buffer is pinned during IO and immediately
* after a BufferAlloc(). A buffer is always either pinned
* or on the freelist but never both. The buffer must be
* released, written, or flushed before the end of
* transaction.
* refcount -- A buffer is pinned during IO and immediately
* after a BufferAlloc(). A buffer is always either pinned
* or on the freelist but never both. The buffer must be
* released, written, or flushed before the end of
* transaction.
*
* PrivateRefCount -- Each buffer also has a private refcount the keeps
* track of the number of times the buffer is pinned in the current
* processes. This is used for two purposes, first, if we pin a
* a buffer more than once, we only need to change the shared refcount
* once, thus only lock the buffer pool once, second, when a transaction
* aborts, it should only unpin the buffers exactly the number of times it
* has pinned them, so that it will not blow away buffers of another
* backend.
* track of the number of times the buffer is pinned in the current
* processes. This is used for two purposes, first, if we pin a
* a buffer more than once, we only need to change the shared refcount
* once, thus only lock the buffer pool once, second, when a transaction
* aborts, it should only unpin the buffers exactly the number of times it
* has pinned them, so that it will not blow away buffers of another
* backend.
*
*/
SPINLOCK BufMgrLock;
SPINLOCK BufMgrLock;
long int ReadBufferCount;
long int ReadLocalBufferCount;
long int BufferHitCount;
long int LocalBufferHitCount;
long int BufferFlushCount;
long int LocalBufferFlushCount;
long int ReadBufferCount;
long int ReadLocalBufferCount;
long int BufferHitCount;
long int LocalBufferHitCount;
long int BufferFlushCount;
long int LocalBufferFlushCount;
/*
@@ -138,111 +143,121 @@ long int LocalBufferFlushCount;
void
InitBufferPool(IPCKey key)
{
bool foundBufs,foundDescs;
int i;
/* check padding of BufferDesc and BufferHdr */
/* we need both checks because a sbufdesc_padded > PADDED_SBUFDESC_SIZE
will shrink sbufdesc to the required size, which is bad */
if (sizeof(struct sbufdesc) != PADDED_SBUFDESC_SIZE ||
sizeof(struct sbufdesc_unpadded) > PADDED_SBUFDESC_SIZE)
elog(WARN,"Internal error: sbufdesc does not have the proper size, "
"contact the Postgres developers");
if (sizeof(struct sbufdesc_unpadded) <= PADDED_SBUFDESC_SIZE/2)
elog(WARN,"Internal error: sbufdesc is greatly over-sized, "
"contact the Postgres developers");
bool foundBufs,
foundDescs;
int i;
/* check padding of BufferDesc and BufferHdr */
Data_Descriptors = NBuffers;
Free_List_Descriptor = Data_Descriptors;
Lookup_List_Descriptor = Data_Descriptors + 1;
Num_Descriptors = Data_Descriptors + 1;
SpinAcquire(BufMgrLock);
#ifdef BMTRACE
CurTraceBuf = (long *) ShmemInitStruct("Buffer trace",
(BMT_LIMIT * sizeof(bmtrace)) + sizeof(long),
&foundDescs);
if (!foundDescs)
memset(CurTraceBuf, 0, (BMT_LIMIT * sizeof(bmtrace)) + sizeof(long));
TraceBuf = (bmtrace *) &(CurTraceBuf[1]);
#endif
BufferDescriptors = (BufferDesc *)
ShmemInitStruct("Buffer Descriptors",
Num_Descriptors*sizeof(BufferDesc),&foundDescs);
BufferBlocks = (BufferBlock)
ShmemInitStruct("Buffer Blocks",
NBuffers*BLCKSZ,&foundBufs);
#ifndef HAS_TEST_AND_SET
{
bool foundNWaitIO;
NWaitIOBackendP = (long *)ShmemInitStruct("#Backends Waiting IO",
sizeof(long),
&foundNWaitIO);
if (!foundNWaitIO)
*NWaitIOBackendP = 0;
}
#endif
if (foundDescs || foundBufs) {
/* both should be present or neither */
Assert(foundDescs && foundBufs);
} else {
BufferDesc *buf;
unsigned long block;
buf = BufferDescriptors;
block = (unsigned long) BufferBlocks;
/*
* link the buffers into a circular, doubly-linked list to
* initialize free list. Still don't know anything about
* replacement strategy in this file.
* we need both checks because a sbufdesc_padded >
* PADDED_SBUFDESC_SIZE will shrink sbufdesc to the required size,
* which is bad
*/
for (i = 0; i < Data_Descriptors; block+=BLCKSZ,buf++,i++) {
Assert(ShmemIsValid((unsigned long)block));
buf->freeNext = i+1;
buf->freePrev = i-1;
CLEAR_BUFFERTAG(&(buf->tag));
buf->data = MAKE_OFFSET(block);
buf->flags = (BM_DELETED | BM_FREE | BM_VALID);
buf->refcount = 0;
buf->buf_id = i;
#ifdef HAS_TEST_AND_SET
S_INIT_LOCK(&(buf->io_in_progress_lock));
if (sizeof(struct sbufdesc) != PADDED_SBUFDESC_SIZE ||
sizeof(struct sbufdesc_unpadded) > PADDED_SBUFDESC_SIZE)
elog(WARN, "Internal error: sbufdesc does not have the proper size, "
"contact the Postgres developers");
if (sizeof(struct sbufdesc_unpadded) <= PADDED_SBUFDESC_SIZE / 2)
elog(WARN, "Internal error: sbufdesc is greatly over-sized, "
"contact the Postgres developers");
Data_Descriptors = NBuffers;
Free_List_Descriptor = Data_Descriptors;
Lookup_List_Descriptor = Data_Descriptors + 1;
Num_Descriptors = Data_Descriptors + 1;
SpinAcquire(BufMgrLock);
#ifdef BMTRACE
CurTraceBuf = (long *) ShmemInitStruct("Buffer trace",
(BMT_LIMIT * sizeof(bmtrace)) + sizeof(long),
&foundDescs);
if (!foundDescs)
memset(CurTraceBuf, 0, (BMT_LIMIT * sizeof(bmtrace)) + sizeof(long));
TraceBuf = (bmtrace *) & (CurTraceBuf[1]);
#endif
}
/* close the circular queue */
BufferDescriptors[0].freePrev = Data_Descriptors-1;
BufferDescriptors[Data_Descriptors-1].freeNext = 0;
}
/* Init the rest of the module */
InitBufTable();
InitFreeList(!foundDescs);
SpinRelease(BufMgrLock);
BufferDescriptors = (BufferDesc *)
ShmemInitStruct("Buffer Descriptors",
Num_Descriptors * sizeof(BufferDesc), &foundDescs);
BufferBlocks = (BufferBlock)
ShmemInitStruct("Buffer Blocks",
NBuffers * BLCKSZ, &foundBufs);
#ifndef HAS_TEST_AND_SET
{
int status;
WaitIOSemId = IpcSemaphoreCreate(IPCKeyGetWaitIOSemaphoreKey(key),
1, IPCProtection, 0, 1, &status);
}
{
bool foundNWaitIO;
NWaitIOBackendP = (long *) ShmemInitStruct("#Backends Waiting IO",
sizeof(long),
&foundNWaitIO);
if (!foundNWaitIO)
*NWaitIOBackendP = 0;
}
#endif
PrivateRefCount = (long *) calloc(NBuffers, sizeof(long));
LastRefCount = (long *) calloc(NBuffers, sizeof(long));
CommitInfoNeedsSave = (long *) calloc(NBuffers, sizeof(long));
if (foundDescs || foundBufs)
{
/* both should be present or neither */
Assert(foundDescs && foundBufs);
}
else
{
BufferDesc *buf;
unsigned long block;
buf = BufferDescriptors;
block = (unsigned long) BufferBlocks;
/*
* link the buffers into a circular, doubly-linked list to
* initialize free list. Still don't know anything about
* replacement strategy in this file.
*/
for (i = 0; i < Data_Descriptors; block += BLCKSZ, buf++, i++)
{
Assert(ShmemIsValid((unsigned long) block));
buf->freeNext = i + 1;
buf->freePrev = i - 1;
CLEAR_BUFFERTAG(&(buf->tag));
buf->data = MAKE_OFFSET(block);
buf->flags = (BM_DELETED | BM_FREE | BM_VALID);
buf->refcount = 0;
buf->buf_id = i;
#ifdef HAS_TEST_AND_SET
S_INIT_LOCK(&(buf->io_in_progress_lock));
#endif
}
/* close the circular queue */
BufferDescriptors[0].freePrev = Data_Descriptors - 1;
BufferDescriptors[Data_Descriptors - 1].freeNext = 0;
}
/* Init the rest of the module */
InitBufTable();
InitFreeList(!foundDescs);
SpinRelease(BufMgrLock);
#ifndef HAS_TEST_AND_SET
{
int status;
WaitIOSemId = IpcSemaphoreCreate(IPCKeyGetWaitIOSemaphoreKey(key),
1, IPCProtection, 0, 1, &status);
}
#endif
PrivateRefCount = (long *) calloc(NBuffers, sizeof(long));
LastRefCount = (long *) calloc(NBuffers, sizeof(long));
CommitInfoNeedsSave = (long *) calloc(NBuffers, sizeof(long));
}
/* -----------------------------------------------------
@@ -255,43 +270,41 @@ InitBufferPool(IPCKey key)
int
BufferShmemSize()
{
int size = 0;
int nbuckets;
int nsegs;
int tmp;
nbuckets = 1 << (int)my_log2((NBuffers - 1) / DEF_FFACTOR + 1);
nsegs = 1 << (int)my_log2((nbuckets - 1) / DEF_SEGSIZE + 1);
/* size of shmem binding table */
size += MAXALIGN(my_log2(BTABLE_SIZE) * sizeof(void *)); /* HTAB->dir */
size += MAXALIGN(sizeof(HHDR)); /* HTAB->hctl */
size += MAXALIGN(DEF_SEGSIZE * sizeof(SEGMENT));
size += BUCKET_ALLOC_INCR *
(MAXALIGN(sizeof(BUCKET_INDEX)) +
MAXALIGN(BTABLE_KEYSIZE) +
MAXALIGN(BTABLE_DATASIZE));
/* size of buffer descriptors */
size += MAXALIGN((NBuffers + 1) * sizeof(BufferDesc));
/* size of data pages */
size += NBuffers * MAXALIGN(BLCKSZ);
/* size of buffer hash table */
size += MAXALIGN(my_log2(NBuffers) * sizeof(void *)); /* HTAB->dir */
size += MAXALIGN(sizeof(HHDR)); /* HTAB->hctl */
size += nsegs * MAXALIGN(DEF_SEGSIZE * sizeof(SEGMENT));
tmp = (int)ceil((double)NBuffers/BUCKET_ALLOC_INCR);
size += tmp * BUCKET_ALLOC_INCR *
(MAXALIGN(sizeof(BUCKET_INDEX)) +
MAXALIGN(sizeof(BufferTag)) +
MAXALIGN(sizeof(Buffer)));
int size = 0;
int nbuckets;
int nsegs;
int tmp;
nbuckets = 1 << (int) my_log2((NBuffers - 1) / DEF_FFACTOR + 1);
nsegs = 1 << (int) my_log2((nbuckets - 1) / DEF_SEGSIZE + 1);
/* size of shmem binding table */
size += MAXALIGN(my_log2(BTABLE_SIZE) * sizeof(void *)); /* HTAB->dir */
size += MAXALIGN(sizeof(HHDR)); /* HTAB->hctl */
size += MAXALIGN(DEF_SEGSIZE * sizeof(SEGMENT));
size += BUCKET_ALLOC_INCR *
(MAXALIGN(sizeof(BUCKET_INDEX)) +
MAXALIGN(BTABLE_KEYSIZE) +
MAXALIGN(BTABLE_DATASIZE));
/* size of buffer descriptors */
size += MAXALIGN((NBuffers + 1) * sizeof(BufferDesc));
/* size of data pages */
size += NBuffers * MAXALIGN(BLCKSZ);
/* size of buffer hash table */
size += MAXALIGN(my_log2(NBuffers) * sizeof(void *)); /* HTAB->dir */
size += MAXALIGN(sizeof(HHDR)); /* HTAB->hctl */
size += nsegs * MAXALIGN(DEF_SEGSIZE * sizeof(SEGMENT));
tmp = (int) ceil((double) NBuffers / BUCKET_ALLOC_INCR);
size += tmp * BUCKET_ALLOC_INCR *
(MAXALIGN(sizeof(BUCKET_INDEX)) +
MAXALIGN(sizeof(BufferTag)) +
MAXALIGN(sizeof(Buffer)));
#ifdef BMTRACE
size += (BMT_LIMIT * sizeof(bmtrace)) + sizeof(long);
size += (BMT_LIMIT * sizeof(bmtrace)) + sizeof(long);
#endif
return size;
return size;
}
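
For a sense of scale, the data pages dominate the total computed above. A rough worked example, assuming the era's defaults of NBuffers = 64 and BLCKSZ = 8192 (both values are assumptions, not stated in this diff):

    NBuffers * MAXALIGN(BLCKSZ) = 64 * 8192 = 524288 bytes (512 kB)

The remaining terms -- the (NBuffers + 1) descriptors, the shmem binding table, and the buffer hash table -- add only a few kilobytes on top of that.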

File: src/backend/storage/buffer/buf_table.c

@@ -1,13 +1,13 @@
/*-------------------------------------------------------------------------
*
* buf_table.c--
* routines for finding buffers in the buffer pool.
* routines for finding buffers in the buffer pool.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_table.c,v 1.4 1997/08/19 21:32:34 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_table.c,v 1.5 1997/09/07 04:48:17 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -16,30 +16,31 @@
*
* Data Structures:
*
* Buffers are identified by their BufferTag (buf.h). This
* Buffers are identified by their BufferTag (buf.h). This
* file contains routines for allocating a shmem hash table to
* map buffer tags to buffer descriptors.
*
* Synchronization:
*
* All routines in this file assume buffer manager spinlock is
* held by their caller.
*
* All routines in this file assume buffer manager spinlock is
* held by their caller.
*/
#include "postgres.h"
#include "storage/bufmgr.h"
#include "storage/buf_internals.h" /* where the declarations go */
#include "storage/buf_internals.h" /* where the declarations go */
#include "storage/shmem.h"
#include "storage/spin.h"
#include "utils/hsearch.h"
static HTAB *SharedBufHash;
static HTAB *SharedBufHash;
typedef struct lookup {
BufferTag key;
Buffer id;
} LookupEnt;
typedef struct lookup
{
BufferTag key;
Buffer id;
} LookupEnt;
/*
* Initialize shmem hash table for mapping buffers
@@ -47,109 +48,116 @@ typedef struct lookup {
void
InitBufTable()
{
HASHCTL info;
int hash_flags;
/* assume lock is held */
/* BufferTag maps to Buffer */
info.keysize = sizeof(BufferTag);
info.datasize = sizeof(Buffer);
info.hash = tag_hash;
hash_flags = (HASH_ELEM | HASH_FUNCTION);
SharedBufHash = (HTAB *) ShmemInitHash("Shared Buf Lookup Table",
NBuffers,NBuffers,
&info,hash_flags);
if (! SharedBufHash) {
elog(FATAL,"couldn't initialize shared buffer pool Hash Tbl");
exit(1);
}
HASHCTL info;
int hash_flags;
/* assume lock is held */
/* BufferTag maps to Buffer */
info.keysize = sizeof(BufferTag);
info.datasize = sizeof(Buffer);
info.hash = tag_hash;
hash_flags = (HASH_ELEM | HASH_FUNCTION);
SharedBufHash = (HTAB *) ShmemInitHash("Shared Buf Lookup Table",
NBuffers, NBuffers,
&info, hash_flags);
if (!SharedBufHash)
{
elog(FATAL, "couldn't initialize shared buffer pool Hash Tbl");
exit(1);
}
}
BufferDesc *
BufTableLookup(BufferTag *tagPtr)
BufferDesc *
BufTableLookup(BufferTag * tagPtr)
{
LookupEnt * result;
bool found;
if (tagPtr->blockNum == P_NEW)
return(NULL);
result = (LookupEnt *)
hash_search(SharedBufHash,(char *) tagPtr,HASH_FIND,&found);
if (! result){
elog(WARN,"BufTableLookup: BufferLookup table corrupted");
return(NULL);
}
if (! found) {
return(NULL);
}
return(&(BufferDescriptors[result->id]));
LookupEnt *result;
bool found;
if (tagPtr->blockNum == P_NEW)
return (NULL);
result = (LookupEnt *)
hash_search(SharedBufHash, (char *) tagPtr, HASH_FIND, &found);
if (!result)
{
elog(WARN, "BufTableLookup: BufferLookup table corrupted");
return (NULL);
}
if (!found)
{
return (NULL);
}
return (&(BufferDescriptors[result->id]));
}
/*
* BufTableDelete
*/
bool
BufTableDelete(BufferDesc *buf)
BufTableDelete(BufferDesc * buf)
{
LookupEnt * result;
bool found;
/* buffer not initialized or has been removed from
* table already. BM_DELETED keeps us from removing
* buffer twice.
*/
if (buf->flags & BM_DELETED) {
return(TRUE);
}
buf->flags |= BM_DELETED;
result = (LookupEnt *)
hash_search(SharedBufHash,(char *) &(buf->tag),HASH_REMOVE,&found);
if (! (result && found)) {
elog(WARN,"BufTableDelete: BufferLookup table corrupted");
return(FALSE);
}
return(TRUE);
LookupEnt *result;
bool found;
/*
* buffer not initialized or has been removed from table already.
* BM_DELETED keeps us from removing buffer twice.
*/
if (buf->flags & BM_DELETED)
{
return (TRUE);
}
buf->flags |= BM_DELETED;
result = (LookupEnt *)
hash_search(SharedBufHash, (char *) &(buf->tag), HASH_REMOVE, &found);
if (!(result && found))
{
elog(WARN, "BufTableDelete: BufferLookup table corrupted");
return (FALSE);
}
return (TRUE);
}
bool
BufTableInsert(BufferDesc *buf)
BufTableInsert(BufferDesc * buf)
{
LookupEnt * result;
bool found;
/* cannot insert it twice */
Assert (buf->flags & BM_DELETED);
buf->flags &= ~(BM_DELETED);
result = (LookupEnt *)
hash_search(SharedBufHash,(char *) &(buf->tag),HASH_ENTER,&found);
if (! result) {
Assert(0);
elog(WARN,"BufTableInsert: BufferLookup table corrupted");
return(FALSE);
}
/* found something else in the table ! */
if (found) {
Assert(0);
elog(WARN,"BufTableInsert: BufferLookup table corrupted");
return(FALSE);
}
result->id = buf->buf_id;
return(TRUE);
LookupEnt *result;
bool found;
/* cannot insert it twice */
Assert(buf->flags & BM_DELETED);
buf->flags &= ~(BM_DELETED);
result = (LookupEnt *)
hash_search(SharedBufHash, (char *) &(buf->tag), HASH_ENTER, &found);
if (!result)
{
Assert(0);
elog(WARN, "BufTableInsert: BufferLookup table corrupted");
return (FALSE);
}
/* found something else in the table ! */
if (found)
{
Assert(0);
elog(WARN, "BufTableInsert: BufferLookup table corrupted");
return (FALSE);
}
result->id = buf->buf_id;
return (TRUE);
}
/* prints out collision stats for the buf table */
@@ -157,8 +165,9 @@ BufTableInsert(BufferDesc *buf)
void
DBG_LookupListCheck(int nlookup)
{
nlookup = 10;
hash_stats("Shared",SharedBufHash);
nlookup = 10;
hash_stats("Shared", SharedBufHash);
}
#endif
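
Every routine in this file assumes the caller already holds the buffer manager spinlock. A hypothetical caller (a sketch only -- in the real code the consumer is BufferAlloc in bufmgr.c, and write-back of a dirty victim plus error handling are omitted here) follows this shape:

    SpinAcquire(BufMgrLock);
    buf = BufTableLookup(&tag);
    if (buf == NULL)
    {
        buf = GetFreeBuffer();      /* victim from the freelist */
        BufTableDelete(buf);        /* drop the victim's old mapping */
        buf->tag = tag;             /* sketch: fill in the new tag */
        BufTableInsert(buf);        /* visible for lookup BEFORE the IO starts */
    }
    SpinRelease(BufMgrLock);

Publishing the entry before the read begins is what keeps a second backend from allocating its own copy of the same page, per the note at the top of buf_init.c.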

File diff suppressed because it is too large

File: src/backend/storage/buffer/freelist.c

@@ -1,14 +1,14 @@
/*-------------------------------------------------------------------------
*
* freelist.c--
* routines for manipulating the buffer pool's replacement strategy
* freelist.
* routines for manipulating the buffer pool's replacement strategy
* freelist.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/freelist.c,v 1.4 1997/08/19 21:32:44 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/freelist.c,v 1.5 1997/09/07 04:48:22 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -16,23 +16,23 @@
* OLD COMMENTS
*
* Data Structures:
* SharedFreeList is a circular queue. Notice that this
* is a shared memory queue so the next/prev "ptrs" are
* buffer ids, not addresses.
* SharedFreeList is a circular queue. Notice that this
* is a shared memory queue so the next/prev "ptrs" are
* buffer ids, not addresses.
*
* Sync: all routines in this file assume that the buffer
* semaphore has been acquired by the caller.
* semaphore has been acquired by the caller.
*/
#include <stdio.h>
#include "postgres.h"
#include "storage/bufmgr.h"
#include "storage/buf_internals.h" /* where declarations go */
#include "storage/buf_internals.h" /* where declarations go */
#include "storage/spin.h"
static BufferDesc *SharedFreeList;
static BufferDesc *SharedFreeList;
/* only actually used in debugging. The lock
* should be acquired before calling the freelist manager.
@@ -40,40 +40,40 @@ static BufferDesc *SharedFreeList;
extern SPINLOCK BufMgrLock;
#define IsInQueue(bf) \
Assert((bf->freeNext != INVALID_DESCRIPTOR));\
Assert((bf->freePrev != INVALID_DESCRIPTOR));\
Assert((bf->flags & BM_FREE))
Assert((bf->freeNext != INVALID_DESCRIPTOR));\
Assert((bf->freePrev != INVALID_DESCRIPTOR));\
Assert((bf->flags & BM_FREE))
#define NotInQueue(bf) \
Assert((bf->freeNext == INVALID_DESCRIPTOR));\
Assert((bf->freePrev == INVALID_DESCRIPTOR));\
Assert(! (bf->flags & BM_FREE))
Assert((bf->freeNext == INVALID_DESCRIPTOR));\
Assert((bf->freePrev == INVALID_DESCRIPTOR));\
Assert(! (bf->flags & BM_FREE))
/*
* AddBufferToFreelist --
* AddBufferToFreelist --
*
* In theory, this is the only routine that needs to be changed
* if the buffer replacement strategy changes. Just change
* if the buffer replacement strategy changes. Just change
* the manner in which buffers are added to the freelist queue.
* Currently, they are added on an LRU basis.
*/
void
AddBufferToFreelist(BufferDesc *bf)
AddBufferToFreelist(BufferDesc * bf)
{
#ifdef BMTRACE
_bm_trace(bf->tag.relId.dbId, bf->tag.relId.relId, bf->tag.blockNum,
BufferDescriptorGetBuffer(bf), BMT_DEALLOC);
#endif /* BMTRACE */
NotInQueue(bf);
/* change bf so it points to inFrontOfNew and its successor */
bf->freePrev = SharedFreeList->freePrev;
bf->freeNext = Free_List_Descriptor;
/* insert new into chain */
BufferDescriptors[bf->freeNext].freePrev = bf->buf_id;
BufferDescriptors[bf->freePrev].freeNext = bf->buf_id;
_bm_trace(bf->tag.relId.dbId, bf->tag.relId.relId, bf->tag.blockNum,
BufferDescriptorGetBuffer(bf), BMT_DEALLOC);
#endif /* BMTRACE */
NotInQueue(bf);
/* change bf so it points to inFrontOfNew and its successor */
bf->freePrev = SharedFreeList->freePrev;
bf->freeNext = Free_List_Descriptor;
/* insert new into chain */
BufferDescriptors[bf->freeNext].freePrev = bf->buf_id;
BufferDescriptors[bf->freePrev].freeNext = bf->buf_id;
}
#undef PinBuffer
@@ -82,47 +82,52 @@ AddBufferToFreelist(BufferDesc *bf)
* PinBuffer -- make buffer unavailable for replacement.
*/
void
PinBuffer(BufferDesc *buf)
PinBuffer(BufferDesc * buf)
{
long b;
/* Assert (buf->refcount < 25); */
if (buf->refcount == 0) {
IsInQueue(buf);
/* remove from freelist queue */
BufferDescriptors[buf->freeNext].freePrev = buf->freePrev;
BufferDescriptors[buf->freePrev].freeNext = buf->freeNext;
buf->freeNext = buf->freePrev = INVALID_DESCRIPTOR;
/* mark buffer as no longer free */
buf->flags &= ~BM_FREE;
} else {
NotInQueue(buf);
}
b = BufferDescriptorGetBuffer(buf) - 1;
Assert(PrivateRefCount[b] >= 0);
if (PrivateRefCount[b] == 0 && LastRefCount[b] == 0)
buf->refcount++;
PrivateRefCount[b]++;
long b;
/* Assert (buf->refcount < 25); */
if (buf->refcount == 0)
{
IsInQueue(buf);
/* remove from freelist queue */
BufferDescriptors[buf->freeNext].freePrev = buf->freePrev;
BufferDescriptors[buf->freePrev].freeNext = buf->freeNext;
buf->freeNext = buf->freePrev = INVALID_DESCRIPTOR;
/* mark buffer as no longer free */
buf->flags &= ~BM_FREE;
}
else
{
NotInQueue(buf);
}
b = BufferDescriptorGetBuffer(buf) - 1;
Assert(PrivateRefCount[b] >= 0);
if (PrivateRefCount[b] == 0 && LastRefCount[b] == 0)
buf->refcount++;
PrivateRefCount[b]++;
}
#ifdef NOT_USED
void
PinBuffer_Debug(char *file, int line, BufferDesc *buf)
PinBuffer_Debug(char *file, int line, BufferDesc * buf)
{
PinBuffer(buf);
if (ShowPinTrace) {
Buffer buffer = BufferDescriptorGetBuffer(buf);
fprintf(stderr, "PIN(Pin) %ld relname = %s, blockNum = %d, \
PinBuffer(buf);
if (ShowPinTrace)
{
Buffer buffer = BufferDescriptorGetBuffer(buf);
fprintf(stderr, "PIN(Pin) %ld relname = %s, blockNum = %d, \
refcount = %ld, file: %s, line: %d\n",
buffer, buf->sb_relname, buf->tag.blockNum,
PrivateRefCount[buffer - 1], file, line);
}
buffer, buf->sb_relname, buf->tag.blockNum,
PrivateRefCount[buffer - 1], file, line);
}
}
#endif
#undef UnpinBuffer
@@ -131,95 +136,102 @@ refcount = %ld, file: %s, line: %d\n",
* UnpinBuffer -- make buffer available for replacement.
*/
void
UnpinBuffer(BufferDesc *buf)
UnpinBuffer(BufferDesc * buf)
{
long b = BufferDescriptorGetBuffer(buf) - 1;
Assert(buf->refcount);
Assert(PrivateRefCount[b] > 0);
PrivateRefCount[b]--;
if (PrivateRefCount[b] == 0 && LastRefCount[b] == 0)
buf->refcount--;
NotInQueue(buf);
if (buf->refcount == 0) {
AddBufferToFreelist(buf);
buf->flags |= BM_FREE;
} else {
/* do nothing */
}
long b = BufferDescriptorGetBuffer(buf) - 1;
Assert(buf->refcount);
Assert(PrivateRefCount[b] > 0);
PrivateRefCount[b]--;
if (PrivateRefCount[b] == 0 && LastRefCount[b] == 0)
buf->refcount--;
NotInQueue(buf);
if (buf->refcount == 0)
{
AddBufferToFreelist(buf);
buf->flags |= BM_FREE;
}
else
{
/* do nothing */
}
}
#ifdef NOT_USED
void
UnpinBuffer_Debug(char *file, int line, BufferDesc *buf)
UnpinBuffer_Debug(char *file, int line, BufferDesc * buf)
{
UnpinBuffer(buf);
if (ShowPinTrace) {
Buffer buffer = BufferDescriptorGetBuffer(buf);
fprintf(stderr, "UNPIN(Unpin) %ld relname = %s, blockNum = %d, \
UnpinBuffer(buf);
if (ShowPinTrace)
{
Buffer buffer = BufferDescriptorGetBuffer(buf);
fprintf(stderr, "UNPIN(Unpin) %ld relname = %s, blockNum = %d, \
refcount = %ld, file: %s, line: %d\n",
buffer, buf->sb_relname, buf->tag.blockNum,
PrivateRefCount[buffer - 1], file, line);
}
buffer, buf->sb_relname, buf->tag.blockNum,
PrivateRefCount[buffer - 1], file, line);
}
}
#endif
/*
* GetFreeBuffer() -- get the 'next' buffer from the freelist.
*
*/
BufferDesc *
BufferDesc *
GetFreeBuffer()
{
BufferDesc *buf;
if (Free_List_Descriptor == SharedFreeList->freeNext) {
/* queue is empty. All buffers in the buffer pool are pinned. */
elog(WARN,"out of free buffers: time to abort !\n");
return(NULL);
}
buf = &(BufferDescriptors[SharedFreeList->freeNext]);
/* remove from freelist queue */
BufferDescriptors[buf->freeNext].freePrev = buf->freePrev;
BufferDescriptors[buf->freePrev].freeNext = buf->freeNext;
buf->freeNext = buf->freePrev = INVALID_DESCRIPTOR;
buf->flags &= ~(BM_FREE);
return(buf);
BufferDesc *buf;
if (Free_List_Descriptor == SharedFreeList->freeNext)
{
/* queue is empty. All buffers in the buffer pool are pinned. */
elog(WARN, "out of free buffers: time to abort !\n");
return (NULL);
}
buf = &(BufferDescriptors[SharedFreeList->freeNext]);
/* remove from freelist queue */
BufferDescriptors[buf->freeNext].freePrev = buf->freePrev;
BufferDescriptors[buf->freePrev].freeNext = buf->freeNext;
buf->freeNext = buf->freePrev = INVALID_DESCRIPTOR;
buf->flags &= ~(BM_FREE);
return (buf);
}
/*
* InitFreeList -- initialize the dummy buffer descriptor used
* as a freelist head.
* as a freelist head.
*
* Assume: All of the buffers are already linked in a circular
* queue. Only called by postmaster and only during
* initialization.
* queue. Only called by postmaster and only during
* initialization.
*/
void
InitFreeList(bool init)
{
SharedFreeList = &(BufferDescriptors[Free_List_Descriptor]);
if (init) {
/* we only do this once, normally the postmaster */
SharedFreeList->data = INVALID_OFFSET;
SharedFreeList->flags = 0;
SharedFreeList->flags &= ~(BM_VALID | BM_DELETED | BM_FREE);
SharedFreeList->buf_id = Free_List_Descriptor;
/* insert it into a random spot in the circular queue */
SharedFreeList->freeNext = BufferDescriptors[0].freeNext;
SharedFreeList->freePrev = 0;
BufferDescriptors[SharedFreeList->freeNext].freePrev =
BufferDescriptors[SharedFreeList->freePrev].freeNext =
Free_List_Descriptor;
}
SharedFreeList = &(BufferDescriptors[Free_List_Descriptor]);
if (init)
{
/* we only do this once, normally the postmaster */
SharedFreeList->data = INVALID_OFFSET;
SharedFreeList->flags = 0;
SharedFreeList->flags &= ~(BM_VALID | BM_DELETED | BM_FREE);
SharedFreeList->buf_id = Free_List_Descriptor;
/* insert it into a random spot in the circular queue */
SharedFreeList->freeNext = BufferDescriptors[0].freeNext;
SharedFreeList->freePrev = 0;
BufferDescriptors[SharedFreeList->freeNext].freePrev =
BufferDescriptors[SharedFreeList->freePrev].freeNext =
Free_List_Descriptor;
}
}
@@ -230,67 +242,78 @@ InitFreeList(bool init)
void
DBG_FreeListCheck(int nfree)
{
int i;
BufferDesc *buf;
buf = &(BufferDescriptors[SharedFreeList->freeNext]);
for (i=0;i<nfree;i++,buf = &(BufferDescriptors[buf->freeNext])) {
if (! (buf->flags & (BM_FREE))){
if (buf != SharedFreeList) {
printf("\tfree list corrupted: %d flags %x\n",
buf->buf_id,buf->flags);
} else {
printf("\tfree list corrupted: too short -- %d not %d\n",
i,nfree);
}
int i;
BufferDesc *buf;
buf = &(BufferDescriptors[SharedFreeList->freeNext]);
for (i = 0; i < nfree; i++, buf = &(BufferDescriptors[buf->freeNext]))
{
if (!(buf->flags & (BM_FREE)))
{
if (buf != SharedFreeList)
{
printf("\tfree list corrupted: %d flags %x\n",
buf->buf_id, buf->flags);
}
else
{
printf("\tfree list corrupted: too short -- %d not %d\n",
i, nfree);
}
}
if ((BufferDescriptors[buf->freeNext].freePrev != buf->buf_id) ||
(BufferDescriptors[buf->freePrev].freeNext != buf->buf_id))
{
printf("\tfree list links corrupted: %d %ld %ld\n",
buf->buf_id, buf->freePrev, buf->freeNext);
}
}
if ((BufferDescriptors[buf->freeNext].freePrev != buf->buf_id) ||
(BufferDescriptors[buf->freePrev].freeNext != buf->buf_id)) {
printf("\tfree list links corrupted: %d %ld %ld\n",
buf->buf_id,buf->freePrev,buf->freeNext);
if (buf != SharedFreeList)
{
printf("\tfree list corrupted: %d-th buffer is %d\n",
nfree, buf->buf_id);
}
}
if (buf != SharedFreeList) {
printf("\tfree list corrupted: %d-th buffer is %d\n",
nfree,buf->buf_id);
}
}
#endif
#ifdef NOT_USED
/*
* PrintBufferFreeList -
* prints the buffer free list, for debugging
* prints the buffer free list, for debugging
*/
static void
PrintBufferFreeList()
{
BufferDesc *buf;
BufferDesc *buf;
if (SharedFreeList->freeNext == Free_List_Descriptor) {
printf("free list is empty.\n");
return;
}
buf = &(BufferDescriptors[SharedFreeList->freeNext]);
for (;;) {
int i = (buf - BufferDescriptors);
printf("[%-2d] (%s, %d) flags=0x%x, refcnt=%d %ld, nxt=%ld prv=%ld)\n",
i, buf->sb_relname, buf->tag.blockNum,
buf->flags, buf->refcount, PrivateRefCount[i],
buf->freeNext, buf->freePrev);
if (buf->freeNext == Free_List_Descriptor)
break;
if (SharedFreeList->freeNext == Free_List_Descriptor)
{
printf("free list is empty.\n");
return;
}
buf = &(BufferDescriptors[buf->freeNext]);
}
buf = &(BufferDescriptors[SharedFreeList->freeNext]);
for (;;)
{
int i = (buf - BufferDescriptors);
printf("[%-2d] (%s, %d) flags=0x%x, refcnt=%d %ld, nxt=%ld prv=%ld)\n",
i, buf->sb_relname, buf->tag.blockNum,
buf->flags, buf->refcount, PrivateRefCount[i],
buf->freeNext, buf->freePrev);
if (buf->freeNext == Free_List_Descriptor)
break;
buf = &(BufferDescriptors[buf->freeNext]);
}
}
#endif
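
The two-level refcount is easiest to trace with a concrete pin/unpin sequence. The values below follow the PinBuffer/UnpinBuffer code above, starting from a free buffer (a sketch; BufMgrLock is assumed held throughout):

    PinBuffer(buf);     /* refcount 0 -> 1, PrivateRefCount[b] 0 -> 1;
                         * buffer is unlinked from the freelist */
    PinBuffer(buf);     /* PrivateRefCount[b] 1 -> 2; shared refcount
                         * untouched, so no second pool-wide update */
    UnpinBuffer(buf);   /* PrivateRefCount[b] 2 -> 1 */
    UnpinBuffer(buf);   /* PrivateRefCount[b] 1 -> 0, refcount 1 -> 0;
                         * buffer goes back on the freelist */

This is exactly the property the buf_init.c comment advertises: repeated pins by one backend cost a single shared-count update, and an aborting transaction can unpin precisely as many times as it pinned without blowing away another backend's pins.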

File: src/backend/storage/buffer/localbuf.c

@@ -1,21 +1,21 @@
/*-------------------------------------------------------------------------
*
* localbuf.c--
* local buffer manager. Fast buffer manager for temporary tables
* or special cases when the operation is not visible to other backends.
* local buffer manager. Fast buffer manager for temporary tables
* or special cases when the operation is not visible to other backends.
*
* When a relation is being created, the descriptor will have rd_islocal
* set to indicate that the local buffer manager should be used. During
* the same transaction the relation is being created, any inserts or
* selects from the newly created relation will use the local buffer
* pool. rd_islocal is reset at the end of a transaction (commit/abort).
* This is useful for queries like SELECT INTO TABLE and create index.
* When a relation is being created, the descriptor will have rd_islocal
* set to indicate that the local buffer manager should be used. During
* the same transaction the relation is being created, any inserts or
* selects from the newly created relation will use the local buffer
* pool. rd_islocal is reset at the end of a transaction (commit/abort).
* This is useful for queries like SELECT INTO TABLE and create index.
*
* Copyright (c) 1994-5, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/localbuf.c,v 1.8 1997/07/28 00:54:48 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/localbuf.c,v 1.9 1997/09/07 04:48:23 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -45,252 +45,262 @@
#include "utils/hsearch.h"
#include "utils/memutils.h"
#include "utils/relcache.h"
#include "executor/execdebug.h" /* for NDirectFileRead */
#include "executor/execdebug.h" /* for NDirectFileRead */
#include "catalog/catalog.h"
extern long int LocalBufferFlushCount;
int NLocBuffer = 64;
BufferDesc *LocalBufferDescriptors = NULL;
long *LocalRefCount = NULL;
int NLocBuffer = 64;
BufferDesc *LocalBufferDescriptors = NULL;
long *LocalRefCount = NULL;
static int nextFreeLocalBuf = 0;
static int nextFreeLocalBuf = 0;
/*#define LBDEBUG*/
/*
* LocalBufferAlloc -
* allocate a local buffer. We do round robin allocation for now.
* allocate a local buffer. We do round robin allocation for now.
*/
BufferDesc *
LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
BufferDesc *
LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool * foundPtr)
{
int i;
BufferDesc *bufHdr = (BufferDesc *) NULL;
int i;
BufferDesc *bufHdr = (BufferDesc *) NULL;
if (blockNum == P_NEW) {
blockNum = reln->rd_nblocks;
reln->rd_nblocks++;
}
if (blockNum == P_NEW)
{
blockNum = reln->rd_nblocks;
reln->rd_nblocks++;
}
/* a low tech search for now -- not optimized for scans */
for (i=0; i < NLocBuffer; i++) {
if (LocalBufferDescriptors[i].tag.relId.relId == reln->rd_id &&
LocalBufferDescriptors[i].tag.blockNum == blockNum) {
/* a low tech search for now -- not optimized for scans */
for (i = 0; i < NLocBuffer; i++)
{
if (LocalBufferDescriptors[i].tag.relId.relId == reln->rd_id &&
LocalBufferDescriptors[i].tag.blockNum == blockNum)
{
#ifdef LBDEBUG
fprintf(stderr, "LB ALLOC (%d,%d) %d\n",
reln->rd_id, blockNum, -i-1);
#endif
LocalRefCount[i]++;
*foundPtr = TRUE;
return &LocalBufferDescriptors[i];
fprintf(stderr, "LB ALLOC (%d,%d) %d\n",
reln->rd_id, blockNum, -i - 1);
#endif
LocalRefCount[i]++;
*foundPtr = TRUE;
return &LocalBufferDescriptors[i];
}
}
}
#ifdef LBDEBUG
fprintf(stderr, "LB ALLOC (%d,%d) %d\n",
reln->rd_id, blockNum, -nextFreeLocalBuf-1);
#endif
/* need to get a new buffer (round robin for now) */
for(i=0; i < NLocBuffer; i++) {
int b = (nextFreeLocalBuf + i) % NLocBuffer;
fprintf(stderr, "LB ALLOC (%d,%d) %d\n",
reln->rd_id, blockNum, -nextFreeLocalBuf - 1);
#endif
if (LocalRefCount[b]==0) {
bufHdr = &LocalBufferDescriptors[b];
LocalRefCount[b]++;
nextFreeLocalBuf = (b + 1) % NLocBuffer;
break;
/* need to get a new buffer (round robin for now) */
for (i = 0; i < NLocBuffer; i++)
{
int b = (nextFreeLocalBuf + i) % NLocBuffer;
if (LocalRefCount[b] == 0)
{
bufHdr = &LocalBufferDescriptors[b];
LocalRefCount[b]++;
nextFreeLocalBuf = (b + 1) % NLocBuffer;
break;
}
}
}
if (bufHdr==NULL)
elog(WARN, "no empty local buffer.");
if (bufHdr == NULL)
elog(WARN, "no empty local buffer.");
/*
* this buffer is not referenced but it might still be dirty (the
* last transaction to touch it doesn't need its contents but has
* not flushed it). if that's the case, write it out before
* reusing it!
*/
if (bufHdr->flags & BM_DIRTY) {
Relation bufrel = RelationIdCacheGetRelation(bufHdr->tag.relId.relId);
/*
* this buffer is not referenced but it might still be dirty (the last
* transaction to touch it doesn't need its contents but has not
* flushed it). if that's the case, write it out before reusing it!
*/
if (bufHdr->flags & BM_DIRTY)
{
Relation bufrel = RelationIdCacheGetRelation(bufHdr->tag.relId.relId);
Assert(bufrel != NULL);
/* flush this page */
smgrwrite(bufrel->rd_rel->relsmgr, bufrel, bufHdr->tag.blockNum,
(char *) MAKE_PTR(bufHdr->data));
LocalBufferFlushCount++;
}
Assert(bufrel != NULL);
/*
* it's all ours now.
*/
bufHdr->tag.relId.relId = reln->rd_id;
bufHdr->tag.blockNum = blockNum;
bufHdr->flags &= ~BM_DIRTY;
/* flush this page */
smgrwrite(bufrel->rd_rel->relsmgr, bufrel, bufHdr->tag.blockNum,
(char *) MAKE_PTR(bufHdr->data));
LocalBufferFlushCount++;
}
/*
* lazy memory allocation. (see MAKE_PTR for why we need to do
* MAKE_OFFSET.)
*/
if (bufHdr->data == (SHMEM_OFFSET)0) {
char *data = (char *)malloc(BLCKSZ);
/*
* it's all ours now.
*/
bufHdr->tag.relId.relId = reln->rd_id;
bufHdr->tag.blockNum = blockNum;
bufHdr->flags &= ~BM_DIRTY;
bufHdr->data = MAKE_OFFSET(data);
}
*foundPtr = FALSE;
return bufHdr;
/*
* lazy memory allocation. (see MAKE_PTR for why we need to do
* MAKE_OFFSET.)
*/
if (bufHdr->data == (SHMEM_OFFSET) 0)
{
char *data = (char *) malloc(BLCKSZ);
bufHdr->data = MAKE_OFFSET(data);
}
*foundPtr = FALSE;
return bufHdr;
}
/*
* WriteLocalBuffer -
* writes out a local buffer
* writes out a local buffer
*/
int
WriteLocalBuffer(Buffer buffer, bool release)
{
int bufid;
int bufid;
Assert(BufferIsLocal(buffer));
Assert(BufferIsLocal(buffer));
#ifdef LBDEBUG
fprintf(stderr, "LB WRITE %d\n", buffer);
#endif
bufid = - (buffer + 1);
LocalBufferDescriptors[bufid].flags |= BM_DIRTY;
fprintf(stderr, "LB WRITE %d\n", buffer);
#endif
if (release) {
Assert(LocalRefCount[bufid] > 0);
LocalRefCount[bufid]--;
}
bufid = -(buffer + 1);
LocalBufferDescriptors[bufid].flags |= BM_DIRTY;
return true;
if (release)
{
Assert(LocalRefCount[bufid] > 0);
LocalRefCount[bufid]--;
}
return true;
}
/*
* FlushLocalBuffer -
* flushes a local buffer
* flushes a local buffer
*/
int
FlushLocalBuffer(Buffer buffer, bool release)
{
int bufid;
Relation bufrel;
BufferDesc *bufHdr;
int bufid;
Relation bufrel;
BufferDesc *bufHdr;
Assert(BufferIsLocal(buffer));
Assert(BufferIsLocal(buffer));
#ifdef LBDEBUG
fprintf(stderr, "LB FLUSH %d\n", buffer);
#endif
fprintf(stderr, "LB FLUSH %d\n", buffer);
#endif
bufid = - (buffer + 1);
bufHdr = &LocalBufferDescriptors[bufid];
bufHdr->flags &= ~BM_DIRTY;
bufrel = RelationIdCacheGetRelation(bufHdr->tag.relId.relId);
bufid = -(buffer + 1);
bufHdr = &LocalBufferDescriptors[bufid];
bufHdr->flags &= ~BM_DIRTY;
bufrel = RelationIdCacheGetRelation(bufHdr->tag.relId.relId);
Assert(bufrel != NULL);
smgrflush(bufrel->rd_rel->relsmgr, bufrel, bufHdr->tag.blockNum,
(char *) MAKE_PTR(bufHdr->data));
LocalBufferFlushCount++;
Assert(bufrel != NULL);
smgrflush(bufrel->rd_rel->relsmgr, bufrel, bufHdr->tag.blockNum,
(char *) MAKE_PTR(bufHdr->data));
LocalBufferFlushCount++;
Assert(LocalRefCount[bufid] > 0);
if ( release )
LocalRefCount[bufid]--;
return true;
Assert(LocalRefCount[bufid] > 0);
if (release)
LocalRefCount[bufid]--;
return true;
}
/*
* InitLocalBuffer -
* init the local buffer cache. Since most queries (esp. multi-user ones)
* don't involve local buffers, we delay allocating memory for actual the
* buffer until we need it.
* init the local buffer cache. Since most queries (esp. multi-user ones)
* don't involve local buffers, we delay allocating memory for actual the
* buffer until we need it.
*/
void
InitLocalBuffer(void)
{
int i;
/*
* these aren't going away. I'm not gonna use palloc.
*/
LocalBufferDescriptors =
(BufferDesc *)malloc(sizeof(BufferDesc) * NLocBuffer);
memset(LocalBufferDescriptors, 0, sizeof(BufferDesc) * NLocBuffer);
nextFreeLocalBuf = 0;
for (i = 0; i < NLocBuffer; i++) {
BufferDesc *buf = &LocalBufferDescriptors[i];
int i;
/*
* negative to indicate local buffer. This is tricky: shared buffers
* start with 0. We have to start with -2. (Note that the routine
* BufferDescriptorGetBuffer adds 1 to buf_id so our first buffer id
* is -1.)
* these aren't going away. I'm not gonna use palloc.
*/
buf->buf_id = - i - 2;
}
LocalBufferDescriptors =
(BufferDesc *) malloc(sizeof(BufferDesc) * NLocBuffer);
memset(LocalBufferDescriptors, 0, sizeof(BufferDesc) * NLocBuffer);
nextFreeLocalBuf = 0;
LocalRefCount =
(long *)malloc(sizeof(long) * NLocBuffer);
memset(LocalRefCount, 0, sizeof(long) * NLocBuffer);
for (i = 0; i < NLocBuffer; i++)
{
BufferDesc *buf = &LocalBufferDescriptors[i];
/*
* negative to indicate local buffer. This is tricky: shared
* buffers start with 0. We have to start with -2. (Note that the
* routine BufferDescriptorGetBuffer adds 1 to buf_id so our first
* buffer id is -1.)
*/
buf->buf_id = -i - 2;
}
LocalRefCount =
(long *) malloc(sizeof(long) * NLocBuffer);
memset(LocalRefCount, 0, sizeof(long) * NLocBuffer);
}
/*
* LocalBufferSync -
* flush all dirty buffers in the local buffer cache. Since the buffer
* cache is only used for keeping relations visible during a transaction,
* we will not need these buffers again.
* flush all dirty buffers in the local buffer cache. Since the buffer
* cache is only used for keeping relations visible during a transaction,
* we will not need these buffers again.
*/
void
LocalBufferSync(void)
{
int i;
for (i = 0; i < NLocBuffer; i++) {
BufferDesc *buf = &LocalBufferDescriptors[i];
Relation bufrel;
int i;
if (buf->flags & BM_DIRTY) {
for (i = 0; i < NLocBuffer; i++)
{
BufferDesc *buf = &LocalBufferDescriptors[i];
Relation bufrel;
if (buf->flags & BM_DIRTY)
{
#ifdef LBDEBUG
fprintf(stderr, "LB SYNC %d\n", -i-1);
#endif
bufrel = RelationIdCacheGetRelation(buf->tag.relId.relId);
fprintf(stderr, "LB SYNC %d\n", -i - 1);
#endif
bufrel = RelationIdCacheGetRelation(buf->tag.relId.relId);
Assert(bufrel != NULL);
smgrwrite(bufrel->rd_rel->relsmgr, bufrel, buf->tag.blockNum,
(char *) MAKE_PTR(buf->data));
LocalBufferFlushCount++;
Assert(bufrel != NULL);
buf->tag.relId.relId = InvalidOid;
buf->flags &= ~BM_DIRTY;
smgrwrite(bufrel->rd_rel->relsmgr, bufrel, buf->tag.blockNum,
(char *) MAKE_PTR(buf->data));
LocalBufferFlushCount++;
buf->tag.relId.relId = InvalidOid;
buf->flags &= ~BM_DIRTY;
}
}
}
memset(LocalRefCount, 0, sizeof(long) * NLocBuffer);
nextFreeLocalBuf = 0;
memset(LocalRefCount, 0, sizeof(long) * NLocBuffer);
nextFreeLocalBuf = 0;
}
void
ResetLocalBufferPool(void)
{
int i;
int i;
for (i = 0; i < NLocBuffer; i++)
{
BufferDesc *buf = &LocalBufferDescriptors[i];
for (i = 0; i < NLocBuffer; i++)
{
BufferDesc *buf = &LocalBufferDescriptors[i];
buf->tag.relId.relId = InvalidOid;
buf->flags &= ~BM_DIRTY;
buf->buf_id = - i - 2;
}
buf->tag.relId.relId = InvalidOid;
buf->flags &= ~BM_DIRTY;
buf->buf_id = -i - 2;
}
memset(LocalRefCount, 0, sizeof(long) * NLocBuffer);
nextFreeLocalBuf = 0;
memset(LocalRefCount, 0, sizeof(long) * NLocBuffer);
nextFreeLocalBuf = 0;
}
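
A worked check of the negative-id encoding above: with buf_id = -i - 2, BufferDescriptorGetBuffer returns buf_id + 1, and the bufid = -(buffer + 1) computation in WriteLocalBuffer/FlushLocalBuffer recovers the array index.

    i = 0:   buf_id = -2,   buffer = -1,   -(buffer + 1) = 0
    i = 63:  buf_id = -65,  buffer = -64,  -(buffer + 1) = 63

So local buffer ids run from -1 downward, disjoint from the positive shared-buffer ids, which is what lets BufferIsLocal tell the two pools apart.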

File diff suppressed because it is too large

File diff suppressed because it is too large

File: src/backend/storage/ipc/ipci.c

@@ -1,13 +1,13 @@
/*-------------------------------------------------------------------------
*
* ipci.c--
* POSTGRES inter-process communication initialization code.
* POSTGRES inter-process communication initialization code.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipci.c,v 1.5 1997/01/08 08:32:03 bryanh Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipci.c,v 1.6 1997/09/07 04:48:33 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -23,129 +23,131 @@
#include "storage/proc.h"
#include "storage/smgr.h"
#include "storage/lock.h"
#include "miscadmin.h" /* for DebugLvl */
#include "miscadmin.h" /* for DebugLvl */
/*
* SystemPortAddressCreateMemoryKey --
* Returns a memory key given a port address.
* Returns a memory key given a port address.
*/
IPCKey
SystemPortAddressCreateIPCKey(SystemPortAddress address)
{
Assert(address < 32768); /* XXX */
return (SystemPortAddressGetIPCKey(address));
Assert(address < 32768); /* XXX */
return (SystemPortAddressGetIPCKey(address));
}
/*
* CreateSharedMemoryAndSemaphores --
* Creates and initializes shared memory and semaphores.
* Creates and initializes shared memory and semaphores.
*/
/**************************************************
CreateSharedMemoryAndSemaphores
is called exactly *ONCE* by the postmaster.
It is *NEVER* called by the postgres backend
0) destroy any existing semaphores for both buffer
and lock managers.
1) create the appropriate *SHARED* memory segments
for the two resource managers.
**************************************************/
void
CreateSharedMemoryAndSemaphores(IPCKey key)
{
int size;
#ifdef HAS_TEST_AND_SET
/* ---------------
* create shared memory for slocks
* --------------
*/
CreateAndInitSLockMemory(IPCKeyGetSLockSharedMemoryKey(key));
#endif
/* ----------------
* kill and create the buffer manager buffer pool (and semaphore)
* ----------------
*/
CreateSpinlocks(IPCKeyGetSpinLockSemaphoreKey(key));
size = BufferShmemSize() + LockShmemSize();
#ifdef MAIN_MEMORY
size += MMShmemSize();
#endif /* MAIN_MEMORY */
if (DebugLvl > 1) {
fprintf(stderr, "binding ShmemCreate(key=%x, size=%d)\n",
IPCKeyGetBufferMemoryKey(key), size);
}
ShmemCreate(IPCKeyGetBufferMemoryKey(key), size);
ShmemBindingTabReset();
InitShmem(key, size);
InitBufferPool(key);
/* ----------------
* do the lock table stuff
* ----------------
*/
InitLocks();
InitMultiLevelLockm();
if (InitMultiLevelLockm() == INVALID_TABLEID)
elog(FATAL, "Couldn't create the lock table");
int size;
/* ----------------
* do process table stuff
* ----------------
*/
InitProcGlobal(key);
on_exitpg(ProcFreeAllSemaphores, 0);
CreateSharedInvalidationState(key);
#ifdef HAS_TEST_AND_SET
/* ---------------
* create shared memory for slocks
* --------------
*/
CreateAndInitSLockMemory(IPCKeyGetSLockSharedMemoryKey(key));
#endif
/* ----------------
* kill and create the buffer manager buffer pool (and semaphore)
* ----------------
*/
CreateSpinlocks(IPCKeyGetSpinLockSemaphoreKey(key));
size = BufferShmemSize() + LockShmemSize();
#ifdef MAIN_MEMORY
size += MMShmemSize();
#endif /* MAIN_MEMORY */
if (DebugLvl > 1)
{
fprintf(stderr, "binding ShmemCreate(key=%x, size=%d)\n",
IPCKeyGetBufferMemoryKey(key), size);
}
ShmemCreate(IPCKeyGetBufferMemoryKey(key), size);
ShmemBindingTabReset();
InitShmem(key, size);
InitBufferPool(key);
/* ----------------
* do the lock table stuff
* ----------------
*/
InitLocks();
InitMultiLevelLockm();
if (InitMultiLevelLockm() == INVALID_TABLEID)
elog(FATAL, "Couldn't create the lock table");
/* ----------------
* do process table stuff
* ----------------
*/
InitProcGlobal(key);
on_exitpg(ProcFreeAllSemaphores, 0);
CreateSharedInvalidationState(key);
}
/*
* AttachSharedMemoryAndSemaphores --
* Attachs existant shared memory and semaphores.
* Attachs existant shared memory and semaphores.
*/
void
AttachSharedMemoryAndSemaphores(IPCKey key)
{
int size;
/* ----------------
* create rather than attach if using private key
* ----------------
*/
if (key == PrivateIPCKey) {
CreateSharedMemoryAndSemaphores(key);
return;
}
int size;
/* ----------------
* create rather than attach if using private key
* ----------------
*/
if (key == PrivateIPCKey)
{
CreateSharedMemoryAndSemaphores(key);
return;
}
#ifdef HAS_TEST_AND_SET
/* ----------------
* attach the slock shared memory
* ----------------
*/
AttachSLockMemory(IPCKeyGetSLockSharedMemoryKey(key));
/* ----------------
* attach the slock shared memory
* ----------------
*/
AttachSLockMemory(IPCKeyGetSLockSharedMemoryKey(key));
#endif
/* ----------------
* attach the buffer manager buffer pool (and semaphore)
* ----------------
*/
size = BufferShmemSize() + LockShmemSize();
InitShmem(key, size);
InitBufferPool(key);
/* ----------------
* initialize lock table stuff
* ----------------
*/
InitLocks();
if (InitMultiLevelLockm() == INVALID_TABLEID)
elog(FATAL, "Couldn't attach to the lock table");
AttachSharedInvalidationState(key);
/* ----------------
* attach the buffer manager buffer pool (and semaphore)
* ----------------
*/
size = BufferShmemSize() + LockShmemSize();
InitShmem(key, size);
InitBufferPool(key);
/* ----------------
* initialize lock table stuff
* ----------------
*/
InitLocks();
if (InitMultiLevelLockm() == INVALID_TABLEID)
elog(FATAL, "Couldn't attach to the lock table");
AttachSharedInvalidationState(key);
}
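
The division of labor between the two entry points, per the comments above (a usage sketch, not code from this file):

    /* postmaster, exactly once at startup */
    CreateSharedMemoryAndSemaphores(key);

    /* every backend, at process start */
    AttachSharedMemoryAndSemaphores(key);

    /* a standalone backend passes PrivateIPCKey, and the attach
     * routine falls through to the create path, per the test at
     * the top of AttachSharedMemoryAndSemaphores */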

File: src/backend/storage/ipc/s_lock.c

@@ -1,40 +1,40 @@
/*-------------------------------------------------------------------------
*
* s_lock.c--
* This file contains the implementation (if any) for spinlocks.
* This file contains the implementation (if any) for spinlocks.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/Attic/s_lock.c,v 1.21 1997/09/05 18:10:54 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/Attic/s_lock.c,v 1.22 1997/09/07 04:48:35 momjian Exp $
*
*-------------------------------------------------------------------------
*/
/*
* DESCRIPTION
* The following code fragment should be written (in assembly
* language) on machines that have a native test-and-set instruction:
* DESCRIPTION
* The following code fragment should be written (in assembly
* language) on machines that have a native test-and-set instruction:
*
* void
* S_LOCK(char_address)
* char *char_address;
* {
* while (test_and_set(char_address))
* ;
* }
* void
* S_LOCK(char_address)
* char *char_address;
* {
* while (test_and_set(char_address))
* ;
* }
*
* If this is not done, POSTGRES will default to using System V
* semaphores (and take a large performance hit -- around 40% of
* its time on a DS5000/240 is spent in semop(3)...).
* If this is not done, POSTGRES will default to using System V
* semaphores (and take a large performance hit -- around 40% of
* its time on a DS5000/240 is spent in semop(3)...).
*
* NOTES
* AIX has a test-and-set but the recommended interface is the cs(3)
* system call. This provides an 8-instruction (plus system call
* overhead) uninterruptible compare-and-set operation. True
* spinlocks might be faster but using cs(3) still speeds up the
* regression test suite by about 25%. I don't have an assembler
* manual for POWER in any case.
* NOTES
* AIX has a test-and-set but the recommended interface is the cs(3)
* system call. This provides an 8-instruction (plus system call
* overhead) uninterruptible compare-and-set operation. True
* spinlocks might be faster but using cs(3) still speeds up the
* regression test suite by about 25%. I don't have an assembler
* manual for POWER in any case.
*
*/
#include "postgres.h"
@@ -50,71 +50,71 @@
* slock_t is defined as a struct mutex.
*/
void
S_LOCK(slock_t *lock)
S_LOCK(slock_t * lock)
{
mutex_lock(lock);
}
void
S_UNLOCK(slock_t *lock)
S_UNLOCK(slock_t * lock)
{
mutex_unlock(lock);
}
void
S_INIT_LOCK(slock_t *lock)
S_INIT_LOCK(slock_t * lock)
{
mutex_init(lock);
mutex_init(lock);
}
/* S_LOCK_FREE should return 1 if lock is free; 0 if lock is locked */
int
S_LOCK_FREE(slock_t *lock)
S_LOCK_FREE(slock_t * lock)
{
/* For Mach, we have to delve inside the entrails of `struct mutex'. Ick! */
return (lock->lock == 0);
return (lock->lock == 0);
}
#endif /* next */
#endif /* next */
#if defined(irix5)
/*
* SGI IRIX 5
* slock_t is defined as a struct abilock_t, which has a single unsigned long
* slock_t is defined as a struct abilock_t, which has a single unsigned long
* member.
*
*
* This stuff may be supplemented in the future with Masato Kataoka's MIPS-II
* assembly from his NECEWS SVR4 port, but we probably ought to retain this
* for the R3000 chips out there.
*/
void
S_LOCK(slock_t *lock)
S_LOCK(slock_t * lock)
{
/* spin_lock(lock); */
while (!acquire_lock(lock))
;
;
}
void
S_UNLOCK(slock_t *lock)
S_UNLOCK(slock_t * lock)
{
release_lock(lock);
}
void
S_INIT_LOCK(slock_t *lock)
S_INIT_LOCK(slock_t * lock)
{
init_lock(lock);
init_lock(lock);
}
/* S_LOCK_FREE should return 1 if lock is free; 0 if lock is locked */
int
S_LOCK_FREE(slock_t *lock)
S_LOCK_FREE(slock_t * lock)
{
return(stat_lock(lock)==UNLOCKED);
return (stat_lock(lock) == UNLOCKED);
}
#endif /* irix5 */
#endif /* irix5 */
/*
@@ -127,62 +127,62 @@ S_LOCK_FREE(slock_t *lock)
#if defined(__alpha__) || defined(__alpha)
void
S_LOCK(slock_t *lock)
S_LOCK(slock_t * lock)
{
while (msem_lock(lock, MSEM_IF_NOWAIT) < 0)
;
while (msem_lock(lock, MSEM_IF_NOWAIT) < 0)
;
}
void
S_UNLOCK(slock_t *lock)
S_UNLOCK(slock_t * lock)
{
msem_unlock(lock, 0);
msem_unlock(lock, 0);
}
void
S_INIT_LOCK(slock_t *lock)
S_INIT_LOCK(slock_t * lock)
{
msem_init(lock, MSEM_UNLOCKED);
msem_init(lock, MSEM_UNLOCKED);
}
int
S_LOCK_FREE(slock_t *lock)
S_LOCK_FREE(slock_t * lock)
{
return(lock->msem_state ? 0 : 1);
return (lock->msem_state ? 0 : 1);
}
#endif /* alpha */
#endif /* alpha */
/*
* Solaris 2
*/
#if defined(i386_solaris) || \
defined(sparc_solaris)
defined(sparc_solaris)
/* for xxxxx_solaris, this is defined in port/.../tas.s */
static int tas(slock_t *lock);
static int tas(slock_t * lock);
void
S_LOCK(slock_t *lock)
S_LOCK(slock_t * lock)
{
while (tas(lock))
;
while (tas(lock))
;
}
void
S_UNLOCK(slock_t *lock)
S_UNLOCK(slock_t * lock)
{
*lock = 0;
*lock = 0;
}
void
S_INIT_LOCK(slock_t *lock)
S_INIT_LOCK(slock_t * lock)
{
S_UNLOCK(lock);
S_UNLOCK(lock);
}
#endif /* i86pc_solaris || sparc_solaris */
#endif /* i86pc_solaris || sparc_solaris */
/*
* AIX (POWER)
@@ -194,25 +194,25 @@ S_INIT_LOCK(slock_t *lock)
#if defined(aix)
void
S_LOCK(slock_t *lock)
S_LOCK(slock_t * lock)
{
while (cs((int *) lock, 0, 1))
;
while (cs((int *) lock, 0, 1))
;
}
void
S_UNLOCK(slock_t *lock)
S_UNLOCK(slock_t * lock)
{
*lock = 0;
*lock = 0;
}
void
S_INIT_LOCK(slock_t *lock)
S_INIT_LOCK(slock_t * lock)
{
S_UNLOCK(lock);
S_UNLOCK(lock);
}
#endif /* aix */
#endif /* aix */
/*
* HP-UX (PA-RISC)
@@ -224,90 +224,90 @@ S_INIT_LOCK(slock_t *lock)
#if defined(hpux)
/*
* a "set" slock_t has a single word cleared. a "clear" slock_t has
* a "set" slock_t has a single word cleared. a "clear" slock_t has
* all words set to non-zero.
*/
static slock_t clear_lock = { -1, -1, -1, -1 };
static slock_t clear_lock = {-1, -1, -1, -1};
static int tas(slock_t *lock);
static int tas(slock_t * lock);
void
S_LOCK(slock_t *lock)
S_LOCK(slock_t * lock)
{
while (tas(lock))
;
while (tas(lock))
;
}
void
S_UNLOCK(slock_t *lock)
S_UNLOCK(slock_t * lock)
{
*lock = clear_lock; /* struct assignment */
*lock = clear_lock; /* struct assignment */
}
void
S_INIT_LOCK(slock_t *lock)
S_INIT_LOCK(slock_t * lock)
{
S_UNLOCK(lock);
S_UNLOCK(lock);
}
int
S_LOCK_FREE(slock_t *lock)
S_LOCK_FREE(slock_t * lock)
{
register int *lock_word = (int *) (((long) lock + 15) & ~15);
register int *lock_word = (int *) (((long) lock + 15) & ~15);
return(*lock_word != 0);
return (*lock_word != 0);
}
#endif /* hpux */
#endif /* hpux */
/*
* sun3
*/
#if defined(sun3)
static int tas(slock_t *lock);
static int tas(slock_t * lock);
void
S_LOCK(slock_t *lock)
void
S_LOCK(slock_t * lock)
{
while (tas(lock));
while (tas(lock));
}
void
S_UNLOCK(slock_t *lock)
S_UNLOCK(slock_t * lock)
{
*lock = 0;
*lock = 0;
}
void
S_INIT_LOCK(slock_t *lock)
S_INIT_LOCK(slock_t * lock)
{
S_UNLOCK(lock);
S_UNLOCK(lock);
}
static int
tas_dummy()
{
asm("LLA0:");
asm(" .data");
asm(" .text");
asm("|#PROC# 04");
asm(" .globl _tas");
asm("_tas:");
asm("|#PROLOGUE# 1");
asm(" movel sp@(0x4),a0");
asm(" tas a0@");
asm(" beq LLA1");
asm(" moveq #-128,d0");
asm(" rts");
asm("LLA1:");
asm(" moveq #0,d0");
asm(" rts");
asm(" .data");
asm("LLA0:");
asm(" .data");
asm(" .text");
asm("|#PROC# 04");
asm(" .globl _tas");
asm("_tas:");
asm("|#PROLOGUE# 1");
asm(" movel sp@(0x4),a0");
asm(" tas a0@");
asm(" beq LLA1");
asm(" moveq #-128,d0");
asm(" rts");
asm("LLA1:");
asm(" moveq #0,d0");
asm(" rts");
asm(" .data");
}
#endif /* sun3 */
#endif /* sun3 */
/*
* sparc machines
@@ -317,48 +317,48 @@ tas_dummy()
/* if we're using -ansi w/ gcc, use __asm__ instead of asm */
#if defined(__STRICT_ANSI__)
#define asm(x) __asm__(x)
#endif
#define asm(x) __asm__(x)
#endif
static int tas(slock_t *lock);
static int tas(slock_t * lock);
static int
tas_dummy()
{
asm(".seg \"data\"");
asm(".seg \"text\"");
asm(".global _tas");
asm("_tas:");
/*
* Sparc atomic test and set (sparc calls it "atomic load-store")
*/
asm("ldstub [%r8], %r8");
/*
* Did test and set actually do the set?
*/
asm("tst %r8");
asm("be,a ReturnZero");
/*
* otherwise, just return.
*/
asm("clr %r8");
asm("mov 0x1, %r8");
asm("ReturnZero:");
asm("retl");
asm("nop");
asm(".seg \"data\"");
asm(".seg \"text\"");
asm(".global _tas");
asm("_tas:");
/*
* Sparc atomic test and set (sparc calls it "atomic load-store")
*/
asm("ldstub [%r8], %r8");
/*
* Did test and set actually do the set?
*/
asm("tst %r8");
asm("be,a ReturnZero");
/*
* otherwise, just return.
*/
asm("clr %r8");
asm("mov 0x1, %r8");
asm("ReturnZero:");
asm("retl");
asm("nop");
}
void
S_LOCK(unsigned char *addr)
{
while (tas(addr));
while (tas(addr));
}
@@ -368,16 +368,16 @@ S_LOCK(unsigned char *addr)
void
S_UNLOCK(unsigned char *addr)
{
*addr = 0;
*addr = 0;
}
void
S_INIT_LOCK(unsigned char *addr)
{
*addr = 0;
*addr = 0;
}
#endif /* NEED_SPARC_TAS_ASM */
#endif /* NEED_SPARC_TAS_ASM */
/*
* i386 based things
@@ -386,39 +386,41 @@ S_INIT_LOCK(unsigned char *addr)
#if defined(NEED_I386_TAS_ASM)
void
S_LOCK(slock_t *lock)
S_LOCK(slock_t * lock)
{
slock_t res;
slock_t res;
do{
__asm__("xchgb %0,%1":"=q" (res),"=m" (*lock):"0" (0x1));
}while(res != 0);
do
{
__asm__("xchgb %0,%1": "=q"(res), "=m"(*lock):"0"(0x1));
} while (res != 0);
}
void
S_UNLOCK(slock_t *lock)
S_UNLOCK(slock_t * lock)
{
*lock = 0;
*lock = 0;
}
void
S_INIT_LOCK(slock_t *lock)
S_INIT_LOCK(slock_t * lock)
{
S_UNLOCK(lock);
S_UNLOCK(lock);
}
#endif /* NEED_I386_TAS_ASM */
#endif /* NEED_I386_TAS_ASM */
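The xchgb loop above atomically stores 1 into the lock byte and keeps retrying while the previous value was already 1. A hypothetical portable rendering of the same test-and-set semantics, using GCC's __sync builtins purely for comparison (they are not part of this source tree):

typedef unsigned char my_slock_t;	/* hypothetical stand-in for slock_t */

static void
my_s_lock(volatile my_slock_t *lock)
{
	/* __sync_lock_test_and_set returns the previous value of *lock;
	 * 0 means the lock was free and we now hold it */
	while (__sync_lock_test_and_set(lock, 1) != 0)
		;						/* spin */
}

static void
my_s_unlock(volatile my_slock_t *lock)
{
	__sync_lock_release(lock);	/* store 0 with release ordering */
}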
#if defined(__alpha__) && defined(linux)
void
S_LOCK(slock_t *lock)
S_LOCK(slock_t * lock)
{
slock_t res;
slock_t res;
do{
__asm__(" ldq $0, %0 \n\
do
{
__asm__(" ldq $0, %0 \n\
bne $0, already_set \n\
ldq_l $0, %0 \n\
bne $0, already_set \n\
@@ -430,56 +432,58 @@ S_LOCK(slock_t *lock)
jmp $31, end \n\
stqc_fail: or $31, 1, $0 \n\
already_set: bis $0, $0, %1 \n\
end: nop " : "=m" (*lock), "=r" (res) :: "0" );
}while(res != 0);
end: nop ": "=m"(*lock), "=r"(res): :"0");
} while (res != 0);
}
void
S_UNLOCK(slock_t *lock)
S_UNLOCK(slock_t * lock)
{
__asm__("mb");
*lock = 0;
__asm__("mb");
*lock = 0;
}
void
S_INIT_LOCK(slock_t *lock)
S_INIT_LOCK(slock_t * lock)
{
S_UNLOCK(lock);
S_UNLOCK(lock);
}
#endif /* defined(__alpha__) && defined(linux) */
#endif /* defined(__alpha__) && defined(linux) */
#if defined(linux) && defined(sparc)
void
S_LOCK(slock_t *lock)
{
slock_t res;
do{
__asm__("ldstub [%1], %0"
: "=&r" (res)
: "r" (lock));
}while(res != 0);
void
S_LOCK(slock_t * lock)
{
slock_t res;
do
{
__asm__("ldstub [%1], %0"
: "=&r"(res)
: "r"(lock));
} while (res != 0);
}
void
S_UNLOCK(slock_t *lock)
S_UNLOCK(slock_t * lock)
{
*lock = 0;
*lock = 0;
}
void
S_INIT_LOCK(slock_t *lock)
S_INIT_LOCK(slock_t * lock)
{
S_UNLOCK(lock);
S_UNLOCK(lock);
}
#endif /* defined(linux) && defined(sparc) */
#endif /* defined(linux) && defined(sparc) */
#if defined(linux) && defined(PPC)
static int tas_dummy()
static int
tas_dummy()
{
__asm__(" \n\
tas: \n\
@@ -496,26 +500,26 @@ success: \n\
blr \n\
");
}
void
S_LOCK(slock_t *lock)
S_LOCK(slock_t * lock)
{
while (tas(lock))
;
while (tas(lock))
;
}
void
S_UNLOCK(slock_t *lock)
S_UNLOCK(slock_t * lock)
{
*lock = 0;
*lock = 0;
}
void
S_INIT_LOCK(slock_t *lock)
S_INIT_LOCK(slock_t * lock)
{
S_UNLOCK(lock);
S_UNLOCK(lock);
}
#endif /* defined(linux) && defined(PPC) */
#endif /* defined(linux) && defined(PPC) */
#endif /* HAS_TEST_AND_SET */
#endif /* HAS_TEST_AND_SET */

File diff suppressed because it is too large

@@ -1,19 +1,19 @@
/*-------------------------------------------------------------------------
*
* shmqueue.c--
* shared memory linked lists
* shared memory linked lists
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/shmqueue.c,v 1.3 1997/08/19 21:33:06 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/shmqueue.c,v 1.4 1997/09/07 04:48:42 momjian Exp $
*
* NOTES
*
* Package for managing doubly-linked lists in shared memory.
* The only tricky thing is that SHM_QUEUE will usually be a field
* in a larger record. SHMQueueGetFirst has to return a pointer
* The only tricky thing is that SHM_QUEUE will usually be a field
* in a larger record. SHMQueueGetFirst has to return a pointer
* to the record itself instead of a pointer to the SHMQueue field
* of the record. It takes an extra pointer and does some extra
* pointer arithmetic to do this correctly.
@@ -22,178 +22,181 @@
*
*-------------------------------------------------------------------------
*/
#include <stdio.h> /* for sprintf() */
#include <stdio.h> /* for sprintf() */
#include "postgres.h"
#include "storage/shmem.h" /* where the declarations go */
#include "storage/shmem.h" /* where the declarations go */
/*#define SHMQUEUE_DEBUG*/
#ifdef SHMQUEUE_DEBUG
#define SHMQUEUE_DEBUG_DEL /* deletions */
#define SHMQUEUE_DEBUG_HD /* head inserts */
#define SHMQUEUE_DEBUG_TL /* tail inserts */
#define SHMQUEUE_DEBUG_DEL /* deletions */
#define SHMQUEUE_DEBUG_HD /* head inserts */
#define SHMQUEUE_DEBUG_TL /* tail inserts */
#define SHMQUEUE_DEBUG_ELOG NOTICE
#endif /* SHMQUEUE_DEBUG */
#endif /* SHMQUEUE_DEBUG */
/*
* ShmemQueueInit -- make the head of a new queue point
* to itself
* to itself
*/
void
SHMQueueInit(SHM_QUEUE *queue)
SHMQueueInit(SHM_QUEUE * queue)
{
Assert(SHM_PTR_VALID(queue));
(queue)->prev = (queue)->next = MAKE_OFFSET(queue);
Assert(SHM_PTR_VALID(queue));
(queue)->prev = (queue)->next = MAKE_OFFSET(queue);
}
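Because the links are SHMEM_OFFSETs rather than raw pointers, the list stays valid even though each backend may map the shared segment at a different address. A minimal sketch of the usual embedding (ELEMType is hypothetical; both objects are assumed to live in shared memory):

typedef struct ELEMType
{
	int			stuff;			/* payload fields */
	SHM_QUEUE	elem;			/* the links live inside the record */
} ELEMType;

void
example_enqueue(SHM_QUEUE * head, ELEMType * item)
{
	SHMQueueElemInit(&item->elem);		/* prev = next = INVALID_OFFSET */
	SHMQueueInsertTL(head, &item->elem);	/* splice in next to the head */
}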
/*
* SHMQueueIsDetached -- TRUE if element is not currently
* in a queue.
* in a queue.
*/
#ifdef NOT_USED
bool
SHMQueueIsDetached(SHM_QUEUE *queue)
SHMQueueIsDetached(SHM_QUEUE * queue)
{
Assert(SHM_PTR_VALID(queue));
return ((queue)->prev == INVALID_OFFSET);
Assert(SHM_PTR_VALID(queue));
return ((queue)->prev == INVALID_OFFSET);
}
#endif
/*
* SHMQueueElemInit -- clear an element's links
*/
void
SHMQueueElemInit(SHM_QUEUE *queue)
SHMQueueElemInit(SHM_QUEUE * queue)
{
Assert(SHM_PTR_VALID(queue));
(queue)->prev = (queue)->next = INVALID_OFFSET;
Assert(SHM_PTR_VALID(queue));
(queue)->prev = (queue)->next = INVALID_OFFSET;
}
/*
* SHMQueueDelete -- remove an element from the queue and
* close the links
* close the links
*/
void
SHMQueueDelete(SHM_QUEUE *queue)
SHMQueueDelete(SHM_QUEUE * queue)
{
SHM_QUEUE *nextElem = (SHM_QUEUE *) MAKE_PTR((queue)->next);
SHM_QUEUE *prevElem = (SHM_QUEUE *) MAKE_PTR((queue)->prev);
Assert(SHM_PTR_VALID(queue));
Assert(SHM_PTR_VALID(nextElem));
Assert(SHM_PTR_VALID(prevElem));
SHM_QUEUE *nextElem = (SHM_QUEUE *) MAKE_PTR((queue)->next);
SHM_QUEUE *prevElem = (SHM_QUEUE *) MAKE_PTR((queue)->prev);
Assert(SHM_PTR_VALID(queue));
Assert(SHM_PTR_VALID(nextElem));
Assert(SHM_PTR_VALID(prevElem));
#ifdef SHMQUEUE_DEBUG_DEL
dumpQ(queue, "in SHMQueueDelete: begin");
#endif /* SHMQUEUE_DEBUG_DEL */
prevElem->next = (queue)->next;
nextElem->prev = (queue)->prev;
dumpQ(queue, "in SHMQueueDelete: begin");
#endif /* SHMQUEUE_DEBUG_DEL */
prevElem->next = (queue)->next;
nextElem->prev = (queue)->prev;
#ifdef SHMQUEUE_DEBUG_DEL
dumpQ((SHM_QUEUE *)MAKE_PTR(queue->prev), "in SHMQueueDelete: end");
#endif /* SHMQUEUE_DEBUG_DEL */
dumpQ((SHM_QUEUE *) MAKE_PTR(queue->prev), "in SHMQueueDelete: end");
#endif /* SHMQUEUE_DEBUG_DEL */
}
#ifdef SHMQUEUE_DEBUG
void
dumpQ(SHM_QUEUE *q, char *s)
dumpQ(SHM_QUEUE * q, char *s)
{
char elem[16];
char buf[1024];
SHM_QUEUE *start = q;
int count = 0;
sprintf(buf, "q prevs: %x", MAKE_OFFSET(q));
q = (SHM_QUEUE *)MAKE_PTR(q->prev);
while (q != start)
char elem[16];
char buf[1024];
SHM_QUEUE *start = q;
int count = 0;
sprintf(buf, "q prevs: %x", MAKE_OFFSET(q));
q = (SHM_QUEUE *) MAKE_PTR(q->prev);
while (q != start)
{
sprintf(elem, "--->%x", MAKE_OFFSET(q));
strcat(buf, elem);
q = (SHM_QUEUE *)MAKE_PTR(q->prev);
if (q->prev == MAKE_OFFSET(q))
break;
if (count++ > 40)
sprintf(elem, "--->%x", MAKE_OFFSET(q));
strcat(buf, elem);
q = (SHM_QUEUE *) MAKE_PTR(q->prev);
if (q->prev == MAKE_OFFSET(q))
break;
if (count++ > 40)
{
strcat(buf, "BAD PREV QUEUE!!");
break;
strcat(buf, "BAD PREV QUEUE!!");
break;
}
}
sprintf(elem, "--->%x", MAKE_OFFSET(q));
strcat(buf, elem);
elog(SHMQUEUE_DEBUG_ELOG, "%s: %s", s, buf);
sprintf(buf, "q nexts: %x", MAKE_OFFSET(q));
count = 0;
q = (SHM_QUEUE *)MAKE_PTR(q->next);
while (q != start)
sprintf(elem, "--->%x", MAKE_OFFSET(q));
strcat(buf, elem);
elog(SHMQUEUE_DEBUG_ELOG, "%s: %s", s, buf);
sprintf(buf, "q nexts: %x", MAKE_OFFSET(q));
count = 0;
q = (SHM_QUEUE *) MAKE_PTR(q->next);
while (q != start)
{
sprintf(elem, "--->%x", MAKE_OFFSET(q));
strcat(buf, elem);
q = (SHM_QUEUE *)MAKE_PTR(q->next);
if (q->next == MAKE_OFFSET(q))
break;
if (count++ > 10)
sprintf(elem, "--->%x", MAKE_OFFSET(q));
strcat(buf, elem);
q = (SHM_QUEUE *) MAKE_PTR(q->next);
if (q->next == MAKE_OFFSET(q))
break;
if (count++ > 10)
{
strcat(buf, "BAD NEXT QUEUE!!");
break;
strcat(buf, "BAD NEXT QUEUE!!");
break;
}
}
sprintf(elem, "--->%x", MAKE_OFFSET(q));
strcat(buf, elem);
elog(SHMQUEUE_DEBUG_ELOG, "%s: %s", s, buf);
sprintf(elem, "--->%x", MAKE_OFFSET(q));
strcat(buf, elem);
elog(SHMQUEUE_DEBUG_ELOG, "%s: %s", s, buf);
}
#endif /* SHMQUEUE_DEBUG */
#endif /* SHMQUEUE_DEBUG */
/*
* SHMQueueInsertHD -- put elem in queue between the queue head
* and its "prev" element.
* and its "prev" element.
*/
#ifdef NOT_USED
void
SHMQueueInsertHD(SHM_QUEUE *queue, SHM_QUEUE *elem)
SHMQueueInsertHD(SHM_QUEUE * queue, SHM_QUEUE * elem)
{
SHM_QUEUE *prevPtr = (SHM_QUEUE *) MAKE_PTR((queue)->prev);
SHMEM_OFFSET elemOffset = MAKE_OFFSET(elem);
Assert(SHM_PTR_VALID(queue));
Assert(SHM_PTR_VALID(elem));
SHM_QUEUE *prevPtr = (SHM_QUEUE *) MAKE_PTR((queue)->prev);
SHMEM_OFFSET elemOffset = MAKE_OFFSET(elem);
Assert(SHM_PTR_VALID(queue));
Assert(SHM_PTR_VALID(elem));
#ifdef SHMQUEUE_DEBUG_HD
dumpQ(queue, "in SHMQueueInsertHD: begin");
#endif /* SHMQUEUE_DEBUG_HD */
(elem)->next = prevPtr->next;
(elem)->prev = queue->prev;
(queue)->prev = elemOffset;
prevPtr->next = elemOffset;
dumpQ(queue, "in SHMQueueInsertHD: begin");
#endif /* SHMQUEUE_DEBUG_HD */
(elem)->next = prevPtr->next;
(elem)->prev = queue->prev;
(queue)->prev = elemOffset;
prevPtr->next = elemOffset;
#ifdef SHMQUEUE_DEBUG_HD
dumpQ(queue, "in SHMQueueInsertHD: end");
#endif /* SHMQUEUE_DEBUG_HD */
dumpQ(queue, "in SHMQueueInsertHD: end");
#endif /* SHMQUEUE_DEBUG_HD */
}
#endif
void
SHMQueueInsertTL(SHM_QUEUE *queue, SHM_QUEUE *elem)
SHMQueueInsertTL(SHM_QUEUE * queue, SHM_QUEUE * elem)
{
SHM_QUEUE *nextPtr = (SHM_QUEUE *) MAKE_PTR((queue)->next);
SHMEM_OFFSET elemOffset = MAKE_OFFSET(elem);
Assert(SHM_PTR_VALID(queue));
Assert(SHM_PTR_VALID(elem));
SHM_QUEUE *nextPtr = (SHM_QUEUE *) MAKE_PTR((queue)->next);
SHMEM_OFFSET elemOffset = MAKE_OFFSET(elem);
Assert(SHM_PTR_VALID(queue));
Assert(SHM_PTR_VALID(elem));
#ifdef SHMQUEUE_DEBUG_TL
dumpQ(queue, "in SHMQueueInsertTL: begin");
#endif /* SHMQUEUE_DEBUG_TL */
(elem)->prev = nextPtr->prev;
(elem)->next = queue->next;
(queue)->next = elemOffset;
nextPtr->prev = elemOffset;
dumpQ(queue, "in SHMQueueInsertTL: begin");
#endif /* SHMQUEUE_DEBUG_TL */
(elem)->prev = nextPtr->prev;
(elem)->next = queue->next;
(queue)->next = elemOffset;
nextPtr->prev = elemOffset;
#ifdef SHMQUEUE_DEBUG_TL
dumpQ(queue, "in SHMQueueInsertTL: end");
#endif /* SHMQUEUE_DEBUG_TL */
dumpQ(queue, "in SHMQueueInsertTL: end");
#endif /* SHMQUEUE_DEBUG_TL */
}
/*
@@ -203,52 +206,51 @@ SHMQueueInsertTL(SHM_QUEUE *queue, SHM_QUEUE *elem)
* a larger structure, we want to return a pointer to the
* whole structure rather than a pointer to its SHMQueue field.
* I.E. struct {
* int stuff;
* SHMQueue elem;
* } ELEMType;
* int stuff;
* SHMQueue elem;
* } ELEMType;
 *	when this element is in a queue, (queue->next) is struct.elem.
* nextQueue allows us to calculate the offset of the SHMQueue
* field in the structure.
*
* call to SHMQueueFirst should take these parameters:
*
* &(queueHead),&firstElem,&(firstElem->next)
* &(queueHead),&firstElem,&(firstElem->next)
*
 * Note that firstElem may well be uninitialized.  If firstElem
 * is initially K, &(firstElem->next) will be K + the offset to
* next.
*/
void
SHMQueueFirst(SHM_QUEUE *queue, Pointer *nextPtrPtr, SHM_QUEUE *nextQueue)
SHMQueueFirst(SHM_QUEUE * queue, Pointer * nextPtrPtr, SHM_QUEUE * nextQueue)
{
SHM_QUEUE *elemPtr = (SHM_QUEUE *) MAKE_PTR((queue)->next);
Assert(SHM_PTR_VALID(queue));
*nextPtrPtr = (Pointer) (((unsigned long) *nextPtrPtr) +
((unsigned long) elemPtr) - ((unsigned long) nextQueue));
/*
nextPtrPtr a ptr to a structure linked in the queue
nextQueue is the SHMQueue field of the structure
	  *nextPtrPtr - nextQueue is 0 minus the offset of the queue
	  field in the record
	  elemPtr + (*nextPtrPtr - nextQueue) is the start of the
	  structure containing elemPtr.
*/
SHM_QUEUE *elemPtr = (SHM_QUEUE *) MAKE_PTR((queue)->next);
Assert(SHM_PTR_VALID(queue));
*nextPtrPtr = (Pointer) (((unsigned long) *nextPtrPtr) +
((unsigned long) elemPtr) - ((unsigned long) nextQueue));
/*
	 * nextPtrPtr is a ptr to a structure linked in the queue; nextQueue is
	 * the SHMQueue field of the structure.  *nextPtrPtr - nextQueue is 0
	 * minus the offset of the queue field in the record; elemPtr +
	 * (*nextPtrPtr - nextQueue) is the start of the structure containing
	 * elemPtr.
*/
}
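Following the calling convention spelled out above, a hedged usage sketch with the hypothetical ELEMType record; the apparent use of an uninitialized pointer is the documented trick, since only the offset of the elem field is ever used:

void
example_first(SHM_QUEUE * head)
{
	ELEMType   *first = (ELEMType *) 0;		/* any base value works */

	/* &first->elem == base + offset of elem; only the offset matters */
	SHMQueueFirst(head, (Pointer *) &first, &first->elem);
	/* first now points at the record containing the head's next element */
}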
/*
* SHMQueueEmpty -- TRUE if queue head is only element, FALSE otherwise
*/
bool
SHMQueueEmpty(SHM_QUEUE *queue)
SHMQueueEmpty(SHM_QUEUE * queue)
{
Assert(SHM_PTR_VALID(queue));
if (queue->prev == MAKE_OFFSET(queue))
Assert(SHM_PTR_VALID(queue));
if (queue->prev == MAKE_OFFSET(queue))
{
	Assert(queue->next == MAKE_OFFSET(queue));
	return(TRUE);
	Assert(queue->next == MAKE_OFFSET(queue));
	return (TRUE);
}
return(FALSE);
return (FALSE);
}


@@ -1,17 +1,17 @@
/*-------------------------------------------------------------------------
*
* sinval.c--
* POSTGRES shared cache invalidation communication code.
* POSTGRES shared cache invalidation communication code.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinval.c,v 1.7 1997/08/12 22:53:58 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinval.c,v 1.8 1997/09/07 04:48:43 momjian Exp $
*
*-------------------------------------------------------------------------
*/
/* #define INVALIDDEBUG 1 */
/* #define INVALIDDEBUG 1 */
#include <sys/types.h>
@@ -22,150 +22,156 @@
#include "storage/sinvaladt.h"
#include "storage/spin.h"
extern SISeg *shmInvalBuffer;/* the shared buffer segment, set by*/
/* SISegmentAttach() */
extern BackendId MyBackendId;
extern BackendTag MyBackendTag;
extern SISeg *shmInvalBuffer; /* the shared buffer segment, set by */
/* SISegmentAttach() */
extern BackendId MyBackendId;
extern BackendTag MyBackendTag;
SPINLOCK SInvalLock = (SPINLOCK) NULL;
/****************************************************************************/
/* CreateSharedInvalidationState(key) Create a buffer segment */
/* */
/* should be called only by the POSTMASTER */
/* CreateSharedInvalidationState(key) Create a buffer segment */
/* */
/* should be called only by the POSTMASTER */
/****************************************************************************/
void
CreateSharedInvalidationState(IPCKey key)
{
int status;
/* REMOVED
SISyncKill(IPCKeyGetSIBufferMemorySemaphoreKey(key));
SISyncInit(IPCKeyGetSIBufferMemorySemaphoreKey(key));
*/
/* SInvalLock gets set in spin.c, during spinlock init */
status = SISegmentInit(true, IPCKeyGetSIBufferMemoryBlock(key));
if (status == -1) {
elog(FATAL, "CreateSharedInvalidationState: failed segment init");
}
int status;
/*
* REMOVED SISyncKill(IPCKeyGetSIBufferMemorySemaphoreKey(key));
* SISyncInit(IPCKeyGetSIBufferMemorySemaphoreKey(key));
*/
/* SInvalLock gets set in spin.c, during spinlock init */
status = SISegmentInit(true, IPCKeyGetSIBufferMemoryBlock(key));
if (status == -1)
{
elog(FATAL, "CreateSharedInvalidationState: failed segment init");
}
}
/****************************************************************************/
/* AttachSharedInvalidationState(key) Attach a buffer segment */
/* */
/* should be called only by the POSTMASTER */
/* AttachSharedInvalidationState(key) Attach a buffer segment */
/* */
/* should be called only by the POSTMASTER */
/****************************************************************************/
void
AttachSharedInvalidationState(IPCKey key)
{
int status;
if (key == PrivateIPCKey) {
CreateSharedInvalidationState(key);
return;
}
/* SInvalLock gets set in spin.c, during spinlock init */
status = SISegmentInit(false, IPCKeyGetSIBufferMemoryBlock(key));
if (status == -1) {
elog(FATAL, "AttachSharedInvalidationState: failed segment init");
}
int status;
if (key == PrivateIPCKey)
{
CreateSharedInvalidationState(key);
return;
}
/* SInvalLock gets set in spin.c, during spinlock init */
status = SISegmentInit(false, IPCKeyGetSIBufferMemoryBlock(key));
if (status == -1)
{
elog(FATAL, "AttachSharedInvalidationState: failed segment init");
}
}
void
InitSharedInvalidationState(void)
{
SpinAcquire(SInvalLock);
if (!SIBackendInit(shmInvalBuffer))
SpinAcquire(SInvalLock);
if (!SIBackendInit(shmInvalBuffer))
{
SpinRelease(SInvalLock);
elog(FATAL, "Backend cache invalidation initialization failed");
SpinRelease(SInvalLock);
elog(FATAL, "Backend cache invalidation initialization failed");
}
SpinRelease(SInvalLock);
SpinRelease(SInvalLock);
}
/*
* RegisterSharedInvalid --
* Returns a new local cache invalidation state containing a new entry.
* Returns a new local cache invalidation state containing a new entry.
*
* Note:
* Assumes hash index is valid.
* Assumes item pointer is valid.
* Assumes hash index is valid.
* Assumes item pointer is valid.
*/
/****************************************************************************/
/* RegisterSharedInvalid(cacheId, hashIndex, pointer) */
/* */
/* register a message in the buffer */
/* should be called by a backend */
/* RegisterSharedInvalid(cacheId, hashIndex, pointer) */
/* */
/* register a message in the buffer */
/* should be called by a backend */
/****************************************************************************/
void
RegisterSharedInvalid(int cacheId, /* XXX */
Index hashIndex,
ItemPointer pointer)
RegisterSharedInvalid(int cacheId, /* XXX */
Index hashIndex,
ItemPointer pointer)
{
SharedInvalidData newInvalid;
/*
* This code has been hacked to accept two types of messages. This might
* be treated more generally in the future.
*
* (1)
* cacheId= system cache id
* hashIndex= system cache hash index for a (possibly) cached tuple
* pointer= pointer of (possibly) cached tuple
*
* (2)
* cacheId= special non-syscache id
* hashIndex= object id contained in (possibly) cached relation descriptor
* pointer= null
*/
newInvalid.cacheId = cacheId;
newInvalid.hashIndex = hashIndex;
if (ItemPointerIsValid(pointer)) {
ItemPointerCopy(pointer, &newInvalid.pointerData);
} else {
ItemPointerSetInvalid(&newInvalid.pointerData);
}
SpinAcquire(SInvalLock);
if (!SISetDataEntry(shmInvalBuffer, &newInvalid)) {
/* buffer full */
/* release a message, mark process cache states to be invalid */
SISetProcStateInvalid(shmInvalBuffer);
if (!SIDelDataEntry(shmInvalBuffer)) {
/* inconsistent buffer state -- should never happen */
SpinRelease(SInvalLock);
elog(FATAL, "RegisterSharedInvalid: inconsistent buffer state");
}
/* write again */
SISetDataEntry(shmInvalBuffer, &newInvalid);
}
SpinRelease(SInvalLock);
SharedInvalidData newInvalid;
/*
* This code has been hacked to accept two types of messages. This
* might be treated more generally in the future.
*
* (1) cacheId= system cache id hashIndex= system cache hash index for a
* (possibly) cached tuple pointer= pointer of (possibly) cached tuple
*
* (2) cacheId= special non-syscache id hashIndex= object id contained in
* (possibly) cached relation descriptor pointer= null
*/
newInvalid.cacheId = cacheId;
newInvalid.hashIndex = hashIndex;
if (ItemPointerIsValid(pointer))
{
ItemPointerCopy(pointer, &newInvalid.pointerData);
}
else
{
ItemPointerSetInvalid(&newInvalid.pointerData);
}
SpinAcquire(SInvalLock);
if (!SISetDataEntry(shmInvalBuffer, &newInvalid))
{
/* buffer full */
/* release a message, mark process cache states to be invalid */
SISetProcStateInvalid(shmInvalBuffer);
if (!SIDelDataEntry(shmInvalBuffer))
{
/* inconsistent buffer state -- should never happen */
SpinRelease(SInvalLock);
elog(FATAL, "RegisterSharedInvalid: inconsistent buffer state");
}
/* write again */
SISetDataEntry(shmInvalBuffer, &newInvalid);
}
SpinRelease(SInvalLock);
}
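A hedged sketch of message form (2) described above: a special non-syscache id (the constant name here is invented), the relation OID riding in the hash-index slot, and a null item pointer:

void
example_flush_reldesc(Oid relid)
{
	RegisterSharedInvalid(RELCACHE_ID_HYPOTHETICAL,		/* assumed id */
						  (Index) relid,
						  (ItemPointer) NULL);	/* form (2): no tuple */
}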
/*
* InvalidateSharedInvalid --
* Processes all entries in a shared cache invalidation state.
* Processes all entries in a shared cache invalidation state.
*/
/****************************************************************************/
/* InvalidateSharedInvalid(invalFunction, resetFunction) */
/* */
/* invalidate a message in the buffer (read and clean up) */
/* should be called by a backend */
/* InvalidateSharedInvalid(invalFunction, resetFunction) */
/* */
/* invalidate a message in the buffer (read and clean up) */
/* should be called by a backend */
/****************************************************************************/
void
InvalidateSharedInvalid(void (*invalFunction)(),
void (*resetFunction)())
InvalidateSharedInvalid(void (*invalFunction) (),
void (*resetFunction) ())
{
SpinAcquire(SInvalLock);
SIReadEntryData(shmInvalBuffer, MyBackendId,
invalFunction, resetFunction);
SIDelExpiredDataEntries(shmInvalBuffer);
SpinRelease(SInvalLock);
SpinAcquire(SInvalLock);
SIReadEntryData(shmInvalBuffer, MyBackendId,
invalFunction, resetFunction);
SIDelExpiredDataEntries(shmInvalBuffer);
SpinRelease(SInvalLock);
}

File diff suppressed because it is too large

@@ -1,13 +1,13 @@
/*-------------------------------------------------------------------------
*
* spin.c--
* routines for managing spin locks
* routines for managing spin locks
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/Attic/spin.c,v 1.6 1997/08/21 13:43:46 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/Attic/spin.c,v 1.7 1997/09/07 04:48:45 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -21,8 +21,8 @@
* term semaphores separately anyway.
*
* NOTE: These routines are not supposed to be widely used in Postgres.
* They are preserved solely for the purpose of porting Mark Sullivan's
* buffer manager to Postgres.
* They are preserved solely for the purpose of porting Mark Sullivan's
* buffer manager to Postgres.
*/
#include <errno.h>
#include "postgres.h"
@@ -43,61 +43,62 @@ IpcSemaphoreId SpinLockId;
bool
CreateSpinlocks(IPCKey key)
{
/* the spin lock shared memory must have been created by now */
return(TRUE);
{
/* the spin lock shared memory must have been created by now */
return (TRUE);
}
bool
InitSpinLocks(int init, IPCKey key)
{
extern SPINLOCK ShmemLock;
extern SPINLOCK BindingLock;
extern SPINLOCK BufMgrLock;
extern SPINLOCK LockMgrLock;
extern SPINLOCK ProcStructLock;
extern SPINLOCK SInvalLock;
extern SPINLOCK OidGenLockId;
extern SPINLOCK ShmemLock;
extern SPINLOCK BindingLock;
extern SPINLOCK BufMgrLock;
extern SPINLOCK LockMgrLock;
extern SPINLOCK ProcStructLock;
extern SPINLOCK SInvalLock;
extern SPINLOCK OidGenLockId;
#ifdef MAIN_MEMORY
extern SPINLOCK MMCacheLock;
#endif /* SONY_JUKEBOX */
/* These six spinlocks have a fixed location in shmem */
ShmemLock = (SPINLOCK) SHMEMLOCKID;
BindingLock = (SPINLOCK) BINDINGLOCKID;
BufMgrLock = (SPINLOCK) BUFMGRLOCKID;
LockMgrLock = (SPINLOCK) LOCKMGRLOCKID;
ProcStructLock = (SPINLOCK) PROCSTRUCTLOCKID;
SInvalLock = (SPINLOCK) SINVALLOCKID;
OidGenLockId = (SPINLOCK) OIDGENLOCKID;
extern SPINLOCK MMCacheLock;
#endif /* SONY_JUKEBOX */
/* These six spinlocks have a fixed location in shmem */
ShmemLock = (SPINLOCK) SHMEMLOCKID;
BindingLock = (SPINLOCK) BINDINGLOCKID;
BufMgrLock = (SPINLOCK) BUFMGRLOCKID;
LockMgrLock = (SPINLOCK) LOCKMGRLOCKID;
ProcStructLock = (SPINLOCK) PROCSTRUCTLOCKID;
SInvalLock = (SPINLOCK) SINVALLOCKID;
OidGenLockId = (SPINLOCK) OIDGENLOCKID;
#ifdef MAIN_MEMORY
MMCacheLock = (SPINLOCK) MMCACHELOCKID;
#endif /* MAIN_MEMORY */
return(TRUE);
MMCacheLock = (SPINLOCK) MMCACHELOCKID;
#endif /* MAIN_MEMORY */
return (TRUE);
}
void
SpinAcquire(SPINLOCK lock)
{
ExclusiveLock(lock);
PROC_INCR_SLOCK(lock);
ExclusiveLock(lock);
PROC_INCR_SLOCK(lock);
}
void
SpinRelease(SPINLOCK lock)
{
PROC_DECR_SLOCK(lock);
ExclusiveUnlock(lock);
PROC_DECR_SLOCK(lock);
ExclusiveUnlock(lock);
}
#else /* HAS_TEST_AND_SET */
#else /* HAS_TEST_AND_SET */
/* Spinlocks are implemented using SysV semaphores */
static bool AttachSpinLocks(IPCKey key);
static bool SpinIsLocked(SPINLOCK lock);
static bool AttachSpinLocks(IPCKey key);
static bool SpinIsLocked(SPINLOCK lock);
/*
* SpinAcquire -- try to grab a spinlock
@@ -107,86 +108,91 @@ static bool SpinIsLocked(SPINLOCK lock);
void
SpinAcquire(SPINLOCK lock)
{
IpcSemaphoreLock(SpinLockId, lock, IpcExclusiveLock);
PROC_INCR_SLOCK(lock);
IpcSemaphoreLock(SpinLockId, lock, IpcExclusiveLock);
PROC_INCR_SLOCK(lock);
}
/*
* SpinRelease -- release a spin lock
*
*
* FAILS if the semaphore is corrupted
*/
void
SpinRelease(SPINLOCK lock)
{
Assert(SpinIsLocked(lock))
Assert(SpinIsLocked(lock))
PROC_DECR_SLOCK(lock);
IpcSemaphoreUnlock(SpinLockId, lock, IpcExclusiveLock);
IpcSemaphoreUnlock(SpinLockId, lock, IpcExclusiveLock);
}
static bool
static bool
SpinIsLocked(SPINLOCK lock)
{
int semval;
semval = IpcSemaphoreGetValue(SpinLockId, lock);
return(semval < IpcSemaphoreDefaultStartValue);
int semval;
semval = IpcSemaphoreGetValue(SpinLockId, lock);
return (semval < IpcSemaphoreDefaultStartValue);
}
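A worked example of the test above, assuming the semaphore really does start at IpcSemaphoreDefaultStartValue (here taken to be 1):

/*
 * initial semval					= 1	-> SpinIsLocked() is false
 * after SpinAcquire  (P, value-1)	= 0	-> SpinIsLocked() is true
 * after SpinRelease  (V, value+1)	= 1	-> SpinIsLocked() is false
 */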
/*
* CreateSpinlocks -- Create a sysV semaphore array for
* the spinlocks
* the spinlocks
*
*/
bool
CreateSpinlocks(IPCKey key)
{
int status;
IpcSemaphoreId semid;
semid = IpcSemaphoreCreate(key, MAX_SPINS, IPCProtection,
IpcSemaphoreDefaultStartValue, 1, &status);
if (status == IpcSemIdExist) {
IpcSemaphoreKill(key);
elog(NOTICE,"Destroying old spinlock semaphore");
semid = IpcSemaphoreCreate(key, MAX_SPINS, IPCProtection,
IpcSemaphoreDefaultStartValue, 1, &status);
}
if (semid >= 0) {
SpinLockId = semid;
return(TRUE);
}
/* cannot create spinlocks */
elog(FATAL,"CreateSpinlocks: cannot create spin locks");
return(FALSE);
int status;
IpcSemaphoreId semid;
semid = IpcSemaphoreCreate(key, MAX_SPINS, IPCProtection,
IpcSemaphoreDefaultStartValue, 1, &status);
if (status == IpcSemIdExist)
{
IpcSemaphoreKill(key);
elog(NOTICE, "Destroying old spinlock semaphore");
semid = IpcSemaphoreCreate(key, MAX_SPINS, IPCProtection,
IpcSemaphoreDefaultStartValue, 1, &status);
}
if (semid >= 0)
{
SpinLockId = semid;
return (TRUE);
}
/* cannot create spinlocks */
elog(FATAL, "CreateSpinlocks: cannot create spin locks");
return (FALSE);
}
/*
* Attach to existing spinlock set
*/
static bool
static bool
AttachSpinLocks(IPCKey key)
{
IpcSemaphoreId id;
id = semget (key, MAX_SPINS, 0);
if (id < 0) {
if (errno == EEXIST) {
/* key is the name of someone else's semaphore */
elog (FATAL,"AttachSpinlocks: SPIN_KEY belongs to someone else");
IpcSemaphoreId id;
id = semget(key, MAX_SPINS, 0);
if (id < 0)
{
if (errno == EEXIST)
{
/* key is the name of someone else's semaphore */
elog(FATAL, "AttachSpinlocks: SPIN_KEY belongs to someone else");
}
/* cannot create spinlocks */
elog(FATAL, "AttachSpinlocks: cannot create spin locks");
return (FALSE);
}
/* cannot create spinlocks */
elog(FATAL,"AttachSpinlocks: cannot create spin locks");
return(FALSE);
}
SpinLockId = id;
return(TRUE);
SpinLockId = id;
return (TRUE);
}
/*
* InitSpinLocks -- Spinlock bootstrapping
*
*
* We need several spinlocks for bootstrapping:
* BindingLock (for the shmem binding table) and
* ShmemLock (for the shmem allocator), BufMgrLock (for buffer
@@ -199,41 +205,47 @@ AttachSpinLocks(IPCKey key)
bool
InitSpinLocks(int init, IPCKey key)
{
extern SPINLOCK ShmemLock;
extern SPINLOCK BindingLock;
extern SPINLOCK BufMgrLock;
extern SPINLOCK LockMgrLock;
extern SPINLOCK ProcStructLock;
extern SPINLOCK SInvalLock;
extern SPINLOCK OidGenLockId;
extern SPINLOCK ShmemLock;
extern SPINLOCK BindingLock;
extern SPINLOCK BufMgrLock;
extern SPINLOCK LockMgrLock;
extern SPINLOCK ProcStructLock;
extern SPINLOCK SInvalLock;
extern SPINLOCK OidGenLockId;
#ifdef MAIN_MEMORY
extern SPINLOCK MMCacheLock;
#endif /* MAIN_MEMORY */
if (!init || key != IPC_PRIVATE) {
/* if bootstrap and key is IPC_PRIVATE, it means that we are running
* the backend by itself; no need to attach spinlocks
*/
if (! AttachSpinLocks(key)) {
elog(FATAL,"InitSpinLocks: couldnt attach spin locks");
return(FALSE);
extern SPINLOCK MMCacheLock;
#endif /* MAIN_MEMORY */
if (!init || key != IPC_PRIVATE)
{
/*
* if bootstrap and key is IPC_PRIVATE, it means that we are
* running the backend by itself; no need to attach spinlocks
*/
if (!AttachSpinLocks(key))
{
elog(FATAL, "InitSpinLocks: couldnt attach spin locks");
return (FALSE);
}
}
}
/* These five (or six) spinlocks have a fixed location in shmem */
ShmemLock = (SPINLOCK) SHMEMLOCKID;
BindingLock = (SPINLOCK) BINDINGLOCKID;
BufMgrLock = (SPINLOCK) BUFMGRLOCKID;
LockMgrLock = (SPINLOCK) LOCKMGRLOCKID;
ProcStructLock = (SPINLOCK) PROCSTRUCTLOCKID;
SInvalLock = (SPINLOCK) SINVALLOCKID;
OidGenLockId = (SPINLOCK) OIDGENLOCKID;
/* These five (or six) spinlocks have a fixed location in shmem */
ShmemLock = (SPINLOCK) SHMEMLOCKID;
BindingLock = (SPINLOCK) BINDINGLOCKID;
BufMgrLock = (SPINLOCK) BUFMGRLOCKID;
LockMgrLock = (SPINLOCK) LOCKMGRLOCKID;
ProcStructLock = (SPINLOCK) PROCSTRUCTLOCKID;
SInvalLock = (SPINLOCK) SINVALLOCKID;
OidGenLockId = (SPINLOCK) OIDGENLOCKID;
#ifdef MAIN_MEMORY
MMCacheLock = (SPINLOCK) MMCACHELOCKID;
#endif /* MAIN_MEMORY */
return(TRUE);
MMCacheLock = (SPINLOCK) MMCACHELOCKID;
#endif /* MAIN_MEMORY */
return (TRUE);
}
#endif /* HAS_TEST_AND_SET */
#endif /* HAS_TEST_AND_SET */

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

@@ -1,22 +1,22 @@
/*-------------------------------------------------------------------------
*
* multi.c--
* multi level lock table manager
* multi level lock table manager
*
* Standard multi-level lock manager as per the Gray paper
* (at least, that is what it is supposed to be). We implement
* three levels -- RELN, PAGE, TUPLE. Tuple is actually TID
* a physical record pointer. It isn't an object id.
* Standard multi-level lock manager as per the Gray paper
* (at least, that is what it is supposed to be). We implement
* three levels -- RELN, PAGE, TUPLE. Tuple is actually TID
* a physical record pointer. It isn't an object id.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/Attic/multi.c,v 1.4 1997/08/19 21:33:25 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/Attic/multi.c,v 1.5 1997/09/07 04:49:02 momjian Exp $
*
* NOTES:
* (1) The lock.c module assumes that the caller here is doing
* two phase locking.
* (1) The lock.c module assumes that the caller here is doing
* two phase locking.
*
*-------------------------------------------------------------------------
*/
@@ -27,53 +27,59 @@
#include "storage/multilev.h"
#include "utils/rel.h"
#include "miscadmin.h" /* MyDatabaseId */
#include "miscadmin.h" /* MyDatabaseId */
static bool MultiAcquire(LockTableId tableId, LOCKTAG *tag, LOCKT lockt,
static bool
MultiAcquire(LockTableId tableId, LOCKTAG * tag, LOCKT lockt,
LOCK_LEVEL level);
static bool MultiRelease(LockTableId tableId, LOCKTAG *tag, LOCKT lockt,
static bool
MultiRelease(LockTableId tableId, LOCKTAG * tag, LOCKT lockt,
LOCK_LEVEL level);
/*
* INTENT indicates to higher level that a lower level lock has been
* set. For example, a write lock on a tuple conflicts with a write
* lock on a relation. This conflict is detected as a WRITE_INTENT/
* set. For example, a write lock on a tuple conflicts with a write
* lock on a relation. This conflict is detected as a WRITE_INTENT/
* WRITE conflict between the tuple's intent lock and the relation's
* write lock.
*/
static int MultiConflicts[] = {
(int)NULL,
/* All reads and writes at any level conflict with a write lock */
(1 << WRITE_LOCK)|(1 << WRITE_INTENT)|(1 << READ_LOCK)|(1 << READ_INTENT),
/* read locks conflict with write locks at curr and lower levels */
(1 << WRITE_LOCK)| (1 << WRITE_INTENT),
/* write intent locks */
(1 << READ_LOCK) | (1 << WRITE_LOCK),
/* read intent locks*/
(1 << WRITE_LOCK),
/* extend locks for archive storage manager conflict only w/extend locks */
(1 << EXTEND_LOCK)
static int MultiConflicts[] = {
(int) NULL,
/* All reads and writes at any level conflict with a write lock */
(1 << WRITE_LOCK) | (1 << WRITE_INTENT) | (1 << READ_LOCK) | (1 << READ_INTENT),
/* read locks conflict with write locks at curr and lower levels */
(1 << WRITE_LOCK) | (1 << WRITE_INTENT),
/* write intent locks */
(1 << READ_LOCK) | (1 << WRITE_LOCK),
/* read intent locks */
(1 << WRITE_LOCK),
/*
* extend locks for archive storage manager conflict only w/extend
* locks
*/
(1 << EXTEND_LOCK)
};
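The table is indexed by the requested lock mode, and bit i in an entry means "conflicts with a granted lock of mode i". A sketch of the test the lock manager presumably applies (the real check lives in lock.c; heldMask is hypothetical):

static bool
modes_conflict(LOCKT requested, int heldMask)
{
	/* heldMask has one bit set per lock mode already granted */
	return (MultiConflicts[requested] & heldMask) != 0;
}

/* e.g. a READ_LOCK request against a granted WRITE_INTENT:
 *	modes_conflict(READ_LOCK, 1 << WRITE_INTENT)  -> true	*/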
/*
* write locks have higher priority than read locks and extend locks. May
* want to treat INTENT locks differently.
*/
static int MultiPrios[] = {
(int)NULL,
2,
1,
2,
1,
1
static int MultiPrios[] = {
(int) NULL,
2,
1,
2,
1,
1
};
/*
/*
* Lock table identifier for this lock table. The multi-level
* lock table is ONE lock table, not three.
*/
LockTableId MultiTableId = (LockTableId)NULL;
LockTableId ShortTermTableId = (LockTableId)NULL;
LockTableId MultiTableId = (LockTableId) NULL;
LockTableId ShortTermTableId = (LockTableId) NULL;
/*
* Create the lock table described by MultiConflicts and Multiprio.
@@ -81,30 +87,31 @@ LockTableId ShortTermTableId = (LockTableId)NULL;
LockTableId
InitMultiLevelLockm()
{
int tableId;
/* -----------------------
* If we're already initialized just return the table id.
* -----------------------
*/
if (MultiTableId)
int tableId;
/* -----------------------
* If we're already initialized just return the table id.
* -----------------------
*/
if (MultiTableId)
return MultiTableId;
tableId = LockTabInit("LockTable", MultiConflicts, MultiPrios, 5);
MultiTableId = tableId;
if (!(MultiTableId))
{
elog(WARN, "InitMultiLockm: couldnt initialize lock table");
}
/* -----------------------
* No short term lock table for now. -Jeff 15 July 1991
*
* ShortTermTableId = LockTabRename(tableId);
* if (! (ShortTermTableId)) {
* elog(WARN,"InitMultiLockm: couldnt rename lock table");
* }
* -----------------------
*/
return MultiTableId;
tableId = LockTabInit("LockTable", MultiConflicts, MultiPrios, 5);
MultiTableId = tableId;
if (! (MultiTableId)) {
elog(WARN,"InitMultiLockm: couldnt initialize lock table");
}
/* -----------------------
* No short term lock table for now. -Jeff 15 July 1991
*
* ShortTermTableId = LockTabRename(tableId);
* if (! (ShortTermTableId)) {
* elog(WARN,"InitMultiLockm: couldnt rename lock table");
* }
* -----------------------
*/
return MultiTableId;
}
/*
@@ -115,16 +122,16 @@ InitMultiLevelLockm()
bool
MultiLockReln(LockInfo linfo, LOCKT lockt)
{
LOCKTAG tag;
/* LOCKTAG has two bytes of padding, unfortunately. The
* hash function will return miss if the padding bytes aren't
* zero'd.
*/
memset(&tag,0,sizeof(tag));
tag.relId = linfo->lRelId.relId;
tag.dbId = linfo->lRelId.dbId;
return(MultiAcquire(MultiTableId, &tag, lockt, RELN_LEVEL));
LOCKTAG tag;
/*
* LOCKTAG has two bytes of padding, unfortunately. The hash function
* will return miss if the padding bytes aren't zero'd.
*/
memset(&tag, 0, sizeof(tag));
tag.relId = linfo->lRelId.relId;
tag.dbId = linfo->lRelId.dbId;
return (MultiAcquire(MultiTableId, &tag, lockt, RELN_LEVEL));
}
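A minimal usage sketch, assuming WRITE_LOCK from multilev.h and a valid LockInfo:

void
example_lock_reln(LockInfo linfo)
{
	if (!MultiLockReln(linfo, WRITE_LOCK))
		elog(WARN, "example: could not write-lock relation");
}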
/*
@@ -133,25 +140,25 @@ MultiLockReln(LockInfo linfo, LOCKT lockt)
* Returns: TRUE if lock is set, FALSE otherwise.
*
* Side Effects: causes intention level locks to be set
* at the page and relation level.
* at the page and relation level.
*/
bool
MultiLockTuple(LockInfo linfo, ItemPointer tidPtr, LOCKT lockt)
{
LOCKTAG tag;
/* LOCKTAG has two bytes of padding, unfortunately. The
* hash function will return miss if the padding bytes aren't
* zero'd.
*/
memset(&tag,0,sizeof(tag));
tag.relId = linfo->lRelId.relId;
tag.dbId = linfo->lRelId.dbId;
/* not locking any valid Tuple, just the page */
tag.tupleId = *tidPtr;
return(MultiAcquire(MultiTableId, &tag, lockt, TUPLE_LEVEL));
LOCKTAG tag;
/*
* LOCKTAG has two bytes of padding, unfortunately. The hash function
* will return miss if the padding bytes aren't zero'd.
*/
memset(&tag, 0, sizeof(tag));
tag.relId = linfo->lRelId.relId;
tag.dbId = linfo->lRelId.dbId;
/* not locking any valid Tuple, just the page */
tag.tupleId = *tidPtr;
return (MultiAcquire(MultiTableId, &tag, lockt, TUPLE_LEVEL));
}
/*
@@ -160,28 +167,28 @@ MultiLockTuple(LockInfo linfo, ItemPointer tidPtr, LOCKT lockt)
bool
MultiLockPage(LockInfo linfo, ItemPointer tidPtr, LOCKT lockt)
{
LOCKTAG tag;
/* LOCKTAG has two bytes of padding, unfortunately. The
* hash function will return miss if the padding bytes aren't
* zero'd.
*/
memset(&tag,0,sizeof(tag));
/* ----------------------------
* Now we want to set the page offset to be invalid
* and lock the block. There is some confusion here as to what
* a page is. In Postgres a page is an 8k block, however this
* block may be partitioned into many subpages which are sometimes
* also called pages. The term is overloaded, so don't be fooled
* when we say lock the page we mean the 8k block. -Jeff 16 July 1991
* ----------------------------
*/
tag.relId = linfo->lRelId.relId;
tag.dbId = linfo->lRelId.dbId;
BlockIdCopy(&(tag.tupleId.ip_blkid), &(tidPtr->ip_blkid));
return(MultiAcquire(MultiTableId, &tag, lockt, PAGE_LEVEL));
LOCKTAG tag;
/*
* LOCKTAG has two bytes of padding, unfortunately. The hash function
* will return miss if the padding bytes aren't zero'd.
*/
memset(&tag, 0, sizeof(tag));
/* ----------------------------
* Now we want to set the page offset to be invalid
* and lock the block. There is some confusion here as to what
* a page is. In Postgres a page is an 8k block, however this
* block may be partitioned into many subpages which are sometimes
* also called pages. The term is overloaded, so don't be fooled
* when we say lock the page we mean the 8k block. -Jeff 16 July 1991
* ----------------------------
*/
tag.relId = linfo->lRelId.relId;
tag.dbId = linfo->lRelId.dbId;
BlockIdCopy(&(tag.tupleId.ip_blkid), &(tidPtr->ip_blkid));
return (MultiAcquire(MultiTableId, &tag, lockt, PAGE_LEVEL));
}
/*
@@ -190,102 +197,110 @@ MultiLockPage(LockInfo linfo, ItemPointer tidPtr, LOCKT lockt)
* Returns: TRUE if lock is set, FALSE if not
* Side Effects:
*/
static bool
static bool
MultiAcquire(LockTableId tableId,
LOCKTAG *tag,
LOCKT lockt,
LOCK_LEVEL level)
LOCKTAG * tag,
LOCKT lockt,
LOCK_LEVEL level)
{
LOCKT locks[N_LEVELS];
int i,status;
LOCKTAG xxTag, *tmpTag = &xxTag;
int retStatus = TRUE;
/*
* Three levels implemented. If we set a low level (e.g. Tuple)
* lock, we must set INTENT locks on the higher levels. The
* intent lock detects conflicts between the low level lock
* and an existing high level lock. For example, setting a
* write lock on a tuple in a relation is disallowed if there
* is an existing read lock on the entire relation. The
* write lock would set a WRITE + INTENT lock on the relation
* and that lock would conflict with the read.
*/
switch (level) {
case RELN_LEVEL:
locks[0] = lockt;
locks[1] = NO_LOCK;
locks[2] = NO_LOCK;
break;
case PAGE_LEVEL:
locks[0] = lockt + INTENT;
locks[1] = lockt;
locks[2] = NO_LOCK;
break;
case TUPLE_LEVEL:
locks[0] = lockt + INTENT;
locks[1] = lockt + INTENT;
locks[2] = lockt;
break;
default:
elog(WARN,"MultiAcquire: bad lock level");
return(FALSE);
}
/*
* construct a new tag as we go. Always loop through all levels,
 * but if we aren't setting a low level lock, locks[i] is set to
* NO_LOCK for the lower levels. Always start from the highest
* level and go to the lowest level.
*/
memset(tmpTag,0,sizeof(*tmpTag));
tmpTag->relId = tag->relId;
tmpTag->dbId = tag->dbId;
for (i=0;i<N_LEVELS;i++) {
if (locks[i] != NO_LOCK) {
switch (i) {
case RELN_LEVEL:
/* -------------
* Set the block # and offset to invalid
* -------------
*/
BlockIdSet(&(tmpTag->tupleId.ip_blkid), InvalidBlockNumber);
tmpTag->tupleId.ip_posid = InvalidOffsetNumber;
LOCKT locks[N_LEVELS];
int i,
status;
LOCKTAG xxTag,
*tmpTag = &xxTag;
int retStatus = TRUE;
/*
* Three levels implemented. If we set a low level (e.g. Tuple) lock,
* we must set INTENT locks on the higher levels. The intent lock
* detects conflicts between the low level lock and an existing high
* level lock. For example, setting a write lock on a tuple in a
* relation is disallowed if there is an existing read lock on the
* entire relation. The write lock would set a WRITE + INTENT lock on
* the relation and that lock would conflict with the read.
*/
switch (level)
{
case RELN_LEVEL:
locks[0] = lockt;
locks[1] = NO_LOCK;
locks[2] = NO_LOCK;
break;
case PAGE_LEVEL:
/* -------------
* Copy the block #, set the offset to invalid
* -------------
*/
BlockIdCopy(&(tmpTag->tupleId.ip_blkid),
&(tag->tupleId.ip_blkid));
tmpTag->tupleId.ip_posid = InvalidOffsetNumber;
case PAGE_LEVEL:
locks[0] = lockt + INTENT;
locks[1] = lockt;
locks[2] = NO_LOCK;
break;
case TUPLE_LEVEL:
/* --------------
* Copy the entire tuple id.
* --------------
*/
ItemPointerCopy(&tmpTag->tupleId, &tag->tupleId);
case TUPLE_LEVEL:
locks[0] = lockt + INTENT;
locks[1] = lockt + INTENT;
locks[2] = lockt;
break;
}
status = LockAcquire(tableId, tmpTag, locks[i]);
if (! status) {
/* failed for some reason. Before returning we have
* to release all of the locks we just acquired.
* MultiRelease(xx,xx,xx, i) means release starting from
* the last level lock we successfully acquired
*/
retStatus = FALSE;
MultiRelease(tableId, tag, lockt, i);
/* now leave the loop. Don't try for any more locks */
break;
}
default:
elog(WARN, "MultiAcquire: bad lock level");
return (FALSE);
}
}
return(retStatus);
/*
* construct a new tag as we go. Always loop through all levels, but
 * if we aren't setting a low level lock, locks[i] is set to NO_LOCK
* for the lower levels. Always start from the highest level and go
* to the lowest level.
*/
memset(tmpTag, 0, sizeof(*tmpTag));
tmpTag->relId = tag->relId;
tmpTag->dbId = tag->dbId;
for (i = 0; i < N_LEVELS; i++)
{
if (locks[i] != NO_LOCK)
{
switch (i)
{
case RELN_LEVEL:
/* -------------
* Set the block # and offset to invalid
* -------------
*/
BlockIdSet(&(tmpTag->tupleId.ip_blkid), InvalidBlockNumber);
tmpTag->tupleId.ip_posid = InvalidOffsetNumber;
break;
case PAGE_LEVEL:
/* -------------
* Copy the block #, set the offset to invalid
* -------------
*/
BlockIdCopy(&(tmpTag->tupleId.ip_blkid),
&(tag->tupleId.ip_blkid));
tmpTag->tupleId.ip_posid = InvalidOffsetNumber;
break;
case TUPLE_LEVEL:
/* --------------
* Copy the entire tuple id.
* --------------
*/
ItemPointerCopy(&tmpTag->tupleId, &tag->tupleId);
break;
}
status = LockAcquire(tableId, tmpTag, locks[i]);
if (!status)
{
/*
* failed for some reason. Before returning we have to
* release all of the locks we just acquired.
* MultiRelease(xx,xx,xx, i) means release starting from
* the last level lock we successfully acquired
*/
retStatus = FALSE;
MultiRelease(tableId, tag, lockt, i);
/* now leave the loop. Don't try for any more locks */
break;
}
}
}
return (retStatus);
}
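Putting the pieces together, a worked example of what one call acquires, top down:

/*
 * MultiAcquire(MultiTableId, &tag, WRITE_LOCK, TUPLE_LEVEL) sets:
 *
 *	 relation level:  WRITE_LOCK + INTENT  (block # and offset invalid)
 *	 page level:	  WRITE_LOCK + INTENT  (block # kept, offset invalid)
 *	 tuple level:	  WRITE_LOCK		   (full tuple id)
 *
 * and if any step fails, MultiRelease() backs out whatever was taken.
 */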
/* ------------------
@@ -294,24 +309,25 @@ MultiAcquire(LockTableId tableId,
*/
#ifdef NOT_USED
bool
MultiReleasePage(LockInfo linfo, ItemPointer tidPtr, LOCKT lockt)
MultiReleasePage(LockInfo linfo, ItemPointer tidPtr, LOCKT lockt)
{
LOCKTAG tag;
/* ------------------
* LOCKTAG has two bytes of padding, unfortunately. The
* hash function will return miss if the padding bytes aren't
* zero'd.
* ------------------
*/
memset(&tag, 0,sizeof(LOCKTAG));
tag.relId = linfo->lRelId.relId;
tag.dbId = linfo->lRelId.dbId;
BlockIdCopy(&(tag.tupleId.ip_blkid), &(tidPtr->ip_blkid));
return (MultiRelease(MultiTableId, &tag, lockt, PAGE_LEVEL));
LOCKTAG tag;
/* ------------------
* LOCKTAG has two bytes of padding, unfortunately. The
* hash function will return miss if the padding bytes aren't
* zero'd.
* ------------------
*/
memset(&tag, 0, sizeof(LOCKTAG));
tag.relId = linfo->lRelId.relId;
tag.dbId = linfo->lRelId.dbId;
BlockIdCopy(&(tag.tupleId.ip_blkid), &(tidPtr->ip_blkid));
return (MultiRelease(MultiTableId, &tag, lockt, PAGE_LEVEL));
}
#endif
/* ------------------
@@ -319,21 +335,21 @@ MultiReleasePage(LockInfo linfo, ItemPointer tidPtr, LOCKT lockt)
* ------------------
*/
bool
MultiReleaseReln(LockInfo linfo, LOCKT lockt)
MultiReleaseReln(LockInfo linfo, LOCKT lockt)
{
LOCKTAG tag;
/* ------------------
* LOCKTAG has two bytes of padding, unfortunately. The
* hash function will return miss if the padding bytes aren't
* zero'd.
* ------------------
*/
memset(&tag, 0, sizeof(LOCKTAG));
tag.relId = linfo->lRelId.relId;
tag.dbId = linfo->lRelId.dbId;
return (MultiRelease(MultiTableId, &tag, lockt, RELN_LEVEL));
LOCKTAG tag;
/* ------------------
* LOCKTAG has two bytes of padding, unfortunately. The
* hash function will return miss if the padding bytes aren't
* zero'd.
* ------------------
*/
memset(&tag, 0, sizeof(LOCKTAG));
tag.relId = linfo->lRelId.relId;
tag.dbId = linfo->lRelId.dbId;
return (MultiRelease(MultiTableId, &tag, lockt, RELN_LEVEL));
}
/*
@@ -341,81 +357,88 @@ MultiReleaseReln(LockInfo linfo, LOCKT lockt)
*
* Returns: TRUE if successful, FALSE otherwise.
*/
static bool
static bool
MultiRelease(LockTableId tableId,
LOCKTAG *tag,
LOCKT lockt,
LOCK_LEVEL level)
LOCKTAG * tag,
LOCKT lockt,
LOCK_LEVEL level)
{
LOCKT locks[N_LEVELS];
int i,status;
LOCKTAG xxTag, *tmpTag = &xxTag;
/*
* same level scheme as MultiAcquire().
*/
switch (level) {
case RELN_LEVEL:
locks[0] = lockt;
locks[1] = NO_LOCK;
locks[2] = NO_LOCK;
break;
case PAGE_LEVEL:
locks[0] = lockt + INTENT;
locks[1] = lockt;
locks[2] = NO_LOCK;
break;
case TUPLE_LEVEL:
locks[0] = lockt + INTENT;
locks[1] = lockt + INTENT;
locks[2] = lockt;
break;
default:
elog(WARN,"MultiRelease: bad lockt");
}
/*
* again, construct the tag on the fly. This time, however,
* we release the locks in the REVERSE order -- from lowest
* level to highest level.
*
 * Must zero out the tag to set padding bytes to zero and ensure
* hashing consistency.
*/
memset(tmpTag, 0, sizeof(*tmpTag));
tmpTag->relId = tag->relId;
tmpTag->dbId = tag->dbId;
for (i=(N_LEVELS-1); i>=0; i--) {
if (locks[i] != NO_LOCK) {
switch (i) {
case RELN_LEVEL:
/* -------------
* Set the block # and offset to invalid
* -------------
*/
BlockIdSet(&(tmpTag->tupleId.ip_blkid), InvalidBlockNumber);
tmpTag->tupleId.ip_posid = InvalidOffsetNumber;
LOCKT locks[N_LEVELS];
int i,
status;
LOCKTAG xxTag,
*tmpTag = &xxTag;
/*
* same level scheme as MultiAcquire().
*/
switch (level)
{
case RELN_LEVEL:
locks[0] = lockt;
locks[1] = NO_LOCK;
locks[2] = NO_LOCK;
break;
case PAGE_LEVEL:
/* -------------
* Copy the block #, set the offset to invalid
* -------------
*/
BlockIdCopy(&(tmpTag->tupleId.ip_blkid),
&(tag->tupleId.ip_blkid));
tmpTag->tupleId.ip_posid = InvalidOffsetNumber;
case PAGE_LEVEL:
locks[0] = lockt + INTENT;
locks[1] = lockt;
locks[2] = NO_LOCK;
break;
case TUPLE_LEVEL:
ItemPointerCopy(&tmpTag->tupleId, &tag->tupleId);
case TUPLE_LEVEL:
locks[0] = lockt + INTENT;
locks[1] = lockt + INTENT;
locks[2] = lockt;
break;
}
status = LockRelease(tableId, tmpTag, locks[i]);
if (! status) {
elog(WARN,"MultiRelease: couldn't release after error");
}
default:
elog(WARN, "MultiRelease: bad lockt");
}
}
/* shouldn't reach here */
return false;
/*
* again, construct the tag on the fly. This time, however, we
* release the locks in the REVERSE order -- from lowest level to
* highest level.
*
 * Must zero out the tag to set padding bytes to zero and ensure hashing
* consistency.
*/
memset(tmpTag, 0, sizeof(*tmpTag));
tmpTag->relId = tag->relId;
tmpTag->dbId = tag->dbId;
for (i = (N_LEVELS - 1); i >= 0; i--)
{
if (locks[i] != NO_LOCK)
{
switch (i)
{
case RELN_LEVEL:
/* -------------
* Set the block # and offset to invalid
* -------------
*/
BlockIdSet(&(tmpTag->tupleId.ip_blkid), InvalidBlockNumber);
tmpTag->tupleId.ip_posid = InvalidOffsetNumber;
break;
case PAGE_LEVEL:
/* -------------
* Copy the block #, set the offset to invalid
* -------------
*/
BlockIdCopy(&(tmpTag->tupleId.ip_blkid),
&(tag->tupleId.ip_blkid));
tmpTag->tupleId.ip_posid = InvalidOffsetNumber;
break;
case TUPLE_LEVEL:
ItemPointerCopy(&tmpTag->tupleId, &tag->tupleId);
break;
}
status = LockRelease(tableId, tmpTag, locks[i]);
if (!status)
{
elog(WARN, "MultiRelease: couldn't release after error");
}
}
}
/* shouldn't reach here */
return false;
}

File diff suppressed because it is too large

@@ -1,19 +1,19 @@
/*-------------------------------------------------------------------------
*
* single.c--
* set single locks in the multi-level lock hierarchy
* set single locks in the multi-level lock hierarchy
*
* Sometimes we don't want to set all levels of the multi-level
* lock hierarchy at once. This allows us to set and release
* one level at a time. It's useful in index scans when
* you can set an intent lock at the beginning and thereafter
* only set page locks. Tends to speed things up.
* Sometimes we don't want to set all levels of the multi-level
* lock hierarchy at once. This allows us to set and release
* one level at a time. It's useful in index scans when
* you can set an intent lock at the beginning and thereafter
* only set page locks. Tends to speed things up.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/Attic/single.c,v 1.2 1996/11/03 05:07:33 scrappy Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/Attic/single.c,v 1.3 1997/09/07 04:49:04 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -21,7 +21,7 @@
#include "postgres.h"
#include "storage/lmgr.h" /* where the declarations go */
#include "storage/lmgr.h" /* where the declarations go */
#include "storage/lock.h"
#include "storage/multilev.h"
#include "utils/rel.h"
@@ -34,28 +34,27 @@
bool
SingleLockReln(LockInfo linfo, LOCKT lockt, int action)
{
LOCKTAG tag;
/*
* LOCKTAG has two bytes of padding, unfortunately. The
* hash function will return miss if the padding bytes aren't
* zero'd.
*/
memset(&tag,0,sizeof(tag));
tag.relId = linfo->lRelId.relId;
tag.dbId = linfo->lRelId.dbId;
BlockIdSet(&(tag.tupleId.ip_blkid), InvalidBlockNumber);
tag.tupleId.ip_posid = InvalidOffsetNumber;
if (action == UNLOCK)
return(LockRelease(MultiTableId, &tag, lockt));
else
return(LockAcquire(MultiTableId, &tag, lockt));
LOCKTAG tag;
/*
* LOCKTAG has two bytes of padding, unfortunately. The hash function
* will return miss if the padding bytes aren't zero'd.
*/
memset(&tag, 0, sizeof(tag));
tag.relId = linfo->lRelId.relId;
tag.dbId = linfo->lRelId.dbId;
BlockIdSet(&(tag.tupleId.ip_blkid), InvalidBlockNumber);
tag.tupleId.ip_posid = InvalidOffsetNumber;
if (action == UNLOCK)
return (LockRelease(MultiTableId, &tag, lockt));
else
return (LockAcquire(MultiTableId, &tag, lockt));
}
/*
* SingleLockPage -- use multi-level lock table, but lock
* only at the page level.
* only at the page level.
*
* Assumes that an INTENT lock has already been set in the
* multi-level lock table.
@@ -63,27 +62,25 @@ SingleLockReln(LockInfo linfo, LOCKT lockt, int action)
*/
bool
SingleLockPage(LockInfo linfo,
ItemPointer tidPtr,
LOCKT lockt,
int action)
ItemPointer tidPtr,
LOCKT lockt,
int action)
{
LOCKTAG tag;
/*
* LOCKTAG has two bytes of padding, unfortunately. The
* hash function will return miss if the padding bytes aren't
* zero'd.
*/
memset(&tag,0,sizeof(tag));
tag.relId = linfo->lRelId.relId;
tag.dbId = linfo->lRelId.dbId;
BlockIdCopy(&(tag.tupleId.ip_blkid), &(tidPtr->ip_blkid));
tag.tupleId.ip_posid = InvalidOffsetNumber;
if (action == UNLOCK)
return(LockRelease(MultiTableId, &tag, lockt));
else
return(LockAcquire(MultiTableId, &tag, lockt));
}
LOCKTAG tag;
/*
* LOCKTAG has two bytes of padding, unfortunately. The hash function
* will return miss if the padding bytes aren't zero'd.
*/
memset(&tag, 0, sizeof(tag));
tag.relId = linfo->lRelId.relId;
tag.dbId = linfo->lRelId.dbId;
BlockIdCopy(&(tag.tupleId.ip_blkid), &(tidPtr->ip_blkid));
tag.tupleId.ip_posid = InvalidOffsetNumber;
if (action == UNLOCK)
return (LockRelease(MultiTableId, &tag, lockt));
else
return (LockAcquire(MultiTableId, &tag, lockt));
}
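A hedged sketch of the index-scan pattern the header describes: one intent lock on the relation, then cheap page locks. The LOCK/UNLOCK action codes and READ_LOCK/INTENT constants from multilev.h are assumed:

void
example_index_scan(LockInfo linfo, ItemPointer tid)
{
	SingleLockReln(linfo, READ_LOCK + INTENT, LOCK);	/* once per scan */

	SingleLockPage(linfo, tid, READ_LOCK, LOCK);	/* per visited page */
	/* ... examine the index entries on that page ... */
	SingleLockPage(linfo, tid, READ_LOCK, UNLOCK);

	SingleLockReln(linfo, READ_LOCK + INTENT, UNLOCK);	/* at scan end */
}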


@@ -1,13 +1,13 @@
/*-------------------------------------------------------------------------
*
* bufpage.c--
* POSTGRES standard buffer page code.
* POSTGRES standard buffer page code.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/page/bufpage.c,v 1.8 1997/08/24 23:07:30 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/page/bufpage.c,v 1.9 1997/09/07 04:49:06 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -26,341 +26,368 @@
#include "lib/qsort.h"
static void PageIndexTupleDeleteAdjustLinePointers(PageHeader phdr,
char *location, Size size);
static void
PageIndexTupleDeleteAdjustLinePointers(PageHeader phdr,
char *location, Size size);
static bool PageManagerShuffle = true; /* default is shuffle mode */
static bool PageManagerShuffle = true; /* default is shuffle mode */
/* ----------------------------------------------------------------
* Page support functions
* Page support functions
* ----------------------------------------------------------------
*/
/*
* PageInit --
* Initializes the contents of a page.
* Initializes the contents of a page.
*/
void
PageInit(Page page, Size pageSize, Size specialSize)
{
PageHeader p = (PageHeader) page;
PageHeader p = (PageHeader) page;
Assert(pageSize == BLCKSZ);
Assert(pageSize >
specialSize + sizeof(PageHeaderData) - sizeof(ItemIdData));
specialSize = DOUBLEALIGN(specialSize);
Assert(pageSize == BLCKSZ);
Assert(pageSize >
specialSize + sizeof(PageHeaderData) - sizeof(ItemIdData));
p->pd_lower = sizeof(PageHeaderData) - sizeof(ItemIdData);
p->pd_upper = pageSize - specialSize;
p->pd_special = pageSize - specialSize;
PageSetPageSize(page, pageSize);
specialSize = DOUBLEALIGN(specialSize);
p->pd_lower = sizeof(PageHeaderData) - sizeof(ItemIdData);
p->pd_upper = pageSize - specialSize;
p->pd_special = pageSize - specialSize;
PageSetPageSize(page, pageSize);
}
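A worked example of the layout arithmetic, for an 8K page with an illustrative 16 bytes of special space:

/*
 *	 specialSize = DOUBLEALIGN(16)							= 16
 *	 pd_lower	 = sizeof(PageHeaderData) - sizeof(ItemIdData)
 *	 pd_upper	 = 8192 - 16								= 8176
 *	 pd_special  = 8192 - 16								= 8176
 *
 * Line pointers then grow up from pd_lower while tuple data grows
 * down from pd_upper; the page is full when the two would cross.
 */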
/*
* PageAddItem --
* Adds item to the given page.
* Adds item to the given page.
*
* Note:
* This does not assume that the item resides on a single page.
 * It is the responsibility of the caller to act appropriately
* depending on this fact. The "pskip" routines provide a
* friendlier interface, in this case.
*
* This does change the status of any of the resources passed.
* The semantics may change in the future.
* This does not assume that the item resides on a single page.
 * It is the responsibility of the caller to act appropriately
* depending on this fact. The "pskip" routines provide a
* friendlier interface, in this case.
*
* This routine should probably be combined with others?
* This does change the status of any of the resources passed.
* The semantics may change in the future.
*
* This routine should probably be combined with others?
*/
/* ----------------
* PageAddItem
* PageAddItem
*
* add an item to a page.
* add an item to a page.
*
* Notes on interface:
* If offsetNumber is valid, shuffle ItemId's down to make room
* to use it, if PageManagerShuffle is true. If PageManagerShuffle is
* false, then overwrite the specified ItemId. (PageManagerShuffle is
* true by default, and is modified by calling PageManagerModeSet.)
* If offsetNumber is not valid, then assign one by finding the first
* one that is both unused and deallocated.
* Notes on interface:
* If offsetNumber is valid, shuffle ItemId's down to make room
* to use it, if PageManagerShuffle is true. If PageManagerShuffle is
* false, then overwrite the specified ItemId. (PageManagerShuffle is
* true by default, and is modified by calling PageManagerModeSet.)
* If offsetNumber is not valid, then assign one by finding the first
* one that is both unused and deallocated.
*
* NOTE: If offsetNumber is valid, and PageManagerShuffle is true, it
* is assumed that there is room on the page to shuffle the ItemId's
* down by one.
* NOTE: If offsetNumber is valid, and PageManagerShuffle is true, it
* is assumed that there is room on the page to shuffle the ItemId's
* down by one.
* ----------------
*/
OffsetNumber
PageAddItem(Page page,
Item item,
Size size,
OffsetNumber offsetNumber,
ItemIdFlags flags)
Item item,
Size size,
OffsetNumber offsetNumber,
ItemIdFlags flags)
{
	register int i;
Size alignedSize;
Offset lower;
Offset upper;
ItemId itemId;
ItemId fromitemId, toitemId;
OffsetNumber limit;
bool shuffled = false;
/*
* Find first unallocated offsetNumber
*/
limit = OffsetNumberNext(PageGetMaxOffsetNumber(page));
/* was offsetNumber passed in? */
if (OffsetNumberIsValid(offsetNumber)) {
if (PageManagerShuffle == true) {
/* shuffle ItemId's (Do the PageManager Shuffle...) */
for (i = (limit - 1); i >= offsetNumber; i--) {
fromitemId = &((PageHeader)page)->pd_linp[i - 1];
toitemId = &((PageHeader)page)->pd_linp[i];
*toitemId = *fromitemId;
}
shuffled = true; /* need to increase "lower" */
} else { /* overwrite mode */
itemId = &((PageHeader)page)->pd_linp[offsetNumber - 1];
if (((*itemId).lp_flags & LP_USED) ||
((*itemId).lp_len != 0)) {
elog(WARN, "PageAddItem: tried overwrite of used ItemId");
	register int i;
Size alignedSize;
Offset lower;
Offset upper;
ItemId itemId;
ItemId fromitemId,
toitemId;
OffsetNumber limit;
bool shuffled = false;
/*
* Find first unallocated offsetNumber
*/
limit = OffsetNumberNext(PageGetMaxOffsetNumber(page));
/* was offsetNumber passed in? */
if (OffsetNumberIsValid(offsetNumber))
{
if (PageManagerShuffle == true)
{
/* shuffle ItemId's (Do the PageManager Shuffle...) */
for (i = (limit - 1); i >= offsetNumber; i--)
{
fromitemId = &((PageHeader) page)->pd_linp[i - 1];
toitemId = &((PageHeader) page)->pd_linp[i];
*toitemId = *fromitemId;
}
shuffled = true; /* need to increase "lower" */
}
else
{ /* overwrite mode */
itemId = &((PageHeader) page)->pd_linp[offsetNumber - 1];
if (((*itemId).lp_flags & LP_USED) ||
((*itemId).lp_len != 0))
{
elog(WARN, "PageAddItem: tried overwrite of used ItemId");
return (InvalidOffsetNumber);
}
}
}
else
{ /* offsetNumber was not passed in, so find
* one */
/* look for "recyclable" (unused & deallocated) ItemId */
for (offsetNumber = 1; offsetNumber < limit; offsetNumber++)
{
itemId = &((PageHeader) page)->pd_linp[offsetNumber - 1];
if ((((*itemId).lp_flags & LP_USED) == 0) &&
((*itemId).lp_len == 0))
break;
}
}
if (offsetNumber > limit)
lower = (Offset) (((char *) (&((PageHeader) page)->pd_linp[offsetNumber])) - ((char *) page));
else if (offsetNumber == limit || shuffled == true)
lower = ((PageHeader) page)->pd_lower + sizeof(ItemIdData);
else
lower = ((PageHeader) page)->pd_lower;
alignedSize = DOUBLEALIGN(size);
upper = ((PageHeader) page)->pd_upper - alignedSize;
if (lower > upper)
{
return (InvalidOffsetNumber);
}
}
} else { /* offsetNumber was not passed in, so find one */
/* look for "recyclable" (unused & deallocated) ItemId */
for (offsetNumber = 1; offsetNumber < limit; offsetNumber++) {
itemId = &((PageHeader)page)->pd_linp[offsetNumber - 1];
if ((((*itemId).lp_flags & LP_USED) == 0) &&
((*itemId).lp_len == 0))
break;
}
}
if (offsetNumber > limit)
lower = (Offset) (((char *) (&((PageHeader)page)->pd_linp[offsetNumber])) - ((char *) page));
else if (offsetNumber == limit || shuffled == true)
lower = ((PageHeader)page)->pd_lower + sizeof (ItemIdData);
else
lower = ((PageHeader)page)->pd_lower;
alignedSize = DOUBLEALIGN(size);
upper = ((PageHeader)page)->pd_upper - alignedSize;
if (lower > upper) {
return (InvalidOffsetNumber);
}
itemId = &((PageHeader)page)->pd_linp[offsetNumber - 1];
(*itemId).lp_off = upper;
(*itemId).lp_len = size;
(*itemId).lp_flags = flags;
memmove((char *)page + upper, item, size);
((PageHeader)page)->pd_lower = lower;
((PageHeader)page)->pd_upper = upper;
return (offsetNumber);
itemId = &((PageHeader) page)->pd_linp[offsetNumber - 1];
(*itemId).lp_off = upper;
(*itemId).lp_len = size;
(*itemId).lp_flags = flags;
memmove((char *) page + upper, item, size);
((PageHeader) page)->pd_lower = lower;
((PageHeader) page)->pd_upper = upper;
return (offsetNumber);
}
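/*
 * Usage sketch (illustrative, not part of this commit): adding one tuple
 * to a freshly initialized page in the default shuffle mode.  Header
 * names are assumed; the calls match the signatures above, and LP_USED
 * is the only line-pointer flag set here.
 */
#include "postgres.h"
#include "storage/bufpage.h"

void
add_one_item(Page page, Item item, Size size)
{
	OffsetNumber offnum;

	PageInit(page, BLCKSZ, 0);	/* no special space */

	/* InvalidOffsetNumber asks PageAddItem to pick the first free slot */
	offnum = PageAddItem(page, item, size, InvalidOffsetNumber, LP_USED);
	if (offnum == InvalidOffsetNumber)
		elog(WARN, "add_one_item: no room left on page");
}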
/*
* PageGetTempPage --
* Get a temporary page in local memory for special processing
* Get a temporary page in local memory for special processing
*/
Page
PageGetTempPage(Page page, Size specialSize)
{
Size pageSize;
Size size;
Page temp;
PageHeader thdr;
pageSize = PageGetPageSize(page);
if ((temp = (Page) palloc(pageSize)) == (Page) NULL)
elog(FATAL, "Cannot allocate %d bytes for temp page.", pageSize);
thdr = (PageHeader) temp;
/* copy old page in */
memmove(temp, page, pageSize);
/* clear out the middle */
size = (pageSize - sizeof(PageHeaderData)) + sizeof(ItemIdData);
size -= DOUBLEALIGN(specialSize);
memset((char *) &(thdr->pd_linp[0]), 0, size);
/* set high, low water marks */
thdr->pd_lower = sizeof (PageHeaderData) - sizeof (ItemIdData);
thdr->pd_upper = pageSize - DOUBLEALIGN(specialSize);
return (temp);
Size pageSize;
Size size;
Page temp;
PageHeader thdr;
pageSize = PageGetPageSize(page);
if ((temp = (Page) palloc(pageSize)) == (Page) NULL)
elog(FATAL, "Cannot allocate %d bytes for temp page.", pageSize);
thdr = (PageHeader) temp;
/* copy old page in */
memmove(temp, page, pageSize);
/* clear out the middle */
size = (pageSize - sizeof(PageHeaderData)) + sizeof(ItemIdData);
size -= DOUBLEALIGN(specialSize);
memset((char *) &(thdr->pd_linp[0]), 0, size);
/* set high, low water marks */
thdr->pd_lower = sizeof(PageHeaderData) - sizeof(ItemIdData);
thdr->pd_upper = pageSize - DOUBLEALIGN(specialSize);
return (temp);
}
/*
* PageRestoreTempPage --
* Copy temporary page back to permanent page after special processing
* and release the temporary page.
* Copy temporary page back to permanent page after special processing
* and release the temporary page.
*/
void
PageRestoreTempPage(Page tempPage, Page oldPage)
{
Size pageSize;
pageSize = PageGetPageSize(tempPage);
memmove((char *) oldPage, (char *) tempPage, pageSize);
pfree(tempPage);
Size pageSize;
pageSize = PageGetPageSize(tempPage);
memmove((char *) oldPage, (char *) tempPage, pageSize);
pfree(tempPage);
}
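/*
 * Usage sketch (illustrative): the intended pairing of PageGetTempPage
 * and PageRestoreTempPage.  The caller rebuilds the scratch copy, then
 * copies it back; PageRestoreTempPage also pfree's the scratch page.
 */
void
rewrite_page(Page page, Size specialSize)
{
	Page		temp = PageGetTempPage(page, specialSize);

	/* ... re-add the surviving items to temp with PageAddItem ... */

	PageRestoreTempPage(temp, page);	/* copy back and free temp */
}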
/*
* PageGetMaxOffsetNumber --
* Returns the maximum offset number used by the given page.
* Returns the maximum offset number used by the given page.
*
 * NOTE: The offset is invalid if the page is empty.
* Test whether PageIsEmpty before calling this routine
* and/or using its return value.
 * NOTE: The offset is invalid if the page is empty.
* Test whether PageIsEmpty before calling this routine
* and/or using its return value.
*/
OffsetNumber
PageGetMaxOffsetNumber(Page page)
{
LocationIndex low;
OffsetNumber i;
low = ((PageHeader) page)->pd_lower;
i = (low - (sizeof(PageHeaderData) - sizeof(ItemIdData)))
/ sizeof(ItemIdData);
return(i);
}
LocationIndex low;
OffsetNumber i;
low = ((PageHeader) page)->pd_lower;
i = (low - (sizeof(PageHeaderData) - sizeof(ItemIdData)))
/ sizeof(ItemIdData);
return (i);
}
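/*
 * Worked example (assumed sizes, as in the sketch above): with a
 * 16-byte header and 4-byte line pointers, pd_lower == 28 gives
 * (28 - (16 - 4)) / 4 == 4 items, so the maximum offset number is 4.
 * An empty page has pd_lower == 12 and yields 0, which is
 * InvalidOffsetNumber -- hence the PageIsEmpty caveat above.
 */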
/* ----------------
* itemid stuff for PageRepairFragmentation
* itemid stuff for PageRepairFragmentation
* ----------------
*/
struct itemIdSortData {
int offsetindex; /* linp array index */
ItemIdData itemiddata;
struct itemIdSortData
{
int offsetindex;/* linp array index */
ItemIdData itemiddata;
};
static int
itemidcompare(void *itemidp1, void *itemidp2)
{
if (((struct itemIdSortData *)itemidp1)->itemiddata.lp_off ==
((struct itemIdSortData *)itemidp2)->itemiddata.lp_off)
return(0);
else if (((struct itemIdSortData *)itemidp1)->itemiddata.lp_off <
((struct itemIdSortData *)itemidp2)->itemiddata.lp_off)
return(1);
else
return(-1);
if (((struct itemIdSortData *) itemidp1)->itemiddata.lp_off ==
((struct itemIdSortData *) itemidp2)->itemiddata.lp_off)
return (0);
else if (((struct itemIdSortData *) itemidp1)->itemiddata.lp_off <
((struct itemIdSortData *) itemidp2)->itemiddata.lp_off)
return (1);
else
return (-1);
}
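/*
 * Illustrative note: itemidcompare orders by *descending* lp_off (it
 * returns 1 when the first offset is the smaller one), so offsets
 * {100, 8000, 4000} sort to {8000, 4000, 100}.  The compaction loop
 * below therefore handles the tuple nearest pd_special first and can
 * rebuild tuple space top-down without clobbering data it has not yet
 * moved (memmove tolerates the overlapping cases anyway).
 */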
/*
* PageRepairFragmentation --
* Frees fragmented space on a page.
* Frees fragmented space on a page.
*/
void
PageRepairFragmentation(Page page)
{
int i;
struct itemIdSortData *itemidbase, *itemidptr;
ItemId lp;
int nline, nused;
Offset upper;
Size alignedSize;
nline = (int16) PageGetMaxOffsetNumber(page);
nused = 0;
for (i=0; i<nline; i++) {
lp = ((PageHeader)page)->pd_linp + i;
if ((*lp).lp_flags & LP_USED)
nused++;
}
if (nused == 0) {
for (i=0; i<nline; i++) {
lp = ((PageHeader)page)->pd_linp + i;
if ((*lp).lp_len > 0) /* unused, but allocated */
(*lp).lp_len = 0; /* indicate unused & deallocated */
int i;
struct itemIdSortData *itemidbase,
*itemidptr;
ItemId lp;
int nline,
nused;
Offset upper;
Size alignedSize;
nline = (int16) PageGetMaxOffsetNumber(page);
nused = 0;
for (i = 0; i < nline; i++)
{
lp = ((PageHeader) page)->pd_linp + i;
if ((*lp).lp_flags & LP_USED)
nused++;
}
((PageHeader)page)->pd_upper = ((PageHeader)page)->pd_special;
} else { /* nused != 0 */
itemidbase = (struct itemIdSortData *)
palloc(sizeof(struct itemIdSortData) * nused);
memset((char *) itemidbase, 0, sizeof(struct itemIdSortData) * nused);
itemidptr = itemidbase;
for (i=0; i<nline; i++) {
lp = ((PageHeader)page)->pd_linp + i;
if ((*lp).lp_flags & LP_USED) {
itemidptr->offsetindex = i;
itemidptr->itemiddata = *lp;
itemidptr++;
} else {
if ((*lp).lp_len > 0) /* unused, but allocated */
(*lp).lp_len = 0; /* indicate unused & deallocated */
}
if (nused == 0)
{
for (i = 0; i < nline; i++)
{
lp = ((PageHeader) page)->pd_linp + i;
if ((*lp).lp_len > 0) /* unused, but allocated */
(*lp).lp_len = 0; /* indicate unused & deallocated */
}
((PageHeader) page)->pd_upper = ((PageHeader) page)->pd_special;
}
/* sort itemIdSortData array...*/
pg_qsort((char *) itemidbase, nused, sizeof(struct itemIdSortData),
itemidcompare);
/* compactify page */
((PageHeader)page)->pd_upper = ((PageHeader)page)->pd_special;
for (i=0, itemidptr = itemidbase; i<nused; i++, itemidptr++) {
lp = ((PageHeader)page)->pd_linp + itemidptr->offsetindex;
alignedSize = DOUBLEALIGN((*lp).lp_len);
upper = ((PageHeader)page)->pd_upper - alignedSize;
memmove((char *) page + upper,
(char *)page + (*lp).lp_off,
(*lp).lp_len);
(*lp).lp_off = upper;
((PageHeader)page)->pd_upper = upper;
else
{ /* nused != 0 */
itemidbase = (struct itemIdSortData *)
palloc(sizeof(struct itemIdSortData) * nused);
memset((char *) itemidbase, 0, sizeof(struct itemIdSortData) * nused);
itemidptr = itemidbase;
for (i = 0; i < nline; i++)
{
lp = ((PageHeader) page)->pd_linp + i;
if ((*lp).lp_flags & LP_USED)
{
itemidptr->offsetindex = i;
itemidptr->itemiddata = *lp;
itemidptr++;
}
else
{
if ((*lp).lp_len > 0) /* unused, but allocated */
(*lp).lp_len = 0; /* indicate unused & deallocated */
}
}
/* sort itemIdSortData array... */
pg_qsort((char *) itemidbase, nused, sizeof(struct itemIdSortData),
itemidcompare);
/* compactify page */
((PageHeader) page)->pd_upper = ((PageHeader) page)->pd_special;
for (i = 0, itemidptr = itemidbase; i < nused; i++, itemidptr++)
{
lp = ((PageHeader) page)->pd_linp + itemidptr->offsetindex;
alignedSize = DOUBLEALIGN((*lp).lp_len);
upper = ((PageHeader) page)->pd_upper - alignedSize;
memmove((char *) page + upper,
(char *) page + (*lp).lp_off,
(*lp).lp_len);
(*lp).lp_off = upper;
((PageHeader) page)->pd_upper = upper;
}
pfree(itemidbase);
}
pfree(itemidbase);
}
}
/*
* PageGetFreeSpace --
* Returns the size of the free (allocatable) space on a page.
* Returns the size of the free (allocatable) space on a page.
*/
Size
PageGetFreeSpace(Page page)
{
Size space;
space = ((PageHeader)page)->pd_upper - ((PageHeader)page)->pd_lower;
if (space < sizeof (ItemIdData)) {
return (0);
}
space -= sizeof (ItemIdData); /* XXX not always true */
return (space);
Size space;
space = ((PageHeader) page)->pd_upper - ((PageHeader) page)->pd_lower;
if (space < sizeof(ItemIdData))
{
return (0);
}
space -= sizeof(ItemIdData);/* XXX not always true */
return (space);
}
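/*
 * Worked example (assumed sizes): with pd_lower == 28 and
 * pd_upper == 8176, PageGetFreeSpace reports 8176 - 28 - 4 == 8144
 * bytes.  One line pointer is pre-deducted for the next insertion;
 * the "XXX not always true" remark flags that an insertion reusing a
 * recycled ItemId would not actually consume that extra ItemIdData.
 */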
/*
* PageManagerModeSet --
*
* Sets mode to either: ShufflePageManagerMode (the default) or
* OverwritePageManagerMode. For use by access methods code
* for determining semantics of PageAddItem when the offsetNumber
* argument is passed in.
* Sets mode to either: ShufflePageManagerMode (the default) or
* OverwritePageManagerMode. For use by access methods code
* for determining semantics of PageAddItem when the offsetNumber
* argument is passed in.
*/
void
PageManagerModeSet(PageManagerMode mode)
{
if (mode == ShufflePageManagerMode)
PageManagerShuffle = true;
else if (mode == OverwritePageManagerMode)
PageManagerShuffle = false;
if (mode == ShufflePageManagerMode)
PageManagerShuffle = true;
else if (mode == OverwritePageManagerMode)
PageManagerShuffle = false;
}
/*
@@ -368,65 +395,64 @@ PageManagerModeSet(PageManagerMode mode)
* PageIndexTupleDelete
*----------------------------------------------------------------
*
* This routine does the work of removing a tuple from an index page.
* This routine does the work of removing a tuple from an index page.
*/
void
PageIndexTupleDelete(Page page, OffsetNumber offnum)
{
PageHeader phdr;
char *addr;
ItemId tup;
Size size;
char *locn;
int nbytes;
int offidx;
phdr = (PageHeader) page;
/* change offset number to offset index */
offidx = offnum - 1;
tup = PageGetItemId(page, offnum);
size = ItemIdGetLength(tup);
size = DOUBLEALIGN(size);
/* location of deleted tuple data */
locn = (char *) (page + ItemIdGetOffset(tup));
/*
* First, we want to get rid of the pd_linp entry for the index
* tuple. We copy all subsequent linp's back one slot in the
* array.
*/
nbytes = phdr->pd_lower -
((char *)&phdr->pd_linp[offidx + 1] - (char *) phdr);
memmove((char *) &(phdr->pd_linp[offidx]),
(char *) &(phdr->pd_linp[offidx + 1]),
nbytes);
/*
* Now move everything between the old upper bound (beginning of tuple
* space) and the beginning of the deleted tuple forward, so that
* space in the middle of the page is left free. If we've just deleted
* the tuple at the beginning of tuple space, then there's no need
* to do the copy (and bcopy on some architectures SEGV's if asked
* to move zero bytes).
*/
/* beginning of tuple space */
addr = (char *) (page + phdr->pd_upper);
if (locn != addr)
memmove(addr + size, addr, (int) (locn - addr));
/* adjust free space boundary pointers */
phdr->pd_upper += size;
phdr->pd_lower -= sizeof (ItemIdData);
/* finally, we need to adjust the linp entries that remain */
if (!PageIsEmpty(page))
PageIndexTupleDeleteAdjustLinePointers(phdr, locn, size);
PageHeader phdr;
char *addr;
ItemId tup;
Size size;
char *locn;
int nbytes;
int offidx;
phdr = (PageHeader) page;
/* change offset number to offset index */
offidx = offnum - 1;
tup = PageGetItemId(page, offnum);
size = ItemIdGetLength(tup);
size = DOUBLEALIGN(size);
/* location of deleted tuple data */
locn = (char *) (page + ItemIdGetOffset(tup));
/*
* First, we want to get rid of the pd_linp entry for the index tuple.
* We copy all subsequent linp's back one slot in the array.
*/
nbytes = phdr->pd_lower -
((char *) &phdr->pd_linp[offidx + 1] - (char *) phdr);
memmove((char *) &(phdr->pd_linp[offidx]),
(char *) &(phdr->pd_linp[offidx + 1]),
nbytes);
/*
* Now move everything between the old upper bound (beginning of tuple
* space) and the beginning of the deleted tuple forward, so that
* space in the middle of the page is left free. If we've just
* deleted the tuple at the beginning of tuple space, then there's no
* need to do the copy (and bcopy on some architectures SEGV's if
* asked to move zero bytes).
*/
/* beginning of tuple space */
addr = (char *) (page + phdr->pd_upper);
if (locn != addr)
memmove(addr + size, addr, (int) (locn - addr));
/* adjust free space boundary pointers */
phdr->pd_upper += size;
phdr->pd_lower -= sizeof(ItemIdData);
/* finally, we need to adjust the linp entries that remain */
if (!PageIsEmpty(page))
PageIndexTupleDeleteAdjustLinePointers(phdr, locn, size);
}
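/*
 * Worked example (illustrative): deleting offnum 2 of 3 on a page where
 * the victim occupies 40 aligned bytes at lp_off 8100.  The first
 * memmove copies linp[2] over linp[1]; the second slides the bytes in
 * [pd_upper, 8100) up by 40, so pd_upper grows by 40 and pd_lower
 * shrinks by sizeof(ItemIdData).  The adjustment pass below then bumps
 * every surviving lp_off at or below 8100 by 40.
 */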
/*
@@ -434,33 +460,35 @@ PageIndexTupleDelete(Page page, OffsetNumber offnum)
* PageIndexTupleDeleteAdjustLinePointers
*----------------------------------------------------------------
*
* Once the line pointers and tuple data have been shifted around
* on the page, we need to go down the line pointer vector and
* adjust pointers to reflect new locations. Anything that used
* to be before the deleted tuple's data was moved forward by the
* size of the deleted tuple.
* Once the line pointers and tuple data have been shifted around
* on the page, we need to go down the line pointer vector and
* adjust pointers to reflect new locations. Anything that used
* to be before the deleted tuple's data was moved forward by the
* size of the deleted tuple.
*
* This routine does the work of adjusting the line pointers.
* Location is where the tuple data used to lie; size is how
* much space it occupied. We assume that size has been aligned
* as required by the time we get here.
* This routine does the work of adjusting the line pointers.
* Location is where the tuple data used to lie; size is how
* much space it occupied. We assume that size has been aligned
* as required by the time we get here.
*
* This routine should never be called on an empty page.
* This routine should never be called on an empty page.
*/
static void
PageIndexTupleDeleteAdjustLinePointers(PageHeader phdr,
char *location,
Size size)
char *location,
Size size)
{
int i;
unsigned offset;
/* location is an index into the page... */
offset = (unsigned)(location - (char *)phdr);
for (i = PageGetMaxOffsetNumber((Page) phdr) - 1; i >= 0; i--) {
if (phdr->pd_linp[i].lp_off <= offset) {
phdr->pd_linp[i].lp_off += size;
int i;
unsigned offset;
/* location is an index into the page... */
offset = (unsigned) (location - (char *) phdr);
for (i = PageGetMaxOffsetNumber((Page) phdr) - 1; i >= 0; i--)
{
if (phdr->pd_linp[i].lp_off <= offset)
{
phdr->pd_linp[i].lp_off += size;
}
}
}
}


@@ -1,13 +1,13 @@
/*-------------------------------------------------------------------------
*
* itemptr.c--
* POSTGRES disk item pointer code.
* POSTGRES disk item pointer code.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/page/itemptr.c,v 1.2 1996/11/03 05:07:46 scrappy Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/page/itemptr.c,v 1.3 1997/09/07 04:49:07 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -20,21 +20,20 @@
/*
* ItemPointerEquals --
* Returns true if both item pointers point to the same item,
* otherwise returns false.
* Returns true if both item pointers point to the same item,
* otherwise returns false.
*
* Note:
* Assumes that the disk item pointers are not NULL.
* Assumes that the disk item pointers are not NULL.
*/
bool
ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2)
{
if (ItemPointerGetBlockNumber(pointer1) ==
ItemPointerGetBlockNumber(pointer2) &&
ItemPointerGetOffsetNumber(pointer1) ==
ItemPointerGetOffsetNumber(pointer2))
return(true);
else
return(false);
if (ItemPointerGetBlockNumber(pointer1) ==
ItemPointerGetBlockNumber(pointer2) &&
ItemPointerGetOffsetNumber(pointer1) ==
ItemPointerGetOffsetNumber(pointer2))
return (true);
else
return (false);
}

File diff suppressed because it is too large


@@ -1,16 +1,16 @@
/*-------------------------------------------------------------------------
*
* mm.c--
* main memory storage manager
* main memory storage manager
*
* This code manages relations that reside in (presumably stable)
* main memory.
* This code manages relations that reside in (presumably stable)
* main memory.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/Attic/mm.c,v 1.4 1996/11/08 05:59:11 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/Attic/mm.c,v 1.5 1997/09/07 04:49:22 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -20,7 +20,7 @@
#include <math.h>
#include "storage/ipc.h"
#include "storage/smgr.h" /* where the declarations go */
#include "storage/smgr.h" /* where the declarations go */
#include "storage/block.h"
#include "storage/shmem.h"
#include "storage/spin.h"
@@ -31,555 +31,582 @@
#include "utils/memutils.h"
/*
* MMCacheTag -- Unique triplet for blocks stored by the main memory
* storage manager.
* MMCacheTag -- Unique triplet for blocks stored by the main memory
* storage manager.
*/
typedef struct MMCacheTag {
Oid mmct_dbid;
Oid mmct_relid;
BlockNumber mmct_blkno;
} MMCacheTag;
typedef struct MMCacheTag
{
Oid mmct_dbid;
Oid mmct_relid;
BlockNumber mmct_blkno;
} MMCacheTag;
/*
* Shared-memory hash table for main memory relations contains
* entries of this form.
* Shared-memory hash table for main memory relations contains
* entries of this form.
*/
typedef struct MMHashEntry {
MMCacheTag mmhe_tag;
int mmhe_bufno;
} MMHashEntry;
typedef struct MMHashEntry
{
MMCacheTag mmhe_tag;
int mmhe_bufno;
} MMHashEntry;
/*
* MMRelTag -- Unique identifier for each relation that is stored in the
* main-memory storage manager.
* main-memory storage manager.
*/
typedef struct MMRelTag {
Oid mmrt_dbid;
Oid mmrt_relid;
} MMRelTag;
typedef struct MMRelTag
{
Oid mmrt_dbid;
Oid mmrt_relid;
} MMRelTag;
/*
* Shared-memory hash table for # blocks in main memory relations contains
* entries of this form.
* Shared-memory hash table for # blocks in main memory relations contains
* entries of this form.
*/
typedef struct MMRelHashEntry {
MMRelTag mmrhe_tag;
int mmrhe_nblocks;
} MMRelHashEntry;
typedef struct MMRelHashEntry
{
MMRelTag mmrhe_tag;
int mmrhe_nblocks;
} MMRelHashEntry;
#define MMNBUFFERS 10
#define MMNBUFFERS 10
#define MMNRELATIONS 2
SPINLOCK MMCacheLock;
extern bool IsPostmaster;
extern Oid MyDatabaseId;
SPINLOCK MMCacheLock;
extern bool IsPostmaster;
extern Oid MyDatabaseId;
static int *MMCurTop;
static int *MMCurRelno;
static MMCacheTag *MMBlockTags;
static char *MMBlockCache;
static HTAB *MMCacheHT;
static HTAB *MMRelCacheHT;
static int *MMCurTop;
static int *MMCurRelno;
static MMCacheTag *MMBlockTags;
static char *MMBlockCache;
static HTAB *MMCacheHT;
static HTAB *MMRelCacheHT;
int
mminit()
{
char *mmcacheblk;
int mmsize = 0;
bool found;
HASHCTL info;
char *mmcacheblk;
int mmsize = 0;
bool found;
HASHCTL info;
SpinAcquire(MMCacheLock);
SpinAcquire(MMCacheLock);
mmsize += MAXALIGN(BLCKSZ * MMNBUFFERS);
mmsize += MAXALIGN(sizeof(*MMCurTop));
mmsize += MAXALIGN(sizeof(*MMCurRelno));
mmsize += MAXALIGN((MMNBUFFERS * sizeof(MMCacheTag)));
mmcacheblk = (char *) ShmemInitStruct("Main memory smgr", mmsize, &found);
mmsize += MAXALIGN(BLCKSZ * MMNBUFFERS);
mmsize += MAXALIGN(sizeof(*MMCurTop));
mmsize += MAXALIGN(sizeof(*MMCurRelno));
mmsize += MAXALIGN((MMNBUFFERS * sizeof(MMCacheTag)));
mmcacheblk = (char *) ShmemInitStruct("Main memory smgr", mmsize, &found);
if (mmcacheblk == (char *) NULL)
{
SpinRelease(MMCacheLock);
return (SM_FAIL);
}
info.keysize = sizeof(MMCacheTag);
info.datasize = sizeof(int);
info.hash = tag_hash;
MMCacheHT = (HTAB *) ShmemInitHash("Main memory store HT",
MMNBUFFERS, MMNBUFFERS,
&info, (HASH_ELEM | HASH_FUNCTION));
if (MMCacheHT == (HTAB *) NULL)
{
SpinRelease(MMCacheLock);
return (SM_FAIL);
}
info.keysize = sizeof(MMRelTag);
info.datasize = sizeof(int);
info.hash = tag_hash;
MMRelCacheHT = (HTAB *) ShmemInitHash("Main memory rel HT",
MMNRELATIONS, MMNRELATIONS,
&info, (HASH_ELEM | HASH_FUNCTION));
if (MMRelCacheHT == (HTAB *) NULL)
{
SpinRelease(MMCacheLock);
return (SM_FAIL);
}
if (IsPostmaster)
{
memset(mmcacheblk, 0, mmsize);
SpinRelease(MMCacheLock);
return (SM_SUCCESS);
}
if (mmcacheblk == (char *) NULL) {
SpinRelease(MMCacheLock);
return (SM_FAIL);
}
info.keysize = sizeof(MMCacheTag);
info.datasize = sizeof(int);
info.hash = tag_hash;
MMCurTop = (int *) mmcacheblk;
mmcacheblk += sizeof(int);
MMCurRelno = (int *) mmcacheblk;
mmcacheblk += sizeof(int);
MMBlockTags = (MMCacheTag *) mmcacheblk;
mmcacheblk += (MMNBUFFERS * sizeof(MMCacheTag));
MMBlockCache = mmcacheblk;
MMCacheHT = (HTAB *) ShmemInitHash("Main memory store HT",
MMNBUFFERS, MMNBUFFERS,
&info, (HASH_ELEM|HASH_FUNCTION));
if (MMCacheHT == (HTAB *) NULL) {
SpinRelease(MMCacheLock);
return (SM_FAIL);
}
info.keysize = sizeof(MMRelTag);
info.datasize = sizeof(int);
info.hash = tag_hash;
MMRelCacheHT = (HTAB *) ShmemInitHash("Main memory rel HT",
MMNRELATIONS, MMNRELATIONS,
&info, (HASH_ELEM|HASH_FUNCTION));
if (MMRelCacheHT == (HTAB *) NULL) {
SpinRelease(MMCacheLock);
return (SM_FAIL);
}
if (IsPostmaster) {
memset(mmcacheblk, 0, mmsize);
SpinRelease(MMCacheLock);
return (SM_SUCCESS);
}
SpinRelease(MMCacheLock);
MMCurTop = (int *) mmcacheblk;
mmcacheblk += sizeof(int);
MMCurRelno = (int *) mmcacheblk;
mmcacheblk += sizeof(int);
MMBlockTags = (MMCacheTag *) mmcacheblk;
mmcacheblk += (MMNBUFFERS * sizeof(MMCacheTag));
MMBlockCache = mmcacheblk;
return (SM_SUCCESS);
}
int
mmshutdown()
{
return (SM_SUCCESS);
return (SM_SUCCESS);
}
int
mmcreate(Relation reln)
{
MMRelHashEntry *entry;
bool found;
MMRelTag tag;
MMRelHashEntry *entry;
bool found;
MMRelTag tag;
SpinAcquire(MMCacheLock);
SpinAcquire(MMCacheLock);
if (*MMCurRelno == MMNRELATIONS)
{
SpinRelease(MMCacheLock);
return (SM_FAIL);
}
(*MMCurRelno)++;
tag.mmrt_relid = reln->rd_id;
if (reln->rd_rel->relisshared)
tag.mmrt_dbid = (Oid) 0;
else
tag.mmrt_dbid = MyDatabaseId;
entry = (MMRelHashEntry *) hash_search(MMRelCacheHT,
(char *) &tag, HASH_ENTER, &found);
if (entry == (MMRelHashEntry *) NULL)
{
SpinRelease(MMCacheLock);
elog(FATAL, "main memory storage mgr rel cache hash table corrupt");
}
if (found)
{
/* already exists */
SpinRelease(MMCacheLock);
return (SM_FAIL);
}
entry->mmrhe_nblocks = 0;
if (*MMCurRelno == MMNRELATIONS) {
SpinRelease(MMCacheLock);
return (SM_FAIL);
}
(*MMCurRelno)++;
tag.mmrt_relid = reln->rd_id;
if (reln->rd_rel->relisshared)
tag.mmrt_dbid = (Oid) 0;
else
tag.mmrt_dbid = MyDatabaseId;
entry = (MMRelHashEntry *) hash_search(MMRelCacheHT,
(char *) &tag, HASH_ENTER, &found);
if (entry == (MMRelHashEntry *) NULL) {
SpinRelease(MMCacheLock);
elog(FATAL, "main memory storage mgr rel cache hash table corrupt");
}
if (found) {
/* already exists */
SpinRelease(MMCacheLock);
return (SM_FAIL);
}
entry->mmrhe_nblocks = 0;
SpinRelease(MMCacheLock);
return (SM_SUCCESS);
return (SM_SUCCESS);
}
/*
* mmunlink() -- Unlink a relation.
* mmunlink() -- Unlink a relation.
*/
int
mmunlink(Relation reln)
{
int i;
Oid reldbid;
MMHashEntry *entry;
MMRelHashEntry *rentry;
bool found;
MMRelTag rtag;
int i;
Oid reldbid;
MMHashEntry *entry;
MMRelHashEntry *rentry;
bool found;
MMRelTag rtag;
if (reln->rd_rel->relisshared)
reldbid = (Oid) 0;
else
reldbid = MyDatabaseId;
if (reln->rd_rel->relisshared)
reldbid = (Oid) 0;
else
reldbid = MyDatabaseId;
SpinAcquire(MMCacheLock);
SpinAcquire(MMCacheLock);
for (i = 0; i < MMNBUFFERS; i++) {
if (MMBlockTags[i].mmct_dbid == reldbid
&& MMBlockTags[i].mmct_relid == reln->rd_id) {
entry = (MMHashEntry *) hash_search(MMCacheHT,
(char *) &MMBlockTags[i],
HASH_REMOVE, &found);
if (entry == (MMHashEntry *) NULL || !found) {
SpinRelease(MMCacheLock);
elog(FATAL, "mmunlink: cache hash table corrupted");
}
MMBlockTags[i].mmct_dbid = (Oid) 0;
MMBlockTags[i].mmct_relid = (Oid) 0;
MMBlockTags[i].mmct_blkno = (BlockNumber) 0;
for (i = 0; i < MMNBUFFERS; i++)
{
if (MMBlockTags[i].mmct_dbid == reldbid
&& MMBlockTags[i].mmct_relid == reln->rd_id)
{
entry = (MMHashEntry *) hash_search(MMCacheHT,
(char *) &MMBlockTags[i],
HASH_REMOVE, &found);
if (entry == (MMHashEntry *) NULL || !found)
{
SpinRelease(MMCacheLock);
elog(FATAL, "mmunlink: cache hash table corrupted");
}
MMBlockTags[i].mmct_dbid = (Oid) 0;
MMBlockTags[i].mmct_relid = (Oid) 0;
MMBlockTags[i].mmct_blkno = (BlockNumber) 0;
}
}
}
rtag.mmrt_dbid = reldbid;
rtag.mmrt_relid = reln->rd_id;
rtag.mmrt_dbid = reldbid;
rtag.mmrt_relid = reln->rd_id;
rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT, (char *) &rtag,
HASH_REMOVE, &found);
rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT, (char *) &rtag,
HASH_REMOVE, &found);
if (rentry == (MMRelHashEntry *) NULL || !found)
{
SpinRelease(MMCacheLock);
elog(FATAL, "mmunlink: rel cache hash table corrupted");
}
(*MMCurRelno)--;
if (rentry == (MMRelHashEntry *) NULL || !found) {
SpinRelease(MMCacheLock);
elog(FATAL, "mmunlink: rel cache hash table corrupted");
}
(*MMCurRelno)--;
SpinRelease(MMCacheLock);
return 1;
return 1;
}
/*
* mmextend() -- Add a block to the specified relation.
* mmextend() -- Add a block to the specified relation.
*
* This routine returns SM_FAIL or SM_SUCCESS, with errno set as
* appropriate.
* This routine returns SM_FAIL or SM_SUCCESS, with errno set as
* appropriate.
*/
int
mmextend(Relation reln, char *buffer)
{
MMRelHashEntry *rentry;
MMHashEntry *entry;
int i;
Oid reldbid;
int offset;
bool found;
MMRelTag rtag;
MMCacheTag tag;
MMRelHashEntry *rentry;
MMHashEntry *entry;
int i;
Oid reldbid;
int offset;
bool found;
MMRelTag rtag;
MMCacheTag tag;
if (reln->rd_rel->relisshared)
reldbid = (Oid) 0;
else
reldbid = MyDatabaseId;
if (reln->rd_rel->relisshared)
reldbid = (Oid) 0;
else
reldbid = MyDatabaseId;
tag.mmct_dbid = rtag.mmrt_dbid = reldbid;
tag.mmct_relid = rtag.mmrt_relid = reln->rd_id;
tag.mmct_dbid = rtag.mmrt_dbid = reldbid;
tag.mmct_relid = rtag.mmrt_relid = reln->rd_id;
SpinAcquire(MMCacheLock);
SpinAcquire(MMCacheLock);
if (*MMCurTop == MMNBUFFERS) {
for (i = 0; i < MMNBUFFERS; i++) {
if (MMBlockTags[i].mmct_dbid == 0 &&
MMBlockTags[i].mmct_relid == 0)
break;
if (*MMCurTop == MMNBUFFERS)
{
for (i = 0; i < MMNBUFFERS; i++)
{
if (MMBlockTags[i].mmct_dbid == 0 &&
MMBlockTags[i].mmct_relid == 0)
break;
}
if (i == MMNBUFFERS)
{
SpinRelease(MMCacheLock);
return (SM_FAIL);
}
}
if (i == MMNBUFFERS) {
SpinRelease(MMCacheLock);
return (SM_FAIL);
else
{
i = *MMCurTop;
(*MMCurTop)++;
}
} else {
i = *MMCurTop;
(*MMCurTop)++;
}
rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT, (char *) &rtag,
HASH_FIND, &found);
if (rentry == (MMRelHashEntry *) NULL || !found) {
rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT, (char *) &rtag,
HASH_FIND, &found);
if (rentry == (MMRelHashEntry *) NULL || !found)
{
SpinRelease(MMCacheLock);
elog(FATAL, "mmextend: rel cache hash table corrupt");
}
tag.mmct_blkno = rentry->mmrhe_nblocks;
entry = (MMHashEntry *) hash_search(MMCacheHT, (char *) &tag,
HASH_ENTER, &found);
if (entry == (MMHashEntry *) NULL || found)
{
SpinRelease(MMCacheLock);
elog(FATAL, "mmextend: cache hash table corrupt");
}
entry->mmhe_bufno = i;
MMBlockTags[i].mmct_dbid = reldbid;
MMBlockTags[i].mmct_relid = reln->rd_id;
MMBlockTags[i].mmct_blkno = rentry->mmrhe_nblocks;
/* page numbers are zero-based, so we increment this at the end */
(rentry->mmrhe_nblocks)++;
/* write the extended page */
offset = (i * BLCKSZ);
memmove(&(MMBlockCache[offset]), buffer, BLCKSZ);
SpinRelease(MMCacheLock);
elog(FATAL, "mmextend: rel cache hash table corrupt");
}
tag.mmct_blkno = rentry->mmrhe_nblocks;
entry = (MMHashEntry *) hash_search(MMCacheHT, (char *) &tag,
HASH_ENTER, &found);
if (entry == (MMHashEntry *) NULL || found) {
SpinRelease(MMCacheLock);
elog(FATAL, "mmextend: cache hash table corrupt");
}
entry->mmhe_bufno = i;
MMBlockTags[i].mmct_dbid = reldbid;
MMBlockTags[i].mmct_relid = reln->rd_id;
MMBlockTags[i].mmct_blkno = rentry->mmrhe_nblocks;
/* page numbers are zero-based, so we increment this at the end */
(rentry->mmrhe_nblocks)++;
/* write the extended page */
offset = (i * BLCKSZ);
memmove(&(MMBlockCache[offset]), buffer, BLCKSZ);
SpinRelease(MMCacheLock);
return (SM_SUCCESS);
return (SM_SUCCESS);
}
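/*
 * Usage sketch (illustrative): the typical create/extend/count flow
 * against the main-memory manager, relying only on the declarations
 * above.  mmcreate and mmextend return SM_SUCCESS or SM_FAIL;
 * mmnblocks returns a block count or -1.  The wrapper name is
 * hypothetical.
 */
static int
mm_put_first_block(Relation reln, char *page)
{
	if (mmcreate(reln) == SM_FAIL)
		return (SM_FAIL);
	if (mmextend(reln, page) == SM_FAIL)	/* becomes block 0 */
		return (SM_FAIL);
	return ((mmnblocks(reln) == 1) ? SM_SUCCESS : SM_FAIL);
}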
/*
* mmopen() -- Open the specified relation.
* mmopen() -- Open the specified relation.
*/
int
mmopen(Relation reln)
{
/* automatically successful */
return (0);
/* automatically successful */
return (0);
}
/*
* mmclose() -- Close the specified relation.
* mmclose() -- Close the specified relation.
*
* Returns SM_SUCCESS or SM_FAIL with errno set as appropriate.
* Returns SM_SUCCESS or SM_FAIL with errno set as appropriate.
*/
int
mmclose(Relation reln)
{
/* automatically successful */
return (SM_SUCCESS);
/* automatically successful */
return (SM_SUCCESS);
}
/*
* mmread() -- Read the specified block from a relation.
* mmread() -- Read the specified block from a relation.
*
* Returns SM_SUCCESS or SM_FAIL.
* Returns SM_SUCCESS or SM_FAIL.
*/
int
mmread(Relation reln, BlockNumber blocknum, char *buffer)
{
MMHashEntry *entry;
bool found;
int offset;
MMCacheTag tag;
MMHashEntry *entry;
bool found;
int offset;
MMCacheTag tag;
if (reln->rd_rel->relisshared)
tag.mmct_dbid = (Oid) 0;
else
tag.mmct_dbid = MyDatabaseId;
if (reln->rd_rel->relisshared)
tag.mmct_dbid = (Oid) 0;
else
tag.mmct_dbid = MyDatabaseId;
tag.mmct_relid = reln->rd_id;
tag.mmct_blkno = blocknum;
tag.mmct_relid = reln->rd_id;
tag.mmct_blkno = blocknum;
SpinAcquire(MMCacheLock);
entry = (MMHashEntry *) hash_search(MMCacheHT, (char *) &tag,
HASH_FIND, &found);
SpinAcquire(MMCacheLock);
entry = (MMHashEntry *) hash_search(MMCacheHT, (char *) &tag,
HASH_FIND, &found);
if (entry == (MMHashEntry *) NULL)
{
SpinRelease(MMCacheLock);
elog(FATAL, "mmread: hash table corrupt");
}
if (!found)
{
/* reading nonexistent pages is defined to fill them with zeroes */
SpinRelease(MMCacheLock);
memset(buffer, 0, BLCKSZ);
return (SM_SUCCESS);
}
offset = (entry->mmhe_bufno * BLCKSZ);
memmove(buffer, &MMBlockCache[offset], BLCKSZ);
if (entry == (MMHashEntry *) NULL) {
SpinRelease(MMCacheLock);
elog(FATAL, "mmread: hash table corrupt");
}
if (!found) {
/* reading nonexistent pages is defined to fill them with zeroes */
SpinRelease(MMCacheLock);
memset(buffer, 0, BLCKSZ);
return (SM_SUCCESS);
}
offset = (entry->mmhe_bufno * BLCKSZ);
memmove(buffer, &MMBlockCache[offset], BLCKSZ);
SpinRelease(MMCacheLock);
return (SM_SUCCESS);
}
/*
* mmwrite() -- Write the supplied block at the appropriate location.
* mmwrite() -- Write the supplied block at the appropriate location.
*
* Returns SM_SUCCESS or SM_FAIL.
* Returns SM_SUCCESS or SM_FAIL.
*/
int
mmwrite(Relation reln, BlockNumber blocknum, char *buffer)
{
MMHashEntry *entry;
bool found;
int offset;
MMCacheTag tag;
MMHashEntry *entry;
bool found;
int offset;
MMCacheTag tag;
if (reln->rd_rel->relisshared)
tag.mmct_dbid = (Oid) 0;
else
tag.mmct_dbid = MyDatabaseId;
if (reln->rd_rel->relisshared)
tag.mmct_dbid = (Oid) 0;
else
tag.mmct_dbid = MyDatabaseId;
tag.mmct_relid = reln->rd_id;
tag.mmct_blkno = blocknum;
tag.mmct_relid = reln->rd_id;
tag.mmct_blkno = blocknum;
SpinAcquire(MMCacheLock);
entry = (MMHashEntry *) hash_search(MMCacheHT, (char *) &tag,
HASH_FIND, &found);
SpinAcquire(MMCacheLock);
entry = (MMHashEntry *) hash_search(MMCacheHT, (char *) &tag,
HASH_FIND, &found);
if (entry == (MMHashEntry *) NULL)
{
SpinRelease(MMCacheLock);
elog(FATAL, "mmread: hash table corrupt");
}
if (!found)
{
SpinRelease(MMCacheLock);
elog(FATAL, "mmwrite: hash table missing requested page");
}
offset = (entry->mmhe_bufno * BLCKSZ);
memmove(&MMBlockCache[offset], buffer, BLCKSZ);
if (entry == (MMHashEntry *) NULL) {
SpinRelease(MMCacheLock);
elog(FATAL, "mmread: hash table corrupt");
}
if (!found) {
SpinRelease(MMCacheLock);
elog(FATAL, "mmwrite: hash table missing requested page");
}
offset = (entry->mmhe_bufno * BLCKSZ);
memmove(&MMBlockCache[offset], buffer, BLCKSZ);
SpinRelease(MMCacheLock);
return (SM_SUCCESS);
return (SM_SUCCESS);
}
/*
* mmflush() -- Synchronously write a block to stable storage.
* mmflush() -- Synchronously write a block to stable storage.
*
* For main-memory relations, this is exactly equivalent to mmwrite().
* For main-memory relations, this is exactly equivalent to mmwrite().
*/
int
mmflush(Relation reln, BlockNumber blocknum, char *buffer)
{
return (mmwrite(reln, blocknum, buffer));
return (mmwrite(reln, blocknum, buffer));
}
/*
* mmblindwrt() -- Write a block to stable storage blind.
* mmblindwrt() -- Write a block to stable storage blind.
*
* We have to be able to do this using only the name and OID of
* the database and relation in which the block belongs.
* We have to be able to do this using only the name and OID of
* the database and relation in which the block belongs.
*/
int
mmblindwrt(char *dbstr,
char *relstr,
Oid dbid,
Oid relid,
BlockNumber blkno,
char *buffer)
char *relstr,
Oid dbid,
Oid relid,
BlockNumber blkno,
char *buffer)
{
return (SM_FAIL);
return (SM_FAIL);
}
/*
* mmnblocks() -- Get the number of blocks stored in a relation.
* mmnblocks() -- Get the number of blocks stored in a relation.
*
* Returns # of blocks or -1 on error.
* Returns # of blocks or -1 on error.
*/
int
mmnblocks(Relation reln)
{
MMRelTag rtag;
MMRelHashEntry *rentry;
bool found;
int nblocks;
MMRelTag rtag;
MMRelHashEntry *rentry;
bool found;
int nblocks;
if (reln->rd_rel->relisshared)
rtag.mmrt_dbid = (Oid) 0;
else
rtag.mmrt_dbid = MyDatabaseId;
if (reln->rd_rel->relisshared)
rtag.mmrt_dbid = (Oid) 0;
else
rtag.mmrt_dbid = MyDatabaseId;
rtag.mmrt_relid = reln->rd_id;
rtag.mmrt_relid = reln->rd_id;
SpinAcquire(MMCacheLock);
SpinAcquire(MMCacheLock);
rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT, (char *) &rtag,
HASH_FIND, &found);
rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT, (char *) &rtag,
HASH_FIND, &found);
if (rentry == (MMRelHashEntry *) NULL)
{
SpinRelease(MMCacheLock);
elog(FATAL, "mmnblocks: rel cache hash table corrupt");
}
if (found)
nblocks = rentry->mmrhe_nblocks;
else
nblocks = -1;
if (rentry == (MMRelHashEntry *) NULL) {
SpinRelease(MMCacheLock);
elog(FATAL, "mmnblocks: rel cache hash table corrupt");
}
if (found)
nblocks = rentry->mmrhe_nblocks;
else
nblocks = -1;
SpinRelease(MMCacheLock);
return (nblocks);
return (nblocks);
}
/*
* mmcommit() -- Commit a transaction.
* mmcommit() -- Commit a transaction.
*
* Returns SM_SUCCESS or SM_FAIL with errno set as appropriate.
* Returns SM_SUCCESS or SM_FAIL with errno set as appropriate.
*/
int
mmcommit()
{
return (SM_SUCCESS);
return (SM_SUCCESS);
}
/*
* mmabort() -- Abort a transaction.
* mmabort() -- Abort a transaction.
*/
int
mmabort()
{
return (SM_SUCCESS);
return (SM_SUCCESS);
}
/*
* MMShmemSize() -- Declare amount of shared memory we require.
* MMShmemSize() -- Declare amount of shared memory we require.
*
* The shared memory initialization code creates a block of shared
 *	memory exactly big enough to hold all the structures it needs.
* This routine declares how much space the main memory storage
* manager will use.
* The shared memory initialization code creates a block of shared
 *	memory exactly big enough to hold all the structures it needs.
* This routine declares how much space the main memory storage
* manager will use.
*/
int
MMShmemSize()
{
int size = 0;
int nbuckets;
int nsegs;
int tmp;
int size = 0;
int nbuckets;
int nsegs;
int tmp;
/*
* first compute space occupied by the (dbid,relid,blkno) hash table
*/
/*
* first compute space occupied by the (dbid,relid,blkno) hash table
*/
nbuckets = 1 << (int)my_log2((MMNBUFFERS - 1) / DEF_FFACTOR + 1);
nsegs = 1 << (int)my_log2((nbuckets - 1) / DEF_SEGSIZE + 1);
size += MAXALIGN(my_log2(MMNBUFFERS) * sizeof(void *));
size += MAXALIGN(sizeof(HHDR));
size += nsegs * MAXALIGN(DEF_SEGSIZE * sizeof(SEGMENT));
tmp = (int)ceil((double)MMNBUFFERS/BUCKET_ALLOC_INCR);
size += tmp * BUCKET_ALLOC_INCR *
(MAXALIGN(sizeof(BUCKET_INDEX)) +
MAXALIGN(sizeof(MMHashEntry))); /* contains hash key */
nbuckets = 1 << (int) my_log2((MMNBUFFERS - 1) / DEF_FFACTOR + 1);
nsegs = 1 << (int) my_log2((nbuckets - 1) / DEF_SEGSIZE + 1);
/*
* now do the same for the rel hash table
*/
size += MAXALIGN(my_log2(MMNBUFFERS) * sizeof(void *));
size += MAXALIGN(sizeof(HHDR));
size += nsegs * MAXALIGN(DEF_SEGSIZE * sizeof(SEGMENT));
tmp = (int) ceil((double) MMNBUFFERS / BUCKET_ALLOC_INCR);
size += tmp * BUCKET_ALLOC_INCR *
(MAXALIGN(sizeof(BUCKET_INDEX)) +
MAXALIGN(sizeof(MMHashEntry))); /* contains hash key */
size += MAXALIGN(my_log2(MMNRELATIONS) * sizeof(void *));
size += MAXALIGN(sizeof(HHDR));
size += nsegs * MAXALIGN(DEF_SEGSIZE * sizeof(SEGMENT));
tmp = (int)ceil((double)MMNRELATIONS/BUCKET_ALLOC_INCR);
size += tmp * BUCKET_ALLOC_INCR *
(MAXALIGN(sizeof(BUCKET_INDEX)) +
MAXALIGN(sizeof(MMRelHashEntry))); /* contains hash key */
/*
* now do the same for the rel hash table
*/
/*
* finally, add in the memory block we use directly
*/
size += MAXALIGN(my_log2(MMNRELATIONS) * sizeof(void *));
size += MAXALIGN(sizeof(HHDR));
size += nsegs * MAXALIGN(DEF_SEGSIZE * sizeof(SEGMENT));
tmp = (int) ceil((double) MMNRELATIONS / BUCKET_ALLOC_INCR);
size += tmp * BUCKET_ALLOC_INCR *
(MAXALIGN(sizeof(BUCKET_INDEX)) +
MAXALIGN(sizeof(MMRelHashEntry))); /* contains hash key */
size += MAXALIGN(BLCKSZ * MMNBUFFERS);
size += MAXALIGN(sizeof(*MMCurTop));
size += MAXALIGN(sizeof(*MMCurRelno));
size += MAXALIGN(MMNBUFFERS * sizeof(MMCacheTag));
/*
* finally, add in the memory block we use directly
*/
return (size);
size += MAXALIGN(BLCKSZ * MMNBUFFERS);
size += MAXALIGN(sizeof(*MMCurTop));
size += MAXALIGN(sizeof(*MMCurRelno));
size += MAXALIGN(MMNBUFFERS * sizeof(MMCacheTag));
return (size);
}
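/*
 * Worked example (assumed sizes): with BLCKSZ == 8192 and
 * MMNBUFFERS == 10 the dominant term is the directly-used block,
 * MAXALIGN(8192 * 10) == 81920 bytes; the two hash tables sized above
 * add only a few kilobytes, so the declared total comes to roughly
 * 90 KB of shared memory.
 */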
#endif /* MAIN_MEMORY */
#endif /* MAIN_MEMORY */


@@ -1,16 +1,16 @@
/*-------------------------------------------------------------------------
*
* smgr.c--
* public interface routines to storage manager switch.
* public interface routines to storage manager switch.
*
* All file system operations in POSTGRES dispatch through these
* routines.
* All file system operations in POSTGRES dispatch through these
* routines.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.8 1997/08/19 21:33:38 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.9 1997/09/07 04:49:25 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -23,380 +23,390 @@
#include "utils/rel.h"
#include "utils/palloc.h"
static void smgrshutdown(int dummy);
static void smgrshutdown(int dummy);
typedef struct f_smgr {
int (*smgr_init)(); /* may be NULL */
int (*smgr_shutdown)(); /* may be NULL */
int (*smgr_create)();
int (*smgr_unlink)();
int (*smgr_extend)();
int (*smgr_open)();
int (*smgr_close)();
int (*smgr_read)();
int (*smgr_write)();
int (*smgr_flush)();
int (*smgr_blindwrt)();
int (*smgr_nblocks)();
int (*smgr_truncate)();
int (*smgr_commit)(); /* may be NULL */
int (*smgr_abort)(); /* may be NULL */
} f_smgr;
typedef struct f_smgr
{
int (*smgr_init) (); /* may be NULL */
int (*smgr_shutdown) (); /* may be NULL */
int (*smgr_create) ();
int (*smgr_unlink) ();
int (*smgr_extend) ();
int (*smgr_open) ();
int (*smgr_close) ();
int (*smgr_read) ();
int (*smgr_write) ();
int (*smgr_flush) ();
int (*smgr_blindwrt) ();
int (*smgr_nblocks) ();
int (*smgr_truncate) ();
int (*smgr_commit) (); /* may be NULL */
int (*smgr_abort) (); /* may be NULL */
} f_smgr;
/*
* The weird placement of commas in this init block is to keep the compiler
* happy, regardless of what storage managers we have (or don't have).
* The weird placement of commas in this init block is to keep the compiler
* happy, regardless of what storage managers we have (or don't have).
*/
static f_smgr smgrsw[] = {
static f_smgr smgrsw[] = {
/* magnetic disk */
{ mdinit, NULL, mdcreate, mdunlink, mdextend, mdopen, mdclose,
mdread, mdwrite, mdflush, mdblindwrt, mdnblocks, mdtruncate,
mdcommit, mdabort },
/* magnetic disk */
{mdinit, NULL, mdcreate, mdunlink, mdextend, mdopen, mdclose,
mdread, mdwrite, mdflush, mdblindwrt, mdnblocks, mdtruncate,
mdcommit, mdabort},
#ifdef MAIN_MEMORY
/* main memory */
{ mminit, mmshutdown, mmcreate, mmunlink, mmextend, mmopen, mmclose,
mmread, mmwrite, mmflush, mmblindwrt, mmnblocks, NULL,
mmcommit, mmabort },
/* main memory */
{mminit, mmshutdown, mmcreate, mmunlink, mmextend, mmopen, mmclose,
mmread, mmwrite, mmflush, mmblindwrt, mmnblocks, NULL,
mmcommit, mmabort},
#endif /* MAIN_MEMORY */
#endif /* MAIN_MEMORY */
};
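/*
 * Illustrative sketch: every entry point below follows the same
 * dispatch pattern through this table -- index by storage manager id,
 * call through the function pointer, elog on failure.  A hypothetical
 * macro for the pattern (not used by this file):
 */
#define SMGR_DISPATCH(which, op, args)	((*(smgrsw[(which)].op)) args)
/* e.g. SMGR_DISPATCH(0, smgr_read, (reln, blocknum, buffer)) */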
/*
* This array records which storage managers are write-once, and which
* support overwrite. A 'true' entry means that the storage manager is
* write-once. In the best of all possible worlds, there would be no
* write-once storage managers.
* This array records which storage managers are write-once, and which
* support overwrite. A 'true' entry means that the storage manager is
* write-once. In the best of all possible worlds, there would be no
* write-once storage managers.
*/
static bool smgrwo[] = {
false, /* magnetic disk */
static bool smgrwo[] = {
false, /* magnetic disk */
#ifdef MAIN_MEMORY
false, /* main memory*/
#endif /* MAIN_MEMORY */
false, /* main memory */
#endif /* MAIN_MEMORY */
};
static int NSmgr = lengthof(smgrsw);
static int NSmgr = lengthof(smgrsw);
/*
* smgrinit(), smgrshutdown() -- Initialize or shut down all storage
* managers.
* smgrinit(), smgrshutdown() -- Initialize or shut down all storage
* managers.
*
*/
int
smgrinit()
{
int i;
int i;
for (i = 0; i < NSmgr; i++) {
if (smgrsw[i].smgr_init) {
if ((*(smgrsw[i].smgr_init))() == SM_FAIL)
elog(FATAL, "initialization failed on %s", smgrout(i));
for (i = 0; i < NSmgr; i++)
{
if (smgrsw[i].smgr_init)
{
if ((*(smgrsw[i].smgr_init)) () == SM_FAIL)
elog(FATAL, "initialization failed on %s", smgrout(i));
}
}
}
/* register the shutdown proc */
on_exitpg(smgrshutdown, 0);
/* register the shutdown proc */
on_exitpg(smgrshutdown, 0);
return (SM_SUCCESS);
return (SM_SUCCESS);
}
static void
smgrshutdown(int dummy)
{
int i;
int i;
for (i = 0; i < NSmgr; i++) {
if (smgrsw[i].smgr_shutdown) {
if ((*(smgrsw[i].smgr_shutdown))() == SM_FAIL)
elog(FATAL, "shutdown failed on %s", smgrout(i));
for (i = 0; i < NSmgr; i++)
{
if (smgrsw[i].smgr_shutdown)
{
if ((*(smgrsw[i].smgr_shutdown)) () == SM_FAIL)
elog(FATAL, "shutdown failed on %s", smgrout(i));
}
}
}
}
/*
* smgrcreate() -- Create a new relation.
* smgrcreate() -- Create a new relation.
*
* This routine takes a reldesc, creates the relation on the appropriate
* device, and returns a file descriptor for it.
* This routine takes a reldesc, creates the relation on the appropriate
* device, and returns a file descriptor for it.
*/
int
smgrcreate(int16 which, Relation reln)
{
int fd;
int fd;
if ((fd = (*(smgrsw[which].smgr_create))(reln)) < 0)
elog(WARN, "cannot open %s",
&(reln->rd_rel->relname.data[0]));
if ((fd = (*(smgrsw[which].smgr_create)) (reln)) < 0)
elog(WARN, "cannot open %s",
&(reln->rd_rel->relname.data[0]));
return (fd);
return (fd);
}
/*
* smgrunlink() -- Unlink a relation.
* smgrunlink() -- Unlink a relation.
*
* The relation is removed from the store.
* The relation is removed from the store.
*/
int
smgrunlink(int16 which, Relation reln)
{
int status;
int status;
if ((status = (*(smgrsw[which].smgr_unlink))(reln)) == SM_FAIL)
elog(WARN, "cannot unlink %s",
&(reln->rd_rel->relname.data[0]));
if ((status = (*(smgrsw[which].smgr_unlink)) (reln)) == SM_FAIL)
elog(WARN, "cannot unlink %s",
&(reln->rd_rel->relname.data[0]));
return (status);
return (status);
}
/*
* smgrextend() -- Add a new block to a file.
* smgrextend() -- Add a new block to a file.
*
* Returns SM_SUCCESS on success; aborts the current transaction on
* failure.
* Returns SM_SUCCESS on success; aborts the current transaction on
* failure.
*/
int
smgrextend(int16 which, Relation reln, char *buffer)
{
int status;
int status;
status = (*(smgrsw[which].smgr_extend))(reln, buffer);
status = (*(smgrsw[which].smgr_extend)) (reln, buffer);
if (status == SM_FAIL)
elog(WARN, "%s: cannot extend",
&(reln->rd_rel->relname.data[0]));
if (status == SM_FAIL)
elog(WARN, "%s: cannot extend",
&(reln->rd_rel->relname.data[0]));
return (status);
return (status);
}
/*
* smgropen() -- Open a relation using a particular storage manager.
* smgropen() -- Open a relation using a particular storage manager.
*
* Returns the fd for the open relation on success, aborts the
* transaction on failure.
* Returns the fd for the open relation on success, aborts the
* transaction on failure.
*/
int
smgropen(int16 which, Relation reln)
{
int fd;
int fd;
if ((fd = (*(smgrsw[which].smgr_open))(reln)) < 0)
elog(WARN, "cannot open %s",
&(reln->rd_rel->relname.data[0]));
if ((fd = (*(smgrsw[which].smgr_open)) (reln)) < 0)
elog(WARN, "cannot open %s",
&(reln->rd_rel->relname.data[0]));
return (fd);
return (fd);
}
/*
* smgrclose() -- Close a relation.
* smgrclose() -- Close a relation.
*
 * NOTE: mdclose frees the fd vector! It may be re-used for another relation!
 *	reln should be flushed from the cache after closing !..
 *	Currently, smgrclose is called by
 *	relcache.c:RelationPurgeLocalRelation() only.
 *	It would be nice to have smgrfree(), but since
 *	smgrclose is called from a single place... - vadim 05/22/97
 * NOTE: mdclose frees the fd vector! It may be re-used for another relation!
 *	reln should be flushed from the cache after closing !..
 *	Currently, smgrclose is called by
 *	relcache.c:RelationPurgeLocalRelation() only.
 *	It would be nice to have smgrfree(), but since
 *	smgrclose is called from a single place... - vadim 05/22/97
*
* Returns SM_SUCCESS on success, aborts on failure.
* Returns SM_SUCCESS on success, aborts on failure.
*/
int
smgrclose(int16 which, Relation reln)
{
if ((*(smgrsw[which].smgr_close))(reln) == SM_FAIL)
elog(WARN, "cannot close %s",
&(reln->rd_rel->relname.data[0]));
if ((*(smgrsw[which].smgr_close)) (reln) == SM_FAIL)
elog(WARN, "cannot close %s",
&(reln->rd_rel->relname.data[0]));
return (SM_SUCCESS);
return (SM_SUCCESS);
}
/*
* smgrread() -- read a particular block from a relation into the supplied
* buffer.
* smgrread() -- read a particular block from a relation into the supplied
* buffer.
*
* This routine is called from the buffer manager in order to
* instantiate pages in the shared buffer cache. All storage managers
* return pages in the format that POSTGRES expects. This routine
* dispatches the read. On success, it returns SM_SUCCESS. On failure,
* the current transaction is aborted.
* This routine is called from the buffer manager in order to
* instantiate pages in the shared buffer cache. All storage managers
* return pages in the format that POSTGRES expects. This routine
* dispatches the read. On success, it returns SM_SUCCESS. On failure,
* the current transaction is aborted.
*/
int
smgrread(int16 which, Relation reln, BlockNumber blocknum, char *buffer)
{
int status;
int status;
status = (*(smgrsw[which].smgr_read))(reln, blocknum, buffer);
status = (*(smgrsw[which].smgr_read)) (reln, blocknum, buffer);
if (status == SM_FAIL)
elog(WARN, "cannot read block %d of %s",
blocknum, &(reln->rd_rel->relname.data[0]));
if (status == SM_FAIL)
elog(WARN, "cannot read block %d of %s",
blocknum, &(reln->rd_rel->relname.data[0]));
return (status);
return (status);
}
/*
* smgrwrite() -- Write the supplied buffer out.
* smgrwrite() -- Write the supplied buffer out.
*
* This is not a synchronous write -- the interface for that is
* smgrflush(). The buffer is written out via the appropriate
* storage manager. This routine returns SM_SUCCESS or aborts
* the current transaction.
* This is not a synchronous write -- the interface for that is
* smgrflush(). The buffer is written out via the appropriate
* storage manager. This routine returns SM_SUCCESS or aborts
* the current transaction.
*/
int
smgrwrite(int16 which, Relation reln, BlockNumber blocknum, char *buffer)
{
int status;
int status;
status = (*(smgrsw[which].smgr_write))(reln, blocknum, buffer);
status = (*(smgrsw[which].smgr_write)) (reln, blocknum, buffer);
if (status == SM_FAIL)
elog(WARN, "cannot write block %d of %s",
blocknum, &(reln->rd_rel->relname.data[0]));
if (status == SM_FAIL)
elog(WARN, "cannot write block %d of %s",
blocknum, &(reln->rd_rel->relname.data[0]));
return (status);
return (status);
}
/*
* smgrflush() -- A synchronous smgrwrite().
* smgrflush() -- A synchronous smgrwrite().
*/
int
smgrflush(int16 which, Relation reln, BlockNumber blocknum, char *buffer)
{
int status;
int status;
status = (*(smgrsw[which].smgr_flush))(reln, blocknum, buffer);
status = (*(smgrsw[which].smgr_flush)) (reln, blocknum, buffer);
if (status == SM_FAIL)
elog(WARN, "cannot flush block %d of %s to stable store",
blocknum, &(reln->rd_rel->relname.data[0]));
if (status == SM_FAIL)
elog(WARN, "cannot flush block %d of %s to stable store",
blocknum, &(reln->rd_rel->relname.data[0]));
return (status);
return (status);
}
/*
* smgrblindwrt() -- Write a page out blind.
* smgrblindwrt() -- Write a page out blind.
*
* In some cases, we may find a page in the buffer cache that we
* can't make a reldesc for. This happens, for example, when we
* want to reuse a dirty page that was written by a transaction
* that has not yet committed, which created a new relation. In
* this case, the buffer manager will call smgrblindwrt() with
* the name and OID of the database and the relation to which the
* buffer belongs. Every storage manager must be able to force
* this page down to stable storage in this circumstance.
* In some cases, we may find a page in the buffer cache that we
* can't make a reldesc for. This happens, for example, when we
* want to reuse a dirty page that was written by a transaction
* that has not yet committed, which created a new relation. In
* this case, the buffer manager will call smgrblindwrt() with
* the name and OID of the database and the relation to which the
* buffer belongs. Every storage manager must be able to force
* this page down to stable storage in this circumstance.
*/
int
smgrblindwrt(int16 which,
char *dbname,
char *relname,
Oid dbid,
Oid relid,
BlockNumber blkno,
char *buffer)
char *dbname,
char *relname,
Oid dbid,
Oid relid,
BlockNumber blkno,
char *buffer)
{
char *dbstr;
char *relstr;
int status;
char *dbstr;
char *relstr;
int status;
dbstr = pstrdup(dbname);
relstr = pstrdup(relname);
dbstr = pstrdup(dbname);
relstr = pstrdup(relname);
status = (*(smgrsw[which].smgr_blindwrt))(dbstr, relstr, dbid, relid,
blkno, buffer);
status = (*(smgrsw[which].smgr_blindwrt)) (dbstr, relstr, dbid, relid,
blkno, buffer);
if (status == SM_FAIL)
elog(WARN, "cannot write block %d of %s [%s] blind",
blkno, relstr, dbstr);
if (status == SM_FAIL)
elog(WARN, "cannot write block %d of %s [%s] blind",
blkno, relstr, dbstr);
pfree(dbstr);
pfree(relstr);
pfree(dbstr);
pfree(relstr);
return (status);
return (status);
}
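/*
 * Usage sketch (illustrative): how a caller with no reldesc -- e.g.
 * the buffer manager holding only the names and OIDs stashed with a
 * dirty buffer -- might force a page out through the default magnetic
 * disk manager, id 0 in smgrsw[].  The wrapper name is hypothetical.
 */
static int
flush_orphan_page(char *dbname, char *relname, Oid dbid, Oid relid,
				  BlockNumber blkno, char *buffer)
{
	return (smgrblindwrt((int16) 0, dbname, relname, dbid, relid,
						 blkno, buffer));
}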
/*
* smgrnblocks() -- Calculate the number of POSTGRES blocks in the
* supplied relation.
* smgrnblocks() -- Calculate the number of POSTGRES blocks in the
* supplied relation.
*
* Returns the number of blocks on success, aborts the current
* transaction on failure.
* Returns the number of blocks on success, aborts the current
* transaction on failure.
*/
int
smgrnblocks(int16 which, Relation reln)
{
int nblocks;
int nblocks;
if ((nblocks = (*(smgrsw[which].smgr_nblocks))(reln)) < 0)
elog(WARN, "cannot count blocks for %s",
&(reln->rd_rel->relname.data[0]));
if ((nblocks = (*(smgrsw[which].smgr_nblocks)) (reln)) < 0)
elog(WARN, "cannot count blocks for %s",
&(reln->rd_rel->relname.data[0]));
return (nblocks);
return (nblocks);
}
/*
* smgrtruncate() -- Truncate supplied relation to a specified number
* of blocks
* smgrtruncate() -- Truncate supplied relation to a specified number
* of blocks
*
* Returns the number of blocks on success, aborts the current
* transaction on failure.
* Returns the number of blocks on success, aborts the current
* transaction on failure.
*/
int
smgrtruncate(int16 which, Relation reln, int nblocks)
{
int newblks;
newblks = nblocks;
if (smgrsw[which].smgr_truncate)
{
if ((newblks = (*(smgrsw[which].smgr_truncate))(reln, nblocks)) < 0)
elog(WARN, "cannot truncate %s to %d blocks",
&(reln->rd_rel->relname.data[0]), nblocks);
}
int newblks;
return (newblks);
newblks = nblocks;
if (smgrsw[which].smgr_truncate)
{
if ((newblks = (*(smgrsw[which].smgr_truncate)) (reln, nblocks)) < 0)
elog(WARN, "cannot truncate %s to %d blocks",
&(reln->rd_rel->relname.data[0]), nblocks);
}
return (newblks);
}
/*
* smgrcommit(), smgrabort() -- Commit or abort changes made during the
* current transaction.
* smgrcommit(), smgrabort() -- Commit or abort changes made during the
* current transaction.
*/
int
smgrcommit()
{
int i;
int i;
for (i = 0; i < NSmgr; i++) {
if (smgrsw[i].smgr_commit) {
if ((*(smgrsw[i].smgr_commit))() == SM_FAIL)
elog(FATAL, "transaction commit failed on %s", smgrout(i));
for (i = 0; i < NSmgr; i++)
{
if (smgrsw[i].smgr_commit)
{
if ((*(smgrsw[i].smgr_commit)) () == SM_FAIL)
elog(FATAL, "transaction commit failed on %s", smgrout(i));
}
}
}
return (SM_SUCCESS);
return (SM_SUCCESS);
}
#ifdef NOT_USED
int
smgrabort()
{
int i;
int i;
for (i = 0; i < NSmgr; i++) {
if (smgrsw[i].smgr_abort) {
if ((*(smgrsw[i].smgr_abort))() == SM_FAIL)
elog(FATAL, "transaction abort failed on %s", smgrout(i));
for (i = 0; i < NSmgr; i++)
{
if (smgrsw[i].smgr_abort)
{
if ((*(smgrsw[i].smgr_abort)) () == SM_FAIL)
elog(FATAL, "transaction abort failed on %s", smgrout(i));
}
}
}
return (SM_SUCCESS);
return (SM_SUCCESS);
}
#endif
bool
smgriswo(int16 smgrno)
{
if (smgrno < 0 || smgrno >= NSmgr)
elog(WARN, "illegal storage manager number %d", smgrno);
if (smgrno < 0 || smgrno >= NSmgr)
elog(WARN, "illegal storage manager number %d", smgrno);
return (smgrwo[smgrno]);
return (smgrwo[smgrno]);
}


@@ -1,81 +1,83 @@
/*-------------------------------------------------------------------------
*
* smgrtype.c--
* storage manager type
* storage manager type
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgrtype.c,v 1.2 1996/11/03 05:08:01 scrappy Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgrtype.c,v 1.3 1997/09/07 04:49:26 momjian Exp $
*
*-------------------------------------------------------------------------
*/
#include <string.h>
#include "postgres.h"
#include "utils/builtins.h" /* where the declarations go */
#include "utils/builtins.h" /* where the declarations go */
#include "utils/palloc.h"
#include "storage/smgr.h"
typedef struct smgrid {
char *smgr_name;
} smgrid;
typedef struct smgrid
{
char *smgr_name;
} smgrid;
/*
* StorageManager[] -- List of defined storage managers.
* StorageManager[] -- List of defined storage managers.
*
* The weird comma placement is to keep compilers happy no matter
* which of these is (or is not) defined.
* The weird comma placement is to keep compilers happy no matter
* which of these is (or is not) defined.
*/
static smgrid StorageManager[] = {
static smgrid StorageManager[] = {
{"magnetic disk"},
#ifdef MAIN_MEMORY
{"main memory"}
#endif /* MAIN_MEMORY */
#endif /* MAIN_MEMORY */
};
static int NStorageManagers = lengthof(StorageManager);
static int NStorageManagers = lengthof(StorageManager);
int2
smgrin(char *s)
{
int i;
int i;
for (i = 0; i < NStorageManagers; i++) {
if (strcmp(s, StorageManager[i].smgr_name) == 0)
return((int2) i);
}
elog(WARN, "smgrin: illegal storage manager name %s", s);
return 0;
for (i = 0; i < NStorageManagers; i++)
{
if (strcmp(s, StorageManager[i].smgr_name) == 0)
return ((int2) i);
}
elog(WARN, "smgrin: illegal storage manager name %s", s);
return 0;
}
char *
char *
smgrout(int2 i)
{
char *s;
char *s;
if (i >= NStorageManagers || i < 0)
elog(WARN, "Illegal storage manager id %d", i);
if (i >= NStorageManagers || i < 0)
elog(WARN, "Illegal storage manager id %d", i);
s = (char *) palloc(strlen(StorageManager[i].smgr_name) + 1);
strcpy(s, StorageManager[i].smgr_name);
return (s);
s = (char *) palloc(strlen(StorageManager[i].smgr_name) + 1);
strcpy(s, StorageManager[i].smgr_name);
return (s);
}
bool
smgreq(int2 a, int2 b)
{
if (a == b)
return (true);
return (false);
if (a == b)
return (true);
return (false);
}
bool
smgrne(int2 a, int2 b)
{
if (a == b)
return (false);
return (true);
if (a == b)
return (false);
return (true);
}
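/*
 * Usage sketch (illustrative): smgrin and smgrout are inverses over
 * the StorageManager[] table above -- "magnetic disk" maps to id 0
 * and back.  The function name is hypothetical.
 */
static void
smgr_name_roundtrip()
{
	int2		which = smgrin("magnetic disk");	/* -> 0 */
	char	   *name = smgrout(which);	/* palloc'd copy */

	Assert(smgreq(which, smgrin(name)));
	pfree(name);
}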