mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-25 13:17:41 +03:00 
			
		
		
		
	Implement LockBufferForCleanup(), which will allow concurrent VACUUM
to wait until it's safe to remove tuples and compact free space in a shared buffer page. Miscellaneous small code cleanups in bufmgr, too.
This commit is contained in:
		| @@ -8,7 +8,7 @@ | |||||||
|  * |  * | ||||||
|  * |  * | ||||||
|  * IDENTIFICATION |  * IDENTIFICATION | ||||||
|  *	  $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.104 2001/06/22 19:16:21 wieck Exp $ |  *	  $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.105 2001/07/06 21:04:25 tgl Exp $ | ||||||
|  * |  * | ||||||
|  * NOTES |  * NOTES | ||||||
|  *		Transaction aborts can now occur two ways: |  *		Transaction aborts can now occur two ways: | ||||||
| @@ -653,7 +653,7 @@ void | |||||||
| RecordTransactionCommit() | RecordTransactionCommit() | ||||||
| { | { | ||||||
| 	TransactionId xid; | 	TransactionId xid; | ||||||
| 	int			leak; | 	bool		leak; | ||||||
|  |  | ||||||
| 	xid = GetCurrentTransactionId(); | 	xid = GetCurrentTransactionId(); | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										100
									
								
								src/backend/storage/buffer/README
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										100
									
								
								src/backend/storage/buffer/README
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,100 @@ | |||||||
|  | $Header: /cvsroot/pgsql/src/backend/storage/buffer/README,v 1.1 2001/07/06 21:04:25 tgl Exp $ | ||||||
|  |  | ||||||
|  | Notes about shared buffer access rules | ||||||
|  | -------------------------------------- | ||||||
|  |  | ||||||
|  | There are two separate access control mechanisms for shared disk buffers: | ||||||
|  | reference counts (a/k/a pin counts) and buffer locks.  (Actually, there's | ||||||
|  | a third level of access control: one must hold the appropriate kind of | ||||||
|  | lock on a relation before one can legally access any page belonging to | ||||||
|  | the relation.  Relation-level locks are not discussed here.) | ||||||
|  |  | ||||||
|  | Pins: one must "hold a pin on" a buffer (increment its reference count) | ||||||
|  | before being allowed to do anything at all with it.  An unpinned buffer is | ||||||
|  | subject to being reclaimed and reused for a different page at any instant, | ||||||
|  | so touching it is unsafe.  Typically a pin is acquired via ReadBuffer and | ||||||
|  | released via WriteBuffer (if one modified the page) or ReleaseBuffer (if not). | ||||||
|  | It is OK and indeed common for a single backend to pin a page more than | ||||||
|  | once concurrently; the buffer manager handles this efficiently.  It is | ||||||
|  | considered OK to hold a pin for long intervals --- for example, sequential | ||||||
|  | scans hold a pin on the current page until done processing all the tuples | ||||||
|  | on the page, which could be quite a while if the scan is the outer scan of | ||||||
|  | a join.  Similarly, btree index scans hold a pin on the current index page. | ||||||
|  | This is OK because normal operations never wait for a page's pin count to | ||||||
|  | drop to zero.  (Anything that might need to do such a wait is instead | ||||||
|  | handled by waiting to obtain the relation-level lock, which is why you'd | ||||||
|  | better hold one first.)  Pins may not be held across transaction | ||||||
|  | boundaries, however. | ||||||
|  |  | ||||||
|  | Buffer locks: there are two kinds of buffer locks, shared and exclusive, | ||||||
|  | which act just as you'd expect: multiple backends can hold shared locks on | ||||||
|  | the same buffer, but an exclusive lock prevents anyone else from holding | ||||||
|  | either shared or exclusive lock.  (These can alternatively be called READ | ||||||
|  | and WRITE locks.)  These locks are short-term: they should not be held for | ||||||
|  | long.  They are implemented as per-buffer spinlocks, so another backend | ||||||
|  | trying to acquire a competing lock will spin as long as you hold yours! | ||||||
|  | Buffer locks are acquired and released by LockBuffer().  It will *not* work | ||||||
|  | for a single backend to try to acquire multiple locks on the same buffer. | ||||||
|  | One must pin a buffer before trying to lock it. | ||||||
|  |  | ||||||
|  | Buffer access rules: | ||||||
|  |  | ||||||
|  | 1. To scan a page for tuples, one must hold a pin and either shared or | ||||||
|  | exclusive lock.  To examine the commit status (XIDs and status bits) of | ||||||
|  | a tuple in a shared buffer, one must likewise hold a pin and either shared | ||||||
|  | or exclusive lock. | ||||||
|  |  | ||||||
|  | 2. Once one has determined that a tuple is interesting (visible to the | ||||||
|  | current transaction) one may drop the buffer lock, yet continue to access | ||||||
|  | the tuple's data for as long as one holds the buffer pin.  This is what is | ||||||
|  | typically done by heap scans, since the tuple returned by heap_fetch | ||||||
|  | contains a pointer to tuple data in the shared buffer.  Therefore the | ||||||
|  | tuple cannot go away while the pin is held (see rule #5).  Its state could | ||||||
|  | change, but that is assumed not to matter after the initial determination | ||||||
|  | of visibility is made. | ||||||
|  |  | ||||||
|  | 3. To add a tuple or change the xmin/xmax fields of an existing tuple, | ||||||
|  | one must hold a pin and an exclusive lock on the containing buffer. | ||||||
|  | This ensures that no one else might see a partially-updated state of the | ||||||
|  | tuple. | ||||||
|  |  | ||||||
|  | 4. It is considered OK to update tuple commit status bits (i.e., OR the | ||||||
|  | values HEAP_XMIN_COMMITTED, HEAP_XMIN_INVALID, HEAP_XMAX_COMMITTED, or | ||||||
|  | HEAP_XMAX_INVALID into t_infomask) while holding only a shared lock and | ||||||
|  | pin on a buffer.  This is OK because another backend looking at the tuple | ||||||
|  | at about the same time would OR the same bits into the field, so there | ||||||
|  | is little or no risk of conflicting update; what's more, if there did | ||||||
|  | manage to be a conflict it would merely mean that one bit-update would | ||||||
|  | be lost and need to be done again later.  These four bits are only hints | ||||||
|  | (they cache the results of transaction status lookups in pg_log), so no | ||||||
|  | great harm is done if they get reset to zero by conflicting updates. | ||||||
|  |  | ||||||
|  | 5. To physically remove a tuple or compact free space on a page, one | ||||||
|  | must hold a pin and an exclusive lock, *and* observe while holding the | ||||||
|  | exclusive lock that the buffer's shared reference count is one (i.e., | ||||||
|  | no other backend holds a pin).  If these conditions are met then no other | ||||||
|  | backend can perform a page scan until the exclusive lock is dropped, and | ||||||
|  | no other backend can be holding a reference to an existing tuple that it | ||||||
|  | might expect to examine again.  Note that another backend might pin the | ||||||
|  | buffer (increment the refcount) while one is performing the cleanup, but | ||||||
|  | it won't be able to actually examine the page until it acquires shared | ||||||
|  | or exclusive lock. | ||||||
|  |  | ||||||
|  |  | ||||||
|  | As of 7.1, the only operation that removes tuples or compacts free space is | ||||||
|  | (old-style) VACUUM.  It does not have to implement rule #5 directly, because | ||||||
|  | it instead acquires exclusive lock at the relation level, which ensures | ||||||
|  | indirectly that no one else is accessing pages of the relation at all. | ||||||
|  |  | ||||||
|  | To implement concurrent VACUUM we will need to make it obey rule #5 fully. | ||||||
|  | To do this, we'll create a new buffer manager operation | ||||||
|  | LockBufferForCleanup() that gets an exclusive lock and then checks to see | ||||||
|  | if the shared pin count is currently 1.  If not, it releases the exclusive | ||||||
|  | lock (but not the caller's pin) and waits until signaled by another backend, | ||||||
|  | whereupon it tries again.  The signal will occur when UnpinBuffer | ||||||
|  | decrements the shared pin count to 1.  As indicated above, this operation | ||||||
|  | might have to wait a good while before it acquires lock, but that shouldn't | ||||||
|  | matter much for concurrent VACUUM.  The current implementation only | ||||||
|  | supports a single waiter for pin-count-1 on any particular shared buffer. | ||||||
|  | This is enough for VACUUM's use, since we don't allow multiple VACUUMs | ||||||
|  | concurrently on a single relation anyway. | ||||||
| @@ -8,7 +8,7 @@ | |||||||
|  * |  * | ||||||
|  * |  * | ||||||
|  * IDENTIFICATION |  * IDENTIFICATION | ||||||
|  *	  $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.42 2001/03/22 03:59:44 momjian Exp $ |  *	  $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.43 2001/07/06 21:04:25 tgl Exp $ | ||||||
|  * |  * | ||||||
|  *------------------------------------------------------------------------- |  *------------------------------------------------------------------------- | ||||||
|  */ |  */ | ||||||
| @@ -63,7 +63,6 @@ long	   *PrivateRefCount;	/* also used in freelist.c */ | |||||||
| bits8	   *BufferLocks;		/* flag bits showing locks I have set */ | bits8	   *BufferLocks;		/* flag bits showing locks I have set */ | ||||||
| BufferTag  *BufferTagLastDirtied;		/* tag buffer had when last | BufferTag  *BufferTagLastDirtied;		/* tag buffer had when last | ||||||
| 										 * dirtied by me */ | 										 * dirtied by me */ | ||||||
| BufferBlindId *BufferBlindLastDirtied; |  | ||||||
| bool	   *BufferDirtiedByMe;	/* T if buf has been dirtied in cur xact */ | bool	   *BufferDirtiedByMe;	/* T if buf has been dirtied in cur xact */ | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -237,7 +236,6 @@ InitBufferPoolAccess(void) | |||||||
| 	PrivateRefCount = (long *) calloc(NBuffers, sizeof(long)); | 	PrivateRefCount = (long *) calloc(NBuffers, sizeof(long)); | ||||||
| 	BufferLocks = (bits8 *) calloc(NBuffers, sizeof(bits8)); | 	BufferLocks = (bits8 *) calloc(NBuffers, sizeof(bits8)); | ||||||
| 	BufferTagLastDirtied = (BufferTag *) calloc(NBuffers, sizeof(BufferTag)); | 	BufferTagLastDirtied = (BufferTag *) calloc(NBuffers, sizeof(BufferTag)); | ||||||
| 	BufferBlindLastDirtied = (BufferBlindId *) calloc(NBuffers, sizeof(BufferBlindId)); |  | ||||||
| 	BufferDirtiedByMe = (bool *) calloc(NBuffers, sizeof(bool)); | 	BufferDirtiedByMe = (bool *) calloc(NBuffers, sizeof(bool)); | ||||||
|  |  | ||||||
| 	/* | 	/* | ||||||
|   | |||||||
| @@ -8,7 +8,7 @@ | |||||||
|  * |  * | ||||||
|  * |  * | ||||||
|  * IDENTIFICATION |  * IDENTIFICATION | ||||||
|  *	  $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.115 2001/07/02 18:47:18 tgl Exp $ |  *	  $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.116 2001/07/06 21:04:25 tgl Exp $ | ||||||
|  * |  * | ||||||
|  *------------------------------------------------------------------------- |  *------------------------------------------------------------------------- | ||||||
|  */ |  */ | ||||||
| @@ -46,14 +46,12 @@ | |||||||
| #include <math.h> | #include <math.h> | ||||||
| #include <signal.h> | #include <signal.h> | ||||||
|  |  | ||||||
| #include "executor/execdebug.h" |  | ||||||
| #include "miscadmin.h" | #include "miscadmin.h" | ||||||
| #include "storage/buf_internals.h" | #include "storage/buf_internals.h" | ||||||
| #include "storage/bufmgr.h" | #include "storage/bufmgr.h" | ||||||
| #include "storage/s_lock.h" | #include "storage/proc.h" | ||||||
| #include "storage/smgr.h" | #include "storage/smgr.h" | ||||||
| #include "utils/relcache.h" | #include "utils/relcache.h" | ||||||
| #include "catalog/pg_database.h" |  | ||||||
|  |  | ||||||
| #include "pgstat.h" | #include "pgstat.h" | ||||||
|  |  | ||||||
| @@ -254,7 +252,7 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum, | |||||||
| 		if (!BufTableDelete(bufHdr)) | 		if (!BufTableDelete(bufHdr)) | ||||||
| 		{ | 		{ | ||||||
| 			SpinRelease(BufMgrLock); | 			SpinRelease(BufMgrLock); | ||||||
| 			elog(FATAL, "BufRead: buffer table broken after IO error\n"); | 			elog(FATAL, "BufRead: buffer table broken after IO error"); | ||||||
| 		} | 		} | ||||||
| 		/* remember that BufferAlloc() pinned the buffer */ | 		/* remember that BufferAlloc() pinned the buffer */ | ||||||
| 		UnpinBuffer(bufHdr); | 		UnpinBuffer(bufHdr); | ||||||
| @@ -426,33 +424,27 @@ BufferAlloc(Relation reln, | |||||||
|  |  | ||||||
| 			if (smok == FALSE) | 			if (smok == FALSE) | ||||||
| 			{ | 			{ | ||||||
| 				elog(NOTICE, "BufferAlloc: cannot write block %u for %s/%s", | 				elog(NOTICE, "BufferAlloc: cannot write block %u for %u/%u", | ||||||
| 				buf->tag.blockNum, buf->blind.dbname, buf->blind.relname); | 					 buf->tag.blockNum, | ||||||
|  | 					 buf->tag.rnode.tblNode, buf->tag.rnode.relNode); | ||||||
| 				inProgress = FALSE; | 				inProgress = FALSE; | ||||||
| 				buf->flags |= BM_IO_ERROR; | 				buf->flags |= BM_IO_ERROR; | ||||||
| 				buf->flags &= ~BM_IO_IN_PROGRESS; | 				buf->flags &= ~BM_IO_IN_PROGRESS; | ||||||
| 				TerminateBufferIO(buf); | 				TerminateBufferIO(buf); | ||||||
| 				PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0; | 				UnpinBuffer(buf); | ||||||
| 				Assert(buf->refcount > 0); |  | ||||||
| 				buf->refcount--; |  | ||||||
| 				if (buf->refcount == 0) |  | ||||||
| 				{ |  | ||||||
| 					AddBufferToFreelist(buf); |  | ||||||
| 					buf->flags |= BM_FREE; |  | ||||||
| 				} |  | ||||||
| 				buf = (BufferDesc *) NULL; | 				buf = (BufferDesc *) NULL; | ||||||
| 			} | 			} | ||||||
| 			else | 			else | ||||||
| 			{ | 			{ | ||||||
|  |  | ||||||
| 				/* | 				/* | ||||||
| 				 * BM_JUST_DIRTIED cleared by BufferReplace and shouldn't | 				 * BM_JUST_DIRTIED cleared by BufferReplace and shouldn't | ||||||
| 				 * be set by anyone.		- vadim 01/17/97 | 				 * be set by anyone.		- vadim 01/17/97 | ||||||
| 				 */ | 				 */ | ||||||
| 				if (buf->flags & BM_JUST_DIRTIED) | 				if (buf->flags & BM_JUST_DIRTIED) | ||||||
| 				{ | 				{ | ||||||
| 					elog(STOP, "BufferAlloc: content of block %u (%s) changed while flushing", | 					elog(STOP, "BufferAlloc: content of block %u (%u/%u) changed while flushing", | ||||||
| 						 buf->tag.blockNum, buf->blind.relname); | 						 buf->tag.blockNum, | ||||||
|  | 						 buf->tag.rnode.tblNode, buf->tag.rnode.relNode); | ||||||
| 				} | 				} | ||||||
| 				else | 				else | ||||||
| 					buf->flags &= ~BM_DIRTY; | 					buf->flags &= ~BM_DIRTY; | ||||||
| @@ -475,8 +467,7 @@ BufferAlloc(Relation reln, | |||||||
| 				inProgress = FALSE; | 				inProgress = FALSE; | ||||||
| 				buf->flags &= ~BM_IO_IN_PROGRESS; | 				buf->flags &= ~BM_IO_IN_PROGRESS; | ||||||
| 				TerminateBufferIO(buf); | 				TerminateBufferIO(buf); | ||||||
| 				PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0; | 				UnpinBuffer(buf); | ||||||
| 				buf->refcount--; |  | ||||||
| 				buf = (BufferDesc *) NULL; | 				buf = (BufferDesc *) NULL; | ||||||
| 			} | 			} | ||||||
|  |  | ||||||
| @@ -501,15 +492,8 @@ BufferAlloc(Relation reln, | |||||||
| 				{ | 				{ | ||||||
| 					buf->flags &= ~BM_IO_IN_PROGRESS; | 					buf->flags &= ~BM_IO_IN_PROGRESS; | ||||||
| 					TerminateBufferIO(buf); | 					TerminateBufferIO(buf); | ||||||
| 					/* give up the buffer since we don't need it any more */ | 					/* give up old buffer since we don't need it any more */ | ||||||
| 					PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0; | 					UnpinBuffer(buf); | ||||||
| 					Assert(buf->refcount > 0); |  | ||||||
| 					buf->refcount--; |  | ||||||
| 					if (buf->refcount == 0) |  | ||||||
| 					{ |  | ||||||
| 						AddBufferToFreelist(buf); |  | ||||||
| 						buf->flags |= BM_FREE; |  | ||||||
| 					} |  | ||||||
| 				} | 				} | ||||||
|  |  | ||||||
| 				PinBuffer(buf2); | 				PinBuffer(buf2); | ||||||
| @@ -551,18 +535,15 @@ BufferAlloc(Relation reln, | |||||||
| 	if (!BufTableDelete(buf)) | 	if (!BufTableDelete(buf)) | ||||||
| 	{ | 	{ | ||||||
| 		SpinRelease(BufMgrLock); | 		SpinRelease(BufMgrLock); | ||||||
| 		elog(FATAL, "buffer wasn't in the buffer table\n"); | 		elog(FATAL, "buffer wasn't in the buffer table"); | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	/* record the database name and relation name for this buffer */ |  | ||||||
| 	strcpy(buf->blind.dbname, (DatabaseName) ? DatabaseName : "Recovery"); |  | ||||||
| 	strcpy(buf->blind.relname, RelationGetPhysicalRelationName(reln)); |  | ||||||
|  |  | ||||||
| 	INIT_BUFFERTAG(&(buf->tag), reln, blockNum); | 	INIT_BUFFERTAG(&(buf->tag), reln, blockNum); | ||||||
|  |  | ||||||
| 	if (!BufTableInsert(buf)) | 	if (!BufTableInsert(buf)) | ||||||
| 	{ | 	{ | ||||||
| 		SpinRelease(BufMgrLock); | 		SpinRelease(BufMgrLock); | ||||||
| 		elog(FATAL, "Buffer in lookup table twice \n"); | 		elog(FATAL, "Buffer in lookup table twice"); | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	/* | 	/* | ||||||
| @@ -704,14 +685,7 @@ ReleaseAndReadBuffer(Buffer buffer, | |||||||
| 			else | 			else | ||||||
| 			{ | 			{ | ||||||
| 				SpinAcquire(BufMgrLock); | 				SpinAcquire(BufMgrLock); | ||||||
| 				PrivateRefCount[buffer - 1] = 0; | 				UnpinBuffer(bufHdr); | ||||||
| 				Assert(bufHdr->refcount > 0); |  | ||||||
| 				bufHdr->refcount--; |  | ||||||
| 				if (bufHdr->refcount == 0) |  | ||||||
| 				{ |  | ||||||
| 					AddBufferToFreelist(bufHdr); |  | ||||||
| 					bufHdr->flags |= BM_FREE; |  | ||||||
| 				} |  | ||||||
| 				return ReadBufferInternal(relation, blockNum, true); | 				return ReadBufferInternal(relation, blockNum, true); | ||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
| @@ -831,8 +805,9 @@ BufferSync() | |||||||
| 		} | 		} | ||||||
|  |  | ||||||
| 		if (status == SM_FAIL)	/* disk failure ?! */ | 		if (status == SM_FAIL)	/* disk failure ?! */ | ||||||
| 			elog(STOP, "BufferSync: cannot write %u for %s", | 			elog(STOP, "BufferSync: cannot write %u for %u/%u", | ||||||
| 				 bufHdr->tag.blockNum, bufHdr->blind.relname); | 				 bufHdr->tag.blockNum, | ||||||
|  | 				 bufHdr->tag.rnode.tblNode, bufHdr->tag.rnode.relNode); | ||||||
|  |  | ||||||
| 		/* | 		/* | ||||||
| 		 * Note that it's safe to change cntxDirty here because of we | 		 * Note that it's safe to change cntxDirty here because of we | ||||||
| @@ -956,16 +931,11 @@ ResetBufferPool(bool isCommit) | |||||||
| 		{ | 		{ | ||||||
| 			BufferDesc *buf = &BufferDescriptors[i]; | 			BufferDesc *buf = &BufferDescriptors[i]; | ||||||
|  |  | ||||||
|  | 			PrivateRefCount[i] = 1;	/* make sure we release shared pin */ | ||||||
| 			SpinAcquire(BufMgrLock); | 			SpinAcquire(BufMgrLock); | ||||||
| 			PrivateRefCount[i] = 0; | 			UnpinBuffer(buf); | ||||||
| 			Assert(buf->refcount > 0); |  | ||||||
| 			buf->refcount--; |  | ||||||
| 			if (buf->refcount == 0) |  | ||||||
| 			{ |  | ||||||
| 				AddBufferToFreelist(buf); |  | ||||||
| 				buf->flags |= BM_FREE; |  | ||||||
| 			} |  | ||||||
| 			SpinRelease(BufMgrLock); | 			SpinRelease(BufMgrLock); | ||||||
|  | 			Assert(PrivateRefCount[i] == 0); | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| @@ -975,32 +945,31 @@ ResetBufferPool(bool isCommit) | |||||||
| 		smgrabort(); | 		smgrabort(); | ||||||
| } | } | ||||||
|  |  | ||||||
| /* ----------------------------------------------- | /* | ||||||
|  * BufferPoolCheckLeak |  * BufferPoolCheckLeak | ||||||
|  * |  * | ||||||
|  *		check if there is a buffer leak |  *		check if there is a buffer leak | ||||||
|  * |  | ||||||
|  * ----------------------------------------------- |  | ||||||
|  */ |  */ | ||||||
| int | bool | ||||||
| BufferPoolCheckLeak() | BufferPoolCheckLeak(void) | ||||||
| { | { | ||||||
| 	int			i; | 	int			i; | ||||||
| 	int			result = 0; | 	bool		result = false; | ||||||
|  |  | ||||||
| 	for (i = 1; i <= NBuffers; i++) | 	for (i = 0; i < NBuffers; i++) | ||||||
| 	{ | 	{ | ||||||
| 		if (PrivateRefCount[i - 1] != 0) | 		if (PrivateRefCount[i] != 0) | ||||||
| 		{ | 		{ | ||||||
| 			BufferDesc *buf = &(BufferDescriptors[i - 1]); | 			BufferDesc *buf = &(BufferDescriptors[i]); | ||||||
|  |  | ||||||
| 			elog(NOTICE, | 			elog(NOTICE, | ||||||
| 				 "Buffer Leak: [%03d] (freeNext=%d, freePrev=%d, \ | 				 "Buffer Leak: [%03d] (freeNext=%d, freePrev=%d, \ | ||||||
| relname=%s, blockNum=%d, flags=0x%x, refcount=%d %ld)", | rel=%u/%u, blockNum=%u, flags=0x%x, refcount=%d %ld)", | ||||||
| 				 i - 1, buf->freeNext, buf->freePrev, | 				 i, buf->freeNext, buf->freePrev, | ||||||
| 				 buf->blind.relname, buf->tag.blockNum, buf->flags, | 				 buf->tag.rnode.tblNode, buf->tag.rnode.relNode, | ||||||
| 				 buf->refcount, PrivateRefCount[i - 1]); | 				 buf->tag.blockNum, buf->flags, | ||||||
| 			result = 1; | 				 buf->refcount, PrivateRefCount[i]); | ||||||
|  | 			result = true; | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 	return result; | 	return result; | ||||||
| @@ -1389,10 +1358,11 @@ PrintBufferDescs() | |||||||
| 		SpinAcquire(BufMgrLock); | 		SpinAcquire(BufMgrLock); | ||||||
| 		for (i = 0; i < NBuffers; ++i, ++buf) | 		for (i = 0; i < NBuffers; ++i, ++buf) | ||||||
| 		{ | 		{ | ||||||
| 			elog(DEBUG, "[%02d] (freeNext=%d, freePrev=%d, relname=%s, \ | 			elog(DEBUG, "[%02d] (freeNext=%d, freePrev=%d, rel=%u/%u, \ | ||||||
| blockNum=%d, flags=0x%x, refcount=%d %ld)", | blockNum=%u, flags=0x%x, refcount=%d %ld)", | ||||||
| 				 i, buf->freeNext, buf->freePrev, | 				 i, buf->freeNext, buf->freePrev, | ||||||
| 				 buf->blind.relname, buf->tag.blockNum, buf->flags, | 				 buf->tag.rnode.tblNode, buf->tag.rnode.relNode, | ||||||
|  | 				 buf->tag.blockNum, buf->flags, | ||||||
| 				 buf->refcount, PrivateRefCount[i]); | 				 buf->refcount, PrivateRefCount[i]); | ||||||
| 		} | 		} | ||||||
| 		SpinRelease(BufMgrLock); | 		SpinRelease(BufMgrLock); | ||||||
| @@ -1402,8 +1372,9 @@ blockNum=%d, flags=0x%x, refcount=%d %ld)", | |||||||
| 		/* interactive backend */ | 		/* interactive backend */ | ||||||
| 		for (i = 0; i < NBuffers; ++i, ++buf) | 		for (i = 0; i < NBuffers; ++i, ++buf) | ||||||
| 		{ | 		{ | ||||||
| 			printf("[%-2d] (%s, %d) flags=0x%x, refcnt=%d %ld)\n", | 			printf("[%-2d] (%u/%u, %u) flags=0x%x, refcnt=%d %ld)\n", | ||||||
| 				   i, buf->blind.relname, buf->tag.blockNum, | 				   i, buf->tag.rnode.tblNode, buf->tag.rnode.relNode, | ||||||
|  | 				   buf->tag.blockNum, | ||||||
| 				   buf->flags, buf->refcount, PrivateRefCount[i]); | 				   buf->flags, buf->refcount, PrivateRefCount[i]); | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| @@ -1419,9 +1390,10 @@ PrintPinnedBufs() | |||||||
| 	for (i = 0; i < NBuffers; ++i, ++buf) | 	for (i = 0; i < NBuffers; ++i, ++buf) | ||||||
| 	{ | 	{ | ||||||
| 		if (PrivateRefCount[i] > 0) | 		if (PrivateRefCount[i] > 0) | ||||||
| 			elog(NOTICE, "[%02d] (freeNext=%d, freePrev=%d, relname=%s, \ | 			elog(NOTICE, "[%02d] (freeNext=%d, freePrev=%d, rel=%u/%u, \ | ||||||
| blockNum=%d, flags=0x%x, refcount=%d %ld)\n", | blockNum=%u, flags=0x%x, refcount=%d %ld)", | ||||||
| 				 i, buf->freeNext, buf->freePrev, buf->blind.relname, | 				 i, buf->freeNext, buf->freePrev, | ||||||
|  | 				 buf->tag.rnode.tblNode, buf->tag.rnode.relNode, | ||||||
| 				 buf->tag.blockNum, buf->flags, | 				 buf->tag.blockNum, buf->flags, | ||||||
| 				 buf->refcount, PrivateRefCount[i]); | 				 buf->refcount, PrivateRefCount[i]); | ||||||
| 	} | 	} | ||||||
| @@ -1581,8 +1553,10 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock) | |||||||
| 									   (char *) MAKE_PTR(bufHdr->data)); | 									   (char *) MAKE_PTR(bufHdr->data)); | ||||||
|  |  | ||||||
| 					if (status == SM_FAIL)		/* disk failure ?! */ | 					if (status == SM_FAIL)		/* disk failure ?! */ | ||||||
| 						elog(STOP, "FlushRelationBuffers: cannot write %u for %s", | 						elog(STOP, "FlushRelationBuffers: cannot write %u for %u/%u", | ||||||
| 							 bufHdr->tag.blockNum, bufHdr->blind.relname); | 							 bufHdr->tag.blockNum, | ||||||
|  | 							 bufHdr->tag.rnode.tblNode, | ||||||
|  | 							 bufHdr->tag.rnode.relNode); | ||||||
|  |  | ||||||
| 					BufferFlushCount++; | 					BufferFlushCount++; | ||||||
|  |  | ||||||
| @@ -1624,7 +1598,6 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock) | |||||||
| /* | /* | ||||||
|  * ReleaseBuffer -- remove the pin on a buffer without |  * ReleaseBuffer -- remove the pin on a buffer without | ||||||
|  *		marking it dirty. |  *		marking it dirty. | ||||||
|  * |  | ||||||
|  */ |  */ | ||||||
| int | int | ||||||
| ReleaseBuffer(Buffer buffer) | ReleaseBuffer(Buffer buffer) | ||||||
| @@ -1649,14 +1622,7 @@ ReleaseBuffer(Buffer buffer) | |||||||
| 	else | 	else | ||||||
| 	{ | 	{ | ||||||
| 		SpinAcquire(BufMgrLock); | 		SpinAcquire(BufMgrLock); | ||||||
| 		PrivateRefCount[buffer - 1] = 0; | 		UnpinBuffer(bufHdr); | ||||||
| 		Assert(bufHdr->refcount > 0); |  | ||||||
| 		bufHdr->refcount--; |  | ||||||
| 		if (bufHdr->refcount == 0) |  | ||||||
| 		{ |  | ||||||
| 			AddBufferToFreelist(bufHdr); |  | ||||||
| 			bufHdr->flags |= BM_FREE; |  | ||||||
| 		} |  | ||||||
| 		SpinRelease(BufMgrLock); | 		SpinRelease(BufMgrLock); | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| @@ -1665,7 +1631,7 @@ ReleaseBuffer(Buffer buffer) | |||||||
|  |  | ||||||
| /* | /* | ||||||
|  * ReleaseBufferWithBufferLock |  * ReleaseBufferWithBufferLock | ||||||
|  *		Same as ReleaseBuffer except we hold the lock |  *		Same as ReleaseBuffer except we hold the bufmgr lock | ||||||
|  */ |  */ | ||||||
| static int | static int | ||||||
| ReleaseBufferWithBufferLock(Buffer buffer) | ReleaseBufferWithBufferLock(Buffer buffer) | ||||||
| @@ -1688,16 +1654,7 @@ ReleaseBufferWithBufferLock(Buffer buffer) | |||||||
| 	if (PrivateRefCount[buffer - 1] > 1) | 	if (PrivateRefCount[buffer - 1] > 1) | ||||||
| 		PrivateRefCount[buffer - 1]--; | 		PrivateRefCount[buffer - 1]--; | ||||||
| 	else | 	else | ||||||
| 	{ | 		UnpinBuffer(bufHdr); | ||||||
| 		PrivateRefCount[buffer - 1] = 0; |  | ||||||
| 		Assert(bufHdr->refcount > 0); |  | ||||||
| 		bufHdr->refcount--; |  | ||||||
| 		if (bufHdr->refcount == 0) |  | ||||||
| 		{ |  | ||||||
| 			AddBufferToFreelist(bufHdr); |  | ||||||
| 			bufHdr->flags |= BM_FREE; |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	return STATUS_OK; | 	return STATUS_OK; | ||||||
| } | } | ||||||
| @@ -1712,9 +1669,11 @@ IncrBufferRefCount_Debug(char *file, int line, Buffer buffer) | |||||||
| 	{ | 	{ | ||||||
| 		BufferDesc *buf = &BufferDescriptors[buffer - 1]; | 		BufferDesc *buf = &BufferDescriptors[buffer - 1]; | ||||||
|  |  | ||||||
| 		fprintf(stderr, "PIN(Incr) %d relname = %s, blockNum = %d, \ | 		fprintf(stderr, "PIN(Incr) %d rel = %u/%u, blockNum = %u, \ | ||||||
| refcount = %ld, file: %s, line: %d\n", | refcount = %ld, file: %s, line: %d\n", | ||||||
| 				buffer, buf->blind.relname, buf->tag.blockNum, | 				buffer, | ||||||
|  | 				buf->tag.rnode.tblNode, buf->tag.rnode.relNode, | ||||||
|  | 				buf->tag.blockNum, | ||||||
| 				PrivateRefCount[buffer - 1], file, line); | 				PrivateRefCount[buffer - 1], file, line); | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
| @@ -1730,9 +1689,11 @@ ReleaseBuffer_Debug(char *file, int line, Buffer buffer) | |||||||
| 	{ | 	{ | ||||||
| 		BufferDesc *buf = &BufferDescriptors[buffer - 1]; | 		BufferDesc *buf = &BufferDescriptors[buffer - 1]; | ||||||
|  |  | ||||||
| 		fprintf(stderr, "UNPIN(Rel) %d relname = %s, blockNum = %d, \ | 		fprintf(stderr, "UNPIN(Rel) %d rel = %u/%u, blockNum = %u, \ | ||||||
| refcount = %ld, file: %s, line: %d\n", | refcount = %ld, file: %s, line: %d\n", | ||||||
| 				buffer, buf->blind.relname, buf->tag.blockNum, | 				buffer, | ||||||
|  | 				buf->tag.rnode.tblNode, buf->tag.rnode.relNode, | ||||||
|  | 				buf->tag.blockNum, | ||||||
| 				PrivateRefCount[buffer - 1], file, line); | 				PrivateRefCount[buffer - 1], file, line); | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
| @@ -1757,18 +1718,22 @@ ReleaseAndReadBuffer_Debug(char *file, | |||||||
| 	{ | 	{ | ||||||
| 		BufferDesc *buf = &BufferDescriptors[buffer - 1]; | 		BufferDesc *buf = &BufferDescriptors[buffer - 1]; | ||||||
|  |  | ||||||
| 		fprintf(stderr, "UNPIN(Rel&Rd) %d relname = %s, blockNum = %d, \ | 		fprintf(stderr, "UNPIN(Rel&Rd) %d rel = %u/%u, blockNum = %u, \ | ||||||
| refcount = %ld, file: %s, line: %d\n", | refcount = %ld, file: %s, line: %d\n", | ||||||
| 				buffer, buf->blind.relname, buf->tag.blockNum, | 				buffer, | ||||||
|  | 				buf->tag.rnode.tblNode, buf->tag.rnode.relNode, | ||||||
|  | 				buf->tag.blockNum, | ||||||
| 				PrivateRefCount[buffer - 1], file, line); | 				PrivateRefCount[buffer - 1], file, line); | ||||||
| 	} | 	} | ||||||
| 	if (ShowPinTrace && BufferIsLocal(buffer) && is_userbuffer(buffer)) | 	if (ShowPinTrace && BufferIsLocal(buffer) && is_userbuffer(buffer)) | ||||||
| 	{ | 	{ | ||||||
| 		BufferDesc *buf = &BufferDescriptors[b - 1]; | 		BufferDesc *buf = &BufferDescriptors[b - 1]; | ||||||
|  |  | ||||||
| 		fprintf(stderr, "PIN(Rel&Rd) %d relname = %s, blockNum = %d, \ | 		fprintf(stderr, "PIN(Rel&Rd) %d rel = %u/%u, blockNum = %u, \ | ||||||
| refcount = %ld, file: %s, line: %d\n", | refcount = %ld, file: %s, line: %d\n", | ||||||
| 				b, buf->blind.relname, buf->tag.blockNum, | 				b, | ||||||
|  | 				buf->tag.rnode.tblNode, buf->tag.rnode.relNode, | ||||||
|  | 				buf->tag.blockNum, | ||||||
| 				PrivateRefCount[b - 1], file, line); | 				PrivateRefCount[b - 1], file, line); | ||||||
| 	} | 	} | ||||||
| 	return b; | 	return b; | ||||||
| @@ -1784,6 +1749,7 @@ refcount = %ld, file: %s, line: %d\n", | |||||||
|  *	and die if there's anything fishy. |  *	and die if there's anything fishy. | ||||||
|  */ |  */ | ||||||
|  |  | ||||||
|  | void | ||||||
| _bm_trace(Oid dbId, Oid relId, int blkNo, int bufNo, int allocType) | _bm_trace(Oid dbId, Oid relId, int blkNo, int bufNo, int allocType) | ||||||
| { | { | ||||||
| 	long		start, | 	long		start, | ||||||
| @@ -1835,6 +1801,7 @@ okay: | |||||||
| 	*CurTraceBuf = (start + 1) % BMT_LIMIT; | 	*CurTraceBuf = (start + 1) % BMT_LIMIT; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | void | ||||||
| _bm_die(Oid dbId, Oid relId, int blkNo, int bufNo, | _bm_die(Oid dbId, Oid relId, int blkNo, int bufNo, | ||||||
| 		int allocType, long start, long cur) | 		int allocType, long start, long cur) | ||||||
| { | { | ||||||
| @@ -1860,7 +1827,7 @@ _bm_die(Oid dbId, Oid relId, int blkNo, int bufNo, | |||||||
| 		tb = &TraceBuf[i]; | 		tb = &TraceBuf[i]; | ||||||
| 		if (tb->bmt_op != BMT_NOTUSED) | 		if (tb->bmt_op != BMT_NOTUSED) | ||||||
| 		{ | 		{ | ||||||
| 			fprintf(fp, "     [%3d]%spid %d buf %2d for <%d,%u,%d> ", | 			fprintf(fp, "     [%3d]%spid %d buf %2d for <%u,%u,%u> ", | ||||||
| 					i, (i == cur ? " ---> " : "\t"), | 					i, (i == cur ? " ---> " : "\t"), | ||||||
| 					tb->bmt_pid, tb->bmt_buf, | 					tb->bmt_pid, tb->bmt_buf, | ||||||
| 					tb->bmt_dbid, tb->bmt_relid, tb->bmt_blkno); | 					tb->bmt_dbid, tb->bmt_relid, tb->bmt_blkno); | ||||||
| @@ -1967,7 +1934,9 @@ UnlockBuffers(void) | |||||||
|  |  | ||||||
| 	for (i = 0; i < NBuffers; i++) | 	for (i = 0; i < NBuffers; i++) | ||||||
| 	{ | 	{ | ||||||
| 		if (BufferLocks[i] == 0) | 		bits8	buflocks = BufferLocks[i]; | ||||||
|  |  | ||||||
|  | 		if (buflocks == 0) | ||||||
| 			continue; | 			continue; | ||||||
|  |  | ||||||
| 		Assert(BufferIsValid(i + 1)); | 		Assert(BufferIsValid(i + 1)); | ||||||
| @@ -1977,14 +1946,13 @@ UnlockBuffers(void) | |||||||
|  |  | ||||||
| 		S_LOCK(&(buf->cntx_lock)); | 		S_LOCK(&(buf->cntx_lock)); | ||||||
|  |  | ||||||
| 		if (BufferLocks[i] & BL_R_LOCK) | 		if (buflocks & BL_R_LOCK) | ||||||
| 		{ | 		{ | ||||||
| 			Assert(buf->r_locks > 0); | 			Assert(buf->r_locks > 0); | ||||||
| 			(buf->r_locks)--; | 			(buf->r_locks)--; | ||||||
| 		} | 		} | ||||||
| 		if (BufferLocks[i] & BL_RI_LOCK) | 		if (buflocks & BL_RI_LOCK) | ||||||
| 		{ | 		{ | ||||||
|  |  | ||||||
| 			/* | 			/* | ||||||
| 			 * Someone else could remove our RI lock when acquiring W | 			 * Someone else could remove our RI lock when acquiring W | ||||||
| 			 * lock. This is possible if we came here from elog(ERROR) | 			 * lock. This is possible if we came here from elog(ERROR) | ||||||
| @@ -1993,7 +1961,7 @@ UnlockBuffers(void) | |||||||
| 			 */ | 			 */ | ||||||
| 			buf->ri_lock = false; | 			buf->ri_lock = false; | ||||||
| 		} | 		} | ||||||
| 		if (BufferLocks[i] & BL_W_LOCK) | 		if (buflocks & BL_W_LOCK) | ||||||
| 		{ | 		{ | ||||||
| 			Assert(buf->w_lock); | 			Assert(buf->w_lock); | ||||||
| 			buf->w_lock = false; | 			buf->w_lock = false; | ||||||
| @@ -2001,6 +1969,20 @@ UnlockBuffers(void) | |||||||
|  |  | ||||||
| 		S_UNLOCK(&(buf->cntx_lock)); | 		S_UNLOCK(&(buf->cntx_lock)); | ||||||
|  |  | ||||||
|  | 		if (buflocks & BL_PIN_COUNT_LOCK) | ||||||
|  | 		{ | ||||||
|  | 			SpinAcquire(BufMgrLock); | ||||||
|  | 			/* | ||||||
|  | 			 * Don't complain if flag bit not set; it could have been reset | ||||||
|  | 			 * but we got a cancel/die interrupt before getting the signal. | ||||||
|  | 			 */ | ||||||
|  | 			if ((buf->flags & BM_PIN_COUNT_WAITER) != 0 && | ||||||
|  | 				buf->wait_backend_id == MyBackendId) | ||||||
|  | 				buf->flags &= ~BM_PIN_COUNT_WAITER; | ||||||
|  | 			SpinRelease(BufMgrLock); | ||||||
|  | 			ProcCancelWaitForSignal(); | ||||||
|  | 		} | ||||||
|  |  | ||||||
| 		BufferLocks[i] = 0; | 		BufferLocks[i] = 0; | ||||||
|  |  | ||||||
| 		RESUME_INTERRUPTS(); | 		RESUME_INTERRUPTS(); | ||||||
| @@ -2126,6 +2108,77 @@ LockBuffer(Buffer buffer, int mode) | |||||||
| 	RESUME_INTERRUPTS(); | 	RESUME_INTERRUPTS(); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * LockBufferForCleanup - lock a buffer in preparation for deleting items | ||||||
|  |  * | ||||||
|  |  * Items may be deleted from a disk page only when the caller (a) holds an | ||||||
|  |  * exclusive lock on the buffer and (b) has observed that no other backend | ||||||
|  |  * holds a pin on the buffer.  If there is a pin, then the other backend | ||||||
|  |  * might have a pointer into the buffer (for example, a heapscan reference | ||||||
|  |  * to an item --- see README for more details).  It's OK if a pin is added | ||||||
|  |  * after the cleanup starts, however; the newly-arrived backend will be | ||||||
|  |  * unable to look at the page until we release the exclusive lock. | ||||||
|  |  * | ||||||
|  |  * To implement this protocol, a would-be deleter must pin the buffer and | ||||||
|  |  * then call LockBufferForCleanup().  LockBufferForCleanup() is similar to | ||||||
|  |  * LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE), except that it loops until | ||||||
|  |  * it has successfully observed pin count = 1. | ||||||
|  |  */ | ||||||
|  | void | ||||||
|  | LockBufferForCleanup(Buffer buffer) | ||||||
|  | { | ||||||
|  | 	BufferDesc *bufHdr; | ||||||
|  | 	bits8	   *buflock; | ||||||
|  |  | ||||||
|  | 	Assert(BufferIsValid(buffer)); | ||||||
|  |  | ||||||
|  | 	if (BufferIsLocal(buffer)) | ||||||
|  | 	{ | ||||||
|  | 		/* There should be exactly one pin */ | ||||||
|  | 		if (LocalRefCount[-buffer - 1] != 1) | ||||||
|  | 			elog(ERROR, "LockBufferForCleanup: wrong local pin count"); | ||||||
|  | 		/* Nobody else to wait for */ | ||||||
|  | 		return; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	/* There should be exactly one local pin */ | ||||||
|  | 	if (PrivateRefCount[buffer - 1] != 1) | ||||||
|  | 		elog(ERROR, "LockBufferForCleanup: wrong local pin count"); | ||||||
|  |  | ||||||
|  | 	bufHdr = &BufferDescriptors[buffer - 1]; | ||||||
|  | 	buflock = &(BufferLocks[buffer - 1]); | ||||||
|  |  | ||||||
|  | 	for (;;) | ||||||
|  | 	{ | ||||||
|  | 		/* Try to acquire lock */ | ||||||
|  | 		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); | ||||||
|  | 		SpinAcquire(BufMgrLock); | ||||||
|  | 		Assert(bufHdr->refcount > 0); | ||||||
|  | 		if (bufHdr->refcount == 1) | ||||||
|  | 		{ | ||||||
|  | 			/* Successfully acquired exclusive lock with pincount 1 */ | ||||||
|  | 			SpinRelease(BufMgrLock); | ||||||
|  | 			return; | ||||||
|  | 		} | ||||||
|  | 		/* Failed, so mark myself as waiting for pincount 1 */ | ||||||
|  | 		if (bufHdr->flags & BM_PIN_COUNT_WAITER) | ||||||
|  | 		{ | ||||||
|  | 			SpinRelease(BufMgrLock); | ||||||
|  | 			LockBuffer(buffer, BUFFER_LOCK_UNLOCK); | ||||||
|  | 			elog(ERROR, "Multiple backends attempting to wait for pincount 1"); | ||||||
|  | 		} | ||||||
|  | 		bufHdr->wait_backend_id = MyBackendId; | ||||||
|  | 		bufHdr->flags |= BM_PIN_COUNT_WAITER; | ||||||
|  | 		*buflock |= BL_PIN_COUNT_LOCK; | ||||||
|  | 		SpinRelease(BufMgrLock); | ||||||
|  | 		LockBuffer(buffer, BUFFER_LOCK_UNLOCK); | ||||||
|  | 		/* Wait to be signaled by UnpinBuffer() */ | ||||||
|  | 		ProcWaitForSignal(); | ||||||
|  | 		*buflock &= ~BL_PIN_COUNT_LOCK; | ||||||
|  | 		/* Loop back and try again */ | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
| /* | /* | ||||||
|  *	Functions for IO error handling |  *	Functions for IO error handling | ||||||
|  * |  * | ||||||
| @@ -2240,8 +2293,9 @@ AbortBufferIO(void) | |||||||
| 			/* Issue notice if this is not the first failure... */ | 			/* Issue notice if this is not the first failure... */ | ||||||
| 			if (buf->flags & BM_IO_ERROR) | 			if (buf->flags & BM_IO_ERROR) | ||||||
| 			{ | 			{ | ||||||
| 				elog(NOTICE, "write error may be permanent: cannot write block %u for %s/%s", | 				elog(NOTICE, "write error may be permanent: cannot write block %u for %u/%u", | ||||||
| 				buf->tag.blockNum, buf->blind.dbname, buf->blind.relname); | 					 buf->tag.blockNum, | ||||||
|  | 					 buf->tag.rnode.tblNode, buf->tag.rnode.relNode); | ||||||
| 			} | 			} | ||||||
| 			buf->flags |= BM_DIRTY; | 			buf->flags |= BM_DIRTY; | ||||||
| 		} | 		} | ||||||
| @@ -2252,59 +2306,6 @@ AbortBufferIO(void) | |||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
| /* |  | ||||||
|  * Cleanup buffer or mark it for cleanup. Buffer may be cleaned |  | ||||||
|  * up if it's pinned only once. |  | ||||||
|  * |  | ||||||
|  * NOTE: buffer must be excl locked. |  | ||||||
|  */ |  | ||||||
| void |  | ||||||
| MarkBufferForCleanup(Buffer buffer, void (*CleanupFunc) (Buffer)) |  | ||||||
| { |  | ||||||
| 	BufferDesc *bufHdr = &BufferDescriptors[buffer - 1]; |  | ||||||
|  |  | ||||||
| 	Assert(PrivateRefCount[buffer - 1] > 0); |  | ||||||
|  |  | ||||||
| 	if (PrivateRefCount[buffer - 1] > 1) |  | ||||||
| 	{ |  | ||||||
| 		LockBuffer(buffer, BUFFER_LOCK_UNLOCK); |  | ||||||
| 		PrivateRefCount[buffer - 1]--; |  | ||||||
| 		SpinAcquire(BufMgrLock); |  | ||||||
| 		Assert(bufHdr->refcount > 0); |  | ||||||
| 		bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); |  | ||||||
| 		bufHdr->CleanupFunc = CleanupFunc; |  | ||||||
| 		SpinRelease(BufMgrLock); |  | ||||||
| 		return; |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	SpinAcquire(BufMgrLock); |  | ||||||
| 	Assert(bufHdr->refcount > 0); |  | ||||||
| 	if (bufHdr->refcount == 1) |  | ||||||
| 	{ |  | ||||||
| 		SpinRelease(BufMgrLock); |  | ||||||
| 		CleanupFunc(buffer); |  | ||||||
| 		CleanupFunc = NULL; |  | ||||||
| 	} |  | ||||||
| 	else |  | ||||||
| 		SpinRelease(BufMgrLock); |  | ||||||
|  |  | ||||||
| 	LockBuffer(buffer, BUFFER_LOCK_UNLOCK); |  | ||||||
|  |  | ||||||
| 	SpinAcquire(BufMgrLock); |  | ||||||
| 	PrivateRefCount[buffer - 1] = 0; |  | ||||||
| 	Assert(bufHdr->refcount > 0); |  | ||||||
| 	bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); |  | ||||||
| 	bufHdr->CleanupFunc = CleanupFunc; |  | ||||||
| 	bufHdr->refcount--; |  | ||||||
| 	if (bufHdr->refcount == 0) |  | ||||||
| 	{ |  | ||||||
| 		AddBufferToFreelist(bufHdr); |  | ||||||
| 		bufHdr->flags |= BM_FREE; |  | ||||||
| 	} |  | ||||||
| 	SpinRelease(BufMgrLock); |  | ||||||
| 	return; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| RelFileNode | RelFileNode | ||||||
| BufferGetFileNode(Buffer buffer) | BufferGetFileNode(Buffer buffer) | ||||||
| { | { | ||||||
|   | |||||||
| @@ -9,7 +9,7 @@ | |||||||
|  * |  * | ||||||
|  * |  * | ||||||
|  * IDENTIFICATION |  * IDENTIFICATION | ||||||
|  *	  $Header: /cvsroot/pgsql/src/backend/storage/buffer/freelist.c,v 1.23 2001/01/24 19:43:06 momjian Exp $ |  *	  $Header: /cvsroot/pgsql/src/backend/storage/buffer/freelist.c,v 1.24 2001/07/06 21:04:26 tgl Exp $ | ||||||
|  * |  * | ||||||
|  *------------------------------------------------------------------------- |  *------------------------------------------------------------------------- | ||||||
|  */ |  */ | ||||||
| @@ -29,14 +29,14 @@ | |||||||
|  |  | ||||||
| #include "storage/buf_internals.h" | #include "storage/buf_internals.h" | ||||||
| #include "storage/bufmgr.h" | #include "storage/bufmgr.h" | ||||||
|  | #include "storage/proc.h" | ||||||
|  |  | ||||||
|  |  | ||||||
| static BufferDesc *SharedFreeList; | static BufferDesc *SharedFreeList; | ||||||
|  |  | ||||||
| /* only actually used in debugging.  The lock | /* | ||||||
|  * should be acquired before calling the freelist manager. |  * State-checking macros | ||||||
|  */ |  */ | ||||||
| extern SPINLOCK BufMgrLock; |  | ||||||
|  |  | ||||||
| #define IsInQueue(bf) \ | #define IsInQueue(bf) \ | ||||||
| ( \ | ( \ | ||||||
| @@ -45,7 +45,7 @@ extern SPINLOCK BufMgrLock; | |||||||
| 	AssertMacro((bf->flags & BM_FREE)) \ | 	AssertMacro((bf->flags & BM_FREE)) \ | ||||||
| ) | ) | ||||||
|  |  | ||||||
| #define NotInQueue(bf) \ | #define IsNotInQueue(bf) \ | ||||||
| ( \ | ( \ | ||||||
| 	AssertMacro((bf->freeNext == INVALID_DESCRIPTOR)), \ | 	AssertMacro((bf->freeNext == INVALID_DESCRIPTOR)), \ | ||||||
| 	AssertMacro((bf->freePrev == INVALID_DESCRIPTOR)), \ | 	AssertMacro((bf->freePrev == INVALID_DESCRIPTOR)), \ | ||||||
| @@ -61,14 +61,14 @@ extern SPINLOCK BufMgrLock; | |||||||
|  * the manner in which buffers are added to the freelist queue. |  * the manner in which buffers are added to the freelist queue. | ||||||
|  * Currently, they are added on an LRU basis. |  * Currently, they are added on an LRU basis. | ||||||
|  */ |  */ | ||||||
| void | static void | ||||||
| AddBufferToFreelist(BufferDesc *bf) | AddBufferToFreelist(BufferDesc *bf) | ||||||
| { | { | ||||||
| #ifdef BMTRACE | #ifdef BMTRACE | ||||||
| 	_bm_trace(bf->tag.relId.dbId, bf->tag.relId.relId, bf->tag.blockNum, | 	_bm_trace(bf->tag.relId.dbId, bf->tag.relId.relId, bf->tag.blockNum, | ||||||
| 			  BufferDescriptorGetBuffer(bf), BMT_DEALLOC); | 			  BufferDescriptorGetBuffer(bf), BMT_DEALLOC); | ||||||
| #endif	 /* BMTRACE */ | #endif	 /* BMTRACE */ | ||||||
| 	NotInQueue(bf); | 	IsNotInQueue(bf); | ||||||
|  |  | ||||||
| 	/* change bf so it points to inFrontOfNew and its successor */ | 	/* change bf so it points to inFrontOfNew and its successor */ | ||||||
| 	bf->freePrev = SharedFreeList->freePrev; | 	bf->freePrev = SharedFreeList->freePrev; | ||||||
| @@ -83,13 +83,14 @@ AddBufferToFreelist(BufferDesc *bf) | |||||||
|  |  | ||||||
| /* | /* | ||||||
|  * PinBuffer -- make buffer unavailable for replacement. |  * PinBuffer -- make buffer unavailable for replacement. | ||||||
|  |  * | ||||||
|  |  * This should be applied only to shared buffers, never local ones. | ||||||
|  |  * Bufmgr lock must be held by caller. | ||||||
|  */ |  */ | ||||||
| void | void | ||||||
| PinBuffer(BufferDesc *buf) | PinBuffer(BufferDesc *buf) | ||||||
| { | { | ||||||
| 	long		b; | 	int		b = BufferDescriptorGetBuffer(buf) - 1; | ||||||
|  |  | ||||||
| 	/* Assert (buf->refcount < 25); */ |  | ||||||
|  |  | ||||||
| 	if (buf->refcount == 0) | 	if (buf->refcount == 0) | ||||||
| 	{ | 	{ | ||||||
| @@ -104,13 +105,12 @@ PinBuffer(BufferDesc *buf) | |||||||
| 		buf->flags &= ~BM_FREE; | 		buf->flags &= ~BM_FREE; | ||||||
| 	} | 	} | ||||||
| 	else | 	else | ||||||
| 		NotInQueue(buf); | 		IsNotInQueue(buf); | ||||||
|  |  | ||||||
| 	b = BufferDescriptorGetBuffer(buf) - 1; |  | ||||||
| 	Assert(PrivateRefCount[b] >= 0); |  | ||||||
| 	if (PrivateRefCount[b] == 0) | 	if (PrivateRefCount[b] == 0) | ||||||
| 		buf->refcount++; | 		buf->refcount++; | ||||||
| 	PrivateRefCount[b]++; | 	PrivateRefCount[b]++; | ||||||
|  | 	Assert(PrivateRefCount[b] > 0); | ||||||
| } | } | ||||||
|  |  | ||||||
| #ifdef NOT_USED | #ifdef NOT_USED | ||||||
| @@ -135,24 +135,35 @@ refcount = %ld, file: %s, line: %d\n", | |||||||
|  |  | ||||||
| /* | /* | ||||||
|  * UnpinBuffer -- make buffer available for replacement. |  * UnpinBuffer -- make buffer available for replacement. | ||||||
|  |  * | ||||||
|  |  * This should be applied only to shared buffers, never local ones. | ||||||
|  |  * Bufmgr lock must be held by caller. | ||||||
|  */ |  */ | ||||||
| void | void | ||||||
| UnpinBuffer(BufferDesc *buf) | UnpinBuffer(BufferDesc *buf) | ||||||
| { | { | ||||||
| 	long		b = BufferDescriptorGetBuffer(buf) - 1; | 	int		b = BufferDescriptorGetBuffer(buf) - 1; | ||||||
|  |  | ||||||
|  | 	IsNotInQueue(buf); | ||||||
| 	Assert(buf->refcount > 0); | 	Assert(buf->refcount > 0); | ||||||
| 	Assert(PrivateRefCount[b] > 0); | 	Assert(PrivateRefCount[b] > 0); | ||||||
| 	PrivateRefCount[b]--; | 	PrivateRefCount[b]--; | ||||||
| 	if (PrivateRefCount[b] == 0) | 	if (PrivateRefCount[b] == 0) | ||||||
| 		buf->refcount--; | 		buf->refcount--; | ||||||
| 	NotInQueue(buf); |  | ||||||
|  |  | ||||||
| 	if (buf->refcount == 0) | 	if (buf->refcount == 0) | ||||||
| 	{ | 	{ | ||||||
|  | 		/* buffer is now unpinned */ | ||||||
| 		AddBufferToFreelist(buf); | 		AddBufferToFreelist(buf); | ||||||
| 		buf->flags |= BM_FREE; | 		buf->flags |= BM_FREE; | ||||||
| 	} | 	} | ||||||
|  | 	else if ((buf->flags & BM_PIN_COUNT_WAITER) != 0 && | ||||||
|  | 			 buf->refcount == 1) | ||||||
|  | 	{ | ||||||
|  | 		/* we just released the last pin other than the waiter's */ | ||||||
|  | 		buf->flags &= ~BM_PIN_COUNT_WAITER; | ||||||
|  | 		ProcSendSignal(buf->wait_backend_id); | ||||||
|  | 	} | ||||||
| 	else | 	else | ||||||
| 	{ | 	{ | ||||||
| 		/* do nothing */ | 		/* do nothing */ | ||||||
| @@ -179,18 +190,16 @@ refcount = %ld, file: %s, line: %d\n", | |||||||
|  |  | ||||||
| /* | /* | ||||||
|  * GetFreeBuffer() -- get the 'next' buffer from the freelist. |  * GetFreeBuffer() -- get the 'next' buffer from the freelist. | ||||||
|  * |  | ||||||
|  */ |  */ | ||||||
| BufferDesc * | BufferDesc * | ||||||
| GetFreeBuffer() | GetFreeBuffer(void) | ||||||
| { | { | ||||||
| 	BufferDesc *buf; | 	BufferDesc *buf; | ||||||
|  |  | ||||||
| 	if (Free_List_Descriptor == SharedFreeList->freeNext) | 	if (Free_List_Descriptor == SharedFreeList->freeNext) | ||||||
| 	{ | 	{ | ||||||
|  |  | ||||||
| 		/* queue is empty. All buffers in the buffer pool are pinned. */ | 		/* queue is empty. All buffers in the buffer pool are pinned. */ | ||||||
| 		elog(ERROR, "out of free buffers: time to abort !\n"); | 		elog(ERROR, "out of free buffers: time to abort!"); | ||||||
| 		return NULL; | 		return NULL; | ||||||
| 	} | 	} | ||||||
| 	buf = &(BufferDescriptors[SharedFreeList->freeNext]); | 	buf = &(BufferDescriptors[SharedFreeList->freeNext]); | ||||||
| @@ -220,7 +229,7 @@ InitFreeList(bool init) | |||||||
|  |  | ||||||
| 	if (init) | 	if (init) | ||||||
| 	{ | 	{ | ||||||
| 		/* we only do this once, normally the postmaster */ | 		/* we only do this once, normally in the postmaster */ | ||||||
| 		SharedFreeList->data = INVALID_OFFSET; | 		SharedFreeList->data = INVALID_OFFSET; | ||||||
| 		SharedFreeList->flags = 0; | 		SharedFreeList->flags = 0; | ||||||
| 		SharedFreeList->flags &= ~(BM_VALID | BM_DELETED | BM_FREE); | 		SharedFreeList->flags &= ~(BM_VALID | BM_DELETED | BM_FREE); | ||||||
| @@ -249,37 +258,23 @@ DBG_FreeListCheck(int nfree) | |||||||
| 	buf = &(BufferDescriptors[SharedFreeList->freeNext]); | 	buf = &(BufferDescriptors[SharedFreeList->freeNext]); | ||||||
| 	for (i = 0; i < nfree; i++, buf = &(BufferDescriptors[buf->freeNext])) | 	for (i = 0; i < nfree; i++, buf = &(BufferDescriptors[buf->freeNext])) | ||||||
| 	{ | 	{ | ||||||
|  |  | ||||||
| 		if (!(buf->flags & (BM_FREE))) | 		if (!(buf->flags & (BM_FREE))) | ||||||
| 		{ | 		{ | ||||||
| 			if (buf != SharedFreeList) | 			if (buf != SharedFreeList) | ||||||
| 			{ |  | ||||||
| 				printf("\tfree list corrupted: %d flags %x\n", | 				printf("\tfree list corrupted: %d flags %x\n", | ||||||
| 					   buf->buf_id, buf->flags); | 					   buf->buf_id, buf->flags); | ||||||
| 			} |  | ||||||
| 			else | 			else | ||||||
| 			{ |  | ||||||
| 				printf("\tfree list corrupted: too short -- %d not %d\n", | 				printf("\tfree list corrupted: too short -- %d not %d\n", | ||||||
| 					   i, nfree); | 					   i, nfree); | ||||||
|  |  | ||||||
| 			} |  | ||||||
|  |  | ||||||
|  |  | ||||||
| 		} | 		} | ||||||
| 		if ((BufferDescriptors[buf->freeNext].freePrev != buf->buf_id) || | 		if ((BufferDescriptors[buf->freeNext].freePrev != buf->buf_id) || | ||||||
| 			(BufferDescriptors[buf->freePrev].freeNext != buf->buf_id)) | 			(BufferDescriptors[buf->freePrev].freeNext != buf->buf_id)) | ||||||
| 		{ |  | ||||||
| 			printf("\tfree list links corrupted: %d %ld %ld\n", | 			printf("\tfree list links corrupted: %d %ld %ld\n", | ||||||
| 				   buf->buf_id, buf->freePrev, buf->freeNext); | 				   buf->buf_id, buf->freePrev, buf->freeNext); | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	} |  | ||||||
| 	if (buf != SharedFreeList) | 	if (buf != SharedFreeList) | ||||||
| 	{ |  | ||||||
| 		printf("\tfree list corrupted: %d-th buffer is %d\n", | 		printf("\tfree list corrupted: %d-th buffer is %d\n", | ||||||
| 			   nfree, buf->buf_id); | 			   nfree, buf->buf_id); | ||||||
|  |  | ||||||
| 	} |  | ||||||
| } | } | ||||||
|  |  | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -8,7 +8,7 @@ | |||||||
|  * |  * | ||||||
|  * |  * | ||||||
|  * IDENTIFICATION |  * IDENTIFICATION | ||||||
|  *	  $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinval.c,v 1.34 2001/06/19 19:42:15 tgl Exp $ |  *	  $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinval.c,v 1.35 2001/07/06 21:04:26 tgl Exp $ | ||||||
|  * |  * | ||||||
|  *------------------------------------------------------------------------- |  *------------------------------------------------------------------------- | ||||||
|  */ |  */ | ||||||
| @@ -16,7 +16,6 @@ | |||||||
|  |  | ||||||
| #include <sys/types.h> | #include <sys/types.h> | ||||||
|  |  | ||||||
| #include "storage/backendid.h" |  | ||||||
| #include "storage/proc.h" | #include "storage/proc.h" | ||||||
| #include "storage/sinval.h" | #include "storage/sinval.h" | ||||||
| #include "storage/sinvaladt.h" | #include "storage/sinvaladt.h" | ||||||
| @@ -411,3 +410,31 @@ GetUndoRecPtr(void) | |||||||
|  |  | ||||||
| 	return (urec); | 	return (urec); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * BackendIdGetProc - given a BackendId, find its PROC structure | ||||||
|  |  * | ||||||
|  |  * This is a trivial lookup in the ProcState array.  We assume that the caller | ||||||
|  |  * knows that the backend isn't going to go away, so we do not bother with | ||||||
|  |  * locking. | ||||||
|  |  */ | ||||||
|  | struct proc * | ||||||
|  | BackendIdGetProc(BackendId procId) | ||||||
|  | { | ||||||
|  | 	SISeg	   *segP = shmInvalBuffer; | ||||||
|  |  | ||||||
|  | 	if (procId > 0 && procId <= segP->lastBackend) | ||||||
|  | 	{ | ||||||
|  | 		ProcState  *stateP = &segP->procState[procId - 1]; | ||||||
|  | 		SHMEM_OFFSET pOffset = stateP->procStruct; | ||||||
|  |  | ||||||
|  | 		if (pOffset != INVALID_OFFSET) | ||||||
|  | 		{ | ||||||
|  | 			PROC	   *proc = (PROC *) MAKE_PTR(pOffset); | ||||||
|  |  | ||||||
|  | 			return proc; | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return NULL; | ||||||
|  | } | ||||||
|   | |||||||
| @@ -8,7 +8,7 @@ | |||||||
|  * |  * | ||||||
|  * |  * | ||||||
|  * IDENTIFICATION |  * IDENTIFICATION | ||||||
|  *	  $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.103 2001/06/16 22:58:16 tgl Exp $ |  *	  $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.104 2001/07/06 21:04:26 tgl Exp $ | ||||||
|  * |  * | ||||||
|  *------------------------------------------------------------------------- |  *------------------------------------------------------------------------- | ||||||
|  */ |  */ | ||||||
| @@ -74,6 +74,7 @@ | |||||||
|  |  | ||||||
| #include "access/xact.h" | #include "access/xact.h" | ||||||
| #include "storage/proc.h" | #include "storage/proc.h" | ||||||
|  | #include "storage/sinval.h" | ||||||
|  |  | ||||||
|  |  | ||||||
| int			DeadlockTimeout = 1000; | int			DeadlockTimeout = 1000; | ||||||
| @@ -92,6 +93,7 @@ static PROC_HDR *ProcGlobal = NULL; | |||||||
| PROC	   *MyProc = NULL; | PROC	   *MyProc = NULL; | ||||||
|  |  | ||||||
| static bool waitingForLock = false; | static bool waitingForLock = false; | ||||||
|  | static bool waitingForSignal = false; | ||||||
|  |  | ||||||
| static void ProcKill(void); | static void ProcKill(void); | ||||||
| static void ProcGetNewSemIdAndNum(IpcSemaphoreId *semId, int *semNum); | static void ProcGetNewSemIdAndNum(IpcSemaphoreId *semId, int *semNum); | ||||||
| @@ -894,6 +896,49 @@ ProcReleaseSpins(PROC *proc) | |||||||
| 	AbortBufferIO(); | 	AbortBufferIO(); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * ProcWaitForSignal - wait for a signal from another backend. | ||||||
|  |  * | ||||||
|  |  * This can share the semaphore normally used for waiting for locks, | ||||||
|  |  * since a backend could never be waiting for a lock and a signal at | ||||||
|  |  * the same time.  As with locks, it's OK if the signal arrives just | ||||||
|  |  * before we actually reach the waiting state. | ||||||
|  |  */ | ||||||
|  | void | ||||||
|  | ProcWaitForSignal(void) | ||||||
|  | { | ||||||
|  | 	waitingForSignal = true; | ||||||
|  | 	IpcSemaphoreLock(MyProc->sem.semId, MyProc->sem.semNum, true); | ||||||
|  | 	waitingForSignal = false; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * ProcCancelWaitForSignal - clean up an aborted wait for signal | ||||||
|  |  * | ||||||
|  |  * We need this in case the signal arrived after we aborted waiting, | ||||||
|  |  * or if it arrived but we never reached ProcWaitForSignal() at all. | ||||||
|  |  * Caller should call this after resetting the signal request status. | ||||||
|  |  */ | ||||||
|  | void | ||||||
|  | ProcCancelWaitForSignal(void) | ||||||
|  | { | ||||||
|  | 	ZeroProcSemaphore(MyProc); | ||||||
|  | 	waitingForSignal = false; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * ProcSendSignal - send a signal to a backend identified by BackendId | ||||||
|  |  */ | ||||||
|  | void | ||||||
|  | ProcSendSignal(BackendId procId) | ||||||
|  | { | ||||||
|  | 	PROC   *proc = BackendIdGetProc(procId); | ||||||
|  |  | ||||||
|  | 	if (proc != NULL) | ||||||
|  | 		IpcSemaphoreUnlock(proc->sem.semId, proc->sem.semNum); | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
| /***************************************************************************** | /***************************************************************************** | ||||||
|  * |  * | ||||||
|  *****************************************************************************/ |  *****************************************************************************/ | ||||||
|   | |||||||
| @@ -7,17 +7,19 @@ | |||||||
|  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group |  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group | ||||||
|  * Portions Copyright (c) 1994, Regents of the University of California |  * Portions Copyright (c) 1994, Regents of the University of California | ||||||
|  * |  * | ||||||
|  * $Id: buf_internals.h,v 1.48 2001/03/22 04:01:05 momjian Exp $ |  * $Id: buf_internals.h,v 1.49 2001/07/06 21:04:26 tgl Exp $ | ||||||
|  * |  * | ||||||
|  *------------------------------------------------------------------------- |  *------------------------------------------------------------------------- | ||||||
|  */ |  */ | ||||||
| #ifndef BUFMGR_INTERNALS_H | #ifndef BUFMGR_INTERNALS_H | ||||||
| #define BUFMGR_INTERNALS_H | #define BUFMGR_INTERNALS_H | ||||||
|  |  | ||||||
|  | #include "storage/backendid.h" | ||||||
| #include "storage/buf.h" | #include "storage/buf.h" | ||||||
| #include "storage/lmgr.h" | #include "storage/lmgr.h" | ||||||
| #include "storage/s_lock.h" | #include "storage/s_lock.h" | ||||||
|  |  | ||||||
|  |  | ||||||
| /* Buf Mgr constants */ | /* Buf Mgr constants */ | ||||||
| /* in bufmgr.c */ | /* in bufmgr.c */ | ||||||
| extern int	Data_Descriptors; | extern int	Data_Descriptors; | ||||||
| @@ -38,9 +40,19 @@ extern int	ShowPinTrace; | |||||||
| #define BM_IO_IN_PROGRESS		(1 << 5) | #define BM_IO_IN_PROGRESS		(1 << 5) | ||||||
| #define BM_IO_ERROR				(1 << 6) | #define BM_IO_ERROR				(1 << 6) | ||||||
| #define BM_JUST_DIRTIED			(1 << 7) | #define BM_JUST_DIRTIED			(1 << 7) | ||||||
|  | #define BM_PIN_COUNT_WAITER		(1 << 8) | ||||||
|  |  | ||||||
| typedef bits16 BufFlags; | typedef bits16 BufFlags; | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Buffer tag identifies which disk block the buffer contains. | ||||||
|  |  * | ||||||
|  |  * Note: the BufferTag data must be sufficient to determine where to write the | ||||||
|  |  * block, even during a "blind write" with no relcache entry.  It's possible | ||||||
|  |  * that the backend flushing the buffer doesn't even believe the relation is | ||||||
|  |  * visible yet (its xact may have started before the xact that created the | ||||||
|  |  * rel).  The storage manager must be able to cope anyway. | ||||||
|  |  */ | ||||||
| typedef struct buftag | typedef struct buftag | ||||||
| { | { | ||||||
| 	RelFileNode rnode; | 	RelFileNode rnode; | ||||||
| @@ -60,28 +72,9 @@ typedef struct buftag | |||||||
| 	(a)->rnode = (xx_reln)->rd_node \ | 	(a)->rnode = (xx_reln)->rd_node \ | ||||||
| ) | ) | ||||||
|  |  | ||||||
| /* |  | ||||||
|  * We don't need this data any more but it allows more user |  | ||||||
|  * friendly error messages. Feel free to get rid of it |  | ||||||
|  * (and change a lot of places -:)) |  | ||||||
|  */ |  | ||||||
| typedef struct bufblindid |  | ||||||
| { |  | ||||||
| 	char		dbname[NAMEDATALEN];	/* name of db in which buf belongs */ |  | ||||||
| 	char		relname[NAMEDATALEN];	/* name of reln */ |  | ||||||
| } BufferBlindId; |  | ||||||
|  |  | ||||||
| /* | /* | ||||||
|  *	BufferDesc -- shared buffer cache metadata for a single |  *	BufferDesc -- shared buffer cache metadata for a single | ||||||
|  *				  shared buffer descriptor. |  *				  shared buffer descriptor. | ||||||
|  * |  | ||||||
|  *		We keep the name of the database and relation in which this |  | ||||||
|  *		buffer appears in order to avoid a catalog lookup on cache |  | ||||||
|  *		flush if we don't have the reldesc in the cache.  It is also |  | ||||||
|  *		possible that the relation to which this buffer belongs is |  | ||||||
|  *		not visible to all backends at the time that it gets flushed. |  | ||||||
|  *		Dbname, relname, dbid, and relid are enough to determine where |  | ||||||
|  *		to put the buffer, for all storage managers. |  | ||||||
|  */ |  */ | ||||||
| typedef struct sbufdesc | typedef struct sbufdesc | ||||||
| { | { | ||||||
| @@ -89,14 +82,14 @@ typedef struct sbufdesc | |||||||
| 	Buffer		freePrev; | 	Buffer		freePrev; | ||||||
| 	SHMEM_OFFSET data;			/* pointer to data in buf pool */ | 	SHMEM_OFFSET data;			/* pointer to data in buf pool */ | ||||||
|  |  | ||||||
| 	/* tag and id must be together for table lookup to work */ | 	/* tag and id must be together for table lookup (still true?) */ | ||||||
| 	BufferTag	tag;			/* file/block identifier */ | 	BufferTag	tag;			/* file/block identifier */ | ||||||
| 	int			buf_id;			/* maps global desc to local desc */ | 	int			buf_id;			/* buffer's index number (from 0) */ | ||||||
|  |  | ||||||
| 	BufFlags	flags;			/* see bit definitions above */ | 	BufFlags	flags;			/* see bit definitions above */ | ||||||
| 	unsigned	refcount;		/* # of times buffer is pinned */ | 	unsigned	refcount;		/* # of backends holding pins on buffer */ | ||||||
|  |  | ||||||
| 	slock_t		io_in_progress_lock;	/* to block for I/O to complete */ | 	slock_t		io_in_progress_lock;	/* to wait for I/O to complete */ | ||||||
| 	slock_t		cntx_lock;		/* to lock access to page context */ | 	slock_t		cntx_lock;		/* to lock access to page context */ | ||||||
|  |  | ||||||
| 	unsigned	r_locks;		/* # of shared locks */ | 	unsigned	r_locks;		/* # of shared locks */ | ||||||
| @@ -105,15 +98,14 @@ typedef struct sbufdesc | |||||||
|  |  | ||||||
| 	bool		cntxDirty;		/* new way to mark block as dirty */ | 	bool		cntxDirty;		/* new way to mark block as dirty */ | ||||||
|  |  | ||||||
| 	BufferBlindId blind;		/* was used to support blind write */ |  | ||||||
|  |  | ||||||
| 	/* | 	/* | ||||||
| 	 * When we can't delete item from page (someone else has buffer | 	 * We can't physically remove items from a disk page if another backend | ||||||
| 	 * pinned) we mark buffer for cleanup by specifying appropriate for | 	 * has the buffer pinned.  Hence, a backend may need to wait for all | ||||||
| 	 * buffer content cleanup function. Buffer will be cleaned up from | 	 * other pins to go away.  This is signaled by setting its own backend ID | ||||||
| 	 * release buffer functions. | 	 * into wait_backend_id and setting flag bit BM_PIN_COUNT_WAITER. | ||||||
|  | 	 * At present, there can be only one such waiter per buffer. | ||||||
| 	 */ | 	 */ | ||||||
| 	void		(*CleanupFunc) (Buffer); | 	BackendId	wait_backend_id; /* backend ID of pin-count waiter */ | ||||||
| } BufferDesc; | } BufferDesc; | ||||||
|  |  | ||||||
| #define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1) | #define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1) | ||||||
| @@ -128,21 +120,23 @@ typedef struct sbufdesc | |||||||
| #define BL_R_LOCK			(1 << 1) | #define BL_R_LOCK			(1 << 1) | ||||||
| #define BL_RI_LOCK			(1 << 2) | #define BL_RI_LOCK			(1 << 2) | ||||||
| #define BL_W_LOCK			(1 << 3) | #define BL_W_LOCK			(1 << 3) | ||||||
|  | #define BL_PIN_COUNT_LOCK	(1 << 4) | ||||||
|  |  | ||||||
| /* | /* | ||||||
|  *	mao tracing buffer allocation |  *	mao tracing buffer allocation | ||||||
|  */ |  */ | ||||||
|  |  | ||||||
| /*#define BMTRACE*/ | /*#define BMTRACE*/ | ||||||
|  |  | ||||||
| #ifdef BMTRACE | #ifdef BMTRACE | ||||||
|  |  | ||||||
| typedef struct _bmtrace | typedef struct _bmtrace | ||||||
| { | { | ||||||
| 	int			bmt_pid; | 	int			bmt_pid; | ||||||
| 	long		bmt_buf; | 	int			bmt_buf; | ||||||
| 	long		bmt_dbid; | 	Oid			bmt_dbid; | ||||||
| 	long		bmt_relid; | 	Oid			bmt_relid; | ||||||
| 	int			bmt_blkno; | 	BlockNumber	bmt_blkno; | ||||||
| 	int			bmt_op; | 	int			bmt_op; | ||||||
|  |  | ||||||
| #define BMT_NOTUSED		0 | #define BMT_NOTUSED		0 | ||||||
| @@ -162,9 +156,7 @@ typedef struct _bmtrace | |||||||
| /* Internal routines: only called by buf.c */ | /* Internal routines: only called by buf.c */ | ||||||
|  |  | ||||||
| /*freelist.c*/ | /*freelist.c*/ | ||||||
| extern void AddBufferToFreelist(BufferDesc *bf); |  | ||||||
| extern void PinBuffer(BufferDesc *buf); | extern void PinBuffer(BufferDesc *buf); | ||||||
| extern void PinBuffer_Debug(char *file, int line, BufferDesc *buf); |  | ||||||
| extern void UnpinBuffer(BufferDesc *buf); | extern void UnpinBuffer(BufferDesc *buf); | ||||||
| extern BufferDesc *GetFreeBuffer(void); | extern BufferDesc *GetFreeBuffer(void); | ||||||
| extern void InitFreeList(bool init); | extern void InitFreeList(bool init); | ||||||
| @@ -179,7 +171,6 @@ extern bool BufTableInsert(BufferDesc *buf); | |||||||
| extern BufferDesc *BufferDescriptors; | extern BufferDesc *BufferDescriptors; | ||||||
| extern bits8 *BufferLocks; | extern bits8 *BufferLocks; | ||||||
| extern BufferTag *BufferTagLastDirtied; | extern BufferTag *BufferTagLastDirtied; | ||||||
| extern BufferBlindId *BufferBlindLastDirtied; |  | ||||||
| extern LockRelId *BufferRelidLastDirtied; | extern LockRelId *BufferRelidLastDirtied; | ||||||
| extern bool *BufferDirtiedByMe; | extern bool *BufferDirtiedByMe; | ||||||
| extern SPINLOCK BufMgrLock; | extern SPINLOCK BufMgrLock; | ||||||
|   | |||||||
| @@ -7,7 +7,7 @@ | |||||||
|  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group |  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group | ||||||
|  * Portions Copyright (c) 1994, Regents of the University of California |  * Portions Copyright (c) 1994, Regents of the University of California | ||||||
|  * |  * | ||||||
|  * $Id: bufmgr.h,v 1.53 2001/06/29 21:08:25 tgl Exp $ |  * $Id: bufmgr.h,v 1.54 2001/07/06 21:04:26 tgl Exp $ | ||||||
|  * |  * | ||||||
|  *------------------------------------------------------------------------- |  *------------------------------------------------------------------------- | ||||||
|  */ |  */ | ||||||
| @@ -167,7 +167,7 @@ extern void InitBufferPoolAccess(void); | |||||||
| extern void PrintBufferUsage(FILE *statfp); | extern void PrintBufferUsage(FILE *statfp); | ||||||
| extern void ResetBufferUsage(void); | extern void ResetBufferUsage(void); | ||||||
| extern void ResetBufferPool(bool isCommit); | extern void ResetBufferPool(bool isCommit); | ||||||
| extern int	BufferPoolCheckLeak(void); | extern bool BufferPoolCheckLeak(void); | ||||||
| extern void FlushBufferPool(void); | extern void FlushBufferPool(void); | ||||||
| extern BlockNumber BufferGetBlockNumber(Buffer buffer); | extern BlockNumber BufferGetBlockNumber(Buffer buffer); | ||||||
| extern BlockNumber RelationGetNumberOfBlocks(Relation relation); | extern BlockNumber RelationGetNumberOfBlocks(Relation relation); | ||||||
| @@ -183,10 +183,9 @@ extern void SetBufferCommitInfoNeedsSave(Buffer buffer); | |||||||
|  |  | ||||||
| extern void UnlockBuffers(void); | extern void UnlockBuffers(void); | ||||||
| extern void LockBuffer(Buffer buffer, int mode); | extern void LockBuffer(Buffer buffer, int mode); | ||||||
| extern void AbortBufferIO(void); | extern void LockBufferForCleanup(Buffer buffer); | ||||||
|  |  | ||||||
| extern bool BufferIsUpdatable(Buffer buffer); | extern void AbortBufferIO(void); | ||||||
| extern void MarkBufferForCleanup(Buffer buffer, void (*CleanupFunc) (Buffer)); |  | ||||||
|  |  | ||||||
| extern void BufmgrCommit(void); | extern void BufmgrCommit(void); | ||||||
| extern void BufferSync(void); | extern void BufferSync(void); | ||||||
|   | |||||||
| @@ -7,7 +7,7 @@ | |||||||
|  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group |  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group | ||||||
|  * Portions Copyright (c) 1994, Regents of the University of California |  * Portions Copyright (c) 1994, Regents of the University of California | ||||||
|  * |  * | ||||||
|  * $Id: proc.h,v 1.44 2001/06/16 22:58:17 tgl Exp $ |  * $Id: proc.h,v 1.45 2001/07/06 21:04:26 tgl Exp $ | ||||||
|  * |  * | ||||||
|  *------------------------------------------------------------------------- |  *------------------------------------------------------------------------- | ||||||
|  */ |  */ | ||||||
| @@ -15,6 +15,7 @@ | |||||||
| #define _PROC_H_ | #define _PROC_H_ | ||||||
|  |  | ||||||
| #include "access/xlog.h" | #include "access/xlog.h" | ||||||
|  | #include "storage/backendid.h" | ||||||
| #include "storage/lock.h" | #include "storage/lock.h" | ||||||
|  |  | ||||||
| /* configurable option */ | /* configurable option */ | ||||||
| @@ -139,4 +140,8 @@ extern void ProcReleaseSpins(PROC *proc); | |||||||
| extern bool LockWaitCancel(void); | extern bool LockWaitCancel(void); | ||||||
| extern void HandleDeadLock(SIGNAL_ARGS); | extern void HandleDeadLock(SIGNAL_ARGS); | ||||||
|  |  | ||||||
|  | extern void ProcWaitForSignal(void); | ||||||
|  | extern void ProcCancelWaitForSignal(void); | ||||||
|  | extern void ProcSendSignal(BackendId procId); | ||||||
|  |  | ||||||
| #endif	 /* PROC_H */ | #endif	 /* PROC_H */ | ||||||
|   | |||||||
| @@ -7,13 +7,14 @@ | |||||||
|  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group |  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group | ||||||
|  * Portions Copyright (c) 1994, Regents of the University of California |  * Portions Copyright (c) 1994, Regents of the University of California | ||||||
|  * |  * | ||||||
|  * $Id: sinval.h,v 1.19 2001/06/19 19:42:16 tgl Exp $ |  * $Id: sinval.h,v 1.20 2001/07/06 21:04:26 tgl Exp $ | ||||||
|  * |  * | ||||||
|  *------------------------------------------------------------------------- |  *------------------------------------------------------------------------- | ||||||
|  */ |  */ | ||||||
| #ifndef SINVAL_H | #ifndef SINVAL_H | ||||||
| #define SINVAL_H | #define SINVAL_H | ||||||
|  |  | ||||||
|  | #include "storage/backendid.h" | ||||||
| #include "storage/itemptr.h" | #include "storage/itemptr.h" | ||||||
| #include "storage/spin.h" | #include "storage/spin.h" | ||||||
|  |  | ||||||
| @@ -77,5 +78,7 @@ extern bool DatabaseHasActiveBackends(Oid databaseId, bool ignoreMyself); | |||||||
| extern bool TransactionIdIsInProgress(TransactionId xid); | extern bool TransactionIdIsInProgress(TransactionId xid); | ||||||
| extern void GetXmaxRecent(TransactionId *XmaxRecent); | extern void GetXmaxRecent(TransactionId *XmaxRecent); | ||||||
| extern int	CountActiveBackends(void); | extern int	CountActiveBackends(void); | ||||||
|  | /* Use "struct proc", not PROC, to avoid including proc.h here */ | ||||||
|  | extern struct proc *BackendIdGetProc(BackendId procId); | ||||||
|  |  | ||||||
| #endif	 /* SINVAL_H */ | #endif	 /* SINVAL_H */ | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user