mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-25 13:17:41 +03:00 
			
		
		
		
	Fix BRIN summarization concurrent with extension
If a process is extending a table concurrently with some BRIN summarization process, it is possible for the latter to miss pages added by the former because the number of pages is computed ahead of time. Fix by determining a fresh relation size after inserting the placeholder tuple: any process that further extends the table concurrently will update the placeholder tuple, while previous pages will be processed by the heap scan. Reported-by: Tomas Vondra Reviewed-by: Tom Lane Author: Álvaro Herrera Discussion: https://postgr.es/m/083d996a-4a8a-0e13-800a-851dd09ad8cc@2ndquadrant.com Backpatch-to: 9.5
This commit is contained in:
		| @@ -957,7 +957,8 @@ terminate_brin_buildstate(BrinBuildState *state) | |||||||
| } | } | ||||||
|  |  | ||||||
| /* | /* | ||||||
|  * Summarize the given page range of the given index. |  * On the given BRIN index, summarize the heap page range that corresponds | ||||||
|  |  * to the heap block number given. | ||||||
|  * |  * | ||||||
|  * This routine can run in parallel with insertions into the heap.  To avoid |  * This routine can run in parallel with insertions into the heap.  To avoid | ||||||
|  * missing those values from the summary tuple, we first insert a placeholder |  * missing those values from the summary tuple, we first insert a placeholder | ||||||
| @@ -967,6 +968,12 @@ terminate_brin_buildstate(BrinBuildState *state) | |||||||
|  * update of the index value happens in a loop, so that if somebody updates |  * update of the index value happens in a loop, so that if somebody updates | ||||||
|  * the placeholder tuple after we read it, we detect the case and try again. |  * the placeholder tuple after we read it, we detect the case and try again. | ||||||
|  * This ensures that the concurrently inserted tuples are not lost. |  * This ensures that the concurrently inserted tuples are not lost. | ||||||
|  |  * | ||||||
|  |  * A further corner case is this routine being asked to summarize the partial | ||||||
|  |  * range at the end of the table.  heapNumBlocks is the (possibly outdated) | ||||||
|  |  * table size; if we notice that the requested range lies beyond that size, | ||||||
|  |  * we re-compute the table size after inserting the placeholder tuple, to | ||||||
|  |  * avoid missing pages that were appended recently. | ||||||
|  */ |  */ | ||||||
| static void | static void | ||||||
| summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel, | summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel, | ||||||
| @@ -987,6 +994,33 @@ summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel, | |||||||
| 						   state->bs_rmAccess, &phbuf, | 						   state->bs_rmAccess, &phbuf, | ||||||
| 						   heapBlk, phtup, phsz); | 						   heapBlk, phtup, phsz); | ||||||
|  |  | ||||||
|  | 	/* | ||||||
|  | 	 * Compute range end.  We hold ShareUpdateExclusive lock on table, so it | ||||||
|  | 	 * cannot shrink concurrently (but it can grow). | ||||||
|  | 	 */ | ||||||
|  | 	Assert(heapBlk % state->bs_pagesPerRange == 0); | ||||||
|  | 	if (heapBlk + state->bs_pagesPerRange > heapNumBlks) | ||||||
|  | 	{ | ||||||
|  | 		/* | ||||||
|  | 		 * If we're asked to scan what we believe to be the final range on the | ||||||
|  | 		 * table (i.e. a range that might be partial) we need to recompute our | ||||||
|  | 		 * idea of what the latest page is after inserting the placeholder | ||||||
|  | 		 * tuple.  Anyone that grows the table later will update the | ||||||
|  | 		 * placeholder tuple, so it doesn't matter that we won't scan these | ||||||
|  | 		 * pages ourselves.  Careful: the table might have been extended | ||||||
|  | 		 * beyond the current range, so clamp our result. | ||||||
|  | 		 * | ||||||
|  | 		 * Fortunately, this should occur infrequently. | ||||||
|  | 		 */ | ||||||
|  | 		scanNumBlks = Min(RelationGetNumberOfBlocks(heapRel) - heapBlk, | ||||||
|  | 						  state->bs_pagesPerRange); | ||||||
|  | 	} | ||||||
|  | 	else | ||||||
|  | 	{ | ||||||
|  | 		/* Easy case: range is known to be complete */ | ||||||
|  | 		scanNumBlks = state->bs_pagesPerRange; | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	/* | 	/* | ||||||
| 	 * Execute the partial heap scan covering the heap blocks in the specified | 	 * Execute the partial heap scan covering the heap blocks in the specified | ||||||
| 	 * page range, summarizing the heap tuples in it.  This scan stops just | 	 * page range, summarizing the heap tuples in it.  This scan stops just | ||||||
| @@ -997,8 +1031,6 @@ summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel, | |||||||
| 	 * by transactions that are still in progress, among other corner cases. | 	 * by transactions that are still in progress, among other corner cases. | ||||||
| 	 */ | 	 */ | ||||||
| 	state->bs_currRangeStart = heapBlk; | 	state->bs_currRangeStart = heapBlk; | ||||||
| 	scanNumBlks = heapBlk + state->bs_pagesPerRange <= heapNumBlks ? |  | ||||||
| 		state->bs_pagesPerRange : heapNumBlks - heapBlk; |  | ||||||
| 	IndexBuildHeapRangeScan(heapRel, state->bs_irel, indexInfo, false, true, | 	IndexBuildHeapRangeScan(heapRel, state->bs_irel, indexInfo, false, true, | ||||||
| 							heapBlk, scanNumBlks, | 							heapBlk, scanNumBlks, | ||||||
| 							brinbuildCallback, (void *) state); | 							brinbuildCallback, (void *) state); | ||||||
| @@ -1074,25 +1106,34 @@ brinsummarize(Relation index, Relation heapRel, double *numSummarized, | |||||||
| 	BrinBuildState *state = NULL; | 	BrinBuildState *state = NULL; | ||||||
| 	IndexInfo  *indexInfo = NULL; | 	IndexInfo  *indexInfo = NULL; | ||||||
| 	BlockNumber heapNumBlocks; | 	BlockNumber heapNumBlocks; | ||||||
| 	BlockNumber heapBlk; |  | ||||||
| 	BlockNumber pagesPerRange; | 	BlockNumber pagesPerRange; | ||||||
| 	Buffer		buf; | 	Buffer		buf; | ||||||
|  | 	BlockNumber startBlk; | ||||||
|  |  | ||||||
| 	revmap = brinRevmapInitialize(index, &pagesPerRange); | 	revmap = brinRevmapInitialize(index, &pagesPerRange); | ||||||
|  |  | ||||||
|  | 	/* determine range of pages to process: always start from the beginning */ | ||||||
|  | 	heapNumBlocks = RelationGetNumberOfBlocks(heapRel); | ||||||
|  | 	startBlk = 0; | ||||||
|  |  | ||||||
| 	/* | 	/* | ||||||
| 	 * Scan the revmap to find unsummarized items. | 	 * Scan the revmap to find unsummarized items. | ||||||
| 	 */ | 	 */ | ||||||
| 	buf = InvalidBuffer; | 	buf = InvalidBuffer; | ||||||
| 	heapNumBlocks = RelationGetNumberOfBlocks(heapRel); | 	for (; startBlk < heapNumBlocks; startBlk += pagesPerRange) | ||||||
| 	for (heapBlk = 0; heapBlk < heapNumBlocks; heapBlk += pagesPerRange) |  | ||||||
| 	{ | 	{ | ||||||
| 		BrinTuple  *tup; | 		BrinTuple  *tup; | ||||||
| 		OffsetNumber off; | 		OffsetNumber off; | ||||||
|  |  | ||||||
|  | 		/* | ||||||
|  | 		 * Go away now if we think the next range is partial. | ||||||
|  | 		 */ | ||||||
|  | 		if (startBlk + pagesPerRange > heapNumBlocks) | ||||||
|  | 			break; | ||||||
|  |  | ||||||
| 		CHECK_FOR_INTERRUPTS(); | 		CHECK_FOR_INTERRUPTS(); | ||||||
|  |  | ||||||
| 		tup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off, NULL, | 		tup = brinGetTupleForHeapBlock(revmap, startBlk, &buf, &off, NULL, | ||||||
| 									   BUFFER_LOCK_SHARE); | 									   BUFFER_LOCK_SHARE); | ||||||
| 		if (tup == NULL) | 		if (tup == NULL) | ||||||
| 		{ | 		{ | ||||||
| @@ -1105,7 +1146,7 @@ brinsummarize(Relation index, Relation heapRel, double *numSummarized, | |||||||
| 												   pagesPerRange); | 												   pagesPerRange); | ||||||
| 				indexInfo = BuildIndexInfo(index); | 				indexInfo = BuildIndexInfo(index); | ||||||
| 			} | 			} | ||||||
| 			summarize_range(indexInfo, state, heapRel, heapBlk, heapNumBlocks); | 			summarize_range(indexInfo, state, heapRel, startBlk, heapNumBlocks); | ||||||
|  |  | ||||||
| 			/* and re-initialize state for the next range */ | 			/* and re-initialize state for the next range */ | ||||||
| 			brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc); | 			brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user