mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-28 11:55:03 +03:00 
			
		
		
		
	Further optimize GIN multi-key searches.
When skipping over some items in a posting tree, re-find the new location by descending the tree from the root, rather than walking the right links. This can save a lot of I/O. Heavily modified from Alexander Korotkov's fast scan patch.
This commit is contained in:
		| @@ -1639,16 +1639,15 @@ ginInsertItemPointers(Relation index, BlockNumber rootBlkno, | |||||||
|  * Starts a new scan on a posting tree. |  * Starts a new scan on a posting tree. | ||||||
|  */ |  */ | ||||||
| GinBtreeStack * | GinBtreeStack * | ||||||
| ginScanBeginPostingTree(Relation index, BlockNumber rootBlkno) | ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno) | ||||||
| { | { | ||||||
| 	GinBtreeData btree; |  | ||||||
| 	GinBtreeStack *stack; | 	GinBtreeStack *stack; | ||||||
|  |  | ||||||
| 	ginPrepareDataScan(&btree, index, rootBlkno); | 	ginPrepareDataScan(btree, index, rootBlkno); | ||||||
|  |  | ||||||
| 	btree.fullScan = TRUE; | 	btree->fullScan = TRUE; | ||||||
|  |  | ||||||
| 	stack = ginFindLeafPage(&btree, TRUE); | 	stack = ginFindLeafPage(btree, TRUE); | ||||||
|  |  | ||||||
| 	return stack; | 	return stack; | ||||||
| } | } | ||||||
|   | |||||||
| @@ -99,12 +99,13 @@ static void | |||||||
| scanPostingTree(Relation index, GinScanEntry scanEntry, | scanPostingTree(Relation index, GinScanEntry scanEntry, | ||||||
| 				BlockNumber rootPostingTree) | 				BlockNumber rootPostingTree) | ||||||
| { | { | ||||||
|  | 	GinBtreeData btree; | ||||||
| 	GinBtreeStack *stack; | 	GinBtreeStack *stack; | ||||||
| 	Buffer		buffer; | 	Buffer		buffer; | ||||||
| 	Page		page; | 	Page		page; | ||||||
|  |  | ||||||
| 	/* Descend to the leftmost leaf page */ | 	/* Descend to the leftmost leaf page */ | ||||||
| 	stack = ginScanBeginPostingTree(index, rootPostingTree); | 	stack = ginScanBeginPostingTree(&btree, index, rootPostingTree); | ||||||
| 	buffer = stack->buffer; | 	buffer = stack->buffer; | ||||||
| 	IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */ | 	IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */ | ||||||
|  |  | ||||||
| @@ -412,7 +413,8 @@ restartScanEntry: | |||||||
| 			LockBuffer(stackEntry->buffer, GIN_UNLOCK); | 			LockBuffer(stackEntry->buffer, GIN_UNLOCK); | ||||||
| 			needUnlock = FALSE; | 			needUnlock = FALSE; | ||||||
|  |  | ||||||
| 			stack = ginScanBeginPostingTree(ginstate->index, rootPostingTree); | 			stack = ginScanBeginPostingTree(&entry->btree, ginstate->index, | ||||||
|  | 											rootPostingTree); | ||||||
| 			entry->buffer = stack->buffer; | 			entry->buffer = stack->buffer; | ||||||
|  |  | ||||||
| 			/* | 			/* | ||||||
| @@ -506,8 +508,60 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advan | |||||||
| { | { | ||||||
| 	Page		page; | 	Page		page; | ||||||
| 	int			i; | 	int			i; | ||||||
|  | 	bool		stepright; | ||||||
|  |  | ||||||
|  | 	if (!BufferIsValid(entry->buffer)) | ||||||
|  | 	{ | ||||||
|  | 		entry->isFinished = true; | ||||||
|  | 		return; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	/* | ||||||
|  | 	 * We have two strategies for finding the correct page: step right from | ||||||
|  | 	 * the current page, or descend the tree again from the root. If | ||||||
|  | 	 * advancePast equals the current item, the next matching item should be | ||||||
|  | 	 * on the next page, so we step right. Otherwise, descend from root. | ||||||
|  | 	 */ | ||||||
|  | 	if (ginCompareItemPointers(&entry->curItem, &advancePast) == 0) | ||||||
|  | 	{ | ||||||
|  | 		stepright = true; | ||||||
| 		LockBuffer(entry->buffer, GIN_SHARE); | 		LockBuffer(entry->buffer, GIN_SHARE); | ||||||
|  | 	} | ||||||
|  | 	else | ||||||
|  | 	{ | ||||||
|  | 		GinBtreeStack *stack; | ||||||
|  |  | ||||||
|  | 		ReleaseBuffer(entry->buffer); | ||||||
|  |  | ||||||
|  | 		/* | ||||||
|  | 		 * Set the search key, and find the correct leaf page. | ||||||
|  | 		 */ | ||||||
|  | 		if (ItemPointerIsLossyPage(&advancePast)) | ||||||
|  | 		{ | ||||||
|  | 			ItemPointerSet(&entry->btree.itemptr, | ||||||
|  | 						   GinItemPointerGetBlockNumber(&advancePast) + 1, | ||||||
|  | 						   FirstOffsetNumber); | ||||||
|  | 		} | ||||||
|  | 		else | ||||||
|  | 		{ | ||||||
|  | 			entry->btree.itemptr = advancePast; | ||||||
|  | 			entry->btree.itemptr.ip_posid++; | ||||||
|  | 		} | ||||||
|  | 		entry->btree.fullScan = false; | ||||||
|  | 		stack = ginFindLeafPage(&entry->btree, true); | ||||||
|  |  | ||||||
|  | 		/* we don't need the stack, just the buffer. */ | ||||||
|  | 		entry->buffer = stack->buffer; | ||||||
|  | 		IncrBufferRefCount(entry->buffer); | ||||||
|  | 		freeGinBtreeStack(stack); | ||||||
|  | 		stepright = false; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	elog(DEBUG2, "entryLoadMoreItems, %u/%u, skip: %d", | ||||||
|  | 		 GinItemPointerGetBlockNumber(&advancePast), | ||||||
|  | 		 GinItemPointerGetOffsetNumber(&advancePast), | ||||||
|  | 		 !stepright); | ||||||
|  |  | ||||||
| 	page = BufferGetPage(entry->buffer); | 	page = BufferGetPage(entry->buffer); | ||||||
| 	for (;;) | 	for (;;) | ||||||
| 	{ | 	{ | ||||||
| @@ -519,6 +573,8 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advan | |||||||
| 			entry->nlist = 0; | 			entry->nlist = 0; | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
|  | 		if (stepright) | ||||||
|  | 		{ | ||||||
| 			/* | 			/* | ||||||
| 			 * We've processed all the entries on this page. If it was the last | 			 * We've processed all the entries on this page. If it was the last | ||||||
| 			 * page in the tree, we're done. | 			 * page in the tree, we're done. | ||||||
| @@ -531,9 +587,6 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advan | |||||||
| 				return; | 				return; | ||||||
| 			} | 			} | ||||||
|  |  | ||||||
| 		if (GinPageGetOpaque(page)->flags & GIN_DELETED) |  | ||||||
| 			continue;		/* page was deleted by concurrent vacuum */ |  | ||||||
|  |  | ||||||
| 			/* | 			/* | ||||||
| 			 * Step to next page, following the right link. Then find the first | 			 * Step to next page, following the right link. Then find the first | ||||||
| 			 * ItemPointer greater than advancePast. | 			 * ItemPointer greater than advancePast. | ||||||
| @@ -542,6 +595,11 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advan | |||||||
| 										 ginstate->index, | 										 ginstate->index, | ||||||
| 										 GIN_SHARE); | 										 GIN_SHARE); | ||||||
| 			page = BufferGetPage(entry->buffer); | 			page = BufferGetPage(entry->buffer); | ||||||
|  | 		} | ||||||
|  | 		stepright = true; | ||||||
|  |  | ||||||
|  | 		if (GinPageGetOpaque(page)->flags & GIN_DELETED) | ||||||
|  | 			continue;		/* page was deleted by concurrent vacuum */ | ||||||
|  |  | ||||||
| 		/* | 		/* | ||||||
| 		 * The first item > advancePast might not be on this page, but | 		 * The first item > advancePast might not be on this page, but | ||||||
| @@ -566,8 +624,16 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advan | |||||||
| 		{ | 		{ | ||||||
| 			if (ginCompareItemPointers(&advancePast, &entry->list[i]) < 0) | 			if (ginCompareItemPointers(&advancePast, &entry->list[i]) < 0) | ||||||
| 			{ | 			{ | ||||||
| 				LockBuffer(entry->buffer, GIN_UNLOCK); |  | ||||||
| 				entry->offset = i; | 				entry->offset = i; | ||||||
|  |  | ||||||
|  | 				if (GinPageRightMost(page)) | ||||||
|  | 				{ | ||||||
|  | 					/* after processing the copied items, we're done. */ | ||||||
|  | 					UnlockReleaseBuffer(entry->buffer); | ||||||
|  | 					entry->buffer = InvalidBuffer; | ||||||
|  | 				} | ||||||
|  | 				else | ||||||
|  | 					LockBuffer(entry->buffer, GIN_UNLOCK); | ||||||
| 				return; | 				return; | ||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
| @@ -677,7 +743,10 @@ entryGetItem(GinState *ginstate, GinScanEntry entry, | |||||||
| 	} | 	} | ||||||
| 	else if (!BufferIsValid(entry->buffer)) | 	else if (!BufferIsValid(entry->buffer)) | ||||||
| 	{ | 	{ | ||||||
| 		/* A posting list from an entry tuple  */ | 		/* | ||||||
|  | 		 * A posting list from an entry tuple, or the last page of a posting | ||||||
|  | 		 * tree. | ||||||
|  | 		 */ | ||||||
| 		do | 		do | ||||||
| 		{ | 		{ | ||||||
| 			if (entry->offset >= entry->nlist) | 			if (entry->offset >= entry->nlist) | ||||||
|   | |||||||
| @@ -702,7 +702,7 @@ extern void GinPageDeletePostingItem(Page page, OffsetNumber offset); | |||||||
| extern void ginInsertItemPointers(Relation index, BlockNumber rootBlkno, | extern void ginInsertItemPointers(Relation index, BlockNumber rootBlkno, | ||||||
| 					  ItemPointerData *items, uint32 nitem, | 					  ItemPointerData *items, uint32 nitem, | ||||||
| 					  GinStatsData *buildStats); | 					  GinStatsData *buildStats); | ||||||
| extern GinBtreeStack *ginScanBeginPostingTree(Relation index, BlockNumber rootBlkno); | extern GinBtreeStack *ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno); | ||||||
| extern void ginDataFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage); | extern void ginDataFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage); | ||||||
| extern void ginPrepareDataScan(GinBtree btree, Relation index, BlockNumber rootBlkno); | extern void ginPrepareDataScan(GinBtree btree, Relation index, BlockNumber rootBlkno); | ||||||
|  |  | ||||||
| @@ -802,6 +802,7 @@ typedef struct GinScanEntryData | |||||||
| 	bool		isFinished; | 	bool		isFinished; | ||||||
| 	bool		reduceResult; | 	bool		reduceResult; | ||||||
| 	uint32		predictNumberResult; | 	uint32		predictNumberResult; | ||||||
|  | 	GinBtreeData btree; | ||||||
| }	GinScanEntryData; | }	GinScanEntryData; | ||||||
|  |  | ||||||
| typedef struct GinScanOpaqueData | typedef struct GinScanOpaqueData | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user