mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-25 13:17:41 +03:00 
			
		
		
		
	Bloom index contrib module
This module provides a new index access method. It is essentially a simple Bloom filter implemented as a PostgreSQL index. It can be beneficial for searches involving a large number of columns. The module is also the only way to test the generic WAL interface committed earlier. Author: Teodor Sigaev, Alexander Korotkov Reviewers: Aleksander Alekseev, Michael Paquier, Jim Nasby
This commit is contained in:
		| @@ -8,6 +8,7 @@ SUBDIRS = \ | |||||||
| 		adminpack	\ | 		adminpack	\ | ||||||
| 		auth_delay	\ | 		auth_delay	\ | ||||||
| 		auto_explain	\ | 		auto_explain	\ | ||||||
|  | 		bloom		\ | ||||||
| 		btree_gin	\ | 		btree_gin	\ | ||||||
| 		btree_gist	\ | 		btree_gist	\ | ||||||
| 		chkpass		\ | 		chkpass		\ | ||||||
|   | |||||||
							
								
								
									
										4
									
								
								contrib/bloom/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										4
									
								
								contrib/bloom/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,4 @@ | |||||||
|  | # Generated subdirectories | ||||||
|  | /log/ | ||||||
|  | /results/ | ||||||
|  | /tmp_check/ | ||||||
							
								
								
									
										24
									
								
								contrib/bloom/Makefile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										24
									
								
								contrib/bloom/Makefile
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,24 @@ | |||||||
# contrib/bloom/Makefile
#
# Builds the "bloom" extension as a loadable module, either through PGXS
# (when USE_PGXS is set) or inside the PostgreSQL source tree.

MODULE_big = bloom
OBJS = blcost.o blinsert.o blscan.o blutils.o blvacuum.o blvalidate.o $(WIN32RES)

EXTENSION = bloom
DATA = bloom--1.0.sql
PGFILEDESC = "bloom access method - signature file based index"

REGRESS = bloom

ifdef USE_PGXS
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
else
subdir = contrib/bloom
top_builddir = ../..
include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif

# wal-check names an action, not a file; declare it phony so that a stray
# file called "wal-check" in this directory cannot suppress the recipe.
.PHONY: wal-check

wal-check: temp-install
	$(prove_check)
							
								
								
									
										48
									
								
								contrib/bloom/blcost.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										48
									
								
								contrib/bloom/blcost.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,48 @@ | |||||||
|  | /*------------------------------------------------------------------------- | ||||||
|  |  * | ||||||
|  |  * blcost.c | ||||||
|  |  *		Cost estimate function for bloom indexes. | ||||||
|  |  * | ||||||
|  |  * Copyright (c) 2016, PostgreSQL Global Development Group | ||||||
|  |  * | ||||||
|  |  * IDENTIFICATION | ||||||
|  |  *	  contrib/bloom/blcost.c | ||||||
|  |  * | ||||||
|  |  *------------------------------------------------------------------------- | ||||||
|  |  */ | ||||||
|  | #include "postgres.h" | ||||||
|  |  | ||||||
|  | #include "fmgr.h" | ||||||
|  | #include "optimizer/cost.h" | ||||||
|  | #include "utils/selfuncs.h" | ||||||
|  |  | ||||||
|  | #include "bloom.h" | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Estimate cost of bloom index scan. | ||||||
|  |  */ | ||||||
|  | void | ||||||
|  | blcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, | ||||||
|  | 			   Cost *indexStartupCost, Cost *indexTotalCost, | ||||||
|  | 			   Selectivity *indexSelectivity, double *indexCorrelation) | ||||||
|  | { | ||||||
|  | 	IndexOptInfo *index = path->indexinfo; | ||||||
|  | 	List	   *qinfos; | ||||||
|  | 	GenericCosts costs; | ||||||
|  |  | ||||||
|  | 	/* Do preliminary analysis of indexquals */ | ||||||
|  | 	qinfos = deconstruct_indexquals(path); | ||||||
|  |  | ||||||
|  | 	MemSet(&costs, 0, sizeof(costs)); | ||||||
|  |  | ||||||
|  | 	/* We have to visit all index tuples anyway */ | ||||||
|  | 	costs.numIndexTuples = index->tuples; | ||||||
|  |  | ||||||
|  | 	/* Use generic estimate */ | ||||||
|  | 	genericcostestimate(root, path, loop_count, qinfos, &costs); | ||||||
|  |  | ||||||
|  | 	*indexStartupCost = costs.indexStartupCost; | ||||||
|  | 	*indexTotalCost = costs.indexTotalCost; | ||||||
|  | 	*indexSelectivity = costs.indexSelectivity; | ||||||
|  | 	*indexCorrelation = costs.indexCorrelation; | ||||||
|  | } | ||||||
							
								
								
									
										313
									
								
								contrib/bloom/blinsert.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										313
									
								
								contrib/bloom/blinsert.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,313 @@ | |||||||
|  | /*------------------------------------------------------------------------- | ||||||
|  |  * | ||||||
|  |  * blinsert.c | ||||||
|  |  *		Bloom index build and insert functions. | ||||||
|  |  * | ||||||
|  |  * Copyright (c) 2016, PostgreSQL Global Development Group | ||||||
|  |  * | ||||||
|  |  * IDENTIFICATION | ||||||
|  |  *	  contrib/bloom/blinsert.c | ||||||
|  |  * | ||||||
|  |  *------------------------------------------------------------------------- | ||||||
|  |  */ | ||||||
|  | #include "postgres.h" | ||||||
|  |  | ||||||
|  | #include "access/genam.h" | ||||||
|  | #include "access/generic_xlog.h" | ||||||
|  | #include "catalog/index.h" | ||||||
|  | #include "miscadmin.h" | ||||||
|  | #include "storage/bufmgr.h" | ||||||
|  | #include "storage/indexfsm.h" | ||||||
|  | #include "utils/memutils.h" | ||||||
|  | #include "utils/rel.h" | ||||||
|  |  | ||||||
|  | #include "bloom.h" | ||||||
|  |  | ||||||
|  | PG_MODULE_MAGIC; | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * State of bloom index build.  We accumulate one page data here before | ||||||
|  |  * flushing it to buffer manager. | ||||||
|  |  */ | ||||||
|  | typedef struct | ||||||
|  | { | ||||||
|  | 	BloomState	blstate;		/* bloom index state */ | ||||||
|  | 	MemoryContext tmpCtx;		/* temporary memory context reset after | ||||||
|  | 								 * each tuple */ | ||||||
|  | 	char		data[BLCKSZ];	/* cached page */ | ||||||
|  | 	int64		count;			/* number of tuples in cached page */ | ||||||
|  | }	BloomBuildState; | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Flush page cached in BloomBuildState. | ||||||
|  |  */ | ||||||
|  | static void | ||||||
|  | flushCachedPage(Relation index, BloomBuildState *buildstate) | ||||||
|  | { | ||||||
|  | 	Page		page; | ||||||
|  | 	Buffer		buffer = BloomNewBuffer(index); | ||||||
|  | 	GenericXLogState *state; | ||||||
|  |  | ||||||
|  | 	state = GenericXLogStart(index); | ||||||
|  | 	page = GenericXLogRegister(state, buffer, true); | ||||||
|  | 	memcpy(page, buildstate->data, BLCKSZ); | ||||||
|  | 	GenericXLogFinish(state); | ||||||
|  | 	UnlockReleaseBuffer(buffer); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * (Re)initialize cached page in BloomBuildState. | ||||||
|  |  */ | ||||||
|  | static void | ||||||
|  | initCachedPage(BloomBuildState *buildstate) | ||||||
|  | { | ||||||
|  | 	memset(buildstate->data, 0, BLCKSZ); | ||||||
|  | 	BloomInitPage(buildstate->data, 0); | ||||||
|  | 	buildstate->count = 0; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Per-tuple callback from IndexBuildHeapScan. | ||||||
|  |  */ | ||||||
|  | static void | ||||||
|  | bloomBuildCallback(Relation index, HeapTuple htup, Datum *values, | ||||||
|  | 				   bool *isnull, bool tupleIsAlive, void *state) | ||||||
|  | { | ||||||
|  | 	BloomBuildState *buildstate = (BloomBuildState *) state; | ||||||
|  | 	MemoryContext oldCtx; | ||||||
|  | 	BloomTuple *itup; | ||||||
|  |  | ||||||
|  | 	oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx); | ||||||
|  |  | ||||||
|  | 	itup = BloomFormTuple(&buildstate->blstate, &htup->t_self, values, isnull); | ||||||
|  |  | ||||||
|  | 	/* Try to add next item to cached page */ | ||||||
|  | 	if (BloomPageAddItem(&buildstate->blstate, buildstate->data, itup)) | ||||||
|  | 	{ | ||||||
|  | 		/* Next item was added successfully */ | ||||||
|  | 		buildstate->count++; | ||||||
|  | 	} | ||||||
|  | 	else | ||||||
|  | 	{ | ||||||
|  | 		/* Cached page is full, flush it out and make a new one */ | ||||||
|  | 		flushCachedPage(index, buildstate); | ||||||
|  |  | ||||||
|  | 		CHECK_FOR_INTERRUPTS(); | ||||||
|  |  | ||||||
|  | 		initCachedPage(buildstate); | ||||||
|  |  | ||||||
|  | 		if (BloomPageAddItem(&buildstate->blstate, buildstate->data, itup) == false) | ||||||
|  | 		{ | ||||||
|  | 			/* We shouldn't be here since we're inserting to the empty page */ | ||||||
|  | 			elog(ERROR, "can not add new tuple"); | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	MemoryContextSwitchTo(oldCtx); | ||||||
|  | 	MemoryContextReset(buildstate->tmpCtx); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Build a new bloom index. | ||||||
|  |  */ | ||||||
|  | IndexBuildResult * | ||||||
|  | blbuild(Relation heap, Relation index, IndexInfo *indexInfo) | ||||||
|  | { | ||||||
|  | 	IndexBuildResult *result; | ||||||
|  | 	double		reltuples; | ||||||
|  | 	BloomBuildState buildstate; | ||||||
|  |  | ||||||
|  | 	if (RelationGetNumberOfBlocks(index) != 0) | ||||||
|  | 		elog(ERROR, "index \"%s\" already contains data", | ||||||
|  | 			 RelationGetRelationName(index)); | ||||||
|  |  | ||||||
|  | 	/* Initialize the meta page */ | ||||||
|  | 	BloomInitMetapage(index); | ||||||
|  |  | ||||||
|  | 	/* Initialize the bloom build state */ | ||||||
|  | 	memset(&buildstate, 0, sizeof(buildstate)); | ||||||
|  | 	initBloomState(&buildstate.blstate, index); | ||||||
|  | 	buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext, | ||||||
|  | 											  "Bloom build temporary context", | ||||||
|  | 											  ALLOCSET_DEFAULT_MINSIZE, | ||||||
|  | 											  ALLOCSET_DEFAULT_INITSIZE, | ||||||
|  | 											  ALLOCSET_DEFAULT_MAXSIZE); | ||||||
|  | 	initCachedPage(&buildstate); | ||||||
|  |  | ||||||
|  | 	/* Do the heap scan */ | ||||||
|  | 	reltuples = IndexBuildHeapScan(heap, index, indexInfo, true, | ||||||
|  | 								   bloomBuildCallback, (void *) &buildstate); | ||||||
|  |  | ||||||
|  | 	/* | ||||||
|  | 	 * There are could be some items in cached page.  Flush this page | ||||||
|  | 	 * if needed. | ||||||
|  | 	 */ | ||||||
|  | 	if (buildstate.count > 0) | ||||||
|  | 		flushCachedPage(index, &buildstate); | ||||||
|  |  | ||||||
|  | 	MemoryContextDelete(buildstate.tmpCtx); | ||||||
|  |  | ||||||
|  | 	result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult)); | ||||||
|  | 	result->heap_tuples = result->index_tuples = reltuples; | ||||||
|  |  | ||||||
|  | 	return result; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Build an empty bloom index in the initialization fork. | ||||||
|  |  */ | ||||||
|  | void | ||||||
|  | blbuildempty(Relation index) | ||||||
|  | { | ||||||
|  | 	if (RelationGetNumberOfBlocks(index) != 0) | ||||||
|  | 		elog(ERROR, "index \"%s\" already contains data", | ||||||
|  | 			 RelationGetRelationName(index)); | ||||||
|  |  | ||||||
|  | 	/* Initialize the meta page */ | ||||||
|  | 	BloomInitMetapage(index); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Insert new tuple to the bloom index. | ||||||
|  |  */ | ||||||
|  | bool | ||||||
|  | blinsert(Relation index, Datum *values, bool *isnull, | ||||||
|  | 		 ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique) | ||||||
|  | { | ||||||
|  | 	BloomState	blstate; | ||||||
|  | 	BloomTuple *itup; | ||||||
|  | 	MemoryContext oldCtx; | ||||||
|  | 	MemoryContext insertCtx; | ||||||
|  | 	BloomMetaPageData *metaData; | ||||||
|  | 	Buffer		buffer, | ||||||
|  | 				metaBuffer; | ||||||
|  | 	Page		page, | ||||||
|  | 				metaPage; | ||||||
|  | 	BlockNumber blkno = InvalidBlockNumber; | ||||||
|  | 	OffsetNumber nStart; | ||||||
|  | 	GenericXLogState *state; | ||||||
|  |  | ||||||
|  | 	insertCtx = AllocSetContextCreate(CurrentMemoryContext, | ||||||
|  | 									  "Bloom insert temporary context", | ||||||
|  | 									  ALLOCSET_DEFAULT_MINSIZE, | ||||||
|  | 									  ALLOCSET_DEFAULT_INITSIZE, | ||||||
|  | 									  ALLOCSET_DEFAULT_MAXSIZE); | ||||||
|  |  | ||||||
|  | 	oldCtx = MemoryContextSwitchTo(insertCtx); | ||||||
|  |  | ||||||
|  | 	initBloomState(&blstate, index); | ||||||
|  | 	itup = BloomFormTuple(&blstate, ht_ctid, values, isnull); | ||||||
|  |  | ||||||
|  | 	/* | ||||||
|  | 	 * At first, try to insert new tuple to the first page in notFullPage | ||||||
|  | 	 * array.  If success we don't need to modify the meta page. | ||||||
|  | 	 */ | ||||||
|  | 	metaBuffer = ReadBuffer(index, BLOOM_METAPAGE_BLKNO); | ||||||
|  | 	LockBuffer(metaBuffer, BUFFER_LOCK_SHARE); | ||||||
|  | 	metaData = BloomPageGetMeta(BufferGetPage(metaBuffer)); | ||||||
|  |  | ||||||
|  | 	if (metaData->nEnd > metaData->nStart) | ||||||
|  | 	{ | ||||||
|  | 		Page		page; | ||||||
|  |  | ||||||
|  | 		blkno = metaData->notFullPage[metaData->nStart]; | ||||||
|  |  | ||||||
|  | 		Assert(blkno != InvalidBlockNumber); | ||||||
|  | 		LockBuffer(metaBuffer, BUFFER_LOCK_UNLOCK); | ||||||
|  |  | ||||||
|  | 		buffer = ReadBuffer(index, blkno); | ||||||
|  | 		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); | ||||||
|  | 		state = GenericXLogStart(index); | ||||||
|  | 		page = GenericXLogRegister(state, buffer, false); | ||||||
|  |  | ||||||
|  | 		if (BloomPageAddItem(&blstate, page, itup)) | ||||||
|  | 		{ | ||||||
|  | 			GenericXLogFinish(state); | ||||||
|  | 			UnlockReleaseBuffer(buffer); | ||||||
|  | 			ReleaseBuffer(metaBuffer); | ||||||
|  | 			MemoryContextSwitchTo(oldCtx); | ||||||
|  | 			MemoryContextDelete(insertCtx); | ||||||
|  | 			return false; | ||||||
|  | 		} | ||||||
|  | 		else | ||||||
|  | 		{ | ||||||
|  | 			GenericXLogAbort(state); | ||||||
|  | 			UnlockReleaseBuffer(buffer); | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	else | ||||||
|  | 	{ | ||||||
|  | 		/* First page in notFullPage isn't suitable */ | ||||||
|  | 		LockBuffer(metaBuffer, BUFFER_LOCK_UNLOCK); | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	/* | ||||||
|  | 	 * Try other pages in notFullPage array.  We will have to change nStart in | ||||||
|  | 	 * metapage.  Thus, grab exclusive lock on metapage. | ||||||
|  | 	 */ | ||||||
|  | 	LockBuffer(metaBuffer, BUFFER_LOCK_EXCLUSIVE); | ||||||
|  |  | ||||||
|  | 	state = GenericXLogStart(index); | ||||||
|  | 	metaPage = GenericXLogRegister(state, metaBuffer, false); | ||||||
|  | 	metaData = BloomPageGetMeta(metaPage); | ||||||
|  |  | ||||||
|  | 	/* | ||||||
|  | 	 * Iterate over notFullPage array.  Skip page we already tried first. | ||||||
|  | 	 */ | ||||||
|  | 	nStart = metaData->nStart; | ||||||
|  | 	if (metaData->nEnd > nStart && | ||||||
|  | 		blkno == metaData->notFullPage[nStart]) | ||||||
|  | 		nStart++; | ||||||
|  |  | ||||||
|  | 	while (metaData->nEnd > nStart) | ||||||
|  | 	{ | ||||||
|  | 		blkno = metaData->notFullPage[nStart]; | ||||||
|  | 		Assert(blkno != InvalidBlockNumber); | ||||||
|  |  | ||||||
|  | 		buffer = ReadBuffer(index, blkno); | ||||||
|  | 		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); | ||||||
|  | 		page = GenericXLogRegister(state, buffer, false); | ||||||
|  |  | ||||||
|  | 		if (BloomPageAddItem(&blstate, page, itup)) | ||||||
|  | 		{ | ||||||
|  | 			metaData->nStart = nStart; | ||||||
|  | 			GenericXLogFinish(state); | ||||||
|  | 			UnlockReleaseBuffer(buffer); | ||||||
|  | 			UnlockReleaseBuffer(metaBuffer); | ||||||
|  | 			MemoryContextSwitchTo(oldCtx); | ||||||
|  | 			MemoryContextDelete(insertCtx); | ||||||
|  | 			return false; | ||||||
|  | 		} | ||||||
|  | 		else | ||||||
|  | 		{ | ||||||
|  | 			GenericXLogUnregister(state, buffer); | ||||||
|  | 			UnlockReleaseBuffer(buffer); | ||||||
|  | 		} | ||||||
|  | 		nStart++; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	GenericXLogAbort(state); | ||||||
|  |  | ||||||
|  | 	/* | ||||||
|  | 	 * Didn't find place to insert in notFullPage array.  Allocate new page. | ||||||
|  | 	 */ | ||||||
|  | 	buffer = BloomNewBuffer(index); | ||||||
|  |  | ||||||
|  | 	state = GenericXLogStart(index); | ||||||
|  | 	metaPage = GenericXLogRegister(state, metaBuffer, false); | ||||||
|  | 	metaData = BloomPageGetMeta(metaPage); | ||||||
|  | 	page = GenericXLogRegister(state, buffer, true); | ||||||
|  | 	BloomInitPage(page, 0); | ||||||
|  | 	BloomPageAddItem(&blstate, page, itup); | ||||||
|  |  | ||||||
|  | 	metaData->nStart = 0; | ||||||
|  | 	metaData->nEnd = 1; | ||||||
|  | 	metaData->notFullPage[0] = BufferGetBlockNumber(buffer); | ||||||
|  |  | ||||||
|  | 	GenericXLogFinish(state); | ||||||
|  |  | ||||||
|  | 	UnlockReleaseBuffer(buffer); | ||||||
|  | 	UnlockReleaseBuffer(metaBuffer); | ||||||
|  |  | ||||||
|  | 	return false; | ||||||
|  | } | ||||||
							
								
								
									
										19
									
								
								contrib/bloom/bloom--1.0.sql
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										19
									
								
								contrib/bloom/bloom--1.0.sql
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,19 @@ | |||||||
/* contrib/bloom/bloom--1.0.sql */

-- complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "CREATE EXTENSION bloom" to load this file. \quit

-- Plain CREATE (not CREATE OR REPLACE): an install script must fail rather
-- than silently adopt a pre-existing function of the same name.
CREATE FUNCTION blhandler(internal)
RETURNS index_am_handler
AS 'MODULE_PATHNAME'
LANGUAGE C;

-- Access method
CREATE ACCESS METHOD bloom TYPE INDEX HANDLER blhandler;

-- Opclasses: one equality operator and one hash support function each

CREATE OPERATOR CLASS int4_ops
DEFAULT FOR TYPE int4 USING bloom AS
	OPERATOR	1	=(int4, int4),
	FUNCTION	1	hashint4(int4);

CREATE OPERATOR CLASS text_ops
DEFAULT FOR TYPE text USING bloom AS
	OPERATOR	1	=(text, text),
	FUNCTION	1	hashtext(text);
							
								
								
									
										5
									
								
								contrib/bloom/bloom.control
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								contrib/bloom/bloom.control
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,5 @@ | |||||||
|  | # bloom extension | ||||||
|  | comment = 'bloom access method - signature file based index' | ||||||
|  | default_version = '1.0' | ||||||
|  | module_pathname = '$libdir/bloom' | ||||||
|  | relocatable = true | ||||||
							
								
								
									
										178
									
								
								contrib/bloom/bloom.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										178
									
								
								contrib/bloom/bloom.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,178 @@ | |||||||
|  | /*------------------------------------------------------------------------- | ||||||
|  |  * | ||||||
|  |  * bloom.h | ||||||
|  |  *	  Header for bloom index. | ||||||
|  |  * | ||||||
|  |  * Copyright (c) 2016, PostgreSQL Global Development Group | ||||||
|  |  * | ||||||
|  |  * IDENTIFICATION | ||||||
|  |  *	  contrib/bloom/bloom.h | ||||||
|  |  * | ||||||
|  |  *------------------------------------------------------------------------- | ||||||
|  |  */ | ||||||
|  | #ifndef _BLOOM_H_ | ||||||
|  | #define _BLOOM_H_ | ||||||
|  |  | ||||||
|  | #include "access/amapi.h" | ||||||
|  | #include "access/generic_xlog.h" | ||||||
|  | #include "access/itup.h" | ||||||
|  | #include "access/xlog.h" | ||||||
|  | #include "nodes/relation.h" | ||||||
|  | #include "fmgr.h" | ||||||
|  |  | ||||||
|  | /* Support procedures numbers */ | ||||||
|  | #define BLOOM_HASH_PROC			1 | ||||||
|  | #define BLOOM_NPROC				1 | ||||||
|  |  | ||||||
|  | /* Scan strategies */ | ||||||
|  | #define BLOOM_EQUAL_STRATEGY	1 | ||||||
|  | #define BLOOM_NSTRATEGIES		1 | ||||||
|  |  | ||||||
|  | /* Opaque for bloom pages */ | ||||||
|  | typedef struct BloomPageOpaqueData | ||||||
|  | { | ||||||
|  | 	OffsetNumber maxoff; | ||||||
|  | 	uint16		flags; | ||||||
|  | }	BloomPageOpaqueData; | ||||||
|  |  | ||||||
|  | typedef BloomPageOpaqueData *BloomPageOpaque; | ||||||
|  |  | ||||||
|  | /* Bloom page flags */ | ||||||
|  | #define BLOOM_META		(1<<0) | ||||||
|  | #define BLOOM_DELETED	(2<<0) | ||||||
|  |  | ||||||
|  | /* Macros for accessing bloom page structures */ | ||||||
|  | #define BloomPageGetOpaque(page) ((BloomPageOpaque) PageGetSpecialPointer(page)) | ||||||
|  | #define BloomPageGetMaxOffset(page) (BloomPageGetOpaque(page)->maxoff) | ||||||
|  | #define BloomPageIsMeta(page) (BloomPageGetOpaque(page)->flags & BLOOM_META) | ||||||
|  | #define BloomPageIsDeleted(page) (BloomPageGetOpaque(page)->flags & BLOOM_DELETED) | ||||||
|  | #define BloomPageSetDeleted(page) (BloomPageGetOpaque(page)->flags |= BLOOM_DELETED) | ||||||
|  | #define BloomPageSetNonDeleted(page) (BloomPageGetOpaque(page)->flags &= ~BLOOM_DELETED) | ||||||
|  | #define BloomPageGetData(page)		((BloomTuple *)PageGetContents(page)) | ||||||
|  | #define BloomPageGetTuple(state, page, offset) \ | ||||||
|  | 	((BloomTuple *)(PageGetContents(page) \ | ||||||
|  | 		+ (state)->sizeOfBloomTuple * ((offset) - 1))) | ||||||
|  | #define BloomPageGetNextTuple(state, tuple) \ | ||||||
|  | 	((BloomTuple *)((Pointer)(tuple) + (state)->sizeOfBloomTuple)) | ||||||
|  |  | ||||||
|  | /* Preserved page numbers */ | ||||||
|  | #define BLOOM_METAPAGE_BLKNO	(0) | ||||||
|  | #define BLOOM_HEAD_BLKNO		(1)		/* first data page */ | ||||||
|  |  | ||||||
|  | /* Bloom index options */ | ||||||
|  | typedef struct BloomOptions | ||||||
|  | { | ||||||
|  | 	int32		vl_len_;		/* varlena header (do not touch directly!) */ | ||||||
|  | 	int			bloomLength;	/* length of signature in uint16 */ | ||||||
|  | 	int			bitSize[INDEX_MAX_KEYS];		/* signature bits per index | ||||||
|  | 												 * key */ | ||||||
|  | }	BloomOptions; | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * FreeBlockNumberArray - array of block numbers sized so that metadata fill | ||||||
|  |  * all space in metapage. | ||||||
|  |  */ | ||||||
|  | typedef BlockNumber FreeBlockNumberArray[ | ||||||
|  | 										 MAXALIGN_DOWN( | ||||||
|  | 		BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(BloomPageOpaqueData)) | ||||||
|  | 	   - MAXALIGN(sizeof(uint16) * 2 + sizeof(uint32) + sizeof(BloomOptions)) | ||||||
|  | 													   ) / sizeof(BlockNumber) | ||||||
|  | ]; | ||||||
|  |  | ||||||
|  | /* Metadata of bloom index */ | ||||||
|  | typedef struct BloomMetaPageData | ||||||
|  | { | ||||||
|  | 	uint32		magickNumber; | ||||||
|  | 	uint16		nStart; | ||||||
|  | 	uint16		nEnd; | ||||||
|  | 	BloomOptions opts; | ||||||
|  | 	FreeBlockNumberArray notFullPage; | ||||||
|  | }	BloomMetaPageData; | ||||||
|  |  | ||||||
|  | /* Magic number to distinguish bloom pages among anothers */ | ||||||
|  | #define BLOOM_MAGICK_NUMBER (0xDBAC0DED) | ||||||
|  |  | ||||||
|  | /* Number of blocks numbers fit in BloomMetaPageData */ | ||||||
|  | #define BloomMetaBlockN		(sizeof(FreeBlockNumberArray) / sizeof(BlockNumber)) | ||||||
|  |  | ||||||
|  | #define BloomPageGetMeta(page)	((BloomMetaPageData *) PageGetContents(page)) | ||||||
|  |  | ||||||
|  | typedef struct BloomState | ||||||
|  | { | ||||||
|  | 	FmgrInfo	hashFn[INDEX_MAX_KEYS]; | ||||||
|  | 	BloomOptions *opts;			/* stored in rd_amcache and defined at | ||||||
|  | 								 * creation time */ | ||||||
|  | 	int32		nColumns; | ||||||
|  |  | ||||||
|  | 	/* | ||||||
|  | 	 * sizeOfBloomTuple is index's specific, and it depends on reloptions, so | ||||||
|  | 	 * precompute it | ||||||
|  | 	 */ | ||||||
|  | 	int32		sizeOfBloomTuple; | ||||||
|  | }	BloomState; | ||||||
|  |  | ||||||
|  | #define BloomPageGetFreeSpace(state, page) \ | ||||||
|  | 	(BLCKSZ - MAXALIGN(SizeOfPageHeaderData) \ | ||||||
|  | 		- BloomPageGetMaxOffset(page) * (state)->sizeOfBloomTuple \ | ||||||
|  | 		- MAXALIGN(sizeof(BloomPageOpaqueData))) | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Tuples are very different from all other relations | ||||||
|  |  */ | ||||||
|  | typedef uint16 SignType; | ||||||
|  |  | ||||||
|  | typedef struct BloomTuple | ||||||
|  | { | ||||||
|  | 	ItemPointerData heapPtr; | ||||||
|  | 	SignType	sign[1]; | ||||||
|  | }	BloomTuple; | ||||||
|  |  | ||||||
|  | #define BLOOMTUPLEHDRSZ offsetof(BloomTuple, sign) | ||||||
|  |  | ||||||
|  | /* Opaque data structure for bloom index scan */ | ||||||
|  | typedef struct BloomScanOpaqueData | ||||||
|  | { | ||||||
|  | 	SignType   *sign;			/* Scan signature */ | ||||||
|  | 	BloomState	state; | ||||||
|  | }	BloomScanOpaqueData; | ||||||
|  |  | ||||||
|  | typedef BloomScanOpaqueData *BloomScanOpaque; | ||||||
|  |  | ||||||
|  | /* blutils.c */ | ||||||
|  | extern void _PG_init(void); | ||||||
|  | extern Datum blhandler(PG_FUNCTION_ARGS); | ||||||
|  | extern void initBloomState(BloomState * state, Relation index); | ||||||
|  | extern void BloomInitMetapage(Relation index); | ||||||
|  | extern void BloomInitPage(Page page, uint16 flags); | ||||||
|  | extern Buffer BloomNewBuffer(Relation index); | ||||||
|  | extern void signValue(BloomState * state, SignType * sign, Datum value, int attno); | ||||||
|  | extern BloomTuple *BloomFormTuple(BloomState * state, ItemPointer iptr, Datum *values, bool *isnull); | ||||||
|  | extern bool BloomPageAddItem(BloomState * state, Page page, BloomTuple * tuple); | ||||||
|  |  | ||||||
|  | /* blvalidate.c */ | ||||||
|  | extern bool blvalidate(Oid opclassoid); | ||||||
|  |  | ||||||
|  | /* index access method interface functions */ | ||||||
|  | extern bool blinsert(Relation index, Datum *values, bool *isnull, | ||||||
|  | 		 ItemPointer ht_ctid, Relation heapRel, | ||||||
|  | 		 IndexUniqueCheck checkUnique); | ||||||
|  | extern IndexScanDesc blbeginscan(Relation r, int nkeys, int norderbys); | ||||||
|  | extern int64 blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm); | ||||||
|  | extern void blrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, | ||||||
|  | 		 ScanKey orderbys, int norderbys); | ||||||
|  | extern void blendscan(IndexScanDesc scan); | ||||||
|  | extern IndexBuildResult *blbuild(Relation heap, Relation index, | ||||||
|  | 		struct IndexInfo *indexInfo); | ||||||
|  | extern void blbuildempty(Relation index); | ||||||
|  | extern IndexBulkDeleteResult *blbulkdelete(IndexVacuumInfo *info, | ||||||
|  | 			 IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, | ||||||
|  | 			 void *callback_state); | ||||||
|  | extern IndexBulkDeleteResult *blvacuumcleanup(IndexVacuumInfo *info, | ||||||
|  | 				IndexBulkDeleteResult *stats); | ||||||
|  | extern bytea *bloptions(Datum reloptions, bool validate); | ||||||
|  | extern void blcostestimate(PlannerInfo *root, IndexPath *path, | ||||||
|  | 			   double loop_count, Cost *indexStartupCost, | ||||||
|  | 			   Cost *indexTotalCost, Selectivity *indexSelectivity, | ||||||
|  | 			   double *indexCorrelation); | ||||||
|  |  | ||||||
|  | #endif | ||||||
							
								
								
									
										175
									
								
								contrib/bloom/blscan.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										175
									
								
								contrib/bloom/blscan.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,175 @@ | |||||||
|  | /*------------------------------------------------------------------------- | ||||||
|  |  * | ||||||
|  |  * blscan.c | ||||||
|  |  *		Bloom index scan functions. | ||||||
|  |  * | ||||||
|  |  * Copyright (c) 2016, PostgreSQL Global Development Group | ||||||
|  |  * | ||||||
|  |  * IDENTIFICATION | ||||||
|  |  *	  contrib/bloom/blscan.c | ||||||
|  |  * | ||||||
|  |  *------------------------------------------------------------------------- | ||||||
|  |  */ | ||||||
|  | #include "postgres.h" | ||||||
|  |  | ||||||
|  | #include "access/relscan.h" | ||||||
|  | #include "pgstat.h" | ||||||
|  | #include "miscadmin.h" | ||||||
|  | #include "storage/bufmgr.h" | ||||||
|  | #include "storage/lmgr.h" | ||||||
|  | #include "utils/memutils.h" | ||||||
|  | #include "utils/rel.h" | ||||||
|  |  | ||||||
|  | #include "bloom.h" | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Begin scan of bloom index. | ||||||
|  |  */ | ||||||
|  | IndexScanDesc | ||||||
|  | blbeginscan(Relation r, int nkeys, int norderbys) | ||||||
|  | { | ||||||
|  | 	IndexScanDesc scan; | ||||||
|  |  | ||||||
|  | 	scan = RelationGetIndexScan(r, nkeys, norderbys); | ||||||
|  |  | ||||||
|  | 	return scan; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Rescan a bloom index. | ||||||
|  |  */ | ||||||
|  | void | ||||||
|  | blrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, | ||||||
|  | 		 ScanKey orderbys, int norderbys) | ||||||
|  | { | ||||||
|  | 	BloomScanOpaque so; | ||||||
|  |  | ||||||
|  | 	so = (BloomScanOpaque) scan->opaque; | ||||||
|  |  | ||||||
|  | 	if (so == NULL) | ||||||
|  | 	{ | ||||||
|  | 		/* if called from blbeginscan */ | ||||||
|  | 		so = (BloomScanOpaque) palloc(sizeof(BloomScanOpaqueData)); | ||||||
|  | 		initBloomState(&so->state, scan->indexRelation); | ||||||
|  | 		scan->opaque = so; | ||||||
|  |  | ||||||
|  | 	} | ||||||
|  | 	else | ||||||
|  | 	{ | ||||||
|  | 		if (so->sign) | ||||||
|  | 			pfree(so->sign); | ||||||
|  | 	} | ||||||
|  | 	so->sign = NULL; | ||||||
|  |  | ||||||
|  | 	if (scankey && scan->numberOfKeys > 0) | ||||||
|  | 	{ | ||||||
|  | 		memmove(scan->keyData, scankey, | ||||||
|  | 				scan->numberOfKeys * sizeof(ScanKeyData)); | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * End scan of bloom index. | ||||||
|  |  */ | ||||||
|  | void | ||||||
|  | blendscan(IndexScanDesc scan) | ||||||
|  | { | ||||||
|  | 	BloomScanOpaque so = (BloomScanOpaque) scan->opaque; | ||||||
|  |  | ||||||
|  | 	if (so->sign) | ||||||
|  | 		pfree(so->sign); | ||||||
|  | 	so->sign = NULL; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Insert all matching tuples into a bitmap. | ||||||
|  |  * | ||||||
|  |  * Reads every page of the index starting at BLOOM_HEAD_BLKNO, compares each | ||||||
|  |  * stored signature with the search signature built from the scan keys and | ||||||
|  |  * adds the heap pointer of every tuple whose signature contains all search | ||||||
|  |  * bits.  Returns the number of heap pointers added to tbm. | ||||||
|  |  */ | ||||||
|  | int64 | ||||||
|  | blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm) | ||||||
|  | { | ||||||
|  | 	int64		ntids = 0; | ||||||
|  | 	BlockNumber blkno = BLOOM_HEAD_BLKNO, | ||||||
|  | 				npages; | ||||||
|  | 	int			i; | ||||||
|  | 	BufferAccessStrategy bas; | ||||||
|  | 	BloomScanOpaque so = (BloomScanOpaque) scan->opaque; | ||||||
|  |  | ||||||
|  | 	if (so->sign == NULL) | ||||||
|  | 	{ | ||||||
|  | 		/* | ||||||
|  | 		 * New search: have to calculate search signature.  This is done | ||||||
|  | 		 * even when the scan has no keys: the signature then stays all-zero | ||||||
|  | 		 * and matches every index tuple, rather than being left NULL and | ||||||
|  | 		 * dereferenced by the comparison loop below. | ||||||
|  | 		 */ | ||||||
|  | 		ScanKey		skey = scan->keyData; | ||||||
|  |  | ||||||
|  | 		so->sign = palloc0(sizeof(SignType) * so->state.opts->bloomLength); | ||||||
|  |  | ||||||
|  | 		for (i = 0; i < scan->numberOfKeys; i++) | ||||||
|  | 		{ | ||||||
|  | 			/* | ||||||
|  | 			 * Assume bloom-indexable operators to be strict, so nothing could | ||||||
|  | 			 * be found for NULL key. | ||||||
|  | 			 */ | ||||||
|  | 			if (skey->sk_flags & SK_ISNULL) | ||||||
|  | 			{ | ||||||
|  | 				pfree(so->sign); | ||||||
|  | 				so->sign = NULL; | ||||||
|  | 				return 0; | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			/* Add next value to the signature (column numbers are 1-based) */ | ||||||
|  | 			signValue(&so->state, so->sign, skey->sk_argument, | ||||||
|  | 					  skey->sk_attno - 1); | ||||||
|  |  | ||||||
|  | 			skey++; | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	/* | ||||||
|  | 	 * We're going to read the whole index. This is why we use appropriate | ||||||
|  | 	 * buffer access strategy. | ||||||
|  | 	 */ | ||||||
|  | 	bas = GetAccessStrategy(BAS_BULKREAD); | ||||||
|  | 	npages = RelationGetNumberOfBlocks(scan->indexRelation); | ||||||
|  |  | ||||||
|  | 	for (blkno = BLOOM_HEAD_BLKNO; blkno < npages; blkno++) | ||||||
|  | 	{ | ||||||
|  | 		Buffer		buffer; | ||||||
|  | 		Page		page; | ||||||
|  |  | ||||||
|  | 		buffer = ReadBufferExtended(scan->indexRelation, MAIN_FORKNUM, | ||||||
|  | 									blkno, RBM_NORMAL, bas); | ||||||
|  |  | ||||||
|  | 		LockBuffer(buffer, BUFFER_LOCK_SHARE); | ||||||
|  | 		page = BufferGetPage(buffer); | ||||||
|  |  | ||||||
|  | 		if (!BloomPageIsDeleted(page)) | ||||||
|  | 		{ | ||||||
|  | 			OffsetNumber offset, | ||||||
|  | 						maxOffset = BloomPageGetMaxOffset(page); | ||||||
|  |  | ||||||
|  | 			for (offset = 1; offset <= maxOffset; offset++) | ||||||
|  | 			{ | ||||||
|  | 				BloomTuple *itup = BloomPageGetTuple(&so->state, page, offset); | ||||||
|  | 				bool		res = true; | ||||||
|  |  | ||||||
|  | 				/* Check index signature with scan signature */ | ||||||
|  | 				for (i = 0; res && i < so->state.opts->bloomLength; i++) | ||||||
|  | 				{ | ||||||
|  | 					if ((itup->sign[i] & so->sign[i]) != so->sign[i]) | ||||||
|  | 						res = false; | ||||||
|  | 				} | ||||||
|  |  | ||||||
|  | 				/* Add matching tuples to bitmap */ | ||||||
|  | 				if (res) | ||||||
|  | 				{ | ||||||
|  | 					tbm_add_tuples(tbm, &itup->heapPtr, 1, true); | ||||||
|  | 					ntids++; | ||||||
|  | 				} | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		UnlockReleaseBuffer(buffer); | ||||||
|  | 		CHECK_FOR_INTERRUPTS(); | ||||||
|  | 	} | ||||||
|  | 	FreeAccessStrategy(bas); | ||||||
|  |  | ||||||
|  | 	return ntids; | ||||||
|  | } | ||||||
							
								
								
									
										463
									
								
								contrib/bloom/blutils.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										463
									
								
								contrib/bloom/blutils.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,463 @@ | |||||||
|  | /*------------------------------------------------------------------------- | ||||||
|  |  * | ||||||
|  |  * blutils.c | ||||||
|  |  *		Bloom index utilities. | ||||||
|  |  * | ||||||
|  |  * Portions Copyright (c) 2016, PostgreSQL Global Development Group | ||||||
|  |  * Portions Copyright (c) 1990-1993, Regents of the University of California | ||||||
|  |  * | ||||||
|  |  * IDENTIFICATION | ||||||
|  |  *	  contrib/bloom/blutils.c | ||||||
|  |  * | ||||||
|  |  *------------------------------------------------------------------------- | ||||||
|  |  */ | ||||||
|  | #include "postgres.h" | ||||||
|  |  | ||||||
|  | #include "access/amapi.h" | ||||||
|  | #include "access/generic_xlog.h" | ||||||
|  | #include "catalog/index.h" | ||||||
|  | #include "storage/lmgr.h" | ||||||
|  | #include "miscadmin.h" | ||||||
|  | #include "storage/bufmgr.h" | ||||||
|  | #include "storage/indexfsm.h" | ||||||
|  | #include "utils/memutils.h" | ||||||
|  | #include "access/reloptions.h" | ||||||
|  | #include "storage/freespace.h" | ||||||
|  | #include "storage/indexfsm.h" | ||||||
|  |  | ||||||
|  | #include "bloom.h" | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Signature dealing macros.  A signature is addressed as an array of | ||||||
|  |  * SignType words; bit i lives in word (i / BITSIGNTYPE).  Each expansion is | ||||||
|  |  * fully parenthesized so it can be used safely inside larger expressions. | ||||||
|  |  */ | ||||||
|  | #define BITSIGNTYPE (BITS_PER_BYTE * sizeof(SignType)) | ||||||
|  | #define GETWORD(x,i) ( *( (SignType*)(x) + (int)( (i) / BITSIGNTYPE ) ) ) | ||||||
|  | #define CLRBIT(x,i)   ( GETWORD(x,i) &= ~( 0x01 << ( (i) % BITSIGNTYPE ) ) ) | ||||||
|  | #define SETBIT(x,i)   ( GETWORD(x,i) |=  ( 0x01 << ( (i) % BITSIGNTYPE ) ) ) | ||||||
|  | #define GETBIT(x,i) ( (GETWORD(x,i) >> ( (i) % BITSIGNTYPE )) & 0x01 ) | ||||||
|  |  | ||||||
|  | PG_FUNCTION_INFO_V1(blhandler); | ||||||
|  |  | ||||||
|  | /* Kind of relation options for bloom index */ | ||||||
|  | static relopt_kind bl_relopt_kind; | ||||||
|  |  | ||||||
|  | /* Private random generator, see the definitions for details */ | ||||||
|  | static int32 myRand(void); | ||||||
|  | static void mySrand(uint32 seed); | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Module initialization function: registers the bloom relation option kind | ||||||
|  |  * together with the "length" option and one "colN" option for each possible | ||||||
|  |  * index column. | ||||||
|  |  */ | ||||||
|  | void | ||||||
|  | _PG_init(void) | ||||||
|  | { | ||||||
|  | 	char		optname[16]; | ||||||
|  | 	int			colno; | ||||||
|  |  | ||||||
|  | 	bl_relopt_kind = add_reloption_kind(); | ||||||
|  |  | ||||||
|  | 	add_int_reloption(bl_relopt_kind, "length", | ||||||
|  | 					  "Length of signature in uint16 type", 5, 1, 256); | ||||||
|  |  | ||||||
|  | 	/* Options col1 .. colN, numbered from one */ | ||||||
|  | 	for (colno = 1; colno <= INDEX_MAX_KEYS; colno++) | ||||||
|  | 	{ | ||||||
|  | 		snprintf(optname, sizeof(optname), "col%d", colno); | ||||||
|  | 		add_int_reloption(bl_relopt_kind, optname, | ||||||
|  | 					  "Number of bits for corresponding column", 2, 1, 2048); | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Bloom handler function: builds the IndexAmRoutine node describing the | ||||||
|  |  * bloom access method (its properties and callback functions) and returns | ||||||
|  |  * it to the caller. | ||||||
|  |  */ | ||||||
|  | Datum | ||||||
|  | blhandler(PG_FUNCTION_ARGS) | ||||||
|  | { | ||||||
|  | 	IndexAmRoutine *am = makeNode(IndexAmRoutine); | ||||||
|  |  | ||||||
|  | 	/* Operator class layout */ | ||||||
|  | 	am->amstrategies = 1; | ||||||
|  | 	am->amsupport = 1; | ||||||
|  | 	am->amkeytype = 0; | ||||||
|  |  | ||||||
|  | 	/* Properties that are switched on */ | ||||||
|  | 	am->amcanmulticol = true; | ||||||
|  | 	am->amoptionalkey = true; | ||||||
|  |  | ||||||
|  | 	/* Properties that are switched off */ | ||||||
|  | 	am->amcanorder = false; | ||||||
|  | 	am->amcanorderbyop = false; | ||||||
|  | 	am->amcanbackward = false; | ||||||
|  | 	am->amcanunique = false; | ||||||
|  | 	am->amsearcharray = false; | ||||||
|  | 	am->amsearchnulls = false; | ||||||
|  | 	am->amstorage = false; | ||||||
|  | 	am->amclusterable = false; | ||||||
|  | 	am->ampredlocks = false; | ||||||
|  |  | ||||||
|  | 	/* Build, insert and maintenance callbacks */ | ||||||
|  | 	am->ambuild = blbuild; | ||||||
|  | 	am->ambuildempty = blbuildempty; | ||||||
|  | 	am->aminsert = blinsert; | ||||||
|  | 	am->ambulkdelete = blbulkdelete; | ||||||
|  | 	am->amvacuumcleanup = blvacuumcleanup; | ||||||
|  |  | ||||||
|  | 	/* Scan callbacks; entries left NULL are not provided */ | ||||||
|  | 	am->ambeginscan = blbeginscan; | ||||||
|  | 	am->amrescan = blrescan; | ||||||
|  | 	am->amgettuple = NULL; | ||||||
|  | 	am->amgetbitmap = blgetbitmap; | ||||||
|  | 	am->amendscan = blendscan; | ||||||
|  | 	am->ammarkpos = NULL; | ||||||
|  | 	am->amrestrpos = NULL; | ||||||
|  | 	am->amcanreturn = NULL; | ||||||
|  |  | ||||||
|  | 	/* Planner, options and opclass validation callbacks */ | ||||||
|  | 	am->amcostestimate = blcostestimate; | ||||||
|  | 	am->amoptions = bloptions; | ||||||
|  | 	am->amvalidate = blvalidate; | ||||||
|  |  | ||||||
|  | 	PG_RETURN_POINTER(am); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Fill BloomState structure for particular index. | ||||||
|  |  * | ||||||
|  |  * Copies the lookup data of each column's hash support function into the | ||||||
|  |  * state and makes state->opts point at the index options.  The options are | ||||||
|  |  * read from the metapage the first time and then kept in index->rd_amcache. | ||||||
|  |  * Raises an error if block BLOOM_METAPAGE_BLKNO is not a bloom metapage. | ||||||
|  |  */ | ||||||
|  | void | ||||||
|  | initBloomState(BloomState *state, Relation index) | ||||||
|  | { | ||||||
|  | 	int			i; | ||||||
|  |  | ||||||
|  | 	state->nColumns = index->rd_att->natts; | ||||||
|  |  | ||||||
|  | 	/* Initialize hash function for each attribute */ | ||||||
|  | 	for (i = 0; i < index->rd_att->natts; i++) | ||||||
|  | 	{ | ||||||
|  | 		fmgr_info_copy(&(state->hashFn[i]), | ||||||
|  | 					   index_getprocinfo(index, i + 1, BLOOM_HASH_PROC), | ||||||
|  | 					   CurrentMemoryContext); | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	/* Initialize amcache if needed with options from metapage */ | ||||||
|  | 	if (!index->rd_amcache) | ||||||
|  | 	{ | ||||||
|  | 		Buffer		buffer; | ||||||
|  | 		Page		page; | ||||||
|  | 		BloomMetaPageData *meta; | ||||||
|  | 		BloomOptions *opts; | ||||||
|  |  | ||||||
|  | 		buffer = ReadBuffer(index, BLOOM_METAPAGE_BLKNO); | ||||||
|  | 		LockBuffer(buffer, BUFFER_LOCK_SHARE); | ||||||
|  |  | ||||||
|  | 		page = BufferGetPage(buffer); | ||||||
|  |  | ||||||
|  | 		if (!BloomPageIsMeta(page)) | ||||||
|  | 			elog(ERROR, "Relation is not a bloom index"); | ||||||
|  | 		meta = BloomPageGetMeta(page); | ||||||
|  |  | ||||||
|  | 		if (meta->magickNumber != BLOOM_MAGICK_NUMBER) | ||||||
|  | 			elog(ERROR, "Relation is not a bloom index"); | ||||||
|  |  | ||||||
|  | 		/* | ||||||
|  | 		 * Allocate the cached copy only after the metapage has been | ||||||
|  | 		 * validated, so that the error exits above cannot leave an orphaned | ||||||
|  | 		 * chunk behind in the index's own memory context. | ||||||
|  | 		 */ | ||||||
|  | 		opts = MemoryContextAlloc(index->rd_indexcxt, sizeof(BloomOptions)); | ||||||
|  | 		*opts = meta->opts; | ||||||
|  |  | ||||||
|  | 		UnlockReleaseBuffer(buffer); | ||||||
|  |  | ||||||
|  | 		index->rd_amcache = (void *) opts; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	state->opts = (BloomOptions *) index->rd_amcache; | ||||||
|  | 	state->sizeOfBloomTuple = BLOOMTUPLEHDRSZ + | ||||||
|  | 		sizeof(SignType) * state->opts->bloomLength; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Random generator copied from FreeBSD.  Using own random generator here for | ||||||
|  |  * two reasons: | ||||||
|  |  * | ||||||
|  |  * 1) In this case random numbers are used for on-disk storage.  Usage of | ||||||
|  |  *	  PostgreSQL number generator would obstruct it from all possible changes. | ||||||
|  |  * 2) Changing seed of PostgreSQL random generator would be undesirable side | ||||||
|  |  *	  effect. | ||||||
|  |  */ | ||||||
|  | static int32 next; | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Advance the generator state kept in "next" and return the new value | ||||||
|  |  * minus one. | ||||||
|  |  */ | ||||||
|  | static int32 | ||||||
|  | myRand(void) | ||||||
|  | { | ||||||
|  | 	/* | ||||||
|  | 	 * Compute x = (7^5 * x) mod (2^31 - 1) | ||||||
|  | 	 * without overflowing 31 bits: | ||||||
|  | 	 *      (2^31 - 1) = 127773 * (7^5) + 2836 | ||||||
|  | 	 * From "Random number generators: good ones are hard to find", | ||||||
|  | 	 * Park and Miller, Communications of the ACM, vol. 31, no. 10, | ||||||
|  | 	 * October 1988, p. 1195. | ||||||
|  | 	 */ | ||||||
|  | 	int32 hi, lo, x; | ||||||
|  |  | ||||||
|  | 	/* Must be in [1, 0x7ffffffe] range at this point. */ | ||||||
|  | 	hi = next / 127773; | ||||||
|  | 	lo = next % 127773; | ||||||
|  | 	x = 16807 * lo - 2836 * hi; | ||||||
|  | 	if (x < 0) | ||||||
|  | 		x += 0x7fffffff; | ||||||
|  | 	next = x; | ||||||
|  | 	/* Transform to [0, 0x7ffffffd] range. */ | ||||||
|  | 	return (x - 1); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Seed the private generator. | ||||||
|  |  * | ||||||
|  |  * The seed is stored into the signed state variable first and reduced | ||||||
|  |  * afterwards.  Values produced from this state are used for on-disk | ||||||
|  |  * signatures, so this arithmetic must stay exactly as it is. | ||||||
|  |  */ | ||||||
|  | static void | ||||||
|  | mySrand(uint32 seed) | ||||||
|  | { | ||||||
|  | 	next = seed; | ||||||
|  | 	/* Transform to [1, 0x7ffffffe] range. */ | ||||||
|  | 	next = (next % 0x7ffffffe) + 1; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Set the bits that represent one column value in the given signature. | ||||||
|  |  * | ||||||
|  |  * attno is the zero-based column number: it selects the hash function and | ||||||
|  |  * the number of bits to set, and it also seeds the generator. | ||||||
|  |  */ | ||||||
|  | void | ||||||
|  | signValue(BloomState *state, SignType *sign, Datum value, int attno) | ||||||
|  | { | ||||||
|  | 	uint32		valueHash; | ||||||
|  | 	size_t		signBits; | ||||||
|  | 	int			bitsLeft; | ||||||
|  |  | ||||||
|  | 	/* | ||||||
|  | 	 * Seed the generator with the column number first, so that equal values | ||||||
|  | 	 * stored in different columns do not end up on the same bits. | ||||||
|  | 	 */ | ||||||
|  | 	mySrand(attno); | ||||||
|  |  | ||||||
|  | 	/* | ||||||
|  | 	 * Reseed with the value's hash mixed with the first number of the | ||||||
|  | 	 * per-column sequence; the numbers drawn below pick the bits to set. | ||||||
|  | 	 */ | ||||||
|  | 	valueHash = DatumGetInt32(FunctionCall1(&state->hashFn[attno], value)); | ||||||
|  | 	mySrand(valueHash ^ myRand()); | ||||||
|  |  | ||||||
|  | 	/* Total number of bits in a signature; kept unsigned for the modulo */ | ||||||
|  | 	signBits = state->opts->bloomLength * BITSIGNTYPE; | ||||||
|  |  | ||||||
|  | 	for (bitsLeft = state->opts->bitSize[attno]; bitsLeft > 0; bitsLeft--) | ||||||
|  | 	{ | ||||||
|  | 		/* compute the position once, SETBIT evaluates its argument twice */ | ||||||
|  | 		int			pos = myRand() % signBits; | ||||||
|  |  | ||||||
|  | 		SETBIT(sign, pos); | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Build a bloom tuple for one heap row: store the heap pointer and fold | ||||||
|  |  * every non-null column value into the signature.  The result is palloc'd. | ||||||
|  |  */ | ||||||
|  | BloomTuple * | ||||||
|  | BloomFormTuple(BloomState *state, ItemPointer iptr, Datum *values, bool *isnull) | ||||||
|  | { | ||||||
|  | 	BloomTuple *tuple; | ||||||
|  | 	int			col; | ||||||
|  |  | ||||||
|  | 	/* palloc0 gives an all-zero signature to start from */ | ||||||
|  | 	tuple = (BloomTuple *) palloc0(state->sizeOfBloomTuple); | ||||||
|  | 	tuple->heapPtr = *iptr; | ||||||
|  |  | ||||||
|  | 	for (col = 0; col < state->nColumns; col++) | ||||||
|  | 	{ | ||||||
|  | 		/* null values contribute no bits */ | ||||||
|  | 		if (!isnull[col]) | ||||||
|  | 			signValue(state, tuple->sign, values[col], col); | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return tuple; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Append a bloom tuple to the page.  Returns true when the tuple was copied | ||||||
|  |  * onto the page, false when it does not fit the page (the page is then left | ||||||
|  |  * untouched). | ||||||
|  |  */ | ||||||
|  | bool | ||||||
|  | BloomPageAddItem(BloomState *state, Page page, BloomTuple *tuple) | ||||||
|  | { | ||||||
|  | 	BloomPageOpaque opaque; | ||||||
|  | 	BloomTuple *slot; | ||||||
|  | 	Pointer		newEnd; | ||||||
|  |  | ||||||
|  | 	/* Give up early when there is no room for one more tuple */ | ||||||
|  | 	if (BloomPageGetFreeSpace(state, page) < state->sizeOfBloomTuple) | ||||||
|  | 		return false; | ||||||
|  |  | ||||||
|  | 	/* The new tuple goes right behind the current last one */ | ||||||
|  | 	opaque = BloomPageGetOpaque(page); | ||||||
|  | 	slot = BloomPageGetTuple(state, page, opaque->maxoff + 1); | ||||||
|  | 	memcpy((Pointer) slot, (Pointer) tuple, state->sizeOfBloomTuple); | ||||||
|  |  | ||||||
|  | 	/* Count the tuple and move pd_lower to the end of the used area */ | ||||||
|  | 	opaque->maxoff++; | ||||||
|  | 	newEnd = (Pointer) BloomPageGetTuple(state, page, opaque->maxoff + 1); | ||||||
|  | 	((PageHeader) page)->pd_lower = newEnd - page; | ||||||
|  |  | ||||||
|  | 	return true; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Get a page for new data, either a recycled one taken from the FSM or a | ||||||
|  |  * fresh one obtained by extending the index file. | ||||||
|  |  * The returned buffer is already pinned and exclusive-locked. | ||||||
|  |  * Caller is responsible for initializing the page by calling BloomInitBuffer. | ||||||
|  |  */ | ||||||
|  | Buffer | ||||||
|  | BloomNewBuffer(Relation index) | ||||||
|  | { | ||||||
|  | 	Buffer		buffer; | ||||||
|  | 	BlockNumber blkno; | ||||||
|  | 	bool		needLock; | ||||||
|  |  | ||||||
|  | 	/* Walk through the candidates offered by the FSM, if there are any */ | ||||||
|  | 	while ((blkno = GetFreeIndexPage(index)) != InvalidBlockNumber) | ||||||
|  | 	{ | ||||||
|  | 		buffer = ReadBuffer(index, blkno); | ||||||
|  |  | ||||||
|  | 		/* | ||||||
|  | 		 * Someone else may have recycled this page already and may hold its | ||||||
|  | 		 * lock, so only try to lock it and move on if that fails. | ||||||
|  | 		 */ | ||||||
|  | 		if (ConditionalLockBuffer(buffer)) | ||||||
|  | 		{ | ||||||
|  | 			Page		page = BufferGetPage(buffer); | ||||||
|  |  | ||||||
|  | 			/* Usable if never initialized or if marked deleted */ | ||||||
|  | 			if (PageIsNew(page) || BloomPageIsDeleted(page)) | ||||||
|  | 				return buffer; | ||||||
|  |  | ||||||
|  | 			LockBuffer(buffer, BUFFER_LOCK_UNLOCK); | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		/* Not usable: drop the pin and ask the FSM again */ | ||||||
|  | 		ReleaseBuffer(buffer); | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	/* Nothing to recycle, so the file has to be extended */ | ||||||
|  | 	needLock = !RELATION_IS_LOCAL(index); | ||||||
|  |  | ||||||
|  | 	if (needLock) | ||||||
|  | 		LockRelationForExtension(index, ExclusiveLock); | ||||||
|  |  | ||||||
|  | 	buffer = ReadBuffer(index, P_NEW); | ||||||
|  | 	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); | ||||||
|  |  | ||||||
|  | 	if (needLock) | ||||||
|  | 		UnlockRelationForExtension(index, ExclusiveLock); | ||||||
|  |  | ||||||
|  | 	return buffer; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Initialize a page as a bloom page: set up an empty page with room for the | ||||||
|  |  * bloom opaque data, zero that data and store the given flags in it. | ||||||
|  |  */ | ||||||
|  | void | ||||||
|  | BloomInitPage(Page page, uint16 flags) | ||||||
|  | { | ||||||
|  | 	BloomPageOpaque special; | ||||||
|  |  | ||||||
|  | 	PageInit(page, BLCKSZ, sizeof(BloomPageOpaqueData)); | ||||||
|  |  | ||||||
|  | 	/* Start from an all-zero opaque area, then record the page flags */ | ||||||
|  | 	special = BloomPageGetOpaque(page); | ||||||
|  | 	memset(special, 0, sizeof(BloomPageOpaqueData)); | ||||||
|  | 	special->flags = flags; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Adjust options of bloom index: replace unset or meaningless values with | ||||||
|  |  * the defaults (signature length 5, 2 bits per column). | ||||||
|  |  */ | ||||||
|  | static void | ||||||
|  | adjustBloomOptions(BloomOptions *opts) | ||||||
|  | { | ||||||
|  | 	int			i; | ||||||
|  | 	int			signBits; | ||||||
|  |  | ||||||
|  | 	/* Default length of bloom filter is 5 of 16-bit integers */ | ||||||
|  | 	if (opts->bloomLength <= 0) | ||||||
|  | 		opts->bloomLength = 5; | ||||||
|  |  | ||||||
|  | 	/* Signature size in bits: bitSize[] counts bits, not bytes */ | ||||||
|  | 	signBits = (int) (opts->bloomLength * BITSIGNTYPE); | ||||||
|  |  | ||||||
|  | 	/* Check number of bits of each column against the signature length */ | ||||||
|  | 	for (i = 0; i < INDEX_MAX_KEYS; i++) | ||||||
|  | 	{ | ||||||
|  | 		/* | ||||||
|  | 		 * Zero and negative number of bits is meaningless.  Also setting | ||||||
|  | 		 * more bits than signature have seems useless.  Replace both cases | ||||||
|  | 		 * with 2 bits default. | ||||||
|  | 		 */ | ||||||
|  | 		if (opts->bitSize[i] <= 0 | ||||||
|  | 			|| opts->bitSize[i] >= signBits) | ||||||
|  | 			opts->bitSize[i] = 2; | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Create and fill the metapage of a bloom index: magic number plus the | ||||||
|  |  * (adjusted) index options, written through generic WAL. | ||||||
|  |  */ | ||||||
|  | void | ||||||
|  | BloomInitMetapage(Relation index) | ||||||
|  | { | ||||||
|  | 	Buffer		buf; | ||||||
|  | 	Page		pg; | ||||||
|  | 	BloomOptions *opts; | ||||||
|  | 	BloomMetaPageData *meta; | ||||||
|  | 	GenericXLogState *xlog; | ||||||
|  |  | ||||||
|  | 	/* | ||||||
|  | 	 * Make a new buffer.  Since it is the first buffer of the index, it | ||||||
|  | 	 * should be associated with block number 0 (BLOOM_METAPAGE_BLKNO). | ||||||
|  | 	 */ | ||||||
|  | 	buf = BloomNewBuffer(index); | ||||||
|  | 	Assert(BufferGetBlockNumber(buf) == BLOOM_METAPAGE_BLKNO); | ||||||
|  |  | ||||||
|  | 	/* Without user-supplied options start from zeroes, then apply defaults */ | ||||||
|  | 	if (index->rd_options == NULL) | ||||||
|  | 		index->rd_options = palloc0(sizeof(BloomOptions)); | ||||||
|  | 	opts = (BloomOptions *) index->rd_options; | ||||||
|  | 	adjustBloomOptions(opts); | ||||||
|  |  | ||||||
|  | 	/* Build the page contents on the image registered with generic WAL */ | ||||||
|  | 	xlog = GenericXLogStart(index); | ||||||
|  | 	pg = GenericXLogRegister(xlog, buf, true); | ||||||
|  |  | ||||||
|  | 	BloomInitPage(pg, BLOOM_META); | ||||||
|  | 	meta = BloomPageGetMeta(pg); | ||||||
|  | 	memset(meta, 0, sizeof(BloomMetaPageData)); | ||||||
|  | 	meta->magickNumber = BLOOM_MAGICK_NUMBER; | ||||||
|  | 	meta->opts = *opts; | ||||||
|  |  | ||||||
|  | 	/* The metadata occupies the start of the page's data area */ | ||||||
|  | 	((PageHeader) pg)->pd_lower += sizeof(BloomMetaPageData); | ||||||
|  |  | ||||||
|  | 	GenericXLogFinish(xlog); | ||||||
|  | 	UnlockReleaseBuffer(buf); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Parse the reloptions of a bloom index into a BloomOptions struct and | ||||||
|  |  * replace unset or out-of-range values with defaults. | ||||||
|  |  */ | ||||||
|  | bytea * | ||||||
|  | bloptions(Datum reloptions, bool validate) | ||||||
|  | { | ||||||
|  | 	relopt_parse_elt tab[INDEX_MAX_KEYS + 1]; | ||||||
|  | 	relopt_value *parsed; | ||||||
|  | 	BloomOptions *result; | ||||||
|  | 	int			nparsed; | ||||||
|  | 	int			col; | ||||||
|  | 	char		name[16]; | ||||||
|  |  | ||||||
|  | 	/* Entry 0 describes the signature length option */ | ||||||
|  | 	tab[0].optname = "length"; | ||||||
|  | 	tab[0].opttype = RELOPT_TYPE_INT; | ||||||
|  | 	tab[0].offset = offsetof(BloomOptions, bloomLength); | ||||||
|  |  | ||||||
|  | 	/* Entries 1 .. INDEX_MAX_KEYS describe col1, col2, ... */ | ||||||
|  | 	for (col = 0; col < INDEX_MAX_KEYS; col++) | ||||||
|  | 	{ | ||||||
|  | 		relopt_parse_elt *elt = &tab[col + 1]; | ||||||
|  |  | ||||||
|  | 		snprintf(name, sizeof(name), "col%d", col + 1); | ||||||
|  | 		elt->optname = pstrdup(name); | ||||||
|  | 		elt->opttype = RELOPT_TYPE_INT; | ||||||
|  | 		elt->offset = offsetof(BloomOptions, bitSize[col]); | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	parsed = parseRelOptions(reloptions, validate, bl_relopt_kind, &nparsed); | ||||||
|  | 	result = allocateReloptStruct(sizeof(BloomOptions), parsed, nparsed); | ||||||
|  | 	fillRelOptions((void *) result, sizeof(BloomOptions), parsed, nparsed, | ||||||
|  | 				   validate, tab, INDEX_MAX_KEYS + 1); | ||||||
|  |  | ||||||
|  | 	adjustBloomOptions(result); | ||||||
|  |  | ||||||
|  | 	return (bytea *) result; | ||||||
|  | } | ||||||
							
								
								
									
										212
									
								
								contrib/bloom/blvacuum.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										212
									
								
								contrib/bloom/blvacuum.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,212 @@ | |||||||
|  | /*------------------------------------------------------------------------- | ||||||
|  |  * | ||||||
|  |  * blvacuum.c | ||||||
|  |  *		Bloom VACUUM functions. | ||||||
|  |  * | ||||||
|  |  * Copyright (c) 2016, PostgreSQL Global Development Group | ||||||
|  |  * | ||||||
|  |  * IDENTIFICATION | ||||||
|  |  *	  contrib/bloom/blvacuum.c | ||||||
|  |  * | ||||||
|  |  *------------------------------------------------------------------------- | ||||||
|  |  */ | ||||||
|  | #include "postgres.h" | ||||||
|  |  | ||||||
|  | #include "access/genam.h" | ||||||
|  | #include "catalog/storage.h" | ||||||
|  | #include "commands/vacuum.h" | ||||||
|  | #include "miscadmin.h" | ||||||
|  | #include "postmaster/autovacuum.h" | ||||||
|  | #include "storage/bufmgr.h" | ||||||
|  | #include "storage/indexfsm.h" | ||||||
|  | #include "storage/lmgr.h" | ||||||
|  |  | ||||||
|  | #include "bloom.h" | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Bulk deletion of all index entries pointing to a set of heap tuples. | ||||||
|  |  * The set of target tuples is specified via a callback routine that tells | ||||||
|  |  * whether any given heap tuple (identified by ItemPointer) is being deleted. | ||||||
|  |  * | ||||||
|  |  * Surviving tuples are compacted towards the start of each page, pages that | ||||||
|  |  * become empty are marked deleted, and the metapage's list of not-full pages | ||||||
|  |  * is rebuilt from the pages seen during this pass. | ||||||
|  |  * | ||||||
|  |  * Result: a palloc'd struct containing statistical info for VACUUM displays. | ||||||
|  |  */ | ||||||
|  | IndexBulkDeleteResult * | ||||||
|  | blbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, | ||||||
|  | 			 IndexBulkDeleteCallback callback, void *callback_state) | ||||||
|  | { | ||||||
|  | 	Relation	index = info->index; | ||||||
|  | 	BlockNumber blkno, | ||||||
|  | 				npages; | ||||||
|  | 	FreeBlockNumberArray notFullPage; | ||||||
|  | 	int			countPage = 0; | ||||||
|  | 	BloomState	state; | ||||||
|  | 	Buffer		buffer; | ||||||
|  | 	Page		page; | ||||||
|  | 	BloomMetaPageData *metaData; | ||||||
|  | 	GenericXLogState *gxlogState; | ||||||
|  |  | ||||||
|  | 	if (stats == NULL) | ||||||
|  | 		stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); | ||||||
|  |  | ||||||
|  | 	initBloomState(&state, index); | ||||||
|  |  | ||||||
|  | 	/* | ||||||
|  | 	 * Iterate over the pages. We don't care about concurrently added pages, | ||||||
|  | 	 * they can't contain tuples to delete. | ||||||
|  | 	 */ | ||||||
|  | 	npages = RelationGetNumberOfBlocks(index); | ||||||
|  | 	for (blkno = BLOOM_HEAD_BLKNO; blkno < npages; blkno++) | ||||||
|  | 	{ | ||||||
|  | 		BloomTuple *itup, | ||||||
|  | 				   *itupPtr, | ||||||
|  | 				   *itupEnd; | ||||||
|  |  | ||||||
|  | 		buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno, | ||||||
|  | 									RBM_NORMAL, info->strategy); | ||||||
|  |  | ||||||
|  | 		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); | ||||||
|  | 		gxlogState = GenericXLogStart(index); | ||||||
|  | 		page = GenericXLogRegister(gxlogState, buffer, false); | ||||||
|  |  | ||||||
|  | 		if (BloomPageIsDeleted(page)) | ||||||
|  | 		{ | ||||||
|  | 			/* Nothing to do here; discard the generic WAL state again */ | ||||||
|  | 			GenericXLogAbort(gxlogState); | ||||||
|  | 			UnlockReleaseBuffer(buffer); | ||||||
|  | 			CHECK_FOR_INTERRUPTS(); | ||||||
|  | 			continue; | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		/* | ||||||
|  | 		 * Iterate over the tuples: itup is the read position, itupPtr the | ||||||
|  | 		 * position the next surviving tuple is written to. | ||||||
|  | 		 */ | ||||||
|  | 		itup = BloomPageGetTuple(&state, page, 1); | ||||||
|  | 		itupPtr = BloomPageGetTuple(&state, page, 1); | ||||||
|  | 		itupEnd = BloomPageGetTuple(&state, page, BloomPageGetMaxOffset(page) + 1); | ||||||
|  | 		while (itup < itupEnd) | ||||||
|  | 		{ | ||||||
|  | 			/* Do we have to delete this tuple? */ | ||||||
|  | 			if (callback(&itup->heapPtr, callback_state)) | ||||||
|  | 			{ | ||||||
|  | 				stats->tuples_removed += 1; | ||||||
|  | 				BloomPageGetOpaque(page)->maxoff--; | ||||||
|  | 			} | ||||||
|  | 			else | ||||||
|  | 			{ | ||||||
|  | 				if (itupPtr != itup) | ||||||
|  | 				{ | ||||||
|  | 					/* | ||||||
|  | 					 * If we already delete something before, we have to move | ||||||
|  | 					 * this tuple backward. | ||||||
|  | 					 */ | ||||||
|  | 					memmove((Pointer) itupPtr, (Pointer) itup, | ||||||
|  | 							state.sizeOfBloomTuple); | ||||||
|  | 				} | ||||||
|  | 				stats->num_index_tuples++; | ||||||
|  | 				itupPtr = BloomPageGetNextTuple(&state, itupPtr); | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			itup = BloomPageGetNextTuple(&state, itup); | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		Assert(itupPtr == BloomPageGetTuple(&state, page, BloomPageGetMaxOffset(page) + 1)); | ||||||
|  |  | ||||||
|  | 		/* Did we delete something? */ | ||||||
|  | 		if (itupPtr != itup) | ||||||
|  | 		{ | ||||||
|  | 			/* Is it empty page now? */ | ||||||
|  | 			if (itupPtr == BloomPageGetData(page)) | ||||||
|  | 				BloomPageSetDeleted(page); | ||||||
|  | 			/* Adjust pd_lower */ | ||||||
|  | 			((PageHeader) page)->pd_lower = (Pointer) itupPtr - page; | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		/* | ||||||
|  | 		 * Remember the page as having free room.  This is checked only after | ||||||
|  | 		 * the deletion decision above, so that a page just marked deleted is | ||||||
|  | 		 * never offered to inserters through the not-full list. | ||||||
|  | 		 */ | ||||||
|  | 		if (!BloomPageIsDeleted(page) && | ||||||
|  | 			BloomPageGetFreeSpace(&state, page) > state.sizeOfBloomTuple && | ||||||
|  | 			countPage < BloomMetaBlockN) | ||||||
|  | 			notFullPage[countPage++] = blkno; | ||||||
|  |  | ||||||
|  | 		if (itupPtr != itup) | ||||||
|  | 		{ | ||||||
|  | 			/* Finish WAL-logging */ | ||||||
|  | 			GenericXLogFinish(gxlogState); | ||||||
|  | 		} | ||||||
|  | 		else | ||||||
|  | 		{ | ||||||
|  | 			/* Didn't change anything: abort WAL-logging */ | ||||||
|  | 			GenericXLogAbort(gxlogState); | ||||||
|  | 		} | ||||||
|  | 		UnlockReleaseBuffer(buffer); | ||||||
|  | 		CHECK_FOR_INTERRUPTS(); | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	/* | ||||||
|  | 	 * Store the new not-full list in the metapage.  This is done even when | ||||||
|  | 	 * the list is empty, otherwise an older list could keep pointing at pages | ||||||
|  | 	 * that were marked deleted above.  Only the filled entries are copied; | ||||||
|  | 	 * the rest of the local array is uninitialized. | ||||||
|  | 	 */ | ||||||
|  | 	buffer = ReadBuffer(index, BLOOM_METAPAGE_BLKNO); | ||||||
|  | 	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); | ||||||
|  |  | ||||||
|  | 	gxlogState = GenericXLogStart(index); | ||||||
|  | 	page = GenericXLogRegister(gxlogState, buffer, false); | ||||||
|  |  | ||||||
|  | 	metaData = BloomPageGetMeta(page); | ||||||
|  | 	memcpy(metaData->notFullPage, notFullPage, | ||||||
|  | 		   sizeof(notFullPage[0]) * countPage); | ||||||
|  | 	metaData->nStart = 0; | ||||||
|  | 	metaData->nEnd = countPage; | ||||||
|  |  | ||||||
|  | 	GenericXLogFinish(gxlogState); | ||||||
|  | 	UnlockReleaseBuffer(buffer); | ||||||
|  |  | ||||||
|  | 	return stats; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Post-VACUUM cleanup. | ||||||
|  |  * | ||||||
|  |  * Reports deleted pages to the free space map and recomputes the page and | ||||||
|  |  * tuple statistics.  Does nothing when called for ANALYZE only. | ||||||
|  |  * | ||||||
|  |  * Result: a palloc'd struct containing statistical info for VACUUM displays. | ||||||
|  |  */ | ||||||
|  | IndexBulkDeleteResult * | ||||||
|  | blvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) | ||||||
|  | { | ||||||
|  | 	Relation	index = info->index; | ||||||
|  | 	BlockNumber npages, | ||||||
|  | 				blkno; | ||||||
|  | 	BlockNumber totFreePages; | ||||||
|  |  | ||||||
|  | 	if (info->analyze_only) | ||||||
|  | 		return stats; | ||||||
|  |  | ||||||
|  | 	if (stats == NULL) | ||||||
|  | 		stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); | ||||||
|  |  | ||||||
|  | 	/* | ||||||
|  | 	 * Iterate over the pages: insert deleted pages into FSM and collect | ||||||
|  | 	 * statistics. | ||||||
|  | 	 * | ||||||
|  | 	 * The tuple count is recomputed from scratch here: stats may already | ||||||
|  | 	 * carry the count accumulated by blbulkdelete, and adding to it would | ||||||
|  | 	 * count every tuple twice.  estimated_count is a flag rather than a | ||||||
|  | 	 * counter, so it is left untouched. | ||||||
|  | 	 */ | ||||||
|  | 	npages = RelationGetNumberOfBlocks(index); | ||||||
|  | 	totFreePages = 0; | ||||||
|  | 	stats->num_index_tuples = 0; | ||||||
|  | 	for (blkno = BLOOM_HEAD_BLKNO; blkno < npages; blkno++) | ||||||
|  | 	{ | ||||||
|  | 		Buffer		buffer; | ||||||
|  | 		Page		page; | ||||||
|  |  | ||||||
|  | 		vacuum_delay_point(); | ||||||
|  |  | ||||||
|  | 		buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno, | ||||||
|  | 									RBM_NORMAL, info->strategy); | ||||||
|  | 		LockBuffer(buffer, BUFFER_LOCK_SHARE); | ||||||
|  | 		page = (Page) BufferGetPage(buffer); | ||||||
|  |  | ||||||
|  | 		if (BloomPageIsDeleted(page)) | ||||||
|  | 		{ | ||||||
|  | 			RecordFreeIndexPage(index, blkno); | ||||||
|  | 			totFreePages++; | ||||||
|  | 		} | ||||||
|  | 		else | ||||||
|  | 		{ | ||||||
|  | 			stats->num_index_tuples += BloomPageGetMaxOffset(page); | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		UnlockReleaseBuffer(buffer); | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	IndexFreeSpaceMapVacuum(info->index); | ||||||
|  | 	stats->pages_free = totFreePages; | ||||||
|  | 	stats->num_pages = RelationGetNumberOfBlocks(index); | ||||||
|  |  | ||||||
|  | 	return stats; | ||||||
|  | } | ||||||
							
								
								
									
										220
									
								
								contrib/bloom/blvalidate.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										220
									
								
								contrib/bloom/blvalidate.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,220 @@ | |||||||
|  | /*------------------------------------------------------------------------- | ||||||
|  |  * | ||||||
|  |  * blvalidate.c | ||||||
|  |  *	  Opclass validator for bloom. | ||||||
|  |  * | ||||||
|  |  * Copyright (c) 2016, PostgreSQL Global Development Group | ||||||
|  |  * | ||||||
|  |  * IDENTIFICATION | ||||||
|  |  *	  contrib/bloom/blvalidate.c | ||||||
|  |  * | ||||||
|  |  *------------------------------------------------------------------------- | ||||||
|  |  */ | ||||||
|  | #include "postgres.h" | ||||||
|  |  | ||||||
|  | #include "access/amvalidate.h" | ||||||
|  | #include "access/htup_details.h" | ||||||
|  | #include "catalog/pg_amop.h" | ||||||
|  | #include "catalog/pg_amproc.h" | ||||||
|  | #include "catalog/pg_opclass.h" | ||||||
|  | #include "catalog/pg_opfamily.h" | ||||||
|  | #include "catalog/pg_type.h" | ||||||
|  | #include "utils/builtins.h" | ||||||
|  | #include "utils/lsyscache.h" | ||||||
|  | #include "utils/syscache.h" | ||||||
|  |  | ||||||
|  | #include "bloom.h" | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Validator for a bloom opclass. | ||||||
|  |  */ | ||||||
|  | bool | ||||||
|  | blvalidate(Oid opclassoid) | ||||||
|  | { | ||||||
|  | 	bool		result = true; | ||||||
|  | 	HeapTuple	classtup; | ||||||
|  | 	Form_pg_opclass classform; | ||||||
|  | 	Oid			opfamilyoid; | ||||||
|  | 	Oid			opcintype; | ||||||
|  | 	Oid			opckeytype; | ||||||
|  | 	char	   *opclassname; | ||||||
|  | 	HeapTuple	familytup; | ||||||
|  | 	Form_pg_opfamily familyform; | ||||||
|  | 	char	   *opfamilyname; | ||||||
|  | 	CatCList   *proclist, | ||||||
|  | 			   *oprlist; | ||||||
|  | 	List	   *grouplist; | ||||||
|  | 	OpFamilyOpFuncGroup *opclassgroup; | ||||||
|  | 	int			i; | ||||||
|  | 	ListCell   *lc; | ||||||
|  |  | ||||||
|  | 	/* Fetch opclass information */ | ||||||
|  | 	classtup = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclassoid)); | ||||||
|  | 	if (!HeapTupleIsValid(classtup)) | ||||||
|  | 		elog(ERROR, "cache lookup failed for operator class %u", opclassoid); | ||||||
|  | 	classform = (Form_pg_opclass) GETSTRUCT(classtup); | ||||||
|  |  | ||||||
|  | 	opfamilyoid = classform->opcfamily; | ||||||
|  | 	opcintype = classform->opcintype; | ||||||
|  | 	opckeytype = classform->opckeytype; | ||||||
|  | 	if (!OidIsValid(opckeytype)) | ||||||
|  | 		opckeytype = opcintype; | ||||||
|  | 	opclassname = NameStr(classform->opcname); | ||||||
|  |  | ||||||
|  | 	/* Fetch opfamily information */ | ||||||
|  | 	familytup = SearchSysCache1(OPFAMILYOID, ObjectIdGetDatum(opfamilyoid)); | ||||||
|  | 	if (!HeapTupleIsValid(familytup)) | ||||||
|  | 		elog(ERROR, "cache lookup failed for operator family %u", opfamilyoid); | ||||||
|  | 	familyform = (Form_pg_opfamily) GETSTRUCT(familytup); | ||||||
|  |  | ||||||
|  | 	opfamilyname = NameStr(familyform->opfname); | ||||||
|  |  | ||||||
|  | 	/* Fetch all operators and support functions of the opfamily */ | ||||||
|  | 	oprlist = SearchSysCacheList1(AMOPSTRATEGY, ObjectIdGetDatum(opfamilyoid)); | ||||||
|  | 	proclist = SearchSysCacheList1(AMPROCNUM, ObjectIdGetDatum(opfamilyoid)); | ||||||
|  |  | ||||||
|  | 	/* Check individual support functions */ | ||||||
|  | 	for (i = 0; i < proclist->n_members; i++) | ||||||
|  | 	{ | ||||||
|  | 		HeapTuple	proctup = &proclist->members[i]->tuple; | ||||||
|  | 		Form_pg_amproc procform = (Form_pg_amproc) GETSTRUCT(proctup); | ||||||
|  | 		bool		ok; | ||||||
|  |  | ||||||
|  | 		/* | ||||||
|  | 		 * All bloom support functions should be registered with matching | ||||||
|  | 		 * left/right types | ||||||
|  | 		 */ | ||||||
|  | 		if (procform->amproclefttype != procform->amprocrighttype) | ||||||
|  | 		{ | ||||||
|  | 			ereport(INFO, | ||||||
|  | 					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION), | ||||||
|  | 					 errmsg("bloom opfamily %s contains support procedure %s with cross-type registration", | ||||||
|  | 							opfamilyname, | ||||||
|  | 							format_procedure(procform->amproc)))); | ||||||
|  | 			result = false; | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		/* | ||||||
|  | 		 * We can't check signatures except within the specific opclass, since | ||||||
|  | 		 * we need to know the associated opckeytype in many cases. | ||||||
|  | 		 */ | ||||||
|  | 		if (procform->amproclefttype != opcintype) | ||||||
|  | 			continue; | ||||||
|  |  | ||||||
|  | 		/* Check procedure numbers and function signatures */ | ||||||
|  | 		switch (procform->amprocnum) | ||||||
|  | 		{ | ||||||
|  | 			case BLOOM_HASH_PROC: | ||||||
|  | 				ok = check_amproc_signature(procform->amproc, INT4OID, false, | ||||||
|  | 											1, 1, opckeytype); | ||||||
|  | 				break; | ||||||
|  | 			default: | ||||||
|  | 				ereport(INFO, | ||||||
|  | 						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION), | ||||||
|  | 						 errmsg("bloom opfamily %s contains function %s with invalid support number %d", | ||||||
|  | 								opfamilyname, | ||||||
|  | 								format_procedure(procform->amproc), | ||||||
|  | 								procform->amprocnum))); | ||||||
|  | 				result = false; | ||||||
|  | 				continue;		/* don't want additional message */ | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		if (!ok) | ||||||
|  | 		{ | ||||||
|  | 			ereport(INFO, | ||||||
|  | 					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION), | ||||||
|  | 					 errmsg("gist opfamily %s contains function %s with wrong signature for support number %d", | ||||||
|  | 							opfamilyname, | ||||||
|  | 							format_procedure(procform->amproc), | ||||||
|  | 							procform->amprocnum))); | ||||||
|  | 			result = false; | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	/* Check individual operators */ | ||||||
|  | 	for (i = 0; i < oprlist->n_members; i++) | ||||||
|  | 	{ | ||||||
|  | 		HeapTuple	oprtup = &oprlist->members[i]->tuple; | ||||||
|  | 		Form_pg_amop oprform = (Form_pg_amop) GETSTRUCT(oprtup); | ||||||
|  |  | ||||||
|  | 		/* Check it's allowed strategy for bloom */ | ||||||
|  | 		if (oprform->amopstrategy < 1 || | ||||||
|  | 			oprform->amopstrategy > BLOOM_NSTRATEGIES) | ||||||
|  | 		{ | ||||||
|  | 			ereport(INFO, | ||||||
|  | 					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION), | ||||||
|  | 					 errmsg("bloom opfamily %s contains operator %s with invalid strategy number %d", | ||||||
|  | 							opfamilyname, | ||||||
|  | 							format_operator(oprform->amopopr), | ||||||
|  | 							oprform->amopstrategy))); | ||||||
|  | 			result = false; | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		/* bloom doesn't support ORDER BY operators */ | ||||||
|  | 		if (oprform->amoppurpose != AMOP_SEARCH || | ||||||
|  | 			OidIsValid(oprform->amopsortfamily)) | ||||||
|  | 		{ | ||||||
|  | 			ereport(INFO, | ||||||
|  | 					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION), | ||||||
|  | 					 errmsg("bloom opfamily %s contains invalid ORDER BY specification for operator %s", | ||||||
|  | 							opfamilyname, | ||||||
|  | 							format_operator(oprform->amopopr)))); | ||||||
|  | 			result = false; | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		/* Check operator signature --- same for all bloom strategies */ | ||||||
|  | 		if (!check_amop_signature(oprform->amopopr, BOOLOID, | ||||||
|  | 								  oprform->amoplefttype, | ||||||
|  | 								  oprform->amoprighttype)) | ||||||
|  | 		{ | ||||||
|  | 			ereport(INFO, | ||||||
|  | 					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION), | ||||||
|  | 					 errmsg("bloom opfamily %s contains operator %s with wrong signature", | ||||||
|  | 							opfamilyname, | ||||||
|  | 							format_operator(oprform->amopopr)))); | ||||||
|  | 			result = false; | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	/* Now check for inconsistent groups of operators/functions */ | ||||||
|  | 	grouplist = identify_opfamily_groups(oprlist, proclist); | ||||||
|  | 	opclassgroup = NULL; | ||||||
|  | 	foreach(lc, grouplist) | ||||||
|  | 	{ | ||||||
|  | 		OpFamilyOpFuncGroup *thisgroup = (OpFamilyOpFuncGroup *) lfirst(lc); | ||||||
|  |  | ||||||
|  | 		/* Remember the group exactly matching the test opclass */ | ||||||
|  | 		if (thisgroup->lefttype == opcintype && | ||||||
|  | 			thisgroup->righttype == opcintype) | ||||||
|  | 			opclassgroup = thisgroup; | ||||||
|  |  | ||||||
|  | 		/* | ||||||
|  | 		 * There is not a lot we can do to check the operator sets, since each | ||||||
|  | 		 * bloom opclass is more or less a law unto itself, and some contain | ||||||
|  | 		 * only operators that are binary-compatible with the opclass datatype | ||||||
|  | 		 * (meaning that empty operator sets can be OK).  That case also means | ||||||
|  | 		 * that we shouldn't insist on nonempty function sets except for the | ||||||
|  | 		 * opclass's own group. | ||||||
|  | 		 */ | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	/* Check that the originally-named opclass is complete */ | ||||||
|  | 	for (i = 1; i <= BLOOM_NPROC; i++) | ||||||
|  | 	{ | ||||||
|  | 		if (opclassgroup && | ||||||
|  | 			(opclassgroup->functionset & (((uint64) 1) << i)) != 0) | ||||||
|  | 			continue;			/* got it */ | ||||||
|  | 		ereport(INFO, | ||||||
|  | 				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION), | ||||||
|  | 				 errmsg("bloom opclass %s is missing support function %d", | ||||||
|  | 						opclassname, i))); | ||||||
|  | 		result = false; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	ReleaseCatCacheList(proclist); | ||||||
|  | 	ReleaseCatCacheList(oprlist); | ||||||
|  | 	ReleaseSysCache(familytup); | ||||||
|  | 	ReleaseSysCache(classtup); | ||||||
|  |  | ||||||
|  | 	return result; | ||||||
|  | } | ||||||
							
								
								
									
										122
									
								
								contrib/bloom/expected/bloom.out
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										122
									
								
								contrib/bloom/expected/bloom.out
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,122 @@ | |||||||
|  | CREATE EXTENSION bloom; | ||||||
|  | CREATE TABLE tst ( | ||||||
|  | 	i	int4, | ||||||
|  | 	t	text | ||||||
|  | ); | ||||||
|  | INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,100000) i; | ||||||
|  | CREATE INDEX bloomidx ON tst USING bloom (i, t) WITH (col1 = 3); | ||||||
|  | SET enable_seqscan=on; | ||||||
|  | SET enable_bitmapscan=off; | ||||||
|  | SET enable_indexscan=off; | ||||||
|  | SELECT count(*) FROM tst WHERE i = 7; | ||||||
|  |  count  | ||||||
|  | ------- | ||||||
|  |  10000 | ||||||
|  | (1 row) | ||||||
|  |  | ||||||
|  | SELECT count(*) FROM tst WHERE t = '5'; | ||||||
|  |  count  | ||||||
|  | ------- | ||||||
|  |   6264 | ||||||
|  | (1 row) | ||||||
|  |  | ||||||
|  | SELECT count(*) FROM tst WHERE i = 7 AND t = '5'; | ||||||
|  |  count  | ||||||
|  | ------- | ||||||
|  |    588 | ||||||
|  | (1 row) | ||||||
|  |  | ||||||
|  | SET enable_seqscan=off; | ||||||
|  | SET enable_bitmapscan=on; | ||||||
|  | SET enable_indexscan=on; | ||||||
|  | EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE i = 7; | ||||||
|  |                 QUERY PLAN                  | ||||||
|  | ------------------------------------------- | ||||||
|  |  Aggregate | ||||||
|  |    ->  Bitmap Heap Scan on tst | ||||||
|  |          Recheck Cond: (i = 7) | ||||||
|  |          ->  Bitmap Index Scan on bloomidx | ||||||
|  |                Index Cond: (i = 7) | ||||||
|  | (5 rows) | ||||||
|  |  | ||||||
|  | EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE t = '5'; | ||||||
|  |                 QUERY PLAN                  | ||||||
|  | ------------------------------------------- | ||||||
|  |  Aggregate | ||||||
|  |    ->  Bitmap Heap Scan on tst | ||||||
|  |          Recheck Cond: (t = '5'::text) | ||||||
|  |          ->  Bitmap Index Scan on bloomidx | ||||||
|  |                Index Cond: (t = '5'::text) | ||||||
|  | (5 rows) | ||||||
|  |  | ||||||
|  | EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE i = 7 AND t = '5'; | ||||||
|  |                        QUERY PLAN                         | ||||||
|  | --------------------------------------------------------- | ||||||
|  |  Aggregate | ||||||
|  |    ->  Bitmap Heap Scan on tst | ||||||
|  |          Recheck Cond: ((i = 7) AND (t = '5'::text)) | ||||||
|  |          ->  Bitmap Index Scan on bloomidx | ||||||
|  |                Index Cond: ((i = 7) AND (t = '5'::text)) | ||||||
|  | (5 rows) | ||||||
|  |  | ||||||
|  | SELECT count(*) FROM tst WHERE i = 7; | ||||||
|  |  count  | ||||||
|  | ------- | ||||||
|  |  10000 | ||||||
|  | (1 row) | ||||||
|  |  | ||||||
|  | SELECT count(*) FROM tst WHERE t = '5'; | ||||||
|  |  count  | ||||||
|  | ------- | ||||||
|  |   6264 | ||||||
|  | (1 row) | ||||||
|  |  | ||||||
|  | SELECT count(*) FROM tst WHERE i = 7 AND t = '5'; | ||||||
|  |  count  | ||||||
|  | ------- | ||||||
|  |    588 | ||||||
|  | (1 row) | ||||||
|  |  | ||||||
|  | DELETE FROM tst; | ||||||
|  | INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,100000) i; | ||||||
|  | VACUUM ANALYZE tst; | ||||||
|  | SELECT count(*) FROM tst WHERE i = 7; | ||||||
|  |  count  | ||||||
|  | ------- | ||||||
|  |  10000 | ||||||
|  | (1 row) | ||||||
|  |  | ||||||
|  | SELECT count(*) FROM tst WHERE t = '5'; | ||||||
|  |  count  | ||||||
|  | ------- | ||||||
|  |   6264 | ||||||
|  | (1 row) | ||||||
|  |  | ||||||
|  | SELECT count(*) FROM tst WHERE i = 7 AND t = '5'; | ||||||
|  |  count  | ||||||
|  | ------- | ||||||
|  |    588 | ||||||
|  | (1 row) | ||||||
|  |  | ||||||
|  | VACUUM FULL tst; | ||||||
|  | SELECT count(*) FROM tst WHERE i = 7; | ||||||
|  |  count  | ||||||
|  | ------- | ||||||
|  |  10000 | ||||||
|  | (1 row) | ||||||
|  |  | ||||||
|  | SELECT count(*) FROM tst WHERE t = '5'; | ||||||
|  |  count  | ||||||
|  | ------- | ||||||
|  |   6264 | ||||||
|  | (1 row) | ||||||
|  |  | ||||||
|  | SELECT count(*) FROM tst WHERE i = 7 AND t = '5'; | ||||||
|  |  count  | ||||||
|  | ------- | ||||||
|  |    588 | ||||||
|  | (1 row) | ||||||
|  |  | ||||||
|  | RESET enable_seqscan; | ||||||
|  | RESET enable_bitmapscan; | ||||||
|  | RESET enable_indexscan; | ||||||
							
								
								
									
										47
									
								
								contrib/bloom/sql/bloom.sql
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										47
									
								
								contrib/bloom/sql/bloom.sql
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,47 @@ | |||||||
|  | CREATE EXTENSION bloom; | ||||||
|  |  | ||||||
|  | CREATE TABLE tst ( | ||||||
|  | 	i	int4, | ||||||
|  | 	t	text | ||||||
|  | ); | ||||||
|  |  | ||||||
|  | INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,100000) i; | ||||||
|  | CREATE INDEX bloomidx ON tst USING bloom (i, t) WITH (col1 = 3); | ||||||
|  |  | ||||||
|  | SET enable_seqscan=on; | ||||||
|  | SET enable_bitmapscan=off; | ||||||
|  | SET enable_indexscan=off; | ||||||
|  |  | ||||||
|  | SELECT count(*) FROM tst WHERE i = 7; | ||||||
|  | SELECT count(*) FROM tst WHERE t = '5'; | ||||||
|  | SELECT count(*) FROM tst WHERE i = 7 AND t = '5'; | ||||||
|  |  | ||||||
|  | SET enable_seqscan=off; | ||||||
|  | SET enable_bitmapscan=on; | ||||||
|  | SET enable_indexscan=on; | ||||||
|  |  | ||||||
|  | EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE i = 7; | ||||||
|  | EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE t = '5'; | ||||||
|  | EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE i = 7 AND t = '5'; | ||||||
|  |  | ||||||
|  | SELECT count(*) FROM tst WHERE i = 7; | ||||||
|  | SELECT count(*) FROM tst WHERE t = '5'; | ||||||
|  | SELECT count(*) FROM tst WHERE i = 7 AND t = '5'; | ||||||
|  |  | ||||||
|  | DELETE FROM tst; | ||||||
|  | INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,100000) i; | ||||||
|  | VACUUM ANALYZE tst; | ||||||
|  |  | ||||||
|  | SELECT count(*) FROM tst WHERE i = 7; | ||||||
|  | SELECT count(*) FROM tst WHERE t = '5'; | ||||||
|  | SELECT count(*) FROM tst WHERE i = 7 AND t = '5'; | ||||||
|  |  | ||||||
|  | VACUUM FULL tst; | ||||||
|  |  | ||||||
|  | SELECT count(*) FROM tst WHERE i = 7; | ||||||
|  | SELECT count(*) FROM tst WHERE t = '5'; | ||||||
|  | SELECT count(*) FROM tst WHERE i = 7 AND t = '5'; | ||||||
|  |  | ||||||
|  | RESET enable_seqscan; | ||||||
|  | RESET enable_bitmapscan; | ||||||
|  | RESET enable_indexscan; | ||||||
							
								
								
									
										75
									
								
								contrib/bloom/t/001_wal.pl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										75
									
								
								contrib/bloom/t/001_wal.pl
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,75 @@ | |||||||
# Test generic xlog record work for bloom index replication.
use strict;
use warnings;
use PostgresNode;
use TestLib;

# One comparison for the initial state plus three per modification cycle
# (delete, vacuum, insert) over ten cycles: 1 + 10 * 3 = 31.
use Test::More tests => 31;

# Node handles shared between the setup code and test_index_replay().
my $node_master;
my $node_standby;

# Run few queries on both master and standby and check their results match.
#
# $test_name is used as the prefix of the reported test description.
sub test_index_replay
{
	my ($test_name) = @_;

	# Wait for standby to catch up.  Compare against replay_location rather
	# than write_location: the queries below read from the standby, so the
	# WAL must have been applied there, not merely received and written.
	my $applname = $node_standby->name;
	my $caughtup_query =
		"SELECT pg_current_xlog_location() <= replay_location FROM pg_stat_replication WHERE application_name = '$applname';";
	$node_master->poll_query_until('postgres', $caughtup_query)
	  or die "Timed out while waiting for standby 1 to catch up";

	# Disable sequential scans and enable index/bitmap scans for the
	# test queries.
	my $queries = qq(SET enable_seqscan=off;
SET enable_bitmapscan=on;
SET enable_indexscan=on;
SELECT * FROM tst WHERE i = 0;
SELECT * FROM tst WHERE i = 3;
SELECT * FROM tst WHERE t = 'b';
SELECT * FROM tst WHERE t = 'f';
SELECT * FROM tst WHERE i = 3 AND t = 'c';
SELECT * FROM tst WHERE i = 7 AND t = 'e';
);

	# Run test queries and compare their result
	my $master_result = $node_master->psql("postgres", $queries);
	my $standby_result = $node_standby->psql("postgres", $queries);

	is($master_result, $standby_result, "$test_name: query result matches");
}

# Initialize master node
$node_master = get_new_node('master');
$node_master->init(allows_streaming => 1);
$node_master->start;
my $backup_name = 'my_backup';

# Take backup
$node_master->backup($backup_name);

# Create streaming standby linking to master
$node_standby = get_new_node('standby');
$node_standby->init_from_backup($node_master, $backup_name,
	has_streaming => 1);
$node_standby->start;

# Create some bloom index on master
$node_master->psql("postgres", "CREATE EXTENSION bloom;");
$node_master->psql("postgres", "CREATE TABLE tst (i int4, t text);");
$node_master->psql("postgres", "INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,100000) i;");
$node_master->psql("postgres", "CREATE INDEX bloomidx ON tst USING bloom (i, t) WITH (col1 = 3);");

# Test that queries give same result
test_index_replay('initial');

# Run 10 cycles of table modification. Run test queries after each modification.
for my $i (1..10)
{
	$node_master->psql("postgres", "DELETE FROM tst WHERE i = $i;");
	test_index_replay("delete $i");
	$node_master->psql("postgres", "VACUUM tst;");
	test_index_replay("vacuum $i");

	# Each cycle inserts a fresh, non-overlapping range of 10000 values.
	my ($start, $end) = (100001 + ($i - 1) * 10000, 100000 + $i * 10000);
	$node_master->psql("postgres", "INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series($start,$end) i;");
	test_index_replay("insert $i");
}
							
								
								
									
										218
									
								
								doc/src/sgml/bloom.sgml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										218
									
								
								doc/src/sgml/bloom.sgml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,218 @@ | |||||||
|  | <!-- doc/src/sgml/bloom.sgml --> | ||||||
|  |  | ||||||
|  | <sect1 id="bloom" xreflabel="bloom"> | ||||||
|  |  <title>bloom</title> | ||||||
|  |  | ||||||
|  |  <indexterm zone="bloom"> | ||||||
|  |   <primary>bloom</primary> | ||||||
|  |  </indexterm> | ||||||
|  |  | ||||||
|  |  <para> | ||||||
|  |   <literal>bloom</> is a contrib module which implements an index access method.  It | ||||||
|  |   serves as an example of custom access methods and generic WAL record usage, but it | ||||||
|  |   is also useful in its own right. | ||||||
|  |  </para> | ||||||
|  |  | ||||||
|  |  <sect2> | ||||||
|  |   <title>Introduction</title> | ||||||
|  |  | ||||||
|  |   <para> | ||||||
|  |    This implementation of a | ||||||
|  |    <ulink url="http://en.wikipedia.org/wiki/Bloom_filter">Bloom filter</ulink> | ||||||
|  |    allows fast exclusion of non-candidate tuples. | ||||||
|  |    Since a signature is a lossy representation of all indexed attributes, | ||||||
|  |    search results must be rechecked using heap information. | ||||||
|  |    The user can specify the signature length (in uint16 units, default is 5) and the | ||||||
|  |    number of bits that can be set per attribute (1 &lt; colN &lt; 2048). | ||||||
|  |   </para> | ||||||
|  |  | ||||||
|  |   <para> | ||||||
|  |    This index is useful if a table has many attributes and queries can include | ||||||
|  |    arbitrary combinations of them.  A traditional <literal>btree</> index is faster | ||||||
|  |    than a bloom index, but it would require too many indexes to support all possible | ||||||
|  |    queries, while only one bloom index is needed.  A bloom index supports only | ||||||
|  |    equality comparison.  Since it is a signature file, not a tree, it must always | ||||||
|  |    be read in full, but sequentially, so index search performance is | ||||||
|  |    constant and doesn't depend on the query. | ||||||
|  |   </para> | ||||||
|  |  </sect2> | ||||||
|  |  | ||||||
|  |  <sect2> | ||||||
|  |   <title>Parameters</title> | ||||||
|  |  | ||||||
|  |   <para> | ||||||
|  |    <literal>bloom</> indexes accept the following parameters in the <literal>WITH</> | ||||||
|  |    clause. | ||||||
|  |   </para> | ||||||
|  |  | ||||||
|  |    <variablelist> | ||||||
|  |    <varlistentry> | ||||||
|  |     <term><literal>length</></term> | ||||||
|  |     <listitem> | ||||||
|  |      <para> | ||||||
|  |       Length of signature in uint16 type values | ||||||
|  |      </para> | ||||||
|  |     </listitem> | ||||||
|  |    </varlistentry> | ||||||
|  |    </variablelist> | ||||||
|  |    <variablelist> | ||||||
|  |    <varlistentry> | ||||||
|  |     <term><literal>col1 — col16</></term> | ||||||
|  |     <listitem> | ||||||
|  |      <para> | ||||||
|  |       Number of bits for corresponding column | ||||||
|  |      </para> | ||||||
|  |     </listitem> | ||||||
|  |    </varlistentry> | ||||||
|  |    </variablelist> | ||||||
|  |  </sect2> | ||||||
|  |  | ||||||
|  |  <sect2> | ||||||
|  |   <title>Examples</title> | ||||||
|  |  | ||||||
|  |   <para> | ||||||
|  |    Example of index definition is given below. | ||||||
|  |   </para> | ||||||
|  |  | ||||||
|  | <programlisting> | ||||||
|  | CREATE INDEX bloomidx ON tbloom USING bloom (i1,i2,i3) | ||||||
|  |        WITH (length=5, col1=2, col2=2, col3=4); | ||||||
|  | </programlisting> | ||||||
|  |  | ||||||
|  |   <para> | ||||||
|  |    Here, we create a bloom index with a signature length of 80 bits, with attributes | ||||||
|  |    i1 and i2 mapped to 2 bits each and attribute i3 mapped to 4 bits. | ||||||
|  |   </para> | ||||||
|  |  | ||||||
|  |   <para> | ||||||
|  |    Example of index definition and usage is given below. | ||||||
|  |   </para> | ||||||
|  |  | ||||||
|  | <programlisting> | ||||||
|  | CREATE TABLE tbloom AS | ||||||
|  | SELECT | ||||||
|  |     random()::int as i1, | ||||||
|  |     random()::int as i2, | ||||||
|  |     random()::int as i3, | ||||||
|  |     random()::int as i4, | ||||||
|  |     random()::int as i5, | ||||||
|  |     random()::int as i6, | ||||||
|  |     random()::int as i7, | ||||||
|  |     random()::int as i8, | ||||||
|  |     random()::int as i9, | ||||||
|  |     random()::int as i10, | ||||||
|  |     random()::int as i11, | ||||||
|  |     random()::int as i12, | ||||||
|  |     random()::int as i13 | ||||||
|  | FROM | ||||||
|  |     generate_series(1,1000); | ||||||
|  | CREATE INDEX bloomidx ON tbloom USING | ||||||
|  |              bloom (i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12); | ||||||
|  | SELECT pg_relation_size('bloomidx'); | ||||||
|  | CREATE index btree_idx ON tbloom(i1,i2,i3,i4,i5,i6,i7,i8,i9,i10,i11,i12); | ||||||
|  | SELECT pg_relation_size('btree_idx'); | ||||||
|  | </programlisting> | ||||||
|  |  | ||||||
|  | <programlisting> | ||||||
|  | =# EXPLAIN ANALYZE SELECT * FROM tbloom WHERE i2 = 20 AND i10 = 15; | ||||||
|  |                                                    QUERY PLAN | ||||||
|  | ----------------------------------------------------------------------------------------------------------------- | ||||||
|  |  Bitmap Heap Scan on tbloom  (cost=1.50..5.52 rows=1 width=52) (actual time=0.057..0.057 rows=0 loops=1) | ||||||
|  |    Recheck Cond: ((i2 = 20) AND (i10 = 15)) | ||||||
|  |    ->  Bitmap Index Scan on bloomidx  (cost=0.00..1.50 rows=1 width=0) (actual time=0.041..0.041 rows=9 loops=1) | ||||||
|  |          Index Cond: ((i2 = 20) AND (i10 = 15)) | ||||||
|  |  Total runtime: 0.081 ms | ||||||
|  | (5 rows) | ||||||
|  | </programlisting> | ||||||
|  |  | ||||||
|  |   <para> | ||||||
|  |    Seqscan is slow. | ||||||
|  |   </para> | ||||||
|  |  | ||||||
|  | <programlisting> | ||||||
|  | =# SET enable_bitmapscan = off; | ||||||
|  | =# SET enable_indexscan = off; | ||||||
|  | =# EXPLAIN ANALYZE SELECT * FROM tbloom WHERE i2 = 20 AND i10 = 15; | ||||||
|  |                                             QUERY PLAN | ||||||
|  | -------------------------------------------------------------------------------------------------- | ||||||
|  |  Seq Scan on tbloom  (cost=0.00..25.00 rows=1 width=52) (actual time=0.162..0.162 rows=0 loops=1) | ||||||
|  |    Filter: ((i2 = 20) AND (i10 = 15)) | ||||||
|  |  Total runtime: 0.181 ms | ||||||
|  | (3 rows) | ||||||
|  | </programlisting> | ||||||
|  |  | ||||||
|  |  <para> | ||||||
|  |   A btree index will not be used for this query. | ||||||
|  |  </para> | ||||||
|  |  | ||||||
|  | <programlisting> | ||||||
|  | =# DROP INDEX bloomidx; | ||||||
|  | =# CREATE INDEX btree_idx ON tbloom(i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12); | ||||||
|  | =# EXPLAIN ANALYZE SELECT * FROM tbloom WHERE i2 = 20 AND i10 = 15; | ||||||
|  |                                             QUERY PLAN | ||||||
|  | -------------------------------------------------------------------------------------------------- | ||||||
|  |  Seq Scan on tbloom (cost=0.00..25.00 rows=1 width=52) (actual time=0.210..0.210 rows=0 loops=1) | ||||||
|  |    Filter: ((i2 = 20) AND (i10 = 15)) | ||||||
|  |  Total runtime: 0.250 ms | ||||||
|  | (3 rows) | ||||||
|  | </programlisting> | ||||||
|  |  </sect2> | ||||||
|  |  | ||||||
|  |  <sect2> | ||||||
|  |   <title>Opclass interface</title> | ||||||
|  |  | ||||||
|  |   <para> | ||||||
|  |    The bloom opclass interface is simple.  It requires one support function: | ||||||
|  |    a hash function for the indexed datatype.  And it provides one search operator: | ||||||
|  |    the equality operator.  The example below shows an <literal>opclass</> definition | ||||||
|  |    for the <literal>text</> datatype. | ||||||
|  |   </para> | ||||||
|  |  | ||||||
|  | <programlisting> | ||||||
|  | CREATE OPERATOR CLASS text_ops | ||||||
|  | DEFAULT FOR TYPE text USING bloom AS | ||||||
|  |     OPERATOR    1   =(text, text), | ||||||
|  |     FUNCTION    1   hashtext(text); | ||||||
|  | </programlisting> | ||||||
|  |  </sect2> | ||||||
|  |  | ||||||
|  |  <sect2> | ||||||
|  |   <title>Limitation</title> | ||||||
|  |   <para> | ||||||
|  |  | ||||||
|  |    <itemizedlist> | ||||||
|  |     <listitem> | ||||||
|  |      <para> | ||||||
|  |       For now, only opclasses for <literal>int4</> and <literal>text</> come | ||||||
|  |       with the module.  However, users may define more of them. | ||||||
|  |      </para> | ||||||
|  |     </listitem> | ||||||
|  |  | ||||||
|  |     <listitem> | ||||||
|  |      <para> | ||||||
|  |       Only the <literal>=</literal> operator is supported for search now.  But it is | ||||||
|  |       possible to add support for arrays with contains and intersection | ||||||
|  |       operations in the future. | ||||||
|  |      </para> | ||||||
|  |     </listitem> | ||||||
|  |    </itemizedlist> | ||||||
|  |   </para> | ||||||
|  |  </sect2> | ||||||
|  |  | ||||||
|  |  <sect2> | ||||||
|  |   <title>Authors</title> | ||||||
|  |  | ||||||
|  |   <para> | ||||||
|  |    Teodor Sigaev <email>teodor@postgrespro.ru</email>, Postgres Professional, Moscow, Russia | ||||||
|  |   </para> | ||||||
|  |  | ||||||
|  |   <para> | ||||||
|  |    Alexander Korotkov <email>a.korotkov@postgrespro.ru</email>, Postgres Professional, Moscow, Russia | ||||||
|  |   </para> | ||||||
|  |  | ||||||
|  |   <para> | ||||||
|  |    Oleg Bartunov <email>obartunov@postgrespro.ru</email>, Postgres Professional, Moscow, Russia | ||||||
|  |   </para> | ||||||
|  |  </sect2> | ||||||
|  |  | ||||||
|  | </sect1> | ||||||
| @@ -105,6 +105,7 @@ CREATE EXTENSION <replaceable>module_name</> FROM unpackaged; | |||||||
|  &adminpack; |  &adminpack; | ||||||
|  &auth-delay; |  &auth-delay; | ||||||
|  &auto-explain; |  &auto-explain; | ||||||
|  |  &bloom; | ||||||
|  &btree-gin; |  &btree-gin; | ||||||
|  &btree-gist; |  &btree-gist; | ||||||
|  &chkpass; |  &chkpass; | ||||||
|   | |||||||
| @@ -107,6 +107,7 @@ | |||||||
| <!ENTITY adminpack       SYSTEM "adminpack.sgml"> | <!ENTITY adminpack       SYSTEM "adminpack.sgml"> | ||||||
| <!ENTITY auth-delay      SYSTEM "auth-delay.sgml"> | <!ENTITY auth-delay      SYSTEM "auth-delay.sgml"> | ||||||
| <!ENTITY auto-explain    SYSTEM "auto-explain.sgml"> | <!ENTITY auto-explain    SYSTEM "auto-explain.sgml"> | ||||||
|  | <!ENTITY bloom           SYSTEM "bloom.sgml"> | ||||||
| <!ENTITY btree-gin       SYSTEM "btree-gin.sgml"> | <!ENTITY btree-gin       SYSTEM "btree-gin.sgml"> | ||||||
| <!ENTITY btree-gist      SYSTEM "btree-gist.sgml"> | <!ENTITY btree-gist      SYSTEM "btree-gist.sgml"> | ||||||
| <!ENTITY chkpass         SYSTEM "chkpass.sgml"> | <!ENTITY chkpass         SYSTEM "chkpass.sgml"> | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user