1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-27 12:41:57 +03:00

Bloom index contrib module

Module provides new access method. It is actually a simple Bloom filter
implemented as pgsql's index. It could give some benefits on search
with large number of columns.

Module is a single way to test generic WAL interface committed earlier.

Author: Teodor Sigaev, Alexander Korotkov
Reviewers: Aleksander Alekseev, Michael Paquier, Jim Nasby
This commit is contained in:
Teodor Sigaev
2016-04-01 16:42:24 +03:00
parent 4e56e5a6de
commit 9ee014fc89
18 changed files with 2126 additions and 0 deletions

View File

@ -8,6 +8,7 @@ SUBDIRS = \
adminpack \
auth_delay \
auto_explain \
bloom \
btree_gin \
btree_gist \
chkpass \

4
contrib/bloom/.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
# Generated subdirectories
/log/
/results/
/tmp_check/

24
contrib/bloom/Makefile Normal file
View File

@ -0,0 +1,24 @@
# contrib/bloom/Makefile
MODULE_big = bloom
OBJS = blcost.o blinsert.o blscan.o blutils.o blvacuum.o blvalidate.o $(WIN32RES)
EXTENSION = bloom
DATA = bloom--1.0.sql
PGFILEDESC = "bloom access method - signature file based index"
REGRESS = bloom
ifdef USE_PGXS
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
else
subdir = contrib/bloom
top_builddir = ../..
include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif
wal-check: temp-install
$(prove_check)

48
contrib/bloom/blcost.c Normal file
View File

@ -0,0 +1,48 @@
/*-------------------------------------------------------------------------
*
* blcost.c
* Cost estimate function for bloom indexes.
*
* Copyright (c) 2016, PostgreSQL Global Development Group
*
* IDENTIFICATION
* contrib/bloom/blcost.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "fmgr.h"
#include "optimizer/cost.h"
#include "utils/selfuncs.h"
#include "bloom.h"
/*
* Estimate cost of bloom index scan.
*/
void
blcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
Cost *indexStartupCost, Cost *indexTotalCost,
Selectivity *indexSelectivity, double *indexCorrelation)
{
IndexOptInfo *index = path->indexinfo;
List *qinfos;
GenericCosts costs;
/* Do preliminary analysis of indexquals */
qinfos = deconstruct_indexquals(path);
MemSet(&costs, 0, sizeof(costs));
/* We have to visit all index tuples anyway */
costs.numIndexTuples = index->tuples;
/* Use generic estimate */
genericcostestimate(root, path, loop_count, qinfos, &costs);
*indexStartupCost = costs.indexStartupCost;
*indexTotalCost = costs.indexTotalCost;
*indexSelectivity = costs.indexSelectivity;
*indexCorrelation = costs.indexCorrelation;
}

313
contrib/bloom/blinsert.c Normal file
View File

@ -0,0 +1,313 @@
/*-------------------------------------------------------------------------
*
* blinsert.c
* Bloom index build and insert functions.
*
* Copyright (c) 2016, PostgreSQL Global Development Group
*
* IDENTIFICATION
* contrib/bloom/blinsert.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/genam.h"
#include "access/generic_xlog.h"
#include "catalog/index.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/indexfsm.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "bloom.h"
PG_MODULE_MAGIC;
/*
* State of bloom index build. We accumulate one page data here before
* flushing it to buffer manager.
*/
typedef struct
{
BloomState blstate; /* bloom index state */
MemoryContext tmpCtx; /* temporary memory context reset after
* each tuple */
char data[BLCKSZ]; /* cached page */
int64 count; /* number of tuples in cached page */
} BloomBuildState;
/*
* Flush page cached in BloomBuildState.
*/
static void
flushCachedPage(Relation index, BloomBuildState *buildstate)
{
Page page;
Buffer buffer = BloomNewBuffer(index);
GenericXLogState *state;
state = GenericXLogStart(index);
page = GenericXLogRegister(state, buffer, true);
memcpy(page, buildstate->data, BLCKSZ);
GenericXLogFinish(state);
UnlockReleaseBuffer(buffer);
}
/*
* (Re)initialize cached page in BloomBuildState.
*/
static void
initCachedPage(BloomBuildState *buildstate)
{
memset(buildstate->data, 0, BLCKSZ);
BloomInitPage(buildstate->data, 0);
buildstate->count = 0;
}
/*
* Per-tuple callback from IndexBuildHeapScan.
*/
static void
bloomBuildCallback(Relation index, HeapTuple htup, Datum *values,
bool *isnull, bool tupleIsAlive, void *state)
{
BloomBuildState *buildstate = (BloomBuildState *) state;
MemoryContext oldCtx;
BloomTuple *itup;
oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
itup = BloomFormTuple(&buildstate->blstate, &htup->t_self, values, isnull);
/* Try to add next item to cached page */
if (BloomPageAddItem(&buildstate->blstate, buildstate->data, itup))
{
/* Next item was added successfully */
buildstate->count++;
}
else
{
/* Cached page is full, flush it out and make a new one */
flushCachedPage(index, buildstate);
CHECK_FOR_INTERRUPTS();
initCachedPage(buildstate);
if (BloomPageAddItem(&buildstate->blstate, buildstate->data, itup) == false)
{
/* We shouldn't be here since we're inserting to the empty page */
elog(ERROR, "can not add new tuple");
}
}
MemoryContextSwitchTo(oldCtx);
MemoryContextReset(buildstate->tmpCtx);
}
/*
* Build a new bloom index.
*/
IndexBuildResult *
blbuild(Relation heap, Relation index, IndexInfo *indexInfo)
{
IndexBuildResult *result;
double reltuples;
BloomBuildState buildstate;
if (RelationGetNumberOfBlocks(index) != 0)
elog(ERROR, "index \"%s\" already contains data",
RelationGetRelationName(index));
/* Initialize the meta page */
BloomInitMetapage(index);
/* Initialize the bloom build state */
memset(&buildstate, 0, sizeof(buildstate));
initBloomState(&buildstate.blstate, index);
buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
"Bloom build temporary context",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
initCachedPage(&buildstate);
/* Do the heap scan */
reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
bloomBuildCallback, (void *) &buildstate);
/*
* There are could be some items in cached page. Flush this page
* if needed.
*/
if (buildstate.count > 0)
flushCachedPage(index, &buildstate);
MemoryContextDelete(buildstate.tmpCtx);
result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
result->heap_tuples = result->index_tuples = reltuples;
return result;
}
/*
* Build an empty bloom index in the initialization fork.
*/
void
blbuildempty(Relation index)
{
if (RelationGetNumberOfBlocks(index) != 0)
elog(ERROR, "index \"%s\" already contains data",
RelationGetRelationName(index));
/* Initialize the meta page */
BloomInitMetapage(index);
}
/*
* Insert new tuple to the bloom index.
*/
bool
blinsert(Relation index, Datum *values, bool *isnull,
ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique)
{
BloomState blstate;
BloomTuple *itup;
MemoryContext oldCtx;
MemoryContext insertCtx;
BloomMetaPageData *metaData;
Buffer buffer,
metaBuffer;
Page page,
metaPage;
BlockNumber blkno = InvalidBlockNumber;
OffsetNumber nStart;
GenericXLogState *state;
insertCtx = AllocSetContextCreate(CurrentMemoryContext,
"Bloom insert temporary context",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
oldCtx = MemoryContextSwitchTo(insertCtx);
initBloomState(&blstate, index);
itup = BloomFormTuple(&blstate, ht_ctid, values, isnull);
/*
* At first, try to insert new tuple to the first page in notFullPage
* array. If success we don't need to modify the meta page.
*/
metaBuffer = ReadBuffer(index, BLOOM_METAPAGE_BLKNO);
LockBuffer(metaBuffer, BUFFER_LOCK_SHARE);
metaData = BloomPageGetMeta(BufferGetPage(metaBuffer));
if (metaData->nEnd > metaData->nStart)
{
Page page;
blkno = metaData->notFullPage[metaData->nStart];
Assert(blkno != InvalidBlockNumber);
LockBuffer(metaBuffer, BUFFER_LOCK_UNLOCK);
buffer = ReadBuffer(index, blkno);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
state = GenericXLogStart(index);
page = GenericXLogRegister(state, buffer, false);
if (BloomPageAddItem(&blstate, page, itup))
{
GenericXLogFinish(state);
UnlockReleaseBuffer(buffer);
ReleaseBuffer(metaBuffer);
MemoryContextSwitchTo(oldCtx);
MemoryContextDelete(insertCtx);
return false;
}
else
{
GenericXLogAbort(state);
UnlockReleaseBuffer(buffer);
}
}
else
{
/* First page in notFullPage isn't suitable */
LockBuffer(metaBuffer, BUFFER_LOCK_UNLOCK);
}
/*
* Try other pages in notFullPage array. We will have to change nStart in
* metapage. Thus, grab exclusive lock on metapage.
*/
LockBuffer(metaBuffer, BUFFER_LOCK_EXCLUSIVE);
state = GenericXLogStart(index);
metaPage = GenericXLogRegister(state, metaBuffer, false);
metaData = BloomPageGetMeta(metaPage);
/*
* Iterate over notFullPage array. Skip page we already tried first.
*/
nStart = metaData->nStart;
if (metaData->nEnd > nStart &&
blkno == metaData->notFullPage[nStart])
nStart++;
while (metaData->nEnd > nStart)
{
blkno = metaData->notFullPage[nStart];
Assert(blkno != InvalidBlockNumber);
buffer = ReadBuffer(index, blkno);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
page = GenericXLogRegister(state, buffer, false);
if (BloomPageAddItem(&blstate, page, itup))
{
metaData->nStart = nStart;
GenericXLogFinish(state);
UnlockReleaseBuffer(buffer);
UnlockReleaseBuffer(metaBuffer);
MemoryContextSwitchTo(oldCtx);
MemoryContextDelete(insertCtx);
return false;
}
else
{
GenericXLogUnregister(state, buffer);
UnlockReleaseBuffer(buffer);
}
nStart++;
}
GenericXLogAbort(state);
/*
* Didn't find place to insert in notFullPage array. Allocate new page.
*/
buffer = BloomNewBuffer(index);
state = GenericXLogStart(index);
metaPage = GenericXLogRegister(state, metaBuffer, false);
metaData = BloomPageGetMeta(metaPage);
page = GenericXLogRegister(state, buffer, true);
BloomInitPage(page, 0);
BloomPageAddItem(&blstate, page, itup);
metaData->nStart = 0;
metaData->nEnd = 1;
metaData->notFullPage[0] = BufferGetBlockNumber(buffer);
GenericXLogFinish(state);
UnlockReleaseBuffer(buffer);
UnlockReleaseBuffer(metaBuffer);
return false;
}

View File

@ -0,0 +1,19 @@
CREATE OR REPLACE FUNCTION blhandler(internal)
RETURNS index_am_handler
AS 'MODULE_PATHNAME'
LANGUAGE C;
-- Access method
CREATE ACCESS METHOD bloom TYPE INDEX HANDLER blhandler;
-- Opclasses
CREATE OPERATOR CLASS int4_ops
DEFAULT FOR TYPE int4 USING bloom AS
OPERATOR 1 =(int4, int4),
FUNCTION 1 hashint4(int4);
CREATE OPERATOR CLASS text_ops
DEFAULT FOR TYPE text USING bloom AS
OPERATOR 1 =(text, text),
FUNCTION 1 hashtext(text);

View File

@ -0,0 +1,5 @@
# bloom extension
comment = 'bloom access method - signature file based index'
default_version = '1.0'
module_pathname = '$libdir/bloom'
relocatable = true

178
contrib/bloom/bloom.h Normal file
View File

@ -0,0 +1,178 @@
/*-------------------------------------------------------------------------
*
* bloom.h
* Header for bloom index.
*
* Copyright (c) 2016, PostgreSQL Global Development Group
*
* IDENTIFICATION
* contrib/bloom/bloom.h
*
*-------------------------------------------------------------------------
*/
#ifndef _BLOOM_H_
#define _BLOOM_H_
#include "access/amapi.h"
#include "access/generic_xlog.h"
#include "access/itup.h"
#include "access/xlog.h"
#include "nodes/relation.h"
#include "fmgr.h"
/* Support procedures numbers */
#define BLOOM_HASH_PROC 1
#define BLOOM_NPROC 1
/* Scan strategies */
#define BLOOM_EQUAL_STRATEGY 1
#define BLOOM_NSTRATEGIES 1
/* Opaque for bloom pages */
typedef struct BloomPageOpaqueData
{
OffsetNumber maxoff;
uint16 flags;
} BloomPageOpaqueData;
typedef BloomPageOpaqueData *BloomPageOpaque;
/* Bloom page flags */
#define BLOOM_META (1<<0)
#define BLOOM_DELETED (2<<0)
/* Macros for accessing bloom page structures */
#define BloomPageGetOpaque(page) ((BloomPageOpaque) PageGetSpecialPointer(page))
#define BloomPageGetMaxOffset(page) (BloomPageGetOpaque(page)->maxoff)
#define BloomPageIsMeta(page) (BloomPageGetOpaque(page)->flags & BLOOM_META)
#define BloomPageIsDeleted(page) (BloomPageGetOpaque(page)->flags & BLOOM_DELETED)
#define BloomPageSetDeleted(page) (BloomPageGetOpaque(page)->flags |= BLOOM_DELETED)
#define BloomPageSetNonDeleted(page) (BloomPageGetOpaque(page)->flags &= ~BLOOM_DELETED)
#define BloomPageGetData(page) ((BloomTuple *)PageGetContents(page))
#define BloomPageGetTuple(state, page, offset) \
((BloomTuple *)(PageGetContents(page) \
+ (state)->sizeOfBloomTuple * ((offset) - 1)))
#define BloomPageGetNextTuple(state, tuple) \
((BloomTuple *)((Pointer)(tuple) + (state)->sizeOfBloomTuple))
/* Preserved page numbers */
#define BLOOM_METAPAGE_BLKNO (0)
#define BLOOM_HEAD_BLKNO (1) /* first data page */
/* Bloom index options */
typedef struct BloomOptions
{
int32 vl_len_; /* varlena header (do not touch directly!) */
int bloomLength; /* length of signature in uint16 */
int bitSize[INDEX_MAX_KEYS]; /* signature bits per index
* key */
} BloomOptions;
/*
* FreeBlockNumberArray - array of block numbers sized so that metadata fill
* all space in metapage.
*/
typedef BlockNumber FreeBlockNumberArray[
MAXALIGN_DOWN(
BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(BloomPageOpaqueData))
- MAXALIGN(sizeof(uint16) * 2 + sizeof(uint32) + sizeof(BloomOptions))
) / sizeof(BlockNumber)
];
/* Metadata of bloom index */
typedef struct BloomMetaPageData
{
uint32 magickNumber;
uint16 nStart;
uint16 nEnd;
BloomOptions opts;
FreeBlockNumberArray notFullPage;
} BloomMetaPageData;
/* Magic number to distinguish bloom pages among anothers */
#define BLOOM_MAGICK_NUMBER (0xDBAC0DED)
/* Number of blocks numbers fit in BloomMetaPageData */
#define BloomMetaBlockN (sizeof(FreeBlockNumberArray) / sizeof(BlockNumber))
#define BloomPageGetMeta(page) ((BloomMetaPageData *) PageGetContents(page))
typedef struct BloomState
{
FmgrInfo hashFn[INDEX_MAX_KEYS];
BloomOptions *opts; /* stored in rd_amcache and defined at
* creation time */
int32 nColumns;
/*
* sizeOfBloomTuple is index's specific, and it depends on reloptions, so
* precompute it
*/
int32 sizeOfBloomTuple;
} BloomState;
#define BloomPageGetFreeSpace(state, page) \
(BLCKSZ - MAXALIGN(SizeOfPageHeaderData) \
- BloomPageGetMaxOffset(page) * (state)->sizeOfBloomTuple \
- MAXALIGN(sizeof(BloomPageOpaqueData)))
/*
* Tuples are very different from all other relations
*/
typedef uint16 SignType;
typedef struct BloomTuple
{
ItemPointerData heapPtr;
SignType sign[1];
} BloomTuple;
#define BLOOMTUPLEHDRSZ offsetof(BloomTuple, sign)
/* Opaque data structure for bloom index scan */
typedef struct BloomScanOpaqueData
{
SignType *sign; /* Scan signature */
BloomState state;
} BloomScanOpaqueData;
typedef BloomScanOpaqueData *BloomScanOpaque;
/* blutils.c */
extern void _PG_init(void);
extern Datum blhandler(PG_FUNCTION_ARGS);
extern void initBloomState(BloomState * state, Relation index);
extern void BloomInitMetapage(Relation index);
extern void BloomInitPage(Page page, uint16 flags);
extern Buffer BloomNewBuffer(Relation index);
extern void signValue(BloomState * state, SignType * sign, Datum value, int attno);
extern BloomTuple *BloomFormTuple(BloomState * state, ItemPointer iptr, Datum *values, bool *isnull);
extern bool BloomPageAddItem(BloomState * state, Page page, BloomTuple * tuple);
/* blvalidate.c */
extern bool blvalidate(Oid opclassoid);
/* index access method interface functions */
extern bool blinsert(Relation index, Datum *values, bool *isnull,
ItemPointer ht_ctid, Relation heapRel,
IndexUniqueCheck checkUnique);
extern IndexScanDesc blbeginscan(Relation r, int nkeys, int norderbys);
extern int64 blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm);
extern void blrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
ScanKey orderbys, int norderbys);
extern void blendscan(IndexScanDesc scan);
extern IndexBuildResult *blbuild(Relation heap, Relation index,
struct IndexInfo *indexInfo);
extern void blbuildempty(Relation index);
extern IndexBulkDeleteResult *blbulkdelete(IndexVacuumInfo *info,
IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback,
void *callback_state);
extern IndexBulkDeleteResult *blvacuumcleanup(IndexVacuumInfo *info,
IndexBulkDeleteResult *stats);
extern bytea *bloptions(Datum reloptions, bool validate);
extern void blcostestimate(PlannerInfo *root, IndexPath *path,
double loop_count, Cost *indexStartupCost,
Cost *indexTotalCost, Selectivity *indexSelectivity,
double *indexCorrelation);
#endif

175
contrib/bloom/blscan.c Normal file
View File

@ -0,0 +1,175 @@
/*-------------------------------------------------------------------------
*
* blscan.c
* Bloom index scan functions.
*
* Copyright (c) 2016, PostgreSQL Global Development Group
*
* IDENTIFICATION
* contrib/bloom/blscan.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/relscan.h"
#include "pgstat.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "bloom.h"
/*
* Begin scan of bloom index.
*/
IndexScanDesc
blbeginscan(Relation r, int nkeys, int norderbys)
{
IndexScanDesc scan;
scan = RelationGetIndexScan(r, nkeys, norderbys);
return scan;
}
/*
* Rescan a bloom index.
*/
void
blrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
ScanKey orderbys, int norderbys)
{
BloomScanOpaque so;
so = (BloomScanOpaque) scan->opaque;
if (so == NULL)
{
/* if called from blbeginscan */
so = (BloomScanOpaque) palloc(sizeof(BloomScanOpaqueData));
initBloomState(&so->state, scan->indexRelation);
scan->opaque = so;
}
else
{
if (so->sign)
pfree(so->sign);
}
so->sign = NULL;
if (scankey && scan->numberOfKeys > 0)
{
memmove(scan->keyData, scankey,
scan->numberOfKeys * sizeof(ScanKeyData));
}
}
/*
* End scan of bloom index.
*/
void
blendscan(IndexScanDesc scan)
{
BloomScanOpaque so = (BloomScanOpaque) scan->opaque;
if (so->sign)
pfree(so->sign);
so->sign = NULL;
}
/*
* Insert all matching tuples into to a bitmap.
*/
int64
blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
{
int64 ntids = 0;
BlockNumber blkno = BLOOM_HEAD_BLKNO,
npages;
int i;
BufferAccessStrategy bas;
BloomScanOpaque so = (BloomScanOpaque) scan->opaque;
if (so->sign == NULL && scan->numberOfKeys > 0)
{
/* New search: have to calculate search signature */
ScanKey skey = scan->keyData;
so->sign = palloc0(sizeof(SignType) * so->state.opts->bloomLength);
for (i = 0; i < scan->numberOfKeys; i++)
{
/*
* Assume bloom-indexable operators to be strict, so nothing could
* be found for NULL key.
*/
if (skey->sk_flags & SK_ISNULL)
{
pfree(so->sign);
so->sign = NULL;
return 0;
}
/* Add next value to the signature */
signValue(&so->state, so->sign, skey->sk_argument,
skey->sk_attno - 1);
skey++;
}
}
/*
* We're going to read the whole index. This is why we use appropriate
* buffer access strategy.
*/
bas = GetAccessStrategy(BAS_BULKREAD);
npages = RelationGetNumberOfBlocks(scan->indexRelation);
for (blkno = BLOOM_HEAD_BLKNO; blkno < npages; blkno++)
{
Buffer buffer;
Page page;
buffer = ReadBufferExtended(scan->indexRelation, MAIN_FORKNUM,
blkno, RBM_NORMAL, bas);
LockBuffer(buffer, BUFFER_LOCK_SHARE);
page = BufferGetPage(buffer);
if (!BloomPageIsDeleted(page))
{
OffsetNumber offset,
maxOffset = BloomPageGetMaxOffset(page);
for (offset = 1; offset <= maxOffset; offset++)
{
BloomTuple *itup = BloomPageGetTuple(&so->state, page, offset);
bool res = true;
/* Check index signature with scan signature */
for (i = 0; res && i < so->state.opts->bloomLength; i++)
{
if ((itup->sign[i] & so->sign[i]) != so->sign[i])
res = false;
}
/* Add matching tuples to bitmap */
if (res)
{
tbm_add_tuples(tbm, &itup->heapPtr, 1, true);
ntids++;
}
}
}
UnlockReleaseBuffer(buffer);
CHECK_FOR_INTERRUPTS();
}
FreeAccessStrategy(bas);
return ntids;
}

463
contrib/bloom/blutils.c Normal file
View File

@ -0,0 +1,463 @@
/*-------------------------------------------------------------------------
*
* blutils.c
* Bloom index utilities.
*
* Portions Copyright (c) 2016, PostgreSQL Global Development Group
* Portions Copyright (c) 1990-1993, Regents of the University of California
*
* IDENTIFICATION
* contrib/bloom/blutils.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/amapi.h"
#include "access/generic_xlog.h"
#include "catalog/index.h"
#include "storage/lmgr.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/indexfsm.h"
#include "utils/memutils.h"
#include "access/reloptions.h"
#include "storage/freespace.h"
#include "storage/indexfsm.h"
#include "bloom.h"
/* Signature dealing macros */
#define BITSIGNTYPE (BITS_PER_BYTE * sizeof(SignType))
#define GETWORD(x,i) ( *( (SignType*)(x) + (int)( (i) / BITSIGNTYPE ) ) )
#define CLRBIT(x,i) GETWORD(x,i) &= ~( 0x01 << ( (i) % BITSIGNTYPE ) )
#define SETBIT(x,i) GETWORD(x,i) |= ( 0x01 << ( (i) % BITSIGNTYPE ) )
#define GETBIT(x,i) ( (GETWORD(x,i) >> ( (i) % BITSIGNTYPE )) & 0x01 )
PG_FUNCTION_INFO_V1(blhandler);
/* Kind of relation optioms for bloom index */
static relopt_kind bl_relopt_kind;
static int32 myRand();
static void mySrand(uint32 seed);
/*
* Module initialize function: initilized relation options.
*/
void
_PG_init(void)
{
int i;
char buf[16];
bl_relopt_kind = add_reloption_kind();
add_int_reloption(bl_relopt_kind, "length",
"Length of signature in uint16 type", 5, 1, 256);
for (i = 0; i < INDEX_MAX_KEYS; i++)
{
snprintf(buf, 16, "col%d", i + 1);
add_int_reloption(bl_relopt_kind, buf,
"Number of bits for corresponding column", 2, 1, 2048);
}
}
/*
* Bloom handler function: return IndexAmRoutine with access method parameters
* and callbacks.
*/
Datum
blhandler(PG_FUNCTION_ARGS)
{
IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
amroutine->amstrategies = 1;
amroutine->amsupport = 1;
amroutine->amcanorder = false;
amroutine->amcanorderbyop = false;
amroutine->amcanbackward = false;
amroutine->amcanunique = false;
amroutine->amcanmulticol = true;
amroutine->amoptionalkey = true;
amroutine->amsearcharray = false;
amroutine->amsearchnulls = false;
amroutine->amstorage = false;
amroutine->amclusterable = false;
amroutine->ampredlocks = false;
amroutine->amkeytype = 0;
amroutine->aminsert = blinsert;
amroutine->ambeginscan = blbeginscan;
amroutine->amgettuple = NULL;
amroutine->amgetbitmap = blgetbitmap;
amroutine->amrescan = blrescan;
amroutine->amendscan = blendscan;
amroutine->ammarkpos = NULL;
amroutine->amrestrpos = NULL;
amroutine->ambuild = blbuild;
amroutine->ambuildempty = blbuildempty;
amroutine->ambulkdelete = blbulkdelete;
amroutine->amvacuumcleanup = blvacuumcleanup;
amroutine->amcanreturn = NULL;
amroutine->amcostestimate = blcostestimate;
amroutine->amoptions = bloptions;
amroutine->amvalidate = blvalidate;
PG_RETURN_POINTER(amroutine);
}
/*
* Fill BloomState structure for particular index.
*/
void
initBloomState(BloomState *state, Relation index)
{
int i;
state->nColumns = index->rd_att->natts;
/* Initialize hash function for each attribute */
for (i = 0; i < index->rd_att->natts; i++)
{
fmgr_info_copy(&(state->hashFn[i]),
index_getprocinfo(index, i + 1, BLOOM_HASH_PROC),
CurrentMemoryContext);
}
/* Initialize amcache if needed with options from metapage */
if (!index->rd_amcache)
{
Buffer buffer;
Page page;
BloomMetaPageData *meta;
BloomOptions *opts;
opts = MemoryContextAlloc(index->rd_indexcxt, sizeof(BloomOptions));
buffer = ReadBuffer(index, BLOOM_METAPAGE_BLKNO);
LockBuffer(buffer, BUFFER_LOCK_SHARE);
page = BufferGetPage(buffer);
if (!BloomPageIsMeta(page))
elog(ERROR, "Relation is not a bloom index");
meta = BloomPageGetMeta(BufferGetPage(buffer));
if (meta->magickNumber != BLOOM_MAGICK_NUMBER)
elog(ERROR, "Relation is not a bloom index");
*opts = meta->opts;
UnlockReleaseBuffer(buffer);
index->rd_amcache = (void *) opts;
}
state->opts = (BloomOptions *) index->rd_amcache;
state->sizeOfBloomTuple = BLOOMTUPLEHDRSZ +
sizeof(SignType) * state->opts->bloomLength;
}
/*
* Random generator copied from FreeBSD. Using own random generator here for
* two reasons:
*
* 1) In this case random numbers are used for on-disk storage. Usage of
* PostgreSQL number generator would obstruct it from all possible changes.
* 2) Changing seed of PostgreSQL random generator would be undesirable side
* effect.
*/
static int32 next;
static int32
myRand()
{
/*
* Compute x = (7^5 * x) mod (2^31 - 1)
* without overflowing 31 bits:
* (2^31 - 1) = 127773 * (7^5) + 2836
* From "Random number generators: good ones are hard to find",
* Park and Miller, Communications of the ACM, vol. 31, no. 10,
* October 1988, p. 1195.
*/
int32 hi, lo, x;
/* Must be in [1, 0x7ffffffe] range at this point. */
hi = next / 127773;
lo = next % 127773;
x = 16807 * lo - 2836 * hi;
if (x < 0)
x += 0x7fffffff;
next = x;
/* Transform to [0, 0x7ffffffd] range. */
return (x - 1);
}
void
mySrand(uint32 seed)
{
next = seed;
/* Transform to [1, 0x7ffffffe] range. */
next = (next % 0x7ffffffe) + 1;
}
/*
* Add bits of given value to the signature.
*/
void
signValue(BloomState *state, SignType *sign, Datum value, int attno)
{
uint32 hashVal;
int nBit,
j;
/*
* init generator with "column's" number to get "hashed" seed for new
* value. We don't want to map the same numbers from different columns
* into the same bits!
*/
mySrand(attno);
/*
* Init hash sequence to map our value into bits. the same values in
* different columns will be mapped into different bits because of step
* above
*/
hashVal = DatumGetInt32(FunctionCall1(&state->hashFn[attno], value));
mySrand(hashVal ^ myRand());
for (j = 0; j < state->opts->bitSize[attno]; j++)
{
/* prevent mutiple evaluation */
nBit = myRand() % (state->opts->bloomLength * BITSIGNTYPE);
SETBIT(sign, nBit);
}
}
/*
* Make bloom tuple from values.
*/
BloomTuple *
BloomFormTuple(BloomState *state, ItemPointer iptr, Datum *values, bool *isnull)
{
int i;
BloomTuple *res = (BloomTuple *) palloc0(state->sizeOfBloomTuple);
res->heapPtr = *iptr;
/* Blooming each column */
for (i = 0; i < state->nColumns; i++)
{
/* skip nulls */
if (isnull[i])
continue;
signValue(state, res->sign, values[i], i);
}
return res;
}
/*
* Add new bloom tuple to the page. Returns true if new tuple was successfully
* added to the page. Returns false if it doesn't git the page.
*/
bool
BloomPageAddItem(BloomState *state, Page page, BloomTuple *tuple)
{
BloomTuple *itup;
BloomPageOpaque opaque;
Pointer ptr;
/* Does new tuple fit the page */
if (BloomPageGetFreeSpace(state, page) < state->sizeOfBloomTuple)
return false;
/* Copy new tuple to the end of page */
opaque = BloomPageGetOpaque(page);
itup = BloomPageGetTuple(state, page, opaque->maxoff + 1);
memcpy((Pointer) itup, (Pointer) tuple, state->sizeOfBloomTuple);
/* Adjust maxoff and pd_lower */
opaque->maxoff++;
ptr = (Pointer) BloomPageGetTuple(state, page, opaque->maxoff + 1);
((PageHeader) page)->pd_lower = ptr - page;
return true;
}
/*
* Allocate a new page (either by recycling, or by extending the index file)
* The returned buffer is already pinned and exclusive-locked
* Caller is responsible for initializing the page by calling BloomInitBuffer
*/
Buffer
BloomNewBuffer(Relation index)
{
Buffer buffer;
bool needLock;
/* First, try to get a page from FSM */
for (;;)
{
BlockNumber blkno = GetFreeIndexPage(index);
if (blkno == InvalidBlockNumber)
break;
buffer = ReadBuffer(index, blkno);
/*
* We have to guard against the possibility that someone else already
* recycled this page; the buffer may be locked if so.
*/
if (ConditionalLockBuffer(buffer))
{
Page page = BufferGetPage(buffer);
if (PageIsNew(page))
return buffer; /* OK to use, if never initialized */
if (BloomPageIsDeleted(page))
return buffer; /* OK to use */
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
}
/* Can't use it, so release buffer and try again */
ReleaseBuffer(buffer);
}
/* Must extend the file */
needLock = !RELATION_IS_LOCAL(index);
if (needLock)
LockRelationForExtension(index, ExclusiveLock);
buffer = ReadBuffer(index, P_NEW);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
if (needLock)
UnlockRelationForExtension(index, ExclusiveLock);
return buffer;
}
/*
* Initialize bloom page.
*/
void
BloomInitPage(Page page, uint16 flags)
{
BloomPageOpaque opaque;
PageInit(page, BLCKSZ, sizeof(BloomPageOpaqueData));
opaque = BloomPageGetOpaque(page);
memset(opaque, 0, sizeof(BloomPageOpaqueData));
opaque->flags = flags;
}
/*
* Adjust options of bloom index.
*/
static void
adjustBloomOptions(BloomOptions *opts)
{
int i;
/* Default length of bloom filter is 5 of 16-bit integers */
if (opts->bloomLength <= 0)
opts->bloomLength = 5;
else
opts->bloomLength = opts->bloomLength;
/* Check singnature length */
for (i = 0; i < INDEX_MAX_KEYS; i++)
{
/*
* Zero and negative number of bits is meaningless. Also setting
* more bits than signature have seems useless. Replace both cases
* with 2 bits default.
*/
if (opts->bitSize[i] <= 0
|| opts->bitSize[i] >= opts->bloomLength * sizeof(SignType))
opts->bitSize[i] = 2;
}
}
/*
* Initialize metapage for bloom index.
*/
void
BloomInitMetapage(Relation index)
{
Page metaPage;
Buffer metaBuffer;
BloomMetaPageData *metadata;
GenericXLogState *state;
/*
* Make a new buffer, since it first buffer it should be associated with
* block number 0 (BLOOM_METAPAGE_BLKNO).
*/
metaBuffer = BloomNewBuffer(index);
Assert(BufferGetBlockNumber(metaBuffer) == BLOOM_METAPAGE_BLKNO);
/* Initialize bloom index options */
if (!index->rd_options)
index->rd_options = palloc0(sizeof(BloomOptions));
adjustBloomOptions((BloomOptions *) index->rd_options);
/* Initialize contents of meta page */
state = GenericXLogStart(index);
metaPage = GenericXLogRegister(state, metaBuffer, true);
BloomInitPage(metaPage, BLOOM_META);
metadata = BloomPageGetMeta(metaPage);
memset(metadata, 0, sizeof(BloomMetaPageData));
metadata->magickNumber = BLOOM_MAGICK_NUMBER;
metadata->opts = *((BloomOptions *) index->rd_options);
((PageHeader) metaPage)->pd_lower += sizeof(BloomMetaPageData);
GenericXLogFinish(state);
UnlockReleaseBuffer(metaBuffer);
}
/*
* Initialize options for bloom index.
*/
bytea *
bloptions(Datum reloptions, bool validate)
{
relopt_value *options;
int numoptions;
BloomOptions *rdopts;
relopt_parse_elt tab[INDEX_MAX_KEYS + 1];
int i;
char buf[16];
/* Option for length of signature */
tab[0].optname = "length";
tab[0].opttype = RELOPT_TYPE_INT;
tab[0].offset = offsetof(BloomOptions, bloomLength);
/* Number of bits for each of possible columns: col1, col2, ... */
for (i = 0; i < INDEX_MAX_KEYS; i++)
{
snprintf(buf, sizeof(buf), "col%d", i + 1);
tab[i + 1].optname = pstrdup(buf);
tab[i + 1].opttype = RELOPT_TYPE_INT;
tab[i + 1].offset = offsetof(BloomOptions, bitSize[i]);
}
options = parseRelOptions(reloptions, validate, bl_relopt_kind, &numoptions);
rdopts = allocateReloptStruct(sizeof(BloomOptions), options, numoptions);
fillRelOptions((void *) rdopts, sizeof(BloomOptions), options, numoptions,
validate, tab, INDEX_MAX_KEYS + 1);
adjustBloomOptions(rdopts);
return (bytea *) rdopts;
}

212
contrib/bloom/blvacuum.c Normal file
View File

@ -0,0 +1,212 @@
/*-------------------------------------------------------------------------
*
* blvacuum.c
* Bloom VACUUM functions.
*
* Copyright (c) 2016, PostgreSQL Global Development Group
*
* IDENTIFICATION
* contrib/bloom/blvacuum.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/genam.h"
#include "catalog/storage.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "postmaster/autovacuum.h"
#include "storage/bufmgr.h"
#include "storage/indexfsm.h"
#include "storage/lmgr.h"
#include "bloom.h"
/*
* Bulk deletion of all index entries pointing to a set of heap tuples.
* The set of target tuples is specified via a callback routine that tells
* whether any given heap tuple (identified by ItemPointer) is being deleted.
*
* Result: a palloc'd struct containing statistical info for VACUUM displays.
*/
IndexBulkDeleteResult *
blbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
IndexBulkDeleteCallback callback, void *callback_state)
{
Relation index = info->index;
BlockNumber blkno,
npages;
FreeBlockNumberArray notFullPage;
int countPage = 0;
BloomState state;
Buffer buffer;
Page page;
GenericXLogState *gxlogState;
if (stats == NULL)
stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
initBloomState(&state, index);
/*
* Interate over the pages. We don't care about concurrently added pages,
* they can't contain tuples to delete.
*/
npages = RelationGetNumberOfBlocks(index);
for (blkno = BLOOM_HEAD_BLKNO; blkno < npages; blkno++)
{
BloomTuple *itup,
*itupPtr,
*itupEnd;
buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
RBM_NORMAL, info->strategy);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
gxlogState = GenericXLogStart(index);
page = GenericXLogRegister(gxlogState, buffer, false);
if (BloomPageIsDeleted(page))
{
UnlockReleaseBuffer(buffer);
CHECK_FOR_INTERRUPTS();
continue;
}
/* Iterate over the tuples */
itup = BloomPageGetTuple(&state, page, 1);
itupPtr = BloomPageGetTuple(&state, page, 1);
itupEnd = BloomPageGetTuple(&state, page, BloomPageGetMaxOffset(page) + 1);
while (itup < itupEnd)
{
/* Do we have to delete this tuple? */
if (callback(&itup->heapPtr, callback_state))
{
stats->tuples_removed += 1;
BloomPageGetOpaque(page)->maxoff--;
}
else
{
if (itupPtr != itup)
{
/*
* If we already delete something before, we have to move
* this tuple backward.
*/
memmove((Pointer) itupPtr, (Pointer) itup,
state.sizeOfBloomTuple);
}
stats->num_index_tuples++;
itupPtr = BloomPageGetNextTuple(&state, itupPtr);
}
itup = BloomPageGetNextTuple(&state, itup);
}
Assert(itupPtr == BloomPageGetTuple(&state, page, BloomPageGetMaxOffset(page) + 1));
if (!BloomPageIsDeleted(page) &&
BloomPageGetFreeSpace(&state, page) > state.sizeOfBloomTuple &&
countPage < BloomMetaBlockN)
notFullPage[countPage++] = blkno;
/* Did we delete something? */
if (itupPtr != itup)
{
/* Is it empty page now? */
if (itupPtr == BloomPageGetData(page))
BloomPageSetDeleted(page);
/* Adjust pg_lower */
((PageHeader) page)->pd_lower = (Pointer) itupPtr - page;
/* Finish WAL-logging */
GenericXLogFinish(gxlogState);
}
else
{
/* Didn't change anything: abort WAL-logging */
GenericXLogAbort(gxlogState);
}
UnlockReleaseBuffer(buffer);
CHECK_FOR_INTERRUPTS();
}
if (countPage > 0)
{
BloomMetaPageData *metaData;
buffer = ReadBuffer(index, BLOOM_METAPAGE_BLKNO);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
gxlogState = GenericXLogStart(index);
page = GenericXLogRegister(gxlogState, buffer, false);
metaData = BloomPageGetMeta(page);
memcpy(metaData->notFullPage, notFullPage, sizeof(FreeBlockNumberArray));
metaData->nStart = 0;
metaData->nEnd = countPage;
GenericXLogFinish(gxlogState);
UnlockReleaseBuffer(buffer);
}
return stats;
}
/*
* Post-VACUUM cleanup.
*
* Result: a palloc'd struct containing statistical info for VACUUM displays.
*/
IndexBulkDeleteResult *
blvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
{
Relation index = info->index;
BlockNumber npages,
blkno;
BlockNumber totFreePages;
if (info->analyze_only)
return stats;
if (stats == NULL)
stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
/*
* Iterate over the pages: insert deleted pages into FSM and collect
* statistics.
*/
npages = RelationGetNumberOfBlocks(index);
totFreePages = 0;
for (blkno = BLOOM_HEAD_BLKNO; blkno < npages; blkno++)
{
Buffer buffer;
Page page;
vacuum_delay_point();
buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
RBM_NORMAL, info->strategy);
LockBuffer(buffer, BUFFER_LOCK_SHARE);
page = (Page) BufferGetPage(buffer);
if (BloomPageIsDeleted(page))
{
RecordFreeIndexPage(index, blkno);
totFreePages++;
}
else
{
stats->num_index_tuples += BloomPageGetMaxOffset(page);
stats->estimated_count += BloomPageGetMaxOffset(page);
}
UnlockReleaseBuffer(buffer);
}
IndexFreeSpaceMapVacuum(info->index);
stats->pages_free = totFreePages;
stats->num_pages = RelationGetNumberOfBlocks(index);
return stats;
}

220
contrib/bloom/blvalidate.c Normal file
View File

@ -0,0 +1,220 @@
/*-------------------------------------------------------------------------
*
* blvalidate.c
* Opclass validator for bloom.
*
* Copyright (c) 2016, PostgreSQL Global Development Group
*
* IDENTIFICATION
* contrib/bloom/blvalidate.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/amvalidate.h"
#include "access/htup_details.h"
#include "catalog/pg_amop.h"
#include "catalog/pg_amproc.h"
#include "catalog/pg_opclass.h"
#include "catalog/pg_opfamily.h"
#include "catalog/pg_type.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
#include "bloom.h"
/*
* Validator for a bloom opclass.
*/
bool
blvalidate(Oid opclassoid)
{
bool result = true;
HeapTuple classtup;
Form_pg_opclass classform;
Oid opfamilyoid;
Oid opcintype;
Oid opckeytype;
char *opclassname;
HeapTuple familytup;
Form_pg_opfamily familyform;
char *opfamilyname;
CatCList *proclist,
*oprlist;
List *grouplist;
OpFamilyOpFuncGroup *opclassgroup;
int i;
ListCell *lc;
/* Fetch opclass information */
classtup = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclassoid));
if (!HeapTupleIsValid(classtup))
elog(ERROR, "cache lookup failed for operator class %u", opclassoid);
classform = (Form_pg_opclass) GETSTRUCT(classtup);
opfamilyoid = classform->opcfamily;
opcintype = classform->opcintype;
opckeytype = classform->opckeytype;
if (!OidIsValid(opckeytype))
opckeytype = opcintype;
opclassname = NameStr(classform->opcname);
/* Fetch opfamily information */
familytup = SearchSysCache1(OPFAMILYOID, ObjectIdGetDatum(opfamilyoid));
if (!HeapTupleIsValid(familytup))
elog(ERROR, "cache lookup failed for operator family %u", opfamilyoid);
familyform = (Form_pg_opfamily) GETSTRUCT(familytup);
opfamilyname = NameStr(familyform->opfname);
/* Fetch all operators and support functions of the opfamily */
oprlist = SearchSysCacheList1(AMOPSTRATEGY, ObjectIdGetDatum(opfamilyoid));
proclist = SearchSysCacheList1(AMPROCNUM, ObjectIdGetDatum(opfamilyoid));
/* Check individual support functions */
for (i = 0; i < proclist->n_members; i++)
{
HeapTuple proctup = &proclist->members[i]->tuple;
Form_pg_amproc procform = (Form_pg_amproc) GETSTRUCT(proctup);
bool ok;
/*
* All bloom support functions should be registered with matching
* left/right types
*/
if (procform->amproclefttype != procform->amprocrighttype)
{
ereport(INFO,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("bloom opfamily %s contains support procedure %s with cross-type registration",
opfamilyname,
format_procedure(procform->amproc))));
result = false;
}
/*
* We can't check signatures except within the specific opclass, since
* we need to know the associated opckeytype in many cases.
*/
if (procform->amproclefttype != opcintype)
continue;
/* Check procedure numbers and function signatures */
switch (procform->amprocnum)
{
case BLOOM_HASH_PROC:
ok = check_amproc_signature(procform->amproc, INT4OID, false,
1, 1, opckeytype);
break;
default:
ereport(INFO,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("bloom opfamily %s contains function %s with invalid support number %d",
opfamilyname,
format_procedure(procform->amproc),
procform->amprocnum)));
result = false;
continue; /* don't want additional message */
}
if (!ok)
{
ereport(INFO,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("gist opfamily %s contains function %s with wrong signature for support number %d",
opfamilyname,
format_procedure(procform->amproc),
procform->amprocnum)));
result = false;
}
}
/* Check individual operators */
for (i = 0; i < oprlist->n_members; i++)
{
HeapTuple oprtup = &oprlist->members[i]->tuple;
Form_pg_amop oprform = (Form_pg_amop) GETSTRUCT(oprtup);
/* Check it's allowed strategy for bloom */
if (oprform->amopstrategy < 1 ||
oprform->amopstrategy > BLOOM_NSTRATEGIES)
{
ereport(INFO,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("bloom opfamily %s contains operator %s with invalid strategy number %d",
opfamilyname,
format_operator(oprform->amopopr),
oprform->amopstrategy)));
result = false;
}
/* bloom doesn't support ORDER BY operators */
if (oprform->amoppurpose != AMOP_SEARCH ||
OidIsValid(oprform->amopsortfamily))
{
ereport(INFO,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("bloom opfamily %s contains invalid ORDER BY specification for operator %s",
opfamilyname,
format_operator(oprform->amopopr))));
result = false;
}
/* Check operator signature --- same for all bloom strategies */
if (!check_amop_signature(oprform->amopopr, BOOLOID,
oprform->amoplefttype,
oprform->amoprighttype))
{
ereport(INFO,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("bloom opfamily %s contains operator %s with wrong signature",
opfamilyname,
format_operator(oprform->amopopr))));
result = false;
}
}
/* Now check for inconsistent groups of operators/functions */
grouplist = identify_opfamily_groups(oprlist, proclist);
opclassgroup = NULL;
foreach(lc, grouplist)
{
OpFamilyOpFuncGroup *thisgroup = (OpFamilyOpFuncGroup *) lfirst(lc);
/* Remember the group exactly matching the test opclass */
if (thisgroup->lefttype == opcintype &&
thisgroup->righttype == opcintype)
opclassgroup = thisgroup;
/*
* There is not a lot we can do to check the operator sets, since each
* bloom opclass is more or less a law unto itself, and some contain
* only operators that are binary-compatible with the opclass datatype
* (meaning that empty operator sets can be OK). That case also means
* that we shouldn't insist on nonempty function sets except for the
* opclass's own group.
*/
}
/* Check that the originally-named opclass is complete */
for (i = 1; i <= BLOOM_NPROC; i++)
{
if (opclassgroup &&
(opclassgroup->functionset & (((uint64) 1) << i)) != 0)
continue; /* got it */
ereport(INFO,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("bloom opclass %s is missing support function %d",
opclassname, i)));
result = false;
}
ReleaseCatCacheList(proclist);
ReleaseCatCacheList(oprlist);
ReleaseSysCache(familytup);
ReleaseSysCache(classtup);
return result;
}

View File

@ -0,0 +1,122 @@
CREATE EXTENSION bloom;
CREATE TABLE tst (
i int4,
t text
);
INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,100000) i;
CREATE INDEX bloomidx ON tst USING bloom (i, t) WITH (col1 = 3);
SET enable_seqscan=on;
SET enable_bitmapscan=off;
SET enable_indexscan=off;
SELECT count(*) FROM tst WHERE i = 7;
count
-------
10000
(1 row)
SELECT count(*) FROM tst WHERE t = '5';
count
-------
6264
(1 row)
SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
count
-------
588
(1 row)
SET enable_seqscan=off;
SET enable_bitmapscan=on;
SET enable_indexscan=on;
EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE i = 7;
QUERY PLAN
-------------------------------------------
Aggregate
-> Bitmap Heap Scan on tst
Recheck Cond: (i = 7)
-> Bitmap Index Scan on bloomidx
Index Cond: (i = 7)
(5 rows)
EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE t = '5';
QUERY PLAN
-------------------------------------------
Aggregate
-> Bitmap Heap Scan on tst
Recheck Cond: (t = '5'::text)
-> Bitmap Index Scan on bloomidx
Index Cond: (t = '5'::text)
(5 rows)
EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
QUERY PLAN
---------------------------------------------------------
Aggregate
-> Bitmap Heap Scan on tst
Recheck Cond: ((i = 7) AND (t = '5'::text))
-> Bitmap Index Scan on bloomidx
Index Cond: ((i = 7) AND (t = '5'::text))
(5 rows)
SELECT count(*) FROM tst WHERE i = 7;
count
-------
10000
(1 row)
SELECT count(*) FROM tst WHERE t = '5';
count
-------
6264
(1 row)
SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
count
-------
588
(1 row)
DELETE FROM tst;
INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,100000) i;
VACUUM ANALYZE tst;
SELECT count(*) FROM tst WHERE i = 7;
count
-------
10000
(1 row)
SELECT count(*) FROM tst WHERE t = '5';
count
-------
6264
(1 row)
SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
count
-------
588
(1 row)
VACUUM FULL tst;
SELECT count(*) FROM tst WHERE i = 7;
count
-------
10000
(1 row)
SELECT count(*) FROM tst WHERE t = '5';
count
-------
6264
(1 row)
SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
count
-------
588
(1 row)
RESET enable_seqscan;
RESET enable_bitmapscan;
RESET enable_indexscan;

View File

@ -0,0 +1,47 @@
CREATE EXTENSION bloom;
CREATE TABLE tst (
i int4,
t text
);
INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,100000) i;
CREATE INDEX bloomidx ON tst USING bloom (i, t) WITH (col1 = 3);
SET enable_seqscan=on;
SET enable_bitmapscan=off;
SET enable_indexscan=off;
SELECT count(*) FROM tst WHERE i = 7;
SELECT count(*) FROM tst WHERE t = '5';
SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
SET enable_seqscan=off;
SET enable_bitmapscan=on;
SET enable_indexscan=on;
EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE i = 7;
EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE t = '5';
EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
SELECT count(*) FROM tst WHERE i = 7;
SELECT count(*) FROM tst WHERE t = '5';
SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
DELETE FROM tst;
INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,100000) i;
VACUUM ANALYZE tst;
SELECT count(*) FROM tst WHERE i = 7;
SELECT count(*) FROM tst WHERE t = '5';
SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
VACUUM FULL tst;
SELECT count(*) FROM tst WHERE i = 7;
SELECT count(*) FROM tst WHERE t = '5';
SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
RESET enable_seqscan;
RESET enable_bitmapscan;
RESET enable_indexscan;

View File

@ -0,0 +1,75 @@
# Test generic xlog record work for bloom index replication.
use strict;
use warnings;
use PostgresNode;
use TestLib;
use Test::More tests => 31;
my $node_master;
my $node_standby;
# Run few queries on both master and standby and check their results match.
sub test_index_replay
{
my ($test_name) = @_;
# Wait for standby to catch up
my $applname = $node_standby->name;
my $caughtup_query =
"SELECT pg_current_xlog_location() <= write_location FROM pg_stat_replication WHERE application_name = '$applname';";
$node_master->poll_query_until('postgres', $caughtup_query)
or die "Timed out while waiting for standby 1 to catch up";
my $queries = qq(SET enable_seqscan=off;
SET enable_bitmapscan=on;
SET enable_indexscan=on;
SELECT * FROM tst WHERE i = 0;
SELECT * FROM tst WHERE i = 3;
SELECT * FROM tst WHERE t = 'b';
SELECT * FROM tst WHERE t = 'f';
SELECT * FROM tst WHERE i = 3 AND t = 'c';
SELECT * FROM tst WHERE i = 7 AND t = 'e';
);
# Run test queries and compare their result
my $master_result = $node_master->psql("postgres", $queries);
my $standby_result = $node_standby->psql("postgres", $queries);
is($master_result, $standby_result, "$test_name: query result matches");
}
# Initialize master node
$node_master = get_new_node('master');
$node_master->init(allows_streaming => 1);
$node_master->start;
my $backup_name = 'my_backup';
# Take backup
$node_master->backup($backup_name);
# Create streaming standby linking to master
$node_standby = get_new_node('standby');
$node_standby->init_from_backup($node_master, $backup_name,
has_streaming => 1);
$node_standby->start;
# Create some bloom index on master
$node_master->psql("postgres", "CREATE EXTENSION bloom;");
$node_master->psql("postgres", "CREATE TABLE tst (i int4, t text);");
$node_master->psql("postgres", "INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,100000) i;");
$node_master->psql("postgres", "CREATE INDEX bloomidx ON tst USING bloom (i, t) WITH (col1 = 3);");
# Test that queries give same result
test_index_replay('initial');
# Run 10 cycles of table modification. Run test queries after each modification.
for my $i (1..10)
{
$node_master->psql("postgres", "DELETE FROM tst WHERE i = $i;");
test_index_replay("delete $i");
$node_master->psql("postgres", "VACUUM tst;");
test_index_replay("vacuum $i");
my ($start, $end) = (100001 + ($i - 1) * 10000, 100000 + $i * 10000);
$node_master->psql("postgres", "INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series($start,$end) i;");
test_index_replay("insert $i");
}