mirror of
https://github.com/postgres/postgres.git
synced 2025-07-28 23:42:10 +03:00
Speed up "rare & frequent" type GIN queries.
If you have a GIN query like "rare & frequent", we currently fetch all the items that match either rare or frequent, call the consistent function for each item, and let the consistent function filter out items that only match one of the terms. However, if we can deduce that "rare" must be present for the overall qual to be true, we can scan all the rare items, and for each rare item, skip over to the next frequent item with the same or greater TID. That greatly speeds up "rare & frequent" type queries. To implement that, introduce the concept of a tri-state consistent function, where the 3rd value is MAYBE, indicating that we don't know if that term is present. Operator classes only provide a boolean consistent function, so we simulate the tri-state consistent function by calling the boolean function several times, with the MAYBE arguments set to all combinations of TRUE and FALSE. Testing all combinations is only feasible for a small number of MAYBE arguments, but it is envisioned that we'll provide a way for operator classes to provide a native tri-state consistent function, which can be much more efficient. But that is not included in this patch. We were already using that trick to for lossy pages, calling the consistent function with the lossy entry set to TRUE and FALSE. Now that we have the tri-state consistent function, use it for lossy pages too. Alexander Korotkov, with fair amount of refactoring by me.
This commit is contained in:
180
src/backend/access/gin/ginlogic.c
Normal file
180
src/backend/access/gin/ginlogic.c
Normal file
@ -0,0 +1,180 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* ginlogic.c
|
||||
* routines for performing binary- and ternary-logic consistent checks.
|
||||
*
|
||||
* A GIN operator class provides a consistent function which checks if a
|
||||
* tuple matches a qual, when the given set of keys are present in the tuple.
|
||||
* The consistent function is passed a TRUE/FALSE argument for every key,
|
||||
* indicating if that key is present, and it returns TRUE or FALSE. However,
|
||||
* a GIN scan can apply various optimizations, if it can determine that an
|
||||
* item matches or doesn't match, even if it doesn't know if some of the keys
|
||||
* are present or not. Hence, it's useful to have a ternary-logic consistent
|
||||
* function, where where each key can be TRUE (present), FALSE (not present),
|
||||
* or MAYBE (don't know if present). This file provides such a ternary-logic
|
||||
* consistent function, implemented by calling the regular boolean consistent
|
||||
* function many times, with all the MAYBE arguments set to all combinations
|
||||
* of TRUE and FALSE.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* src/backend/access/gin/ginlogic.c
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include "access/gin_private.h"
|
||||
#include "access/reloptions.h"
|
||||
#include "catalog/pg_collation.h"
|
||||
#include "catalog/pg_type.h"
|
||||
#include "miscadmin.h"
|
||||
#include "storage/indexfsm.h"
|
||||
#include "storage/lmgr.h"
|
||||
|
||||
|
||||
/*
|
||||
* Maximum number of MAYBE inputs that shimTriConsistentFn will try to
|
||||
* resolve by calling all combinations.
|
||||
*/
|
||||
#define MAX_MAYBE_ENTRIES 4
|
||||
|
||||
/*
|
||||
* A dummy consistent function for an EVERYTHING key. Just claim it matches.
|
||||
*/
|
||||
static bool
|
||||
trueConsistentFn(GinScanKey key)
|
||||
{
|
||||
key->recheckCurItem = false;
|
||||
return true;
|
||||
}
|
||||
static GinLogicValue
|
||||
trueTriConsistentFn(GinScanKey key)
|
||||
{
|
||||
return GIN_MAYBE;
|
||||
}
|
||||
|
||||
/*
|
||||
* A helper function for calling a regular, binary logic, consistent function.
|
||||
*/
|
||||
static bool
|
||||
normalBoolConsistentFn(GinScanKey key)
|
||||
{
|
||||
/*
|
||||
* Initialize recheckCurItem in case the consistentFn doesn't know it
|
||||
* should set it. The safe assumption in that case is to force recheck.
|
||||
*/
|
||||
key->recheckCurItem = true;
|
||||
|
||||
return DatumGetBool(FunctionCall8Coll(key->consistentFmgrInfo,
|
||||
key->collation,
|
||||
PointerGetDatum(key->entryRes),
|
||||
UInt16GetDatum(key->strategy),
|
||||
key->query,
|
||||
UInt32GetDatum(key->nuserentries),
|
||||
PointerGetDatum(key->extra_data),
|
||||
PointerGetDatum(&key->recheckCurItem),
|
||||
PointerGetDatum(key->queryValues),
|
||||
PointerGetDatum(key->queryCategories)));
|
||||
}
|
||||
|
||||
/*
|
||||
* This function implements a tri-state consistency check, using a boolean
|
||||
* consistent function provided by the opclass.
|
||||
*
|
||||
* Our strategy is to call consistentFn with MAYBE inputs replaced with every
|
||||
* combination of TRUE/FALSE. If consistentFn returns the same value for every
|
||||
* combination, that's the overall result. Otherwise, return MAYBE. Testing
|
||||
* every combination is O(n^2), so this is only feasible for a small number of
|
||||
* MAYBE inputs.
|
||||
*/
|
||||
static GinLogicValue
|
||||
shimTriConsistentFn(GinScanKey key)
|
||||
{
|
||||
int nmaybe;
|
||||
int maybeEntries[MAX_MAYBE_ENTRIES];
|
||||
int i;
|
||||
bool boolResult;
|
||||
bool recheck = 0;
|
||||
GinLogicValue curResult;
|
||||
|
||||
/*
|
||||
* Count how many MAYBE inputs there are, and store their indexes in
|
||||
* maybeEntries. If there are too many MAYBE inputs, it's not feasible to
|
||||
* test all combinations, so give up and return MAYBE.
|
||||
*/
|
||||
nmaybe = 0;
|
||||
for (i = 0; i < key->nentries; i++)
|
||||
{
|
||||
if (key->entryRes[i] == GIN_MAYBE)
|
||||
{
|
||||
if (nmaybe >= MAX_MAYBE_ENTRIES)
|
||||
return GIN_MAYBE;
|
||||
maybeEntries[nmaybe++] = i;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If none of the inputs were MAYBE, so we can just call consistent
|
||||
* function as is.
|
||||
*/
|
||||
if (nmaybe == 0)
|
||||
return normalBoolConsistentFn(key);
|
||||
|
||||
/* Try the consistent function with the maybe-inputs set both ways */
|
||||
for (i = 0; i < nmaybe; i++)
|
||||
key->entryRes[maybeEntries[i]] = GIN_FALSE;
|
||||
curResult = normalBoolConsistentFn(key);
|
||||
|
||||
for (;;)
|
||||
{
|
||||
/* Twiddle the entries for next combination. */
|
||||
for (i = 0; i < nmaybe; i++)
|
||||
{
|
||||
if (key->entryRes[maybeEntries[i]] == GIN_FALSE)
|
||||
{
|
||||
key->entryRes[maybeEntries[i]] = GIN_TRUE;
|
||||
break;
|
||||
}
|
||||
else
|
||||
key->entryRes[maybeEntries[i]] = GIN_FALSE;
|
||||
}
|
||||
if (i == nmaybe)
|
||||
break;
|
||||
|
||||
boolResult = normalBoolConsistentFn(key);
|
||||
recheck |= key->recheckCurItem;
|
||||
|
||||
if (curResult != boolResult)
|
||||
return GIN_MAYBE;
|
||||
}
|
||||
|
||||
/* TRUE with recheck is taken to mean MAYBE */
|
||||
if (curResult == GIN_TRUE && recheck)
|
||||
curResult = GIN_MAYBE;
|
||||
|
||||
return curResult;
|
||||
}
|
||||
|
||||
/*
|
||||
* Set up the implementation of the consistent functions for a scan key.
|
||||
*/
|
||||
void
|
||||
ginInitConsistentFunction(GinState *ginstate, GinScanKey key)
|
||||
{
|
||||
if (key->searchMode == GIN_SEARCH_MODE_EVERYTHING)
|
||||
{
|
||||
key->boolConsistentFn = trueConsistentFn;
|
||||
key->triConsistentFn = trueTriConsistentFn;
|
||||
}
|
||||
else
|
||||
{
|
||||
key->consistentFmgrInfo = &ginstate->consistentFn[key->attnum - 1];
|
||||
key->collation = ginstate->supportCollation[key->attnum - 1];
|
||||
key->boolConsistentFn = normalBoolConsistentFn;
|
||||
key->triConsistentFn = shimTriConsistentFn;
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user