1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-10 17:42:29 +03:00
Files
postgres/src/backend/access/gin/ginlogic.c
Tom Lane e708ffe79d Fix GIN's shimTriConsistentFn to not corrupt its input.
Commit 0f21db36d made an assumption that GIN triConsistentFns
would not modify their input entryRes[] arrays.  But in fact,
the "shim" triConsistentFn that we use for opclasses that don't
supply their own did exactly that, potentially leading to wrong
answers from a GIN index search.  Through bad luck, none of the
test cases that we have for such opclasses exposed the bug.

One response to this could be that the assumption of consistency check
functions not modifying entryRes[] arrays is a bad one, but it still
seems reasonable to me.  Notably, shimTriConsistentFn is itself
assuming that with respect to the underlying boolean consistentFn,
so it's sure being self-centered in supposing that it gets to do so.

Fortunately, it's quite simple to fix shimTriConsistentFn to restore
the entry-time state of entryRes[], so let's do that instead.

This issue doesn't affect any core GIN opclasses, since they all
supply their own triConsistentFns.  It does affect contrib modules
btree_gin, hstore, and intarray.

Along the way, I (tgl) noticed that shimTriConsistentFn failed to
pick up on a "recheck" flag returned by its first call to the boolean
consistentFn.  This may be only a latent problem, since it would be
unlikely for a consistentFn to set recheck for the all-false case
and not any other cases.  (Indeed, none of our contrib modules do
that.)  Nonetheless, it's formally wrong.

Reported-by: Vinod Sridharan <vsridh90@gmail.com>
Author: Vinod Sridharan <vsridh90@gmail.com>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/CAFMdLD7XzsXfi1+DpTqTgrD8XU0i2C99KuF=5VHLWjx4C1pkcg@mail.gmail.com
Backpatch-through: 13
2025-04-12 12:28:02 -04:00

251 lines
7.3 KiB
C

/*-------------------------------------------------------------------------
*
* ginlogic.c
* routines for performing binary- and ternary-logic consistent checks.
*
* A GIN operator class can provide a boolean or ternary consistent
* function, or both. This file provides both boolean and ternary
* interfaces to the rest of the GIN code, even if only one of them is
* implemented by the opclass.
*
* Providing a boolean interface when the opclass implements only the
* ternary function is straightforward - just call the ternary function
* with the check-array as is, and map the GIN_TRUE, GIN_FALSE, GIN_MAYBE
* return codes to TRUE, FALSE and TRUE+recheck, respectively. Providing
* a ternary interface when the opclass only implements a boolean function
* is implemented by calling the boolean function many times, with all the
* MAYBE arguments set to all combinations of TRUE and FALSE (up to a
* certain number of MAYBE arguments).
*
* (A boolean function is enough to determine if an item matches, but a
* GIN scan can apply various optimizations if it can determine that an
* item matches or doesn't match, even if it doesn't know if some of the
* keys are present or not. That's what the ternary consistent function
* is used for.)
*
*
* Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/backend/access/gin/ginlogic.c
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/gin_private.h"
/*
* Maximum number of MAYBE inputs that shimTriConsistentFn will try to
* resolve by calling all combinations.
*/
#define MAX_MAYBE_ENTRIES 4
/*
* Dummy consistent functions for an EVERYTHING key. Just claim it matches.
*/
static bool
trueConsistentFn(GinScanKey key)
{
key->recheckCurItem = false;
return true;
}
static GinTernaryValue
trueTriConsistentFn(GinScanKey key)
{
return GIN_TRUE;
}
/*
* A helper function for calling a regular, binary logic, consistent function.
*/
static bool
directBoolConsistentFn(GinScanKey key)
{
/*
* Initialize recheckCurItem in case the consistentFn doesn't know it
* should set it. The safe assumption in that case is to force recheck.
*/
key->recheckCurItem = true;
return DatumGetBool(FunctionCall8Coll(key->consistentFmgrInfo,
key->collation,
PointerGetDatum(key->entryRes),
UInt16GetDatum(key->strategy),
key->query,
UInt32GetDatum(key->nuserentries),
PointerGetDatum(key->extra_data),
PointerGetDatum(&key->recheckCurItem),
PointerGetDatum(key->queryValues),
PointerGetDatum(key->queryCategories)));
}
/*
* A helper function for calling a native ternary logic consistent function.
*/
static GinTernaryValue
directTriConsistentFn(GinScanKey key)
{
return DatumGetGinTernaryValue(FunctionCall7Coll(key->triConsistentFmgrInfo,
key->collation,
PointerGetDatum(key->entryRes),
UInt16GetDatum(key->strategy),
key->query,
UInt32GetDatum(key->nuserentries),
PointerGetDatum(key->extra_data),
PointerGetDatum(key->queryValues),
PointerGetDatum(key->queryCategories)));
}
/*
* This function implements a binary logic consistency check, using a ternary
* logic consistent function provided by the opclass. GIN_MAYBE return value
* is interpreted as true with recheck flag.
*/
static bool
shimBoolConsistentFn(GinScanKey key)
{
GinTernaryValue result;
result = DatumGetGinTernaryValue(FunctionCall7Coll(key->triConsistentFmgrInfo,
key->collation,
PointerGetDatum(key->entryRes),
UInt16GetDatum(key->strategy),
key->query,
UInt32GetDatum(key->nuserentries),
PointerGetDatum(key->extra_data),
PointerGetDatum(key->queryValues),
PointerGetDatum(key->queryCategories)));
if (result == GIN_MAYBE)
{
key->recheckCurItem = true;
return true;
}
else
{
key->recheckCurItem = false;
return result;
}
}
/*
* This function implements a tri-state consistency check, using a boolean
* consistent function provided by the opclass.
*
* Our strategy is to call consistentFn with MAYBE inputs replaced with every
* combination of TRUE/FALSE. If consistentFn returns the same value for every
* combination, that's the overall result. Otherwise, return MAYBE. Testing
* every combination is O(n^2), so this is only feasible for a small number of
* MAYBE inputs.
*
* NB: This function modifies the key->entryRes array. For now that's okay
* so long as we restore the entry-time contents before returning. This may
* need revisiting if we ever invent multithreaded GIN scans, though.
*/
static GinTernaryValue
shimTriConsistentFn(GinScanKey key)
{
int nmaybe;
int maybeEntries[MAX_MAYBE_ENTRIES];
int i;
bool boolResult;
bool recheck;
GinTernaryValue curResult;
/*
* Count how many MAYBE inputs there are, and store their indexes in
* maybeEntries. If there are too many MAYBE inputs, it's not feasible to
* test all combinations, so give up and return MAYBE.
*/
nmaybe = 0;
for (i = 0; i < key->nentries; i++)
{
if (key->entryRes[i] == GIN_MAYBE)
{
if (nmaybe >= MAX_MAYBE_ENTRIES)
return GIN_MAYBE;
maybeEntries[nmaybe++] = i;
}
}
/*
* If none of the inputs were MAYBE, we can just call the consistent
* function as-is.
*/
if (nmaybe == 0)
return directBoolConsistentFn(key);
/* First call consistent function with all the maybe-inputs set FALSE */
for (i = 0; i < nmaybe; i++)
key->entryRes[maybeEntries[i]] = GIN_FALSE;
curResult = directBoolConsistentFn(key);
recheck = key->recheckCurItem;
for (;;)
{
/* Twiddle the entries for next combination. */
for (i = 0; i < nmaybe; i++)
{
if (key->entryRes[maybeEntries[i]] == GIN_FALSE)
{
key->entryRes[maybeEntries[i]] = GIN_TRUE;
break;
}
else
key->entryRes[maybeEntries[i]] = GIN_FALSE;
}
if (i == nmaybe)
break;
boolResult = directBoolConsistentFn(key);
recheck |= key->recheckCurItem;
if (curResult != boolResult)
{
curResult = GIN_MAYBE;
break;
}
}
/* TRUE with recheck is taken to mean MAYBE */
if (curResult == GIN_TRUE && recheck)
curResult = GIN_MAYBE;
/* We must restore the original state of the entryRes array */
for (i = 0; i < nmaybe; i++)
key->entryRes[maybeEntries[i]] = GIN_MAYBE;
return curResult;
}
/*
* Set up the implementation of the consistent functions for a scan key.
*/
void
ginInitConsistentFunction(GinState *ginstate, GinScanKey key)
{
if (key->searchMode == GIN_SEARCH_MODE_EVERYTHING)
{
key->boolConsistentFn = trueConsistentFn;
key->triConsistentFn = trueTriConsistentFn;
}
else
{
key->consistentFmgrInfo = &ginstate->consistentFn[key->attnum - 1];
key->triConsistentFmgrInfo = &ginstate->triConsistentFn[key->attnum - 1];
key->collation = ginstate->supportCollation[key->attnum - 1];
if (OidIsValid(ginstate->consistentFn[key->attnum - 1].fn_oid))
key->boolConsistentFn = directBoolConsistentFn;
else
key->boolConsistentFn = shimBoolConsistentFn;
if (OidIsValid(ginstate->triConsistentFn[key->attnum - 1].fn_oid))
key->triConsistentFn = directTriConsistentFn;
else
key->triConsistentFn = shimTriConsistentFn;
}
}