mirror of
https://github.com/postgres/postgres.git
synced 2025-06-14 18:42:34 +03:00
First cut at implementing IN (and NOT IN) via hashtables. There is
more to be done yet, but this is a good start.
This commit is contained in:
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v 1.42 2003/01/10 21:08:08 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v 1.43 2003/01/12 04:03:34 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -22,11 +22,24 @@
|
||||
#include "access/heapam.h"
|
||||
#include "executor/executor.h"
|
||||
#include "executor/nodeSubplan.h"
|
||||
#include "nodes/makefuncs.h"
|
||||
#include "parser/parse_expr.h"
|
||||
#include "tcop/pquery.h"
|
||||
|
||||
|
||||
static Datum ExecHashSubPlan(SubPlanState *node,
|
||||
ExprContext *econtext,
|
||||
bool *isNull);
|
||||
static Datum ExecScanSubPlan(SubPlanState *node,
|
||||
ExprContext *econtext,
|
||||
bool *isNull);
|
||||
static void buildSubPlanHash(SubPlanState *node);
|
||||
static bool findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot);
|
||||
static bool tupleAllNulls(HeapTuple tuple);
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------
|
||||
* ExecSubPlan(node)
|
||||
* ExecSubPlan
|
||||
* ----------------------------------------------------------------
|
||||
*/
|
||||
Datum
|
||||
@ -35,6 +48,155 @@ ExecSubPlan(SubPlanState *node,
|
||||
bool *isNull)
|
||||
{
|
||||
SubPlan *subplan = (SubPlan *) node->xprstate.expr;
|
||||
|
||||
if (subplan->setParam != NIL)
|
||||
elog(ERROR, "ExecSubPlan: can't set parent params from subquery");
|
||||
|
||||
if (subplan->useHashTable)
|
||||
return ExecHashSubPlan(node, econtext, isNull);
|
||||
else
|
||||
return ExecScanSubPlan(node, econtext, isNull);
|
||||
}
|
||||
|
||||
/*
|
||||
* ExecHashSubPlan: store subselect result in an in-memory hash table
|
||||
*/
|
||||
static Datum
|
||||
ExecHashSubPlan(SubPlanState *node,
|
||||
ExprContext *econtext,
|
||||
bool *isNull)
|
||||
{
|
||||
SubPlan *subplan = (SubPlan *) node->xprstate.expr;
|
||||
PlanState *planstate = node->planstate;
|
||||
ExprContext *innerecontext = node->innerecontext;
|
||||
TupleTableSlot *slot;
|
||||
HeapTuple tup;
|
||||
|
||||
/* Shouldn't have any direct correlation Vars */
|
||||
if (subplan->parParam != NIL || node->args != NIL)
|
||||
elog(ERROR, "ExecHashSubPlan: direct correlation not supported");
|
||||
|
||||
/*
|
||||
* If first time through or we need to rescan the subplan, build
|
||||
* the hash table.
|
||||
*/
|
||||
if (node->hashtable == NULL || planstate->chgParam != NIL)
|
||||
buildSubPlanHash(node);
|
||||
|
||||
/*
|
||||
* The result for an empty subplan is always FALSE; no need to
|
||||
* evaluate lefthand side.
|
||||
*/
|
||||
*isNull = false;
|
||||
if (!node->havehashrows && !node->havenullrows)
|
||||
return BoolGetDatum(false);
|
||||
|
||||
/*
|
||||
* Evaluate lefthand expressions and form a projection tuple.
|
||||
* First we have to set the econtext to use (hack alert!).
|
||||
*/
|
||||
node->projLeft->pi_exprContext = econtext;
|
||||
slot = ExecProject(node->projLeft, NULL);
|
||||
tup = slot->val;
|
||||
|
||||
/*
|
||||
* Note: because we are typically called in a per-tuple context,
|
||||
* we have to explicitly clear the projected tuple before returning.
|
||||
* Otherwise, we'll have a double-free situation: the per-tuple context
|
||||
* will probably be reset before we're called again, and then the tuple
|
||||
* slot will think it still needs to free the tuple.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Since the hashtable routines will use innerecontext's per-tuple
|
||||
* memory as working memory, be sure to reset it for each tuple.
|
||||
*/
|
||||
ResetExprContext(innerecontext);
|
||||
|
||||
/*
|
||||
* If the LHS is all non-null, probe for an exact match in the
|
||||
* main hash table. If we find one, the result is TRUE.
|
||||
* Otherwise, scan the partly-null table to see if there are any
|
||||
* rows that aren't provably unequal to the LHS; if so, the result
|
||||
* is UNKNOWN. (We skip that part if we don't care about UNKNOWN.)
|
||||
* Otherwise, the result is FALSE.
|
||||
*
|
||||
* Note: the reason we can avoid a full scan of the main hash table
|
||||
* is that the combining operators are assumed never to yield NULL
|
||||
* when both inputs are non-null. If they were to do so, we might
|
||||
* need to produce UNKNOWN instead of FALSE because of an UNKNOWN
|
||||
* result in comparing the LHS to some main-table entry --- which
|
||||
* is a comparison we will not even make, unless there's a chance
|
||||
* match of hash keys.
|
||||
*/
|
||||
if (HeapTupleNoNulls(tup))
|
||||
{
|
||||
if (node->havehashrows &&
|
||||
LookupTupleHashEntry(node->hashtable, slot, NULL) != NULL)
|
||||
{
|
||||
ExecClearTuple(slot);
|
||||
return BoolGetDatum(true);
|
||||
}
|
||||
if (node->havenullrows &&
|
||||
findPartialMatch(node->hashnulls, slot))
|
||||
{
|
||||
ExecClearTuple(slot);
|
||||
*isNull = true;
|
||||
return BoolGetDatum(false);
|
||||
}
|
||||
ExecClearTuple(slot);
|
||||
return BoolGetDatum(false);
|
||||
}
|
||||
|
||||
/*
|
||||
* When the LHS is partly or wholly NULL, we can never return TRUE.
|
||||
* If we don't care about UNKNOWN, just return FALSE. Otherwise,
|
||||
* if the LHS is wholly NULL, immediately return UNKNOWN. (Since the
|
||||
* combining operators are strict, the result could only be FALSE if the
|
||||
* sub-select were empty, but we already handled that case.) Otherwise,
|
||||
* we must scan both the main and partly-null tables to see if there are
|
||||
* any rows that aren't provably unequal to the LHS; if so, the result is
|
||||
* UNKNOWN. Otherwise, the result is FALSE.
|
||||
*/
|
||||
if (node->hashnulls == NULL)
|
||||
{
|
||||
ExecClearTuple(slot);
|
||||
return BoolGetDatum(false);
|
||||
}
|
||||
if (tupleAllNulls(tup))
|
||||
{
|
||||
ExecClearTuple(slot);
|
||||
*isNull = true;
|
||||
return BoolGetDatum(false);
|
||||
}
|
||||
/* Scan partly-null table first, since more likely to get a match */
|
||||
if (node->havenullrows &&
|
||||
findPartialMatch(node->hashnulls, slot))
|
||||
{
|
||||
ExecClearTuple(slot);
|
||||
*isNull = true;
|
||||
return BoolGetDatum(false);
|
||||
}
|
||||
if (node->havehashrows &&
|
||||
findPartialMatch(node->hashtable, slot))
|
||||
{
|
||||
ExecClearTuple(slot);
|
||||
*isNull = true;
|
||||
return BoolGetDatum(false);
|
||||
}
|
||||
ExecClearTuple(slot);
|
||||
return BoolGetDatum(false);
|
||||
}
|
||||
|
||||
/*
|
||||
* ExecScanSubPlan: default case where we have to rescan subplan each time
|
||||
*/
|
||||
static Datum
|
||||
ExecScanSubPlan(SubPlanState *node,
|
||||
ExprContext *econtext,
|
||||
bool *isNull)
|
||||
{
|
||||
SubPlan *subplan = (SubPlan *) node->xprstate.expr;
|
||||
PlanState *planstate = node->planstate;
|
||||
SubLinkType subLinkType = subplan->subLinkType;
|
||||
bool useOr = subplan->useOr;
|
||||
@ -52,9 +214,6 @@ ExecSubPlan(SubPlanState *node,
|
||||
*/
|
||||
oldcontext = MemoryContextSwitchTo(node->sub_estate->es_query_cxt);
|
||||
|
||||
if (subplan->setParam != NIL)
|
||||
elog(ERROR, "ExecSubPlan: can't set parent params from subquery");
|
||||
|
||||
/*
|
||||
* Set Params of this plan from parent plan correlation Vars
|
||||
*/
|
||||
@ -267,6 +426,203 @@ ExecSubPlan(SubPlanState *node,
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* buildSubPlanHash: load hash table by scanning subplan output.
|
||||
*/
|
||||
static void
|
||||
buildSubPlanHash(SubPlanState *node)
|
||||
{
|
||||
SubPlan *subplan = (SubPlan *) node->xprstate.expr;
|
||||
PlanState *planstate = node->planstate;
|
||||
int ncols = length(node->exprs);
|
||||
ExprContext *innerecontext = node->innerecontext;
|
||||
MemoryContext tempcxt = innerecontext->ecxt_per_tuple_memory;
|
||||
MemoryContext oldcontext;
|
||||
int nbuckets;
|
||||
TupleTableSlot *slot;
|
||||
|
||||
Assert(subplan->subLinkType == ANY_SUBLINK);
|
||||
Assert(!subplan->useOr);
|
||||
|
||||
/*
|
||||
* If we already had any hash tables, destroy 'em; then create
|
||||
* empty hash table(s).
|
||||
*
|
||||
* If we need to distinguish accurately between FALSE and UNKNOWN
|
||||
* (i.e., NULL) results of the IN operation, then we have to store
|
||||
* subplan output rows that are partly or wholly NULL. We store such
|
||||
* rows in a separate hash table that we expect will be much smaller
|
||||
* than the main table. (We can use hashing to eliminate partly-null
|
||||
* rows that are not distinct. We keep them separate to minimize the
|
||||
* cost of the inevitable full-table searches; see findPartialMatch.)
|
||||
*
|
||||
* If it's not necessary to distinguish FALSE and UNKNOWN, then we
|
||||
* don't need to store subplan output rows that contain NULL.
|
||||
*/
|
||||
MemoryContextReset(node->tablecxt);
|
||||
node->hashtable = NULL;
|
||||
node->hashnulls = NULL;
|
||||
node->havehashrows = false;
|
||||
node->havenullrows = false;
|
||||
|
||||
nbuckets = (int) ceil(planstate->plan->plan_rows);
|
||||
if (nbuckets < 1)
|
||||
nbuckets = 1;
|
||||
|
||||
node->hashtable = BuildTupleHashTable(ncols,
|
||||
node->keyColIdx,
|
||||
node->eqfunctions,
|
||||
nbuckets,
|
||||
sizeof(TupleHashEntryData),
|
||||
node->tablecxt,
|
||||
tempcxt);
|
||||
|
||||
if (!subplan->unknownEqFalse)
|
||||
{
|
||||
if (ncols == 1)
|
||||
nbuckets = 1; /* there can only be one entry */
|
||||
else
|
||||
{
|
||||
nbuckets /= 16;
|
||||
if (nbuckets < 1)
|
||||
nbuckets = 1;
|
||||
}
|
||||
node->hashnulls = BuildTupleHashTable(ncols,
|
||||
node->keyColIdx,
|
||||
node->eqfunctions,
|
||||
nbuckets,
|
||||
sizeof(TupleHashEntryData),
|
||||
node->tablecxt,
|
||||
tempcxt);
|
||||
}
|
||||
|
||||
/*
|
||||
* We are probably in a short-lived expression-evaluation context.
|
||||
* Switch to the child plan's per-query context for calling ExecProcNode.
|
||||
*/
|
||||
oldcontext = MemoryContextSwitchTo(node->sub_estate->es_query_cxt);
|
||||
|
||||
/*
|
||||
* Reset subplan to start.
|
||||
*/
|
||||
ExecReScan(planstate, NULL);
|
||||
|
||||
/*
|
||||
* Scan the subplan and load the hash table(s). Note that when there are
|
||||
* duplicate rows coming out of the sub-select, only one copy is stored.
|
||||
*/
|
||||
for (slot = ExecProcNode(planstate);
|
||||
!TupIsNull(slot);
|
||||
slot = ExecProcNode(planstate))
|
||||
{
|
||||
HeapTuple tup = slot->val;
|
||||
TupleDesc tdesc = slot->ttc_tupleDescriptor;
|
||||
int col = 1;
|
||||
List *plst;
|
||||
bool isnew;
|
||||
|
||||
/*
|
||||
* Load up the Params representing the raw sub-select outputs,
|
||||
* then form the projection tuple to store in the hashtable.
|
||||
*/
|
||||
foreach(plst, subplan->paramIds)
|
||||
{
|
||||
int paramid = lfirsti(plst);
|
||||
ParamExecData *prmdata;
|
||||
|
||||
prmdata = &(innerecontext->ecxt_param_exec_vals[paramid]);
|
||||
Assert(prmdata->execPlan == NULL);
|
||||
prmdata->value = heap_getattr(tup, col, tdesc,
|
||||
&(prmdata->isnull));
|
||||
col++;
|
||||
}
|
||||
slot = ExecProject(node->projRight, NULL);
|
||||
tup = slot->val;
|
||||
|
||||
/*
|
||||
* If result contains any nulls, store separately or not at all.
|
||||
* (Since we know the projection tuple has no junk columns, we
|
||||
* can just look at the overall hasnull info bit, instead of
|
||||
* groveling through the columns.)
|
||||
*/
|
||||
if (HeapTupleNoNulls(tup))
|
||||
{
|
||||
(void) LookupTupleHashEntry(node->hashtable, slot, &isnew);
|
||||
node->havehashrows = true;
|
||||
}
|
||||
else if (node->hashnulls)
|
||||
{
|
||||
(void) LookupTupleHashEntry(node->hashnulls, slot, &isnew);
|
||||
node->havenullrows = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Reset innerecontext after each inner tuple to free any memory
|
||||
* used in hash computation or comparison routines.
|
||||
*/
|
||||
ResetExprContext(innerecontext);
|
||||
}
|
||||
|
||||
/*
|
||||
* Since the projected tuples are in the sub-query's context and not
|
||||
* the main context, we'd better clear the tuple slot before there's
|
||||
* any chance of a reset of the sub-query's context. Else we will
|
||||
* have the potential for a double free attempt.
|
||||
*/
|
||||
ExecClearTuple(node->projRight->pi_slot);
|
||||
|
||||
MemoryContextSwitchTo(oldcontext);
|
||||
}
|
||||
|
||||
/*
|
||||
* findPartialMatch: does the hashtable contain an entry that is not
|
||||
* provably distinct from the tuple?
|
||||
*
|
||||
* We have to scan the whole hashtable; we can't usefully use hashkeys
|
||||
* to guide probing, since we might get partial matches on tuples with
|
||||
* hashkeys quite unrelated to what we'd get from the given tuple.
|
||||
*/
|
||||
static bool
|
||||
findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot)
|
||||
{
|
||||
int numCols = hashtable->numCols;
|
||||
AttrNumber *keyColIdx = hashtable->keyColIdx;
|
||||
HeapTuple tuple = slot->val;
|
||||
TupleDesc tupdesc = slot->ttc_tupleDescriptor;
|
||||
TupleHashIterator hashiter;
|
||||
TupleHashEntry entry;
|
||||
|
||||
ResetTupleHashIterator(&hashiter);
|
||||
while ((entry = ScanTupleHashTable(hashtable, &hashiter)) != NULL)
|
||||
{
|
||||
if (!execTuplesUnequal(entry->firstTuple,
|
||||
tuple,
|
||||
tupdesc,
|
||||
numCols, keyColIdx,
|
||||
hashtable->eqfunctions,
|
||||
hashtable->tempcxt))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* tupleAllNulls: is the tuple completely NULL?
|
||||
*/
|
||||
static bool
|
||||
tupleAllNulls(HeapTuple tuple)
|
||||
{
|
||||
int ncols = tuple->t_data->t_natts;
|
||||
int i;
|
||||
|
||||
for (i = 1; i <= ncols; i++)
|
||||
{
|
||||
if (!heap_attisnull(tuple, i))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------
|
||||
* ExecInitSubPlan
|
||||
* ----------------------------------------------------------------
|
||||
@ -289,8 +645,14 @@ ExecInitSubPlan(SubPlanState *node, EState *estate)
|
||||
*/
|
||||
node->needShutdown = false;
|
||||
node->curTuple = NULL;
|
||||
node->projLeft = NULL;
|
||||
node->projRight = NULL;
|
||||
node->hashtable = NULL;
|
||||
node->hashnulls = NULL;
|
||||
node->tablecxt = NULL;
|
||||
node->innerecontext = NULL;
|
||||
node->keyColIdx = NULL;
|
||||
node->eqfunctions = NULL;
|
||||
|
||||
/*
|
||||
* create an EState for the subplan
|
||||
@ -343,6 +705,137 @@ ExecInitSubPlan(SubPlanState *node, EState *estate)
|
||||
* it, for others - it doesn't matter...
|
||||
*/
|
||||
}
|
||||
|
||||
/*
|
||||
* If we are going to hash the subquery output, initialize relevant
|
||||
* stuff. (We don't create the hashtable until needed, though.)
|
||||
*/
|
||||
if (subplan->useHashTable)
|
||||
{
|
||||
int ncols,
|
||||
i;
|
||||
TupleDesc tupDesc;
|
||||
TupleTable tupTable;
|
||||
TupleTableSlot *slot;
|
||||
List *lefttlist,
|
||||
*righttlist,
|
||||
*leftptlist,
|
||||
*rightptlist,
|
||||
*lexpr;
|
||||
|
||||
/* We need a memory context to hold the hash table(s) */
|
||||
node->tablecxt =
|
||||
AllocSetContextCreate(CurrentMemoryContext,
|
||||
"Subplan HashTable Context",
|
||||
ALLOCSET_DEFAULT_MINSIZE,
|
||||
ALLOCSET_DEFAULT_INITSIZE,
|
||||
ALLOCSET_DEFAULT_MAXSIZE);
|
||||
/* and a short-lived exprcontext for function evaluation */
|
||||
node->innerecontext = CreateExprContext(estate);
|
||||
/* Silly little array of column numbers 1..n */
|
||||
ncols = length(node->exprs);
|
||||
node->keyColIdx = (AttrNumber *) palloc(ncols * sizeof(AttrNumber));
|
||||
for (i = 0; i < ncols; i++)
|
||||
node->keyColIdx[i] = i+1;
|
||||
/*
|
||||
* We use ExecProject to evaluate the lefthand and righthand
|
||||
* expression lists and form tuples. (You might think that we
|
||||
* could use the sub-select's output tuples directly, but that is
|
||||
* not the case if we had to insert any run-time coercions of the
|
||||
* sub-select's output datatypes; anyway this avoids storing any
|
||||
* resjunk columns that might be in the sub-select's output.)
|
||||
* Run through the combining expressions to build tlists for the
|
||||
* lefthand and righthand sides. We need both the ExprState list
|
||||
* (for ExecProject) and the underlying parse Exprs (for
|
||||
* ExecTypeFromTL).
|
||||
*
|
||||
* We also extract the combining operators themselves to initialize
|
||||
* the equality functions for the hash tables.
|
||||
*/
|
||||
lefttlist = righttlist = NIL;
|
||||
leftptlist = rightptlist = NIL;
|
||||
node->eqfunctions = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
|
||||
i = 1;
|
||||
foreach(lexpr, node->exprs)
|
||||
{
|
||||
FuncExprState *fstate = (FuncExprState *) lfirst(lexpr);
|
||||
OpExpr *opexpr = (OpExpr *) fstate->xprstate.expr;
|
||||
ExprState *exstate;
|
||||
Expr *expr;
|
||||
TargetEntry *tle;
|
||||
GenericExprState *tlestate;
|
||||
|
||||
Assert(IsA(fstate, FuncExprState));
|
||||
Assert(IsA(opexpr, OpExpr));
|
||||
Assert(length(fstate->args) == 2);
|
||||
|
||||
/* Process lefthand argument */
|
||||
exstate = (ExprState *) lfirst(fstate->args);
|
||||
expr = exstate->expr;
|
||||
tle = makeTargetEntry(makeResdom(i,
|
||||
exprType((Node *) expr),
|
||||
exprTypmod((Node *) expr),
|
||||
NULL,
|
||||
false),
|
||||
expr);
|
||||
tlestate = makeNode(GenericExprState);
|
||||
tlestate->xprstate.expr = (Expr *) tle;
|
||||
tlestate->arg = exstate;
|
||||
lefttlist = lappend(lefttlist, tlestate);
|
||||
leftptlist = lappend(leftptlist, tle);
|
||||
|
||||
/* Process righthand argument */
|
||||
exstate = (ExprState *) lsecond(fstate->args);
|
||||
expr = exstate->expr;
|
||||
tle = makeTargetEntry(makeResdom(i,
|
||||
exprType((Node *) expr),
|
||||
exprTypmod((Node *) expr),
|
||||
NULL,
|
||||
false),
|
||||
expr);
|
||||
tlestate = makeNode(GenericExprState);
|
||||
tlestate->xprstate.expr = (Expr *) tle;
|
||||
tlestate->arg = exstate;
|
||||
righttlist = lappend(righttlist, tlestate);
|
||||
rightptlist = lappend(rightptlist, tle);
|
||||
|
||||
/* Lookup the combining function */
|
||||
fmgr_info(opexpr->opfuncid, &node->eqfunctions[i-1]);
|
||||
|
||||
i++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Create a tupletable to hold these tuples. (Note: we never bother
|
||||
* to free the tupletable explicitly; that's okay because it will
|
||||
* never store raw disk tuples that might have associated buffer
|
||||
* pins. The only resource involved is memory, which will be
|
||||
* cleaned up by freeing the query context.)
|
||||
*/
|
||||
tupTable = ExecCreateTupleTable(2);
|
||||
|
||||
/*
|
||||
* Construct tupdescs, slots and projection nodes for left and
|
||||
* right sides. The lefthand expressions will be evaluated in
|
||||
* the parent plan node's exprcontext, which we don't have access
|
||||
* to here. Fortunately we can just pass NULL for now and fill it
|
||||
* in later (hack alert!). The righthand expressions will be
|
||||
* evaluated in our own innerecontext.
|
||||
*/
|
||||
tupDesc = ExecTypeFromTL(leftptlist, false);
|
||||
slot = ExecAllocTableSlot(tupTable);
|
||||
ExecSetSlotDescriptor(slot, tupDesc, true);
|
||||
node->projLeft = ExecBuildProjectionInfo(lefttlist,
|
||||
NULL,
|
||||
slot);
|
||||
|
||||
tupDesc = ExecTypeFromTL(rightptlist, false);
|
||||
slot = ExecAllocTableSlot(tupTable);
|
||||
ExecSetSlotDescriptor(slot, tupDesc, true);
|
||||
node->projRight = ExecBuildProjectionInfo(righttlist,
|
||||
node->innerecontext,
|
||||
slot);
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------
|
||||
@ -476,11 +969,6 @@ ExecEndSubPlan(SubPlanState *node)
|
||||
node->planstate = NULL;
|
||||
node->needShutdown = false;
|
||||
}
|
||||
if (node->curTuple)
|
||||
{
|
||||
heap_freetuple(node->curTuple);
|
||||
node->curTuple = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
Reference in New Issue
Block a user