mirror of
https://github.com/postgres/postgres.git
synced 2025-07-05 07:21:24 +03:00
Change the implementation of hash join to attempt to avoid unnecessary
work if either of the join relations are empty. The logic is: (1) if the inner relation's startup cost is less than the outer relation's startup cost and this is not an outer join, read a single tuple from the inner relation via ExecHash() - if NULL, we're done (2) read a single tuple from the outer relation - if NULL, we're done (3) build the hash table on the inner relation - if hash table is empty and this is not an outer join, we're done (4) otherwise, do hash join as usual The implementation uses the new MultiExecProcNode API, per a suggestion from Tom: invoking ExecHash() now produces the first tuple from the Hash node's child node, whereas MultiExecHash() builds the hash table. I had to put in a bit of a kludge to get the row count returned for EXPLAIN ANALYZE to be correct: since ExecHash() is invoked to return a tuple, and then MultiExecHash() is invoked, we would return one too many tuples to EXPLAIN ANALYZE. I hacked around this by just manually detecting this situation and subtracting 1 from the EXPLAIN ANALYZE row count.
This commit is contained in:
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.93 2005/04/16 20:07:35 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.94 2005/06/15 07:27:44 neilc Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -37,14 +37,22 @@ static void ExecHashIncreaseNumBatches(HashJoinTable hashtable);
|
||||
/* ----------------------------------------------------------------
|
||||
* ExecHash
|
||||
*
|
||||
* stub for pro forma compliance
|
||||
* produce the first tuple from our child node (and _only_ the
|
||||
* first tuple). This is of limited general use -- it does not
|
||||
* hash its output, and produces only a single tuple. It is
|
||||
* provided so that hash join can probe the inner hash input to
|
||||
* determine whether it is empty without needing to build the
|
||||
* entire hash table first, which is what MultiExecHash() would
|
||||
* do.
|
||||
* ----------------------------------------------------------------
|
||||
*/
|
||||
TupleTableSlot *
|
||||
ExecHash(HashState *node)
|
||||
{
|
||||
elog(ERROR, "Hash node does not support ExecProcNode call convention");
|
||||
return NULL;
|
||||
if (TupIsNull(node->firstTuple))
|
||||
node->firstTuple = ExecProcNode(outerPlanState(node));
|
||||
|
||||
return node->firstTuple;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------
|
||||
@ -63,6 +71,7 @@ MultiExecHash(HashState *node)
|
||||
TupleTableSlot *slot;
|
||||
ExprContext *econtext;
|
||||
uint32 hashvalue;
|
||||
bool cleared_first_tuple = false;
|
||||
|
||||
/* must provide our own instrumentation support */
|
||||
if (node->ps.instrument)
|
||||
@ -85,9 +94,19 @@ MultiExecHash(HashState *node)
|
||||
*/
|
||||
for (;;)
|
||||
{
|
||||
slot = ExecProcNode(outerNode);
|
||||
if (TupIsNull(slot))
|
||||
break;
|
||||
/* use and clear the tuple produced by ExecHash(), if any */
|
||||
if (!TupIsNull(node->firstTuple))
|
||||
{
|
||||
slot = node->firstTuple;
|
||||
node->firstTuple = NULL;
|
||||
cleared_first_tuple = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
slot = ExecProcNode(outerNode);
|
||||
if (TupIsNull(slot))
|
||||
break;
|
||||
}
|
||||
hashtable->totalTuples += 1;
|
||||
/* We have to compute the hash value */
|
||||
econtext->ecxt_innertuple = slot;
|
||||
@ -97,7 +116,19 @@ MultiExecHash(HashState *node)
|
||||
|
||||
/* must provide our own instrumentation support */
|
||||
if (node->ps.instrument)
|
||||
InstrStopNodeMulti(node->ps.instrument, hashtable->totalTuples);
|
||||
{
|
||||
/*
|
||||
* XXX: kludge -- if ExecHash() was invoked, we've already
|
||||
* included the tuple that it produced in the row output count
|
||||
* for this node, so subtract 1 from the # of hashed tuples.
|
||||
*/
|
||||
if (cleared_first_tuple)
|
||||
InstrStopNodeMulti(node->ps.instrument,
|
||||
hashtable->totalTuples - 1);
|
||||
else
|
||||
InstrStopNodeMulti(node->ps.instrument,
|
||||
hashtable->totalTuples);
|
||||
}
|
||||
|
||||
/*
|
||||
* We do not return the hash table directly because it's not a subtype
|
||||
@ -130,6 +161,7 @@ ExecInitHash(Hash *node, EState *estate)
|
||||
hashstate->ps.state = estate;
|
||||
hashstate->hashtable = NULL;
|
||||
hashstate->hashkeys = NIL; /* will be set by parent HashJoin */
|
||||
hashstate->firstTuple = NULL;
|
||||
|
||||
/*
|
||||
* Miscellaneous initialization
|
||||
@ -189,6 +221,8 @@ ExecEndHash(HashState *node)
|
||||
{
|
||||
PlanState *outerPlan;
|
||||
|
||||
node->firstTuple = NULL;
|
||||
|
||||
/*
|
||||
* free exprcontext
|
||||
*/
|
||||
@ -830,6 +864,8 @@ ExecHashTableReset(HashJoinTable hashtable)
|
||||
void
|
||||
ExecReScanHash(HashState *node, ExprContext *exprCtxt)
|
||||
{
|
||||
node->firstTuple = NULL;
|
||||
|
||||
/*
|
||||
* if chgParam of subnode is not null then plan will be re-scanned by
|
||||
* first ExecProcNode.
|
||||
|
Reference in New Issue
Block a user