Change the implementation of hash join to attempt to avoid unnecessary
work if either of the join relations is empty. The logic is:

(1) if the inner relation's startup cost is less than the outer relation's startup cost and this is not an outer join, read a single tuple from the inner relation via ExecHash() - if NULL, we're done
(2) read a single tuple from the outer relation - if NULL, we're done
(3) build the hash table on the inner relation - if the hash table is empty and this is not an outer join, we're done
(4) otherwise, do hash join as usual

The implementation uses the new MultiExecProcNode API, per a suggestion from Tom: invoking ExecHash() now produces the first tuple from the Hash node's child node, whereas MultiExecHash() builds the hash table.

I had to put in a bit of a kludge to get the row count returned for EXPLAIN ANALYZE to be correct: since ExecHash() is invoked to return a tuple, and then MultiExecHash() is invoked, we would report one too many tuples to EXPLAIN ANALYZE. I hacked around this by just manually detecting this situation and subtracting 1 from the EXPLAIN ANALYZE row count.
2025-12-21 05:21:08 +03:00 · 2005-06-15 07:27:44 +00:00
parent 4aaff55359
commit c119c5bd49
3 changed files with 169 additions and 62 deletions
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.93 2005/04/16 20:07:35 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.94 2005/06/15 07:27:44 neilc Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -37,14 +37,22 @@ static void ExecHashIncreaseNumBatches(HashJoinTable hashtable);
 /* ----------------------------------------------------------------
 *		ExecHash
 *
- *		stub for pro forma compliance
+ *		produce the first tuple from our child node (and _only_ the
+ *		first tuple). This is of limited general use -- it does not
+ *		hash its output, and produces only a single tuple. It is
+ *		provided so that hash join can probe the inner hash input to
+ *		determine whether it is empty without needing to build the
+ *		entire hash table first, which is what MultiExecHash() would
+ *		do.
 * ----------------------------------------------------------------
 */
 TupleTableSlot *
 ExecHash(HashState *node)
 {
-	elog(ERROR, "Hash node does not support ExecProcNode call convention");
-	return NULL;
+	if (TupIsNull(node->firstTuple))
+		node->firstTuple = ExecProcNode(outerPlanState(node));
+
+	return node->firstTuple;
 }

 /* ----------------------------------------------------------------
@@ -63,6 +71,7 @@ MultiExecHash(HashState *node)
 	TupleTableSlot *slot;
 	ExprContext *econtext;
 	uint32		hashvalue;
+	bool		cleared_first_tuple = false;

 	/* must provide our own instrumentation support */
 	if (node->ps.instrument)
@@ -85,9 +94,19 @@ MultiExecHash(HashState *node)
 	 */
 	for (;;)
 	{
-		slot = ExecProcNode(outerNode);
-		if (TupIsNull(slot))
-			break;
+		/* use and clear the tuple produced by ExecHash(), if any */
+		if (!TupIsNull(node->firstTuple))
+		{
+			slot = node->firstTuple;
+			node->firstTuple = NULL;
+			cleared_first_tuple = true;
+		}
+		else
+		{
+			slot = ExecProcNode(outerNode);
+			if (TupIsNull(slot))
+				break;
+		}
 		hashtable->totalTuples += 1;
 		/* We have to compute the hash value */
 		econtext->ecxt_innertuple = slot;
@@ -97,7 +116,19 @@ MultiExecHash(HashState *node)

 	/* must provide our own instrumentation support */
 	if (node->ps.instrument)
-		InstrStopNodeMulti(node->ps.instrument, hashtable->totalTuples);
+	{
+		/*
+		 * XXX: kludge -- if ExecHash() was invoked, we've already
+		 * included the tuple that it produced in the row output count
+		 * for this node, so subtract 1 from the # of hashed tuples.
+		 */
+		if (cleared_first_tuple)
+			InstrStopNodeMulti(node->ps.instrument,
+							   hashtable->totalTuples - 1);
+		else
+			InstrStopNodeMulti(node->ps.instrument,
+							   hashtable->totalTuples);
+	}

 	/*
 	 * We do not return the hash table directly because it's not a subtype
@@ -130,6 +161,7 @@ ExecInitHash(Hash *node, EState *estate)
 	hashstate->ps.state = estate;
 	hashstate->hashtable = NULL;
 	hashstate->hashkeys = NIL;	/* will be set by parent HashJoin */
+	hashstate->firstTuple = NULL;

 	/*
 	 * Miscellaneous initialization
@@ -189,6 +221,8 @@ ExecEndHash(HashState *node)
 {
 	PlanState  *outerPlan;

+	node->firstTuple = NULL;
+
 	/*
 	 * free exprcontext
 	 */
@@ -830,6 +864,8 @@ ExecHashTableReset(HashJoinTable hashtable)
 void
 ExecReScanHash(HashState *node, ExprContext *exprCtxt)
 {
+	node->firstTuple = NULL;
+
 	/*
 	 * if chgParam of subnode is not null then plan will be re-scanned by
 	 * first ExecProcNode.