mirror of
https://github.com/postgres/postgres.git
synced 2025-04-21 12:05:57 +03:00
Improve hash join to discard input tuples immediately if they can't match because they contain a null join key (and the join operator is known strict).
This significantly improves performance when the inner relation contains a lot of nulls, as per bug #2930.
This commit is contained in:
parent
28c480e9ae
commit
b39e91501c
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.108 2007/01/05 22:19:28 momjian Exp $
|
* $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.109 2007/01/28 23:21:26 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -92,11 +92,14 @@ MultiExecHash(HashState *node)
|
|||||||
slot = ExecProcNode(outerNode);
|
slot = ExecProcNode(outerNode);
|
||||||
if (TupIsNull(slot))
|
if (TupIsNull(slot))
|
||||||
break;
|
break;
|
||||||
hashtable->totalTuples += 1;
|
|
||||||
/* We have to compute the hash value */
|
/* We have to compute the hash value */
|
||||||
econtext->ecxt_innertuple = slot;
|
econtext->ecxt_innertuple = slot;
|
||||||
hashvalue = ExecHashGetHashValue(hashtable, econtext, hashkeys);
|
if (ExecHashGetHashValue(hashtable, econtext, hashkeys, false,
|
||||||
|
&hashvalue))
|
||||||
|
{
|
||||||
ExecHashTableInsert(hashtable, slot, hashvalue);
|
ExecHashTableInsert(hashtable, slot, hashvalue);
|
||||||
|
hashtable->totalTuples += 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* must provide our own instrumentation support */
|
/* must provide our own instrumentation support */
|
||||||
@ -261,19 +264,23 @@ ExecHashTableCreate(Hash *node, List *hashOperators)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Get info about the hash functions to be used for each hash key.
|
* Get info about the hash functions to be used for each hash key.
|
||||||
|
* Also remember whether the join operators are strict.
|
||||||
*/
|
*/
|
||||||
nkeys = list_length(hashOperators);
|
nkeys = list_length(hashOperators);
|
||||||
hashtable->hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));
|
hashtable->hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));
|
||||||
|
hashtable->hashStrict = (bool *) palloc(nkeys * sizeof(bool));
|
||||||
i = 0;
|
i = 0;
|
||||||
foreach(ho, hashOperators)
|
foreach(ho, hashOperators)
|
||||||
{
|
{
|
||||||
|
Oid hashop = lfirst_oid(ho);
|
||||||
Oid hashfn;
|
Oid hashfn;
|
||||||
|
|
||||||
hashfn = get_op_hash_function(lfirst_oid(ho));
|
hashfn = get_op_hash_function(hashop);
|
||||||
if (!OidIsValid(hashfn))
|
if (!OidIsValid(hashfn))
|
||||||
elog(ERROR, "could not find hash function for hash operator %u",
|
elog(ERROR, "could not find hash function for hash operator %u",
|
||||||
lfirst_oid(ho));
|
hashop);
|
||||||
fmgr_info(hashfn, &hashtable->hashfunctions[i]);
|
fmgr_info(hashfn, &hashtable->hashfunctions[i]);
|
||||||
|
hashtable->hashStrict[i] = op_strict(hashop);
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -657,11 +664,18 @@ ExecHashTableInsert(HashJoinTable hashtable,
|
|||||||
* The tuple to be tested must be in either econtext->ecxt_outertuple or
|
* The tuple to be tested must be in either econtext->ecxt_outertuple or
|
||||||
* econtext->ecxt_innertuple. Vars in the hashkeys expressions reference
|
* econtext->ecxt_innertuple. Vars in the hashkeys expressions reference
|
||||||
* either OUTER or INNER.
|
* either OUTER or INNER.
|
||||||
|
*
|
||||||
|
* A TRUE result means the tuple's hash value has been successfully computed
|
||||||
|
* and stored at *hashvalue. A FALSE result means the tuple cannot match
|
||||||
|
* because it contains a null attribute, and hence it should be discarded
|
||||||
|
* immediately. (If keep_nulls is true then FALSE is never returned.)
|
||||||
*/
|
*/
|
||||||
uint32
|
bool
|
||||||
ExecHashGetHashValue(HashJoinTable hashtable,
|
ExecHashGetHashValue(HashJoinTable hashtable,
|
||||||
ExprContext *econtext,
|
ExprContext *econtext,
|
||||||
List *hashkeys)
|
List *hashkeys,
|
||||||
|
bool keep_nulls,
|
||||||
|
uint32 *hashvalue)
|
||||||
{
|
{
|
||||||
uint32 hashkey = 0;
|
uint32 hashkey = 0;
|
||||||
ListCell *hk;
|
ListCell *hk;
|
||||||
@ -691,10 +705,27 @@ ExecHashGetHashValue(HashJoinTable hashtable,
|
|||||||
keyval = ExecEvalExpr(keyexpr, econtext, &isNull, NULL);
|
keyval = ExecEvalExpr(keyexpr, econtext, &isNull, NULL);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Compute the hash function
|
* If the attribute is NULL, and the join operator is strict, then
|
||||||
|
* this tuple cannot pass the join qual so we can reject it
|
||||||
|
* immediately (unless we're scanning the outer side of an outer join,
|
||||||
|
* in which case we must not reject it). Otherwise we act like the
|
||||||
|
* hashcode of NULL is zero (this will support operators that act like
|
||||||
|
* IS NOT DISTINCT FROM, though not any more-random behavior). We treat
|
||||||
|
* the hash support function as strict even if the operator is not.
|
||||||
|
*
|
||||||
|
* Note: currently, all hashjoinable operators must be strict since
|
||||||
|
* the hash index AM assumes that. However, it takes so little
|
||||||
|
* extra code here to allow non-strict that we may as well do it.
|
||||||
*/
|
*/
|
||||||
if (!isNull) /* treat nulls as having hash key 0 */
|
if (isNull)
|
||||||
{
|
{
|
||||||
|
if (hashtable->hashStrict[i] && !keep_nulls)
|
||||||
|
return false; /* cannot match */
|
||||||
|
/* else, leave hashkey unmodified, equivalent to hashcode 0 */
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* Compute the hash function */
|
||||||
uint32 hkey;
|
uint32 hkey;
|
||||||
|
|
||||||
hkey = DatumGetUInt32(FunctionCall1(&hashtable->hashfunctions[i],
|
hkey = DatumGetUInt32(FunctionCall1(&hashtable->hashfunctions[i],
|
||||||
@ -707,7 +738,8 @@ ExecHashGetHashValue(HashJoinTable hashtable,
|
|||||||
|
|
||||||
MemoryContextSwitchTo(oldContext);
|
MemoryContextSwitchTo(oldContext);
|
||||||
|
|
||||||
return hashkey;
|
*hashvalue = hashkey;
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.86 2007/01/05 22:19:28 momjian Exp $
|
* $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.87 2007/01/28 23:21:26 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -547,9 +547,8 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
|
|||||||
int curbatch = hashtable->curbatch;
|
int curbatch = hashtable->curbatch;
|
||||||
TupleTableSlot *slot;
|
TupleTableSlot *slot;
|
||||||
|
|
||||||
if (curbatch == 0)
|
if (curbatch == 0) /* if it is the first pass */
|
||||||
{ /* if it is the first pass */
|
{
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check to see if first outer tuple was already fetched by
|
* Check to see if first outer tuple was already fetched by
|
||||||
* ExecHashJoin() and not used yet.
|
* ExecHashJoin() and not used yet.
|
||||||
@ -559,7 +558,8 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
|
|||||||
hjstate->hj_FirstOuterTupleSlot = NULL;
|
hjstate->hj_FirstOuterTupleSlot = NULL;
|
||||||
else
|
else
|
||||||
slot = ExecProcNode(outerNode);
|
slot = ExecProcNode(outerNode);
|
||||||
if (!TupIsNull(slot))
|
|
||||||
|
while (!TupIsNull(slot))
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* We have to compute the tuple's hash value.
|
* We have to compute the tuple's hash value.
|
||||||
@ -567,15 +567,24 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
|
|||||||
ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
|
ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
|
||||||
|
|
||||||
econtext->ecxt_outertuple = slot;
|
econtext->ecxt_outertuple = slot;
|
||||||
*hashvalue = ExecHashGetHashValue(hashtable, econtext,
|
if (ExecHashGetHashValue(hashtable, econtext,
|
||||||
hjstate->hj_OuterHashKeys);
|
hjstate->hj_OuterHashKeys,
|
||||||
|
(hjstate->js.jointype == JOIN_LEFT),
|
||||||
|
hashvalue))
|
||||||
|
{
|
||||||
/* remember outer relation is not empty for possible rescan */
|
/* remember outer relation is not empty for possible rescan */
|
||||||
hjstate->hj_OuterNotEmpty = true;
|
hjstate->hj_OuterNotEmpty = true;
|
||||||
|
|
||||||
return slot;
|
return slot;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* That tuple couldn't match because of a NULL, so discard it
|
||||||
|
* and continue with the next one.
|
||||||
|
*/
|
||||||
|
slot = ExecProcNode(outerNode);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We have just reached the end of the first pass. Try to switch to a
|
* We have just reached the end of the first pass. Try to switch to a
|
||||||
* saved batch.
|
* saved batch.
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.42 2007/01/05 22:19:54 momjian Exp $
|
* $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.43 2007/01/28 23:21:26 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -108,6 +108,8 @@ typedef struct HashJoinTableData
|
|||||||
*/
|
*/
|
||||||
FmgrInfo *hashfunctions; /* lookup data for hash functions */
|
FmgrInfo *hashfunctions; /* lookup data for hash functions */
|
||||||
|
|
||||||
|
bool *hashStrict; /* is each hash join operator strict? */
|
||||||
|
|
||||||
Size spaceUsed; /* memory space currently used by tuples */
|
Size spaceUsed; /* memory space currently used by tuples */
|
||||||
Size spaceAllowed; /* upper limit for space used */
|
Size spaceAllowed; /* upper limit for space used */
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/executor/nodeHash.h,v 1.42 2007/01/05 22:19:54 momjian Exp $
|
* $PostgreSQL: pgsql/src/include/executor/nodeHash.h,v 1.43 2007/01/28 23:21:26 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -28,9 +28,11 @@ extern void ExecHashTableDestroy(HashJoinTable hashtable);
|
|||||||
extern void ExecHashTableInsert(HashJoinTable hashtable,
|
extern void ExecHashTableInsert(HashJoinTable hashtable,
|
||||||
TupleTableSlot *slot,
|
TupleTableSlot *slot,
|
||||||
uint32 hashvalue);
|
uint32 hashvalue);
|
||||||
extern uint32 ExecHashGetHashValue(HashJoinTable hashtable,
|
extern bool ExecHashGetHashValue(HashJoinTable hashtable,
|
||||||
ExprContext *econtext,
|
ExprContext *econtext,
|
||||||
List *hashkeys);
|
List *hashkeys,
|
||||||
|
bool keep_nulls,
|
||||||
|
uint32 *hashvalue);
|
||||||
extern void ExecHashGetBucketAndBatch(HashJoinTable hashtable,
|
extern void ExecHashGetBucketAndBatch(HashJoinTable hashtable,
|
||||||
uint32 hashvalue,
|
uint32 hashvalue,
|
||||||
int *bucketno,
|
int *bucketno,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user