diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c index a4803708ad4..f03df3d5999 100644 --- a/src/backend/access/hash/hashfunc.c +++ b/src/backend/access/hash/hashfunc.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.45.2.1 2005/12/22 22:50:06 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.45.2.2 2007/06/01 15:58:09 tgl Exp $ * * NOTES * These functions are stored in pg_amproc. For each operator class @@ -267,6 +267,31 @@ hash_any(register const unsigned char *k, register int keylen) /* case 0: nothing left to add */ } mix(a, b, c); + + /* report the result */ + return UInt32GetDatum(c); +} + +/* + * hash_uint32() -- hash a 32-bit value + * + * This has the same result (at least on little-endian machines) as + * hash_any(&k, sizeof(uint32)) + * but is faster and doesn't force the caller to store k into memory. + */ +Datum +hash_uint32(uint32 k) +{ + register uint32 a, + b, + c; + + a = 0x9e3779b9 + k; + b = 0x9e3779b9; + c = 3923095 + (uint32) sizeof(uint32); + + mix(a, b, c); + /* report the result */ return UInt32GetDatum(c); } diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index a40423179d1..826cafe8737 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.96.2.2 2005/11/23 20:28:04 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.96.2.3 2007/06/01 15:58:09 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -20,6 +20,7 @@ */ #include "postgres.h" +#include "access/hash.h" #include "executor/execdebug.h" #include "executor/hashjoin.h" #include "executor/instrument.h" @@ -713,9 +714,11 @@ ExecHashGetHashValue(HashJoinTable hashtable, * chains), and must only cause the batch number to remain the same or * increase. Our algorithm is * bucketno = hashvalue MOD nbuckets - * batchno = (hashvalue DIV nbuckets) MOD nbatch - * where nbuckets should preferably be prime so that all bits of the - * hash value can affect both bucketno and batchno. + * batchno = hash_uint32(hashvalue) MOD nbatch + * which gives reasonably independent bucket and batch numbers in the face + * of some rather poorly-implemented hash functions in hashfunc.c. (This + * will change in PG 8.3.) + * * nbuckets doesn't change over the course of the join. * * nbatch is always a power of 2; we increase it only by doubling it. This @@ -734,7 +737,7 @@ ExecHashGetBucketAndBatch(HashJoinTable hashtable, { *bucketno = hashvalue % nbuckets; /* since nbatch is a power of 2, can do MOD by masking */ - *batchno = (hashvalue / nbuckets) & (nbatch - 1); + *batchno = hash_uint32(hashvalue) & (nbatch - 1); } else { diff --git a/src/include/access/hash.h b/src/include/access/hash.h index 03d2fc6c12e..27861bb7243 100644 --- a/src/include/access/hash.h +++ b/src/include/access/hash.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/hash.h,v 1.63.2.1 2007/04/19 20:24:18 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/hash.h,v 1.63.2.2 2007/06/01 15:58:09 tgl Exp $ * * NOTES * modeled after Margo Seltzer's hash implementation for unix. @@ -263,6 +263,7 @@ extern Datum hashname(PG_FUNCTION_ARGS); extern Datum hashtext(PG_FUNCTION_ARGS); extern Datum hashvarlena(PG_FUNCTION_ARGS); extern Datum hash_any(register const unsigned char *k, register int keylen); +extern Datum hash_uint32(uint32 k); /* private routines */