1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-06 07:49:08 +03:00

Introduce 64-bit hash functions with a 64-bit seed.

This will be useful for hash partitioning, which needs a way to seed
the hash functions to avoid problems such as a hash index on a hash
partitioned table clumping all values into a small portion of the
bucket space; it's also useful for anything that wants a 64-bit hash
value rather than a 32-bit hash value.

Just in case somebody wants a 64-bit hash value that is compatible
with the existing 32-bit hash values, make the low 32-bits of the
64-bit hash value match the 32-bit hash value when the seed is 0.

Robert Haas and Amul Sul

Discussion: http://postgr.es/m/CA+Tgmoafx2yoJuhCQQOL5CocEi-w_uG4S2xT0EtgiJnPGcHW3g@mail.gmail.com
This commit is contained in:
Robert Haas
2017-08-31 22:21:21 -04:00
parent 2d44c58c79
commit 81c5e46c49
33 changed files with 1555 additions and 42 deletions

View File

@@ -46,18 +46,36 @@ hashchar(PG_FUNCTION_ARGS)
return hash_uint32((int32) PG_GETARG_CHAR(0));
}
Datum
hashcharextended(PG_FUNCTION_ARGS)
{
return hash_uint32_extended((int32) PG_GETARG_CHAR(0), PG_GETARG_INT64(1));
}
Datum
hashint2(PG_FUNCTION_ARGS)
{
return hash_uint32((int32) PG_GETARG_INT16(0));
}
Datum
hashint2extended(PG_FUNCTION_ARGS)
{
return hash_uint32_extended((int32) PG_GETARG_INT16(0), PG_GETARG_INT64(1));
}
Datum
hashint4(PG_FUNCTION_ARGS)
{
return hash_uint32(PG_GETARG_INT32(0));
}
Datum
hashint4extended(PG_FUNCTION_ARGS)
{
return hash_uint32_extended(PG_GETARG_INT32(0), PG_GETARG_INT64(1));
}
Datum
hashint8(PG_FUNCTION_ARGS)
{
@@ -78,18 +96,43 @@ hashint8(PG_FUNCTION_ARGS)
return hash_uint32(lohalf);
}
Datum
hashint8extended(PG_FUNCTION_ARGS)
{
/* Same approach as hashint8 */
int64 val = PG_GETARG_INT64(0);
uint32 lohalf = (uint32) val;
uint32 hihalf = (uint32) (val >> 32);
lohalf ^= (val >= 0) ? hihalf : ~hihalf;
return hash_uint32_extended(lohalf, PG_GETARG_INT64(1));
}
Datum
hashoid(PG_FUNCTION_ARGS)
{
return hash_uint32((uint32) PG_GETARG_OID(0));
}
Datum
hashoidextended(PG_FUNCTION_ARGS)
{
return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
}
Datum
hashenum(PG_FUNCTION_ARGS)
{
return hash_uint32((uint32) PG_GETARG_OID(0));
}
Datum
hashenumextended(PG_FUNCTION_ARGS)
{
return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
}
Datum
hashfloat4(PG_FUNCTION_ARGS)
{
@@ -116,6 +159,21 @@ hashfloat4(PG_FUNCTION_ARGS)
return hash_any((unsigned char *) &key8, sizeof(key8));
}
Datum
hashfloat4extended(PG_FUNCTION_ARGS)
{
float4 key = PG_GETARG_FLOAT4(0);
uint64 seed = PG_GETARG_INT64(1);
float8 key8;
/* Same approach as hashfloat4 */
if (key == (float4) 0)
PG_RETURN_UINT64(seed);
key8 = key;
return hash_any_extended((unsigned char *) &key8, sizeof(key8), seed);
}
Datum
hashfloat8(PG_FUNCTION_ARGS)
{
@@ -132,6 +190,19 @@ hashfloat8(PG_FUNCTION_ARGS)
return hash_any((unsigned char *) &key, sizeof(key));
}
Datum
hashfloat8extended(PG_FUNCTION_ARGS)
{
float8 key = PG_GETARG_FLOAT8(0);
uint64 seed = PG_GETARG_INT64(1);
/* Same approach as hashfloat8 */
if (key == (float8) 0)
PG_RETURN_UINT64(seed);
return hash_any_extended((unsigned char *) &key, sizeof(key), seed);
}
Datum
hashoidvector(PG_FUNCTION_ARGS)
{
@@ -140,6 +211,16 @@ hashoidvector(PG_FUNCTION_ARGS)
return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid));
}
Datum
hashoidvectorextended(PG_FUNCTION_ARGS)
{
oidvector *key = (oidvector *) PG_GETARG_POINTER(0);
return hash_any_extended((unsigned char *) key->values,
key->dim1 * sizeof(Oid),
PG_GETARG_INT64(1));
}
Datum
hashname(PG_FUNCTION_ARGS)
{
@@ -148,6 +229,15 @@ hashname(PG_FUNCTION_ARGS)
return hash_any((unsigned char *) key, strlen(key));
}
Datum
hashnameextended(PG_FUNCTION_ARGS)
{
char *key = NameStr(*PG_GETARG_NAME(0));
return hash_any_extended((unsigned char *) key, strlen(key),
PG_GETARG_INT64(1));
}
Datum
hashtext(PG_FUNCTION_ARGS)
{
@@ -168,6 +258,22 @@ hashtext(PG_FUNCTION_ARGS)
return result;
}
Datum
hashtextextended(PG_FUNCTION_ARGS)
{
text *key = PG_GETARG_TEXT_PP(0);
Datum result;
/* Same approach as hashtext */
result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
VARSIZE_ANY_EXHDR(key),
PG_GETARG_INT64(1));
PG_FREE_IF_COPY(key, 0);
return result;
}
/*
* hashvarlena() can be used for any varlena datatype in which there are
* no non-significant bits, ie, distinct bitpatterns never compare as equal.
@@ -187,6 +293,21 @@ hashvarlena(PG_FUNCTION_ARGS)
return result;
}
Datum
hashvarlenaextended(PG_FUNCTION_ARGS)
{
struct varlena *key = PG_GETARG_VARLENA_PP(0);
Datum result;
result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
VARSIZE_ANY_EXHDR(key),
PG_GETARG_INT64(1));
PG_FREE_IF_COPY(key, 0);
return result;
}
/*
* This hash function was written by Bob Jenkins
* (bob_jenkins@burtleburtle.net), and superficially adapted
@@ -502,7 +623,227 @@ hash_any(register const unsigned char *k, register int keylen)
}
/*
* hash_uint32() -- hash a 32-bit value
* hash_any_extended() -- hash into a 64-bit value, using an optional seed
* k : the key (the unaligned variable-length array of bytes)
* len : the length of the key, counting by bytes
* seed : a 64-bit seed (0 means no seed)
*
* Returns a uint64 value. Otherwise similar to hash_any.
*/
Datum
hash_any_extended(register const unsigned char *k, register int keylen,
uint64 seed)
{
register uint32 a,
b,
c,
len;
/* Set up the internal state */
len = keylen;
a = b = c = 0x9e3779b9 + len + 3923095;
/* If the seed is non-zero, use it to perturb the internal state. */
if (seed != 0)
{
/*
* In essence, the seed is treated as part of the data being hashed,
* but for simplicity, we pretend that it's padded with four bytes of
* zeroes so that the seed constitutes a 12-byte chunk.
*/
a += (uint32) (seed >> 32);
b += (uint32) seed;
mix(a, b, c);
}
/* If the source pointer is word-aligned, we use word-wide fetches */
if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0)
{
/* Code path for aligned source data */
register const uint32 *ka = (const uint32 *) k;
/* handle most of the key */
while (len >= 12)
{
a += ka[0];
b += ka[1];
c += ka[2];
mix(a, b, c);
ka += 3;
len -= 12;
}
/* handle the last 11 bytes */
k = (const unsigned char *) ka;
#ifdef WORDS_BIGENDIAN
switch (len)
{
case 11:
c += ((uint32) k[10] << 8);
/* fall through */
case 10:
c += ((uint32) k[9] << 16);
/* fall through */
case 9:
c += ((uint32) k[8] << 24);
/* the lowest byte of c is reserved for the length */
/* fall through */
case 8:
b += ka[1];
a += ka[0];
break;
case 7:
b += ((uint32) k[6] << 8);
/* fall through */
case 6:
b += ((uint32) k[5] << 16);
/* fall through */
case 5:
b += ((uint32) k[4] << 24);
/* fall through */
case 4:
a += ka[0];
break;
case 3:
a += ((uint32) k[2] << 8);
/* fall through */
case 2:
a += ((uint32) k[1] << 16);
/* fall through */
case 1:
a += ((uint32) k[0] << 24);
/* case 0: nothing left to add */
}
#else /* !WORDS_BIGENDIAN */
switch (len)
{
case 11:
c += ((uint32) k[10] << 24);
/* fall through */
case 10:
c += ((uint32) k[9] << 16);
/* fall through */
case 9:
c += ((uint32) k[8] << 8);
/* the lowest byte of c is reserved for the length */
/* fall through */
case 8:
b += ka[1];
a += ka[0];
break;
case 7:
b += ((uint32) k[6] << 16);
/* fall through */
case 6:
b += ((uint32) k[5] << 8);
/* fall through */
case 5:
b += k[4];
/* fall through */
case 4:
a += ka[0];
break;
case 3:
a += ((uint32) k[2] << 16);
/* fall through */
case 2:
a += ((uint32) k[1] << 8);
/* fall through */
case 1:
a += k[0];
/* case 0: nothing left to add */
}
#endif /* WORDS_BIGENDIAN */
}
else
{
/* Code path for non-aligned source data */
/* handle most of the key */
while (len >= 12)
{
#ifdef WORDS_BIGENDIAN
a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24));
b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24));
c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24));
#else /* !WORDS_BIGENDIAN */
a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
#endif /* WORDS_BIGENDIAN */
mix(a, b, c);
k += 12;
len -= 12;
}
/* handle the last 11 bytes */
#ifdef WORDS_BIGENDIAN
switch (len) /* all the case statements fall through */
{
case 11:
c += ((uint32) k[10] << 8);
case 10:
c += ((uint32) k[9] << 16);
case 9:
c += ((uint32) k[8] << 24);
/* the lowest byte of c is reserved for the length */
case 8:
b += k[7];
case 7:
b += ((uint32) k[6] << 8);
case 6:
b += ((uint32) k[5] << 16);
case 5:
b += ((uint32) k[4] << 24);
case 4:
a += k[3];
case 3:
a += ((uint32) k[2] << 8);
case 2:
a += ((uint32) k[1] << 16);
case 1:
a += ((uint32) k[0] << 24);
/* case 0: nothing left to add */
}
#else /* !WORDS_BIGENDIAN */
switch (len) /* all the case statements fall through */
{
case 11:
c += ((uint32) k[10] << 24);
case 10:
c += ((uint32) k[9] << 16);
case 9:
c += ((uint32) k[8] << 8);
/* the lowest byte of c is reserved for the length */
case 8:
b += ((uint32) k[7] << 24);
case 7:
b += ((uint32) k[6] << 16);
case 6:
b += ((uint32) k[5] << 8);
case 5:
b += k[4];
case 4:
a += ((uint32) k[3] << 24);
case 3:
a += ((uint32) k[2] << 16);
case 2:
a += ((uint32) k[1] << 8);
case 1:
a += k[0];
/* case 0: nothing left to add */
}
#endif /* WORDS_BIGENDIAN */
}
final(a, b, c);
/* report the result */
PG_RETURN_UINT64(((uint64) b << 32) | c);
}
/*
* hash_uint32() -- hash a 32-bit value to a 32-bit value
*
* This has the same result as
* hash_any(&k, sizeof(uint32))
@@ -523,3 +864,32 @@ hash_uint32(uint32 k)
/* report the result */
return UInt32GetDatum(c);
}
/*
* hash_uint32_extended() -- hash a 32-bit value to a 64-bit value, with a seed
*
* Like hash_uint32, this is a convenience function.
*/
Datum
hash_uint32_extended(uint32 k, uint64 seed)
{
register uint32 a,
b,
c;
a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095;
if (seed != 0)
{
a += (uint32) (seed >> 32);
b += (uint32) seed;
mix(a, b, c);
}
a += k;
final(a, b, c);
/* report the result */
PG_RETURN_UINT64(((uint64) b << 32) | c);
}

View File

@@ -373,7 +373,7 @@ _hash_init(Relation rel, double num_tuples, ForkNumber forkNum)
if (ffactor < 10)
ffactor = 10;
procid = index_getprocid(rel, 1, HASHPROC);
procid = index_getprocid(rel, 1, HASHSTANDARD_PROC);
/*
* We initialize the metapage, the first N bucket pages, and the first

View File

@@ -85,7 +85,7 @@ _hash_datum2hashkey(Relation rel, Datum key)
Oid collation;
/* XXX assumes index has only one attribute */
procinfo = index_getprocinfo(rel, 1, HASHPROC);
procinfo = index_getprocinfo(rel, 1, HASHSTANDARD_PROC);
collation = rel->rd_indcollation[0];
return DatumGetUInt32(FunctionCall1Coll(procinfo, collation, key));
@@ -108,10 +108,10 @@ _hash_datum2hashkey_type(Relation rel, Datum key, Oid keytype)
hash_proc = get_opfamily_proc(rel->rd_opfamily[0],
keytype,
keytype,
HASHPROC);
HASHSTANDARD_PROC);
if (!RegProcedureIsValid(hash_proc))
elog(ERROR, "missing support function %d(%u,%u) for index \"%s\"",
HASHPROC, keytype, keytype,
HASHSTANDARD_PROC, keytype, keytype,
RelationGetRelationName(rel));
collation = rel->rd_indcollation[0];

View File

@@ -29,7 +29,7 @@
#include "utils/syscache.h"
static bool check_hash_func_signature(Oid funcid, Oid restype, Oid argtype);
static bool check_hash_func_signature(Oid funcid, int16 amprocnum, Oid argtype);
/*
@@ -105,8 +105,9 @@ hashvalidate(Oid opclassoid)
/* Check procedure numbers and function signatures */
switch (procform->amprocnum)
{
case HASHPROC:
if (!check_hash_func_signature(procform->amproc, INT4OID,
case HASHSTANDARD_PROC:
case HASHEXTENDED_PROC:
if (!check_hash_func_signature(procform->amproc, procform->amprocnum,
procform->amproclefttype))
{
ereport(INFO,
@@ -264,19 +265,37 @@ hashvalidate(Oid opclassoid)
* hacks in the core hash opclass definitions.
*/
static bool
check_hash_func_signature(Oid funcid, Oid restype, Oid argtype)
check_hash_func_signature(Oid funcid, int16 amprocnum, Oid argtype)
{
bool result = true;
Oid restype;
int16 nargs;
HeapTuple tp;
Form_pg_proc procform;
switch (amprocnum)
{
case HASHSTANDARD_PROC:
restype = INT4OID;
nargs = 1;
break;
case HASHEXTENDED_PROC:
restype = INT8OID;
nargs = 2;
break;
default:
elog(ERROR, "invalid amprocnum");
}
tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid));
if (!HeapTupleIsValid(tp))
elog(ERROR, "cache lookup failed for function %u", funcid);
procform = (Form_pg_proc) GETSTRUCT(tp);
if (procform->prorettype != restype || procform->proretset ||
procform->pronargs != 1)
procform->pronargs != nargs)
result = false;
if (!IsBinaryCoercible(argtype, procform->proargtypes.values[0]))
@@ -290,24 +309,29 @@ check_hash_func_signature(Oid funcid, Oid restype, Oid argtype)
* identity, not just its input type, because hashvarlena() takes
* INTERNAL and allowing any such function seems too scary.
*/
if (funcid == F_HASHINT4 &&
if ((funcid == F_HASHINT4 || funcid == F_HASHINT4EXTENDED) &&
(argtype == DATEOID ||
argtype == ABSTIMEOID || argtype == RELTIMEOID ||
argtype == XIDOID || argtype == CIDOID))
/* okay, allowed use of hashint4() */ ;
else if (funcid == F_TIMESTAMP_HASH &&
else if ((funcid == F_TIMESTAMP_HASH ||
funcid == F_TIMESTAMP_HASH_EXTENDED) &&
argtype == TIMESTAMPTZOID)
/* okay, allowed use of timestamp_hash() */ ;
else if (funcid == F_HASHCHAR &&
else if ((funcid == F_HASHCHAR || funcid == F_HASHCHAREXTENDED) &&
argtype == BOOLOID)
/* okay, allowed use of hashchar() */ ;
else if (funcid == F_HASHVARLENA &&
else if ((funcid == F_HASHVARLENA || funcid == F_HASHVARLENAEXTENDED) &&
argtype == BYTEAOID)
/* okay, allowed use of hashvarlena() */ ;
else
result = false;
}
/* If function takes a second argument, it must be for a 64-bit salt. */
if (nargs == 2 && procform->proargtypes.values[1] != INT8OID)
result = false;
ReleaseSysCache(tp);
return result;
}

View File

@@ -18,6 +18,7 @@
#include <limits.h>
#include "access/genam.h"
#include "access/hash.h"
#include "access/heapam.h"
#include "access/nbtree.h"
#include "access/htup_details.h"
@@ -1129,7 +1130,8 @@ assignProcTypes(OpFamilyMember *member, Oid amoid, Oid typeoid)
/*
* btree comparison procs must be 2-arg procs returning int4, while btree
* sortsupport procs must take internal and return void. hash support
* procs must be 1-arg procs returning int4. Otherwise we don't know.
* proc 1 must be a 1-arg proc returning int4, while proc 2 must be a
* 2-arg proc returning int8. Otherwise we don't know.
*/
if (amoid == BTREE_AM_OID)
{
@@ -1172,14 +1174,28 @@ assignProcTypes(OpFamilyMember *member, Oid amoid, Oid typeoid)
}
else if (amoid == HASH_AM_OID)
{
if (procform->pronargs != 1)
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("hash procedures must have one argument")));
if (procform->prorettype != INT4OID)
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("hash procedures must return integer")));
if (member->number == HASHSTANDARD_PROC)
{
if (procform->pronargs != 1)
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("hash procedure 1 must have one argument")));
if (procform->prorettype != INT4OID)
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("hash procedure 1 must return integer")));
}
else if (member->number == HASHEXTENDED_PROC)
{
if (procform->pronargs != 2)
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("hash procedure 2 must have two arguments")));
if (procform->prorettype != INT8OID)
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("hash procedure 2 must return bigint")));
}
/*
* If lefttype/righttype isn't specified, use the proc's input type

View File

@@ -16,6 +16,7 @@
#include <ctype.h>
#include "access/hash.h"
#include "access/htup_details.h"
#include "catalog/catalog.h"
#include "catalog/namespace.h"
@@ -717,6 +718,20 @@ hash_aclitem(PG_FUNCTION_ARGS)
PG_RETURN_UINT32((uint32) (a->ai_privs + a->ai_grantee + a->ai_grantor));
}
/*
* 64-bit hash function for aclitem.
*
* Similar to hash_aclitem, but accepts a seed and returns a uint64 value.
*/
Datum
hash_aclitem_extended(PG_FUNCTION_ARGS)
{
AclItem *a = PG_GETARG_ACLITEM_P(0);
uint64 seed = PG_GETARG_INT64(1);
uint32 sum = (uint32) (a->ai_privs + a->ai_grantee + a->ai_grantor);
return (seed == 0) ? UInt64GetDatum(sum) : hash_uint32_extended(sum, seed);
}
/*
* acldefault() --- create an ACL describing default access permissions

View File

@@ -20,6 +20,7 @@
#endif
#include <math.h>
#include "access/hash.h"
#include "access/htup_details.h"
#include "catalog/pg_type.h"
#include "funcapi.h"
@@ -4020,6 +4021,84 @@ hash_array(PG_FUNCTION_ARGS)
PG_RETURN_UINT32(result);
}
/*
* Returns 64-bit value by hashing a value to a 64-bit value, with a seed.
* Otherwise, similar to hash_array.
*/
Datum
hash_array_extended(PG_FUNCTION_ARGS)
{
AnyArrayType *array = PG_GETARG_ANY_ARRAY(0);
uint64 seed = PG_GETARG_INT64(1);
int ndims = AARR_NDIM(array);
int *dims = AARR_DIMS(array);
Oid element_type = AARR_ELEMTYPE(array);
uint64 result = 1;
int nitems;
TypeCacheEntry *typentry;
int typlen;
bool typbyval;
char typalign;
int i;
array_iter iter;
FunctionCallInfoData locfcinfo;
typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra;
if (typentry == NULL ||
typentry->type_id != element_type)
{
typentry = lookup_type_cache(element_type,
TYPECACHE_HASH_EXTENDED_PROC_FINFO);
if (!OidIsValid(typentry->hash_extended_proc_finfo.fn_oid))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_FUNCTION),
errmsg("could not identify an extended hash function for type %s",
format_type_be(element_type))));
fcinfo->flinfo->fn_extra = (void *) typentry;
}
typlen = typentry->typlen;
typbyval = typentry->typbyval;
typalign = typentry->typalign;
InitFunctionCallInfoData(locfcinfo, &typentry->hash_extended_proc_finfo, 2,
InvalidOid, NULL, NULL);
/* Loop over source data */
nitems = ArrayGetNItems(ndims, dims);
array_iter_setup(&iter, array);
for (i = 0; i < nitems; i++)
{
Datum elt;
bool isnull;
uint64 elthash;
/* Get element, checking for NULL */
elt = array_iter_next(&iter, &isnull, i, typlen, typbyval, typalign);
if (isnull)
{
elthash = 0;
}
else
{
/* Apply the hash function */
locfcinfo.arg[0] = elt;
locfcinfo.arg[1] = seed;
locfcinfo.argnull[0] = false;
locfcinfo.argnull[1] = false;
locfcinfo.isnull = false;
elthash = DatumGetUInt64(FunctionCallInvoke(&locfcinfo));
}
result = (result << 5) - result + elthash;
}
AARR_FREE_IF_COPY(array, 0);
PG_RETURN_UINT64(result);
}
/*-----------------------------------------------------------------------------
* array overlap/containment comparisons

View File

@@ -1508,6 +1508,12 @@ time_hash(PG_FUNCTION_ARGS)
return hashint8(fcinfo);
}
Datum
time_hash_extended(PG_FUNCTION_ARGS)
{
return hashint8extended(fcinfo);
}
Datum
time_larger(PG_FUNCTION_ARGS)
{
@@ -2213,6 +2219,21 @@ timetz_hash(PG_FUNCTION_ARGS)
PG_RETURN_UINT32(thash);
}
Datum
timetz_hash_extended(PG_FUNCTION_ARGS)
{
TimeTzADT *key = PG_GETARG_TIMETZADT_P(0);
uint64 seed = PG_GETARG_DATUM(1);
uint64 thash;
/* Same approach as timetz_hash */
thash = DatumGetUInt64(DirectFunctionCall2(hashint8extended,
Int64GetDatumFast(key->time),
seed));
thash ^= DatumGetUInt64(hash_uint32_extended(key->zone, seed));
PG_RETURN_UINT64(thash);
}
Datum
timetz_larger(PG_FUNCTION_ARGS)
{

View File

@@ -291,3 +291,46 @@ jsonb_hash(PG_FUNCTION_ARGS)
PG_FREE_IF_COPY(jb, 0);
PG_RETURN_INT32(hash);
}
Datum
jsonb_hash_extended(PG_FUNCTION_ARGS)
{
Jsonb *jb = PG_GETARG_JSONB(0);
uint64 seed = PG_GETARG_INT64(1);
JsonbIterator *it;
JsonbValue v;
JsonbIteratorToken r;
uint64 hash = 0;
if (JB_ROOT_COUNT(jb) == 0)
PG_RETURN_UINT64(seed);
it = JsonbIteratorInit(&jb->root);
while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
{
switch (r)
{
/* Rotation is left to JsonbHashScalarValueExtended() */
case WJB_BEGIN_ARRAY:
hash ^= ((UINT64CONST(JB_FARRAY) << 32) | UINT64CONST(JB_FARRAY));
break;
case WJB_BEGIN_OBJECT:
hash ^= ((UINT64CONST(JB_FOBJECT) << 32) | UINT64CONST(JB_FOBJECT));
break;
case WJB_KEY:
case WJB_VALUE:
case WJB_ELEM:
JsonbHashScalarValueExtended(&v, &hash, seed);
break;
case WJB_END_ARRAY:
case WJB_END_OBJECT:
break;
default:
elog(ERROR, "invalid JsonbIteratorNext rc: %d", (int) r);
}
}
PG_FREE_IF_COPY(jb, 0);
PG_RETURN_UINT64(hash);
}

View File

@@ -1249,6 +1249,49 @@ JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash)
*hash ^= tmp;
}
/*
* Hash a value to a 64-bit value, with a seed. Otherwise, similar to
* JsonbHashScalarValue.
*/
void
JsonbHashScalarValueExtended(const JsonbValue *scalarVal, uint64 *hash,
uint64 seed)
{
uint64 tmp;
switch (scalarVal->type)
{
case jbvNull:
tmp = seed + 0x01;
break;
case jbvString:
tmp = DatumGetUInt64(hash_any_extended((const unsigned char *) scalarVal->val.string.val,
scalarVal->val.string.len,
seed));
break;
case jbvNumeric:
tmp = DatumGetUInt64(DirectFunctionCall2(hash_numeric_extended,
NumericGetDatum(scalarVal->val.numeric),
UInt64GetDatum(seed)));
break;
case jbvBool:
if (seed)
tmp = DatumGetUInt64(DirectFunctionCall2(hashcharextended,
BoolGetDatum(scalarVal->val.boolean),
UInt64GetDatum(seed)));
else
tmp = scalarVal->val.boolean ? 0x02 : 0x04;
break;
default:
elog(ERROR, "invalid jsonb scalar type");
break;
}
*hash = ROTATE_HIGH_AND_LOW_32BITS(*hash);
*hash ^= tmp;
}
/*
* Are two scalar JsonbValues of the same type a and b equal?
*/

View File

@@ -271,6 +271,15 @@ hashmacaddr(PG_FUNCTION_ARGS)
return hash_any((unsigned char *) key, sizeof(macaddr));
}
Datum
hashmacaddrextended(PG_FUNCTION_ARGS)
{
macaddr *key = PG_GETARG_MACADDR_P(0);
return hash_any_extended((unsigned char *) key, sizeof(macaddr),
PG_GETARG_INT64(1));
}
/*
* Arithmetic functions: bitwise NOT, AND, OR.
*/

View File

@@ -407,6 +407,15 @@ hashmacaddr8(PG_FUNCTION_ARGS)
return hash_any((unsigned char *) key, sizeof(macaddr8));
}
Datum
hashmacaddr8extended(PG_FUNCTION_ARGS)
{
macaddr8 *key = PG_GETARG_MACADDR8_P(0);
return hash_any_extended((unsigned char *) key, sizeof(macaddr8),
PG_GETARG_INT64(1));
}
/*
* Arithmetic functions: bitwise NOT, AND, OR.
*/

View File

@@ -486,6 +486,16 @@ hashinet(PG_FUNCTION_ARGS)
return hash_any((unsigned char *) VARDATA_ANY(addr), addrsize + 2);
}
Datum
hashinetextended(PG_FUNCTION_ARGS)
{
inet *addr = PG_GETARG_INET_PP(0);
int addrsize = ip_addrsize(addr);
return hash_any_extended((unsigned char *) VARDATA_ANY(addr), addrsize + 2,
PG_GETARG_INT64(1));
}
/*
* Boolean network-inclusion tests.
*/

View File

@@ -2230,6 +2230,66 @@ hash_numeric(PG_FUNCTION_ARGS)
PG_RETURN_DATUM(result);
}
/*
* Returns 64-bit value by hashing a value to a 64-bit value, with a seed.
* Otherwise, similar to hash_numeric.
*/
Datum
hash_numeric_extended(PG_FUNCTION_ARGS)
{
Numeric key = PG_GETARG_NUMERIC(0);
uint64 seed = PG_GETARG_INT64(1);
Datum digit_hash;
Datum result;
int weight;
int start_offset;
int end_offset;
int i;
int hash_len;
NumericDigit *digits;
if (NUMERIC_IS_NAN(key))
PG_RETURN_UINT64(seed);
weight = NUMERIC_WEIGHT(key);
start_offset = 0;
end_offset = 0;
digits = NUMERIC_DIGITS(key);
for (i = 0; i < NUMERIC_NDIGITS(key); i++)
{
if (digits[i] != (NumericDigit) 0)
break;
start_offset++;
weight--;
}
if (NUMERIC_NDIGITS(key) == start_offset)
PG_RETURN_UINT64(seed - 1);
for (i = NUMERIC_NDIGITS(key) - 1; i >= 0; i--)
{
if (digits[i] != (NumericDigit) 0)
break;
end_offset++;
}
Assert(start_offset + end_offset < NUMERIC_NDIGITS(key));
hash_len = NUMERIC_NDIGITS(key) - start_offset - end_offset;
digit_hash = hash_any_extended((unsigned char *) (NUMERIC_DIGITS(key)
+ start_offset),
hash_len * sizeof(NumericDigit),
seed);
result = digit_hash ^ weight;
PG_RETURN_DATUM(result);
}
/* ----------------------------------------------------------------------
*

View File

@@ -179,6 +179,12 @@ pg_lsn_hash(PG_FUNCTION_ARGS)
return hashint8(fcinfo);
}
Datum
pg_lsn_hash_extended(PG_FUNCTION_ARGS)
{
return hashint8extended(fcinfo);
}
/*----------------------------------------------------------
* Arithmetic operators on PostgreSQL LSNs.

View File

@@ -1280,6 +1280,69 @@ hash_range(PG_FUNCTION_ARGS)
PG_RETURN_INT32(result);
}
/*
* Returns 64-bit value by hashing a value to a 64-bit value, with a seed.
* Otherwise, similar to hash_range.
*/
Datum
hash_range_extended(PG_FUNCTION_ARGS)
{
RangeType *r = PG_GETARG_RANGE(0);
uint64 seed = PG_GETARG_INT64(1);
uint64 result;
TypeCacheEntry *typcache;
TypeCacheEntry *scache;
RangeBound lower;
RangeBound upper;
bool empty;
char flags;
uint64 lower_hash;
uint64 upper_hash;
check_stack_depth();
typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r));
range_deserialize(typcache, r, &lower, &upper, &empty);
flags = range_get_flags(r);
scache = typcache->rngelemtype;
if (!OidIsValid(scache->hash_extended_proc_finfo.fn_oid))
{
scache = lookup_type_cache(scache->type_id,
TYPECACHE_HASH_EXTENDED_PROC_FINFO);
if (!OidIsValid(scache->hash_extended_proc_finfo.fn_oid))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_FUNCTION),
errmsg("could not identify a hash function for type %s",
format_type_be(scache->type_id))));
}
if (RANGE_HAS_LBOUND(flags))
lower_hash = DatumGetUInt64(FunctionCall2Coll(&scache->hash_extended_proc_finfo,
typcache->rng_collation,
lower.val,
seed));
else
lower_hash = 0;
if (RANGE_HAS_UBOUND(flags))
upper_hash = DatumGetUInt64(FunctionCall2Coll(&scache->hash_extended_proc_finfo,
typcache->rng_collation,
upper.val,
seed));
else
upper_hash = 0;
/* Merge hashes of flags and bounds */
result = hash_uint32_extended((uint32) flags, seed);
result ^= lower_hash;
result = ROTATE_HIGH_AND_LOW_32BITS(result);
result ^= upper_hash;
PG_RETURN_UINT64(result);
}
/*
*----------------------------------------------------------
* CANONICAL FUNCTIONS

View File

@@ -2113,6 +2113,11 @@ timestamp_hash(PG_FUNCTION_ARGS)
return hashint8(fcinfo);
}
Datum
timestamp_hash_extended(PG_FUNCTION_ARGS)
{
return hashint8extended(fcinfo);
}
/*
* Cross-type comparison functions for timestamp vs timestamptz
@@ -2419,6 +2424,20 @@ interval_hash(PG_FUNCTION_ARGS)
return DirectFunctionCall1(hashint8, Int64GetDatumFast(span64));
}
Datum
interval_hash_extended(PG_FUNCTION_ARGS)
{
Interval *interval = PG_GETARG_INTERVAL_P(0);
INT128 span = interval_cmp_value(interval);
int64 span64;
/* Same approach as interval_hash */
span64 = int128_to_int64(span);
return DirectFunctionCall2(hashint8extended, Int64GetDatumFast(span64),
PG_GETARG_DATUM(1));
}
/* overlaps_timestamp() --- implements the SQL OVERLAPS operator.
*
* Algorithm is per SQL spec. This is much harder than you'd think

View File

@@ -408,3 +408,11 @@ uuid_hash(PG_FUNCTION_ARGS)
return hash_any(key->data, UUID_LEN);
}
Datum
uuid_hash_extended(PG_FUNCTION_ARGS)
{
pg_uuid_t *key = PG_GETARG_UUID_P(0);
return hash_any_extended(key->data, UUID_LEN, PG_GETARG_INT64(1));
}

View File

@@ -947,6 +947,24 @@ hashbpchar(PG_FUNCTION_ARGS)
return result;
}
Datum
hashbpcharextended(PG_FUNCTION_ARGS)
{
BpChar *key = PG_GETARG_BPCHAR_PP(0);
char *keydata;
int keylen;
Datum result;
keydata = VARDATA_ANY(key);
keylen = bcTruelen(key);
result = hash_any_extended((unsigned char *) keydata, keylen,
PG_GETARG_INT64(1));
PG_FREE_IF_COPY(key, 0);
return result;
}
/*
* The following operators support character-by-character comparison

View File

@@ -490,8 +490,8 @@ get_compatible_hash_operators(Oid opno,
/*
* get_op_hash_functions
* Get the OID(s) of hash support function(s) compatible with the given
* operator, operating on its LHS and/or RHS datatype as required.
* Get the OID(s) of the standard hash support function(s) compatible with
* the given operator, operating on its LHS and/or RHS datatype as required.
*
* A function for the LHS type is sought and returned into *lhs_procno if
* lhs_procno isn't NULL. Similarly, a function for the RHS type is sought
@@ -542,7 +542,7 @@ get_op_hash_functions(Oid opno,
*lhs_procno = get_opfamily_proc(aform->amopfamily,
aform->amoplefttype,
aform->amoplefttype,
HASHPROC);
HASHSTANDARD_PROC);
if (!OidIsValid(*lhs_procno))
continue;
/* Matching LHS found, done if caller doesn't want RHS */
@@ -564,7 +564,7 @@ get_op_hash_functions(Oid opno,
*rhs_procno = get_opfamily_proc(aform->amopfamily,
aform->amoprighttype,
aform->amoprighttype,
HASHPROC);
HASHSTANDARD_PROC);
if (!OidIsValid(*rhs_procno))
{
/* Forget any LHS function from this opfamily */

View File

@@ -90,6 +90,7 @@ static TypeCacheEntry *firstDomainTypeEntry = NULL;
#define TCFLAGS_HAVE_FIELD_EQUALITY 0x1000
#define TCFLAGS_HAVE_FIELD_COMPARE 0x2000
#define TCFLAGS_CHECKED_DOMAIN_CONSTRAINTS 0x4000
#define TCFLAGS_CHECKED_HASH_EXTENDED_PROC 0x8000
/*
* Data stored about a domain type's constraints. Note that we do not create
@@ -307,6 +308,8 @@ lookup_type_cache(Oid type_id, int flags)
flags |= TYPECACHE_HASH_OPFAMILY;
if ((flags & (TYPECACHE_HASH_PROC | TYPECACHE_HASH_PROC_FINFO |
TYPECACHE_HASH_EXTENDED_PROC |
TYPECACHE_HASH_EXTENDED_PROC_FINFO |
TYPECACHE_HASH_OPFAMILY)) &&
!(typentry->flags & TCFLAGS_CHECKED_HASH_OPCLASS))
{
@@ -329,6 +332,7 @@ lookup_type_cache(Oid type_id, int flags)
* decision is still good.
*/
typentry->flags &= ~(TCFLAGS_CHECKED_HASH_PROC);
typentry->flags &= ~(TCFLAGS_CHECKED_HASH_EXTENDED_PROC);
typentry->flags |= TCFLAGS_CHECKED_HASH_OPCLASS;
}
@@ -372,11 +376,12 @@ lookup_type_cache(Oid type_id, int flags)
typentry->eq_opr = eq_opr;
/*
* Reset info about hash function whenever we pick up new info about
* equality operator. This is so we can ensure that the hash function
* matches the operator.
* Reset info about hash functions whenever we pick up new info about
* equality operator. This is so we can ensure that the hash functions
* match the operator.
*/
typentry->flags &= ~(TCFLAGS_CHECKED_HASH_PROC);
typentry->flags &= ~(TCFLAGS_CHECKED_HASH_EXTENDED_PROC);
typentry->flags |= TCFLAGS_CHECKED_EQ_OPR;
}
if ((flags & TYPECACHE_LT_OPR) &&
@@ -467,7 +472,7 @@ lookup_type_cache(Oid type_id, int flags)
hash_proc = get_opfamily_proc(typentry->hash_opf,
typentry->hash_opintype,
typentry->hash_opintype,
HASHPROC);
HASHSTANDARD_PROC);
/*
* As above, make sure hash_array will succeed. We don't currently
@@ -485,6 +490,43 @@ lookup_type_cache(Oid type_id, int flags)
typentry->hash_proc = hash_proc;
typentry->flags |= TCFLAGS_CHECKED_HASH_PROC;
}
if ((flags & (TYPECACHE_HASH_EXTENDED_PROC |
TYPECACHE_HASH_EXTENDED_PROC_FINFO)) &&
!(typentry->flags & TCFLAGS_CHECKED_HASH_EXTENDED_PROC))
{
Oid hash_extended_proc = InvalidOid;
/*
* We insist that the eq_opr, if one has been determined, match the
* hash opclass; else report there is no hash function.
*/
if (typentry->hash_opf != InvalidOid &&
(!OidIsValid(typentry->eq_opr) ||
typentry->eq_opr == get_opfamily_member(typentry->hash_opf,
typentry->hash_opintype,
typentry->hash_opintype,
HTEqualStrategyNumber)))
hash_extended_proc = get_opfamily_proc(typentry->hash_opf,
typentry->hash_opintype,
typentry->hash_opintype,
HASHEXTENDED_PROC);
/*
* As above, make sure hash_array_extended will succeed. We don't
* currently support hashing for composite types, but when we do,
* we'll need more logic here to check that case too.
*/
if (hash_extended_proc == F_HASH_ARRAY_EXTENDED &&
!array_element_has_hashing(typentry))
hash_extended_proc = InvalidOid;
/* Force update of hash_proc_finfo only if we're changing state */
if (typentry->hash_extended_proc != hash_extended_proc)
typentry->hash_extended_proc_finfo.fn_oid = InvalidOid;
typentry->hash_extended_proc = hash_extended_proc;
typentry->flags |= TCFLAGS_CHECKED_HASH_EXTENDED_PROC;
}
/*
* Set up fmgr lookup info as requested
@@ -523,6 +565,14 @@ lookup_type_cache(Oid type_id, int flags)
fmgr_info_cxt(typentry->hash_proc, &typentry->hash_proc_finfo,
CacheMemoryContext);
}
if ((flags & TYPECACHE_HASH_EXTENDED_PROC_FINFO) &&
typentry->hash_extended_proc_finfo.fn_oid == InvalidOid &&
typentry->hash_extended_proc != InvalidOid)
{
fmgr_info_cxt(typentry->hash_extended_proc,
&typentry->hash_extended_proc_finfo,
CacheMemoryContext);
}
/*
* If it's a composite type (row type), get tupdesc if requested