1
0
mirror of https://github.com/postgres/postgres.git synced 2025-08-28 18:48:04 +03:00
Files
postgres/src/backend/access/hash/hashfunc.c
Robert Haas 05d8449e73 Move src/backend/utils/hash/hashfn.c to src/common
This also involves renaming src/include/utils/hashutils.h, which
becomes src/include/common/hashfn.h. Perhaps an argument can be
made for keeping the hashutils.h name, but it seemed more
consistent to make it match the name of the file, and also more
descriptive of what is actually going on here.

Patch by me, reviewed by Suraj Kharage and Mark Dilger. Off-list
advice on how not to break the Windows build from Davinder Singh
and Amit Kapila.

Discussion: http://postgr.es/m/CA+TgmoaRiG4TXND8QuM6JXFRkM_1wL2ZNhzaUKsuec9-4yrkgw@mail.gmail.com
2020-02-27 09:25:41 +05:30

389 lines
9.1 KiB
C

/*-------------------------------------------------------------------------
*
* hashfunc.c
* Support functions for hash access method.
*
* Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/backend/access/hash/hashfunc.c
*
* NOTES
* These functions are stored in pg_amproc. For each operator class
* defined for hash indexes, they compute the hash value of the argument.
*
* Additional hash functions appear in /utils/adt/ files for various
* specialized datatypes.
*
* It is expected that every bit of a hash function's 32-bit result is
* as random as every other; failure to ensure this is likely to lead
* to poor performance of hash joins, for example. In most cases a hash
* function should use hash_any() or its variant hash_uint32().
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/hash.h"
#include "catalog/pg_collation.h"
#include "common/hashfn.h"
#include "utils/builtins.h"
#include "utils/pg_locale.h"
/*
* Datatype-specific hash functions.
*
* These support both hash indexes and hash joins.
*
* NOTE: some of these are also used by catcache operations, without
* any direct connection to hash indexes. Also, the common hash_any
* routine is also used by dynahash tables.
*/
/* Note: this is used for both "char" and boolean datatypes */
Datum
hashchar(PG_FUNCTION_ARGS)
{
return hash_uint32((int32) PG_GETARG_CHAR(0));
}
Datum
hashcharextended(PG_FUNCTION_ARGS)
{
return hash_uint32_extended((int32) PG_GETARG_CHAR(0), PG_GETARG_INT64(1));
}
Datum
hashint2(PG_FUNCTION_ARGS)
{
return hash_uint32((int32) PG_GETARG_INT16(0));
}
Datum
hashint2extended(PG_FUNCTION_ARGS)
{
return hash_uint32_extended((int32) PG_GETARG_INT16(0), PG_GETARG_INT64(1));
}
Datum
hashint4(PG_FUNCTION_ARGS)
{
return hash_uint32(PG_GETARG_INT32(0));
}
Datum
hashint4extended(PG_FUNCTION_ARGS)
{
return hash_uint32_extended(PG_GETARG_INT32(0), PG_GETARG_INT64(1));
}
Datum
hashint8(PG_FUNCTION_ARGS)
{
/*
* The idea here is to produce a hash value compatible with the values
* produced by hashint4 and hashint2 for logically equal inputs; this is
* necessary to support cross-type hash joins across these input types.
* Since all three types are signed, we can xor the high half of the int8
* value if the sign is positive, or the complement of the high half when
* the sign is negative.
*/
int64 val = PG_GETARG_INT64(0);
uint32 lohalf = (uint32) val;
uint32 hihalf = (uint32) (val >> 32);
lohalf ^= (val >= 0) ? hihalf : ~hihalf;
return hash_uint32(lohalf);
}
Datum
hashint8extended(PG_FUNCTION_ARGS)
{
/* Same approach as hashint8 */
int64 val = PG_GETARG_INT64(0);
uint32 lohalf = (uint32) val;
uint32 hihalf = (uint32) (val >> 32);
lohalf ^= (val >= 0) ? hihalf : ~hihalf;
return hash_uint32_extended(lohalf, PG_GETARG_INT64(1));
}
Datum
hashoid(PG_FUNCTION_ARGS)
{
return hash_uint32((uint32) PG_GETARG_OID(0));
}
Datum
hashoidextended(PG_FUNCTION_ARGS)
{
return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
}
Datum
hashenum(PG_FUNCTION_ARGS)
{
return hash_uint32((uint32) PG_GETARG_OID(0));
}
Datum
hashenumextended(PG_FUNCTION_ARGS)
{
return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
}
Datum
hashfloat4(PG_FUNCTION_ARGS)
{
float4 key = PG_GETARG_FLOAT4(0);
float8 key8;
/*
* On IEEE-float machines, minus zero and zero have different bit patterns
* but should compare as equal. We must ensure that they have the same
* hash value, which is most reliably done this way:
*/
if (key == (float4) 0)
PG_RETURN_UINT32(0);
/*
* To support cross-type hashing of float8 and float4, we want to return
* the same hash value hashfloat8 would produce for an equal float8 value.
* So, widen the value to float8 and hash that. (We must do this rather
* than have hashfloat8 try to narrow its value to float4; that could fail
* on overflow.)
*/
key8 = key;
return hash_any((unsigned char *) &key8, sizeof(key8));
}
Datum
hashfloat4extended(PG_FUNCTION_ARGS)
{
float4 key = PG_GETARG_FLOAT4(0);
uint64 seed = PG_GETARG_INT64(1);
float8 key8;
/* Same approach as hashfloat4 */
if (key == (float4) 0)
PG_RETURN_UINT64(seed);
key8 = key;
return hash_any_extended((unsigned char *) &key8, sizeof(key8), seed);
}
Datum
hashfloat8(PG_FUNCTION_ARGS)
{
float8 key = PG_GETARG_FLOAT8(0);
/*
* On IEEE-float machines, minus zero and zero have different bit patterns
* but should compare as equal. We must ensure that they have the same
* hash value, which is most reliably done this way:
*/
if (key == (float8) 0)
PG_RETURN_UINT32(0);
return hash_any((unsigned char *) &key, sizeof(key));
}
Datum
hashfloat8extended(PG_FUNCTION_ARGS)
{
float8 key = PG_GETARG_FLOAT8(0);
uint64 seed = PG_GETARG_INT64(1);
/* Same approach as hashfloat8 */
if (key == (float8) 0)
PG_RETURN_UINT64(seed);
return hash_any_extended((unsigned char *) &key, sizeof(key), seed);
}
Datum
hashoidvector(PG_FUNCTION_ARGS)
{
oidvector *key = (oidvector *) PG_GETARG_POINTER(0);
return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid));
}
Datum
hashoidvectorextended(PG_FUNCTION_ARGS)
{
oidvector *key = (oidvector *) PG_GETARG_POINTER(0);
return hash_any_extended((unsigned char *) key->values,
key->dim1 * sizeof(Oid),
PG_GETARG_INT64(1));
}
Datum
hashname(PG_FUNCTION_ARGS)
{
char *key = NameStr(*PG_GETARG_NAME(0));
return hash_any((unsigned char *) key, strlen(key));
}
Datum
hashnameextended(PG_FUNCTION_ARGS)
{
char *key = NameStr(*PG_GETARG_NAME(0));
return hash_any_extended((unsigned char *) key, strlen(key),
PG_GETARG_INT64(1));
}
Datum
hashtext(PG_FUNCTION_ARGS)
{
text *key = PG_GETARG_TEXT_PP(0);
Oid collid = PG_GET_COLLATION();
pg_locale_t mylocale = 0;
Datum result;
if (!collid)
ereport(ERROR,
(errcode(ERRCODE_INDETERMINATE_COLLATION),
errmsg("could not determine which collation to use for string hashing"),
errhint("Use the COLLATE clause to set the collation explicitly.")));
if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
mylocale = pg_newlocale_from_collation(collid);
if (!mylocale || mylocale->deterministic)
{
result = hash_any((unsigned char *) VARDATA_ANY(key),
VARSIZE_ANY_EXHDR(key));
}
else
{
#ifdef USE_ICU
if (mylocale->provider == COLLPROVIDER_ICU)
{
int32_t ulen = -1;
UChar *uchar = NULL;
Size bsize;
uint8_t *buf;
ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
bsize = ucol_getSortKey(mylocale->info.icu.ucol,
uchar, ulen, NULL, 0);
buf = palloc(bsize);
ucol_getSortKey(mylocale->info.icu.ucol,
uchar, ulen, buf, bsize);
result = hash_any(buf, bsize);
pfree(buf);
}
else
#endif
/* shouldn't happen */
elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
}
/* Avoid leaking memory for toasted inputs */
PG_FREE_IF_COPY(key, 0);
return result;
}
Datum
hashtextextended(PG_FUNCTION_ARGS)
{
text *key = PG_GETARG_TEXT_PP(0);
Oid collid = PG_GET_COLLATION();
pg_locale_t mylocale = 0;
Datum result;
if (!collid)
ereport(ERROR,
(errcode(ERRCODE_INDETERMINATE_COLLATION),
errmsg("could not determine which collation to use for string hashing"),
errhint("Use the COLLATE clause to set the collation explicitly.")));
if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
mylocale = pg_newlocale_from_collation(collid);
if (!mylocale || mylocale->deterministic)
{
result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
VARSIZE_ANY_EXHDR(key),
PG_GETARG_INT64(1));
}
else
{
#ifdef USE_ICU
if (mylocale->provider == COLLPROVIDER_ICU)
{
int32_t ulen = -1;
UChar *uchar = NULL;
Size bsize;
uint8_t *buf;
ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
bsize = ucol_getSortKey(mylocale->info.icu.ucol,
uchar, ulen, NULL, 0);
buf = palloc(bsize);
ucol_getSortKey(mylocale->info.icu.ucol,
uchar, ulen, buf, bsize);
result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1));
pfree(buf);
}
else
#endif
/* shouldn't happen */
elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
}
PG_FREE_IF_COPY(key, 0);
return result;
}
/*
* hashvarlena() can be used for any varlena datatype in which there are
* no non-significant bits, ie, distinct bitpatterns never compare as equal.
*/
Datum
hashvarlena(PG_FUNCTION_ARGS)
{
struct varlena *key = PG_GETARG_VARLENA_PP(0);
Datum result;
result = hash_any((unsigned char *) VARDATA_ANY(key),
VARSIZE_ANY_EXHDR(key));
/* Avoid leaking memory for toasted inputs */
PG_FREE_IF_COPY(key, 0);
return result;
}
Datum
hashvarlenaextended(PG_FUNCTION_ARGS)
{
struct varlena *key = PG_GETARG_VARLENA_PP(0);
Datum result;
result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
VARSIZE_ANY_EXHDR(key),
PG_GETARG_INT64(1));
PG_FREE_IF_COPY(key, 0);
return result;
}