1
0
mirror of https://github.com/postgres/postgres.git synced 2025-04-21 12:05:57 +03:00
Robert Haas 05d8449e73 Move src/backend/utils/hash/hashfn.c to src/common
This also involves renaming src/include/utils/hashutils.h, which
becomes src/include/common/hashfn.h. Perhaps an argument can be
made for keeping the hashutils.h name, but it seemed more
consistent to make it match the name of the file, and also more
descriptive of what is actually going on here.

Patch by me, reviewed by Suraj Kharage and Mark Dilger. Off-list
advice on how not to break the Windows build from Davinder Singh
and Amit Kapila.

Discussion: http://postgr.es/m/CA+TgmoaRiG4TXND8QuM6JXFRkM_1wL2ZNhzaUKsuec9-4yrkgw@mail.gmail.com
2020-02-27 09:25:41 +05:30

439 lines
10 KiB
C

/*-------------------------------------------------------------------------
*
* uuid.c
* Functions for the built-in type "uuid".
*
* Copyright (c) 2007-2020, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/backend/utils/adt/uuid.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "common/hashfn.h"
#include "lib/hyperloglog.h"
#include "libpq/pqformat.h"
#include "port/pg_bswap.h"
#include "utils/builtins.h"
#include "utils/guc.h"
#include "utils/sortsupport.h"
#include "utils/uuid.h"
/* sortsupport for uuid */
typedef struct
{
int64 input_count; /* number of non-null values seen */
bool estimating; /* true if estimating cardinality */
hyperLogLogState abbr_card; /* cardinality estimator */
} uuid_sortsupport_state;
static void string_to_uuid(const char *source, pg_uuid_t *uuid);
static int uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2);
static int uuid_fast_cmp(Datum x, Datum y, SortSupport ssup);
static int uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup);
static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup);
static Datum uuid_abbrev_convert(Datum original, SortSupport ssup);
Datum
uuid_in(PG_FUNCTION_ARGS)
{
char *uuid_str = PG_GETARG_CSTRING(0);
pg_uuid_t *uuid;
uuid = (pg_uuid_t *) palloc(sizeof(*uuid));
string_to_uuid(uuid_str, uuid);
PG_RETURN_UUID_P(uuid);
}
Datum
uuid_out(PG_FUNCTION_ARGS)
{
pg_uuid_t *uuid = PG_GETARG_UUID_P(0);
static const char hex_chars[] = "0123456789abcdef";
StringInfoData buf;
int i;
initStringInfo(&buf);
for (i = 0; i < UUID_LEN; i++)
{
int hi;
int lo;
/*
* We print uuid values as a string of 8, 4, 4, 4, and then 12
* hexadecimal characters, with each group is separated by a hyphen
* ("-"). Therefore, add the hyphens at the appropriate places here.
*/
if (i == 4 || i == 6 || i == 8 || i == 10)
appendStringInfoChar(&buf, '-');
hi = uuid->data[i] >> 4;
lo = uuid->data[i] & 0x0F;
appendStringInfoChar(&buf, hex_chars[hi]);
appendStringInfoChar(&buf, hex_chars[lo]);
}
PG_RETURN_CSTRING(buf.data);
}
/*
* We allow UUIDs as a series of 32 hexadecimal digits with an optional dash
* after each group of 4 hexadecimal digits, and optionally surrounded by {}.
* (The canonical format 8x-4x-4x-4x-12x, where "nx" means n hexadecimal
* digits, is the only one used for output.)
*/
static void
string_to_uuid(const char *source, pg_uuid_t *uuid)
{
const char *src = source;
bool braces = false;
int i;
if (src[0] == '{')
{
src++;
braces = true;
}
for (i = 0; i < UUID_LEN; i++)
{
char str_buf[3];
if (src[0] == '\0' || src[1] == '\0')
goto syntax_error;
memcpy(str_buf, src, 2);
if (!isxdigit((unsigned char) str_buf[0]) ||
!isxdigit((unsigned char) str_buf[1]))
goto syntax_error;
str_buf[2] = '\0';
uuid->data[i] = (unsigned char) strtoul(str_buf, NULL, 16);
src += 2;
if (src[0] == '-' && (i % 2) == 1 && i < UUID_LEN - 1)
src++;
}
if (braces)
{
if (*src != '}')
goto syntax_error;
src++;
}
if (*src != '\0')
goto syntax_error;
return;
syntax_error:
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s: \"%s\"",
"uuid", source)));
}
Datum
uuid_recv(PG_FUNCTION_ARGS)
{
StringInfo buffer = (StringInfo) PG_GETARG_POINTER(0);
pg_uuid_t *uuid;
uuid = (pg_uuid_t *) palloc(UUID_LEN);
memcpy(uuid->data, pq_getmsgbytes(buffer, UUID_LEN), UUID_LEN);
PG_RETURN_POINTER(uuid);
}
Datum
uuid_send(PG_FUNCTION_ARGS)
{
pg_uuid_t *uuid = PG_GETARG_UUID_P(0);
StringInfoData buffer;
pq_begintypsend(&buffer);
pq_sendbytes(&buffer, (char *) uuid->data, UUID_LEN);
PG_RETURN_BYTEA_P(pq_endtypsend(&buffer));
}
/* internal uuid compare function */
static int
uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2)
{
return memcmp(arg1->data, arg2->data, UUID_LEN);
}
Datum
uuid_lt(PG_FUNCTION_ARGS)
{
pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) < 0);
}
Datum
uuid_le(PG_FUNCTION_ARGS)
{
pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) <= 0);
}
Datum
uuid_eq(PG_FUNCTION_ARGS)
{
pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) == 0);
}
Datum
uuid_ge(PG_FUNCTION_ARGS)
{
pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) >= 0);
}
Datum
uuid_gt(PG_FUNCTION_ARGS)
{
pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) > 0);
}
Datum
uuid_ne(PG_FUNCTION_ARGS)
{
pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) != 0);
}
/* handler for btree index operator */
Datum
uuid_cmp(PG_FUNCTION_ARGS)
{
pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
PG_RETURN_INT32(uuid_internal_cmp(arg1, arg2));
}
/*
* Sort support strategy routine
*/
Datum
uuid_sortsupport(PG_FUNCTION_ARGS)
{
SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
ssup->comparator = uuid_fast_cmp;
ssup->ssup_extra = NULL;
if (ssup->abbreviate)
{
uuid_sortsupport_state *uss;
MemoryContext oldcontext;
oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
uss = palloc(sizeof(uuid_sortsupport_state));
uss->input_count = 0;
uss->estimating = true;
initHyperLogLog(&uss->abbr_card, 10);
ssup->ssup_extra = uss;
ssup->comparator = uuid_cmp_abbrev;
ssup->abbrev_converter = uuid_abbrev_convert;
ssup->abbrev_abort = uuid_abbrev_abort;
ssup->abbrev_full_comparator = uuid_fast_cmp;
MemoryContextSwitchTo(oldcontext);
}
PG_RETURN_VOID();
}
/*
* SortSupport comparison func
*/
static int
uuid_fast_cmp(Datum x, Datum y, SortSupport ssup)
{
pg_uuid_t *arg1 = DatumGetUUIDP(x);
pg_uuid_t *arg2 = DatumGetUUIDP(y);
return uuid_internal_cmp(arg1, arg2);
}
/*
* Abbreviated key comparison func
*/
static int
uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup)
{
if (x > y)
return 1;
else if (x == y)
return 0;
else
return -1;
}
/*
* Callback for estimating effectiveness of abbreviated key optimization.
*
* We pay no attention to the cardinality of the non-abbreviated data, because
* there is no equality fast-path within authoritative uuid comparator.
*/
static bool
uuid_abbrev_abort(int memtupcount, SortSupport ssup)
{
uuid_sortsupport_state *uss = ssup->ssup_extra;
double abbr_card;
if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating)
return false;
abbr_card = estimateHyperLogLog(&uss->abbr_card);
/*
* If we have >100k distinct values, then even if we were sorting many
* billion rows we'd likely still break even, and the penalty of undoing
* that many rows of abbrevs would probably not be worth it. Stop even
* counting at that point.
*/
if (abbr_card > 100000.0)
{
#ifdef TRACE_SORT
if (trace_sort)
elog(LOG,
"uuid_abbrev: estimation ends at cardinality %f"
" after " INT64_FORMAT " values (%d rows)",
abbr_card, uss->input_count, memtupcount);
#endif
uss->estimating = false;
return false;
}
/*
* Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row
* fudge factor allows us to abort earlier on genuinely pathological data
* where we've had exactly one abbreviated value in the first 2k
* (non-null) rows.
*/
if (abbr_card < uss->input_count / 2000.0 + 0.5)
{
#ifdef TRACE_SORT
if (trace_sort)
elog(LOG,
"uuid_abbrev: aborting abbreviation at cardinality %f"
" below threshold %f after " INT64_FORMAT " values (%d rows)",
abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count,
memtupcount);
#endif
return true;
}
#ifdef TRACE_SORT
if (trace_sort)
elog(LOG,
"uuid_abbrev: cardinality %f after " INT64_FORMAT
" values (%d rows)", abbr_card, uss->input_count, memtupcount);
#endif
return false;
}
/*
* Conversion routine for sortsupport. Converts original uuid representation
* to abbreviated key representation. Our encoding strategy is simple -- pack
* the first `sizeof(Datum)` bytes of uuid data into a Datum (on little-endian
* machines, the bytes are stored in reverse order), and treat it as an
* unsigned integer.
*/
static Datum
uuid_abbrev_convert(Datum original, SortSupport ssup)
{
uuid_sortsupport_state *uss = ssup->ssup_extra;
pg_uuid_t *authoritative = DatumGetUUIDP(original);
Datum res;
memcpy(&res, authoritative->data, sizeof(Datum));
uss->input_count += 1;
if (uss->estimating)
{
uint32 tmp;
#if SIZEOF_DATUM == 8
tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
#else /* SIZEOF_DATUM != 8 */
tmp = (uint32) res;
#endif
addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
}
/*
* Byteswap on little-endian machines.
*
* This is needed so that uuid_cmp_abbrev() (an unsigned integer 3-way
* comparator) works correctly on all platforms. If we didn't do this,
* the comparator would have to call memcmp() with a pair of pointers to
* the first byte of each abbreviated key, which is slower.
*/
res = DatumBigEndianToNative(res);
return res;
}
/* hash index support */
Datum
uuid_hash(PG_FUNCTION_ARGS)
{
pg_uuid_t *key = PG_GETARG_UUID_P(0);
return hash_any(key->data, UUID_LEN);
}
Datum
uuid_hash_extended(PG_FUNCTION_ARGS)
{
pg_uuid_t *key = PG_GETARG_UUID_P(0);
return hash_any_extended(key->data, UUID_LEN, PG_GETARG_INT64(1));
}
Datum
gen_random_uuid(PG_FUNCTION_ARGS)
{
pg_uuid_t *uuid = palloc(UUID_LEN);
if (!pg_strong_random(uuid, UUID_LEN))
ereport(ERROR,
(errcode(ERRCODE_INTERNAL_ERROR),
errmsg("could not generate random values")));
/*
* Set magic numbers for a "version 4" (pseudorandom) UUID, see
* http://tools.ietf.org/html/rfc4122#section-4.4
*/
uuid->data[6] = (uuid->data[6] & 0x0f) | 0x40; /* time_hi_and_version */
uuid->data[8] = (uuid->data[8] & 0x3f) | 0x80; /* clock_seq_hi_and_reserved */
PG_RETURN_UUID_P(uuid);
}