mirror of
https://github.com/postgres/postgres.git
synced 2025-04-21 12:05:57 +03:00
This also involves renaming src/include/utils/hashutils.h, which becomes src/include/common/hashfn.h. Perhaps an argument can be made for keeping the hashutils.h name, but it seemed more consistent to make it match the name of the file, and also more descriptive of what is actually going on here. Patch by me, reviewed by Suraj Kharage and Mark Dilger. Off-list advice on how not to break the Windows build from Davinder Singh and Amit Kapila. Discussion: http://postgr.es/m/CA+TgmoaRiG4TXND8QuM6JXFRkM_1wL2ZNhzaUKsuec9-4yrkgw@mail.gmail.com
439 lines
10 KiB
C
439 lines
10 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* uuid.c
|
|
* Functions for the built-in type "uuid".
|
|
*
|
|
* Copyright (c) 2007-2020, PostgreSQL Global Development Group
|
|
*
|
|
* IDENTIFICATION
|
|
* src/backend/utils/adt/uuid.c
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
|
|
#include "postgres.h"
|
|
|
|
#include "common/hashfn.h"
|
|
#include "lib/hyperloglog.h"
|
|
#include "libpq/pqformat.h"
|
|
#include "port/pg_bswap.h"
|
|
#include "utils/builtins.h"
|
|
#include "utils/guc.h"
|
|
#include "utils/sortsupport.h"
|
|
#include "utils/uuid.h"
|
|
|
|
/* sortsupport for uuid */
|
|
typedef struct
|
|
{
|
|
int64 input_count; /* number of non-null values seen */
|
|
bool estimating; /* true if estimating cardinality */
|
|
|
|
hyperLogLogState abbr_card; /* cardinality estimator */
|
|
} uuid_sortsupport_state;
|
|
|
|
static void string_to_uuid(const char *source, pg_uuid_t *uuid);
|
|
static int uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2);
|
|
static int uuid_fast_cmp(Datum x, Datum y, SortSupport ssup);
|
|
static int uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup);
|
|
static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup);
|
|
static Datum uuid_abbrev_convert(Datum original, SortSupport ssup);
|
|
|
|
Datum
|
|
uuid_in(PG_FUNCTION_ARGS)
|
|
{
|
|
char *uuid_str = PG_GETARG_CSTRING(0);
|
|
pg_uuid_t *uuid;
|
|
|
|
uuid = (pg_uuid_t *) palloc(sizeof(*uuid));
|
|
string_to_uuid(uuid_str, uuid);
|
|
PG_RETURN_UUID_P(uuid);
|
|
}
|
|
|
|
Datum
|
|
uuid_out(PG_FUNCTION_ARGS)
|
|
{
|
|
pg_uuid_t *uuid = PG_GETARG_UUID_P(0);
|
|
static const char hex_chars[] = "0123456789abcdef";
|
|
StringInfoData buf;
|
|
int i;
|
|
|
|
initStringInfo(&buf);
|
|
for (i = 0; i < UUID_LEN; i++)
|
|
{
|
|
int hi;
|
|
int lo;
|
|
|
|
/*
|
|
* We print uuid values as a string of 8, 4, 4, 4, and then 12
|
|
* hexadecimal characters, with each group is separated by a hyphen
|
|
* ("-"). Therefore, add the hyphens at the appropriate places here.
|
|
*/
|
|
if (i == 4 || i == 6 || i == 8 || i == 10)
|
|
appendStringInfoChar(&buf, '-');
|
|
|
|
hi = uuid->data[i] >> 4;
|
|
lo = uuid->data[i] & 0x0F;
|
|
|
|
appendStringInfoChar(&buf, hex_chars[hi]);
|
|
appendStringInfoChar(&buf, hex_chars[lo]);
|
|
}
|
|
|
|
PG_RETURN_CSTRING(buf.data);
|
|
}
|
|
|
|
/*
|
|
* We allow UUIDs as a series of 32 hexadecimal digits with an optional dash
|
|
* after each group of 4 hexadecimal digits, and optionally surrounded by {}.
|
|
* (The canonical format 8x-4x-4x-4x-12x, where "nx" means n hexadecimal
|
|
* digits, is the only one used for output.)
|
|
*/
|
|
static void
|
|
string_to_uuid(const char *source, pg_uuid_t *uuid)
|
|
{
|
|
const char *src = source;
|
|
bool braces = false;
|
|
int i;
|
|
|
|
if (src[0] == '{')
|
|
{
|
|
src++;
|
|
braces = true;
|
|
}
|
|
|
|
for (i = 0; i < UUID_LEN; i++)
|
|
{
|
|
char str_buf[3];
|
|
|
|
if (src[0] == '\0' || src[1] == '\0')
|
|
goto syntax_error;
|
|
memcpy(str_buf, src, 2);
|
|
if (!isxdigit((unsigned char) str_buf[0]) ||
|
|
!isxdigit((unsigned char) str_buf[1]))
|
|
goto syntax_error;
|
|
|
|
str_buf[2] = '\0';
|
|
uuid->data[i] = (unsigned char) strtoul(str_buf, NULL, 16);
|
|
src += 2;
|
|
if (src[0] == '-' && (i % 2) == 1 && i < UUID_LEN - 1)
|
|
src++;
|
|
}
|
|
|
|
if (braces)
|
|
{
|
|
if (*src != '}')
|
|
goto syntax_error;
|
|
src++;
|
|
}
|
|
|
|
if (*src != '\0')
|
|
goto syntax_error;
|
|
|
|
return;
|
|
|
|
syntax_error:
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
|
errmsg("invalid input syntax for type %s: \"%s\"",
|
|
"uuid", source)));
|
|
}
|
|
|
|
Datum
|
|
uuid_recv(PG_FUNCTION_ARGS)
|
|
{
|
|
StringInfo buffer = (StringInfo) PG_GETARG_POINTER(0);
|
|
pg_uuid_t *uuid;
|
|
|
|
uuid = (pg_uuid_t *) palloc(UUID_LEN);
|
|
memcpy(uuid->data, pq_getmsgbytes(buffer, UUID_LEN), UUID_LEN);
|
|
PG_RETURN_POINTER(uuid);
|
|
}
|
|
|
|
Datum
|
|
uuid_send(PG_FUNCTION_ARGS)
|
|
{
|
|
pg_uuid_t *uuid = PG_GETARG_UUID_P(0);
|
|
StringInfoData buffer;
|
|
|
|
pq_begintypsend(&buffer);
|
|
pq_sendbytes(&buffer, (char *) uuid->data, UUID_LEN);
|
|
PG_RETURN_BYTEA_P(pq_endtypsend(&buffer));
|
|
}
|
|
|
|
/* internal uuid compare function */
|
|
static int
|
|
uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2)
|
|
{
|
|
return memcmp(arg1->data, arg2->data, UUID_LEN);
|
|
}
|
|
|
|
Datum
|
|
uuid_lt(PG_FUNCTION_ARGS)
|
|
{
|
|
pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
|
|
pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
|
|
|
|
PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) < 0);
|
|
}
|
|
|
|
Datum
|
|
uuid_le(PG_FUNCTION_ARGS)
|
|
{
|
|
pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
|
|
pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
|
|
|
|
PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) <= 0);
|
|
}
|
|
|
|
Datum
|
|
uuid_eq(PG_FUNCTION_ARGS)
|
|
{
|
|
pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
|
|
pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
|
|
|
|
PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) == 0);
|
|
}
|
|
|
|
Datum
|
|
uuid_ge(PG_FUNCTION_ARGS)
|
|
{
|
|
pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
|
|
pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
|
|
|
|
PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) >= 0);
|
|
}
|
|
|
|
Datum
|
|
uuid_gt(PG_FUNCTION_ARGS)
|
|
{
|
|
pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
|
|
pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
|
|
|
|
PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) > 0);
|
|
}
|
|
|
|
Datum
|
|
uuid_ne(PG_FUNCTION_ARGS)
|
|
{
|
|
pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
|
|
pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
|
|
|
|
PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) != 0);
|
|
}
|
|
|
|
/* handler for btree index operator */
|
|
Datum
|
|
uuid_cmp(PG_FUNCTION_ARGS)
|
|
{
|
|
pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
|
|
pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
|
|
|
|
PG_RETURN_INT32(uuid_internal_cmp(arg1, arg2));
|
|
}
|
|
|
|
/*
|
|
* Sort support strategy routine
|
|
*/
|
|
Datum
|
|
uuid_sortsupport(PG_FUNCTION_ARGS)
|
|
{
|
|
SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
|
|
|
|
ssup->comparator = uuid_fast_cmp;
|
|
ssup->ssup_extra = NULL;
|
|
|
|
if (ssup->abbreviate)
|
|
{
|
|
uuid_sortsupport_state *uss;
|
|
MemoryContext oldcontext;
|
|
|
|
oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
|
|
|
|
uss = palloc(sizeof(uuid_sortsupport_state));
|
|
uss->input_count = 0;
|
|
uss->estimating = true;
|
|
initHyperLogLog(&uss->abbr_card, 10);
|
|
|
|
ssup->ssup_extra = uss;
|
|
|
|
ssup->comparator = uuid_cmp_abbrev;
|
|
ssup->abbrev_converter = uuid_abbrev_convert;
|
|
ssup->abbrev_abort = uuid_abbrev_abort;
|
|
ssup->abbrev_full_comparator = uuid_fast_cmp;
|
|
|
|
MemoryContextSwitchTo(oldcontext);
|
|
}
|
|
|
|
PG_RETURN_VOID();
|
|
}
|
|
|
|
/*
|
|
* SortSupport comparison func
|
|
*/
|
|
static int
|
|
uuid_fast_cmp(Datum x, Datum y, SortSupport ssup)
|
|
{
|
|
pg_uuid_t *arg1 = DatumGetUUIDP(x);
|
|
pg_uuid_t *arg2 = DatumGetUUIDP(y);
|
|
|
|
return uuid_internal_cmp(arg1, arg2);
|
|
}
|
|
|
|
/*
|
|
* Abbreviated key comparison func
|
|
*/
|
|
static int
|
|
uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup)
|
|
{
|
|
if (x > y)
|
|
return 1;
|
|
else if (x == y)
|
|
return 0;
|
|
else
|
|
return -1;
|
|
}
|
|
|
|
/*
|
|
* Callback for estimating effectiveness of abbreviated key optimization.
|
|
*
|
|
* We pay no attention to the cardinality of the non-abbreviated data, because
|
|
* there is no equality fast-path within authoritative uuid comparator.
|
|
*/
|
|
static bool
|
|
uuid_abbrev_abort(int memtupcount, SortSupport ssup)
|
|
{
|
|
uuid_sortsupport_state *uss = ssup->ssup_extra;
|
|
double abbr_card;
|
|
|
|
if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating)
|
|
return false;
|
|
|
|
abbr_card = estimateHyperLogLog(&uss->abbr_card);
|
|
|
|
/*
|
|
* If we have >100k distinct values, then even if we were sorting many
|
|
* billion rows we'd likely still break even, and the penalty of undoing
|
|
* that many rows of abbrevs would probably not be worth it. Stop even
|
|
* counting at that point.
|
|
*/
|
|
if (abbr_card > 100000.0)
|
|
{
|
|
#ifdef TRACE_SORT
|
|
if (trace_sort)
|
|
elog(LOG,
|
|
"uuid_abbrev: estimation ends at cardinality %f"
|
|
" after " INT64_FORMAT " values (%d rows)",
|
|
abbr_card, uss->input_count, memtupcount);
|
|
#endif
|
|
uss->estimating = false;
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row
|
|
* fudge factor allows us to abort earlier on genuinely pathological data
|
|
* where we've had exactly one abbreviated value in the first 2k
|
|
* (non-null) rows.
|
|
*/
|
|
if (abbr_card < uss->input_count / 2000.0 + 0.5)
|
|
{
|
|
#ifdef TRACE_SORT
|
|
if (trace_sort)
|
|
elog(LOG,
|
|
"uuid_abbrev: aborting abbreviation at cardinality %f"
|
|
" below threshold %f after " INT64_FORMAT " values (%d rows)",
|
|
abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count,
|
|
memtupcount);
|
|
#endif
|
|
return true;
|
|
}
|
|
|
|
#ifdef TRACE_SORT
|
|
if (trace_sort)
|
|
elog(LOG,
|
|
"uuid_abbrev: cardinality %f after " INT64_FORMAT
|
|
" values (%d rows)", abbr_card, uss->input_count, memtupcount);
|
|
#endif
|
|
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* Conversion routine for sortsupport. Converts original uuid representation
|
|
* to abbreviated key representation. Our encoding strategy is simple -- pack
|
|
* the first `sizeof(Datum)` bytes of uuid data into a Datum (on little-endian
|
|
* machines, the bytes are stored in reverse order), and treat it as an
|
|
* unsigned integer.
|
|
*/
|
|
static Datum
|
|
uuid_abbrev_convert(Datum original, SortSupport ssup)
|
|
{
|
|
uuid_sortsupport_state *uss = ssup->ssup_extra;
|
|
pg_uuid_t *authoritative = DatumGetUUIDP(original);
|
|
Datum res;
|
|
|
|
memcpy(&res, authoritative->data, sizeof(Datum));
|
|
uss->input_count += 1;
|
|
|
|
if (uss->estimating)
|
|
{
|
|
uint32 tmp;
|
|
|
|
#if SIZEOF_DATUM == 8
|
|
tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
|
|
#else /* SIZEOF_DATUM != 8 */
|
|
tmp = (uint32) res;
|
|
#endif
|
|
|
|
addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
|
|
}
|
|
|
|
/*
|
|
* Byteswap on little-endian machines.
|
|
*
|
|
* This is needed so that uuid_cmp_abbrev() (an unsigned integer 3-way
|
|
* comparator) works correctly on all platforms. If we didn't do this,
|
|
* the comparator would have to call memcmp() with a pair of pointers to
|
|
* the first byte of each abbreviated key, which is slower.
|
|
*/
|
|
res = DatumBigEndianToNative(res);
|
|
|
|
return res;
|
|
}
|
|
|
|
/* hash index support */
|
|
Datum
|
|
uuid_hash(PG_FUNCTION_ARGS)
|
|
{
|
|
pg_uuid_t *key = PG_GETARG_UUID_P(0);
|
|
|
|
return hash_any(key->data, UUID_LEN);
|
|
}
|
|
|
|
Datum
|
|
uuid_hash_extended(PG_FUNCTION_ARGS)
|
|
{
|
|
pg_uuid_t *key = PG_GETARG_UUID_P(0);
|
|
|
|
return hash_any_extended(key->data, UUID_LEN, PG_GETARG_INT64(1));
|
|
}
|
|
|
|
Datum
|
|
gen_random_uuid(PG_FUNCTION_ARGS)
|
|
{
|
|
pg_uuid_t *uuid = palloc(UUID_LEN);
|
|
|
|
if (!pg_strong_random(uuid, UUID_LEN))
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INTERNAL_ERROR),
|
|
errmsg("could not generate random values")));
|
|
|
|
/*
|
|
* Set magic numbers for a "version 4" (pseudorandom) UUID, see
|
|
* http://tools.ietf.org/html/rfc4122#section-4.4
|
|
*/
|
|
uuid->data[6] = (uuid->data[6] & 0x0f) | 0x40; /* time_hi_and_version */
|
|
uuid->data[8] = (uuid->data[8] & 0x3f) | 0x80; /* clock_seq_hi_and_reserved */
|
|
|
|
PG_RETURN_UUID_P(uuid);
|
|
}
|