mirror of
https://github.com/postgres/postgres.git
synced 2025-05-11 05:41:32 +03:00
Implement SortSupport for macaddr data type
Introduces a scheme to produce abbreviated keys for the macaddr type. Bump catalog version. Author: Brandur Leach Reviewed-by: Julien Rouhaud, Peter Geoghegan https://commitfest.postgresql.org/13/743/
This commit is contained in:
parent
5baf869f74
commit
f90d23d0c5
@ -14,9 +14,13 @@
|
||||
#include "postgres.h"
|
||||
|
||||
#include "access/hash.h"
|
||||
#include "lib/hyperloglog.h"
|
||||
#include "libpq/pqformat.h"
|
||||
#include "port/pg_bswap.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/guc.h"
|
||||
#include "utils/inet.h"
|
||||
#include "utils/sortsupport.h"
|
||||
|
||||
|
||||
/*
|
||||
@ -29,6 +33,21 @@
|
||||
#define lobits(addr) \
|
||||
((unsigned long)(((addr)->d<<16)|((addr)->e<<8)|((addr)->f)))
|
||||
|
||||
/* sortsupport for macaddr */
|
||||
typedef struct
|
||||
{
|
||||
int64 input_count; /* number of non-null values seen */
|
||||
bool estimating; /* true if estimating cardinality */
|
||||
|
||||
hyperLogLogState abbr_card; /* cardinality estimator */
|
||||
} macaddr_sortsupport_state;
|
||||
|
||||
static int macaddr_cmp_internal(macaddr *a1, macaddr *a2);
|
||||
static int macaddr_fast_cmp(Datum x, Datum y, SortSupport ssup);
|
||||
static int macaddr_cmp_abbrev(Datum x, Datum y, SortSupport ssup);
|
||||
static bool macaddr_abbrev_abort(int memtupcount, SortSupport ssup);
|
||||
static Datum macaddr_abbrev_convert(Datum original, SortSupport ssup);
|
||||
|
||||
/*
|
||||
* MAC address reader. Accepts several common notations.
|
||||
*/
|
||||
@ -159,7 +178,7 @@ macaddr_send(PG_FUNCTION_ARGS)
|
||||
* Comparison function for sorting:
|
||||
*/
|
||||
|
||||
static int32
|
||||
static int
|
||||
macaddr_cmp_internal(macaddr *a1, macaddr *a2)
|
||||
{
|
||||
if (hibits(a1) < hibits(a2))
|
||||
@ -326,3 +345,194 @@ macaddr_trunc(PG_FUNCTION_ARGS)
|
||||
|
||||
PG_RETURN_MACADDR_P(result);
|
||||
}
|
||||
|
||||
/*
|
||||
* SortSupport strategy function. Populates a SortSupport struct with the
|
||||
* information necessary to use comparison by abbreviated keys.
|
||||
*/
|
||||
Datum
|
||||
macaddr_sortsupport(PG_FUNCTION_ARGS)
|
||||
{
|
||||
SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
|
||||
|
||||
ssup->comparator = macaddr_fast_cmp;
|
||||
ssup->ssup_extra = NULL;
|
||||
|
||||
if (ssup->abbreviate)
|
||||
{
|
||||
macaddr_sortsupport_state *uss;
|
||||
MemoryContext oldcontext;
|
||||
|
||||
oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
|
||||
|
||||
uss = palloc(sizeof(macaddr_sortsupport_state));
|
||||
uss->input_count = 0;
|
||||
uss->estimating = true;
|
||||
initHyperLogLog(&uss->abbr_card, 10);
|
||||
|
||||
ssup->ssup_extra = uss;
|
||||
|
||||
ssup->comparator = macaddr_cmp_abbrev;
|
||||
ssup->abbrev_converter = macaddr_abbrev_convert;
|
||||
ssup->abbrev_abort = macaddr_abbrev_abort;
|
||||
ssup->abbrev_full_comparator = macaddr_fast_cmp;
|
||||
|
||||
MemoryContextSwitchTo(oldcontext);
|
||||
}
|
||||
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
/*
|
||||
* SortSupport "traditional" comparison function. Pulls two MAC addresses from
|
||||
* the heap and runs a standard comparison on them.
|
||||
*/
|
||||
static int
|
||||
macaddr_fast_cmp(Datum x, Datum y, SortSupport ssup)
|
||||
{
|
||||
macaddr *arg1 = DatumGetMacaddrP(x);
|
||||
macaddr *arg2 = DatumGetMacaddrP(y);
|
||||
|
||||
return macaddr_cmp_internal(arg1, arg2);
|
||||
}
|
||||
|
||||
/*
|
||||
* SortSupport abbreviated key comparison function. Compares two MAC addresses
|
||||
* quickly by treating them like integers, and without having to go to the heap.
|
||||
*/
|
||||
static int
|
||||
macaddr_cmp_abbrev(Datum x, Datum y, SortSupport ssup)
|
||||
{
|
||||
if (x > y)
|
||||
return 1;
|
||||
else if (x == y)
|
||||
return 0;
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Callback for estimating effectiveness of abbreviated key optimization.
|
||||
*
|
||||
* We pay no attention to the cardinality of the non-abbreviated data, because
|
||||
* there is no equality fast-path within authoritative macaddr comparator.
|
||||
*/
|
||||
static bool
|
||||
macaddr_abbrev_abort(int memtupcount, SortSupport ssup)
|
||||
{
|
||||
macaddr_sortsupport_state *uss = ssup->ssup_extra;
|
||||
double abbr_card;
|
||||
|
||||
if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating)
|
||||
return false;
|
||||
|
||||
abbr_card = estimateHyperLogLog(&uss->abbr_card);
|
||||
|
||||
/*
|
||||
* If we have >100k distinct values, then even if we were sorting many
|
||||
* billion rows we'd likely still break even, and the penalty of undoing
|
||||
* that many rows of abbrevs would probably not be worth it. At this point
|
||||
* we stop counting because we know that we're now fully committed.
|
||||
*/
|
||||
if (abbr_card > 100000.0)
|
||||
{
|
||||
#ifdef TRACE_SORT
|
||||
if (trace_sort)
|
||||
elog(LOG,
|
||||
"macaddr_abbrev: estimation ends at cardinality %f"
|
||||
" after " INT64_FORMAT " values (%d rows)",
|
||||
abbr_card, uss->input_count, memtupcount);
|
||||
#endif
|
||||
uss->estimating = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row
|
||||
* fudge factor allows us to abort earlier on genuinely pathological data
|
||||
* where we've had exactly one abbreviated value in the first 2k
|
||||
* (non-null) rows.
|
||||
*/
|
||||
if (abbr_card < uss->input_count / 2000.0 + 0.5)
|
||||
{
|
||||
#ifdef TRACE_SORT
|
||||
if (trace_sort)
|
||||
elog(LOG,
|
||||
"macaddr_abbrev: aborting abbreviation at cardinality %f"
|
||||
" below threshold %f after " INT64_FORMAT " values (%d rows)",
|
||||
abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count,
|
||||
memtupcount);
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
#ifdef TRACE_SORT
|
||||
if (trace_sort)
|
||||
elog(LOG,
|
||||
"macaddr_abbrev: cardinality %f after " INT64_FORMAT
|
||||
" values (%d rows)", abbr_card, uss->input_count, memtupcount);
|
||||
#endif
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* SortSupport conversion routine. Converts original macaddr representation
|
||||
* to abbreviated key representation.
|
||||
*
|
||||
* Packs the bytes of a 6-byte MAC address into a Datum and treats it as an
|
||||
* unsigned integer for purposes of comparison. On a 64-bit machine, there
|
||||
* will be two zeroed bytes of padding. The integer is converted to native
|
||||
* endianness to facilitate easy comparison.
|
||||
*/
|
||||
static Datum
|
||||
macaddr_abbrev_convert(Datum original, SortSupport ssup)
|
||||
{
|
||||
macaddr_sortsupport_state *uss = ssup->ssup_extra;
|
||||
macaddr *authoritative = DatumGetMacaddrP(original);
|
||||
Datum res;
|
||||
|
||||
/*
|
||||
* On a 64-bit machine, zero out the 8-byte datum and copy the 6 bytes of
|
||||
* the MAC address in. There will be two bytes of zero padding on the end
|
||||
* of the least significant bits.
|
||||
*/
|
||||
#if SIZEOF_DATUM == 8
|
||||
memset(&res, 0, SIZEOF_DATUM);
|
||||
memcpy(&res, authoritative, sizeof(macaddr));
|
||||
#else /* SIZEOF_DATUM != 8 */
|
||||
memcpy(&res, authoritative, SIZEOF_DATUM);
|
||||
#endif
|
||||
uss->input_count += 1;
|
||||
|
||||
/*
|
||||
* Cardinality estimation. The estimate uses uint32, so on a 64-bit
|
||||
* architecture, XOR the two 32-bit halves together to produce slightly
|
||||
* more entropy. The two zeroed bytes won't have any practical impact on
|
||||
* this operation.
|
||||
*/
|
||||
if (uss->estimating)
|
||||
{
|
||||
uint32 tmp;
|
||||
|
||||
#if SIZEOF_DATUM == 8
|
||||
tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
|
||||
#else /* SIZEOF_DATUM != 8 */
|
||||
tmp = (uint32) res;
|
||||
#endif
|
||||
|
||||
addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
|
||||
}
|
||||
|
||||
/*
|
||||
* Byteswap on little-endian machines.
|
||||
*
|
||||
* This is needed so that macaddr_cmp_abbrev() (an unsigned integer 3-way
|
||||
* comparator) works correctly on all platforms. Without this, the
|
||||
* comparator would have to call memcmp() with a pair of pointers to the
|
||||
* first byte of each abbreviated key, which is slower.
|
||||
*/
|
||||
res = DatumBigEndianToNative(res);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
@ -53,6 +53,6 @@
|
||||
*/
|
||||
|
||||
/* yyyymmddN */
|
||||
#define CATALOG_VERSION_NO 201703291
|
||||
#define CATALOG_VERSION_NO 201703292
|
||||
|
||||
#endif
|
||||
|
@ -117,6 +117,7 @@ DATA(insert ( 1976 20 23 1 2189 ));
|
||||
DATA(insert ( 1976 20 21 1 2193 ));
|
||||
DATA(insert ( 1982 1186 1186 1 1315 ));
|
||||
DATA(insert ( 1984 829 829 1 836 ));
|
||||
DATA(insert ( 1984 829 829 2 3359 ));
|
||||
DATA(insert ( 1986 19 19 1 359 ));
|
||||
DATA(insert ( 1986 19 19 2 3135 ));
|
||||
DATA(insert ( 1988 1700 1700 1 1769 ));
|
||||
|
@ -2125,6 +2125,8 @@ DESCR("less-equal-greater");
|
||||
DATA(insert OID = 3144 ( macaddr_not PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 829 "829" _null_ _null_ _null_ _null_ _null_ macaddr_not _null_ _null_ _null_ ));
|
||||
DATA(insert OID = 3145 ( macaddr_and PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 829 "829 829" _null_ _null_ _null_ _null_ _null_ macaddr_and _null_ _null_ _null_ ));
|
||||
DATA(insert OID = 3146 ( macaddr_or PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 829 "829 829" _null_ _null_ _null_ _null_ _null_ macaddr_or _null_ _null_ _null_ ));
|
||||
DATA(insert OID = 3359 ( macaddr_sortsupport PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ macaddr_sortsupport _null_ _null_ _null_ ));
|
||||
DESCR("sort support");
|
||||
|
||||
/* for macaddr8 type support */
|
||||
DATA(insert OID = 4110 ( macaddr8_in PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 774 "2275" _null_ _null_ _null_ _null_ _null_ macaddr8_in _null_ _null_ _null_ ));
|
||||
|
Loading…
x
Reference in New Issue
Block a user