1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-06 07:49:08 +03:00
Files
postgres/src/backend/utils/adt/mac.c
Tom Lane 6aebedc384 Grab the low-hanging fruit from forcing sizeof(Datum) to 8.
Remove conditionally-compiled code for smaller Datum widths,
and simplify comments that describe cases no longer of interest.

I also fixed up a few more places that were not using
DatumGetIntXX where they should, and made some cosmetic
adjustments such as using sizeof(int64) not sizeof(Datum)
in places where that fit better with the surrounding code.

One thing I remembered while preparing this part is that SP-GiST
stores pass-by-value prefix keys as Datums, so that the on-disk
representation depends on sizeof(Datum).  That's even more
unfortunate than the existing commentary makes it out to be,
because now there is a hazard that the change of sizeof(Datum)
will break SP-GiST indexes on 32-bit machines.  It appears that
there are no existing SP-GiST opclasses that are actually
affected; and if there are some that I didn't find, the number
of installations that are using them on 32-bit machines is
doubtless tiny.  So I'm proceeding on the assumption that we
can get away with this, but it's something to worry about.

(gininsert.c looks like it has a similar problem, but it's okay
because the "tuples" it's constructing are just transient data
within the tuplesort step.  That's pretty poorly documented
though, so I added some comments.)

Author: Tom Lane <tgl@sss.pgh.pa.us>
Reviewed-by: Peter Eisentraut <peter@eisentraut.org>
Discussion: https://postgr.es/m/1749799.1752797397@sss.pgh.pa.us
2025-08-13 17:18:22 -04:00

520 lines
12 KiB
C

/*-------------------------------------------------------------------------
*
* mac.c
* PostgreSQL type definitions for 6 byte, EUI-48, MAC addresses.
*
* Portions Copyright (c) 1998-2025, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/backend/utils/adt/mac.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "common/hashfn.h"
#include "lib/hyperloglog.h"
#include "libpq/pqformat.h"
#include "port/pg_bswap.h"
#include "utils/fmgrprotos.h"
#include "utils/guc.h"
#include "utils/inet.h"
#include "utils/sortsupport.h"
/*
 * Helpers used when ordering MAC addresses.
 *
 * Each macro packs three of the six octets into one unsigned long, with the
 * first-named octet in the highest position: hibits() combines fields a, b
 * and c, lobits() combines fields d, e and f.  The argument is a pointer and
 * is evaluated three times, so it must be free of side effects.
 */
#define hibits(addr) \
    ((unsigned long) ((((addr)->a) << 16) | (((addr)->b) << 8) | ((addr)->c)))

#define lobits(addr) \
    ((unsigned long) ((((addr)->d) << 16) | (((addr)->e) << 8) | ((addr)->f)))
/*
 * Working state for macaddr sortsupport with abbreviated keys.
 *
 * Allocated and initialized by macaddr_sortsupport() when abbreviation is
 * requested, and reached by the abbreviation callbacks via ssup->ssup_extra.
 */
typedef struct
{
int64 input_count; /* number of non-null values seen; bumped once per macaddr_abbrev_convert() call */
bool estimating; /* true while cardinality is still being estimated */
hyperLogLogState abbr_card; /* cardinality estimator fed with hashed abbreviated keys */
} macaddr_sortsupport_state;
/* Prototypes for file-local routines that are defined further down */
static int macaddr_cmp_internal(macaddr *a1, macaddr *a2);
static int macaddr_fast_cmp(Datum x, Datum y, SortSupport ssup);
static bool macaddr_abbrev_abort(int memtupcount, SortSupport ssup);
static Datum macaddr_abbrev_convert(Datum original, SortSupport ssup);
/*
 * macaddr_in - text input function for macaddr.
 *
 * Tries each supported notation in turn until one matches exactly:
 *      xx:xx:xx:xx:xx:xx
 *      xx-xx-xx-xx-xx-xx
 *      xxxxxx:xxxxxx
 *      xxxxxx-xxxxxx
 *      xxxx.xxxx.xxxx
 *      xxxx-xxxx-xxxx
 *      xxxxxxxxxxxx
 *
 * Returns a freshly palloc'd macaddr.  Problems are reported via ereturn()
 * using fcinfo->context: ERRCODE_INVALID_TEXT_REPRESENTATION when no notation
 * matches, ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE when an octet exceeds 255.
 */
Datum
macaddr_in(PG_FUNCTION_ARGS)
{
    char       *str = PG_GETARG_CSTRING(0);
    Node       *escontext = fcinfo->context;
    macaddr    *result;

    /*
     * The %x conversion of sscanf() stores through a pointer to unsigned int,
     * so the scratch octets are declared unsigned to match the format.
     */
    unsigned int a,
                b,
                c,
                d,
                e,
                f;
    char        junk[2];
    int         count;

    /*
     * %1s matches iff there is trailing non-whitespace garbage, in which case
     * sscanf() reports 7 conversions and the attempt counts as a failure.
     */
    count = sscanf(str, "%x:%x:%x:%x:%x:%x%1s",
                   &a, &b, &c, &d, &e, &f, junk);
    if (count != 6)
        count = sscanf(str, "%x-%x-%x-%x-%x-%x%1s",
                       &a, &b, &c, &d, &e, &f, junk);
    if (count != 6)
        count = sscanf(str, "%2x%2x%2x:%2x%2x%2x%1s",
                       &a, &b, &c, &d, &e, &f, junk);
    if (count != 6)
        count = sscanf(str, "%2x%2x%2x-%2x%2x%2x%1s",
                       &a, &b, &c, &d, &e, &f, junk);
    if (count != 6)
        count = sscanf(str, "%2x%2x.%2x%2x.%2x%2x%1s",
                       &a, &b, &c, &d, &e, &f, junk);
    if (count != 6)
        count = sscanf(str, "%2x%2x-%2x%2x-%2x%2x%1s",
                       &a, &b, &c, &d, &e, &f, junk);
    if (count != 6)
        count = sscanf(str, "%2x%2x%2x%2x%2x%2x%1s",
                       &a, &b, &c, &d, &e, &f, junk);
    if (count != 6)
        ereturn(escontext, (Datum) 0,
                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                 errmsg("invalid input syntax for type %s: \"%s\"", "macaddr",
                        str)));

    /*
     * Each octet must fit in one byte.  Input such as "-1" is accepted by %x
     * and wraps to a very large unsigned value, so the upper-bound test alone
     * rejects everything a signed "< 0" test would have caught.
     */
    if ((a > 255) || (b > 255) || (c > 255) ||
        (d > 255) || (e > 255) || (f > 255))
        ereturn(escontext, (Datum) 0,
                (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
                 errmsg("invalid octet value in \"macaddr\" value: \"%s\"", str)));

    result = (macaddr *) palloc(sizeof(macaddr));

    result->a = a;
    result->b = b;
    result->c = c;
    result->d = d;
    result->e = e;
    result->f = f;

    PG_RETURN_MACADDR_P(result);
}
/*
 * macaddr_out - text output function for macaddr.
 *
 * Always produces the colon-separated notation, printing every octet as two
 * lowercase hex digits.  The result is a palloc'd C string.
 */
Datum
macaddr_out(PG_FUNCTION_ARGS)
{
    /* six two-digit octets plus separators fit with plenty of room to spare */
    const size_t buflen = 32;
    macaddr    *mac = PG_GETARG_MACADDR_P(0);
    char       *text = (char *) palloc(buflen);

    snprintf(text, buflen, "%02x:%02x:%02x:%02x:%02x:%02x",
             mac->a, mac->b, mac->c, mac->d, mac->e, mac->f);

    PG_RETURN_CSTRING(text);
}
/*
* macaddr_recv - converts external binary format to macaddr
*
* The external representation is just the six bytes, MSB first.
*/
Datum
macaddr_recv(PG_FUNCTION_ARGS)
{
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
macaddr *addr;
addr = (macaddr *) palloc(sizeof(macaddr));
addr->a = pq_getmsgbyte(buf);
addr->b = pq_getmsgbyte(buf);
addr->c = pq_getmsgbyte(buf);
addr->d = pq_getmsgbyte(buf);
addr->e = pq_getmsgbyte(buf);
addr->f = pq_getmsgbyte(buf);
PG_RETURN_MACADDR_P(addr);
}
/*
 * macaddr_send - binary output function for macaddr.
 *
 * Emits the six octets in field order a through f and returns the finished
 * message as a bytea.
 */
Datum
macaddr_send(PG_FUNCTION_ARGS)
{
    macaddr    *mac = PG_GETARG_MACADDR_P(0);
    StringInfoData out;

    pq_begintypsend(&out);

    /* one byte per octet, first octet first */
    pq_sendbyte(&out, mac->a);
    pq_sendbyte(&out, mac->b);
    pq_sendbyte(&out, mac->c);
    pq_sendbyte(&out, mac->d);
    pq_sendbyte(&out, mac->e);
    pq_sendbyte(&out, mac->f);

    PG_RETURN_BYTEA_P(pq_endtypsend(&out));
}
/*
 * macaddr_cmp_internal - three-way comparison of two MAC addresses.
 *
 * The first three octets (hibits) are compared first; the last three
 * (lobits) only break ties.  Returns -1, 0 or 1 when a1 sorts before, equal
 * to, or after a2.
 */
static int
macaddr_cmp_internal(macaddr *a1, macaddr *a2)
{
    unsigned long hi1 = hibits(a1);
    unsigned long hi2 = hibits(a2);
    unsigned long lo1;
    unsigned long lo2;

    if (hi1 != hi2)
        return (hi1 < hi2) ? -1 : 1;

    /* leading halves match, so the trailing halves decide */
    lo1 = lobits(a1);
    lo2 = lobits(a2);
    if (lo1 != lo2)
        return (lo1 < lo2) ? -1 : 1;

    return 0;
}
/*
 * macaddr_cmp - SQL-callable three-way comparison.
 *
 * Returns the result of macaddr_cmp_internal() (-1, 0 or 1) as an int32.
 */
Datum
macaddr_cmp(PG_FUNCTION_ARGS)
{
    macaddr    *lhs = PG_GETARG_MACADDR_P(0);
    macaddr    *rhs = PG_GETARG_MACADDR_P(1);
    int         order = macaddr_cmp_internal(lhs, rhs);

    PG_RETURN_INT32(order);
}
/*
* Boolean comparisons.
*/
/* macaddr_lt - true when the first address sorts strictly before the second */
Datum
macaddr_lt(PG_FUNCTION_ARGS)
{
    macaddr    *lhs = PG_GETARG_MACADDR_P(0);
    macaddr    *rhs = PG_GETARG_MACADDR_P(1);
    int         order = macaddr_cmp_internal(lhs, rhs);

    PG_RETURN_BOOL(order < 0);
}
/* macaddr_le - true when the first address sorts before or equal to the second */
Datum
macaddr_le(PG_FUNCTION_ARGS)
{
    macaddr    *lhs = PG_GETARG_MACADDR_P(0);
    macaddr    *rhs = PG_GETARG_MACADDR_P(1);
    int         order = macaddr_cmp_internal(lhs, rhs);

    PG_RETURN_BOOL(order <= 0);
}
/* macaddr_eq - true when both addresses compare equal */
Datum
macaddr_eq(PG_FUNCTION_ARGS)
{
    macaddr    *lhs = PG_GETARG_MACADDR_P(0);
    macaddr    *rhs = PG_GETARG_MACADDR_P(1);
    int         order = macaddr_cmp_internal(lhs, rhs);

    PG_RETURN_BOOL(order == 0);
}
/* macaddr_ge - true when the first address sorts after or equal to the second */
Datum
macaddr_ge(PG_FUNCTION_ARGS)
{
    macaddr    *lhs = PG_GETARG_MACADDR_P(0);
    macaddr    *rhs = PG_GETARG_MACADDR_P(1);
    int         order = macaddr_cmp_internal(lhs, rhs);

    PG_RETURN_BOOL(order >= 0);
}
/* macaddr_gt - true when the first address sorts strictly after the second */
Datum
macaddr_gt(PG_FUNCTION_ARGS)
{
    macaddr    *lhs = PG_GETARG_MACADDR_P(0);
    macaddr    *rhs = PG_GETARG_MACADDR_P(1);
    int         order = macaddr_cmp_internal(lhs, rhs);

    PG_RETURN_BOOL(order > 0);
}
/* macaddr_ne - true when the two addresses differ */
Datum
macaddr_ne(PG_FUNCTION_ARGS)
{
    macaddr    *lhs = PG_GETARG_MACADDR_P(0);
    macaddr    *rhs = PG_GETARG_MACADDR_P(1);
    int         order = macaddr_cmp_internal(lhs, rhs);

    PG_RETURN_BOOL(order != 0);
}
/*
* Support function for hash indexes on macaddr.
*/
Datum
hashmacaddr(PG_FUNCTION_ARGS)
{
macaddr *key = PG_GETARG_MACADDR_P(0);
return hash_any((unsigned char *) key, sizeof(macaddr));
}
/*
 * hashmacaddrextended - seeded hash of a macaddr value.
 *
 * Same input bytes as hashmacaddr(), but hashed with hash_any_extended()
 * using the second SQL argument as the seed.
 */
Datum
hashmacaddrextended(PG_FUNCTION_ARGS)
{
    macaddr    *mac = PG_GETARG_MACADDR_P(0);
    unsigned char *bytes = (unsigned char *) mac;

    return hash_any_extended(bytes, sizeof(macaddr), PG_GETARG_INT64(1));
}
/*
* Arithmetic functions: bitwise NOT, AND, OR.
*/
Datum
macaddr_not(PG_FUNCTION_ARGS)
{
macaddr *addr = PG_GETARG_MACADDR_P(0);
macaddr *result;
result = (macaddr *) palloc(sizeof(macaddr));
result->a = ~addr->a;
result->b = ~addr->b;
result->c = ~addr->c;
result->d = ~addr->d;
result->e = ~addr->e;
result->f = ~addr->f;
PG_RETURN_MACADDR_P(result);
}
Datum
macaddr_and(PG_FUNCTION_ARGS)
{
macaddr *addr1 = PG_GETARG_MACADDR_P(0);
macaddr *addr2 = PG_GETARG_MACADDR_P(1);
macaddr *result;
result = (macaddr *) palloc(sizeof(macaddr));
result->a = addr1->a & addr2->a;
result->b = addr1->b & addr2->b;
result->c = addr1->c & addr2->c;
result->d = addr1->d & addr2->d;
result->e = addr1->e & addr2->e;
result->f = addr1->f & addr2->f;
PG_RETURN_MACADDR_P(result);
}
/*
 * macaddr_or - bitwise OR of two MAC addresses.
 *
 * Returns a new palloc'd macaddr whose octets are the octet-wise OR of the
 * two arguments.
 */
Datum
macaddr_or(PG_FUNCTION_ARGS)
{
    macaddr    *lhs = PG_GETARG_MACADDR_P(0);
    macaddr    *rhs = PG_GETARG_MACADDR_P(1);
    macaddr    *dst = (macaddr *) palloc(sizeof(*dst));

    dst->a = lhs->a | rhs->a;
    dst->b = lhs->b | rhs->b;
    dst->c = lhs->c | rhs->c;
    dst->d = lhs->d | rhs->d;
    dst->e = lhs->e | rhs->e;
    dst->f = lhs->f | rhs->f;

    PG_RETURN_MACADDR_P(dst);
}
/*
* Truncation function to allow comparing mac manufacturers.
* From suggestion by Alex Pilosov <alex@pilosoft.com>
*/
Datum
macaddr_trunc(PG_FUNCTION_ARGS)
{
macaddr *addr = PG_GETARG_MACADDR_P(0);
macaddr *result;
result = (macaddr *) palloc(sizeof(macaddr));
result->a = addr->a;
result->b = addr->b;
result->c = addr->c;
result->d = 0;
result->e = 0;
result->f = 0;
PG_RETURN_MACADDR_P(result);
}
/*
 * macaddr_sortsupport - SortSupport strategy function for macaddr.
 *
 * Always installs macaddr_fast_cmp() as the comparator.  If the caller asked
 * for abbreviation (ssup->abbreviate), it additionally allocates the private
 * estimator state in ssup->ssup_cxt, makes ssup_datum_unsigned_cmp() the
 * comparator for abbreviated keys, and registers the conversion, abort and
 * full-comparison callbacks.
 */
Datum
macaddr_sortsupport(PG_FUNCTION_ARGS)
{
    SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
    macaddr_sortsupport_state *state;
    MemoryContext callercxt;

    /* baseline setup: authoritative comparator, no private state */
    ssup->comparator = macaddr_fast_cmp;
    ssup->ssup_extra = NULL;

    if (!ssup->abbreviate)
        PG_RETURN_VOID();

    /* allocations below are made in the sort's own memory context */
    callercxt = MemoryContextSwitchTo(ssup->ssup_cxt);

    state = palloc(sizeof(macaddr_sortsupport_state));
    state->input_count = 0;
    state->estimating = true;
    initHyperLogLog(&state->abbr_card, 10);

    ssup->ssup_extra = state;

    /* abbreviated keys compare as unsigned integers */
    ssup->comparator = ssup_datum_unsigned_cmp;
    ssup->abbrev_converter = macaddr_abbrev_convert;
    ssup->abbrev_abort = macaddr_abbrev_abort;
    ssup->abbrev_full_comparator = macaddr_fast_cmp;

    MemoryContextSwitchTo(callercxt);

    PG_RETURN_VOID();
}
/*
 * macaddr_fast_cmp - SortSupport authoritative comparator.
 *
 * Interprets both Datums as macaddr pointers and defers to
 * macaddr_cmp_internal().  The ssup argument is not used.
 */
static int
macaddr_fast_cmp(Datum x, Datum y, SortSupport ssup)
{
    return macaddr_cmp_internal(DatumGetMacaddrP(x), DatumGetMacaddrP(y));
}
/*
 * macaddr_abbrev_abort - decide whether abbreviated keys are still worthwhile.
 *
 * Returns true to abandon abbreviation, false to carry on.  Only the
 * estimated cardinality of the abbreviated keys is considered; the
 * cardinality of the full values is ignored because the authoritative
 * macaddr comparator has no equality fast-path.
 */
static bool
macaddr_abbrev_abort(int memtupcount, SortSupport ssup)
{
    macaddr_sortsupport_state *state = ssup->ssup_extra;
    double      distinct;
    double      threshold;

    /* once estimation has been switched off we never abort */
    if (!state->estimating)
        return false;

    /* too few rows or values seen to judge yet */
    if (memtupcount < 10000 || state->input_count < 10000)
        return false;

    distinct = estimateHyperLogLog(&state->abbr_card);

    /*
     * With more than 100k distinct abbreviated keys we would likely break
     * even however many rows follow, and undoing that many abbreviations is
     * probably not worth the cost.  Stop estimating: we are fully committed
     * from here on.
     */
    if (distinct > 100000.0)
    {
        if (trace_sort)
            elog(LOG,
                 "macaddr_abbrev: estimation ends at cardinality %f"
                 " after " INT64_FORMAT " values (%d rows)",
                 distinct, state->input_count, memtupcount);
        state->estimating = false;
        return false;
    }

    /*
     * Require roughly one distinct key per 2k non-null inputs.  The extra
     * 0.5 lets us give up sooner on truly pathological input, where the
     * first 2k (non-null) rows yielded exactly one abbreviated value.
     */
    threshold = state->input_count / 2000.0 + 0.5;
    if (distinct < threshold)
    {
        if (trace_sort)
            elog(LOG,
                 "macaddr_abbrev: aborting abbreviation at cardinality %f"
                 " below threshold %f after " INT64_FORMAT " values (%d rows)",
                 distinct, threshold, state->input_count,
                 memtupcount);
        return true;
    }

    if (trace_sort)
        elog(LOG,
             "macaddr_abbrev: cardinality %f after " INT64_FORMAT
             " values (%d rows)", distinct, state->input_count, memtupcount);

    return false;
}
/*
 * macaddr_abbrev_convert - build the abbreviated sort key for one macaddr.
 *
 * The six address bytes are copied into the leading bytes of a zeroed Datum,
 * which leaves the two trailing bytes as zero padding.  The Datum is then
 * converted from big-endian to native byte order so that it can be compared
 * as an unsigned integer.  As a side effect the call bumps the input counter
 * and, while estimation is active, feeds the cardinality estimator.
 */
static Datum
macaddr_abbrev_convert(Datum original, SortSupport ssup)
{
    macaddr_sortsupport_state *state = ssup->ssup_extra;
    macaddr    *mac = DatumGetMacaddrP(original);
    Datum       key;

    StaticAssertStmt(sizeof(key) >= sizeof(macaddr),
                     "Datum is too small for macaddr");

    /*
     * Clear the whole 8-byte Datum, then overlay the 6 address bytes at its
     * start.  The remaining two bytes stay zero; once the value is in native
     * order they form the least significant bits.
     */
    memset(&key, 0, sizeof(key));
    memcpy(&key, mac, sizeof(macaddr));

    state->input_count += 1;

    /*
     * Cardinality estimation.  The estimator takes a uint32, so fold the two
     * 32-bit halves of the key together with XOR to retain a bit more
     * entropy; the two zero bytes make no practical difference here.
     */
    if (state->estimating)
    {
        uint32      lower = DatumGetUInt32(key);
        uint32      upper = (uint32) (DatumGetUInt64(key) >> 32);

        addHyperLogLog(&state->abbr_card,
                       DatumGetUInt32(hash_uint32(lower ^ upper)));
    }

    /*
     * Byteswap on little-endian machines.
     *
     * ssup_datum_unsigned_cmp() is a plain unsigned integer 3-way comparator,
     * so the key has to be in native order for it to give the right answer on
     * every platform.  The alternative would be a memcmp() over the key
     * bytes, which is slower.
     */
    return DatumBigEndianToNative(key);
}