1
0
mirror of https://github.com/postgres/postgres.git synced 2025-04-20 00:42:27 +03:00
Heikki Linnakangas e4309f73f6 Add support for sorted gist index builds to btree_gist
This enables sortsupport in the btree_gist extension for faster builds
of gist indexes.

Sorted gist index build strategy is the new default now. Regression
tests are unchanged (except for one small change in the 'enum' test to
add coverage for enum values added later) and are using the sorted
build strategy instead.

One version of this was committed a long time ago already, in commit
9f984ba6d2, but it was quickly reverted because of buildfarm
failures. The failures were presumably caused by some small bugs, but
we never got around to debug and commit it again. This patch was
written from scratch, implementing the same idea, with some fragments
and ideas from the original patch.

Author: Bernd Helmle <mailings@oopsware.de>
Author: Andrey Borodin <x4mmm@yandex-team.ru>
Discussion: https://www.postgresql.org/message-id/64d324ce2a6d535d3f0f3baeeea7b25beff82ce4.camel@oopsware.de
2025-04-03 13:46:35 +03:00

421 lines
10 KiB
C

/*
* contrib/btree_gist/btree_ts.c
*/
#include "postgres.h"
#include <limits.h>
#include "btree_gist.h"
#include "btree_utils_num.h"
#include "utils/fmgrprotos.h"
#include "utils/timestamp.h"
#include "utils/float.h"
#include "utils/sortsupport.h"
typedef struct
{
Timestamp lower;
Timestamp upper;
} tsKEY;
/* GiST support functions */
PG_FUNCTION_INFO_V1(gbt_ts_compress);
PG_FUNCTION_INFO_V1(gbt_tstz_compress);
PG_FUNCTION_INFO_V1(gbt_ts_fetch);
PG_FUNCTION_INFO_V1(gbt_ts_union);
PG_FUNCTION_INFO_V1(gbt_ts_picksplit);
PG_FUNCTION_INFO_V1(gbt_ts_consistent);
PG_FUNCTION_INFO_V1(gbt_ts_distance);
PG_FUNCTION_INFO_V1(gbt_tstz_consistent);
PG_FUNCTION_INFO_V1(gbt_tstz_distance);
PG_FUNCTION_INFO_V1(gbt_ts_penalty);
PG_FUNCTION_INFO_V1(gbt_ts_same);
PG_FUNCTION_INFO_V1(gbt_ts_sortsupport);
#ifdef USE_FLOAT8_BYVAL
#define TimestampGetDatumFast(X) TimestampGetDatum(X)
#else
#define TimestampGetDatumFast(X) PointerGetDatum(&(X))
#endif
/* define for comparison */
static bool
gbt_tsgt(const void *a, const void *b, FmgrInfo *flinfo)
{
const Timestamp *aa = (const Timestamp *) a;
const Timestamp *bb = (const Timestamp *) b;
return DatumGetBool(DirectFunctionCall2(timestamp_gt,
TimestampGetDatumFast(*aa),
TimestampGetDatumFast(*bb)));
}
static bool
gbt_tsge(const void *a, const void *b, FmgrInfo *flinfo)
{
const Timestamp *aa = (const Timestamp *) a;
const Timestamp *bb = (const Timestamp *) b;
return DatumGetBool(DirectFunctionCall2(timestamp_ge,
TimestampGetDatumFast(*aa),
TimestampGetDatumFast(*bb)));
}
static bool
gbt_tseq(const void *a, const void *b, FmgrInfo *flinfo)
{
const Timestamp *aa = (const Timestamp *) a;
const Timestamp *bb = (const Timestamp *) b;
return DatumGetBool(DirectFunctionCall2(timestamp_eq,
TimestampGetDatumFast(*aa),
TimestampGetDatumFast(*bb)));
}
static bool
gbt_tsle(const void *a, const void *b, FmgrInfo *flinfo)
{
const Timestamp *aa = (const Timestamp *) a;
const Timestamp *bb = (const Timestamp *) b;
return DatumGetBool(DirectFunctionCall2(timestamp_le,
TimestampGetDatumFast(*aa),
TimestampGetDatumFast(*bb)));
}
static bool
gbt_tslt(const void *a, const void *b, FmgrInfo *flinfo)
{
const Timestamp *aa = (const Timestamp *) a;
const Timestamp *bb = (const Timestamp *) b;
return DatumGetBool(DirectFunctionCall2(timestamp_lt,
TimestampGetDatumFast(*aa),
TimestampGetDatumFast(*bb)));
}
static int
gbt_tskey_cmp(const void *a, const void *b, FmgrInfo *flinfo)
{
tsKEY *ia = (tsKEY *) (((const Nsrt *) a)->t);
tsKEY *ib = (tsKEY *) (((const Nsrt *) b)->t);
int res;
res = DatumGetInt32(DirectFunctionCall2(timestamp_cmp, TimestampGetDatumFast(ia->lower), TimestampGetDatumFast(ib->lower)));
if (res == 0)
return DatumGetInt32(DirectFunctionCall2(timestamp_cmp, TimestampGetDatumFast(ia->upper), TimestampGetDatumFast(ib->upper)));
return res;
}
static float8
gbt_ts_dist(const void *a, const void *b, FmgrInfo *flinfo)
{
const Timestamp *aa = (const Timestamp *) a;
const Timestamp *bb = (const Timestamp *) b;
Interval *i;
if (TIMESTAMP_NOT_FINITE(*aa) || TIMESTAMP_NOT_FINITE(*bb))
return get_float8_infinity();
i = DatumGetIntervalP(DirectFunctionCall2(timestamp_mi,
TimestampGetDatumFast(*aa),
TimestampGetDatumFast(*bb)));
return fabs(INTERVAL_TO_SEC(i));
}
static const gbtree_ninfo tinfo =
{
gbt_t_ts,
sizeof(Timestamp),
16, /* sizeof(gbtreekey16) */
gbt_tsgt,
gbt_tsge,
gbt_tseq,
gbt_tsle,
gbt_tslt,
gbt_tskey_cmp,
gbt_ts_dist
};
PG_FUNCTION_INFO_V1(ts_dist);
Datum
ts_dist(PG_FUNCTION_ARGS)
{
Timestamp a = PG_GETARG_TIMESTAMP(0);
Timestamp b = PG_GETARG_TIMESTAMP(1);
Interval *r;
if (TIMESTAMP_NOT_FINITE(a) || TIMESTAMP_NOT_FINITE(b))
{
Interval *p = palloc(sizeof(Interval));
p->day = INT_MAX;
p->month = INT_MAX;
p->time = PG_INT64_MAX;
PG_RETURN_INTERVAL_P(p);
}
else
r = DatumGetIntervalP(DirectFunctionCall2(timestamp_mi,
PG_GETARG_DATUM(0),
PG_GETARG_DATUM(1)));
PG_RETURN_INTERVAL_P(abs_interval(r));
}
PG_FUNCTION_INFO_V1(tstz_dist);
Datum
tstz_dist(PG_FUNCTION_ARGS)
{
TimestampTz a = PG_GETARG_TIMESTAMPTZ(0);
TimestampTz b = PG_GETARG_TIMESTAMPTZ(1);
Interval *r;
if (TIMESTAMP_NOT_FINITE(a) || TIMESTAMP_NOT_FINITE(b))
{
Interval *p = palloc(sizeof(Interval));
p->day = INT_MAX;
p->month = INT_MAX;
p->time = PG_INT64_MAX;
PG_RETURN_INTERVAL_P(p);
}
r = DatumGetIntervalP(DirectFunctionCall2(timestamp_mi,
PG_GETARG_DATUM(0),
PG_GETARG_DATUM(1)));
PG_RETURN_INTERVAL_P(abs_interval(r));
}
/**************************************************
* GiST support functions
**************************************************/
static inline Timestamp
tstz_to_ts_gmt(TimestampTz ts)
{
/* No timezone correction is needed, since GMT is offset 0 by definition */
return (Timestamp) ts;
}
Datum
gbt_ts_compress(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
PG_RETURN_POINTER(gbt_num_compress(entry, &tinfo));
}
Datum
gbt_tstz_compress(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
GISTENTRY *retval;
if (entry->leafkey)
{
tsKEY *r = (tsKEY *) palloc(sizeof(tsKEY));
TimestampTz ts = DatumGetTimestampTz(entry->key);
Timestamp gmt;
gmt = tstz_to_ts_gmt(ts);
retval = palloc(sizeof(GISTENTRY));
r->lower = r->upper = gmt;
gistentryinit(*retval, PointerGetDatum(r),
entry->rel, entry->page,
entry->offset, false);
}
else
retval = entry;
PG_RETURN_POINTER(retval);
}
Datum
gbt_ts_fetch(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
PG_RETURN_POINTER(gbt_num_fetch(entry, &tinfo));
}
Datum
gbt_ts_consistent(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
Timestamp query = PG_GETARG_TIMESTAMP(1);
StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
/* Oid subtype = PG_GETARG_OID(3); */
bool *recheck = (bool *) PG_GETARG_POINTER(4);
tsKEY *kkk = (tsKEY *) DatumGetPointer(entry->key);
GBT_NUMKEY_R key;
/* All cases served by this function are exact */
*recheck = false;
key.lower = (GBT_NUMKEY *) &kkk->lower;
key.upper = (GBT_NUMKEY *) &kkk->upper;
PG_RETURN_BOOL(gbt_num_consistent(&key, &query, &strategy,
GIST_LEAF(entry), &tinfo, fcinfo->flinfo));
}
Datum
gbt_ts_distance(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
Timestamp query = PG_GETARG_TIMESTAMP(1);
/* Oid subtype = PG_GETARG_OID(3); */
tsKEY *kkk = (tsKEY *) DatumGetPointer(entry->key);
GBT_NUMKEY_R key;
key.lower = (GBT_NUMKEY *) &kkk->lower;
key.upper = (GBT_NUMKEY *) &kkk->upper;
PG_RETURN_FLOAT8(gbt_num_distance(&key, &query, GIST_LEAF(entry),
&tinfo, fcinfo->flinfo));
}
Datum
gbt_tstz_consistent(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
TimestampTz query = PG_GETARG_TIMESTAMPTZ(1);
StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
/* Oid subtype = PG_GETARG_OID(3); */
bool *recheck = (bool *) PG_GETARG_POINTER(4);
char *kkk = (char *) DatumGetPointer(entry->key);
GBT_NUMKEY_R key;
Timestamp qqq;
/* All cases served by this function are exact */
*recheck = false;
key.lower = (GBT_NUMKEY *) &kkk[0];
key.upper = (GBT_NUMKEY *) &kkk[MAXALIGN(tinfo.size)];
qqq = tstz_to_ts_gmt(query);
PG_RETURN_BOOL(gbt_num_consistent(&key, &qqq, &strategy,
GIST_LEAF(entry), &tinfo, fcinfo->flinfo));
}
Datum
gbt_tstz_distance(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
TimestampTz query = PG_GETARG_TIMESTAMPTZ(1);
/* Oid subtype = PG_GETARG_OID(3); */
char *kkk = (char *) DatumGetPointer(entry->key);
GBT_NUMKEY_R key;
Timestamp qqq;
key.lower = (GBT_NUMKEY *) &kkk[0];
key.upper = (GBT_NUMKEY *) &kkk[MAXALIGN(tinfo.size)];
qqq = tstz_to_ts_gmt(query);
PG_RETURN_FLOAT8(gbt_num_distance(&key, &qqq, GIST_LEAF(entry),
&tinfo, fcinfo->flinfo));
}
Datum
gbt_ts_union(PG_FUNCTION_ARGS)
{
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
void *out = palloc(sizeof(tsKEY));
*(int *) PG_GETARG_POINTER(1) = sizeof(tsKEY);
PG_RETURN_POINTER(gbt_num_union(out, entryvec, &tinfo, fcinfo->flinfo));
}
#define penalty_check_max_float(val) \
do { \
if ( val > FLT_MAX ) \
val = FLT_MAX; \
if ( val < -FLT_MAX ) \
val = -FLT_MAX; \
} while (0)
Datum
gbt_ts_penalty(PG_FUNCTION_ARGS)
{
tsKEY *origentry = (tsKEY *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
tsKEY *newentry = (tsKEY *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(1))->key);
float *result = (float *) PG_GETARG_POINTER(2);
double orgdbl[2],
newdbl[2];
/*
* We are always using "double" timestamps here. Precision should be good
* enough.
*/
orgdbl[0] = ((double) origentry->lower);
orgdbl[1] = ((double) origentry->upper);
newdbl[0] = ((double) newentry->lower);
newdbl[1] = ((double) newentry->upper);
penalty_check_max_float(orgdbl[0]);
penalty_check_max_float(orgdbl[1]);
penalty_check_max_float(newdbl[0]);
penalty_check_max_float(newdbl[1]);
penalty_num(result, orgdbl[0], orgdbl[1], newdbl[0], newdbl[1]);
PG_RETURN_POINTER(result);
}
Datum
gbt_ts_picksplit(PG_FUNCTION_ARGS)
{
PG_RETURN_POINTER(gbt_num_picksplit((GistEntryVector *) PG_GETARG_POINTER(0),
(GIST_SPLITVEC *) PG_GETARG_POINTER(1),
&tinfo, fcinfo->flinfo));
}
Datum
gbt_ts_same(PG_FUNCTION_ARGS)
{
tsKEY *b1 = (tsKEY *) PG_GETARG_POINTER(0);
tsKEY *b2 = (tsKEY *) PG_GETARG_POINTER(1);
bool *result = (bool *) PG_GETARG_POINTER(2);
*result = gbt_num_same((void *) b1, (void *) b2, &tinfo, fcinfo->flinfo);
PG_RETURN_POINTER(result);
}
static int
gbt_ts_ssup_cmp(Datum x, Datum y, SortSupport ssup)
{
tsKEY *arg1 = (tsKEY *) DatumGetPointer(x);
tsKEY *arg2 = (tsKEY *) DatumGetPointer(y);
/* for leaf items we expect lower == upper, so only compare lower */
return DatumGetInt32(DirectFunctionCall2(timestamp_cmp,
TimestampGetDatumFast(arg1->lower),
TimestampGetDatumFast(arg2->lower)));
}
Datum
gbt_ts_sortsupport(PG_FUNCTION_ARGS)
{
SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
ssup->comparator = gbt_ts_ssup_cmp;
ssup->ssup_extra = NULL;
PG_RETURN_VOID();
}