mirror of
https://github.com/postgres/postgres.git
synced 2025-07-17 06:41:09 +03:00
Fix an asssortment of typos in brin_minmax_multi.c and mcv.c
Discussion: https://postgr.es/m/CAApHDvrbyJNOPBws4RUhXghZ7+TBjtdO-rznTsqZECuowNorXg@mail.gmail.com
This commit is contained in:
@ -21,7 +21,7 @@
|
|||||||
*
|
*
|
||||||
* [1000,2000] and [1000000,1000000]
|
* [1000,2000] and [1000000,1000000]
|
||||||
*
|
*
|
||||||
* This allow us to still eliminate the page range when the scan keys hit
|
* This allows us to still eliminate the page range when the scan keys hit
|
||||||
* the gap between 2000 and 1000000, making it useful in cases when the
|
* the gap between 2000 and 1000000, making it useful in cases when the
|
||||||
* simple minmax opclass gets inefficient.
|
* simple minmax opclass gets inefficient.
|
||||||
*
|
*
|
||||||
@ -39,7 +39,7 @@
|
|||||||
* arbitrary threshold and may be changed easily).
|
* arbitrary threshold and may be changed easily).
|
||||||
*
|
*
|
||||||
* To pick the closest intervals we use the "distance" support procedure,
|
* To pick the closest intervals we use the "distance" support procedure,
|
||||||
* which measures space between two ranges (i.e. length of an interval).
|
* which measures space between two ranges (i.e. the length of an interval).
|
||||||
* The computed value may be an approximation - in the worst case we will
|
* The computed value may be an approximation - in the worst case we will
|
||||||
* merge two ranges that are slightly less optimal at that step, but the
|
* merge two ranges that are slightly less optimal at that step, but the
|
||||||
* index should still produce correct results.
|
* index should still produce correct results.
|
||||||
@ -56,7 +56,7 @@
|
|||||||
*/
|
*/
|
||||||
#include "postgres.h"
|
#include "postgres.h"
|
||||||
|
|
||||||
/* needef for PGSQL_AF_INET */
|
/* needed for PGSQL_AF_INET */
|
||||||
#include <sys/socket.h>
|
#include <sys/socket.h>
|
||||||
|
|
||||||
#include "access/genam.h"
|
#include "access/genam.h"
|
||||||
@ -125,7 +125,7 @@ typedef struct MinMaxMultiOptions
|
|||||||
int valuesPerRange; /* number of values per range */
|
int valuesPerRange; /* number of values per range */
|
||||||
} MinMaxMultiOptions;
|
} MinMaxMultiOptions;
|
||||||
|
|
||||||
#define MINMAX_MULTI_DEFAULT_VALUES_PER_PAGE 32
|
#define MINMAX_MULTI_DEFAULT_VALUES_PER_PAGE 32
|
||||||
|
|
||||||
#define MinMaxMultiGetValuesPerRange(opts) \
|
#define MinMaxMultiGetValuesPerRange(opts) \
|
||||||
((opts) && (((MinMaxMultiOptions *) (opts))->valuesPerRange != 0) ? \
|
((opts) && (((MinMaxMultiOptions *) (opts))->valuesPerRange != 0) ? \
|
||||||
@ -180,7 +180,7 @@ typedef struct Ranges
|
|||||||
/*
|
/*
|
||||||
* We simply add the values into a large buffer, without any expensive
|
* We simply add the values into a large buffer, without any expensive
|
||||||
* steps (sorting, deduplication, ...). The buffer is a multiple of the
|
* steps (sorting, deduplication, ...). The buffer is a multiple of the
|
||||||
* target number of values, so the compaction happen less often,
|
* target number of values, so the compaction happens less often,
|
||||||
* amortizing the costs. We keep the actual target and compact to the
|
* amortizing the costs. We keep the actual target and compact to the
|
||||||
* requested number of values at the very end, before serializing to
|
* requested number of values at the very end, before serializing to
|
||||||
* on-disk representation.
|
* on-disk representation.
|
||||||
@ -456,7 +456,7 @@ AssertCheckExpandedRanges(BrinDesc *bdesc, Oid colloid, AttrNumber attno,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* And the ranges should be ordered and must nor overlap, i.e. upper <
|
* And the ranges should be ordered and must not overlap, i.e. upper <
|
||||||
* lower for boundaries of consecutive ranges.
|
* lower for boundaries of consecutive ranges.
|
||||||
*/
|
*/
|
||||||
for (i = 0; i < nranges - 1; i++)
|
for (i = 0; i < nranges - 1; i++)
|
||||||
@ -668,13 +668,12 @@ range_serialize(Ranges *range)
|
|||||||
Datum tmp;
|
Datum tmp;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For values passed by value, we need to copy just the
|
* For byval types, we need to copy just the significant bytes -
|
||||||
* significant bytes - we can't use memcpy directly, as that
|
* we can't use memcpy directly, as that assumes little-endian
|
||||||
* assumes little endian behavior. store_att_byval does almost
|
* behavior. store_att_byval does almost what we need, but it
|
||||||
* what we need, but it requires properly aligned buffer - the
|
* requires a properly aligned buffer - the output buffer does not
|
||||||
* output buffer does not guarantee that. So we simply use a local
|
* guarantee that. So we simply use a local Datum variable (which
|
||||||
* Datum variable (which guarantees proper alignment), and then
|
* guarantees proper alignment), and then copy the value from it.
|
||||||
* copy the value from it.
|
|
||||||
*/
|
*/
|
||||||
store_att_byval(&tmp, range->values[i], typlen);
|
store_att_byval(&tmp, range->values[i], typlen);
|
||||||
|
|
||||||
@ -757,7 +756,7 @@ range_deserialize(int maxvalues, SerializedRanges *serialized)
|
|||||||
/*
|
/*
|
||||||
* And now deconstruct the values into Datum array. We have to copy the
|
* And now deconstruct the values into Datum array. We have to copy the
|
||||||
* data because the serialized representation ignores alignment, and we
|
* data because the serialized representation ignores alignment, and we
|
||||||
* don't want to rely it will be kept around anyway.
|
* don't want to rely on it being kept around anyway.
|
||||||
*/
|
*/
|
||||||
ptr = serialized->data;
|
ptr = serialized->data;
|
||||||
|
|
||||||
@ -850,10 +849,10 @@ range_deserialize(int maxvalues, SerializedRanges *serialized)
|
|||||||
* compare_expanded_ranges
|
* compare_expanded_ranges
|
||||||
* Compare the expanded ranges - first by minimum, then by maximum.
|
* Compare the expanded ranges - first by minimum, then by maximum.
|
||||||
*
|
*
|
||||||
* We do guarantee that ranges in a single Range object do not overlap,
|
* We do guarantee that ranges in a single Ranges object do not overlap, so it
|
||||||
* so it may seem strange that we don't order just by minimum. But when
|
* may seem strange that we don't order just by minimum. But when merging two
|
||||||
* merging two Ranges (which happens in the union function), the ranges
|
* Ranges (which happens in the union function), the ranges may in fact
|
||||||
* may in fact overlap. So we do compare both.
|
* overlap. So we do compare both.
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
compare_expanded_ranges(const void *a, const void *b, void *arg)
|
compare_expanded_ranges(const void *a, const void *b, void *arg)
|
||||||
@ -1062,9 +1061,9 @@ range_contains_value(BrinDesc *bdesc, Oid colloid,
|
|||||||
/*
|
/*
|
||||||
* There is no matching range, so let's inspect the sorted values.
|
* There is no matching range, so let's inspect the sorted values.
|
||||||
*
|
*
|
||||||
* We do a sequential search for small number of values, and binary search
|
* We do a sequential search for small numbers of values, and binary
|
||||||
* once we have more than 16 values. This threshold is somewhat arbitrary,
|
* search once we have more than 16 values. This threshold is somewhat
|
||||||
* as it depends on how expensive the comparison function is.
|
* arbitrary, as it depends on how expensive the comparison function is.
|
||||||
*
|
*
|
||||||
* XXX If we use the threshold here, maybe we should do the same thing in
|
* XXX If we use the threshold here, maybe we should do the same thing in
|
||||||
* has_matching_range? Or maybe we should do the bin search all the time?
|
* has_matching_range? Or maybe we should do the bin search all the time?
|
||||||
@ -1206,7 +1205,7 @@ sort_expanded_ranges(FmgrInfo *cmp, Oid colloid,
|
|||||||
if (!compare_expanded_ranges(&eranges[i - 1], &eranges[i], (void *) &cxt))
|
if (!compare_expanded_ranges(&eranges[i - 1], &eranges[i], (void *) &cxt))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
/* otherwise copy it to n-th place (if not already there) */
|
/* otherwise, copy it to n-th place (if not already there) */
|
||||||
if (i != n)
|
if (i != n)
|
||||||
memcpy(&eranges[n], &eranges[i], sizeof(ExpandedRange));
|
memcpy(&eranges[n], &eranges[i], sizeof(ExpandedRange));
|
||||||
|
|
||||||
@ -1314,8 +1313,8 @@ compare_distances(const void *a, const void *b)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Given an array of expanded ranges, compute distance of the gaps between
|
* Given an array of expanded ranges, compute size of the gaps between each
|
||||||
* the ranges - for ncranges there are (ncranges-1) gaps.
|
* range. For neranges there are (neranges-1) gaps.
|
||||||
*
|
*
|
||||||
* We simply call the "distance" function to compute the (max-min) for pairs
|
* We simply call the "distance" function to compute the (max-min) for pairs
|
||||||
* of consecutive ranges. The function may be fairly expensive, so we do that
|
* of consecutive ranges. The function may be fairly expensive, so we do that
|
||||||
@ -1337,8 +1336,8 @@ build_distances(FmgrInfo *distanceFn, Oid colloid,
|
|||||||
distances = (DistanceValue *) palloc0(sizeof(DistanceValue) * ndistances);
|
distances = (DistanceValue *) palloc0(sizeof(DistanceValue) * ndistances);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Walk though the ranges once and compute distance between the ranges so
|
* Walk through the ranges once and compute the distance between the
|
||||||
* that we can sort them once.
|
* ranges so that we can sort them once.
|
||||||
*/
|
*/
|
||||||
for (i = 0; i < ndistances; i++)
|
for (i = 0; i < ndistances; i++)
|
||||||
{
|
{
|
||||||
@ -1394,7 +1393,7 @@ build_expanded_ranges(FmgrInfo *cmp, Oid colloid, Ranges *ranges,
|
|||||||
/* sort and deduplicate the expanded ranges */
|
/* sort and deduplicate the expanded ranges */
|
||||||
neranges = sort_expanded_ranges(cmp, colloid, eranges, neranges);
|
neranges = sort_expanded_ranges(cmp, colloid, eranges, neranges);
|
||||||
|
|
||||||
/* remember how many cranges we built */
|
/* remember how many ranges we built */
|
||||||
*nranges = neranges;
|
*nranges = neranges;
|
||||||
|
|
||||||
return eranges;
|
return eranges;
|
||||||
@ -1430,7 +1429,7 @@ count_values(ExpandedRange *cranges, int ncranges)
|
|||||||
*
|
*
|
||||||
* Combines ranges until the number of boundary values drops below the
|
* Combines ranges until the number of boundary values drops below the
|
||||||
* threshold specified by max_values. This happens by merging enough
|
* threshold specified by max_values. This happens by merging enough
|
||||||
* ranges by distance between them.
|
* ranges by the distance between them.
|
||||||
*
|
*
|
||||||
* Returns the number of result ranges.
|
* Returns the number of result ranges.
|
||||||
*
|
*
|
||||||
@ -1448,7 +1447,7 @@ count_values(ExpandedRange *cranges, int ncranges)
|
|||||||
* are of equal (or very similar) length.
|
* are of equal (or very similar) length.
|
||||||
*
|
*
|
||||||
* Consider for example points 1, 2, 3, .., 64, which have gaps of the
|
* Consider for example points 1, 2, 3, .., 64, which have gaps of the
|
||||||
* same length 1 of course. In that case we tend to pick the first
|
* same length 1 of course. In that case, we tend to pick the first
|
||||||
* gap of that length, which leads to this:
|
* gap of that length, which leads to this:
|
||||||
*
|
*
|
||||||
* step 1: [1, 2], 3, 4, 5, .., 64
|
* step 1: [1, 2], 3, 4, 5, .., 64
|
||||||
@ -1484,7 +1483,7 @@ reduce_expanded_ranges(ExpandedRange *eranges, int neranges,
|
|||||||
int keep = (max_values / 2 - 1);
|
int keep = (max_values / 2 - 1);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Maybe we have sufficiently low number of ranges already?
|
* Maybe we have a sufficiently low number of ranges already?
|
||||||
*
|
*
|
||||||
* XXX This should happen before we actually do the expensive stuff like
|
* XXX This should happen before we actually do the expensive stuff like
|
||||||
* sorting, so maybe this should be just an assert.
|
* sorting, so maybe this should be just an assert.
|
||||||
@ -1519,7 +1518,7 @@ reduce_expanded_ranges(ExpandedRange *eranges, int neranges,
|
|||||||
Assert(nvalues <= max_values);
|
Assert(nvalues <= max_values);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* We should have even number of range values. */
|
/* We should have an even number of range values. */
|
||||||
Assert(nvalues % 2 == 0);
|
Assert(nvalues % 2 == 0);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1545,7 +1544,7 @@ reduce_expanded_ranges(ExpandedRange *eranges, int neranges,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Store the boundary values from ExpandedRanges back into Range (using
|
* Store the boundary values from ExpandedRanges back into 'ranges' (using
|
||||||
* only the minimal number of values needed).
|
* only the minimal number of values needed).
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
@ -1618,16 +1617,16 @@ ensure_free_space_in_buffer(BrinDesc *bdesc, Oid colloid,
|
|||||||
cmpFn = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
|
cmpFn = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
|
||||||
BTLessStrategyNumber);
|
BTLessStrategyNumber);
|
||||||
|
|
||||||
/* deduplicate values, if there's unsorted part */
|
/* deduplicate values, if there's an unsorted part */
|
||||||
range_deduplicate_values(range);
|
range_deduplicate_values(range);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* did we reduce enough free space by just the deduplication?
|
* Did we reduce enough free space by just the deduplication?
|
||||||
*
|
*
|
||||||
* We don't simply check against range->maxvalues again. The deduplication
|
* We don't simply check against range->maxvalues again. The deduplication
|
||||||
* might have freed very little space (e.g. just one value), forcing us to
|
* might have freed very little space (e.g. just one value), forcing us to
|
||||||
* do deduplication very often. In that case it's better to do compaction
|
* do deduplication very often. In that case, it's better to do the
|
||||||
* and reduce more space.
|
* compaction and reduce more space.
|
||||||
*/
|
*/
|
||||||
if (2 * range->nranges + range->nvalues <= range->maxvalues * MINMAX_BUFFER_LOAD_FACTOR)
|
if (2 * range->nranges + range->nvalues <= range->maxvalues * MINMAX_BUFFER_LOAD_FACTOR)
|
||||||
return true;
|
return true;
|
||||||
@ -1713,8 +1712,8 @@ range_add_value(BrinDesc *bdesc, Oid colloid,
|
|||||||
* rule that we never have duplicates with the ranges or sorted values.
|
* rule that we never have duplicates with the ranges or sorted values.
|
||||||
*
|
*
|
||||||
* We might also deduplicate and recheck if the value is contained, but
|
* We might also deduplicate and recheck if the value is contained, but
|
||||||
* that seems like an overkill. We'd need to deduplicate anyway, so why
|
* that seems like overkill. We'd need to deduplicate anyway, so why not
|
||||||
* not do it now.
|
* do it now.
|
||||||
*/
|
*/
|
||||||
modified = ensure_free_space_in_buffer(bdesc, colloid,
|
modified = ensure_free_space_in_buffer(bdesc, colloid,
|
||||||
attno, attr, ranges);
|
attno, attr, ranges);
|
||||||
@ -1805,10 +1804,10 @@ compactify_ranges(BrinDesc *bdesc, Ranges *ranges, int max_values)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* The distanceFn calls (which may internally call e.g. numeric_le) may
|
* The distanceFn calls (which may internally call e.g. numeric_le) may
|
||||||
* allocate quite a bit of memory, and we must not leak it. Otherwise we'd
|
* allocate quite a bit of memory, and we must not leak it. Otherwise,
|
||||||
* have problems e.g. when building indexes. So we create a local memory
|
* we'd have problems e.g. when building indexes. So we create a local
|
||||||
* context and make sure we free the memory before leaving this function
|
* memory context and make sure we free the memory before leaving this
|
||||||
* (not after every call).
|
* function (not after every call).
|
||||||
*/
|
*/
|
||||||
ctx = AllocSetContextCreate(CurrentMemoryContext,
|
ctx = AllocSetContextCreate(CurrentMemoryContext,
|
||||||
"minmax-multi context",
|
"minmax-multi context",
|
||||||
@ -1865,7 +1864,7 @@ brin_minmax_multi_opcinfo(PG_FUNCTION_ARGS)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Compute distance between two float4 values (plain subtraction).
|
* Compute the distance between two float4 values (plain subtraction).
|
||||||
*/
|
*/
|
||||||
Datum
|
Datum
|
||||||
brin_minmax_multi_distance_float4(PG_FUNCTION_ARGS)
|
brin_minmax_multi_distance_float4(PG_FUNCTION_ARGS)
|
||||||
@ -1883,7 +1882,7 @@ brin_minmax_multi_distance_float4(PG_FUNCTION_ARGS)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Compute distance between two float8 values (plain subtraction).
|
* Compute the distance between two float8 values (plain subtraction).
|
||||||
*/
|
*/
|
||||||
Datum
|
Datum
|
||||||
brin_minmax_multi_distance_float8(PG_FUNCTION_ARGS)
|
brin_minmax_multi_distance_float8(PG_FUNCTION_ARGS)
|
||||||
@ -1901,7 +1900,7 @@ brin_minmax_multi_distance_float8(PG_FUNCTION_ARGS)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Compute distance between two int2 values (plain subtraction).
|
* Compute the distance between two int2 values (plain subtraction).
|
||||||
*/
|
*/
|
||||||
Datum
|
Datum
|
||||||
brin_minmax_multi_distance_int2(PG_FUNCTION_ARGS)
|
brin_minmax_multi_distance_int2(PG_FUNCTION_ARGS)
|
||||||
@ -1919,7 +1918,7 @@ brin_minmax_multi_distance_int2(PG_FUNCTION_ARGS)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Compute distance between two int4 values (plain subtraction).
|
* Compute the distance between two int4 values (plain subtraction).
|
||||||
*/
|
*/
|
||||||
Datum
|
Datum
|
||||||
brin_minmax_multi_distance_int4(PG_FUNCTION_ARGS)
|
brin_minmax_multi_distance_int4(PG_FUNCTION_ARGS)
|
||||||
@ -1937,7 +1936,7 @@ brin_minmax_multi_distance_int4(PG_FUNCTION_ARGS)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Compute distance between two int8 values (plain subtraction).
|
* Compute the distance between two int8 values (plain subtraction).
|
||||||
*/
|
*/
|
||||||
Datum
|
Datum
|
||||||
brin_minmax_multi_distance_int8(PG_FUNCTION_ARGS)
|
brin_minmax_multi_distance_int8(PG_FUNCTION_ARGS)
|
||||||
@ -1955,8 +1954,8 @@ brin_minmax_multi_distance_int8(PG_FUNCTION_ARGS)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Compute distance between two tid values (by mapping them to float8
|
* Compute the distance between two tid values (by mapping them to float8 and
|
||||||
* and then subtracting them).
|
* then subtracting them).
|
||||||
*/
|
*/
|
||||||
Datum
|
Datum
|
||||||
brin_minmax_multi_distance_tid(PG_FUNCTION_ARGS)
|
brin_minmax_multi_distance_tid(PG_FUNCTION_ARGS)
|
||||||
@ -1987,7 +1986,7 @@ brin_minmax_multi_distance_tid(PG_FUNCTION_ARGS)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Computes distance between two numeric values (plain subtraction).
|
* Compute the distance between two numeric values (plain subtraction).
|
||||||
*/
|
*/
|
||||||
Datum
|
Datum
|
||||||
brin_minmax_multi_distance_numeric(PG_FUNCTION_ARGS)
|
brin_minmax_multi_distance_numeric(PG_FUNCTION_ARGS)
|
||||||
@ -2008,7 +2007,7 @@ brin_minmax_multi_distance_numeric(PG_FUNCTION_ARGS)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Computes approximate distance between two UUID values.
|
* Compute the approximate distance between two UUID values.
|
||||||
*
|
*
|
||||||
* XXX We do not need a perfectly accurate value, so we approximate the
|
* XXX We do not need a perfectly accurate value, so we approximate the
|
||||||
* deltas (which would have to be 128-bit integers) with a 64-bit float.
|
* deltas (which would have to be 128-bit integers) with a 64-bit float.
|
||||||
@ -2046,7 +2045,7 @@ brin_minmax_multi_distance_uuid(PG_FUNCTION_ARGS)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Compute approximate distance between two dates.
|
* Compute the approximate distance between two dates.
|
||||||
*/
|
*/
|
||||||
Datum
|
Datum
|
||||||
brin_minmax_multi_distance_date(PG_FUNCTION_ARGS)
|
brin_minmax_multi_distance_date(PG_FUNCTION_ARGS)
|
||||||
@ -2061,7 +2060,7 @@ brin_minmax_multi_distance_date(PG_FUNCTION_ARGS)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Computes approximate distance between two time (without tz) values.
|
* Compute the approximate distance between two time (without tz) values.
|
||||||
*
|
*
|
||||||
* TimeADT is just an int64, so we simply subtract the values directly.
|
* TimeADT is just an int64, so we simply subtract the values directly.
|
||||||
*/
|
*/
|
||||||
@ -2081,7 +2080,7 @@ brin_minmax_multi_distance_time(PG_FUNCTION_ARGS)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Computes approximate distance between two timetz values.
|
* Compute the approximate distance between two timetz values.
|
||||||
*
|
*
|
||||||
* Simply subtracts the TimeADT (int64) values embedded in TimeTzADT.
|
* Simply subtracts the TimeADT (int64) values embedded in TimeTzADT.
|
||||||
*/
|
*/
|
||||||
@ -2100,6 +2099,9 @@ brin_minmax_multi_distance_timetz(PG_FUNCTION_ARGS)
|
|||||||
PG_RETURN_FLOAT8(delta);
|
PG_RETURN_FLOAT8(delta);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Compute the distance between two timestamp values.
|
||||||
|
*/
|
||||||
Datum
|
Datum
|
||||||
brin_minmax_multi_distance_timestamp(PG_FUNCTION_ARGS)
|
brin_minmax_multi_distance_timestamp(PG_FUNCTION_ARGS)
|
||||||
{
|
{
|
||||||
@ -2119,7 +2121,7 @@ brin_minmax_multi_distance_timestamp(PG_FUNCTION_ARGS)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Computes distance between two interval values.
|
* Compute the distance between two interval values.
|
||||||
*/
|
*/
|
||||||
Datum
|
Datum
|
||||||
brin_minmax_multi_distance_interval(PG_FUNCTION_ARGS)
|
brin_minmax_multi_distance_interval(PG_FUNCTION_ARGS)
|
||||||
@ -2177,7 +2179,7 @@ brin_minmax_multi_distance_interval(PG_FUNCTION_ARGS)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Compute distance between two pg_lsn values.
|
* Compute the distance between two pg_lsn values.
|
||||||
*
|
*
|
||||||
* LSN is just an int64 encoding position in the stream, so just subtract
|
* LSN is just an int64 encoding position in the stream, so just subtract
|
||||||
* those int64 values directly.
|
* those int64 values directly.
|
||||||
@ -2198,7 +2200,7 @@ brin_minmax_multi_distance_pg_lsn(PG_FUNCTION_ARGS)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Compute distance between two macaddr values.
|
* Compute the distance between two macaddr values.
|
||||||
*
|
*
|
||||||
* mac addresses are treated as 6 unsigned chars, so do the same thing we
|
* mac addresses are treated as 6 unsigned chars, so do the same thing we
|
||||||
* already do for UUID values.
|
* already do for UUID values.
|
||||||
@ -2235,7 +2237,7 @@ brin_minmax_multi_distance_macaddr(PG_FUNCTION_ARGS)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Compute distance between two macaddr8 values.
|
* Compute the distance between two macaddr8 values.
|
||||||
*
|
*
|
||||||
* macaddr8 addresses are 8 unsigned chars, so do the same thing we
|
* macaddr8 addresses are 8 unsigned chars, so do the same thing we
|
||||||
* already do for UUID values.
|
* already do for UUID values.
|
||||||
@ -2278,15 +2280,15 @@ brin_minmax_multi_distance_macaddr8(PG_FUNCTION_ARGS)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Compute distance between two inet values.
|
* Compute the distance between two inet values.
|
||||||
*
|
*
|
||||||
* The distance is defined as difference between 32-bit/128-bit values,
|
* The distance is defined as the difference between 32-bit/128-bit values,
|
||||||
* depending on the IP version. The distance is computed by subtracting
|
* depending on the IP version. The distance is computed by subtracting
|
||||||
* the bytes and normalizing it to [0,1] range for each IP family.
|
* the bytes and normalizing it to [0,1] range for each IP family.
|
||||||
* Addresses from different families are considered to be in maximum
|
* Addresses from different families are considered to be in maximum
|
||||||
* distance, which is 1.0.
|
* distance, which is 1.0.
|
||||||
*
|
*
|
||||||
* XXX Does this need to consider the mask (bits)? For now it's ignored.
|
* XXX Does this need to consider the mask (bits)? For now, it's ignored.
|
||||||
*/
|
*/
|
||||||
Datum
|
Datum
|
||||||
brin_minmax_multi_distance_inet(PG_FUNCTION_ARGS)
|
brin_minmax_multi_distance_inet(PG_FUNCTION_ARGS)
|
||||||
@ -2320,7 +2322,8 @@ brin_minmax_multi_distance_inet(PG_FUNCTION_ARGS)
|
|||||||
* The length is calculated from the mask length, because we sort the
|
* The length is calculated from the mask length, because we sort the
|
||||||
* addresses by first address in the range, so A.B.C.D/24 < A.B.C.1 (the
|
* addresses by first address in the range, so A.B.C.D/24 < A.B.C.1 (the
|
||||||
* first range starts at A.B.C.0, which is before A.B.C.1). We don't want
|
* first range starts at A.B.C.0, which is before A.B.C.1). We don't want
|
||||||
* to produce negative delta in this case, so we just cut the extra bytes.
|
* to produce a negative delta in this case, so we just cut the extra
|
||||||
|
* bytes.
|
||||||
*
|
*
|
||||||
* XXX Maybe this should be a bit more careful and cut the bits, not just
|
* XXX Maybe this should be a bit more careful and cut the bits, not just
|
||||||
* whole bytes.
|
* whole bytes.
|
||||||
@ -2396,11 +2399,11 @@ brin_minmax_multi_get_values(BrinDesc *bdesc, MinMaxMultiOptions *opts)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Examine the given index tuple (which contains partial status of a certain
|
* Examine the given index tuple (which contains the partial status of a
|
||||||
* page range) by comparing it to the given value that comes from another heap
|
* certain page range) by comparing it to the given value that comes from
|
||||||
* tuple. If the new value is outside the min/max range specified by the
|
* another heap tuple. If the new value is outside the min/max range
|
||||||
* existing tuple values, update the index tuple and return true. Otherwise,
|
* specified by the existing tuple values, update the index tuple and return
|
||||||
* return false and do not modify in this case.
|
* true. Otherwise, return false and do not modify in this case.
|
||||||
*/
|
*/
|
||||||
Datum
|
Datum
|
||||||
brin_minmax_multi_add_value(PG_FUNCTION_ARGS)
|
brin_minmax_multi_add_value(PG_FUNCTION_ARGS)
|
||||||
@ -2427,13 +2430,13 @@ brin_minmax_multi_add_value(PG_FUNCTION_ARGS)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* If this is the first non-null value, we need to initialize the range
|
* If this is the first non-null value, we need to initialize the range
|
||||||
* list. Otherwise just extract the existing range list from BrinValues.
|
* list. Otherwise, just extract the existing range list from BrinValues.
|
||||||
*
|
*
|
||||||
* When starting with an empty range, we assume this is a batch mode and
|
* When starting with an empty range, we assume this is a batch mode and
|
||||||
* we use a larger buffer. The buffer size is derived from the BRIN range
|
* we use a larger buffer. The buffer size is derived from the BRIN range
|
||||||
* size, number of rows per page, with some sensible min/max values. Small
|
* size, number of rows per page, with some sensible min/max values. A
|
||||||
* buffer would be bad for performance, but large buffer might require a
|
* small buffer would be bad for performance, but a large buffer might
|
||||||
* lot of memory (because of keeping all the values).
|
* require a lot of memory (because of keeping all the values).
|
||||||
*/
|
*/
|
||||||
if (column->bv_allnulls)
|
if (column->bv_allnulls)
|
||||||
{
|
{
|
||||||
@ -2624,8 +2627,8 @@ brin_minmax_multi_consistent(PG_FUNCTION_ARGS)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* haven't managed to eliminate this range, so
|
* We haven't managed to eliminate this range, so
|
||||||
* consider it matching
|
* consider it matching.
|
||||||
*/
|
*/
|
||||||
matches = true;
|
matches = true;
|
||||||
|
|
||||||
@ -2713,9 +2716,7 @@ brin_minmax_multi_consistent(PG_FUNCTION_ARGS)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/* have we found a range matching all scan keys? if yes, we're done */
|
||||||
* have we found a range matching all scan keys? if yes, we're done
|
|
||||||
*/
|
|
||||||
if (matching)
|
if (matching)
|
||||||
PG_RETURN_DATUM(BoolGetDatum(true));
|
PG_RETURN_DATUM(BoolGetDatum(true));
|
||||||
}
|
}
|
||||||
@ -2769,10 +2770,10 @@ brin_minmax_multi_union(PG_FUNCTION_ARGS)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* The distanceFn calls (which may internally call e.g. numeric_le) may
|
* The distanceFn calls (which may internally call e.g. numeric_le) may
|
||||||
* allocate quite a bit of memory, and we must not leak it. Otherwise we'd
|
* allocate quite a bit of memory, and we must not leak it. Otherwise,
|
||||||
* have problems e.g. when building indexes. So we create a local memory
|
* we'd have problems e.g. when building indexes. So we create a local
|
||||||
* context and make sure we free the memory before leaving this function
|
* memory context and make sure we free the memory before leaving this
|
||||||
* (not after every call).
|
* function (not after every call).
|
||||||
*/
|
*/
|
||||||
ctx = AllocSetContextCreate(CurrentMemoryContext,
|
ctx = AllocSetContextCreate(CurrentMemoryContext,
|
||||||
"minmax-multi context",
|
"minmax-multi context",
|
||||||
|
@ -212,10 +212,10 @@ statext_mcv_build(StatsBuildData *data, double totalrows, int stattarget)
|
|||||||
groups = build_distinct_groups(nitems, items, mss, &ngroups);
|
groups = build_distinct_groups(nitems, items, mss, &ngroups);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Maximum number of MCV items to store, based on the statistics target we
|
* The maximum number of MCV items to store, based on the statistics
|
||||||
* computed for the statistics object (from target set for the object
|
* target we computed for the statistics object (from the target set for
|
||||||
* itself, attributes and the system default). In any case, we can't keep
|
* the object itself, attributes and the system default). In any case, we
|
||||||
* more groups than we have available.
|
* can't keep more groups than we have available.
|
||||||
*/
|
*/
|
||||||
nitems = stattarget;
|
nitems = stattarget;
|
||||||
if (nitems > ngroups)
|
if (nitems > ngroups)
|
||||||
@ -234,7 +234,7 @@ statext_mcv_build(StatsBuildData *data, double totalrows, int stattarget)
|
|||||||
* to consider unexpectedly uncommon items (again, compared to the base
|
* to consider unexpectedly uncommon items (again, compared to the base
|
||||||
* frequency), and the single-column algorithm does not have to.
|
* frequency), and the single-column algorithm does not have to.
|
||||||
*
|
*
|
||||||
* We simply decide how many items to keep by computing minimum count
|
* We simply decide how many items to keep by computing the minimum count
|
||||||
* using get_mincount_for_mcv_list() and then keep all items that seem to
|
* using get_mincount_for_mcv_list() and then keep all items that seem to
|
||||||
* be more common than that.
|
* be more common than that.
|
||||||
*/
|
*/
|
||||||
@ -255,9 +255,9 @@ statext_mcv_build(StatsBuildData *data, double totalrows, int stattarget)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* At this point we know the number of items for the MCV list. There might
|
* At this point, we know the number of items for the MCV list. There
|
||||||
* be none (for uniform distribution with many groups), and in that case
|
* might be none (for uniform distribution with many groups), and in that
|
||||||
* there will be no MCV list. Otherwise construct the MCV list.
|
* case, there will be no MCV list. Otherwise, construct the MCV list.
|
||||||
*/
|
*/
|
||||||
if (nitems > 0)
|
if (nitems > 0)
|
||||||
{
|
{
|
||||||
@ -345,7 +345,7 @@ statext_mcv_build(StatsBuildData *data, double totalrows, int stattarget)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* build_mss
|
* build_mss
|
||||||
* build MultiSortSupport for the attributes passed in attrs
|
* Build a MultiSortSupport for the given StatsBuildData.
|
||||||
*/
|
*/
|
||||||
static MultiSortSupport
|
static MultiSortSupport
|
||||||
build_mss(StatsBuildData *data)
|
build_mss(StatsBuildData *data)
|
||||||
@ -375,7 +375,7 @@ build_mss(StatsBuildData *data)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* count_distinct_groups
|
* count_distinct_groups
|
||||||
* count distinct combinations of SortItems in the array
|
* Count distinct combinations of SortItems in the array.
|
||||||
*
|
*
|
||||||
* The array is assumed to be sorted according to the MultiSortSupport.
|
* The array is assumed to be sorted according to the MultiSortSupport.
|
||||||
*/
|
*/
|
||||||
@ -400,7 +400,8 @@ count_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* compare_sort_item_count
|
* compare_sort_item_count
|
||||||
* comparator for sorting items by count (frequencies) in descending order
|
* Comparator for sorting items by count (frequencies) in descending
|
||||||
|
* order.
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
compare_sort_item_count(const void *a, const void *b)
|
compare_sort_item_count(const void *a, const void *b)
|
||||||
@ -418,9 +419,10 @@ compare_sort_item_count(const void *a, const void *b)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* build_distinct_groups
|
* build_distinct_groups
|
||||||
* build an array of SortItems for distinct groups and counts matching items
|
* Build an array of SortItems for distinct groups and counts matching
|
||||||
|
* items.
|
||||||
*
|
*
|
||||||
* The input array is assumed to be sorted
|
* The 'items' array is assumed to be sorted.
|
||||||
*/
|
*/
|
||||||
static SortItem *
|
static SortItem *
|
||||||
build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss,
|
build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss,
|
||||||
@ -477,7 +479,7 @@ sort_item_compare(const void *a, const void *b, void *arg)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* build_column_frequencies
|
* build_column_frequencies
|
||||||
* compute frequencies of values in each column
|
* Compute frequencies of values in each column.
|
||||||
*
|
*
|
||||||
* This returns an array of SortItems for each attribute the MCV is built
|
* This returns an array of SortItems for each attribute the MCV is built
|
||||||
* on, with a frequency (number of occurrences) for each value. This is
|
* on, with a frequency (number of occurrences) for each value. This is
|
||||||
@ -554,7 +556,7 @@ build_column_frequencies(SortItem *groups, int ngroups,
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* statext_mcv_load
|
* statext_mcv_load
|
||||||
* Load the MCV list for the indicated pg_statistic_ext tuple
|
* Load the MCV list for the indicated pg_statistic_ext tuple.
|
||||||
*/
|
*/
|
||||||
MCVList *
|
MCVList *
|
||||||
statext_mcv_load(Oid mvoid)
|
statext_mcv_load(Oid mvoid)
|
||||||
@ -598,10 +600,11 @@ statext_mcv_load(Oid mvoid)
|
|||||||
* | header fields | dimension info | deduplicated values | items |
|
* | header fields | dimension info | deduplicated values | items |
|
||||||
* +---------------+----------------+---------------------+-------+
|
* +---------------+----------------+---------------------+-------+
|
||||||
*
|
*
|
||||||
* Where dimension info stores information about type of K-th attribute (e.g.
|
* Where dimension info stores information about the type of the K-th
|
||||||
* typlen, typbyval and length of deduplicated values). Deduplicated values
|
* attribute (e.g. typlen, typbyval and length of deduplicated values).
|
||||||
* store deduplicated values for each attribute. And items store the actual
|
* Deduplicated values store deduplicated values for each attribute. And
|
||||||
* MCV list items, with values replaced by indexes into the arrays.
|
* items store the actual MCV list items, with values replaced by indexes into
|
||||||
|
* the arrays.
|
||||||
*
|
*
|
||||||
* When serializing the items, we use uint16 indexes. The number of MCV items
|
* When serializing the items, we use uint16 indexes. The number of MCV items
|
||||||
* is limited by the statistics target (which is capped to 10k at the moment).
|
* is limited by the statistics target (which is capped to 10k at the moment).
|
||||||
@ -641,10 +644,10 @@ statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats)
|
|||||||
/*
|
/*
|
||||||
* We'll include some rudimentary information about the attribute types
|
* We'll include some rudimentary information about the attribute types
|
||||||
* (length, by-val flag), so that we don't have to look them up while
|
* (length, by-val flag), so that we don't have to look them up while
|
||||||
* deserializating the MCV list (we already have the type OID in the
|
* deserializing the MCV list (we already have the type OID in the
|
||||||
* header). This is safe, because when changing type of the attribute the
|
* header). This is safe because when changing the type of the attribute
|
||||||
* statistics gets dropped automatically. We need to store the info about
|
* the statistics gets dropped automatically. We need to store the info
|
||||||
* the arrays of deduplicated values anyway.
|
* about the arrays of deduplicated values anyway.
|
||||||
*/
|
*/
|
||||||
info = (DimensionInfo *) palloc0(sizeof(DimensionInfo) * ndims);
|
info = (DimensionInfo *) palloc0(sizeof(DimensionInfo) * ndims);
|
||||||
|
|
||||||
@ -697,8 +700,8 @@ statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Walk through the array and eliminate duplicate values, but keep the
|
* Walk through the array and eliminate duplicate values, but keep the
|
||||||
* ordering (so that we can do bsearch later). We know there's at
|
* ordering (so that we can do a binary search later). We know there's
|
||||||
* least one item as (counts[dim] != 0), so we can skip the first
|
* at least one item as (counts[dim] != 0), so we can skip the first
|
||||||
* element.
|
* element.
|
||||||
*/
|
*/
|
||||||
ndistinct = 1; /* number of distinct values */
|
ndistinct = 1; /* number of distinct values */
|
||||||
@ -787,10 +790,10 @@ statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats)
|
|||||||
Size len;
|
Size len;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For cstring, we do similar thing as for varlena - first we
|
* cstring is handled similar to varlena - first we store the
|
||||||
* store the length as uint32 and then the data. We don't care
|
* length as uint32 and then the data. We don't care about
|
||||||
* about alignment, which means that during deserialization we
|
* alignment, which means that during deserialization we need
|
||||||
* need to copy the fields and only access the copies.
|
* to copy the fields and only access the copies.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* c-strings include terminator, so +1 byte */
|
/* c-strings include terminator, so +1 byte */
|
||||||
@ -874,13 +877,13 @@ statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats)
|
|||||||
Datum tmp;
|
Datum tmp;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For values passed by value, we need to copy just the
|
* For byval types, we need to copy just the significant bytes
|
||||||
* significant bytes - we can't use memcpy directly, as that
|
* - we can't use memcpy directly, as that assumes
|
||||||
* assumes little endian behavior. store_att_byval does
|
* little-endian behavior. store_att_byval does almost what
|
||||||
* almost what we need, but it requires properly aligned
|
* we need, but it requires a properly aligned buffer - the
|
||||||
* buffer - the output buffer does not guarantee that. So we
|
* output buffer does not guarantee that. So we simply use a
|
||||||
* simply use a local Datum variable (which guarantees proper
|
* local Datum variable (which guarantees proper alignment),
|
||||||
* alignment), and then copy the value from it.
|
* and then copy the value from it.
|
||||||
*/
|
*/
|
||||||
store_att_byval(&tmp, value, info[dim].typlen);
|
store_att_byval(&tmp, value, info[dim].typlen);
|
||||||
|
|
||||||
@ -1698,7 +1701,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
|
|||||||
* we can use the collation for the attribute itself, as
|
* we can use the collation for the attribute itself, as
|
||||||
* stored in varcollid. We do reset the statistics after a
|
* stored in varcollid. We do reset the statistics after a
|
||||||
* type change (including collation change), so this is OK.
|
* type change (including collation change), so this is OK.
|
||||||
* For expressions we use the collation extracted from the
|
* For expressions, we use the collation extracted from the
|
||||||
* expression itself.
|
* expression itself.
|
||||||
*/
|
*/
|
||||||
if (expronleft)
|
if (expronleft)
|
||||||
@ -1805,8 +1808,8 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Stop evaluating the array elements once we reach match
|
* Stop evaluating the array elements once we reach a
|
||||||
* value that can't change - ALL() is the same as
|
* matching value that can't change - ALL() is the same as
|
||||||
* AND-list, ANY() is the same as OR-list.
|
* AND-list, ANY() is the same as OR-list.
|
||||||
*/
|
*/
|
||||||
if (RESULT_IS_FINAL(match, expr->useOr))
|
if (RESULT_IS_FINAL(match, expr->useOr))
|
||||||
|
Reference in New Issue
Block a user