mirror of
https://github.com/postgres/postgres.git
synced 2025-06-25 01:02:05 +03:00
Fix BRIN cost estimation
The original code was overly optimistic about the cost of scanning a BRIN index, leading to BRIN indexes being selected when they'd be a worse choice than some other index. This complete rewrite should be more accurate. Author: David Rowley, based on an earlier patch by Emre Hasegeli Reviewed-by: Emre Hasegeli Discussion: https://postgr.es/m/CAKJS1f9n-Wapop5Xz1dtGdpdqmzeGqQK4sV2MK-zZugfC14Xtw@mail.gmail.com
This commit is contained in:
@ -1049,6 +1049,27 @@ brin_free_desc(BrinDesc *bdesc)
|
|||||||
MemoryContextDelete(bdesc->bd_context);
|
MemoryContextDelete(bdesc->bd_context);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Fetch index's statistical data into *stats
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
brinGetStats(Relation index, BrinStatsData *stats)
|
||||||
|
{
|
||||||
|
Buffer metabuffer;
|
||||||
|
Page metapage;
|
||||||
|
BrinMetaPageData *metadata;
|
||||||
|
|
||||||
|
metabuffer = ReadBuffer(index, BRIN_METAPAGE_BLKNO);
|
||||||
|
LockBuffer(metabuffer, BUFFER_LOCK_SHARE);
|
||||||
|
metapage = BufferGetPage(metabuffer);
|
||||||
|
metadata = (BrinMetaPageData *) PageGetContents(metapage);
|
||||||
|
|
||||||
|
stats->pagesPerRange = metadata->pagesPerRange;
|
||||||
|
stats->revmapNumPages = metadata->lastRevmapPage - 1;
|
||||||
|
|
||||||
|
UnlockReleaseBuffer(metabuffer);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Initialize a BrinBuildState appropriate to create tuples on the given index.
|
* Initialize a BrinBuildState appropriate to create tuples on the given index.
|
||||||
*/
|
*/
|
||||||
|
@ -101,6 +101,7 @@
|
|||||||
#include <float.h>
|
#include <float.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
|
||||||
|
#include "access/brin.h"
|
||||||
#include "access/gin.h"
|
#include "access/gin.h"
|
||||||
#include "access/htup_details.h"
|
#include "access/htup_details.h"
|
||||||
#include "access/sysattr.h"
|
#include "access/sysattr.h"
|
||||||
@ -7721,56 +7722,200 @@ brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
|
|||||||
{
|
{
|
||||||
IndexOptInfo *index = path->indexinfo;
|
IndexOptInfo *index = path->indexinfo;
|
||||||
List *indexQuals = path->indexquals;
|
List *indexQuals = path->indexquals;
|
||||||
List *indexOrderBys = path->indexorderbys;
|
|
||||||
double numPages = index->pages;
|
double numPages = index->pages;
|
||||||
double numTuples = index->tuples;
|
RelOptInfo *baserel = index->rel;
|
||||||
|
RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root);
|
||||||
List *qinfos;
|
List *qinfos;
|
||||||
Cost spc_seq_page_cost;
|
Cost spc_seq_page_cost;
|
||||||
Cost spc_random_page_cost;
|
Cost spc_random_page_cost;
|
||||||
double qual_op_cost;
|
|
||||||
double qual_arg_cost;
|
double qual_arg_cost;
|
||||||
|
double qualSelectivity;
|
||||||
|
BrinStatsData statsData;
|
||||||
|
double indexRanges;
|
||||||
|
double minimalRanges;
|
||||||
|
double estimatedRanges;
|
||||||
|
double selec;
|
||||||
|
Relation indexRel;
|
||||||
|
ListCell *l;
|
||||||
|
VariableStatData vardata;
|
||||||
|
|
||||||
/* Do preliminary analysis of indexquals */
|
Assert(rte->rtekind == RTE_RELATION);
|
||||||
qinfos = deconstruct_indexquals(path);
|
|
||||||
|
|
||||||
/* fetch estimated page cost for tablespace containing index */
|
/* fetch estimated page cost for the tablespace containing the index */
|
||||||
get_tablespace_page_costs(index->reltablespace,
|
get_tablespace_page_costs(index->reltablespace,
|
||||||
&spc_random_page_cost,
|
&spc_random_page_cost,
|
||||||
&spc_seq_page_cost);
|
&spc_seq_page_cost);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* BRIN indexes are always read in full; use that as startup cost.
|
* Obtain some data from the index itself.
|
||||||
*
|
|
||||||
* XXX maybe only include revmap pages here?
|
|
||||||
*/
|
*/
|
||||||
*indexStartupCost = spc_seq_page_cost * numPages * loop_count;
|
indexRel = index_open(index->indexoid, AccessShareLock);
|
||||||
|
brinGetStats(indexRel, &statsData);
|
||||||
|
index_close(indexRel, AccessShareLock);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Compute index correlation
|
||||||
|
*
|
||||||
|
* Because we can use all index quals equally when scanning, we can use
|
||||||
|
* the largest correlation (in absolute value) among columns used by the
|
||||||
|
* query. Start at zero, the worst possible case. If we cannot find
|
||||||
|
* any correlation statistics, we will keep it as 0.
|
||||||
|
*/
|
||||||
|
*indexCorrelation = 0;
|
||||||
|
|
||||||
|
qinfos = deconstruct_indexquals(path);
|
||||||
|
foreach(l, qinfos)
|
||||||
|
{
|
||||||
|
IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(l);
|
||||||
|
AttrNumber attnum = index->indexkeys[qinfo->indexcol];
|
||||||
|
|
||||||
|
/* attempt to lookup stats in relation for this index column */
|
||||||
|
if (attnum != 0)
|
||||||
|
{
|
||||||
|
/* Simple variable -- look to stats for the underlying table */
|
||||||
|
if (get_relation_stats_hook &&
|
||||||
|
(*get_relation_stats_hook) (root, rte, attnum, &vardata))
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* The hook took control of acquiring a stats tuple. If it
|
||||||
|
* did supply a tuple, it'd better have supplied a freefunc.
|
||||||
|
*/
|
||||||
|
if (HeapTupleIsValid(vardata.statsTuple) && !vardata.freefunc)
|
||||||
|
elog(ERROR,
|
||||||
|
"no function provided to release variable stats with");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
vardata.statsTuple =
|
||||||
|
SearchSysCache3(STATRELATTINH,
|
||||||
|
ObjectIdGetDatum(rte->relid),
|
||||||
|
Int16GetDatum(attnum),
|
||||||
|
BoolGetDatum(false));
|
||||||
|
vardata.freefunc = ReleaseSysCache;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Looks like we've found an expression column in the index. Let's
|
||||||
|
* see if there's any stats for it.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* get the attnum from the 0-based index. */
|
||||||
|
attnum = qinfo->indexcol + 1;
|
||||||
|
|
||||||
|
if (get_index_stats_hook &&
|
||||||
|
(*get_index_stats_hook) (root, index->indexoid, attnum, &vardata))
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* The hook took control of acquiring a stats tuple. If it did
|
||||||
|
* supply a tuple, it'd better have supplied a freefunc.
|
||||||
|
*/
|
||||||
|
if (HeapTupleIsValid(vardata.statsTuple) &&
|
||||||
|
!vardata.freefunc)
|
||||||
|
elog(ERROR, "no function provided to release variable stats with");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
vardata.statsTuple = SearchSysCache3(STATRELATTINH,
|
||||||
|
ObjectIdGetDatum(index->indexoid),
|
||||||
|
Int16GetDatum(attnum),
|
||||||
|
BoolGetDatum(false));
|
||||||
|
vardata.freefunc = ReleaseSysCache;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (HeapTupleIsValid(vardata.statsTuple))
|
||||||
|
{
|
||||||
|
float4 *numbers;
|
||||||
|
int nnumbers;
|
||||||
|
|
||||||
|
if (get_attstatsslot(vardata.statsTuple, InvalidOid, 0,
|
||||||
|
STATISTIC_KIND_CORRELATION,
|
||||||
|
InvalidOid,
|
||||||
|
NULL,
|
||||||
|
NULL, NULL,
|
||||||
|
&numbers, &nnumbers))
|
||||||
|
{
|
||||||
|
double varCorrelation = 0.0;
|
||||||
|
|
||||||
|
if (nnumbers > 0)
|
||||||
|
varCorrelation = Abs(numbers[0]);
|
||||||
|
|
||||||
|
if (varCorrelation > *indexCorrelation)
|
||||||
|
*indexCorrelation = varCorrelation;
|
||||||
|
|
||||||
|
free_attstatsslot(InvalidOid, NULL, 0, numbers, nnumbers);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ReleaseVariableStats(vardata);
|
||||||
|
}
|
||||||
|
|
||||||
|
qualSelectivity = clauselist_selectivity(root, indexQuals,
|
||||||
|
baserel->relid,
|
||||||
|
JOIN_INNER, NULL,
|
||||||
|
baserel);
|
||||||
|
|
||||||
|
/* work out the actual number of ranges in the index */
|
||||||
|
indexRanges = Max(ceil((double) baserel->pages / statsData.pagesPerRange),
|
||||||
|
1.0);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Now calculate the minimum possible ranges we could match with if all of
|
||||||
|
* the rows were in the perfect order in the table's heap.
|
||||||
|
*/
|
||||||
|
minimalRanges = ceil(indexRanges * qualSelectivity);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Now estimate the number of ranges that we'll touch by using the
|
||||||
|
* indexCorrelation from the stats. Careful not to divide by zero
|
||||||
|
* (note we're using the absolute value of the correlation).
|
||||||
|
*/
|
||||||
|
if (*indexCorrelation < 1.0e-10)
|
||||||
|
estimatedRanges = indexRanges;
|
||||||
|
else
|
||||||
|
estimatedRanges = Min(minimalRanges / *indexCorrelation, indexRanges);
|
||||||
|
|
||||||
|
/* we expect to visit this portion of the table */
|
||||||
|
selec = estimatedRanges / indexRanges;
|
||||||
|
|
||||||
|
CLAMP_PROBABILITY(selec);
|
||||||
|
|
||||||
|
*indexSelectivity = selec;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Compute the index qual costs, much as in genericcostestimate, to add
|
||||||
|
* to the index costs.
|
||||||
|
*/
|
||||||
|
qual_arg_cost = other_operands_eval_cost(root, qinfos) +
|
||||||
|
orderby_operands_eval_cost(root, path);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Compute the startup cost as the cost to read the whole revmap
|
||||||
|
* sequentially, including the cost to execute the index quals.
|
||||||
|
*/
|
||||||
|
*indexStartupCost =
|
||||||
|
spc_seq_page_cost * statsData.revmapNumPages * loop_count;
|
||||||
|
*indexStartupCost += qual_arg_cost;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* To read a BRIN index there might be a bit of back and forth over
|
* To read a BRIN index there might be a bit of back and forth over
|
||||||
* regular pages, as revmap might point to them out of sequential order;
|
* regular pages, as revmap might point to them out of sequential order;
|
||||||
* calculate this as reading the whole index in random order.
|
* calculate the total cost as reading the whole index in random order.
|
||||||
*/
|
*/
|
||||||
*indexTotalCost = spc_random_page_cost * numPages * loop_count;
|
*indexTotalCost = *indexStartupCost +
|
||||||
|
spc_random_page_cost * (numPages - statsData.revmapNumPages) * loop_count;
|
||||||
*indexSelectivity =
|
|
||||||
clauselist_selectivity(root, indexQuals,
|
|
||||||
path->indexinfo->rel->relid,
|
|
||||||
JOIN_INNER, NULL,
|
|
||||||
path->indexinfo->rel);
|
|
||||||
*indexCorrelation = 1;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Add on index qual eval costs, much as in genericcostestimate.
|
* Charge a small amount per range tuple which we expect to match to. This
|
||||||
|
* is meant to reflect the costs of manipulating the bitmap. The BRIN scan
|
||||||
|
* will set a bit for each page in the range when we find a matching
|
||||||
|
* range, so we must multiply the charge by the number of pages in the
|
||||||
|
* range.
|
||||||
*/
|
*/
|
||||||
qual_arg_cost = other_operands_eval_cost(root, qinfos) +
|
*indexTotalCost += 0.1 * cpu_operator_cost * estimatedRanges *
|
||||||
orderby_operands_eval_cost(root, path);
|
statsData.pagesPerRange;
|
||||||
qual_op_cost = cpu_operator_cost *
|
|
||||||
(list_length(indexQuals) + list_length(indexOrderBys));
|
|
||||||
|
|
||||||
*indexStartupCost += qual_arg_cost;
|
|
||||||
*indexTotalCost += qual_arg_cost;
|
|
||||||
*indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost);
|
|
||||||
*indexPages = index->pages;
|
*indexPages = index->pages;
|
||||||
|
|
||||||
/* XXX what about pages_per_range? */
|
|
||||||
}
|
}
|
||||||
|
@ -25,6 +25,17 @@ typedef struct BrinOptions
|
|||||||
bool autosummarize;
|
bool autosummarize;
|
||||||
} BrinOptions;
|
} BrinOptions;
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* BrinStatsData represents stats data for planner use
|
||||||
|
*/
|
||||||
|
typedef struct BrinStatsData
|
||||||
|
{
|
||||||
|
BlockNumber pagesPerRange;
|
||||||
|
BlockNumber revmapNumPages;
|
||||||
|
} BrinStatsData;
|
||||||
|
|
||||||
|
|
||||||
#define BRIN_DEFAULT_PAGES_PER_RANGE 128
|
#define BRIN_DEFAULT_PAGES_PER_RANGE 128
|
||||||
#define BrinGetPagesPerRange(relation) \
|
#define BrinGetPagesPerRange(relation) \
|
||||||
((relation)->rd_options ? \
|
((relation)->rd_options ? \
|
||||||
@ -35,4 +46,7 @@ typedef struct BrinOptions
|
|||||||
((BrinOptions *) (relation)->rd_options)->autosummarize : \
|
((BrinOptions *) (relation)->rd_options)->autosummarize : \
|
||||||
false)
|
false)
|
||||||
|
|
||||||
|
|
||||||
|
extern void brinGetStats(Relation index, BrinStatsData *stats);
|
||||||
|
|
||||||
#endif /* BRIN_H */
|
#endif /* BRIN_H */
|
||||||
|
@ -363,6 +363,8 @@ BEGIN
|
|||||||
END LOOP;
|
END LOOP;
|
||||||
END;
|
END;
|
||||||
$x$;
|
$x$;
|
||||||
|
RESET enable_seqscan;
|
||||||
|
RESET enable_bitmapscan;
|
||||||
INSERT INTO brintest SELECT
|
INSERT INTO brintest SELECT
|
||||||
repeat(stringu1, 42)::bytea,
|
repeat(stringu1, 42)::bytea,
|
||||||
substr(stringu1, 1, 1)::"char",
|
substr(stringu1, 1, 1)::"char",
|
||||||
@ -481,3 +483,27 @@ SELECT brin_summarize_range('brin_summarize_idx', -1);
|
|||||||
ERROR: block number out of range: -1
|
ERROR: block number out of range: -1
|
||||||
SELECT brin_summarize_range('brin_summarize_idx', 4294967296);
|
SELECT brin_summarize_range('brin_summarize_idx', 4294967296);
|
||||||
ERROR: block number out of range: 4294967296
|
ERROR: block number out of range: 4294967296
|
||||||
|
-- test brin cost estimates behave sanely based on correlation of values
|
||||||
|
CREATE TABLE brin_test (a INT, b INT);
|
||||||
|
INSERT INTO brin_test SELECT x/100,x%100 FROM generate_series(1,10000) x(x);
|
||||||
|
CREATE INDEX brin_test_a_idx ON brin_test USING brin (a) WITH (pages_per_range = 2);
|
||||||
|
CREATE INDEX brin_test_b_idx ON brin_test USING brin (b) WITH (pages_per_range = 2);
|
||||||
|
VACUUM ANALYZE brin_test;
|
||||||
|
-- Ensure brin index is used when columns are perfectly correlated
|
||||||
|
EXPLAIN (COSTS OFF) SELECT * FROM brin_test WHERE a = 1;
|
||||||
|
QUERY PLAN
|
||||||
|
--------------------------------------------
|
||||||
|
Bitmap Heap Scan on brin_test
|
||||||
|
Recheck Cond: (a = 1)
|
||||||
|
-> Bitmap Index Scan on brin_test_a_idx
|
||||||
|
Index Cond: (a = 1)
|
||||||
|
(4 rows)
|
||||||
|
|
||||||
|
-- Ensure brin index is not used when values are not correlated
|
||||||
|
EXPLAIN (COSTS OFF) SELECT * FROM brin_test WHERE b = 1;
|
||||||
|
QUERY PLAN
|
||||||
|
-----------------------
|
||||||
|
Seq Scan on brin_test
|
||||||
|
Filter: (b = 1)
|
||||||
|
(2 rows)
|
||||||
|
|
||||||
|
@ -370,6 +370,9 @@ BEGIN
|
|||||||
END;
|
END;
|
||||||
$x$;
|
$x$;
|
||||||
|
|
||||||
|
RESET enable_seqscan;
|
||||||
|
RESET enable_bitmapscan;
|
||||||
|
|
||||||
INSERT INTO brintest SELECT
|
INSERT INTO brintest SELECT
|
||||||
repeat(stringu1, 42)::bytea,
|
repeat(stringu1, 42)::bytea,
|
||||||
substr(stringu1, 1, 1)::"char",
|
substr(stringu1, 1, 1)::"char",
|
||||||
@ -444,3 +447,16 @@ SELECT brin_summarize_range('brin_summarize_idx', 4294967295);
|
|||||||
-- invalid block number values
|
-- invalid block number values
|
||||||
SELECT brin_summarize_range('brin_summarize_idx', -1);
|
SELECT brin_summarize_range('brin_summarize_idx', -1);
|
||||||
SELECT brin_summarize_range('brin_summarize_idx', 4294967296);
|
SELECT brin_summarize_range('brin_summarize_idx', 4294967296);
|
||||||
|
|
||||||
|
|
||||||
|
-- test brin cost estimates behave sanely based on correlation of values
|
||||||
|
CREATE TABLE brin_test (a INT, b INT);
|
||||||
|
INSERT INTO brin_test SELECT x/100,x%100 FROM generate_series(1,10000) x(x);
|
||||||
|
CREATE INDEX brin_test_a_idx ON brin_test USING brin (a) WITH (pages_per_range = 2);
|
||||||
|
CREATE INDEX brin_test_b_idx ON brin_test USING brin (b) WITH (pages_per_range = 2);
|
||||||
|
VACUUM ANALYZE brin_test;
|
||||||
|
|
||||||
|
-- Ensure brin index is used when columns are perfectly correlated
|
||||||
|
EXPLAIN (COSTS OFF) SELECT * FROM brin_test WHERE a = 1;
|
||||||
|
-- Ensure brin index is not used when values are not correlated
|
||||||
|
EXPLAIN (COSTS OFF) SELECT * FROM brin_test WHERE b = 1;
|
||||||
|
Reference in New Issue
Block a user