mirror of
https://github.com/postgres/postgres.git
synced 2025-07-17 06:41:09 +03:00
Measure the number of all-visible pages for use in index-only scan costing.
Add a column pg_class.relallvisible to remember the number of pages that were all-visible according to the visibility map as of the last VACUUM (or ANALYZE, or some other operations that update pg_class.relpages). Use relallvisible/relpages, instead of an arbitrary constant, to estimate how many heap page fetches can be avoided during an index-only scan.

This is pretty primitive and will no doubt see refinements once we've acquired more field experience with the index-only scan mechanism, but it's way better than using a constant.

Note: I had to adjust an underspecified query in the window.sql regression test, because it was changing answers when the plan changed to use an index-only scan. Some of the adjacent tests perhaps should be adjusted as well, but I didn't do that here.
This commit is contained in:
@ -120,9 +120,6 @@ bool enable_material = true;
|
||||
bool enable_mergejoin = true;
|
||||
bool enable_hashjoin = true;
|
||||
|
||||
/* Possibly this should become a GUC too */
|
||||
static double visibility_fraction = 0.9;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
PlannerInfo *root;
|
||||
@ -324,9 +321,10 @@ cost_index(IndexPath *path, PlannerInfo *root,
|
||||
*
|
||||
* If it's an index-only scan, then we will not need to fetch any heap
|
||||
* pages for which the visibility map shows all tuples are visible.
|
||||
* Unfortunately, we have no stats as to how much of the heap is
|
||||
* all-visible, and that's likely to be a rather unstable number anyway.
|
||||
* We use an arbitrary constant visibility_fraction to estimate this.
|
||||
* Hence, reduce the estimated number of heap fetches accordingly.
|
||||
* We use the measured fraction of the entire heap that is all-visible,
|
||||
* which might not be particularly relevant to the subset of the heap
|
||||
* that this query will fetch; but it's not clear how to do better.
|
||||
*----------
|
||||
*/
|
||||
if (outer_rel != NULL && outer_rel->rows > 1)
|
||||
@ -347,7 +345,7 @@ cost_index(IndexPath *path, PlannerInfo *root,
|
||||
root);
|
||||
|
||||
if (indexonly)
|
||||
pages_fetched = ceil(pages_fetched * visibility_fraction);
|
||||
pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
|
||||
|
||||
max_IO_cost = (pages_fetched * spc_random_page_cost) / num_scans;
|
||||
|
||||
@ -369,7 +367,7 @@ cost_index(IndexPath *path, PlannerInfo *root,
|
||||
root);
|
||||
|
||||
if (indexonly)
|
||||
pages_fetched = ceil(pages_fetched * visibility_fraction);
|
||||
pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
|
||||
|
||||
min_IO_cost = (pages_fetched * spc_random_page_cost) / num_scans;
|
||||
}
|
||||
@ -385,7 +383,7 @@ cost_index(IndexPath *path, PlannerInfo *root,
|
||||
root);
|
||||
|
||||
if (indexonly)
|
||||
pages_fetched = ceil(pages_fetched * visibility_fraction);
|
||||
pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
|
||||
|
||||
/* max_IO_cost is for the perfectly uncorrelated case (csquared=0) */
|
||||
max_IO_cost = pages_fetched * spc_random_page_cost;
|
||||
@ -394,7 +392,7 @@ cost_index(IndexPath *path, PlannerInfo *root,
|
||||
pages_fetched = ceil(indexSelectivity * (double) baserel->pages);
|
||||
|
||||
if (indexonly)
|
||||
pages_fetched = ceil(pages_fetched * visibility_fraction);
|
||||
pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
|
||||
|
||||
min_IO_cost = spc_random_page_cost;
|
||||
if (pages_fetched > 1)
|
||||
|
@ -116,7 +116,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
|
||||
*/
|
||||
if (!inhparent)
|
||||
estimate_rel_size(relation, rel->attr_widths - rel->min_attr,
|
||||
&rel->pages, &rel->tuples);
|
||||
&rel->pages, &rel->tuples, &rel->allvisfrac);
|
||||
|
||||
/*
|
||||
* Make list of indexes. Ignore indexes on system catalogs if told to.
|
||||
@ -339,8 +339,10 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
|
||||
}
|
||||
else
|
||||
{
|
||||
double allvisfrac; /* dummy */
|
||||
|
||||
estimate_rel_size(indexRelation, NULL,
|
||||
&info->pages, &info->tuples);
|
||||
&info->pages, &info->tuples, &allvisfrac);
|
||||
if (info->tuples > rel->tuples)
|
||||
info->tuples = rel->tuples;
|
||||
}
|
||||
@ -369,17 +371,21 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
|
||||
/*
|
||||
* estimate_rel_size - estimate # pages and # tuples in a table or index
|
||||
*
|
||||
* We also estimate the fraction of the pages that are marked all-visible in
|
||||
* the visibility map, for use in estimation of index-only scans.
|
||||
*
|
||||
* If attr_widths isn't NULL, it points to the zero-index entry of the
|
||||
* relation's attr_widths[] cache; we fill this in if we have need to compute
|
||||
* the attribute widths for estimation purposes.
|
||||
*/
|
||||
void
|
||||
estimate_rel_size(Relation rel, int32 *attr_widths,
|
||||
BlockNumber *pages, double *tuples)
|
||||
BlockNumber *pages, double *tuples, double *allvisfrac)
|
||||
{
|
||||
BlockNumber curpages;
|
||||
BlockNumber relpages;
|
||||
double reltuples;
|
||||
BlockNumber relallvisible;
|
||||
double density;
|
||||
|
||||
switch (rel->rd_rel->relkind)
|
||||
@ -432,11 +438,13 @@ estimate_rel_size(Relation rel, int32 *attr_widths,
|
||||
if (curpages == 0)
|
||||
{
|
||||
*tuples = 0;
|
||||
*allvisfrac = 0;
|
||||
break;
|
||||
}
|
||||
/* coerce values in pg_class to more desirable types */
|
||||
relpages = (BlockNumber) rel->rd_rel->relpages;
|
||||
reltuples = (double) rel->rd_rel->reltuples;
|
||||
relallvisible = (BlockNumber) rel->rd_rel->relallvisible;
|
||||
|
||||
/*
|
||||
* If it's an index, discount the metapage while estimating the
|
||||
@ -480,21 +488,37 @@ estimate_rel_size(Relation rel, int32 *attr_widths,
|
||||
density = (BLCKSZ - SizeOfPageHeaderData) / tuple_width;
|
||||
}
|
||||
*tuples = rint(density * (double) curpages);
|
||||
|
||||
/*
|
||||
* We use relallvisible as-is, rather than scaling it up like we
|
||||
* do for the pages and tuples counts, on the theory that any
|
||||
* pages added since the last VACUUM are most likely not marked
|
||||
* all-visible. But costsize.c wants it converted to a fraction.
|
||||
*/
|
||||
if (relallvisible == 0 || curpages <= 0)
|
||||
*allvisfrac = 0;
|
||||
else if ((double) relallvisible >= curpages)
|
||||
*allvisfrac = 1;
|
||||
else
|
||||
*allvisfrac = (double) relallvisible / curpages;
|
||||
break;
|
||||
case RELKIND_SEQUENCE:
|
||||
/* Sequences always have a known size */
|
||||
*pages = 1;
|
||||
*tuples = 1;
|
||||
*allvisfrac = 0;
|
||||
break;
|
||||
case RELKIND_FOREIGN_TABLE:
|
||||
/* Just use whatever's in pg_class */
|
||||
*pages = rel->rd_rel->relpages;
|
||||
*tuples = rel->rd_rel->reltuples;
|
||||
*allvisfrac = 0;
|
||||
break;
|
||||
default:
|
||||
/* else it has no disk storage; probably shouldn't get here? */
|
||||
*pages = 0;
|
||||
*tuples = 0;
|
||||
*allvisfrac = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -109,6 +109,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind)
|
||||
rel->indexlist = NIL;
|
||||
rel->pages = 0;
|
||||
rel->tuples = 0;
|
||||
rel->allvisfrac = 0;
|
||||
rel->subplan = NULL;
|
||||
rel->subroot = NULL;
|
||||
rel->baserestrictinfo = NIL;
|
||||
@ -362,6 +363,7 @@ build_join_rel(PlannerInfo *root,
|
||||
joinrel->indexlist = NIL;
|
||||
joinrel->pages = 0;
|
||||
joinrel->tuples = 0;
|
||||
joinrel->allvisfrac = 0;
|
||||
joinrel->subplan = NULL;
|
||||
joinrel->subroot = NULL;
|
||||
joinrel->baserestrictinfo = NIL;
|
||||
|
Reference in New Issue
Block a user