1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-07 00:36:50 +03:00

Account for the effect of lossy pages when costing bitmap scans.

Dilip Kumar, reviewed by Alexander Kumenkov, Amul Sul, and me.
Some final adjustments by me.

Discussion: http://postgr.es/m/CAFiTN-sYtqUOXQ4SpuhTv0Z9gD0si3YxZGv_PQAAMX8qbOotcg@mail.gmail.com
This commit is contained in:
Robert Haas
2017-11-10 16:50:50 -05:00
parent 0c98d0dd5c
commit 5edc63bda6
3 changed files with 75 additions and 22 deletions

View File

@ -5171,6 +5171,8 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual,
double T;
double pages_fetched;
double tuples_fetched;
double heap_pages;
long maxentries;
/*
* Fetch total cost of obtaining the bitmap, as well as its total
@ -5185,6 +5187,24 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual,
T = (baserel->pages > 1) ? (double) baserel->pages : 1.0;
/*
* For a single scan, the number of heap pages that need to be fetched is
* the same as the Mackert and Lohman formula for the case T <= b (ie, no
* re-reads needed).
*/
pages_fetched = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
/*
* Calculate the number of pages fetched from the heap.  Then, based on the
* current work_mem setting, estimate maxentries for the bitmap.
* (Note that we always do this calculation based on the number of pages
* that would be fetched in a single iteration, even if loop_count > 1.
* That's correct, because only that number of entries will be stored in
* the bitmap at one time.)
*/
heap_pages = Min(pages_fetched, baserel->pages);
maxentries = tbm_calculate_entries(work_mem * 1024L);
if (loop_count > 1)
{
/*
@ -5199,22 +5219,41 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual,
root);
pages_fetched /= loop_count;
}
else
{
/*
* For a single scan, the number of heap pages that need to be fetched
* is the same as the Mackert and Lohman formula for the case T <= b
* (ie, no re-reads needed).
*/
pages_fetched =
(2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
}
if (pages_fetched >= T)
pages_fetched = T;
else
pages_fetched = ceil(pages_fetched);
if (maxentries < heap_pages)
{
double exact_pages;
double lossy_pages;
/*
* Crude approximation of the number of lossy pages. Because of the
* way tbm_lossify() is coded, the number of lossy pages increases
* very sharply as soon as we run short of memory; this formula has
* that property and seems to perform adequately in testing, but it's
* possible we could do better somehow.
*/
lossy_pages = Max(0, heap_pages - maxentries / 2);
exact_pages = heap_pages - lossy_pages;
/*
* If there are lossy pages then recompute the number of tuples
* processed by the bitmap heap node. We assume here that the chance
* of a given tuple coming from an exact page is the same as the
* chance that a given page is exact. This might not be true, but
* it's not clear how we can do any better.
*/
if (lossy_pages > 0)
tuples_fetched =
clamp_row_est(indexSelectivity *
(exact_pages / heap_pages) * baserel->tuples +
(lossy_pages / heap_pages) * baserel->tuples);
}
if (cost)
*cost = indexTotalCost;
if (tuple)