1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-28 23:42:10 +03:00

Reduce non-leaf keys overlap in GiST indexes produced by a sorted build

The GiST sorted build currently chooses split points based only on page space
utilization.  That may lead to higher overlap between non-leaf keys and, in
turn, slower search queries.

This commit makes the sorted build use the opclass's picksplit method.  Once
four pages at a level are accumulated, the picksplit method is applied until
each split partition fits on a page.  Some of our split algorithms could show
significant performance degradation while processing four times more data at
once.  But those opclasses haven't received sorted build support and shouldn't
receive it before their split algorithms are improved.

Discussion: https://postgr.es/m/CAHqSB9jqtS94e9%3D0vxqQX5dxQA89N95UKyz-%3DA7Y%2B_YJt%2BVW5A%40mail.gmail.com
Author: Aliaksandr Kalenik, Sergei Shoulbakov, Andrey Borodin
Reviewed-by: Björn Harrtell, Darafei Praliaskouski, Andres Freund
Reviewed-by: Alexander Korotkov
This commit is contained in:
Alexander Korotkov
2022-02-07 23:20:42 +03:00
parent 42a9e88bf6
commit f1ea98a797
4 changed files with 199 additions and 112 deletions

View File

@@ -33,14 +33,13 @@ COMMIT;
 SELECT * FROM gist_page_items(get_raw_page('test_gist_idx', 0), 'test_gist_idx');
 itemoffset | ctid | itemlen | dead | keys
 ------------+-----------+---------+------+-------------------
-1 | (1,65535) | 40 | f | (p)=((166,166))
-2 | (2,65535) | 40 | f | (p)=((332,332))
-3 | (3,65535) | 40 | f | (p)=((498,498))
-4 | (4,65535) | 40 | f | (p)=((664,664))
-5 | (5,65535) | 40 | f | (p)=((830,830))
-6 | (6,65535) | 40 | f | (p)=((996,996))
-7 | (7,65535) | 40 | f | (p)=((1000,1000))
-(7 rows)
+1 | (1,65535) | 40 | f | (p)=((185,185))
+2 | (2,65535) | 40 | f | (p)=((370,370))
+3 | (3,65535) | 40 | f | (p)=((555,555))
+4 | (4,65535) | 40 | f | (p)=((740,740))
+5 | (5,65535) | 40 | f | (p)=((870,870))
+6 | (6,65535) | 40 | f | (p)=((1000,1000))
+(6 rows)
SELECT * FROM gist_page_items(get_raw_page('test_gist_idx', 1), 'test_gist_idx') LIMIT 5;
itemoffset | ctid | itemlen | dead | keys
@@ -63,7 +62,6 @@ SELECT itemoffset, ctid, itemlen FROM gist_page_items_bytea(get_raw_page('test_g
 4 | (4,65535) | 40
 5 | (5,65535) | 40
 6 | (6,65535) | 40
-7 | (7,65535) | 40
-(7 rows)
+(6 rows)
DROP TABLE test_gist;