1
0
mirror of https://github.com/postgres/postgres.git synced 2025-12-02 23:42:46 +03:00

Introduce SP-GiST operator class over box.

Patch implements quad-tree over boxes, naive approach of 2D quad tree will not
work for any non-point objects because splitting space on node is not
efficient. The idea of pathc is treating 2D boxes as 4D points, so,
object will not overlap (in 4D space).

The performance tests reveal that this technique especially beneficial
with too much overlapping objects, so called "spaghetti data".

Author: Alexander Lebedev with editorization by Emre Hasegeli and me
This commit is contained in:
Teodor Sigaev
2016-03-30 18:42:36 +03:00
parent 87545f5412
commit acdf2a8b37
12 changed files with 1066 additions and 3 deletions

View File

@@ -215,3 +215,243 @@ SELECT '' AS four, height(f1), width(f1) FROM BOX_TBL;
| 0 | 0
(4 rows)
--
-- Test the SP-GiST index
--
CREATE TEMPORARY TABLE box_temp (f1 box);
INSERT INTO box_temp
SELECT box(point(i, i), point(i * 2, i * 2))
FROM generate_series(1, 50) AS i;
CREATE INDEX box_spgist ON box_temp USING spgist (f1);
INSERT INTO box_temp
VALUES (NULL),
('(-0,0)(0,100)'),
('(-3,4.3333333333)(40,1)'),
('(0,100)(0,infinity)'),
('(-infinity,0)(0,infinity)'),
('(-infinity,-infinity)(infinity,infinity)');
SET enable_seqscan = false;
SELECT * FROM box_temp WHERE f1 << '(10,20),(30,40)';
f1
------------------
(2,2),(1,1)
(4,4),(2,2)
(6,6),(3,3)
(8,8),(4,4)
(-0,100),(0,0)
(0,inf),(0,100)
(0,inf),(-inf,0)
(7 rows)
EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 << '(10,20),(30,40)';
QUERY PLAN
----------------------------------------------
Index Only Scan using box_spgist on box_temp
Index Cond: (f1 << '(30,40),(10,20)'::box)
(2 rows)
SELECT * FROM box_temp WHERE f1 &< '(10,4.333334),(5,100)';
f1
------------------
(2,2),(1,1)
(4,4),(2,2)
(6,6),(3,3)
(8,8),(4,4)
(10,10),(5,5)
(-0,100),(0,0)
(0,inf),(0,100)
(0,inf),(-inf,0)
(8 rows)
EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 &< '(10,4.333334),(5,100)';
QUERY PLAN
----------------------------------------------------
Index Only Scan using box_spgist on box_temp
Index Cond: (f1 &< '(10,100),(5,4.333334)'::box)
(2 rows)
SELECT * FROM box_temp WHERE f1 && '(15,20),(25,30)';
f1
-----------------------
(20,20),(10,10)
(22,22),(11,11)
(24,24),(12,12)
(26,26),(13,13)
(28,28),(14,14)
(30,30),(15,15)
(32,32),(16,16)
(34,34),(17,17)
(36,36),(18,18)
(38,38),(19,19)
(40,40),(20,20)
(42,42),(21,21)
(44,44),(22,22)
(46,46),(23,23)
(48,48),(24,24)
(50,50),(25,25)
(inf,inf),(-inf,-inf)
(17 rows)
EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 && '(15,20),(25,30)';
QUERY PLAN
----------------------------------------------
Index Only Scan using box_spgist on box_temp
Index Cond: (f1 && '(25,30),(15,20)'::box)
(2 rows)
SELECT * FROM box_temp WHERE f1 &> '(40,30),(45,50)';
f1
-------------------
(80,80),(40,40)
(82,82),(41,41)
(84,84),(42,42)
(86,86),(43,43)
(88,88),(44,44)
(90,90),(45,45)
(92,92),(46,46)
(94,94),(47,47)
(96,96),(48,48)
(98,98),(49,49)
(100,100),(50,50)
(11 rows)
EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 &> '(40,30),(45,50)';
QUERY PLAN
----------------------------------------------
Index Only Scan using box_spgist on box_temp
Index Cond: (f1 &> '(45,50),(40,30)'::box)
(2 rows)
SELECT * FROM box_temp WHERE f1 >> '(30,40),(40,30)';
f1
-------------------
(82,82),(41,41)
(84,84),(42,42)
(86,86),(43,43)
(88,88),(44,44)
(90,90),(45,45)
(92,92),(46,46)
(94,94),(47,47)
(96,96),(48,48)
(98,98),(49,49)
(100,100),(50,50)
(10 rows)
EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 >> '(30,40),(40,30)';
QUERY PLAN
----------------------------------------------
Index Only Scan using box_spgist on box_temp
Index Cond: (f1 >> '(40,40),(30,30)'::box)
(2 rows)
SELECT * FROM box_temp WHERE f1 <<| '(10,4.33334),(5,100)';
f1
--------------------------
(2,2),(1,1)
(4,4),(2,2)
(40,4.3333333333),(-3,1)
(3 rows)
EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 <<| '(10,4.33334),(5,100)';
QUERY PLAN
----------------------------------------------------
Index Only Scan using box_spgist on box_temp
Index Cond: (f1 <<| '(10,100),(5,4.33334)'::box)
(2 rows)
SELECT * FROM box_temp WHERE f1 &<| '(10,4.3333334),(5,1)';
f1
--------------------------
(2,2),(1,1)
(4,4),(2,2)
(40,4.3333333333),(-3,1)
(3 rows)
EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 &<| '(10,4.3333334),(5,1)';
QUERY PLAN
----------------------------------------------------
Index Only Scan using box_spgist on box_temp
Index Cond: (f1 &<| '(10,4.3333334),(5,1)'::box)
(2 rows)
SELECT * FROM box_temp WHERE f1 |&> '(49.99,49.99),(49.99,49.99)';
f1
-------------------
(100,100),(50,50)
(0,inf),(0,100)
(2 rows)
EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 |&> '(49.99,49.99),(49.99,49.99)';
QUERY PLAN
-----------------------------------------------------------
Index Only Scan using box_spgist on box_temp
Index Cond: (f1 |&> '(49.99,49.99),(49.99,49.99)'::box)
(2 rows)
SELECT * FROM box_temp WHERE f1 |>> '(37,38),(39,40)';
f1
-------------------
(82,82),(41,41)
(84,84),(42,42)
(86,86),(43,43)
(88,88),(44,44)
(90,90),(45,45)
(92,92),(46,46)
(94,94),(47,47)
(96,96),(48,48)
(98,98),(49,49)
(100,100),(50,50)
(0,inf),(0,100)
(11 rows)
EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 |>> '(37,38),(39,40)';
QUERY PLAN
-----------------------------------------------
Index Only Scan using box_spgist on box_temp
Index Cond: (f1 |>> '(39,40),(37,38)'::box)
(2 rows)
SELECT * FROM box_temp WHERE f1 @> '(10,11),(15,16)';
f1
-----------------------
(16,16),(8,8)
(18,18),(9,9)
(20,20),(10,10)
(inf,inf),(-inf,-inf)
(4 rows)
EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 @> '(10,11),(15,15)';
QUERY PLAN
----------------------------------------------
Index Only Scan using box_spgist on box_temp
Index Cond: (f1 @> '(15,15),(10,11)'::box)
(2 rows)
SELECT * FROM box_temp WHERE f1 <@ '(10,15),(30,35)';
f1
-----------------
(30,30),(15,15)
(1 row)
EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 <@ '(10,15),(30,35)';
QUERY PLAN
----------------------------------------------
Index Only Scan using box_spgist on box_temp
Index Cond: (f1 <@ '(30,35),(10,15)'::box)
(2 rows)
SELECT * FROM box_temp WHERE f1 ~= '(20,20),(40,40)';
f1
-----------------
(40,40),(20,20)
(1 row)
EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 ~= '(20,20),(40,40)';
QUERY PLAN
----------------------------------------------
Index Only Scan using box_spgist on box_temp
Index Cond: (f1 ~= '(40,40),(20,20)'::box)
(2 rows)
RESET enable_seqscan;
DROP INDEX box_spgist;

View File

@@ -1728,15 +1728,19 @@ ORDER BY 1, 2, 3;
4000 | 6 | ~=
4000 | 7 | @>
4000 | 8 | <@
4000 | 9 | &<|
4000 | 10 | <<|
4000 | 10 | <^
4000 | 11 | <
4000 | 11 | >^
4000 | 11 | |>>
4000 | 12 | <=
4000 | 12 | |&>
4000 | 14 | >=
4000 | 15 | >
4000 | 16 | @>
4000 | 18 | =
(108 rows)
(112 rows)
-- Check that all opclass search operators have selectivity estimators.
-- This is not absolutely required, but it seems a reasonable thing

View File

@@ -117,3 +117,65 @@ SELECT '' AS one, b1.*, b2.*
WHERE b1.f1 @> b2.f1 and not b1.f1 ~= b2.f1;
SELECT '' AS four, height(f1), width(f1) FROM BOX_TBL;
--
-- Test the SP-GiST index
--
CREATE TEMPORARY TABLE box_temp (f1 box);
INSERT INTO box_temp
SELECT box(point(i, i), point(i * 2, i * 2))
FROM generate_series(1, 50) AS i;
CREATE INDEX box_spgist ON box_temp USING spgist (f1);
INSERT INTO box_temp
VALUES (NULL),
('(-0,0)(0,100)'),
('(-3,4.3333333333)(40,1)'),
('(0,100)(0,infinity)'),
('(-infinity,0)(0,infinity)'),
('(-infinity,-infinity)(infinity,infinity)');
SET enable_seqscan = false;
SELECT * FROM box_temp WHERE f1 << '(10,20),(30,40)';
EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 << '(10,20),(30,40)';
SELECT * FROM box_temp WHERE f1 &< '(10,4.333334),(5,100)';
EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 &< '(10,4.333334),(5,100)';
SELECT * FROM box_temp WHERE f1 && '(15,20),(25,30)';
EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 && '(15,20),(25,30)';
SELECT * FROM box_temp WHERE f1 &> '(40,30),(45,50)';
EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 &> '(40,30),(45,50)';
SELECT * FROM box_temp WHERE f1 >> '(30,40),(40,30)';
EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 >> '(30,40),(40,30)';
SELECT * FROM box_temp WHERE f1 <<| '(10,4.33334),(5,100)';
EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 <<| '(10,4.33334),(5,100)';
SELECT * FROM box_temp WHERE f1 &<| '(10,4.3333334),(5,1)';
EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 &<| '(10,4.3333334),(5,1)';
SELECT * FROM box_temp WHERE f1 |&> '(49.99,49.99),(49.99,49.99)';
EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 |&> '(49.99,49.99),(49.99,49.99)';
SELECT * FROM box_temp WHERE f1 |>> '(37,38),(39,40)';
EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 |>> '(37,38),(39,40)';
SELECT * FROM box_temp WHERE f1 @> '(10,11),(15,16)';
EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 @> '(10,11),(15,15)';
SELECT * FROM box_temp WHERE f1 <@ '(10,15),(30,35)';
EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 <@ '(10,15),(30,35)';
SELECT * FROM box_temp WHERE f1 ~= '(20,20),(40,40)';
EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 ~= '(20,20),(40,40)';
RESET enable_seqscan;
DROP INDEX box_spgist;