1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-05 07:21:24 +03:00

TABLESAMPLE, SQL Standard and extensible

Add a TABLESAMPLE clause to SELECT statements that allows
user to specify random BERNOULLI sampling or block level
SYSTEM sampling. Implementation allows for extensible
sampling functions to be written, using a standard API.
Basic version follows SQLStandard exactly. Usable
concrete use cases for the sampling API follow in later
commits.

Petr Jelinek

Reviewed by Michael Paquier and Simon Riggs
This commit is contained in:
Simon Riggs
2015-05-15 14:37:10 -04:00
parent 11a83bbedd
commit f6d208d6e5
66 changed files with 2756 additions and 40 deletions

View File

@ -101,6 +101,17 @@ NOTICE: f_leak => great manga
44 | 8 | 1 | rls_regress_user2 | great manga | manga
(4 rows)
SELECT * FROM document TABLESAMPLE BERNOULLI (50) REPEATABLE(1) WHERE f_leak(dtitle) ORDER BY did;
NOTICE: f_leak => my first novel
NOTICE: f_leak => my first manga
NOTICE: f_leak => great science fiction
did | cid | dlevel | dauthor | dtitle
-----+-----+--------+-------------------+-----------------------
1 | 11 | 1 | rls_regress_user1 | my first novel
4 | 44 | 1 | rls_regress_user1 | my first manga
6 | 22 | 1 | rls_regress_user2 | great science fiction
(3 rows)
-- viewpoint from rls_regress_user2
SET SESSION AUTHORIZATION rls_regress_user2;
SELECT * FROM document WHERE f_leak(dtitle) ORDER BY did;
@ -145,6 +156,21 @@ NOTICE: f_leak => great manga
44 | 8 | 1 | rls_regress_user2 | great manga | manga
(8 rows)
SELECT * FROM document TABLESAMPLE BERNOULLI (50) REPEATABLE(1) WHERE f_leak(dtitle) ORDER BY did;
NOTICE: f_leak => my first novel
NOTICE: f_leak => my second novel
NOTICE: f_leak => my first manga
NOTICE: f_leak => great science fiction
NOTICE: f_leak => great technology book
did | cid | dlevel | dauthor | dtitle
-----+-----+--------+-------------------+-----------------------
1 | 11 | 1 | rls_regress_user1 | my first novel
2 | 11 | 2 | rls_regress_user1 | my second novel
4 | 44 | 1 | rls_regress_user1 | my first manga
6 | 22 | 1 | rls_regress_user2 | great science fiction
7 | 33 | 2 | rls_regress_user2 | great technology book
(5 rows)
EXPLAIN (COSTS OFF) SELECT * FROM document WHERE f_leak(dtitle);
QUERY PLAN
----------------------------------------------------------

View File

@ -128,6 +128,7 @@ pg_shdepend|t
pg_shdescription|t
pg_shseclabel|t
pg_statistic|t
pg_tablesample_method|t
pg_tablespace|t
pg_transform|t
pg_trigger|t

View File

@ -0,0 +1,231 @@
CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
INSERT INTO test_tablesample SELECT i, repeat(i::text, 200) FROM generate_series(0, 9) s(i) ORDER BY i;
SELECT t.id FROM test_tablesample AS t TABLESAMPLE SYSTEM (50) REPEATABLE (10);
id
----
0
1
2
3
4
5
9
(7 rows)
SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (100.0/11) REPEATABLE (9999);
id
----
6
7
8
(3 rows)
SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100);
count
-------
10
(1 row)
SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100);
id
----
0
1
2
6
7
8
9
(7 rows)
SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (50) REPEATABLE (100);
id
----
0
1
3
4
5
(5 rows)
SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (5.5) REPEATABLE (1);
id
----
0
5
(2 rows)
CREATE VIEW test_tablesample_v1 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (10*2) REPEATABLE (2);
CREATE VIEW test_tablesample_v2 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (99);
SELECT pg_get_viewdef('test_tablesample_v1'::regclass);
pg_get_viewdef
--------------------------------------------------------------------------------
SELECT test_tablesample.id +
FROM test_tablesample TABLESAMPLE system (((10 * 2))::real) REPEATABLE (2);
(1 row)
SELECT pg_get_viewdef('test_tablesample_v2'::regclass);
pg_get_viewdef
-----------------------------------------------------------
SELECT test_tablesample.id +
FROM test_tablesample TABLESAMPLE system ((99)::real);
(1 row)
BEGIN;
DECLARE tablesample_cur CURSOR FOR SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100);
FETCH FIRST FROM tablesample_cur;
id
----
0
(1 row)
FETCH NEXT FROM tablesample_cur;
id
----
1
(1 row)
FETCH NEXT FROM tablesample_cur;
id
----
2
(1 row)
SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (10);
id
----
0
1
2
3
4
5
9
(7 rows)
FETCH NEXT FROM tablesample_cur;
id
----
6
(1 row)
FETCH NEXT FROM tablesample_cur;
id
----
7
(1 row)
FETCH NEXT FROM tablesample_cur;
id
----
8
(1 row)
FETCH FIRST FROM tablesample_cur;
id
----
0
(1 row)
FETCH NEXT FROM tablesample_cur;
id
----
1
(1 row)
FETCH NEXT FROM tablesample_cur;
id
----
2
(1 row)
FETCH NEXT FROM tablesample_cur;
id
----
6
(1 row)
FETCH NEXT FROM tablesample_cur;
id
----
7
(1 row)
FETCH NEXT FROM tablesample_cur;
id
----
8
(1 row)
CLOSE tablesample_cur;
END;
EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (10);
QUERY PLAN
-------------------------------------------------------------------------------
Sample Scan (system) on test_tablesample (cost=0.00..26.35 rows=635 width=4)
(1 row)
EXPLAIN SELECT * FROM test_tablesample_v1;
QUERY PLAN
-------------------------------------------------------------------------------
Sample Scan (system) on test_tablesample (cost=0.00..10.54 rows=254 width=4)
(1 row)
-- errors
SELECT id FROM test_tablesample TABLESAMPLE FOOBAR (1);
ERROR: tablesample method "foobar" does not exist
LINE 1: SELECT id FROM test_tablesample TABLESAMPLE FOOBAR (1);
^
SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (NULL);
ERROR: REPEATABLE clause must be NOT NULL numeric value
LINE 1: ... test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (NULL);
^
SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (-1);
ERROR: invalid sample size
HINT: Sample size must be numeric value between 0 and 100 (inclusive).
SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (200);
ERROR: invalid sample size
HINT: Sample size must be numeric value between 0 and 100 (inclusive).
SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (-1);
ERROR: invalid sample size
HINT: Sample size must be numeric value between 0 and 100 (inclusive).
SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (200);
ERROR: invalid sample size
HINT: Sample size must be numeric value between 0 and 100 (inclusive).
SELECT id FROM test_tablesample_v1 TABLESAMPLE BERNOULLI (1);
ERROR: TABLESAMPLE clause can only be used on tables and materialized views
LINE 1: SELECT id FROM test_tablesample_v1 TABLESAMPLE BERNOULLI (1)...
^
INSERT INTO test_tablesample_v1 VALUES(1);
ERROR: cannot insert into view "test_tablesample_v1"
DETAIL: Views containing TABLESAMPLE are not automatically updatable.
HINT: To enable inserting into the view, provide an INSTEAD OF INSERT trigger or an unconditional ON INSERT DO INSTEAD rule.
WITH query_select AS (SELECT * FROM test_tablesample)
SELECT * FROM query_select TABLESAMPLE BERNOULLI (5.5) REPEATABLE (1);
ERROR: TABLESAMPLE clause can only be used on tables and materialized views
LINE 2: SELECT * FROM query_select TABLESAMPLE BERNOULLI (5.5) REPEA...
^
SELECT q.* FROM (SELECT * FROM test_tablesample) as q TABLESAMPLE BERNOULLI (5);
ERROR: syntax error at or near "TABLESAMPLE"
LINE 1: ...CT q.* FROM (SELECT * FROM test_tablesample) as q TABLESAMPL...
^
-- catalog sanity
SELECT *
FROM pg_tablesample_method
WHERE tsminit IS NULL
OR tsmseqscan IS NULL
OR tsmpagemode IS NULL
OR tsmnextblock IS NULL
OR tsmnexttuple IS NULL
OR tsmend IS NULL
OR tsmreset IS NULL
OR tsmcost IS NULL;
tsmname | tsmseqscan | tsmpagemode | tsminit | tsmnextblock | tsmnexttuple | tsmexaminetuple | tsmend | tsmreset | tsmcost
---------+------------+-------------+---------+--------------+--------------+-----------------+--------+----------+---------
(0 rows)
-- done
DROP TABLE test_tablesample CASCADE;
NOTICE: drop cascades to 2 other objects
DETAIL: drop cascades to view test_tablesample_v1
drop cascades to view test_tablesample_v2

View File

@ -84,7 +84,7 @@ test: select_into select_distinct select_distinct_on select_implicit select_havi
# ----------
# Another group of parallel tests
# ----------
test: brin gin gist spgist privileges security_label collate matview lock replica_identity rowsecurity object_address
test: brin gin gist spgist privileges security_label collate matview lock replica_identity rowsecurity object_address tablesample
# ----------
# Another group of parallel tests

View File

@ -154,3 +154,4 @@ test: with
test: xml
test: event_trigger
test: stats
test: tablesample

View File

@ -94,11 +94,15 @@ SET row_security TO ON;
SELECT * FROM document WHERE f_leak(dtitle) ORDER BY did;
SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle) ORDER BY did;
SELECT * FROM document TABLESAMPLE BERNOULLI (50) REPEATABLE(1) WHERE f_leak(dtitle) ORDER BY did;
-- viewpoint from rls_regress_user2
SET SESSION AUTHORIZATION rls_regress_user2;
SELECT * FROM document WHERE f_leak(dtitle) ORDER BY did;
SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle) ORDER BY did;
SELECT * FROM document TABLESAMPLE BERNOULLI (50) REPEATABLE(1) WHERE f_leak(dtitle) ORDER BY did;
EXPLAIN (COSTS OFF) SELECT * FROM document WHERE f_leak(dtitle);
EXPLAIN (COSTS OFF) SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle);

View File

@ -0,0 +1,74 @@
CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
INSERT INTO test_tablesample SELECT i, repeat(i::text, 200) FROM generate_series(0, 9) s(i) ORDER BY i;
SELECT t.id FROM test_tablesample AS t TABLESAMPLE SYSTEM (50) REPEATABLE (10);
SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (100.0/11) REPEATABLE (9999);
SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100);
SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100);
SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (50) REPEATABLE (100);
SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (5.5) REPEATABLE (1);
CREATE VIEW test_tablesample_v1 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (10*2) REPEATABLE (2);
CREATE VIEW test_tablesample_v2 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (99);
SELECT pg_get_viewdef('test_tablesample_v1'::regclass);
SELECT pg_get_viewdef('test_tablesample_v2'::regclass);
BEGIN;
DECLARE tablesample_cur CURSOR FOR SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100);
FETCH FIRST FROM tablesample_cur;
FETCH NEXT FROM tablesample_cur;
FETCH NEXT FROM tablesample_cur;
SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (10);
FETCH NEXT FROM tablesample_cur;
FETCH NEXT FROM tablesample_cur;
FETCH NEXT FROM tablesample_cur;
FETCH FIRST FROM tablesample_cur;
FETCH NEXT FROM tablesample_cur;
FETCH NEXT FROM tablesample_cur;
FETCH NEXT FROM tablesample_cur;
FETCH NEXT FROM tablesample_cur;
FETCH NEXT FROM tablesample_cur;
CLOSE tablesample_cur;
END;
EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (10);
EXPLAIN SELECT * FROM test_tablesample_v1;
-- errors
SELECT id FROM test_tablesample TABLESAMPLE FOOBAR (1);
SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (NULL);
SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (-1);
SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (200);
SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (-1);
SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (200);
SELECT id FROM test_tablesample_v1 TABLESAMPLE BERNOULLI (1);
INSERT INTO test_tablesample_v1 VALUES(1);
WITH query_select AS (SELECT * FROM test_tablesample)
SELECT * FROM query_select TABLESAMPLE BERNOULLI (5.5) REPEATABLE (1);
SELECT q.* FROM (SELECT * FROM test_tablesample) as q TABLESAMPLE BERNOULLI (5);
-- catalog sanity
SELECT *
FROM pg_tablesample_method
WHERE tsminit IS NULL
OR tsmseqscan IS NULL
OR tsmpagemode IS NULL
OR tsmnextblock IS NULL
OR tsmnexttuple IS NULL
OR tsmend IS NULL
OR tsmreset IS NULL
OR tsmcost IS NULL;
-- done
DROP TABLE test_tablesample CASCADE;