mirror of
https://github.com/postgres/postgres.git
synced 2025-10-24 01:29:19 +03:00
The original implementation of TABLESAMPLE modeled the tablesample method API on index access methods, which wasn't a good choice because, without specialized DDL commands, there's no way to build an extension that can implement a TSM. (Raw inserts into system catalogs are not an acceptable thing to do, because we can't undo them during DROP EXTENSION, nor will pg_upgrade behave sanely.) Instead adopt an API more like procedural language handlers or foreign data wrappers, wherein the only SQL-level support object needed is a single handler function identified by having a special return type. This lets us get rid of the supporting catalog altogether, so that no custom DDL support is needed for the feature. Adjust the API so that it can support non-constant tablesample arguments (the original coding assumed we could evaluate the argument expressions at ExecInitSampleScan time, which is undesirable even if it weren't outright unsafe), and discourage sampling methods from looking at invisible tuples. Make sure that the BERNOULLI and SYSTEM methods are genuinely repeatable within and across queries, as required by the SQL standard, and deal more honestly with methods that can't support that requirement. Make a full code-review pass over the tablesample additions, and fix assorted bugs, omissions, infelicities, and cosmetic issues (such as failure to put the added code stanzas in a consistent ordering). Improve EXPLAIN's output of tablesample plans, too. Back-patch to 9.5 so that we don't have to support the original API in production.
84 lines
2.4 KiB
Plaintext
84 lines
2.4 KiB
Plaintext
-- Fixture setup: install the tsm_system_rows sampling method and build the
-- small table that every query below samples from.
CREATE EXTENSION tsm_system_rows;
|
|
CREATE TABLE test_tablesample (id int, name text);
|
|
-- generate_series(0, 30) includes both endpoints, so this loads 31 rows
-- (ids 0..30); each name is the id's text form repeated 1000 times.
-- NOTE(review): the wide name values presumably spread the rows over
-- several heap pages; confirm against the sampling method's block logic.
INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000)
|
|
FROM generate_series(0, 30) s(i);
|
|
ANALYZE test_tablesample;
|
|
-- system_rows (n) takes a row count as its argument.  The four probes below
-- check the count returned for n = 0, 1, 10 and 100 against the 31-row table.

-- n = 0: an empty sample is valid and yields a count of zero.
SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (0);
|
|
count
|
|
-------
|
|
0
|
|
(1 row)
|
|
|
|
-- n = 1: exactly one row comes back.
SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (1);
|
|
count
|
|
-------
|
|
1
|
|
(1 row)
|
|
|
|
-- n = 10: fewer than the table holds, so exactly 10 rows come back.
SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (10);
|
|
count
|
|
-------
|
|
10
|
|
(1 row)
|
|
|
|
-- n = 100: more than the table holds, so the sample is capped at all 31 rows.
SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (100);
|
|
count
|
|
-------
|
|
31
|
|
(1 row)
|
|
|
|
-- bad parameters should get through planning, but not execution:
-- EXPLAIN without ANALYZE only plans the query, so the negative size is
-- accepted and shown in the plan as the constant '-1'::bigint.
|
|
EXPLAIN (COSTS OFF)
|
|
SELECT id FROM test_tablesample TABLESAMPLE system_rows (-1);
|
|
QUERY PLAN
|
|
----------------------------------------
|
|
Sample Scan on test_tablesample
|
|
Sampling: system_rows ('-1'::bigint)
|
|
(2 rows)
|
|
|
|
-- Actually running the same query rejects the negative sample size.
SELECT id FROM test_tablesample TABLESAMPLE system_rows (-1);
|
|
ERROR: sample size must not be negative
|
|
-- fail, this method is not repeatable:
-- system_rows does not accept a REPEATABLE seed; the error is reported with
-- a position marker pointing into the statement text (the LINE 1 / ^ lines).
|
|
SELECT * FROM test_tablesample TABLESAMPLE system_rows (10) REPEATABLE (0);
|
|
ERROR: tablesample method system_rows does not support REPEATABLE
|
|
LINE 1: SELECT * FROM test_tablesample TABLESAMPLE system_rows (10) ...
|
|
^
|
|
-- but a join should be allowed:
-- Here the sample size is not a constant: it is the nrows column of the
-- outer VALUES list, referenced from inside a LATERAL subquery.  The plan
-- shows the argument as "*VALUES*".column1 under a Nested Loop.
|
|
EXPLAIN (COSTS OFF)
|
|
SELECT * FROM
|
|
(VALUES (0),(10),(100)) v(nrows),
|
|
LATERAL (SELECT count(*) FROM test_tablesample
|
|
TABLESAMPLE system_rows (nrows)) ss;
|
|
QUERY PLAN
|
|
----------------------------------------------------------
|
|
Nested Loop
|
|
-> Values Scan on "*VALUES*"
|
|
-> Aggregate
|
|
-> Sample Scan on test_tablesample
|
|
Sampling: system_rows ("*VALUES*".column1)
|
|
(5 rows)
|
|
|
|
-- Executing it samples once per outer row; the counts match the constant-
-- argument probes for the same sizes (0 -> 0, 10 -> 10, 100 -> all 31 rows).
SELECT * FROM
|
|
(VALUES (0),(10),(100)) v(nrows),
|
|
LATERAL (SELECT count(*) FROM test_tablesample
|
|
TABLESAMPLE system_rows (nrows)) ss;
|
|
nrows | count
|
|
-------+-------
|
|
0 | 0
|
|
10 | 10
|
|
100 | 31
|
|
(3 rows)
|
|
|
|
-- A view may embed a TABLESAMPLE clause; selecting from it samples 20 of
-- the 31 rows.
CREATE VIEW vv AS
|
|
SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (20);
|
|
SELECT * FROM vv;
|
|
count
|
|
-------
|
|
20
|
|
(1 row)
|
|
|
|
-- The view records a dependency on the extension's handler function
-- system_rows(internal), so a plain DROP EXTENSION (no CASCADE) must be
-- refused rather than leaving the view dangling.
DROP EXTENSION tsm_system_rows; -- fail, view depends on extension
|
|
ERROR: cannot drop extension tsm_system_rows because other objects depend on it
|
|
DETAIL: view vv depends on function system_rows(internal)
|
|
HINT: Use DROP ... CASCADE to drop the dependent objects too.
|