mirror of
https://github.com/postgres/postgres.git
synced 2025-05-12 16:21:30 +03:00
Rework the stats_ext test
As suggested by Tom Lane, avoid printing specific estimated cost values, because they vary across architectures; instead, verify plan shapes (in this case, HashAggregate vs. GroupAggregate), as we do in other planner tests. We can now remove expected/stats_ext_1.out. Author: Tomas Vondra
This commit is contained in:
parent
70ec3f1f8f
commit
bed9ef5a16
@ -1,4 +1,10 @@
|
|||||||
-- Generic extended statistics support
|
-- Generic extended statistics support
|
||||||
|
-- We will be checking execution plans without/with statistics, so
|
||||||
|
-- let's make sure we get simple non-parallel plans. Also set the
|
||||||
|
-- work_mem low so that we can use small amounts of data.
|
||||||
|
SET max_parallel_workers = 0;
|
||||||
|
SET max_parallel_workers_per_gather = 0;
|
||||||
|
SET work_mem = '128kB';
|
||||||
-- Ensure stats are dropped sanely
|
-- Ensure stats are dropped sanely
|
||||||
CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
|
CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
|
||||||
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
|
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
|
||||||
@ -49,6 +55,67 @@ CREATE TABLE ndistinct (
|
|||||||
c INT,
|
c INT,
|
||||||
d INT
|
d INT
|
||||||
);
|
);
|
||||||
|
-- over-estimates when using only per-column statistics
|
||||||
|
INSERT INTO ndistinct (a, b, c, filler1)
|
||||||
|
SELECT i/100, i/100, i/100, cash_words((i/100)::money)
|
||||||
|
FROM generate_series(1,30000) s(i);
|
||||||
|
ANALYZE ndistinct;
|
||||||
|
-- Group Aggregate, due to over-estimate of the number of groups
|
||||||
|
EXPLAIN (COSTS off)
|
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
|
||||||
|
QUERY PLAN
|
||||||
|
-----------------------------------
|
||||||
|
GroupAggregate
|
||||||
|
Group Key: a, b
|
||||||
|
-> Sort
|
||||||
|
Sort Key: a, b
|
||||||
|
-> Seq Scan on ndistinct
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
EXPLAIN (COSTS off)
|
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY b, c;
|
||||||
|
QUERY PLAN
|
||||||
|
-----------------------------------
|
||||||
|
GroupAggregate
|
||||||
|
Group Key: b, c
|
||||||
|
-> Sort
|
||||||
|
Sort Key: b, c
|
||||||
|
-> Seq Scan on ndistinct
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
EXPLAIN (COSTS off)
|
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
|
||||||
|
QUERY PLAN
|
||||||
|
-----------------------------------
|
||||||
|
GroupAggregate
|
||||||
|
Group Key: a, b, c
|
||||||
|
-> Sort
|
||||||
|
Sort Key: a, b, c
|
||||||
|
-> Seq Scan on ndistinct
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
EXPLAIN (COSTS off)
|
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
|
||||||
|
QUERY PLAN
|
||||||
|
-----------------------------------
|
||||||
|
GroupAggregate
|
||||||
|
Group Key: a, b, c, d
|
||||||
|
-> Sort
|
||||||
|
Sort Key: a, b, c, d
|
||||||
|
-> Seq Scan on ndistinct
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
EXPLAIN (COSTS off)
|
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
|
||||||
|
QUERY PLAN
|
||||||
|
-----------------------------------
|
||||||
|
GroupAggregate
|
||||||
|
Group Key: b, c, d
|
||||||
|
-> Sort
|
||||||
|
Sort Key: b, c, d
|
||||||
|
-> Seq Scan on ndistinct
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
-- unknown column
|
-- unknown column
|
||||||
CREATE STATISTICS s10 ON (unknown_column) FROM ndistinct;
|
CREATE STATISTICS s10 ON (unknown_column) FROM ndistinct;
|
||||||
ERROR: column "unknown_column" referenced in statistics does not exist
|
ERROR: column "unknown_column" referenced in statistics does not exist
|
||||||
@ -63,100 +130,184 @@ CREATE STATISTICS s10 ON (a, a, b) FROM ndistinct;
|
|||||||
ERROR: duplicate column name in statistics definition
|
ERROR: duplicate column name in statistics definition
|
||||||
-- correct command
|
-- correct command
|
||||||
CREATE STATISTICS s10 ON (a, b, c) FROM ndistinct;
|
CREATE STATISTICS s10 ON (a, b, c) FROM ndistinct;
|
||||||
-- perfectly correlated groups
|
ANALYZE ndistinct;
|
||||||
|
SELECT staenabled, standistinct
|
||||||
|
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
|
||||||
|
staenabled | standistinct
|
||||||
|
------------+------------------------------------------------------------------------------------------------
|
||||||
|
{d} | [{(b 3 4), 301.000000}, {(b 3 6), 301.000000}, {(b 4 6), 301.000000}, {(b 3 4 6), 301.000000}]
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- Hash Aggregate, thanks to estimates improved by the statistic
|
||||||
|
EXPLAIN (COSTS off)
|
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
|
||||||
|
QUERY PLAN
|
||||||
|
-----------------------------
|
||||||
|
HashAggregate
|
||||||
|
Group Key: a, b
|
||||||
|
-> Seq Scan on ndistinct
|
||||||
|
(3 rows)
|
||||||
|
|
||||||
|
EXPLAIN (COSTS off)
|
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY b, c;
|
||||||
|
QUERY PLAN
|
||||||
|
-----------------------------
|
||||||
|
HashAggregate
|
||||||
|
Group Key: b, c
|
||||||
|
-> Seq Scan on ndistinct
|
||||||
|
(3 rows)
|
||||||
|
|
||||||
|
EXPLAIN (COSTS off)
|
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
|
||||||
|
QUERY PLAN
|
||||||
|
-----------------------------
|
||||||
|
HashAggregate
|
||||||
|
Group Key: a, b, c
|
||||||
|
-> Seq Scan on ndistinct
|
||||||
|
(3 rows)
|
||||||
|
|
||||||
|
-- last two plans keep using Group Aggregate, because 'd' is not covered
|
||||||
|
-- by the statistic and while it's NULL-only we assume 200 values for it
|
||||||
|
EXPLAIN (COSTS off)
|
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
|
||||||
|
QUERY PLAN
|
||||||
|
-----------------------------------
|
||||||
|
GroupAggregate
|
||||||
|
Group Key: a, b, c, d
|
||||||
|
-> Sort
|
||||||
|
Sort Key: a, b, c, d
|
||||||
|
-> Seq Scan on ndistinct
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
EXPLAIN (COSTS off)
|
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
|
||||||
|
QUERY PLAN
|
||||||
|
-----------------------------------
|
||||||
|
GroupAggregate
|
||||||
|
Group Key: b, c, d
|
||||||
|
-> Sort
|
||||||
|
Sort Key: b, c, d
|
||||||
|
-> Seq Scan on ndistinct
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
TRUNCATE TABLE ndistinct;
|
||||||
|
-- under-estimates when using only per-column statistics
|
||||||
INSERT INTO ndistinct (a, b, c, filler1)
|
INSERT INTO ndistinct (a, b, c, filler1)
|
||||||
SELECT i/100, i/100, i/100, cash_words(i::money)
|
SELECT mod(i,50), mod(i,51), mod(i,32),
|
||||||
|
cash_words(mod(i,33)::int::money)
|
||||||
FROM generate_series(1,10000) s(i);
|
FROM generate_series(1,10000) s(i);
|
||||||
ANALYZE ndistinct;
|
ANALYZE ndistinct;
|
||||||
SELECT staenabled, standistinct
|
SELECT staenabled, standistinct
|
||||||
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
|
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
|
||||||
staenabled | standistinct
|
staenabled | standistinct
|
||||||
------------+------------------------------------------------------------------------------------------------
|
------------+----------------------------------------------------------------------------------------------------
|
||||||
{d} | [{(b 3 4), 101.000000}, {(b 3 6), 101.000000}, {(b 4 6), 101.000000}, {(b 3 4 6), 101.000000}]
|
{d} | [{(b 3 4), 2550.000000}, {(b 3 6), 800.000000}, {(b 4 6), 1632.000000}, {(b 3 4 6), 10000.000000}]
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
-- plans using Group Aggregate, thanks to using correct esimates
|
||||||
EXPLAIN (COSTS off)
|
EXPLAIN (COSTS off)
|
||||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
|
||||||
QUERY PLAN
|
QUERY PLAN
|
||||||
-----------------------------
|
-----------------------------------
|
||||||
HashAggregate
|
GroupAggregate
|
||||||
Group Key: a, b
|
Group Key: a, b
|
||||||
|
-> Sort
|
||||||
|
Sort Key: a, b
|
||||||
-> Seq Scan on ndistinct
|
-> Seq Scan on ndistinct
|
||||||
(3 rows)
|
(5 rows)
|
||||||
|
|
||||||
EXPLAIN (COSTS off)
|
EXPLAIN (COSTS off)
|
||||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
|
||||||
QUERY PLAN
|
QUERY PLAN
|
||||||
-----------------------------
|
-----------------------------------
|
||||||
HashAggregate
|
GroupAggregate
|
||||||
Group Key: a, b, c
|
Group Key: a, b, c
|
||||||
|
-> Sort
|
||||||
|
Sort Key: a, b, c
|
||||||
-> Seq Scan on ndistinct
|
-> Seq Scan on ndistinct
|
||||||
(3 rows)
|
(5 rows)
|
||||||
|
|
||||||
EXPLAIN (COSTS off)
|
EXPLAIN (COSTS off)
|
||||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
|
||||||
QUERY PLAN
|
QUERY PLAN
|
||||||
|
-----------------------------------
|
||||||
|
GroupAggregate
|
||||||
|
Group Key: a, b, c, d
|
||||||
|
-> Sort
|
||||||
|
Sort Key: a, b, c, d
|
||||||
|
-> Seq Scan on ndistinct
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
EXPLAIN (COSTS off)
|
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
|
||||||
|
QUERY PLAN
|
||||||
-----------------------------
|
-----------------------------
|
||||||
HashAggregate
|
HashAggregate
|
||||||
Group Key: a, b, c, d
|
Group Key: b, c, d
|
||||||
-> Seq Scan on ndistinct
|
-> Seq Scan on ndistinct
|
||||||
(3 rows)
|
(3 rows)
|
||||||
|
|
||||||
TRUNCATE TABLE ndistinct;
|
EXPLAIN (COSTS off)
|
||||||
-- partially correlated groups
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
|
||||||
INSERT INTO ndistinct (a, b, c)
|
QUERY PLAN
|
||||||
SELECT i/50, i/100, i/200 FROM generate_series(1,10000) s(i);
|
-----------------------------
|
||||||
ANALYZE ndistinct;
|
HashAggregate
|
||||||
|
Group Key: a, d
|
||||||
|
-> Seq Scan on ndistinct
|
||||||
|
(3 rows)
|
||||||
|
|
||||||
|
DROP STATISTICS s10;
|
||||||
SELECT staenabled, standistinct
|
SELECT staenabled, standistinct
|
||||||
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
|
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
|
||||||
staenabled | standistinct
|
staenabled | standistinct
|
||||||
------------+------------------------------------------------------------------------------------------------
|
------------+--------------
|
||||||
{d} | [{(b 3 4), 201.000000}, {(b 3 6), 201.000000}, {(b 4 6), 101.000000}, {(b 3 4 6), 201.000000}]
|
(0 rows)
|
||||||
(1 row)
|
|
||||||
|
|
||||||
EXPLAIN
|
-- dropping the statistics switches the plans to Hash Aggregate,
|
||||||
|
-- due to under-estimates
|
||||||
|
EXPLAIN (COSTS off)
|
||||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
|
||||||
QUERY PLAN
|
QUERY PLAN
|
||||||
---------------------------------------------------------------------
|
-----------------------------
|
||||||
HashAggregate (cost=230.00..232.01 rows=201 width=16)
|
HashAggregate
|
||||||
Group Key: a, b
|
Group Key: a, b
|
||||||
-> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=8)
|
-> Seq Scan on ndistinct
|
||||||
(3 rows)
|
(3 rows)
|
||||||
|
|
||||||
EXPLAIN
|
EXPLAIN (COSTS off)
|
||||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
|
||||||
QUERY PLAN
|
QUERY PLAN
|
||||||
----------------------------------------------------------------------
|
-----------------------------
|
||||||
HashAggregate (cost=255.00..257.01 rows=201 width=20)
|
HashAggregate
|
||||||
Group Key: a, b, c
|
Group Key: a, b, c
|
||||||
-> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=12)
|
-> Seq Scan on ndistinct
|
||||||
(3 rows)
|
(3 rows)
|
||||||
|
|
||||||
EXPLAIN
|
EXPLAIN (COSTS off)
|
||||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
|
||||||
QUERY PLAN
|
QUERY PLAN
|
||||||
----------------------------------------------------------------------
|
-----------------------------
|
||||||
HashAggregate (cost=280.00..290.00 rows=1000 width=24)
|
HashAggregate
|
||||||
Group Key: a, b, c, d
|
Group Key: a, b, c, d
|
||||||
-> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=16)
|
-> Seq Scan on ndistinct
|
||||||
(3 rows)
|
(3 rows)
|
||||||
|
|
||||||
EXPLAIN
|
EXPLAIN (COSTS off)
|
||||||
SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
|
SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
|
||||||
QUERY PLAN
|
QUERY PLAN
|
||||||
----------------------------------------------------------------------
|
-----------------------------
|
||||||
HashAggregate (cost=255.00..265.00 rows=1000 width=20)
|
HashAggregate
|
||||||
Group Key: b, c, d
|
Group Key: b, c, d
|
||||||
-> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=12)
|
-> Seq Scan on ndistinct
|
||||||
(3 rows)
|
(3 rows)
|
||||||
|
|
||||||
EXPLAIN
|
EXPLAIN (COSTS off)
|
||||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
|
||||||
QUERY PLAN
|
QUERY PLAN
|
||||||
---------------------------------------------------------------------
|
-----------------------------
|
||||||
HashAggregate (cost=230.00..240.00 rows=1000 width=16)
|
HashAggregate
|
||||||
Group Key: a, d
|
Group Key: a, d
|
||||||
-> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=8)
|
-> Seq Scan on ndistinct
|
||||||
(3 rows)
|
(3 rows)
|
||||||
|
|
||||||
DROP TABLE ndistinct;
|
DROP TABLE ndistinct;
|
||||||
|
@ -1,155 +0,0 @@
|
|||||||
-- Generic extended statistics support
|
|
||||||
-- Ensure stats are dropped sanely
|
|
||||||
CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
|
|
||||||
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
|
|
||||||
DROP STATISTICS ab1_a_b_stats;
|
|
||||||
CREATE SCHEMA regress_schema_2;
|
|
||||||
CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON (a, b) FROM ab1;
|
|
||||||
DROP STATISTICS regress_schema_2.ab1_a_b_stats;
|
|
||||||
-- Ensure statistics are dropped when columns are
|
|
||||||
CREATE STATISTICS ab1_b_c_stats ON (b, c) FROM ab1;
|
|
||||||
CREATE STATISTICS ab1_a_b_c_stats ON (a, b, c) FROM ab1;
|
|
||||||
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
|
|
||||||
ALTER TABLE ab1 DROP COLUMN a;
|
|
||||||
\d ab1
|
|
||||||
Table "public.ab1"
|
|
||||||
Column | Type | Collation | Nullable | Default
|
|
||||||
--------+---------+-----------+----------+---------
|
|
||||||
b | integer | | |
|
|
||||||
c | integer | | |
|
|
||||||
Statistics:
|
|
||||||
"public.ab1_b_c_stats" WITH (ndistinct) ON (b, c)
|
|
||||||
|
|
||||||
DROP TABLE ab1;
|
|
||||||
-- Ensure things work sanely with SET STATISTICS 0
|
|
||||||
CREATE TABLE ab1 (a INTEGER, b INTEGER);
|
|
||||||
ALTER TABLE ab1 ALTER a SET STATISTICS 0;
|
|
||||||
INSERT INTO ab1 SELECT a, a%23 FROM generate_series(1, 1000) a;
|
|
||||||
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
|
|
||||||
ANALYZE ab1;
|
|
||||||
ERROR: extended statistics could not be collected for column "a" of relation public.ab1
|
|
||||||
HINT: Consider ALTER TABLE "public"."ab1" ALTER "a" SET STATISTICS -1
|
|
||||||
ALTER TABLE ab1 ALTER a SET STATISTICS -1;
|
|
||||||
ANALYZE ab1;
|
|
||||||
DROP TABLE ab1;
|
|
||||||
-- n-distinct tests
|
|
||||||
CREATE TABLE ndistinct (
|
|
||||||
filler1 TEXT,
|
|
||||||
filler2 NUMERIC,
|
|
||||||
a INT,
|
|
||||||
b INT,
|
|
||||||
filler3 DATE,
|
|
||||||
c INT,
|
|
||||||
d INT
|
|
||||||
);
|
|
||||||
-- unknown column
|
|
||||||
CREATE STATISTICS s10 ON (unknown_column) FROM ndistinct;
|
|
||||||
ERROR: column "unknown_column" referenced in statistics does not exist
|
|
||||||
-- single column
|
|
||||||
CREATE STATISTICS s10 ON (a) FROM ndistinct;
|
|
||||||
ERROR: statistics require at least 2 columns
|
|
||||||
-- single column, duplicated
|
|
||||||
CREATE STATISTICS s10 ON (a,a) FROM ndistinct;
|
|
||||||
ERROR: duplicate column name in statistics definition
|
|
||||||
-- two columns, one duplicated
|
|
||||||
CREATE STATISTICS s10 ON (a, a, b) FROM ndistinct;
|
|
||||||
ERROR: duplicate column name in statistics definition
|
|
||||||
-- correct command
|
|
||||||
CREATE STATISTICS s10 ON (a, b, c) FROM ndistinct;
|
|
||||||
-- perfectly correlated groups
|
|
||||||
INSERT INTO ndistinct (a, b, c, filler1)
|
|
||||||
SELECT i/100, i/100, i/100, cash_words(i::money)
|
|
||||||
FROM generate_series(1,10000) s(i);
|
|
||||||
ANALYZE ndistinct;
|
|
||||||
SELECT staenabled, standistinct
|
|
||||||
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
|
|
||||||
staenabled | standistinct
|
|
||||||
------------+------------------------------------------------------------------------------------------------
|
|
||||||
{d} | [{(b 3 4), 101.000000}, {(b 3 6), 101.000000}, {(b 4 6), 101.000000}, {(b 3 4 6), 101.000000}]
|
|
||||||
(1 row)
|
|
||||||
|
|
||||||
EXPLAIN (COSTS off)
|
|
||||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
|
|
||||||
QUERY PLAN
|
|
||||||
-----------------------------
|
|
||||||
HashAggregate
|
|
||||||
Group Key: a, b
|
|
||||||
-> Seq Scan on ndistinct
|
|
||||||
(3 rows)
|
|
||||||
|
|
||||||
EXPLAIN (COSTS off)
|
|
||||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
|
|
||||||
QUERY PLAN
|
|
||||||
-----------------------------
|
|
||||||
HashAggregate
|
|
||||||
Group Key: a, b, c
|
|
||||||
-> Seq Scan on ndistinct
|
|
||||||
(3 rows)
|
|
||||||
|
|
||||||
EXPLAIN (COSTS off)
|
|
||||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
|
|
||||||
QUERY PLAN
|
|
||||||
-----------------------------
|
|
||||||
HashAggregate
|
|
||||||
Group Key: a, b, c, d
|
|
||||||
-> Seq Scan on ndistinct
|
|
||||||
(3 rows)
|
|
||||||
|
|
||||||
TRUNCATE TABLE ndistinct;
|
|
||||||
-- partially correlated groups
|
|
||||||
INSERT INTO ndistinct (a, b, c)
|
|
||||||
SELECT i/50, i/100, i/200 FROM generate_series(1,10000) s(i);
|
|
||||||
ANALYZE ndistinct;
|
|
||||||
SELECT staenabled, standistinct
|
|
||||||
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
|
|
||||||
staenabled | standistinct
|
|
||||||
------------+------------------------------------------------------------------------------------------------
|
|
||||||
{d} | [{(b 3 4), 201.000000}, {(b 3 6), 201.000000}, {(b 4 6), 101.000000}, {(b 3 4 6), 201.000000}]
|
|
||||||
(1 row)
|
|
||||||
|
|
||||||
EXPLAIN
|
|
||||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
|
|
||||||
QUERY PLAN
|
|
||||||
---------------------------------------------------------------------
|
|
||||||
HashAggregate (cost=225.00..227.01 rows=201 width=16)
|
|
||||||
Group Key: a, b
|
|
||||||
-> Seq Scan on ndistinct (cost=0.00..150.00 rows=10000 width=8)
|
|
||||||
(3 rows)
|
|
||||||
|
|
||||||
EXPLAIN
|
|
||||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
|
|
||||||
QUERY PLAN
|
|
||||||
----------------------------------------------------------------------
|
|
||||||
HashAggregate (cost=250.00..252.01 rows=201 width=20)
|
|
||||||
Group Key: a, b, c
|
|
||||||
-> Seq Scan on ndistinct (cost=0.00..150.00 rows=10000 width=12)
|
|
||||||
(3 rows)
|
|
||||||
|
|
||||||
EXPLAIN
|
|
||||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
|
|
||||||
QUERY PLAN
|
|
||||||
----------------------------------------------------------------------
|
|
||||||
HashAggregate (cost=275.00..285.00 rows=1000 width=24)
|
|
||||||
Group Key: a, b, c, d
|
|
||||||
-> Seq Scan on ndistinct (cost=0.00..150.00 rows=10000 width=16)
|
|
||||||
(3 rows)
|
|
||||||
|
|
||||||
EXPLAIN
|
|
||||||
SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
|
|
||||||
QUERY PLAN
|
|
||||||
----------------------------------------------------------------------
|
|
||||||
HashAggregate (cost=250.00..260.00 rows=1000 width=20)
|
|
||||||
Group Key: b, c, d
|
|
||||||
-> Seq Scan on ndistinct (cost=0.00..150.00 rows=10000 width=12)
|
|
||||||
(3 rows)
|
|
||||||
|
|
||||||
EXPLAIN
|
|
||||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
|
|
||||||
QUERY PLAN
|
|
||||||
---------------------------------------------------------------------
|
|
||||||
HashAggregate (cost=225.00..235.00 rows=1000 width=16)
|
|
||||||
Group Key: a, d
|
|
||||||
-> Seq Scan on ndistinct (cost=0.00..150.00 rows=10000 width=8)
|
|
||||||
(3 rows)
|
|
||||||
|
|
||||||
DROP TABLE ndistinct;
|
|
@ -1,5 +1,12 @@
|
|||||||
-- Generic extended statistics support
|
-- Generic extended statistics support
|
||||||
|
|
||||||
|
-- We will be checking execution plans without/with statistics, so
|
||||||
|
-- let's make sure we get simple non-parallel plans. Also set the
|
||||||
|
-- work_mem low so that we can use small amounts of data.
|
||||||
|
SET max_parallel_workers = 0;
|
||||||
|
SET max_parallel_workers_per_gather = 0;
|
||||||
|
SET work_mem = '128kB';
|
||||||
|
|
||||||
-- Ensure stats are dropped sanely
|
-- Ensure stats are dropped sanely
|
||||||
CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
|
CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
|
||||||
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
|
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
|
||||||
@ -43,6 +50,29 @@ CREATE TABLE ndistinct (
|
|||||||
d INT
|
d INT
|
||||||
);
|
);
|
||||||
|
|
||||||
|
-- over-estimates when using only per-column statistics
|
||||||
|
INSERT INTO ndistinct (a, b, c, filler1)
|
||||||
|
SELECT i/100, i/100, i/100, cash_words((i/100)::money)
|
||||||
|
FROM generate_series(1,30000) s(i);
|
||||||
|
|
||||||
|
ANALYZE ndistinct;
|
||||||
|
|
||||||
|
-- Group Aggregate, due to over-estimate of the number of groups
|
||||||
|
EXPLAIN (COSTS off)
|
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
|
||||||
|
|
||||||
|
EXPLAIN (COSTS off)
|
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY b, c;
|
||||||
|
|
||||||
|
EXPLAIN (COSTS off)
|
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
|
||||||
|
|
||||||
|
EXPLAIN (COSTS off)
|
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
|
||||||
|
|
||||||
|
EXPLAIN (COSTS off)
|
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
|
||||||
|
|
||||||
-- unknown column
|
-- unknown column
|
||||||
CREATE STATISTICS s10 ON (unknown_column) FROM ndistinct;
|
CREATE STATISTICS s10 ON (unknown_column) FROM ndistinct;
|
||||||
|
|
||||||
@ -58,9 +88,35 @@ CREATE STATISTICS s10 ON (a, a, b) FROM ndistinct;
|
|||||||
-- correct command
|
-- correct command
|
||||||
CREATE STATISTICS s10 ON (a, b, c) FROM ndistinct;
|
CREATE STATISTICS s10 ON (a, b, c) FROM ndistinct;
|
||||||
|
|
||||||
-- perfectly correlated groups
|
ANALYZE ndistinct;
|
||||||
|
|
||||||
|
SELECT staenabled, standistinct
|
||||||
|
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
|
||||||
|
|
||||||
|
-- Hash Aggregate, thanks to estimates improved by the statistic
|
||||||
|
EXPLAIN (COSTS off)
|
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
|
||||||
|
|
||||||
|
EXPLAIN (COSTS off)
|
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY b, c;
|
||||||
|
|
||||||
|
EXPLAIN (COSTS off)
|
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
|
||||||
|
|
||||||
|
-- last two plans keep using Group Aggregate, because 'd' is not covered
|
||||||
|
-- by the statistic and while it's NULL-only we assume 200 values for it
|
||||||
|
EXPLAIN (COSTS off)
|
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
|
||||||
|
|
||||||
|
EXPLAIN (COSTS off)
|
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
|
||||||
|
|
||||||
|
TRUNCATE TABLE ndistinct;
|
||||||
|
|
||||||
|
-- under-estimates when using only per-column statistics
|
||||||
INSERT INTO ndistinct (a, b, c, filler1)
|
INSERT INTO ndistinct (a, b, c, filler1)
|
||||||
SELECT i/100, i/100, i/100, cash_words(i::money)
|
SELECT mod(i,50), mod(i,51), mod(i,32),
|
||||||
|
cash_words(mod(i,33)::int::money)
|
||||||
FROM generate_series(1,10000) s(i);
|
FROM generate_series(1,10000) s(i);
|
||||||
|
|
||||||
ANALYZE ndistinct;
|
ANALYZE ndistinct;
|
||||||
@ -68,6 +124,7 @@ ANALYZE ndistinct;
|
|||||||
SELECT staenabled, standistinct
|
SELECT staenabled, standistinct
|
||||||
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
|
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
|
||||||
|
|
||||||
|
-- plans using Group Aggregate, thanks to using correct esimates
|
||||||
EXPLAIN (COSTS off)
|
EXPLAIN (COSTS off)
|
||||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
|
||||||
|
|
||||||
@ -77,30 +134,32 @@ EXPLAIN (COSTS off)
|
|||||||
EXPLAIN (COSTS off)
|
EXPLAIN (COSTS off)
|
||||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
|
||||||
|
|
||||||
TRUNCATE TABLE ndistinct;
|
EXPLAIN (COSTS off)
|
||||||
|
SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
|
||||||
|
|
||||||
-- partially correlated groups
|
EXPLAIN (COSTS off)
|
||||||
INSERT INTO ndistinct (a, b, c)
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
|
||||||
SELECT i/50, i/100, i/200 FROM generate_series(1,10000) s(i);
|
|
||||||
|
|
||||||
ANALYZE ndistinct;
|
DROP STATISTICS s10;
|
||||||
|
|
||||||
SELECT staenabled, standistinct
|
SELECT staenabled, standistinct
|
||||||
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
|
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
|
||||||
|
|
||||||
EXPLAIN
|
-- dropping the statistics switches the plans to Hash Aggregate,
|
||||||
|
-- due to under-estimates
|
||||||
|
EXPLAIN (COSTS off)
|
||||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
|
||||||
|
|
||||||
EXPLAIN
|
EXPLAIN (COSTS off)
|
||||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
|
||||||
|
|
||||||
EXPLAIN
|
EXPLAIN (COSTS off)
|
||||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
|
||||||
|
|
||||||
EXPLAIN
|
EXPLAIN (COSTS off)
|
||||||
SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
|
SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
|
||||||
|
|
||||||
EXPLAIN
|
EXPLAIN (COSTS off)
|
||||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
|
SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
|
||||||
|
|
||||||
DROP TABLE ndistinct;
|
DROP TABLE ndistinct;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user