1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-28 23:42:10 +03:00

Rework the pg_statistic_ext catalog

Since extended statistic got introduced in PostgreSQL 10, there was a
single catalog pg_statistic_ext storing both the definitions and built
statistic.  That's however problematic when a user is supposed to have
access only to the definitions, but not to user data.

Consider for example pg_dump on a database with RLS enabled - if the
pg_statistic_ext catalog respects RLS (which it should, if it contains
user data), pg_dump would not see any records and the result would not
define any extended statistics.  That would be a surprising behavior.

Until now this was not a pressing issue, because the existing types of
extended statistic (functional dependencies and ndistinct coefficients)
do not include any user data directly.  This changed with introduction
of MCV lists, which do include most common combinations of values.

The easiest way to fix this is to split the pg_statistic_ext catalog
into two - one for definitions, one for the built statistic values.
The new catalog is called pg_statistic_ext_data, and we're maintaining
a 1:1 relationship with the old catalog - either there are matching
records in both catalogs, or neither of them.

Bumped CATVERSION due to changing system catalog definitions.

Author: Dean Rasheed, with improvements by me
Reviewed-by: Dean Rasheed, John Naylor
Discussion: https://postgr.es/m/CAEZATCUhT9rt7Ui%3DVdx4N%3D%3DVV5XOK5dsXfnGgVOz_JhAicB%3DZA%40mail.gmail.com
This commit is contained in:
Tomas Vondra
2019-06-13 17:19:21 +02:00
parent e3846a00c2
commit 6cbfb784c3
25 changed files with 337 additions and 118 deletions

View File

@ -985,6 +985,14 @@ WHERE stxowner != 0 AND
------+----------
(0 rows)
SELECT ctid, stxoid
FROM pg_catalog.pg_statistic_ext_data fk
WHERE stxoid != 0 AND
NOT EXISTS(SELECT 1 FROM pg_catalog.pg_statistic_ext pk WHERE pk.oid = fk.stxoid);
ctid | stxoid
------+--------
(0 rows)
SELECT ctid, spcowner
FROM pg_catalog.pg_tablespace fk
WHERE spcowner != 0 AND

View File

@ -149,6 +149,7 @@ pg_shdescription|t
pg_shseclabel|t
pg_statistic|t
pg_statistic_ext|t
pg_statistic_ext_data|t
pg_subscription|t
pg_subscription_rel|t
pg_tablespace|t

View File

@ -199,9 +199,11 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c
-- correct command
CREATE STATISTICS s10 ON a, b, c FROM ndistinct;
ANALYZE ndistinct;
SELECT stxkind, stxndistinct
FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass;
stxkind | stxndistinct
SELECT s.stxkind, d.stxdndistinct
FROM pg_statistic_ext s, pg_statistic_ext_data d
WHERE s.stxrelid = 'ndistinct'::regclass
AND d.stxoid = s.oid;
stxkind | stxdndistinct
---------+-----------------------------------------------------
{d,f,m} | {"3, 4": 11, "3, 6": 11, "4, 6": 11, "3, 4, 6": 11}
(1 row)
@ -246,9 +248,11 @@ INSERT INTO ndistinct (a, b, c, filler1)
cash_words(mod(i,33)::int::money)
FROM generate_series(1,5000) s(i);
ANALYZE ndistinct;
SELECT stxkind, stxndistinct
FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass;
stxkind | stxndistinct
SELECT s.stxkind, d.stxdndistinct
FROM pg_statistic_ext s, pg_statistic_ext_data d
WHERE s.stxrelid = 'ndistinct'::regclass
AND d.stxoid = s.oid;
stxkind | stxdndistinct
---------+------------------------------------------------------------
{d,f,m} | {"3, 4": 2550, "3, 6": 800, "4, 6": 1632, "3, 4, 6": 5000}
(1 row)
@ -285,10 +289,12 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, d
(1 row)
DROP STATISTICS s10;
SELECT stxkind, stxndistinct
FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass;
stxkind | stxndistinct
---------+--------------
SELECT s.stxkind, d.stxdndistinct
FROM pg_statistic_ext s, pg_statistic_ext_data d
WHERE s.stxrelid = 'ndistinct'::regclass
AND d.stxoid = s.oid;
stxkind | stxdndistinct
---------+---------------
(0 rows)
-- dropping the statistics results in under-estimates
@ -537,7 +543,10 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 4 AND b <
-- check change of unrelated column type does not reset the MCV statistics
ALTER TABLE mcv_lists ALTER COLUMN d TYPE VARCHAR(64);
SELECT stxmcv IS NOT NULL FROM pg_statistic_ext WHERE stxname = 'mcv_lists_stats';
SELECT d.stxdmcv IS NOT NULL
FROM pg_statistic_ext s, pg_statistic_ext_data d
WHERE s.stxname = 'mcv_lists_stats'
AND d.stxoid = s.oid;
?column?
----------
t
@ -600,8 +609,11 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND
TRUNCATE mcv_lists;
INSERT INTO mcv_lists (a, b, c) SELECT 1, 2, 3 FROM generate_series(1,1000) s(i);
ANALYZE mcv_lists;
SELECT m.* FROM pg_statistic_ext,
pg_mcv_list_items(stxmcv) m WHERE stxname = 'mcv_lists_stats';
SELECT m.*
FROM pg_statistic_ext s, pg_statistic_ext_data d,
pg_mcv_list_items(d.stxdmcv) m
WHERE s.stxname = 'mcv_lists_stats'
AND d.stxoid = s.oid;
index | values | nulls | frequency | base_frequency
-------+-----------+---------+-----------+----------------
0 | {1, 2, 3} | {f,f,f} | 1 | 1

View File

@ -493,6 +493,10 @@ SELECT ctid, stxowner
FROM pg_catalog.pg_statistic_ext fk
WHERE stxowner != 0 AND
NOT EXISTS(SELECT 1 FROM pg_catalog.pg_authid pk WHERE pk.oid = fk.stxowner);
SELECT ctid, stxoid
FROM pg_catalog.pg_statistic_ext_data fk
WHERE stxoid != 0 AND
NOT EXISTS(SELECT 1 FROM pg_catalog.pg_statistic_ext pk WHERE pk.oid = fk.stxoid);
SELECT ctid, spcowner
FROM pg_catalog.pg_tablespace fk
WHERE spcowner != 0 AND

View File

@ -144,8 +144,10 @@ CREATE STATISTICS s10 ON a, b, c FROM ndistinct;
ANALYZE ndistinct;
SELECT stxkind, stxndistinct
FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass;
SELECT s.stxkind, d.stxdndistinct
FROM pg_statistic_ext s, pg_statistic_ext_data d
WHERE s.stxrelid = 'ndistinct'::regclass
AND d.stxoid = s.oid;
-- Hash Aggregate, thanks to estimates improved by the statistic
SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
@ -170,8 +172,10 @@ INSERT INTO ndistinct (a, b, c, filler1)
ANALYZE ndistinct;
SELECT stxkind, stxndistinct
FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass;
SELECT s.stxkind, d.stxdndistinct
FROM pg_statistic_ext s, pg_statistic_ext_data d
WHERE s.stxrelid = 'ndistinct'::regclass
AND d.stxoid = s.oid;
-- correct esimates
SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
@ -186,8 +190,10 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, d
DROP STATISTICS s10;
SELECT stxkind, stxndistinct
FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass;
SELECT s.stxkind, d.stxdndistinct
FROM pg_statistic_ext s, pg_statistic_ext_data d
WHERE s.stxrelid = 'ndistinct'::regclass
AND d.stxoid = s.oid;
-- dropping the statistics results in under-estimates
SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
@ -335,7 +341,10 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 4 AND b <
-- check change of unrelated column type does not reset the MCV statistics
ALTER TABLE mcv_lists ALTER COLUMN d TYPE VARCHAR(64);
SELECT stxmcv IS NOT NULL FROM pg_statistic_ext WHERE stxname = 'mcv_lists_stats';
SELECT d.stxdmcv IS NOT NULL
FROM pg_statistic_ext s, pg_statistic_ext_data d
WHERE s.stxname = 'mcv_lists_stats'
AND d.stxoid = s.oid;
-- check change of column type resets the MCV statistics
ALTER TABLE mcv_lists ALTER COLUMN c TYPE numeric;
@ -378,8 +387,11 @@ TRUNCATE mcv_lists;
INSERT INTO mcv_lists (a, b, c) SELECT 1, 2, 3 FROM generate_series(1,1000) s(i);
ANALYZE mcv_lists;
SELECT m.* FROM pg_statistic_ext,
pg_mcv_list_items(stxmcv) m WHERE stxname = 'mcv_lists_stats';
SELECT m.*
FROM pg_statistic_ext s, pg_statistic_ext_data d,
pg_mcv_list_items(d.stxdmcv) m
WHERE s.stxname = 'mcv_lists_stats'
AND d.stxoid = s.oid;
-- mcv with arrays
CREATE TABLE mcv_lists_arrays (