1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-19 13:42:17 +03:00

Rework output format of pg_ndistinct

The existing format of pg_ndistinct uses a single-object JSON structure
where each key is itself a comma-separated list of attnums, like:
{"3, 4": 11, "3, 6": 11, "4, 6": 11, "3, 4, 6": 11}

While this is a very compact format, it is confusing to read and it is
difficult to manipulate the values within the object.

The new output format introduced in this commit is an array of objects,
with:
- A key named "attributes", that contains an array of attribute numbers.
- A key named "ndistinct", represented as an integer.

The values use the same underlying type as previously when printed, with
a new output format that shows now as follows:
[{"ndistinct": 11, "attributes": [3,4]},
 {"ndistinct": 11, "attributes": [3,6]},
 {"ndistinct": 11, "attributes": [4,6]},
 {"ndistinct": 11, "attributes": [3,4,6]}]

This new format will become handy for a follow-up set of changes, so as
it becomes possible to inject extended statistics rather than require an
ANALYZE, like in a dump/restore sequence or after pg_upgrade on a new
cluster.

This format has been suggested by Tomas Vondra.  The key names are
defined in a new header, to ease with the integration of
frontend-specific changes that are still under discussion.  (Personal
note: I am not specifically wedded to these key names, but if there are
better name suggestions for this release, feel free.)

The bulk of the changes come from the regression tests, where
jsonb_pretty() is now used to make the outputs generated easier to
parse.

Author: Corey Huinker <corey.huinker@gmail.com>
Reviewed-by: Jian He <jian.universality@gmail.com>
Reviewed-by: Michael Paquier <michael@paquier.xyz>
Discussion: https://postgr.es/m/CADkLM=dpz3KFnqP-dgJ-zvRvtjsa8UZv8wDAQdqho=qN3kX0Zg@mail.gmail.com
This commit is contained in:
Michael Paquier
2025-11-17 09:52:20 +09:00
parent 32b236644d
commit 1f927cce44
5 changed files with 223 additions and 37 deletions

View File

@@ -1576,12 +1576,42 @@ CREATE STATISTICS stts2 (ndistinct) ON city, state, zip FROM zipcodes;
ANALYZE zipcodes; ANALYZE zipcodes;
SELECT stxkeys AS k, stxdndistinct AS nd SELECT stxkeys AS k, jsonb_pretty(stxdndistinct::text::jsonb) AS nd
FROM pg_statistic_ext join pg_statistic_ext_data on (oid = stxoid) FROM pg_statistic_ext JOIN pg_statistic_ext_data on (oid = stxoid)
WHERE stxname = 'stts2'; WHERE stxname = 'stts2';
-[ RECORD 1 ]------------------------------------------------------&zwsp;-- -[ RECORD 1 ]-------------------
k | 1 2 5 k | 1 2 5
nd | {"1, 2": 33178, "1, 5": 33178, "2, 5": 27435, "1, 2, 5": 33178} nd | [ +
| { +
| "ndistinct": 33178,+
| "attributes": [ +
| 1, +
| 2 +
| ] +
| }, +
| { +
| "ndistinct": 33178,+
| "attributes": [ +
| 1, +
| 5 +
| ] +
| }, +
| { +
| "ndistinct": 27435,+
| "attributes": [ +
| 2, +
| 5 +
| ] +
| }, +
| { +
| "ndistinct": 33178,+
| "attributes": [ +
| 1, +
| 2, +
| 5 +
| ] +
| } +
| ]
(1 row) (1 row)
</programlisting> </programlisting>
This indicates that there are three combinations of columns that This indicates that there are three combinations of columns that

View File

@@ -16,6 +16,7 @@
#include "lib/stringinfo.h" #include "lib/stringinfo.h"
#include "statistics/extended_stats_internal.h" #include "statistics/extended_stats_internal.h"
#include "statistics/statistics_format.h"
#include "utils/fmgrprotos.h" #include "utils/fmgrprotos.h"
@@ -51,26 +52,29 @@ pg_ndistinct_out(PG_FUNCTION_ARGS)
StringInfoData str; StringInfoData str;
initStringInfo(&str); initStringInfo(&str);
appendStringInfoChar(&str, '{'); appendStringInfoChar(&str, '[');
for (i = 0; i < ndist->nitems; i++) for (i = 0; i < ndist->nitems; i++)
{ {
int j;
MVNDistinctItem item = ndist->items[i]; MVNDistinctItem item = ndist->items[i];
if (i > 0) if (i > 0)
appendStringInfoString(&str, ", "); appendStringInfoString(&str, ", ");
for (j = 0; j < item.nattributes; j++) if (item.nattributes <= 0)
{ elog(ERROR, "invalid zero-length attribute array in MVNDistinct");
AttrNumber attnum = item.attributes[j];
appendStringInfo(&str, "%s%d", (j == 0) ? "\"" : ", ", attnum); appendStringInfo(&str, "{\"" PG_NDISTINCT_KEY_ATTRIBUTES "\": [%d",
} item.attributes[0]);
appendStringInfo(&str, "\": %d", (int) item.ndistinct);
for (int j = 1; j < item.nattributes; j++)
appendStringInfo(&str, ", %d", item.attributes[j]);
appendStringInfo(&str, "], \"" PG_NDISTINCT_KEY_NDISTINCT "\": %d}",
(int) item.ndistinct);
} }
appendStringInfoChar(&str, '}'); appendStringInfoChar(&str, ']');
PG_RETURN_CSTRING(str.data); PG_RETURN_CSTRING(str.data);
} }

View File

@@ -0,0 +1,32 @@
/*-------------------------------------------------------------------------
*
* statistics_format.h
* Data related to the format of extended statistics, usable by both
* frontend and backend code.
*
* Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/include/statistics/statistics_format.h
*
*-------------------------------------------------------------------------
*/
#ifndef STATISTICS_FORMAT_H
#define STATISTICS_FORMAT_H
/* ----------
* pg_ndistinct in human-readable format is a JSON array made of elements with
* a predefined set of keys, like:
*
* [{"ndistinct": 11, "attributes": [3,4]},
* {"ndistinct": 11, "attributes": [3,6]},
* {"ndistinct": 11, "attributes": [4,6]},
* {"ndistinct": 11, "attributes": [3,4,6]},
* ... ]
* ----------
*/
#define PG_NDISTINCT_KEY_ATTRIBUTES "attributes"
#define PG_NDISTINCT_KEY_NDISTINCT "ndistinct"
#endif /* STATISTICS_FORMAT_H */

View File

@@ -196,7 +196,7 @@ Statistics objects:
"public.ab1_a_b_stats" ON a, b FROM ab1; STATISTICS 0 "public.ab1_a_b_stats" ON a, b FROM ab1; STATISTICS 0
ANALYZE ab1; ANALYZE ab1;
SELECT stxname, stxdndistinct, stxddependencies, stxdmcv, stxdinherit SELECT stxname, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct, stxddependencies, stxdmcv, stxdinherit
FROM pg_statistic_ext s LEFT JOIN pg_statistic_ext_data d ON (d.stxoid = s.oid) FROM pg_statistic_ext s LEFT JOIN pg_statistic_ext_data d ON (d.stxoid = s.oid)
WHERE s.stxname = 'ab1_a_b_stats'; WHERE s.stxname = 'ab1_a_b_stats';
stxname | stxdndistinct | stxddependencies | stxdmcv | stxdinherit stxname | stxdndistinct | stxddependencies | stxdmcv | stxdinherit
@@ -476,13 +476,43 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (
-- correct command -- correct command
CREATE STATISTICS s10 ON a, b, c FROM ndistinct; CREATE STATISTICS s10 ON a, b, c FROM ndistinct;
ANALYZE ndistinct; ANALYZE ndistinct;
SELECT s.stxkind, d.stxdndistinct SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
FROM pg_statistic_ext s, pg_statistic_ext_data d FROM pg_statistic_ext s, pg_statistic_ext_data d
WHERE s.stxrelid = 'ndistinct'::regclass WHERE s.stxrelid = 'ndistinct'::regclass
AND d.stxoid = s.oid; AND d.stxoid = s.oid;
stxkind | stxdndistinct stxkind | stxdndistinct
---------+----------------------------------------------------- ---------+--------------------------
{d,f,m} | {"3, 4": 11, "3, 6": 11, "4, 6": 11, "3, 4, 6": 11} {d,f,m} | [ +
| { +
| "ndistinct": 11,+
| "attributes": [ +
| 3, +
| 4 +
| ] +
| }, +
| { +
| "ndistinct": 11,+
| "attributes": [ +
| 3, +
| 6 +
| ] +
| }, +
| { +
| "ndistinct": 11,+
| "attributes": [ +
| 4, +
| 6 +
| ] +
| }, +
| { +
| "ndistinct": 11,+
| "attributes": [ +
| 3, +
| 4, +
| 6 +
| ] +
| } +
| ]
(1 row) (1 row)
-- minor improvement, make sure the ctid does not break the matching -- minor improvement, make sure the ctid does not break the matching
@@ -558,13 +588,43 @@ INSERT INTO ndistinct (a, b, c, filler1)
mod(i,23) || ' dollars and zero cents' mod(i,23) || ' dollars and zero cents'
FROM generate_series(1,1000) s(i); FROM generate_series(1,1000) s(i);
ANALYZE ndistinct; ANALYZE ndistinct;
SELECT s.stxkind, d.stxdndistinct SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
FROM pg_statistic_ext s, pg_statistic_ext_data d FROM pg_statistic_ext s, pg_statistic_ext_data d
WHERE s.stxrelid = 'ndistinct'::regclass WHERE s.stxrelid = 'ndistinct'::regclass
AND d.stxoid = s.oid; AND d.stxoid = s.oid;
stxkind | stxdndistinct stxkind | stxdndistinct
---------+---------------------------------------------------------- ---------+----------------------------
{d,f,m} | {"3, 4": 221, "3, 6": 247, "4, 6": 323, "3, 4, 6": 1000} {d,f,m} | [ +
| { +
| "ndistinct": 221, +
| "attributes": [ +
| 3, +
| 4 +
| ] +
| }, +
| { +
| "ndistinct": 247, +
| "attributes": [ +
| 3, +
| 6 +
| ] +
| }, +
| { +
| "ndistinct": 323, +
| "attributes": [ +
| 4, +
| 6 +
| ] +
| }, +
| { +
| "ndistinct": 1000,+
| "attributes": [ +
| 3, +
| 4, +
| 6 +
| ] +
| } +
| ]
(1 row) (1 row)
-- correct estimates -- correct estimates
@@ -623,7 +683,7 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (
(1 row) (1 row)
DROP STATISTICS s10; DROP STATISTICS s10;
SELECT s.stxkind, d.stxdndistinct SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
FROM pg_statistic_ext s, pg_statistic_ext_data d FROM pg_statistic_ext s, pg_statistic_ext_data d
WHERE s.stxrelid = 'ndistinct'::regclass WHERE s.stxrelid = 'ndistinct'::regclass
AND d.stxoid = s.oid; AND d.stxoid = s.oid;
@@ -707,13 +767,43 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (
CREATE STATISTICS s10 (ndistinct) ON (a+1), (b+100), (2*c) FROM ndistinct; CREATE STATISTICS s10 (ndistinct) ON (a+1), (b+100), (2*c) FROM ndistinct;
ANALYZE ndistinct; ANALYZE ndistinct;
SELECT s.stxkind, d.stxdndistinct SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
FROM pg_statistic_ext s, pg_statistic_ext_data d FROM pg_statistic_ext s, pg_statistic_ext_data d
WHERE s.stxrelid = 'ndistinct'::regclass WHERE s.stxrelid = 'ndistinct'::regclass
AND d.stxoid = s.oid; AND d.stxoid = s.oid;
stxkind | stxdndistinct stxkind | stxdndistinct
---------+------------------------------------------------------------------- ---------+----------------------------
{d,e} | {"-1, -2": 221, "-1, -3": 247, "-2, -3": 323, "-1, -2, -3": 1000} {d,e} | [ +
| { +
| "ndistinct": 221, +
| "attributes": [ +
| -1, +
| -2 +
| ] +
| }, +
| { +
| "ndistinct": 247, +
| "attributes": [ +
| -1, +
| -3 +
| ] +
| }, +
| { +
| "ndistinct": 323, +
| "attributes": [ +
| -2, +
| -3 +
| ] +
| }, +
| { +
| "ndistinct": 1000,+
| "attributes": [ +
| -1, +
| -2, +
| -3 +
| ] +
| } +
| ]
(1 row) (1 row)
SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)'); SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)');
@@ -756,13 +846,43 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b
CREATE STATISTICS s10 (ndistinct) ON a, b, (2*c) FROM ndistinct; CREATE STATISTICS s10 (ndistinct) ON a, b, (2*c) FROM ndistinct;
ANALYZE ndistinct; ANALYZE ndistinct;
SELECT s.stxkind, d.stxdndistinct SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
FROM pg_statistic_ext s, pg_statistic_ext_data d FROM pg_statistic_ext s, pg_statistic_ext_data d
WHERE s.stxrelid = 'ndistinct'::regclass WHERE s.stxrelid = 'ndistinct'::regclass
AND d.stxoid = s.oid; AND d.stxoid = s.oid;
stxkind | stxdndistinct stxkind | stxdndistinct
---------+------------------------------------------------------------- ---------+----------------------------
{d,e} | {"3, 4": 221, "3, -1": 247, "4, -1": 323, "3, 4, -1": 1000} {d,e} | [ +
| { +
| "ndistinct": 221, +
| "attributes": [ +
| 3, +
| 4 +
| ] +
| }, +
| { +
| "ndistinct": 247, +
| "attributes": [ +
| 3, +
| -1 +
| ] +
| }, +
| { +
| "ndistinct": 323, +
| "attributes": [ +
| 4, +
| -1 +
| ] +
| }, +
| { +
| "ndistinct": 1000,+
| "attributes": [ +
| 3, +
| 4, +
| -1 +
| ] +
| } +
| ]
(1 row) (1 row)
SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b'); SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');

View File

@@ -125,7 +125,7 @@ ALTER TABLE ab1 ALTER a SET STATISTICS -1;
ALTER STATISTICS ab1_a_b_stats SET STATISTICS 0; ALTER STATISTICS ab1_a_b_stats SET STATISTICS 0;
\d ab1 \d ab1
ANALYZE ab1; ANALYZE ab1;
SELECT stxname, stxdndistinct, stxddependencies, stxdmcv, stxdinherit SELECT stxname, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct, stxddependencies, stxdmcv, stxdinherit
FROM pg_statistic_ext s LEFT JOIN pg_statistic_ext_data d ON (d.stxoid = s.oid) FROM pg_statistic_ext s LEFT JOIN pg_statistic_ext_data d ON (d.stxoid = s.oid)
WHERE s.stxname = 'ab1_a_b_stats'; WHERE s.stxname = 'ab1_a_b_stats';
ALTER STATISTICS ab1_a_b_stats SET STATISTICS -1; ALTER STATISTICS ab1_a_b_stats SET STATISTICS -1;
@@ -297,7 +297,7 @@ CREATE STATISTICS s10 ON a, b, c FROM ndistinct;
ANALYZE ndistinct; ANALYZE ndistinct;
SELECT s.stxkind, d.stxdndistinct SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
FROM pg_statistic_ext s, pg_statistic_ext_data d FROM pg_statistic_ext s, pg_statistic_ext_data d
WHERE s.stxrelid = 'ndistinct'::regclass WHERE s.stxrelid = 'ndistinct'::regclass
AND d.stxoid = s.oid; AND d.stxoid = s.oid;
@@ -338,7 +338,7 @@ INSERT INTO ndistinct (a, b, c, filler1)
ANALYZE ndistinct; ANALYZE ndistinct;
SELECT s.stxkind, d.stxdndistinct SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
FROM pg_statistic_ext s, pg_statistic_ext_data d FROM pg_statistic_ext s, pg_statistic_ext_data d
WHERE s.stxrelid = 'ndistinct'::regclass WHERE s.stxrelid = 'ndistinct'::regclass
AND d.stxoid = s.oid; AND d.stxoid = s.oid;
@@ -364,7 +364,7 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (
DROP STATISTICS s10; DROP STATISTICS s10;
SELECT s.stxkind, d.stxdndistinct SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
FROM pg_statistic_ext s, pg_statistic_ext_data d FROM pg_statistic_ext s, pg_statistic_ext_data d
WHERE s.stxrelid = 'ndistinct'::regclass WHERE s.stxrelid = 'ndistinct'::regclass
AND d.stxoid = s.oid; AND d.stxoid = s.oid;
@@ -399,7 +399,7 @@ CREATE STATISTICS s10 (ndistinct) ON (a+1), (b+100), (2*c) FROM ndistinct;
ANALYZE ndistinct; ANALYZE ndistinct;
SELECT s.stxkind, d.stxdndistinct SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
FROM pg_statistic_ext s, pg_statistic_ext_data d FROM pg_statistic_ext s, pg_statistic_ext_data d
WHERE s.stxrelid = 'ndistinct'::regclass WHERE s.stxrelid = 'ndistinct'::regclass
AND d.stxoid = s.oid; AND d.stxoid = s.oid;
@@ -423,7 +423,7 @@ CREATE STATISTICS s10 (ndistinct) ON a, b, (2*c) FROM ndistinct;
ANALYZE ndistinct; ANALYZE ndistinct;
SELECT s.stxkind, d.stxdndistinct SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
FROM pg_statistic_ext s, pg_statistic_ext_data d FROM pg_statistic_ext s, pg_statistic_ext_data d
WHERE s.stxrelid = 'ndistinct'::regclass WHERE s.stxrelid = 'ndistinct'::regclass
AND d.stxoid = s.oid; AND d.stxoid = s.oid;