mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-24 01:29:19 +03:00 
			
		
		
		
	Build inherited extended stats on partitioned tables
Commit859b3003dedisabled building of extended stats for inheritance trees, to prevent updating the same catalog row twice. While that resolved the issue, it also means there are no extended stats for declaratively partitioned tables, because there are no data in the non-leaf relations. That also means declaratively partitioned tables were not affected by the issue859b3003deaddressed, which means this is a regression affecting queries that calculate estimates for the whole inheritance tree as a whole (which includes e.g. GROUP BY queries). But because partitioned tables are empty, we can invert the condition and build statistics only for the case with inheritance, without losing anything. And we can consider them when calculating estimates. It may be necessary to run ANALYZE on partitioned tables, to collect proper statistics. For declarative partitioning there should no prior statistics, and it might take time before autoanalyze is triggered. For tables partitioned by inheritance the statistics may include data from child relations (if built859b3003de), contradicting the current code. Report and patch by Justin Pryzby, minor fixes and cleanup by me. Backpatch all the way back to PostgreSQL 10, where extended statistics were introduced (same as859b3003de). Author: Justin Pryzby Reported-by: Justin Pryzby Backpatch-through: 10 Discussion: https://postgr.es/m/20210923212624.GI831%40telsasoft.com
This commit is contained in:
		| @@ -533,6 +533,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params, | |||||||
| 	{ | 	{ | ||||||
| 		MemoryContext col_context, | 		MemoryContext col_context, | ||||||
| 					old_context; | 					old_context; | ||||||
|  | 		bool		build_ext_stats; | ||||||
|  |  | ||||||
| 		pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE, | 		pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE, | ||||||
| 									 PROGRESS_ANALYZE_PHASE_COMPUTE_STATS); | 									 PROGRESS_ANALYZE_PHASE_COMPUTE_STATS); | ||||||
| @@ -596,13 +597,28 @@ do_analyze_rel(Relation onerel, VacuumParams *params, | |||||||
| 							thisdata->attr_cnt, thisdata->vacattrstats); | 							thisdata->attr_cnt, thisdata->vacattrstats); | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
|  | 		/* | ||||||
|  | 		 * Should we build extended statistics for this relation? | ||||||
|  | 		 * | ||||||
|  | 		 * The extended statistics catalog does not include an inheritance | ||||||
|  | 		 * flag, so we can't store statistics built both with and without | ||||||
|  | 		 * data from child relations. We can store just one set of statistics | ||||||
|  | 		 * per relation. For plain relations that's fine, but for inheritance | ||||||
|  | 		 * trees we have to pick whether to store statistics for just the | ||||||
|  | 		 * one relation or the whole tree. For plain inheritance we store | ||||||
|  | 		 * the (!inh) version, mostly for backwards compatibility reasons. | ||||||
|  | 		 * For partitioned tables that's pointless (the non-leaf tables are | ||||||
|  | 		 * always empty), so we store stats representing the whole tree. | ||||||
|  | 		 */ | ||||||
|  | 		build_ext_stats = (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) ? inh : (!inh); | ||||||
|  |  | ||||||
| 		/* | 		/* | ||||||
| 		 * Build extended statistics (if there are any). | 		 * Build extended statistics (if there are any). | ||||||
| 		 * | 		 * | ||||||
| 		 * For now we only build extended statistics on individual relations, | 		 * For now we only build extended statistics on individual relations, | ||||||
| 		 * not for relations representing inheritance trees. | 		 * not for relations representing inheritance trees. | ||||||
| 		 */ | 		 */ | ||||||
| 		if (!inh) | 		if (build_ext_stats) | ||||||
| 			BuildRelationExtStatistics(onerel, totalrows, numrows, rows, | 			BuildRelationExtStatistics(onerel, totalrows, numrows, rows, | ||||||
| 									   attr_cnt, vacattrstats); | 									   attr_cnt, vacattrstats); | ||||||
| 	} | 	} | ||||||
|   | |||||||
| @@ -1217,10 +1217,13 @@ dependencies_clauselist_selectivity(PlannerInfo *root, | |||||||
| 	RangeTblEntry *rte = planner_rt_fetch(rel->relid, root); | 	RangeTblEntry *rte = planner_rt_fetch(rel->relid, root); | ||||||
|  |  | ||||||
| 	/* | 	/* | ||||||
| 	 * When dealing with inheritance trees, ignore extended stats (which were | 	 * When dealing with regular inheritance trees, ignore extended stats | ||||||
| 	 * built without data from child rels, and thus do not represent them). | 	 * (which were built without data from child rels, and thus do not | ||||||
|  | 	 * represent them). For partitioned tables data there's no data in the | ||||||
|  | 	 * non-leaf relations, so we build stats only for the inheritance tree. | ||||||
|  | 	 * So for partitioned tables we do consider extended stats. | ||||||
| 	 */ | 	 */ | ||||||
| 	if (rte->inh) | 	if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE) | ||||||
| 		return 1.0; | 		return 1.0; | ||||||
|  |  | ||||||
| 	/* check if there's any stats that might be useful for us. */ | 	/* check if there's any stats that might be useful for us. */ | ||||||
|   | |||||||
| @@ -1298,10 +1298,13 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli | |||||||
| 	RangeTblEntry *rte = planner_rt_fetch(rel->relid, root); | 	RangeTblEntry *rte = planner_rt_fetch(rel->relid, root); | ||||||
|  |  | ||||||
| 	/* | 	/* | ||||||
| 	 * When dealing with inheritance trees, ignore extended stats (which were | 	 * When dealing with regular inheritance trees, ignore extended stats | ||||||
| 	 * built without data from child rels, and thus do not represent them). | 	 * (which were built without data from child rels, and thus do not | ||||||
|  | 	 * represent them). For partitioned tables data there's no data in the | ||||||
|  | 	 * non-leaf relations, so we build stats only for the inheritance tree. | ||||||
|  | 	 * So for partitioned tables we do consider extended stats. | ||||||
| 	 */ | 	 */ | ||||||
| 	if (rte->inh) | 	if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE) | ||||||
| 		return 1.0; | 		return 1.0; | ||||||
|  |  | ||||||
| 	/* check if there's any stats that might be useful for us. */ | 	/* check if there's any stats that might be useful for us. */ | ||||||
|   | |||||||
| @@ -3887,19 +3887,23 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel, | |||||||
| 	Oid			statOid = InvalidOid; | 	Oid			statOid = InvalidOid; | ||||||
| 	MVNDistinct *stats; | 	MVNDistinct *stats; | ||||||
| 	Bitmapset  *matched = NULL; | 	Bitmapset  *matched = NULL; | ||||||
| 	RangeTblEntry		*rte = planner_rt_fetch(rel->relid, root); | 	RangeTblEntry		*rte; | ||||||
|  |  | ||||||
| 	/* |  | ||||||
| 	 * When dealing with inheritance trees, ignore extended stats (which were |  | ||||||
| 	 * built without data from child rels, and thus do not represent them). |  | ||||||
| 	 */ |  | ||||||
| 	if (rte->inh) |  | ||||||
| 		return false; |  | ||||||
|  |  | ||||||
| 	/* bail out immediately if the table has no extended statistics */ | 	/* bail out immediately if the table has no extended statistics */ | ||||||
| 	if (!rel->statlist) | 	if (!rel->statlist) | ||||||
| 		return false; | 		return false; | ||||||
|  |  | ||||||
|  | 	/* | ||||||
|  | 	 * When dealing with regular inheritance trees, ignore extended stats | ||||||
|  | 	 * (which were built without data from child rels, and thus do not | ||||||
|  | 	 * represent them). For partitioned tables data there's no data in the | ||||||
|  | 	 * non-leaf relations, so we build stats only for the inheritance tree. | ||||||
|  | 	 * So for partitioned tables we do consider extended stats. | ||||||
|  | 	 */ | ||||||
|  | 	rte = planner_rt_fetch(rel->relid, root); | ||||||
|  | 	if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE) | ||||||
|  | 		return false; | ||||||
|  |  | ||||||
| 	/* Determine the attnums we're looking for */ | 	/* Determine the attnums we're looking for */ | ||||||
| 	foreach(lc, *varinfos) | 	foreach(lc, *varinfos) | ||||||
| 	{ | 	{ | ||||||
|   | |||||||
| @@ -202,6 +202,25 @@ SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* WHERE a = 0 AND b | |||||||
| (1 row) | (1 row) | ||||||
|  |  | ||||||
| DROP TABLE stxdinh, stxdinh1, stxdinh2; | DROP TABLE stxdinh, stxdinh1, stxdinh2; | ||||||
|  | -- Ensure inherited stats ARE applied to inherited query in partitioned table | ||||||
|  | CREATE TABLE stxdinp(i int, a int, b int) PARTITION BY RANGE (i); | ||||||
|  | CREATE TABLE stxdinp1 PARTITION OF stxdinp FOR VALUES FROM (1) TO (100); | ||||||
|  | INSERT INTO stxdinp SELECT 1, a/100, a/100 FROM generate_series(1, 999) a; | ||||||
|  | CREATE STATISTICS stxdinp ON a, b FROM stxdinp; | ||||||
|  | VACUUM ANALYZE stxdinp; -- partitions are processed recursively | ||||||
|  | SELECT 1 FROM pg_statistic_ext WHERE stxrelid = 'stxdinp'::regclass; | ||||||
|  |  ?column?  | ||||||
|  | ---------- | ||||||
|  |         1 | ||||||
|  | (1 row) | ||||||
|  |  | ||||||
|  | SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinp GROUP BY 1, 2'); | ||||||
|  |  estimated | actual  | ||||||
|  | -----------+-------- | ||||||
|  |         10 |     10 | ||||||
|  | (1 row) | ||||||
|  |  | ||||||
|  | DROP TABLE stxdinp; | ||||||
| -- Verify supported object types for extended statistics | -- Verify supported object types for extended statistics | ||||||
| CREATE schema tststats; | CREATE schema tststats; | ||||||
| CREATE TABLE tststats.t (a int, b int, c text); | CREATE TABLE tststats.t (a int, b int, c text); | ||||||
|   | |||||||
| @@ -128,6 +128,16 @@ SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* GROUP BY 1, 2'); | |||||||
| SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* WHERE a = 0 AND b = 0'); | SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* WHERE a = 0 AND b = 0'); | ||||||
| DROP TABLE stxdinh, stxdinh1, stxdinh2; | DROP TABLE stxdinh, stxdinh1, stxdinh2; | ||||||
|  |  | ||||||
|  | -- Ensure inherited stats ARE applied to inherited query in partitioned table | ||||||
|  | CREATE TABLE stxdinp(i int, a int, b int) PARTITION BY RANGE (i); | ||||||
|  | CREATE TABLE stxdinp1 PARTITION OF stxdinp FOR VALUES FROM (1) TO (100); | ||||||
|  | INSERT INTO stxdinp SELECT 1, a/100, a/100 FROM generate_series(1, 999) a; | ||||||
|  | CREATE STATISTICS stxdinp ON a, b FROM stxdinp; | ||||||
|  | VACUUM ANALYZE stxdinp; -- partitions are processed recursively | ||||||
|  | SELECT 1 FROM pg_statistic_ext WHERE stxrelid = 'stxdinp'::regclass; | ||||||
|  | SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinp GROUP BY 1, 2'); | ||||||
|  | DROP TABLE stxdinp; | ||||||
|  |  | ||||||
| -- Verify supported object types for extended statistics | -- Verify supported object types for extended statistics | ||||||
| CREATE schema tststats; | CREATE schema tststats; | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user