mirror of
https://github.com/postgres/postgres.git
synced 2025-07-30 11:03:19 +03:00
Improve statistics estimation for single-column GROUP BY in sub-queries
This commit follows the idea of the 4767bc8ff2
. If sub-query has only one
GROUP BY column, we can consider its output variable as being unique. We can
employ this fact in the statistics to make more precise estimations in the
upper query block.
Author: Andrei Lepikhov <lepihov@gmail.com>
Reviewed-by: Heikki Linnakangas <hlinnaka@iki.fi>
Reviewed-by: Alexander Korotkov <aekorotkov@gmail.com>
This commit is contained in:
@ -322,10 +322,10 @@ var_eq_const(VariableStatData *vardata, Oid oproid, Oid collation,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If we matched the var to a unique index or DISTINCT clause, assume
|
* If we matched the var to a unique index, DISTINCT or GROUP-BY clause,
|
||||||
* there is exactly one match regardless of anything else. (This is
|
* assume there is exactly one match regardless of anything else. (This
|
||||||
* slightly bogus, since the index or clause's equality operator might be
|
* is slightly bogus, since the index or clause's equality operator might
|
||||||
* different from ours, but it's much more likely to be right than
|
* be different from ours, but it's much more likely to be right than
|
||||||
* ignoring the information.)
|
* ignoring the information.)
|
||||||
*/
|
*/
|
||||||
if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
|
if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
|
||||||
@ -484,10 +484,10 @@ var_eq_non_const(VariableStatData *vardata, Oid oproid, Oid collation,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If we matched the var to a unique index or DISTINCT clause, assume
|
* If we matched the var to a unique index, DISTINCT or GROUP-BY clause,
|
||||||
* there is exactly one match regardless of anything else. (This is
|
* assume there is exactly one match regardless of anything else. (This
|
||||||
* slightly bogus, since the index or clause's equality operator might be
|
* is slightly bogus, since the index or clause's equality operator might
|
||||||
* different from ours, but it's much more likely to be right than
|
* be different from ours, but it's much more likely to be right than
|
||||||
* ignoring the information.)
|
* ignoring the information.)
|
||||||
*/
|
*/
|
||||||
if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
|
if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
|
||||||
@ -5018,11 +5018,11 @@ ReleaseDummy(HeapTuple tuple)
|
|||||||
* atttype, atttypmod: actual type/typmod of the "var" expression. This is
|
* atttype, atttypmod: actual type/typmod of the "var" expression. This is
|
||||||
* commonly the same as the exposed type of the variable argument,
|
* commonly the same as the exposed type of the variable argument,
|
||||||
* but can be different in binary-compatible-type cases.
|
* but can be different in binary-compatible-type cases.
|
||||||
* isunique: true if we were able to match the var to a unique index or a
|
* isunique: true if we were able to match the var to a unique index, a
|
||||||
* single-column DISTINCT clause, implying its values are unique for
|
* single-column DISTINCT or GROUP-BY clause, implying its values are
|
||||||
* this query. (Caution: this should be trusted for statistical
|
* unique for this query. (Caution: this should be trusted for
|
||||||
* purposes only, since we do not check indimmediate nor verify that
|
* statistical purposes only, since we do not check indimmediate nor
|
||||||
* the exact same definition of equality applies.)
|
* verify that the exact same definition of equality applies.)
|
||||||
* acl_ok: true if current user has permission to read the column(s)
|
* acl_ok: true if current user has permission to read the column(s)
|
||||||
* underlying the pg_statistic entry. This is consulted by
|
* underlying the pg_statistic entry. This is consulted by
|
||||||
* statistic_proc_security_check().
|
* statistic_proc_security_check().
|
||||||
@ -5680,15 +5680,14 @@ examine_simple_variable(PlannerInfo *root, Var *var,
|
|||||||
Assert(IsA(subquery, Query));
|
Assert(IsA(subquery, Query));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Punt if subquery uses set operations or GROUP BY, as these will
|
* Punt if subquery uses set operations or grouping sets, as these
|
||||||
* mash underlying columns' stats beyond recognition. (Set ops are
|
* will mash underlying columns' stats beyond recognition. (Set ops
|
||||||
* particularly nasty; if we forged ahead, we would return stats
|
* are particularly nasty; if we forged ahead, we would return stats
|
||||||
* relevant to only the leftmost subselect...) DISTINCT is also
|
* relevant to only the leftmost subselect...) DISTINCT is also
|
||||||
* problematic, but we check that later because there is a possibility
|
* problematic, but we check that later because there is a possibility
|
||||||
* of learning something even with it.
|
* of learning something even with it.
|
||||||
*/
|
*/
|
||||||
if (subquery->setOperations ||
|
if (subquery->setOperations ||
|
||||||
subquery->groupClause ||
|
|
||||||
subquery->groupingSets)
|
subquery->groupingSets)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
@ -5718,6 +5717,16 @@ examine_simple_variable(PlannerInfo *root, Var *var,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* The same idea as with DISTINCT clause works for a GROUP-BY too */
|
||||||
|
if (subquery->groupClause)
|
||||||
|
{
|
||||||
|
if (list_length(subquery->groupClause) == 1 &&
|
||||||
|
targetIsInSortList(ste, InvalidOid, subquery->groupClause))
|
||||||
|
vardata->isunique = true;
|
||||||
|
/* cannot go further */
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If the sub-query originated from a view with the security_barrier
|
* If the sub-query originated from a view with the security_barrier
|
||||||
* attribute, we must not look at the variable's statistics, though it
|
* attribute, we must not look at the variable's statistics, though it
|
||||||
@ -5869,11 +5878,11 @@ get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If there is a unique index or DISTINCT clause for the variable, assume
|
* If there is a unique index, DISTINCT or GROUP-BY clause for the
|
||||||
* it is unique no matter what pg_statistic says; the statistics could be
|
* variable, assume it is unique no matter what pg_statistic says; the
|
||||||
* out of date, or we might have found a partial unique index that proves
|
* statistics could be out of date, or we might have found a partial
|
||||||
* the var is unique for this query. However, we'd better still believe
|
* unique index that proves the var is unique for this query. However,
|
||||||
* the null-fraction statistic.
|
* we'd better still believe the null-fraction statistic.
|
||||||
*/
|
*/
|
||||||
if (vardata->isunique)
|
if (vardata->isunique)
|
||||||
stadistinct = -1.0 * (1.0 - stanullfrac);
|
stadistinct = -1.0 * (1.0 - stanullfrac);
|
||||||
|
@ -94,7 +94,8 @@ typedef struct VariableStatData
|
|||||||
Oid vartype; /* exposed type of expression */
|
Oid vartype; /* exposed type of expression */
|
||||||
Oid atttype; /* actual type (after stripping relabel) */
|
Oid atttype; /* actual type (after stripping relabel) */
|
||||||
int32 atttypmod; /* actual typmod (after stripping relabel) */
|
int32 atttypmod; /* actual typmod (after stripping relabel) */
|
||||||
bool isunique; /* matches unique index or DISTINCT clause */
|
bool isunique; /* matches unique index, DISTINCT or GROUP-BY
|
||||||
|
* clause */
|
||||||
bool acl_ok; /* result of ACL check on table or column */
|
bool acl_ok; /* result of ACL check on table or column */
|
||||||
} VariableStatData;
|
} VariableStatData;
|
||||||
|
|
||||||
|
@ -3368,3 +3368,18 @@ NOTICE: drop cascades to 2 other objects
|
|||||||
DETAIL: drop cascades to table tststats.priv_test_tbl
|
DETAIL: drop cascades to table tststats.priv_test_tbl
|
||||||
drop cascades to view tststats.priv_test_view
|
drop cascades to view tststats.priv_test_view
|
||||||
DROP USER regress_stats_user1;
|
DROP USER regress_stats_user1;
|
||||||
|
CREATE TABLE grouping_unique (x integer);
|
||||||
|
INSERT INTO grouping_unique (x) SELECT gs FROM generate_series(1,1000) AS gs;
|
||||||
|
ANALYZE grouping_unique;
|
||||||
|
-- Optimiser treat GROUP-BY operator as an 'uniqueser' of the input
|
||||||
|
SELECT * FROM check_estimated_rows('
|
||||||
|
SELECT * FROM generate_series(1, 1) t1 LEFT JOIN (
|
||||||
|
SELECT x FROM grouping_unique t2 GROUP BY x) AS q1
|
||||||
|
ON t1.t1 = q1.x;
|
||||||
|
');
|
||||||
|
estimated | actual
|
||||||
|
-----------+--------
|
||||||
|
1 | 1
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
DROP TABLE grouping_unique;
|
||||||
|
@ -1707,3 +1707,15 @@ RESET SESSION AUTHORIZATION;
|
|||||||
DROP TABLE stats_ext_tbl;
|
DROP TABLE stats_ext_tbl;
|
||||||
DROP SCHEMA tststats CASCADE;
|
DROP SCHEMA tststats CASCADE;
|
||||||
DROP USER regress_stats_user1;
|
DROP USER regress_stats_user1;
|
||||||
|
|
||||||
|
CREATE TABLE grouping_unique (x integer);
|
||||||
|
INSERT INTO grouping_unique (x) SELECT gs FROM generate_series(1,1000) AS gs;
|
||||||
|
ANALYZE grouping_unique;
|
||||||
|
|
||||||
|
-- Optimiser treat GROUP-BY operator as an 'uniqueser' of the input
|
||||||
|
SELECT * FROM check_estimated_rows('
|
||||||
|
SELECT * FROM generate_series(1, 1) t1 LEFT JOIN (
|
||||||
|
SELECT x FROM grouping_unique t2 GROUP BY x) AS q1
|
||||||
|
ON t1.t1 = q1.x;
|
||||||
|
');
|
||||||
|
DROP TABLE grouping_unique;
|
||||||
|
Reference in New Issue
Block a user