mirror of
https://github.com/postgres/postgres.git
synced 2025-09-02 04:21:28 +03:00
Add support for multivariate MCV lists
Introduce a third extended statistic type, supported by the CREATE STATISTICS command - MCV lists, a generalization of the statistic already built and used for individual columns. Compared to the already supported types (n-distinct coefficients and functional dependencies), MCV lists are more complex, include column values and allow estimation of a much wider range of common clauses (equality and inequality conditions, IS NULL, IS NOT NULL etc.). Similarly to the other types, a new pseudo-type (pg_mcv_list) is used. Author: Tomas Vondra Reviewed-by: Dean Rasheed, David Rowley, Mark Dilger, Alvaro Herrera Discussion: https://postgr.es/m/dfdac334-9cf2-2597-fb27-f0fb3753f435@2ndquadrant.com
This commit is contained in:
@@ -324,6 +324,12 @@
|
||||
{ castsource => 'pg_dependencies', casttarget => 'text', castfunc => '0',
|
||||
castcontext => 'i', castmethod => 'i' },
|
||||
|
||||
# pg_mcv_list can be coerced to, but not from, bytea and text
|
||||
{ castsource => 'pg_mcv_list', casttarget => 'bytea', castfunc => '0',
|
||||
castcontext => 'i', castmethod => 'b' },
|
||||
{ castsource => 'pg_mcv_list', casttarget => 'text', castfunc => '0',
|
||||
castcontext => 'i', castmethod => 'i' },
|
||||
|
||||
# Datetime category
|
||||
{ castsource => 'date', casttarget => 'timestamp',
|
||||
castfunc => 'timestamp(date)', castcontext => 'i', castmethod => 'f' },
|
||||
|
@@ -4999,6 +4999,30 @@
|
||||
proname => 'pg_dependencies_send', provolatile => 's', prorettype => 'bytea',
|
||||
proargtypes => 'pg_dependencies', prosrc => 'pg_dependencies_send' },
|
||||
|
||||
{ oid => '5018', descr => 'I/O',
|
||||
proname => 'pg_mcv_list_in', prorettype => 'pg_mcv_list',
|
||||
proargtypes => 'cstring', prosrc => 'pg_mcv_list_in' },
|
||||
{ oid => '5019', descr => 'I/O',
|
||||
proname => 'pg_mcv_list_out', prorettype => 'cstring',
|
||||
proargtypes => 'pg_mcv_list', prosrc => 'pg_mcv_list_out' },
|
||||
{ oid => '5020', descr => 'I/O',
|
||||
proname => 'pg_mcv_list_recv', provolatile => 's',
|
||||
prorettype => 'pg_mcv_list', proargtypes => 'internal',
|
||||
prosrc => 'pg_mcv_list_recv' },
|
||||
{ oid => '5021', descr => 'I/O',
|
||||
proname => 'pg_mcv_list_send', provolatile => 's', prorettype => 'bytea',
|
||||
proargtypes => 'pg_mcv_list', prosrc => 'pg_mcv_list_send' },
|
||||
|
||||
{ oid => '3427',
|
||||
descr => 'details about MCV list items',
|
||||
proname => 'pg_mcv_list_items', prorows => '1000', proisstrict => 't',
|
||||
proretset => 't', provolatile => 's', prorettype => 'record',
|
||||
proargtypes => 'pg_mcv_list',
|
||||
proallargtypes => '{pg_mcv_list,int4,text,_bool,float8,float8}',
|
||||
proargmodes => '{i,o,o,o,o,o}',
|
||||
proargnames => '{mcv_list,index,values,nulls,frequency,base_frequency}',
|
||||
prosrc => 'pg_stats_ext_mcvlist_items' },
|
||||
|
||||
{ oid => '1928', descr => 'statistics: number of scans done for table/index',
|
||||
proname => 'pg_stat_get_numscans', provolatile => 's', proparallel => 'r',
|
||||
prorettype => 'int8', proargtypes => 'oid',
|
||||
|
@@ -49,6 +49,7 @@ CATALOG(pg_statistic_ext,3381,StatisticExtRelationId)
|
||||
* to build */
|
||||
pg_ndistinct stxndistinct; /* ndistinct coefficients (serialized) */
|
||||
pg_dependencies stxdependencies; /* dependencies (serialized) */
|
||||
pg_mcv_list stxmcv; /* MCV (serialized) */
|
||||
#endif
|
||||
|
||||
} FormData_pg_statistic_ext;
|
||||
@@ -64,6 +65,7 @@ typedef FormData_pg_statistic_ext *Form_pg_statistic_ext;
|
||||
|
||||
#define STATS_EXT_NDISTINCT 'd'
|
||||
#define STATS_EXT_DEPENDENCIES 'f'
|
||||
#define STATS_EXT_MCV 'm'
|
||||
|
||||
#endif /* EXPOSE_TO_CLIENT_CODE */
|
||||
|
||||
|
@@ -165,6 +165,13 @@
|
||||
typoutput => 'pg_dependencies_out', typreceive => 'pg_dependencies_recv',
|
||||
typsend => 'pg_dependencies_send', typalign => 'i', typstorage => 'x',
|
||||
typcollation => 'default' },
|
||||
{ oid => '5017', oid_symbol => 'PGMCVLISTOID',
|
||||
descr => 'multivariate MCV list',
|
||||
typname => 'pg_mcv_list', typlen => '-1', typbyval => 'f',
|
||||
typcategory => 'S', typinput => 'pg_mcv_list_in',
|
||||
typoutput => 'pg_mcv_list_out', typreceive => 'pg_mcv_list_recv',
|
||||
typsend => 'pg_mcv_list_send', typalign => 'i', typstorage => 'x',
|
||||
typcollation => 'default' },
|
||||
{ oid => '32', oid_symbol => 'PGDDLCOMMANDOID',
|
||||
descr => 'internal type for passing CollectedCommand',
|
||||
typname => 'pg_ddl_command', typlen => 'SIZEOF_POINTER', typbyval => 't',
|
||||
|
@@ -87,6 +87,7 @@ extern Bitmapset *bms_difference(const Bitmapset *a, const Bitmapset *b);
|
||||
extern bool bms_is_subset(const Bitmapset *a, const Bitmapset *b);
|
||||
extern BMS_Comparison bms_subset_compare(const Bitmapset *a, const Bitmapset *b);
|
||||
extern bool bms_is_member(int x, const Bitmapset *a);
|
||||
extern int bms_member_index(Bitmapset *a, int x);
|
||||
extern bool bms_overlap(const Bitmapset *a, const Bitmapset *b);
|
||||
extern bool bms_overlap_list(const Bitmapset *a, const struct List *b);
|
||||
extern bool bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b);
|
||||
|
@@ -58,6 +58,12 @@ extern Selectivity clause_selectivity(PlannerInfo *root,
|
||||
int varRelid,
|
||||
JoinType jointype,
|
||||
SpecialJoinInfo *sjinfo);
|
||||
extern Selectivity clauselist_selectivity_simple(PlannerInfo *root,
|
||||
List *clauses,
|
||||
int varRelid,
|
||||
JoinType jointype,
|
||||
SpecialJoinInfo *sjinfo,
|
||||
Bitmapset *estimatedclauses);
|
||||
extern Selectivity clauselist_selectivity(PlannerInfo *root,
|
||||
List *clauses,
|
||||
int varRelid,
|
||||
|
@@ -31,6 +31,15 @@ typedef struct
|
||||
int tupno; /* position index for tuple it came from */
|
||||
} ScalarItem;
|
||||
|
||||
/* (de)serialization info */
|
||||
/*
 * DimensionInfo
 *		Bookkeeping used while serializing/deserializing extended statistics.
 *
 * NOTE(review): presumably one instance is kept per statistics dimension
 * (column) by the MCV (de)serialization code -- confirm against the users
 * of this struct, which are not visible in this header.
 */
typedef struct DimensionInfo
|
||||
{
|
||||
int nvalues; /* number of deduplicated values */
|
||||
int nbytes; /* number of bytes (serialized) */
|
||||
int typlen; /* pg_type.typlen of the value type */
|
||||
bool typbyval; /* pg_type.typbyval of the value type */
|
||||
} DimensionInfo;
|
||||
|
||||
/* multi-sort */
|
||||
typedef struct MultiSortSupportData
|
||||
{
|
||||
@@ -44,6 +53,7 @@ typedef struct SortItem
|
||||
{
|
||||
Datum *values;
|
||||
bool *isnull;
|
||||
int count;
|
||||
} SortItem;
|
||||
|
||||
extern MVNDistinct *statext_ndistinct_build(double totalrows,
|
||||
@@ -57,6 +67,12 @@ extern MVDependencies *statext_dependencies_build(int numrows, HeapTuple *rows,
|
||||
extern bytea *statext_dependencies_serialize(MVDependencies *dependencies);
|
||||
extern MVDependencies *statext_dependencies_deserialize(bytea *data);
|
||||
|
||||
extern MCVList *statext_mcv_build(int numrows, HeapTuple *rows,
|
||||
Bitmapset *attrs, VacAttrStats **stats,
|
||||
double totalrows);
|
||||
extern bytea *statext_mcv_serialize(MCVList * mcv, VacAttrStats **stats);
|
||||
extern MCVList * statext_mcv_deserialize(bytea *data);
|
||||
|
||||
extern MultiSortSupport multi_sort_init(int ndims);
|
||||
extern void multi_sort_add_dimension(MultiSortSupport mss, int sortdim,
|
||||
Oid oper, Oid collation);
|
||||
@@ -65,5 +81,29 @@ extern int multi_sort_compare_dim(int dim, const SortItem *a,
|
||||
const SortItem *b, MultiSortSupport mss);
|
||||
extern int multi_sort_compare_dims(int start, int end, const SortItem *a,
|
||||
const SortItem *b, MultiSortSupport mss);
|
||||
extern int compare_scalars_simple(const void *a, const void *b, void *arg);
|
||||
extern int compare_datums_simple(Datum a, Datum b, SortSupport ssup);
|
||||
|
||||
extern void *bsearch_arg(const void *key, const void *base,
|
||||
size_t nmemb, size_t size,
|
||||
int (*compar) (const void *, const void *, void *),
|
||||
void *arg);
|
||||
|
||||
extern AttrNumber *build_attnums_array(Bitmapset *attrs, int *numattrs);
|
||||
|
||||
extern SortItem *build_sorted_items(int numrows, int *nitems, HeapTuple *rows,
|
||||
TupleDesc tdesc, MultiSortSupport mss,
|
||||
int numattrs, AttrNumber *attnums);
|
||||
|
||||
|
||||
extern Selectivity mcv_clauselist_selectivity(PlannerInfo *root,
|
||||
StatisticExtInfo *stat,
|
||||
List *clauses,
|
||||
int varRelid,
|
||||
JoinType jointype,
|
||||
SpecialJoinInfo *sjinfo,
|
||||
RelOptInfo *rel,
|
||||
Selectivity *basesel,
|
||||
Selectivity *totalsel);
|
||||
|
||||
#endif /* EXTENDED_STATS_INTERNAL_H */
|
||||
|
@@ -78,8 +78,41 @@ typedef struct MVDependencies
|
||||
/* size of the struct excluding the deps array */
|
||||
#define SizeOfDependencies (offsetof(MVDependencies, ndeps) + sizeof(uint32))
|
||||
|
||||
/* used to flag stats serialized to bytea */
|
||||
#define STATS_MCV_MAGIC 0xE1A651C2 /* marks serialized bytea */
|
||||
#define STATS_MCV_TYPE_BASIC 1 /* basic MCV list type */
|
||||
|
||||
/* max items in MCV list (mostly arbitrary number) */
|
||||
#define STATS_MCVLIST_MAX_ITEMS 8192
|
||||
|
||||
/*
|
||||
* Multivariate MCV (most-common value) lists
|
||||
*
|
||||
* A straightforward extension of MCV items - i.e. a list (array) of
|
||||
* combinations of attribute values, together with a frequency and null flags.
|
||||
*/
|
||||
/*
 * MCVItem
 *		One entry of a multivariate MCV list: a combination of attribute
 *		values with its NULL flags and two frequencies.
 *
 * NOTE(review): isnull and values are presumably arrays with one element
 * per dimension of the owning MCVList (MCVList.ndimensions) -- confirm
 * against the code that allocates the items.
 */
typedef struct MCVItem
|
||||
{
|
||||
double frequency; /* frequency of this combination */
|
||||
double base_frequency; /* frequency if the attributes were independent */
|
||||
bool *isnull; /* NULL flags */
|
||||
Datum *values; /* item values */
|
||||
} MCVItem;
|
||||
|
||||
/* multivariate MCV list - essentially an array of MCV items */
|
||||
/*
 * MCVList
 *		Header fields describing the list, followed by a pointer to the
 *		array of MCVItem pointers.
 *
 * NOTE(review): magic and type are presumably set to STATS_MCV_MAGIC and
 * STATS_MCV_TYPE_BASIC, and nitems presumably never exceeds
 * STATS_MCVLIST_MAX_ITEMS -- confirm against the build/deserialize code.
 */
typedef struct MCVList
|
||||
{
|
||||
uint32 magic; /* magic constant marker */
|
||||
uint32 type; /* type of MCV list (BASIC) */
|
||||
uint32 nitems; /* number of MCV items in the array */
|
||||
AttrNumber ndimensions; /* number of dimensions */
|
||||
Oid types[STATS_MAX_DIMENSIONS]; /* OIDs of data types; array is sized for the maximum number of dimensions */
|
||||
MCVItem **items; /* array of pointers to MCV items */
|
||||
} MCVList;
|
||||
|
||||
extern MVNDistinct *statext_ndistinct_load(Oid mvoid);
|
||||
extern MVDependencies *statext_dependencies_load(Oid mvoid);
|
||||
extern MCVList *statext_mcv_load(Oid mvoid);
|
||||
|
||||
extern void BuildRelationExtStatistics(Relation onerel, double totalrows,
|
||||
int numrows, HeapTuple *rows,
|
||||
@@ -92,6 +125,13 @@ extern Selectivity dependencies_clauselist_selectivity(PlannerInfo *root,
|
||||
SpecialJoinInfo *sjinfo,
|
||||
RelOptInfo *rel,
|
||||
Bitmapset **estimatedclauses);
|
||||
extern Selectivity statext_clauselist_selectivity(PlannerInfo *root,
|
||||
List *clauses,
|
||||
int varRelid,
|
||||
JoinType jointype,
|
||||
SpecialJoinInfo *sjinfo,
|
||||
RelOptInfo *rel,
|
||||
Bitmapset **estimatedclauses);
|
||||
extern bool has_stats_of_kind(List *stats, char requiredkind);
|
||||
extern StatisticExtInfo *choose_best_statistics(List *stats,
|
||||
Bitmapset *attnums, char requiredkind);
|
||||
|
Reference in New Issue
Block a user