mirror of
https://github.com/postgres/postgres.git
synced 2025-04-22 23:02:54 +03:00
Repair error noticed by Roberto Cornacchia: selectivity code
was rejecting negative attnums as bogus, which of course they are not. Add code to get_attdisbursion to produce a useful value for OID attribute, since VACUUM does not store stats for system attributes. Also, repair bug that's been in eqjoinsel for a long time: it was taking the max of the two columns' disbursions, whereas it should use the min.
This commit is contained in:
parent
45500964f6
commit
51db6455ea
@ -7,7 +7,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.25 1999/07/25 23:07:24 tgl Exp $
|
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.26 1999/09/09 02:35:47 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -230,17 +230,7 @@ compute_clause_selec(Query *root, Node *clause)
|
|||||||
int flag;
|
int flag;
|
||||||
|
|
||||||
get_relattval(clause, 0, &relidx, &attno, &constval, &flag);
|
get_relattval(clause, 0, &relidx, &attno, &constval, &flag);
|
||||||
if (relidx <= 0 || attno <= 0)
|
if (relidx && attno)
|
||||||
{
|
|
||||||
/*
|
|
||||||
* attno can be Invalid if the clause had a function in it,
|
|
||||||
* i.e. WHERE myFunc(f) = 10
|
|
||||||
*
|
|
||||||
* XXX should be FIXED to use function selectivity
|
|
||||||
*/
|
|
||||||
s1 = (Cost) (0.5);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
s1 = (Cost) restriction_selectivity(oprrest,
|
s1 = (Cost) restriction_selectivity(oprrest,
|
||||||
opno,
|
opno,
|
||||||
getrelid(relidx,
|
getrelid(relidx,
|
||||||
@ -248,6 +238,16 @@ compute_clause_selec(Query *root, Node *clause)
|
|||||||
attno,
|
attno,
|
||||||
constval,
|
constval,
|
||||||
flag);
|
flag);
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* attno can be 0 if the clause had a function in it,
|
||||||
|
* i.e. WHERE myFunc(f) = 10
|
||||||
|
*
|
||||||
|
* XXX should be FIXED to use function selectivity
|
||||||
|
*/
|
||||||
|
s1 = (Cost) (0.5);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -274,7 +274,8 @@ compute_clause_selec(Query *root, Node *clause)
|
|||||||
attno2;
|
attno2;
|
||||||
|
|
||||||
get_rels_atts(clause, &relid1, &attno1, &relid2, &attno2);
|
get_rels_atts(clause, &relid1, &attno1, &relid2, &attno2);
|
||||||
if (relid1 > 0 && relid2 > 0 && attno1 > 0 && attno2 > 0)
|
if (relid1 && relid2 && attno1 && attno2)
|
||||||
|
|
||||||
s1 = (Cost) join_selectivity(oprjoin,
|
s1 = (Cost) join_selectivity(oprjoin,
|
||||||
opno,
|
opno,
|
||||||
getrelid(relid1,
|
getrelid(relid1,
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.50 1999/08/26 05:09:05 tgl Exp $
|
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.51 1999/09/09 02:35:53 tgl Exp $
|
||||||
*
|
*
|
||||||
* HISTORY
|
* HISTORY
|
||||||
* AUTHOR DATE MAJOR EVENT
|
* AUTHOR DATE MAJOR EVENT
|
||||||
@ -116,7 +116,10 @@ make_opclause(Oper *op, Var *leftop, Var *rightop)
|
|||||||
*
|
*
|
||||||
* Returns the left operand of a clause of the form (op expr expr)
|
* Returns the left operand of a clause of the form (op expr expr)
|
||||||
* or (op expr)
|
* or (op expr)
|
||||||
* NB: it is assumed (for now) that all expr must be Var nodes
|
*
|
||||||
|
* NB: for historical reasons, the result is declared Var *, even
|
||||||
|
* though many callers can cope with results that are not Vars.
|
||||||
|
* The result really ought to be declared Expr * or Node *.
|
||||||
*/
|
*/
|
||||||
Var *
|
Var *
|
||||||
get_leftop(Expr *clause)
|
get_leftop(Expr *clause)
|
||||||
@ -549,8 +552,11 @@ NumRelids(Node *clause)
|
|||||||
* if the "something" is a constant, the value of the constant
|
* if the "something" is a constant, the value of the constant
|
||||||
* flags indicating whether a constant was found, and on which side.
|
* flags indicating whether a constant was found, and on which side.
|
||||||
* Default values are returned if the expression is too complicated,
|
* Default values are returned if the expression is too complicated,
|
||||||
* specifically -1 for the relid and attno, 0 for the constant value.
|
* specifically 0 for the relid and attno, 0 for the constant value.
|
||||||
* Note that InvalidAttrNumber is *not* -1, but 0.
|
*
|
||||||
|
* Note that negative attno values are *not* invalid, but represent
|
||||||
|
* system attributes such as OID. It's sufficient to check for relid=0
|
||||||
|
* to determine whether the routine succeeded.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
get_relattval(Node *clause,
|
get_relattval(Node *clause,
|
||||||
@ -610,8 +616,8 @@ get_relattval(Node *clause,
|
|||||||
{
|
{
|
||||||
/* Duh, it's too complicated for me... */
|
/* Duh, it's too complicated for me... */
|
||||||
default_results:
|
default_results:
|
||||||
*relid = -1;
|
*relid = 0;
|
||||||
*attno = -1;
|
*attno = 0;
|
||||||
*constval = 0;
|
*constval = 0;
|
||||||
*flag = 0;
|
*flag = 0;
|
||||||
return;
|
return;
|
||||||
@ -663,7 +669,7 @@ static int is_single_func(Node *node)
|
|||||||
* for a joinclause.
|
* for a joinclause.
|
||||||
*
|
*
|
||||||
* If the clause is not of the form (var op var) or if any of the vars
|
* If the clause is not of the form (var op var) or if any of the vars
|
||||||
* refer to nested attributes, then -1's are returned.
|
* refer to nested attributes, then zeroes are returned.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
@ -674,10 +680,10 @@ get_rels_atts(Node *clause,
|
|||||||
AttrNumber *attno2)
|
AttrNumber *attno2)
|
||||||
{
|
{
|
||||||
/* set default values */
|
/* set default values */
|
||||||
*relid1 = -1;
|
*relid1 = 0;
|
||||||
*attno1 = -1;
|
*attno1 = 0;
|
||||||
*relid2 = -1;
|
*relid2 = 0;
|
||||||
*attno2 = -1;
|
*attno2 = 0;
|
||||||
|
|
||||||
if (is_opclause(clause))
|
if (is_opclause(clause))
|
||||||
{
|
{
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.36 1999/07/25 23:07:26 tgl Exp $
|
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.37 1999/09/09 02:35:53 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -288,7 +288,7 @@ index_selectivity(Query *root,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* restriction_selectivity in lisp system.
|
* restriction_selectivity
|
||||||
*
|
*
|
||||||
* NOTE: The routine is now merged with RestrictionClauseSelectivity
|
* NOTE: The routine is now merged with RestrictionClauseSelectivity
|
||||||
* as defined in plancat.c
|
* as defined in plancat.c
|
||||||
@ -298,7 +298,7 @@ index_selectivity(Query *root,
|
|||||||
* operator relation, by calling the function manager.
|
* operator relation, by calling the function manager.
|
||||||
*
|
*
|
||||||
* XXX The assumption in the selectivity procedures is that if the
|
* XXX The assumption in the selectivity procedures is that if the
|
||||||
* relation OIDs or attribute numbers are -1, then the clause
|
* relation OIDs or attribute numbers are 0, then the clause
|
||||||
* isn't of the form (op var const).
|
* isn't of the form (op var const).
|
||||||
*/
|
*/
|
||||||
Cost
|
Cost
|
||||||
@ -337,7 +337,7 @@ restriction_selectivity(Oid functionObjectId,
|
|||||||
* information.
|
* information.
|
||||||
*
|
*
|
||||||
* XXX The assumption in the selectivity procedures is that if the
|
* XXX The assumption in the selectivity procedures is that if the
|
||||||
* relation OIDs or attribute numbers are -1, then the clause
|
* relation OIDs or attribute numbers are 0, then the clause
|
||||||
* isn't of the form (op var var).
|
* isn't of the form (op var var).
|
||||||
*/
|
*/
|
||||||
Cost
|
Cost
|
||||||
|
@ -10,7 +10,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.39 1999/08/21 00:56:18 tgl Exp $
|
* $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.40 1999/09/09 02:35:58 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -31,7 +31,7 @@
|
|||||||
#include "utils/syscache.h"
|
#include "utils/syscache.h"
|
||||||
|
|
||||||
/* N is not a valid var/constant or relation id */
|
/* N is not a valid var/constant or relation id */
|
||||||
#define NONVALUE(N) ((N) == -1)
|
#define NONVALUE(N) ((N) == 0)
|
||||||
|
|
||||||
/* are we looking at a functional index selectivity request? */
|
/* are we looking at a functional index selectivity request? */
|
||||||
#define FunctionalSelectivity(nIndKeys,attNum) ((attNum)==InvalidAttrNumber)
|
#define FunctionalSelectivity(nIndKeys,attNum) ((attNum)==InvalidAttrNumber)
|
||||||
@ -170,7 +170,9 @@ eqsel(Oid opid,
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* No VACUUM ANALYZE stats available, so make a guess using
|
/* No VACUUM ANALYZE stats available, so make a guess using
|
||||||
* the disbursion stat (if we have that, which is unlikely...)
|
* the disbursion stat (if we have that, which is unlikely
|
||||||
|
* for a normal attribute; but for a system attribute we may
|
||||||
|
* be able to estimate it).
|
||||||
*/
|
*/
|
||||||
selec = get_attdisbursion(relid, attno, 0.01);
|
selec = get_attdisbursion(relid, attno, 0.01);
|
||||||
}
|
}
|
||||||
@ -366,7 +368,7 @@ eqjoinsel(Oid opid,
|
|||||||
float64 result;
|
float64 result;
|
||||||
float64data num1,
|
float64data num1,
|
||||||
num2,
|
num2,
|
||||||
max;
|
min;
|
||||||
|
|
||||||
result = (float64) palloc(sizeof(float64data));
|
result = (float64) palloc(sizeof(float64data));
|
||||||
if (NONVALUE(attno1) || NONVALUE(relid1) ||
|
if (NONVALUE(attno1) || NONVALUE(relid1) ||
|
||||||
@ -376,11 +378,23 @@ eqjoinsel(Oid opid,
|
|||||||
{
|
{
|
||||||
num1 = get_attdisbursion(relid1, attno1, 0.01);
|
num1 = get_attdisbursion(relid1, attno1, 0.01);
|
||||||
num2 = get_attdisbursion(relid2, attno2, 0.01);
|
num2 = get_attdisbursion(relid2, attno2, 0.01);
|
||||||
max = (num1 > num2) ? num1 : num2;
|
/*
|
||||||
if (max <= 0)
|
* The join selectivity cannot be more than num2, since each
|
||||||
*result = 1.0;
|
* tuple in table 1 could match no more than num2 fraction of
|
||||||
else
|
* tuples in table 2 (and that's only if the table-1 tuple
|
||||||
*result = max;
|
* matches the most common value in table 2, so probably it's
|
||||||
|
* less). By the same reasoning it is not more than num1.
|
||||||
|
* The min is therefore an upper bound.
|
||||||
|
*
|
||||||
|
* XXX can we make a better estimate here? Using the nullfrac
|
||||||
|
* statistic might be helpful, for example. Assuming the operator
|
||||||
|
* is strict (does not succeed for null inputs) then the selectivity
|
||||||
|
* couldn't be more than (1-nullfrac1)*(1-nullfrac2), which might
|
||||||
|
* be usefully small if there are many nulls. How about applying
|
||||||
|
* the operator to the most common values?
|
||||||
|
*/
|
||||||
|
min = (num1 < num2) ? num1 : num2;
|
||||||
|
*result = min;
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
10
src/backend/utils/cache/lsyscache.c
vendored
10
src/backend/utils/cache/lsyscache.c
vendored
@ -6,7 +6,7 @@
|
|||||||
* Copyright (c) 1994, Regents of the University of California
|
* Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.33 1999/08/16 02:06:25 tgl Exp $
|
* $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.34 1999/09/09 02:36:04 tgl Exp $
|
||||||
*
|
*
|
||||||
* NOTES
|
* NOTES
|
||||||
* Eventually, the index information should go through here, too.
|
* Eventually, the index information should go through here, too.
|
||||||
@ -223,6 +223,14 @@ get_attdisbursion(Oid relid, AttrNumber attnum, double min_estimate)
|
|||||||
if (disbursion < 0.0) /* VACUUM thinks there are no duplicates */
|
if (disbursion < 0.0) /* VACUUM thinks there are no duplicates */
|
||||||
return 1.0 / (double) ntuples;
|
return 1.0 / (double) ntuples;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* VACUUM ANALYZE does not compute disbursion for system attributes,
|
||||||
|
* but some of them can reasonably be assumed unique anyway.
|
||||||
|
*/
|
||||||
|
if (attnum == ObjectIdAttributeNumber ||
|
||||||
|
attnum == SelfItemPointerAttributeNumber)
|
||||||
|
return 1.0 / (double) ntuples;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* VACUUM ANALYZE has not been run for this table.
|
* VACUUM ANALYZE has not been run for this table.
|
||||||
* Produce an estimate = 1/numtuples. This may produce
|
* Produce an estimate = 1/numtuples. This may produce
|
||||||
|
Loading…
x
Reference in New Issue
Block a user