mirror of
				https://github.com/postgres/postgres.git
				synced 2025-11-03 09:13:20 +03:00 
			
		
		
		
	Improve eqjoinsel's ndistinct clamping to work for multiple levels of join.
This patch fixes an oversight in my commit
7f3eba30c9 of 2008-10-23.  That patch
accounted for baserel restriction clauses that reduced the number of rows
coming out of a table (and hence the number of possibly-distinct values of
a join variable), but not for join restriction clauses that might have been
applied at a lower level of join.  To account for the latter, look up the
sizes of the min_lefthand and min_righthand inputs of the current join,
and clamp with those in the same way as for the base relations.
Noted while investigating a complaint from Ben Chobot, although this in
itself doesn't seem to explain his report.
Back-patch to 8.4; previous versions used different estimation methods
for which this heuristic isn't relevant.
			
			
This commit is contained in:
		@@ -141,9 +141,11 @@ static double ineq_histogram_selectivity(PlannerInfo *root,
 | 
			
		||||
						   FmgrInfo *opproc, bool isgt,
 | 
			
		||||
						   Datum constval, Oid consttype);
 | 
			
		||||
static double eqjoinsel_inner(Oid operator,
 | 
			
		||||
				VariableStatData *vardata1, VariableStatData *vardata2);
 | 
			
		||||
				VariableStatData *vardata1, VariableStatData *vardata2,
 | 
			
		||||
				RelOptInfo *rel1, RelOptInfo *rel2);
 | 
			
		||||
static double eqjoinsel_semi(Oid operator,
 | 
			
		||||
			   VariableStatData *vardata1, VariableStatData *vardata2);
 | 
			
		||||
			   VariableStatData *vardata1, VariableStatData *vardata2,
 | 
			
		||||
			   RelOptInfo *rel1, RelOptInfo *rel2);
 | 
			
		||||
static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
 | 
			
		||||
				  Datum lobound, Datum hibound, Oid boundstypid,
 | 
			
		||||
				  double *scaledlobound, double *scaledhibound);
 | 
			
		||||
@@ -172,6 +174,7 @@ static bool get_actual_variable_range(PlannerInfo *root,
 | 
			
		||||
						  VariableStatData *vardata,
 | 
			
		||||
						  Oid sortop,
 | 
			
		||||
						  Datum *min, Datum *max);
 | 
			
		||||
static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);
 | 
			
		||||
static Selectivity prefix_selectivity(PlannerInfo *root,
 | 
			
		||||
				   VariableStatData *vardata,
 | 
			
		||||
				   Oid vartype, Oid opfamily, Const *prefixcon);
 | 
			
		||||
@@ -2007,24 +2010,47 @@ eqjoinsel(PG_FUNCTION_ARGS)
 | 
			
		||||
	VariableStatData vardata1;
 | 
			
		||||
	VariableStatData vardata2;
 | 
			
		||||
	bool		join_is_reversed;
 | 
			
		||||
	RelOptInfo *rel1;
 | 
			
		||||
	RelOptInfo *rel2;
 | 
			
		||||
 | 
			
		||||
	get_join_variables(root, args, sjinfo,
 | 
			
		||||
					   &vardata1, &vardata2, &join_is_reversed);
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Identify the join's direct input relations.  We use the min lefthand
 | 
			
		||||
	 * and min righthand as the inputs, even though the join might actually
 | 
			
		||||
	 * get done with larger input relations.  The min inputs are guaranteed to
 | 
			
		||||
	 * have been formed by now, though, and always using them ensures
 | 
			
		||||
	 * consistency of estimates.
 | 
			
		||||
	 */
 | 
			
		||||
	if (!join_is_reversed)
 | 
			
		||||
	{
 | 
			
		||||
		rel1 = find_join_input_rel(root, sjinfo->min_lefthand);
 | 
			
		||||
		rel2 = find_join_input_rel(root, sjinfo->min_righthand);
 | 
			
		||||
	}
 | 
			
		||||
	else
 | 
			
		||||
	{
 | 
			
		||||
		rel1 = find_join_input_rel(root, sjinfo->min_righthand);
 | 
			
		||||
		rel2 = find_join_input_rel(root, sjinfo->min_lefthand);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	switch (sjinfo->jointype)
 | 
			
		||||
	{
 | 
			
		||||
		case JOIN_INNER:
 | 
			
		||||
		case JOIN_LEFT:
 | 
			
		||||
		case JOIN_FULL:
 | 
			
		||||
			selec = eqjoinsel_inner(operator, &vardata1, &vardata2);
 | 
			
		||||
			selec = eqjoinsel_inner(operator, &vardata1, &vardata2,
 | 
			
		||||
									rel1, rel2);
 | 
			
		||||
			break;
 | 
			
		||||
		case JOIN_SEMI:
 | 
			
		||||
		case JOIN_ANTI:
 | 
			
		||||
			if (!join_is_reversed)
 | 
			
		||||
				selec = eqjoinsel_semi(operator, &vardata1, &vardata2);
 | 
			
		||||
				selec = eqjoinsel_semi(operator, &vardata1, &vardata2,
 | 
			
		||||
									   rel1, rel2);
 | 
			
		||||
			else
 | 
			
		||||
				selec = eqjoinsel_semi(get_commutator(operator),
 | 
			
		||||
									   &vardata2, &vardata1);
 | 
			
		||||
									   &vardata2, &vardata1,
 | 
			
		||||
									   rel2, rel1);
 | 
			
		||||
			break;
 | 
			
		||||
		default:
 | 
			
		||||
			/* other values not expected here */
 | 
			
		||||
@@ -2050,7 +2076,8 @@ eqjoinsel(PG_FUNCTION_ARGS)
 | 
			
		||||
 */
 | 
			
		||||
static double
 | 
			
		||||
eqjoinsel_inner(Oid operator,
 | 
			
		||||
				VariableStatData *vardata1, VariableStatData *vardata2)
 | 
			
		||||
				VariableStatData *vardata1, VariableStatData *vardata2,
 | 
			
		||||
				RelOptInfo *rel1, RelOptInfo *rel2)
 | 
			
		||||
{
 | 
			
		||||
	double		selec;
 | 
			
		||||
	double		nd1;
 | 
			
		||||
@@ -2251,15 +2278,19 @@ eqjoinsel_inner(Oid operator,
 | 
			
		||||
		 * be, providing a crude correction for the selectivity of restriction
 | 
			
		||||
		 * clauses on those relations.	(We don't do that in the other path
 | 
			
		||||
		 * since there we are comparing the nd values to stats for the whole
 | 
			
		||||
		 * relations.)
 | 
			
		||||
		 * relations.)  We can apply this clamp both with respect to the base
 | 
			
		||||
		 * relations from which the join variables come, and to the immediate
 | 
			
		||||
		 * input relations of the current join.
 | 
			
		||||
		 */
 | 
			
		||||
		double		nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
 | 
			
		||||
		double		nullfrac2 = stats2 ? stats2->stanullfrac : 0.0;
 | 
			
		||||
 | 
			
		||||
		if (vardata1->rel)
 | 
			
		||||
			nd1 = Min(nd1, vardata1->rel->rows);
 | 
			
		||||
		nd1 = Min(nd1, rel1->rows);
 | 
			
		||||
		if (vardata2->rel)
 | 
			
		||||
			nd2 = Min(nd2, vardata2->rel->rows);
 | 
			
		||||
		nd2 = Min(nd2, rel2->rows);
 | 
			
		||||
 | 
			
		||||
		selec = (1.0 - nullfrac1) * (1.0 - nullfrac2);
 | 
			
		||||
		if (nd1 > nd2)
 | 
			
		||||
@@ -2286,7 +2317,8 @@ eqjoinsel_inner(Oid operator,
 | 
			
		||||
 */
 | 
			
		||||
static double
 | 
			
		||||
eqjoinsel_semi(Oid operator,
 | 
			
		||||
			   VariableStatData *vardata1, VariableStatData *vardata2)
 | 
			
		||||
			   VariableStatData *vardata1, VariableStatData *vardata2,
 | 
			
		||||
			   RelOptInfo *rel1, RelOptInfo *rel2)
 | 
			
		||||
{
 | 
			
		||||
	double		selec;
 | 
			
		||||
	double		nd1;
 | 
			
		||||
@@ -2434,8 +2466,10 @@ eqjoinsel_semi(Oid operator,
 | 
			
		||||
		{
 | 
			
		||||
			if (vardata1->rel)
 | 
			
		||||
				nd1 = Min(nd1, vardata1->rel->rows);
 | 
			
		||||
			nd1 = Min(nd1, rel1->rows);
 | 
			
		||||
			if (vardata2->rel)
 | 
			
		||||
				nd2 = Min(nd2, vardata2->rel->rows);
 | 
			
		||||
			nd2 = Min(nd2, rel2->rows);
 | 
			
		||||
 | 
			
		||||
			if (nd1 <= nd2 || nd2 <= 0)
 | 
			
		||||
				selec = 1.0 - nullfrac1;
 | 
			
		||||
@@ -4758,6 +4792,37 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
 | 
			
		||||
	return have_data;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * find_join_input_rel
 | 
			
		||||
 *		Look up the input relation for a join.
 | 
			
		||||
 *
 | 
			
		||||
 * We assume that the input relation's RelOptInfo must have been constructed
 | 
			
		||||
 * already.
 | 
			
		||||
 */
 | 
			
		||||
static RelOptInfo *
 | 
			
		||||
find_join_input_rel(PlannerInfo *root, Relids relids)
 | 
			
		||||
{
 | 
			
		||||
	RelOptInfo *rel = NULL;
 | 
			
		||||
 | 
			
		||||
	switch (bms_membership(relids))
 | 
			
		||||
	{
 | 
			
		||||
		case BMS_EMPTY_SET:
 | 
			
		||||
			/* should not happen */
 | 
			
		||||
			break;
 | 
			
		||||
		case BMS_SINGLETON:
 | 
			
		||||
			rel = find_base_rel(root, bms_singleton_member(relids));
 | 
			
		||||
			break;
 | 
			
		||||
		case BMS_MULTIPLE:
 | 
			
		||||
			rel = find_join_rel(root, relids);
 | 
			
		||||
			break;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (rel == NULL)
 | 
			
		||||
		elog(ERROR, "could not find RelOptInfo for given relids");
 | 
			
		||||
 | 
			
		||||
	return rel;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*-------------------------------------------------------------------------
 | 
			
		||||
 *
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user