First cut at full support for OUTER JOINs. There are still a few loose
ends to clean up (see my message of same date to pghackers), but mostly it works. INITDB REQUIRED!
2025-11-09 06:21:09 +03:00 · 2000-09-12 21:07:18 +00:00
parent b5c0ab278b
commit ed5003c584
93 changed files with 6386 additions and 4262 deletions
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.73 2000/08/24 03:29:05 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.74 2000/09/12 21:06:58 tgl Exp $
 *
 * HISTORY
 *	  AUTHOR			DATE			MAJOR EVENT
@@ -591,7 +591,7 @@ check_subplans_for_ungrouped_vars_walker(Node *node,
 					elog(ERROR, "cache lookup of attribute %d in relation %u failed",
 						 var->varattno, rte->relid);
 				elog(ERROR, "Sub-SELECT uses un-GROUPed attribute %s.%s from outer query",
-					 rte->ref->relname, attname);
+					 rte->eref->relname, attname);
 			}
 		}
 	}
@@ -1639,25 +1639,44 @@ simplify_op_or_func(Expr *expr, List *args)
 * will have List structure at the top level, and it handles TargetEntry nodes
 * so that a scan of a target list can be handled without additional code.
 * (But only the "expr" part of a TargetEntry is examined, unless the walker
- * chooses to process TargetEntry nodes specially.)
+ * chooses to process TargetEntry nodes specially.)  Also, RangeTblRef and
+ * JoinExpr nodes are handled, so that qual expressions in a jointree can be
+ * processed without additional code.
 *
- * expression_tree_walker will handle a SUBPLAN_EXPR node by recursing into
- * the args and slink->oper lists (which belong to the outer plan), but it
- * will *not* visit the inner plan, since that's typically what expression
- * tree walkers want.  A walker that wants to visit the subplan can force
- * appropriate behavior by recognizing subplan expression nodes and doing
- * the right thing.
+ * expression_tree_walker will handle SubLink and SubPlan nodes by recursing
+ * normally into the "lefthand" arguments (which belong to the outer plan).
+ * It will also call the walker on the sub-Query node; however, when
+ * expression_tree_walker itself is called on a Query node, it does nothing
+ * and returns "false".  The net effect is that unless the walker does
+ * something special at a Query node, sub-selects will not be visited
+ * during an expression tree walk.  This is exactly the behavior wanted
+ * in many cases --- and for those walkers that do want to recurse into
+ * sub-selects, special behavior is typically needed anyway at the entry
+ * to a sub-select (such as incrementing a depth counter).  A walker that
+ * wants to examine sub-selects should include code along the lines of:
 *
- * Bare SubLink nodes (without a SUBPLAN_EXPR) are handled by recursing into
- * the "lefthand" argument list only.  (A bare SubLink should be seen only if
- * the tree has not yet been processed by subselect.c.)  Again, this can be
- * overridden by the walker, but it seems to be the most useful default
- * behavior.
+ *		if (IsA(node, Query))
+ *		{
+ *			adjust context for subquery;
+ *			result = query_tree_walker((Query *) node, my_walker, context);
+ *			restore context if needed;
+ *			return result;
+ *		}
+ *
+ * query_tree_walker is a convenience routine (see below) that calls the
+ * walker on all the expression subtrees of the given Query node.
+ *
+ * NOTE: currently, because make_subplan() clears the subselect link in
+ * a SubLink node, it is not actually possible to recurse into subselects
+ * of an already-planned expression tree.  This is OK for current uses,
+ * but ought to be cleaned up when we redesign querytree processing.
 *--------------------
 */

 bool
-			expression_tree_walker(Node *node, bool (*walker) (), void *context)
+expression_tree_walker(Node *node,
+					   bool (*walker) (),
+					   void *context)
 {
 	List	   *temp;

@@ -1677,6 +1696,7 @@ bool
 		case T_Const:
 		case T_Var:
 		case T_Param:
+		case T_RangeTblRef:
 			/* primitive node types with no subnodes */
 			break;
 		case T_Expr:
@@ -1750,17 +1770,31 @@ bool

 				/*
 				 * If the SubLink has already been processed by
-				 * subselect.c, it will have lefthand=NIL, and we only
-				 * need to look at the oper list.  Otherwise we only need
-				 * to look at lefthand (the Oper nodes in the oper list
-				 * are deemed uninteresting).
+				 * subselect.c, it will have lefthand=NIL, and we need to
+				 * scan the oper list.  Otherwise we only need to look at
+				 * the lefthand list (the incomplete Oper nodes in the oper
+				 * list are deemed uninteresting, perhaps even confusing).
 				 */
 				if (sublink->lefthand)
-					return walker((Node *) sublink->lefthand, context);
+				{
+					if (walker((Node *) sublink->lefthand, context))
+						return true;
+				}
 				else
-					return walker((Node *) sublink->oper, context);
+				{
+					if (walker((Node *) sublink->oper, context))
+						return true;
+				}
+				/*
+				 * Also invoke the walker on the sublink's Query node,
+				 * so it can recurse into the sub-query if it wants to.
+				 */
+				return walker(sublink->subselect, context);
 			}
 			break;
+		case T_Query:
+			/* Do nothing with a sub-Query, per discussion above */
+			break;
 		case T_List:
 			foreach(temp, (List *) node)
 			{
@@ -1770,6 +1804,23 @@ bool
 			break;
 		case T_TargetEntry:
 			return walker(((TargetEntry *) node)->expr, context);
+		case T_JoinExpr:
+			{
+				JoinExpr    *join = (JoinExpr *) node;
+
+				if (walker(join->larg, context))
+					return true;
+				if (walker(join->rarg, context))
+					return true;
+				if (walker(join->quals, context))
+					return true;
+				if (walker((Node *) join->colvars, context))
+					return true;
+				/* alias clause, using list, colnames list are deemed
+				 * uninteresting.
+				 */
+			}
+			break;
 		default:
 			elog(ERROR, "expression_tree_walker: Unexpected node type %d",
 				 nodeTag(node));
@@ -1778,6 +1829,37 @@ bool
 	return false;
 }

+/*
+ * query_tree_walker --- initiate a walk of a Query's expressions
+ *
+ * This routine exists just to reduce the number of places that need to know
+ * where all the expression subtrees of a Query are.  Note it can be used
+ * for starting a walk at top level of a Query regardless of whether the
+ * walker intends to descend into subqueries.  It is also useful for
+ * descending into subqueries within a walker.
+ */
+bool
+query_tree_walker(Query *query,
+				  bool (*walker) (),
+				  void *context)
+{
+	Assert(query != NULL && IsA(query, Query));
+
+	if (walker((Node *) query->targetList, context))
+		return true;
+	if (walker(query->qual, context))
+		return true;
+	if (walker(query->havingQual, context))
+		return true;
+	if (walker((Node *) query->jointree, context))
+		return true;
+	/*
+	 * XXX for subselect-in-FROM, may need to examine rtable as well
+	 */
+	return false;
+}
+
+
 /*--------------------
 * expression_tree_mutator() is designed to support routines that make a
 * modified copy of an expression tree, with some nodes being added,
@@ -1838,7 +1920,9 @@ bool
 */

 Node *
-			expression_tree_mutator(Node *node, Node *(*mutator) (), void *context)
+expression_tree_mutator(Node *node,
+						Node *(*mutator) (),
+						void *context)
 {

 	/*
@@ -1866,6 +1950,7 @@ Node *
 		case T_Const:
 		case T_Var:
 		case T_Param:
+		case T_RangeTblRef:
 			/* primitive node types with no subnodes */
 			return (Node *) copyObject(node);
 		case T_Expr:
@@ -2044,6 +2129,20 @@ Node *
 				return (Node *) newnode;
 			}
 			break;
+		case T_JoinExpr:
+			{
+				JoinExpr *join = (JoinExpr *) node;
+				JoinExpr *newnode;
+
+				FLATCOPY(newnode, join, JoinExpr);
+				MUTATE(newnode->larg, join->larg, Node *);
+				MUTATE(newnode->rarg, join->rarg, Node *);
+				MUTATE(newnode->quals, join->quals, Node *);
+				MUTATE(newnode->colvars, join->colvars, List *);
+				/* We do not mutate alias, using, or colnames by default */
+				return (Node *) newnode;
+			}
+			break;
 		default:
 			elog(ERROR, "expression_tree_mutator: Unexpected node type %d",
 				 nodeTag(node));
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.64 2000/05/30 00:49:49 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.65 2000/09/12 21:06:58 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -119,7 +119,9 @@ set_cheapest(RelOptInfo *parent_rel)
 	Path	   *cheapest_total_path;

 	Assert(IsA(parent_rel, RelOptInfo));
-	Assert(pathlist != NIL);
+
+	if (pathlist == NIL)
+		elog(ERROR, "Unable to devise a query plan for the given query");

 	cheapest_startup_path = cheapest_total_path = (Path *) lfirst(pathlist);

@@ -352,6 +354,7 @@ create_index_path(Query *root,
 	 * number of rows is the same as the parent rel's estimate.
 	 */
 	pathnode->joinrelids = NIL; /* no join clauses here */
+	pathnode->alljoinquals = false;
 	pathnode->rows = rel->rows;

 	cost_index(&pathnode->path, root, rel, index, indexquals, false);
@@ -393,6 +396,7 @@ create_tidscan_path(RelOptInfo *rel, List *tideval)
 *	  relations.
 *
 * 'joinrel' is the join relation.
+ * 'jointype' is the type of join required
 * 'outer_path' is the outer path
 * 'inner_path' is the inner path
 * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
@@ -403,6 +407,7 @@ create_tidscan_path(RelOptInfo *rel, List *tideval)
 */
 NestPath   *
 create_nestloop_path(RelOptInfo *joinrel,
+					 JoinType jointype,
 					 Path *outer_path,
 					 Path *inner_path,
 					 List *restrict_clauses,
@@ -412,6 +417,7 @@ create_nestloop_path(RelOptInfo *joinrel,

 	pathnode->path.pathtype = T_NestLoop;
 	pathnode->path.parent = joinrel;
+	pathnode->jointype = jointype;
 	pathnode->outerjoinpath = outer_path;
 	pathnode->innerjoinpath = inner_path;
 	pathnode->joinrestrictinfo = restrict_clauses;
@@ -428,6 +434,7 @@ create_nestloop_path(RelOptInfo *joinrel,
 *	  two relations
 *
 * 'joinrel' is the join relation
+ * 'jointype' is the type of join required
 * 'outer_path' is the outer path
 * 'inner_path' is the inner path
 * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
@@ -440,6 +447,7 @@ create_nestloop_path(RelOptInfo *joinrel,
 */
 MergePath  *
 create_mergejoin_path(RelOptInfo *joinrel,
+					  JoinType jointype,
 					  Path *outer_path,
 					  Path *inner_path,
 					  List *restrict_clauses,
@@ -463,6 +471,7 @@ create_mergejoin_path(RelOptInfo *joinrel,

 	pathnode->jpath.path.pathtype = T_MergeJoin;
 	pathnode->jpath.path.parent = joinrel;
+	pathnode->jpath.jointype = jointype;
 	pathnode->jpath.outerjoinpath = outer_path;
 	pathnode->jpath.innerjoinpath = inner_path;
 	pathnode->jpath.joinrestrictinfo = restrict_clauses;
@@ -486,6 +495,7 @@ create_mergejoin_path(RelOptInfo *joinrel,
 *	  Creates a pathnode corresponding to a hash join between two relations.
 *
 * 'joinrel' is the join relation
+ * 'jointype' is the type of join required
 * 'outer_path' is the cheapest outer path
 * 'inner_path' is the cheapest inner path
 * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
@@ -496,6 +506,7 @@ create_mergejoin_path(RelOptInfo *joinrel,
 */
 HashPath   *
 create_hashjoin_path(RelOptInfo *joinrel,
+					 JoinType jointype,
 					 Path *outer_path,
 					 Path *inner_path,
 					 List *restrict_clauses,
@@ -506,6 +517,7 @@ create_hashjoin_path(RelOptInfo *joinrel,

 	pathnode->jpath.path.pathtype = T_HashJoin;
 	pathnode->jpath.path.parent = joinrel;
+	pathnode->jpath.jointype = jointype;
 	pathnode->jpath.outerjoinpath = outer_path;
 	pathnode->jpath.innerjoinpath = inner_path;
 	pathnode->jpath.joinrestrictinfo = restrict_clauses;
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/relnode.c,v 1.27 2000/06/18 22:44:12 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/relnode.c,v 1.28 2000/09/12 21:06:58 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -72,6 +72,7 @@ get_base_rel(Query *root, int relid)
 	rel->tuples = 0;
 	rel->baserestrictinfo = NIL;
 	rel->baserestrictcost = 0;
+	rel->outerjoinset = NIL;
 	rel->joininfo = NIL;
 	rel->innerjoin = NIL;

@@ -178,6 +179,7 @@ get_join_rel(Query *root,
 	joinrel->tuples = 0;
 	joinrel->baserestrictinfo = NIL;
 	joinrel->baserestrictcost = 0;
+	joinrel->outerjoinset = NIL;
 	joinrel->joininfo = NIL;
 	joinrel->innerjoin = NIL;

@@ -216,8 +218,7 @@ get_join_rel(Query *root,
 							   restrictlist);

 	/*
-	 * Add the joinrel to the front of the query's joinrel list.
-	 * (allpaths.c depends on this ordering!)
+	 * Add the joinrel to the query's joinrel list.
 	 */
 	root->join_rel_list = lcons(joinrel, root->join_rel_list);

--- a/src/backend/optimizer/util/restrictinfo.c
+++ b/src/backend/optimizer/util/restrictinfo.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.10 2000/05/30 00:49:49 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.11 2000/09/12 21:06:58 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -54,3 +54,29 @@ get_actual_clauses(List *restrictinfo_list)
 	}
 	return result;
 }
+
+/*
+ * get_actual_join_clauses
+ *
+ * Extract clauses from 'restrictinfo_list', separating those that
+ * came from JOIN/ON conditions from those that didn't.
+ */
+void
+get_actual_join_clauses(List *restrictinfo_list,
+						List **joinquals, List **otherquals)
+{
+	List	   *temp;
+
+	*joinquals = NIL;
+	*otherquals = NIL;
+
+	foreach(temp, restrictinfo_list)
+	{
+		RestrictInfo *clause = (RestrictInfo *) lfirst(temp);
+
+		if (clause->isjoinqual)
+			*joinquals = lappend(*joinquals, clause->clause);
+		else
+			*otherquals = lappend(*otherquals, clause->clause);
+	}
+}
--- a/src/backend/optimizer/util/var.c
+++ b/src/backend/optimizer/util/var.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/var.c,v 1.26 2000/04/12 17:15:24 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/var.c,v 1.27 2000/09/12 21:06:59 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -16,17 +16,25 @@

 #include "postgres.h"

+#include "nodes/plannodes.h"
 #include "optimizer/clauses.h"
 #include "optimizer/var.h"


+typedef struct
+{
+	List	   *varlist;
+	int			sublevels_up;
+} pull_varnos_context;
+
 typedef struct
 {
 	List	   *varlist;
 	bool		includeUpperVars;
 } pull_var_clause_context;

-static bool pull_varnos_walker(Node *node, List **listptr);
+static bool pull_varnos_walker(Node *node,
+							   pull_varnos_context *context);
 static bool contain_var_clause_walker(Node *node, void *context);
 static bool pull_var_clause_walker(Node *node,
 					   pull_var_clause_context *context);
@@ -35,21 +43,39 @@ static bool pull_var_clause_walker(Node *node,
 /*
 *		pull_varnos
 *
- *		Create a list of all the distinct varnos present in a parsetree
- *		(tlist or qual).  Note that only varnos attached to level-zero
- *		Vars are considered --- upper Vars refer to some other rtable!
+ *		Create a list of all the distinct varnos present in a parsetree.
+ *		Only varnos that reference level-zero rtable entries are considered.
+ *
+ * NOTE: unlike other routines in this file, pull_varnos() is used on
+ * not-yet-planned expressions.  It may therefore find bare SubLinks,
+ * and if so it needs to recurse into them to look for uplevel references
+ * to the desired rtable level!  But when we find a completed SubPlan,
+ * we only need to look at the parameters passed to the subplan.
 */
 List *
 pull_varnos(Node *node)
 {
-	List	   *result = NIL;
+	pull_varnos_context context;

-	pull_varnos_walker(node, &result);
-	return result;
+	context.varlist = NIL;
+	context.sublevels_up = 0;
+
+	/*
+	 * Must be prepared to start with a Query or a bare expression tree;
+	 * if it's a Query, go straight to query_tree_walker to make sure that
+	 * sublevels_up doesn't get incremented prematurely.
+	 */
+	if (node && IsA(node, Query))
+		query_tree_walker((Query *) node, pull_varnos_walker,
+						  (void *) &context);
+	else
+		pull_varnos_walker(node, &context);
+
+	return context.varlist;
 }

 static bool
-pull_varnos_walker(Node *node, List **listptr)
+pull_varnos_walker(Node *node, pull_varnos_context *context)
 {
 	if (node == NULL)
 		return false;
@@ -57,11 +83,42 @@ pull_varnos_walker(Node *node, List **listptr)
 	{
 		Var		   *var = (Var *) node;

-		if (var->varlevelsup == 0 && !intMember(var->varno, *listptr))
-			*listptr = lconsi(var->varno, *listptr);
+		if (var->varlevelsup == context->sublevels_up &&
+			!intMember(var->varno, context->varlist))
+			context->varlist = lconsi(var->varno, context->varlist);
 		return false;
 	}
-	return expression_tree_walker(node, pull_varnos_walker, (void *) listptr);
+	if (is_subplan(node))
+	{
+		/*
+		 * Already-planned subquery.  Examine the args list (parameters
+		 * to be passed to subquery), as well as the "oper" list which
+		 * is executed by the outer query.  But short-circuit recursion into
+		 * the subquery itself, which would be a waste of effort.
+		 */
+		Expr	   *expr = (Expr *) node;
+
+		if (pull_varnos_walker((Node*) ((SubPlan*) expr->oper)->sublink->oper,
+							   context))
+			return true;
+		if (pull_varnos_walker((Node *) expr->args,
+							   context))
+			return true;
+		return false;
+	}
+	if (IsA(node, Query))
+	{
+		/* Recurse into not-yet-planned subquery */
+		bool		result;
+
+		context->sublevels_up++;
+		result = query_tree_walker((Query *) node, pull_varnos_walker,
+								   (void *) context);
+		context->sublevels_up--;
+		return result;
+	}
+	return expression_tree_walker(node, pull_varnos_walker,
+								  (void *) context);
 }

 /*