Extend the parser location infrastructure to include a location field in

most node types used in expression trees (both before and after parse analysis). This allows us to place an error cursor in many situations where we formerly could not, because the information wasn't available beyond the very first level of parse analysis. There's a fair amount of work still to be done to persuade individual ereport() calls to actually include an error location, but this gets the initdb-forcing part of the work out of the way; and the situation is already markedly better than before for complaints about unimplementable implicit casts, such as CASE and UNION constructs with incompatible alternative data types. Per my proposal of a few days ago.
2025-11-24 00:23:06 +03:00 · 2008-08-28 23:09:48 +00:00
parent 6734182c16
commit a2794623d2
44 changed files with 1295 additions and 502 deletions
--- a/src/backend/nodes/nodeFuncs.c
+++ b/src/backend/nodes/nodeFuncs.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/nodes/nodeFuncs.c,v 1.30 2008/08/25 22:42:32 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/nodes/nodeFuncs.c,v 1.31 2008/08/28 23:09:46 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -23,6 +23,7 @@


 static bool expression_returns_set_walker(Node *node, void *context);
+static int	leftmostLoc(int loc1, int loc2);


 /*
@@ -574,6 +575,315 @@ expression_returns_set_walker(Node *node, void *context)
 }


+/*
+ *	exprLocation -
+ *	  returns the parse location of an expression tree, for error reports
+ *
+ * -1 is returned if the location can't be determined.
+ *
+ * For expressions larger than a single token, the intent here is to
+ * return the location of the expression's leftmost token, not necessarily
+ * the topmost Node's location field.  For example, an OpExpr's location
+ * field will point at the operator name, but if it is not a prefix operator
+ * then we should return the location of the left-hand operand instead.
+ * The reason is that we want to reference the entire expression not just
+ * that operator, and pointing to its start seems to be the most natural way.
+ *
+ * The location is not perfect --- for example, since the grammar doesn't
+ * explicitly represent parentheses in the parsetree, given something that
+ * had been written "(a + b) * c" we are going to point at "a" not "(".
+ * But it should be plenty good enough for error reporting purposes.
+ *
+ * You might think that this code is overly general, for instance why check
+ * the operands of a FuncExpr node, when the function name can be expected
+ * to be to the left of them?  There are a couple of reasons.  The grammar
+ * sometimes builds expressions that aren't quite what the user wrote;
+ * for instance x IS NOT BETWEEN ... becomes a NOT-expression whose keyword
+ * pointer is to the right of its leftmost argument.  Also, nodes that were
+ * inserted implicitly by parse analysis (such as FuncExprs for implicit
+ * coercions) will have location -1, and so we can have odd combinations of
+ * known and unknown locations in a tree.
+ */
+int
+exprLocation(Node *expr)
+{
+	int			loc;
+
+	if (expr == NULL)
+		return -1;
+	switch (nodeTag(expr))
+	{
+		case T_Var:
+			loc = ((Var *) expr)->location;
+			break;
+		case T_Const:
+			loc = ((Const *) expr)->location;
+			break;
+		case T_Param:
+			loc = ((Param *) expr)->location;
+			break;
+		case T_Aggref:
+			/* function name should always be the first thing */
+			loc = ((Aggref *) expr)->location;
+			break;
+		case T_ArrayRef:
+			/* just use array argument's location */
+			loc = exprLocation((Node *) ((ArrayRef *) expr)->refexpr);
+			break;
+		case T_FuncExpr:
+			{
+				FuncExpr   *fexpr = (FuncExpr *) expr;
+
+				/* consider both function name and leftmost arg */
+				loc = leftmostLoc(fexpr->location,
+								  exprLocation((Node *) fexpr->args));
+			}
+			break;
+		case T_OpExpr:
+		case T_DistinctExpr:	/* struct-equivalent to OpExpr */
+		case T_NullIfExpr:		/* struct-equivalent to OpExpr */
+			{
+				OpExpr   *opexpr = (OpExpr *) expr;
+
+				/* consider both operator name and leftmost arg */
+				loc = leftmostLoc(opexpr->location,
+								  exprLocation((Node *) opexpr->args));
+			}
+			break;
+		case T_ScalarArrayOpExpr:
+			{
+				ScalarArrayOpExpr *saopexpr = (ScalarArrayOpExpr *) expr;
+
+				/* consider both operator name and leftmost arg */
+				loc = leftmostLoc(saopexpr->location,
+								  exprLocation((Node *) saopexpr->args));
+			}
+			break;
+		case T_BoolExpr:
+			{
+				BoolExpr   *bexpr = (BoolExpr *) expr;
+
+				/*
+				 * Same as above, to handle either NOT or AND/OR.  We can't
+				 * special-case NOT because of the way that it's used for
+				 * things like IS NOT BETWEEN.
+				 */
+				loc = leftmostLoc(bexpr->location,
+								  exprLocation((Node *) bexpr->args));
+			}
+			break;
+		case T_SubLink:
+			{
+				SubLink *sublink = (SubLink *) expr;
+
+				/* check the testexpr, if any, and the operator/keyword */
+				loc = leftmostLoc(exprLocation(sublink->testexpr),
+								  sublink->location);
+			}
+			break;
+		case T_FieldSelect:
+			/* just use argument's location */
+			loc = exprLocation((Node *) ((FieldSelect *) expr)->arg);
+			break;
+		case T_FieldStore:
+			/* just use argument's location */
+			loc = exprLocation((Node *) ((FieldStore *) expr)->arg);
+			break;
+		case T_RelabelType:
+			{
+				RelabelType *rexpr = (RelabelType *) expr;
+
+				/* consider both the node's own location and its argument's */
+				loc = leftmostLoc(rexpr->location,
+								  exprLocation((Node *) rexpr->arg));
+			}
+			break;
+		case T_CoerceViaIO:
+			{
+				CoerceViaIO *cexpr = (CoerceViaIO *) expr;
+
+				/* consider both the node's own location and its argument's */
+				loc = leftmostLoc(cexpr->location,
+								  exprLocation((Node *) cexpr->arg));
+			}
+			break;
+		case T_ArrayCoerceExpr:
+			{
+				ArrayCoerceExpr *cexpr = (ArrayCoerceExpr *) expr;
+
+				/* consider both the node's own location and its argument's */
+				loc = leftmostLoc(cexpr->location,
+								  exprLocation((Node *) cexpr->arg));
+			}
+			break;
+		case T_ConvertRowtypeExpr:
+			{
+				ConvertRowtypeExpr *cexpr = (ConvertRowtypeExpr *) expr;
+
+				/* consider both the node's own location and its argument's */
+				loc = leftmostLoc(cexpr->location,
+								  exprLocation((Node *) cexpr->arg));
+			}
+			break;
+		case T_CaseExpr:
+			/* CASE keyword should always be the first thing */
+			loc = ((CaseExpr *) expr)->location;
+			break;
+		case T_CaseWhen:
+			/* WHEN keyword should always be the first thing */
+			loc = ((CaseWhen *) expr)->location;
+			break;
+		case T_ArrayExpr:
+			/* the location points at ARRAY or [, which must be leftmost */
+			loc = ((ArrayExpr *) expr)->location;
+			break;
+		case T_RowExpr:
+			/* the location points at ROW or (, which must be leftmost */
+			loc = ((RowExpr *) expr)->location;
+			break;
+		case T_RowCompareExpr:
+			/* just use leftmost argument's location */
+			loc = exprLocation((Node *) ((RowCompareExpr *) expr)->largs);
+			break;
+		case T_CoalesceExpr:
+			/* COALESCE keyword should always be the first thing */
+			loc = ((CoalesceExpr *) expr)->location;
+			break;
+		case T_MinMaxExpr:
+			/* GREATEST/LEAST keyword should always be the first thing */
+			loc = ((MinMaxExpr *) expr)->location;
+			break;
+		case T_XmlExpr:
+			{
+				XmlExpr   *xexpr = (XmlExpr *) expr;
+
+				/* consider both function name and leftmost arg */
+				loc = leftmostLoc(xexpr->location,
+								  exprLocation((Node *) xexpr->args));
+			}
+			break;
+		case T_NullTest:
+			/* just use argument's location */
+			loc = exprLocation((Node *) ((NullTest *) expr)->arg);
+			break;
+		case T_BooleanTest:
+			/* just use argument's location */
+			loc = exprLocation((Node *) ((BooleanTest *) expr)->arg);
+			break;
+		case T_CoerceToDomain:
+			{
+				CoerceToDomain *cexpr = (CoerceToDomain *) expr;
+
+				/* consider both the node's own location and its argument's */
+				loc = leftmostLoc(cexpr->location,
+								  exprLocation((Node *) cexpr->arg));
+			}
+			break;
+		case T_CoerceToDomainValue:
+			loc = ((CoerceToDomainValue *) expr)->location;
+			break;
+		case T_SetToDefault:
+			loc = ((SetToDefault *) expr)->location;
+			break;
+		case T_TargetEntry:
+			/* just use argument's location */
+			loc = exprLocation((Node *) ((TargetEntry *) expr)->expr);
+			break;
+		case T_List:
+			{
+				/* report location of first list member that has a location */
+				ListCell   *lc;
+
+				loc = -1;		/* just to suppress compiler warning */
+				foreach(lc, (List *) expr)
+				{
+					loc = exprLocation((Node *) lfirst(lc));
+					if (loc >= 0)
+						break;
+				}
+			}
+			break;
+		case T_A_Expr:
+			{
+				A_Expr *aexpr = (A_Expr *) expr;
+
+				/* use leftmost of operator or left operand (if any) */
+				/* we assume right operand can't be to left of operator */
+				loc = leftmostLoc(aexpr->location,
+								  exprLocation(aexpr->lexpr));
+			}
+			break;
+		case T_ColumnRef:
+			loc = ((ColumnRef *) expr)->location;
+			break;
+		case T_ParamRef:
+			loc = ((ParamRef *) expr)->location;
+			break;
+		case T_A_Const:
+			loc = ((A_Const *) expr)->location;
+			break;
+		case T_FuncCall:
+			{
+				FuncCall *fc = (FuncCall *) expr;
+
+				/* consider both function name and leftmost arg */
+				loc = leftmostLoc(fc->location,
+								  exprLocation((Node *) fc->args));
+			}
+			break;
+		case T_A_ArrayExpr:
+			/* the location points at ARRAY or [, which must be leftmost */
+			loc = ((A_ArrayExpr *) expr)->location;
+			break;
+		case T_ResTarget:
+			/* we need not examine the contained expression (if any) */
+			loc = ((ResTarget *) expr)->location;
+			break;
+		case T_TypeCast:
+			{
+				TypeCast *tc = (TypeCast *) expr;
+
+				/*
+				 * This could represent CAST(), ::, or TypeName 'literal',
+				 * so any of the components might be leftmost.
+				 */
+				loc = exprLocation(tc->arg);
+				loc = leftmostLoc(loc, tc->typename->location);
+				loc = leftmostLoc(loc, tc->location);
+			}
+			break;
+		case T_TypeName:
+			loc = ((TypeName *) expr)->location;
+			break;
+		case T_XmlSerialize:
+			/* XMLSERIALIZE keyword should always be the first thing */
+			loc = ((XmlSerialize *) expr)->location;
+			break;
+		default:
+			/* for any other node type it's just unknown... */
+			loc = -1;
+			break;
+	}
+	return loc;
+}
+
+/*
+ * leftmostLoc - helper for exprLocation
+ *
+ * Pick the smaller of two parse locations; a negative value means "unknown"
+ * and loses to the other argument (if both are unknown, loc2 is returned).
+ */
+static int
+leftmostLoc(int loc1, int loc2)
+{
+	if (loc1 < 0)
+		return loc2;
+	if (loc2 < 0)
+		return loc1;
+	return (loc1 < loc2) ? loc1 : loc2;
+}
+
+
 /*
 * Standard expression-tree walking support
 *