Cache if PathTarget and RestrictInfos contain volatile functions

Here we aim to reduce duplicate work done by contain_volatile_functions() by caching whether PathTargets and RestrictInfos contain any volatile functions the first time contain_volatile_functions() is called for them. Any future calls for these nodes just use the cached value rather than going to the trouble of recursively checking the sub-node all over again. Thanks to Tom Lane for the idea. Any locations in the code which make changes to a PathTarget or RestrictInfo which could change the outcome of the volatility check must change the cached value back to VOLATILITY_UNKNOWN again. contain_volatile_functions() is the only code in charge of setting the cache value to either VOLATILITY_VOLATILE or VOLATILITY_NOVOLATILE. Some existing code does benefit from this additional caching, however, this change is mainly aimed at an upcoming patch that must check for volatility during the join search. Repeated volatility checks in that case can become very expensive when the join search contains more than a few relations. Author: David Rowley Discussion: https://postgr.es/m/3795226.1614059027@sss.pgh.pa.us
2025-09-02 04:21:28 +03:00 · 2021-03-29 14:47:05 +13:00
parent b64654d6c4
commit f58b230ed0
8 changed files with 134 additions and 20 deletions
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -432,6 +432,16 @@ contain_mutable_functions_walker(Node *node, void *context)
 * subsequent planning need not consider volatility within those, since
 * the executor won't change its evaluation rules for a SubPlan based on
 * volatility.
+ *
+ * For some node types, for example, RestrictInfo and PathTarget, we cache
+ * whether we found any volatile functions or not and reuse that value in any
+ * future checks for that node.  All of the logic for determining if the
+ * cached value should be set to VOLATILITY_NOVOLATILE or VOLATILITY_VOLATILE
+ * belongs in this function.  Any code which makes changes to these nodes
+ * which could change the outcome this function must set the cached value back
+ * to VOLATILITY_UNKNOWN.  That allows this function to redetermine the
+ * correct value during the next call, should we need to redetermine if the
+ * node contains any volatile functions again in the future.
 */
 bool
 contain_volatile_functions(Node *clause)
@@ -461,6 +471,63 @@ contain_volatile_functions_walker(Node *node, void *context)
 		return true;
 	}

+	if (IsA(node, RestrictInfo))
+	{
+		RestrictInfo *rinfo = (RestrictInfo *) node;
+
+		/*
+		 * For RestrictInfo, check if we've checked the volatility of it
+		 * before.  If so, we can just use the cached value and not bother
+		 * checking it again.  Otherwise, check it and cache if whether we
+		 * found any volatile functions.
+		 */
+		if (rinfo->has_volatile == VOLATILITY_NOVOLATILE)
+			return false;
+		else if (rinfo->has_volatile == VOLATILITY_VOLATILE)
+			return true;
+		else
+		{
+			bool		hasvolatile;
+
+			hasvolatile = contain_volatile_functions_walker((Node *) rinfo->clause,
+															context);
+			if (hasvolatile)
+				rinfo->has_volatile = VOLATILITY_VOLATILE;
+			else
+				rinfo->has_volatile = VOLATILITY_NOVOLATILE;
+
+			return hasvolatile;
+		}
+	}
+
+	if (IsA(node, PathTarget))
+	{
+		PathTarget *target = (PathTarget *) node;
+
+		/*
+		 * We also do caching for PathTarget the same as we do above for
+		 * RestrictInfos.
+		 */
+		if (target->has_volatile_expr == VOLATILITY_NOVOLATILE)
+			return false;
+		else if (target->has_volatile_expr == VOLATILITY_VOLATILE)
+			return true;
+		else
+		{
+			bool		hasvolatile;
+
+			hasvolatile = contain_volatile_functions_walker((Node *) target->exprs,
+															context);
+
+			if (hasvolatile)
+				target->has_volatile_expr = VOLATILITY_VOLATILE;
+			else
+				target->has_volatile_expr = VOLATILITY_NOVOLATILE;
+
+			return hasvolatile;
+		}
+	}
+
 	/*
 	 * See notes in contain_mutable_functions_walker about why we treat
 	 * MinMaxExpr, XmlExpr, and CoerceToDomain as immutable, while
--- a/src/backend/optimizer/util/restrictinfo.c
+++ b/src/backend/optimizer/util/restrictinfo.c
@@ -137,6 +137,13 @@ make_restrictinfo_internal(PlannerInfo *root,
 	else
 		restrictinfo->leakproof = false;	/* really, "don't know" */

+	/*
+	 * Mark volatility as unknown.  The contain_volatile_functions function
+	 * will determine if there are any volatile functions when called for the
+	 * first time with this RestrictInfo.
+	 */
+	restrictinfo->has_volatile = VOLATILITY_UNKNOWN;
+
 	/*
 	 * If it's a binary opclause, set up left/right relids info. In any case
 	 * set up the total clause relids info.
--- a/src/backend/optimizer/util/tlist.c
+++ b/src/backend/optimizer/util/tlist.c
@@ -623,6 +623,13 @@ make_pathtarget_from_tlist(List *tlist)
 		i++;
 	}

+	/*
+	 * Mark volatility as unknown.  The contain_volatile_functions function
+	 * will determine if there are any volatile functions when called for the
+	 * first time with this PathTarget.
+	 */
+	target->has_volatile_expr = VOLATILITY_UNKNOWN;
+
 	return target;
 }

@@ -724,6 +731,16 @@ add_column_to_pathtarget(PathTarget *target, Expr *expr, Index sortgroupref)
 		target->sortgrouprefs = (Index *) palloc0(nexprs * sizeof(Index));
 		target->sortgrouprefs[nexprs - 1] = sortgroupref;
 	}
+
+	/*
+	 * Reset has_volatile_expr to UNKNOWN.  We just leave it up to
+	 * contain_volatile_functions to set this properly again.  Technically we
+	 * could save some effort here and just check the new Expr, but it seems
+	 * better to keep the logic for setting this flag in one location rather
+	 * than duplicating the logic here.
+	 */
+	if (target->has_volatile_expr == VOLATILITY_NOVOLATILE)
+		target->has_volatile_expr = VOLATILITY_UNKNOWN;
 }

 /*