Support XMLTABLE query expression

XMLTABLE is defined by the SQL/XML standard as a feature that allows turning XML-formatted data into relational form, so that it can be used as a <table primary> in the FROM clause of a query. This new construct provides significant simplicity and performance benefit for XML data processing; what in a client-side custom implementation was reported to take 20 minutes can be executed in 400ms using XMLTABLE. (The same functionality was said to take 10 seconds using nested PostgreSQL XPath function calls, and 5 seconds using XMLReader under PL/Python). The implemented syntax deviates slightly from what the standard requires. First, the standard indicates that the PASSING clause is optional and that multiple XML input documents may be given to it; we make it mandatory and accept a single document only. Second, we don't currently support a default namespace to be specified. This implementation relies on a new executor node based on a hardcoded method table. (Because the grammar is fixed, there is no extensibility in the current approach; further constructs can be implemented on top of this such as JSON_TABLE, but they require changes to core code.) Author: Pavel Stehule, Álvaro Herrera Extensively reviewed by: Craig Ringer Discussion: https://postgr.es/m/CAFj8pRAgfzMD-LoSmnMGybD0WsEznLHWap8DO79+-GTRAPR4qA@mail.gmail.com
2025-10-28 11:55:03 +03:00 · 2017-03-08 12:39:37 -03:00
parent 270d7dd8a5
commit fcec6caafa
52 changed files with 4606 additions and 50 deletions
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1277,6 +1277,62 @@ cost_functionscan(Path *path, PlannerInfo *root,
 	path->total_cost = startup_cost + run_cost;
 }

+/*
+ * cost_tablefuncscan
+ *	  Determines and returns the cost of scanning a table function.
+ *
+ * 'baserel' is the relation to be scanned
+ * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
+ */
+void
+cost_tablefuncscan(Path *path, PlannerInfo *root,
+				   RelOptInfo *baserel, ParamPathInfo *param_info)
+{
+	Cost		startup_cost = 0;
+	Cost		run_cost = 0;
+	QualCost	qpqual_cost;
+	Cost		cpu_per_tuple;
+	RangeTblEntry *rte;
+	QualCost	exprcost;
+
+	/* Should only be applied to base relations that are functions */
+	Assert(baserel->relid > 0);
+	rte = planner_rt_fetch(baserel->relid, root);
+	Assert(rte->rtekind == RTE_TABLEFUNC);
+
+	/* Mark the path with the correct row estimate */
+	if (param_info)
+		path->rows = param_info->ppi_rows;
+	else
+		path->rows = baserel->rows;
+
+	/*
+	 * Estimate costs of executing the table func expression(s).
+	 *
+	 * XXX in principle we ought to charge tuplestore spill costs if the
+	 * number of rows is large.  However, given how phony our rowcount
+	 * estimates for tablefuncs tend to be, there's not a lot of point in that
+	 * refinement right now.
+	 */
+	cost_qual_eval_node(&exprcost, (Node *) rte->tablefunc, root);
+
+	startup_cost += exprcost.startup + exprcost.per_tuple;
+
+	/* Add scanning CPU costs */
+	get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
+
+	startup_cost += qpqual_cost.startup;
+	cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
+	run_cost += cpu_per_tuple * baserel->tuples;
+
+	/* tlist eval costs are paid per output row, not per tuple scanned */
+	startup_cost += path->pathtarget->cost.startup;
+	run_cost += path->pathtarget->cost.per_tuple * path->rows;
+
+	path->startup_cost = startup_cost;
+	path->total_cost = startup_cost + run_cost;
+}
+
 /*
 * cost_valuesscan
 *	  Determines and returns the cost of scanning a VALUES RTE.
@@ -4421,6 +4477,31 @@ set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel)
 	set_baserel_size_estimates(root, rel);
 }

+/*
+ * set_function_size_estimates
+ *		Set the size estimates for a base relation that is a function call.
+ *
+ * The rel's targetlist and restrictinfo list must have been constructed
+ * already.
+ *
+ * We set the same fields as set_tablefunc_size_estimates.
+ */
+void
+set_tablefunc_size_estimates(PlannerInfo *root, RelOptInfo *rel)
+{
+	RangeTblEntry *rte;
+
+	/* Should only be applied to base relations that are functions */
+	Assert(rel->relid > 0);
+	rte = planner_rt_fetch(rel->relid, root);
+	Assert(rte->rtekind == RTE_TABLEFUNC);
+
+	rel->tuples = 100;
+
+	/* Now estimate number of output rows, etc */
+	set_baserel_size_estimates(root, rel);
+}
+
 /*
 * set_values_size_estimates
 *		Set the size estimates for a base relation that is a values list.