1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-15 19:21:59 +03:00

Support XMLTABLE query expression

XMLTABLE is defined by the SQL/XML standard as a feature that allows
turning XML-formatted data into relational form, so that it can be used
as a <table primary> in the FROM clause of a query.

This new construct provides significant simplicity and performance
benefit for XML data processing; what in a client-side custom
implementation was reported to take 20 minutes can be executed in 400ms
using XMLTABLE.  (The same functionality was said to take 10 seconds
using nested PostgreSQL XPath function calls, and 5 seconds using
XMLReader under PL/Python).

The implemented syntax deviates slightly from what the standard
requires.  First, the standard indicates that the PASSING clause is
optional and that multiple XML input documents may be given to it; we
make it mandatory and accept a single document only.  Second, we don't
currently support specifying a default namespace.

This implementation relies on a new executor node based on a hardcoded
method table.  (Because the grammar is fixed, there is no extensibility
in the current approach; further constructs can be implemented on top of
this such as JSON_TABLE, but they require changes to core code.)

Author: Pavel Stehule, Álvaro Herrera
Extensively reviewed by: Craig Ringer
Discussion: https://postgr.es/m/CAFj8pRAgfzMD-LoSmnMGybD0WsEznLHWap8DO79+-GTRAPR4qA@mail.gmail.com
This commit is contained in:
Alvaro Herrera
2017-03-08 12:39:37 -03:00
parent 270d7dd8a5
commit fcec6caafa
52 changed files with 4606 additions and 50 deletions

View File

@ -106,6 +106,8 @@ static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
static void set_values_pathlist(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
static void set_tablefunc_pathlist(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
static void set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel,
@ -365,6 +367,9 @@ set_rel_size(PlannerInfo *root, RelOptInfo *rel,
case RTE_FUNCTION:
set_function_size_estimates(root, rel);
break;
case RTE_TABLEFUNC:
set_tablefunc_size_estimates(root, rel);
break;
case RTE_VALUES:
set_values_size_estimates(root, rel);
break;
@ -437,6 +442,10 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
/* RangeFunction */
set_function_pathlist(root, rel, rte);
break;
case RTE_TABLEFUNC:
/* Table Function */
set_tablefunc_pathlist(root, rel, rte);
break;
case RTE_VALUES:
/* Values list */
set_values_pathlist(root, rel, rte);
@ -599,6 +608,10 @@ set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel,
return;
break;
case RTE_TABLEFUNC:
/* not parallel safe */
return;
case RTE_VALUES:
/* Check for parallel-restricted functions. */
if (!is_parallel_safe(root, (Node *) rte->values_lists))
@ -1932,6 +1945,27 @@ set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
add_path(rel, create_valuesscan_path(root, rel, required_outer));
}
/*
 * set_tablefunc_pathlist
 *		Build the (single) access path for a table function RTE
 *
 * 'root' and 'rel' are passed through to the path constructor; the finished
 * path is added to 'rel'.  'rte' is part of the signature but is not
 * consulted by this routine.
 */
static void
set_tablefunc_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
	/*
	 * Join clauses are never pushed down into the quals of a tablefunc scan.
	 * The scan can nevertheless need parameterization, because the table
	 * function expression may contain LATERAL references; hence the rel's
	 * lateral_relids are used as the path's required outer rels.
	 */
	Relids		outer_rels = rel->lateral_relids;

	/* Create the one path for this rel and register it */
	add_path(rel, create_tablefuncscan_path(root, rel, outer_rels));
}
/*
* set_cte_pathlist
* Build the (single) access path for a non-self-reference CTE RTE
@ -3032,6 +3066,9 @@ print_path(PlannerInfo *root, Path *path, int indent)
case T_FunctionScan:
ptype = "FunctionScan";
break;
case T_TableFuncScan:
ptype = "TableFuncScan";
break;
case T_ValuesScan:
ptype = "ValuesScan";
break;

View File

@ -1277,6 +1277,62 @@ cost_functionscan(Path *path, PlannerInfo *root,
path->total_cost = startup_cost + run_cost;
}
/*
 * cost_tablefuncscan
 *	  Computes the cost of scanning a table function and stores the result
 *	  in 'path' (rows, startup_cost and total_cost are filled in).
 *
 * 'path' is the path being costed
 * 'root' is the planner state used to look up the range table entry
 * 'baserel' is the relation to be scanned
 * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
 */
void
cost_tablefuncscan(Path *path, PlannerInfo *root,
				   RelOptInfo *baserel, ParamPathInfo *param_info)
{
	Cost		startup = 0;
	Cost		run = 0;
	QualCost	expr_cost;
	QualCost	qual_cost;
	Cost		per_tuple_cpu;
	RangeTblEntry *tf_rte;

	/* Only valid for base relations whose RTE is of kind RTE_TABLEFUNC */
	Assert(baserel->relid > 0);
	tf_rte = planner_rt_fetch(baserel->relid, root);
	Assert(tf_rte->rtekind == RTE_TABLEFUNC);

	/* A parameterized path carries its own row estimate */
	path->rows = param_info ? param_info->ppi_rows : baserel->rows;

	/*
	 * Charge for evaluating the table func expression(s).  Both the startup
	 * and the per-tuple component of that estimate are added to the startup
	 * cost of the scan.
	 *
	 * XXX in principle tuplestore spill costs ought to be charged when the
	 * number of rows is large.  Given how phony the rowcount estimates for
	 * tablefuncs tend to be, that refinement is not worthwhile right now.
	 */
	cost_qual_eval_node(&expr_cost, (Node *) tf_rte->tablefunc, root);
	startup += expr_cost.startup + expr_cost.per_tuple;

	/* CPU cost of scanning: per-tuple overhead plus restriction quals */
	get_restriction_qual_cost(root, baserel, param_info, &qual_cost);
	startup += qual_cost.startup;
	per_tuple_cpu = cpu_tuple_cost + qual_cost.per_tuple;
	run += per_tuple_cpu * baserel->tuples;

	/* tlist evaluation is paid once per output row, not per scanned tuple */
	startup += path->pathtarget->cost.startup;
	run += path->pathtarget->cost.per_tuple * path->rows;

	path->startup_cost = startup;
	path->total_cost = startup + run;
}
/*
* cost_valuesscan
* Determines and returns the cost of scanning a VALUES RTE.
@ -4421,6 +4477,31 @@ set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel)
set_baserel_size_estimates(root, rel);
}
/*
 * set_tablefunc_size_estimates
 *		Set the size estimates for a base relation that is a table function
 *		(RTE_TABLEFUNC) reference.
 *
 * The rel's targetlist and restrictinfo list must have been constructed
 * already.
 *
 * rel->tuples is set here; the remaining size fields are filled in by
 * set_baserel_size_estimates.
 */
void
set_tablefunc_size_estimates(PlannerInfo *root, RelOptInfo *rel)
{
	/*
	 * Should only be applied to base relations that are table functions.
	 * The range table lookup is done inside the Assert so that no local
	 * variable is left set-but-unused when assertions are compiled out.
	 */
	Assert(rel->relid > 0);
	Assert(planner_rt_fetch(rel->relid, root)->rtekind == RTE_TABLEFUNC);

	/*
	 * Fixed row-count guess: nothing in this routine inspects the table
	 * function expression, so the same value is used for every tablefunc.
	 */
	rel->tuples = 100;

	/* Now estimate number of output rows, etc */
	set_baserel_size_estimates(root, rel);
}
/*
* set_values_size_estimates
* Set the size estimates for a base relation that is a values list.