1
0
mirror of https://github.com/postgres/postgres.git synced 2025-08-30 06:01:21 +03:00

Support XMLTABLE query expression

XMLTABLE is defined by the SQL/XML standard as a feature that allows
turning XML-formatted data into relational form, so that it can be used
as a <table primary> in the FROM clause of a query.

This new construct provides significant simplicity and performance
benefit for XML data processing; what in a client-side custom
implementation was reported to take 20 minutes can be executed in 400ms
using XMLTABLE.  (The same functionality was said to take 10 seconds
using nested PostgreSQL XPath function calls, and 5 seconds using
XMLReader under PL/Python).

The implemented syntax deviates slightly from what the standard
requires.  First, the standard indicates that the PASSING clause is
optional and that multiple XML input documents may be given to it; we
make it mandatory and accept a single document only.  Second, we don't
currently support a default namespace to be specified.

This implementation relies on a new executor node based on a hardcoded
method table.  (Because the grammar is fixed, there is no extensibility
in the current approach; further constructs can be implemented on top of
this such as JSON_TABLE, but they require changes to core code.)

Author: Pavel Stehule, Álvaro Herrera
Extensively reviewed by: Craig Ringer
Discussion: https://postgr.es/m/CAFj8pRAgfzMD-LoSmnMGybD0WsEznLHWap8DO79+-GTRAPR4qA@mail.gmail.com
This commit is contained in:
Alvaro Herrera
2017-03-08 12:39:37 -03:00
parent 270d7dd8a5
commit fcec6caafa
52 changed files with 4606 additions and 50 deletions

View File

@@ -106,6 +106,8 @@ static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
static void set_values_pathlist(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
static void set_tablefunc_pathlist(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
static void set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel,
@@ -365,6 +367,9 @@ set_rel_size(PlannerInfo *root, RelOptInfo *rel,
case RTE_FUNCTION:
set_function_size_estimates(root, rel);
break;
case RTE_TABLEFUNC:
set_tablefunc_size_estimates(root, rel);
break;
case RTE_VALUES:
set_values_size_estimates(root, rel);
break;
@@ -437,6 +442,10 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
/* RangeFunction */
set_function_pathlist(root, rel, rte);
break;
case RTE_TABLEFUNC:
/* Table Function */
set_tablefunc_pathlist(root, rel, rte);
break;
case RTE_VALUES:
/* Values list */
set_values_pathlist(root, rel, rte);
@@ -599,6 +608,10 @@ set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel,
return;
break;
case RTE_TABLEFUNC:
/* not parallel safe */
return;
case RTE_VALUES:
/* Check for parallel-restricted functions. */
if (!is_parallel_safe(root, (Node *) rte->values_lists))
@@ -1932,6 +1945,27 @@ set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
add_path(rel, create_valuesscan_path(root, rel, required_outer));
}
/*
* set_tablefunc_pathlist
* Build the (single) access path for a table func RTE
*/
static void
set_tablefunc_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
Relids required_outer;
/*
* We don't support pushing join clauses into the quals of a tablefunc
* scan, but it could still have required parameterization due to LATERAL
* refs in the function expression.
*/
required_outer = rel->lateral_relids;
/* Generate appropriate path */
add_path(rel, create_tablefuncscan_path(root, rel,
required_outer));
}
/*
* set_cte_pathlist
* Build the (single) access path for a non-self-reference CTE RTE
@@ -3032,6 +3066,9 @@ print_path(PlannerInfo *root, Path *path, int indent)
case T_FunctionScan:
ptype = "FunctionScan";
break;
case T_TableFuncScan:
ptype = "TableFuncScan";
break;
case T_ValuesScan:
ptype = "ValuesScan";
break;

View File

@@ -1277,6 +1277,62 @@ cost_functionscan(Path *path, PlannerInfo *root,
path->total_cost = startup_cost + run_cost;
}
/*
* cost_tablefuncscan
* Determines and returns the cost of scanning a table function.
*
* 'baserel' is the relation to be scanned
* 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
*/
void
cost_tablefuncscan(Path *path, PlannerInfo *root,
RelOptInfo *baserel, ParamPathInfo *param_info)
{
Cost startup_cost = 0;
Cost run_cost = 0;
QualCost qpqual_cost;
Cost cpu_per_tuple;
RangeTblEntry *rte;
QualCost exprcost;
/* Should only be applied to base relations that are functions */
Assert(baserel->relid > 0);
rte = planner_rt_fetch(baserel->relid, root);
Assert(rte->rtekind == RTE_TABLEFUNC);
/* Mark the path with the correct row estimate */
if (param_info)
path->rows = param_info->ppi_rows;
else
path->rows = baserel->rows;
/*
* Estimate costs of executing the table func expression(s).
*
* XXX in principle we ought to charge tuplestore spill costs if the
* number of rows is large. However, given how phony our rowcount
* estimates for tablefuncs tend to be, there's not a lot of point in that
* refinement right now.
*/
cost_qual_eval_node(&exprcost, (Node *) rte->tablefunc, root);
startup_cost += exprcost.startup + exprcost.per_tuple;
/* Add scanning CPU costs */
get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
startup_cost += qpqual_cost.startup;
cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
run_cost += cpu_per_tuple * baserel->tuples;
/* tlist eval costs are paid per output row, not per tuple scanned */
startup_cost += path->pathtarget->cost.startup;
run_cost += path->pathtarget->cost.per_tuple * path->rows;
path->startup_cost = startup_cost;
path->total_cost = startup_cost + run_cost;
}
/*
* cost_valuesscan
* Determines and returns the cost of scanning a VALUES RTE.
@@ -4421,6 +4477,31 @@ set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel)
set_baserel_size_estimates(root, rel);
}
/*
* set_function_size_estimates
* Set the size estimates for a base relation that is a function call.
*
* The rel's targetlist and restrictinfo list must have been constructed
* already.
*
* We set the same fields as set_tablefunc_size_estimates.
*/
void
set_tablefunc_size_estimates(PlannerInfo *root, RelOptInfo *rel)
{
RangeTblEntry *rte;
/* Should only be applied to base relations that are functions */
Assert(rel->relid > 0);
rte = planner_rt_fetch(rel->relid, root);
Assert(rte->rtekind == RTE_TABLEFUNC);
rel->tuples = 100;
/* Now estimate number of output rows, etc */
set_baserel_size_estimates(root, rel);
}
/*
* set_values_size_estimates
* Set the size estimates for a base relation that is a values list.

View File

@@ -134,6 +134,8 @@ static FunctionScan *create_functionscan_plan(PlannerInfo *root, Path *best_path
List *tlist, List *scan_clauses);
static ValuesScan *create_valuesscan_plan(PlannerInfo *root, Path *best_path,
List *tlist, List *scan_clauses);
static TableFuncScan *create_tablefuncscan_plan(PlannerInfo *root, Path *best_path,
List *tlist, List *scan_clauses);
static CteScan *create_ctescan_plan(PlannerInfo *root, Path *best_path,
List *tlist, List *scan_clauses);
static WorkTableScan *create_worktablescan_plan(PlannerInfo *root, Path *best_path,
@@ -190,6 +192,8 @@ static FunctionScan *make_functionscan(List *qptlist, List *qpqual,
Index scanrelid, List *functions, bool funcordinality);
static ValuesScan *make_valuesscan(List *qptlist, List *qpqual,
Index scanrelid, List *values_lists);
static TableFuncScan *make_tablefuncscan(List *qptlist, List *qpqual,
Index scanrelid, TableFunc *tablefunc);
static CteScan *make_ctescan(List *qptlist, List *qpqual,
Index scanrelid, int ctePlanId, int cteParam);
static WorkTableScan *make_worktablescan(List *qptlist, List *qpqual,
@@ -355,6 +359,7 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags)
case T_TidScan:
case T_SubqueryScan:
case T_FunctionScan:
case T_TableFuncScan:
case T_ValuesScan:
case T_CteScan:
case T_WorkTableScan:
@@ -635,6 +640,13 @@ create_scan_plan(PlannerInfo *root, Path *best_path, int flags)
scan_clauses);
break;
case T_TableFuncScan:
plan = (Plan *) create_tablefuncscan_plan(root,
best_path,
tlist,
scan_clauses);
break;
case T_ValuesScan:
plan = (Plan *) create_valuesscan_plan(root,
best_path,
@@ -749,11 +761,12 @@ use_physical_tlist(PlannerInfo *root, Path *path, int flags)
/*
* We can do this for real relation scans, subquery scans, function scans,
* values scans, and CTE scans (but not for, eg, joins).
* tablefunc scans, values scans, and CTE scans (but not for, eg, joins).
*/
if (rel->rtekind != RTE_RELATION &&
rel->rtekind != RTE_SUBQUERY &&
rel->rtekind != RTE_FUNCTION &&
rel->rtekind != RTE_TABLEFUNC &&
rel->rtekind != RTE_VALUES &&
rel->rtekind != RTE_CTE)
return false;
@@ -3014,6 +3027,49 @@ create_functionscan_plan(PlannerInfo *root, Path *best_path,
return scan_plan;
}
/*
* create_tablefuncscan_plan
* Returns a tablefuncscan plan for the base relation scanned by 'best_path'
* with restriction clauses 'scan_clauses' and targetlist 'tlist'.
*/
static TableFuncScan *
create_tablefuncscan_plan(PlannerInfo *root, Path *best_path,
List *tlist, List *scan_clauses)
{
TableFuncScan *scan_plan;
Index scan_relid = best_path->parent->relid;
RangeTblEntry *rte;
TableFunc *tablefunc;
/* it should be a function base rel... */
Assert(scan_relid > 0);
rte = planner_rt_fetch(scan_relid, root);
Assert(rte->rtekind == RTE_TABLEFUNC);
tablefunc = rte->tablefunc;
/* Sort clauses into best execution order */
scan_clauses = order_qual_clauses(root, scan_clauses);
/* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */
scan_clauses = extract_actual_clauses(scan_clauses, false);
/* Replace any outer-relation variables with nestloop params */
if (best_path->param_info)
{
scan_clauses = (List *)
replace_nestloop_params(root, (Node *) scan_clauses);
/* The function expressions could contain nestloop params, too */
tablefunc = (TableFunc *) replace_nestloop_params(root, (Node *) tablefunc);
}
scan_plan = make_tablefuncscan(tlist, scan_clauses, scan_relid,
tablefunc);
copy_generic_path_info(&scan_plan->scan.plan, best_path);
return scan_plan;
}
/*
* create_valuesscan_plan
* Returns a valuesscan plan for the base relation scanned by 'best_path'
@@ -4909,6 +4965,25 @@ make_functionscan(List *qptlist,
return node;
}
static TableFuncScan *
make_tablefuncscan(List *qptlist,
List *qpqual,
Index scanrelid,
TableFunc *tablefunc)
{
TableFuncScan *node = makeNode(TableFuncScan);
Plan *plan = &node->scan.plan;
plan->targetlist = qptlist;
plan->qual = qpqual;
plan->lefttree = NULL;
plan->righttree = NULL;
node->scan.scanrelid = scanrelid;
node->tablefunc = tablefunc;
return node;
}
static ValuesScan *
make_valuesscan(List *qptlist,
List *qpqual,

View File

@@ -335,6 +335,8 @@ extract_lateral_references(PlannerInfo *root, RelOptInfo *brel, Index rtindex)
vars = pull_vars_of_level((Node *) rte->subquery, 1);
else if (rte->rtekind == RTE_FUNCTION)
vars = pull_vars_of_level((Node *) rte->functions, 0);
else if (rte->rtekind == RTE_TABLEFUNC)
vars = pull_vars_of_level((Node *) rte->tablefunc, 0);
else if (rte->rtekind == RTE_VALUES)
vars = pull_vars_of_level((Node *) rte->values_lists, 0);
else

View File

@@ -69,17 +69,19 @@ create_upper_paths_hook_type create_upper_paths_hook = NULL;
/* Expression kind codes for preprocess_expression */
#define EXPRKIND_QUAL 0
#define EXPRKIND_TARGET 1
#define EXPRKIND_RTFUNC 2
#define EXPRKIND_RTFUNC_LATERAL 3
#define EXPRKIND_VALUES 4
#define EXPRKIND_VALUES_LATERAL 5
#define EXPRKIND_LIMIT 6
#define EXPRKIND_APPINFO 7
#define EXPRKIND_PHV 8
#define EXPRKIND_TABLESAMPLE 9
#define EXPRKIND_ARBITER_ELEM 10
#define EXPRKIND_QUAL 0
#define EXPRKIND_TARGET 1
#define EXPRKIND_RTFUNC 2
#define EXPRKIND_RTFUNC_LATERAL 3
#define EXPRKIND_VALUES 4
#define EXPRKIND_VALUES_LATERAL 5
#define EXPRKIND_LIMIT 6
#define EXPRKIND_APPINFO 7
#define EXPRKIND_PHV 8
#define EXPRKIND_TABLESAMPLE 9
#define EXPRKIND_ARBITER_ELEM 10
#define EXPRKIND_TABLEFUNC 11
#define EXPRKIND_TABLEFUNC_LATERAL 12
/* Passthrough data for standard_qp_callback */
typedef struct
@@ -685,7 +687,15 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
{
/* Preprocess the function expression(s) fully */
kind = rte->lateral ? EXPRKIND_RTFUNC_LATERAL : EXPRKIND_RTFUNC;
rte->functions = (List *) preprocess_expression(root, (Node *) rte->functions, kind);
rte->functions = (List *)
preprocess_expression(root, (Node *) rte->functions, kind);
}
else if (rte->rtekind == RTE_TABLEFUNC)
{
/* Preprocess the function expression(s) fully */
kind = rte->lateral ? EXPRKIND_TABLEFUNC_LATERAL : EXPRKIND_TABLEFUNC;
rte->tablefunc = (TableFunc *)
preprocess_expression(root, (Node *) rte->tablefunc, kind);
}
else if (rte->rtekind == RTE_VALUES)
{
@@ -844,7 +854,8 @@ preprocess_expression(PlannerInfo *root, Node *expr, int kind)
if (root->hasJoinRTEs &&
!(kind == EXPRKIND_RTFUNC ||
kind == EXPRKIND_VALUES ||
kind == EXPRKIND_TABLESAMPLE))
kind == EXPRKIND_TABLESAMPLE ||
kind == EXPRKIND_TABLEFUNC))
expr = flatten_join_alias_vars(root, expr);
/*

View File

@@ -393,6 +393,7 @@ add_rte_to_flat_rtable(PlannerGlobal *glob, RangeTblEntry *rte)
newrte->subquery = NULL;
newrte->joinaliasvars = NIL;
newrte->functions = NIL;
newrte->tablefunc = NULL;
newrte->values_lists = NIL;
newrte->coltypes = NIL;
newrte->coltypmods = NIL;
@@ -553,6 +554,19 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
fix_scan_list(root, splan->functions, rtoffset);
}
break;
case T_TableFuncScan:
{
TableFuncScan *splan = (TableFuncScan *) plan;
splan->scan.scanrelid += rtoffset;
splan->scan.plan.targetlist =
fix_scan_list(root, splan->scan.plan.targetlist, rtoffset);
splan->scan.plan.qual =
fix_scan_list(root, splan->scan.plan.qual, rtoffset);
splan->tablefunc = (TableFunc *)
fix_scan_expr(root, (Node *) splan->tablefunc, rtoffset);
}
break;
case T_ValuesScan:
{
ValuesScan *splan = (ValuesScan *) plan;

View File

@@ -2421,6 +2421,12 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
}
break;
case T_TableFuncScan:
finalize_primnode((Node *) ((TableFuncScan *) plan)->tablefunc,
&context);
context.paramids = bms_add_members(context.paramids, scan_params);
break;
case T_ValuesScan:
finalize_primnode((Node *) ((ValuesScan *) plan)->values_lists,
&context);

View File

@@ -1118,6 +1118,7 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
case RTE_SUBQUERY:
case RTE_FUNCTION:
case RTE_VALUES:
case RTE_TABLEFUNC:
child_rte->lateral = true;
break;
case RTE_JOIN:
@@ -1964,6 +1965,11 @@ replace_vars_in_jointree(Node *jtnode,
pullup_replace_vars((Node *) rte->functions,
context);
break;
case RTE_TABLEFUNC:
rte->tablefunc = (TableFunc *)
pullup_replace_vars((Node *) rte->tablefunc,
context);
break;
case RTE_VALUES:
rte->values_lists = (List *)
pullup_replace_vars((Node *) rte->values_lists,

View File

@@ -1750,6 +1750,32 @@ create_functionscan_path(PlannerInfo *root, RelOptInfo *rel,
return pathnode;
}
/*
* create_tablefuncscan_path
* Creates a path corresponding to a sequential scan of a table function,
* returning the pathnode.
*/
Path *
create_tablefuncscan_path(PlannerInfo *root, RelOptInfo *rel,
Relids required_outer)
{
Path *pathnode = makeNode(Path);
pathnode->pathtype = T_TableFuncScan;
pathnode->parent = rel;
pathnode->pathtarget = rel->reltarget;
pathnode->param_info = get_baserel_parampathinfo(root, rel,
required_outer);
pathnode->parallel_aware = false;
pathnode->parallel_safe = rel->consider_parallel;
pathnode->parallel_workers = 0;
pathnode->pathkeys = NIL; /* result is always unordered */
cost_tablefuncscan(pathnode, root, rel, pathnode->param_info);
return pathnode;
}
/*
* create_valuesscan_path
* Creates a path corresponding to a scan of a VALUES list,

View File

@@ -1381,8 +1381,9 @@ relation_excluded_by_constraints(PlannerInfo *root,
* dropped cols.
*
* We also support building a "physical" tlist for subqueries, functions,
* values lists, and CTEs, since the same optimization can occur in
* SubqueryScan, FunctionScan, ValuesScan, CteScan, and WorkTableScan nodes.
* values lists, table expressions and CTEs, since the same optimization can
* occur in SubqueryScan, FunctionScan, ValuesScan, CteScan, TableFunc
* and WorkTableScan nodes.
*/
List *
build_physical_tlist(PlannerInfo *root, RelOptInfo *rel)
@@ -1454,6 +1455,7 @@ build_physical_tlist(PlannerInfo *root, RelOptInfo *rel)
break;
case RTE_FUNCTION:
case RTE_TABLEFUNC:
case RTE_VALUES:
case RTE_CTE:
/* Not all of these can have dropped cols, but share code anyway */

View File

@@ -150,12 +150,13 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind)
break;
case RTE_SUBQUERY:
case RTE_FUNCTION:
case RTE_TABLEFUNC:
case RTE_VALUES:
case RTE_CTE:
/*
* Subquery, function, or values list --- set up attr range and
* arrays
* Subquery, function, tablefunc, or values list --- set up attr
* range and arrays
*
* Note: 0 is included in range to support whole-row Vars
*/