1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-28 23:42:10 +03:00

Extend CTE patch to support recursive UNION (ie, without ALL). The

implementation uses an in-memory hash table, so it will poop out for very
large recursive results ... but the performance characteristics of a
sort-based implementation would be pretty unpleasant too.
This commit is contained in:
Tom Lane
2008-10-07 19:27:04 +00:00
parent 059349be0c
commit 0d115dde82
13 changed files with 345 additions and 74 deletions

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/executor/nodeRecursiveunion.c,v 1.1 2008/10/04 21:56:53 tgl Exp $
* $PostgreSQL: pgsql/src/backend/executor/nodeRecursiveunion.c,v 1.2 2008/10/07 19:27:04 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -17,6 +17,41 @@
#include "executor/execdebug.h"
#include "executor/nodeRecursiveunion.h"
#include "miscadmin.h"
#include "utils/memutils.h"
/*
* To implement UNION (without ALL), we need a hashtable that stores tuples
* already seen. The hash key is computed from the grouping columns.
*/
typedef struct RUHashEntryData *RUHashEntry;
typedef struct RUHashEntryData
{
TupleHashEntryData shared; /* common header for hash table entries */
} RUHashEntryData;
/*
* Initialize the hash table to empty.
*/
static void
build_hash_table(RecursiveUnionState *rustate)
{
RecursiveUnion *node = (RecursiveUnion *) rustate->ps.plan;
Assert(node->numCols > 0);
Assert(node->numGroups > 0);
rustate->hashtable = BuildTupleHashTable(node->numCols,
node->dupColIdx,
rustate->eqfunctions,
rustate->hashfunctions,
node->numGroups,
sizeof(RUHashEntryData),
rustate->tableContext,
rustate->tempContext);
}
/* ----------------------------------------------------------------
@ -44,49 +79,85 @@ ExecRecursiveUnion(RecursiveUnionState *node)
PlanState *innerPlan = innerPlanState(node);
RecursiveUnion *plan = (RecursiveUnion *) node->ps.plan;
TupleTableSlot *slot;
RUHashEntry entry;
bool isnew;
/* 1. Evaluate non-recursive term */
if (!node->recursing)
{
slot = ExecProcNode(outerPlan);
if (!TupIsNull(slot))
for (;;)
{
slot = ExecProcNode(outerPlan);
if (TupIsNull(slot))
break;
if (plan->numCols > 0)
{
/* Find or build hashtable entry for this tuple's group */
entry = (RUHashEntry)
LookupTupleHashEntry(node->hashtable, slot, &isnew);
/* Must reset temp context after each hashtable lookup */
MemoryContextReset(node->tempContext);
/* Ignore tuple if already seen */
if (!isnew)
continue;
}
/* Each non-duplicate tuple goes to the working table ... */
tuplestore_puttupleslot(node->working_table, slot);
/* ... and to the caller */
return slot;
}
node->recursing = true;
}
retry:
/* 2. Execute recursive term */
slot = ExecProcNode(innerPlan);
if (TupIsNull(slot))
for (;;)
{
if (node->intermediate_empty)
return NULL;
slot = ExecProcNode(innerPlan);
if (TupIsNull(slot))
{
/* Done if there's nothing in the intermediate table */
if (node->intermediate_empty)
break;
/* done with old working table ... */
tuplestore_end(node->working_table);
/* done with old working table ... */
tuplestore_end(node->working_table);
/* intermediate table becomes working table */
node->working_table = node->intermediate_table;
/* intermediate table becomes working table */
node->working_table = node->intermediate_table;
/* create new empty intermediate table */
node->intermediate_table = tuplestore_begin_heap(false, false, work_mem);
node->intermediate_empty = true;
/* create new empty intermediate table */
node->intermediate_table = tuplestore_begin_heap(false, false,
work_mem);
node->intermediate_empty = true;
/* and reset the inner plan */
innerPlan->chgParam = bms_add_member(innerPlan->chgParam,
plan->wtParam);
goto retry;
}
else
{
/* reset the recursive term */
innerPlan->chgParam = bms_add_member(innerPlan->chgParam,
plan->wtParam);
/* and continue fetching from recursive term */
continue;
}
if (plan->numCols > 0)
{
/* Find or build hashtable entry for this tuple's group */
entry = (RUHashEntry)
LookupTupleHashEntry(node->hashtable, slot, &isnew);
/* Must reset temp context after each hashtable lookup */
MemoryContextReset(node->tempContext);
/* Ignore tuple if already seen */
if (!isnew)
continue;
}
/* Else, tuple is good; stash it in intermediate table ... */
node->intermediate_empty = false;
tuplestore_puttupleslot(node->intermediate_table, slot);
}
/* ... and return it */
return slot;
}
return slot;
return NULL;
}
/* ----------------------------------------------------------------
@ -109,12 +180,40 @@ ExecInitRecursiveUnion(RecursiveUnion *node, EState *estate, int eflags)
rustate->ps.plan = (Plan *) node;
rustate->ps.state = estate;
rustate->eqfunctions = NULL;
rustate->hashfunctions = NULL;
rustate->hashtable = NULL;
rustate->tempContext = NULL;
rustate->tableContext = NULL;
/* initialize processing state */
rustate->recursing = false;
rustate->intermediate_empty = true;
rustate->working_table = tuplestore_begin_heap(false, false, work_mem);
rustate->intermediate_table = tuplestore_begin_heap(false, false, work_mem);
/*
* If hashing, we need a per-tuple memory context for comparisons, and a
* longer-lived context to store the hash table. The table can't just be
* kept in the per-query context because we want to be able to throw it
* away when rescanning.
*/
if (node->numCols > 0)
{
rustate->tempContext =
AllocSetContextCreate(CurrentMemoryContext,
"RecursiveUnion",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
rustate->tableContext =
AllocSetContextCreate(CurrentMemoryContext,
"RecursiveUnion hash table",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
}
/*
* Make the state structure available to descendant WorkTableScan nodes
* via the Param slot reserved for it.
@ -154,6 +253,19 @@ ExecInitRecursiveUnion(RecursiveUnion *node, EState *estate, int eflags)
outerPlanState(rustate) = ExecInitNode(outerPlan(node), estate, eflags);
innerPlanState(rustate) = ExecInitNode(innerPlan(node), estate, eflags);
/*
* If hashing, precompute fmgr lookup data for inner loop, and create
* the hash table.
*/
if (node->numCols > 0)
{
execTuplesHashPrepare(node->numCols,
node->dupOperators,
&rustate->eqfunctions,
&rustate->hashfunctions);
build_hash_table(rustate);
}
return rustate;
}
@ -178,6 +290,12 @@ ExecEndRecursiveUnion(RecursiveUnionState *node)
tuplestore_end(node->working_table);
tuplestore_end(node->intermediate_table);
/* free subsidiary stuff including hashtable */
if (node->tempContext)
MemoryContextDelete(node->tempContext);
if (node->tableContext)
MemoryContextDelete(node->tableContext);
/*
* clean out the upper tuple table
*/
@ -217,6 +335,14 @@ ExecRecursiveUnionReScan(RecursiveUnionState *node, ExprContext *exprCtxt)
if (outerPlan->chgParam == NULL)
ExecReScan(outerPlan, exprCtxt);
/* Release any hashtable storage */
if (node->tableContext)
MemoryContextResetAndDeleteChildren(node->tableContext);
/* And rebuild empty hashtable if needed */
if (plan->numCols > 0)
build_hash_table(node);
/* reset processing state */
node->recursing = false;
node->intermediate_empty = true;

View File

@ -15,7 +15,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.407 2008/10/06 17:39:25 tgl Exp $
* $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.408 2008/10/07 19:27:04 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -193,6 +193,13 @@ _copyRecursiveUnion(RecursiveUnion *from)
* copy remainder of node
*/
COPY_SCALAR_FIELD(wtParam);
COPY_SCALAR_FIELD(numCols);
if (from->numCols > 0)
{
COPY_POINTER_FIELD(dupColIdx, from->numCols * sizeof(AttrNumber));
COPY_POINTER_FIELD(dupOperators, from->numCols * sizeof(Oid));
}
COPY_SCALAR_FIELD(numGroups);
return newnode;
}

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.341 2008/10/06 17:39:26 tgl Exp $
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.342 2008/10/07 19:27:04 tgl Exp $
*
* NOTES
* Every node type that can appear in stored rules' parsetrees *must*
@ -334,11 +334,24 @@ _outAppend(StringInfo str, Append *node)
static void
_outRecursiveUnion(StringInfo str, RecursiveUnion *node)
{
int i;
WRITE_NODE_TYPE("RECURSIVEUNION");
_outPlanInfo(str, (Plan *) node);
WRITE_INT_FIELD(wtParam);
WRITE_INT_FIELD(numCols);
appendStringInfo(str, " :dupColIdx");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %d", node->dupColIdx[i]);
appendStringInfo(str, " :dupOperators");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %u", node->dupOperators[i]);
WRITE_LONG_FIELD(numGroups);
}
static void

View File

@ -10,7 +10,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.249 2008/10/04 21:56:53 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.250 2008/10/07 19:27:04 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -2545,10 +2545,13 @@ RecursiveUnion *
make_recursive_union(List *tlist,
Plan *lefttree,
Plan *righttree,
int wtParam)
int wtParam,
List *distinctList,
long numGroups)
{
RecursiveUnion *node = makeNode(RecursiveUnion);
Plan *plan = &node->plan;
int numCols = list_length(distinctList);
cost_recursive_union(plan, lefttree, righttree);
@ -2558,6 +2561,37 @@ make_recursive_union(List *tlist,
plan->righttree = righttree;
node->wtParam = wtParam;
/*
* convert SortGroupClause list into arrays of attr indexes and equality
* operators, as wanted by executor
*/
node->numCols = numCols;
if (numCols > 0)
{
int keyno = 0;
AttrNumber *dupColIdx;
Oid *dupOperators;
ListCell *slitem;
dupColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
dupOperators = (Oid *) palloc(sizeof(Oid) * numCols);
foreach(slitem, distinctList)
{
SortGroupClause *sortcl = (SortGroupClause *) lfirst(slitem);
TargetEntry *tle = get_sortgroupclause_tle(sortcl,
plan->targetlist);
dupColIdx[keyno] = tle->resno;
dupOperators[keyno] = sortcl->eqop;
Assert(OidIsValid(dupOperators[keyno]));
keyno++;
}
node->dupColIdx = dupColIdx;
node->dupOperators = dupOperators;
}
node->numGroups = numGroups;
return node;
}

View File

@ -22,7 +22,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.157 2008/10/06 17:39:26 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.158 2008/10/07 19:27:04 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -318,10 +318,12 @@ generate_recursion_plan(SetOperationStmt *setOp, PlannerInfo *root,
Plan *lplan;
Plan *rplan;
List *tlist;
List *groupList;
long numGroups;
/* Parser should have rejected other cases */
if (setOp->op != SETOP_UNION || !setOp->all)
elog(ERROR, "only UNION ALL queries can be recursive");
if (setOp->op != SETOP_UNION)
elog(ERROR, "only UNION queries can be recursive");
/* Worktable ID should be assigned */
Assert(root->wt_param_id >= 0);
@ -346,13 +348,46 @@ generate_recursion_plan(SetOperationStmt *setOp, PlannerInfo *root,
list_make2(lplan, rplan),
refnames_tlist);
/*
* If UNION, identify the grouping operators
*/
if (setOp->all)
{
groupList = NIL;
numGroups = 0;
}
else
{
double dNumGroups;
/* Identify the grouping semantics */
groupList = generate_setop_grouplist(setOp, tlist);
/* We only support hashing here */
if (!grouping_is_hashable(groupList))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("could not implement recursive UNION"),
errdetail("All column datatypes must be hashable.")));
/*
* For the moment, take the number of distinct groups as equal to
* the total input size, ie, the worst case.
*/
dNumGroups = lplan->plan_rows + rplan->plan_rows * 10;
/* Also convert to long int --- but 'ware overflow! */
numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
}
/*
* And make the plan node.
*/
plan = (Plan *) make_recursive_union(tlist, lplan, rplan,
root->wt_param_id);
root->wt_param_id,
groupList, numGroups);
*sortClauses = NIL; /* result of UNION ALL is always unsorted */
*sortClauses = NIL; /* RecursiveUnion result is always unsorted */
return plan;
}

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/parser/parse_cte.c,v 2.2 2008/10/05 22:50:55 tgl Exp $
* $PostgreSQL: pgsql/src/backend/parser/parse_cte.c,v 2.3 2008/10/07 19:27:04 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -601,11 +601,11 @@ checkWellFormedRecursion(CteState *cstate)
if (!cte->cterecursive)
continue;
/* Must have top-level UNION ALL */
if (stmt->op != SETOP_UNION || !stmt->all)
/* Must have top-level UNION */
if (stmt->op != SETOP_UNION)
ereport(ERROR,
(errcode(ERRCODE_INVALID_RECURSION),
errmsg("recursive query \"%s\" does not have the form non-recursive-term UNION ALL recursive-term",
errmsg("recursive query \"%s\" does not have the form non-recursive-term UNION [ALL] recursive-term",
cte->ctename),
parser_errposition(cstate->pstate, cte->location)));
@ -628,7 +628,7 @@ checkWellFormedRecursion(CteState *cstate)
elog(ERROR, "missing recursive reference");
/*
* Disallow ORDER BY and similar decoration atop the UNION ALL.
* Disallow ORDER BY and similar decoration atop the UNION.
* These don't make sense because it's impossible to figure out what
* they mean when we have only part of the recursive query's results.
* (If we did allow them, we'd have to check for recursive references

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.189 2008/10/04 21:56:55 tgl Exp $
* $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.190 2008/10/07 19:27:04 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -964,6 +964,12 @@ typedef struct RecursiveUnionState
bool intermediate_empty;
Tuplestorestate *working_table;
Tuplestorestate *intermediate_table;
/* Remaining fields are unused in UNION ALL case */
FmgrInfo *eqfunctions; /* per-grouping-field equality fns */
FmgrInfo *hashfunctions; /* per-grouping-field hash fns */
MemoryContext tempContext; /* short-term context for comparisons */
TupleHashTable hashtable; /* hash table for tuples already seen */
MemoryContext tableContext; /* memory context containing hash table */
} RecursiveUnionState;
/* ----------------

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/nodes/plannodes.h,v 1.104 2008/10/04 21:56:55 tgl Exp $
* $PostgreSQL: pgsql/src/include/nodes/plannodes.h,v 1.105 2008/10/07 19:27:04 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -194,6 +194,12 @@ typedef struct RecursiveUnion
{
Plan plan;
int wtParam; /* ID of Param representing work table */
/* Remaining fields are zero/null in UNION ALL case */
int numCols; /* number of columns to check for
* duplicate-ness */
AttrNumber *dupColIdx; /* their indexes in the target list */
Oid *dupOperators; /* equality operators to compare with */
long numGroups; /* estimated number of groups in input */
} RecursiveUnion;
/* ----------------

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.113 2008/10/04 21:56:55 tgl Exp $
* $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.114 2008/10/07 19:27:04 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -43,7 +43,8 @@ extern SubqueryScan *make_subqueryscan(List *qptlist, List *qpqual,
Index scanrelid, Plan *subplan, List *subrtable);
extern Append *make_append(List *appendplans, bool isTarget, List *tlist);
extern RecursiveUnion *make_recursive_union(List *tlist,
Plan *lefttree, Plan *righttree, int wtParam);
Plan *lefttree, Plan *righttree, int wtParam,
List *distinctList, long numGroups);
extern Sort *make_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree,
List *pathkeys, double limit_tuples);
extern Sort *make_sort_from_sortclauses(PlannerInfo *root, List *sortcls,

View File

@ -49,6 +49,18 @@ SELECT * FROM t;
5
(5 rows)
-- This is an infinite loop with UNION ALL, but not with UNION
WITH RECURSIVE t(n) AS (
SELECT 1
UNION
SELECT 10-n FROM t)
SELECT * FROM t;
n
---
1
9
(2 rows)
-- This'd be an infinite loop, but outside query reads only as much as needed
WITH RECURSIVE t(n) AS (
VALUES (1)
@ -69,6 +81,26 @@ SELECT * FROM t LIMIT 10;
10
(10 rows)
-- UNION case should have same property
WITH RECURSIVE t(n) AS (
SELECT 1
UNION
SELECT n+1 FROM t)
SELECT * FROM t LIMIT 10;
n
----
1
2
3
4
5
6
7
8
9
10
(10 rows)
-- Test behavior with an unknown-type literal in the WITH
WITH q AS (SELECT 'foo' AS x)
SELECT x, x IS OF (unknown) as is_unknown FROM q;
@ -510,38 +542,32 @@ WITH RECURSIVE
--
-- error cases
--
-- UNION (should be supported someday)
WITH RECURSIVE x(n) AS (SELECT 1 UNION SELECT n+1 FROM x)
SELECT * FROM x;
ERROR: recursive query "x" does not have the form non-recursive-term UNION ALL recursive-term
LINE 1: WITH RECURSIVE x(n) AS (SELECT 1 UNION SELECT n+1 FROM x)
^
-- INTERSECT
WITH RECURSIVE x(n) AS (SELECT 1 INTERSECT SELECT n+1 FROM x)
SELECT * FROM x;
ERROR: recursive query "x" does not have the form non-recursive-term UNION ALL recursive-term
ERROR: recursive query "x" does not have the form non-recursive-term UNION [ALL] recursive-term
LINE 1: WITH RECURSIVE x(n) AS (SELECT 1 INTERSECT SELECT n+1 FROM x...
^
WITH RECURSIVE x(n) AS (SELECT 1 INTERSECT ALL SELECT n+1 FROM x)
SELECT * FROM x;
ERROR: recursive query "x" does not have the form non-recursive-term UNION ALL recursive-term
ERROR: recursive query "x" does not have the form non-recursive-term UNION [ALL] recursive-term
LINE 1: WITH RECURSIVE x(n) AS (SELECT 1 INTERSECT ALL SELECT n+1 FR...
^
-- EXCEPT
WITH RECURSIVE x(n) AS (SELECT 1 EXCEPT SELECT n+1 FROM x)
SELECT * FROM x;
ERROR: recursive query "x" does not have the form non-recursive-term UNION ALL recursive-term
ERROR: recursive query "x" does not have the form non-recursive-term UNION [ALL] recursive-term
LINE 1: WITH RECURSIVE x(n) AS (SELECT 1 EXCEPT SELECT n+1 FROM x)
^
WITH RECURSIVE x(n) AS (SELECT 1 EXCEPT ALL SELECT n+1 FROM x)
SELECT * FROM x;
ERROR: recursive query "x" does not have the form non-recursive-term UNION ALL recursive-term
ERROR: recursive query "x" does not have the form non-recursive-term UNION [ALL] recursive-term
LINE 1: WITH RECURSIVE x(n) AS (SELECT 1 EXCEPT ALL SELECT n+1 FROM ...
^
-- no non-recursive term
WITH RECURSIVE x(n) AS (SELECT n FROM x)
SELECT * FROM x;
ERROR: recursive query "x" does not have the form non-recursive-term UNION ALL recursive-term
ERROR: recursive query "x" does not have the form non-recursive-term UNION [ALL] recursive-term
LINE 1: WITH RECURSIVE x(n) AS (SELECT n FROM x)
^
-- recursive term in the left hand side (strictly speaking, should allow this)

View File

@ -31,6 +31,13 @@ UNION ALL
)
SELECT * FROM t;
-- This is an infinite loop with UNION ALL, but not with UNION
WITH RECURSIVE t(n) AS (
SELECT 1
UNION
SELECT 10-n FROM t)
SELECT * FROM t;
-- This'd be an infinite loop, but outside query reads only as much as needed
WITH RECURSIVE t(n) AS (
VALUES (1)
@ -38,6 +45,13 @@ UNION ALL
SELECT n+1 FROM t)
SELECT * FROM t LIMIT 10;
-- UNION case should have same property
WITH RECURSIVE t(n) AS (
SELECT 1
UNION
SELECT n+1 FROM t)
SELECT * FROM t LIMIT 10;
-- Test behavior with an unknown-type literal in the WITH
WITH q AS (SELECT 'foo' AS x)
SELECT x, x IS OF (unknown) as is_unknown FROM q;
@ -265,10 +279,6 @@ WITH RECURSIVE
-- error cases
--
-- UNION (should be supported someday)
WITH RECURSIVE x(n) AS (SELECT 1 UNION SELECT n+1 FROM x)
SELECT * FROM x;
-- INTERSECT
WITH RECURSIVE x(n) AS (SELECT 1 INTERSECT SELECT n+1 FROM x)
SELECT * FROM x;