Avoid using a cursor in plpgsql's RETURN QUERY statement.

plpgsql has always executed the query given in a RETURN QUERY command by opening it as a cursor and then fetching a few rows at a time, which it turns around and dumps into the function's result tuplestore. The point of this was to keep from blowing out memory with an oversized SPITupleTable result (note that while a tuplestore can spill tuples to disk, SPITupleTable cannot). However, it's rather inefficient, both because of extra data copying and because of executor entry/exit overhead. In recent versions, a new performance problem has emerged: use of a cursor prevents use of a parallel plan for the executed query.

We can improve matters by skipping use of a cursor and having the executor push result tuples directly into the function's result tuplestore. However, a moderate amount of new infrastructure is needed to make that idea work:

* We can use the existing tstoreReceiver.c DestReceiver code to funnel executor output to the tuplestore, but it has to be extended to support plpgsql's requirement for possibly applying a tuple conversion map.

* SPI needs to be extended to allow use of a caller-supplied DestReceiver instead of its usual receiver that puts tuples into a SPITupleTable. Two new API calls are needed to handle both the RETURN QUERY and RETURN QUERY EXECUTE cases.

I also felt that I didn't want these new API calls to use the legacy method of specifying query parameter values with "char" null flags (the old ' '/'n' convention); rather they should accept ParamListInfo objects containing the parameter type and value info. This required a bit of additional new infrastructure since we didn't yet have any parse analysis callback that would interpret $N parameter symbols according to type data supplied in a ParamListInfo. There seems to be no harm in letting makeParamList install that callback by default, rather than leaving a new ParamListInfo's parserSetup hook as NULL. (Indeed, as of HEAD, I couldn't find anyplace that was using the parserSetup field at all; plpgsql was using parserSetupArg for its own purposes, but parserSetup seemed to be write-only.)

We can actually get plpgsql out of the business of using legacy null flags altogether, and using ParamListInfo instead of its ad-hoc PreparedParamsData structure; but this requires inventing one more SPI API call that can replace SPI_cursor_open_with_args. That seems worth doing, though.

SPI_execute_with_args and SPI_cursor_open_with_args are now unused anywhere in the core PG distribution. Perhaps someday we could deprecate/remove them. But cleaning up the crufty bits of the SPI API is a task for a different patch.

Per bug #16040 from Jeremy Smith. This is unfortunately too invasive to consider back-patching. Patch by me; thanks to Hamid Akhtar for review.

Discussion: https://postgr.es/m/16040-eaacad11fecfb198@postgresql.org
2025-12-21 05:21:08 +03:00 · 2020-06-12 12:14:32 -04:00
parent aaf8c99050
commit 2f48ede080
9 changed files with 800 additions and 151 deletions
--- a/src/backend/executor/spi.c
+++ b/src/backend/executor/spi.c
@@ -60,7 +60,8 @@ static void _SPI_prepare_oneshot_plan(const char *src, SPIPlanPtr plan);

 static int	_SPI_execute_plan(SPIPlanPtr plan, ParamListInfo paramLI,
 							  Snapshot snapshot, Snapshot crosscheck_snapshot,
-							  bool read_only, bool fire_triggers, uint64 tcount);
+							  bool read_only, bool fire_triggers, uint64 tcount,
+							  DestReceiver *caller_dest);

 static ParamListInfo _SPI_convert_params(int nargs, Oid *argtypes,
 										 Datum *Values, const char *Nulls);
@@ -513,7 +514,7 @@ SPI_execute(const char *src, bool read_only, long tcount)

 	res = _SPI_execute_plan(&plan, NULL,
 							InvalidSnapshot, InvalidSnapshot,
-							read_only, true, tcount);
+							read_only, true, tcount, NULL);

 	_SPI_end_call(true);
 	return res;
@@ -547,7 +548,7 @@ SPI_execute_plan(SPIPlanPtr plan, Datum *Values, const char *Nulls,
 							_SPI_convert_params(plan->nargs, plan->argtypes,
 												Values, Nulls),
 							InvalidSnapshot, InvalidSnapshot,
-							read_only, true, tcount);
+							read_only, true, tcount, NULL);

 	_SPI_end_call(true);
 	return res;
@@ -576,7 +577,36 @@ SPI_execute_plan_with_paramlist(SPIPlanPtr plan, ParamListInfo params,

 	res = _SPI_execute_plan(plan, params,
 							InvalidSnapshot, InvalidSnapshot,
-							read_only, true, tcount);
+							read_only, true, tcount, NULL);
+
+	_SPI_end_call(true);
+	return res;
+}
+
+/*
+ * Execute a previously prepared plan.  If dest isn't NULL, we send result
+ * tuples to the caller-supplied DestReceiver rather than through the usual
+ * SPI output arrangements.  If dest is NULL this is equivalent to
+ * SPI_execute_plan_with_paramlist.
+ */
+int
+SPI_execute_plan_with_receiver(SPIPlanPtr plan,
+							   ParamListInfo params,
+							   bool read_only, long tcount,
+							   DestReceiver *dest)
+{
+	int			res;
+
+	if (plan == NULL || plan->magic != _SPI_PLAN_MAGIC || tcount < 0)
+		return SPI_ERROR_ARGUMENT;
+
+	res = _SPI_begin_call(true);
+	if (res < 0)
+		return res;
+
+	res = _SPI_execute_plan(plan, params,
+							InvalidSnapshot, InvalidSnapshot,
+							read_only, true, tcount, dest);

 	_SPI_end_call(true);
 	return res;
@@ -617,7 +647,7 @@ SPI_execute_snapshot(SPIPlanPtr plan,
 							_SPI_convert_params(plan->nargs, plan->argtypes,
 												Values, Nulls),
 							snapshot, crosscheck_snapshot,
-							read_only, fire_triggers, tcount);
+							read_only, fire_triggers, tcount, NULL);

 	_SPI_end_call(true);
 	return res;
@@ -664,7 +694,50 @@ SPI_execute_with_args(const char *src,

 	res = _SPI_execute_plan(&plan, paramLI,
 							InvalidSnapshot, InvalidSnapshot,
-							read_only, true, tcount);
+							read_only, true, tcount, NULL);
+
+	_SPI_end_call(true);
+	return res;
+}
+
+/*
+ * SPI_execute_with_receiver -- plan and execute a query with arguments
+ *
+ * This is the same as SPI_execute_with_args except that parameters are
+ * supplied through a ParamListInfo, and (if dest isn't NULL) we send
+ * result tuples to the caller-supplied DestReceiver rather than through
+ * the usual SPI output arrangements.
+ */
+int
+SPI_execute_with_receiver(const char *src,
+						  ParamListInfo params,
+						  bool read_only, long tcount,
+						  DestReceiver *dest)
+{
+	int			res;
+	_SPI_plan	plan;
+
+	if (src == NULL || tcount < 0)
+		return SPI_ERROR_ARGUMENT;
+
+	res = _SPI_begin_call(true);
+	if (res < 0)
+		return res;
+
+	memset(&plan, 0, sizeof(_SPI_plan));
+	plan.magic = _SPI_PLAN_MAGIC;
+	plan.cursor_options = CURSOR_OPT_PARALLEL_OK;
+	if (params)
+	{
+		plan.parserSetup = params->parserSetup;
+		plan.parserSetupArg = params->parserSetupArg;
+	}
+
+	_SPI_prepare_oneshot_plan(src, &plan);
+
+	res = _SPI_execute_plan(&plan, params,
+							InvalidSnapshot, InvalidSnapshot,
+							read_only, true, tcount, dest);

 	_SPI_end_call(true);
 	return res;
@@ -1303,6 +1376,49 @@ SPI_cursor_open_with_paramlist(const char *name, SPIPlanPtr plan,
 	return SPI_cursor_open_internal(name, plan, params, read_only);
 }

+/*
+ * SPI_cursor_parse_open_with_paramlist()
+ *
+ * Same as SPI_cursor_open_with_args except that parameters (if any) are passed
+ * as a ParamListInfo, whose parserSetup hook and arg are installed in the plan.
+ */
+Portal
+SPI_cursor_parse_open_with_paramlist(const char *name,
+									 const char *src,
+									 ParamListInfo params,
+									 bool read_only, int cursorOptions)
+{
+	Portal		result;
+	_SPI_plan	plan;
+
+	if (src == NULL)
+		elog(ERROR, "SPI_cursor_parse_open_with_paramlist called with invalid arguments");
+
+	SPI_result = _SPI_begin_call(true);
+	if (SPI_result < 0)
+		elog(ERROR, "SPI_cursor_parse_open_with_paramlist called while not connected");
+
+	memset(&plan, 0, sizeof(_SPI_plan));
+	plan.magic = _SPI_PLAN_MAGIC;
+	plan.cursor_options = cursorOptions;
+	if (params)
+	{
+		plan.parserSetup = params->parserSetup;
+		plan.parserSetupArg = params->parserSetupArg;
+	}
+
+	_SPI_prepare_plan(src, &plan);
+
+	/* We needn't copy the plan; SPI_cursor_open_internal will do so */
+
+	result = SPI_cursor_open_internal(name, &plan, params, read_only);
+
+	/* Close out the _SPI_begin_call made above before returning the portal */
+	_SPI_end_call(true);
+
+	return result;
+}
+

 /*
 * SPI_cursor_open_internal()
@@ -2090,11 +2206,13 @@ _SPI_prepare_oneshot_plan(const char *src, SPIPlanPtr plan)
 * fire_triggers: true to fire AFTER triggers at end of query (normal case);
 *		false means any AFTER triggers are postponed to end of outer query
 * tcount: execution tuple-count limit, or 0 for none
+ * caller_dest: DestReceiver to receive output, or NULL for normal SPI output
 */
 static int
 _SPI_execute_plan(SPIPlanPtr plan, ParamListInfo paramLI,
 				  Snapshot snapshot, Snapshot crosscheck_snapshot,
-				  bool read_only, bool fire_triggers, uint64 tcount)
+				  bool read_only, bool fire_triggers, uint64 tcount,
+				  DestReceiver *caller_dest)
 {
 	int			my_res = 0;
 	uint64		my_processed = 0;
@@ -2228,6 +2346,12 @@ _SPI_execute_plan(SPIPlanPtr plan, ParamListInfo paramLI,
 			bool		canSetTag = stmt->canSetTag;
 			DestReceiver *dest;

+			/*
+			 * Reset output state.  (Note that if a non-SPI receiver is used,
+			 * _SPI_current->processed will stay zero, and that's what we'll
+			 * report to the caller.  It's the receiver's job to count tuples
+			 * in that case.)
+			 */
 			_SPI_current->processed = 0;
 			_SPI_current->tuptable = NULL;

@@ -2267,7 +2391,16 @@ _SPI_execute_plan(SPIPlanPtr plan, ParamListInfo paramLI,
 				UpdateActiveSnapshotCommandId();
 			}

-			dest = CreateDestReceiver(canSetTag ? DestSPI : DestNone);
+			/*
+			 * Select appropriate tuple receiver.  Output from non-canSetTag
+			 * subqueries always goes to the bit bucket.
+			 */
+			if (!canSetTag)
+				dest = CreateDestReceiver(DestNone);
+			else if (caller_dest)
+				dest = caller_dest;
+			else
+				dest = CreateDestReceiver(DestSPI);

 			if (stmt->utilityStmt == NULL)
 			{
@@ -2373,7 +2506,13 @@ _SPI_execute_plan(SPIPlanPtr plan, ParamListInfo paramLI,
 				SPI_freetuptable(_SPI_current->tuptable);
 				_SPI_current->tuptable = NULL;
 			}
-			/* we know that the receiver doesn't need a destroy call */
+
+			/*
+			 * We don't issue a destroy call to the receiver.  The SPI and
+			 * None receivers would ignore it anyway, while if the caller
+			 * supplied a receiver, it's not our job to destroy it.
+			 */
+
 			if (res < 0)
 			{
 				my_res = res;
@@ -2465,7 +2604,7 @@ _SPI_pquery(QueryDesc *queryDesc, bool fire_triggers, uint64 tcount)
 	switch (operation)
 	{
 		case CMD_SELECT:
-			if (queryDesc->dest->mydest != DestSPI)
+			if (queryDesc->dest->mydest == DestNone)
 			{
 				/* Don't return SPI_OK_SELECT if we're discarding result */
 				res = SPI_OK_UTILITY;
--- a/src/backend/executor/tstoreReceiver.c
+++ b/src/backend/executor/tstoreReceiver.c
@@ -8,6 +8,8 @@
 * toasted values.  This is to support cursors WITH HOLD, which must retain
 * data even if the underlying table is dropped.
 *
+ * Also optionally, we can apply a tuple conversion map before storing.
+ *
 *
 * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
@@ -21,6 +23,7 @@
 #include "postgres.h"

 #include "access/detoast.h"
+#include "access/tupconvert.h"
 #include "executor/tstoreReceiver.h"


@@ -31,14 +34,19 @@ typedef struct
 	Tuplestorestate *tstore;	/* where to put the data */
 	MemoryContext cxt;			/* context containing tstore */
 	bool		detoast;		/* were we told to detoast? */
+	TupleDesc	target_tupdesc; /* target tupdesc, or NULL if none */
+	const char *map_failure_msg;	/* tupdesc mapping failure message */
 	/* workspace: */
 	Datum	   *outvalues;		/* values array for result tuple */
 	Datum	   *tofree;			/* temp values to be pfree'd */
+	TupleConversionMap *tupmap; /* conversion map, if needed */
+	TupleTableSlot *mapslot;	/* slot for mapped tuples */
 } TStoreState;


 static bool tstoreReceiveSlot_notoast(TupleTableSlot *slot, DestReceiver *self);
 static bool tstoreReceiveSlot_detoast(TupleTableSlot *slot, DestReceiver *self);
+static bool tstoreReceiveSlot_tupmap(TupleTableSlot *slot, DestReceiver *self);


 /*
@@ -69,27 +77,46 @@ tstoreStartupReceiver(DestReceiver *self, int operation, TupleDesc typeinfo)
 		}
 	}

+	/* Check if tuple conversion is needed */
+	if (myState->target_tupdesc)
+		myState->tupmap = convert_tuples_by_position(typeinfo,
+													 myState->target_tupdesc,
+													 myState->map_failure_msg);
+	else
+		myState->tupmap = NULL;
+
 	/* Set up appropriate callback */
 	if (needtoast)
 	{
+		Assert(!myState->tupmap);
 		myState->pub.receiveSlot = tstoreReceiveSlot_detoast;
 		/* Create workspace */
 		myState->outvalues = (Datum *)
 			MemoryContextAlloc(myState->cxt, natts * sizeof(Datum));
 		myState->tofree = (Datum *)
 			MemoryContextAlloc(myState->cxt, natts * sizeof(Datum));
+		myState->mapslot = NULL;
+	}
+	else if (myState->tupmap)
+	{
+		myState->pub.receiveSlot = tstoreReceiveSlot_tupmap;
+		myState->outvalues = NULL;
+		myState->tofree = NULL;
+		myState->mapslot = MakeSingleTupleTableSlot(myState->target_tupdesc,
+													&TTSOpsVirtual);
 	}
 	else
 	{
 		myState->pub.receiveSlot = tstoreReceiveSlot_notoast;
 		myState->outvalues = NULL;
 		myState->tofree = NULL;
+		myState->mapslot = NULL;
 	}
 }

 /*
 * Receive a tuple from the executor and store it in the tuplestore.
- * This is for the easy case where we don't have to detoast.
+ * This is for the easy case where we don't have to detoast nor map anything.
 */
 static bool
 tstoreReceiveSlot_notoast(TupleTableSlot *slot, DestReceiver *self)
@@ -157,6 +184,21 @@ tstoreReceiveSlot_detoast(TupleTableSlot *slot, DestReceiver *self)
 	return true;
 }

+/*
+ * Receive a tuple from the executor, remap its columns into mapslot using
+ * tupmap's attribute map, and store the result.  Always returns true.
+ */
+static bool
+tstoreReceiveSlot_tupmap(TupleTableSlot *slot, DestReceiver *self)
+{
+	TStoreState *myState = (TStoreState *) self;
+
+	execute_attr_map_slot(myState->tupmap->attrMap, slot, myState->mapslot);
+	tuplestore_puttupleslot(myState->tstore, myState->mapslot);
+
+	return true;
+}
+
 /*
 * Clean up at end of an executor run
 */
@@ -172,6 +214,12 @@ tstoreShutdownReceiver(DestReceiver *self)
 	if (myState->tofree)
 		pfree(myState->tofree);
 	myState->tofree = NULL;
+	if (myState->tupmap)
+		free_conversion_map(myState->tupmap);
+	myState->tupmap = NULL;
+	if (myState->mapslot)
+		ExecDropSingleTupleTableSlot(myState->mapslot);
+	myState->mapslot = NULL;
 }

 /*
@@ -204,17 +252,32 @@ CreateTuplestoreDestReceiver(void)

 /*
 * Set parameters for a TuplestoreDestReceiver
+ *
+ * tStore: where to store the tuples
+ * tContext: memory context containing tStore
+ * detoast: forcibly detoast contained data?
+ * target_tupdesc: if not NULL, convert tuples to this rowtype, by position
+ * map_failure_msg: error message to use if mapping to target_tupdesc fails
+ *
+ * We don't currently support both detoast and target_tupdesc at the same
+ * time (see the Assert), just because no existing caller needs that.
 */
 void
 SetTuplestoreDestReceiverParams(DestReceiver *self,
 								Tuplestorestate *tStore,
 								MemoryContext tContext,
-								bool detoast)
+								bool detoast,
+								TupleDesc target_tupdesc,
+								const char *map_failure_msg)
 {
 	TStoreState *myState = (TStoreState *) self;
 
+	Assert(!(detoast && target_tupdesc));
+
 	Assert(myState->pub.mydest == DestTuplestore);
 	myState->tstore = tStore;
 	myState->cxt = tContext;
 	myState->detoast = detoast;
+	myState->target_tupdesc = target_tupdesc;
+	myState->map_failure_msg = map_failure_msg;
 }