mirror of
https://github.com/postgres/postgres.git
synced 2025-08-31 17:02:12 +03:00
Support "expanded" objects, particularly arrays, for better performance.
This patch introduces the ability for complex datatypes to have an in-memory representation that is different from their on-disk format. On-disk formats are typically optimized for minimal size, and in any case they can't contain pointers, so they are often not well-suited for computation. Now a datatype can invent an "expanded" in-memory format that is better suited for its operations, and then pass that around among the C functions that operate on the datatype. There are also provisions (rudimentary as yet) to allow an expanded object to be modified in-place under suitable conditions, so that operations like assignment to an element of an array need not involve copying the entire array. The initial application for this feature is arrays, but it is not hard to foresee using it for other container types like JSON, XML and hstore. I have hopes that it will be useful to PostGIS as well. In this initial implementation, a few heuristics have been hard-wired into plpgsql to improve performance for arrays that are stored in plpgsql variables. We would like to generalize those hacks so that other datatypes can obtain similar improvements, but figuring out some appropriate APIs is left as a task for future work. (The heuristics themselves are probably not optimal yet, either, as they sometimes force expansion of arrays that would be better left alone.) Preliminary performance testing shows impressive speed gains for plpgsql functions that do element-by-element access or update of large arrays. There are other cases that get a little slower, as a result of added array format conversions; but we can hope to improve anything that's annoyingly bad. In any case most applications should see a net win. Tom Lane, reviewed by Andres Freund
This commit is contained in:
@@ -2200,6 +2200,22 @@ build_datatype(HeapTuple typeTup, int32 typmod, Oid collation)
|
||||
typ->collation = typeStruct->typcollation;
|
||||
if (OidIsValid(collation) && OidIsValid(typ->collation))
|
||||
typ->collation = collation;
|
||||
/* Detect if type is true array, or domain thereof */
|
||||
/* NB: this is only used to decide whether to apply expand_array */
|
||||
if (typeStruct->typtype == TYPTYPE_BASE)
|
||||
{
|
||||
/* this test should match what get_element_type() checks */
|
||||
typ->typisarray = (typeStruct->typlen == -1 &&
|
||||
OidIsValid(typeStruct->typelem));
|
||||
}
|
||||
else if (typeStruct->typtype == TYPTYPE_DOMAIN)
|
||||
{
|
||||
/* we can short-circuit looking up base types if it's not varlena */
|
||||
typ->typisarray = (typeStruct->typlen == -1 &&
|
||||
OidIsValid(get_base_element_type(typeStruct->typbasetype)));
|
||||
}
|
||||
else
|
||||
typ->typisarray = false;
|
||||
typ->atttypmod = typmod;
|
||||
|
||||
return typ;
|
||||
|
@@ -34,6 +34,7 @@
|
||||
#include "utils/array.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/datum.h"
|
||||
#include "utils/fmgroids.h"
|
||||
#include "utils/lsyscache.h"
|
||||
#include "utils/memutils.h"
|
||||
#include "utils/rel.h"
|
||||
@@ -173,6 +174,8 @@ static void exec_prepare_plan(PLpgSQL_execstate *estate,
|
||||
static bool exec_simple_check_node(Node *node);
|
||||
static void exec_simple_check_plan(PLpgSQL_expr *expr);
|
||||
static void exec_simple_recheck_plan(PLpgSQL_expr *expr, CachedPlan *cplan);
|
||||
static void exec_check_rw_parameter(PLpgSQL_expr *expr, int target_dno);
|
||||
static bool contains_target_param(Node *node, int *target_dno);
|
||||
static bool exec_eval_simple_expr(PLpgSQL_execstate *estate,
|
||||
PLpgSQL_expr *expr,
|
||||
Datum *result,
|
||||
@@ -312,6 +315,44 @@ plpgsql_exec_function(PLpgSQL_function *func, FunctionCallInfo fcinfo,
|
||||
var->value = fcinfo->arg[i];
|
||||
var->isnull = fcinfo->argnull[i];
|
||||
var->freeval = false;
|
||||
|
||||
/*
|
||||
* Force any array-valued parameter to be stored in
|
||||
* expanded form in our local variable, in hopes of
|
||||
* improving efficiency of uses of the variable. (This is
|
||||
* a hack, really: why only arrays? Need more thought
|
||||
* about which cases are likely to win. See also
|
||||
* typisarray-specific heuristic in exec_assign_value.)
|
||||
*
|
||||
* Special cases: If passed a R/W expanded pointer, assume
|
||||
* we can commandeer the object rather than having to copy
|
||||
* it. If passed a R/O expanded pointer, just keep it as
|
||||
* the value of the variable for the moment. (We'll force
|
||||
* it to R/W if the variable gets modified, but that may
|
||||
* very well never happen.)
|
||||
*/
|
||||
if (!var->isnull && var->datatype->typisarray)
|
||||
{
|
||||
if (VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(var->value)))
|
||||
{
|
||||
/* take ownership of R/W object */
|
||||
var->value = TransferExpandedObject(var->value,
|
||||
CurrentMemoryContext);
|
||||
var->freeval = true;
|
||||
}
|
||||
else if (VARATT_IS_EXTERNAL_EXPANDED_RO(DatumGetPointer(var->value)))
|
||||
{
|
||||
/* R/O pointer, keep it as-is until assigned to */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* flat array, so force to expanded form */
|
||||
var->value = expand_array(var->value,
|
||||
CurrentMemoryContext,
|
||||
NULL);
|
||||
var->freeval = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -477,18 +518,14 @@ plpgsql_exec_function(PLpgSQL_function *func, FunctionCallInfo fcinfo,
|
||||
|
||||
/*
|
||||
* If the function's return type isn't by value, copy the value
|
||||
* into upper executor memory context.
|
||||
* into upper executor memory context. However, if we have a R/W
|
||||
* expanded datum, we can just transfer its ownership out to the
|
||||
* upper executor context.
|
||||
*/
|
||||
if (!fcinfo->isnull && !func->fn_retbyval)
|
||||
{
|
||||
Size len;
|
||||
void *tmp;
|
||||
|
||||
len = datumGetSize(estate.retval, false, func->fn_rettyplen);
|
||||
tmp = SPI_palloc(len);
|
||||
memcpy(tmp, DatumGetPointer(estate.retval), len);
|
||||
estate.retval = PointerGetDatum(tmp);
|
||||
}
|
||||
estate.retval = SPI_datumTransfer(estate.retval,
|
||||
false,
|
||||
func->fn_rettyplen);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2476,6 +2513,13 @@ exec_stmt_return(PLpgSQL_execstate *estate, PLpgSQL_stmt_return *stmt)
|
||||
* Special case path when the RETURN expression is a simple variable
|
||||
* reference; in particular, this path is always taken in functions with
|
||||
* one or more OUT parameters.
|
||||
*
|
||||
* This special case is especially efficient for returning variables that
|
||||
* have R/W expanded values: we can put the R/W pointer directly into
|
||||
* estate->retval, leading to transferring the value to the caller's
|
||||
* context cheaply. If we went through exec_eval_expr we'd end up with a
|
||||
* R/O pointer. It's okay to skip MakeExpandedObjectReadOnly here since
|
||||
* we know we won't need the variable's value within the function anymore.
|
||||
*/
|
||||
if (stmt->retvarno >= 0)
|
||||
{
|
||||
@@ -2604,6 +2648,11 @@ exec_stmt_return_next(PLpgSQL_execstate *estate,
|
||||
* Special case path when the RETURN NEXT expression is a simple variable
|
||||
* reference; in particular, this path is always taken in functions with
|
||||
* one or more OUT parameters.
|
||||
*
|
||||
* Unlike exec_statement_return, there's no special win here for R/W
|
||||
* expanded values, since they'll have to get flattened to go into the
|
||||
* tuplestore. Indeed, we'd better make them R/O to avoid any risk of the
|
||||
* casting step changing them in-place.
|
||||
*/
|
||||
if (stmt->retvarno >= 0)
|
||||
{
|
||||
@@ -2622,6 +2671,11 @@ exec_stmt_return_next(PLpgSQL_execstate *estate,
|
||||
(errcode(ERRCODE_DATATYPE_MISMATCH),
|
||||
errmsg("wrong result type supplied in RETURN NEXT")));
|
||||
|
||||
/* let's be very paranoid about the cast step */
|
||||
retval = MakeExpandedObjectReadOnly(retval,
|
||||
isNull,
|
||||
var->datatype->typlen);
|
||||
|
||||
/* coerce type if needed */
|
||||
retval = exec_cast_value(estate,
|
||||
retval,
|
||||
@@ -3333,6 +3387,13 @@ exec_prepare_plan(PLpgSQL_execstate *estate,
|
||||
|
||||
/* Check to see if it's a simple expression */
|
||||
exec_simple_check_plan(expr);
|
||||
|
||||
/*
|
||||
* Mark expression as not using a read-write param. exec_assign_value has
|
||||
* to take steps to override this if appropriate; that seems cleaner than
|
||||
* adding parameters to all other callers.
|
||||
*/
|
||||
expr->rwparam = -1;
|
||||
}
|
||||
|
||||
|
||||
@@ -4071,6 +4132,19 @@ exec_assign_expr(PLpgSQL_execstate *estate, PLpgSQL_datum *target,
|
||||
Oid valtype;
|
||||
int32 valtypmod;
|
||||
|
||||
/*
|
||||
* If first time through, create a plan for this expression, and then see
|
||||
* if we can pass the target variable as a read-write parameter to the
|
||||
* expression. (This is a bit messy, but it seems cleaner than modifying
|
||||
* the API of exec_eval_expr for the purpose.)
|
||||
*/
|
||||
if (expr->plan == NULL)
|
||||
{
|
||||
exec_prepare_plan(estate, expr, 0);
|
||||
if (target->dtype == PLPGSQL_DTYPE_VAR)
|
||||
exec_check_rw_parameter(expr, target->dno);
|
||||
}
|
||||
|
||||
value = exec_eval_expr(estate, expr, &isnull, &valtype, &valtypmod);
|
||||
exec_assign_value(estate, target, value, isnull, valtype, valtypmod);
|
||||
exec_eval_cleanup(estate);
|
||||
@@ -4140,26 +4214,51 @@ exec_assign_value(PLpgSQL_execstate *estate,
|
||||
/*
|
||||
* If type is by-reference, copy the new value (which is
|
||||
* probably in the eval_econtext) into the procedure's memory
|
||||
* context.
|
||||
* context. But if it's a read/write reference to an expanded
|
||||
* object, no physical copy needs to happen; at most we need
|
||||
* to reparent the object's memory context.
|
||||
*
|
||||
* If it's an array, we force the value to be stored in R/W
|
||||
* expanded form. This wins if the function later does, say,
|
||||
* a lot of array subscripting operations on the variable, and
|
||||
* otherwise might lose. We might need to use a different
|
||||
* heuristic, but it's too soon to tell. Also, are there
|
||||
* cases where it'd be useful to force non-array values into
|
||||
* expanded form?
|
||||
*/
|
||||
if (!var->datatype->typbyval && !isNull)
|
||||
newvalue = datumCopy(newvalue,
|
||||
false,
|
||||
var->datatype->typlen);
|
||||
{
|
||||
if (var->datatype->typisarray &&
|
||||
!VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(newvalue)))
|
||||
{
|
||||
/* array and not already R/W, so apply expand_array */
|
||||
newvalue = expand_array(newvalue,
|
||||
CurrentMemoryContext,
|
||||
NULL);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* else transfer value if R/W, else just datumCopy */
|
||||
newvalue = datumTransfer(newvalue,
|
||||
false,
|
||||
var->datatype->typlen);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Now free the old value. (We can't do this any earlier
|
||||
* because of the possibility that we are assigning the var's
|
||||
* old value to it, eg "foo := foo". We could optimize out
|
||||
* the assignment altogether in such cases, but it's too
|
||||
* infrequent to be worth testing for.)
|
||||
* Now free the old value, unless it's the same as the new
|
||||
* value (ie, we're doing "foo := foo"). Note that for
|
||||
* expanded objects, this test is necessary and cannot
|
||||
* reliably be made any earlier; we have to be looking at the
|
||||
* object's standard R/W pointer to be sure pointer equality
|
||||
* is meaningful.
|
||||
*/
|
||||
free_var(var);
|
||||
if (var->value != newvalue || var->isnull || isNull)
|
||||
free_var(var);
|
||||
|
||||
var->value = newvalue;
|
||||
var->isnull = isNull;
|
||||
if (!var->datatype->typbyval && !isNull)
|
||||
var->freeval = true;
|
||||
var->freeval = (!var->datatype->typbyval && !isNull);
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -4505,10 +4604,14 @@ exec_assign_value(PLpgSQL_execstate *estate,
|
||||
*
|
||||
* At present this doesn't handle PLpgSQL_expr or PLpgSQL_arrayelem datums.
|
||||
*
|
||||
* NOTE: caller must not modify the returned value, since it points right
|
||||
* at the stored value in the case of pass-by-reference datatypes. In some
|
||||
* cases we have to palloc a return value, and in such cases we put it into
|
||||
* the estate's short-term memory context.
|
||||
* NOTE: the returned Datum points right at the stored value in the case of
|
||||
* pass-by-reference datatypes. Generally callers should take care not to
|
||||
* modify the stored value. Some callers intentionally manipulate variables
|
||||
* referenced by R/W expanded pointers, though; it is those callers'
|
||||
* responsibility that the results are semantically OK.
|
||||
*
|
||||
* In some cases we have to palloc a return value, and in such cases we put
|
||||
* it into the estate's short-term memory context.
|
||||
*/
|
||||
static void
|
||||
exec_eval_datum(PLpgSQL_execstate *estate,
|
||||
@@ -5216,6 +5319,9 @@ exec_eval_simple_expr(PLpgSQL_execstate *estate,
|
||||
{
|
||||
/* It got replanned ... is it still simple? */
|
||||
exec_simple_recheck_plan(expr, cplan);
|
||||
/* better recheck r/w safety, as well */
|
||||
if (expr->rwparam >= 0)
|
||||
exec_check_rw_parameter(expr, expr->rwparam);
|
||||
if (expr->expr_simple_expr == NULL)
|
||||
{
|
||||
/* Ooops, release refcount and fail */
|
||||
@@ -5362,7 +5468,13 @@ setup_param_list(PLpgSQL_execstate *estate, PLpgSQL_expr *expr)
|
||||
*/
|
||||
MemSet(paramLI->params, 0, estate->ndatums * sizeof(ParamExternData));
|
||||
|
||||
/* Instantiate values for "safe" parameters of the expression */
|
||||
/*
|
||||
* Instantiate values for "safe" parameters of the expression. One of
|
||||
* them might be the variable the expression result will be assigned
|
||||
* to, in which case we can pass the variable's value as-is even if
|
||||
* it's a read-write expanded object; otherwise, convert read-write
|
||||
* pointers to read-only pointers for safety.
|
||||
*/
|
||||
dno = -1;
|
||||
while ((dno = bms_next_member(expr->paramnos, dno)) >= 0)
|
||||
{
|
||||
@@ -5373,7 +5485,12 @@ setup_param_list(PLpgSQL_execstate *estate, PLpgSQL_expr *expr)
|
||||
PLpgSQL_var *var = (PLpgSQL_var *) datum;
|
||||
ParamExternData *prm = &paramLI->params[dno];
|
||||
|
||||
prm->value = var->value;
|
||||
if (dno == expr->rwparam)
|
||||
prm->value = var->value;
|
||||
else
|
||||
prm->value = MakeExpandedObjectReadOnly(var->value,
|
||||
var->isnull,
|
||||
var->datatype->typlen);
|
||||
prm->isnull = var->isnull;
|
||||
prm->pflags = PARAM_FLAG_CONST;
|
||||
prm->ptype = var->datatype->typoid;
|
||||
@@ -5442,6 +5559,15 @@ plpgsql_param_fetch(ParamListInfo params, int paramid)
|
||||
exec_eval_datum(estate, datum,
|
||||
&prm->ptype, &prmtypmod,
|
||||
&prm->value, &prm->isnull);
|
||||
|
||||
/*
|
||||
* If it's a read/write expanded datum, convert reference to read-only,
|
||||
* unless it's safe to pass as read-write.
|
||||
*/
|
||||
if (datum->dtype == PLPGSQL_DTYPE_VAR && dno != expr->rwparam)
|
||||
prm->value = MakeExpandedObjectReadOnly(prm->value,
|
||||
prm->isnull,
|
||||
((PLpgSQL_var *) datum)->datatype->typlen);
|
||||
}
|
||||
|
||||
|
||||
@@ -6384,6 +6510,113 @@ exec_simple_recheck_plan(PLpgSQL_expr *expr, CachedPlan *cplan)
|
||||
expr->expr_simple_typmod = exprTypmod((Node *) tle->expr);
|
||||
}
|
||||
|
||||
/*
|
||||
* exec_check_rw_parameter --- can we pass expanded object as read/write param?
|
||||
*
|
||||
* If we have an assignment like "x := array_append(x, foo)" in which the
|
||||
* top-level function is trusted not to corrupt its argument in case of an
|
||||
* error, then when x has an expanded object as value, it is safe to pass the
|
||||
* value as a read/write pointer and let the function modify the value
|
||||
* in-place.
|
||||
*
|
||||
* This function checks for a safe expression, and sets expr->rwparam to the
|
||||
* dno of the target variable (x) if safe, or -1 if not safe.
|
||||
*/
|
||||
static void
|
||||
exec_check_rw_parameter(PLpgSQL_expr *expr, int target_dno)
|
||||
{
|
||||
Oid funcid;
|
||||
List *fargs;
|
||||
ListCell *lc;
|
||||
|
||||
/* Assume unsafe */
|
||||
expr->rwparam = -1;
|
||||
|
||||
/*
|
||||
* If the expression isn't simple, there's no point in trying to optimize
|
||||
* (because the exec_run_select code path will flatten any expanded result
|
||||
* anyway). Even without that, this seems like a good safety restriction.
|
||||
*/
|
||||
if (expr->expr_simple_expr == NULL)
|
||||
return;
|
||||
|
||||
/*
|
||||
* If target variable isn't referenced by expression, no need to look
|
||||
* further.
|
||||
*/
|
||||
if (!bms_is_member(target_dno, expr->paramnos))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Top level of expression must be a simple FuncExpr or OpExpr.
|
||||
*/
|
||||
if (IsA(expr->expr_simple_expr, FuncExpr))
|
||||
{
|
||||
FuncExpr *fexpr = (FuncExpr *) expr->expr_simple_expr;
|
||||
|
||||
funcid = fexpr->funcid;
|
||||
fargs = fexpr->args;
|
||||
}
|
||||
else if (IsA(expr->expr_simple_expr, OpExpr))
|
||||
{
|
||||
OpExpr *opexpr = (OpExpr *) expr->expr_simple_expr;
|
||||
|
||||
funcid = opexpr->opfuncid;
|
||||
fargs = opexpr->args;
|
||||
}
|
||||
else
|
||||
return;
|
||||
|
||||
/*
|
||||
* The top-level function must be one that we trust to be "safe".
|
||||
* Currently we hard-wire the list, but it would be very desirable to
|
||||
* allow extensions to mark their functions as safe ...
|
||||
*/
|
||||
if (!(funcid == F_ARRAY_APPEND ||
|
||||
funcid == F_ARRAY_PREPEND))
|
||||
return;
|
||||
|
||||
/*
|
||||
* The target variable (in the form of a Param) must only appear as a
|
||||
* direct argument of the top-level function.
|
||||
*/
|
||||
foreach(lc, fargs)
|
||||
{
|
||||
Node *arg = (Node *) lfirst(lc);
|
||||
|
||||
/* A Param is OK, whether it's the target variable or not */
|
||||
if (arg && IsA(arg, Param))
|
||||
continue;
|
||||
/* Otherwise, argument expression must not reference target */
|
||||
if (contains_target_param(arg, &target_dno))
|
||||
return;
|
||||
}
|
||||
|
||||
/* OK, we can pass target as a read-write parameter */
|
||||
expr->rwparam = target_dno;
|
||||
}
|
||||
|
||||
/*
|
||||
* Recursively check for a Param referencing the target variable
|
||||
*/
|
||||
static bool
|
||||
contains_target_param(Node *node, int *target_dno)
|
||||
{
|
||||
if (node == NULL)
|
||||
return false;
|
||||
if (IsA(node, Param))
|
||||
{
|
||||
Param *param = (Param *) node;
|
||||
|
||||
if (param->paramkind == PARAM_EXTERN &&
|
||||
param->paramid == *target_dno + 1)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
return expression_tree_walker(node, contains_target_param,
|
||||
(void *) target_dno);
|
||||
}
|
||||
|
||||
/* ----------
|
||||
* exec_set_found Set the global found variable to true/false
|
||||
* ----------
|
||||
@@ -6540,7 +6773,12 @@ free_var(PLpgSQL_var *var)
|
||||
{
|
||||
if (var->freeval)
|
||||
{
|
||||
pfree(DatumGetPointer(var->value));
|
||||
if (DatumIsReadWriteExpandedObject(var->value,
|
||||
var->isnull,
|
||||
var->datatype->typlen))
|
||||
DeleteExpandedObject(var->value);
|
||||
else
|
||||
pfree(DatumGetPointer(var->value));
|
||||
var->freeval = false;
|
||||
}
|
||||
}
|
||||
@@ -6750,8 +6988,9 @@ format_expr_params(PLpgSQL_execstate *estate,
|
||||
|
||||
curvar = (PLpgSQL_var *) estate->datums[dno];
|
||||
|
||||
exec_eval_datum(estate, (PLpgSQL_datum *) curvar, &paramtypeid,
|
||||
&paramtypmod, &paramdatum, &paramisnull);
|
||||
exec_eval_datum(estate, (PLpgSQL_datum *) curvar,
|
||||
&paramtypeid, &paramtypmod,
|
||||
&paramdatum, &paramisnull);
|
||||
|
||||
appendStringInfo(&paramstr, "%s%s = ",
|
||||
paramno > 0 ? ", " : "",
|
||||
|
@@ -2625,6 +2625,7 @@ read_sql_construct(int until,
|
||||
expr->query = pstrdup(ds.data);
|
||||
expr->plan = NULL;
|
||||
expr->paramnos = NULL;
|
||||
expr->rwparam = -1;
|
||||
expr->ns = plpgsql_ns_top();
|
||||
pfree(ds.data);
|
||||
|
||||
@@ -2849,6 +2850,7 @@ make_execsql_stmt(int firsttoken, int location)
|
||||
expr->query = pstrdup(ds.data);
|
||||
expr->plan = NULL;
|
||||
expr->paramnos = NULL;
|
||||
expr->rwparam = -1;
|
||||
expr->ns = plpgsql_ns_top();
|
||||
pfree(ds.data);
|
||||
|
||||
@@ -3732,6 +3734,7 @@ read_cursor_args(PLpgSQL_var *cursor, int until, const char *expected)
|
||||
expr->query = pstrdup(ds.data);
|
||||
expr->plan = NULL;
|
||||
expr->paramnos = NULL;
|
||||
expr->rwparam = -1;
|
||||
expr->ns = plpgsql_ns_top();
|
||||
pfree(ds.data);
|
||||
|
||||
|
@@ -183,6 +183,7 @@ typedef struct
|
||||
char typtype;
|
||||
Oid typrelid;
|
||||
Oid collation; /* from pg_type, but can be overridden */
|
||||
bool typisarray; /* is "true" array, or domain over one */
|
||||
int32 atttypmod; /* typmod (taken from someplace else) */
|
||||
} PLpgSQL_type;
|
||||
|
||||
@@ -216,6 +217,7 @@ typedef struct PLpgSQL_expr
|
||||
char *query;
|
||||
SPIPlanPtr plan;
|
||||
Bitmapset *paramnos; /* all dnos referenced by this query */
|
||||
int rwparam; /* dno of read/write param, or -1 if none */
|
||||
|
||||
/* function containing this expr (not set until we first parse query) */
|
||||
struct PLpgSQL_function *func;
|
||||
|
Reference in New Issue
Block a user