1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-05 07:21:24 +03:00

Provide moving-aggregate support for a bunch of numerical aggregates.

First installment of the promised moving-aggregate support in built-in
aggregates: count(), sum(), avg(), stddev() and variance() for
assorted datatypes, though not for float4/float8.

In passing, remove a 2001-vintage kluge in interval_accum(): interval
array elements have been properly aligned since around 2003, but
nobody remembered to take out this workaround.  Also, fix a thinko
in the opr_sanity tests for moving-aggregate catalog entries.

David Rowley and Florian Pflug, reviewed by Dean Rasheed
This commit is contained in:
Tom Lane
2014-04-12 20:33:09 -04:00
parent a9d9acbf21
commit 9d229f399e
13 changed files with 1232 additions and 224 deletions

View File

@ -2506,22 +2506,19 @@ numeric_float4(PG_FUNCTION_ARGS)
* Actually, it's a pointer to a NumericAggState allocated in the aggregate
* context. The digit buffers for the NumericVars will be there too.
*
* Note that the transition functions don't bother to create a NumericAggState
* until they see the first non-null input value; therefore, the final
* functions will never see N == 0. (The case is represented as a NULL
* state pointer, instead.)
*
* ----------------------------------------------------------------------
*/
typedef struct NumericAggState
{
bool calcSumX2; /* if true, calculate sumX2 */
bool isNaN; /* true if any processed number was NaN */
MemoryContext agg_context; /* context we're calculating in */
int64 N; /* count of processed numbers */
NumericVar sumX; /* sum of processed numbers */
NumericVar sumX2; /* sum of squares of processed numbers */
int maxScale; /* maximum scale seen so far */
int64 maxScaleCount; /* number of values seen with maximum scale */
int64 NaNcount; /* count of NaN values (not included in N!) */
} NumericAggState;
/*
@ -2559,16 +2556,28 @@ do_numeric_accum(NumericAggState *state, Numeric newval)
NumericVar X2;
MemoryContext old_context;
/* result is NaN if any processed number is NaN */
if (state->isNaN || NUMERIC_IS_NAN(newval))
/* Count NaN inputs separately from all else */
if (NUMERIC_IS_NAN(newval))
{
state->isNaN = true;
state->NaNcount++;
return;
}
/* load processed number in short-lived context */
init_var_from_num(newval, &X);
/*
* Track the highest input dscale that we've seen, to support inverse
* transitions (see do_numeric_discard).
*/
if (X.dscale > state->maxScale)
{
state->maxScale = X.dscale;
state->maxScaleCount = 1;
}
else if (X.dscale == state->maxScale)
state->maxScaleCount++;
/* if we need X^2, calculate that in short-lived context */
if (state->calcSumX2)
{
@ -2599,6 +2608,97 @@ do_numeric_accum(NumericAggState *state, Numeric newval)
MemoryContextSwitchTo(old_context);
}
/*
* Attempt to remove an input value from the aggregated state.
*
* If the value cannot be removed then the function will return false; the
* possible reasons for failing are described below.
*
* If we aggregate the values 1.01 and 2 then the result will be 3.01.
* If we are then asked to un-aggregate the 1.01 then we must fail as we
* won't be able to tell what the new aggregated value's dscale should be.
* We don't want to return 2.00 (dscale = 2), since the sum's dscale would
* have been zero if we'd really aggregated only 2.
*
* Note: alternatively, we could count the number of inputs with each possible
* dscale (up to some sane limit). Not yet clear if it's worth the trouble.
*/
static bool
do_numeric_discard(NumericAggState *state, Numeric newval)
{
NumericVar X;
NumericVar X2;
MemoryContext old_context;
/* Count NaN inputs separately from all else */
if (NUMERIC_IS_NAN(newval))
{
state->NaNcount--;
return true;
}
/* load processed number in short-lived context */
init_var_from_num(newval, &X);
/*
* state->sumX's dscale is the maximum dscale of any of the inputs.
* Removing the last input with that dscale would require us to recompute
* the maximum dscale of the *remaining* inputs, which we cannot do unless
* no more non-NaN inputs remain at all. So we report a failure instead,
* and force the aggregation to be redone from scratch.
*/
if (X.dscale == state->maxScale)
{
if (state->maxScaleCount > 1 || state->maxScale == 0)
{
/*
* Some remaining inputs have same dscale, or dscale hasn't
* gotten above zero anyway
*/
state->maxScaleCount--;
}
else if (state->N == 1)
{
/* No remaining non-NaN inputs at all, so reset maxScale */
state->maxScale = 0;
state->maxScaleCount = 0;
}
else
{
/* Correct new maxScale is uncertain, must fail */
return false;
}
}
/* if we need X^2, calculate that in short-lived context */
if (state->calcSumX2)
{
init_var(&X2);
mul_var(&X, &X, &X2, X.dscale * 2);
}
/* The rest of this needs to work in the aggregate context */
old_context = MemoryContextSwitchTo(state->agg_context);
if (state->N-- > 1)
{
/* De-accumulate sums */
sub_var(&(state->sumX), &X, &(state->sumX));
if (state->calcSumX2)
sub_var(&(state->sumX2), &X2, &(state->sumX2));
}
else
{
/* Sums will be reset by next call to do_numeric_accum */
Assert(state->N == 0);
}
MemoryContextSwitchTo(old_context);
return true;
}
/*
* Generic transition function for numeric aggregates that require sumX2.
*/
@ -2609,14 +2709,12 @@ numeric_accum(PG_FUNCTION_ARGS)
state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
if (!PG_ARGISNULL(1))
{
/* Create the state data when we see the first non-null input. */
if (state == NULL)
state = makeNumericAggState(fcinfo, true);
/* Create the state data on the first call */
if (state == NULL)
state = makeNumericAggState(fcinfo, true);
if (!PG_ARGISNULL(1))
do_numeric_accum(state, PG_GETARG_NUMERIC(1));
}
PG_RETURN_POINTER(state);
}
@ -2631,18 +2729,42 @@ numeric_avg_accum(PG_FUNCTION_ARGS)
state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
/* Create the state data on the first call */
if (state == NULL)
state = makeNumericAggState(fcinfo, false);
if (!PG_ARGISNULL(1))
do_numeric_accum(state, PG_GETARG_NUMERIC(1));
PG_RETURN_POINTER(state);
}
/*
* Generic inverse transition function for numeric aggregates
* (with or without requirement for X^2).
*/
Datum
numeric_accum_inv(PG_FUNCTION_ARGS)
{
NumericAggState *state;
state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
/* Should not get here with no state */
if (state == NULL)
elog(ERROR, "numeric_accum_inv called with NULL state");
if (!PG_ARGISNULL(1))
{
/* Create the state data when we see the first non-null input. */
if (state == NULL)
state = makeNumericAggState(fcinfo, false);
do_numeric_accum(state, PG_GETARG_NUMERIC(1));
/* If we fail to perform the inverse transition, return NULL */
if (!do_numeric_discard(state, PG_GETARG_NUMERIC(1)))
PG_RETURN_NULL();
}
PG_RETURN_POINTER(state);
}
/*
* Integer data types all use Numeric accumulators to share code and
* avoid risk of overflow. For int2 and int4 inputs, Numeric accumulation
@ -2659,17 +2781,16 @@ int2_accum(PG_FUNCTION_ARGS)
state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
/* Create the state data on the first call */
if (state == NULL)
state = makeNumericAggState(fcinfo, true);
if (!PG_ARGISNULL(1))
{
Numeric newval;
newval = DatumGetNumeric(DirectFunctionCall1(int2_numeric,
PG_GETARG_DATUM(1)));
/* Create the state data when we see the first non-null input. */
if (state == NULL)
state = makeNumericAggState(fcinfo, true);
do_numeric_accum(state, newval);
}
@ -2683,17 +2804,16 @@ int4_accum(PG_FUNCTION_ARGS)
state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
/* Create the state data on the first call */
if (state == NULL)
state = makeNumericAggState(fcinfo, true);
if (!PG_ARGISNULL(1))
{
Numeric newval;
newval = DatumGetNumeric(DirectFunctionCall1(int4_numeric,
PG_GETARG_DATUM(1)));
/* Create the state data when we see the first non-null input. */
if (state == NULL)
state = makeNumericAggState(fcinfo, true);
do_numeric_accum(state, newval);
}
@ -2707,17 +2827,16 @@ int8_accum(PG_FUNCTION_ARGS)
state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
/* Create the state data on the first call */
if (state == NULL)
state = makeNumericAggState(fcinfo, true);
if (!PG_ARGISNULL(1))
{
Numeric newval;
newval = DatumGetNumeric(DirectFunctionCall1(int8_numeric,
PG_GETARG_DATUM(1)));
/* Create the state data when we see the first non-null input. */
if (state == NULL)
state = makeNumericAggState(fcinfo, true);
do_numeric_accum(state, newval);
}
@ -2734,6 +2853,90 @@ int8_avg_accum(PG_FUNCTION_ARGS)
state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
/* Create the state data on the first call */
if (state == NULL)
state = makeNumericAggState(fcinfo, false);
if (!PG_ARGISNULL(1))
{
Numeric newval;
newval = DatumGetNumeric(DirectFunctionCall1(int8_numeric,
PG_GETARG_DATUM(1)));
do_numeric_accum(state, newval);
}
PG_RETURN_POINTER(state);
}
/*
* Inverse transition functions to go with the above.
*/
Datum
int2_accum_inv(PG_FUNCTION_ARGS)
{
NumericAggState *state;
state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
/* Should not get here with no state */
if (state == NULL)
elog(ERROR, "int2_accum_inv called with NULL state");
if (!PG_ARGISNULL(1))
{
Numeric newval;
newval = DatumGetNumeric(DirectFunctionCall1(int2_numeric,
PG_GETARG_DATUM(1)));
/* Should never fail, all inputs have dscale 0 */
if (!do_numeric_discard(state, newval))
elog(ERROR, "do_numeric_discard failed unexpectedly");
}
PG_RETURN_POINTER(state);
}
Datum
int4_accum_inv(PG_FUNCTION_ARGS)
{
NumericAggState *state;
state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
/* Should not get here with no state */
if (state == NULL)
elog(ERROR, "int4_accum_inv called with NULL state");
if (!PG_ARGISNULL(1))
{
Numeric newval;
newval = DatumGetNumeric(DirectFunctionCall1(int4_numeric,
PG_GETARG_DATUM(1)));
/* Should never fail, all inputs have dscale 0 */
if (!do_numeric_discard(state, newval))
elog(ERROR, "do_numeric_discard failed unexpectedly");
}
PG_RETURN_POINTER(state);
}
Datum
int8_accum_inv(PG_FUNCTION_ARGS)
{
NumericAggState *state;
state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
/* Should not get here with no state */
if (state == NULL)
elog(ERROR, "int8_accum_inv called with NULL state");
if (!PG_ARGISNULL(1))
{
Numeric newval;
@ -2741,17 +2944,14 @@ int8_avg_accum(PG_FUNCTION_ARGS)
newval = DatumGetNumeric(DirectFunctionCall1(int8_numeric,
PG_GETARG_DATUM(1)));
/* Create the state data when we see the first non-null input. */
if (state == NULL)
state = makeNumericAggState(fcinfo, false);
do_numeric_accum(state, newval);
/* Should never fail, all inputs have dscale 0 */
if (!do_numeric_discard(state, newval))
elog(ERROR, "do_numeric_discard failed unexpectedly");
}
PG_RETURN_POINTER(state);
}
Datum
numeric_avg(PG_FUNCTION_ARGS)
{
@ -2760,10 +2960,12 @@ numeric_avg(PG_FUNCTION_ARGS)
Datum sumX_datum;
state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
if (state == NULL) /* there were no non-null inputs */
/* If there were no non-null inputs, return NULL */
if (state == NULL || (state->N + state->NaNcount) == 0)
PG_RETURN_NULL();
if (state->isNaN) /* there was at least one NaN input */
if (state->NaNcount > 0) /* there was at least one NaN input */
PG_RETURN_NUMERIC(make_result(&const_nan));
N_datum = DirectFunctionCall1(int8_numeric, Int64GetDatum(state->N));
@ -2778,10 +2980,12 @@ numeric_sum(PG_FUNCTION_ARGS)
NumericAggState *state;
state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
if (state == NULL) /* there were no non-null inputs */
/* If there were no non-null inputs, return NULL */
if (state == NULL || (state->N + state->NaNcount) == 0)
PG_RETURN_NULL();
if (state->isNaN) /* there was at least one NaN input */
if (state->NaNcount > 0) /* there was at least one NaN input */
PG_RETURN_NUMERIC(make_result(&const_nan));
PG_RETURN_NUMERIC(make_result(&(state->sumX)));
@ -2812,7 +3016,7 @@ numeric_stddev_internal(NumericAggState *state,
int rscale;
/* Deal with empty input and NaN-input cases */
if (state == NULL)
if (state == NULL || (state->N + state->NaNcount) == 0)
{
*is_null = true;
return NULL;
@ -2820,7 +3024,7 @@ numeric_stddev_internal(NumericAggState *state,
*is_null = false;
if (state->isNaN)
if (state->NaNcount > 0)
return make_result(&const_nan);
init_var(&vN);
@ -2965,6 +3169,9 @@ numeric_stddev_pop(PG_FUNCTION_ARGS)
* data value into the transition data: it doesn't know how to do the type
* conversion. The upshot is that these routines have to be marked non-strict
* and handle substitution of the first non-null input themselves.
*
* Note: these functions are used only in plain aggregation mode.
* In moving-aggregate mode, we use intX_avg_accum and intX_avg_accum_inv.
*/
Datum
@ -3107,6 +3314,10 @@ int8_sum(PG_FUNCTION_ARGS)
/*
* Routines for avg(int2) and avg(int4). The transition datatype
* is a two-element int8 array, holding count and sum.
*
* These functions are also used for sum(int2) and sum(int4) when
* operating in moving-aggregate mode, since for correct inverse transitions
* we need to count the inputs.
*/
typedef struct Int8TransTypeData
@ -3171,6 +3382,62 @@ int4_avg_accum(PG_FUNCTION_ARGS)
PG_RETURN_ARRAYTYPE_P(transarray);
}
Datum
int2_avg_accum_inv(PG_FUNCTION_ARGS)
{
ArrayType *transarray;
int16 newval = PG_GETARG_INT16(1);
Int8TransTypeData *transdata;
/*
* If we're invoked as an aggregate, we can cheat and modify our first
* parameter in-place to reduce palloc overhead. Otherwise we need to make
* a copy of it before scribbling on it.
*/
if (AggCheckCallContext(fcinfo, NULL))
transarray = PG_GETARG_ARRAYTYPE_P(0);
else
transarray = PG_GETARG_ARRAYTYPE_P_COPY(0);
if (ARR_HASNULL(transarray) ||
ARR_SIZE(transarray) != ARR_OVERHEAD_NONULLS(1) + sizeof(Int8TransTypeData))
elog(ERROR, "expected 2-element int8 array");
transdata = (Int8TransTypeData *) ARR_DATA_PTR(transarray);
transdata->count--;
transdata->sum -= newval;
PG_RETURN_ARRAYTYPE_P(transarray);
}
Datum
int4_avg_accum_inv(PG_FUNCTION_ARGS)
{
ArrayType *transarray;
int32 newval = PG_GETARG_INT32(1);
Int8TransTypeData *transdata;
/*
* If we're invoked as an aggregate, we can cheat and modify our first
* parameter in-place to reduce palloc overhead. Otherwise we need to make
* a copy of it before scribbling on it.
*/
if (AggCheckCallContext(fcinfo, NULL))
transarray = PG_GETARG_ARRAYTYPE_P(0);
else
transarray = PG_GETARG_ARRAYTYPE_P_COPY(0);
if (ARR_HASNULL(transarray) ||
ARR_SIZE(transarray) != ARR_OVERHEAD_NONULLS(1) + sizeof(Int8TransTypeData))
elog(ERROR, "expected 2-element int8 array");
transdata = (Int8TransTypeData *) ARR_DATA_PTR(transarray);
transdata->count--;
transdata->sum -= newval;
PG_RETURN_ARRAYTYPE_P(transarray);
}
Datum
int8_avg(PG_FUNCTION_ARGS)
{
@ -3196,6 +3463,28 @@ int8_avg(PG_FUNCTION_ARGS)
PG_RETURN_DATUM(DirectFunctionCall2(numeric_div, sumd, countd));
}
/*
* SUM(int2) and SUM(int4) both return int8, so we can use this
* final function for both.
*/
Datum
int2int4_sum(PG_FUNCTION_ARGS)
{
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
Int8TransTypeData *transdata;
if (ARR_HASNULL(transarray) ||
ARR_SIZE(transarray) != ARR_OVERHEAD_NONULLS(1) + sizeof(Int8TransTypeData))
elog(ERROR, "expected 2-element int8 array");
transdata = (Int8TransTypeData *) ARR_DATA_PTR(transarray);
/* SQL defines SUM of no values to be NULL */
if (transdata->count == 0)
PG_RETURN_NULL();
PG_RETURN_DATUM(Int64GetDatumFast(transdata->sum));
}
/* ----------------------------------------------------------------------
*