From ee4dcf147881a21c848cd0ebeb5c8c255f396431 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 26 Mar 2000 19:45:21 +0000 Subject: [PATCH] Update/improve documentation about creating aggregate functions. --- doc/src/sgml/ref/create_aggregate.sgml | 207 +++++++++++++------------ doc/src/sgml/xaggr.sgml | 75 +++++++-- 2 files changed, 165 insertions(+), 117 deletions(-) diff --git a/doc/src/sgml/ref/create_aggregate.sgml b/doc/src/sgml/ref/create_aggregate.sgml index 936ee2b43bc..b279360a5c3 100644 --- a/doc/src/sgml/ref/create_aggregate.sgml +++ b/doc/src/sgml/ref/create_aggregate.sgml @@ -1,5 +1,5 @@ @@ -24,9 +24,9 @@ Postgres documentation 1999-07-20 -CREATE AGGREGATE name [ AS ] ( BASETYPE = data_type - [ , SFUNC1 = sfunc1, STYPE1 = sfunc1_return_type ] - [ , SFUNC2 = sfunc2, STYPE2 = sfunc2_return_type ] +CREATE AGGREGATE name [ AS ] ( BASETYPE = input_data_type + [ , SFUNC1 = sfunc1, STYPE1 = state1_type ] + [ , SFUNC2 = sfunc2, STYPE2 = state2_type ] [ , FINALFUNC = ffunc ] [ , INITCOND1 = initial_condition1 ] [ , INITCOND2 = initial_condition2 ] ) @@ -51,10 +51,10 @@ CREATE AGGREGATE name [ AS ] ( BASE - data_type + input_data_type - The fundamental data type on which this aggregate function operates. + The input data type on which this aggregate function operates. @@ -63,21 +63,25 @@ CREATE AGGREGATE name [ AS ] ( BASE sfunc1 - The state transition function - to be called for every non-NULL field from the source column. - It takes a variable of - type sfunc1_return_type as - the first argument and that field as the - second argument. + A state transition function + to be called for every non-NULL input data value. + This must be a function of two arguments, the first being of + type state1_type + and the second of + type input_data_type. + The function must return a value of + type state1_type. + This function takes the current state value 1 and the current + input data item, and returns the next state value 1. 
- sfunc1_return_type + state1_type - The return type of the first transition function. + The data type for the first state value of the aggregate. @@ -86,20 +90,22 @@ CREATE AGGREGATE name [ AS ] ( BASE sfunc2 - The state transition function - to be called for every non-NULL field from the source column. - It takes a variable - of type sfunc2_return_type - as the only argument and returns a variable of the same type. + A state transition function + to be called for every non-NULL input data value. + This must be a function of one argument of + type state2_type, + returning a value of the same type. + This function takes the current state value 2 and + returns the next state value 2. - sfunc2_return_type + state2_type - The return type of the second transition function. + The data type for the second state value of the aggregate. @@ -108,12 +114,17 @@ CREATE AGGREGATE name [ AS ] ( BASE ffunc - The final function - called after traversing all input fields. This function must + The final function called to compute the aggregate's result + after all input data has been traversed. + If both state values are used, the final function must take two arguments of types - sfunc1_return_type + state1_type and - sfunc2_return_type. + state2_type. + If only one state value is used, the final function must + take a single argument of that state value's type. + The output datatype of the aggregate is defined as the return + type of this function. @@ -122,7 +133,7 @@ CREATE AGGREGATE name [ AS ] ( BASE initial_condition1 - The initial value for the first transition function argument. + The initial value for state value 1. @@ -131,7 +142,7 @@ CREATE AGGREGATE name [ AS ] ( BASE initial_condition2 - The initial value for the second transition function argument. + The initial value for state value 2. @@ -182,84 +193,66 @@ CREATE can be used to provide the desired features. 
- An aggregate function can require up to three functions, two - state transition functions, + An aggregate function is identified by its name and input data type. + Two aggregates can have the same name if they operate on different + input types. To avoid confusion, do not make an ordinary function + of the same name and input data type as an aggregate. + + + An aggregate function is made from between one and three ordinary + functions: + two state transition functions, sfunc1 - and sfunc2: - -sfunc1( internal-state1, next-data_item ) ---> next-internal-state1 sfunc2( internal-state2 ) ---> next-internal-state2 - + and sfunc2, and a final calculation function, - ffunc: + ffunc. + These are used as follows: +sfunc1( internal-state1, next-data-item ) ---> next-internal-state1 +sfunc2( internal-state2 ) ---> next-internal-state2 ffunc(internal-state1, internal-state2) ---> aggregate-value - Postgres creates up to two temporary variables - (referred to here as temp1 - and temp2) - to hold intermediate results used as arguments to the transition functions. + Postgres creates one or two temporary variables + (of data types stype1 and/or + stype2) to hold the + current internal states of the aggregate. At each input data item, + the state transition function(s) are invoked to calculate new values + for the internal state values. After all the data has been processed, + the final function is invoked once to calculate the aggregate's output + value. - These transition functions are required to have the following properties: - - - - The arguments to - sfunc1 - must be - temp1 - of type - sfunc1_return_type - and - column_value - of type data_type. - The return value must be of type - sfunc1_return_type - and will be used as the first argument in the next call to - sfunc1. - - - - - - The argument and return value of - sfunc2 - must be - temp2 - of type - sfunc2_return_type. 
- - - - - The arguments to the final-calculation-function - must be - temp1 - and - temp2 - and its return value must - be a Postgres - base type (not necessarily - data_type - which had been specified for BASETYPE). - - - - - FINALFUNC should be specified - if and only if both state-transition functions are - specified. - - + ffunc must be specified if + both transition functions are specified. If only one transition function + is used, then ffunc is + optional. The default behavior when + ffunc is not provided is + to return the ending value of the internal state value being used + (and, therefore, the aggregate's output type is the same as that + state value's type). - An aggregate function may also require one or two initial conditions, - one for - each transition function. These are specified and stored - in the database as fields of type text. + An aggregate function may also provide one or two initial conditions, + that is, initial values for the internal state values being used. + These are specified and stored in the database as fields of type + text, but they must be valid external representations + of constants of the state value datatypes. If + sfunc1 is specified + without an initcond1 value, + then the system does not call + sfunc1 + at the first input item; instead, the internal state value 1 is + initialized with the first input value, and + sfunc1 is called beginning + at the second input item. This is useful for aggregates like MIN and + MAX. Note that an aggregate using this feature will return NULL when + called with no input values. There is no comparable provision for + state value 2; if sfunc2 is + specified then an initcond2 is + required. @@ -274,18 +267,32 @@ CREATE to drop aggregate functions. + + The parameters of CREATE AGGREGATE can be written + in any order, not just the order illustrated above. + + It is possible to specify aggregate functions that have varying combinations of state and final functions. 
- For example, the count aggregate requires SFUNC2 - (an incrementing function) but not SFUNC1 or FINALFUNC, - whereas the sum aggregate requires SFUNC1 (an addition - function) but not SFUNC2 or FINALFUNC and the avg + For example, the count aggregate requires + sfunc2 + (an incrementing function) but not + sfunc1 or + ffunc, + whereas the sum aggregate requires + sfunc1 (an addition + function) but not sfunc2 or + ffunc, and the + avg aggregate requires - both of the above state functions as - well as a FINALFUNC (a division function) to produce its + both state functions as + well as a ffunc (a division + function) to produce its answer. In any case, at least one state function must be - defined, and any SFUNC2 must have a corresponding INITCOND2. + defined, and any sfunc2 must + have a corresponding + initcond2. diff --git a/doc/src/sgml/xaggr.sgml b/doc/src/sgml/xaggr.sgml index b0a5a48b28c..d12cc78353d 100644 --- a/doc/src/sgml/xaggr.sgml +++ b/doc/src/sgml/xaggr.sgml @@ -2,26 +2,57 @@ Extending <Acronym>SQL</Acronym>: Aggregates - Aggregates in Postgres -are expressed in terms of state - transition functions. That is, an aggregate can be + Aggregate functions in Postgres + are expressed as state values + and state transition functions. + That is, an aggregate can be defined in terms of state that is modified whenever an - instance is processed. Some state functions look at a - particular value in the instance when computing the new - state (sfunc1 in the -create aggregate syntax) while - others only keep track of their own internal state - (sfunc2). - If we define an aggregate that uses only -sfunc1, we - define an aggregate that computes a running function of + input item is processed. To define a new aggregate + function, one selects a datatype for the state value, + an initial value for the state, and a state transition + function. The state transition function is just an + ordinary function that could also be used outside the + context of the aggregate. 
+ + + + Actually, in order to make it easier to construct useful + aggregates from existing functions, an aggregate can have + one or two separate state values, one or two transition + functions to update those state values, and a + final function that computes the + actual aggregate result from the ending state values. + + + + Thus there can be as many as four datatypes involved: + the type of the input data items, the type of the aggregate's + result, and the types of the two state values. Only the + input and result datatypes are seen by a user of the aggregate. + + + + Some state transition functions need to look at each successive + input to compute the next state value, while others ignore the + specific input value and simply update their internal state. + (The most useful example of the second kind is a running count + of the number of input items.) The Postgres + aggregate machinery defines sfunc1 for + an aggregate as a function that is passed both the old state + value and the current input value, while sfunc2 + is a function that is passed only the old state value. + + + + If we define an aggregate that uses only sfunc1, + we have an aggregate that computes a running function of the attribute values from each instance. "Sum" is an example of this kind of aggregate. "Sum" starts at zero and always adds the current instance's value to - its running total. We will use the -int4pl that is - built into Postgres -to perform this addition. + its running total. For example, if we want to make a Sum + aggregate to work on a datatype for complex numbers, + we only need the addition function for that datatype. + The aggregate definition is: CREATE AGGREGATE complex_sum ( @@ -39,11 +70,15 @@ SELECT complex_sum(a) FROM test_complex; |(34,53.9) | +------------+ + + (In practice, we'd just name the aggregate "sum", and rely on + Postgres to figure out which kind + of sum to apply to a complex column.) 
If we define only sfunc2, we are -specifying an aggregate + specifying an aggregate that computes a running function that is independent of the attribute values from each instance. "Count" is the most common example of this kind of @@ -104,4 +139,10 @@ SELECT my_average(salary) as emp_average FROM EMP; +------------+ + + + For further details see + the CREATE AGGREGATE reference page. +