1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-03 20:02:46 +03:00

Fix behavior of float aggregates for single Inf or NaN inputs.

When there is just one non-null input value, and it is infinity or NaN,
aggregates such as stddev_pop and covar_pop should produce a NaN
result, because the calculation is not well-defined.  They used to do
so, but since we adopted Youngs-Cramer aggregation in commit e954a727f,
they produced zero instead.  That's an oversight, so fix it.  Add tests
exercising these edge cases.

Affected aggregates are

 var_pop(double precision)
 stddev_pop(double precision)
 var_pop(real)
 stddev_pop(real)
 regr_sxx(double precision,double precision)
 regr_syy(double precision,double precision)
 regr_sxy(double precision,double precision)
 regr_r2(double precision,double precision)
 regr_slope(double precision,double precision)
 regr_intercept(double precision,double precision)
 covar_pop(double precision,double precision)
 corr(double precision,double precision)

Back-patch to v12 where the behavior change was accidentally introduced.

Report and patch by me; thanks to Dean Rasheed for review.

Discussion: https://postgr.es/m/353062.1591898766@sss.pgh.pa.us
This commit is contained in:
Tom Lane
2020-06-13 13:43:24 -04:00
parent 81cd796b82
commit 874372a941
3 changed files with 159 additions and 2 deletions

View File

@ -127,7 +127,79 @@ SELECT var_samp(b::numeric) FROM aggtest;
-- population variance is defined for a single tuple, sample variance
-- is not
SELECT var_pop(1.0), var_samp(2.0);
SELECT var_pop(1.0::float8), var_samp(2.0::float8);
var_pop | var_samp
---------+----------
0 |
(1 row)
SELECT stddev_pop(3.0::float8), stddev_samp(4.0::float8);
stddev_pop | stddev_samp
------------+-------------
0 |
(1 row)
SELECT var_pop('inf'::float8), var_samp('inf'::float8);
var_pop | var_samp
---------+----------
NaN |
(1 row)
SELECT stddev_pop('inf'::float8), stddev_samp('inf'::float8);
stddev_pop | stddev_samp
------------+-------------
NaN |
(1 row)
SELECT var_pop('nan'::float8), var_samp('nan'::float8);
var_pop | var_samp
---------+----------
NaN |
(1 row)
SELECT stddev_pop('nan'::float8), stddev_samp('nan'::float8);
stddev_pop | stddev_samp
------------+-------------
NaN |
(1 row)
SELECT var_pop(1.0::float4), var_samp(2.0::float4);
var_pop | var_samp
---------+----------
0 |
(1 row)
SELECT stddev_pop(3.0::float4), stddev_samp(4.0::float4);
stddev_pop | stddev_samp
------------+-------------
0 |
(1 row)
SELECT var_pop('inf'::float4), var_samp('inf'::float4);
var_pop | var_samp
---------+----------
NaN |
(1 row)
SELECT stddev_pop('inf'::float4), stddev_samp('inf'::float4);
stddev_pop | stddev_samp
------------+-------------
NaN |
(1 row)
SELECT var_pop('nan'::float4), var_samp('nan'::float4);
var_pop | var_samp
---------+----------
NaN |
(1 row)
SELECT stddev_pop('nan'::float4), stddev_samp('nan'::float4);
stddev_pop | stddev_samp
------------+-------------
NaN |
(1 row)
SELECT var_pop(1.0::numeric), var_samp(2.0::numeric);
var_pop | var_samp
---------+----------
0 |
@ -139,6 +211,18 @@ SELECT stddev_pop(3.0::numeric), stddev_samp(4.0::numeric);
0 |
(1 row)
SELECT var_pop('nan'::numeric), var_samp('nan'::numeric);
var_pop | var_samp
---------+----------
NaN | NaN
(1 row)
SELECT stddev_pop('nan'::numeric), stddev_samp('nan'::numeric);
stddev_pop | stddev_samp
------------+-------------
NaN | NaN
(1 row)
-- verify correct results for null and NaN inputs
select sum(null::int4) from generate_series(1,3);
sum
@ -299,6 +383,25 @@ SELECT corr(b, a) FROM aggtest;
0.139634516517873
(1 row)
-- check single-tuple behavior
SELECT covar_pop(1::float8,2::float8), covar_samp(3::float8,4::float8);
covar_pop | covar_samp
-----------+------------
0 |
(1 row)
SELECT covar_pop(1::float8,'inf'::float8), covar_samp(3::float8,'inf'::float8);
covar_pop | covar_samp
-----------+------------
NaN |
(1 row)
SELECT covar_pop(1::float8,'nan'::float8), covar_samp(3::float8,'nan'::float8);
covar_pop | covar_samp
-----------+------------
NaN |
(1 row)
-- test accum and combine functions directly
CREATE TABLE regr_test (x float8, y float8);
INSERT INTO regr_test VALUES (10,150),(20,250),(30,350),(80,540),(100,200);

View File

@ -39,8 +39,22 @@ SELECT var_samp(b::numeric) FROM aggtest;
-- population variance is defined for a single tuple, sample variance
-- is not
SELECT var_pop(1.0), var_samp(2.0);
SELECT var_pop(1.0::float8), var_samp(2.0::float8);
SELECT stddev_pop(3.0::float8), stddev_samp(4.0::float8);
SELECT var_pop('inf'::float8), var_samp('inf'::float8);
SELECT stddev_pop('inf'::float8), stddev_samp('inf'::float8);
SELECT var_pop('nan'::float8), var_samp('nan'::float8);
SELECT stddev_pop('nan'::float8), stddev_samp('nan'::float8);
SELECT var_pop(1.0::float4), var_samp(2.0::float4);
SELECT stddev_pop(3.0::float4), stddev_samp(4.0::float4);
SELECT var_pop('inf'::float4), var_samp('inf'::float4);
SELECT stddev_pop('inf'::float4), stddev_samp('inf'::float4);
SELECT var_pop('nan'::float4), var_samp('nan'::float4);
SELECT stddev_pop('nan'::float4), stddev_samp('nan'::float4);
SELECT var_pop(1.0::numeric), var_samp(2.0::numeric);
SELECT stddev_pop(3.0::numeric), stddev_samp(4.0::numeric);
SELECT var_pop('nan'::numeric), var_samp('nan'::numeric);
SELECT stddev_pop('nan'::numeric), stddev_samp('nan'::numeric);
-- verify correct results for null and NaN inputs
select sum(null::int4) from generate_series(1,3);
@ -81,6 +95,11 @@ SELECT regr_slope(b, a), regr_intercept(b, a) FROM aggtest;
SELECT covar_pop(b, a), covar_samp(b, a) FROM aggtest;
SELECT corr(b, a) FROM aggtest;
-- check single-tuple behavior
SELECT covar_pop(1::float8,2::float8), covar_samp(3::float8,4::float8);
SELECT covar_pop(1::float8,'inf'::float8), covar_samp(3::float8,'inf'::float8);
SELECT covar_pop(1::float8,'nan'::float8), covar_samp(3::float8,'nan'::float8);
-- test accum and combine functions directly
CREATE TABLE regr_test (x float8, y float8);
INSERT INTO regr_test VALUES (10,150),(20,250),(30,350),(80,540),(100,200);