diff --git a/Makefile.in b/Makefile.in index 052fd8fa6d..a82f4d2e15 100644 --- a/Makefile.in +++ b/Makefile.in @@ -393,6 +393,7 @@ TESTSRC += \ $(TOP)/ext/misc/fuzzer.c \ $(TOP)/ext/misc/ieee754.c \ $(TOP)/ext/misc/nextchar.c \ + $(TOP)/ext/misc/percentile.c \ $(TOP)/ext/misc/regexp.c \ $(TOP)/ext/misc/spellfix.c \ $(TOP)/ext/misc/wholenumber.c diff --git a/Makefile.msc b/Makefile.msc index 57d7065b07..206f1680d4 100644 --- a/Makefile.msc +++ b/Makefile.msc @@ -713,6 +713,7 @@ TESTEXT = \ $(TOP)\ext\misc\fuzzer.c \ $(TOP)\ext\misc\ieee754.c \ $(TOP)\ext\misc\nextchar.c \ + $(TOP)\ext\misc\percentile.c \ $(TOP)\ext\misc\regexp.c \ $(TOP)\ext\misc\spellfix.c \ $(TOP)\ext\misc\wholenumber.c diff --git a/ext/misc/percentile.c b/ext/misc/percentile.c new file mode 100644 index 0000000000..40ef151f60 --- /dev/null +++ b/ext/misc/percentile.c @@ -0,0 +1,219 @@ +/* +** 2013-05-28 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file contains code to implement the percentile(Y,P) SQL function +** as described below: +** +** (1) The percentile(Y,P) function is an aggregate function taking +** exactly two arguments. +** +** (2) If the P argument to percentile(Y,P) is not the same for every +** row in the aggregate then an error is thrown. The word "same" +** in the previous sentence means that the value differ by less +** than 0.001. +** +** (3) If the P argument to percentile(Y,P) evaluates to anything other +** than a number in the range of 0.0 to 100.0 inclusive then an +** error is thrown. +** +** (4) If any Y argument to percentile(Y,P) evaluates to a value that +** is not NULL and is not numeric then an error is thrown. +** +** (5) If any Y argument to percentile(Y,P) evaluates to plus or minus +** infinity then an error is thrown. (SQLite always interprets NaN +** values as NULL.) +** +** (6) Both Y and P in percentile(Y,P) can be arbitrary expressions, +** including CASE WHEN expressions. +** +** (7) The percentile(Y,P) aggregate is able to handle inputs of at least +** one million (1,000,000) rows. +** +** (8) If there are no non-NULL values for Y, then percentile(Y,P) +** returns NULL. +** +** (9) If there is exactly one non-NULL value for Y, the percentile(Y,P) +** returns the one Y value. +** +** (10) If there N non-NULL values of Y where N is two or more and +** the Y values are ordered from least to greatest and a graph is +** drawn from 0 to N-1 such that the height of the graph at J is +** the J-th Y value and such that straight lines are drawn between +** adjacent Y values, then the percentile(Y,P) function returns +** the height of the graph at P*(N-1)/100. +** +** (11) The percentile(Y,P) function always returns either a floating +** point number or NULL. +** +** (12) The percentile(Y,P) is implemented as a single C99 source-code +** file that compiles into a shared-library or DLL that can be loaded +** into SQLite using the sqlite3_load_extension() interface. +*/ +#include "sqlite3ext.h" +SQLITE_EXTENSION_INIT1 +#include +#include +#include + +/* The following object is the session context for a single percentile() +** function. We have to remember all input Y values until the very end. +** Those values are accumulated in the Percentile.a[] array. +*/ +typedef struct Percentile Percentile; +struct Percentile { + unsigned nAlloc; /* Number of slots allocated for a[] */ + unsigned nUsed; /* Number of slots actually used in a[] */ + double rPct; /* 1.0 more than the value for P */ + double *a; /* Array of Y values */ +}; + +/* +** Return TRUE if the input floating-point number is an infinity. +*/ +static int isInfinity(double r){ + sqlite3_uint64 u; + assert( sizeof(u)==sizeof(r) ); + memcpy(&u, &r, sizeof(u)); + return ((u>>52)&0x7ff)==0x7ff; +} + +/* +** Return TRUE if two doubles differ by 0.001 or less +*/ +static int sameValue(double a, double b){ + a -= b; + return a>=-0.001 && a<=0.001; +} + +/* +** The "step" function for percentile(Y,P) is called once for each +** input row. +*/ +static void percentStep(sqlite3_context *pCtx, int argc, sqlite3_value **argv){ + Percentile *p; + double rPct; + int eType; + double y; + assert( argc==2 ); + + /* Requirement 3: P must be a number between 0 and 100 */ + eType = sqlite3_value_numeric_type(argv[1]); + rPct = sqlite3_value_double(argv[1]); + if( (eType!=SQLITE_INTEGER && eType!=SQLITE_FLOAT) || + ((rPct = sqlite3_value_double(argv[1]))<0.0 || rPct>100.0) ){ + sqlite3_result_error(pCtx, "2nd argument to percentile() is not " + "a number between 0.0 and 100.0", -1); + return; + } + + /* Allocate the session context. */ + p = (Percentile*)sqlite3_aggregate_context(pCtx, sizeof(*p)); + if( p==0 ) return; + + /* Remember the P value. Throw an error if the P value is different + ** from any prior row, per Requirement (2). */ + if( p->rPct==0.0 ){ + p->rPct = rPct+1.0; + }else if( !sameValue(p->rPct,rPct+1.0) ){ + sqlite3_result_error(pCtx, "2nd argument to percentile() is not the " + "same for all input rows", -1); + return; + } + + /* Ignore rows for which Y is NULL */ + eType = sqlite3_value_type(argv[0]); + if( eType==SQLITE_NULL ) return; + + /* If not NULL, then Y must be numeric. Otherwise throw an error. + ** Requirement 4 */ + if( eType!=SQLITE_INTEGER && eType!=SQLITE_FLOAT ){ + sqlite3_result_error(pCtx, "1st argument to percentile() is not " + "numeric", -1); + return; + } + + /* Throw an error if the Y value is infinity or NaN */ + y = sqlite3_value_double(argv[0]); + if( isInfinity(y) ){ + sqlite3_result_error(pCtx, "Inf input to percentile()", -1); + return; + } + + /* Allocate and store the Y */ + if( p->nUsed>=p->nAlloc ){ + unsigned n = p->nAlloc*2 + 250; + double *a = sqlite3_realloc(p->a, sizeof(double)*n); + if( a==0 ){ + sqlite3_free(p->a); + memset(p, 0, sizeof(*p)); + sqlite3_result_error_nomem(pCtx); + return; + } + p->nAlloc = n; + p->a = a; + } + p->a[p->nUsed++] = y; +} + +/* +** Compare to doubles for sorting using qsort() +*/ +static int doubleCmp(const void *pA, const void *pB){ + double a = *(double*)pA; + double b = *(double*)pB; + if( a==b ) return 0; + if( aa==0 ) return; + if( p->nUsed ){ + qsort(p->a, p->nUsed, sizeof(double), doubleCmp); + ix = (p->rPct-1.0)*(p->nUsed-1)*0.01; + i1 = ix; + i2 = ix==(double)i1 || i1==p->nUsed-1 ? i1 : i1+1; + v1 = p->a[i1]; + v2 = p->a[i2]; + vx = v1 + (v2-v1)*(ix-i1); + sqlite3_result_double(pCtx, vx); + } + sqlite3_free(p->a); + memset(p, 0, sizeof(*p)); +} + + +#ifdef _WIN32 +__declspec(dllexport) +#endif +int sqlite3_percentile_init( + sqlite3 *db, + char **pzErrMsg, + const sqlite3_api_routines *pApi +){ + int rc = SQLITE_OK; + SQLITE_EXTENSION_INIT2(pApi); + (void)pzErrMsg; /* Unused parameter */ + rc = sqlite3_create_function(db, "percentile", 2, SQLITE_UTF8, 0, + 0, percentStep, percentFinal); + return rc; +} diff --git a/main.mk b/main.mk index ba73edd711..845df4beaa 100644 --- a/main.mk +++ b/main.mk @@ -275,6 +275,7 @@ TESTSRC += \ $(TOP)/ext/misc/fuzzer.c \ $(TOP)/ext/misc/ieee754.c \ $(TOP)/ext/misc/nextchar.c \ + $(TOP)/ext/misc/percentile.c \ $(TOP)/ext/misc/regexp.c \ $(TOP)/ext/misc/spellfix.c \ $(TOP)/ext/misc/wholenumber.c diff --git a/manifest b/manifest index 588d892738..6cd3474131 100644 --- a/manifest +++ b/manifest @@ -1,9 +1,9 @@ -C Finish\sremoving\sthe\ssqlite3.inTrans\sfield.\s\sIn\sthe\sprevious\scheck-in,\sit\swas\nmerely\scommented\sout\sbecause\sI\sfailed\sto\sselect\sFile->Save\son\smy\stext\seditor. -D 2013-05-28T17:30:52.422 +C Add\sthe\spercentile()\sSQL\sfunction\sas\sa\sloadable\s\nextension\sin\sthe\sext/misc\sdirectory. +D 2013-05-28T20:25:54.766 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f -F Makefile.in f6b58b7bdf6535f0f0620c486dd59aa4662c0b4f +F Makefile.in 5e41da95d92656a5004b03d3576e8b226858a28e F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 -F Makefile.msc 5dc042f51187414d5886ac6d8308630d484690c4 +F Makefile.msc 7d226394826f060f232c0a02a468e8651819b7c2 F Makefile.vxworks db21ed42a01d5740e656b16f92cb5d8d5e5dd315 F README cd04a36fbc7ea56932a4052d7d0b7f09f27c33d6 F VERSION 05c7bd63b96f31cfdef5c766ed91307ac121f5aa @@ -111,6 +111,7 @@ F ext/misc/closure.c 40788c54c59190a1f52f6492a260d8894a246fe9 F ext/misc/fuzzer.c 51bd96960b6b077d41d6f3cedefbcb57f29efaa2 F ext/misc/ieee754.c 2565ce373d842977efe0922dc50b8a41b3289556 F ext/misc/nextchar.c 1131e2b36116ffc6fe6b2e3464bfdace27978b1e +F ext/misc/percentile.c 4fb5e46c4312b0be74e8e497ac18f805f0e3e6c5 F ext/misc/regexp.c c25c65fe775f5d9801fb8573e36ebe73f2c0c2e0 F ext/misc/rot13.c 1ac6f95f99b575907b9b09c81a349114cf9be45a F ext/misc/spellfix.c 6d7ce6105a4b7729f6c44ccdf1ab7e80d9707c02 @@ -137,7 +138,7 @@ F ext/rtree/viewrtree.tcl eea6224b3553599ae665b239bd827e182b466024 F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8 F magic.txt f2b23a6bde8f1c6e86b957e4d94eab0add520b0d -F main.mk 2a3cd58acfd1ecc656027afdd60ed1eefb07380f +F main.mk e536751ac719806209c51f5dc63022a5dd40c631 F mkdll.sh 7d09b23c05d56532e9d44a50868eb4b12ff4f74a F mkextu.sh 416f9b7089d80e5590a29692c9d9280a10dbad9f F mkextw.sh 4123480947681d9b434a5e7b1ee08135abe409ac @@ -224,7 +225,7 @@ F src/sqliteLimit.h 164b0e6749d31e0daa1a4589a169d31c0dec7b3d F src/status.c bedc37ec1a6bb9399944024d63f4c769971955a9 F src/table.c 2cd62736f845d82200acfa1287e33feb3c15d62e F src/tclsqlite.c 2ecec9937e69bc17560ad886da35195daa7261b8 -F src/test1.c 43c87e52cb504e8c9928b1e9bad21a6117c695e9 +F src/test1.c 6d2a340eea1d866bf7059894491652a69a7ee802 F src/test2.c 7355101c085304b90024f2261e056cdff13c6c35 F src/test3.c 1c0e5d6f080b8e33c1ce8b3078e7013fdbcd560c F src/test4.c 9b32d22f5f150abe23c1830e2057c4037c45b3df @@ -703,6 +704,7 @@ F test/pageropt.test 6b8f6a123a5572c195ad4ae40f2987007923bbd6 F test/pagesize.test 1dd51367e752e742f58e861e65ed7390603827a0 F test/pcache.test 065aa286e722ab24f2e51792c1f093bf60656b16 F test/pcache2.test a83efe2dec0d392f814bfc998def1d1833942025 +F test/percentile.test 4614301e38398df7fdd5f28f4ed8f272b328251b F test/permutations.test d997a947ab8aabb15f763d50a030b3c11e8ef1b6 F test/pragma.test 5e7de6c32a5d764f09437d2025f07e4917b9e178 F test/pragma2.test 3a55f82b954242c642f8342b17dffc8b47472947 @@ -1090,7 +1092,7 @@ F tool/vdbe-compress.tcl f12c884766bd14277f4fcedcae07078011717381 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh fbc018d67fd7395f440c28f33ef0f94420226381 F tool/win/sqlite.vsix 97894c2790eda7b5bce3cc79cb2a8ec2fde9b3ac -P c3381cd4957013d46075996c3b865177c888d2a8 -R b4b6c4d83a50772ea0d179f36ddb400e +P 2f97e38a6611cb17c24d74332d3ac3777dc0dd3e +R 339888db3396051923023dfe123ab06e U drh -Z a65b38b6ed5056fa8821564d4893ee8d +Z 1117fca47d6619d98ed20043a3bd4774 diff --git a/manifest.uuid b/manifest.uuid index d4c6edd558..c471926f30 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -2f97e38a6611cb17c24d74332d3ac3777dc0dd3e \ No newline at end of file +a64d760d9290b1be78cdda7ae66d4f02c3b3fa53 \ No newline at end of file diff --git a/src/test1.c b/src/test1.c index d95274b0d5..bd6d4a4cfd 100644 --- a/src/test1.c +++ b/src/test1.c @@ -6012,6 +6012,7 @@ static int tclLoadStaticExtensionCmd( extern int sqlite3_fuzzer_init(sqlite3*,char**,const sqlite3_api_routines*); extern int sqlite3_ieee_init(sqlite3*,char**,const sqlite3_api_routines*); extern int sqlite3_nextchar_init(sqlite3*,char**,const sqlite3_api_routines*); + extern int sqlite3_percentile_init(sqlite3*,char**,const sqlite3_api_routines*); extern int sqlite3_regexp_init(sqlite3*,char**,const sqlite3_api_routines*); extern int sqlite3_spellfix_init(sqlite3*,char**,const sqlite3_api_routines*); extern int sqlite3_wholenumber_init(sqlite3*,char**,const sqlite3_api_routines*); @@ -6024,6 +6025,7 @@ static int tclLoadStaticExtensionCmd( { "fuzzer", sqlite3_fuzzer_init }, { "ieee754", sqlite3_ieee_init }, { "nextchar", sqlite3_nextchar_init }, + { "percentile", sqlite3_percentile_init }, { "regexp", sqlite3_regexp_init }, { "spellfix", sqlite3_spellfix_init }, { "wholenumber", sqlite3_wholenumber_init }, diff --git a/test/percentile.test b/test/percentile.test new file mode 100644 index 0000000000..29947b9dee --- /dev/null +++ b/test/percentile.test @@ -0,0 +1,207 @@ +# 2013-05-28 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this file is percentile.c extension +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +# Basic test of the percentile() function. +# +do_test percentile-1.0 { + load_static_extension db percentile + execsql { + CREATE TABLE t1(x); + INSERT INTO t1 VALUES(1),(4),(6),(7),(8),(9),(11),(11),(11); + } + execsql {SELECT percentile(x,0) FROM t1} +} {1.0} +foreach {in out} { + 100 11.0 + 50 8.0 + 12.5 4.0 + 15 4.4 + 20 5.2 + 80 11.0 + 89 11.0 +} { + do_test percentile-1.1.$in { + execsql {SELECT percentile(x,$in) FROM t1} + } $out +} + +# Add some NULL values. +# +do_test percentile-1.2 { + execsql {INSERT INTO t1 VALUES(NULL),(NULL);} +} {} +foreach {in out} { + 100 11.0 + 50 8.0 + 12.5 4.0 + 15 4.4 + 20 5.2 + 80 11.0 + 89 11.0 +} { + do_test percentile-1.3.$in { + execsql {SELECT percentile(x,$in) FROM t1} + } $out +} + +# The second argument to percentile can change some, but not much. +# +do_test percentile-1.4 { + catchsql {SELECT round(percentile(x, 15+0.000001*rowid),1) FROM t1} +} {0 4.4} +do_test percentile-1.5 { + catchsql {SELECT round(percentile(x, 15+0.1*rowid),1) FROM t1} +} {1 {2nd argument to percentile() is not the same for all input rows}} + +# Input values in a random order +# +do_test percentile-1.6 { + execsql { + CREATE TABLE t2(x); + INSERT INTO t2 SELECT x+0.0 FROM t1 ORDER BY random(); + } +} {} +foreach {in out} { + 100 11.0 + 50 8.0 + 12.5 4.0 + 15 4.4 + 20 5.2 + 80 11.0 + 89 11.0 +} { + do_test percentile-1.7.$in { + execsql {SELECT percentile(x,$in) FROM t2} + } $out +} + +# Wrong number of arguments +# +do_test percentile-1.8 { + catchsql {SELECT percentile(x,0,1) FROM t1} +} {1 {wrong number of arguments to function percentile()}} +do_test percentile-1.9 { + catchsql {SELECT percentile(x) FROM t1} +} {1 {wrong number of arguments to function percentile()}} + +# Second argument must be numeric +# +do_test percentile-1.10 { + catchsql {SELECT percentile(x,null) FROM t1} +} {1 {2nd argument to percentile() is not a number between 0.0 and 100.0}} +do_test percentile-1.11 { + catchsql {SELECT percentile(x,'fifty') FROM t1} +} {1 {2nd argument to percentile() is not a number between 0.0 and 100.0}} +do_test percentile-1.12 { + catchsql {SELECT percentile(x,x'3530') FROM t1} +} {1 {2nd argument to percentile() is not a number between 0.0 and 100.0}} + +# Second argument is out of range +# +do_test percentile-1.13 { + catchsql {SELECT percentile(x,-0.0000001) FROM t1} +} {1 {2nd argument to percentile() is not a number between 0.0 and 100.0}} +do_test percentile-1.14 { + catchsql {SELECT percentile(x,100.0000001) FROM t1} +} {1 {2nd argument to percentile() is not a number between 0.0 and 100.0}} + +# First argument is not NULL and is not NUMERIC +# +do_test percentile-1.15 { + catchsql { + BEGIN; + UPDATE t1 SET x='50' WHERE x IS NULL; + SELECT percentile(x, 50) FROM t1; + } +} {1 {1st argument to percentile() is not numeric}} +do_test percentile-1.16 { + catchsql { + ROLLBACK; + BEGIN; + UPDATE t1 SET x=x'3530' WHERE x IS NULL; + SELECT percentile(x, 50) FROM t1; + } +} {1 {1st argument to percentile() is not numeric}} +do_test percentile-1.17 { + catchsql { + ROLLBACK; + SELECT percentile(x, 50) FROM t1; + } +} {0 8.0} + +# No non-NULL entries. +# +do_test percentile-1.18 { + execsql { + UPDATE t1 SET x=NULL; + SELECT ifnull(percentile(x, 50),'NULL') FROM t1 + } +} {NULL} + +# Exactly one non-NULL entry +# +do_test percentile-1.19 { + execsql { + UPDATE t1 SET x=12345 WHERE rowid=5; + SELECT percentile(x, 0), percentile(x, 50), percentile(x,100) FROM t1 + } +} {12345.0 12345.0 12345.0} + +# Infinity as an input +# +do_test percentile-1.20 { + catchsql { + DELETE FROM t1; + INSERT INTO t1 SELECT x+0.0 FROM t2; + UPDATE t1 SET x=1.0e300*1.0e300 WHERE rowid=5; + SELECT percentile(x,50) from t1; + } +} {1 {Inf input to percentile()}} +do_test percentile-1.21 { + catchsql { + UPDATE t1 SET x=-1.0e300*1.0e300 WHERE rowid=5; + SELECT percentile(x,50) from t1; + } +} {1 {Inf input to percentile()}} + +# Million-row Inputs +# +do_test percentile-2.0 { + load_static_extension db wholenumber + execsql { + CREATE VIRTUAL TABLE nums USING wholenumber; + CREATE TABLE t3(x); + INSERT INTO t3 SELECT value-1 FROM nums WHERE value BETWEEN 1 AND 500000; + INSERT INTO t3 SELECT value*10 FROM nums + WHERE value BETWEEN 500000 AND 999999; + SELECT count(*) FROM t3; + } +} {1000000} +foreach {in out} { + 0 0.0 + 100 9999990.0 + 50 2749999.5 + 10 99999.9 +} { + do_test percentile-2.1.$in { + execsql { + SELECT percentile(x, $in) from t3; + } + } $out +} + +finish_test