diff --git a/manifest b/manifest index 34f024e720..36affcf425 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\stypo\sin\scomment.\s\sNo\schanges\sto\scode. -D 2017-05-02T01:30:44.805 +C Move\sterms\sof\sthe\sHAVING\sclause\sthat\sreference\sonly\scolumns\sin\sthe\sGROUP\sBY\nclause\sover\sto\sthe\sWHERE\sclause,\sresulting\sin\sa\sfaster\squery\splan. +D 2017-05-02T16:55:07.827 F Makefile.in 1cc758ce3374a32425e4d130c2fe7b026b20de5b8843243de75f087c0a2661fb F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434 F Makefile.msc 6a8c838220f7c00820e1fc0ac1bccaaa8e5676067e1dbfa1bafa7a4ffecf8ae6 @@ -355,7 +355,7 @@ F src/ctime.c 47d91a25ad8f199a71a5b1b7b169d6dd0d6e98c5719eca801568798743d1161c F src/date.c cc42a41c7422389860d40419a5e3bce5eaf6e7835c3ba2677751dc653550a5c7 F src/dbstat.c 19ee7a4e89979d4df8e44cfac7a8f905ec89b77d F src/delete.c 0d9d5549d42e79ce4d82ff1db1e6c81e36d2f67c -F src/expr.c f10e35dc50be4c8f82eb99bf5d8530229d1d60957cc3c9473ffe584d0444087c +F src/expr.c 965f5e6074ee61cf933be079c6a443c88414490c13ec270b5baaacaa920280fa F src/fault.c 460f3e55994363812d9d60844b2a6de88826e007 F src/fkey.c db65492ae549c3b548c9ef1f279ce1684f1c473b116e1c56a90878cd5dcf968d F src/func.c 9d52522cc8ae7f5cdadfe14594262f1618bc1f86083c4cd6da861b4cf5af6174 @@ -402,12 +402,12 @@ F src/printf.c 8757834f1b54dae512fb25eb1acc8e94a0d15dd2290b58f2563f65973265adb2 F src/random.c 80f5d666f23feb3e6665a6ce04c7197212a88384 F src/resolve.c 3e518b962d932a997fae373366880fc028c75706 F src/rowset.c 7b7e7e479212e65b723bf40128c7b36dc5afdfac -F src/select.c 478e95d424bb86d34c7c95d20872cbd78df97af5f83c3fd7de55d5b2413f927d +F src/select.c 4bbdacd119f22b3b7712b1c1f54bb52fdc7d97d24e131440cc5f235b9df42b0c F src/shell.c 21b79c0e1b93f8e35fd7b4087d6ba438326c3d7e285d0dd51dfd741475f858a1 F src/sqlite.h.in eeb1da70a61d52e1d58e5b55446b85bbac571699421d3cf857421c56214013ce F src/sqlite3.rc 5121c9e10c3964d5755191c80dd1180c122fc3a8 F src/sqlite3ext.h 58fd0676d3111d02e62e5a35992a7d3da5d3f88753acc174f2d37b774fbbdd28 -F src/sqliteInt.h 9affb53bb405dcea1d86e85198ebaf6232a684cc2b2af6b3c181869f1c8f3e93 +F src/sqliteInt.h aea3aa1b81e0d07d5b1c39b8c5a54a1dc5e4f10136cb63da392aef9eb2a5108b F src/sqliteLimit.h 1513bfb7b20378aa0041e7022d04acb73525de35b80b252f1b83fedb4de6a76b F src/status.c a9e66593dfb28a9e746cba7153f84d49c1ddc4b1 F src/table.c b46ad567748f24a326d9de40e5b9659f96ffff34 @@ -850,6 +850,7 @@ F test/fuzzer2.test a85ef814ce071293bce1ad8dffa217cbbaad4c14 F test/fuzzerfault.test 8792cd77fd5bce765b05d0c8e01b9edcf8af8536 F test/gcfault.test dd28c228a38976d6336a3fc42d7e5f1ad060cb8c F test/genesis.tcl 1e2e2e8e5cc4058549a154ff1892fe5c9de19f98 +F test/having.test 30a02b8a9a47cba7bdb5281999c5cbff407c2ac296511ee64dd0b418fe38eb0f F test/hexlit.test 4a6a5f46e3c65c4bf1fa06f5dd5a9507a5627751 F test/hidden.test 23c1393a79e846d68fd902d72c85d5e5dcf98711 F test/hook.test dbc0b87756e1e20e7497b56889c9e9cd2f8cc2b5 @@ -1577,7 +1578,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P adcad37b001d255299be716b16003b56ec1fba349ef73261ee0e543186c4f311 -R 333e64e1a0afdbe4625860741bf47c15 -U mistachkin -Z b65b5e3a6f4ab6d1f0aa8214a9325839 +P 6674814afb9e763e7f7060776586e26da28040b3208ce06c8a285dd647e5a53d 8424492eac506866d2918e5fe03c8f65fef960215d56012a3b52ed42789ed35a +R 645ee5368137c061c8ff554b7c3cb973 +T +closed 8424492eac506866d2918e5fe03c8f65fef960215d56012a3b52ed42789ed35a +U drh +Z f125bc7c2e400862ed032d286e9e90a2 diff --git a/manifest.uuid b/manifest.uuid index 4e8c369818..46b6c41181 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -6674814afb9e763e7f7060776586e26da28040b3208ce06c8a285dd647e5a53d \ No newline at end of file +47cbb471d056c8e1834a5ca72491404a3bfb273b5ff7bdd84b98d263938ea874 \ No newline at end of file diff --git a/src/expr.c b/src/expr.c index 873911633f..a79b0b7495 100644 --- a/src/expr.c +++ b/src/expr.c @@ -1815,6 +1815,65 @@ int sqlite3ExprIsTableConstant(Expr *p, int iCur){ return exprIsConst(p, 3, iCur); } + +/* +** sqlite3WalkExpr() callback used by sqlite3ExprIsConstantOrGroupBy(). +*/ +static int exprNodeIsConstantOrGroupBy(Walker *pWalker, Expr *pExpr){ + ExprList *pGroupBy = pWalker->u.pGroupBy; + int i; + + /* Check if pExpr is identical to any GROUP BY term. If so, consider + ** it constant. */ + for(i=0; inExpr; i++){ + Expr *p = pGroupBy->a[i].pExpr; + if( sqlite3ExprCompare(pExpr, p, -1)<2 ){ + CollSeq *pColl = sqlite3ExprCollSeq(pWalker->pParse, p); + if( pColl==0 || sqlite3_stricmp("BINARY", pColl->zName)==0 ){ + return WRC_Prune; + } + } + } + + /* Check if pExpr is a sub-select. If so, consider it variable. */ + if( ExprHasProperty(pExpr, EP_xIsSelect) ){ + pWalker->eCode = 0; + return WRC_Abort; + } + + return exprNodeIsConstant(pWalker, pExpr); +} + +/* +** Walk the expression tree passed as the first argument. Return non-zero +** if the expression consists entirely of constants or copies of terms +** in pGroupBy that sort with the BINARY collation sequence. +** +** This routine is used to determine if a term of the HAVING clause can +** be promoted into the WHERE clause. In order for such a promotion to work, +** the value of the HAVING clause term must be the same for all members of +** a "group". The requirement that the GROUP BY term must be BINARY +** assumes that no other collating sequence will have a finer-grained +** grouping than binary. In other words (A=B COLLATE binary) implies +** A=B in every other collating sequence. The requirement that the +** GROUP BY be BINARY is stricter than necessary. It would also work +** to promote HAVING clauses that use the same alternative collating +** sequence as the GROUP BY term, but that is much harder to check, +** alternative collating sequences are uncommon, and this is only an +** optimization, so we take the easy way out and simply require the +** GROUP BY to use the BINARY collating sequence. +*/ +int sqlite3ExprIsConstantOrGroupBy(Parse *pParse, Expr *p, ExprList *pGroupBy){ + Walker w; + memset(&w, 0, sizeof(w)); + w.eCode = 1; + w.xExprCallback = exprNodeIsConstantOrGroupBy; + w.u.pGroupBy = pGroupBy; + w.pParse = pParse; + sqlite3WalkExpr(&w, p); + return w.eCode; +} + /* ** Walk an expression tree. Return non-zero if the expression is constant ** or a function call with constant arguments. Return and 0 if there diff --git a/src/select.c b/src/select.c index 77824c4aaa..9227be890e 100644 --- a/src/select.c +++ b/src/select.c @@ -4879,6 +4879,77 @@ static void explainSimpleCount( # define explainSimpleCount(a,b,c) #endif +/* +** Context object for havingToWhereExprCb(). +*/ +struct HavingToWhereCtx { + Expr **ppWhere; + ExprList *pGroupBy; +}; + +/* +** sqlite3WalkExpr() callback used by havingToWhere(). +** +** If the node passed to the callback is a TK_AND node, return +** WRC_Continue to tell sqlite3WalkExpr() to iterate through child nodes. +** +** Otherwise, return WRC_Prune. In this case, also check if the +** sub-expression matches the criteria for being moved to the WHERE +** clause. If so, add it to the WHERE clause and replace the sub-expression +** within the HAVING expression with a constant "1". +*/ +static int havingToWhereExprCb(Walker *pWalker, Expr *pExpr){ + if( pExpr->op!=TK_AND ){ + struct HavingToWhereCtx *p = pWalker->u.pHavingCtx; + if( sqlite3ExprIsConstantOrGroupBy(pWalker->pParse, pExpr, p->pGroupBy) ){ + sqlite3 *db = pWalker->pParse->db; + Expr *pNew = sqlite3ExprAlloc(db, TK_INTEGER, &sqlite3IntTokens[1], 0); + if( pNew ){ + Expr *pWhere = *(p->ppWhere); + SWAP(Expr, *pNew, *pExpr); + pNew = sqlite3ExprAnd(db, pWhere, pNew); + *(p->ppWhere) = pNew; + } + } + return WRC_Prune; + } + return WRC_Continue; +} + +/* +** Transfer eligible terms from the HAVING clause of a query, which is +** processed after grouping, to the WHERE clause, which is processed before +** grouping. For example, the query: +** +** SELECT * FROM WHERE a=? GROUP BY b HAVING b=? AND c=? +** +** can be rewritten as: +** +** SELECT * FROM WHERE a=? AND b=? GROUP BY b HAVING c=? +** +** A term of the HAVING expression is eligible for transfer if it consists +** entirely of constants and expressions that are also GROUP BY terms that +** use the "BINARY" collation sequence. +*/ +static void havingToWhere( + Parse *pParse, + ExprList *pGroupBy, + Expr *pHaving, + Expr **ppWhere +){ + struct HavingToWhereCtx sCtx; + Walker sWalker; + + sCtx.ppWhere = ppWhere; + sCtx.pGroupBy = pGroupBy; + + memset(&sWalker, 0, sizeof(sWalker)); + sWalker.pParse = pParse; + sWalker.xExprCallback = havingToWhereExprCb; + sWalker.u.pHavingCtx = &sCtx; + sqlite3WalkExpr(&sWalker, pHaving); +} + /* ** Generate code for the SELECT statement given in the p argument. ** @@ -5347,6 +5418,11 @@ int sqlite3Select( sqlite3ExprAnalyzeAggList(&sNC, pEList); sqlite3ExprAnalyzeAggList(&sNC, sSort.pOrderBy); if( pHaving ){ + if( pGroupBy ){ + assert( pWhere==p->pWhere ); + havingToWhere(pParse, pGroupBy, pHaving, &p->pWhere); + pWhere = p->pWhere; + } sqlite3ExprAnalyzeAggregates(&sNC, pHaving); } sAggInfo.nAccumulator = sAggInfo.nColumn; diff --git a/src/sqliteInt.h b/src/sqliteInt.h index 40660aed99..fc24885e28 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -3316,15 +3316,17 @@ struct Walker { int walkerDepth; /* Number of subqueries */ u8 eCode; /* A small processing code */ union { /* Extra data for callback */ - NameContext *pNC; /* Naming context */ - int n; /* A counter */ - int iCur; /* A cursor number */ - SrcList *pSrcList; /* FROM clause */ - struct SrcCount *pSrcCount; /* Counting column references */ - struct CCurHint *pCCurHint; /* Used by codeCursorHint() */ - int *aiCol; /* array of column indexes */ - struct IdxCover *pIdxCover; /* Check for index coverage */ - struct IdxExprTrans *pIdxTrans; /* Convert indexed expr to column */ + NameContext *pNC; /* Naming context */ + int n; /* A counter */ + int iCur; /* A cursor number */ + SrcList *pSrcList; /* FROM clause */ + struct SrcCount *pSrcCount; /* Counting column references */ + struct CCurHint *pCCurHint; /* Used by codeCursorHint() */ + int *aiCol; /* array of column indexes */ + struct IdxCover *pIdxCover; /* Check for index coverage */ + struct IdxExprTrans *pIdxTrans; /* Convert indexed expr to column */ + ExprList *pGroupBy; /* GROUP BY clause */ + struct HavingToWhereCtx *pHavingCtx; /* HAVING to WHERE clause ctx */ } u; }; @@ -3794,6 +3796,7 @@ void sqlite3LeaveMutexAndCloseZombie(sqlite3*); int sqlite3ExprIsConstant(Expr*); int sqlite3ExprIsConstantNotJoin(Expr*); int sqlite3ExprIsConstantOrFunction(Expr*, u8); +int sqlite3ExprIsConstantOrGroupBy(Parse*, Expr*, ExprList*); int sqlite3ExprIsTableConstant(Expr*,int); #ifdef SQLITE_ENABLE_CURSOR_HINTS int sqlite3ExprContainsSubquery(Expr*); diff --git a/test/having.test b/test/having.test new file mode 100644 index 0000000000..6d2f9fdcc5 --- /dev/null +++ b/test/having.test @@ -0,0 +1,154 @@ +# 2017 April 30 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Test the HAVING->WHERE optimization. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix having + +do_execsql_test 1.0 { + CREATE TABLE t2(c, d); + + CREATE TABLE t1(a, b); + INSERT INTO t1 VALUES(1, 1); + INSERT INTO t1 VALUES(2, 2); + INSERT INTO t1 VALUES(1, 3); + INSERT INTO t1 VALUES(2, 4); + INSERT INTO t1 VALUES(1, 5); + INSERT INTO t1 VALUES(2, 6); +} {} + +foreach {tn sql res} { + 1 "SELECT a, sum(b) FROM t1 GROUP BY a HAVING a=2" {2 12} + 2 "SELECT a, sum(b) FROM t1 GROUP BY a HAVING a=2 AND sum(b)>10" {2 12} + 3 "SELECT a, sum(b) FROM t1 GROUP BY a HAVING sum(b)>12" {} +} { + do_execsql_test 1.$tn $sql $res +} + +# Run an EXPLAIN command for both SQL statements. Return true if +# the outputs are identical, or false otherwise. +# +proc compare_vdbe {sql1 sql2} { + set r1 [list] + set r2 [list] + db eval "explain $sql1" { lappend r1 $opcode $p1 $p2 $p3 $p4 $p5} + db eval "explain $sql2" { lappend r2 $opcode $p1 $p2 $p3 $p4 $p5} + return [expr {$r1==$r2}] +} + +proc do_compare_vdbe_test {tn sql1 sql2 res} { + uplevel [list do_test $tn [list compare_vdbe $sql1 $sql2] $res] +} + +#------------------------------------------------------------------------- +# Test that various statements that are eligible for the optimization +# produce the same VDBE code as optimizing by hand does. +# +foreach {tn sql1 sql2} { + 1 "SELECT a, sum(b) FROM t1 GROUP BY a HAVING a=2" + "SELECT a, sum(b) FROM t1 WHERE a=2 GROUP BY a" + + 2 "SELECT a, sum(b) FROM t1 GROUP BY a HAVING sum(b)>5 AND a=2" + "SELECT a, sum(b) FROM t1 WHERE a=2 GROUP BY a HAVING sum(b)>5" + + 3 "SELECT a, sum(b) FROM t1 GROUP BY a COLLATE binary HAVING a=2" + "SELECT a, sum(b) FROM t1 WHERE a=2 GROUP BY a COLLATE binary" + + 4 { + SELECT x,y FROM ( + SELECT a AS x, sum(b) AS y FROM t1 + GROUP BY a + ) WHERE x BETWEEN 8888 AND 9999 + } { + SELECT x,y FROM ( + SELECT a AS x, sum(b) AS y FROM t1 + WHERE x BETWEEN 8888 AND 9999 + GROUP BY a + ) + } + + 5 "SELECT a, sum(b) FROM t1 GROUP BY a COLLATE binary HAVING 0" + "SELECT a, sum(b) FROM t1 WHERE 0 GROUP BY a COLLATE binary" + + 6 "SELECT count(*) FROM t1,t2 WHERE a=c GROUP BY b, d HAVING b=d" + "SELECT count(*) FROM t1,t2 WHERE a=c AND b=d GROUP BY b, d" + + 7 { + SELECT count(*) FROM t1,t2 WHERE a=c GROUP BY b, d + HAVING b=d COLLATE nocase + } { + SELECT count(*) FROM t1,t2 WHERE a=c AND b=d COLLATE nocase + GROUP BY b, d + } + + 8 "SELECT a, sum(b) FROM t1 GROUP BY a||b HAVING substr(a||b, 1, 1)='a'" + "SELECT a, sum(b) FROM t1 WHERE substr(a||b, 1, 1)='a' GROUP BY a||b" +} { + do_compare_vdbe_test 2.$tn $sql1 $sql2 1 +} + +#------------------------------------------------------------------------- +# 1: Test that the optimization is only applied if the GROUP BY term +# uses BINARY collation. +# +# 2: Not applied if there is a non-deterministic function in the HAVING +# term. +# +foreach {tn sql1 sql2} { + 1 "SELECT a, sum(b) FROM t1 GROUP BY a COLLATE nocase HAVING a=2" + "SELECT a, sum(b) FROM t1 WHERE a=2 GROUP BY a COLLATE nocase" + + 2 "SELECT a, sum(b) FROM t1 GROUP BY a HAVING randomblob(a)