1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-11-14 00:22:38 +03:00

Move terms of the HAVING clause that reference only columns in the GROUP BY

clause over to the WHERE clause, resulting in a faster query plan.

FossilOrigin-Name: 47cbb471d056c8e1834a5ca72491404a3bfb273b5ff7bdd84b98d263938ea874
This commit is contained in:
drh
2017-05-02 16:55:07 +00:00
6 changed files with 313 additions and 19 deletions

View File

@@ -1,5 +1,5 @@
C Fix\stypo\sin\scomment.\s\sNo\schanges\sto\scode. C Move\sterms\sof\sthe\sHAVING\sclause\sthat\sreference\sonly\scolumns\sin\sthe\sGROUP\sBY\nclause\sover\sto\sthe\sWHERE\sclause,\sresulting\sin\sa\sfaster\squery\splan.
D 2017-05-02T01:30:44.805 D 2017-05-02T16:55:07.827
F Makefile.in 1cc758ce3374a32425e4d130c2fe7b026b20de5b8843243de75f087c0a2661fb F Makefile.in 1cc758ce3374a32425e4d130c2fe7b026b20de5b8843243de75f087c0a2661fb
F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434 F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434
F Makefile.msc 6a8c838220f7c00820e1fc0ac1bccaaa8e5676067e1dbfa1bafa7a4ffecf8ae6 F Makefile.msc 6a8c838220f7c00820e1fc0ac1bccaaa8e5676067e1dbfa1bafa7a4ffecf8ae6
@@ -355,7 +355,7 @@ F src/ctime.c 47d91a25ad8f199a71a5b1b7b169d6dd0d6e98c5719eca801568798743d1161c
F src/date.c cc42a41c7422389860d40419a5e3bce5eaf6e7835c3ba2677751dc653550a5c7 F src/date.c cc42a41c7422389860d40419a5e3bce5eaf6e7835c3ba2677751dc653550a5c7
F src/dbstat.c 19ee7a4e89979d4df8e44cfac7a8f905ec89b77d F src/dbstat.c 19ee7a4e89979d4df8e44cfac7a8f905ec89b77d
F src/delete.c 0d9d5549d42e79ce4d82ff1db1e6c81e36d2f67c F src/delete.c 0d9d5549d42e79ce4d82ff1db1e6c81e36d2f67c
F src/expr.c f10e35dc50be4c8f82eb99bf5d8530229d1d60957cc3c9473ffe584d0444087c F src/expr.c 965f5e6074ee61cf933be079c6a443c88414490c13ec270b5baaacaa920280fa
F src/fault.c 460f3e55994363812d9d60844b2a6de88826e007 F src/fault.c 460f3e55994363812d9d60844b2a6de88826e007
F src/fkey.c db65492ae549c3b548c9ef1f279ce1684f1c473b116e1c56a90878cd5dcf968d F src/fkey.c db65492ae549c3b548c9ef1f279ce1684f1c473b116e1c56a90878cd5dcf968d
F src/func.c 9d52522cc8ae7f5cdadfe14594262f1618bc1f86083c4cd6da861b4cf5af6174 F src/func.c 9d52522cc8ae7f5cdadfe14594262f1618bc1f86083c4cd6da861b4cf5af6174
@@ -402,12 +402,12 @@ F src/printf.c 8757834f1b54dae512fb25eb1acc8e94a0d15dd2290b58f2563f65973265adb2
F src/random.c 80f5d666f23feb3e6665a6ce04c7197212a88384 F src/random.c 80f5d666f23feb3e6665a6ce04c7197212a88384
F src/resolve.c 3e518b962d932a997fae373366880fc028c75706 F src/resolve.c 3e518b962d932a997fae373366880fc028c75706
F src/rowset.c 7b7e7e479212e65b723bf40128c7b36dc5afdfac F src/rowset.c 7b7e7e479212e65b723bf40128c7b36dc5afdfac
F src/select.c 478e95d424bb86d34c7c95d20872cbd78df97af5f83c3fd7de55d5b2413f927d F src/select.c 4bbdacd119f22b3b7712b1c1f54bb52fdc7d97d24e131440cc5f235b9df42b0c
F src/shell.c 21b79c0e1b93f8e35fd7b4087d6ba438326c3d7e285d0dd51dfd741475f858a1 F src/shell.c 21b79c0e1b93f8e35fd7b4087d6ba438326c3d7e285d0dd51dfd741475f858a1
F src/sqlite.h.in eeb1da70a61d52e1d58e5b55446b85bbac571699421d3cf857421c56214013ce F src/sqlite.h.in eeb1da70a61d52e1d58e5b55446b85bbac571699421d3cf857421c56214013ce
F src/sqlite3.rc 5121c9e10c3964d5755191c80dd1180c122fc3a8 F src/sqlite3.rc 5121c9e10c3964d5755191c80dd1180c122fc3a8
F src/sqlite3ext.h 58fd0676d3111d02e62e5a35992a7d3da5d3f88753acc174f2d37b774fbbdd28 F src/sqlite3ext.h 58fd0676d3111d02e62e5a35992a7d3da5d3f88753acc174f2d37b774fbbdd28
F src/sqliteInt.h 9affb53bb405dcea1d86e85198ebaf6232a684cc2b2af6b3c181869f1c8f3e93 F src/sqliteInt.h aea3aa1b81e0d07d5b1c39b8c5a54a1dc5e4f10136cb63da392aef9eb2a5108b
F src/sqliteLimit.h 1513bfb7b20378aa0041e7022d04acb73525de35b80b252f1b83fedb4de6a76b F src/sqliteLimit.h 1513bfb7b20378aa0041e7022d04acb73525de35b80b252f1b83fedb4de6a76b
F src/status.c a9e66593dfb28a9e746cba7153f84d49c1ddc4b1 F src/status.c a9e66593dfb28a9e746cba7153f84d49c1ddc4b1
F src/table.c b46ad567748f24a326d9de40e5b9659f96ffff34 F src/table.c b46ad567748f24a326d9de40e5b9659f96ffff34
@@ -850,6 +850,7 @@ F test/fuzzer2.test a85ef814ce071293bce1ad8dffa217cbbaad4c14
F test/fuzzerfault.test 8792cd77fd5bce765b05d0c8e01b9edcf8af8536 F test/fuzzerfault.test 8792cd77fd5bce765b05d0c8e01b9edcf8af8536
F test/gcfault.test dd28c228a38976d6336a3fc42d7e5f1ad060cb8c F test/gcfault.test dd28c228a38976d6336a3fc42d7e5f1ad060cb8c
F test/genesis.tcl 1e2e2e8e5cc4058549a154ff1892fe5c9de19f98 F test/genesis.tcl 1e2e2e8e5cc4058549a154ff1892fe5c9de19f98
F test/having.test 30a02b8a9a47cba7bdb5281999c5cbff407c2ac296511ee64dd0b418fe38eb0f
F test/hexlit.test 4a6a5f46e3c65c4bf1fa06f5dd5a9507a5627751 F test/hexlit.test 4a6a5f46e3c65c4bf1fa06f5dd5a9507a5627751
F test/hidden.test 23c1393a79e846d68fd902d72c85d5e5dcf98711 F test/hidden.test 23c1393a79e846d68fd902d72c85d5e5dcf98711
F test/hook.test dbc0b87756e1e20e7497b56889c9e9cd2f8cc2b5 F test/hook.test dbc0b87756e1e20e7497b56889c9e9cd2f8cc2b5
@@ -1577,7 +1578,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
P adcad37b001d255299be716b16003b56ec1fba349ef73261ee0e543186c4f311 P 6674814afb9e763e7f7060776586e26da28040b3208ce06c8a285dd647e5a53d 8424492eac506866d2918e5fe03c8f65fef960215d56012a3b52ed42789ed35a
R 333e64e1a0afdbe4625860741bf47c15 R 645ee5368137c061c8ff554b7c3cb973
U mistachkin T +closed 8424492eac506866d2918e5fe03c8f65fef960215d56012a3b52ed42789ed35a
Z b65b5e3a6f4ab6d1f0aa8214a9325839 U drh
Z f125bc7c2e400862ed032d286e9e90a2

View File

@@ -1 +1 @@
6674814afb9e763e7f7060776586e26da28040b3208ce06c8a285dd647e5a53d 47cbb471d056c8e1834a5ca72491404a3bfb273b5ff7bdd84b98d263938ea874

View File

@@ -1815,6 +1815,65 @@ int sqlite3ExprIsTableConstant(Expr *p, int iCur){
return exprIsConst(p, 3, iCur); return exprIsConst(p, 3, iCur);
} }
/*
** Expression-walker callback that does the per-node work for
** sqlite3ExprIsConstantOrGroupBy().
**
** pWalker->u.pGroupBy holds the GROUP BY list and pWalker->pParse the
** parsing context.  For each node:
**
**   *  If the node matches a GROUP BY term whose collating sequence is
**      either unspecified or "BINARY", the node is treated as constant
**      and WRC_Prune is returned.
**
**   *  Otherwise, if the node is a sub-select, pWalker->eCode is cleared
**      and WRC_Abort is returned.
**
**   *  Otherwise the decision is delegated to exprNodeIsConstant().
*/
static int exprNodeIsConstantOrGroupBy(Walker *pWalker, Expr *pExpr){
  ExprList *pList = pWalker->u.pGroupBy;
  int iTerm = 0;

  /* Look for a GROUP BY term that is identical to pExpr and that sorts
  ** using the BINARY collation.  Such a term counts as a constant. */
  while( iTerm<pList->nExpr ){
    Expr *pTerm = pList->a[iTerm++].pExpr;
    CollSeq *pSeq;
    if( sqlite3ExprCompare(pExpr, pTerm, -1)>=2 ) continue;
    pSeq = sqlite3ExprCollSeq(pWalker->pParse, pTerm);
    if( pSeq==0 || sqlite3_stricmp("BINARY", pSeq->zName)==0 ){
      return WRC_Prune;
    }
  }

  /* A sub-select is always considered variable. */
  if( ExprHasProperty(pExpr, EP_xIsSelect) ){
    pWalker->eCode = 0;
    return WRC_Abort;
  }

  /* Anything else is judged by the ordinary constant test. */
  return exprNodeIsConstant(pWalker, pExpr);
}
/*
** Walk the expression tree p.  Return non-zero if every part of the
** expression is either a constant or a copy of a term in pGroupBy that
** sorts with the BINARY collation sequence.
**
** The caller uses this to decide whether a term of the HAVING clause can
** be promoted into the WHERE clause.  Promotion is only valid if the term
** evaluates to the same value for every member of a "group".
**
** Requiring the GROUP BY term to be BINARY relies on the assumption that
** no other collating sequence produces a finer-grained grouping than
** binary; that is, (A=B COLLATE binary) implies A=B under every other
** collating sequence.
**
** This rule is stricter than necessary.  A HAVING term that uses the same
** alternative collating sequence as its GROUP BY term could also be
** promoted.  But that is much harder to check, alternative collating
** sequences are uncommon, and this is only an optimization, so the
** simple BINARY-only rule is used.
*/
int sqlite3ExprIsConstantOrGroupBy(Parse *pParse, Expr *p, ExprList *pGroupBy){
  Walker sWalker;

  memset(&sWalker, 0, sizeof(sWalker));
  sWalker.pParse = pParse;
  sWalker.xExprCallback = exprNodeIsConstantOrGroupBy;
  sWalker.u.pGroupBy = pGroupBy;
  sWalker.eCode = 1;            /* Result is "true" unless a callback clears it */
  sqlite3WalkExpr(&sWalker, p);
  return sWalker.eCode;
}
/* /*
** Walk an expression tree. Return non-zero if the expression is constant ** Walk an expression tree. Return non-zero if the expression is constant
** or a function call with constant arguments. Return and 0 if there ** or a function call with constant arguments. Return and 0 if there

View File

@@ -4879,6 +4879,77 @@ static void explainSimpleCount(
# define explainSimpleCount(a,b,c) # define explainSimpleCount(a,b,c)
#endif #endif
/*
** Context object for havingToWhereExprCb().
**
** havingToWhere() fills in one of these and hands it to the expression
** walker through Walker.u.pHavingCtx so that the callback can reach both
** the GROUP BY list and the WHERE clause it is allowed to extend.
*/
struct HavingToWhereCtx {
Expr **ppWhere;      /* Address of the WHERE clause; moved terms are ANDed on */
ExprList *pGroupBy;  /* GROUP BY list used to test each HAVING term */
};
/*
** Expression-walker callback that does the work of havingToWhere().
**
** A TK_AND node is never moved itself: WRC_Continue is returned so that
** sqlite3WalkExpr() descends into its children.
**
** Every other node yields WRC_Prune.  Before returning, the node is
** tested with sqlite3ExprIsConstantOrGroupBy().  If it qualifies, its
** content is ANDed onto the WHERE clause and the node left behind in the
** HAVING expression becomes the integer constant "1".
*/
static int havingToWhereExprCb(Walker *pWalker, Expr *pExpr){
  struct HavingToWhereCtx *pCtx;
  Parse *pParse;
  Expr *pMoved;

  /* Conjunctions are split into their terms by the walker. */
  if( pExpr->op==TK_AND ) return WRC_Continue;

  pCtx = pWalker->u.pHavingCtx;
  pParse = pWalker->pParse;
  if( sqlite3ExprIsConstantOrGroupBy(pParse, pExpr, pCtx->pGroupBy)==0 ){
    return WRC_Prune;   /* Term must remain in the HAVING clause */
  }

  /* Allocate a node holding the integer 1, then exchange its content with
  ** *pExpr.  Afterwards pMoved carries the original term and the HAVING
  ** tree holds the "1" in its place.  If the allocation fails, nothing
  ** is changed. */
  pMoved = sqlite3ExprAlloc(pParse->db, TK_INTEGER, &sqlite3IntTokens[1], 0);
  if( pMoved!=0 ){
    Expr *pOldWhere = *(pCtx->ppWhere);
    SWAP(Expr, *pMoved, *pExpr);
    *(pCtx->ppWhere) = sqlite3ExprAnd(pParse->db, pOldWhere, pMoved);
  }
  return WRC_Prune;
}
/*
** Move eligible terms out of the HAVING clause, which is evaluated after
** grouping, and into the WHERE clause, which is evaluated before
** grouping.  For example, the query:
**
**      SELECT * FROM <tables> WHERE a=? GROUP BY b HAVING b=? AND c=?
**
** can be rewritten as:
**
**      SELECT * FROM <tables> WHERE a=? AND b=? GROUP BY b HAVING c=?
**
** A HAVING term is eligible if it is built entirely from constants and
** from expressions that are also GROUP BY terms using the "BINARY"
** collation sequence.
**
** pHaving is modified in place and *ppWhere is updated to point at the
** extended WHERE expression.
*/
static void havingToWhere(
  Parse *pParse,
  ExprList *pGroupBy,
  Expr *pHaving,
  Expr **ppWhere
){
  Walker sW;
  struct HavingToWhereCtx sHavingCtx;

  /* State shared with havingToWhereExprCb() */
  sHavingCtx.pGroupBy = pGroupBy;
  sHavingCtx.ppWhere = ppWhere;

  memset(&sW, 0, sizeof(sW));
  sW.u.pHavingCtx = &sHavingCtx;
  sW.xExprCallback = havingToWhereExprCb;
  sW.pParse = pParse;

  sqlite3WalkExpr(&sW, pHaving);
}
/* /*
** Generate code for the SELECT statement given in the p argument. ** Generate code for the SELECT statement given in the p argument.
** **
@@ -5347,6 +5418,11 @@ int sqlite3Select(
sqlite3ExprAnalyzeAggList(&sNC, pEList); sqlite3ExprAnalyzeAggList(&sNC, pEList);
sqlite3ExprAnalyzeAggList(&sNC, sSort.pOrderBy); sqlite3ExprAnalyzeAggList(&sNC, sSort.pOrderBy);
if( pHaving ){ if( pHaving ){
if( pGroupBy ){
assert( pWhere==p->pWhere );
havingToWhere(pParse, pGroupBy, pHaving, &p->pWhere);
pWhere = p->pWhere;
}
sqlite3ExprAnalyzeAggregates(&sNC, pHaving); sqlite3ExprAnalyzeAggregates(&sNC, pHaving);
} }
sAggInfo.nAccumulator = sAggInfo.nColumn; sAggInfo.nAccumulator = sAggInfo.nColumn;

View File

@@ -3316,15 +3316,17 @@ struct Walker {
int walkerDepth; /* Number of subqueries */ int walkerDepth; /* Number of subqueries */
u8 eCode; /* A small processing code */ u8 eCode; /* A small processing code */
union { /* Extra data for callback */ union { /* Extra data for callback */
NameContext *pNC; /* Naming context */ NameContext *pNC; /* Naming context */
int n; /* A counter */ int n; /* A counter */
int iCur; /* A cursor number */ int iCur; /* A cursor number */
SrcList *pSrcList; /* FROM clause */ SrcList *pSrcList; /* FROM clause */
struct SrcCount *pSrcCount; /* Counting column references */ struct SrcCount *pSrcCount; /* Counting column references */
struct CCurHint *pCCurHint; /* Used by codeCursorHint() */ struct CCurHint *pCCurHint; /* Used by codeCursorHint() */
int *aiCol; /* array of column indexes */ int *aiCol; /* array of column indexes */
struct IdxCover *pIdxCover; /* Check for index coverage */ struct IdxCover *pIdxCover; /* Check for index coverage */
struct IdxExprTrans *pIdxTrans; /* Convert indexed expr to column */ struct IdxExprTrans *pIdxTrans; /* Convert indexed expr to column */
ExprList *pGroupBy; /* GROUP BY clause */
struct HavingToWhereCtx *pHavingCtx; /* HAVING to WHERE clause ctx */
} u; } u;
}; };
@@ -3794,6 +3796,7 @@ void sqlite3LeaveMutexAndCloseZombie(sqlite3*);
int sqlite3ExprIsConstant(Expr*); int sqlite3ExprIsConstant(Expr*);
int sqlite3ExprIsConstantNotJoin(Expr*); int sqlite3ExprIsConstantNotJoin(Expr*);
int sqlite3ExprIsConstantOrFunction(Expr*, u8); int sqlite3ExprIsConstantOrFunction(Expr*, u8);
int sqlite3ExprIsConstantOrGroupBy(Parse*, Expr*, ExprList*);
int sqlite3ExprIsTableConstant(Expr*,int); int sqlite3ExprIsTableConstant(Expr*,int);
#ifdef SQLITE_ENABLE_CURSOR_HINTS #ifdef SQLITE_ENABLE_CURSOR_HINTS
int sqlite3ExprContainsSubquery(Expr*); int sqlite3ExprContainsSubquery(Expr*);

154
test/having.test Normal file
View File

@@ -0,0 +1,154 @@
# 2017 April 30
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Test the HAVING->WHERE optimization.
#
set testdir [file dirname $argv0]
source $testdir/tester.tcl
set testprefix having
# Create the fixture tables. t1 receives six rows: a=1 with b=1,3,5
# (sum 9) and a=2 with b=2,4,6 (sum 12). No rows are inserted into t2 here.
#
do_execsql_test 1.0 {
CREATE TABLE t2(c, d);
CREATE TABLE t1(a, b);
INSERT INTO t1 VALUES(1, 1);
INSERT INTO t1 VALUES(2, 2);
INSERT INTO t1 VALUES(1, 3);
INSERT INTO t1 VALUES(2, 4);
INSERT INTO t1 VALUES(1, 5);
INSERT INTO t1 VALUES(2, 6);
} {}
# Result checks: a HAVING term on the GROUP BY column only (1.1), mixed
# with an aggregate term (1.2), and an aggregate term only (1.3).
#
foreach {tn sql res} {
1 "SELECT a, sum(b) FROM t1 GROUP BY a HAVING a=2" {2 12}
2 "SELECT a, sum(b) FROM t1 GROUP BY a HAVING a=2 AND sum(b)>10" {2 12}
3 "SELECT a, sum(b) FROM t1 GROUP BY a HAVING sum(b)>12" {}
} {
do_execsql_test 1.$tn $sql $res
}
# Run "EXPLAIN" on each of the two SQL statements and collect the
# opcode, p1, p2, p3, p4 and p5 columns of every output row. Return
# true if both statements produce the same listing, or false otherwise.
#
proc compare_vdbe {sql1 sql2} {
  set listings [list]
  foreach stmt [list $sql1 $sql2] {
    set prog [list]
    db eval "explain $stmt" {
      lappend prog $opcode $p1 $p2 $p3 $p4 $p5
    }
    lappend listings $prog
  }
  set first [lindex $listings 0]
  set second [lindex $listings 1]
  return [expr {$first==$second}]
}
# Run test case $tn in the caller's scope: evaluate compare_vdbe on the
# two SQL statements and expect the result $res.
#
proc do_compare_vdbe_test {tn sql1 sql2 res} {
  set script [list compare_vdbe $sql1 $sql2]
  uplevel [list do_test $tn $script $res]
}
#-------------------------------------------------------------------------
# Test that various statements that are eligible for the optimization
# produce the same VDBE code as optimizing by hand does.
#
# Each entry is a test number followed by two statements: sql1 is the
# form the optimizer must rewrite and sql2 is the hand-optimized
# equivalent. Every case expects compare_vdbe to return 1, meaning the
# two EXPLAIN listings are identical.
#
foreach {tn sql1 sql2} {
1 "SELECT a, sum(b) FROM t1 GROUP BY a HAVING a=2"
"SELECT a, sum(b) FROM t1 WHERE a=2 GROUP BY a"
2 "SELECT a, sum(b) FROM t1 GROUP BY a HAVING sum(b)>5 AND a=2"
"SELECT a, sum(b) FROM t1 WHERE a=2 GROUP BY a HAVING sum(b)>5"
3 "SELECT a, sum(b) FROM t1 GROUP BY a COLLATE binary HAVING a=2"
"SELECT a, sum(b) FROM t1 WHERE a=2 GROUP BY a COLLATE binary"
4 {
SELECT x,y FROM (
SELECT a AS x, sum(b) AS y FROM t1
GROUP BY a
) WHERE x BETWEEN 8888 AND 9999
} {
SELECT x,y FROM (
SELECT a AS x, sum(b) AS y FROM t1
WHERE x BETWEEN 8888 AND 9999
GROUP BY a
)
}
5 "SELECT a, sum(b) FROM t1 GROUP BY a COLLATE binary HAVING 0"
"SELECT a, sum(b) FROM t1 WHERE 0 GROUP BY a COLLATE binary"
6 "SELECT count(*) FROM t1,t2 WHERE a=c GROUP BY b, d HAVING b=d"
"SELECT count(*) FROM t1,t2 WHERE a=c AND b=d GROUP BY b, d"
7 {
SELECT count(*) FROM t1,t2 WHERE a=c GROUP BY b, d
HAVING b=d COLLATE nocase
} {
SELECT count(*) FROM t1,t2 WHERE a=c AND b=d COLLATE nocase
GROUP BY b, d
}
8 "SELECT a, sum(b) FROM t1 GROUP BY a||b HAVING substr(a||b, 1, 1)='a'"
"SELECT a, sum(b) FROM t1 WHERE substr(a||b, 1, 1)='a' GROUP BY a||b"
} {
do_compare_vdbe_test 2.$tn $sql1 $sql2 1
}
#-------------------------------------------------------------------------
# Cases where the optimization must NOT be applied. Each case expects
# compare_vdbe to return 0, meaning the HAVING form and the hand-written
# WHERE form produce different VDBE code.
#
# 1: Test that the optimization is only applied if the GROUP BY term
# uses BINARY collation.
#
# 2: Not applied if there is a non-deterministic function in the HAVING
# term.
#
foreach {tn sql1 sql2} {
1 "SELECT a, sum(b) FROM t1 GROUP BY a COLLATE nocase HAVING a=2"
"SELECT a, sum(b) FROM t1 WHERE a=2 GROUP BY a COLLATE nocase"
2 "SELECT a, sum(b) FROM t1 GROUP BY a HAVING randomblob(a)<X'88'"
"SELECT a, sum(b) FROM t1 WHERE randomblob(a)<X'88' GROUP BY a"
} {
do_compare_vdbe_test 3.$tn $sql1 $sql2 0
}
#-------------------------------------------------------------------------
# Test that non-deterministic functions disqualify a term from being
# moved from the HAVING to WHERE clause.
#
# Table t3 holds three rows (b=1,2,3) for each of a=1 and a=2, inserted
# in that order.
#
do_execsql_test 4.1 {
CREATE TABLE t3(a, b);
INSERT INTO t3 VALUES(1, 1);
INSERT INTO t3 VALUES(1, 2);
INSERT INTO t3 VALUES(1, 3);
INSERT INTO t3 VALUES(2, 1);
INSERT INTO t3 VALUES(2, 2);
INSERT INTO t3 VALUES(2, 3);
}
# Increment the global counter ::nondeter_ret and return its new value
# modulo 2. Successive calls therefore alternate between results,
# regardless of the arguments passed.
#
proc nondeter {args} {
  set count [incr ::nondeter_ret]
  return [expr {$count % 2}]
}
# Register the Tcl proc as SQL function nondeter() and reset its counter.
db func nondeter nondeter
set ::nondeter_ret 0
# With the term left in the HAVING clause, the expected result is the
# a=1 group with the sum of all three of its rows, and no a=2 group.
do_execsql_test 4.2 {
SELECT a, sum(b) FROM t3 GROUP BY a HAVING nondeter(a)
} {1 6}
# If the term were moved, the query above would return the same
# result as the following. But it does not.
#
set ::nondeter_ret 0
do_execsql_test 4.3 {
SELECT a, sum(b) FROM t3 WHERE nondeter(a) GROUP BY a
} {1 4 2 2}
finish_test