1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-11-19 21:43:15 +03:00

Fix a bug in using stat4 data to estimate the number of rows selected by a range constraint.

FossilOrigin-Name: f783938ea999731ea073cd2c78e278095f7bea6d
This commit is contained in:
dan
2013-08-08 11:48:57 +00:00
parent eea568d68e
commit 6cb8d76ccb
4 changed files with 63 additions and 55 deletions

View File

@@ -1,5 +1,5 @@
C Replace\svariable\sIndex.avgEq\s(average\snumber\sof\srows\sin\skeys\sfor\swhich\sthere\sis\sno\ssample\sin\ssqlite_stat4)\swith\svector\sIndex.aAvgEq.
D 2013-08-07T19:46:15.623
C Fix\sa\sbug\sin\susing\sstat4\sdata\sto\sestimate\sthe\snumber\sof\srows\sselected\sby\sa\srange\sconstraint.
D 2013-08-08T11:48:57.819
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in 5e41da95d92656a5004b03d3576e8b226858a28e
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@@ -290,7 +290,7 @@ F src/vtab.c 2e8b489db47e20ae36cd247932dc671c9ded0624
F src/wal.c 7dc3966ef98b74422267e7e6e46e07ff6c6eb1b4
F src/wal.h df01efe09c5cb8c8e391ff1715cca294f89668a4
F src/walker.c 4fa43583d0a84b48f93b1e88f11adf2065be4e73
F src/where.c c973297fc29c5dae03a07d6deb479af432d24005
F src/where.c 5ea698bd91c8c264bd00fb9c6aafc30043a3873b
F test/8_3_names.test ebbb5cd36741350040fd28b432ceadf495be25b2
F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2
F test/aggnested.test 45c0201e28045ad38a530b5a144b73cd4aa2cfd6
@@ -307,7 +307,7 @@ F test/analyze4.test eff2df19b8dd84529966420f29ea52edc6b56213
F test/analyze5.test e3eece09761c935ec0b85dc4ed70dbf6cac1ed77
F test/analyze6.test 3c01e084309706a1033f850330ea24f6f7846297
F test/analyze7.test c0af22c5e0140e2e4ac556a21c2b6fff58229c98
F test/analyze8.test 092425439c12f62f9d5c3127e2b4f6e7b3e170cc
F test/analyze8.test 8d1f76ff1e47c4093bb7be3971ba08fa56dc470d
F test/analyze9.test 1ed4e7d95d8e1e1923766281b20870d61730450c
F test/async.test 1d0e056ba1bb9729283a0f22718d3a25e82c277b
F test/async2.test c0a9bd20816d7d6a2ceca7b8c03d3d69c28ffb8b
@@ -1106,7 +1106,7 @@ F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh fbc018d67fd7395f440c28f33ef0f94420226381
F tool/wherecosttest.c f407dc4c79786982a475261866a161cd007947ae
F tool/win/sqlite.vsix 97894c2790eda7b5bce3cc79cb2a8ec2fde9b3ac
P 08f74c45ecf711a2373af578d44470add9082377
R 4d9cb44bdbdf4b1edd0f4ec9caccbe56
P 7b70b419c43b2c3b2daf11d833a1d60245bfaef5
R 57059d9c69ace9dccd9c4ceedb6afa1e
U dan
Z a128130e52d0e5080bab7e97885213b4
Z 9ec2673c689c33df3101a910ffd90aa8

View File

@@ -1 +1 @@
7b70b419c43b2c3b2daf11d833a1d60245bfaef5
f783938ea999731ea073cd2c78e278095f7bea6d

View File

@@ -394,7 +394,6 @@ struct WhereLoopBuilder {
#ifdef SQLITE_ENABLE_STAT4
UnpackedRecord *pRec; /* Probe for stat4 (if required) */
int nRecValid; /* Number of valid fields currently in pRec */
tRowcnt nMaxRowcnt; /* If !=0, the maximum estimated row count */
#endif
};
@@ -2478,38 +2477,40 @@ static int whereKeyStats(
** If either of the upper or lower bound is not present, then NULL is passed in
** place of the corresponding WhereTerm.
**
** The nEq parameter is passed the index of the index column subject to the
** range constraint. Or, equivalently, the number of equality constraints
** optimized by the proposed index scan. For example, assuming index p is
** on t1(a, b), and the SQL query is:
** The value in (pBuilder->pNew->u.btree.nEq) is the index of the index
** column subject to the range constraint. Or, equivalently, the number of
** equality constraints optimized by the proposed index scan. For example,
** assuming index p is on t1(a, b), and the SQL query is:
**
** ... FROM t1 WHERE a = ? AND b > ? AND b < ? ...
**
** then nEq should be passed the value 1 (as the range restricted column,
** b, is the second left-most column of the index). Or, if the query is:
** then nEq is set to 1 (as the range restricted column, b, is the second
** left-most column of the index). Or, if the query is:
**
** ... FROM t1 WHERE a > ? AND a < ? ...
**
** then nEq should be passed 0.
** then nEq is set to 0.
**
** The returned value is an integer divisor to reduce the estimated
** search space. A return value of 1 means that range constraints are
** no help at all. A return value of 2 means range constraints are
** expected to reduce the search space by half. And so forth...
** When this function is called, *pnOut is set to the whereCost() of the
** number of rows that the index scan is expected to visit without
** considering the range constraints. If nEq is 0, this is the number of
** rows in the index. Assuming no error occurs, *pnOut is adjusted (reduced)
** to account for the range constraints pLower and pUpper.
**
** In the absence of sqlite_stat3 ANALYZE data, each range inequality
** reduces the search space by a factor of 4. Hence a single constraint (x>?)
** results in a return of 4 and a range constraint (x>? AND x<?) results
** in a return of 16.
** In the absence of sqlite_stat4 ANALYZE data, or if such data cannot be
** used, each range inequality reduces the search space by a factor of 4.
** Hence a pair of constraints (x>? AND x<?) reduces the expected number of
** rows visited by a factor of 16.
*/
static int whereRangeScanEst(
Parse *pParse, /* Parsing & code generating context */
WhereLoopBuilder *pBuilder,
WhereTerm *pLower, /* Lower bound on the range. ex: "x>123" Might be NULL */
WhereTerm *pUpper, /* Upper bound on the range. ex: "x<455" Might be NULL */
WhereCost *pRangeDiv /* OUT: Reduce search space by this divisor */
WhereCost *pnOut /* IN/OUT: Number of rows visited */
){
int rc = SQLITE_OK;
int nOut = (int)*pnOut;
#ifdef SQLITE_ENABLE_STAT4
Index *p = pBuilder->pNew->u.btree.pIndex;
@@ -2550,17 +2551,18 @@ static int whereRangeScanEst(
}
pBuilder->pRec = pRec;
if( rc==SQLITE_OK ){
WhereCost iBase = whereCost(p->aiRowEst[0]);
WhereCost nNew;
if( iUpper>iLower ){
iBase -= whereCost(iUpper - iLower);
}
if( pBuilder->nMaxRowcnt && iBase<pBuilder->nMaxRowcnt ){
*pRangeDiv = pBuilder->nMaxRowcnt;
nNew = whereCost(iUpper - iLower);
}else{
*pRangeDiv = iBase;
nNew = whereCost(2); /* Small number */
}
WHERETRACE(0x100, ("range scan regions: %u..%u div=%d\n",
(u32)iLower, (u32)iUpper, *pRangeDiv));
if( nNew<nOut ){
nOut = nNew;
}
*pnOut = (WhereCost)nOut;
WHERETRACE(0x100, ("range scan regions: %u..%u est=%d\n",
(u32)iLower, (u32)iUpper, nOut));
return SQLITE_OK;
}
}
@@ -2569,15 +2571,16 @@ static int whereRangeScanEst(
UNUSED_PARAMETER(pBuilder);
#endif
assert( pLower || pUpper );
*pRangeDiv = 0;
/* TUNING: Each inequality constraint reduces the search space 4-fold.
** A BETWEEN operator, therefore, reduces the search space 16-fold */
if( pLower && (pLower->wtFlags & TERM_VNULL)==0 ){
*pRangeDiv += 20; assert( 20==whereCost(4) );
nOut -= 20; assert( 20==whereCost(4) );
}
if( pUpper ){
*pRangeDiv += 20; assert( 20==whereCost(4) );
nOut -= 20; assert( 20==whereCost(4) );
}
if( nOut<10 ) nOut = 10;
*pnOut = (WhereCost)nOut;
return rc;
}
@@ -2641,9 +2644,6 @@ static int whereEqualScanEst(
if( rc==SQLITE_OK ){
WHERETRACE(0x100,("equality scan regions: %d\n", (int)a[1]));
*pnRow = a[1];
if( pBuilder->nMaxRowcnt && *pnRow>pBuilder->nMaxRowcnt ){
*pnRow = pBuilder->nMaxRowcnt;
}
}
return rc;
@@ -2690,11 +2690,7 @@ static int whereInScanEst(
if( rc==SQLITE_OK ){
if( nRowEst > p->aiRowEst[0] ) nRowEst = p->aiRowEst[0];
if( pBuilder->nMaxRowcnt && nRowEst>pBuilder->nMaxRowcnt ){
*pnRow = pBuilder->nMaxRowcnt;
}else{
*pnRow = nRowEst;
}
*pnRow = nRowEst;
WHERETRACE(0x100,("IN row estimate: est=%g\n", nRowEst));
}
assert( pBuilder->nRecValid==nRecValid );
@@ -4246,7 +4242,6 @@ static int whereLoopAddBtreeIndex(
int nIn = 0;
#ifdef SQLITE_ENABLE_STAT4
int nRecValid = pBuilder->nRecValid;
int nMaxRowcnt = pBuilder->nMaxRowcnt;
if( (pTerm->wtFlags & TERM_VNULL)!=0 && pSrc->pTab->aCol[iCol].notNull ){
continue; /* skip IS NOT NULL constraints on a NOT NULL column */
}
@@ -4309,9 +4304,8 @@ static int whereLoopAddBtreeIndex(
}
if( pNew->wsFlags & WHERE_COLUMN_RANGE ){
/* Adjust nOut and rRun for STAT3 range values */
WhereCost rDiv;
whereRangeScanEst(pParse, pBuilder, pBtm, pTop, &rDiv);
pNew->nOut = saved_nOut>rDiv+10 ? saved_nOut - rDiv : 10;
assert( pNew->nOut==saved_nOut );
whereRangeScanEst(pParse, pBuilder, pBtm, pTop, &pNew->nOut);
}
#ifdef SQLITE_ENABLE_STAT4
if( nInMul==0 && pProbe->nSample && OptimizationEnabled(db, SQLITE_Stat3) ){
@@ -4321,14 +4315,15 @@ static int whereLoopAddBtreeIndex(
testcase( pTerm->eOperator & WO_EQ );
testcase( pTerm->eOperator & WO_ISNULL );
rc = whereEqualScanEst(pParse, pBuilder, pExpr->pRight, &nOut);
assert( nOut==0||pBuilder->nMaxRowcnt==0||nOut<=pBuilder->nMaxRowcnt);
if( nOut ) pBuilder->nMaxRowcnt = nOut;
}else if( (pTerm->eOperator & WO_IN)
&& !ExprHasProperty(pExpr, EP_xIsSelect) ){
rc = whereInScanEst(pParse, pBuilder, pExpr->x.pList, &nOut);
}
assert( nOut==0 || rc==SQLITE_OK );
if( nOut ) pNew->nOut = whereCost(nOut);
if( nOut ){
nOut = whereCost(nOut);
pNew->nOut = MIN(nOut, saved_nOut);
}
}
#endif
if( (pNew->wsFlags & (WHERE_IDX_ONLY|WHERE_IPK))==0 ){
@@ -4347,7 +4342,7 @@ static int whereLoopAddBtreeIndex(
}
#ifdef SQLITE_ENABLE_STAT4
pBuilder->nRecValid = nRecValid;
pBuilder->nMaxRowcnt = nMaxRowcnt;
pNew->nOut = saved_nOut;
#endif
}
pNew->prereq = saved_prereq;

View File

@@ -85,13 +85,26 @@ do_test 2.1 {
# between 800000 and 900000. So t1c is more selective for the latter
# range.
#
# Test 3.2 is a little unstable. It depends on the planner estimating
# that (b BETWEEN 40 AND 44) will match more rows than (c BETWEEN
# 800000 AND 900000), which is a pretty close call (50 vs. 32), so
# the planner could get it wrong with an unlucky set of samples. This
# case happens to work, but others ("b BETWEEN 50 AND 54" for example)
# will fail.
#
do_execsql_test 3.0 {
SELECT count(*) FROM t1 WHERE b BETWEEN 40 AND 44;
SELECT count(*) FROM t1 WHERE c BETWEEN 0 AND 100000;
SELECT count(*) FROM t1 WHERE c BETWEEN 800000 AND 900000;
} {50 376 32}
do_test 3.1 {
eqp {SELECT * FROM t1 WHERE b BETWEEN 50 AND 54 AND c BETWEEN 0 AND 100000}
eqp {SELECT * FROM t1 WHERE b BETWEEN 40 AND 44 AND c BETWEEN 0 AND 100000}
} {0 0 0 {SEARCH TABLE t1 USING INDEX t1b (b>? AND b<?)}}
do_test 3.2 {
eqp {SELECT * FROM t1
WHERE b BETWEEN 50 AND 54 AND c BETWEEN 800000 AND 900000}
WHERE b BETWEEN 40 AND 44 AND c BETWEEN 800000 AND 900000}
} {0 0 0 {SEARCH TABLE t1 USING INDEX t1c (c>? AND c<?)}}
do_test 3.3 {
eqp {SELECT * FROM t1 WHERE a=100 AND c BETWEEN 0 AND 100000}
} {0 0 0 {SEARCH TABLE t1 USING INDEX t1a (a=?)}}