mirror of
https://github.com/sqlite/sqlite.git
synced 2025-11-18 10:21:03 +03:00
If a skip-scan is a proper subset of some other scan, then adjust the cost of the skip-scan upward so that it is more costly than the other scan. Such a cost imbalance can arise under STAT4 because of difficulties in getting an accurate estimate for skip-scans.
FossilOrigin-Name: f4b22a2620a5dc48949048c2ecbd226755d4b2c3
This commit is contained in:
121
src/where.c
121
src/where.c
@@ -2638,7 +2638,7 @@ static int codeAllEqualityTerms(
|
||||
pLoop = pLevel->pWLoop;
|
||||
assert( (pLoop->wsFlags & WHERE_VIRTUALTABLE)==0 );
|
||||
nEq = pLoop->u.btree.nEq;
|
||||
nSkip = pLoop->u.btree.nSkip;
|
||||
nSkip = pLoop->nSkip;
|
||||
pIdx = pLoop->u.btree.pIndex;
|
||||
assert( pIdx!=0 );
|
||||
|
||||
@@ -2752,7 +2752,7 @@ static void explainAppendTerm(
|
||||
static void explainIndexRange(StrAccum *pStr, WhereLoop *pLoop, Table *pTab){
|
||||
Index *pIndex = pLoop->u.btree.pIndex;
|
||||
u16 nEq = pLoop->u.btree.nEq;
|
||||
u16 nSkip = pLoop->u.btree.nSkip;
|
||||
u16 nSkip = pLoop->nSkip;
|
||||
int i, j;
|
||||
Column *aCol = pTab->aCol;
|
||||
i16 *aiColumn = pIndex->aiColumn;
|
||||
@@ -3189,7 +3189,7 @@ static Bitmask codeOneLoopStart(
|
||||
|
||||
pIdx = pLoop->u.btree.pIndex;
|
||||
iIdxCur = pLevel->iIdxCur;
|
||||
assert( nEq>=pLoop->u.btree.nSkip );
|
||||
assert( nEq>=pLoop->nSkip );
|
||||
|
||||
/* If this loop satisfies a sort order (pOrderBy) request that
|
||||
** was passed to this function to implement a "SELECT min(x) ..."
|
||||
@@ -3206,7 +3206,7 @@ static Bitmask codeOneLoopStart(
|
||||
&& pWInfo->nOBSat>0
|
||||
&& (pIdx->nKeyCol>nEq)
|
||||
){
|
||||
assert( pLoop->u.btree.nSkip==0 );
|
||||
assert( pLoop->nSkip==0 );
|
||||
bSeekPastNull = 1;
|
||||
nExtraReg = 1;
|
||||
}
|
||||
@@ -3827,7 +3827,7 @@ static void whereLoopPrint(WhereLoop *p, WhereClause *pWC){
|
||||
sqlite3_free(z);
|
||||
}
|
||||
if( p->wsFlags & WHERE_SKIPSCAN ){
|
||||
sqlite3DebugPrintf(" f %05x %d-%d", p->wsFlags, p->nLTerm,p->u.btree.nSkip);
|
||||
sqlite3DebugPrintf(" f %05x %d-%d", p->wsFlags, p->nLTerm,p->nSkip);
|
||||
}else{
|
||||
sqlite3DebugPrintf(" f %05x N %d", p->wsFlags, p->nLTerm);
|
||||
}
|
||||
@@ -3956,12 +3956,15 @@ static int whereLoopCheaperProperSubset(
|
||||
const WhereLoop *pY /* Compare against this WhereLoop */
|
||||
){
|
||||
int i, j;
|
||||
if( pX->nLTerm >= pY->nLTerm ) return 0; /* X is not a subset of Y */
|
||||
if( pX->nLTerm-pX->nSkip >= pY->nLTerm-pY->nSkip ){
|
||||
return 0; /* X is not a subset of Y */
|
||||
}
|
||||
if( pX->rRun >= pY->rRun ){
|
||||
if( pX->rRun > pY->rRun ) return 0; /* X costs more than Y */
|
||||
if( pX->nOut > pY->nOut ) return 0; /* X costs more than Y */
|
||||
}
|
||||
for(i=pX->nLTerm-1; i>=0; i--){
|
||||
if( pX->aLTerm[i]==0 ) continue;
|
||||
for(j=pY->nLTerm-1; j>=0; j--){
|
||||
if( pY->aLTerm[j]==pX->aLTerm[i] ) break;
|
||||
}
|
||||
@@ -3983,25 +3986,12 @@ static int whereLoopCheaperProperSubset(
|
||||
** To say "WhereLoop X is a proper subset of Y" means that X uses fewer
|
||||
** WHERE clause terms than Y and that every WHERE clause term used by X is
|
||||
** also used by Y.
|
||||
**
|
||||
** This adjustment is omitted for SKIPSCAN loops. In a SKIPSCAN loop, the
|
||||
** WhereLoop.nLTerm field is not an accurate measure of the number of WHERE
|
||||
** clause terms covered, since some of the first nLTerm entries in aLTerm[]
|
||||
** will be NULL (because they are skipped). That makes it more difficult
|
||||
** to compare the loops. We could add extra code to do the comparison, and
|
||||
** perhaps we will someday. But SKIPSCAN is sufficiently uncommon, and this
|
||||
** adjustment is sufficiently minor, that it is very difficult to construct
|
||||
** a test case where the extra code would improve the query plan. Better
|
||||
** to avoid the added complexity and just omit cost adjustments to SKIPSCAN
|
||||
** loops.
|
||||
*/
|
||||
static void whereLoopAdjustCost(const WhereLoop *p, WhereLoop *pTemplate){
|
||||
if( (pTemplate->wsFlags & WHERE_INDEXED)==0 ) return;
|
||||
if( (pTemplate->wsFlags & WHERE_SKIPSCAN)!=0 ) return;
|
||||
for(; p; p=p->pNextLoop){
|
||||
if( p->iTab!=pTemplate->iTab ) continue;
|
||||
if( (p->wsFlags & WHERE_INDEXED)==0 ) continue;
|
||||
if( (p->wsFlags & WHERE_SKIPSCAN)!=0 ) continue;
|
||||
if( whereLoopCheaperProperSubset(p, pTemplate) ){
|
||||
/* Adjust pTemplate cost downward so that it is cheaper than its
|
||||
** subset p */
|
||||
@@ -4295,7 +4285,7 @@ static int whereLoopAddBtreeIndex(
|
||||
Bitmask saved_prereq; /* Original value of pNew->prereq */
|
||||
u16 saved_nLTerm; /* Original value of pNew->nLTerm */
|
||||
u16 saved_nEq; /* Original value of pNew->u.btree.nEq */
|
||||
u16 saved_nSkip; /* Original value of pNew->u.btree.nSkip */
|
||||
u16 saved_nSkip; /* Original value of pNew->nSkip */
|
||||
u32 saved_wsFlags; /* Original value of pNew->wsFlags */
|
||||
LogEst saved_nOut; /* Original value of pNew->nOut */
|
||||
int iCol; /* Index of the column in the table */
|
||||
@@ -4324,7 +4314,7 @@ static int whereLoopAddBtreeIndex(
|
||||
pTerm = whereScanInit(&scan, pBuilder->pWC, pSrc->iCursor, iCol,
|
||||
opMask, pProbe);
|
||||
saved_nEq = pNew->u.btree.nEq;
|
||||
saved_nSkip = pNew->u.btree.nSkip;
|
||||
saved_nSkip = pNew->nSkip;
|
||||
saved_nLTerm = pNew->nLTerm;
|
||||
saved_wsFlags = pNew->wsFlags;
|
||||
saved_prereq = pNew->prereq;
|
||||
@@ -4332,44 +4322,6 @@ static int whereLoopAddBtreeIndex(
|
||||
pNew->rSetup = 0;
|
||||
rSize = pProbe->aiRowLogEst[0];
|
||||
rLogSize = estLog(rSize);
|
||||
|
||||
/* Consider using a skip-scan if there are no WHERE clause constraints
|
||||
** available for the left-most terms of the index, and if the average
|
||||
** number of repeats in the left-most terms is at least 18.
|
||||
**
|
||||
** The magic number 18 is selected on the basis that scanning 17 rows
|
||||
** is almost always quicker than an index seek (even though if the index
|
||||
** contains fewer than 2^17 rows we assume otherwise in other parts of
|
||||
** the code). And, even if it is not, it should not be too much slower.
|
||||
** On the other hand, the extra seeks could end up being significantly
|
||||
** more expensive. */
|
||||
assert( 42==sqlite3LogEst(18) );
|
||||
if( saved_nEq==saved_nSkip
|
||||
&& saved_nEq+1<pProbe->nKeyCol
|
||||
&& pProbe->aiRowLogEst[saved_nEq+1]>=42 /* TUNING: Minimum for skip-scan */
|
||||
&& (rc = whereLoopResize(db, pNew, pNew->nLTerm+1))==SQLITE_OK
|
||||
){
|
||||
LogEst nIter;
|
||||
pNew->u.btree.nEq++;
|
||||
pNew->u.btree.nSkip++;
|
||||
pNew->aLTerm[pNew->nLTerm++] = 0;
|
||||
pNew->wsFlags |= WHERE_SKIPSCAN;
|
||||
nIter = pProbe->aiRowLogEst[saved_nEq] - pProbe->aiRowLogEst[saved_nEq+1];
|
||||
if( pTerm ){
|
||||
/* TUNING: When estimating skip-scan for a term that is also indexable,
|
||||
** multiply the cost of the skip-scan by 2.0, to make it a little less
|
||||
** desirable than the regular index lookup. */
|
||||
nIter += 10; assert( 10==sqlite3LogEst(2) );
|
||||
}
|
||||
pNew->nOut -= nIter;
|
||||
/* TUNING: Because of uncertainties in the estimates for skip-scan queries,
|
||||
** add a 1.375 fudge factor to make skip-scan slightly less likely. */
|
||||
nIter += 5;
|
||||
whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, nIter + nInMul);
|
||||
pNew->nOut = saved_nOut;
|
||||
pNew->u.btree.nEq = saved_nEq;
|
||||
pNew->u.btree.nSkip = saved_nSkip;
|
||||
}
|
||||
for(; rc==SQLITE_OK && pTerm!=0; pTerm = whereScanNext(&scan)){
|
||||
u16 eOp = pTerm->eOperator; /* Shorthand for pTerm->eOperator */
|
||||
LogEst rCostIdx;
|
||||
@@ -4532,10 +4484,50 @@ static int whereLoopAddBtreeIndex(
|
||||
}
|
||||
pNew->prereq = saved_prereq;
|
||||
pNew->u.btree.nEq = saved_nEq;
|
||||
pNew->u.btree.nSkip = saved_nSkip;
|
||||
pNew->nSkip = saved_nSkip;
|
||||
pNew->wsFlags = saved_wsFlags;
|
||||
pNew->nOut = saved_nOut;
|
||||
pNew->nLTerm = saved_nLTerm;
|
||||
|
||||
/* Consider using a skip-scan if there are no WHERE clause constraints
|
||||
** available for the left-most terms of the index, and if the average
|
||||
** number of repeats in the left-most terms is at least 18.
|
||||
**
|
||||
** The magic number 18 is selected on the basis that scanning 17 rows
|
||||
** is almost always quicker than an index seek (even though if the index
|
||||
** contains fewer than 2^17 rows we assume otherwise in other parts of
|
||||
** the code). And, even if it is not, it should not be too much slower.
|
||||
** On the other hand, the extra seeks could end up being significantly
|
||||
** more expensive. */
|
||||
assert( 42==sqlite3LogEst(18) );
|
||||
if( saved_nEq==saved_nSkip
|
||||
&& saved_nEq+1<pProbe->nKeyCol
|
||||
&& pProbe->aiRowLogEst[saved_nEq+1]>=42 /* TUNING: Minimum for skip-scan */
|
||||
&& (rc = whereLoopResize(db, pNew, pNew->nLTerm+1))==SQLITE_OK
|
||||
){
|
||||
LogEst nIter;
|
||||
pNew->u.btree.nEq++;
|
||||
pNew->nSkip++;
|
||||
pNew->aLTerm[pNew->nLTerm++] = 0;
|
||||
pNew->wsFlags |= WHERE_SKIPSCAN;
|
||||
nIter = pProbe->aiRowLogEst[saved_nEq] - pProbe->aiRowLogEst[saved_nEq+1];
|
||||
if( pTerm ){
|
||||
/* TUNING: When estimating skip-scan for a term that is also indexable,
|
||||
** multiply the cost of the skip-scan by 2.0, to make it a little less
|
||||
** desirable than the regular index lookup. */
|
||||
nIter += 10; assert( 10==sqlite3LogEst(2) );
|
||||
}
|
||||
pNew->nOut -= nIter;
|
||||
/* TUNING: Because of uncertainties in the estimates for skip-scan queries,
|
||||
** add a 1.375 fudge factor to make skip-scan slightly less likely. */
|
||||
nIter += 5;
|
||||
whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, nIter + nInMul);
|
||||
pNew->nOut = saved_nOut;
|
||||
pNew->u.btree.nEq = saved_nEq;
|
||||
pNew->nSkip = saved_nSkip;
|
||||
pNew->wsFlags = saved_wsFlags;
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
@@ -4714,7 +4706,7 @@ static int whereLoopAddBtree(
|
||||
if( pTerm->prereqRight & pNew->maskSelf ) continue;
|
||||
if( termCanDriveIndex(pTerm, pSrc, 0) ){
|
||||
pNew->u.btree.nEq = 1;
|
||||
pNew->u.btree.nSkip = 0;
|
||||
pNew->nSkip = 0;
|
||||
pNew->u.btree.pIndex = 0;
|
||||
pNew->nLTerm = 1;
|
||||
pNew->aLTerm[0] = pTerm;
|
||||
@@ -4755,7 +4747,7 @@ static int whereLoopAddBtree(
|
||||
}
|
||||
rSize = pProbe->aiRowLogEst[0];
|
||||
pNew->u.btree.nEq = 0;
|
||||
pNew->u.btree.nSkip = 0;
|
||||
pNew->nSkip = 0;
|
||||
pNew->nLTerm = 0;
|
||||
pNew->iSortIdx = 0;
|
||||
pNew->rSetup = 0;
|
||||
@@ -5305,7 +5297,7 @@ static i8 wherePathSatisfiesOrderBy(
|
||||
|
||||
/* Skip over == and IS NULL terms */
|
||||
if( j<pLoop->u.btree.nEq
|
||||
&& pLoop->u.btree.nSkip==0
|
||||
&& pLoop->nSkip==0
|
||||
&& ((i = pLoop->aLTerm[j]->eOperator) & (WO_EQ|WO_ISNULL))!=0
|
||||
){
|
||||
if( i & WO_ISNULL ){
|
||||
@@ -5878,7 +5870,7 @@ static int whereShortCut(WhereLoopBuilder *pBuilder){
|
||||
pWC = &pWInfo->sWC;
|
||||
pLoop = pBuilder->pNew;
|
||||
pLoop->wsFlags = 0;
|
||||
pLoop->u.btree.nSkip = 0;
|
||||
pLoop->nSkip = 0;
|
||||
pTerm = findTerm(pWC, iCur, -1, 0, WO_EQ, 0);
|
||||
if( pTerm ){
|
||||
pLoop->wsFlags = WHERE_COLUMN_EQ|WHERE_IPK|WHERE_ONEROW;
|
||||
@@ -5890,7 +5882,6 @@ static int whereShortCut(WhereLoopBuilder *pBuilder){
|
||||
}else{
|
||||
for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
|
||||
assert( pLoop->aLTermSpace==pLoop->aLTerm );
|
||||
assert( ArraySize(pLoop->aLTermSpace)==4 );
|
||||
if( !IsUniqueIndex(pIdx)
|
||||
|| pIdx->pPartIdxWhere!=0
|
||||
|| pIdx->nKeyCol>ArraySize(pLoop->aLTermSpace)
|
||||
|
||||
Reference in New Issue
Block a user