1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-11-18 10:21:03 +03:00

If a skip-scan is a proper subset of some other scan, then adjust the

cost of the skip-scan upward so that it is more costly than the other scan.
Such a cost imbalance can arise under STAT4 because of difficulties in getting
an accurate estimate for skip-scans.

FossilOrigin-Name: f4b22a2620a5dc48949048c2ecbd226755d4b2c3
This commit is contained in:
drh
2014-10-21 01:05:09 +00:00
parent 40253262e0
commit c8bbce1e6a
5 changed files with 212 additions and 75 deletions

View File

@@ -2638,7 +2638,7 @@ static int codeAllEqualityTerms(
pLoop = pLevel->pWLoop;
assert( (pLoop->wsFlags & WHERE_VIRTUALTABLE)==0 );
nEq = pLoop->u.btree.nEq;
nSkip = pLoop->u.btree.nSkip;
nSkip = pLoop->nSkip;
pIdx = pLoop->u.btree.pIndex;
assert( pIdx!=0 );
@@ -2752,7 +2752,7 @@ static void explainAppendTerm(
static void explainIndexRange(StrAccum *pStr, WhereLoop *pLoop, Table *pTab){
Index *pIndex = pLoop->u.btree.pIndex;
u16 nEq = pLoop->u.btree.nEq;
u16 nSkip = pLoop->u.btree.nSkip;
u16 nSkip = pLoop->nSkip;
int i, j;
Column *aCol = pTab->aCol;
i16 *aiColumn = pIndex->aiColumn;
@@ -3189,7 +3189,7 @@ static Bitmask codeOneLoopStart(
pIdx = pLoop->u.btree.pIndex;
iIdxCur = pLevel->iIdxCur;
assert( nEq>=pLoop->u.btree.nSkip );
assert( nEq>=pLoop->nSkip );
/* If this loop satisfies a sort order (pOrderBy) request that
** was passed to this function to implement a "SELECT min(x) ..."
@@ -3206,7 +3206,7 @@ static Bitmask codeOneLoopStart(
&& pWInfo->nOBSat>0
&& (pIdx->nKeyCol>nEq)
){
assert( pLoop->u.btree.nSkip==0 );
assert( pLoop->nSkip==0 );
bSeekPastNull = 1;
nExtraReg = 1;
}
@@ -3827,7 +3827,7 @@ static void whereLoopPrint(WhereLoop *p, WhereClause *pWC){
sqlite3_free(z);
}
if( p->wsFlags & WHERE_SKIPSCAN ){
sqlite3DebugPrintf(" f %05x %d-%d", p->wsFlags, p->nLTerm,p->u.btree.nSkip);
sqlite3DebugPrintf(" f %05x %d-%d", p->wsFlags, p->nLTerm,p->nSkip);
}else{
sqlite3DebugPrintf(" f %05x N %d", p->wsFlags, p->nLTerm);
}
@@ -3956,12 +3956,15 @@ static int whereLoopCheaperProperSubset(
const WhereLoop *pY /* Compare against this WhereLoop */
){
int i, j;
if( pX->nLTerm >= pY->nLTerm ) return 0; /* X is not a subset of Y */
if( pX->nLTerm-pX->nSkip >= pY->nLTerm-pY->nSkip ){
return 0; /* X is not a subset of Y */
}
if( pX->rRun >= pY->rRun ){
if( pX->rRun > pY->rRun ) return 0; /* X costs more than Y */
if( pX->nOut > pY->nOut ) return 0; /* X costs more than Y */
}
for(i=pX->nLTerm-1; i>=0; i--){
if( pX->aLTerm[i]==0 ) continue;
for(j=pY->nLTerm-1; j>=0; j--){
if( pY->aLTerm[j]==pX->aLTerm[i] ) break;
}
@@ -3983,25 +3986,12 @@ static int whereLoopCheaperProperSubset(
** To say "WhereLoop X is a proper subset of Y" means that X uses fewer
** WHERE clause terms than Y and that every WHERE clause term used by X is
** also used by Y.
**
** This adjustment is omitted for SKIPSCAN loops. In a SKIPSCAN loop, the
** WhereLoop.nLTerm field is not an accurate measure of the number of WHERE
** clause terms covered, since some of the first nLTerm entries in aLTerm[]
** will be NULL (because they are skipped). That makes it more difficult
** to compare the loops. We could add extra code to do the comparison, and
** perhaps we will someday. But SKIPSCAN is sufficiently uncommon, and this
** adjustment is sufficient minor, that it is very difficult to construct
** a test case where the extra code would improve the query plan. Better
** to avoid the added complexity and just omit cost adjustments to SKIPSCAN
** loops.
*/
static void whereLoopAdjustCost(const WhereLoop *p, WhereLoop *pTemplate){
if( (pTemplate->wsFlags & WHERE_INDEXED)==0 ) return;
if( (pTemplate->wsFlags & WHERE_SKIPSCAN)!=0 ) return;
for(; p; p=p->pNextLoop){
if( p->iTab!=pTemplate->iTab ) continue;
if( (p->wsFlags & WHERE_INDEXED)==0 ) continue;
if( (p->wsFlags & WHERE_SKIPSCAN)!=0 ) continue;
if( whereLoopCheaperProperSubset(p, pTemplate) ){
/* Adjust pTemplate cost downward so that it is cheaper than its
** subset p */
@@ -4295,7 +4285,7 @@ static int whereLoopAddBtreeIndex(
Bitmask saved_prereq; /* Original value of pNew->prereq */
u16 saved_nLTerm; /* Original value of pNew->nLTerm */
u16 saved_nEq; /* Original value of pNew->u.btree.nEq */
u16 saved_nSkip; /* Original value of pNew->u.btree.nSkip */
u16 saved_nSkip; /* Original value of pNew->nSkip */
u32 saved_wsFlags; /* Original value of pNew->wsFlags */
LogEst saved_nOut; /* Original value of pNew->nOut */
int iCol; /* Index of the column in the table */
@@ -4324,7 +4314,7 @@ static int whereLoopAddBtreeIndex(
pTerm = whereScanInit(&scan, pBuilder->pWC, pSrc->iCursor, iCol,
opMask, pProbe);
saved_nEq = pNew->u.btree.nEq;
saved_nSkip = pNew->u.btree.nSkip;
saved_nSkip = pNew->nSkip;
saved_nLTerm = pNew->nLTerm;
saved_wsFlags = pNew->wsFlags;
saved_prereq = pNew->prereq;
@@ -4332,44 +4322,6 @@ static int whereLoopAddBtreeIndex(
pNew->rSetup = 0;
rSize = pProbe->aiRowLogEst[0];
rLogSize = estLog(rSize);
/* Consider using a skip-scan if there are no WHERE clause constraints
** available for the left-most terms of the index, and if the average
** number of repeats in the left-most terms is at least 18.
**
** The magic number 18 is selected on the basis that scanning 17 rows
** is almost always quicker than an index seek (even though if the index
** contains fewer than 2^17 rows we assume otherwise in other parts of
** the code). And, even if it is not, it should not be too much slower.
** On the other hand, the extra seeks could end up being significantly
** more expensive. */
assert( 42==sqlite3LogEst(18) );
if( saved_nEq==saved_nSkip
&& saved_nEq+1<pProbe->nKeyCol
&& pProbe->aiRowLogEst[saved_nEq+1]>=42 /* TUNING: Minimum for skip-scan */
&& (rc = whereLoopResize(db, pNew, pNew->nLTerm+1))==SQLITE_OK
){
LogEst nIter;
pNew->u.btree.nEq++;
pNew->u.btree.nSkip++;
pNew->aLTerm[pNew->nLTerm++] = 0;
pNew->wsFlags |= WHERE_SKIPSCAN;
nIter = pProbe->aiRowLogEst[saved_nEq] - pProbe->aiRowLogEst[saved_nEq+1];
if( pTerm ){
/* TUNING: When estimating skip-scan for a term that is also indexable,
** multiply the cost of the skip-scan by 2.0, to make it a little less
** desirable than the regular index lookup. */
nIter += 10; assert( 10==sqlite3LogEst(2) );
}
pNew->nOut -= nIter;
/* TUNING: Because uncertainties in the estimates for skip-scan queries,
** add a 1.375 fudge factor to make skip-scan slightly less likely. */
nIter += 5;
whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, nIter + nInMul);
pNew->nOut = saved_nOut;
pNew->u.btree.nEq = saved_nEq;
pNew->u.btree.nSkip = saved_nSkip;
}
for(; rc==SQLITE_OK && pTerm!=0; pTerm = whereScanNext(&scan)){
u16 eOp = pTerm->eOperator; /* Shorthand for pTerm->eOperator */
LogEst rCostIdx;
@@ -4532,10 +4484,50 @@ static int whereLoopAddBtreeIndex(
}
pNew->prereq = saved_prereq;
pNew->u.btree.nEq = saved_nEq;
pNew->u.btree.nSkip = saved_nSkip;
pNew->nSkip = saved_nSkip;
pNew->wsFlags = saved_wsFlags;
pNew->nOut = saved_nOut;
pNew->nLTerm = saved_nLTerm;
/* Consider using a skip-scan if there are no WHERE clause constraints
** available for the left-most terms of the index, and if the average
** number of repeats in the left-most terms is at least 18.
**
** The magic number 18 is selected on the basis that scanning 17 rows
** is almost always quicker than an index seek (even though if the index
** contains fewer than 2^17 rows we assume otherwise in other parts of
** the code). And, even if it is not, it should not be too much slower.
** On the other hand, the extra seeks could end up being significantly
** more expensive. */
assert( 42==sqlite3LogEst(18) );
if( saved_nEq==saved_nSkip
&& saved_nEq+1<pProbe->nKeyCol
&& pProbe->aiRowLogEst[saved_nEq+1]>=42 /* TUNING: Minimum for skip-scan */
&& (rc = whereLoopResize(db, pNew, pNew->nLTerm+1))==SQLITE_OK
){
LogEst nIter;
pNew->u.btree.nEq++;
pNew->nSkip++;
pNew->aLTerm[pNew->nLTerm++] = 0;
pNew->wsFlags |= WHERE_SKIPSCAN;
nIter = pProbe->aiRowLogEst[saved_nEq] - pProbe->aiRowLogEst[saved_nEq+1];
if( pTerm ){
/* TUNING: When estimating skip-scan for a term that is also indexable,
** multiply the cost of the skip-scan by 2.0, to make it a little less
** desirable than the regular index lookup. */
nIter += 10; assert( 10==sqlite3LogEst(2) );
}
pNew->nOut -= nIter;
/* TUNING: Because uncertainties in the estimates for skip-scan queries,
** add a 1.375 fudge factor to make skip-scan slightly less likely. */
nIter += 5;
whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, nIter + nInMul);
pNew->nOut = saved_nOut;
pNew->u.btree.nEq = saved_nEq;
pNew->nSkip = saved_nSkip;
pNew->wsFlags = saved_wsFlags;
}
return rc;
}
@@ -4714,7 +4706,7 @@ static int whereLoopAddBtree(
if( pTerm->prereqRight & pNew->maskSelf ) continue;
if( termCanDriveIndex(pTerm, pSrc, 0) ){
pNew->u.btree.nEq = 1;
pNew->u.btree.nSkip = 0;
pNew->nSkip = 0;
pNew->u.btree.pIndex = 0;
pNew->nLTerm = 1;
pNew->aLTerm[0] = pTerm;
@@ -4755,7 +4747,7 @@ static int whereLoopAddBtree(
}
rSize = pProbe->aiRowLogEst[0];
pNew->u.btree.nEq = 0;
pNew->u.btree.nSkip = 0;
pNew->nSkip = 0;
pNew->nLTerm = 0;
pNew->iSortIdx = 0;
pNew->rSetup = 0;
@@ -5305,7 +5297,7 @@ static i8 wherePathSatisfiesOrderBy(
/* Skip over == and IS NULL terms */
if( j<pLoop->u.btree.nEq
&& pLoop->u.btree.nSkip==0
&& pLoop->nSkip==0
&& ((i = pLoop->aLTerm[j]->eOperator) & (WO_EQ|WO_ISNULL))!=0
){
if( i & WO_ISNULL ){
@@ -5878,7 +5870,7 @@ static int whereShortCut(WhereLoopBuilder *pBuilder){
pWC = &pWInfo->sWC;
pLoop = pBuilder->pNew;
pLoop->wsFlags = 0;
pLoop->u.btree.nSkip = 0;
pLoop->nSkip = 0;
pTerm = findTerm(pWC, iCur, -1, 0, WO_EQ, 0);
if( pTerm ){
pLoop->wsFlags = WHERE_COLUMN_EQ|WHERE_IPK|WHERE_ONEROW;
@@ -5890,7 +5882,6 @@ static int whereShortCut(WhereLoopBuilder *pBuilder){
}else{
for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
assert( pLoop->aLTermSpace==pLoop->aLTerm );
assert( ArraySize(pLoop->aLTermSpace)==4 );
if( !IsUniqueIndex(pIdx)
|| pIdx->pPartIdxWhere!=0
|| pIdx->nKeyCol>ArraySize(pLoop->aLTermSpace)