1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-08-08 14:02:16 +03:00

Improve the accuracy of the estimates used when searching an index for values not present in any stat4 samples under some circumstances.

FossilOrigin-Name: e6f7f97dbc677c9f01b23142928c3fa7307c2fba
This commit is contained in:
dan
2014-10-03 19:16:53 +00:00
parent 79f7af9a9e
commit 43085d7425
4 changed files with 53 additions and 23 deletions

View File

@@ -1448,12 +1448,12 @@ static void decodeIntArray(
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
if( aOut ){
aOut[i] = v;
}else
}
#else
assert( aOut==0 );
UNUSED_PARAMETER(aOut);
#endif
{
if( aLog ){
aLog[i] = sqlite3LogEst(v);
}
if( *z==' ' ) z++;
@@ -1516,8 +1516,16 @@ static int analysisLoader(void *pData, int argc, char **argv, char **NotUsed){
z = argv[2];
if( pIndex ){
int nCol = pIndex->nKeyCol+1;
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
tRowcnt * const aiRowEst = pIndex->aiRowEst = (tRowcnt*)sqlite3DbMallocZero(
pInfo->db, sizeof(tRowcnt) * nCol
);
#else
tRowcnt * const aiRowEst = 0;
#endif
pIndex->bUnordered = 0;
decodeIntArray((char*)z, pIndex->nKeyCol+1, 0, pIndex->aiRowLogEst, pIndex);
decodeIntArray((char*)z, nCol, aiRowEst, pIndex->aiRowLogEst, pIndex);
if( pIndex->pPartIdxWhere==0 ) pTable->nRowLogEst = pIndex->aiRowLogEst[0];
}else{
Index fakeIdx;
@@ -1576,25 +1584,38 @@ static void initAvgEq(Index *pIdx){
pIdx->aAvgEq[nCol] = 1;
}
for(iCol=0; iCol<nCol; iCol++){
int nSample = pIdx->nSample;
int i; /* Used to iterate through samples */
tRowcnt sumEq = 0; /* Sum of the nEq values */
tRowcnt nSum = 0; /* Number of terms contributing to sumEq */
tRowcnt avgEq = 0;
tRowcnt nDLt = pFinal->anDLt[iCol];
tRowcnt nRow; /* Number of rows in index */
i64 nSum100 = 0; /* Number of terms contributing to sumEq */
i64 nDist100; /* Number of distinct values in index */
if( pIdx->aiRowEst==0 ){
nRow = pFinal->anLt[iCol];
nDist100 = (i64)100 * pFinal->anDLt[iCol];
nSample--;
}else{
nRow = pIdx->aiRowEst[0];
nDist100 = ((i64)100 * pIdx->aiRowEst[0]) / pIdx->aiRowEst[iCol+1];
}
/* Set nSum to the number of distinct (iCol+1) field prefixes that
** occur in the stat4 table for this index before pFinal. Set
** sumEq to the sum of the nEq values for column iCol for the same
** set (adding the value only once where there exist duplicate
** prefixes). */
for(i=0; i<(pIdx->nSample-1); i++){
if( aSample[i].anDLt[iCol]!=aSample[i+1].anDLt[iCol] ){
** occur in the stat4 table for this index. Set sumEq to the sum of
** the nEq values for column iCol for the same set (adding the value
** only once where there exist duplicate prefixes). */
for(i=0; i<nSample; i++){
if( i==(pIdx->nSample-1)
|| aSample[i].anDLt[iCol]!=aSample[i+1].anDLt[iCol]
){
sumEq += aSample[i].anEq[iCol];
nSum++;
nSum100 += 100;
}
}
if( nDLt>nSum ){
avgEq = (pFinal->anLt[iCol] - sumEq)/(nDLt - nSum);
if( nDist100>nSum100 ){
avgEq = ((i64)100 * (nRow - sumEq))/(nDist100 - nSum100);
}
if( avgEq==0 ) avgEq = 1;
pIdx->aAvgEq[iCol] = avgEq;
@@ -1846,6 +1867,11 @@ int sqlite3AnalysisLoad(sqlite3 *db, int iDb){
rc = loadStat4(db, sInfo.zDatabase);
db->lookaside.bEnabled = lookasideEnabled;
}
for(i=sqliteHashFirst(&db->aDb[iDb].pSchema->idxHash);i;i=sqliteHashNext(i)){
Index *pIdx = sqliteHashData(i);
sqlite3DbFree(db, pIdx->aiRowEst);
pIdx->aiRowEst = 0;
}
#endif
if( rc==SQLITE_NOMEM ){