1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-11-18 10:21:03 +03:00

When estimating the number of rows visited by a range scan for which the keys consist of more than one field, consider prefixes of stat4 samples as well as the full samples.

FossilOrigin-Name: e1caf93c9ad0ee15d42030af95619f212d3fcf9d
This commit is contained in:
dan
2015-03-14 18:59:58 +00:00
parent dfac7016a7
commit a3d0c13654
4 changed files with 210 additions and 46 deletions

View File

@@ -1931,11 +1931,14 @@ static int vtabBestIndex(Parse *pParse, Table *pTab, sqlite3_index_info *p){
** Estimate the location of a particular key among all keys in an
** index. Store the results in aStat as follows:
**
** aStat[0] Est. number of rows less than pVal
** aStat[1] Est. number of rows equal to pVal
** aStat[0] Est. number of rows less than pRec
** aStat[1] Est. number of rows equal to pRec
**
** Return the index of the sample that is the smallest sample that
** is greater than or equal to pRec.
** is greater than or equal to pRec. Note that this index is not an index
** into the aSample[] array - it is an index into a virtual set of samples
** based on the contents of aSample[] and the number of fields in record
** pRec.
*/
static int whereKeyStats(
Parse *pParse, /* Database connection */
@@ -1946,67 +1949,158 @@ static int whereKeyStats(
){
IndexSample *aSample = pIdx->aSample;
int iCol; /* Index of required stats in anEq[] etc. */
int i; /* Index of first sample >= pRec */
int iSample; /* Smallest sample larger than or equal to pRec */
int iMin = 0; /* Smallest sample not yet tested */
int i = pIdx->nSample; /* Smallest sample larger than or equal to pRec */
int iTest; /* Next sample to test */
int res; /* Result of comparison operation */
int nField; /* Number of fields in pRec */
tRowcnt iLower = 0; /* anLt[] + anEq[] of largest sample pRec is > */
#ifndef SQLITE_DEBUG
UNUSED_PARAMETER( pParse );
#endif
assert( pRec!=0 );
iCol = pRec->nField - 1;
assert( pIdx->nSample>0 );
assert( pRec->nField>0 && iCol<pIdx->nSampleCol );
assert( pRec->nField>0 && pRec->nField<=pIdx->nSampleCol );
/* Do a binary search to find the first sample greater than or equal
** to pRec. If pRec contains a single field, the set of samples to search
** is simply the aSample[] array. If the samples in aSample[] contain more
** than one field, all fields following the first are ignored.
**
** If pRec contains N fields, where N is more than one, then as well as the
** samples in aSample[] (truncated to N fields), the search also has to
** consider prefixes of those samples. For example, if the set of samples
** in aSample is:
**
** aSample[0] = (a, 5)
** aSample[1] = (a, 10)
** aSample[2] = (b, 5)
** aSample[3] = (c, 100)
** aSample[4] = (c, 105)
**
** Then the search space should ideally be the samples above and the
** unique prefixes [a], [b] and [c]. But since that is hard to organize,
** the code actually searches this set:
**
** 0: (a)
** 1: (a, 5)
** 2: (a, 10)
** 3: (a, 10)
** 4: (b)
** 5: (b, 5)
** 6: (c)
** 7: (c, 100)
** 8: (c, 105)
** 9: (c, 105)
**
** For each sample in the aSample[] array, N samples are present in the
** effective sample array. In the above, samples 0 and 1 are based on
** sample aSample[0]. Samples 2 and 3 on aSample[1] etc.
**
** Usually, sample i of each block of N effective samples has (i+1) fields.
** However, a sample may be extended to ensure that it is greater than or
** equal to the previous sample in the array. For example, in the above,
** sample 2 is the first sample of a block of N samples, so at first it
** appears that it should be 1 field in size. However, that would make it
** smaller than sample 1, so the binary search would not work. As a result,
** it is extended to two fields. The duplicates that this creates do not
** cause any problems.
*/
nField = pRec->nField;
iCol = 0;
iSample = pIdx->nSample * nField;
do{
iTest = (iMin+i)/2;
res = sqlite3VdbeRecordCompare(aSample[iTest].n, aSample[iTest].p, pRec);
if( res<0 ){
iMin = iTest+1;
int iSamp; /* Index in aSample[] of test sample */
int n; /* Number of fields in test sample */
iTest = (iMin+iSample)/2;
iSamp = iTest / nField;
if( iSamp>0 ){
/* The proposed effective sample is a prefix of sample aSample[iSamp].
** Specifically, the shortest prefix of at least (1 + iTest%nField)
** fields that is greater than the previous effective sample. */
for(n=(iTest % nField) + 1; n<nField; n++){
if( aSample[iSamp-1].anLt[n-1]!=aSample[iSamp].anLt[n-1] ) break;
}
}else{
i = iTest;
n = iTest + 1;
}
}while( res && iMin<i );
pRec->nField = n;
res = sqlite3VdbeRecordCompare(aSample[iSamp].n, aSample[iSamp].p, pRec);
if( res<0 ){
iLower = aSample[iSamp].anLt[n-1] + aSample[iSamp].anEq[n-1];
iMin = iTest+1;
}else if( res==0 && n<nField ){
iLower = aSample[iSamp].anLt[n-1];
iMin = iTest+1;
res = -1;
}else{
iSample = iTest;
iCol = n-1;
}
}while( res && iMin<iSample );
i = iSample / nField;
#ifdef SQLITE_DEBUG
/* The following assert statements check that the binary search code
** above found the right answer. This block serves no purpose other
** than to invoke the asserts. */
if( res==0 ){
/* If (res==0) is true, then sample $i must be equal to pRec */
assert( i<pIdx->nSample );
assert( 0==sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec)
|| pParse->db->mallocFailed );
}else{
/* Otherwise, pRec must be smaller than sample $i and larger than
** sample ($i-1). */
assert( i==pIdx->nSample
|| sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec)>0
|| pParse->db->mallocFailed );
assert( i==0
|| sqlite3VdbeRecordCompare(aSample[i-1].n, aSample[i-1].p, pRec)<0
|| pParse->db->mallocFailed );
if( pParse->db->mallocFailed==0 ){
if( res==0 ){
/* If (res==0) is true, then pRec must be equal to sample i. */
assert( i<pIdx->nSample );
assert( iCol==nField-1 );
pRec->nField = nField;
assert( 0==sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec)
|| pParse->db->mallocFailed
);
}else{
/* Unless i==pIdx->nSample, indicating that pRec is larger than
** all samples in the aSample[] array, pRec must be smaller than the
** (iCol+1) field prefix of sample i. */
assert( i<=pIdx->nSample && i>=0 );
pRec->nField = iCol+1;
assert( i==pIdx->nSample
|| sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec)>0
|| pParse->db->mallocFailed );
/* If i==0 and iCol==0, then record pRec is smaller than all samples
** in the aSample[] array. Otherwise, if (iCol>0) then pRec must
** be greater than or equal to the (iCol) field prefix of sample i.
** If (i>0), then pRec must also be greater than sample (i-1). */
if( iCol>0 ){
pRec->nField = iCol;
assert( sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec)<=0
|| pParse->db->mallocFailed );
}
if( i>0 ){
pRec->nField = nField;
assert( sqlite3VdbeRecordCompare(aSample[i-1].n, aSample[i-1].p, pRec)<0
|| pParse->db->mallocFailed );
}
}
}
#endif /* ifdef SQLITE_DEBUG */
/* At this point, aSample[i] is the first sample that is greater than
** or equal to pVal. Or if i==pIdx->nSample, then all samples are less
** than pVal. If aSample[i]==pVal, then res==0.
*/
if( res==0 ){
/* Record pRec is equal to sample i */
assert( iCol==nField-1 );
aStat[0] = aSample[i].anLt[iCol];
aStat[1] = aSample[i].anEq[iCol];
}else{
tRowcnt iLower, iUpper, iGap;
if( i==0 ){
iLower = 0;
iUpper = aSample[0].anLt[iCol];
/* At this point, the (iCol+1) field prefix of aSample[i] is the first
** sample that is greater than pRec. Or, if i==pIdx->nSample then pRec
** is larger than all samples in the array. */
tRowcnt iUpper, iGap;
if( i>=pIdx->nSample ){
iUpper = sqlite3LogEstToInt(pIdx->aiRowLogEst[0]);
}else{
i64 nRow0 = sqlite3LogEstToInt(pIdx->aiRowLogEst[0]);
iUpper = i>=pIdx->nSample ? nRow0 : aSample[i].anLt[iCol];
iLower = aSample[i-1].anEq[iCol] + aSample[i-1].anLt[iCol];
iUpper = aSample[i].anLt[iCol];
}
aStat[1] = pIdx->aAvgEq[iCol];
if( iLower>=iUpper ){
iGap = 0;
}else{
@@ -2018,7 +2112,11 @@ static int whereKeyStats(
iGap = iGap/3;
}
aStat[0] = iLower + iGap;
aStat[1] = pIdx->aAvgEq[iCol];
}
/* Restore the pRec->nField value before returning. */
pRec->nField = nField;
return i;
}
#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */