From e847d3247fbc3a7a5804894f60342b6331757bec Mon Sep 17 00:00:00 2001 From: drh Date: Thu, 20 Jan 2011 02:56:37 +0000 Subject: [PATCH 01/11] The first of a planned series of enhancements to the query planner that enable it to make better use of sqlite_stat2 histograms when the table has many repeated values. FossilOrigin-Name: 2cd374cd23fa2fd38f49090d6eeb9b1e521d51d5 --- manifest | 25 +++-- manifest.uuid | 2 +- src/where.c | 54 +++++++---- test/analyze2.test | 26 +++--- test/analyze5.test | 225 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 291 insertions(+), 41 deletions(-) create mode 100644 test/analyze5.test diff --git a/manifest b/manifest index f9bb697af7..00b3208ed7 100644 --- a/manifest +++ b/manifest @@ -1,8 +1,8 @@ -----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 -C Comment\simprovements\sin\spcache1.c.\s\sNo\schanges\sto\scode. -D 2011-01-19T21:58:56.344 +C The\sfirst\sof\sa\splanned\sseries\sof\senhancements\sto\sthe\squery\splanner\sthat\nenable\sit\sto\smake\sbetter\suse\sof\ssqlite_stat2\shistograms\swhen\sthe\stable\nhas\smany\srepeated\svalues. 
+D 2011-01-20T02:56:37.736 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in de6498556d536ae60bb8bb10e8c1ba011448658c F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -243,7 +243,7 @@ F src/vtab.c b297e8fa656ab5e66244ab15680d68db0adbec30 F src/wal.c dbca424f71678f663a286ab2a98f947af1d412a7 F src/wal.h c1aac6593a0b02b15dc625987e619edeab39292e F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f -F src/where.c af069e6b53234118014dabfece96a9515b69d76b +F src/where.c 5cd6b88d57bfc816ba7f753a3cdf03686d954b8a F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2 F test/alias.test 4529fbc152f190268a15f9384a5651bbbabc9d87 F test/all.test 51756962d522e474338e9b2ebb26e7364d4aa125 @@ -253,9 +253,10 @@ F test/alter3.test 8677e48d95536f7a6ed86a1a774744dadcc22b07 F test/alter4.test 1e5dd6b951e9f65ca66422edff02e56df82dd403 F test/altermalloc.test e81ac9657ed25c6c5bb09bebfa5a047cd8e4acfc F test/analyze.test c1eb87067fc16ece7c07e823d6395fd831b270c5 -F test/analyze2.test 3bde8f0879d9c1f2df3af21fcf42e706d8ee1e43 +F test/analyze2.test f45ac8d54bdad822139e53fc6307fc6b5ee41c69 F test/analyze3.test 820ddfb7591b49607fbaf77240c7955ac3cabb04 F test/analyze4.test 757b37875cf9bb528d46f74497bc789c88365045 +F test/analyze5.test 18987796646efdf009ca0b8c8f060874a8fe57fb F test/async.test ad4ba51b77cd118911a3fe1356b0809da9c108c3 F test/async2.test bf5e2ca2c96763b4cba3d016249ad7259a5603b6 F test/async3.test 93edaa9122f498e56ea98c36c72abc407f4fb11e @@ -899,14 +900,18 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P e5ca59e63b18ac45a8c82ca39dc8cce1c4ce903c -R 246c042190fbeb83b3f1f3c471b7048c +P 9660a0a22547656cc3765b673d0cee9e1dd829ef +R 9ff1bb21abd03a28e074b829beb25c52 +T *bgcolor * #a8c7d3 +T *branch * stat2-enhancement +T 
*sym-stat2-enhancement * +T -sym-trunk * U drh -Z d4b4af53a8b712c86792c5f30e91ad66 +Z f191562671825ddb731a0f83c41674dd -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.6 (GNU/Linux) -iD8DBQFNN16joxKgR168RlERApzpAJ9ub7vBM3MbU8WJio56+Ng2W8PfBQCeLClb -tiAK9YA5ekgfGTTQ91uwQlY= -=gzKU +iD8DBQFNN6RpoxKgR168RlERAtbyAJ4tlGP5CKHBEdaaRtF9LD6pnMJo9QCaAnxY +oc508+oZBxzr/UoIZL3o+G4= +=2eyE -----END PGP SIGNATURE----- diff --git a/manifest.uuid b/manifest.uuid index 96c640d6cf..a32e6ed42f 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -9660a0a22547656cc3765b673d0cee9e1dd829ef \ No newline at end of file +2cd374cd23fa2fd38f49090d6eeb9b1e521d51d5 \ No newline at end of file diff --git a/src/where.c b/src/where.c index 4a5026f5f4..734f24019f 100644 --- a/src/where.c +++ b/src/where.c @@ -2201,11 +2201,18 @@ static void bestVirtualIndex( /* ** Argument pIdx is a pointer to an index structure that has an array of ** SQLITE_INDEX_SAMPLES evenly spaced samples of the first indexed column -** stored in Index.aSample. The domain of values stored in said column -** may be thought of as divided into (SQLITE_INDEX_SAMPLES+1) regions. -** Region 0 contains all values smaller than the first sample value. Region -** 1 contains values larger than or equal to the value of the first sample, -** but smaller than the value of the second. And so on. +** stored in Index.aSample. These samples divide the domain of values stored +** the index into (SQLITE_INDEX_SAMPLES+1) regions. +** Region 0 contains all values less than the first sample value. Region +** 1 contains values between the first and second samples. Region 2 contains +** values between samples 2 and 3. And so on. Region SQLITE_INDEX_SAMPLES +** contains values larger than the last sample. +** +** If the index contains many duplicates of a single value, then it is +** possible that two or more adjacent samples can hold the same value. 
+** When that is the case, the smallest possible region code is returned +** when roundUp is false and the largest possible region code is returned +** when roundUp is true. ** ** If successful, this function determines which of the regions value ** pVal lies in, sets *piRegion to the region index (a value between 0 @@ -2218,8 +2225,10 @@ static int whereRangeRegion( Parse *pParse, /* Database connection */ Index *pIdx, /* Index to consider domain of */ sqlite3_value *pVal, /* Value to consider */ + int roundUp, /* Return largest valid region if true */ int *piRegion /* OUT: Region of domain in which value lies */ ){ + assert( roundUp==0 || roundUp==1 ); if( ALWAYS(pVal) ){ IndexSample *aSample = pIdx->aSample; int i = 0; @@ -2229,7 +2238,12 @@ static int whereRangeRegion( double r = sqlite3_value_double(pVal); for(i=0; i<SQLITE_INDEX_SAMPLES; i++){ - if( aSample[i].eType>=SQLITE_TEXT || aSample[i].u.r>r ) break; + if( aSample[i].eType>=SQLITE_TEXT ) break; + if( roundUp ){ + if( aSample[i].u.r>r ) break; + }else{ + if( aSample[i].u.r>=r ) break; + } } }else{ sqlite3 *db = pParse->db; @@ -2260,7 +2274,7 @@ static int whereRangeRegion( n = sqlite3ValueBytes(pVal, pColl->enc); for(i=0; imallocFailed ); return SQLITE_NOMEM; } - r = pColl->xCmp(pColl->pUser, nSample, zSample, n, z); + c = pColl->xCmp(pColl->pUser, nSample, zSample, n, z); sqlite3DbFree(db, zSample); }else #endif { - r = pColl->xCmp(pColl->pUser, aSample[i].nByte, aSample[i].u.z, n, z); + c = pColl->xCmp(pColl->pUser, aSample[i].nByte, aSample[i].u.z, n, z); } - if( r>0 ) break; + if( c-roundUp>=0 ) break; } } @@ -2386,15 +2400,21 @@ static int whereRangeScanEst( int iEst; int iLower = 0; int iUpper = SQLITE_INDEX_SAMPLES; + int roundUpUpper; + int roundUpLower; u8 aff = p->pTable->aCol[p->aiColumn[0]].affinity; if( pLower ){ Expr *pExpr = pLower->pExpr->pRight; rc = valueFromExpr(pParse, pExpr, aff, &pLowerVal); + assert( pLower->eOperator==WO_GT || pLower->eOperator==WO_GE ); + roundUpLower = (pLower->eOperator==WO_GT) ?1:0; } if( rc==SQLITE_OK && 
pUpper ){ Expr *pExpr = pUpper->pExpr->pRight; rc = valueFromExpr(pParse, pExpr, aff, &pUpperVal); + assert( pUpper->eOperator==WO_LT || pUpper->eOperator==WO_LE ); + roundUpUpper = (pUpper->eOperator==WO_LE) ?1:0; } if( rc!=SQLITE_OK || (pLowerVal==0 && pUpperVal==0) ){ @@ -2402,15 +2422,15 @@ static int whereRangeScanEst( sqlite3ValueFree(pUpperVal); goto range_est_fallback; }else if( pLowerVal==0 ){ - rc = whereRangeRegion(pParse, p, pUpperVal, &iUpper); + rc = whereRangeRegion(pParse, p, pUpperVal, roundUpUpper, &iUpper); if( pLower ) iLower = iUpper/2; }else if( pUpperVal==0 ){ - rc = whereRangeRegion(pParse, p, pLowerVal, &iLower); + rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower); if( pUpper ) iUpper = (iLower + SQLITE_INDEX_SAMPLES + 1)/2; }else{ - rc = whereRangeRegion(pParse, p, pUpperVal, &iUpper); + rc = whereRangeRegion(pParse, p, pUpperVal, roundUpUpper, &iUpper); if( rc==SQLITE_OK ){ - rc = whereRangeRegion(pParse, p, pLowerVal, &iLower); + rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower); } } @@ -2418,12 +2438,12 @@ static int whereRangeScanEst( testcase( iEst==SQLITE_INDEX_SAMPLES ); assert( iEst<=SQLITE_INDEX_SAMPLES ); if( iEst<1 ){ - iEst = 1; + *piEst = 50/SQLITE_INDEX_SAMPLES; + }else{ + *piEst = (iEst*100)/SQLITE_INDEX_SAMPLES; } - sqlite3ValueFree(pLowerVal); sqlite3ValueFree(pUpperVal); - *piEst = (iEst * 100)/SQLITE_INDEX_SAMPLES; return rc; } range_est_fallback: diff --git a/test/analyze2.test b/test/analyze2.test index 039fb378cc..add73af43a 100644 --- a/test/analyze2.test +++ b/test/analyze2.test @@ -154,22 +154,22 @@ do_eqp_test 2.6 { do_eqp_test 2.7 { SELECT * FROM t1 WHERE x BETWEEN -400 AND -300 AND y BETWEEN 100 AND 300 } { - 0 0 0 {SEARCH TABLE t1 USING INDEX t1_x (x>? AND x? AND x? AND y? AND y? AND x? AND x? AND y? AND y? AND x? AND x'h' @@ -416,14 +416,14 @@ do_test analyze2-6.2.2 { t5.a>1 AND t5.a<15 AND t6.a>1 } -} {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) 
(~2 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}} +} {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~1 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}} do_test analyze2-6.2.3 { sqlite3 db test.db eqp { SELECT * FROM t5,t6 WHERE t5.rowid=t6.rowid AND t5.a>1 AND t5.a<15 AND t6.a>1 } -} {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~2 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}} +} {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~1 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}} do_test analyze2-6.2.4 { execsql { PRAGMA writable_schema = 1; @@ -457,7 +457,7 @@ do_test analyze2-6.2.6 { t5.a>1 AND t5.a<15 AND t6.a>1 } -} {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~2 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}} +} {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~1 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}} #-------------------------------------------------------------------- # These tests, analyze2-7.*, test that the sqlite_stat2 functionality @@ -501,7 +501,7 @@ ifcapable shared_cache { t5.a>1 AND t5.a<15 AND t6.a>1 } db1 - } {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~2 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}} + } {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~1 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}} do_test analyze2-7.6 { incr_schema_cookie test.db execsql { SELECT * FROM sqlite_master } db2 @@ -509,7 +509,7 @@ ifcapable shared_cache { t5.a>1 AND t5.a<15 AND t6.a>1 } db2 - } {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~2 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}} + } {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~1 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) 
(~1 rows)}} do_test analyze2-7.7 { incr_schema_cookie test.db execsql { SELECT * FROM sqlite_master } db1 @@ -517,7 +517,7 @@ ifcapable shared_cache { t5.a>1 AND t5.a<15 AND t6.a>1 } db1 - } {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~2 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}} + } {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~1 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}} do_test analyze2-7.8 { execsql { DELETE FROM sqlite_stat2 } db2 @@ -526,14 +526,14 @@ ifcapable shared_cache { t5.a>1 AND t5.a<15 AND t6.a>1 } db1 - } {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~2 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}} + } {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~1 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}} do_test analyze2-7.9 { execsql { SELECT * FROM sqlite_master } db2 eqp { SELECT * FROM t5,t6 WHERE t5.rowid=t6.rowid AND t5.a>1 AND t5.a<15 AND t6.a>1 } db2 - } {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~2 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}} + } {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~1 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}} do_test analyze2-7.10 { incr_schema_cookie test.db diff --git a/test/analyze5.test b/test/analyze5.test new file mode 100644 index 0000000000..233e7ff9e7 --- /dev/null +++ b/test/analyze5.test @@ -0,0 +1,225 @@ +# 2011 January 19 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# This file implements tests for SQLite library. 
The focus of the tests +# in this file is the use of the sqlite_stat2 histogram data on tables +# with many repeated values and only a few distinct values. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +ifcapable !stat2 { + finish_test + return +} + +set testprefix analyze5 + +proc eqp {sql {db db}} { + uplevel execsql [list "EXPLAIN QUERY PLAN $sql"] $db +} + +do_test analyze5-1.0 { + execsql { CREATE TABLE t1(x INTEGER PRIMARY KEY, y, z) } + for {set i 0} {$i < 1000} {incr i} { + set j [expr {$i>=25 && $i<=50}] + set k [expr {($i>=400) + ($i>=700) + ($i>=875)}] + execsql { INSERT INTO t1 VALUES($i,$j,$k) } + } + execsql { + CREATE INDEX t1y ON t1(y); + CREATE INDEX t1z ON t1(z); + ANALYZE; + SELECT * FROM sqlite_stat2 ORDER BY 1, 2, 3; + } +} [list t1 t1y 0 0 \ + t1 t1y 1 0 \ + t1 t1y 2 0 \ + t1 t1y 3 0 \ + t1 t1y 4 0 \ + t1 t1y 5 0 \ + t1 t1y 6 0 \ + t1 t1y 7 0 \ + t1 t1y 8 0 \ + t1 t1y 9 0 \ + t1 t1z 0 0 \ + t1 t1z 1 0 \ + t1 t1z 2 0 \ + t1 t1z 3 0 \ + t1 t1z 4 1 \ + t1 t1z 5 1 \ + t1 t1z 6 1 \ + t1 t1z 7 2 \ + t1 t1z 8 2 \ + t1 t1z 9 3] + +# Verify that range queries generate the correct row count estimates +# +foreach {testid where rows} { + 1 {z>=0 AND z<=0} 400 + 2 {z>=1 AND z<=1} 300 + 3 {z>=2 AND z<=2} 200 + 4 {z>=3 AND z<=3} 100 + 5 {z>=4 AND z<=4} 50 + 6 {z>=-1 AND z<=-1} 50 + 7 {z>1 AND z<3} 200 + 8 {z>0 AND z<100} 600 + 9 {z>=1 AND z<100} 600 + 10 {z>1 AND z<100} 300 + 11 {z>=2 AND z<100} 300 + 12 {z>2 AND z<100} 100 + 13 {z>=3 AND z<100} 100 + 14 {z>3 AND z<100} 50 + 15 {z>=4 AND z<100} 50 + 16 {z>=-100 AND z<=-1} 50 + 17 {z>=-100 AND z<=0} 400 + 18 {z>=-100 AND z<0} 50 + 19 {z>=-100 AND z<=1} 700 + 20 {z>=-100 AND z<2} 700 + 21 {z>=-100 AND z<=2} 900 + 22 {z>=-100 AND z<3} 900 + + 31 {z>=0.0 AND z<=0.0} 400 + 32 {z>=1.0 AND z<=1.0} 300 + 33 {z>=2.0 AND z<=2.0} 200 + 34 {z>=3.0 AND z<=3.0} 100 + 35 {z>=4.0 AND z<=4.0} 50 + 36 {z>=-1.0 AND z<=-1.0} 50 + 37 {z>1.5 AND z<3.0} 200 + 38 {z>0.5 AND z<100} 600 + 39 {z>=1.0 AND z<100} 
600 + 40 {z>1.5 AND z<100} 300 + 41 {z>=2.0 AND z<100} 300 + 42 {z>2.1 AND z<100} 100 + 43 {z>=3.0 AND z<100} 100 + 44 {z>3.2 AND z<100} 50 + 45 {z>=4.0 AND z<100} 50 + 46 {z>=-100 AND z<=-1.0} 50 + 47 {z>=-100 AND z<=0.0} 400 + 48 {z>=-100 AND z<0.0} 50 + 49 {z>=-100 AND z<=1.0} 700 + 50 {z>=-100 AND z<2.0} 700 + 51 {z>=-100 AND z<=2.0} 900 + 52 {z>=-100 AND z<3.0} 900 + +} { + do_test analyze5-1.$testid { + eqp "SELECT * FROM t1 WHERE $where" + } [format {0 0 0 {SEARCH TABLE t1 USING INDEX t1z (z>? AND z=0 AND z<=0} 400 + 2 {z>=1 AND z<=1} 300 + 3 {z>=2 AND z<=2} 200 + 4 {z>=3 AND z<=3} 100 + 5 {z>=4 AND z<=4} 50 + 6 {z>=-1 AND z<=-1} 50 + 7 {z>1 AND z<3} 200 + 8 {z>0 AND z<100} 600 + 9 {z>=1 AND z<100} 600 + 10 {z>1 AND z<100} 300 + 11 {z>=2 AND z<100} 300 + 12 {z>2 AND z<100} 100 + 13 {z>=3 AND z<100} 100 + 14 {z>3 AND z<100} 50 + 15 {z>=4 AND z<100} 50 + 16 {z>=-100 AND z<=-1} 50 + 17 {z>=-100 AND z<=0} 400 + 18 {z>=-100 AND z<0} 50 + 19 {z>=-100 AND z<=1} 700 + 20 {z>=-100 AND z<2} 700 + 21 {z>=-100 AND z<=2} 900 + 22 {z>=-100 AND z<3} 900 + + 31 {z>=0.0 AND z<=0.0} 400 + 32 {z>=1.0 AND z<=1.0} 300 + 33 {z>=2.0 AND z<=2.0} 200 + 34 {z>=3.0 AND z<=3.0} 100 + 35 {z>=4.0 AND z<=4.0} 50 + 36 {z>=-1.0 AND z<=-1.0} 50 + 37 {z>1.5 AND z<3.0} 200 + 38 {z>0.5 AND z<100} 600 + 39 {z>=1.0 AND z<100} 600 + 40 {z>1.5 AND z<100} 300 + 41 {z>=2.0 AND z<100} 300 + 42 {z>2.1 AND z<100} 100 + 43 {z>=3.0 AND z<100} 100 + 44 {z>3.2 AND z<100} 50 + 45 {z>=4.0 AND z<100} 50 + 46 {z>=-100 AND z<=-1.0} 50 + 47 {z>=-100 AND z<=0.0} 400 + 48 {z>=-100 AND z<0.0} 50 + 49 {z>=-100 AND z<=1.0} 700 + 50 {z>=-100 AND z<2.0} 700 + 51 {z>=-100 AND z<=2.0} 900 + 52 {z>=-100 AND z<3.0} 900 +} { + do_test analyze5-2.$testid { + eqp "SELECT * FROM t1 WHERE $where" + } [format {0 0 0 {SEARCH TABLE t1 USING INDEX t1z (z>? 
AND z='alpha' AND y<='alpha'} 400 + 2 {y>='bravo' AND y<='bravo'} 300 + 3 {y>='charlie' AND y<='charlie'} 200 + 4 {y>='delta' AND y<='delta'} 100 + 5 {y>='echo' AND y<='echo'} 50 + 6 {y>='' AND y<=''} 50 + 7 {y>'bravo' AND y<'delta'} 200 + 8 {y>'alpha' AND y<'zzz'} 600 + 9 {y>='bravo' AND y<'zzz'} 600 + 10 {y>'bravo' AND y<'zzz'} 300 + 11 {y>='charlie' AND y<'zzz'} 300 + 12 {y>'charlie' AND y<'zzz'} 100 + 13 {y>='delta' AND y<'zzz'} 100 + 14 {y>'delta' AND y<'zzz'} 50 + 15 {y>='echo' AND y<'zzz'} 50 + 16 {y>=0 AND y<=''} 50 + 17 {y>=0 AND y<='alpha'} 400 + 18 {y>=0 AND y<'alpha'} 50 + 19 {y>=0 AND y<='bravo'} 700 + 20 {y>=0 AND y<'charlie'} 700 + 21 {y>=0 AND y<='charlie'} 900 + 22 {y>=0 AND y<'delta'} 900 +} { + do_test analyze5-3.$testid { + eqp "SELECT * FROM t1 WHERE $where" + } [format {0 0 0 {SEARCH TABLE t1 USING INDEX t1y (y>? AND y Date: Thu, 20 Jan 2011 16:52:09 +0000 Subject: [PATCH 02/11] Use histogram data to improve the row-count estimates on equality constraints. FossilOrigin-Name: 6bfc5c69eb22938972bbf4e60179952dc215f770 --- manifest | 24 +++++++--------- manifest.uuid | 2 +- src/where.c | 68 ++++++++++++++++++++++++++++++++++++++++++++-- test/analyze5.test | 8 ++++++ 4 files changed, 85 insertions(+), 17 deletions(-) diff --git a/manifest b/manifest index 00b3208ed7..cfa47d73c3 100644 --- a/manifest +++ b/manifest @@ -1,8 +1,8 @@ -----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 -C The\sfirst\sof\sa\splanned\sseries\sof\senhancements\sto\sthe\squery\splanner\sthat\nenable\sit\sto\smake\sbetter\suse\sof\ssqlite_stat2\shistograms\swhen\sthe\stable\nhas\smany\srepeated\svalues. -D 2011-01-20T02:56:37.736 +C Use\shistogram\sdata\sto\simprove\sthe\srow-count\sestimates\son\sequality\sconstraints. 
+D 2011-01-20T16:52:09.439 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in de6498556d536ae60bb8bb10e8c1ba011448658c F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -243,7 +243,7 @@ F src/vtab.c b297e8fa656ab5e66244ab15680d68db0adbec30 F src/wal.c dbca424f71678f663a286ab2a98f947af1d412a7 F src/wal.h c1aac6593a0b02b15dc625987e619edeab39292e F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f -F src/where.c 5cd6b88d57bfc816ba7f753a3cdf03686d954b8a +F src/where.c 068ecc5195578aed083a4314e572de9979a489e5 F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2 F test/alias.test 4529fbc152f190268a15f9384a5651bbbabc9d87 F test/all.test 51756962d522e474338e9b2ebb26e7364d4aa125 @@ -256,7 +256,7 @@ F test/analyze.test c1eb87067fc16ece7c07e823d6395fd831b270c5 F test/analyze2.test f45ac8d54bdad822139e53fc6307fc6b5ee41c69 F test/analyze3.test 820ddfb7591b49607fbaf77240c7955ac3cabb04 F test/analyze4.test 757b37875cf9bb528d46f74497bc789c88365045 -F test/analyze5.test 18987796646efdf009ca0b8c8f060874a8fe57fb +F test/analyze5.test b6bd2e4fbbe2ffcf2a22250b28fb1f2a2ca507de F test/async.test ad4ba51b77cd118911a3fe1356b0809da9c108c3 F test/async2.test bf5e2ca2c96763b4cba3d016249ad7259a5603b6 F test/async3.test 93edaa9122f498e56ea98c36c72abc407f4fb11e @@ -900,18 +900,14 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P 9660a0a22547656cc3765b673d0cee9e1dd829ef -R 9ff1bb21abd03a28e074b829beb25c52 -T *bgcolor * #a8c7d3 -T *branch * stat2-enhancement -T *sym-stat2-enhancement * -T -sym-trunk * +P 2cd374cd23fa2fd38f49090d6eeb9b1e521d51d5 +R b298fdc0ea4be4264436dd1639cb2644 U drh -Z f191562671825ddb731a0f83c41674dd +Z 10e84b987d0709faa336295530a6ebae -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.6 (GNU/Linux) 
-iD8DBQFNN6RpoxKgR168RlERAtbyAJ4tlGP5CKHBEdaaRtF9LD6pnMJo9QCaAnxY -oc508+oZBxzr/UoIZL3o+G4= -=2eyE +iD8DBQFNOGg8oxKgR168RlERAhVwAJ9ZyON4XSbnksAxlSedyp9Kpxx2QACfe+y0 +6eKoyLZRueuMI2F5tREdfw4= +=Hhmt -----END PGP SIGNATURE----- diff --git a/manifest.uuid b/manifest.uuid index a32e6ed42f..cbf97eefea 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -2cd374cd23fa2fd38f49090d6eeb9b1e521d51d5 \ No newline at end of file +6bfc5c69eb22938972bbf4e60179952dc215f770 \ No newline at end of file diff --git a/src/where.c b/src/where.c index 734f24019f..048ab940a3 100644 --- a/src/where.c +++ b/src/where.c @@ -2461,6 +2461,51 @@ range_est_fallback: return rc; } +#ifdef SQLITE_ENABLE_STAT2 +/* +** Estimate the number of rows that will be returned based on +** an equality constraint x=VALUE and where that VALUE occurs in +** the histogram data. This only works when x is the left-most +** column of an index and sqlite_stat2 histogram data is available +** for that index. +** +** Write the estimated row count into *pnRow. If unable to make +** an estimate, leave *pnRow unchanged. 
+*/ +void whereEqScanEst( + Parse *pParse, /* Parsing & code generating context */ + Index *p, /* The index whose left-most column is pTerm */ + WhereTerm *pTerm, /* The x=VALUE constraint */ + double *pnRow /* Write the revised row estimate here */ +){ + sqlite3_value *pRhs = 0; /* VALUE on right-hand side of pTerm */ + int iLower, iUpper; /* Range of histogram regions containing pRhs */ + u8 aff; /* Column affinity */ + int rc; /* Subfunction return code */ + double nRowEst; /* New estimate of the number of rows */ + + assert( p->aSample!=0 ); + assert( pTerm->eOperator==WO_EQ ); + aff = p->pTable->aCol[p->aiColumn[0]].affinity; + rc = valueFromExpr(pParse, pTerm->pExpr->pRight, aff, &pRhs); + if( rc ) goto whereEqScanEst_cancel; + rc = whereRangeRegion(pParse, p, pRhs, 0, &iLower); + if( rc ) goto whereEqScanEst_cancel; + rc = whereRangeRegion(pParse, p, pRhs, 1, &iUpper); + if( rc ) goto whereEqScanEst_cancel; + if( iLower>=iUpper ){ + nRowEst = p->aiRowEst[0]/(SQLITE_INDEX_SAMPLES*2); + if( nRowEst<*pnRow ) *pnRow = nRowEst; + }else{ + nRowEst = (iUpper-iLower)*p->aiRowEst[0]/SQLITE_INDEX_SAMPLES; + *pnRow = nRowEst; + } + +whereEqScanEst_cancel: + sqlite3ValueFree(pRhs); +} +#endif /* defined(SQLITE_ENABLE_STAT2) */ + /* ** Find the query plan for accessing a particular table. 
Write the @@ -2624,10 +2669,13 @@ static void bestBtreeIndex( int bInEst = 0; int nInMul = 1; int estBound = 100; - int nBound = 0; /* Number of range constraints seen */ + int nBound = 0; /* Number of range constraints seen */ int bSort = 0; int bLookup = 0; - WhereTerm *pTerm; /* A single term of the WHERE clause */ + WhereTerm *pTerm; /* A single term of the WHERE clause */ +#ifdef SQLITE_ENABLE_STAT2 + WhereTerm *pFirstEqTerm = 0; /* First WO_EQ term */ +#endif /* Determine the values of nEq and nInMul */ for(nEq=0; nEq<pProbe->nColumn; nEq++){ @@ -2647,6 +2695,11 @@ static void bestBtreeIndex( }else if( pTerm->eOperator & WO_ISNULL ){ wsFlags |= WHERE_COLUMN_NULL; } +#ifdef SQLITE_ENABLE_STAT2 + else if( nEq==0 && pProbe->aSample ){ + pFirstEqTerm = pTerm; + } +#endif used |= pTerm->prereqRight; } @@ -2723,6 +2776,17 @@ static void bestBtreeIndex( nInMul = (int)(nRow / aiRowEst[nEq]); } +#ifdef SQLITE_ENABLE_STAT2 + /* If the constraint is of the form x=VALUE and histogram + ** data is available for column x, then it might be possible + ** to get a better estimate on the number of rows based on + ** VALUE and how common that value is according to the histogram. + */ + if( nRow>(double)1 && nEq==1 && pFirstEqTerm!=0 ){ + whereEqScanEst(pParse, pProbe, pFirstEqTerm, &nRow); + } +#endif /* SQLITE_ENABLE_STAT2 */ + /* Assume constant cost to access a row and logarithmic cost to ** do a binary search. Hence, the initial cost is the number of output ** rows plus log2(table-size) times the number of binary searches. 
diff --git a/test/analyze5.test b/test/analyze5.test index 233e7ff9e7..da0dbe3ead 100644 --- a/test/analyze5.test +++ b/test/analyze5.test @@ -214,6 +214,14 @@ foreach {testid where rows} { 20 {y>=0 AND y<'charlie'} 700 21 {y>=0 AND y<='charlie'} 900 22 {y>=0 AND y<'delta'} 900 + 23 {y>'alpha' AND y='bravo' AND y'bravo' AND y='charlie' AND y'charlie' AND y='delta' AND y'delta' AND y='echo' AND y Date: Thu, 20 Jan 2011 20:36:13 +0000 Subject: [PATCH 03/11] Update ANALYZE test cases to check out the use of histograms for equality constraints. FossilOrigin-Name: c7b59afaf0c0bf85dbaf0a122cc8d65fca93680f --- manifest | 18 ++++++------ manifest.uuid | 2 +- test/analyze5.test | 69 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 79 insertions(+), 10 deletions(-) diff --git a/manifest b/manifest index cfa47d73c3..ab69287cda 100644 --- a/manifest +++ b/manifest @@ -1,8 +1,8 @@ -----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 -C Use\shistogram\sdata\sto\simprove\sthe\srow-count\sestimates\son\sequality\sconstraints. -D 2011-01-20T16:52:09.439 +C Update\sANALYZE\stest\scases\sto\scheck\sout\sthe\suse\sof\shistograms\sfor\sequality\nconstraints. 
+D 2011-01-20T20:36:13.223 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in de6498556d536ae60bb8bb10e8c1ba011448658c F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -256,7 +256,7 @@ F test/analyze.test c1eb87067fc16ece7c07e823d6395fd831b270c5 F test/analyze2.test f45ac8d54bdad822139e53fc6307fc6b5ee41c69 F test/analyze3.test 820ddfb7591b49607fbaf77240c7955ac3cabb04 F test/analyze4.test 757b37875cf9bb528d46f74497bc789c88365045 -F test/analyze5.test b6bd2e4fbbe2ffcf2a22250b28fb1f2a2ca507de +F test/analyze5.test b2139886ee199a226df8f319e37aa7bd78b8f402 F test/async.test ad4ba51b77cd118911a3fe1356b0809da9c108c3 F test/async2.test bf5e2ca2c96763b4cba3d016249ad7259a5603b6 F test/async3.test 93edaa9122f498e56ea98c36c72abc407f4fb11e @@ -900,14 +900,14 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P 2cd374cd23fa2fd38f49090d6eeb9b1e521d51d5 -R b298fdc0ea4be4264436dd1639cb2644 +P 6bfc5c69eb22938972bbf4e60179952dc215f770 +R 2f06d09a87a4d54539d7aa2af234aaef U drh -Z 10e84b987d0709faa336295530a6ebae +Z 7d948c493671d8082107f9a1879413d8 -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.6 (GNU/Linux) -iD8DBQFNOGg8oxKgR168RlERAhVwAJ9ZyON4XSbnksAxlSedyp9Kpxx2QACfe+y0 -6eKoyLZRueuMI2F5tREdfw4= -=Hhmt +iD8DBQFNOJzAoxKgR168RlERAtM4AJwMzXlyhMoDKbExZY4UcDKSjGIdkwCcDJv7 +Vr+PmfFc04BsuQfthFYbAv4= +=9ci9 -----END PGP SIGNATURE----- diff --git a/manifest.uuid b/manifest.uuid index cbf97eefea..d7ce1c3e85 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -6bfc5c69eb22938972bbf4e60179952dc215f770 \ No newline at end of file +c7b59afaf0c0bf85dbaf0a122cc8d65fca93680f \ No newline at end of file diff --git a/test/analyze5.test b/test/analyze5.test index da0dbe3ead..a6bf640235 100644 --- a/test/analyze5.test +++ 
b/test/analyze5.test @@ -117,6 +117,40 @@ foreach {testid where rows} { } [format {0 0 0 {SEARCH TABLE t1 USING INDEX t1z (z>? AND z? AND z? AND y Date: Fri, 21 Jan 2011 14:37:04 +0000 Subject: [PATCH 04/11] Add the ability to use indices when a range contraint is bounded on the lower end by NULL. FossilOrigin-Name: f73a167b434fadcbbd15e3891c4b7f4f87f6363c --- manifest | 20 ++++++++++---------- manifest.uuid | 2 +- src/vdbemem.c | 2 ++ src/where.c | 18 ++++++++++++++++-- 4 files changed, 29 insertions(+), 13 deletions(-) diff --git a/manifest b/manifest index ab69287cda..bb09b610c7 100644 --- a/manifest +++ b/manifest @@ -1,8 +1,8 @@ -----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 -C Update\sANALYZE\stest\scases\sto\scheck\sout\sthe\suse\sof\shistograms\sfor\sequality\nconstraints. -D 2011-01-20T20:36:13.223 +C Add\sthe\sability\sto\suse\sindices\swhen\sa\srange\scontraint\sis\sbounded\son\nthe\slower\send\sby\sNULL. +D 2011-01-21T14:37:04.663 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in de6498556d536ae60bb8bb10e8c1ba011448658c F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -237,13 +237,13 @@ F src/vdbeInt.h 6e6f28e9bccc6c703dca1372fd661c57b5c15fb0 F src/vdbeapi.c 69c82283ab2b64c0c37a07799d771d4058330743 F src/vdbeaux.c 33448d23b857654dd69ed2103611f5c733606f68 F src/vdbeblob.c 18955f0ee6b133cd08e1592010cb9a6b11e9984c -F src/vdbemem.c 411649a35686f54268ccabeda175322c4697f5a6 +F src/vdbemem.c c011228c6fb1b5df924e4584765b16bde863c9c6 F src/vdbetrace.c 864cef96919323482ebd9986f2132435115e9cc2 F src/vtab.c b297e8fa656ab5e66244ab15680d68db0adbec30 F src/wal.c dbca424f71678f663a286ab2a98f947af1d412a7 F src/wal.h c1aac6593a0b02b15dc625987e619edeab39292e F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f -F src/where.c 068ecc5195578aed083a4314e572de9979a489e5 +F src/where.c cf219a4275cf430d0e7df9d2db04e9ba29702f8e F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2 F test/alias.test 
4529fbc152f190268a15f9384a5651bbbabc9d87 F test/all.test 51756962d522e474338e9b2ebb26e7364d4aa125 @@ -900,14 +900,14 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P 6bfc5c69eb22938972bbf4e60179952dc215f770 -R 2f06d09a87a4d54539d7aa2af234aaef +P c7b59afaf0c0bf85dbaf0a122cc8d65fca93680f +R 07d2f363ee58370384294fd182ee30ff U drh -Z 7d948c493671d8082107f9a1879413d8 +Z 32ed47abc5f3838cb9feba0ba8b99416 -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.6 (GNU/Linux) -iD8DBQFNOJzAoxKgR168RlERAtM4AJwMzXlyhMoDKbExZY4UcDKSjGIdkwCcDJv7 -Vr+PmfFc04BsuQfthFYbAv4= -=9ci9 +iD8DBQFNOZoToxKgR168RlERAkh6AJ4kec2vRcpVEtGIXoGz4TjpsHbYigCeMtj1 +RYqs8Oaohb8CL1KIU7eSI9Q= +=lXJE -----END PGP SIGNATURE----- diff --git a/manifest.uuid b/manifest.uuid index d7ce1c3e85..a9f1839b77 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -c7b59afaf0c0bf85dbaf0a122cc8d65fca93680f \ No newline at end of file +f73a167b434fadcbbd15e3891c4b7f4f87f6363c \ No newline at end of file diff --git a/src/vdbemem.c b/src/vdbemem.c index 4831d80658..aae8dbb652 100644 --- a/src/vdbemem.c +++ b/src/vdbemem.c @@ -1082,6 +1082,8 @@ int sqlite3ValueFromExpr( pVal->r = (double)-1 * pVal->r; sqlite3ValueApplyAffinity(pVal, affinity, enc); } + }else if( op==TK_NULL ){ + pVal = sqlite3ValueNew(db); } #ifndef SQLITE_OMIT_BLOB_LITERAL else if( op==TK_BLOB ){ diff --git a/src/where.c b/src/where.c index 048ab940a3..57552a4cba 100644 --- a/src/where.c +++ b/src/where.c @@ -2245,6 +2245,11 @@ static int whereRangeRegion( if( aSample[i].u.r>=r ) break; } } + }else if( eType==SQLITE_NULL ){ + i = 0; + if( roundUp ){ + while( idb; CollSeq *pColl; @@ -2433,6 +2438,7 @@ static int whereRangeScanEst( rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower); } } + WHERETRACE(("range scan regions: %d..%d\n", 
iLower, iUpper)); iEst = iUpper - iLower; testcase( iEst==SQLITE_INDEX_SAMPLES ); @@ -2471,6 +2477,11 @@ range_est_fallback: ** ** Write the estimated row count into *pnRow. If unable to make ** an estimate, leave *pnRow unchanged. +** +** This routine can fail if it is unable to load a collating sequence +** required for string comparison, or if unable to allocate memory +** for a UTF conversion required for comparison. The error is stored +** in the pParse structure. */ void whereEqScanEst( Parse *pParse, /* Parsing & code generating context */ @@ -2493,6 +2504,7 @@ void whereEqScanEst( if( rc ) goto whereEqScanEst_cancel; rc = whereRangeRegion(pParse, p, pRhs, 1, &iUpper); if( rc ) goto whereEqScanEst_cancel; + WHERETRACE(("equality scan regions: %d..%d\n", iLower, iUpper)); if( iLower>=iUpper ){ nRowEst = p->aiRowEst[0]/(SQLITE_INDEX_SAMPLES*2); if( nRowEst<*pnRow ) *pnRow = nRowEst; @@ -2687,9 +2699,11 @@ static void bestBtreeIndex( Expr *pExpr = pTerm->pExpr; wsFlags |= WHERE_COLUMN_IN; if( ExprHasProperty(pExpr, EP_xIsSelect) ){ + /* "x IN (SELECT ...)": Assume the SELECT returns 25 rows */ nInMul *= 25; bInEst = 1; }else if( ALWAYS(pExpr->x.pList) ){ + /* "x IN (value, value, ...)" */ nInMul *= pExpr->x.pList->nExpr + 1; } }else if( pTerm->eOperator & WO_ISNULL ){ @@ -2767,8 +2781,8 @@ static void bestBtreeIndex( } /* - ** Estimate the number of rows of output. For an IN operator, - ** do not let the estimate exceed half the rows in the table. + ** Estimate the number of rows of output. For an "x IN (SELECT...)" + ** constraint, do not let the estimate exceed half the rows in the table. */ nRow = (double)(aiRowEst[nEq] * nInMul); if( bInEst && nRow*2>aiRowEst[0] ){ From 0c50fa0f61724035b6be561c17e50dc0407d4305 Mon Sep 17 00:00:00 2001 From: drh Date: Fri, 21 Jan 2011 16:27:18 +0000 Subject: [PATCH 05/11] Make use of histogram data to make better estimates for the number of rows that will be returned from "x IN (v1,v2,v3,...)" constraints. 
FossilOrigin-Name: fd3977a27ae68e694df12a4713e55515c1e87c5d --- manifest | 18 ++++----- manifest.uuid | 2 +- src/where.c | 103 ++++++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 97 insertions(+), 26 deletions(-) diff --git a/manifest b/manifest index bb09b610c7..e202477c08 100644 --- a/manifest +++ b/manifest @@ -1,8 +1,8 @@ -----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 -C Add\sthe\sability\sto\suse\sindices\swhen\sa\srange\scontraint\sis\sbounded\son\nthe\slower\send\sby\sNULL. -D 2011-01-21T14:37:04.663 +C Make\suse\sof\shistogram\sdata\sto\smake\sbetter\sestimates\sfor\sthe\snumber\sof\srows\nthat\swill\sbe\sreturned\sfrom\s"x\sIN\s(v1,v2,v3,...)"\sconstraints. +D 2011-01-21T16:27:18.621 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in de6498556d536ae60bb8bb10e8c1ba011448658c F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -243,7 +243,7 @@ F src/vtab.c b297e8fa656ab5e66244ab15680d68db0adbec30 F src/wal.c dbca424f71678f663a286ab2a98f947af1d412a7 F src/wal.h c1aac6593a0b02b15dc625987e619edeab39292e F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f -F src/where.c cf219a4275cf430d0e7df9d2db04e9ba29702f8e +F src/where.c 7f2844afffd9e09373e874a74de81d3502b2a35c F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2 F test/alias.test 4529fbc152f190268a15f9384a5651bbbabc9d87 F test/all.test 51756962d522e474338e9b2ebb26e7364d4aa125 @@ -900,14 +900,14 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P c7b59afaf0c0bf85dbaf0a122cc8d65fca93680f -R 07d2f363ee58370384294fd182ee30ff +P f73a167b434fadcbbd15e3891c4b7f4f87f6363c +R 46f7d508c889f9891a64638b5f1737ae U drh -Z 32ed47abc5f3838cb9feba0ba8b99416 +Z 14ba122ed035896c7c1b08aa324c4833 -----BEGIN PGP SIGNATURE----- Version: GnuPG 
v1.4.6 (GNU/Linux) -iD8DBQFNOZoToxKgR168RlERAkh6AJ4kec2vRcpVEtGIXoGz4TjpsHbYigCeMtj1 -RYqs8Oaohb8CL1KIU7eSI9Q= -=lXJE +iD8DBQFNObPpoxKgR168RlERAmgEAJ97hcV3wI5jmVOjUrAeDzSnM45gLACghPy2 +7kt0j2FfeGbbS4tWO9hsJaU= +=BYPr -----END PGP SIGNATURE----- diff --git a/manifest.uuid b/manifest.uuid index a9f1839b77..ad6a13da6f 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -f73a167b434fadcbbd15e3891c4b7f4f87f6363c \ No newline at end of file +fd3977a27ae68e694df12a4713e55515c1e87c5d \ No newline at end of file diff --git a/src/where.c b/src/where.c index 57552a4cba..8eaef351d7 100644 --- a/src/where.c +++ b/src/where.c @@ -2475,18 +2475,19 @@ range_est_fallback: ** column of an index and sqlite_stat2 histogram data is available ** for that index. ** -** Write the estimated row count into *pnRow. If unable to make -** an estimate, leave *pnRow unchanged. +** Write the estimated row count into *pnRow and return SQLITE_OK. +** If unable to make an estimate, leave *pnRow unchanged and return +** non-zero. ** ** This routine can fail if it is unable to load a collating sequence ** required for string comparison, or if unable to allocate memory ** for a UTF conversion required for comparison. The error is stored ** in the pParse structure. 
*/ -void whereEqScanEst( +int whereEqualScanEst( Parse *pParse, /* Parsing & code generating context */ Index *p, /* The index whose left-most column is pTerm */ - WhereTerm *pTerm, /* The x=VALUE constraint */ + Expr *pExpr, /* Expression for VALUE in the x=VALUE constraint */ double *pnRow /* Write the revised row estimate here */ ){ sqlite3_value *pRhs = 0; /* VALUE on right-hand side of pTerm */ @@ -2496,14 +2497,14 @@ void whereEqScanEst( double nRowEst; /* New estimate of the number of rows */ assert( p->aSample!=0 ); - assert( pTerm->eOperator==WO_EQ ); aff = p->pTable->aCol[p->aiColumn[0]].affinity; - rc = valueFromExpr(pParse, pTerm->pExpr->pRight, aff, &pRhs); - if( rc ) goto whereEqScanEst_cancel; + rc = valueFromExpr(pParse, pExpr, aff, &pRhs); + if( rc ) goto whereEqualScanEst_cancel; + if( pRhs==0 ) return SQLITE_NOTFOUND; rc = whereRangeRegion(pParse, p, pRhs, 0, &iLower); - if( rc ) goto whereEqScanEst_cancel; + if( rc ) goto whereEqualScanEst_cancel; rc = whereRangeRegion(pParse, p, pRhs, 1, &iUpper); - if( rc ) goto whereEqScanEst_cancel; + if( rc ) goto whereEqualScanEst_cancel; WHERETRACE(("equality scan regions: %d..%d\n", iLower, iUpper)); if( iLower>=iUpper ){ nRowEst = p->aiRowEst[0]/(SQLITE_INDEX_SAMPLES*2); @@ -2513,8 +2514,76 @@ void whereEqScanEst( *pnRow = nRowEst; } -whereEqScanEst_cancel: +whereEqualScanEst_cancel: sqlite3ValueFree(pRhs); + return rc; +} +#endif /* defined(SQLITE_ENABLE_STAT2) */ + +#ifdef SQLITE_ENABLE_STAT2 +/* +** Estimate the number of rows that will be returned based on +** an IN constraint "x IN (V1,V2,V3,...)" where the right-hand side +** of the IN operator is a list of values. +** +** Write the estimated row count into *pnRow and return SQLITE_OK. +** If unable to make an estimate, leave *pnRow unchanged and return +** non-zero. 
+** +** This routine can fail if it is unable to load a collating sequence +** required for string comparison, or if unable to allocate memory +** for a UTF conversion required for comparison. The error is stored +** in the pParse structure. +*/ +int whereInScanEst( + Parse *pParse, /* Parsing & code generating context */ + Index *p, /* The index whose left-most column is pTerm */ + ExprList *pList, /* The value list on the RHS of "x IN (v1,v2,v3,...)" */ + double *pnRow /* Write the revised row estimate here */ +){ + sqlite3_value *pVal = 0; /* One value from list */ + int iLower, iUpper; /* Range of histogram regions containing pRhs */ + u8 aff; /* Column affinity */ + int rc; /* Subfunction return code */ + double nRowEst; /* New estimate of the number of rows */ + int nRegion = 0; /* Number of histogram regions spanned */ + int nSingle = 0; /* Count of values contained within one region */ + int nNotFound = 0; /* Count of values that are not constants */ + int i; /* Loop counter */ + u8 aHit[SQLITE_INDEX_SAMPLES+1]; /* Histogram regions that are spanned */ + + assert( p->aSample!=0 ); + aff = p->pTable->aCol[p->aiColumn[0]].affinity; + memset(aHit, 0, sizeof(aHit)); + for(i=0; inExpr; i++){ + sqlite3ValueFree(pVal); + rc = valueFromExpr(pParse, pList->a[i].pExpr, aff, &pVal); + if( rc ) break; + if( pVal==0 ){ + nNotFound++; + continue; + } + rc = whereRangeRegion(pParse, p, pVal, 0, &iLower); + if( rc ) break; + rc = whereRangeRegion(pParse, p, pVal, 1, &iUpper); + if( rc ) break; + if( iLower>=iUpper ){ + nSingle++; + } + assert( iLower>=0 && iUpper<=SQLITE_INDEX_SAMPLES ); + while( iLower<=iUpper ) aHit[iLower++] = 1; + } + if( rc==SQLITE_OK ){ + for(i=nRegion=0; iaiRowEst[0]/(SQLITE_INDEX_SAMPLES+1) + + nNotFound*p->aiRowEst[1]; + if( nRowEst > p->aiRowEst[0] ) nRowEst = p->aiRowEst[0]; + *pnRow = nRowEst; + WHERETRACE(("IN row estimate: nRegion=%d, nSingle=%d, nNotFound=%d\n", + nRegion, nSingle, nNotFound)); + } + sqlite3ValueFree(pVal); + return rc; } 
#endif /* defined(SQLITE_ENABLE_STAT2) */ @@ -2686,7 +2755,7 @@ static void bestBtreeIndex( int bLookup = 0; WhereTerm *pTerm; /* A single term of the WHERE clause */ #ifdef SQLITE_ENABLE_STAT2 - WhereTerm *pFirstEqTerm = 0; /* First WO_EQ term */ + WhereTerm *pFirstTerm = 0; /* First term matching the index */ #endif /* Determine the values of nEq and nInMul */ @@ -2710,9 +2779,7 @@ static void bestBtreeIndex( wsFlags |= WHERE_COLUMN_NULL; } #ifdef SQLITE_ENABLE_STAT2 - else if( nEq==0 && pProbe->aSample ){ - pFirstEqTerm = pTerm; - } + if( nEq==0 && pProbe->aSample ) pFirstTerm = pTerm; #endif used |= pTerm->prereqRight; } @@ -2796,8 +2863,12 @@ static void bestBtreeIndex( ** to get a better estimate on the number of rows based on ** VALUE and how common that value is according to the histogram. */ - if( nRow>(double)1 && nEq==1 && pFirstEqTerm!=0 ){ - whereEqScanEst(pParse, pProbe, pFirstEqTerm, &nRow); + if( nRow>(double)1 && nEq==1 && pFirstTerm!=0 ){ + if( pFirstTerm->eOperator==WO_EQ ){ + whereEqualScanEst(pParse, pProbe, pFirstTerm->pExpr->pRight, &nRow); + }else if( pFirstTerm->eOperator==WO_IN && bInEst==0 ){ + whereInScanEst(pParse, pProbe, pFirstTerm->pExpr->x.pList, &nRow); + } } #endif /* SQLITE_ENABLE_STAT2 */ From 5ac06071690a465efea1c904fce30f3b56bfc2a6 Mon Sep 17 00:00:00 2001 From: drh Date: Fri, 21 Jan 2011 18:18:13 +0000 Subject: [PATCH 06/11] Adjustments to the result row estimator for the IN operator so that it gives the same estimates as the equivalent OR operator. Test cases for the same. 
FossilOrigin-Name: c82cb9c028b3ba5463ae50c30196dbf157a7a305 --- manifest | 20 +++++++++---------- manifest.uuid | 2 +- src/where.c | 49 +++++++++++++++++++++++++++++----------------- test/analyze5.test | 34 +++++++++++++++++++++++++++++++- 4 files changed, 75 insertions(+), 30 deletions(-) diff --git a/manifest b/manifest index e202477c08..f65504e819 100644 --- a/manifest +++ b/manifest @@ -1,8 +1,8 @@ -----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 -C Make\suse\sof\shistogram\sdata\sto\smake\sbetter\sestimates\sfor\sthe\snumber\sof\srows\nthat\swill\sbe\sreturned\sfrom\s"x\sIN\s(v1,v2,v3,...)"\sconstraints. -D 2011-01-21T16:27:18.621 +C Adjustments\sto\sthe\sresult\srow\sestimator\sfor\sthe\sIN\soperator\sso\sthat\sit\sgives\nthe\ssame\sestimates\sas\sthe\sequivalent\sOR\soperator.\s\sTest\scases\sfor\sthe\ssame. +D 2011-01-21T18:18:13.960 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in de6498556d536ae60bb8bb10e8c1ba011448658c F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -243,7 +243,7 @@ F src/vtab.c b297e8fa656ab5e66244ab15680d68db0adbec30 F src/wal.c dbca424f71678f663a286ab2a98f947af1d412a7 F src/wal.h c1aac6593a0b02b15dc625987e619edeab39292e F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f -F src/where.c 7f2844afffd9e09373e874a74de81d3502b2a35c +F src/where.c 2de6723cfb051bcfcfd3d3ca1ac04bb1388ba530 F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2 F test/alias.test 4529fbc152f190268a15f9384a5651bbbabc9d87 F test/all.test 51756962d522e474338e9b2ebb26e7364d4aa125 @@ -256,7 +256,7 @@ F test/analyze.test c1eb87067fc16ece7c07e823d6395fd831b270c5 F test/analyze2.test f45ac8d54bdad822139e53fc6307fc6b5ee41c69 F test/analyze3.test 820ddfb7591b49607fbaf77240c7955ac3cabb04 F test/analyze4.test 757b37875cf9bb528d46f74497bc789c88365045 -F test/analyze5.test b2139886ee199a226df8f319e37aa7bd78b8f402 +F test/analyze5.test c19fe600c48dade660eb374fa7209435463c2d4a F test/async.test 
ad4ba51b77cd118911a3fe1356b0809da9c108c3 F test/async2.test bf5e2ca2c96763b4cba3d016249ad7259a5603b6 F test/async3.test 93edaa9122f498e56ea98c36c72abc407f4fb11e @@ -900,14 +900,14 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P f73a167b434fadcbbd15e3891c4b7f4f87f6363c -R 46f7d508c889f9891a64638b5f1737ae +P fd3977a27ae68e694df12a4713e55515c1e87c5d +R cea6312924a8fb4373e961fbaf9716e5 U drh -Z 14ba122ed035896c7c1b08aa324c4833 +Z d2cdc178cbf264c31de567c61a7d5758 -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.6 (GNU/Linux) -iD8DBQFNObPpoxKgR168RlERAmgEAJ97hcV3wI5jmVOjUrAeDzSnM45gLACghPy2 -7kt0j2FfeGbbS4tWO9hsJaU= -=BYPr +iD8DBQFNOc3poxKgR168RlERAkIwAKCEe6e9BZEE6g3M5kOLzfgzYu8BvQCghsyD +JkbODaFMx8NcwWU/YYsOcuo= +=cn1U -----END PGP SIGNATURE----- diff --git a/manifest.uuid b/manifest.uuid index ad6a13da6f..67ab42aca6 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -fd3977a27ae68e694df12a4713e55515c1e87c5d \ No newline at end of file +c82cb9c028b3ba5463ae50c30196dbf157a7a305 \ No newline at end of file diff --git a/src/where.c b/src/where.c index 8eaef351d7..cb0b4638f3 100644 --- a/src/where.c +++ b/src/where.c @@ -117,6 +117,7 @@ struct WhereTerm { #define TERM_ORINFO 0x10 /* Need to free the WhereTerm.u.pOrInfo object */ #define TERM_ANDINFO 0x20 /* Need to free the WhereTerm.u.pAndInfo obj */ #define TERM_OR_OK 0x40 /* Used during OR-clause processing */ +#define TERM_NOHELP 0x80 /* This term does not reduce the search space */ /* ** An instance of the following structure holds all information about a @@ -1060,6 +1061,7 @@ static void exprAnalyzeOrTerm( }else{ sqlite3ExprListDelete(db, pList); } + pTerm->wtFlags |= TERM_NOHELP; pTerm->eOperator = 0; /* case 1 trumps case 2 */ } } @@ -2523,8 +2525,10 @@ whereEqualScanEst_cancel: #ifdef 
SQLITE_ENABLE_STAT2 /* ** Estimate the number of rows that will be returned based on -** an IN constraint "x IN (V1,V2,V3,...)" where the right-hand side -** of the IN operator is a list of values. +** an IN constraint where the right-hand side of the IN operator +** is a list of values. Example: +** +** WHERE x IN (1,2,3,4) ** ** Write the estimated row count into *pnRow and return SQLITE_OK. ** If unable to make an estimate, leave *pnRow unchanged and return @@ -2544,22 +2548,24 @@ int whereInScanEst( sqlite3_value *pVal = 0; /* One value from list */ int iLower, iUpper; /* Range of histogram regions containing pRhs */ u8 aff; /* Column affinity */ - int rc; /* Subfunction return code */ + int rc = SQLITE_OK; /* Subfunction return code */ double nRowEst; /* New estimate of the number of rows */ - int nRegion = 0; /* Number of histogram regions spanned */ - int nSingle = 0; /* Count of values contained within one region */ + int nSpan = 0; /* Number of histogram regions spanned */ + int nSingle = 0; /* Histogram regions hit by a single value */ int nNotFound = 0; /* Count of values that are not constants */ - int i; /* Loop counter */ - u8 aHit[SQLITE_INDEX_SAMPLES+1]; /* Histogram regions that are spanned */ + int i; /* Loop counter */ + u8 aSpan[SQLITE_INDEX_SAMPLES+1]; /* Histogram regions that are spanned */ + u8 aSingle[SQLITE_INDEX_SAMPLES+1]; /* Histogram regions hit once */ assert( p->aSample!=0 ); aff = p->pTable->aCol[p->aiColumn[0]].affinity; - memset(aHit, 0, sizeof(aHit)); + memset(aSpan, 0, sizeof(aSpan)); + memset(aSingle, 0, sizeof(aSingle)); for(i=0; inExpr; i++){ sqlite3ValueFree(pVal); rc = valueFromExpr(pParse, pList->a[i].pExpr, aff, &pVal); if( rc ) break; - if( pVal==0 ){ + if( pVal==0 || sqlite3_value_type(pVal)==SQLITE_NULL ){ nNotFound++; continue; } @@ -2568,19 +2574,26 @@ int whereInScanEst( rc = whereRangeRegion(pParse, p, pVal, 1, &iUpper); if( rc ) break; if( iLower>=iUpper ){ - nSingle++; + aSingle[iLower] = 1; + }else{ + assert( 
iLower>=0 && iUpper<=SQLITE_INDEX_SAMPLES ); + while( iLower=0 && iUpper<=SQLITE_INDEX_SAMPLES ); - while( iLower<=iUpper ) aHit[iLower++] = 1; } if( rc==SQLITE_OK ){ - for(i=nRegion=0; iaiRowEst[0]/(SQLITE_INDEX_SAMPLES+1) + for(i=nSpan=0; i<=SQLITE_INDEX_SAMPLES; i++){ + if( aSpan[i] ){ + nSpan++; + }else if( aSingle[i] ){ + nSingle++; + } + } + nRowEst = (nSpan*2+nSingle)*p->aiRowEst[0]/(2*SQLITE_INDEX_SAMPLES) + nNotFound*p->aiRowEst[1]; if( nRowEst > p->aiRowEst[0] ) nRowEst = p->aiRowEst[0]; *pnRow = nRowEst; - WHERETRACE(("IN row estimate: nRegion=%d, nSingle=%d, nNotFound=%d\n", - nRegion, nSingle, nNotFound)); + WHERETRACE(("IN row estimate: nSpan=%d, nSingle=%d, nNotFound=%d, est=%g\n", + nSpan, nSingle, nNotFound, nRowEst)); } sqlite3ValueFree(pVal); return rc; @@ -2923,7 +2936,7 @@ static void bestBtreeIndex( thisTab = getMask(pWC->pMaskSet, iCur); for(pTerm=pWC->a, k=pWC->nTerm; nRow>2 && k; k--, pTerm++){ - if( pTerm->wtFlags & TERM_VIRTUAL ) continue; + if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_NOHELP) ) continue; if( (pTerm->prereqAll & notValid)!=thisTab ) continue; if( pTerm->eOperator & (WO_EQ|WO_IN|WO_ISNULL) ){ if( nSkipEq ){ @@ -2937,7 +2950,7 @@ static void bestBtreeIndex( } }else if( pTerm->eOperator & (WO_LT|WO_LE|WO_GT|WO_GE) ){ if( nSkipRange ){ - /* Ignore the first nBound range constraints since the index + /* Ignore the first nSkipRange range constraints since the index ** has already accounted for these */ nSkipRange--; }else{ diff --git a/test/analyze5.test b/test/analyze5.test index a6bf640235..3c89690b4b 100644 --- a/test/analyze5.test +++ b/test/analyze5.test @@ -118,7 +118,7 @@ foreach {testid where rows} { $rows] } foreach {testid where rows} { - 101 {z=-1} 50 + 101 {z=-1} 50 102 {z=0} 400 103 {z=1} 300 104 {z=2} 200 @@ -138,6 +138,38 @@ foreach {testid where rows} { } [format {0 0 0 {SEARCH TABLE t1 USING INDEX t1z (z=?) (~%d rows)}} $rows] } +# for the next sequence of tests a value of rows<=0 means a full-table scan +# is used. 
+# +#set sqlite_where_trace 1 +foreach {testid where rows} { + 201 {z IN (-1)} 50 + 202 {z IN (0)} 400 + 203 {z IN (1)} 300 + 204 {z IN (2)} 200 + 205 {z IN (3)} 100 + 206 {z IN (4)} 50 + 207 {z IN (0.5)} 50 + 208 {z IN (0,1)} 700 + 209 {z IN (0,1,2)} 900 + 210 {z IN (0,1,2,3)} 0 + 211 {z IN (0,1,2,3,4,5)} 0 + 212 {z IN (1,2)} 500 + 213 {z IN (2,3)} 300 + 214 {z=3 OR z=2} 300 + 215 {z IN (-1,3)} 150 + 216 {z=-1 OR z=3} 150 +} { + if {$rows<=0} { + set ans {SCAN TABLE t1 (~100 rows)} + } else { + set ans [format {SEARCH TABLE t1 USING INDEX t1z (z=?) (~%d rows)} $rows] + } + do_test analyze5-1.$testid { + lindex [eqp "SELECT * FROM t1 WHERE $where"] 3 + } $ans +} + # For the t1.y column, most entries are known to be zero. So do a # full table scan for y=0 but use the index for any other constraint on # y. From 534230cf2e576a26706dfa8379f209f6e71b731b Mon Sep 17 00:00:00 2001 From: drh Date: Sat, 22 Jan 2011 00:10:45 +0000 Subject: [PATCH 07/11] Add the ability to use indices for constraints of the form "x IS NOT NULL" when sqlite_stat2 is available and most entries for column x are NULL. FossilOrigin-Name: 5d5bddd290e71a7b03bcc23ff29881c23233cbff --- manifest | 18 +++++++-------- manifest.uuid | 2 +- src/where.c | 62 +++++++++++++++++++++++++++++++++++++++++---------- 3 files changed, 60 insertions(+), 22 deletions(-) diff --git a/manifest b/manifest index f65504e819..2e9f7b38fc 100644 --- a/manifest +++ b/manifest @@ -1,8 +1,8 @@ -----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 -C Adjustments\sto\sthe\sresult\srow\sestimator\sfor\sthe\sIN\soperator\sso\sthat\sit\sgives\nthe\ssame\sestimates\sas\sthe\sequivalent\sOR\soperator.\s\sTest\scases\sfor\sthe\ssame. -D 2011-01-21T18:18:13.960 +C Add\sthe\sability\sto\suse\sindices\sfor\sconstraints\sof\sthe\sform\s"x\sIS\sNOT\sNULL"\nwhen\ssqlite_stat2\sis\savailable\sand\smost\sentries\sfor\scolumn\sx\sare\sNULL. 
+D 2011-01-22T00:10:45.721 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in de6498556d536ae60bb8bb10e8c1ba011448658c F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -243,7 +243,7 @@ F src/vtab.c b297e8fa656ab5e66244ab15680d68db0adbec30 F src/wal.c dbca424f71678f663a286ab2a98f947af1d412a7 F src/wal.h c1aac6593a0b02b15dc625987e619edeab39292e F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f -F src/where.c 2de6723cfb051bcfcfd3d3ca1ac04bb1388ba530 +F src/where.c 99a9ea77114b649d68d01127331119f6785a80f1 F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2 F test/alias.test 4529fbc152f190268a15f9384a5651bbbabc9d87 F test/all.test 51756962d522e474338e9b2ebb26e7364d4aa125 @@ -900,14 +900,14 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P fd3977a27ae68e694df12a4713e55515c1e87c5d -R cea6312924a8fb4373e961fbaf9716e5 +P c82cb9c028b3ba5463ae50c30196dbf157a7a305 +R 8c710a35ac2f95522b4422902520d5c8 U drh -Z d2cdc178cbf264c31de567c61a7d5758 +Z 5feaab9c960a4232f37e5b9d507f4c5a -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.6 (GNU/Linux) -iD8DBQFNOc3poxKgR168RlERAkIwAKCEe6e9BZEE6g3M5kOLzfgzYu8BvQCghsyD -JkbODaFMx8NcwWU/YYsOcuo= -=cn1U +iD8DBQFNOiCJoxKgR168RlERAvjuAKCFxe3Zz4WQnNCqaR5BtD/txHvS9QCePp1G +iZQ2yz7nxUFtZ+UwOppTLQo= +=91DY -----END PGP SIGNATURE----- diff --git a/manifest.uuid b/manifest.uuid index 67ab42aca6..372d8e6a1e 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -c82cb9c028b3ba5463ae50c30196dbf157a7a305 \ No newline at end of file +5d5bddd290e71a7b03bcc23ff29881c23233cbff \ No newline at end of file diff --git a/src/where.c b/src/where.c index cb0b4638f3..8a7e258bd9 100644 --- a/src/where.c +++ b/src/where.c @@ -117,7 +117,7 @@ struct WhereTerm { #define TERM_ORINFO 0x10 
/* Need to free the WhereTerm.u.pOrInfo object */ #define TERM_ANDINFO 0x20 /* Need to free the WhereTerm.u.pAndInfo obj */ #define TERM_OR_OK 0x40 /* Used during OR-clause processing */ -#define TERM_NOHELP 0x80 /* This term does not reduce the search space */ +#define TERM_VNULL 0x80 /* Manufactured x>NULL or x<=NULL term */ /* ** An instance of the following structure holds all information about a @@ -211,6 +211,7 @@ struct WhereCost { #define WO_ISNULL 0x080 #define WO_OR 0x100 /* Two or more OR-connected terms */ #define WO_AND 0x200 /* Two or more AND-connected terms */ +#define WO_NOOP 0x800 /* This term does not restrict search space */ #define WO_ALL 0xfff /* Mask of all possible WO_* values */ #define WO_SINGLE 0x0ff /* Mask of all non-compound WO_* values */ @@ -1061,8 +1062,7 @@ static void exprAnalyzeOrTerm( }else{ sqlite3ExprListDelete(db, pList); } - pTerm->wtFlags |= TERM_NOHELP; - pTerm->eOperator = 0; /* case 1 trumps case 2 */ + pTerm->eOperator = WO_NOOP; /* case 1 trumps case 2 */ } } } @@ -1326,6 +1326,42 @@ static void exprAnalyze( } #endif /* SQLITE_OMIT_VIRTUALTABLE */ +#ifdef SQLITE_ENABLE_STAT2 + /* When sqlite_stat2 histogram data is available an operator of the + ** form "x IS NOT NULL" can sometimes be evaluated more efficiently + ** as "x>NULL" if x is not an INTEGER PRIMARY KEY. So construct a + ** virtual term of that form. + ** + ** Note that the virtual term must be tagged with TERM_VNULL. This + ** TERM_VNULL tag will suppress the not-null check at the beginning + ** of the loop. Without the TERM_VNULL flag, the not-null check at + ** the start of the loop will prevent any results from being returned. 
+ */ + if( pExpr->op==TK_NOTNULL && pExpr->pLeft->iColumn>=0 ){ + Expr *pNewExpr; + Expr *pLeft = pExpr->pLeft; + int idxNew; + WhereTerm *pNewTerm; + + pNewExpr = sqlite3PExpr(pParse, TK_GT, + sqlite3ExprDup(db, pLeft, 0), + sqlite3PExpr(pParse, TK_NULL, 0, 0, 0), 0); + + idxNew = whereClauseInsert(pWC, pNewExpr, + TERM_VIRTUAL|TERM_DYNAMIC|TERM_VNULL); + testcase( idxNew==0 ); + pNewTerm = &pWC->a[idxNew]; + pNewTerm->leftCursor = pLeft->iTable; + pNewTerm->u.leftColumn = pLeft->iColumn; + pNewTerm->eOperator = WO_GT; + pNewTerm->iParent = idxTerm; + pTerm = &pWC->a[idxTerm]; + pTerm->nChild = 1; + pTerm->wtFlags |= TERM_COPIED; + pNewTerm->prereqAll = pTerm->prereqAll; + } +#endif /* SQLITE_ENABLE_STAT2 */ + /* Prevent ON clause terms of a LEFT JOIN from being used to drive ** an index for tables to the left of the join. */ @@ -2461,11 +2497,9 @@ range_est_fallback: UNUSED_PARAMETER(nEq); #endif assert( pLower || pUpper ); - if( pLower && pUpper ){ - *piEst = 11; - }else{ - *piEst = 33; - } + *piEst = 100; + if( pLower && (pLower->wtFlags & TERM_VNULL)==0 ) *piEst /= 3; + if( pUpper ) *piEst /= 3; return rc; } @@ -2936,7 +2970,7 @@ static void bestBtreeIndex( thisTab = getMask(pWC->pMaskSet, iCur); for(pTerm=pWC->a, k=pWC->nTerm; nRow>2 && k; k--, pTerm++){ - if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_NOHELP) ) continue; + if( pTerm->wtFlags & TERM_VIRTUAL ) continue; if( (pTerm->prereqAll & notValid)!=thisTab ) continue; if( pTerm->eOperator & (WO_EQ|WO_IN|WO_ISNULL) ){ if( nSkipEq ){ @@ -2958,7 +2992,7 @@ static void bestBtreeIndex( ** set size by a factor of 3 */ nRow /= 3; } - }else{ + }else if( pTerm->eOperator!=WO_NOOP ){ /* Any other expression lowers the output row count by half */ nRow /= 2; } @@ -3796,7 +3830,9 @@ static Bitmask codeOneLoopStart( if( pRangeStart ){ Expr *pRight = pRangeStart->pExpr->pRight; sqlite3ExprCode(pParse, pRight, regBase+nEq); - sqlite3ExprCodeIsNullJump(v, pRight, regBase+nEq, addrNxt); + if( (pRangeStart->wtFlags & 
TERM_VNULL)==0 ){ + sqlite3ExprCodeIsNullJump(v, pRight, regBase+nEq, addrNxt); + } if( zStartAff ){ if( sqlite3CompareAffinity(pRight, zStartAff[nEq])==SQLITE_AFF_NONE){ /* Since the comparison is to be performed with no conversions @@ -3835,7 +3871,9 @@ static Bitmask codeOneLoopStart( Expr *pRight = pRangeEnd->pExpr->pRight; sqlite3ExprCacheRemove(pParse, regBase+nEq, 1); sqlite3ExprCode(pParse, pRight, regBase+nEq); - sqlite3ExprCodeIsNullJump(v, pRight, regBase+nEq, addrNxt); + if( (pRangeEnd->wtFlags & TERM_VNULL)==0 ){ + sqlite3ExprCodeIsNullJump(v, pRight, regBase+nEq, addrNxt); + } if( zEndAff ){ if( sqlite3CompareAffinity(pRight, zEndAff[nEq])==SQLITE_AFF_NONE){ /* Since the comparison is to be performed with no conversions From b966194a63d2dac670a22c40098803a6c2d1e761 Mon Sep 17 00:00:00 2001 From: drh Date: Mon, 24 Jan 2011 15:11:23 +0000 Subject: [PATCH 08/11] Change the cost estimator in the query planner to take into account the logN rowid lookup cost when going from an index to a table. FossilOrigin-Name: b442525b0ba642bb8d57b87b7b9e373b6046454a --- manifest | 18 +++++++++--------- manifest.uuid | 2 +- src/where.c | 30 ++++++++++++++---------------- 3 files changed, 24 insertions(+), 26 deletions(-) diff --git a/manifest b/manifest index 2e9f7b38fc..6b94cf8473 100644 --- a/manifest +++ b/manifest @@ -1,8 +1,8 @@ -----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 -C Add\sthe\sability\sto\suse\sindices\sfor\sconstraints\sof\sthe\sform\s"x\sIS\sNOT\sNULL"\nwhen\ssqlite_stat2\sis\savailable\sand\smost\sentries\sfor\scolumn\sx\sare\sNULL. -D 2011-01-22T00:10:45.721 +C Change\sthe\scost\sestimator\sin\sthe\squery\splanner\sto\stake\sinto\saccount\sthe\nlogN\srowid\slookup\scost\swhen\sgoing\sfrom\san\sindex\sto\sa\stable. 
+D 2011-01-24T15:11:23.443 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in de6498556d536ae60bb8bb10e8c1ba011448658c F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -243,7 +243,7 @@ F src/vtab.c b297e8fa656ab5e66244ab15680d68db0adbec30 F src/wal.c dbca424f71678f663a286ab2a98f947af1d412a7 F src/wal.h c1aac6593a0b02b15dc625987e619edeab39292e F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f -F src/where.c 99a9ea77114b649d68d01127331119f6785a80f1 +F src/where.c 87de2616150606fd1b61d7c88afdb90089ddd53d F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2 F test/alias.test 4529fbc152f190268a15f9384a5651bbbabc9d87 F test/all.test 51756962d522e474338e9b2ebb26e7364d4aa125 @@ -900,14 +900,14 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P c82cb9c028b3ba5463ae50c30196dbf157a7a305 -R 8c710a35ac2f95522b4422902520d5c8 +P 5d5bddd290e71a7b03bcc23ff29881c23233cbff +R 803835303ce0b4a50432effc77dd4645 U drh -Z 5feaab9c960a4232f37e5b9d507f4c5a +Z 6e3a0f2d24eebc65cc3b5cda90be2fe9 -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.6 (GNU/Linux) -iD8DBQFNOiCJoxKgR168RlERAvjuAKCFxe3Zz4WQnNCqaR5BtD/txHvS9QCePp1G -iZQ2yz7nxUFtZ+UwOppTLQo= -=91DY +iD4DBQFNPZaeoxKgR168RlERAhweAJipv5VYvhXPpe7yt5oZmj9HR78gAJ9JRIoC +ox/GIojaWZ/2rYPgDsVVnQ== +=pp6G -----END PGP SIGNATURE----- diff --git a/manifest.uuid b/manifest.uuid index 372d8e6a1e..78fe186878 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -5d5bddd290e71a7b03bcc23ff29881c23233cbff \ No newline at end of file +b442525b0ba642bb8d57b87b7b9e373b6046454a \ No newline at end of file diff --git a/src/where.c b/src/where.c index 8a7e258bd9..d0540a4903 100644 --- a/src/where.c +++ b/src/where.c @@ -2919,31 +2919,29 @@ static void bestBtreeIndex( } #endif /* 
SQLITE_ENABLE_STAT2 */ - /* Assume constant cost to access a row and logarithmic cost to - ** do a binary search. Hence, the initial cost is the number of output - ** rows plus log2(table-size) times the number of binary searches. - */ - cost = nRow + nInMul*estLog(aiRowEst[0]); - /* Adjust the number of rows and the cost downward to reflect rows ** that are excluded by range constraints. */ nRow = (nRow * (double)estBound) / (double)100; - cost = (cost * (double)estBound) / (double)100; - /* Add in the estimated cost of sorting the result + /* Assume constant cost to access a row and logarithmic cost to + ** do a binary search. Hence, the initial cost is the number of output + ** rows plus log2(table-size) times the number of binary searches. + */ + if( pIdx && bLookup ){ + cost = nRow + (nInMul+nRow)*estLog(aiRowEst[0]); + }else{ + cost = nRow + nInMul*estLog(aiRowEst[0]); + } + + /* Add in the estimated cost of sorting the result. This cost is expanded + ** by a fudge factor of 3.0 to account for the fact that a sorting step + ** involves a write and is thus more expensive than a lookup step. */ if( bSort ){ - cost += cost*estLog(cost); + cost += nRow*estLog(nRow)*(double)3; } - /* If all information can be taken directly from the index, we avoid - ** doing table lookups. This reduces the cost by half. (Not really - - ** this needs to be fixed.) - */ - if( pIdx && bLookup==0 ){ - cost /= (double)2; - } /**** Cost of using this index has now been computed ****/ /* If there are additional constraints on this table that cannot From fc4491366b782f48fcc05f7ec2b536fb07ca1f84 Mon Sep 17 00:00:00 2001 From: drh Date: Mon, 24 Jan 2011 17:46:35 +0000 Subject: [PATCH 09/11] Restructuring and generalizing analyze5.test. The whole script is currently disabled and will need to be reenabled prior to merging with trunk. 
FossilOrigin-Name: 31fcc7067bd76da4bf19232811b90cf8b76eed74 --- manifest | 18 +-- manifest.uuid | 2 +- test/analyze5.test | 293 +++++++++++++++++++++++---------------------- 3 files changed, 161 insertions(+), 152 deletions(-) diff --git a/manifest b/manifest index 6b94cf8473..2793b4be74 100644 --- a/manifest +++ b/manifest @@ -1,8 +1,8 @@ -----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 -C Change\sthe\scost\sestimator\sin\sthe\squery\splanner\sto\stake\sinto\saccount\sthe\nlogN\srowid\slookup\scost\swhen\sgoing\sfrom\san\sindex\sto\sa\stable. -D 2011-01-24T15:11:23.443 +C Restructuring\sand\sgeneralizing\sanalyze5.test.\s\s\sThe\swhole\sscript\sis\ncurrently\sdisabled\sand\swill\sneed\sto\sbe\sreenabled\sprior\sto\smerging\swith\strunk. +D 2011-01-24T17:46:35.872 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in de6498556d536ae60bb8bb10e8c1ba011448658c F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -256,7 +256,7 @@ F test/analyze.test c1eb87067fc16ece7c07e823d6395fd831b270c5 F test/analyze2.test f45ac8d54bdad822139e53fc6307fc6b5ee41c69 F test/analyze3.test 820ddfb7591b49607fbaf77240c7955ac3cabb04 F test/analyze4.test 757b37875cf9bb528d46f74497bc789c88365045 -F test/analyze5.test c19fe600c48dade660eb374fa7209435463c2d4a +F test/analyze5.test 12df6def85e21971888f8be3c1867a505f1bf587 F test/async.test ad4ba51b77cd118911a3fe1356b0809da9c108c3 F test/async2.test bf5e2ca2c96763b4cba3d016249ad7259a5603b6 F test/async3.test 93edaa9122f498e56ea98c36c72abc407f4fb11e @@ -900,14 +900,14 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P 5d5bddd290e71a7b03bcc23ff29881c23233cbff -R 803835303ce0b4a50432effc77dd4645 +P b442525b0ba642bb8d57b87b7b9e373b6046454a +R ebf83ed63e8d1b48962a32de01a8b2cc U drh -Z 
6e3a0f2d24eebc65cc3b5cda90be2fe9 +Z a63bcf5b3583a55e26e7d3f132337aba -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.6 (GNU/Linux) -iD4DBQFNPZaeoxKgR168RlERAhweAJipv5VYvhXPpe7yt5oZmj9HR78gAJ9JRIoC -ox/GIojaWZ/2rYPgDsVVnQ== -=pp6G +iD8DBQFNPbr/oxKgR168RlERAhb1AJ99UR+RTzr0SNXjxuLAr4iQrdLWlQCcC8Ik +EMLXFqheBOQt775+utT5Fjc= +=NdWh -----END PGP SIGNATURE----- diff --git a/manifest.uuid b/manifest.uuid index 78fe186878..0a3f6d961e 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -b442525b0ba642bb8d57b87b7b9e373b6046454a \ No newline at end of file +31fcc7067bd76da4bf19232811b90cf8b76eed74 \ No newline at end of file diff --git a/test/analyze5.test b/test/analyze5.test index 3c89690b4b..6366439f53 100644 --- a/test/analyze5.test +++ b/test/analyze5.test @@ -14,6 +14,8 @@ # with many repeated values and only a few distinct values. # +return + set testdir [file dirname $argv0] source $testdir/tester.tcl @@ -28,161 +30,168 @@ proc eqp {sql {db db}} { uplevel execsql [list "EXPLAIN QUERY PLAN $sql"] $db } +unset -nocomplain i t u v w x y z do_test analyze5-1.0 { - execsql { CREATE TABLE t1(x INTEGER PRIMARY KEY, y, z) } + db eval {CREATE TABLE t1(t,u,v TEXT COLLATE nocase,w,x,y,z)} for {set i 0} {$i < 1000} {incr i} { - set j [expr {$i>=25 && $i<=50}] - set k [expr {($i>=400) + ($i>=700) + ($i>=875)}] - execsql { INSERT INTO t1 VALUES($i,$j,$k) } + set y [expr {$i>=25 && $i<=50}] + set z [expr {($i>=400) + ($i>=700) + ($i>=875)}] + set x $z + set w $z + set t [expr {$z+0.5}] + switch $z { + 0 {set u "alpha"; unset x} + 1 {set u "bravo"} + 2 {set u "charlie"} + 3 {set u "delta"; unset w} + } + if {$i%2} {set v $u} {set v [string toupper $u]} + db eval {INSERT INTO t1 VALUES($t,$u,$v,$w,$x,$y,$z)} } - execsql { - CREATE INDEX t1y ON t1(y); - CREATE INDEX t1z ON t1(z); + db eval { + CREATE INDEX t1t ON t1(t); -- 0.5, 1.5, 2.5, and 3.5 + CREATE INDEX t1u ON t1(u); -- text + CREATE INDEX t1v ON t1(v); -- mixed case text + CREATE INDEX t1w ON t1(w); -- integers 0, 1, 2 
and a few NULLs + CREATE INDEX t1x ON t1(x); -- integers 1, 2, 3 and many NULLs + CREATE INDEX t1y ON t1(y); -- integers 0 and very few 1s + CREATE INDEX t1z ON t1(z); -- integers 0, 1, 2, and 3 ANALYZE; - SELECT * FROM sqlite_stat2 ORDER BY 1, 2, 3; + SELECT sample FROM sqlite_stat2 WHERE idx='t1u' ORDER BY sampleno; } -} [list t1 t1y 0 0 \ - t1 t1y 1 0 \ - t1 t1y 2 0 \ - t1 t1y 3 0 \ - t1 t1y 4 0 \ - t1 t1y 5 0 \ - t1 t1y 6 0 \ - t1 t1y 7 0 \ - t1 t1y 8 0 \ - t1 t1y 9 0 \ - t1 t1z 0 0 \ - t1 t1z 1 0 \ - t1 t1z 2 0 \ - t1 t1z 3 0 \ - t1 t1z 4 1 \ - t1 t1z 5 1 \ - t1 t1z 6 1 \ - t1 t1z 7 2 \ - t1 t1z 8 2 \ - t1 t1z 9 3] +} {alpha alpha alpha alpha bravo bravo bravo charlie charlie delta} +do_test analyze5-1.1 { + string tolower \ + [db eval {SELECT sample from sqlite_stat2 WHERE idx='t1v' ORDER BY sampleno}] +} {alpha alpha alpha alpha bravo bravo bravo charlie charlie delta} +do_test analyze5-1.2 { + db eval {SELECT sample from sqlite_stat2 WHERE idx='t1w' ORDER BY sampleno} +} {{} 0 0 0 0 1 1 1 2 2} +do_test analyze5-1.3 { + db eval {SELECT sample from sqlite_stat2 WHERE idx='t1x' ORDER BY sampleno} +} {{} {} {} {} 1 1 1 2 2 3} +do_test analyze5-1.4 { + db eval {SELECT sample from sqlite_stat2 WHERE idx='t1y' ORDER BY sampleno} +} {0 0 0 0 0 0 0 0 0 0} +do_test analyze5-1.5 { + db eval {SELECT sample from sqlite_stat2 WHERE idx='t1z' ORDER BY sampleno} +} {0 0 0 0 1 1 1 2 2 3} +do_test analyze5-1.6 { + db eval {SELECT sample from sqlite_stat2 WHERE idx='t1t' ORDER BY sampleno} +} {0.5 0.5 0.5 0.5 1.5 1.5 1.5 2.5 2.5 3.5} + # Verify that range queries generate the correct row count estimates # -foreach {testid where rows} { - 1 {z>=0 AND z<=0} 400 - 2 {z>=1 AND z<=1} 300 - 3 {z>=2 AND z<=2} 200 - 4 {z>=3 AND z<=3} 100 - 5 {z>=4 AND z<=4} 50 - 6 {z>=-1 AND z<=-1} 50 - 7 {z>1 AND z<3} 200 - 8 {z>0 AND z<100} 600 - 9 {z>=1 AND z<100} 600 - 10 {z>1 AND z<100} 300 - 11 {z>=2 AND z<100} 300 - 12 {z>2 AND z<100} 100 - 13 {z>=3 AND z<100} 100 - 14 {z>3 AND z<100} 50 - 15 
{z>=4 AND z<100} 50 - 16 {z>=-100 AND z<=-1} 50 - 17 {z>=-100 AND z<=0} 400 - 18 {z>=-100 AND z<0} 50 - 19 {z>=-100 AND z<=1} 700 - 20 {z>=-100 AND z<2} 700 - 21 {z>=-100 AND z<=2} 900 - 22 {z>=-100 AND z<3} 900 +foreach {testid where index rows} { + 1 {z>=0 AND z<=0} t1z 400 + 2 {z>=1 AND z<=1} t1z 300 + 3 {z>=2 AND z<=2} t1z 200 + 4 {z>=3 AND z<=3} t1z 100 + 5 {z>=4 AND z<=4} t1z 50 + 6 {z>=-1 AND z<=-1} t1z 50 + 7 {z>1 AND z<3} t1z 200 + 8 {z>0 AND z<100} t1z 600 + 9 {z>=1 AND z<100} t1z 600 + 10 {z>1 AND z<100} t1z 300 + 11 {z>=2 AND z<100} t1z 300 + 12 {z>2 AND z<100} t1z 100 + 13 {z>=3 AND z<100} t1z 100 + 14 {z>3 AND z<100} t1z 50 + 15 {z>=4 AND z<100} t1z 50 + 16 {z>=-100 AND z<=-1} t1z 50 + 17 {z>=-100 AND z<=0} t1z 400 + 18 {z>=-100 AND z<0} t1z 50 + 19 {z>=-100 AND z<=1} t1z 700 + 20 {z>=-100 AND z<2} t1z 700 + 21 {z>=-100 AND z<=2} t1z 900 + 22 {z>=-100 AND z<3} t1z 900 + + 31 {z>=0.0 AND z<=0.0} t1z 400 + 32 {z>=1.0 AND z<=1.0} t1z 300 + 33 {z>=2.0 AND z<=2.0} t1z 200 + 34 {z>=3.0 AND z<=3.0} t1z 100 + 35 {z>=4.0 AND z<=4.0} t1z 50 + 36 {z>=-1.0 AND z<=-1.0} t1z 50 + 37 {z>1.5 AND z<3.0} t1z 200 + 38 {z>0.5 AND z<100} t1z 600 + 39 {z>=1.0 AND z<100} t1z 600 + 40 {z>1.5 AND z<100} t1z 300 + 41 {z>=2.0 AND z<100} t1z 300 + 42 {z>2.1 AND z<100} t1z 100 + 43 {z>=3.0 AND z<100} t1z 100 + 44 {z>3.2 AND z<100} t1z 50 + 45 {z>=4.0 AND z<100} t1z 50 + 46 {z>=-100 AND z<=-1.0} t1z 50 + 47 {z>=-100 AND z<=0.0} t1z 400 + 48 {z>=-100 AND z<0.0} t1z 50 + 49 {z>=-100 AND z<=1.0} t1z 700 + 50 {z>=-100 AND z<2.0} t1z 700 + 51 {z>=-100 AND z<=2.0} t1z 900 + 52 {z>=-100 AND z<3.0} t1z 900 + + 101 {z=-1} t1z 50 + 102 {z=0} t1z 400 + 103 {z=1} t1z 300 + 104 {z=2} t1z 200 + 105 {z=3} t1z 100 + 106 {z=4} t1z 50 + 107 {z=-10.0} t1z 50 + 108 {z=0.0} t1z 400 + 109 {z=1.0} t1z 300 + 110 {z=2.0} t1z 200 + 111 {z=3.0} t1z 100 + 112 {z=4.0} t1z 50 + 113 {z=1.5} t1z 50 + 114 {z=2.5} t1z 50 + + 201 {z IN (-1)} t1z 50 + 202 {z IN (0)} t1z 400 + 203 {z IN (1)} t1z 300 + 204 {z IN (2)} 
t1z 200 + 205 {z IN (3)} t1z 100 + 206 {z IN (4)} t1z 50 + 207 {z IN (0.5)} t1z 50 + 208 {z IN (0,1)} t1z 700 + 209 {z IN (0,1,2)} t1z 900 + 210 {z IN (0,1,2,3)} {} 100 + 211 {z IN (0,1,2,3,4,5)} {} 100 + 212 {z IN (1,2)} t1z 500 + 213 {z IN (2,3)} t1z 300 + 214 {z=3 OR z=2} t1z 300 + 215 {z IN (-1,3)} t1z 150 + 216 {z=-1 OR z=3} t1z 150 - 31 {z>=0.0 AND z<=0.0} 400 - 32 {z>=1.0 AND z<=1.0} 300 - 33 {z>=2.0 AND z<=2.0} 200 - 34 {z>=3.0 AND z<=3.0} 100 - 35 {z>=4.0 AND z<=4.0} 50 - 36 {z>=-1.0 AND z<=-1.0} 50 - 37 {z>1.5 AND z<3.0} 200 - 38 {z>0.5 AND z<100} 600 - 39 {z>=1.0 AND z<100} 600 - 40 {z>1.5 AND z<100} 300 - 41 {z>=2.0 AND z<100} 300 - 42 {z>2.1 AND z<100} 100 - 43 {z>=3.0 AND z<100} 100 - 44 {z>3.2 AND z<100} 50 - 45 {z>=4.0 AND z<100} 50 - 46 {z>=-100 AND z<=-1.0} 50 - 47 {z>=-100 AND z<=0.0} 400 - 48 {z>=-100 AND z<0.0} 50 - 49 {z>=-100 AND z<=1.0} 700 - 50 {z>=-100 AND z<2.0} 700 - 51 {z>=-100 AND z<=2.0} 900 - 52 {z>=-100 AND z<3.0} 900 + 300 {y=0} {} 100 + 301 {y=1} t1y 50 + 302 {y=0.1} t1y 50 } { - do_test analyze5-1.$testid { - eqp "SELECT * FROM t1 WHERE $where" - } [format {0 0 0 {SEARCH TABLE t1 USING INDEX t1z (z>? AND z Date: Fri, 28 Jan 2011 01:57:41 +0000 Subject: [PATCH 10/11] Change the weighting of binary searches on tables to 1/10th the cost of a search on an index. Change the assumed reduction in search space from a indexed range constraint from 1/3rd to 1/4th. Do not let the estimated number of rows drop below 1. 
FossilOrigin-Name: 4847c6cb71423248b186ab7842b97c83e2f5fefd --- manifest | 36 ++++++------- manifest.uuid | 2 +- src/where.c | 113 +++++++++++++++++++++++++++------------- test/analyze2.test | 12 +++-- test/analyze3.test | 2 +- test/e_createtable.test | 2 +- test/eqp.test | 4 +- test/indexedby.test | 4 +- test/like.test | 38 +++++++------- test/minmax3.test | 1 + test/where3.test | 5 +- test/where9.test | 2 +- 12 files changed, 133 insertions(+), 88 deletions(-) diff --git a/manifest b/manifest index 2793b4be74..d4624a6e59 100644 --- a/manifest +++ b/manifest @@ -1,8 +1,8 @@ -----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 -C Restructuring\sand\sgeneralizing\sanalyze5.test.\s\s\sThe\swhole\sscript\sis\ncurrently\sdisabled\sand\swill\sneed\sto\sbe\sreenabled\sprior\sto\smerging\swith\strunk. -D 2011-01-24T17:46:35.872 +C Change\sthe\sweighting\sof\sbinary\ssearches\son\stables\sto\s1/10th\sthe\scost\sof\sa\nsearch\son\san\sindex.\s\sChange\sthe\sassumed\sreduction\sin\ssearch\sspace\sfrom\sa\nindexed\srange\sconstraint\sfrom\s1/3rd\sto\s1/4th.\s\sDo\snot\slet\sthe\sestimated\s\nnumber\sof\srows\sdrop\sbelow\s1. 
+D 2011-01-28T01:57:41.767 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in de6498556d536ae60bb8bb10e8c1ba011448658c F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -243,7 +243,7 @@ F src/vtab.c b297e8fa656ab5e66244ab15680d68db0adbec30 F src/wal.c dbca424f71678f663a286ab2a98f947af1d412a7 F src/wal.h c1aac6593a0b02b15dc625987e619edeab39292e F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f -F src/where.c 87de2616150606fd1b61d7c88afdb90089ddd53d +F src/where.c f4915ac03e5e42c8416b35ca3ba34af841c00d12 F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2 F test/alias.test 4529fbc152f190268a15f9384a5651bbbabc9d87 F test/all.test 51756962d522e474338e9b2ebb26e7364d4aa125 @@ -253,8 +253,8 @@ F test/alter3.test 8677e48d95536f7a6ed86a1a774744dadcc22b07 F test/alter4.test 1e5dd6b951e9f65ca66422edff02e56df82dd403 F test/altermalloc.test e81ac9657ed25c6c5bb09bebfa5a047cd8e4acfc F test/analyze.test c1eb87067fc16ece7c07e823d6395fd831b270c5 -F test/analyze2.test f45ac8d54bdad822139e53fc6307fc6b5ee41c69 -F test/analyze3.test 820ddfb7591b49607fbaf77240c7955ac3cabb04 +F test/analyze2.test 8f2b1534d43f5547ce9a6b736c021d4192c75be3 +F test/analyze3.test d61f55d8b472fc6e713160b1e577f7a68e63f38b F test/analyze4.test 757b37875cf9bb528d46f74497bc789c88365045 F test/analyze5.test 12df6def85e21971888f8be3c1867a505f1bf587 F test/async.test ad4ba51b77cd118911a3fe1356b0809da9c108c3 @@ -357,7 +357,7 @@ F test/descidx2.test 9f1a0c83fd57f8667c82310ca21b30a350888b5d F test/descidx3.test fe720e8b37d59f4cef808b0bf4e1b391c2e56b6f F test/diskfull.test 0cede7ef9d8f415d9d3944005c76be7589bb5ebb F test/distinctagg.test 1a6ef9c87a58669438fc771450d7a72577417376 -F test/e_createtable.test b8f5286879315d5b7f4cc5ead1afda4846f0c0bb +F test/e_createtable.test b40fc61bc4f1ad2a3c84590bd1d711507263d921 F test/e_delete.test 55d868b647acc091c261a10b9b0cb0ab660a6acb F test/e_droptrigger.test ddd4b28ed8a3d81bd5153fa0ab7559529a2ca03a F 
test/e_dropview.test b347bab30fc8de67b131594b3cd6f3d3bdaa753d @@ -375,7 +375,7 @@ F test/enc.test e54531cd6bf941ee6760be041dff19a104c7acea F test/enc2.test 6d91a5286f59add0cfcbb2d0da913b76f2242398 F test/enc3.test 5c550d59ff31dccdba5d1a02ae11c7047d77c041 F test/enc4.test 4b575ef09e0eff896e73bd24076f96c2aa6a42de -F test/eqp.test 69670e7919030f21de29fb99bf1d68f97aedcbdb +F test/eqp.test 8f535d902b2df780d22edb95113880480664f976 F test/eval.test bc269c365ba877554948441e91ad5373f9f91be3 F test/exclusive.test 53e1841b422e554cecf0160f937c473d6d0e3062 F test/exclusive2.test c2f2b2242dc436a12df8dd531e06843053bd3b9a @@ -486,7 +486,7 @@ F test/incrvacuum_ioerr.test 57d2f5777ab13fa03b87b262a4ea1bad5cfc0291 F test/index.test df7c00c6edd9504ab71c83a9514f1c5ca0fa54d8 F test/index2.test ee83c6b5e3173a3d7137140d945d9a5d4fdfb9d6 F test/index3.test 423a25c789fc8cc51aaf2a4370bbdde2d9e9eed7 -F test/indexedby.test d7367c5a0e8ed8db642824a68126753e0808c706 +F test/indexedby.test be501e381b82b2f8ab406309ba7aac46e221f4ad F test/init.test 15c823093fdabbf7b531fe22cf037134d09587a7 F test/insert.test aef273dd1cee84cc92407469e6bd1b3cdcb76908 F test/insert2.test 4f3a04d168c728ed5ec2c88842e772606c7ce435 @@ -517,7 +517,7 @@ F test/jrnlmode3.test c6522b276ba315fd1416198de6fc1da9e72409fb F test/keyword1.test a2400977a2e4fde43bf33754c2929fda34dbca05 F test/lastinsert.test 474d519c68cb79d07ecae56a763aa7f322c72f51 F test/laststmtchanges.test ae613f53819206b3222771828d024154d51db200 -F test/like.test 0f64aeaed50b6e3ebaef3af0b3b8f894aed5acca +F test/like.test a47f52692aac96ba82508efba74819214cdebc17 F test/like2.test 3b2ee13149ba4a8a60b59756f4e5d345573852da F test/limit.test 2db7b3b34fb925b8e847d583d2eb67531d0ce67e F test/loadext.test 0393ce12d9616aa87597dd0ec88181de181f6db0 @@ -561,7 +561,7 @@ F test/memsubsys1.test 66bf4e153e0eccc08c3b6af3641eb4f42d391c32 F test/memsubsys2.test 72a731225997ad5e8df89fdbeae9224616b6aecc F test/minmax.test 722d80816f7e096bf2c04f4111f1a6c1ba65453d F test/minmax2.test 
33504c01a03bd99226144e4b03f7631a274d66e0 -F test/minmax3.test 66a60eb0f20281b0753249d347c5de0766954cee +F test/minmax3.test cc1e8b010136db0d01a6f2a29ba5a9f321034354 F test/misc1.test e56baf44656dd68d6475a4b44521045a60241e9b F test/misc2.test a628db7b03e18973e5d446c67696b03de718c9fd F test/misc3.test 72c5dc87a78e7865c5ec7a969fc572913dbe96b6 @@ -856,14 +856,14 @@ F test/walslow.test d21625e2e99e11c032ce949e8a94661576548933 F test/walthread.test a25a393c068a2b42b44333fa3fdaae9072f1617c F test/where.test de337a3fe0a459ec7c93db16a519657a90552330 F test/where2.test 43d4becaf5a5df854e6c21d624a1cb84c6904554 -F test/where3.test 8ebedae552e13fc7f2b4e8df6cbe72a095347400 +F test/where3.test 858d089cc4bf9aa0b06917a02abd5fd198655c96 F test/where4.test e9b9e2f2f98f00379e6031db6a6fca29bae782a2 F test/where5.test fdf66f96d29a064b63eb543e28da4dfdccd81ad2 F test/where6.test 5da5a98cec820d488e82708301b96cb8c18a258b F test/where7.test aa4cfcd6f66e2a4ef87b6717327325bf4d547502 F test/where8.test a6c740fd286d7883e274e17b6230a9d672a7ab1f F test/where8m.test da346596e19d54f0aba35ebade032a7c47d79739 -F test/where9.test 7ee38c3fd67e76789a6ec769f62f6433d3d4a5cf +F test/where9.test 24f19ad14bb1b831564ced5273e681e495662848 F test/whereA.test 24c234263c8fe358f079d5e57d884fb569d2da0a F test/whereB.test 0def95db3bdec220a731c7e4bec5930327c1d8c5 F test/wherelimit.test 5e9fd41e79bb2b2d588ed999d641d9c965619b31 @@ -900,14 +900,14 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P b442525b0ba642bb8d57b87b7b9e373b6046454a -R ebf83ed63e8d1b48962a32de01a8b2cc +P 31fcc7067bd76da4bf19232811b90cf8b76eed74 +R f9a9d7b39b29b332b353f1913f9309e4 U drh -Z a63bcf5b3583a55e26e7d3f132337aba +Z 1ad30d06ebdb0226c333d2dcf85b95e8 -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.6 (GNU/Linux) 
-iD8DBQFNPbr/oxKgR168RlERAhb1AJ99UR+RTzr0SNXjxuLAr4iQrdLWlQCcC8Ik -EMLXFqheBOQt775+utT5Fjc= -=NdWh +iD8DBQFNQiKZoxKgR168RlERAjJeAJ9DU+63Qt3QOlsjnHQ7MP5GX/m8egCfe8c1 +SQn1lZ3E7QkvjZ3JOaQppDw= +=0Mzf -----END PGP SIGNATURE----- diff --git a/manifest.uuid b/manifest.uuid index 0a3f6d961e..50652bbd99 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -31fcc7067bd76da4bf19232811b90cf8b76eed74 \ No newline at end of file +4847c6cb71423248b186ab7842b97c83e2f5fefd \ No newline at end of file diff --git a/src/where.c b/src/where.c index d0540a4903..6d660e8cc0 100644 --- a/src/where.c +++ b/src/where.c @@ -2421,9 +2421,9 @@ static int valueFromExpr( ** constraints. ** ** In the absence of sqlite_stat2 ANALYZE data, each range inequality -** reduces the search space by 2/3rds. Hence a single constraint (x>?) -** results in a return of 33 and a range constraint (x>? AND x?) +** results in a return of 25 and a range constraint (x>? AND xwtFlags & TERM_VNULL)==0 ) *piEst /= 3; - if( pUpper ) *piEst /= 3; + if( pLower && (pLower->wtFlags & TERM_VNULL)==0 ) *piEst /= 4; + if( pUpper ) *piEst /= 4; return rc; } @@ -2636,12 +2636,12 @@ int whereInScanEst( /* -** Find the query plan for accessing a particular table. Write the +** Find the best query plan for accessing a particular table. Write the ** best query plan and its cost into the WhereCost object supplied as the ** last parameter. ** ** The lowest cost plan wins. The cost is an estimate of the amount of -** CPU and disk I/O need to process the request using the selected plan. +** CPU and disk I/O needed to process the requested result. ** Factors that influence cost include: ** ** * The estimated number of rows that will be retrieved. (The @@ -2660,7 +2660,7 @@ int whereInScanEst( ** ** If a NOT INDEXED clause (pSrc->notIndexed!=0) was attached to the table ** in the SELECT statement, then no indexes are considered. 
However, the -** selected plan may still take advantage of the tables built-in rowid +** selected plan may still take advantage of the built-in rowid primary key ** index. */ static void bestBtreeIndex( @@ -2703,9 +2703,11 @@ static void bestBtreeIndex( wsFlagMask = ~(WHERE_ROWID_EQ|WHERE_ROWID_RANGE); eqTermMask = idxEqTermMask; }else{ - /* There is no INDEXED BY clause. Create a fake Index object to - ** represent the primary key */ - Index *pFirst; /* Any other index on the table */ + /* There is no INDEXED BY clause. Create a fake Index object in local + ** variable sPk to represent the rowid primary key index. Make this + ** fake index the first in a chain of Index objects with all of the real + ** indices to follow */ + Index *pFirst; /* First of real indices on the table */ memset(&sPk, 0, sizeof(Index)); sPk.nColumn = 1; sPk.aiColumn = &aiColumnPk; @@ -2716,6 +2718,8 @@ static void bestBtreeIndex( aiRowEstPk[1] = 1; pFirst = pSrc->pTab->pIndex; if( pSrc->notIndexed==0 ){ + /* The real indices of the table are only considered if the + ** NOT INDEXED qualifier is omitted from the FROM clause */ sPk.pNext = pFirst; } pProbe = &sPk; @@ -2733,15 +2737,18 @@ static void bestBtreeIndex( double cost; /* Cost of using pProbe */ double nRow; /* Estimated number of rows in result set */ int rev; /* True to scan in reverse order */ + double nSearch; /* Estimated number of binary searches */ int wsFlags = 0; Bitmask used = 0; /* The following variables are populated based on the properties of - ** scan being evaluated. They are then used to determine the expected + ** index being evaluated. They are then used to determine the expected ** cost and number of rows returned. ** ** nEq: ** Number of equality terms that can be implemented using the index. + ** In other words, the number of initial fields in the index that + ** are used in == or IN or NOT NULL constraints of the WHERE clause. ** ** nInMul: ** The "in-multiplier". 
This is an estimate of how many seek operations @@ -2765,7 +2772,9 @@ static void bestBtreeIndex( ** ** bInEst: ** Set to true if there was at least one "x IN (SELECT ...)" term used - ** in determining the value of nInMul. + ** in determining the value of nInMul. Note that the RHS of the + ** IN operator must be a SELECT, not a value list, for this variable + ** to be true. ** ** estBound: ** An estimate on the amount of the table that must be searched. A @@ -2773,8 +2782,8 @@ static void bestBtreeIndex( ** might reduce this to a value less than 100 to indicate that only ** a fraction of the table needs searching. In the absence of ** sqlite_stat2 ANALYZE data, a single inequality reduces the search - ** space to 1/3rd its original size. So an x>? constraint reduces - ** estBound to 33. Two constraints (x>? AND x? constraint reduces + ** estBound to 25. Two constraints (x>? AND xx.pList) ){ + }else if( ALWAYS(pExpr->x.pList && pExpr->x.pList->nExpr) ){ /* "x IN (value, value, ...)" */ - nInMul *= pExpr->x.pList->nExpr + 1; + nInMul *= pExpr->x.pList->nExpr; } }else if( pTerm->eOperator & WO_ISNULL ){ wsFlags |= WHERE_COLUMN_NULL; @@ -2923,16 +2935,41 @@ static void bestBtreeIndex( ** that are excluded by range constraints. */ nRow = (nRow * (double)estBound) / (double)100; + if( nRow<1 ) nRow = 1; - /* Assume constant cost to access a row and logarithmic cost to - ** do a binary search. Hence, the initial cost is the number of output - ** rows plus log2(table-size) times the number of binary searches. + /* Assume constant cost to advance from one row to the next and + ** logarithmic cost to do a binary search. Hence, the initial cost + ** is the number of output rows plus log2(table-size) times the + ** number of binary searches. 
+ ** + ** Because fan-out on tables is so much higher than the fan-out on + ** indices (because table btrees contain only integer keys in non-leaf + ** nodes) we weight the cost of a table binary search as 1/10th the + ** cost of an index binary search. */ - if( pIdx && bLookup ){ - cost = nRow + (nInMul+nRow)*estLog(aiRowEst[0]); + if( pIdx ){ + if( bLookup ){ + /* For an index lookup followed by a table lookup: + ** nInMul index searches to find the start of each index range + ** + nRow steps through the index + ** + nRow table searches to lookup the table entry using the rowid + */ + nSearch = nInMul + nRow/10; + }else{ + /* For a covering index: + ** nInMul binary searches to find the initial entry + ** + nRow steps through the index + */ + nSearch = nInMul; + } }else{ - cost = nRow + nInMul*estLog(aiRowEst[0]); + /* For a rowid primary key lookup: + ** nInMult binary searches to find the initial entry scaled by 1/10th + ** + nRow steps through the table + */ + nSearch = nInMul/10; } + cost = nRow + nSearch*estLog(aiRowEst[0]); /* Add in the estimated cost of sorting the result. This cost is expanded ** by a fudge factor of 3.0 to account for the fact that a sorting step @@ -2987,7 +3024,11 @@ static void bestBtreeIndex( nSkipRange--; }else{ /* Assume each additional range constraint reduces the result - ** set size by a factor of 3 */ + ** set size by a factor of 3. Indexed range constraints reduce + ** the search space by a larger factor: 4. We make indexed range + ** more selective intentionally because of the subjective + ** observation that indexed range constraints really are more + ** selective in practice, on average. 
*/ nRow /= 3; } }else if( pTerm->eOperator!=WO_NOOP ){ diff --git a/test/analyze2.test b/test/analyze2.test index add73af43a..de2567bb6f 100644 --- a/test/analyze2.test +++ b/test/analyze2.test @@ -242,10 +242,12 @@ do_test analyze2-4.1 { } {} do_test analyze2-4.2 { execsql { + PRAGMA automatic_index=OFF; SELECT tbl,idx,group_concat(sample,' ') FROM sqlite_stat2 WHERE idx = 't3a' - GROUP BY tbl,idx + GROUP BY tbl,idx; + PRAGMA automatic_index=ON; } } {t3 t3a {AfA bEj CEj dEj EEj fEj GEj hEj IEj jEj}} do_test analyze2-4.3 { @@ -408,7 +410,7 @@ do_test analyze2-6.2.1 { t5.a>1 AND t5.a<15 AND t6.a>1 } -} {0 0 0 {SEARCH TABLE t5 USING COVERING INDEX t5i (a>? AND a? AND a1 AND t5.a<15 AND t6.a>1 } -} {0 0 0 {SEARCH TABLE t5 USING COVERING INDEX t5i (a>? AND a? AND a1 AND t5.a<15 AND t6.a>1 } -} {0 0 0 {SEARCH TABLE t5 USING COVERING INDEX t5i (a>? AND a? AND a1 AND t5.a<15 AND t6.a>1 } db1 - } {0 0 0 {SEARCH TABLE t5 USING COVERING INDEX t5i (a>? AND a? AND a? AND b? AND b10" - {0 0 0 {SEARCH TABLE t2 USING INDEX sqlite_autoindex_t2_1 (b=? AND c>?) (~3 rows)}} + {0 0 0 {SEARCH TABLE t2 USING INDEX sqlite_autoindex_t2_1 (b=? AND c>?) (~2 rows)}} } # EVIDENCE-OF: R-45493-35653 A CHECK constraint may be attached to a diff --git a/test/eqp.test b/test/eqp.test index cd08b8ff16..ca7544b41b 100644 --- a/test/eqp.test +++ b/test/eqp.test @@ -392,7 +392,7 @@ det 5.3.1 "SELECT a, b FROM t1 WHERE a=1" { # (~1000000 rows) do_execsql_test 5.4.0 {CREATE TABLE t2(c, d)} det 5.4.1 "SELECT t1.*, t2.* FROM t1, t2 WHERE t1.a=1 AND t1.b>2" { - 0 0 0 {SEARCH TABLE t1 USING COVERING INDEX i2 (a=? AND b>?) (~3 rows)} + 0 0 0 {SEARCH TABLE t1 USING COVERING INDEX i2 (a=? AND b>?) (~2 rows)} 0 1 1 {SCAN TABLE t2 (~1000000 rows)} } @@ -401,7 +401,7 @@ det 5.4.1 "SELECT t1.*, t2.* FROM t1, t2 WHERE t1.a=1 AND t1.b>2" { # USING COVERING INDEX i2 (a=? AND b>?) 
(~3 rows) 0|1|0|SCAN TABLE t2 # (~1000000 rows) det 5.5 "SELECT t1.*, t2.* FROM t2, t1 WHERE t1.a=1 AND t1.b>2" { - 0 0 1 {SEARCH TABLE t1 USING COVERING INDEX i2 (a=? AND b>?) (~3 rows)} + 0 0 1 {SEARCH TABLE t1 USING COVERING INDEX i2 (a=? AND b>?) (~2 rows)} 0 1 0 {SCAN TABLE t2 (~1000000 rows)} } diff --git a/test/indexedby.test b/test/indexedby.test index b8b5be63b6..7ccc4de24a 100644 --- a/test/indexedby.test +++ b/test/indexedby.test @@ -154,10 +154,10 @@ do_test indexedby-4.4 { do_execsql_test indexedby-5.1 { CREATE VIEW v2 AS SELECT * FROM t1 INDEXED BY i1 WHERE a > 5; EXPLAIN QUERY PLAN SELECT * FROM v2 -} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a>?) (~330000 rows)}} +} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a>?) (~250000 rows)}} do_execsql_test indexedby-5.2 { EXPLAIN QUERY PLAN SELECT * FROM v2 WHERE b = 10 -} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a>?) (~33000 rows)}} +} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a>?) (~25000 rows)}} do_test indexedby-5.3 { execsql { DROP INDEX i1 } catchsql { SELECT * FROM v2 } diff --git a/test/like.test b/test/like.test index e60d89bdc3..bd9a6c39c1 100644 --- a/test/like.test +++ b/test/like.test @@ -707,32 +707,32 @@ ifcapable like_opt&&!icu { INSERT INTO t10 VALUES(45,45,45,45,45,45); } count { - SELECT a FROM t10 WHERE b LIKE '12%' ORDER BY a; + SELECT a FROM t10 WHERE b LIKE '12%' ORDER BY +a; } } {12 123 scan 5 like 6} do_test like-10.2 { count { - SELECT a FROM t10 WHERE c LIKE '12%' ORDER BY a; + SELECT a FROM t10 WHERE c LIKE '12%' ORDER BY +a; } } {12 123 scan 5 like 6} do_test like-10.3 { count { - SELECT a FROM t10 WHERE d LIKE '12%' ORDER BY a; + SELECT a FROM t10 WHERE d LIKE '12%' ORDER BY +a; } } {12 123 scan 5 like 6} do_test like-10.4 { count { - SELECT a FROM t10 WHERE e LIKE '12%' ORDER BY a; + SELECT a FROM t10 WHERE e LIKE '12%' ORDER BY +a; } } {12 123 scan 5 like 6} do_test like-10.5 { count { - SELECT a FROM t10 WHERE f LIKE '12%' ORDER BY a; + SELECT a FROM t10 WHERE f LIKE '12%' ORDER BY +a; 
} } {12 123 scan 3 like 0} do_test like-10.6 { count { - SELECT a FROM t10 WHERE a LIKE '12%' ORDER BY a; + SELECT a FROM t10 WHERE a LIKE '12%' ORDER BY +a; } } {12 123 scan 5 like 6} do_test like-10.10 { @@ -748,32 +748,32 @@ ifcapable like_opt&&!icu { INSERT INTO t10b SELECT * FROM t10; } count { - SELECT a FROM t10b WHERE b GLOB '12*' ORDER BY a; + SELECT a FROM t10b WHERE b GLOB '12*' ORDER BY +a; } } {12 123 scan 5 like 6} do_test like-10.11 { count { - SELECT a FROM t10b WHERE c GLOB '12*' ORDER BY a; + SELECT a FROM t10b WHERE c GLOB '12*' ORDER BY +a; } } {12 123 scan 5 like 6} do_test like-10.12 { count { - SELECT a FROM t10b WHERE d GLOB '12*' ORDER BY a; + SELECT a FROM t10b WHERE d GLOB '12*' ORDER BY +a; } } {12 123 scan 5 like 6} do_test like-10.13 { count { - SELECT a FROM t10b WHERE e GLOB '12*' ORDER BY a; + SELECT a FROM t10b WHERE e GLOB '12*' ORDER BY +a; } } {12 123 scan 5 like 6} do_test like-10.14 { count { - SELECT a FROM t10b WHERE f GLOB '12*' ORDER BY a; + SELECT a FROM t10b WHERE f GLOB '12*' ORDER BY +a; } } {12 123 scan 3 like 0} do_test like-10.15 { count { - SELECT a FROM t10b WHERE a GLOB '12*' ORDER BY a; + SELECT a FROM t10b WHERE a GLOB '12*' ORDER BY +a; } } {12 123 scan 5 like 6} } @@ -819,7 +819,7 @@ do_test like-11.3 { queryplan { PRAGMA case_sensitive_like=OFF; CREATE INDEX t11b ON t11(b); - SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY a; + SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY +a; } } {abc abcd ABC ABCD sort {} t11b} do_test like-11.4 { @@ -833,37 +833,37 @@ do_test like-11.5 { PRAGMA case_sensitive_like=OFF; DROP INDEX t11b; CREATE INDEX t11bnc ON t11(b COLLATE nocase); - SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY a; + SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY +a; } } {abc abcd ABC ABCD sort {} t11bnc} do_test like-11.6 { queryplan { CREATE INDEX t11bb ON t11(b COLLATE binary); - SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY a; + SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY +a; } } {abc abcd ABC 
ABCD sort {} t11bnc} do_test like-11.7 { queryplan { PRAGMA case_sensitive_like=ON; - SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY a; + SELECT b FROM t11 WHERE b LIKE 'abc%' ORDER BY +a; } } {abc abcd sort {} t11bb} do_test like-11.8 { queryplan { PRAGMA case_sensitive_like=OFF; - SELECT b FROM t11 WHERE b GLOB 'abc*' ORDER BY a; + SELECT b FROM t11 WHERE b GLOB 'abc*' ORDER BY +a; } } {abc abcd sort {} t11bb} do_test like-11.9 { queryplan { CREATE INDEX t11cnc ON t11(c COLLATE nocase); CREATE INDEX t11cb ON t11(c COLLATE binary); - SELECT c FROM t11 WHERE c LIKE 'abc%' ORDER BY a; + SELECT c FROM t11 WHERE c LIKE 'abc%' ORDER BY +a; } } {abc abcd ABC ABCD sort {} t11cnc} do_test like-11.10 { queryplan { - SELECT c FROM t11 WHERE c GLOB 'abc*' ORDER BY a; + SELECT c FROM t11 WHERE c GLOB 'abc*' ORDER BY +a; } } {abc abcd sort {} t11cb} diff --git a/test/minmax3.test b/test/minmax3.test index 8e8a0a5256..483fa612d2 100644 --- a/test/minmax3.test +++ b/test/minmax3.test @@ -52,6 +52,7 @@ do_test minmax3-1.0 { INSERT INTO t1 VALUES('2', 'V', 'five'); INSERT INTO t1 VALUES('3', 'VI', 'six'); COMMIT; + PRAGMA automatic_index=OFF; } } {} do_test minmax3-1.1.1 { diff --git a/test/where3.test b/test/where3.test index c032f1f0ea..29ba9e33f0 100644 --- a/test/where3.test +++ b/test/where3.test @@ -225,14 +225,15 @@ do_execsql_test where3-3.0 { ANALYZE; explain query plan SELECT * FROM t302, t301 WHERE t302.x=5 AND t301.a=t302.y; } { - 0 0 0 {SCAN TABLE t302 (~0 rows)} + 0 0 0 {SCAN TABLE t302 (~1 rows)} 0 1 1 {SEARCH TABLE t301 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)} } +exit do_execsql_test where3-3.1 { explain query plan SELECT * FROM t301, t302 WHERE t302.x=5 AND t301.a=t302.y; } { - 0 0 1 {SCAN TABLE t302 (~0 rows)} + 0 0 1 {SCAN TABLE t302 (~1 rows)} 0 1 0 {SEARCH TABLE t301 USING INTEGER PRIMARY KEY (rowid=?) 
(~1 rows)} } diff --git a/test/where9.test b/test/where9.test index 1ffc8ef9ea..9a180116a8 100644 --- a/test/where9.test +++ b/test/where9.test @@ -472,7 +472,7 @@ ifcapable explain { do_execsql_test where9-5.3 { EXPLAIN QUERY PLAN SELECT a FROM t1 WHERE b>1000 AND (c>=31031 OR d IS NULL) } { - 0 0 0 {SEARCH TABLE t1 USING INDEX t1b (b>?) (~165000 rows)} + 0 0 0 {SEARCH TABLE t1 USING INDEX t1b (b>?) (~125000 rows)} } } From 40ad34c608b42ce0adc5f35f07e07a2b6f5ef4b3 Mon Sep 17 00:00:00 2001 From: drh Date: Fri, 28 Jan 2011 03:13:58 +0000 Subject: [PATCH 11/11] Reactivate the analyze5.test script. FossilOrigin-Name: a2a9f6401c927f6259cda3ba35219cabef24e84d --- manifest | 18 ++--- manifest.uuid | 2 +- test/analyze5.test | 159 ++------------------------------------------- 3 files changed, 15 insertions(+), 164 deletions(-) diff --git a/manifest b/manifest index d4624a6e59..30a2df5c4d 100644 --- a/manifest +++ b/manifest @@ -1,8 +1,8 @@ -----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 -C Change\sthe\sweighting\sof\sbinary\ssearches\son\stables\sto\s1/10th\sthe\scost\sof\sa\nsearch\son\san\sindex.\s\sChange\sthe\sassumed\sreduction\sin\ssearch\sspace\sfrom\sa\nindexed\srange\sconstraint\sfrom\s1/3rd\sto\s1/4th.\s\sDo\snot\slet\sthe\sestimated\s\nnumber\sof\srows\sdrop\sbelow\s1. -D 2011-01-28T01:57:41.767 +C Reactivate\sthe\sanalyze5.test\sscript. 
+D 2011-01-28T03:13:58.522 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in de6498556d536ae60bb8bb10e8c1ba011448658c F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -256,7 +256,7 @@ F test/analyze.test c1eb87067fc16ece7c07e823d6395fd831b270c5 F test/analyze2.test 8f2b1534d43f5547ce9a6b736c021d4192c75be3 F test/analyze3.test d61f55d8b472fc6e713160b1e577f7a68e63f38b F test/analyze4.test 757b37875cf9bb528d46f74497bc789c88365045 -F test/analyze5.test 12df6def85e21971888f8be3c1867a505f1bf587 +F test/analyze5.test 18659612dd854330b9f2a0bf4c90658f3739fd67 F test/async.test ad4ba51b77cd118911a3fe1356b0809da9c108c3 F test/async2.test bf5e2ca2c96763b4cba3d016249ad7259a5603b6 F test/async3.test 93edaa9122f498e56ea98c36c72abc407f4fb11e @@ -900,14 +900,14 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P 31fcc7067bd76da4bf19232811b90cf8b76eed74 -R f9a9d7b39b29b332b353f1913f9309e4 +P 4847c6cb71423248b186ab7842b97c83e2f5fefd +R 2a7cda6de985ac7d8bc394704ea20bbe U drh -Z 1ad30d06ebdb0226c333d2dcf85b95e8 +Z 90a99f0eefff820df6066cb462690f7d -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.6 (GNU/Linux) -iD8DBQFNQiKZoxKgR168RlERAjJeAJ9DU+63Qt3QOlsjnHQ7MP5GX/m8egCfe8c1 -SQn1lZ3E7QkvjZ3JOaQppDw= -=0Mzf +iD8DBQFNQjR5oxKgR168RlERArUiAJ91iPioq77gB015ZhcABywWj5kEnACghSgp +HH7RJ7G2ecFXfbSqcbfkm08= +=pv9L -----END PGP SIGNATURE----- diff --git a/manifest.uuid b/manifest.uuid index 50652bbd99..61f326c2b7 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -4847c6cb71423248b186ab7842b97c83e2f5fefd \ No newline at end of file +a2a9f6401c927f6259cda3ba35219cabef24e84d \ No newline at end of file diff --git a/test/analyze5.test b/test/analyze5.test index 6366439f53..93b8b2e01f 100644 --- a/test/analyze5.test +++ 
b/test/analyze5.test @@ -14,8 +14,6 @@ # with many repeated values and only a few distinct values. # -return - set testdir [file dirname $argv0] source $testdir/tester.tcl @@ -104,8 +102,8 @@ foreach {testid where index rows} { 18 {z>=-100 AND z<0} t1z 50 19 {z>=-100 AND z<=1} t1z 700 20 {z>=-100 AND z<2} t1z 700 - 21 {z>=-100 AND z<=2} t1z 900 - 22 {z>=-100 AND z<3} t1z 900 + 21 {z>=-100 AND z<=2} {} 111 + 22 {z>=-100 AND z<3} {} 111 31 {z>=0.0 AND z<=0.0} t1z 400 32 {z>=1.0 AND z<=1.0} t1z 300 @@ -127,8 +125,8 @@ foreach {testid where index rows} { 48 {z>=-100 AND z<0.0} t1z 50 49 {z>=-100 AND z<=1.0} t1z 700 50 {z>=-100 AND z<2.0} t1z 700 - 51 {z>=-100 AND z<=2.0} t1z 900 - 52 {z>=-100 AND z<3.0} t1z 900 + 51 {z>=-100 AND z<=2.0} {} 111 + 52 {z>=-100 AND z<3.0} {} 111 101 {z=-1} t1z 50 102 {z=0} t1z 400 @@ -153,7 +151,7 @@ foreach {testid where index rows} { 206 {z IN (4)} t1z 50 207 {z IN (0.5)} t1z 50 208 {z IN (0,1)} t1z 700 - 209 {z IN (0,1,2)} t1z 900 + 209 {z IN (0,1,2)} {} 100 210 {z IN (0,1,2,3)} {} 100 211 {z IN (0,1,2,3,4,5)} {} 100 212 {z IN (1,2)} t1z 500 @@ -191,153 +189,6 @@ foreach {testid where index rows} { set res } {ok} } -exit - -# Change the table values from integer to floating point and then -# repeat the same sequence of tests. We should get the same results. 
-# -do_test analyze5-2.0 { - db eval { - UPDATE t1 SET z=z+0.0; - ANALYZE; - SELECT sample FROM sqlite_stat2 WHERE idx='t1z' ORDER BY sampleno; - } -} {0.0 0.0 0.0 0.0 1.0 1.0 1.0 2.0 2.0 3.0} -foreach {testid where rows} { - 1 {z>=0 AND z<=0} 400 - 2 {z>=1 AND z<=1} 300 - 3 {z>=2 AND z<=2} 200 - 4 {z>=3 AND z<=3} 100 - 5 {z>=4 AND z<=4} 50 - 6 {z>=-1 AND z<=-1} 50 - 7 {z>1 AND z<3} 200 - 8 {z>0 AND z<100} 600 - 9 {z>=1 AND z<100} 600 - 10 {z>1 AND z<100} 300 - 11 {z>=2 AND z<100} 300 - 12 {z>2 AND z<100} 100 - 13 {z>=3 AND z<100} 100 - 14 {z>3 AND z<100} 50 - 15 {z>=4 AND z<100} 50 - 16 {z>=-100 AND z<=-1} 50 - 17 {z>=-100 AND z<=0} 400 - 18 {z>=-100 AND z<0} 50 - 19 {z>=-100 AND z<=1} 700 - 20 {z>=-100 AND z<2} 700 - 21 {z>=-100 AND z<=2} 900 - 22 {z>=-100 AND z<3} 900 - - 31 {z>=0.0 AND z<=0.0} 400 - 32 {z>=1.0 AND z<=1.0} 300 - 33 {z>=2.0 AND z<=2.0} 200 - 34 {z>=3.0 AND z<=3.0} 100 - 35 {z>=4.0 AND z<=4.0} 50 - 36 {z>=-1.0 AND z<=-1.0} 50 - 37 {z>1.5 AND z<3.0} 200 - 38 {z>0.5 AND z<100} 600 - 39 {z>=1.0 AND z<100} 600 - 40 {z>1.5 AND z<100} 300 - 41 {z>=2.0 AND z<100} 300 - 42 {z>2.1 AND z<100} 100 - 43 {z>=3.0 AND z<100} 100 - 44 {z>3.2 AND z<100} 50 - 45 {z>=4.0 AND z<100} 50 - 46 {z>=-100 AND z<=-1.0} 50 - 47 {z>=-100 AND z<=0.0} 400 - 48 {z>=-100 AND z<0.0} 50 - 49 {z>=-100 AND z<=1.0} 700 - 50 {z>=-100 AND z<2.0} 700 - 51 {z>=-100 AND z<=2.0} 900 - 52 {z>=-100 AND z<3.0} 900 -} { - do_test analyze5-2.$testid { - eqp "SELECT * FROM t1 WHERE $where" - } [format {0 0 0 {SEARCH TABLE t1 USING INDEX t1z (z>? 
AND z='alpha' AND y<='alpha'} 400 - 2 {y>='bravo' AND y<='bravo'} 300 - 3 {y>='charlie' AND y<='charlie'} 200 - 4 {y>='delta' AND y<='delta'} 100 - 5 {y>='echo' AND y<='echo'} 50 - 6 {y>='' AND y<=''} 50 - 7 {y>'bravo' AND y<'delta'} 200 - 8 {y>'alpha' AND y<'zzz'} 600 - 9 {y>='bravo' AND y<'zzz'} 600 - 10 {y>'bravo' AND y<'zzz'} 300 - 11 {y>='charlie' AND y<'zzz'} 300 - 12 {y>'charlie' AND y<'zzz'} 100 - 13 {y>='delta' AND y<'zzz'} 100 - 14 {y>'delta' AND y<'zzz'} 50 - 15 {y>='echo' AND y<'zzz'} 50 - 16 {y>=0 AND y<=''} 50 - 17 {y>=0 AND y<='alpha'} 400 - 18 {y>=0 AND y<'alpha'} 50 - 19 {y>=0 AND y<='bravo'} 700 - 20 {y>=0 AND y<'charlie'} 700 - 21 {y>=0 AND y<='charlie'} 900 - 22 {y>=0 AND y<'delta'} 900 - 23 {y>'alpha' AND y='bravo' AND y'bravo' AND y='charlie' AND y'charlie' AND y='delta' AND y'delta' AND y='echo' AND y? AND y