Modify the way the costs of various query plans are estimated. If the user supplies a likelihood() value (or equivalent) on an indexed WHERE constraint, use it to estimate the number of index rows visited.

FossilOrigin-Name: 90e36676476e8db00658772e6c938242f766d306
2025-11-03 16:53:36 +03:00 · 2014-04-30 15:22:25 +00:00
parent 65e6b0dd12 8164722c58
commit 4243e27f22
22 changed files with 653 additions and 240 deletions
--- a/52
+++ b/52
@@ -1,5 +1,5 @@
-C Add\sthe\ssqlite3_rtree_query_callback()\sAPI\sto\sthe\sRTree\svirtual\stable.\n(Cherrypick\sfrom\sthe\ssessions\sbranch.)
-D 2014-04-28T17:56:19.891
+C Modify\sthe\sway\sthe\scosts\sof\svarious\squery\splans\sare\sestimated.\sIf\sthe\suser\ssupplies\sa\slikelihood()\svalue\s(or\sequivalent)\son\san\sindexed\sWHERE\sconstraint,\suse\sit\sto\sestimate\sthe\snumber\sof\sindex\srows\svisited.
+D 2014-04-30T15:22:25.359
 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
 F Makefile.in 2ef13430cd359f7b361bb863504e227b25cc7f81
 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@@ -159,7 +159,7 @@ F sqlite.pc.in 42b7bf0d02e08b9e77734a47798d1a55a9e0716b
 F sqlite3.1 3d8b83c91651f53472ca17599dae3457b8b89494
 F sqlite3.pc.in 48fed132e7cb71ab676105d2a4dc77127d8c1f3a
 F src/alter.c b00900877f766f116f9e16116f1ccacdc21d82f1
-F src/analyze.c 663e0b291d27eb03c9fd6b421e2d61ba348a2389
+F src/analyze.c 92f1495304dd33b4f9e0b0e5aa030b068ada504d
 F src/attach.c 3801129015ef59d76bf23c95ef9b0069d18a0c52
 F src/auth.c 523da7fb4979469955d822ff9298352d6b31de34
 F src/backup.c a729e63cf5cd1829507cb7b8e89f99b95141bb53
@@ -168,7 +168,7 @@ F src/btmutex.c 976f45a12e37293e32cae0281b15a21d48a8aaa7
 F src/btree.c 6c9b51abd404ce5b78b173b6f2248e8cb824758c
 F src/btree.h d79306df4ed9181b48916737fe8871a4392c4594
 F src/btreeInt.h cf180d86b2e9e418f638d65baa425c4c69c0e0e3
-F src/build.c 5bfeea8f302ec2926c9eea321a61daea92a29fa4
+F src/build.c 02665ca158431a0926b10cbd7d8178a4c9fc4a22
 F src/callback.c 174e3c8656bc29f91d710ab61550d16eea34be98
 F src/complete.c dc1d136c0feee03c2f7550bafc0d29075e36deac
 F src/ctime.c 0231df905e2c4abba4483ee18ffc05adc321df2a
@@ -212,18 +212,18 @@ F src/parse.y 22d6a074e5f5a7258947a1dc55a9bf946b765dd0
 F src/pcache.c d8eafac28290d4bb80332005435db44991d07fc2
 F src/pcache.h a5e4f5d9f5d592051d91212c5949517971ae6222
 F src/pcache1.c 102e6f5a2fbc646154463eb856d1fd716867b64c
-F src/pragma.c 21ece94d4f3e76e8e150deecafb9c7abd398ec67
+F src/pragma.c 810ef31ccfaa233201dcf100637a9777cc24e897
 F src/prepare.c 677521ab7132615a8a26107a1d1c3132f44ae337
 F src/printf.c e5a0005f8b3de21f85da6a709d2fbee76775bf4b
 F src/random.c d10c1f85b6709ca97278428fd5db5bbb9c74eece
 F src/resolve.c 273d5f47c4e2c05b2d3d2bffeda939551ab59e66
 F src/rowset.c a9c9aae3234b44a6d7c6f5a3cadf90dce1e627be
-F src/select.c bc7feff0fb4c4a1b9d655b717bef166846b48e33
+F src/select.c ed459f7f478a1e533d19c4b953693b3ffa2efd15
 F src/shell.c 2afe7a7154e97be0c74c5feacf09626bda8493be
 F src/sqlite.h.in bde98816e1ba0c9ffef50afe7b32f4e5a8f54fe0
 F src/sqlite3.rc 11094cc6a157a028b301a9f06b3d03089ea37c3e
 F src/sqlite3ext.h 886f5a34de171002ad46fae8c36a7d8051c190fc
-F src/sqliteInt.h 03e2f60ccb0745fa2d3a072cb4f75fa29251d2ee
+F src/sqliteInt.h b2947801eccefd7ba3e5f14e1353289351a83cf3
 F src/sqliteLimit.h 164b0e6749d31e0daa1a4589a169d31c0dec7b3d
 F src/status.c 7ac05a5c7017d0b9f0b4bcd701228b784f987158
 F src/table.c 2cd62736f845d82200acfa1287e33feb3c15d62e
@@ -277,7 +277,7 @@ F src/tokenize.c 6da2de6e12218ccb0aea5184b56727d011f4bee7
 F src/trigger.c 66f3470b03b52b395e839155786966e3e037fddb
 F src/update.c 5b3e74a03b3811e586b4f2b4cbd7c49f01c93115
 F src/utf.c 6dc9ec9f1b3db43ae8ba0365377f11df1ee4c01c
-F src/util.c c46c90459ef9bdc0c6c73803cf4c55425b4771cf
+F src/util.c 2b5fb283a190aacdb286f7835a447c45b345b83c
 F src/vacuum.c 3728d74919d4fb1356f9e9a13e27773db60b7179
 F src/vdbe.c 699693bea6710ed436392c928b02cb4e91944137
 F src/vdbe.h 394464909ed682334aa3d5831aae0c2fe2abef94
@@ -292,8 +292,8 @@ F src/vtab.c 21b932841e51ebd7d075e2d0ad1415dce8d2d5fd
 F src/wal.c 76e7fc6de229bea8b30bb2539110f03a494dc3a8
 F src/wal.h df01efe09c5cb8c8e391ff1715cca294f89668a4
 F src/walker.c 11edb74d587bc87b33ca96a5173e3ec1b8389e45
-F src/where.c 6ae02f1e8b1b29744d9e8cd9b95eac4c5232736d
-F src/whereInt.h 929c1349b5355fd44f22cee5c14d72b3329c58a6
+F src/where.c 4aeb1caa0a16c76e0c0566af4c64ba003836a0aa
+F src/whereInt.h 6804c2e5010378568c2bb1350477537755296a46
 F test/8_3_names.test ebbb5cd36741350040fd28b432ceadf495be25b2
 F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2
 F test/aggnested.test 45c0201e28045ad38a530b5a144b73cd4aa2cfd6
@@ -306,13 +306,13 @@ F test/alter4.test d6c011fa0d6227abba762498cafbb607c9609e93
 F test/altermalloc.test e81ac9657ed25c6c5bb09bebfa5a047cd8e4acfc
 F test/amatch1.test b5ae7065f042b7f4c1c922933f4700add50cdb9f
 F test/analyze.test 1772936d66471c65221e437b6d1999c3a03166c4
-F test/analyze3.test 412f690dfe95b337475e3e78a84a85d25f6f125d
+F test/analyze3.test bf41f0f680dd1e0d44eed5e769531e93a5320275
 F test/analyze4.test eff2df19b8dd84529966420f29ea52edc6b56213
 F test/analyze5.test 765c4e284aa69ca172772aa940946f55629bc8c4
 F test/analyze6.test d31defa011a561b938b4608d3538c1b4e0b5e92c
 F test/analyze7.test bb1409afc9e8629e414387ef048b8e0e3e0bdc4f
 F test/analyze8.test 093d15c1c888eed5034304a98c992f7360130b88
-F test/analyze9.test e072a5172d55afcba98d6ca6a219ce8878c2f5c9
+F test/analyze9.test 623e02a99a78fa12fe5def2fd559032d5d887e0f
 F test/analyzeA.test 1a5c40079894847976d983ca39c707aaa44b6944
 F test/analyzeB.test 8bf35ee0a548aea831bf56762cb8e7fdb1db083d
 F test/async.test 1d0e056ba1bb9729283a0f22718d3a25e82c277b
@@ -330,7 +330,7 @@ F test/auth.test 5bdf154eb28c0e4bbc0473f335858c0d96171768
 F test/auth2.test c3b415b76c033bedb81292118fb7c01f5f10cbcd
 F test/auth3.test a4755e6a2a2fea547ffe63c874eb569e60a28eb5
 F test/autoinc.test c58912526998a39e11f66b533e23cfabea7f25b7
-F test/autoindex1.test d4dfe14001dfcb74cfbd7107f45a79fc1ab6183e
+F test/autoindex1.test 762ff3f8e25d852aae55c6462ca166a80c0cde61
 F test/autovacuum.test 941892505d2c0f410a0cb5970dfa1c7c4e5f6e74
 F test/autovacuum_ioerr2.test 8a367b224183ad801e0e24dcb7d1501f45f244b4
 F test/avtrans.test 0252654f4295ddda3b2cce0e894812259e655a85
@@ -407,6 +407,7 @@ F test/corruptF.test be9fde98e4c93648f1ba52b74e5318edc8f59fe4
 F test/corruptG.test 1ab3bf97ee7bdba70e0ff3ba2320657df55d1804
 F test/corruptH.test 88ed71a086e13591c917aac6de32750e7c7281cb
 F test/corruptI.test b3e4203d420490fc3d3062711597bc1dea06a789
+F test/cost.test 3f7904d623ef8dc6e55f2206db5ce0549077b438
 F test/count.test 42a251178e32f617eda33f76236a7f79825a50b5
 F test/coveridxscan.test cdb47d01acc4a634a34fd25abe85189e0d0f1e62
 F test/crash.test fb9dc4a02dcba30d4aa5c2c226f98b220b2b959f
@@ -453,7 +454,7 @@ F test/enc.test e54531cd6bf941ee6760be041dff19a104c7acea
 F test/enc2.test 83437a79ba1545a55fb549309175c683fb334473
 F test/enc3.test 90683ad0e6ea587b9d5542ca93568af9a9858c40
 F test/enc4.test c8f1ce3618508fd0909945beb8b8831feef2c020
-F test/eqp.test 57c6c604c2807fb5531731c5323133453c24afac
+F test/eqp.test 90b56d03a93a2e7bb90f88be6083a8ea53f11a0e
 F test/errmsg.test f31592a594b44ee121371d25ddd5d63497bb3401
 F test/eval.test bc269c365ba877554948441e91ad5373f9f91be3
 F test/exclusive.test c7ebbc756eacf544c108b15eed64d7d4e5f86b75
@@ -611,7 +612,7 @@ F test/index2.test ee83c6b5e3173a3d7137140d945d9a5d4fdfb9d6
 F test/index3.test 55a90cff99834305e8141df7afaef39674b57062
 F test/index4.test ab92e736d5946840236cd61ac3191f91a7856bf6
 F test/index5.test fc07c14193c0430814e7a08b5da46888ee795c33
-F test/index6.test a0a2d286ffa6d35813f5003fdb7be124825b4422
+F test/index6.test fb370966ac3cd0989053dd5385757b5c3e24ab6a
 F test/index7.test a3baf9a625bda7fd49471e99aeae04095fbfeecf
 F test/indexedby.test b2f22f3e693a53813aa3f50b812eb609ba6df1ec
 F test/indexfault.test 31d4ab9a7d2f6e9616933eb079722362a883eb1d
@@ -723,7 +724,7 @@ F test/orderby1.test 9b524aff9147288da43a6d7ddfdcff47fa2303c6
 F test/orderby2.test bc11009f7cd99d96b1b11e57b199b00633eb5b04
 F test/orderby3.test 8619d06a3debdcd80a27c0fdea5c40b468854b99
 F test/orderby4.test 4d39bfbaaa3ae64d026ca2ff166353d2edca4ba4
-F test/orderby5.test 2490183fef54417209d1df253633a605d46bd350
+F test/orderby5.test 8f08a54836d21fb7c70245360751aedd1c2286fb
 F test/orderby6.test 8b38138ab0972588240b3fca0985d2e400432859
 F test/orderby7.test 3d1383d52ade5b9eb3a173b3147fdd296f0202da
 F test/oserror.test 50417780d0e0d7cd23cf12a8277bb44024765df3
@@ -816,7 +817,7 @@ F test/show_speedtest1_rtree.tcl 32e6c5f073d7426148a6936a0408f4b5b169aba5
 F test/shrink.test 8c70f62b6e8eb4d54533de6d65bd06b1b9a17868
 F test/sidedelete.test f0ad71abe6233e3b153100f3b8d679b19a488329
 F test/skipscan1.test bed8cbe9d554c8c27afb6c88500f704c86a9196f
-F test/skipscan2.test 5a4db0799c338ddbacb154aaa5589c0254b36a8d
+F test/skipscan2.test d77f79cdbba25f0f6f35298136cff21a7d7a553a
 F test/soak.test 0b5b6375c9f4110c828070b826b3b4b0bb65cd5f
 F test/softheap1.test 40562fe6cac6d9827b7b42b86d45aedf12c15e24
 F test/sort.test 0e4456e729e5a92a625907c63dcdedfbe72c5dc5
@@ -1026,7 +1027,7 @@ F test/types2.test 3555aacf8ed8dc883356e59efc314707e6247a84
 F test/types3.test 99e009491a54f4dc02c06bdbc0c5eea56ae3e25a
 F test/unique.test 93f8b2ef5ea51b9495f8d6493429b1fd0f465264
 F test/unixexcl.test cd6c765f75e50e8e2c2ba763149e5d340ea19825
-F test/unordered.test ef85ac8f2f3c93ed2b9e811b684de73175fc464c
+F test/unordered.test ca7adce0419e4ca0c50f039885e76ed2c531eda8
 F test/update.test 1b6c488a8f993d090b7ee9ad0e234faa161b3aeb
 F test/uri.test 23662b7b61958b0f0e47082de7d06341ccf85d5b
 F test/utf16align.test 54cd35a27c005a9b6e7815d887718780b6a462ae
@@ -1083,7 +1084,7 @@ F test/walslow.test e7be6d9888f83aa5d3d3c7c08aa9b5c28b93609a
 F test/walthread.test de8dbaf6d9e41481c460ba31ca61e163d7348f8e
 F test/where.test 28b64e93428961b07b0d486778d63fd672948f6b
 F test/where2.test 455a2eb2666e66c1e84e2cb5815173a85e6237db
-F test/where3.test d28c51f257e60be30f74308fa385ceeddfb54a6e
+F test/where3.test 1ad55ba900bd7747f98b6082e65bd3e442c5004e
 F test/where4.test d8420ceeb8323a41ceff1f1841fc528e824e1ecf
 F test/where5.test fdf66f96d29a064b63eb543e28da4dfdccd81ad2
 F test/where6.test 5da5a98cec820d488e82708301b96cb8c18a258b
@@ -1097,7 +1098,7 @@ F test/whereC.test d6f4ecd4fa2d9429681a5b22a25d2bda8e86ab8a
 F test/whereD.test fd9120e262f9da3c45940f52aefeef4d15b904e5
 F test/whereE.test b3a055eef928c992b0a33198a7b8dc10eea5ad2f
 F test/whereF.test 5b2ba0dbe8074aa13e416b37c753991f0a2492d7
-F test/whereG.test 2533b72ed4a31fd1687230a499b557b911525344
+F test/whereG.test 0ac23e5e8311b69d87245f4a85112de321031658
 F test/whereH.test e4b07f7a3c2f5d31195cd33710054c78667573b2
 F test/wherelimit.test 5e9fd41e79bb2b2d588ed999d641d9c965619b31
 F test/wild001.test bca33f499866f04c24510d74baf1e578d4e44b1c
@@ -1127,7 +1128,7 @@ F tool/genfkey.test 4196a8928b78f51d54ef58e99e99401ab2f0a7e5
 F tool/getlock.c f4c39b651370156cae979501a7b156bdba50e7ce
 F tool/lemon.c 07aba6270d5a5016ba8107b09e431eea4ecdc123
 F tool/lempar.c 01ca97f87610d1dac6d8cd96ab109ab1130e76dc
-F tool/logest.c 388c318c7ac8b52b7c08ca1e2de0f4ca9a8f7e81
+F tool/logest.c eef612f8adf4d0993dafed0416064cf50d5d33c6
 F tool/mkautoconfamal.sh f8d8dbf7d62f409ebed5134998bf5b51d7266383
 F tool/mkkeywordhash.c c9e05e4a7bcab8fab9f583d5b321fb72f565ad97
 F tool/mkopts.tcl 66ac10d240cc6e86abd37dc908d50382f84ff46e
@@ -1165,8 +1166,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
 F tool/warnings.sh d1a6de74685f360ab718efda6265994b99bbea01
 F tool/win/sqlite.vsix 030f3eeaf2cb811a3692ab9c14d021a75ce41fff
-P f5a263658187250044afc1a74000e6f6962733ca
-Q +3dca2809352c6c6d56db74447a814f77011c6220
-R 70a04a84bf76743284b12f147604df6e
-U drh
-Z 8441eee0dd2b010346629b18a46aad71
+P af2cbe64adab5f9e3b0f3da00d06428088589d7f 05e6e16cb28c9ffb4596bd2ef81f687c5403ecbb
+R 20e49e26218874ddfe5fb9438df8f580
+U dan
+Z 75da520dd73c7e9f7e5ee96ec52c5c59
--- a/manifest.uuid
+++ b/manifest.uuid
@@ -1 +1 @@
-af2cbe64adab5f9e3b0f3da00d06428088589d7f
+90e36676476e8db00658772e6c938242f766d306
--- a/src/analyze.c
+++ b/src/analyze.c
@@ -1371,6 +1371,7 @@ static void decodeIntArray(
  char *zIntArray,       /* String containing int array to decode */
  int nOut,              /* Number of slots in aOut[] */
  tRowcnt *aOut,         /* Store integers here */
+  LogEst *aLog,          /* Or, if aOut==0, here */
  Index *pIndex          /* Handle extra flags for this index, if not NULL */
 ){
  char *z = zIntArray;
@@ -1389,7 +1390,11 @@ static void decodeIntArray(
      v = v*10 + c - '0';
      z++;
    }
-    aOut[i] = v;
+    if( aOut ){
+      aOut[i] = v;
+    }else{
+      aLog[i] = sqlite3LogEst(v);
+    }
    if( *z==' ' ) z++;
  }
 #ifndef SQLITE_ENABLE_STAT3_OR_STAT4
@@ -1445,12 +1450,12 @@ static int analysisLoader(void *pData, int argc, char **argv, char **NotUsed){
  z = argv[2];

  if( pIndex ){
-    decodeIntArray((char*)z, pIndex->nKeyCol+1, pIndex->aiRowEst, pIndex);
-    if( pIndex->pPartIdxWhere==0 ) pTable->nRowEst = pIndex->aiRowEst[0];
+    decodeIntArray((char*)z, pIndex->nKeyCol+1, 0, pIndex->aiRowLogEst, pIndex);
+    if( pIndex->pPartIdxWhere==0 ) pTable->nRowLogEst = pIndex->aiRowLogEst[0];
  }else{
    Index fakeIdx;
    fakeIdx.szIdxRow = pTable->szTabRow;
-    decodeIntArray((char*)z, 1, &pTable->nRowEst, &fakeIdx);
+    decodeIntArray((char*)z, 1, 0, &pTable->nRowLogEst, &fakeIdx);
    pTable->szTabRow = fakeIdx.szIdxRow;
  }

@@ -1642,9 +1647,9 @@ static int loadStatTbl(
      pPrevIdx = pIdx;
    }
    pSample = &pIdx->aSample[pIdx->nSample];
-    decodeIntArray((char*)sqlite3_column_text(pStmt,1), nCol, pSample->anEq, 0);
-    decodeIntArray((char*)sqlite3_column_text(pStmt,2), nCol, pSample->anLt, 0);
-    decodeIntArray((char*)sqlite3_column_text(pStmt,3), nCol, pSample->anDLt,0);
+    decodeIntArray((char*)sqlite3_column_text(pStmt,1),nCol,pSample->anEq,0,0);
+    decodeIntArray((char*)sqlite3_column_text(pStmt,2),nCol,pSample->anLt,0,0);
+    decodeIntArray((char*)sqlite3_column_text(pStmt,3),nCol,pSample->anDLt,0,0);

    /* Take a copy of the sample. Add two 0x00 bytes the end of the buffer.
    ** This is in case the sample record is corrupted. In that case, the
--- a/src/build.c
+++ b/src/build.c
@@ -905,7 +905,7 @@ void sqlite3StartTable(
  pTable->iPKey = -1;
  pTable->pSchema = db->aDb[iDb].pSchema;
  pTable->nRef = 1;
-  pTable->nRowEst = 1048576;
+  pTable->nRowLogEst = 200; assert( 200==sqlite3LogEst(1048576) );
  assert( pParse->pNewTable==0 );
  pParse->pNewTable = pTable;

@@ -2730,15 +2730,15 @@ Index *sqlite3AllocateIndexObject(

  nByte = ROUND8(sizeof(Index)) +              /* Index structure  */
          ROUND8(sizeof(char*)*nCol) +         /* Index.azColl     */
-          ROUND8(sizeof(tRowcnt)*(nCol+1) +    /* Index.aiRowEst   */
+          ROUND8(sizeof(LogEst)*(nCol+1) +     /* Index.aiRowLogEst   */
                 sizeof(i16)*nCol +            /* Index.aiColumn   */
                 sizeof(u8)*nCol);             /* Index.aSortOrder */
  p = sqlite3DbMallocZero(db, nByte + nExtra);
  if( p ){
    char *pExtra = ((char*)p)+ROUND8(sizeof(Index));
-    p->azColl = (char**)pExtra;      pExtra += ROUND8(sizeof(char*)*nCol);
-    p->aiRowEst = (tRowcnt*)pExtra;  pExtra += sizeof(tRowcnt)*(nCol+1);
-    p->aiColumn = (i16*)pExtra;      pExtra += sizeof(i16)*nCol;
+    p->azColl = (char**)pExtra;       pExtra += ROUND8(sizeof(char*)*nCol);
+    p->aiRowLogEst = (LogEst*)pExtra; pExtra += sizeof(LogEst)*(nCol+1);
+    p->aiColumn = (i16*)pExtra;       pExtra += sizeof(i16)*nCol;
    p->aSortOrder = (u8*)pExtra;
    p->nColumn = nCol;
    p->nKeyCol = nCol - 1;
@@ -2968,7 +2968,7 @@ Index *sqlite3CreateIndex(
  if( db->mallocFailed ){
    goto exit_create_index;
  }
-  assert( EIGHT_BYTE_ALIGNMENT(pIndex->aiRowEst) );
+  assert( EIGHT_BYTE_ALIGNMENT(pIndex->aiRowLogEst) );
  assert( EIGHT_BYTE_ALIGNMENT(pIndex->azColl) );
  pIndex->zName = zExtra;
  zExtra += nName + 1;
@@ -3249,7 +3249,7 @@ exit_create_index:
 ** Since we do not know, guess 1 million.  aiRowEst[1] is an estimate of the
 ** number of rows in the table that match any particular value of the
 ** first column of the index.  aiRowEst[2] is an estimate of the number
-** of rows that match any particular combiniation of the first 2 columns
+** of rows that match any particular combination of the first 2 columns
 ** of the index.  And so forth.  It must always be the case that
 *
 **           aiRowEst[N]<=aiRowEst[N-1]
@@ -3260,20 +3260,27 @@ exit_create_index:
 ** are based on typical values found in actual indices.
 */
 void sqlite3DefaultRowEst(Index *pIdx){
-  tRowcnt *a = pIdx->aiRowEst;
+  /*                10,  9,  8,  7,  6 */
+  LogEst aVal[] = { 33, 32, 30, 28, 26 };
+  LogEst *a = pIdx->aiRowLogEst;
+  int nCopy = MIN(ArraySize(aVal), pIdx->nKeyCol);
  int i;
-  tRowcnt n;
-  assert( a!=0 );
-  a[0] = pIdx->pTable->nRowEst;
-  if( a[0]<10 ) a[0] = 10;
-  n = 10;
-  for(i=1; i<=pIdx->nKeyCol; i++){
-    a[i] = n;
-    if( n>5 ) n--;
-  }
-  if( pIdx->onError!=OE_None ){
-    a[pIdx->nKeyCol] = 1;
+
+  /* Set the first entry (number of rows in the index) to the estimated
+  ** number of rows in the table, or to 10 if that estimate is smaller.
+  ** Values are stored as LogEst, so 10 is represented by 33.  */
+  a[0] = pIdx->pTable->nRowLogEst;
+  if( a[0]<33 ) a[0] = 33;        assert( 33==sqlite3LogEst(10) );
+
+  /* Estimate that a[1] is 10, a[2] is 9, a[3] is 8, a[4] is 7, a[5] is
+  ** 6 and each subsequent value (if any) is 5.  */
+  memcpy(&a[1], aVal, nCopy*sizeof(LogEst));
+  for(i=nCopy+1; i<=pIdx->nKeyCol; i++){
+    a[i] = 23;                    assert( 23==sqlite3LogEst(5) );
  }
+
+  assert( 0==sqlite3LogEst(1) );
+  if( pIdx->onError!=OE_None ) a[pIdx->nKeyCol] = 0;
 }

 /*
--- a/src/pragma.c
+++ b/src/pragma.c
@@ -1488,13 +1488,15 @@ void sqlite3Pragma(
      sqlite3VdbeAddOp2(v, OP_Null, 0, 2);
      sqlite3VdbeAddOp2(v, OP_Integer,
                           (int)sqlite3LogEstToInt(pTab->szTabRow), 3);
-      sqlite3VdbeAddOp2(v, OP_Integer, (int)pTab->nRowEst, 4);
+      sqlite3VdbeAddOp2(v, OP_Integer, 
+          (int)sqlite3LogEstToInt(pTab->nRowLogEst), 4);
      sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 4);
      for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
        sqlite3VdbeAddOp4(v, OP_String8, 0, 2, 0, pIdx->zName, 0);
        sqlite3VdbeAddOp2(v, OP_Integer,
                             (int)sqlite3LogEstToInt(pIdx->szIdxRow), 3);
-        sqlite3VdbeAddOp2(v, OP_Integer, (int)pIdx->aiRowEst[0], 4);
+        sqlite3VdbeAddOp2(v, OP_Integer, 
+            (int)sqlite3LogEstToInt(pIdx->aiRowLogEst[0]), 4);
        sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 4);
      }
    }
--- a/src/select.c
+++ b/src/select.c
@@ -1690,7 +1690,7 @@ Table *sqlite3ResultSetOfSelect(Parse *pParse, Select *pSelect){
  assert( db->lookaside.bEnabled==0 );
  pTab->nRef = 1;
  pTab->zName = 0;
-  pTab->nRowEst = 1048576;
+  pTab->nRowLogEst = 200; assert( 200==sqlite3LogEst(1048576) );
  selectColumnsFromExprList(pParse, pSelect->pEList, &pTab->nCol, &pTab->aCol);
  selectAddColumnTypeAndCollation(pParse, pTab, pSelect);
  pTab->iPKey = -1;
@@ -3829,7 +3829,7 @@ static int withExpand(
    pTab->nRef = 1;
    pTab->zName = sqlite3DbStrDup(db, pCte->zName);
    pTab->iPKey = -1;
-    pTab->nRowEst = 1048576;
+    pTab->nRowLogEst = 200; assert( 200==sqlite3LogEst(1048576) );
    pTab->tabFlags |= TF_Ephemeral;
    pFrom->pSelect = sqlite3SelectDup(db, pCte->pSelect, 0);
    if( db->mallocFailed ) return SQLITE_NOMEM;
@@ -4005,7 +4005,7 @@ static int selectExpander(Walker *pWalker, Select *p){
      while( pSel->pPrior ){ pSel = pSel->pPrior; }
      selectColumnsFromExprList(pParse, pSel->pEList, &pTab->nCol, &pTab->aCol);
      pTab->iPKey = -1;
-      pTab->nRowEst = 1048576;
+      pTab->nRowLogEst = 200; assert( 200==sqlite3LogEst(1048576) );
      pTab->tabFlags |= TF_Ephemeral;
 #endif
    }else{
@@ -4655,7 +4655,7 @@ int sqlite3Select(
      sqlite3SelectDestInit(&dest, SRT_Coroutine, pItem->regReturn);
      explainSetInteger(pItem->iSelectId, (u8)pParse->iNextSelectId);
      sqlite3Select(pParse, pSub, &dest);
-      pItem->pTab->nRowEst = (unsigned)pSub->nSelectRow;
+      pItem->pTab->nRowLogEst = sqlite3LogEst(pSub->nSelectRow);
      pItem->viaCoroutine = 1;
      pItem->regResult = dest.iSdst;
      sqlite3VdbeAddOp1(v, OP_EndCoroutine, pItem->regReturn);
@@ -4686,7 +4686,7 @@ int sqlite3Select(
      sqlite3SelectDestInit(&dest, SRT_EphemTab, pItem->iCursor);
      explainSetInteger(pItem->iSelectId, (u8)pParse->iNextSelectId);
      sqlite3Select(pParse, pSub, &dest);
-      pItem->pTab->nRowEst = (unsigned)pSub->nSelectRow;
+      pItem->pTab->nRowLogEst = sqlite3LogEst(pSub->nSelectRow);
      if( onceAddr ) sqlite3VdbeJumpHere(v, onceAddr);
      retAddr = sqlite3VdbeAddOp1(v, OP_Return, pItem->regReturn);
      VdbeComment((v, "end %s", pItem->pTab->zName));
--- a/src/sqliteInt.h
+++ b/src/sqliteInt.h
@@ -525,10 +525,10 @@ typedef INT8_TYPE i8;              /* 1-byte signed integer */
 ** gives a possible range of values of approximately 1.0e986 to 1e-986.
 ** But the allowed values are "grainy".  Not every value is representable.
 ** For example, quantities 16 and 17 are both represented by a LogEst
-** of 40.  However, since LogEst quantatites are suppose to be estimates,
+** of 40.  However, since LogEst quantities are supposed to be estimates,
 ** not exact values, this imprecision is not a problem.
 **
-** "LogEst" is short for "Logarithimic Estimate".
+** "LogEst" is short for "Logarithmic Estimate".
 **
 ** Examples:
 **      1 -> 0              20 -> 43          10000 -> 132
@@ -1471,7 +1471,7 @@ struct Table {
 #ifndef SQLITE_OMIT_CHECK
  ExprList *pCheck;    /* All CHECK constraints */
 #endif
-  tRowcnt nRowEst;     /* Estimated rows in table - from sqlite_stat1 table */
+  LogEst nRowLogEst;   /* Estimated rows in table - from sqlite_stat1 table */
  int tnum;            /* Root BTree node for this table (see note above) */
  i16 iPKey;           /* If not negative, use aCol[iPKey] as the primary key */
  i16 nCol;            /* Number of columns in this table */
@@ -1680,7 +1680,7 @@ struct UnpackedRecord {
 struct Index {
  char *zName;             /* Name of this index */
  i16 *aiColumn;           /* Which columns are used by this index.  1st is 0 */
-  tRowcnt *aiRowEst;       /* From ANALYZE: Est. rows selected by each column */
+  LogEst *aiRowLogEst;     /* From ANALYZE: Est. rows selected by each column */
  Table *pTable;           /* The SQL table being indexed */
  char *zColAff;           /* String defining the affinity of each column */
  Index *pNext;            /* The next index associated with the same table */
--- a/src/util.c
+++ b/src/util.c
@@ -1246,8 +1246,8 @@ LogEst sqlite3LogEstAdd(LogEst a, LogEst b){
 }

 /*
-** Convert an integer into a LogEst.  In other words, compute a
-** good approximatation for 10*log2(x).
+** Convert an integer into a LogEst.  In other words, compute an
+** approximation for 10*log2(x).
 */
 LogEst sqlite3LogEst(u64 x){
  static LogEst a[] = { 0, 2, 3, 5, 6, 7, 8, 9 };
--- a/src/where.c
+++ b/src/where.c
@@ -227,7 +227,7 @@ static int whereClauseInsert(WhereClause *pWC, Expr *p, u8 wtFlags){
  if( p && ExprHasProperty(p, EP_Unlikely) ){
    pTerm->truthProb = sqlite3LogEst(p->iTable) - 99;
  }else{
-    pTerm->truthProb = -1;
+    pTerm->truthProb = 1;
  }
  pTerm->pExpr = sqlite3ExprSkipCollate(p);
  pTerm->wtFlags = wtFlags;
@@ -1956,7 +1956,8 @@ static void whereKeyStats(
      iLower = 0;
      iUpper = aSample[0].anLt[iCol];
    }else{
-      iUpper = i>=pIdx->nSample ? pIdx->aiRowEst[0] : aSample[i].anLt[iCol];
+      i64 nRow0 = sqlite3LogEstToInt(pIdx->aiRowLogEst[0]);
+      iUpper = i>=pIdx->nSample ? nRow0 : aSample[i].anLt[iCol];
      iLower = aSample[i-1].anEq[iCol] + aSample[i-1].anLt[iCol];
    }
    aStat[1] = (pIdx->nKeyCol>iCol ? pIdx->aAvgEq[iCol] : 1);
@@ -1975,6 +1976,29 @@ static void whereKeyStats(
 }
 #endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */

+/*
+** If it is not NULL, pTerm is a term that provides an upper or lower
+** bound on a range scan. Without considering pTerm, it is estimated 
+** that the scan will visit nNew rows. This function returns the number
+** estimated to be visited after taking pTerm into account.
+**
+** If the user explicitly specified a likelihood() value for this term
+** (truthProb<=0), the return value is the input row count scaled by that
+** likelihood. Otherwise a TERM_VNULL ("IS NOT NULL") term leaves the
+** estimate unchanged and any other term is given a likelihood of 0.25.
+*/
+static LogEst whereRangeAdjust(WhereTerm *pTerm, LogEst nNew){
+  LogEst nRet = nNew;
+  if( pTerm ){
+    if( pTerm->truthProb<=0 ){
+      nRet += pTerm->truthProb;
+    }else if( (pTerm->wtFlags & TERM_VNULL)==0 ){
+      nRet -= 20;        assert( 20==sqlite3LogEst(4) );
+    }
+  }
+  return nRet;
+}
+
 /*
 ** This function is used to estimate the number of rows that will be visited
 ** by scanning an index for a range of values. The range may have an upper
@@ -2067,7 +2091,7 @@ static int whereRangeScanEst(
    /* Determine iLower and iUpper using ($P) only. */
    if( nEq==0 ){
      iLower = 0;
-      iUpper = p->aiRowEst[0];
+      iUpper = sqlite3LogEstToInt(p->aiRowLogEst[0]);
    }else{
      /* Note: this call could be optimized away - since the same values must 
      ** have been requested when testing key $P in whereEqualScanEst().  */
@@ -2127,17 +2151,18 @@ static int whereRangeScanEst(
  UNUSED_PARAMETER(pBuilder);
 #endif
  assert( pLower || pUpper );
-  /* TUNING:  Each inequality constraint reduces the search space 4-fold.
-  ** A BETWEEN operator, therefore, reduces the search space 16-fold */
-  nNew = nOut;
-  if( pLower && (pLower->wtFlags & TERM_VNULL)==0 ){
-    nNew -= 20;        assert( 20==sqlite3LogEst(4) );
-    nOut--;
-  }
-  if( pUpper ){
-    nNew -= 20;        assert( 20==sqlite3LogEst(4) );
-    nOut--;
-  }
+  assert( pUpper==0 || (pUpper->wtFlags & TERM_VNULL)==0 );
+  nNew = whereRangeAdjust(pLower, nOut);
+  nNew = whereRangeAdjust(pUpper, nNew);
+
+  /* TUNING: If there is both an upper and lower limit, assume the range is
+  ** reduced by an additional 75%. This means that, by default, an open-ended
+  ** range query (e.g. col > ?) is assumed to match 1/4 of the rows in the
+  ** index, while a closed range (e.g. col BETWEEN ? AND ?) is estimated to
+  ** match 1/64 of the index (1/4 per bound, times the extra 1/4). */
+  if( pLower && pUpper ) nNew -= 20;
+
+  nOut -= (pLower!=0) + (pUpper!=0);
  if( nNew<10 ) nNew = 10;
  if( nNew<nOut ) nOut = nNew;
  pLoop->nOut = (LogEst)nOut;
@@ -2234,6 +2259,7 @@ static int whereInScanEst(
  tRowcnt *pnRow       /* Write the revised row estimate here */
 ){
  Index *p = pBuilder->pNew->u.btree.pIndex;
+  i64 nRow0 = sqlite3LogEstToInt(p->aiRowLogEst[0]);
  int nRecValid = pBuilder->nRecValid;
  int rc = SQLITE_OK;     /* Subfunction return code */
  tRowcnt nEst;           /* Number of rows for a single term */
@@ -2242,14 +2268,14 @@ static int whereInScanEst(

  assert( p->aSample!=0 );
  for(i=0; rc==SQLITE_OK && i<pList->nExpr; i++){
-    nEst = p->aiRowEst[0];
+    nEst = nRow0;
    rc = whereEqualScanEst(pParse, pBuilder, pList->a[i].pExpr, &nEst);
    nRowEst += nEst;
    pBuilder->nRecValid = nRecValid;
  }

  if( rc==SQLITE_OK ){
-    if( nRowEst > p->aiRowEst[0] ) nRowEst = p->aiRowEst[0];
+    if( nRowEst > nRow0 ) nRowEst = nRow0;
    *pnRow = nRowEst;
    WHERETRACE(0x10,("IN row estimate: est=%g\n", nRowEst));
  }
@@ -3760,9 +3786,11 @@ static int whereLoopCheaperProperSubset(
 */
 static void whereLoopAdjustCost(const WhereLoop *p, WhereLoop *pTemplate){
  if( (pTemplate->wsFlags & WHERE_INDEXED)==0 ) return;
+  if( (pTemplate->wsFlags & WHERE_SKIPSCAN)!=0 ) return;
  for(; p; p=p->pNextLoop){
    if( p->iTab!=pTemplate->iTab ) continue;
    if( (p->wsFlags & WHERE_INDEXED)==0 ) continue;
+    if( (p->wsFlags & WHERE_SKIPSCAN)!=0 ) continue;
    if( whereLoopCheaperProperSubset(p, pTemplate) ){
      /* Adjust pTemplate cost downward so that it is cheaper than its 
      ** subset p */
@@ -3987,13 +4015,20 @@ static void whereLoopOutputAdjust(WhereClause *pWC, WhereLoop *pLoop){
      if( pX==pTerm ) break;
      if( pX->iParent>=0 && (&pWC->a[pX->iParent])==pTerm ) break;
    }
-    if( j<0 ) pLoop->nOut += pTerm->truthProb;
+    if( j<0 ){
+      pLoop->nOut += (pTerm->truthProb<=0 ? pTerm->truthProb : -1);
+    }
  }
 }

 /*
-** We have so far matched pBuilder->pNew->u.btree.nEq terms of the index pIndex.
-** Try to match one more.
+** We have so far matched pBuilder->pNew->u.btree.nEq terms of the 
+** index pIndex. Try to match one more.
+**
+** When this function is called, pBuilder->pNew->nOut contains the 
+** number of rows expected to be visited by filtering using the nEq 
+** terms only. If it is modified, this value is restored before this 
+** function returns.
 **
 ** If pProbe->tnum==0, that means pIndex is a fake index used for the
 ** INTEGER PRIMARY KEY.
@@ -4019,7 +4054,6 @@ static int whereLoopAddBtreeIndex(
  LogEst saved_nOut;              /* Original value of pNew->nOut */
  int iCol;                       /* Index of the column in the table */
  int rc = SQLITE_OK;             /* Return code */
-  LogEst nRowEst;                 /* Estimated index selectivity */
  LogEst rLogSize;                /* Logarithm of table size */
  WhereTerm *pTop = 0, *pBtm = 0; /* Top and bottom range constraints */

@@ -4040,11 +4074,8 @@ static int whereLoopAddBtreeIndex(
  assert( pNew->u.btree.nEq<=pProbe->nKeyCol );
  if( pNew->u.btree.nEq < pProbe->nKeyCol ){
    iCol = pProbe->aiColumn[pNew->u.btree.nEq];
-    nRowEst = sqlite3LogEst(pProbe->aiRowEst[pNew->u.btree.nEq+1]);
-    if( nRowEst==0 && pProbe->onError==OE_None ) nRowEst = 1;
  }else{
    iCol = -1;
-    nRowEst = 0;
  }
  pTerm = whereScanInit(&scan, pBuilder->pWC, pSrc->iCursor, iCol,
                        opMask, pProbe);
@@ -4055,18 +4086,23 @@ static int whereLoopAddBtreeIndex(
  saved_prereq = pNew->prereq;
  saved_nOut = pNew->nOut;
  pNew->rSetup = 0;
-  rLogSize = estLog(sqlite3LogEst(pProbe->aiRowEst[0]));
+  rLogSize = estLog(pProbe->aiRowLogEst[0]);

  /* Consider using a skip-scan if there are no WHERE clause constraints
  ** available for the left-most terms of the index, and if the average
-  ** number of repeats in the left-most terms is at least 18.  The magic
-  ** number 18 was found by experimentation to be the payoff point where
-  ** skip-scan become faster than a full-scan.
-  */
+  ** number of repeats in the left-most terms is at least 18. 
+  **
+  ** The magic number 18 is selected on the basis that scanning 17 rows
+  ** is almost always quicker than an index seek (even though if the index
+  ** contains fewer than 2^17 rows we assume otherwise in other parts of
+  ** the code). And, even if it is not, it should not be too much slower. 
+  ** On the other hand, the extra seeks could end up being significantly
+  ** more expensive.  */
+  assert( 42==sqlite3LogEst(18) );
  if( pTerm==0
   && saved_nEq==saved_nSkip
   && saved_nEq+1<pProbe->nKeyCol
-   && pProbe->aiRowEst[saved_nEq+1]>=18  /* TUNING: Minimum for skip-scan */
+   && pProbe->aiRowLogEst[saved_nEq+1]>=42  /* TUNING: Minimum for skip-scan */
   && (rc = whereLoopResize(db, pNew, pNew->nLTerm+1))==SQLITE_OK
  ){
    LogEst nIter;
@@ -4074,34 +4110,40 @@ static int whereLoopAddBtreeIndex(
    pNew->u.btree.nSkip++;
    pNew->aLTerm[pNew->nLTerm++] = 0;
    pNew->wsFlags |= WHERE_SKIPSCAN;
-    nIter = sqlite3LogEst(pProbe->aiRowEst[0]/pProbe->aiRowEst[saved_nEq+1]);
-    pNew->rRun = rLogSize + nIter;
-    pNew->nOut += nIter;
-    whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, nIter);
+    nIter = pProbe->aiRowLogEst[saved_nEq] - pProbe->aiRowLogEst[saved_nEq+1];
+    pNew->nOut -= nIter;
+    whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, nIter + nInMul);
    pNew->nOut = saved_nOut;
  }
  for(; rc==SQLITE_OK && pTerm!=0; pTerm = whereScanNext(&scan)){
+    u16 eOp = pTerm->eOperator;   /* Shorthand for pTerm->eOperator */
+    LogEst rCostIdx;
+    LogEst nOutUnadjusted;        /* nOut before IN() and WHERE adjustments */
    int nIn = 0;
 #ifdef SQLITE_ENABLE_STAT3_OR_STAT4
    int nRecValid = pBuilder->nRecValid;
 #endif
-    if( (pTerm->eOperator==WO_ISNULL || (pTerm->wtFlags&TERM_VNULL)!=0)
+    if( (eOp==WO_ISNULL || (pTerm->wtFlags&TERM_VNULL)!=0)
     && (iCol<0 || pSrc->pTab->aCol[iCol].notNull)
    ){
      continue; /* ignore IS [NOT] NULL constraints on NOT NULL columns */
    }
    if( pTerm->prereqRight & pNew->maskSelf ) continue;

-    assert( pNew->nOut==saved_nOut );
-
    pNew->wsFlags = saved_wsFlags;
    pNew->u.btree.nEq = saved_nEq;
    pNew->nLTerm = saved_nLTerm;
    if( whereLoopResize(db, pNew, pNew->nLTerm+1) ) break; /* OOM */
    pNew->aLTerm[pNew->nLTerm++] = pTerm;
    pNew->prereq = (saved_prereq | pTerm->prereqRight) & ~pNew->maskSelf;
-    pNew->rRun = rLogSize; /* Baseline cost is log2(N).  Adjustments below */
-    if( pTerm->eOperator & WO_IN ){
+
+    assert( nInMul==0
+        || (pNew->wsFlags & WHERE_COLUMN_NULL)!=0 
+        || (pNew->wsFlags & WHERE_COLUMN_IN)!=0 
+        || (pNew->wsFlags & WHERE_SKIPSCAN)!=0 
+    );
+
+    if( eOp & WO_IN ){
      Expr *pExpr = pTerm->pExpr;
      pNew->wsFlags |= WHERE_COLUMN_IN;
      if( ExprHasProperty(pExpr, EP_xIsSelect) ){
@@ -4113,83 +4155,117 @@ static int whereLoopAddBtreeIndex(
      }
      assert( nIn>0 );  /* RHS always has 2 or more terms...  The parser
                        ** changes "x IN (?)" into "x=?". */
-      pNew->rRun += nIn;
-      pNew->u.btree.nEq++;
-      pNew->nOut = nRowEst + nInMul + nIn;
-    }else if( pTerm->eOperator & (WO_EQ) ){
-      assert(
-        (pNew->wsFlags & (WHERE_COLUMN_NULL|WHERE_COLUMN_IN|WHERE_SKIPSCAN))!=0
-        || nInMul==0
-      );
+
+    }else if( eOp & (WO_EQ) ){
      pNew->wsFlags |= WHERE_COLUMN_EQ;
-      if( iCol<0 || (nInMul==0 && pNew->u.btree.nEq==pProbe->nKeyCol-1)){
-        assert( (pNew->wsFlags & WHERE_COLUMN_IN)==0 || iCol<0 );
+      if( iCol<0 || (nInMul==0 && pNew->u.btree.nEq==pProbe->nKeyCol-1) ){
        if( iCol>=0 && pProbe->onError==OE_None ){
          pNew->wsFlags |= WHERE_UNQ_WANTED;
        }else{
          pNew->wsFlags |= WHERE_ONEROW;
        }
      }
-      pNew->u.btree.nEq++;
-      pNew->nOut = nRowEst + nInMul;
-    }else if( pTerm->eOperator & (WO_ISNULL) ){
+    }else if( eOp & WO_ISNULL ){
      pNew->wsFlags |= WHERE_COLUMN_NULL;
-      pNew->u.btree.nEq++;
-      /* TUNING: IS NULL selects 2 rows */
-      nIn = 10;  assert( 10==sqlite3LogEst(2) );
-      pNew->nOut = nRowEst + nInMul + nIn;
-    }else if( pTerm->eOperator & (WO_GT|WO_GE) ){
-      testcase( pTerm->eOperator & WO_GT );
-      testcase( pTerm->eOperator & WO_GE );
+    }else if( eOp & (WO_GT|WO_GE) ){
+      testcase( eOp & WO_GT );
+      testcase( eOp & WO_GE );
      pNew->wsFlags |= WHERE_COLUMN_RANGE|WHERE_BTM_LIMIT;
      pBtm = pTerm;
      pTop = 0;
    }else{
-      assert( pTerm->eOperator & (WO_LT|WO_LE) );
-      testcase( pTerm->eOperator & WO_LT );
-      testcase( pTerm->eOperator & WO_LE );
+      assert( eOp & (WO_LT|WO_LE) );
+      testcase( eOp & WO_LT );
+      testcase( eOp & WO_LE );
      pNew->wsFlags |= WHERE_COLUMN_RANGE|WHERE_TOP_LIMIT;
      pTop = pTerm;
      pBtm = (pNew->wsFlags & WHERE_BTM_LIMIT)!=0 ?
                     pNew->aLTerm[pNew->nLTerm-2] : 0;
    }
+
+    /* At this point pNew->nOut is set to the number of rows expected to
+    ** be visited by the index scan before considering term pTerm, or the
+    ** values of nIn and nInMul. In other words, assuming that all 
+    ** "x IN(...)" terms are replaced with "x = ?". This block updates
+    ** the value of pNew->nOut to account for pTerm (but not nIn/nInMul).  */
+    assert( pNew->nOut==saved_nOut );
    if( pNew->wsFlags & WHERE_COLUMN_RANGE ){
-      /* Adjust nOut and rRun for STAT3 range values */
-      assert( pNew->nOut==saved_nOut );
+      /* Adjust nOut using stat3/stat4 data. Or, if there is no stat3/stat4
+      ** data, using some other estimate.  */
      whereRangeScanEst(pParse, pBuilder, pBtm, pTop, pNew);
-    }
+    }else{
+      int nEq = ++pNew->u.btree.nEq;
+      assert( eOp & (WO_ISNULL|WO_EQ|WO_IN) );
+
+      assert( pNew->nOut==saved_nOut );
+      if( pTerm->truthProb<=0 && iCol>=0 ){
+        assert( (eOp & WO_IN) || nIn==0 );
+        pNew->nOut += pTerm->truthProb;
+        pNew->nOut -= nIn;
+        pNew->wsFlags |= WHERE_LIKELIHOOD;
+      }else{
 #ifdef SQLITE_ENABLE_STAT3_OR_STAT4
-    if( nInMul==0 
-     && pProbe->nSample 
-     && pNew->u.btree.nEq<=pProbe->nSampleCol
-     && OptimizationEnabled(db, SQLITE_Stat3) 
-    ){
-      Expr *pExpr = pTerm->pExpr;
-      tRowcnt nOut = 0;
-      if( (pTerm->eOperator & (WO_EQ|WO_ISNULL))!=0 ){
-        testcase( pTerm->eOperator & WO_EQ );
-        testcase( pTerm->eOperator & WO_ISNULL );
-        rc = whereEqualScanEst(pParse, pBuilder, pExpr->pRight, &nOut);
-      }else if( (pTerm->eOperator & WO_IN)
-             &&  !ExprHasProperty(pExpr, EP_xIsSelect)  ){
-        rc = whereInScanEst(pParse, pBuilder, pExpr->x.pList, &nOut);
-      }
-      assert( nOut==0 || rc==SQLITE_OK );
-      if( nOut ){
-        pNew->nOut = sqlite3LogEst(nOut);
-        if( pNew->nOut>saved_nOut ) pNew->nOut = saved_nOut;
-      }
-    }
+        tRowcnt nOut = 0;
+        if( nInMul==0 
+         && pProbe->nSample 
+         && pNew->u.btree.nEq<=pProbe->nSampleCol
+         && OptimizationEnabled(db, SQLITE_Stat3) 
+         && ((eOp & WO_IN)==0 || !ExprHasProperty(pTerm->pExpr, EP_xIsSelect))
+         && (pNew->wsFlags & WHERE_LIKELIHOOD)==0
+        ){
+          Expr *pExpr = pTerm->pExpr;
+          if( (eOp & (WO_EQ|WO_ISNULL))!=0 ){
+            testcase( eOp & WO_EQ );
+            testcase( eOp & WO_ISNULL );
+            rc = whereEqualScanEst(pParse, pBuilder, pExpr->pRight, &nOut);
+          }else{
+            rc = whereInScanEst(pParse, pBuilder, pExpr->x.pList, &nOut);
+          }
+          assert( rc!=SQLITE_OK || nOut>0 );
+          if( rc==SQLITE_NOTFOUND ) rc = SQLITE_OK;
+          if( rc!=SQLITE_OK ) break;          /* Jump out of the pTerm loop */
+          if( nOut ){
+            pNew->nOut = sqlite3LogEst(nOut);
+            if( pNew->nOut>saved_nOut ) pNew->nOut = saved_nOut;
+            pNew->nOut -= nIn;
+          }
+        }
+        if( nOut==0 )
 #endif
-    if( (pNew->wsFlags & (WHERE_IDX_ONLY|WHERE_IPK))==0 ){
-      /* Each row involves a step of the index, then a binary search of
-      ** the main table */
-      pNew->rRun =  sqlite3LogEstAdd(pNew->rRun,rLogSize>27 ? rLogSize-17 : 10);
+        {
+          pNew->nOut += (pProbe->aiRowLogEst[nEq] - pProbe->aiRowLogEst[nEq-1]);
+          if( eOp & WO_ISNULL ){
+            /* TUNING: If there is no likelihood() value, assume that a 
+            ** "col IS NULL" expression matches twice as many rows 
+            ** as (col=?). */
+            pNew->nOut += 10;
+          }
+        }
+      }
    }
-    /* Step cost for each output row */
-    pNew->rRun = sqlite3LogEstAdd(pNew->rRun, pNew->nOut);
+
+    /* Set rCostIdx to the cost of visiting the selected rows in the index.
+    ** Set pNew->rRun to the sum of that and the cost of the index seek
+    ** (rLogSize). Then, if this is a non-covering index, add the cost of
+    ** visiting the rows in the main table.  */
+    rCostIdx = pNew->nOut + 1 + (15*pProbe->szIdxRow)/pSrc->pTab->szTabRow;
+    pNew->rRun = sqlite3LogEstAdd(rLogSize, rCostIdx);
+    if( (pNew->wsFlags & (WHERE_IDX_ONLY|WHERE_IPK))==0 ){
+      pNew->rRun = sqlite3LogEstAdd(pNew->rRun, pNew->nOut + 16);
+    }
+
+    nOutUnadjusted = pNew->nOut;
+    pNew->rRun += nInMul + nIn;
+    pNew->nOut += nInMul + nIn;
    whereLoopOutputAdjust(pBuilder->pWC, pNew);
    rc = whereLoopInsert(pBuilder, pNew);
+
+    if( pNew->wsFlags & WHERE_COLUMN_RANGE ){
+      pNew->nOut = saved_nOut;
+    }else{
+      pNew->nOut = nOutUnadjusted;
+    }
+
    if( (pNew->wsFlags & WHERE_TOP_LIMIT)==0
     && pNew->u.btree.nEq<(pProbe->nKeyCol + (pProbe->zName!=0))
    ){
@@ -4273,6 +4349,29 @@ static int whereUsablePartialIndex(int iTab, WhereClause *pWC, Expr *pWhere){
 ** Add all WhereLoop objects for a single table of the join where the table
 ** is idenfied by pBuilder->pNew->iTab.  That table is guaranteed to be
 ** a b-tree table, not a virtual table.
+**
+** The costs (WhereLoop.rRun) of the b-tree loops added by this function
+** are calculated as follows:
+**
+** For a full scan, assuming the table (or index) contains nRow rows:
+**
+**     cost = nRow * 3.0                    // full-table scan
+**     cost = nRow * K                      // scan of covering index
+**     cost = nRow * (K+3.0)                // scan of non-covering index
+**
+** where K is a value between 1.1 and 3.0 set based on the relative 
+** estimated average size of the index and table records.
+**
+** For an index scan, where nVisit is the number of index rows visited
+** by the scan, and nSeek is the number of seek operations required on 
+** the index b-tree:
+**
+**     cost = nSeek * (log(nRow) + K * nVisit)          // covering index
+**     cost = nSeek * (log(nRow) + (K+3.0) * nVisit)    // non-covering index
+**
+** Normally, nSeek is 1. nSeek values greater than 1 come about if the
+** WHERE clause includes "x IN (....)" terms used in place of "x=?", or
+** when implicit "x IN (SELECT x FROM tbl)" terms are added for skip-scans.
 */
 static int whereLoopAddBtree(
  WhereLoopBuilder *pBuilder, /* WHERE clause information */
@@ -4281,7 +4380,7 @@ static int whereLoopAddBtree(
  WhereInfo *pWInfo;          /* WHERE analysis context */
  Index *pProbe;              /* An index we are evaluating */
  Index sPk;                  /* A fake index object for the primary key */
-  tRowcnt aiRowEstPk[2];      /* The aiRowEst[] value for the sPk index */
+  LogEst aiRowEstPk[2];       /* The aiRowLogEst[] value for the sPk index */
  i16 aiColumnPk = -1;        /* The aColumn[] value for the sPk index */
  SrcList *pTabList;          /* The FROM clause */
  struct SrcList_item *pSrc;  /* The FROM clause btree term to add */
@@ -4316,11 +4415,12 @@ static int whereLoopAddBtree(
    memset(&sPk, 0, sizeof(Index));
    sPk.nKeyCol = 1;
    sPk.aiColumn = &aiColumnPk;
-    sPk.aiRowEst = aiRowEstPk;
+    sPk.aiRowLogEst = aiRowEstPk;
    sPk.onError = OE_Replace;
    sPk.pTable = pTab;
-    aiRowEstPk[0] = pTab->nRowEst;
-    aiRowEstPk[1] = 1;
+    sPk.szIdxRow = pTab->szTabRow;
+    aiRowEstPk[0] = pTab->nRowLogEst;
+    aiRowEstPk[1] = 0;
    pFirst = pSrc->pTab->pIndex;
    if( pSrc->notIndexed==0 ){
      /* The real indices of the table are only considered if the
@@ -4329,7 +4429,7 @@ static int whereLoopAddBtree(
    }
    pProbe = &sPk;
  }
-  rSize = sqlite3LogEst(pTab->nRowEst);
+  rSize = pTab->nRowLogEst;
  rLogSize = estLog(rSize);

 #ifndef SQLITE_OMIT_AUTOMATIC_INDEX
@@ -4379,6 +4479,7 @@ static int whereLoopAddBtree(
     && !whereUsablePartialIndex(pNew->iTab, pWC, pProbe->pPartIdxWhere) ){
      continue;  /* Partial index inappropriate for this query */
    }
+    rSize = pProbe->aiRowLogEst[0];
    pNew->u.btree.nEq = 0;
    pNew->u.btree.nSkip = 0;
    pNew->nLTerm = 0;
@@ -4396,10 +4497,8 @@ static int whereLoopAddBtree(

      /* Full table scan */
      pNew->iSortIdx = b ? iSortIdx : 0;
-      /* TUNING: Cost of full table scan is 3*(N + log2(N)).
-      **  +  The extra 3 factor is to encourage the use of indexed lookups
-      **     over full scans.  FIXME */
-      pNew->rRun = sqlite3LogEstAdd(rSize,rLogSize) + 16;
+      /* TUNING: Cost of full table scan is (N*3.0). */
+      pNew->rRun = rSize + 16;
      whereLoopOutputAdjust(pWC, pNew);
      rc = whereLoopInsert(pBuilder, pNew);
      pNew->nOut = rSize;
@@ -4426,35 +4525,16 @@ static int whereLoopAddBtree(
          )
      ){
        pNew->iSortIdx = b ? iSortIdx : 0;
-        /* TUNING:  The base cost of an index scan is N + log2(N).
-        ** The log2(N) is for the initial seek to the beginning and the N
-        ** is for the scan itself. */
-        pNew->rRun = sqlite3LogEstAdd(rSize, rLogSize);
-        if( m==0 ){
-          /* TUNING: Cost of a covering index scan is K*(N + log2(N)).
-          **  +  The extra factor K of between 1.1 and 3.0 that depends
-          **     on the relative sizes of the table and the index.  K
-          **     is smaller for smaller indices, thus favoring them.
-          **     The upper bound on K (3.0) matches the penalty factor
-          **     on a full table scan that tries to encourage the use of
-          **     indexed lookups over full scans.
-          */
-          pNew->rRun +=  1 + (15*pProbe->szIdxRow)/pTab->szTabRow;
-        }else{
-          /* TUNING: The cost of scanning a non-covering index is multiplied
-          ** by log2(N) to account for the binary search of the main table
-          ** that must happen for each row of the index.
-          ** TODO: Should there be a multiplier here, analogous to the 3x
-          ** multiplier for a fulltable scan or covering index scan, to
-          ** further discourage the use of an index scan?  Or is the log2(N)
-          ** term sufficient discouragement?
-          ** TODO: What if some or all of the WHERE clause terms can be
-          ** computed without reference to the original table.  Then the
-          ** penality should reduce to logK where K is the number of output
-          ** rows.
-          */
-          pNew->rRun += rLogSize;
+
+        /* The cost of visiting the index rows is N*K, where K is
+        ** between 1.1 and 3.0, depending on the relative sizes of the
+        ** index and table rows. If this is a non-covering index scan,
+        ** also add the cost of visiting table rows (N*3.0).  */
+        pNew->rRun = rSize + 1 + (15*pProbe->szIdxRow)/pTab->szTabRow;
+        if( m!=0 ){
+          pNew->rRun = sqlite3LogEstAdd(pNew->rRun, rSize+16);
        }
+
        whereLoopOutputAdjust(pWC, pNew);
        rc = whereLoopInsert(pBuilder, pNew);
        pNew->nOut = rSize;
@@ -4732,8 +4812,7 @@ static int whereLoopAddOr(WhereLoopBuilder *pBuilder, Bitmask mExtra){
      pNew->iSortIdx = 0;
      memset(&pNew->u, 0, sizeof(pNew->u));
      for(i=0; rc==SQLITE_OK && i<sSum.n; i++){
-        /* TUNING: Multiple by 3.5 for the secondary table lookup */
-        pNew->rRun = sSum.a[i].rRun + 18;
+        pNew->rRun = sSum.a[i].rRun;
        pNew->nOut = sSum.a[i].nOut;
        pNew->prereq = sSum.a[i].prereq;
        rc = whereLoopInsert(pBuilder, pNew);
@@ -5179,22 +5258,27 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){
                       pWInfo->pOrderBy, pFrom, pWInfo->wctrlFlags,
                       iLoop, pWLoop, &revMask);
          if( isOrdered>=0 && isOrdered<nOrderBy ){
-            /* TUNING: Estimated cost of sorting is N*log(N).
-            ** If the order-by clause has X terms but only the last Y terms
-            ** are out of order, then block-sorting will reduce the sorting
-            ** cost to N*log(N)*log(Y/X).  The log(Y/X) term is computed
-            ** by rScale.
-            ** TODO: Should the sorting cost get a small multiplier to help
-            ** discourage the use of sorting and encourage the use of index
-            ** scans instead?
-            */
+            /* TUNING: The estimated cost of a full external sort, where N
+            ** is the number of rows to sort, is:
+            **
+            **   cost = (3.0 * N * log(N)).
+            ** 
+            ** Or, if the order-by clause has X terms but only the last Y 
+            ** terms are out of order, then block-sorting will reduce the 
+            ** sorting cost to:
+            **
+            **   cost = (3.0 * N * log(N)) * (Y/X)
+            **
+            ** The (Y/X) term is implemented using stack variable rScale
+            ** below.  */
            LogEst rScale, rSortCost;
-            assert( nOrderBy>0 );
+            assert( nOrderBy>0 && 66==sqlite3LogEst(100) );
            rScale = sqlite3LogEst((nOrderBy-isOrdered)*100/nOrderBy) - 66;
-            rSortCost = nRowEst + estLog(nRowEst) + rScale;
+            rSortCost = nRowEst + estLog(nRowEst) + rScale + 16;
+
            /* TUNING: The cost of implementing DISTINCT using a B-TREE is
-            ** also N*log(N) but it has a larger constant of proportionality.
-            ** Multiply by 3.0. */
+            ** similar but with a larger constant of proportionality. 
+            ** Multiply by an additional factor of 3.0.  */
            if( pWInfo->wctrlFlags & WHERE_WANT_DISTINCT ){
              rSortCost += 16;
            }
--- a/src/whereInt.h
+++ b/src/whereInt.h
@@ -458,3 +458,4 @@ struct WhereInfo {
 #define WHERE_AUTO_INDEX   0x00004000  /* Uses an ephemeral index */
 #define WHERE_SKIPSCAN     0x00008000  /* Uses the skip-scan algorithm */
 #define WHERE_UNQ_WANTED   0x00010000  /* WHERE_ONEROW would have been helpful*/
+#define WHERE_LIKELIHOOD   0x00020000  /* A likelihood() is affecting nOut */
--- a/test/analyze3.test
+++ b/test/analyze3.test
@@ -103,12 +103,21 @@ do_test analyze3-1.1.1 {
  }
 } {1}

+do_execsql_test analyze3-1.1.x {
+  SELECT count(*) FROM t1 WHERE x>200 AND x<300;
+  SELECT count(*) FROM t1 WHERE x>0 AND x<1100;
+} {99 1000}
+
+# The first of the following two SELECT statements visits 99 rows. So
+# it is better to use the index. But the second visits every row in 
+# the table (1000 in total) so it is better to do a full-table scan.
+#
 do_eqp_test analyze3-1.1.2 {
  SELECT sum(y) FROM t1 WHERE x>200 AND x<300
 } {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (x>? AND x<?)}}
 do_eqp_test analyze3-1.1.3 {
  SELECT sum(y) FROM t1 WHERE x>0 AND x<1100 
-} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (x>? AND x<?)}}
+} {0 0 0 {SCAN TABLE t1}}

 do_test analyze3-1.1.4 {
  sf_execsql { SELECT sum(y) FROM t1 WHERE x>200 AND x<300 }
@@ -125,17 +134,17 @@ do_test analyze3-1.1.6 {
 } {199 0 14850}
 do_test analyze3-1.1.7 {
  sf_execsql { SELECT sum(y) FROM t1 WHERE x>0 AND x<1100 }
-} {2000 0 499500}
+} {999 999 499500}
 do_test analyze3-1.1.8 {
  set l [string range "0" 0 end]
  set u [string range "1100" 0 end]
  sf_execsql { SELECT sum(y) FROM t1 WHERE x>$l AND x<$u }
-} {2000 0 499500}
+} {999 999 499500}
 do_test analyze3-1.1.9 {
  set l [expr int(0)]
  set u [expr int(1100)]
  sf_execsql { SELECT sum(y) FROM t1 WHERE x>$l AND x<$u }
-} {2000 0 499500}
+} {999 999 499500}


 # The following tests are similar to the block above. The difference is
@@ -152,12 +161,17 @@ do_test analyze3-1.2.1 {
    ANALYZE;
  }
 } {}
+do_execsql_test analyze3-2.1.x {
+  SELECT count(*) FROM t2 WHERE x>1 AND x<2;
+  SELECT count(*) FROM t2 WHERE x>0 AND x<99;
+} {200 990}
 do_eqp_test analyze3-1.2.2 {
  SELECT sum(y) FROM t2 WHERE x>1 AND x<2
 } {0 0 0 {SEARCH TABLE t2 USING INDEX i2 (x>? AND x<?)}}
 do_eqp_test analyze3-1.2.3 {
  SELECT sum(y) FROM t2 WHERE x>0 AND x<99
-} {0 0 0 {SEARCH TABLE t2 USING INDEX i2 (x>? AND x<?)}}
+} {0 0 0 {SCAN TABLE t2}}
+
 do_test analyze3-1.2.4 {
  sf_execsql { SELECT sum(y) FROM t2 WHERE x>12 AND x<20 }
 } {161 0 4760}
@@ -173,17 +187,17 @@ do_test analyze3-1.2.6 {
 } {161 0 integer integer 4760}
 do_test analyze3-1.2.7 {
  sf_execsql { SELECT sum(y) FROM t2 WHERE x>0 AND x<99 }
-} {1981 0 490555}
+} {999 999 490555}
 do_test analyze3-1.2.8 {
  set l [string range "0" 0 end]
  set u [string range "99" 0 end]
  sf_execsql {SELECT typeof($l), typeof($u), sum(y) FROM t2 WHERE x>$l AND x<$u}
-} {1981 0 text text 490555}
+} {999 999 text text 490555}
 do_test analyze3-1.2.9 {
  set l [expr int(0)]
  set u [expr int(99)]
  sf_execsql {SELECT typeof($l), typeof($u), sum(y) FROM t2 WHERE x>$l AND x<$u}
-} {1981 0 integer integer 490555}
+} {999 999 integer integer 490555}

 # Same tests a third time. This time, column x has INTEGER affinity and
 # is not the leftmost column of the table. This triggered a bug causing
@@ -199,12 +213,16 @@ do_test analyze3-1.3.1 {
    ANALYZE;
  }
 } {}
+do_execsql_test analyze3-1.3.x {
+  SELECT count(*) FROM t3 WHERE x>200 AND x<300;
+  SELECT count(*) FROM t3 WHERE x>0 AND x<1100
+} {99 1000}
 do_eqp_test analyze3-1.3.2 {
  SELECT sum(y) FROM t3 WHERE x>200 AND x<300
 } {0 0 0 {SEARCH TABLE t3 USING INDEX i3 (x>? AND x<?)}}
 do_eqp_test analyze3-1.3.3 {
  SELECT sum(y) FROM t3 WHERE x>0 AND x<1100
-} {0 0 0 {SEARCH TABLE t3 USING INDEX i3 (x>? AND x<?)}}
+} {0 0 0 {SCAN TABLE t3}}

 do_test analyze3-1.3.4 {
  sf_execsql { SELECT sum(y) FROM t3 WHERE x>200 AND x<300 }
@@ -221,17 +239,17 @@ do_test analyze3-1.3.6 {
 } {199 0 14850}
 do_test analyze3-1.3.7 {
  sf_execsql { SELECT sum(y) FROM t3 WHERE x>0 AND x<1100 }
-} {2000 0 499500}
+} {999 999 499500}
 do_test analyze3-1.3.8 {
  set l [string range "0" 0 end]
  set u [string range "1100" 0 end]
  sf_execsql { SELECT sum(y) FROM t3 WHERE x>$l AND x<$u }
-} {2000 0 499500}
+} {999 999 499500}
 do_test analyze3-1.3.9 {
  set l [expr int(0)]
  set u [expr int(1100)]
  sf_execsql { SELECT sum(y) FROM t3 WHERE x>$l AND x<$u }
-} {2000 0 499500}
+} {999 999 499500}

 #-------------------------------------------------------------------------
 # Test that the values of bound SQL variables may be used for the LIKE
--- a/test/analyze9.test
+++ b/test/analyze9.test
@@ -566,7 +566,7 @@ foreach {tn schema} {
 drop_all_tables
 do_test 13.1 {
  execsql {
-    CREATE TABLE t1(a, b, c);
+    CREATE TABLE t1(a, b, c, d);
    CREATE INDEX i1 ON t1(a);
    CREATE INDEX i2 ON t1(b, c);
  }
@@ -577,16 +577,16 @@ do_test 13.1 {
  execsql ANALYZE
 } {}
 do_eqp_test 13.2.1 {
-  SELECT * FROM t1 WHERE a='abc' AND rowid<15 AND b<20
+  SELECT * FROM t1 WHERE a='abc' AND rowid<15 AND b<12
 } {/SEARCH TABLE t1 USING INDEX i1/}
 do_eqp_test 13.2.2 {
-  SELECT * FROM t1 WHERE a='abc' AND rowid<'15' AND b<20
+  SELECT * FROM t1 WHERE a='abc' AND rowid<'15' AND b<12
 } {/SEARCH TABLE t1 USING INDEX i1/}
 do_eqp_test 13.3.1 {
-  SELECT * FROM t1 WHERE a='abc' AND rowid<100 AND b<20
+  SELECT * FROM t1 WHERE a='abc' AND rowid<100 AND b<12
 } {/SEARCH TABLE t1 USING INDEX i2/}
 do_eqp_test 13.3.2 {
-  SELECT * FROM t1 WHERE a='abc' AND rowid<'100' AND b<20
+  SELECT * FROM t1 WHERE a='abc' AND rowid<'100' AND b<12
 } {/SEARCH TABLE t1 USING INDEX i2/}

 #-------------------------------------------------------------------------
--- a/test/autoindex1.test
+++ b/test/autoindex1.test
@@ -97,6 +97,8 @@ do_test autoindex1-210 {
    PRAGMA automatic_index=ON;
    ANALYZE;
    UPDATE sqlite_stat1 SET stat='10000' WHERE tbl='t1';
+    -- Table t2 actually contains 8 rows.
+    UPDATE sqlite_stat1 SET stat='16' WHERE tbl='t2';
    ANALYZE sqlite_master;
    SELECT b, (SELECT d FROM t2 WHERE c=a) FROM t1;
  }
--- a/test/cost.test
+++ b/test/cost.test
@@ -0,0 +1,246 @@
+# 2014-04-26
+#
+# The author disclaims copyright to this source code.  In place of
+# a legal notice, here is a blessing:
+#
+#    May you do good and not evil.
+#    May you find forgiveness for yourself and forgive others.
+#    May you share freely, never taking more than you give.
+#
+#***********************************************************************
+# 
+
+set testdir [file dirname $argv0]
+source $testdir/tester.tcl
+set testprefix cost
+
+
+do_execsql_test 1.1 {
+  CREATE TABLE t3(id INTEGER PRIMARY KEY, b NOT NULL);
+  CREATE TABLE t4(c, d, e);
+  CREATE UNIQUE INDEX i3 ON t3(b);
+  CREATE UNIQUE INDEX i4 ON t4(c, d);
+}
+do_eqp_test 1.2 {
+  SELECT e FROM t3, t4 WHERE b=c ORDER BY b, d;
+} {
+  0 0 0 {SCAN TABLE t3 USING COVERING INDEX i3} 
+  0 1 1 {SEARCH TABLE t4 USING INDEX i4 (c=?)}
+}
+
+
+do_execsql_test 2.1 {
+  CREATE TABLE t1(a, b);
+  CREATE INDEX i1 ON t1(a);
+}
+
+# It is better to use an index for ORDER BY than sort externally, even 
+# if the index is a non-covering index.
+do_eqp_test 2.2 {
+  SELECT * FROM t1 ORDER BY a;
+} {
+  0 0 0 {SCAN TABLE t1 USING INDEX i1}
+}
+
+do_execsql_test 3.1 {
+  CREATE TABLE t5(a INTEGER PRIMARY KEY,b,c,d,e,f,g);
+  CREATE INDEX t5b ON t5(b);
+  CREATE INDEX t5c ON t5(c);
+  CREATE INDEX t5d ON t5(d);
+  CREATE INDEX t5e ON t5(e);
+  CREATE INDEX t5f ON t5(f);
+  CREATE INDEX t5g ON t5(g);
+}
+
+do_eqp_test 3.2 {
+  SELECT a FROM t5 
+  WHERE b IS NULL OR c IS NULL OR d IS NULL 
+  ORDER BY a;
+} {
+  0 0 0 {SEARCH TABLE t5 USING INDEX t5b (b=?)} 
+  0 0 0 {SEARCH TABLE t5 USING INDEX t5c (c=?)} 
+  0 0 0 {SEARCH TABLE t5 USING INDEX t5d (d=?)} 
+  0 0 0 {USE TEMP B-TREE FOR ORDER BY}
+}
+
+#-------------------------------------------------------------------------
+# If there is no likelihood() or stat3 data, SQLite assumes that a closed
+# range scan (e.g. one constrained by "col BETWEEN ? AND ?" constraint)
+# visits 1/64 of the rows in a table.
+#
+# Note: 1/63 =~ 0.016
+# Note: 1/65 =~ 0.015
+#
+reset_db
+do_execsql_test 4.1 {
+  CREATE TABLE t1(a, b);
+  CREATE INDEX i1 ON t1(a);
+  CREATE INDEX i2 ON t1(b);
+}
+do_eqp_test 4.2 {
+  SELECT * FROM t1 WHERE likelihood(a=?, 0.014) AND b BETWEEN ? AND ?;
+} {
+  0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a=?)}
+}
+do_eqp_test 4.3 {
+  SELECT * FROM t1 WHERE likelihood(a=?, 0.016) AND b BETWEEN ? AND ?;
+} {
+  0 0 0 {SEARCH TABLE t1 USING INDEX i2 (b>? AND b<?)}
+}
+
+
+#-------------------------------------------------------------------------
+#
+reset_db
+do_execsql_test 5.1 {
+  CREATE TABLE t2(x, y);
+  CREATE INDEX t2i1 ON t2(x);
+}
+
+do_eqp_test 5.2 {
+  SELECT * FROM t2 ORDER BY x, y;
+} {
+  0 0 0 {SCAN TABLE t2 USING INDEX t2i1} 
+  0 0 0 {USE TEMP B-TREE FOR RIGHT PART OF ORDER BY}
+}
+
+do_eqp_test 5.3 {
+  SELECT * FROM t2 WHERE x BETWEEN ? AND ? ORDER BY rowid;
+} {
+  0 0 0 {SEARCH TABLE t2 USING INDEX t2i1 (x>? AND x<?)} 
+  0 0 0 {USE TEMP B-TREE FOR ORDER BY}
+}
+
+# where7.test, where8.test:
+#
+do_execsql_test 6.1 {
+  CREATE TABLE t3(a INTEGER PRIMARY KEY, b, c);
+  CREATE INDEX t3i1 ON t3(b);
+  CREATE INDEX t3i2 ON t3(c);
+}
+
+do_eqp_test 6.2 {
+  SELECT a FROM t3 WHERE (b BETWEEN 2 AND 4) OR c=100 ORDER BY a
+} {
+  0 0 0 {SEARCH TABLE t3 USING INDEX t3i1 (b>? AND b<?)} 
+  0 0 0 {SEARCH TABLE t3 USING INDEX t3i2 (c=?)}
+  0 0 0 {USE TEMP B-TREE FOR ORDER BY}
+}
+
+#-------------------------------------------------------------------------
+#
+reset_db
+do_execsql_test 7.1 {
+  CREATE TABLE t1(a INTEGER PRIMARY KEY,b,c,d,e,f,g);
+  CREATE INDEX t1b ON t1(b);
+  CREATE INDEX t1c ON t1(c);
+  CREATE INDEX t1d ON t1(d);
+  CREATE INDEX t1e ON t1(e);
+  CREATE INDEX t1f ON t1(f);
+  CREATE INDEX t1g ON t1(g);
+}
+
+do_eqp_test 7.2 {
+  SELECT a FROM t1
+     WHERE (b>=950 AND b<=1010) OR (b IS NULL AND c NOT NULL)
+  ORDER BY a
+} {
+  0 0 0 {SEARCH TABLE t1 USING INDEX t1b (b>? AND b<?)} 
+  0 0 0 {SEARCH TABLE t1 USING INDEX t1b (b=?)} 
+  0 0 0 {USE TEMP B-TREE FOR ORDER BY}
+}
+
+#set sqlite_where_trace 0xfff
+do_eqp_test 7.3 {
+  SELECT rowid FROM t1
+  WHERE (+b IS NULL AND c NOT NULL AND d NOT NULL)
+        OR (b NOT NULL AND c IS NULL AND d NOT NULL)
+        OR (b NOT NULL AND c NOT NULL AND d IS NULL)
+} {
+  0 0 0 {SCAN TABLE t1}
+}
+
+#-------------------------------------------------------------------------
+#
+reset_db
+do_execsql_test 8.1 {
+  CREATE TABLE composer(
+    cid INTEGER PRIMARY KEY,
+    cname TEXT
+  );
+  CREATE TABLE album(
+    aid INTEGER PRIMARY KEY,
+    aname TEXT
+  );
+  CREATE TABLE track(
+    tid INTEGER PRIMARY KEY,
+    cid INTEGER REFERENCES composer,
+    aid INTEGER REFERENCES album,
+    title TEXT
+  );
+  CREATE INDEX track_i1 ON track(cid);
+  CREATE INDEX track_i2 ON track(aid);
+}
+
+do_eqp_test 8.2 {
+  SELECT DISTINCT aname
+    FROM album, composer, track
+   WHERE cname LIKE '%bach%'
+     AND unlikely(composer.cid=track.cid)
+     AND unlikely(album.aid=track.aid);
+} {
+  0 0 2 {SCAN TABLE track} 
+  0 1 0 {SEARCH TABLE album USING INTEGER PRIMARY KEY (rowid=?)}
+  0 2 1 {SEARCH TABLE composer USING INTEGER PRIMARY KEY (rowid=?)}
+  0 0 0 {USE TEMP B-TREE FOR DISTINCT}
+}
+
+#-------------------------------------------------------------------------
+#
+do_execsql_test 9.1 {
+  CREATE TABLE t1(
+    a,b,c,d,e, f,g,h,i,j,
+    k,l,m,n,o, p,q,r,s,t
+  );
+  CREATE INDEX i1 ON t1(k,l,m,n,o,p,q,r,s,t);
+}
+do_test 9.2 {
+  for {set i 0} {$i < 100} {incr i} {
+    execsql { INSERT INTO t1 DEFAULT VALUES }
+  }
+  execsql {
+    ANALYZE;
+    CREATE INDEX i2 ON t1(a,b,c,d,e,f,g,h,i,j);
+  }
+} {}
+
+set L [list a=? b=? c=? d=? e=? f=? g=? h=? i=? j=?]
+foreach {tn nTerm nRow} {
+  1   1 10
+  2   2  9
+  3   3  8
+  4   4  7
+  5   5  6
+  6   6  5
+  7   7  5
+  8   8  5
+  9   9  5
+  10 10  5
+} {
+  set w [join [lrange $L 0 [expr $nTerm-1]] " AND "]
+  set p1 [expr ($nRow-1) / 100.0]
+  set p2 [expr ($nRow+1) / 100.0]
+
+  set sql1 "SELECT * FROM t1 WHERE likelihood(k=?, $p1) AND $w"
+  set sql2 "SELECT * FROM t1 WHERE likelihood(k=?, $p2) AND $w"
+
+  do_eqp_test 9.3.$tn.1 $sql1 {/INDEX i1/}
+  do_eqp_test 9.3.$tn.2 $sql2 {/INDEX i2/}
+}
+
+
+
+finish_test
+
+
+
--- a/test/eqp.test
+++ b/test/eqp.test
@@ -312,8 +312,8 @@ do_eqp_test 4.2.3 {
 } {
  1 0 0 {SCAN TABLE t1} 
  1 0 0 {USE TEMP B-TREE FOR ORDER BY}
-  2 0 0 {SCAN TABLE t2} 
-  2 0 0 {USE TEMP B-TREE FOR ORDER BY}
+  2 0 0 {SCAN TABLE t2 USING INDEX t2i1} 
+  2 0 0 {USE TEMP B-TREE FOR RIGHT PART OF ORDER BY}
  0 0 0 {COMPOUND SUBQUERIES 1 AND 2 (UNION)} 
 }
 do_eqp_test 4.2.4 {
@@ -321,8 +321,8 @@ do_eqp_test 4.2.4 {
 } {
  1 0 0 {SCAN TABLE t1} 
  1 0 0 {USE TEMP B-TREE FOR ORDER BY}
-  2 0 0 {SCAN TABLE t2} 
-  2 0 0 {USE TEMP B-TREE FOR ORDER BY}
+  2 0 0 {SCAN TABLE t2 USING INDEX t2i1} 
+  2 0 0 {USE TEMP B-TREE FOR RIGHT PART OF ORDER BY}
  0 0 0 {COMPOUND SUBQUERIES 1 AND 2 (INTERSECT)} 
 }
 do_eqp_test 4.2.5 {
@@ -330,8 +330,8 @@ do_eqp_test 4.2.5 {
 } {
  1 0 0 {SCAN TABLE t1} 
  1 0 0 {USE TEMP B-TREE FOR ORDER BY}
-  2 0 0 {SCAN TABLE t2} 
-  2 0 0 {USE TEMP B-TREE FOR ORDER BY}
+  2 0 0 {SCAN TABLE t2 USING INDEX t2i1} 
+  2 0 0 {USE TEMP B-TREE FOR RIGHT PART OF ORDER BY}
  0 0 0 {COMPOUND SUBQUERIES 1 AND 2 (EXCEPT)} 
 }

--- a/test/index6.test
+++ b/test/index6.test
@@ -145,11 +145,11 @@ do_test index6-2.1 {
  execsql {
    CREATE TABLE t2(a,b);
    INSERT INTO t2(a,b) SELECT value, value FROM nums WHERE value<1000;
-    UPDATE t2 SET a=NULL WHERE b%5==0;
+    UPDATE t2 SET a=NULL WHERE b%2==0;
    CREATE INDEX t2a1 ON t2(a) WHERE a IS NOT NULL;
    SELECT count(*) FROM t2 WHERE a IS NOT NULL;
  }
-} {800}
+} {500}
 do_test index6-2.2 {
  execsql {
    EXPLAIN QUERY PLAN
@@ -157,6 +157,7 @@ do_test index6-2.2 {
  }
 } {/.* TABLE t2 USING INDEX t2a1 .*/}
 ifcapable stat4||stat3 {
+  execsql ANALYZE
  do_test index6-2.3stat4 {
    execsql {
      EXPLAIN QUERY PLAN
--- a/test/orderby5.test
+++ b/test/orderby5.test
@@ -80,12 +80,12 @@ do_execsql_test 2.1a {
  EXPLAIN QUERY PLAN
  SELECT * FROM t2 WHERE a=0 ORDER BY a, b, c;
 } {~/B-TREE/}
+
 do_execsql_test 2.1b {
  EXPLAIN QUERY PLAN
-  SELECT * FROM t1 WHERE a=0 ORDER BY a, b, c;
+  SELECT * FROM t1 WHERE likelihood(a=0, 0.05) ORDER BY a, b, c;
 } {/B-TREE/}

-
 do_execsql_test 2.2 {
  EXPLAIN QUERY PLAN
  SELECT * FROM t1 WHERE +a=0 ORDER BY a, b, c;
--- a/test/skipscan2.test
+++ b/test/skipscan2.test
@@ -74,6 +74,7 @@ do_execsql_test skipscan2-1.4 {
  -- of a skip-scan.  So make a manual adjustment to the stat1 table
  -- to make it seem like there are many more.
  UPDATE sqlite_stat1 SET stat='10000 5000 20' WHERE idx='people_idx1';
+  UPDATE sqlite_stat1 SET stat='10000 1' WHERE idx='sqlite_autoindex_people_1';
  ANALYZE sqlite_master;
 }
 db cache flush
--- a/test/unordered.test
+++ b/test/unordered.test
@@ -42,7 +42,7 @@ foreach idxmode {ordered unordered} {
    1   "SELECT * FROM t1 ORDER BY a"
        {0 0 0 {SCAN TABLE t1 USING INDEX i1}}
        {0 0 0 {SCAN TABLE t1} 0 0 0 {USE TEMP B-TREE FOR ORDER BY}}
-    2   "SELECT * FROM t1 WHERE a >?"
+    2   "SELECT * FROM t1 WHERE a > 100"
        {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a>?)}}
        {0 0 0 {SCAN TABLE t1}}
    3   "SELECT * FROM t1 WHERE a = ? ORDER BY rowid"
--- a/test/where3.test
+++ b/test/where3.test
@@ -231,6 +231,7 @@ do_execsql_test where3-3.0 {
  CREATE TABLE t301(a INTEGER PRIMARY KEY,b,c);
  CREATE INDEX t301c ON t301(c);
  INSERT INTO t301 VALUES(1,2,3);
+  INSERT INTO t301 VALUES(2,2,3);
  CREATE TABLE t302(x, y);
  INSERT INTO t302 VALUES(4,5);
  ANALYZE;
@@ -251,7 +252,7 @@ do_execsql_test where3-3.2 {
 } {}
 do_execsql_test where3-3.3 {
  SELECT * FROM t301 WHERE c=3 AND a IS NOT NULL;
-} {1 2 3}
+} {1 2 3 2 2 3}

 if 0 {  # Query planner no longer does this
 # Verify that when there are multiple tables in a join which must be
--- a/test/whereG.test
+++ b/test/whereG.test
@@ -14,6 +14,7 @@

 set testdir [file dirname $argv0]
 source $testdir/tester.tcl
+set testprefix whereG

 do_execsql_test whereG-1.0 {
  CREATE TABLE composer(
@@ -179,5 +180,46 @@ do_execsql_test whereG-4.0 {
   ORDER BY x;
 } {right}

+#-------------------------------------------------------------------------
+# Test that likelihood() specifications on indexed terms are taken into 
+# account by various forms of loops.
+#
+#   5.1.*: open ended range scans
+#   5.2.*: skip-scans
+#
+reset_db
+
+do_execsql_test 5.1 {
+  CREATE TABLE t1(a, b, c);
+  CREATE INDEX i1 ON t1(a, b);
+}
+do_eqp_test 5.1.2 {
+  SELECT * FROM t1 WHERE a>?
+} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a>?)}}
+do_eqp_test 5.1.3 {
+  SELECT * FROM t1 WHERE likelihood(a>?, 0.9)
+} {0 0 0 {SCAN TABLE t1}}
+
+do_test 5.2 {
+  for {set i 0} {$i < 100} {incr i} {
+    execsql { INSERT INTO t1 VALUES('abc', $i, $i); }
+  }
+  execsql { INSERT INTO t1 SELECT 'def', b, c FROM t1; }
+  execsql { ANALYZE }
+} {}
+do_eqp_test 5.2.2 {
+  SELECT * FROM t1 WHERE likelihood(b>?, 0.01)
+} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (ANY(a) AND b>?)}}
+do_eqp_test 5.2.3 {
+  SELECT * FROM t1 WHERE likelihood(b>?, 0.9)
+} {0 0 0 {SCAN TABLE t1}}
+
+do_eqp_test 5.3.1 {
+  SELECT * FROM t1 WHERE a=?
+} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a=?)}}
+do_eqp_test 5.3.2 {
+  SELECT * FROM t1 WHERE likelihood(a=?, 0.9)
+} {0 0 0 {SCAN TABLE t1}}

 finish_test
+
--- a/tool/logest.c
+++ b/tool/logest.c
@@ -83,7 +83,8 @@ static LogEst logEstFromDouble(double x){
  LogEst e;
  assert( sizeof(x)==8 && sizeof(a)==8 );
  if( x<=0.0 ) return -32768;
-  if( x<1.0 ) return -logEstFromDouble(1/x);
+  if( x<0.01 ) return -logEstFromDouble(1.0/x);
+  if( x<1.0 ) return logEstFromDouble(100.0*x) - 66;
  if( x<1024.0 ) return logEstFromInteger((sqlite3_uint64)(1024.0*x)) - 100;
  if( x<=2000000000.0 ) return logEstFromInteger((sqlite3_uint64)x);
  memcpy(&a, &x, 8);
@@ -156,8 +157,10 @@ int main(int argc, char **argv){
    }
  }
  for(i=n-1; i>=0; i--){
-    if( a[i]<0 ){
+    if( a[i]<-40 ){
      printf("%5d (%f)\n", a[i], 1.0/(double)logEstToInt(-a[i]));
+    }else if( a[i]<10 ){
+      printf("%5d (%f)\n", a[i], logEstToInt(a[i]+100)/1024.0);
    }else{
      sqlite3_uint64 x = logEstToInt(a[i]+100)*100/1024;
      printf("%5d (%lld.%02lld)\n", a[i], x/100, x%100);