1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-08-10 01:02:56 +03:00

Improve the star-query heuristic so that it does a better job of identifying
actual star queries.  Also includes improved diagnostic output from the
query planner.

FossilOrigin-Name: 7cfbe14d199bb631abd4d009698eeaee9b8450d5061ded612095ee4738ac6a1f
This commit is contained in:
drh
2025-01-24 16:37:31 +00:00
4 changed files with 117 additions and 42 deletions

View File

@@ -1,5 +1,5 @@
C Correct\sa\sversion\snumber\sin\sautosetup/README.md. C Improve\sthe\sstar-query\sheuristic\sso\sthat\sit\sdoes\sa\sbetter\sjob\sof\sidentifying\nactual\sstar\squeries.\s\sAlso\sincludes\simproved\sdiagnostic\soutput\sfrom\sthe\nquery\splanner.
D 2025-01-24T15:41:33.001 D 2025-01-24T16:37:31.813
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F LICENSE.md e108e1e69ae8e8a59e93c455654b8ac9356a11720d3345df2a4743e9590fb20d F LICENSE.md e108e1e69ae8e8a59e93c455654b8ac9356a11720d3345df2a4743e9590fb20d
@@ -863,8 +863,8 @@ F src/vxworks.h d2988f4e5a61a4dfe82c6524dd3d6e4f2ce3cdb9
F src/wal.c 4e6181d8780ab0af2e1388d0754cbe6f2f04593d2b1ab6c41699a89942fd8997 F src/wal.c 4e6181d8780ab0af2e1388d0754cbe6f2f04593d2b1ab6c41699a89942fd8997
F src/wal.h ba252daaa94f889f4b2c17c027e823d9be47ce39da1d3799886bbd51f0490452 F src/wal.h ba252daaa94f889f4b2c17c027e823d9be47ce39da1d3799886bbd51f0490452
F src/walker.c d5006d6b005e4ea7302ad390957a8d41ed83faa177e412f89bc5600a7462a014 F src/walker.c d5006d6b005e4ea7302ad390957a8d41ed83faa177e412f89bc5600a7462a014
F src/where.c 6722991bece039c7094e9e31bb57b9eb155299f01d2209f1378a81c9605ded31 F src/where.c 647dfba23202523c64ca204f3501a09d18670ea081acea680ddcb1e63beda163
F src/whereInt.h 2b0804f300c7f65de4046a1d81c65f01b208d6c08950ccd1fa6b8c16162a8af7 F src/whereInt.h 3b2ef4617758174d00c6940850e4988c1b195d39bcc20f2965319a1f7bffc714
F src/wherecode.c 0c3d3199a2b769a5e2bb70feb5003dc85b3d86842ecaf903a47f2b4205ca5dab F src/wherecode.c 0c3d3199a2b769a5e2bb70feb5003dc85b3d86842ecaf903a47f2b4205ca5dab
F src/whereexpr.c 0f93a29cabd3a338d09a1f5c6770620a1ac51ec1157f3229502a7e7767c60b6f F src/whereexpr.c 0f93a29cabd3a338d09a1f5c6770620a1ac51ec1157f3229502a7e7767c60b6f
F src/window.c 2bf01f9941a64fbcead61a0e3cb5db3fca5094b30d2ff0d23274c2a81d2e2385 F src/window.c 2bf01f9941a64fbcead61a0e3cb5db3fca5094b30d2ff0d23274c2a81d2e2385
@@ -2208,8 +2208,9 @@ F tool/version-info.c 3b36468a90faf1bbd59c65fd0eb66522d9f941eedd364fabccd7227350
F tool/warnings-clang.sh bbf6a1e685e534c92ec2bfba5b1745f34fb6f0bc2a362850723a9ee87c1b31a7 F tool/warnings-clang.sh bbf6a1e685e534c92ec2bfba5b1745f34fb6f0bc2a362850723a9ee87c1b31a7
F tool/warnings.sh 49a486c5069de041aedcbde4de178293e0463ae9918ecad7539eedf0ec77a139 F tool/warnings.sh 49a486c5069de041aedcbde4de178293e0463ae9918ecad7539eedf0ec77a139
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
P 601636829b79888deda7d4dc5786f82b4f0b302d84663ee96b4d47fcb429abcf P c338caf0fd4adc7baacf20f6d4274b1776b9487a5a976b8d85e490791be82b11 a280f5f5480e560fc2b80e8947d8062e8b3487d930e71cb60fc9ba90d87977c1
R 7edf0e5d3b3392becf43b655692773e3 R 7125d6757cc5225f643dd2f0408b79ed
U stephan T +closed a280f5f5480e560fc2b80e8947d8062e8b3487d930e71cb60fc9ba90d87977c1
Z 8a4c0606d9fc49f835c4115814c6ca15 U drh
Z f818ed71ad97105ec5c66c8cb2f2a934
# Remove this line to create a well-formed Fossil manifest. # Remove this line to create a well-formed Fossil manifest.

View File

@@ -1 +1 @@
c338caf0fd4adc7baacf20f6d4274b1776b9487a5a976b8d85e490791be82b11 7cfbe14d199bb631abd4d009698eeaee9b8450d5061ded612095ee4738ac6a1f

View File

@@ -2431,8 +2431,9 @@ void sqlite3WhereClausePrint(WhereClause *pWC){
** 1.002.001 t2.t2xy 2 f 010241 N 2 cost 0,56,31 ** 1.002.001 t2.t2xy 2 f 010241 N 2 cost 0,56,31
*/ */
void sqlite3WhereLoopPrint(const WhereLoop *p, const WhereClause *pWC){ void sqlite3WhereLoopPrint(const WhereLoop *p, const WhereClause *pWC){
WhereInfo *pWInfo;
if( pWC ){ if( pWC ){
WhereInfo *pWInfo = pWC->pWInfo; pWInfo = pWC->pWInfo;
int nb = 1+(pWInfo->pTabList->nSrc+3)/4; int nb = 1+(pWInfo->pTabList->nSrc+3)/4;
SrcItem *pItem = pWInfo->pTabList->a + p->iTab; SrcItem *pItem = pWInfo->pTabList->a + p->iTab;
Table *pTab = pItem->pSTab; Table *pTab = pItem->pSTab;
@@ -2442,6 +2443,7 @@ void sqlite3WhereLoopPrint(const WhereLoop *p, const WhereClause *pWC){
sqlite3DebugPrintf(" %12s", sqlite3DebugPrintf(" %12s",
pItem->zAlias ? pItem->zAlias : pTab->zName); pItem->zAlias ? pItem->zAlias : pTab->zName);
}else{ }else{
pWInfo = 0;
sqlite3DebugPrintf("%c%2d.%03llx.%03llx %c%d", sqlite3DebugPrintf("%c%2d.%03llx.%03llx %c%d",
p->cId, p->iTab, p->maskSelf, p->prereq & 0xfff, p->cId, p->iTab); p->cId, p->iTab, p->maskSelf, p->prereq & 0xfff, p->cId, p->iTab);
} }
@@ -2473,7 +2475,12 @@ void sqlite3WhereLoopPrint(const WhereLoop *p, const WhereClause *pWC){
}else{ }else{
sqlite3DebugPrintf(" f %06x N %d", p->wsFlags, p->nLTerm); sqlite3DebugPrintf(" f %06x N %d", p->wsFlags, p->nLTerm);
} }
sqlite3DebugPrintf(" cost %d,%d,%d\n", p->rSetup, p->rRun, p->nOut); if( pWInfo && pWInfo->nOutStarDelta>0 && p->rStarDelta!=0 ){
sqlite3DebugPrintf(" cost %d,%d,%d delta=%d\n",
p->rSetup, p->rRun, p->nOut, -p->rStarDelta);
}else{
sqlite3DebugPrintf(" cost %d,%d,%d\n", p->rSetup, p->rRun, p->nOut);
}
if( p->nLTerm && (sqlite3WhereTrace & 0x4000)!=0 ){ if( p->nLTerm && (sqlite3WhereTrace & 0x4000)!=0 ){
int i; int i;
for(i=0; i<p->nLTerm; i++){ for(i=0; i<p->nLTerm; i++){
@@ -5441,11 +5448,13 @@ static LogEst whereSortingCost(
** 18 for star queries ** 18 for star queries
** 12 otherwise ** 12 otherwise
** **
** For the purposes of SQLite, a star-query is defined as a query ** For the purposes of this heuristic, a star-query is defined as a query
** with a large central table that is joined (using an INNER JOIN, ** with a large central table that is joined using an INNER JOIN,
** not a LEFT JOIN) against four or more smaller tables. The central ** not CROSS or OUTER JOINs, against four or more smaller tables.
** table is called the "fact" table. The smaller tables that get ** The central table is called the "fact" table. The smaller tables
** joined are "dimension tables". ** that get joined are "dimension tables". Also, any table that is
** self-joined cannot be a dimension table; we assume that dimension
** tables may only be joined against fact tables.
** **
** SIDE EFFECT: (and really the whole point of this subroutine) ** SIDE EFFECT: (and really the whole point of this subroutine)
** **
@@ -5456,52 +5465,109 @@ static LogEst whereSortingCost(
** resulting in poor query plans. The total amount of heuristic cost ** resulting in poor query plans. The total amount of heuristic cost
** adjustment is stored in pWInfo->nOutStarDelta and the cost adjustment ** adjustment is stored in pWInfo->nOutStarDelta and the cost adjustment
** for each WhereLoop is stored in its rStarDelta field. ** for each WhereLoop is stored in its rStarDelta field.
**
** This heuristic can be completely disabled, so that no query is
** considered a star-query, using SQLITE_TESTCTRL_OPTIMIZATION to
** disable the SQLITE_StarQuery optimization. In the CLI, the command
** to do that is: ".testctrl opt -starquery".
*/ */
static int computeMxChoice(WhereInfo *pWInfo, LogEst nRowEst){ static int computeMxChoice(WhereInfo *pWInfo){
int nLoop = pWInfo->nLevel; /* Number of terms in the join */ int nLoop = pWInfo->nLevel; /* Number of terms in the join */
if( nRowEst==0 WhereLoop *pWLoop; /* For looping over WhereLoops */
&& nLoop>=5
#ifdef SQLITE_DEBUG
/* The star-query detection code below makes use of the following
** properties of the WhereLoop list, so verifying them before
** continuing:
** (1) .maskSelf is the bitmask corresponding to .iTab
** (2) The WhereLoop list is in ascending .iTab order
*/
for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){
assert( pWLoop->maskSelf==MASKBIT(pWLoop->iTab) );
assert( pWLoop->pNextLoop==0 || pWLoop->iTab<=pWLoop->pNextLoop->iTab );
}
#endif /* SQLITE_DEBUG */
if( nLoop>=5
&& !pWInfo->bStarDone
&& OptimizationEnabled(pWInfo->pParse->db, SQLITE_StarQuery) && OptimizationEnabled(pWInfo->pParse->db, SQLITE_StarQuery)
){ ){
SrcItem *aFromTabs; /* All terms of the FROM clause */
int iFromIdx; /* Term of FROM clause is the candidate fact-table */
Bitmask m; /* Bitmask for candidate fact-table */
Bitmask mSelfJoin = 0; /* Tables that cannot be dimension tables */
WhereLoop *pStart; /* Where to start searching for dimension-tables */
pWInfo->bStarDone = 1; /* Only do this computation once */
/* Check to see if we are dealing with a star schema and if so, reduce /* Check to see if we are dealing with a star schema and if so, reduce
** the cost of fact tables relative to dimension tables, as a heuristic ** the cost of fact tables relative to dimension tables, as a heuristic
** to help keep the fact tables in outer loops. ** to help keep the fact tables in outer loops.
*/ */
int iLoop; /* Counter over join terms */
Bitmask m; /* Bitmask for current loop */
assert( pWInfo->nOutStarDelta==0 ); assert( pWInfo->nOutStarDelta==0 );
for(iLoop=0, m=1; iLoop<nLoop; iLoop++, m<<=1){ aFromTabs = pWInfo->pTabList->a;
WhereLoop *pWLoop; /* For looping over WhereLoops */ pStart = pWInfo->pLoops;
for(iFromIdx=0, m=1; iFromIdx<nLoop; iFromIdx++, m<<=1){
int nDep = 0; /* Number of dimension tables */ int nDep = 0; /* Number of dimension tables */
LogEst rDelta; /* Heuristic cost adjustment */ LogEst rDelta; /* Heuristic cost adjustment */
Bitmask mSeen = 0; /* Mask of dimension tables */ Bitmask mSeen = 0; /* Mask of dimension tables */
for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){ SrcItem *pFactTab; /* The candidate fact table */
if( (pWLoop->prereq & m)!=0 /* pWInfo depends on iLoop */
pFactTab = aFromTabs + iFromIdx;
if( (pFactTab->fg.jointype & (JT_OUTER|JT_CROSS))!=0 ){
/* If the candidate fact-table is the right table of an outer join
** restrict the search for dimension-tables to be tables to the right
** of the fact-table. */
if( iFromIdx+4 > nLoop ) break; /* Impossible to reach nDep>=4 */
while( ALWAYS(pStart) && pStart->iTab<=iFromIdx ){
pStart = pStart->pNextLoop;
}
}
for(pWLoop=pStart; pWLoop; pWLoop=pWLoop->pNextLoop){
if( (aFromTabs[pWLoop->iTab].fg.jointype & (JT_OUTER|JT_CROSS))!=0 ){
/* Fact-tables and dimension-tables cannot be separated by an
** outer join (at least for the definition of fact- and dimension-
** used by this heuristic). */
break;
}
if( (pWLoop->prereq & m)!=0 /* pWInfo depends on iFromIdx */
&& (pWLoop->maskSelf & mSeen)==0 /* pWInfo not already a dependency */ && (pWLoop->maskSelf & mSeen)==0 /* pWInfo not already a dependency */
&& (pWInfo->pTabList->a[pWLoop->iTab].fg.jointype & JT_LEFT)==0 && (pWLoop->maskSelf & mSelfJoin)==0 /* Not a self-join */
/* ^- pWInfo isn't a LEFT JOIN */
){ ){
nDep++; if( aFromTabs[pWLoop->iTab].pSTab==pFactTab->pSTab ){
mSeen |= pWLoop->maskSelf; mSelfJoin |= m;
}else{
nDep++;
mSeen |= pWLoop->maskSelf;
}
} }
} }
if( nDep<=3 ) continue; if( nDep<=3 ) continue;
rDelta = 15*(nDep-3); rDelta = 15*(nDep-3);
#ifdef WHERETRACE_ENABLED /* 0x4 */ #ifdef WHERETRACE_ENABLED /* 0x4 */
if( sqlite3WhereTrace&0x4 ){ if( sqlite3WhereTrace&0x4 ){
SrcItem *pItem = pWInfo->pTabList->a + iLoop; Bitmask x;
sqlite3DebugPrintf( int ii;
"Fact-table %s(%d): %d dimensions, cost reduced %d\n", sqlite3DebugPrintf(
pItem->zAlias ? pItem->zAlias : pItem->pSTab->zName, iLoop, "Fact-table %s(%d): cost reduced %d due to %d dimension tables:",
nDep, rDelta); pFactTab->zAlias ? pFactTab->zAlias : pFactTab->pSTab->zName,
iFromIdx, rDelta, nDep
);
for(ii=0, x=1; ii<nLoop; ii++, x<<=1){
if( x & mSeen ){
SrcItem *pDim = aFromTabs + ii;
sqlite3DebugPrintf(" %s(%d)",
pDim->zAlias ? pDim->zAlias : pDim->pSTab->zName, ii
);
}
}
sqlite3DebugPrintf("\n");
} }
#endif #endif
if( pWInfo->nOutStarDelta==0 ){ for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){
for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){ pWLoop->rStarDelta = 0;
pWLoop->rStarDelta = 0;
}
} }
pWInfo->nOutStarDelta += rDelta; pWInfo->nOutStarDelta = rDelta;
for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){ for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){
if( pWLoop->maskSelf==m ){ if( pWLoop->maskSelf==m ){
pWLoop->rRun -= rDelta; pWLoop->rRun -= rDelta;
@@ -5509,7 +5575,7 @@ static int computeMxChoice(WhereInfo *pWInfo, LogEst nRowEst){
pWLoop->rStarDelta = rDelta; pWLoop->rStarDelta = rDelta;
} }
} }
} }
} }
return pWInfo->nOutStarDelta>0 ? 18 : 12; return pWInfo->nOutStarDelta>0 ? 18 : 12;
} }
@@ -5587,7 +5653,7 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){
}else if( nLoop==2 ){ }else if( nLoop==2 ){
mxChoice = 5; mxChoice = 5;
}else{ }else{
mxChoice = computeMxChoice(pWInfo, nRowEst); mxChoice = computeMxChoice(pWInfo);
} }
assert( nLoop<=pWInfo->pTabList->nSrc ); assert( nLoop<=pWInfo->pTabList->nSrc );
@@ -5956,6 +6022,9 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){
} }
pWInfo->nRowOut = pFrom->nRow + pWInfo->nOutStarDelta; pWInfo->nRowOut = pFrom->nRow + pWInfo->nOutStarDelta;
#ifdef WHERETRACE_ENABLED
pWInfo->rTotalCost = pFrom->rCost + pWInfo->nOutStarDelta;
#endif
/* Free temporary memory and return success */ /* Free temporary memory and return success */
sqlite3StackFreeNN(pParse->db, pSpace); sqlite3StackFreeNN(pParse->db, pSpace);
@@ -6836,7 +6905,8 @@ WhereInfo *sqlite3WhereBegin(
assert( db->mallocFailed==0 ); assert( db->mallocFailed==0 );
#ifdef WHERETRACE_ENABLED #ifdef WHERETRACE_ENABLED
if( sqlite3WhereTrace ){ if( sqlite3WhereTrace ){
sqlite3DebugPrintf("---- Solution nRow=%d", pWInfo->nRowOut); sqlite3DebugPrintf("---- Solution cost=%d, nRow=%d",
pWInfo->rTotalCost, pWInfo->nRowOut);
if( pWInfo->nOBSat>0 ){ if( pWInfo->nOBSat>0 ){
sqlite3DebugPrintf(" ORDERBY=%d,0x%llx", pWInfo->nOBSat, pWInfo->revMask); sqlite3DebugPrintf(" ORDERBY=%d,0x%llx", pWInfo->nOBSat, pWInfo->revMask);
} }

View File

@@ -486,8 +486,12 @@ struct WhereInfo {
unsigned untestedTerms :1; /* Not all WHERE terms resolved by outer loop */ unsigned untestedTerms :1; /* Not all WHERE terms resolved by outer loop */
unsigned bOrderedInnerLoop:1;/* True if only the inner-most loop is ordered */ unsigned bOrderedInnerLoop:1;/* True if only the inner-most loop is ordered */
unsigned sorted :1; /* True if really sorted (not just grouped) */ unsigned sorted :1; /* True if really sorted (not just grouped) */
unsigned bStarDone :1; /* True if check for star-query is complete */
LogEst nOutStarDelta; /* Artificial nOut reduction for star-query */ LogEst nOutStarDelta; /* Artificial nOut reduction for star-query */
LogEst nRowOut; /* Estimated number of output rows */ LogEst nRowOut; /* Estimated number of output rows */
#ifdef WHERETRACE_ENABLED
LogEst rTotalCost; /* Total cost of the solution */
#endif
int iTop; /* The very beginning of the WHERE loop */ int iTop; /* The very beginning of the WHERE loop */
int iEndWhere; /* End of the WHERE clause itself */ int iEndWhere; /* End of the WHERE clause itself */
WhereLoop *pLoops; /* List of all WhereLoop objects */ WhereLoop *pLoops; /* List of all WhereLoop objects */