From 30e58750c15a0742877d992180c352a07f3cd6f9 Mon Sep 17 00:00:00 2001 From: drh Date: Sat, 2 Mar 2002 20:41:57 +0000 Subject: [PATCH] Pager optimization: do not write or journal free pages. This results in a 2x performance gain for large INSERTs and a 5x performance gain for large DELETEs. (CVS 410) FossilOrigin-Name: cf1ebcfb741786f84a596c406f4c492f68cbe881 --- manifest | 16 +++++------ manifest.uuid | 2 +- src/btree.c | 80 ++++++++++++++++++++++++++++++++++++++++++++------- src/pager.c | 51 +++++++++++++++++++++++++++++++- src/pager.h | 4 ++- 5 files changed, 132 insertions(+), 21 deletions(-) diff --git a/manifest b/manifest index ae177e5f97..8397849c93 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Change\sthe\sbtree\snode\sbalancers\sto\ssort\snodes\sinto\saccending\sorder.\s\sThis\nimproves\sinsert\sand\sdelete\sspeed\sby\s25%.\s(CVS\s409) -D 2002-03-02T19:00:31 +C Pager\soptimization:\sdo\snot\swrite\sor\sjournal\sfree\spages.\s\sThis\sresults\sin\na\s2x\sperformance\sgain\sfor\slarge\sINSERTs\sand\sa\s5x\sperformance\sgain\sfor\nlarge\sDELETEs.\s(CVS\s410) +D 2002-03-02T20:41:58 F Makefile.in 50f1b3351df109b5774771350d8c1b8d3640130d F Makefile.template 89e373b2dad0321df00400fa968dc14b61a03296 F README a4c0ba11354ef6ba0776b400d057c59da47a4cc0 @@ -19,7 +19,7 @@ F ltmain.sh e9ed72eb1d690f447c13945eaf69e28af531eda1 F publish.sh 5b59f4aff037aafa0e4a3b6fa599495dbd73f360 F sqlite.1 2e2bb0529ef468ade9e4322bd609d0695fb9ded9 F src/TODO af7f3cab0228e34149cf98e073aa83d45878e7e6 -F src/btree.c 360c0aa4db058bd2d33269d7416178bdc7b7fe41 +F src/btree.c e732bb03715f326a25e0b6fea2e778e063ec3893 F src/btree.h 8abeabfe6e0b1a990b64fa457592a6482f6674f3 F src/build.c 2f6d3136e6b824b2b446c54db2d2be5703033203 F src/delete.c bf569eeb66dc851966b5681e5154d5fe2aee92c2 @@ -32,8 +32,8 @@ F src/main.c 5651146585ae613e759fcf372ee064e4940c2463 F src/md5.c 52f677bfc590e09f71d07d7e327bd59da738d07c F src/os.c f6bc9b7ab530346bb7fef2ed39f2f1f214bc14ea F src/os.h 
a17596ecc7f38a228b83ecdb661fb03ce44726d6 -F src/pager.c 9761c79ccb844bf29ffc5cbed4fa1a32e0740147 -F src/pager.h b28f004e2f5541dc60cc32db01bf80cf4d056283 +F src/pager.c 0aa358a378c416ee0b0be5bf03fa7c35f7992c9b +F src/pager.h feb18aab2f6dea439393f23a382699b9b1053c32 F src/parse.y d62960cdee2d2e7821f277d2fe63d823c86602ba F src/printf.c 300a90554345751f26e1fc0c0333b90a66110a1d F src/random.c 19e8e00fe0df32a742f115773f57651be327cabe @@ -127,7 +127,7 @@ F www/speed.tcl 83457b2bf6bb430900bd48ca3dd98264d9a916a5 F www/sqlite.tcl 8b5884354cb615049aed83039f8dfe1552a44279 F www/tclsqlite.tcl 829b393d1ab187fd7a5e978631b3429318885c49 F www/vdbe.tcl 2013852c27a02a091d39a766bc87cff329f21218 -P d5d3e79cc58da5bd315cc1fea1f7cbf46274da16 -R e2a0844c770bfd758020b0af68cec917 +P abbb999d4fc3fe142567b6ede5e625e7bf0da714 +R 65be3ccaeb081927d6806e4071f74e0f U drh -Z fff6edd2a1ed34d852214cfe902efd49 +Z 3493ea99db08effd4dc1d635e5b15238 diff --git a/manifest.uuid b/manifest.uuid index 6165d4720a..75e1b749c1 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -abbb999d4fc3fe142567b6ede5e625e7bf0da714 \ No newline at end of file +cf1ebcfb741786f84a596c406f4c492f68cbe881 \ No newline at end of file diff --git a/src/btree.c b/src/btree.c index 5d591f1504..0e8a2695a0 100644 --- a/src/btree.c +++ b/src/btree.c @@ -9,7 +9,7 @@ ** May you share freely, never taking more than you give. ** ************************************************************************* -** $Id: btree.c,v 1.56 2002/03/02 19:00:31 drh Exp $ +** $Id: btree.c,v 1.57 2002/03/02 20:41:58 drh Exp $ ** ** This file implements a external (disk-based) database using BTrees. ** For a detailed discussion of BTrees, refer to @@ -64,6 +64,7 @@ typedef struct Cell Cell; typedef struct CellHdr CellHdr; typedef struct FreeBlk FreeBlk; typedef struct OverflowPage OverflowPage; +typedef struct FreelistInfo FreelistInfo; /* ** All structures on a database page are aligned to 4-byte boundries. 
@@ -248,6 +249,18 @@ struct OverflowPage { char aPayload[OVERFLOW_SIZE]; }; +/* +** The PageOne.freeList field points to a linked list of overflow pages +** that hold information about free pages. The aPayload section of each +** overflow page contains an instance of the following structure. The +** aFree[] array holds the page numbers of nFree unused pages in the disk +** file. +*/ +struct FreelistInfo { + int nFree; + Pgno aFree[(OVERFLOW_SIZE-sizeof(int))/sizeof(Pgno)]; +}; + /* ** For every page in the database file, an instance of the following structure ** is stored in memory. The u.aDisk[] array contains the raw bits read from @@ -1477,9 +1490,11 @@ static int allocatePage(Btree *pBt, MemPage **ppPage, Pgno *pPgno){ int rc; if( pPage1->freeList ){ OverflowPage *pOvfl; + FreelistInfo *pInfo; + rc = sqlitepager_write(pPage1); if( rc ) return rc; - *pPgno = pPage1->freeList; + pPage1->nFree--; rc = sqlitepager_get(pBt->pPager, pPage1->freeList, (void**)&pOvfl); if( rc ) return rc; rc = sqlitepager_write(pOvfl); @@ -1487,9 +1502,21 @@ static int allocatePage(Btree *pBt, MemPage **ppPage, Pgno *pPgno){ sqlitepager_unref(pOvfl); return rc; } - pPage1->freeList = pOvfl->iNext; - pPage1->nFree--; - *ppPage = (MemPage*)pOvfl; + pInfo = (FreelistInfo*)pOvfl->aPayload; + if( pInfo->nFree==0 ){ + *pPgno = pPage1->freeList; + pPage1->freeList = pOvfl->iNext; + *ppPage = (MemPage*)pOvfl; + }else{ + pInfo->nFree--; + *pPgno = pInfo->aFree[pInfo->nFree]; + rc = sqlitepager_get(pBt->pPager, *pPgno, (void**)ppPage); + sqlitepager_unref(pOvfl); + if( rc==SQLITE_OK ){ + sqlitepager_dont_rollback(*ppPage); + rc = sqlitepager_write(*ppPage); + } + } }else{ *pPgno = sqlitepager_pagecount(pBt->pPager) + 1; rc = sqlitepager_get(pBt->pPager, *pPgno, (void**)ppPage); @@ -1521,6 +1548,25 @@ static int freePage(Btree *pBt, void *pPage, Pgno pgno){ if( rc ){ return rc; } + pPage1->nFree++; + if( pPage1->nFree>0 && pPage1->freeList ){ + OverflowPage *pFreeIdx; + rc = 
sqlitepager_get(pBt->pPager, pPage1->freeList, (void**)&pFreeIdx); + if( rc==SQLITE_OK ){ + FreelistInfo *pInfo = (FreelistInfo*)pFreeIdx->aPayload; + if( pInfo->nFree<(sizeof(pInfo->aFree)/sizeof(pInfo->aFree[0])) ){ + rc = sqlitepager_write(pFreeIdx); + if( rc==SQLITE_OK ){ + pInfo->aFree[pInfo->nFree] = pgno; + pInfo->nFree++; + sqlitepager_unref(pFreeIdx); + sqlitepager_dont_write(pBt->pPager, pgno); + return rc; + } + } + sqlitepager_unref(pFreeIdx); + } + } if( pOvfl==0 ){ assert( pgno>0 ); rc = sqlitepager_get(pBt->pPager, pgno, (void**)&pOvfl); @@ -1534,7 +1580,6 @@ static int freePage(Btree *pBt, void *pPage, Pgno pgno){ } pOvfl->iNext = pPage1->freeList; pPage1->freeList = pgno; - pPage1->nFree++; memset(pOvfl->aPayload, 0, OVERFLOW_SIZE); pMemPage = (MemPage*)pPage; pMemPage->isInit = 0; @@ -2703,9 +2748,16 @@ static int checkRef(IntegrityCk *pCheck, int iPage, char *zContext){ ** Check the integrity of the freelist or of an overflow page list. ** Verify that the number of pages on the list is N. */ -static void checkList(IntegrityCk *pCheck, int iPage, int N, char *zContext){ +static void checkList( + IntegrityCk *pCheck, /* Integrity checking context */ + int isFreeList, /* True for a freelist. 
False for overflow page list */ + int iPage, /* Page number for first page in the list */ + int N, /* Expected number of pages in the list */ + char *zContext /* Context for error messages */ +){ + int i; char zMsg[100]; - while( N-- ){ + while( N-- > 0 ){ OverflowPage *pOvfl; if( iPage<1 ){ sprintf(zMsg, "%d pages missing from overflow list", N+1); @@ -2718,6 +2770,13 @@ static void checkList(IntegrityCk *pCheck, int iPage, int N, char *zContext){ checkAppendMsg(pCheck, zContext, zMsg); break; } + if( isFreeList ){ + FreelistInfo *pInfo = (FreelistInfo*)pOvfl->aPayload; + for(i=0; i<pInfo->nFree; i++){ + checkRef(pCheck, pInfo->aFree[i], zMsg); + } + N -= pInfo->nFree; + } iPage = (int)pOvfl->iNext; sqlitepager_unref(pOvfl); } @@ -2818,7 +2877,7 @@ static int checkTreePage( sprintf(zContext, "On page %d cell %d: ", iPage, i); if( sz>MX_LOCAL_PAYLOAD ){ int nPage = (sz - MX_LOCAL_PAYLOAD + OVERFLOW_SIZE - 1)/OVERFLOW_SIZE; - checkList(pCheck, pCell->ovfl, nPage, zContext); + checkList(pCheck, 0, pCell->ovfl, nPage, zContext); } /* Check that keys are in the right order @@ -2923,7 +2982,8 @@ char *sqliteBtreeIntegrityCheck(Btree *pBt, int *aRoot, int nRoot){ /* Check the integrity of the freelist */ - checkList(&sCheck, pBt->page1->freeList, pBt->page1->nFree,"Main freelist: "); + checkList(&sCheck, 1, pBt->page1->freeList, pBt->page1->nFree, + "Main freelist: "); /* Check all the tables. */ diff --git a/src/pager.c b/src/pager.c index 1cf1d2bc37..b69b283282 100644 --- a/src/pager.c +++ b/src/pager.c @@ -18,7 +18,7 @@ ** file simultaneously, or one process from reading the database while ** another is writing. 
** -** @(#) $Id: pager.c,v 1.40 2002/02/19 13:39:22 drh Exp $ +** @(#) $Id: pager.c,v 1.41 2002/03/02 20:41:59 drh Exp $ */ #include "sqliteInt.h" #include "pager.h" @@ -1126,6 +1126,55 @@ int sqlitepager_iswriteable(void *pData){ return pPg->dirty; } +/* +** A call to this routine tells the pager that it is not necessary to +** write the information on page "pgno" back to the disk, even though +** that page might be marked as dirty. +** +** The overlying software layer calls this routine when all of the data +** on the given page is unused. The pager marks the page as clean so +** that it does not get written to disk. +** +** Tests show that this optimization, together with the +** sqlitepager_dont_rollback() below, more than double the speed +** of large INSERT operations and quadruple the speed of large DELETEs. +*/ +void sqlitepager_dont_write(Pager *pPager, Pgno pgno){ + PgHdr *pPg; + pPg = pager_lookup(pPager, pgno); + if( pPg && pPg->dirty ){ + pPg->dirty = 0; + } +} + +/* +** A call to this routine tells the pager that if a rollback occurs, +** it is not necessary to restore the data on the given page. This +** means that the pager does not have to record the given page in the +** rollback journal. 
+*/ +void sqlitepager_dont_rollback(void *pData){ + PgHdr *pPg = DATA_TO_PGHDR(pData); + Pager *pPager = pPg->pPager; + + if( pPager->state!=SQLITE_WRITELOCK || pPager->journalOpen==0 ) return; + if( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ){ + assert( pPager->aInJournal!=0 ); + pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7); + pPg->inJournal = 1; + if( pPager->ckptOpen ){ + pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7); + pPg->inCkpt = 1; + } + } + if( pPager->ckptOpen && !pPg->inCkpt && (int)pPg->pgno<=pPager->ckptSize ){ + assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize ); + assert( pPager->aInCkpt!=0 ); + pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7); + pPg->inCkpt = 1; + } +} + /* ** Commit all changes to the database and release the write lock. ** diff --git a/src/pager.h b/src/pager.h index 403ace1da0..91b3f8b0c1 100644 --- a/src/pager.h +++ b/src/pager.h @@ -13,7 +13,7 @@ ** subsystem. The page cache subsystem reads and writes a file a page ** at a time and provides a journal for rollback. ** -** @(#) $Id: pager.h,v 1.14 2002/02/02 15:01:16 drh Exp $ +** @(#) $Id: pager.h,v 1.15 2002/03/02 20:41:59 drh Exp $ */ /* @@ -65,6 +65,8 @@ int sqlitepager_isreadonly(Pager*); int sqlitepager_ckpt_begin(Pager*); int sqlitepager_ckpt_commit(Pager*); int sqlitepager_ckpt_rollback(Pager*); +void sqlitepager_dont_rollback(void*); +void sqlitepager_dont_write(Pager*, Pgno); int *sqlitepager_stats(Pager*); #ifdef SQLITE_TEST