1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-08-07 02:42:48 +03:00

Pager optimization: do not write or journal free pages. This results in
a 2x performance gain for large INSERTs and a 5x performance gain for
large DELETEs. (CVS 410)

FossilOrigin-Name: cf1ebcfb741786f84a596c406f4c492f68cbe881
This commit is contained in:
drh
2002-03-02 20:41:57 +00:00
parent f9ffac96a7
commit 30e58750c1
5 changed files with 132 additions and 21 deletions

View File

@@ -1,5 +1,5 @@
C Change\sthe\sbtree\snode\sbalancers\sto\ssort\snodes\sinto\saccending\sorder.\s\sThis\nimproves\sinsert\sand\sdelete\sspeed\sby\s25%.\s(CVS\s409) C Pager\soptimization:\sdo\snot\swrite\sor\sjournal\sfree\spages.\s\sThis\sresults\sin\na\s2x\sperformance\sgain\sfor\slarge\sINSERTs\sand\sa\s5x\sperformance\sgain\sfor\nlarge\sDELETEs.\s(CVS\s410)
D 2002-03-02T19:00:31 D 2002-03-02T20:41:58
F Makefile.in 50f1b3351df109b5774771350d8c1b8d3640130d F Makefile.in 50f1b3351df109b5774771350d8c1b8d3640130d
F Makefile.template 89e373b2dad0321df00400fa968dc14b61a03296 F Makefile.template 89e373b2dad0321df00400fa968dc14b61a03296
F README a4c0ba11354ef6ba0776b400d057c59da47a4cc0 F README a4c0ba11354ef6ba0776b400d057c59da47a4cc0
@@ -19,7 +19,7 @@ F ltmain.sh e9ed72eb1d690f447c13945eaf69e28af531eda1
F publish.sh 5b59f4aff037aafa0e4a3b6fa599495dbd73f360 F publish.sh 5b59f4aff037aafa0e4a3b6fa599495dbd73f360
F sqlite.1 2e2bb0529ef468ade9e4322bd609d0695fb9ded9 F sqlite.1 2e2bb0529ef468ade9e4322bd609d0695fb9ded9
F src/TODO af7f3cab0228e34149cf98e073aa83d45878e7e6 F src/TODO af7f3cab0228e34149cf98e073aa83d45878e7e6
F src/btree.c 360c0aa4db058bd2d33269d7416178bdc7b7fe41 F src/btree.c e732bb03715f326a25e0b6fea2e778e063ec3893
F src/btree.h 8abeabfe6e0b1a990b64fa457592a6482f6674f3 F src/btree.h 8abeabfe6e0b1a990b64fa457592a6482f6674f3
F src/build.c 2f6d3136e6b824b2b446c54db2d2be5703033203 F src/build.c 2f6d3136e6b824b2b446c54db2d2be5703033203
F src/delete.c bf569eeb66dc851966b5681e5154d5fe2aee92c2 F src/delete.c bf569eeb66dc851966b5681e5154d5fe2aee92c2
@@ -32,8 +32,8 @@ F src/main.c 5651146585ae613e759fcf372ee064e4940c2463
F src/md5.c 52f677bfc590e09f71d07d7e327bd59da738d07c F src/md5.c 52f677bfc590e09f71d07d7e327bd59da738d07c
F src/os.c f6bc9b7ab530346bb7fef2ed39f2f1f214bc14ea F src/os.c f6bc9b7ab530346bb7fef2ed39f2f1f214bc14ea
F src/os.h a17596ecc7f38a228b83ecdb661fb03ce44726d6 F src/os.h a17596ecc7f38a228b83ecdb661fb03ce44726d6
F src/pager.c 9761c79ccb844bf29ffc5cbed4fa1a32e0740147 F src/pager.c 0aa358a378c416ee0b0be5bf03fa7c35f7992c9b
F src/pager.h b28f004e2f5541dc60cc32db01bf80cf4d056283 F src/pager.h feb18aab2f6dea439393f23a382699b9b1053c32
F src/parse.y d62960cdee2d2e7821f277d2fe63d823c86602ba F src/parse.y d62960cdee2d2e7821f277d2fe63d823c86602ba
F src/printf.c 300a90554345751f26e1fc0c0333b90a66110a1d F src/printf.c 300a90554345751f26e1fc0c0333b90a66110a1d
F src/random.c 19e8e00fe0df32a742f115773f57651be327cabe F src/random.c 19e8e00fe0df32a742f115773f57651be327cabe
@@ -127,7 +127,7 @@ F www/speed.tcl 83457b2bf6bb430900bd48ca3dd98264d9a916a5
F www/sqlite.tcl 8b5884354cb615049aed83039f8dfe1552a44279 F www/sqlite.tcl 8b5884354cb615049aed83039f8dfe1552a44279
F www/tclsqlite.tcl 829b393d1ab187fd7a5e978631b3429318885c49 F www/tclsqlite.tcl 829b393d1ab187fd7a5e978631b3429318885c49
F www/vdbe.tcl 2013852c27a02a091d39a766bc87cff329f21218 F www/vdbe.tcl 2013852c27a02a091d39a766bc87cff329f21218
P d5d3e79cc58da5bd315cc1fea1f7cbf46274da16 P abbb999d4fc3fe142567b6ede5e625e7bf0da714
R e2a0844c770bfd758020b0af68cec917 R 65be3ccaeb081927d6806e4071f74e0f
U drh U drh
Z fff6edd2a1ed34d852214cfe902efd49 Z 3493ea99db08effd4dc1d635e5b15238

View File

@@ -1 +1 @@
abbb999d4fc3fe142567b6ede5e625e7bf0da714 cf1ebcfb741786f84a596c406f4c492f68cbe881

View File

@@ -9,7 +9,7 @@
** May you share freely, never taking more than you give. ** May you share freely, never taking more than you give.
** **
************************************************************************* *************************************************************************
** $Id: btree.c,v 1.56 2002/03/02 19:00:31 drh Exp $ ** $Id: btree.c,v 1.57 2002/03/02 20:41:58 drh Exp $
** **
** This file implements a external (disk-based) database using BTrees. ** This file implements a external (disk-based) database using BTrees.
** For a detailed discussion of BTrees, refer to ** For a detailed discussion of BTrees, refer to
@@ -64,6 +64,7 @@ typedef struct Cell Cell;
typedef struct CellHdr CellHdr; typedef struct CellHdr CellHdr;
typedef struct FreeBlk FreeBlk; typedef struct FreeBlk FreeBlk;
typedef struct OverflowPage OverflowPage; typedef struct OverflowPage OverflowPage;
typedef struct FreelistInfo FreelistInfo;
/* /*
** All structures on a database page are aligned to 4-byte boundries. ** All structures on a database page are aligned to 4-byte boundries.
@@ -248,6 +249,18 @@ struct OverflowPage {
char aPayload[OVERFLOW_SIZE]; char aPayload[OVERFLOW_SIZE];
}; };
/*
** The PageOne.freeList field points to a linked list of overflow pages
** that hold information about free pages. The aPayload section of each
** overflow page on that list contains an instance of the following
** structure. The aFree[] array holds the page numbers of nFree unused
** pages in the disk file.
**
** The array is dimensioned from OVERFLOW_SIZE so that nFree plus aFree[]
** is sized against the OVERFLOW_SIZE bytes of OverflowPage.aPayload.
*/
struct FreelistInfo {
int nFree;  /* Number of entries of aFree[] currently in use */
Pgno aFree[(OVERFLOW_SIZE-sizeof(int))/sizeof(Pgno)];  /* Page numbers of unused pages */
};
/* /*
** For every page in the database file, an instance of the following structure ** For every page in the database file, an instance of the following structure
** is stored in memory. The u.aDisk[] array contains the raw bits read from ** is stored in memory. The u.aDisk[] array contains the raw bits read from
@@ -1477,9 +1490,11 @@ static int allocatePage(Btree *pBt, MemPage **ppPage, Pgno *pPgno){
int rc; int rc;
if( pPage1->freeList ){ if( pPage1->freeList ){
OverflowPage *pOvfl; OverflowPage *pOvfl;
FreelistInfo *pInfo;
rc = sqlitepager_write(pPage1); rc = sqlitepager_write(pPage1);
if( rc ) return rc; if( rc ) return rc;
*pPgno = pPage1->freeList; pPage1->nFree--;
rc = sqlitepager_get(pBt->pPager, pPage1->freeList, (void**)&pOvfl); rc = sqlitepager_get(pBt->pPager, pPage1->freeList, (void**)&pOvfl);
if( rc ) return rc; if( rc ) return rc;
rc = sqlitepager_write(pOvfl); rc = sqlitepager_write(pOvfl);
@@ -1487,9 +1502,21 @@ static int allocatePage(Btree *pBt, MemPage **ppPage, Pgno *pPgno){
sqlitepager_unref(pOvfl); sqlitepager_unref(pOvfl);
return rc; return rc;
} }
pPage1->freeList = pOvfl->iNext; pInfo = (FreelistInfo*)pOvfl->aPayload;
pPage1->nFree--; if( pInfo->nFree==0 ){
*ppPage = (MemPage*)pOvfl; *pPgno = pPage1->freeList;
pPage1->freeList = pOvfl->iNext;
*ppPage = (MemPage*)pOvfl;
}else{
pInfo->nFree--;
*pPgno = pInfo->aFree[pInfo->nFree];
rc = sqlitepager_get(pBt->pPager, *pPgno, (void**)ppPage);
sqlitepager_unref(pOvfl);
if( rc==SQLITE_OK ){
sqlitepager_dont_rollback(*ppPage);
rc = sqlitepager_write(*ppPage);
}
}
}else{ }else{
*pPgno = sqlitepager_pagecount(pBt->pPager) + 1; *pPgno = sqlitepager_pagecount(pBt->pPager) + 1;
rc = sqlitepager_get(pBt->pPager, *pPgno, (void**)ppPage); rc = sqlitepager_get(pBt->pPager, *pPgno, (void**)ppPage);
@@ -1521,6 +1548,25 @@ static int freePage(Btree *pBt, void *pPage, Pgno pgno){
if( rc ){ if( rc ){
return rc; return rc;
} }
pPage1->nFree++;
if( pPage1->nFree>0 && pPage1->freeList ){
OverflowPage *pFreeIdx;
rc = sqlitepager_get(pBt->pPager, pPage1->freeList, (void**)&pFreeIdx);
if( rc==SQLITE_OK ){
FreelistInfo *pInfo = (FreelistInfo*)pFreeIdx->aPayload;
if( pInfo->nFree<(sizeof(pInfo->aFree)/sizeof(pInfo->aFree[0])) ){
rc = sqlitepager_write(pFreeIdx);
if( rc==SQLITE_OK ){
pInfo->aFree[pInfo->nFree] = pgno;
pInfo->nFree++;
sqlitepager_unref(pFreeIdx);
sqlitepager_dont_write(pBt->pPager, pgno);
return rc;
}
}
sqlitepager_unref(pFreeIdx);
}
}
if( pOvfl==0 ){ if( pOvfl==0 ){
assert( pgno>0 ); assert( pgno>0 );
rc = sqlitepager_get(pBt->pPager, pgno, (void**)&pOvfl); rc = sqlitepager_get(pBt->pPager, pgno, (void**)&pOvfl);
@@ -1534,7 +1580,6 @@ static int freePage(Btree *pBt, void *pPage, Pgno pgno){
} }
pOvfl->iNext = pPage1->freeList; pOvfl->iNext = pPage1->freeList;
pPage1->freeList = pgno; pPage1->freeList = pgno;
pPage1->nFree++;
memset(pOvfl->aPayload, 0, OVERFLOW_SIZE); memset(pOvfl->aPayload, 0, OVERFLOW_SIZE);
pMemPage = (MemPage*)pPage; pMemPage = (MemPage*)pPage;
pMemPage->isInit = 0; pMemPage->isInit = 0;
@@ -2703,9 +2748,16 @@ static int checkRef(IntegrityCk *pCheck, int iPage, char *zContext){
** Check the integrity of the freelist or of an overflow page list. ** Check the integrity of the freelist or of an overflow page list.
** Verify that the number of pages on the list is N. ** Verify that the number of pages on the list is N.
*/ */
static void checkList(IntegrityCk *pCheck, int iPage, int N, char *zContext){ static void checkList(
IntegrityCk *pCheck, /* Integrity checking context */
int isFreeList, /* True for a freelist. False for overflow page list */
int iPage, /* Page number for first page in the list */
int N, /* Expected number of pages in the list */
char *zContext /* Context for error messages */
){
int i;
char zMsg[100]; char zMsg[100];
while( N-- ){ while( N-- > 0 ){
OverflowPage *pOvfl; OverflowPage *pOvfl;
if( iPage<1 ){ if( iPage<1 ){
sprintf(zMsg, "%d pages missing from overflow list", N+1); sprintf(zMsg, "%d pages missing from overflow list", N+1);
@@ -2718,6 +2770,13 @@ static void checkList(IntegrityCk *pCheck, int iPage, int N, char *zContext){
checkAppendMsg(pCheck, zContext, zMsg); checkAppendMsg(pCheck, zContext, zMsg);
break; break;
} }
if( isFreeList ){
FreelistInfo *pInfo = (FreelistInfo*)pOvfl->aPayload;
for(i=0; i<pInfo->nFree; i++){
checkRef(pCheck, pInfo->aFree[i], zMsg);
}
N -= pInfo->nFree;
}
iPage = (int)pOvfl->iNext; iPage = (int)pOvfl->iNext;
sqlitepager_unref(pOvfl); sqlitepager_unref(pOvfl);
} }
@@ -2818,7 +2877,7 @@ static int checkTreePage(
sprintf(zContext, "On page %d cell %d: ", iPage, i); sprintf(zContext, "On page %d cell %d: ", iPage, i);
if( sz>MX_LOCAL_PAYLOAD ){ if( sz>MX_LOCAL_PAYLOAD ){
int nPage = (sz - MX_LOCAL_PAYLOAD + OVERFLOW_SIZE - 1)/OVERFLOW_SIZE; int nPage = (sz - MX_LOCAL_PAYLOAD + OVERFLOW_SIZE - 1)/OVERFLOW_SIZE;
checkList(pCheck, pCell->ovfl, nPage, zContext); checkList(pCheck, 0, pCell->ovfl, nPage, zContext);
} }
/* Check that keys are in the right order /* Check that keys are in the right order
@@ -2923,7 +2982,8 @@ char *sqliteBtreeIntegrityCheck(Btree *pBt, int *aRoot, int nRoot){
/* Check the integrity of the freelist /* Check the integrity of the freelist
*/ */
checkList(&sCheck, pBt->page1->freeList, pBt->page1->nFree,"Main freelist: "); checkList(&sCheck, 1, pBt->page1->freeList, pBt->page1->nFree,
"Main freelist: ");
/* Check all the tables. /* Check all the tables.
*/ */

View File

@@ -18,7 +18,7 @@
** file simultaneously, or one process from reading the database while ** file simultaneously, or one process from reading the database while
** another is writing. ** another is writing.
** **
** @(#) $Id: pager.c,v 1.40 2002/02/19 13:39:22 drh Exp $ ** @(#) $Id: pager.c,v 1.41 2002/03/02 20:41:59 drh Exp $
*/ */
#include "sqliteInt.h" #include "sqliteInt.h"
#include "pager.h" #include "pager.h"
@@ -1126,6 +1126,55 @@ int sqlitepager_iswriteable(void *pData){
return pPg->dirty; return pPg->dirty;
} }
/*
** Inform the pager that the content of page "pgno" does not need to be
** written back to the disk, even if that page is currently marked as
** dirty.
**
** The software layer above the pager calls this routine once all of the
** data on the page has become unused.  The page is located with
** pager_lookup(); when an entry is found and its dirty flag is set, the
** flag is cleared so that the pager treats the page as clean.  If no
** entry is found, or the page is not dirty, nothing is changed.
**
** Tests show that this optimization, combined with
** sqlitepager_dont_rollback(), more than doubles the speed of large
** INSERT operations and quadruples the speed of large DELETEs.
*/
void sqlitepager_dont_write(Pager *pPager, Pgno pgno){
  PgHdr *pCached = pager_lookup(pPager, pgno);
  if( pCached==0 ) return;         /* Lookup found nothing for pgno */
  if( pCached->dirty==0 ) return;  /* Already clean */
  pCached->dirty = 0;
}
/*
** A call to this routine tells the pager that if a rollback occurs,
** it is not necessary to restore the data on the given page. This
** means that the pager does not have to record the given page in the
** rollback journal.
*/
void sqlitepager_dont_rollback(void *pData){
PgHdr *pPg = DATA_TO_PGHDR(pData);
Pager *pPager = pPg->pPager;
if( pPager->state!=SQLITE_WRITELOCK || pPager->journalOpen==0 ) return;
if( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ){
assert( pPager->aInJournal!=0 );
pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
pPg->inJournal = 1;
if( pPager->ckptOpen ){
pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
pPg->inCkpt = 1;
}
}
if( pPager->ckptOpen && !pPg->inCkpt && (int)pPg->pgno<=pPager->ckptSize ){
assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
assert( pPager->aInCkpt!=0 );
pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
pPg->inCkpt = 1;
}
}
/* /*
** Commit all changes to the database and release the write lock. ** Commit all changes to the database and release the write lock.
** **

View File

@@ -13,7 +13,7 @@
** subsystem. The page cache subsystem reads and writes a file a page ** subsystem. The page cache subsystem reads and writes a file a page
** at a time and provides a journal for rollback. ** at a time and provides a journal for rollback.
** **
** @(#) $Id: pager.h,v 1.14 2002/02/02 15:01:16 drh Exp $ ** @(#) $Id: pager.h,v 1.15 2002/03/02 20:41:59 drh Exp $
*/ */
/* /*
@@ -65,6 +65,8 @@ int sqlitepager_isreadonly(Pager*);
int sqlitepager_ckpt_begin(Pager*); int sqlitepager_ckpt_begin(Pager*);
int sqlitepager_ckpt_commit(Pager*); int sqlitepager_ckpt_commit(Pager*);
int sqlitepager_ckpt_rollback(Pager*); int sqlitepager_ckpt_rollback(Pager*);
void sqlitepager_dont_rollback(void*);
void sqlitepager_dont_write(Pager*, Pgno);
int *sqlitepager_stats(Pager*); int *sqlitepager_stats(Pager*);
#ifdef SQLITE_TEST #ifdef SQLITE_TEST