mirror of
https://github.com/sqlite/sqlite.git
synced 2025-08-08 14:02:16 +03:00
Improvements to the pager to help large updates against a large database run
faster. Also improved the testing of the pager rollback algorithms. (CVS 835)
FossilOrigin-Name: 717523d3750dce784fa767ed9a8267d1246798ef
This commit is contained in:
214
src/pager.c
214
src/pager.c
@@ -18,7 +18,7 @@
|
||||
** file simultaneously, or one process from reading the database while
|
||||
** another is writing.
|
||||
**
|
||||
** @(#) $Id: pager.c,v 1.67 2003/01/12 18:02:18 drh Exp $
|
||||
** @(#) $Id: pager.c,v 1.68 2003/01/16 13:42:43 drh Exp $
|
||||
*/
|
||||
#include "os.h" /* Must be first to enable large file support */
|
||||
#include "sqliteInt.h"
|
||||
@@ -26,6 +26,25 @@
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
** Macros for troubleshooting. Normally turned off
|
||||
*/
|
||||
#if 0
|
||||
static Pager *mainPager = 0;
|
||||
#define SET_PAGER(X) if( mainPager==0 ) mainPager = (X)
|
||||
#define CLR_PAGER(X) if( mainPager==(X) ) mainPager = 0
|
||||
#define TRACE1(X) if( pPager==mainPager ) fprintf(stderr,X)
|
||||
#define TRACE2(X,Y) if( pPager==mainPager ) fprintf(stderr,X,Y)
|
||||
#define TRACE3(X,Y,Z) if( pPager==mainPager ) fprintf(stderr,X,Y,Z)
|
||||
#else
|
||||
#define SET_PAGER(X)
|
||||
#define CLR_PAGER(X)
|
||||
#define TRACE1(X)
|
||||
#define TRACE2(X,Y)
|
||||
#define TRACE3(X,Y,Z)
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
** The page cache as a whole is always in one of the following
|
||||
** states:
|
||||
@@ -78,6 +97,7 @@ struct PgHdr {
|
||||
u8 inJournal; /* TRUE if has been written to journal */
|
||||
u8 inCkpt; /* TRUE if written to the checkpoint journal */
|
||||
u8 dirty; /* TRUE if we need to write back changes */
|
||||
u8 needSync; /* Sync journal before writing this page */
|
||||
u8 alwaysRollback; /* Disable dont_rollback() for this page */
|
||||
/* SQLITE_PAGE_SIZE bytes of page data follow this header */
|
||||
/* Pager.nExtra bytes of local data follow the page data */
|
||||
@@ -114,6 +134,9 @@ struct Pager {
|
||||
int origDbSize; /* dbSize before the current change */
|
||||
int ckptSize; /* Size of database (in pages) at ckpt_begin() */
|
||||
off_t ckptJSize; /* Size of journal at ckpt_begin() */
|
||||
#ifndef NDEBUG
|
||||
off_t syncJSize; /* Size of journal at last fsync() call */
|
||||
#endif
|
||||
int ckptNRec; /* Number of records in the checkpoint journal */
|
||||
int nExtra; /* Add this many bytes to each in-memory page */
|
||||
void (*xDestructor)(void*); /* Call this routine when freeing pages */
|
||||
@@ -122,6 +145,7 @@ struct Pager {
|
||||
int mxPage; /* Maximum number of pages to hold in cache */
|
||||
int nHit, nMiss, nOvfl; /* Cache hits, missing, and LRU overflows */
|
||||
u8 journalOpen; /* True if journal file descriptors is valid */
|
||||
u8 journalStarted; /* True if initial magic of journal is synced */
|
||||
u8 useJournal; /* Do not use a rollback journal on this file */
|
||||
u8 ckptOpen; /* True if the checkpoint journal is open */
|
||||
u8 ckptInUse; /* True we are in a checkpoint */
|
||||
@@ -360,6 +384,7 @@ static int pager_unwritelock(Pager *pPager){
|
||||
for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
|
||||
pPg->inJournal = 0;
|
||||
pPg->dirty = 0;
|
||||
pPg->needSync = 0;
|
||||
}
|
||||
}else{
|
||||
assert( pPager->dirtyFile==0 || pPager->useJournal==0 );
|
||||
@@ -398,13 +423,16 @@ static int pager_playback_one_page(Pager *pPager, OsFile *jfd){
|
||||
** at the same time, if there is one.
|
||||
*/
|
||||
pPg = pager_lookup(pPager, pgRec.pgno);
|
||||
if( pPg==0 || pPg->needSync==0 ){
|
||||
TRACE2("PLAYBACK %d\n", pgRec.pgno);
|
||||
sqliteOsSeek(&pPager->fd, (pgRec.pgno-1)*(off_t)SQLITE_PAGE_SIZE);
|
||||
rc = sqliteOsWrite(&pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE);
|
||||
}
|
||||
if( pPg ){
|
||||
memcpy(PGHDR_TO_DATA(pPg), pgRec.aData, SQLITE_PAGE_SIZE);
|
||||
memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
|
||||
}
|
||||
rc = sqliteOsSeek(&pPager->fd, (pgRec.pgno-1)*(off_t)SQLITE_PAGE_SIZE);
|
||||
if( rc==SQLITE_OK ){
|
||||
rc = sqliteOsWrite(&pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE);
|
||||
pPg->dirty = 0;
|
||||
pPg->needSync = 0;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
@@ -483,7 +511,32 @@ static int pager_playback(Pager *pPager){
|
||||
if( rc!=SQLITE_OK ) break;
|
||||
}
|
||||
|
||||
|
||||
end_playback:
|
||||
#if !defined(NDEBUG) && defined(SQLITE_TEST)
|
||||
/* For pages that were never written into the journal, restore the
|
||||
** memory copy from the original database file.
|
||||
**
|
||||
** This code is used during testing only. It is necessary to
|
||||
** compensate for the sqliteOsTruncate() call inside
|
||||
** sqlitepager_rollback().
|
||||
*/
|
||||
if( rc==SQLITE_OK ){
|
||||
PgHdr *pPg;
|
||||
for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
|
||||
if( (int)pPg->pgno <= pPager->origDbSize ){
|
||||
sqliteOsSeek(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)(pPg->pgno-1));
|
||||
rc = sqliteOsRead(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
|
||||
if( rc ) break;
|
||||
}else{
|
||||
memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
|
||||
}
|
||||
memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
|
||||
pPg->needSync = 0;
|
||||
pPg->dirty = 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if( rc!=SQLITE_OK ){
|
||||
pager_unwritelock(pPager);
|
||||
pPager->errMask |= PAGER_ERR_CORRUPT;
|
||||
@@ -659,6 +712,7 @@ int sqlitepager_open(
|
||||
sqliteFree(zFullPathname);
|
||||
return SQLITE_NOMEM;
|
||||
}
|
||||
SET_PAGER(pPager);
|
||||
pPager->zFilename = (char*)&pPager[1];
|
||||
pPager->zJournal = &pPager->zFilename[nameLen+1];
|
||||
strcpy(pPager->zFilename, zFullPathname);
|
||||
@@ -761,6 +815,7 @@ int sqlitepager_close(Pager *pPager){
|
||||
** sqliteOsDelete(pPager->zFilename);
|
||||
** }
|
||||
*/
|
||||
CLR_PAGER(pPager);
|
||||
sqliteFree(pPager);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
@@ -827,7 +882,6 @@ int sqlitepager_ref(void *pData){
|
||||
*/
|
||||
static int syncAllPages(Pager *pPager){
|
||||
PgHdr *pPg;
|
||||
Pgno lastPgno = 0;
|
||||
int rc = SQLITE_OK;
|
||||
|
||||
/* Sync the journal before modifying the main database
|
||||
@@ -835,28 +889,26 @@ static int syncAllPages(Pager *pPager){
|
||||
*/
|
||||
if( pPager->needSync ){
|
||||
if( !pPager->tempFile ){
|
||||
assert( pPager->journalOpen );
|
||||
assert( !pPager->noSync );
|
||||
TRACE1("SYNC\n");
|
||||
rc = sqliteOsSync(&pPager->jfd);
|
||||
if( rc!=0 ) return rc;
|
||||
#ifndef NDEBUG
|
||||
rc = sqliteOsFileSize(&pPager->jfd, &pPager->syncJSize);
|
||||
if( rc!=0 ) return rc;
|
||||
#endif
|
||||
pPager->journalStarted = 1;
|
||||
}
|
||||
pPager->needSync = 0;
|
||||
}
|
||||
|
||||
/* Write all dirty free pages to the disk in the order that they
|
||||
** appear on the disk. We have experimented with sorting the pages
|
||||
** by page numbers so that they are written in order, but that does
|
||||
** not appear to improve performance.
|
||||
/* Erase the needSync flag from every page.
|
||||
*/
|
||||
for(pPg=pPager->pFirst; pPg; pPg=pPg->pNextFree){
|
||||
if( pPg->dirty ){
|
||||
if( lastPgno==0 || pPg->pgno!=lastPgno+1 ){
|
||||
sqliteOsSeek(&pPager->fd, (pPg->pgno-1)*(off_t)SQLITE_PAGE_SIZE);
|
||||
}
|
||||
rc = sqliteOsWrite(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
|
||||
if( rc!=SQLITE_OK ) break;
|
||||
pPg->dirty = 0;
|
||||
lastPgno = pPg->pgno;
|
||||
}
|
||||
for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
|
||||
pPg->needSync = 0;
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
@@ -939,6 +991,7 @@ int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage){
|
||||
return SQLITE_BUSY;
|
||||
}
|
||||
pPager->journalOpen = 1;
|
||||
pPager->journalStarted = 0;
|
||||
|
||||
/* Playback and delete the journal. Drop the database write
|
||||
** lock and reacquire the read lock.
|
||||
@@ -976,25 +1029,18 @@ int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage){
|
||||
pPager->pAll = pPg;
|
||||
pPager->nPage++;
|
||||
}else{
|
||||
/* Recycle an older page. First locate the page to be recycled.
|
||||
** Try to find one that is not dirty and is near the head of
|
||||
** the free list */
|
||||
/* Find a page to recycle. Try to locate a page that does not
|
||||
** require us to do an fsync() on the journal.
|
||||
*/
|
||||
pPg = pPager->pFirst;
|
||||
while( pPg && pPg->dirty ){
|
||||
while( pPg && pPg->needSync ){
|
||||
pPg = pPg->pNextFree;
|
||||
}
|
||||
|
||||
/* If we could not find a page that has not been used recently
|
||||
** and which is not dirty, then sync the journal and write all
|
||||
** dirty free pages into the database file, thus making them
|
||||
** clean pages and available for recycling.
|
||||
**
|
||||
** We have to sync the journal before writing a page to the main
|
||||
** database. But syncing is a very slow operation. So after a
|
||||
** sync, it is best to write everything we can back to the main
|
||||
** database to minimize the risk of having to sync again in the
|
||||
** near future. That is why we write all dirty pages after a
|
||||
** sync.
|
||||
/* If we could not find a page that does not require an fsync()
|
||||
** on the journal file then fsync the journal file. This is a
|
||||
** very slow operation, so we work hard to avoid it. But sometimes
|
||||
** it can't be helped.
|
||||
*/
|
||||
if( pPg==0 ){
|
||||
int rc = syncAllPages(pPager);
|
||||
@@ -1006,9 +1052,24 @@ int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage){
|
||||
pPg = pPager->pFirst;
|
||||
}
|
||||
assert( pPg->nRef==0 );
|
||||
|
||||
/* Write the page to the database file if it is dirty.
|
||||
*/
|
||||
if( pPg->dirty ){
|
||||
assert( pPg->needSync==0 );
|
||||
TRACE2("SAVE %d\n", pPg->pgno);
|
||||
sqliteOsSeek(&pPager->fd, (pPg->pgno-1)*(off_t)SQLITE_PAGE_SIZE);
|
||||
rc = sqliteOsWrite(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
|
||||
if( rc!=SQLITE_OK ){
|
||||
sqlitepager_rollback(pPager);
|
||||
*ppPage = 0;
|
||||
return SQLITE_IOERR;
|
||||
}
|
||||
pPg->dirty = 0;
|
||||
}
|
||||
assert( pPg->dirty==0 );
|
||||
|
||||
/* If the page we are recyclying is marked as alwaysRollback, then
|
||||
/* If the page we are recycling is marked as alwaysRollback, then
|
||||
** set the global alwaysRollback flag, thus disabling the
|
||||
** sqlite_dont_rollback() optimization for the rest of this transaction.
|
||||
** It is necessary to do this because the page marked alwaysRollback
|
||||
@@ -1051,9 +1112,12 @@ int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage){
|
||||
pPg->pgno = pgno;
|
||||
if( pPager->aInJournal && (int)pgno<=pPager->origDbSize ){
|
||||
sqliteCheckMemory(pPager->aInJournal, pgno/8);
|
||||
assert( pPager->journalOpen );
|
||||
pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0;
|
||||
pPg->needSync = 0;
|
||||
}else{
|
||||
pPg->inJournal = 0;
|
||||
pPg->needSync = 0;
|
||||
}
|
||||
if( pPager->aInCkpt && (int)pgno<=pPager->ckptSize
|
||||
&& (pPager->aInCkpt[pgno/8] & (1<<(pgno&7)))!=0 ){
|
||||
@@ -1205,6 +1269,7 @@ static int pager_open_journal(Pager *pPager){
|
||||
return SQLITE_CANTOPEN;
|
||||
}
|
||||
pPager->journalOpen = 1;
|
||||
pPager->journalStarted = 0;
|
||||
pPager->needSync = 0;
|
||||
pPager->alwaysRollback = 0;
|
||||
sqlitepager_pagecount(pPager);
|
||||
@@ -1227,6 +1292,9 @@ static int pager_open_journal(Pager *pPager){
|
||||
rc = SQLITE_FULL;
|
||||
}
|
||||
}
|
||||
#ifndef NDEBUG
|
||||
pPager->syncJSize = 0;
|
||||
#endif
|
||||
return rc;
|
||||
}
|
||||
|
||||
@@ -1264,6 +1332,7 @@ int sqlitepager_begin(void *pData){
|
||||
}
|
||||
pPager->state = SQLITE_WRITELOCK;
|
||||
pPager->dirtyFile = 0;
|
||||
TRACE1("TRANSACTION\n");
|
||||
if( pPager->useJournal && !pPager->tempFile ){
|
||||
rc = pager_open_journal(pPager);
|
||||
}
|
||||
@@ -1335,24 +1404,32 @@ int sqlitepager_write(void *pData){
|
||||
** main database file. Write the current page to the transaction
|
||||
** journal if it is not there already.
|
||||
*/
|
||||
if( !pPg->inJournal && pPager->useJournal
|
||||
&& (int)pPg->pgno <= pPager->origDbSize ){
|
||||
rc = write32bits(&pPager->jfd, pPg->pgno);
|
||||
if( rc==SQLITE_OK ){
|
||||
rc = sqliteOsWrite(&pPager->jfd, pData, SQLITE_PAGE_SIZE);
|
||||
if( !pPg->inJournal && pPager->useJournal ){
|
||||
if( (int)pPg->pgno <= pPager->origDbSize ){
|
||||
rc = write32bits(&pPager->jfd, pPg->pgno);
|
||||
if( rc==SQLITE_OK ){
|
||||
rc = sqliteOsWrite(&pPager->jfd, pData, SQLITE_PAGE_SIZE);
|
||||
}
|
||||
if( rc!=SQLITE_OK ){
|
||||
sqlitepager_rollback(pPager);
|
||||
pPager->errMask |= PAGER_ERR_FULL;
|
||||
return rc;
|
||||
}
|
||||
assert( pPager->aInJournal!=0 );
|
||||
pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
|
||||
pPg->needSync = !pPager->noSync;
|
||||
pPg->inJournal = 1;
|
||||
if( pPager->ckptInUse ){
|
||||
pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
|
||||
page_add_to_ckpt_list(pPg);
|
||||
}
|
||||
TRACE3("JOURNAL %d %d\n", pPg->pgno, pPg->needSync);
|
||||
}else{
|
||||
pPg->needSync = !pPager->journalStarted && !pPager->noSync;
|
||||
TRACE3("APPEND %d %d\n", pPg->pgno, pPg->needSync);
|
||||
}
|
||||
if( rc!=SQLITE_OK ){
|
||||
sqlitepager_rollback(pPager);
|
||||
pPager->errMask |= PAGER_ERR_FULL;
|
||||
return rc;
|
||||
}
|
||||
assert( pPager->aInJournal!=0 );
|
||||
pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
|
||||
pPager->needSync = !pPager->noSync;
|
||||
pPg->inJournal = 1;
|
||||
if( pPager->ckptInUse ){
|
||||
pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
|
||||
page_add_to_ckpt_list(pPg);
|
||||
if( pPg->needSync ){
|
||||
pPager->needSync = 1;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1434,6 +1511,7 @@ void sqlitepager_dont_write(Pager *pPager, Pgno pgno){
|
||||
** corruption during the next transaction.
|
||||
*/
|
||||
}else{
|
||||
TRACE2("DONT_WRITE %d\n", pgno);
|
||||
pPg->dirty = 0;
|
||||
}
|
||||
}
|
||||
@@ -1459,6 +1537,7 @@ void sqlitepager_dont_rollback(void *pData){
|
||||
pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
|
||||
page_add_to_ckpt_list(pPg);
|
||||
}
|
||||
TRACE2("DONT_ROLLBACK %d\n", pPg->pgno);
|
||||
}
|
||||
if( pPager->ckptInUse && !pPg->inCkpt && (int)pPg->pgno<=pPager->ckptSize ){
|
||||
assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
|
||||
@@ -1478,6 +1557,7 @@ void sqlitepager_dont_rollback(void *pData){
|
||||
int sqlitepager_commit(Pager *pPager){
|
||||
int rc;
|
||||
PgHdr *pPg;
|
||||
int dbChanged;
|
||||
|
||||
if( pPager->errMask==PAGER_ERR_FULL ){
|
||||
rc = sqlitepager_rollback(pPager);
|
||||
@@ -1493,6 +1573,7 @@ int sqlitepager_commit(Pager *pPager){
|
||||
if( pPager->state!=SQLITE_WRITELOCK ){
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
TRACE1("COMMIT\n");
|
||||
if( pPager->dirtyFile==0 ){
|
||||
/* Exit early (without doing the time-consuming sqliteOsSync() calls)
|
||||
** if there have been no changes to the database file. */
|
||||
@@ -1501,17 +1582,21 @@ int sqlitepager_commit(Pager *pPager){
|
||||
return rc;
|
||||
}
|
||||
assert( pPager->journalOpen );
|
||||
if( !pPager->journalStarted && !pPager->noSync ) pPager->needSync = 1;
|
||||
assert( pPager->dirtyFile || !pPager->needSync );
|
||||
if( pPager->needSync && sqliteOsSync(&pPager->jfd)!=SQLITE_OK ){
|
||||
goto commit_abort;
|
||||
}
|
||||
dbChanged = 0;
|
||||
for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
|
||||
if( pPg->dirty==0 ) continue;
|
||||
rc = sqliteOsSeek(&pPager->fd, (pPg->pgno-1)*(off_t)SQLITE_PAGE_SIZE);
|
||||
if( rc!=SQLITE_OK ) goto commit_abort;
|
||||
TRACE2("COMMIT-PAGE %d\n", pPg->pgno);
|
||||
sqliteOsSeek(&pPager->fd, (pPg->pgno-1)*(off_t)SQLITE_PAGE_SIZE);
|
||||
rc = sqliteOsWrite(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
|
||||
if( rc!=SQLITE_OK ) goto commit_abort;
|
||||
dbChanged = 1;
|
||||
}
|
||||
if( !pPager->noSync && sqliteOsSync(&pPager->fd)!=SQLITE_OK ){
|
||||
if( dbChanged && !pPager->noSync && sqliteOsSync(&pPager->fd)!=SQLITE_OK ){
|
||||
goto commit_abort;
|
||||
}
|
||||
rc = pager_unwritelock(pPager);
|
||||
@@ -1542,11 +1627,28 @@ commit_abort:
|
||||
*/
|
||||
int sqlitepager_rollback(Pager *pPager){
|
||||
int rc;
|
||||
TRACE1("ROLLBACK\n");
|
||||
if( !pPager->dirtyFile || !pPager->journalOpen ){
|
||||
rc = pager_unwritelock(pPager);
|
||||
pPager->dbSize = -1;
|
||||
return rc;
|
||||
}
|
||||
|
||||
#if defined(SQLITE_TEST) && !defined(NDEBUG)
|
||||
/* Truncate the journal to the size it was at the conclusion of the
|
||||
** last sqliteOsSync() call. This is really an error check. If the
|
||||
** rollback still works, it means that the rollback would have also
|
||||
** worked if it had occurred after an OS crash or unexpected power
|
||||
** loss.
|
||||
*/
|
||||
if( pPager->syncJSize<sizeof(aJournalMagic)+sizeof(Pgno) ){
|
||||
pPager->syncJSize = sizeof(aJournalMagic)+sizeof(Pgno);
|
||||
}
|
||||
TRACE2("TRUNCATE JOURNAL %lld\n", pPager->syncJSize);
|
||||
rc = sqliteOsTruncate(&pPager->jfd, pPager->syncJSize);
|
||||
if( rc ) return rc;
|
||||
#endif
|
||||
|
||||
if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){
|
||||
if( pPager->state>=SQLITE_WRITELOCK ){
|
||||
pager_playback(pPager);
|
||||
|
Reference in New Issue
Block a user