1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-08-08 14:02:16 +03:00

Improvements to the pager to help large updates against a large database run

faster.  Also improved the testing of the pager rollback algorithms. (CVS 835)

FossilOrigin-Name: 717523d3750dce784fa767ed9a8267d1246798ef
This commit is contained in:
drh
2003-01-16 13:42:43 +00:00
parent 2c3831cb23
commit db48ee02c4
6 changed files with 174 additions and 69 deletions

View File

@@ -18,7 +18,7 @@
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.67 2003/01/12 18:02:18 drh Exp $
** @(#) $Id: pager.c,v 1.68 2003/01/16 13:42:43 drh Exp $
*/
#include "os.h" /* Must be first to enable large file support */
#include "sqliteInt.h"
@@ -26,6 +26,25 @@
#include <assert.h>
#include <string.h>
/*
** Macros for troubleshooting. Normally turned off
*/
#if 0
static Pager *mainPager = 0;
#define SET_PAGER(X) if( mainPager==0 ) mainPager = (X)
#define CLR_PAGER(X) if( mainPager==(X) ) mainPager = 0
#define TRACE1(X) if( pPager==mainPager ) fprintf(stderr,X)
#define TRACE2(X,Y) if( pPager==mainPager ) fprintf(stderr,X,Y)
#define TRACE3(X,Y,Z) if( pPager==mainPager ) fprintf(stderr,X,Y,Z)
#else
#define SET_PAGER(X)
#define CLR_PAGER(X)
#define TRACE1(X)
#define TRACE2(X,Y)
#define TRACE3(X,Y,Z)
#endif
/*
** The page cache as a whole is always in one of the following
** states:
@@ -78,6 +97,7 @@ struct PgHdr {
u8 inJournal; /* TRUE if has been written to journal */
u8 inCkpt; /* TRUE if written to the checkpoint journal */
u8 dirty; /* TRUE if we need to write back changes */
u8 needSync; /* Sync journal before writing this page */
u8 alwaysRollback; /* Disable dont_rollback() for this page */
/* SQLITE_PAGE_SIZE bytes of page data follow this header */
/* Pager.nExtra bytes of local data follow the page data */
@@ -114,6 +134,9 @@ struct Pager {
int origDbSize; /* dbSize before the current change */
int ckptSize; /* Size of database (in pages) at ckpt_begin() */
off_t ckptJSize; /* Size of journal at ckpt_begin() */
#ifndef NDEBUG
off_t syncJSize; /* Size of journal at last fsync() call */
#endif
int ckptNRec; /* Number of records in the checkpoint journal */
int nExtra; /* Add this many bytes to each in-memory page */
void (*xDestructor)(void*); /* Call this routine when freeing pages */
@@ -122,6 +145,7 @@ struct Pager {
int mxPage; /* Maximum number of pages to hold in cache */
int nHit, nMiss, nOvfl; /* Cache hits, missing, and LRU overflows */
u8 journalOpen; /* True if journal file descriptors is valid */
u8 journalStarted; /* True if initial magic of journal is synced */
u8 useJournal; /* Do not use a rollback journal on this file */
u8 ckptOpen; /* True if the checkpoint journal is open */
u8 ckptInUse; /* True we are in a checkpoint */
@@ -360,6 +384,7 @@ static int pager_unwritelock(Pager *pPager){
for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
pPg->inJournal = 0;
pPg->dirty = 0;
pPg->needSync = 0;
}
}else{
assert( pPager->dirtyFile==0 || pPager->useJournal==0 );
@@ -398,13 +423,16 @@ static int pager_playback_one_page(Pager *pPager, OsFile *jfd){
** at the same time, if there is one.
*/
pPg = pager_lookup(pPager, pgRec.pgno);
if( pPg==0 || pPg->needSync==0 ){
TRACE2("PLAYBACK %d\n", pgRec.pgno);
sqliteOsSeek(&pPager->fd, (pgRec.pgno-1)*(off_t)SQLITE_PAGE_SIZE);
rc = sqliteOsWrite(&pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE);
}
if( pPg ){
memcpy(PGHDR_TO_DATA(pPg), pgRec.aData, SQLITE_PAGE_SIZE);
memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
}
rc = sqliteOsSeek(&pPager->fd, (pgRec.pgno-1)*(off_t)SQLITE_PAGE_SIZE);
if( rc==SQLITE_OK ){
rc = sqliteOsWrite(&pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE);
pPg->dirty = 0;
pPg->needSync = 0;
}
return rc;
}
@@ -483,7 +511,32 @@ static int pager_playback(Pager *pPager){
if( rc!=SQLITE_OK ) break;
}
end_playback:
#if !defined(NDEBUG) && defined(SQLITE_TEST)
/* For pages that were never written into the journal, restore the
** memory copy from the original database file.
**
** This is code is used during testing only. It is necessary to
** compensate for the sqliteOsTruncate() call inside
** sqlitepager_rollback().
*/
if( rc==SQLITE_OK ){
PgHdr *pPg;
for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
if( (int)pPg->pgno <= pPager->origDbSize ){
sqliteOsSeek(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)(pPg->pgno-1));
rc = sqliteOsRead(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
if( rc ) break;
}else{
memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
}
memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
pPg->needSync = 0;
pPg->dirty = 0;
}
}
#endif
if( rc!=SQLITE_OK ){
pager_unwritelock(pPager);
pPager->errMask |= PAGER_ERR_CORRUPT;
@@ -659,6 +712,7 @@ int sqlitepager_open(
sqliteFree(zFullPathname);
return SQLITE_NOMEM;
}
SET_PAGER(pPager);
pPager->zFilename = (char*)&pPager[1];
pPager->zJournal = &pPager->zFilename[nameLen+1];
strcpy(pPager->zFilename, zFullPathname);
@@ -761,6 +815,7 @@ int sqlitepager_close(Pager *pPager){
** sqliteOsDelete(pPager->zFilename);
** }
*/
CLR_PAGER(pPager);
sqliteFree(pPager);
return SQLITE_OK;
}
@@ -827,7 +882,6 @@ int sqlitepager_ref(void *pData){
*/
static int syncAllPages(Pager *pPager){
PgHdr *pPg;
Pgno lastPgno = 0;
int rc = SQLITE_OK;
/* Sync the journal before modifying the main database
@@ -835,28 +889,26 @@ static int syncAllPages(Pager *pPager){
*/
if( pPager->needSync ){
if( !pPager->tempFile ){
assert( pPager->journalOpen );
assert( !pPager->noSync );
TRACE1("SYNC\n");
rc = sqliteOsSync(&pPager->jfd);
if( rc!=0 ) return rc;
#ifndef NDEBUG
rc = sqliteOsFileSize(&pPager->jfd, &pPager->syncJSize);
if( rc!=0 ) return rc;
#endif
pPager->journalStarted = 1;
}
pPager->needSync = 0;
}
/* Write all dirty free pages to the disk in the order that they
** appear on the disk. We have experimented with sorting the pages
** by page numbers so that they are written in order, but that does
** not appear to improve performance.
/* Erase the needSync flag from every page.
*/
for(pPg=pPager->pFirst; pPg; pPg=pPg->pNextFree){
if( pPg->dirty ){
if( lastPgno==0 || pPg->pgno!=lastPgno+1 ){
sqliteOsSeek(&pPager->fd, (pPg->pgno-1)*(off_t)SQLITE_PAGE_SIZE);
}
rc = sqliteOsWrite(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
if( rc!=SQLITE_OK ) break;
pPg->dirty = 0;
lastPgno = pPg->pgno;
}
for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
pPg->needSync = 0;
}
return rc;
}
@@ -939,6 +991,7 @@ int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage){
return SQLITE_BUSY;
}
pPager->journalOpen = 1;
pPager->journalStarted = 0;
/* Playback and delete the journal. Drop the database write
** lock and reacquire the read lock.
@@ -976,25 +1029,18 @@ int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage){
pPager->pAll = pPg;
pPager->nPage++;
}else{
/* Recycle an older page. First locate the page to be recycled.
** Try to find one that is not dirty and is near the head of
** of the free list */
/* Find a page to recycle. Try to locate a page that does not
** require us to do an fsync() on the journal.
*/
pPg = pPager->pFirst;
while( pPg && pPg->dirty ){
while( pPg && pPg->needSync ){
pPg = pPg->pNextFree;
}
/* If we could not find a page that has not been used recently
** and which is not dirty, then sync the journal and write all
** dirty free pages into the database file, thus making them
** clean pages and available for recycling.
**
** We have to sync the journal before writing a page to the main
** database. But syncing is a very slow operation. So after a
** sync, it is best to write everything we can back to the main
** database to minimize the risk of having to sync again in the
** near future. That is why we write all dirty pages after a
** sync.
/* If we could not find a page that does not require an fsync()
** on the journal file then fsync the journal file. This is a
** very slow operation, so we work hard to avoid it. But sometimes
** it can't be helped.
*/
if( pPg==0 ){
int rc = syncAllPages(pPager);
@@ -1006,9 +1052,24 @@ int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage){
pPg = pPager->pFirst;
}
assert( pPg->nRef==0 );
/* Write the page to the database file if it is dirty.
*/
if( pPg->dirty ){
assert( pPg->needSync==0 );
TRACE2("SAVE %d\n", pPg->pgno);
sqliteOsSeek(&pPager->fd, (pPg->pgno-1)*(off_t)SQLITE_PAGE_SIZE);
rc = sqliteOsWrite(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
if( rc!=SQLITE_OK ){
sqlitepager_rollback(pPager);
*ppPage = 0;
return SQLITE_IOERR;
}
pPg->dirty = 0;
}
assert( pPg->dirty==0 );
/* If the page we are recyclying is marked as alwaysRollback, then
/* If the page we are recycling is marked as alwaysRollback, then
** set the global alwaysRollback flag, thus disabling the
** sqlite_dont_rollback() optimization for the rest of this transaction.
** It is necessary to do this because the page marked alwaysRollback
@@ -1051,9 +1112,12 @@ int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage){
pPg->pgno = pgno;
if( pPager->aInJournal && (int)pgno<=pPager->origDbSize ){
sqliteCheckMemory(pPager->aInJournal, pgno/8);
assert( pPager->journalOpen );
pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0;
pPg->needSync = 0;
}else{
pPg->inJournal = 0;
pPg->needSync = 0;
}
if( pPager->aInCkpt && (int)pgno<=pPager->ckptSize
&& (pPager->aInCkpt[pgno/8] & (1<<(pgno&7)))!=0 ){
@@ -1205,6 +1269,7 @@ static int pager_open_journal(Pager *pPager){
return SQLITE_CANTOPEN;
}
pPager->journalOpen = 1;
pPager->journalStarted = 0;
pPager->needSync = 0;
pPager->alwaysRollback = 0;
sqlitepager_pagecount(pPager);
@@ -1227,6 +1292,9 @@ static int pager_open_journal(Pager *pPager){
rc = SQLITE_FULL;
}
}
#ifndef NDEBUG
pPager->syncJSize = 0;
#endif
return rc;
}
@@ -1264,6 +1332,7 @@ int sqlitepager_begin(void *pData){
}
pPager->state = SQLITE_WRITELOCK;
pPager->dirtyFile = 0;
TRACE1("TRANSACTION\n");
if( pPager->useJournal && !pPager->tempFile ){
rc = pager_open_journal(pPager);
}
@@ -1335,24 +1404,32 @@ int sqlitepager_write(void *pData){
** main database file. Write the current page to the transaction
** journal if it is not there already.
*/
if( !pPg->inJournal && pPager->useJournal
&& (int)pPg->pgno <= pPager->origDbSize ){
rc = write32bits(&pPager->jfd, pPg->pgno);
if( rc==SQLITE_OK ){
rc = sqliteOsWrite(&pPager->jfd, pData, SQLITE_PAGE_SIZE);
if( !pPg->inJournal && pPager->useJournal ){
if( (int)pPg->pgno <= pPager->origDbSize ){
rc = write32bits(&pPager->jfd, pPg->pgno);
if( rc==SQLITE_OK ){
rc = sqliteOsWrite(&pPager->jfd, pData, SQLITE_PAGE_SIZE);
}
if( rc!=SQLITE_OK ){
sqlitepager_rollback(pPager);
pPager->errMask |= PAGER_ERR_FULL;
return rc;
}
assert( pPager->aInJournal!=0 );
pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
pPg->needSync = !pPager->noSync;
pPg->inJournal = 1;
if( pPager->ckptInUse ){
pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
page_add_to_ckpt_list(pPg);
}
TRACE3("JOURNAL %d %d\n", pPg->pgno, pPg->needSync);
}else{
pPg->needSync = !pPager->journalStarted && !pPager->noSync;
TRACE3("APPEND %d %d\n", pPg->pgno, pPg->needSync);
}
if( rc!=SQLITE_OK ){
sqlitepager_rollback(pPager);
pPager->errMask |= PAGER_ERR_FULL;
return rc;
}
assert( pPager->aInJournal!=0 );
pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
pPager->needSync = !pPager->noSync;
pPg->inJournal = 1;
if( pPager->ckptInUse ){
pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
page_add_to_ckpt_list(pPg);
if( pPg->needSync ){
pPager->needSync = 1;
}
}
@@ -1434,6 +1511,7 @@ void sqlitepager_dont_write(Pager *pPager, Pgno pgno){
** corruption during the next transaction.
*/
}else{
TRACE2("DONT_WRITE %d\n", pgno);
pPg->dirty = 0;
}
}
@@ -1459,6 +1537,7 @@ void sqlitepager_dont_rollback(void *pData){
pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
page_add_to_ckpt_list(pPg);
}
TRACE2("DONT_ROLLBACK %d\n", pPg->pgno);
}
if( pPager->ckptInUse && !pPg->inCkpt && (int)pPg->pgno<=pPager->ckptSize ){
assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
@@ -1478,6 +1557,7 @@ void sqlitepager_dont_rollback(void *pData){
int sqlitepager_commit(Pager *pPager){
int rc;
PgHdr *pPg;
int dbChanged;
if( pPager->errMask==PAGER_ERR_FULL ){
rc = sqlitepager_rollback(pPager);
@@ -1493,6 +1573,7 @@ int sqlitepager_commit(Pager *pPager){
if( pPager->state!=SQLITE_WRITELOCK ){
return SQLITE_ERROR;
}
TRACE1("COMMIT\n");
if( pPager->dirtyFile==0 ){
/* Exit early (without doing the time-consuming sqliteOsSync() calls)
** if there have been no changes to the database file. */
@@ -1501,17 +1582,21 @@ int sqlitepager_commit(Pager *pPager){
return rc;
}
assert( pPager->journalOpen );
if( !pPager->journalStarted && !pPager->noSync ) pPager->needSync = 1;
assert( pPager->dirtyFile || !pPager->needSync );
if( pPager->needSync && sqliteOsSync(&pPager->jfd)!=SQLITE_OK ){
goto commit_abort;
}
dbChanged = 0;
for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
if( pPg->dirty==0 ) continue;
rc = sqliteOsSeek(&pPager->fd, (pPg->pgno-1)*(off_t)SQLITE_PAGE_SIZE);
if( rc!=SQLITE_OK ) goto commit_abort;
TRACE2("COMMIT-PAGE %d\n", pPg->pgno);
sqliteOsSeek(&pPager->fd, (pPg->pgno-1)*(off_t)SQLITE_PAGE_SIZE);
rc = sqliteOsWrite(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
if( rc!=SQLITE_OK ) goto commit_abort;
dbChanged = 1;
}
if( !pPager->noSync && sqliteOsSync(&pPager->fd)!=SQLITE_OK ){
if( dbChanged && !pPager->noSync && sqliteOsSync(&pPager->fd)!=SQLITE_OK ){
goto commit_abort;
}
rc = pager_unwritelock(pPager);
@@ -1542,11 +1627,28 @@ commit_abort:
*/
int sqlitepager_rollback(Pager *pPager){
int rc;
TRACE1("ROLLBACK\n");
if( !pPager->dirtyFile || !pPager->journalOpen ){
rc = pager_unwritelock(pPager);
pPager->dbSize = -1;
return rc;
}
#if defined(SQLITE_TEST) && !defined(NDEBUG)
/* Truncate the journal to the size it was at the conclusion of the
** last sqliteOsSync() call. This is really an error check. If the
** rollback still works, it means that the rollback would have also
** worked if it had occurred after an OS crash or unexpected power
** loss.
*/
if( pPager->syncJSize<sizeof(aJournalMagic)+sizeof(Pgno) ){
pPager->syncJSize = sizeof(aJournalMagic)+sizeof(Pgno);
}
TRACE2("TRUNCATE JOURNAL %lld\n", pPager->syncJSize);
rc = sqliteOsTruncate(&pPager->jfd, pPager->syncJSize);
if( rc ) return rc;
#endif
if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){
if( pPager->state>=SQLITE_WRITELOCK ){
pager_playback(pPager);