1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-09-11 08:30:57 +03:00

Improvements to the pager to help large updates against a large database run

faster.  Also improved the testing of the pager rollback algorithms. (CVS 835)

FossilOrigin-Name: 717523d3750dce784fa767ed9a8267d1246798ef
This commit is contained in:
drh
2003-01-16 13:42:43 +00:00
parent 2c3831cb23
commit db48ee02c4
6 changed files with 174 additions and 69 deletions

View File

@@ -1,5 +1,5 @@
C Finish\sout\sthe\stest\ssuite\sfor\sthe\snew\ssqlite_set_authorizer\sAPI.\s(CVS\s834)
D 2003-01-14T13:48:21
C Improvements\sto\sthe\spager\sto\shelp\slarge\supdates\sagainst\sa\slarge\sdatabase\srun\nfaster.\s\sAlso\simproved\sthe\stesting\sof\sthe\spager\srollback\salgorithms.\s(CVS\s835)
D 2003-01-16T13:42:43
F Makefile.in 6606854b1512f185b8e8c779b8d7fc2750463d64
F Makefile.linux-gcc b86a99c493a5bfb402d1d9178dcdc4bd4b32f906
F README f1de682fbbd94899d50aca13d387d1b3fd3be2dd
@@ -31,9 +31,9 @@ F src/hash.h cd0433998bc1a3759d244e1637fe5a3c13b53bf8
F src/insert.c db954e955970795819145a3649fd2ad116a58890
F src/main.c c8f8fdfe4548a8404fab90ff6ad374b217e6b7fa
F src/md5.c fe4f9c9c6f71dfc26af8da63e4d04489b1430565
F src/os.c 28447687e7914306650f72058f62f7162faeef1f
F src/os.c 3a652608c296cf639ce63bd31d255db862e45685
F src/os.h afa3e096213bad86845f8bdca81a9e917505e401
F src/pager.c 5b81639b38eb4250810ed2b31aada9fb040ed86b
F src/pager.c 081155624cff7bec54590133b69906a23f9b3659
F src/pager.h 540833e8cb826b80ce2e39aa917deee5e12db626
F src/parse.y 58655a50817f93ddd0bc3d8949e267729396949c
F src/printf.c 5c50fc1da75c8f5bf432b1ad17d91d6653acd167
@@ -53,7 +53,7 @@ F src/tokenize.c 7ac1c33e0149647c9eb5959c48992df6906d4809
F src/trigger.c da142decd2808bc39e801f3bb1f161dbc2bd4005
F src/update.c f06afa9bf1f777d17702e0f6e33cf44c44bc4f75
F src/util.c e23f8ffc654923e18f8db2d8e0de97c166fca20f
F src/vdbe.c e103bd5a154b1790dd344662dceb14566a51a879
F src/vdbe.c dc0a9c1b815c95b14ffe62765cfd528d974c6c1b
F src/vdbe.h 754eba497cfe0c3e352b9c101ab2f811f10d0a55
F src/where.c 5bf7f1e1d756ab3d25a18b24bb42106cb8e14d18
F test/all.test 873d30e25a41b3aa48fec5633a7ec1816e107029
@@ -110,7 +110,7 @@ F test/tester.tcl 6f603d90881bd835ea27c568a7fecaa57dce91cc
F test/trans.test 10b53c77e2cc4ad9529c15fdcb390b8d5722ea65
F test/trigger1.test ec1da76e1a9f618deb96e505f459dcf8a23f2247
F test/trigger2.test ee346d8c612e7f847c9543058f1b89d094d27ffb
F test/trigger3.test 2bf76f7367a36242f670026af542d9f60efe3dc6
F test/trigger3.test 5958cdb44e95842298436cb61d5de5251ec2d28e
F test/trigger4.test 9a5c1406344d743020c2753ae8d6dfe6eb75f818
F test/unique.test 572aa791327c1e8d797932263e9d67f176cfdb44
F test/update.test 7ffb062d580a972e7870d0f51d5af3ab9bfeae08
@@ -154,7 +154,7 @@ F www/speed.tcl a20a792738475b68756ea7a19321600f23d1d803
F www/sqlite.tcl ae3dcfb077e53833b59d4fcc94d8a12c50a44098
F www/tclsqlite.tcl 1db15abeb446aad0caf0b95b8b9579720e4ea331
F www/vdbe.tcl 2013852c27a02a091d39a766bc87cff329f21218
P ba58979f2ff3ec878a21e7c171fbcd8fa79ace6f
R d196fd62af24b9896e18def719841a67
P 701a73918db22fd134a8b959670ba7a4a908c8c5
R 542f195ac0271d966b46ad74cdefd9a5
U drh
Z 0dca7f72990a1e352389dd411ae1fee0
Z 7a6aa3269b75773568aa62b73bab365a

View File

@@ -1 +1 @@
701a73918db22fd134a8b959670ba7a4a908c8c5
717523d3750dce784fa767ed9a8267d1246798ef

View File

@@ -1429,4 +1429,3 @@ char *sqliteOsFullPathname(const char *zRelative){
return zFull;
#endif
}

View File

@@ -18,7 +18,7 @@
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.67 2003/01/12 18:02:18 drh Exp $
** @(#) $Id: pager.c,v 1.68 2003/01/16 13:42:43 drh Exp $
*/
#include "os.h" /* Must be first to enable large file support */
#include "sqliteInt.h"
@@ -26,6 +26,25 @@
#include <assert.h>
#include <string.h>
/*
** Macros for troubleshooting. Normally turned off
**
** Change the "#if 0" below to "#if 1" to enable them.
**
** When enabled:
**
**   SET_PAGER(X)    Remember X in the file-scope variable mainPager, but
**                   only if mainPager is currently 0.  Only one pager is
**                   ever traced at a time.
**
**   CLR_PAGER(X)    Reset mainPager to 0 if it is currently equal to X.
**
**   TRACE1(X)       Print a message on stderr using fprintf(), where X is
**   TRACE2(X,Y)     the format string and Y and Z are its arguments.  The
**   TRACE3(X,Y,Z)   message is printed only if the variable named "pPager"
**                   at the point of use is equal to mainPager, so these
**                   macros can only be used where a variable with that
**                   name is in scope.
**
** The enabled forms each expand to an unbraced "if" statement with no
** trailing semicolon.  Use them only as complete statements, never as the
** unbraced body of an if/else, or a following "else" will bind to the
** hidden "if".
**
** NOTE(review): the enabled forms use fprintf() and stderr, which need
** <stdio.h> - confirm it is included before turning tracing on.
**
** When disabled (the normal case) every macro expands to nothing.
*/
#if 0
static Pager *mainPager = 0;
#define SET_PAGER(X) if( mainPager==0 ) mainPager = (X)
#define CLR_PAGER(X) if( mainPager==(X) ) mainPager = 0
#define TRACE1(X) if( pPager==mainPager ) fprintf(stderr,X)
#define TRACE2(X,Y) if( pPager==mainPager ) fprintf(stderr,X,Y)
#define TRACE3(X,Y,Z) if( pPager==mainPager ) fprintf(stderr,X,Y,Z)
#else
#define SET_PAGER(X)
#define CLR_PAGER(X)
#define TRACE1(X)
#define TRACE2(X,Y)
#define TRACE3(X,Y,Z)
#endif
/*
** The page cache as a whole is always in one of the following
** states:
@@ -78,6 +97,7 @@ struct PgHdr {
u8 inJournal; /* TRUE if has been written to journal */
u8 inCkpt; /* TRUE if written to the checkpoint journal */
u8 dirty; /* TRUE if we need to write back changes */
u8 needSync; /* Sync journal before writing this page */
u8 alwaysRollback; /* Disable dont_rollback() for this page */
/* SQLITE_PAGE_SIZE bytes of page data follow this header */
/* Pager.nExtra bytes of local data follow the page data */
@@ -114,6 +134,9 @@ struct Pager {
int origDbSize; /* dbSize before the current change */
int ckptSize; /* Size of database (in pages) at ckpt_begin() */
off_t ckptJSize; /* Size of journal at ckpt_begin() */
#ifndef NDEBUG
off_t syncJSize; /* Size of journal at last fsync() call */
#endif
int ckptNRec; /* Number of records in the checkpoint journal */
int nExtra; /* Add this many bytes to each in-memory page */
void (*xDestructor)(void*); /* Call this routine when freeing pages */
@@ -122,6 +145,7 @@ struct Pager {
int mxPage; /* Maximum number of pages to hold in cache */
int nHit, nMiss, nOvfl; /* Cache hits, missing, and LRU overflows */
u8 journalOpen; /* True if journal file descriptor is valid */
u8 journalStarted; /* True if initial magic of journal is synced */
u8 useJournal; /* Do not use a rollback journal on this file */
u8 ckptOpen; /* True if the checkpoint journal is open */
u8 ckptInUse; /* True if we are in a checkpoint */
@@ -360,6 +384,7 @@ static int pager_unwritelock(Pager *pPager){
for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
pPg->inJournal = 0;
pPg->dirty = 0;
pPg->needSync = 0;
}
}else{
assert( pPager->dirtyFile==0 || pPager->useJournal==0 );
@@ -398,13 +423,16 @@ static int pager_playback_one_page(Pager *pPager, OsFile *jfd){
** at the same time, if there is one.
*/
pPg = pager_lookup(pPager, pgRec.pgno);
if( pPg==0 || pPg->needSync==0 ){
TRACE2("PLAYBACK %d\n", pgRec.pgno);
sqliteOsSeek(&pPager->fd, (pgRec.pgno-1)*(off_t)SQLITE_PAGE_SIZE);
rc = sqliteOsWrite(&pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE);
}
if( pPg ){
memcpy(PGHDR_TO_DATA(pPg), pgRec.aData, SQLITE_PAGE_SIZE);
memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
}
rc = sqliteOsSeek(&pPager->fd, (pgRec.pgno-1)*(off_t)SQLITE_PAGE_SIZE);
if( rc==SQLITE_OK ){
rc = sqliteOsWrite(&pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE);
pPg->dirty = 0;
pPg->needSync = 0;
}
return rc;
}
@@ -483,7 +511,32 @@ static int pager_playback(Pager *pPager){
if( rc!=SQLITE_OK ) break;
}
end_playback:
#if !defined(NDEBUG) && defined(SQLITE_TEST)
/* For pages that were never written into the journal, restore the
** memory copy from the original database file.
**
** This code is used during testing only. It is necessary to
** compensate for the sqliteOsTruncate() call inside
** sqlitepager_rollback().
*/
if( rc==SQLITE_OK ){
PgHdr *pPg;
for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
if( (int)pPg->pgno <= pPager->origDbSize ){
sqliteOsSeek(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)(pPg->pgno-1));
rc = sqliteOsRead(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
if( rc ) break;
}else{
memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
}
memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
pPg->needSync = 0;
pPg->dirty = 0;
}
}
#endif
if( rc!=SQLITE_OK ){
pager_unwritelock(pPager);
pPager->errMask |= PAGER_ERR_CORRUPT;
@@ -659,6 +712,7 @@ int sqlitepager_open(
sqliteFree(zFullPathname);
return SQLITE_NOMEM;
}
SET_PAGER(pPager);
pPager->zFilename = (char*)&pPager[1];
pPager->zJournal = &pPager->zFilename[nameLen+1];
strcpy(pPager->zFilename, zFullPathname);
@@ -761,6 +815,7 @@ int sqlitepager_close(Pager *pPager){
** sqliteOsDelete(pPager->zFilename);
** }
*/
CLR_PAGER(pPager);
sqliteFree(pPager);
return SQLITE_OK;
}
@@ -827,7 +882,6 @@ int sqlitepager_ref(void *pData){
*/
static int syncAllPages(Pager *pPager){
PgHdr *pPg;
Pgno lastPgno = 0;
int rc = SQLITE_OK;
/* Sync the journal before modifying the main database
@@ -835,28 +889,26 @@ static int syncAllPages(Pager *pPager){
*/
if( pPager->needSync ){
if( !pPager->tempFile ){
assert( pPager->journalOpen );
assert( !pPager->noSync );
TRACE1("SYNC\n");
rc = sqliteOsSync(&pPager->jfd);
if( rc!=0 ) return rc;
#ifndef NDEBUG
rc = sqliteOsFileSize(&pPager->jfd, &pPager->syncJSize);
if( rc!=0 ) return rc;
#endif
pPager->journalStarted = 1;
}
pPager->needSync = 0;
}
/* Write all dirty free pages to the disk in the order that they
** appear on the disk. We have experimented with sorting the pages
** by page numbers so that they are written in order, but that does
** not appear to improve performance.
/* Erase the needSync flag from every page.
*/
for(pPg=pPager->pFirst; pPg; pPg=pPg->pNextFree){
if( pPg->dirty ){
if( lastPgno==0 || pPg->pgno!=lastPgno+1 ){
sqliteOsSeek(&pPager->fd, (pPg->pgno-1)*(off_t)SQLITE_PAGE_SIZE);
}
rc = sqliteOsWrite(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
if( rc!=SQLITE_OK ) break;
pPg->dirty = 0;
lastPgno = pPg->pgno;
}
for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
pPg->needSync = 0;
}
return rc;
}
@@ -939,6 +991,7 @@ int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage){
return SQLITE_BUSY;
}
pPager->journalOpen = 1;
pPager->journalStarted = 0;
/* Playback and delete the journal. Drop the database write
** lock and reacquire the read lock.
@@ -976,25 +1029,18 @@ int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage){
pPager->pAll = pPg;
pPager->nPage++;
}else{
/* Recycle an older page. First locate the page to be recycled.
** Try to find one that is not dirty and is near the head of
** of the free list */
/* Find a page to recycle. Try to locate a page that does not
** require us to do an fsync() on the journal.
*/
pPg = pPager->pFirst;
while( pPg && pPg->dirty ){
while( pPg && pPg->needSync ){
pPg = pPg->pNextFree;
}
/* If we could not find a page that has not been used recently
** and which is not dirty, then sync the journal and write all
** dirty free pages into the database file, thus making them
** clean pages and available for recycling.
**
** We have to sync the journal before writing a page to the main
** database. But syncing is a very slow operation. So after a
** sync, it is best to write everything we can back to the main
** database to minimize the risk of having to sync again in the
** near future. That is why we write all dirty pages after a
** sync.
/* If we could not find a page that does not require an fsync()
** on the journal file then fsync the journal file. This is a
** very slow operation, so we work hard to avoid it. But sometimes
** it can't be helped.
*/
if( pPg==0 ){
int rc = syncAllPages(pPager);
@@ -1006,9 +1052,24 @@ int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage){
pPg = pPager->pFirst;
}
assert( pPg->nRef==0 );
/* Write the page to the database file if it is dirty.
*/
if( pPg->dirty ){
assert( pPg->needSync==0 );
TRACE2("SAVE %d\n", pPg->pgno);
sqliteOsSeek(&pPager->fd, (pPg->pgno-1)*(off_t)SQLITE_PAGE_SIZE);
rc = sqliteOsWrite(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
if( rc!=SQLITE_OK ){
sqlitepager_rollback(pPager);
*ppPage = 0;
return SQLITE_IOERR;
}
pPg->dirty = 0;
}
assert( pPg->dirty==0 );
/* If the page we are recyclying is marked as alwaysRollback, then
/* If the page we are recycling is marked as alwaysRollback, then
** set the global alwaysRollback flag, thus disabling the
** sqlite_dont_rollback() optimization for the rest of this transaction.
** It is necessary to do this because the page marked alwaysRollback
@@ -1051,9 +1112,12 @@ int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage){
pPg->pgno = pgno;
if( pPager->aInJournal && (int)pgno<=pPager->origDbSize ){
sqliteCheckMemory(pPager->aInJournal, pgno/8);
assert( pPager->journalOpen );
pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0;
pPg->needSync = 0;
}else{
pPg->inJournal = 0;
pPg->needSync = 0;
}
if( pPager->aInCkpt && (int)pgno<=pPager->ckptSize
&& (pPager->aInCkpt[pgno/8] & (1<<(pgno&7)))!=0 ){
@@ -1205,6 +1269,7 @@ static int pager_open_journal(Pager *pPager){
return SQLITE_CANTOPEN;
}
pPager->journalOpen = 1;
pPager->journalStarted = 0;
pPager->needSync = 0;
pPager->alwaysRollback = 0;
sqlitepager_pagecount(pPager);
@@ -1227,6 +1292,9 @@ static int pager_open_journal(Pager *pPager){
rc = SQLITE_FULL;
}
}
#ifndef NDEBUG
pPager->syncJSize = 0;
#endif
return rc;
}
@@ -1264,6 +1332,7 @@ int sqlitepager_begin(void *pData){
}
pPager->state = SQLITE_WRITELOCK;
pPager->dirtyFile = 0;
TRACE1("TRANSACTION\n");
if( pPager->useJournal && !pPager->tempFile ){
rc = pager_open_journal(pPager);
}
@@ -1335,24 +1404,32 @@ int sqlitepager_write(void *pData){
** main database file. Write the current page to the transaction
** journal if it is not there already.
*/
if( !pPg->inJournal && pPager->useJournal
&& (int)pPg->pgno <= pPager->origDbSize ){
rc = write32bits(&pPager->jfd, pPg->pgno);
if( rc==SQLITE_OK ){
rc = sqliteOsWrite(&pPager->jfd, pData, SQLITE_PAGE_SIZE);
if( !pPg->inJournal && pPager->useJournal ){
if( (int)pPg->pgno <= pPager->origDbSize ){
rc = write32bits(&pPager->jfd, pPg->pgno);
if( rc==SQLITE_OK ){
rc = sqliteOsWrite(&pPager->jfd, pData, SQLITE_PAGE_SIZE);
}
if( rc!=SQLITE_OK ){
sqlitepager_rollback(pPager);
pPager->errMask |= PAGER_ERR_FULL;
return rc;
}
assert( pPager->aInJournal!=0 );
pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
pPg->needSync = !pPager->noSync;
pPg->inJournal = 1;
if( pPager->ckptInUse ){
pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
page_add_to_ckpt_list(pPg);
}
TRACE3("JOURNAL %d %d\n", pPg->pgno, pPg->needSync);
}else{
pPg->needSync = !pPager->journalStarted && !pPager->noSync;
TRACE3("APPEND %d %d\n", pPg->pgno, pPg->needSync);
}
if( rc!=SQLITE_OK ){
sqlitepager_rollback(pPager);
pPager->errMask |= PAGER_ERR_FULL;
return rc;
}
assert( pPager->aInJournal!=0 );
pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
pPager->needSync = !pPager->noSync;
pPg->inJournal = 1;
if( pPager->ckptInUse ){
pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
page_add_to_ckpt_list(pPg);
if( pPg->needSync ){
pPager->needSync = 1;
}
}
@@ -1434,6 +1511,7 @@ void sqlitepager_dont_write(Pager *pPager, Pgno pgno){
** corruption during the next transaction.
*/
}else{
TRACE2("DONT_WRITE %d\n", pgno);
pPg->dirty = 0;
}
}
@@ -1459,6 +1537,7 @@ void sqlitepager_dont_rollback(void *pData){
pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
page_add_to_ckpt_list(pPg);
}
TRACE2("DONT_ROLLBACK %d\n", pPg->pgno);
}
if( pPager->ckptInUse && !pPg->inCkpt && (int)pPg->pgno<=pPager->ckptSize ){
assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
@@ -1478,6 +1557,7 @@ void sqlitepager_dont_rollback(void *pData){
int sqlitepager_commit(Pager *pPager){
int rc;
PgHdr *pPg;
int dbChanged;
if( pPager->errMask==PAGER_ERR_FULL ){
rc = sqlitepager_rollback(pPager);
@@ -1493,6 +1573,7 @@ int sqlitepager_commit(Pager *pPager){
if( pPager->state!=SQLITE_WRITELOCK ){
return SQLITE_ERROR;
}
TRACE1("COMMIT\n");
if( pPager->dirtyFile==0 ){
/* Exit early (without doing the time-consuming sqliteOsSync() calls)
** if there have been no changes to the database file. */
@@ -1501,17 +1582,21 @@ int sqlitepager_commit(Pager *pPager){
return rc;
}
assert( pPager->journalOpen );
if( !pPager->journalStarted && !pPager->noSync ) pPager->needSync = 1;
assert( pPager->dirtyFile || !pPager->needSync );
if( pPager->needSync && sqliteOsSync(&pPager->jfd)!=SQLITE_OK ){
goto commit_abort;
}
dbChanged = 0;
for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
if( pPg->dirty==0 ) continue;
rc = sqliteOsSeek(&pPager->fd, (pPg->pgno-1)*(off_t)SQLITE_PAGE_SIZE);
if( rc!=SQLITE_OK ) goto commit_abort;
TRACE2("COMMIT-PAGE %d\n", pPg->pgno);
sqliteOsSeek(&pPager->fd, (pPg->pgno-1)*(off_t)SQLITE_PAGE_SIZE);
rc = sqliteOsWrite(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
if( rc!=SQLITE_OK ) goto commit_abort;
dbChanged = 1;
}
if( !pPager->noSync && sqliteOsSync(&pPager->fd)!=SQLITE_OK ){
if( dbChanged && !pPager->noSync && sqliteOsSync(&pPager->fd)!=SQLITE_OK ){
goto commit_abort;
}
rc = pager_unwritelock(pPager);
@@ -1542,11 +1627,28 @@ commit_abort:
*/
int sqlitepager_rollback(Pager *pPager){
int rc;
TRACE1("ROLLBACK\n");
if( !pPager->dirtyFile || !pPager->journalOpen ){
rc = pager_unwritelock(pPager);
pPager->dbSize = -1;
return rc;
}
#if defined(SQLITE_TEST) && !defined(NDEBUG)
/* Truncate the journal to the size it was at the conclusion of the
** last sqliteOsSync() call. This is really an error check. If the
** rollback still works, it means that the rollback would have also
** worked if it had occurred after an OS crash or unexpected power
** loss.
*/
if( pPager->syncJSize<sizeof(aJournalMagic)+sizeof(Pgno) ){
pPager->syncJSize = sizeof(aJournalMagic)+sizeof(Pgno);
}
TRACE2("TRUNCATE JOURNAL %lld\n", pPager->syncJSize);
rc = sqliteOsTruncate(&pPager->jfd, pPager->syncJSize);
if( rc ) return rc;
#endif
if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){
if( pPager->state>=SQLITE_WRITELOCK ){
pager_playback(pPager);

View File

@@ -36,7 +36,7 @@
** in this file for details. If in doubt, do not deviate from existing
** commenting and indentation practices when changing or adding code.
**
** $Id: vdbe.c,v 1.197 2003/01/12 17:35:00 drh Exp $
** $Id: vdbe.c,v 1.198 2003/01/16 13:42:43 drh Exp $
*/
#include "sqliteInt.h"
#include <ctype.h>
@@ -928,7 +928,8 @@ static void hardRealify(Vdbe *p, int i){
static void PopStack(Vdbe *p, int N){
assert( N>=0 );
if( p->zStack==0 ) return;
assert( p->aStack );
assert( p->aStack || sqlite_malloc_failed );
if( p->aStack==0 ) return;
while( N-- > 0 ){
if( p->aStack[p->tos].flags & STK_Dyn ){
sqliteFree(p->zStack[p->tos]);

View File

@@ -43,6 +43,9 @@ do_test trigger3-1.2 {
ROLLBACK;
}
} {5 5 6}
do_test trigger3-1.3 {
execsql {SELECT * FROM tbl}
} {}
# FAIL
do_test trigger3-2.1 {