diff --git a/manifest b/manifest index 6676da8ac4..f48b9a0540 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\stests\sto\swalcrash3.test. -D 2011-12-17T08:10:34.481 +C Merge\sin\schanges\sthat\scause\sthe\sfirst\ssector\sof\sthe\sWAL\sfile\sto\sbe\ssynced\nwhen\sthe\sWAL\srestarts.\s\sThis\sis\sa\sfix\sfor\sthe\spower-loss\scorruption\nproblem\sdescribed\sin\sticket\s[ff5be73dee086] +D 2011-12-17T13:45:28.989 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 5b4a3e12a850b021547e43daf886b25133b44c07 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -168,7 +168,7 @@ F src/os_common.h 92815ed65f805560b66166e3583470ff94478f04 F src/os_os2.c 4a75888ba3dfc820ad5e8177025972d74d7f2440 F src/os_unix.c 7dc7df10331942b139032328449a3723e051979e F src/os_win.c 197d23ce8a0dff748e766e034bf95ff756dd3884 -F src/pager.c d981f3bfcc0e4460537d983899620700ccf8f539 +F src/pager.c c7c32a1c279e0bbbde3578172985c41d4c5efc35 F src/pager.h 5cd760857707529b403837d813d86b68938d6183 F src/parse.y fabb2e7047417d840e6fdb3ef0988a86849a08ba F src/pcache.c 1fdd77978c1525d1ca4b9ef48eb80abca710cb4c @@ -250,8 +250,8 @@ F src/vdbemem.c 2fc78b3e0fabcc1eaa23cd79dd2e30e6dcfe1e56 F src/vdbesort.c 468d43c057063e54da4f1988b38b4f46d60e7790 F src/vdbetrace.c d6e50e04e1ec498150e519058f617d91b8f5c843 F src/vtab.c e9318d88feac85be8e27ee783ac8f5397933fc8a -F src/wal.c 8f6fd4900934124f6c2701448c13038cbc02a4f4 -F src/wal.h 66b40bd91bc29a5be1c88ddd1f5ade8f3f48728a +F src/wal.c 89a60a8bf8daa805b819f97b2049a62ae5618707 +F src/wal.h 42f8313f7aaf8913e2d1fdf7b47025c23491ea1d F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f F src/where.c af623942514571895818b9b7ae11db95ae3b3d88 F test/8_3_names.test 631ea964a3edb091cf73c3b540f6bcfdb36ce823 @@ -902,8 +902,8 @@ F test/vtab_alter.test 9e374885248f69e251bdaacf480b04a197f125e5 F test/vtab_err.test 0d4d8eb4def1d053ac7c5050df3024fd47a3fbd8 F test/vtab_shared.test 0eff9ce4f19facbe0a3e693f6c14b80711a4222d F test/wal.test c743be787e60c1242fa6cdf73b410e64b2977e25 -F test/wal2.test ad6412596815f553cd30f271d291ab003092bc7e -F test/wal3.test 18da4e65c30c43c646ad40e145e9a074e4062fc9 +F test/wal2.test 29e2cbe840582fc6efd0487b4f6337caed4b3e80 +F test/wal3.test 29a6e8843e5f5fd13f33cb0407d2923107020d32 F test/wal4.test 4744e155cd6299c6bd99d3eab1c82f77db9cdb3c F test/wal5.test 1bbfaa316dc2a1d0d1fac3f4500c38a90055a41b F test/wal6.test 2e3bc767d9c2ce35c47106148d43fcbd072a93b3 @@ -984,7 +984,7 @@ F tool/tostr.awk e75472c2f98dd76e06b8c9c1367f4ab07e122d06 F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f F tool/warnings-clang.sh 9f406d66e750e8ac031c63a9ef3248aaa347ef2a F tool/warnings.sh fbc018d67fd7395f440c28f33ef0f94420226381 -P 7b63b11b93396079131686abb36c4221354fa50e -R 177c457106f4a322f13cf8ef2b836d21 -U dan -Z c66e15671707eedc5457d2cd5fb751c5 +P d76880428013ae2c5be00d87bb3e1695af6f706f 9799241f7de952c4d1ea8bf6508b577d2b57a370 +R a66c21780a9737a3739ef721dbd80b71 +U drh +Z 8683a36598a7c916281a4627e543388f diff --git a/manifest.uuid b/manifest.uuid index 47932d748d..9c694594ad 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -d76880428013ae2c5be00d87bb3e1695af6f706f \ No newline at end of file +44ca4d123385d759c11919865525c998c2e35bdb \ No newline at end of file diff --git a/src/pager.c b/src/pager.c index f3069e52ba..9db6ebd3f4 100644 --- a/src/pager.c +++ b/src/pager.c @@ -616,6 +616,7 @@ struct Pager { u8 noSync; /* Do not sync the journal if true */ u8 fullSync; /* Do extra syncs of the journal for robustness */ u8 ckptSyncFlags; /* SYNC_NORMAL or SYNC_FULL for checkpoint */ + u8 walSyncFlags; /* SYNC_NORMAL or SYNC_FULL for wal writes */ u8 syncFlags; /* SYNC_NORMAL or SYNC_FULL otherwise */ u8 tempFile; /* zFilename is a temporary file */ u8 readOnly; /* True for a read-only database */ @@ -786,7 +787,7 @@ static int pagerUseWal(Pager *pPager){ #else # define pagerUseWal(x) 0 # define pagerRollbackWal(x) 0 -# define pagerWalFrames(v,w,x,y,z) 0 +# define pagerWalFrames(v,w,x,y) 0 # define pagerOpenWalIfPresent(z) SQLITE_OK # define pagerBeginReadTransaction(z) SQLITE_OK #endif @@ -2955,8 +2956,7 @@ static int pagerWalFrames( Pager *pPager, /* Pager object */ PgHdr *pList, /* List of frames to log */ Pgno nTruncate, /* Database size after this commit */ - int isCommit, /* True if this is a commit */ - int syncFlags /* Flags to pass to OsSync() (or 0) */ + int isCommit /* True if this is a commit */ ){ int rc; /* Return code */ #if defined(SQLITE_DEBUG) || defined(SQLITE_CHECK_PAGES) @@ -2987,7 +2987,7 @@ static int pagerWalFrames( if( pList->pgno==1 ) pager_write_changecounter(pList); rc = sqlite3WalFrames(pPager->pWal, - pPager->pageSize, pList, nTruncate, isCommit, syncFlags + pPager->pageSize, pList, nTruncate, isCommit, pPager->walSyncFlags ); if( rc==SQLITE_OK && pPager->pBackup ){ PgHdr *p; @@ -3367,6 +3367,10 @@ void sqlite3PagerSetSafetyLevel( pPager->syncFlags = SQLITE_SYNC_NORMAL; pPager->ckptSyncFlags = SQLITE_SYNC_NORMAL; } + pPager->walSyncFlags = pPager->syncFlags; + if( pPager->fullSync ){ + pPager->walSyncFlags |= WAL_SYNC_TRANSACTIONS; + } } #endif @@ -4194,7 +4198,7 @@ static int pagerStress(void *p, PgHdr *pPg){ rc = subjournalPage(pPg); } if( rc==SQLITE_OK ){ - rc = pagerWalFrames(pPager, pPg, 0, 0, 0); + rc = pagerWalFrames(pPager, pPg, 0, 0); } }else{ @@ -4533,9 +4537,17 @@ int sqlite3PagerOpen( pPager->readOnly = (u8)readOnly; assert( useJournal || pPager->tempFile ); pPager->noSync = pPager->tempFile; - pPager->fullSync = pPager->noSync ?0:1; - pPager->syncFlags = pPager->noSync ? 0 : SQLITE_SYNC_NORMAL; - pPager->ckptSyncFlags = pPager->syncFlags; + if( pPager->noSync ){ + assert( pPager->fullSync==0 ); + assert( pPager->syncFlags==0 ); + assert( pPager->walSyncFlags==0 ); + assert( pPager->ckptSyncFlags==0 ); + }else{ + pPager->fullSync = 1; + pPager->syncFlags = SQLITE_SYNC_NORMAL; + pPager->walSyncFlags = SQLITE_SYNC_NORMAL | WAL_SYNC_TRANSACTIONS; + pPager->ckptSyncFlags = SQLITE_SYNC_NORMAL; + } /* pPager->pFirst = 0; */ /* pPager->pFirstSynced = 0; */ /* pPager->pLast = 0; */ @@ -5765,9 +5777,7 @@ int sqlite3PagerCommitPhaseOne( } assert( rc==SQLITE_OK ); if( ALWAYS(pList) ){ - rc = pagerWalFrames(pPager, pList, pPager->dbSize, 1, - (pPager->fullSync ? pPager->syncFlags : 0) - ); + rc = pagerWalFrames(pPager, pList, pPager->dbSize, 1); } sqlite3PagerUnref(pPageOne); if( rc==SQLITE_OK ){ diff --git a/src/wal.c b/src/wal.c index 047368406f..03c482554e 100644 --- a/src/wal.c +++ b/src/wal.c @@ -414,14 +414,17 @@ struct Wal { u32 iCallback; /* Value to pass to log callback (or 0) */ i64 mxWalSize; /* Truncate WAL to this size upon reset */ int nWiData; /* Size of array apWiData */ + int szFirstBlock; /* Size of first block written to WAL file */ volatile u32 **apWiData; /* Pointer to wal-index content in memory */ u32 szPage; /* Database page size */ i16 readLock; /* Which read lock is being held. -1 for none */ + u8 syncFlags; /* Flags to use to sync header writes */ u8 exclusiveMode; /* Non-zero if connection is in exclusive mode */ u8 writeLock; /* True if in a write transaction */ u8 ckptLock; /* True if holding a checkpoint lock */ u8 readOnly; /* WAL_RDWR, WAL_RDONLY, or WAL_SHM_RDONLY */ u8 truncateOnCommit; /* True to truncate WAL file on commit */ + u8 noSyncHeader; /* Avoid WAL header fsyncs if true */ WalIndexHdr hdr; /* Wal-index header for current transaction */ const char *zWalName; /* Name of WAL file */ u32 nCkpt; /* Checkpoint sequence counter in the wal-header */ @@ -1094,6 +1097,7 @@ static int walIndexRecover(Wal *pWal){ int szPage; /* Page size according to the log */ u32 magic; /* Magic value read from WAL header */ u32 version; /* Magic value read from WAL header */ + int isValid; /* True if this frame is valid */ /* Read in the WAL header. */ rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0); @@ -1149,17 +1153,25 @@ static int walIndexRecover(Wal *pWal){ /* Read all frames from the log file. */ iFrame = 0; + isValid = 1; for(iOffset=WAL_HDRSIZE; (iOffset+szFrame)<=nSize; iOffset+=szFrame){ u32 pgno; /* Database page number for frame */ u32 nTruncate; /* dbsize field from frame header */ - int isValid; /* True if this frame is valid */ /* Read and decode the next log frame. */ + iFrame++; rc = sqlite3OsRead(pWal->pWalFd, aFrame, szFrame, iOffset); if( rc!=SQLITE_OK ) break; + if( sqlite3Get4byte(&aFrame[8]) == + 1+sqlite3Get4byte((u8*)&pWal->hdr.aSalt[0]) ){ + pWal->hdr.mxFrame = 0; + pWal->hdr.nPage = 0; + break; + } + if( !isValid ) continue; isValid = walDecodeFrame(pWal, &pgno, &nTruncate, aData, aFrame); - if( !isValid ) break; - rc = walIndexAppend(pWal, ++iFrame, pgno); + if( !isValid ) continue; + rc = walIndexAppend(pWal, iFrame, pgno); if( rc!=SQLITE_OK ) break; /* If nTruncate is non-zero, this is a commit record. */ @@ -1296,6 +1308,8 @@ int sqlite3WalOpen( sqlite3OsClose(pRet->pWalFd); sqlite3_free(pRet); }else{ + int iDC = sqlite3OsDeviceCharacteristics(pRet->pWalFd); + if( iDC & SQLITE_IOCAP_SEQUENTIAL ){ pRet->noSyncHeader = 1; } *ppWal = pRet; WALTRACE(("WAL%d: opened\n", pRet)); } @@ -2617,6 +2631,44 @@ static int walRestartLog(Wal *pWal){ return rc; } +/* +** Write iAmt bytes of content into the WAL file beginning at iOffset. +** +** When crossing the boundary between the first and second sectors of the +** file, first write all of the first sector content, then fsync(), then +** continue writing content for the second sector. This ensures that +** the WAL header is overwritten before the first commit mark. +*/ +static int walWriteToLog( + Wal *pWal, /* WAL to write to */ + void *pContent, /* Content to be written */ + int iAmt, /* Number of bytes to write */ + sqlite3_int64 iOffset /* Start writing at this offset */ +){ + int rc; + if( iOffset>=pWal->szFirstBlock + || iOffset+iAmtszFirstBlock + || pWal->syncFlags==0 + ){ + /* The common and fast case. Just write the data. */ + rc = sqlite3OsWrite(pWal->pWalFd, pContent, iAmt, iOffset); + }else{ + /* If this write will cross the first sector boundary, it has to + ** be split it two with a sync in between. */ + int iFirstAmt = pWal->szFirstBlock - iOffset; + assert( iFirstAmt>0 && iFirstAmtpWalFd, pContent, iFirstAmt, iOffset); + if( rc ) return rc; + assert( pWal->syncFlags & (SQLITE_SYNC_NORMAL|SQLITE_SYNC_FULL) ); + rc = sqlite3OsSync(pWal->pWalFd, pWal->syncFlags); + if( rc ) return rc; + pContent = (void*)(iFirstAmt + (char*)pContent); + rc = sqlite3OsWrite(pWal->pWalFd, pContent, + iAmt-iFirstAmt, iOffset+iFirstAmt); + } + return rc; +} + /* ** Write a set of frames to the log. The caller must hold the write-lock ** on the log file (obtained using sqlite3WalBeginWriteTransaction()). @@ -2686,6 +2738,16 @@ int sqlite3WalFrames( } assert( (int)pWal->szPage==szPage ); + /* Setup information needed to do the WAL header sync */ + if( pWal->noSyncHeader ){ + assert( pWal->szFirstBlock==0 ); + assert( pWal->syncFlags==0 ); + }else{ + pWal->szFirstBlock = sqlite3OsSectorSize(pWal->pWalFd); + if( szPage>pWal->szFirstBlock ) pWal->szFirstBlock = szPage; + pWal->syncFlags = sync_flags & SQLITE_SYNC_MASK; + } + /* Write the log file. */ for(p=pList; p; p=p->pDirty){ u32 nDbsize; /* Db-size field for frame header */ @@ -2703,13 +2765,13 @@ int sqlite3WalFrames( pData = p->pData; #endif walEncodeFrame(pWal, p->pgno, nDbsize, pData, aFrame); - rc = sqlite3OsWrite(pWal->pWalFd, aFrame, sizeof(aFrame), iOffset); + rc = walWriteToLog(pWal, aFrame, sizeof(aFrame), iOffset); if( rc!=SQLITE_OK ){ return rc; } /* Write the page data */ - rc = sqlite3OsWrite(pWal->pWalFd, pData, szPage, iOffset+sizeof(aFrame)); + rc = walWriteToLog(pWal, pData, szPage, iOffset+sizeof(aFrame)); if( rc!=SQLITE_OK ){ return rc; } @@ -2717,11 +2779,10 @@ int sqlite3WalFrames( } /* Sync the log file if the 'isSync' flag was specified. */ - if( sync_flags ){ + if( isCommit && (sync_flags & WAL_SYNC_TRANSACTIONS)!=0 ){ i64 iSegment = sqlite3OsSectorSize(pWal->pWalFd); i64 iOffset = walFrameOffset(iFrame+1, szPage); - assert( isCommit ); assert( iSegment>0 ); iSegment = (((iOffset+iSegment-1)/iSegment) * iSegment); @@ -2734,12 +2795,12 @@ int sqlite3WalFrames( #endif walEncodeFrame(pWal, pLast->pgno, nTruncate, pData, aFrame); /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */ - rc = sqlite3OsWrite(pWal->pWalFd, aFrame, sizeof(aFrame), iOffset); + rc = walWriteToLog(pWal, aFrame, sizeof(aFrame), iOffset); if( rc!=SQLITE_OK ){ return rc; } iOffset += WAL_FRAME_HDRSIZE; - rc = sqlite3OsWrite(pWal->pWalFd, pData, szPage, iOffset); + rc = walWriteToLog(pWal, pData, szPage, iOffset); if( rc!=SQLITE_OK ){ return rc; } @@ -2747,7 +2808,7 @@ int sqlite3WalFrames( iOffset += szPage; } - rc = sqlite3OsSync(pWal->pWalFd, sync_flags); + rc = sqlite3OsSync(pWal->pWalFd, sync_flags & SQLITE_SYNC_MASK); } if( isCommit && pWal->truncateOnCommit && pWal->mxWalSize>=0 ){ diff --git a/src/wal.h b/src/wal.h index a62b23bbdc..7cd84be997 100644 --- a/src/wal.h +++ b/src/wal.h @@ -87,6 +87,12 @@ int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData); /* Write a frame or frames to the log. */ int sqlite3WalFrames(Wal *pWal, int, PgHdr *, Pgno, int, int); +/* Additional values that can be added to the sync_flags argument of +** sqlite3WalFrames(): +*/ +#define WAL_SYNC_TRANSACTIONS 0x20 /* Sync at the end of each transaction */ +#define SQLITE_SYNC_MASK 0x13 /* Mask off the SQLITE_SYNC_* values */ + /* Copy pages from the log to the database file */ int sqlite3WalCheckpoint( Wal *pWal, /* Write-ahead log connection */ diff --git a/test/wal2.test b/test/wal2.test index f4887065a5..90ea9defed 100644 --- a/test/wal2.test +++ b/test/wal2.test @@ -1176,9 +1176,9 @@ if {$::tcl_platform(platform) == "unix"} { # Test that "PRAGMA checkpoint_fullsync" appears to be working. # foreach {tn sql reslist} { - 1 { } {8 0 3 0 5 0} - 2 { PRAGMA checkpoint_fullfsync = 1 } {8 4 3 2 5 2} - 3 { PRAGMA checkpoint_fullfsync = 0 } {8 0 3 0 5 0} + 1 { } {10 0 4 0 6 0} + 2 { PRAGMA checkpoint_fullfsync = 1 } {10 4 4 2 6 2} + 3 { PRAGMA checkpoint_fullfsync = 0 } {10 0 4 0 6 0} } { faultsim_delete_and_reopen @@ -1192,12 +1192,12 @@ foreach {tn sql reslist} { do_execsql_test wal2-14.$tn.2 { PRAGMA wal_autocheckpoint = 10; CREATE TABLE t1(a, b); -- 2 wal syncs - INSERT INTO t1 VALUES(1, 2); -- 1 wal sync + INSERT INTO t1 VALUES(1, 2); -- 2 wal sync PRAGMA wal_checkpoint; -- 1 wal sync, 1 db sync BEGIN; INSERT INTO t1 VALUES(3, 4); INSERT INTO t1 VALUES(5, 6); - COMMIT; -- 1 wal sync + COMMIT; -- 2 wal sync PRAGMA wal_checkpoint; -- 1 wal sync, 1 db sync } {10 0 5 5 0 2 2} @@ -1233,22 +1233,22 @@ catch { db close } # PRAGMA fullfsync # PRAGMA synchronous # -foreach {tn settings commit_sync ckpt_sync} { - 1 {0 0 off} {0 0} {0 0} - 2 {0 0 normal} {0 0} {2 0} - 3 {0 0 full} {1 0} {2 0} +foreach {tn settings restart_sync commit_sync ckpt_sync} { + 1 {0 0 off} {0 0} {0 0} {0 0} + 2 {0 0 normal} {1 0} {0 0} {2 0} + 3 {0 0 full} {2 0} {1 0} {2 0} - 4 {0 1 off} {0 0} {0 0} - 5 {0 1 normal} {0 0} {0 2} - 6 {0 1 full} {0 1} {0 2} + 4 {0 1 off} {0 0} {0 0} {0 0} + 5 {0 1 normal} {0 1} {0 0} {0 2} + 6 {0 1 full} {0 2} {0 1} {0 2} - 7 {1 0 off} {0 0} {0 0} - 8 {1 0 normal} {0 0} {0 2} - 9 {1 0 full} {1 0} {0 2} + 7 {1 0 off} {0 0} {0 0} {0 0} + 8 {1 0 normal} {1 0} {0 0} {0 2} + 9 {1 0 full} {2 0} {1 0} {0 2} - 10 {1 1 off} {0 0} {0 0} - 11 {1 1 normal} {0 0} {0 2} - 12 {1 1 full} {0 1} {0 2} + 10 {1 1 off} {0 0} {0 0} {0 0} + 11 {1 1 normal} {0 1} {0 0} {0 2} + 12 {1 1 full} {0 2} {0 1} {0 2} } { forcedelete test.db @@ -1262,27 +1262,35 @@ foreach {tn settings commit_sync ckpt_sync} { sqlite3 db test.db do_execsql_test 15.$tn.1 " CREATE TABLE t1(x); + PRAGMA wal_autocheckpoint = OFF; PRAGMA journal_mode = WAL; PRAGMA checkpoint_fullfsync = [lindex $settings 0]; PRAGMA fullfsync = [lindex $settings 1]; PRAGMA synchronous = [lindex $settings 2]; - " {wal} + " {0 wal} do_test 15.$tn.2 { set sync(normal) 0 set sync(full) 0 execsql { INSERT INTO t1 VALUES('abc') } list $::sync(normal) $::sync(full) - } $commit_sync + } $restart_sync do_test 15.$tn.3 { + set sync(normal) 0 + set sync(full) 0 + execsql { INSERT INTO t1 VALUES('abc') } + list $::sync(normal) $::sync(full) + } $commit_sync + + do_test 15.$tn.4 { set sync(normal) 0 set sync(full) 0 execsql { INSERT INTO t1 VALUES('def') } list $::sync(normal) $::sync(full) } $commit_sync - do_test 15.$tn.4 { + do_test 15.$tn.5 { set sync(normal) 0 set sync(full) 0 execsql { PRAGMA wal_checkpoint } diff --git a/test/wal3.test b/test/wal3.test index ea5e70571b..82f7d72a97 100644 --- a/test/wal3.test +++ b/test/wal3.test @@ -217,6 +217,7 @@ foreach {tn syncmode synccount} { execsql "PRAGMA synchronous = $syncmode" execsql { PRAGMA journal_mode = WAL } + execsql { CREATE TABLE filler(a,b,c); } set ::syncs [list] T filter xSync