diff --git a/manifest b/manifest index 2f08ae4c31..1aebed56f7 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sinterfaces\ssqlite3_uri_boolean()\sand\ssqlite3_uri_int64()\swhich\sare\nwrappers\saround\ssqlite3_uri_parameter()\scombined\swith\sinternal\sroutines\sfor\nconverting\sstrings\sto\sbooleans\sand\s64-bit\sintegers. -D 2011-12-23T00:07:33.075 +C Merge\sthe\sPOWERSAFE_OVERWRITE\sfeatures\sand\sthe\suse\sof\sstatvfs()\sfrom\sthe\nstatvfs\sbranch\sinto\strunk. +D 2011-12-23T02:07:10.640 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 5b4a3e12a850b021547e43daf886b25133b44c07 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -166,9 +166,9 @@ F src/os.c 28bbdab2170dfce84d86c45456a18eab1d0f99a9 F src/os.h 549b1a2e5e0ed1e1499f252dac126c4973e7379c F src/os_common.h 92815ed65f805560b66166e3583470ff94478f04 F src/os_os2.c 4a75888ba3dfc820ad5e8177025972d74d7f2440 -F src/os_unix.c ee4ea557de196798bafba1ac69f76a755035ebbb -F src/os_win.c 197d23ce8a0dff748e766e034bf95ff756dd3884 -F src/pager.c 523c64f6ca707e820d5cf10ed8371238ecac8333 +F src/os_unix.c 3a441671f35569df4b72641e928fdb1ab5cd8576 +F src/os_win.c 569fe7448e781bfb8116aa79081df0eadf576fc6 +F src/pager.c d03fb1de7bd724f0cdcae0aab0a733d89c94ac2f F src/pager.h 5cd760857707529b403837d813d86b68938d6183 F src/parse.y fabb2e7047417d840e6fdb3ef0988a86849a08ba F src/pcache.c 1fdd77978c1525d1ca4b9ef48eb80abca710cb4c @@ -182,19 +182,19 @@ F src/resolve.c 3d3e80a98f203ac6b9329e9621e29eda85ddfd40 F src/rowset.c 69afa95a97c524ba6faf3805e717b5b7ae85a697 F src/select.c a1d075db66a0ea42807353501b62997969e5be79 F src/shell.c aa4183d4a5243d8110b1d3d77faa4aea7e9c9c2d -F src/sqlite.h.in db834f87316a1422051a97cba5beac31c0cb1b2d +F src/sqlite.h.in 3d1a77e27b9d9979fb7ec84834a8423602d29dc5 F src/sqlite3ext.h 6904f4aadf976f95241311fbffb00823075d9477 -F src/sqliteInt.h 165409fa8adc8701148830804febeded3f2e4448 +F src/sqliteInt.h b8fdd9c39c8d7f5c794f4ea917293d9c75b9aff2 F 
src/sqliteLimit.h 164b0e6749d31e0daa1a4589a169d31c0dec7b3d F src/status.c 4568e72dfd36b6a5911f93457364deb072e0b03a F src/table.c 2cd62736f845d82200acfa1287e33feb3c15d62e -F src/tclsqlite.c de581e2e71f5e7f98366156afad83b4742ac6fe0 -F src/test1.c 33b9c49f728cf28937eea7b246c28e89e3cf5e9e +F src/tclsqlite.c bd86070f52ae3f77a2e6b3b065ff03adb9140bfa +F src/test1.c 1b1e514e85ffe7152b02cba38bd0a1ce8cd56113 F src/test2.c 80d323d11e909cf0eb1b6fbb4ac22276483bcf31 F src/test3.c 124ff9735fb6bb7d41de180d6bac90e7b1509432 F src/test4.c d1e5a5e904d4b444cf572391fdcb017638e36ff7 F src/test5.c e1a19845625144caf038031234a12185e40d315c -F src/test6.c 3191b4ab964a144230ff9ef96c093520375c7b2a +F src/test6.c cf6ab27a59e1ab64b011bb251ba600131e803e59 F src/test7.c 2e0781754905c8adc3268d8f0967e7633af58843 F src/test8.c 99f70341d6ec480313775127f4cd14b4a47db557 F src/test9.c bea1e8cf52aa93695487badedd6e1886c321ea60 @@ -230,7 +230,7 @@ F src/test_superlock.c 2b97936ca127d13962c3605dbc9a4ef269c424cd F src/test_syscall.c a992d8c80ea91fbf21fb2dd570db40e77dd7e6ae F src/test_tclvar.c f4dc67d5f780707210d6bb0eb6016a431c04c7fa F src/test_thread.c 35022393dd54d147b998b6b7f7e945b01114d666 -F src/test_vfs.c 27b7d9de40630f603b9e2cf9ef2a7c81d31c4515 +F src/test_vfs.c 07157a0bbfe161cb5e32cad2079abd26cd611c4b F src/test_vfstrace.c 065c7270a614254b2c68fbc7ba8d1fb1d5cbc823 F src/test_wholenumber.c 6129adfbe7c7444f2e60cc785927f3aa74e12290 F src/test_wsd.c 41cadfd9d97fe8e3e4e44f61a4a8ccd6f7ca8fe9 @@ -250,7 +250,7 @@ F src/vdbemem.c 2fc78b3e0fabcc1eaa23cd79dd2e30e6dcfe1e56 F src/vdbesort.c 468d43c057063e54da4f1988b38b4f46d60e7790 F src/vdbetrace.c d6e50e04e1ec498150e519058f617d91b8f5c843 F src/vtab.c e9318d88feac85be8e27ee783ac8f5397933fc8a -F src/wal.c 5525f049dffd47ee860bf4ffbb8da4ebef78637d +F src/wal.c 66e2afeec77933f80c8098cfb3c0b29bc875de0f F src/wal.h 42f8313f7aaf8913e2d1fdf7b47025c23491ea1d F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f F src/where.c af623942514571895818b9b7ae11db95ae3b3d88 @@ 
-515,7 +515,7 @@ F test/incrblob3.test aedbb35ea1b6450c33b98f2b6ed98e5020be8dc7 F test/incrblob_err.test d2562d2771ebffd4b3af89ef64c140dd44371597 F test/incrblobfault.test 917c0292224c64a56ef7215fd633a3a82f805be0 F test/incrvacuum.test d2a6ddf5e429720b5fe502766af747915ccf6c32 -F test/incrvacuum2.test 62fbeb85459fe4e501684d8fb5b6e98a23e3b0c0 +F test/incrvacuum2.test 379eeb8740b0ef60c372c439ad4cbea20b34bb9b F test/incrvacuum_ioerr.test 22f208d01c528403240e05beecc41dc98ed01637 F test/index.test b5429732b3b983fa810e3ac867d7ca85dae35097 F test/index2.test ee83c6b5e3173a3d7137140d945d9a5d4fdfb9d6 @@ -545,7 +545,7 @@ F test/join4.test 1a352e4e267114444c29266ce79e941af5885916 F test/join5.test 86675fc2919269aa923c84dd00ee4249b97990fe F test/join6.test bf82cf3f979e9eade83ad0d056a66c5ed71d1901 F test/journal1.test 8b71ef1ed5798bdc0e6eb616d8694e2c2c188d4d -F test/journal2.test 29937bdbb253bbfd92057610120bdc0aa7e84a0a +F test/journal2.test ae06f566c28552c313ded3fee79a6c69e6d049b1 F test/journal3.test 6fd28532c88b447db844186bc190523108b6dbb4 F test/jrnlmode.test 9ee3a78f53d52cca737db69293d15dc41c0cbd36 F test/jrnlmode2.test 81610545a4e6ed239ea8fa661891893385e23a1d @@ -620,7 +620,7 @@ F test/notnull.test cc7c78340328e6112a13c3e311a9ab3127114347 F test/null.test a8b09b8ed87852742343b33441a9240022108993 F test/openv2.test 0d3040974bf402e19b7df4b783e447289d7ab394 F test/oserror.test 50417780d0e0d7cd23cf12a8277bb44024765df3 -F test/pager1.test b936e80553652002fc47d89bfde7018801d8257d +F test/pager1.test 9e9f5f1c6d4df4831dbff213b1262ef94bf72118 F test/pager2.test 745b911dde3d1f24ae0870bd433dfa83d7c658c1 F test/pager3.test 3856d9c80839be0668efee1b74811b1b7f7fc95f F test/pagerfault.test 452f2cc23e3bfcfa935f4442aec1da4fe1dc0442 @@ -704,9 +704,9 @@ F test/subquery.test b524f57c9574b2c0347045b4510ef795d4686796 F test/subquery2.test edcad5c118f0531c2e21bf16a09bbb105252d4cd F test/subselect.test d24fd8757daf97dafd2e889c73ea4c4272dcf4e4 F test/substr.test 
18f57c4ca8a598805c4d64e304c418734d843c1a -F test/superlock.test 7b1167925e9d30a5d1f0701d24812fdda42c3a86 +F test/superlock.test 1cde669f68d2dd37d6c9bd35eee1d95491ae3fc2 F test/sync.test a34cd43e98b7fb84eabbf38f7ed8f7349b3f3d85 -F test/syscall.test 2a922050dbee032f587249b070fb42692f5e1e22 +F test/syscall.test 265cda616f56a297406728ee1e74c9b4a93aa6dd F test/sysfault.test c79441d88d23696fbec7b147dba98d42a04f523f F test/table.test a59d985ca366e39b17b175f387f9d5db5a18d4e2 F test/tableapi.test 2674633fa95d80da917571ebdd759a14d9819126 @@ -872,7 +872,7 @@ F test/types.test bf816ce73c7dfcfe26b700c19f97ef4050d194ff F test/types2.test 3555aacf8ed8dc883356e59efc314707e6247a84 F test/types3.test 99e009491a54f4dc02c06bdbc0c5eea56ae3e25a F test/unique.test 083c7fff74695bcc27a71d75699deba3595bc9c2 -F test/unixexcl.test 892937c53d0c16e76631674e38a0fce052ae5e9c +F test/unixexcl.test c82934b3fda907573c7dfccc4f4c9506e5537f36 F test/unordered.test f53095cee37851bf30130fa1bf299a8845e837bb F test/update.test 8bc86fd7ef1a00014f76dc6a6a7c974df4aef172 F test/uri.test 0d289d32396bdbc491e9dc845f1a52e13f861e0b @@ -902,11 +902,11 @@ F test/vtabF.test fd5ad376f5a34fe0891df1f3cddb4fe7c3eb077e F test/vtab_alter.test 9e374885248f69e251bdaacf480b04a197f125e5 F test/vtab_err.test 0d4d8eb4def1d053ac7c5050df3024fd47a3fbd8 F test/vtab_shared.test 0eff9ce4f19facbe0a3e693f6c14b80711a4222d -F test/wal.test c743be787e60c1242fa6cdf73b410e64b2977e25 -F test/wal2.test 29e2cbe840582fc6efd0487b4f6337caed4b3e80 -F test/wal3.test 29a6e8843e5f5fd13f33cb0407d2923107020d32 +F test/wal.test edefe316b4125d7f68004ea53c5e73c398d436cc +F test/wal2.test f11883dd3cb7f647c5d2acfd7b5c6d4ba5770cc9 +F test/wal3.test 6504bbf348b2d6dfade64a064f1050fd617e8706 F test/wal4.test 4744e155cd6299c6bd99d3eab1c82f77db9cdb3c -F test/wal5.test 1bbfaa316dc2a1d0d1fac3f4500c38a90055a41b +F test/wal5.test f58ed4b8b542f71c7441da12fbd769d99b362437 F test/wal6.test 2e3bc767d9c2ce35c47106148d43fcbd072a93b3 F test/wal7.test 
2ae8f427d240099cc4b2dfef63cff44e2a68a1bd F test/wal_common.tcl a98f17fba96206122eff624db0ab13ec377be4fe @@ -941,6 +941,7 @@ F test/whereC.test 13ff5ec0dba407c0e0c075980c75b3275a6774e5 F test/wherelimit.test 5e9fd41e79bb2b2d588ed999d641d9c965619b31 F test/win32lock.test b2a539e85ae6b2d78475e016a9636b4451dc7fb9 F test/zeroblob.test caaecfb4f908f7bc086ed238668049f96774d688 +F test/zerodamage.test 7ef0680559163118705975be132933898d349674 F tool/build-shell.sh 12aa4391073a777fcb6dcc490b219a018ae98bac F tool/diffdb.c 7524b1b5df217c20cd0431f6789851a4e0cb191b F tool/extract.c 054069d81b095fbdc189a6f5d4466e40380505e2 @@ -985,7 +986,7 @@ F tool/tostr.awk e75472c2f98dd76e06b8c9c1367f4ab07e122d06 F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f F tool/warnings-clang.sh 9f406d66e750e8ac031c63a9ef3248aaa347ef2a F tool/warnings.sh fbc018d67fd7395f440c28f33ef0f94420226381 -P 1c27d842163e27c39bbe9409f50657b9de9ade6e -R 712ad6ea662046687d3fa2e6906d477a +P 83d26b9a9115eadac9e59a33d608bca0ab2519e3 6191c5e45175f5c6040e891843b0725a929d6dd7 +R 9e1524ff0804966ed866a28455ef991b U drh -Z dfef488f9835a38d5955098b906ef223 +Z 88029f358469c654bea2978d031e29f2 diff --git a/manifest.uuid b/manifest.uuid index 0daedf9dab..18ed65b934 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -83d26b9a9115eadac9e59a33d608bca0ab2519e3 \ No newline at end of file +2370d70eb51d2259aaa8073d861ab79d6637cbd9 \ No newline at end of file diff --git a/src/os_unix.c b/src/os_unix.c index 5f26017b02..adf11541ff 100644 --- a/src/os_unix.c +++ b/src/os_unix.c @@ -122,6 +122,10 @@ #ifndef SQLITE_OMIT_WAL #include #endif +#ifndef MISSING_STATVFS +#include +#endif + #if SQLITE_ENABLE_LOCKING_STYLE # include @@ -211,6 +215,7 @@ struct unixFile { int h; /* The file descriptor */ unsigned char eFileLock; /* The type of lock held on this fd */ unsigned char ctrlFlags; /* Behavioral bits. 
UNIXFILE_* flags */ + unsigned char szSector; /* Sectorsize/512 */ int lastErrno; /* The unix errno from last I/O error */ void *lockingContext; /* Locking style specific state */ UnixUnusedFd *pUnused; /* Pre-allocated UnixUnusedFd */ @@ -258,6 +263,7 @@ struct unixFile { #else # define UNIXFILE_DIRSYNC 0x00 #endif +#define UNIXFILE_PSOW 0x10 /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */ /* ** Include code that is common to all os_*.c files @@ -414,6 +420,14 @@ static struct unix_syscall { { "rmdir", (sqlite3_syscall_ptr)rmdir, 0 }, #define osRmdir ((int(*)(const char*))aSyscall[19].pCurrent) +#if defined(MISSING_STATVFS) + { "statvfs", (sqlite3_syscall_ptr)0, 0 }, +#define osStatvfs ((int(*)(const char*,void*))aSyscall[20].pCurrent) +#else + { "statvfs", (sqlite3_syscall_ptr)statvfs, 0 }, +#define osStatvfs ((int(*)(const char*,struct statvfs*))aSyscall[20].pCurrent) +#endif + }; /* End of the overrideable system calls */ /* @@ -3498,6 +3512,22 @@ static int fcntlSizeHint(unixFile *pFile, i64 nByte){ return SQLITE_OK; } +/* +** If *pArg is initially negative then this is a query. Set *pArg to +** 1 or 0 depending on whether or not bit mask of pFile->ctrlFlags is set. +** +** If *pArg is 0 or 1, then clear or set the mask bit of pFile->ctrlFlags. +*/ +static void unixModeBit(unixFile *pFile, unsigned char mask, int *pArg){ + if( *pArg<0 ){ + *pArg = (pFile->ctrlFlags & mask)!=0; + }else if( (*pArg)==0 ){ + pFile->ctrlFlags &= ~mask; + }else{ + pFile->ctrlFlags |= mask; + } +} + /* ** Information and control of an open file handle. 
*/ @@ -3524,14 +3554,11 @@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){ return rc; } case SQLITE_FCNTL_PERSIST_WAL: { - int bPersist = *(int*)pArg; - if( bPersist<0 ){ - *(int*)pArg = (pFile->ctrlFlags & UNIXFILE_PERSIST_WAL)!=0; - }else if( bPersist==0 ){ - pFile->ctrlFlags &= ~UNIXFILE_PERSIST_WAL; - }else{ - pFile->ctrlFlags |= UNIXFILE_PERSIST_WAL; - } + unixModeBit(pFile, UNIXFILE_PERSIST_WAL, (int*)pArg); + return SQLITE_OK; + } + case SQLITE_FCNTL_POWERSAFE_OVERWRITE: { + unixModeBit(pFile, UNIXFILE_PSOW, (int*)pArg); return SQLITE_OK; } case SQLITE_FCNTL_VFSNAME: { @@ -3572,17 +3599,46 @@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){ ** a database and its journal file) that the sector size will be the ** same for both. */ -static int unixSectorSize(sqlite3_file *NotUsed){ - UNUSED_PARAMETER(NotUsed); - return SQLITE_DEFAULT_SECTOR_SIZE; +static int unixSectorSize(sqlite3_file *pFile){ + unixFile *p = (unixFile*)pFile; + if( p->szSector==0 ){ +#ifdef MISSING_STATVFS + p->szSector = SQLITE_DEFAULT_SECTOR_SIZE/512; +#else + struct statvfs x; + int sz; + memset(&x, 0, sizeof(x)); + osStatvfs(p->zPath, &x); + sz = (int)x.f_frsize; + if( sz<512 || sz>65536 || (sz&(sz-1))!=0 ){ + sz = SQLITE_DEFAULT_SECTOR_SIZE; + } + p->szSector = sz/512; +#endif + } + return p->szSector*512; } /* -** Return the device characteristics for the file. This is always 0 for unix. +** Return the device characteristics for the file. +** +** This VFS is set up to return SQLITE_IOCAP_POWERSAFE_OVERWRITE by default. +** However, that choice is controversial since technically the underlying +** file system does not always provide powersafe overwrites. (In other +** words, after a power-loss event, parts of the file that were never +** written might end up being altered.) However, non-PSOW behavior is very, +** very rare. And asserting PSOW makes a large reduction in the amount +** of required I/O for journaling, since a lot of padding is eliminated. 
+** Hence, while POWERSAFE_OVERWRITE is on by default, there is a file-control +** available to turn it off and URI query parameter available to turn it off. */ -static int unixDeviceCharacteristics(sqlite3_file *NotUsed){ - UNUSED_PARAMETER(NotUsed); - return 0; +static int unixDeviceCharacteristics(sqlite3_file *id){ + unixFile *p = (unixFile*)id; + if( p->ctrlFlags & UNIXFILE_PSOW ){ + return SQLITE_IOCAP_POWERSAFE_OVERWRITE; + }else{ + return 0; + } } #ifndef SQLITE_OMIT_WAL @@ -4565,10 +4621,12 @@ static int fillInUnixFile( pNew->h = h; pNew->pVfs = pVfs; pNew->zPath = zFilename; + pNew->ctrlFlags = 0; + if( sqlite3_uri_boolean(zFilename, "psow", SQLITE_POWERSAFE_OVERWRITE) ){ + pNew->ctrlFlags |= UNIXFILE_PSOW; + } if( memcmp(pVfs->zName,"unix-excl",10)==0 ){ - pNew->ctrlFlags = UNIXFILE_EXCL; - }else{ - pNew->ctrlFlags = 0; + pNew->ctrlFlags |= UNIXFILE_EXCL; } if( isReadOnly ){ pNew->ctrlFlags |= UNIXFILE_RDONLY; @@ -6775,7 +6833,7 @@ int sqlite3_os_init(void){ /* Double-check that the aSyscall[] array has been constructed ** correctly. See ticket [bb3a86e890c8e96ab] */ - assert( ArraySize(aSyscall)==20 ); + assert( ArraySize(aSyscall)==21 ); /* Register all VFSes defined in the aVfs[] array */ for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){ diff --git a/src/os_win.c b/src/os_win.c index ab70eebbf7..18fdd93933 100644 --- a/src/os_win.c +++ b/src/os_win.c @@ -59,7 +59,7 @@ struct winFile { HANDLE h; /* Handle for accessing the file */ u8 locktype; /* Type of lock currently held on this file */ short sharedLockByte; /* Randomly chosen byte used as a shared lock */ - u8 bPersistWal; /* True to persist WAL files */ + u8 ctrlFlags; /* Flags. 
See WINFILE_* below */ DWORD lastErrno; /* The Windows errno from the last I/O error */ DWORD sectorSize; /* Sector size of the device file is on */ winShm *pShm; /* Instance of shared memory on this file */ @@ -74,6 +74,12 @@ struct winFile { #endif }; +/* +** Allowed values for winFile.ctrlFlags +*/ +#define WINFILE_PERSIST_WAL 0x04 /* Persistent WAL mode */ +#define WINFILE_PSOW 0x10 /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */ + /* * If compiled with SQLITE_WIN32_MALLOC on Windows, we will use the * various Win32 API heap functions instead of our own. @@ -2125,6 +2131,22 @@ static int winUnlock(sqlite3_file *id, int locktype){ return rc; } +/* +** If *pArg is initially negative then this is a query. Set *pArg to +** 1 or 0 depending on whether or not bit mask of pFile->ctrlFlags is set. +** +** If *pArg is 0 or 1, then clear or set the mask bit of pFile->ctrlFlags. +*/ +static void winModeBit(winFile *pFile, unsigned char mask, int *pArg){ + if( *pArg<0 ){ + *pArg = (pFile->ctrlFlags & mask)!=0; + }else if( (*pArg)==0 ){ + pFile->ctrlFlags &= ~mask; + }else{ + pFile->ctrlFlags |= mask; + } +} + /* ** Control and query of the open file handle. */ @@ -2160,12 +2182,11 @@ static int winFileControl(sqlite3_file *id, int op, void *pArg){ return SQLITE_OK; } case SQLITE_FCNTL_PERSIST_WAL: { - int bPersist = *(int*)pArg; - if( bPersist<0 ){ - *(int*)pArg = pFile->bPersistWal; - }else{ - pFile->bPersistWal = bPersist!=0; - } + winModeBit(pFile, WINFILE_PERSIST_WAL, (int*)pArg); + return SQLITE_OK; + } + case SQLITE_FCNTL_POWERSAFE_OVERWRITE: { + winModeBit(pFile, WINFILE_PSOW, (int*)pArg); return SQLITE_OK; } case SQLITE_FCNTL_VFSNAME: { @@ -2212,8 +2233,9 @@ static int winSectorSize(sqlite3_file *id){ ** Return a vector of device characteristics. 
*/ static int winDeviceCharacteristics(sqlite3_file *id){ - UNUSED_PARAMETER(id); - return SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN; + winFile *p = (winFile*)id; + return SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN | + ((p->ctrlFlags & WINFILE_PSOW)?SQLITE_IOCAP_POWERSAFE_OVERWRITE:0); } #ifndef SQLITE_OMIT_WAL @@ -3178,6 +3200,9 @@ static int winOpen( pFile->pVfs = pVfs; pFile->pShm = 0; pFile->zPath = zName; + if( sqlite3_uri_boolean(zName, "psow", SQLITE_POWERSAFE_OVERWRITE) ){ + pFile->ctrlFlags |= WINFILE_PSOW; + } pFile->sectorSize = getSectorSize(pVfs, zUtf8Name); #if SQLITE_OS_WINCE diff --git a/src/pager.c b/src/pager.c index 4f8473e735..892c245b44 100644 --- a/src/pager.c +++ b/src/pager.c @@ -2515,23 +2515,36 @@ static int pager_truncate(Pager *pPager, Pgno nPage){ ** the value returned by the xSectorSize() method rounded up to 32 if ** it is less than 32, or rounded down to MAX_SECTOR_SIZE if it ** is greater than MAX_SECTOR_SIZE. +** +** If the file has the SQLITE_IOCAP_POWERSAFE_OVERWRITE property, then set +** the effective sector size to its minimum value (512). The purpose of +** pPager->sectorSize is to define the "blast radius" of bytes that +** might change if a crash occurs while writing to a single byte in +** that range. But with POWERSAFE_OVERWRITE, the blast radius is zero +** (that is what POWERSAFE_OVERWRITE means), so we minimize the sector +** size. For backwards compatibility of the rollback journal file format, +** we cannot reduce the effective sector size below 512. */ static void setSectorSize(Pager *pPager){ assert( isOpen(pPager->fd) || pPager->tempFile ); - if( !pPager->tempFile ){ + if( pPager->tempFile + || (sqlite3OsDeviceCharacteristics(pPager->fd) & + SQLITE_IOCAP_POWERSAFE_OVERWRITE)!=0 + ){ /* Sector size doesn't matter for temporary files. Also, the file ** may not have been opened yet, in which case the OsSectorSize() - ** call will segfault. 
- */ - pPager->sectorSize = sqlite3OsSectorSize(pPager->fd); - } - if( pPager->sectorSize<32 ){ + ** call will segfault. */ pPager->sectorSize = 512; - } - if( pPager->sectorSize>MAX_SECTOR_SIZE ){ - assert( MAX_SECTOR_SIZE>=512 ); - pPager->sectorSize = MAX_SECTOR_SIZE; + }else{ + pPager->sectorSize = sqlite3OsSectorSize(pPager->fd); + if( pPager->sectorSize<32 ){ + pPager->sectorSize = 512; + } + if( pPager->sectorSize>MAX_SECTOR_SIZE ){ + assert( MAX_SECTOR_SIZE>=512 ); + pPager->sectorSize = MAX_SECTOR_SIZE; + } } } diff --git a/src/sqlite.h.in b/src/sqlite.h.in index ca0d7e3208..85ebd67a9f 100644 --- a/src/sqlite.h.in +++ b/src/sqlite.h.in @@ -504,7 +504,14 @@ int sqlite3_exec( ** first then the size of the file is extended, never the other ** way around. The SQLITE_IOCAP_SEQUENTIAL property means that ** information is written to disk in the same order as calls -** to xWrite(). +** to xWrite(). The SQLITE_IOCAP_POWERSAFE_OVERWRITE property means that +** after reboot following a crash or power loss, the value of +** each byte in a file is a value that was actually written +** into that byte at some point. In other words, a crash will +** not cause unwritten bytes of the file to change nor introduce +** randomness into a file nor zero out parts of the file, and any bytes of +** a file that are never written will not change values due to +** writes to nearby bytes. */ #define SQLITE_IOCAP_ATOMIC 0x00000001 #define SQLITE_IOCAP_ATOMIC512 0x00000002 @@ -518,6 +525,7 @@ int sqlite3_exec( #define SQLITE_IOCAP_SAFE_APPEND 0x00000200 #define SQLITE_IOCAP_SEQUENTIAL 0x00000400 #define SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN 0x00000800 +#define SQLITE_IOCAP_POWERSAFE_OVERWRITE 0x00001000 /* ** CAPI3REF: File Locking Levels @@ -767,6 +775,15 @@ struct sqlite3_io_methods { ** WAL mode. If the integer is -1, then it is overwritten with the current ** WAL persistence setting. 
** +** ^The [SQLITE_FCNTL_POWERSAFE_OVERWRITE] opcode is used to set or query the +** persistent "powersafe-overwrite" or "PSOW" setting. The PSOW setting +** determines the [SQLITE_IOCAP_POWERSAFE_OVERWRITE] bit of the +** xDeviceCharacteristics methods. The fourth parameter to +** [sqlite3_file_control()] for this opcode should be a pointer to an integer. +** That integer is 0 to disable powersafe-overwrite or 1 to enable +** powersafe-overwrite. If the integer is -1, then it is overwritten with the +** current powersafe-overwrite setting. +** ** ^The [SQLITE_FCNTL_OVERWRITE] opcode is invoked by SQLite after opening ** a write transaction to indicate that, unless it is rolled back for some ** reason, the entire database file will be overwritten by the current @@ -783,18 +800,19 @@ struct sqlite3_io_methods { ** pointer in case this file-control is not implemented. This file-control ** is intended for diagnostic use only. */ -#define SQLITE_FCNTL_LOCKSTATE 1 -#define SQLITE_GET_LOCKPROXYFILE 2 -#define SQLITE_SET_LOCKPROXYFILE 3 -#define SQLITE_LAST_ERRNO 4 -#define SQLITE_FCNTL_SIZE_HINT 5 -#define SQLITE_FCNTL_CHUNK_SIZE 6 -#define SQLITE_FCNTL_FILE_POINTER 7 -#define SQLITE_FCNTL_SYNC_OMITTED 8 -#define SQLITE_FCNTL_WIN32_AV_RETRY 9 -#define SQLITE_FCNTL_PERSIST_WAL 10 -#define SQLITE_FCNTL_OVERWRITE 11 -#define SQLITE_FCNTL_VFSNAME 12 +#define SQLITE_FCNTL_LOCKSTATE 1 +#define SQLITE_GET_LOCKPROXYFILE 2 +#define SQLITE_SET_LOCKPROXYFILE 3 +#define SQLITE_LAST_ERRNO 4 +#define SQLITE_FCNTL_SIZE_HINT 5 +#define SQLITE_FCNTL_CHUNK_SIZE 6 +#define SQLITE_FCNTL_FILE_POINTER 7 +#define SQLITE_FCNTL_SYNC_OMITTED 8 +#define SQLITE_FCNTL_WIN32_AV_RETRY 9 +#define SQLITE_FCNTL_PERSIST_WAL 10 +#define SQLITE_FCNTL_OVERWRITE 11 +#define SQLITE_FCNTL_VFSNAME 12 +#define SQLITE_FCNTL_POWERSAFE_OVERWRITE 13 /* ** CAPI3REF: Mutex Handle diff --git a/src/sqliteInt.h b/src/sqliteInt.h index 8613d865aa..6e48b1c4f2 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -125,6 +125,14 @@ 
#endif #endif +/* +** Powersafe overwrite is on by default. But can be turned off using +** the -DSQLITE_POWERSAFE_OVERWRITE=0 command-line option. +*/ +#ifndef SQLITE_POWERSAFE_OVERWRITE +# define SQLITE_POWERSAFE_OVERWRITE 1 +#endif + /* ** The SQLITE_DEFAULT_MEMSTATUS macro must be defined as either 0 or 1. ** It determines whether or not the features related to diff --git a/src/tclsqlite.c b/src/tclsqlite.c index c8f0fbd31b..3692bef9ca 100644 --- a/src/tclsqlite.c +++ b/src/tclsqlite.c @@ -3001,6 +3001,14 @@ static int DbMain(void *cd, Tcl_Interp *interp, int objc,Tcl_Obj *const*objv){ }else{ flags &= ~SQLITE_OPEN_FULLMUTEX; } + }else if( strcmp(zArg, "-uri")==0 ){ + int b; + if( Tcl_GetBooleanFromObj(interp, objv[i+1], &b) ) return TCL_ERROR; + if( b ){ + flags |= SQLITE_OPEN_URI; + }else{ + flags &= ~SQLITE_OPEN_URI; + } }else{ Tcl_AppendResult(interp, "unknown option: ", zArg, (char*)0); return TCL_ERROR; diff --git a/src/test1.c b/src/test1.c index 798366a8c7..9cc5d6347c 100644 --- a/src/test1.c +++ b/src/test1.c @@ -5235,6 +5235,38 @@ static int file_control_persist_wal( return TCL_OK; } +/* +** tclcmd: file_control_powersafe_overwrite DB PSOW-FLAG +** +** This TCL command runs the sqlite3_file_control interface with +** the SQLITE_FCNTL_POWERSAFE_OVERWRITE opcode. 
+*/ +static int file_control_powersafe_overwrite( + ClientData clientData, /* Pointer to sqlite3_enable_XXX function */ + Tcl_Interp *interp, /* The TCL interpreter that invoked this command */ + int objc, /* Number of arguments */ + Tcl_Obj *CONST objv[] /* Command arguments */ +){ + sqlite3 *db; + int rc; + int b; + char z[100]; + + if( objc!=3 ){ + Tcl_AppendResult(interp, "wrong # args: should be \"", + Tcl_GetStringFromObj(objv[0], 0), " DB FLAG", 0); + return TCL_ERROR; + } + if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ){ + return TCL_ERROR; + } + if( Tcl_GetIntFromObj(interp, objv[2], &b) ) return TCL_ERROR; + rc = sqlite3_file_control(db,NULL,SQLITE_FCNTL_POWERSAFE_OVERWRITE,(void*)&b); + sqlite3_snprintf(sizeof(z), z, "%d %d", rc, b); + Tcl_AppendResult(interp, z, (char*)0); + return TCL_OK; +} + /* ** tclcmd: file_control_vfsname DB ?AUXDB? @@ -6093,6 +6125,7 @@ int Sqlitetest1_Init(Tcl_Interp *interp){ { "file_control_sizehint_test", file_control_sizehint_test, 0 }, { "file_control_win32_av_retry", file_control_win32_av_retry, 0 }, { "file_control_persist_wal", file_control_persist_wal, 0 }, + { "file_control_powersafe_overwrite",file_control_powersafe_overwrite,0}, { "file_control_vfsname", file_control_vfsname, 0 }, { "sqlite3_vfs_list", vfs_list, 0 }, { "sqlite3_create_function_v2", test_create_function_v2, 0 }, diff --git a/src/test6.c b/src/test6.c index 23fb14c5b3..89f4a090b0 100644 --- a/src/test6.c +++ b/src/test6.c @@ -705,17 +705,18 @@ static int processDevSymArgs( char *zName; int iValue; } aFlag[] = { - { "atomic", SQLITE_IOCAP_ATOMIC }, - { "atomic512", SQLITE_IOCAP_ATOMIC512 }, - { "atomic1k", SQLITE_IOCAP_ATOMIC1K }, - { "atomic2k", SQLITE_IOCAP_ATOMIC2K }, - { "atomic4k", SQLITE_IOCAP_ATOMIC4K }, - { "atomic8k", SQLITE_IOCAP_ATOMIC8K }, - { "atomic16k", SQLITE_IOCAP_ATOMIC16K }, - { "atomic32k", SQLITE_IOCAP_ATOMIC32K }, - { "atomic64k", SQLITE_IOCAP_ATOMIC64K }, - { "sequential", SQLITE_IOCAP_SEQUENTIAL }, - { "safe_append", 
SQLITE_IOCAP_SAFE_APPEND }, + { "atomic", SQLITE_IOCAP_ATOMIC }, + { "atomic512", SQLITE_IOCAP_ATOMIC512 }, + { "atomic1k", SQLITE_IOCAP_ATOMIC1K }, + { "atomic2k", SQLITE_IOCAP_ATOMIC2K }, + { "atomic4k", SQLITE_IOCAP_ATOMIC4K }, + { "atomic8k", SQLITE_IOCAP_ATOMIC8K }, + { "atomic16k", SQLITE_IOCAP_ATOMIC16K }, + { "atomic32k", SQLITE_IOCAP_ATOMIC32K }, + { "atomic64k", SQLITE_IOCAP_ATOMIC64K }, + { "sequential", SQLITE_IOCAP_SEQUENTIAL }, + { "safe_append", SQLITE_IOCAP_SAFE_APPEND }, + { "powersafe_overwrite", SQLITE_IOCAP_POWERSAFE_OVERWRITE }, { 0, 0 } }; diff --git a/src/test_vfs.c b/src/test_vfs.c index a59aa40a48..a79407b57f 100644 --- a/src/test_vfs.c +++ b/src/test_vfs.c @@ -1162,18 +1162,19 @@ static int testvfs_obj_cmd( int iValue; } aFlag[] = { { "default", -1 }, - { "atomic", SQLITE_IOCAP_ATOMIC }, - { "atomic512", SQLITE_IOCAP_ATOMIC512 }, - { "atomic1k", SQLITE_IOCAP_ATOMIC1K }, - { "atomic2k", SQLITE_IOCAP_ATOMIC2K }, - { "atomic4k", SQLITE_IOCAP_ATOMIC4K }, - { "atomic8k", SQLITE_IOCAP_ATOMIC8K }, - { "atomic16k", SQLITE_IOCAP_ATOMIC16K }, - { "atomic32k", SQLITE_IOCAP_ATOMIC32K }, - { "atomic64k", SQLITE_IOCAP_ATOMIC64K }, - { "sequential", SQLITE_IOCAP_SEQUENTIAL }, - { "safe_append", SQLITE_IOCAP_SAFE_APPEND }, + { "atomic", SQLITE_IOCAP_ATOMIC }, + { "atomic512", SQLITE_IOCAP_ATOMIC512 }, + { "atomic1k", SQLITE_IOCAP_ATOMIC1K }, + { "atomic2k", SQLITE_IOCAP_ATOMIC2K }, + { "atomic4k", SQLITE_IOCAP_ATOMIC4K }, + { "atomic8k", SQLITE_IOCAP_ATOMIC8K }, + { "atomic16k", SQLITE_IOCAP_ATOMIC16K }, + { "atomic32k", SQLITE_IOCAP_ATOMIC32K }, + { "atomic64k", SQLITE_IOCAP_ATOMIC64K }, + { "sequential", SQLITE_IOCAP_SEQUENTIAL }, + { "safe_append", SQLITE_IOCAP_SAFE_APPEND }, { "undeletable_when_open", SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN }, + { "powersafe_overwrite", SQLITE_IOCAP_POWERSAFE_OVERWRITE }, { 0, 0 } }; Tcl_Obj *pRet; @@ -1207,7 +1208,7 @@ static int testvfs_obj_cmd( iNew |= aFlag[idx].iValue; } - p->iDevchar = iNew; + p->iDevchar = iNew| 
0x10000000; } pRet = Tcl_NewObj(); diff --git a/src/wal.c b/src/wal.c index fa44b66d32..657d59d527 100644 --- a/src/wal.c +++ b/src/wal.c @@ -424,7 +424,8 @@ struct Wal { u8 ckptLock; /* True if holding a checkpoint lock */ u8 readOnly; /* WAL_RDWR, WAL_RDONLY, or WAL_SHM_RDONLY */ u8 truncateOnCommit; /* True to truncate WAL file on commit */ - u8 noSyncHeader; /* Avoid WAL header fsyncs if true */ + u8 syncHeader; /* Fsync the WAL header if true */ + u8 padToSectorBoundary; /* Pad transactions out to the next sector */ WalIndexHdr hdr; /* Wal-index header for current transaction */ const char *zWalName; /* Name of WAL file */ u32 nCkpt; /* Checkpoint sequence counter in the wal-header */ @@ -1153,7 +1154,6 @@ static int walIndexRecover(Wal *pWal){ /* Read all frames from the log file. */ iFrame = 0; - isValid = 1; for(iOffset=WAL_HDRSIZE; (iOffset+szFrame)<=nSize; iOffset+=szFrame){ u32 pgno; /* Database page number for frame */ u32 nTruncate; /* dbsize field from frame header */ @@ -1162,15 +1162,8 @@ static int walIndexRecover(Wal *pWal){ iFrame++; rc = sqlite3OsRead(pWal->pWalFd, aFrame, szFrame, iOffset); if( rc!=SQLITE_OK ) break; - if( sqlite3Get4byte(&aFrame[8]) == - 1+sqlite3Get4byte((u8*)&pWal->hdr.aSalt[0]) ){ - pWal->hdr.mxFrame = 0; - pWal->hdr.nPage = 0; - break; - } - if( !isValid ) continue; isValid = walDecodeFrame(pWal, &pgno, &nTruncate, aData, aFrame); - if( !isValid ) continue; + if( !isValid ) break; rc = walIndexAppend(pWal, iFrame, pgno); if( rc!=SQLITE_OK ) break; @@ -1294,6 +1287,8 @@ int sqlite3WalOpen( pRet->readLock = -1; pRet->mxWalSize = mxWalSize; pRet->zWalName = zWalName; + pRet->syncHeader = 1; + pRet->padToSectorBoundary = 1; pRet->exclusiveMode = (bNoShm ? WAL_HEAPMEMORY_MODE: WAL_NORMAL_MODE); /* Open file handle on the write-ahead log file. 
*/ @@ -1309,7 +1304,10 @@ int sqlite3WalOpen( sqlite3_free(pRet); }else{ int iDC = sqlite3OsDeviceCharacteristics(pRet->pWalFd); - if( iDC & SQLITE_IOCAP_SEQUENTIAL ){ pRet->noSyncHeader = 1; } + if( iDC & SQLITE_IOCAP_SEQUENTIAL ){ pRet->syncHeader = 0; } + if( iDC & SQLITE_IOCAP_POWERSAFE_OVERWRITE ){ + pRet->padToSectorBoundary = 0; + } *ppWal = pRet; WALTRACE(("WAL%d: opened\n", pRet)); } @@ -2631,41 +2629,71 @@ static int walRestartLog(Wal *pWal){ return rc; } +/* +** Information about the current state of the WAL file and where +** the next fsync should occur - passed from sqlite3WalFrames() into +** walWriteToLog(). +*/ +typedef struct WalWriter { + Wal *pWal; /* The complete WAL information */ + sqlite3_file *pFd; /* The WAL file to which we write */ + sqlite3_int64 iSyncPoint; /* Fsync at this offset */ + int syncFlags; /* Flags for the fsync */ + int szPage; /* Size of one page */ +} WalWriter; + /* ** Write iAmt bytes of content into the WAL file beginning at iOffset. +** Do a sync when crossing the p->iSyncPoint boundary. ** -** When crossing the boundary between the first and second sectors of the -** file, first write all of the first sector content, then fsync(), then -** continue writing content for the second sector. This ensures that -** the WAL header is overwritten before the first commit mark. +** In other words, if iSyncPoint is in between iOffset and iOffset+iAmt, +** first write the part before iSyncPoint, then sync, then write the +** rest. */ static int walWriteToLog( - Wal *pWal, /* WAL to write to */ + WalWriter *p, /* WAL to write to */ void *pContent, /* Content to be written */ int iAmt, /* Number of bytes to write */ sqlite3_int64 iOffset /* Start writing at this offset */ ){ int rc; - if( iOffset>=pWal->szFirstBlock - || iOffset+iAmtszFirstBlock - || pWal->syncFlags==0 - ){ - /* The common and fast case. Just write the data. 
*/ - rc = sqlite3OsWrite(pWal->pWalFd, pContent, iAmt, iOffset); - }else{ - /* If this write will cross the first sector boundary, it has to - ** be split it two with a sync in between. */ - int iFirstAmt = pWal->szFirstBlock - iOffset; - assert( iFirstAmt>0 && iFirstAmtpWalFd, pContent, iFirstAmt, iOffset); - if( rc ) return rc; - assert( pWal->syncFlags & (SQLITE_SYNC_NORMAL|SQLITE_SYNC_FULL) ); - rc = sqlite3OsSync(pWal->pWalFd, pWal->syncFlags); + if( iOffsetiSyncPoint && iOffset+iAmt>=p->iSyncPoint ){ + int iFirstAmt = (int)(p->iSyncPoint - iOffset); + rc = sqlite3OsWrite(p->pFd, pContent, iFirstAmt, iOffset); if( rc ) return rc; + iOffset += iFirstAmt; + iAmt -= iFirstAmt; pContent = (void*)(iFirstAmt + (char*)pContent); - rc = sqlite3OsWrite(pWal->pWalFd, pContent, - iAmt-iFirstAmt, iOffset+iFirstAmt); + assert( p->syncFlags & (SQLITE_SYNC_NORMAL|SQLITE_SYNC_FULL) ); + rc = sqlite3OsSync(p->pFd, p->syncFlags); + if( iAmt==0 || rc ) return rc; } + rc = sqlite3OsWrite(p->pFd, pContent, iAmt, iOffset); + return rc; +} + +/* +** Write out a single frame of the WAL +*/ +static int walWriteOneFrame( + WalWriter *p, /* Where to write the frame */ + PgHdr *pPage, /* The page of the frame to be written */ + int nTruncate, /* The commit flag. Usually 0. 
>0 for commit */ + sqlite3_int64 iOffset /* Byte offset at which to write */ +){ + int rc; /* Result code from subfunctions */ + void *pData; /* Data actually written */ + u8 aFrame[WAL_FRAME_HDRSIZE]; /* Buffer to assemble frame-header in */ +#if defined(SQLITE_HAS_CODEC) + if( (pData = sqlite3PagerCodec(pPage))==0 ) return SQLITE_NOMEM; +#else + pData = pPage->pData; +#endif + walEncodeFrame(p->pWal, pPage->pgno, nTruncate, pData, aFrame); + rc = walWriteToLog(p, aFrame, sizeof(aFrame), iOffset); + if( rc ) return rc; + /* Write the page data */ + rc = walWriteToLog(p, pData, p->szPage, iOffset+sizeof(aFrame)); return rc; } @@ -2683,10 +2711,12 @@ int sqlite3WalFrames( ){ int rc; /* Used to catch return codes */ u32 iFrame; /* Next frame address */ - u8 aFrame[WAL_FRAME_HDRSIZE]; /* Buffer to assemble frame-header in */ PgHdr *p; /* Iterator to run through pList with. */ PgHdr *pLast = 0; /* Last frame in list */ - int nLast = 0; /* Number of extra copies of last page */ + int nExtra = 0; /* Number of extra copies of last page */ + int szFrame; /* The size of a single frame */ + i64 iOffset; /* Next byte to write in WAL file */ + WalWriter w; /* The writer */ assert( pList ); assert( pWal->writeLock ); @@ -2739,86 +2769,78 @@ int sqlite3WalFrames( if( rc!=SQLITE_OK ){ return rc; } + + /* Sync the header (unless SQLITE_IOCAP_SEQUENTIAL is true or unless + ** all syncing is turned off by PRAGMA synchronous=OFF). Otherwise + ** an out-of-order write following a WAL restart could result in + ** database corruption. 
See the ticket: + ** + ** http://localhost:591/sqlite/info/ff5be73dee + */ + if( pWal->syncHeader && sync_flags ){ + rc = sqlite3OsSync(pWal->pWalFd, sync_flags & SQLITE_SYNC_MASK); + if( rc ) return rc; + } } assert( (int)pWal->szPage==szPage ); - /* Setup information needed to do the WAL header sync */ - if( pWal->noSyncHeader ){ - assert( pWal->szFirstBlock==0 ); - assert( pWal->syncFlags==0 ); - }else{ - pWal->szFirstBlock = sqlite3OsSectorSize(pWal->pWalFd); - if( szPage>pWal->szFirstBlock ) pWal->szFirstBlock = szPage; - pWal->syncFlags = sync_flags & SQLITE_SYNC_MASK; - } + /* Setup information needed to write frames into the WAL */ + w.pWal = pWal; + w.pFd = pWal->pWalFd; + w.iSyncPoint = 0; + w.syncFlags = sync_flags; + w.szPage = szPage; + iOffset = walFrameOffset(iFrame+1, szPage); + szFrame = szPage + WAL_FRAME_HDRSIZE; - /* Write the log file. */ + /* Write all frames into the log file exactly once */ for(p=pList; p; p=p->pDirty){ - u32 nDbsize; /* Db-size field for frame header */ - i64 iOffset; /* Write offset in log file */ - void *pData; - - iOffset = walFrameOffset(++iFrame, szPage); - /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */ - - /* Populate and write the frame header */ - nDbsize = (isCommit && p->pDirty==0) ? nTruncate : 0; -#if defined(SQLITE_HAS_CODEC) - if( (pData = sqlite3PagerCodec(p))==0 ) return SQLITE_NOMEM; -#else - pData = p->pData; -#endif - walEncodeFrame(pWal, p->pgno, nDbsize, pData, aFrame); - rc = walWriteToLog(pWal, aFrame, sizeof(aFrame), iOffset); - if( rc!=SQLITE_OK ){ - return rc; - } - - /* Write the page data */ - rc = walWriteToLog(pWal, pData, szPage, iOffset+sizeof(aFrame)); - if( rc!=SQLITE_OK ){ - return rc; - } + int nDbSize; /* 0 normally. Positive == commit flag */ + iFrame++; + assert( iOffset==walFrameOffset(iFrame, szPage) ); + nDbSize = (isCommit && p->pDirty==0) ? 
nTruncate : 0; + rc = walWriteOneFrame(&w, p, nDbSize, iOffset); + if( rc ) return rc; pLast = p; + iOffset += szFrame; } - /* Sync the log file if the 'isSync' flag was specified. */ + /* If this is the end of a transaction, then we might need to pad + ** the transaction and/or sync the WAL file. + ** + ** Padding and syncing only occur if this set of frames complete a + ** transaction and if PRAGMA synchronous=FULL. If synchronous==NORMAL + ** or synchonous==OFF, then no padding or syncing are needed. + ** + ** If SQLITE_IOCAP_POWERSAFE_OVERWRITE is defined, then padding is not + ** needed and only the sync is done. If padding is needed, then the + ** final frame is repeated (with its commit mark) until the next sector + ** boundary is crossed. Only the part of the WAL prior to the last + ** sector boundary is synced; the part of the last frame that extends + ** past the sector boundary is written after the sync. + */ if( isCommit && (sync_flags & WAL_SYNC_TRANSACTIONS)!=0 ){ - i64 iSegment = sqlite3OsSectorSize(pWal->pWalFd); - i64 iOffset = walFrameOffset(iFrame+1, szPage); - - assert( iSegment>0 ); - - iSegment = (((iOffset+iSegment-1)/iSegment) * iSegment); - while( iOffsetpData; -#endif - walEncodeFrame(pWal, pLast->pgno, nTruncate, pData, aFrame); - /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */ - rc = walWriteToLog(pWal, aFrame, sizeof(aFrame), iOffset); - if( rc!=SQLITE_OK ){ - return rc; + if( pWal->padToSectorBoundary ){ + int sectorSize = sqlite3OsSectorSize(pWal->pWalFd); + w.iSyncPoint = ((iOffset+sectorSize-1)/sectorSize)*sectorSize; + while( iOffsetpWalFd, sync_flags & SQLITE_SYNC_MASK); + rc = sqlite3OsSync(w.pFd, sync_flags & SQLITE_SYNC_MASK); } + /* If this frame set completes the first transaction in the WAL and + ** if PRAGMA journal_size_limit is set, then truncate the WAL to the + ** journal size limit, if possible. 
+ */ if( isCommit && pWal->truncateOnCommit && pWal->mxWalSize>=0 ){ i64 sz = pWal->mxWalSize; - if( walFrameOffset(iFrame+nLast+1, szPage)>pWal->mxWalSize ){ - sz = walFrameOffset(iFrame+nLast+1, szPage); + if( walFrameOffset(iFrame+nExtra+1, szPage)>pWal->mxWalSize ){ + sz = walFrameOffset(iFrame+nExtra+1, szPage); } walLimitSize(pWal, sz); pWal->truncateOnCommit = 0; @@ -2834,9 +2856,9 @@ int sqlite3WalFrames( iFrame++; rc = walIndexAppend(pWal, iFrame, p->pgno); } - while( nLast>0 && rc==SQLITE_OK ){ + while( nExtra>0 && rc==SQLITE_OK ){ iFrame++; - nLast--; + nExtra--; rc = walIndexAppend(pWal, iFrame, pLast->pgno); } diff --git a/test/incrvacuum2.test b/test/incrvacuum2.test index e67a086298..6e8e1bed5e 100644 --- a/test/incrvacuum2.test +++ b/test/incrvacuum2.test @@ -191,7 +191,7 @@ ifcapable wal { PRAGMA wal_checkpoint; } file size test.db-wal - } {1640} + } [expr {32+2*(512+24)}] do_test 4.3 { db close @@ -205,7 +205,7 @@ ifcapable wal { if {$newsz>$maxsz} {set maxsz $newsz} } set maxsz - } {2176} + } [expr {32+3*(512+24)}] } finish_test diff --git a/test/journal2.test b/test/journal2.test index 25ce941696..8f9b4d0b71 100644 --- a/test/journal2.test +++ b/test/journal2.test @@ -34,7 +34,7 @@ proc a_string {n} { # characteristics flags to "SAFE_DELETE". 
# testvfs tvfs -default 1 -tvfs devchar undeletable_when_open +tvfs devchar {undeletable_when_open powersafe_overwrite} # Set up a hook so that each time a journal file is opened, closed or # deleted, the method name ("xOpen", "xClose" or "xDelete") and the final @@ -231,4 +231,3 @@ ifcapable wal { tvfs delete finish_test - diff --git a/test/pager1.test b/test/pager1.test index 7bf8643d1a..99b25bdc00 100644 --- a/test/pager1.test +++ b/test/pager1.test @@ -1331,6 +1331,7 @@ foreach sectorsize { 4096 8192 16384 32768 65536 131072 262144 } { tv sectorsize $sectorsize + tv devchar {} set eff $sectorsize if {$sectorsize < 512} { set eff 512 } if {$sectorsize > 65536} { set eff 65536 } diff --git a/test/superlock.test b/test/superlock.test index 8155d929ce..8199d5218d 100644 --- a/test/superlock.test +++ b/test/superlock.test @@ -76,7 +76,10 @@ do_catchsql_test 3.4 { INSERT INTO t1 VALUES(5, 6)} {1 {database is locked}} do_catchsql_test 3.5 { PRAGMA wal_checkpoint } {0 {1 -1 -1}} do_test 3.6 { unlock } {} -do_execsql_test 4.1 { PRAGMA wal_checkpoint } {0 2 2} +# At this point the WAL file consists of a single frame only - written +# by test case 3.1. If the ZERO_DAMAGE flag were not set, it would consist +# of two frames - the frame written by 3.1 and a padding frame. 
+do_execsql_test 4.1 { PRAGMA wal_checkpoint } {0 1 1} do_test 4.2 { sqlite3demo_superlock unlock test.db } {unlock} do_catchsql_test 4.3 { SELECT * FROM t1 } {1 {database is locked}} diff --git a/test/syscall.test b/test/syscall.test index b67bead7da..dde4b467a2 100644 --- a/test/syscall.test +++ b/test/syscall.test @@ -59,7 +59,8 @@ do_test 2.1.2 { test_syscall exists nosuchcall } 0 foreach s { open close access getcwd stat fstat ftruncate fcntl read pread write pwrite fchmod fallocate - pread64 pwrite64 unlink openDirectory mkdir rmdir + pread64 pwrite64 unlink openDirectory mkdir rmdir + statvfs } { if {[test_syscall exists $s]} {lappend syscall_list $s} } diff --git a/test/unixexcl.test b/test/unixexcl.test index 207078acde..8c64488d75 100644 --- a/test/unixexcl.test +++ b/test/unixexcl.test @@ -82,8 +82,8 @@ do_multiclient_test tn { do_multiclient_test tn { do_test unixexcl-3.$tn.1 { - code1 { db close; sqlite3 db test.db -vfs unix-excl } - code2 { db2 close; sqlite3 db2 test.db -vfs unix-excl } + code1 { db close; sqlite3 db file:test.db?psow=0 -vfs unix-excl -uri 1 } + code2 { db2 close; sqlite3 db2 file:test.db?psow=0 -vfs unix-excl -uri 1 } sql1 { PRAGMA journal_mode = WAL; CREATE TABLE t1(a, b); @@ -108,7 +108,7 @@ do_multiclient_test tn { } {1 2} do_test unixexcl-3.$tn.3 { sql1 { PRAGMA wal_checkpoint; INSERT INTO t1 VALUES(3, 4); } - } {0 5 5} + } {0 3 3} do_test unixexcl-3.$tn.4 { sql2 { SELECT * FROM t1; } } {1 2} @@ -120,7 +120,7 @@ do_multiclient_test tn { } {1 2 3 4} do_test unixexcl-3.$tn.7 { sql1 { PRAGMA wal_checkpoint; } - } {0 7 7} + } {0 4 4} } } diff --git a/test/wal.test b/test/wal.test index 1c63ddc00f..3b63d3e792 100644 --- a/test/wal.test +++ b/test/wal.test @@ -546,7 +546,7 @@ do_multiclient_test tn { } {1 2 3 4 5 6 7 8 9 10} do_test wal-10.$tn.12 { catchsql { PRAGMA wal_checkpoint } - } {0 {0 13 13}} ;# Reader no longer block checkpoints + } {0 {0 7 7}} ;# Reader no longer block checkpoints do_test wal-10.$tn.13 { execsql { INSERT 
INTO t1 VALUES(11, 12) } sql2 {SELECT * FROM t1} @@ -556,7 +556,7 @@ do_multiclient_test tn { # do_test wal-10.$tn.14 { catchsql { PRAGMA wal_checkpoint } - } {0 {0 15 13}} + } {0 {0 8 7}} # The following series of test cases used to verify another blocking # case in WAL - a case which no longer blocks. @@ -566,10 +566,10 @@ do_multiclient_test tn { } {1 2 3 4 5 6 7 8 9 10 11 12} do_test wal-10.$tn.16 { catchsql { PRAGMA wal_checkpoint } - } {0 {0 15 15}} + } {0 {0 8 8}} do_test wal-10.$tn.17 { execsql { PRAGMA wal_checkpoint } - } {0 15 15} + } {0 8 8} do_test wal-10.$tn.18 { sql3 { BEGIN; SELECT * FROM t1 } } {1 2 3 4 5 6 7 8 9 10 11 12} @@ -592,13 +592,13 @@ do_multiclient_test tn { # do_test wal-10.$tn.23 { execsql { PRAGMA wal_checkpoint } - } {0 17 17} + } {0 9 9} do_test wal-10.$tn.24 { sql2 { BEGIN; SELECT * FROM t1; } } {1 2 3 4 5 6 7 8 9 10 11 12 13 14} do_test wal-10.$tn.25 { execsql { PRAGMA wal_checkpoint } - } {0 17 17} + } {0 9 9} do_test wal-10.$tn.26 { catchsql { INSERT INTO t1 VALUES(15, 16) } } {0 {}} @@ -615,11 +615,11 @@ do_multiclient_test tn { do_test wal-10.$tn.29 { execsql { INSERT INTO t1 VALUES(19, 20) } catchsql { PRAGMA wal_checkpoint } - } {0 {0 6 0}} + } {0 {0 3 0}} do_test wal-10.$tn.30 { code3 { sqlite3_finalize $::STMT } execsql { PRAGMA wal_checkpoint } - } {0 6 0} + } {0 3 0} # At one point, if a reader failed to upgrade to a writer because it # was reading an old snapshot, the write-locks were not being released. 
@@ -658,7 +658,7 @@ do_multiclient_test tn { } {a b c d} do_test wal-10.$tn.36 { catchsql { PRAGMA wal_checkpoint } - } {0 {0 16 16}} + } {0 {0 8 8}} do_test wal-10.$tn.36 { sql3 { INSERT INTO t1 VALUES('e', 'f') } sql2 { SELECT * FROM t1 } @@ -666,7 +666,7 @@ do_multiclient_test tn { do_test wal-10.$tn.37 { sql2 COMMIT execsql { PRAGMA wal_checkpoint } - } {0 18 18} + } {0 9 9} } #------------------------------------------------------------------------- @@ -1040,7 +1040,7 @@ foreach {tn ckpt_cmd ckpt_res ckpt_main ckpt_aux} { 5 {sqlite3_wal_checkpoint db aux} SQLITE_OK 0 1 6 {sqlite3_wal_checkpoint db temp} SQLITE_OK 0 0 7 {db eval "PRAGMA main.wal_checkpoint"} {0 10 10} 1 0 - 8 {db eval "PRAGMA aux.wal_checkpoint"} {0 16 16} 0 1 + 8 {db eval "PRAGMA aux.wal_checkpoint"} {0 13 13} 0 1 9 {db eval "PRAGMA temp.wal_checkpoint"} {0 -1 -1} 0 0 } { do_test wal-16.$tn.1 { @@ -1054,7 +1054,8 @@ foreach {tn ckpt_cmd ckpt_res ckpt_main ckpt_aux} { PRAGMA aux.auto_vacuum = 0; PRAGMA main.journal_mode = WAL; PRAGMA aux.journal_mode = WAL; - PRAGMA synchronous = NORMAL; + PRAGMA main.synchronous = NORMAL; + PRAGMA aux.synchronous = NORMAL; } } {wal wal} @@ -1072,7 +1073,7 @@ foreach {tn ckpt_cmd ckpt_res ckpt_main ckpt_aux} { } [list [expr 1*1024] [wal_file_size 10 1024]] do_test wal-16.$tn.3 { list [file size test2.db] [file size test2.db-wal] - } [list [expr 1*1024] [wal_file_size 16 1024]] + } [list [expr 1*1024] [wal_file_size 13 1024]] do_test wal-16.$tn.4 [list eval $ckpt_cmd] $ckpt_res @@ -1082,7 +1083,7 @@ foreach {tn ckpt_cmd ckpt_res ckpt_main ckpt_aux} { do_test wal-16.$tn.6 { list [file size test2.db] [file size test2.db-wal] - } [list [expr ($ckpt_aux ? 7 : 1)*1024] [wal_file_size 16 1024]] + } [list [expr ($ckpt_aux ? 7 : 1)*1024] [wal_file_size 13 1024]] catch { db close } } @@ -1552,9 +1553,13 @@ ifcapable autovacuum { } file size test.db } [expr 3 * 1024] + + # WAL file now contains a single frame - the new root page for table t1. 
+ # It would be two frames (the new root page and a padding frame) if the + # ZERO_DAMAGE flag were not set. do_test 24.5 { file size test.db-wal - } 2128 + } [wal_file_size 1 1024] } db close diff --git a/test/wal2.test b/test/wal2.test index 90ea9defed..c7f00ea60f 100644 --- a/test/wal2.test +++ b/test/wal2.test @@ -361,7 +361,9 @@ do_test wal2-4.1 { INSERT INTO data VALUES('need xShmOpen to see this'); PRAGMA wal_checkpoint; } -} {wal 0 5 5} + # Three pages in the WAL file at this point: One copy of page 1 and two + # of the root page for table "data". +} {wal 0 3 3} do_test wal2-4.2 { db close testvfs tvfs -noshm 1 @@ -730,7 +732,7 @@ do_test wal2-6.5.1 { INSERT INTO t2 VALUES('I', 'II'); PRAGMA journal_mode; } -} {wal exclusive 0 3 3 wal} +} {wal exclusive 0 2 2 wal} do_test wal2-6.5.2 { execsql { PRAGMA locking_mode = normal; @@ -741,7 +743,7 @@ do_test wal2-6.5.2 { } {normal exclusive I II III IV} do_test wal2-6.5.3 { execsql { PRAGMA wal_checkpoint } -} {0 4 4} +} {0 2 2} db close proc lock_control {method filename handle spec} { @@ -1184,6 +1186,7 @@ foreach {tn sql reslist} { execsql {PRAGMA auto_vacuum = 0} execsql $sql + do_execsql_test wal2-14.$tn.0 { PRAGMA page_size = 4096 } {} do_execsql_test wal2-14.$tn.1 { PRAGMA journal_mode = WAL } {wal} set sqlite_sync_count 0 @@ -1199,7 +1202,7 @@ foreach {tn sql reslist} { INSERT INTO t1 VALUES(5, 6); COMMIT; -- 2 wal sync PRAGMA wal_checkpoint; -- 1 wal sync, 1 db sync - } {10 0 5 5 0 2 2} + } {10 0 3 3 0 1 1} do_test wal2-14.$tn.3 { cond_incr_sync_count 1 @@ -1261,6 +1264,7 @@ foreach {tn settings restart_sync commit_sync ckpt_sync} { sqlite3 db test.db do_execsql_test 15.$tn.1 " + PRAGMA page_size = 4096; CREATE TABLE t1(x); PRAGMA wal_autocheckpoint = OFF; PRAGMA journal_mode = WAL; @@ -1269,6 +1273,7 @@ foreach {tn settings restart_sync commit_sync ckpt_sync} { PRAGMA synchronous = [lindex $settings 2]; " {0 wal} +if { $tn==2} breakpoint do_test 15.$tn.2 { set sync(normal) 0 set sync(full) 0 diff --git 
a/test/wal3.test b/test/wal3.test index 82f7d72a97..ccab93e5da 100644 --- a/test/wal3.test +++ b/test/wal3.test @@ -429,7 +429,7 @@ do_test wal3-6.1.2 { } {o t t f} do_test wal3-6.1.3 { execsql { PRAGMA wal_checkpoint } db2 -} {0 7 7} +} {0 4 4} # At this point the log file has been fully checkpointed. However, # connection [db3] holds a lock that prevents the log from being wrapped. @@ -518,7 +518,7 @@ proc lock_callback {method file handle spec} { } do_test wal3-6.2.2 { execsql { PRAGMA wal_checkpoint } -} {0 7 7} +} {0 4 4} do_test wal3-6.2.3 { set ::R } {h h l b} @@ -628,7 +628,7 @@ do_test wal3-8.1 { INSERT INTO b VALUES('Markazi'); PRAGMA wal_checkpoint; } -} {wal 0 9 9} +} {wal 0 5 5} do_test wal3-8.2 { execsql { SELECT * FROM b } } {Tehran Qom Markazi} diff --git a/test/wal5.test b/test/wal5.test index ad6bcfc7d8..6eceed5e59 100644 --- a/test/wal5.test +++ b/test/wal5.test @@ -197,9 +197,9 @@ foreach {testprefix do_wal_checkpoint} { INSERT INTO t2 VALUES(1, 2); } } {} - do_test 2.2.$tn.2 { file_page_counts } {1 5 1 5} - do_test 2.1.$tn.3 { code1 { do_wal_checkpoint db } } {0 5 5} - do_test 2.1.$tn.4 { file_page_counts } {2 5 2 5} + do_test 2.2.$tn.2 { file_page_counts } {1 3 1 3} + do_test 2.1.$tn.3 { code1 { do_wal_checkpoint db } } {0 3 3} + do_test 2.1.$tn.4 { file_page_counts } {2 3 2 3} } do_multiclient_test tn { @@ -213,10 +213,10 @@ foreach {testprefix do_wal_checkpoint} { INSERT INTO t2 VALUES(3, 4); } } {} - do_test 2.2.$tn.2 { file_page_counts } {1 5 1 7} + do_test 2.2.$tn.2 { file_page_counts } {1 3 1 4} do_test 2.2.$tn.3 { sql2 { BEGIN; SELECT * FROM t1 } } {1 2} - do_test 2.2.$tn.4 { code1 { do_wal_checkpoint db -mode restart } } {1 5 5} - do_test 2.2.$tn.5 { file_page_counts } {2 5 2 7} + do_test 2.2.$tn.4 { code1 { do_wal_checkpoint db -mode restart } } {1 3 3} + do_test 2.2.$tn.5 { file_page_counts } {2 3 2 4} } do_multiclient_test tn { @@ -229,13 +229,13 @@ foreach {testprefix do_wal_checkpoint} { INSERT INTO t2 VALUES(1, 2); } } {} - 
do_test 2.3.$tn.2 { file_page_counts } {1 5 1 5} + do_test 2.3.$tn.2 { file_page_counts } {1 3 1 3} do_test 2.3.$tn.3 { sql2 { BEGIN; SELECT * FROM t1 } } {1 2} do_test 2.3.$tn.4 { sql1 { INSERT INTO t1 VALUES(3, 4) } } {} do_test 2.3.$tn.5 { sql1 { INSERT INTO t2 VALUES(3, 4) } } {} - do_test 2.3.$tn.6 { file_page_counts } {1 7 1 7} - do_test 2.3.$tn.7 { code1 { do_wal_checkpoint db -mode full } } {1 7 5} - do_test 2.3.$tn.8 { file_page_counts } {1 7 2 7} + do_test 2.3.$tn.6 { file_page_counts } {1 4 1 4} + do_test 2.3.$tn.7 { code1 { do_wal_checkpoint db -mode full } } {1 4 3} + do_test 2.3.$tn.8 { file_page_counts } {1 4 2 4} } # Check that checkpoints block on the correct locks. And respond correctly @@ -256,18 +256,18 @@ foreach {testprefix do_wal_checkpoint} { # processes holding all three types of locks. # foreach {tn1 checkpoint busy_on ckpt_expected expected} { - 1 PASSIVE - {0 5 5} - - 2 TYPO - {0 5 5} - + 1 PASSIVE - {0 3 3} - + 2 TYPO - {0 3 3} - - 3 FULL - {0 7 7} 2 - 4 FULL 1 {1 5 5} 1 - 5 FULL 2 {1 7 5} 2 - 6 FULL 3 {0 7 7} 2 + 3 FULL - {0 4 4} 2 + 4 FULL 1 {1 3 3} 1 + 5 FULL 2 {1 4 3} 2 + 6 FULL 3 {0 4 4} 2 - 7 RESTART - {0 7 7} 3 - 8 RESTART 1 {1 5 5} 1 - 9 RESTART 2 {1 7 5} 2 - 10 RESTART 3 {1 7 7} 3 + 7 RESTART - {0 4 4} 3 + 8 RESTART 1 {1 3 3} 1 + 9 RESTART 2 {1 4 3} 2 + 10 RESTART 3 {1 4 4} 3 } { do_multiclient_test tn { diff --git a/test/zerodamage.test b/test/zerodamage.test new file mode 100644 index 0000000000..0e82ce3505 --- /dev/null +++ b/test/zerodamage.test @@ -0,0 +1,112 @@ +# 2011 December 21 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. 
+# +#*********************************************************************** +# +# This file implements tests of the SQLITE_IOCAP_POWERSAFE_OVERWRITE property +# and the SQLITE_FCNTL_POWERSAFE_OVERWRITE file-control for manipulating it. +# +# The name of this file comes from the fact that we used to call the +# POWERSAFE_OVERWRITE property ZERO_DAMAGE. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix wal5 + +# POWERSAFE_OVERWRITE defaults to true +# +do_test zerodamage-1.0 { + file_control_powersafe_overwrite db -1 +} {0 1} + +# Check the ability to turn zero-damage on and off. +# +do_test zerodamage-1.1 { + file_control_powersafe_overwrite db 0 + file_control_powersafe_overwrite db -1 +} {0 0} +do_test zerodamage-1.2 { + file_control_powersafe_overwrite db 1 + file_control_powersafe_overwrite db -1 +} {0 1} + +# Run a transaction with zero-damage on, a small page size and a much larger +# sectorsize. Verify that the maximum journal size is small - that the +# rollback journal is not being padded. +# +do_test zerodamage-2.0 { + db close + testvfs tv -default 1 + tv sectorsize 8192 + sqlite3 db file:test.db?psow=TRUE -uri 1 + unset -nocomplain ::max_journal_size + set ::max_journal_size 0 + proc xDeleteCallback {method file args} { + set sz [file size $file] + if {$sz>$::max_journal_size} {set ::max_journal_size $sz} + } + tv filter xDelete + tv script xDeleteCallback + register_wholenumber_module db + db eval { + PRAGMA page_size=1024; + PRAGMA journal_mode=DELETE; + PRAGMA cache_size=5; + CREATE VIRTUAL TABLE nums USING wholenumber; + CREATE TABLE t1(x, y); + INSERT INTO t1 SELECT value, randomblob(100) FROM nums + WHERE value BETWEEN 1 AND 400; + } + set ::max_journal_size 0 + db eval { + UPDATE t1 SET y=randomblob(50) WHERE x=123; + } + concat [file_control_powersafe_overwrite db -1] [set ::max_journal_size] +} {0 1 2576} + +# Repeat the previous step with zero-damage turned off. 
This time the +# maximum rollback journal size should be much larger. +# +do_test zerodamage-2.1 { + set ::max_journal_size 0 + db close + sqlite3 db file:test.db?psow=FALSE -uri 1 + db eval { + UPDATE t1 SET y=randomblob(50) WHERE x=124; + } + concat [file_control_powersafe_overwrite db -1] [set ::max_journal_size] +} {0 0 24704} + +# Run a WAL-mode transaction with POWERSAFE_OVERWRITE on to verify that the +# WAL file does not get too big. +# +do_test zerodamage-3.0 { + db eval { + PRAGMA journal_mode=WAL; + } + db close + sqlite3 db file:test.db?psow=TRUE -uri 1 + db eval { + UPDATE t1 SET y=randomblob(50) WHERE x=124; + } + file size test.db-wal +} {1080} + +# Repeat the previous with POWERSAFE_OVERWRITE off. Verify that the WAL file +# is padded. +# +do_test zerodamage-3.1 { + db close + sqlite3 db file:test.db?psow=FALSE -uri 1 + db eval { + UPDATE t1 SET y=randomblob(50) WHERE x=124; + } + file size test.db-wal +} {8416}