mirror of
https://github.com/sqlite/sqlite.git
synced 2025-08-07 02:42:48 +03:00
Fix issues and clarify the operation of pager_playback_one_page().
A block comment in pager.c identifies 13 invariants on the pager subsystem. Ticket [9d68c883132c8]. FossilOrigin-Name: 0906597698b697ab2993a460f257e326cb58e475
This commit is contained in:
18
manifest
18
manifest
@@ -1,8 +1,8 @@
|
||||
-----BEGIN PGP SIGNED MESSAGE-----
|
||||
Hash: SHA1
|
||||
|
||||
C Add\sa\stest\scase\sfor\sthe\sOOM-fault\scorruption\sissue.\nTicket\s[9d68c883132c8].
|
||||
D 2010-04-09T23:05:25
|
||||
C Fix\sissues\sand\sclarify\sthe\soperation\sof\spager_playback_one_page().\nA\sblock\scomment\sin\spager.c\sidentifies\s13\sinvariants\son\sthe\spager\ssubsystem.\nTicket\s[9d68c883132c8].
|
||||
D 2010-04-10T17:52:58
|
||||
F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0
|
||||
F Makefile.in 4f2f967b7e58a35bb74fb7ec8ae90e0f4ca7868b
|
||||
F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654
|
||||
@@ -155,7 +155,7 @@ F src/os_common.h 240c88b163b02c21a9f21f87d49678a0aa21ff30
|
||||
F src/os_os2.c 75a8c7b9a00a2cf1a65f9fa4afbc27d46634bb2f
|
||||
F src/os_unix.c 148d2f625db3727250c0b880481ae7630b6d0eb0
|
||||
F src/os_win.c 1c7453c2df4dab26d90ff6f91272aea18bcf7053
|
||||
F src/pager.c ab2482d819ae5c53bfcb1b8e673a00627d8fdeec
|
||||
F src/pager.c bde3cc79424853c5e4b14bff0d465efe8d11d31a
|
||||
F src/pager.h ef8a2cf10084f60ab45ee2dfded8bf8b0c655ddf
|
||||
F src/parse.y ace5c7a125d9f2a410e431ee3209034105045f7e
|
||||
F src/pcache.c ace8f6a5ecd4711cc66a1b23053be7109bd437cf
|
||||
@@ -800,14 +800,14 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
|
||||
F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
|
||||
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
|
||||
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
|
||||
P 8c046eb6d16682d5e755624deb4f76f57350b9c9
|
||||
R 59022ecbcd3187c087a8d743b1c03f76
|
||||
P 0a64a937b583c32c02c83fc669addf79662efea8
|
||||
R d0f57454f63eb10f5ebef992026a2823
|
||||
U drh
|
||||
Z e65c61d02944a0779051a86fb1035cc9
|
||||
Z 5e031dacab379eda04d33b11e754a1c3
|
||||
-----BEGIN PGP SIGNATURE-----
|
||||
Version: GnuPG v1.4.6 (GNU/Linux)
|
||||
|
||||
iD8DBQFLv7K6oxKgR168RlERApcYAJ0fCcxRN5TrNOQ/GY6g7TNBl82jQACfb2dY
|
||||
vpyFpHFOXknbF1TmB6BNahM=
|
||||
=9nt1
|
||||
iD8DBQFLwLr9oxKgR168RlERAssXAJ4j+RdaTfsZ3v6bfrfa9otSr7mQcACfTiG0
|
||||
tKLTzrdLF0jiSa5xYSUaR2A=
|
||||
=tMzT
|
||||
-----END PGP SIGNATURE-----
|
||||
|
@@ -1 +1 @@
|
||||
0a64a937b583c32c02c83fc669addf79662efea8
|
||||
0906597698b697ab2993a460f257e326cb58e475
|
150
src/pager.c
150
src/pager.c
@@ -21,6 +21,87 @@
|
||||
#ifndef SQLITE_OMIT_DISKIO
|
||||
#include "sqliteInt.h"
|
||||
|
||||
/*
|
||||
******************** NOTES ON THE DESIGN OF THE PAGER ************************
|
||||
**
|
||||
** Within this comment block, a page is deemed to have been synced
|
||||
** automatically as soon as it is written when PRAGMA synchronous=OFF.
|
||||
** Otherwise, the page is not synced until the xSync method of the VFS
|
||||
** is called successfully on the file containing the page.
|
||||
**
|
||||
** Definition: A page of the database file is said to be "overwriteable" if
|
||||
** one or more of the following are true about the page:
|
||||
**
|
||||
** (a) The original content of the page as it was at the beginning of
|
||||
** the transaction has been written into the rollback journal and
|
||||
** synced.
|
||||
**
|
||||
** (b) The page was a freelist leaf page at the start of the transaction.
|
||||
**
|
||||
** (c) The page number is greater than the largest page that existed in
|
||||
** the database file at the start of the transaction.
|
||||
**
|
||||
** (1) A page of the database file is never overwritten unless one of the
|
||||
** following are true:
|
||||
**
|
||||
** (a) The page and all other pages on the same sector are overwriteable.
|
||||
**
|
||||
** (b) The atomic page write optimization is enabled, and the entire
|
||||
** transaction other than the update of the transaction sequence
|
||||
** number consists of a single page change.
|
||||
**
|
||||
** (2) The content of a page written into the rollback journal exactly matches
|
||||
** both the content in the database when the rollback journal was written
|
||||
** and the content in the database at the beginning of the current
|
||||
** transaction.
|
||||
**
|
||||
** (3) Writes to the database file are an integer multiple of the page size
|
||||
** in length and are aligned to a page boundary.
|
||||
**
|
||||
** (4) Reads from the database file are either aligned on a page boundary and
|
||||
** an integer multiple of the page size in length or are taken from the
|
||||
** first 100 bytes of the database file.
|
||||
**
|
||||
** (5) All writes to the database file are synced prior to the rollback journal
|
||||
** being deleted, truncated, or zeroed.
|
||||
**
|
||||
** (6) If a master journal file is used, then all writes to the database file
|
||||
** are synced prior to the master journal being deleted.
|
||||
**
|
||||
** Definition: Two databases (or the same database at two points it time)
|
||||
** are said to be "logically equivalent" if they give the same answer to
|
||||
** all queries. Note in particular the the content of freelist leaf
|
||||
** pages can be changed arbitarily without effecting the logical equivalence
|
||||
** of the database.
|
||||
**
|
||||
** (7) At any time, if any subset, including the empty set and the total set,
|
||||
** of the unsynced changes to a rollback journal are removed and the
|
||||
** journal is rolled back, the resulting database file will be logical
|
||||
** equivalent to the database file at the beginning of the transaction.
|
||||
**
|
||||
** (8) When a transaction is rolled back, the xTruncate method of the VFS
|
||||
** is called to restore the database file to the same size it was at
|
||||
** the beginning of the transaction. (In some VFSes, the xTruncate
|
||||
** method is a no-op, but that does not change the fact the SQLite will
|
||||
** invoke it.)
|
||||
**
|
||||
** (9) Whenever the database file is modified, at least one bit in the range
|
||||
** of bytes from 24 through 39 inclusive will be changed prior to releasing
|
||||
** the EXCLUSIVE lock.
|
||||
**
|
||||
** (10) The pattern of bits in bytes 24 through 39 shall not repeat in less
|
||||
** than one billion transactions.
|
||||
**
|
||||
** (11) A database file is well-formed at the beginning and at the conclusion
|
||||
** of every transaction.
|
||||
**
|
||||
** (12) An EXCLUSIVE lock is held on the database file when writing to
|
||||
** the database file.
|
||||
**
|
||||
** (13) A SHARED lock is held on the database file while reading any
|
||||
** content out of the database file.
|
||||
*/
|
||||
|
||||
/*
|
||||
** Macros for troubleshooting. Normally turned off
|
||||
*/
|
||||
@@ -197,7 +278,8 @@ struct PagerSavepoint {
|
||||
**
|
||||
** journalStarted
|
||||
**
|
||||
** This flag is set whenever the the main journal is synced.
|
||||
** This flag is set whenever the the main journal is opened and
|
||||
** initialized
|
||||
**
|
||||
** The point of this flag is that it must be set after the
|
||||
** first journal header in a journal file has been synced to disk.
|
||||
@@ -223,7 +305,10 @@ struct PagerSavepoint {
|
||||
**
|
||||
** doNotSync
|
||||
**
|
||||
** This variable is set and cleared by sqlite3PagerWrite().
|
||||
** When enabled, cache spills are prohibited and the journal file cannot
|
||||
** be synced. This variable is set and cleared by sqlite3PagerWrite()
|
||||
** in order to prevent a journal sync from happening in between the
|
||||
** journalling of two pages on the same sector.
|
||||
**
|
||||
** needSync
|
||||
**
|
||||
@@ -283,6 +368,7 @@ struct Pager {
|
||||
sqlite3_file *sjfd; /* File descriptor for sub-journal */
|
||||
i64 journalOff; /* Current write offset in the journal file */
|
||||
i64 journalHdr; /* Byte offset to previous journal header */
|
||||
i64 journalSizeLimit; /* Size limit for persistent journal files */
|
||||
PagerSavepoint *aSavepoint; /* Array of active savepoints */
|
||||
int nSavepoint; /* Number of elements in aSavepoint[] */
|
||||
char dbFileVers[16]; /* Changes whenever database file changes */
|
||||
@@ -309,7 +395,6 @@ struct Pager {
|
||||
void *pCodec; /* First argument to xCodec... methods */
|
||||
#endif
|
||||
char *pTmpSpace; /* Pager.pageSize bytes of space for tmp use */
|
||||
i64 journalSizeLimit; /* Size limit for persistent journal files */
|
||||
PCache *pPCache; /* Pointer to page cache object */
|
||||
sqlite3_backup *pBackup; /* Pointer to list of ongoing backup processes */
|
||||
};
|
||||
@@ -825,6 +910,7 @@ static int writeJournalHdr(Pager *pPager){
|
||||
for(nWrite=0; rc==SQLITE_OK&&nWrite<JOURNAL_HDR_SZ(pPager); nWrite+=nHeader){
|
||||
IOTRACE(("JHDR %p %lld %d\n", pPager, pPager->journalHdr, nHeader))
|
||||
rc = sqlite3OsWrite(pPager->jfd, zHeader, nHeader, pPager->journalOff);
|
||||
assert( pPager->journalHdr <= pPager->journalOff );
|
||||
pPager->journalOff += nHeader;
|
||||
}
|
||||
|
||||
@@ -983,6 +1069,7 @@ static int writeMasterJournal(Pager *pPager, const char *zMaster){
|
||||
}
|
||||
pPager->setMaster = 1;
|
||||
assert( isOpen(pPager->jfd) );
|
||||
assert( pPager->journalHdr <= pPager->journalOff );
|
||||
|
||||
/* Calculate the length in bytes and the checksum of zMaster */
|
||||
for(nMaster=0; zMaster[nMaster]; nMaster++){
|
||||
@@ -1412,11 +1499,10 @@ static u32 pager_cksum(Pager *pPager, const u8 *aData){
|
||||
*/
|
||||
static int pager_playback_one_page(
|
||||
Pager *pPager, /* The pager being played back */
|
||||
int isMainJrnl, /* 1 -> main journal. 0 -> sub-journal. */
|
||||
int isUnsync, /* True if reading from unsynced main journal */
|
||||
i64 *pOffset, /* Offset of record to playback */
|
||||
int isSavepnt, /* True for a savepoint rollback */
|
||||
Bitvec *pDone /* Bitvec of pages already played back */
|
||||
Bitvec *pDone, /* Bitvec of pages already played back */
|
||||
int isMainJrnl, /* 1 -> main journal. 0 -> sub-journal. */
|
||||
int isSavepnt /* True for a savepoint rollback */
|
||||
){
|
||||
int rc;
|
||||
PgHdr *pPg; /* An existing page in the cache */
|
||||
@@ -1424,6 +1510,7 @@ static int pager_playback_one_page(
|
||||
u32 cksum; /* Checksum used for sanity checking */
|
||||
char *aData; /* Temporary storage for the page */
|
||||
sqlite3_file *jfd; /* The file descriptor for the journal file */
|
||||
int isSynced; /* True if journal page is synced */
|
||||
|
||||
assert( (isMainJrnl&~1)==0 ); /* isMainJrnl is 0 or 1 */
|
||||
assert( (isSavepnt&~1)==0 ); /* isSavepnt is 0 or 1 */
|
||||
@@ -1507,10 +1594,14 @@ static int pager_playback_one_page(
|
||||
PAGERID(pPager), pgno, pager_datahash(pPager->pageSize, (u8*)aData),
|
||||
(isMainJrnl?"main-journal":"sub-journal")
|
||||
));
|
||||
if( isMainJrnl ){
|
||||
isSynced = pPager->noSync || (*pOffset <= pPager->journalHdr);
|
||||
}else{
|
||||
isSynced = (pPg==0 || 0==(pPg->flags & PGHDR_NEED_SYNC));
|
||||
}
|
||||
if( (pPager->state>=PAGER_EXCLUSIVE)
|
||||
&& (!isSavepnt || pPg==0 || 0==(pPg->flags&PGHDR_NEED_SYNC))
|
||||
&& isOpen(pPager->fd)
|
||||
&& !isUnsync
|
||||
&& isSynced
|
||||
){
|
||||
i64 ofst = (pgno-1)*(i64)pPager->pageSize;
|
||||
testcase( !isSavepnt && pPg!=0 && (pPg->flags&PGHDR_NEED_SYNC)!=0 );
|
||||
@@ -1562,7 +1653,8 @@ static int pager_playback_one_page(
|
||||
/* If the contents of this page were just restored from the main
|
||||
** journal file, then its content must be as they were when the
|
||||
** transaction was first opened. In this case we can mark the page
|
||||
** as clean, since there will be no need to write it out to the.
|
||||
** as clean, since there will be no need to write it out to the
|
||||
** database.
|
||||
**
|
||||
** There is one exception to this rule. If the page is being rolled
|
||||
** back as part of a savepoint (or statement) rollback from an
|
||||
@@ -1909,8 +2001,6 @@ static int pager_playback(Pager *pPager, int isHot){
|
||||
** occurs.
|
||||
*/
|
||||
while( 1 ){
|
||||
int isUnsync = 0;
|
||||
|
||||
/* Read the next journal header from the journal file. If there are
|
||||
** not enough bytes left in the journal file for a complete header, or
|
||||
** it is corrupted, then a process must of failed while writing it.
|
||||
@@ -1951,7 +2041,6 @@ static int pager_playback(Pager *pPager, int isHot){
|
||||
if( nRec==0 && !isHot &&
|
||||
pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff ){
|
||||
nRec = (int)((szJ - pPager->journalOff) / JOURNAL_PG_SZ(pPager));
|
||||
isUnsync = 1;
|
||||
}
|
||||
|
||||
/* If this is the first header read from the journal, truncate the
|
||||
@@ -1973,7 +2062,7 @@ static int pager_playback(Pager *pPager, int isHot){
|
||||
pager_reset(pPager);
|
||||
needPagerReset = 0;
|
||||
}
|
||||
rc = pager_playback_one_page(pPager,1,isUnsync,&pPager->journalOff,0,0);
|
||||
rc = pager_playback_one_page(pPager,&pPager->journalOff,0,1,0);
|
||||
if( rc!=SQLITE_OK ){
|
||||
if( rc==SQLITE_DONE ){
|
||||
rc = SQLITE_OK;
|
||||
@@ -2126,7 +2215,7 @@ static int pagerPlaybackSavepoint(Pager *pPager, PagerSavepoint *pSavepoint){
|
||||
iHdrOff = pSavepoint->iHdrOffset ? pSavepoint->iHdrOffset : szJ;
|
||||
pPager->journalOff = pSavepoint->iOffset;
|
||||
while( rc==SQLITE_OK && pPager->journalOff<iHdrOff ){
|
||||
rc = pager_playback_one_page(pPager, 1, 0, &pPager->journalOff, 1, pDone);
|
||||
rc = pager_playback_one_page(pPager, &pPager->journalOff, pDone, 1, 1);
|
||||
}
|
||||
assert( rc!=SQLITE_DONE );
|
||||
}else{
|
||||
@@ -2156,7 +2245,7 @@ static int pagerPlaybackSavepoint(Pager *pPager, PagerSavepoint *pSavepoint){
|
||||
nJRec = (u32)((szJ - pPager->journalOff)/JOURNAL_PG_SZ(pPager));
|
||||
}
|
||||
for(ii=0; rc==SQLITE_OK && ii<nJRec && pPager->journalOff<szJ; ii++){
|
||||
rc = pager_playback_one_page(pPager, 1, 0, &pPager->journalOff, 1, pDone);
|
||||
rc = pager_playback_one_page(pPager, &pPager->journalOff, pDone, 1, 1);
|
||||
}
|
||||
assert( rc!=SQLITE_DONE );
|
||||
}
|
||||
@@ -2171,7 +2260,7 @@ static int pagerPlaybackSavepoint(Pager *pPager, PagerSavepoint *pSavepoint){
|
||||
i64 offset = pSavepoint->iSubRec*(4+pPager->pageSize);
|
||||
for(ii=pSavepoint->iSubRec; rc==SQLITE_OK && ii<pPager->nSubRec; ii++){
|
||||
assert( offset==ii*(4+pPager->pageSize) );
|
||||
rc = pager_playback_one_page(pPager, 0, 0, &offset, 1, pDone);
|
||||
rc = pager_playback_one_page(pPager, &offset, pDone, 0, 1);
|
||||
}
|
||||
assert( rc!=SQLITE_DONE );
|
||||
}
|
||||
@@ -2728,7 +2817,7 @@ static int syncJournal(Pager *pPager){
|
||||
** mode, then the journal file may at this point actually be larger
|
||||
** than Pager.journalOff bytes. If the next thing in the journal
|
||||
** file happens to be a journal-header (written as part of the
|
||||
** previous connections transaction), and a crash or power-failure
|
||||
** previous connection's transaction), and a crash or power-failure
|
||||
** occurs after nRec is updated but before this connection writes
|
||||
** anything else to the journal file (or commits/rolls back its
|
||||
** transaction), then SQLite may become confused when doing the
|
||||
@@ -2800,6 +2889,7 @@ static int syncJournal(Pager *pPager){
|
||||
*/
|
||||
pPager->needSync = 0;
|
||||
pPager->journalStarted = 1;
|
||||
pPager->journalHdr = pPager->journalOff;
|
||||
sqlite3PcacheClearSyncFlags(pPager->pPCache);
|
||||
}
|
||||
|
||||
@@ -3662,19 +3752,33 @@ int sqlite3PagerSharedLock(Pager *pPager){
|
||||
goto failed;
|
||||
}
|
||||
|
||||
/* TODO: Why are these cleared here? Is it necessary? */
|
||||
/* Reset the journal status fields to indicates that we have no
|
||||
** rollback journal at this time. */
|
||||
pPager->journalStarted = 0;
|
||||
pPager->journalOff = 0;
|
||||
pPager->setMaster = 0;
|
||||
pPager->journalHdr = 0;
|
||||
|
||||
/* Make sure the journal file has been synced to disk. */
|
||||
|
||||
/* Playback and delete the journal. Drop the database write
|
||||
** lock and reacquire the read lock. Purge the cache before
|
||||
** playing back the hot-journal so that we don't end up with
|
||||
** an inconsistent cache.
|
||||
** an inconsistent cache. Sync the hot journal before playing
|
||||
** it back since the process that crashed and left the hot journal
|
||||
** probably did not sync it and we are required to always sync
|
||||
** the journal before playing it back.
|
||||
*/
|
||||
if( isOpen(pPager->jfd) ){
|
||||
rc = pager_playback(pPager, 1);
|
||||
if( !pPager->noSync ){
|
||||
rc = sqlite3OsSync(pPager->jfd, SQLITE_SYNC_NORMAL);
|
||||
}
|
||||
if( rc==SQLITE_OK ){
|
||||
rc = sqlite3OsFileSize(pPager->jfd, &pPager->journalHdr);
|
||||
}
|
||||
if( rc==SQLITE_OK ){
|
||||
rc = pager_playback(pPager, 1);
|
||||
}
|
||||
if( rc!=SQLITE_OK ){
|
||||
rc = pager_error(pPager, rc);
|
||||
goto failed;
|
||||
@@ -3780,7 +3884,7 @@ static void pagerUnlockIfUnused(Pager *pPager){
|
||||
** a) When reading a free-list leaf page from the database, and
|
||||
**
|
||||
** b) When a savepoint is being rolled back and we need to load
|
||||
** a new page into the cache to populate with the data read
|
||||
** a new page into the cache to be filled with the data read
|
||||
** from the savepoint journal.
|
||||
**
|
||||
** If noContent is true, then the data returned is zeroed instead of
|
||||
@@ -4212,6 +4316,8 @@ static int pager_write(PgHdr *pPg){
|
||||
** contains the database locks. The following assert verifies
|
||||
** that we do not. */
|
||||
assert( pPg->pgno!=PAGER_MJ_PGNO(pPager) );
|
||||
|
||||
assert( pPager->journalHdr <= pPager->journalOff );
|
||||
CODEC2(pPager, pData, pPg->pgno, 7, return SQLITE_NOMEM, pData2);
|
||||
cksum = pager_cksum(pPager, (u8*)pData2);
|
||||
rc = write32bits(pPager->jfd, pPager->journalOff, pPg->pgno);
|
||||
|
Reference in New Issue
Block a user