1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-08-05 15:55:57 +03:00

Modifications to the journal format to make it more robust. (CVS 1686)

FossilOrigin-Name: 504246a18daca794473b17a7874096f1ec8648ee
This commit is contained in:
danielk1977
2004-06-25 02:38:54 +00:00
parent e970767144
commit 7657240a21
5 changed files with 479 additions and 206 deletions

View File

@@ -18,7 +18,7 @@
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.137 2004/06/23 10:43:10 danielk1977 Exp $
** @(#) $Id: pager.c,v 1.138 2004/06/25 02:38:55 danielk1977 Exp $
*/
#include "os.h" /* Must be first to enable large file support */
#include "sqliteInt.h"
@@ -225,13 +225,17 @@ struct Pager {
u8 memDb; /* True to inhibit all file I/O */
u8 *aInJournal; /* One bit for each page in the database file */
u8 *aInStmt; /* One bit for each page in the database */
int nMaster; /* Number of bytes to reserve for master j.p */
BusyHandler *pBusyHandler; /* Pointer to sqlite.busyHandler */
PgHdr *pFirst, *pLast; /* List of free pages */
PgHdr *pFirstSynced; /* First free page with PgHdr.needSync==0 */
PgHdr *pAll; /* List of all pages */
PgHdr *pStmt; /* List of pages in the statement subjournal */
PgHdr *aHash[N_PG_HASH]; /* Hash table to map page number to PgHdr */
off_t journalOff; /* Current byte offset in the journal file */
off_t journalHdr; /* Byte offset to previous journal header */
off_t stmtHdrOff; /* First journal header written this statement */
int sectorSize; /* Assumed sector size during rollback */
u8 setMaster; /* True if a m-j name has been written to jrnl */
};
/*
@@ -274,9 +278,25 @@ static const unsigned char aJournalMagic[] = {
** The size of the header and of each page in the journal is determined
** by the following macros.
*/
#define JOURNAL_HDR_SZ(pPager) (24 + (pPager)->nMaster)
#define JOURNAL_PG_SZ(pPager) ((pPager->pageSize) + 8)
/*
** The journal header size for this pager. In the future, this could be
** set to some value read from the disk controller. The important
** characteristic is that it is the same size as a disk sector.
*/
#define JOURNAL_HDR_SZ(pPager) (pPager->sectorSize)
#define PAGER_SECTOR_SIZE 512
/*
** Page number PAGER_MJ_PGNO is never used in an SQLite database (it is
** reserved for working around a windows/posix incompatibility). It is
** used in the journal to signify that the remainder of the journal file
** is devoted to storing a master journal name - there are no more pages to
** roll back. See comments for function writeMasterJournal() for details.
*/
#define PAGER_MJ_PGNO(x) (PENDING_BYTE/((x)->pageSize))
/*
** Enable reference count tracking (for debugging) here:
@@ -368,6 +388,244 @@ static int pager_errcode(Pager *pPager){
return rc;
}
/*
** When this is called the journal file for pager pPager must be open.
** The master journal file name is read from the end of the file and
** written into memory obtained from sqliteMalloc(). *pzMaster is
** set to point at the memory and SQLITE_OK returned. The caller must
** sqliteFree() *pzMaster.
**
** If no master journal file name is present *pzMaster is set to 0 and
** SQLITE_OK returned.
*/
static int readMasterJournal(OsFile *pJrnl, char **pzMaster){
int rc;
u32 len;
off_t szJ;
unsigned char aMagic[8]; /* A buffer to hold the magic header */
*pzMaster = 0;
rc = sqlite3OsFileSize(pJrnl, &szJ);
if( rc!=SQLITE_OK || szJ<12 ) return rc;
rc = sqlite3OsSeek(pJrnl, szJ-12);
if( rc!=SQLITE_OK ) return rc;
rc = read32bits(pJrnl, &len);
if( rc!=SQLITE_OK ) return rc;
rc = sqlite3OsRead(pJrnl, aMagic, 8);
if( rc!=SQLITE_OK || memcmp(aMagic, aJournalMagic, 8) ) return rc;
rc = sqlite3OsSeek(pJrnl, szJ-12-len);
if( rc!=SQLITE_OK ) return rc;
*pzMaster = (char *)sqliteMalloc(len);
if( !*pzMaster ){
return SQLITE_NOMEM;
}
rc = sqlite3OsRead(pJrnl, *pzMaster, len);
if( rc!=SQLITE_OK ){
sqliteFree(*pzMaster);
*pzMaster = 0;
return rc;
}
return SQLITE_OK;
}
/*
** Seek the journal file descriptor to the next sector boundary where a
** journal header may be read or written. Pager.journalOff is updated with
** the new seek offset.
**
** i.e for a sector size of 512:
**
** Input Offset Output Offset
** ---------------------------------------
** 0 0
** 512 512
** 100 512
** 2000 2048
**
*/
static int seekJournalHdr(Pager *pPager){
off_t offset = 0;
off_t c = pPager->journalOff;
if( c ){
offset = ((c-1)/JOURNAL_HDR_SZ(pPager) + 1) * JOURNAL_HDR_SZ(pPager);
}
assert( offset%JOURNAL_HDR_SZ(pPager)==0 );
assert( offset>=c );
assert( (offset-c)<JOURNAL_HDR_SZ(pPager) );
pPager->journalOff = offset;
return sqlite3OsSeek(&pPager->jfd, pPager->journalOff);
}
/*
** The journal file must be open when this routine is called. A journal
** header (JOURNAL_HDR_SZ bytes) is written into the journal file at the
** current location.
**
** The format for the journal header is as follows:
** - 8 bytes: Magic identifying journal format.
** - 4 bytes: Number of records in journal, or -1 no-sync mode is on.
** - 4 bytes: Random number used for page hash.
** - 4 bytes: Initial database page count.
** - 4 bytes: Sector size used by the process that wrote this journal.
**
** Followed by (JOURNAL_HDR_SZ - 20) bytes of unused space.
*/
static int writeJournalHdr(Pager *pPager){
int rc = seekJournalHdr(pPager);
if( rc ) return rc;
pPager->journalHdr = pPager->journalOff;
if( pPager->stmtHdrOff==0 ){
pPager->stmtHdrOff = pPager->journalHdr;
}
pPager->journalOff += JOURNAL_HDR_SZ(pPager);
/* FIX ME:
**
** Possibly for a pager not in no-sync mode, the journal magic should not
** be written until nRec is filled in as part of next syncJournal().
**
** Actually maybe the whole journal header should be delayed until that
** point. Think about this.
*/
rc = sqlite3OsWrite(&pPager->jfd, aJournalMagic, sizeof(aJournalMagic));
if( rc==SQLITE_OK ){
/* The nRec Field. 0xFFFFFFFF for no-sync journals. */
rc = write32bits(&pPager->jfd, pPager->noSync ? 0xffffffff : 0);
}
if( rc==SQLITE_OK ){
/* The random check-hash initialiser */
sqlite3Randomness(sizeof(pPager->cksumInit), &pPager->cksumInit);
rc = write32bits(&pPager->jfd, pPager->cksumInit);
}
if( rc==SQLITE_OK ){
/* The initial database size */
rc = write32bits(&pPager->jfd, pPager->dbSize);
}
if( rc==SQLITE_OK ){
/* The assumed sector size for this process */
rc = write32bits(&pPager->jfd, pPager->sectorSize);
}
/* The journal header has been written successfully. Seek the journal
** file descriptor to the end of the journal header sector.
*/
if( rc==SQLITE_OK ){
sqlite3OsSeek(&pPager->jfd, pPager->journalOff-1);
rc = sqlite3OsWrite(&pPager->jfd, "\000", 1);
}
return rc;
}
/*
** The journal file must be open when this is called. A journal header file
** (JOURNAL_HDR_SZ bytes) is read from the current location in the journal
** file. See comments above function writeJournalHdr() for a description of
** the journal header format.
**
** If the header is read successfully, *nRec is set to the number of
** page records following this header and *dbSize is set to the size of the
** database before the transaction began, in pages. Also, pPager->cksumInit
** is set to the value read from the journal header. SQLITE_OK is returned
** in this case.
**
** If the journal header file appears to be corrupted, SQLITE_DONE is
** returned and *nRec and *dbSize are not set. If JOURNAL_HDR_SZ bytes
** cannot be read from the journal file an error code is returned.
*/
static int readJournalHdr(
Pager *pPager,
off_t journalSize,
u32 *pNRec,
u32 *pDbSize
){
int rc;
unsigned char aMagic[8]; /* A buffer to hold the magic header */
rc = seekJournalHdr(pPager);
if( rc ) return rc;
if( pPager->journalOff+JOURNAL_HDR_SZ(pPager) > journalSize ){
return SQLITE_DONE;
}
rc = sqlite3OsRead(&pPager->jfd, aMagic, sizeof(aMagic));
if( rc ) return rc;
if( memcmp(aMagic, aJournalMagic, sizeof(aMagic))!=0 ){
return SQLITE_DONE;
}
rc = read32bits(&pPager->jfd, pNRec);
if( rc ) return rc;
rc = read32bits(&pPager->jfd, &pPager->cksumInit);
if( rc ) return rc;
rc = read32bits(&pPager->jfd, pDbSize);
if( rc ) return rc;
/* Update the assumed sector-size to match the value used by
** the process that created this journal. If this journal was
** created by a process other than this one, then this routine
** is being called from within pager_playback(). The local value
** of Pager.sectorSize is restored at the end of that routine.
*/
rc = read32bits(&pPager->jfd, &pPager->sectorSize);
if( rc ) return rc;
pPager->journalOff += JOURNAL_HDR_SZ(pPager);
rc = sqlite3OsSeek(&pPager->jfd, pPager->journalOff);
return rc;
}
/*
** Write the supplied master journal name into the journal file for pager
** pPager at the current location.
*/
static int writeMasterJournal(Pager *pPager, const char *zMaster){
int rc;
int len;
if( !zMaster || pPager->setMaster) return SQLITE_OK;
pPager->setMaster = 1;
len = strlen(zMaster);
/* If in full-sync mode, advance to the next disk sector before writing
** the master journal name. This is in case the previous page written to
** the journal has already been synced.
*/
if( pPager->fullSync ){
rc = seekJournalHdr(pPager);
if( rc!=SQLITE_OK ) return rc;
}
pPager->journalOff += (len+16);
rc = write32bits(&pPager->jfd, PAGER_MJ_PGNO(pPager));
if( rc!=SQLITE_OK ) return rc;
rc = sqlite3OsWrite(&pPager->jfd, zMaster, len);
if( rc!=SQLITE_OK ) return rc;
rc = write32bits(&pPager->jfd, len);
if( rc!=SQLITE_OK ) return rc;
rc = sqlite3OsWrite(&pPager->jfd, aJournalMagic, sizeof(aJournalMagic));
return rc;
}
/*
** Add or remove a page from the list of all pages that are in the
** statement journal.
@@ -480,7 +738,6 @@ static int pager_unwritelock(Pager *pPager){
pPg->needSync = 0;
}
pPager->dirtyCache = 0;
pPager->nMaster = 0;
pPager->nRec = 0;
}else{
assert( pPager->dirtyCache==0 || pPager->useJournal==0 );
@@ -488,6 +745,7 @@ static int pager_unwritelock(Pager *pPager){
sqlite3OsUnlock(&pPager->fd, SHARED_LOCK);
pPager->state = PAGER_SHARED;
pPager->origDbSize = 0;
pPager->setMaster = 0;
return SQLITE_OK;
}
@@ -540,6 +798,7 @@ static int pager_playback_one_page(Pager *pPager, OsFile *jfd, int useCksum){
if( rc!=SQLITE_OK ) return rc;
rc = sqlite3OsRead(jfd, &aData, pPager->pageSize);
if( rc!=SQLITE_OK ) return rc;
pPager->journalOff += pPager->pageSize + 4;
/* Sanity checking on the page. This is more important that I originally
** thought. If a power failure occurs while the journal is being written,
@@ -555,6 +814,7 @@ static int pager_playback_one_page(Pager *pPager, OsFile *jfd, int useCksum){
if( useCksum ){
rc = read32bits(jfd, &cksum);
if( rc ) return rc;
pPager->journalOff += 4;
if( pager_cksum(pPager, pgno, aData)!=cksum ){
return SQLITE_DONE;
}
@@ -566,10 +826,6 @@ static int pager_playback_one_page(Pager *pPager, OsFile *jfd, int useCksum){
** page in the pager cache. In this case just update the pager cache,
** not the database file. The page is left marked dirty in this case.
**
** FIX ME: Ideally the page would only be left marked dirty when the
** pager is in RESERVED state if it was dirty when this statement
** transaction was started.
**
** If in EXCLUSIVE state, then we update the pager cache if it exists
** and the main file. The page is then marked not dirty.
*/
@@ -631,23 +887,16 @@ static int pager_delmaster(const char *zMaster){
if( nMasterJournal>0 ){
char *zJournal;
char *zMasterPtr;
int nMasterPtr; /* Number of bytes allocated at zMasterPtr */
char *zMasterPtr = 0;
/* Load the entire master journal file into space obtained from
** sqliteMalloc() and pointed to by zMasterJournal.
**
** Also allocate an extra (strlen(zMaster)+1) bytes. This space is used
** to load a master-journal filename from some other journal file to
** check if it points at this master journal file.
*/
nMasterPtr = strlen(zMaster) + 1;
zMasterJournal = (char *)sqliteMalloc(nMasterJournal) + nMasterPtr;
zMasterJournal = (char *)sqliteMalloc(nMasterJournal);
if( !zMasterJournal ){
rc = SQLITE_NOMEM;
goto delmaster_out;
}
zMasterPtr = &zMasterJournal[nMasterJournal];
rc = sqlite3OsRead(&master, zMasterJournal, nMasterJournal);
if( rc!=SQLITE_OK ) goto delmaster_out;
@@ -659,46 +908,22 @@ static int pager_delmaster(const char *zMaster){
** so, return without deleting the master journal file.
*/
OsFile journal;
int nMaster;
off_t jsz;
memset(&journal, 0, sizeof(journal));
rc = sqlite3OsOpenReadOnly(zJournal, &journal);
if( rc!=SQLITE_OK ){
sqlite3OsClose(&journal);
goto delmaster_out;
}
/* Check if the file is big enough to be a journal file
** with the required master journal name. If not, ignore it.
*/
rc = sqlite3OsFileSize(&journal, &jsz);
rc = readMasterJournal(&journal, &zMasterPtr);
sqlite3OsClose(&journal);
if( rc!=SQLITE_OK ){
sqlite3OsClose(&journal);
goto delmaster_out;
}
if( jsz<(25+strlen(zMaster)) ){
sqlite3OsClose(&journal);
continue;
}
/* Seek to the point in the journal where the master journal name
** is stored. Read the master journal name into memory obtained
** from malloc.
*/
rc = sqlite3OsSeek(&journal, 20);
if( rc!=SQLITE_OK ) goto delmaster_out;
rc = read32bits(&journal, (u32*)&nMaster);
if( rc!=SQLITE_OK ) goto delmaster_out;
if( nMaster>=nMasterPtr ){
rc = sqlite3OsRead(&journal, zMasterPtr, nMasterPtr);
if( rc!=SQLITE_OK ){
goto delmaster_out;
}
if( zMasterPtr[nMasterPtr-1]=='\0' && !strcmp(zMasterPtr, zMaster) ){
/* We have a match. Do not delete the master journal file. */
goto delmaster_out;
}
if( zMasterPtr && !strcmp(zMasterPtr, zMaster) ){
/* We have a match. Do not delete the master journal file. */
goto delmaster_out;
}
}
zJournal += (strlen(zJournal)+1);
@@ -755,7 +980,6 @@ static int pager_reload_cache(Pager *pPager){
return rc;
}
/*
** Playback the journal and thus restore the database file to
** the state it was in before we started making changes.
@@ -809,97 +1033,94 @@ static int pager_reload_cache(Pager *pPager){
** If an I/O or malloc() error occurs, the journal-file is not deleted
** and an error code is returned.
*/
static int pager_playback(Pager *pPager, int useJournalSize){
static int pager_playback(Pager *pPager){
off_t szJ; /* Size of the journal file in bytes */
int nRec; /* Number of Records in the journal */
int i; /* Loop counter */
Pgno mxPg = 0; /* Size of the original file in pages */
unsigned char aMagic[8]; /* A buffer to hold the magic header */
int rc; /* Result code of a subroutine */
int nMaster; /* Number of bytes in the name of master journal */
char *zMaster = 0; /* Name of master journal file if any */
/* Figure out how many records are in the journal. Abort early if
** the journal is empty.
*/
assert( pPager->journalOpen );
sqlite3OsSeek(&pPager->jfd, 0);
rc = sqlite3OsFileSize(&pPager->jfd, &szJ);
if( rc!=SQLITE_OK ){
goto end_playback;
}
/* If the journal file is too small to contain a complete header,
** it must mean that the process that created the journal was just
** beginning to write the journal file when it died. In that case,
** the database file should have still been completely unchanged.
** Nothing needs to be rolled back. We can safely ignore this journal.
/* Read the master journal name from the journal, if it is present.
** If a master journal file name is specified, but the file is not
** present on disk, then the journal is not hot and does not need to be
** played back.
*/
if( szJ < 24 ){
rc = readMasterJournal(&pPager->jfd, &zMaster);
assert( rc!=SQLITE_DONE );
if( rc!=SQLITE_OK || (zMaster && !sqlite3OsFileExists(zMaster)) ){
sqliteFree(zMaster);
zMaster = 0;
if( rc==SQLITE_DONE ) rc = SQLITE_OK;
goto end_playback;
}
sqlite3OsSeek(&pPager->jfd, 0);
pPager->journalOff = 0;
/* (1) Read the beginning of the journal and verify the magic string
** at the beginning of the journal. */
rc = sqlite3OsRead(&pPager->jfd, aMagic, sizeof(aMagic));
if( rc!=SQLITE_OK || memcmp(aMagic, aJournalMagic, sizeof(aMagic))!=0 ){
goto end_playback;
}
/* This loop terminates either when the readJournalHdr() call returns
** SQLITE_DONE or an IO error occurs. */
while( 1 ){
/* (2) Read the number of pages stored in the journal. */
rc = read32bits(&pPager->jfd, (u32*)&nRec);
if( rc ) goto end_playback;
if( nRec==0xffffffff || useJournalSize ||
nRec>(szJ - JOURNAL_HDR_SZ(pPager))/JOURNAL_PG_SZ(pPager)
){
nRec = (szJ - JOURNAL_HDR_SZ(pPager))/JOURNAL_PG_SZ(pPager);
}
/* (3) Read the initial value for the sanity checksum */
rc = read32bits(&pPager->jfd, &pPager->cksumInit);
if( rc ) goto end_playback;
/* (4) Read the number of pages in the database file prior to the
** start of the transaction */
rc = read32bits(&pPager->jfd, &mxPg);
if( rc!=SQLITE_OK ){
goto end_playback;
}
/* (5) and (6): Check if a master journal file is specified. If one is
** specified, only proceed with the playback if it still exists. */
rc = read32bits(&pPager->jfd, &nMaster);
if( rc ) goto end_playback;
if( szJ < 24+nMaster ) goto end_playback;
if( nMaster>0 ){
zMaster = sqliteMalloc(nMaster);
if( !zMaster ){
rc = SQLITE_NOMEM;
goto end_playback;
}
rc = sqlite3OsRead(&pPager->jfd, zMaster, nMaster);
if( rc!=SQLITE_OK || (zMaster[0] && !sqlite3OsFileExists(zMaster)) ){
goto end_playback;
}
}
/* Truncate the database file back to it's original size */
assert( pPager->origDbSize==0 || pPager->origDbSize==mxPg );
rc = sqlite3OsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)mxPg);
if( rc!=SQLITE_OK ){
goto end_playback;
}
pPager->dbSize = mxPg;
/* Copy original pages out of the journal and back into the database file.
*/
for(i=0; i<nRec; i++){
rc = pager_playback_one_page(pPager, &pPager->jfd, 1);
if( rc!=SQLITE_OK ){
/* Read the next journal header from the journal file. If there are
** not enough bytes left in the journal file for a complete header, or
** it is corrupted, then a process must of failed while writing it.
** This indicates nothing more needs to be rolled back.
*/
rc = readJournalHdr(pPager, szJ, &nRec, &mxPg);
if( rc!=SQLITE_OK ){
if( rc==SQLITE_DONE ){
rc = SQLITE_OK;
}
break;
goto end_playback;
}
/* If nRec is 0xffffffff, then this journal was created by a process
** working in no-sync mode. This means that the rest of the journal
** file consists of pages, there are no more journal headers. Compute
** the value of nRec based on this assumption.
*/
if( nRec==0xffffffff ){
assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) );
nRec = (szJ - JOURNAL_HDR_SZ(pPager))/JOURNAL_PG_SZ(pPager);
}
/* If this is the first header read from the journal, truncate the
** database file back to it's original size.
*/
if( pPager->journalOff==JOURNAL_HDR_SZ(pPager) ){
assert( pPager->origDbSize==0 || pPager->origDbSize==mxPg );
rc = sqlite3OsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)mxPg);
if( rc!=SQLITE_OK ){
goto end_playback;
}
pPager->dbSize = mxPg;
}
/* rc = sqlite3OsSeek(&pPager->jfd, JOURNAL_HDR_SZ(pPager)); */
if( rc!=SQLITE_OK ) goto end_playback;
/* Copy original pages out of the journal and back into the database file.
*/
for(i=0; i<nRec; i++){
rc = pager_playback_one_page(pPager, &pPager->jfd, 1);
if( rc!=SQLITE_OK ){
if( rc==SQLITE_DONE ){
rc = SQLITE_OK;
pPager->journalOff = szJ;
break;
}else{
goto end_playback;
}
}
}
}
@@ -907,9 +1128,8 @@ static int pager_playback(Pager *pPager, int useJournalSize){
** where not restored by the loop above. We have to restore those
** pages by reading them back from the original database.
*/
if( rc==SQLITE_OK ){
pager_reload_cache(pPager);
}
assert( rc==SQLITE_OK );
pager_reload_cache(pPager);
end_playback:
if( zMaster ){
@@ -925,6 +1145,12 @@ end_playback:
if( rc==SQLITE_OK ){
rc = pager_unwritelock(pPager);
}
/* The Pager.sectorSize variable may have been updated while rolling
** back a journal created by a process with a different PAGER_SECTOR_SIZE
** value. Reset it to the correct value for this process.
*/
pPager->sectorSize = PAGER_SECTOR_SIZE;
return rc;
}
@@ -944,10 +1170,32 @@ end_playback:
*/
static int pager_stmt_playback(Pager *pPager){
off_t szJ; /* Size of the full journal */
off_t hdrOff;
int nRec; /* Number of Records */
int i; /* Loop counter */
int rc;
szJ = pPager->journalOff;
#ifndef NDEBUG
{
off_t os_szJ;
rc = sqlite3OsFileSize(&pPager->jfd, &os_szJ);
if( rc!=SQLITE_OK ) return rc;
assert( szJ==os_szJ );
}
#endif
/* Set hdrOff to be the offset to the first journal header written
** this statement transaction, or the end of the file if no journal
** header was written.
*/
hdrOff = pPager->stmtHdrOff;
assert( pPager->fullSync || !hdrOff );
if( !hdrOff ){
hdrOff = szJ;
}
/* Truncate the database back to its original size.
*/
rc = sqlite3OsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)pPager->stmtSize);
@@ -970,25 +1218,41 @@ static int pager_stmt_playback(Pager *pPager){
if( rc!=SQLITE_OK ) goto end_stmt_playback;
}
/* Figure out how many pages need to be copied out of the transaction
** journal.
/* Now roll some pages back from the transaction journal. Pager.stmtJSize
** was the size of the journal file when this statement was started, so
** everything after that needs to be rolled back, either into the
** database, the memory cache, or both.
**
** If it is not zero, then Pager.stmtHdrOff is the offset to the start
** of the first journal header written during this statement transaction.
*/
rc = sqlite3OsSeek(&pPager->jfd, pPager->stmtJSize);
if( rc!=SQLITE_OK ){
goto end_stmt_playback;
}
rc = sqlite3OsFileSize(&pPager->jfd, &szJ);
if( rc!=SQLITE_OK ){
goto end_stmt_playback;
}
nRec = (szJ - pPager->stmtJSize)/JOURNAL_PG_SZ(pPager);
for(i=nRec-1; i>=0; i--){
pPager->journalOff = pPager->stmtJSize;
while( pPager->journalOff < hdrOff ){
rc = pager_playback_one_page(pPager, &pPager->jfd, 1);
assert( rc!=SQLITE_DONE );
if( rc!=SQLITE_OK ) goto end_stmt_playback;
}
while( pPager->journalOff < szJ ){
u32 nRec;
u32 dummy;
rc = readJournalHdr(pPager, szJ, &nRec, &dummy);
if( rc!=SQLITE_OK ){
assert( rc!=SQLITE_DONE );
goto end_stmt_playback;
}
for(i=nRec-1; i>=0 && pPager->journalOff < szJ; i--){
rc = pager_playback_one_page(pPager, &pPager->jfd, 1);
assert( rc!=SQLITE_DONE );
if( rc!=SQLITE_OK ) goto end_stmt_playback;
}
}
pPager->journalOff = szJ;
end_stmt_playback:
if( rc!=SQLITE_OK ){
@@ -1177,6 +1441,7 @@ int sqlite3pager_open(
pPager->pFirstSynced = 0;
pPager->pLast = 0;
pPager->nExtra = nExtra;
pPager->sectorSize = PAGER_SECTOR_SIZE;
pPager->pBusyHandler = (BusyHandler *)pBusyHandler;
memset(pPager->aHash, 0, sizeof(pPager->aHash));
*ppPager = pPager;
@@ -1233,7 +1498,7 @@ int sqlite3pager_pagecount(Pager *pPager){
/*
** Forward declaration
*/
static int syncJournal(Pager*, const char*);
static int syncJournal(Pager*);
/*
@@ -1325,7 +1590,7 @@ int sqlite3pager_truncate(Pager *pPager, Pgno nPage){
memoryTruncate(pPager);
return SQLITE_OK;
}
rc = syncJournal(pPager, 0);
rc = syncJournal(pPager);
if( rc!=SQLITE_OK ){
return rc;
}
@@ -1482,14 +1747,14 @@ int sqlite3pager_ref(void *pData){
** This routine clears the needSync field of every page current held in
** memory.
*/
static int syncJournal(Pager *pPager, const char *zMaster){
static int syncJournal(Pager *pPager){
PgHdr *pPg;
int rc = SQLITE_OK;
/* Sync the journal before modifying the main database
** (assuming there is a journal and it needs to be synced.)
*/
if( pPager->needSync || zMaster ){
if( pPager->needSync ){
if( !pPager->tempFile ){
assert( pPager->journalOpen );
/* assert( !pPager->noSync ); // noSync might be set if synchronous
@@ -1499,37 +1764,28 @@ static int syncJournal(Pager *pPager, const char *zMaster){
/* Make sure the pPager->nRec counter we are keeping agrees
** with the nRec computed from the size of the journal file.
*/
off_t hdrSz, pgSz, jSz;
hdrSz = JOURNAL_HDR_SZ(pPager);
pgSz = JOURNAL_PG_SZ(pPager);
off_t jSz;
rc = sqlite3OsFileSize(&pPager->jfd, &jSz);
if( rc!=0 ) return rc;
assert( pPager->nRec*pgSz+hdrSz==jSz );
assert( pPager->journalOff==jSz );
}
#endif
{
/* Write the nRec value into the journal file header */
off_t szJ;
/* Write the nRec value into the journal file header. If in
** full-synchronous mode, sync the journal first. This ensures that
** all data has really hit the disk before nRec is updated to mark
** it as a candidate for rollback.
*/
if( pPager->fullSync ){
TRACE2("SYNC journal of %d\n", pPager->fd.h);
rc = sqlite3OsSync(&pPager->jfd);
if( rc!=0 ) return rc;
}
sqlite3OsSeek(&pPager->jfd, sizeof(aJournalMagic));
sqlite3OsSeek(&pPager->jfd, pPager->journalHdr + sizeof(aJournalMagic));
rc = write32bits(&pPager->jfd, pPager->nRec);
if( rc ) return rc;
/* Write the name of the master journal file if one is specified */
if( zMaster ){
assert( strlen(zMaster)<pPager->nMaster );
rc = sqlite3OsSeek(&pPager->jfd, 20);
if( rc ) return rc;
rc = sqlite3OsWrite(&pPager->jfd, zMaster, strlen(zMaster)+1);
if( rc ) return rc;
}
szJ = JOURNAL_HDR_SZ(pPager) + pPager->nRec*JOURNAL_PG_SZ(pPager);
sqlite3OsSeek(&pPager->jfd, szJ);
sqlite3OsSeek(&pPager->jfd, pPager->journalOff);
}
TRACE2("SYNC journal of %d\n", pPager->fd.h);
rc = sqlite3OsSync(&pPager->jfd);
@@ -1721,11 +1977,14 @@ int sqlite3pager_get(Pager *pPager, Pgno pgno, void **ppPage){
}
pPager->journalOpen = 1;
pPager->journalStarted = 0;
pPager->journalOff = 0;
pPager->setMaster = 0;
pPager->journalHdr = 0;
/* Playback and delete the journal. Drop the database write
** lock and reacquire the read lock.
*/
rc = pager_playback(pPager, 0);
rc = pager_playback(pPager);
if( rc!=SQLITE_OK ){
return rc;
}
@@ -1774,11 +2033,25 @@ int sqlite3pager_get(Pager *pPager, Pgno pgno, void **ppPage){
** it can't be helped.
*/
if( pPg==0 ){
int rc = syncJournal(pPager, 0);
int rc = syncJournal(pPager);
if( rc!=0 ){
sqlite3pager_rollback(pPager);
return SQLITE_IOERR;
}
if( pPager->fullSync ){
/* If in full-sync mode, write a new journal header into the
** journal file. This is done to avoid ever modifying a journal
** header that is involved in the rollback of pages that have
** already been written to the database (in case the header is
** trashed when the nRec field is updated).
*/
pPager->nRec = 0;
rc = writeJournalHdr(pPager);
if( rc!=0 ){
sqlite3pager_rollback(pPager);
return SQLITE_IOERR;
}
}
pPg = pPager->pFirst;
}
assert( pPg->nRef==0 );
@@ -1975,6 +2248,9 @@ static int pager_open_journal(Pager *pPager){
return SQLITE_NOMEM;
}
rc = sqlite3OsOpenExclusive(pPager->zJournal, &pPager->jfd,pPager->tempFile);
pPager->journalOff = 0;
pPager->setMaster = 0;
pPager->journalHdr = 0;
if( rc!=SQLITE_OK ){
sqliteFree(pPager->aInJournal);
pPager->aInJournal = 0;
@@ -1994,32 +2270,8 @@ static int pager_open_journal(Pager *pPager){
}
pPager->origDbSize = pPager->dbSize;
/* Create the header for the journal:
** - 8 bytes: Magic identifying journal format.
** - 4 bytes: Number of records in journal, or -1 no-sync mode is on.
** - 4 bytes: Magic used for page checksums.
** - 4 bytes: Initial database page count.
** - 4 bytes: Number of bytes reserved for master journal ptr (nMaster)
** - nMaster bytes: Space for a master journal pointer.
*/
rc = sqlite3OsWrite(&pPager->jfd, aJournalMagic, sizeof(aJournalMagic));
if( rc==SQLITE_OK ){
rc = write32bits(&pPager->jfd, pPager->noSync ? 0xffffffff : 0);
}
if( rc==SQLITE_OK ){
sqlite3Randomness(sizeof(pPager->cksumInit), &pPager->cksumInit);
rc = write32bits(&pPager->jfd, pPager->cksumInit);
}
if( rc==SQLITE_OK ){
rc = write32bits(&pPager->jfd, pPager->dbSize);
}
if( rc==SQLITE_OK ){
rc = write32bits(&pPager->jfd, pPager->nMaster);
}
if( rc==SQLITE_OK ){
sqlite3OsSeek(&pPager->jfd, 24 + pPager->nMaster - 1);
rc = sqlite3OsWrite(&pPager->jfd, "\000", 1);
}
rc = writeJournalHdr(pPager);
if( pPager->stmtAutoopen && rc==SQLITE_OK ){
rc = sqlite3pager_stmt_begin(pPager);
}
@@ -2055,12 +2307,11 @@ static int pager_open_journal(Pager *pPager){
**
** If the database is already reserved for writing, this routine is a no-op.
*/
int sqlite3pager_begin(void *pData, int nMaster){
int sqlite3pager_begin(void *pData, int dummy_fixme){
PgHdr *pPg = DATA_TO_PGHDR(pData);
Pager *pPager = pPg->pPager;
int rc = SQLITE_OK;
assert( pPg->nRef>0 );
assert( nMaster>=0 );
assert( pPager->state!=PAGER_UNLOCK );
if( pPager->state==PAGER_SHARED ){
assert( pPager->aInJournal==0 );
@@ -2079,10 +2330,9 @@ int sqlite3pager_begin(void *pData, int nMaster){
if( rc!=SQLITE_OK ){
return rc;
}
pPager->nMaster = nMaster;
pPager->state = PAGER_RESERVED;
pPager->dirtyCache = 0;
TRACE3("TRANSACTION %d nMaster=%d\n", pPager->fd.h, nMaster);
TRACE2("TRANSACTION %d\n", pPager->fd.h);
if( pPager->useJournal && !pPager->tempFile ){
rc = pager_open_journal(pPager);
}
@@ -2122,6 +2372,8 @@ int sqlite3pager_write(void *pData){
return SQLITE_PERM;
}
assert( !pPager->setMaster );
/* Mark the page as dirty. If the page has already been written
** to the journal then we can return right away.
*/
@@ -2176,6 +2428,7 @@ int sqlite3pager_write(void *pData){
store32bits(pPg->pgno, pPg, -4);
CODEC(pPager, pData, pPg->pgno, 7);
rc = sqlite3OsWrite(&pPager->jfd, &((char*)pData)[-4], szPg);
pPager->journalOff += szPg;
TRACE3("JOURNAL page %d needSync=%d\n", pPg->pgno, pPg->needSync);
CODEC(pPager, pData, pPg->pgno, 0);
*(u32*)PGHDR_TO_EXTRA(pPg) = saved;
@@ -2500,7 +2753,7 @@ int sqlite3pager_rollback(Pager *pPager){
if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){
if( pPager->state>=PAGER_EXCLUSIVE ){
pager_playback(pPager, 1);
pager_playback(pPager);
}
return pager_errcode(pPager);
}
@@ -2512,7 +2765,7 @@ int sqlite3pager_rollback(Pager *pPager){
rc = rc2;
}
}else{
rc = pager_playback(pPager, 1);
rc = pager_playback(pPager);
}
if( rc!=SQLITE_OK ){
rc = SQLITE_CORRUPT;
@@ -2577,12 +2830,11 @@ int sqlite3pager_stmt_begin(Pager *pPager){
#ifndef NDEBUG
rc = sqlite3OsFileSize(&pPager->jfd, &pPager->stmtJSize);
if( rc ) goto stmt_begin_failed;
assert( pPager->stmtJSize ==
pPager->nRec*JOURNAL_PG_SZ(pPager) + JOURNAL_HDR_SZ(pPager) );
assert( pPager->stmtJSize == pPager->journalOff );
#endif
pPager->stmtJSize =
pPager->nRec*JOURNAL_PG_SZ(pPager) + JOURNAL_HDR_SZ(pPager);
pPager->stmtJSize = pPager->journalOff;
pPager->stmtSize = pPager->dbSize;
pPager->stmtHdrOff = 0;
if( !pPager->stmtOpen ){
rc = sqlite3pager_opentemp(zTemp, &pPager->stfd);
if( rc ) goto stmt_begin_failed;
@@ -2749,12 +3001,20 @@ int sqlite3pager_sync(Pager *pPager, const char *zMaster){
PgHdr *pPg;
assert( pPager->journalOpen );
rc = pager_incr_changecounter(pPager);
if( rc!=SQLITE_OK ) goto sync_exit;
/* Sync the journal file */
rc = syncJournal(pPager, zMaster);
if( rc!=SQLITE_OK ) goto sync_exit;
/* If a master journal file name has already been written to the
** journal file, then no sync is required. This happens when it is
** written, then the process fails to upgrade from a RESERVED to an
** EXCLUSIVE lock. The next time the process tries to commit the
** transaction the m-j name will have already been written.
*/
if( !pPager->setMaster ){
rc = pager_incr_changecounter(pPager);
if( rc!=SQLITE_OK ) goto sync_exit;
rc = writeMasterJournal(pPager, zMaster);
if( rc!=SQLITE_OK ) goto sync_exit;
rc = syncJournal(pPager);
if( rc!=SQLITE_OK ) goto sync_exit;
}
/* Write all dirty pages to the database file */
pPg = pager_get_all_dirty_pages(pPager);