mirror of
https://github.com/sqlite/sqlite.git
synced 2025-08-08 14:02:16 +03:00
Fix inaccuracies and add details to comments in the pager. Change the name
of one function to make its purpose clearer. Ticket #599. (CVS 1209) FossilOrigin-Name: 48832d35ed0d5ba02908822c749591e76b790c48
This commit is contained in:
12
manifest
12
manifest
@@ -1,5 +1,5 @@
|
|||||||
C Preliminary\sfix\sfor\sticket\s#599.\s\sMore\stesting\sand\sanalysis\sneeded.\s(CVS\s1208)
|
C Fix\sinaccuracies\sand\sadd\sdetails\sto\scomments\sin\sthe\spager.\s\sChange\sthe\sname\nof\sone\sfunction\sto\smake\sits\spurpose\sclearer.\s\sTicket\s#599.\s(CVS\s1209)
|
||||||
D 2004-02-08T00:40:52
|
D 2004-02-08T06:05:46
|
||||||
F Makefile.in 0515ff9218ad8d5a8f6220f0494b8ef94c67013b
|
F Makefile.in 0515ff9218ad8d5a8f6220f0494b8ef94c67013b
|
||||||
F Makefile.linux-gcc b86a99c493a5bfb402d1d9178dcdc4bd4b32f906
|
F Makefile.linux-gcc b86a99c493a5bfb402d1d9178dcdc4bd4b32f906
|
||||||
F README f1de682fbbd94899d50aca13d387d1b3fd3be2dd
|
F README f1de682fbbd94899d50aca13d387d1b3fd3be2dd
|
||||||
@@ -40,7 +40,7 @@ F src/main.c 808ea1bda0798f4a714479aee8289d65f04cf29b
|
|||||||
F src/md5.c fe4f9c9c6f71dfc26af8da63e4d04489b1430565
|
F src/md5.c fe4f9c9c6f71dfc26af8da63e4d04489b1430565
|
||||||
F src/os.c 681ec36217bc7c795d55d9a63ff79a8614ddee8c
|
F src/os.c 681ec36217bc7c795d55d9a63ff79a8614ddee8c
|
||||||
F src/os.h 8d02b622153d2df442da1ec37cdd6b1bd9804a25
|
F src/os.h 8d02b622153d2df442da1ec37cdd6b1bd9804a25
|
||||||
F src/pager.c 7872537f9f47339b2a1098a54101d7f4e4c25364
|
F src/pager.c f2be6a1f691b4bc4b2e30d93540ceff72d38ac90
|
||||||
F src/pager.h 5da62c83443f26b1792cfd72c96c422f91aadd31
|
F src/pager.h 5da62c83443f26b1792cfd72c96c422f91aadd31
|
||||||
F src/parse.y 7a121554c0c0c0150a77ab05417b01fa44813ac4
|
F src/parse.y 7a121554c0c0c0150a77ab05417b01fa44813ac4
|
||||||
F src/pragma.c 89d62c31c6f0a43376fe8d20549b87a6d30c467a
|
F src/pragma.c 89d62c31c6f0a43376fe8d20549b87a6d30c467a
|
||||||
@@ -182,7 +182,7 @@ F www/sqlite.tcl 3c83b08cf9f18aa2d69453ff441a36c40e431604
|
|||||||
F www/tclsqlite.tcl b9271d44dcf147a93c98f8ecf28c927307abd6da
|
F www/tclsqlite.tcl b9271d44dcf147a93c98f8ecf28c927307abd6da
|
||||||
F www/vdbe.tcl 9b9095d4495f37697fd1935d10e14c6015e80aa1
|
F www/vdbe.tcl 9b9095d4495f37697fd1935d10e14c6015e80aa1
|
||||||
F www/whentouse.tcl a8335bce47cc2fddb07f19052cb0cb4d9129a8e4
|
F www/whentouse.tcl a8335bce47cc2fddb07f19052cb0cb4d9129a8e4
|
||||||
P 0b3f552b986fd89c48c350b0746be93b9d276ecc
|
P dc5be2c82b591a385adf02863d89e113272e2ebd
|
||||||
R 753fd39b58050d194e71064713a576ce
|
R 99f43def4bb0b74954a57e70fb60a6e4
|
||||||
U drh
|
U drh
|
||||||
Z 12eaf8d85a0609bf96a6d19c712cf69b
|
Z d377e040c4a9be8631352dbc02c26b62
|
||||||
|
@@ -1 +1 @@
|
|||||||
dc5be2c82b591a385adf02863d89e113272e2ebd
|
48832d35ed0d5ba02908822c749591e76b790c48
|
143
src/pager.c
143
src/pager.c
@@ -18,7 +18,7 @@
|
|||||||
** file simultaneously, or one process from reading the database while
|
** file simultaneously, or one process from reading the database while
|
||||||
** another is writing.
|
** another is writing.
|
||||||
**
|
**
|
||||||
** @(#) $Id: pager.c,v 1.93 2004/02/08 00:40:52 drh Exp $
|
** @(#) $Id: pager.c,v 1.94 2004/02/08 06:05:46 drh Exp $
|
||||||
*/
|
*/
|
||||||
#include "os.h" /* Must be first to enable large file support */
|
#include "os.h" /* Must be first to enable large file support */
|
||||||
#include "sqliteInt.h"
|
#include "sqliteInt.h"
|
||||||
@@ -146,8 +146,8 @@ struct Pager {
|
|||||||
int mxPage; /* Maximum number of pages to hold in cache */
|
int mxPage; /* Maximum number of pages to hold in cache */
|
||||||
int nHit, nMiss, nOvfl; /* Cache hits, misses, and LRU overflows */
|
int nHit, nMiss, nOvfl; /* Cache hits, misses, and LRU overflows */
|
||||||
u8 journalOpen; /* True if journal file descriptor is valid */
|
u8 journalOpen; /* True if journal file descriptor is valid */
|
||||||
u8 journalStarted; /* True if initial magic of journal is synced */
|
u8 journalStarted; /* True if header of journal is synced */
|
||||||
u8 useJournal; /* Do not use a rollback journal on this file */
|
u8 useJournal; /* Use a rollback journal on this file */
|
||||||
u8 ckptOpen; /* True if the checkpoint journal is open */
|
u8 ckptOpen; /* True if the checkpoint journal is open */
|
||||||
u8 ckptInUse; /* True if we are in a checkpoint */
|
u8 ckptInUse; /* True if we are in a checkpoint */
|
||||||
u8 ckptAutoopen; /* Open ckpt journal when main journal is opened*/
|
u8 ckptAutoopen; /* Open ckpt journal when main journal is opened*/
|
||||||
@@ -279,7 +279,13 @@ int journal_format = 3;
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
** Read a 32-bit integer from the given file descriptor
|
** Read a 32-bit integer from the given file descriptor. Store the integer
|
||||||
|
** that is read in *pRes. Return SQLITE_OK if everything worked, or an
|
||||||
|
** error code if something goes wrong.
|
||||||
|
**
|
||||||
|
** If the journal format is 2 or 3, read a big-endian integer. If the
|
||||||
|
** journal format is 1, read an integer in the native byte-order of the
|
||||||
|
** host machine.
|
||||||
*/
|
*/
|
||||||
static int read32bits(int format, OsFile *fd, u32 *pRes){
|
static int read32bits(int format, OsFile *fd, u32 *pRes){
|
||||||
u32 res;
|
u32 res;
|
||||||
@@ -295,8 +301,13 @@ static int read32bits(int format, OsFile *fd, u32 *pRes){
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
** Write a 32-bit integer into the given file descriptor. Writing
|
** Write a 32-bit integer into the given file descriptor. Return SQLITE_OK
|
||||||
** is always done using the new journal format.
|
** on success or an error code if something goes wrong.
|
||||||
|
**
|
||||||
|
** If the journal format is 2 or 3, write the integer as 4 big-endian
|
||||||
|
** bytes. If the journal format is 1, write the integer in the native
|
||||||
|
** byte order. In normal operation, only formats 2 and 3 are used.
|
||||||
|
** Journal format 1 is only used for testing.
|
||||||
*/
|
*/
|
||||||
static int write32bits(OsFile *fd, u32 val){
|
static int write32bits(OsFile *fd, u32 val){
|
||||||
unsigned char ac[4];
|
unsigned char ac[4];
|
||||||
@@ -313,6 +324,9 @@ static int write32bits(OsFile *fd, u32 val){
|
|||||||
/*
|
/*
|
||||||
** Write a 32-bit integer into a page header right before the
|
** Write a 32-bit integer into a page header right before the
|
||||||
** page data. This will overwrite the PgHdr.pDirty pointer.
|
** page data. This will overwrite the PgHdr.pDirty pointer.
|
||||||
|
**
|
||||||
|
** The integer is big-endian for formats 2 and 3 and native byte order
|
||||||
|
** for journal format 1.
|
||||||
*/
|
*/
|
||||||
static void store32bits(u32 val, PgHdr *p, int offset){
|
static void store32bits(u32 val, PgHdr *p, int offset){
|
||||||
unsigned char *ac;
|
unsigned char *ac;
|
||||||
@@ -469,6 +483,10 @@ static int pager_unwritelock(Pager *pPager){
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
** Compute and return a checksum for the page of data.
|
** Compute and return a checksum for the page of data.
|
||||||
|
**
|
||||||
|
** This is not a real checksum. It is really just the sum of the
|
||||||
|
** random initial value and the page number. We considered doing a checksum
|
||||||
|
** of the database, but that was found to be too slow.
|
||||||
*/
|
*/
|
||||||
static u32 pager_cksum(Pager *pPager, Pgno pgno, const char *aData){
|
static u32 pager_cksum(Pager *pPager, Pgno pgno, const char *aData){
|
||||||
u32 cksum = pPager->cksumInit + pgno;
|
u32 cksum = pPager->cksumInit + pgno;
|
||||||
@@ -537,21 +555,53 @@ static int pager_playback_one_page(Pager *pPager, OsFile *jfd, int format){
|
|||||||
** Playback the journal and thus restore the database file to
|
** Playback the journal and thus restore the database file to
|
||||||
** the state it was in before we started making changes.
|
** the state it was in before we started making changes.
|
||||||
**
|
**
|
||||||
** The journal file format is as follows: There is an initial
|
** The journal file format is as follows:
|
||||||
** file-type string for sanity checking. Then there is a single
|
**
|
||||||
** Pgno number which is the number of pages in the database before
|
** * 8 byte prefix. One of the aJournalMagic123 vectors defined
|
||||||
** changes were made. The database is truncated to this size.
|
** above. The format of the journal file is determined by which
|
||||||
** Next come zero or more page records where each page record
|
** of the three prefix vectors is seen.
|
||||||
** consists of a Pgno and SQLITE_PAGE_SIZE bytes of data. See
|
** * 4 byte big-endian integer which is the number of valid page records
|
||||||
** the PageRecord structure for details.
|
** in the journal. If this value is 0xffffffff, then compute the
|
||||||
|
** number of page records from the journal size. This field appears
|
||||||
|
** in format 3 only.
|
||||||
|
** * 4 byte big-endian integer which is the initial value for the
|
||||||
|
** sanity checksum. This field appears in format 3 only.
|
||||||
|
** * 4 byte integer which is the number of pages to truncate the
|
||||||
|
** database to during a rollback.
|
||||||
|
** * Zero or more page instances, each as follows:
|
||||||
|
** + 4 byte page number.
|
||||||
|
** + SQLITE_PAGE_SIZE bytes of data.
|
||||||
|
** + 4 byte checksum (format 3 only)
|
||||||
|
**
|
||||||
|
** When we speak of the journal header, we mean the first 4 bullets above.
|
||||||
|
** Each entry in the journal is an instance of the 5th bullet. Note that
|
||||||
|
** bullets 2 and 3 only appear in format-3 journals.
|
||||||
|
**
|
||||||
|
** Call the value from the second bullet "nRec". nRec is the number of
|
||||||
|
** valid page entries in the journal. In most cases, you can compute the
|
||||||
|
** value of nRec from the size of the journal file. But if a power
|
||||||
|
** failure occurred while the journal was being written, it could be the
|
||||||
|
** case that the size of the journal file had already been increased but
|
||||||
|
** the extra entries had not yet made it safely to disk. In such a case,
|
||||||
|
** the value of nRec computed from the file size would be too large. For
|
||||||
|
** that reason, we always use the nRec value in the header.
|
||||||
|
**
|
||||||
|
** If the nRec value is 0xffffffff it means that nRec should be computed
|
||||||
|
** from the file size. This value is used when the user selects the
|
||||||
|
** no-sync option for the journal. A power failure could lead to corruption
|
||||||
|
** in this case. But for things like temporary tables (which will be
|
||||||
|
** deleted when the power is restored) we don't care.
|
||||||
|
**
|
||||||
|
** Journal formats 1 and 2 do not have an nRec value in the header so we
|
||||||
|
** have to compute nRec from the file size. This has risks (as described
|
||||||
|
** above) which is why all persistent tables have been changed to use
|
||||||
|
** format 3.
|
||||||
**
|
**
|
||||||
** If the file opened as the journal file is not a well-formed
|
** If the file opened as the journal file is not a well-formed
|
||||||
** journal file (as determined by looking at the magic number
|
** journal file then the database will likely already be
|
||||||
** at the beginning) then this routine returns SQLITE_PROTOCOL.
|
** corrupted, so the PAGER_ERR_CORRUPT bit is set in pPager->errMask
|
||||||
** If any other errors occur during playback, the database will
|
** and SQLITE_CORRUPT is returned. If it all works, then this routine
|
||||||
** likely be corrupted, so the PAGER_ERR_CORRUPT bit is set in
|
** returns SQLITE_OK.
|
||||||
** pPager->errMask and SQLITE_CORRUPT is returned. If it all
|
|
||||||
** works, then this routine returns SQLITE_OK.
|
|
||||||
*/
|
*/
|
||||||
static int pager_playback(Pager *pPager, int useJournalSize){
|
static int pager_playback(Pager *pPager, int useJournalSize){
|
||||||
off_t szJ; /* Size of the journal file in bytes */
|
off_t szJ; /* Size of the journal file in bytes */
|
||||||
@@ -573,7 +623,10 @@ static int pager_playback(Pager *pPager, int useJournalSize){
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* If the journal file is too small to contain a complete header,
|
/* If the journal file is too small to contain a complete header,
|
||||||
** then ignore the journal completely.
|
** it must mean that the process that created the journal was just
|
||||||
|
** beginning to write the journal file when it died. In that case,
|
||||||
|
** the database file should have still been completely unchanged.
|
||||||
|
** Nothing needs to be rolled back. We can safely ignore this journal.
|
||||||
*/
|
*/
|
||||||
if( szJ < sizeof(aMagic)+sizeof(Pgno) ){
|
if( szJ < sizeof(aMagic)+sizeof(Pgno) ){
|
||||||
goto end_playback;
|
goto end_playback;
|
||||||
@@ -603,7 +656,7 @@ static int pager_playback(Pager *pPager, int useJournalSize){
|
|||||||
** header. We already did this test once above, but at the prior
|
** header. We already did this test once above, but at the prior
|
||||||
** test, we did not know the journal format and so we had to assume
|
** test, we did not know the journal format and so we had to assume
|
||||||
** the smallest possible header. Now we know the header is bigger
|
** the smallest possible header. Now we know the header is bigger
|
||||||
** than that so we test again.
|
** than the minimum so we test again.
|
||||||
*/
|
*/
|
||||||
goto end_playback;
|
goto end_playback;
|
||||||
}
|
}
|
||||||
@@ -785,8 +838,9 @@ void sqlitepager_set_cachesize(Pager *pPager, int mxPage){
|
|||||||
** when it is rolled back.
|
** when it is rolled back.
|
||||||
**
|
**
|
||||||
** FULL The journal is synced twice before writes begin on the
|
** FULL The journal is synced twice before writes begin on the
|
||||||
** database (with some additional information being written
|
** database (with some additional information - the nRec field
|
||||||
** in between the two syncs. If we assume that writing a
|
** of the journal header - being written in between the two
|
||||||
|
** syncs). If we assume that writing a
|
||||||
** single disk sector is atomic, then this mode provides
|
** single disk sector is atomic, then this mode provides
|
||||||
** assurance that the journal will not be corrupted to the
|
** assurance that the journal will not be corrupted to the
|
||||||
** point of causing damage to the database during rollback.
|
** point of causing damage to the database during rollback.
|
||||||
@@ -946,7 +1000,7 @@ int sqlitepager_pagecount(Pager *pPager){
|
|||||||
/*
|
/*
|
||||||
** Forward declaration
|
** Forward declaration
|
||||||
*/
|
*/
|
||||||
static int syncAllPages(Pager*);
|
static int syncJournal(Pager*);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
** Truncate the file to the number of pages specified.
|
** Truncate the file to the number of pages specified.
|
||||||
@@ -963,7 +1017,7 @@ int sqlitepager_truncate(Pager *pPager, Pgno nPage){
|
|||||||
if( nPage>=(unsigned)pPager->dbSize ){
|
if( nPage>=(unsigned)pPager->dbSize ){
|
||||||
return SQLITE_OK;
|
return SQLITE_OK;
|
||||||
}
|
}
|
||||||
syncAllPages(pPager);
|
syncJournal(pPager);
|
||||||
rc = sqliteOsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)nPage);
|
rc = sqliteOsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)nPage);
|
||||||
if( rc==SQLITE_OK ){
|
if( rc==SQLITE_OK ){
|
||||||
pPager->dbSize = nPage;
|
pPager->dbSize = nPage;
|
||||||
@@ -1069,23 +1123,26 @@ int sqlitepager_ref(void *pData){
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
** Sync the journal and then write all free dirty pages to the database
|
** Sync the journal. In other words, make sure all the pages that have
|
||||||
** file.
|
** been written to the journal have actually reached the surface of the
|
||||||
|
** disk. It is not safe to modify the original database file until after
|
||||||
|
** the journal has been synced. If the original database is modified before
|
||||||
|
** the journal is synced and a power failure occurs, the unsynced journal
|
||||||
|
** data would be lost and we would be unable to completely rollback the
|
||||||
|
** database changes. Database corruption would occur.
|
||||||
**
|
**
|
||||||
** Writing all free dirty pages to the database after the sync is a
|
** This routine also updates the nRec field in the header of the journal.
|
||||||
** non-obvious optimization. fsync() is an expensive operation so we
|
** (See comments on the pager_playback() routine for additional information.)
|
||||||
** want to minimize the number of times it is called. After an fsync() call,
|
** If the sync mode is FULL, two syncs will occur. First the whole journal
|
||||||
** we are free to write dirty pages back to the database. It is best
|
** is synced, then the nRec field is updated, then a second sync occurs.
|
||||||
** to go ahead and write as many dirty pages as possible to minimize
|
|
||||||
** the risk of having to do another fsync() later on. Writing dirty
|
|
||||||
** free pages in this way was observed to make database operations go
|
|
||||||
** up to 10 times faster.
|
|
||||||
**
|
**
|
||||||
** If we are writing to temporary database, there is no need to preserve
|
** For temporary databases, we do not care if we are able to rollback
|
||||||
** the integrity of the journal file, so we can save time and skip the
|
** after a power failure, so no sync occurs.
|
||||||
** fsync().
|
**
|
||||||
|
** This routine clears the needSync field of every page currently held in
|
||||||
|
** memory.
|
||||||
*/
|
*/
|
||||||
static int syncAllPages(Pager *pPager){
|
static int syncJournal(Pager *pPager){
|
||||||
PgHdr *pPg;
|
PgHdr *pPg;
|
||||||
int rc = SQLITE_OK;
|
int rc = SQLITE_OK;
|
||||||
|
|
||||||
@@ -1098,6 +1155,9 @@ static int syncAllPages(Pager *pPager){
|
|||||||
assert( !pPager->noSync );
|
assert( !pPager->noSync );
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
{
|
{
|
||||||
|
/* Make sure the pPager->nRec counter we are keeping agrees
|
||||||
|
** with the nRec computed from the size of the journal file.
|
||||||
|
*/
|
||||||
off_t hdrSz, pgSz, jSz;
|
off_t hdrSz, pgSz, jSz;
|
||||||
hdrSz = JOURNAL_HDR_SZ(journal_format);
|
hdrSz = JOURNAL_HDR_SZ(journal_format);
|
||||||
pgSz = JOURNAL_PG_SZ(journal_format);
|
pgSz = JOURNAL_PG_SZ(journal_format);
|
||||||
@@ -1107,6 +1167,7 @@ static int syncAllPages(Pager *pPager){
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
if( journal_format>=3 ){
|
if( journal_format>=3 ){
|
||||||
|
/* Write the nRec value into the journal file header */
|
||||||
off_t szJ;
|
off_t szJ;
|
||||||
if( pPager->fullSync ){
|
if( pPager->fullSync ){
|
||||||
TRACE1("SYNC\n");
|
TRACE1("SYNC\n");
|
||||||
@@ -1317,7 +1378,7 @@ int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage){
|
|||||||
** it can't be helped.
|
** it can't be helped.
|
||||||
*/
|
*/
|
||||||
if( pPg==0 ){
|
if( pPg==0 ){
|
||||||
int rc = syncAllPages(pPager);
|
int rc = syncJournal(pPager);
|
||||||
if( rc!=0 ){
|
if( rc!=0 ){
|
||||||
sqlitepager_rollback(pPager);
|
sqlitepager_rollback(pPager);
|
||||||
return SQLITE_IOERR;
|
return SQLITE_IOERR;
|
||||||
@@ -1909,7 +1970,7 @@ int sqlitepager_commit(Pager *pPager){
|
|||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
assert( pPager->journalOpen );
|
assert( pPager->journalOpen );
|
||||||
rc = syncAllPages(pPager);
|
rc = syncJournal(pPager);
|
||||||
if( rc!=SQLITE_OK ){
|
if( rc!=SQLITE_OK ){
|
||||||
goto commit_abort;
|
goto commit_abort;
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user