mirror of
https://github.com/sqlite/sqlite.git
synced 2025-08-05 15:55:57 +03:00
Update the wal-index hash format so that hash-table space is reused following
a rollback, thus preventing hash table overflows. Add assert()s to verify that hash tables do not overfill. Further refactoring of the wal-index code. FossilOrigin-Name: ada9a8c7b69c5dd2d66bbf62b61181651e6d2142
This commit is contained in:
18
manifest
18
manifest
@@ -1,8 +1,8 @@
|
||||
-----BEGIN PGP SIGNED MESSAGE-----
|
||||
Hash: SHA1
|
||||
|
||||
C Refactoring\sof\sthe\sWalIterator\simplementation.
|
||||
D 2010-05-18T18:01:09
|
||||
C Update\sthe\swal-index\shash\sformat\sso\sthat\shash-table\sspace\sis\sreused\sfollowing\na\srollback,\sthus\spreventing\shash\stable\soverflows.\s\sAdd\sassert()s\sto\sverify\nthat\shash\stables\sdo\snot\soverfill.\s\sFurther\srefactoring\sof\sthe\swal-index\scode.
|
||||
D 2010-05-18T23:29:53
|
||||
F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0
|
||||
F Makefile.in a5cad1f8f3e021356bfcc6c77dc16f6f1952bbc3
|
||||
F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654
|
||||
@@ -227,7 +227,7 @@ F src/vdbeblob.c 5327132a42a91e8b7acfb60b9d2c3b1c5c863e0e
|
||||
F src/vdbemem.c 2a82f455f6ca6f78b59fb312f96054c04ae0ead1
|
||||
F src/vdbetrace.c 864cef96919323482ebd9986f2132435115e9cc2
|
||||
F src/vtab.c a0f8a40274e4261696ef57aa806de2776ab72cda
|
||||
F src/wal.c cfbb818b50bec82675aa5322d7ee0e2b2c2a7386
|
||||
F src/wal.c 6ef6731346daf2461462ea20d5fc44682feb1a28
|
||||
F src/wal.h 434f76f51225bb614e43ccb6bd2341541ba6a06e
|
||||
F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f
|
||||
F src/where.c 75fee9e255b62f773fcadd1d1f25b6f63ac7a356
|
||||
@@ -816,14 +816,14 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
|
||||
F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
|
||||
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
|
||||
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
|
||||
P 0a6787908e989bd5e6af25acbdc59ebc8fa61d6d
|
||||
R 70b31773a620515609c56c14086245f3
|
||||
P b5b60fdcc5dcf41f2c79912075ac241f7ce220d6
|
||||
R 4929b4583dfb4e1eb01782532f6827c1
|
||||
U drh
|
||||
Z 7b37c77bed71cbc8c1a6f26dcf7b1090
|
||||
Z d3a6efc94e4a817e6e627477c12bae31
|
||||
-----BEGIN PGP SIGNATURE-----
|
||||
Version: GnuPG v1.4.6 (GNU/Linux)
|
||||
|
||||
iD8DBQFL8tXooxKgR168RlERAkToAJ4sc1mZ1q5W9au06n2/yU3i2HlYxwCdEuup
|
||||
sJTTs22gXenKu7GRNzIKGS0=
|
||||
=4glP
|
||||
iD8DBQFL8yL1oxKgR168RlERAvCcAJ9c+PHCm9rZ4hPCfVE6HbCjS/YEFgCdF7L6
|
||||
bHpr3zEADy01V3PS/VC1PCE=
|
||||
=u4O9
|
||||
-----END PGP SIGNATURE-----
|
||||
|
@@ -1 +1 @@
|
||||
b5b60fdcc5dcf41f2c79912075ac241f7ce220d6
|
||||
ada9a8c7b69c5dd2d66bbf62b61181651e6d2142
|
146
src/wal.c
146
src/wal.c
@@ -12,17 +12,26 @@
|
||||
**
|
||||
** This file contains the implementation of a write-ahead log file used in
|
||||
** "journal_mode=wal" mode.
|
||||
*/
|
||||
#ifndef SQLITE_OMIT_WAL
|
||||
|
||||
#include "wal.h"
|
||||
|
||||
|
||||
/*
|
||||
**
|
||||
** WRITE-AHEAD LOG (WAL) FILE FORMAT
|
||||
**
|
||||
** A wal file consists of a header followed by zero or more "frames".
|
||||
** The file header is 12 bytes in size and consists of the following three
|
||||
** Each frame records the revised content of a single page within the
|
||||
** database file. All changes to the database are recorded by writing
|
||||
** frames into the WAL. Transactions commit when a frame is written that
|
||||
** contains a commit marker. A single WAL can and usually does record
|
||||
** multiple transactions. Periodically, the content of the WAL is
|
||||
** transferred back into the database file in an operation called a
|
||||
** "checkpoint".
|
||||
**
|
||||
** A single WAL file can be used multiple times. In other words, the
|
||||
** WAL can fill up with frames and then be checkpointed. Then new
|
||||
** frames can overwrite the old ones. A WAL always grows from beginning
|
||||
** toward the end. Checksums and counters attached to each frame are
|
||||
** used to determine which frames within the WAL are valid and which
|
||||
** are leftovers from prior checkpoints.
|
||||
**
|
||||
** The WAL header is 12 bytes in size and consists of the following three
|
||||
** big-endian 32-bit unsigned integer values:
|
||||
**
|
||||
** 0: Database page size,
|
||||
@@ -39,32 +48,54 @@
|
||||
** after the commit. For all other records, zero.
|
||||
** 8: Checksum value 1.
|
||||
** 12: Checksum value 2.
|
||||
**
|
||||
** READER ALGORITHM
|
||||
**
|
||||
** To read a page from the database (call it page number P), a reader
|
||||
** first checks the WAL to see if it contains page P. If so, then the
|
||||
** last valid instance of page P that is or is followed by a commit frame
|
||||
** become the value read. If the WAL contains no copies of page P that
|
||||
** are valid and which are or are followed by a commit frame, then page
|
||||
** P is read from the database file.
|
||||
**
|
||||
** The reader algorithm in the previous paragraph works correctly, but
|
||||
** because frames for page P can appear anywhere within the WAL, the
|
||||
** reader has to scan the either WAL looking for page P frames. If the
|
||||
** WAL is large (multiple megabytes is typical) that scan can be slow,
|
||||
** and read performanc suffers. To overcome this problem, a separate
|
||||
** datastructure called the wal-index is maintained to expedite the
|
||||
** search for frames of a particular page.
|
||||
**
|
||||
** WAL-INDEX FORMAT
|
||||
**
|
||||
** Conceptually, the wal-index is shared memory, though VFS implementations
|
||||
** might choose to implement the wal-index using a mmapped file. Because
|
||||
** the wal-index is shared memory, SQLite does not support journal_mode=WAL
|
||||
** on a network filesystem. All users of the database must be able to
|
||||
** share memory.
|
||||
**
|
||||
** The wal-index is transient. After a crash, the wal-index can (and should
|
||||
** be) reconstructed from the original WAL file. In fact, the VFS is required
|
||||
** to either truncate or zero the header of the wal-index when the last
|
||||
** connection to it closes. Because the wal-index is transient, it can
|
||||
** use an architecture-specific format; it does not have to be cross-platform.
|
||||
** Hence, unlike the database and WAL file formats which store all values
|
||||
** as big endian, the wal-index can store multi-byte values in the native
|
||||
** byte order of the host computer.
|
||||
**
|
||||
** The purpose of the wal-index is to answer this question quickly: Given
|
||||
** a page number P, return the index of the last frame for page P in the WAL,
|
||||
** or return NULL if there are no frames for page P in the WAL.
|
||||
**
|
||||
** The wal-index consists of a header region, followed by an one or
|
||||
** more index blocks.
|
||||
**
|
||||
** To be completed....
|
||||
*/
|
||||
#ifndef SQLITE_OMIT_WAL
|
||||
|
||||
#include "wal.h"
|
||||
|
||||
/*
|
||||
** WAL-INDEX FILE FORMAT
|
||||
**
|
||||
** The wal-index consists of a header region, followed by an
|
||||
** 8-byte region that contains no useful data (used to apply byte-range locks
|
||||
** in some implementations), followed by the data region.
|
||||
**
|
||||
** The contents of both the header and data region are specified in terms
|
||||
** of 1, 2 and 4 byte unsigned integers. All integers are stored in
|
||||
** machine-endian order. The wal-index is not a persistent file and
|
||||
** so it does not need to be portable across archtectures.
|
||||
**
|
||||
** A wal-index file is essentially a shadow-pager map. It contains a
|
||||
** mapping from database page number to the set of locations in the wal
|
||||
** file that contain versions of the database page. When a database
|
||||
** client needs to read a page of data, it first queries the wal-index
|
||||
** to determine if the required version of the page is stored in
|
||||
** the wal. If so, the page is read from the wal. If not, the page is
|
||||
** read from the database file.
|
||||
**
|
||||
** Whenever a transaction is appended to the wal or a checkpoint transfers
|
||||
** data from the wal into the database file, the wal-index is
|
||||
** updated accordingly.
|
||||
*/
|
||||
|
||||
/* Object declarations */
|
||||
typedef struct WalIndexHdr WalIndexHdr;
|
||||
@@ -81,7 +112,7 @@ typedef struct WalIterator WalIterator;
|
||||
struct WalIndexHdr {
|
||||
u32 iChange; /* Counter incremented each transaction */
|
||||
u32 pgsz; /* Database page size in bytes */
|
||||
u32 iLastPg; /* Address of last valid frame in log */
|
||||
u32 iLastPg; /* Index of last valid frame in the WAL */
|
||||
u32 nPage; /* Size of database in pages */
|
||||
u32 iCheck1; /* Checkpoint value 1 */
|
||||
u32 iCheck2; /* Checkpoint value 2 */
|
||||
@@ -305,15 +336,18 @@ static int walDecodeFrame(
|
||||
}
|
||||
|
||||
/*
|
||||
** Define the size of the hash tables in the wal-index file. There
|
||||
** Define the parameters of the hash tables in the wal-index file. There
|
||||
** is a hash-table following every HASHTABLE_NPAGE page numbers in the
|
||||
** wal-index.
|
||||
**
|
||||
** Changing any of these constants will alter the wal-index format and
|
||||
** create incompatibilities.
|
||||
*/
|
||||
#define HASHTABLE_NPAGE 4096
|
||||
#define HASHTABLE_NPAGE 4096 /* Must be power of 2 and multiple of 256 */
|
||||
#define HASHTABLE_DATATYPE u16
|
||||
|
||||
#define HASHTABLE_NSLOT (HASHTABLE_NPAGE*2)
|
||||
#define HASHTABLE_NBYTE (sizeof(HASHTABLE_DATATYPE)*HASHTABLE_NSLOT)
|
||||
#define HASHTABLE_HASH_1 383 /* Should be prime */
|
||||
#define HASHTABLE_NSLOT (HASHTABLE_NPAGE*2) /* Must be a power of 2 */
|
||||
#define HASHTABLE_NBYTE (sizeof(HASHTABLE_DATATYPE)*HASHTABLE_NSLOT)
|
||||
|
||||
/*
|
||||
** Return the index in the Wal.pWiData array that corresponds to
|
||||
@@ -410,8 +444,18 @@ static int walIndexRemap(Wal *pWal, int enlargeTo){
|
||||
*/
|
||||
#define WALINDEX_MMAP_INCREMENT (64*1024)
|
||||
|
||||
static int walHashKey(u32 iPage){
|
||||
return (iPage*2) % (HASHTABLE_NSLOT-1);
|
||||
|
||||
/*
|
||||
** Compute a hash on a page number. The resulting hash value must land
|
||||
** between 0 and (HASHTABLE_NSLOT-1).
|
||||
*/
|
||||
static int walHash(u32 iPage){
|
||||
assert( iPage>0 );
|
||||
assert( (HASHTABLE_NSLOT & (HASHTABLE_NSLOT-1))==0 );
|
||||
return (iPage*HASHTABLE_HASH_1) & (HASHTABLE_NSLOT-1);
|
||||
}
|
||||
static int walNextHash(int iPriorHash){
|
||||
return (iPriorHash+1)&(HASHTABLE_NSLOT-1);
|
||||
}
|
||||
|
||||
|
||||
@@ -461,11 +505,8 @@ static void walHashFind(
|
||||
|
||||
|
||||
/*
|
||||
** Set an entry in the wal-index map to map log frame iFrame to db
|
||||
** page iPage. Values are always appended to the wal-index (i.e. the
|
||||
** value of iFrame is always exactly one more than the value passed to
|
||||
** the previous call), but that restriction is not enforced or asserted
|
||||
** here.
|
||||
** Set an entry in the wal-index that will map database page number
|
||||
** pPage into WAL frame iFrame.
|
||||
*/
|
||||
static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){
|
||||
int rc; /* Return code */
|
||||
@@ -490,12 +531,16 @@ static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){
|
||||
volatile u32 *aPgno; /* Page number array */
|
||||
volatile HASHTABLE_DATATYPE *aHash; /* Hash table */
|
||||
int idx; /* Value to write to hash-table slot */
|
||||
TESTONLY( int nCollide = 0; /* Number of hash collisions */ )
|
||||
|
||||
walHashFind(pWal, iFrame, &aHash, &aPgno, &iZero);
|
||||
idx = iFrame - iZero;
|
||||
if( idx==1 ) memset((void*)aHash, 0, HASHTABLE_NBYTE);
|
||||
if( idx==1 ) memset((void*)aHash, 0xff, HASHTABLE_NBYTE);
|
||||
assert( idx <= HASHTABLE_NSLOT/2 + 1 );
|
||||
aPgno[iFrame] = iPage;
|
||||
for(iKey=walHashKey(iPage); aHash[iKey]; iKey=(iKey+1)%HASHTABLE_NSLOT);
|
||||
for(iKey=walHash(iPage); aHash[iKey]<idx; iKey=walNextHash(iKey)){
|
||||
assert( nCollide++ < idx );
|
||||
}
|
||||
aHash[iKey] = idx;
|
||||
}
|
||||
|
||||
@@ -1233,11 +1278,14 @@ int sqlite3WalRead(
|
||||
volatile u32 *aPgno; /* Pointer to array of page numbers */
|
||||
u32 iZero; /* Frame number corresponding to aPgno[0] */
|
||||
int iKey; /* Hash slot index */
|
||||
int mxHash; /* upper bound on aHash[] values */
|
||||
|
||||
walHashFind(pWal, iHash, &aHash, &aPgno, &iZero);
|
||||
for(iKey=walHashKey(pgno); aHash[iKey]; iKey=(iKey+1)%HASHTABLE_NSLOT){
|
||||
mxHash = iLast - iZero;
|
||||
if( mxHash > HASHTABLE_NPAGE ) mxHash = HASHTABLE_NPAGE;
|
||||
for(iKey=walHash(pgno); aHash[iKey]<=mxHash; iKey=walNextHash(iKey)){
|
||||
u32 iFrame = aHash[iKey] + iZero;
|
||||
if( iFrame<=iLast && aPgno[iFrame]==pgno && iFrame>iRead ){
|
||||
if( ALWAYS(iFrame<=iLast) && aPgno[iFrame]==pgno && iFrame>iRead ){
|
||||
iRead = iFrame;
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user