1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-11-12 13:01:09 +03:00

Add the "unix-wfl" VFS that does whole-file locking in order to help NFS

do better cache coherency.

FossilOrigin-Name: 2aeab80e5b84f5e94c5c99b4adeca805601c844b
This commit is contained in:
drh
2009-09-03 16:23:44 +00:00
parent d7d385dde0
commit 0c2694b744
3 changed files with 128 additions and 52 deletions

View File

@@ -1,8 +1,8 @@
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
C Do\snot\sreuse\sfunction\sparameters\sin\ssubsequent\sexpressions\ssince\sthe\s\nfunction\scall\smight\shave\striggered\sa\stext\sencoding\schange.\nFix\sfor\sticket\s[2ea2425d34be].
D 2009-09-03T01:18:01
C Add\sthe\s"unix-wfl"\sVFS\sthat\sdoes\swhole-file\slocking\sin\sorder\sto\shelp\sNFS\ndo\sbetter\scache\scoherency.
D 2009-09-03T16:23:45
F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0
F Makefile.in 73ddeec9dd10b85876c5c2ce1fdce627e1dcc7f8
F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654
@@ -148,7 +148,7 @@ F src/os.c 9fea283e336ee31caa4654d6cb05a129a1c42d2f
F src/os.h 00a1334a4eecee7f7bef79ac606b88d325119f21
F src/os_common.h 8c61457df58f1a4bd5f5adc3e90e01b37bf7afbc
F src/os_os2.c bed77dc26e3a95ce4a204936b9a1ca6fe612fcc5
F src/os_unix.c f14ff07aec3c3c0796e1a743d1bdafddc15d1af1
F src/os_unix.c 52b10971479bb77c0c1ed334f0a73cd87b972ee0
F src/os_win.c 58bb163f327e79726dd119344d908e4d98483c3f
F src/pager.c ebd0a8f2421e8f0ad5b78201440004bf3e1c96d8
F src/pager.h 11852d044c86cf5a9d6e34171fb0c4fcf1f6265f
@@ -753,14 +753,14 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
P 69055e9b4cb6346e9e10fd9dd65e6ea06b959e76
R 7bf564b16edb1681dea7652f8bb4c7d7
P f22e388727f0ba0f187cdee51ff8ba17a5d50b8a
R 318d6752e52f7f4a033e61119f592643
U drh
Z 7cf4ebdc1da81063254cac877fefb993
Z 47965b78e770a8b1304310d0ee191e44
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.6 (GNU/Linux)
iD8DBQFKnxlMoxKgR168RlERApcIAJ97EhKOwWFb+tWJWmzMa9COqA10WACfVx0d
/MeR2s3kHlBnnVZggaFCIXs=
=s4CU
iD8DBQFKn+2UoxKgR168RlERAv/sAJ4sxtNY+4TxErcLxEqGtbpkH3VwnQCgi8kz
7ozoBqiBdahO12HaBHgOl6c=
=OZsT
-----END PGP SIGNATURE-----

View File

@@ -1 +1 @@
f22e388727f0ba0f187cdee51ff8ba17a5d50b8a
2aeab80e5b84f5e94c5c99b4adeca805601c844b

View File

@@ -195,6 +195,7 @@ struct unixFile {
int lastErrno; /* The unix errno from the last I/O error */
void *lockingContext; /* Locking style specific state */
UnixUnusedFd *pUnused; /* Pre-allocated UnixUnusedFd */
int fileFlags; /* Miscellaneous flags */
#if SQLITE_ENABLE_LOCKING_STYLE
int openFlags; /* The flags specified at open() */
#endif
@@ -225,6 +226,11 @@ struct unixFile {
#endif
};
/*
** The following macros define bits in unixFile.fileFlags
*/
#define SQLITE_WHOLE_FILE_LOCKING 0x0001 /* Use whole-file locking */
/*
** Include code that is common to all os_*.c files
*/
@@ -1149,6 +1155,62 @@ static int unixCheckReservedLock(sqlite3_file *id, int *pResOut){
return rc;
}
/*
** Set or clear a POSIX advisory lock on the bytes that are used for
** SHARED and EXCLUSIVE locking.  The "op" argument is one of F_RDLCK,
** F_WRLCK, or F_UNLCK.  The return value is 0 on success and -1 on
** failure.  On failure, the errno of the failing fcntl() call is
** stored in *pErrcode.
**
** The set of bytes that is locked depends on the
** SQLITE_WHOLE_FILE_LOCKING bit of pFile->fileFlags:
**
**   *  Bit clear (the historical default):  Only the small range
**      SHARED_FIRST through SHARED_FIRST+SHARED_SIZE-1 is locked.
**
**   *  Bit set:  Two ranges are locked.  First every byte from
**      SHARED_FIRST onward, then every byte from 0 up to but not
**      including PENDING_BYTE.  The effect is to lock every byte of
**      the file except for PENDING_BYTE and RESERVED_BYTE.
**
** The whole-file range is a superset of the small range, so the two
** schemes are compatible:  a lock taken under one scheme effectively
** excludes a conflicting lock taken under the other.  The reason for
** whole-file locking is that by indicating the full range of bytes to
** be read or written, NFS is given hints that help it maintain cache
** coherency.  Whole-file locking is slower, however, so it should not
** be used except on NFS.
*/
static int rangeLock(unixFile *pFile, int op, int *pErrcode){
  struct flock lk;     /* Lock description handed to fcntl() */
  int res;             /* Result of the most recent locking fcntl() */
  int bWholeFile = (pFile->fileFlags & SQLITE_WHOLE_FILE_LOCKING)!=0;

  /* First (and, for the default scheme, only) range:  begins at
  ** SHARED_FIRST.  An l_len of zero extends the lock to the end of
  ** the file. */
  lk.l_whence = SEEK_SET;
  lk.l_type = op;
  lk.l_start = SHARED_FIRST;
  if( bWholeFile ){
    lk.l_len = 0;
  }else{
    lk.l_len = SHARED_SIZE;
  }
  res = fcntl(pFile->h, F_SETLK, &lk);
  *pErrcode = errno;
  if( !bWholeFile ){
    return res;
  }

  /* Whole-file locking from here on.  If the first range could not be
  ** locked there is no point in trying the second.  The NEVER() and
  ** ALWAYS() wrappers mark op==F_UNLCK as a case that is not expected
  ** to reach this branch. */
  if( !NEVER(op==F_UNLCK) && res==(-1) ){
    return res;
  }

  /* Second range:  everything in front of PENDING_BYTE. */
  lk.l_start = 0;
  lk.l_len = PENDING_BYTE;
  res = fcntl(pFile->h, F_SETLK, &lk);
  if( ALWAYS(op!=F_UNLCK) && res==(-1) ){
    /* The second half failed.  Remember why, then release the first
    ** range so that the caller is not left holding half of a lock.
    ** The result of the release is ignored; res continues to report
    ** the failure.
    **
    ** NOTE(review): this unlock covers SHARED_FIRST to end-of-file and
    ** so also drops any lock this process held on that range before
    ** the call - confirm that callers tolerate this. */
    *pErrcode = errno;
    lk.l_start = SHARED_FIRST;
    lk.l_len = 0;
    lk.l_type = F_UNLCK;
    fcntl(pFile->h, F_SETLK, &lk);
  }
  return res;
}
/*
** Lock the file with the lock specified by parameter locktype - one
** of the following:
@@ -1217,6 +1279,7 @@ static int unixLock(sqlite3_file *id, int locktype){
struct unixLockInfo *pLock = pFile->pLock;
struct flock lock;
int s;
int tErrno;
assert( pFile );
OSTRACE7("LOCK %d %s was %s(%s,%d) pid=%d\n", pFile->h,
@@ -1233,7 +1296,10 @@ static int unixLock(sqlite3_file *id, int locktype){
return SQLITE_OK;
}
/* Make sure the locking sequence is correct
/* Make sure the locking sequence is correct.
** (1) We never move from unlocked to anything higher than shared lock.
** (2) SQLite never explicitly requests a pending lock.
** (3) A shared lock is always held when a reserve lock is requested.
*/
assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK );
assert( locktype!=PENDING_LOCK );
@@ -1277,14 +1343,13 @@ static int unixLock(sqlite3_file *id, int locktype){
goto end_lock;
}
lock.l_len = 1L;
lock.l_whence = SEEK_SET;
/* A PENDING lock is needed before acquiring a SHARED lock and before
** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will
** be released.
*/
lock.l_len = 1L;
lock.l_whence = SEEK_SET;
if( locktype==SHARED_LOCK
|| (locktype==EXCLUSIVE_LOCK && pFile->locktype<PENDING_LOCK)
){
@@ -1292,7 +1357,7 @@ static int unixLock(sqlite3_file *id, int locktype){
lock.l_start = PENDING_BYTE;
s = fcntl(pFile->h, F_SETLK, &lock);
if( s==(-1) ){
int tErrno = errno;
tErrno = errno;
rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
if( IS_LOCK_ERROR(rc) ){
pFile->lastErrno = tErrno;
@@ -1306,16 +1371,12 @@ static int unixLock(sqlite3_file *id, int locktype){
** operating system calls for the specified lock.
*/
if( locktype==SHARED_LOCK ){
int tErrno = 0;
assert( pLock->cnt==0 );
assert( pLock->locktype==0 );
/* Now get the read-lock */
lock.l_start = SHARED_FIRST;
lock.l_len = SHARED_SIZE;
if( (s = fcntl(pFile->h, F_SETLK, &lock))==(-1) ){
tErrno = errno;
}
s = rangeLock(pFile, F_RDLCK, &tErrno);
/* Drop the temporary PENDING lock */
lock.l_start = PENDING_BYTE;
lock.l_len = 1L;
@@ -1355,17 +1416,16 @@ static int unixLock(sqlite3_file *id, int locktype){
switch( locktype ){
case RESERVED_LOCK:
lock.l_start = RESERVED_BYTE;
s = fcntl(pFile->h, F_SETLK, &lock);
tErrno = errno;
break;
case EXCLUSIVE_LOCK:
lock.l_start = SHARED_FIRST;
lock.l_len = SHARED_SIZE;
s = rangeLock(pFile, F_WRLCK, &tErrno);
break;
default:
assert(0);
}
s = fcntl(pFile->h, F_SETLK, &lock);
if( s==(-1) ){
int tErrno = errno;
rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
if( IS_LOCK_ERROR(rc) ){
pFile->lastErrno = tErrno;
@@ -1457,11 +1517,12 @@ static void setPendingFd(unixFile *pFile){
** the requested locking level, this routine is a no-op.
*/
static int unixUnlock(sqlite3_file *id, int locktype){
struct unixLockInfo *pLock;
struct flock lock;
int rc = SQLITE_OK;
unixFile *pFile = (unixFile*)id;
int h;
unixFile *pFile = (unixFile*)id; /* The open file */
struct unixLockInfo *pLock; /* Structure describing current lock state */
struct flock lock; /* Information passed into fcntl() */
int rc = SQLITE_OK; /* Return code from this interface */
int h; /* The underlying file descriptor */
int tErrno; /* Error code from system call errors */
assert( pFile );
OSTRACE7("UNLOCK %d %d was %d(%d,%d) pid=%d\n", pFile->h, locktype,
@@ -1501,12 +1562,7 @@ static int unixUnlock(sqlite3_file *id, int locktype){
if( locktype==SHARED_LOCK ){
lock.l_type = F_RDLCK;
lock.l_whence = SEEK_SET;
lock.l_start = SHARED_FIRST;
lock.l_len = SHARED_SIZE;
if( fcntl(h, F_SETLK, &lock)==(-1) ){
int tErrno = errno;
if( rangeLock(pFile, F_RDLCK, &tErrno)==(-1) ){
rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_RDLOCK);
if( IS_LOCK_ERROR(rc) ){
pFile->lastErrno = tErrno;
@@ -1521,7 +1577,7 @@ static int unixUnlock(sqlite3_file *id, int locktype){
if( fcntl(h, F_SETLK, &lock)!=(-1) ){
pLock->locktype = SHARED_LOCK;
}else{
int tErrno = errno;
tErrno = errno;
rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
if( IS_LOCK_ERROR(rc) ){
pFile->lastErrno = tErrno;
@@ -1547,7 +1603,7 @@ static int unixUnlock(sqlite3_file *id, int locktype){
if( fcntl(h, F_SETLK, &lock)!=(-1) ){
pLock->locktype = NO_LOCK;
}else{
int tErrno = errno;
tErrno = errno;
rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
if( IS_LOCK_ERROR(rc) ){
pFile->lastErrno = tErrno;
@@ -1696,7 +1752,7 @@ static int nolockClose(sqlite3_file *id) {
/******************************************************************************
************************* Begin dot-file Locking ******************************
**
** The dotfile locking implementation uses the existing of separate lock
** The dotfile locking implementation uses the existence of separate lock
** files in order to control access to the database. This works on just
** about every filesystem imaginable. But there are serious downsides:
**
@@ -3199,11 +3255,11 @@ static const sqlite3_io_methods METHOD = { \
unixSectorSize, /* xSectorSize */ \
unixDeviceCharacteristics /* xDeviceCapabilities */ \
}; \
static const sqlite3_io_methods *FINDER##Impl(const char *z, int h){ \
UNUSED_PARAMETER(z); UNUSED_PARAMETER(h); \
static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){ \
UNUSED_PARAMETER(z); UNUSED_PARAMETER(p); \
return &METHOD; \
} \
static const sqlite3_io_methods *(*const FINDER)(const char*,int) \
static const sqlite3_io_methods *(*const FINDER)(const char*,unixFile *p) \
= FINDER##Impl;
/*
@@ -3269,6 +3325,23 @@ IOMETHODS(
)
#endif
/*
** Finder for "Whole File Locking".  The I/O methods returned are the
** very same ones that the posix locking finder returns.  The only
** difference is a side effect:  the SQLITE_WHOLE_FILE_LOCKING flag is
** set on the open file object, which forces the posix advisory locks
** to cover the whole file instead of just a small span of bytes near
** the 1GiB boundary.
**
** Whole File Locking is useful on NFS-mounted files because it helps
** NFS to maintain cache coherency.  On other filesystems it is a
** detriment because it runs slower.
*/
static const sqlite3_io_methods *posixWflIoFinderImpl(
  const char *z,          /* Name of the database file.  Not used. */
  unixFile *p             /* Open file object that receives the flag */
){
  p->fileFlags = SQLITE_WHOLE_FILE_LOCKING;
  UNUSED_PARAMETER(z);
  return &posixIoMethods;
}
static const sqlite3_io_methods
  *(*const posixWflIoFinder)(const char*,unixFile*) = posixWflIoFinderImpl;
/*
** The proxy locking method is a "super-method" in the sense that it
** opens secondary file descriptors for the conch and lock files and
@@ -3304,7 +3377,7 @@ IOMETHODS(
*/
static const sqlite3_io_methods *autolockIoFinderImpl(
const char *filePath, /* name of the database file */
int fd /* file descriptor open on the database file */
unixFile *pNew /* open file object for the database file */
){
static const struct Mapping {
const char *zFilesystem; /* Filesystem type name */
@@ -3349,14 +3422,15 @@ static const sqlite3_io_methods *autolockIoFinderImpl(
lockInfo.l_start = 0;
lockInfo.l_whence = SEEK_SET;
lockInfo.l_type = F_RDLCK;
if( fcntl(fd, F_GETLK, &lockInfo)!=-1 ) {
if( fcntl(pNew->h, F_GETLK, &lockInfo)!=-1 ) {
pNew->fileFlags = SQLITE_WHOLE_FILE_LOCKING;
return &posixIoMethods;
}else{
return &dotlockIoMethods;
}
}
static const sqlite3_io_methods *(*const autolockIoFinder)(const char*,int)
= autolockIoFinderImpl;
static const sqlite3_io_methods
*(*const autolockIoFinder)(const char*,unixFile*) = autolockIoFinderImpl;
#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */
@@ -3370,7 +3444,7 @@ static const sqlite3_io_methods *(*const autolockIoFinder)(const char*,int)
*/
static const sqlite3_io_methods *autolockIoFinderImpl(
const char *filePath, /* name of the database file */
int fd /* file descriptor open on the database file */
unixFile *pNew /* the open file object */
){
struct flock lockInfo;
@@ -3387,21 +3461,21 @@ static const sqlite3_io_methods *autolockIoFinderImpl(
lockInfo.l_start = 0;
lockInfo.l_whence = SEEK_SET;
lockInfo.l_type = F_RDLCK;
if( fcntl(fd, F_GETLK, &lockInfo)!=-1 ) {
if( fcntl(pNew->h, F_GETLK, &lockInfo)!=-1 ) {
return &posixIoMethods;
}else{
return &semIoMethods;
}
}
static const sqlite3_io_methods *(*const autolockIoFinder)(const char*,int)
= autolockIoFinderImpl;
static const sqlite3_io_methods
*(*const autolockIoFinder)(const char*,unixFile*) = autolockIoFinderImpl;
#endif /* OS_VXWORKS && SQLITE_ENABLE_LOCKING_STYLE */
/*
** An abstract type for a pointer to a IO method finder function:
*/
typedef const sqlite3_io_methods *(*finder_type)(const char*,int);
typedef const sqlite3_io_methods *(*finder_type)(const char*,unixFile*);
/****************************************************************************
@@ -3439,6 +3513,7 @@ static int fillInUnixFile(
pNew->h = h;
pNew->dirfd = dirfd;
SET_THREADID(pNew);
pNew->fileFlags = 0;
#if OS_VXWORKS
pNew->pId = vxworksFindFileId(zFilename);
@@ -3451,7 +3526,7 @@ static int fillInUnixFile(
if( noLock ){
pLockingStyle = &nolockIoMethods;
}else{
pLockingStyle = (**(finder_type*)pVfs->pAppData)(zFilename, h);
pLockingStyle = (**(finder_type*)pVfs->pAppData)(zFilename, pNew);
#if SQLITE_ENABLE_LOCKING_STYLE
/* Cache zFilename in the locking context (AFP and dotlock override) for
** proxyLock activation is possible (remote proxy is based on db name)
@@ -5266,6 +5341,7 @@ int sqlite3_os_init(void){
#endif
UNIXVFS("unix-none", nolockIoFinder ),
UNIXVFS("unix-dotfile", dotlockIoFinder ),
UNIXVFS("unix-wfl", posixWflIoFinder ),
#if OS_VXWORKS
UNIXVFS("unix-namedsem", semIoFinder ),
#endif