1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-08-05 15:55:57 +03:00

Allocate more overflow data onto overflow pages, thus wasting less disk space. (CVS 1367)

FossilOrigin-Name: 1d52a4bb478648ef53a0dbb21865ccb9281dc24a
This commit is contained in:
drh
2004-05-13 01:12:56 +00:00
parent fc70e6fcec
commit 6f11bef7d6
3 changed files with 218 additions and 184 deletions

View File

@@ -1,5 +1,5 @@
C Fix\sa\sproblem\swith\sB+trees.\s(CVS\s1366) C Allocate\smore\soverflow\sdata\sonto\soverflow\spages,\sthus\swasting\sless\sdisk\sspace.\s(CVS\s1367)
D 2004-05-12T21:11:27 D 2004-05-13T01:12:57
F Makefile.in ab7b0d5118e2da97bac66be8684a1034e3500f5a F Makefile.in ab7b0d5118e2da97bac66be8684a1034e3500f5a
F Makefile.linux-gcc b86a99c493a5bfb402d1d9178dcdc4bd4b32f906 F Makefile.linux-gcc b86a99c493a5bfb402d1d9178dcdc4bd4b32f906
F README f1de682fbbd94899d50aca13d387d1b3fd3be2dd F README f1de682fbbd94899d50aca13d387d1b3fd3be2dd
@@ -23,7 +23,7 @@ F sqlite.def fc4f5734786fe4743cfe2aa98eb2da4b089edb5f
F sqlite.pc.in 30552343140c53304c2a658c080fbe810cd09ca2 F sqlite.pc.in 30552343140c53304c2a658c080fbe810cd09ca2
F src/attach.c c315c58cb16fd6e913b3bfa6412aedecb4567fa5 F src/attach.c c315c58cb16fd6e913b3bfa6412aedecb4567fa5
F src/auth.c 5c2f0bea4729c98c2be3b69d6b466fc51448fe79 F src/auth.c 5c2f0bea4729c98c2be3b69d6b466fc51448fe79
F src/btree.c 09f5838dd0353b5a7439b677066e1f3a187b2a77 F src/btree.c 35fd97038d146f4880a0349b2fe7a4f6ce466eea
F src/btree.h 6f51ad0ffebfba71295fcacdbe86007512200050 F src/btree.h 6f51ad0ffebfba71295fcacdbe86007512200050
F src/btree_rb.c 9d7973e266ee6f9c61ce592f68742ce9cd5b10e5 F src/btree_rb.c 9d7973e266ee6f9c61ce592f68742ce9cd5b10e5
F src/build.c f25e4ac9f102efd70188bc09a459c2b461fe2135 F src/build.c f25e4ac9f102efd70188bc09a459c2b461fe2135
@@ -191,7 +191,7 @@ F www/sqlite.tcl 3c83b08cf9f18aa2d69453ff441a36c40e431604
F www/tclsqlite.tcl b9271d44dcf147a93c98f8ecf28c927307abd6da F www/tclsqlite.tcl b9271d44dcf147a93c98f8ecf28c927307abd6da
F www/vdbe.tcl 9b9095d4495f37697fd1935d10e14c6015e80aa1 F www/vdbe.tcl 9b9095d4495f37697fd1935d10e14c6015e80aa1
F www/whentouse.tcl a8335bce47cc2fddb07f19052cb0cb4d9129a8e4 F www/whentouse.tcl a8335bce47cc2fddb07f19052cb0cb4d9129a8e4
P b8f70d17f06531269caa0a127efb2d25ad0f3e1c P 64a75c4cd40f79c7b384bb2972922ff0c10212a4
R f511235dca0d03c381f4bc44142fa7f8 R bb0bf681c9aa31fdafb3dd299e62bb5d
U drh U drh
Z 005ea33a8639fedd11d1cc2c48596cad Z 90888ec7d9d6df77a80907a07724f815

View File

@@ -1 +1 @@
64a75c4cd40f79c7b384bb2972922ff0c10212a4 1d52a4bb478648ef53a0dbb21865ccb9281dc24a

View File

@@ -9,7 +9,7 @@
** May you share freely, never taking more than you give. ** May you share freely, never taking more than you give.
** **
************************************************************************* *************************************************************************
** $Id: btree.c,v 1.130 2004/05/12 21:11:27 drh Exp $ ** $Id: btree.c,v 1.131 2004/05/13 01:12:57 drh Exp $
** **
** This file implements an external (disk-based) database using BTrees. ** This file implements an external (disk-based) database using BTrees.
** For a detailed discussion of BTrees, refer to ** For a detailed discussion of BTrees, refer to
@@ -61,24 +61,46 @@
** 16 2 Page size in bytes. ** 16 2 Page size in bytes.
** 18 1 File format write version ** 18 1 File format write version
** 19 1 File format read version ** 19 1 File format read version
** 20 2 Bytes of unused space at the end of each page ** 20 1 Bytes of unused space at the end of each page
** 22 2 Maximum allowed local payload per entry ** 21 1 Max embedded payload fraction
** 24 8 File change counter ** 22 1 Min embedded payload fraction
** 23 1 Min leaf payload fraction
** 24 4 File change counter
** 28 4 Reserved for future use
** 32 4 First freelist page ** 32 4 First freelist page
** 36 4 Number of freelist pages in the file ** 36 4 Number of freelist pages in the file
** 40 60 15 4-byte meta values passed to higher layers ** 40 60 15 4-byte meta values passed to higher layers
** **
** All of the integer values are big-endian (most significant byte first). ** All of the integer values are big-endian (most significant byte first).
** The file change counter is incremented every time the database is changed. **
** This allows other processes to know when the file has changed and thus ** The file change counter is incremented every time the database is changed more
** when they need to flush their cache. ** than once within the same second. This counter, together with the
** modification time of the file, allows other processes to know
** when the file has changed and thus when they need to flush their
** cache.
**
** The max embedded payload fraction is the amount of the total usable
** space in a page that can be consumed by a single cell for standard
** B-tree (non-LEAFDATA) tables. A value of 255 means 100%. The default
** is to limit the maximum cell size so that at least 4 cells will fit
** on one page. Thus the default max embedded payload fraction is 64.
**
** If the payload for a cell is larger than the max payload, then extra
** payload is spilled to overflow pages. Once an overflow page is allocated,
** as many bytes as possible are moved into the overflow pages without letting
** the cell size drop below the min embedded payload fraction.
**
** The min leaf payload fraction is like the min embedded payload fraction
** except that it applies to leaf nodes in a LEAFDATA tree. The maximum
** payload fraction for a LEAFDATA tree is always 100% (or 255) and it
** is not specified in the header.
** **
** Each btree page begins with a header described below. Note that the ** Each btree page begins with a header described below. Note that the
** header for page one begins at byte 100. For all other btree pages, the ** header for page one begins at byte 100. For all other btree pages, the
** header begins on byte zero. ** header begins on byte zero.
** **
** OFFSET SIZE DESCRIPTION ** OFFSET SIZE DESCRIPTION
** 0 1 Flags. 01: leaf, 02: zerodata, 04: intkey, F8: type ** 0 1 Flags. 1: intkey, 2: zerodata, 4: leafdata, 8: leaf
** 1 2 byte offset to the first freeblock ** 1 2 byte offset to the first freeblock
** 3 2 byte offset to the first cell ** 3 2 byte offset to the first cell
** 5 1 number of fragmented free bytes ** 5 1 number of fragmented free bytes
@@ -91,14 +113,15 @@
** **
** A variable-length integer is 1 to 9 bytes where the lower 7 bits of each ** A variable-length integer is 1 to 9 bytes where the lower 7 bits of each
** byte are used. The integer consists of all bytes that have bit 8 set and ** byte are used. The integer consists of all bytes that have bit 8 set and
** the first byte with bit 8 clear. Unlike fixed-length values, variable- ** the first byte with bit 8 clear. The most significant byte of the integer
** length integers are little-endian. Examples: ** appears first.
** **
** 0x00 becomes 0x00000000 ** 0x00 becomes 0x00000000
** 0x1b becomes 0x0000001b ** 0x7f becomes 0x0000007f
** 0x9b 0x4a becomes 0x00000dca ** 0x81 0x00 becomes 0x00000080
** 0x80 0x1b becomes 0x0000001b ** 0x82 0x00 becomes 0x00000100
** 0xf8 0xac 0xb1 0x91 0x01 becomes 0x12345678 ** 0x80 0x7f becomes 0x0000007f
** 0x8a 0x91 0xd1 0xac 0x78 becomes 0x12345678
** 0x81 0x81 0x81 0x81 0x01 becomes 0x10204081 ** 0x81 0x81 0x81 0x81 0x01 becomes 0x10204081
** **
** Variable length integers are used for rowids and to hold the number of ** Variable length integers are used for rowids and to hold the number of
@@ -164,16 +187,10 @@
# define MX_PAGE_SIZE 1024 # define MX_PAGE_SIZE 1024
#endif #endif
/* Individual entries or "cells" are limited in size so that at least
** this many cells will fit on one page. Changing this value will result
** in an incompatible database.
*/
#define MN_CELLS_PER_PAGE 4
/* The following value is the maximum cell size assuming a maximum page /* The following value is the maximum cell size assuming a maximum page
** size given above. ** size given above.
*/ */
#define MX_CELL_SIZE ((MX_PAGE_SIZE-10)/MN_CELLS_PER_PAGE) #define MX_CELL_SIZE (MX_PAGE_SIZE-10)
/* The maximum number of cells on a single page of the database. This /* The maximum number of cells on a single page of the database. This
** assumes a minimum cell size of 3 bytes. Such small cells will be ** assumes a minimum cell size of 3 bytes. Such small cells will be
@@ -251,7 +268,13 @@ struct Btree {
u8 inStmt; /* True if there is a checkpoint on the transaction */ u8 inStmt; /* True if there is a checkpoint on the transaction */
u8 readOnly; /* True if the underlying file is readonly */ u8 readOnly; /* True if the underlying file is readonly */
int pageSize; /* Number of usable bytes on each page */ int pageSize; /* Number of usable bytes on each page */
int maxLocal; /* Maximum local payload */ int maxLocal; /* Maximum local payload in non-LEAFDATA tables */
int minLocal; /* Minimum local payload in non-LEAFDATA tables */
int maxLeaf; /* Maximum local payload in a LEAFDATA table */
int minLeaf; /* Minimum local payload in a LEAFDATA table */
u8 maxEmbedFrac; /* Maximum payload as % of total page size */
u8 minEmbedFrac; /* Minimum payload as % of total page size */
u8 minLeafFrac; /* Minimum leaf payload as % of total page size */
}; };
typedef Btree Bt; typedef Btree Bt;
@@ -275,6 +298,20 @@ struct BtCursor {
u8 status; /* Set to SQLITE_ABORT if cursors is invalidated */ u8 status; /* Set to SQLITE_ABORT if cursors is invalidated */
}; };
/*
** An instance of the following structure is used to hold information
** about a cell. The parseCell() function fills the structure in.
*/
typedef struct CellInfo CellInfo;
struct CellInfo {
i64 nKey; /* The key for INTKEY tables, or number of bytes in key */
u32 nData; /* Number of bytes of data */
int nHeader; /* Size of the header in bytes */
int nLocal; /* Amount of payload held locally */
int iOverflow; /* Offset to overflow page number. Zero if none */
int nSize; /* Size of the cell */
};
/* /*
** Read or write two-, four-, and eight-byte big-endian integer values. ** Read or write two-, four-, and eight-byte big-endian integer values.
*/ */
@@ -284,10 +321,6 @@ static u32 get2byte(unsigned char *p){
static u32 get4byte(unsigned char *p){ static u32 get4byte(unsigned char *p){
return (p[0]<<24) | (p[1]<<16) | (p[2]<<8) | p[3]; return (p[0]<<24) | (p[1]<<16) | (p[2]<<8) | p[3];
} }
static u64 get8byte(unsigned char *p){
u64 v = get4byte(p);
return (v<<32) | get4byte(&p[4]);
}
static void put2byte(unsigned char *p, u32 v){ static void put2byte(unsigned char *p, u32 v){
p[0] = v>>8; p[0] = v>>8;
p[1] = v; p[1] = v;
@@ -298,21 +331,41 @@ static void put4byte(unsigned char *p, u32 v){
p[2] = v>>8; p[2] = v>>8;
p[3] = v; p[3] = v;
} }
#if 0 /* NOT_USED */
static u64 get8byte(unsigned char *p){
u64 v = get4byte(p);
return (v<<32) | get4byte(&p[4]);
}
static void put8byte(unsigned char *p, u64 v){ static void put8byte(unsigned char *p, u64 v){
put4byte(&p[4], v>>32); put4byte(&p[4], v>>32);
put4byte(p, v); put4byte(p, v);
} }
#endif
/* /*
** Read a variable-length integer. Store the result in *pResult. ** Read a variable-length integer. Store the result in *pResult.
** Return the number of bytes in the integer. ** Return the number of bytes in the integer.
*/ */
static unsigned int getVarint(unsigned char *p, u64 *pResult){ static unsigned int getVarint(unsigned char *p, u64 *pResult){
u64 x = p[0] & 0x7f; u64 x = 0;
int n = 0; int n = 0;
while( (p[n++]&0x80)!=0 ){ unsigned char c;
x |= ((u64)(p[n]&0x7f))<<(n*7); do{
} c = p[n++];
x = (x<<7) | (c & 0x7f);
}while( (c & 0x80)!=0 );
*pResult = x;
return n;
}
static unsigned int getVarint32(unsigned char *p, u32 *pResult){
u32 x = 0;
int n = 0;
unsigned char c;
do{
c = p[n++];
x = (x<<7) | (c & 0x7f);
}while( (c & 0x80)!=0 );
*pResult = x; *pResult = x;
return n; return n;
} }
@@ -322,38 +375,70 @@ static unsigned int getVarint(unsigned char *p, u64 *pResult){
** the number of bytes written. ** the number of bytes written.
*/ */
static unsigned int putVarint(unsigned char *p, u64 v){ static unsigned int putVarint(unsigned char *p, u64 v){
int i = 0; int i, j, n;
u8 buf[10];
n = 0;
do{ do{
p[i++] = (v & 0x7f) | 0x80; buf[n++] = (v & 0x7f) | 0x80;
v >>= 7; v >>= 7;
}while( v!=0 ); }while( v!=0 );
p[i-1] &= 0x7f; buf[0] &= 0x7f;
return i; for(i=0, j=n-1; j>=0; j--, i++){
p[i] = buf[j];
}
return n;
} }
/* /*
** Parse a cell header and fill in the CellInfo structure. ** Parse a cell header and fill in the CellInfo structure.
*/ */
static void parseCellHeader( static void parseCell(
MemPage *pPage, /* Page containing the cell */ MemPage *pPage, /* Page containing the cell */
unsigned char *pCell, /* The cell */ unsigned char *pCell, /* The cell */
u64 *pnData, /* Number of bytes of data in payload */ CellInfo *pInfo /* Fill in this structure */
i64 *pnKey, /* Number of bytes of key, or key value for intKey */
int *pnHeader /* Size of header in bytes. Offset to payload */
){ ){
int n; int n;
int nPayload;
Btree *pBt;
int minLocal, maxLocal;
if( pPage->leaf ){ if( pPage->leaf ){
n = 2; n = 2;
}else{ }else{
n = 6; n = 6;
} }
if( pPage->hasData ){ if( pPage->hasData ){
n += getVarint(&pCell[n], pnData); n += getVarint32(&pCell[n], &pInfo->nData);
}else{ }else{
*pnData = 0; pInfo->nData = 0;
}
n += getVarint(&pCell[n], &pInfo->nKey);
pInfo->nHeader = n;
nPayload = pInfo->nData;
if( !pPage->intKey ){
nPayload += pInfo->nKey;
}
pBt = pPage->pBt;
if( pPage->leafData ){
minLocal = pBt->minLeaf;
maxLocal = pBt->pageSize - 23;
}else{
minLocal = pBt->minLocal;
maxLocal = pBt->maxLocal;
}
if( nPayload<=maxLocal ){
pInfo->nLocal = nPayload;
pInfo->iOverflow = 0;
pInfo->nSize = nPayload + n;
}else{
int surplus = minLocal + (nPayload - minLocal)%(pBt->pageSize - 4);
if( surplus <= maxLocal ){
pInfo->nLocal = surplus;
}else{
pInfo->nLocal = minLocal;
}
pInfo->iOverflow = pInfo->nLocal + n;
pInfo->nSize = pInfo->iOverflow + 4;
} }
n += getVarint(&pCell[n], (u64*)pnKey);
*pnHeader = n;
} }
/* /*
@@ -364,21 +449,10 @@ static void parseCellHeader(
** is NOT included in the value returned from this routine. ** is NOT included in the value returned from this routine.
*/ */
static int cellSize(MemPage *pPage, unsigned char *pCell){ static int cellSize(MemPage *pPage, unsigned char *pCell){
int n; CellInfo info;
u64 nData;
i64 nKey;
int nPayload, maxPayload;
parseCellHeader(pPage, pCell, &nData, &nKey, &n); parseCell(pPage, pCell, &info);
nPayload = (int)nData; return info.nSize;
if( !pPage->intKey ){
nPayload += (int)nKey;
}
maxPayload = pPage->pBt->maxLocal;
if( nPayload>maxPayload ){
nPayload = maxPayload + 4;
}
return n + nPayload;
} }
/* /*
@@ -904,19 +978,29 @@ int sqlite3BtreeOpen(
pBt->pPage1 = 0; pBt->pPage1 = 0;
pBt->readOnly = sqlite3pager_isreadonly(pBt->pPager); pBt->readOnly = sqlite3pager_isreadonly(pBt->pPager);
pBt->pageSize = SQLITE_PAGE_SIZE; /* FIX ME - read from header */ pBt->pageSize = SQLITE_PAGE_SIZE; /* FIX ME - read from header */
pBt->maxEmbedFrac = 64; /* FIX ME - read from header */
pBt->minEmbedFrac = 32; /* FIX ME - read from header */
pBt->minLeafFrac = 32; /* FIX ME - read from header */
/* maxLocal is the maximum amount of payload to store locally for /* maxLocal is the maximum amount of payload to store locally for
** a cell. Make sure it is small enough so that at least MN_CELLS_PER_PAGE ** a cell. Make sure it is small enough so that at least minFanout
** will fit on one page. We assume a 10-byte page header. Besides ** cells will fit on one page. We assume a 10-byte page header.
** the payload, the cell must store: ** Besides the payload, the cell must store:
** 2-byte pointer to next cell ** 2-byte pointer to next cell
** 4-byte child pointer ** 4-byte child pointer
** 9-byte nKey value ** 9-byte nKey value
** 4-byte nData value ** 4-byte nData value
** 4-byte overflow page pointer ** 4-byte overflow page pointer
** So a cell consists of a header which is as much as 19 bytes long,
** 0 to N bytes of payload, and an optional 4 byte overflow page pointer.
*/ */
pBt->maxLocal = (pBt->pageSize-10)/MN_CELLS_PER_PAGE - 23; assert(pBt->maxEmbedFrac>0 && 255/pBt->maxEmbedFrac>=3 );
assert( pBt->maxLocal + 23 <= MX_CELL_SIZE ); pBt->maxLocal = (pBt->pageSize-10)*pBt->maxEmbedFrac/255 - 23;
pBt->minLocal = (pBt->pageSize-10)*pBt->minEmbedFrac/255 - 23;
pBt->maxLeaf = pBt->pageSize - 33;
pBt->minLeaf = (pBt->pageSize-10)*pBt->minLeafFrac/255 - 23;
assert( pBt->maxLeaf + 23 <= MX_CELL_SIZE );
*ppBtree = pBt; *ppBtree = pBt;
return SQLITE_OK; return SQLITE_OK;
} }
@@ -1437,7 +1521,6 @@ int sqlite3BtreeKeySize(BtCursor *pCur, i64 *pSize){
int sqlite3BtreeDataSize(BtCursor *pCur, u32 *pSize){ int sqlite3BtreeDataSize(BtCursor *pCur, u32 *pSize){
MemPage *pPage; MemPage *pPage;
unsigned char *cell; unsigned char *cell;
u64 size;
if( !pCur->isValid ){ if( !pCur->isValid ){
return pCur->status ? pCur->status : SQLITE_INTERNAL; return pCur->status ? pCur->status : SQLITE_INTERNAL;
@@ -1455,9 +1538,7 @@ int sqlite3BtreeDataSize(BtCursor *pCur, u32 *pSize){
if( !pPage->leaf ){ if( !pPage->leaf ){
cell += 4; /* Skip the child pointer */ cell += 4; /* Skip the child pointer */
} }
getVarint(cell, &size); getVarint32(cell, pSize);
assert( (size & 0x00000000ffffffff)==size );
*pSize = (u32)size;
} }
return SQLITE_OK; return SQLITE_OK;
} }
@@ -1482,9 +1563,8 @@ static int getPayload(
int rc; int rc;
MemPage *pPage; MemPage *pPage;
Btree *pBt; Btree *pBt;
u64 nData; int ovflSize;
i64 nKey; CellInfo info;
int maxLocal, ovflSize;
assert( pCur!=0 && pCur->pPage!=0 ); assert( pCur!=0 && pCur->pPage!=0 );
assert( pCur->isValid ); assert( pCur->isValid );
@@ -1493,31 +1573,22 @@ static int getPayload(
pageIntegrity(pPage); pageIntegrity(pPage);
assert( pCur->idx>=0 && pCur->idx<pPage->nCell ); assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
aPayload = pPage->aCell[pCur->idx]; aPayload = pPage->aCell[pCur->idx];
aPayload += 2; /* Skip the next cell index */ parseCell(pPage, aPayload, &info);
if( !pPage->leaf ){ aPayload += info.nHeader;
aPayload += 4; /* Skip the child pointer */
}
if( pPage->hasData ){
aPayload += getVarint(aPayload, &nData);
}else{
nData = 0;
}
aPayload += getVarint(aPayload, (u64*)&nKey);
if( pPage->intKey ){ if( pPage->intKey ){
nKey = 0; info.nKey = 0;
} }
assert( offset>=0 ); assert( offset>=0 );
if( skipKey ){ if( skipKey ){
offset += nKey; offset += info.nKey;
} }
if( offset+amt > nKey+nData ){ if( offset+amt > info.nKey+info.nData ){
return SQLITE_ERROR; return SQLITE_ERROR;
} }
maxLocal = pBt->maxLocal; if( offset<info.nLocal ){
if( offset<maxLocal ){
int a = amt; int a = amt;
if( a+offset>maxLocal ){ if( a+offset>info.nLocal ){
a = maxLocal - offset; a = info.nLocal - offset;
} }
memcpy(pBuf, &aPayload[offset], a); memcpy(pBuf, &aPayload[offset], a);
if( a==amt ){ if( a==amt ){
@@ -1527,10 +1598,10 @@ static int getPayload(
pBuf += a; pBuf += a;
amt -= a; amt -= a;
}else{ }else{
offset -= maxLocal; offset -= info.nLocal;
} }
if( amt>0 ){ if( amt>0 ){
nextPage = get4byte(&aPayload[maxLocal]); nextPage = get4byte(&aPayload[info.nLocal]);
} }
ovflSize = pBt->pageSize - 4; ovflSize = pBt->pageSize - 4;
while( amt>0 && nextPage ){ while( amt>0 && nextPage ){
@@ -1629,9 +1700,7 @@ static const unsigned char *fetchPayload(
unsigned char *aPayload; unsigned char *aPayload;
MemPage *pPage; MemPage *pPage;
Btree *pBt; Btree *pBt;
u64 nData; CellInfo info;
i64 nKey;
int maxLocal;
assert( pCur!=0 && pCur->pPage!=0 ); assert( pCur!=0 && pCur->pPage!=0 );
assert( pCur->isValid ); assert( pCur->isValid );
@@ -1640,30 +1709,21 @@ static const unsigned char *fetchPayload(
pageIntegrity(pPage); pageIntegrity(pPage);
assert( pCur->idx>=0 && pCur->idx<pPage->nCell ); assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
aPayload = pPage->aCell[pCur->idx]; aPayload = pPage->aCell[pCur->idx];
aPayload += 2; /* Skip the next cell index */ parseCell(pPage, aPayload, &info);
if( !pPage->leaf ){ aPayload += info.nHeader;
aPayload += 4; /* Skip the child pointer */
}
if( pPage->hasData ){
aPayload += getVarint(aPayload, &nData);
}else{
nData = 0;
}
aPayload += getVarint(aPayload, (u64*)&nKey);
if( pPage->intKey ){ if( pPage->intKey ){
nKey = 0; info.nKey = 0;
} }
maxLocal = pBt->maxLocal;
if( skipKey ){ if( skipKey ){
aPayload += nKey; aPayload += info.nKey;
maxLocal -= nKey; info.nLocal -= info.nKey;
if( amt<0 ) amt = nData; if( amt<0 ) amt = info.nData;
assert( amt<=nData ); assert( amt<=info.nData );
}else{ }else{
if( amt<0 ) amt = nKey; if( amt<0 ) amt = info.nKey;
assert( amt<=nKey ); assert( amt<=info.nKey );
} }
if( amt>maxLocal ){ if( amt>info.nLocal ){
return 0; /* If any of the data is not local, return nothing */ return 0; /* If any of the data is not local, return nothing */
} }
return aPayload; return aPayload;
@@ -2297,21 +2357,15 @@ static int freePage(MemPage *pPage){
*/ */
static int clearCell(MemPage *pPage, unsigned char *pCell){ static int clearCell(MemPage *pPage, unsigned char *pCell){
Btree *pBt = pPage->pBt; Btree *pBt = pPage->pBt;
int rc, n, nPayload; CellInfo info;
u64 nData;
i64 nKey;
Pgno ovflPgno; Pgno ovflPgno;
int rc;
parseCellHeader(pPage, pCell, &nData, &nKey, &n); parseCell(pPage, pCell, &info);
assert( (nData&0x000000007fffffff)==nData ); if( info.iOverflow==0 ){
nPayload = (int)nData;
if( !pPage->intKey ){
nPayload += nKey;
}
if( nPayload<=pBt->maxLocal ){
return SQLITE_OK; /* No overflow pages. Return without doing anything */ return SQLITE_OK; /* No overflow pages. Return without doing anything */
} }
ovflPgno = get4byte(&pCell[n+pBt->maxLocal]); ovflPgno = get4byte(&pCell[info.iOverflow]);
while( ovflPgno!=0 ){ while( ovflPgno!=0 ){
MemPage *pOvfl; MemPage *pOvfl;
rc = getPage(pBt, ovflPgno, &pOvfl); rc = getPage(pBt, ovflPgno, &pOvfl);
@@ -2354,6 +2408,7 @@ static int fillInCell(
Btree *pBt = pPage->pBt; Btree *pBt = pPage->pBt;
Pgno pgnoOvfl = 0; Pgno pgnoOvfl = 0;
int nHeader; int nHeader;
CellInfo info;
/* Fill in the header. */ /* Fill in the header. */
nHeader = 2; nHeader = 2;
@@ -2362,13 +2417,16 @@ static int fillInCell(
} }
if( pPage->hasData ){ if( pPage->hasData ){
nHeader += putVarint(&pCell[nHeader], nData); nHeader += putVarint(&pCell[nHeader], nData);
} }else{
nHeader += putVarint(&pCell[nHeader], *(u64*)&nKey);
/* Fill in the payload */
if( !pPage->hasData ){
nData = 0; nData = 0;
} }
nHeader += putVarint(&pCell[nHeader], *(u64*)&nKey);
parseCell(pPage, pCell, &info);
assert( info.nHeader==nHeader );
assert( info.nKey==nKey );
assert( info.nData==nData );
/* Fill in the payload */
nPayload = nData; nPayload = nData;
if( pPage->intKey ){ if( pPage->intKey ){
pSrc = pData; pSrc = pData;
@@ -2379,14 +2437,10 @@ static int fillInCell(
pSrc = pKey; pSrc = pKey;
nSrc = nKey; nSrc = nKey;
} }
if( nPayload>pBt->maxLocal ){ *pnSize = info.nSize;
*pnSize = nHeader + pBt->maxLocal + 4; spaceLeft = info.nLocal;
}else{
*pnSize = nHeader + nPayload;
}
spaceLeft = pBt->maxLocal;
pPayload = &pCell[nHeader]; pPayload = &pCell[nHeader];
pPrior = &pPayload[pBt->maxLocal]; pPrior = &pCell[info.iOverflow];
while( nPayload>0 ){ while( nPayload>0 ){
if( spaceLeft==0 ){ if( spaceLeft==0 ){
@@ -3106,13 +3160,11 @@ static int balance(MemPage *pPage){
memcpy(&pNew->aData[6], pCell+2, 4); memcpy(&pNew->aData[6], pCell+2, 4);
pTemp = 0; pTemp = 0;
}else if( leafData ){ }else if( leafData ){
i64 nKey; CellInfo info;
u64 nData;
int nHeader;
j--; j--;
parseCellHeader(pNew, apCell[j], &nData, &nKey, &nHeader); parseCell(pNew, apCell[j], &info);
pCell = aInsBuf[i]; pCell = aInsBuf[i];
fillInCell(pParent, pCell, 0, nKey, 0, 0, &sz); fillInCell(pParent, pCell, 0, info.nKey, 0, 0, &sz);
pTemp = 0; pTemp = 0;
}else{ }else{
pCell -= 4; pCell -= 4;
@@ -3577,30 +3629,31 @@ int sqlite3BtreePageDump(Btree *pBt, int pgno, int recursive){
assert( hdr == (pgno==1 ? 100 : 0) ); assert( hdr == (pgno==1 ? 100 : 0) );
idx = get2byte(&data[hdr+3]); idx = get2byte(&data[hdr+3]);
while( idx>0 && idx<=pBt->pageSize ){ while( idx>0 && idx<=pBt->pageSize ){
u64 nData; CellInfo info;
i64 nKey;
int nHeader;
Pgno child; Pgno child;
unsigned char *pCell = &data[idx]; unsigned char *pCell = &data[idx];
int sz = cellSize(pPage, pCell); int sz;
pCell = &data[idx];
parseCell(pPage, pCell, &info);
sz = info.nSize;
sprintf(range,"%d..%d", idx, idx+sz-1); sprintf(range,"%d..%d", idx, idx+sz-1);
parseCellHeader(pPage, pCell, &nData, &nKey, &nHeader);
if( pPage->leaf ){ if( pPage->leaf ){
child = 0; child = 0;
}else{ }else{
child = get4byte(&pCell[2]); child = get4byte(&pCell[2]);
} }
sz = nData; sz = info.nData;
if( !pPage->intKey ) sz += nKey; if( !pPage->intKey ) sz += info.nKey;
if( sz>sizeof(payload)-1 ) sz = sizeof(payload)-1; if( sz>sizeof(payload)-1 ) sz = sizeof(payload)-1;
memcpy(payload, &pCell[nHeader], sz); memcpy(payload, &pCell[info.nHeader], sz);
for(j=0; j<sz; j++){ for(j=0; j<sz; j++){
if( payload[j]<0x20 || payload[j]>0x7f ) payload[j] = '.'; if( payload[j]<0x20 || payload[j]>0x7f ) payload[j] = '.';
} }
payload[sz] = 0; payload[sz] = 0;
printf( printf(
"cell %2d: i=%-10s chld=%-4d nk=%-4lld nd=%-4lld payload=%s\n", "cell %2d: i=%-10s chld=%-4d nk=%-4lld nd=%-4d payload=%s\n",
i, range, child, nKey, nData, payload i, range, child, info.nKey, info.nData, payload
); );
if( pPage->isInit && pPage->aCell[i]!=pCell ){ if( pPage->isInit && pPage->aCell[i]!=pCell ){
printf("**** aCell[%d] does not match on prior entry ****\n", i); printf("**** aCell[%d] does not match on prior entry ****\n", i);
@@ -3791,23 +3844,6 @@ static void checkList(
} }
} }
/*
** Return negative if zKey1<zKey2.
** Return zero if zKey1==zKey2.
** Return positive if zKey1>zKey2.
*/
static int keyCompare(
const char *zKey1, int nKey1,
const char *zKey2, int nKey2
){
int min = nKey1>nKey2 ? nKey2 : nKey1;
int c = memcmp(zKey1, zKey2, min);
if( c==0 ){
c = nKey1 - nKey2;
}
return c;
}
/* /*
** Do various sanity checks on a single page of a tree. Return ** Do various sanity checks on a single page of a tree. Return
** the tree depth. Root pages return 0. Parents of root pages ** the tree depth. Root pages return 0. Parents of root pages
@@ -3840,8 +3876,6 @@ static int checkTreePage(
int i, rc, depth, d2, pgno, cnt; int i, rc, depth, d2, pgno, cnt;
int hdr; int hdr;
u8 *data; u8 *data;
char *zKey1, *zKey2;
int nKey1, nKey2;
BtCursor cur; BtCursor cur;
Btree *pBt; Btree *pBt;
int maxLocal, pageSize; int maxLocal, pageSize;
@@ -3852,7 +3886,6 @@ static int checkTreePage(
/* Check that the page exists /* Check that the page exists
*/ */
cur.pBt = pBt = pCheck->pBt; cur.pBt = pBt = pCheck->pBt;
maxLocal = pBt->maxLocal;
pageSize = pBt->pageSize; pageSize = pBt->pageSize;
if( iPage==0 ) return 0; if( iPage==0 ) return 0;
if( checkRef(pCheck, iPage, zParentContext) ) return 0; if( checkRef(pCheck, iPage, zParentContext) ) return 0;
@@ -3861,6 +3894,7 @@ static int checkTreePage(
checkAppendMsg(pCheck, zContext, zMsg); checkAppendMsg(pCheck, zContext, zMsg);
return 0; return 0;
} }
maxLocal = pPage->leafData ? pBt->maxLeaf : pBt->maxLocal;
if( (rc = initPage(pPage, pParent))!=0 ){ if( (rc = initPage(pPage, pParent))!=0 ){
sprintf(zMsg, "initPage() returns error code %d", rc); sprintf(zMsg, "initPage() returns error code %d", rc);
checkAppendMsg(pCheck, zContext, zMsg); checkAppendMsg(pCheck, zContext, zMsg);
@@ -3873,20 +3907,20 @@ static int checkTreePage(
depth = 0; depth = 0;
cur.pPage = pPage; cur.pPage = pPage;
for(i=0; i<pPage->nCell; i++){ for(i=0; i<pPage->nCell; i++){
u8 *pCell = pPage->aCell[i]; u8 *pCell;
i64 nKey; int sz;
u64 nData; CellInfo info;
int sz, nHeader;
/* Check payload overflow pages /* Check payload overflow pages
*/ */
sprintf(zContext, "On tree page %d cell %d: ", iPage, i); sprintf(zContext, "On tree page %d cell %d: ", iPage, i);
parseCellHeader(pPage, pCell, &nData, &nKey, &nHeader); pCell = pPage->aCell[i];
sz = nData; parseCell(pPage, pCell, &info);
if( !pPage->intKey ) sz += nKey; sz = info.nData;
if( sz>maxLocal ){ if( !pPage->intKey ) sz += info.nKey;
int nPage = (sz - maxLocal + pageSize - 5)/(pageSize - 4); if( sz>info.nLocal ){
checkList(pCheck, 0, get4byte(&pCell[nHeader+maxLocal]),nPage,zContext); int nPage = (sz - info.nLocal + pageSize - 5)/(pageSize - 4);
checkList(pCheck, 0, get4byte(&pCell[info.iOverflow]),nPage,zContext);
} }
/* Check sanity of left child page. /* Check sanity of left child page.