1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-08-08 14:02:16 +03:00

In temp files used for merge sorting, store the size of each packed-memory-array at the start of the array itself. This is to avoid having to store the offsets of all arrays in the (potentially very large) file in main-memory.

FossilOrigin-Name: 8051c1767c4386b0f14a66742d9fac41e001eb07
This commit is contained in:
dan
2011-08-06 12:01:58 +00:00
parent f834eff2f0
commit 1e74e602ec
5 changed files with 155 additions and 119 deletions

View File

@@ -1,5 +1,5 @@
C Minor\sinternal\schanges\sto\svdbesort.c.\sAlso,\sdefault\sto\smerging\slists\stogether\s16\sat\sa\stime.
D 2011-08-05T11:49:12.597
C In\stemp\sfiles\sused\sfor\smerge\ssorting,\sstore\sthe\ssize\sof\seach\spacked-memory-array\sat\sthe\sstart\sof\sthe\sarray\sitself.\sThis\sis\sto\savoid\shaving\sto\sstore\sthe\soffsets\sof\sall\sarrays\sin\sthe\s(potentially\svery\slarge)\sfile\sin\smain-memory.
D 2011-08-06T12:01:58.831
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in c1d7a7f4fd8da6b1815032efca950e3d5125407e
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@@ -238,14 +238,14 @@ F src/update.c 74a6cfb34e9732c1e2a86278b229913b4b51eeec
F src/utf.c c53eb7404b3eb5c1cbb5655c6a7a0e0ce6bd50f0
F src/util.c 0f33bbbdfcc4a2d8cf20c3b2a16ffc3b57c58a70
F src/vacuum.c 05513dca036a1e7848fe18d5ed1265ac0b32365e
F src/vdbe.c 379ccaa6e03797e08aadb1ae6b0495cedff69209
F src/vdbe.c ec7b04557d0849d835c4b1b95b463c2c470b60f8
F src/vdbe.h 5cf09e7ee8a3f7d93bc51f196a96550786afe7a1
F src/vdbeInt.h 9e38e4f866faa9b25e30a1712c3ec1f489097ca1
F src/vdbeInt.h de75338edfafb812f5bf7f1b3881cbc7256b3c17
F src/vdbeapi.c 11dc47987abacb76ad016dcf5abc0dc422482a98
F src/vdbeaux.c 8fb978eb73a97b34d352dd3ef3bff35b1b3fa7e9
F src/vdbeblob.c f024f0bf420f36b070143c32b15cc7287341ffd3
F src/vdbemem.c 0498796b6ffbe45e32960d6a1f5adfb6e419883b
F src/vdbesort.c f17fa625dbe19bfb8f0a0cb728cf9d73cab6ed1e
F src/vdbesort.c d7739da903c6eb41b864939b2e4a34288167f031
F src/vdbetrace.c 5d0dc3d5fd54878cc8d6d28eb41deb8d5885b114
F src/vtab.c 901791a47318c0562cd0c676a2c6ff1bc530e582
F src/wal.c 0c70ad7b1cac6005fa5e2cbefd23ee05e391c290
@@ -954,7 +954,7 @@ F tool/symbols.sh caaf6ccc7300fd43353318b44524853e222557d5
F tool/tostr.awk 11760e1b94a5d3dcd42378f3cc18544c06cfa576
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
F tool/warnings.sh 2ebae31e1eb352696f3c2f7706a34c084b28c262
P db8518cab8e329b1dbe4cd6c81b21ef3ea69fcb1
R 82f4652664dbb6f6efbe2830f0e7593b
P 9ddc324a34dbf97acef92eef21f8a35f63db4c5b
R fa1c073fe4f821491ae9f0a1c071e6ef
U dan
Z 838a8014a1d3a0c9d59ff3654d53daf0
Z 428b5e1af6502133e8ab01b3dcc7e84b

View File

@@ -1 +1 @@
9ddc324a34dbf97acef92eef21f8a35f63db4c5b
8051c1767c4386b0f14a66742d9fac41e001eb07

View File

@@ -4373,8 +4373,6 @@ case OP_IdxInsert: { /* in2 */
assert( pOp->p1>=0 && pOp->p1<p->nCursor );
pC = p->apCsr[pOp->p1];
assert( pC!=0 );
rc = sqlite3VdbeSorterWrite(db, pC);
if( rc!=SQLITE_OK ) goto abort_due_to_error;
pIn2 = &aMem[pOp->p2];
assert( pIn2->flags & MEM_Blob );
pCrsr = pC->pCursor;
@@ -4384,10 +4382,13 @@ case OP_IdxInsert: { /* in2 */
if( rc==SQLITE_OK ){
nKey = pIn2->n;
zKey = pIn2->z;
rc = sqlite3BtreeInsert(pCrsr, zKey, nKey, "", 0, 0, pOp->p3,
((pOp->p5 & OPFLAG_USESEEKRESULT) ? pC->seekResult : 0)
);
assert( pC->deferredMoveto==0 );
rc = sqlite3VdbeSorterWrite(db, pC, nKey);
if( rc==SQLITE_OK ){
rc = sqlite3BtreeInsert(pCrsr, zKey, nKey, "", 0, 0, pOp->p3,
((pOp->p5 & OPFLAG_USESEEKRESULT) ? pC->seekResult : 0)
);
assert( pC->deferredMoveto==0 );
}
pC->cacheStatus = CACHE_STALE;
}
}

View File

@@ -393,7 +393,7 @@ int sqlite3VdbeFrameRestore(VdbeFrame *);
void sqlite3VdbeMemStoreType(Mem *pMem);
int sqlite3VdbeSorterInit(sqlite3 *, VdbeCursor *);
int sqlite3VdbeSorterWrite(sqlite3 *, VdbeCursor *);
int sqlite3VdbeSorterWrite(sqlite3 *, VdbeCursor *, int);
void sqlite3VdbeSorterClose(sqlite3 *, VdbeCursor *);
int sqlite3VdbeSorterRowkey(sqlite3 *, VdbeCursor *, Mem *);

View File

@@ -89,14 +89,14 @@ typedef struct VdbeSorterIter VdbeSorterIter;
*/
struct VdbeSorter {
int nWorking; /* Start a new b-tree after this many pages */
int nBtree; /* Current size of b-tree contents as PMA */
int nTree; /* Used size of aTree/aIter (power of 2) */
VdbeSorterIter *aIter; /* Array of iterators to merge */
int *aTree; /* Current state of incremental merge */
i64 iWriteOff; /* Current write offset within file pTemp1 */
i64 iReadOff; /* Current read offset within file pTemp1 */
sqlite3_file *pTemp1; /* PMA file 1 */
i64 *aOffset; /* Array of PMA offsets for file 1 */
int nOffset; /* Size of aOffset[] array */
int nPMA; /* Number of PMAs stored in pTemp1 */
};
/*
@@ -116,25 +116,9 @@ struct VdbeSorterIter {
/* Minimum allowable value for the VdbeSorter.nWorking variable */
#define SORTER_MIN_SEGMENT_SIZE 10
/* Maximum number of segments to merge in a single go */
/* Maximum number of segments to merge in a single pass. */
#define SORTER_MAX_MERGE_COUNT 16
/*
** Grow the VdbeSorter.aOffset[] array of sorter object p (the second
** argument) by one entry and store integer iOff in the new final slot.
** Return SQLITE_NOMEM if the reallocation fails, or SQLITE_OK otherwise.
**
** TODO: The aOffset[] array may grow indefinitely. Fix this.
*/
static int vdbeSorterAppendOffset(sqlite3 *db, VdbeSorter *p, i64 iOff){
  int nNew = p->nOffset + 1;      /* Number of entries after appending */
  i64 *aNew;                      /* Resized array (0 on OOM) */

  aNew = sqlite3DbReallocOrFree(db, p->aOffset, nNew*sizeof(i64));
  p->aOffset = aNew;
  if( aNew==0 ){
    return SQLITE_NOMEM;
  }

  aNew[p->nOffset] = iOff;
  p->nOffset = nNew;
  return SQLITE_OK;
}
/*
** Free all memory belonging to the VdbeSorterIter object passed as the second
** argument. All structure fields are set to zero before returning.
@@ -156,10 +140,8 @@ static int vdbeSorterIterNext(
int nRec;
int iOff;
assert( pIter->nAlloc>5 );
nRead = pIter->iEof - pIter->iReadOff;
if( nRead>5 ) nRead = 5;
if( nRead<=0 ){
vdbeSorterIterZero(db, pIter);
return SQLITE_OK;
@@ -192,6 +174,46 @@ static int vdbeSorterIterNext(
return rc;
}
/*
** Encode iVal as a varint and write it to file pFile at offset *piOffset.
** *piOffset is advanced by the size of the encoded varint regardless of
** whether or not the write succeeds. The value returned is the result of
** the sqlite3OsWrite() call.
*/
static int vdbeSorterWriteVarint(
  sqlite3_file *pFile,
  i64 iVal,
  i64 *piOffset
){
  u8 aBuf[9];                               /* Space for the encoded value */
  int nBuf = sqlite3PutVarint(aBuf, iVal);  /* Bytes of aBuf[] in use */
  int rc = sqlite3OsWrite(pFile, aBuf, nBuf, *piOffset);

  *piOffset += nBuf;
  return rc;
}
/*
** Read a varint from file pFile starting at offset *piOffset and store the
** decoded value in *piVal. At most 9 bytes are read, and no bytes at or
** beyond offset iEof are requested. If the read succeeds, *piOffset is
** advanced by the size of the varint as reported by getVarint() and
** SQLITE_OK is returned. Otherwise, *piOffset and *piVal are left unchanged
** and the error code from sqlite3OsRead() is returned.
*/
static int vdbeSorterReadVarint(
  sqlite3_file *pFile,
  i64 iEof,                       /* Total number of bytes in file */
  i64 *piOffset,                  /* IN/OUT: Read offset */
  i64 *piVal                      /* OUT: Value read from file */
){
  /* The buffer is zero-filled so that, when fewer than 9 bytes remain
  ** before iEof, the decoder never examines indeterminate stack bytes,
  ** even if the varint stored in the file turns out to be truncated. */
  u8 aVarint[9] = {0};            /* Buffer large enough for a varint */
  i64 iOff = *piOffset;           /* Offset in file to read from */
  int nRead = 9;                  /* Number of bytes to read from file */
  int rc;                         /* Return code */

  assert( iEof>iOff );
  if( (iEof-iOff)<nRead ){
    /* Fewer than 9 bytes left: the difference fits in an int. */
    nRead = (int)(iEof-iOff);
  }

  rc = sqlite3OsRead(pFile, aVarint, nRead, iOff);
  if( rc==SQLITE_OK ){
    *piOffset += getVarint(aVarint, (u64 *)piVal);
  }

  return rc;
}
/*
** Initialize iterator pIter to scan through the PMA stored in file pFile
** starting at offset iStart and ending at offset iEof-1. This function
@@ -200,20 +222,32 @@ static int vdbeSorterIterNext(
*/
static int vdbeSorterIterInit(
sqlite3 *db, /* Database handle */
sqlite3_file *pFile, /* File that the PMA is stored in */
VdbeSorter *pSorter, /* Sorter object */
i64 iStart, /* Start offset in pFile */
i64 iEof, /* 1 byte past the end of the PMA in pFile */
VdbeSorterIter *pIter /* Iterator to populate */
VdbeSorterIter *pIter, /* Iterator to populate */
i64 *pnByte /* IN/OUT: Increment this value by PMA size */
){
int rc;
i64 iEof = pSorter->iWriteOff;
assert( iEof>iStart );
assert( pIter->aAlloc==0 );
pIter->pFile = pFile;
pIter->iEof = iEof;
pIter->pFile = pSorter->pTemp1;
pIter->iReadOff = iStart;
pIter->nAlloc = 128;
pIter->aAlloc = (u8 *)sqlite3DbMallocRaw(db, pIter->nAlloc);
if( !pIter->aAlloc ) return SQLITE_NOMEM;
return vdbeSorterIterNext(db, pIter);
if( !pIter->aAlloc ){
rc = SQLITE_NOMEM;
}else{
i64 nByte;
rc = vdbeSorterReadVarint(pSorter->pTemp1, iEof, &pIter->iReadOff, &nByte);
*pnByte += nByte;
pIter->iEof = pIter->iReadOff + nByte;
}
if( rc==SQLITE_OK ){
rc = vdbeSorterIterNext(db, pIter);
}
return rc;
}
/*
@@ -298,7 +332,6 @@ void sqlite3VdbeSorterClose(sqlite3 *db, VdbeCursor *pCsr){
if( pSorter->pTemp1 ){
sqlite3OsCloseFree(pSorter->pTemp1);
}
sqlite3DbFree(db, pSorter->aOffset);
sqlite3DbFree(db, pSorter);
pCsr->pSorter = 0;
}
@@ -318,11 +351,12 @@ static int vdbeSorterOpenTempFile(sqlite3 *db, sqlite3_file **ppFile){
);
}
/*
** Write the current contents of the b-tree to a PMA. Return SQLITE_OK
** if successful, or an SQLite error code otherwise.
*/
static int sorterBtreeToPma(sqlite3 *db, VdbeCursor *pCsr){
static int vdbeSorterBtreeToPMA(sqlite3 *db, VdbeCursor *pCsr){
int rc = SQLITE_OK; /* Return code */
VdbeSorter *pSorter = pCsr->pSorter;
i64 iWriteOff = pSorter->iWriteOff;
@@ -338,27 +372,26 @@ static int sorterBtreeToPma(sqlite3 *db, VdbeCursor *pCsr){
rc = vdbeSorterOpenTempFile(db, &pSorter->pTemp1);
assert( rc!=SQLITE_OK || pSorter->pTemp1 );
assert( pSorter->iWriteOff==0 );
assert( pSorter->nOffset==0 );
assert( pSorter->aOffset==0 );
assert( pSorter->nPMA==0 );
}
if( rc==SQLITE_OK ){
pSorter->nPMA++;
/* Write a varint containing the size of the PMA in bytes into the file. */
assert( pSorter->nBtree>0 );
for(
rc = vdbeSorterAppendOffset(db, pSorter, iWriteOff);
rc = vdbeSorterWriteVarint(pSorter->pTemp1, pSorter->nBtree, &iWriteOff);
rc==SQLITE_OK && res==0;
rc = sqlite3BtreeNext(pCsr->pCursor, &res)
){
i64 nKey; /* Size of this key in bytes */
u8 aVarint[9]; /* Buffer containing varint(nKey) */
int nVar; /* Number of bytes in aVarint[] used */
(void)sqlite3BtreeKeySize(pCsr->pCursor, &nKey);
nVar = sqlite3PutVarint(aVarint, nKey);
/* Write the size of the record in bytes to the output file */
rc = sqlite3OsWrite(pSorter->pTemp1, aVarint, nVar, iWriteOff);
iWriteOff += nVar;
(void)sqlite3BtreeKeySize(pCsr->pCursor, &nKey);
rc = vdbeSorterWriteVarint(pSorter->pTemp1, nKey, &iWriteOff);
/* Make sure the aMalloc[] buffer is large enough for the record */
if( rc==SQLITE_OK && nKey>nMalloc ){
@@ -377,13 +410,16 @@ static int sorterBtreeToPma(sqlite3 *db, VdbeCursor *pCsr){
}
}
if( rc!=SQLITE_OK ) break;
}
assert( pSorter->nBtree==(
iWriteOff-pSorter->iWriteOff-sqlite3VarintLen(pSorter->nBtree)
));
pSorter->iWriteOff = iWriteOff;
sqlite3DbFree(db, aMalloc);
}
pSorter->nBtree = 0;
return rc;
}
@@ -392,7 +428,7 @@ static int sorterBtreeToPma(sqlite3 *db, VdbeCursor *pCsr){
** If the current b-tree being constructed is already considered "full",
** a new tree is started.
*/
int sqlite3VdbeSorterWrite(sqlite3 *db, VdbeCursor *pCsr){
int sqlite3VdbeSorterWrite(sqlite3 *db, VdbeCursor *pCsr, int nKey){
int rc = SQLITE_OK; /* Return code */
VdbeSorter *pSorter = pCsr->pSorter;
if( pSorter ){
@@ -423,7 +459,7 @@ int sqlite3VdbeSorterWrite(sqlite3 *db, VdbeCursor *pCsr){
/* Copy the current contents of the b-tree into a PMA in sorted order.
** Close the currently open b-tree cursor. */
rc = sorterBtreeToPma(db, pCsr);
rc = vdbeSorterBtreeToPMA(db, pCsr);
sqlite3BtreeCloseCursor(p);
if( rc==SQLITE_OK ){
@@ -441,6 +477,8 @@ int sqlite3VdbeSorterWrite(sqlite3 *db, VdbeCursor *pCsr){
rc = sqlite3BtreeCursor(pCsr->pBt, iRoot, 1, pCsr->pKeyInfo, p);
}
}
pSorter->nBtree += sqlite3VarintLen(nKey) + nKey;
}
return rc;
}
@@ -452,58 +490,30 @@ static int vdbeSorterInitMerge(
sqlite3 *db,
VdbeCursor *pCsr,
int iFirst,
int *piNext
i64 *pnByte /* Sum of bytes in all opened PMAs */
){
VdbeSorter *pSorter = pCsr->pSorter;
int rc = SQLITE_OK;
int i;
int N = 2;
int nIter; /* Number of iterators to initialize. */
nIter = pSorter->nOffset - iFirst;
if( nIter>SORTER_MAX_MERGE_COUNT ){
nIter = SORTER_MAX_MERGE_COUNT;
}
assert( nIter>0 );
while( N<nIter ) N += N;
/* Allocate aIter[] and aTree[], if required. */
if( pSorter->aIter==0 ){
int nByte = N * (sizeof(int) + sizeof(VdbeSorterIter));
pSorter->aIter = (VdbeSorterIter *)sqlite3DbMallocZero(db, nByte);
if( !pSorter->aIter ) return SQLITE_NOMEM;
pSorter->aTree = (int *)&pSorter->aIter[N];
}
i64 nByte = 0;
/* Initialize as many iterators as possible. */
for(i=iFirst;
rc==SQLITE_OK && i<pSorter->nOffset && (i-iFirst)<SORTER_MAX_MERGE_COUNT;
rc==SQLITE_OK && i<pSorter->nPMA && (i-iFirst)<SORTER_MAX_MERGE_COUNT;
i++
){
int iIter = i - iFirst;
if( rc==SQLITE_OK ){
VdbeSorterIter *pIter = &pSorter->aIter[iIter];
i64 iStart = pSorter->aOffset[i];
i64 iEof;
if( i==(pSorter->nOffset-1) ){
iEof = pSorter->iWriteOff;
}else{
iEof = pSorter->aOffset[i+1];
}
rc = vdbeSorterIterInit(db, pSorter->pTemp1, iStart, iEof, pIter);
}
VdbeSorterIter *pIter = &pSorter->aIter[i - iFirst];
rc = vdbeSorterIterInit(db, pSorter, pSorter->iReadOff, pIter, &nByte);
pSorter->iReadOff = pIter->iEof;
}
*piNext = i;
assert( i>iFirst );
pSorter->nTree = N;
/* Populate the aTree[] array. */
for(i=N-1; rc==SQLITE_OK && i>0; i--){
for(i=pSorter->nTree-1; rc==SQLITE_OK && i>0; i--){
rc = vdbeSorterDoCompare(pCsr, i);
}
*pnByte = nByte;
return rc;
}
@@ -516,39 +526,65 @@ int sqlite3VdbeSorterRewind(sqlite3 *db, VdbeCursor *pCsr, int *pbEof){
int rc; /* Return code */
sqlite3_file *pTemp2 = 0; /* Second temp file to use */
i64 iWrite2 = 0; /* Write offset for pTemp2 */
int nIter; /* Number of iterators used */
int nByte; /* Bytes of space required for aIter/aTree */
int N = 2; /* Power of 2 >= nIter */
assert( pSorter );
/* Write the current b-tree to a PMA. Close the b-tree cursor. */
rc = sorterBtreeToPma(db, pCsr);
rc = vdbeSorterBtreeToPMA(db, pCsr);
sqlite3BtreeCloseCursor(pCsr->pCursor);
if( rc!=SQLITE_OK ) return rc;
if( pSorter->nOffset==0 ){
if( pSorter->nPMA==0 ){
*pbEof = 1;
return SQLITE_OK;
}
while( rc==SQLITE_OK ){
int iNext = 0; /* Index of next segment to open */
/* Allocate space for aIter[] and aTree[]. */
nIter = pSorter->nPMA;
if( nIter>SORTER_MAX_MERGE_COUNT ) nIter = SORTER_MAX_MERGE_COUNT;
assert( nIter>0 );
while( N<nIter ) N += N;
nByte = N * (sizeof(int) + sizeof(VdbeSorterIter));
pSorter->aIter = (VdbeSorterIter *)sqlite3DbMallocZero(db, nByte);
if( !pSorter->aIter ) return SQLITE_NOMEM;
pSorter->aTree = (int *)&pSorter->aIter[N];
pSorter->nTree = N;
do {
int iNew = 0; /* Index of new, merged, PMA */
do {
for(iNew=0; rc==SQLITE_OK; iNew++){
i64 nWrite; /* Number of bytes in new PMA */
/* This call configures iterators for merging. */
rc = vdbeSorterInitMerge(db, pCsr, iNext, &iNext);
assert( iNext>0 );
/* If there are SORTER_MAX_MERGE_COUNT or fewer PMAs in file pTemp1,
** initialize an iterator for each of them and break out of the loop.
** These iterators will be incrementally merged as the VDBE layer calls
** sqlite3VdbeSorterNext().
**
** Otherwise, if pTemp1 contains more than SORTER_MAX_MERGE_COUNT PMAs,
** initialize iterators for SORTER_MAX_MERGE_COUNT of them. These PMAs
** are merged into a single PMA that is written to file pTemp2.
*/
rc = vdbeSorterInitMerge(db, pCsr, iNew*SORTER_MAX_MERGE_COUNT, &nWrite);
assert( rc!=SQLITE_OK || pSorter->aIter[ pSorter->aTree[1] ].pFile );
if( rc!=SQLITE_OK || pSorter->nPMA<=SORTER_MAX_MERGE_COUNT ){
break;
}
if( rc==SQLITE_OK && (iNew>0 || iNext<pSorter->nOffset) ){
/* Open the second temp file, if it is not already open. */
if( pTemp2==0 ){
assert( iWrite2==0 );
rc = vdbeSorterOpenTempFile(db, &pTemp2);
}
if( rc==SQLITE_OK ){
rc = vdbeSorterWriteVarint(pTemp2, nWrite, &iWrite2);
}
if( rc==SQLITE_OK ){
int bEof = 0;
if( pTemp2==0 ){
rc = vdbeSorterOpenTempFile(db, &pTemp2);
}
if( rc==SQLITE_OK ){
pSorter->aOffset[iNew] = iWrite2;
}
while( rc==SQLITE_OK && bEof==0 ){
int nByte;
VdbeSorterIter *pIter = &pSorter->aIter[ pSorter->aTree[1] ];
@@ -560,26 +596,25 @@ int sqlite3VdbeSorterRewind(sqlite3 *db, VdbeCursor *pCsr, int *pbEof){
rc = sqlite3VdbeSorterNext(db, pCsr, &bEof);
}
}
iNew++;
}
}while( rc==SQLITE_OK && iNext<pSorter->nOffset );
}
if( iNew==0 ){
if( pSorter->nPMA<=SORTER_MAX_MERGE_COUNT ){
break;
}else{
sqlite3_file *pTmp = pSorter->pTemp1;
pSorter->nOffset = iNew;
pSorter->nPMA = iNew;
pSorter->pTemp1 = pTemp2;
pTemp2 = pTmp;
pSorter->iWriteOff = iWrite2;
pSorter->iReadOff = 0;
iWrite2 = 0;
}
}
}while( rc==SQLITE_OK );
if( pTemp2 ){
sqlite3OsCloseFree(pTemp2);
}
*pbEof = (pSorter->aIter[pSorter->aTree[1]].pFile==0);
return rc;
}