1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-07-30 19:03:16 +03:00

Fix some fts5 problems with very large position lists.

FossilOrigin-Name: 2ea8f9cbe67dac60c1a0a661c95a03ecfa9a0b9a
This commit is contained in:
dan
2015-04-20 18:48:57 +00:00
parent 687c5124e0
commit 80d3ea080a
4 changed files with 88 additions and 13 deletions

View File

@ -1901,13 +1901,20 @@ static void fts5SegIterNext(
** the doclist.
*/
static void fts5SegIterReverse(Fts5Index *p, int iIdx, Fts5SegIter *pIter){
Fts5DlidxIter *pDlidx = pIter->pDlidx;
Fts5Data *pLast = 0;
int pgnoLast = 0;
if( pIter->pDlidx ){
int iSegid = pIter->pSeg->iSegid;
pgnoLast = pIter->pDlidx->iLeafPgno;
pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, pgnoLast));
if( pDlidx ){
/* If the doclist-index iterator is already at EOF, then the current doclist
** contains no entries except those on the current page. */
if( fts5DlidxIterEof(p, pDlidx)==0 ){
int iSegid = pIter->pSeg->iSegid;
pgnoLast = pDlidx->iLeafPgno;
pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, pgnoLast));
}else{
pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel);
}
}else{
int iOff; /* Byte offset within pLeaf */
Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
@ -1915,7 +1922,7 @@ static void fts5SegIterReverse(Fts5Index *p, int iIdx, Fts5SegIter *pIter){
/* Currently, Fts5SegIter.iLeafOffset (and iOff) points to the first
** byte of position-list content for the current rowid. Back it up
** so that it points to the start of the position-list size field. */
pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2 + pIter->bDel);
pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel);
iOff = pIter->iLeafOffset;
assert( iOff>=4 );
@ -3285,6 +3292,9 @@ static void fts5TrimSegments(Fts5Index *p, Fts5MultiSegIter *pIter){
if( pSeg->pSeg==0 ){
/* no-op */
}else if( pSeg->pLeaf==0 ){
/* All keys from this input segment have been transferred to the output.
** Set both the first and last page-numbers to 0 to indicate that the
** segment is now empty. */
pSeg->pSeg->pgnoLast = 0;
pSeg->pSeg->pgnoFirst = 0;
}else{
@ -4092,7 +4102,13 @@ static void fts5IndexIntegrityCheckSegment(
}
}
if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){
/* Either iter.iLeaf must be the rightmost leaf-page in the segment, or
** else the segment has been completely emptied by an ongoing merge
** operation. */
if( p->rc==SQLITE_OK
&& iter.iLeaf!=pSeg->pgnoLast
&& (pSeg->pgnoFirst || pSeg->pgnoLast)
){
p->rc = FTS5_CORRUPT;
}

View File

@ -0,0 +1,58 @@
# 2015 April 21
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This test is focused on really large position lists - those that require
# 4 or 5 byte position-list size varints. Because of the amount of memory
# required, these tests only run on 64-bit platforms.
#
# Load fts5_common.tcl from the directory that contains this script, then
# set the testprefix variable for this file (presumably consumed by the
# test harness when naming test cases - confirm against fts5_common.tcl).
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5bigpl
# Skip every test in this file when the platform word size is smaller than
# 8 bytes, i.e. on anything other than a 64-bit build.
if { $tcl_platform(wordSize)<8 } {
finish_test
return
}
# Create the single-column fts5 table used by all of the tests below.
do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(x) }
# Test 1.1: insert ten rows, one for each of the tokens a through j, where
# each row is that single token repeated 1200000 times. Afterwards issue the
# 'integrity-check' command against the table. The expected result is empty.
do_test 1.1 {
foreach t {a b c d e f g h i j} {
set doc [string repeat "$t " 1200000]
execsql { INSERT INTO t1 VALUES($doc) }
}
execsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {}
# Test 1.2: empty the table, then insert four rows, each one a two-token
# phrase repeated 600000 times, so that every row contains two distinct
# tokens. Afterwards issue the 'integrity-check' command. The expected
# result is empty.
do_test 1.2 {
execsql { DELETE FROM t1 }
foreach t {"a b" "b a" "c d" "d c"} {
set doc [string repeat "$t " 600000]
execsql { INSERT INTO t1 VALUES($doc) }
}
execsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {}
# 5-byte varint. This test takes 30 seconds or so on a 2014 workstation.
# The generated database is roughly 635MiB.
#
# Test 2.1: empty the table, then insert a single row consisting of the
# token "a" repeated 150000000 times and issue the 'integrity-check'
# command. The expected result is empty.
# NOTE(review): the "...slow" suffix in the test name looks like a marker
# for long-running tests - confirm how the harness treats it.
do_test 2.1...slow {
execsql { DELETE FROM t1 }
foreach t {a} {
set doc [string repeat "$t " 150000000]
execsql { INSERT INTO t1 VALUES($doc) }
}
execsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {}
finish_test