1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-12-24 14:17:58 +03:00

Implement optimize() function. This merges all segments in the fts

index into a single segment, including dropping delete cookies. (CVS 5417)

FossilOrigin-Name: b22e187bc2b38bd219dd0feba19b97279bd83089
This commit is contained in:
shess
2008-07-15 21:32:07 +00:00
parent edaedab238
commit 7d9ef0d0fc
4 changed files with 557 additions and 14 deletions

View File

@@ -11,7 +11,7 @@
# This file implements regression tests for SQLite library. The focus
# of this script is testing the FTS3 module's optimize() function.
#
# $Id: fts3d.test,v 1.1 2008/07/14 20:43:15 shess Exp $
# $Id: fts3d.test,v 1.2 2008/07/15 21:32:07 shess Exp $
#
set testdir [file dirname $argv0]
@@ -123,6 +123,224 @@ check_doclist fts3d-1.2.2 0 0 is {[1 0[1]]}
check_doclist fts3d-1.2.3 0 0 test {[1 0[3]]}
check_doclist fts3d-1.2.4 0 0 this {[1 0[0]]}
# TODO(shess): optimize() tests here.
#*************************************************************************
# Test results when everything is optimized manually.
# NOTE(shess): This is a copy of fts3c-1.3. I've pulled a copy here
# because fts3d-2 and fts3d-3 should have identical results.
db eval {
DROP TABLE IF EXISTS t1;
CREATE VIRTUAL TABLE t1 USING fts3(c);
INSERT INTO t1 (docid, c) VALUES (1, 'This is a test');
INSERT INTO t1 (docid, c) VALUES (2, 'That was a test');
INSERT INTO t1 (docid, c) VALUES (3, 'This is a test');
DELETE FROM t1 WHERE docid IN (1,3);
DROP TABLE IF EXISTS t1old;
ALTER TABLE t1 RENAME TO t1old;
CREATE VIRTUAL TABLE t1 USING fts3(c);
INSERT INTO t1 (docid, c) SELECT docid, c FROM t1old;
DROP TABLE t1old;
}
# Should be a single optimal segment with the same logical results.
do_test fts3d-2.segments {
execsql {
SELECT level, idx FROM t1_segdir ORDER BY level, idx;
}
} {0 0}
do_test fts3d-2.matches {
execsql {
SELECT OFFSETS(t1) FROM t1
WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY docid;
}
} {{0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4}}
check_terms_all fts3d-2.1 {a test that was}
check_doclist_all fts3d-2.1.1 a {[2 0[2]]}
check_doclist_all fts3d-2.1.2 test {[2 0[3]]}
check_doclist_all fts3d-2.1.3 that {[2 0[0]]}
check_doclist_all fts3d-2.1.4 was {[2 0[1]]}
check_terms fts3d-2.2 0 0 {a test that was}
check_doclist fts3d-2.2.1 0 0 a {[2 0[2]]}
check_doclist fts3d-2.2.2 0 0 test {[2 0[3]]}
check_doclist fts3d-2.2.3 0 0 that {[2 0[0]]}
check_doclist fts3d-2.2.4 0 0 was {[2 0[1]]}
#*************************************************************************
# Test results when everything is optimized via optimize().
db eval {
DROP TABLE IF EXISTS t1;
CREATE VIRTUAL TABLE t1 USING fts3(c);
INSERT INTO t1 (docid, c) VALUES (1, 'This is a test');
INSERT INTO t1 (docid, c) VALUES (2, 'That was a test');
INSERT INTO t1 (docid, c) VALUES (3, 'This is a test');
DELETE FROM t1 WHERE docid IN (1,3);
SELECT OPTIMIZE(t1) FROM t1 LIMIT 1;
}
# Should be a single optimal segment with the same logical results.
do_test fts3d-3.segments {
execsql {
SELECT level, idx FROM t1_segdir ORDER BY level, idx;
}
} {0 0}
do_test fts3d-3.matches {
execsql {
SELECT OFFSETS(t1) FROM t1
WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY docid;
}
} {{0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4}}
check_terms_all fts3d-3.1 {a test that was}
check_doclist_all fts3d-3.1.1 a {[2 0[2]]}
check_doclist_all fts3d-3.1.2 test {[2 0[3]]}
check_doclist_all fts3d-3.1.3 that {[2 0[0]]}
check_doclist_all fts3d-3.1.4 was {[2 0[1]]}
check_terms fts3d-3.2 0 0 {a test that was}
check_doclist fts3d-3.2.1 0 0 a {[2 0[2]]}
check_doclist fts3d-3.2.2 0 0 test {[2 0[3]]}
check_doclist fts3d-3.2.3 0 0 that {[2 0[0]]}
check_doclist fts3d-3.2.4 0 0 was {[2 0[1]]}
#*************************************************************************
# Test optimize() against a table involving segment merges.
# NOTE(shess): Since there's no transaction, each of the INSERT/UPDATE
# statements generates a segment.
db eval {
DROP TABLE IF EXISTS t1;
CREATE VIRTUAL TABLE t1 USING fts3(c);
INSERT INTO t1 (rowid, c) VALUES (1, 'This is a test');
INSERT INTO t1 (rowid, c) VALUES (2, 'That was a test');
INSERT INTO t1 (rowid, c) VALUES (3, 'This is a test');
UPDATE t1 SET c = 'This is a test one' WHERE rowid = 1;
UPDATE t1 SET c = 'That was a test one' WHERE rowid = 2;
UPDATE t1 SET c = 'This is a test one' WHERE rowid = 3;
UPDATE t1 SET c = 'This is a test two' WHERE rowid = 1;
UPDATE t1 SET c = 'That was a test two' WHERE rowid = 2;
UPDATE t1 SET c = 'This is a test two' WHERE rowid = 3;
UPDATE t1 SET c = 'This is a test three' WHERE rowid = 1;
UPDATE t1 SET c = 'That was a test three' WHERE rowid = 2;
UPDATE t1 SET c = 'This is a test three' WHERE rowid = 3;
UPDATE t1 SET c = 'This is a test four' WHERE rowid = 1;
UPDATE t1 SET c = 'That was a test four' WHERE rowid = 2;
UPDATE t1 SET c = 'This is a test four' WHERE rowid = 3;
UPDATE t1 SET c = 'This is a test' WHERE rowid = 1;
UPDATE t1 SET c = 'That was a test' WHERE rowid = 2;
UPDATE t1 SET c = 'This is a test' WHERE rowid = 3;
}
# 2 segments in level 0, 1 in level 1 (18 segments created, 16
# merged).
do_test fts3d-4.segments {
execsql {
SELECT level, idx FROM t1_segdir ORDER BY level, idx;
}
} {0 0 0 1 1 0}
do_test fts3d-4.matches {
execsql {
SELECT OFFSETS(t1) FROM t1
WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY docid;
}
} [list {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4} \
{0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4} \
{0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4}]
check_terms_all fts3d-4.1 {a four is one test that this three two was}
check_doclist_all fts3d-4.1.1 a {[1 0[2]] [2 0[2]] [3 0[2]]}
check_doclist_all fts3d-4.1.2 four {}
check_doclist_all fts3d-4.1.3 is {[1 0[1]] [3 0[1]]}
check_doclist_all fts3d-4.1.4 one {}
check_doclist_all fts3d-4.1.5 test {[1 0[3]] [2 0[3]] [3 0[3]]}
check_doclist_all fts3d-4.1.6 that {[2 0[0]]}
check_doclist_all fts3d-4.1.7 this {[1 0[0]] [3 0[0]]}
check_doclist_all fts3d-4.1.8 three {}
check_doclist_all fts3d-4.1.9 two {}
check_doclist_all fts3d-4.1.10 was {[2 0[1]]}
check_terms fts3d-4.2 0 0 {a four test that was}
check_doclist fts3d-4.2.1 0 0 a {[2 0[2]]}
check_doclist fts3d-4.2.2 0 0 four {[2]}
check_doclist fts3d-4.2.3 0 0 test {[2 0[3]]}
check_doclist fts3d-4.2.4 0 0 that {[2 0[0]]}
check_doclist fts3d-4.2.5 0 0 was {[2 0[1]]}
check_terms fts3d-4.3 0 1 {a four is test this}
check_doclist fts3d-4.3.1 0 1 a {[3 0[2]]}
check_doclist fts3d-4.3.2 0 1 four {[3]}
check_doclist fts3d-4.3.3 0 1 is {[3 0[1]]}
check_doclist fts3d-4.3.4 0 1 test {[3 0[3]]}
check_doclist fts3d-4.3.5 0 1 this {[3 0[0]]}
check_terms fts3d-4.4 1 0 {a four is one test that this three two was}
check_doclist fts3d-4.4.1 1 0 a {[1 0[2]] [2 0[2]] [3 0[2]]}
check_doclist fts3d-4.4.2 1 0 four {[1] [2 0[4]] [3 0[4]]}
check_doclist fts3d-4.4.3 1 0 is {[1 0[1]] [3 0[1]]}
check_doclist fts3d-4.4.4 1 0 one {[1] [2] [3]}
check_doclist fts3d-4.4.5 1 0 test {[1 0[3]] [2 0[3]] [3 0[3]]}
check_doclist fts3d-4.4.6 1 0 that {[2 0[0]]}
check_doclist fts3d-4.4.7 1 0 this {[1 0[0]] [3 0[0]]}
check_doclist fts3d-4.4.8 1 0 three {[1] [2] [3]}
check_doclist fts3d-4.4.9 1 0 two {[1] [2] [3]}
check_doclist fts3d-4.4.10 1 0 was {[2 0[1]]}
# Optimize should leave the result in the level of the highest-level
# prior segment.
do_test fts3d-4.5 {
execsql {
SELECT OPTIMIZE(t1) FROM t1 LIMIT 1;
SELECT level, idx FROM t1_segdir ORDER BY level, idx;
}
} {{Index optimized} 1 0}
# Identical to fts3d-4.matches.
do_test fts3d-4.5.matches {
execsql {
SELECT OFFSETS(t1) FROM t1
WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY docid;
}
} [list {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4} \
{0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4} \
{0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4}]
check_terms_all fts3d-4.5.1 {a is test that this was}
check_doclist_all fts3d-4.5.1.1 a {[1 0[2]] [2 0[2]] [3 0[2]]}
check_doclist_all fts3d-4.5.1.2 is {[1 0[1]] [3 0[1]]}
check_doclist_all fts3d-4.5.1.3 test {[1 0[3]] [2 0[3]] [3 0[3]]}
check_doclist_all fts3d-4.5.1.4 that {[2 0[0]]}
check_doclist_all fts3d-4.5.1.5 this {[1 0[0]] [3 0[0]]}
check_doclist_all fts3d-4.5.1.6 was {[2 0[1]]}
check_terms fts3d-4.5.2 1 0 {a is test that this was}
check_doclist fts3d-4.5.2.1 1 0 a {[1 0[2]] [2 0[2]] [3 0[2]]}
check_doclist fts3d-4.5.2.2 1 0 is {[1 0[1]] [3 0[1]]}
check_doclist fts3d-4.5.2.3 1 0 test {[1 0[3]] [2 0[3]] [3 0[3]]}
check_doclist fts3d-4.5.2.4 1 0 that {[2 0[0]]}
check_doclist fts3d-4.5.2.5 1 0 this {[1 0[0]] [3 0[0]]}
check_doclist fts3d-4.5.2.6 1 0 was {[2 0[1]]}
# Re-optimizing does nothing.
do_test fts3d-5.0 {
execsql {
SELECT OPTIMIZE(t1) FROM t1 LIMIT 1;
SELECT level, idx FROM t1_segdir ORDER BY level, idx;
}
} {{Index already optimal} 1 0}
# Even if we move things around, still does nothing.
do_test fts3d-5.1 {
execsql {
UPDATE t1_segdir SET level = 2 WHERE level = 1 AND idx = 0;
SELECT OPTIMIZE(t1) FROM t1 LIMIT 1;
SELECT level, idx FROM t1_segdir ORDER BY level, idx;
}
} {{Index already optimal} 2 0}
finish_test