mirror of
https://github.com/MariaDB/server.git
synced 2025-08-08 11:22:35 +03:00
Cherry-picked from MyRocks upstream: Issue #809: Wrong query result with bloom filters
In reverse-ordered column families, if one wants to start reading at the logical end of the index, they should Seek() to a key value that is not covered by the index. This may (and typically does) prevent use of a bloom filter. The calls to setup_scan_iterator() that are made for index and table scan didn't take this into account and passed eq_cond_len=INDEX_NUMBER_SIZE. Fixed them to compute and pass correct eq_cond_len. Also, removed an incorrect assert in ha_rocksdb::setup_iterator_bounds.
This commit is contained in:
@@ -8526,7 +8526,7 @@ int ha_rocksdb::index_first_intern(uchar *const buf) {
|
||||
DBUG_ASSERT(key != nullptr);
|
||||
|
||||
const Rdb_key_def &kd = *m_key_descr_arr[active_index];
|
||||
kd.get_first_key(key, &key_size);
|
||||
int key_start_matching_bytes = kd.get_first_key(key, &key_size);
|
||||
|
||||
rocksdb::Slice index_key((const char *)key, key_size);
|
||||
|
||||
@@ -8537,7 +8537,7 @@ int ha_rocksdb::index_first_intern(uchar *const buf) {
|
||||
// Loop as long as we get a deadlock error AND we end up creating the
|
||||
// snapshot here (i.e. it did not exist prior to this)
|
||||
for (;;) {
|
||||
setup_scan_iterator(kd, &index_key, false, Rdb_key_def::INDEX_NUMBER_SIZE);
|
||||
setup_scan_iterator(kd, &index_key, false, key_start_matching_bytes);
|
||||
m_scan_it->Seek(index_key);
|
||||
m_skip_scan_it_next_call = true;
|
||||
|
||||
@@ -9539,7 +9539,6 @@ int ha_rocksdb::update_write_row(const uchar *const old_data,
|
||||
void ha_rocksdb::setup_iterator_bounds(const Rdb_key_def &kd,
|
||||
const rocksdb::Slice &eq_cond) {
|
||||
uint eq_cond_len = eq_cond.size();
|
||||
DBUG_ASSERT(eq_cond_len >= Rdb_key_def::INDEX_NUMBER_SIZE);
|
||||
memcpy(m_eq_cond_upper_bound, eq_cond.data(), eq_cond_len);
|
||||
kd.successor(m_eq_cond_upper_bound, eq_cond_len);
|
||||
memcpy(m_eq_cond_lower_bound, eq_cond.data(), eq_cond_len);
|
||||
@@ -9635,12 +9634,12 @@ void ha_rocksdb::release_scan_iterator() {
|
||||
void ha_rocksdb::setup_iterator_for_rnd_scan() {
|
||||
uint key_size;
|
||||
|
||||
m_pk_descr->get_first_key(m_pk_packed_tuple, &key_size);
|
||||
int key_start_matching_bytes = m_pk_descr->get_first_key(m_pk_packed_tuple, &key_size);
|
||||
|
||||
rocksdb::Slice table_key((const char *)m_pk_packed_tuple, key_size);
|
||||
|
||||
setup_scan_iterator(*m_pk_descr, &table_key, false,
|
||||
Rdb_key_def::INDEX_NUMBER_SIZE);
|
||||
key_start_matching_bytes);
|
||||
m_scan_it->Seek(table_key);
|
||||
m_skip_scan_it_next_call = true;
|
||||
}
|
||||
|
62
storage/rocksdb/mysql-test/rocksdb/r/bloomfilter5.result
Normal file
62
storage/rocksdb/mysql-test/rocksdb/r/bloomfilter5.result
Normal file
@@ -0,0 +1,62 @@
|
||||
#
|
||||
# Issue #809: Wrong query result with bloom filters
|
||||
#
|
||||
create table t1 (
|
||||
id1 bigint not null,
|
||||
id2 bigint not null,
|
||||
id3 varchar(100) not null,
|
||||
id4 int not null,
|
||||
id5 int not null,
|
||||
value bigint,
|
||||
value2 varchar(100),
|
||||
primary key (id1, id2, id3, id4) COMMENT 'rev:bf5_1'
|
||||
) engine=ROCKSDB;
|
||||
create table t2(a int);
|
||||
insert into t2 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
|
||||
create table t3(seq int);
|
||||
insert into t3
|
||||
select
|
||||
1+ A.a + B.a* 10 + C.a * 100 + D.a * 1000
|
||||
from t2 A, t2 B, t2 C, t2 D;
|
||||
insert t1
|
||||
select
|
||||
(seq+9) div 10, (seq+4) div 5, (seq+4) div 5, seq, seq, 1000, "aaabbbccc"
|
||||
from t3;
|
||||
set global rocksdb_force_flush_memtable_now=1;
|
||||
# Full table scan
|
||||
explain
|
||||
select * from t1 limit 10;
|
||||
id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 SIMPLE t1 ALL NULL NULL NULL NULL 10000
|
||||
select * from t1 limit 10;
|
||||
id1 id2 id3 id4 id5 value value2
|
||||
1000 2000 2000 10000 10000 1000 aaabbbccc
|
||||
1000 2000 2000 9999 9999 1000 aaabbbccc
|
||||
1000 2000 2000 9998 9998 1000 aaabbbccc
|
||||
1000 2000 2000 9997 9997 1000 aaabbbccc
|
||||
1000 2000 2000 9996 9996 1000 aaabbbccc
|
||||
1000 1999 1999 9995 9995 1000 aaabbbccc
|
||||
1000 1999 1999 9994 9994 1000 aaabbbccc
|
||||
1000 1999 1999 9993 9993 1000 aaabbbccc
|
||||
1000 1999 1999 9992 9992 1000 aaabbbccc
|
||||
1000 1999 1999 9991 9991 1000 aaabbbccc
|
||||
# An index scan starting from the end of the table:
|
||||
explain
|
||||
select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1;
|
||||
id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 SIMPLE t1 index NULL PRIMARY 122 NULL 1
|
||||
select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1;
|
||||
id1 id2 id3 id4 id5 value value2
|
||||
1000 2000 2000 10000 10000 1000 aaabbbccc
|
||||
create table t4 (
|
||||
pk int unsigned not null primary key,
|
||||
kp1 int unsigned not null,
|
||||
kp2 int unsigned not null,
|
||||
col1 int unsigned,
|
||||
key(kp1, kp2) comment 'rev:bf5_2'
|
||||
) engine=rocksdb;
|
||||
insert into t4 values (1, 0xFFFF, 0xFFF, 12345);
|
||||
# This must not fail an assert:
|
||||
select * from t4 force index(kp1) where kp1=0xFFFFFFFF and kp2<=0xFFFFFFFF order by kp2 desc;
|
||||
pk kp1 kp2 col1
|
||||
drop table t1,t2,t3,t4;
|
@@ -0,0 +1 @@
|
||||
--rocksdb_override_cf_options=rev:bf5_1={prefix_extractor=capped:4;block_based_table_factory={filter_policy=bloomfilter:10:false;whole_key_filtering=0;}};
|
61
storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5.test
Normal file
61
storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5.test
Normal file
@@ -0,0 +1,61 @@
|
||||
|
||||
--echo #
|
||||
--echo # Issue #809: Wrong query result with bloom filters
|
||||
--echo #
|
||||
|
||||
create table t1 (
|
||||
id1 bigint not null,
|
||||
id2 bigint not null,
|
||||
id3 varchar(100) not null,
|
||||
id4 int not null,
|
||||
id5 int not null,
|
||||
value bigint,
|
||||
value2 varchar(100),
|
||||
primary key (id1, id2, id3, id4) COMMENT 'rev:bf5_1'
|
||||
) engine=ROCKSDB;
|
||||
|
||||
|
||||
create table t2(a int);
|
||||
insert into t2 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
|
||||
|
||||
create table t3(seq int);
|
||||
insert into t3
|
||||
select
|
||||
1+ A.a + B.a* 10 + C.a * 100 + D.a * 1000
|
||||
from t2 A, t2 B, t2 C, t2 D;
|
||||
|
||||
insert t1
|
||||
select
|
||||
(seq+9) div 10, (seq+4) div 5, (seq+4) div 5, seq, seq, 1000, "aaabbbccc"
|
||||
from t3;
|
||||
|
||||
set global rocksdb_force_flush_memtable_now=1;
|
||||
|
||||
--echo # Full table scan
|
||||
explain
|
||||
select * from t1 limit 10;
|
||||
select * from t1 limit 10;
|
||||
|
||||
--echo # An index scan starting from the end of the table:
|
||||
explain
|
||||
select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1;
|
||||
select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1;
|
||||
|
||||
# A testcase for an assertion that the fix is removing
|
||||
# The only requirement for the used column family is that it is reverse-ordered
|
||||
create table t4 (
|
||||
pk int unsigned not null primary key,
|
||||
kp1 int unsigned not null,
|
||||
kp2 int unsigned not null,
|
||||
col1 int unsigned,
|
||||
key(kp1, kp2) comment 'rev:bf5_2'
|
||||
) engine=rocksdb;
|
||||
|
||||
insert into t4 values (1, 0xFFFF, 0xFFF, 12345);
|
||||
|
||||
--echo # This must not fail an assert:
|
||||
select * from t4 force index(kp1) where kp1=0xFFFFFFFF and kp2<=0xFFFFFFFF order by kp2 desc;
|
||||
|
||||
drop table t1,t2,t3,t4;
|
||||
|
||||
|
@@ -238,12 +238,28 @@ public:
|
||||
*size = INDEX_NUMBER_SIZE;
|
||||
}
|
||||
|
||||
/* Get the first key that you need to position at to start iterating.
|
||||
Returns a "supremum" or "infimum" for this index based on collation order
|
||||
/*
|
||||
Get the first key that you need to position at to start iterating.
|
||||
|
||||
Stores into *key a "supremum" or "infimum" key value for the index.
|
||||
|
||||
@return Number of bytes in the key that are usable for bloom filter use.
|
||||
*/
|
||||
inline void get_first_key(uchar *const key, uint *const size) const {
|
||||
return m_is_reverse_cf ? get_supremum_key(key, size)
|
||||
: get_infimum_key(key, size);
|
||||
inline int get_first_key(uchar *const key, uint *const size) const {
|
||||
if (m_is_reverse_cf)
|
||||
get_supremum_key(key, size);
|
||||
else
|
||||
get_infimum_key(key, size);
|
||||
|
||||
/* Find out how many bytes of infimum are the same as m_index_number */
|
||||
uchar unmodified_key[INDEX_NUMBER_SIZE];
|
||||
rdb_netbuf_store_index(unmodified_key, m_index_number);
|
||||
int i;
|
||||
for (i = 0; i < INDEX_NUMBER_SIZE; i++) {
|
||||
if (key[i] != unmodified_key[i])
|
||||
break;
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
/* Make a key that is right after the given key. */
|
||||
|
Reference in New Issue
Block a user