mirror of
https://github.com/MariaDB/server.git
synced 2025-08-08 11:22:35 +03:00
MDEV-35450 VEC_DISTANCE() function to autouse the available index type
This commit is contained in:
@@ -155,3 +155,103 @@ select vec_totext(`null`) from (values (null),(0x00000000)) x;
|
||||
vec_totext(`null`)
|
||||
NULL
|
||||
[0]
|
||||
# End of 11.7 tests
|
||||
#
|
||||
# MDEV-35450 VEC_DISTANCE() function to autouse the available index type
|
||||
#
|
||||
create table t1 (a int primary key, b vector(5) not null, vector index (b) distance=euclidean);
|
||||
insert t1 values (0,vec_fromtext('[1,2,3,4,5]')), (1,vec_fromtext('[2,2,3,4,5]')),
|
||||
(2,vec_fromtext('[1,3,3,4,5]')), (3,vec_fromtext('[1,2,4,4,5]')),
|
||||
(4,vec_fromtext('[1,2,4,5,5]'));
|
||||
create table t2 (c int primary key, d vector(5) not null, vector index (d) distance=cosine);
|
||||
insert t2 select * from t1;
|
||||
create table t3 (e int primary key, f vector(5) not null);
|
||||
insert t3 select * from t1;
|
||||
select a,c,vec_distance(b,d),vec_distance_euclidean(b,d) from t1, t2 where a=(c+11)*13%5;
|
||||
a c vec_distance(b,d) vec_distance_euclidean(b,d)
|
||||
3 0 1 1
|
||||
1 1 0 0
|
||||
4 2 1.73205 1.73205
|
||||
2 3 1.41421 1.41421
|
||||
0 4 1.41421 1.41421
|
||||
select a,c,vec_distance(d,b),vec_distance_cosine(b,d) from t1, t2 where a=(c+11)*13%5;
|
||||
a c vec_distance(d,b) vec_distance_cosine(b,d)
|
||||
3 0 0.00676 0.00676
|
||||
1 1 0 0
|
||||
4 2 0.01943 0.01943
|
||||
2 3 0.01626 0.01626
|
||||
0 4 0.00784 0.00784
|
||||
select a,e,vec_distance(b,f),vec_distance_euclidean(b,f) from t1, t3 where a=(e+11)*13%5;
|
||||
a e vec_distance(b,f) vec_distance_euclidean(b,f)
|
||||
3 0 1 1
|
||||
1 1 0 0
|
||||
4 2 1.73205 1.73205
|
||||
2 3 1.41421 1.41421
|
||||
0 4 1.41421 1.41421
|
||||
select e,c,vec_distance(f,d),vec_distance_cosine(d,f) from t2, t3 where e=(c+11)*13%5;
|
||||
e c vec_distance(f,d) vec_distance_cosine(d,f)
|
||||
3 0 0.00676 0.00676
|
||||
1 1 0 0
|
||||
4 2 0.01943 0.01943
|
||||
2 3 0.01626 0.01626
|
||||
0 4 0.00784 0.00784
|
||||
select a,vec_distance(b,vec_fromtext('[5,4,3,2,1]')),vec_distance_euclidean(b,vec_fromtext('[5,4,3,2,1]')) from t1;
|
||||
a vec_distance(b,vec_fromtext('[5,4,3,2,1]')) vec_distance_euclidean(b,vec_fromtext('[5,4,3,2,1]'))
|
||||
0 6.32455 6.32455
|
||||
1 5.74456 5.74456
|
||||
2 6.08276 6.08276
|
||||
3 6.40312 6.40312
|
||||
4 6.78232 6.78232
|
||||
select c,vec_distance(d,vec_fromtext('[5,4,3,2,1]')),vec_distance_cosine(d,vec_fromtext('[5,4,3,2,1]')) from t2;
|
||||
c vec_distance(d,vec_fromtext('[5,4,3,2,1]')) vec_distance_cosine(d,vec_fromtext('[5,4,3,2,1]'))
|
||||
0 0.36363 0.36363
|
||||
1 0.29178 0.29178
|
||||
2 0.32109 0.32109
|
||||
3 0.34926 0.34926
|
||||
4 0.35989 0.35989
|
||||
select e,vec_distance(f,vec_fromtext('[5,4,3,2,1]')) from t3;
|
||||
ERROR HY000: Cannot determine distance type for VEC_DISTANCE, index is not found
|
||||
drop table t1, t2, t3;
|
||||
#
|
||||
# Item_func_vec_distance::do_get_copy()
|
||||
#
|
||||
create table t1 (a vector(1) not null, vector(a));
|
||||
create algorithm=temptable view v1 as select * from t1;
|
||||
select * from v1 where vec_distance(a,0x30303030) > 0;
|
||||
a
|
||||
drop view v1;
|
||||
drop table t1;
|
||||
#
|
||||
# MDEV-35724 VEC_DISTANCE does not work in HAVING clause
|
||||
#
|
||||
create table t (v vector(1) not null, vector(v));
|
||||
insert t values (0x31313131),(0x32323232);
|
||||
select v from t having vec_distance(v,0x30303030) > 0;
|
||||
v
|
||||
1111
|
||||
2222
|
||||
drop table t;
|
||||
#
|
||||
# MDEV-35752 VEC_DISTANCE does not work in triggers
|
||||
#
|
||||
create table t (id int primary key default 1, v vector(1) not null default 0x30303030, vector(v), d float);
|
||||
create trigger tr before insert on t for each row set new.d = vec_distance(new.v,0x30303030);
|
||||
insert t (v) values (0x31313131);
|
||||
select vec_distance(default(v), 0x31313131) from t;
|
||||
vec_distance(default(v), 0x31313131)
|
||||
0.00000
|
||||
insert t (v) values (0x32323232) on duplicate key update d=vec_distance(values(v), 0x31313131);
|
||||
drop table t;
|
||||
#
|
||||
# MDEV-35778 Server crashes in Item_func_vec_distance::fix_length_and_dec upon using VEC_DISTANCE with temptable view
|
||||
#
|
||||
create table t (x vector(1) not null, vector(x));
|
||||
insert into t values (0x31313131),(0x32323232);
|
||||
create algorithm=temptable view v as select * from t;
|
||||
select * from v order by vec_distance(0x30303030, x);
|
||||
x
|
||||
1111
|
||||
2222
|
||||
drop view v;
|
||||
drop table t;
|
||||
# End of 11.8 tests
|
||||
|
@@ -71,3 +71,75 @@ select vec_fromtext(0x00000000);
|
||||
--echo # MDEV-35220 Assertion `!item->null_value' failed upon VEC_TOTEXT call
|
||||
--echo #
|
||||
select vec_totext(`null`) from (values (null),(0x00000000)) x;
|
||||
|
||||
--echo # End of 11.7 tests
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-35450 VEC_DISTANCE() function to autouse the available index type
|
||||
--echo #
|
||||
|
||||
create table t1 (a int primary key, b vector(5) not null, vector index (b) distance=euclidean);
|
||||
insert t1 values (0,vec_fromtext('[1,2,3,4,5]')), (1,vec_fromtext('[2,2,3,4,5]')),
|
||||
(2,vec_fromtext('[1,3,3,4,5]')), (3,vec_fromtext('[1,2,4,4,5]')),
|
||||
(4,vec_fromtext('[1,2,4,5,5]'));
|
||||
create table t2 (c int primary key, d vector(5) not null, vector index (d) distance=cosine);
|
||||
insert t2 select * from t1;
|
||||
create table t3 (e int primary key, f vector(5) not null);
|
||||
insert t3 select * from t1;
|
||||
|
||||
--replace_regex /(\.\d{5})\d+/\1/
|
||||
select a,c,vec_distance(b,d),vec_distance_euclidean(b,d) from t1, t2 where a=(c+11)*13%5;
|
||||
--replace_regex /(\.\d{5})\d+/\1/
|
||||
select a,c,vec_distance(d,b),vec_distance_cosine(b,d) from t1, t2 where a=(c+11)*13%5;
|
||||
--replace_regex /(\.\d{5})\d+/\1/
|
||||
select a,e,vec_distance(b,f),vec_distance_euclidean(b,f) from t1, t3 where a=(e+11)*13%5;
|
||||
--replace_regex /(\.\d{5})\d+/\1/
|
||||
select e,c,vec_distance(f,d),vec_distance_cosine(d,f) from t2, t3 where e=(c+11)*13%5;
|
||||
--replace_regex /(\.\d{5})\d+/\1/
|
||||
select a,vec_distance(b,vec_fromtext('[5,4,3,2,1]')),vec_distance_euclidean(b,vec_fromtext('[5,4,3,2,1]')) from t1;
|
||||
--replace_regex /(\.\d{5})\d+/\1/
|
||||
select c,vec_distance(d,vec_fromtext('[5,4,3,2,1]')),vec_distance_cosine(d,vec_fromtext('[5,4,3,2,1]')) from t2;
|
||||
--error ER_VEC_DISTANCE_TYPE
|
||||
select e,vec_distance(f,vec_fromtext('[5,4,3,2,1]')) from t3;
|
||||
|
||||
drop table t1, t2, t3;
|
||||
|
||||
--echo #
|
||||
--echo # Item_func_vec_distance::do_get_copy()
|
||||
--echo #
|
||||
create table t1 (a vector(1) not null, vector(a));
|
||||
create algorithm=temptable view v1 as select * from t1;
|
||||
select * from v1 where vec_distance(a,0x30303030) > 0;
|
||||
drop view v1;
|
||||
drop table t1;
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-35724 VEC_DISTANCE does not work in HAVING clause
|
||||
--echo #
|
||||
create table t (v vector(1) not null, vector(v));
|
||||
insert t values (0x31313131),(0x32323232);
|
||||
select v from t having vec_distance(v,0x30303030) > 0;
|
||||
drop table t;
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-35752 VEC_DISTANCE does not work in triggers
|
||||
--echo #
|
||||
create table t (id int primary key default 1, v vector(1) not null default 0x30303030, vector(v), d float);
|
||||
create trigger tr before insert on t for each row set new.d = vec_distance(new.v,0x30303030);
|
||||
insert t (v) values (0x31313131);
|
||||
--replace_regex /(\.\d{5})\d+/\1/
|
||||
select vec_distance(default(v), 0x31313131) from t;
|
||||
insert t (v) values (0x32323232) on duplicate key update d=vec_distance(values(v), 0x31313131);
|
||||
drop table t;
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-35778 Server crashes in Item_func_vec_distance::fix_length_and_dec upon using VEC_DISTANCE with temptable view
|
||||
--echo #
|
||||
create table t (x vector(1) not null, vector(x));
|
||||
insert into t values (0x31313131),(0x32323232);
|
||||
create algorithm=temptable view v as select * from t;
|
||||
select * from v order by vec_distance(0x30303030, x);
|
||||
drop view v;
|
||||
drop table t;
|
||||
|
||||
--echo # End of 11.8 tests
|
||||
|
@@ -6287,6 +6287,22 @@ protected:
|
||||
|
||||
Create_func_vec_distance_cosine Create_func_vec_distance_cosine::s_singleton;
|
||||
|
||||
// Builder for the two-argument SQL function VEC_DISTANCE() (the name is
// bound to this class in func_array via BUILDER(Create_func_vec_distance)).
// Unlike the EUCLIDEAN/COSINE builders, it does not fix the metric at
// creation time.
class Create_func_vec_distance: public Create_func_arg2
|
||||
{
|
||||
public:
|
||||
// Allocates an Item_func_vec_distance on thd->mem_root with kind AUTO.
// The concrete metric is resolved later, in
// Item_func_vec_distance::fix_length_and_dec(), from a vector index found
// on a field argument; if none is found, ER_VEC_DISTANCE_TYPE is raised.
Item *create_2_arg(THD *thd, Item *arg1, Item *arg2) override
|
||||
{ return new (thd->mem_root)
|
||||
Item_func_vec_distance(thd, arg1, arg2, Item_func_vec_distance::AUTO); }
|
||||
|
||||
// The builder instance declared for this class; presumably the object that
// BUILDER() refers to -- TODO confirm against the macro definition.
static Create_func_vec_distance s_singleton;
|
||||
|
||||
protected:
|
||||
// Construction and destruction are protected, so outside code cannot
// create further instances of this builder.
Create_func_vec_distance() = default;
|
||||
virtual ~Create_func_vec_distance() = default;
|
||||
};
|
||||
|
||||
// Out-of-class definition of the static builder instance declared above.
Create_func_vec_distance Create_func_vec_distance::s_singleton;
|
||||
|
||||
class Create_func_vec_totext: public Create_func_arg1
|
||||
{
|
||||
public:
|
||||
@@ -6549,6 +6565,7 @@ const Native_func_registry func_array[] =
|
||||
{ { STRING_WITH_LEN("UUID_SHORT") }, BUILDER(Create_func_uuid_short)},
|
||||
{ { STRING_WITH_LEN("VEC_DISTANCE_EUCLIDEAN") }, BUILDER(Create_func_vec_distance_euclidean)},
|
||||
{ { STRING_WITH_LEN("VEC_DISTANCE_COSINE") }, BUILDER(Create_func_vec_distance_cosine)},
|
||||
{ { STRING_WITH_LEN("VEC_DISTANCE") }, BUILDER(Create_func_vec_distance)},
|
||||
{ { STRING_WITH_LEN("VEC_FROMTEXT") }, BUILDER(Create_func_vec_fromtext)},
|
||||
{ { STRING_WITH_LEN("VEC_TOTEXT") }, BUILDER(Create_func_vec_totext)},
|
||||
{ { STRING_WITH_LEN("VERSION") }, BUILDER(Create_func_version)},
|
||||
|
@@ -59,6 +59,22 @@ bool Item_func_vec_distance::fix_length_and_dec(THD *thd)
|
||||
switch (kind) {
|
||||
case EUCLIDEAN: calc_distance= calc_distance_euclidean; break;
|
||||
case COSINE: calc_distance= calc_distance_cosine; break;
|
||||
case AUTO:
|
||||
for (uint i=0; i < 2; i++)
|
||||
if (auto *item= dynamic_cast<Item_field*>(args[i]->real_item()))
|
||||
{
|
||||
TABLE_SHARE *share= item->field->orig_table->s;
|
||||
Field *f= share->field[item->field->field_index];
|
||||
KEY *kinfo= share->key_info;
|
||||
for (uint j= share->keys; j < share->total_keys; j++)
|
||||
if (kinfo[j].algorithm == HA_KEY_ALG_VECTOR && f->key_start.is_set(j))
|
||||
{
|
||||
kind= mhnsw_uses_distance(f->table, kinfo + j);
|
||||
return fix_length_and_dec(thd);
|
||||
}
|
||||
}
|
||||
my_error(ER_VEC_DISTANCE_TYPE, MYF(0));
|
||||
return 1;
|
||||
}
|
||||
set_maybe_null(); // if wrong dimensions
|
||||
return Item_real_func::fix_length_and_dec(thd);
|
||||
|
@@ -39,13 +39,14 @@ class Item_func_vec_distance: public Item_real_func
|
||||
double (*calc_distance)(float *v1, float *v2, size_t v_len);
|
||||
|
||||
public:
|
||||
enum distance_kind { EUCLIDEAN, COSINE } kind;
|
||||
enum distance_kind { EUCLIDEAN, COSINE, AUTO } kind;
|
||||
Item_func_vec_distance(THD *thd, Item *a, Item *b, distance_kind kind);
|
||||
// Returns the SQL-level function name that corresponds to the current `kind`.
// The table below is indexed directly by `kind`, so its entries must stay in
// the same order as the distance_kind enumerators (EUCLIDEAN, COSINE, AUTO).
LEX_CSTRING func_name_cstring() const override
|
||||
{
|
||||
static LEX_CSTRING name[3]= {
|
||||
{ STRING_WITH_LEN("VEC_DISTANCE_EUCLIDEAN") },
|
||||
// NOTE(review): the next two lines appear to be the removed and the added
// side of one diff line (without / with the trailing comma); only the
// second form is expected to be part of the resulting file -- confirm.
{ STRING_WITH_LEN("VEC_DISTANCE_COSINE") }
|
||||
{ STRING_WITH_LEN("VEC_DISTANCE_COSINE") },
|
||||
// Name reported while the kind is still AUTO.
{ STRING_WITH_LEN("VEC_DISTANCE") }
|
||||
};
|
||||
// No bounds check here: `kind` is used as the array index as-is.
return name[kind];
|
||||
}
|
||||
|
@@ -12291,3 +12291,5 @@ ER_VECTOR_BINARY_FORMAT_INVALID
|
||||
eng "Invalid binary vector format. Must use IEEE standard float representation in little-endian format. Use VEC_FromText() to generate it."
|
||||
ER_VECTOR_FORMAT_INVALID
|
||||
eng "Invalid vector format at offset: %d for '%-.100s'. Must be a valid JSON array of numbers."
|
||||
ER_VEC_DISTANCE_TYPE
|
||||
eng "Cannot determine distance type for VEC_DISTANCE, index is not found"
|
||||
|
Reference in New Issue
Block a user