1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-08 11:22:35 +03:00

MDEV-35450 VEC_DISTANCE() function to autouse the available index type

This commit is contained in:
Sergei Golubchik
2024-12-09 17:11:08 +01:00
parent 528249a20a
commit e11592aed3
6 changed files with 210 additions and 2 deletions

View File

@@ -155,3 +155,103 @@ select vec_totext(`null`) from (values (null),(0x00000000)) x;
vec_totext(`null`) vec_totext(`null`)
NULL NULL
[0] [0]
# End of 11.7 tests
#
# MDEV-35450 VEC_DISTANCE() function to autouse the available index type
#
create table t1 (a int primary key, b vector(5) not null, vector index (b) distance=euclidean);
insert t1 values (0,vec_fromtext('[1,2,3,4,5]')), (1,vec_fromtext('[2,2,3,4,5]')),
(2,vec_fromtext('[1,3,3,4,5]')), (3,vec_fromtext('[1,2,4,4,5]')),
(4,vec_fromtext('[1,2,4,5,5]'));
create table t2 (c int primary key, d vector(5) not null, vector index (d) distance=cosine);
insert t2 select * from t1;
create table t3 (e int primary key, f vector(5) not null);
insert t3 select * from t1;
select a,c,vec_distance(b,d),vec_distance_euclidean(b,d) from t1, t2 where a=(c+11)*13%5;
a c vec_distance(b,d) vec_distance_euclidean(b,d)
3 0 1 1
1 1 0 0
4 2 1.73205 1.73205
2 3 1.41421 1.41421
0 4 1.41421 1.41421
select a,c,vec_distance(d,b),vec_distance_cosine(b,d) from t1, t2 where a=(c+11)*13%5;
a c vec_distance(d,b) vec_distance_cosine(b,d)
3 0 0.00676 0.00676
1 1 0 0
4 2 0.01943 0.01943
2 3 0.01626 0.01626
0 4 0.00784 0.00784
select a,e,vec_distance(b,f),vec_distance_euclidean(b,f) from t1, t3 where a=(e+11)*13%5;
a e vec_distance(b,f) vec_distance_euclidean(b,f)
3 0 1 1
1 1 0 0
4 2 1.73205 1.73205
2 3 1.41421 1.41421
0 4 1.41421 1.41421
select e,c,vec_distance(f,d),vec_distance_cosine(d,f) from t2, t3 where e=(c+11)*13%5;
e c vec_distance(f,d) vec_distance_cosine(d,f)
3 0 0.00676 0.00676
1 1 0 0
4 2 0.01943 0.01943
2 3 0.01626 0.01626
0 4 0.00784 0.00784
select a,vec_distance(b,vec_fromtext('[5,4,3,2,1]')),vec_distance_euclidean(b,vec_fromtext('[5,4,3,2,1]')) from t1;
a vec_distance(b,vec_fromtext('[5,4,3,2,1]')) vec_distance_euclidean(b,vec_fromtext('[5,4,3,2,1]'))
0 6.32455 6.32455
1 5.74456 5.74456
2 6.08276 6.08276
3 6.40312 6.40312
4 6.78232 6.78232
select c,vec_distance(d,vec_fromtext('[5,4,3,2,1]')),vec_distance_cosine(d,vec_fromtext('[5,4,3,2,1]')) from t2;
c vec_distance(d,vec_fromtext('[5,4,3,2,1]')) vec_distance_cosine(d,vec_fromtext('[5,4,3,2,1]'))
0 0.36363 0.36363
1 0.29178 0.29178
2 0.32109 0.32109
3 0.34926 0.34926
4 0.35989 0.35989
select e,vec_distance(f,vec_fromtext('[5,4,3,2,1]')) from t3;
ERROR HY000: Cannot determine distance type for VEC_DISTANCE, index is not found
drop table t1, t2, t3;
#
# Item_func_vec_distance::do_get_copy()
#
create table t1 (a vector(1) not null, vector(a));
create algorithm=temptable view v1 as select * from t1;
select * from v1 where vec_distance(a,0x30303030) > 0;
a
drop view v1;
drop table t1;
#
# MDEV-35724 VEC_DISTANCE does not work in HAVING clause
#
create table t (v vector(1) not null, vector(v));
insert t values (0x31313131),(0x32323232);
select v from t having vec_distance(v,0x30303030) > 0;
v
1111
2222
drop table t;
#
# MDEV-35752 VEC_DISTANCE does not work in triggers
#
create table t (id int primary key default 1, v vector(1) not null default 0x30303030, vector(v), d float);
create trigger tr before insert on t for each row set new.d = vec_distance(new.v,0x30303030);
insert t (v) values (0x31313131);
select vec_distance(default(v), 0x31313131) from t;
vec_distance(default(v), 0x31313131)
0.00000
insert t (v) values (0x32323232) on duplicate key update d=vec_distance(values(v), 0x31313131);
drop table t;
#
# MDEV-35778 Server crashes in Item_func_vec_distance::fix_length_and_dec upon using VEC_DISTANCE with temptable view
#
create table t (x vector(1) not null, vector(x));
insert into t values (0x31313131),(0x32323232);
create algorithm=temptable view v as select * from t;
select * from v order by vec_distance(0x30303030, x);
x
1111
2222
drop view v;
drop table t;
# End of 11.8 tests

View File

@@ -71,3 +71,75 @@ select vec_fromtext(0x00000000);
--echo # MDEV-35220 Assertion `!item->null_value' failed upon VEC_TOTEXT call --echo # MDEV-35220 Assertion `!item->null_value' failed upon VEC_TOTEXT call
--echo # --echo #
select vec_totext(`null`) from (values (null),(0x00000000)) x; select vec_totext(`null`) from (values (null),(0x00000000)) x;
--echo # End of 11.7 tests
--echo #
--echo # MDEV-35450 VEC_DISTANCE() function to autouse the available index type
--echo #
# Fixtures: t1.b has a euclidean vector index, t2.d has a cosine vector index,
# t3.f has no vector index. t2 and t3 are filled from t1, so all three tables
# hold the same five rows.
create table t1 (a int primary key, b vector(5) not null, vector index (b) distance=euclidean);
insert t1 values (0,vec_fromtext('[1,2,3,4,5]')), (1,vec_fromtext('[2,2,3,4,5]')),
(2,vec_fromtext('[1,3,3,4,5]')), (3,vec_fromtext('[1,2,4,4,5]')),
(4,vec_fromtext('[1,2,4,5,5]'));
create table t2 (c int primary key, d vector(5) not null, vector index (d) distance=cosine);
insert t2 select * from t1;
create table t3 (e int primary key, f vector(5) not null);
insert t3 select * from t1;
# Each query below prints vec_distance() next to the explicit distance function
# it is expected to agree with. The join condition x=(y+11)*13%5 maps
# y=0,1,2,3,4 to x=3,1,4,2,0, so every row is paired with exactly one row of
# the other table. The replace_regex before each query cuts every decimal
# fraction in the output down to five digits.
#
# Both arguments are indexed columns; b (euclidean) comes first.
--replace_regex /(\.\d{5})\d+/\1/
select a,c,vec_distance(b,d),vec_distance_euclidean(b,d) from t1, t2 where a=(c+11)*13%5;
# Both arguments are indexed columns; d (cosine) comes first.
--replace_regex /(\.\d{5})\d+/\1/
select a,c,vec_distance(d,b),vec_distance_cosine(b,d) from t1, t2 where a=(c+11)*13%5;
# Only b is indexed (f has no vector index): compared against euclidean.
--replace_regex /(\.\d{5})\d+/\1/
select a,e,vec_distance(b,f),vec_distance_euclidean(b,f) from t1, t3 where a=(e+11)*13%5;
# Only d is indexed, and it is the second argument: compared against cosine.
--replace_regex /(\.\d{5})\d+/\1/
select e,c,vec_distance(f,d),vec_distance_cosine(d,f) from t2, t3 where e=(c+11)*13%5;
# Indexed column against a constant vector, euclidean index.
--replace_regex /(\.\d{5})\d+/\1/
select a,vec_distance(b,vec_fromtext('[5,4,3,2,1]')),vec_distance_euclidean(b,vec_fromtext('[5,4,3,2,1]')) from t1;
# Indexed column against a constant vector, cosine index.
--replace_regex /(\.\d{5})\d+/\1/
select c,vec_distance(d,vec_fromtext('[5,4,3,2,1]')),vec_distance_cosine(d,vec_fromtext('[5,4,3,2,1]')) from t2;
# Neither argument is a column with a vector index, so the statement is
# expected to fail with ER_VEC_DISTANCE_TYPE.
--error ER_VEC_DISTANCE_TYPE
select e,vec_distance(f,vec_fromtext('[5,4,3,2,1]')) from t3;
drop table t1, t2, t3;
--echo #
--echo # Item_func_vec_distance::do_get_copy()
--echo #
# vec_distance() is used in the WHERE clause of a query over a temptable view
# of a table with a vector index. t1 is left empty, so no rows are expected;
# presumably the point is only that the statement completes (the section title
# names Item_func_vec_distance::do_get_copy() as the code under test).
# 0x30303030 is a 4-byte literal compared against the vector(1) column.
create table t1 (a vector(1) not null, vector(a));
create algorithm=temptable view v1 as select * from t1;
select * from v1 where vec_distance(a,0x30303030) > 0;
drop view v1;
drop table t1;
--echo #
--echo # MDEV-35724 VEC_DISTANCE does not work in HAVING clause
--echo #
# vec_distance() on an indexed vector column inside HAVING. Both inserted rows
# differ from the 0x30303030 constant, and both are expected in the output.
create table t (v vector(1) not null, vector(v));
insert t values (0x31313131),(0x32323232);
select v from t having vec_distance(v,0x30303030) > 0;
drop table t;
--echo #
--echo # MDEV-35752 VEC_DISTANCE does not work in triggers
--echo #
# vec_distance() is applied to forms of the indexed column v other than a plain
# column reference: new.v inside a BEFORE INSERT trigger, default(v) in a
# select, and values(v) in ON DUPLICATE KEY UPDATE.
# id defaults to 1 and neither insert sets it, so the second insert collides
# with the first row on the primary key and takes the ON DUPLICATE KEY path.
create table t (id int primary key default 1, v vector(1) not null default 0x30303030, vector(v), d float);
create trigger tr before insert on t for each row set new.d = vec_distance(new.v,0x30303030);
insert t (v) values (0x31313131);
# The replace_regex cuts the decimal fraction of the result to five digits.
--replace_regex /(\.\d{5})\d+/\1/
select vec_distance(default(v), 0x31313131) from t;
insert t (v) values (0x32323232) on duplicate key update d=vec_distance(values(v), 0x31313131);
drop table t;
--echo #
--echo # MDEV-35778 Server crashes in Item_func_vec_distance::fix_length_and_dec upon using VEC_DISTANCE with temptable view
--echo #
# vec_distance() in ORDER BY over a temptable view, with the constant as the
# first argument and the view column (backed by an indexed vector column of t)
# as the second. Both rows are expected in the output.
create table t (x vector(1) not null, vector(x));
insert into t values (0x31313131),(0x32323232);
create algorithm=temptable view v as select * from t;
select * from v order by vec_distance(0x30303030, x);
drop view v;
drop table t;
--echo # End of 11.8 tests

View File

@@ -6287,6 +6287,22 @@ protected:
Create_func_vec_distance_cosine Create_func_vec_distance_cosine::s_singleton; Create_func_vec_distance_cosine Create_func_vec_distance_cosine::s_singleton;
/**
  Builder for the two-argument native function VEC_DISTANCE(a, b).

  The created item is an Item_func_vec_distance of kind AUTO, i.e. no distance
  metric is chosen here; the item's fix_length_and_dec() handles the AUTO kind
  later on.

  The class is a singleton: construction and destruction are protected and the
  only instance is s_singleton.
*/
class Create_func_vec_distance: public Create_func_arg2
{
public:
  static Create_func_vec_distance s_singleton;

  /**
    Create the item for VEC_DISTANCE(arg1, arg2).

    @param thd   current thread; the item is allocated on thd->mem_root
    @param arg1  first function argument
    @param arg2  second function argument
    @return the new item, as returned by the placement new on thd->mem_root
  */
  Item *create_2_arg(THD *thd, Item *arg1, Item *arg2) override
  {
    const Item_func_vec_distance::distance_kind autodetect=
      Item_func_vec_distance::AUTO;
    return new (thd->mem_root)
      Item_func_vec_distance(thd, arg1, arg2, autodetect);
  }

protected:
  Create_func_vec_distance() = default;
  virtual ~Create_func_vec_distance() = default;
};

Create_func_vec_distance Create_func_vec_distance::s_singleton;
class Create_func_vec_totext: public Create_func_arg1 class Create_func_vec_totext: public Create_func_arg1
{ {
public: public:
@@ -6549,6 +6565,7 @@ const Native_func_registry func_array[] =
{ { STRING_WITH_LEN("UUID_SHORT") }, BUILDER(Create_func_uuid_short)}, { { STRING_WITH_LEN("UUID_SHORT") }, BUILDER(Create_func_uuid_short)},
{ { STRING_WITH_LEN("VEC_DISTANCE_EUCLIDEAN") }, BUILDER(Create_func_vec_distance_euclidean)}, { { STRING_WITH_LEN("VEC_DISTANCE_EUCLIDEAN") }, BUILDER(Create_func_vec_distance_euclidean)},
{ { STRING_WITH_LEN("VEC_DISTANCE_COSINE") }, BUILDER(Create_func_vec_distance_cosine)}, { { STRING_WITH_LEN("VEC_DISTANCE_COSINE") }, BUILDER(Create_func_vec_distance_cosine)},
{ { STRING_WITH_LEN("VEC_DISTANCE") }, BUILDER(Create_func_vec_distance)},
{ { STRING_WITH_LEN("VEC_FROMTEXT") }, BUILDER(Create_func_vec_fromtext)}, { { STRING_WITH_LEN("VEC_FROMTEXT") }, BUILDER(Create_func_vec_fromtext)},
{ { STRING_WITH_LEN("VEC_TOTEXT") }, BUILDER(Create_func_vec_totext)}, { { STRING_WITH_LEN("VEC_TOTEXT") }, BUILDER(Create_func_vec_totext)},
{ { STRING_WITH_LEN("VERSION") }, BUILDER(Create_func_version)}, { { STRING_WITH_LEN("VERSION") }, BUILDER(Create_func_version)},

View File

@@ -59,6 +59,22 @@ bool Item_func_vec_distance::fix_length_and_dec(THD *thd)
switch (kind) { switch (kind) {
case EUCLIDEAN: calc_distance= calc_distance_euclidean; break; case EUCLIDEAN: calc_distance= calc_distance_euclidean; break;
case COSINE: calc_distance= calc_distance_cosine; break; case COSINE: calc_distance= calc_distance_cosine; break;
case AUTO:
for (uint i=0; i < 2; i++)
if (auto *item= dynamic_cast<Item_field*>(args[i]->real_item()))
{
TABLE_SHARE *share= item->field->orig_table->s;
Field *f= share->field[item->field->field_index];
KEY *kinfo= share->key_info;
for (uint j= share->keys; j < share->total_keys; j++)
if (kinfo[j].algorithm == HA_KEY_ALG_VECTOR && f->key_start.is_set(j))
{
kind= mhnsw_uses_distance(f->table, kinfo + j);
return fix_length_and_dec(thd);
}
}
my_error(ER_VEC_DISTANCE_TYPE, MYF(0));
return 1;
} }
set_maybe_null(); // if wrong dimensions set_maybe_null(); // if wrong dimensions
return Item_real_func::fix_length_and_dec(thd); return Item_real_func::fix_length_and_dec(thd);

View File

@@ -39,13 +39,14 @@ class Item_func_vec_distance: public Item_real_func
double (*calc_distance)(float *v1, float *v2, size_t v_len); double (*calc_distance)(float *v1, float *v2, size_t v_len);
public: public:
enum distance_kind { EUCLIDEAN, COSINE } kind; enum distance_kind { EUCLIDEAN, COSINE, AUTO } kind;
Item_func_vec_distance(THD *thd, Item *a, Item *b, distance_kind kind); Item_func_vec_distance(THD *thd, Item *a, Item *b, distance_kind kind);
LEX_CSTRING func_name_cstring() const override LEX_CSTRING func_name_cstring() const override
{ {
static LEX_CSTRING name[3]= { static LEX_CSTRING name[3]= {
{ STRING_WITH_LEN("VEC_DISTANCE_EUCLIDEAN") }, { STRING_WITH_LEN("VEC_DISTANCE_EUCLIDEAN") },
{ STRING_WITH_LEN("VEC_DISTANCE_COSINE") } { STRING_WITH_LEN("VEC_DISTANCE_COSINE") },
{ STRING_WITH_LEN("VEC_DISTANCE") }
}; };
return name[kind]; return name[kind];
} }

View File

@@ -12291,3 +12291,5 @@ ER_VECTOR_BINARY_FORMAT_INVALID
eng "Invalid binary vector format. Must use IEEE standard float representation in little-endian format. Use VEC_FromText() to generate it." eng "Invalid binary vector format. Must use IEEE standard float representation in little-endian format. Use VEC_FromText() to generate it."
ER_VECTOR_FORMAT_INVALID ER_VECTOR_FORMAT_INVALID
eng "Invalid vector format at offset: %d for '%-.100s'. Must be a valid JSON array of numbers." eng "Invalid vector format at offset: %d for '%-.100s'. Must be a valid JSON array of numbers."
ER_VEC_DISTANCE_TYPE
eng "Cannot determine distance type for VEC_DISTANCE, index is not found"