diff --git a/mysql-test/main/vector_funcs.result b/mysql-test/main/vector_funcs.result index 65e90b6c9e9..07de56eb196 100644 --- a/mysql-test/main/vector_funcs.result +++ b/mysql-test/main/vector_funcs.result @@ -155,3 +155,103 @@ select vec_totext(`null`) from (values (null),(0x00000000)) x; vec_totext(`null`) NULL [0] +# End of 11.7 tests +# +# MDEV-35450 VEC_DISTANCE() function to autouse the available index type +# +create table t1 (a int primary key, b vector(5) not null, vector index (b) distance=euclidean); +insert t1 values (0,vec_fromtext('[1,2,3,4,5]')), (1,vec_fromtext('[2,2,3,4,5]')), +(2,vec_fromtext('[1,3,3,4,5]')), (3,vec_fromtext('[1,2,4,4,5]')), +(4,vec_fromtext('[1,2,4,5,5]')); +create table t2 (c int primary key, d vector(5) not null, vector index (d) distance=cosine); +insert t2 select * from t1; +create table t3 (e int primary key, f vector(5) not null); +insert t3 select * from t1; +select a,c,vec_distance(b,d),vec_distance_euclidean(b,d) from t1, t2 where a=(c+11)*13%5; +a c vec_distance(b,d) vec_distance_euclidean(b,d) +3 0 1 1 +1 1 0 0 +4 2 1.73205 1.73205 +2 3 1.41421 1.41421 +0 4 1.41421 1.41421 +select a,c,vec_distance(d,b),vec_distance_cosine(b,d) from t1, t2 where a=(c+11)*13%5; +a c vec_distance(d,b) vec_distance_cosine(b,d) +3 0 0.00676 0.00676 +1 1 0 0 +4 2 0.01943 0.01943 +2 3 0.01626 0.01626 +0 4 0.00784 0.00784 +select a,e,vec_distance(b,f),vec_distance_euclidean(b,f) from t1, t3 where a=(e+11)*13%5; +a e vec_distance(b,f) vec_distance_euclidean(b,f) +3 0 1 1 +1 1 0 0 +4 2 1.73205 1.73205 +2 3 1.41421 1.41421 +0 4 1.41421 1.41421 +select e,c,vec_distance(f,d),vec_distance_cosine(d,f) from t2, t3 where e=(c+11)*13%5; +e c vec_distance(f,d) vec_distance_cosine(d,f) +3 0 0.00676 0.00676 +1 1 0 0 +4 2 0.01943 0.01943 +2 3 0.01626 0.01626 +0 4 0.00784 0.00784 +select a,vec_distance(b,vec_fromtext('[5,4,3,2,1]')),vec_distance_euclidean(b,vec_fromtext('[5,4,3,2,1]')) from t1; +a vec_distance(b,vec_fromtext('[5,4,3,2,1]')) 
vec_distance_euclidean(b,vec_fromtext('[5,4,3,2,1]')) +0 6.32455 6.32455 +1 5.74456 5.74456 +2 6.08276 6.08276 +3 6.40312 6.40312 +4 6.78232 6.78232 +select c,vec_distance(d,vec_fromtext('[5,4,3,2,1]')),vec_distance_cosine(d,vec_fromtext('[5,4,3,2,1]')) from t2; +c vec_distance(d,vec_fromtext('[5,4,3,2,1]')) vec_distance_cosine(d,vec_fromtext('[5,4,3,2,1]')) +0 0.36363 0.36363 +1 0.29178 0.29178 +2 0.32109 0.32109 +3 0.34926 0.34926 +4 0.35989 0.35989 +select e,vec_distance(f,vec_fromtext('[5,4,3,2,1]')) from t3; +ERROR HY000: Cannot determine distance type for VEC_DISTANCE, index is not found +drop table t1, t2, t3; +# +# Item_func_vec_distance::do_get_copy() +# +create table t1 (a vector(1) not null, vector(a)); +create algorithm=temptable view v1 as select * from t1; +select * from v1 where vec_distance(a,0x30303030) > 0; +a +drop view v1; +drop table t1; +# +# MDEV-35724 VEC_DISTANCE does not work in HAVING clause +# +create table t (v vector(1) not null, vector(v)); +insert t values (0x31313131),(0x32323232); +select v from t having vec_distance(v,0x30303030) > 0; +v +1111 +2222 +drop table t; +# +# MDEV-35752 VEC_DISTANCE does not work in triggers +# +create table t (id int primary key default 1, v vector(1) not null default 0x30303030, vector(v), d float); +create trigger tr before insert on t for each row set new.d = vec_distance(new.v,0x30303030); +insert t (v) values (0x31313131); +select vec_distance(default(v), 0x31313131) from t; +vec_distance(default(v), 0x31313131) +0.00000 +insert t (v) values (0x32323232) on duplicate key update d=vec_distance(values(v), 0x31313131); +drop table t; +# +# MDEV-35778 Server crashes in Item_func_vec_distance::fix_length_and_dec upon using VEC_DISTANCE with temptable view +# +create table t (x vector(1) not null, vector(x)); +insert into t values (0x31313131),(0x32323232); +create algorithm=temptable view v as select * from t; +select * from v order by vec_distance(0x30303030, x); +x +1111 +2222 +drop view v; +drop 
table t; +# End of 11.8 tests diff --git a/mysql-test/main/vector_funcs.test b/mysql-test/main/vector_funcs.test index 5f2253ae6ad..b1e7c433490 100644 --- a/mysql-test/main/vector_funcs.test +++ b/mysql-test/main/vector_funcs.test @@ -71,3 +71,75 @@ select vec_fromtext(0x00000000); --echo # MDEV-35220 Assertion `!item->null_value' failed upon VEC_TOTEXT call --echo # select vec_totext(`null`) from (values (null),(0x00000000)) x; + +--echo # End of 11.7 tests + +--echo # +--echo # MDEV-35450 VEC_DISTANCE() function to autouse the available index type +--echo # + +create table t1 (a int primary key, b vector(5) not null, vector index (b) distance=euclidean); +insert t1 values (0,vec_fromtext('[1,2,3,4,5]')), (1,vec_fromtext('[2,2,3,4,5]')), + (2,vec_fromtext('[1,3,3,4,5]')), (3,vec_fromtext('[1,2,4,4,5]')), + (4,vec_fromtext('[1,2,4,5,5]')); +create table t2 (c int primary key, d vector(5) not null, vector index (d) distance=cosine); +insert t2 select * from t1; +create table t3 (e int primary key, f vector(5) not null); +insert t3 select * from t1; + +--replace_regex /(\.\d{5})\d+/\1/ +select a,c,vec_distance(b,d),vec_distance_euclidean(b,d) from t1, t2 where a=(c+11)*13%5; +--replace_regex /(\.\d{5})\d+/\1/ +select a,c,vec_distance(d,b),vec_distance_cosine(b,d) from t1, t2 where a=(c+11)*13%5; +--replace_regex /(\.\d{5})\d+/\1/ +select a,e,vec_distance(b,f),vec_distance_euclidean(b,f) from t1, t3 where a=(e+11)*13%5; +--replace_regex /(\.\d{5})\d+/\1/ +select e,c,vec_distance(f,d),vec_distance_cosine(d,f) from t2, t3 where e=(c+11)*13%5; +--replace_regex /(\.\d{5})\d+/\1/ +select a,vec_distance(b,vec_fromtext('[5,4,3,2,1]')),vec_distance_euclidean(b,vec_fromtext('[5,4,3,2,1]')) from t1; +--replace_regex /(\.\d{5})\d+/\1/ +select c,vec_distance(d,vec_fromtext('[5,4,3,2,1]')),vec_distance_cosine(d,vec_fromtext('[5,4,3,2,1]')) from t2; +--error ER_VEC_DISTANCE_TYPE +select e,vec_distance(f,vec_fromtext('[5,4,3,2,1]')) from t3; + +drop table t1, t2, t3; + +--echo # 
+--echo # Item_func_vec_distance::do_get_copy()
+--echo #
+create table t1 (a vector(1) not null, vector(a));
+create algorithm=temptable view v1 as select * from t1;
+select * from v1 where vec_distance(a,0x30303030) > 0;
+drop view v1;
+drop table t1;
+
+--echo #
+--echo # MDEV-35724 VEC_DISTANCE does not work in HAVING clause
+--echo #
+create table t (v vector(1) not null, vector(v));
+insert t values (0x31313131),(0x32323232);
+select v from t having vec_distance(v,0x30303030) > 0;
+drop table t;
+
+--echo #
+--echo # MDEV-35752 VEC_DISTANCE does not work in triggers
+--echo #
+create table t (id int primary key default 1, v vector(1) not null default 0x30303030, vector(v), d float);
+create trigger tr before insert on t for each row set new.d = vec_distance(new.v,0x30303030);
+insert t (v) values (0x31313131);
+--replace_regex /(\.\d{5})\d+/\1/
+select vec_distance(default(v), 0x31313131) from t;
+insert t (v) values (0x32323232) on duplicate key update d=vec_distance(values(v), 0x31313131);
+drop table t;
+
+--echo #
+--echo # MDEV-35778 Server crashes in Item_func_vec_distance::fix_length_and_dec upon using VEC_DISTANCE with temptable view
+--echo #
+create table t (x vector(1) not null, vector(x));
+insert into t values (0x31313131),(0x32323232);
+create algorithm=temptable view v as select * from t;
+select * from v order by vec_distance(0x30303030, x);
+drop view v;
+drop table t;
+
+--echo # End of 11.8 tests
diff --git a/sql/item_create.cc b/sql/item_create.cc
index ae0802d5fdf..4ce151a1294 100644
--- a/sql/item_create.cc
+++ b/sql/item_create.cc
@@ -6287,6 +6287,28 @@ protected:
 
 Create_func_vec_distance_cosine Create_func_vec_distance_cosine::s_singleton;
 
+/**
+  Builder for the two-argument SQL function VEC_DISTANCE().
+
+  Creates an Item_func_vec_distance of kind AUTO; the actual distance kind
+  is picked in Item_func_vec_distance::fix_length_and_dec().
+*/
+class Create_func_vec_distance: public Create_func_arg2
+{
+public:
+  Item *create_2_arg(THD *thd, Item *arg1, Item *arg2) override
+  { return new (thd->mem_root)
+    Item_func_vec_distance(thd, arg1, arg2, Item_func_vec_distance::AUTO); }
+
+  static Create_func_vec_distance s_singleton;
+
+protected:
+  Create_func_vec_distance() = default;
+  virtual ~Create_func_vec_distance() = default;
+};
+
+Create_func_vec_distance Create_func_vec_distance::s_singleton;
+
 class Create_func_vec_totext: public Create_func_arg1
 {
 public:
@@ -6549,6 +6571,7 @@ const Native_func_registry func_array[] =
   { { STRING_WITH_LEN("UUID_SHORT") }, BUILDER(Create_func_uuid_short)},
   { { STRING_WITH_LEN("VEC_DISTANCE_EUCLIDEAN") }, BUILDER(Create_func_vec_distance_euclidean)},
   { { STRING_WITH_LEN("VEC_DISTANCE_COSINE") }, BUILDER(Create_func_vec_distance_cosine)},
+  { { STRING_WITH_LEN("VEC_DISTANCE") }, BUILDER(Create_func_vec_distance)},
   { { STRING_WITH_LEN("VEC_FROMTEXT") }, BUILDER(Create_func_vec_fromtext)},
   { { STRING_WITH_LEN("VEC_TOTEXT") }, BUILDER(Create_func_vec_totext)},
   { { STRING_WITH_LEN("VERSION") }, BUILDER(Create_func_version)},
diff --git a/sql/item_vectorfunc.cc b/sql/item_vectorfunc.cc
index f4bc5bbe5b4..706f67bc19c 100644
--- a/sql/item_vectorfunc.cc
+++ b/sql/item_vectorfunc.cc
@@ -59,6 +59,29 @@ bool Item_func_vec_distance::fix_length_and_dec(THD *thd)
   switch (kind) {
   case EUCLIDEAN: calc_distance= calc_distance_euclidean; break;
   case COSINE: calc_distance= calc_distance_cosine; break;
+  case AUTO:
+    /*
+      VEC_DISTANCE(): look at both arguments for a table column that has a
+      HA_KEY_ALG_VECTOR key with the column's key_start bit set, and adopt
+      the distance kind that mhnsw_uses_distance() reports for that key.
+    */
+    for (uint i=0; i < 2; i++)
+      if (auto *item= dynamic_cast<Item_field*>(args[i]->real_item()))
+      {
+        TABLE_SHARE *share= item->field->orig_table->s;
+        Field *f= share->field[item->field->field_index];
+        KEY *kinfo= share->key_info;
+        for (uint j= share->keys; j < share->total_keys; j++)
+          if (kinfo[j].algorithm == HA_KEY_ALG_VECTOR && f->key_start.is_set(j))
+          {
+            kind= mhnsw_uses_distance(f->table, kinfo + j);
+            // kind was just replaced; run the switch again with the new value
+            return fix_length_and_dec(thd);
+          }
+      }
+    // neither argument led to a usable vector key
+    my_error(ER_VEC_DISTANCE_TYPE, MYF(0));
+    return 1;
   }
   set_maybe_null(); // if wrong dimensions
   return Item_real_func::fix_length_and_dec(thd);
diff --git a/sql/item_vectorfunc.h b/sql/item_vectorfunc.h
index 6e5a956c033..8d81d851daf 100644
--- a/sql/item_vectorfunc.h
+++ b/sql/item_vectorfunc.h
@@ -39,13 +39,15 @@ class Item_func_vec_distance: public Item_real_func
   double (*calc_distance)(float *v1, float *v2, size_t v_len);
 
 public:
-  enum distance_kind { EUCLIDEAN, COSINE } kind;
+  // values index name[] in func_name_cstring(); keep the two in the same order
+  enum distance_kind { EUCLIDEAN, COSINE, AUTO } kind;
   Item_func_vec_distance(THD *thd, Item *a, Item *b, distance_kind kind);
   LEX_CSTRING func_name_cstring() const override
   {
     static LEX_CSTRING name[3]= {
       { STRING_WITH_LEN("VEC_DISTANCE_EUCLIDEAN") },
-      { STRING_WITH_LEN("VEC_DISTANCE_COSINE") }
+      { STRING_WITH_LEN("VEC_DISTANCE_COSINE") },
+      { STRING_WITH_LEN("VEC_DISTANCE") }
     };
     return name[kind];
   }
diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt
index 181cb5fb0f2..73c146472fe 100644
--- a/sql/share/errmsg-utf8.txt
+++ b/sql/share/errmsg-utf8.txt
@@ -12291,3 +12291,5 @@ ER_VECTOR_BINARY_FORMAT_INVALID
         eng "Invalid binary vector format. Must use IEEE standard float representation in little-endian format. Use VEC_FromText() to generate it."
 ER_VECTOR_FORMAT_INVALID
         eng "Invalid vector format at offset: %d for '%-.100s'. Must be a valid JSON array of numbers."
+ER_VEC_DISTANCE_TYPE
+        eng "Cannot determine distance type for VEC_DISTANCE, index is not found"