
initial support for vector indexes

MDEV-33407 Parser support for vector indexes

The syntax is

  create table t1 (... vector index (v) ...);

limitations (examples below):
* v must be a binary string and NOT NULL
* only one vector index per table
* temporary tables are not supported
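For example, from the test included in this commit, violating the last two
limitations fails with:

  create temporary table t1 (id int auto_increment primary key,
                             v blob not null, vector index (v));
  -- ERROR HY000: Cannot create VECTOR index on temporary MyISAM table

  create table t1 (id int auto_increment primary key,
                   u blob not null, vector index (u),
                   v blob not null, vector index (v));
  -- ERROR 42000: This version of MariaDB doesn't yet support 'multiple VECTOR indexes'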

MDEV-33404 Engine-independent indexes: subtable method

added support for so-called "high-level indexes": they are not visible
to the storage engine and are implemented on the SQL level. For every
such index in a table, say, t1, the server implicitly creates a second
table named, e.g., t1#i#05 (where "05" is the index number in t1).
This table has a fixed structure, no frm file, is not accessible
directly, doesn't go into the table cache, and needs no MDLs.
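The fixed structure of this hidden table is defined by mhnsw_hlindex_table
in sql/vector_mhnsw.cc (added further down in this commit):

  CREATE TABLE i (
    src varbinary(255) not null,
    dst varbinary(255) not null,
    index (src)
  );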

MDEV-33406 basic optimizer support for k-NN searches

for a query like SELECT ... ORDER BY func() the optimizer will use
item_func->part_of_sortkey() to decide which keys can be used
to resolve the ORDER BY.
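For example, the test included in this commit exercises this with a
k-NN query of the form:

  select id, vec_distance(v, x'b047263c9f87233fcfd27e3eae493e3f0329f43e') d
  from t1 order by d limit 3;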
Sergei Golubchik 2024-01-17 15:32:45 +01:00
parent 9ccf02a9a7
commit d6add9a03d
28 changed files with 888 additions and 89 deletions

View File

@ -109,7 +109,8 @@ enum ha_key_alg {
HA_KEY_ALG_HASH= 3, /* HASH keys (HEAP tables) */
HA_KEY_ALG_FULLTEXT= 4, /* FULLTEXT */
HA_KEY_ALG_LONG_HASH= 5, /* long BLOB keys */
HA_KEY_ALG_UNIQUE_HASH= 6 /* Internal UNIQUE hash (Aria) */
HA_KEY_ALG_UNIQUE_HASH= 6, /* Internal UNIQUE hash (Aria) */
HA_KEY_ALG_VECTOR= 7 /* Vector search index */
};
/* Storage media types */

View File

@ -112,7 +112,7 @@ SET(SQL_EMBEDDED_SOURCES emb_qcache.cc libmysqld.c lib_sql.cc
../sql/sql_alter.cc ../sql/sql_partition_admin.cc
../sql/event_parse_data.cc
../sql/sql_signal.cc
../sql/sys_vars.cc
../sql/sys_vars.cc ../sql/vector_mhnsw.cc
${CMAKE_BINARY_DIR}/sql/sql_builtin.cc
../sql/mdl.cc ../sql/transaction.cc
../sql/sql_join_cache.cc

View File

@ -0,0 +1,186 @@
create temporary table t1 (id int auto_increment primary key, v blob not null, vector index (v));
ERROR HY000: Cannot create VECTOR index on temporary MyISAM table
create table t1 (id int auto_increment primary key,
u blob not null, vector index (u),
v blob not null, vector index (v));
ERROR 42000: This version of MariaDB doesn't yet support 'multiple VECTOR indexes'
create table t1 (id int auto_increment primary key, v blob not null, vector index (v));
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`v` blob NOT NULL,
PRIMARY KEY (`id`),
VECTOR KEY `v` (`v`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_uca1400_ai_ci
show keys from t1;
Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment Ignored
t1 0 PRIMARY 1 id A 0 NULL NULL BTREE NO
t1 1 v 1 v A NULL 1 NULL VECTOR NO
select * from information_schema.statistics where table_name='t1';
TABLE_CATALOG def
TABLE_SCHEMA test
TABLE_NAME t1
NON_UNIQUE 0
INDEX_SCHEMA test
INDEX_NAME PRIMARY
SEQ_IN_INDEX 1
COLUMN_NAME id
COLLATION A
CARDINALITY 0
SUB_PART NULL
PACKED NULL
NULLABLE
INDEX_TYPE BTREE
COMMENT
INDEX_COMMENT
IGNORED NO
TABLE_CATALOG def
TABLE_SCHEMA test
TABLE_NAME t1
NON_UNIQUE 1
INDEX_SCHEMA test
INDEX_NAME v
SEQ_IN_INDEX 1
COLUMN_NAME v
COLLATION A
CARDINALITY NULL
SUB_PART 1
PACKED NULL
NULLABLE
INDEX_TYPE VECTOR
COMMENT
INDEX_COMMENT
IGNORED NO
insert t1 (v) values (x'e360d63ebe554f3fcdbc523f4522193f5236083d'),
(x'f511303f72224a3fdd05fe3eb22a133ffae86a3f'),
(x'f09baa3ea172763f123def3e0c7fe53e288bf33e'),
(x'b97a523f2a193e3eb4f62e3f2d23583e9dd60d3f'),
(x'f7c5df3e984b2b3e65e59d3d7376db3eac63773e'),
(x'de01453ffa486d3f10aa4d3fdd66813c71cb163f'),
(x'76edfc3e4b57243f10f8423fb158713f020bda3e'),
(x'56926c3fdf098d3e2c8c5e3d1ad4953daa9d0b3e'),
(x'7b713f3e5258323f80d1113d673b2b3f66e3583f'),
(x'6ca1d43e9df91b3fe580da3e1c247d3f147cf33e');
select id, hex(v) from t1;
id hex(v)
1 E360D63EBE554F3FCDBC523F4522193F5236083D
2 F511303F72224A3FDD05FE3EB22A133FFAE86A3F
3 F09BAA3EA172763F123DEF3E0C7FE53E288BF33E
4 B97A523F2A193E3EB4F62E3F2D23583E9DD60D3F
5 F7C5DF3E984B2B3E65E59D3D7376DB3EAC63773E
6 DE01453FFA486D3F10AA4D3FDD66813C71CB163F
7 76EDFC3E4B57243F10F8423FB158713F020BDA3E
8 56926C3FDF098D3E2C8C5E3D1AD4953DAA9D0B3E
9 7B713F3E5258323F80D1113D673B2B3F66E3583F
10 6CA1D43E9DF91B3FE580DA3E1C247D3F147CF33E
flush tables;
select id,vec_distance(v, x'b047263c9f87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3;
id d
9 0.4719976290006591
10 0.5069011044450041
3 0.5865673124650332
select t1.id as id1, t2.id as id2, vec_distance(t1.v, t2.v) from t1, t1 as t2 order by 3,1,2;
id1 id2 vec_distance(t1.v, t2.v)
1 1 0
2 2 0
3 3 0
4 4 0
5 5 0
6 6 0
7 7 0
8 8 0
9 9 0
10 10 0
7 10 0.35209010323904116
10 7 0.35209010323904116
1 7 0.557267332724855
7 1 0.557267332724855
2 3 0.6065128837978769
3 2 0.6065128837978769
1 3 0.6128238020507096
3 1 0.6128238020507096
5 8 0.6219995745138945
8 5 0.6219995745138945
3 10 0.6523185662547816
10 3 0.6523185662547816
9 10 0.6732681362788765
10 9 0.6732681362788765
3 7 0.6799892416547949
7 3 0.6799892416547949
3 9 0.6820752294088018
9 3 0.6820752294088018
2 10 0.6916305331777215
10 2 0.6916305331777215
2 9 0.6966650510789955
9 2 0.6966650510789955
3 6 0.7102823580937639
6 3 0.7102823580937639
2 7 0.7120217580666971
7 2 0.7120217580666971
2 6 0.7351618106552689
6 2 0.7351618106552689
1 10 0.7386864491588024
10 1 0.7386864491588024
4 6 0.7784357824370262
6 4 0.7784357824370262
4 8 0.7795837407361241
8 4 0.7795837407361241
4 5 0.8132007346697969
5 4 0.8132007346697969
2 4 0.8260925223296488
4 2 0.8260925223296488
5 10 0.8286488932765299
10 5 0.8286488932765299
5 9 0.8769351333060768
9 5 0.8769351333060768
1 6 0.8861410875047832
6 1 0.8861410875047832
3 5 0.9224201772876247
5 3 0.9224201772876247
4 7 0.9347916246876117
7 4 0.9347916246876117
7 9 0.9364253407685257
9 7 0.9364253407685257
3 4 0.9757105842688992
4 3 0.9757105842688992
1 2 0.9810272439433514
2 1 0.9810272439433514
1 4 0.9965475544626712
4 1 0.9965475544626712
5 7 0.9976863778073342
7 5 0.9976863778073342
4 10 1.0109345944029724
10 4 1.0109345944029724
1 5 1.0208359400987237
5 1 1.0208359400987237
6 7 1.0221332668982412
7 6 1.0221332668982412
2 5 1.050769316594881
5 2 1.050769316594881
6 8 1.103420381318026
8 6 1.103420381318026
3 8 1.1170300826294572
8 3 1.1170300826294572
6 10 1.1523451990991307
10 6 1.1523451990991307
1 9 1.1637750565139302
9 1 1.1637750565139302
2 8 1.1736571017573874
8 2 1.1736571017573874
4 9 1.1746893942711878
9 4 1.1746893942711878
1 8 1.1909959973982214
8 1 1.1909959973982214
8 10 1.209359617652948
10 8 1.209359617652948
6 9 1.214529873940304
9 6 1.214529873940304
5 6 1.227278506501395
6 5 1.227278506501395
8 9 1.2575258643523053
9 8 1.2575258643523053
7 8 1.288239696195716
8 7 1.288239696195716
drop table t1;
db.opt

View File

@ -0,0 +1,31 @@
error ER_NO_INDEX_ON_TEMPORARY;
create temporary table t1 (id int auto_increment primary key, v blob not null, vector index (v));
error ER_NOT_SUPPORTED_YET;
create table t1 (id int auto_increment primary key,
u blob not null, vector index (u),
v blob not null, vector index (v));
create table t1 (id int auto_increment primary key, v blob not null, vector index (v));
show create table t1;
show keys from t1;
query_vertical select * from information_schema.statistics where table_name='t1';
# print unpack(H40,pack(f5,map{rand}1..5))
insert t1 (v) values (x'e360d63ebe554f3fcdbc523f4522193f5236083d'),
(x'f511303f72224a3fdd05fe3eb22a133ffae86a3f'),
(x'f09baa3ea172763f123def3e0c7fe53e288bf33e'),
(x'b97a523f2a193e3eb4f62e3f2d23583e9dd60d3f'),
(x'f7c5df3e984b2b3e65e59d3d7376db3eac63773e'),
(x'de01453ffa486d3f10aa4d3fdd66813c71cb163f'),
(x'76edfc3e4b57243f10f8423fb158713f020bda3e'),
(x'56926c3fdf098d3e2c8c5e3d1ad4953daa9d0b3e'),
(x'7b713f3e5258323f80d1113d673b2b3f66e3583f'),
(x'6ca1d43e9df91b3fe580da3e1c247d3f147cf33e');
select id, hex(v) from t1;
flush tables;
select id,vec_distance(v, x'b047263c9f87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3;
select t1.id as id1, t2.id as id2, vec_distance(t1.v, t2.v) from t1, t1 as t2 order by 3,1,2;
drop table t1;
let $datadir=`select @@datadir`;
list_files $datadir/test;

View File

@ -113,7 +113,7 @@ SET (SQL_SOURCE
mf_iocache.cc my_decimal.cc
mysqld.cc net_serv.cc keycaches.cc
../sql-common/client_plugin.c
opt_range.cc
opt_range.cc vector_mhnsw.cc
opt_rewrite_date_cmp.cc
opt_rewrite_remove_casefold.cc
opt_sum.cc

View File

@ -413,10 +413,10 @@ void Filesort_buffer::sort_buffer(const Sort_param *param, uint count)
static
size_t get_sort_length(THD *thd, Item_field *item)
size_t get_sort_length(THD *thd, Item *item)
{
SORT_FIELD_ATTR sort_attr;
sort_attr.type= ((item->field)->is_packable() ?
sort_attr.type= (item->type_handler()->is_packable() ?
SORT_FIELD_ATTR::VARIABLE_SIZE :
SORT_FIELD_ATTR::FIXED_SIZE);
item->type_handler()->sort_length(thd, item, &sort_attr);
@ -452,8 +452,7 @@ double cost_of_filesort(TABLE *table, ORDER *order_by, ha_rows rows_to_read,
for (ORDER *ptr= order_by; ptr ; ptr= ptr->next)
{
Item_field *field= (Item_field*) (*ptr->item)->real_item();
size_t length= get_sort_length(thd, field);
size_t length= get_sort_length(thd, *ptr->item);
set_if_smaller(length, thd->variables.max_sort_length);
sort_len+= (uint) length;
}

View File

@ -48,6 +48,7 @@
#include "rowid_filter.h"
#include "mysys_err.h"
#include "optimizer_defaults.h"
#include "vector_mhnsw.h"
#ifdef WITH_PARTITION_STORAGE_ENGINE
#include "ha_partition.h"
@ -3575,7 +3576,7 @@ PSI_table_share *handler::ha_table_share_psi() const
const char *handler::index_type(uint key_number)
{
static const char* alg2str[]= { "???", "BTREE", "SPATIAL", "HASH",
"FULLTEXT", "HASH", "HASH" };
"FULLTEXT", "HASH", "HASH", "VECTOR" };
enum ha_key_alg alg= table_share->key_info[key_number].algorithm;
if (!alg)
{
@ -6401,6 +6402,36 @@ int ha_create_table(THD *thd, const char *path, const char *db,
{
PSI_CALL_drop_table_share(is_tmp, share.db.str, (uint)share.db.length,
share.table_name.str, (uint)share.table_name.length);
goto err;
}
/* create secondary tables for high level indexes */
if (share.hlindexes())
{
/* as of now: only one vector index can be here */
DBUG_ASSERT(share.hlindexes() == 1);
DBUG_ASSERT(share.key_info[share.keys].algorithm == HA_KEY_ALG_VECTOR);
TABLE_SHARE index_share;
char file_name[FN_REFLEN+1];
HA_CREATE_INFO index_cinfo;
char *path_end= strmov(file_name, path);
if ((error= share.path.length > sizeof(file_name) - HLINDEX_BUF_LEN))
goto err;
for (uint i= share.keys; i < share.total_keys; i++)
{
my_snprintf(path_end, HLINDEX_BUF_LEN, HLINDEX_TEMPLATE, i);
init_tmp_table_share(thd, &index_share, db, 0, table_name, file_name, 1);
index_share.db_plugin= share.db_plugin;
if ((error= index_share.init_from_sql_statement_string(thd, false,
mhnsw_hlindex_table.str, mhnsw_hlindex_table.length)))
break;
if ((error= ha_create_table_from_share(thd, &index_share, &index_cinfo)))
break;
}
free_table_share(&index_share);
}
err:
@ -7651,6 +7682,8 @@ int handler::ha_reset()
delete lookup_handler;
lookup_handler= this;
}
if (table->reset_hlindexes())
return 1;
DBUG_RETURN(reset());
}
@ -8042,8 +8075,12 @@ bool handler::prepare_for_row_logging()
int handler::prepare_for_insert(bool do_create)
{
if (table->open_hlindexes_for_write())
return 1;
/* Preparation for unique of blob's */
if (table->s->long_unique_table || table->s->period.unique_keys)
if (table->s->long_unique_table || table->s->period.unique_keys ||
table->hlindex)
{
if (do_create && create_lookup_handler())
return 1;
@ -8093,7 +8130,7 @@ int handler::ha_write_row(const uchar *buf)
{ error= write_row(buf); })
MYSQL_INSERT_ROW_DONE(error);
if (likely(!error))
if (!error && !((error= table->update_hlindexes())))
{
rows_stats.inserted++;
Log_func *log_func= Write_rows_log_event::binlog_row_logging_function;

View File

@ -3253,7 +3253,6 @@ protected:
Table_flags cached_table_flags; /* Set on init() and open() */
ha_rows estimation_rows_to_insert;
handler *lookup_handler;
/*
Statistics for the query. Prefer to use the handler_stats pointer
below rather than this object directly as the clone() method will
@ -3268,6 +3267,7 @@ public:
OPTIMIZER_COSTS *costs; /* Points to table->share->costs */
uchar *ref; /* Pointer to current row */
uchar *dup_ref; /* Pointer to duplicate row */
handler *lookup_handler;
uchar *lookup_buffer;
/* General statistics for the table like number of row, file sizes etc */
@ -3472,8 +3472,8 @@ public:
handler(handlerton *ht_arg, TABLE_SHARE *share_arg)
:table_share(share_arg), table(0),
estimation_rows_to_insert(0),
lookup_handler(this),
ht(ht_arg), costs(0), ref(0), lookup_buffer(NULL), handler_stats(NULL),
ht(ht_arg), costs(0), ref(0), lookup_handler(this),
lookup_buffer(NULL), handler_stats(NULL),
end_range(NULL), implicit_emptied(0),
mark_trx_read_write_done(0),
check_table_binlog_row_based_done(0),

View File

@ -2073,6 +2073,7 @@ public:
}
virtual COND *remove_eq_conds(THD *thd, Item::cond_result *cond_value,
bool top_level);
virtual key_map part_of_sortkey() const { return key_map(0); }
virtual void add_key_fields(JOIN *join, KEY_FIELD **key_fields,
uint *and_level,
table_map usable_tables,
@ -3747,6 +3748,7 @@ public:
{
return field->field_length;
}
key_map part_of_sortkey() const override { return field->part_of_sortkey; }
void reset_field(Field *f);
bool fix_fields(THD *, Item **) override;
void fix_after_pullout(st_select_lex *new_parent, Item **ref, bool merge)
@ -5938,6 +5940,10 @@ public:
{
return ref ? (*ref)->type_extra_attributes() : Type_extra_attributes();
}
key_map part_of_sortkey() const override
{
return ref ? (*ref)->part_of_sortkey() : Item::part_of_sortkey();
}
bool walk(Item_processor processor, bool walk_subquery, void *arg) override
{

View File

@ -24,6 +24,19 @@
#include <my_global.h>
#include "item.h"
key_map Item_func_vec_distance::part_of_sortkey() const
{
key_map map(0);
if (Item_field *item= get_field_arg())
{
Field *f= item->field;
for (uint i= f->table->s->keys; i < f->table->s->total_keys; i++)
if (f->table->s->key_info[i].algorithm == HA_KEY_ALG_VECTOR &&
f->key_start.is_set(i))
map.set_bit(i);
}
return map;
}
double Item_func_vec_distance::val_real()
{

View File

@ -22,7 +22,14 @@
class Item_func_vec_distance: public Item_real_func
{
protected:
Item_field *get_field_arg() const
{
if (args[0]->type() == Item::FIELD_ITEM && args[1]->const_item())
return (Item_field*)(args[0]);
if (args[1]->type() == Item::FIELD_ITEM && args[0]->const_item())
return (Item_field*)(args[1]);
return NULL;
}
bool check_arguments() const override
{
return check_argument_types_or_binary(NULL, 0, arg_count);
@ -41,6 +48,7 @@ public:
static LEX_CSTRING name= {STRING_WITH_LEN("vec_distance") };
return name;
}
key_map part_of_sortkey() const override;
Item *do_get_copy(THD *thd) const override
{ return get_item_copy<Item_func_vec_distance>(thd, this); }
};

View File

@ -725,6 +725,7 @@ SYMBOL symbols[] = {
{ "VIEW", SYM(VIEW_SYM)},
{ "VIRTUAL", SYM(VIRTUAL_SYM)},
{ "VISIBLE", SYM(VISIBLE_SYM)},
{ "VECTOR", SYM(VECTOR_SYM)},
{ "VERSIONING", SYM(VERSIONING_SYM)},
{ "WAIT", SYM(WAIT_SYM)},
{ "WARNINGS", SYM(WARNINGS)},

View File

@ -4964,7 +4964,7 @@ SJ_TMP_TABLE::create_sj_weedout_tmp_table(THD *thd)
if (TRUE)
{
DBUG_PRINT("info",("Creating group key in temporary table"));
share->keys=1;
share->total_keys= share->keys= 1;
table->key_info= share->key_info= keyinfo;
keyinfo->key_part=key_part_info;
keyinfo->flags= HA_NOSAME | (using_unique_constraint ? HA_UNIQUE_HASH : 0);

View File

@ -56,6 +56,7 @@
#include "sql_table.h" // build_table_filename
#include "datadict.h" // dd_frm_is_view()
#include "rpl_rli.h" // rpl_group_info
#include "vector_mhnsw.h"
#ifdef _WIN32
#include <io.h>
#endif
@ -9833,3 +9834,114 @@ int dynamic_column_error_message(enum_dyncol_func_result rc)
/**
@} (end of group Data_Dictionary)
*/
int TABLE::hlindex_open(uint nr)
{
DBUG_ASSERT(s->hlindexes() == 1);
DBUG_ASSERT(nr == s->keys);
if (!hlindex)
{
mysql_mutex_lock(&s->LOCK_share);
if (!s->hlindex)
{
mysql_mutex_unlock(&s->LOCK_share);
TABLE_SHARE *share;
char *path= NULL;
size_t path_len= s->normalized_path.length + HLINDEX_BUF_LEN;
share= (TABLE_SHARE*)alloc_root(&s->mem_root, sizeof(*share));
path= (char*)alloc_root(&s->mem_root, path_len);
if (!share || !path)
return 1;
my_snprintf(path, path_len, "%s" HLINDEX_TEMPLATE,
s->normalized_path.str, nr);
init_tmp_table_share(in_use, share, s->db.str, 0, s->table_name.str,
path, false);
share->db_plugin= s->db_plugin;
if (share->init_from_sql_statement_string(in_use, false,
mhnsw_hlindex_table.str, mhnsw_hlindex_table.length))
{
free_table_share(share);
return 1;
}
mysql_mutex_lock(&s->LOCK_share);
if (!s->hlindex)
{
s->hlindex= share;
mysql_mutex_unlock(&s->LOCK_share);
}
else
{
mysql_mutex_unlock(&s->LOCK_share);
free_table_share(share);
}
}
else
mysql_mutex_unlock(&s->LOCK_share);
TABLE *table= (TABLE*)alloc_root(&mem_root, sizeof(*table));
if (!table ||
open_table_from_share(in_use, s->hlindex, &empty_clex_str, db_stat, 0,
in_use->open_options, table, 0))
return 1;
hlindex= table;
}
hlindex->in_use= in_use; // mark in use for this query
hlindex->use_all_columns();
return hlindex->file->ha_external_lock(in_use, F_WRLCK);
}
int TABLE::open_hlindexes_for_write()
{
DBUG_ASSERT(s->hlindexes() <= 1);
for (uint i= s->keys; i < s->total_keys; i++)
{
KEY *key= s->key_info + i;
if (hlindex)
hlindex->in_use= 0;
for (uint j=0; j < key->usable_key_parts; j++)
if (bitmap_is_set(write_set, key->key_part[j].fieldnr - 1))
{
if (hlindex_open(i))
return 1;
break;
}
}
return 0;
}
int TABLE::reset_hlindexes()
{
if (hlindex)
hlindex->file->ha_external_unlock(in_use);
return 0;
}
int TABLE::update_hlindexes()
{
DBUG_ASSERT(s->hlindexes() == (hlindex != NULL));
if (hlindex && hlindex->in_use)
if (int err= mhnsw_insert(this, key_info + s->keys))
return err;
return 0;
}
int TABLE::hlindex_read_first(uint nr, Item *item, ulonglong limit)
{
DBUG_ASSERT(s->hlindexes() == 1);
DBUG_ASSERT(nr == s->keys);
if (hlindex_open(nr))
return HA_ERR_CRASHED;
DBUG_ASSERT(hlindex->in_use == in_use);
return mhnsw_read_first(this, item, limit);
}
int TABLE::hlindex_read_next()
{
return mhnsw_read_next(this);
}

View File

@ -462,8 +462,8 @@ private:
class Key :public Sql_alloc, public DDL_options {
public:
enum Keytype { PRIMARY, UNIQUE, MULTIPLE, FULLTEXT, SPATIAL, FOREIGN_KEY,
IGNORE_KEY};
enum Keytype { PRIMARY, UNIQUE, MULTIPLE, FULLTEXT, SPATIAL, VECTOR,
FOREIGN_KEY, IGNORE_KEY};
enum Keytype type;
KEY_CREATE_INFO key_create_info;
List<Key_part_spec> columns;

View File

@ -668,6 +668,7 @@ mysql_ha_fix_cond_and_key(SQL_HANDLER *handler,
if (c_key->algorithm == HA_KEY_ALG_RTREE ||
c_key->algorithm == HA_KEY_ALG_FULLTEXT ||
c_key->algorithm == HA_KEY_ALG_VECTOR ||
(ha_rkey_mode != HA_READ_KEY_EXACT &&
(table->key_info[handler->keyno].index_flags &
(HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE)) == 0))

View File

@ -933,6 +933,13 @@ bool mysql_insert(THD *thd, TABLE_LIST *table_list,
table->file->extra(HA_EXTRA_WRITE_CAN_REPLACE);
if (duplic == DUP_UPDATE)
table->file->extra(HA_EXTRA_INSERT_WITH_UPDATE);
thd->abort_on_warning= !ignore && thd->is_strict_mode();
table->reset_default_fields();
table->prepare_triggers_for_insert_stmt_or_event();
table->mark_columns_needed_for_insert();
/*
let's *try* to start bulk inserts. It won't necessary
start them as values_list.elements should be greater than
@ -960,7 +967,8 @@ bool mysql_insert(THD *thd, TABLE_LIST *table_list,
goto abort;
}
}
table->file->prepare_for_insert(create_lookup_handler);
if (table->file->prepare_for_insert(create_lookup_handler))
goto abort;
/**
This is a simple check for the case when the table has a trigger
that reads from it, or when the statement invokes a stored function
@ -978,12 +986,6 @@ bool mysql_insert(THD *thd, TABLE_LIST *table_list,
table->file->ha_reset_copy_info();
}
thd->abort_on_warning= !ignore && thd->is_strict_mode();
table->reset_default_fields();
table->prepare_triggers_for_insert_stmt_or_event();
table->mark_columns_needed_for_insert();
if (fields.elements || !value_count || table_list->view != 0)
{
if (table->triggers &&

View File

@ -237,6 +237,7 @@ static int join_read_always_key(JOIN_TAB *tab);
static int join_read_last_key(JOIN_TAB *tab);
static int join_no_more_records(READ_RECORD *info);
static int join_read_next(READ_RECORD *info);
static int join_hlindex_read_next(READ_RECORD *info);
static int join_init_quick_read_record(JOIN_TAB *tab);
static quick_select_return test_if_quick_select(JOIN_TAB *tab);
static int test_if_use_dynamic_range_scan(JOIN_TAB *join_tab);
@ -5645,7 +5646,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
table->intersect_keys.clear_all();
table->reginfo.join_tab=s;
table->reginfo.not_exists_optimize=0;
bzero((char*) table->const_key_parts, sizeof(key_part_map)*table->s->keys);
bzero(table->const_key_parts, sizeof(key_part_map)*table->s->total_keys);
all_table_map|= table->map;
s->preread_init_done= FALSE;
s->join=join;
@ -7248,7 +7249,8 @@ static bool add_key_part(DYNAMIC_ARRAY *keyuse_array, KEY_FIELD *key_field)
if (!(form->keys_in_use_for_query.is_set(key)))
continue;
if (form->key_info[key].algorithm == HA_KEY_ALG_FULLTEXT ||
form->key_info[key].algorithm == HA_KEY_ALG_RTREE)
form->key_info[key].algorithm == HA_KEY_ALG_RTREE ||
form->key_info[key].algorithm == HA_KEY_ALG_VECTOR)
continue;
KEY *keyinfo= form->key_info+key;
@ -22170,7 +22172,7 @@ bool Create_tmp_table::finalize(THD *thd,
DBUG_PRINT("info",("Creating group key in temporary table"));
table->group= m_group; /* Table is grouped by key */
param->group_buff= m_group_buff;
share->keys=1;
share->total_keys= share->keys= 1;
table->key_info= table->s->key_info= keyinfo;
table->keys_in_use_for_query.set_bit(0);
share->keys_in_use.set_bit(0);
@ -22292,7 +22294,7 @@ bool Create_tmp_table::finalize(THD *thd,
keyinfo->ext_key_parts= keyinfo->user_defined_key_parts;
keyinfo->usable_key_parts= keyinfo->user_defined_key_parts;
table->distinct= 1;
share->keys= 1;
share->total_keys= share->keys= 1;
share->ext_key_parts= share->key_parts= keyinfo->ext_key_parts;
if (!(m_key_part_info= (KEY_PART_INFO*)
alloc_root(&table->mem_root,
@ -22800,7 +22802,7 @@ bool create_internal_tmp_table(TABLE *table, KEY *org_keyinfo,
DBUG_RETURN(1);
}
/* Can't create a key; Make a unique constraint instead of a key */
share->keys--;
share->total_keys= --share->keys;
share->key_parts-= keyinfo->user_defined_key_parts;
share->ext_key_parts-= keyinfo->ext_key_parts;
use_unique= true;
@ -25137,14 +25139,30 @@ join_read_first(JOIN_TAB *tab)
!table->covering_keys.is_set(tab->index) ||
table->file->keyread == tab->index);
tab->table->status=0;
tab->read_record.read_record_func= join_read_next;
tab->read_record.table=table;
if (!table->file->inited)
error= table->file->ha_index_init(tab->index, tab->sorted);
if (likely(!error))
error= table->file->prepare_index_scan();
if (unlikely(error) ||
unlikely(error= tab->table->file->ha_index_first(tab->table->record[0])))
if (tab->index >= table->s->keys)
{
DBUG_ASSERT(tab->index < table->s->total_keys);
DBUG_ASSERT(tab->index == table->s->keys);
DBUG_ASSERT(tab->sorted);
DBUG_ASSERT(tab->join->order);
DBUG_ASSERT(tab->join->order->next == NULL);
DBUG_ASSERT(tab->join->select_limit < HA_POS_ERROR);
tab->read_record.read_record_func= join_hlindex_read_next;
error= tab->table->hlindex_read_first(tab->index, *tab->join->order->item,
tab->join->select_limit);
}
else
{
tab->read_record.read_record_func= join_read_next;
if (!table->file->inited)
error= table->file->ha_index_init(tab->index, tab->sorted);
if (!error)
error= table->file->prepare_index_scan();
if (!error)
error= tab->table->file->ha_index_first(tab->table->record[0]);
}
if (error)
{
if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
report_error(table, error);
@ -25165,6 +25183,14 @@ join_read_next(READ_RECORD *info)
}
static int join_hlindex_read_next(READ_RECORD *info)
{
if (int error= info->table->hlindex_read_next())
return report_error(info->table, error);
return 0;
}
static int
join_read_last(JOIN_TAB *tab)
{
@ -26449,11 +26475,20 @@ static int test_if_order_by_key(JOIN *join, ORDER *order, TABLE *table,
DBUG_ENTER("test_if_order_by_key");
if ((table->file->ha_table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX) &&
idx < table->s->keys &&
table->key_info[idx].ext_key_part_map && pk != MAX_KEY && pk != idx)
{
have_pk_suffix= true;
}
if ((*order->item)->real_item()->type() != Item::FIELD_ITEM)
{
if (order->next)
DBUG_RETURN(0);
DBUG_RETURN((*order->item)->part_of_sortkey().is_set(idx));
}
for (; order ; order=order->next, const_key_parts>>=1)
{
Item_field *item_field= ((Item_field*) (*order->item)->real_item());
@ -26792,15 +26827,13 @@ find_field_in_item_list (Field *field, void *data)
static
void compute_part_of_sort_key_for_equals(JOIN *join, TABLE *table,
Item_field *item_field,
key_map *col_keys)
key_map *col_keys, Item *item)
{
col_keys->clear_all();
col_keys->merge(item_field->field->part_of_sortkey);
if (!optimizer_flag(join->thd, OPTIMIZER_SWITCH_ORDERBY_EQ_PROP))
if (item->type() != Item::FIELD_ITEM ||
!optimizer_flag(join->thd, OPTIMIZER_SWITCH_ORDERBY_EQ_PROP))
return;
Item_field *item_field= (Item_field*)item;
Item_equal *item_eq= NULL;
if (item_field->item_equal)
@ -26954,12 +26987,7 @@ bool find_indexes_matching_order(JOIN *join, TABLE *table, ORDER *order,
/* Find indexes that cover all ORDER/GROUP BY fields */
for (ORDER *tmp_order=order; tmp_order ; tmp_order=tmp_order->next)
{
Item *item= (*tmp_order->item)->real_item();
if (item->type() != Item::FIELD_ITEM)
{
usable_keys->clear_all();
return true; /* No suitable keys */
}
key_map col_keys= (*tmp_order->item)->part_of_sortkey();
/*
Take multiple-equalities into account. Suppose we have
@ -26977,9 +27005,8 @@ bool find_indexes_matching_order(JOIN *join, TABLE *table, ORDER *order,
And we compute an intersection of these sets to find set of indexes that
cover all ORDER BY components.
*/
key_map col_keys;
compute_part_of_sort_key_for_equals(join, table, (Item_field*)item,
&col_keys);
compute_part_of_sort_key_for_equals(join, table, &col_keys,
(*tmp_order->item)->real_item());
usable_keys->intersect(col_keys);
if (usable_keys->is_clear_all())
return true; // No usable keys
@ -32664,7 +32691,7 @@ test_if_cheaper_ordering(bool in_join_optimizer,
read_time= DBL_MAX;
Json_writer_array possible_keys(thd,"possible_keys");
for (nr=0; nr < table->s->keys ; nr++)
for (nr=0; nr < table->s->total_keys ; nr++)
{
int direction;
ha_rows select_limit= select_limit_arg;
@ -32702,7 +32729,6 @@ test_if_cheaper_ordering(bool in_join_optimizer,
(ref_key < 0 && (group || table->force_index)))
{
double rec_per_key;
KEY *keyinfo= table->key_info+nr;
if (group)
{
/*
@ -32711,6 +32737,7 @@ test_if_cheaper_ordering(bool in_join_optimizer,
key (e.g. as in Innodb).
See Bug #28591 for details.
*/
KEY *keyinfo= table->key_info+nr;
uint used_index_parts= keyinfo->user_defined_key_parts;
uint used_pk_parts= 0;
if (used_key_parts > used_index_parts)
@ -32821,7 +32848,7 @@ test_if_cheaper_ordering(bool in_join_optimizer,
if (saved_best_key_parts)
*saved_best_key_parts= used_key_parts;
if (new_used_key_parts)
*new_used_key_parts= keyinfo->user_defined_key_parts;
*new_used_key_parts= table->s->key_info[nr].user_defined_key_parts;
best_key_direction= direction;
best_select_limit= estimated_rows_to_scan;
}

View File

@ -2422,7 +2422,7 @@ int show_create_table_ex(THD *thd, TABLE_LIST *table_list,
key_info= table->s->key_info;
primary_key= share->primary_key;
for (uint i=0 ; i < share->keys ; i++,key_info++)
for (uint i=0 ; i < share->total_keys ; i++,key_info++)
{
if (key_info->flags & HA_INVISIBLE_KEY)
continue;
@ -2445,6 +2445,8 @@ int show_create_table_ex(THD *thd, TABLE_LIST *table_list,
packet->append(STRING_WITH_LEN("FULLTEXT KEY "));
else if (key_info->algorithm == HA_KEY_ALG_RTREE)
packet->append(STRING_WITH_LEN("SPATIAL KEY "));
else if (key_info->algorithm == HA_KEY_ALG_VECTOR)
packet->append(STRING_WITH_LEN("VECTOR KEY "));
else
packet->append(STRING_WITH_LEN("KEY "));
@ -2470,6 +2472,7 @@ int show_create_table_ex(THD *thd, TABLE_LIST *table_list,
append_identifier(thd, packet, &key_part->field->field_name);
if (key_part->field &&
key_part->length != table->field[key_part->fieldnr-1]->key_length() &&
key_info->algorithm != HA_KEY_ALG_VECTOR &&
key_info->algorithm != HA_KEY_ALG_RTREE &&
key_info->algorithm != HA_KEY_ALG_FULLTEXT)
{
@ -7309,7 +7312,7 @@ static int get_schema_stat_record(THD *thd, TABLE_LIST *tables, TABLE *table,
bool need_column_checks= !get_schema_privileges_for_show(thd, tables,
TABLE_ACLS, false);
for (uint i=0 ; i < show_table->s->keys ; i++,key_info++)
for (uint i=0 ; i < show_table->s->total_keys ; i++,key_info++)
{
if ((key_info->flags & HA_INVISIBLE_KEY) &&
!DBUG_IF("test_invisible_index"))
@ -7365,6 +7368,7 @@ static int get_schema_stat_record(THD *thd, TABLE_LIST *tables, TABLE *table,
? "D" : "A", 1, cs);
table->field[8]->set_notnull();
}
if (i < show_table->s->keys)
{
/*
We have to use table key information to get the key statistics
@ -7382,6 +7386,12 @@ static int get_schema_stat_record(THD *thd, TABLE_LIST *tables, TABLE *table,
const char *tmp= show_table->file->index_type(i);
table->field[13]->store(tmp, strlen(tmp), cs);
}
else
{
/* there are no others at the moment */
DBUG_ASSERT(key_info->algorithm == HA_KEY_ALG_VECTOR);
table->field[13]->store(STRING_WITH_LEN("VECTOR"), cs);
}
}
if (key_info->algorithm != HA_KEY_ALG_FULLTEXT &&
(key_part->field &&
@ -7395,7 +7405,7 @@ static int get_schema_stat_record(THD *thd, TABLE_LIST *tables, TABLE *table,
uint flags= key_part->field ? key_part->field->flags : 0;
const char *pos= flags & NOT_NULL_FLAG ? "" : "YES";
table->field[12]->store(pos, strlen(pos), cs);
if (!show_table->s->keys_in_use.is_set(i))
if (i < show_table->s->keys && !show_table->s->keys_in_use.is_set(i))
table->field[14]->store(STRING_WITH_LEN("disabled"), cs);
DBUG_ASSERT(MY_TEST(key_info->flags & HA_USES_COMMENT) ==
(key_info->comment.length > 0));

View File

@ -1314,7 +1314,7 @@ int mysql_rm_table_no_locks(THD *thd, TABLE_LIST *tables,
TABLE_LIST *table;
char path[FN_REFLEN + 1];
LEX_CSTRING alias= null_clex_str;
LEX_CUSTRING version;
LEX_CUSTRING version= {0, 0};
LEX_CSTRING partition_engine_name= null_clex_str;
StringBuffer<160> unknown_tables(system_charset_info);
DDL_LOG_STATE local_ddl_log_state;
@ -1404,6 +1404,8 @@ int mysql_rm_table_no_locks(THD *thd, TABLE_LIST *tables,
Table_type table_type;
size_t path_length= 0;
char *path_end= 0;
uint drop_index_from= 0, drop_index_to=0;
error= 0;
DBUG_PRINT("table", ("table_l: '%s'.'%s' table: %p s: %p",
@ -1545,6 +1547,8 @@ int mysql_rm_table_no_locks(THD *thd, TABLE_LIST *tables,
version= thd->strmake_lex_custring(share->tabledef_version);
if (plugin_ref pp= IF_PARTITIONING(share->default_part_plugin, NULL))
partition_engine_name= thd->strmake_lex_cstring(*plugin_name(pp));
drop_index_from= share->keys;
drop_index_to= share->total_keys;
tdc_release_share(share);
}
else
@ -1620,14 +1624,12 @@ int mysql_rm_table_no_locks(THD *thd, TABLE_LIST *tables,
}
#endif
error= -1;
if (thd->locked_tables_mode == LTM_LOCK_TABLES ||
thd->locked_tables_mode == LTM_PRELOCKED_UNDER_LOCK_TABLES)
{
if (wait_while_table_is_used(thd, table->table, HA_EXTRA_NOT_USED))
{
error= -1;
goto err;
}
close_all_tables_for_name(thd, table->table->s,
HA_EXTRA_PREPARE_FOR_DROP, NULL);
table->table= 0;
@ -1652,13 +1654,24 @@ int mysql_rm_table_no_locks(THD *thd, TABLE_LIST *tables,
else
res= ddl_log_drop_table(ddl_log_state, hton, &cpath, &db, &table_name);
if (res)
{
error= -1;
goto err;
if (path + sizeof(path) > path_end + HLINDEX_BUF_LEN)
{
for (uint i= drop_index_from; i < drop_index_to; i++)
{
my_snprintf(path_end, HLINDEX_BUF_LEN, HLINDEX_TEMPLATE, i);
int err= ha_delete_table(thd, hton, path, &db, &table_name, enoent_warning);
set_if_bigger(error, err);
}
*path_end= 0;
}
debug_crash_here("ddl_log_drop_before_delete_table");
error= ha_delete_table(thd, hton, path, &db, &table_name, enoent_warning);
{
int err= ha_delete_table(thd, hton, path, &db, &table_name, enoent_warning);
set_if_bigger(error, err);
}
debug_crash_here("ddl_log_drop_after_delete_table");
if (!error)
@ -2131,6 +2144,7 @@ bool quick_rm_table(THD *thd, handlerton *base, const LEX_CSTRING *db,
- LONG UNIQUE keys
- Normal keys
- Fulltext keys
- Vector keys
This will make checking for duplicated keys faster and ensure that
PRIMARY keys are prioritized.
@ -2167,6 +2181,10 @@ static int sort_keys(KEY *a, KEY *b)
(b_flags & HA_KEY_HAS_PART_KEY_SEG));
}
/* must be very last */
return_if_nonzero((a->algorithm == HA_KEY_ALG_VECTOR) -
(b->algorithm == HA_KEY_ALG_VECTOR));
return_if_nonzero((a->algorithm == HA_KEY_ALG_FULLTEXT) -
(b->algorithm == HA_KEY_ALG_FULLTEXT));
@ -2697,6 +2715,34 @@ Type_handler_blob_common::Key_part_spec_init_ft(Key_part_spec *part,
}
bool Type_handler_string::Key_part_spec_init_vector(Key_part_spec *part,
const Column_definition &def)
const
{
part->length= 0;
return def.charset != &my_charset_bin;
}
bool Type_handler_varchar::Key_part_spec_init_vector(Key_part_spec *part,
const Column_definition &def)
const
{
part->length= 0;
return def.charset != &my_charset_bin;
}
bool
Type_handler_blob_common::Key_part_spec_init_vector(Key_part_spec *part,
const Column_definition &def)
const
{
part->length= 1;
return def.charset != &my_charset_bin;
}
static bool
key_add_part_check_null(const handler *file, KEY *key_info,
const Column_definition *sql_field,
@ -3196,6 +3242,10 @@ mysql_prepare_create_table_finalize(THD *thd, HA_CREATE_INFO *create_info,
case Key::FOREIGN_KEY:
key_number--; // Skip this key
continue;
case Key::VECTOR:
if (key->key_create_info.algorithm == HA_KEY_ALG_UNDEF)
key->key_create_info.algorithm= HA_KEY_ALG_VECTOR;
break;
case Key::IGNORE_KEY:
DBUG_ASSERT(0);
break;
@ -3318,6 +3368,27 @@ mysql_prepare_create_table_finalize(THD *thd, HA_CREATE_INFO *create_info,
const Type_handler *field_type= sql_field->type_handler();
switch(key->type) {
case Key::VECTOR:
if (field_type->Key_part_spec_init_vector(column, *sql_field))
{
my_error(ER_WRONG_ARGUMENTS, MYF(0), "VECTOR INDEX");
DBUG_RETURN(TRUE);
}
if (sql_field->check_vcol_for_key(thd))
DBUG_RETURN(TRUE);
if (!(sql_field->flags & NOT_NULL_FLAG))
{
my_error(ER_INDEX_CANNOT_HAVE_NULL, MYF(0), "VECTOR");
DBUG_RETURN(TRUE);
}
if (create_info->tmp_table())
{
my_error(ER_NO_INDEX_ON_TEMPORARY, MYF(0), "VECTOR",
file->table_type());
DBUG_RETURN(TRUE);
}
break;
case Key::FULLTEXT:
if (field_type->Key_part_spec_init_ft(column, *sql_field) ||
(ft_key_charset && sql_field->charset != ft_key_charset))
@ -3649,6 +3720,13 @@ without_overlaps_err:
}
create_info->null_bits= null_fields;
if (*key_count >= 2 &&
(*key_info_buffer)[*key_count-2].algorithm == HA_KEY_ALG_VECTOR)
{
my_error(ER_NOT_SUPPORTED_YET, MYF(0), "multiple VECTOR indexes");
DBUG_RETURN(TRUE);
}
/* Check fields. */
it.rewind();
while ((sql_field=it++))
@ -8912,6 +8990,8 @@ mysql_prepare_alter_table(THD *thd, TABLE *table,
}
else if (key_info->algorithm == HA_KEY_ALG_FULLTEXT)
key_type= Key::FULLTEXT;
else if (key_info->algorithm == HA_KEY_ALG_VECTOR)
key_type= Key::VECTOR;
else
key_type= Key::MULTIPLE;

View File

@ -4239,6 +4239,11 @@ public:
const handler *file) const;
virtual bool Key_part_spec_init_spatial(Key_part_spec *part,
const Column_definition &def) const;
virtual bool Key_part_spec_init_vector(Key_part_spec *part,
const Column_definition &def) const
{
return true; // Error
}
virtual bool Key_part_spec_init_ft(Key_part_spec *part,
const Column_definition &def) const
{
@ -7223,6 +7228,8 @@ public:
ulonglong table_flags) const override;
bool Key_part_spec_init_ft(Key_part_spec *part,
const Column_definition &def) const override;
bool Key_part_spec_init_vector(Key_part_spec *part,
const Column_definition &def) const override;
Field *make_table_field(MEM_ROOT *root,
const LEX_CSTRING *name,
const Record_addr &addr,
@ -7320,6 +7327,8 @@ public:
ulonglong table_flags) const override;
bool Key_part_spec_init_ft(Key_part_spec *part,
const Column_definition &def) const override;
bool Key_part_spec_init_vector(Key_part_spec *part,
const Column_definition &def) const override;
Field *make_table_field(MEM_ROOT *root,
const LEX_CSTRING *name,
const Record_addr &addr,
@ -7422,6 +7431,8 @@ public:
uchar *buff) const override;
bool Key_part_spec_init_ft(Key_part_spec *part,
const Column_definition &def) const override;
bool Key_part_spec_init_vector(Key_part_spec *part,
const Column_definition &def) const override;
bool Key_part_spec_init_primary(Key_part_spec *part,
const Column_definition &def,
const handler *file) const override;

View File

@ -712,6 +712,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, size_t *yystacksize);
%token <kwd> VARIANCE_SYM
%token <kwd> VAR_SAMP_SYM
%token <kwd> VARYING /* SQL-2003-R */
%token <kwd> VECTOR_SYM
%token <kwd> WHEN_SYM /* SQL-2003-R */
%token <kwd> WHERE /* SQL-2003-R */
%token <kwd> WHILE_SYM
@ -1663,7 +1664,7 @@ rule:
option_type opt_var_type opt_var_ident_type
%type <key_type>
constraint_key_type fulltext spatial
constraint_key_type fulltext spatial_or_vector
%type <key_alg>
btree_or_rtree opt_key_algorithm_clause opt_USING_key_algorithm
@ -2640,7 +2641,7 @@ create:
{
Lex->pop_select(); //main select
}
| create_or_replace spatial INDEX_SYM
| create_or_replace spatial_or_vector INDEX_SYM
{
if (Lex->main_select_push())
MYSQL_YYABORT;
@ -6010,7 +6011,7 @@ key_def:
MYSQL_YYABORT;
}
'(' key_list ')' fulltext_key_options { }
| spatial opt_key_or_index opt_if_not_exists opt_ident
| spatial_or_vector opt_key_or_index opt_if_not_exists opt_ident
{
Lex->option_list= NULL;
if (unlikely(Lex->add_key($1, &$4, HA_KEY_ALG_UNDEF, $3)))
@ -7093,11 +7094,12 @@ fulltext:
FULLTEXT_SYM { $$= Key::FULLTEXT;}
;
spatial:
spatial_or_vector:
SPATIAL_SYM
{
$$= Key::SPATIAL;
}
| VECTOR_SYM { $$= Key::VECTOR;}
;
normal_key_options:
@ -16790,6 +16792,7 @@ reserved_keyword_udt_not_param_type:
| VARIANCE_SYM
| VARYING
| VAR_SAMP_SYM
| VECTOR_SYM
| WHEN_SYM
| WHERE
| WHILE_SYM

View File

@ -504,6 +504,9 @@ void TABLE_SHARE::destroy()
}
delete sequence;
if (hlindex)
hlindex->destroy();
/* The mutexes are initialized only for shares that are part of the TDC */
if (tmp_table == NO_TMP_TABLE)
{
@ -518,7 +521,7 @@ void TABLE_SHARE::destroy()
/* Release fulltext parsers */
info_it= key_info;
for (idx= keys; idx; idx--, info_it++)
for (idx= total_keys; idx; idx--, info_it++)
{
if (info_it->flags & HA_USES_PARSER)
{
@ -794,6 +797,7 @@ static bool create_key_infos(const uchar *strpos, const uchar *frm_image_end,
KEY_PART_INFO *key_part= NULL;
ulong *rec_per_key= NULL;
DBUG_ASSERT(keyinfo == first_keyinfo);
DBUG_ASSERT(share->keys == 0);
if (!keys)
{
@ -876,7 +880,8 @@ static bool create_key_infos(const uchar *strpos, const uchar *frm_image_end,
{
if (strpos + (new_frm_ver >= 1 ? 9 : 7) >= frm_image_end)
return 1;
if (keyinfo->algorithm != HA_KEY_ALG_LONG_HASH)
if (keyinfo->algorithm != HA_KEY_ALG_LONG_HASH &&
keyinfo->algorithm != HA_KEY_ALG_VECTOR)
rec_per_key++;
key_part->fieldnr= (uint16) (uint2korr(strpos) & FIELD_NR_MASK);
key_part->offset= (uint) uint2korr(strpos+2)-1;
@ -918,6 +923,9 @@ static bool create_key_infos(const uchar *strpos, const uchar *frm_image_end,
share->ext_key_parts++;
}
if (keyinfo->algorithm != HA_KEY_ALG_VECTOR)
share->keys++;
if (i && share->use_ext_keys && !((keyinfo->flags & HA_NOSAME)))
{
/* Reserve place for extended key parts */
@ -956,7 +964,7 @@ static bool create_key_infos(const uchar *strpos, const uchar *frm_image_end,
(keyinfo->comment.length > 0));
}
share->keys= keys; // do it *after* all key_info's are initialized
share->total_keys= keys; // do it *after* all key_info's are initialized
return 0;
}
@ -1488,6 +1496,11 @@ key_map TABLE_SHARE::usable_indexes(THD *thd)
{
key_map usable_indexes(keys_in_use);
usable_indexes.subtract(ignored_indexes);
/* take into account keys that the engine knows nothing about */
for (uint i= keys; i < total_keys; i++)
usable_indexes.set_bit(i);
return usable_indexes;
}
@ -4159,19 +4172,19 @@ bool copy_keys_from_share(TABLE *outparam, MEM_ROOT *root)
KEY *key_info, *key_info_end;
KEY_PART_INFO *key_part;
if (!multi_alloc_root(root, &key_info, share->keys*sizeof(KEY),
if (!multi_alloc_root(root, &key_info, share->total_keys*sizeof(KEY),
&key_part, share->ext_key_parts*sizeof(KEY_PART_INFO),
NullS))
return 1;
outparam->key_info= key_info;
memcpy(key_info, share->key_info, sizeof(*key_info)*share->keys);
memcpy(key_info, share->key_info, sizeof(*key_info)*share->total_keys);
memcpy(key_part, key_info->key_part,
sizeof(*key_part)*share->ext_key_parts);
my_ptrdiff_t adjust_ptrs= PTR_BYTE_DIFF(key_part, key_info->key_part);
for (key_info_end= key_info + share->keys ;
for (key_info_end= key_info + share->total_keys ;
key_info < key_info_end ;
key_info++)
{
@ -4369,7 +4382,7 @@ enum open_frm_error open_table_from_share(THD *thd, TABLE_SHARE *share,
&outparam->opt_range,
share->keys * sizeof(TABLE::OPT_RANGE),
&outparam->const_key_parts,
share->keys * sizeof(key_part_map),
share->total_keys * sizeof(key_part_map),
NullS))
goto err;
@ -4780,6 +4793,8 @@ int closefrm(TABLE *table)
DBUG_ENTER("closefrm");
DBUG_PRINT("enter", ("table: %p", table));
if (table->hlindex)
closefrm(table->hlindex);
if (table->db_stat)
error=table->file->ha_close();
table->alias.free();
@ -8414,22 +8429,22 @@ bool TABLE::alloc_keys(uint key_count)
DBUG_ASSERT(s->tmp_table == INTERNAL_TMP_TABLE);
if (!multi_alloc_root(&mem_root,
&new_key_info, sizeof(*key_info)*(s->keys+key_count),
&new_key_info, sizeof(*key_info)*(s->total_keys+key_count),
&new_const_key_parts,
sizeof(*new_const_key_parts)*(s->keys+key_count),
sizeof(*new_const_key_parts)*(s->total_keys+key_count),
NullS))
return TRUE;
if (s->keys)
if (s->total_keys)
{
memmove(new_key_info, s->key_info, sizeof(*key_info) * s->keys);
memmove(new_key_info, s->key_info, sizeof(*key_info) * s->total_keys);
memmove(new_const_key_parts, const_key_parts,
s->keys * sizeof(const_key_parts));
s->total_keys * sizeof(const_key_parts));
}
s->key_info= key_info= new_key_info;
const_key_parts= new_const_key_parts;
bzero((char*) (const_key_parts + s->keys),
bzero((char*) (const_key_parts + s->total_keys),
sizeof(*const_key_parts) * key_count);
max_keys= s->keys+key_count;
max_keys= s->total_keys+key_count;
return FALSE;
}
@ -8657,6 +8672,7 @@ bool TABLE::add_tmp_key(uint key, uint key_parts,
set_if_bigger(s->max_key_length, keyinfo->key_length);
s->keys++;
s->total_keys++;
s->ext_key_parts+= keyinfo->ext_key_parts;
s->key_parts+= keyinfo->user_defined_key_parts;
return FALSE;
@ -8715,7 +8731,7 @@ void TABLE::use_index(int key_to_save, key_map *map_to_update)
}
}
*map_to_update= new_bitmap;
s->keys= saved_keys;
s->total_keys= s->keys= saved_keys;
s->key_parts= s->ext_key_parts= key_parts;
}
@ -9073,7 +9089,7 @@ void init_mdl_requests(TABLE_LIST *table_list)
bool TABLE::update_const_key_parts(COND *conds)
{
bzero((char*) const_key_parts, sizeof(key_part_map) * s->keys);
bzero((char*) const_key_parts, sizeof(key_part_map) * s->total_keys);
if (conds == NULL)
return FALSE;
@ -10898,7 +10914,7 @@ inline void TABLE::initialize_opt_range_structures()
{
TRASH_ALLOC((void*)&opt_range_keys, sizeof(opt_range_keys));
TRASH_ALLOC((void*)opt_range, s->keys * sizeof(*opt_range));
TRASH_ALLOC(const_key_parts, s->keys * sizeof(*const_key_parts));
TRASH_ALLOC(const_key_parts, s->total_keys * sizeof(*const_key_parts));
}

View File

@ -100,6 +100,9 @@ typedef ulonglong nested_join_map;
#define TMP_TABLE_KEY_EXTRA 8
#define ROCKSDB_DIRECTORY_NAME "#rocksdb"
#define HLINDEX_TEMPLATE "#i#%02u"
#define HLINDEX_BUF_LEN 16 /* with extension .ibd/.MYI/etc and safety margin */
/**
Enumerate possible types of a table from re-execution
standpoint.
@ -739,7 +742,8 @@ struct TABLE_SHARE
KEY *key_info; /* data of keys in database */
Virtual_column_info **check_constraints;
uint *blob_field; /* Index to blobs in Field arrray*/
LEX_CUSTRING vcol_defs; /* definitions of generated columns */
LEX_CUSTRING vcol_defs; /* definitions of generated columns */
TABLE_SHARE *hlindex;
/*
EITS statistics data from the last time the table was opened or ANALYZE
@ -835,7 +839,12 @@ struct TABLE_SHARE
uint table_check_constraints, field_check_constraints;
uint rec_buff_length; /* Size of table->record[] buffer */
uint keys, key_parts;
uint keys; /* Number of KEY's for the engine */
uint total_keys; /* total number of KEY's, including
high level indexes */
uint hlindexes() { return total_keys - keys; }
uint key_parts;
uint ext_key_parts; /* Total number of key parts in extended keys */
uint max_key_length, max_unique_length;
@ -1358,12 +1367,16 @@ public:
/* Tables used in DEFAULT and CHECK CONSTRAINT (normally sequence tables) */
TABLE_LIST *internal_tables;
TABLE *hlindex;
/*
Not-null for temporary tables only. Non-null values means this table is
used to compute GROUP BY, it has a unique of GROUP BY columns.
(set by create_tmp_table)
*/
ORDER *group;
union {
ORDER *group; /* only for temporary tables */
void *context; /* only for hlindexes */
};
String alias; /* alias or table name */
uchar *null_flags;
MY_BITMAP def_read_set, def_write_set, tmp_set;
@ -1776,6 +1789,14 @@ public:
void reset_default_fields();
inline ha_rows stat_records() { return used_stat_records; }
int hlindex_open(uint nr);
int hlindex_read_first(uint nr, Item *item, ulonglong limit);
int hlindex_read_next();
int open_hlindexes_for_write();
int update_hlindexes();
int reset_hlindexes();
void prepare_triggers_for_insert_stmt_or_event();
bool prepare_triggers_for_delete_stmt_or_event();
bool prepare_triggers_for_update_stmt_or_event();

sql/vector_mhnsw.cc (new file, 209 lines)
View File

@ -0,0 +1,209 @@
/*
Copyright (c) 2024, MariaDB plc
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA
*/
#include <my_global.h>
#include "vector_mhnsw.h"
#include "field.h"
#include "item.h"
#include "sql_queue.h"
#include <scope.h>
const LEX_CSTRING mhnsw_hlindex_table={STRING_WITH_LEN("\
CREATE TABLE i ( \
src varbinary(255) not null, \
dst varbinary(255) not null, \
index (src)) \
")};
static void store_ref(TABLE *t, handler *h, uint n)
{
t->hlindex->field[n]->store((char*)h->ref, h->ref_length, &my_charset_bin);
}
int mhnsw_insert(TABLE *table, KEY *keyinfo)
{
TABLE *graph= table->hlindex;
MY_BITMAP *old_map= dbug_tmp_use_all_columns(table, &table->read_set);
Field *field= keyinfo->key_part->field;
String buf, *res= field->val_str(&buf);
handler *h= table->file;
int err= 0;
dbug_tmp_restore_column_map(&table->read_set, old_map);
/* metadata are checked on open */
DBUG_ASSERT(graph);
DBUG_ASSERT(keyinfo->algorithm == HA_KEY_ALG_VECTOR);
DBUG_ASSERT(keyinfo->usable_key_parts == 1);
DBUG_ASSERT(field->binary());
DBUG_ASSERT(field->cmp_type() == STRING_RESULT);
DBUG_ASSERT(res); // ER_INDEX_CANNOT_HAVE_NULL
DBUG_ASSERT(h->ref_length <= graph->field[0]->field_length);
DBUG_ASSERT(h->ref_length <= graph->field[1]->field_length);
if (res->length() == 0 || res->length() % 4)
return 1;
// let's do every node to every node
h->position(table->record[0]);
graph->field[0]->store(1);
store_ref(table, h, 0);
if (h->lookup_handler->ha_rnd_init(1))
return 1;
while (! ((err= h->lookup_handler->ha_rnd_next(h->lookup_buffer))))
{
h->lookup_handler->position(h->lookup_buffer);
if (graph->field[0]->cmp(h->lookup_handler->ref) == 0)
continue;
store_ref(table, h->lookup_handler, 1);
if ((err= graph->file->ha_write_row(graph->record[0])))
break;
}
h->lookup_handler->ha_rnd_end();
return err == HA_ERR_END_OF_FILE ? 0 : err;
}
static int cmp_float(void *, float *a, float *b)
{
return *a < *b ? -1 : *a == *b ? 0 : 1;
}
struct Node
{
float distance;
uchar ref[1000];
};
int mhnsw_read_first(TABLE *table, Item *dist, ulonglong limit)
{
TABLE *graph= table->hlindex;
Queue<Node, float> todo, result;
Node *cur;
String *str, strbuf;
const size_t ref_length= table->file->ref_length;
const size_t element_size= ref_length + sizeof(float);
uchar *key= (uchar*)alloca(ref_length + 32);
Hash_set<Node> visited(PSI_INSTRUMENT_MEM, &my_charset_bin, limit,
sizeof(float), ref_length, 0, 0, HASH_UNIQUE);
uint keylen;
int err= 0;
DBUG_ASSERT(graph);
if (todo.init(1000, 0, 0, cmp_float)) // XXX + autoextent
return HA_ERR_OUT_OF_MEM;
if (result.init(limit, 0, 1, cmp_float))
return HA_ERR_OUT_OF_MEM;
if ((err= graph->file->ha_index_init(0, 1)))
return err;
SCOPE_EXIT([graph](){ graph->file->ha_index_end(); });
// 1. read a start row
if ((err= graph->file->ha_index_last(graph->record[0])))
return err;
if (!(str= graph->field[0]->val_str(&strbuf)))
return HA_ERR_CRASHED;
DBUG_ASSERT(str->length() == ref_length);
cur= (Node*)table->in_use->alloc(element_size);
memcpy(cur->ref, str->ptr(), ref_length);
if ((err= table->file->ha_rnd_init(0)))
return err;
if ((err= table->file->ha_rnd_pos(table->record[0], cur->ref)))
return HA_ERR_CRASHED;
// 2. add it to the todo
cur->distance= dist->val_real();
if (dist->is_null())
return HA_ERR_END_OF_FILE;
todo.push(cur);
visited.insert(cur);
while (todo.elements())
{
// 3. pick the top node from the todo
cur= todo.pop();
// 4. add it to the result
if (result.is_full())
{
// 5. if not added, greedy search done
if (cur->distance > result.top()->distance)
break;
result.replace_top(cur);
}
else
result.push(cur);
float threshold= result.is_full() ? result.top()->distance : FLT_MAX;
// 6. add all its [yet unvisited] neighbours to the todo heap
keylen= graph->field[0]->get_key_image(key, ref_length, Field::itRAW);
if ((err= graph->file->ha_index_read_map(graph->record[0], key, 3,
HA_READ_KEY_EXACT)))
return HA_ERR_CRASHED;
do {
if (!(str= graph->field[1]->val_str(&strbuf)))
return HA_ERR_CRASHED;
if (visited.find(str->ptr(), ref_length))
continue;
if ((err= table->file->ha_rnd_pos(table->record[0], (uchar*)str->ptr())))
return HA_ERR_CRASHED;
float distance= dist->val_real();
if (distance > threshold)
continue;
cur= (Node*)table->in_use->alloc(element_size);
cur->distance= distance;
memcpy(cur->ref, str->ptr(), ref_length);
todo.push(cur);
visited.insert(cur);
} while (!graph->file->ha_index_next_same(graph->record[0], key, keylen));
// 7. goto 3
}
// 8. return results
Node **context= (Node**)table->in_use->alloc(sizeof(Node**)*result.elements()+1);
graph->context= context;
Node **ptr= context+result.elements();
*ptr= 0;
while (result.elements())
*--ptr= result.pop();
return mhnsw_read_next(table);
}
int mhnsw_read_next(TABLE *table)
{
Node ***context= (Node***)&table->hlindex->context;
if (**context)
return table->file->ha_rnd_pos(table->record[0], (*(*context)++)->ref);
return HA_ERR_END_OF_FILE;
}

sql/vector_mhnsw.h (new file, 24 lines)
View File

@ -0,0 +1,24 @@
/*
Copyright (c) 2024, MariaDB plc
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA
*/
#include "table.h"
extern const LEX_CSTRING mhnsw_hlindex_table;
int mhnsw_insert(TABLE *table, KEY *keyinfo);
int mhnsw_read_first(TABLE *table, Item *dist, ulonglong limit);
int mhnsw_read_next(TABLE *table);

View File

@ -925,6 +925,7 @@ MRN_SHARE *mrn_get_share(const char *table_name, TABLE *table, int *error)
*wrap_table_share= *table->s;
mrn_init_sql_alloc(current_thd, &(wrap_table_share->mem_root));
wrap_table_share->keys = share->wrap_keys;
wrap_table_share->total_keys = share->wrap_keys;
wrap_table_share->key_info = share->wrap_key_info;
wrap_table_share->primary_key = share->wrap_primary_key;
wrap_table_share->keys_in_use.init(share->wrap_keys);

View File

@ -1755,7 +1755,7 @@ search:
- recreate index stats
*/
pfs->destroy_index_stats();
pfs->m_key_count= share->keys;
pfs->m_key_count= share->total_keys;
for (uint index= 0; index < pfs->m_key_count; index++)
{
(void)pfs->find_or_create_index_stat(share, index);