
MDEV-371 Unique Index for long columns

This patch implements an engine-independent unique hash index.

Usage:- A unique HASH index is created automatically for blob/varchar/text columns whose key
 length > handler->max_key_length(),
or it can be specified explicitly.

  Automatic Creation:-
   CREATE TABLE t1 (a blob unique);
  Explicit Creation:-
   CREATE TABLE t1 (a int, unique(a) using HASH);

Internal KEY_PART Representations:-
 A long unique key_info has 2 representations.
 (Let's illustrate with an example: create table t1(a blob, b blob, unique(a, b));)

 1. User Given Representation:- The key_info->key_part array matches what the user defined.
 In the example it has 2 key_parts (a, b).

 2. Storage Engine Representation:- Here there is only one key_part, and it points to the
 HASH_FIELD. This key_part is always placed after the user-defined key_parts.

 So:- User Given Representation          [a] [b] [hash_key_part]
                  key_info->key_part ----^
  Storage Engine Representation          [a] [b] [hash_key_part]
                  key_info->key_part ------------^

 table->s->key_info holds the User Given Representation, while table->key_info holds the
 Storage Engine Representation. One is converted into the other by calling the
 setup_keyinfo_hash()/re_setup_keyinfo_hash() functions.
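
 The conversion is essentially pointer arithmetic on key_info->key_part. A minimal sketch
 of the idea (the real setup_keyinfo_hash()/re_setup_keyinfo_hash() in this patch also
 adjust further key counters and flags; the bookkeeping shown here is illustrative only):

   /* Sketch: advance key_part past the user-defined parts so it points
      at the single hash key_part (storage-engine representation)... */
   void setup_keyinfo_hash(KEY *key_info)
   {
     uint parts= fields_in_hash_keyinfo(key_info); // user-defined parts
     key_info->key_part+= parts;          // now points at [hash_key_part]
     key_info->user_defined_key_parts= 1; // engine sees one key_part
   }

   /* ...and step it back for the user-given representation. */
   void re_setup_keyinfo_hash(KEY *key_info)
   {
     uint parts= fields_in_hash_keyinfo(key_info);
     key_info->key_part-= parts;
     key_info->user_defined_key_parts= parts;
   }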

Working:-

1. When the user specifies USING HASH, or the key length is > handler->max_key_length(), then in
mysql_prepare_create_table one extra vfield is added (for each long unique key), and
key_info->algorithm is set to HA_KEY_ALG_LONG_HASH.

2. In init_from_binary_frm_image the values for the hash key_part are set (fieldnr, field and flags).

3. In parse_vcol_defs, HASH_FIELD->vcol_info is created. Item_func_hash is used with a list of
   Item_fields; when an explicit prefix length is given by the user, the Item_field is wrapped in
   Item_func_left so that only that prefix is hashed (see the sketch after this list).

4. In ha_write_row/ha_update_row, check_duplicate_long_entry_key is called; it creates the hash key
from table->record[0] and then calls ha_index_read_map. If a duplicate hash is found, the records
are compared field by field.
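
To make step 3 concrete, here is a rough sketch of the expression tree that parse_vcol_defs
ends up with for unique(a, b(10)); the constructor calls and mem_root handling are simplified
illustrations, not the literal patch code, and field_a/field_b stand for the Field objects of
the two columns:

   /* HASH_FIELD's virtual column computes HASH(a, LEFT(b, 10)):
      column a in full, column b only up to its 10-char prefix. */
   List<Item> args;
   args.push_back(new (thd->mem_root) Item_field(thd, field_a));
   args.push_back(new (thd->mem_root) Item_func_left(thd,
                    new (thd->mem_root) Item_field(thd, field_b),
                    new (thd->mem_root) Item_int(thd, 10)));
   hash_field->vcol_info->expr=
     new (thd->mem_root) Item_func_hash(thd, args);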
Author: Sachin (2019-02-20 02:53:08 +05:30)
Committed by: Sergei Golubchik
parent 5b4d6595d2
commit d00f19e832
46 changed files with 4118 additions and 98 deletions


@@ -3768,6 +3768,8 @@ void print_keydup_error(TABLE *table, KEY *key, const char *msg, myf errflag)
  }
  else
  {
    if (key->algorithm == HA_KEY_ALG_LONG_HASH)
      setup_keyinfo_hash(key);
    /* Table is opened and defined at this point */
    key_unpack(&str,table, key);
    uint max_length=MYSQL_ERRMSG_SIZE-(uint) strlen(msg);
@@ -3778,6 +3780,8 @@ void print_keydup_error(TABLE *table, KEY *key, const char *msg, myf errflag)
    }
    my_printf_error(ER_DUP_ENTRY, msg, errflag, str.c_ptr_safe(),
                    key->name.str);
    if (key->algorithm == HA_KEY_ALG_LONG_HASH)
      re_setup_keyinfo_hash(key);
  }
}
@@ -3795,7 +3799,6 @@ void print_keydup_error(TABLE *table, KEY *key, myf errflag)
                    errflag);
}

/**
  Print error that we got from handler function.
@@ -4288,6 +4291,8 @@ uint handler::get_dup_key(int error)
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK);
  DBUG_ENTER("handler::get_dup_key");
  if (table->s->long_unique_table && table->file->errkey < table->s->keys)
    DBUG_RETURN(table->file->errkey);
  table->file->errkey = (uint) -1;
  if (error == HA_ERR_FOUND_DUPP_KEY ||
      error == HA_ERR_FOREIGN_DUPLICATE_KEY ||
@@ -6483,6 +6488,162 @@ static int wsrep_after_row(THD *thd)
}
#endif /* WITH_WSREP */

static int check_duplicate_long_entry_key(TABLE *table, handler *h,
                                          uchar *new_rec, uint key_no)
{
  Field *hash_field;
  int result, error= 0;
  KEY *key_info= table->key_info + key_no;
  hash_field= key_info->key_part->field;
  DBUG_ASSERT((key_info->flags & HA_NULL_PART_KEY &&
               key_info->key_length == HA_HASH_KEY_LENGTH_WITH_NULL)
              || key_info->key_length == HA_HASH_KEY_LENGTH_WITHOUT_NULL);
  uchar ptr[HA_HASH_KEY_LENGTH_WITH_NULL];

  if (hash_field->is_real_null())
    return 0;
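
  /* Build the lookup key: copy the computed hash value (and its NULL
     byte, if the key is nullable) from the new record into ptr. */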
  key_copy(ptr, new_rec, key_info, key_info->key_length, false);

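  /* Lazily allocate a scratch row buffer so a candidate row can be
     fetched without clobbering table->record[0]. */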
  if (!table->check_unique_buf)
    table->check_unique_buf= (uchar *)alloc_root(&table->mem_root,
                                                 table->s->reclength);

  result= h->ha_index_init(key_no, 0);
  if (result)
    return result;
  result= h->ha_index_read_map(table->check_unique_buf,
                               ptr, HA_WHOLE_KEY, HA_READ_KEY_EXACT);
  if (!result)
  {
    bool is_same;
    Field * t_field;
    Item_func_hash * temp= (Item_func_hash *)hash_field->vcol_info->expr;
    Item ** arguments= temp->arguments();
    uint arg_count= temp->argument_count();
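    /* A matching hash is not proof of a duplicate: walk the arguments of
       the Item_func_hash expression and re-compare the actual fields. */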
    do
    {
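      /* Byte offset from the new record to the fetched candidate row;
         cmp_offset() compares a field against its image at this offset. */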
      long diff= table->check_unique_buf - new_rec;
      is_same= true;
      for (uint j=0; is_same && j < arg_count; j++)
      {
        DBUG_ASSERT(arguments[j]->type() == Item::FIELD_ITEM ||
                    // this one for left(fld_name,length)
                    arguments[j]->type() == Item::FUNC_ITEM);
        if (arguments[j]->type() == Item::FIELD_ITEM)
        {
          t_field= static_cast<Item_field *>(arguments[j])->field;
          if (t_field->cmp_offset(diff))
            is_same= false;
        }
        else
        {
          Item_func_left *fnc= static_cast<Item_func_left *>(arguments[j]);
          DBUG_ASSERT(!my_strcasecmp(system_charset_info, "left",
                                     fnc->func_name()));
          DBUG_ASSERT(fnc->arguments()[0]->type() == Item::FIELD_ITEM);
          t_field= static_cast<Item_field *>(fnc->arguments()[0])->field;
          longlong length= fnc->arguments()[1]->val_int();
          if (t_field->cmp_max(t_field->ptr, t_field->ptr + diff, length))
            is_same= false;
        }
      }
    }
    while (!is_same &&
           !(result= table->file->ha_index_next_same(table->check_unique_buf,
                                                     ptr,
                                                     key_info->key_length)));
    if (is_same)
    {
      table->file->errkey= key_no;
      error= HA_ERR_FOUND_DUPP_KEY;
      goto exit;
    }
    else
      goto exit;
  }
  if (result == HA_ERR_LOCK_WAIT_TIMEOUT)
  {
    table->file->errkey= key_no;
    error= HA_ERR_LOCK_WAIT_TIMEOUT;
  }
exit:
  h->ha_index_end();
  return error;
}

/** @brief
    Check whether an inserted record breaks the
    unique constraint on long columns.

    @returns 0 if no duplicate else returns error
*/
static int check_duplicate_long_entries(TABLE *table, handler *h,
                                        uchar *new_rec)
{
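  /* Reset the duplicate-key marker; if a check below fails it sets errkey,
     which handler::get_dup_key() then returns directly. */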
  table->file->errkey= -1;
  int result;
  for (uint i= 0; i < table->s->keys; i++)
  {
    if (table->key_info[i].algorithm == HA_KEY_ALG_LONG_HASH &&
        (result= check_duplicate_long_entry_key(table, h, new_rec, i)))
      return result;
  }
  return 0;
}

/** @brief
    Check whether an updated record breaks the
    unique constraint on long columns.

    In the case of update we only need to check the keys whose fields
    actually changed. Consider

      create table t1(a blob, b blob, x blob, y blob,
                      unique(a,b), unique(x,y));

    and an update statement like

      update t1 set a=23+a;

    If we scanned all long unique keys of the table, the index scan on
    x_y would report a duplicate, because that data is unchanged; so in
    the update path only the changed keys are checked.

    @returns 0 if no duplicate else returns error
*/
static int check_duplicate_long_entries_update(TABLE *table, handler *h,
                                               uchar *new_rec)
{
  Field *field;
  uint key_parts;
  int error= 0;
  KEY *keyinfo;
  KEY_PART_INFO *keypart;
  /*
    Compare the new record with the old one, but only for the fields that
    feed each hash; a key whose fields are unchanged needs no check.
  */
  long reclength= table->record[1]-table->record[0];
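  /* The duplicate check needs its own handler clone, so that its index
     lookups do not disturb the cursor of the ongoing update scan. */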
  if (!table->update_handler)
    table->clone_handler_for_update();
  for (uint i= 0; i < table->s->keys; i++)
  {
    keyinfo= table->key_info + i;
    if (keyinfo->algorithm == HA_KEY_ALG_LONG_HASH)
    {
      key_parts= fields_in_hash_keyinfo(keyinfo);
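      /* table->key_info is in the storage-engine representation: key_part
         points at the hash key_part, so step back over the user-defined
         parts to reach them. */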
      keypart= keyinfo->key_part - key_parts;
      for (uint j= 0; j < key_parts; j++, keypart++)
      {
        field= keypart->field;
        /* If the field differs between old and new row, check this key */
        if (field->cmp_binary_offset(reclength))
        {
          if ((error= check_duplicate_long_entry_key(table,
                                                     table->update_handler,
                                                     new_rec, i)))
            goto exit;
          /*
            break because check_duplicate_long_entry_key will
            take care of the remaining fields
          */
          break;
        }
      }
    }
  }
exit:
  return error;
}

int handler::ha_write_row(uchar *buf)
{
  int error;
@@ -6496,6 +6657,9 @@ int handler::ha_write_row(uchar *buf)
  mark_trx_read_write();
  increment_statistics(&SSV::ha_write_count);

  if (table->s->long_unique_table &&
      (error= check_duplicate_long_entries(table, table->file, buf)))
    DBUG_RETURN(error);
  TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_WRITE_ROW, MAX_KEY, 0,
                { error= write_row(buf); })
@@ -6535,6 +6699,11 @@ int handler::ha_update_row(const uchar *old_data, const uchar *new_data)
  MYSQL_UPDATE_ROW_START(table_share->db.str, table_share->table_name.str);
  mark_trx_read_write();
  increment_statistics(&SSV::ha_update_count);

  if (table->s->long_unique_table &&
      (error= check_duplicate_long_entries_update(table, table->file,
                                                  (uchar *)new_data)))
  {
    return error;
  }
  TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_UPDATE_ROW, active_index, 0,
                { error= update_row(old_data, new_data);})