1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-04-18 21:44:02 +03:00

2005 lines
55 KiB
C++

/* Copyright (C) 2014 InfiniDB, Inc.
Copyright (C) 2016 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include "ha_mcs.h"
#include "maria_def.h"
#include <typeinfo>
#include "columnstoreversion.h"
#include "ha_mcs_pushdown.h"
#define NEED_CALPONT_EXTERNS
#include "ha_mcs_impl.h"
#include "is_columnstore.h"
#include "ha_mcs_version.h"
#ifndef COLUMNSTORE_MATURITY
#define COLUMNSTORE_MATURITY MariaDB_PLUGIN_MATURITY_STABLE
#endif
// Prefix prepended to a table's file name to form its insert-cache twin
// (see the ha_mcs_cache implementation further down in this file).
#define CACHE_PREFIX "#cache#"
// The ColumnStore handlerton; set when the plugin is initialized.
handlerton* mcs_hton = NULL;
// This is the maria handlerton that we need for the cache
static handlerton* mcs_maria_hton = NULL;
// Engine version / commit-hash strings exposed by the plugin.
char cs_version[25];
char cs_commit_hash[41];  // a commit hash is 40 characters
// handlers creation function for hton.
// Look into ha_mcs_pushdown.* for more details.
group_by_handler* create_columnstore_group_by_handler(THD* thd, Query* query);
derived_handler* create_columnstore_derived_handler(THD* thd, TABLE_LIST* derived);
select_handler* create_columnstore_select_handler(THD* thd, SELECT_LEX* sel_lex, SELECT_LEX_UNIT* sel_unit);
select_handler* create_columnstore_unit_handler(THD* thd, SELECT_LEX_UNIT* sel_unit);
/* Variables for example share methods */
/*
  Hash used to track the number of open tables; variable for example share
  methods
*/
static HASH mcs_open_tables;
/* The mutex used to init the hash; variable for example share methods */
pthread_mutex_t mcs_mutex;
// Make sure our local debug helper macros start from a clean slate before
// (re)defining them.
#ifdef DEBUG_ENTER
#undef DEBUG_ENTER
#endif
#ifdef DEBUG_RETURN
// Bug fix: this block previously #undef'ed DEBUG_ENTER (again) instead of
// DEBUG_RETURN, so an already-defined DEBUG_RETURN would be redefined below.
#undef DEBUG_RETURN
#endif
#define DEBUG_RETURN return
#if MYSQL_VERSION_ID >= 110500
/* because of renames in the handler class */
#define rows_changed rows_stats.updated
#endif
/**
  @brief
  Hash callback used by the open-tables hash: yields the share's table name
  as the hash key and stores its length through 'length'.
*/
static uchar* mcs_get_key(COLUMNSTORE_SHARE* share, size_t* length, my_bool not_used __attribute__((unused)))
{
  *length = share->table_name_length;
  return reinterpret_cast<uchar*>(share->table_name);
}
// This one is unused
// Handlerton discover hook: if the table exists in ColumnStore, read its
// frm image from the share and initialize the TABLE_SHARE from it.
int mcs_discover(handlerton* hton, THD* thd, TABLE_SHARE* share)
{
  DBUG_ENTER("mcs_discover");
  DBUG_PRINT("mcs_discover", ("db: '%s' name: '%s'", share->db.str, share->table_name.str));
#ifdef INFINIDB_DEBUG
  fprintf(stderr, "mcs_discover()\n");
#endif
  uchar* frm_data = NULL;
  size_t frm_len = 0;
  // Unknown to the impl layer -> report "no such table".
  if (!ha_mcs_impl_discover_existence(share->db.str, share->table_name.str))
    DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
  if (share->read_frm_image((const uchar**)&frm_data, &frm_len) != 0)
    DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
  my_errno = share->init_from_binary_frm_image(thd, 1, frm_data, frm_len);
  my_free(frm_data);
  DBUG_RETURN(my_errno);
}
// This f() is also unused
// Handlerton discover_existence hook: delegates the existence check for
// db.table_name to the impl layer.
int mcs_discover_existence(handlerton* hton, const char* db, const char* table_name)
{
  return ha_mcs_impl_discover_existence(db, table_name);
}
// Handlerton commit hook: forwards to the impl layer; any runtime_error is
// reported to the client session as ER_INTERNAL_ERROR.
static int mcs_commit(handlerton* hton, THD* thd, bool all)
{
  int ret = 0;
  try
  {
    ret = ha_mcs_impl_commit(hton, thd, all);
  }
  catch (const std::runtime_error& ex)
  {
    current_thd->raise_error_printf(ER_INTERNAL_ERROR, ex.what());
    ret = ER_INTERNAL_ERROR;
  }
  return ret;
}
// Handlerton rollback hook: forwards to the impl layer; any runtime_error is
// reported to the client session as ER_INTERNAL_ERROR.
static int mcs_rollback(handlerton* hton, THD* thd, bool all)
{
  int ret = 0;
  try
  {
    ret = ha_mcs_impl_rollback(hton, thd, all);
  }
  catch (const std::runtime_error& ex)
  {
    current_thd->raise_error_printf(ER_INTERNAL_ERROR, ex.what());
    ret = ER_INTERNAL_ERROR;
  }
  return ret;
}
// Handlerton close_connection hook: releases per-connection engine state via
// the impl layer, converting runtime_error into ER_INTERNAL_ERROR.
static int mcs_close_connection(handlerton* hton, THD* thd)
{
  int ret = 0;
  try
  {
    ret = ha_mcs_impl_close_connection(hton, thd);
  }
  catch (const std::runtime_error& ex)
  {
    current_thd->raise_error_printf(ER_INTERNAL_ERROR, ex.what());
    ret = ER_INTERNAL_ERROR;
  }
  return ret;
}
// Handler constructor: advertises the engine's capability flags to the
// server and captures the current session's time zone as a numeric offset,
// used later by the read/write row paths for temporal conversion.
ha_mcs::ha_mcs(handlerton* hton, TABLE_SHARE* table_arg)
 : handler(hton, table_arg)
 , int_table_flags(HA_BINLOG_STMT_CAPABLE | HA_BINLOG_ROW_CAPABLE | HA_TABLE_SCAN_ON_INDEX |
                   HA_CAN_TABLE_CONDITION_PUSHDOWN | HA_CAN_DIRECT_UPDATE_AND_DELETE)
 , m_lock_type(F_UNLCK)
{
  const char* timeZone = current_thd->variables.time_zone->get_name()->ptr();
  dataconvert::timeZoneToOffset(timeZone, strlen(timeZone), &time_zone);
}
/**
@brief
If frm_error() is called then we will use this to determine
the file extensions that exist for the storage engine. This is also
used by the default rename_table and delete_table method in
handler.cc.
For engines that have two file name extensions (separate meta/index file
and data file), the order of elements is relevant. The first element of the
engine file name extensions array should be the meta/index file extension.
The second element is the data file extension. This order is assumed by
prepare_for_repair() when REPAIR TABLE ... USE_FRM is issued.
@see
rename_table method in handler.cc and
delete_table method in handler.cc
*/
// The engine registers no per-table file-name extensions, so the list
// contains only the NullS terminator.
static const char* ha_mcs_exts[] = {NullS};
const char** ha_mcs::bas_ext() const
{
  return ha_mcs_exts;
}
// ANALYZE TABLE entry point: delegates to the impl layer; runtime_error is
// surfaced as ER_INTERNAL_ERROR on the session.
int ha_mcs::analyze(THD* thd, HA_CHECK_OPT* check_opt)
{
  DBUG_ENTER("ha_mcs::analyze");
  int ret = 0;
  try
  {
    ret = ha_mcs_impl_analyze(thd, table);
  }
  catch (const std::runtime_error& ex)
  {
    thd->raise_error_printf(ER_INTERNAL_ERROR, ex.what());
    ret = ER_INTERNAL_ERROR;
  }
  DBUG_RETURN(ret);
}
/**
@brief
Used for opening tables. The name will be the name of the file.
@details
A table is opened when it needs to be opened; e.g. when a request comes in
for a SELECT on the table (tables are not open and closed for each request,
they are cached).
Called from handler.cc by handler::ha_open(). The server opens all tables by
calling ha_open() which then calls the handler specific open().
@see
handler::ha_open() in handler.cc
*/
// Open the table. For prepared statements we must also mutate the optimizer
// flags here (MCOL-4282; see the description for discover_check_version()
// in ha_mcs.h). Example sequence that reaches this path:
//   CREATE TABLE t1 (a int, b int) engine=columnstore;
//   PREPARE stmt1 FROM "SELECT * FROM t1";
//   EXECUTE stmt1;
int ha_mcs::open(const char* name, int mode, uint32_t test_if_locked)
{
  DBUG_ENTER("ha_mcs::open");
  const bool prepared_stmt =
      current_thd->stmt_arena &&
      (current_thd->stmt_arena->is_stmt_prepare() || current_thd->stmt_arena->is_stmt_execute());
  if (prepared_stmt)
    mutate_optimizer_flags(current_thd);
  int ret = 0;
  try
  {
    ret = ha_mcs_impl_open(name, mode, test_if_locked);
  }
  catch (const std::runtime_error& ex)
  {
    current_thd->raise_error_printf(ER_INTERNAL_ERROR, ex.what());
    ret = ER_INTERNAL_ERROR;
  }
  DBUG_RETURN(ret);
}
// Same prepared-statement optimizer-flag mutation as in ha_mcs::open();
// see MCOL-4282 and the comment in ha_mcs.h.
int ha_mcs::discover_check_version()
{
  const bool prepared_stmt =
      current_thd->stmt_arena &&
      (current_thd->stmt_arena->is_stmt_prepare() || current_thd->stmt_arena->is_stmt_execute());
  if (prepared_stmt)
    mutate_optimizer_flags(current_thd);
  return 0;
}
/**
@brief
Closes a table. We call the free_share() function to free any resources
that we have allocated in the "shared" structure.
@details
Called from sql_base.cc, sql_select.cc, and table.cc. In sql_select.cc it is
only used to close up temporary tables or during the process where a
temporary table is converted over to being a myisam table.
For sql_base.cc look at close_data_tables().
@see
sql_base.cc, sql_select.cc and table.cc
*/
// Close the table: delegates to the impl layer, converting runtime_error
// into ER_INTERNAL_ERROR.
int ha_mcs::close(void)
{
  DBUG_ENTER("ha_mcs::close");
  int ret = 0;
  try
  {
    ret = ha_mcs_impl_close();
  }
  catch (const std::runtime_error& ex)
  {
    current_thd->raise_error_printf(ER_INTERNAL_ERROR, ex.what());
    ret = ER_INTERNAL_ERROR;
  }
  DBUG_RETURN(ret);
}
/**
@brief
write_row() inserts a row. No extra() hint is given currently if a bulk load
is happening. buf() is a byte array of data. You can use the field
information to extract the data from the native byte array type.
@details
Example of this would be:
@code
@endcode
*/
// Insert one row. 'buf' is the row image in server format; the impl layer
// extracts the field values and tracks the changed-row counter. The session
// time-zone offset captured in the constructor is passed through for
// temporal conversion.
int ha_mcs::write_row(const uchar* buf)
{
  DBUG_ENTER("ha_mcs::write_row");
  int ret = 0;
  try
  {
    ret = ha_mcs_impl_write_row(buf, table, rows_changed, time_zone);
  }
  catch (const std::runtime_error& ex)
  {
    current_thd->raise_error_printf(ER_INTERNAL_ERROR, ex.what());
    ret = ER_INTERNAL_ERROR;
  }
  DBUG_RETURN(ret);
}
// Begin a bulk-insert batch of (approximately) 'rows' rows. Errors from the
// impl layer are reported on the session; there is no return value here.
void ha_mcs::start_bulk_insert(ha_rows rows, uint flags)
{
  DBUG_ENTER("ha_mcs::start_bulk_insert");
  try
  {
    ha_mcs_impl_start_bulk_insert(rows, table);
  }
  catch (const std::runtime_error& ex)
  {
    current_thd->raise_error_printf(ER_INTERNAL_ERROR, ex.what());
  }
  DBUG_VOID_RETURN;
}
// Begin a bulk insert that is fed from the insert cache; the trailing 'true'
// tells the impl layer this batch originates from the cache flush path.
void ha_mcs::start_bulk_insert_from_cache(ha_rows rows, uint flags)
{
  DBUG_ENTER("ha_mcs::start_bulk_insert_from_cache");
  try
  {
    ha_mcs_impl_start_bulk_insert(rows, table, true);
  }
  catch (const std::runtime_error& ex)
  {
    current_thd->raise_error_printf(ER_INTERNAL_ERROR, ex.what());
  }
  DBUG_VOID_RETURN;
}
// Finish the current bulk-insert batch. The 'false' argument means this is
// a normal (non-abort) completion.
int ha_mcs::end_bulk_insert()
{
  DBUG_ENTER("ha_mcs::end_bulk_insert");
  int ret = 0;
  try
  {
    ret = ha_mcs_impl_end_bulk_insert(false, table);
  }
  catch (const std::runtime_error& ex)
  {
    current_thd->raise_error_printf(ER_INTERNAL_ERROR, ex.what());
    ret = ER_INTERNAL_ERROR;
  }
  DBUG_RETURN(ret);
}
/**
@brief
Yes, update_row() does what you expect, it updates a row. old_data will have
the previous row record in it, while new_data will have the newest data in it.
Keep in mind that the server can do updates based on ordering if an ORDER BY
clause was used. Consecutive ordering is not guaranteed.
@details
@code
@endcode
@see
*/
// Row-by-row UPDATE entry point. The row images are unused here: updates are
// executed through the direct-update path, and the impl call only performs
// the necessary bookkeeping.
int ha_mcs::update_row(const uchar* old_data, const uchar* new_data)
{
  DBUG_ENTER("ha_mcs::update_row");
  int ret = 0;
  try
  {
    ret = ha_mcs_impl_update_row();
  }
  catch (const std::runtime_error& ex)
  {
    current_thd->raise_error_printf(ER_INTERNAL_ERROR, ex.what());
    ret = ER_INTERNAL_ERROR;
  }
  DBUG_RETURN(ret);
}
/**
@brief
Direct UPDATE/DELETE are the features that allow the engine to run UPDATE
or DELETE on its own. There are a number of limitations that disallow
the feature.
@details
@code
@endcode
@see
mysql_update()/mysql_delete
*/
// Direct-UPDATE initialization hook. Returning 0 accepts the direct-update
// request; the actual work happens in direct_update_rows().
int ha_mcs::direct_update_rows_init(List<Item>* update_fields)
{
  DBUG_ENTER("ha_mcs::direct_update_rows_init");
  DBUG_RETURN(0);
}
// Execute a direct (engine-side) UPDATE using the pushed-down condition
// stack; the number of updated rows is returned through 'update_rows'.
int ha_mcs::direct_update_rows(ha_rows* update_rows)
{
  DBUG_ENTER("ha_mcs::direct_update_rows");
  int ret = 0;
  try
  {
    ret = ha_mcs_impl_direct_update_delete_rows(false, update_rows, condStack);
  }
  catch (const std::runtime_error& ex)
  {
    current_thd->raise_error_printf(ER_INTERNAL_ERROR, ex.what());
    ret = ER_INTERNAL_ERROR;
  }
  DBUG_RETURN(ret);
}
// Two-count overload of direct UPDATE. The impl layer reports a single row
// count, which is mirrored into both 'update_rows' and 'found_rows'.
int ha_mcs::direct_update_rows(ha_rows* update_rows, ha_rows* found_rows)
{
  DBUG_ENTER("ha_mcs::direct_update_rows");
  int ret = 0;
  try
  {
    ret = ha_mcs_impl_direct_update_delete_rows(false, update_rows, condStack);
    *found_rows = *update_rows;
  }
  catch (const std::runtime_error& ex)
  {
    current_thd->raise_error_printf(ER_INTERNAL_ERROR, ex.what());
    ret = ER_INTERNAL_ERROR;
  }
  DBUG_RETURN(ret);
}
// Direct-DELETE initialization hook. Returning 0 accepts the direct-delete
// request; the actual work happens in direct_delete_rows().
int ha_mcs::direct_delete_rows_init()
{
  DBUG_ENTER("ha_mcs::direct_delete_rows_init");
  DBUG_RETURN(0);
}
// Execute a direct (engine-side) DELETE using the pushed-down condition
// stack; the number of deleted rows is returned through 'deleted_rows'.
int ha_mcs::direct_delete_rows(ha_rows* deleted_rows)
{
  DBUG_ENTER("ha_mcs::direct_delete_rows");
  int ret = 0;
  try
  {
    ret = ha_mcs_impl_direct_update_delete_rows(true, deleted_rows, condStack);
  }
  catch (const std::runtime_error& ex)
  {
    current_thd->raise_error_printf(ER_INTERNAL_ERROR, ex.what());
    ret = ER_INTERNAL_ERROR;
  }
  DBUG_RETURN(ret);
}
/**
@brief
This will delete a row. buf will contain a copy of the row to be deleted.
The server will call this right after the current row has been called (from
either a previous rnd_nexT() or index call).
@details
If you keep a pointer to the last row or can access a primary key it will
make doing the deletion quite a bit easier. Keep in mind that the server does
not guarantee consecutive deletions. ORDER BY clauses can be used.
Called in sql_acl.cc and sql_udf.cc to manage internal table
information. Called in sql_delete.cc, sql_insert.cc, and
sql_select.cc. In sql_select it is used for removing duplicates
while in insert it is used for REPLACE calls.
@see
sql_acl.cc, sql_udf.cc, sql_delete.cc, sql_insert.cc and sql_select.cc
*/
// Row-by-row DELETE entry point. The row image is unused: deletes are
// executed through the direct-delete path, and the impl call only performs
// the necessary bookkeeping.
int ha_mcs::delete_row(const uchar* buf)
{
  DBUG_ENTER("ha_mcs::delete_row");
  int ret = 0;
  try
  {
    ret = ha_mcs_impl_delete_row();
  }
  catch (const std::runtime_error& ex)
  {
    current_thd->raise_error_printf(ER_INTERNAL_ERROR, ex.what());
    ret = ER_INTERNAL_ERROR;
  }
  DBUG_RETURN(ret);
}
/**
  @brief
  Positions an index cursor to the index specified in the handle. Fetches the
  row if available. If the key value is null, begin at the first key of the
  index.

  ColumnStore has no native indexes, so every index_* entry point below
  returns HA_ERR_WRONG_COMMAND.
*/
int ha_mcs::index_read_map(uchar* buf, const uchar* key, key_part_map keypart_map __attribute__((unused)),
                           enum ha_rkey_function find_flag __attribute__((unused)))
{
  DBUG_ENTER("ha_mcs::index_read");
  DBUG_RETURN(HA_ERR_WRONG_COMMAND);
}
/**
  @brief
  Used to read forward through the index. Not supported.
*/
int ha_mcs::index_next(uchar* buf)
{
  DBUG_ENTER("ha_mcs::index_next");
  DBUG_RETURN(HA_ERR_WRONG_COMMAND);
}
/**
  @brief
  Used to read backwards through the index. Not supported.
*/
int ha_mcs::index_prev(uchar* buf)
{
  DBUG_ENTER("ha_mcs::index_prev");
  DBUG_RETURN(HA_ERR_WRONG_COMMAND);
}
/**
  @brief
  index_first() asks for the first key in the index. Not supported.
  @details
  Called from opt_range.cc, opt_sum.cc, sql_handler.cc, and sql_select.cc.
  @see
  opt_range.cc, opt_sum.cc, sql_handler.cc and sql_select.cc
*/
int ha_mcs::index_first(uchar* buf)
{
  DBUG_ENTER("ha_mcs::index_first");
  DBUG_RETURN(HA_ERR_WRONG_COMMAND);
}
/**
  @brief
  index_last() asks for the last key in the index. Not supported.
  @details
  Called from opt_range.cc, opt_sum.cc, sql_handler.cc, and sql_select.cc.
  @see
  opt_range.cc, opt_sum.cc, sql_handler.cc and sql_select.cc
*/
int ha_mcs::index_last(uchar* buf)
{
  DBUG_ENTER("ha_mcs::index_last");
  DBUG_RETURN(HA_ERR_WRONG_COMMAND);
}
/**
@brief
rnd_init() is called when the system wants the storage engine to do a table
scan. See the example in the introduction at the top of this file to see when
rnd_init() is called.
@details
Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc, sql_table.cc,
and sql_update.cc.
@see
filesort.cc, records.cc, sql_handler.cc, sql_select.cc, sql_table.cc and sql_update.cc
*/
// Prepare for a table scan. Only a real scan (scan == true) needs engine
// work; position-based re-init is a no-op.
int ha_mcs::rnd_init(bool scan)
{
  DBUG_ENTER("ha_mcs::rnd_init");
  int ret = 0;
  if (!scan)
    DBUG_RETURN(ret);
  try
  {
    ret = impl_rnd_init(table, condStack);
  }
  catch (const std::runtime_error& ex)
  {
    current_thd->raise_error_printf(ER_INTERNAL_ERROR, ex.what());
    ret = ER_INTERNAL_ERROR;
  }
  DBUG_RETURN(ret);
}
// Finish a table scan: delegates to the impl layer and converts any
// runtime_error into an ER_INTERNAL_ERROR reported to the client.
int ha_mcs::rnd_end()
{
  DBUG_ENTER("ha_mcs::rnd_end");
  int ret = 0;
  try
  {
    ret = ha_mcs_impl_rnd_end(table);
  }
  catch (const std::runtime_error& ex)
  {
    current_thd->raise_error_printf(ER_INTERNAL_ERROR, ex.what());
    ret = ER_INTERNAL_ERROR;
  }
  DBUG_RETURN(ret);
}
/**
@brief
This is called for each row of the table scan. When you run out of records
you should return HA_ERR_END_OF_FILE. Fill buff up with the row information.
The Field structure for the table is the key to getting data into buf
in a manner that will allow the server to understand it.
@details
Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc, sql_table.cc,
and sql_update.cc.
@see
filesort.cc, records.cc, sql_handler.cc, sql_select.cc, sql_table.cc and sql_update.cc
*/
// Fetch the next row of the scan into 'buf' (server row format). The impl
// layer returns HA_ERR_END_OF_FILE when the scan is exhausted; the session
// time-zone offset is passed through for temporal conversion.
int ha_mcs::rnd_next(uchar* buf)
{
  DBUG_ENTER("ha_mcs::rnd_next");
  int ret = 0;
  try
  {
    ret = ha_mcs_impl_rnd_next(buf, table, time_zone);
  }
  catch (const std::runtime_error& ex)
  {
    current_thd->raise_error_printf(ER_INTERNAL_ERROR, ex.what());
    ret = ER_INTERNAL_ERROR;
  }
  DBUG_RETURN(ret);
}
/**
@brief
position() is called after each call to rnd_next() if the data needs
to be ordered. You can do something like the following to store
the position:
@code
my_store_ptr(ref, ref_length, current_position);
@endcode
@details
The server uses ref to store data. ref_length in the above case is
the size needed to store current_position. ref is just a byte array
that the server will maintain. If you are using offsets to mark rows, then
current_position should be the offset. If it is a primary key like in
BDB, then it needs to be a primary key.
Called from filesort.cc, sql_select.cc, sql_delete.cc, and sql_update.cc.
@see
filesort.cc, sql_select.cc, sql_delete.cc and sql_update.cc
*/
// @TODO: Implement position() and rnd_pos() and remove HA_NO_BLOBS from table_flags
This would require us to add a pseudo-column of some sort for a primary index. This
// would only be used in rare cases of ORDER BY, so the slow down would be ok and would
// allow for implementing blobs (is that the same as varbinary?). Perhaps using
// lbid and offset as key would work, or something. We also need to add functionality
// to retrieve records quickly by this "key"
// Intentionally a no-op: row positions are not tracked (see the TODO above
// about implementing position()/rnd_pos()).
void ha_mcs::position(const uchar* record)
{
  DBUG_ENTER("ha_mcs::position");
  DBUG_VOID_RETURN;
}
/**
@brief
This is like rnd_next, but you are given a position to use
to determine the row. The position will be of the type that you stored in
ref. You can use ha_get_ptr(pos,ref_length) to retrieve whatever key
or position you saved when position() was called.
@details
Called from filesort.cc, records.cc, sql_insert.cc, sql_select.cc, and sql_update.cc.
@see
filesort.cc, records.cc, sql_insert.cc, sql_select.cc and sql_update.cc
*/
// Fetch a row by a position previously saved via position(). Delegated to
// the impl layer; runtime_error is surfaced as ER_INTERNAL_ERROR.
int ha_mcs::rnd_pos(uchar* buf, uchar* pos)
{
  DBUG_ENTER("ha_mcs::rnd_pos");
  int ret = 0;
  try
  {
    ret = ha_mcs_impl_rnd_pos(buf, pos);
  }
  catch (const std::runtime_error& ex)
  {
    current_thd->raise_error_printf(ER_INTERNAL_ERROR, ex.what());
    ret = ER_INTERNAL_ERROR;
  }
  DBUG_RETURN(ret);
}
/**
@brief
::info() is used to return information to the optimizer. See my_base.h for
the complete description.
@details
Currently this table handler doesn't implement most of the fields really needed.
SHOW also makes use of this data.
You will probably want to have the following in your code:
@code
if (records < 2)
records = 2;
@endcode
The reason is that the server will optimize for cases of only a single
record. If, in a table scan, you don't know the number of records, it
will probably be better to set records to two so you can return as many
records as you need. Along with records, a few more variables you may wish
to set are:
records
deleted
data_file_length
index_file_length
delete_length
check_time
Take a look at the public variables in handler.h for more information.
Called in filesort.cc, ha_heap.cc, item_sum.cc, opt_sum.cc, sql_delete.cc,
sql_delete.cc, sql_derived.cc, sql_select.cc, sql_select.cc, sql_select.cc,
sql_select.cc, sql_select.cc, sql_show.cc, sql_show.cc, sql_show.cc, sql_show.cc,
sql_table.cc, sql_union.cc, and sql_update.cc.
@see
filesort.cc, ha_heap.cc, item_sum.cc, opt_sum.cc, sql_delete.cc, sql_delete.cc,
sql_derived.cc, sql_select.cc, sql_select.cc, sql_select.cc, sql_select.cc,
sql_select.cc, sql_show.cc, sql_show.cc, sql_show.cc, sql_show.cc, sql_table.cc,
sql_union.cc and sql_update.cc
*/
// Report table statistics to the optimizer. Only a fixed row-count estimate
// is provided; per the handler contract above, the optimizer mainly needs a
// value greater than 1 to avoid single-row optimizations.
int ha_mcs::info(uint32_t flag)
{
  DBUG_ENTER("ha_mcs::info");
  // @bug 1635. Raise this number magically fix the filesort crash issue. May need to twist
  // the number again if the issue re-occurs
  stats.records = 2000;
#ifdef INFINIDB_DEBUG
  puts("info");
#endif
  DBUG_RETURN(0);
}
/**
@brief
extra() is called whenever the server wishes to send a hint to
the storage engine. The myisam engine implements the most hints.
ha_innodb.cc has the most exhaustive list of these hints.
@see
ha_innodb.cc
*/
// Receive a hint from the server. All hints are ignored; in INFINIDB_DEBUG
// builds the hint is logged to stderr for tracing.
int ha_mcs::extra(enum ha_extra_function operation)
{
  DBUG_ENTER("ha_mcs::extra");
#ifdef INFINIDB_DEBUG
  {
    // Translate the few hints we care to trace into readable names.
    const char* hefs;
    switch (operation)
    {
      case HA_EXTRA_NO_READCHECK: hefs = "HA_EXTRA_NO_READCHECK"; break;
      case HA_EXTRA_CACHE: hefs = "HA_EXTRA_CACHE"; break;
      case HA_EXTRA_NO_CACHE: hefs = "HA_EXTRA_NO_CACHE"; break;
      case HA_EXTRA_NO_IGNORE_DUP_KEY: hefs = "HA_EXTRA_NO_IGNORE_DUP_KEY"; break;
      case HA_EXTRA_PREPARE_FOR_RENAME: hefs = "HA_EXTRA_PREPARE_FOR_RENAME"; break;
      default: hefs = "UNKNOWN ENUM!"; break;
    }
    fprintf(stderr, "ha_mcs::extra(\"%s\", %d: %s)\n", table->s->table_name.str, operation, hefs);
  }
#endif
  DBUG_RETURN(0);
}
/**
@brief
Used to delete all rows in a table, including cases of truncate and cases where
the optimizer realizes that all rows will be removed as a result of an SQL statement.
@details
Called from item_sum.cc by Item_func_group_concat::clear(),
Item_sum_count_distinct::clear(), and Item_func_group_concat::clear().
Called from sql_delete.cc by mysql_delete().
Called from sql_select.cc by JOIN::reinit().
Called from sql_union.cc by st_select_lex_unit::exec().
@see
Item_func_group_concat::clear(), Item_sum_count_distinct::clear() and
Item_func_group_concat::clear() in item_sum.cc;
mysql_delete() in sql_delete.cc;
JOIN::reinit() in sql_select.cc and
st_select_lex_unit::exec() in sql_union.cc.
*/
// Not supported through this entry point: returning HA_ERR_WRONG_COMMAND
// makes the server fall back to its generic delete handling.
int ha_mcs::delete_all_rows()
{
  DBUG_ENTER("ha_mcs::delete_all_rows");
  DBUG_RETURN(HA_ERR_WRONG_COMMAND);
}
/**
@brief
This create a lock on the table. If you are implementing a storage engine
that can handle transactions look at ha_berkeley.cc to see how you will
want to go about doing this. Otherwise you should consider calling flock()
here. Hint: Read the section "locking functions for mysql" in lock.cc to understand
this.
@details
Called from lock.cc by lock_external() and unlock_external(). Also called
from sql_table.cc by copy_data_between_tables().
@see
lock.cc by lock_external() and unlock_external() in lock.cc;
the section "locking functions for mysql" in lock.cc;
copy_data_between_tables() in sql_table.cc.
*/
// Called when the server takes or releases its table-level lock on this
// table. Registers the transaction with the server when autocommit is off,
// then forwards to the impl layer.
int ha_mcs::external_lock(THD* thd, int lock_type)
{
  DBUG_ENTER("ha_mcs::external_lock");
  int ret = 0;
  try
  {
    // @Bug 2526: only register the transaction when autocommit is off.
    if (thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
      trans_register_ha(thd, true, mcs_hton, 0);
    ret = impl_external_lock(thd, table, lock_type);
  }
  catch (const std::runtime_error& ex)
  {
    current_thd->raise_error_printf(ER_INTERNAL_ERROR, ex.what());
    ret = ER_INTERNAL_ERROR;
  }
  DBUG_RETURN(ret);
}
/**
@brief
The idea with handler::store_lock() is: The statement decides which locks
should be needed for the table. For updates/deletes/inserts we get WRITE
locks, for SELECT... we get read locks.
@details
Before adding the lock into the table lock handler (see thr_lock.c),
mysqld calls store lock with the requested locks. Store lock can now
modify a write lock to a read lock (or some other lock), ignore the
lock (if we don't want to use MySQL table locks at all), or add locks
for many tables (like we do when we are using a MERGE handler).
Berkeley DB, for example, changes all WRITE locks to TL_WRITE_ALLOW_WRITE
(which signals that we are doing WRITES, but are still allowing other
readers and writers).
When releasing locks, store_lock() is also called. In this case one
usually doesn't have to do anything.
In some exceptional cases MySQL may send a request for a TL_IGNORE;
This means that we are requesting the same lock as last time and this
should also be ignored. (This may happen when someone does a flush
table when we have opened a part of the tables, in which case mysqld
closes and reopens the tables and tries to get the same locks at last
time). In the future we will probably try to remove this.
Called from lock.cc by get_lock_data().
@note
In this method one should NEVER rely on table->in_use, it may, in fact,
refer to a different thread! (this happens if get_lock_data() is called
from mysql_lock_abort_for_thread() function)
@see
get_lock_data() in lock.cc
*/
// Returning 'to' unchanged (without appending &lock) tells the server this
// engine takes no MySQL-level table locks; the commented-out lines show the
// conventional implementation that was deliberately disabled.
THR_LOCK_DATA** ha_mcs::store_lock(THD* thd, THR_LOCK_DATA** to, enum thr_lock_type lock_type)
{
  // if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK)
  // lock.type=lock_type;
  //*to++= &lock;
#ifdef INFINIDB_DEBUG
  puts("store_lock");
#endif
  return to;
}
/**
@brief
Used to delete a table. By the time delete_table() has been called all
opened references to this table will have been closed (and your globally
shared references released). The variable name will just be the name of
the table. You will need to remove any files you have created at this point.
@details
If you do not implement this, the default delete_table() is called from
handler.cc and it will delete all files with the file extensions returned
by bas_ext().
Called from handler.cc by delete_table and ha_create_table(). Only used
during create if the table_flag HA_DROP_BEFORE_CREATE was specified for
the storage engine.
@see
delete_table and ha_create_table() in handler.cc
*/
// DROP TABLE entry point: delegates to the impl layer, which removes the
// table from the ColumnStore side; runtime_error is surfaced as
// ER_INTERNAL_ERROR.
int ha_mcs::delete_table(const char* name)
{
  DBUG_ENTER("ha_mcs::delete_table");
  int rc;
  try
  {
    rc = ha_mcs_impl_delete_table(name);
  }
  catch (std::runtime_error& e)
  {
    current_thd->raise_error_printf(ER_INTERNAL_ERROR, e.what());
    rc = ER_INTERNAL_ERROR;
  }
  DBUG_RETURN(rc);
}
/**
@brief
Renames a table from one name to another via an alter table call.
@details
If you do not implement this, the default rename_table() is called from
handler.cc and it will delete all files with the file extensions returned
by bas_ext().
Called from sql_table.cc by mysql_rename_table().
@see
mysql_rename_table() in sql_table.cc
*/
// RENAME TABLE / ALTER ... RENAME entry point: delegates to the impl layer;
// runtime_error is surfaced as ER_INTERNAL_ERROR.
int ha_mcs::rename_table(const char* from, const char* to)
{
  DBUG_ENTER("ha_mcs::rename_table ");
  int ret = 0;
  try
  {
    ret = ha_mcs_impl_rename_table(from, to);
  }
  catch (const std::runtime_error& ex)
  {
    current_thd->raise_error_printf(ER_INTERNAL_ERROR, ex.what());
    ret = ER_INTERNAL_ERROR;
  }
  DBUG_RETURN(ret);
}
/**
@brief
Given a starting key and an ending key, estimate the number of rows that
will exist between the two keys.
@details
end_key may be empty, in which case determine if start_key matches any rows.
Called from opt_range.cc by check_quick_keys().
@see
check_quick_keys() in opt_range.cc
*/
// Estimate rows between two keys. Always returns a small constant; per the
// original comment this is a deliberately low number to force index usage.
ha_rows ha_mcs::records_in_range(uint32_t inx, const key_range* min_key, const key_range* max_key,
                                 page_range* res)
{
  DBUG_ENTER("ha_mcs::records_in_range");
  DBUG_RETURN(10);  // low number to force index usage
}
/**
@brief
create() is called to create a database. The variable name will have the name
of the table.
@details
When create() is called you do not need to worry about
opening the table. Also, the .frm file will have already been
created so adjusting create_info is not necessary. You can overwrite
the .frm file at this point if you wish to change the table
definition, but there are no methods currently provided for doing
so.
Called from handle.cc by ha_create_table().
@see
ha_create_table() in handle.cc
*/
// CREATE TABLE entry point: the .frm already exists by the time this runs;
// the impl layer creates the table on the ColumnStore side. runtime_error is
// surfaced as ER_INTERNAL_ERROR.
int ha_mcs::create(const char* name, TABLE* table_arg, HA_CREATE_INFO* create_info)
{
  DBUG_ENTER("ha_mcs::create");
  int ret = 0;
  try
  {
    ret = ha_mcs_impl_create(name, table_arg, create_info);
  }
  catch (const std::runtime_error& ex)
  {
    current_thd->raise_error_printf(ER_INTERNAL_ERROR, ex.what());
    ret = ER_INTERNAL_ERROR;
  }
  DBUG_RETURN(ret);
}
// Condition pushdown: hand the condition to the impl layer, which records it
// on condStack. Returns the residual condition (NULL if fully consumed or if
// an error was raised).
const COND* ha_mcs::cond_push(const COND* cond)
{
  DBUG_ENTER("ha_mcs::cond_push");
  COND* residual = NULL;
  try
  {
    residual = ha_mcs_impl_cond_push(const_cast<COND*>(cond), table, condStack);
  }
  catch (const std::runtime_error& ex)
  {
    current_thd->raise_error_printf(ER_INTERNAL_ERROR, ex.what());
  }
  DBUG_RETURN(residual);
}
void ha_mcs::cond_pop()
{
DBUG_ENTER("ha_mcs::cond_pop");
THD* thd = current_thd;
if ((((thd->lex)->sql_command == SQLCOM_UPDATE) || ((thd->lex)->sql_command == SQLCOM_UPDATE_MULTI) ||
((thd->lex)->sql_command == SQLCOM_DELETE) || ((thd->lex)->sql_command == SQLCOM_DELETE_MULTI)) &&
!condStack.empty())
{
condStack.pop_back();
}
DBUG_VOID_RETURN;
}
// Per-statement reset: drop any pushed-down conditions and restore the
// optimizer flags mutated earlier in ha_mcs::open() /
// ha_mcs::discover_check_version().
int ha_mcs::reset()
{
  DBUG_ENTER("ha_mcs::reset");
  // clear() on an empty container is a no-op, so no emptiness check needed.
  condStack.clear();
  restore_optimizer_flags(current_thd);
  DBUG_RETURN(0);
}
// REPAIR TABLE is not supported; the assert documents that the engine never
// advertises HA_CAN_REPAIR in its table flags.
int ha_mcs::repair(THD* thd, HA_CHECK_OPT* check_opt)
{
  DBUG_ENTER("ha_mcs::repair");
  DBUG_ASSERT(!(int_table_flags & HA_CAN_REPAIR));
  DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
}
// Tables are never reported as crashed.
bool ha_mcs::is_crashed() const
{
  DBUG_ENTER("ha_mcs::is_crashed");
  DBUG_RETURN(0);
}
/*
Implementation of ha_mcs_cache, ha_mcs_cache is an insert cache for ColumnStore to
speed up inserts.
The idea is that inserts are first stored in storage engine that is
fast for inserts, like MyISAM or Aria, and in case of select,
update, delete then the rows are first flushed to ColumnStore before
the original requests is made.
The table used for the cache is the original table name prefixed with #cache#
*/
/*
TODO:
- Add create flag to myisam_open that will ensure that on open and recovery
it restores only as many rows as stored in the header.
(Can be fast as we have number of rows and file size stored in the header)
- Commit to the cache is now done per statement. Should be changed to be per
transaction. Should not be impossible to do as we only have to update
key_file_length and data_file_length on commit.
- On recovery, check all #cache# tables to see if the last stored commit
is already in ColumnStore. If yes, truncate the cache.
Things to consider:
The current implementation does a synchronization of tables as part of
thd->get_status(), which is the function to be called when server
has got a lock of all used tables. This ensure that the row
visibility is the same for all tables.
The disadvantage of this is that we have to always take a write lock
for the cache table. In case of read transactions, this lock is released
in free_locks() as soon as we get the table lock.
Another alternative would be to assume that if there are no cached rows
during the call to 'store_lock', then we can ignore any new rows added.
This would allow us to avoid write locks for the cached table, except
for inserts or if there are rows in the cache. The disadvantage would be
that we would not see any rows inserted while we are trying to get the
lock.
*/
my_bool get_status_and_flush_cache(void* param, my_bool concurrent_insert);
/*
Create a name for the cache table
*/
static void create_cache_name(char* to, const char* name)
{
uint dir_length = dirname_length(name);
to = strnmov(to, name, dir_length);
strxmov(to, CACHE_PREFIX, name + dir_length, NullS);
}
/*****************************************************************************
THR_LOCK wrapper functions
The idea of these is to highjack 'THR_LOCK->get_status() so that if this
is called in a non-insert context then we will flush the cache.
We also hijack THR_LOCK->start_trans() to free any locks on the cache
if the command was not an insert command.
*****************************************************************************/
/*
First call to get_status() will flush the cache if the command is not an
insert
*/
/*
  First call to get_status() will flush the cache if the command is not an
  insert.

  'param' points at the ha_mcs_cache handler. Returns 0 on success, 1 if
  flushing the insert cache failed.
*/
my_bool get_status_and_flush_cache(void* param, my_bool concurrent_insert)
{
  // On a replication slave, do nothing unless slave-side caching is enabled.
  if (current_thd->slave_thread && !get_replication_slave(current_thd))
    return (0);
  ha_mcs_cache* cache = (ha_mcs_cache*)param;
  int error;
  enum_sql_command sql_command = cache->table->in_use->lex->sql_command;
  cache->sql_command = sql_command;
  // Only INSERT and LOAD DATA are treated as insert commands here.
  cache->insert_command = (sql_command == SQLCOM_INSERT || sql_command == SQLCOM_LOAD);
  /*
    Call first the original Aria get_status function
    All Aria get_status functions takes Maria handler as the parameter
  */
  if (cache->share->org_lock.get_status)
    (*cache->share->org_lock.get_status)(cache->cache_handler->file, concurrent_insert);
  /* If first get_status() call for this table, flush cache if needed */
  if (!cache->lock_counter++)
  {
    ha_rows num_rows = cache->num_rows_cached();
    // Flush when a non-insert statement sees cached rows, or when the cache
    // has grown past the configured threshold.
    if (((!cache->insert_command && num_rows != 0) || num_rows >= get_cache_flush_threshold(current_thd)) &&
        // In replication, LDI on a master comes as sql_command = SQLCOM_END
        // on the slave, if binlog_format != STATEMENT. See mysql_load
        // function in sql/sql_load.cc to know why. If this is the case,
        // make sure we don't flush the cache.
        (!current_thd->slave_thread || sql_command != SQLCOM_END))
    {
      if ((error = cache->flush_insert_cache()))
      {
        my_error(error, MYF(MY_WME | ME_FATAL), "Got error while trying to flush insert cache: %d", my_errno);
        return (1);
      }
    }
  }
  return (0);
}
/*
start_trans() is called when all locks has been given
If this was not an insert command then we can free the write lock on
the cache table and also downgrade external lock for the cached table
to F_READ
*/
// THR_LOCK start_trans hook, called once all locks have been granted. For a
// non-insert command the cache was already flushed in
// get_status_and_flush_cache(), so the cache-table locks can be released;
// otherwise forward to the original Aria start_trans callback.
my_bool cache_start_trans(void* param)
{
  ha_mcs_cache* cache = static_cast<ha_mcs_cache*>(param);
  if (cache->insert_command)
    return (*cache->share->org_lock.start_trans)(cache->cache_handler->file);
  cache->free_locks();
  return 0;
}
/* Pass through functions for all the THR_LOCK virtual functions */
static void cache_copy_status(void* to, void* from)
{
ha_mcs_cache *to_cache = (ha_mcs_cache*)to, *from_cache = (ha_mcs_cache*)from;
(*to_cache->share->org_lock.copy_status)(to_cache->cache_handler->file, from_cache->cache_handler->file);
}
static void cache_update_status(void* param)
{
ha_mcs_cache* cache = (ha_mcs_cache*)param;
(*cache->share->org_lock.update_status)(cache->cache_handler->file);
}
static void cache_restore_status(void* param)
{
ha_mcs_cache* cache = (ha_mcs_cache*)param;
(*cache->share->org_lock.restore_status)(cache->cache_handler->file);
}
/* Forward check_status to the original Aria lock hook */
static my_bool cache_check_status(void* param)
{
  ha_mcs_cache* self = static_cast<ha_mcs_cache*>(param);
  return (*self->share->org_lock.check_status)(self->cache_handler->file);
}
/*****************************************************************************
ha_mcs_cache_share functions (Common storage for an open cache file)
*****************************************************************************/
/* Global singly-linked list of open cache shares; protected by LOCK_cache_share */
static ha_mcs_cache_share* cache_share_list = 0;
static PSI_mutex_key key_LOCK_cache_share;
#ifdef HAVE_PSI_INTERFACE
/* Performance-schema instrumentation entry for LOCK_cache_share */
static PSI_mutex_info all_mutexes[] = {
    {&key_LOCK_cache_share, "LOCK_cache_share", PSI_FLAG_GLOBAL},
};
#endif
/* Guards cache_share_list and each share's open_count */
static mysql_mutex_t LOCK_cache_share;
/*
  Find or create a share

  Looks up the share by table name under LOCK_cache_share; on a hit the
  open counter is incremented. Otherwise a new share is allocated in one
  block (struct followed by the name string) and linked at the head of
  cache_share_list. Returns 0 on allocation failure.
*/
ha_mcs_cache_share* find_cache_share(const char* name, ulonglong cached_rows)
{
  mysql_mutex_lock(&LOCK_cache_share);
  /* Reuse an existing share for this table if one is already open */
  for (ha_mcs_cache_share* pos = cache_share_list; pos; pos = pos->next)
  {
    if (!strcmp(pos->name, name))
    {
      pos->open_count++;
      mysql_mutex_unlock(&LOCK_cache_share);
      return (pos);
    }
  }
  /* Not found: allocate struct + name in a single block */
  ha_mcs_cache_share* share =
      (ha_mcs_cache_share*)my_malloc(PSI_NOT_INSTRUMENTED, sizeof(*share) + strlen(name) + 1, MYF(MY_FAE));
  if (!share)
  {
    mysql_mutex_unlock(&LOCK_cache_share);
    return 0;
  }
  share->name = (char*)(share + 1);
  share->open_count = 1;
  share->cached_rows = cached_rows;
  strmov((char*)share->name, name);
  /* Link at the head of the global list */
  share->next = cache_share_list;
  cache_share_list = share;
  mysql_mutex_unlock(&LOCK_cache_share);
  return share;
}
/*
  Decrement open counter and free share if there is no more users

  Unlinks this share from cache_share_list (under LOCK_cache_share) and
  frees it; the name string lives in the same my_malloc() block (see
  find_cache_share()), so a single my_free() releases both.
*/
void ha_mcs_cache_share::close()
{
  ha_mcs_cache_share* pos;
  mysql_mutex_lock(&LOCK_cache_share);
  if (!--open_count)
  {
    /* Walk the link pointers until we find the one pointing at this share */
    ha_mcs_cache_share** prev = &cache_share_list;
    for (; (pos = *prev) != this; prev = &pos->next)
      ;
    *prev = next;
    my_free(this);
  }
  mysql_mutex_unlock(&LOCK_cache_share);
}
/*****************************************************************************
 ha_mcs_cache handler functions
*****************************************************************************/
/* Lock reference to the Aria plugin; resolved lazily on first cached handler */
static plugin_ref plugin_maria = NULL;

/**
  Construct the caching wrapper around the ColumnStore handler.

  The ColumnStore system catalog tables (calpontsys.syscolumn/systable)
  are never cached. When caching is enabled, an Aria handler is created
  to back the insert cache.
*/
ha_mcs_cache::ha_mcs_cache(handlerton* hton, TABLE_SHARE* table_arg, MEM_ROOT* mem_root)
 : ha_mcs(mcs_hton, table_arg), isSysCatTable(false), isCacheDisabled(false)
{
  /*
    Always initialize the cache members. The original code only set them
    inside the "cache enabled" branch, leaving cache_handler (and friends)
    uninitialized when caching was off; the destructor could then read an
    uninitialized pointer if the cache-inserts setting was toggled between
    construction and destruction.
  */
  cache_handler = NULL;
  share = 0;
  lock_counter = 0;
  cache_locked = 0;

  if (table_arg && table_arg->db.str && !strcasecmp(table_arg->db.str, "calpontsys") &&
      table_arg->table_name.str &&
      (!strcasecmp(table_arg->table_name.str, "syscolumn") ||
       !strcasecmp(table_arg->table_name.str, "systable")))
    isSysCatTable = true;

  if (get_cache_inserts(current_thd) && !isSysCatTable)
  {
    if (!plugin_maria)
    {
      /* Resolve the Aria engine once; it backs the insert cache table */
      LEX_CSTRING name = {STRING_WITH_LEN("Aria")};
      plugin_maria = ha_resolve_by_name(0, &name, 0);
      mcs_maria_hton = plugin_hton(plugin_maria);
      int error = mcs_maria_hton == NULL;  // Engine must exists!
      if (error)
        my_error(HA_ERR_INITIALIZATION, MYF(0), "Could not find storage engine %s", name.str);
    }
    assert(mcs_maria_hton);
    cache_handler = (ha_maria*)mcs_maria_hton->create(mcs_maria_hton, table_arg, mem_root);
  }
}

ha_mcs_cache::~ha_mcs_cache()
{
  /*
    Guard on the pointer itself rather than re-reading get_cache_inserts():
    the session variable may have changed since construction, which
    previously leaked cache_handler (deleting NULL is a no-op).
  */
  delete cache_handler;
  cache_handler = NULL;
}
/*
The following functions duplicates calls to derived handler and
cache handler
*/
int ha_mcs_cache::create(const char* name, TABLE* table_arg, HA_CREATE_INFO* ha_create_info)
{
int error;
char cache_name[FN_REFLEN + 8];
DBUG_ENTER("ha_mcs_cache::create");
if (isCacheEnabled())
{
create_cache_name(cache_name, name);
/* Create a cached table */
ha_choice save_transactional = ha_create_info->transactional;
row_type save_row_type = ha_create_info->row_type;
ha_create_info->transactional = HA_CHOICE_NO;
ha_create_info->row_type = ROW_TYPE_DYNAMIC;
if ((error = cache_handler->create(cache_name, table_arg, ha_create_info)))
DBUG_RETURN(error);
ha_create_info->transactional = save_transactional;
ha_create_info->row_type = save_row_type;
}
/* Create the real table in ColumnStore */
if ((error = parent::create(name, table_arg, ha_create_info)))
{
if (get_cache_inserts(current_thd) && !isSysCatTable)
cache_handler->delete_table(cache_name);
DBUG_RETURN(error);
}
DBUG_RETURN(0);
}
/**
  Open the cached table.

  Opens the Aria cache file first and, on the first open of this share,
  splices the cache's THR_LOCK hooks (get_status_and_flush_cache and
  friends) into the Aria table's lock structure. If the cache file does
  not exist (ENOENT) the table is opened without caching and
  isCacheDisabled is set. Finally the real ColumnStore table is opened.
*/
int ha_mcs_cache::open(const char* name, int mode, uint open_flags)
{
  int error, cache_error;
  DBUG_ENTER("ha_mcs_cache::open");
  if (get_cache_inserts(current_thd) && !isSysCatTable)
  {
    /* Copy table object to cache_handler */
    cache_handler->change_table_ptr(table, table->s);
    char cache_name[FN_REFLEN + 8];
    create_cache_name(cache_name, name);
    if (!(cache_error = cache_handler->open(cache_name, mode, open_flags)))
    {
      if (!(share = find_cache_share(name, cache_handler->file->state->records)))
      {
        cache_handler->close();
        DBUG_RETURN(ER_OUTOFMEMORY);
      }
      /* Fix lock so that it goes through get_status_and_flush_cache() */
      THR_LOCK* lock = &cache_handler->file->s->lock;
      /* Double-checked: re-test under intern_lock before splicing the hooks */
      if (lock->get_status != &get_status_and_flush_cache)
      {
        mysql_mutex_lock(&cache_handler->file->s->intern_lock);
        /* The following lock is here just to establish mutex locking order */
        mysql_mutex_lock(&lock->mutex);
        mysql_mutex_unlock(&lock->mutex);
        if (lock->get_status != &get_status_and_flush_cache)
        {
          /* Remember original lock. Used by the THR_lock cache functions */
          share->org_lock = lock[0];
          if (lock->start_trans)
            lock->start_trans = &cache_start_trans;
          if (lock->copy_status)
            lock->copy_status = &cache_copy_status;
          if (lock->update_status)
            lock->update_status = &cache_update_status;
          if (lock->restore_status)
            lock->restore_status = &cache_restore_status;
          if (lock->check_status)
            lock->check_status = &cache_check_status;
          /*
            get_status is assigned last: it doubles as the "hooks installed"
            flag tested above. (A duplicated restore_status assignment was
            removed here.)
          */
          lock->get_status = &get_status_and_flush_cache;
        }
        mysql_mutex_unlock(&cache_handler->file->s->intern_lock);
      }
      cache_handler->file->lock.status_param = (void*)this;
    }
    else if (cache_error == ENOENT)
    {
      /* No cache file exists: open the ColumnStore table without caching */
      if (!(error = parent::open(name, mode, open_flags)))
      {
        isCacheDisabled = true;
        DBUG_RETURN(0);
      }
      else
      {
        DBUG_RETURN(error);
      }
    }
    else
    {
      DBUG_RETURN(cache_error);
    }
  }
  if ((error = parent::open(name, mode, open_flags)))
  {
    /* Undo the cache open on failure */
    if (get_cache_inserts(current_thd) && !isSysCatTable)
      cache_handler->close();
    DBUG_RETURN(error);
  }
  DBUG_RETURN(0);
}
/**
  Close the cache table (if any), the ColumnStore table, and release the
  cache share. The first non-zero error is returned, with the parent's
  error taking precedence.
*/
int ha_mcs_cache::close()
{
  int error;
  DBUG_ENTER("ha_mcs_cache::close()");
  if (!isCacheEnabled())
  {
    error = parent::close();
    DBUG_RETURN(error);
  }
  error = cache_handler->close();
  int parent_error = parent::close();
  if (parent_error)
    error = parent_error;
  /* Release our reference to the shared cache state */
  ha_mcs_cache_share* org_share = share;
  if (org_share)
    org_share->close();
  DBUG_RETURN(error);
}
/*
  Handling locking of the tables. In case of INSERT we have to lock both
  the cache handler and main table. If not, we only lock the main table
*/

/**
  Number of lock slots needed: two (Aria cache + ColumnStore table) when
  the cache is enabled — we may need to insert into or flush the cache —
  otherwise just the ColumnStore table.
*/
uint ha_mcs_cache::lock_count(void) const
{
  return isCacheEnabled() ? 2 : 1;
}
/**
  Store locks for the Aria table and ColumnStore table

  The cache table is always registered with TL_WRITE: whether we insert
  into it or flush it (get_status_and_flush_cache()) we need a write
  lock. The ColumnStore table gets the lock type requested by the server.
*/
THR_LOCK_DATA** ha_mcs_cache::store_lock(THD* thd, THR_LOCK_DATA** to, enum thr_lock_type lock_type)
{
  if (isCacheEnabled())
    to = cache_handler->store_lock(thd, to, TL_WRITE);
  return parent::store_lock(thd, to, lock_type);
}
/**
  Do external locking of the tables

  Both tables are first locked with F_WRLCK so we can either insert into
  the cache or flush it; the original lock type is remembered so
  free_locks() can downgrade the ColumnStore lock for non-insert
  statements. On F_UNLCK the cache is only unlocked if free_locks() has
  not already done so.
*/
int ha_mcs_cache::external_lock(THD* thd, int lock_type)
{
  int error = 0;
  DBUG_ENTER("ha_mcs_cache::external_lock");
  if (isCacheEnabled())
  {
    /*
      Reset lock_counter. This is ok as external_lock() is guaranteed to be
      called before first get_status()
    */
    lock_counter = 0;
    if (lock_type == F_UNLCK)
    {
      int error2;
      error = 0;
      if (cache_locked)
      {
        error = cache_handler->external_lock(thd, lock_type);
        cache_locked = 0;
      }
      if ((error2 = parent::external_lock(thd, lock_type)))
        error = error2;
      DBUG_RETURN(error);
    }
    /* Lock first with write lock to be able to do insert or flush table */
    original_lock_type = lock_type;
    lock_type = F_WRLCK;
    if ((error = cache_handler->external_lock(thd, lock_type)))
      DBUG_RETURN(error);
    if ((error = parent::external_lock(thd, lock_type)))
    {
      /*
        Undo the cache lock but keep reporting the original failure.
        Previously the unlock result overwrote 'error', so a failed
        parent lock could be reported as success.
      */
      cache_handler->external_lock(thd, F_UNLCK);
      DBUG_RETURN(error);
    }
    cache_locked = 1;
  }
  else
  {
    error = parent::external_lock(thd, lock_type);
  }
  DBUG_RETURN(error);
}
/**
  Drop both the Aria cache table and the ColumnStore table.

  A missing cache file (ENOENT) is not an error; any other cache error
  aborts before touching the ColumnStore table.
*/
int ha_mcs_cache::delete_table(const char* name)
{
  int cache_error = 0, error;
  DBUG_ENTER("ha_mcs_cache::delete_table");
  if (isCacheEnabled())
  {
    char cache_name[FN_REFLEN + 8];
    create_cache_name(cache_name, name);
    cache_error = cache_handler->delete_table(cache_name);
    if (cache_error == ENOENT)
      cache_error = 0; /* no cache file is fine */
    else if (cache_error)
      DBUG_RETURN(cache_error);
  }
  if ((error = parent::delete_table(name)))
    cache_error = error;
  DBUG_RETURN(cache_error);
}
/**
  Rename both the Aria cache table and the ColumnStore table.

  If the ColumnStore rename fails after a successful cache rename, the
  cache rename is rolled back. A missing cache file (ENOENT) means only
  the ColumnStore table is renamed.
*/
int ha_mcs_cache::rename_table(const char* from, const char* to)
{
  int error = 0;
  DBUG_ENTER("ha_mcs_cache::rename_table");
  if (isCacheEnabled())
  {
    char cache_from[FN_REFLEN + 8], cache_to[FN_REFLEN + 8];
    create_cache_name(cache_from, from);
    create_cache_name(cache_to, to);
    if (!(error = cache_handler->rename_table(cache_from, cache_to)))
    {
      if ((error = parent::rename_table(from, to)))
      {
        /* Roll the cache rename back so both names stay consistent */
        cache_handler->rename_table(cache_to, cache_from);
        DBUG_RETURN(error);
      }
    }
    else if (error == ENOENT)
    {
      /*
        No cache file: rename only the ColumnStore table. Assigning the
        parent's result to 'error' fixes the original bug where a
        successful rename fell through and returned ENOENT.
      */
      error = parent::rename_table(from, to);
    }
  }
  else
  {
    error = parent::rename_table(from, to);
  }
  DBUG_RETURN(error);
}
/**
  Truncate: empty the Aria cache (resetting the shared row counter) and
  then the ColumnStore table. The parent's error, if any, wins.
*/
int ha_mcs_cache::delete_all_rows(void)
{
  int error = 0;
  DBUG_ENTER("ha_mcs_cache::delete_all_rows");
  if (isCacheEnabled())
  {
    error = cache_handler->delete_all_rows();
    share->cached_rows = 0;
  }
  int parent_error = parent::delete_all_rows();
  if (parent_error)
    error = parent_error;
  DBUG_RETURN(error);
}
/** A cached table is crashed if either the cache or the ColumnStore table is */
bool ha_mcs_cache::is_crashed() const
{
  if (isCacheEnabled() && cache_handler->is_crashed())
    return true;
  return parent::is_crashed();
}
/**
  After a crash, repair will be run on next open.

  There are two cases when repair is run:
  1) Automatically on open if the table is crashed
  2) When the user explicitly runs repair

  In the case of 1) we don't want to run repair on both tables as
  the repair can be a slow process. Instead we only run repair
  on the crashed tables. If no tables are marked crashed, we
  run repair on both tables.

  Repair on the cache table will delete the part of the cache that was
  not committed.
  key_file_length and data_file_length are updated last for a statement.
  When these are updated, we treat the cache as committed.
*/
int ha_mcs_cache::repair(THD* thd, HA_CHECK_OPT* check_opt)
{
  int error = 0, error2;
  int something_crashed = is_crashed();
  DBUG_ENTER("ha_mcs_cache::repair");
  if (isCacheEnabled())
  {
    if (cache_handler->is_crashed() || !something_crashed)
    {
      /* Delete everything that was not already committed */
      /* Truncate data and key files back to the last committed lengths */
      mysql_file_chsize(cache_handler->file->dfile.file, cache_handler->file->s->state.state.data_file_length,
                        0, MYF(MY_WME));
      mysql_file_chsize(cache_handler->file->s->kfile.file,
                        cache_handler->file->s->state.state.key_file_length, 0, MYF(MY_WME));
      check_opt->flags |= T_AUTO_REPAIR;
      error = cache_handler->repair(thd, check_opt);
      /* Row count may have changed after truncation + repair */
      share->cached_rows = cache_handler->file->state->records;
    }
  }
  if (parent::is_crashed() || !something_crashed)
    if ((error2 = parent::repair(thd, check_opt)))
      error = error2;
  DBUG_RETURN(error);
}
/**
  Write to cache handler or main table

  INSERT/LOAD rows go into the Aria cache (with the shared row counter
  kept in sync); everything else goes straight to ColumnStore. On a
  replication slave rows are dropped unless ColumnStore replication is on.
*/
int ha_mcs_cache::write_row(const uchar* buf)
{
  if (current_thd->slave_thread && !get_replication_slave(current_thd))
    return 0;
  if (!isCacheEnabled() || !insert_command)
    return parent::write_row(buf);
  DBUG_ASSERT(share->cached_rows == cache_handler->file->state->records);
  share->cached_rows++;
  return cache_handler->write_row(buf);
}
/**
  Route start_bulk_insert to the right handler.

  INSERT/LOAD go to the Aria cache; INSERT...SELECT uses the parent's
  cache-aware bulk path; anything else (including the replication
  SQLCOM_END case, see get_status_and_flush_cache) goes straight to
  the ColumnStore handler.
*/
void ha_mcs_cache::start_bulk_insert(ha_rows rows, uint flags)
{
  if (current_thd->slave_thread && !get_replication_slave(current_thd))
    return;
  if (!isCacheEnabled())
    return parent::start_bulk_insert(rows, flags);
  if (insert_command)
  {
    /* Reset the per-statement copy counters before caching rows */
    bzero(&cache_handler->copy_info, sizeof(cache_handler->copy_info));
    return cache_handler->start_bulk_insert(rows, flags);
  }
  if (sql_command == SQLCOM_INSERT_SELECT)
    return parent::start_bulk_insert_from_cache(rows, flags);
  /* sql_command == SQLCOM_END: see the comment in get_status_and_flush_cache */
  return parent::start_bulk_insert(rows, flags);
}
/** Finish bulk insert on whichever handler start_bulk_insert() selected */
int ha_mcs_cache::end_bulk_insert()
{
  if (current_thd->slave_thread && !get_replication_slave(current_thd))
    return 0;
  if (isCacheEnabled() && insert_command)
    return cache_handler->end_bulk_insert();
  return parent::end_bulk_insert();
}
/******************************************************************************
 ha_mcs_cache Plugin code
******************************************************************************/

/* Handlerton factory: every ColumnStore handler is created via the cache wrapper */
static handler* ha_mcs_cache_create_handler(handlerton* hton, TABLE_SHARE* table, MEM_ROOT* mem_root)
{
  handler* h = new (mem_root) ha_mcs_cache(hton, table, mem_root);
  return h;
}
/******************************************************************************
 ha_mcs Plugin code
******************************************************************************/
/*
  Plugin init: logs the startup banner, caches version strings for the
  status variables, and wires up the ColumnStore handlerton (handler
  factory, transaction callbacks and query-pushdown entry points).
*/
static int columnstore_init_func(void* p)
{
  DBUG_ENTER("columnstore_init_func");
  /* Log a timestamped startup banner to the error log */
  struct tm tm;
  time_t t;
  time(&t);
  localtime_r(&t, &tm);
  fprintf(stderr, "%02d%02d%02d %2d:%02d:%02d ", tm.tm_year % 100, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour,
          tm.tm_min, tm.tm_sec);
  fprintf(stderr, "Columnstore: Started; Version: %s-%s\n", columnstore_version.c_str(),
          columnstore_release.c_str());
  /* Cache version/commit strings into fixed buffers for status variables */
  strncpy(cs_version, columnstore_version.c_str(), sizeof(cs_version) - 1);
  cs_version[sizeof(cs_version) - 1] = 0;
  strncpy(cs_commit_hash, columnstore_commit_hash.c_str(), sizeof(cs_commit_hash) - 1);
  cs_commit_hash[sizeof(cs_commit_hash) - 1] = 0;
  mcs_hton = (handlerton*)p;
  (void)pthread_mutex_init(&mcs_mutex, MY_MUTEX_INIT_FAST);
  (void)my_hash_init(PSI_NOT_INSTRUMENTED, &mcs_open_tables, system_charset_info, 32, 0, 0,
                     (my_hash_get_key)mcs_get_key, 0, 0);
  /* All handlers are created through the insert-cache wrapper */
  mcs_hton->create = ha_mcs_cache_create_handler;
  mcs_hton->panic = 0;
  mcs_hton->flags = HTON_CAN_RECREATE | HTON_NO_PARTITION;
  //  mcs_hton->discover_table = mcs_discover;
  //  mcs_hton->discover_table_existence = mcs_discover_existence;
  mcs_hton->commit = mcs_commit;
  mcs_hton->rollback = mcs_rollback;
  mcs_hton->close_connection = mcs_close_connection;
  /* Query pushdown entry points (group-by, derived, select and unit handlers) */
  mcs_hton->create_group_by = create_columnstore_group_by_handler;
  mcs_hton->create_derived = create_columnstore_derived_handler;
  mcs_hton->create_select = create_columnstore_select_handler;
  mcs_hton->create_unit = create_columnstore_unit_handler;
  mcs_hton->db_type = DB_TYPE_AUTOASSIGN;
#ifdef HAVE_PSI_INTERFACE
  uint count = sizeof(all_mutexes) / sizeof(all_mutexes[0]);
  mysql_mutex_register("ha_mcs_cache", all_mutexes, count);
#else
  (void)key_LOCK_cache_share; /* silence unused-variable warning */
#endif
  mysql_mutex_init(key_LOCK_cache_share, &LOCK_cache_share, MY_MUTEX_INIT_FAST);
  DBUG_RETURN(0);
}
/*
  Plugin deinit: tears down the config instance map, the open-table
  registry, and the mutexes created in columnstore_init_func().
*/
static int columnstore_done_func(void* p)
{
  DBUG_ENTER("columnstore_done_func");
  config::Config::deleteInstanceMap();
  my_hash_free(&mcs_open_tables);
  pthread_mutex_destroy(&mcs_mutex);
  /* Release the Aria plugin reference taken in the ha_mcs_cache constructor */
  if (plugin_maria)
  {
    plugin_unlock(0, plugin_maria);
    plugin_maria = NULL;
  }
  mysql_mutex_destroy(&LOCK_cache_share);
  DBUG_RETURN(0);
}
/* Handlerton interface descriptors for the storage-engine plugins */
struct st_mysql_storage_engine ha_mcs_cache_storage_engine = {MYSQL_HANDLERTON_INTERFACE_VERSION};
struct st_mysql_storage_engine columnstore_storage_engine = {MYSQL_HANDLERTON_INTERFACE_VERSION};
/* Shared descriptor for all COLUMNSTORE_* information-schema plugins below */
static struct st_mysql_information_schema is_columnstore_plugin_version = {
    MYSQL_INFORMATION_SCHEMA_INTERFACE_VERSION};
/* The Columnstore storage engine plus its information-schema companion plugins */
maria_declare_plugin(columnstore){
    MYSQL_STORAGE_ENGINE_PLUGIN, &columnstore_storage_engine, "Columnstore",
    "MariaDB Corporation", "ColumnStore storage engine", PLUGIN_LICENSE_GPL,
    columnstore_init_func, columnstore_done_func, MCSVERSIONHEX,
    mcs_status_variables,       /* status variables */
    mcs_system_variables,       /* system variables */
    PLUGIN_COLUMNSTORE_VERSION, /* string version */
    COLUMNSTORE_MATURITY        /* maturity */
},
    {MYSQL_INFORMATION_SCHEMA_PLUGIN, &is_columnstore_plugin_version, "COLUMNSTORE_COLUMNS",
     "MariaDB Corporation", "An information schema plugin to list ColumnStore columns", PLUGIN_LICENSE_GPL,
     is_columnstore_columns_plugin_init,
     //    is_columnstore_tables_plugin_deinit,
     NULL, MCSVERSIONHEX, NULL, NULL, PLUGIN_COLUMNSTORE_VERSION, COLUMNSTORE_MATURITY},
    {MYSQL_INFORMATION_SCHEMA_PLUGIN, &is_columnstore_plugin_version, "COLUMNSTORE_TABLES",
     "MariaDB Corporation", "An information schema plugin to list ColumnStore tables", PLUGIN_LICENSE_GPL,
     is_columnstore_tables_plugin_init,
     //    is_columnstore_tables_plugin_deinit,
     NULL, MCSVERSIONHEX, NULL, NULL, PLUGIN_COLUMNSTORE_VERSION, COLUMNSTORE_MATURITY},
    {MYSQL_INFORMATION_SCHEMA_PLUGIN, &is_columnstore_plugin_version, "COLUMNSTORE_FILES",
     "MariaDB Corporation", "An information schema plugin to list ColumnStore files", PLUGIN_LICENSE_GPL,
     is_columnstore_files_plugin_init,
     //    is_columnstore_files_plugin_deinit,
     NULL, MCSVERSIONHEX, NULL, NULL, PLUGIN_COLUMNSTORE_VERSION, COLUMNSTORE_MATURITY},
    {MYSQL_INFORMATION_SCHEMA_PLUGIN, &is_columnstore_plugin_version, "COLUMNSTORE_EXTENTS",
     "MariaDB Corporation", "An information schema plugin to list ColumnStore extents", PLUGIN_LICENSE_GPL,
     is_columnstore_extents_plugin_init,
     //    is_columnstore_extents_plugin_deinit,
     NULL, MCSVERSIONHEX, NULL, NULL, PLUGIN_COLUMNSTORE_VERSION,
     COLUMNSTORE_MATURITY} maria_declare_plugin_end;
/******************************************************************************
 Implementation of write cache
******************************************************************************/

/** Number of rows currently stored in the Aria cache table */
ha_rows ha_mcs_cache::num_rows_cached()
{
  ha_rows cached = cache_handler->file->state->records;
  return cached;
}
/*
  Free write locks if this was not an insert.

  Called from cache_start_trans() once all thread locks are granted: the
  ColumnStore lock is downgraded from the F_WRLCK taken in
  external_lock() back to the caller's original lock type, and the cache
  table's locks are released entirely.
*/
void ha_mcs_cache::free_locks()
{
  /* Restart transaction for columnstore table */
  if (original_lock_type != F_WRLCK)
  {
    /* Downgrade: unlock, then re-lock with the type the caller asked for */
    parent::external_lock(table->in_use, F_UNLCK);
    parent::external_lock(table->in_use, original_lock_type);
  }
  /* We don't need to lock cache_handler anymore as it's already flushed */
  cache_handler->external_lock(table->in_use, F_UNLCK);
  thr_unlock(&cache_handler->file->lock, 0);
  /* external_lock(F_UNLCK) must not unlock the cache a second time */
  cache_locked = false;
}
/**
  Copy data from cache to ColumnStore

  Both tables are locked. The from table has also an exclusive lock to
  ensure that no one is inserting data to it while we are reading it.

  On success the rows are committed in ColumnStore and the Aria cache is
  emptied; on failure the ColumnStore transaction is rolled back and the
  cached rows are left in place.
*/
int ha_mcs_cache::flush_insert_cache()
{
  int error, error2;
  ha_maria* from = cache_handler;
  uchar* record = table->record[0];
  [[maybe_unused]] ulonglong copied_rows = 0;
  DBUG_ENTER("flush_insert_cache");
  DBUG_ASSERT(from->file->state->records == share->cached_rows);
  /* Bulk-load every cached row into the ColumnStore table */
  parent::start_bulk_insert_from_cache(from->file->state->records, 0);
  from->rnd_init(1);
  while (!(error = from->rnd_next(record)))
  {
    copied_rows++;
    if ((error = parent::write_row(record)))
      goto end;
    rows_changed++;
  }
  /* Reaching end-of-file is the normal loop exit, not an error */
  if (error == HA_ERR_END_OF_FILE)
    error = 0;
  DBUG_ASSERT(copied_rows == share->cached_rows);
end:
  from->rnd_end();
  if ((error2 = parent::end_bulk_insert()) && !error)
    error = error2;
  if (!error)
  {
    if (parent::ht->commit)
      error = parent::ht->commit(parent::ht, table->in_use, 1);
  }
  else
  {
    /* We can ignore the rollback error as we already have some other errors */
    if (parent::ht->rollback)
      parent::ht->rollback(parent::ht, table->in_use, 1);
  }
  /* NOTE(review): aborts debug builds on any flush failure — presumably a
     "should never happen" invariant; confirm that is intended */
  DBUG_ASSERT(error == 0);
  if (!error)
  {
    /*
      Everything went fine, delete all rows from the cache and allow others
      to use it.
    */
    {
      /*
        We have to unlock the lock mutex as otherwise we get a conflict in
        mutex order. This is fine as we have a write lock on the mutex,
        so we will be able to get it again
      */
      mysql_mutex_unlock(&from->file->s->lock.mutex);
      from->delete_all_rows();
      share->cached_rows = 0;
      mysql_mutex_lock(&from->file->s->lock.mutex);
    }
  }
  DBUG_RETURN(error);
}