1
0
mirror of https://github.com/MariaDB/server.git synced 2025-12-03 05:41:09 +03:00
Files
mariadb/mysql-test/suite/versioning/t/trx_id.test
Marko Mäkelä 3cef4f8f0f MDEV-515 Reduce InnoDB undo logging for insert into empty table
We implement an idea that was suggested by Michael 'Monty' Widenius
in October 2017: When InnoDB is inserting into an empty table or partition,
we can write a single undo log record TRX_UNDO_EMPTY, which will cause
ROLLBACK to clear the table.

For this to work, the insert into an empty table or partition must be
covered by an exclusive table lock that will be held until the transaction
has been committed or rolled back, or the INSERT operation has been
rolled back (and the table is empty again), in lock_table_x_unlock().

Clustered index records that are covered by the TRX_UNDO_EMPTY record
will carry DB_TRX_ID=0 and DB_ROLL_PTR=1<<55, and thus they cannot
be distinguished from what MDEV-12288 leaves behind after purging the
history of row-logged operations.

Concurrent non-locking reads must be adjusted: If the read view was
created before the INSERT into an empty table, then we must continue
to imagine that the table is empty, and not try to read any records.
If the read view was created after the INSERT was committed, then
all records must be visible normally. To implement this, we introduce
the field dict_table_t::bulk_trx_id.

This special handling only applies to the very first INSERT statement
of a transaction for the empty table or partition. If a subsequent
statement in the transaction is modifying the initially empty table again,
we must enable row-level undo logging, so that we will be able to
roll back to the start of the statement in case of an error (such as
duplicate key).

INSERT IGNORE will continue to use row-level logging and locking, because
covering it with a TRX_UNDO_EMPTY record would require the ability to roll
back only the latest row.
Since the undo log that we write only allows us to roll back the entire
statement, we cannot support INSERT IGNORE. We will introduce a
handler::extra() parameter HA_EXTRA_IGNORE_INSERT to indicate to storage
engines that INSERT IGNORE is being executed.

In many test cases, we add an extra record to the table, so that during
the 'interesting' part of the test, row-level locking and logging will
be used.

Replicas will continue to use row-level logging and locking until
MDEV-24622 has been addressed. Likewise, this optimization will be
disabled in Galera cluster until MDEV-24623 enables it.

dict_table_t::bulk_trx_id: The latest active or committed transaction
that initiated an insert into an empty table or partition.
Protected by exclusive table lock and a clustered index leaf page latch.

ins_node_t::bulk_insert: Whether bulk insert was initiated.

trx_t::mod_tables: Use C++11 style accessors (emplace instead of insert).
Unlike earlier, this collection will cover also temporary tables.

trx_mod_table_time_t: Add start_bulk_insert(), end_bulk_insert(),
is_bulk_insert(), was_bulk_insert().

trx_undo_report_row_operation(): Before accessing any undo log pages,
invoke trx->mod_tables.emplace() in order to determine whether undo
logging was disabled, or whether this is the first INSERT and we are
supposed to write a TRX_UNDO_EMPTY record.

row_ins_clust_index_entry_low(): If we are inserting into an empty
clustered index leaf page, set the ins_node_t::bulk_insert flag for
the subsequent trx_undo_report_row_operation() call.

lock_rec_insert_check_and_lock(), lock_prdt_insert_check_and_lock():
Remove the redundant parameter 'flags' that can be checked in the caller.

btr_cur_ins_lock_and_undo(): Simplify the logic. Correctly write
DB_TRX_ID,DB_ROLL_PTR after invoking trx_undo_report_row_operation().

trx_mark_sql_stat_end(), ha_innobase::extra(HA_EXTRA_IGNORE_INSERT),
ha_innobase::external_lock(): Invoke trx_t::end_bulk_insert() so that
the next statement will not be covered by table-level undo logging.

ReadView::changes_visible(trx_id_t) const: New accessor for the case
where the trx_id_t is not read from a potentially corrupted index page
but directly from the memory. In this case, we can skip a sanity check.

row_sel(), row_sel_try_search_shortcut(), row_search_mvcc(),
row_sel_try_search_shortcut_for_mysql(),
row_merge_read_clustered_index(): Check dict_table_t::bulk_trx_id.

row_sel_clust_sees(): Replaces lock_clust_rec_cons_read_sees().

lock_sec_rec_cons_read_sees(): Replaced with lower-level code.

btr_root_page_init(): Refactored from btr_create().

dict_index_t::clear(), dict_table_t::clear(): Empty an index or table,
for the ROLLBACK of an INSERT operation.

ROW_T_EMPTY, ROW_OP_EMPTY: Note a concurrent ROLLBACK of an INSERT
into an empty table.

This is joint work with Thirunarayanan Balathandayuthapani,
who created a working prototype.
Thanks to Matthias Leich for extensive testing.
2021-01-25 18:41:27 +02:00

505 lines
15 KiB
Plaintext

# Prerequisites: skip the whole test when $TEST_VERSIONING_SO is not set
# (presumably the location of the test_versioning plugin; TODO confirm),
# then pull in the InnoDB and default-charset include files.
if (!$TEST_VERSIONING_SO)
{
--skip needs test_versioning plugin
}
--source include/have_innodb.inc
--source include/default_charset.inc
# Tables created below without an explicit ENGINE clause will use InnoDB.
set default_storage_engine= innodb;
# t1 declares its period columns as BIGINT UNSIGNED (not TIMESTAMP), and
# marks them invisible so that they are not part of "select *" output.
create or replace table t1 (
x int,
sys_trx_start bigint(20) unsigned as row start invisible,
sys_trx_end bigint(20) unsigned as row end invisible,
period for system_time (sys_trx_start, sys_trx_end)
) with system versioning;
--echo # No history inside the transaction
# One insert followed by two updates of the same row, all inside a single
# transaction.
start transaction;
insert into t1 (x) values (1);
update t1 set x= x + 1;
update t1 set x= x + 1;
commit;
# Show every row version that was stored. sys_trx_start is printed only as
# the boolean "sys_trx_start > 1", presumably to keep the recorded output
# independent of the actual transaction id.
select *, sys_trx_start > 1, sys_trx_end from t1 for system_time all;
--echo # ALTER ADD SYSTEM VERSIONING should write to mysql.transaction_registry
# Four cases follow: {non-empty, empty} table x {inplace, copy} algorithm.
# Each one adds BIGINT UNSIGNED row start/end columns to a plain table and
# then inspects mysql.transaction_registry.
set @@system_versioning_alter_history=keep;
# Case 1: non-empty table, algorithm=inplace. Exactly one registry row is
# expected for the transaction id stored in column s.
create or replace table t1 (x int);
insert into t1 values (1);
alter table t1
add column s bigint unsigned as row start,
add column e bigint unsigned as row end,
add period for system_time(s, e),
add system versioning,
algorithm=inplace;
select s from t1 into @trx_start;
select count(*) = 1 from mysql.transaction_registry where transaction_id = @trx_start;
# Case 2: empty table, algorithm=inplace. The registry row count is
# compared with the count taken just before the ALTER (must be equal).
create or replace table t1 (x int);
select count(*) from mysql.transaction_registry into @tmp;
alter table t1
add column s bigint unsigned as row start,
add column e bigint unsigned as row end,
add period for system_time(s, e),
add system versioning,
algorithm=inplace;
select count(*) = @tmp from mysql.transaction_registry;
# Case 3: non-empty table, algorithm=copy. Same check as case 1.
create or replace table t1 (x int);
insert into t1 values (1);
alter table t1
add column s bigint unsigned as row start,
add column e bigint unsigned as row end,
add period for system_time(s, e),
add system versioning,
algorithm=copy;
select s from t1 into @trx_start;
select count(*) = 1 from mysql.transaction_registry where transaction_id = @trx_start;
# Case 4: empty table, algorithm=copy. Unlike case 2, the count is
# expected to grow by one.
create or replace table t1 (x int);
select count(*) from mysql.transaction_registry into @tmp;
alter table t1
add column s bigint unsigned as row start,
add column e bigint unsigned as row end,
add period for system_time(s, e),
add system versioning,
algorithm=copy;
# With MDEV-14511 the transaction will be registered even for empty tables.
select count(*) = @tmp + 1 from mysql.transaction_registry;
--echo # TRX_ID to TIMESTAMP versioning switch
# Start with explicit BIGINT UNSIGNED period columns, insert one row, then
# drop both period columns.
create or replace table t1 (
x int,
sys_start bigint unsigned as row start invisible,
sys_end bigint unsigned as row end invisible,
period for system_time (sys_start, sys_end)
) with system versioning;
insert into t1 values (1);
alter table t1 drop column sys_start, drop column sys_end;
# After the drop the query refers to row_end (presumably the implicit
# period column; TODO confirm). 18446744073709551615 is 2^64-1, the largest
# BIGINT UNSIGNED value; the comparison is printed under the alias
# transaction_based.
select row_end = 18446744073709551615 as transaction_based from t1 for system_time all;
--echo # Simple vs SEES algorithms
create or replace table t1 (
x int,
sys_start bigint(20) unsigned as row start invisible,
sys_end bigint(20) unsigned as row end invisible,
period for system_time (sys_start, sys_end)
) with system versioning;
# MDEV-515: the first INSERT into an empty table takes an exclusive table
# lock, so concurrent DML on the table would not be possible. Insert one
# row up front so that the transactions below use row-level locking.
INSERT INTO t1 VALUES(100);
# Three READ COMMITTED transactions are opened in the order
# default (x=1), con1 (x=2), con2 (x=3) and committed in the reverse order
# con2, con1, default.
set transaction isolation level read committed;
start transaction;
insert into t1 values (1);
--connect (con1,localhost,root,,test)
set transaction isolation level read committed;
start transaction;
insert into t1 values (2);
--connect (con2,localhost,root,,test)
set transaction isolation level read committed;
start transaction;
insert into t1 values (3);
commit;
--disconnect con2
--connection default
# @ts1 is captured after only con2 (x=3) has committed. The short sleep
# before each capture presumably keeps the timestamp apart from the
# preceding commit.
--sleep 0.01
set @ts1= sysdate(6);
--connection con1
commit;
--disconnect con1
--connection default
# @ts2 is captured after con1 (x=2) has committed as well.
--sleep 0.01
set @ts2= sysdate(6);
# Commit of the default connection's transaction (x=1), the last of the three.
commit;
# @ts3 is captured after all three transactions have committed.
--sleep 0.01
set @ts3= sysdate(6);
# Fetch the transaction id recorded in sys_start for each inserted row and
# print whether the ids and the captured timestamps are strictly ascending.
select sys_start from t1 where x = 1 into @trx_id1;
select sys_start from t1 where x = 2 into @trx_id2;
select sys_start from t1 where x = 3 into @trx_id3;
select @trx_id1 < @trx_id2, @trx_id2 < @trx_id3;
select @ts1 < @ts2, @ts2 < @ts3;
--echo # MVCC is resolved
# Query the table as of each captured transaction id and each captured
# timestamp.
select * from t1 for system_time as of transaction @trx_id1;
select * from t1 for system_time as of timestamp @ts1;
select * from t1 for system_time as of transaction @trx_id2;
select * from t1 for system_time as of timestamp @ts2;
select * from t1 for system_time as of transaction @trx_id3;
select * from t1 for system_time as of timestamp @ts3;
--echo #
--echo # MDEV-15427 IB: TRX_ID based operations inside transaction generate history
--echo #
create or replace table t1(
x int(10),
row_start bigint(20) unsigned as row start,
row_end bigint(20) unsigned as row end,
period for system_time(row_start, row_end)
) with system versioning;
# Part 1: insert and delete the same row within one transaction, then list
# every stored row version.
begin;
insert into t1 (x) values (1);
delete from t1;
commit;
select x from t1 for system_time all;
# Part 2: insert a row in its own (autocommit) statement, update it twice
# within one transaction, then list every stored row version together with
# the boolean row_start < row_end.
insert into t1 (x) values (2);
begin;
update t1 set x= 3;
update t1 set x= 4;
commit;
select x, row_start < row_end from t1 for system_time all;
--echo # MDEV-16010 Too many rows with AS OF point_in_the_past_or_NULL
create or replace table t1 (
x int,
row_start bigint unsigned as row start invisible,
row_end bigint unsigned as row end invisible,
period for system_time (row_start, row_end)
) with system versioning engine innodb;
# Create one row and delete it again, so that only a historical row version
# can remain.
insert into t1 (x) values (1);
delete from t1;
# Query at a point in time long before the table existed, and at NULL.
select * from t1 for system_time as of timestamp'1990-1-1 00:00';
select * from t1 for system_time as of NULL;
--echo # MDEV-16024 transaction_registry.begin_timestamp is wrong for explicit transactions
create or replace table t1 (
x int(11) default null,
row_start bigint(20) unsigned generated always as row start invisible,
row_end bigint(20) unsigned generated always as row end invisible,
period for system_time (row_start, row_end)
) engine=innodb with system versioning;
# @ts1 is taken after BEGIN but before the first DML statement of the
# transaction; the INSERT follows after a short sleep.
begin;
set @ts1= now(6);
--sleep 0.01
insert into t1 values (1);
commit;
# Look up the transaction id of the inserted row and print whether the
# begin timestamp reported by trt_begin_ts() is not later than @ts1.
# NOTE(review): trt_begin_ts() is not defined in this file; presumably it
# comes from the test_versioning plugin required at the top. Confirm.
select row_start from t1 into @trx_id;
select trt_begin_ts(@trx_id) <= @ts1 as BEGIN_TS_GOOD;
drop table t1;
--echo #
--echo # MDEV-16100 FOR SYSTEM_TIME erroneously resolves string user variables as transaction IDs
--echo #
CREATE TABLE t1 (
x INT,
sys_trx_start BIGINT UNSIGNED AS ROW START,
sys_trx_end BIGINT UNSIGNED AS ROW END,
PERIOD FOR SYSTEM_TIME (sys_trx_start, sys_trx_end)
) WITH SYSTEM VERSIONING ENGINE=INNODB;
INSERT INTO t1 (x) VALUES (1);
# @ts holds a date/time value one year in the future, not a transaction id.
SET @ts= DATE_ADD(NOW(), INTERVAL 1 YEAR);
# EXPLAIN EXTENDED is run for the three spellings of AS OF: explicit
# TRANSACTION, explicit TIMESTAMP, and with neither keyword.
EXPLAIN EXTENDED SELECT x FROM t1 FOR SYSTEM_TIME AS OF TRANSACTION @ts;
EXPLAIN EXTENDED SELECT x FROM t1 FOR SYSTEM_TIME AS OF TIMESTAMP @ts;
EXPLAIN EXTENDED SELECT x FROM t1 FOR SYSTEM_TIME AS OF @ts;
DROP TABLE t1;
--echo #
--echo # Testing AS OF with expressions of various kinds and data types
--echo #
# Fixture t1: period columns are BIGINT UNSIGNED.
CREATE TABLE t1
(
x INT,
sys_trx_start BIGINT UNSIGNED AS ROW START INVISIBLE,
sys_trx_end BIGINT UNSIGNED AS ROW END INVISIBLE,
PERIOD FOR SYSTEM_TIME (sys_trx_start, sys_trx_end)
) WITH SYSTEM VERSIONING;
INSERT INTO t1 VALUES (1);
# Fixture t2: same shape, but the period columns are TIMESTAMP(6).
CREATE TABLE t2
(
x INT,
sys_trx_start TIMESTAMP(6) AS ROW START INVISIBLE,
sys_trx_end TIMESTAMP(6) AS ROW END INVISIBLE,
PERIOD FOR SYSTEM_TIME (sys_trx_start, sys_trx_end)
) WITH SYSTEM VERSIONING;
INSERT INTO t2 VALUES (1);
--echo #
--echo # ROW is not supported
--echo #
# Each statement in this block is expected to fail with
# ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION, for both t1 and t2.
--error ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION
SELECT * FROM t1 FOR SYSTEM_TIME AS OF (1,1);
--error ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION
SELECT * FROM t2 FOR SYSTEM_TIME AS OF (1,1);
--echo #
--echo # DOUBLE is not supported, use explicit CAST
--echo #
# RAND() is tried bare, parenthesized, and wrapped in COALESCE().
--error ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION
SELECT * FROM t1 FOR SYSTEM_TIME AS OF TRANSACTION RAND();
--error ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION
SELECT * FROM t1 FOR SYSTEM_TIME AS OF TRANSACTION (RAND());
--error ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION
SELECT * FROM t1 FOR SYSTEM_TIME AS OF TRANSACTION COALESCE(RAND());
--error ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION
SELECT * FROM t2 FOR SYSTEM_TIME AS OF TRANSACTION RAND();
--error ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION
SELECT * FROM t2 FOR SYSTEM_TIME AS OF TRANSACTION (RAND());
--error ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION
SELECT * FROM t2 FOR SYSTEM_TIME AS OF TRANSACTION COALESCE(RAND());
--echo #
--echo # DECIMAL is not supported, use explicit CAST
--echo #
# The literal 10.1 is tried bare and wrapped in COALESCE().
--error ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION
SELECT * FROM t1 FOR SYSTEM_TIME AS OF TRANSACTION 10.1;
--error ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION
SELECT * FROM t1 FOR SYSTEM_TIME AS OF TRANSACTION COALESCE(10.1);
--error ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION
SELECT * FROM t2 FOR SYSTEM_TIME AS OF TRANSACTION 10.1;
--error ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION
SELECT * FROM t2 FOR SYSTEM_TIME AS OF TRANSACTION COALESCE(10.1);
--echo #
--echo # YEAR is not supported, use explicit CAST
--echo #
# The cases below use a BEGIN NOT ATOMIC compound statement so that a local
# variable of the data type under test can be declared. The statement
# delimiter is switched to $$ around each compound statement because its
# body contains semicolons, and switched back to ; afterwards.
DELIMITER $$;
--error ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION
BEGIN NOT ATOMIC
DECLARE var YEAR;
SELECT * FROM t1 FOR SYSTEM_TIME AS OF TRANSACTION var;
END;
$$
DELIMITER ;$$
DELIMITER $$;
--error ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION
BEGIN NOT ATOMIC
DECLARE var YEAR;
SELECT * FROM t2 FOR SYSTEM_TIME AS OF TRANSACTION var;
END;
$$
DELIMITER ;$$
--echo #
--echo # ENUM is not supported, use explicit CAST
--echo #
DELIMITER $$;
--error ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION
BEGIN NOT ATOMIC
DECLARE var ENUM('xxx') DEFAULT 'xxx';
SELECT * FROM t1 FOR SYSTEM_TIME AS OF TRANSACTION var;
END;
$$
DELIMITER ;$$
DELIMITER $$;
--error ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION
BEGIN NOT ATOMIC
DECLARE var ENUM('xxx') DEFAULT 'xxx';
SELECT * FROM t2 FOR SYSTEM_TIME AS OF TRANSACTION var;
END;
$$
DELIMITER ;$$
--echo #
--echo # SET is not supported, use explicit CAST
--echo #
DELIMITER $$;
--error ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION
BEGIN NOT ATOMIC
DECLARE var SET('xxx') DEFAULT 'xxx';
SELECT * FROM t1 FOR SYSTEM_TIME AS OF TRANSACTION var;
END;
$$
DELIMITER ;$$
DELIMITER $$;
--error ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION
BEGIN NOT ATOMIC
DECLARE var SET('xxx') DEFAULT 'xxx';
SELECT * FROM t2 FOR SYSTEM_TIME AS OF TRANSACTION var;
END;
$$
DELIMITER ;$$
# BIT variable: this case has no echo header of its own, is run against t2
# (TIMESTAMP(6) period columns) only, and expects ER_VERS_ENGINE_UNSUPPORTED
# instead of the data-type error used by the cases above.
DELIMITER $$;
--error ER_VERS_ENGINE_UNSUPPORTED
BEGIN NOT ATOMIC
DECLARE var BIT(10);
SELECT * FROM t2 FOR SYSTEM_TIME AS OF TRANSACTION var;
END;
$$
DELIMITER ;$$
--echo #
--echo # String literals resolve to TIMESTAMP
--echo #
# The same date/time string literal is used with AS OF (no TRANSACTION or
# TIMESTAMP keyword) against both fixtures; neither statement expects an
# error. The fixtures are dropped afterwards.
SELECT * FROM t1 FOR SYSTEM_TIME AS OF '2038-12-30 00:00:00';
SELECT * FROM t2 FOR SYSTEM_TIME AS OF '2038-12-30 00:00:00';
DROP TABLE t1, t2;
--echo #
--echo # MDEV-16094 Crash when using AS OF with a stored function
--echo #
# fts() returns a DATETIME value, ftx() returns the BIGINT UNSIGNED value 1.
CREATE FUNCTION fts() RETURNS DATETIME RETURN '2001-01-01 10:20:30';
CREATE FUNCTION ftx() RETURNS BIGINT UNSIGNED RETURN 1;
# ttx: period columns are BIGINT UNSIGNED (despite the *_timestamp names).
CREATE TABLE ttx
(
x INT,
start_timestamp BIGINT UNSIGNED GENERATED ALWAYS AS ROW START,
end_timestamp BIGINT UNSIGNED GENERATED ALWAYS AS ROW END,
PERIOD FOR SYSTEM_TIME(start_timestamp, end_timestamp)
) ENGINE=InnoDB WITH SYSTEM VERSIONING;
# tts: period columns are TIMESTAMP(6).
CREATE TABLE tts
(
x INT,
start_timestamp TIMESTAMP(6) GENERATED ALWAYS AS ROW START,
end_timestamp TIMESTAMP(6) GENERATED ALWAYS AS ROW END,
PERIOD FOR SYSTEM_TIME(start_timestamp, end_timestamp)
) ENGINE=InnoDB WITH SYSTEM VERSIONING;
# All four table/function combinations are queried. Only AS OF TRANSACTION
# on tts expects an error (ER_VERS_ENGINE_UNSUPPORTED); the other three are
# expected to succeed.
SELECT * FROM tts FOR SYSTEM_TIME AS OF fts();
--error ER_VERS_ENGINE_UNSUPPORTED
SELECT * FROM tts FOR SYSTEM_TIME AS OF TRANSACTION ftx();
SELECT * FROM ttx FOR SYSTEM_TIME AS OF fts();
SELECT * FROM ttx FOR SYSTEM_TIME AS OF TRANSACTION ftx();
DROP TABLE tts;
DROP TABLE ttx;
DROP FUNCTION fts;
DROP FUNCTION ftx;
--echo #
--echo # MDEV-16330 Allow instant change of WITH SYSTEM VERSIONING column attribute
--echo #
# Pattern used three times below: print the column's prtype from the InnoDB
# INFORMATION_SCHEMA tables, change the column's versioning attribute with
# ALTER TABLE (wrapped in enable_info/disable_info), print prtype again,
# then run an UPDATE and count all stored row versions.
SET @@SYSTEM_VERSIONING_ALTER_HISTORY=KEEP;
CREATE TABLE t (
a INT,
b INT,
row_start BIGINT UNSIGNED AS ROW START INVISIBLE,
row_end BIGINT UNSIGNED AS ROW END INVISIBLE,
PERIOD FOR SYSTEM_TIME(row_start, row_end)
) WITH SYSTEM VERSIONING ENGINE=INNODB;
INSERT INTO t VALUES (1,1);
--echo # without table rebuild
SELECT c.prtype FROM INFORMATION_SCHEMA.INNODB_SYS_COLUMNS AS c
INNER JOIN INFORMATION_SCHEMA.INNODB_SYS_TABLES AS t
ON c.table_id=t.table_id
WHERE t.name='test/t' AND c.name='a';
--enable_info
ALTER TABLE t
CHANGE a a INT WITHOUT SYSTEM VERSIONING;
--disable_info
SELECT c.prtype FROM INFORMATION_SCHEMA.INNODB_SYS_COLUMNS AS c
INNER JOIN INFORMATION_SCHEMA.INNODB_SYS_TABLES AS t
ON c.table_id=t.table_id
WHERE t.name='test/t' AND c.name='a';
# Column a is now WITHOUT SYSTEM VERSIONING; update it and count versions.
UPDATE t SET a=11;
SELECT COUNT(*) FROM t FOR SYSTEM_TIME ALL;
--echo # with table rebuild
SELECT c.prtype FROM INFORMATION_SCHEMA.INNODB_SYS_COLUMNS AS c
INNER JOIN INFORMATION_SCHEMA.INNODB_SYS_TABLES AS t
ON c.table_id=t.table_id
WHERE t.name='test/t' AND c.name='a';
# This ALTER re-enables versioning on column a and also adds a primary key.
--enable_info
ALTER TABLE t
CHANGE a a INT WITH SYSTEM VERSIONING,
ADD PRIMARY KEY pk(a);
--disable_info
SELECT c.prtype FROM INFORMATION_SCHEMA.INNODB_SYS_COLUMNS AS c
INNER JOIN INFORMATION_SCHEMA.INNODB_SYS_TABLES AS t
ON c.table_id=t.table_id
WHERE t.name='test/t' AND c.name='a';
UPDATE t SET a=1;
SELECT COUNT(*) FROM t FOR SYSTEM_TIME ALL;
SHOW CREATE TABLE t;
-- echo # handles VIRTUAL columns too
# Here column a is generated from b (a = b + 1), and the versioning
# attribute is changed on the base column b.
CREATE OR REPLACE TABLE t (
a INT AS (b + 1),
b INT,
row_start BIGINT UNSIGNED AS ROW START INVISIBLE,
row_end BIGINT UNSIGNED AS ROW END INVISIBLE,
PERIOD FOR SYSTEM_TIME(row_start, row_end)
) WITH SYSTEM VERSIONING ENGINE=INNODB;
INSERT INTO t VALUES (DEFAULT, 1);
SELECT c.prtype FROM INFORMATION_SCHEMA.INNODB_SYS_COLUMNS AS c
INNER JOIN INFORMATION_SCHEMA.INNODB_SYS_TABLES AS t
ON c.table_id=t.table_id
WHERE t.name='test/t' AND c.name='b';
--enable_info
ALTER TABLE t
CHANGE b b INT WITHOUT SYSTEM VERSIONING;
--disable_info
SELECT c.prtype FROM INFORMATION_SCHEMA.INNODB_SYS_COLUMNS AS c
INNER JOIN INFORMATION_SCHEMA.INNODB_SYS_TABLES AS t
ON c.table_id=t.table_id
WHERE t.name='test/t' AND c.name='b';
UPDATE t SET b=11;
SELECT COUNT(*) FROM t FOR SYSTEM_TIME ALL;
DROP TABLE t;
SET @@SYSTEM_VERSIONING_ALTER_HISTORY=ERROR;
# Count registry rows whose begin timestamp is not earlier than their
# commit timestamp.
SELECT count(*) from mysql.transaction_registry where begin_timestamp>=commit_timestamp;
--echo # MDEV-18875 Assertion `thd->transaction.stmt.ha_list == __null ||
--echo # trans == &thd->transaction.stmt' failed or bogus ER_DUP_ENTRY upon
--echo # ALTER TABLE with versioning
create or replace table t (x int) engine=innodb;
# The ALTER that adds BIGINT UNSIGNED period columns and system versioning
# is run with autocommit disabled and algorithm=copy; autocommit is
# restored right afterwards.
set autocommit= 0;
alter table t
algorithm=copy,
add column row_start bigint unsigned as row start,
add column row_end bigint unsigned as row end,
add period for system_time(row_start,row_end),
with system versioning;
set autocommit= 1;
--echo # MDEV-18865 Assertion `t->first->versioned_by_id()'
--echo # failed in innodb_prepare_commit_versioned
# Non-empty table: one ALTER adds the BIGINT UNSIGNED period columns,
# repositions x after row_start, and enables system versioning.
create or replace table t (x int) engine=innodb;
insert into t values (0);
alter table t add `row_start` bigint unsigned as row start,
add `row_end` bigint unsigned as row end,
add period for system_time(`row_start`,`row_end`),
modify x int after row_start,
with system versioning;
# Cleanup: recreating the test database removes the tables still left in it.
create or replace database test;