mirror of
https://github.com/MariaDB/server.git
synced 2025-08-08 11:22:35 +03:00
Bug#54358: READ UNCOMMITTED access failure of off-page DYNAMIC or COMPRESSED
columns When the server crashes after a record stub has been inserted and before all its off-page columns have been written, the record will contain incomplete off-page columns after crash recovery. Such records may only be accessed at the READ UNCOMMITTED isolation level or when rolling back a recovered transaction in recv_recovery_rollback_active(). Skip these records at the READ UNCOMMITTED isolation level. TODO: Add assertions for checking the above assumptions hold when an incomplete BLOB is encountered. btr_rec_copy_externally_stored_field(): Return NULL if the field is incomplete. row_prebuilt_t::templ_contains_blob: Clarify what "BLOB" means in this context. Hint: MySQL BLOBs are not the same as InnoDB BLOBs. row_sel_store_mysql_rec(): Return FALSE if not all columns could be retrieved. Previously this function always returned TRUE. Assert that the record is not delete-marked. row_sel_push_cache_row_for_mysql(): Return FALSE if not all columns could be retrieved. row_search_for_mysql(): Skip records containing incomplete off-page columns. Assert that the transaction isolation level is READ UNCOMMITTED. rb://380 approved by Jimmy Yang
This commit is contained in:
@@ -4814,7 +4814,7 @@ btr_copy_externally_stored_field(
|
|||||||
|
|
||||||
/*******************************************************************//**
|
/*******************************************************************//**
|
||||||
Copies an externally stored field of a record to mem heap.
|
Copies an externally stored field of a record to mem heap.
|
||||||
@return the field copied to heap */
|
@return the field copied to heap, or NULL if the field is incomplete */
|
||||||
UNIV_INTERN
|
UNIV_INTERN
|
||||||
byte*
|
byte*
|
||||||
btr_rec_copy_externally_stored_field(
|
btr_rec_copy_externally_stored_field(
|
||||||
@@ -4844,6 +4844,28 @@ btr_rec_copy_externally_stored_field(
|
|||||||
|
|
||||||
data = rec_get_nth_field(rec, offsets, no, &local_len);
|
data = rec_get_nth_field(rec, offsets, no, &local_len);
|
||||||
|
|
||||||
|
ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
|
||||||
|
|
||||||
|
if (UNIV_UNLIKELY
|
||||||
|
(!memcmp(data + local_len - BTR_EXTERN_FIELD_REF_SIZE,
|
||||||
|
field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE))) {
|
||||||
|
/* The externally stored field was not written
|
||||||
|
yet. This is only a valid condition when the server
|
||||||
|
crashed after the time a record stub was freshly
|
||||||
|
inserted but before all its columns were written. This
|
||||||
|
record should only be seen by
|
||||||
|
recv_recovery_rollback_active() or any
|
||||||
|
TRX_ISO_READ_UNCOMMITTED transactions. */
|
||||||
|
|
||||||
|
/* TODO: assert that there is an owner_trx with
|
||||||
|
owner_trx->id == DB_TRX_ID and owner_trx->is_recovered */
|
||||||
|
|
||||||
|
/* TODO: assert that for the current transaction trx,
|
||||||
|
either (trx == owner_trx && trx_is_recv(trx)) or
|
||||||
|
trx->isolation_level == TRX_ISO_READ_UNCOMMITTED. */
|
||||||
|
return(NULL);
|
||||||
|
}
|
||||||
|
|
||||||
return(btr_copy_externally_stored_field(len, data,
|
return(btr_copy_externally_stored_field(len, data,
|
||||||
zip_size, local_len, heap));
|
zip_size, local_len, heap));
|
||||||
}
|
}
|
||||||
|
@@ -570,7 +570,7 @@ btr_copy_externally_stored_field_prefix(
|
|||||||
ulint local_len);/*!< in: length of data, in bytes */
|
ulint local_len);/*!< in: length of data, in bytes */
|
||||||
/*******************************************************************//**
|
/*******************************************************************//**
|
||||||
Copies an externally stored field of a record to mem heap.
|
Copies an externally stored field of a record to mem heap.
|
||||||
@return the field copied to heap */
|
@return the field copied to heap, or NULL if the field is incomplete */
|
||||||
UNIV_INTERN
|
UNIV_INTERN
|
||||||
byte*
|
byte*
|
||||||
btr_rec_copy_externally_stored_field(
|
btr_rec_copy_externally_stored_field(
|
||||||
|
@@ -622,7 +622,11 @@ struct row_prebuilt_struct {
|
|||||||
the secondary index, then this is
|
the secondary index, then this is
|
||||||
set to TRUE */
|
set to TRUE */
|
||||||
unsigned templ_contains_blob:1;/*!< TRUE if the template contains
|
unsigned templ_contains_blob:1;/*!< TRUE if the template contains
|
||||||
BLOB column(s) */
|
a column with DATA_BLOB ==
|
||||||
|
get_innobase_type_from_mysql_type();
|
||||||
|
not to be confused with InnoDB
|
||||||
|
externally stored columns
|
||||||
|
(VARCHAR can be off-page too) */
|
||||||
mysql_row_templ_t* mysql_template;/*!< template used to transform
|
mysql_row_templ_t* mysql_template;/*!< template used to transform
|
||||||
rows fast between MySQL and Innobase
|
rows fast between MySQL and Innobase
|
||||||
formats; memory for this template
|
formats; memory for this template
|
||||||
|
@@ -1780,6 +1780,11 @@ row_merge_copy_blobs(
|
|||||||
(below). */
|
(below). */
|
||||||
data = btr_rec_copy_externally_stored_field(
|
data = btr_rec_copy_externally_stored_field(
|
||||||
mrec, offsets, zip_size, i, &len, heap);
|
mrec, offsets, zip_size, i, &len, heap);
|
||||||
|
/* Because we have locked the table, any records
|
||||||
|
written by incomplete transactions must have been
|
||||||
|
rolled back already. There must not be any incomplete
|
||||||
|
BLOB columns. */
|
||||||
|
ut_a(data);
|
||||||
|
|
||||||
dfield_set_data(field, data, len);
|
dfield_set_data(field, data, len);
|
||||||
}
|
}
|
||||||
|
@@ -416,7 +416,7 @@ row_sel_fetch_columns(
|
|||||||
field_no))) {
|
field_no))) {
|
||||||
|
|
||||||
/* Copy an externally stored field to the
|
/* Copy an externally stored field to the
|
||||||
temporary heap */
|
temporary heap, if possible. */
|
||||||
|
|
||||||
heap = mem_heap_create(1);
|
heap = mem_heap_create(1);
|
||||||
|
|
||||||
@@ -425,6 +425,21 @@ row_sel_fetch_columns(
|
|||||||
dict_table_zip_size(index->table),
|
dict_table_zip_size(index->table),
|
||||||
field_no, &len, heap);
|
field_no, &len, heap);
|
||||||
|
|
||||||
|
/* data == NULL means that the
|
||||||
|
externally stored field was not
|
||||||
|
written yet. This is only a valid
|
||||||
|
condition when the server crashed
|
||||||
|
after the time a record stub was
|
||||||
|
freshly inserted but before all its
|
||||||
|
columns were written. This record
|
||||||
|
should only be seen by
|
||||||
|
recv_recovery_rollback_active() or any
|
||||||
|
TRX_ISO_READ_UNCOMMITTED
|
||||||
|
transactions. The InnoDB SQL parser
|
||||||
|
(the sole caller of this function)
|
||||||
|
does not implement READ UNCOMMITTED,
|
||||||
|
and it is not involved during rollback. */
|
||||||
|
ut_a(data);
|
||||||
ut_a(len != UNIV_SQL_NULL);
|
ut_a(len != UNIV_SQL_NULL);
|
||||||
|
|
||||||
needs_copy = TRUE;
|
needs_copy = TRUE;
|
||||||
@@ -926,6 +941,7 @@ row_sel_get_clust_rec(
|
|||||||
when plan->clust_pcur was positioned. The latch will not be
|
when plan->clust_pcur was positioned. The latch will not be
|
||||||
released until mtr_commit(mtr). */
|
released until mtr_commit(mtr). */
|
||||||
|
|
||||||
|
ut_ad(!rec_get_deleted_flag(clust_rec, rec_offs_comp(offsets)));
|
||||||
row_sel_fetch_columns(index, clust_rec, offsets,
|
row_sel_fetch_columns(index, clust_rec, offsets,
|
||||||
UT_LIST_GET_FIRST(plan->columns));
|
UT_LIST_GET_FIRST(plan->columns));
|
||||||
*out_rec = clust_rec;
|
*out_rec = clust_rec;
|
||||||
@@ -1628,6 +1644,13 @@ skip_lock:
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (old_vers == NULL) {
|
if (old_vers == NULL) {
|
||||||
|
/* The record does not exist
|
||||||
|
in our read view. Skip it, but
|
||||||
|
first attempt to determine
|
||||||
|
whether the index segment we
|
||||||
|
are searching through has been
|
||||||
|
exhausted. */
|
||||||
|
|
||||||
offsets = rec_get_offsets(
|
offsets = rec_get_offsets(
|
||||||
rec, index, offsets,
|
rec, index, offsets,
|
||||||
ULINT_UNDEFINED, &heap);
|
ULINT_UNDEFINED, &heap);
|
||||||
@@ -2647,9 +2670,8 @@ Convert a row in the Innobase format to a row in the MySQL format.
|
|||||||
Note that the template in prebuilt may advise us to copy only a few
|
Note that the template in prebuilt may advise us to copy only a few
|
||||||
columns to mysql_rec, other columns are left blank. All columns may not
|
columns to mysql_rec, other columns are left blank. All columns may not
|
||||||
be needed in the query.
|
be needed in the query.
|
||||||
@return TRUE if success, FALSE if could not allocate memory for a BLOB
|
@return TRUE on success, FALSE if not all columns could be retrieved */
|
||||||
(though we may also assert in that case) */
|
static __attribute__((warn_unused_result))
|
||||||
static
|
|
||||||
ibool
|
ibool
|
||||||
row_sel_store_mysql_rec(
|
row_sel_store_mysql_rec(
|
||||||
/*====================*/
|
/*====================*/
|
||||||
@@ -2672,6 +2694,7 @@ row_sel_store_mysql_rec(
|
|||||||
ut_ad(prebuilt->mysql_template);
|
ut_ad(prebuilt->mysql_template);
|
||||||
ut_ad(prebuilt->default_rec);
|
ut_ad(prebuilt->default_rec);
|
||||||
ut_ad(rec_offs_validate(rec, NULL, offsets));
|
ut_ad(rec_offs_validate(rec, NULL, offsets));
|
||||||
|
ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
|
||||||
|
|
||||||
if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) {
|
if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) {
|
||||||
mem_heap_free(prebuilt->blob_heap);
|
mem_heap_free(prebuilt->blob_heap);
|
||||||
@@ -2719,6 +2742,26 @@ row_sel_store_mysql_rec(
|
|||||||
dict_table_zip_size(prebuilt->table),
|
dict_table_zip_size(prebuilt->table),
|
||||||
templ->rec_field_no, &len, heap);
|
templ->rec_field_no, &len, heap);
|
||||||
|
|
||||||
|
if (UNIV_UNLIKELY(!data)) {
|
||||||
|
/* The externally stored field
|
||||||
|
was not written yet. This is
|
||||||
|
only a valid condition when
|
||||||
|
the server crashed after the
|
||||||
|
time a record stub was freshly
|
||||||
|
inserted but before all its
|
||||||
|
columns were written. This
|
||||||
|
record should only be seen by
|
||||||
|
recv_recovery_rollback_active()
|
||||||
|
or any TRX_ISO_READ_UNCOMMITTED
|
||||||
|
transactions. */
|
||||||
|
|
||||||
|
if (extern_field_heap) {
|
||||||
|
mem_heap_free(extern_field_heap);
|
||||||
|
}
|
||||||
|
|
||||||
|
return(FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
ut_a(len != UNIV_SQL_NULL);
|
ut_a(len != UNIV_SQL_NULL);
|
||||||
} else {
|
} else {
|
||||||
/* Field is stored in the row. */
|
/* Field is stored in the row. */
|
||||||
@@ -3136,9 +3179,10 @@ row_sel_pop_cached_row_for_mysql(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/********************************************************************//**
|
/********************************************************************//**
|
||||||
Pushes a row for MySQL to the fetch cache. */
|
Pushes a row for MySQL to the fetch cache.
|
||||||
UNIV_INLINE
|
@return TRUE on success, FALSE if the record contains incomplete BLOBs */
|
||||||
void
|
UNIV_INLINE __attribute__((warn_unused_result))
|
||||||
|
ibool
|
||||||
row_sel_push_cache_row_for_mysql(
|
row_sel_push_cache_row_for_mysql(
|
||||||
/*=============================*/
|
/*=============================*/
|
||||||
row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */
|
row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */
|
||||||
@@ -3180,10 +3224,11 @@ row_sel_push_cache_row_for_mysql(
|
|||||||
prebuilt->fetch_cache[
|
prebuilt->fetch_cache[
|
||||||
prebuilt->n_fetch_cached],
|
prebuilt->n_fetch_cached],
|
||||||
prebuilt, rec, offsets))) {
|
prebuilt, rec, offsets))) {
|
||||||
ut_error;
|
return(FALSE);
|
||||||
}
|
}
|
||||||
|
|
||||||
prebuilt->n_fetch_cached++;
|
prebuilt->n_fetch_cached++;
|
||||||
|
return(TRUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*********************************************************************//**
|
/*********************************************************************//**
|
||||||
@@ -3578,6 +3623,24 @@ row_search_for_mysql(
|
|||||||
|
|
||||||
if (!row_sel_store_mysql_rec(buf, prebuilt,
|
if (!row_sel_store_mysql_rec(buf, prebuilt,
|
||||||
rec, offsets)) {
|
rec, offsets)) {
|
||||||
|
/* Only fresh inserts at
|
||||||
|
server crash time may contain
|
||||||
|
incomplete externally stored
|
||||||
|
columns. Pretend that such
|
||||||
|
records do not exist. Such
|
||||||
|
records may only be accessed
|
||||||
|
at the READ UNCOMMITTED
|
||||||
|
isolation level or when
|
||||||
|
rolling back a recovered
|
||||||
|
transaction. Rollback happens
|
||||||
|
at a lower level, not here. */
|
||||||
|
ut_a(trx->isolation_level
|
||||||
|
== TRX_ISO_READ_UNCOMMITTED);
|
||||||
|
/* TODO: assert that there is
|
||||||
|
an owner_trx with
|
||||||
|
owner_trx->id == DB_TRX_ID and
|
||||||
|
owner_trx->is_recovered */
|
||||||
|
|
||||||
err = DB_TOO_BIG_RECORD;
|
err = DB_TOO_BIG_RECORD;
|
||||||
|
|
||||||
/* We let the main loop to do the
|
/* We let the main loop to do the
|
||||||
@@ -4357,9 +4420,20 @@ requires_clust_rec:
|
|||||||
not cache rows because there the cursor is a scrollable
|
not cache rows because there the cursor is a scrollable
|
||||||
cursor. */
|
cursor. */
|
||||||
|
|
||||||
row_sel_push_cache_row_for_mysql(prebuilt, result_rec,
|
if (!row_sel_push_cache_row_for_mysql(prebuilt, result_rec,
|
||||||
offsets);
|
offsets)) {
|
||||||
if (prebuilt->n_fetch_cached == MYSQL_FETCH_CACHE_SIZE) {
|
/* Only fresh inserts at server crash time may contain
|
||||||
|
incomplete externally stored columns. Pretend that
|
||||||
|
such records do not exist. Such records may only be
|
||||||
|
accessed at the READ UNCOMMITTED isolation level or
|
||||||
|
when rolling back a recovered transaction. Rollback
|
||||||
|
happens at a lower level, not here. */
|
||||||
|
ut_a(trx->isolation_level == TRX_ISO_READ_UNCOMMITTED);
|
||||||
|
/* TODO: assert that there is an owner_trx
|
||||||
|
with owner_trx->id == DB_TRX_ID and
|
||||||
|
owner_trx->is_recovered */
|
||||||
|
} else if (prebuilt->n_fetch_cached
|
||||||
|
== MYSQL_FETCH_CACHE_SIZE) {
|
||||||
|
|
||||||
goto got_row;
|
goto got_row;
|
||||||
}
|
}
|
||||||
@@ -4375,9 +4449,20 @@ requires_clust_rec:
|
|||||||
} else {
|
} else {
|
||||||
if (!row_sel_store_mysql_rec(buf, prebuilt,
|
if (!row_sel_store_mysql_rec(buf, prebuilt,
|
||||||
result_rec, offsets)) {
|
result_rec, offsets)) {
|
||||||
err = DB_TOO_BIG_RECORD;
|
/* Only fresh inserts at server crash
|
||||||
|
time may contain incomplete externally
|
||||||
goto lock_wait_or_error;
|
stored columns. Pretend that such
|
||||||
|
records do not exist. Such records may
|
||||||
|
only be accessed at the READ UNCOMMITTED
|
||||||
|
isolation level or when rolling back a
|
||||||
|
recovered transaction. Rollback happens
|
||||||
|
at a lower level, not here. */
|
||||||
|
ut_a(trx->isolation_level
|
||||||
|
== TRX_ISO_READ_UNCOMMITTED);
|
||||||
|
/* TODO: assert that there is an owner_trx
|
||||||
|
with owner_trx->id == DB_TRX_ID and
|
||||||
|
owner_trx->is_recovered */
|
||||||
|
goto next_rec;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user