1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-01 03:47:19 +03:00

Many files:

Merge InnoDB-4.0.14: SAVEPOINT now implemented; InnoDB now accepts also column prefix keys; crashing bug in ON UPDATE CASCADE fixed; page checksum formula fixed
This commit is contained in:
heikki@hundin.mysql.fi
2003-06-15 01:04:28 +03:00
parent 8a52c2d20b
commit 4da7f485b7
75 changed files with 2300 additions and 788 deletions

View File

@ -255,6 +255,7 @@ enum ha_base_keytype {
#define HA_ERR_CANNOT_ADD_FOREIGN 150 /* Cannot add a foreign key constr. */
#define HA_ERR_NO_REFERENCED_ROW 151 /* Cannot add a child row */
#define HA_ERR_ROW_IS_REFERENCED 152 /* Cannot delete a parent row */
#define HA_ERR_NO_SAVEPOINT 153 /* No savepoint with that name */
/* Other constants */

View File

@ -1364,7 +1364,8 @@ btr_cur_update_sec_rec_in_place(
}
/*****************************************************************
Updates a record when the update causes no size changes in its fields. */
Updates a record when the update causes no size changes in its fields.
We assume here that the ordering fields of the record do not change. */
ulint
btr_cur_update_in_place(
@ -1455,7 +1456,8 @@ btr_cur_update_in_place(
Tries to update a record on a page in an index tree. It is assumed that mtr
holds an x-latch on the page. The operation does not succeed if there is too
little space on the page or if the update would result in too empty a page,
so that tree compression is recommended. */
so that tree compression is recommended. We assume here that the ordering
fields of the record do not change. */
ulint
btr_cur_optimistic_update(
@ -1507,10 +1509,11 @@ btr_cur_optimistic_update(
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
MTR_MEMO_PAGE_X_FIX));
if (!row_upd_changes_field_size(rec, index, update)) {
if (!row_upd_changes_field_size_or_external(rec, index, update)) {
/* The simplest and most common case: the update does not
change the size of any field */
/* The simplest and the most common case: the update does not
change the size of any field and none of the updated fields is
externally stored in rec or update */
return(btr_cur_update_in_place(flags, cursor, update,
cmpl_info, thr, mtr));
@ -1539,7 +1542,7 @@ btr_cur_optimistic_update(
new_entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap);
row_upd_clust_index_replace_new_col_vals(new_entry, update);
row_upd_index_replace_new_col_vals(new_entry, index, update, NULL);
old_rec_size = rec_get_size(rec);
new_rec_size = rec_get_converted_size(new_entry);
@ -1670,53 +1673,12 @@ btr_cur_pess_upd_restore_supremum(
rec);
}
/***************************************************************
Replaces and copies the data in the new column values stored in the
update vector to the clustered index entry given. The info bits of the
entry are also overwritten with the info bits of the update vector.
Every non-NULL new value is copied to memory allocated from heap, and
the corresponding field of entry is set to point to that copy. */
static
void
btr_cur_copy_new_col_vals(
/*======================*/
dtuple_t* entry, /* in/out: index entry where replaced */
upd_t* update, /* in: update vector */
mem_heap_t* heap) /* in: heap where data is copied */
{
upd_field_t* upd_field;
dfield_t* dfield;
dfield_t* new_val;
ulint field_no;
byte* data;
ulint i;
/* The entry takes over the info bits carried by the update vector */
dtuple_set_info_bits(entry, update->info_bits);
for (i = 0; i < upd_get_n_fields(update); i++) {
upd_field = upd_get_nth_field(update, i);
/* field_no is used directly as the position of the field in entry */
field_no = upd_field->field_no;
dfield = dtuple_get_nth_field(entry, field_no);
new_val = &(upd_field->new_val);
if (new_val->len == UNIV_SQL_NULL) {
/* SQL NULL: there is no data to copy; the length passed to
dfield_set_data() below is still UNIV_SQL_NULL */
data = NULL;
} else {
/* Make a private copy of the new value in heap, so that the
entry field points to the copy and not to the buffer of the
update vector */
data = mem_heap_alloc(heap, new_val->len);
ut_memcpy(data, new_val->data, new_val->len);
}
dfield_set_data(dfield, data, new_val->len);
}
}
/*****************************************************************
Performs an update of a record on a page of a tree. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. If the
update is made on the leaf level, to avoid deadlocks, mtr must also
own x-latches to brothers of page, if those brothers exist. */
own x-latches to brothers of page, if those brothers exist. We assume
here that the ordering fields of the record do not change. */
ulint
btr_cur_pessimistic_update(
@ -1813,7 +1775,7 @@ btr_cur_pessimistic_update(
new_entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap);
btr_cur_copy_new_col_vals(new_entry, update, heap);
row_upd_index_replace_new_col_vals(new_entry, index, update, heap);
if (!(flags & BTR_KEEP_SYS_FLAG)) {
row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
@ -3369,8 +3331,8 @@ btr_free_externally_stored_field(
page_no = mach_read_from_4(data + local_len
+ BTR_EXTERN_PAGE_NO);
offset = mach_read_from_4(data + local_len + BTR_EXTERN_OFFSET);
offset = mach_read_from_4(data + local_len
+ BTR_EXTERN_OFFSET);
extern_len = mach_read_from_4(data + local_len
+ BTR_EXTERN_LEN + 4);

View File

@ -364,6 +364,8 @@ btr_pcur_move_to_next_page(
btr_leaf_page_release(page, cursor->latch_mode, mtr);
page_cur_set_before_first(next_page, btr_pcur_get_page_cur(cursor));
page_check_dir(next_page);
}
/*************************************************************

View File

@ -209,12 +209,12 @@ ibool buf_debug_prints = FALSE; /* If this is set TRUE,
/************************************************************************
Calculates a page checksum which is stored to the page when it is written
to a file. Note that we must be careful to calculate the same value
on 32-bit and 64-bit architectures. */
to a file. Note that we must be careful to calculate the same value on
32-bit and 64-bit architectures. */
ulint
buf_calc_page_checksum(
/*===================*/
buf_calc_page_new_checksum(
/*=======================*/
/* out: checksum */
byte* page) /* in: buffer page */
{
@ -222,12 +222,39 @@ buf_calc_page_checksum(
/* Since the fields FIL_PAGE_FILE_FLUSH_LSN and ..._ARCH_LOG_NO
are written outside the buffer pool to the first pages of data
files, we have to skip them in page checksum calculation */
files, we have to skip them in the page checksum calculation.
We must also skip the field FIL_PAGE_SPACE_OR_CHKSUM where the
checksum is stored, and also the last 8 bytes of page because
there we store the old formula checksum. */
checksum = ut_fold_binary(page + FIL_PAGE_OFFSET,
FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET)
+ ut_fold_binary(page + FIL_PAGE_DATA,
UNIV_PAGE_SIZE - FIL_PAGE_DATA
- FIL_PAGE_END_LSN_OLD_CHKSUM);
checksum = checksum & 0xFFFFFFFF;
return(checksum);
}
/************************************************************************
In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
looked at the first few bytes of the page. This calculates that old
checksum.
NOTE: we must first store the new formula checksum to
FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
because this takes that field as an input! */
ulint
buf_calc_page_old_checksum(
/*=======================*/
/* out: checksum */
byte* page) /* in: buffer page */
{
ulint checksum;
checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN);
+ ut_fold_binary(page + FIL_PAGE_DATA,
UNIV_PAGE_SIZE - FIL_PAGE_DATA
- FIL_PAGE_END_LSN);
checksum = checksum & 0xFFFFFFFF;
return(checksum);
@ -243,27 +270,47 @@ buf_page_is_corrupted(
byte* read_buf) /* in: a database page */
{
ulint checksum;
ulint old_checksum;
ulint checksum_field;
ulint old_checksum_field;
checksum = buf_calc_page_checksum(read_buf);
if (mach_read_from_4(read_buf + FIL_PAGE_LSN + 4)
!= mach_read_from_4(read_buf + UNIV_PAGE_SIZE
- FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) {
/* Note that InnoDB initializes empty pages to zero, and
early versions of InnoDB did not store page checksum to
the 4 most significant bytes of the page lsn field at the
end of a page: */
/* Stored log sequence numbers at the start and the end
of page do not match */
if ((mach_read_from_4(read_buf + FIL_PAGE_LSN + 4)
!= mach_read_from_4(read_buf + UNIV_PAGE_SIZE
- FIL_PAGE_END_LSN + 4))
|| (checksum != mach_read_from_4(read_buf
+ UNIV_PAGE_SIZE
- FIL_PAGE_END_LSN)
&& mach_read_from_4(read_buf + FIL_PAGE_LSN)
!= mach_read_from_4(read_buf
+ UNIV_PAGE_SIZE
- FIL_PAGE_END_LSN))) {
return(TRUE);
}
old_checksum = buf_calc_page_old_checksum(read_buf);
old_checksum_field = mach_read_from_4(read_buf + UNIV_PAGE_SIZE
- FIL_PAGE_END_LSN_OLD_CHKSUM);
/* There are 2 valid formulas for old_checksum_field:
1. Very old versions of InnoDB only stored the 8-byte lsn at the start
and the end of the page, so the field equals the first 4 bytes stored
at FIL_PAGE_LSN.
2. Newer InnoDB versions store the old formula checksum there. */
if (old_checksum_field != mach_read_from_4(read_buf + FIL_PAGE_LSN)
&& old_checksum_field != old_checksum) {
return(TRUE);
}
checksum = buf_calc_page_new_checksum(read_buf);
checksum_field = mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM);
/* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
(always equal to 0) to FIL_PAGE_SPACE_OR_CHKSUM */
if (checksum_field != 0 && checksum_field != checksum) {
return(TRUE);
}
return(FALSE);
}
@ -277,6 +324,7 @@ buf_page_print(
{
dict_index_t* index;
ulint checksum;
ulint old_checksum;
char* buf;
buf = mem_alloc(4 * UNIV_PAGE_SIZE);
@ -291,19 +339,23 @@ buf_page_print(
mem_free(buf);
checksum = buf_calc_page_checksum(read_buf);
checksum = buf_calc_page_new_checksum(read_buf);
old_checksum = buf_calc_page_old_checksum(read_buf);
ut_print_timestamp(stderr);
fprintf(stderr, " InnoDB: Page checksum %lu stored checksum %lu\n",
checksum, mach_read_from_4(read_buf
+ UNIV_PAGE_SIZE
- FIL_PAGE_END_LSN));
fprintf(stderr,
" InnoDB: Page checksum %lu, prior-to-4.0.14-form checksum %lu\n"
"InnoDB: stored checksum %lu, prior-to-4.0.14-form stored checksum %lu\n",
checksum, old_checksum,
mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
mach_read_from_4(read_buf + UNIV_PAGE_SIZE
- FIL_PAGE_END_LSN_OLD_CHKSUM));
fprintf(stderr,
"InnoDB: Page lsn %lu %lu, low 4 bytes of lsn at page end %lu\n",
mach_read_from_4(read_buf + FIL_PAGE_LSN),
mach_read_from_4(read_buf + FIL_PAGE_LSN + 4),
mach_read_from_4(read_buf + UNIV_PAGE_SIZE
- FIL_PAGE_END_LSN + 4));
- FIL_PAGE_END_LSN_OLD_CHKSUM + 4));
if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE)
== TRX_UNDO_INSERT) {
fprintf(stderr,

View File

@ -361,21 +361,29 @@ buf_flush_init_for_writing(
ulint space, /* in: space id */
ulint page_no) /* in: page number */
{
/* Write the newest modification lsn to the page */
UT_NOT_USED(space);
/* Write the newest modification lsn to the page header and trailer */
mach_write_to_8(page + FIL_PAGE_LSN, newest_lsn);
mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN, newest_lsn);
mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
newest_lsn);
/* Write the page number */
/* Write to the page the space id and page number */
mach_write_to_4(page + FIL_PAGE_SPACE, space);
mach_write_to_4(page + FIL_PAGE_OFFSET, page_no);
/* We overwrite the first 4 bytes of the end lsn field to store
a page checksum */
/* Store the new formula checksum */
mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN,
buf_calc_page_checksum(page));
mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
buf_calc_page_new_checksum(page));
/* We overwrite the first 4 bytes of the end lsn field to store
the old formula checksum. Since it depends also on the field
FIL_PAGE_SPACE_OR_CHKSUM, it has to be calculated after storing the
new formula checksum. */
mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
buf_calc_page_old_checksum(page));
}
/************************************************************************

View File

@ -584,8 +584,7 @@ dtuple_convert_big_rec(
* sizeof(big_rec_field_t));
/* Decide which fields to shorten: the algorithm is to look for
the longest field which does not occur in the ordering part
of any index on the table */
the longest field whose type is DATA_BLOB */
n_fields = 0;
@ -610,12 +609,9 @@ dtuple_convert_big_rec(
}
}
/* Skip over fields which are ordering in some index */
if (!is_externally_stored &&
dict_field_get_col(
dict_index_get_nth_field(index, i))
->ord_part == 0) {
if (!is_externally_stored
&& dict_index_get_nth_type(index, i)->mtype
== DATA_BLOB) {
dfield = dtuple_get_nth_field(entry, i);
@ -629,9 +625,13 @@ dtuple_convert_big_rec(
}
}
if (longest < BTR_EXTERN_FIELD_REF_SIZE + 10
+ REC_1BYTE_OFFS_LIMIT) {
/* We do not store externally fields which are smaller than
DICT_MAX_COL_PREFIX_LEN */
ut_a(DICT_MAX_COL_PREFIX_LEN > REC_1BYTE_OFFS_LIMIT);
if (longest < BTR_EXTERN_FIELD_REF_SIZE + 10
+ DICT_MAX_COL_PREFIX_LEN) {
/* Cannot shorten more */
mem_heap_free(heap);
@ -644,13 +644,19 @@ dtuple_convert_big_rec(
drop below 128 which is the limit for the 2-byte
offset storage format in a physical record. This
we accomplish by storing 128 bytes of data in entry
itself, and only the remaining part to big rec vec. */
itself, and only the remaining part to big rec vec.
We store the first bytes locally to the record. Then
we can calculate all ordering fields in all indexes
from locally stored data. */
dfield = dtuple_get_nth_field(entry, longest_i);
vector->fields[n_fields].field_no = longest_i;
ut_a(dfield->len > DICT_MAX_COL_PREFIX_LEN);
vector->fields[n_fields].len = dfield->len
- REC_1BYTE_OFFS_LIMIT;
- DICT_MAX_COL_PREFIX_LEN;
vector->fields[n_fields].data = mem_heap_alloc(heap,
vector->fields[n_fields].len);

View File

@ -85,8 +85,6 @@ dtype_print(
printf("DATA_MIX_ID");
} else if (prtype == DATA_ENGLISH) {
printf("DATA_ENGLISH");
} else if (prtype == DATA_FINNISH) {
printf("DATA_FINNISH");
} else {
printf("prtype %lu", mtype);
}

View File

@ -276,7 +276,7 @@ dict_boot(void)
DICT_HDR_SPACE,
DICT_UNIQUE | DICT_CLUSTERED, 1);
dict_mem_index_add_field(index, (char *) "NAME", 0);
dict_mem_index_add_field(index, (char *) "NAME", 0, 0);
index->page_no = mtr_read_ulint(dict_hdr + DICT_HDR_TABLES,
MLOG_4BYTES, &mtr);
@ -287,7 +287,7 @@ dict_boot(void)
index = dict_mem_index_create((char *) "SYS_TABLES",
(char *) "ID_IND", DICT_HDR_SPACE,
DICT_UNIQUE, 1);
dict_mem_index_add_field(index, (char *) "ID", 0);
dict_mem_index_add_field(index, (char *) "ID", 0, 0);
index->page_no = mtr_read_ulint(dict_hdr + DICT_HDR_TABLE_IDS,
MLOG_4BYTES, &mtr);
@ -313,8 +313,8 @@ dict_boot(void)
(char *) "CLUST_IND", DICT_HDR_SPACE,
DICT_UNIQUE | DICT_CLUSTERED, 2);
dict_mem_index_add_field(index, (char *) "TABLE_ID", 0);
dict_mem_index_add_field(index, (char *) "POS", 0);
dict_mem_index_add_field(index, (char *) "TABLE_ID", 0, 0);
dict_mem_index_add_field(index, (char *) "POS", 0, 0);
index->page_no = mtr_read_ulint(dict_hdr + DICT_HDR_COLUMNS,
MLOG_4BYTES, &mtr);
@ -343,8 +343,8 @@ dict_boot(void)
(char *) "CLUST_IND", DICT_HDR_SPACE,
DICT_UNIQUE | DICT_CLUSTERED, 2);
dict_mem_index_add_field(index, (char *) "TABLE_ID", 0);
dict_mem_index_add_field(index, (char *) "ID", 0);
dict_mem_index_add_field(index, (char *) "TABLE_ID", 0, 0);
dict_mem_index_add_field(index, (char *) "ID", 0, 0);
index->page_no = mtr_read_ulint(dict_hdr + DICT_HDR_INDEXES,
MLOG_4BYTES, &mtr);
@ -365,8 +365,8 @@ dict_boot(void)
(char *) "CLUST_IND", DICT_HDR_SPACE,
DICT_UNIQUE | DICT_CLUSTERED, 2);
dict_mem_index_add_field(index, (char *) "INDEX_ID", 0);
dict_mem_index_add_field(index, (char *) "POS", 0);
dict_mem_index_add_field(index, (char *) "INDEX_ID", 0, 0);
dict_mem_index_add_field(index, (char *) "POS", 0, 0);
index->page_no = mtr_read_ulint(dict_hdr + DICT_HDR_FIELDS,
MLOG_4BYTES, &mtr);

View File

@ -337,7 +337,7 @@ dict_create_index_for_cluster_step(
for (i = 0; i < table->n_cols; i++) {
col = dict_table_get_nth_col(table, i);
dict_mem_index_add_field(index, col->name, 0);
dict_mem_index_add_field(index, col->name, 0, 0);
}
(node->cluster)->index = index;
@ -450,9 +450,17 @@ dict_create_sys_fields_tuple(
dict_field_t* field;
dfield_t* dfield;
byte* ptr;
ibool index_contains_column_prefix_field = FALSE;
ulint j;
ut_ad(index && heap);
for (j = 0; j < index->n_fields; j++) {
if (dict_index_get_nth_field(index, j)->prefix_len > 0) {
index_contains_column_prefix_field = TRUE;
}
}
field = dict_index_get_nth_field(index, i);
sys_fields = dict_sys->sys_fields;
@ -466,11 +474,25 @@ dict_create_sys_fields_tuple(
mach_write_to_8(ptr, index->id);
dfield_set_data(dfield, ptr, 8);
/* 1: POS ----------------------------*/
/* 1: POS + PREFIX LENGTH ----------------------------*/
dfield = dtuple_get_nth_field(entry, 1);
ptr = mem_heap_alloc(heap, 4);
mach_write_to_4(ptr, i);
if (index_contains_column_prefix_field) {
/* If there are column prefix fields in the index, then
we store the number of the field in the 2 HIGH bytes
and the prefix length in the 2 low bytes. */
mach_write_to_4(ptr, (i << 16) + field->prefix_len);
} else {
/* Else we store the number of the field to the 2 LOW bytes.
This is to keep the storage format compatible with
InnoDB versions < 4.0.14. */
mach_write_to_4(ptr, i);
}
dfield_set_data(dfield, ptr, 4);
/* 4: COL_NAME -------------------------*/

View File

@ -88,15 +88,6 @@ dict_index_remove_from_cache(
dict_table_t* table, /* in: table */
dict_index_t* index); /* in, own: index */
/***********************************************************************
Adds a column to index. */
UNIV_INLINE
void
dict_index_add_col(
/*===============*/
dict_index_t* index, /* in: index */
dict_col_t* col, /* in: column */
ulint order); /* in: order criterion */
/***********************************************************************
Copies fields contained in index2 to index1. */
static
void
@ -482,8 +473,9 @@ dict_index_get_nth_col_pos(
ut_ad(index);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
col = dict_table_get_nth_col(index->table, n);
if (index->type & DICT_CLUSTERED) {
col = dict_table_get_nth_col(index->table, n);
return(col->clust_pos);
}
@ -492,9 +484,47 @@ dict_index_get_nth_col_pos(
for (pos = 0; pos < n_fields; pos++) {
field = dict_index_get_nth_field(index, pos);
col = field->col;
if (dict_col_get_no(col) == n) {
if (col == field->col && field->prefix_len == 0) {
return(pos);
}
}
return(ULINT_UNDEFINED);
}
/************************************************************************
Looks for a matching field in an index. The column and the prefix len have
to be the same. */
ulint
dict_index_get_nth_field_pos(
/*=========================*/
/* out: position in internal representation
of the index; if not contained, returns
ULINT_UNDEFINED */
dict_index_t* index, /* in: index from which to search */
dict_index_t* index2, /* in: index */
ulint n) /* in: field number in index2 */
{
dict_field_t* field;
dict_field_t* field2;
ulint n_fields;
ulint pos;
ut_ad(index);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
field2 = dict_index_get_nth_field(index2, n);
n_fields = dict_index_get_n_fields(index);
for (pos = 0; pos < n_fields; pos++) {
field = dict_index_get_nth_field(index, pos);
if (field->col == field2->col
&& field->prefix_len == field2->prefix_len) {
return(pos);
}
@ -622,8 +652,7 @@ dict_table_get(
}
/**************************************************************************
Returns a table object and increments MySQL open handle count on the table.
*/
Returns a table object and increments MySQL open handle count on the table. */
dict_table_t*
dict_table_get_and_increment_handle_count(
@ -732,11 +761,12 @@ dict_table_add_to_cache(
}
/* Add table to hash table of tables */
HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold, table);
HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold,
table);
/* Add table to hash table of tables based on table id */
HASH_INSERT(dict_table_t, id_hash, dict_sys->table_id_hash, id_fold,
table);
table);
/* Add table to LRU list of tables */
UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_LRU, table);
@ -837,7 +867,8 @@ dict_table_rename_in_cache(
table->name = name_buf;
/* Add table to hash table of tables */
HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold, table);
HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold,
table);
dict_sys->size += (mem_heap_get_size(table->heap) - old_size);
@ -1128,7 +1159,6 @@ dict_index_add_to_cache(
ulint n_ord;
ibool success;
ulint i;
ulint j;
ut_ad(index);
ut_ad(mutex_own(&(dict_sys->mutex)));
@ -1159,28 +1189,6 @@ dict_index_add_to_cache(
return(FALSE);
}
/* Check that the same column does not appear twice in the index.
InnoDB assumes this in its algorithms, e.g., update of an index
entry */
for (i = 0; i < dict_index_get_n_fields(index); i++) {
for (j = 0; j < i; j++) {
if (dict_index_get_nth_field(index, j)->col
== dict_index_get_nth_field(index, i)->col) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Error: column %s appears twice in index %s of table %s\n"
"InnoDB: This is not allowed in InnoDB.\n"
"InnoDB: UPDATE can cause such an index to become corrupt in InnoDB.\n",
dict_index_get_nth_field(index, i)->col->name,
index->name, table->name);
}
}
}
/* Build the cache internal representation of the index,
containing also the added system fields */
@ -1223,8 +1231,8 @@ dict_index_add_to_cache(
cluster = dict_table_get_low(table->cluster_name);
tree = dict_index_get_tree(UT_LIST_GET_FIRST(cluster->indexes));
tree = dict_index_get_tree(
UT_LIST_GET_FIRST(cluster->indexes));
new_index->tree = tree;
new_index->page_no = tree->page;
} else {
@ -1352,13 +1360,14 @@ UNIV_INLINE
void
dict_index_add_col(
/*===============*/
dict_index_t* index, /* in: index */
dict_col_t* col, /* in: column */
ulint order) /* in: order criterion */
dict_index_t* index, /* in: index */
dict_col_t* col, /* in: column */
ulint order, /* in: order criterion */
ulint prefix_len) /* in: column prefix length */
{
dict_field_t* field;
dict_mem_index_add_field(index, col->name, order);
dict_mem_index_add_field(index, col->name, order, prefix_len);
field = dict_index_get_nth_field(index, index->n_def - 1);
@ -1384,7 +1393,8 @@ dict_index_copy(
for (i = start; i < end; i++) {
field = dict_index_get_nth_field(index2, i);
dict_index_add_col(index1, field->col, field->order);
dict_index_add_col(index1, field->col, field->order,
field->prefix_len);
}
}
@ -1487,7 +1497,7 @@ dict_index_build_internal_clust(
/* Add the mix id column */
dict_index_add_col(new_index,
dict_table_get_sys_col(table, DATA_MIX_ID), 0);
dict_table_get_sys_col(table, DATA_MIX_ID), 0, 0);
/* Copy the rest of fields */
dict_index_copy(new_index, index, table->mix_len,
@ -1525,14 +1535,15 @@ dict_index_build_internal_clust(
if (!(index->type & DICT_UNIQUE)) {
dict_index_add_col(new_index,
dict_table_get_sys_col(table, DATA_ROW_ID), 0);
dict_table_get_sys_col(table, DATA_ROW_ID), 0, 0);
trx_id_pos++;
}
dict_index_add_col(new_index,
dict_table_get_sys_col(table, DATA_TRX_ID), 0);
dict_table_get_sys_col(table, DATA_TRX_ID), 0, 0);
dict_index_add_col(new_index,
dict_table_get_sys_col(table, DATA_ROLL_PTR), 0);
dict_table_get_sys_col(table, DATA_ROLL_PTR), 0, 0);
for (i = 0; i < trx_id_pos; i++) {
@ -1561,7 +1572,14 @@ dict_index_build_internal_clust(
for (i = 0; i < new_index->n_def; i++) {
field = dict_index_get_nth_field(new_index, i);
(field->col)->aux = 0;
/* If there is only a prefix of the column in the index
field, do not mark the column as contained in the index */
if (field->prefix_len == 0) {
field->col->aux = 0;
}
}
/* Add to new_index non-system columns of table not yet included
@ -1572,7 +1590,7 @@ dict_index_build_internal_clust(
ut_ad(col->type.mtype != DATA_SYS);
if (col->aux == ULINT_UNDEFINED) {
dict_index_add_col(new_index, col, 0);
dict_index_add_col(new_index, col, 0, 0);
}
}
@ -1584,7 +1602,11 @@ dict_index_build_internal_clust(
for (i = 0; i < new_index->n_def; i++) {
field = dict_index_get_nth_field(new_index, i);
(field->col)->clust_pos = i;
if (field->prefix_len == 0) {
field->col->clust_pos = i;
}
}
new_index->cached = TRUE;
@ -1646,25 +1668,33 @@ dict_index_build_internal_non_clust(
for (i = 0; i < clust_index->n_uniq; i++) {
field = dict_index_get_nth_field(clust_index, i);
(field->col)->aux = ULINT_UNDEFINED;
field->col->aux = ULINT_UNDEFINED;
}
/* Mark with 0 table columns already contained in new_index */
for (i = 0; i < new_index->n_def; i++) {
field = dict_index_get_nth_field(new_index, i);
(field->col)->aux = 0;
/* If there is only a prefix of the column in the index
field, do not mark the column as contained in the index */
if (field->prefix_len == 0) {
field->col->aux = 0;
}
}
/* Add to new_index columns necessary to determine the clustered
/* Add to new_index the columns necessary to determine the clustered
index entry uniquely */
for (i = 0; i < clust_index->n_uniq; i++) {
field = dict_index_get_nth_field(clust_index, i);
if ((field->col)->aux == ULINT_UNDEFINED) {
dict_index_add_col(new_index, field->col, 0);
if (field->col->aux == ULINT_UNDEFINED) {
dict_index_add_col(new_index, field->col, 0,
field->prefix_len);
}
}
@ -1787,6 +1817,14 @@ dict_foreign_find_index(
for (i = 0; i < n_cols; i++) {
col_name = dict_index_get_nth_field(index, i)
->col->name;
if (dict_index_get_nth_field(index, i)
->prefix_len != 0) {
/* We do not accept column prefix
indexes here */
break;
}
if (ut_strlen(columns[i]) !=
ut_strlen(col_name)
|| 0 != ut_cmp_in_lower_case(columns[i],
@ -3776,6 +3814,10 @@ dict_field_print_low(
ut_ad(mutex_own(&(dict_sys->mutex)));
printf(" %s", field->name);
if (field->prefix_len != 0) {
printf("(%lu)", field->prefix_len);
}
}
/**************************************************************************

View File

@ -301,6 +301,8 @@ dict_load_fields(
dtuple_t* tuple;
dfield_t* dfield;
char* col_name;
ulint pos_and_prefix_len;
ulint prefix_len;
rec_t* rec;
byte* field;
ulint len;
@ -345,8 +347,28 @@ dict_load_fields(
ut_a(ut_memcmp(buf, field, len) == 0);
field = rec_get_nth_field(rec, 1, &len);
ut_ad(len == 4);
ut_a(i == mach_read_from_4(field));
ut_a(len == 4);
/* The next field stores the field position in the index
and a possible column prefix length if the index field
does not contain the whole column. The storage format is
like this: if there is at least one prefix field in the index,
then the HIGH 2 bytes contain the field number (== i) and the
low 2 bytes the prefix length for the field. Otherwise the
field number (== i) is contained in the 2 LOW bytes. */
pos_and_prefix_len = mach_read_from_4(field);
ut_a((pos_and_prefix_len & 0xFFFF) == i
|| (pos_and_prefix_len & 0xFFFF0000) == (i << 16));
if ((i == 0 && pos_and_prefix_len > 0)
|| (pos_and_prefix_len & 0xFFFF0000) > 0) {
prefix_len = pos_and_prefix_len & 0xFFFF;
} else {
prefix_len = 0;
}
ut_a(0 == ut_strcmp((char*) "COL_NAME",
dict_field_get_col(
@ -359,7 +381,7 @@ dict_load_fields(
ut_memcpy(col_name, field, len);
col_name[len] = '\0';
dict_mem_index_add_field(index, col_name, 0);
dict_mem_index_add_field(index, col_name, 0, prefix_len);
btr_pcur_move_to_next_user_rec(&pcur, &mtr);
}

View File

@ -266,10 +266,13 @@ by the column name may be released only after publishing the index. */
void
dict_mem_index_add_field(
/*=====================*/
dict_index_t* index, /* in: index */
char* name, /* in: column name */
ulint order) /* in: order criterion; 0 means an ascending
order */
dict_index_t* index, /* in: index */
char* name, /* in: column name */
ulint order, /* in: order criterion; 0 means an
ascending order */
ulint prefix_len) /* in: 0 or the column prefix length
in a MySQL index like
INDEX (textcol(25)) */
{
dict_field_t* field;
@ -282,6 +285,8 @@ dict_mem_index_add_field(
field->name = name;
field->order = order;
field->prefix_len = prefix_len;
}
/**************************************************************************

View File

@ -632,7 +632,7 @@ fil_space_create(
/* Spaces with an odd id number are reserved to replicate spaces
used in log debugging */
ut_a((purpose == FIL_LOG) || (id % 2 == 0));
ut_anp((purpose == FIL_LOG) || (id % 2 == 0));
#endif
mutex_enter(&(system->mutex));
@ -1202,8 +1202,8 @@ loop:
/* Do aio */
ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0);
ut_anp(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
ut_anp((len % OS_FILE_LOG_BLOCK_SIZE) == 0);
/* Queue the aio request */
ret = os_aio(type, mode | wake_later, node->name, node->handle, buf,

View File

@ -778,7 +778,7 @@ fsp_init_file_page_low(
page[i] = 0xFF;
}
#endif
mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN,
mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
ut_dulint_zero);
mach_write_to_8(page + FIL_PAGE_LSN, ut_dulint_zero);
}
@ -2875,7 +2875,7 @@ fseg_free_step(
freed yet */
ut_a(descr);
ut_a(xdes_get_bit(descr, XDES_FREE_BIT, buf_frame_get_page_no(header)
ut_anp(xdes_get_bit(descr, XDES_FREE_BIT, buf_frame_get_page_no(header)
% FSP_EXTENT_SIZE, mtr) == FALSE);
inode = fseg_inode_get(header, mtr);

View File

@ -293,11 +293,13 @@ ha_print_info(
hash_table_t* table) /* in: hash table */
{
hash_cell_t* cell;
/* ha_node_t* node; */
ulint nodes = 0;
ulint cells = 0;
/*
ha_node_t* node;
ulint len = 0;
ulint max_len = 0;
ulint nodes = 0;
*/
ulint cells = 0;
ulint n_bufs;
ulint i;

View File

@ -170,7 +170,7 @@ dropped! So, there seems to be no problem. */
/**********************************************************************
Validates the ibuf data structures when the caller owns ibuf_mutex. */
static
ibool
ibuf_validate_low(void);
/*===================*/
@ -484,8 +484,8 @@ ibuf_data_init_for_space(
index = dict_mem_index_create(buf, (char *) "CLUST_IND", space,
DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF,2);
dict_mem_index_add_field(index, (char *) "PAGE_NO", 0);
dict_mem_index_add_field(index, (char *) "TYPES", 0);
dict_mem_index_add_field(index, (char *) "PAGE_NO", 0, 0);
dict_mem_index_add_field(index, (char *) "TYPES", 0, 0);
index->page_no = FSP_IBUF_TREE_ROOT_PAGE_NO;
@ -2727,7 +2727,7 @@ reset_bit:
/**********************************************************************
Validates the ibuf data structures when the caller owns ibuf_mutex. */
static
ibool
ibuf_validate_low(void)
/*===================*/

View File

@ -690,7 +690,13 @@ and sleep this many microseconds in between */
#define BTR_CUR_RETRY_DELETE_N_TIMES 100
#define BTR_CUR_RETRY_SLEEP_TIME 50000
/* The reference in a field of which data is stored on a different page */
/* The reference in a field for which data is stored on a different page.
The reference is at the end of the 'locally' stored part of the field.
'Locally' means storage in the index record.
We store locally a long enough prefix of each column so that we can determine
the ordering parts of each index record without looking into the externally
stored part. */
/*--------------------------------------*/
#define BTR_EXTERN_SPACE_ID 0 /* space id where stored */
#define BTR_EXTERN_PAGE_NO 4 /* page no where stored */

View File

@ -364,11 +364,24 @@ to a file. Note that we must be careful to calculate the same value
on 32-bit and 64-bit architectures. */
ulint
buf_calc_page_checksum(
/*===================*/
buf_calc_page_new_checksum(
/*=======================*/
/* out: checksum */
byte* page); /* in: buffer page */
/************************************************************************
In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
looked at the first few bytes of the page. This calculates that old
checksum.
NOTE: we must first store the new formula checksum to
FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
because this takes that field as an input! */
ulint
buf_calc_page_old_checksum(
/*=======================*/
/* out: checksum */
byte* page); /* in: buffer page */
/************************************************************************
Checks if a page is corrupt. */
ibool

View File

@ -453,8 +453,6 @@ struct dfield_struct{
void* data; /* pointer to data */
ulint len; /* data length; UNIV_SQL_NULL if SQL null; */
dtype_t type; /* type of data */
ulint col_no; /* when building index entries, the column
number can be stored here */
};
struct dtuple_struct {

View File

@ -18,14 +18,16 @@ typedef struct dtype_struct dtype_t;
data type */
extern dtype_t* dtype_binary;
/* Data main types of SQL data; NOTE! character data types requiring
collation transformation must have the smallest codes! All codes must be
less than 256! */
/* Data main types of SQL data */
#define DATA_VARCHAR 1 /* character varying */
#define DATA_CHAR 2 /* fixed length character */
#define DATA_FIXBINARY 3 /* binary string of fixed length */
#define DATA_BINARY 4 /* binary string */
#define DATA_BLOB 5 /* binary large object */
#define DATA_BLOB 5 /* binary large object, or a TEXT type; if
(prtype & DATA_NONLATIN1) != 0 the data must
be compared by MySQL as a whole field; if
(prtype & DATA_BINARY_TYPE) == 0, then this is
actually a TEXT column */
#define DATA_INT 6 /* integer: can be any size 1 - 8 bytes */
#define DATA_SYS_CHILD 7 /* address of the child page in node pointer */
#define DATA_SYS 8 /* system column */
@ -34,35 +36,55 @@ binary strings */
#define DATA_FLOAT 9
#define DATA_DOUBLE 10
#define DATA_DECIMAL 11 /* decimal number stored as an ASCII string */
#define DATA_VARMYSQL 12 /* data types for which comparisons must be */
#define DATA_MYSQL 13 /* made by MySQL */
#define DATA_ERROR 111 /* error value */
#define DATA_MTYPE_MAX 255
#define DATA_VARMYSQL 12 /* non-latin1 varying length char */
#define DATA_MYSQL 13 /* non-latin1 fixed length char */
#define DATA_MTYPE_MAX 63 /* dtype_store_for_order_and_null_size()
requires the values are <= 63 */
/*-------------------------------------------*/
/* Precise data types for system columns; NOTE: the values must run
from 0 up in the order given! All codes must be less than 256! */
/* In the lowest byte in the precise type we store the MySQL type code
(not applicable for system columns). */
#define DATA_ENGLISH 4 /* English language character string: this
is a relic from pre-MySQL time and only used
for InnoDB's own system tables */
#define DATA_ERROR 111 /* another relic from pre-MySQL time */
#define DATA_MYSQL_TYPE_MASK 255 /* AND with this mask to extract the MySQL
type from the precise type */
/* Precise data types for system columns and the length of those columns;
NOTE: the values must run from 0 up in the order given! All codes must
be less than 256 */
#define DATA_ROW_ID 0 /* row id: a dulint */
#define DATA_ROW_ID_LEN 6 /* stored length for row id */
#define DATA_TRX_ID 1 /* transaction id: 6 bytes */
#define DATA_TRX_ID_LEN 6
#define DATA_ROLL_PTR 2 /* rollback data pointer: 7 bytes */
#define DATA_ROLL_PTR_LEN 7
#define DATA_MIX_ID 3 /* mixed index label: a dulint, stored in
a row in a compressed form */
#define DATA_MIX_ID_LEN 9 /* maximum stored length for mix id (in a
compressed dulint form) */
#define DATA_N_SYS_COLS 4 /* number of system columns defined above */
/*-------------------------------------------*/
/* Flags ORed to the precise data type */
#define DATA_NOT_NULL 256 /* this is ORed to the precise type when
the column is declared as NOT NULL */
#define DATA_UNSIGNED 512 /* this is ORed to the precise type when
we have an unsigned integer type */
#define DATA_BINARY_TYPE 1024 /* if the data type is a binary character
string, this is ORed to the precise type:
this only holds for tables created with
>= MySQL-4.0.14 */
#define DATA_NONLATIN1 2048 /* if the data type is a character string
of a non-latin1 type, this is ORed to the
precise type: this only holds for tables
created with >= MySQL-4.0.14 */
/*-------------------------------------------*/
/* Precise types of a char or varchar data. All codes must be less than 256! */
#define DATA_ENGLISH 4 /* English language character string */
#define DATA_FINNISH 5 /* Finnish */
#define DATA_PRTYPE_MAX 255
/* This many bytes we need to store the type information affecting the
alphabetical order for a single field and decide the storage size of an
SQL null*/
@ -123,7 +145,7 @@ dtype_get_pad_char(
/*===============*/
/* out: padding character code, or
ULINT_UNDEFINED if no padding specified */
dtype_t* type); /* in: typeumn */
dtype_t* type); /* in: type */
/***************************************************************************
Returns the size of a fixed size data type, 0 if not a fixed size type. */
UNIV_INLINE
@ -150,24 +172,24 @@ dtype_is_fixed_size(
/* out: TRUE if fixed size */
dtype_t* type); /* in: type */
/**************************************************************************
Stores to a type the information which determines its alphabetical
ordering. */
Stores for a type the information which determines its alphabetical ordering
and the storage size of an SQL NULL value. */
UNIV_INLINE
void
dtype_store_for_order_and_null_size(
/*================================*/
byte* buf, /* in: buffer for DATA_ORDER_NULL_TYPE_BUF_SIZE
bytes */
bytes where we store the info */
dtype_t* type); /* in: type struct */
/**************************************************************************
Reads of a type the stored information which determines its alphabetical
ordering. */
Reads to a type the stored information which determines its alphabetical
ordering and the storage size of an SQL NULL value. */
UNIV_INLINE
void
dtype_read_for_order_and_null_size(
/*===============================*/
dtype_t* type, /* in: type struct */
byte* buf); /* in: buffer for type order info */
byte* buf); /* in: buffer for the stored order info */
/*************************************************************************
Validates a data type structure. */

View File

@ -110,7 +110,9 @@ dtype_get_pad_char(
if (type->mtype == DATA_CHAR
|| type->mtype == DATA_VARCHAR
|| type->mtype == DATA_BINARY
|| type->mtype == DATA_FIXBINARY) {
|| type->mtype == DATA_FIXBINARY
|| type->mtype == DATA_MYSQL
|| type->mtype == DATA_VARMYSQL) {
/* Space is the padding character for all char and binary
strings */
@ -124,39 +126,56 @@ dtype_get_pad_char(
}
/**************************************************************************
Stores to a type the information which determines its alphabetical
ordering. */
Stores for a type the information which determines its alphabetical ordering
and the storage size of an SQL NULL value. */
UNIV_INLINE
void
dtype_store_for_order_and_null_size(
/*================================*/
byte* buf, /* in: buffer for DATA_ORDER_NULL_TYPE_BUF_SIZE
bytes */
bytes where we store the info */
dtype_t* type) /* in: type struct */
{
ut_ad(4 == DATA_ORDER_NULL_TYPE_BUF_SIZE);
buf[0] = (byte)(type->mtype & 0xFF);
if (type->prtype & DATA_BINARY_TYPE) {
buf[0] = buf[0] | 128;
}
if (type->prtype & DATA_NONLATIN1) {
buf[0] = buf[0] | 64;
}
buf[1] = (byte)(type->prtype & 0xFF);
mach_write_to_2(buf + 2, type->len & 0xFFFF);
}
/**************************************************************************
Reads of a type the stored information which determines its alphabetical
ordering. */
Reads to a type the stored information which determines its alphabetical
ordering and the storage size of an SQL NULL value. */
UNIV_INLINE
void
dtype_read_for_order_and_null_size(
/*===============================*/
dtype_t* type, /* in: type struct */
byte* buf) /* in: buffer for type order info */
byte* buf) /* in: buffer for stored type order info */
{
ut_ad(4 == DATA_ORDER_NULL_TYPE_BUF_SIZE);
type->mtype = buf[0];
type->mtype = buf[0] & 63;
type->prtype = buf[1];
if (buf[0] & 128) {
type->prtype = type->prtype | DATA_BINARY_TYPE;
}
if (buf[0] & 64) {
type->prtype = type->prtype | DATA_NONLATIN1;
}
type->len = mach_read_from_2(buf + 2);
}

View File

@ -44,8 +44,10 @@ Created 5/24/1996 Heikki Tuuri
#define DB_CORRUPTION 39 /* data structure corruption noticed */
#define DB_COL_APPEARS_TWICE_IN_INDEX 40 /* InnoDB cannot handle an index
where same column appears twice */
#define DB_CANNOT_DROP_CONSTRAINT 40 /* dropping a foreign key constraint
#define DB_CANNOT_DROP_CONSTRAINT 41 /* dropping a foreign key constraint
from a table failed */
#define DB_NO_SAVEPOINT 42 /* no savepoint exists with the given
name */
/* The following are partial failure codes */
#define DB_FAIL 1000

View File

@ -569,6 +569,19 @@ dict_index_get_nth_col_pos(
dict_index_t* index, /* in: index */
ulint n); /* in: column number */
/************************************************************************
Looks for a matching field in an index. The column and the prefix length
have to be the same. */
ulint
dict_index_get_nth_field_pos(
/*=========================*/
/* out: position in internal representation
of the index; if not contained, returns
ULINT_UNDEFINED */
dict_index_t* index, /* in: index from which to search */
dict_index_t* index2, /* in: index */
ulint n); /* in: field number in index2 */
/************************************************************************
Looks for column n position in the clustered index. */
ulint

View File

@ -111,10 +111,13 @@ by the column name may be released only after publishing the index. */
void
dict_mem_index_add_field(
/*=====================*/
dict_index_t* index, /* in: index */
char* name, /* in: column name */
ulint order); /* in: order criterion; 0 means an ascending
order */
dict_index_t* index, /* in: index */
char* name, /* in: column name */
ulint order, /* in: order criterion; 0 means an
ascending order */
ulint prefix_len); /* in: 0 or the column prefix length
in a MySQL index like
INDEX (textcol(25)) */
/**************************************************************************
Frees an index memory object. */
@ -158,12 +161,18 @@ struct dict_col_struct{
in some of the functions below */
};
#define DICT_MAX_COL_PREFIX_LEN 512
/* Data structure for a field in an index */
struct dict_field_struct{
dict_col_t* col; /* pointer to the table column */
char* name; /* name of the column */
ulint order; /* flags for ordering this field:
DICT_DESCEND, ... */
dict_col_t* col; /* pointer to the table column */
char* name; /* name of the column */
ulint order; /* flags for ordering this field:
DICT_DESCEND, ... */
ulint prefix_len; /* 0 or the length of the column
prefix in a MySQL index of type, e.g.,
INDEX (textcol(25)); must be smaller
than DICT_MAX_COL_PREFIX_LEN */
};
/* Data structure for an index tree */

View File

@ -43,7 +43,10 @@ struct fil_addr_struct{
extern fil_addr_t fil_addr_null;
/* The byte offsets on a file page for various variables */
#define FIL_PAGE_SPACE 0 /* space id the page belongs to */
#define FIL_PAGE_SPACE_OR_CHKSUM 0 /* in < MySQL-4.0.14 space id the
page belongs to (== 0) but in later
versions the 'new' checksum of the
page */
#define FIL_PAGE_OFFSET 4 /* page offset inside space */
#define FIL_PAGE_PREV 8 /* if there is a 'natural' predecessor
of the page, its offset */
@ -64,7 +67,7 @@ extern fil_addr_t fil_addr_null;
#define FIL_PAGE_DATA 38 /* start of the data on the page */
/* File page trailer */
#define FIL_PAGE_END_LSN 8 /* the low 4 bytes of this are used
#define FIL_PAGE_END_LSN_OLD_CHKSUM 8 /* the low 4 bytes of this are used
to store the page checksum, the
last 4 bytes should be identical
to the last 4 bytes of FIL_PAGE_LSN */

View File

@ -450,6 +450,18 @@ lock_rec_get_mutex_for_addr(
ulint space, /* in: space id */
ulint page_no);/* in: page number */
/*************************************************************************
Checks that a transaction id is sensible, i.e., not in the future: it must
be smaller than the global trx id counter. If the check fails, a diagnostic
message describing the record is printed to stderr. */
ibool
lock_check_trx_id_sanity(
/*=====================*/
/* out: TRUE if ok, FALSE if trx_id is
not smaller than the global trx id counter */
dulint trx_id, /* in: trx id */
rec_t* rec, /* in: user record */
dict_index_t* index, /* in: index the record belongs to;
NOTE(review): callers in lock0lock.c also
pass secondary indexes, not only the
clustered index */
ibool has_kernel_mutex);/* in: TRUE if the caller owns the
kernel mutex; if FALSE, the function
reserves and releases it itself */
/*************************************************************************
Validates the lock queue on a single record. */
ibool

View File

@ -146,6 +146,21 @@ os_file_create_simple(
ulint access_type,/* in: OS_FILE_READ_ONLY or OS_FILE_READ_WRITE */
ibool* success);/* out: TRUE if succeed, FALSE if error */
/********************************************************************
A simple function to open or create a file. */
os_file_t
os_file_create_simple_no_error_handling(
/*====================================*/
/* out, own: handle to the file, not defined if error,
error number can be retrieved with os_get_last_error */
char* name, /* in: name of the file or path as a null-terminated
string */
ulint create_mode,/* in: OS_FILE_OPEN if an existing file is opened
(if does not exist, error), or OS_FILE_CREATE if a new
file is created (if exists, error) */
ulint access_type,/* in: OS_FILE_READ_ONLY or OS_FILE_READ_WRITE */
ibool* success);/* out: TRUE if succeed, FALSE if error */
/********************************************************************
Opens an existing file or creates a new. */
os_file_t
@ -173,6 +188,14 @@ os_file_close(
/* out: TRUE if success */
os_file_t file); /* in, own: handle to a file */
/***************************************************************************
Closes a file handle. */
ibool
os_file_close_no_error_handling(
/*============================*/
/* out: TRUE if success */
os_file_t file); /* in, own: handle to a file */
/***************************************************************************
Gets a file size. */
ibool

View File

@ -666,6 +666,15 @@ page_rec_validate(
/* out: TRUE if ok */
rec_t* rec); /* in: record on the page */
/*******************************************************************
Checks that the first directory slot points to the infimum record and
the last to the supremum. This function is intended to track if the
bug fixed in 4.0.14 has caused corruption to users' databases. */
void
page_check_dir(
/*===========*/
page_t* page); /* in: index page */
/*******************************************************************
This function checks the consistency of an index page when we do not
know the index. This is also resilient so that this should never crash
even if the page is total garbage. */

View File

@ -42,6 +42,22 @@ cmp_data_data(
buffer) */
ulint len2); /* in: data field length or UNIV_SQL_NULL */
/*****************************************************************
This function is used to compare two data fields for which we know the
data type. */
int
cmp_data_data_slow(
/*===============*/
/* out: 1, 0, -1, if data1 is greater, equal,
less than data2, respectively */
dtype_t* cur_type,/* in: data type of the fields */
byte* data1, /* in: data field (== a pointer to a memory
buffer) */
ulint len1, /* in: data field length or UNIV_SQL_NULL */
byte* data2, /* in: data field (== a pointer to a memory
buffer) */
ulint len2); /* in: data field length or UNIV_SQL_NULL */
/*****************************************************************
This function is used to compare two dfields where at least the first
has its data type field set. */
UNIV_INLINE

View File

@ -58,7 +58,8 @@ row_mysql_store_col_in_innobase_format(
/*===================================*/
dfield_t* dfield, /* in/out: dfield */
byte* buf, /* in/out: buffer for the converted
value */
value; this must be at least col_len
long! */
byte* mysql_data, /* in: MySQL column value, not
SQL NULL; NOTE that dfield may also
get a pointer to mysql_data,
@ -96,7 +97,6 @@ row_mysql_store_col_in_innobase_format(
while (col_len > 0 && ptr[col_len - 1] == ' ') {
col_len--;
}
} else if (type == DATA_BLOB) {
ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len);
}

View File

@ -86,9 +86,10 @@ dtuple_t*
row_build(
/*======*/
/* out, own: row built; see the NOTE below! */
ulint type, /* in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
the former copies also the data fields to
heap as the latter only places pointers to
ulint type, /* in: ROW_COPY_POINTERS, ROW_COPY_DATA, or
ROW_COPY_ALSO_EXTERNALS,
the two last copy also the data fields to
heap as the first only places pointers to
data fields on the index page, and thus is
more efficient */
dict_index_t* index, /* in: clustered index */

View File

@ -87,9 +87,11 @@ row_printf_step(
/* out: query thread to run next or NULL */
que_thr_t* thr); /* in: query thread */
/********************************************************************
Converts a key value stored in MySQL format to an Innobase dtuple.
The last field of the key value may be just a prefix of a fixed length
field: hence the parameter key_len. */
Converts a key value stored in MySQL format to an Innobase dtuple. The last
field of the key value may be just a prefix of a fixed length field: hence
the parameter key_len. But currently we do not allow search keys where the
last field is only a prefix of the full key field len and print a warning if
such appears. */
void
row_sel_convert_mysql_key_to_innobase(
@ -100,6 +102,7 @@ row_sel_convert_mysql_key_to_innobase(
to index! */
byte* buf, /* in: buffer to use in field
conversions */
ulint buf_len, /* in: buffer length */
dict_index_t* index, /* in: index of the key value */
byte* key_ptr, /* in: MySQL key value */
ulint key_len); /* in: MySQL key value length */

View File

@ -114,13 +114,15 @@ row_upd_index_write_log(
closed within this function */
mtr_t* mtr); /* in: mtr into whose log to write */
/***************************************************************
Returns TRUE if row update changes size of some field in index. */
Returns TRUE if row update changes size of some field in index or if some
field to be updated is stored externally in rec or update. */
ibool
row_upd_changes_field_size(
/*=======================*/
row_upd_changes_field_size_or_external(
/*===================================*/
/* out: TRUE if the update changes the size of
some field in index */
some field in index or the field is external
in rec or update */
rec_t* rec, /* in: record in clustered index */
dict_index_t* index, /* in: clustered index */
upd_t* update);/* in: update vector */
@ -175,16 +177,10 @@ row_upd_index_replace_new_col_vals(
dtuple_t* entry, /* in/out: index entry where replaced */
dict_index_t* index, /* in: index; NOTE that may also be a
non-clustered index */
upd_t* update); /* in: update vector */
/***************************************************************
Replaces the new column values stored in the update vector to the
clustered index entry given. */
void
row_upd_clust_index_replace_new_col_vals(
/*=====================================*/
dtuple_t* entry, /* in/out: index entry where replaced */
upd_t* update); /* in: update vector */
upd_t* update, /* in: update vector */
mem_heap_t* heap); /* in: memory heap to which we allocate and
copy the new values, set this as NULL if you
do not want allocation */
/***************************************************************
Checks if an update vector changes an ordering field of an index record.
This function is fast if the update vector is short or the number of ordering
@ -358,9 +354,9 @@ struct upd_node_struct{
externally in the clustered index record of
row */
ulint n_ext_vec;/* number of fields in ext_vec */
mem_heap_t* heap; /* memory heap used as auxiliary storage for
row; this must be emptied after a successful
update if node->row != NULL */
mem_heap_t* heap; /* memory heap used as auxiliary storage;
this must be emptied after a successful
update */
/*----------------------*/
sym_node_t* table_sym;/* table node in symbol table */
que_node_t* col_assign_list;

View File

@ -153,6 +153,7 @@ extern mutex_t* kernel_mutex_temp;/* mutex protecting the server, trx structs,
/* Array of English strings describing the current state of an
i/o handler thread */
extern char* srv_io_thread_op_info[];
extern char* srv_io_thread_function[];
typedef struct srv_sys_struct srv_sys_t;

View File

@ -177,6 +177,55 @@ trx_general_rollback_for_mysql(
ibool partial,/* in: TRUE if partial rollback requested */
trx_savept_t* savept);/* in: pointer to savepoint undo number, if
partial rollback requested */
/***********************************************************************
Rolls a transaction back to a named savepoint. Modifications after the
savepoint are undone but InnoDB does NOT release the corresponding locks
which are stored in memory. If a lock is 'implicit', that is, a new inserted
row holds a lock where the lock information is carried by the trx id stored in
the row, these locks are naturally released in the rollback. Savepoints which
were set after this savepoint are deleted. */
ulint
trx_rollback_to_savepoint_for_mysql(
/*================================*/
/* out: if no savepoint
of the name found then
DB_NO_SAVEPOINT,
otherwise DB_SUCCESS */
trx_t* trx, /* in: transaction handle */
char* savepoint_name, /* in: savepoint name */
ib_longlong* mysql_binlog_cache_pos);/* out: the MySQL binlog cache
position corresponding to this
savepoint; MySQL needs this
information to remove the
binlog entries of the queries
executed after the savepoint */
/***********************************************************************
Creates a named savepoint. If the transaction is not yet started, starts it.
If there is already a savepoint of the same name, this call erases that old
savepoint and replaces it with a new one. Savepoints are deleted in a
transaction commit or rollback. */
ulint
trx_savepoint_for_mysql(
/*====================*/
/* out: always DB_SUCCESS */
trx_t* trx, /* in: transaction handle */
char* savepoint_name, /* in: savepoint name */
ib_longlong binlog_cache_pos); /* in: MySQL binlog cache
position corresponding to this
connection at the time of the
savepoint */
/***********************************************************************
Frees savepoint structs. */
void
trx_roll_savepoints_free(
/*=====================*/
trx_t* trx, /* in: transaction handle */
trx_named_savept_t* savep); /* in: free all savepoints > this one;
if this is NULL, free all savepoints
of trx */
extern sess_t* trx_dummy_sess;
@ -207,6 +256,21 @@ struct roll_node_struct{
case of a partial rollback */
};
/* A savepoint set with SQL's "SAVEPOINT savepoint_id" command. Each struct
describes one named savepoint and carries the list node that chains it
together with the other savepoints of a transaction. */
struct trx_named_savept_struct{
char* name; /* savepoint name */
trx_savept_t savept; /* the undo number corresponding to
the savepoint */
ib_longlong mysql_binlog_cache_pos;
/* the MySQL binlog cache position
corresponding to this savepoint, not
defined if the MySQL binlogging is not
enabled */
UT_LIST_NODE_T(trx_named_savept_t)
trx_savepoints; /* list node: links this savepoint
into the list of savepoints of a
transaction */
};
/* Rollback node states */
#define ROLL_NODE_SEND 1
#define ROLL_NODE_WAIT 2

View File

@ -296,6 +296,16 @@ trx_is_active(
return(FALSE);
}
if (ut_dulint_cmp(trx_id, trx_sys->max_trx_id) >= 0) {
/* There must be corruption: we return TRUE because this
function is only called by lock_clust_rec_some_has_impl()
and row_vers_impl_x_locked_off_kernel() and they have
diagnostic prints in this case */
return(TRUE);
}
trx = trx_get_on_id(trx_id);
if (trx && (trx->conc_state == TRX_ACTIVE)) {

View File

@ -381,7 +381,8 @@ struct trx_struct{
replication slave, we have here the
master binlog name up to which
replication has processed; otherwise
this is a pointer to a null character */
this is a pointer to a null
character */
ib_longlong mysql_master_log_pos;
/* if the database server is a MySQL
replication slave, this is the
@ -501,6 +502,10 @@ struct trx_struct{
mem_heap_t* read_view_heap; /* memory heap for the read view */
read_view_t* read_view; /* consistent read view or NULL */
/*------------------------------*/
UT_LIST_BASE_NODE_T(trx_named_savept_t)
trx_savepoints; /* savepoints set with SAVEPOINT ...,
oldest first */
/*------------------------------*/
mutex_t undo_mutex; /* mutex protecting the fields in this
section (down to undo_no_arr), EXCEPT
last_sql_stat_start, which can be

View File

@ -24,6 +24,7 @@ typedef struct trx_undo_inf_struct trx_undo_inf_t;
typedef struct trx_purge_struct trx_purge_t;
typedef struct roll_node_struct roll_node_t;
typedef struct commit_node_struct commit_node_t;
typedef struct trx_named_savept_struct trx_named_savept_t;
/* Transaction savepoint */
typedef struct trx_savept_struct trx_savept_t;

View File

@ -50,6 +50,37 @@ extern ulint* ut_dbg_null_ptr;
}\
}
/* ut_anp: an assertion which does not print the asserted expression.
This can be used if there are % characters in the assertion formula:
if we try to printf the formula gcc would complain of illegal print
format characters. EXPR is evaluated once. If it is zero, the macro prints
a timestamp, the thread id, the file and the line to stderr, sets
ut_dbg_stop_threads to TRUE and then reads through ut_dbg_null_ptr to
generate a memory trap intentionally. Independently of EXPR, if
ut_dbg_stop_threads is set, the calling thread reports that it has stopped
and goes to sleep.
NOTE: the macro expands to a plain { ... } block, not to do { } while (0);
do not use it as the unbraced body of an if which has an else branch. */
#define ut_anp(EXPR)\
{\
ulint dbg_i;\
\
if (!((ulint)(EXPR) + ut_dbg_zero)) {\
ut_print_timestamp(stderr);\
fprintf(stderr,\
" InnoDB: Assertion failure in thread %lu in file %s line %lu\n",\
os_thread_pf(os_thread_get_curr_id()), IB__FILE__,\
(ulint)__LINE__);\
fprintf(stderr,\
"\nInnoDB: We intentionally generate a memory trap.\n");\
fprintf(stderr,\
"InnoDB: Send a detailed bug report to mysql@lists.mysql.com\n");\
ut_dbg_stop_threads = TRUE;\
dbg_i = *(ut_dbg_null_ptr);\
if (dbg_i) {\
ut_dbg_null_ptr = NULL;\
}\
}\
if (ut_dbg_stop_threads) {\
fprintf(stderr,\
"InnoDB: Thread %lu stopped in file %s line %lu\n",\
os_thread_pf(os_thread_get_curr_id()), IB__FILE__, (ulint)__LINE__);\
os_thread_sleep(1000000000);\
}\
}
#define ut_error {\
ulint dbg_i;\
ut_print_timestamp(stderr);\

View File

@ -57,7 +57,7 @@ ut_free(
/*====*/
void* ptr); /* in, own: memory block */
/**************************************************************************
Frees all allocated memory not freed yet. */
Frees in shutdown all allocated memory not freed yet. */
void
ut_free_all_mem(void);

View File

@ -356,7 +356,7 @@ lock_mutex_enter_kernel(void)
}
/*************************************************************************
Releses the kernel mutex. This function is used in this module to allow
Releases the kernel mutex. This function is used in this module to allow
monitoring the contention degree on the kernel mutex caused by the lock
operations. */
UNIV_INLINE
@ -514,6 +514,53 @@ lock_rec_mutex_own_all(void)
#endif
/*************************************************************************
Checks that a transaction id is sensible, i.e., not in the future: it must
be smaller than the global trx id counter trx_sys->max_trx_id. If it is not,
prints a diagnostic message describing the record, its table and its index
to stderr. The function does not stop the server; the caller decides how to
proceed with the suspect record. */

ibool
lock_check_trx_id_sanity(
/*=====================*/
					/* out: TRUE if ok, FALSE if trx_id
					is not smaller than the global trx id
					counter */
	dulint		trx_id,		/* in: trx id */
	rec_t*		rec,		/* in: user record */
	dict_index_t*	index,		/* in: index the record belongs to;
					callers pass secondary indexes as
					well as the clustered index */
	ibool		has_kernel_mutex)/* in: TRUE if the caller owns the
					kernel mutex; if FALSE, it is reserved
					and released inside this function */
{
	char	err_buf[500];
	ibool	is_ok		= TRUE;

	/* trx_sys->max_trx_id is read under the kernel mutex: reserve it
	unless the caller already owns it */

	if (!has_kernel_mutex) {
		mutex_enter(&kernel_mutex);
	}

	/* A sanity check: the trx_id in rec must be smaller than the global
	trx id counter */

	if (ut_dulint_cmp(trx_id, trx_sys->max_trx_id) >= 0) {
		/* The print length limit is kept well below the size of
		err_buf; presumably this leaves room for the terminating
		characters written by rec_sprintf */

		rec_sprintf(err_buf, 400, rec);

		/* ut_print_timestamp() is followed directly by our message:
		start it with a space, like the other diagnostics do, so that
		the timestamp and 'InnoDB:' do not run together */

		ut_print_timestamp(stderr);
		fprintf(stderr,
" InnoDB: Error: transaction id associated with record\n%s\n"
"InnoDB: in table %s index %s\n"
"InnoDB: is %lu %lu which is higher than the global trx id counter %lu %lu!\n"
"InnoDB: The table is corrupt. You have to do dump + drop + reimport.\n",
			err_buf, index->table_name, index->name,
			ut_dulint_get_high(trx_id),
			ut_dulint_get_low(trx_id),
			ut_dulint_get_high(trx_sys->max_trx_id),
			ut_dulint_get_low(trx_sys->max_trx_id));

		is_ok = FALSE;
	}

	if (!has_kernel_mutex) {
		mutex_exit(&kernel_mutex);
	}

	return(is_ok);
}
/*************************************************************************
Checks that a record is seen in a consistent read. */
@ -539,6 +586,15 @@ lock_clust_rec_cons_read_sees(
return(TRUE);
}
if (!lock_check_trx_id_sanity(trx_id, rec, index, FALSE)) {
/* Trying to get the 'history' of a corrupt record is bound
to fail: let us try to use the record itself in the query */
fprintf(stderr,
"InnoDB: We try to access the corrupt record in the query anyway.\n");
return(TRUE);
}
return(FALSE);
}
@ -563,6 +619,8 @@ lock_sec_rec_cons_read_sees(
{
dulint max_trx_id;
UT_NOT_USED(index);
ut_ad(!(index->type & DICT_CLUSTERED));
ut_ad(page_rec_is_user_rec(rec));
@ -575,6 +633,16 @@ lock_sec_rec_cons_read_sees(
if (ut_dulint_cmp(max_trx_id, view->up_limit_id) >= 0) {
if (!lock_check_trx_id_sanity(max_trx_id, rec, index, FALSE)) {
/* Trying to get the 'history' of a corrupt record is
bound to fail: let us try to use the record itself in
the query */
fprintf(stderr,
"InnoDB: We try to access the corrupt record in the query anyway.\n");
return(TRUE);
}
return(FALSE);
}
@ -1569,6 +1637,15 @@ lock_sec_rec_some_has_impl_off_kernel(
/* Ok, in this case it is possible that some transaction has an
implicit x-lock. We have to look in the clustered index. */
if (!lock_check_trx_id_sanity(page_get_max_trx_id(page), rec, index,
TRUE)) {
buf_page_print(page);
/* The page is corrupt: try to avoid a crash by returning
NULL */
return(NULL);
}
return(row_vers_impl_x_locked_off_kernel(rec, index));
}
@ -2565,7 +2642,7 @@ lock_move_rec_list_start(
ulint heap_no;
ulint type_mode;
ut_ad(new_page);
ut_a(new_page);
lock_mutex_enter_kernel();
@ -3028,7 +3105,7 @@ lock_deadlock_recursive(
we return LOCK_VICTIM_IS_START */
{
lock_t* lock;
ulint bit_no;
ulint bit_no = ULINT_UNDEFINED;
trx_t* lock_trx;
char* err_buf;
ulint ret;
@ -3067,6 +3144,7 @@ lock_deadlock_recursive(
lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock);
} else {
ut_ad(lock_get_type(lock) == LOCK_REC);
ut_a(bit_no != ULINT_UNDEFINED);
lock = lock_rec_get_prev(lock, bit_no);
}

View File

@ -375,7 +375,7 @@ log_pad_current_log_block(void)
log_close();
log_release();
ut_ad((ut_dulint_get_low(lsn) % OS_FILE_LOG_BLOCK_SIZE)
ut_anp((ut_dulint_get_low(lsn) % OS_FILE_LOG_BLOCK_SIZE)
== LOG_BLOCK_HDR_SIZE);
}
@ -1070,8 +1070,8 @@ log_group_write_buf(
ulint i;
ut_ad(mutex_own(&(log_sys->mutex)));
ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);
ut_ad(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
ut_anp(len % OS_FILE_LOG_BLOCK_SIZE == 0);
ut_anp(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
if (new_data_offset == 0) {
write_header = TRUE;
@ -2123,11 +2123,11 @@ log_group_archive(
start_lsn = log_sys->archived_lsn;
ut_ad(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
ut_anp(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
end_lsn = log_sys->next_archived_lsn;
ut_ad(ut_dulint_get_low(end_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
ut_anp(ut_dulint_get_low(end_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
buf = log_sys->archive_buf;
@ -2234,7 +2234,7 @@ loop:
group->next_archived_file_no = group->archived_file_no + n_files;
group->next_archived_offset = next_offset % group->file_size;
ut_ad(group->next_archived_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
ut_anp(group->next_archived_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
}
/*********************************************************
@ -2429,8 +2429,8 @@ loop:
start_lsn = log_sys->archived_lsn;
if (calc_new_limit) {
ut_ad(log_sys->archive_buf_size % OS_FILE_LOG_BLOCK_SIZE == 0);
ut_anp(log_sys->archive_buf_size % OS_FILE_LOG_BLOCK_SIZE
== 0);
limit_lsn = ut_dulint_add(start_lsn,
log_sys->archive_buf_size);
@ -2916,6 +2916,7 @@ loop:
mutex_enter(&kernel_mutex);
/* Check that there are no longer transactions */
if (trx_n_mysql_transactions > 0
|| UT_LIST_GET_LEN(trx_sys->trx_list) > 0) {
@ -2924,6 +2925,8 @@ loop:
goto loop;
}
/* Check that the master thread is suspended */
if (srv_n_threads_active[SRV_MASTER] != 0) {
mutex_exit(&kernel_mutex);
@ -2952,7 +2955,6 @@ loop:
}
log_archive_all();
log_make_checkpoint_at(ut_dulint_max, TRUE);
mutex_enter(&(log_sys->mutex));
@ -2961,8 +2963,9 @@ loop:
if (ut_dulint_cmp(lsn, log_sys->last_checkpoint_lsn) != 0
|| (srv_log_archive_on
&& ut_dulint_cmp(lsn,
ut_dulint_add(log_sys->archived_lsn, LOG_BLOCK_HDR_SIZE)) != 0)) {
&& ut_dulint_cmp(lsn,
ut_dulint_add(log_sys->archived_lsn, LOG_BLOCK_HDR_SIZE))
!= 0)) {
mutex_exit(&(log_sys->mutex));
@ -2981,10 +2984,22 @@ loop:
mutex_exit(&(log_sys->mutex));
mutex_enter(&kernel_mutex);
/* Check that the master thread has stayed suspended */
if (srv_n_threads_active[SRV_MASTER] != 0) {
fprintf(stderr,
"InnoDB: Warning: the master thread woke up during shutdown\n");
mutex_exit(&kernel_mutex);
goto loop;
}
mutex_exit(&kernel_mutex);
fil_flush_file_spaces(FIL_TABLESPACE);
fil_flush_file_spaces(FIL_LOG);
/* The following fil_write_... will pass the buffer pool: therefore
/* The next fil_write_... will pass the buffer pool: therefore
it is essential that the buffer pool has been completely flushed
to disk! */
@ -2993,12 +3008,14 @@ loop:
goto loop;
}
/* The lock timeout thread should now have exited */
if (srv_lock_timeout_and_monitor_active) {
goto loop;
}
/* We now suspend also the InnoDB error monitor thread */
/* We now also let the InnoDB error monitor thread exit */
srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;
@ -3008,6 +3025,7 @@ loop:
}
/* Make some checks that the server really is quiet */
ut_a(srv_n_threads_active[SRV_MASTER] == 0);
ut_a(buf_all_freed());
ut_a(0 == ut_dulint_cmp(lsn, log_sys->lsn));
@ -3016,6 +3034,7 @@ loop:
fil_flush_file_spaces(FIL_TABLESPACE);
/* Make some checks that the server really is quiet */
ut_a(srv_n_threads_active[SRV_MASTER] == 0);
ut_a(buf_all_freed());
ut_a(0 == ut_dulint_cmp(lsn, log_sys->lsn));
}

View File

@ -973,7 +973,7 @@ recv_recover_page(
ulint space, /* in: space id */
ulint page_no) /* in: page number */
{
buf_block_t* block;
buf_block_t* block = NULL;
recv_addr_t* recv_addr;
recv_t* recv;
byte* buf;
@ -1085,7 +1085,7 @@ recv_recover_page(
page_lsn = page_newest_lsn;
mach_write_to_8(page + UNIV_PAGE_SIZE
- FIL_PAGE_END_LSN, ut_dulint_zero);
- FIL_PAGE_END_LSN_OLD_CHKSUM, ut_dulint_zero);
mach_write_to_8(page + FIL_PAGE_LSN, ut_dulint_zero);
}
@ -1107,7 +1107,7 @@ recv_recover_page(
recv_parse_or_apply_log_rec_body(recv->type, buf,
buf + recv->len, page, &mtr);
mach_write_to_8(page + UNIV_PAGE_SIZE
- FIL_PAGE_END_LSN,
- FIL_PAGE_END_LSN_OLD_CHKSUM,
ut_dulint_add(recv->start_lsn,
recv->len));
mach_write_to_8(page + FIL_PAGE_LSN,
@ -1132,6 +1132,8 @@ recv_recover_page(
mutex_exit(&(recv_sys->mutex));
if (!recover_backup && modification_to_page) {
ut_a(block);
buf_flush_recv_note_modification(block, start_lsn, end_lsn);
}
@ -1339,6 +1341,7 @@ loop:
mutex_exit(&(recv_sys->mutex));
}
#ifdef UNIV_HOTBACKUP
/***********************************************************************
Applies log records in the hash table to a backup. */
@ -1520,8 +1523,8 @@ recv_check_identical(
for (i = 0; i < len; i++) {
if (str1[i] != str2[i]) {
fprintf(stderr, "Strings do not match at offset %lu\n", i);
fprintf(stderr,
"Strings do not match at offset %lu\n", i);
ut_print_buf(str1 + i, 16);
fprintf(stderr, "\n");
ut_print_buf(str2 + i, 16);
@ -1654,6 +1657,7 @@ recv_compare_spaces_low(
recv_compare_spaces(space1, space2, n_pages);
}
#endif
/***********************************************************************
Tries to parse a single log record and returns its length. */

View File

@ -99,6 +99,12 @@ mem_pool_t* mem_comm_pool = NULL;
ulint mem_out_of_mem_err_msg_count = 0;
/* We use this counter to check that the mem pool mutex does not leak;
this is to track a strange assertion failure reported at
mysql@lists.mysql.com */
ulint mem_n_threads_inside = 0;
/************************************************************************
Reserves the mem pool mutex. */
@ -328,6 +334,9 @@ mem_area_alloc(
n = ut_2_log(ut_max(size + MEM_AREA_EXTRA_SIZE, MEM_AREA_MIN_SIZE));
mutex_enter(&(pool->mutex));
mem_n_threads_inside++;
ut_a(mem_n_threads_inside == 1);
area = UT_LIST_GET_FIRST(pool->free_list[n]);
@ -338,6 +347,7 @@ mem_area_alloc(
/* Out of memory in memory pool: we try to allocate
from the operating system with the regular malloc: */
mem_n_threads_inside--;
mutex_exit(&(pool->mutex));
return(ut_malloc(size));
@ -353,6 +363,16 @@ mem_area_alloc(
n);
mem_analyze_corruption((byte*)area);
/* Try to analyze a strange assertion failure reported at
mysql@lists.mysql.com where the free bit IS 1 in the
hex dump above */
if (mem_area_get_free(area)) {
fprintf(stderr,
"InnoDB: Probably a race condition because now the area is marked free!\n");
}
ut_a(0);
}
@ -374,6 +394,7 @@ mem_area_alloc(
pool->reserved += mem_area_get_size(area);
mem_n_threads_inside--;
mutex_exit(&(pool->mutex));
ut_ad(mem_pool_validate(pool));
@ -495,6 +516,9 @@ mem_area_free(
n = ut_2_log(size);
mutex_enter(&(pool->mutex));
mem_n_threads_inside++;
ut_a(mem_n_threads_inside == 1);
if (buddy && mem_area_get_free(buddy)
&& (size == mem_area_get_size(buddy))) {
@ -518,6 +542,7 @@ mem_area_free(
pool->reserved += ut_2_exp(n);
mem_n_threads_inside--;
mutex_exit(&(pool->mutex));
mem_area_free(new_ptr, pool);
@ -533,6 +558,7 @@ mem_area_free(
pool->reserved -= size;
}
mem_n_threads_inside--;
mutex_exit(&(pool->mutex));
ut_ad(mem_pool_validate(pool));
@ -577,7 +603,7 @@ mem_pool_validate(
}
}
ut_a(free + pool->reserved == pool->size
ut_anp(free + pool->reserved == pool->size
- (pool->size % MEM_AREA_MIN_SIZE));
mutex_exit(&(pool->mutex));

View File

@ -60,6 +60,7 @@ struct os_aio_slot_struct{
ulint pos; /* index of the slot in the aio
array */
ibool reserved; /* TRUE if this slot is reserved */
time_t reservation_time;/* time when reserved */
ulint len; /* length of the block to read or
write */
byte* buf; /* buffer used in i/o */
@ -147,6 +148,12 @@ time_t os_last_printout;
ibool os_has_said_disk_full = FALSE;
/* The mutex protecting the following counts of pending pread and pwrite
operations */
os_mutex_t os_file_count_mutex;
ulint os_file_n_pending_preads = 0;
ulint os_file_n_pending_pwrites = 0;
/***************************************************************************
Gets the operating system version. Currently works only on Windows. */
@ -364,6 +371,8 @@ os_io_init_simple(void)
{
ulint i;
os_file_count_mutex = os_mutex_create(NULL);
for (i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) {
os_file_seek_mutexes[i] = os_mutex_create(NULL);
}
@ -415,9 +424,8 @@ try_again:
file = CreateFile(name,
access,
FILE_SHARE_READ | FILE_SHARE_WRITE,
/* file can be read and written
also by other processes */
FILE_SHARE_READ,/* file can be read also by other
processes */
NULL, /* default security attributes */
create_flag,
attributes,
@ -481,6 +489,101 @@ try_again:
return(file);
#endif
}
/********************************************************************
A simple function to open or create a file. */
os_file_t
os_file_create_simple_no_error_handling(
/*====================================*/
/* out, own: handle to the file, not defined if error,
error number can be retrieved with os_get_last_error */
char* name, /* in: name of the file or path as a null-terminated
string */
ulint create_mode,/* in: OS_FILE_OPEN if an existing file is opened
(if does not exist, error), or OS_FILE_CREATE if a new
file is created (if exists, error) */
ulint access_type,/* in: OS_FILE_READ_ONLY or OS_FILE_READ_WRITE */
ibool* success)/* out: TRUE if succeed, FALSE if error */
{
#ifdef __WIN__
os_file_t file;
DWORD create_flag;
DWORD access;
DWORD attributes = 0;
ut_a(name);
if (create_mode == OS_FILE_OPEN) {
create_flag = OPEN_EXISTING;
} else if (create_mode == OS_FILE_CREATE) {
create_flag = CREATE_NEW;
} else {
create_flag = 0;
ut_error;
}
if (access_type == OS_FILE_READ_ONLY) {
access = GENERIC_READ;
} else if (access_type == OS_FILE_READ_WRITE) {
access = GENERIC_READ | GENERIC_WRITE;
} else {
access = 0;
ut_error;
}
file = CreateFile(name,
access,
FILE_SHARE_READ,/* file can be read also by other
processes */
NULL, /* default security attributes */
create_flag,
attributes,
NULL); /* no template file */
if (file == INVALID_HANDLE_VALUE) {
*success = FALSE;
} else {
*success = TRUE;
}
return(file);
#else
os_file_t file;
int create_flag;
ut_a(name);
if (create_mode == OS_FILE_OPEN) {
if (access_type == OS_FILE_READ_ONLY) {
create_flag = O_RDONLY;
} else {
create_flag = O_RDWR;
}
} else if (create_mode == OS_FILE_CREATE) {
create_flag = O_RDWR | O_CREAT | O_EXCL;
} else {
create_flag = 0;
ut_error;
}
if (create_mode == OS_FILE_CREATE) {
file = open(name, create_flag, S_IRUSR | S_IWUSR
| S_IRGRP | S_IWGRP);
} else {
file = open(name, create_flag);
}
if (file == -1) {
*success = FALSE;
} else {
*success = TRUE;
}
return(file);
#endif
}
/********************************************************************
Opens an existing file or creates a new. */
@ -566,9 +669,14 @@ try_again:
file = CreateFile(name,
GENERIC_READ | GENERIC_WRITE, /* read and write
access */
FILE_SHARE_READ | FILE_SHARE_WRITE,
/* file can be read and written
also by other processes */
FILE_SHARE_READ,/* File can be read also by other
processes; we must give the read
permission because of ibbackup. We do
not give the write permission to
others because if someone managed to
start 2 instances of mysqld on the
SAME files, that could cause severe
database corruption! */
NULL, /* default security attributes */
create_flag,
attributes,
@ -676,6 +784,41 @@ os_file_close(
#endif
}
/***************************************************************************
Closes a file handle. This variant does not call any error handling
routine: a failure is reported only through the return value. */

ibool
os_file_close_no_error_handling(
/*============================*/
				/* out: TRUE if success */
	os_file_t	file)	/* in, own: handle to a file */
{
#ifdef __WIN__
	ut_a(file);

	/* CloseHandle returns nonzero on success */
	return(CloseHandle(file) ? TRUE : FALSE);
#else
	/* close returns -1 on failure */
	return((close(file) == -1) ? FALSE : TRUE);
#endif
}
/***************************************************************************
Gets a file size. */
@ -896,6 +1039,7 @@ os_file_pread(
offset */
{
off_t offs;
ssize_t n_bytes;
ut_a((offset & 0xFFFFFFFF) == offset);
@ -917,7 +1061,17 @@ os_file_pread(
os_n_file_reads++;
#ifdef HAVE_PREAD
return(pread(file, buf, n, offs));
os_mutex_enter(os_file_count_mutex);
os_file_n_pending_preads++;
os_mutex_exit(os_file_count_mutex);
n_bytes = pread(file, buf, n, offs);
os_mutex_enter(os_file_count_mutex);
os_file_n_pending_preads--;
os_mutex_exit(os_file_count_mutex);
return(n_bytes);
#else
{
ssize_t ret;
@ -982,8 +1136,16 @@ os_file_pwrite(
os_n_file_writes++;
#ifdef HAVE_PWRITE
os_mutex_enter(os_file_count_mutex);
os_file_n_pending_pwrites++;
os_mutex_exit(os_file_count_mutex);
ret = pwrite(file, buf, n, offs);
os_mutex_enter(os_file_count_mutex);
os_file_n_pending_pwrites--;
os_mutex_exit(os_file_count_mutex);
if (srv_unix_file_flush_method != SRV_UNIX_LITTLESYNC
&& srv_unix_file_flush_method != SRV_UNIX_NOSYNC
&& !os_do_not_call_flush_at_each_write) {
@ -1372,20 +1534,36 @@ os_aio_init(
os_io_init_simple();
for (i = 0; i < n_segments; i++) {
srv_io_thread_op_info[i] = (char*)"not started yet";
}
n_per_seg = n / n_segments;
n_write_segs = (n_segments - 2) / 2;
n_read_segs = n_segments - 2 - n_write_segs;
/* printf("Array n per seg %lu\n", n_per_seg); */
os_aio_read_array = os_aio_array_create(n_read_segs * n_per_seg,
n_read_segs);
os_aio_write_array = os_aio_array_create(n_write_segs * n_per_seg,
n_write_segs);
os_aio_ibuf_array = os_aio_array_create(n_per_seg, 1);
srv_io_thread_function[0] = (char*)"insert buffer thread";
os_aio_log_array = os_aio_array_create(n_per_seg, 1);
srv_io_thread_function[1] = (char*)"log thread";
os_aio_read_array = os_aio_array_create(n_read_segs * n_per_seg,
n_read_segs);
for (i = 2; i < 2 + n_read_segs; i++) {
srv_io_thread_function[i] = (char*)"read thread";
}
os_aio_write_array = os_aio_array_create(n_write_segs * n_per_seg,
n_write_segs);
for (i = 2 + n_read_segs; i < n_segments; i++) {
srv_io_thread_function[i] = (char*)"write thread";
}
os_aio_sync_array = os_aio_array_create(n_slots_sync, 1);
os_aio_n_segments = n_segments;
@ -1677,6 +1855,7 @@ loop:
}
slot->reserved = TRUE;
slot->reservation_time = time(NULL);
slot->message1 = message1;
slot->message2 = message2;
slot->file = file;
@ -2249,6 +2428,8 @@ os_aio_simulated_handle(
ulint total_len;
ulint offs;
ulint lowest_offset;
ulint biggest_age;
ulint age;
byte* combined_buf;
byte* combined_buf2= 0; /* Remove warning */
ibool ret;
@ -2301,22 +2482,55 @@ restart:
n_consecutive = 0;
/* Look for an i/o request at the lowest offset in the array
(we ignore the high 32 bits of the offset in these heuristics) */
/* If there are requests that are at least 2 seconds old, then pick the
oldest one to prevent starvation. If several requests have the same age,
then pick the one at the lowest offset. */
biggest_age = 0;
lowest_offset = ULINT_MAX;
for (i = 0; i < n; i++) {
slot = os_aio_array_get_nth_slot(array, i + segment * n);
if (slot->reserved && slot->offset < lowest_offset) {
if (slot->reserved) {
age = (ulint)difftime(time(NULL),
slot->reservation_time);
/* Found an i/o request */
consecutive_ios[0] = slot;
if ((age >= 2 && age > biggest_age)
|| (age >= 2 && age == biggest_age
&& slot->offset < lowest_offset)) {
n_consecutive = 1;
/* Found an i/o request */
consecutive_ios[0] = slot;
lowest_offset = slot->offset;
n_consecutive = 1;
biggest_age = age;
lowest_offset = slot->offset;
}
}
}
if (n_consecutive == 0) {
/* There were no old requests. Look for an i/o request at the
lowest offset in the array (we ignore the high 32 bits of the
offset in these heuristics) */
lowest_offset = ULINT_MAX;
for (i = 0; i < n; i++) {
slot = os_aio_array_get_nth_slot(array,
i + segment * n);
if (slot->reserved && slot->offset < lowest_offset) {
/* Found an i/o request */
consecutive_ios[0] = slot;
n_consecutive = 1;
lowest_offset = slot->offset;
}
}
}
@ -2422,7 +2636,7 @@ consecutive_loop:
+ FIL_PAGE_LSN + 4)
!= mach_read_from_4(combined_buf + len2
+ UNIV_PAGE_SIZE
- FIL_PAGE_END_LSN + 4)) {
- FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: ERROR: The page to be written seems corrupt!\n");
@ -2583,14 +2797,15 @@ os_aio_print(
double avg_bytes_read;
ulint i;
if (buf_end - buf < 1000) {
if (buf_end - buf < 1200) {
return;
}
for (i = 0; i < srv_n_file_io_threads; i++) {
buf += sprintf(buf, "I/O thread %lu state: %s\n", i,
srv_io_thread_op_info[i]);
buf += sprintf(buf, "I/O thread %lu state: %s (%s)\n", i,
srv_io_thread_op_info[i],
srv_io_thread_function[i]);
}
buf += sprintf(buf, "Pending normal aio reads:");
@ -2665,6 +2880,12 @@ loop:
"%lu OS file reads, %lu OS file writes, %lu OS fsyncs\n",
os_n_file_reads, os_n_file_writes, os_n_fsyncs);
if (os_file_n_pending_preads != 0 || os_file_n_pending_pwrites != 0) {
buf += sprintf(buf,
"%lu pending preads, %lu pending pwrites\n",
os_file_n_pending_preads, os_file_n_pending_pwrites);
}
if (os_n_file_reads == os_n_file_reads_old) {
avg_bytes_read = 0.0;
} else {

View File

@ -187,8 +187,8 @@ os_thread_exit(
is cast as a DWORD */
{
#ifdef UNIV_DEBUG_THREAD_CREATION
printf("A thread exits.\n");
printf("Thread id %lu\n", os_thread_pf(os_thread_get_curr_id()));
printf("Thread exits, id %lu\n",
os_thread_pf(os_thread_get_curr_id()));
#endif
os_mutex_enter(os_sync_mutex);
os_thread_count--;

View File

@ -14,6 +14,7 @@ Created 10/4/1994 Heikki Tuuri
#include "rem0cmp.h"
#include "mtr0log.h"
#include "log0recv.h"
#include "rem0cmp.h"
ulint page_cur_short_succ = 0;
@ -218,6 +219,8 @@ page_cur_search_with_match(
|| (mode == PAGE_CUR_G) || (mode == PAGE_CUR_GE)
|| (mode == PAGE_CUR_LE_OR_EXTENDS) || (mode == PAGE_CUR_DBG));
page_check_dir(page);
#ifdef PAGE_CUR_ADAPT
if ((page_header_get_field(page, PAGE_LEVEL) == 0)
&& (mode == PAGE_CUR_LE)
@ -595,6 +598,7 @@ page_cur_parse_insert_rec(
rec_t* cursor_rec;
byte buf1[1024];
byte* buf;
byte* ptr2 = ptr;
ulint info_bits = 0; /* remove warning */
page_cur_t cursor;
@ -697,7 +701,20 @@ page_cur_parse_insert_rec(
/* Build the inserted record to buf */
ut_a(mismatch_index < UNIV_PAGE_SIZE);
if (mismatch_index >= UNIV_PAGE_SIZE) {
printf("Is short %lu, info_bits %lu, offset %lu, o_offset %lu\n"
"mismatch index %lu, end_seg_len %lu\n"
"parsed len %lu\n",
is_short, info_bits, offset, origin_offset,
mismatch_index, end_seg_len, (ulint)(ptr - ptr2));
printf("Dump of 300 bytes of log:\n");
ut_print_buf(ptr2, 300);
buf_page_print(page);
ut_a(0);
}
ut_memcpy(buf, rec_get_start(cursor_rec), mismatch_index);
ut_memcpy(buf + mismatch_index, ptr, end_seg_len);

View File

@ -353,7 +353,7 @@ page_create(
infimum_rec = rec_convert_dtuple_to_rec(heap_top, tuple);
ut_ad(infimum_rec == page + PAGE_INFIMUM);
ut_a(infimum_rec == page + PAGE_INFIMUM);
rec_set_n_owned(infimum_rec, 1);
rec_set_heap_no(infimum_rec, 0);
@ -370,7 +370,7 @@ page_create(
supremum_rec = rec_convert_dtuple_to_rec(heap_top, tuple);
ut_ad(supremum_rec == page + PAGE_SUPREMUM);
ut_a(supremum_rec == page + PAGE_SUPREMUM);
rec_set_n_owned(supremum_rec, 1);
rec_set_heap_no(supremum_rec, 1);
@ -389,6 +389,8 @@ page_create(
page_header_set_ptr(page, PAGE_FREE, NULL);
page_header_set_field(page, PAGE_GARBAGE, 0);
page_header_set_ptr(page, PAGE_LAST_INSERT, NULL);
page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION);
page_header_set_field(page, PAGE_N_DIRECTION, 0);
page_header_set_field(page, PAGE_N_RECS, 0);
page_set_max_trx_id(page, ut_dulint_zero);
@ -402,17 +404,22 @@ page_create(
slot = page_dir_get_nth_slot(page, 1);
page_dir_slot_set_rec(slot, supremum_rec);
/* Set next pointers in infimum and supremum */
/* Set the next pointers in infimum and supremum */
rec_set_next_offs(infimum_rec, (ulint)(supremum_rec - page));
rec_set_next_offs(supremum_rec, 0);
#ifdef notdefined
/* Disable the use of page_template: there is a race condition here:
while one thread is creating page_template, another one can start
using it before the memcpy completes! */
if (page_template == NULL) {
page_template = mem_alloc(UNIV_PAGE_SIZE);
ut_memcpy(page_template, page, UNIV_PAGE_SIZE);
}
#endif
return(page);
}
@ -439,6 +446,9 @@ page_copy_rec_list_end_no_locks(
page_cur_move_to_next(&cur1);
}
/* Track a memory corruption bug in Windows */
ut_a(mach_read_from_2(new_page + UNIV_PAGE_SIZE - 10) == PAGE_INFIMUM);
page_cur_set_before_first(new_page, &cur2);
/* Copy records from the original page to the new page */
@ -449,6 +459,8 @@ page_copy_rec_list_end_no_locks(
ut_a(
page_cur_rec_insert(&cur2, page_cur_get_rec(&cur1), mtr));
ut_a(mach_read_from_2(new_page + UNIV_PAGE_SIZE - 10)
== PAGE_INFIMUM);
page_cur_move_to_next(&cur1);
page_cur_move_to_next(&cur2);
}
@ -1316,6 +1328,37 @@ page_rec_validate(
return(TRUE);
}
/*******************************************************************
Checks that the first directory slot points to the infimum record and
the last to the supremum. This function is intended to track if the
bug fixed in 4.0.14 has caused corruption to users' databases.
A failed check is only reported: a message is written to stderr and the
page is printed with buf_page_print; nothing is returned to the caller. */

void
page_check_dir(
/*===========*/
	page_t*	page)	/* in: index page */
{
	ulint	n_slots;

	n_slots = page_dir_get_n_slots(page);

	/* Slot 0 must point to the infimum record */

	if (page_dir_slot_get_rec(page_dir_get_nth_slot(page, 0))
	    != page_get_infimum_rec(page)) {

		fprintf(stderr,
"InnoDB: Page directory corruption: infimum not pointed to\n");
		buf_page_print(page);
	}

	/* The last slot must point to the supremum record */

	if (page_dir_slot_get_rec(page_dir_get_nth_slot(page, n_slots - 1))
	    != page_get_supremum_rec(page)) {

		fprintf(stderr,
"InnoDB: Page directory corruption: supremum not pointed to\n");
		buf_page_print(page);
	}
}
/*******************************************************************
This function checks the consistency of an index page when we do not
know the index. This is also resilient so that this should never crash
@ -1598,7 +1641,8 @@ page_validate(
"InnoDB: previous record %s\n", err_buf);
rec_sprintf(err_buf, 900, rec);
fprintf(stderr, "InnoDB: record %s\n", err_buf);
fprintf(stderr,
"InnoDB: record %s\n", err_buf);
goto func_exit;
}

View File

@ -1058,7 +1058,6 @@ opt_clust_access(
dfield_t* dfield;
mem_heap_t* heap;
ulint n_fields;
ulint col_no;
ulint pos;
ulint i;
@ -1093,8 +1092,7 @@ opt_clust_access(
plan->clust_map = mem_heap_alloc(heap, n_fields * sizeof(ulint));
for (i = 0; i < n_fields; i++) {
col_no = dict_index_get_nth_col_no(clust_index, i);
pos = dict_index_get_nth_col_pos(index, col_no);
pos = dict_index_get_nth_field_pos(index, clust_index, i);
*(plan->clust_map + i) = pos;
@ -1109,7 +1107,8 @@ opt_clust_access(
dfield = dtuple_get_nth_field(plan->clust_ref, table->mix_len);
dfield_set_data(dfield, mem_heap_alloc(heap, table->mix_id_len),
dfield_set_data(dfield, mem_heap_alloc(heap,
table->mix_id_len),
table->mix_id_len);
ut_memcpy(dfield_get_data(dfield), table->mix_id_buf,
table->mix_id_len);

View File

@ -244,13 +244,11 @@ pars_resolve_func_data_type(
/* Inherit the data type from the first argument (which must
not be the SQL null literal whose type is DATA_ERROR) */
ut_a(dtype_get_mtype(que_node_get_data_type(arg))
!= DATA_ERROR);
dtype_copy(que_node_get_data_type(node),
que_node_get_data_type(arg));
ut_a(dtype_get_mtype(que_node_get_data_type(node)) == DATA_INT);
ut_a(dtype_get_mtype(que_node_get_data_type(node))
== DATA_INT);
} else if (func == PARS_COUNT_TOKEN) {
ut_a(arg);
dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4, 0);
@ -1596,7 +1594,7 @@ pars_create_index(
column = column_list;
while (column) {
dict_mem_index_add_field(index, column->name, 0);
dict_mem_index_add_field(index, column->name, 0, 0);
column->resolved = TRUE;
column->token_type = SYM_COLUMN;

View File

@ -38,7 +38,7 @@ Used in debug checking of cmp_dtuple_... .
This function is used to compare a data tuple to a physical record. If
dtuple has n fields then rec must have either m >= n fields, or it must
differ from dtuple in some of the m fields rec has. */
static
int
cmp_debug_dtuple_rec_with_match(
/*============================*/
@ -50,9 +50,10 @@ cmp_debug_dtuple_rec_with_match(
dtuple in some of the common fields, or which
has an equal number or more fields than
dtuple */
ulint* matched_fields);/* in/out: number of already completely
matched fields; when function returns,
contains the value for current comparison */
ulint* matched_fields);/* in/out: number of already
completely matched fields; when function
returns, contains the value for current
comparison */
/*****************************************************************
This function is used to compare two data fields for which the data type
is such that we must use MySQL code to compare them. The prototype here
@ -79,17 +80,12 @@ UNIV_INLINE
ulint
cmp_collate(
/*========*/
/* out: collation order position */
dtype_t* type __attribute__((unused)) , /* in: type */
ulint code) /* in: code of a character stored in database
record */
/* out: collation order position */
ulint code) /* in: code of a character stored in database record */
{
ut_ad((type->mtype == DATA_CHAR) || (type->mtype == DATA_VARCHAR));
return((ulint) srv_latin1_ordering[code]);
}
/*****************************************************************
Returns TRUE if two types are equal for comparison purposes. */
@ -118,7 +114,8 @@ cmp_types_are_equal(
if (type1->mtype == DATA_INT
&& (type1->prtype & DATA_UNSIGNED)
!= (type2->prtype & DATA_UNSIGNED)) {
!= (type2->prtype & DATA_UNSIGNED)) {
/* The storage format of an unsigned integer is different
from a signed integer: in a signed integer we OR
0x8000... to the value of positive integers. */
@ -131,12 +128,17 @@ cmp_types_are_equal(
return(FALSE);
}
if (type1->mtype == DATA_BLOB && (type1->prtype & DATA_BINARY_TYPE)
!= (type2->prtype & DATA_BINARY_TYPE)) {
return(FALSE);
}
return(TRUE);
}
/*****************************************************************
Innobase uses this function is to compare two data fields for which the
data type is such that we must compare whole fields. */
Innobase uses this function to compare two data fields for which the data type
is such that we must compare whole fields or call MySQL to do the comparison */
static
int
cmp_whole_field(
@ -239,8 +241,34 @@ cmp_whole_field(
return(0);
case DATA_VARMYSQL:
case DATA_MYSQL:
case DATA_BLOB:
if (data_type == DATA_BLOB
&& 0 != (type->prtype & DATA_BINARY_TYPE)) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Error: comparing a binary BLOB with a character set sensitive\n"
"InnoDB: comparison!\n");
}
/* MySQL does not pad the ends of strings with spaces in a
comparison. That would cause a foreign key check to fail for
non-latin1 character sets if we have different length columns.
To prevent that we remove trailing spaces here before doing
the comparison. NOTE that if we in the future map more MySQL
types to DATA_MYSQL or DATA_VARMYSQL, we have to change this
code. */
while (a_length > 0 && a[a_length - 1] == ' ') {
a_length--;
}
while (b_length > 0 && b[b_length - 1] == ' ') {
b_length--;
}
return(innobase_mysql_cmp(
(int)(type->prtype & ~DATA_NOT_NULL),
(int)(type->prtype & DATA_MYSQL_TYPE_MASK),
a, a_length, b, b_length));
default:
fprintf(stderr,
@ -291,7 +319,10 @@ cmp_data_data_slow(
return(1);
}
if (cur_type->mtype >= DATA_FLOAT) {
if (cur_type->mtype >= DATA_FLOAT
|| (cur_type->mtype == DATA_BLOB
&& (cur_type->prtype & DATA_NONLATIN1))) {
return(cmp_whole_field(cur_type, data1, len1, data2, len2));
}
@ -334,9 +365,12 @@ cmp_data_data_slow(
goto next_byte;
}
if (cur_type->mtype <= DATA_CHAR) {
data1_byte = cmp_collate(cur_type, data1_byte);
data2_byte = cmp_collate(cur_type, data2_byte);
if (cur_type->mtype <= DATA_CHAR
|| (cur_type->mtype == DATA_BLOB
&& 0 == (cur_type->prtype & DATA_BINARY_TYPE))) {
data1_byte = cmp_collate(data1_byte);
data2_byte = cmp_collate(data2_byte);
}
if (data1_byte > data2_byte) {
@ -487,7 +521,9 @@ cmp_dtuple_rec_with_match(
}
}
if (cur_type->mtype >= DATA_FLOAT) {
if (cur_type->mtype >= DATA_FLOAT
|| (cur_type->mtype == DATA_BLOB
&& (cur_type->prtype & DATA_NONLATIN1))) {
ret = cmp_whole_field(cur_type,
dfield_get_data(dtuple_field), dtuple_f_len,
@ -547,10 +583,13 @@ cmp_dtuple_rec_with_match(
goto next_byte;
}
if (cur_type->mtype <= DATA_CHAR) {
rec_byte = cmp_collate(cur_type, rec_byte);
dtuple_byte = cmp_collate(cur_type,
dtuple_byte);
if (cur_type->mtype <= DATA_CHAR
|| (cur_type->mtype == DATA_BLOB
&& 0 ==
(cur_type->prtype & DATA_BINARY_TYPE))) {
rec_byte = cmp_collate(rec_byte);
dtuple_byte = cmp_collate(dtuple_byte);
}
if (dtuple_byte > rec_byte) {
@ -804,7 +843,10 @@ cmp_rec_rec_with_match(
}
}
if (cur_type->mtype >= DATA_FLOAT) {
if (cur_type->mtype >= DATA_FLOAT
|| (cur_type->mtype == DATA_BLOB
&& (cur_type->prtype & DATA_NONLATIN1))) {
ret = cmp_whole_field(cur_type,
rec1_b_ptr, rec1_f_len,
rec2_b_ptr, rec2_f_len);
@ -861,9 +903,13 @@ cmp_rec_rec_with_match(
goto next_byte;
}
if (cur_type->mtype <= DATA_CHAR) {
rec1_byte = cmp_collate(cur_type, rec1_byte);
rec2_byte = cmp_collate(cur_type, rec2_byte);
if (cur_type->mtype <= DATA_CHAR
|| (cur_type->mtype == DATA_BLOB
&& 0 ==
(cur_type->prtype & DATA_BINARY_TYPE))) {
rec1_byte = cmp_collate(rec1_byte);
rec2_byte = cmp_collate(rec2_byte);
}
if (rec1_byte < rec2_byte) {
@ -906,7 +952,7 @@ This function is used to compare a data tuple to a physical record. If
dtuple has n fields then rec must have either m >= n fields, or it must
differ from dtuple in some of the m fields rec has. If encounters an
externally stored field, returns 0. */
static
int
cmp_debug_dtuple_rec_with_match(
/*============================*/
@ -918,9 +964,10 @@ cmp_debug_dtuple_rec_with_match(
dtuple in some of the common fields, or which
has an equal number or more fields than
dtuple */
ulint* matched_fields) /* in/out: number of already completely
matched fields; when function returns,
contains the value for current comparison */
ulint* matched_fields) /* in/out: number of already
completely matched fields; when function
returns, contains the value for current
comparison */
{
dtype_t* cur_type; /* pointer to type of the current
field in dtuple */

View File

@ -217,8 +217,8 @@ ins_node_set_new_row(
}
/***********************************************************************
Does an insert operation by updating a delete marked existing record
in the index. This situation can occur if the delete marked record is
Does an insert operation by updating a delete-marked existing record
in the index. This situation can occur if the delete-marked record is
kept in the index for consistent reads. */
static
ulint
@ -240,9 +240,9 @@ row_ins_sec_index_entry_by_modify(
ut_ad((cursor->index->type & DICT_CLUSTERED) == 0);
ut_ad(rec_get_deleted_flag(rec));
/* We know that in the ordering entry and rec are identified.
But in their binary form there may be differences if there
are char fields in them. Therefore we have to calculate the
/* We know that in the alphabetical ordering, entry and rec are
identical. But in their binary form there may be differences if
there are char fields in them. Therefore we have to calculate the
difference and do an update-in-place if necessary. */
heap = mem_heap_create(1024);
@ -305,8 +305,8 @@ row_ins_clust_index_entry_by_modify(
/* Try optimistic updating of the record, keeping changes
within the page */
err = btr_cur_optimistic_update(0, cursor, update, 0, thr, mtr);
err = btr_cur_optimistic_update(0, cursor, update, 0, thr,
mtr);
if (err == DB_OVERFLOW || err == DB_UNDERFLOW) {
err = DB_FAIL;
}
@ -364,11 +364,17 @@ row_ins_cascade_calc_update_vec(
/* out: number of fields in the
calculated update vector; the value
can also be 0 if no foreign key
fields changed */
fields changed; the returned value
is ULINT_UNDEFINED if the column
type in the child table is too short
to fit the new value in the parent
table: that means the update fails */
upd_node_t* node, /* in: update node of the parent
table */
dict_foreign_t* foreign) /* in: foreign key constraint whose
dict_foreign_t* foreign, /* in: foreign key constraint whose
type is != 0 */
mem_heap_t* heap) /* in: memory heap to use as
temporary storage */
{
upd_node_t* cascade = node->cascade_node;
dict_table_t* table = foreign->foreign_table;
@ -381,14 +387,16 @@ row_ins_cascade_calc_update_vec(
upd_field_t* parent_ufield;
ulint n_fields_updated;
ulint parent_field_no;
dtype_t* type;
ulint i;
ulint j;
ut_a(node && foreign && cascade && table && index);
/* Calculate the appropriate update vector which will set the fields
in the child index record to the same value as the referenced index
record will get in the update. */
in the child index record to the same value (possibly padded with
spaces if the column is a fixed length CHAR or FIXBINARY column) as
the referenced index record will get in the update. */
parent_table = node->table;
ut_a(parent_table == foreign->referenced_table);
@ -424,7 +432,56 @@ row_ins_cascade_calc_update_vec(
dict_table_get_nth_col_pos(table,
dict_index_get_nth_col_no(index, i));
ufield->exp = NULL;
ufield->new_val = parent_ufield->new_val;
type = dict_index_get_nth_type(index, i);
/* Do not allow a NOT NULL column to be
updated as NULL */
if (ufield->new_val.len == UNIV_SQL_NULL
&& (type->prtype & DATA_NOT_NULL)) {
return(ULINT_UNDEFINED);
}
/* If the new value would not fit in the
column, do not allow the update */
if (ufield->new_val.len != UNIV_SQL_NULL
&& ufield->new_val.len
> dtype_get_len(type)) {
return(ULINT_UNDEFINED);
}
/* If the parent column type has a different
length than the child column type, we may
need to pad the new value of the child
column with spaces */
if (dtype_is_fixed_size(type)
&& ufield->new_val.len != UNIV_SQL_NULL
&& ufield->new_val.len
< dtype_get_fixed_size(type)) {
ufield->new_val.data =
mem_heap_alloc(heap,
dtype_get_fixed_size(type));
ufield->new_val.len =
dtype_get_fixed_size(type);
ut_a(dtype_get_pad_char(type)
!= ULINT_UNDEFINED);
memset(ufield->new_val.data,
(byte)dtype_get_pad_char(type),
dtype_get_fixed_size(type));
ut_memcpy(ufield->new_val.data,
parent_ufield->new_val.data,
parent_ufield->new_val.len);
}
ufield->extern_storage = FALSE;
n_fields_updated++;
@ -570,9 +627,11 @@ row_ins_foreign_check_on_constraint(
dict_index_t* clust_index;
dtuple_t* ref;
mem_heap_t* tmp_heap;
mem_heap_t* upd_vec_heap = NULL;
rec_t* rec;
rec_t* clust_rec;
upd_t* update;
ulint n_to_update;
ulint err;
ulint i;
char* ptr;
@ -597,8 +656,10 @@ row_ins_foreign_check_on_constraint(
*ptr = '\0';
/* We call a function in ha_innodb.cc */
#ifndef UNIV_HOTBACKUP
innobase_invalidate_query_cache(thr_get_trx(thr), table_name_buf,
ut_strlen(table->name) + 1);
#endif
node = thr->run_node;
if (node->is_delete && 0 == (foreign->type &
@ -828,7 +889,21 @@ row_ins_foreign_check_on_constraint(
/* Build the appropriate update vector which sets changing
foreign->n_fields first fields in rec to new values */
row_ins_cascade_calc_update_vec(node, foreign);
upd_vec_heap = mem_heap_create(256);
n_to_update = row_ins_cascade_calc_update_vec(node, foreign,
upd_vec_heap);
if (n_to_update == ULINT_UNDEFINED) {
err = DB_ROW_IS_REFERENCED;
row_ins_foreign_report_err(
(char*)"Trying a cascaded update where the updated value in the child\n"
"table would not fit in the length of the column, or the value would\n"
"be NULL and the column is declared as not NULL in the child table,",
thr, foreign, btr_pcur_get_rec(pcur), entry);
goto nonstandard_exit_func;
}
if (cascade->update->n_fields == 0) {
@ -867,10 +942,18 @@ row_ins_foreign_check_on_constraint(
btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, mtr);
if (upd_vec_heap) {
mem_heap_free(upd_vec_heap);
}
return(err);
nonstandard_exit_func:
if (upd_vec_heap) {
mem_heap_free(upd_vec_heap);
}
btr_pcur_store_position(pcur, mtr);
mtr_commit(mtr);
@ -1275,6 +1358,11 @@ row_ins_unique_report_err(
dtuple_t* entry, /* in: index entry to insert in the index */
dict_index_t* index) /* in: index */
{
UT_NOT_USED(thr);
UT_NOT_USED(rec);
UT_NOT_USED(entry);
UT_NOT_USED(index);
#ifdef notdefined
/* Disable reporting to test if the slowdown of REPLACE in 4.0.13 was
caused by this! */
@ -1832,13 +1920,15 @@ row_ins_index_entry(
/***************************************************************
Sets the values of the dtuple fields in entry from the values of appropriate
columns in row. */
UNIV_INLINE
static
void
row_ins_index_entry_set_vals(
/*=========================*/
dict_index_t* index, /* in: index */
dtuple_t* entry, /* in: index entry to make */
dtuple_t* row) /* in: row */
{
dict_field_t* ind_field;
dfield_t* field;
dfield_t* row_field;
ulint n_fields;
@ -1850,11 +1940,21 @@ row_ins_index_entry_set_vals(
for (i = 0; i < n_fields; i++) {
field = dtuple_get_nth_field(entry, i);
ind_field = dict_index_get_nth_field(index, i);
row_field = dtuple_get_nth_field(row, field->col_no);
row_field = dtuple_get_nth_field(row, ind_field->col->ind);
/* Check column prefix indexes */
if (ind_field->prefix_len > 0
&& dfield_get_len(row_field) != UNIV_SQL_NULL
&& dfield_get_len(row_field) > ind_field->prefix_len) {
field->len = ind_field->prefix_len;
} else {
field->len = row_field->len;
}
field->data = row_field->data;
field->len = row_field->len;
}
}
@ -1873,7 +1973,7 @@ row_ins_index_entry_step(
ut_ad(dtuple_check_typed(node->row));
row_ins_index_entry_set_vals(node->entry, node->row);
row_ins_index_entry_set_vals(node->index, node->entry, node->row);
ut_ad(dtuple_check_typed(node->entry));

View File

@ -76,9 +76,6 @@ row_mysql_store_blob_ref(
also to set the NULL bit in the MySQL record
header! */
{
ulint sum = 0;
ulint i;
/* MySQL might assume the field is set to zero except the length and
the pointer fields */
@ -93,22 +90,6 @@ row_mysql_store_blob_ref(
ut_a(col_len - 8 > 2 || len < 256 * 256);
ut_a(col_len - 8 > 3 || len < 256 * 256 * 256);
/* We try to track an elusive bug which probably was fixed
May 9, 2002, but better be sure: we probe the data buffer
to make sure it is in valid allocated memory */
for (i = 0; i < len; i++) {
sum += (ulint)(data + i);
}
/* The variable below is identically false, we just fool the
compiler to not optimize away our loop */
if (row_mysql_identically_false) {
printf("Sum %lu\n", sum);
}
mach_write_to_n_little_endian(dest, col_len - 8, len);
ut_memcpy(dest + col_len - 8, (byte*)&data, sizeof(byte*));
@ -952,7 +933,8 @@ row_update_for_mysql(
if (prebuilt->pcur->btr_cur.index == clust_index) {
btr_pcur_copy_stored_position(node->pcur, prebuilt->pcur);
} else {
btr_pcur_copy_stored_position(node->pcur, prebuilt->clust_pcur);
btr_pcur_copy_stored_position(node->pcur,
prebuilt->clust_pcur);
}
ut_a(node->pcur->rel_pos == BTR_PCUR_ON);
@ -1477,8 +1459,7 @@ row_create_index_for_mysql(
ulint namelen;
ulint keywordlen;
ulint err;
ulint i;
ulint j;
ulint i, j;
ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
ut_ad(mutex_own(&(dict_sys->mutex)));
@ -1486,23 +1467,9 @@ row_create_index_for_mysql(
trx->op_info = (char *) "creating index";
trx_start_if_not_started(trx);
namelen = ut_strlen(index->table_name);
keywordlen = ut_strlen("_recover_innodb_tmp_table");
if (namelen >= keywordlen
&& 0 == ut_memcmp(
index->table_name + namelen - keywordlen,
(char*)"_recover_innodb_tmp_table", keywordlen)) {
return(DB_SUCCESS);
}
/* Check that the same column does not appear twice in the index.
InnoDB assumes this in its algorithms, e.g., update of an index
entry */
Starting from 4.0.14 InnoDB should be able to cope with that, but
safer not to allow them. */
for (i = 0; i < dict_index_get_n_fields(index); i++) {
for (j = 0; j < i; j++) {
@ -1525,6 +1492,20 @@ row_create_index_for_mysql(
}
}
trx_start_if_not_started(trx);
namelen = ut_strlen(index->table_name);
keywordlen = ut_strlen("_recover_innodb_tmp_table");
if (namelen >= keywordlen
&& 0 == ut_memcmp(
index->table_name + namelen - keywordlen,
(char*)"_recover_innodb_tmp_table", keywordlen)) {
return(DB_SUCCESS);
}
heap = mem_heap_create(512);
trx->dict_operation = TRUE;
@ -1542,6 +1523,7 @@ row_create_index_for_mysql(
que_graph_free((que_t*) que_node_get_parent(thr));
error_handling:
if (err != DB_SUCCESS) {
/* We have special error handling here */

View File

@ -136,7 +136,14 @@ row_build_index_entry(
dfield2 = dtuple_get_nth_field(row, dict_col_get_no(col));
dfield_copy(dfield, dfield2);
dfield->col_no = dict_col_get_no(col);
/* If a column prefix index, take only the prefix */
if (ind_field->prefix_len > 0
&& dfield_get_len(dfield2) != UNIV_SQL_NULL
&& dfield_get_len(dfield2) > ind_field->prefix_len) {
dfield_set_len(dfield, ind_field->prefix_len);
}
}
ut_ad(dtuple_check_typed(entry));
@ -146,8 +153,7 @@ row_build_index_entry(
/***********************************************************************
An inverse function to dict_row_build_index_entry. Builds a row from a
record in a clustered index. NOTE that externally stored (often big)
fields are always copied to heap. */
record in a clustered index. */
dtuple_t*
row_build(
@ -172,6 +178,7 @@ row_build(
{
dtuple_t* row;
dict_table_t* table;
dict_field_t* ind_field;
dict_col_t* col;
dfield_t* dfield;
ulint n_fields;
@ -204,19 +211,24 @@ row_build(
dict_table_copy_types(row, table);
for (i = 0; i < n_fields; i++) {
ind_field = dict_index_get_nth_field(index, i);
col = dict_field_get_col(dict_index_get_nth_field(index, i));
dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
field = rec_get_nth_field(rec, i, &len);
if (ind_field->prefix_len == 0) {
if (type == ROW_COPY_ALSO_EXTERNALS
&& rec_get_nth_field_extern_bit(rec, i)) {
col = dict_field_get_col(ind_field);
dfield = dtuple_get_nth_field(row,
dict_col_get_no(col));
field = rec_get_nth_field(rec, i, &len);
field = btr_rec_copy_externally_stored_field(rec,
i, &len, heap);
if (type == ROW_COPY_ALSO_EXTERNALS
&& rec_get_nth_field_extern_bit(rec, i)) {
field = btr_rec_copy_externally_stored_field(
rec, i, &len, heap);
}
dfield_set_data(dfield, field, len);
}
dfield_set_data(dfield, field, len);
}
ut_ad(dtuple_check_typed(row));
@ -371,7 +383,6 @@ row_build_row_ref(
dict_table_t* table;
dict_index_t* clust_index;
dfield_t* dfield;
dict_col_t* col;
dtuple_t* ref;
byte* field;
ulint len;
@ -403,24 +414,13 @@ row_build_row_ref(
for (i = 0; i < ref_len; i++) {
dfield = dtuple_get_nth_field(ref, i);
col = dict_field_get_col(
dict_index_get_nth_field(clust_index, i));
pos = dict_index_get_nth_col_pos(index, dict_col_get_no(col));
pos = dict_index_get_nth_field_pos(index, clust_index, i);
if (pos != ULINT_UNDEFINED) {
field = rec_get_nth_field(rec, pos, &len);
ut_a(pos != ULINT_UNDEFINED);
dfield_set_data(dfield, field, len);
} else {
ut_ad(table->type == DICT_TABLE_CLUSTER_MEMBER);
ut_ad(i == table->mix_len);
field = rec_get_nth_field(rec, pos, &len);
dfield_set_data(dfield,
mem_heap_alloc(heap, table->mix_id_len),
table->mix_id_len);
ut_memcpy(dfield_get_data(dfield), table->mix_id_buf,
table->mix_id_len);
}
dfield_set_data(dfield, field, len);
}
ut_ad(dtuple_check_typed(ref));
@ -448,7 +448,6 @@ row_build_row_ref_in_tuple(
dict_table_t* table;
dict_index_t* clust_index;
dfield_t* dfield;
dict_col_t* col;
byte* field;
ulint len;
ulint ref_len;
@ -483,19 +482,13 @@ row_build_row_ref_in_tuple(
for (i = 0; i < ref_len; i++) {
dfield = dtuple_get_nth_field(ref, i);
col = dict_field_get_col(
dict_index_get_nth_field(clust_index, i));
pos = dict_index_get_nth_col_pos(index, dict_col_get_no(col));
pos = dict_index_get_nth_field_pos(index, clust_index, i);
if (pos != ULINT_UNDEFINED) {
field = rec_get_nth_field(rec, pos, &len);
ut_a(pos != ULINT_UNDEFINED);
dfield_set_data(dfield, field, len);
} else {
ut_ad(table->type == DICT_TABLE_CLUSTER_MEMBER);
ut_ad(i == table->mix_len);
ut_a(0);
}
field = rec_get_nth_field(rec, pos, &len);
dfield_set_data(dfield, field, len);
}
ut_ad(dtuple_check_typed(ref));
@ -517,6 +510,7 @@ row_build_row_ref_from_row(
directly into data of this row */
{
dict_index_t* clust_index;
dict_field_t* field;
dfield_t* dfield;
dfield_t* dfield2;
dict_col_t* col;
@ -534,12 +528,20 @@ row_build_row_ref_from_row(
for (i = 0; i < ref_len; i++) {
dfield = dtuple_get_nth_field(ref, i);
col = dict_field_get_col(
dict_index_get_nth_field(clust_index, i));
field = dict_index_get_nth_field(clust_index, i);
col = dict_field_get_col(field);
dfield2 = dtuple_get_nth_field(row, dict_col_get_no(col));
dfield_copy(dfield, dfield2);
if (field->prefix_len > 0
&& dfield->len != UNIV_SQL_NULL
&& dfield->len > field->prefix_len) {
dfield->len = field->prefix_len;
}
}
ut_ad(dtuple_check_typed(ref));

View File

@ -65,41 +65,50 @@ row_sel_sec_rec_is_for_clust_rec(
rec_t* sec_rec, /* in: secondary index record */
dict_index_t* sec_index, /* in: secondary index */
rec_t* clust_rec, /* in: clustered index record */
dict_index_t* clust_index __attribute__((unused)))
/* in: clustered index */
dict_index_t* clust_index) /* in: clustered index */
{
dict_col_t* col;
byte* sec_field;
ulint sec_len;
byte* clust_field;
ulint clust_len;
ulint n;
ulint i;
dict_field_t* ifield;
dict_col_t* col;
byte* sec_field;
ulint sec_len;
byte* clust_field;
ulint clust_len;
ulint n;
ulint i;
n = dict_index_get_n_ordering_defined_by_user(sec_index);
UT_NOT_USED(clust_index);
for (i = 0; i < n; i++) {
col = dict_field_get_col(
dict_index_get_nth_field(sec_index, i));
n = dict_index_get_n_ordering_defined_by_user(sec_index);
clust_field = rec_get_nth_field(clust_rec,
dict_col_get_clust_pos(col),
&clust_len);
sec_field = rec_get_nth_field(sec_rec, i, &sec_len);
for (i = 0; i < n; i++) {
ifield = dict_index_get_nth_field(sec_index, i);
col = dict_field_get_col(ifield);
if (sec_len != clust_len) {
clust_field = rec_get_nth_field(clust_rec,
dict_col_get_clust_pos(col),
&clust_len);
sec_field = rec_get_nth_field(sec_rec, i, &sec_len);
return(FALSE);
if (ifield->prefix_len > 0
&& clust_len != UNIV_SQL_NULL
&& clust_len > ifield->prefix_len) {
clust_len = ifield->prefix_len;
}
if (0 != cmp_data_data(dict_col_get_type(col),
clust_field, clust_len,
sec_field, sec_len)) {
return(FALSE);
}
}
if (sec_len != clust_len) {
return(TRUE);
return(FALSE);
}
if (0 != cmp_data_data(dict_col_get_type(col),
clust_field, clust_len,
sec_field, sec_len)) {
return(FALSE);
}
}
return(TRUE);
}
/*************************************************************************
@ -606,7 +615,7 @@ row_sel_get_clust_rec(
/* Try to place a lock on the index record */
err = lock_clust_rec_read_check_and_lock(0, clust_rec, index,
node->row_lock_mode, LOCK_ORDINARY, thr);
node->row_lock_mode, LOCK_ORDINARY, thr);
if (err != DB_SUCCESS) {
return(err);
@ -1850,9 +1859,11 @@ row_printf_step(
}
/********************************************************************
Converts a key value stored in MySQL format to an Innobase dtuple.
The last field of the key value may be just a prefix of a fixed length
field: hence the parameter key_len. */
Converts a key value stored in MySQL format to an Innobase dtuple. The last
field of the key value may be just a prefix of a fixed length field: hence
the parameter key_len. But currently we do not allow search keys where the
last field is only a prefix of the full key field len and print a warning if
such appears. */
void
row_sel_convert_mysql_key_to_innobase(
@ -1863,17 +1874,24 @@ row_sel_convert_mysql_key_to_innobase(
to index! */
byte* buf, /* in: buffer to use in field
conversions */
ulint buf_len, /* in: buffer length */
dict_index_t* index, /* in: index of the key value */
byte* key_ptr, /* in: MySQL key value */
ulint key_len) /* in: MySQL key value length */
{
byte* original_buf = buf;
dict_field_t* field;
dfield_t* dfield;
ulint offset;
ulint len;
ulint data_offset;
ulint data_len;
ulint data_field_len;
ibool is_null;
byte* key_end;
ulint n_fields = 0;
ulint type;
UT_NOT_USED(index);
/* For documentation of the key value storage format in MySQL, see
ha_innobase::store_key_val_for_row() in ha_innodb.cc. */
key_end = key_ptr + key_len;
@ -1882,11 +1900,14 @@ row_sel_convert_mysql_key_to_innobase(
dtuple_set_n_fields(tuple, ULINT_MAX);
dfield = dtuple_get_nth_field(tuple, 0);
field = dict_index_get_nth_field(index, 0);
if (dfield_get_type(dfield)->mtype == DATA_SYS) {
/* A special case: we are looking for a position in a
generated clustered index: the first and the only
ordering column is ROW_ID */
/* A special case: we are looking for a position in the
generated clustered index which InnoDB automatically added
to a table with no primary key: the first and the only
ordering column is ROW_ID which InnoDB stored to the key_ptr
buffer. */
ut_a(key_len == DATA_ROW_ID_LEN);
@ -1897,70 +1918,114 @@ row_sel_convert_mysql_key_to_innobase(
return;
}
while (key_ptr < key_end) {
offset = 0;
len = dfield_get_type(dfield)->len;
while (key_ptr < key_end) {
n_fields++;
ut_a(dict_col_get_type(field->col)->mtype
== dfield_get_type(dfield)->mtype);
data_offset = 0;
is_null = FALSE;
if (!(dfield_get_type(dfield)->prtype & DATA_NOT_NULL)) {
/* The first byte in the field tells if this is
an SQL NULL value */
offset = 1;
data_offset = 1;
if (*key_ptr != 0) {
if (*key_ptr != 0) {
dfield_set_data(dfield, NULL, UNIV_SQL_NULL);
goto next_part;
is_null = TRUE;
}
}
row_mysql_store_col_in_innobase_format(
dfield, buf, key_ptr + offset, len,
dfield_get_type(dfield)->mtype,
type = dfield_get_type(dfield)->mtype;
/* Calculate data length and data field total length */
if (type == DATA_BLOB) {
/* The key field is a column prefix of a BLOB or
TEXT type column */
ut_a(field->prefix_len > 0);
/* MySQL stores the actual data length to the first 2
bytes after the optional SQL NULL marker byte. The
storage format is little-endian. */
/* There are no key fields > 255 bytes currently in
MySQL */
if (key_ptr[data_offset + 1] != 0) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Error: BLOB or TEXT prefix > 255 bytes in query to table %s\n",
index->table_name);
}
data_len = key_ptr[data_offset];
data_field_len = data_offset + 2 + field->prefix_len;
data_offset += 2;
type = DATA_CHAR; /* now that we know the length, we
store the column value like it would
be a fixed char field */
} else if (field->prefix_len > 0) {
data_len = field->prefix_len;
data_field_len = data_offset + data_len;
} else {
data_len = dfield_get_type(dfield)->len;
data_field_len = data_offset + data_len;
}
/* Storing may use at most data_len bytes of buf */
if (!is_null) {
row_mysql_store_col_in_innobase_format(
dfield, buf, key_ptr + data_offset,
data_len, type,
dfield_get_type(dfield)->prtype
& DATA_UNSIGNED);
next_part:
key_ptr += (offset + len);
buf += data_len;
}
key_ptr += data_field_len;
if (key_ptr > key_end) {
/* The last field in key was not a complete
field but a prefix of it.
/* The last field in key was not a complete key field
but a prefix of it.
Print a warning about this! HA_READ_PREFIX_LAST
does not currently work in InnoDB with partial-field
key value prefixes. Since MySQL currently uses a
padding trick to calculate LIKE 'abc%' type queries
there should never be partial-field prefixes
in searches. */
Print a warning about this! HA_READ_PREFIX_LAST does
not currently work in InnoDB with partial-field key
value prefixes. Since MySQL currently uses a padding
trick to calculate LIKE 'abc%' type queries there
should never be partial-field prefixes in searches. */
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Warning: using a partial-field key prefix in search\n");
ut_ad(dfield_get_len(dfield) != UNIV_SQL_NULL);
dfield_set_data(dfield, buf,
len - (ulint)(key_ptr - key_end));
if (!is_null) {
dfield->len -= (ulint)(key_ptr - key_end);
}
}
buf += len;
n_fields++;
field++;
dfield++;
}
/* We set the length of tuple to n_fields: we assume that
the memory area allocated for it is big enough (usually
bigger than n_fields). */
ut_a(buf <= original_buf + buf_len);
/* We set the length of tuple to n_fields: we assume that the memory
area allocated for it is big enough (usually bigger than n_fields). */
dtuple_set_n_fields(tuple, n_fields);
}
/******************************************************************
Stores the row id to the prebuilt struct. */
UNIV_INLINE
static
void
row_sel_store_row_id_to_prebuilt(
/*=============================*/
@ -1970,11 +2035,22 @@ row_sel_store_row_id_to_prebuilt(
{
byte* data;
ulint len;
char err_buf[1000];
data = rec_get_nth_field(index_rec,
dict_index_get_sys_col_pos(index, DATA_ROW_ID), &len);
ut_a(len == DATA_ROW_ID_LEN);
if (len != DATA_ROW_ID_LEN) {
rec_sprintf(err_buf, 900, index_rec);
fprintf(stderr,
"InnoDB: Error: Row id field is wrong length %lu in table %s index %s\n"
"InnoDB: Field number %lu, record:\n%s\n",
len, index->table_name, index->name,
dict_index_get_sys_col_pos(index, DATA_ROW_ID),
err_buf);
ut_a(0);
}
ut_memcpy(prebuilt->row_id, data, len);
}

View File

@ -428,7 +428,8 @@ row_undo_mod_del_unmark_sec(
found = row_search_index_entry(index, entry, BTR_MODIFY_LEAF, &pcur,
&mtr);
if (!found) {
fprintf(stderr, "InnoDB: error in sec index entry del undo in\n"
fprintf(stderr,
"InnoDB: error in sec index entry del undo in\n"
"InnoDB: index %s table %s\n", index->name,
index->table->name);
dtuple_sprintf(err_buf, 900, entry);
@ -570,7 +571,7 @@ row_undo_mod_upd_exist_sec(
the row */
row_upd_index_replace_new_col_vals(entry, index,
node->update);
node->update, NULL);
row_undo_mod_del_unmark_sec(node, thr, index, entry);
}

View File

@ -72,8 +72,9 @@ searched delete is obviously to keep the x-latch for several
steps of query graph execution. */
/***************************************************************
Checks if an update vector changes some of the first fields of an index
record. */
Checks if an update vector changes some of the first ordering fields of an
index record. This is only used in foreign key checks and we can assume
that index does not contain column prefixes. */
static
ibool
row_upd_changes_first_fields(
@ -234,7 +235,8 @@ row_upd_check_references_constraints(
if (err != DB_SUCCESS) {
if (got_s_lock) {
row_mysql_unfreeze_data_dictionary(trx);
row_mysql_unfreeze_data_dictionary(
trx);
}
mem_heap_free(heap);
@ -350,14 +352,15 @@ row_upd_index_entry_sys_field(
}
/***************************************************************
Returns TRUE if row update changes size of some field in index
or if some field to be updated is stored externally in rec or update. */
Returns TRUE if row update changes size of some field in index or if some
field to be updated is stored externally in rec or update. */
ibool
row_upd_changes_field_size(
/*=======================*/
row_upd_changes_field_size_or_external(
/*===================================*/
/* out: TRUE if the update changes the size of
some field in index */
some field in index or the field is external
in rec or update */
rec_t* rec, /* in: record in clustered index */
dict_index_t* index, /* in: clustered index */
upd_t* update) /* in: update vector */
@ -820,72 +823,58 @@ void
row_upd_index_replace_new_col_vals(
/*===============================*/
dtuple_t* entry, /* in/out: index entry where replaced */
dict_index_t* index, /* in: index; NOTE that may also be a
dict_index_t* index, /* in: index; NOTE that this may also be a
non-clustered index */
upd_t* update) /* in: update vector */
upd_t* update, /* in: update vector */
mem_heap_t* heap) /* in: memory heap to which we allocate and
copy the new values, set this as NULL if you
do not want allocation */
{
dict_field_t* field;
upd_field_t* upd_field;
dfield_t* dfield;
dfield_t* new_val;
ulint field_no;
dict_index_t* clust_index;
ulint j;
ulint i;
ut_ad(index);
clust_index = dict_table_get_first_index(index->table);
dtuple_set_info_bits(entry, update->info_bits);
for (i = 0; i < upd_get_n_fields(update); i++) {
for (j = 0; j < dict_index_get_n_fields(index); j++) {
upd_field = upd_get_nth_field(update, i);
field = dict_index_get_nth_field(index, j);
field_no = dict_index_get_nth_col_pos(index,
dict_index_get_nth_col_no(clust_index,
upd_field->field_no));
if (field_no != ULINT_UNDEFINED) {
dfield = dtuple_get_nth_field(entry, field_no);
for (i = 0; i < upd_get_n_fields(update); i++) {
new_val = &(upd_field->new_val);
upd_field = upd_get_nth_field(update, i);
dfield_set_data(dfield, new_val->data, new_val->len);
if (upd_field->field_no == field->col->clust_pos) {
dfield = dtuple_get_nth_field(entry, j);
new_val = &(upd_field->new_val);
dfield_set_data(dfield, new_val->data,
new_val->len);
if (heap && new_val->len != UNIV_SQL_NULL) {
dfield->data = mem_heap_alloc(heap,
new_val->len);
ut_memcpy(dfield->data, new_val->data,
new_val->len);
}
if (field->prefix_len > 0
&& new_val->len != UNIV_SQL_NULL
&& new_val->len > field->prefix_len) {
dfield->len = field->prefix_len;
}
}
}
}
}
/***************************************************************
Writes the new column values stored in the update vector into the given
clustered index entry. Each update field's field_no is used directly as the
field position in entry, with no position lookup. */
void
row_upd_clust_index_replace_new_col_vals(
/*=====================================*/
dtuple_t* entry, /* in/out: index entry where replaced */
upd_t* update) /* in: update vector */
{
upd_field_t* upd_field;
dfield_t* dfield;
dfield_t* new_val;
ulint field_no;
ulint i;
/* The info bits of the entry are taken over from the update vector */
dtuple_set_info_bits(entry, update->info_bits);
for (i = 0; i < upd_get_n_fields(update); i++) {
upd_field = upd_get_nth_field(update, i);
field_no = upd_field->field_no;
dfield = dtuple_get_nth_field(entry, field_no);
new_val = &(upd_field->new_val);
/* The entry field is given the data pointer and length held in the
update vector; NOTE(review): presumably dfield_set_data stores the
pointer without copying, so update must outlive entry - confirm */
dfield_set_data(dfield, new_val->data, new_val->len);
}
}
/***************************************************************
Checks if an update vector changes an ordering field of an index record.
This function is fast if the update vector is short or the number of ordering
@ -931,9 +920,15 @@ row_upd_changes_ord_field_binary(
upd_field = upd_get_nth_field(update, j);
/* Note that if the index field is a column prefix
then it may be that row does not contain an externally
stored part of the column value, and we cannot compare
the datas */
if (col_pos == upd_field->field_no
&& (row == NULL
|| !dfield_datas_are_binary_equal(
&& (row == NULL
|| ind_field->prefix_len > 0
|| !dfield_datas_are_binary_equal(
dtuple_get_nth_field(row, col_no),
&(upd_field->new_val)))) {
return(TRUE);
@ -978,8 +973,9 @@ row_upd_changes_some_index_ord_field_binary(
}
/***************************************************************
Checks if an update vector changes some of the first fields of an index
record. */
Checks if an update vector changes some of the first ordering fields of an
index record. This is only used in foreign key checks and we can assume
that index does not contain column prefixes. */
static
ibool
row_upd_changes_first_fields(
@ -1013,9 +1009,10 @@ row_upd_changes_first_fields(
upd_field = upd_get_nth_field(update, j);
if (col_pos == upd_field->field_no
&& cmp_dfield_dfield(
&& (ind_field->prefix_len > 0
|| 0 != cmp_dfield_dfield(
dtuple_get_nth_field(entry, i),
&(upd_field->new_val))) {
&(upd_field->new_val)))) {
return(TRUE);
}
}
@ -1204,7 +1201,7 @@ close_cur:
}
/* Build a new index entry */
row_upd_index_replace_new_col_vals(entry, index, node->update);
row_upd_index_replace_new_col_vals(entry, index, node->update, NULL);
/* Insert new index entry */
err = row_ins_index_entry(index, entry, NULL, 0, thr);
@ -1317,12 +1314,12 @@ row_upd_clust_rec_by_insert(
entry = row_build_index_entry(node->row, index, heap);
row_upd_clust_index_replace_new_col_vals(entry, node->update);
row_upd_index_replace_new_col_vals(entry, index, node->update, NULL);
row_upd_index_entry_sys_field(entry, index, DATA_TRX_ID, trx->id);
/* If we return from a lock wait, for example, we may have
extern fields marked as not-owned in entry (marked if the
extern fields marked as not-owned in entry (marked in the
if-branch above). We must unmark them. */
btr_cur_unmark_dtuple_extern_fields(entry, node->ext_vec,
@ -1702,9 +1699,9 @@ function_exit:
/* Do some cleanup */
if (node->row != NULL) {
mem_heap_empty(node->heap);
node->row = NULL;
node->n_ext_vec = 0;
mem_heap_empty(node->heap);
}
node->state = UPD_NODE_UPDATE_CLUSTERED;

View File

@ -27,6 +27,7 @@ Created 2/6/1997 Heikki Tuuri
#include "row0upd.h"
#include "rem0cmp.h"
#include "read0read.h"
#include "lock0lock.h"
/*********************************************************************
Finds out if an active transaction has inserted or modified a secondary
@ -111,6 +112,14 @@ row_vers_impl_x_locked_off_kernel(
return(NULL);
}
if (!lock_check_trx_id_sanity(trx_id, clust_rec, clust_index, TRUE)) {
/* Corruption noticed: try to avoid a crash by returning */
mtr_commit(&mtr);
return(NULL);
}
/* We look up if some earlier version of the clustered index record
would require rec to be in a different state (delete marked or
unmarked, or not existing). If there is such a version, then rec was
@ -177,7 +186,8 @@ row_vers_impl_x_locked_off_kernel(
/* If we get here, we know that the trx_id transaction is
still active and it has modified prev_version. Let us check
if prev_version would require rec to be in a different state. */
if prev_version would require rec to be in a different
state. */
vers_del = rec_get_deleted_flag(prev_version);

View File

@ -286,6 +286,7 @@ ulint srv_test_n_mutexes = ULINT_MAX;
i/o handler thread */
char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS];
char* srv_io_thread_function[SRV_MAX_N_IO_THREADS];
time_t srv_last_monitor_time;
@ -2399,8 +2400,9 @@ srv_sprintf_innodb_monitor(
srv_conc_n_threads, srv_conc_n_waiting_threads);
#ifdef UNIV_LINUX
buf += sprintf(buf,
"Main thread process no %lu, state: %s\n",
"Main thread process no. %lu, id %lu, state: %s\n",
srv_main_thread_process_no,
srv_main_thread_id,
srv_main_thread_op_info);
#else
buf += sprintf(buf,
@ -2464,8 +2466,8 @@ srv_lock_timeout_and_monitor_thread(
ulint i;
#ifdef UNIV_DEBUG_THREAD_CREATION
printf("Lock timeout thread starts\n");
printf("Thread id %lu\n", os_thread_pf(os_thread_get_curr_id()));
printf("Lock timeout thread starts, id %lu\n",
os_thread_pf(os_thread_get_curr_id()));
#endif
UT_NOT_USED(arg);
srv_last_monitor_time = time(NULL);
@ -2637,8 +2639,8 @@ srv_error_monitor_thread(
UT_NOT_USED(arg);
#ifdef UNIV_DEBUG_THREAD_CREATION
printf("Error monitor thread starts\n");
printf("Thread id %lu\n", os_thread_pf(os_thread_get_curr_id()));
printf("Error monitor thread starts, id %lu\n",
os_thread_pf(os_thread_get_curr_id()));
#endif
loop:
srv_error_monitor_active = TRUE;
@ -2760,8 +2762,8 @@ srv_master_thread(
UT_NOT_USED(arg);
#ifdef UNIV_DEBUG_THREAD_CREATION
printf("Master thread starts\n");
printf("Thread id %lu\n", os_thread_pf(os_thread_get_curr_id()));
printf("Master thread starts, id %lu\n",
os_thread_pf(os_thread_get_curr_id()));
#endif
srv_main_thread_process_no = os_proc_get_number();
srv_main_thread_id = os_thread_pf(os_thread_get_curr_id());

View File

@ -415,8 +415,8 @@ io_handler_thread(
segment = *((ulint*)arg);
#ifdef UNIV_DEBUG_THREAD_CREATION
printf("Io handler thread %lu starts\n", segment);
printf("Thread id %lu\n", os_thread_pf(os_thread_get_curr_id()));
printf("Io handler thread %lu starts, id %lu\n", segment,
os_thread_pf(os_thread_get_curr_id()));
#endif
for (i = 0;; i++) {
fil_aio_wait(segment);
@ -1492,7 +1492,9 @@ innobase_shutdown_for_mysql(void)
}
/* 1. Flush buffer pool to disk, write the current lsn to
the tablespace header(s), and copy all log data to archive */
the tablespace header(s), and copy all log data to archive.
Step 1 is the real InnoDB shutdown. The remaining steps
just free data structures after the shutdown. */
logs_empty_and_mark_files_at_shutdown();

View File

@ -272,8 +272,8 @@ trx_undo_page_report_insert(
mach_write_to_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
ptr - undo_page);
/* Write the log entry to the REDO log of this change in the UNDO log */
/* Write the log entry to the REDO log of this change in the UNDO
log */
trx_undof_page_add_undo_rec_log(undo_page, first_free,
ptr - undo_page, mtr);
return(first_free);
@ -493,6 +493,7 @@ trx_undo_page_report_modify(
ptr += 2;
/* Store first some general parameters to the undo log */
if (update) {
if (rec_get_deleted_flag(rec)) {
type_cmpl = TRX_UNDO_UPD_DEL_REC;
@ -527,7 +528,6 @@ trx_undo_page_report_modify(
trx_id = dict_index_rec_get_sys_col(index, DATA_TRX_ID, rec);
roll_ptr = dict_index_rec_get_sys_col(index, DATA_ROLL_PTR, rec);
len = mach_dulint_write_compressed(ptr, trx_id);
ptr += len;
@ -632,7 +632,11 @@ trx_undo_page_report_modify(
columns which occur as ordering fields in any index. This info is used
in the purge of old versions where we use it to build and search the
delete marked index records, to look if we can remove them from the
index tree. */
index tree. Note that starting from 4.0.14 also externally stored
fields can be ordering in some index. But we always store at least
384 first bytes locally to the clustered index record, which means
we can construct the column prefix fields in the index from the
stored data. */
if (!update || !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
@ -1408,11 +1412,11 @@ trx_undo_prev_version_build(
return(DB_ERROR);
}
if (row_upd_changes_field_size(rec, index, update)) {
if (row_upd_changes_field_size_or_external(rec, index, update)) {
entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap);
row_upd_clust_index_replace_new_col_vals(entry, update);
entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec,
heap);
row_upd_index_replace_new_col_vals(entry, index, update, heap);
buf = mem_heap_alloc(heap, rec_get_converted_size(entry));

View File

@ -52,6 +52,11 @@ trx_general_rollback_for_mysql(
que_thr_t* thr;
roll_node_t* roll_node;
/* Tell Innobase server that there might be work for
utility threads: */
srv_active_wake_master_thread();
trx_start_if_not_started(trx);
heap = mem_heap_create(512);
@ -89,6 +94,11 @@ trx_general_rollback_for_mysql(
ut_a(trx->error_state == DB_SUCCESS);
/* Tell Innobase server that there might be work for
utility threads: */
srv_active_wake_master_thread();
return((int) trx->error_state);
}
@ -110,20 +120,8 @@ trx_rollback_for_mysql(
trx->op_info = (char *) "rollback";
/* Tell Innobase server that there might be work for
utility threads: */
srv_active_wake_master_thread();
err = trx_general_rollback_for_mysql(trx, FALSE, NULL);
trx_mark_sql_stat_end(trx);
/* Tell Innobase server that there might be work for
utility threads: */
srv_active_wake_master_thread();
trx->op_info = (char *) "";
return(err);
@ -147,25 +145,191 @@ trx_rollback_last_sql_stat_for_mysql(
trx->op_info = (char *) "rollback of SQL statement";
/* Tell Innobase server that there might be work for
utility threads: */
srv_active_wake_master_thread();
err = trx_general_rollback_for_mysql(trx, TRUE,
&(trx->last_sql_stat_start));
/* The following call should not be needed, but we play safe: */
trx_mark_sql_stat_end(trx);
/* Tell Innobase server that there might be work for
utility threads: */
srv_active_wake_master_thread();
trx->op_info = (char *) "";
return(err);
}
/***********************************************************************
Releases named savepoint structs of a transaction: either every savepoint
in the list of the transaction, or only those which come after a given
savepoint in that list. The given savepoint itself is left in place. */

void
trx_roll_savepoints_free(
/*=====================*/
	trx_t*			trx,	/* in: transaction handle */
	trx_named_savept_t*	savep)	/* in: free all savepoints > this one;
					if this is NULL, free all savepoints
					of trx */
{
	trx_named_savept_t*	victim;
	trx_named_savept_t*	following;

	/* Choose the first struct to free: the head of the list when no
	savepoint was given, otherwise the successor of the given one */

	victim = (savep != NULL)
			? UT_LIST_GET_NEXT(trx_savepoints, savep)
			: UT_LIST_GET_FIRST(trx->trx_savepoints);

	for (; victim != NULL; victim = following) {
		/* Fetch the successor before the node is unlinked and
		its memory is given back */

		following = UT_LIST_GET_NEXT(trx_savepoints, victim);

		UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, victim);
		mem_free(victim->name);
		mem_free(victim);
	}
}
/***********************************************************************
Rolls a transaction back to a named savepoint. The modifications made after
the savepoint are undone, but InnoDB does NOT release the corresponding
locks which are stored in memory. An 'implicit' lock, that is, a lock on a
newly inserted row where the lock information is carried by the trx id
stored in the row, is naturally released in the rollback. Savepoints which
were set after the named savepoint are deleted; the named savepoint itself
is kept. */

ulint
trx_rollback_to_savepoint_for_mysql(
/*================================*/
						/* out: DB_NO_SAVEPOINT if no
						savepoint of the name is
						found, DB_ERROR if the
						transaction is not started,
						otherwise the result of the
						rollback */
	trx_t*		trx,			/* in: transaction handle */
	char*		savepoint_name,		/* in: savepoint name */
	ib_longlong*	mysql_binlog_cache_pos)	/* out: the MySQL binlog cache
						position corresponding to this
						savepoint; MySQL needs this
						information to remove the
						binlog entries of the queries
						executed after the savepoint;
						not written if DB_NO_SAVEPOINT
						or DB_ERROR is returned */
{
	trx_named_savept_t*	target;
	ulint			err;

	/* Look up the savepoint by its name */

	for (target = UT_LIST_GET_FIRST(trx->trx_savepoints);
	     target != NULL;
	     target = UT_LIST_GET_NEXT(trx_savepoints, target)) {

		if (ut_strcmp(target->name, savepoint_name) == 0) {

			break;
		}
	}

	if (target == NULL) {

		return(DB_NO_SAVEPOINT);
	}

	if (trx->conc_state == TRX_NOT_STARTED) {
		ut_print_timestamp(stderr);
		fprintf(stderr,
" InnoDB: Error: transaction has a savepoint %s though it is not started\n",
			target->name);

		return(DB_ERROR);
	}

	/* Every savepoint strictly later than the target can go now */

	trx_roll_savepoints_free(trx, target);

	*mysql_binlog_cache_pos = target->mysql_binlog_cache_pos;

	trx->op_info = (char *) "rollback to a savepoint";

	err = trx_general_rollback_for_mysql(trx, TRUE, &(target->savept));

	/* Remember the current undo_no of the transaction: it tells where
	to roll back to if the next SQL statement has to be rolled back */

	trx_mark_sql_stat_end(trx);

	trx->op_info = (char *) "";

	return(err);
}
/***********************************************************************
Sets a named savepoint in a transaction, starting the transaction first if
it has not been started yet. An already existing savepoint of the same name
is erased and the new one takes its place at the end of the savepoint list.
Savepoints are deleted in a transaction commit or rollback. */

ulint
trx_savepoint_for_mysql(
/*====================*/
					/* out: always DB_SUCCESS */
	trx_t*		trx,		/* in: transaction handle */
	char*		savepoint_name,	/* in: savepoint name */
	ib_longlong	binlog_cache_pos)/* in: MySQL binlog cache
					position corresponding to this
					connection at the time of the
					savepoint */
{
	trx_named_savept_t*	old_savep;
	trx_named_savept_t*	new_savep;
	ulint			name_size;

	ut_a(trx);
	ut_a(savepoint_name);

	trx_start_if_not_started(trx);

	/* If a savepoint of this name already exists, unlink and free the
	first such savepoint in the list */

	for (old_savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
	     old_savep != NULL;
	     old_savep = UT_LIST_GET_NEXT(trx_savepoints, old_savep)) {

		if (ut_strcmp(old_savep->name, savepoint_name) == 0) {
			UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints,
								old_savep);
			mem_free(old_savep->name);
			mem_free(old_savep);

			break;
		}
	}

	/* Build the new savepoint, with a private copy of the name
	including the terminating NUL, and append it to the list */

	name_size = 1 + ut_strlen(savepoint_name);

	new_savep = mem_alloc(sizeof(trx_named_savept_t));

	new_savep->name = mem_alloc(name_size);
	ut_memcpy(new_savep->name, savepoint_name, name_size);

	new_savep->savept = trx_savept_take(trx);
	new_savep->mysql_binlog_cache_pos = binlog_cache_pos;

	UT_LIST_ADD_LAST(trx_savepoints, trx->trx_savepoints, new_savep);

	return(DB_SUCCESS);
}
/***********************************************************************
Takes a savepoint of a transaction at this point in time: the savepoint
records the current undo number of the transaction. */

trx_savept_t
trx_savept_take(
/*============*/
			/* out: savepoint */
	trx_t*	trx)	/* in: transaction */
{
	trx_savept_t	snapshot;

	/* The current undo_no of the transaction is the only thing
	stored in the savepoint */

	snapshot.least_undo_no = trx->undo_no;

	return(snapshot);
}
/***********************************************************************
Rollback or clean up transactions which have no user session. If the
transaction already was committed, then we clean up a possible insert
@ -326,22 +490,6 @@ loop:
goto loop;
}
/***********************************************************************
Returns a transaction savepoint taken at this point in time. */
trx_savept_t
trx_savept_take(
/*============*/
/* out: savepoint */
trx_t* trx) /* in: transaction */
{
trx_savept_t savept;
savept.least_undo_no = trx->undo_no;
return(savept);
}
/***********************************************************************
Creates an undo number array. */

View File

@ -321,8 +321,8 @@ trx_sys_doublewrite_restore_corrupt_pages(void)
for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) {
space_id = mach_read_from_4(page + FIL_PAGE_SPACE);
page_no = mach_read_from_4(page + FIL_PAGE_OFFSET);
space_id = 0;
if (!fil_check_adress_in_tablespace(space_id, page_no)) {
fprintf(stderr,

View File

@ -135,6 +135,8 @@ trx_create(
trx->lock_heap = mem_heap_create_in_buffer(256);
UT_LIST_INIT(trx->trx_locks);
UT_LIST_INIT(trx->trx_savepoints);
trx->dict_operation_lock_mode = 0;
trx->has_search_latch = FALSE;
trx->search_latch_timeout = BTR_SEA_TIMEOUT;
@ -807,6 +809,9 @@ trx_commit_off_kernel(
mutex_enter(&kernel_mutex);
}
/* Free savepoints */
trx_roll_savepoints_free(trx, NULL);
trx->conc_state = TRX_NOT_STARTED;
trx->rseg = NULL;
trx->undo_no = ut_dulint_zero;

View File

@ -166,7 +166,7 @@ ut_free(
}
/**************************************************************************
Frees all allocated memory not freed yet. */
Frees in shutdown all allocated memory not freed yet. */
void
ut_free_all_mem(void)
@ -174,7 +174,7 @@ ut_free_all_mem(void)
{
ut_mem_block_t* block;
os_fast_mutex_lock(&ut_list_mutex);
os_fast_mutex_free(&ut_list_mutex);
while ((block = UT_LIST_GET_FIRST(ut_mem_block_list))) {
@ -187,11 +187,11 @@ ut_free_all_mem(void)
free(block);
}
os_fast_mutex_unlock(&ut_list_mutex);
ut_a(ut_total_allocated_memory == 0);
os_fast_mutex_free(&ut_list_mutex);
if (ut_total_allocated_memory != 0) {
fprintf(stderr,
"InnoDB: Warning: after shutdown total allocated memory is %lu\n",
ut_total_allocated_memory);
}
}
/**************************************************************************

View File

@ -53,6 +53,8 @@ ut_get_high32(
ulint a) /* in: ulint */
{
#if SIZEOF_LONG == 4
UT_NOT_USED(a);
return 0;
#else
return(a >> 32);

View File

@ -787,16 +787,6 @@ id id3
100 2
UNLOCK TABLES;
DROP TABLE t1;
create table t1 (a char(20), unique (a(5))) type=innodb;
Incorrect sub part key. The used key part isn't a string, the used length is longer than the key part or the table handler doesn't support unique sub keys
create table t1 (a char(20), index (a(5))) type=innodb;
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` char(20) default NULL,
KEY `a` (`a`)
) TYPE=InnoDB
drop table t1;
create temporary table t1 (a int not null auto_increment, primary key(a)) type=innodb;
insert into t1 values (NULL),(NULL),(NULL);
delete from t1 where a=3;

View File

@ -471,15 +471,6 @@ select id,id3 from t1;
UNLOCK TABLES;
DROP TABLE t1;
#
# Test prefix key
#
--error 1089
create table t1 (a char(20), unique (a(5))) type=innodb;
create table t1 (a char(20), index (a(5))) type=innodb;
show create table t1;
drop table t1;
#
# Test using temporary table and auto_increment
#

View File

@ -129,12 +129,45 @@ static void innobase_print_error(const char* db_errpfx, char* buffer);
/* General functions */
/**********************************************************************
Inline wrapper for srv_conc_enter_innodb(): the value of
srv_thread_concurrency is tested here, and the call is skipped altogether
when the value is 500 or bigger. This saves some CPU. */
inline
void
innodb_srv_conc_enter_innodb(
/*=========================*/
	trx_t*	trx)	/* in: transaction handle */
{
	if (srv_thread_concurrency < 500) {

		srv_conc_enter_innodb(trx);
	}
}
/**********************************************************************
Inline wrapper for srv_conc_exit_innodb(): the value of
srv_thread_concurrency is tested here, and the call is skipped altogether
when the value is 500 or bigger. This saves some CPU. */
inline
void
innodb_srv_conc_exit_innodb(
/*========================*/
	trx_t*	trx)	/* in: transaction handle */
{
	if (srv_thread_concurrency < 500) {

		srv_conc_exit_innodb(trx);
	}
}
/**********************************************************************
Releases possible search latch and InnoDB thread FIFO ticket. These should
be released at each SQL statement end, and also when mysqld passes the
control to the client. It does no harm to release these also in the middle
of an SQL statement. */
static
inline
void
innobase_release_stat_resources(
@ -183,7 +216,9 @@ innobase_active_small(void)
}
/************************************************************************
Converts an InnoDB error code to a MySQL error code. */
Converts an InnoDB error code to a MySQL error code and also tells to MySQL
about a possible transaction rollback inside InnoDB caused by a lock wait
timeout or a deadlock. */
static
int
convert_error_code_to_mysql(
@ -206,10 +241,10 @@ convert_error_code_to_mysql(
} else if (error == (int) DB_ERROR) {
return(HA_ERR_NO_ACTIVE_RECORD);
return(-1); /* unspecified error */
} else if (error == (int) DB_DEADLOCK) {
/* Since we roll back the whole transaction, we must
/* Since we rolled back the whole transaction, we must
tell it also to MySQL so that MySQL knows to empty the
cached binlog for this transaction */
@ -221,11 +256,10 @@ convert_error_code_to_mysql(
} else if (error == (int) DB_LOCK_WAIT_TIMEOUT) {
/* Since we roll back the whole transaction, we must
/* Since we rolled back the whole transaction, we must
tell it also to MySQL so that MySQL knows to empty the
cached binlog for this transaction */
if (thd) {
ha_rollback(thd);
}
@ -271,6 +305,9 @@ convert_error_code_to_mysql(
} else if (error == (int) DB_CORRUPTION) {
return(HA_ERR_CRASHED);
} else if (error == (int) DB_NO_SAVEPOINT) {
return(HA_ERR_NO_SAVEPOINT);
} else {
return(-1); // Unknown error
}
@ -941,18 +978,23 @@ innobase_commit(
DBUG_ENTER("innobase_commit");
DBUG_PRINT("trans", ("ending transaction"));
/* The flag thd->transaction.all.innodb_active_trans is set to 1
in ::external_lock and ::start_stmt, and it is only set to 0 in
a commit or a rollback. If it is 0 we know there cannot be resources
to be freed and we can return immediately. */
if (thd->transaction.all.innodb_active_trans == 0) {
DBUG_RETURN(0);
}
trx = check_trx_exists(thd);
/* The flag thd->transaction.all.innodb_active_trans is set to 1 in
::external_lock, ::start_stmt, and innobase_savepoint, and it is only
set to 0 in a commit or a rollback. If it is 0 we know there cannot be
resources to be freed and we could return immediately. For the time
being we play safe and do the cleanup though there should be nothing
to clean up. */
if (thd->transaction.all.innodb_active_trans == 0
&& trx->conc_state != TRX_NOT_STARTED) {
fprintf(stderr,
"InnoDB: Error: thd->transaction.all.innodb_active_trans == 0\n"
"InnoDB: but trx->conc_state != TRX_NOT_STARTED\n");
}
if (trx_handle != (void*)&innodb_dummy_stmt_trx_handle
|| (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))) {
@ -964,9 +1006,9 @@ innobase_commit(
/* If we had reserved the auto-inc lock for some
table in this SQL statement we release it now */
srv_conc_enter_innodb(trx);
innodb_srv_conc_enter_innodb(trx);
row_unlock_table_autoinc_for_mysql(trx);
srv_conc_exit_innodb(trx);
innodb_srv_conc_exit_innodb(trx);
}
/* Store the current undo_no of the transaction so that we
know where to roll back if we have to roll back the next
@ -1050,7 +1092,7 @@ innobase_commit_complete(
}
/*********************************************************************
Rolls back a transaction or the latest SQL statement in an InnoDB database. */
Rolls back a transaction or the latest SQL statement. */
int
innobase_rollback(
@ -1071,25 +1113,27 @@ innobase_rollback(
trx = check_trx_exists(thd);
if (trx->auto_inc_lock) {
/* If we had reserved the auto-inc lock for some table (if
we come here to roll back the latest SQL statement) we
release it now before a possibly lengthy rollback */
/* If we had reserved the auto-inc lock for
some table in this SQL statement, we release it now */
srv_conc_enter_innodb(trx);
innodb_srv_conc_enter_innodb(trx);
row_unlock_table_autoinc_for_mysql(trx);
srv_conc_exit_innodb(trx);
innodb_srv_conc_exit_innodb(trx);
}
srv_conc_enter_innodb(trx);
innodb_srv_conc_enter_innodb(trx);
if (trx_handle != (void*)&innodb_dummy_stmt_trx_handle
|| (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))) {
if (trx_handle != (void*)&innodb_dummy_stmt_trx_handle) {
error = trx_rollback_for_mysql(trx);
thd->transaction.all.innodb_active_trans=0;
thd->transaction.all.innodb_active_trans = 0;
} else {
error = trx_rollback_last_sql_stat_for_mysql(trx);
}
srv_conc_exit_innodb(trx);
innodb_srv_conc_exit_innodb(trx);
/* Release a possible FIFO ticket and search latch */
innobase_release_stat_resources(trx);
@ -1097,6 +1141,83 @@ innobase_rollback(
DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
}
/*********************************************************************
Rolls back a transaction to a savepoint. */

int
innobase_rollback_to_savepoint(
/*===========================*/
				/* out: 0 if success, HA_ERR_NO_SAVEPOINT if
				no savepoint with the given name */
	THD*	thd,		/* in: handle to the MySQL thread of the user
				whose transaction should be rolled back */
	char*	savepoint_name,	/* in: savepoint name */
	my_off_t* binlog_cache_pos)/* out: position which corresponds to the
				savepoint in the binlog cache of this
				transaction; 0 is stored here if the
				savepoint was not found or the rollback
				could not be started, and the caller must
				not rely on the value if an error is
				returned */
{
	/* Initialized to 0 because trx_rollback_to_savepoint_for_mysql()
	returns before writing its out parameter when it reports
	DB_NO_SAVEPOINT or DB_ERROR: without the initialization the copy
	to *binlog_cache_pos below would read an indeterminate value */

	ib_longlong	mysql_binlog_cache_pos = 0;
	int		error = 0;
	trx_t*		trx;

	DBUG_ENTER("innobase_rollback_to_savepoint");

	trx = check_trx_exists(thd);

	innodb_srv_conc_enter_innodb(trx);

	error = trx_rollback_to_savepoint_for_mysql(trx, savepoint_name,
						&mysql_binlog_cache_pos);

	innodb_srv_conc_exit_innodb(trx);

	*binlog_cache_pos = (my_off_t)mysql_binlog_cache_pos;

	/* Release a possible FIFO ticket and search latch */

	innobase_release_stat_resources(trx);

	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
}
/*********************************************************************
Sets a named savepoint in the InnoDB transaction of a MySQL thread. In the
autocommit state nothing is done. */

int
innobase_savepoint(
/*===============*/
				/* out: always 0, that is, always succeeds */
	THD*	thd,		/* in: handle to the MySQL thread */
	char*	savepoint_name,	/* in: savepoint name */
	my_off_t binlog_cache_pos)/* in: offset up to which the current
				transaction has cached log entries to its
				binlog cache, not defined if no transaction
				active, or we are in the autocommit state, or
				binlogging is not switched on */
{
	trx_t*	trx;
	int	err = 0;

	DBUG_ENTER("innobase_savepoint");

	if (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {

		trx = check_trx_exists(thd);

		/* A savepoint allocates resources inside InnoDB (memory
		for the savepoint name, for example), and therefore setting
		one starts a transaction inside InnoDB: mark it active */

		thd->transaction.all.innodb_active_trans = 1;

		err = trx_savepoint_for_mysql(trx, savepoint_name,
					(ib_longlong)binlog_cache_pos);

		DBUG_RETURN(convert_error_code_to_mysql(err, NULL));
	}

	/* Autocommit state: a savepoint would make no sense, so we
	report success at once */

	DBUG_RETURN(0);
}
/*********************************************************************
Frees a possible InnoDB trx object associated with the current
THD. */
@ -1220,7 +1341,6 @@ ha_innobase::open(
{
dict_table_t* ib_table;
int error = 0;
uint buff_len;
char norm_name[1000];
DBUG_ENTER("ha_innobase::open");
@ -1245,11 +1365,11 @@ ha_innobase::open(
fields when packed actually became 1 byte longer, when we also
stored the string length as the first byte. */
buff_len = table->reclength + table->max_key_length
upd_and_key_val_buff_len = table->reclength + table->max_key_length
+ MAX_REF_PARTS * 3;
if (!(mysql_byte*) my_multi_malloc(MYF(MY_WME),
&upd_buff, buff_len,
&key_val_buff, buff_len,
&upd_buff, upd_and_key_val_buff_len,
&key_val_buff, upd_and_key_val_buff_len,
NullS)) {
free_share(share);
DBUG_RETURN(1);
@ -1500,6 +1620,10 @@ innobase_mysql_cmp(
case FIELD_TYPE_STRING:
case FIELD_TYPE_VAR_STRING:
case FIELD_TYPE_TINY_BLOB:
case FIELD_TYPE_MEDIUM_BLOB:
case FIELD_TYPE_BLOB:
case FIELD_TYPE_LONG_BLOB:
ret = my_sortncmp((const char*) a, a_length,
(const char*) b, b_length);
if (ret < 0) {
@ -1526,7 +1650,7 @@ get_innobase_type_from_mysql_type(
/* out: DATA_BINARY, DATA_VARCHAR, ... */
Field* field) /* in: MySQL field */
{
/* The following asserts check that MySQL type code fits in
/* The following asserts check that the MySQL type code fits in
8 bits: this is used in ibuf and also when DATA_NOT_NULL is
ORed to the type */
@ -1537,6 +1661,8 @@ get_innobase_type_from_mysql_type(
DBUG_ASSERT((ulint)FIELD_TYPE_DECIMAL < 256);
switch (field->type()) {
/* NOTE that we only allow string types in DATA_MYSQL
and DATA_VARMYSQL */
case FIELD_TYPE_VAR_STRING: if (field->flags & BINARY_FLAG) {
return(DATA_BINARY);
@ -1607,33 +1733,98 @@ ha_innobase::store_key_val_for_row(
KEY_PART_INFO* key_part = key_info->key_part;
KEY_PART_INFO* end = key_part + key_info->key_parts;
char* buff_start = buff;
enum_field_types mysql_type;
Field* field;
ulint blob_len;
byte* blob_data;
ibool is_null;
DBUG_ENTER("store_key_val_for_row");
/* The format for storing a key field in MySQL is the following:
1. If the column can be NULL, then in the first byte we put 1 if the
field value is NULL, 0 otherwise.
2. If the column is of a BLOB type (it must be a column prefix field
in this case), then we put the length of the data in the field to the
next 2 bytes, in the little-endian format. If the field is SQL NULL,
then these 2 bytes are set to 0. Note that the length of data in the
field is <= column prefix length.
3. In a column prefix field, prefix_len next bytes are reserved for
data. In a normal field the max field length next bytes are reserved
for data. For a VARCHAR(n) the max field length is n. If the stored
value is the SQL NULL then these data bytes are set to 0. */
/* We have to zero-fill the 'ref' buffer so that MySQL is able to
use a simple memcmp to compare two key values to determine if they
are equal */
bzero(buff, ref_length);
for (; key_part != end; key_part++) {
is_null = FALSE;
if (key_part->null_bit) {
/* Store 0 if the key part is a NULL part */
if (record[key_part->null_offset]
& key_part->null_bit) {
*buff++ = 1;
continue;
}
*buff++ = 0;
*buff = 1;
is_null = TRUE;
} else {
*buff = 0;
}
buff++;
}
memcpy(buff, record + key_part->offset, key_part->length);
buff += key_part->length;
}
field = key_part->field;
mysql_type = field->type();
/*
We have to zero-fill the 'ref' buffer so that MySQL is able to
use a simple memcmp to compare two key values to determine if they
are equal
*/
bzero(buff, (ref_length- (uint) (buff - buff_start)));
if (mysql_type == FIELD_TYPE_TINY_BLOB
|| mysql_type == FIELD_TYPE_MEDIUM_BLOB
|| mysql_type == FIELD_TYPE_BLOB
|| mysql_type == FIELD_TYPE_LONG_BLOB) {
ut_a(key_part->key_part_flag & HA_PART_KEY);
if (is_null) {
buff += key_part->length + 2;
continue;
}
blob_data = row_mysql_read_blob_ref(&blob_len,
(byte*) (record
+ (ulint)get_field_offset(table, field)),
(ulint) field->pack_length());
ut_a(get_field_offset(table, field)
== key_part->offset);
if (blob_len > key_part->length) {
blob_len = key_part->length;
}
/* MySQL reserves 2 bytes for the length and the
storage of the number is little-endian */
ut_a(blob_len < 256);
*buff = blob_len;
buff += 2;
memcpy(buff, blob_data, blob_len);
buff += key_part->length;
} else {
if (is_null) {
buff += key_part->length;
continue;
}
memcpy(buff, record + key_part->offset,
key_part->length);
buff += key_part->length;
}
}
DBUG_RETURN((uint)(buff - buff_start));
}
@ -1905,9 +2096,9 @@ ha_innobase::write_row(
The lock is released at each SQL statement's
end. */
srv_conc_enter_innodb(prebuilt->trx);
innodb_srv_conc_enter_innodb(prebuilt->trx);
error = row_lock_table_autoinc_for_mysql(prebuilt);
srv_conc_exit_innodb(prebuilt->trx);
innodb_srv_conc_exit_innodb(prebuilt->trx);
if (error != DB_SUCCESS) {
@ -1918,14 +2109,15 @@ ha_innobase::write_row(
dict_table_autoinc_update(prebuilt->table, auto_inc);
} else {
srv_conc_enter_innodb(prebuilt->trx);
innodb_srv_conc_enter_innodb(prebuilt->trx);
if (!prebuilt->trx->auto_inc_lock) {
error = row_lock_table_autoinc_for_mysql(
prebuilt);
if (error != DB_SUCCESS) {
srv_conc_exit_innodb(prebuilt->trx);
innodb_srv_conc_exit_innodb(
prebuilt->trx);
error = convert_error_code_to_mysql(
error, user_thd);
@ -1939,7 +2131,7 @@ ha_innobase::write_row(
auto_inc = dict_table_autoinc_get(prebuilt->table);
incremented_auto_inc_counter = TRUE;
srv_conc_exit_innodb(prebuilt->trx);
innodb_srv_conc_exit_innodb(prebuilt->trx);
/* We can give the new value for MySQL to place in
the field */
@ -1962,11 +2154,11 @@ ha_innobase::write_row(
build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW);
}
srv_conc_enter_innodb(prebuilt->trx);
innodb_srv_conc_enter_innodb(prebuilt->trx);
error = row_insert_for_mysql((byte*) record, prebuilt);
srv_conc_exit_innodb(prebuilt->trx);
innodb_srv_conc_exit_innodb(prebuilt->trx);
if (error != DB_SUCCESS) {
/* If the insert did not succeed we restore the value of
@ -2037,7 +2229,6 @@ innobase_convert_and_store_changed_col(
while (len > 0 && data[len - 1] == ' ') {
len--;
}
} else if (col_type == DATA_INT) {
/* Store integer data in InnoDB in a big-endian
format, sign bit negated, if signed */
@ -2075,9 +2266,11 @@ calc_row_difference(
struct st_table* table, /* in: table in MySQL data
dictionary */
mysql_byte* upd_buff, /* in: buffer to use */
ulint buff_len, /* in: buffer length */
row_prebuilt_t* prebuilt, /* in: InnoDB prebuilt struct */
THD* thd) /* in: user thread */
{
mysql_byte* original_upd_buff = upd_buff;
Field* field;
uint n_fields;
ulint o_len;
@ -2159,12 +2352,13 @@ calc_row_difference(
(prebuilt->table->cols + i)->clust_pos;
n_changed++;
}
;
}
uvect->n_fields = n_changed;
uvect->info_bits = 0;
ut_a(buf <= (byte*)original_upd_buff + buff_len);
return(0);
}
@ -2213,17 +2407,19 @@ ha_innobase::update_row(
(uses upd_buff of the handle) */
calc_row_difference(uvect, (mysql_byte*) old_row, new_row, table,
upd_buff, prebuilt, user_thd);
upd_buff, (ulint)upd_and_key_val_buff_len,
prebuilt, user_thd);
/* This is not a delete */
prebuilt->upd_node->is_delete = FALSE;
assert(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW);
srv_conc_enter_innodb(prebuilt->trx);
innodb_srv_conc_enter_innodb(prebuilt->trx);
error = row_update_for_mysql((byte*) old_row, prebuilt);
srv_conc_exit_innodb(prebuilt->trx);
innodb_srv_conc_exit_innodb(prebuilt->trx);
error = convert_error_code_to_mysql(error, user_thd);
@ -2267,11 +2463,11 @@ ha_innobase::delete_row(
prebuilt->upd_node->is_delete = TRUE;
srv_conc_enter_innodb(prebuilt->trx);
innodb_srv_conc_enter_innodb(prebuilt->trx);
error = row_update_for_mysql((byte*) record, prebuilt);
srv_conc_exit_innodb(prebuilt->trx);
innodb_srv_conc_exit_innodb(prebuilt->trx);
error = convert_error_code_to_mysql(error, user_thd);
@ -2459,10 +2655,11 @@ ha_innobase::index_read(
prebuilt->search_tuple */
row_sel_convert_mysql_key_to_innobase(prebuilt->search_tuple,
(byte*) key_val_buff,
index,
(byte*) key_ptr,
(ulint) key_len);
(byte*) key_val_buff,
(ulint)upd_and_key_val_buff_len,
index,
(byte*) key_ptr,
(ulint) key_len);
} else {
/* We position the cursor to the last or the first entry
in the index */
@ -2484,11 +2681,11 @@ ha_innobase::index_read(
last_match_mode = match_mode;
srv_conc_enter_innodb(prebuilt->trx);
innodb_srv_conc_enter_innodb(prebuilt->trx);
ret = row_search_for_mysql((byte*) buf, mode, prebuilt, match_mode, 0);
srv_conc_exit_innodb(prebuilt->trx);
innodb_srv_conc_exit_innodb(prebuilt->trx);
if (ret == DB_SUCCESS) {
error = 0;
@ -2632,11 +2829,11 @@ ha_innobase::general_fetch(
ut_a(prebuilt->trx ==
(trx_t*) current_thd->transaction.all.innobase_tid);
srv_conc_enter_innodb(prebuilt->trx);
innodb_srv_conc_enter_innodb(prebuilt->trx);
ret = row_search_for_mysql((byte*)buf, 0, prebuilt, match_mode,
direction);
srv_conc_exit_innodb(prebuilt->trx);
innodb_srv_conc_exit_innodb(prebuilt->trx);
if (ret == DB_SUCCESS) {
error = 0;
@ -2939,7 +3136,6 @@ ha_innobase::position(
}
}
/*********************************************************************
Creates a table definition to an InnoDB database. */
static
@ -2958,6 +3154,8 @@ create_table_def(
ulint col_type;
ulint nulls_allowed;
ulint unsigned_type;
ulint binary_type;
ulint nonlatin1_type;
ulint i;
DBUG_ENTER("create_table_def");
@ -2986,9 +3184,23 @@ create_table_def(
unsigned_type = 0;
}
if (strcmp(default_charset_info->name, "latin1") != 0) {
nonlatin1_type = DATA_NONLATIN1;
} else {
nonlatin1_type = 0;
}
if (field->flags & BINARY_FLAG) {
binary_type = DATA_BINARY_TYPE;
nonlatin1_type = 0;
} else {
binary_type = 0;
}
dict_mem_table_add_col(table, (char*) field->field_name,
col_type, (ulint)field->type()
| nulls_allowed | unsigned_type,
| nulls_allowed | unsigned_type
| nonlatin1_type | binary_type,
field->pack_length(), 0);
}
@ -3011,6 +3223,7 @@ create_index(
const char* table_name, /* in: table name */
uint key_num) /* in: index number */
{
Field* field;
dict_index_t* index;
int error;
ulint n_fields;
@ -3020,6 +3233,7 @@ create_index(
ulint col_type;
ulint prefix_len;
ulint i;
ulint j;
DBUG_ENTER("create_index");
@ -3046,20 +3260,46 @@ create_index(
for (i = 0; i < n_fields; i++) {
key_part = key->key_part + i;
if (key_part->length != key_part->field->pack_length()) {
/* (The flag HA_PART_KEY denotes in MySQL a column prefix
field in an index: we only store a specified number of first
bytes of the column to the index field.) The flag does not
seem to be properly set by MySQL. Let us fall back on testing
the length of the key part versus the column. */
field = NULL;
for (j = 0; j < form->fields; j++) {
field = form->field[j];
if (strlen(field->field_name)
== strlen(key_part->field->field_name)
&& 0 == ut_cmp_in_lower_case(
(char*)field->field_name,
(char*)key_part->field->field_name,
strlen(field->field_name))) {
/* Found the corresponding column */
break;
}
}
ut_a(j < form->fields);
col_type = get_innobase_type_from_mysql_type(key_part->field);
if (DATA_BLOB == col_type
|| key_part->length < field->pack_length()) {
prefix_len = key_part->length;
col_type = get_innobase_type_from_mysql_type(
key_part->field);
if (col_type == DATA_INT
|| col_type == DATA_FLOAT
|| col_type == DATA_DOUBLE
|| col_type == DATA_DECIMAL) {
fprintf(stderr,
"InnoDB: error: MySQL is trying to create a column prefix index field\n"
"InnoDB: on an inappropriate data type %lu. Table name %s, column name %s.\n",
col_type, table_name,
key_part->field->field_name);
"InnoDB: on an inappropriate data type. Table name %s, column name %s.\n",
table_name, key_part->field->field_name);
prefix_len = 0;
}
@ -3067,10 +3307,16 @@ create_index(
prefix_len = 0;
}
if (prefix_len >= DICT_MAX_COL_PREFIX_LEN) {
DBUG_RETURN(-1);
}
/* We assume all fields should be sorted in ascending
order, hence the '0': */
dict_mem_index_add_field(index,
(char*) key_part->field->field_name, 0);
(char*) key_part->field->field_name,
0, prefix_len);
}
error = row_create_index_for_mysql(index, trx);
@ -3536,6 +3782,8 @@ ha_innobase::records_in_range(
table->reclength
+ table->max_key_length + 100,
MYF(MY_WME));
ulint buff2_len = table->reclength
+ table->max_key_length + 100;
dtuple_t* range_start;
dtuple_t* range_end;
ib_longlong n_rows;
@ -3572,12 +3820,15 @@ ha_innobase::records_in_range(
dict_index_copy_types(range_end, index, key->key_parts);
row_sel_convert_mysql_key_to_innobase(
range_start, (byte*) key_val_buff, index,
range_start, (byte*) key_val_buff,
(ulint)upd_and_key_val_buff_len,
index,
(byte*) start_key,
(ulint) start_key_len);
row_sel_convert_mysql_key_to_innobase(
range_end, (byte*) key_val_buff2, index,
range_end, (byte*) key_val_buff2,
buff2_len, index,
(byte*) end_key,
(ulint) end_key_len);
@ -3787,8 +4038,32 @@ ha_innobase::info(
}
for (i = 0; i < table->keys; i++) {
if (index == NULL) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Error: table %s contains less indexes inside InnoDB\n"
"InnoDB: than are defined in the MySQL .frm file. Have you mixed up\n"
"InnoDB: .frm files from different installations? See section\n"
"InnoDB: 15.1 at http://www.innodb.com/ibman.html\n",
ib_table->name);
break;
}
for (j = 0; j < table->key_info[i].key_parts; j++) {
if (j + 1 > index->n_uniq) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Error: index %s of %s has %lu columns unique inside InnoDB\n"
"InnoDB: but MySQL is asking statistics for %lu columns. Have you mixed up\n"
"InnoDB: .frm files from different installations? See section\n"
"InnoDB: 15.1 at http://www.innodb.com/ibman.html\n",
index->name,
ib_table->name, index->n_uniq,
j + 1);
break;
}
if (index->stat_n_diff_key_vals[j + 1] == 0) {
rec_per_key = records;
@ -4046,10 +4321,11 @@ ha_innobase::reset(void)
}
/**********************************************************************
MySQL calls this function at the start of each SQL statement. Inside LOCK
TABLES the ::external_lock method does not work to mark SQL statement
borders. Note also a special case: if a temporary table is created inside
LOCK TABLES, MySQL has not called external_lock() at all on that table. */
MySQL calls this function at the start of each SQL statement inside LOCK
TABLES. Inside LOCK TABLES the ::external_lock method does not work to
mark SQL statement borders. Note also a special case: if a temporary table
is created inside LOCK TABLES, MySQL has not called external_lock() at all
on that table. */
int
ha_innobase::start_stmt(
@ -4448,9 +4724,9 @@ ha_innobase::innobase_read_and_init_auto_inc(
return(0);
}
srv_conc_enter_innodb(prebuilt->trx);
innodb_srv_conc_enter_innodb(prebuilt->trx);
error = row_lock_table_autoinc_for_mysql(prebuilt);
srv_conc_exit_innodb(prebuilt->trx);
innodb_srv_conc_exit_innodb(prebuilt->trx);
if (error != DB_SUCCESS) {
error = convert_error_code_to_mysql(error, user_thd);

View File

@ -52,6 +52,9 @@ class ha_innobase: public handler
byte* key_val_buff; /* buffer used in converting
search key values from MySQL format
to Innodb format */
ulong upd_and_key_val_buff_len;
/* the length of each of the previous
two buffers */
ulong int_table_flags;
uint primary_key;
uint last_dup_key;
@ -83,13 +86,15 @@ class ha_innobase: public handler
public:
ha_innobase(TABLE *table): handler(table),
int_table_flags(HA_REC_NOT_IN_SEQ |
HA_KEYPOS_TO_RNDPOS | HA_LASTKEY_ORDER |
HA_NULL_KEY | HA_CAN_SQL_HANDLER |
HA_KEYPOS_TO_RNDPOS |
HA_LASTKEY_ORDER |
HA_NULL_KEY |
HA_BLOB_KEY |
HA_CAN_SQL_HANDLER |
HA_NOT_EXACT_COUNT |
HA_NO_WRITE_DELAYED |
HA_PRIMARY_KEY_IN_READ_INDEX |
HA_DROP_BEFORE_CREATE |
HA_NO_PREFIX_CHAR_KEYS |
HA_TABLE_SCAN_ON_INDEX),
last_dup_key((uint) -1),
start_of_scan(0)
@ -217,6 +222,14 @@ int innobase_report_binlog_offset_and_commit(
int innobase_commit_complete(
void* trx_handle);
int innobase_rollback(THD *thd, void* trx_handle);
int innobase_rollback_to_savepoint(
THD* thd,
char* savepoint_name,
my_off_t* binlog_cache_pos);
int innobase_savepoint(
THD* thd,
char* savepoint_name,
my_off_t binlog_cache_pos);
int innobase_close_connection(THD *thd);
int innobase_drop_database(char *path);
int innodb_show_status(THD* thd);

View File

@ -379,7 +379,6 @@ int ha_commit_trans(THD *thd, THD_TRANS* trans)
trans->innodb_active_trans=0;
if (trans == &thd->transaction.all)
operation_done= transaction_commited= 1;
}
#endif
#ifdef HAVE_QUERY_CACHE
@ -447,6 +446,70 @@ int ha_rollback_trans(THD *thd, THD_TRANS *trans)
DBUG_RETURN(error);
}
/*
  Rolls the current transaction back to the savepoint called savepoint_name.

  When transactions are in use and InnoDB is compiled in, the request is
  forwarded to innobase_rollback_to_savepoint(). On success the write
  position of the transaction's binlog cache (thd->transaction.trans_log)
  is moved back to the position InnoDB reports for the savepoint.

  Return value: 0 if success, 1 if InnoDB returned an error (documented
  by the original author as: there was not a savepoint of the given name).
  In the error case the InnoDB error code has already been reported
  through my_error(ER_ERROR_DURING_ROLLBACK, ...).
*/
int ha_rollback_to_savepoint(THD *thd, char *savepoint_name)
{
  my_off_t savepoint_cache_pos= 0;
  bool rollback_attempted= 0;
  int result= 0;
  DBUG_ENTER("ha_rollback_to_savepoint");
#ifdef USING_TRANSACTIONS
  if (opt_using_transactions)
  {
#ifdef HAVE_INNOBASE_DB
    /*
      Ask InnoDB to roll back; it hands back the trans_log binlog cache
      position that was recorded when the savepoint was set.
    */
    int engine_error= innobase_rollback_to_savepoint(thd, savepoint_name,
                                                     &savepoint_cache_pos);
    if (engine_error == 0)
    {
      /*
        Rollback succeeded inside InnoDB: reset the write position in the
        binlog cache to what it was at the savepoint.
      */
      reinit_io_cache(&thd->transaction.trans_log, WRITE_CACHE,
                      savepoint_cache_pos, 0, 0);
    }
    else
    {
      my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), engine_error);
      result= 1;
    }
    /* Counted as a rollback whether or not the engine call succeeded */
    rollback_attempted= 1;
#endif
    if (rollback_attempted)
      statistic_increment(ha_rollback_count,&LOCK_status);
  }
#endif /* USING_TRANSACTIONS */
  DBUG_RETURN(result);
}
/*
  Sets a transaction savepoint called savepoint_name.

  The current write position of the transaction's binlog cache
  (thd->transaction.trans_log) is read and, when InnoDB is compiled in,
  passed to innobase_savepoint() together with the savepoint name. The
  value returned by innobase_savepoint() is not inspected.

  Return value: always 0, that is, succeeds always
*/
int ha_savepoint(THD *thd, char *savepoint_name)
{
  int result= 0;
  DBUG_ENTER("ha_savepoint");
#ifdef USING_TRANSACTIONS
  if (opt_using_transactions)
  {
    /* Binlog cache position that belongs to this savepoint */
    my_off_t cache_pos_at_savepoint= my_b_tell(&thd->transaction.trans_log);
#ifdef HAVE_INNOBASE_DB
    innobase_savepoint(thd, savepoint_name, cache_pos_at_savepoint);
#endif
  }
#endif /* USING_TRANSACTIONS */
  DBUG_RETURN(result);
}
bool ha_flush_logs()
{
bool result=0;

View File

@ -376,6 +376,8 @@ int ha_commit_complete(THD *thd);
int ha_release_temporary_latches(THD *thd);
int ha_commit_trans(THD *thd, THD_TRANS *trans);
int ha_rollback_trans(THD *thd, THD_TRANS *trans);
int ha_rollback_to_savepoint(THD *thd, char *savepoint_name);
int ha_savepoint(THD *thd, char *savepoint_name);
int ha_autocommit_or_rollback(THD *thd, int error);
void ha_set_spin_retries(uint retries);
bool ha_flush_logs(void);

View File

@ -53,8 +53,9 @@ enum enum_sql_command {
SQLCOM_REPAIR, SQLCOM_REPLACE, SQLCOM_REPLACE_SELECT,
SQLCOM_CREATE_FUNCTION, SQLCOM_DROP_FUNCTION,
SQLCOM_REVOKE,SQLCOM_OPTIMIZE, SQLCOM_CHECK,
SQLCOM_FLUSH, SQLCOM_KILL, SQLCOM_ANALYZE,
SQLCOM_ROLLBACK, SQLCOM_COMMIT, SQLCOM_SAVEPOINT,
SQLCOM_FLUSH, SQLCOM_KILL, SQLCOM_ANALYZE,
SQLCOM_ROLLBACK, SQLCOM_ROLLBACK_TO_SAVEPOINT,
SQLCOM_COMMIT, SQLCOM_SAVEPOINT,
SQLCOM_SLAVE_START, SQLCOM_SLAVE_STOP,
SQLCOM_BEGIN, SQLCOM_LOAD_MASTER_TABLE, SQLCOM_CHANGE_MASTER,
SQLCOM_RENAME_TABLE, SQLCOM_BACKUP_TABLE, SQLCOM_RESTORE_TABLE,

View File

@ -2536,8 +2536,22 @@ mysql_execute_command(void)
res= -1;
thd->options&= ~(ulong) (OPTION_BEGIN | OPTION_STATUS_NO_TRANS_UPDATE);
break;
case SQLCOM_ROLLBACK_TO_SAVEPOINT:
if (!ha_rollback_to_savepoint(thd, lex->savepoint_name))
{
if (thd->options & OPTION_STATUS_NO_TRANS_UPDATE)
send_warning(&thd->net,ER_WARNING_NOT_COMPLETE_ROLLBACK,0);
else
send_ok(&thd->net);
}
else
res= -1;
break;
case SQLCOM_SAVEPOINT:
send_ok(&thd->net);
if (!ha_savepoint(thd, lex->savepoint_name))
send_ok(&thd->net);
else
res= -1;
break;
default: /* Impossible */
send_ok(&thd->net);

View File

@ -3922,11 +3922,10 @@ rollback:
ROLLBACK_SYM
{
Lex->sql_command = SQLCOM_ROLLBACK;
Lex->savepoint_name = NULL;
}
| ROLLBACK_SYM TO_SYM SAVEPOINT_SYM ident
{
Lex->sql_command = SQLCOM_ROLLBACK;
Lex->sql_command = SQLCOM_ROLLBACK_TO_SAVEPOINT;
Lex->savepoint_name = $4.str;
};
savepoint: