diff --git a/BitKeeper/etc/logging_ok b/BitKeeper/etc/logging_ok index e294d969c2f..fe5a271830a 100644 --- a/BitKeeper/etc/logging_ok +++ b/BitKeeper/etc/logging_ok @@ -143,6 +143,7 @@ msvensson@build.mysql.com mwagner@cash.mwagner.org mwagner@evoq.mwagner.org mwagner@here.mwagner.org +mwagner@mysql.com mwagner@work.mysql.com mydev@mysql.com mysql@home.(none) diff --git a/client/mysqladmin.cc b/client/mysqladmin.cc index fe450868342..37c4971497a 100644 --- a/client/mysqladmin.cc +++ b/client/mysqladmin.cc @@ -126,8 +126,8 @@ static TYPELIB command_typelib= static struct my_option my_long_options[] = { #ifdef __NETWARE__ - {"autoclose", 'a', " Auto close the screen on exit for NetWare", - 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"autoclose", 'a', " Auto close the screen on exit for NetWare", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, #endif {"count", 'c', "Number of iterations to make. This works with -i (--sleep) only.", diff --git a/client/mysqldump.c b/client/mysqldump.c index 99670dec89c..0d0e67454f8 100644 --- a/client/mysqldump.c +++ b/client/mysqldump.c @@ -1936,7 +1936,7 @@ static int init_dumping(char *database) MYSQL_ROW row; MYSQL_RES *dbinfo; - sprintf(qbuf,"SHOW CREATE DATABASE WITH IF NOT EXISTS %s", + sprintf(qbuf,"SHOW CREATE DATABASE IF NOT EXISTS %s", qdatabase); if (mysql_query_with_error_report(sock, &dbinfo, qbuf)) diff --git a/configure.in b/configure.in index d3385fc425b..ff0c7550e4a 100644 --- a/configure.in +++ b/configure.in @@ -5,7 +5,7 @@ AC_INIT(sql/mysqld.cc) AC_CANONICAL_SYSTEM # The Docs Makefile.am parses this line! # remember to also change ndb version below and update version.c in ndb -AM_INIT_AUTOMAKE(mysql, 5.0.2-alpha) +AM_INIT_AUTOMAKE(mysql, 5.0.3-alpha) AM_CONFIG_HEADER(config.h) PROTOCOL_VERSION=10 diff --git a/innobase/btr/btr0btr.c b/innobase/btr/btr0btr.c index ae967e0525e..c911124e705 100644 --- a/innobase/btr/btr0btr.c +++ b/innobase/btr/btr0btr.c @@ -86,15 +86,6 @@ btr_page_create( page_t* page, /* in: page to be created */ dict_tree_t* tree, /* in: index tree */ mtr_t* mtr); /* in: mtr */ -/****************************************************************** -Sets the child node file address in a node pointer. */ -UNIV_INLINE -void -btr_node_ptr_set_child_page_no( -/*===========================*/ - rec_t* rec, /* in: node pointer record */ - ulint page_no, /* in: child node address */ - mtr_t* mtr); /* in: mtr */ /**************************************************************** Returns the upper level node pointer to a page. It is assumed that mtr holds an x-latch on the tree. */ @@ -128,7 +119,10 @@ btr_page_insert_fits( rec_t* split_rec, /* in: suggestion for first record on upper half-page, or NULL if tuple should be first */ - dtuple_t* tuple); /* in: tuple to insert */ + const ulint* offsets, /* in: rec_get_offsets( + split_rec, cursor->index) */ + dtuple_t* tuple, /* in: tuple to insert */ + mem_heap_t* heap); /* in: temporary memory heap */ /****************************************************************** Gets the root node of a tree and x-latches it. 
*/ @@ -143,11 +137,13 @@ btr_root_get( ulint space; ulint root_page_no; page_t* root; + ibool comp = UT_LIST_GET_FIRST(tree->tree_indexes)->table->comp; space = dict_tree_get_space(tree); root_page_no = dict_tree_get_page(tree); root = btr_page_get(space, root_page_no, RW_X_LATCH, mtr); + ut_a(page_is_comp(root) == comp); return(root); } @@ -194,6 +190,7 @@ btr_get_prev_user_rec( MTR_MEMO_PAGE_S_FIX)) || (mtr_memo_contains(mtr, buf_block_align(prev_page), MTR_MEMO_PAGE_X_FIX))); + ut_a(page_is_comp(prev_page) == page_is_comp(page)); prev_rec = page_rec_get_prev(page_get_supremum_rec(prev_page)); @@ -246,6 +243,7 @@ btr_get_next_user_rec( || (mtr_memo_contains(mtr, buf_block_align(next_page), MTR_MEMO_PAGE_X_FIX))); + ut_a(page_is_comp(next_page) == page_is_comp(page)); next_rec = page_rec_get_next(page_get_infimum_rec(next_page)); return(next_rec); @@ -267,7 +265,8 @@ btr_page_create( { ut_ad(mtr_memo_contains(mtr, buf_block_align(page), MTR_MEMO_PAGE_X_FIX)); - page_create(page, mtr); + page_create(page, mtr, + UT_LIST_GET_FIRST(tree->tree_indexes)->table->comp); buf_block_align(page)->check_index_page_at_flush = TRUE; btr_page_set_index_id(page, tree->id, mtr); @@ -503,20 +502,21 @@ UNIV_INLINE void btr_node_ptr_set_child_page_no( /*===========================*/ - rec_t* rec, /* in: node pointer record */ - ulint page_no, /* in: child node address */ - mtr_t* mtr) /* in: mtr */ + rec_t* rec, /* in: node pointer record */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint page_no,/* in: child node address */ + mtr_t* mtr) /* in: mtr */ { - ulint n_fields; byte* field; ulint len; + ut_ad(rec_offs_validate(rec, NULL, offsets)); ut_ad(0 < btr_page_get_level(buf_frame_align(rec), mtr)); - - n_fields = rec_get_n_fields(rec); + ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec)); /* The child address is in the last field */ - field = rec_get_nth_field(rec, n_fields - 1, &len); + field = rec_get_nth_field(rec, offsets, + rec_offs_n_fields(offsets) - 1, &len); ut_ad(len == 4); @@ -529,16 +529,18 @@ static page_t* btr_node_ptr_get_child( /*===================*/ - /* out: child page, x-latched */ - rec_t* node_ptr, /* in: node pointer */ - mtr_t* mtr) /* in: mtr */ + /* out: child page, x-latched */ + rec_t* node_ptr,/* in: node pointer */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + mtr_t* mtr) /* in: mtr */ { ulint page_no; ulint space; page_t* page; - + + ut_ad(rec_offs_validate(node_ptr, NULL, offsets)); space = buf_frame_get_space_id(node_ptr); - page_no = btr_node_ptr_get_child_page_no(node_ptr); + page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets); page = btr_page_get(space, page_no, RW_X_LATCH, mtr); @@ -564,6 +566,8 @@ btr_page_get_father_for_rec( dtuple_t* tuple; btr_cur_t cursor; rec_t* node_ptr; + dict_index_t* index; + ulint* offsets; ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree), MTR_MEMO_X_LOCK)); @@ -576,18 +580,20 @@ btr_page_get_father_for_rec( tuple = dict_tree_build_node_ptr(tree, user_rec, 0, heap, btr_page_get_level(page, mtr)); + index = UT_LIST_GET_FIRST(tree->tree_indexes); /* In the following, we choose just any index from the tree as the first parameter for btr_cur_search_to_nth_level. 
*/ - - btr_cur_search_to_nth_level(UT_LIST_GET_FIRST(tree->tree_indexes), + + btr_cur_search_to_nth_level(index, btr_page_get_level(page, mtr) + 1, tuple, PAGE_CUR_LE, BTR_CONT_MODIFY_TREE, &cursor, 0, mtr); node_ptr = btr_cur_get_rec(&cursor); + offsets = rec_get_offsets(node_ptr, index, ULINT_UNDEFINED, heap); - if (btr_node_ptr_get_child_page_no(node_ptr) != + if (btr_node_ptr_get_child_page_no(node_ptr, offsets) != buf_frame_get_page_no(page)) { fputs("InnoDB: Dump of the child page:\n", stderr); buf_page_print(buf_frame_align(page)); @@ -595,17 +601,22 @@ btr_page_get_father_for_rec( buf_page_print(buf_frame_align(node_ptr)); fputs("InnoDB: Corruption of an index tree: table ", stderr); - ut_print_name(stderr, NULL, - UT_LIST_GET_FIRST(tree->tree_indexes)->table_name); + ut_print_name(stderr, NULL, index->table_name); fputs(", index ", stderr); - ut_print_name(stderr, NULL, - UT_LIST_GET_FIRST(tree->tree_indexes)->name); + ut_print_name(stderr, NULL, index->name); fprintf(stderr, ",\n" "InnoDB: father ptr page no %lu, child page no %lu\n", - (ulong) btr_node_ptr_get_child_page_no(node_ptr), + (ulong) + btr_node_ptr_get_child_page_no(node_ptr, offsets), (ulong) buf_frame_get_page_no(page)); - page_rec_print(page_rec_get_next(page_get_infimum_rec(page))); - page_rec_print(node_ptr); + offsets = rec_reget_offsets(page_rec_get_next( + page_get_infimum_rec(page)), index, + offsets, ULINT_UNDEFINED, heap); + page_rec_print(page_rec_get_next(page_get_infimum_rec(page)), + offsets); + offsets = rec_reget_offsets(node_ptr, index, offsets, + ULINT_UNDEFINED, heap); + page_rec_print(node_ptr, offsets); fputs( "InnoDB: You should dump + drop + reimport the table to fix the\n" @@ -614,7 +625,7 @@ btr_page_get_father_for_rec( "InnoDB: forcing recovery. Then dump + drop + reimport.\n", stderr); } - ut_a(btr_node_ptr_get_child_page_no(node_ptr) == + ut_a(btr_node_ptr_get_child_page_no(node_ptr, offsets) == buf_frame_get_page_no(page)); mem_heap_free(heap); @@ -649,6 +660,7 @@ btr_create( ulint type, /* in: type of the index */ ulint space, /* in: space where created */ dulint index_id,/* in: index id */ + ibool comp, /* in: TRUE=compact page format */ mtr_t* mtr) /* in: mini-transaction handle */ { ulint page_no; @@ -716,7 +728,7 @@ btr_create( } /* Create a new index page on the the allocated segment page */ - page = page_create(frame, mtr); + page = page_create(frame, mtr, comp); buf_block_align(page)->check_index_page_at_flush = TRUE; /* Set the index id of the page */ @@ -821,12 +833,14 @@ static void btr_page_reorganize_low( /*====================*/ - ibool recovery,/* in: TRUE if called in recovery: locks should not - be updated, i.e., there cannot exist locks on the - page, and a hash index should not be dropped: it - cannot exist */ - page_t* page, /* in: page to be reorganized */ - mtr_t* mtr) /* in: mtr */ + ibool recovery,/* in: TRUE if called in recovery: + locks should not be updated, i.e., + there cannot exist locks on the + page, and a hash index should not be + dropped: it cannot exist */ + page_t* page, /* in: page to be reorganized */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mtr */ { page_t* new_page; ulint log_mode; @@ -841,7 +855,9 @@ btr_page_reorganize_low( max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1); /* Write the log record */ - mlog_write_initial_log_record(page, MLOG_PAGE_REORGANIZE, mtr); + mlog_open_and_write_index(mtr, page, index, index->table->comp + ? 
MLOG_COMP_PAGE_REORGANIZE + : MLOG_PAGE_REORGANIZE, 0); /* Turn logging off */ log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); @@ -858,14 +874,14 @@ btr_page_reorganize_low( /* Recreate the page: note that global data on page (possible segment headers, next page-field, etc.) is preserved intact */ - page_create(page, mtr); + page_create(page, mtr, index->table->comp); buf_block_align(page)->check_index_page_at_flush = TRUE; /* Copy the records from the temporary space to the recreated page; do not copy the lock bits yet */ page_copy_rec_list_end_no_locks(page, new_page, - page_get_infimum_rec(new_page), mtr); + page_get_infimum_rec(new_page), index, mtr); /* Copy max trx id to recreated page */ page_set_max_trx_id(page, page_get_max_trx_id(new_page)); @@ -901,10 +917,11 @@ Reorganizes an index page. */ void btr_page_reorganize( /*================*/ - page_t* page, /* in: page to be reorganized */ - mtr_t* mtr) /* in: mtr */ + page_t* page, /* in: page to be reorganized */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mtr */ { - btr_page_reorganize_low(FALSE, page, mtr); + btr_page_reorganize_low(FALSE, page, index, mtr); } /*************************************************************** @@ -913,18 +930,20 @@ Parses a redo log record of reorganizing a page. */ byte* btr_parse_page_reorganize( /*======================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr __attribute__((unused)), /* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr) /* in: mtr or NULL */ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr __attribute__((unused)), + /* in: buffer end */ + dict_index_t* index, /* in: record descriptor */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr) /* in: mtr or NULL */ { ut_ad(ptr && end_ptr); /* The record is empty, except for the record initial part */ if (page) { - btr_page_reorganize_low(TRUE, page, mtr); + btr_page_reorganize_low(TRUE, page, index, mtr); } return(ptr); @@ -946,7 +965,7 @@ btr_page_empty( /* Recreate the page: note that global data on page (possible segment headers, next page-field, etc.) 
is preserved intact */ - page_create(page, mtr); + page_create(page, mtr, page_is_comp(page)); buf_block_align(page)->check_index_page_at_flush = TRUE; } @@ -1011,7 +1030,7 @@ btr_root_raise_and_insert( /* Move the records from root to the new page */ page_move_rec_list_end(new_page, root, page_get_infimum_rec(root), - mtr); + cursor->index, mtr); /* If this is a pessimistic insert which is actually done to perform a pessimistic update then we have stored the lock information of the record to be inserted on the infimum of the @@ -1031,7 +1050,7 @@ btr_root_raise_and_insert( node_ptr = dict_tree_build_node_ptr(tree, rec, new_page_no, heap, level); /* Reorganize the root to get free space */ - btr_page_reorganize(root, mtr); + btr_page_reorganize(root, cursor->index, mtr); page_cursor = btr_cur_get_page_cur(cursor); @@ -1039,7 +1058,8 @@ btr_root_raise_and_insert( page_cur_set_before_first(root, page_cursor); - node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr, mtr); + node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr, + cursor->index, mtr); ut_ad(node_ptr_rec); @@ -1047,7 +1067,7 @@ btr_root_raise_and_insert( as there is no lower alphabetical limit to records in the leftmost node of a level: */ - btr_set_min_rec_mark(node_ptr_rec, mtr); + btr_set_min_rec_mark(node_ptr_rec, cursor->index->table->comp, mtr); /* Free the memory heap */ mem_heap_free(heap); @@ -1060,7 +1080,8 @@ btr_root_raise_and_insert( ibuf_reset_free_bits(UT_LIST_GET_FIRST(tree->tree_indexes), new_page); /* Reposition the cursor to the child node */ - page_cur_search(new_page, tuple, PAGE_CUR_LE, page_cursor); + page_cur_search(new_page, cursor->index, tuple, + PAGE_CUR_LE, page_cursor); /* Split the child and insert tuple */ return(btr_page_split_and_insert(cursor, tuple, mtr)); @@ -1190,11 +1211,13 @@ btr_page_get_sure_split_rec( rec_t* rec; rec_t* next_rec; ulint n; - + mem_heap_t* heap; + ulint* offsets; + page = btr_cur_get_page(cursor); - insert_size = rec_get_converted_size(tuple); - free_space = page_get_free_space_of_empty(); + insert_size = rec_get_converted_size(cursor->index, tuple); + free_space = page_get_free_space_of_empty(cursor->index->table->comp); /* free_space is now the free space of a created new page */ @@ -1208,6 +1231,9 @@ btr_page_get_sure_split_rec( ins_rec = btr_cur_get_rec(cursor); rec = page_get_infimum_rec(page); + heap = mem_heap_create(100); + offsets = NULL; + /* We start to include records to the left half, and when the space reserved by them exceeds half of total_space, then if the included records fit on the left page, they will be put there @@ -1230,7 +1256,9 @@ btr_page_get_sure_split_rec( /* Include tuple */ incl_data += insert_size; } else { - incl_data += rec_get_size(rec); + offsets = rec_reget_offsets(rec, cursor->index, + offsets, ULINT_UNDEFINED, heap); + incl_data += rec_offs_size(offsets); } n++; @@ -1252,11 +1280,12 @@ btr_page_get_sure_split_rec( next_rec = page_rec_get_next(rec); } if (next_rec != page_get_supremum_rec(page)) { - + mem_heap_free(heap); return(next_rec); } } + mem_heap_free(heap); return(rec); } } @@ -1275,7 +1304,10 @@ btr_page_insert_fits( rec_t* split_rec, /* in: suggestion for first record on upper half-page, or NULL if tuple to be inserted should be first */ - dtuple_t* tuple) /* in: tuple to insert */ + const ulint* offsets, /* in: rec_get_offsets( + split_rec, cursor->index) */ + dtuple_t* tuple, /* in: tuple to insert */ + mem_heap_t* heap) /* in: temporary memory heap */ { page_t* page; ulint insert_size; @@ -1284,11 +1316,19 @@ 
btr_page_insert_fits( ulint total_n_recs; rec_t* rec; rec_t* end_rec; + ulint* offs; page = btr_cur_get_page(cursor); - - insert_size = rec_get_converted_size(tuple); - free_space = page_get_free_space_of_empty(); + + ut_ad(!split_rec == !offsets); + ut_ad(!offsets + || cursor->index->table->comp == rec_offs_comp(offsets)); + ut_ad(!offsets + || rec_offs_validate(split_rec, cursor->index, offsets)); + ut_ad(page_is_comp(page) == cursor->index->table->comp); + + insert_size = rec_get_converted_size(cursor->index, tuple); + free_space = page_get_free_space_of_empty(cursor->index->table->comp); /* free_space is now the free space of a created new page */ @@ -1303,7 +1343,7 @@ btr_page_insert_fits( rec = page_rec_get_next(page_get_infimum_rec(page)); end_rec = page_rec_get_next(btr_cur_get_rec(cursor)); - } else if (cmp_dtuple_rec(tuple, split_rec) >= 0) { + } else if (cmp_dtuple_rec(tuple, split_rec, offsets) >= 0) { rec = page_rec_get_next(page_get_infimum_rec(page)); end_rec = split_rec; @@ -1321,11 +1361,16 @@ btr_page_insert_fits( return(TRUE); } + offs = NULL; + while (rec != end_rec) { /* In this loop we calculate the amount of reserved space after rec is removed from page. */ - total_data -= rec_get_size(rec); + offs = rec_reget_offsets(rec, cursor->index, offs, + ULINT_UNDEFINED, heap); + + total_data -= rec_offs_size(offs); total_n_recs--; if (total_data + page_dir_calc_reserved_space(total_n_recs) @@ -1411,6 +1456,10 @@ btr_attach_half_pages( MTR_MEMO_PAGE_X_FIX)); ut_ad(mtr_memo_contains(mtr, buf_block_align(new_page), MTR_MEMO_PAGE_X_FIX)); + ut_a(page_is_comp(page) == page_is_comp(new_page)); + + /* Create a memory heap where the data tuple is stored */ + heap = mem_heap_create(100); /* Based on split direction, decide upper and lower pages */ if (direction == FSP_DOWN) { @@ -1426,7 +1475,12 @@ btr_attach_half_pages( /* Replace the address of the old child node (= page) with the address of the new lower half */ - btr_node_ptr_set_child_page_no(node_ptr, lower_page_no, mtr); + btr_node_ptr_set_child_page_no(node_ptr, + rec_get_offsets(node_ptr, + UT_LIST_GET_FIRST(tree->tree_indexes), + ULINT_UNDEFINED, heap), + lower_page_no, mtr); + mem_heap_empty(heap); } else { lower_page_no = buf_frame_get_page_no(page); upper_page_no = buf_frame_get_page_no(new_page); @@ -1434,9 +1488,6 @@ btr_attach_half_pages( upper_page = new_page; } - /* Create a memory heap where the data tuple is stored */ - heap = mem_heap_create(100); - /* Get the level of the split pages */ level = btr_page_get_level(page, mtr); @@ -1465,6 +1516,7 @@ btr_attach_half_pages( if (prev_page_no != FIL_NULL) { prev_page = btr_page_get(space, prev_page_no, RW_X_LATCH, mtr); + ut_a(page_is_comp(prev_page) == page_is_comp(page)); btr_page_set_next(prev_page, lower_page_no, mtr); } @@ -1472,6 +1524,7 @@ btr_attach_half_pages( if (next_page_no != FIL_NULL) { next_page = btr_page_get(space, next_page_no, RW_X_LATCH, mtr); + ut_a(page_is_comp(next_page) == page_is_comp(page)); btr_page_set_prev(next_page, upper_page_no, mtr); } @@ -1522,7 +1575,15 @@ btr_page_split_and_insert( ibool insert_will_fit; ulint n_iterations = 0; rec_t* rec; + mem_heap_t* heap; + ulint n_uniq; + ulint* offsets; + + heap = mem_heap_create(1024); + n_uniq = dict_index_get_n_unique_in_tree(cursor->index); func_start: + mem_heap_empty(heap); + offsets = NULL; tree = btr_cur_get_tree(cursor); ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree), @@ -1574,9 +1635,10 @@ func_start: first_rec = split_rec; move_limit = split_rec; } else { - buf = 
mem_alloc(rec_get_converted_size(tuple)); + buf = mem_alloc(rec_get_converted_size(cursor->index, tuple)); - first_rec = rec_convert_dtuple_to_rec(buf, tuple); + first_rec = rec_convert_dtuple_to_rec(buf, + cursor->index, tuple); move_limit = page_rec_get_next(btr_cur_get_rec(cursor)); } @@ -1593,7 +1655,16 @@ func_start: We can then move the records after releasing the tree latch, thus reducing the tree latch contention. */ - insert_will_fit = btr_page_insert_fits(cursor, split_rec, tuple); + if (split_rec) { + offsets = rec_reget_offsets(split_rec, cursor->index, + offsets, n_uniq, heap); + + insert_will_fit = btr_page_insert_fits(cursor, + split_rec, offsets, tuple, heap); + } else { + insert_will_fit = btr_page_insert_fits(cursor, + NULL, NULL, tuple, heap); + } if (insert_will_fit && (btr_page_get_level(page, mtr) == 0)) { @@ -1605,7 +1676,8 @@ func_start: if (direction == FSP_DOWN) { /* fputs("Split left\n", stderr); */ - page_move_rec_list_start(new_page, page, move_limit, mtr); + page_move_rec_list_start(new_page, page, move_limit, + cursor->index, mtr); left_page = new_page; right_page = page; @@ -1613,7 +1685,8 @@ func_start: } else { /* fputs("Split right\n", stderr); */ - page_move_rec_list_end(new_page, page, move_limit, mtr); + page_move_rec_list_end(new_page, page, move_limit, + cursor->index, mtr); left_page = page; right_page = new_page; @@ -1626,19 +1699,25 @@ func_start: if (split_rec == NULL) { insert_page = right_page; - } else if (cmp_dtuple_rec(tuple, first_rec) >= 0) { - - insert_page = right_page; } else { - insert_page = left_page; + offsets = rec_reget_offsets(first_rec, cursor->index, + offsets, n_uniq, heap); + + if (cmp_dtuple_rec(tuple, first_rec, offsets) >= 0) { + + insert_page = right_page; + } else { + insert_page = left_page; + } } /* 7. Reposition the cursor for insert and try insertion */ page_cursor = btr_cur_get_page_cur(cursor); - page_cur_search(insert_page, tuple, PAGE_CUR_LE, page_cursor); + page_cur_search(insert_page, cursor->index, tuple, + PAGE_CUR_LE, page_cursor); - rec = page_cur_tuple_insert(page_cursor, tuple, mtr); + rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr); if (rec != NULL) { /* Insert fit on the page: update the free bits for the @@ -1650,15 +1729,17 @@ func_start: /* fprintf(stderr, "Split and insert done %lu %lu\n", buf_frame_get_page_no(left_page), buf_frame_get_page_no(right_page)); */ + mem_heap_free(heap); return(rec); } /* 8. 
If insert did not fit, try page reorganization */ - btr_page_reorganize(insert_page, mtr); + btr_page_reorganize(insert_page, cursor->index, mtr); - page_cur_search(insert_page, tuple, PAGE_CUR_LE, page_cursor); - rec = page_cur_tuple_insert(page_cursor, tuple, mtr); + page_cur_search(insert_page, cursor->index, tuple, + PAGE_CUR_LE, page_cursor); + rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr); if (rec == NULL) { /* The insert did not fit on the page: loop back to the @@ -1688,6 +1769,7 @@ func_start: ut_ad(page_validate(left_page, UT_LIST_GET_FIRST(tree->tree_indexes))); ut_ad(page_validate(right_page, UT_LIST_GET_FIRST(tree->tree_indexes))); + mem_heap_free(heap); return(rec); } @@ -1721,6 +1803,7 @@ btr_level_list_remove( if (prev_page_no != FIL_NULL) { prev_page = btr_page_get(space, prev_page_no, RW_X_LATCH, mtr); + ut_a(page_is_comp(prev_page) == page_is_comp(page)); btr_page_set_next(prev_page, next_page_no, mtr); } @@ -1728,6 +1811,7 @@ btr_level_list_remove( if (next_page_no != FIL_NULL) { next_page = btr_page_get(space, next_page_no, RW_X_LATCH, mtr); + ut_a(page_is_comp(next_page) == page_is_comp(page)); btr_page_set_prev(next_page, prev_page_no, mtr); } @@ -1741,9 +1825,11 @@ void btr_set_min_rec_mark_log( /*=====================*/ rec_t* rec, /* in: record */ + ibool comp, /* TRUE=compact record format */ mtr_t* mtr) /* in: mtr */ { - mlog_write_initial_log_record(rec, MLOG_REC_MIN_MARK, mtr); + mlog_write_initial_log_record(rec, + comp ? MLOG_COMP_REC_MIN_MARK : MLOG_REC_MIN_MARK, mtr); /* Write rec offset as a 2-byte ulint */ mlog_catenate_ulint(mtr, rec - buf_frame_align(rec), MLOG_2BYTES); @@ -1759,6 +1845,7 @@ btr_parse_set_min_rec_mark( /* out: end of log record or NULL */ byte* ptr, /* in: buffer */ byte* end_ptr,/* in: buffer end */ + ibool comp, /* in: TRUE=compact page format */ page_t* page, /* in: page or NULL */ mtr_t* mtr) /* in: mtr or NULL */ { @@ -1772,7 +1859,7 @@ btr_parse_set_min_rec_mark( if (page) { rec = page + mach_read_from_2(ptr); - btr_set_min_rec_mark(rec, mtr); + btr_set_min_rec_mark(rec, comp, mtr); } return(ptr + 2); @@ -1785,15 +1872,16 @@ void btr_set_min_rec_mark( /*=================*/ rec_t* rec, /* in: record */ + ibool comp, /* in: TRUE=compact page format */ mtr_t* mtr) /* in: mtr */ { ulint info_bits; - info_bits = rec_get_info_bits(rec); + info_bits = rec_get_info_bits(rec, comp); - rec_set_info_bits(rec, info_bits | REC_INFO_MIN_REC_FLAG); + rec_set_info_bits(rec, comp, info_bits | REC_INFO_MIN_REC_FLAG); - btr_set_min_rec_mark_log(rec, mtr); + btr_set_min_rec_mark_log(rec, comp, mtr); } /***************************************************************** @@ -1842,18 +1930,19 @@ btr_lift_page_up( record from the page should be removed */ mtr_t* mtr) /* in: mtr */ { - rec_t* node_ptr; - page_t* father_page; - ulint page_level; - + page_t* father_page; + ulint page_level; + dict_index_t* index; + ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL); ut_ad(btr_page_get_next(page, mtr) == FIL_NULL); ut_ad(mtr_memo_contains(mtr, buf_block_align(page), MTR_MEMO_PAGE_X_FIX)); - node_ptr = btr_page_get_father_node_ptr(tree, page, mtr); - father_page = buf_frame_align(node_ptr); + father_page = buf_frame_align( + btr_page_get_father_node_ptr(tree, page, mtr)); page_level = btr_page_get_level(page, mtr); + index = UT_LIST_GET_FIRST(tree->tree_indexes); btr_search_drop_page_hash_index(page); @@ -1862,7 +1951,7 @@ btr_lift_page_up( /* Move records to the father */ page_copy_rec_list_end(father_page, page, page_get_infimum_rec(page), 
- mtr); + index, mtr); lock_update_copy_and_discard(father_page, page); btr_page_set_level(father_page, page_level, mtr); @@ -1871,10 +1960,8 @@ btr_lift_page_up( btr_page_free(tree, page, mtr); /* We play safe and reset the free bits for the father */ - ibuf_reset_free_bits(UT_LIST_GET_FIRST(tree->tree_indexes), - father_page); - ut_ad(page_validate(father_page, - UT_LIST_GET_FIRST(tree->tree_indexes))); + ibuf_reset_free_bits(index, father_page); + ut_ad(page_validate(father_page, index)); ut_ad(btr_check_node_ptr(tree, father_page, mtr)); } @@ -1914,9 +2001,11 @@ btr_compress( ulint max_ins_size; ulint max_ins_size_reorg; ulint level; - + ibool comp = cursor->index->table->comp; + page = btr_cur_get_page(cursor); tree = btr_cur_get_tree(cursor); + ut_a(comp == page_is_comp(page)); ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree), MTR_MEMO_X_LOCK)); @@ -1932,7 +2021,9 @@ btr_compress( right_page_no); */ node_ptr = btr_page_get_father_node_ptr(tree, page, mtr); + ut_ad(!comp || rec_get_status(node_ptr) == REC_STATUS_NODE_PTR); father_page = buf_frame_align(node_ptr); + ut_a(comp == page_is_comp(father_page)); /* Decide the page to which we try to merge and which will inherit the locks */ @@ -1957,6 +2048,7 @@ btr_compress( n_recs = page_get_n_recs(page); data_size = page_get_data_size(page); + ut_a(page_is_comp(merge_page) == page_is_comp(page)); max_ins_size_reorg = page_get_max_insert_size_after_reorganize( merge_page, n_recs); @@ -1975,7 +2067,7 @@ btr_compress( /* We have to reorganize merge_page */ - btr_page_reorganize(merge_page, mtr); + btr_page_reorganize(merge_page, cursor->index, mtr); max_ins_size = page_get_max_insert_size(merge_page, n_recs); @@ -1999,11 +2091,14 @@ btr_compress( if (is_left) { btr_node_ptr_delete(tree, page, mtr); } else { + mem_heap_t* heap = mem_heap_create(100); /* Replace the address of the old child node (= page) with the address of the merge page to the right */ - btr_node_ptr_set_child_page_no(node_ptr, right_page_no, mtr); - + btr_node_ptr_set_child_page_no(node_ptr, + rec_get_offsets(node_ptr, cursor->index, + ULINT_UNDEFINED, heap), right_page_no, mtr); + mem_heap_free(heap); btr_node_ptr_delete(tree, merge_page, mtr); } @@ -2012,14 +2107,14 @@ btr_compress( orig_pred = page_rec_get_prev( page_get_supremum_rec(merge_page)); page_copy_rec_list_start(merge_page, page, - page_get_supremum_rec(page), mtr); + page_get_supremum_rec(page), cursor->index, mtr); lock_update_merge_left(merge_page, orig_pred, page); } else { orig_succ = page_rec_get_next( page_get_infimum_rec(merge_page)); page_copy_rec_list_end(merge_page, page, - page_get_infimum_rec(page), mtr); + page_get_infimum_rec(page), cursor->index, mtr); lock_update_merge_right(orig_succ, page); } @@ -2133,6 +2228,7 @@ btr_discard_page( return; } + ut_a(page_is_comp(merge_page) == page_is_comp(page)); btr_search_drop_page_hash_index(page); if (left_page_no == FIL_NULL && btr_page_get_level(page, mtr) > 0) { @@ -2144,7 +2240,8 @@ btr_discard_page( ut_ad(node_ptr != page_get_supremum_rec(merge_page)); - btr_set_min_rec_mark(node_ptr, mtr); + btr_set_min_rec_mark(node_ptr, + cursor->index->table->comp, mtr); } btr_node_ptr_delete(tree, page, mtr); @@ -2215,6 +2312,8 @@ btr_print_recursive( page_t* page, /* in: index page */ ulint width, /* in: print this many entries from start and end */ + mem_heap_t* heap, /* in: heap for rec_reget_offsets() */ + ulint** offsets,/* in/out: buffer for rec_reget_offsets() */ mtr_t* mtr) /* in: mtr */ { page_cur_t cursor; @@ -2223,14 +2322,16 @@ 
btr_print_recursive( mtr_t mtr2; rec_t* node_ptr; page_t* child; - + dict_index_t* index; + ut_ad(mtr_memo_contains(mtr, buf_block_align(page), MTR_MEMO_PAGE_X_FIX)); fprintf(stderr, "NODE ON LEVEL %lu page number %lu\n", (ulong) btr_page_get_level(page, mtr), (ulong) buf_frame_get_page_no(page)); - page_print(page, width, width); + index = UT_LIST_GET_FIRST(tree->tree_indexes); + page_print(page, index, width, width); n_recs = page_get_n_recs(page); @@ -2249,15 +2350,20 @@ btr_print_recursive( node_ptr = page_cur_get_rec(&cursor); - child = btr_node_ptr_get_child(node_ptr, &mtr2); - - btr_print_recursive(tree, child, width, &mtr2); + *offsets = rec_reget_offsets(node_ptr, index, + *offsets, ULINT_UNDEFINED, heap); + child = btr_node_ptr_get_child(node_ptr, + *offsets, &mtr2); + btr_print_recursive(tree, child, width, + heap, offsets, &mtr2); mtr_commit(&mtr2); } page_cur_move_to_next(&cursor); i++; } + + mem_heap_free(heap); } /****************************************************************** @@ -2270,8 +2376,10 @@ btr_print_tree( ulint width) /* in: print this many entries from start and end */ { - mtr_t mtr; - page_t* root; + mtr_t mtr; + page_t* root; + mem_heap_t* heap = mem_heap_create(100); + ulint* offsets = NULL; fputs("--------------------------\n" "INDEX TREE PRINT\n", stderr); @@ -2280,7 +2388,8 @@ btr_print_tree( root = btr_root_get(tree, &mtr); - btr_print_recursive(tree, root, width, &mtr); + btr_print_recursive(tree, root, width, heap, &offsets, &mtr); + mem_heap_free(heap); mtr_commit(&mtr); @@ -2323,7 +2432,10 @@ btr_check_node_ptr( page_rec_get_next(page_get_infimum_rec(page)), 0, heap, btr_page_get_level(page, mtr)); - ut_a(cmp_dtuple_rec(node_ptr_tuple, node_ptr) == 0); + ut_a(cmp_dtuple_rec(node_ptr_tuple, node_ptr, + rec_get_offsets(node_ptr, + dict_tree_find_index(tree, node_ptr), + ULINT_UNDEFINED, heap)) == 0); mem_heap_free(heap); @@ -2360,10 +2472,12 @@ btr_index_rec_validate( should print hex dump of record and page on error */ { - ulint len; - ulint n; - ulint i; - page_t* page; + ulint len; + ulint n; + ulint i; + page_t* page; + mem_heap_t* heap; + ulint* offsets; page = buf_frame_align(rec); @@ -2377,10 +2491,10 @@ btr_index_rec_validate( n = dict_index_get_n_fields(index); - if (rec_get_n_fields(rec) != n) { + if (!index->table->comp && rec_get_n_fields_old(rec) != n) { btr_index_rec_validate_report(page, rec, index); fprintf(stderr, "InnoDB: has %lu fields, should have %lu\n", - (ulong) rec_get_n_fields(rec), (ulong) n); + (ulong) rec_get_n_fields_old(rec), (ulong) n); if (!dump_on_error) { @@ -2390,23 +2504,27 @@ btr_index_rec_validate( buf_page_print(page); fputs("InnoDB: corrupt record ", stderr); - rec_print(stderr, rec); + rec_print_old(stderr, rec); putc('\n', stderr); return(FALSE); } + heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); + for (i = 0; i < n; i++) { dtype_t* type = dict_index_get_nth_type(index, i); + ulint fixed_size = dtype_get_fixed_size(type); - rec_get_nth_field(rec, i, &len); + rec_get_nth_field(rec, offsets, i, &len); /* Note that prefix indexes are not fixed size even when their type is CHAR. 
*/ if ((dict_index_get_nth_field(index, i)->prefix_len == 0 - && len != UNIV_SQL_NULL && dtype_is_fixed_size(type) - && len != dtype_get_fixed_size(type)) + && len != UNIV_SQL_NULL && fixed_size + && len != fixed_size) || (dict_index_get_nth_field(index, i)->prefix_len > 0 && len != UNIV_SQL_NULL @@ -2419,20 +2537,22 @@ btr_index_rec_validate( (ulong) i, (ulong) len, (ulong) dtype_get_fixed_size(type)); if (!dump_on_error) { - + mem_heap_free(heap); return(FALSE); } buf_page_print(page); fputs("InnoDB: corrupt record ", stderr); - rec_print(stderr, rec); + rec_print(stderr, rec, offsets); putc('\n', stderr); + mem_heap_free(heap); return(FALSE); } } + mem_heap_free(heap); return(TRUE); } @@ -2527,15 +2647,18 @@ btr_validate_level( page_t* right_father_page; rec_t* node_ptr; rec_t* right_node_ptr; + rec_t* rec; ulint right_page_no; ulint left_page_no; page_cur_t cursor; - mem_heap_t* heap; dtuple_t* node_ptr_tuple; ibool ret = TRUE; dict_index_t* index; mtr_t mtr; - + mem_heap_t* heap = mem_heap_create(256); + ulint* offsets = NULL; + ulint* offsets2= NULL; + mtr_start(&mtr); mtr_x_lock(dict_tree_get_lock(tree), &mtr); @@ -2544,6 +2667,8 @@ btr_validate_level( space = buf_frame_get_space_id(page); + index = UT_LIST_GET_FIRST(tree->tree_indexes); + while (level != btr_page_get_level(page, &mtr)) { ut_a(btr_page_get_level(page, &mtr) > 0); @@ -2552,14 +2677,16 @@ btr_validate_level( page_cur_move_to_next(&cursor); node_ptr = page_cur_get_rec(&cursor); - page = btr_node_ptr_get_child(node_ptr, &mtr); + offsets = rec_reget_offsets(node_ptr, index, + offsets, ULINT_UNDEFINED, heap); + page = btr_node_ptr_get_child(node_ptr, offsets, &mtr); } - index = UT_LIST_GET_FIRST(tree->tree_indexes); - /* Now we are on the desired level. Loop through the pages on that level. */ loop: + mem_heap_empty(heap); + offsets = offsets2 = NULL; mtr_x_lock(dict_tree_get_lock(tree), &mtr); /* Check ordering etc. 
of records */ @@ -2588,12 +2715,20 @@ loop: (buf_frame_get_page_no(page) == dict_tree_get_page(tree)))); if (right_page_no != FIL_NULL) { - + rec_t* right_rec; right_page = btr_page_get(space, right_page_no, RW_X_LATCH, &mtr); - if (cmp_rec_rec(page_rec_get_prev(page_get_supremum_rec(page)), - page_rec_get_next(page_get_infimum_rec(right_page)), - UT_LIST_GET_FIRST(tree->tree_indexes)) >= 0) { + ut_a(page_is_comp(right_page) == page_is_comp(page)); + rec = page_rec_get_prev(page_get_supremum_rec(page)); + right_rec = page_rec_get_next( + page_get_infimum_rec(right_page)); + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + offsets2 = rec_reget_offsets(right_rec, index, + offsets2, ULINT_UNDEFINED, heap); + if (cmp_rec_rec(rec, right_rec, offsets, offsets2, + dict_index_get_n_fields(index), + index) >= 0) { btr_validate_report2(index, level, page, right_page); @@ -2604,12 +2739,17 @@ loop: buf_page_print(right_page); fputs("InnoDB: record ", stderr); - rec_print(stderr, page_rec_get_prev( - page_get_supremum_rec(page))); + rec = page_rec_get_prev(page_get_supremum_rec(page)); + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + rec_print(stderr, rec, offsets); putc('\n', stderr); fputs("InnoDB: record ", stderr); - rec_print(stderr, page_rec_get_next( - page_get_infimum_rec(right_page))); + rec = page_rec_get_next(page_get_infimum_rec( + right_page)); + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + rec_print(stderr, rec, offsets); putc('\n', stderr); ret = FALSE; @@ -2618,7 +2758,8 @@ loop: if (level > 0 && left_page_no == FIL_NULL) { ut_a(REC_INFO_MIN_REC_FLAG & rec_get_info_bits( - page_rec_get_next(page_get_infimum_rec(page)))); + page_rec_get_next(page_get_infimum_rec(page)), + index->table->comp)); } if (buf_frame_get_page_no(page) != dict_tree_get_page(tree)) { @@ -2627,12 +2768,14 @@ loop: node_ptr = btr_page_get_father_node_ptr(tree, page, &mtr); father_page = buf_frame_align(node_ptr); + offsets = rec_reget_offsets(node_ptr, index, + offsets, ULINT_UNDEFINED, heap); - if (btr_node_ptr_get_child_page_no(node_ptr) != + if (btr_node_ptr_get_child_page_no(node_ptr, offsets) != buf_frame_get_page_no(page) || node_ptr != btr_page_get_father_for_rec(tree, page, - page_rec_get_prev(page_get_supremum_rec(page)), - &mtr)) { + page_rec_get_prev(page_get_supremum_rec(page)), + &mtr)) { btr_validate_report1(index, level, page); fputs("InnoDB: node pointer to the page is wrong\n", @@ -2642,17 +2785,20 @@ loop: buf_page_print(page); fputs("InnoDB: node ptr ", stderr); - rec_print(stderr, node_ptr); + rec_print(stderr, node_ptr, offsets); fprintf(stderr, "\n" "InnoDB: node ptr child page n:o %lu\n", - (unsigned long) btr_node_ptr_get_child_page_no(node_ptr)); + (unsigned long) btr_node_ptr_get_child_page_no( + node_ptr, offsets)); fputs("InnoDB: record on page ", stderr); - rec_print(stderr, - btr_page_get_father_for_rec(tree, page, - page_rec_get_prev(page_get_supremum_rec(page)), - &mtr)); + rec = btr_page_get_father_for_rec(tree, page, + page_rec_get_prev(page_get_supremum_rec(page)), + &mtr); + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + rec_print(stderr, rec, offsets); putc('\n', stderr); ret = FALSE; @@ -2660,7 +2806,8 @@ loop: } if (btr_page_get_level(page, &mtr) > 0) { - heap = mem_heap_create(256); + offsets = rec_reget_offsets(node_ptr, index, + offsets, ULINT_UNDEFINED, heap); node_ptr_tuple = dict_tree_build_node_ptr( tree, @@ -2669,7 +2816,10 @@ loop: 0, heap, 
btr_page_get_level(page, &mtr)); - if (cmp_dtuple_rec(node_ptr_tuple, node_ptr) != 0) { + if (cmp_dtuple_rec(node_ptr_tuple, node_ptr, + offsets)) { + rec_t* first_rec = page_rec_get_next( + page_get_infimum_rec(page)); btr_validate_report1(index, level, page); @@ -2679,18 +2829,16 @@ loop: fputs("InnoDB: Error: node ptrs differ" " on levels > 0\n" "InnoDB: node ptr ", stderr); - rec_print(stderr, node_ptr); + rec_print(stderr, node_ptr, offsets); fputs("InnoDB: first rec ", stderr); - rec_print(stderr, page_rec_get_next( - page_get_infimum_rec(page))); + offsets = rec_reget_offsets(first_rec, index, + offsets, ULINT_UNDEFINED, heap); + rec_print(stderr, first_rec, offsets); putc('\n', stderr); ret = FALSE; - mem_heap_free(heap); goto node_ptr_fails; } - - mem_heap_free(heap); } if (left_page_no == FIL_NULL) { @@ -2701,7 +2849,7 @@ loop: if (right_page_no == FIL_NULL) { ut_a(node_ptr == page_rec_get_prev( - page_get_supremum_rec(father_page))); + page_get_supremum_rec(father_page))); ut_a(btr_page_get_next(father_page, &mtr) == FIL_NULL); } @@ -2771,13 +2919,16 @@ node_ptr_fails: mtr_commit(&mtr); if (right_page_no != FIL_NULL) { + ibool comp = page_is_comp(page); mtr_start(&mtr); page = btr_page_get(space, right_page_no, RW_X_LATCH, &mtr); + ut_a(page_is_comp(page) == comp); goto loop; } + mem_heap_free(heap); return(ret); } diff --git a/innobase/btr/btr0cur.c b/innobase/btr/btr0cur.c index 48de5644908..f5e146172ed 100644 --- a/innobase/btr/btr0cur.c +++ b/innobase/btr/btr0cur.c @@ -73,8 +73,9 @@ static void btr_cur_unmark_extern_fields( /*=========================*/ - rec_t* rec, /* in: record in a clustered index */ - mtr_t* mtr); /* in: mtr */ + rec_t* rec, /* in: record in a clustered index */ + mtr_t* mtr, /* in: mtr */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ /*********************************************************************** Adds path information to the cursor for the current page, for which the binary search has been performed. 
*/ @@ -96,6 +97,7 @@ btr_rec_free_updated_extern_fields( dict_index_t* index, /* in: index of rec; the index tree MUST be X-latched */ rec_t* rec, /* in: record */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ upd_t* update, /* in: update vector */ ibool do_not_free_inherited,/* in: TRUE if called in a rollback and we do not want to free @@ -108,9 +110,10 @@ static ulint btr_rec_get_externally_stored_len( /*==============================*/ - /* out: externally stored part, in units of a - database page */ - rec_t* rec); /* in: record */ + /* out: externally stored part, + in units of a database page */ + rec_t* rec, /* in: record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ /*==================== B-TREE SEARCH =========================*/ @@ -137,11 +140,13 @@ btr_cur_latch_leaves( if (latch_mode == BTR_SEARCH_LEAF) { get_page = btr_page_get(space, page_no, RW_S_LATCH, mtr); + ut_a(page_is_comp(get_page) == page_is_comp(page)); buf_block_align(get_page)->check_index_page_at_flush = TRUE; } else if (latch_mode == BTR_MODIFY_LEAF) { get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr); + ut_a(page_is_comp(get_page) == page_is_comp(page)); buf_block_align(get_page)->check_index_page_at_flush = TRUE; } else if (latch_mode == BTR_MODIFY_TREE) { @@ -152,11 +157,13 @@ btr_cur_latch_leaves( if (left_page_no != FIL_NULL) { get_page = btr_page_get(space, left_page_no, RW_X_LATCH, mtr); + ut_a(page_is_comp(get_page) == page_is_comp(page)); buf_block_align(get_page)->check_index_page_at_flush = TRUE; } get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr); + ut_a(page_is_comp(get_page) == page_is_comp(page)); buf_block_align(get_page)->check_index_page_at_flush = TRUE; right_page_no = btr_page_get_next(page, mtr); @@ -176,11 +183,14 @@ btr_cur_latch_leaves( if (left_page_no != FIL_NULL) { cursor->left_page = btr_page_get(space, left_page_no, RW_S_LATCH, mtr); + ut_a(page_is_comp(cursor->left_page) == + page_is_comp(page)); buf_block_align( cursor->left_page)->check_index_page_at_flush = TRUE; } get_page = btr_page_get(space, page_no, RW_S_LATCH, mtr); + ut_a(page_is_comp(get_page) == page_is_comp(page)); buf_block_align(get_page)->check_index_page_at_flush = TRUE; } else if (latch_mode == BTR_MODIFY_PREV) { @@ -191,11 +201,14 @@ btr_cur_latch_leaves( if (left_page_no != FIL_NULL) { cursor->left_page = btr_page_get(space, left_page_no, RW_X_LATCH, mtr); + ut_a(page_is_comp(cursor->left_page) == + page_is_comp(page)); buf_block_align( cursor->left_page)->check_index_page_at_flush = TRUE; } get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr); + ut_a(page_is_comp(get_page) == page_is_comp(page)); buf_block_align(get_page)->check_index_page_at_flush = TRUE; } else { ut_error; @@ -261,6 +274,8 @@ btr_cur_search_to_nth_level( #ifdef BTR_CUR_ADAPT btr_search_t* info; #endif + mem_heap_t* heap; + ulint* offsets; /* Currently, PAGE_CUR_LE is the only search mode used for searches ending to upper levels */ @@ -379,7 +394,9 @@ btr_cur_search_to_nth_level( page_mode = mode; break; } - + + heap = mem_heap_create(100); + offsets = NULL; /* Loop and search until we arrive at the desired level */ for (;;) { @@ -414,7 +431,7 @@ retry_page_get: cursor->thr)) { /* Insertion to the insert buffer succeeded */ cursor->flag = BTR_CUR_INSERT_TO_IBUF; - + mem_heap_free(heap); return; } @@ -470,9 +487,9 @@ retry_page_get: page_mode = mode; } - page_cur_search_with_match(page, tuple, page_mode, &up_match, - &up_bytes, &low_match, &low_bytes, - page_cursor); + 
page_cur_search_with_match(page, index, tuple, page_mode, + &up_match, &up_bytes, + &low_match, &low_bytes, page_cursor); if (estimate) { btr_cur_add_path_info(cursor, height, root_height); } @@ -486,7 +503,9 @@ retry_page_get: if (level > 0) { /* x-latch the page */ - btr_page_get(space, page_no, RW_X_LATCH, mtr); + ut_a(page_is_comp(btr_page_get(space, + page_no, RW_X_LATCH, mtr)) + == index->table->comp); } break; @@ -498,11 +517,14 @@ retry_page_get: guess = NULL; node_ptr = page_cur_get_rec(page_cursor); - + offsets = rec_reget_offsets(node_ptr, cursor->index, + offsets, ULINT_UNDEFINED, heap); /* Go to the child node */ - page_no = btr_node_ptr_get_child_page_no(node_ptr); + page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets); } + mem_heap_free(heap); + if (level == 0) { cursor->low_match = low_match; cursor->low_bytes = low_bytes; @@ -552,6 +574,8 @@ btr_cur_open_at_index_side( rec_t* node_ptr; ulint estimate; ulint savepoint; + mem_heap_t* heap; + ulint* offsets = NULL; estimate = latch_mode & BTR_ESTIMATE; latch_mode = latch_mode & ~BTR_ESTIMATE; @@ -576,7 +600,8 @@ btr_cur_open_at_index_side( page_no = dict_tree_get_page(tree); height = ULINT_UNDEFINED; - + heap = mem_heap_create(100); + for (;;) { page = buf_page_get_gen(space, page_no, RW_NO_LATCH, NULL, BUF_GET, @@ -645,10 +670,13 @@ btr_cur_open_at_index_side( height--; node_ptr = page_cur_get_rec(page_cursor); - + offsets = rec_reget_offsets(node_ptr, cursor->index, + offsets, ULINT_UNDEFINED, heap); /* Go to the child node */ - page_no = btr_node_ptr_get_child_page_no(node_ptr); + page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets); } + + mem_heap_free(heap); } /************************************************************************** @@ -669,6 +697,8 @@ btr_cur_open_at_rnd_pos( ulint space; ulint height; rec_t* node_ptr; + mem_heap_t* heap = mem_heap_create(100); + ulint* offsets = NULL; tree = index->tree; @@ -717,10 +747,13 @@ btr_cur_open_at_rnd_pos( height--; node_ptr = page_cur_get_rec(page_cursor); - + offsets = rec_reget_offsets(node_ptr, cursor->index, + offsets, ULINT_UNDEFINED, heap); /* Go to the child node */ - page_no = btr_node_ptr_get_child_page_no(node_ptr); + page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets); } + + mem_heap_free(heap); } /*==================== B-TREE INSERT =========================*/ @@ -758,18 +791,20 @@ btr_cur_insert_if_possible( page_cursor = btr_cur_get_page_cur(cursor); /* Now, try the insert */ - rec = page_cur_tuple_insert(page_cursor, tuple, mtr); + rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr); if (!rec) { /* If record did not fit, reorganize */ - btr_page_reorganize(page, mtr); + btr_page_reorganize(page, cursor->index, mtr); *reorg = TRUE; - page_cur_search(page, tuple, PAGE_CUR_LE, page_cursor); + page_cur_search(page, cursor->index, tuple, + PAGE_CUR_LE, page_cursor); - rec = page_cur_tuple_insert(page_cursor, tuple, mtr); + rec = page_cur_tuple_insert(page_cursor, tuple, + cursor->index, mtr); } return(rec); @@ -887,8 +922,6 @@ btr_cur_optimistic_insert( ibool reorg; ibool inherit; ulint rec_size; - ulint data_size; - ulint extra_size; ulint type; ulint err; @@ -914,13 +947,11 @@ btr_cur_optimistic_insert( calculate_sizes_again: /* Calculate the record size when entry is converted to a record */ - data_size = dtuple_get_data_size(entry); - extra_size = rec_get_converted_extra_size(data_size, - dtuple_get_n_fields(entry)); - rec_size = data_size + extra_size; + rec_size = rec_get_converted_size(index, entry); - if ((rec_size 
>= page_get_free_space_of_empty() / 2) - || (rec_size >= REC_MAX_DATA_SIZE)) { + if (rec_size >= + ut_min(page_get_free_space_of_empty(index->table->comp) / 2, + REC_MAX_DATA_SIZE)) { /* The record is so big that we have to store some fields externally on separate database pages */ @@ -983,19 +1014,18 @@ calculate_sizes_again: /* Now, try the insert */ - *rec = page_cur_insert_rec_low(page_cursor, entry, data_size, - NULL, mtr); + *rec = page_cur_insert_rec_low(page_cursor, entry, index, NULL, mtr); if (!(*rec)) { /* If the record did not fit, reorganize */ - btr_page_reorganize(page, mtr); + btr_page_reorganize(page, index, mtr); ut_ad(page_get_max_insert_size(page, 1) == max_size); reorg = TRUE; - page_cur_search(page, entry, PAGE_CUR_LE, page_cursor); + page_cur_search(page, index, entry, PAGE_CUR_LE, page_cursor); - *rec = page_cur_tuple_insert(page_cursor, entry, mtr); + *rec = page_cur_tuple_insert(page_cursor, entry, index, mtr); if (!*rec) { fputs("InnoDB: Error: cannot insert tuple ", stderr); @@ -1123,9 +1153,9 @@ btr_cur_pessimistic_insert( } } - if ((rec_get_converted_size(entry) - >= page_get_free_space_of_empty() / 2) - || (rec_get_converted_size(entry) >= REC_MAX_DATA_SIZE)) { + if (rec_get_converted_size(index, entry) >= + ut_min(page_get_free_space_of_empty(index->table->comp) / 2, + REC_MAX_DATA_SIZE)) { /* The record is so big that we have to store some fields externally on separate database pages */ @@ -1212,8 +1242,11 @@ btr_cur_upd_lock_and_undo( err = DB_SUCCESS; if (!(flags & BTR_NO_LOCKING_FLAG)) { + mem_heap_t* heap = mem_heap_create(100); err = lock_clust_rec_modify_check_and_lock(flags, rec, index, - thr); + rec_get_offsets(rec, index, ULINT_UNDEFINED, heap), + thr); + mem_heap_free(heap); if (err != DB_SUCCESS) { return(err); @@ -1243,14 +1276,17 @@ btr_cur_update_in_place_log( mtr_t* mtr) /* in: mtr */ { byte* log_ptr; + ut_ad(flags < 256); - log_ptr = mlog_open(mtr, 30 + MLOG_BUF_MARGIN); + log_ptr = mlog_open_and_write_index(mtr, rec, index, index->table->comp + ? MLOG_COMP_REC_UPDATE_IN_PLACE + : MLOG_REC_UPDATE_IN_PLACE, + 1 + DATA_ROLL_PTR_LEN + 14 + 2 + MLOG_BUF_MARGIN); - log_ptr = mlog_write_initial_log_record_fast(rec, - MLOG_REC_UPDATE_IN_PLACE, log_ptr, mtr); - - mach_write_to_1(log_ptr, flags); - log_ptr++; + if (!log_ptr) { + /* Logging in mtr is switched off during crash recovery */ + return; + } /* The code below assumes index is a clustered index: change index to the clustered index if we are updating a secondary index record (or we @@ -1259,6 +1295,9 @@ btr_cur_update_in_place_log( index = dict_table_get_first_index(index->table); + mach_write_to_1(log_ptr, flags); + log_ptr++; + log_ptr = row_upd_write_sys_vals_to_log(index, trx, roll_ptr, log_ptr, mtr); mach_write_to_2(log_ptr, rec - buf_frame_align(rec)); @@ -1273,10 +1312,11 @@ Parses a redo log record of updating a record in-place. 
*/ byte* btr_cur_parse_update_in_place( /*==========================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page) /* in: page or NULL */ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + page_t* page, /* in: page or NULL */ + dict_index_t* index) /* in: index corresponding to page */ { ulint flags; rec_t* rec; @@ -1286,6 +1326,7 @@ btr_cur_parse_update_in_place( dulint roll_ptr; ulint rec_offset; mem_heap_t* heap; + ulint* offsets; if (end_ptr < ptr + 1) { @@ -1333,11 +1374,14 @@ btr_cur_parse_update_in_place( /* We do not need to reserve btr_search_latch, as the page is only being recovered, and there cannot be a hash index to it. */ + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); + if (!(flags & BTR_KEEP_SYS_FLAG)) { - row_upd_rec_sys_fields_in_recovery(rec, pos, trx_id, roll_ptr); + row_upd_rec_sys_fields_in_recovery(rec, offsets, + pos, trx_id, roll_ptr); } - row_upd_rec_in_place(rec, update); + row_upd_rec_in_place(rec, offsets, update); mem_heap_free(heap); @@ -1369,14 +1413,18 @@ btr_cur_update_in_place( dulint roll_ptr = ut_dulint_zero; trx_t* trx; ibool was_delete_marked; + mem_heap_t* heap; + const ulint* offsets; rec = btr_cur_get_rec(cursor); index = cursor->index; trx = thr_get_trx(thr); - + heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); + if (btr_cur_print_record_ops && thr) { btr_cur_trx_report(trx, index, "update "); - rec_print(stderr, rec); + rec_print(stderr, rec, offsets); } /* Do lock checking and undo logging */ @@ -1384,6 +1432,7 @@ btr_cur_update_in_place( thr, &roll_ptr); if (err != DB_SUCCESS) { + mem_heap_free(heap); return(err); } @@ -1405,15 +1454,15 @@ btr_cur_update_in_place( } if (!(flags & BTR_KEEP_SYS_FLAG)) { - row_upd_rec_sys_fields(rec, index, trx, roll_ptr); + row_upd_rec_sys_fields(rec, index, offsets, trx, roll_ptr); } /* FIXME: in a mixed tree, all records may not have enough ordering fields for btr search: */ - was_delete_marked = rec_get_deleted_flag(rec); - - row_upd_rec_in_place(rec, update); + was_delete_marked = rec_get_deleted_flag(rec, index->table->comp); + + row_upd_rec_in_place(rec, offsets, update); if (block->is_hashed) { rw_lock_x_unlock(&btr_search_latch); @@ -1421,13 +1470,14 @@ btr_cur_update_in_place( btr_cur_update_in_place_log(flags, rec, index, update, trx, roll_ptr, mtr); - if (was_delete_marked && !rec_get_deleted_flag(rec)) { + if (was_delete_marked && !rec_get_deleted_flag(rec, index->table->comp)) { /* The new updated record owns its possible externally stored fields */ - btr_cur_unmark_extern_fields(rec, mtr); + btr_cur_unmark_extern_fields(rec, mtr, offsets); } + mem_heap_free(heap); return(DB_SUCCESS); } @@ -1469,24 +1519,28 @@ btr_cur_optimistic_update( mem_heap_t* heap; ibool reorganized = FALSE; ulint i; - + ulint* offsets; + page = btr_cur_get_page(cursor); rec = btr_cur_get_rec(cursor); index = cursor->index; + heap = mem_heap_create(1024); + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); + if (btr_cur_print_record_ops && thr) { btr_cur_trx_report(thr_get_trx(thr), index, "update "); - rec_print(stderr, rec); + rec_print(stderr, rec, offsets); } ut_ad(mtr_memo_contains(mtr, buf_block_align(page), MTR_MEMO_PAGE_X_FIX)); - if (!row_upd_changes_field_size_or_external(rec, index, update)) { + if (!row_upd_changes_field_size_or_external(index, offsets, update)) { /* The simplest and the most common case: the 
update does not change the size of any field and none of the updated fields is externally stored in rec or update */ - + mem_heap_free(heap); return(btr_cur_update_in_place(flags, cursor, update, cmpl_info, thr, mtr)); } @@ -1497,29 +1551,30 @@ btr_cur_optimistic_update( /* Externally stored fields are treated in pessimistic update */ + mem_heap_free(heap); return(DB_OVERFLOW); } } - if (rec_contains_externally_stored_field(btr_cur_get_rec(cursor))) { + if (rec_offs_any_extern(offsets)) { /* Externally stored fields are treated in pessimistic update */ + mem_heap_free(heap); return(DB_OVERFLOW); } page_cursor = btr_cur_get_page_cur(cursor); - heap = mem_heap_create(1024); - new_entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap); row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update, NULL); - old_rec_size = rec_get_size(rec); - new_rec_size = rec_get_converted_size(new_entry); + old_rec_size = rec_offs_size(offsets); + new_rec_size = rec_get_converted_size(index, new_entry); - if (new_rec_size >= page_get_free_space_of_empty() / 2) { + if (new_rec_size >= + page_get_free_space_of_empty(index->table->comp) / 2) { mem_heap_free(heap); @@ -1570,7 +1625,7 @@ btr_cur_optimistic_update( btr_search_update_hash_on_delete(cursor); - page_cur_delete_rec(page_cursor, mtr); + page_cur_delete_rec(page_cursor, index, mtr); page_cur_move_to_prev(page_cursor); @@ -1587,11 +1642,13 @@ btr_cur_optimistic_update( ut_a(rec); /* <- We calculated above the insert would fit */ - if (!rec_get_deleted_flag(rec)) { + if (!rec_get_deleted_flag(rec, index->table->comp)) { /* The new inserted record owns its possible externally stored fields */ - btr_cur_unmark_extern_fields(rec, mtr); + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + btr_cur_unmark_extern_fields(rec, mtr, offsets); } /* Restore the old explicit lock state on the record */ @@ -1690,6 +1747,7 @@ btr_cur_pessimistic_update( ulint* ext_vect; ulint n_ext_vect; ulint reserve_flag; + ulint* offsets = NULL; *big_rec = NULL; @@ -1743,6 +1801,7 @@ btr_cur_pessimistic_update( } heap = mem_heap_create(1024); + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); trx = thr_get_trx(thr); @@ -1767,28 +1826,29 @@ btr_cur_pessimistic_update( ut_a(big_rec_vec == NULL); - btr_rec_free_updated_extern_fields(index, rec, update, - TRUE, mtr); + btr_rec_free_updated_extern_fields(index, rec, offsets, + update, TRUE, mtr); } /* We have to set appropriate extern storage bits in the new record to be inserted: we have to remember which fields were such */ - ext_vect = mem_heap_alloc(heap, sizeof(ulint) * rec_get_n_fields(rec)); - n_ext_vect = btr_push_update_extern_fields(ext_vect, rec, update); - - if ((rec_get_converted_size(new_entry) >= - page_get_free_space_of_empty() / 2) - || (rec_get_converted_size(new_entry) >= REC_MAX_DATA_SIZE)) { + ext_vect = mem_heap_alloc(heap, sizeof(ulint) + * dict_index_get_n_fields(index)); + ut_ad(!cursor->index->table->comp || !rec_get_node_ptr_flag(rec)); + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + n_ext_vect = btr_push_update_extern_fields(ext_vect, offsets, update); + + if (rec_get_converted_size(index, new_entry) >= + ut_min(page_get_free_space_of_empty(index->table->comp) / 2, + REC_MAX_DATA_SIZE)) { big_rec_vec = dtuple_convert_big_rec(index, new_entry, ext_vect, n_ext_vect); if (big_rec_vec == NULL) { - mem_heap_free(heap); - err = DB_TOO_BIG_RECORD; - goto return_after_reservations; } } @@ -1808,7 +1868,7 @@ 
btr_cur_pessimistic_update( btr_search_update_hash_on_delete(cursor); - page_cur_delete_rec(page_cursor, mtr); + page_cur_delete_rec(page_cursor, index, mtr); page_cur_move_to_prev(page_cursor); @@ -1817,21 +1877,22 @@ btr_cur_pessimistic_update( ut_a(rec || optim_err != DB_UNDERFLOW); if (rec) { + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + lock_rec_restore_from_page_infimum(rec, page); - rec_set_field_extern_bits(rec, ext_vect, n_ext_vect, mtr); + rec_set_field_extern_bits(rec, index, + ext_vect, n_ext_vect, mtr); - if (!rec_get_deleted_flag(rec)) { + if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) { /* The new inserted record owns its possible externally stored fields */ - - btr_cur_unmark_extern_fields(rec, mtr); + btr_cur_unmark_extern_fields(rec, mtr, offsets); } btr_cur_compress_if_useful(cursor, mtr); err = DB_SUCCESS; - mem_heap_free(heap); - goto return_after_reservations; } @@ -1856,13 +1917,15 @@ btr_cur_pessimistic_update( ut_a(err == DB_SUCCESS); ut_a(dummy_big_rec == NULL); - rec_set_field_extern_bits(rec, ext_vect, n_ext_vect, mtr); + rec_set_field_extern_bits(rec, index, ext_vect, n_ext_vect, mtr); + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); - if (!rec_get_deleted_flag(rec)) { + if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) { /* The new inserted record owns its possible externally stored fields */ - btr_cur_unmark_extern_fields(rec, mtr); + btr_cur_unmark_extern_fields(rec, mtr, offsets); } lock_rec_restore_from_page_infimum(rec, page); @@ -1876,9 +1939,8 @@ btr_cur_pessimistic_update( btr_cur_pess_upd_restore_supremum(rec, mtr); } - mem_heap_free(heap); - return_after_reservations: + mem_heap_free(heap); if (n_extents > 0) { fil_space_release_free_extents(cursor->index->space, @@ -1908,11 +1970,18 @@ btr_cur_del_mark_set_clust_rec_log( mtr_t* mtr) /* in: mtr */ { byte* log_ptr; + ut_ad(flags < 256); + ut_ad(val <= 1); - log_ptr = mlog_open(mtr, 30); + log_ptr = mlog_open_and_write_index(mtr, rec, index, index->table->comp + ? MLOG_COMP_REC_CLUST_DELETE_MARK + : MLOG_REC_CLUST_DELETE_MARK, + 1 + 1 + DATA_ROLL_PTR_LEN + 14 + 2); - log_ptr = mlog_write_initial_log_record_fast(rec, - MLOG_REC_CLUST_DELETE_MARK, log_ptr, mtr); + if (!log_ptr) { + /* Logging in mtr is switched off during crash recovery */ + return; + } mach_write_to_1(log_ptr, flags); log_ptr++; @@ -1934,10 +2003,11 @@ index record. */ byte* btr_cur_parse_del_mark_set_clust_rec( /*=================================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page) /* in: page or NULL */ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: index corresponding to page */ + page_t* page) /* in: page or NULL */ { ulint flags; ibool val; @@ -1978,15 +2048,19 @@ btr_cur_parse_del_mark_set_clust_rec( rec = page + offset; if (!(flags & BTR_KEEP_SYS_FLAG)) { - row_upd_rec_sys_fields_in_recovery(rec, pos, trx_id, - roll_ptr); + mem_heap_t* heap = mem_heap_create(100); + row_upd_rec_sys_fields_in_recovery(rec, + rec_get_offsets(rec, index, + ULINT_UNDEFINED, heap), + pos, trx_id, roll_ptr); + mem_heap_free(heap); } /* We do not need to reserve btr_search_latch, as the page is only being recovered, and there cannot be a hash index to it. 
*/ - rec_set_deleted_flag(rec, val); + rec_set_deleted_flag(rec, index->table->comp, val); } return(ptr); @@ -2015,22 +2089,28 @@ btr_cur_del_mark_set_clust_rec( ulint err; rec_t* rec; trx_t* trx; + mem_heap_t* heap; + const ulint* offsets; rec = btr_cur_get_rec(cursor); index = cursor->index; - + heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); + if (btr_cur_print_record_ops && thr) { btr_cur_trx_report(thr_get_trx(thr), index, "del mark "); - rec_print(stderr, rec); + rec_print(stderr, rec, offsets); } ut_ad(index->type & DICT_CLUSTERED); - ut_ad(rec_get_deleted_flag(rec) == FALSE); + ut_ad(rec_get_deleted_flag(rec, index->table->comp) == FALSE); - err = lock_clust_rec_modify_check_and_lock(flags, rec, index, thr); + err = lock_clust_rec_modify_check_and_lock(flags, + rec, index, offsets, thr); if (err != DB_SUCCESS) { + mem_heap_free(heap); return(err); } @@ -2039,6 +2119,7 @@ btr_cur_del_mark_set_clust_rec( &roll_ptr); if (err != DB_SUCCESS) { + mem_heap_free(heap); return(err); } @@ -2048,13 +2129,12 @@ btr_cur_del_mark_set_clust_rec( rw_lock_x_lock(&btr_search_latch); } - rec_set_deleted_flag(rec, val); + rec_set_deleted_flag(rec, index->table->comp, val); trx = thr_get_trx(thr); if (!(flags & BTR_KEEP_SYS_FLAG)) { - - row_upd_rec_sys_fields(rec, index, trx, roll_ptr); + row_upd_rec_sys_fields(rec, index, offsets, trx, roll_ptr); } if (block->is_hashed) { @@ -2063,6 +2143,7 @@ btr_cur_del_mark_set_clust_rec( btr_cur_del_mark_set_clust_rec_log(flags, rec, index, val, trx, roll_ptr, mtr); + mem_heap_free(heap); return(DB_SUCCESS); } @@ -2073,16 +2154,24 @@ UNIV_INLINE void btr_cur_del_mark_set_sec_rec_log( /*=============================*/ - rec_t* rec, /* in: record */ - ibool val, /* in: value to set */ - mtr_t* mtr) /* in: mtr */ + rec_t* rec, /* in: record */ + dict_index_t* index, /* in: record descriptor */ + ibool val, /* in: value to set */ + mtr_t* mtr) /* in: mtr */ { byte* log_ptr; + ut_ad(val <= 1); - log_ptr = mlog_open(mtr, 30); + log_ptr = mlog_open_and_write_index(mtr, rec, index, index->table->comp + ? MLOG_COMP_REC_SEC_DELETE_MARK + : MLOG_REC_SEC_DELETE_MARK, + 1 + 2); - log_ptr = mlog_write_initial_log_record_fast(rec, - MLOG_REC_SEC_DELETE_MARK, log_ptr, mtr); + if (!log_ptr) { + /* Logging in mtr is switched off during crash recovery: + in that case mlog_open returns NULL */ + return; + } mach_write_to_1(log_ptr, val); log_ptr++; @@ -2100,10 +2189,11 @@ index record. */ byte* btr_cur_parse_del_mark_set_sec_rec( /*===============================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page) /* in: page or NULL */ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: record descriptor */ + page_t* page) /* in: page or NULL */ { ibool val; ulint offset; @@ -2129,7 +2219,7 @@ btr_cur_parse_del_mark_set_sec_rec( is only being recovered, and there cannot be a hash index to it. 
*/ - rec_set_deleted_flag(rec, val); + rec_set_deleted_flag(rec, index->table->comp, val); } return(ptr); @@ -2156,9 +2246,12 @@ btr_cur_del_mark_set_sec_rec( rec = btr_cur_get_rec(cursor); if (btr_cur_print_record_ops && thr) { + mem_heap_t* heap = mem_heap_create(100); btr_cur_trx_report(thr_get_trx(thr), cursor->index, "del mark "); - rec_print(stderr, rec); + rec_print(stderr, rec, rec_get_offsets(rec, cursor->index, + ULINT_UNDEFINED, heap)); + mem_heap_free(heap); } err = lock_sec_rec_modify_check_and_lock(flags, rec, cursor->index, @@ -2174,13 +2267,13 @@ btr_cur_del_mark_set_sec_rec( rw_lock_x_lock(&btr_search_latch); } - rec_set_deleted_flag(rec, val); + rec_set_deleted_flag(rec, cursor->index->table->comp, val); if (block->is_hashed) { rw_lock_x_unlock(&btr_search_latch); } - btr_cur_del_mark_set_sec_rec_log(rec, val, mtr); + btr_cur_del_mark_set_sec_rec_log(rec, cursor->index, val, mtr); return(DB_SUCCESS); } @@ -2192,15 +2285,16 @@ used by the insert buffer insert merge mechanism. */ void btr_cur_del_unmark_for_ibuf( /*========================*/ - rec_t* rec, /* in: record to delete unmark */ - mtr_t* mtr) /* in: mtr */ + rec_t* rec, /* in: record to delete unmark */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mtr */ { /* We do not need to reserve btr_search_latch, as the page has just been read to the buffer pool and there cannot be a hash index to it. */ - rec_set_deleted_flag(rec, FALSE); + rec_set_deleted_flag(rec, index->table->comp, FALSE); - btr_cur_del_mark_set_sec_rec_log(rec, FALSE, mtr); + btr_cur_del_mark_set_sec_rec_log(rec, index, FALSE, mtr); } /*==================== B-TREE RECORD REMOVE =========================*/ @@ -2279,8 +2373,11 @@ btr_cur_optimistic_delete( successor of the deleted record */ mtr_t* mtr) /* in: mtr */ { - page_t* page; - ulint max_ins_size; + page_t* page; + ulint max_ins_size; + mem_heap_t* heap; + rec_t* rec; + const ulint* offsets; ut_ad(mtr_memo_contains(mtr, buf_block_align(btr_cur_get_page(cursor)), MTR_MEMO_PAGE_X_FIX)); @@ -2290,26 +2387,30 @@ btr_cur_optimistic_delete( ut_ad(btr_page_get_level(page, mtr) == 0); - if (rec_contains_externally_stored_field(btr_cur_get_rec(cursor))) { + heap = mem_heap_create(100); + rec = btr_cur_get_rec(cursor); + offsets = rec_get_offsets(rec, cursor->index, ULINT_UNDEFINED, heap); - return(FALSE); - } + if (!rec_offs_any_extern(offsets) + && btr_cur_can_delete_without_compress( + cursor, rec_offs_size(offsets), mtr)) { - if (btr_cur_can_delete_without_compress(cursor, mtr)) { - - lock_update_delete(btr_cur_get_rec(cursor)); + lock_update_delete(rec); btr_search_update_hash_on_delete(cursor); max_ins_size = page_get_max_insert_size_after_reorganize(page, 1); - page_cur_delete_rec(btr_cur_get_page_cur(cursor), mtr); + page_cur_delete_rec(btr_cur_get_page_cur(cursor), + cursor->index, mtr); ibuf_update_free_bits_low(cursor->index, page, max_ins_size, mtr); + mem_heap_free(heap); return(TRUE); } + mem_heap_free(heap); return(FALSE); } @@ -2375,8 +2476,20 @@ btr_cur_pessimistic_delete( } } - btr_rec_free_externally_stored_fields(cursor->index, - btr_cur_get_rec(cursor), in_rollback, mtr); + heap = mem_heap_create(256); + rec = btr_cur_get_rec(cursor); + + /* Free externally stored fields if the record is neither + a node pointer nor in two-byte format. + This avoids unnecessary calls to rec_get_offsets(). */ + if (cursor->index->table->comp + ? 
!rec_get_node_ptr_flag(rec) + : !rec_get_1byte_offs_flag(rec)) { + btr_rec_free_externally_stored_fields(cursor->index, + rec, rec_get_offsets(rec, cursor->index, + ULINT_UNDEFINED, heap), + in_rollback, mtr); + } if ((page_get_n_recs(page) < 2) && (dict_tree_get_page(btr_cur_get_tree(cursor)) @@ -2393,8 +2506,6 @@ btr_cur_pessimistic_delete( goto return_after_reservations; } - rec = btr_cur_get_rec(cursor); - lock_update_delete(rec); if ((btr_page_get_level(page, mtr) > 0) @@ -2406,7 +2517,8 @@ btr_cur_pessimistic_delete( non-leaf level, we must mark the new leftmost node pointer as the predefined minimum record */ - btr_set_min_rec_mark(page_rec_get_next(rec), mtr); + btr_set_min_rec_mark(page_rec_get_next(rec), + cursor->index->table->comp, mtr); } else { /* Otherwise, if we delete the leftmost node pointer on a page, we have to change the father node pointer @@ -2415,8 +2527,6 @@ btr_cur_pessimistic_delete( btr_node_ptr_delete(tree, page, mtr); - heap = mem_heap_create(256); - node_ptr = dict_tree_build_node_ptr( tree, page_rec_get_next(rec), buf_frame_get_page_no(page), @@ -2425,20 +2535,19 @@ btr_cur_pessimistic_delete( btr_insert_on_non_leaf_level(tree, btr_page_get_level(page, mtr) + 1, node_ptr, mtr); - - mem_heap_free(heap); } } btr_search_update_hash_on_delete(cursor); - page_cur_delete_rec(btr_cur_get_page_cur(cursor), mtr); + page_cur_delete_rec(btr_cur_get_page_cur(cursor), cursor->index, mtr); ut_ad(btr_check_node_ptr(tree, page, mtr)); *err = DB_SUCCESS; return_after_reservations: + mem_heap_free(heap); if (ret == FALSE) { ret = btr_cur_compress_if_useful(cursor, mtr); @@ -2663,9 +2772,13 @@ btr_estimate_number_of_different_key_vals( ulint j; ulint add_on; mtr_t mtr; + mem_heap_t* heap; + ulint* offsets1 = 0; + ulint* offsets2 = 0; n_cols = dict_index_get_n_unique(index); + heap = mem_heap_create(100); n_diff = mem_alloc((n_cols + 1) * sizeof(ib_longlong)); for (j = 0; j <= n_cols; j++) { @@ -2697,11 +2810,17 @@ btr_estimate_number_of_different_key_vals( while (rec != page_get_supremum_rec(page) && page_rec_get_next(rec) != page_get_supremum_rec(page)) { + rec_t* next_rec = page_rec_get_next(rec); matched_fields = 0; matched_bytes = 0; + offsets1 = rec_reget_offsets(rec, index, + offsets1, ULINT_UNDEFINED, heap); + offsets2 = rec_reget_offsets(next_rec, index, + offsets2, n_cols, heap); - cmp_rec_rec_with_match(rec, page_rec_get_next(rec), - index, &matched_fields, + cmp_rec_rec_with_match(rec, next_rec, + offsets1, offsets2, + index, n_cols, &matched_fields, &matched_bytes); for (j = matched_fields + 1; j <= n_cols; j++) { @@ -2712,7 +2831,8 @@ btr_estimate_number_of_different_key_vals( } total_external_size += - btr_rec_get_externally_stored_len(rec); + btr_rec_get_externally_stored_len( + rec, offsets1); rec = page_rec_get_next(rec); } @@ -2736,8 +2856,11 @@ btr_estimate_number_of_different_key_vals( } } + offsets1 = rec_reget_offsets(rec, index, + offsets1, ULINT_UNDEFINED, heap); total_external_size += - btr_rec_get_externally_stored_len(rec); + btr_rec_get_externally_stored_len(rec, + offsets1); mtr_commit(&mtr); } @@ -2778,6 +2901,7 @@ btr_estimate_number_of_different_key_vals( } mem_free(n_diff); + mem_heap_free(heap); } /*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/ @@ -2788,9 +2912,10 @@ static ulint btr_rec_get_externally_stored_len( /*==============================*/ - /* out: externally stored part, in units of a - database page */ - rec_t* rec) /* in: record */ + /* out: externally stored part, + in units of a database page */ + 
rec_t* rec, /* in: record */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { ulint n_fields; byte* data; @@ -2799,17 +2924,13 @@ btr_rec_get_externally_stored_len( ulint total_extern_len = 0; ulint i; - if (rec_get_data_size(rec) <= REC_1BYTE_OFFS_LIMIT) { - - return(0); - } - - n_fields = rec_get_n_fields(rec); + ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec)); + n_fields = rec_offs_n_fields(offsets); for (i = 0; i < n_fields; i++) { - if (rec_get_nth_field_extern_bit(rec, i)) { + if (rec_offs_nth_extern(offsets, i)) { - data = rec_get_nth_field(rec, i, &local_len); + data = rec_get_nth_field(rec, offsets, i, &local_len); local_len -= BTR_EXTERN_FIELD_REF_SIZE; @@ -2830,16 +2951,17 @@ static void btr_cur_set_ownership_of_extern_field( /*==================================*/ - rec_t* rec, /* in: clustered index record */ - ulint i, /* in: field number */ - ibool val, /* in: value to set */ - mtr_t* mtr) /* in: mtr */ + rec_t* rec, /* in: clustered index record */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint i, /* in: field number */ + ibool val, /* in: value to set */ + mtr_t* mtr) /* in: mtr */ { byte* data; ulint local_len; ulint byte_val; - data = rec_get_nth_field(rec, i, &local_len); + data = rec_get_nth_field(rec, offsets, i, &local_len); ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE); @@ -2866,19 +2988,22 @@ to free the field. */ void btr_cur_mark_extern_inherited_fields( /*=================================*/ - rec_t* rec, /* in: record in a clustered index */ - upd_t* update, /* in: update vector */ - mtr_t* mtr) /* in: mtr */ + rec_t* rec, /* in: record in a clustered index */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + upd_t* update, /* in: update vector */ + mtr_t* mtr) /* in: mtr */ { ibool is_updated; ulint n; ulint j; ulint i; - - n = rec_get_n_fields(rec); + + ut_ad(rec_offs_validate(rec, NULL, offsets)); + ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec)); + n = rec_offs_n_fields(offsets); for (i = 0; i < n; i++) { - if (rec_get_nth_field_extern_bit(rec, i)) { + if (rec_offs_nth_extern(offsets, i)) { /* Check it is not in updated fields */ is_updated = FALSE; @@ -2894,8 +3019,8 @@ btr_cur_mark_extern_inherited_fields( } if (!is_updated) { - btr_cur_set_ownership_of_extern_field(rec, i, - FALSE, mtr); + btr_cur_set_ownership_of_extern_field(rec, + offsets, i, FALSE, mtr); } } } @@ -2967,18 +3092,20 @@ static void btr_cur_unmark_extern_fields( /*=========================*/ - rec_t* rec, /* in: record in a clustered index */ - mtr_t* mtr) /* in: mtr */ + rec_t* rec, /* in: record in a clustered index */ + mtr_t* mtr, /* in: mtr */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { ulint n; ulint i; - n = rec_get_n_fields(rec); + ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec)); + n = rec_offs_n_fields(offsets); for (i = 0; i < n; i++) { - if (rec_get_nth_field_extern_bit(rec, i)) { - - btr_cur_set_ownership_of_extern_field(rec, i, + if (rec_offs_nth_extern(offsets, i)) { + + btr_cur_set_ownership_of_extern_field(rec, offsets, i, TRUE, mtr); } } @@ -3028,10 +3155,10 @@ ulint btr_push_update_extern_fields( /*==========================*/ /* out: number of values stored in ext_vect */ - ulint* ext_vect, /* in: array of ulints, must be preallocated + ulint* ext_vect,/* in: array of ulints, must be preallocated to have space for all fields in rec */ - rec_t* rec, /* in: record */ - upd_t* update) /* in: update vector or NULL */ + const ulint* 
offsets,/* in: array returned by rec_get_offsets() */ + upd_t* update) /* in: update vector or NULL */ { ulint n_pushed = 0; ibool is_updated; @@ -3054,10 +3181,10 @@ btr_push_update_extern_fields( } } - n = rec_get_n_fields(rec); + n = rec_offs_n_fields(offsets); for (i = 0; i < n; i++) { - if (rec_get_nth_field_extern_bit(rec, i)) { + if (rec_offs_nth_extern(offsets, i)) { /* Check it is not in updated fields */ is_updated = FALSE; @@ -3119,6 +3246,7 @@ btr_store_big_rec_extern_fields( dict_index_t* index, /* in: index of rec; the index tree MUST be X-latched */ rec_t* rec, /* in: record */ + const ulint* offsets, /* in: rec_get_offsets(rec, index) */ big_rec_t* big_rec_vec, /* in: vector containing fields to be stored externally */ mtr_t* local_mtr __attribute__((unused))) /* in: mtr @@ -3139,6 +3267,7 @@ btr_store_big_rec_extern_fields( ulint i; mtr_t mtr; + ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(mtr_memo_contains(local_mtr, dict_tree_get_lock(index->tree), MTR_MEMO_X_LOCK)); ut_ad(mtr_memo_contains(local_mtr, buf_block_align(rec), @@ -3152,8 +3281,8 @@ btr_store_big_rec_extern_fields( for (i = 0; i < big_rec_vec->n_fields; i++) { - data = rec_get_nth_field(rec, big_rec_vec->fields[i].field_no, - &local_len); + data = rec_get_nth_field(rec, offsets, + big_rec_vec->fields[i].field_no, &local_len); ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE); local_len -= BTR_EXTERN_FIELD_REF_SIZE; extern_len = big_rec_vec->fields[i].len; @@ -3254,7 +3383,7 @@ btr_store_big_rec_extern_fields( /* Set the bit denoting that this field in rec is stored externally */ - rec_set_nth_field_extern_bit(rec, + rec_set_nth_field_extern_bit(rec, index, big_rec_vec->fields[i].field_no, TRUE, &mtr); } @@ -3407,6 +3536,7 @@ btr_rec_free_externally_stored_fields( dict_index_t* index, /* in: index of the data, the index tree MUST be X-latched */ rec_t* rec, /* in: record */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ ibool do_not_free_inherited,/* in: TRUE if called in a rollback and we do not want to free inherited fields */ @@ -3419,21 +3549,18 @@ btr_rec_free_externally_stored_fields( ulint len; ulint i; + ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(mtr_memo_contains(mtr, buf_block_align(rec), MTR_MEMO_PAGE_X_FIX)); - if (rec_get_data_size(rec) <= REC_1BYTE_OFFS_LIMIT) { - - return; - } - /* Free possible externally stored fields in the record */ - n_fields = rec_get_n_fields(rec); + ut_ad(index->table->comp == rec_offs_comp(offsets)); + n_fields = rec_offs_n_fields(offsets); for (i = 0; i < n_fields; i++) { - if (rec_get_nth_field_extern_bit(rec, i)) { + if (rec_offs_nth_extern(offsets, i)) { - data = rec_get_nth_field(rec, i, &len); + data = rec_get_nth_field(rec, offsets, i, &len); btr_free_externally_stored_field(index, data, len, do_not_free_inherited, mtr); } @@ -3450,6 +3577,7 @@ btr_rec_free_updated_extern_fields( dict_index_t* index, /* in: index of rec; the index tree MUST be X-latched */ rec_t* rec, /* in: record */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ upd_t* update, /* in: update vector */ ibool do_not_free_inherited,/* in: TRUE if called in a rollback and we do not want to free @@ -3463,13 +3591,10 @@ btr_rec_free_updated_extern_fields( ulint len; ulint i; + ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(mtr_memo_contains(mtr, buf_block_align(rec), MTR_MEMO_PAGE_X_FIX)); - if (rec_get_data_size(rec) <= REC_1BYTE_OFFS_LIMIT) { - return; - } - /* Free possible externally stored fields in the record */ n_fields = upd_get_n_fields(update); 
@@ -3477,9 +3602,10 @@ btr_rec_free_updated_extern_fields( for (i = 0; i < n_fields; i++) { ufield = upd_get_nth_field(update, i); - if (rec_get_nth_field_extern_bit(rec, ufield->field_no)) { + if (rec_offs_nth_extern(offsets, ufield->field_no)) { - data = rec_get_nth_field(rec, ufield->field_no, &len); + data = rec_get_nth_field(rec, offsets, + ufield->field_no, &len); btr_free_externally_stored_field(index, data, len, do_not_free_inherited, mtr); } @@ -3583,7 +3709,8 @@ byte* btr_rec_copy_externally_stored_field( /*=================================*/ /* out: the field copied to heap */ - rec_t* rec, /* in: record */ + rec_t* rec, /* in: record */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ ulint no, /* in: field number */ ulint* len, /* out: length of the field */ mem_heap_t* heap) /* in: mem heap */ @@ -3591,7 +3718,8 @@ btr_rec_copy_externally_stored_field( ulint local_len; byte* data; - ut_a(rec_get_nth_field_extern_bit(rec, no)); + ut_ad(rec_offs_validate(rec, NULL, offsets)); + ut_a(rec_offs_nth_extern(offsets, no)); /* An externally stored field can contain some initial data from the field, and in the last 20 bytes it has the @@ -3602,7 +3730,7 @@ btr_rec_copy_externally_stored_field( limit so that field offsets are stored in two bytes, and the extern bit is available in those two bytes. */ - data = rec_get_nth_field(rec, no, &local_len); + data = rec_get_nth_field(rec, offsets, no, &local_len); return(btr_copy_externally_stored_field(len, data, local_len, heap)); } diff --git a/innobase/btr/btr0pcur.c b/innobase/btr/btr0pcur.c index cf8a612ef28..7df8e53cd07 100644 --- a/innobase/btr/btr0pcur.c +++ b/innobase/btr/btr0pcur.c @@ -45,12 +45,12 @@ btr_pcur_free_for_mysql( mem_free(cursor->old_rec_buf); - cursor->old_rec = NULL; cursor->old_rec_buf = NULL; } cursor->btr_cur.page_cur.rec = NULL; cursor->old_rec = NULL; + cursor->old_n_fields = 0; cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; cursor->latch_mode = BTR_NO_LATCHES; @@ -133,9 +133,10 @@ btr_pcur_store_position( cursor->old_stored = BTR_PCUR_OLD_STORED; cursor->old_rec = dict_tree_copy_rec_order_prefix(tree, rec, - &(cursor->old_rec_buf), - &(cursor->buf_size)); - + &cursor->old_n_fields, + &cursor->old_rec_buf, + &cursor->buf_size); + cursor->block_when_stored = buf_block_align(page); cursor->modify_clock = buf_frame_get_modify_clock(page); } @@ -166,6 +167,8 @@ btr_pcur_copy_stored_position( pcur_receive->old_rec = pcur_receive->old_rec_buf + (pcur_donate->old_rec - pcur_donate->old_rec_buf); } + + pcur_receive->old_n_fields = pcur_donate->old_n_fields; } /****************************************************************** @@ -228,6 +231,7 @@ btr_pcur_restore_position( } ut_a(cursor->old_rec); + ut_a(cursor->old_n_fields); page = btr_cur_get_page(btr_pcur_get_btr_cur(cursor)); @@ -242,17 +246,32 @@ btr_pcur_restore_position( buf_page_dbg_add_level(page, SYNC_TREE_NODE); #endif /* UNIV_SYNC_DEBUG */ if (cursor->rel_pos == BTR_PCUR_ON) { - +#ifdef UNIV_DEBUG + rec_t* rec; + ulint* offsets1; + ulint* offsets2; + dict_index_t* index; +#endif /* UNIV_DEBUG */ cursor->latch_mode = latch_mode; +#ifdef UNIV_DEBUG + rec = btr_pcur_get_rec(cursor); + index = dict_tree_find_index( + btr_cur_get_tree( + btr_pcur_get_btr_cur(cursor)), + rec); + + heap = mem_heap_create(256); + offsets1 = rec_get_offsets(cursor->old_rec, + index, ULINT_UNDEFINED, heap); + offsets2 = rec_get_offsets(rec, + index, ULINT_UNDEFINED, heap); ut_ad(cmp_rec_rec(cursor->old_rec, - btr_pcur_get_rec(cursor), - dict_tree_find_index( - 
btr_cur_get_tree( - btr_pcur_get_btr_cur(cursor)), - btr_pcur_get_rec(cursor))) - == 0); - + rec, offsets1, offsets2, + cursor->old_n_fields, + index) == 0); + mem_heap_free(heap); +#endif /* UNIV_DEBUG */ return(TRUE); } @@ -265,7 +284,8 @@ btr_pcur_restore_position( heap = mem_heap_create(256); tree = btr_cur_get_tree(btr_pcur_get_btr_cur(cursor)); - tuple = dict_tree_build_data_tuple(tree, cursor->old_rec, heap); + tuple = dict_tree_build_data_tuple(tree, cursor->old_rec, + cursor->old_n_fields, heap); /* Save the old search mode of the cursor */ old_mode = cursor->search_mode; @@ -287,7 +307,10 @@ btr_pcur_restore_position( if (cursor->rel_pos == BTR_PCUR_ON && btr_pcur_is_on_user_rec(cursor, mtr) - && 0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor))) { + && 0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor), + rec_get_offsets(btr_pcur_get_rec(cursor), + btr_pcur_get_btr_cur(cursor)->index, + ULINT_UNDEFINED, heap))) { /* We have to store the NEW value for the modify clock, since the cursor can now be on a different page! But we can retain @@ -376,6 +399,7 @@ btr_pcur_move_to_next_page( ut_ad(next_page_no != FIL_NULL); next_page = btr_page_get(space, next_page_no, cursor->latch_mode, mtr); + ut_a(page_is_comp(next_page) == page_is_comp(page)); buf_block_align(next_page)->check_index_page_at_flush = TRUE; btr_leaf_page_release(page, cursor->latch_mode, mtr); diff --git a/innobase/btr/btr0sea.c b/innobase/btr/btr0sea.c index ad74f9704da..40ccf56492f 100644 --- a/innobase/btr/btr0sea.c +++ b/innobase/btr/btr0sea.c @@ -416,7 +416,7 @@ btr_search_update_hash_ref( && (block->curr_n_fields == info->n_fields) && (block->curr_n_bytes == info->n_bytes) && (block->curr_side == info->side)) { - + mem_heap_t* heap; rec = btr_cur_get_rec(cursor); if (!page_rec_is_user_rec(rec)) { @@ -425,10 +425,11 @@ btr_search_update_hash_ref( } tree_id = ((cursor->index)->tree)->id; - - fold = rec_fold(rec, block->curr_n_fields, - block->curr_n_bytes, tree_id); - + heap = mem_heap_create(100); + fold = rec_fold(rec, rec_get_offsets(rec, cursor->index, + ULINT_UNDEFINED, heap), block->curr_n_fields, + block->curr_n_bytes, tree_id); + mem_heap_free(heap); #ifdef UNIV_SYNC_DEBUG ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX)); #endif /* UNIV_SYNC_DEBUG */ @@ -535,15 +536,17 @@ btr_search_check_guess( or PAGE_CUR_GE */ mtr_t* mtr) /* in: mtr */ { - page_t* page; - rec_t* rec; - rec_t* prev_rec; - rec_t* next_rec; - ulint n_unique; - ulint match; - ulint bytes; - int cmp; - + page_t* page; + rec_t* rec; + rec_t* prev_rec; + rec_t* next_rec; + ulint n_unique; + ulint match; + ulint bytes; + int cmp; + mem_heap_t* heap = mem_heap_create(100); + ulint* offsets = NULL; + n_unique = dict_index_get_n_unique_in_tree(cursor->index); rec = btr_cur_get_rec(cursor); @@ -554,23 +557,25 @@ btr_search_check_guess( match = 0; bytes = 0; - cmp = page_cmp_dtuple_rec_with_match(tuple, rec, &match, &bytes); + offsets = rec_get_offsets(rec, cursor->index, n_unique, heap); + cmp = page_cmp_dtuple_rec_with_match(tuple, rec, + offsets, &match, &bytes); if (mode == PAGE_CUR_GE) { if (cmp == 1) { - + mem_heap_free(heap); return(FALSE); } cursor->up_match = match; if (match >= n_unique) { - + mem_heap_free(heap); return(TRUE); } } else if (mode == PAGE_CUR_LE) { if (cmp == -1) { - + mem_heap_free(heap); return(FALSE); } @@ -578,12 +583,12 @@ btr_search_check_guess( } else if (mode == PAGE_CUR_G) { if (cmp != -1) { - + mem_heap_free(heap); return(FALSE); } } else if (mode == PAGE_CUR_L) { if (cmp != 1) { - + mem_heap_free(heap); 
return(FALSE); } } @@ -591,7 +596,7 @@ btr_search_check_guess( if (can_only_compare_to_cursor_rec) { /* Since we could not determine if our guess is right just by looking at the record under the cursor, return FALSE */ - + mem_heap_free(heap); return(FALSE); } @@ -605,17 +610,15 @@ btr_search_check_guess( prev_rec = page_rec_get_prev(rec); if (prev_rec == page_get_infimum_rec(page)) { - - if (btr_page_get_prev(page, mtr) != FIL_NULL) { - - return(FALSE); - } - - return(TRUE); + mem_heap_free(heap); + return(btr_page_get_prev(page, mtr) == FIL_NULL); } + offsets = rec_reget_offsets(prev_rec, cursor->index, + offsets, n_unique, heap); cmp = page_cmp_dtuple_rec_with_match(tuple, prev_rec, - &match, &bytes); + offsets, &match, &bytes); + mem_heap_free(heap); if (mode == PAGE_CUR_GE) { if (cmp != 1) { @@ -636,6 +639,7 @@ btr_search_check_guess( next_rec = page_rec_get_next(rec); if (next_rec == page_get_supremum_rec(page)) { + mem_heap_free(heap); if (btr_page_get_next(page, mtr) == FIL_NULL) { @@ -647,8 +651,12 @@ btr_search_check_guess( return(FALSE); } - cmp = page_cmp_dtuple_rec_with_match(tuple, next_rec, &match, &bytes); - + offsets = rec_reget_offsets(next_rec, cursor->index, + offsets, n_unique, heap); + cmp = page_cmp_dtuple_rec_with_match(tuple, next_rec, + offsets, &match, &bytes); + mem_heap_free(heap); + if (mode == PAGE_CUR_LE) { if (cmp != -1) { @@ -1003,8 +1011,7 @@ static void btr_search_build_page_hash_index( /*=============================*/ - dict_index_t* index, /* in: index for which to build, or NULL if - not known */ + dict_index_t* index, /* in: index for which to build */ page_t* page, /* in: index page, s- or x-latched */ ulint n_fields,/* in: hash this many full fields */ ulint n_bytes,/* in: hash this many bytes from the next @@ -1024,7 +1031,11 @@ btr_search_build_page_hash_index( ulint* folds; rec_t** recs; ulint i; - + mem_heap_t* heap; + ulint* offsets; + + ut_ad(index); + block = buf_block_align(page); table = btr_search_sys->hash_index; @@ -1061,9 +1072,9 @@ btr_search_build_page_hash_index( return; } - if (index && (dict_index_get_n_unique_in_tree(index) < n_fields + if (dict_index_get_n_unique_in_tree(index) < n_fields || (dict_index_get_n_unique_in_tree(index) == n_fields - && n_bytes > 0))) { + && n_bytes > 0)) { return; } @@ -1072,6 +1083,7 @@ btr_search_build_page_hash_index( folds = mem_alloc(n_recs * sizeof(ulint)); recs = mem_alloc(n_recs * sizeof(rec_t*)); + heap = mem_heap_create(100); n_cached = 0; @@ -1082,18 +1094,19 @@ btr_search_build_page_hash_index( rec = page_get_infimum_rec(page); rec = page_rec_get_next(rec); + offsets = rec_get_offsets(rec, index, n_fields + (n_bytes > 0), heap); + if (rec != sup) { - ut_a(n_fields <= rec_get_n_fields(rec)); + ut_a(n_fields <= rec_offs_n_fields(offsets)); if (n_bytes > 0) { - ut_a(n_fields < rec_get_n_fields(rec)); + ut_a(n_fields < rec_offs_n_fields(offsets)); } } /* FIXME: in a mixed tree, all records may not have enough ordering fields: */ - - fold = rec_fold(rec, n_fields, n_bytes, tree_id); + fold = rec_fold(rec, offsets, n_fields, n_bytes, tree_id); if (side == BTR_SEARCH_LEFT_SIDE) { @@ -1117,7 +1130,10 @@ btr_search_build_page_hash_index( break; } - next_fold = rec_fold(next_rec, n_fields, n_bytes, tree_id); + offsets = rec_reget_offsets(next_rec, index, + offsets, n_fields + (n_bytes > 0), heap); + next_fold = rec_fold(next_rec, offsets, n_fields, + n_bytes, tree_id); if (fold != next_fold) { /* Insert an entry into the hash index */ @@ -1145,13 +1161,7 @@ btr_search_build_page_hash_index( if 
(block->is_hashed && ((block->curr_n_fields != n_fields) || (block->curr_n_bytes != n_bytes) || (block->curr_side != side))) { - - rw_lock_x_unlock(&btr_search_latch); - - mem_free(folds); - mem_free(recs); - - return; + goto exit_func; } block->is_hashed = TRUE; @@ -1166,10 +1176,12 @@ btr_search_build_page_hash_index( ha_insert_for_fold(table, folds[i], recs[i]); } +exit_func: rw_lock_x_unlock(&btr_search_latch); mem_free(folds); mem_free(recs); + mem_heap_free(heap); } /************************************************************************ @@ -1181,10 +1193,13 @@ parameters as page (this often happens when a page is split). */ void btr_search_move_or_delete_hash_entries( /*===================================*/ - page_t* new_page, /* in: records are copied to this page */ - page_t* page) /* in: index page from which records were - copied, and the copied records will be deleted - from this page */ + page_t* new_page, /* in: records are copied + to this page */ + page_t* page, /* in: index page from which + records were copied, and the + copied records will be deleted + from this page */ + dict_index_t* index) /* in: record descriptor */ { buf_block_t* block; buf_block_t* new_block; @@ -1194,6 +1209,7 @@ btr_search_move_or_delete_hash_entries( block = buf_block_align(page); new_block = buf_block_align(new_page); + ut_a(page_is_comp(page) == page_is_comp(new_page)); #ifdef UNIV_SYNC_DEBUG ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX)); @@ -1224,8 +1240,8 @@ btr_search_move_or_delete_hash_entries( rw_lock_s_unlock(&btr_search_latch); ut_a(n_fields + n_bytes > 0); - - btr_search_build_page_hash_index(NULL, new_page, n_fields, + + btr_search_build_page_hash_index(index, new_page, n_fields, n_bytes, side); ut_a(n_fields == block->curr_n_fields); ut_a(n_bytes == block->curr_n_bytes); @@ -1253,6 +1269,7 @@ btr_search_update_hash_on_delete( ulint fold; dulint tree_id; ibool found; + mem_heap_t* heap; rec = btr_cur_get_rec(cursor); @@ -1272,9 +1289,11 @@ btr_search_update_hash_on_delete( table = btr_search_sys->hash_index; tree_id = cursor->index->tree->id; - - fold = rec_fold(rec, block->curr_n_fields, block->curr_n_bytes, - tree_id); + heap = mem_heap_create(100); + fold = rec_fold(rec, rec_get_offsets(rec, cursor->index, + ULINT_UNDEFINED, heap), block->curr_n_fields, + block->curr_n_bytes, tree_id); + mem_heap_free(heap); rw_lock_x_lock(&btr_search_latch); found = ha_search_and_delete_if_found(table, fold, rec); @@ -1355,6 +1374,8 @@ btr_search_update_hash_on_insert( ulint n_bytes; ulint side; ibool locked = FALSE; + mem_heap_t* heap; + ulint* offsets; table = btr_search_sys->hash_index; @@ -1383,15 +1404,22 @@ btr_search_update_hash_on_insert( next_rec = page_rec_get_next(ins_rec); page = buf_frame_align(rec); - - ins_fold = rec_fold(ins_rec, n_fields, n_bytes, tree_id); + heap = mem_heap_create(100); + offsets = rec_get_offsets(ins_rec, cursor->index, + ULINT_UNDEFINED, heap); + ins_fold = rec_fold(ins_rec, offsets, n_fields, n_bytes, tree_id); if (next_rec != page_get_supremum_rec(page)) { - next_fold = rec_fold(next_rec, n_fields, n_bytes, tree_id); + offsets = rec_reget_offsets(next_rec, cursor->index, + offsets, n_fields + (n_bytes > 0), heap); + next_fold = rec_fold(next_rec, offsets, n_fields, + n_bytes, tree_id); } if (rec != page_get_infimum_rec(page)) { - fold = rec_fold(rec, n_fields, n_bytes, tree_id); + offsets = rec_reget_offsets(rec, cursor->index, + offsets, n_fields + (n_bytes > 0), heap); + fold = rec_fold(rec, offsets, n_fields, n_bytes, tree_id); } else { if (side == 
BTR_SEARCH_LEFT_SIDE) { @@ -1461,6 +1489,7 @@ check_next_rec: } function_exit: + mem_heap_free(heap); if (locked) { rw_lock_x_unlock(&btr_search_latch); } @@ -1470,9 +1499,10 @@ function_exit: Validates the search system. */ ibool -btr_search_validate(void) -/*=====================*/ +btr_search_validate( +/*================*/ /* out: TRUE if ok */ + dict_index_t* index) /* in: record descriptor */ { buf_block_t* block; page_t* page; @@ -1480,6 +1510,8 @@ btr_search_validate(void) ulint n_page_dumps = 0; ibool ok = TRUE; ulint i; + mem_heap_t* heap = mem_heap_create(100); + ulint* offsets = NULL; rw_lock_x_lock(&btr_search_latch); @@ -1489,9 +1521,13 @@ btr_search_validate(void) while (node != NULL) { block = buf_block_align(node->data); page = buf_frame_align(node->data); + offsets = rec_reget_offsets((rec_t*) node->data, index, + offsets, block->curr_n_fields + + (block->curr_n_bytes > 0), heap); if (!block->is_hashed || node->fold != rec_fold((rec_t*)(node->data), + offsets, block->curr_n_fields, block->curr_n_bytes, btr_page_get_index_id(page))) { @@ -1507,12 +1543,13 @@ btr_search_validate(void) (ulong) ut_dulint_get_low(btr_page_get_index_id(page)), (ulong) node->fold, (ulong) rec_fold((rec_t*)(node->data), + offsets, block->curr_n_fields, block->curr_n_bytes, btr_page_get_index_id(page))); fputs("InnoDB: Record ", stderr); - rec_print(stderr, (rec_t*)(node->data)); + rec_print(stderr, (rec_t*)node->data, offsets); fprintf(stderr, "\nInnoDB: on that page." "Page mem address %p, is hashed %lu, n fields %lu, n bytes %lu\n" "side %lu\n", @@ -1536,6 +1573,7 @@ btr_search_validate(void) } rw_lock_x_unlock(&btr_search_latch); + mem_heap_free(heap); return(ok); } diff --git a/innobase/data/data0data.c b/innobase/data/data0data.c index 97ec1a1acd9..25ba19d0296 100644 --- a/innobase/data/data0data.c +++ b/innobase/data/data0data.c @@ -500,7 +500,7 @@ dtuple_convert_big_rec( ut_a(dtuple_check_typed_no_assert(entry)); - size = rec_get_converted_size(entry); + size = rec_get_converted_size(index, entry); if (size > 1000000000) { fprintf(stderr, @@ -524,9 +524,10 @@ dtuple_convert_big_rec( n_fields = 0; - while ((rec_get_converted_size(entry) - >= page_get_free_space_of_empty() / 2) - || rec_get_converted_size(entry) >= REC_MAX_DATA_SIZE) { + while (rec_get_converted_size(index, entry) + >= ut_min(page_get_free_space_of_empty( + index->table->comp) / 2, + REC_MAX_DATA_SIZE)) { longest = 0; for (i = dict_index_get_n_unique_in_tree(index); diff --git a/innobase/data/data0type.c b/innobase/data/data0type.c index 714cf92bc65..9b8fb084e33 100644 --- a/innobase/data/data0type.c +++ b/innobase/data/data0type.c @@ -195,7 +195,7 @@ dtype_validate( ut_a((type->mtype >= DATA_VARCHAR) && (type->mtype <= DATA_MYSQL)); if (type->mtype == DATA_SYS) { - ut_a(type->prtype <= DATA_MIX_ID); + ut_a((type->prtype & DATA_MYSQL_TYPE_MASK) < DATA_N_SYS_COLS); } return(TRUE); diff --git a/innobase/dict/dict0boot.c b/innobase/dict/dict0boot.c index f156cf67a18..e500b92252f 100644 --- a/innobase/dict/dict0boot.c +++ b/innobase/dict/dict0boot.c @@ -158,7 +158,7 @@ dict_hdr_create( /*--------------------------*/ root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, - DICT_HDR_SPACE, DICT_TABLES_ID, mtr); + DICT_HDR_SPACE, DICT_TABLES_ID, FALSE, mtr); if (root_page_no == FIL_NULL) { return(FALSE); @@ -168,7 +168,7 @@ dict_hdr_create( MLOG_4BYTES, mtr); /*--------------------------*/ root_page_no = btr_create(DICT_UNIQUE, DICT_HDR_SPACE, - DICT_TABLE_IDS_ID, mtr); + DICT_TABLE_IDS_ID, FALSE, mtr); if (root_page_no == 
FIL_NULL) { return(FALSE); @@ -178,7 +178,7 @@ dict_hdr_create( MLOG_4BYTES, mtr); /*--------------------------*/ root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, - DICT_HDR_SPACE, DICT_COLUMNS_ID, mtr); + DICT_HDR_SPACE, DICT_COLUMNS_ID, FALSE, mtr); if (root_page_no == FIL_NULL) { return(FALSE); @@ -188,7 +188,7 @@ dict_hdr_create( MLOG_4BYTES, mtr); /*--------------------------*/ root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, - DICT_HDR_SPACE, DICT_INDEXES_ID, mtr); + DICT_HDR_SPACE, DICT_INDEXES_ID, FALSE, mtr); if (root_page_no == FIL_NULL) { return(FALSE); @@ -198,7 +198,7 @@ dict_hdr_create( MLOG_4BYTES, mtr); /*--------------------------*/ root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, - DICT_HDR_SPACE, DICT_FIELDS_ID, mtr); + DICT_HDR_SPACE, DICT_FIELDS_ID, FALSE, mtr); if (root_page_no == FIL_NULL) { return(FALSE); @@ -254,7 +254,7 @@ dict_boot(void) /* Insert into the dictionary cache the descriptions of the basic system tables */ /*-------------------------*/ - table = dict_mem_table_create("SYS_TABLES", DICT_HDR_SPACE,8); + table = dict_mem_table_create("SYS_TABLES", DICT_HDR_SPACE, 8, FALSE); dict_mem_table_add_col(table, "NAME", DATA_BINARY, 0, 0, 0); dict_mem_table_add_col(table, "ID", DATA_BINARY, 0, 0, 0); @@ -290,7 +290,7 @@ dict_boot(void) index->id = DICT_TABLE_IDS_ID; ut_a(dict_index_add_to_cache(table, index)); /*-------------------------*/ - table = dict_mem_table_create("SYS_COLUMNS",DICT_HDR_SPACE,7); + table = dict_mem_table_create("SYS_COLUMNS", DICT_HDR_SPACE, 7, FALSE); dict_mem_table_add_col(table, "TABLE_ID", DATA_BINARY,0,0,0); dict_mem_table_add_col(table, "POS", DATA_INT, 0, 4, 0); @@ -316,7 +316,7 @@ dict_boot(void) index->id = DICT_COLUMNS_ID; ut_a(dict_index_add_to_cache(table, index)); /*-------------------------*/ - table = dict_mem_table_create("SYS_INDEXES",DICT_HDR_SPACE,7); + table = dict_mem_table_create("SYS_INDEXES", DICT_HDR_SPACE, 7, FALSE); dict_mem_table_add_col(table, "TABLE_ID", DATA_BINARY, 0,0,0); dict_mem_table_add_col(table, "ID", DATA_BINARY, 0, 0, 0); @@ -349,7 +349,7 @@ dict_boot(void) index->id = DICT_INDEXES_ID; ut_a(dict_index_add_to_cache(table, index)); /*-------------------------*/ - table = dict_mem_table_create("SYS_FIELDS", DICT_HDR_SPACE,3); + table = dict_mem_table_create("SYS_FIELDS", DICT_HDR_SPACE, 3, FALSE); dict_mem_table_add_col(table, "INDEX_ID", DATA_BINARY, 0,0,0); dict_mem_table_add_col(table, "POS", DATA_INT, 0, 4, 0); diff --git a/innobase/dict/dict0crea.c b/innobase/dict/dict0crea.c index cbdc0aab53c..747a99ebdc9 100644 --- a/innobase/dict/dict0crea.c +++ b/innobase/dict/dict0crea.c @@ -84,7 +84,8 @@ dict_create_sys_tables_tuple( dfield = dtuple_get_nth_field(entry, 5); ptr = mem_heap_alloc(heap, 4); - mach_write_to_4(ptr, table->mix_len); + mach_write_to_4(ptr, (table->mix_len & 0x7fffffff) | + ((ulint) table->comp << 31)); dfield_set_data(dfield, ptr, 4); /* 8: CLUSTER_NAME ---------------------*/ @@ -624,7 +625,7 @@ dict_create_index_tree_step( btr_pcur_move_to_next_user_rec(&pcur, &mtr); index->page_no = btr_create(index->type, index->space, index->id, - &mtr); + table->comp, &mtr); /* printf("Created a new index tree in space %lu root page %lu\n", index->space, index->page_no); */ @@ -660,8 +661,9 @@ dict_drop_index_tree( #ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&(dict_sys->mutex))); #endif /* UNIV_SYNC_DEBUG */ - - ptr = rec_get_nth_field(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD, &len); + + ut_a(!dict_sys->sys_indexes->comp); + ptr = rec_get_nth_field_old(rec, 
DICT_SYS_INDEXES_PAGE_NO_FIELD, &len); ut_ad(len == 4); @@ -673,8 +675,9 @@ dict_drop_index_tree( return; } - ptr = rec_get_nth_field(rec, DICT_SYS_INDEXES_SPACE_NO_FIELD, &len); - + ptr = rec_get_nth_field_old(rec, + DICT_SYS_INDEXES_SPACE_NO_FIELD, &len); + ut_ad(len == 4); space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr); @@ -699,8 +702,8 @@ dict_drop_index_tree( root_page_no); */ btr_free_root(space, root_page_no, mtr); - page_rec_write_index_page_no(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD, - FIL_NULL, mtr); + page_rec_write_index_page_no(rec, + DICT_SYS_INDEXES_PAGE_NO_FIELD, FIL_NULL, mtr); } /************************************************************************* diff --git a/innobase/dict/dict0dict.c b/innobase/dict/dict0dict.c index 183c547ab2b..0aaa3a9a721 100644 --- a/innobase/dict/dict0dict.c +++ b/innobase/dict/dict0dict.c @@ -814,23 +814,22 @@ dict_table_add_to_cache( system columns. */ dict_mem_table_add_col(table, "DB_ROW_ID", DATA_SYS, - DATA_ROW_ID, 0, 0); + DATA_ROW_ID | DATA_NOT_NULL, DATA_ROW_ID_LEN, 0); #if DATA_ROW_ID != 0 #error "DATA_ROW_ID != 0" #endif dict_mem_table_add_col(table, "DB_TRX_ID", DATA_SYS, - DATA_TRX_ID, 0, 0); + DATA_TRX_ID | DATA_NOT_NULL, DATA_TRX_ID_LEN, 0); #if DATA_TRX_ID != 1 #error "DATA_TRX_ID != 1" #endif dict_mem_table_add_col(table, "DB_ROLL_PTR", DATA_SYS, - DATA_ROLL_PTR, 0, 0); + DATA_ROLL_PTR | DATA_NOT_NULL, DATA_ROLL_PTR_LEN, 0); #if DATA_ROLL_PTR != 2 #error "DATA_ROLL_PTR != 2" #endif - dict_mem_table_add_col(table, "DB_MIX_ID", DATA_SYS, - DATA_MIX_ID, 0, 0); + DATA_MIX_ID | DATA_NOT_NULL, DATA_MIX_ID_LEN, 0); #if DATA_MIX_ID != 3 #error "DATA_MIX_ID != 3" #endif @@ -1588,7 +1587,7 @@ dict_index_find_cols( /*********************************************************************** Adds a column to index. */ -UNIV_INLINE + void dict_index_add_col( /*===============*/ @@ -1604,6 +1603,34 @@ dict_index_add_col( field = dict_index_get_nth_field(index, index->n_def - 1); field->col = col; + field->fixed_len = dtype_get_fixed_size(&col->type); + + if (prefix_len && field->fixed_len > prefix_len) { + field->fixed_len = prefix_len; + } + + /* Long fixed-length fields that need external storage are treated as + variable-length fields, so that the extern flag can be embedded in + the length word. */ + + if (field->fixed_len > DICT_MAX_COL_PREFIX_LEN) { + field->fixed_len = 0; + } + + if (!(dtype_get_prtype(&col->type) & DATA_NOT_NULL)) { + index->n_nullable++; + } + + if (index->n_def > 1) { + const dict_field_t* field2 = + dict_index_get_nth_field(index, index->n_def - 2); + field->fixed_offs = (!field2->fixed_len || + field2->fixed_offs == ULINT_UNDEFINED) + ? 
ULINT_UNDEFINED + : field2->fixed_len + field2->fixed_offs; + } else { + field->fixed_offs = 0; + } } /*********************************************************************** @@ -3580,9 +3607,10 @@ dict_tree_find_index_low( && (table->type != DICT_TABLE_ORDINARY)) { /* Get the mix id of the record */ + ut_a(!table->comp); mix_id = mach_dulint_read_compressed( - rec_get_nth_field(rec, table->mix_len, &len)); + rec_get_nth_field_old(rec, table->mix_len, &len)); while (ut_dulint_cmp(table->mix_id, mix_id) != 0) { @@ -3715,7 +3743,8 @@ dict_tree_build_node_ptr( on non-leaf levels we remove the last field, which contains the page number of the child page */ - n_unique = rec_get_n_fields(rec); + ut_a(!ind->table->comp); + n_unique = rec_get_n_fields_old(rec); if (level > 0) { ut_a(n_unique > 1); @@ -3744,9 +3773,11 @@ dict_tree_build_node_ptr( field = dtuple_get_nth_field(tuple, n_unique); dfield_set_data(field, buf, 4); - dtype_set(dfield_get_type(field), DATA_SYS_CHILD, 0, 0, 0); + dtype_set(dfield_get_type(field), DATA_SYS_CHILD, DATA_NOT_NULL, 4, 0); - rec_copy_prefix_to_dtuple(tuple, rec, n_unique, heap); + rec_copy_prefix_to_dtuple(tuple, rec, ind, n_unique, heap); + dtuple_set_info_bits(tuple, dtuple_get_info_bits(tuple) | + REC_STATUS_NODE_PTR); ut_ad(dtuple_check_typed(tuple)); @@ -3763,27 +3794,26 @@ dict_tree_copy_rec_order_prefix( /* out: pointer to the prefix record */ dict_tree_t* tree, /* in: index tree */ rec_t* rec, /* in: record for which to copy prefix */ + ulint* n_fields,/* out: number of fields copied */ byte** buf, /* in/out: memory buffer for the copied prefix, or NULL */ ulint* buf_size)/* in/out: buffer size */ { - dict_index_t* ind; - rec_t* order_rec; - ulint n_fields; - - ind = dict_tree_find_index_low(tree, rec); + dict_index_t* index; + ulint n; + + index = dict_tree_find_index_low(tree, rec); - n_fields = dict_index_get_n_unique_in_tree(ind); - if (tree->type & DICT_UNIVERSAL) { - - n_fields = rec_get_n_fields(rec); + ut_a(!index->table->comp); + n = rec_get_n_fields_old(rec); + } else { + n = dict_index_get_n_unique_in_tree(index); } - order_rec = rec_copy_prefix_to_buf(rec, n_fields, buf, buf_size); - - return(order_rec); -} + *n_fields = n; + return(rec_copy_prefix_to_buf(rec, index, n, buf, buf_size)); +} /************************************************************************** Builds a typed data tuple out of a physical record. */ @@ -3794,21 +3824,21 @@ dict_tree_build_data_tuple( /* out, own: data tuple */ dict_tree_t* tree, /* in: index tree */ rec_t* rec, /* in: record for which to build data tuple */ + ulint n_fields,/* in: number of data fields */ mem_heap_t* heap) /* in: memory heap where tuple created */ { dtuple_t* tuple; dict_index_t* ind; - ulint n_fields; ind = dict_tree_find_index_low(tree, rec); - n_fields = rec_get_n_fields(rec); + ut_ad(ind->table->comp || n_fields <= rec_get_n_fields_old(rec)); tuple = dtuple_create(heap, n_fields); dict_index_copy_types(tuple, ind, n_fields); - rec_copy_prefix_to_dtuple(tuple, rec, n_fields, heap); + rec_copy_prefix_to_dtuple(tuple, rec, ind, n_fields, heap); ut_ad(dtuple_check_typed(tuple)); @@ -3826,6 +3856,27 @@ dict_index_calc_min_rec_len( ulint sum = 0; ulint i; + if (index->table->comp) { + ulint nullable = 0; + sum = REC_N_NEW_EXTRA_BYTES; + for (i = 0; i < dict_index_get_n_fields(index); i++) { + dtype_t*t = dict_index_get_nth_type(index, i); + ulint size = dtype_get_fixed_size(t); + sum += size; + if (!size) { + size = dtype_get_len(t); + sum += size < 128 ? 
1 : 2; + } + if (!(dtype_get_prtype(t) & DATA_NOT_NULL)) + nullable++; + } + + /* round the NULL flags up to full bytes */ + sum += (nullable + 7) / 8; + + return(sum); + } + for (i = 0; i < dict_index_get_n_fields(index); i++) { sum += dtype_get_fixed_size(dict_index_get_nth_type(index, i)); } @@ -3836,7 +3887,7 @@ dict_index_calc_min_rec_len( sum += dict_index_get_n_fields(index); } - sum += REC_N_EXTRA_BYTES; + sum += REC_N_OLD_EXTRA_BYTES; return(sum); } diff --git a/innobase/dict/dict0load.c b/innobase/dict/dict0load.c index 8fc6eb9141e..c80f8346abf 100644 --- a/innobase/dict/dict0load.c +++ b/innobase/dict/dict0load.c @@ -55,6 +55,7 @@ dict_get_first_table_name_in_db( sys_tables = dict_table_get_low("SYS_TABLES"); sys_index = UT_LIST_GET_FIRST(sys_tables->indexes); + ut_a(!sys_tables->comp); tuple = dtuple_create(heap, 1); dfield = dtuple_get_nth_field(tuple, 0); @@ -77,7 +78,7 @@ loop: return(NULL); } - field = rec_get_nth_field(rec, 0, &len); + field = rec_get_nth_field_old(rec, 0, &len); if (len < strlen(name) || ut_memcmp(name, field, strlen(name)) != 0) { @@ -90,7 +91,7 @@ loop: return(NULL); } - if (!rec_get_deleted_flag(rec)) { + if (!rec_get_deleted_flag(rec, sys_tables->comp)) { /* We found one */ @@ -163,9 +164,9 @@ loop: return; } - field = rec_get_nth_field(rec, 0, &len); + field = rec_get_nth_field_old(rec, 0, &len); - if (!rec_get_deleted_flag(rec)) { + if (!rec_get_deleted_flag(rec, sys_tables->comp)) { /* We found one */ @@ -229,6 +230,7 @@ dict_check_tablespaces_or_store_max_id( sys_tables = dict_table_get_low("SYS_TABLES"); sys_index = UT_LIST_GET_FIRST(sys_tables->indexes); + ut_a(!sys_tables->comp); btr_pcur_open_at_index_side(TRUE, sys_index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr); @@ -255,15 +257,15 @@ loop: return; } - field = rec_get_nth_field(rec, 0, &len); + field = rec_get_nth_field_old(rec, 0, &len); - if (!rec_get_deleted_flag(rec)) { + if (!rec_get_deleted_flag(rec, sys_tables->comp)) { /* We found one */ char* name = mem_strdupl((char*) field, len); - field = rec_get_nth_field(rec, 9, &len); + field = rec_get_nth_field_old(rec, 9, &len); ut_a(len == 4); space_id = mach_read_from_4(field); @@ -328,6 +330,7 @@ dict_load_columns( sys_columns = dict_table_get_low("SYS_COLUMNS"); sys_index = UT_LIST_GET_FIRST(sys_columns->indexes); + ut_a(!sys_columns->comp); tuple = dtuple_create(heap, 1); dfield = dtuple_get_nth_field(tuple, 0); @@ -346,28 +349,27 @@ dict_load_columns( ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr)); - ut_a(!rec_get_deleted_flag(rec)); - - field = rec_get_nth_field(rec, 0, &len); + ut_a(!rec_get_deleted_flag(rec, sys_columns->comp)); + + field = rec_get_nth_field_old(rec, 0, &len); ut_ad(len == 8); ut_a(ut_dulint_cmp(table->id, mach_read_from_8(field)) == 0); - field = rec_get_nth_field(rec, 1, &len); + field = rec_get_nth_field_old(rec, 1, &len); ut_ad(len == 4); ut_a(i == mach_read_from_4(field)); ut_a(0 == ut_strcmp("NAME", dict_field_get_col( - dict_index_get_nth_field( - dict_table_get_first_index(sys_columns), 4))->name)); + dict_index_get_nth_field(sys_index, 4))->name)); - field = rec_get_nth_field(rec, 4, &len); + field = rec_get_nth_field_old(rec, 4, &len); name = mem_heap_strdupl(heap, (char*) field, len); - field = rec_get_nth_field(rec, 5, &len); + field = rec_get_nth_field_old(rec, 5, &len); mtype = mach_read_from_4(field); - field = rec_get_nth_field(rec, 6, &len); + field = rec_get_nth_field_old(rec, 6, &len); prtype = mach_read_from_4(field); if (dtype_is_non_binary_string_type(mtype, prtype) @@ -379,15 +381,14 @@ 
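
The compact-format branch added to dict_index_calc_min_rec_len() above estimates the smallest possible record: REC_N_NEW_EXTRA_BYTES of header, each fixed-length column at its full size, one or two length bytes for each variable-length column (two when its maximum length is 128 or more), and the NULL-flag bits rounded up to whole bytes. The self-contained sketch below reproduces that arithmetic; the value 5 assumed for REC_N_NEW_EXTRA_BYTES and the example column definitions are illustrative assumptions, not values taken from this patch.

#include <stdio.h>

struct demo_field {
	unsigned	fixed_len;	/* 0 means variable-length */
	unsigned	max_len;	/* used when fixed_len == 0 */
	int		nullable;
};

/* assumption for illustration: the compact record header is 5 bytes */
#define DEMO_REC_N_NEW_EXTRA_BYTES	5

static unsigned
demo_min_rec_len(const struct demo_field* f, unsigned n)
{
	unsigned	sum = DEMO_REC_N_NEW_EXTRA_BYTES;
	unsigned	nullable = 0;
	unsigned	i;

	for (i = 0; i < n; i++) {
		if (f[i].fixed_len) {
			sum += f[i].fixed_len;
		} else {
			/* a variable-length column contributes only its
			   length byte(s) to the minimum: 1 byte if the
			   maximum length is below 128, else 2 bytes */
			sum += f[i].max_len < 128 ? 1 : 2;
		}

		if (f[i].nullable) {
			nullable++;
		}
	}

	/* round the NULL flags up to full bytes */
	sum += (nullable + 7) / 8;

	return(sum);
}

int
main(void)
{
	/* e.g. INT NOT NULL, VARCHAR(40) NULL, CHAR(8) NOT NULL */
	const struct demo_field	fields[3] = {
		{ 4, 0, 0 }, { 0, 40, 1 }, { 8, 0, 0 }
	};

	printf("estimated minimum record length: %u bytes\n",
	       demo_min_rec_len(fields, 3));

	return(0);
}

For these three example columns the estimate is 19 bytes: 5 header bytes, 4 + 8 bytes of fixed-length data, 1 length byte for the (empty) VARCHAR, and 1 byte of NULL flags.
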
dict_load_columns( data_mysql_default_charset_coll); } - field = rec_get_nth_field(rec, 7, &len); + field = rec_get_nth_field_old(rec, 7, &len); col_len = mach_read_from_4(field); ut_a(0 == ut_strcmp("PREC", dict_field_get_col( - dict_index_get_nth_field( - dict_table_get_first_index(sys_columns), 8))->name)); + dict_index_get_nth_field(sys_index, 8))->name)); - field = rec_get_nth_field(rec, 8, &len); + field = rec_get_nth_field_old(rec, 8, &len); prec = mach_read_from_4(field); dict_mem_table_add_col(table, name, mtype, prtype, col_len, @@ -452,6 +453,7 @@ dict_load_fields( sys_fields = dict_table_get_low("SYS_FIELDS"); sys_index = UT_LIST_GET_FIRST(sys_fields->indexes); + ut_a(!sys_fields->comp); tuple = dtuple_create(heap, 1); dfield = dtuple_get_nth_field(tuple, 0); @@ -469,15 +471,15 @@ dict_load_fields( rec = btr_pcur_get_rec(&pcur); ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr)); - if (rec_get_deleted_flag(rec)) { + if (rec_get_deleted_flag(rec, sys_fields->comp)) { dict_load_report_deleted_index(table->name, i); } - field = rec_get_nth_field(rec, 0, &len); + field = rec_get_nth_field_old(rec, 0, &len); ut_ad(len == 8); ut_a(ut_memcmp(buf, field, len) == 0); - field = rec_get_nth_field(rec, 1, &len); + field = rec_get_nth_field_old(rec, 1, &len); ut_a(len == 4); /* The next field stores the field position in the index @@ -503,10 +505,9 @@ dict_load_fields( ut_a(0 == ut_strcmp("COL_NAME", dict_field_get_col( - dict_index_get_nth_field( - dict_table_get_first_index(sys_fields), 4))->name)); + dict_index_get_nth_field(sys_index, 4))->name)); - field = rec_get_nth_field(rec, 4, &len); + field = rec_get_nth_field_old(rec, 4, &len); dict_mem_index_add_field(index, mem_heap_strdupl(heap, (char*) field, len), 0, prefix_len); @@ -565,6 +566,7 @@ dict_load_indexes( sys_indexes = dict_table_get_low("SYS_INDEXES"); sys_index = UT_LIST_GET_FIRST(sys_indexes->indexes); + ut_a(!sys_indexes->comp); tuple = dtuple_create(heap, 1); dfield = dtuple_get_nth_field(tuple, 0); @@ -585,14 +587,14 @@ dict_load_indexes( rec = btr_pcur_get_rec(&pcur); - field = rec_get_nth_field(rec, 0, &len); + field = rec_get_nth_field_old(rec, 0, &len); ut_ad(len == 8); if (ut_memcmp(buf, field, len) != 0) { break; } - if (rec_get_deleted_flag(rec)) { + if (rec_get_deleted_flag(rec, table->comp)) { dict_load_report_deleted_index(table->name, ULINT_UNDEFINED); @@ -602,33 +604,31 @@ dict_load_indexes( return(FALSE); } - field = rec_get_nth_field(rec, 1, &len); + field = rec_get_nth_field_old(rec, 1, &len); ut_ad(len == 8); id = mach_read_from_8(field); ut_a(0 == ut_strcmp("NAME", dict_field_get_col( - dict_index_get_nth_field( - dict_table_get_first_index(sys_indexes), 4))->name)); - - field = rec_get_nth_field(rec, 4, &name_len); + dict_index_get_nth_field(sys_index, 4))->name)); + + field = rec_get_nth_field_old(rec, 4, &name_len); name_buf = mem_heap_strdupl(heap, (char*) field, name_len); - field = rec_get_nth_field(rec, 5, &len); + field = rec_get_nth_field_old(rec, 5, &len); n_fields = mach_read_from_4(field); - field = rec_get_nth_field(rec, 6, &len); + field = rec_get_nth_field_old(rec, 6, &len); type = mach_read_from_4(field); - field = rec_get_nth_field(rec, 7, &len); + field = rec_get_nth_field_old(rec, 7, &len); space = mach_read_from_4(field); ut_a(0 == ut_strcmp("PAGE_NO", dict_field_get_col( - dict_index_get_nth_field( - dict_table_get_first_index(sys_indexes), 8))->name)); + dict_index_get_nth_field(sys_index, 8))->name)); - field = rec_get_nth_field(rec, 8, &len); + field = rec_get_nth_field_old(rec, 8, &len); 
page_no = mach_read_from_4(field); if (page_no == FIL_NULL) { @@ -731,6 +731,7 @@ dict_load_table( sys_tables = dict_table_get_low("SYS_TABLES"); sys_index = UT_LIST_GET_FIRST(sys_tables->indexes); + ut_a(!sys_tables->comp); tuple = dtuple_create(heap, 1); dfield = dtuple_get_nth_field(tuple, 0); @@ -743,7 +744,7 @@ dict_load_table( rec = btr_pcur_get_rec(&pcur); if (!btr_pcur_is_on_user_rec(&pcur, &mtr) - || rec_get_deleted_flag(rec)) { + || rec_get_deleted_flag(rec, sys_tables->comp)) { /* Not found */ btr_pcur_close(&pcur); @@ -753,7 +754,7 @@ dict_load_table( return(NULL); } - field = rec_get_nth_field(rec, 0, &len); + field = rec_get_nth_field_old(rec, 0, &len); /* Check if the table name in record is the searched one */ if (len != ut_strlen(name) || ut_memcmp(name, field, len) != 0) { @@ -767,10 +768,9 @@ dict_load_table( ut_a(0 == ut_strcmp("SPACE", dict_field_get_col( - dict_index_get_nth_field( - dict_table_get_first_index(sys_tables), 9))->name)); + dict_index_get_nth_field(sys_index, 9))->name)); - field = rec_get_nth_field(rec, 9, &len); + field = rec_get_nth_field_old(rec, 9, &len); space = mach_read_from_4(field); /* Check if the tablespace exists and has the right name */ @@ -792,43 +792,45 @@ dict_load_table( ut_a(0 == ut_strcmp("N_COLS", dict_field_get_col( - dict_index_get_nth_field( - dict_table_get_first_index(sys_tables), 4))->name)); + dict_index_get_nth_field(sys_index, 4))->name)); - field = rec_get_nth_field(rec, 4, &len); + field = rec_get_nth_field_old(rec, 4, &len); n_cols = mach_read_from_4(field); - table = dict_mem_table_create(name, space, n_cols); + /* table->comp will be initialized later, in this function */ + table = dict_mem_table_create(name, space, n_cols, FALSE); table->ibd_file_missing = ibd_file_missing; ut_a(0 == ut_strcmp("ID", dict_field_get_col( - dict_index_get_nth_field( - dict_table_get_first_index(sys_tables), 3))->name)); + dict_index_get_nth_field(sys_index, 3))->name)); - field = rec_get_nth_field(rec, 3, &len); + field = rec_get_nth_field_old(rec, 3, &len); table->id = mach_read_from_8(field); - field = rec_get_nth_field(rec, 5, &len); + field = rec_get_nth_field_old(rec, 5, &len); table->type = mach_read_from_4(field); if (table->type == DICT_TABLE_CLUSTER_MEMBER) { ut_error; #if 0 /* clustered tables have not been implemented yet */ - field = rec_get_nth_field(rec, 6, &len); + field = rec_get_nth_field_old(rec, 6, &len); table->mix_id = mach_read_from_8(field); - field = rec_get_nth_field(rec, 8, &len); + field = rec_get_nth_field_old(rec, 8, &len); table->cluster_name = mem_heap_strdupl(heap, (char*) field, len); #endif } + /* The high-order bit of MIX_LEN is the "compact format" flag */ + field = rec_get_nth_field_old(rec, 7, &len); + table->comp = !!(mach_read_from_1(field) & 0x80); + if ((table->type == DICT_TABLE_CLUSTER) || (table->type == DICT_TABLE_CLUSTER_MEMBER)) { - - field = rec_get_nth_field(rec, 7, &len); - table->mix_len = mach_read_from_4(field); + + table->mix_len = mach_read_from_4(field) & 0x7fffffff; } btr_pcur_close(&pcur); @@ -906,6 +908,7 @@ dict_load_table_on_id( sys_tables = dict_sys->sys_tables; sys_table_ids = dict_table_get_next_index( dict_table_get_first_index(sys_tables)); + ut_a(!sys_tables->comp); heap = mem_heap_create(256); tuple = dtuple_create(heap, 1); @@ -922,7 +925,7 @@ dict_load_table_on_id( rec = btr_pcur_get_rec(&pcur); if (!btr_pcur_is_on_user_rec(&pcur, &mtr) - || rec_get_deleted_flag(rec)) { + || rec_get_deleted_flag(rec, sys_tables->comp)) { /* Not found */ btr_pcur_close(&pcur); @@ 
-937,7 +940,7 @@ dict_load_table_on_id( table ID and NAME */ rec = btr_pcur_get_rec(&pcur); - field = rec_get_nth_field(rec, 0, &len); + field = rec_get_nth_field_old(rec, 0, &len); ut_ad(len == 8); /* Check if the table id in record is the one searched for */ @@ -951,7 +954,7 @@ dict_load_table_on_id( } /* Now we get the table name from the record */ - field = rec_get_nth_field(rec, 1, &len); + field = rec_get_nth_field_old(rec, 1, &len); /* Load the table definition to memory */ table = dict_load_table(mem_heap_strdupl(heap, (char*) field, len)); @@ -1019,6 +1022,7 @@ dict_load_foreign_cols( sys_foreign_cols = dict_table_get_low("SYS_FOREIGN_COLS"); sys_index = UT_LIST_GET_FIRST(sys_foreign_cols->indexes); + ut_a(!sys_foreign_cols->comp); tuple = dtuple_create(foreign->heap, 1); dfield = dtuple_get_nth_field(tuple, 0); @@ -1033,21 +1037,21 @@ dict_load_foreign_cols( rec = btr_pcur_get_rec(&pcur); ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr)); - ut_a(!rec_get_deleted_flag(rec)); - - field = rec_get_nth_field(rec, 0, &len); + ut_a(!rec_get_deleted_flag(rec, sys_foreign_cols->comp)); + + field = rec_get_nth_field_old(rec, 0, &len); ut_a(len == ut_strlen(id)); ut_a(ut_memcmp(id, field, len) == 0); - field = rec_get_nth_field(rec, 1, &len); + field = rec_get_nth_field_old(rec, 1, &len); ut_a(len == 4); ut_a(i == mach_read_from_4(field)); - field = rec_get_nth_field(rec, 4, &len); + field = rec_get_nth_field_old(rec, 4, &len); foreign->foreign_col_names[i] = mem_heap_strdupl(foreign->heap, (char*) field, len); - field = rec_get_nth_field(rec, 5, &len); + field = rec_get_nth_field_old(rec, 5, &len); foreign->referenced_col_names[i] = mem_heap_strdupl(foreign->heap, (char*) field, len); @@ -1091,6 +1095,7 @@ dict_load_foreign( sys_foreign = dict_table_get_low("SYS_FOREIGN"); sys_index = UT_LIST_GET_FIRST(sys_foreign->indexes); + ut_a(!sys_foreign->comp); tuple = dtuple_create(heap2, 1); dfield = dtuple_get_nth_field(tuple, 0); @@ -1103,7 +1108,7 @@ dict_load_foreign( rec = btr_pcur_get_rec(&pcur); if (!btr_pcur_is_on_user_rec(&pcur, &mtr) - || rec_get_deleted_flag(rec)) { + || rec_get_deleted_flag(rec, sys_foreign->comp)) { /* Not found */ fprintf(stderr, @@ -1117,7 +1122,7 @@ dict_load_foreign( return(DB_ERROR); } - field = rec_get_nth_field(rec, 0, &len); + field = rec_get_nth_field_old(rec, 0, &len); /* Check if the id in record is the searched one */ if (len != ut_strlen(id) || ut_memcmp(id, field, len) != 0) { @@ -1140,7 +1145,8 @@ dict_load_foreign( foreign = dict_mem_foreign_create(); - foreign->n_fields = mach_read_from_4(rec_get_nth_field(rec, 5, &len)); + foreign->n_fields = + mach_read_from_4(rec_get_nth_field_old(rec, 5, &len)); ut_a(len == 4); @@ -1151,11 +1157,11 @@ dict_load_foreign( foreign->id = mem_heap_strdup(foreign->heap, id); - field = rec_get_nth_field(rec, 3, &len); + field = rec_get_nth_field_old(rec, 3, &len); foreign->foreign_table_name = mem_heap_strdupl(foreign->heap, (char*) field, len); - - field = rec_get_nth_field(rec, 4, &len); + + field = rec_get_nth_field_old(rec, 4, &len); foreign->referenced_table_name = mem_heap_strdupl(foreign->heap, (char*) field, len); @@ -1224,6 +1230,7 @@ dict_load_foreigns( return(DB_ERROR); } + ut_a(!sys_foreign->comp); mtr_start(&mtr); /* Get the secondary index based on FOR_NAME from table @@ -1255,7 +1262,7 @@ loop: name and a foreign constraint ID */ rec = btr_pcur_get_rec(&pcur); - field = rec_get_nth_field(rec, 0, &len); + field = rec_get_nth_field_old(rec, 0, &len); /* Check if the table name in the record is the one searched 
for; the following call does the comparison in the latin1_swedish_ci @@ -1278,13 +1285,13 @@ loop: goto next_rec; } - if (rec_get_deleted_flag(rec)) { + if (rec_get_deleted_flag(rec, sys_foreign->comp)) { goto next_rec; } /* Now we get a foreign key constraint id */ - field = rec_get_nth_field(rec, 1, &len); + field = rec_get_nth_field_old(rec, 1, &len); id = mem_heap_strdupl(heap, (char*) field, len); btr_pcur_store_position(&pcur, &mtr); diff --git a/innobase/dict/dict0mem.c b/innobase/dict/dict0mem.c index 1d45585aac1..48b9f28d292 100644 --- a/innobase/dict/dict0mem.c +++ b/innobase/dict/dict0mem.c @@ -35,7 +35,8 @@ dict_mem_table_create( the table is placed; this parameter is ignored if the table is made a member of a cluster */ - ulint n_cols) /* in: number of columns */ + ulint n_cols, /* in: number of columns */ + ibool comp) /* in: TRUE=compact page format */ { dict_table_t* table; mem_heap_t* heap; @@ -54,6 +55,7 @@ dict_mem_table_create( table->space = space; table->ibd_file_missing = FALSE; table->tablespace_discarded = FALSE; + table->comp = comp; table->n_def = 0; table->n_cols = n_cols + DATA_N_SYS_COLS; table->mem_fix = 0; @@ -110,7 +112,8 @@ dict_mem_cluster_create( { dict_table_t* cluster; - cluster = dict_mem_table_create(name, space, n_cols); + /* Clustered tables cannot work with the compact record format. */ + cluster = dict_mem_table_create(name, space, n_cols, FALSE); cluster->type = DICT_TABLE_CLUSTER; cluster->mix_len = mix_len; @@ -197,7 +200,7 @@ dict_mem_index_create( index->name = mem_heap_strdup(heap, index_name); index->table_name = table_name; index->table = NULL; - index->n_def = 0; + index->n_def = index->n_nullable = 0; index->n_fields = n_fields; index->fields = mem_heap_alloc(heap, 1 + n_fields * sizeof(dict_field_t)); diff --git a/innobase/fil/fil0fil.c b/innobase/fil/fil0fil.c index 6d3ffcd63f3..dea48117e00 100644 --- a/innobase/fil/fil0fil.c +++ b/innobase/fil/fil0fil.c @@ -1579,30 +1579,38 @@ fil_op_write_log( mtr_t* mtr) /* in: mini-transaction handle */ { byte* log_ptr; + ulint len; + + log_ptr = mlog_open(mtr, 11 + 2); + + if (!log_ptr) { + /* Logging in mtr is switched off during crash recovery: + in that case mlog_open returns NULL */ + return; + } - log_ptr = mlog_open(mtr, 30); - log_ptr = mlog_write_initial_log_record_for_file_op(type, space_id, 0, log_ptr, mtr); /* Let us store the strings as null-terminated for easier readability and handling */ - mach_write_to_2(log_ptr, ut_strlen(name) + 1); + len = strlen(name) + 1; + + mach_write_to_2(log_ptr, len); log_ptr += 2; - mlog_close(mtr, log_ptr); - mlog_catenate_string(mtr, (byte*) name, ut_strlen(name) + 1); + mlog_catenate_string(mtr, (byte*) name, len); if (type == MLOG_FILE_RENAME) { - log_ptr = mlog_open(mtr, 30); - mach_write_to_2(log_ptr, ut_strlen(new_name) + 1); + ulint len = strlen(new_name) + 1; + log_ptr = mlog_open(mtr, 2 + len); + ut_a(log_ptr); + mach_write_to_2(log_ptr, len); log_ptr += 2; - mlog_close(mtr, log_ptr); - mlog_catenate_string(mtr, (byte*) new_name, - ut_strlen(new_name) + 1); + mlog_catenate_string(mtr, (byte*) new_name, len); } } #endif diff --git a/innobase/fsp/fsp0fsp.c b/innobase/fsp/fsp0fsp.c index e1621cc2765..ef8e70646c6 100644 --- a/innobase/fsp/fsp0fsp.c +++ b/innobase/fsp/fsp0fsp.c @@ -910,7 +910,7 @@ fsp_header_init( if (space == 0) { fsp_fill_free_list(FALSE, space, header, mtr); btr_create(DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, space, - ut_dulint_add(DICT_IBUF_ID_MIN, space), mtr); + ut_dulint_add(DICT_IBUF_ID_MIN, space), FALSE, mtr); } else 
{ fsp_fill_free_list(TRUE, space, header, mtr); } diff --git a/innobase/ibuf/ibuf0ibuf.c b/innobase/ibuf/ibuf0ibuf.c index 2191cdc0ee6..c7ca03f9901 100644 --- a/innobase/ibuf/ibuf0ibuf.c +++ b/innobase/ibuf/ibuf0ibuf.c @@ -46,7 +46,7 @@ Note that contary to what we planned in the 1990's, there will only be one insert buffer tree, and that is in the system tablespace of InnoDB. 1. The first field is the space id. -2. The second field is a one-byte marker which differentiates records from +2. The second field is a one-byte marker (0) which differentiates records from the < 4.1.x storage format. 3. The third field is the page number. 4. The fourth field contains the type info, where we have also added 2 bytes to @@ -55,7 +55,14 @@ insert buffer tree, and that is in the system tablespace of InnoDB. can use in the binary search on the index page in the ibuf merge phase. 5. The rest of the fields contain the fields of the actual index record. -*/ +In versions >= 5.0.3: + +The first byte of the fourth field is an additional marker (0) if the record +is in the compact format. The presence of this marker can be detected by +looking at the length of the field modulo DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE. + +The high-order bit of the character set field in the type info is the +"nullable" flag for the field. */ /* PREVENTING DEADLOCKS IN THE INSERT BUFFER SYSTEM @@ -525,8 +532,8 @@ ibuf_data_init_for_space( ibuf_exit(); sprintf(buf, "SYS_IBUF_TABLE_%lu", (ulong) space); - - table = dict_mem_table_create(buf, space, 2); + /* use old-style record format for the insert buffer */ + table = dict_mem_table_create(buf, space, 2, FALSE); dict_mem_table_add_col(table, "PAGE_NO", DATA_BINARY, 0, 0, 0); dict_mem_table_add_col(table, "TYPES", DATA_BINARY, 0, 0, 0); @@ -1049,20 +1056,20 @@ ibuf_rec_get_page_no( ulint len; ut_ad(ibuf_inside()); - ut_ad(rec_get_n_fields(rec) > 2); + ut_ad(rec_get_n_fields_old(rec) > 2); - field = rec_get_nth_field(rec, 1, &len); + field = rec_get_nth_field_old(rec, 1, &len); if (len == 1) { /* This is of the >= 4.1.x record format */ ut_a(trx_sys_multiple_tablespace_format); - field = rec_get_nth_field(rec, 2, &len); + field = rec_get_nth_field_old(rec, 2, &len); } else { ut_a(trx_doublewrite_must_reset_space_ids); ut_a(!trx_sys_multiple_tablespace_format); - field = rec_get_nth_field(rec, 0, &len); + field = rec_get_nth_field_old(rec, 0, &len); } ut_a(len == 4); @@ -1084,15 +1091,15 @@ ibuf_rec_get_space( ulint len; ut_ad(ibuf_inside()); - ut_ad(rec_get_n_fields(rec) > 2); + ut_ad(rec_get_n_fields_old(rec) > 2); - field = rec_get_nth_field(rec, 1, &len); + field = rec_get_nth_field_old(rec, 1, &len); if (len == 1) { /* This is of the >= 4.1.x record format */ ut_a(trx_sys_multiple_tablespace_format); - field = rec_get_nth_field(rec, 0, &len); + field = rec_get_nth_field_old(rec, 0, &len); ut_a(len == 4); return(mach_read_from_4(field)); @@ -1104,6 +1111,161 @@ ibuf_rec_get_space( return(0); } +/************************************************************************ +Creates a dummy index for inserting a record to a non-clustered index. 
+*/ +static +dict_index_t* +ibuf_dummy_index_create( +/*====================*/ + /* out: dummy index */ + ulint n, /* in: number of fields */ + ibool comp) /* in: TRUE=use compact record format */ +{ + dict_table_t* table; + dict_index_t* index; + table = dict_mem_table_create("IBUF_DUMMY", + DICT_HDR_SPACE, n, comp); + index = dict_mem_index_create("IBUF_DUMMY", "IBUF_DUMMY", + DICT_HDR_SPACE, 0, n); + index->table = table; + /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */ + index->cached = TRUE; + return(index); +} +/************************************************************************ +Add a column to the dummy index */ +static +void +ibuf_dummy_index_add_col( +/*====================*/ + dict_index_t* index, /* in: dummy index */ + dtype_t* type) /* in: the data type of the column */ +{ + ulint i = index->table->n_def; + dict_mem_table_add_col(index->table, "DUMMY", + dtype_get_mtype(type), + dtype_get_prtype(type), + dtype_get_len(type), + dtype_get_prec(type)); + dict_index_add_col(index, + dict_table_get_nth_col(index->table, i), 0, 0); +} +/************************************************************************ +Deallocates a dummy index for inserting a record to a non-clustered index. +*/ +static +void +ibuf_dummy_index_free( +/*====================*/ + dict_index_t* index) /* in: dummy index */ +{ + dict_table_t* table = index->table; + mem_heap_free(index->heap); + mutex_free(&(table->autoinc_mutex)); + mem_heap_free(table->heap); +} + +/************************************************************************* +Builds the entry to insert into a non-clustered index when we have the +corresponding record in an ibuf index. */ +static +dtuple_t* +ibuf_build_entry_from_ibuf_rec( +/*===========================*/ + /* out, own: entry to insert to + a non-clustered index; NOTE that + as we copy pointers to fields in + ibuf_rec, the caller must hold a + latch to the ibuf_rec page as long + as the entry is used! 
*/ + rec_t* ibuf_rec, /* in: record in an insert buffer */ + mem_heap_t* heap, /* in: heap where built */ + dict_index_t** pindex) /* out, own: dummy index that + describes the entry */ +{ + dtuple_t* tuple; + dfield_t* field; + ulint n_fields; + byte* types; + const byte* data; + ulint len; + ulint i; + dict_index_t* index; + + data = rec_get_nth_field_old(ibuf_rec, 1, &len); + + if (len > 1) { + /* This a < 4.1.x format record */ + + ut_a(trx_doublewrite_must_reset_space_ids); + ut_a(!trx_sys_multiple_tablespace_format); + + n_fields = rec_get_n_fields_old(ibuf_rec) - 2; + tuple = dtuple_create(heap, n_fields); + types = rec_get_nth_field_old(ibuf_rec, 1, &len); + + ut_a(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE); + + for (i = 0; i < n_fields; i++) { + field = dtuple_get_nth_field(tuple, i); + + data = rec_get_nth_field_old(ibuf_rec, i + 2, &len); + + dfield_set_data(field, data, len); + + dtype_read_for_order_and_null_size( + dfield_get_type(field), + types + i * DATA_ORDER_NULL_TYPE_BUF_SIZE); + } + + *pindex = ibuf_dummy_index_create(n_fields, FALSE); + return(tuple); + } + + /* This a >= 4.1.x format record */ + + ut_a(trx_sys_multiple_tablespace_format); + ut_a(*data == 0); + ut_a(rec_get_n_fields_old(ibuf_rec) > 4); + + n_fields = rec_get_n_fields_old(ibuf_rec) - 4; + + tuple = dtuple_create(heap, n_fields); + + types = rec_get_nth_field_old(ibuf_rec, 3, &len); + + ut_a(len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE <= 1); + index = ibuf_dummy_index_create(n_fields, + len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); + + if (len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE) { + /* compact record format */ + len--; + ut_a(*types == 0); + types++; + } + + ut_a(len == n_fields * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); + + for (i = 0; i < n_fields; i++) { + field = dtuple_get_nth_field(tuple, i); + + data = rec_get_nth_field_old(ibuf_rec, i + 4, &len); + + dfield_set_data(field, data, len); + + dtype_new_read_for_order_and_null_size( + dfield_get_type(field), + types + i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); + + ibuf_dummy_index_add_col(index, dfield_get_type(field)); + } + + *pindex = index; + return(tuple); +} + /************************************************************************ Returns the space taken by a stored non-clustered index entry if converted to an index record. 
*/ @@ -1125,43 +1287,60 @@ ibuf_rec_get_volume( ulint i; ut_ad(ibuf_inside()); - ut_ad(rec_get_n_fields(ibuf_rec) > 2); - - data = rec_get_nth_field(ibuf_rec, 1, &len); + ut_ad(rec_get_n_fields_old(ibuf_rec) > 2); + + data = rec_get_nth_field_old(ibuf_rec, 1, &len); if (len > 1) { - /* < 4.1.x format record */ + /* < 4.1.x format record */ ut_a(trx_doublewrite_must_reset_space_ids); ut_a(!trx_sys_multiple_tablespace_format); - n_fields = rec_get_n_fields(ibuf_rec) - 2; + n_fields = rec_get_n_fields_old(ibuf_rec) - 2; - types = rec_get_nth_field(ibuf_rec, 1, &len); + types = rec_get_nth_field_old(ibuf_rec, 1, &len); ut_ad(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE); } else { - /* >= 4.1.x format record */ + /* >= 4.1.x format record */ ut_a(trx_sys_multiple_tablespace_format); + ut_a(*data == 0); + + types = rec_get_nth_field_old(ibuf_rec, 3, &len); + + ut_a(len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE <= 1); + if (len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE) { + /* compact record format */ + ulint volume; + dict_index_t* dummy_index; + mem_heap_t* heap = mem_heap_create(500); + dtuple_t* entry = + ibuf_build_entry_from_ibuf_rec( + ibuf_rec, heap, &dummy_index); + volume = rec_get_converted_size(dummy_index, entry); + ibuf_dummy_index_free(dummy_index); + mem_heap_free(heap); + return(volume + page_dir_calc_reserved_space(1)); + } + + n_fields = rec_get_n_fields_old(ibuf_rec) - 4; + new_format = TRUE; - - n_fields = rec_get_n_fields(ibuf_rec) - 4; - - types = rec_get_nth_field(ibuf_rec, 3, &len); } for (i = 0; i < n_fields; i++) { if (new_format) { - data = rec_get_nth_field(ibuf_rec, i + 4, &len); + data = rec_get_nth_field_old(ibuf_rec, i + 4, &len); dtype_new_read_for_order_and_null_size(&dtype, - types + i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); + types + i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); } else { - data = rec_get_nth_field(ibuf_rec, i + 2, &len); + data = rec_get_nth_field_old(ibuf_rec, i + 2, &len); dtype_read_for_order_and_null_size(&dtype, - types + i * DATA_ORDER_NULL_TYPE_BUF_SIZE); + types + i * DATA_ORDER_NULL_TYPE_BUF_SIZE); } if (len == UNIV_SQL_NULL) { @@ -1187,6 +1366,7 @@ ibuf_entry_build( must be kept because we copy pointers to its fields */ dtuple_t* entry, /* in: entry for a non-clustered index */ + ibool comp, /* in: flag: TRUE=compact record format */ ulint space, /* in: space id */ ulint page_no,/* in: index page number where entry should be inserted */ @@ -1202,11 +1382,14 @@ ibuf_entry_build( /* Starting from 4.1.x, we have to build a tuple whose (1) first field is the space id, - (2) the second field a single marker byte to tell that this + (2) the second field a single marker byte (0) to tell that this is a new format record, (3) the third contains the page number, and (4) the fourth contains the relevent type information of each data - field, + field; the length of this field % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE is + (a) 0 for b-trees in the old format, and + (b) 1 for b-trees in the compact format, the first byte of the field + being the marker (0); (5) and the rest of the fields are copied from entry. All fields in the tuple are ordered like the type binary in our insert buffer tree. 
*/ @@ -1247,10 +1430,15 @@ ibuf_entry_build( dfield_set_data(field, buf, 4); + ut_ad(comp == 0 || comp == 1); /* Store the type info in buf2, and add the fields from entry to tuple */ buf2 = mem_heap_alloc(heap, n_fields - * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); + * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE + + comp); + if (comp) { + *buf2++ = 0; /* write the compact format indicator */ + } for (i = 0; i < n_fields; i++) { /* We add 4 below because we have the 4 extra fields at the start of an ibuf record */ @@ -1268,8 +1456,13 @@ ibuf_entry_build( field = dtuple_get_nth_field(tuple, 3); + if (comp) { + buf2--; + } + dfield_set_data(field, buf2, n_fields - * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); + * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE + + comp); /* Set all the types in the new tuple binary */ dtuple_set_types_binary(tuple, n_fields + 4); @@ -1277,88 +1470,6 @@ ibuf_entry_build( return(tuple); } -/************************************************************************* -Builds the entry to insert into a non-clustered index when we have the -corresponding record in an ibuf index. */ -static -dtuple_t* -ibuf_build_entry_from_ibuf_rec( -/*===========================*/ - /* out, own: entry to insert to - a non-clustered index; NOTE that - as we copy pointers to fields in - ibuf_rec, the caller must hold a - latch to the ibuf_rec page as long - as the entry is used! */ - rec_t* ibuf_rec, /* in: record in an insert buffer */ - mem_heap_t* heap) /* in: heap where built */ -{ - dtuple_t* tuple; - dfield_t* field; - ulint n_fields; - byte* types; - byte* data; - ulint len; - ulint i; - - data = rec_get_nth_field(ibuf_rec, 1, &len); - - if (len > 1) { - /* This a < 4.1.x format record */ - - ut_a(trx_doublewrite_must_reset_space_ids); - ut_a(!trx_sys_multiple_tablespace_format); - - n_fields = rec_get_n_fields(ibuf_rec) - 2; - tuple = dtuple_create(heap, n_fields); - types = rec_get_nth_field(ibuf_rec, 1, &len); - - ut_a(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE); - - for (i = 0; i < n_fields; i++) { - field = dtuple_get_nth_field(tuple, i); - - data = rec_get_nth_field(ibuf_rec, i + 2, &len); - - dfield_set_data(field, data, len); - - dtype_read_for_order_and_null_size( - dfield_get_type(field), - types + i * DATA_ORDER_NULL_TYPE_BUF_SIZE); - } - - return(tuple); - } - - /* This a >= 4.1.x format record */ - - ut_a(trx_sys_multiple_tablespace_format); - - ut_a(rec_get_n_fields(ibuf_rec) > 4); - - n_fields = rec_get_n_fields(ibuf_rec) - 4; - - tuple = dtuple_create(heap, n_fields); - - types = rec_get_nth_field(ibuf_rec, 3, &len); - - ut_a(len == n_fields * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); - - for (i = 0; i < n_fields; i++) { - field = dtuple_get_nth_field(tuple, i); - - data = rec_get_nth_field(ibuf_rec, i + 4, &len); - - dfield_set_data(field, data, len); - - dtype_new_read_for_order_and_null_size( - dfield_get_type(field), - types + i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); - } - - return(tuple); -} - /************************************************************************* Builds a search tuple used to search buffered inserts for an index page. 
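For reference, a condensed sketch of how a reader of the insert buffer tree tells the two layouts of this type-info field apart. It only restates the checks already added in ibuf_build_entry_from_ibuf_rec() above, assuming DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE is 6:

        byte*   types;
        ulint   len;
        ulint   comp;

        types = rec_get_nth_field_old(ibuf_rec, 3, &len);

        /* For an n-column entry this field is 6 * n bytes long for an
        old-format b-tree and 6 * n + 1 bytes long for a compact-format
        b-tree, so the remainder modulo 6 identifies the format. */
        ut_a(len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE <= 1);
        comp = len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;

        if (comp) {
                ut_a(*types == 0);      /* the marker byte is always 0 */
                types++;                /* type info proper starts here */
                len--;
        }

        ut_a(len == n_fields * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);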
This is for < 4.1.x format records */ @@ -2047,8 +2158,7 @@ loop: mutex_exit(&ibuf_mutex); sum_sizes = ibuf_get_merge_page_nos(TRUE, btr_pcur_get_rec(&pcur), - space_ids, space_versions, page_nos, - &n_stored); + space_ids, space_versions, page_nos, &n_stored); #ifdef UNIV_IBUF_DEBUG /* fprintf(stderr, "Ibuf contract sync %lu pages %lu volume %lu\n", sync, n_stored, sum_sizes); */ @@ -2344,6 +2454,7 @@ ibuf_update_max_tablespace_id(void) ibuf_data = fil_space_get_ibuf_data(0); ibuf_index = ibuf_data->index; + ut_a(!ibuf_index->table->comp); ibuf_enter(); @@ -2360,7 +2471,7 @@ ibuf_update_max_tablespace_id(void) } else { rec = btr_pcur_get_rec(&pcur); - field = rec_get_nth_field(rec, 0, &len); + field = rec_get_nth_field_old(rec, 0, &len); ut_a(len == 4); @@ -2479,7 +2590,7 @@ ibuf_insert_low( ibuf_enter(); } - entry_size = rec_get_converted_size(entry); + entry_size = rec_get_converted_size(index, entry); heap = mem_heap_create(512); @@ -2487,7 +2598,8 @@ ibuf_insert_low( the first fields and the type information for other fields, and which will be inserted to the insert buffer. */ - ibuf_entry = ibuf_entry_build(entry, space, page_no, heap); + ibuf_entry = ibuf_entry_build(entry, index->table->comp, + space, page_no, heap); /* Open a cursor to the insert buffer tree to calculate if we can add the new entry to it without exceeding the free space limit for the @@ -2532,8 +2644,8 @@ ibuf_insert_low( do_merge = TRUE; ibuf_get_merge_page_nos(FALSE, btr_pcur_get_rec(&pcur), - space_ids, space_versions, page_nos, - &n_stored); + space_ids, space_versions, + page_nos, &n_stored); goto function_exit; } @@ -2656,8 +2768,8 @@ ibuf_insert( ut_a(!(index->type & DICT_CLUSTERED)); - if (rec_get_converted_size(entry) - >= page_get_free_space_of_empty() / 2) { + if (rec_get_converted_size(index, entry) + >= page_get_free_space_of_empty(index->table->comp) / 2) { return(FALSE); } @@ -2692,6 +2804,7 @@ ibuf_insert_to_index_page( dtuple_t* entry, /* in: buffered entry to insert */ page_t* page, /* in: index page where the buffered entry should be placed */ + dict_index_t* index, /* in: record descriptor */ mtr_t* mtr) /* in: mtr */ { page_cur_t page_cur; @@ -2699,17 +2812,28 @@ ibuf_insert_to_index_page( rec_t* rec; page_t* bitmap_page; ulint old_bits; + mem_heap_t* heap; ut_ad(ibuf_inside()); ut_ad(dtuple_check_typed(entry)); - if (rec_get_n_fields(page_rec_get_next(page_get_infimum_rec(page))) - != dtuple_get_n_fields(entry)) { - - fprintf(stderr, + if (index->table->comp != page_is_comp(page)) { + fputs( "InnoDB: Trying to insert a record from the insert buffer to an index page\n" -"InnoDB: but the number of fields does not match!\n"); +"InnoDB: but the 'compact' flag does not match!\n", stderr); + goto dump; + } + heap = mem_heap_create(100); + rec = page_rec_get_next(page_get_infimum_rec(page)); + + if (rec_offs_n_fields(rec_get_offsets(rec, index, ULINT_UNDEFINED, + heap)) != dtuple_get_n_fields(entry)) { + mem_heap_free(heap); + fputs( +"InnoDB: Trying to insert a record from the insert buffer to an index page\n" +"InnoDB: but the number of fields does not match!\n", stderr); + dump: buf_page_print(page); dtuple_print(stderr, entry); @@ -2723,31 +2847,35 @@ ibuf_insert_to_index_page( return; } - low_match = page_cur_search(page, entry, PAGE_CUR_LE, &page_cur); + mem_heap_free(heap); + low_match = page_cur_search(page, index, entry, + PAGE_CUR_LE, &page_cur); if (low_match == dtuple_get_n_fields(entry)) { rec = page_cur_get_rec(&page_cur); - btr_cur_del_unmark_for_ibuf(rec, mtr); + 
btr_cur_del_unmark_for_ibuf(rec, index, mtr); } else { - rec = page_cur_tuple_insert(&page_cur, entry, mtr); + rec = page_cur_tuple_insert(&page_cur, entry, index, mtr); if (rec == NULL) { /* If the record did not fit, reorganize */ - btr_page_reorganize(page, mtr); + btr_page_reorganize(page, index, mtr); - page_cur_search(page, entry, PAGE_CUR_LE, &page_cur); + page_cur_search(page, index, entry, + PAGE_CUR_LE, &page_cur); /* This time the record must fit */ - if (!page_cur_tuple_insert(&page_cur, entry, mtr)) { + if (!page_cur_tuple_insert(&page_cur, entry, + index, mtr)) { ut_print_timestamp(stderr); fprintf(stderr, "InnoDB: Error: Insert buffer insert fails; page free %lu, dtuple size %lu\n", (ulong) page_get_max_insert_size(page, 1), - (ulong) rec_get_converted_size(entry)); + (ulong) rec_get_converted_size(index, entry)); fputs("InnoDB: Cannot insert index record ", stderr); dtuple_print(stderr, entry); @@ -2836,11 +2964,12 @@ ibuf_delete_rec( "InnoDB: ibuf record inserted to page %lu\n", (ulong) page_no); fflush(stderr); - rec_print(stderr, btr_pcur_get_rec(pcur)); - rec_print(stderr, pcur->old_rec); + rec_print_old(stderr, btr_pcur_get_rec(pcur)); + rec_print_old(stderr, pcur->old_rec); dtuple_print(stderr, search_tuple); - rec_print(stderr, page_rec_get_next(btr_pcur_get_rec(pcur))); + rec_print_old(stderr, + page_rec_get_next(btr_pcur_get_rec(pcur))); fflush(stderr); btr_pcur_commit_specify_mtr(pcur, mtr); @@ -3075,7 +3204,7 @@ loop: if (corruption_noticed) { fputs("InnoDB: Discarding record\n ", stderr); - rec_print(stderr, ibuf_rec); + rec_print_old(stderr, ibuf_rec); fputs("\n from the insert buffer!\n\n", stderr); } else if (page) { /* Now we have at pcur a record which should be @@ -3083,19 +3212,22 @@ loop: copies pointers to fields in ibuf_rec, and we must keep the latch to the ibuf_rec page until the insertion is finished! */ - - dulint max_trx_id = page_get_max_trx_id( + dict_index_t* dummy_index; + dulint max_trx_id = page_get_max_trx_id( buf_frame_align(ibuf_rec)); page_update_max_trx_id(page, max_trx_id); - entry = ibuf_build_entry_from_ibuf_rec(ibuf_rec, heap); + entry = ibuf_build_entry_from_ibuf_rec(ibuf_rec, + heap, &dummy_index); #ifdef UNIV_IBUF_DEBUG - volume += rec_get_converted_size(entry) + volume += rec_get_converted_size(dummy_index, entry) + page_dir_calc_reserved_space(1); ut_a(volume <= 4 * UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE); #endif - ibuf_insert_to_index_page(entry, page, &mtr); + ibuf_insert_to_index_page(entry, page, + dummy_index, &mtr); + ibuf_dummy_index_free(dummy_index); } n_inserts++; diff --git a/innobase/include/btr0btr.h b/innobase/include/btr0btr.h index 8606fcd2a5c..0b19e64d4e0 100644 --- a/innobase/include/btr0btr.h +++ b/innobase/include/btr0btr.h @@ -155,7 +155,8 @@ ulint btr_node_ptr_get_child_page_no( /*===========================*/ /* out: child node address */ - rec_t* rec); /* in: node pointer record */ + rec_t* rec, /* in: node pointer record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ /**************************************************************** Creates the root node for a new index tree. 
*/ @@ -167,6 +168,7 @@ btr_create( ulint type, /* in: type of the index */ ulint space, /* in: space where created */ dulint index_id,/* in: index id */ + ibool comp, /* in: TRUE=compact page format */ mtr_t* mtr); /* in: mini-transaction handle */ /**************************************************************** Frees a B-tree except the root page, which MUST be freed after this @@ -210,8 +212,9 @@ Reorganizes an index page. */ void btr_page_reorganize( /*================*/ - page_t* page, /* in: page to be reorganized */ - mtr_t* mtr); /* in: mtr */ + page_t* page, /* in: page to be reorganized */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr); /* in: mtr */ /***************************************************************** Decides if the page should be split at the convergence point of inserts converging to left. */ @@ -273,6 +276,7 @@ void btr_set_min_rec_mark( /*=================*/ rec_t* rec, /* in: record */ + ibool comp, /* in: TRUE=compact page format */ mtr_t* mtr); /* in: mtr */ /***************************************************************** Deletes on the upper level the node pointer to a page. */ @@ -332,6 +336,7 @@ btr_parse_set_min_rec_mark( /* out: end of log record or NULL */ byte* ptr, /* in: buffer */ byte* end_ptr,/* in: buffer end */ + ibool comp, /* in: TRUE=compact page format */ page_t* page, /* in: page or NULL */ mtr_t* mtr); /* in: mtr or NULL */ /*************************************************************** @@ -340,11 +345,12 @@ Parses a redo log record of reorganizing a page. */ byte* btr_parse_page_reorganize( /*======================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr); /* in: mtr or NULL */ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: record descriptor */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr); /* in: mtr or NULL */ /****************************************************************** Gets the number of pages in a B-tree. */ diff --git a/innobase/include/btr0btr.ic b/innobase/include/btr0btr.ic index b0aa0756307..1d1f97d3668 100644 --- a/innobase/include/btr0btr.ic +++ b/innobase/include/btr0btr.ic @@ -183,17 +183,18 @@ ulint btr_node_ptr_get_child_page_no( /*===========================*/ /* out: child node address */ - rec_t* rec) /* in: node pointer record */ + rec_t* rec, /* in: node pointer record */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { - ulint n_fields; byte* field; ulint len; ulint page_no; - n_fields = rec_get_n_fields(rec); + ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec)); /* The child address is in the last field */ - field = rec_get_nth_field(rec, n_fields - 1, &len); + field = rec_get_nth_field(rec, offsets, + rec_offs_n_fields(offsets) - 1, &len); ut_ad(len == 4); diff --git a/innobase/include/btr0cur.h b/innobase/include/btr0cur.h index f1334656d53..0a8d8ceaeb7 100644 --- a/innobase/include/btr0cur.h +++ b/innobase/include/btr0cur.h @@ -34,7 +34,7 @@ page_cur_t* btr_cur_get_page_cur( /*=================*/ /* out: pointer to page cursor component */ - btr_cur_t* cursor); /* in: tree cursor */ + btr_cur_t* cursor);/* in: tree cursor */ /************************************************************* Returns the record pointer of a tree cursor. 
*/ UNIV_INLINE @@ -42,14 +42,14 @@ rec_t* btr_cur_get_rec( /*============*/ /* out: pointer to record */ - btr_cur_t* cursor); /* in: tree cursor */ + btr_cur_t* cursor);/* in: tree cursor */ /************************************************************* Invalidates a tree cursor by setting record pointer to NULL. */ UNIV_INLINE void btr_cur_invalidate( /*===============*/ - btr_cur_t* cursor); /* in: tree cursor */ + btr_cur_t* cursor);/* in: tree cursor */ /************************************************************* Returns the page of a tree cursor. */ UNIV_INLINE @@ -57,7 +57,7 @@ page_t* btr_cur_get_page( /*=============*/ /* out: pointer to page */ - btr_cur_t* cursor); /* in: tree cursor */ + btr_cur_t* cursor);/* in: tree cursor */ /************************************************************* Returns the tree of a cursor. */ UNIV_INLINE @@ -65,7 +65,7 @@ dict_tree_t* btr_cur_get_tree( /*=============*/ /* out: tree */ - btr_cur_t* cursor); /* in: tree cursor */ + btr_cur_t* cursor);/* in: tree cursor */ /************************************************************* Positions a tree cursor at a given record. */ UNIV_INLINE @@ -283,8 +283,9 @@ only used by the insert buffer insert merge mechanism. */ void btr_cur_del_unmark_for_ibuf( /*========================*/ - rec_t* rec, /* in: record to delete unmark */ - mtr_t* mtr); /* in: mtr */ + rec_t* rec, /* in: record to delete unmark */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr); /* in: mtr */ /***************************************************************** Tries to compress a page of the tree on the leaf level. It is assumed that mtr holds an x-latch on the tree and on the cursor page. To avoid @@ -361,10 +362,11 @@ Parses a redo log record of updating a record in-place. */ byte* btr_cur_parse_update_in_place( /*==========================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page); /* in: page or NULL */ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + page_t* page, /* in: page or NULL */ + dict_index_t* index); /* in: index corresponding to page */ /******************************************************************** Parses the redo log record for delete marking or unmarking of a clustered index record. */ @@ -372,10 +374,11 @@ index record. */ byte* btr_cur_parse_del_mark_set_clust_rec( /*=================================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page); /* in: page or NULL */ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: index corresponding to page */ + page_t* page); /* in: page or NULL */ /******************************************************************** Parses the redo log record for delete marking or unmarking of a secondary index record. */ @@ -383,10 +386,11 @@ index record. 
*/ byte* btr_cur_parse_del_mark_set_sec_rec( /*===============================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page); /* in: page or NULL */ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: index corresponding to page */ + page_t* page); /* in: page or NULL */ /*********************************************************************** Estimates the number of rows in a given index range. */ @@ -417,9 +421,10 @@ to free the field. */ void btr_cur_mark_extern_inherited_fields( /*=================================*/ - rec_t* rec, /* in: record in a clustered index */ - upd_t* update, /* in: update vector */ - mtr_t* mtr); /* in: mtr */ + rec_t* rec, /* in: record in a clustered index */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + upd_t* update, /* in: update vector */ + mtr_t* mtr); /* in: mtr */ /*********************************************************************** The complement of the previous function: in an update entry may inherit some externally stored fields from a record. We must mark them as inherited @@ -456,6 +461,7 @@ btr_store_big_rec_extern_fields( dict_index_t* index, /* in: index of rec; the index tree MUST be X-latched */ rec_t* rec, /* in: record */ + const ulint* offsets, /* in: rec_get_offsets(rec, index) */ big_rec_t* big_rec_vec, /* in: vector containing fields to be stored externally */ mtr_t* local_mtr); /* in: mtr containing the latch to @@ -496,6 +502,7 @@ btr_rec_free_externally_stored_fields( dict_index_t* index, /* in: index of the data, the index tree MUST be X-latched */ rec_t* rec, /* in: record */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ ibool do_not_free_inherited,/* in: TRUE if called in a rollback and we do not want to free inherited fields */ @@ -510,6 +517,7 @@ btr_rec_copy_externally_stored_field( /*=================================*/ /* out: the field copied to heap */ rec_t* rec, /* in: record */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ ulint no, /* in: field number */ ulint* len, /* out: length of the field */ mem_heap_t* heap); /* in: mem heap */ @@ -540,10 +548,10 @@ ulint btr_push_update_extern_fields( /*==========================*/ /* out: number of values stored in ext_vect */ - ulint* ext_vect, /* in: array of ulints, must be preallocated - to have place for all fields in rec */ - rec_t* rec, /* in: record */ - upd_t* update); /* in: update vector */ + ulint* ext_vect,/* in: array of ulints, must be preallocated + to have space for all fields in rec */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + upd_t* update);/* in: update vector or NULL */ /*######################################################################*/ diff --git a/innobase/include/btr0cur.ic b/innobase/include/btr0cur.ic index a3a04b60c45..dcad3e9e14d 100644 --- a/innobase/include/btr0cur.ic +++ b/innobase/include/btr0cur.ic @@ -134,17 +134,15 @@ btr_cur_can_delete_without_compress( /* out: TRUE if can be deleted without recommended compression */ btr_cur_t* cursor, /* in: btr cursor */ + ulint rec_size,/* in: rec_get_size(btr_cur_get_rec(cursor))*/ mtr_t* mtr) /* in: mtr */ { - ulint rec_size; page_t* page; ut_ad(mtr_memo_contains(mtr, buf_block_align( btr_cur_get_page(cursor)), MTR_MEMO_PAGE_X_FIX)); - rec_size = rec_get_size(btr_cur_get_rec(cursor)); - page = btr_cur_get_page(cursor); if 
((page_get_data_size(page) - rec_size < BTR_CUR_PAGE_COMPRESS_LIMIT) diff --git a/innobase/include/btr0pcur.h b/innobase/include/btr0pcur.h index 81f19af4d40..6384222be51 100644 --- a/innobase/include/btr0pcur.h +++ b/innobase/include/btr0pcur.h @@ -462,6 +462,7 @@ struct btr_pcur_struct{ contains an initial segment of the latest record cursor was positioned either on, before, or after */ + ulint old_n_fields; /* number of fields in old_rec */ ulint rel_pos; /* BTR_PCUR_ON, BTR_PCUR_BEFORE, or BTR_PCUR_AFTER, depending on whether cursor was on, before, or after the diff --git a/innobase/include/btr0sea.h b/innobase/include/btr0sea.h index ce4140ecf92..73cd95d1464 100644 --- a/innobase/include/btr0sea.h +++ b/innobase/include/btr0sea.h @@ -77,8 +77,10 @@ parameters as page (this often happens when a page is split). */ void btr_search_move_or_delete_hash_entries( /*===================================*/ - page_t* new_page, /* in: records are copied to this page */ - page_t* page); /* in: index page */ + page_t* new_page, /* in: records are copied + to this page */ + page_t* page, /* in: index page */ + dict_index_t* index); /* in: record descriptor */ /************************************************************************ Drops a page hash index. */ @@ -128,9 +130,10 @@ btr_search_update_hash_on_delete( Validates the search system. */ ibool -btr_search_validate(void); -/*=====================*/ - +btr_search_validate( +/*================*/ + /* out: TRUE if ok */ + dict_index_t* index); /* in: record descriptor */ /* Search info directions */ #define BTR_SEA_NO_DIRECTION 1 diff --git a/innobase/include/data0type.ic b/innobase/include/data0type.ic index 946b646ffbf..0b92ffbe7f1 100644 --- a/innobase/include/data0type.ic +++ b/innobase/include/data0type.ic @@ -8,6 +8,17 @@ Created 1/16/1996 Heikki Tuuri #include "mach0data.h" +/********************************************************************** +Determines whether the given character set is of variable length. + +NOTE: the prototype of this function is copied from ha_innodb.cc! If you change +this function, you MUST change also the prototype here! */ +extern +ibool +innobase_is_mb_cset( +/*================*/ + ulint cset); /* in: MySQL charset-collation code */ + /************************************************************************* Sets a data type structure. 
*/ UNIV_INLINE @@ -149,8 +160,10 @@ dtype_new_store_for_order_and_null_size( bytes where we store the info */ dtype_t* type) /* in: type struct */ { - ut_ad(6 == DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); - +#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE +#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE" +#endif + buf[0] = (byte)(type->mtype & 0xFFUL); if (type->prtype & DATA_BINARY_TYPE) { @@ -166,10 +179,12 @@ dtype_new_store_for_order_and_null_size( mach_write_to_2(buf + 2, type->len & 0xFFFFUL); + ut_ad(dtype_get_charset_coll(type->prtype) < 256); mach_write_to_2(buf + 4, dtype_get_charset_coll(type->prtype)); - /* Note that the second last byte is left unused, because the - charset-collation code is always < 256 */ + if (type->prtype & DATA_NOT_NULL) { + buf[4] |= 128; + } } /************************************************************************** @@ -211,20 +226,26 @@ dtype_new_read_for_order_and_null_size( { ulint charset_coll; - ut_ad(6 == DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); +#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE +#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE" +#endif type->mtype = buf[0] & 63; type->prtype = buf[1]; if (buf[0] & 128) { - type->prtype = type->prtype | DATA_BINARY_TYPE; + type->prtype |= DATA_BINARY_TYPE; + } + + if (buf[4] & 128) { + type->prtype |= DATA_NOT_NULL; } type->len = mach_read_from_2(buf + 2); mach_read_from_2(buf + 4); - charset_coll = mach_read_from_2(buf + 4); + charset_coll = mach_read_from_2(buf + 4) & 0x7fff; if (dtype_is_string_type(type->mtype)) { ut_a(charset_coll < 256); @@ -257,23 +278,39 @@ dtype_get_fixed_size( mtype = dtype_get_mtype(type); switch (mtype) { + case DATA_SYS: +#ifdef UNIV_DEBUG + switch (type->prtype & DATA_MYSQL_TYPE_MASK) { + default: + ut_ad(0); + return(0); + case DATA_ROW_ID: + ut_ad(type->len == DATA_ROW_ID_LEN); + break; + case DATA_TRX_ID: + ut_ad(type->len == DATA_TRX_ID_LEN); + break; + case DATA_ROLL_PTR: + ut_ad(type->len == DATA_ROLL_PTR_LEN); + break; + case DATA_MIX_ID: + ut_ad(type->len == DATA_MIX_ID_LEN); + break; + } +#endif /* UNIV_DEBUG */ case DATA_CHAR: case DATA_FIXBINARY: case DATA_INT: case DATA_FLOAT: case DATA_DOUBLE: case DATA_MYSQL: - return(dtype_get_len(type)); - - case DATA_SYS: if (type->prtype == DATA_ROW_ID) { - return(DATA_ROW_ID_LEN); - } else if (type->prtype == DATA_TRX_ID) { - return(DATA_TRX_ID_LEN); - } else if (type->prtype == DATA_ROLL_PTR) { - return(DATA_ROLL_PTR_LEN); - } else { - return(0); + if ((type->prtype & DATA_BINARY_TYPE) + || !innobase_is_mb_cset( + dtype_get_charset_coll( + type->prtype))) { + return(dtype_get_len(type)); } + /* fall through for variable-length charsets */ case DATA_VARCHAR: case DATA_BINARY: case DATA_DECIMAL: diff --git a/innobase/include/dict0dict.h b/innobase/include/dict0dict.h index ca632691450..a2399a81ca9 100644 --- a/innobase/include/dict0dict.h +++ b/innobase/include/dict0dict.h @@ -639,6 +639,16 @@ dict_index_get_sys_col_pos( dict_index_t* index, /* in: index */ ulint type); /* in: DATA_ROW_ID, ... */ /*********************************************************************** +Adds a column to index. */ + +void +dict_index_add_col( +/*===============*/ + dict_index_t* index, /* in: index */ + dict_col_t* col, /* in: column */ + ulint order, /* in: order criterion */ + ulint prefix_len); /* in: column prefix length */ +/*********************************************************************** Copies types of fields contained in index to tuple. 
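A concrete example of the new packing of bytes 4..5 of the 6-byte order/null-size buffer may help: the charset-collation code (always < 256, as asserted above) occupies the low 15 bits, and bit 15 now carries the NOT NULL flag. Assuming latin1_swedish_ci, whose collation number is 8:

        /* writer, as in dtype_new_store_for_order_and_null_size() */
        mach_write_to_2(buf + 4, 8);    /* collation 8 -> bytes 0x00 0x08 */
        buf[4] |= 128;                  /* NOT NULL  -> bytes 0x80 0x08 */

        /* reader, as in dtype_new_read_for_order_and_null_size() */
        ut_a(buf[4] & 128);                              /* DATA_NOT_NULL set */
        ut_a((mach_read_from_2(buf + 4) & 0x7fff) == 8); /* collation back */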
*/ void @@ -657,6 +667,7 @@ dict_index_rec_get_sys_col( /*=======================*/ /* out: system column value */ dict_index_t* index, /* in: clustered index describing the record */ + const ulint* offsets,/* in: offsets returned by rec_get_offsets() */ ulint type, /* in: column type: DATA_ROLL_PTR, ... */ rec_t* rec); /* in: record */ /************************************************************************* @@ -770,6 +781,7 @@ dict_tree_copy_rec_order_prefix( /* out: pointer to the prefix record */ dict_tree_t* tree, /* in: index tree */ rec_t* rec, /* in: record for which to copy prefix */ + ulint* n_fields,/* out: number of fields copied */ byte** buf, /* in/out: memory buffer for the copied prefix, or NULL */ ulint* buf_size);/* in/out: buffer size */ @@ -782,6 +794,7 @@ dict_tree_build_data_tuple( /* out, own: data tuple */ dict_tree_t* tree, /* in: index tree */ rec_t* rec, /* in: record for which to build data tuple */ + ulint n_fields,/* in: number of data fields */ mem_heap_t* heap); /* in: memory heap where tuple created */ /************************************************************************* Gets the space id of the root of the index tree. */ diff --git a/innobase/include/dict0dict.ic b/innobase/include/dict0dict.ic index 85e4aaf1a05..7f754e316b3 100644 --- a/innobase/include/dict0dict.ic +++ b/innobase/include/dict0dict.ic @@ -168,7 +168,7 @@ dict_table_get_sys_col( col = dict_table_get_nth_col(table, table->n_cols - DATA_N_SYS_COLS + sys); ut_ad(col->type.mtype == DATA_SYS); - ut_ad(col->type.prtype == sys); + ut_ad(col->type.prtype == (sys | DATA_NOT_NULL)); return(col); } @@ -322,6 +322,7 @@ dict_index_rec_get_sys_col( /*=======================*/ /* out: system column value */ dict_index_t* index, /* in: clustered index describing the record */ + const ulint* offsets,/* in: offsets returned by rec_get_offsets() */ ulint type, /* in: column type: DATA_ROLL_PTR, ... */ rec_t* rec) /* in: record */ { @@ -331,12 +332,13 @@ dict_index_rec_get_sys_col( ut_ad(index); ut_ad(index->type & DICT_CLUSTERED); - + ut_ad(rec_offs_validate(rec, index, offsets)); + pos = dict_index_get_sys_col_pos(index, type); ut_ad(pos != ULINT_UNDEFINED); - field = rec_get_nth_field(rec, pos, &len); + field = rec_get_nth_field(rec, offsets, pos, &len); if (type == DATA_ROLL_PTR) { ut_ad(len == 7); @@ -677,7 +679,10 @@ dict_is_mixed_table_rec( byte* mix_id_field; ulint len; - mix_id_field = rec_get_nth_field(rec, table->mix_len, &len); + ut_ad(!table->comp); + + mix_id_field = rec_get_nth_field_old(rec, + table->mix_len, &len); if ((len != table->mix_id_len) || (0 != ut_memcmp(table->mix_id_buf, mix_id_field, len))) { diff --git a/innobase/include/dict0mem.h b/innobase/include/dict0mem.h index 1e496a25477..670b3445a55 100644 --- a/innobase/include/dict0mem.h +++ b/innobase/include/dict0mem.h @@ -54,7 +54,8 @@ dict_mem_table_create( of the table is placed; this parameter is ignored if the table is made a member of a cluster */ - ulint n_cols); /* in: number of columns */ + ulint n_cols, /* in: number of columns */ + ibool comp); /* in: TRUE=compact page format */ /************************************************************************** Creates a cluster memory object. 
*/ @@ -171,6 +172,13 @@ struct dict_field_struct{ DICT_MAX_COL_PREFIX_LEN; NOTE that in the UTF-8 charset, MySQL sets this to 3 * the prefix len in UTF-8 chars */ + ulint fixed_len; /* 0 or the fixed length of the + column if smaller than + DICT_MAX_COL_PREFIX_LEN */ + ulint fixed_offs; /* offset to the field, or + ULINT_UNDEFINED if it is not fixed + within the record (due to preceding + variable-length fields) */ }; /* Data structure for an index tree */ @@ -225,6 +233,7 @@ struct dict_index_struct{ ulint n_def; /* number of fields defined so far */ ulint n_fields;/* number of fields in the index */ dict_field_t* fields; /* array of field descriptions */ + ulint n_nullable;/* number of nullable fields */ UT_LIST_NODE_T(dict_index_t) indexes;/* list of indexes of the table */ dict_tree_t* tree; /* index tree struct */ @@ -320,6 +329,7 @@ struct dict_table_struct{ ibool tablespace_discarded;/* this flag is set TRUE when the user calls DISCARD TABLESPACE on this table, and reset to FALSE in IMPORT TABLESPACE */ + ibool comp; /* flag: TRUE=compact page format */ hash_node_t name_hash; /* hash chain node */ hash_node_t id_hash; /* hash chain node */ ulint n_def; /* number of columns defined so far */ diff --git a/innobase/include/lock0lock.h b/innobase/include/lock0lock.h index f8435e14d97..d642fe46fef 100644 --- a/innobase/include/lock0lock.h +++ b/innobase/include/lock0lock.h @@ -47,7 +47,8 @@ lock_sec_rec_some_has_impl_off_kernel( /* out: transaction which has the x-lock, or NULL */ rec_t* rec, /* in: user record */ - dict_index_t* index); /* in: secondary index */ + dict_index_t* index, /* in: secondary index */ + const ulint* offsets);/* in: rec_get_offsets(rec, index) */ /************************************************************************* Checks if some transaction has an implicit x-lock on a record in a clustered index. */ @@ -58,7 +59,8 @@ lock_clust_rec_some_has_impl( /* out: transaction which has the x-lock, or NULL */ rec_t* rec, /* in: user record */ - dict_index_t* index); /* in: clustered index */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets);/* in: rec_get_offsets(rec, index) */ /***************************************************************** Resets the lock bits for a single record. Releases transactions waiting for lock requests here. 
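To show how the new fixed_len and fixed_offs members fit together, here is a minimal, illustrative sketch of filling them in for one index, following only the semantics documented in the struct comments above. The loop is not part of this patch, the prefix handling is simplified, and dict_col_get_type()/dtype_get_fixed_size() are used as assumed helper names:

        ulint   i;
        ulint   offs = 0;       /* running offset within the fixed prefix */

        for (i = 0; i < dict_index_get_n_fields(index); i++) {
                dict_field_t*   field = dict_index_get_nth_field(index, i);

                field->fixed_len = dtype_get_fixed_size(
                                        dict_col_get_type(field->col));
                if (field->fixed_len >= DICT_MAX_COL_PREFIX_LEN) {
                        field->fixed_len = 0;   /* treat as variable-length */
                }

                if (offs == ULINT_UNDEFINED) {
                        /* a preceding field is variable-length:
                        no fixed offset from here on */
                        field->fixed_offs = ULINT_UNDEFINED;
                } else {
                        field->fixed_offs = offs;

                        if (field->fixed_len) {
                                offs += field->fixed_len;
                        } else {
                                offs = ULINT_UNDEFINED;
                        }
                }
        }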
*/ @@ -275,6 +277,7 @@ lock_clust_rec_modify_check_and_lock( does nothing */ rec_t* rec, /* in: record which should be modified */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ que_thr_t* thr); /* in: query thread */ /************************************************************************* Checks if locks of other transactions prevent an immediate modify @@ -308,6 +311,7 @@ lock_sec_rec_read_check_and_lock( which should be read or passed over by a read cursor */ dict_index_t* index, /* in: secondary index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ ulint mode, /* in: mode of the lock which the read cursor should set on records: LOCK_S or LOCK_X; the latter is possible in SELECT FOR UPDATE */ @@ -333,6 +337,7 @@ lock_clust_rec_read_check_and_lock( which should be read or passed over by a read cursor */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ ulint mode, /* in: mode of the lock which the read cursor should set on records: LOCK_S or LOCK_X; the latter is possible in SELECT FOR UPDATE */ @@ -350,6 +355,7 @@ lock_clust_rec_cons_read_sees( rec_t* rec, /* in: user record which should be read or passed over by a read cursor */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ read_view_t* view); /* in: consistent read view */ /************************************************************************* Checks that a non-clustered index record is seen in a consistent read. */ @@ -463,13 +469,32 @@ lock_rec_hash( ulint space, /* in: space */ ulint page_no);/* in: page number */ /************************************************************************* -Gets the table covered by an IX table lock. */ +Gets the source table of an ALTER TABLE transaction. The table must be +covered by an IX or IS table lock. */ dict_table_t* -lock_get_ix_table( -/*==============*/ - /* out: the table covered by the lock */ - lock_t* lock); /* in: table lock */ +lock_get_src_table( +/*===============*/ + /* out: the source table of transaction, + if it is covered by an IX or IS table lock; + dest if there is no source table, and + NULL if the transaction is locking more than + two tables or an inconsistency is found */ + trx_t* trx, /* in: transaction */ + dict_table_t* dest, /* in: destination of ALTER TABLE */ + ulint* mode); /* out: lock mode of the source table */ +/************************************************************************* +Determine if the given table is exclusively "owned" by the given +transaction, i.e., transaction holds LOCK_IX and possibly LOCK_AUTO_INC +on the table. */ + +ibool +lock_is_table_exclusive( +/*=================*/ + /* out: TRUE if table is only locked by trx, + with LOCK_IX, and possibly LOCK_AUTO_INC */ + dict_table_t* table, /* in: table */ + trx_t* trx); /* in: transaction */ /************************************************************************* Checks that a transaction id is sensible, i.e., not in the future. 
*/ @@ -480,6 +505,7 @@ lock_check_trx_id_sanity( dulint trx_id, /* in: trx id */ rec_t* rec, /* in: user record */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets, /* in: rec_get_offsets(rec, index) */ ibool has_kernel_mutex);/* in: TRUE if the caller owns the kernel mutex */ /************************************************************************* @@ -490,7 +516,8 @@ lock_rec_queue_validate( /*====================*/ /* out: TRUE if ok */ rec_t* rec, /* in: record to look at */ - dict_index_t* index); /* in: index, or NULL if not known */ + dict_index_t* index, /* in: index, or NULL if not known */ + const ulint* offsets);/* in: rec_get_offsets(rec, index) */ /************************************************************************* Prints info of a table lock. */ diff --git a/innobase/include/lock0lock.ic b/innobase/include/lock0lock.ic index fabc9256401..c7a71bb45d8 100644 --- a/innobase/include/lock0lock.ic +++ b/innobase/include/lock0lock.ic @@ -60,7 +60,8 @@ lock_clust_rec_some_has_impl( /* out: transaction which has the x-lock, or NULL */ rec_t* rec, /* in: user record */ - dict_index_t* index) /* in: clustered index */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets)/* in: rec_get_offsets(rec, index) */ { dulint trx_id; @@ -70,7 +71,7 @@ lock_clust_rec_some_has_impl( ut_ad(index->type & DICT_CLUSTERED); ut_ad(page_rec_is_user_rec(rec)); - trx_id = row_get_rec_trx_id(rec, index); + trx_id = row_get_rec_trx_id(rec, index, offsets); if (trx_is_active(trx_id)) { /* The modifying or inserting transaction is active */ diff --git a/innobase/include/mtr0log.h b/innobase/include/mtr0log.h index 9c9c6f696e8..c0636ea1e1e 100644 --- a/innobase/include/mtr0log.h +++ b/innobase/include/mtr0log.h @@ -11,6 +11,7 @@ Created 12/7/1995 Heikki Tuuri #include "univ.i" #include "mtr0mtr.h" +#include "dict0types.h" /************************************************************ Writes 1 - 4 bytes to a file page buffered in the buffer pool. @@ -173,6 +174,38 @@ mlog_parse_string( byte* page); /* in: page where to apply the log record, or NULL */ +/************************************************************ +Opens a buffer for mlog, writes the initial log record and, +if needed, the field lengths of an index. Reserves space +for further log entries. The log entry must be closed with +mtr_close(). */ + +byte* +mlog_open_and_write_index( +/*======================*/ + /* out: buffer, NULL if log mode + MTR_LOG_NONE */ + mtr_t* mtr, /* in: mtr */ + byte* rec, /* in: index record or page */ + dict_index_t* index, /* in: record descriptor */ + byte type, /* in: log item type */ + ulint size); /* in: requested buffer size in bytes + (if 0, calls mlog_close() and returns NULL) */ + +/************************************************************ +Parses a log record written by mlog_open_and_write_index. 
*/ + +byte* +mlog_parse_index( +/*=============*/ + /* out: parsed record end, + NULL if not a complete record */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + /* out: new value of log_ptr */ + ibool comp, /* in: TRUE=compact record format */ + dict_index_t** index); /* out, own: dummy index */ + /* Insert, update, and maybe other functions may use this value to define an extra mlog buffer size for variable size data */ #define MLOG_BUF_MARGIN 256 diff --git a/innobase/include/mtr0mtr.h b/innobase/include/mtr0mtr.h index e8c68a91dad..071279d5259 100644 --- a/innobase/include/mtr0mtr.h +++ b/innobase/include/mtr0mtr.h @@ -102,7 +102,31 @@ flag value must give the length also! */ file rename */ #define MLOG_FILE_DELETE ((byte)35) /* log record about an .ibd file deletion */ -#define MLOG_BIGGEST_TYPE ((byte)35) /* biggest value (used in +#define MLOG_COMP_REC_MIN_MARK ((byte)36) /* mark a compact index record + as the predefined minimum + record */ +#define MLOG_COMP_PAGE_CREATE ((byte)37) /* create a compact + index page */ +#define MLOG_COMP_REC_INSERT ((byte)38) /* compact record insert */ +#define MLOG_COMP_REC_CLUST_DELETE_MARK ((byte)39) + /* mark compact clustered index + record deleted */ +#define MLOG_COMP_REC_SEC_DELETE_MARK ((byte)40)/* mark compact secondary index + record deleted */ +#define MLOG_COMP_REC_UPDATE_IN_PLACE ((byte)41)/* update of a compact record, + preserves record field sizes */ +#define MLOG_COMP_REC_DELETE ((byte)42) /* delete a compact record + from a page */ +#define MLOG_COMP_LIST_END_DELETE ((byte)43) /* delete compact record list + end on index page */ +#define MLOG_COMP_LIST_START_DELETE ((byte)44) /* delete compact record list + start on index page */ +#define MLOG_COMP_LIST_END_COPY_CREATED ((byte)45) + /* copy compact record list end + to a new created index page */ +#define MLOG_COMP_PAGE_REORGANIZE ((byte)46) /* reorganize an index page */ + +#define MLOG_BIGGEST_TYPE ((byte)46) /* biggest value (used in asserts) */ /******************************************************************* diff --git a/innobase/include/page0cur.h b/innobase/include/page0cur.h index c85669ed4df..a693931968e 100644 --- a/innobase/include/page0cur.h +++ b/innobase/include/page0cur.h @@ -128,7 +128,8 @@ page_cur_tuple_insert( /* out: pointer to record if succeed, NULL otherwise */ page_cur_t* cursor, /* in: a page cursor */ - dtuple_t* tuple, /* in: pointer to a data tuple */ + dtuple_t* tuple, /* in: pointer to a data tuple */ + dict_index_t* index, /* in: record descriptor */ mtr_t* mtr); /* in: mini-transaction handle */ /*************************************************************** Inserts a record next to page cursor. Returns pointer to inserted record if @@ -142,6 +143,7 @@ page_cur_rec_insert( otherwise */ page_cur_t* cursor, /* in: a page cursor */ rec_t* rec, /* in: record to insert */ + dict_index_t* index, /* in: record descriptor */ mtr_t* mtr); /* in: mini-transaction handle */ /*************************************************************** Inserts a record next to page cursor. 
Returns pointer to inserted record if @@ -155,9 +157,9 @@ page_cur_insert_rec_low( /* out: pointer to record if succeed, NULL otherwise */ page_cur_t* cursor, /* in: a page cursor */ - dtuple_t* tuple, /* in: pointer to a data tuple or NULL */ - ulint data_size,/* in: data size of tuple */ - rec_t* rec, /* in: pointer to a physical record or NULL */ + dtuple_t* tuple, /* in: pointer to a data tuple or NULL */ + dict_index_t* index, /* in: record descriptor */ + rec_t* rec, /* in: pointer to a physical record or NULL */ mtr_t* mtr); /* in: mini-transaction handle */ /***************************************************************** Copies records from page to a newly created page, from a given record onward, @@ -166,10 +168,11 @@ including that record. Infimum and supremum records are not copied. */ void page_copy_rec_list_end_to_created_page( /*===================================*/ - page_t* new_page, /* in: index page to copy to */ - page_t* page, /* in: index page */ - rec_t* rec, /* in: first record to copy */ - mtr_t* mtr); /* in: mtr */ + page_t* new_page, /* in: index page to copy to */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: first record to copy */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr); /* in: mtr */ /*************************************************************** Deletes a record at the page cursor. The cursor is moved to the next record after the deleted one. */ @@ -178,6 +181,7 @@ void page_cur_delete_rec( /*================*/ page_cur_t* cursor, /* in: a page cursor */ + dict_index_t* index, /* in: record descriptor */ mtr_t* mtr); /* in: mini-transaction handle */ /******************************************************************** Searches the right position for a page cursor. */ @@ -187,6 +191,7 @@ page_cur_search( /*============*/ /* out: number of matched fields on the left */ page_t* page, /* in: index page */ + dict_index_t* index, /* in: record descriptor */ dtuple_t* tuple, /* in: data tuple */ ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G, or PAGE_CUR_GE */ @@ -198,6 +203,7 @@ void page_cur_search_with_match( /*=======================*/ page_t* page, /* in: index page */ + dict_index_t* index, /* in: record descriptor */ dtuple_t* tuple, /* in: data tuple */ ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G, or PAGE_CUR_GE */ @@ -229,34 +235,37 @@ Parses a log record of a record insert on a page. */ byte* page_cur_parse_insert_rec( /*======================*/ - /* out: end of log record or NULL */ - ibool is_short,/* in: TRUE if short inserts */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr); /* in: mtr or NULL */ + /* out: end of log record or NULL */ + ibool is_short,/* in: TRUE if short inserts */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: record descriptor */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr); /* in: mtr or NULL */ /************************************************************** Parses a log record of copying a record list end to a new created page. 
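A hedged sketch of how the new redo-log pieces fit together, using only the prototypes declared in this patch (variables such as rec, index, size, type, ptr, end_ptr, page and mtr stand in for the surrounding logging and recovery plumbing): the writer prepends the index description with mlog_open_and_write_index() under one of the new MLOG_COMP_* types, and the recovery parser rebuilds a dummy index with mlog_parse_index() before handing the remainder to a per-record parser such as page_cur_parse_insert_rec().

        /* redo-log writer, inside a mini-transaction */
        log_ptr = mlog_open_and_write_index(mtr, rec, index,
                        MLOG_COMP_REC_INSERT,
                        size);  /* bytes reserved for the record payload */
        if (log_ptr) {          /* NULL when logging is switched off */
                /* ... write the record-specific payload ... */
                mlog_close(mtr, log_ptr);
        }

        /* recovery-time parsing */
        ptr = mlog_parse_index(ptr, end_ptr,
                               type == MLOG_COMP_REC_INSERT, &index);
        if (ptr) {
                ptr = page_cur_parse_insert_rec(FALSE, ptr, end_ptr,
                                                index, page, mtr);
                /* the dummy index is owned by the caller ("out, own")
                and must be freed once the record has been applied */
        }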
*/ byte* page_parse_copy_rec_list_to_created_page( /*=====================================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr); /* in: mtr or NULL */ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: record descriptor */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr); /* in: mtr or NULL */ /*************************************************************** Parses log record of a record delete on a page. */ byte* page_cur_parse_delete_rec( /*======================*/ - /* out: pointer to record end or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr); /* in: mtr or NULL */ + /* out: pointer to record end or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: record descriptor */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr); /* in: mtr or NULL */ /* Index page cursor */ diff --git a/innobase/include/page0cur.ic b/innobase/include/page0cur.ic index 39f8ab11513..03010fbd766 100644 --- a/innobase/include/page0cur.ic +++ b/innobase/include/page0cur.ic @@ -143,7 +143,7 @@ UNIV_INLINE void page_cur_move_to_prev( /*==================*/ - page_cur_t* cur) /* in: cursor; must not before first */ + page_cur_t* cur) /* in: page cursor, not before first */ { ut_ad(!page_cur_is_before_first(cur)); @@ -158,6 +158,7 @@ page_cur_search( /*============*/ /* out: number of matched fields on the left */ page_t* page, /* in: index page */ + dict_index_t* index, /* in: record descriptor */ dtuple_t* tuple, /* in: data tuple */ ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G, or PAGE_CUR_GE */ @@ -170,7 +171,7 @@ page_cur_search( ut_ad(dtuple_check_typed(tuple)); - page_cur_search_with_match(page, tuple, mode, + page_cur_search_with_match(page, index, tuple, mode, &up_matched_fields, &up_matched_bytes, &low_matched_fields, @@ -190,16 +191,11 @@ page_cur_tuple_insert( /* out: pointer to record if succeed, NULL otherwise */ page_cur_t* cursor, /* in: a page cursor */ - dtuple_t* tuple, /* in: pointer to a data tuple */ + dtuple_t* tuple, /* in: pointer to a data tuple */ + dict_index_t* index, /* in: record descriptor */ mtr_t* mtr) /* in: mini-transaction handle */ { - ulint data_size; - - ut_ad(dtuple_check_typed(tuple)); - - data_size = dtuple_get_data_size(tuple); - - return(page_cur_insert_rec_low(cursor, tuple, data_size, NULL, mtr)); + return(page_cur_insert_rec_low(cursor, tuple, index, NULL, mtr)); } /*************************************************************** @@ -214,8 +210,9 @@ page_cur_rec_insert( otherwise */ page_cur_t* cursor, /* in: a page cursor */ rec_t* rec, /* in: record to insert */ + dict_index_t* index, /* in: record descriptor */ mtr_t* mtr) /* in: mini-transaction handle */ { - return(page_cur_insert_rec_low(cursor, NULL, 0, rec, mtr)); + return(page_cur_insert_rec_low(cursor, NULL, index, rec, mtr)); } diff --git a/innobase/include/page0page.h b/innobase/include/page0page.h index 969313614e3..d3ef8214eb6 100644 --- a/innobase/include/page0page.h +++ b/innobase/include/page0page.h @@ -37,7 +37,8 @@ typedef byte page_header_t; /*-----------------------------*/ #define PAGE_N_DIR_SLOTS 0 /* number of slots in page directory */ #define PAGE_HEAP_TOP 2 /* pointer to record heap top */ -#define PAGE_N_HEAP 4 /* number of records in the 
heap */ +#define PAGE_N_HEAP 4 /* number of records in the heap, + bit 15=flag: new-style compact page format */ #define PAGE_FREE 6 /* pointer to start of page free record list */ #define PAGE_GARBAGE 8 /* number of bytes in deleted records */ #define PAGE_LAST_INSERT 10 /* pointer to the last inserted record, or @@ -79,15 +80,24 @@ typedef byte page_header_t; #define PAGE_DATA (PAGE_HEADER + 36 + 2 * FSEG_HEADER_SIZE) /* start of data on the page */ -#define PAGE_INFIMUM (PAGE_DATA + 1 + REC_N_EXTRA_BYTES) - /* offset of the page infimum record on the - page */ -#define PAGE_SUPREMUM (PAGE_DATA + 2 + 2 * REC_N_EXTRA_BYTES + 8) - /* offset of the page supremum record on the - page */ -#define PAGE_SUPREMUM_END (PAGE_SUPREMUM + 9) +#define PAGE_OLD_INFIMUM (PAGE_DATA + 1 + REC_N_OLD_EXTRA_BYTES) + /* offset of the page infimum record on an + old-style page */ +#define PAGE_OLD_SUPREMUM (PAGE_DATA + 2 + 2 * REC_N_OLD_EXTRA_BYTES + 8) + /* offset of the page supremum record on an + old-style page */ +#define PAGE_OLD_SUPREMUM_END (PAGE_OLD_SUPREMUM + 9) /* offset of the page supremum record end on - the page */ + an old-style page */ +#define PAGE_NEW_INFIMUM (PAGE_DATA + REC_N_NEW_EXTRA_BYTES) + /* offset of the page infimum record on a + new-style compact page */ +#define PAGE_NEW_SUPREMUM (PAGE_DATA + 2 * REC_N_NEW_EXTRA_BYTES + 8) + /* offset of the page supremum record on a + new-style compact page */ +#define PAGE_NEW_SUPREMUM_END (PAGE_NEW_SUPREMUM + 8) + /* offset of the page supremum record end on + a new-style compact page */ /*-----------------------------*/ /* Directions of cursor movement */ @@ -233,6 +243,7 @@ page_cmp_dtuple_rec_with_match( be page infimum or supremum, in which case matched-parameter values below are not affected */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ ulint* matched_fields, /* in/out: number of already completely matched fields; when function returns contains the value for current comparison */ @@ -259,6 +270,22 @@ page_rec_get_n_recs_before( /* out: number of records */ rec_t* rec); /* in: the physical record */ /***************************************************************** +Gets the number of records in the heap. */ +UNIV_INLINE +ulint +page_dir_get_n_heap( +/*================*/ + /* out: number of user records */ + page_t* page); /* in: index page */ +/***************************************************************** +Sets the number of records in the heap. */ +UNIV_INLINE +void +page_dir_set_n_heap( +/*================*/ + page_t* page, /* in: index page */ + ulint n_heap);/* in: number of records */ +/***************************************************************** Gets the number of dir slots in directory. */ UNIV_INLINE ulint @@ -267,6 +294,15 @@ page_dir_get_n_slots( /* out: number of slots */ page_t* page); /* in: index page */ /***************************************************************** +Sets the number of dir slots in directory. */ +UNIV_INLINE +void +page_dir_set_n_slots( +/*=================*/ + /* out: number of slots */ + page_t* page, /* in: index page */ + ulint n_slots);/* in: number of slots */ +/***************************************************************** Gets pointer to nth directory slot. 
*/ UNIV_INLINE page_dir_slot_t* @@ -333,7 +369,16 @@ ulint page_dir_find_owner_slot( /*=====================*/ /* out: the directory slot number */ - rec_t* rec); /* in: the physical record */ + rec_t* rec); /* in: the physical record */ +/**************************************************************** +Determine whether the page is in new-style compact format. */ +UNIV_INLINE +ibool +page_is_comp( +/*=========*/ + /* out: TRUE if the page is in compact format + FALSE if it is in old-style format */ + page_t* page); /* in: index page */ /**************************************************************** Gets the pointer to the next record on the page. */ UNIV_INLINE @@ -359,9 +404,10 @@ UNIV_INLINE rec_t* page_rec_get_prev( /*==============*/ - /* out: pointer to previous record */ - rec_t* rec); /* in: pointer to record, must not be page - infimum */ + /* out: pointer to previous record */ + rec_t* rec); /* in: pointer to record, + must not be page infimum */ + /**************************************************************** TRUE if the record is a user record on the page. */ UNIV_INLINE @@ -446,9 +492,11 @@ page_get_max_insert_size_after_reorganize( Calculates free space if a page is emptied. */ UNIV_INLINE ulint -page_get_free_space_of_empty(void); -/*==============================*/ - /* out: free space */ +page_get_free_space_of_empty( +/*=========================*/ + /* out: free space */ + ibool comp) /* in: TRUE=compact page format */ + __attribute__((const)); /**************************************************************** Returns the sum of the sizes of the records in the record list excluding the infimum and supremum records. */ @@ -464,20 +512,23 @@ Allocates a block of memory from an index page. */ byte* page_mem_alloc( /*===========*/ - /* out: pointer to start of allocated - buffer, or NULL if allocation fails */ - page_t* page, /* in: index page */ - ulint need, /* in: number of bytes needed */ - ulint* heap_no);/* out: this contains the heap number - of the allocated record if allocation succeeds */ + /* out: pointer to start of allocated + buffer, or NULL if allocation fails */ + page_t* page, /* in: index page */ + ulint need, /* in: number of bytes needed */ + dict_index_t* index, /* in: record descriptor */ + ulint* heap_no);/* out: this contains the heap number + of the allocated record + if allocation succeeds */ /**************************************************************** Puts a record to free list. */ UNIV_INLINE void page_mem_free( /*==========*/ - page_t* page, /* in: index page */ - rec_t* rec); /* in: pointer to the (origin of) record */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: pointer to the (origin of) record */ + dict_index_t* index); /* in: record descriptor */ /************************************************************** The index page creation function. */ @@ -487,7 +538,8 @@ page_create( /* out: pointer to the page */ buf_frame_t* frame, /* in: a buffer frame where the page is created */ - mtr_t* mtr); /* in: mini-transaction handle */ + mtr_t* mtr, /* in: mini-transaction handle */ + ibool comp); /* in: TRUE=compact page format */ /***************************************************************** Differs from page_copy_rec_list_end, because this function does not touch the lock table and max trx id on page. */ @@ -495,10 +547,11 @@ touch the lock table and max trx id on page. 
*/ void page_copy_rec_list_end_no_locks( /*============================*/ - page_t* new_page, /* in: index page to copy to */ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - mtr_t* mtr); /* in: mtr */ + page_t* new_page, /* in: index page to copy to */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: record on page */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr); /* in: mtr */ /***************************************************************** Copies records from page to new_page, from the given record onward, including that record. Infimum and supremum records are not copied. @@ -507,10 +560,11 @@ The records are copied to the start of the record list on new_page. */ void page_copy_rec_list_end( /*===================*/ - page_t* new_page, /* in: index page to copy to */ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - mtr_t* mtr); /* in: mtr */ + page_t* new_page, /* in: index page to copy to */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: record on page */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr); /* in: mtr */ /***************************************************************** Copies records from page to new_page, up to the given record, NOT including that record. Infimum and supremum records are not copied. @@ -519,10 +573,11 @@ The records are copied to the end of the record list on new_page. */ void page_copy_rec_list_start( /*=====================*/ - page_t* new_page, /* in: index page to copy to */ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - mtr_t* mtr); /* in: mtr */ + page_t* new_page, /* in: index page to copy to */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: record on page */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr); /* in: mtr */ /***************************************************************** Deletes records from a page from a given record onward, including that record. The infimum and supremum records are not deleted. */ @@ -530,14 +585,15 @@ The infimum and supremum records are not deleted. */ void page_delete_rec_list_end( /*=====================*/ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - ulint n_recs, /* in: number of records to delete, or ULINT_UNDEFINED - if not known */ - ulint size, /* in: the sum of the sizes of the records in the end - of the chain to delete, or ULINT_UNDEFINED if not - known */ - mtr_t* mtr); /* in: mtr */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: record on page */ + dict_index_t* index, /* in: record descriptor */ + ulint n_recs, /* in: number of records to delete, + or ULINT_UNDEFINED if not known */ + ulint size, /* in: the sum of the sizes of the + records in the end of the chain to + delete, or ULINT_UNDEFINED if not known */ + mtr_t* mtr); /* in: mtr */ /***************************************************************** Deletes records from page, up to the given record, NOT including that record. Infimum and supremum records are not deleted. */ @@ -545,9 +601,10 @@ that record. Infimum and supremum records are not deleted. 
*/ void page_delete_rec_list_start( /*=======================*/ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - mtr_t* mtr); /* in: mtr */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: record on page */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr); /* in: mtr */ /***************************************************************** Moves record list end to another page. Moved records include split_rec. */ @@ -555,10 +612,11 @@ split_rec. */ void page_move_rec_list_end( /*===================*/ - page_t* new_page, /* in: index page where to move */ - page_t* page, /* in: index page */ - rec_t* split_rec, /* in: first record to move */ - mtr_t* mtr); /* in: mtr */ + page_t* new_page, /* in: index page where to move */ + page_t* page, /* in: index page */ + rec_t* split_rec, /* in: first record to move */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr); /* in: mtr */ /***************************************************************** Moves record list start to another page. Moved records do not include split_rec. */ @@ -566,10 +624,11 @@ split_rec. */ void page_move_rec_list_start( /*=====================*/ - page_t* new_page, /* in: index page where to move */ - page_t* page, /* in: index page */ - rec_t* split_rec, /* in: first record not to move */ - mtr_t* mtr); /* in: mtr */ + page_t* new_page, /* in: index page where to move */ + page_t* page, /* in: index page */ + rec_t* split_rec, /* in: first record not to move */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr); /* in: mtr */ /******************************************************************** Splits a directory slot which owns too many records. */ @@ -595,13 +654,16 @@ Parses a log record of a record list end or start deletion. */ byte* page_parse_delete_rec_list( /*=======================*/ - /* out: end of log record or NULL */ - byte type, /* in: MLOG_LIST_END_DELETE or - MLOG_LIST_START_DELETE */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr); /* in: mtr or NULL */ + /* out: end of log record or NULL */ + byte type, /* in: MLOG_LIST_END_DELETE, + MLOG_LIST_START_DELETE, + MLOG_COMP_LIST_END_DELETE or + MLOG_COMP_LIST_START_DELETE */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: record descriptor */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr); /* in: mtr or NULL */ /*************************************************************** Parses a redo log record of creating a page. */ @@ -611,6 +673,7 @@ page_parse_create( /* out: end of log record or NULL */ byte* ptr, /* in: buffer */ byte* end_ptr,/* in: buffer end */ + ibool comp, /* in: TRUE=compact page format */ page_t* page, /* in: page or NULL */ mtr_t* mtr); /* in: mtr or NULL */ /**************************************************************** @@ -620,7 +683,8 @@ the index page context. */ void page_rec_print( /*===========*/ - rec_t* rec); + rec_t* rec, /* in: physical record */ + const ulint* offsets);/* in: record descriptor */ /******************************************************************* This is used to print the contents of the directory for debugging purposes. */ @@ -637,8 +701,9 @@ debugging purposes. 
*/ void page_print_list( /*============*/ - page_t* page, /* in: index page */ - ulint pr_n); /* in: print n first and n last entries */ + page_t* page, /* in: index page */ + dict_index_t* index, /* in: dictionary index of the page */ + ulint pr_n); /* in: print n first and n last entries */ /******************************************************************* Prints the info in a page header. */ @@ -653,9 +718,12 @@ debugging purposes. */ void page_print( /*======*/ - page_t* page, /* in: index page */ - ulint dn, /* in: print dn first and last entries in directory */ - ulint rn); /* in: print rn first and last records on page */ + page_t* page, /* in: index page */ + dict_index_t* index, /* in: dictionary index of the page */ + ulint dn, /* in: print dn first and last entries + in directory */ + ulint rn); /* in: print rn first and last records + in directory */ /******************************************************************* The following is used to validate a record on a page. This function differs from rec_validate as it can also check the n_owned field and @@ -664,8 +732,9 @@ the heap_no field. */ ibool page_rec_validate( /*==============*/ - /* out: TRUE if ok */ - rec_t* rec); /* in: record on the page */ + /* out: TRUE if ok */ + rec_t* rec, /* in: physical record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ /******************************************************************* Checks that the first directory slot points to the infimum record and the last to the supremum. This function is intended to track if the diff --git a/innobase/include/page0page.ic b/innobase/include/page0page.ic index 3d2bf3b090e..1d5ea337031 100644 --- a/innobase/include/page0page.ic +++ b/innobase/include/page0page.ic @@ -73,7 +73,8 @@ page_header_set_field( { ut_ad(page); ut_ad(field <= PAGE_N_RECS); - ut_ad(val < UNIV_PAGE_SIZE); + ut_ad(field == PAGE_N_HEAP || val < UNIV_PAGE_SIZE); + ut_ad(field != PAGE_N_HEAP || (val & 0x7fff) < UNIV_PAGE_SIZE); mach_write_to_2(page + PAGE_HEADER + field, val); } @@ -162,7 +163,11 @@ page_get_infimum_rec( { ut_ad(page); - return(page + PAGE_INFIMUM); + if (page_is_comp(page)) { + return(page + PAGE_NEW_INFIMUM); + } else { + return(page + PAGE_OLD_INFIMUM); + } } /**************************************************************** @@ -176,7 +181,11 @@ page_get_supremum_rec( { ut_ad(page); - return(page + PAGE_SUPREMUM); + if (page_is_comp(page)) { + return(page + PAGE_NEW_SUPREMUM); + } else { + return(page + PAGE_OLD_SUPREMUM); + } } /**************************************************************** @@ -309,6 +318,7 @@ page_cmp_dtuple_rec_with_match( be page infimum or supremum, in which case matched-parameter values below are not affected */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ ulint* matched_fields, /* in/out: number of already completely matched fields; when function returns contains the value for current comparison */ @@ -320,6 +330,7 @@ page_cmp_dtuple_rec_with_match( page_t* page; ut_ad(dtuple_check_typed(dtuple)); + ut_ad(rec_offs_validate(rec, NULL, offsets)); page = buf_frame_align(rec); @@ -328,7 +339,7 @@ page_cmp_dtuple_rec_with_match( } else if (rec == page_get_supremum_rec(page)) { return(-1); } else { - return(cmp_dtuple_rec_with_match(dtuple, rec, + return(cmp_dtuple_rec_with_match(dtuple, rec, offsets, matched_fields, matched_bytes)); } @@ -358,6 +369,45 @@ page_dir_get_n_slots( { return(page_header_get_field(page, PAGE_N_DIR_SLOTS)); } 
+/***************************************************************** +Sets the number of dir slots in directory. */ +UNIV_INLINE +void +page_dir_set_n_slots( +/*=================*/ + /* out: number of slots */ + page_t* page, /* in: index page */ + ulint n_slots)/* in: number of slots */ +{ + page_header_set_field(page, PAGE_N_DIR_SLOTS, n_slots); +} + +/***************************************************************** +Gets the number of records in the heap. */ +UNIV_INLINE +ulint +page_dir_get_n_heap( +/*================*/ + /* out: number of user records */ + page_t* page) /* in: index page */ +{ + return(page_header_get_field(page, PAGE_N_HEAP) & 0x7fff); +} + +/***************************************************************** +Sets the number of records in the heap. */ +UNIV_INLINE +void +page_dir_set_n_heap( +/*================*/ + page_t* page, /* in: index page */ + ulint n_heap) /* in: number of records */ +{ + ut_ad(n_heap < 0x8000); + + page_header_set_field(page, PAGE_N_HEAP, n_heap | (0x8000 & + page_header_get_field(page, PAGE_N_HEAP))); +} /***************************************************************** Gets pointer to nth directory slot. */ @@ -369,7 +419,7 @@ page_dir_get_nth_slot( page_t* page, /* in: index page */ ulint n) /* in: position */ { - ut_ad(page_header_get_field(page, PAGE_N_DIR_SLOTS) > n); + ut_ad(page_dir_get_n_slots(page) > n); return(page + UNIV_PAGE_SIZE - PAGE_DIR - (n + 1) * PAGE_DIR_SLOT_SIZE); @@ -431,7 +481,8 @@ page_dir_slot_get_n_owned( /* out: number of records */ page_dir_slot_t* slot) /* in: page directory slot */ { - return(rec_get_n_owned(page_dir_slot_get_rec(slot))); + return(rec_get_n_owned(page_dir_slot_get_rec(slot), + page_is_comp(buf_frame_align(slot)))); } /******************************************************************* @@ -444,7 +495,8 @@ page_dir_slot_set_n_owned( ulint n) /* in: number of records owned by the slot */ { - rec_set_n_owned(page_dir_slot_get_rec(slot), n); + rec_set_n_owned(page_dir_slot_get_rec(slot), + page_is_comp(buf_frame_align(slot)), n); } /**************************************************************** @@ -461,6 +513,19 @@ page_dir_calc_reserved_space( / PAGE_DIR_SLOT_MIN_N_OWNED); } +/**************************************************************** +Determine whether the page is in new-style compact format. */ +UNIV_INLINE +ibool +page_is_comp( +/*=========*/ + /* out: TRUE if the page is in compact format + FALSE if it is in old-style format */ + page_t* page) /* in: index page */ +{ + return(!!(page_header_get_field(page, PAGE_N_HEAP) & 0x8000)); +} + /**************************************************************** Gets the pointer to the next record on the page. 
*/ UNIV_INLINE @@ -477,7 +542,7 @@ page_rec_get_next( page = buf_frame_align(rec); - offs = rec_get_next_offs(rec); + offs = rec_get_next_offs(rec, page_is_comp(page)); if (offs >= UNIV_PAGE_SIZE) { fprintf(stderr, @@ -513,6 +578,7 @@ page_rec_set_next( infimum */ { page_t* page; + ulint offs; ut_ad(page_rec_check(rec)); ut_a((next == NULL) @@ -523,11 +589,13 @@ page_rec_set_next( ut_ad(rec != page_get_supremum_rec(page)); ut_ad(next != page_get_infimum_rec(page)); - if (next == NULL) { - rec_set_next_offs(rec, 0); + if (next) { + offs = (ulint) (next - page); } else { - rec_set_next_offs(rec, (ulint)(next - page)); + offs = 0; } + + rec_set_next_offs(rec, page_is_comp(page), offs); } /**************************************************************** @@ -545,6 +613,7 @@ page_rec_get_prev( rec_t* rec2; rec_t* prev_rec = NULL; page_t* page; + ibool comp; ut_ad(page_rec_check(rec)); @@ -559,6 +628,7 @@ page_rec_get_prev( slot = page_dir_get_nth_slot(page, slot_no - 1); rec2 = page_dir_slot_get_rec(slot); + comp = page_is_comp(page); while (rec != rec2) { prev_rec = rec2; @@ -579,9 +649,12 @@ page_rec_find_owner_rec( /* out: the owner record */ rec_t* rec) /* in: the physical record */ { - ut_ad(page_rec_check(rec)); + ibool comp; - while (rec_get_n_owned(rec) == 0) { + ut_ad(page_rec_check(rec)); + comp = page_is_comp(buf_frame_align(rec)); + + while (rec_get_n_owned(rec, comp) == 0) { rec = page_rec_get_next(rec); } @@ -601,7 +674,9 @@ page_get_data_size( ulint ret; ret = (ulint)(page_header_get_field(page, PAGE_HEAP_TOP) - - PAGE_SUPREMUM_END + - (page_is_comp(page) + ? PAGE_NEW_SUPREMUM_END + : PAGE_OLD_SUPREMUM_END) - page_header_get_field(page, PAGE_GARBAGE)); ut_ad(ret < UNIV_PAGE_SIZE); @@ -613,12 +688,13 @@ page_get_data_size( Calculates free space if a page is emptied. */ UNIV_INLINE ulint -page_get_free_space_of_empty(void) -/*==============================*/ +page_get_free_space_of_empty( +/*=========================*/ /* out: free space */ + ibool comp) /* in: TRUE=compact page layout */ { return((ulint)(UNIV_PAGE_SIZE - - PAGE_SUPREMUM_END + - (comp ? PAGE_NEW_SUPREMUM_END : PAGE_OLD_SUPREMUM_END) - PAGE_DIR - 2 * PAGE_DIR_SLOT_SIZE)); } @@ -640,13 +716,16 @@ page_get_max_insert_size( { ulint occupied; ulint free_space; + ibool comp; + + comp = page_is_comp(page); occupied = page_header_get_field(page, PAGE_HEAP_TOP) - - PAGE_SUPREMUM_END + - (comp ? 
PAGE_NEW_SUPREMUM_END : PAGE_OLD_SUPREMUM_END) + page_dir_calc_reserved_space( - n_recs + (page_header_get_field(page, PAGE_N_HEAP) - 2)); + n_recs + page_dir_get_n_heap(page) - 2); - free_space = page_get_free_space_of_empty(); + free_space = page_get_free_space_of_empty(comp); /* Above the 'n_recs +' part reserves directory space for the new inserted records; the '- 2' excludes page infimum and supremum @@ -673,11 +752,14 @@ page_get_max_insert_size_after_reorganize( { ulint occupied; ulint free_space; + ibool comp; + + comp = page_is_comp(page); occupied = page_get_data_size(page) + page_dir_calc_reserved_space(n_recs + page_get_n_recs(page)); - free_space = page_get_free_space_of_empty(); + free_space = page_get_free_space_of_empty(comp); if (occupied > free_space) { @@ -693,11 +775,12 @@ UNIV_INLINE void page_mem_free( /*==========*/ - page_t* page, /* in: index page */ - rec_t* rec) /* in: pointer to the (origin of) record */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: pointer to the (origin of) record */ + dict_index_t* index) /* in: record descriptor */ { - rec_t* free; - ulint garbage; + rec_t* free; + ulint garbage; free = page_header_get_ptr(page, PAGE_FREE); @@ -707,7 +790,7 @@ page_mem_free( garbage = page_header_get_field(page, PAGE_GARBAGE); page_header_set_field(page, PAGE_GARBAGE, - garbage + rec_get_size(rec)); + garbage + rec_get_size(rec, index)); } #ifdef UNIV_MATERIALIZE diff --git a/innobase/include/rem0cmp.h b/innobase/include/rem0cmp.h index 712e263350e..77a5a42c2d5 100644 --- a/innobase/include/rem0cmp.h +++ b/innobase/include/rem0cmp.h @@ -90,6 +90,7 @@ cmp_dtuple_rec_with_match( dtuple in some of the common fields, or which has an equal number or more fields than dtuple */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ ulint* matched_fields, /* in/out: number of already completely matched fields; when function returns, contains the value for current comparison */ @@ -107,7 +108,8 @@ cmp_dtuple_rec( less than rec, respectively; see the comments for cmp_dtuple_rec_with_match */ dtuple_t* dtuple, /* in: data tuple */ - rec_t* rec); /* in: physical record */ + rec_t* rec, /* in: physical record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ /****************************************************************** Checks if a dtuple is a prefix of a record. The last field in dtuple is allowed to be a prefix of the corresponding field in the record. */ @@ -116,23 +118,9 @@ ibool cmp_dtuple_is_prefix_of_rec( /*========================*/ /* out: TRUE if prefix */ - dtuple_t* dtuple, /* in: data tuple */ - rec_t* rec); /* in: physical record */ -/****************************************************************** -Compares a prefix of a data tuple to a prefix of a physical record for -equality. If there are less fields in rec than parameter n_fields, FALSE -is returned. NOTE that n_fields_cmp of dtuple does not affect this -comparison. */ - -ibool -cmp_dtuple_rec_prefix_equal( -/*========================*/ - /* out: TRUE if equal */ dtuple_t* dtuple, /* in: data tuple */ rec_t* rec, /* in: physical record */ - ulint n_fields); /* in: number of fields which should be - compared; must not exceed the number of - fields in dtuple */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ /***************************************************************** This function is used to compare two physical records. 
Only the common first fields are compared, and if an externally stored field is @@ -146,7 +134,13 @@ cmp_rec_rec_with_match( first fields are compared */ rec_t* rec1, /* in: physical record */ rec_t* rec2, /* in: physical record */ + const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */ + const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */ dict_index_t* index, /* in: data dictionary index */ + ulint n, /* in: number of fields to compare, + or ULINT_UNDEFINED if both records + contain all fields, and all fields + should be compared */ ulint* matched_fields, /* in/out: number of already completely matched fields; when the function returns, contains the value the for current @@ -167,6 +161,12 @@ cmp_rec_rec( first fields are compared */ rec_t* rec1, /* in: physical record */ rec_t* rec2, /* in: physical record */ + const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */ + const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */ + ulint n, /* in: number of fields to compare, + or ULINT_UNDEFINED if both records + contain all fields, and all fields + should be compared */ dict_index_t* index); /* in: data dictionary index */ diff --git a/innobase/include/rem0cmp.ic b/innobase/include/rem0cmp.ic index 75cb3ef04e8..d4c30f25f03 100644 --- a/innobase/include/rem0cmp.ic +++ b/innobase/include/rem0cmp.ic @@ -57,10 +57,14 @@ cmp_rec_rec( first fields are compared */ rec_t* rec1, /* in: physical record */ rec_t* rec2, /* in: physical record */ + const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */ + const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */ + ulint n, /* in: number of fields to compare */ dict_index_t* index) /* in: data dictionary index */ { ulint match_f = 0; ulint match_b = 0; - return(cmp_rec_rec_with_match(rec1, rec2, index, &match_f, &match_b)); + return(cmp_rec_rec_with_match(rec1, rec2, offsets1, offsets2, index, n, + &match_f, &match_b)); } diff --git a/innobase/include/rem0rec.h b/innobase/include/rem0rec.h index 86bf263170f..d450df82311 100644 --- a/innobase/include/rem0rec.h +++ b/innobase/include/rem0rec.h @@ -23,9 +23,18 @@ Created 5/30/1994 Heikki Tuuri info bits of a record */ #define REC_INFO_MIN_REC_FLAG 0x10UL -/* Number of extra bytes in a record, in addition to the data and the -offsets */ -#define REC_N_EXTRA_BYTES 6 +/* Number of extra bytes in an old-style record, +in addition to the data and the offsets */ +#define REC_N_OLD_EXTRA_BYTES 6 +/* Number of extra bytes in a new-style record, +in addition to the data and the offsets */ +#define REC_N_NEW_EXTRA_BYTES 5 + +/* Record status values */ +#define REC_STATUS_ORDINARY 0 +#define REC_STATUS_NODE_PTR 1 +#define REC_STATUS_INFIMUM 2 +#define REC_STATUS_SUPREMUM 3 /********************************************************** The following function is used to get the offset of the @@ -36,7 +45,8 @@ rec_get_next_offs( /*==============*/ /* out: the page offset of the next chained record */ - rec_t* rec); /* in: physical record */ + rec_t* rec, /* in: physical record */ + ibool comp); /* in: TRUE=compact page format */ /********************************************************** The following function is used to set the next record offset field of the record. 
*/ @@ -45,14 +55,15 @@ void rec_set_next_offs( /*==============*/ rec_t* rec, /* in: physical record */ + ibool comp, /* in: TRUE=compact page format */ ulint next); /* in: offset of the next record */ /********************************************************** The following function is used to get the number of fields -in the record. */ +in an old-style record. */ UNIV_INLINE ulint -rec_get_n_fields( -/*=============*/ +rec_get_n_fields_old( +/*=================*/ /* out: number of data fields */ rec_t* rec); /* in: physical record */ /********************************************************** @@ -63,7 +74,8 @@ ulint rec_get_n_owned( /*============*/ /* out: number of owned records */ - rec_t* rec); /* in: physical record */ + rec_t* rec, /* in: physical record */ + ibool comp); /* in: TRUE=compact page format */ /********************************************************** The following function is used to set the number of owned records. */ @@ -72,6 +84,7 @@ void rec_set_n_owned( /*============*/ rec_t* rec, /* in: physical record */ + ibool comp, /* in: TRUE=compact page format */ ulint n_owned); /* in: the number of owned */ /********************************************************** The following function is used to retrieve the info bits of @@ -81,7 +94,8 @@ ulint rec_get_info_bits( /*==============*/ /* out: info bits */ - rec_t* rec); /* in: physical record */ + rec_t* rec, /* in: physical record */ + ibool comp); /* in: TRUE=compact page format */ /********************************************************** The following function is used to set the info bits of a record. */ UNIV_INLINE @@ -89,15 +103,26 @@ void rec_set_info_bits( /*==============*/ rec_t* rec, /* in: physical record */ + ibool comp, /* in: TRUE=compact page format */ ulint bits); /* in: info bits */ /********************************************************** -Gets the value of the deleted falg in info bits. */ +The following function retrieves the status bits of a new-style record. */ UNIV_INLINE -ibool -rec_info_bits_get_deleted_flag( -/*===========================*/ - /* out: TRUE if deleted flag set */ - ulint info_bits); /* in: info bits from a record */ +ulint +rec_get_status( +/*===========*/ + /* out: status bits */ + rec_t* rec); /* in: physical record */ + +/********************************************************** +The following function is used to set the status bits of a new-style record. */ +UNIV_INLINE +void +rec_set_status( +/*===========*/ + rec_t* rec, /* in: physical record */ + ulint bits); /* in: info bits */ + /********************************************************** The following function tells if record is delete marked. */ UNIV_INLINE @@ -105,7 +130,8 @@ ibool rec_get_deleted_flag( /*=================*/ /* out: TRUE if delete marked */ - rec_t* rec); /* in: physical record */ + rec_t* rec, /* in: physical record */ + ibool comp); /* in: TRUE=compact page format */ /********************************************************** The following function is used to set the deleted bit. */ UNIV_INLINE @@ -113,8 +139,25 @@ void rec_set_deleted_flag( /*=================*/ rec_t* rec, /* in: physical record */ + ibool comp, /* in: TRUE=compact page format */ ibool flag); /* in: TRUE if delete marked */ /********************************************************** +The following function tells if a new-style record is a node pointer. 
*/ +UNIV_INLINE +ibool +rec_get_node_ptr_flag( +/*=================*/ + /* out: TRUE if node pointer */ + rec_t* rec); /* in: physical record */ +/********************************************************** +The following function is used to flag a record as a node pointer. */ +UNIV_INLINE +void +rec_set_node_ptr_flag( +/*=================*/ + rec_t* rec, /* in: physical record */ + ibool flag); /* in: TRUE if the record is a node pointer */ +/********************************************************** The following function is used to get the order number of the record in the heap of the index page. */ UNIV_INLINE @@ -122,7 +165,8 @@ ulint rec_get_heap_no( /*=============*/ /* out: heap order number */ - rec_t* rec); /* in: physical record */ + rec_t* rec, /* in: physical record */ + ibool comp); /* in: TRUE=compact page format */ /********************************************************** The following function is used to set the heap number field in the record. */ @@ -131,6 +175,7 @@ void rec_set_heap_no( /*=============*/ rec_t* rec, /* in: physical record */ + ibool comp, /* in: TRUE=compact page format */ ulint heap_no);/* in: the heap number */ /********************************************************** The following function is used to test whether the data offsets @@ -141,31 +186,84 @@ rec_get_1byte_offs_flag( /*====================*/ /* out: TRUE if 1-byte form */ rec_t* rec); /* in: physical record */ +/********************************************************** +The following function determines the offsets to each field +in the record. The offsets are returned in an array of +ulint, with [0] being the number of fields (n), [1] being the +extra size (if REC_OFFS_COMPACT is set, the record is in the new +format), and [2]..[n+1] being the offsets past the end of +fields 0..n, or to the beginning of fields 1..n+1. When the +high-order bit of the offset at [n+1] is set (REC_OFFS_SQL_NULL), +the field n is NULL. When the second high-order bit of the offset +at [n+1] is set (REC_OFFS_EXTERNAL), the field n is being stored +externally. */ + +ulint* +rec_get_offsets( +/*============*/ + /* out: the offsets */ + rec_t* rec, /* in: physical record */ + dict_index_t* index, /* in: record descriptor */ + ulint n_fields,/* in: maximum number of initialized fields + (ULINT_UNDEFINED if all fields) */ + mem_heap_t* heap); /* in: memory heap */ +/********************************************************** +The following function determines the offsets to each field +in the record. It differs from rec_get_offsets() by trying to +reuse a previously returned array. */ + +ulint* +rec_reget_offsets( +/*==============*/ + /* out: the new offsets */ + rec_t* rec, /* in: physical record */ + dict_index_t* index, /* in: record descriptor */ + ulint* offsets,/* in: array of offsets + from rec_get_offsets() + or rec_reget_offsets(), or NULL */ + ulint n_fields,/* in: maximum number of initialized fields + (ULINT_UNDEFINED if all fields) */ + mem_heap_t* heap); /* in: memory heap */ + +/**************************************************************** +Validates offsets returned by rec_get_offsets() or rec_reget_offsets(). 
*/ +UNIV_INLINE +ibool +rec_offs_validate( +/*==============*/ + /* out: TRUE if valid */ + rec_t* rec, /* in: record or NULL */ + dict_index_t* index, /* in: record descriptor or NULL */ + const ulint* offsets);/* in: array returned by rec_get_offsets() + or rec_reget_offsets() */ +/**************************************************************** +Updates debug data in offsets, in order to avoid bogus +rec_offs_validate() failures. */ +UNIV_INLINE +void +rec_offs_make_valid( +/*================*/ + const rec_t* rec, /* in: record */ + const dict_index_t* index,/* in: record descriptor */ + ulint* offsets);/* in: array returned by rec_get_offsets() + or rec_reget_offsets() */ + /**************************************************************** The following function is used to get a pointer to the nth -data field in the record. */ +data field in an old-style record. */ byte* -rec_get_nth_field( -/*==============*/ +rec_get_nth_field_old( +/*==================*/ /* out: pointer to the field */ rec_t* rec, /* in: record */ ulint n, /* in: index of the field */ ulint* len); /* out: length of the field; UNIV_SQL_NULL if SQL null */ /**************************************************************** -Return field length or UNIV_SQL_NULL. */ -UNIV_INLINE -ulint -rec_get_nth_field_len( -/*==================*/ - /* out: length of the field; UNIV_SQL_NULL if SQL - null */ - rec_t* rec, /* in: record */ - ulint n); /* in: index of the field */ -/**************************************************************** -Gets the physical size of a field. Also an SQL null may have a field of -size > 0, if the data type is of a fixed size. */ +Gets the physical size of an old-style field. +Also an SQL null may have a field of size > 0, +if the data type is of a fixed size. */ UNIV_INLINE ulint rec_get_nth_field_size( @@ -173,131 +271,194 @@ rec_get_nth_field_size( /* out: field size in bytes */ rec_t* rec, /* in: record */ ulint n); /* in: index of the field */ -/*************************************************************** -Gets the value of the ith field extern storage bit. If it is TRUE -it means that the field is stored on another page. */ +/**************************************************************** +The following function is used to get a pointer to the nth +data field in an old-style record. */ +UNIV_INLINE +byte* +rec_get_nth_field( +/*==============*/ + /* out: pointer to the field */ + rec_t* rec, /* in: record */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n, /* in: index of the field */ + ulint* len); /* out: length of the field; UNIV_SQL_NULL + if SQL null */ +/********************************************************** +Determine if the offsets are for a record in the new +compact format. */ UNIV_INLINE ibool -rec_get_nth_field_extern_bit( -/*=========================*/ - /* in: TRUE or FALSE */ - rec_t* rec, /* in: record */ - ulint i); /* in: ith field */ +rec_offs_comp( +/*==========*/ + /* out: TRUE if compact format */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/********************************************************** +Returns TRUE if the nth field of rec is SQL NULL. */ +UNIV_INLINE +ibool +rec_offs_nth_null( +/*==============*/ + /* out: TRUE if SQL NULL */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n); /* in: nth field */ +/********************************************************** +Returns TRUE if the extern bit is set in nth field of rec. 
*/ +UNIV_INLINE +ibool +rec_offs_nth_extern( +/*================*/ + /* out: TRUE if externally stored */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n); /* in: nth field */ +/********************************************************** +Gets the physical size of a field. */ +UNIV_INLINE +ulint +rec_offs_nth_size( +/*==============*/ + /* out: length of field */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n); /* in: nth field */ + /********************************************************** Returns TRUE if the extern bit is set in any of the fields of rec. */ UNIV_INLINE ibool -rec_contains_externally_stored_field( -/*=================================*/ - /* out: TRUE if a field is stored externally */ - rec_t* rec); /* in: record */ +rec_offs_any_extern( +/*================*/ + /* out: TRUE if a field is stored externally */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ /*************************************************************** Sets the value of the ith field extern storage bit. */ - +UNIV_INLINE void rec_set_nth_field_extern_bit( /*=========================*/ - rec_t* rec, /* in: record */ - ulint i, /* in: ith field */ - ibool val, /* in: value to set */ - mtr_t* mtr); /* in: mtr holding an X-latch to the page where - rec is, or NULL; in the NULL case we do not - write to log about the change */ + rec_t* rec, /* in: record */ + dict_index_t* index, /* in: record descriptor */ + ulint i, /* in: ith field */ + ibool val, /* in: value to set */ + mtr_t* mtr); /* in: mtr holding an X-latch to the page + where rec is, or NULL; in the NULL case + we do not write to log about the change */ /*************************************************************** Sets TRUE the extern storage bits of fields mentioned in an array. */ void rec_set_field_extern_bits( /*======================*/ - rec_t* rec, /* in: record */ - ulint* vec, /* in: array of field numbers */ - ulint n_fields, /* in: number of fields numbers */ - mtr_t* mtr); /* in: mtr holding an X-latch to the page - where rec is, or NULL; in the NULL case we - do not write to log about the change */ -/**************************************************************** -The following function is used to get a copy of the nth -data field in the record to a buffer. */ -UNIV_INLINE -void -rec_copy_nth_field( -/*===============*/ - void* buf, /* in: pointer to the buffer */ - rec_t* rec, /* in: record */ - ulint n, /* in: index of the field */ - ulint* len); /* out: length of the field; UNIV_SQL_NULL if SQL - null */ + rec_t* rec, /* in: record */ + dict_index_t* index, /* in: record descriptor */ + const ulint* vec, /* in: array of field numbers */ + ulint n_fields,/* in: number of fields numbers */ + mtr_t* mtr); /* in: mtr holding an X-latch to the page + where rec is, or NULL; in the NULL case + we do not write to log about the change */ /*************************************************************** -This is used to modify the value of an already existing field in -a physical record. The previous value must have exactly the same -size as the new value. If len is UNIV_SQL_NULL then the field is -treated as SQL null. */ +This is used to modify the value of an already existing field in a record. +The previous value must have exactly the same size as the new value. If len +is UNIV_SQL_NULL then the field is treated as an SQL null for old-style +records. For new-style records, len must not be UNIV_SQL_NULL. 
*/ UNIV_INLINE void rec_set_nth_field( /*==============*/ - rec_t* rec, /* in: record */ - ulint n, /* in: index of the field */ - void* data, /* in: pointer to the data if not SQL null */ - ulint len); /* in: length of the data or UNIV_SQL_NULL. - If not SQL null, must have the same length as the - previous value. If SQL null, previous value must be - SQL null. */ + rec_t* rec, /* in: record */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n, /* in: index number of the field */ + const void* data, /* in: pointer to the data if not SQL null */ + ulint len); /* in: length of the data or UNIV_SQL_NULL. + If not SQL null, must have the same + length as the previous value. + If SQL null, previous value must be + SQL null. */ /************************************************************** +The following function returns the data size of an old-style physical +record, that is the sum of field lengths. SQL null fields +are counted as length 0 fields. The value returned by the function +is the distance from record origin to record end in bytes. */ +UNIV_INLINE +ulint +rec_get_data_size_old( +/*==================*/ + /* out: size */ + rec_t* rec); /* in: physical record */ +/************************************************************** +The following function returns the number of fields in a record. */ +UNIV_INLINE +ulint +rec_offs_n_fields( +/*===============*/ + /* out: number of fields */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/************************************************************** The following function returns the data size of a physical record, that is the sum of field lengths. SQL null fields are counted as length 0 fields. The value returned by the function is the distance from record origin to record end in bytes. */ UNIV_INLINE ulint -rec_get_data_size( -/*==============*/ - /* out: size */ - rec_t* rec); /* in: physical record */ -/************************************************************** +rec_offs_data_size( +/*===============*/ + /* out: size */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/************************************************************** Returns the total size of record minus data size of record. The value returned by the function is the distance from record start to record origin in bytes. */ UNIV_INLINE ulint -rec_get_extra_size( -/*===============*/ - /* out: size */ - rec_t* rec); /* in: physical record */ -/************************************************************** +rec_offs_extra_size( +/*================*/ + /* out: size */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/************************************************************** Returns the total size of a physical record. */ UNIV_INLINE +ulint +rec_offs_size( +/*==========*/ + /* out: size */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/************************************************************** +Returns the total size of a physical record. */ + ulint rec_get_size( /*=========*/ - /* out: size */ - rec_t* rec); /* in: physical record */ + /* out: size */ + rec_t* rec, /* in: physical record */ + dict_index_t* index); /* in: record descriptor */ /************************************************************** Returns a pointer to the start of the record. 
*/ UNIV_INLINE byte* rec_get_start( /*==========*/ - /* out: pointer to start */ - rec_t* rec); /* in: pointer to record */ + /* out: pointer to start */ + rec_t* rec, /* in: pointer to record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ /************************************************************** Returns a pointer to the end of the record. */ UNIV_INLINE byte* rec_get_end( /*========*/ - /* out: pointer to end */ - rec_t* rec); /* in: pointer to record */ + /* out: pointer to end */ + rec_t* rec, /* in: pointer to record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ /******************************************************************* Copies a physical record to a buffer. */ UNIV_INLINE rec_t* rec_copy( /*=====*/ - /* out: pointer to the origin of the copied record */ - void* buf, /* in: buffer */ - rec_t* rec); /* in: physical record */ + /* out: pointer to the origin of the copy */ + void* buf, /* in: buffer */ + const rec_t* rec, /* in: physical record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ /****************************************************************** Copies the first n fields of a physical record to a new physical record in a buffer. */ @@ -305,49 +466,43 @@ a buffer. */ rec_t* rec_copy_prefix_to_buf( /*===================*/ - /* out, own: copied record */ - rec_t* rec, /* in: physical record */ - ulint n_fields, /* in: number of fields to copy */ - byte** buf, /* in/out: memory buffer for the copied prefix, - or NULL */ - ulint* buf_size); /* in/out: buffer size */ + /* out, own: copied record */ + rec_t* rec, /* in: physical record */ + dict_index_t* index, /* in: record descriptor */ + ulint n_fields, /* in: number of fields to copy */ + byte** buf, /* in/out: memory buffer + for the copied prefix, or NULL */ + ulint* buf_size); /* in/out: buffer size */ /**************************************************************** Folds a prefix of a physical record to a ulint. */ UNIV_INLINE ulint rec_fold( /*=====*/ - /* out: the folded value */ - rec_t* rec, /* in: the physical record */ - ulint n_fields, /* in: number of complete fields to fold */ - ulint n_bytes, /* in: number of bytes to fold in an - incomplete last field */ - dulint tree_id); /* in: index tree id */ -/************************************************************* -Builds a physical record out of a data tuple and stores it beginning from -address destination. */ -UNIV_INLINE -rec_t* -rec_convert_dtuple_to_rec( -/*======================*/ - /* out: pointer to the origin of physical - record */ - byte* destination, /* in: start address of the physical record */ - dtuple_t* dtuple); /* in: data tuple */ + /* out: the folded value */ + rec_t* rec, /* in: the physical record */ + const ulint* offsets, /* in: array returned by + rec_get_offsets() */ + ulint n_fields, /* in: number of complete + fields to fold */ + ulint n_bytes, /* in: number of bytes to fold + in an incomplete last field */ + dulint tree_id); /* in: index tree id */ /************************************************************* Builds a physical record out of a data tuple and stores it beginning from address destination. 
*/ rec_t* -rec_convert_dtuple_to_rec_low( -/*==========================*/ - /* out: pointer to the origin of physical - record */ - byte* destination, /* in: start address of the physical record */ - dtuple_t* dtuple, /* in: data tuple */ - ulint data_size); /* in: data size of dtuple */ +rec_convert_dtuple_to_rec( +/*======================*/ + /* out: pointer to the origin + of physical record */ + byte* buf, /* in: start address of the + physical record */ + dict_index_t* index, /* in: record descriptor */ + dtuple_t* dtuple);/* in: data tuple */ /************************************************************** -Returns the extra size of a physical record if we know its +Returns the extra size of an old-style physical record if we know its data size and number of fields. */ UNIV_INLINE ulint @@ -355,7 +510,8 @@ rec_get_converted_extra_size( /*=========================*/ /* out: extra size */ ulint data_size, /* in: data size */ - ulint n_fields); /* in: number of fields */ + ulint n_fields) /* in: number of fields */ + __attribute__((const)); /************************************************************** The following function returns the size of a data tuple when converted to a physical record. */ @@ -364,6 +520,7 @@ ulint rec_get_converted_size( /*===================*/ /* out: size */ + dict_index_t* index, /* in: record descriptor */ dtuple_t* dtuple);/* in: data tuple */ /****************************************************************** Copies the first n fields of a physical record to a data tuple. @@ -374,6 +531,7 @@ rec_copy_prefix_to_dtuple( /*======================*/ dtuple_t* tuple, /* in: data tuple */ rec_t* rec, /* in: physical record */ + dict_index_t* index, /* in: record descriptor */ ulint n_fields, /* in: number of fields to copy */ mem_heap_t* heap); /* in: memory heap */ /******************************************************************* @@ -382,16 +540,27 @@ Validates the consistency of a physical record. */ ibool rec_validate( /*=========*/ - /* out: TRUE if ok */ - rec_t* rec); /* in: physical record */ + /* out: TRUE if ok */ + rec_t* rec, /* in: physical record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/******************************************************************* +Prints an old-style physical record. */ + +void +rec_print_old( +/*==========*/ + FILE* file, /* in: file where to print */ + rec_t* rec); /* in: physical record */ + /******************************************************************* Prints a physical record. */ void rec_print( /*======*/ - FILE* file, /* in: file where to print */ - rec_t* rec); /* in: physical record */ + FILE* file, /* in: file where to print */ + rec_t* rec, /* in: physical record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ #define REC_INFO_BITS 6 /* This is single byte bit-field */ diff --git a/innobase/include/rem0rec.ic b/innobase/include/rem0rec.ic index c36bf8f6d6e..8443b5fa07d 100644 --- a/innobase/include/rem0rec.ic +++ b/innobase/include/rem0rec.ic @@ -8,9 +8,19 @@ Created 5/30/1994 Heikki Tuuri #include "mach0data.h" #include "ut0byte.h" +#include "dict0dict.h" -/* Offsets of the bit-fields in the record. NOTE! In the table the most -significant bytes and bits are written below less significant. 
+/* Compact flag ORed to the extra size returned by rec_get_offsets() */ +#define REC_OFFS_COMPACT ((ulint) 1 << 31) +/* SQL NULL flag in offsets returned by rec_get_offsets() */ +#define REC_OFFS_SQL_NULL ((ulint) 1 << 31) +/* External flag in offsets returned by rec_get_offsets() */ +#define REC_OFFS_EXTERNAL ((ulint) 1 << 30) +/* Mask for offsets returned by rec_get_offsets() */ +#define REC_OFFS_MASK (REC_OFFS_EXTERNAL - 1) + +/* Offsets of the bit-fields in an old-style record. NOTE! In the table the +most significant bytes and bits are written below less significant. (1) byte offset (2) bit usage within byte downward from @@ -25,6 +35,25 @@ significant bytes and bits are written below less significant. 4 bits info bits */ +/* Offsets of the bit-fields in a new-style record. NOTE! In the table the +most significant bytes and bits are written below less significant. + + (1) byte offset (2) bit usage within byte + downward from + origin -> 1 8 bits pointer to next record (relative) + 2 8 bits pointer to next record (relative) + 3 3 bits status: + 000=conventional record + 001=node pointer record (inside B-tree) + 010=infimum record + 011=supremum record + 1xx=reserved + 5 bits heap number + 4 8 bits heap number + 5 4 bits n_owned + 4 bits info bits +*/ + /* We list the byte offsets from the origin of the record, the mask, and the shift needed to obtain each bit-field of the record. */ @@ -32,22 +61,30 @@ and the shift needed to obtain each bit-field of the record. */ #define REC_NEXT_MASK 0xFFFFUL #define REC_NEXT_SHIFT 0 -#define REC_SHORT 3 /* This is single byte bit-field */ -#define REC_SHORT_MASK 0x1UL -#define REC_SHORT_SHIFT 0 +#define REC_OLD_SHORT 3 /* This is single byte bit-field */ +#define REC_OLD_SHORT_MASK 0x1UL +#define REC_OLD_SHORT_SHIFT 0 -#define REC_N_FIELDS 4 -#define REC_N_FIELDS_MASK 0x7FEUL -#define REC_N_FIELDS_SHIFT 1 +#define REC_OLD_N_FIELDS 4 +#define REC_OLD_N_FIELDS_MASK 0x7FEUL +#define REC_OLD_N_FIELDS_SHIFT 1 -#define REC_HEAP_NO 5 +#define REC_NEW_STATUS 3 /* This is single byte bit-field */ +#define REC_NEW_STATUS_MASK 0x7UL +#define REC_NEW_STATUS_SHIFT 0 + +#define REC_OLD_HEAP_NO 5 +#define REC_NEW_HEAP_NO 4 #define REC_HEAP_NO_MASK 0xFFF8UL #define REC_HEAP_NO_SHIFT 3 -#define REC_N_OWNED 6 /* This is single byte bit-field */ +#define REC_OLD_N_OWNED 6 /* This is single byte bit-field */ +#define REC_NEW_N_OWNED 5 /* This is single byte bit-field */ #define REC_N_OWNED_MASK 0xFUL #define REC_N_OWNED_SHIFT 0 +#define REC_OLD_INFO_BITS 6 /* This is single byte bit-field */ +#define REC_NEW_INFO_BITS 5 /* This is single byte bit-field */ #define REC_INFO_BITS_MASK 0xF0UL #define REC_INFO_BITS_SHIFT 0 @@ -65,26 +102,24 @@ a field stored to another page: */ #define REC_2BYTE_EXTERN_MASK 0x4000UL -/**************************************************************** -Return field length or UNIV_SQL_NULL. 
*/ -UNIV_INLINE -ulint -rec_get_nth_field_len( -/*==================*/ - /* out: length of the field; UNIV_SQL_NULL if SQL - null */ - rec_t* rec, /* in: record */ - ulint n) /* in: index of the field */ -{ - ulint len; - - rec_get_nth_field(rec, n, &len); - - return(len); -} +#if REC_OLD_SHORT_MASK << (8 * (REC_OLD_SHORT - 3)) \ + ^ REC_OLD_N_FIELDS_MASK << (8 * (REC_OLD_N_FIELDS - 4)) \ + ^ REC_HEAP_NO_MASK << (8 * (REC_OLD_HEAP_NO - 4)) \ + ^ REC_N_OWNED_MASK << (8 * (REC_OLD_N_OWNED - 3)) \ + ^ REC_INFO_BITS_MASK << (8 * (REC_OLD_INFO_BITS - 3)) \ + ^ 0xFFFFFFFFUL +# error "sum of old-style masks != 0xFFFFFFFFUL" +#endif +#if REC_NEW_STATUS_MASK << (8 * (REC_NEW_STATUS - 3)) \ + ^ REC_HEAP_NO_MASK << (8 * (REC_NEW_HEAP_NO - 4)) \ + ^ REC_N_OWNED_MASK << (8 * (REC_NEW_N_OWNED - 3)) \ + ^ REC_INFO_BITS_MASK << (8 * (REC_NEW_INFO_BITS - 3)) \ + ^ 0xFFFFFFUL +# error "sum of new-style masks != 0xFFFFFFUL" +#endif /*************************************************************** -Sets the value of the ith field SQL null bit. */ +Sets the value of the ith field SQL null bit of an old-style record. */ void rec_set_nth_field_null_bit( @@ -93,8 +128,8 @@ rec_set_nth_field_null_bit( ulint i, /* in: ith field */ ibool val); /* in: value to set */ /*************************************************************** -Sets a record field to SQL null. The physical size of the field is not -changed. */ +Sets an old-style record field to SQL null. +The physical size of the field is not changed. */ void rec_set_nth_field_sql_null( @@ -102,6 +137,32 @@ rec_set_nth_field_sql_null( rec_t* rec, /* in: record */ ulint n); /* in: index of the field */ +/*************************************************************** +Sets the value of the ith field extern storage bit of an old-style record. */ + +void +rec_set_nth_field_extern_bit_old( +/*=============================*/ + rec_t* rec, /* in: old-style record */ + ulint i, /* in: ith field */ + ibool val, /* in: value to set */ + mtr_t* mtr); /* in: mtr holding an X-latch to the page where + rec is, or NULL; in the NULL case we do not + write to log about the change */ +/*************************************************************** +Sets the value of the ith field extern storage bit of a new-style record. */ + +void +rec_set_nth_field_extern_bit_new( +/*=============================*/ + rec_t* rec, /* in: record */ + dict_index_t* index, /* in: record descriptor */ + ulint ith, /* in: ith field */ + ibool val, /* in: value to set */ + mtr_t* mtr); /* in: mtr holding an X-latch to the page + where rec is, or NULL; in the NULL case + we do not write to log about the change */ + /********************************************************** Gets a bit field from within 1 byte. 
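The offs/mask/shift triples defined above locate each bit-field relative to
the record origin.  For example, with those definitions the info bits of a
record are read as

	rec_get_bit_field_1(rec, comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS,
				REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);

that is, from the high nibble of the byte 6 positions (old-style) or
5 positions (new-style) before the origin, which is exactly what
rec_get_info_bits() further below does.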
*/ UNIV_INLINE @@ -131,7 +192,7 @@ rec_set_bit_field_1( ulint shift) /* in: shift right applied after masking */ { ut_ad(rec); - ut_ad(offs <= REC_N_EXTRA_BYTES); + ut_ad(offs <= REC_N_OLD_EXTRA_BYTES); ut_ad(mask); ut_ad(mask <= 0xFFUL); ut_ad(((mask >> shift) << shift) == mask); @@ -171,30 +232,14 @@ rec_set_bit_field_2( ulint shift) /* in: shift right applied after masking */ { ut_ad(rec); - ut_ad(offs <= REC_N_EXTRA_BYTES); + ut_ad(offs <= REC_N_OLD_EXTRA_BYTES); ut_ad(mask > 0xFFUL); ut_ad(mask <= 0xFFFFUL); ut_ad((mask >> shift) & 1); ut_ad(0 == ((mask >> shift) & ((mask >> shift) + 1))); ut_ad(((mask >> shift) << shift) == mask); ut_ad(((val << shift) & mask) == (val << shift)); -#ifdef UNIV_DEBUG - { - ulint m; - /* The following assertion checks that the masks of currently - defined bit-fields in bytes 3-6 do not overlap. */ - m = (ulint)((REC_SHORT_MASK << (8 * (REC_SHORT - 3))) - + (REC_N_FIELDS_MASK << (8 * (REC_N_FIELDS - 4))) - + (REC_HEAP_NO_MASK << (8 * (REC_HEAP_NO - 4))) - + (REC_N_OWNED_MASK << (8 * (REC_N_OWNED - 3))) - + (REC_INFO_BITS_MASK << (8 * (REC_INFO_BITS - 3)))); - if (m != ut_dbg_zero + 0xFFFFFFFFUL) { - fprintf(stderr, "Sum of masks %lx\n", m); - ut_error; - } - } -#endif mach_write_to_2(rec - offs, (mach_read_from_2(rec - offs) & ~mask) | (val << shift)); @@ -208,17 +253,26 @@ ulint rec_get_next_offs( /*==============*/ /* out: the page offset of the next chained record */ - rec_t* rec) /* in: physical record */ + rec_t* rec, /* in: physical record */ + ibool comp) /* in: TRUE=compact page format */ { - ulint ret; - - ut_ad(rec); - - ret = rec_get_bit_field_2(rec, REC_NEXT, REC_NEXT_MASK, - REC_NEXT_SHIFT); - ut_ad(ret < UNIV_PAGE_SIZE); - - return(ret); + if (comp) { + lint ret = (int16_t) rec_get_bit_field_2(rec, REC_NEXT, + REC_NEXT_MASK, REC_NEXT_SHIFT); +#if UNIV_PAGE_SIZE <= 32768 + /* with 64 KiB page size, the pointer will "wrap around", + and the following assertions are invalid */ + ut_ad(ret + ut_align_offset(rec, UNIV_PAGE_SIZE) < + UNIV_PAGE_SIZE); +#endif + return(ret ? ut_align_offset(rec + ret, UNIV_PAGE_SIZE) : 0); + } + else { + ulint ret = rec_get_bit_field_2(rec, REC_NEXT, + REC_NEXT_MASK, REC_NEXT_SHIFT); + ut_ad(ret < UNIV_PAGE_SIZE); + return(ret); + } } /********************************************************** @@ -229,21 +283,32 @@ void rec_set_next_offs( /*==============*/ rec_t* rec, /* in: physical record */ + ibool comp, /* in: TRUE=compact page format */ ulint next) /* in: offset of the next record */ { ut_ad(rec); ut_ad(UNIV_PAGE_SIZE > next); - rec_set_bit_field_2(rec, next, REC_NEXT, REC_NEXT_MASK, - REC_NEXT_SHIFT); + if (comp) { + rec_set_bit_field_2(rec, next + ? (next - ut_align_offset(rec, UNIV_PAGE_SIZE)) +#ifdef UNIV_DEBUG /* avoid an assertion failure */ + & (REC_NEXT_MASK >> REC_NEXT_SHIFT) +#endif + : 0, REC_NEXT, REC_NEXT_MASK, REC_NEXT_SHIFT); + } else { + rec_set_bit_field_2(rec, next, + REC_NEXT, REC_NEXT_MASK, REC_NEXT_SHIFT); + } } /********************************************************** -The following function is used to get the number of fields in the record. */ +The following function is used to get the number of fields +in an old-style record. 
*/ UNIV_INLINE ulint -rec_get_n_fields( -/*=============*/ +rec_get_n_fields_old( +/*=================*/ /* out: number of data fields */ rec_t* rec) /* in: physical record */ { @@ -251,8 +316,8 @@ rec_get_n_fields( ut_ad(rec); - ret = rec_get_bit_field_2(rec, REC_N_FIELDS, REC_N_FIELDS_MASK, - REC_N_FIELDS_SHIFT); + ret = rec_get_bit_field_2(rec, REC_OLD_N_FIELDS, + REC_OLD_N_FIELDS_MASK, REC_OLD_N_FIELDS_SHIFT); ut_ad(ret <= REC_MAX_N_FIELDS); ut_ad(ret > 0); @@ -260,12 +325,12 @@ rec_get_n_fields( } /********************************************************** -The following function is used to set the number of fields field in the -record. */ +The following function is used to set the number of fields +in an old-style record. */ UNIV_INLINE void -rec_set_n_fields( -/*=============*/ +rec_set_n_fields_old( +/*=================*/ rec_t* rec, /* in: physical record */ ulint n_fields) /* in: the number of fields */ { @@ -273,8 +338,38 @@ rec_set_n_fields( ut_ad(n_fields <= REC_MAX_N_FIELDS); ut_ad(n_fields > 0); - rec_set_bit_field_2(rec, n_fields, REC_N_FIELDS, REC_N_FIELDS_MASK, - REC_N_FIELDS_SHIFT); + rec_set_bit_field_2(rec, n_fields, REC_OLD_N_FIELDS, + REC_OLD_N_FIELDS_MASK, REC_OLD_N_FIELDS_SHIFT); +} + +/********************************************************** +The following function is used to get the number of fields +in a record. */ +UNIV_INLINE +ulint +rec_get_n_fields( +/*=============*/ + /* out: number of data fields */ + rec_t* rec, /* in: physical record */ + dict_index_t* index) /* in: record descriptor */ +{ + ut_ad(rec); + ut_ad(index); + if (!index->table->comp) { + return(rec_get_n_fields_old(rec)); + } + switch (rec_get_status(rec)) { + case REC_STATUS_ORDINARY: + return(dict_index_get_n_fields(index)); + case REC_STATUS_NODE_PTR: + return(dict_index_get_n_unique_in_tree(index) + 1); + case REC_STATUS_INFIMUM: + case REC_STATUS_SUPREMUM: + return(1); + default: + ut_error; + return(ULINT_UNDEFINED); + } } /********************************************************** @@ -285,14 +380,16 @@ ulint rec_get_n_owned( /*============*/ /* out: number of owned records */ - rec_t* rec) /* in: physical record */ + rec_t* rec, /* in: physical record */ + ibool comp) /* in: TRUE=compact page format */ { ulint ret; ut_ad(rec); - ret = rec_get_bit_field_1(rec, REC_N_OWNED, REC_N_OWNED_MASK, - REC_N_OWNED_SHIFT); + ret = rec_get_bit_field_1(rec, + comp ? REC_NEW_N_OWNED : REC_OLD_N_OWNED, + REC_N_OWNED_MASK, REC_N_OWNED_SHIFT); ut_ad(ret <= REC_MAX_N_OWNED); return(ret); @@ -305,13 +402,15 @@ void rec_set_n_owned( /*============*/ rec_t* rec, /* in: physical record */ + ibool comp, /* in: TRUE=compact page format */ ulint n_owned) /* in: the number of owned */ { ut_ad(rec); ut_ad(n_owned <= REC_MAX_N_OWNED); - rec_set_bit_field_1(rec, n_owned, REC_N_OWNED, REC_N_OWNED_MASK, - REC_N_OWNED_SHIFT); + rec_set_bit_field_1(rec, n_owned, + comp ? REC_NEW_N_OWNED : REC_OLD_N_OWNED, + REC_N_OWNED_MASK, REC_N_OWNED_SHIFT); } /********************************************************** @@ -321,14 +420,16 @@ ulint rec_get_info_bits( /*==============*/ /* out: info bits */ - rec_t* rec) /* in: physical record */ + rec_t* rec, /* in: physical record */ + ibool comp) /* in: TRUE=compact page format */ { ulint ret; ut_ad(rec); - ret = rec_get_bit_field_1(rec, REC_INFO_BITS, REC_INFO_BITS_MASK, - REC_INFO_BITS_SHIFT); + ret = rec_get_bit_field_1(rec, + comp ? 
REC_NEW_INFO_BITS : REC_OLD_INFO_BITS, + REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT); ut_ad((ret & ~REC_INFO_BITS_MASK) == 0); return(ret); @@ -341,30 +442,51 @@ void rec_set_info_bits( /*==============*/ rec_t* rec, /* in: physical record */ + ibool comp, /* in: TRUE=compact page format */ ulint bits) /* in: info bits */ { ut_ad(rec); ut_ad((bits & ~REC_INFO_BITS_MASK) == 0); - rec_set_bit_field_1(rec, bits, REC_INFO_BITS, REC_INFO_BITS_MASK, - REC_INFO_BITS_SHIFT); + rec_set_bit_field_1(rec, bits, + comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS, + REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT); } /********************************************************** -Gets the value of the deleted flag in info bits. */ +The following function retrieves the status bits of a new-style record. */ UNIV_INLINE -ibool -rec_info_bits_get_deleted_flag( -/*===========================*/ - /* out: TRUE if deleted flag set */ - ulint info_bits) /* in: info bits from a record */ +ulint +rec_get_status( +/*===========*/ + /* out: status bits */ + rec_t* rec) /* in: physical record */ { - if (info_bits & REC_INFO_DELETED_FLAG) { + ulint ret; - return(TRUE); - } + ut_ad(rec); - return(FALSE); + ret = rec_get_bit_field_1(rec, REC_NEW_STATUS, + REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT); + ut_ad((ret & ~REC_NEW_STATUS_MASK) == 0); + + return(ret); +} + +/********************************************************** +The following function is used to set the status bits of a new-style record. */ +UNIV_INLINE +void +rec_set_status( +/*===========*/ + rec_t* rec, /* in: physical record */ + ulint bits) /* in: info bits */ +{ + ut_ad(rec); + ut_ad((bits & ~REC_NEW_STATUS_MASK) == 0); + + rec_set_bit_field_1(rec, bits, REC_NEW_STATUS, + REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT); } /********************************************************** @@ -374,9 +496,10 @@ ibool rec_get_deleted_flag( /*=================*/ /* out: TRUE if delete marked */ - rec_t* rec) /* in: physical record */ + rec_t* rec, /* in: physical record */ + ibool comp) /* in: TRUE=compact page format */ { - if (REC_INFO_DELETED_FLAG & rec_get_info_bits(rec)) { + if (REC_INFO_DELETED_FLAG & rec_get_info_bits(rec, comp)) { return(TRUE); } @@ -391,6 +514,7 @@ void rec_set_deleted_flag( /*=================*/ rec_t* rec, /* in: physical record */ + ibool comp, /* in: TRUE=compact page format */ ibool flag) /* in: TRUE if delete marked */ { ulint old_val; @@ -399,7 +523,7 @@ rec_set_deleted_flag( ut_ad(TRUE == 1); ut_ad(flag <= TRUE); - old_val = rec_get_info_bits(rec); + old_val = rec_get_info_bits(rec, comp); if (flag) { new_val = REC_INFO_DELETED_FLAG | old_val; @@ -407,7 +531,39 @@ rec_set_deleted_flag( new_val = ~REC_INFO_DELETED_FLAG & old_val; } - rec_set_info_bits(rec, new_val); + rec_set_info_bits(rec, comp, new_val); +} + +/********************************************************** +The following function tells if a new-style record is a node pointer. */ +UNIV_INLINE +ibool +rec_get_node_ptr_flag( +/*=================*/ + /* out: TRUE if node pointer */ + rec_t* rec) /* in: physical record */ +{ + return(REC_STATUS_NODE_PTR == rec_get_status(rec)); +} + +/********************************************************** +The following function is used to flag a record as a node pointer. 
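Only new-style records have status bits, so the node-pointer property is
stored explicitly in them instead of being deduced from the B-tree page
level as for old-style records.  Note that the status of a record built
from a data tuple appears to be carried in the info bits of the tuple:
compare the assertion on dtuple_get_info_bits() in rec_get_converted_size()
further below, where a node pointer tuple is expected to carry
REC_STATUS_NODE_PTR.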
*/ +UNIV_INLINE +void +rec_set_node_ptr_flag( +/*=================*/ + rec_t* rec, /* in: physical record */ + ibool flag) /* in: TRUE if the record is a node pointer */ +{ + ulint status; + ut_ad(flag <= TRUE); + ut_ad(REC_STATUS_NODE_PTR >= rec_get_status(rec)); + if (flag) { + status = REC_STATUS_NODE_PTR; + } else { + status = REC_STATUS_ORDINARY; + } + rec_set_status(rec, status); } /********************************************************** @@ -418,14 +574,16 @@ ulint rec_get_heap_no( /*=============*/ /* out: heap order number */ - rec_t* rec) /* in: physical record */ + rec_t* rec, /* in: physical record */ + ibool comp) /* in: TRUE=compact page format */ { ulint ret; ut_ad(rec); - ret = rec_get_bit_field_2(rec, REC_HEAP_NO, REC_HEAP_NO_MASK, - REC_HEAP_NO_SHIFT); + ret = rec_get_bit_field_2(rec, + comp ? REC_NEW_HEAP_NO : REC_OLD_HEAP_NO, + REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT); ut_ad(ret <= REC_MAX_HEAP_NO); return(ret); @@ -438,12 +596,14 @@ void rec_set_heap_no( /*=============*/ rec_t* rec, /* in: physical record */ + ibool comp, /* in: TRUE=compact page format */ ulint heap_no)/* in: the heap number */ { ut_ad(heap_no <= REC_MAX_HEAP_NO); - rec_set_bit_field_2(rec, heap_no, REC_HEAP_NO, REC_HEAP_NO_MASK, - REC_HEAP_NO_SHIFT); + rec_set_bit_field_2(rec, heap_no, + comp ? REC_NEW_HEAP_NO : REC_OLD_HEAP_NO, + REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT); } /********************************************************** @@ -456,10 +616,12 @@ rec_get_1byte_offs_flag( /* out: TRUE if 1-byte form */ rec_t* rec) /* in: physical record */ { - ut_ad(TRUE == 1); +#if TRUE != 1 +#error "TRUE != 1" +#endif - return(rec_get_bit_field_1(rec, REC_SHORT, REC_SHORT_MASK, - REC_SHORT_SHIFT)); + return(rec_get_bit_field_1(rec, REC_OLD_SHORT, REC_OLD_SHORT_MASK, + REC_OLD_SHORT_SHIFT)); } /********************************************************** @@ -471,11 +633,13 @@ rec_set_1byte_offs_flag( rec_t* rec, /* in: physical record */ ibool flag) /* in: TRUE if 1byte form */ { - ut_ad(TRUE == 1); +#if TRUE != 1 +#error "TRUE != 1" +#endif ut_ad(flag <= TRUE); - rec_set_bit_field_1(rec, flag, REC_SHORT, REC_SHORT_MASK, - REC_SHORT_SHIFT); + rec_set_bit_field_1(rec, flag, REC_OLD_SHORT, REC_OLD_SHORT_MASK, + REC_OLD_SHORT_SHIFT); } /********************************************************** @@ -492,9 +656,9 @@ rec_1_get_field_end_info( ulint n) /* in: field index */ { ut_ad(rec_get_1byte_offs_flag(rec)); - ut_ad(n < rec_get_n_fields(rec)); + ut_ad(n < rec_get_n_fields_old(rec)); - return(mach_read_from_1(rec - (REC_N_EXTRA_BYTES + n + 1))); + return(mach_read_from_1(rec - (REC_N_OLD_EXTRA_BYTES + n + 1))); } /********************************************************** @@ -511,68 +675,234 @@ rec_2_get_field_end_info( ulint n) /* in: field index */ { ut_ad(!rec_get_1byte_offs_flag(rec)); - ut_ad(n < rec_get_n_fields(rec)); + ut_ad(n < rec_get_n_fields_old(rec)); - return(mach_read_from_2(rec - (REC_N_EXTRA_BYTES + 2 * n + 2))); + return(mach_read_from_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n + 2))); } -/*************************************************************** -Gets the value of the ith field extern storage bit. If it is TRUE -it means that the field is stored on another page. */ +#ifdef UNIV_DEBUG +# define REC_OFFS_HEADER_SIZE 3 +#else /* UNIV_DEBUG */ +# define REC_OFFS_HEADER_SIZE 1 +#endif /* UNIV_DEBUG */ + +/* Get the base address of offsets. The extra_size is stored at +this position, and following positions hold the end offsets of +the fields. 
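Schematically, for a two-field old-style record whose data lengths are 6 and
10 bytes and whose extra header takes 8 bytes, the array might look like

	offsets[0]			2	(number of fields)
	rec_offs_base(offsets)[0]	8	(extra size; REC_OFFS_COMPACT clear)
	rec_offs_base(offsets)[1]	6	(end offset of field 0)
	rec_offs_base(offsets)[2]	16	(end offset of field 1 = data size)

with two additional debug slots between offsets[0] and the base when
UNIV_DEBUG is defined (they cache the rec and index pointers, see
rec_offs_validate() below).  SQL NULL fields and externally stored fields
have REC_OFFS_SQL_NULL or REC_OFFS_EXTERNAL ORed into their end offsets.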
*/ +#define rec_offs_base(offsets) (offsets + REC_OFFS_HEADER_SIZE) + +/**************************************************************** +Validates offsets returned by rec_get_offsets() or rec_reget_offsets(). */ UNIV_INLINE ibool -rec_get_nth_field_extern_bit( -/*=========================*/ - /* in: TRUE or FALSE */ - rec_t* rec, /* in: record */ - ulint i) /* in: ith field */ +rec_offs_validate( +/*==============*/ + /* out: TRUE if valid */ + rec_t* rec, /* in: record or NULL */ + dict_index_t* index, /* in: record descriptor or NULL */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { - ulint info; + ulint i = rec_offs_n_fields(offsets); + ulint last = ULINT_MAX; + ibool comp = (*rec_offs_base(offsets) & REC_OFFS_COMPACT) != 0; + ut_a(offsets); + if (rec) { + ut_ad((ulint) rec == offsets[1]); + if (!comp) { + ut_a(rec_get_n_fields_old(rec) >= i); + } + } + if (index) { + ulint max_n_fields; + ut_ad((ulint) index == offsets[2]); + max_n_fields = ut_max( + dict_index_get_n_fields(index), + dict_index_get_n_unique_in_tree(index) + 1); + if (comp && rec) { + switch (rec_get_status(rec)) { + case REC_STATUS_ORDINARY: + break; + case REC_STATUS_NODE_PTR: + max_n_fields = + dict_index_get_n_unique_in_tree(index) + 1; + break; + case REC_STATUS_INFIMUM: + case REC_STATUS_SUPREMUM: + max_n_fields = 1; + break; + default: + ut_error; + } + } + ut_a(i <= max_n_fields); + } + while (i--) { + ulint curr = rec_offs_base(offsets)[1 + i] & REC_OFFS_MASK; + ut_a(curr <= last); + last = curr; + } + return(TRUE); +} +/**************************************************************** +Updates debug data in offsets, in order to avoid bogus +rec_offs_validate() failures. */ +UNIV_INLINE +void +rec_offs_make_valid( +/*================*/ + const rec_t* rec __attribute__((unused)), + /* in: record */ + const dict_index_t* index __attribute__((unused)), + /* in: record descriptor */ + ulint* offsets __attribute__((unused))) + /* in: array returned by rec_get_offsets() + or rec_reget_offsets() */ +{ +#ifdef UNIV_DEBUG + offsets[1] = (ulint) rec; + offsets[2] = (ulint) index; +#endif /* UNIV_DEBUG */ +} - if (rec_get_1byte_offs_flag(rec)) { +/**************************************************************** +The following function is used to get a pointer to the nth +data field in an old-style record. */ +UNIV_INLINE +byte* +rec_get_nth_field( +/*==============*/ + /* out: pointer to the field */ + rec_t* rec, /* in: record */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n, /* in: index of the field */ + ulint* len) /* out: length of the field; UNIV_SQL_NULL + if SQL null */ +{ + byte* field; + ulint length; + ut_ad(rec); + ut_ad(rec_offs_validate(rec, NULL, offsets)); + ut_ad(n < rec_offs_n_fields(offsets)); + ut_ad(len); - return(FALSE); + if (n == 0) { + field = rec; + } else { + field = rec + (rec_offs_base(offsets)[n] & REC_OFFS_MASK); } - info = rec_2_get_field_end_info(rec, i); + length = rec_offs_base(offsets)[1 + n]; - if (info & REC_2BYTE_EXTERN_MASK) { - return(TRUE); + if (length & REC_OFFS_SQL_NULL) { + field = NULL; + length = UNIV_SQL_NULL; + } else { + length &= REC_OFFS_MASK; + length -= field - rec; } - return(FALSE); + *len = length; + return(field); +} + +/********************************************************** +Determine if the offsets are for a record in the new +compact format. 
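A typical caller computes the offsets once and then accesses the record only
through them, roughly as follows (the further parameters of rec_get_offsets()
and the related rec_reget_offsets() are omitted from this sketch):

	offsets = rec_get_offsets(rec, index, ...);
	data = rec_get_nth_field(rec, offsets, i, &len);

rec_offs_comp() then tells from the offsets alone whether they describe a
record in the new compact format, without looking at the record or the
index again.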
*/ +UNIV_INLINE +ibool +rec_offs_comp( +/*==========*/ + /* out: TRUE if compact format */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ +{ + ut_ad(rec_offs_validate(NULL, NULL, offsets)); + return((*rec_offs_base(offsets) & REC_OFFS_COMPACT) != 0); +} + +/********************************************************** +Returns TRUE if the nth field of rec is SQL NULL. */ +UNIV_INLINE +ibool +rec_offs_nth_null( +/*==============*/ + /* out: TRUE if SQL NULL */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n) /* in: nth field */ +{ + ut_ad(rec_offs_validate(NULL, NULL, offsets)); + ut_ad(n < rec_offs_n_fields(offsets)); + return((rec_offs_base(offsets)[1 + n] & REC_OFFS_SQL_NULL) != 0); +} +/********************************************************** +Returns TRUE if the extern bit is set in nth field of rec. */ +UNIV_INLINE +ibool +rec_offs_nth_extern( +/*================*/ + /* out: TRUE if externally stored */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n) /* in: nth field */ +{ + ut_ad(rec_offs_validate(NULL, NULL, offsets)); + ut_ad(n < rec_offs_n_fields(offsets)); + return((rec_offs_base(offsets)[1 + n] & REC_OFFS_EXTERNAL) != 0); +} + +/********************************************************** +Gets the physical size of a field. */ +UNIV_INLINE +ulint +rec_offs_nth_size( +/*==============*/ + /* out: length of field */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n) /* in: nth field */ +{ + ut_ad(rec_offs_validate(NULL, NULL, offsets)); + ut_ad(n < rec_offs_n_fields(offsets)); + return(rec_offs_base(offsets)[1 + n] & REC_OFFS_MASK); } /********************************************************** Returns TRUE if the extern bit is set in any of the fields -of rec. */ +of an old-style record. */ UNIV_INLINE ibool -rec_contains_externally_stored_field( -/*=================================*/ - /* out: TRUE if a field is stored externally */ - rec_t* rec) /* in: record */ +rec_offs_any_extern( +/*================*/ + /* out: TRUE if a field is stored externally */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { - ulint n; ulint i; - - if (rec_get_1byte_offs_flag(rec)) { - - return(FALSE); - } - - n = rec_get_n_fields(rec); - - for (i = 0; i < n; i++) { - if (rec_get_nth_field_extern_bit(rec, i)) { - + for (i = rec_offs_n_fields(offsets); i--; ) { + if (rec_offs_nth_extern(offsets, i)) { return(TRUE); } } - return(FALSE); } +/*************************************************************** +Sets the value of the ith field extern storage bit. */ +UNIV_INLINE +void +rec_set_nth_field_extern_bit( +/*=========================*/ + rec_t* rec, /* in: record */ + dict_index_t* index, /* in: record descriptor */ + ulint i, /* in: ith field */ + ibool val, /* in: value to set */ + mtr_t* mtr) /* in: mtr holding an X-latch to the page + where rec is, or NULL; in the NULL case + we do not write to log about the change */ +{ + if (index->table->comp) { + rec_set_nth_field_extern_bit_new(rec, index, i, val, mtr); + } else { + rec_set_nth_field_extern_bit_old(rec, i, val, mtr); + } +} + /********************************************************** Returns the offset of n - 1th field end if the record is stored in the 1-byte offsets form. 
If the field is SQL null, the flag is ORed in the returned @@ -589,9 +919,9 @@ rec_1_get_prev_field_end_info( ulint n) /* in: field index */ { ut_ad(rec_get_1byte_offs_flag(rec)); - ut_ad(n <= rec_get_n_fields(rec)); + ut_ad(n <= rec_get_n_fields_old(rec)); - return(mach_read_from_1(rec - (REC_N_EXTRA_BYTES + n))); + return(mach_read_from_1(rec - (REC_N_OLD_EXTRA_BYTES + n))); } /********************************************************** @@ -608,9 +938,9 @@ rec_2_get_prev_field_end_info( ulint n) /* in: field index */ { ut_ad(!rec_get_1byte_offs_flag(rec)); - ut_ad(n <= rec_get_n_fields(rec)); + ut_ad(n <= rec_get_n_fields_old(rec)); - return(mach_read_from_2(rec - (REC_N_EXTRA_BYTES + 2 * n))); + return(mach_read_from_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n))); } /********************************************************** @@ -625,9 +955,9 @@ rec_1_set_field_end_info( ulint info) /* in: value to set */ { ut_ad(rec_get_1byte_offs_flag(rec)); - ut_ad(n < rec_get_n_fields(rec)); + ut_ad(n < rec_get_n_fields_old(rec)); - mach_write_to_1(rec - (REC_N_EXTRA_BYTES + n + 1), info); + mach_write_to_1(rec - (REC_N_OLD_EXTRA_BYTES + n + 1), info); } /********************************************************** @@ -642,9 +972,9 @@ rec_2_set_field_end_info( ulint info) /* in: value to set */ { ut_ad(!rec_get_1byte_offs_flag(rec)); - ut_ad(n < rec_get_n_fields(rec)); + ut_ad(n < rec_get_n_fields_old(rec)); - mach_write_to_2(rec - (REC_N_EXTRA_BYTES + 2 * n + 2), info); + mach_write_to_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n + 2), info); } /********************************************************** @@ -659,7 +989,7 @@ rec_1_get_field_start_offs( ulint n) /* in: field index */ { ut_ad(rec_get_1byte_offs_flag(rec)); - ut_ad(n <= rec_get_n_fields(rec)); + ut_ad(n <= rec_get_n_fields_old(rec)); if (n == 0) { @@ -682,7 +1012,7 @@ rec_2_get_field_start_offs( ulint n) /* in: field index */ { ut_ad(!rec_get_1byte_offs_flag(rec)); - ut_ad(n <= rec_get_n_fields(rec)); + ut_ad(n <= rec_get_n_fields_old(rec)); if (n == 0) { @@ -707,7 +1037,7 @@ rec_get_field_start_offs( ulint n) /* in: field index */ { ut_ad(rec); - ut_ad(n <= rec_get_n_fields(rec)); + ut_ad(n <= rec_get_n_fields_old(rec)); if (n == 0) { @@ -723,8 +1053,9 @@ rec_get_field_start_offs( } /**************************************************************** -Gets the physical size of a field. Also an SQL null may have a field of -size > 0, if the data type is of a fixed size. */ +Gets the physical size of an old-style field. +Also an SQL null may have a field of size > 0, +if the data type is of a fixed size. */ UNIV_INLINE ulint rec_get_nth_field_size( @@ -744,133 +1075,132 @@ rec_get_nth_field_size( return(next_os - os); } -/**************************************************************** -The following function is used to get a copy of the nth data field in a -record to a buffer. */ -UNIV_INLINE -void -rec_copy_nth_field( -/*===============*/ - void* buf, /* in: pointer to the buffer */ - rec_t* rec, /* in: record */ - ulint n, /* in: index of the field */ - ulint* len) /* out: length of the field; UNIV_SQL_NULL if SQL - null */ -{ - byte* ptr; - - ut_ad(buf && rec && len); - - ptr = rec_get_nth_field(rec, n, len); - - if (*len == UNIV_SQL_NULL) { - - return; - } - - ut_memcpy(buf, ptr, *len); -} - /*************************************************************** This is used to modify the value of an already existing field in a record. The previous value must have exactly the same size as the new value. 
If len -is UNIV_SQL_NULL then the field is treated as an SQL null. */ +is UNIV_SQL_NULL then the field is treated as an SQL null for old-style +records. For new-style records, len must not be UNIV_SQL_NULL. */ UNIV_INLINE void rec_set_nth_field( /*==============*/ - rec_t* rec, /* in: record */ - ulint n, /* in: index of the field */ - void* data, /* in: pointer to the data if not SQL null */ - ulint len) /* in: length of the data or UNIV_SQL_NULL */ + rec_t* rec, /* in: record */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n, /* in: index number of the field */ + const void* data, /* in: pointer to the data + if not SQL null */ + ulint len) /* in: length of the data or UNIV_SQL_NULL. + If not SQL null, must have the same + length as the previous value. + If SQL null, previous value must be + SQL null. */ { byte* data2; ulint len2; - ut_ad((len == UNIV_SQL_NULL) - || (rec_get_nth_field_size(rec, n) == len)); - + ut_ad(rec_offs_validate(rec, NULL, offsets)); + if (len == UNIV_SQL_NULL) { + ut_ad(!rec_offs_comp(offsets)); rec_set_nth_field_sql_null(rec, n); return; } - data2 = rec_get_nth_field(rec, n, &len2); + data2 = rec_get_nth_field(rec, offsets, n, &len2); + ut_ad(len2 == len); ut_memcpy(data2, data, len); if (len2 == UNIV_SQL_NULL) { - + ut_ad(!rec_offs_comp(offsets)); rec_set_nth_field_null_bit(rec, n, FALSE); } } /************************************************************** +The following function returns the data size of an old-style physical +record, that is the sum of field lengths. SQL null fields +are counted as length 0 fields. The value returned by the function +is the distance from record origin to record end in bytes. */ +UNIV_INLINE +ulint +rec_get_data_size_old( +/*==================*/ + /* out: size */ + rec_t* rec) /* in: physical record */ +{ + ut_ad(rec); + + return(rec_get_field_start_offs(rec, rec_get_n_fields_old(rec))); +} + +/************************************************************** +The following function returns the number of fields in a record. */ +UNIV_INLINE +ulint +rec_offs_n_fields( +/*===============*/ + /* out: number of fields */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ +{ + ulint n_fields; + ut_ad(offsets); + n_fields = offsets[0]; + ut_ad(n_fields > 0); + ut_ad(n_fields <= REC_MAX_N_FIELDS); + return(n_fields); +} + +/************************************************************** The following function returns the data size of a physical record, that is the sum of field lengths. SQL null fields are counted as length 0 fields. The value returned by the function is the distance from record origin to record end in bytes. */ UNIV_INLINE ulint -rec_get_data_size( -/*==============*/ - /* out: size */ - rec_t* rec) /* in: physical record */ +rec_offs_data_size( +/*===============*/ + /* out: size */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { - ut_ad(rec); + ulint size; - return(rec_get_field_start_offs(rec, rec_get_n_fields(rec))); + ut_ad(rec_offs_validate(NULL, NULL, offsets)); + size = rec_offs_base(offsets)[rec_offs_n_fields(offsets)] + & REC_OFFS_MASK; + ut_ad(size < UNIV_PAGE_SIZE); + return(size); } -/************************************************************** +/************************************************************** Returns the total size of record minus data size of record. The value returned by the function is the distance from record start to record origin in bytes. 
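Together with rec_offs_data_size() this gives rec_offs_size(), the full size
of the record.  A caller copying a record might do roughly the following
(heap being any caller-owned memory heap; a sketch only):

	buf = mem_heap_alloc(heap, rec_offs_size(offsets));
	copy = rec_copy(buf, rec, offsets);

after which copy - rec_offs_extra_size(offsets) is the start and
copy + rec_offs_data_size(offsets) the end of the copy, compare
rec_get_start(), rec_get_end() and rec_copy() further below.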
*/ UNIV_INLINE ulint -rec_get_extra_size( -/*===============*/ - /* out: size */ - rec_t* rec) /* in: physical record */ +rec_offs_extra_size( +/*================*/ + /* out: size */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { - ulint n_fields; - - ut_ad(rec); - - n_fields = rec_get_n_fields(rec); - - if (rec_get_1byte_offs_flag(rec)) { - - return(REC_N_EXTRA_BYTES + n_fields); - } - - return(REC_N_EXTRA_BYTES + 2 * n_fields); + ulint size; + ut_ad(rec_offs_validate(NULL, NULL, offsets)); + size = *rec_offs_base(offsets) & ~REC_OFFS_COMPACT; + ut_ad(size < UNIV_PAGE_SIZE); + return(size); } -/************************************************************** +/************************************************************** Returns the total size of a physical record. */ UNIV_INLINE ulint -rec_get_size( -/*=========*/ - /* out: size */ - rec_t* rec) /* in: physical record */ +rec_offs_size( +/*==========*/ + /* out: size */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { - ulint n_fields; - - ut_ad(rec); - - n_fields = rec_get_n_fields(rec); - - if (rec_get_1byte_offs_flag(rec)) { - - return(REC_N_EXTRA_BYTES + n_fields - + rec_1_get_field_start_offs(rec, n_fields)); - } - - return(REC_N_EXTRA_BYTES + 2 * n_fields - + rec_2_get_field_start_offs(rec, n_fields)); + return(rec_offs_data_size(offsets) + rec_offs_extra_size(offsets)); } /************************************************************** @@ -879,10 +1209,11 @@ UNIV_INLINE byte* rec_get_end( /*========*/ - /* out: pointer to end */ - rec_t* rec) /* in: pointer to record */ + /* out: pointer to end */ + rec_t* rec, /* in: pointer to record */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { - return(rec + rec_get_data_size(rec)); + return(rec + rec_offs_data_size(offsets)); } /************************************************************** @@ -891,10 +1222,11 @@ UNIV_INLINE byte* rec_get_start( /*==========*/ - /* out: pointer to start */ - rec_t* rec) /* in: pointer to record */ + /* out: pointer to start */ + rec_t* rec, /* in: pointer to record */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { - return(rec - rec_get_extra_size(rec)); + return(rec - rec_offs_extra_size(offsets)); } /******************************************************************* @@ -903,18 +1235,20 @@ UNIV_INLINE rec_t* rec_copy( /*=====*/ - /* out: pointer to the origin of the copied record */ - void* buf, /* in: buffer */ - rec_t* rec) /* in: physical record */ + /* out: pointer to the origin of the copy */ + void* buf, /* in: buffer */ + const rec_t* rec, /* in: physical record */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { ulint extra_len; ulint data_len; ut_ad(rec && buf); - ut_ad(rec_validate(rec)); + ut_ad(rec_offs_validate((rec_t*) rec, NULL, offsets)); + ut_ad(rec_validate((rec_t*) rec, offsets)); - extra_len = rec_get_extra_size(rec); - data_len = rec_get_data_size(rec); + extra_len = rec_offs_extra_size(offsets); + data_len = rec_offs_data_size(offsets); ut_memcpy(buf, rec - extra_len, extra_len + data_len); @@ -922,8 +1256,8 @@ rec_copy( } /************************************************************** -Returns the extra size of a physical record if we know its data size and -the number of fields. */ +Returns the extra size of an old-style physical record if we know its +data size and number of fields. 
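For example, a tuple of 3 fields whose data size does not exceed
REC_1BYTE_OFFS_LIMIT needs REC_N_OLD_EXTRA_BYTES + 3 extra bytes, one end
offset byte per field; above that limit two offset bytes per field are
needed.  For the new compact format the extra size depends on the index
definition instead, see rec_get_converted_size_new() declared below.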
*/ UNIV_INLINE ulint rec_get_converted_extra_size( @@ -934,12 +1268,22 @@ rec_get_converted_extra_size( { if (data_size <= REC_1BYTE_OFFS_LIMIT) { - return(REC_N_EXTRA_BYTES + n_fields); + return(REC_N_OLD_EXTRA_BYTES + n_fields); } - return(REC_N_EXTRA_BYTES + 2 * n_fields); + return(REC_N_OLD_EXTRA_BYTES + 2 * n_fields); } +/************************************************************** +The following function returns the size of a data tuple when converted to +a new-style physical record. */ + +ulint +rec_get_converted_size_new( +/*=======================*/ + /* out: size */ + dict_index_t* index, /* in: record descriptor */ + dtuple_t* dtuple);/* in: data tuple */ /************************************************************** The following function returns the size of a data tuple when converted to a physical record. */ @@ -948,14 +1292,27 @@ ulint rec_get_converted_size( /*===================*/ /* out: size */ + dict_index_t* index, /* in: record descriptor */ dtuple_t* dtuple) /* in: data tuple */ { ulint data_size; ulint extra_size; - + + ut_ad(index); ut_ad(dtuple); ut_ad(dtuple_check_typed(dtuple)); + ut_ad(index->type & DICT_UNIVERSAL + || dtuple_get_n_fields(dtuple) == + (((dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK) + == REC_STATUS_NODE_PTR) + ? dict_index_get_n_unique_in_tree(index) + 1 + : dict_index_get_n_fields(index))); + + if (index->table->comp) { + return(rec_get_converted_size_new(index, dtuple)); + } + data_size = dtuple_get_data_size(dtuple); extra_size = rec_get_converted_extra_size( @@ -971,12 +1328,15 @@ UNIV_INLINE ulint rec_fold( /*=====*/ - /* out: the folded value */ - rec_t* rec, /* in: the physical record */ - ulint n_fields, /* in: number of complete fields to fold */ - ulint n_bytes, /* in: number of bytes to fold in an - incomplete last field */ - dulint tree_id) /* in: index tree id */ + /* out: the folded value */ + rec_t* rec, /* in: the physical record */ + const ulint* offsets, /* in: array returned by + rec_get_offsets() */ + ulint n_fields, /* in: number of complete + fields to fold */ + ulint n_bytes, /* in: number of bytes to fold + in an incomplete last field */ + dulint tree_id) /* in: index tree id */ { ulint i; byte* data; @@ -984,12 +1344,13 @@ rec_fold( ulint fold; ulint n_fields_rec; - ut_ad(rec_validate(rec)); - ut_ad(n_fields <= rec_get_n_fields(rec)); - ut_ad((n_fields < rec_get_n_fields(rec)) || (n_bytes == 0)); + ut_ad(rec_offs_validate(rec, NULL, offsets)); + ut_ad(rec_validate((rec_t*) rec, offsets)); ut_ad(n_fields + n_bytes > 0); - - n_fields_rec = rec_get_n_fields(rec); + + n_fields_rec = rec_offs_n_fields(offsets); + ut_ad(n_fields <= n_fields_rec); + ut_ad(n_fields < n_fields_rec || n_bytes == 0); if (n_fields > n_fields_rec) { n_fields = n_fields_rec; @@ -1002,7 +1363,7 @@ rec_fold( fold = ut_fold_dulint(tree_id); for (i = 0; i < n_fields; i++) { - data = rec_get_nth_field(rec, i, &len); + data = rec_get_nth_field(rec, offsets, i, &len); if (len != UNIV_SQL_NULL) { fold = ut_fold_ulint_pair(fold, @@ -1011,7 +1372,7 @@ rec_fold( } if (n_bytes > 0) { - data = rec_get_nth_field(rec, i, &len); + data = rec_get_nth_field(rec, offsets, i, &len); if (len != UNIV_SQL_NULL) { if (len > n_bytes) { @@ -1025,19 +1386,3 @@ rec_fold( return(fold); } - -/************************************************************* -Builds a physical record out of a data tuple and stores it beginning from -the address destination. 
*/ -UNIV_INLINE -rec_t* -rec_convert_dtuple_to_rec( -/*======================*/ - /* out: pointer to the origin of physical - record */ - byte* destination, /* in: start address of the physical record */ - dtuple_t* dtuple) /* in: data tuple */ -{ - return(rec_convert_dtuple_to_rec_low(destination, dtuple, - dtuple_get_data_size(dtuple))); -} diff --git a/innobase/include/row0mysql.h b/innobase/include/row0mysql.h index d4634482752..c23a0e025ad 100644 --- a/innobase/include/row0mysql.h +++ b/innobase/include/row0mysql.h @@ -177,10 +177,12 @@ row_lock_table_for_mysql( /* out: error code or DB_SUCCESS */ row_prebuilt_t* prebuilt, /* in: prebuilt struct in the MySQL table handle */ - dict_table_t* table); /* in: table to LOCK_IX, or NULL + dict_table_t* table, /* in: table to lock, or NULL if prebuilt->table should be locked as LOCK_TABLE_EXP | prebuilt->select_lock_type */ + ulint mode); /* in: lock mode of table */ + /************************************************************************* Does an insert for MySQL. */ @@ -547,7 +549,10 @@ struct row_prebuilt_struct { format */ ulint hint_need_to_fetch_extra_cols; /* normally this is set to 0; if this - is set to ROW_RETRIEVE_PRIMARY_KEY, + is set to ROW_RETRIEVE_PRIMARY_KEY + (that value is obsolete starting from + 5.0.2, because we always fetch the + primary key cols), then we should at least retrieve all columns in the primary key; if this is set to ROW_RETRIEVE_ALL_COLS, then @@ -620,6 +625,9 @@ struct row_prebuilt_struct { /* Values for hint_need_to_fetch_extra_cols */ #define ROW_RETRIEVE_PRIMARY_KEY 1 + /* value 1 is obsolete starting from + 5.0.2, because we always fetch the + primary key cols */ #define ROW_RETRIEVE_ALL_COLS 2 diff --git a/innobase/include/row0row.h b/innobase/include/row0row.h index 951e211fb37..782973d8f5d 100644 --- a/innobase/include/row0row.h +++ b/innobase/include/row0row.h @@ -27,7 +27,8 @@ row_get_rec_trx_id( /*===============*/ /* out: value of the field */ rec_t* rec, /* in: record */ - dict_index_t* index); /* in: clustered index */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets);/* in: rec_get_offsets(rec, index) */ /************************************************************************* Reads the roll pointer field from a clustered index record. */ UNIV_INLINE @@ -36,7 +37,8 @@ row_get_rec_roll_ptr( /*=================*/ /* out: value of the field */ rec_t* rec, /* in: record */ - dict_index_t* index); /* in: clustered index */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets);/* in: rec_get_offsets(rec, index) */ /************************************************************************* Writes the trx id field to a clustered index record. */ UNIV_INLINE @@ -45,7 +47,8 @@ row_set_rec_trx_id( /*===============*/ rec_t* rec, /* in: record */ dict_index_t* index, /* in: clustered index */ - dulint trx_id); /* in: value of the field */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ + dulint trx_id);/* in: value of the field */ /************************************************************************* Sets the roll pointer field in a clustered index record. 
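As with the accessors above, the offsets parameter added here is the array
produced by rec_get_offsets(rec, index).  With the new compact record format
the position of DB_ROLL_PTR (and of DB_TRX_ID) within a record can in
general only be determined with the help of the index, so the caller passes
the precomputed offsets down rather than having each call recompute them.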
*/ UNIV_INLINE @@ -54,6 +57,7 @@ row_set_rec_roll_ptr( /*=================*/ rec_t* rec, /* in: record */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ dulint roll_ptr);/* in: value of the field */ /********************************************************************* When an insert to a table is performed, this function builds the entry which @@ -90,6 +94,9 @@ row_build( the buffer page of this record must be at least s-latched and the latch held as long as the row dtuple is used! */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) + or NULL, in which case this function + will invoke rec_get_offsets() */ mem_heap_t* heap); /* in: memory heap from which the memory needed is allocated */ /*********************************************************************** @@ -175,14 +182,15 @@ UNIV_INLINE void row_build_row_ref_fast( /*===================*/ - dtuple_t* ref, /* in: typed data tuple where the reference - is built */ - ulint* map, /* in: array of field numbers in rec telling - how ref should be built from the fields of - rec */ - rec_t* rec); /* in: record in the index; must be preserved - while ref is used, as we do not copy field - values to heap */ + dtuple_t* ref, /* in: typed data tuple where the + reference is built */ + const ulint* map, /* in: array of field numbers in rec + telling how ref should be built from + the fields of rec */ + rec_t* rec, /* in: record in the index; must be + preserved while ref is used, as we do + not copy field values to heap */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ /******************************************************************* Searches the clustered index record for a row, if we have the row reference. */ diff --git a/innobase/include/row0row.ic b/innobase/include/row0row.ic index 8e5121f5a96..85410beacf0 100644 --- a/innobase/include/row0row.ic +++ b/innobase/include/row0row.ic @@ -20,7 +20,8 @@ row_get_rec_sys_field( /* out: value of the field */ ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */ rec_t* rec, /* in: record */ - dict_index_t* index); /* in: clustered index */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets);/* in: rec_get_offsets(rec, index) */ /************************************************************************* Sets the trx id or roll ptr field in a clustered index record: this function is slower than the specialized inline functions. 
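The inline functions below are faster because they first consult
index->trx_id_offset: when it is nonzero, DB_TRX_ID (and DB_ROLL_PTR
immediately after it) lie at that fixed byte offset in every record of the
clustered index and are read or written directly; only when it is zero do
they fall back to this offsets-based lookup.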
*/ @@ -32,6 +33,7 @@ row_set_rec_sys_field( ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */ rec_t* rec, /* in: record */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ dulint val); /* in: value to set */ /************************************************************************* @@ -42,18 +44,21 @@ row_get_rec_trx_id( /*===============*/ /* out: value of the field */ rec_t* rec, /* in: record */ - dict_index_t* index) /* in: clustered index */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets)/* in: rec_get_offsets(rec, index) */ { ulint offset; ut_ad(index->type & DICT_CLUSTERED); + ut_ad(rec_offs_validate(rec, index, offsets)); offset = index->trx_id_offset; if (offset) { return(trx_read_trx_id(rec + offset)); } else { - return(row_get_rec_sys_field(DATA_TRX_ID, rec, index)); + return(row_get_rec_sys_field(DATA_TRX_ID, + rec, index, offsets)); } } @@ -65,18 +70,21 @@ row_get_rec_roll_ptr( /*=================*/ /* out: value of the field */ rec_t* rec, /* in: record */ - dict_index_t* index) /* in: clustered index */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets)/* in: rec_get_offsets(rec, index) */ { ulint offset; ut_ad(index->type & DICT_CLUSTERED); + ut_ad(rec_offs_validate(rec, index, offsets)); offset = index->trx_id_offset; if (offset) { return(trx_read_roll_ptr(rec + offset + DATA_TRX_ID_LEN)); } else { - return(row_get_rec_sys_field(DATA_ROLL_PTR, rec, index)); + return(row_get_rec_sys_field(DATA_ROLL_PTR, + rec, index, offsets)); } } @@ -88,18 +96,21 @@ row_set_rec_trx_id( /*===============*/ rec_t* rec, /* in: record */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ dulint trx_id) /* in: value of the field */ { ulint offset; ut_ad(index->type & DICT_CLUSTERED); + ut_ad(rec_offs_validate(rec, index, offsets)); offset = index->trx_id_offset; if (offset) { trx_write_trx_id(rec + offset, trx_id); } else { - row_set_rec_sys_field(DATA_TRX_ID, rec, index, trx_id); + row_set_rec_sys_field(DATA_TRX_ID, + rec, index, offsets, trx_id); } } @@ -111,18 +122,21 @@ row_set_rec_roll_ptr( /*=================*/ rec_t* rec, /* in: record */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ dulint roll_ptr)/* in: value of the field */ { ulint offset; ut_ad(index->type & DICT_CLUSTERED); + ut_ad(rec_offs_validate(rec, index, offsets)); offset = index->trx_id_offset; if (offset) { trx_write_roll_ptr(rec + offset + DATA_TRX_ID_LEN, roll_ptr); } else { - row_set_rec_sys_field(DATA_ROLL_PTR, rec, index, roll_ptr); + row_set_rec_sys_field(DATA_ROLL_PTR, + rec, index, offsets, roll_ptr); } } @@ -133,14 +147,15 @@ UNIV_INLINE void row_build_row_ref_fast( /*===================*/ - dtuple_t* ref, /* in: typed data tuple where the reference - is built */ - ulint* map, /* in: array of field numbers in rec telling - how ref should be built from the fields of - rec */ - rec_t* rec) /* in: record in the index; must be preserved - while ref is used, as we do not copy field - values to heap */ + dtuple_t* ref, /* in: typed data tuple where the + reference is built */ + const ulint* map, /* in: array of field numbers in rec + telling how ref should be built from + the fields of rec */ + rec_t* rec, /* in: record in the index; must be + preserved while ref is used, as we do + not copy field values to heap */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { dfield_t* 
dfield; byte* field; @@ -149,6 +164,7 @@ row_build_row_ref_fast( ulint field_no; ulint i; + ut_ad(rec_offs_validate(rec, NULL, offsets)); ref_len = dtuple_get_n_fields(ref); for (i = 0; i < ref_len; i++) { @@ -158,7 +174,8 @@ row_build_row_ref_fast( if (field_no != ULINT_UNDEFINED) { - field = rec_get_nth_field(rec, field_no, &len); + field = rec_get_nth_field(rec, offsets, + field_no, &len); dfield_set_data(dfield, field, len); } } diff --git a/innobase/include/row0upd.h b/innobase/include/row0upd.h index 28210364833..673e0511153 100644 --- a/innobase/include/row0upd.h +++ b/innobase/include/row0upd.h @@ -80,6 +80,7 @@ row_upd_rec_sys_fields( /*===================*/ rec_t* rec, /* in: record */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ trx_t* trx, /* in: transaction */ dulint roll_ptr);/* in: roll ptr of the undo log record */ /************************************************************************* @@ -124,8 +125,8 @@ row_upd_changes_field_size_or_external( /* out: TRUE if the update changes the size of some field in index or the field is external in rec or update */ - rec_t* rec, /* in: record in index */ dict_index_t* index, /* in: index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ upd_t* update);/* in: update vector */ /*************************************************************** Replaces the new column values stored in the update vector to the record @@ -135,8 +136,9 @@ a clustered index */ void row_upd_rec_in_place( /*=================*/ - rec_t* rec, /* in/out: record where replaced */ - upd_t* update);/* in: update vector */ + rec_t* rec, /* in/out: record where replaced */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + upd_t* update);/* in: update vector */ /******************************************************************* Builds an update vector from those fields which in a secondary index entry differ from a record that has the equal ordering fields. NOTE: we compare @@ -274,10 +276,11 @@ recovery. */ void row_upd_rec_sys_fields_in_recovery( /*===============================*/ - rec_t* rec, /* in: record */ - ulint pos, /* in: TRX_ID position in rec */ - dulint trx_id, /* in: transaction id */ - dulint roll_ptr);/* in: roll ptr of the undo log record */ + rec_t* rec, /* in: record */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint pos, /* in: TRX_ID position in rec */ + dulint trx_id, /* in: transaction id */ + dulint roll_ptr);/* in: roll ptr of the undo log record */ /************************************************************************* Parses the log data written by row_upd_index_write_log. 
*/ diff --git a/innobase/include/row0upd.ic b/innobase/include/row0upd.ic index a124228a0de..e2d81a39cfa 100644 --- a/innobase/include/row0upd.ic +++ b/innobase/include/row0upd.ic @@ -106,15 +106,17 @@ row_upd_rec_sys_fields( /*===================*/ rec_t* rec, /* in: record */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ trx_t* trx, /* in: transaction */ dulint roll_ptr)/* in: roll ptr of the undo log record */ { ut_ad(index->type & DICT_CLUSTERED); + ut_ad(rec_offs_validate(rec, index, offsets)); #ifdef UNIV_SYNC_DEBUG ut_ad(!buf_block_align(rec)->is_hashed || rw_lock_own(&btr_search_latch, RW_LOCK_EX)); #endif /* UNIV_SYNC_DEBUG */ - row_set_rec_trx_id(rec, index, trx->id); - row_set_rec_roll_ptr(rec, index, roll_ptr); + row_set_rec_trx_id(rec, index, offsets, trx->id); + row_set_rec_roll_ptr(rec, index, offsets, roll_ptr); } diff --git a/innobase/include/row0vers.h b/innobase/include/row0vers.h index 30cf82144e9..0dd40fda65f 100644 --- a/innobase/include/row0vers.h +++ b/innobase/include/row0vers.h @@ -30,7 +30,8 @@ row_vers_impl_x_locked_off_kernel( transaction; NOTE that the kernel mutex is temporarily released! */ rec_t* rec, /* in: record in a secondary index */ - dict_index_t* index); /* in: the secondary index */ + dict_index_t* index, /* in: the secondary index */ + const ulint* offsets);/* in: rec_get_offsets(rec, index) */ /********************************************************************* Finds out if we must preserve a delete marked earlier version of a clustered index record, because it is >= the purge view. */ diff --git a/innobase/include/row0vers.ic b/innobase/include/row0vers.ic index 5ece47c35d1..ab1e264635b 100644 --- a/innobase/include/row0vers.ic +++ b/innobase/include/row0vers.ic @@ -11,73 +11,3 @@ Created 2/6/1997 Heikki Tuuri #include "read0read.h" #include "page0page.h" #include "log0recv.h" - -/************************************************************************* -Fetches the trx id of a clustered index record or version. */ -UNIV_INLINE -dulint -row_vers_get_trx_id( -/*================*/ - /* out: trx id or ut_dulint_zero if the - clustered index record not found */ - rec_t* rec, /* in: clustered index record, or an old - version of it */ - dict_table_t* table) /* in: table */ -{ - return(row_get_rec_trx_id(rec, dict_table_get_first_index(table))); -} - -/************************************************************************* -Checks if a consistent read can be performed immediately on the index -record, or if an older version is needed. */ -UNIV_INLINE -ibool -row_vers_clust_rec_sees_older( -/*==========================*/ - /* out: FALSE if can read immediately */ - rec_t* rec, /* in: record which should be read or passed - over by a read cursor */ - dict_index_t* index, /* in: clustered index */ - read_view_t* view) /* in: read view */ -{ - ut_ad(index->type & DICT_CLUSTERED); - - if (read_view_sees_trx_id(view, row_get_rec_trx_id(rec, index))) { - - return(FALSE); - } - - return(TRUE); -} - -/************************************************************************* -Checks if a secondary index record can be read immediately by a consistent -read, or if an older version may be needed. To be sure, we will have to -look in the clustered index. 
*/ -UNIV_INLINE -ibool -row_vers_sec_rec_may_see_older( -/*===========================*/ - /* out: FALSE if can be read immediately */ - rec_t* rec, /* in: record which should be read or passed */ - dict_index_t* index __attribute__((unused)),/* in: secondary index */ - read_view_t* view) /* in: read view */ -{ - page_t* page; - - ut_ad(!(index->type & DICT_CLUSTERED)); - - page = buf_frame_align(rec); - - if ((ut_dulint_cmp(page_get_max_trx_id(page), view->up_limit_id) >= 0) - || recv_recovery_is_on()) { - - /* It may be that the record was inserted or modified by a - transaction the view should not see: we have to look in the - clustered index */ - - return(TRUE); - } - - return(FALSE); -} diff --git a/innobase/include/srv0srv.h b/innobase/include/srv0srv.h index b9963d93265..d4cc7d8222f 100644 --- a/innobase/include/srv0srv.h +++ b/innobase/include/srv0srv.h @@ -538,6 +538,10 @@ struct srv_sys_struct{ srv_table_t* threads; /* server thread table */ UT_LIST_BASE_NODE_T(que_thr_t) tasks; /* task queue */ + dict_index_t* dummy_ind1; /* dummy index for old-style + supremum and infimum records */ + dict_index_t* dummy_ind2; /* dummy index for new-style + supremum and infimum records */ }; extern ulint srv_n_threads_active[]; diff --git a/innobase/include/trx0rec.h b/innobase/include/trx0rec.h index 9d7f41cd94e..4387ce1a61e 100644 --- a/innobase/include/trx0rec.h +++ b/innobase/include/trx0rec.h @@ -246,6 +246,7 @@ trx_undo_prev_version_build( index_rec page and purge_view */ rec_t* rec, /* in: version of a clustered index record */ dict_index_t* index, /* in: clustered index */ + ulint* offsets,/* in: rec_get_offsets(rec, index) */ mem_heap_t* heap, /* in: memory heap from which the memory needed is allocated */ rec_t** old_vers);/* out, own: previous version, or NULL if diff --git a/innobase/include/trx0roll.h b/innobase/include/trx0roll.h index 6004551f456..893e5af6c01 100644 --- a/innobase/include/trx0roll.h +++ b/innobase/include/trx0roll.h @@ -104,11 +104,12 @@ trx_rollback( /*********************************************************************** Rollback or clean up transactions which have no user session. If the transaction already was committed, then we clean up a possible insert -undo log. If the transaction was not yet committed, then we roll it back. */ +undo log. If the transaction was not yet committed, then we roll it back. +Note: this is done in a background thread */ -void -trx_rollback_or_clean_all_without_sess(void); -/*========================================*/ +void * +trx_rollback_or_clean_all_without_sess(void *); +/*============================================*/ /******************************************************************** Finishes a transaction rollback. */ diff --git a/innobase/include/trx0trx.h b/innobase/include/trx0trx.h index 602291f946a..8eb71dac763 100644 --- a/innobase/include/trx0trx.h +++ b/innobase/include/trx0trx.h @@ -16,6 +16,7 @@ Created 3/26/1996 Heikki Tuuri #include "que0types.h" #include "mem0mem.h" #include "read0types.h" +#include "trx0xa.h" extern ulint trx_n_mysql_transactions; @@ -156,6 +157,36 @@ trx_commit_for_mysql( /*=================*/ /* out: 0 or error number */ trx_t* trx); /* in: trx handle */ + +/************************************************************************** +Does the transaction prepare for MySQL. 
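This is the first phase of a two-phase commit: the transaction is left in
the TRX_PREPARED state instead of being committed.  After a crash the
prepared transactions can be enumerated and resolved with the functions
declared below, along the lines of the following sketch (presumably driven
from the MySQL layer during XA recovery; error handling and the
commit-or-rollback decision are omitted):

	XID	xid_list[16];
	int	n_prepared;
	trx_t*	trx;

	n_prepared = trx_recover_for_mysql(xid_list, 16);

	if (n_prepared > 0) {
		trx = trx_get_trx_by_xid(&xid_list[0]);

		if (trx) {
			trx_commit_for_mysql(trx);
		}
	}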
*/ + +ulint +trx_prepare_for_mysql( +/*=================*/ + /* out: 0 or error number */ + trx_t* trx); /* in: trx handle */ + +/************************************************************************** +This function is used to find number of prepared transactions and +their transaction objects for a recovery. */ + +int +trx_recover_for_mysql( +/*=================*/ + /* out: number of prepared transactions */ + XID* xid_list, /* in/out: prepared transactions */ + uint len); /* in: number of slots in xid_list */ + +/*********************************************************************** +This function is used to commit one X/Open XA distributed transaction +which is in the prepared state */ +trx_t * +trx_get_trx_by_xid( +/*===============*/ + /* out: trx or NULL */ + XID* xid); /* in: X/Open XA Transaction Idenfication */ + /************************************************************************** If required, flushes the log to disk if we called trx_commit_for_mysql() with trx->flush_log_later == TRUE. */ @@ -339,6 +370,9 @@ struct trx_struct{ if we can use the insert buffer for them, we set this FALSE */ dulint id; /* transaction id */ + XID xid; /* X/Open XA transaction + identification to identify a + transaction branch */ dulint no; /* transaction serialization number == max trx id when the transaction is moved to COMMITTED_IN_MEMORY state */ @@ -353,8 +387,10 @@ struct trx_struct{ dulint table_id; /* table id if the preceding field is TRUE */ /*------------------------------*/ - void* mysql_thd; /* MySQL thread handle corresponding - to this trx, or NULL */ + int active_trans; /* whether a transaction in MySQL + is active */ + void* mysql_thd; /* MySQL thread handle corresponding + to this trx, or NULL */ char** mysql_query_str;/* pointer to the field in mysqld_thd which contains the pointer to the current SQL query string */ @@ -543,6 +579,7 @@ struct trx_struct{ #define TRX_NOT_STARTED 1 #define TRX_ACTIVE 2 #define TRX_COMMITTED_IN_MEMORY 3 +#define TRX_PREPARED 4 /* Support for 2PC/XA */ /* Transaction execution states when trx state is TRX_ACTIVE */ #define TRX_QUE_RUNNING 1 /* transaction is running */ diff --git a/innobase/include/trx0undo.h b/innobase/include/trx0undo.h index 20002076cc3..fce62e46046 100644 --- a/innobase/include/trx0undo.h +++ b/innobase/include/trx0undo.h @@ -14,6 +14,7 @@ Created 3/26/1996 Heikki Tuuri #include "mtr0mtr.h" #include "trx0sys.h" #include "page0types.h" +#include "trx0xa.h" /*************************************************************************** Builds a roll pointer dulint. */ @@ -36,7 +37,7 @@ trx_undo_decode_roll_ptr( ibool* is_insert, /* out: TRUE if insert undo log */ ulint* rseg_id, /* out: rollback segment id */ ulint* page_no, /* out: page number */ - ulint* offset); /* out: offset of the undo entry within page */ + ulint* offset); /* out: offset of the undo entry within page */ /*************************************************************************** Returns TRUE if the roll pointer is of the insert type. */ UNIV_INLINE @@ -239,6 +240,18 @@ trx_undo_set_state_at_finish( trx_t* trx, /* in: transaction */ trx_undo_t* undo, /* in: undo log memory copy */ mtr_t* mtr); /* in: mtr */ +/********************************************************************** +Sets the state of the undo log segment at a transaction prepare. 
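Presumably the prepare-phase counterpart of trx_undo_set_state_at_finish()
above: the segment state is set to TRX_UNDO_PREPARED and the X/Open XA
identification of the transaction is written to the undo log header (see
trx_undo_write_xid() and the TRX_UNDO_XID_EXISTS flag below), so that
trx_recover_for_mysql() can find the prepared transactions and their XIDs
again after a crash.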
*/ + +page_t* +trx_undo_set_state_at_prepare( +/*==========================*/ + /* out: undo log segment header page, + x-latched */ + trx_t* trx, /* in: transaction */ + trx_undo_t* undo, /* in: undo log memory copy */ + mtr_t* mtr); /* in: mtr */ + /************************************************************************** Adds the update undo log header as the first in the history list, and frees the memory object, or puts it to the list of cached update undo log @@ -294,7 +307,23 @@ trx_undo_parse_discard_latest( byte* end_ptr,/* in: buffer end */ page_t* page, /* in: page or NULL */ mtr_t* mtr); /* in: mtr or NULL */ +/************************************************************************ +Write X/Open XA Transaction Identification (XID) to undo log header */ +void +trx_undo_write_xid( +/*===============*/ + trx_ulogf_t* log_hdr,/* in: undo log header */ + XID* xid); /* in: X/Open XA Transaction Identification */ + +/************************************************************************ +Read X/Open XA Transaction Identification (XID) from undo log header */ + +void +trx_undo_read_xid( +/*==============*/ + trx_ulogf_t* log_hdr,/* in: undo log header */ + XID* xid); /* out: X/Open XA Transaction Identification */ /* Types of an undo log segment */ #define TRX_UNDO_INSERT 1 /* contains undo entries for inserts */ @@ -310,6 +339,8 @@ trx_undo_parse_discard_latest( #define TRX_UNDO_TO_PURGE 4 /* update undo segment will not be reused: it can be freed in purge when all undo data in it is removed */ +#define TRX_UNDO_PREPARED 5 /* contains an undo log of an + prepared transaction */ /* Transaction undo log memory object; this is protected by the undo_mutex in the corresponding transaction object */ @@ -332,6 +363,8 @@ struct trx_undo_struct{ field */ dulint trx_id; /* id of the trx assigned to the undo log */ + XID xid; /* X/Open XA transaction + identification */ ibool dict_operation; /* TRUE if a dict operation trx */ dulint table_id; /* if a dict operation, then the table id */ @@ -436,7 +469,10 @@ page of an update undo log segment. */ log start, and therefore this is not necessarily the same as this log header end offset */ -#define TRX_UNDO_DICT_OPERATION 20 /* TRUE if the transaction is a table +#define TRX_UNDO_XID_EXISTS 20 /* TRUE if undo log header includes + X/Open XA transaction identification + XID */ +#define TRX_UNDO_DICT_TRANS 21 /* TRUE if the transaction is a table create, index create, or drop transaction: in recovery the transaction cannot be rolled back @@ -452,7 +488,17 @@ page of an update undo log segment. 
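With the XA fields introduced just below, the undo log header grows from its old fixed size of 34 + FLST_NODE_SIZE bytes to 34 + FLST_NODE_SIZE + 4 + 4 + 4 + XIDDATASIZE bytes: a 4-byte XID format id, 4-byte gtrid and bqual length fields, and the XIDDATASIZE (128) byte XID data area. Assuming the usual FLST_NODE_SIZE of two 6-byte file addresses, i.e. 12 bytes, the arithmetic is

	TRX_UNDO_LOG_HDR_SIZE = TRX_UNDO_XA_XID + XIDDATASIZE
	                      = (34 + 12 + 4 + 4 + 4) + 128
	                      = 186

so each undo log header goes from 46 to 186 bytes. Headers written by older servers lack these fields, which is presumably what the TRX_UNDO_XID_EXISTS flag above is for.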
*/ #define TRX_UNDO_HISTORY_NODE 34 /* If the log is put to the history list, the file list node is here */ /*-------------------------------------------------------------*/ -#define TRX_UNDO_LOG_HDR_SIZE (34 + FLST_NODE_SIZE) +/* X/Open XA Transaction Identification (XID) */ + +#define TRX_UNDO_XA_FORMAT (34 + FLST_NODE_SIZE) +#define TRX_UNDO_XA_TRID_LEN (TRX_UNDO_XA_FORMAT + 4) +#define TRX_UNDO_XA_BQUAL_LEN (TRX_UNDO_XA_TRID_LEN + 4) +#define TRX_UNDO_XA_XID (TRX_UNDO_XA_BQUAL_LEN + 4) +#define TRX_UNDO_XA_LEN (TRX_UNDO_XA_XID + XIDDATASIZE) + +/*-------------------------------------------------------------*/ +#define TRX_UNDO_LOG_HDR_SIZE (TRX_UNDO_XA_LEN) +/*-------------------------------------------------------------*/ #ifndef UNIV_NONINL #include "trx0undo.ic" diff --git a/innobase/include/trx0xa.h b/innobase/include/trx0xa.h new file mode 100644 index 00000000000..34b7a2f95a8 --- /dev/null +++ b/innobase/include/trx0xa.h @@ -0,0 +1,182 @@ +/* + * Start of xa.h header + * + * Define a symbol to prevent multiple inclusions of this header file + */ +#ifndef XA_H +#define XA_H + +/* + * Transaction branch identification: XID and NULLXID: + */ +#ifndef XIDDATASIZE + +#define XIDDATASIZE 128 /* size in bytes */ +#define MAXGTRIDSIZE 64 /* maximum size in bytes of gtrid */ +#define MAXBQUALSIZE 64 /* maximum size in bytes of bqual */ + +struct xid_t { + long formatID; /* format identifier */ + long gtrid_length; /* value from 1 through 64 */ + long bqual_length; /* value from 1 through 64 */ + char data[XIDDATASIZE]; +}; +typedef struct xid_t XID; +#endif +/* + * A value of -1 in formatID means that the XID is null. + */ + + +#ifdef NOTDEFINED +/* Let us comment this out to remove compiler errors!!!!!!!!!!!! */ + +/* + * Declarations of routines by which RMs call TMs: + */ +extern int ax_reg __P((int, XID *, long)); +extern int ax_unreg __P((int, long)); + +/* + * XA Switch Data Structure + */ +#define RMNAMESZ 32 /* length of resource manager name, */ + /* including the null terminator */ +#define MAXINFOSIZE 256 /* maximum size in bytes of xa_info */ + /* strings, including the null + terminator */ + + +struct xa_switch_t { + char name[RMNAMESZ]; /* name of resource manager */ + long flags; /* resource manager specific options */ + long version; /* must be 0 */ + int (*xa_open_entry) /* xa_open function pointer */ + __P((char *, int, long)); + int (*xa_close_entry) /* xa_close function pointer */ + __P((char *, int, long)); + int (*xa_start_entry) /* xa_start function pointer */ + __P((XID *, int, long)); + int (*xa_end_entry) /* xa_end function pointer */ + __P((XID *, int, long)); + int (*xa_rollback_entry) /* xa_rollback function pointer */ + __P((XID *, int, long)); + int (*xa_prepare_entry) /* xa_prepare function pointer */ + __P((XID *, int, long)); + int (*xa_commit_entry) /* xa_commit function pointer */ + __P((XID *, int, long)); + int (*xa_recover_entry) /* xa_recover function pointer */ + __P((XID *, long, int, long)); + int (*xa_forget_entry) /* xa_forget function pointer */ + __P((XID *, int, long)); + int (*xa_complete_entry) /* xa_complete function pointer */ + __P((int *, int *, int, long)); +}; +#endif /* NOTDEFINED */ + + +/* + * Flag definitions for the RM switch + */ +#define TMNOFLAGS 0x00000000L /* no resource manager features + selected */ +#define TMREGISTER 0x00000001L /* resource manager dynamically + registers */ +#define TMNOMIGRATE 0x00000002L /* resource manager does not support + association migration */ +#define TMUSEASYNC 0x00000004L /* resource 
manager supports + asynchronous operations */ +/* + * Flag definitions for xa_ and ax_ routines + */ +/* use TMNOFLAGGS, defined above, when not specifying other flags */ +#define TMASYNC 0x80000000L /* perform routine asynchronously */ +#define TMONEPHASE 0x40000000L /* caller is using one-phase commit + optimisation */ +#define TMFAIL 0x20000000L /* dissociates caller and marks + transaction branch rollback-only */ +#define TMNOWAIT 0x10000000L /* return if blocking condition + exists */ +#define TMRESUME 0x08000000L /* caller is resuming association with + suspended transaction branch */ +#define TMSUCCESS 0x04000000L /* dissociate caller from transaction + branch */ +#define TMSUSPEND 0x02000000L /* caller is suspending, not ending, + association */ +#define TMSTARTRSCAN 0x01000000L /* start a recovery scan */ +#define TMENDRSCAN 0x00800000L /* end a recovery scan */ +#define TMMULTIPLE 0x00400000L /* wait for any asynchronous + operation */ +#define TMJOIN 0x00200000L /* caller is joining existing + transaction branch */ +#define TMMIGRATE 0x00100000L /* caller intends to perform + migration */ + +/* + * ax_() return codes (transaction manager reports to resource manager) + */ +#define TM_JOIN 2 /* caller is joining existing + transaction branch */ +#define TM_RESUME 1 /* caller is resuming association with + suspended transaction branch */ +#define TM_OK 0 /* normal execution */ +#define TMER_TMERR -1 /* an error occurred in the transaction + manager */ +#define TMER_INVAL -2 /* invalid arguments were given */ +#define TMER_PROTO -3 /* routine invoked in an improper + context */ + +/* + * xa_() return codes (resource manager reports to transaction manager) + */ +#define XA_RBBASE 100 /* The inclusive lower bound of the + rollback codes */ +#define XA_RBROLLBACK XA_RBBASE /* The rollback was caused by an + unspecified reason */ +#define XA_RBCOMMFAIL XA_RBBASE+1 /* The rollback was caused by a + communication failure */ +#define XA_RBDEADLOCK XA_RBBASE+2 /* A deadlock was detected */ +#define XA_RBINTEGRITY XA_RBBASE+3 /* A condition that violates the + integrity of the resources was + detected */ +#define XA_RBOTHER XA_RBBASE+4 /* The resource manager rolled back the + transaction branch for a reason not + on this list */ +#define XA_RBPROTO XA_RBBASE+5 /* A protocol error occurred in the + resource manager */ +#define XA_RBTIMEOUT XA_RBBASE+6 /* A transaction branch took too long */ +#define XA_RBTRANSIENT XA_RBBASE+7 /* May retry the transaction branch */ +#define XA_RBEND XA_RBTRANSIENT /* The inclusive upper bound of the + rollback codes */ +#define XA_NOMIGRATE 9 /* resumption must occur where + suspension occurred */ +#define XA_HEURHAZ 8 /* the transaction branch may have + been heuristically completed */ +#define XA_HEURCOM 7 /* the transaction branch has been + heuristically committed */ +#define XA_HEURRB 6 /* the transaction branch has been + heuristically rolled back */ +#define XA_HEURMIX 5 /* the transaction branch has been + heuristically committed and rolled + back */ +#define XA_RETRY 4 /* routine returned with no effect and + may be re-issued */ +#define XA_RDONLY 3 /* the transaction branch was read-only + and has been committed */ +#define XA_OK 0 /* normal execution */ +#define XAER_ASYNC -2 /* asynchronous operation already + outstanding */ +#define XAER_RMERR -3 /* a resource manager error occurred in + the transaction branch */ +#define XAER_NOTA -4 /* the XID is not valid */ +#define XAER_INVAL -5 /* invalid arguments were given */ +#define XAER_PROTO -6 /* 
routine invoked in an improper + context */ +#define XAER_RMFAIL -7 /* resource manager unavailable */ +#define XAER_DUPID -8 /* the XID already exists */ +#define XAER_OUTSIDE -9 /* resource manager doing work outside + transaction */ +#endif /* ifndef XA_H */ +/* + * End of xa.h header + */ diff --git a/innobase/include/ut0byte.h b/innobase/include/ut0byte.h index a62c2e2e318..22d488abeaf 100644 --- a/innobase/include/ut0byte.h +++ b/innobase/include/ut0byte.h @@ -208,7 +208,20 @@ ut_align_down( /*==========*/ /* out: aligned pointer */ void* ptr, /* in: pointer */ - ulint align_no); /* in: align by this number */ + ulint align_no) /* in: align by this number */ + __attribute__((const)); +/************************************************************* +The following function computes the offset of a pointer from the nearest +aligned address. */ +UNIV_INLINE +ulint +ut_align_offset( +/*==========*/ + /* out: distance from aligned + pointer */ + const void* ptr, /* in: pointer */ + ulint align_no) /* in: align by this number */ + __attribute__((const)); /********************************************************************* Gets the nth bit of a ulint. */ UNIV_INLINE diff --git a/innobase/include/ut0byte.ic b/innobase/include/ut0byte.ic index 5a70dcf12a8..e141de3aa3f 100644 --- a/innobase/include/ut0byte.ic +++ b/innobase/include/ut0byte.ic @@ -335,6 +335,27 @@ ut_align_down( return((void*)((((ulint)ptr)) & ~(align_no - 1))); } +/************************************************************* +The following function computes the offset of a pointer from the nearest +aligned address. */ +UNIV_INLINE +ulint +ut_align_offset( +/*============*/ + /* out: distance from + aligned pointer */ + const void* ptr, /* in: pointer */ + ulint align_no) /* in: align by this number */ +{ + ut_ad(align_no > 0); + ut_ad(((align_no - 1) & align_no) == 0); + ut_ad(ptr); + + ut_ad(sizeof(void*) == sizeof(ulint)); + + return(((ulint)ptr) & (align_no - 1)); +} + /********************************************************************* Gets the nth bit of a ulint. */ UNIV_INLINE diff --git a/innobase/lock/lock0lock.c b/innobase/lock/lock0lock.c index 479952235f0..d2d16a1ae4e 100644 --- a/innobase/lock/lock0lock.c +++ b/innobase/lock/lock0lock.c @@ -365,6 +365,21 @@ lock_deadlock_recursive( ulint* cost); /* in/out: number of calculation steps thus far: if this exceeds LOCK_MAX_N_STEPS_... we return TRUE */ + +/************************************************************************* +Gets the type of a lock. */ +UNIV_INLINE +ulint +lock_get_type( +/*==========*/ + /* out: LOCK_TABLE or LOCK_REC */ + lock_t* lock) /* in: lock */ +{ + ut_ad(lock); + + return(lock->type_mode & LOCK_TYPE_MASK); +} + /************************************************************************* Gets the nth bit of a record lock. */ UNIV_INLINE @@ -395,19 +410,6 @@ lock_rec_get_nth_bit( return(ut_bit_get_nth(b, bit_index)); } -/************************************************************************* -Gets the table covered by an IX table lock. 
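This accessor only handled the special case of a table covered by exactly one LOCK_IX table lock; it is removed here, and the more general lock_get_src_table() and lock_is_table_exclusive() added further down in this file appear to take over its job: finding the source table of an ALTER TABLE style operation and checking that the transaction is the only locker of that table. A hypothetical caller, sketched purely from the signatures introduced in this patch (the names and control flow below are illustrative, not part of the patch):

	ulint           mode;
	dict_table_t*   src = lock_get_src_table(trx, dest, &mode);

	if (!src) {
	        ...             (locking more than two tables, or inconsistent)
	} else if (src == dest) {
	        ...             (no source table is involved at all)
	} else if (lock_is_table_exclusive(src, trx)) {
	        ...             (trx holds only IX, and possibly LOCK_AUTO_INC,
	                         on src, and nobody else locks it)
	}
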
*/ - -dict_table_t* -lock_get_ix_table( -/*==============*/ - /* out: the table covered by the lock */ - lock_t* lock) /* in: table lock */ -{ - ut_a(lock->type_mode == (LOCK_TABLE | LOCK_IX)); - return(lock->un_member.tab_lock.table); -} - /*************************************************************************/ #define lock_mutex_enter_kernel() mutex_enter(&kernel_mutex) @@ -423,11 +425,14 @@ lock_check_trx_id_sanity( dulint trx_id, /* in: trx id */ rec_t* rec, /* in: user record */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets, /* in: rec_get_offsets(rec, index) */ ibool has_kernel_mutex)/* in: TRUE if the caller owns the kernel mutex */ { ibool is_ok = TRUE; + ut_ad(rec_offs_validate(rec, index, offsets)); + if (!has_kernel_mutex) { mutex_enter(&kernel_mutex); } @@ -440,7 +445,7 @@ lock_check_trx_id_sanity( fputs(" InnoDB: Error: transaction id associated" " with record\n", stderr); - rec_print(stderr, rec); + rec_print(stderr, rec, offsets); fputs("InnoDB: in ", stderr); dict_index_name_print(stderr, NULL, index); fprintf(stderr, "\n" @@ -472,18 +477,20 @@ lock_clust_rec_cons_read_sees( rec_t* rec, /* in: user record which should be read or passed over by a read cursor */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ read_view_t* view) /* in: consistent read view */ { dulint trx_id; ut_ad(index->type & DICT_CLUSTERED); ut_ad(page_rec_is_user_rec(rec)); + ut_ad(rec_offs_validate(rec, index, offsets)); /* NOTE that we call this function while holding the search system latch. To obey the latching order we must NOT reserve the kernel mutex here! */ - trx_id = row_get_rec_trx_id(rec, index); + trx_id = row_get_rec_trx_id(rec, index, offsets); if (read_view_sees_trx_id(view, trx_id)) { @@ -581,20 +588,6 @@ lock_get_mode( return(lock->type_mode & LOCK_MODE_MASK); } -/************************************************************************* -Gets the type of a lock. */ -UNIV_INLINE -ulint -lock_get_type( -/*==========*/ - /* out: LOCK_TABLE or LOCK_REC */ - lock_t* lock) /* in: lock */ -{ - ut_ad(lock); - - return(lock->type_mode & LOCK_TYPE_MASK); -} - /************************************************************************* Gets the wait flag of a lock. */ UNIV_INLINE @@ -614,6 +607,128 @@ lock_get_wait( return(FALSE); } +/************************************************************************* +Gets the source table of an ALTER TABLE transaction. The table must be +covered by an IX or IS table lock. */ + +dict_table_t* +lock_get_src_table( +/*===============*/ + /* out: the source table of transaction, + if it is covered by an IX or IS table lock; + dest if there is no source table, and + NULL if the transaction is locking more than + two tables or an inconsistency is found */ + trx_t* trx, /* in: transaction */ + dict_table_t* dest, /* in: destination of ALTER TABLE */ + ulint* mode) /* out: lock mode of the source table */ +{ + dict_table_t* src; + lock_t* lock; + + src = NULL; + *mode = LOCK_NONE; + + for (lock = UT_LIST_GET_FIRST(trx->trx_locks); + lock; + lock = UT_LIST_GET_NEXT(trx_locks, lock)) { + lock_table_t* tab_lock; + ulint lock_mode; + if (!(lock_get_type(lock) & LOCK_TABLE)) { + /* We are only interested in table locks. */ + continue; + } + tab_lock = &lock->un_member.tab_lock; + if (dest == tab_lock->table) { + /* We are not interested in the destination table. */ + continue; + } else if (!src) { + /* This presumably is the source table. 
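In other words, apart from locks on dest the transaction is expected to hold exactly one table lock in total: if this candidate table carries any further lock, or if yet another table shows up later in the lock list, the function gives up and returns NULL below.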
*/ + src = tab_lock->table; + if (UT_LIST_GET_LEN(src->locks) != 1 || + UT_LIST_GET_FIRST(src->locks) != lock) { + /* We only support the case when + there is only one lock on this table. */ + return(NULL); + } + } else if (src != tab_lock->table) { + /* The transaction is locking more than + two tables (src and dest): abort */ + return(NULL); + } + + /* Check that the source table is locked by + LOCK_IX or LOCK_IS. */ + lock_mode = lock_get_mode(lock); + switch (lock_mode) { + case LOCK_IX: + case LOCK_IS: + if (*mode != LOCK_NONE && *mode != lock_mode) { + /* There are multiple locks on src. */ + return(NULL); + } + *mode = lock_mode; + break; + } + } + + if (!src) { + /* No source table lock found: flag the situation to caller */ + src = dest; + } + + return(src); +} + +/************************************************************************* +Determine if the given table is exclusively "owned" by the given +transaction, i.e., transaction holds LOCK_IX and possibly LOCK_AUTO_INC +on the table. */ + +ibool +lock_is_table_exclusive( +/*====================*/ + /* out: TRUE if table is only locked by trx, + with LOCK_IX, and possibly LOCK_AUTO_INC */ + dict_table_t* table, /* in: table */ + trx_t* trx) /* in: transaction */ +{ + lock_t* lock; + bool ok = FALSE; + + ut_ad(table && trx); + + for (lock = UT_LIST_GET_FIRST(table->locks); + lock; + lock = UT_LIST_GET_NEXT(locks, &lock->un_member.tab_lock)) { + if (lock->trx != trx) { + /* A lock on the table is held + by some other transaction. */ + return(FALSE); + } + + if (!(lock_get_type(lock) & LOCK_TABLE)) { + /* We are interested in table locks only. */ + continue; + } + + switch (lock_get_mode(lock)) { + case LOCK_IX: + ok = TRUE; + break; + case LOCK_AUTO_INC: + /* It is allowed for trx to hold an + auto_increment lock. */ + break; + default: + /* Other table locks than LOCK_IX are not allowed. */ + return(FALSE); + } + } + + return(ok); +} + /************************************************************************* Sets the wait flag of a lock and the back pointer in trx to lock. 
*/ UNIV_INLINE @@ -1146,6 +1261,7 @@ lock_rec_get_next( /*==============*/ /* out: next lock, NULL if none exists */ rec_t* rec, /* in: record on a page */ + ibool comp, /* in: TRUE=compact page format */ lock_t* lock) /* in: lock */ { #ifdef UNIV_SYNC_DEBUG @@ -1161,7 +1277,7 @@ lock_rec_get_next( return(NULL); } - if (lock_rec_get_nth_bit(lock, rec_get_heap_no(rec))) { + if (lock_rec_get_nth_bit(lock, rec_get_heap_no(rec, comp))) { return(lock); } @@ -1178,15 +1294,17 @@ lock_rec_get_first( rec_t* rec) /* in: record on a page */ { lock_t* lock; + ibool comp; #ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&kernel_mutex)); #endif /* UNIV_SYNC_DEBUG */ lock = lock_rec_get_first_on_page(rec); + comp = page_is_comp(buf_frame_align(rec)); while (lock) { - if (lock_rec_get_nth_bit(lock, rec_get_heap_no(rec))) { + if (lock_rec_get_nth_bit(lock, rec_get_heap_no(rec, comp))) { break; } @@ -1353,6 +1471,7 @@ lock_rec_has_expl( for a supremum record we regard this always a gap type request */ rec_t* rec, /* in: record */ + ibool comp, /* in: TRUE=compact page format */ trx_t* trx) /* in: transaction */ { lock_t* lock; @@ -1382,7 +1501,7 @@ lock_rec_has_expl( return(lock); } - lock = lock_rec_get_next(rec, lock); + lock = lock_rec_get_next(rec, comp, lock); } return(NULL); @@ -1401,6 +1520,7 @@ lock_rec_other_has_expl_req( ulint wait, /* in: LOCK_WAIT if also waiting locks are taken into account, or 0 if not */ rec_t* rec, /* in: record to look at */ + ibool comp, /* in: TRUE=compact record format */ trx_t* trx) /* in: transaction, or NULL if requests by all transactions are taken into account */ { @@ -1425,7 +1545,7 @@ lock_rec_other_has_expl_req( return(lock); } - lock = lock_rec_get_next(rec, lock); + lock = lock_rec_get_next(rec, comp, lock); } return(NULL); @@ -1446,12 +1566,13 @@ lock_rec_other_has_conflicting( trx_t* trx) /* in: our transaction */ { lock_t* lock; - + ibool comp; #ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&kernel_mutex)); #endif /* UNIV_SYNC_DEBUG */ lock = lock_rec_get_first(rec); + comp = page_is_comp(buf_frame_align(rec)); while (lock) { if (lock_rec_has_to_wait(trx, mode, lock, @@ -1460,7 +1581,7 @@ lock_rec_other_has_conflicting( return(lock); } - lock = lock_rec_get_next(rec, lock); + lock = lock_rec_get_next(rec, comp, lock); } return(NULL); @@ -1486,8 +1607,7 @@ lock_rec_find_similar_on_page( ut_ad(mutex_own(&kernel_mutex)); #endif /* UNIV_SYNC_DEBUG */ - heap_no = rec_get_heap_no(rec); - + heap_no = rec_get_heap_no(rec, page_is_comp(buf_frame_align(rec))); lock = lock_rec_get_first_on_page(rec); while (lock != NULL) { @@ -1514,7 +1634,8 @@ lock_sec_rec_some_has_impl_off_kernel( /* out: transaction which has the x-lock, or NULL */ rec_t* rec, /* in: user record */ - dict_index_t* index) /* in: secondary index */ + dict_index_t* index, /* in: secondary index */ + const ulint* offsets)/* in: rec_get_offsets(rec, index) */ { page_t* page; @@ -1523,6 +1644,7 @@ lock_sec_rec_some_has_impl_off_kernel( #endif /* UNIV_SYNC_DEBUG */ ut_ad(!(index->type & DICT_CLUSTERED)); ut_ad(page_rec_is_user_rec(rec)); + ut_ad(rec_offs_validate(rec, index, offsets)); page = buf_frame_align(rec); @@ -1542,8 +1664,8 @@ lock_sec_rec_some_has_impl_off_kernel( /* Ok, in this case it is possible that some transaction has an implicit x-lock. We have to look in the clustered index. 
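Note that the offsets array is now threaded through from the caller: in the new compact ("comp") page format the field boundaries of a record depend on the index definition, so the same rec_get_offsets() result is reused here for the trx id sanity check, for rec_print() diagnostics and for the version lookup in row_vers_impl_x_locked_off_kernel(). The calling pattern used elsewhere in this file is roughly

	heap    = mem_heap_create(100);
	offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap);
	...                     (pass rec, index and offsets together)
	mem_heap_free(heap);
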
*/ - if (!lock_check_trx_id_sanity(page_get_max_trx_id(page), rec, index, - TRUE)) { + if (!lock_check_trx_id_sanity(page_get_max_trx_id(page), + rec, index, offsets, TRUE)) { buf_page_print(page); /* The page is corrupt: try to avoid a crash by returning @@ -1551,7 +1673,7 @@ lock_sec_rec_some_has_impl_off_kernel( return(NULL); } - return(row_vers_impl_x_locked_off_kernel(rec, index)); + return(row_vers_impl_x_locked_off_kernel(rec, index, offsets)); } /*============== RECORD LOCK CREATION AND QUEUE MANAGEMENT =============*/ @@ -1585,7 +1707,7 @@ lock_rec_create( page = buf_frame_align(rec); space = buf_frame_get_space_id(page); page_no = buf_frame_get_page_no(page); - heap_no = rec_get_heap_no(rec); + heap_no = rec_get_heap_no(rec, page_is_comp(page)); /* If rec is the supremum record, then we reset the gap and LOCK_REC_NOT_GAP bits, as all locks on the supremum are @@ -1598,8 +1720,7 @@ lock_rec_create( } /* Make lock bitmap bigger by a safety margin */ - n_bits = page_header_get_field(page, PAGE_N_HEAP) - + LOCK_PAGE_BITMAP_MARGIN; + n_bits = page_dir_get_n_heap(page) + LOCK_PAGE_BITMAP_MARGIN; n_bytes = 1 + n_bits / 8; lock = mem_heap_alloc(trx->lock_heap, sizeof(lock_t) + n_bytes); @@ -1704,7 +1825,8 @@ lock_rec_enqueue_waiting( if (lock_deadlock_occurs(lock, trx)) { lock_reset_lock_and_trx_wait(lock); - lock_rec_reset_nth_bit(lock, rec_get_heap_no(rec)); + lock_rec_reset_nth_bit(lock, rec_get_heap_no(rec, + page_is_comp(buf_frame_align(rec)))); return(DB_DEADLOCK); } @@ -1754,7 +1876,7 @@ lock_rec_add_to_queue( lock_t* lock; lock_t* similar_lock = NULL; ulint heap_no; - page_t* page; + page_t* page = buf_frame_align(rec); ibool somebody_waits = FALSE; #ifdef UNIV_SYNC_DEBUG @@ -1762,15 +1884,15 @@ lock_rec_add_to_queue( #endif /* UNIV_SYNC_DEBUG */ ut_ad((type_mode & (LOCK_WAIT | LOCK_GAP)) || ((type_mode & LOCK_MODE_MASK) != LOCK_S) - || !lock_rec_other_has_expl_req(LOCK_X, 0, LOCK_WAIT, rec, trx)); + || !lock_rec_other_has_expl_req(LOCK_X, 0, LOCK_WAIT, + rec, page_is_comp(page), trx)); ut_ad((type_mode & (LOCK_WAIT | LOCK_GAP)) || ((type_mode & LOCK_MODE_MASK) != LOCK_X) - || !lock_rec_other_has_expl_req(LOCK_S, 0, LOCK_WAIT, rec, trx)); + || !lock_rec_other_has_expl_req(LOCK_S, 0, LOCK_WAIT, + rec, page_is_comp(page), trx)); type_mode = type_mode | LOCK_REC; - page = buf_frame_align(rec); - /* If rec is the supremum record, then we can reset the gap bit, as all locks on the supremum are automatically of the gap type, and we try to avoid unnecessary memory consumption of a new record lock @@ -1787,7 +1909,7 @@ lock_rec_add_to_queue( /* Look for a waiting lock request on the same record or on a gap */ - heap_no = rec_get_heap_no(rec); + heap_no = rec_get_heap_no(rec, page_is_comp(page)); lock = lock_rec_get_first_on_page(rec); while (lock != NULL) { @@ -1862,7 +1984,7 @@ lock_rec_lock_fast( || mode - (LOCK_MODE_MASK & mode) == 0 || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP); - heap_no = rec_get_heap_no(rec); + heap_no = rec_get_heap_no(rec, page_is_comp(buf_frame_align(rec))); lock = lock_rec_get_first_on_page(rec); @@ -1943,7 +2065,8 @@ lock_rec_lock_slow( trx = thr_get_trx(thr); - if (lock_rec_has_expl(mode, rec, trx)) { + if (lock_rec_has_expl(mode, rec, + page_is_comp(buf_frame_align(rec)), trx)) { /* The trx already has a strong enough lock on rec: do nothing */ @@ -2259,12 +2382,14 @@ lock_rec_reset_and_release_wait( { lock_t* lock; ulint heap_no; - + ibool comp; + #ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&kernel_mutex)); #endif /* UNIV_SYNC_DEBUG */ - heap_no = 
rec_get_heap_no(rec); + comp = page_is_comp(buf_frame_align(rec)); + heap_no = rec_get_heap_no(rec, comp); lock = lock_rec_get_first(rec); @@ -2275,7 +2400,7 @@ lock_rec_reset_and_release_wait( lock_rec_reset_nth_bit(lock, heap_no); } - lock = lock_rec_get_next(rec, lock); + lock = lock_rec_get_next(rec, comp, lock); } } @@ -2293,12 +2418,13 @@ lock_rec_inherit_to_gap( the locks on this record */ { lock_t* lock; - + ibool comp; #ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&kernel_mutex)); #endif /* UNIV_SYNC_DEBUG */ lock = lock_rec_get_first(rec); + comp = page_is_comp(buf_frame_align(rec)); while (lock != NULL) { if (!lock_rec_get_insert_intention(lock)) { @@ -2308,7 +2434,7 @@ lock_rec_inherit_to_gap( heir, lock->index, lock->trx); } - lock = lock_rec_get_next(rec, lock); + lock = lock_rec_get_next(rec, comp, lock); } } @@ -2325,12 +2451,13 @@ lock_rec_inherit_to_gap_if_gap_lock( the locks on this record */ { lock_t* lock; - + ibool comp; #ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&kernel_mutex)); #endif /* UNIV_SYNC_DEBUG */ lock = lock_rec_get_first(rec); + comp = page_is_comp(buf_frame_align(rec)); while (lock != NULL) { if (!lock_rec_get_insert_intention(lock) @@ -2342,7 +2469,7 @@ lock_rec_inherit_to_gap_if_gap_lock( heir, lock->index, lock->trx); } - lock = lock_rec_get_next(rec, lock); + lock = lock_rec_get_next(rec, comp, lock); } } @@ -2355,7 +2482,8 @@ lock_rec_move( /*==========*/ rec_t* receiver, /* in: record which gets locks; this record must have no lock requests on it! */ - rec_t* donator) /* in: record which gives locks */ + rec_t* donator, /* in: record which gives locks */ + ibool comp) /* in: TRUE=compact page format */ { lock_t* lock; ulint heap_no; @@ -2365,7 +2493,7 @@ lock_rec_move( ut_ad(mutex_own(&kernel_mutex)); #endif /* UNIV_SYNC_DEBUG */ - heap_no = rec_get_heap_no(donator); + heap_no = rec_get_heap_no(donator, comp); lock = lock_rec_get_first(donator); @@ -2385,7 +2513,7 @@ lock_rec_move( lock_rec_add_to_queue(type_mode, receiver, lock->index, lock->trx); - lock = lock_rec_get_next(donator, lock); + lock = lock_rec_get_next(donator, comp, lock); } ut_ad(lock_rec_get_first(donator) == NULL); @@ -2411,6 +2539,7 @@ lock_move_reorganize_page( UT_LIST_BASE_NODE_T(lock_t) old_locks; mem_heap_t* heap = NULL; rec_t* sup; + ibool comp; lock_mutex_enter_kernel(); @@ -2451,6 +2580,9 @@ lock_move_reorganize_page( lock = UT_LIST_GET_FIRST(old_locks); + comp = page_is_comp(page); + ut_ad(comp == page_is_comp(old_page)); + while (lock) { /* NOTE: we copy also the locks set on the infimum and supremum of the page; the infimum may carry locks if an @@ -2462,12 +2594,12 @@ lock_move_reorganize_page( /* Set locks according to old locks */ for (;;) { - ut_ad(0 == ut_memcmp(page_cur_get_rec(&cur1), + ut_ad(comp || 0 == ut_memcmp(page_cur_get_rec(&cur1), page_cur_get_rec(&cur2), - rec_get_data_size( + rec_get_data_size_old( page_cur_get_rec(&cur2)))); - - old_heap_no = rec_get_heap_no(page_cur_get_rec(&cur2)); + old_heap_no = rec_get_heap_no(page_cur_get_rec(&cur2), + comp); if (lock_rec_get_nth_bit(lock, old_heap_no)) { @@ -2526,6 +2658,7 @@ lock_move_rec_list_end( ulint heap_no; rec_t* sup; ulint type_mode; + ibool comp; lock_mutex_enter_kernel(); @@ -2539,6 +2672,8 @@ lock_move_rec_list_end( lock = lock_rec_get_first_on_page(page); + comp = page_is_comp(page); + while (lock != NULL) { page_cur_position(rec, &cur1); @@ -2554,13 +2689,12 @@ lock_move_rec_list_end( reset the lock bits on the old */ while (page_cur_get_rec(&cur1) != sup) { - - ut_ad(0 == ut_memcmp(page_cur_get_rec(&cur1), 
+ ut_ad(comp || 0 == ut_memcmp(page_cur_get_rec(&cur1), page_cur_get_rec(&cur2), - rec_get_data_size( + rec_get_data_size_old( page_cur_get_rec(&cur2)))); - - heap_no = rec_get_heap_no(page_cur_get_rec(&cur1)); + heap_no = rec_get_heap_no(page_cur_get_rec(&cur1), + comp); if (lock_rec_get_nth_bit(lock, heap_no)) { type_mode = lock->type_mode; @@ -2610,12 +2744,15 @@ lock_move_rec_list_start( page_cur_t cur2; ulint heap_no; ulint type_mode; + ibool comp; ut_a(new_page); lock_mutex_enter_kernel(); lock = lock_rec_get_first_on_page(page); + comp = page_is_comp(page); + ut_ad(comp == page_is_comp(new_page)); while (lock != NULL) { @@ -2629,13 +2766,12 @@ lock_move_rec_list_start( reset the lock bits on the old */ while (page_cur_get_rec(&cur1) != rec) { - - ut_ad(0 == ut_memcmp(page_cur_get_rec(&cur1), + ut_ad(comp || 0 == ut_memcmp(page_cur_get_rec(&cur1), page_cur_get_rec(&cur2), - rec_get_data_size( + rec_get_data_size_old( page_cur_get_rec(&cur2)))); - - heap_no = rec_get_heap_no(page_cur_get_rec(&cur1)); + heap_no = rec_get_heap_no(page_cur_get_rec(&cur1), + comp); if (lock_rec_get_nth_bit(lock, heap_no)) { type_mode = lock->type_mode; @@ -2675,13 +2811,16 @@ lock_update_split_right( page_t* right_page, /* in: right page */ page_t* left_page) /* in: left page */ { + ibool comp; lock_mutex_enter_kernel(); - + comp = page_is_comp(left_page); + ut_ad(comp == page_is_comp(right_page)); + /* Move the locks on the supremum of the left page to the supremum of the right page */ lock_rec_move(page_get_supremum_rec(right_page), - page_get_supremum_rec(left_page)); + page_get_supremum_rec(left_page), comp); /* Inherit the locks to the supremum of left page from the successor of the infimum on right page */ @@ -2735,13 +2874,16 @@ lock_update_root_raise( page_t* new_page, /* in: index page to which copied */ page_t* root) /* in: root page */ { + ibool comp; lock_mutex_enter_kernel(); - + comp = page_is_comp(root); + ut_ad(comp == page_is_comp(new_page)); + /* Move the locks on the supremum of the root to the supremum of new_page */ lock_rec_move(page_get_supremum_rec(new_page), - page_get_supremum_rec(root)); + page_get_supremum_rec(root), comp); lock_mutex_exit_kernel(); } @@ -2755,13 +2897,16 @@ lock_update_copy_and_discard( page_t* new_page, /* in: index page to which copied */ page_t* page) /* in: index page; NOT the root! 
*/ { + ibool comp; lock_mutex_enter_kernel(); - + comp = page_is_comp(page); + ut_ad(comp == page_is_comp(new_page)); + /* Move the locks on the supremum of the old page to the supremum of new_page */ lock_rec_move(page_get_supremum_rec(new_page), - page_get_supremum_rec(page)); + page_get_supremum_rec(page), comp); lock_rec_free_all_from_discard_page(page); lock_mutex_exit_kernel(); @@ -2799,8 +2944,11 @@ lock_update_merge_left( page_t* right_page) /* in: merged index page which will be discarded */ { + ibool comp; lock_mutex_enter_kernel(); - + comp = page_is_comp(left_page); + ut_ad(comp == page_is_comp(right_page)); + if (page_rec_get_next(orig_pred) != page_get_supremum_rec(left_page)) { /* Inherit the locks on the supremum of the left page to the @@ -2820,7 +2968,7 @@ lock_update_merge_left( of the left page */ lock_rec_move(page_get_supremum_rec(left_page), - page_get_supremum_rec(right_page)); + page_get_supremum_rec(right_page), comp); lock_rec_free_all_from_discard_page(right_page); @@ -2947,12 +3095,14 @@ lock_rec_store_on_page_infimum( bits are reset on the record */ { page_t* page; + ibool comp; page = buf_frame_align(rec); + comp = page_is_comp(page); lock_mutex_enter_kernel(); - lock_rec_move(page_get_infimum_rec(page), rec); + lock_rec_move(page_get_infimum_rec(page), rec, comp); lock_mutex_exit_kernel(); } @@ -2969,9 +3119,12 @@ lock_rec_restore_from_page_infimum( whose infimum stored the lock state; lock bits are reset on the infimum */ { + ibool comp; lock_mutex_enter_kernel(); - - lock_rec_move(rec, page_get_infimum_rec(page)); + comp = page_is_comp(page); + ut_ad(comp == page_is_comp(buf_frame_align(rec))); + + lock_rec_move(rec, page_get_infimum_rec(page), comp); lock_mutex_exit_kernel(); } @@ -3915,11 +4068,15 @@ lock_rec_print( FILE* file, /* in: file where to print */ lock_t* lock) /* in: record type lock */ { - page_t* page; - ulint space; - ulint page_no; - ulint i; - mtr_t mtr; + page_t* page; + ulint space; + ulint page_no; + ulint i; + mtr_t mtr; + mem_heap_t* heap; + ulint* offsets = NULL; + + heap = mem_heap_create(100); #ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&kernel_mutex)); @@ -3998,8 +4155,11 @@ lock_rec_print( fprintf(file, "Record lock, heap no %lu ", (ulong) i); if (page) { - rec_print(file, - page_find_rec_with_heap_no(page, i)); + rec_t* rec + = page_find_rec_with_heap_no(page, i); + offsets = rec_reget_offsets(rec, lock->index, + offsets, ULINT_UNDEFINED, heap); + rec_print(file, rec, offsets); } putc('\n', file); @@ -4007,6 +4167,7 @@ lock_rec_print( } mtr_commit(&mtr); + mem_heap_free(heap); } /************************************************************************* @@ -4284,12 +4445,16 @@ lock_rec_queue_validate( /*====================*/ /* out: TRUE if ok */ rec_t* rec, /* in: record to look at */ - dict_index_t* index) /* in: index, or NULL if not known */ + dict_index_t* index, /* in: index, or NULL if not known */ + const ulint* offsets)/* in: rec_get_offsets(rec, index) */ { trx_t* impl_trx; lock_t* lock; - + ibool comp; + ut_a(rec); + ut_ad(rec_offs_validate(rec, index, offsets)); + comp = page_is_comp(buf_frame_align(rec)); lock_mutex_enter_kernel(); @@ -4312,7 +4477,7 @@ lock_rec_queue_validate( ut_a(lock->index == index); } - lock = lock_rec_get_next(rec, lock); + lock = lock_rec_get_next(rec, comp, lock); } lock_mutex_exit_kernel(); @@ -4322,13 +4487,13 @@ lock_rec_queue_validate( if (index && (index->type & DICT_CLUSTERED)) { - impl_trx = lock_clust_rec_some_has_impl(rec, index); + impl_trx = lock_clust_rec_some_has_impl(rec, index, 
offsets); if (impl_trx && lock_rec_other_has_expl_req(LOCK_S, 0, - LOCK_WAIT, rec, impl_trx)) { + LOCK_WAIT, rec, comp, impl_trx)) { ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, rec, - impl_trx)); + comp, impl_trx)); } } @@ -4338,13 +4503,14 @@ lock_rec_queue_validate( next function call: we have to release lock table mutex to obey the latching order */ - impl_trx = lock_sec_rec_some_has_impl_off_kernel(rec, index); + impl_trx = lock_sec_rec_some_has_impl_off_kernel( + rec, index, offsets); if (impl_trx && lock_rec_other_has_expl_req(LOCK_S, 0, - LOCK_WAIT, rec, impl_trx)) { + LOCK_WAIT, rec, comp, impl_trx)) { - ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, rec, - impl_trx)); + ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, + rec, comp, impl_trx)); } } @@ -4363,10 +4529,10 @@ lock_rec_queue_validate( if (lock_get_mode(lock) == LOCK_S) { ut_a(!lock_rec_other_has_expl_req(LOCK_X, - 0, 0, rec, lock->trx)); + 0, 0, rec, comp, lock->trx)); } else { ut_a(!lock_rec_other_has_expl_req(LOCK_S, - 0, 0, rec, lock->trx)); + 0, 0, rec, comp, lock->trx)); } } else if (lock_get_wait(lock) && !lock_rec_get_gap(lock)) { @@ -4374,7 +4540,7 @@ lock_rec_queue_validate( ut_a(lock_rec_has_to_wait_in_queue(lock)); } - lock = lock_rec_get_next(rec, lock); + lock = lock_rec_get_next(rec, comp, lock); } lock_mutex_exit_kernel(); @@ -4400,6 +4566,8 @@ lock_rec_validate_page( ulint nth_bit = 0; ulint i; mtr_t mtr; + mem_heap_t* heap = mem_heap_create(100); + ulint* offsets = NULL; #ifdef UNIV_SYNC_DEBUG ut_ad(!mutex_own(&kernel_mutex)); @@ -4439,13 +4607,15 @@ loop: index = lock->index; rec = page_find_rec_with_heap_no(page, i); + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); fprintf(stderr, "Validating %lu %lu\n", (ulong) space, (ulong) page_no); lock_mutex_exit_kernel(); - lock_rec_queue_validate(rec, index); + lock_rec_queue_validate(rec, index, offsets); lock_mutex_enter_kernel(); @@ -4465,6 +4635,7 @@ function_exit: mtr_commit(&mtr); + mem_heap_free(heap); return(TRUE); } @@ -4637,8 +4808,16 @@ lock_rec_insert_check_and_lock( page_update_max_trx_id(buf_frame_align(rec), thr_get_trx(thr)->id); } - - ut_ad(lock_rec_queue_validate(next_rec, index)); + +#ifdef UNIV_DEBUG + { + mem_heap_t* heap = mem_heap_create(100); + const ulint* offsets = rec_get_offsets(next_rec, index, + ULINT_UNDEFINED, heap); + ut_ad(lock_rec_queue_validate(next_rec, index, offsets)); + mem_heap_free(heap); + } +#endif /* UNIV_DEBUG */ return(err); } @@ -4652,7 +4831,8 @@ void lock_rec_convert_impl_to_expl( /*==========================*/ rec_t* rec, /* in: user record on page */ - dict_index_t* index) /* in: index of record */ + dict_index_t* index, /* in: index of record */ + const ulint* offsets)/* in: rec_get_offsets(rec, index) */ { trx_t* impl_trx; @@ -4660,11 +4840,14 @@ lock_rec_convert_impl_to_expl( ut_ad(mutex_own(&kernel_mutex)); #endif /* UNIV_SYNC_DEBUG */ ut_ad(page_rec_is_user_rec(rec)); + ut_ad(rec_offs_validate(rec, index, offsets)); + ut_ad(page_is_comp(buf_frame_align(rec)) == index->table->comp); if (index->type & DICT_CLUSTERED) { - impl_trx = lock_clust_rec_some_has_impl(rec, index); + impl_trx = lock_clust_rec_some_has_impl(rec, index, offsets); } else { - impl_trx = lock_sec_rec_some_has_impl_off_kernel(rec, index); + impl_trx = lock_sec_rec_some_has_impl_off_kernel( + rec, index, offsets); } if (impl_trx) { @@ -4672,7 +4855,7 @@ lock_rec_convert_impl_to_expl( record, set one for it */ if (!lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, rec, - impl_trx)) { + index->table->comp, 
impl_trx)) { lock_rec_add_to_queue(LOCK_REC | LOCK_X | LOCK_REC_NOT_GAP, rec, index, @@ -4698,17 +4881,19 @@ lock_clust_rec_modify_check_and_lock( does nothing */ rec_t* rec, /* in: record which should be modified */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ que_thr_t* thr) /* in: query thread */ { ulint err; - + + ut_ad(rec_offs_validate(rec, index, offsets)); + ut_ad(index->type & DICT_CLUSTERED); + if (flags & BTR_NO_LOCKING_FLAG) { return(DB_SUCCESS); } - ut_ad(index->type & DICT_CLUSTERED); - lock_mutex_enter_kernel(); ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)); @@ -4716,13 +4901,13 @@ lock_clust_rec_modify_check_and_lock( /* If a transaction has no explicit x-lock set on the record, set one for it */ - lock_rec_convert_impl_to_expl(rec, index); + lock_rec_convert_impl_to_expl(rec, index, offsets); err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP, rec, index, thr); lock_mutex_exit_kernel(); - ut_ad(lock_rec_queue_validate(rec, index)); + ut_ad(lock_rec_queue_validate(rec, index, offsets)); return(err); } @@ -4766,8 +4951,16 @@ lock_sec_rec_modify_check_and_lock( err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP, rec, index, thr); lock_mutex_exit_kernel(); - - ut_ad(lock_rec_queue_validate(rec, index)); + +#ifdef UNIV_DEBUG + { + mem_heap_t* heap = mem_heap_create(100); + const ulint* offsets = rec_get_offsets(rec, index, + ULINT_UNDEFINED, heap); + ut_ad(lock_rec_queue_validate(rec, index, offsets)); + mem_heap_free(heap); + } +#endif /* UNIV_DEBUG */ if (err == DB_SUCCESS) { /* Update the page max trx id field */ @@ -4794,6 +4987,7 @@ lock_sec_rec_read_check_and_lock( which should be read or passed over by a read cursor */ dict_index_t* index, /* in: secondary index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ ulint mode, /* in: mode of the lock which the read cursor should set on records: LOCK_S or LOCK_X; the latter is possible in SELECT FOR UPDATE */ @@ -4805,6 +4999,7 @@ lock_sec_rec_read_check_and_lock( ut_ad(!(index->type & DICT_CLUSTERED)); ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec)); + ut_ad(rec_offs_validate(rec, index, offsets)); if (flags & BTR_NO_LOCKING_FLAG) { @@ -4827,14 +5022,14 @@ lock_sec_rec_read_check_and_lock( || recv_recovery_is_on()) && !page_rec_is_supremum(rec)) { - lock_rec_convert_impl_to_expl(rec, index); + lock_rec_convert_impl_to_expl(rec, index, offsets); } err = lock_rec_lock(FALSE, mode | gap_mode, rec, index, thr); lock_mutex_exit_kernel(); - ut_ad(lock_rec_queue_validate(rec, index)); + ut_ad(lock_rec_queue_validate(rec, index, offsets)); return(err); } @@ -4858,6 +5053,7 @@ lock_clust_rec_read_check_and_lock( which should be read or passed over by a read cursor */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ ulint mode, /* in: mode of the lock which the read cursor should set on records: LOCK_S or LOCK_X; the latter is possible in SELECT FOR UPDATE */ @@ -4871,6 +5067,9 @@ lock_clust_rec_read_check_and_lock( ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec)); ut_ad(gap_mode == LOCK_ORDINARY || gap_mode == LOCK_GAP || gap_mode == LOCK_REC_NOT_GAP); + ut_ad(index->type & DICT_CLUSTERED); + ut_ad(rec_offs_validate(rec, index, offsets)); + if (flags & BTR_NO_LOCKING_FLAG) { return(DB_SUCCESS); @@ -4885,14 +5084,14 @@ lock_clust_rec_read_check_and_lock( if (!page_rec_is_supremum(rec)) { - lock_rec_convert_impl_to_expl(rec, index); + 
lock_rec_convert_impl_to_expl(rec, index, offsets); } err = lock_rec_lock(FALSE, mode | gap_mode, rec, index, thr); lock_mutex_exit_kernel(); - ut_ad(lock_rec_queue_validate(rec, index)); - + ut_ad(lock_rec_queue_validate(rec, index, offsets)); + return(err); } diff --git a/innobase/log/log0recv.c b/innobase/log/log0recv.c index 10f921bb1f0..f42f0eb8c72 100644 --- a/innobase/log/log0recv.c +++ b/innobase/log/log0recv.c @@ -756,81 +756,124 @@ recv_parse_or_apply_log_rec_body( mtr_t* mtr) /* in: mtr or NULL; should be non-NULL if and only if page is non-NULL */ { - byte* new_ptr; + dict_index_t* index = NULL; - if (type <= MLOG_8BYTES) { - new_ptr = mlog_parse_nbytes(type, ptr, end_ptr, page); - - } else if (type == MLOG_REC_INSERT) { - new_ptr = page_cur_parse_insert_rec(FALSE, ptr, end_ptr, page, - mtr); - } else if (type == MLOG_REC_CLUST_DELETE_MARK) { - new_ptr = btr_cur_parse_del_mark_set_clust_rec(ptr, end_ptr, - page); - } else if (type == MLOG_REC_SEC_DELETE_MARK) { - new_ptr = btr_cur_parse_del_mark_set_sec_rec(ptr, end_ptr, - page); - } else if (type == MLOG_REC_UPDATE_IN_PLACE) { - new_ptr = btr_cur_parse_update_in_place(ptr, end_ptr, page); - - } else if ((type == MLOG_LIST_END_DELETE) - || (type == MLOG_LIST_START_DELETE)) { - new_ptr = page_parse_delete_rec_list(type, ptr, end_ptr, page, - mtr); - } else if (type == MLOG_LIST_END_COPY_CREATED) { - new_ptr = page_parse_copy_rec_list_to_created_page(ptr, - end_ptr, page, mtr); - } else if (type == MLOG_PAGE_REORGANIZE) { - new_ptr = btr_parse_page_reorganize(ptr, end_ptr, page, mtr); - - } else if (type == MLOG_PAGE_CREATE) { - new_ptr = page_parse_create(ptr, end_ptr, page, mtr); - - } else if (type == MLOG_UNDO_INSERT) { - new_ptr = trx_undo_parse_add_undo_rec(ptr, end_ptr, page); - - } else if (type == MLOG_UNDO_ERASE_END) { - new_ptr = trx_undo_parse_erase_page_end(ptr, end_ptr, page, - mtr); - } else if (type == MLOG_UNDO_INIT) { - new_ptr = trx_undo_parse_page_init(ptr, end_ptr, page, mtr); - - } else if (type == MLOG_UNDO_HDR_DISCARD) { - new_ptr = trx_undo_parse_discard_latest(ptr, end_ptr, page, - mtr); - } else if ((type == MLOG_UNDO_HDR_CREATE) - || (type == MLOG_UNDO_HDR_REUSE)) { - new_ptr = trx_undo_parse_page_header(type, ptr, end_ptr, page, - mtr); - } else if (type == MLOG_REC_MIN_MARK) { - new_ptr = btr_parse_set_min_rec_mark(ptr, end_ptr, page, mtr); - - } else if (type == MLOG_REC_DELETE) { - new_ptr = page_cur_parse_delete_rec(ptr, end_ptr, page, mtr); - - } else if (type == MLOG_IBUF_BITMAP_INIT) { - new_ptr = ibuf_parse_bitmap_init(ptr, end_ptr, page, mtr); - - } else if (type == MLOG_INIT_FILE_PAGE) { - new_ptr = fsp_parse_init_file_page(ptr, end_ptr, page); - - } else if (type == MLOG_WRITE_STRING) { - new_ptr = mlog_parse_string(ptr, end_ptr, page); - - } else if (type == MLOG_FILE_CREATE - || type == MLOG_FILE_RENAME - || type == MLOG_FILE_DELETE) { - new_ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, FALSE, + switch (type) { + case MLOG_1BYTE: case MLOG_2BYTES: case MLOG_4BYTES: case MLOG_8BYTES: + ptr = mlog_parse_nbytes(type, ptr, end_ptr, page); + break; + case MLOG_REC_INSERT: case MLOG_COMP_REC_INSERT: + if (NULL != (ptr = mlog_parse_index(ptr, end_ptr, + type == MLOG_COMP_REC_INSERT, &index))) { + ptr = page_cur_parse_insert_rec(FALSE, ptr, end_ptr, + index, page, mtr); + } + break; + case MLOG_REC_CLUST_DELETE_MARK: case MLOG_COMP_REC_CLUST_DELETE_MARK: + if (NULL != (ptr = mlog_parse_index(ptr, end_ptr, + type == MLOG_COMP_REC_CLUST_DELETE_MARK, &index))) { + ptr = 
btr_cur_parse_del_mark_set_clust_rec(ptr, + end_ptr, index, page); + } + break; + case MLOG_REC_SEC_DELETE_MARK: case MLOG_COMP_REC_SEC_DELETE_MARK: + if (NULL != (ptr = mlog_parse_index(ptr, end_ptr, + type == MLOG_COMP_REC_SEC_DELETE_MARK, &index))) { + ptr = btr_cur_parse_del_mark_set_sec_rec(ptr, end_ptr, + index, page); + } + break; + case MLOG_REC_UPDATE_IN_PLACE: case MLOG_COMP_REC_UPDATE_IN_PLACE: + if (NULL != (ptr = mlog_parse_index(ptr, end_ptr, + type == MLOG_COMP_REC_UPDATE_IN_PLACE, &index))) { + ptr = btr_cur_parse_update_in_place(ptr, end_ptr, + page, index); + } + break; + case MLOG_LIST_END_DELETE: case MLOG_COMP_LIST_END_DELETE: + case MLOG_LIST_START_DELETE: case MLOG_COMP_LIST_START_DELETE: + if (NULL != (ptr = mlog_parse_index(ptr, end_ptr, + type == MLOG_COMP_LIST_END_DELETE + || type == MLOG_COMP_LIST_START_DELETE, &index))) { + ptr = page_parse_delete_rec_list(type, ptr, end_ptr, + index, page, mtr); + } + break; + case MLOG_LIST_END_COPY_CREATED: case MLOG_COMP_LIST_END_COPY_CREATED: + if (NULL != (ptr = mlog_parse_index(ptr, end_ptr, + type == MLOG_COMP_LIST_END_COPY_CREATED, &index))) { + ptr = page_parse_copy_rec_list_to_created_page(ptr, + end_ptr, index, page, mtr); + } + break; + case MLOG_PAGE_REORGANIZE: case MLOG_COMP_PAGE_REORGANIZE: + if (NULL != (ptr = mlog_parse_index(ptr, end_ptr, + type == MLOG_COMP_PAGE_REORGANIZE, &index))) { + ptr = btr_parse_page_reorganize(ptr, end_ptr, index, + page, mtr); + } + break; + case MLOG_PAGE_CREATE: case MLOG_COMP_PAGE_CREATE: + ptr = page_parse_create(ptr, end_ptr, + type == MLOG_COMP_PAGE_CREATE, page, mtr); + break; + case MLOG_UNDO_INSERT: + ptr = trx_undo_parse_add_undo_rec(ptr, end_ptr, page); + break; + case MLOG_UNDO_ERASE_END: + ptr = trx_undo_parse_erase_page_end(ptr, end_ptr, page, mtr); + break; + case MLOG_UNDO_INIT: + ptr = trx_undo_parse_page_init(ptr, end_ptr, page, mtr); + break; + case MLOG_UNDO_HDR_DISCARD: + ptr = trx_undo_parse_discard_latest(ptr, end_ptr, page, mtr); + break; + case MLOG_UNDO_HDR_CREATE: + case MLOG_UNDO_HDR_REUSE: + ptr = trx_undo_parse_page_header(type, ptr, end_ptr, + page, mtr); + break; + case MLOG_REC_MIN_MARK: case MLOG_COMP_REC_MIN_MARK: + ptr = btr_parse_set_min_rec_mark(ptr, end_ptr, + type == MLOG_COMP_REC_MIN_MARK, page, mtr); + break; + case MLOG_REC_DELETE: case MLOG_COMP_REC_DELETE: + if (NULL != (ptr = mlog_parse_index(ptr, end_ptr, + type == MLOG_COMP_REC_DELETE, &index))) { + ptr = page_cur_parse_delete_rec(ptr, end_ptr, + index, page, mtr); + } + break; + case MLOG_IBUF_BITMAP_INIT: + ptr = ibuf_parse_bitmap_init(ptr, end_ptr, page, mtr); + break; + case MLOG_INIT_FILE_PAGE: + ptr = fsp_parse_init_file_page(ptr, end_ptr, page); + break; + case MLOG_WRITE_STRING: + ptr = mlog_parse_string(ptr, end_ptr, page); + break; + case MLOG_FILE_CREATE: + case MLOG_FILE_RENAME: + case MLOG_FILE_DELETE: + ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, FALSE, ULINT_UNDEFINED); - } else { - new_ptr = NULL; - + break; + default: + ptr = NULL; recv_sys->found_corrupt_log = TRUE; } - ut_ad(!page || new_ptr); + ut_ad(!page || ptr); + if (index) { + dict_table_t* table = index->table; + mem_heap_free(index->heap); + mutex_free(&(table->autoinc_mutex)); + mem_heap_free(table->heap); + } - return(new_ptr); + return(ptr); } /************************************************************************* @@ -2851,11 +2894,13 @@ void recv_recovery_from_checkpoint_finish(void) /*======================================*/ { + int i; + os_thread_id_t recovery_thread_id; + /* Rollback the 
uncommitted transactions which have no user session */ - if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) { - trx_rollback_or_clean_all_without_sess(); - } + fprintf(stderr, + "InnoDB: Starting to apply log records to the database...\n"); /* Apply the hashed log records to the respective file pages */ @@ -2888,9 +2933,15 @@ recv_recovery_from_checkpoint_finish(void) /* Free the resources of the recovery system */ recv_recovery_on = FALSE; + #ifndef UNIV_LOG_DEBUG recv_sys_free(); #endif + + if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) { + os_thread_create(trx_rollback_or_clean_all_without_sess, + (void *)&i, &recovery_thread_id); + } } /********************************************************** diff --git a/innobase/mtr/mtr0log.c b/innobase/mtr/mtr0log.c index 82baa8905ba..417093134c3 100644 --- a/innobase/mtr/mtr0log.c +++ b/innobase/mtr/mtr0log.c @@ -384,3 +384,160 @@ mlog_parse_string( return(ptr + len); } + +/************************************************************ +Opens a buffer for mlog, writes the initial log record and, +if needed, the field lengths of an index. */ + +byte* +mlog_open_and_write_index( +/*======================*/ + /* out: buffer, NULL if log mode + MTR_LOG_NONE */ + mtr_t* mtr, /* in: mtr */ + byte* rec, /* in: index record or page */ + dict_index_t* index, /* in: record descriptor */ + byte type, /* in: log item type */ + ulint size) /* in: requested buffer size in bytes + (if 0, calls mlog_close() and returns NULL) */ +{ + byte* log_ptr; + const byte* log_start; + const byte* log_end; + + if (!index->table->comp) { + log_start = log_ptr = mlog_open(mtr, 11 + size); + if (!log_ptr) { + return(NULL); /* logging is disabled */ + } + log_ptr = mlog_write_initial_log_record_fast(rec, type, + log_ptr, mtr); + log_end = log_ptr + 11 + size; + } else { + ulint i; + ulint n = dict_index_get_n_fields(index); + /* total size needed */ + ulint total = 11 + size + (n + 2) * 2; + ulint alloc = total; + /* allocate at most DYN_ARRAY_DATA_SIZE at a time */ + if (alloc > DYN_ARRAY_DATA_SIZE) { + alloc = DYN_ARRAY_DATA_SIZE; + } + log_start = log_ptr = mlog_open(mtr, alloc); + if (!log_ptr) { + return(NULL); /* logging is disabled */ + } + log_end = log_ptr + alloc; + log_ptr = mlog_write_initial_log_record_fast(rec, type, + log_ptr, mtr); + mach_write_to_2(log_ptr, n); + log_ptr += 2; + mach_write_to_2(log_ptr, + dict_index_get_n_unique_in_tree(index)); + log_ptr += 2; + for (i = 0; i < n; i++) { + dict_field_t* field; + dtype_t* type; + ulint len; + field = dict_index_get_nth_field(index, i); + type = dict_col_get_type(dict_field_get_col(field)); + len = field->fixed_len; + ut_ad(len < 0x7fff); + if (len == 0 && dtype_get_len(type) > 255) { + /* variable-length field + with maximum length > 255 */ + len = 0x7fff; + } + if (dtype_get_prtype(type) & DATA_NOT_NULL) { + len |= 0x8000; + } + if (log_ptr + 2 > log_end) { + mlog_close(mtr, log_ptr); + ut_a(total > (ulint) (log_ptr - log_start)); + total -= log_ptr - log_start; + alloc = total; + if (alloc > DYN_ARRAY_DATA_SIZE) { + alloc = DYN_ARRAY_DATA_SIZE; + } + log_start = log_ptr = mlog_open(mtr, alloc); + if (!log_ptr) { + return(NULL); /* logging is disabled */ + } + log_end = log_ptr + alloc; + } + mach_write_to_2(log_ptr, len); + log_ptr += 2; + } + } + if (size == 0) { + mlog_close(mtr, log_ptr); + log_ptr = NULL; + } else if (log_ptr + size > log_end) { + mlog_close(mtr, log_ptr); + log_ptr = mlog_open(mtr, size); + } + return(log_ptr); +} + +/************************************************************ +Parses a log 
record written by mlog_open_and_write_index. */ + +byte* +mlog_parse_index( +/*=============*/ + /* out: parsed record end, + NULL if not a complete record */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + /* out: new value of log_ptr */ + ibool comp, /* in: TRUE=compact record format */ + dict_index_t** index) /* out, own: dummy index */ +{ + ulint i, n, n_uniq; + dict_table_t* table; + dict_index_t* ind; + + if (comp) { + if (end_ptr < ptr + 4) { + return(NULL); + } + n = mach_read_from_2(ptr); + ptr += 2; + n_uniq = mach_read_from_2(ptr); + ut_ad(n_uniq <= n); + if (end_ptr < ptr + (n + 1) * 2) { + return(NULL); + } + } else { + n = n_uniq = 1; + } + table = dict_mem_table_create("LOG_DUMMY", DICT_HDR_SPACE, n, comp); + ind = dict_mem_index_create("LOG_DUMMY", "LOG_DUMMY", + DICT_HDR_SPACE, 0, n); + ind->table = table; + ind->n_uniq = n_uniq; + if (n_uniq != n) { + ind->type = DICT_CLUSTERED; + } + /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */ + ind->cached = TRUE; + if (comp) { + for (i = 0; i < n; i++) { + ulint len = mach_read_from_2(ptr += 2); + /* The high-order bit of len is the NOT NULL flag; + the rest is 0 or 0x7fff for variable-length fields, + and 1..0x7ffe for fixed-length fields. */ + dict_mem_table_add_col(table, "DUMMY", + ((len + 1) & 0x7fff) <= 1 + ? DATA_BINARY + : DATA_FIXBINARY, + len & 0x8000 ? DATA_NOT_NULL : 0, + len & 0x7fff, 0); + dict_index_add_col(ind, + dict_table_get_nth_col(table, i), 0, 0); + } + ptr += 2; + } + *index = ind; + return(ptr); +} diff --git a/innobase/page/page0cur.c b/innobase/page/page0cur.c index 459ab986610..8def8474d9a 100644 --- a/innobase/page/page0cur.c +++ b/innobase/page/page0cur.c @@ -30,6 +30,7 @@ ibool page_cur_try_search_shortcut( /*=========================*/ page_t* page, /* in: index page */ + dict_index_t* index, /* in: record descriptor */ dtuple_t* tuple, /* in: data tuple */ ulint* iup_matched_fields, /* in/out: already matched fields in upper @@ -55,9 +56,14 @@ page_cur_try_search_shortcut( #ifdef UNIV_SEARCH_DEBUG page_cur_t cursor2; #endif + mem_heap_t* heap; + ulint* offsets; ut_ad(dtuple_check_typed(tuple)); rec = page_header_get_ptr(page, PAGE_LAST_INSERT); + heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, index, + dtuple_get_n_fields(tuple), heap); ut_ad(rec); ut_ad(page_rec_is_user_rec(rec)); @@ -69,26 +75,30 @@ page_cur_try_search_shortcut( up_match = low_match; up_bytes = low_bytes; - cmp = page_cmp_dtuple_rec_with_match(tuple, rec, &low_match, + cmp = page_cmp_dtuple_rec_with_match(tuple, rec, offsets, &low_match, &low_bytes); if (cmp == -1) { + mem_heap_free(heap); return(FALSE); } next_rec = page_rec_get_next(rec); + offsets = rec_reget_offsets(next_rec, index, offsets, + dtuple_get_n_fields(tuple), heap); - cmp = page_cmp_dtuple_rec_with_match(tuple, next_rec, &up_match, - &up_bytes); + cmp = page_cmp_dtuple_rec_with_match(tuple, next_rec, offsets, + &up_match, &up_bytes); if (cmp != -1) { + mem_heap_free(heap); return(FALSE); } cursor->rec = rec; #ifdef UNIV_SEARCH_DEBUG - page_cur_search_with_match(page, tuple, PAGE_CUR_DBG, + page_cur_search_with_match(page, index, tuple, PAGE_CUR_DBG, iup_matched_fields, iup_matched_bytes, ilow_matched_fields, @@ -117,6 +127,7 @@ page_cur_try_search_shortcut( #ifdef UNIV_SEARCH_PERF_STAT page_cur_short_succ++; #endif + mem_heap_free(heap); return(TRUE); } @@ -130,22 +141,24 @@ static ibool page_cur_rec_field_extends( /*=======================*/ - /* out: TRUE if rec field extends tuple - field */ - dtuple_t* 
tuple, /* in: data tuple */ - rec_t* rec, /* in: record */ - ulint n) /* in: compare nth field */ + /* out: TRUE if rec field + extends tuple field */ + dtuple_t* tuple, /* in: data tuple */ + rec_t* rec, /* in: record */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n) /* in: compare nth field */ { dtype_t* type; dfield_t* dfield; byte* rec_f; ulint rec_f_len; + ut_ad(rec_offs_validate(rec, NULL, offsets)); dfield = dtuple_get_nth_field(tuple, n); type = dfield_get_type(dfield); - rec_f = rec_get_nth_field(rec, n, &rec_f_len); + rec_f = rec_get_nth_field(rec, offsets, n, &rec_f_len); if (type->mtype == DATA_VARCHAR || type->mtype == DATA_CHAR @@ -176,6 +189,7 @@ void page_cur_search_with_match( /*=======================*/ page_t* page, /* in: index page */ + dict_index_t* index, /* in: record descriptor */ dtuple_t* tuple, /* in: data tuple */ ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G, or PAGE_CUR_GE */ @@ -212,6 +226,9 @@ page_cur_search_with_match( ulint dbg_matched_fields; ulint dbg_matched_bytes; #endif + mem_heap_t* heap; + ulint* offsets = NULL; + ut_ad(page && tuple && iup_matched_fields && iup_matched_bytes && ilow_matched_fields && ilow_matched_bytes && cursor); ut_ad(dtuple_validate(tuple)); @@ -229,7 +246,7 @@ page_cur_search_with_match( && (page_header_get_ptr(page, PAGE_LAST_INSERT)) && (page_header_get_field(page, PAGE_DIRECTION) == PAGE_RIGHT)) { - if (page_cur_try_search_shortcut(page, tuple, + if (page_cur_try_search_shortcut(page, index, tuple, iup_matched_fields, iup_matched_bytes, ilow_matched_fields, @@ -245,6 +262,8 @@ page_cur_search_with_match( /*#endif */ #endif + heap = mem_heap_create(100); + /* The following flag does not work for non-latin1 char sets because cmp_full_field does not tell how many bytes matched */ ut_a(mode != PAGE_CUR_LE_OR_EXTENDS); @@ -279,7 +298,10 @@ page_cur_search_with_match( low_matched_fields, low_matched_bytes, up_matched_fields, up_matched_bytes); - cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, + offsets = rec_reget_offsets(mid_rec, index, offsets, + dtuple_get_n_fields_cmp(tuple), heap); + + cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, offsets, &cur_matched_fields, &cur_matched_bytes); if (cmp == 1) { @@ -288,10 +310,12 @@ page_cur_search_with_match( low_matched_bytes = cur_matched_bytes; } else if (cmp == -1) { + offsets = rec_reget_offsets(mid_rec, index, + offsets, dtuple_get_n_fields_cmp(tuple), heap); if (mode == PAGE_CUR_LE_OR_EXTENDS && page_cur_rec_field_extends(tuple, mid_rec, - cur_matched_fields)) { + offsets, cur_matched_fields)) { low = mid; low_matched_fields = cur_matched_fields; low_matched_bytes = cur_matched_bytes; @@ -329,7 +353,10 @@ page_cur_search_with_match( low_matched_fields, low_matched_bytes, up_matched_fields, up_matched_bytes); - cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, + offsets = rec_reget_offsets(mid_rec, index, + offsets, dtuple_get_n_fields_cmp(tuple), heap); + + cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, offsets, &cur_matched_fields, &cur_matched_bytes); if (cmp == 1) { @@ -338,9 +365,12 @@ page_cur_search_with_match( low_matched_bytes = cur_matched_bytes; } else if (cmp == -1) { + offsets = rec_reget_offsets(mid_rec, index, + offsets, dtuple_get_n_fields_cmp(tuple), heap); + if (mode == PAGE_CUR_LE_OR_EXTENDS && page_cur_rec_field_extends(tuple, mid_rec, - cur_matched_fields)) { + offsets, cur_matched_fields)) { low_rec = mid_rec; low_matched_fields = cur_matched_fields; low_matched_bytes = cur_matched_bytes; @@ -368,7 +398,9 @@ 
page_cur_search_with_match( dbg_matched_fields = 0; dbg_matched_bytes = 0; - dbg_cmp = page_cmp_dtuple_rec_with_match(tuple, low_rec, + offsets = rec_reget_offsets(low_rec, index, + offsets, ULINT_UNDEFINED, heap); + dbg_cmp = page_cmp_dtuple_rec_with_match(tuple, low_rec, offsets, &dbg_matched_fields, &dbg_matched_bytes); if (mode == PAGE_CUR_G) { @@ -390,7 +422,9 @@ page_cur_search_with_match( dbg_matched_fields = 0; dbg_matched_bytes = 0; - dbg_cmp = page_cmp_dtuple_rec_with_match(tuple, up_rec, + offsets = rec_reget_offsets(up_rec, index, + offsets, ULINT_UNDEFINED, heap); + dbg_cmp = page_cmp_dtuple_rec_with_match(tuple, up_rec, offsets, &dbg_matched_fields, &dbg_matched_bytes); if (mode == PAGE_CUR_G) { @@ -419,6 +453,7 @@ page_cur_search_with_match( *iup_matched_bytes = up_matched_bytes; *ilow_matched_fields = low_matched_fields; *ilow_matched_bytes = low_matched_bytes; + mem_heap_free(heap); } /*************************************************************** @@ -463,10 +498,12 @@ static void page_cur_insert_rec_write_log( /*==========================*/ - rec_t* insert_rec, /* in: inserted physical record */ - ulint rec_size, /* in: insert_rec size */ - rec_t* cursor_rec, /* in: record the cursor is pointing to */ - mtr_t* mtr) /* in: mini-transaction handle */ + rec_t* insert_rec, /* in: inserted physical record */ + ulint rec_size, /* in: insert_rec size */ + rec_t* cursor_rec, /* in: record the + cursor is pointing to */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mini-transaction handle */ { ulint cur_rec_size; ulint extra_size; @@ -476,23 +513,30 @@ page_cur_insert_rec_write_log( byte* cur_ptr; ulint extra_info_yes; byte* log_ptr; + byte* log_end; ulint i; ut_a(rec_size < UNIV_PAGE_SIZE); - ut_ad(rec_size == rec_get_size(insert_rec)); - log_ptr = mlog_open(mtr, 30 + MLOG_BUF_MARGIN); + { + mem_heap_t* heap; + ulint* cur_offs; + ulint* ins_offs; - if (log_ptr == NULL) { + heap = mem_heap_create(100); + cur_offs = rec_get_offsets(cursor_rec, index, + ULINT_UNDEFINED, heap); + ins_offs = rec_get_offsets(insert_rec, index, + ULINT_UNDEFINED, heap); - return; + extra_size = rec_offs_extra_size(ins_offs); + cur_extra_size = rec_offs_extra_size(cur_offs); + ut_ad(rec_size == rec_offs_size(ins_offs)); + cur_rec_size = rec_offs_size(cur_offs); + + mem_heap_free(heap); } - extra_size = rec_get_extra_size(insert_rec); - - cur_extra_size = rec_get_extra_size(cursor_rec); - cur_rec_size = rec_get_size(cursor_rec); - ins_ptr = insert_rec - extra_size; i = 0; @@ -514,7 +558,9 @@ page_cur_insert_rec_write_log( ins_ptr++; cur_ptr++; } else if ((i < extra_size) - && (i >= extra_size - REC_N_EXTRA_BYTES)) { + && (i >= extra_size - (index->table->comp + ? REC_N_NEW_EXTRA_BYTES + : REC_N_OLD_EXTRA_BYTES))) { i = extra_size; ins_ptr = insert_rec; cur_ptr = cursor_rec; @@ -525,16 +571,35 @@ page_cur_insert_rec_write_log( } if (mtr_get_log_mode(mtr) != MTR_LOG_SHORT_INSERTS) { - - log_ptr = mlog_write_initial_log_record_fast(insert_rec, - MLOG_REC_INSERT, log_ptr, mtr); + + log_ptr = mlog_open_and_write_index(mtr, insert_rec, index, + index->table->comp + ? 
MLOG_COMP_REC_INSERT : MLOG_REC_INSERT, + 2 + 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN); + + if (!log_ptr) { + /* Logging in mtr is switched off during crash + recovery: in that case mlog_open returns NULL */ + return; + } + + log_end = &log_ptr[2 + 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN]; /* Write the cursor rec offset as a 2-byte ulint */ mach_write_to_2(log_ptr, cursor_rec - buf_frame_align(cursor_rec)); log_ptr += 2; + } else { + log_ptr = mlog_open(mtr, 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN); + if (!log_ptr) { + /* Logging in mtr is switched off during crash + recovery: in that case mlog_open returns NULL */ + return; + } + log_end = &log_ptr[5 + 1 + 5 + 5 + MLOG_BUF_MARGIN]; } - if ((rec_get_info_bits(insert_rec) != rec_get_info_bits(cursor_rec)) + if ((rec_get_info_bits(insert_rec, index->table->comp) != + rec_get_info_bits(cursor_rec, index->table->comp)) || (extra_size != cur_extra_size) || (rec_size != cur_rec_size)) { @@ -549,7 +614,8 @@ page_cur_insert_rec_write_log( + extra_info_yes); if (extra_info_yes) { /* Write the info bits */ - mach_write_to_1(log_ptr, rec_get_info_bits(insert_rec)); + mach_write_to_1(log_ptr, + rec_get_info_bits(insert_rec, index->table->comp)); log_ptr++; /* Write the record origin offset */ @@ -565,17 +631,15 @@ page_cur_insert_rec_write_log( /* Write to the log the inserted index record end segment which differs from the cursor record */ - if (rec_size - i < MLOG_BUF_MARGIN) { - ut_memcpy(log_ptr, ins_ptr, rec_size - i); - log_ptr += rec_size - i; - } + rec_size -= i; - mlog_close(mtr, log_ptr); - - ut_a(rec_size - i < UNIV_PAGE_SIZE); - - if (rec_size - i >= MLOG_BUF_MARGIN) { - mlog_catenate_string(mtr, ins_ptr, rec_size - i); + if (log_ptr + rec_size <= log_end) { + memcpy(log_ptr, ins_ptr, rec_size); + mlog_close(mtr, log_ptr + rec_size); + } else { + mlog_close(mtr, log_ptr); + ut_a(rec_size < UNIV_PAGE_SIZE); + mlog_catenate_string(mtr, ins_ptr, rec_size); } } @@ -585,12 +649,13 @@ Parses a log record of a record insert on a page. 
*/ byte* page_cur_parse_insert_rec( /*======================*/ - /* out: end of log record or NULL */ - ibool is_short,/* in: TRUE if short inserts */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr) /* in: mtr or NULL */ + /* out: end of log record or NULL */ + ibool is_short,/* in: TRUE if short inserts */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: record descriptor */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr) /* in: mtr or NULL */ { ulint extra_info_yes; ulint offset = 0; /* remove warning */ @@ -603,6 +668,8 @@ page_cur_parse_insert_rec( byte* ptr2 = ptr; ulint info_bits = 0; /* remove warning */ page_cur_t cursor; + mem_heap_t* heap; + ulint* offsets; if (!is_short) { /* Read the cursor rec offset as a 2-byte ulint */ @@ -689,11 +756,14 @@ page_cur_parse_insert_rec( cursor_rec = page + offset; } + heap = mem_heap_create(100); + offsets = rec_get_offsets(cursor_rec, index, ULINT_UNDEFINED, heap); + if (extra_info_yes == 0) { - info_bits = rec_get_info_bits(cursor_rec); - origin_offset = rec_get_extra_size(cursor_rec); - mismatch_index = rec_get_size(cursor_rec) - end_seg_len; - } + info_bits = rec_get_info_bits(cursor_rec, index->table->comp); + origin_offset = rec_offs_extra_size(offsets); + mismatch_index = rec_offs_size(offsets) - end_seg_len; + } if (mismatch_index + end_seg_len < sizeof buf1) { buf = buf1; @@ -722,14 +792,24 @@ page_cur_parse_insert_rec( ut_error; } - ut_memcpy(buf, rec_get_start(cursor_rec), mismatch_index); + ut_memcpy(buf, rec_get_start(cursor_rec, offsets), mismatch_index); ut_memcpy(buf + mismatch_index, ptr, end_seg_len); - rec_set_info_bits(buf + origin_offset, info_bits); + rec_set_info_bits(buf + origin_offset, index->table->comp, info_bits); + + /* Set the status bits for new-style records. */ + if (index->table->comp) { + /* Leaf pages (level 0) contain ordinary records; + non-leaf pages contain node pointer records. */ + ulint level = page_header_get_field( + buf_frame_align(cursor_rec), PAGE_LEVEL); + rec_set_status(buf + origin_offset, + level ? 
REC_STATUS_NODE_PTR : REC_STATUS_ORDINARY); + } page_cur_position(cursor_rec, &cursor); - page_cur_rec_insert(&cursor, buf + origin_offset, mtr); + page_cur_rec_insert(&cursor, buf + origin_offset, index, mtr); if (buf != buf1) { @@ -751,68 +831,80 @@ page_cur_insert_rec_low( /* out: pointer to record if succeed, NULL otherwise */ page_cur_t* cursor, /* in: a page cursor */ - dtuple_t* tuple, /* in: pointer to a data tuple or NULL */ - ulint data_size,/* in: data size of tuple */ - rec_t* rec, /* in: pointer to a physical record or NULL */ + dtuple_t* tuple, /* in: pointer to a data tuple or NULL */ + dict_index_t* index, /* in: record descriptor */ + rec_t* rec, /* in: pointer to a physical record or NULL */ mtr_t* mtr) /* in: mini-transaction handle */ { - byte* insert_buf = NULL; - ulint rec_size; - byte* page; /* the relevant page */ - rec_t* last_insert; /* cursor position at previous insert */ - rec_t* insert_rec; /* inserted record */ - ulint heap_no; /* heap number of the inserted record */ - rec_t* current_rec; /* current record after which the - new record is inserted */ - rec_t* next_rec; /* next record after current before - the insertion */ - ulint owner_slot; /* the slot which owns the inserted record */ - rec_t* owner_rec; - ulint n_owned; - + byte* insert_buf = NULL; + ulint rec_size; + byte* page; /* the relevant page */ + rec_t* last_insert; /* cursor position at previous insert */ + rec_t* insert_rec; /* inserted record */ + ulint heap_no; /* heap number of the inserted record */ + rec_t* current_rec; /* current record after which the + new record is inserted */ + rec_t* next_rec; /* next record after current before + the insertion */ + ulint owner_slot; /* the slot which owns the + inserted record */ + rec_t* owner_rec; + ulint n_owned; + mem_heap_t* heap; + ulint* offsets; + ibool comp = index->table->comp; + ut_ad(cursor && mtr); ut_ad(tuple || rec); ut_ad(!(tuple && rec)); ut_ad(rec || dtuple_check_typed(tuple)); - ut_ad(rec || (dtuple_get_data_size(tuple) == data_size)); page = page_cur_get_page(cursor); + ut_ad(page_is_comp(page) == comp); + ut_ad(cursor->rec != page_get_supremum_rec(page)); + heap = mem_heap_create(100); + /* 1. Get the size of the physical record in the page */ if (tuple != NULL) { - rec_size = data_size + rec_get_converted_extra_size( - data_size, - dtuple_get_n_fields(tuple)); + offsets = NULL; + rec_size = rec_get_converted_size(index, tuple); } else { - rec_size = rec_get_size(rec); + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); + rec_size = rec_offs_size(offsets); } /* 2. Try to find suitable space from page memory management */ - insert_buf = page_mem_alloc(page, rec_size, &heap_no); + insert_buf = page_mem_alloc(page, rec_size, index, &heap_no); if (insert_buf == NULL) { - + mem_heap_free(heap); return(NULL); } /* 3. Create the record */ if (tuple != NULL) { - insert_rec = rec_convert_dtuple_to_rec_low(insert_buf, tuple, - data_size); + insert_rec = rec_convert_dtuple_to_rec(insert_buf, + index, tuple); } else { - insert_rec = rec_copy(insert_buf, rec); + insert_rec = rec_copy(insert_buf, rec, offsets); } ut_ad(insert_rec); - ut_ad(rec_size == rec_get_size(insert_rec)); + offsets = rec_reget_offsets(insert_rec, index, + offsets, ULINT_UNDEFINED, heap); + ut_ad(rec_size == rec_offs_size(offsets)); /* 4. 
Insert the record in the linked list of records */ - current_rec = cursor->rec; + ut_ad(!comp || rec_get_status(current_rec) <= REC_STATUS_INFIMUM); + ut_ad(!comp || rec_get_status(insert_rec) < REC_STATUS_INFIMUM); + next_rec = page_rec_get_next(current_rec); + ut_ad(!comp || rec_get_status(next_rec) != REC_STATUS_INFIMUM); page_rec_set_next(insert_rec, next_rec); page_rec_set_next(current_rec, insert_rec); @@ -821,12 +913,15 @@ page_cur_insert_rec_low( /* 5. Set the n_owned field in the inserted record to zero, and set the heap_no field */ - rec_set_n_owned(insert_rec, 0); - rec_set_heap_no(insert_rec, heap_no); + rec_set_n_owned(insert_rec, comp, 0); + rec_set_heap_no(insert_rec, comp, heap_no); /* 6. Update the last insertion info in page header */ last_insert = page_header_get_ptr(page, PAGE_LAST_INSERT); + ut_ad(!last_insert || !comp + || rec_get_node_ptr_flag(last_insert) + == rec_get_node_ptr_flag(insert_rec)); if (last_insert == NULL) { page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION); @@ -855,8 +950,8 @@ page_cur_insert_rec_low( /* 7. It remains to update the owner record. */ owner_rec = page_rec_find_owner_rec(insert_rec); - n_owned = rec_get_n_owned(owner_rec); - rec_set_n_owned(owner_rec, n_owned + 1); + n_owned = rec_get_n_owned(owner_rec, comp); + rec_set_n_owned(owner_rec, comp, n_owned + 1); /* 8. Now we have incremented the n_owned field of the owner record. If the number exceeds PAGE_DIR_SLOT_MAX_N_OWNED, @@ -868,8 +963,10 @@ page_cur_insert_rec_low( } /* 9. Write log record of the insert */ - page_cur_insert_rec_write_log(insert_rec, rec_size, current_rec, mtr); + page_cur_insert_rec_write_log(insert_rec, rec_size, current_rec, + index, mtr); + mem_heap_free(heap); return(insert_rec); } @@ -879,17 +976,19 @@ UNIV_INLINE byte* page_copy_rec_list_to_created_page_write_log( /*=========================================*/ - /* out: 4-byte field where to write the log data - length */ - page_t* page, /* in: index page */ - mtr_t* mtr) /* in: mtr */ + /* out: 4-byte field where to + write the log data length */ + page_t* page, /* in: index page */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mtr */ { byte* log_ptr; - - mlog_write_initial_log_record(page, MLOG_LIST_END_COPY_CREATED, mtr); - - log_ptr = mlog_open(mtr, 4); + log_ptr = mlog_open_and_write_index(mtr, page, index, + index->table->comp + ? MLOG_COMP_LIST_END_COPY_CREATED + : MLOG_LIST_END_COPY_CREATED, 4); + ut_a(log_ptr); mlog_close(mtr, log_ptr + 4); return(log_ptr); @@ -901,11 +1000,12 @@ Parses a log record of copying a record list end to a new created page. */ byte* page_parse_copy_rec_list_to_created_page( /*=====================================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr) /* in: mtr or NULL */ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: record descriptor */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr) /* in: mtr or NULL */ { byte* rec_end; ulint log_data_len; @@ -931,7 +1031,8 @@ page_parse_copy_rec_list_to_created_page( } while (ptr < rec_end) { - ptr = page_cur_parse_insert_rec(TRUE, ptr, end_ptr, page, mtr); + ptr = page_cur_parse_insert_rec(TRUE, ptr, end_ptr, + index, page, mtr); } ut_a(ptr == rec_end); @@ -950,10 +1051,11 @@ including that record. Infimum and supremum records are not copied. 
*/ void page_copy_rec_list_end_to_created_page( /*===================================*/ - page_t* new_page, /* in: index page to copy to */ - page_t* page, /* in: index page */ - rec_t* rec, /* in: first record to copy */ - mtr_t* mtr) /* in: mtr */ + page_t* new_page, /* in: index page to copy to */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: first record to copy */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mtr */ { page_dir_slot_t* slot = 0; /* remove warning */ byte* heap_top; @@ -966,9 +1068,13 @@ page_copy_rec_list_end_to_created_page( ulint log_mode; byte* log_ptr; ulint log_data_len; + ibool comp = page_is_comp(page); + mem_heap_t* heap; + ulint* offsets = NULL; - ut_ad(page_header_get_field(new_page, PAGE_N_HEAP) == 2); + ut_ad(page_dir_get_n_heap(new_page) == 2); ut_ad(page != new_page); + ut_ad(comp == page_is_comp(new_page)); if (rec == page_get_infimum_rec(page)) { @@ -983,12 +1089,13 @@ page_copy_rec_list_end_to_created_page( #ifdef UNIV_DEBUG /* To pass the debug tests we have to set these dummy values in the debug version */ - page_header_set_field(new_page, PAGE_N_DIR_SLOTS, UNIV_PAGE_SIZE / 2); + page_dir_set_n_slots(new_page, UNIV_PAGE_SIZE / 2); page_header_set_ptr(new_page, PAGE_HEAP_TOP, new_page + UNIV_PAGE_SIZE - 1); #endif - log_ptr = page_copy_rec_list_to_created_page_write_log(new_page, mtr); + log_ptr = page_copy_rec_list_to_created_page_write_log(new_page, + index, mtr); log_data_len = dyn_array_get_data_size(&(mtr->log)); @@ -997,22 +1104,29 @@ page_copy_rec_list_end_to_created_page( log_mode = mtr_set_log_mode(mtr, MTR_LOG_SHORT_INSERTS); prev_rec = page_get_infimum_rec(new_page); - heap_top = new_page + PAGE_SUPREMUM_END; + if (comp) { + heap_top = new_page + PAGE_NEW_SUPREMUM_END; + } else { + heap_top = new_page + PAGE_OLD_SUPREMUM_END; + } count = 0; slot_index = 0; n_recs = 0; + heap = mem_heap_create(100); + /* should be do ... until, comment by Jani */ while (rec != page_get_supremum_rec(page)) { - - insert_rec = rec_copy(heap_top, rec); + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + insert_rec = rec_copy(heap_top, rec, offsets); - rec_set_next_offs(prev_rec, insert_rec - new_page); + rec_set_next_offs(prev_rec, comp, insert_rec - new_page); - rec_set_n_owned(insert_rec, 0); - rec_set_heap_no(insert_rec, 2 + n_recs); + rec_set_n_owned(insert_rec, comp, 0); + rec_set_heap_no(insert_rec, comp, 2 + n_recs); - rec_size = rec_get_size(insert_rec); + rec_size = rec_offs_size(offsets); heap_top = heap_top + rec_size; @@ -1034,7 +1148,7 @@ page_copy_rec_list_end_to_created_page( } page_cur_insert_rec_write_log(insert_rec, rec_size, prev_rec, - mtr); + index, mtr); prev_rec = insert_rec; rec = page_rec_get_next(rec); } @@ -1056,22 +1170,25 @@ page_copy_rec_list_end_to_created_page( slot_index--; } + mem_heap_free(heap); + log_data_len = dyn_array_get_data_size(&(mtr->log)) - log_data_len; ut_a(log_data_len < 100 * UNIV_PAGE_SIZE); mach_write_to_4(log_ptr, log_data_len); - rec_set_next_offs(insert_rec, PAGE_SUPREMUM); + rec_set_next_offs(insert_rec, comp, + comp ? 
PAGE_NEW_SUPREMUM : PAGE_OLD_SUPREMUM); slot = page_dir_get_nth_slot(new_page, 1 + slot_index); page_dir_slot_set_rec(slot, page_get_supremum_rec(new_page)); page_dir_slot_set_n_owned(slot, count + 1); - page_header_set_field(new_page, PAGE_N_DIR_SLOTS, 2 + slot_index); + page_dir_set_n_slots(new_page, 2 + slot_index); page_header_set_ptr(new_page, PAGE_HEAP_TOP, heap_top); - page_header_set_field(new_page, PAGE_N_HEAP, 2 + n_recs); + page_dir_set_n_heap(new_page, 2 + n_recs); page_header_set_field(new_page, PAGE_N_RECS, n_recs); page_header_set_ptr(new_page, PAGE_LAST_INSERT, NULL); @@ -1089,14 +1206,27 @@ UNIV_INLINE void page_cur_delete_rec_write_log( /*==========================*/ - rec_t* cursor_rec, /* in: record to be deleted */ - mtr_t* mtr) /* in: mini-transaction handle */ + rec_t* rec, /* in: record to be deleted */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mini-transaction handle */ { - mlog_write_initial_log_record(cursor_rec, MLOG_REC_DELETE, mtr); + byte* log_ptr; + + log_ptr = mlog_open_and_write_index(mtr, rec, index, + index->table->comp + ? MLOG_COMP_REC_DELETE + : MLOG_REC_DELETE, 2); + + if (!log_ptr) { + /* Logging in mtr is switched off during crash recovery: + in that case mlog_open returns NULL */ + return; + } /* Write the cursor rec offset as a 2-byte ulint */ - mlog_catenate_ulint(mtr, cursor_rec - buf_frame_align(cursor_rec), - MLOG_2BYTES); + mach_write_to_2(log_ptr, rec - buf_frame_align(rec)); + + mlog_close(mtr, log_ptr + 2); } /*************************************************************** @@ -1105,11 +1235,12 @@ Parses log record of a record delete on a page. */ byte* page_cur_parse_delete_rec( /*======================*/ - /* out: pointer to record end or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr) /* in: mtr or NULL */ + /* out: pointer to record end or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: record descriptor */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr) /* in: mtr or NULL */ { ulint offset; page_cur_t cursor; @@ -1128,7 +1259,7 @@ page_cur_parse_delete_rec( if (page) { page_cur_position(page + offset, &cursor); - page_cur_delete_rec(&cursor, mtr); + page_cur_delete_rec(&cursor, index, mtr); } return(ptr); @@ -1142,6 +1273,7 @@ void page_cur_delete_rec( /*================*/ page_cur_t* cursor, /* in: a page cursor */ + dict_index_t* index, /* in: record descriptor */ mtr_t* mtr) /* in: mini-transaction handle */ { page_dir_slot_t* cur_dir_slot; @@ -1169,7 +1301,7 @@ page_cur_delete_rec( cur_n_owned = page_dir_slot_get_n_owned(cur_dir_slot); /* 0. Write the log record */ - page_cur_delete_rec_write_log(current_rec, mtr); + page_cur_delete_rec_write_log(current_rec, index, mtr); /* 1. Reset the last insert info in the page header and increment the modify clock for the frame */ @@ -1223,7 +1355,7 @@ page_cur_delete_rec( page_dir_slot_set_n_owned(cur_dir_slot, cur_n_owned - 1); /* 6. Free the memory occupied by the record */ - page_mem_free(page, current_rec); + page_mem_free(page, current_rec, index); /* 7. Now we have decremented the number of owned records of the slot. 
If the number drops below PAGE_DIR_SLOT_MIN_N_OWNED, we balance the diff --git a/innobase/page/page0page.c b/innobase/page/page0page.c index 343f300fc77..38b1e503c8f 100644 --- a/innobase/page/page0page.c +++ b/innobase/page/page0page.c @@ -18,6 +18,8 @@ Created 2/2/1994 Heikki Tuuri #include "fut0lst.h" #include "btr0sea.h" #include "buf0buf.h" +#include "srv0srv.h" +#include "btr0btr.h" /* THE INDEX PAGE ============== @@ -75,10 +77,14 @@ page_dir_find_owner_slot( page_t* page; page_dir_slot_t* slot; rec_t* original_rec = rec; + ibool comp; ut_ad(page_rec_check(rec)); - while (rec_get_n_owned(rec) == 0) { + page = buf_frame_align(rec); + comp = page_is_comp(page); + + while (rec_get_n_owned(rec, comp) == 0) { steps++; rec = page_rec_get_next(rec); } @@ -96,14 +102,18 @@ page_dir_find_owner_slot( "InnoDB: Original record ", (ulong) buf_frame_get_page_no(page)); - rec_print(stderr, original_rec); + if (comp) { + fputs("(compact record)\n", stderr); + } else { + rec_print_old(stderr, original_rec); + } fprintf(stderr, "\n" "InnoDB: on that page. Steps %lu.\n", (ulong) steps); fputs( "InnoDB: Cannot find the dir slot for record ", stderr); - rec_print(stderr, rec); + rec_print(stderr, rec, NULL); fputs("\n" "InnoDB: on that page!\n", stderr); @@ -136,14 +146,15 @@ page_dir_slot_check( page = buf_frame_align(slot); - n_slots = page_header_get_field(page, PAGE_N_DIR_SLOTS); + n_slots = page_dir_get_n_slots(page); ut_a(slot <= page_dir_get_nth_slot(page, 0)); ut_a(slot >= page_dir_get_nth_slot(page, n_slots - 1)); - ut_a(page_rec_check(page + mach_read_from_2(slot))); + ut_a(page_rec_check(page_dir_slot_get_rec(slot))); - n_owned = rec_get_n_owned(page + mach_read_from_2(slot)); + n_owned = rec_get_n_owned(page_dir_slot_get_rec(slot), + page_is_comp(page)); if (slot == page_dir_get_nth_slot(page, 0)) { ut_a(n_owned == 1); @@ -194,12 +205,14 @@ Allocates a block of memory from an index page. 
*/ byte* page_mem_alloc( /*===========*/ - /* out: pointer to start of allocated - buffer, or NULL if allocation fails */ - page_t* page, /* in: index page */ - ulint need, /* in: number of bytes needed */ - ulint* heap_no)/* out: this contains the heap number - of the allocated record if allocation succeeds */ + /* out: pointer to start of allocated + buffer, or NULL if allocation fails */ + page_t* page, /* in: index page */ + ulint need, /* in: number of bytes needed */ + dict_index_t* index, /* in: record descriptor */ + ulint* heap_no)/* out: this contains the heap number + of the allocated record + if allocation succeeds */ { rec_t* rec; byte* block; @@ -213,18 +226,30 @@ page_mem_alloc( rec = page_header_get_ptr(page, PAGE_FREE); - if (rec && (rec_get_size(rec) >= need)) { + if (rec) { + mem_heap_t* heap + = mem_heap_create(100); + const ulint* offsets + = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); - page_header_set_ptr(page, PAGE_FREE, page_rec_get_next(rec)); + if (rec_offs_size(offsets) >= need) { + page_header_set_ptr(page, PAGE_FREE, + page_rec_get_next(rec)); - garbage = page_header_get_field(page, PAGE_GARBAGE); - ut_ad(garbage >= need); + garbage = page_header_get_field(page, PAGE_GARBAGE); + ut_ad(garbage >= need); - page_header_set_field(page, PAGE_GARBAGE, garbage - need); + page_header_set_field(page, PAGE_GARBAGE, + garbage - need); - *heap_no = rec_get_heap_no(rec); + *heap_no = rec_get_heap_no(rec, page_is_comp(page)); - return(rec_get_start(rec)); + block = rec_get_start(rec, offsets); + mem_heap_free(heap); + return(block); + } + + mem_heap_free(heap); } /* Could not find space from the free list, try top of heap */ @@ -235,9 +260,9 @@ page_mem_alloc( block = page_header_get_ptr(page, PAGE_HEAP_TOP); page_header_set_ptr(page, PAGE_HEAP_TOP, block + need); - *heap_no = page_header_get_field(page, PAGE_N_HEAP); + *heap_no = page_dir_get_n_heap(page); - page_header_set_field(page, PAGE_N_HEAP, 1 + *heap_no); + page_dir_set_n_heap(page, 1 + *heap_no); return(block); } @@ -253,9 +278,11 @@ page_create_write_log( /*==================*/ buf_frame_t* frame, /* in: a buffer frame where the page is created */ - mtr_t* mtr) /* in: mini-transaction handle */ + mtr_t* mtr, /* in: mini-transaction handle */ + ibool comp) /* in: TRUE=compact page format */ { - mlog_write_initial_log_record(frame, MLOG_PAGE_CREATE, mtr); + mlog_write_initial_log_record(frame, + comp ? MLOG_COMP_PAGE_CREATE : MLOG_PAGE_CREATE, mtr); } /*************************************************************** @@ -267,6 +294,7 @@ page_parse_create( /* out: end of log record or NULL */ byte* ptr, /* in: buffer */ byte* end_ptr __attribute__((unused)), /* in: buffer end */ + ibool comp, /* in: TRUE=compact page format */ page_t* page, /* in: page or NULL */ mtr_t* mtr) /* in: mtr or NULL */ { @@ -275,7 +303,7 @@ page_parse_create( /* The record is empty, except for the record initial part */ if (page) { - page_create(page, mtr); + page_create(page, mtr, comp); } return(ptr); @@ -290,7 +318,8 @@ page_create( /* out: pointer to the page */ buf_frame_t* frame, /* in: a buffer frame where the page is created */ - mtr_t* mtr) /* in: mini-transaction handle */ + mtr_t* mtr, /* in: mini-transaction handle */ + ibool comp) /* in: TRUE=compact page format */ { page_dir_slot_t* slot; mem_heap_t* heap; @@ -300,6 +329,10 @@ page_create( rec_t* infimum_rec; rec_t* supremum_rec; page_t* page; + dict_index_t* index; + ulint* offsets; + + index = comp ? 
srv_sys->dummy_ind2 : srv_sys->dummy_ind1; ut_ad(frame && mtr); ut_ad(PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE @@ -311,7 +344,7 @@ page_create( buf_frame_modify_clock_inc(frame); /* 2. WRITE LOG INFORMATION */ - page_create_write_log(frame, mtr); + page_create_write_log(frame, mtr, comp); page = frame; @@ -323,43 +356,52 @@ page_create( /* Create first a data tuple for infimum record */ tuple = dtuple_create(heap, 1); + dtuple_set_info_bits(tuple, REC_STATUS_INFIMUM); field = dtuple_get_nth_field(tuple, 0); - dfield_set_data(field, "infimum", sizeof "infimum"); - dtype_set(dfield_get_type(field), DATA_VARCHAR, DATA_ENGLISH, 20, 0); - + dfield_set_data(field, "infimum", 8); + dtype_set(dfield_get_type(field), + DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, 8, 0); /* Set the corresponding physical record to its place in the page record heap */ heap_top = page + PAGE_DATA; - infimum_rec = rec_convert_dtuple_to_rec(heap_top, tuple); + infimum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple); + + ut_a(infimum_rec == + page + (comp ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM)); + + rec_set_n_owned(infimum_rec, comp, 1); + rec_set_heap_no(infimum_rec, comp, 0); + offsets = rec_get_offsets(infimum_rec, index, ULINT_UNDEFINED, heap); + + heap_top = rec_get_end(infimum_rec, offsets); - ut_a(infimum_rec == page + PAGE_INFIMUM); - - rec_set_n_owned(infimum_rec, 1); - rec_set_heap_no(infimum_rec, 0); - - heap_top = rec_get_end(infimum_rec); - /* Create then a tuple for supremum */ tuple = dtuple_create(heap, 1); + dtuple_set_info_bits(tuple, REC_STATUS_SUPREMUM); field = dtuple_get_nth_field(tuple, 0); - dfield_set_data(field, "supremum", sizeof "supremum"); - dtype_set(dfield_get_type(field), DATA_VARCHAR, DATA_ENGLISH, 20, 0); + dfield_set_data(field, "supremum", 9 - comp); + dtype_set(dfield_get_type(field), + DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, 9 - comp, 0); - supremum_rec = rec_convert_dtuple_to_rec(heap_top, tuple); + supremum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple); - ut_a(supremum_rec == page + PAGE_SUPREMUM); + ut_a(supremum_rec == + page + (comp ? PAGE_NEW_SUPREMUM : PAGE_OLD_SUPREMUM)); - rec_set_n_owned(supremum_rec, 1); - rec_set_heap_no(supremum_rec, 1); - - heap_top = rec_get_end(supremum_rec); + rec_set_n_owned(supremum_rec, comp, 1); + rec_set_heap_no(supremum_rec, comp, 1); - ut_ad(heap_top == page + PAGE_SUPREMUM_END); + offsets = rec_reget_offsets(supremum_rec, index, + offsets, ULINT_UNDEFINED, heap); + heap_top = rec_get_end(supremum_rec, offsets); + + ut_ad(heap_top == + page + (comp ? PAGE_NEW_SUPREMUM_END : PAGE_OLD_SUPREMUM_END)); mem_heap_free(heap); @@ -367,7 +409,7 @@ page_create( page_header_set_field(page, PAGE_N_DIR_SLOTS, 2); page_header_set_ptr(page, PAGE_HEAP_TOP, heap_top); - page_header_set_field(page, PAGE_N_HEAP, 2); + page_header_set_field(page, PAGE_N_HEAP, comp ? 0x8002 : 2); page_header_set_ptr(page, PAGE_FREE, NULL); page_header_set_field(page, PAGE_GARBAGE, 0); page_header_set_ptr(page, PAGE_LAST_INSERT, NULL); @@ -388,8 +430,8 @@ page_create( /* Set the next pointers in infimum and supremum */ - rec_set_next_offs(infimum_rec, (ulint)(supremum_rec - page)); - rec_set_next_offs(supremum_rec, 0); + rec_set_next_offs(infimum_rec, comp, (ulint)(supremum_rec - page)); + rec_set_next_offs(supremum_rec, comp, 0); return(page); } @@ -401,10 +443,11 @@ touch the lock table and max trx id on page. 
*/ void page_copy_rec_list_end_no_locks( /*============================*/ - page_t* new_page, /* in: index page to copy to */ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - mtr_t* mtr) /* in: mtr */ + page_t* new_page, /* in: index page to copy to */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: record on page */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mtr */ { page_cur_t cur1; page_cur_t cur2; @@ -416,8 +459,11 @@ page_copy_rec_list_end_no_locks( page_cur_move_to_next(&cur1); } - - ut_a(mach_read_from_2(new_page + UNIV_PAGE_SIZE - 10) == PAGE_INFIMUM); + + ut_a(index->table->comp == page_is_comp(page)); + ut_a(index->table->comp == page_is_comp(new_page)); + ut_a(mach_read_from_2(new_page + UNIV_PAGE_SIZE - 10) == (ulint) + (index->table->comp ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM)); page_cur_set_before_first(new_page, &cur2); @@ -427,7 +473,7 @@ page_copy_rec_list_end_no_locks( while (sup != page_cur_get_rec(&cur1)) { if (!page_cur_rec_insert(&cur2, - page_cur_get_rec(&cur1), mtr)) { + page_cur_get_rec(&cur1), index, mtr)) { /* Track an assertion failure reported on the mailing list on June 18th, 2003 */ @@ -456,16 +502,18 @@ The records are copied to the start of the record list on new_page. */ void page_copy_rec_list_end( /*===================*/ - page_t* new_page, /* in: index page to copy to */ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - mtr_t* mtr) /* in: mtr */ + page_t* new_page, /* in: index page to copy to */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: record on page */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mtr */ { - if (page_header_get_field(new_page, PAGE_N_HEAP) == 2) { + if (page_dir_get_n_heap(new_page) == 2) { page_copy_rec_list_end_to_created_page(new_page, page, rec, - mtr); + index, mtr); } else { - page_copy_rec_list_end_no_locks(new_page, page, rec, mtr); + page_copy_rec_list_end_no_locks(new_page, page, rec, + index, mtr); } /* Update the lock table, MAX_TRX_ID, and possible hash index */ @@ -474,7 +522,7 @@ page_copy_rec_list_end( page_update_max_trx_id(new_page, page_get_max_trx_id(page)); - btr_search_move_or_delete_hash_entries(new_page, page); + btr_search_move_or_delete_hash_entries(new_page, page, index); } /***************************************************************** @@ -485,10 +533,11 @@ The records are copied to the end of the record list on new_page. 
*/ void page_copy_rec_list_start( /*=====================*/ - page_t* new_page, /* in: index page to copy to */ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - mtr_t* mtr) /* in: mtr */ + page_t* new_page, /* in: index page to copy to */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: record on page */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mtr */ { page_cur_t cur1; page_cur_t cur2; @@ -510,8 +559,8 @@ page_copy_rec_list_start( /* Copy records from the original page to the new page */ while (page_cur_get_rec(&cur1) != rec) { - ut_a( - page_cur_rec_insert(&cur2, page_cur_get_rec(&cur1), mtr)); + ut_a(page_cur_rec_insert(&cur2, + page_cur_get_rec(&cur1), index, mtr)); page_cur_move_to_next(&cur1); page_cur_move_to_next(&cur2); @@ -523,7 +572,7 @@ page_copy_rec_list_start( page_update_max_trx_id(new_page, page_get_max_trx_id(page)); - btr_search_move_or_delete_hash_entries(new_page, page); + btr_search_move_or_delete_hash_entries(new_page, page, index); } /************************************************************** @@ -532,18 +581,25 @@ UNIV_INLINE void page_delete_rec_list_write_log( /*===========================*/ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - byte type, /* in: operation type: MLOG_LIST_END_DELETE, ... */ - mtr_t* mtr) /* in: mtr */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: record on page */ + dict_index_t* index, /* in: record descriptor */ + byte type, /* in: operation type: + MLOG_LIST_END_DELETE, ... */ + mtr_t* mtr) /* in: mtr */ { - ut_ad((type == MLOG_LIST_END_DELETE) - || (type == MLOG_LIST_START_DELETE)); + byte* log_ptr; + ut_ad(type == MLOG_LIST_END_DELETE + || type == MLOG_LIST_START_DELETE + || type == MLOG_COMP_LIST_END_DELETE + || type == MLOG_COMP_LIST_START_DELETE); - mlog_write_initial_log_record(page, type, mtr); - - /* Write the parameter as a 2-byte ulint */ - mlog_catenate_ulint(mtr, rec - page, MLOG_2BYTES); + log_ptr = mlog_open_and_write_index(mtr, page, index, type, 2); + if (log_ptr) { + /* Write the parameter as a 2-byte ulint */ + mach_write_to_2(log_ptr, rec - page); + mlog_close(mtr, log_ptr + 2); + } } /************************************************************** @@ -552,18 +608,23 @@ Parses a log record of a record list end or start deletion. 
*/ byte* page_parse_delete_rec_list( /*=======================*/ - /* out: end of log record or NULL */ - byte type, /* in: MLOG_LIST_END_DELETE or - MLOG_LIST_START_DELETE */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr) /* in: mtr or NULL */ + /* out: end of log record or NULL */ + byte type, /* in: MLOG_LIST_END_DELETE, + MLOG_LIST_START_DELETE, + MLOG_COMP_LIST_END_DELETE or + MLOG_COMP_LIST_START_DELETE */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: record descriptor */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr) /* in: mtr or NULL */ { ulint offset; - ut_ad((type == MLOG_LIST_END_DELETE) - || (type == MLOG_LIST_START_DELETE)); + ut_ad(type == MLOG_LIST_END_DELETE + || type == MLOG_LIST_START_DELETE + || type == MLOG_COMP_LIST_END_DELETE + || type == MLOG_COMP_LIST_START_DELETE); /* Read the record offset as a 2-byte ulint */ @@ -580,11 +641,12 @@ page_parse_delete_rec_list( return(ptr); } - if (type == MLOG_LIST_END_DELETE) { - page_delete_rec_list_end(page, page + offset, ULINT_UNDEFINED, - ULINT_UNDEFINED, mtr); + if (type == MLOG_LIST_END_DELETE + || type == MLOG_COMP_LIST_END_DELETE) { + page_delete_rec_list_end(page, page + offset, index, + ULINT_UNDEFINED, ULINT_UNDEFINED, mtr); } else { - page_delete_rec_list_start(page, page + offset, mtr); + page_delete_rec_list_start(page, page + offset, index, mtr); } return(ptr); @@ -597,14 +659,15 @@ The infimum and supremum records are not deleted. */ void page_delete_rec_list_end( /*=====================*/ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - ulint n_recs, /* in: number of records to delete, or ULINT_UNDEFINED - if not known */ - ulint size, /* in: the sum of the sizes of the records in the end - of the chain to delete, or ULINT_UNDEFINED if not - known */ - mtr_t* mtr) /* in: mtr */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: record on page */ + dict_index_t* index, /* in: record descriptor */ + ulint n_recs, /* in: number of records to delete, + or ULINT_UNDEFINED if not known */ + ulint size, /* in: the sum of the sizes of the + records in the end of the chain to + delete, or ULINT_UNDEFINED if not known */ + mtr_t* mtr) /* in: mtr */ { page_dir_slot_t* slot; ulint slot_index; @@ -615,10 +678,12 @@ page_delete_rec_list_end( ulint count; ulint n_owned; rec_t* sup; + ibool comp; /* Reset the last insert info in the page header and increment the modify clock for the frame */ + ut_ad(size == ULINT_UNDEFINED || size < UNIV_PAGE_SIZE); page_header_set_ptr(page, PAGE_LAST_INSERT, NULL); /* The page gets invalid for optimistic searches: increment the @@ -632,7 +697,9 @@ page_delete_rec_list_end( rec = page_rec_get_next(rec); } - page_delete_rec_list_write_log(page, rec, MLOG_LIST_END_DELETE, mtr); + comp = page_is_comp(page); + page_delete_rec_list_write_log(page, rec, index, + comp ? 
MLOG_COMP_LIST_END_DELETE : MLOG_LIST_END_DELETE, mtr); if (rec == sup) { @@ -644,19 +711,32 @@ page_delete_rec_list_end( last_rec = page_rec_get_prev(sup); if ((size == ULINT_UNDEFINED) || (n_recs == ULINT_UNDEFINED)) { + mem_heap_t* heap = mem_heap_create(100); + ulint* offsets = NULL; /* Calculate the sum of sizes and the number of records */ size = 0; n_recs = 0; rec2 = rec; while (rec2 != sup) { - size += rec_get_size(rec2); + ulint s; + offsets = rec_reget_offsets(rec2, index, + offsets, ULINT_UNDEFINED, heap); + s = rec_offs_size(offsets); + ut_ad(rec2 - page + s - rec_offs_extra_size(offsets) + < UNIV_PAGE_SIZE); + ut_ad(size + s < UNIV_PAGE_SIZE); + size += s; n_recs++; rec2 = page_rec_get_next(rec2); } + + mem_heap_free(heap); } + ut_ad(size < UNIV_PAGE_SIZE); + /* Update the page directory; there is no need to balance the number of the records owned by the supremum record, as it is allowed to be less than PAGE_DIR_SLOT_MIN_N_OWNED */ @@ -664,15 +744,15 @@ page_delete_rec_list_end( rec2 = rec; count = 0; - while (rec_get_n_owned(rec2) == 0) { + while (rec_get_n_owned(rec2, comp) == 0) { count++; rec2 = page_rec_get_next(rec2); } - ut_ad(rec_get_n_owned(rec2) - count > 0); + ut_ad(rec_get_n_owned(rec2, comp) - count > 0); - n_owned = rec_get_n_owned(rec2) - count; + n_owned = rec_get_n_owned(rec2, comp) - count; slot_index = page_dir_find_owner_slot(rec2); slot = page_dir_get_nth_slot(page, slot_index); @@ -680,7 +760,7 @@ page_delete_rec_list_end( page_dir_slot_set_rec(slot, sup); page_dir_slot_set_n_owned(slot, n_owned); - page_header_set_field(page, PAGE_N_DIR_SLOTS, slot_index + 1); + page_dir_set_n_slots(page, slot_index + 1); /* Remove the record chain segment from the record chain */ page_rec_set_next(prev_rec, page_get_supremum_rec(page)); @@ -706,14 +786,19 @@ that record. Infimum and supremum records are not deleted. */ void page_delete_rec_list_start( /*=======================*/ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - mtr_t* mtr) /* in: mtr */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: record on page */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mtr */ { page_cur_t cur1; ulint log_mode; - page_delete_rec_list_write_log(page, rec, MLOG_LIST_START_DELETE, mtr); + page_delete_rec_list_write_log(page, rec, index, + index->table->comp + ? MLOG_COMP_LIST_START_DELETE + : MLOG_LIST_START_DELETE, + mtr); page_cur_set_before_first(page, &cur1); @@ -730,7 +815,7 @@ page_delete_rec_list_start( while (page_cur_get_rec(&cur1) != rec) { - page_cur_delete_rec(&cur1, mtr); + page_cur_delete_rec(&cur1, index, mtr); } /* Restore log mode */ @@ -745,10 +830,11 @@ split_rec. 
*/ void page_move_rec_list_end( /*===================*/ - page_t* new_page, /* in: index page where to move */ - page_t* page, /* in: index page */ - rec_t* split_rec, /* in: first record to move */ - mtr_t* mtr) /* in: mtr */ + page_t* new_page, /* in: index page where to move */ + page_t* page, /* in: index page */ + rec_t* split_rec, /* in: first record to move */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mtr */ { ulint old_data_size; ulint new_data_size; @@ -758,15 +844,15 @@ page_move_rec_list_end( old_data_size = page_get_data_size(new_page); old_n_recs = page_get_n_recs(new_page); - page_copy_rec_list_end(new_page, page, split_rec, mtr); + page_copy_rec_list_end(new_page, page, split_rec, index, mtr); new_data_size = page_get_data_size(new_page); new_n_recs = page_get_n_recs(new_page); ut_ad(new_data_size >= old_data_size); - page_delete_rec_list_end(page, split_rec, new_n_recs - old_n_recs, - new_data_size - old_data_size, mtr); + page_delete_rec_list_end(page, split_rec, index, + new_n_recs - old_n_recs, new_data_size - old_data_size, mtr); } /***************************************************************** @@ -776,14 +862,15 @@ split_rec. */ void page_move_rec_list_start( /*=====================*/ - page_t* new_page, /* in: index page where to move */ - page_t* page, /* in: index page */ - rec_t* split_rec, /* in: first record not to move */ - mtr_t* mtr) /* in: mtr */ + page_t* new_page, /* in: index page where to move */ + page_t* page, /* in: index page */ + rec_t* split_rec, /* in: first record not to move */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mtr */ { - page_copy_rec_list_start(new_page, page, split_rec, mtr); + page_copy_rec_list_start(new_page, page, split_rec, index, mtr); - page_delete_rec_list_start(page, split_rec, mtr); + page_delete_rec_list_start(page, split_rec, index, mtr); } /*************************************************************************** @@ -801,7 +888,7 @@ page_rec_write_index_page_no( byte* data; ulint len; - data = rec_get_nth_field(rec, i, &len); + data = rec_get_nth_field_old(rec, i, &len); ut_ad(len == 4); @@ -885,7 +972,7 @@ page_dir_add_slots( ut_ad(start < n_slots - 1); /* Update the page header */ - page_header_set_field(page, PAGE_N_DIR_SLOTS, n_slots + n); + page_dir_set_n_slots(page, n_slots + n); /* Move slots up */ @@ -1006,8 +1093,8 @@ page_dir_balance_slot( old_rec = page_dir_slot_get_rec(slot); new_rec = page_rec_get_next(old_rec); - rec_set_n_owned(old_rec, 0); - rec_set_n_owned(new_rec, n_owned + 1); + rec_set_n_owned(old_rec, page_is_comp(page), 0); + rec_set_n_owned(new_rec, page_is_comp(page), n_owned + 1); page_dir_slot_set_rec(slot, new_rec); @@ -1080,13 +1167,15 @@ page_rec_get_n_recs_before( rec_t* slot_rec; page_t* page; ulint i; + ibool comp; lint n = 0; ut_ad(page_rec_check(rec)); page = buf_frame_align(rec); - - while (rec_get_n_owned(rec) == 0) { + comp = page_is_comp(page); + + while (rec_get_n_owned(rec, comp) == 0) { rec = page_rec_get_next(rec); n--; @@ -1096,7 +1185,7 @@ page_rec_get_n_recs_before( slot = page_dir_get_nth_slot(page, i); slot_rec = page_dir_slot_get_rec(slot); - n += rec_get_n_owned(slot_rec); + n += rec_get_n_owned(slot_rec, comp); if (rec == slot_rec) { @@ -1118,17 +1207,21 @@ the index page context. 
*/ void page_rec_print( /*===========*/ - rec_t* rec) + rec_t* rec, /* in: physical record */ + const ulint* offsets)/* in: record descriptor */ { - rec_print(stderr, rec); + ibool comp = page_is_comp(buf_frame_align(rec)); + + ut_a(comp == rec_offs_comp(offsets)); + rec_print(stderr, rec, offsets); fprintf(stderr, " n_owned: %lu; heap_no: %lu; next rec: %lu\n", - (ulong) rec_get_n_owned(rec), - (ulong) rec_get_heap_no(rec), - (ulong) rec_get_next_offs(rec)); + (ulong) rec_get_n_owned(rec, comp), + (ulong) rec_get_heap_no(rec, comp), + (ulong) rec_get_next_offs(rec, comp)); page_rec_check(rec); - rec_validate(rec); + rec_validate(rec, offsets); } /******************************************************************* @@ -1176,12 +1269,18 @@ debugging purposes. */ void page_print_list( /*============*/ - page_t* page, /* in: index page */ - ulint pr_n) /* in: print n first and n last entries */ + page_t* page, /* in: index page */ + dict_index_t* index, /* in: dictionary index of the page */ + ulint pr_n) /* in: print n first and n last entries */ { page_cur_t cur; ulint count; ulint n_recs; + mem_heap_t* heap; + ulint* offsets = NULL; + + ut_a(page_is_comp(page) == index->table->comp); + heap = mem_heap_create(100); fprintf(stderr, "--------------------------------\n" @@ -1193,7 +1292,9 @@ page_print_list( page_cur_set_before_first(page, &cur); count = 0; for (;;) { - page_rec_print(cur.rec); + offsets = rec_reget_offsets(cur.rec, index, + offsets, ULINT_UNDEFINED, heap); + page_rec_print(cur.rec, offsets); if (count == pr_n) { break; @@ -1213,7 +1314,9 @@ page_print_list( page_cur_move_to_next(&cur); if (count + pr_n >= n_recs) { - page_rec_print(cur.rec); + offsets = rec_reget_offsets(cur.rec, index, + offsets, ULINT_UNDEFINED, heap); + page_rec_print(cur.rec, offsets); } count++; } @@ -1222,6 +1325,8 @@ page_print_list( "Total of %lu records \n" "--------------------------------\n", (ulong) (count + 1)); + + mem_heap_free(heap); } /******************************************************************* @@ -1235,14 +1340,15 @@ page_header_print( fprintf(stderr, "--------------------------------\n" "PAGE HEADER INFO\n" - "Page address %p, n records %lu\n" + "Page address %p, n records %lu (%s)\n" "n dir slots %lu, heap top %lu\n" "Page n heap %lu, free %lu, garbage %lu\n" "Page last insert %lu, direction %lu, n direction %lu\n", page, (ulong) page_header_get_field(page, PAGE_N_RECS), + page_is_comp(page) ? "compact format" : "original format", (ulong) page_header_get_field(page, PAGE_N_DIR_SLOTS), (ulong) page_header_get_field(page, PAGE_HEAP_TOP), - (ulong) page_header_get_field(page, PAGE_N_HEAP), + (ulong) page_dir_get_n_heap(page), (ulong) page_header_get_field(page, PAGE_FREE), (ulong) page_header_get_field(page, PAGE_GARBAGE), (ulong) page_header_get_field(page, PAGE_LAST_INSERT), @@ -1257,13 +1363,16 @@ debugging purposes. */ void page_print( /*======*/ - page_t* page, /* in: index page */ - ulint dn, /* in: print dn first and last entries in directory */ - ulint rn) /* in: print rn first and last records on page */ + page_t* page, /* in: index page */ + dict_index_t* index, /* in: dictionary index of the page */ + ulint dn, /* in: print dn first and last entries + in directory */ + ulint rn) /* in: print rn first and last records + in directory */ { page_header_print(page); page_dir_print(page, dn); - page_print_list(page, rn); + page_print_list(page, index, rn); } /******************************************************************* @@ -1274,20 +1383,24 @@ the heap_no field. 
*/ ibool page_rec_validate( /*==============*/ - /* out: TRUE if ok */ - rec_t* rec) /* in: record on the page */ + /* out: TRUE if ok */ + rec_t* rec, /* in: physical record */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { ulint n_owned; ulint heap_no; - page_t* page; + page_t* page; + ibool comp; page = buf_frame_align(rec); + comp = page_is_comp(page); + ut_a(comp == rec_offs_comp(offsets)); page_rec_check(rec); - rec_validate(rec); + rec_validate(rec, offsets); - n_owned = rec_get_n_owned(rec); - heap_no = rec_get_heap_no(rec); + n_owned = rec_get_n_owned(rec, comp); + heap_no = rec_get_heap_no(rec, comp); if (!(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED)) { fprintf(stderr, @@ -1296,11 +1409,11 @@ page_rec_validate( return(FALSE); } - if (!(heap_no < page_header_get_field(page, PAGE_N_HEAP))) { + if (!(heap_no < page_dir_get_n_heap(page))) { fprintf(stderr, "InnoDB: Heap no of rec %lu too big %lu %lu\n", (ulong)(rec - page), (ulong) heap_no, - (ulong) page_header_get_field(page, PAGE_N_HEAP)); + (ulong) page_dir_get_n_heap(page)); return(FALSE); } @@ -1358,6 +1471,7 @@ page_simple_validate( ulint count; ulint own_count; ibool ret = FALSE; + ibool comp = page_is_comp(page); /* Check first that the record heap and the directory do not overlap. */ @@ -1404,13 +1518,13 @@ page_simple_validate( goto func_exit; } - if (rec_get_n_owned(rec) != 0) { + if (rec_get_n_owned(rec, comp) != 0) { /* This is a record pointed to by a dir slot */ - if (rec_get_n_owned(rec) != own_count) { + if (rec_get_n_owned(rec, comp) != own_count) { fprintf(stderr, "InnoDB: Wrong owned count %lu, %lu, rec %lu\n", - (ulong) rec_get_n_owned(rec), + (ulong) rec_get_n_owned(rec, comp), (ulong) own_count, (ulong)(rec - page)); @@ -1438,11 +1552,11 @@ page_simple_validate( break; } - if (rec_get_next_offs(rec) < FIL_PAGE_DATA - || rec_get_next_offs(rec) >= UNIV_PAGE_SIZE) { + if (rec_get_next_offs(rec, comp) < FIL_PAGE_DATA + || rec_get_next_offs(rec, comp) >= UNIV_PAGE_SIZE) { fprintf(stderr, "InnoDB: Next record offset nonsensical %lu for rec %lu\n", - (ulong) rec_get_next_offs(rec), + (ulong) rec_get_next_offs(rec, comp), (ulong)(rec - page)); goto func_exit; @@ -1461,7 +1575,7 @@ page_simple_validate( own_count++; } - if (rec_get_n_owned(rec) == 0) { + if (rec_get_n_owned(rec, comp) == 0) { fprintf(stderr, "InnoDB: n owned is zero in a supremum rec\n"); goto func_exit; @@ -1514,10 +1628,10 @@ page_simple_validate( rec = page_rec_get_next(rec); } - if (page_header_get_field(page, PAGE_N_HEAP) != count + 1) { + if (page_dir_get_n_heap(page) != count + 1) { fprintf(stderr, "InnoDB: N heap is wrong %lu, %lu\n", - (ulong) page_header_get_field(page, PAGE_N_HEAP), + (ulong) page_dir_get_n_heap(page), (ulong) (count + 1)); goto func_exit; @@ -1549,12 +1663,19 @@ page_validate( ulint slot_no; ulint data_size; rec_t* rec; - rec_t* old_rec = NULL; + rec_t* old_rec = NULL; ulint offs; ulint n_slots; - ibool ret = FALSE; + ibool ret = FALSE; ulint i; - + ibool comp = page_is_comp(page); + ulint* offsets = NULL; + ulint* old_offsets = NULL; + + if (comp != index->table->comp) { + fputs("InnoDB: 'compact format' flag mismatch\n", stderr); + goto func_exit2; + } if (!page_simple_validate(page)) { goto func_exit2; } @@ -1599,22 +1720,33 @@ page_validate( for (;;) { rec = cur.rec; + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); - if (!page_rec_validate(rec)) { + if (comp && page_rec_is_user_rec(rec) + && rec_get_node_ptr_flag(rec) + == !btr_page_get_level_low(page)) { + fputs("InnoDB: 
node_ptr flag mismatch\n", stderr); + goto func_exit; + } + + if (!page_rec_validate(rec, offsets)) { goto func_exit; } /* Check that the records are in the ascending order */ if ((count >= 2) && (!page_cur_is_after_last(&cur))) { - if (!(1 == cmp_rec_rec(rec, old_rec, index))) { + if (!(1 == cmp_rec_rec(rec, old_rec, + offsets, old_offsets, + ULINT_UNDEFINED, index))) { fprintf(stderr, "InnoDB: Records in wrong order on page %lu", (ulong) buf_frame_get_page_no(page)); dict_index_name_print(stderr, NULL, index); fputs("\nInnoDB: previous record ", stderr); - rec_print(stderr, old_rec); + rec_print(stderr, old_rec, old_offsets); fputs("\nInnoDB: record ", stderr); - rec_print(stderr, rec); + rec_print(stderr, rec, offsets); putc('\n', stderr); goto func_exit; @@ -1624,12 +1756,12 @@ page_validate( if ((rec != page_get_supremum_rec(page)) && (rec != page_get_infimum_rec(page))) { - data_size += rec_get_size(rec); + data_size += rec_offs_size(offsets); } - offs = rec_get_start(rec) - page; + offs = rec_get_start(rec, offsets) - page; - for (i = 0; i < rec_get_size(rec); i++) { + for (i = 0; i < rec_offs_size(offsets); i++) { if (!buf[offs + i] == 0) { /* No other record may overlap this */ @@ -1641,12 +1773,12 @@ page_validate( buf[offs + i] = 1; } - if (rec_get_n_owned(rec) != 0) { + if (rec_get_n_owned(rec, comp) != 0) { /* This is a record pointed to by a dir slot */ - if (rec_get_n_owned(rec) != own_count) { + if (rec_get_n_owned(rec, comp) != own_count) { fprintf(stderr, "InnoDB: Wrong owned count %lu, %lu\n", - (ulong) rec_get_n_owned(rec), + (ulong) rec_get_n_owned(rec, comp), (ulong) own_count); goto func_exit; } @@ -1671,11 +1803,11 @@ page_validate( break; } - if (rec_get_next_offs(rec) < FIL_PAGE_DATA - || rec_get_next_offs(rec) >= UNIV_PAGE_SIZE) { + if (rec_get_next_offs(rec, comp) < FIL_PAGE_DATA + || rec_get_next_offs(rec, comp) >= UNIV_PAGE_SIZE) { fprintf(stderr, "InnoDB: Next record offset wrong %lu\n", - (ulong) rec_get_next_offs(rec)); + (ulong) rec_get_next_offs(rec, comp)); goto func_exit; } @@ -1683,9 +1815,15 @@ page_validate( page_cur_move_to_next(&cur); own_count++; old_rec = rec; + /* set old_offsets to offsets; recycle offsets */ + { + ulint* offs = old_offsets; + old_offsets = offsets; + offsets = offs; + } } - if (rec_get_n_owned(rec) == 0) { + if (rec_get_n_owned(rec, comp) == 0) { fputs("InnoDB: n owned is zero\n", stderr); goto func_exit; } @@ -1714,15 +1852,17 @@ page_validate( rec = page_header_get_ptr(page, PAGE_FREE); while (rec != NULL) { - if (!page_rec_validate(rec)) { + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + if (!page_rec_validate(rec, offsets)) { goto func_exit; } count++; - offs = rec_get_start(rec) - page; + offs = rec_get_start(rec, offsets) - page; - for (i = 0; i < rec_get_size(rec); i++) { + for (i = 0; i < rec_offs_size(offsets); i++) { if (buf[offs + i] != 0) { fputs( @@ -1736,9 +1876,9 @@ page_validate( rec = page_rec_get_next(rec); } - if (page_header_get_field(page, PAGE_N_HEAP) != count + 1) { + if (page_dir_get_n_heap(page) != count + 1) { fprintf(stderr, "InnoDB: N heap is wrong %lu %lu\n", - (ulong) page_header_get_field(page, PAGE_N_HEAP), + (ulong) page_dir_get_n_heap(page), (ulong) count + 1); goto func_exit; } @@ -1775,7 +1915,7 @@ page_find_rec_with_heap_no( page_cur_set_before_first(page, &cur); for (;;) { - if (rec_get_heap_no(cur.rec) == heap_no) { + if (rec_get_heap_no(cur.rec, page_is_comp(page)) == heap_no) { return(cur.rec); } diff --git a/innobase/pars/pars0pars.c 
b/innobase/pars/pars0pars.c index 16d630dd318..c62184abd85 100644 --- a/innobase/pars/pars0pars.c +++ b/innobase/pars/pars0pars.c @@ -1514,8 +1514,11 @@ pars_create_table( n_cols = que_node_list_get_len(column_defs); - table = dict_mem_table_create(table_sym->name, 0, n_cols); - + /* As the InnoDB SQL parser is for internal use only, + for creating some system tables, this function will only + create tables in the old (not compact) record format. */ + table = dict_mem_table_create(table_sym->name, 0, n_cols, FALSE); + if (not_fit_in_memory != NULL) { table->does_not_fit_in_memory = TRUE; } diff --git a/innobase/rem/rem0cmp.c b/innobase/rem/rem0cmp.c index 041fb7914e2..974fc7a24d0 100644 --- a/innobase/rem/rem0cmp.c +++ b/innobase/rem/rem0cmp.c @@ -51,6 +51,7 @@ cmp_debug_dtuple_rec_with_match( dtuple in some of the common fields, or which has an equal number or more fields than dtuple */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ ulint* matched_fields);/* in/out: number of already completely matched fields; when function returns, contains the value for current @@ -426,6 +427,7 @@ cmp_dtuple_rec_with_match( dtuple in some of the common fields, or which has an equal number or more fields than dtuple */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ ulint* matched_fields, /* in/out: number of already completely matched fields; when function returns, contains the value for current comparison */ @@ -455,12 +457,13 @@ cmp_dtuple_rec_with_match( ut_ad(dtuple && rec && matched_fields && matched_bytes); ut_ad(dtuple_check_typed(dtuple)); + ut_ad(rec_offs_validate(rec, NULL, offsets)); cur_field = *matched_fields; cur_bytes = *matched_bytes; ut_ad(cur_field <= dtuple_get_n_fields_cmp(dtuple)); - ut_ad(cur_field <= rec_get_n_fields(rec)); + ut_ad(cur_field <= rec_offs_n_fields(offsets)); /* Match fields in a loop; stop if we run out of fields in dtuple or find an externally stored field */ @@ -472,7 +475,8 @@ cmp_dtuple_rec_with_match( dtuple_f_len = dfield_get_len(dtuple_field); - rec_b_ptr = rec_get_nth_field(rec, cur_field, &rec_f_len); + rec_b_ptr = rec_get_nth_field(rec, offsets, + cur_field, &rec_f_len); /* If we have matched yet 0 bytes, it may be that one or both the fields are SQL null, or the record or dtuple may be @@ -482,7 +486,8 @@ cmp_dtuple_rec_with_match( if (cur_bytes == 0) { if (cur_field == 0) { - if (rec_get_info_bits(rec) + if (rec_get_info_bits(rec, + rec_offs_comp(offsets)) & REC_INFO_MIN_REC_FLAG) { if (dtuple_get_info_bits(dtuple) @@ -504,7 +509,7 @@ cmp_dtuple_rec_with_match( } } - if (rec_get_nth_field_extern_bit(rec, cur_field)) { + if (rec_offs_nth_extern(offsets, cur_field)) { /* We do not compare to an externally stored field */ @@ -635,7 +640,7 @@ cmp_dtuple_rec_with_match( up to the common fields */ order_resolved: ut_ad((ret >= - 1) && (ret <= 1)); - ut_ad(ret == cmp_debug_dtuple_rec_with_match(dtuple, rec, + ut_ad(ret == cmp_debug_dtuple_rec_with_match(dtuple, rec, offsets, matched_fields)); ut_ad(*matched_fields == cur_field); /* In the debug version, the above cmp_debug_... 
sets @@ -656,13 +661,15 @@ cmp_dtuple_rec( less than rec, respectively; see the comments for cmp_dtuple_rec_with_match */ dtuple_t* dtuple, /* in: data tuple */ - rec_t* rec) /* in: physical record */ + rec_t* rec, /* in: physical record */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { ulint matched_fields = 0; ulint matched_bytes = 0; - return(cmp_dtuple_rec_with_match(dtuple, rec, &matched_fields, - &matched_bytes)); + ut_ad(rec_offs_validate(rec, NULL, offsets)); + return(cmp_dtuple_rec_with_match(dtuple, rec, offsets, + &matched_fields, &matched_bytes)); } /****************************************************************** @@ -673,22 +680,24 @@ ibool cmp_dtuple_is_prefix_of_rec( /*========================*/ /* out: TRUE if prefix */ - dtuple_t* dtuple, /* in: data tuple */ - rec_t* rec) /* in: physical record */ + dtuple_t* dtuple, /* in: data tuple */ + rec_t* rec, /* in: physical record */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { ulint n_fields; ulint matched_fields = 0; ulint matched_bytes = 0; + ut_ad(rec_offs_validate(rec, NULL, offsets)); n_fields = dtuple_get_n_fields(dtuple); - if (n_fields > rec_get_n_fields(rec)) { + if (n_fields > rec_offs_n_fields(offsets)) { return(FALSE); } - cmp_dtuple_rec_with_match(dtuple, rec, &matched_fields, - &matched_bytes); + cmp_dtuple_rec_with_match(dtuple, rec, offsets, + &matched_fields, &matched_bytes); if (matched_fields == n_fields) { return(TRUE); @@ -703,42 +712,6 @@ cmp_dtuple_is_prefix_of_rec( return(FALSE); } -/****************************************************************** -Compares a prefix of a data tuple to a prefix of a physical record for -equality. If there are less fields in rec than parameter n_fields, FALSE -is returned. NOTE that n_fields_cmp of dtuple does not affect this -comparison. */ - -ibool -cmp_dtuple_rec_prefix_equal( -/*========================*/ - /* out: TRUE if equal */ - dtuple_t* dtuple, /* in: data tuple */ - rec_t* rec, /* in: physical record */ - ulint n_fields) /* in: number of fields which should be - compared; must not exceed the number of - fields in dtuple */ -{ - ulint matched_fields = 0; - ulint matched_bytes = 0; - - ut_ad(n_fields <= dtuple_get_n_fields(dtuple)); - - if (rec_get_n_fields(rec) < n_fields) { - - return(FALSE); - } - - cmp_dtuple_rec_with_match(dtuple, rec, &matched_fields, - &matched_bytes); - if (matched_fields >= n_fields) { - - return(TRUE); - } - - return(FALSE); -} - /***************************************************************** This function is used to compare two physical records. 
Only the common first fields are compared, and if an externally stored field is @@ -752,7 +725,13 @@ cmp_rec_rec_with_match( first fields are compared */ rec_t* rec1, /* in: physical record */ rec_t* rec2, /* in: physical record */ + const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */ + const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */ dict_index_t* index, /* in: data dictionary index */ + ulint n, /* in: number of fields to compare, + or ULINT_UNDEFINED if both records + contain all fields, and all fields + should be compared */ ulint* matched_fields, /* in/out: number of already completely matched fields; when the function returns, contains the value the for current @@ -778,17 +757,27 @@ cmp_rec_rec_with_match( ulint cur_bytes; /* number of already matched bytes in current field */ int ret = 3333; /* return value */ + ibool comp; ut_ad(rec1 && rec2 && index); + ut_ad(rec_offs_validate(rec1, index, offsets1)); + ut_ad(rec_offs_validate(rec2, index, offsets2)); + ut_ad(rec_offs_comp(offsets1) == rec_offs_comp(offsets2)); - rec1_n_fields = rec_get_n_fields(rec1); - rec2_n_fields = rec_get_n_fields(rec2); + comp = rec_offs_comp(offsets1); + if (n == ULINT_UNDEFINED) { + rec1_n_fields = rec_offs_n_fields(offsets1); + rec2_n_fields = rec_offs_n_fields(offsets2); + } else { + ut_ad(n <= rec_offs_n_fields(offsets1)); + ut_ad(n <= rec_offs_n_fields(offsets2)); + rec1_n_fields = rec2_n_fields = n; + } cur_field = *matched_fields; cur_bytes = *matched_bytes; - /* Match fields in a loop; stop if we run out of fields in either - record */ + /* Match fields in a loop */ while ((cur_field < rec1_n_fields) && (cur_field < rec2_n_fields)) { @@ -800,17 +789,19 @@ cmp_rec_rec_with_match( dict_index_get_nth_field(index, cur_field))); } - rec1_b_ptr = rec_get_nth_field(rec1, cur_field, &rec1_f_len); - rec2_b_ptr = rec_get_nth_field(rec2, cur_field, &rec2_f_len); - + rec1_b_ptr = rec_get_nth_field(rec1, offsets1, + cur_field, &rec1_f_len); + rec2_b_ptr = rec_get_nth_field(rec2, offsets2, + cur_field, &rec2_f_len); + if (cur_bytes == 0) { if (cur_field == 0) { /* Test if rec is the predefined minimum record */ - if (rec_get_info_bits(rec1) + if (rec_get_info_bits(rec1, comp) & REC_INFO_MIN_REC_FLAG) { - if (rec_get_info_bits(rec2) + if (rec_get_info_bits(rec2, comp) & REC_INFO_MIN_REC_FLAG) { ret = 0; } else { @@ -819,7 +810,7 @@ cmp_rec_rec_with_match( goto order_resolved; - } else if (rec_get_info_bits(rec2) + } else if (rec_get_info_bits(rec2, comp) & REC_INFO_MIN_REC_FLAG) { ret = 1; @@ -828,8 +819,8 @@ cmp_rec_rec_with_match( } } - if (rec_get_nth_field_extern_bit(rec1, cur_field) - || rec_get_nth_field_extern_bit(rec2, cur_field)) { + if (rec_offs_nth_extern(offsets1, cur_field) + || rec_offs_nth_extern(offsets2, cur_field)) { /* We do not compare to an externally stored field */ @@ -984,6 +975,7 @@ cmp_debug_dtuple_rec_with_match( dtuple in some of the common fields, or which has an equal number or more fields than dtuple */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ ulint* matched_fields) /* in/out: number of already completely matched fields; when function returns, contains the value for current @@ -1003,14 +995,16 @@ cmp_debug_dtuple_rec_with_match( ut_ad(dtuple && rec && matched_fields); ut_ad(dtuple_check_typed(dtuple)); + ut_ad(rec_offs_validate(rec, NULL, offsets)); ut_ad(*matched_fields <= dtuple_get_n_fields_cmp(dtuple)); - ut_ad(*matched_fields <= rec_get_n_fields(rec)); + ut_ad(*matched_fields <= rec_offs_n_fields(offsets)); cur_field = 
*matched_fields; if (cur_field == 0) { - if (rec_get_info_bits(rec) & REC_INFO_MIN_REC_FLAG) { + if (rec_get_info_bits(rec, rec_offs_comp(offsets)) + & REC_INFO_MIN_REC_FLAG) { if (dtuple_get_info_bits(dtuple) & REC_INFO_MIN_REC_FLAG) { @@ -1040,9 +1034,10 @@ cmp_debug_dtuple_rec_with_match( dtuple_f_data = dfield_get_data(dtuple_field); dtuple_f_len = dfield_get_len(dtuple_field); - rec_f_data = rec_get_nth_field(rec, cur_field, &rec_f_len); + rec_f_data = rec_get_nth_field(rec, offsets, + cur_field, &rec_f_len); - if (rec_get_nth_field_extern_bit(rec, cur_field)) { + if (rec_offs_nth_extern(offsets, cur_field)) { /* We do not compare to an externally stored field */ ret = 0; diff --git a/innobase/rem/rem0rec.c b/innobase/rem/rem0rec.c index 1db89241dff..e4fa213480f 100644 --- a/innobase/rem/rem0rec.c +++ b/innobase/rem/rem0rec.c @@ -15,8 +15,8 @@ Created 5/30/1994 Heikki Tuuri #include "mtr0mtr.h" #include "mtr0log.h" -/* PHYSICAL RECORD - =============== +/* PHYSICAL RECORD (OLD STYLE) + =========================== The physical record, which is the data type of all the records found in index pages of the database, has the following format @@ -39,7 +39,7 @@ represented on a higher text line): | 10 bits giving the number of fields in this record | | 1 bit which is set to 1 if the offsets above are given in one byte format, 0 if in two byte format | -| two bytes giving the pointer to the next record in the page | +| two bytes giving an absolute pointer to the next record in the page | ORIGIN of the record | first field of data | ... @@ -55,9 +55,50 @@ The offsets of the data fields are given as one-byte (if there are less than 127 bytes of data in the record) or two-byte unsigned integers. The most significant bit is not part of the offset, instead it indicates the SQL-null -if the bit is set to 1. +if the bit is set to 1. */ -CANONICAL COORDINATES. A record can be seen as a single +/* PHYSICAL RECORD (NEW STYLE) + =========================== + +The physical record, which is the data type of all the records +found in index pages of the database, has the following format +(lower addresses and more significant bits inside a byte are below +represented on a higher text line): + +| length of the last non-null variable-length field of data: + if the maximum length is 255, one byte; otherwise, + 0xxxxxxx (one byte, length=0..127), or 1exxxxxxxxxxxxxx (two bytes, + length=128..16383, extern storage flag) | +... +| length of first variable-length field of data | +| SQL-null flags (1 bit per nullable field), padded to full bytes | +| 4 bits used to delete mark a record, and mark a predefined + minimum record in alphabetical order | +| 4 bits giving the number of records owned by this record + (this term is explained in page0page.h) | +| 13 bits giving the order number of this record in the + heap of the index page | +| 3 bits record type: 000=conventional, 001=node pointer (inside B-tree), + 010=infimum, 011=supremum, 1xx=reserved | +| two bytes giving a relative pointer to the next record in the page | +ORIGIN of the record +| first field of data | +... +| last field of data | + +The origin of the record is the start address of the first field +of data. The offsets are given relative to the origin. +The offsets of the data fields are stored in an inverted +order because then the offset of the first fields are near the +origin, giving maybe a better processor cache hit rate in searches. 
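As an aside (not part of the patch): the length-byte encoding sketched in the comment above can be illustrated with a small standalone C program. The helper names below are invented for illustration, and the single-byte shortcut for columns whose maximum length already fits in one byte is ignored; only the 0xxxxxxx / 1exxxxxx-xxxxxxxx forms and the extern-storage flag (bit 0x40 of the first length byte) are shown.

#include <stdio.h>

/* Invented helpers, for illustration only.  'lens' points at the next
length byte and moves towards lower addresses, mirroring the way the
compact-format length bytes are walked in rec_init_offsets() below. */

static unsigned char*
sketch_len_encode(unsigned char* lens, unsigned len, int external)
{
	if (len < 128 && !external) {
		*lens-- = (unsigned char) len;		/* 0xxxxxxx */
	} else {
		/* 1exxxxxx xxxxxxxx; the caller must ensure len < 16384 */
		*lens-- = (unsigned char)
			(0x80 | (external ? 0x40 : 0) | (len >> 8));
		*lens-- = (unsigned char) (len & 0xff);
	}

	return(lens);
}

static const unsigned char*
sketch_len_decode(const unsigned char* lens, unsigned* len, int* external)
{
	unsigned	b = *lens--;

	*external = 0;

	if (b & 0x80) {				/* 1exxxxxx xxxxxxxx */
		*external = (b & 0x40) != 0;
		*len = ((b & 0x3f) << 8) | *lens--;
	} else {				/* 0xxxxxxx */
		*len = b;
	}

	return(lens);
}

int
main(void)
{
	unsigned char	buf[4];
	unsigned	len;
	int		ext;

	sketch_len_encode(buf + 3, 1000, 0);
	sketch_len_decode(buf + 3, &len, &ext);
	printf("len=%u extern=%d\n", len, ext);	/* len=1000 extern=0 */

	return(0);
}

Encoding a length of 1000 this way produces the bytes 0x83 0xe8, read from the higher address towards the lower one, which is exactly the pattern decoded below with (len & 0x3f) << 8 followed by the next length byte.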
+ +The offsets of the data fields are given as one-byte +(if there are less than 127 bytes of data in the record) +or two-byte unsigned integers. The most significant bit +is not part of the offset, instead it indicates the SQL-null +if the bit is set to 1. */ + +/* CANONICAL COORDINATES. A record can be seen as a single string of 'characters' in the following way: catenate the bytes in each field, in the order of fields. An SQL-null field is taken to be an empty sequence of bytes. Then after @@ -86,13 +127,291 @@ the corresponding canonical strings have the same property. */ ulint rec_dummy; /* this is used to fool compiler in rec_validate */ +/******************************************************************* +Validates the consistency of an old-style physical record. */ +static +ibool +rec_validate_old( +/*=============*/ + /* out: TRUE if ok */ + rec_t* rec); /* in: physical record */ + +/********************************************************** +The following function determines the offsets to each field +in the record. The offsets are written to an array of +ulint[n+2], with [0] being the number of fields (n), [1] being the +extra size (if REC_OFFS_COMPACT is set, the record is in the new +format), and [2]..[n+1] being the offsets past the end of +fields 0..n, or to the beginning of fields 1..n+1. When the +high-order bit of the offset at [n+1] is set (REC_OFFS_SQL_NULL), +the field n is NULL. When the second high-order bit of the offset +at [n+1] is set (REC_OFFS_EXTERNAL), the field n is being stored +externally. */ +static +void +rec_init_offsets( +/*=============*/ + /* out: the offsets */ + rec_t* rec, /* in: physical record */ + dict_index_t* index, /* in: record descriptor */ + ulint* offsets)/* in:/out: ulint[n+2]; + n=rec_offs_n_fields(offsets) */ +{ + ulint n_fields = rec_offs_n_fields(offsets); + ulint i = 0; + ulint offs; + + rec_offs_make_valid(rec, index, offsets); + + if (index->table->comp) { + const byte* nulls; + const byte* lens; + dict_field_t* field; + dtype_t* type; + ulint null_mask; + ulint status = rec_get_status(rec); + ulint n_node_ptr_field = ULINT_UNDEFINED; + + switch (status) { + case REC_STATUS_INFIMUM: + case REC_STATUS_SUPREMUM: + /* the field is 8 bytes long */ + rec_offs_base(offsets)[0] = + REC_N_NEW_EXTRA_BYTES | REC_OFFS_COMPACT; + rec_offs_base(offsets)[1] = 8; + return; + case REC_STATUS_NODE_PTR: + n_node_ptr_field = + dict_index_get_n_unique_in_tree(index); + break; + case REC_STATUS_ORDINARY: + break; + } + + nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1); + lens = nulls - (index->n_nullable + 7) / 8; + offs = 0; + null_mask = 1; + + /* read the lengths of fields 0..n */ + for (; i < n_fields; i++) { + ibool is_null = FALSE, is_external = FALSE; + ulint len; + if (i == n_node_ptr_field) { + len = 4; + goto resolved; + } + + field = dict_index_get_nth_field(index, i); + type = dict_col_get_type(dict_field_get_col(field)); + if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) { + /* nullable field => read the null flag */ + is_null = (*nulls & null_mask) != 0; + null_mask <<= 1; + if (null_mask == 0x100) { + nulls--; + null_mask = 1; + } + } + + if (is_null) { + /* No length is stored for NULL fields. 
*/ + len = 0; + } else if (!field->fixed_len) { + /* Variable-length field: read the length */ + len = *lens--; + if (dtype_get_len(type) > 255 + || dtype_get_mtype(type) == DATA_BLOB) { + if (len & 0x80) { + /* 1exxxxxxx xxxxxxxx */ + is_external = !!(len & 0x40); + len &= 0x3f; + len <<= 8; + len |= *lens--; + } + } + } else { + len = field->fixed_len; + } + resolved: + offs += len; + len = offs; + if (is_external) { + len |= REC_OFFS_EXTERNAL; + } + if (is_null) { + len |= REC_OFFS_SQL_NULL; + } + rec_offs_base(offsets)[i + 1] = len; + } + + *rec_offs_base(offsets) = + (rec - (lens + 1)) | REC_OFFS_COMPACT; + } else { + /* Old-style record: determine extra size and end offsets */ + offs = REC_N_OLD_EXTRA_BYTES; + if (rec_get_1byte_offs_flag(rec)) { + offs += n_fields; + *rec_offs_base(offsets) = offs; + /* Determine offsets to fields */ + for (; i < n_fields; i++) { + offs = rec_1_get_field_end_info(rec, i); + if (offs & REC_1BYTE_SQL_NULL_MASK) { + offs &= ~REC_1BYTE_SQL_NULL_MASK; + offs |= REC_OFFS_SQL_NULL; + } + rec_offs_base(offsets)[1 + i] = offs; + } + } else { + offs += 2 * n_fields; + *rec_offs_base(offsets) = offs; + /* Determine offsets to fields */ + for (; i < n_fields; i++) { + offs = rec_2_get_field_end_info(rec, i); + if (offs & REC_2BYTE_SQL_NULL_MASK) { + offs &= ~REC_2BYTE_SQL_NULL_MASK; + offs |= REC_OFFS_SQL_NULL; + } + if (offs & REC_2BYTE_EXTERN_MASK) { + offs &= ~REC_2BYTE_EXTERN_MASK; + offs |= REC_OFFS_EXTERNAL; + } + rec_offs_base(offsets)[1 + i] = offs; + } + } + } +} + +/********************************************************** +The following function determines the offsets to each field +in the record. The offsets are returned in an array of +ulint, with [0] being the number of fields (n), [1] being the +extra size (if REC_OFFS_COMPACT is set, the record is in the new +format), and [2]..[n+1] being the offsets past the end of +fields 0..n, or to the beginning of fields 1..n+1. When the +high-order bit of the offset at [n+1] is set (REC_OFFS_SQL_NULL), +the field n is NULL. When the second high-order bit of the offset +at [n+1] is set (REC_OFFS_EXTERNAL), the field n is being stored +externally. */ + +ulint* +rec_get_offsets( +/*============*/ + /* out: the offsets */ + rec_t* rec, /* in: physical record */ + dict_index_t* index, /* in: record descriptor */ + ulint n_fields,/* in: maximum number of initialized fields + (ULINT_UNDEFINED if all fields) */ + mem_heap_t* heap) /* in: memory heap */ +{ + ulint* offsets; + ulint n; + + ut_ad(rec); + ut_ad(index); + ut_ad(heap); + + if (index->table->comp) { + switch (rec_get_status(rec)) { + case REC_STATUS_ORDINARY: + n = dict_index_get_n_fields(index); + break; + case REC_STATUS_NODE_PTR: + n = dict_index_get_n_unique_in_tree(index) + 1; + break; + case REC_STATUS_INFIMUM: + case REC_STATUS_SUPREMUM: + /* infimum or supremum record */ + n = 1; + break; + default: + ut_error; + return(NULL); + } + } else { + n = rec_get_n_fields_old(rec); + } + + if (n_fields < n) { + n = n_fields; + } + + offsets = mem_heap_alloc(heap, + (n + (1 + REC_OFFS_HEADER_SIZE)) * sizeof(ulint)); + + offsets[0] = n; + + rec_init_offsets(rec, index, offsets); + return(offsets); +} + +/********************************************************** +The following function determines the offsets to each field +in the record. It differs from rec_get_offsets() by trying to +reuse a previously returned array. 
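For orientation (again, not part of the patch itself), the calling convention this changeset adopts for rec_get_offsets()/rec_reget_offsets() at its call sites, for example in page0page.c and row0ins.c, is roughly the sketch below. 'page', 'index' and the way records are iterated are assumed to come from the surrounding code; the point is only that every offsets array lives in one caller-owned heap and that rec_reget_offsets() lets a loop reuse the array it was handed last time.

	mem_heap_t*	heap	= mem_heap_create(100);
	ulint*		offsets	= NULL;
	rec_t*		rec;
	byte*		data;
	ulint		len;

	rec = page_rec_get_next(page_get_infimum_rec(page));

	while (rec != page_get_supremum_rec(page)) {
		/* compute the offsets, reusing the previous array
		when it is already large enough */
		offsets = rec_reget_offsets(rec, index, offsets,
						ULINT_UNDEFINED, heap);

		ut_ad(rec_validate(rec, offsets));

		data = rec_get_nth_field(rec, offsets, 0, &len);
		/* ... use data and len; len == UNIV_SQL_NULL for SQL NULL ... */

		rec = page_rec_get_next(rec);
	}

	mem_heap_free(heap);	/* frees every offsets array allocated above */

Because the heap owns every array handed out this way, the call sites added below in row0ins.c free it on every exit path, including the early returns.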
*/ + +ulint* +rec_reget_offsets( +/*==============*/ + /* out: the new offsets */ + rec_t* rec, /* in: physical record */ + dict_index_t* index, /* in: record descriptor */ + ulint* offsets,/* in: array of offsets + from rec_get_offsets() + or rec_reget_offsets(), or NULL */ + ulint n_fields,/* in: maximum number of initialized fields + (ULINT_UNDEFINED if all fields) */ + mem_heap_t* heap) /* in: memory heap */ +{ + ulint n; + + ut_ad(rec); + ut_ad(index); + ut_ad(heap); + + if (index->table->comp) { + switch (rec_get_status(rec)) { + case REC_STATUS_ORDINARY: + n = dict_index_get_n_fields(index); + break; + case REC_STATUS_NODE_PTR: + n = dict_index_get_n_unique_in_tree(index) + 1; + break; + case REC_STATUS_INFIMUM: + case REC_STATUS_SUPREMUM: + /* infimum or supremum record */ + n = 1; + break; + default: + ut_error; + return(NULL); + } + } else { + n = rec_get_n_fields_old(rec); + } + + if (n_fields < n) { + n = n_fields; + } + + if (!offsets || rec_offs_n_fields(offsets) < n) { + offsets = mem_heap_alloc(heap, + (n + (1 + REC_OFFS_HEADER_SIZE)) * sizeof(ulint)); + } + + offsets[0] = n; + + rec_init_offsets(rec, index, offsets); + return(offsets); +} + /**************************************************************** -The following function is used to get a pointer to the nth data field in a -record. */ +The following function is used to get a pointer to the nth +data field in an old-style record. */ byte* -rec_get_nth_field( -/*==============*/ +rec_get_nth_field_old( +/*==================*/ /* out: pointer to the field */ rec_t* rec, /* in: record */ ulint n, /* in: index of the field */ @@ -103,9 +422,9 @@ rec_get_nth_field( ulint next_os; ut_ad(rec && len); - ut_ad(n < rec_get_n_fields(rec)); + ut_ad(n < rec_get_n_fields_old(rec)); - if (n > 1024) { + if (n > REC_MAX_N_FIELDS) { fprintf(stderr, "Error: trying to access field %lu in rec\n", (ulong) n); ut_error; @@ -150,8 +469,78 @@ rec_get_nth_field( return(rec + os); } +/************************************************************** +The following function returns the size of a data tuple when converted to +a new-style physical record. */ + +ulint +rec_get_converted_size_new( +/*=======================*/ + /* out: size */ + dict_index_t* index, /* in: record descriptor */ + dtuple_t* dtuple) /* in: data tuple */ +{ + ulint size = REC_N_NEW_EXTRA_BYTES + + (index->n_nullable + 7) / 8; + dict_field_t* field; + dtype_t* type; + ulint i; + ulint n_fields; + ut_ad(index && dtuple); + ut_ad(index->table->comp); + + switch (dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK) { + case REC_STATUS_ORDINARY: + n_fields = dict_index_get_n_fields(index); + ut_ad(n_fields == dtuple_get_n_fields(dtuple)); + break; + case REC_STATUS_NODE_PTR: + n_fields = dict_index_get_n_unique_in_tree(index); + ut_ad(n_fields + 1 == dtuple_get_n_fields(dtuple)); + ut_ad(dtuple_get_nth_field(dtuple, n_fields)->len == 4); + size += 4; /* child page number */ + break; + case REC_STATUS_INFIMUM: + case REC_STATUS_SUPREMUM: + /* infimum or supremum record, 8 bytes */ + return(size + 8); /* no extra data needed */ + default: + ut_a(0); + return(ULINT_UNDEFINED); + } + + /* read the lengths of fields 0..n */ + for (i = 0; i < n_fields; i++) { + ulint len = dtuple_get_nth_field(dtuple, i)->len; + field = dict_index_get_nth_field(index, i); + type = dict_col_get_type(dict_field_get_col(field)); + ut_ad(len != UNIV_SQL_NULL || + !(dtype_get_prtype(type) & DATA_NOT_NULL)); + + if (len == UNIV_SQL_NULL) { + /* No length is stored for NULL fields. 
*/ + continue; + } + + ut_ad(len <= dtype_get_len(type) + || dtype_get_mtype(type) == DATA_BLOB); + ut_ad(!field->fixed_len || len == field->fixed_len); + + if (field->fixed_len) { + } else if (len < 128 || (dtype_get_len(type) < 256 + && dtype_get_mtype(type) != DATA_BLOB)) { + size++; + } else { + size += 2; + } + size += len; + } + + return(size); +} + /*************************************************************** -Sets the value of the ith field SQL null bit. */ +Sets the value of the ith field SQL null bit of an old-style record. */ void rec_set_nth_field_null_bit( @@ -189,12 +578,12 @@ rec_set_nth_field_null_bit( } /*************************************************************** -Sets the value of the ith field extern storage bit. */ +Sets the value of the ith field extern storage bit of an old-style record. */ void -rec_set_nth_field_extern_bit( -/*=========================*/ - rec_t* rec, /* in: record */ +rec_set_nth_field_extern_bit_old( +/*=============================*/ + rec_t* rec, /* in: old-style record */ ulint i, /* in: ith field */ ibool val, /* in: value to set */ mtr_t* mtr) /* in: mtr holding an X-latch to the page where @@ -204,7 +593,7 @@ rec_set_nth_field_extern_bit( ulint info; ut_a(!rec_get_1byte_offs_flag(rec)); - ut_a(i < rec_get_n_fields(rec)); + ut_a(i < rec_get_n_fields_old(rec)); info = rec_2_get_field_end_info(rec, i); @@ -215,36 +604,138 @@ rec_set_nth_field_extern_bit( } if (mtr) { - mlog_write_ulint(rec - REC_N_EXTRA_BYTES - 2 * (i + 1), info, - MLOG_2BYTES, mtr); + mlog_write_ulint(rec - REC_N_OLD_EXTRA_BYTES - 2 * (i + 1), + info, MLOG_2BYTES, mtr); } else { rec_2_set_field_end_info(rec, i, info); } } +/*************************************************************** +Sets the value of the ith field extern storage bit of a new-style record. */ + +void +rec_set_nth_field_extern_bit_new( +/*=============================*/ + rec_t* rec, /* in: record */ + dict_index_t* index, /* in: record descriptor */ + ulint ith, /* in: ith field */ + ibool val, /* in: value to set */ + mtr_t* mtr) /* in: mtr holding an X-latch to the page + where rec is, or NULL; in the NULL case + we do not write to log about the change */ +{ + byte* nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1); + byte* lens = nulls - (index->n_nullable + 7) / 8; + dict_field_t* field; + dtype_t* type; + ulint i; + ulint n_fields; + ulint null_mask = 1; + ut_ad(rec && index); + ut_ad(index->table->comp); + ut_ad(rec_get_status(rec) == REC_STATUS_ORDINARY); + + n_fields = dict_index_get_n_fields(index); + + ut_ad(ith < n_fields); + + /* read the lengths of fields 0..n */ + for (i = 0; i < n_fields; i++) { + ibool is_null; + ulint len; + field = dict_index_get_nth_field(index, i); + type = dict_col_get_type(dict_field_get_col(field)); + is_null = !(dtype_get_prtype(type) & DATA_NOT_NULL); + if (is_null) { + /* nullable field => read the null flag */ + is_null = !!(*nulls & null_mask); + null_mask <<= 1; + if (null_mask == 0x100) + nulls--, null_mask = 1; + } + if (is_null || field->fixed_len) { + /* No length (or extern bit) is stored for + fields that are NULL or fixed-length. 
*/ + ut_ad(i != ith); + continue; + } + len = *lens--; + if (dtype_get_len(type) > 255 + || dtype_get_mtype(type) == DATA_BLOB) { + if (len & 0x80) { /* 1exxxxxx: 2-byte length */ + if (i == ith) { + if (!val == !(len & 0x20)) { + return; /* no change */ + } + /* toggle the extern bit */ + len ^= 0x40; + if (mtr) { + mlog_write_ulint(lens + 1, len, + MLOG_1BYTE, mtr); + } else { + lens[1] = len; + } + return; + } + lens--; + } else { + /* short fields cannot be external */ + ut_ad(i != ith); + } + } else { + /* short fields cannot be external */ + ut_ad(i != ith); + } + } +} + /*************************************************************** Sets TRUE the extern storage bits of fields mentioned in an array. */ void rec_set_field_extern_bits( /*======================*/ - rec_t* rec, /* in: record */ - ulint* vec, /* in: array of field numbers */ - ulint n_fields, /* in: number of fields numbers */ - mtr_t* mtr) /* in: mtr holding an X-latch to the page - where rec is, or NULL; in the NULL case we - do not write to log about the change */ + rec_t* rec, /* in: record */ + dict_index_t* index, /* in: record descriptor */ + const ulint* vec, /* in: array of field numbers */ + ulint n_fields,/* in: number of fields numbers */ + mtr_t* mtr) /* in: mtr holding an X-latch to the + page where rec is, or NULL; + in the NULL case we do not write + to log about the change */ { ulint i; for (i = 0; i < n_fields; i++) { - rec_set_nth_field_extern_bit(rec, vec[i], TRUE, mtr); + rec_set_nth_field_extern_bit(rec, index, vec[i], TRUE, mtr); } } +/************************************************************** +Returns the total size of a physical record. */ + +ulint +rec_get_size( +/*=========*/ + /* out: size */ + rec_t* rec, /* in: physical record */ + dict_index_t* index) /* in: record descriptor */ +{ + mem_heap_t* heap + = mem_heap_create(100); + ulint* offsets + = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); + ulint size + = rec_offs_size(offsets); + + mem_heap_free(heap); + return(size); +} + /*************************************************************** -Sets a record field to SQL null. The physical size of the field is not -changed. */ +Sets an old-style record field to SQL null. +The physical size of the field is not changed. */ void rec_set_nth_field_sql_null( @@ -262,20 +753,20 @@ rec_set_nth_field_sql_null( } /************************************************************* -Builds a physical record out of a data tuple and stores it beginning from -address destination. */ - -rec_t* -rec_convert_dtuple_to_rec_low( +Builds an old-style physical record out of a data tuple and +stores it beginning from the start of the given buffer. 
*/ +static +rec_t* +rec_convert_dtuple_to_rec_old( /*==========================*/ - /* out: pointer to the origin of physical - record */ - byte* destination, /* in: start address of the physical record */ - dtuple_t* dtuple, /* in: data tuple */ - ulint data_size) /* in: data size of dtuple */ + /* out: pointer to the origin of + physical record */ + byte* buf, /* in: start address of the physical record */ + dtuple_t* dtuple)/* in: data tuple */ { dfield_t* field; ulint n_fields; + ulint data_size; rec_t* rec; ulint end_offset; ulint ored_offset; @@ -283,24 +774,25 @@ rec_convert_dtuple_to_rec_low( ulint len; ulint i; - ut_ad(destination && dtuple); + ut_ad(buf && dtuple); ut_ad(dtuple_validate(dtuple)); ut_ad(dtuple_check_typed(dtuple)); - ut_ad(dtuple_get_data_size(dtuple) == data_size); n_fields = dtuple_get_n_fields(dtuple); + data_size = dtuple_get_data_size(dtuple); ut_ad(n_fields > 0); /* Calculate the offset of the origin in the physical record */ - rec = destination + rec_get_converted_extra_size(data_size, n_fields); + rec = buf + rec_get_converted_extra_size(data_size, n_fields); /* Store the number of fields */ - rec_set_n_fields(rec, n_fields); + rec_set_n_fields_old(rec, n_fields); /* Set the info bits of the record */ - rec_set_info_bits(rec, dtuple_get_info_bits(dtuple)); + rec_set_info_bits(rec, FALSE, + dtuple_get_info_bits(dtuple) & REC_INFO_BITS_MASK); /* Store the data and the offsets */ @@ -361,8 +853,191 @@ rec_convert_dtuple_to_rec_low( } } - ut_ad(rec_validate(rec)); + return(rec); +} +/************************************************************* +Builds a new-style physical record out of a data tuple and +stores it beginning from the start of the given buffer. */ +static +rec_t* +rec_convert_dtuple_to_rec_new( +/*==========================*/ + /* out: pointer to the origin + of physical record */ + byte* buf, /* in: start address of the physical record */ + dict_index_t* index, /* in: record descriptor */ + dtuple_t* dtuple) /* in: data tuple */ +{ + dfield_t* field; + dtype_t* type; + rec_t* rec = buf + REC_N_NEW_EXTRA_BYTES; + byte* end; + byte* nulls; + byte* lens; + ulint len; + ulint i; + ulint fixed_len; + ulint null_mask = 1; + const ulint n_fields = dtuple_get_n_fields(dtuple); + const ulint status = dtuple_get_info_bits(dtuple) + & REC_NEW_STATUS_MASK; + ut_ad(index->table->comp); + + ut_ad(n_fields > 0); + switch (status) { + case REC_STATUS_ORDINARY: + ut_ad(n_fields <= dict_index_get_n_fields(index)); + break; + case REC_STATUS_NODE_PTR: + ut_ad(n_fields == dict_index_get_n_unique_in_tree(index) + 1); + break; + case REC_STATUS_INFIMUM: + case REC_STATUS_SUPREMUM: + ut_ad(n_fields == 1); + goto init; + default: + ut_a(0); + return(0); + } + + /* Calculate the offset of the origin in the physical record. + We must loop over all fields to do this. 
*/ + rec += (index->n_nullable + 7) / 8; + + for (i = 0; i < n_fields; i++) { + field = dtuple_get_nth_field(dtuple, i); + type = dfield_get_type(field); + len = dfield_get_len(field); + if (status == REC_STATUS_NODE_PTR && i == n_fields - 1) { + fixed_len = 4; + ut_ad(dtype_get_prtype(type) & DATA_NOT_NULL); + ut_ad(len == 4); + continue; + } + fixed_len = dict_index_get_nth_field(index, i)->fixed_len; + + if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) { + if (len == UNIV_SQL_NULL) + continue; + } + /* only nullable fields can be null */ + ut_ad(len != UNIV_SQL_NULL); + if (fixed_len) { + ut_ad(len == fixed_len); + } else { + ut_ad(len <= dtype_get_len(type) + || dtype_get_mtype(type) == DATA_BLOB); + rec++; + if (len >= 128 && (dtype_get_len(type) >= 256 + || dtype_get_mtype(type) == DATA_BLOB)) { + rec++; + } + } + } + +init: + end = rec; + nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1); + lens = nulls - (index->n_nullable + 7) / 8; + /* clear the SQL-null flags */ + memset (lens + 1, 0, nulls - lens); + + /* Set the info bits of the record */ + rec_set_status(rec, status); + + rec_set_info_bits(rec, TRUE, + dtuple_get_info_bits(dtuple) & REC_INFO_BITS_MASK); + + /* Store the data and the offsets */ + + for (i = 0; i < n_fields; i++) { + field = dtuple_get_nth_field(dtuple, i); + type = dfield_get_type(field); + len = dfield_get_len(field); + + if (status == REC_STATUS_NODE_PTR && i == n_fields - 1) { + fixed_len = 4; + ut_ad(dtype_get_prtype(type) & DATA_NOT_NULL); + ut_ad(len == 4); + goto copy; + } + fixed_len = dict_index_get_nth_field(index, i)->fixed_len; + + if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) { + /* nullable field */ + ut_ad(index->n_nullable > 0); + ut_ad(*nulls < null_mask); + /* set the null flag if necessary */ + if (len == UNIV_SQL_NULL) { + *nulls |= null_mask; + } + null_mask <<= 1; + if (null_mask == 0x100) + nulls--, null_mask = 1; + if (len == UNIV_SQL_NULL) + continue; + } + /* only nullable fields can be null */ + ut_ad(len != UNIV_SQL_NULL); + if (fixed_len) { + ut_ad(len == fixed_len); + } else { + ut_ad(len <= dtype_get_len(type) + || dtype_get_mtype(type) == DATA_BLOB); + if (len < 128 || (dtype_get_len(type) < 256 + && dtype_get_mtype(type) != DATA_BLOB)) { + *lens-- = len; + } + else { + /* the extern bits will be set later */ + ut_ad(len < 16384); + *lens-- = len >> 8 | 0x80; + *lens-- = len; + } + } + copy: + memcpy(end, dfield_get_data(field), len); + end += len; + } + + return(rec); +} + +/************************************************************* +Builds a physical record out of a data tuple and +stores it beginning from the start of the given buffer. 
*/ + +rec_t* +rec_convert_dtuple_to_rec( +/*======================*/ + /* out: pointer to the origin + of physical record */ + byte* buf, /* in: start address of the + physical record */ + dict_index_t* index, /* in: record descriptor */ + dtuple_t* dtuple) /* in: data tuple */ +{ + rec_t* rec; + + ut_ad(buf && index && dtuple); + ut_ad(dtuple_validate(dtuple)); + ut_ad(dtuple_check_typed(dtuple)); + + if (index->table->comp) { + rec = rec_convert_dtuple_to_rec_new(buf, index, dtuple); + } else { + rec = rec_convert_dtuple_to_rec_old(buf, dtuple); + } + +#ifdef UNIV_DEBUG + { + mem_heap_t* heap = mem_heap_create(100); + ut_ad(rec_validate(rec, + rec_get_offsets(rec, index, ULINT_UNDEFINED, heap))); + mem_heap_free(heap); + } +#endif /* UNIV_DEBUG */ return(rec); } @@ -375,6 +1050,7 @@ rec_copy_prefix_to_dtuple( /*======================*/ dtuple_t* tuple, /* in: data tuple */ rec_t* rec, /* in: physical record */ + dict_index_t* index, /* in: record descriptor */ ulint n_fields, /* in: number of fields to copy */ mem_heap_t* heap) /* in: memory heap */ { @@ -383,16 +1059,20 @@ rec_copy_prefix_to_dtuple( ulint len; byte* buf = NULL; ulint i; - - ut_ad(rec_validate(rec)); + ulint* offsets; + + offsets = rec_get_offsets(rec, index, n_fields, heap); + + ut_ad(rec_validate(rec, offsets)); ut_ad(dtuple_check_typed(tuple)); - dtuple_set_info_bits(tuple, rec_get_info_bits(rec)); + dtuple_set_info_bits(tuple, + rec_get_info_bits(rec, index->table->comp)); for (i = 0; i < n_fields; i++) { field = dtuple_get_nth_field(tuple, i); - data = rec_get_nth_field(rec, i, &len); + data = rec_get_nth_field(rec, offsets, i, &len); if (len != UNIV_SQL_NULL) { buf = mem_heap_alloc(heap, len); @@ -405,32 +1085,28 @@ rec_copy_prefix_to_dtuple( } /****************************************************************** -Copies the first n fields of a physical record to a new physical record in -a buffer. */ - +Copies the first n fields of an old-style physical record +to a new physical record in a buffer. */ +static rec_t* -rec_copy_prefix_to_buf( -/*===================*/ +rec_copy_prefix_to_buf_old( +/*=======================*/ /* out, own: copied record */ rec_t* rec, /* in: physical record */ ulint n_fields, /* in: number of fields to copy */ + ulint area_end, /* in: end of the prefix data */ byte** buf, /* in/out: memory buffer for the copied prefix, or NULL */ ulint* buf_size) /* in/out: buffer size */ { rec_t* copy_rec; ulint area_start; - ulint area_end; ulint prefix_len; - ut_ad(rec_validate(rec)); - - area_end = rec_get_field_start_offs(rec, n_fields); - if (rec_get_1byte_offs_flag(rec)) { - area_start = REC_N_EXTRA_BYTES + n_fields; + area_start = REC_N_OLD_EXTRA_BYTES + n_fields; } else { - area_start = REC_N_EXTRA_BYTES + 2 * n_fields; + area_start = REC_N_OLD_EXTRA_BYTES + 2 * n_fields; } prefix_len = area_start + area_end; @@ -448,17 +1124,114 @@ rec_copy_prefix_to_buf( copy_rec = *buf + area_start; - rec_set_n_fields(copy_rec, n_fields); + rec_set_n_fields_old(copy_rec, n_fields); return(copy_rec); } -/******************************************************************* -Validates the consistency of a physical record. */ +/****************************************************************** +Copies the first n fields of a physical record to a new physical record in +a buffer. 
*/ +rec_t* +rec_copy_prefix_to_buf( +/*===================*/ + /* out, own: copied record */ + rec_t* rec, /* in: physical record */ + dict_index_t* index, /* in: record descriptor */ + ulint n_fields, /* in: number of fields to copy */ + byte** buf, /* in/out: memory buffer + for the copied prefix, or NULL */ + ulint* buf_size) /* in/out: buffer size */ +{ + byte* nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1); + byte* lens = nulls - (index->n_nullable + 7) / 8; + dict_field_t* field; + dtype_t* type; + ulint i; + ulint prefix_len = 0; + ibool is_null; + ulint null_mask = 1; + ulint status; + + if (!index->table->comp) { + ut_ad(rec_validate_old(rec)); + return(rec_copy_prefix_to_buf_old(rec, n_fields, + rec_get_field_start_offs(rec, n_fields), + buf, buf_size)); + } + + status = rec_get_status(rec); + + switch (status) { + case REC_STATUS_ORDINARY: + ut_ad(n_fields <= dict_index_get_n_fields(index)); + break; + case REC_STATUS_NODE_PTR: + /* it doesn't make sense to copy the child page number field */ + ut_ad(n_fields <= dict_index_get_n_unique_in_tree(index)); + break; + case REC_STATUS_INFIMUM: + case REC_STATUS_SUPREMUM: + /* infimum or supremum record: no sense to copy anything */ + default: + ut_a(0); + return(NULL); + } + + /* read the lengths of fields 0..n */ + for (i = 0; i < n_fields; i++) { + field = dict_index_get_nth_field(index, i); + type = dict_col_get_type(dict_field_get_col(field)); + is_null = !(dtype_get_prtype(type) & DATA_NOT_NULL); + if (is_null) { + /* nullable field => read the null flag */ + is_null = !!(*nulls & null_mask); + null_mask <<= 1; + if (null_mask == 0x100) + nulls--, null_mask = 1; + } + + if (is_null) { + } else if (field->fixed_len) { + prefix_len += field->fixed_len; + } else { + ulint len = *lens--; + if (dtype_get_len(type) > 255 + || dtype_get_mtype(type) == DATA_BLOB) { + if (len & 0x80) { + /* 1exxxxxx */ + len &= 0x3f; + len <<= 8; + len |= *lens--; + } + } + prefix_len += len; + } + } + + prefix_len += rec - (lens + 1); + + if ((*buf == NULL) || (*buf_size < prefix_len)) { + if (*buf != NULL) { + mem_free(*buf); + } + + *buf = mem_alloc(prefix_len); + *buf_size = prefix_len; + } + + memcpy(*buf, lens + 1, prefix_len); + + return(*buf + (rec - (lens + 1))); +} + +/******************************************************************* +Validates the consistency of an old-style physical record. */ +static ibool -rec_validate( -/*=========*/ +rec_validate_old( +/*=============*/ /* out: TRUE if ok */ rec_t* rec) /* in: physical record */ { @@ -470,7 +1243,7 @@ rec_validate( ulint i; ut_a(rec); - n_fields = rec_get_n_fields(rec); + n_fields = rec_get_n_fields_old(rec); if ((n_fields == 0) || (n_fields > REC_MAX_N_FIELDS)) { fprintf(stderr, "InnoDB: Error: record has %lu fields\n", @@ -479,7 +1252,7 @@ rec_validate( } for (i = 0; i < n_fields; i++) { - data = rec_get_nth_field(rec, i, &len); + data = rec_get_nth_field_old(rec, i, &len); if (!((len < UNIV_PAGE_SIZE) || (len == UNIV_SQL_NULL))) { fprintf(stderr, @@ -499,45 +1272,165 @@ rec_validate( } } - if (len_sum != (ulint)(rec_get_end(rec) - rec)) { + if (len_sum != rec_get_data_size_old(rec)) { fprintf(stderr, "InnoDB: Error: record len should be %lu, len %lu\n", (ulong) len_sum, - (ulong) (rec_get_end(rec) - rec)); + rec_get_data_size_old(rec)); + return(FALSE); + } + + rec_dummy = sum; /* This is here only to fool the compiler */ + + return(TRUE); +} + +/******************************************************************* +Validates the consistency of a physical record. 
*/ + +ibool +rec_validate( +/*=========*/ + /* out: TRUE if ok */ + rec_t* rec, /* in: physical record */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ +{ + const byte* data; + ulint len; + ulint n_fields; + ulint len_sum = 0; + ulint sum = 0; + ulint i; + + ut_a(rec); + n_fields = rec_offs_n_fields(offsets); + + if ((n_fields == 0) || (n_fields > REC_MAX_N_FIELDS)) { + fprintf(stderr, "InnoDB: Error: record has %lu fields\n", + (ulong) n_fields); + return(FALSE); + } + + ut_a(rec_offs_comp(offsets) || n_fields <= rec_get_n_fields_old(rec)); + + for (i = 0; i < n_fields; i++) { + data = rec_get_nth_field(rec, offsets, i, &len); + + if (!((len < UNIV_PAGE_SIZE) || (len == UNIV_SQL_NULL))) { + fprintf(stderr, + "InnoDB: Error: record field %lu len %lu\n", (ulong) i, + (ulong) len); + return(FALSE); + } + + if (len != UNIV_SQL_NULL) { + len_sum += len; + sum += *(data + len -1); /* dereference the + end of the field to + cause a memory trap + if possible */ + } else if (!rec_offs_comp(offsets)) { + len_sum += rec_get_nth_field_size(rec, i); + } + } + + if (len_sum != (ulint)(rec_get_end(rec, offsets) - rec)) { + fprintf(stderr, + "InnoDB: Error: record len should be %lu, len %lu\n", + (ulong) len_sum, + (ulong) (rec_get_end(rec, offsets) - rec)); return(FALSE); } rec_dummy = sum; /* This is here only to fool the compiler */ + if (!rec_offs_comp(offsets)) { + ut_a(rec_validate_old(rec)); + } + return(TRUE); } +/******************************************************************* +Prints an old-style physical record. */ + +void +rec_print_old( +/*==========*/ + FILE* file, /* in: file where to print */ + rec_t* rec) /* in: physical record */ +{ + const byte* data; + ulint len; + ulint n; + ulint i; + + ut_ad(rec); + + n = rec_get_n_fields_old(rec); + + fprintf(file, "PHYSICAL RECORD: n_fields %lu;" + " %u-byte offsets; info bits %lu\n", + (ulong) n, + rec_get_1byte_offs_flag(rec) ? 1 : 2, + (ulong) rec_get_info_bits(rec, FALSE)); + + for (i = 0; i < n; i++) { + + data = rec_get_nth_field_old(rec, i, &len); + + fprintf(file, " %lu:", (ulong) i); + + if (len != UNIV_SQL_NULL) { + if (len <= 30) { + + ut_print_buf(file, data, len); + } else { + ut_print_buf(file, data, 30); + + fputs("...(truncated)", file); + } + } else { + fprintf(file, " SQL NULL, size %lu ", + rec_get_nth_field_size(rec, i)); + } + putc(';', file); + } + + putc('\n', file); + + rec_validate_old(rec); +} + /******************************************************************* Prints a physical record. */ void rec_print( /*======*/ - FILE* file, /* in: file where to print */ - rec_t* rec) /* in: physical record */ + FILE* file, /* in: file where to print */ + rec_t* rec, /* in: physical record */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { - byte* data; - ulint len; - ulint n; - ulint i; + const byte* data; + ulint len; + ulint i; + + if (!rec_offs_comp(offsets)) { + rec_print_old(file, rec); + return; + } ut_ad(rec); - - n = rec_get_n_fields(rec); fprintf(file, "PHYSICAL RECORD: n_fields %lu;" - " 1-byte offs %s; info bits %lu\n", - (ulong) n, rec_get_1byte_offs_flag(rec) ? 
"TRUE" : "FALSE", - (ulong) rec_get_info_bits(rec)); + " compact format; info bits %lu\n", + (ulong) rec_offs_n_fields(offsets), + (ulong) rec_get_info_bits(rec, TRUE)); - for (i = 0; i < n; i++) { + for (i = 0; i < rec_offs_n_fields(offsets); i++) { - data = rec_get_nth_field(rec, i, &len); + data = rec_get_nth_field(rec, offsets, i, &len); fprintf(file, " %lu:", (ulong) i); @@ -551,14 +1444,12 @@ rec_print( fputs("...(truncated)", file); } } else { - fprintf(file, " SQL NULL, size %lu ", - (ulong) rec_get_nth_field_size(rec, i)); - + fputs(" SQL NULL", file); } putc(';', file); } putc('\n', file); - rec_validate(rec); + rec_validate(rec, offsets); } diff --git a/innobase/row/row0ins.c b/innobase/row/row0ins.c index 6d1482b6720..1c55005dcfa 100644 --- a/innobase/row/row0ins.c +++ b/innobase/row/row0ins.c @@ -251,7 +251,7 @@ row_ins_sec_index_entry_by_modify( rec = btr_cur_get_rec(cursor); ut_ad((cursor->index->type & DICT_CLUSTERED) == 0); - ut_ad(rec_get_deleted_flag(rec)); + ut_ad(rec_get_deleted_flag(rec, cursor->index->table->comp)); /* We know that in the alphabetical ordering, entry and rec are identified. But in their binary form there may be differences if @@ -316,7 +316,7 @@ row_ins_clust_index_entry_by_modify( rec = btr_cur_get_rec(cursor); - ut_ad(rec_get_deleted_flag(rec)); + ut_ad(rec_get_deleted_flag(rec, cursor->index->table->comp)); heap = mem_heap_create(1024); @@ -473,6 +473,8 @@ row_ins_cascade_calc_update_vec( if (parent_ufield->field_no == parent_field_no) { + ulint fixed_size; + /* A field in the parent index record is updated. Let us make the update vector field for the child table. */ @@ -512,22 +514,22 @@ row_ins_cascade_calc_update_vec( need to pad with spaces the new value of the child column */ - if (dtype_is_fixed_size(type) + fixed_size = dtype_get_fixed_size(type); + + if (fixed_size && ufield->new_val.len != UNIV_SQL_NULL - && ufield->new_val.len - < dtype_get_fixed_size(type)) { + && ufield->new_val.len < fixed_size) { ufield->new_val.data = mem_heap_alloc(heap, - dtype_get_fixed_size(type)); - ufield->new_val.len = - dtype_get_fixed_size(type); + fixed_size); + ufield->new_val.len = fixed_size; ut_a(dtype_get_pad_char(type) != ULINT_UNDEFINED); memset(ufield->new_val.data, (byte)dtype_get_pad_char(type), - dtype_get_fixed_size(type)); + fixed_size); ut_memcpy(ufield->new_val.data, parent_ufield->new_val.data, parent_ufield->new_val.len); @@ -588,8 +590,16 @@ row_ins_foreign_report_err( fputs(", in index ", ef); ut_print_name(ef, trx, foreign->foreign_index->name); if (rec) { + mem_heap_t* heap; + ulint* offsets; + + heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, foreign->foreign_index, + ULINT_UNDEFINED, heap); + fputs(", there is a record:\n", ef); - rec_print(ef, rec); + rec_print(ef, rec, offsets); + mem_heap_free(heap); } else { fputs(", the record is not available\n", ef); } @@ -644,7 +654,16 @@ row_ins_foreign_report_add_err( } if (rec) { - rec_print(ef, rec); + mem_heap_t* heap; + ulint* offsets; + + heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, foreign->foreign_index, + ULINT_UNDEFINED, heap); + + rec_print(ef, rec, offsets); + + mem_heap_free(heap); } putc('\n', ef); @@ -706,7 +725,6 @@ row_ins_foreign_check_on_constraint( dict_index_t* index; dict_index_t* clust_index; dtuple_t* ref; - mem_heap_t* tmp_heap; mem_heap_t* upd_vec_heap = NULL; rec_t* rec; rec_t* clust_rec; @@ -715,8 +733,9 @@ row_ins_foreign_check_on_constraint( ulint err; ulint i; trx_t* trx; + mem_heap_t* tmp_heap = NULL; + ulint* offsets; - ut_a(thr 
&& foreign && pcur && mtr); trx = thr_get_trx(thr); @@ -816,7 +835,7 @@ row_ins_foreign_check_on_constraint( err = DB_ROW_IS_REFERENCED; row_ins_foreign_report_err( -(char*)"Trying a too deep cascaded delete or update\n", +"Trying a too deep cascaded delete or update\n", thr, foreign, btr_pcur_get_rec(pcur), entry); goto nonstandard_exit_func; @@ -848,8 +867,6 @@ row_ins_foreign_check_on_constraint( PAGE_CUR_LE, BTR_SEARCH_LEAF, cascade->pcur, 0, mtr); - mem_heap_free(tmp_heap); - clust_rec = btr_pcur_get_rec(cascade->pcur); if (!page_rec_is_user_rec(clust_rec) @@ -863,10 +880,14 @@ row_ins_foreign_check_on_constraint( fputs("\n" "InnoDB: record ", stderr); - rec_print(stderr, rec); + offsets = rec_get_offsets(rec, index, + ULINT_UNDEFINED, tmp_heap); + rec_print(stderr, rec, offsets); fputs("\n" "InnoDB: clustered record ", stderr); - rec_print(stderr, clust_rec); + offsets = rec_reget_offsets(clust_rec, clust_index, + offsets, ULINT_UNDEFINED, tmp_heap); + rec_print(stderr, clust_rec, offsets); fputs("\n" "InnoDB: Submit a detailed bug report to http://bugs.mysql.com\n", stderr); @@ -884,9 +905,14 @@ row_ins_foreign_check_on_constraint( /* Here it suffices to use a LOCK_REC_NOT_GAP type lock; we already have a normal shared lock on the appropriate gap if the search criterion was not unique */ - + + if (!tmp_heap) { + tmp_heap = mem_heap_create(256); + } + offsets = rec_get_offsets(clust_rec, clust_index, + ULINT_UNDEFINED, tmp_heap); err = lock_clust_rec_read_check_and_lock(0, clust_rec, - clust_index, LOCK_X, LOCK_REC_NOT_GAP, thr); + clust_index, offsets, LOCK_X, LOCK_REC_NOT_GAP, thr); } if (err != DB_SUCCESS) { @@ -894,7 +920,7 @@ row_ins_foreign_check_on_constraint( goto nonstandard_exit_func; } - if (rec_get_deleted_flag(clust_rec)) { + if (rec_get_deleted_flag(clust_rec, table->comp)) { /* This can happen if there is a circular reference of rows such that cascading delete comes to delete a row already in the process of being delete marked */ @@ -1003,6 +1029,10 @@ row_ins_foreign_check_on_constraint( btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, mtr); + if (tmp_heap) { + mem_heap_free(tmp_heap); + } + if (upd_vec_heap) { mem_heap_free(upd_vec_heap); } @@ -1010,6 +1040,9 @@ row_ins_foreign_check_on_constraint( return(err); nonstandard_exit_func: + if (tmp_heap) { + mem_heap_free(tmp_heap); + } if (upd_vec_heap) { mem_heap_free(upd_vec_heap); @@ -1037,16 +1070,19 @@ row_ins_set_shared_rec_lock( LOCK_REC_NOT_GAP type lock */ rec_t* rec, /* in: record */ dict_index_t* index, /* in: index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ que_thr_t* thr) /* in: query thread */ { ulint err; + ut_ad(rec_offs_validate(rec, index, offsets)); + if (index->type & DICT_CLUSTERED) { - err = lock_clust_rec_read_check_and_lock(0, rec, index, LOCK_S, - type, thr); + err = lock_clust_rec_read_check_and_lock(0, + rec, index, offsets, LOCK_S, type, thr); } else { - err = lock_sec_rec_read_check_and_lock(0, rec, index, LOCK_S, - type, thr); + err = lock_sec_rec_read_check_and_lock(0, + rec, index, offsets, LOCK_S, type, thr); } return(err); @@ -1064,16 +1100,19 @@ row_ins_set_exclusive_rec_lock( LOCK_REC_NOT_GAP type lock */ rec_t* rec, /* in: record */ dict_index_t* index, /* in: index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ que_thr_t* thr) /* in: query thread */ { ulint err; + ut_ad(rec_offs_validate(rec, index, offsets)); + if (index->type & DICT_CLUSTERED) { - err = lock_clust_rec_read_check_and_lock(0, rec, index, LOCK_X, - type, thr); + err = 
lock_clust_rec_read_check_and_lock(0, + rec, index, offsets, LOCK_X, type, thr); } else { - err = lock_sec_rec_read_check_and_lock(0, rec, index, LOCK_X, - type, thr); + err = lock_sec_rec_read_check_and_lock(0, + rec, index, offsets, LOCK_X, type, thr); } return(err); @@ -1114,6 +1153,10 @@ row_ins_check_foreign_constraint( ulint i; mtr_t mtr; trx_t* trx = thr_get_trx(thr); + mem_heap_t* heap; + ulint* offsets = NULL; + + heap = mem_heap_create(100); run_again: #ifdef UNIV_SYNC_DEBUG @@ -1125,7 +1168,7 @@ run_again: if (trx->check_foreigns == FALSE) { /* The user has suppressed foreign key checks currently for this session */ - + mem_heap_free(heap); return(DB_SUCCESS); } @@ -1137,6 +1180,7 @@ run_again: if (UNIV_SQL_NULL == dfield_get_len( dtuple_get_nth_field(entry, i))) { + mem_heap_free(heap); return(DB_SUCCESS); } } @@ -1160,7 +1204,8 @@ run_again: with each foreign key constraint, one after another, and the user has problems predicting in which order they are performed. */ - + + mem_heap_free(heap); return(DB_SUCCESS); } } @@ -1174,6 +1219,8 @@ run_again: } if (check_table == NULL) { + mem_heap_free(heap); + if (check_ref) { FILE* ef = dict_foreign_err_file; mutex_enter(&dict_foreign_err_mutex); @@ -1244,10 +1291,13 @@ run_again: goto next_rec; } + offsets = rec_reget_offsets(rec, check_index, + offsets, ULINT_UNDEFINED, heap); + if (rec == page_get_supremum_rec(buf_frame_align(rec))) { - + err = row_ins_set_shared_rec_lock(LOCK_ORDINARY, rec, - check_index, thr); + check_index, offsets, thr); if (err != DB_SUCCESS) { break; @@ -1256,29 +1306,30 @@ run_again: goto next_rec; } - cmp = cmp_dtuple_rec(entry, rec); + cmp = cmp_dtuple_rec(entry, rec, offsets); if (cmp == 0) { - if (rec_get_deleted_flag(rec)) { + if (rec_get_deleted_flag(rec, + rec_offs_comp(offsets))) { err = row_ins_set_shared_rec_lock( - LOCK_ORDINARY, - rec, check_index, thr); + LOCK_ORDINARY, rec, + check_index, offsets, thr); if (err != DB_SUCCESS) { break; } } else { /* Found a matching record */ + ulint lock_type; if (unique_search) { - err = row_ins_set_shared_rec_lock( - LOCK_REC_NOT_GAP, - rec, check_index, thr); + lock_type = LOCK_REC_NOT_GAP; } else { - err = row_ins_set_shared_rec_lock( - LOCK_ORDINARY, - rec, check_index, thr); + lock_type = LOCK_ORDINARY; } + + err = row_ins_set_shared_rec_lock(lock_type, + rec, check_index, offsets, thr); if (err != DB_SUCCESS) { @@ -1315,7 +1366,7 @@ run_again: if (cmp < 0) { err = row_ins_set_shared_rec_lock(LOCK_GAP, - rec, check_index, thr); + rec, check_index, offsets, thr); if (err != DB_SUCCESS) { break; @@ -1373,6 +1424,7 @@ do_possible_lock_wait: err = trx->error_state; } + mem_heap_free(heap); return(err); } @@ -1444,19 +1496,23 @@ row_ins_dupl_error_with_rec( that the caller already has a record lock on the record! 
*/ dtuple_t* entry, /* in: entry to insert */ - dict_index_t* index) /* in: index */ + dict_index_t* index, /* in: index */ + const ulint* offsets)/* in: rec_get_offsets(rec, index) */ { ulint matched_fields; ulint matched_bytes; ulint n_unique; ulint i; - + + ut_ad(rec_offs_validate(rec, index, offsets)); + n_unique = dict_index_get_n_unique(index); matched_fields = 0; matched_bytes = 0; - cmp_dtuple_rec_with_match(entry, rec, &matched_fields, &matched_bytes); + cmp_dtuple_rec_with_match(entry, rec, offsets, + &matched_fields, &matched_bytes); if (matched_fields < n_unique) { @@ -1477,7 +1533,7 @@ row_ins_dupl_error_with_rec( } } - if (!rec_get_deleted_flag(rec)) { + if (!rec_get_deleted_flag(rec, index->table->comp)) { return(TRUE); } @@ -1509,7 +1565,9 @@ row_ins_scan_sec_index_for_duplicate( ibool moved; mtr_t mtr; trx_t* trx; - + mem_heap_t* heap; + ulint* offsets = NULL; + n_unique = dict_index_get_n_unique(index); /* If the secondary index is unique, but one of the fields in the @@ -1524,6 +1582,7 @@ row_ins_scan_sec_index_for_duplicate( } } + heap = mem_heap_create(100); mtr_start(&mtr); /* Store old value on n_fields_cmp */ @@ -1549,6 +1608,9 @@ row_ins_scan_sec_index_for_duplicate( trx = thr_get_trx(thr); ut_ad(trx); + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + if (innobase_query_is_replace()) { /* The manual defines the REPLACE semantics that it @@ -1556,12 +1618,12 @@ row_ins_scan_sec_index_for_duplicate( + INSERT. Therefore, we should take X-lock for duplicates */ - err = row_ins_set_exclusive_rec_lock( - LOCK_ORDINARY,rec,index,thr); + err = row_ins_set_exclusive_rec_lock(LOCK_ORDINARY, + rec, index, offsets, thr); } else { - err = row_ins_set_shared_rec_lock( - LOCK_ORDINARY, rec, index,thr); + err = row_ins_set_shared_rec_lock(LOCK_ORDINARY, + rec, index, offsets, thr); } if (err != DB_SUCCESS) { @@ -1574,10 +1636,11 @@ row_ins_scan_sec_index_for_duplicate( goto next_rec; } - cmp = cmp_dtuple_rec(entry, rec); + cmp = cmp_dtuple_rec(entry, rec, offsets); if (cmp == 0) { - if (row_ins_dupl_error_with_rec(rec, entry, index)) { + if (row_ins_dupl_error_with_rec(rec, entry, + index, offsets)) { err = DB_DUPLICATE_KEY; thr_get_trx(thr)->error_info = index; @@ -1599,6 +1662,7 @@ next_rec: } } + mem_heap_free(heap); mtr_commit(&mtr); /* Restore old value */ @@ -1656,6 +1720,12 @@ row_ins_duplicate_error_in_clust( page = buf_frame_align(rec); if (rec != page_get_infimum_rec(page)) { + mem_heap_t* heap; + ulint* offsets; + + heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, cursor->index, + ULINT_UNDEFINED, heap); /* We set a lock on the possible duplicate: this is needed in logical logging of MySQL to make @@ -1671,24 +1741,26 @@ row_ins_duplicate_error_in_clust( err = row_ins_set_exclusive_rec_lock( LOCK_REC_NOT_GAP,rec,cursor->index, - thr); + offsets, thr); } else { err = row_ins_set_shared_rec_lock( LOCK_REC_NOT_GAP,rec, cursor->index, - thr); + offsets, thr); } if (err != DB_SUCCESS) { - + mem_heap_free(heap); return(err); } if (row_ins_dupl_error_with_rec(rec, entry, - cursor->index)) { + cursor->index, offsets)) { trx->error_info = cursor->index; + mem_heap_free(heap); return(DB_DUPLICATE_KEY); } + mem_heap_free(heap); } } @@ -1698,7 +1770,12 @@ row_ins_duplicate_error_in_clust( page = buf_frame_align(rec); if (rec != page_get_supremum_rec(page)) { + mem_heap_t* heap; + ulint* offsets; + heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, cursor->index, + ULINT_UNDEFINED, heap); /* The manual defines the REPLACE 
semantics that it is either an INSERT or DELETE(s) for duplicate key @@ -1708,25 +1785,27 @@ row_ins_duplicate_error_in_clust( if (innobase_query_is_replace()) { err = row_ins_set_exclusive_rec_lock( - LOCK_REC_NOT_GAP, - rec,cursor->index,thr); + LOCK_REC_NOT_GAP, rec, + cursor->index, offsets, thr); } else { err = row_ins_set_shared_rec_lock( - LOCK_REC_NOT_GAP,rec, - cursor->index, thr); + LOCK_REC_NOT_GAP, rec, + cursor->index, offsets, thr); } if (err != DB_SUCCESS) { - + mem_heap_free(heap); return(err); } if (row_ins_dupl_error_with_rec(rec, entry, - cursor->index)) { + cursor->index, offsets)) { trx->error_info = cursor->index; + mem_heap_free(heap); return(DB_DUPLICATE_KEY); } + mem_heap_free(heap); } ut_a(!(cursor->index->type & DICT_CLUSTERED)); @@ -1815,6 +1894,8 @@ row_ins_index_entry_low( ulint n_unique; big_rec_t* big_rec = NULL; mtr_t mtr; + mem_heap_t* heap = mem_heap_create(100); + ulint* offsets = NULL; log_free_check(); @@ -1847,8 +1928,9 @@ row_ins_index_entry_low( buf_frame_align(btr_cur_get_rec(&cursor)))); if (!page_rec_is_supremum(first_rec)) { - ut_a((rec_get_n_fields(first_rec)) - == dtuple_get_n_fields(entry)); + offsets = rec_get_offsets(first_rec, index, + ULINT_UNDEFINED, heap); + ut_a(rec_offs_n_fields(offsets) == dtuple_get_n_fields(entry)); } n_unique = dict_index_get_n_unique(index); @@ -1926,7 +2008,7 @@ row_ins_index_entry_low( if (err == DB_SUCCESS) { if (ext_vec) { - rec_set_field_extern_bits(insert_rec, + rec_set_field_extern_bits(insert_rec, index, ext_vec, n_ext_vec, &mtr); } } @@ -1936,14 +2018,18 @@ function_exit: mtr_commit(&mtr); if (big_rec) { + rec_t* rec; mtr_start(&mtr); btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE, BTR_MODIFY_TREE, &cursor, 0, &mtr); + rec = btr_cur_get_rec(&cursor); + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + + err = btr_store_big_rec_extern_fields(index, rec, + offsets, big_rec, &mtr); - err = btr_store_big_rec_extern_fields(index, - btr_cur_get_rec(&cursor), - big_rec, &mtr); if (modify) { dtuple_big_rec_free(big_rec); } else { @@ -1953,6 +2039,7 @@ function_exit: mtr_commit(&mtr); } + mem_heap_free(heap); return(err); } diff --git a/innobase/row/row0mysql.c b/innobase/row/row0mysql.c index 0de4b189493..be243b44488 100644 --- a/innobase/row/row0mysql.c +++ b/innobase/row/row0mysql.c @@ -782,10 +782,11 @@ row_lock_table_for_mysql( /* out: error code or DB_SUCCESS */ row_prebuilt_t* prebuilt, /* in: prebuilt struct in the MySQL table handle */ - dict_table_t* table) /* in: table to LOCK_IX, or NULL + dict_table_t* table, /* in: table to lock, or NULL if prebuilt->table should be locked as LOCK_TABLE_EXP | prebuilt->select_lock_type */ + ulint mode) /* in: lock mode of table */ { trx_t* trx = prebuilt->trx; que_thr_t* thr; @@ -819,7 +820,7 @@ run_again: trx_start_if_not_started(trx); if (table) { - err = lock_table(0, table, LOCK_IX, thr); + err = lock_table(0, table, mode, thr); } else { err = lock_table(LOCK_TABLE_EXP, prebuilt->table, prebuilt->select_lock_type, thr); @@ -3225,7 +3226,8 @@ row_scan_and_check_index( int cmp; ibool contains_null; ulint i; - + ulint* offsets = NULL; + *n_rows = 0; buf = mem_alloc(UNIV_PAGE_SIZE); @@ -3265,8 +3267,10 @@ loop: if (prev_entry != NULL) { matched_fields = 0; matched_bytes = 0; - - cmp = cmp_dtuple_rec_with_match(prev_entry, rec, + + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + cmp = cmp_dtuple_rec_with_match(prev_entry, rec, offsets, &matched_fields, &matched_bytes); contains_null = FALSE; @@ -3295,7 
+3299,7 @@ loop: dtuple_print(stderr, prev_entry); fputs("\n" "InnoDB: record ", stderr); - rec_print(stderr, rec); + rec_print(stderr, rec, offsets); putc('\n', stderr); is_ok = FALSE; } else if ((index->type & DICT_UNIQUE) @@ -3309,6 +3313,7 @@ loop: } mem_heap_empty(heap); + offsets = NULL; prev_entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap); @@ -3393,7 +3398,7 @@ row_check_table_for_mysql( /* We validate also the whole adaptive hash index for all tables at every CHECK TABLE */ - if (!btr_search_validate()) { + if (!btr_search_validate(index)) { ret = DB_ERROR; } diff --git a/innobase/row/row0purge.c b/innobase/row/row0purge.c index f7e01169b9d..109d0f3b976 100644 --- a/innobase/row/row0purge.c +++ b/innobase/row/row0purge.c @@ -99,6 +99,8 @@ row_purge_remove_clust_if_poss_low( ibool success; ulint err; mtr_t mtr; + rec_t* rec; + mem_heap_t* heap; index = dict_table_get_first_index(node->table); @@ -117,15 +119,21 @@ row_purge_remove_clust_if_poss_low( return(TRUE); } + rec = btr_pcur_get_rec(pcur); + heap = mem_heap_create(100); + if (0 != ut_dulint_cmp(node->roll_ptr, - row_get_rec_roll_ptr(btr_pcur_get_rec(pcur), index))) { - + row_get_rec_roll_ptr(rec, index, rec_get_offsets( + rec, index, ULINT_UNDEFINED, heap)))) { + mem_heap_free(heap); /* Someone else has modified the record later: do not remove */ btr_pcur_commit_specify_mtr(pcur, &mtr); return(TRUE); } + mem_heap_free(heap); + if (mode == BTR_MODIFY_LEAF) { success = btr_cur_optimistic_delete(btr_cur, &mtr); } else { diff --git a/innobase/row/row0row.c b/innobase/row/row0row.c index 38714b0c49b..9cf285a519d 100644 --- a/innobase/row/row0row.c +++ b/innobase/row/row0row.c @@ -37,17 +37,18 @@ row_get_rec_sys_field( /* out: value of the field */ ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */ rec_t* rec, /* in: record */ - dict_index_t* index) /* in: clustered index */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets)/* in: rec_get_offsets(rec, index) */ { - ulint pos; - byte* field; - ulint len; + ulint pos; + byte* field; + ulint len; ut_ad(index->type & DICT_CLUSTERED); pos = dict_index_get_sys_col_pos(index, type); - field = rec_get_nth_field(rec, pos, &len); + field = rec_get_nth_field(rec, offsets, pos, &len); if (type == DATA_TRX_ID) { @@ -70,6 +71,7 @@ row_set_rec_sys_field( ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */ rec_t* rec, /* in: record */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ dulint val) /* in: value to set */ { ulint pos; @@ -77,10 +79,11 @@ row_set_rec_sys_field( ulint len; ut_ad(index->type & DICT_CLUSTERED); + ut_ad(rec_offs_validate(rec, index, offsets)); pos = dict_index_get_sys_col_pos(index, type); - field = rec_get_nth_field(rec, pos, &len); + field = rec_get_nth_field(rec, offsets, pos, &len); if (type == DATA_TRX_ID) { @@ -182,6 +185,9 @@ row_build( the buffer page of this record must be at least s-latched and the latch held as long as the row dtuple is used! 
*/ + const ulint* offsets,/* in: rec_get_offsets(rec, index) + or NULL, in which case this function + will invoke rec_get_offsets() */ mem_heap_t* heap) /* in: memory heap from which the memory needed is allocated */ { @@ -196,14 +202,26 @@ row_build( ulint row_len; byte* buf; ulint i; - + mem_heap_t* tmp_heap; + ut_ad(index && rec && heap); ut_ad(index->type & DICT_CLUSTERED); + if (!offsets) { + tmp_heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, index, + ULINT_UNDEFINED, tmp_heap); + } else { + tmp_heap = NULL; + ut_ad(rec_offs_validate(rec, index, offsets)); + } + if (type != ROW_COPY_POINTERS) { /* Take a copy of rec to heap */ - buf = mem_heap_alloc(heap, rec_get_size(rec)); - rec = rec_copy(buf, rec); + buf = mem_heap_alloc(heap, rec_offs_size(offsets)); + rec = rec_copy(buf, rec, offsets); + /* Avoid a debug assertion in rec_offs_validate(). */ + rec_offs_make_valid(rec, index, (ulint*) offsets); } table = index->table; @@ -211,11 +229,9 @@ row_build( row = dtuple_create(heap, row_len); - dtuple_set_info_bits(row, rec_get_info_bits(rec)); - - n_fields = dict_index_get_n_fields(index); + dtuple_set_info_bits(row, rec_get_info_bits(rec, table->comp)); - ut_ad(n_fields == rec_get_n_fields(rec)); + n_fields = rec_offs_n_fields(offsets); dict_table_copy_types(row, table); @@ -227,13 +243,13 @@ row_build( col = dict_field_get_col(ind_field); dfield = dtuple_get_nth_field(row, dict_col_get_no(col)); - field = rec_get_nth_field(rec, i, &len); + field = rec_get_nth_field(rec, offsets, i, &len); if (type == ROW_COPY_ALSO_EXTERNALS - && rec_get_nth_field_extern_bit(rec, i)) { + && rec_offs_nth_extern(offsets, i)) { field = btr_rec_copy_externally_stored_field( - rec, i, &len, heap); + rec, offsets, i, &len, heap); } dfield_set_data(dfield, field, len); @@ -242,6 +258,10 @@ row_build( ut_ad(dtuple_check_typed(row)); + if (tmp_heap) { + mem_heap_free(tmp_heap); + } + return(row); } @@ -276,16 +296,23 @@ row_rec_to_index_entry( ulint len; ulint rec_len; byte* buf; - + mem_heap_t* tmp_heap; + ulint* offsets; + ut_ad(rec && heap && index); + tmp_heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, tmp_heap); + if (type == ROW_COPY_DATA) { /* Take a copy of rec to heap */ - buf = mem_heap_alloc(heap, rec_get_size(rec)); - rec = rec_copy(buf, rec); + buf = mem_heap_alloc(heap, rec_offs_size(offsets)); + rec = rec_copy(buf, rec, offsets); + /* Avoid a debug assertion in rec_offs_validate(). 
*/ + rec_offs_make_valid(rec, index, offsets); } - rec_len = rec_get_n_fields(rec); + rec_len = rec_offs_n_fields(offsets); entry = dtuple_create(heap, rec_len); @@ -295,17 +322,19 @@ row_rec_to_index_entry( dict_index_copy_types(entry, index, rec_len); - dtuple_set_info_bits(entry, rec_get_info_bits(rec)); + dtuple_set_info_bits(entry, + rec_get_info_bits(rec, rec_offs_comp(offsets))); for (i = 0; i < rec_len; i++) { dfield = dtuple_get_nth_field(entry, i); - field = rec_get_nth_field(rec, i, &len); + field = rec_get_nth_field(rec, offsets, i, &len); dfield_set_data(dfield, field, len); } ut_ad(dtuple_check_typed(entry)); + mem_heap_free(tmp_heap); return(entry); } @@ -345,15 +374,22 @@ row_build_row_ref( byte* buf; ulint clust_col_prefix_len; ulint i; + mem_heap_t* tmp_heap; + ulint* offsets; ut_ad(index && rec && heap); - + + tmp_heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, tmp_heap); + if (type == ROW_COPY_DATA) { /* Take a copy of rec to heap */ - buf = mem_heap_alloc(heap, rec_get_size(rec)); + buf = mem_heap_alloc(heap, rec_offs_size(offsets)); - rec = rec_copy(buf, rec); + rec = rec_copy(buf, rec, offsets); + /* Avoid a debug assertion in rec_offs_validate(). */ + rec_offs_make_valid(rec, index, offsets); } table = index->table; @@ -373,7 +409,7 @@ row_build_row_ref( ut_a(pos != ULINT_UNDEFINED); - field = rec_get_nth_field(rec, pos, &len); + field = rec_get_nth_field(rec, offsets, pos, &len); dfield_set_data(dfield, field, len); @@ -397,6 +433,7 @@ row_build_row_ref( } ut_ad(dtuple_check_typed(ref)); + mem_heap_free(tmp_heap); return(ref); } @@ -427,7 +464,9 @@ row_build_row_ref_in_tuple( ulint pos; ulint clust_col_prefix_len; ulint i; - + mem_heap_t* heap; + ulint* offsets; + ut_a(ref && index && rec); if (!index->table) { @@ -446,7 +485,10 @@ row_build_row_ref_in_tuple( fputs("InnoDB: clust index for table ", stderr); goto notfound; } - + + heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); + ref_len = dict_index_get_n_unique(clust_index); ut_ad(ref_len == dtuple_get_n_fields(ref)); @@ -459,8 +501,8 @@ row_build_row_ref_in_tuple( pos = dict_index_get_nth_field_pos(index, clust_index, i); ut_a(pos != ULINT_UNDEFINED); - - field = rec_get_nth_field(rec, pos, &len); + + field = rec_get_nth_field(rec, offsets, pos, &len); dfield_set_data(dfield, field, len); @@ -484,6 +526,7 @@ row_build_row_ref_in_tuple( } ut_ad(dtuple_check_typed(ref)); + mem_heap_free(heap); } /*********************************************************************** diff --git a/innobase/row/row0sel.c b/innobase/row/row0sel.c index 27470df81c5..2b40b62e5bc 100644 --- a/innobase/row/row0sel.c +++ b/innobase/row/row0sel.c @@ -31,6 +31,7 @@ Created 12/19/1997 Heikki Tuuri #include "pars0pars.h" #include "row0mysql.h" #include "read0read.h" +#include "buf0lru.h" /* Maximum number of rows to prefetch; MySQL interface has another parameter */ #define SEL_MAX_N_PREFETCH 16 @@ -77,8 +78,14 @@ row_sel_sec_rec_is_for_clust_rec( ulint n; ulint i; dtype_t* cur_type; + mem_heap_t* heap; + ulint* clust_offs; + ulint* sec_offs; - UT_NOT_USED(clust_index); + heap = mem_heap_create(100); + clust_offs = rec_get_offsets(clust_rec, clust_index, + ULINT_UNDEFINED, heap); + sec_offs = rec_get_offsets(sec_rec, sec_index, ULINT_UNDEFINED, heap); n = dict_index_get_n_ordering_defined_by_user(sec_index); @@ -86,10 +93,10 @@ row_sel_sec_rec_is_for_clust_rec( ifield = dict_index_get_nth_field(sec_index, i); col = dict_field_get_col(ifield); - clust_field = 
rec_get_nth_field(clust_rec, + clust_field = rec_get_nth_field(clust_rec, clust_offs, dict_col_get_clust_pos(col), &clust_len); - sec_field = rec_get_nth_field(sec_rec, i, &sec_len); + sec_field = rec_get_nth_field(sec_rec, sec_offs, i, &sec_len); if (ifield->prefix_len > 0 && clust_len != UNIV_SQL_NULL) { @@ -106,10 +113,12 @@ row_sel_sec_rec_is_for_clust_rec( if (0 != cmp_data_data(dict_col_get_type(col), clust_field, clust_len, sec_field, sec_len)) { + mem_heap_free(heap); return(FALSE); } } + mem_heap_free(heap); return(TRUE); } @@ -265,6 +274,7 @@ row_sel_fetch_columns( dict_index_t* index, /* in: record index */ rec_t* rec, /* in: record in a clustered or non-clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ sym_node_t* column) /* in: first column in a column list, or NULL */ { @@ -274,6 +284,8 @@ row_sel_fetch_columns( byte* data; ulint len; + ut_ad(rec_offs_validate(rec, index, offsets)); + if (index->type & DICT_CLUSTERED) { index_type = SYM_CLUST_FIELD_NO; } else { @@ -285,7 +297,7 @@ row_sel_fetch_columns( if (field_no != ULINT_UNDEFINED) { - data = rec_get_nth_field(rec, field_no, &len); + data = rec_get_nth_field(rec, offsets, field_no, &len); if (column->copy_val) { eval_node_copy_and_alloc_val(column, data, @@ -600,8 +612,15 @@ row_sel_get_clust_rec( rec_t* clust_rec; rec_t* old_vers; ulint err; + mem_heap_t* heap; + ulint* offsets; + + heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, + btr_pcur_get_btr_cur(&plan->pcur)->index, + ULINT_UNDEFINED, heap); - row_build_row_ref_fast(plan->clust_ref, plan->clust_map, rec); + row_build_row_ref_fast(plan->clust_ref, plan->clust_map, rec, offsets); index = dict_table_get_first_index(plan->table); @@ -618,7 +637,7 @@ row_sel_get_clust_rec( || btr_pcur_get_low_match(&(plan->clust_pcur)) < dict_index_get_n_unique(index)) { - ut_a(rec_get_deleted_flag(rec)); + ut_a(rec_get_deleted_flag(rec, plan->table->comp)); ut_a(node->read_view); /* In a rare case it is possible that no clust rec is found @@ -635,28 +654,30 @@ row_sel_get_clust_rec( goto func_exit; } + offsets = rec_reget_offsets(clust_rec, index, + offsets, ULINT_UNDEFINED, heap); + if (!node->read_view) { /* Try to place a lock on the index record */ /* If innodb_locks_unsafe_for_binlog option is used, - we lock only the record, i.e. next-key locking is - not used. - */ + we lock only the record, i.e., next-key locking is + not used. 
*/ + ulint lock_type; if (srv_locks_unsafe_for_binlog) { - err = lock_clust_rec_read_check_and_lock(0, - clust_rec, - index, node->row_lock_mode, - LOCK_REC_NOT_GAP, thr); + lock_type = LOCK_REC_NOT_GAP; } else { - err = lock_clust_rec_read_check_and_lock(0, - clust_rec, - index, node->row_lock_mode, - LOCK_ORDINARY, thr); + lock_type = LOCK_ORDINARY; } + err = lock_clust_rec_read_check_and_lock(0, + clust_rec, index, offsets, + node->row_lock_mode, lock_type, thr); + if (err != DB_SUCCESS) { + mem_heap_free(heap); return(err); } } else { @@ -665,22 +686,21 @@ row_sel_get_clust_rec( old_vers = NULL; - if (!lock_clust_rec_cons_read_sees(clust_rec, index, + if (!lock_clust_rec_cons_read_sees(clust_rec, index, offsets, node->read_view)) { err = row_sel_build_prev_vers(node->read_view, plan, clust_rec, &old_vers, mtr); if (err != DB_SUCCESS) { + mem_heap_free(heap); return(err); } clust_rec = old_vers; if (clust_rec == NULL) { - *out_rec = clust_rec; - - return(DB_SUCCESS); + goto func_exit; } } @@ -697,23 +717,22 @@ row_sel_get_clust_rec( visit through secondary index records that would not really exist in our snapshot. */ - if ((old_vers || rec_get_deleted_flag(rec)) + if ((old_vers || rec_get_deleted_flag(rec, plan->table->comp)) && !row_sel_sec_rec_is_for_clust_rec(rec, plan->index, clust_rec, index)) { clust_rec = NULL; - *out_rec = clust_rec; - - return(DB_SUCCESS); + goto func_exit; } } /* Fetch the columns needed in test conditions */ - - row_sel_fetch_columns(index, clust_rec, + + row_sel_fetch_columns(index, clust_rec, offsets, UT_LIST_GET_FIRST(plan->columns)); func_exit: *out_rec = clust_rec; + mem_heap_free(heap); return(DB_SUCCESS); } @@ -726,6 +745,7 @@ sel_set_rec_lock( /* out: DB_SUCCESS or error code */ rec_t* rec, /* in: record */ dict_index_t* index, /* in: index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ ulint mode, /* in: lock mode */ ulint type, /* in: LOCK_ORDINARY, LOCK_GAP, or LOC_REC_NOT_GAP */ que_thr_t* thr) /* in: query thread */ @@ -743,11 +763,11 @@ sel_set_rec_lock( } if (index->type & DICT_CLUSTERED) { - err = lock_clust_rec_read_check_and_lock(0, rec, index, mode, - type, thr); + err = lock_clust_rec_read_check_and_lock(0, + rec, index, offsets, mode, type, thr); } else { - err = lock_sec_rec_read_check_and_lock(0, rec, index, mode, - type, thr); + err = lock_sec_rec_read_check_and_lock(0, + rec, index, offsets, mode, type, thr); } return(err); @@ -955,6 +975,8 @@ row_sel_try_search_shortcut( { dict_index_t* index; rec_t* rec; + mem_heap_t* heap; + ulint* offsets; index = plan->index; @@ -988,21 +1010,28 @@ row_sel_try_search_shortcut( /* This is a non-locking consistent read: if necessary, fetch a previous version of the record */ + heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); + if (index->type & DICT_CLUSTERED) { - if (!lock_clust_rec_cons_read_sees(rec, index, + if (!lock_clust_rec_cons_read_sees(rec, index, offsets, node->read_view)) { + mem_heap_free(heap); return(SEL_RETRY); } } else if (!lock_sec_rec_cons_read_sees(rec, index, node->read_view)) { + mem_heap_free(heap); return(SEL_RETRY); } /* Test deleted flag. Fetch the columns needed in test conditions. 
*/ - - row_sel_fetch_columns(index, rec, UT_LIST_GET_FIRST(plan->columns)); - if (rec_get_deleted_flag(rec)) { + row_sel_fetch_columns(index, rec, offsets, + UT_LIST_GET_FIRST(plan->columns)); + mem_heap_free(heap); + + if (rec_get_deleted_flag(rec, plan->table->comp)) { return(SEL_EXHAUSTED); } @@ -1066,7 +1095,9 @@ row_sel( to the next non-clustered record */ ulint found_flag; ulint err; - + mem_heap_t* heap = mem_heap_create(100); + ulint* offsets = NULL; + ut_ad(thr->run_node == node); search_latch_locked = FALSE; @@ -1217,22 +1248,23 @@ rec_loop: if (!consistent_read) { /* If innodb_locks_unsafe_for_binlog option is used, - we lock only the record, i.e. next-key locking is - not used. - */ + we lock only the record, i.e., next-key locking is + not used. */ + + rec_t* next_rec = page_rec_get_next(rec); + ulint lock_type; + offsets = rec_reget_offsets(next_rec, index, + offsets, ULINT_UNDEFINED, heap); if (srv_locks_unsafe_for_binlog) { - err = sel_set_rec_lock(page_rec_get_next(rec), - index, - node->row_lock_mode, - LOCK_REC_NOT_GAP, thr); + lock_type = LOCK_REC_NOT_GAP; } else { - err = sel_set_rec_lock(page_rec_get_next(rec), - index, - node->row_lock_mode, - LOCK_ORDINARY, thr); + lock_type = LOCK_ORDINARY; } + err = sel_set_rec_lock(next_rec, index, offsets, + node->row_lock_mode, lock_type, thr); + if (err != DB_SUCCESS) { /* Note that in this case we will store in pcur the PREDECESSOR of the record we are waiting @@ -1259,18 +1291,22 @@ rec_loop: /* Try to place a lock on the index record */ /* If innodb_locks_unsafe_for_binlog option is used, - we lock only the record, i.e. next-key locking is - not used. - */ + we lock only the record, i.e., next-key locking is + not used. */ + + ulint lock_type; + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); if (srv_locks_unsafe_for_binlog) { - err = sel_set_rec_lock(rec, index, node->row_lock_mode, - LOCK_REC_NOT_GAP, thr); + lock_type = LOCK_REC_NOT_GAP; } else { - err = sel_set_rec_lock(rec, index, node->row_lock_mode, - LOCK_ORDINARY, thr); + lock_type = LOCK_ORDINARY; } + err = sel_set_rec_lock(rec, index, offsets, + node->row_lock_mode, lock_type, thr); + if (err != DB_SUCCESS) { goto lock_wait_or_error; @@ -1333,6 +1369,8 @@ rec_loop: /* PHASE 3: Get previous version in a consistent read */ cons_read_requires_clust_rec = FALSE; + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); if (consistent_read) { /* This is a non-locking consistent read: if necessary, fetch @@ -1340,7 +1378,7 @@ rec_loop: if (index->type & DICT_CLUSTERED) { - if (!lock_clust_rec_cons_read_sees(rec, index, + if (!lock_clust_rec_cons_read_sees(rec, index, offsets, node->read_view)) { err = row_sel_build_prev_vers(node->read_view, @@ -1353,6 +1391,7 @@ rec_loop: if (old_vers == NULL) { row_sel_fetch_columns(index, rec, + offsets, UT_LIST_GET_FIRST(plan->columns)); if (!row_sel_test_end_conds(plan)) { @@ -1364,6 +1403,8 @@ rec_loop: } rec = old_vers; + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); } } else if (!lock_sec_rec_cons_read_sees(rec, index, node->read_view)) { @@ -1375,7 +1416,8 @@ rec_loop: /* Fetch the columns needed in test conditions */ - row_sel_fetch_columns(index, rec, UT_LIST_GET_FIRST(plan->columns)); + row_sel_fetch_columns(index, rec, offsets, + UT_LIST_GET_FIRST(plan->columns)); /* Test the selection end conditions: these can only contain columns which already are found in the index, even though the index might be @@ -1390,7 +1432,8 @@ rec_loop: goto 
table_exhausted; } - if (rec_get_deleted_flag(rec) && !cons_read_requires_clust_rec) { + if (rec_get_deleted_flag(rec, plan->table->comp) + && !cons_read_requires_clust_rec) { /* The record is delete marked: we can skip it if this is not a consistent read which might see an earlier version @@ -1433,7 +1476,7 @@ rec_loop: goto next_rec; } - if (rec_get_deleted_flag(clust_rec)) { + if (rec_get_deleted_flag(clust_rec, plan->table->comp)) { /* The record is delete marked: we can skip it */ @@ -1591,7 +1634,8 @@ next_table_no_mtr: if (search_latch_locked) { rw_lock_s_unlock(&btr_search_latch); } - + + mem_heap_free(heap); return(DB_SUCCESS); } @@ -1625,6 +1669,7 @@ table_exhausted: table_exhausted_no_mtr: if (node->fetch_table == 0) { + mem_heap_free(heap); if (node->is_aggregate && !node->aggregate_already_fetched) { @@ -1673,7 +1718,7 @@ stop_for_a_while: mtr_commit(&mtr); ut_ad(sync_thread_levels_empty_gen(TRUE)); - + mem_heap_free(heap); return(DB_SUCCESS); commit_mtr_for_a_while: @@ -1709,6 +1754,7 @@ lock_wait_or_error: ut_ad(sync_thread_levels_empty_gen(TRUE)); + mem_heap_free(heap); return(err); } @@ -2132,11 +2178,16 @@ row_sel_store_row_id_to_prebuilt( /*=============================*/ row_prebuilt_t* prebuilt, /* in: prebuilt */ rec_t* index_rec, /* in: record */ - dict_index_t* index) /* in: index of the record */ + dict_index_t* index, /* in: index of the record */ + const ulint* offsets) /* in: rec_get_offsets + (index_rec, index) */ { byte* data; ulint len; - data = rec_get_nth_field(index_rec, + + ut_ad(rec_offs_validate(index_rec, index, offsets)); + + data = rec_get_nth_field(index_rec, offsets, dict_index_get_sys_col_pos(index, DATA_ROW_ID), &len); if (len != DATA_ROW_ID_LEN) { @@ -2146,7 +2197,7 @@ row_sel_store_row_id_to_prebuilt( fprintf(stderr, "\n" "InnoDB: Field number %lu, record:\n", (ulong) dict_index_get_sys_col_pos(index, DATA_ROW_ID)); - rec_print(stderr, index_rec); + rec_print(stderr, index_rec, offsets); putc('\n', stderr); ut_error; } @@ -2235,9 +2286,11 @@ row_sel_store_mysql_rec( case) */ byte* mysql_rec, /* out: row in the MySQL format */ row_prebuilt_t* prebuilt, /* in: prebuilt struct */ - rec_t* rec) /* in: Innobase record in the index + rec_t* rec, /* in: Innobase record in the index which was described in prebuilt's template */ + const ulint* offsets) /* in: array returned by + rec_get_offsets() */ { mysql_row_templ_t* templ; mem_heap_t* extern_field_heap = NULL; @@ -2246,8 +2299,15 @@ row_sel_store_mysql_rec( byte* blob_buf; int pad_char; ulint i; + dict_index_t* index; ut_ad(prebuilt->mysql_template); + ut_ad(rec_offs_validate(rec, NULL, offsets)); + + index = prebuilt->index; + if (prebuilt->need_to_access_clustered) { + index = dict_table_get_first_index(index->table); + } if (prebuilt->blob_heap != NULL) { mem_heap_free(prebuilt->blob_heap); @@ -2263,9 +2323,10 @@ row_sel_store_mysql_rec( templ = prebuilt->mysql_template + i; - data = rec_get_nth_field(rec, templ->rec_field_no, &len); + data = rec_get_nth_field(rec, offsets, + templ->rec_field_no, &len); - if (rec_get_nth_field_extern_bit(rec, templ->rec_field_no)) { + if (rec_offs_nth_extern(offsets, templ->rec_field_no)) { /* Copy an externally stored field to the temporary heap */ @@ -2279,7 +2340,7 @@ row_sel_store_mysql_rec( causes an assert */ data = btr_rec_copy_externally_stored_field(rec, - templ->rec_field_no, &len, + offsets, templ->rec_field_no, &len, extern_field_heap); ut_a(len != UNIV_SQL_NULL); @@ -2435,6 +2496,8 @@ row_sel_get_clust_rec_for_mysql( rec_t* old_vers; ulint err; 
trx_t* trx; + mem_heap_t* heap = mem_heap_create(100); + ulint* offsets = NULL; *out_rec = NULL; trx = thr_get_trx(thr); @@ -2465,9 +2528,8 @@ row_sel_get_clust_rec_for_mysql( clustered index record did not exist in the read view of trx. */ - if (!rec_get_deleted_flag(rec) + if (!rec_get_deleted_flag(rec, sec_index->table->comp) || prebuilt->select_lock_type != LOCK_NONE) { - ut_print_timestamp(stderr); fputs(" InnoDB: error clustered record" " for sec rec not found\n" @@ -2475,10 +2537,14 @@ row_sel_get_clust_rec_for_mysql( dict_index_name_print(stderr, trx, sec_index); fputs("\n" "InnoDB: sec index record ", stderr); - rec_print(stderr, rec); + offsets = rec_get_offsets(rec, sec_index, + ULINT_UNDEFINED, heap); + rec_print(stderr, rec, offsets); fputs("\n" "InnoDB: clust index record ", stderr); - rec_print(stderr, clust_rec); + offsets = rec_reget_offsets(clust_rec, clust_index, + offsets, ULINT_UNDEFINED, heap); + rec_print(stderr, clust_rec, offsets); putc('\n', stderr); trx_print(stderr, trx); @@ -2491,17 +2557,21 @@ row_sel_get_clust_rec_for_mysql( goto func_exit; } + offsets = rec_get_offsets(clust_rec, clust_index, + ULINT_UNDEFINED, heap); + if (prebuilt->select_lock_type != LOCK_NONE) { /* Try to place a lock on the index record; we are searching the clust rec with a unique condition, hence we set a LOCK_REC_NOT_GAP type lock */ err = lock_clust_rec_read_check_and_lock(0, clust_rec, - clust_index, + clust_index, offsets, prebuilt->select_lock_type, LOCK_REC_NOT_GAP, thr); if (err != DB_SUCCESS) { + mem_heap_free(heap); return(err); } } else { @@ -2515,7 +2585,7 @@ row_sel_get_clust_rec_for_mysql( if (trx->isolation_level > TRX_ISO_READ_UNCOMMITTED && !lock_clust_rec_cons_read_sees(clust_rec, clust_index, - trx->read_view)) { + offsets, trx->read_view)) { err = row_sel_build_prev_vers_for_mysql( trx->read_view, clust_index, @@ -2524,6 +2594,7 @@ row_sel_get_clust_rec_for_mysql( if (err != DB_SUCCESS) { + mem_heap_free(heap); return(err); } @@ -2543,7 +2614,8 @@ row_sel_get_clust_rec_for_mysql( visit through secondary index records that would not really exist in our snapshot. 
*/ - if (clust_rec && (old_vers || rec_get_deleted_flag(rec)) + if (clust_rec && (old_vers + || rec_get_deleted_flag(rec, sec_index->table->comp)) && !row_sel_sec_rec_is_for_clust_rec(rec, sec_index, clust_rec, clust_index)) { clust_rec = NULL; @@ -2565,6 +2637,7 @@ func_exit: btr_pcur_store_position(prebuilt->clust_pcur, mtr); } + mem_heap_free(heap); return(DB_SUCCESS); } @@ -2686,12 +2759,14 @@ void row_sel_push_cache_row_for_mysql( /*=============================*/ row_prebuilt_t* prebuilt, /* in: prebuilt struct */ - rec_t* rec) /* in: record to push */ + rec_t* rec, /* in: record to push */ + const ulint* offsets) /* in: rec_get_offsets() */ { byte* buf; ulint i; ut_ad(prebuilt->n_fetch_cached < MYSQL_FETCH_CACHE_SIZE); + ut_ad(rec_offs_validate(rec, NULL, offsets)); ut_a(!prebuilt->templ_contains_blob); if (prebuilt->fetch_cache[0] == NULL) { @@ -2717,7 +2792,7 @@ row_sel_push_cache_row_for_mysql( ut_a(row_sel_store_mysql_rec( prebuilt->fetch_cache[prebuilt->n_fetch_cached], - prebuilt, rec)); + prebuilt, rec, offsets)); prebuilt->n_fetch_cached++; } @@ -2734,6 +2809,8 @@ row_sel_try_search_shortcut_for_mysql( /* out: SEL_FOUND, SEL_EXHAUSTED, SEL_RETRY */ rec_t** out_rec,/* out: record if found */ row_prebuilt_t* prebuilt,/* in: prebuilt struct */ + ulint** offsets,/* in/out: for rec_reget_offsets(*out_rec) */ + mem_heap_t* heap, /* in: heap for rec_reget_offsets() */ mtr_t* mtr) /* in: started mtr */ { dict_index_t* index = prebuilt->index; @@ -2771,13 +2848,17 @@ row_sel_try_search_shortcut_for_mysql( /* This is a non-locking consistent read: if necessary, fetch a previous version of the record */ - - if (!lock_clust_rec_cons_read_sees(rec, index, trx->read_view)) { + + *offsets = rec_reget_offsets(rec, index, + *offsets, ULINT_UNDEFINED, heap); + + if (!lock_clust_rec_cons_read_sees(rec, index, + *offsets, trx->read_view)) { return(SEL_RETRY); } - if (rec_get_deleted_flag(rec)) { + if (rec_get_deleted_flag(rec, index->table->comp)) { return(SEL_EXHAUSTED); } @@ -2846,9 +2927,12 @@ row_search_for_mysql( level is <= TRX_ISO_READ_COMMITTED, then this is set to FALSE */ ibool success; + ibool comp; ulint cnt = 0; ulint next_offs; mtr_t mtr; + mem_heap_t* heap; + ulint* offsets = NULL; ut_ad(index && pcur && search_tuple); ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); @@ -3002,6 +3086,7 @@ row_search_for_mysql( } mtr_start(&mtr); + heap = mem_heap_create(100); /*-------------------------------------------------------------*/ /* PHASE 2: Try fast adaptive hash index search if possible */ @@ -3047,13 +3132,14 @@ row_search_for_mysql( } #endif shortcut = row_sel_try_search_shortcut_for_mysql(&rec, - prebuilt, &mtr); + prebuilt, &offsets, heap, &mtr); if (shortcut == SEL_FOUND) { #ifdef UNIV_SEARCH_DEBUG - ut_a(0 == cmp_dtuple_rec(search_tuple, rec)); + ut_a(0 == cmp_dtuple_rec(search_tuple, + rec, offsets)); #endif if (!row_sel_store_mysql_rec(buf, prebuilt, - rec)) { + rec, offsets)) { err = DB_TOO_BIG_RECORD; /* We let the main loop to do the @@ -3081,7 +3167,7 @@ row_search_for_mysql( /* NOTE that we do NOT store the cursor position */ - + mem_heap_free(heap); return(DB_SUCCESS); } else if (shortcut == SEL_EXHAUSTED) { @@ -3105,6 +3191,7 @@ row_search_for_mysql( /* NOTE that we do NOT store the cursor position */ + mem_heap_free(heap); return(DB_RECORD_NOT_FOUND); } shortcut_fails_too_big_rec: @@ -3218,6 +3305,8 @@ rec_loop: /* PHASE 4: Look for matching records in a loop */ rec = btr_pcur_get_rec(pcur); + comp = index->table->comp; + ut_ad(comp == 
page_is_comp(buf_frame_align(rec))); /* fputs("Using ", stderr); dict_index_name_print(stderr, index); @@ -3246,7 +3335,9 @@ rec_loop: a gap and therefore we do not set locks there. */ if (srv_locks_unsafe_for_binlog == FALSE) { - err = sel_set_rec_lock(rec, index, + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + err = sel_set_rec_lock(rec, index, offsets, prebuilt->select_lock_type, LOCK_ORDINARY, thr); } @@ -3266,9 +3357,11 @@ rec_loop: /* Do sanity checks in case our cursor has bumped into page corruption */ - next_offs = rec_get_next_offs(rec); + next_offs = rec_get_next_offs(rec, comp); - if (next_offs >= UNIV_PAGE_SIZE || next_offs < PAGE_SUPREMUM) { + if (next_offs >= UNIV_PAGE_SIZE + || next_offs < + (ulint) (comp ? PAGE_NEW_SUPREMUM : PAGE_OLD_SUPREMUM)) { if (srv_force_recovery == 0 || moves_up == FALSE) { ut_print_timestamp(stderr); @@ -3313,9 +3406,12 @@ rec_loop: } } + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + if (srv_force_recovery > 0) { - if (!rec_validate(rec) || !btr_index_rec_validate(rec, index, - FALSE)) { + if (!rec_validate(rec, offsets) + || !btr_index_rec_validate(rec, index, FALSE)) { fprintf(stderr, "InnoDB: Index corruption: rec offs %lu next offs %lu, page no %lu,\n" "InnoDB: ", @@ -3343,7 +3439,7 @@ rec_loop: /* fputs("Comparing rec and search tuple\n", stderr); */ - if (0 != cmp_dtuple_rec(search_tuple, rec)) { + if (0 != cmp_dtuple_rec(search_tuple, rec, offsets)) { if (prebuilt->select_lock_type != LOCK_NONE && set_also_gap_locks) { @@ -3355,6 +3451,7 @@ rec_loop: if (srv_locks_unsafe_for_binlog == FALSE) { err = sel_set_rec_lock(rec, index, + offsets, prebuilt->select_lock_type, LOCK_GAP, thr); } @@ -3376,7 +3473,7 @@ rec_loop: } else if (match_mode == ROW_SEL_EXACT_PREFIX) { - if (!cmp_dtuple_is_prefix_of_rec(search_tuple, rec)) { + if (!cmp_dtuple_is_prefix_of_rec(search_tuple, rec, offsets)) { if (prebuilt->select_lock_type != LOCK_NONE && set_also_gap_locks) { @@ -3388,6 +3485,7 @@ rec_loop: if (srv_locks_unsafe_for_binlog == FALSE) { err = sel_set_rec_lock(rec, index, + offsets, prebuilt->select_lock_type, LOCK_GAP, thr); } @@ -3419,27 +3517,27 @@ rec_loop: is a non-delete marked record, then it is enough to lock its existence with LOCK_REC_NOT_GAP. */ + ulint lock_type; + if (!set_also_gap_locks - || (unique_search && !rec_get_deleted_flag(rec))) { - err = sel_set_rec_lock(rec, index, - prebuilt->select_lock_type, - LOCK_REC_NOT_GAP, thr); + || (unique_search && !rec_get_deleted_flag(rec, comp))) { + lock_type = LOCK_REC_NOT_GAP; } else { /* If innodb_locks_unsafe_for_binlog option is used, - we lock only the record, i.e. next-key locking is + we lock only the record, i.e., next-key locking is not used. 
*/ - if (srv_locks_unsafe_for_binlog) { - err = sel_set_rec_lock(rec, index, - prebuilt->select_lock_type, - LOCK_REC_NOT_GAP, thr); + if (srv_locks_unsafe_for_binlog) { + lock_type = LOCK_REC_NOT_GAP; } else { - err = sel_set_rec_lock(rec, index, - prebuilt->select_lock_type, - LOCK_ORDINARY, thr); - } + lock_type = LOCK_ORDINARY; + } } - + + err = sel_set_rec_lock(rec, index, offsets, + prebuilt->select_lock_type, + lock_type, thr); + if (err != DB_SUCCESS) { goto lock_wait_or_error; @@ -3462,7 +3560,7 @@ rec_loop: if (srv_force_recovery < 5 && !lock_clust_rec_cons_read_sees(rec, index, - trx->read_view)) { + offsets, trx->read_view)) { err = row_sel_build_prev_vers_for_mysql( trx->read_view, clust_index, @@ -3495,7 +3593,8 @@ rec_loop: } } - if (rec_get_deleted_flag(rec) && !cons_read_requires_clust_rec) { + if (rec_get_deleted_flag(rec, comp) + && !cons_read_requires_clust_rec) { /* The record is delete-marked: we can skip it if this is not a consistent read which might see an earlier version @@ -3531,7 +3630,7 @@ rec_loop: goto next_rec; } - if (rec_get_deleted_flag(clust_rec)) { + if (rec_get_deleted_flag(clust_rec, comp)) { /* The record is delete marked: we can skip it */ @@ -3543,6 +3642,15 @@ rec_loop: } } + if (prebuilt->need_to_access_clustered) { + ut_ad(rec == clust_rec || index == clust_index); + offsets = rec_reget_offsets(rec, clust_index, + offsets, ULINT_UNDEFINED, heap); + } else { + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + } + /* We found a qualifying row */ if (prebuilt->n_rows_fetched >= MYSQL_FETCH_CACHE_THRESHOLD @@ -3562,7 +3670,7 @@ rec_loop: not cache rows because there the cursor is a scrollable cursor. */ - row_sel_push_cache_row_for_mysql(prebuilt, rec); + row_sel_push_cache_row_for_mysql(prebuilt, rec, offsets); if (prebuilt->n_fetch_cached == MYSQL_FETCH_CACHE_SIZE) { @@ -3572,11 +3680,13 @@ rec_loop: goto next_rec; } else { if (prebuilt->template_type == ROW_MYSQL_DUMMY_TEMPLATE) { - ut_memcpy(buf + 4, rec - rec_get_extra_size(rec), - rec_get_size(rec)); - mach_write_to_4(buf, rec_get_extra_size(rec) + 4); + memcpy(buf + 4, rec - rec_offs_extra_size(offsets), + rec_offs_size(offsets)); + mach_write_to_4(buf, + rec_offs_extra_size(offsets) + 4); } else { - if (!row_sel_store_mysql_rec(buf, prebuilt, rec)) { + if (!row_sel_store_mysql_rec(buf, prebuilt, + rec, offsets)) { err = DB_TOO_BIG_RECORD; goto lock_wait_or_error; @@ -3584,8 +3694,10 @@ rec_loop: } if (prebuilt->clust_index_was_generated) { + offsets = rec_reget_offsets(index_rec, index, offsets, + ULINT_UNDEFINED, heap); row_sel_store_row_id_to_prebuilt(prebuilt, index_rec, - index); + index, offsets); } } got_row: @@ -3687,6 +3799,7 @@ lock_wait_or_error: fprintf(stderr, " cnt %lu ret value %lu err\n", cnt, err); */ trx->op_info = ""; + mem_heap_free(heap); return(err); normal_return: @@ -3710,6 +3823,7 @@ normal_return: trx->op_info = ""; + mem_heap_free(heap); return(ret); } diff --git a/innobase/row/row0umod.c b/innobase/row/row0umod.c index e16d696314b..ee9066a0d6f 100644 --- a/innobase/row/row0umod.c +++ b/innobase/row/row0umod.c @@ -430,6 +430,7 @@ row_undo_mod_del_unmark_sec_and_undo_update( found = row_search_index_entry(index, entry, mode, &pcur, &mtr); if (!found) { + heap = mem_heap_create(100); fputs("InnoDB: error in sec index entry del undo in\n" "InnoDB: ", stderr); dict_index_name_print(stderr, trx, index); @@ -438,11 +439,14 @@ row_undo_mod_del_unmark_sec_and_undo_update( dtuple_print(stderr, entry); fputs("\n" "InnoDB: record ", stderr); - 
rec_print(stderr, btr_pcur_get_rec(&pcur)); + rec_print(stderr, btr_pcur_get_rec(&pcur), + rec_get_offsets(btr_pcur_get_rec(&pcur), + index, ULINT_UNDEFINED, heap)); putc('\n', stderr); trx_print(stderr, trx); fputs("\n" "InnoDB: Submit a detailed bug report to http://bugs.mysql.com\n", stderr); + mem_heap_free(heap); } else { btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur); diff --git a/innobase/row/row0undo.c b/innobase/row/row0undo.c index bc3cc8ea9f3..42f5ef94854 100644 --- a/innobase/row/row0undo.c +++ b/innobase/row/row0undo.c @@ -151,6 +151,8 @@ row_undo_search_clust_to_pcur( mtr_t mtr; ibool ret; rec_t* rec; + mem_heap_t* heap; + const ulint* offsets; mtr_start(&mtr); @@ -161,8 +163,11 @@ row_undo_search_clust_to_pcur( rec = btr_pcur_get_rec(&(node->pcur)); + heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, clust_index, ULINT_UNDEFINED, heap); + if (!found || 0 != ut_dulint_cmp(node->roll_ptr, - row_get_rec_roll_ptr(rec, clust_index))) { + row_get_rec_roll_ptr(rec, clust_index, offsets))) { /* We must remove the reservation on the undo log record BEFORE releasing the latch on the clustered index page: this @@ -175,7 +180,7 @@ row_undo_search_clust_to_pcur( ret = FALSE; } else { node->row = row_build(ROW_COPY_DATA, clust_index, rec, - node->heap); + offsets, node->heap); btr_pcur_store_position(&(node->pcur), &mtr); ret = TRUE; diff --git a/innobase/row/row0upd.c b/innobase/row/row0upd.c index a449b9f1736..e080d0ba577 100644 --- a/innobase/row/row0upd.c +++ b/innobase/row/row0upd.c @@ -301,19 +301,20 @@ recovery. */ void row_upd_rec_sys_fields_in_recovery( /*===============================*/ - rec_t* rec, /* in: record */ - ulint pos, /* in: TRX_ID position in rec */ - dulint trx_id, /* in: transaction id */ - dulint roll_ptr)/* in: roll ptr of the undo log record */ + rec_t* rec, /* in: record */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint pos, /* in: TRX_ID position in rec */ + dulint trx_id, /* in: transaction id */ + dulint roll_ptr)/* in: roll ptr of the undo log record */ { byte* field; ulint len; - field = rec_get_nth_field(rec, pos, &len); + field = rec_get_nth_field(rec, offsets, pos, &len); ut_ad(len == DATA_TRX_ID_LEN); trx_write_trx_id(field, trx_id); - field = rec_get_nth_field(rec, pos + 1, &len); + field = rec_get_nth_field(rec, offsets, pos + 1, &len); ut_ad(len == DATA_ROLL_PTR_LEN); trx_write_roll_ptr(field, roll_ptr); } @@ -361,8 +362,8 @@ row_upd_changes_field_size_or_external( /* out: TRUE if the update changes the size of some field in index or the field is external in rec or update */ - rec_t* rec, /* in: record in index */ dict_index_t* index, /* in: index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ upd_t* update) /* in: update vector */ { upd_field_t* upd_field; @@ -372,6 +373,7 @@ row_upd_changes_field_size_or_external( ulint n_fields; ulint i; + ut_ad(rec_offs_validate(NULL, index, offsets)); n_fields = upd_get_n_fields(update); for (i = 0; i < n_fields; i++) { @@ -380,19 +382,19 @@ row_upd_changes_field_size_or_external( new_val = &(upd_field->new_val); new_len = new_val->len; - if (new_len == UNIV_SQL_NULL) { + if (new_len == UNIV_SQL_NULL && !rec_offs_comp(offsets)) { new_len = dtype_get_sql_null_size( dict_index_get_nth_type(index, i)); } - old_len = rec_get_nth_field_size(rec, upd_field->field_no); - + old_len = rec_offs_nth_size(offsets, upd_field->field_no); + if (old_len != new_len) { return(TRUE); } - if (rec_get_nth_field_extern_bit(rec, upd_field->field_no)) { + if 
(rec_offs_nth_extern(offsets, upd_field->field_no)) { return(TRUE); } @@ -414,15 +416,18 @@ a clustered index */ void row_upd_rec_in_place( /*=================*/ - rec_t* rec, /* in/out: record where replaced */ - upd_t* update) /* in: update vector */ + rec_t* rec, /* in/out: record where replaced */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + upd_t* update) /* in: update vector */ { upd_field_t* upd_field; dfield_t* new_val; ulint n_fields; ulint i; - rec_set_info_bits(rec, update->info_bits); + ut_ad(rec_offs_validate(rec, NULL, offsets)); + + rec_set_info_bits(rec, rec_offs_comp(offsets), update->info_bits); n_fields = upd_get_n_fields(update); @@ -430,7 +435,7 @@ row_upd_rec_in_place( upd_field = upd_get_nth_field(update, i); new_val = &(upd_field->new_val); - rec_set_nth_field(rec, upd_field->field_no, + rec_set_nth_field(rec, offsets, upd_field->field_no, dfield_get_data(new_val), dfield_get_len(new_val)); } @@ -695,6 +700,7 @@ row_upd_build_sec_rec_difference_binary( upd_t* update; ulint n_diff; ulint i; + const ulint* offsets; /* This function is used only for a secondary index */ ut_a(0 == (index->type & DICT_CLUSTERED)); @@ -702,10 +708,11 @@ row_upd_build_sec_rec_difference_binary( update = upd_create(dtuple_get_n_fields(entry), heap); n_diff = 0; + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); for (i = 0; i < dtuple_get_n_fields(entry); i++) { - data = rec_get_nth_field(rec, i, &len); + data = rec_get_nth_field(rec, offsets, i, &len); dfield = dtuple_get_nth_field(entry, i); @@ -768,6 +775,7 @@ row_upd_build_difference_binary( ulint trx_id_pos; ibool extern_bit; ulint i; + const ulint* offsets; /* This function is used only for a clustered index */ ut_a(index->type & DICT_CLUSTERED); @@ -779,9 +787,11 @@ row_upd_build_difference_binary( roll_ptr_pos = dict_index_get_sys_col_pos(index, DATA_ROLL_PTR); trx_id_pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID); + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); + for (i = 0; i < dtuple_get_n_fields(entry); i++) { - data = rec_get_nth_field(rec, i, &len); + data = rec_get_nth_field(rec, offsets, i, &len); dfield = dtuple_get_nth_field(entry, i); @@ -793,7 +803,7 @@ row_upd_build_difference_binary( goto skip_compare; } - extern_bit = rec_get_nth_field_extern_bit(rec, i); + extern_bit = rec_offs_nth_extern(offsets, i); if (extern_bit != upd_ext_vec_contains(ext_vec, n_ext_vec, i) || !dfield_data_is_binary_equal(dfield, len, data)) { @@ -1117,6 +1127,7 @@ void row_upd_copy_columns( /*=================*/ rec_t* rec, /* in: record in a clustered index */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ sym_node_t* column) /* in: first column in a column list, or NULL */ { @@ -1124,7 +1135,7 @@ row_upd_copy_columns( ulint len; while (column) { - data = rec_get_nth_field(rec, + data = rec_get_nth_field(rec, offsets, column->field_nos[SYM_CLUST_FIELD_NO], &len); eval_node_copy_and_alloc_val(column, data, len); @@ -1171,7 +1182,9 @@ row_upd_store_row( dict_index_t* clust_index; upd_t* update; rec_t* rec; - + mem_heap_t* heap; + const ulint* offsets; + ut_ad(node->pcur->latch_mode != BTR_NO_LATCHES); if (node->row != NULL) { @@ -1183,10 +1196,12 @@ row_upd_store_row( rec = btr_pcur_get_rec(node->pcur); - node->row = row_build(ROW_COPY_DATA, clust_index, rec, node->heap); - + heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, clust_index, ULINT_UNDEFINED, heap); + node->row = row_build(ROW_COPY_DATA, clust_index, rec, offsets, + node->heap); 
node->ext_vec = mem_heap_alloc(node->heap, sizeof(ulint) - * rec_get_n_fields(rec)); + * rec_offs_n_fields(offsets)); if (node->is_delete) { update = NULL; } else { @@ -1194,7 +1209,8 @@ row_upd_store_row( } node->n_ext_vec = btr_push_update_extern_fields(node->ext_vec, - rec, update); + offsets, update); + mem_heap_free(heap); } /*************************************************************** @@ -1247,7 +1263,8 @@ row_upd_sec_index_entry( dtuple_print(stderr, entry); fputs("\n" "InnoDB: record ", stderr); - rec_print(stderr, rec); + rec_print(stderr, rec, + rec_get_offsets(rec, index, ULINT_UNDEFINED, heap)); putc('\n', stderr); trx_print(stderr, trx); @@ -1259,7 +1276,7 @@ row_upd_sec_index_entry( delete marked if we return after a lock wait in row_ins_index_entry below */ - if (!rec_get_deleted_flag(rec)) { + if (!rec_get_deleted_flag(rec, index->table->comp)) { err = btr_cur_del_mark_set_sec_rec(0, btr_cur, TRUE, thr, &mtr); if (err == DB_SUCCESS && check_ref) { @@ -1362,6 +1379,7 @@ row_upd_clust_rec_by_insert( table = node->table; pcur = node->pcur; btr_cur = btr_pcur_get_btr_cur(pcur); + heap = mem_heap_create(500); if (node->state != UPD_NODE_INSERT_CLUSTERED) { @@ -1369,7 +1387,7 @@ row_upd_clust_rec_by_insert( btr_cur, TRUE, thr, mtr); if (err != DB_SUCCESS) { mtr_commit(mtr); - + mem_heap_free(heap); return(err); } @@ -1379,7 +1397,9 @@ row_upd_clust_rec_by_insert( record is removed from the index tree, or updated. */ btr_cur_mark_extern_inherited_fields(btr_cur_get_rec(btr_cur), - node->update, mtr); + rec_get_offsets(btr_cur_get_rec(btr_cur), + dict_table_get_first_index(table), + ULINT_UNDEFINED, heap), node->update, mtr); if (check_ref) { /* NOTE that the following call loses the position of pcur ! */ @@ -1399,8 +1419,6 @@ row_upd_clust_rec_by_insert( node->state = UPD_NODE_INSERT_CLUSTERED; - heap = mem_heap_create(500); - entry = row_build_index_entry(node->row, index, heap); row_upd_index_replace_new_col_vals(entry, index, node->update, NULL); @@ -1452,7 +1470,8 @@ row_upd_clust_rec( pcur = node->pcur; btr_cur = btr_pcur_get_btr_cur(pcur); - ut_ad(FALSE == rec_get_deleted_flag(btr_pcur_get_rec(pcur))); + ut_ad(!rec_get_deleted_flag(btr_pcur_get_rec(pcur), + index->table->comp)); /* Try optimistic updating of the record, keeping changes within the page; we do not check locks because we assume the x-lock on the @@ -1488,7 +1507,8 @@ row_upd_clust_rec( ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr)); - ut_ad(FALSE == rec_get_deleted_flag(btr_pcur_get_rec(pcur))); + ut_ad(!rec_get_deleted_flag(btr_pcur_get_rec(pcur), + index->table->comp)); err = btr_cur_pessimistic_update(BTR_NO_LOCKING_FLAG, btr_cur, &big_rec, node->update, @@ -1496,12 +1516,17 @@ row_upd_clust_rec( mtr_commit(mtr); if (err == DB_SUCCESS && big_rec) { + mem_heap_t* heap; + rec_t* rec; mtr_start(mtr); + + heap = mem_heap_create(100); + rec = btr_cur_get_rec(btr_cur); ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr)); - - err = btr_store_big_rec_extern_fields(index, - btr_cur_get_rec(btr_cur), - big_rec, mtr); + err = btr_store_big_rec_extern_fields(index, rec, + rec_get_offsets(rec, index, ULINT_UNDEFINED, heap), + big_rec, mtr); + mem_heap_free(heap); mtr_commit(mtr); } @@ -1585,7 +1610,10 @@ row_upd_clust_step( ulint err; mtr_t* mtr; mtr_t mtr_buf; - + rec_t* rec; + mem_heap_t* heap; + const ulint* offsets; + index = dict_table_get_first_index(node->table); check_ref = row_upd_index_is_referenced(index, thr_get_trx(thr)); @@ -1641,13 +1669,16 @@ row_upd_clust_step( } } + rec = 
btr_pcur_get_rec(pcur); + heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); + if (!node->has_clust_rec_x_lock) { err = lock_clust_rec_modify_check_and_lock(0, - btr_pcur_get_rec(pcur), - index, thr); + rec, index, offsets, thr); if (err != DB_SUCCESS) { mtr_commit(mtr); - + mem_heap_free(heap); return(err); } } @@ -1655,6 +1686,7 @@ row_upd_clust_step( /* NOTE: the following function calls will also commit mtr */ if (node->is_delete) { + mem_heap_free(heap); err = row_upd_del_mark_clust_rec(node, index, thr, check_ref, mtr); if (err != DB_SUCCESS) { @@ -1674,12 +1706,13 @@ row_upd_clust_step( if (!node->in_mysql_interface) { /* Copy the necessary columns from clust_rec and calculate the new values to set */ - - row_upd_copy_columns(btr_pcur_get_rec(pcur), + row_upd_copy_columns(rec, offsets, UT_LIST_GET_FIRST(node->columns)); row_upd_eval_new_vals(node->update); } + mem_heap_free(heap); + if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) { err = row_upd_clust_rec(node, index, thr, mtr); @@ -1935,6 +1968,7 @@ row_upd_in_place_in_select( btr_pcur_t* pcur; btr_cur_t* btr_cur; ulint err; + mem_heap_t* heap; ut_ad(sel_node->select_will_do_update); ut_ad(sel_node->latch_mode == BTR_MODIFY_LEAF); @@ -1950,11 +1984,15 @@ row_upd_in_place_in_select( /* Copy the necessary columns from clust_rec and calculate the new values to set */ - row_upd_copy_columns(btr_pcur_get_rec(pcur), - UT_LIST_GET_FIRST(node->columns)); + heap = mem_heap_create(100); + row_upd_copy_columns(btr_pcur_get_rec(pcur), rec_get_offsets( + btr_pcur_get_rec(pcur), btr_cur->index, ULINT_UNDEFINED, heap), + UT_LIST_GET_FIRST(node->columns)); + mem_heap_free(heap); row_upd_eval_new_vals(node->update); - ut_ad(FALSE == rec_get_deleted_flag(btr_pcur_get_rec(pcur))); + ut_ad(!rec_get_deleted_flag(btr_pcur_get_rec(pcur), + btr_cur->index->table->comp)); ut_ad(node->cmpl_info & UPD_NODE_NO_SIZE_CHANGE); ut_ad(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE); diff --git a/innobase/row/row0vers.c b/innobase/row/row0vers.c index bc17ede89e3..5281dbd67d7 100644 --- a/innobase/row/row0vers.c +++ b/innobase/row/row0vers.c @@ -41,10 +41,12 @@ row_vers_impl_x_locked_off_kernel( transaction; NOTE that the kernel mutex is temporarily released! 
*/ rec_t* rec, /* in: record in a secondary index */ - dict_index_t* index) /* in: the secondary index */ + dict_index_t* index, /* in: the secondary index */ + const ulint* offsets)/* in: rec_get_offsets(rec, index) */ { dict_index_t* clust_index; rec_t* clust_rec; + ulint* clust_offsets; rec_t* version; rec_t* prev_version; dulint trx_id; @@ -59,6 +61,7 @@ row_vers_impl_x_locked_off_kernel( ibool rec_del; ulint err; mtr_t mtr; + ibool comp; #ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&kernel_mutex)); @@ -96,7 +99,10 @@ row_vers_impl_x_locked_off_kernel( return(NULL); } - trx_id = row_get_rec_trx_id(clust_rec, clust_index); + heap = mem_heap_create(1024); + clust_offsets = rec_get_offsets(clust_rec, clust_index, + ULINT_UNDEFINED, heap); + trx_id = row_get_rec_trx_id(clust_rec, clust_index, clust_offsets); mtr_s_lock(&(purge_sys->latch), &mtr); @@ -106,19 +112,27 @@ row_vers_impl_x_locked_off_kernel( /* The transaction that modified or inserted clust_rec is no longer active: no implicit lock on rec */ + mem_heap_free(heap); mtr_commit(&mtr); return(NULL); } - if (!lock_check_trx_id_sanity(trx_id, clust_rec, clust_index, TRUE)) { + if (!lock_check_trx_id_sanity(trx_id, clust_rec, clust_index, + clust_offsets, TRUE)) { /* Corruption noticed: try to avoid a crash by returning */ + mem_heap_free(heap); mtr_commit(&mtr); return(NULL); } + comp = index->table->comp; + ut_ad(index->table == clust_index->table); + ut_ad(comp == page_is_comp(buf_frame_align(rec))); + ut_ad(comp == page_is_comp(buf_frame_align(clust_rec))); + /* We look up if some earlier version, which was modified by the trx_id transaction, of the clustered index record would require rec to be in a different state (delete marked or unmarked, or have different field @@ -128,11 +142,10 @@ row_vers_impl_x_locked_off_kernel( different state, then the trx_id transaction has not yet had time to modify rec, and does not necessarily have an implicit x-lock on rec. */ - rec_del = rec_get_deleted_flag(rec); + rec_del = rec_get_deleted_flag(rec, comp); trx = NULL; version = clust_rec; - heap = NULL; for (;;) { mutex_exit(&kernel_mutex); @@ -146,18 +159,16 @@ row_vers_impl_x_locked_off_kernel( heap2 = heap; heap = mem_heap_create(1024); - err = trx_undo_prev_version_build(clust_rec, &mtr, version, - clust_index, heap, - &prev_version); - if (heap2) { - mem_heap_free(heap2); /* version was stored in heap2, - if heap2 != NULL */ - } + clust_index, clust_offsets, heap, + &prev_version); + mem_heap_free(heap2); /* free version and clust_offsets */ if (prev_version) { + clust_offsets = rec_get_offsets(prev_version, + clust_index, ULINT_UNDEFINED, heap); row = row_build(ROW_COPY_POINTERS, clust_index, - prev_version, heap); + prev_version, clust_offsets, heap); entry = row_build_index_entry(row, index, heap); } @@ -189,11 +200,11 @@ row_vers_impl_x_locked_off_kernel( if prev_version would require rec to be in a different state. 
*/ - vers_del = rec_get_deleted_flag(prev_version); + vers_del = rec_get_deleted_flag(prev_version, comp); /* We check if entry and rec are identified in the alphabetical ordering */ - if (0 == cmp_dtuple_rec(entry, rec)) { + if (0 == cmp_dtuple_rec(entry, rec, offsets)) { /* The delete marks of rec and prev_version should be equal for rec to be in the state required by prev_version */ @@ -211,7 +222,7 @@ row_vers_impl_x_locked_off_kernel( dtuple_set_types_binary(entry, dtuple_get_n_fields(entry)); - if (0 != cmp_dtuple_rec(entry, rec)) { + if (0 != cmp_dtuple_rec(entry, rec, offsets)) { trx = trx_get_on_id(trx_id); @@ -226,7 +237,8 @@ row_vers_impl_x_locked_off_kernel( break; } - prev_trx_id = row_get_rec_trx_id(prev_version, clust_index); + prev_trx_id = row_get_rec_trx_id(prev_version, clust_index, + clust_offsets); if (0 != ut_dulint_cmp(trx_id, prev_trx_id)) { /* The versions modified by the trx_id transaction end @@ -297,12 +309,14 @@ row_vers_old_has_index_entry( rec_t* version; rec_t* prev_version; dict_index_t* clust_index; + ulint* clust_offsets; mem_heap_t* heap; mem_heap_t* heap2; dtuple_t* row; dtuple_t* entry; ulint err; - + ibool comp; + ut_ad(mtr_memo_contains(mtr, buf_block_align(rec), MTR_MEMO_PAGE_X_FIX) || mtr_memo_contains(mtr, buf_block_align(rec), MTR_MEMO_PAGE_S_FIX)); @@ -313,10 +327,15 @@ row_vers_old_has_index_entry( clust_index = dict_table_get_first_index(index->table); - if (also_curr && !rec_get_deleted_flag(rec)) { + comp = index->table->comp; + ut_ad(comp == page_is_comp(buf_frame_align(rec))); + heap = mem_heap_create(1024); + clust_offsets = rec_get_offsets(rec, clust_index, + ULINT_UNDEFINED, heap); - heap = mem_heap_create(1024); - row = row_build(ROW_COPY_POINTERS, clust_index, rec, heap); + if (also_curr && !rec_get_deleted_flag(rec, comp)) { + row = row_build(ROW_COPY_POINTERS, clust_index, + rec, clust_offsets, heap); entry = row_build_index_entry(row, index, heap); /* NOTE that we cannot do the comparison as binary @@ -331,24 +350,17 @@ row_vers_old_has_index_entry( return(TRUE); } - - mem_heap_free(heap); } version = rec; - heap = NULL; for (;;) { heap2 = heap; heap = mem_heap_create(1024); - err = trx_undo_prev_version_build(rec, mtr, version, - clust_index, heap, - &prev_version); - if (heap2) { - mem_heap_free(heap2); /* version was stored in heap2, - if heap2 != NULL */ - } + clust_index, clust_offsets, heap, + &prev_version); + mem_heap_free(heap2); /* free version and clust_offsets */ if (err != DB_SUCCESS || !prev_version) { /* Versions end here */ @@ -358,9 +370,12 @@ row_vers_old_has_index_entry( return(FALSE); } - if (!rec_get_deleted_flag(prev_version)) { + clust_offsets = rec_get_offsets(prev_version, clust_index, + ULINT_UNDEFINED, heap); + + if (!rec_get_deleted_flag(prev_version, comp)) { row = row_build(ROW_COPY_POINTERS, clust_index, - prev_version, heap); + prev_version, clust_offsets, heap); entry = row_build_index_entry(row, index, heap); /* NOTE that we cannot do the comparison as binary @@ -412,6 +427,7 @@ row_vers_build_for_consistent_read( mem_heap_t* heap2; byte* buf; ulint err; + ulint* offsets; ut_ad(index->type & DICT_CLUSTERED); ut_ad(mtr_memo_contains(mtr, buf_block_align(rec), MTR_MEMO_PAGE_X_FIX) @@ -420,22 +436,23 @@ row_vers_build_for_consistent_read( #ifdef UNIV_SYNC_DEBUG ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED)); #endif /* UNIV_SYNC_DEBUG */ - ut_ad(!read_view_sees_trx_id(view, row_get_rec_trx_id(rec, index))); + + heap = mem_heap_create(1024); + offsets = rec_get_offsets(rec, index, 
ULINT_UNDEFINED, heap); + + ut_ad(!read_view_sees_trx_id(view, + row_get_rec_trx_id(rec, index, offsets))); rw_lock_s_lock(&(purge_sys->latch)); version = rec; - heap = NULL; for (;;) { heap2 = heap; heap = mem_heap_create(1024); err = trx_undo_prev_version_build(rec, mtr, version, index, - heap, &prev_version); - if (heap2) { - mem_heap_free(heap2); /* version was stored in heap2, - if heap2 != NULL */ - } + offsets, heap, &prev_version); + mem_heap_free(heap2); /* free version and offsets */ if (err != DB_SUCCESS) { break; @@ -449,16 +466,17 @@ row_vers_build_for_consistent_read( break; } - prev_trx_id = row_get_rec_trx_id(prev_version, index); + offsets = rec_get_offsets(prev_version, index, + ULINT_UNDEFINED, heap); + prev_trx_id = row_get_rec_trx_id(prev_version, index, offsets); if (read_view_sees_trx_id(view, prev_trx_id)) { /* The view already sees this version: we can copy it to in_heap and return */ - buf = mem_heap_alloc(in_heap, rec_get_size( - prev_version)); - *old_vers = rec_copy(buf, prev_version); + buf = mem_heap_alloc(in_heap, rec_offs_size(offsets)); + *old_vers = rec_copy(buf, prev_version, offsets); err = DB_SUCCESS; break; diff --git a/innobase/srv/srv0srv.c b/innobase/srv/srv0srv.c index ba102b6f4b9..40befae424e 100644 --- a/innobase/srv/srv0srv.c +++ b/innobase/srv/srv0srv.c @@ -44,6 +44,7 @@ Created 10/8/1995 Heikki Tuuri #include "buf0flu.h" #include "btr0sea.h" #include "dict0load.h" +#include "dict0boot.h" #include "srv0start.h" #include "row0mysql.h" @@ -845,6 +846,7 @@ srv_init(void) { srv_conc_slot_t* conc_slot; srv_slot_t* slot; + dict_table_t* table; ulint i; srv_sys = mem_alloc(sizeof(srv_sys_t)); @@ -894,6 +896,31 @@ srv_init(void) UT_LIST_INIT(srv_sys->tasks); + /* create dummy table and index for old-style infimum and supremum */ + table = dict_mem_table_create("SYS_DUMMY1", + DICT_HDR_SPACE, 1, FALSE); + dict_mem_table_add_col(table, "DUMMY", DATA_CHAR, + DATA_ENGLISH | DATA_NOT_NULL, 8, 0); + + srv_sys->dummy_ind1 = dict_mem_index_create("SYS_DUMMY1", + "SYS_DUMMY1", DICT_HDR_SPACE, 0, 1); + dict_index_add_col(srv_sys->dummy_ind1, + dict_table_get_nth_col(table, 0), 0, 0); + srv_sys->dummy_ind1->table = table; + /* create dummy table and index for new-style infimum and supremum */ + table = dict_mem_table_create("SYS_DUMMY2", + DICT_HDR_SPACE, 1, TRUE); + dict_mem_table_add_col(table, "DUMMY", DATA_CHAR, + DATA_ENGLISH | DATA_NOT_NULL, 8, 0); + srv_sys->dummy_ind2 = dict_mem_index_create("SYS_DUMMY2", + "SYS_DUMMY2", DICT_HDR_SPACE, 0, 1); + dict_index_add_col(srv_sys->dummy_ind2, + dict_table_get_nth_col(table, 0), 0, 0); + srv_sys->dummy_ind2->table = table; + + /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */ + srv_sys->dummy_ind1->cached = srv_sys->dummy_ind2->cached = TRUE; + /* Init the server concurrency restriction data structures */ os_fast_mutex_init(&srv_conc_mutex); diff --git a/innobase/trx/trx0rec.c b/innobase/trx/trx0rec.c index fe429d1cc62..484d4f62744 100644 --- a/innobase/trx/trx0rec.c +++ b/innobase/trx/trx0rec.c @@ -38,16 +38,18 @@ trx_undof_page_add_undo_rec_log( ulint new_free, /* in: end offset of the entry */ mtr_t* mtr) /* in: mtr */ { - byte* log_ptr; - ulint len; + byte* log_ptr; + const byte* log_end; + ulint len; - log_ptr = mlog_open(mtr, 30 + MLOG_BUF_MARGIN); + log_ptr = mlog_open(mtr, 11 + 13 + MLOG_BUF_MARGIN); if (log_ptr == NULL) { return; } + log_end = &log_ptr[11 + 13 + MLOG_BUF_MARGIN]; log_ptr = mlog_write_initial_log_record_fast(undo_page, MLOG_UNDO_INSERT, log_ptr, mtr); len = new_free - 
old_free - 4; @@ -55,14 +57,11 @@ trx_undof_page_add_undo_rec_log( mach_write_to_2(log_ptr, len); log_ptr += 2; - if (len < 256) { - ut_memcpy(log_ptr, undo_page + old_free + 2, len); - log_ptr += len; - } - - mlog_close(mtr, log_ptr); - - if (len >= MLOG_BUF_MARGIN) { + if (log_ptr + len <= log_end) { + memcpy(log_ptr, undo_page + old_free + 2, len); + mlog_close(mtr, log_ptr + len); + } else { + mlog_close(mtr, log_ptr); mlog_catenate_string(mtr, undo_page + old_free + 2, len); } } @@ -404,6 +403,7 @@ trx_undo_page_report_modify( delete marking is done */ rec_t* rec, /* in: clustered index record which has NOT yet been modified */ + const ulint* offsets, /* in: rec_get_offsets(rec, index) */ upd_t* update, /* in: update vector which tells the columns to be updated; in the case of a delete, this should be set to NULL */ @@ -430,6 +430,7 @@ trx_undo_page_report_modify( ulint i; ut_a(index->type & DICT_CLUSTERED); + ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_UPDATE); table = index->table; @@ -454,7 +455,7 @@ trx_undo_page_report_modify( /* Store first some general parameters to the undo log */ if (update) { - if (rec_get_deleted_flag(rec)) { + if (rec_get_deleted_flag(rec, table->comp)) { type_cmpl = TRX_UNDO_UPD_DEL_REC; } else { type_cmpl = TRX_UNDO_UPD_EXIST_REC; @@ -479,14 +480,15 @@ trx_undo_page_report_modify( /*----------------------------------------*/ /* Store the state of the info bits */ - bits = rec_get_info_bits(rec); + bits = rec_get_info_bits(rec, table->comp); mach_write_to_1(ptr, bits); ptr += 1; /* Store the values of the system columns */ - trx_id = dict_index_rec_get_sys_col(index, DATA_TRX_ID, rec); - - roll_ptr = dict_index_rec_get_sys_col(index, DATA_ROLL_PTR, rec); + trx_id = dict_index_rec_get_sys_col(index, offsets, + DATA_TRX_ID, rec); + roll_ptr = dict_index_rec_get_sys_col(index, offsets, + DATA_ROLL_PTR, rec); len = mach_dulint_write_compressed(ptr, trx_id); ptr += len; @@ -499,7 +501,7 @@ trx_undo_page_report_modify( for (i = 0; i < dict_index_get_n_unique(index); i++) { - field = rec_get_nth_field(rec, i, &flen); + field = rec_get_nth_field(rec, offsets, i, &flen); if (trx_undo_left(undo_page, ptr) < 4) { @@ -547,14 +549,14 @@ trx_undo_page_report_modify( ptr += len; /* Save the old value of field */ - field = rec_get_nth_field(rec, pos, &flen); + field = rec_get_nth_field(rec, offsets, pos, &flen); if (trx_undo_left(undo_page, ptr) < 5) { return(0); } - if (rec_get_nth_field_extern_bit(rec, pos)) { + if (rec_offs_nth_extern(offsets, pos)) { /* If a field has external storage, we add to flen the flag */ @@ -631,7 +633,7 @@ trx_undo_page_report_modify( ptr += len; /* Save the old value of field */ - field = rec_get_nth_field(rec, pos, &flen); + field = rec_get_nth_field(rec, offsets, pos, &flen); if (trx_undo_left(undo_page, ptr) < 5) { @@ -1008,7 +1010,9 @@ trx_undo_report_row_operation( ibool is_insert; trx_rseg_t* rseg; mtr_t mtr; - + mem_heap_t* heap; + ulint* offsets = NULL; + ut_a(index->type & DICT_CLUSTERED); if (flags & BTR_NO_UNDO_LOG_FLAG) { @@ -1019,7 +1023,6 @@ trx_undo_report_row_operation( } ut_ad(thr); - ut_a(index->type & DICT_CLUSTERED); ut_ad((op_type != TRX_UNDO_INSERT_OP) || (clust_entry && !update && !rec)); @@ -1063,6 +1066,8 @@ trx_undo_report_row_operation( mtr_start(&mtr); + heap = mem_heap_create(100); + for (;;) { undo_page = buf_page_get_gen(undo->space, page_no, RW_X_LATCH, undo->guess_page, @@ -1079,9 +1084,10 @@ 
trx_undo_report_row_operation( index, clust_entry, &mtr); } else { + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); offset = trx_undo_page_report_modify(undo_page, trx, - index, rec, update, - cmpl_info, &mtr); + index, rec, offsets, update, cmpl_info, &mtr); } if (offset == 0) { @@ -1123,7 +1129,7 @@ trx_undo_report_row_operation( mutex_exit(&(trx->undo_mutex)); mtr_commit(&mtr); - + mem_heap_free(heap); return(DB_OUT_OF_FILE_SPACE); } } @@ -1140,6 +1146,7 @@ trx_undo_report_row_operation( *roll_ptr = trx_undo_build_roll_ptr(is_insert, rseg->id, page_no, offset); + mem_heap_free(heap); return(DB_SUCCESS); } @@ -1236,6 +1243,7 @@ trx_undo_prev_version_build( index_rec page and purge_view */ rec_t* rec, /* in: version of a clustered index record */ dict_index_t* index, /* in: clustered index */ + ulint* offsets,/* in: rec_get_offsets(rec, index) */ mem_heap_t* heap, /* in: memory heap from which the memory needed is allocated */ rec_t** old_vers)/* out, own: previous version, or NULL if @@ -1258,7 +1266,7 @@ trx_undo_prev_version_build( ibool dummy_extern; byte* buf; ulint err; - + ulint* index_offsets = NULL; #ifdef UNIV_SYNC_DEBUG ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED)); #endif /* UNIV_SYNC_DEBUG */ @@ -1266,21 +1274,25 @@ trx_undo_prev_version_build( MTR_MEMO_PAGE_S_FIX) || mtr_memo_contains(index_mtr, buf_block_align(index_rec), MTR_MEMO_PAGE_X_FIX)); + ut_ad(rec_offs_validate(rec, index, offsets)); + if (!(index->type & DICT_CLUSTERED)) { fprintf(stderr, "InnoDB: Error: trying to access" " update undo rec for non-clustered index %s\n" "InnoDB: Submit a detailed bug report to" " http://bugs.mysql.com\n" "InnoDB: index record ", index->name); - rec_print(stderr, index_rec); + index_offsets = rec_get_offsets(index_rec, index, + ULINT_UNDEFINED, heap); + rec_print(stderr, index_rec, index_offsets); fputs("\n" "InnoDB: record version ", stderr); - rec_print(stderr, rec); + rec_print(stderr, rec, offsets); putc('\n', stderr); return(DB_ERROR); } - roll_ptr = row_get_rec_roll_ptr(rec, index); + roll_ptr = row_get_rec_roll_ptr(rec, index, offsets); old_roll_ptr = roll_ptr; *old_vers = NULL; @@ -1292,7 +1304,7 @@ trx_undo_prev_version_build( return(DB_SUCCESS); } - rec_trx_id = row_get_rec_trx_id(rec, index); + rec_trx_id = row_get_rec_trx_id(rec, index, offsets); err = trx_undo_get_undo_rec(roll_ptr, rec_trx_id, &undo_rec, heap); @@ -1341,10 +1353,12 @@ trx_undo_prev_version_build( ut_print_buf(stderr, undo_rec, 150); fputs("\n" "InnoDB: index record ", stderr); - rec_print(stderr, index_rec); + index_offsets = rec_get_offsets(index_rec, index, + ULINT_UNDEFINED, heap); + rec_print(stderr, index_rec, index_offsets); fputs("\n" "InnoDB: record version ", stderr); - rec_print(stderr, rec); + rec_print(stderr, rec, offsets); fprintf(stderr, "\n" "InnoDB: Record trx id %lu %lu, update rec trx id %lu %lu\n" "InnoDB: Roll ptr in rec %lu %lu, in update rec %lu %lu\n", @@ -1358,11 +1372,10 @@ trx_undo_prev_version_build( (ulong) ut_dulint_get_low(roll_ptr)); trx_purge_sys_print(); - return(DB_ERROR); } - if (row_upd_changes_field_size_or_external(rec, index, update)) { + if (row_upd_changes_field_size_or_external(index, offsets, update)) { ulint* ext_vect; ulint n_ext_vect; @@ -1372,27 +1385,28 @@ trx_undo_prev_version_build( those fields that update updates to become externally stored fields. 
Store the info to ext_vect: */ - ext_vect = mem_alloc(sizeof(ulint) * rec_get_n_fields(rec)); - n_ext_vect = btr_push_update_extern_fields(ext_vect, rec, + ext_vect = mem_alloc(sizeof(ulint) + * rec_offs_n_fields(offsets)); + n_ext_vect = btr_push_update_extern_fields(ext_vect, offsets, update); entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap); row_upd_index_replace_new_col_vals(entry, index, update, heap); - buf = mem_heap_alloc(heap, rec_get_converted_size(entry)); + buf = mem_heap_alloc(heap, + rec_get_converted_size(index, entry)); - *old_vers = rec_convert_dtuple_to_rec(buf, entry); + *old_vers = rec_convert_dtuple_to_rec(buf, index, entry); /* Now set the extern bits in the old version of the record */ - rec_set_field_extern_bits(*old_vers, ext_vect, n_ext_vect, - NULL); + rec_set_field_extern_bits(*old_vers, index, + ext_vect, n_ext_vect, NULL); mem_free(ext_vect); } else { - buf = mem_heap_alloc(heap, rec_get_size(rec)); - - *old_vers = rec_copy(buf, rec); - - row_upd_rec_in_place(*old_vers, update); + buf = mem_heap_alloc(heap, rec_offs_size(offsets)); + *old_vers = rec_copy(buf, rec, offsets); + rec_offs_make_valid(*old_vers, index, offsets); + row_upd_rec_in_place(*old_vers, offsets, update); } return(DB_SUCCESS); diff --git a/innobase/trx/trx0roll.c b/innobase/trx/trx0roll.c index eb7c7f43f03..db5e16c7778 100644 --- a/innobase/trx/trx0roll.c +++ b/innobase/trx/trx0roll.c @@ -331,10 +331,11 @@ trx_savept_take( /*********************************************************************** Rollback or clean up transactions which have no user session. If the transaction already was committed, then we clean up a possible insert -undo log. If the transaction was not yet committed, then we roll it back. */ +undo log. If the transaction was not yet committed, then we roll it back. 
+Note: this is done in a background thread */ -void -trx_rollback_or_clean_all_without_sess(void) +void * +trx_rollback_or_clean_all_without_sess(void *i) /*========================================*/ { mem_heap_t* heap; @@ -362,7 +363,7 @@ trx_rollback_or_clean_all_without_sess(void) fprintf(stderr, "InnoDB: Starting rollback of uncommitted transactions\n"); } else { - return; + os_thread_exit(i); } loop: heap = mem_heap_create(512); @@ -371,9 +372,15 @@ loop: trx = UT_LIST_GET_FIRST(trx_sys->trx_list); - while (trx && (trx->sess || (trx->conc_state == TRX_NOT_STARTED))) { + while (trx) { - trx = UT_LIST_GET_NEXT(trx_list, trx); + if ((trx->sess || (trx->conc_state == TRX_NOT_STARTED))) { + trx = UT_LIST_GET_NEXT(trx_list, trx); + } else if (trx->conc_state == TRX_PREPARED) { + trx->sess = trx_dummy_sess; + } else { + break; + } } mutex_exit(&kernel_mutex); @@ -384,10 +391,11 @@ loop: mem_heap_free(heap); - return; + os_thread_exit(i); } trx->sess = trx_dummy_sess; + if (trx->conc_state == TRX_COMMITTED_IN_MEMORY) { fprintf(stderr, "InnoDB: Cleaning up trx with id %lu %lu\n", @@ -486,6 +494,8 @@ loop: mem_heap_free(heap); goto loop; + + os_thread_exit(i); /* not reached */ } /*********************************************************************** diff --git a/innobase/trx/trx0sys.c b/innobase/trx/trx0sys.c index 54bd5be01a1..35e18064329 100644 --- a/innobase/trx/trx0sys.c +++ b/innobase/trx/trx0sys.c @@ -887,8 +887,12 @@ trx_sys_init_at_db_start(void) trx = UT_LIST_GET_FIRST(trx_sys->trx_list); for (;;) { - rows_to_undo += + + if ( trx->conc_state != TRX_PREPARED) { + rows_to_undo += ut_conv_dulint_to_longlong(trx->undo_no); + } + trx = UT_LIST_GET_NEXT(trx_list, trx); if (!trx) { diff --git a/innobase/trx/trx0trx.c b/innobase/trx/trx0trx.c index f7497ac4090..ab8bd898dd6 100644 --- a/innobase/trx/trx0trx.c +++ b/innobase/trx/trx0trx.c @@ -24,6 +24,7 @@ Created 3/26/1996 Heikki Tuuri #include "thr0loc.h" #include "btr0sea.h" #include "os0proc.h" +#include "trx0xa.h" /* Copy of the prototype for innobase_mysql_print_thd: this copy MUST be equal to the one in mysql/sql/ha_innodb.cc ! 
*/ @@ -156,6 +157,10 @@ trx_create( trx->read_view_heap = mem_heap_create(256); trx->read_view = NULL; + /* Set X/Open XA transaction identification to NULL */ + memset(&trx->xid,0,sizeof(trx->xid)); + trx->xid.formatID = -1; + return(trx); } @@ -408,13 +413,22 @@ trx_lists_init_at_db_start(void) trx = trx_create(NULL); trx->id = undo->trx_id; - + trx->xid = undo->xid; trx->insert_undo = undo; trx->rseg = rseg; if (undo->state != TRX_UNDO_ACTIVE) { - trx->conc_state = TRX_COMMITTED_IN_MEMORY; + /* Prepared transactions are left in + the prepared state waiting for a + commit or abort decision from MySQL */ + + if (undo->state == TRX_UNDO_PREPARED) { + trx->conc_state = TRX_PREPARED; + } else { + trx->conc_state = + TRX_COMMITTED_IN_MEMORY; + } /* We give a dummy value for the trx no; this should have no relevance since purge @@ -457,10 +471,22 @@ trx_lists_init_at_db_start(void) trx = trx_create(NULL); trx->id = undo->trx_id; + trx->xid = undo->xid; if (undo->state != TRX_UNDO_ACTIVE) { - trx->conc_state = - TRX_COMMITTED_IN_MEMORY; + + /* Prepared transactions are left in + the prepared state waiting for a + commit or abort decision from MySQL */ + + if (undo->state == TRX_UNDO_PREPARED) { + trx->conc_state = + TRX_PREPARED; + } else { + trx->conc_state = + TRX_COMMITTED_IN_MEMORY; + } + /* We give a dummy value for the trx number */ @@ -726,7 +752,8 @@ trx_commit_off_kernel( mutex_enter(&kernel_mutex); } - ut_ad(trx->conc_state == TRX_ACTIVE); + ut_ad(trx->conc_state == TRX_ACTIVE || trx->conc_state == TRX_PREPARED); + #ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&kernel_mutex)); #endif /* UNIV_SYNC_DEBUG */ @@ -1667,3 +1694,239 @@ trx_print( innobase_mysql_print_thd(f, trx->mysql_thd); } } + +/******************************************************************** +Prepares a transaction. */ + +void +trx_prepare_off_kernel( +/*==================*/ + trx_t* trx) /* in: transaction */ +{ + page_t* update_hdr_page; + dulint lsn; + trx_rseg_t* rseg; + trx_undo_t* undo; + ibool must_flush_log = FALSE; + mtr_t mtr; + +#ifdef UNIV_SYNC_DEBUG + ut_ad(mutex_own(&kernel_mutex)); +#endif /* UNIV_SYNC_DEBUG */ + + rseg = trx->rseg; + + if (trx->insert_undo != NULL || trx->update_undo != NULL) { + + mutex_exit(&kernel_mutex); + + mtr_start(&mtr); + + must_flush_log = TRUE; + + /* Change the undo log segment states from TRX_UNDO_ACTIVE + to some other state: these modifications to the file data + structure define the transaction as prepared in the file + based world, at the serialization point of the log sequence + number lsn obtained below. */ + + mutex_enter(&(rseg->mutex)); + + if (trx->insert_undo != NULL) { + trx_undo_set_state_at_prepare(trx, trx->insert_undo, + &mtr); + } + + undo = trx->update_undo; + + if (undo) { + + /* It is not necessary to obtain trx->undo_mutex here + because only a single OS thread is allowed to do the + transaction prepare for this transaction. 
*/ + + update_hdr_page = trx_undo_set_state_at_prepare(trx, undo, &mtr); + } + + mutex_exit(&(rseg->mutex)); + + /*--------------*/ + mtr_commit(&mtr); + /*--------------*/ + lsn = mtr.end_lsn; + + mutex_enter(&kernel_mutex); + } + +#ifdef UNIV_SYNC_DEBUG + ut_ad(mutex_own(&kernel_mutex)); +#endif /* UNIV_SYNC_DEBUG */ + + /*--------------------------------------*/ + trx->conc_state = TRX_PREPARED; + /*--------------------------------------*/ + + if (trx->read_view) { + read_view_close(trx->read_view); + + mem_heap_empty(trx->read_view_heap); + trx->read_view = NULL; + } + + if (must_flush_log) { + + mutex_exit(&kernel_mutex); + + /* Write the log to the log files AND flush them to disk */ + + /*-------------------------------------*/ + + log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); + + /*-------------------------------------*/ + + mutex_enter(&kernel_mutex); + } +} + +/************************************************************************** +Does the transaction prepare for MySQL. */ + +ulint +trx_prepare_for_mysql( +/*=================*/ + /* out: 0 or error number */ + trx_t* trx) /* in: trx handle */ +{ + /* Because we do not do the prepare by sending an Innobase + sig to the transaction, we must here make sure that trx has been + started. */ + + ut_a(trx); + + trx->op_info = "preparing"; + + trx_start_if_not_started(trx); + + mutex_enter(&kernel_mutex); + + trx_prepare_off_kernel(trx); + + mutex_exit(&kernel_mutex); + + trx->op_info = ""; + + return(0); +} + +/************************************************************************** +This function is used to find the number of prepared transactions and +their transaction objects for recovery. */ + +int +trx_recover_for_mysql( +/*==================*/ + /* out: number of prepared transactions + stored in xid_list */ + XID* xid_list, /* in/out: prepared transactions */ + uint len) /* in: number of slots in xid_list */ +{ + trx_t* trx; + int num_of_transactions = 0; + + ut_ad(xid_list); + ut_ad(len); + + fprintf(stderr, + "InnoDB: Starting recovery for XA transactions...\n"); + + + /* We should set those transactions which are in + the prepared state to the xid_list */ + + mutex_enter(&kernel_mutex); + + trx = UT_LIST_GET_FIRST(trx_sys->trx_list); + + while (trx) { + if (trx->conc_state == TRX_PREPARED) { + xid_list[num_of_transactions] = trx->xid; + + fprintf(stderr, +"InnoDB: Transaction %lu %lu in prepared state after recovery\n", + (ulong) ut_dulint_get_high(trx->id), + (ulong) ut_dulint_get_low(trx->id)); + + fprintf(stderr, +"InnoDB: Transaction contains changes to %lu rows\n", + (ulong)ut_conv_dulint_to_longlong(trx->undo_no)); + + num_of_transactions++; + + if ((uint)num_of_transactions == len) { + break; + } + } + + trx = UT_LIST_GET_NEXT(trx_list, trx); + } + + mutex_exit(&kernel_mutex); + + fprintf(stderr, + "InnoDB: %d transactions in prepared state after recovery\n", + num_of_transactions); + + return (num_of_transactions); +} + +/*********************************************************************** +This function is used to find one X/Open XA distributed transaction +which is in the prepared state. */ + +trx_t * +trx_get_trx_by_xid( +/*===============*/ + /* out: trx or NULL */ + XID* xid) /* in: X/Open XA Transaction Identification */ +{ + trx_t* trx; + + if (xid == NULL) { + return (NULL); + } + + mutex_enter(&kernel_mutex); + + trx = UT_LIST_GET_FIRST(trx_sys->trx_list); + + while (trx) { + /* Compare two X/Open XA transaction id's: their + length should be the same and binary comparison + of gtrid_length+bqual_length
bytes should be + the same */ + + if (xid->gtrid_length == trx->xid.gtrid_length && + xid->bqual_length == trx->xid.bqual_length && + memcmp(xid->data, trx->xid.data, + xid->gtrid_length + + xid->bqual_length) == 0) { + break; + } + + trx = UT_LIST_GET_NEXT(trx_list, trx); + } + + mutex_exit(&kernel_mutex); + + if (trx) { + if (trx->conc_state != TRX_PREPARED) { + return(NULL); + } + + return(trx); + } else { + return(NULL); + } +} + diff --git a/innobase/trx/trx0undo.c b/innobase/trx/trx0undo.c index 8d1518753dd..4bfa9c20a54 100644 --- a/innobase/trx/trx0undo.c +++ b/innobase/trx/trx0undo.c @@ -19,6 +19,7 @@ Created 3/26/1996 Heikki Tuuri #include "srv0srv.h" #include "trx0rec.h" #include "trx0purge.h" +#include "trx0xa.h" /* How should the old versions in the history list be managed? ---------------------------------------------------------- @@ -97,6 +98,7 @@ trx_undo_mem_create( TRX_UNDO_UPDATE */ dulint trx_id, /* in: id of the trx for which the undo log is created */ + XID* xid, /* in: X/Open XA transaction identification */ ulint page_no,/* in: undo log header page number */ ulint offset); /* in: undo log header byte offset on page */ /******************************************************************* @@ -109,6 +111,7 @@ trx_undo_insert_header_reuse( page_t* undo_page, /* in: insert undo log segment header page, x-latched */ dulint trx_id, /* in: transaction id */ + XID* xid, /* in: X/Open XA transaction identification */ mtr_t* mtr); /* in: mtr */ /************************************************************************** If an update undo log can be discarded immediately, this function frees the @@ -484,6 +487,7 @@ trx_undo_header_create( TRX_UNDO_LOG_HDR_SIZE bytes free space on it */ dulint trx_id, /* in: transaction id */ + XID* xid, /* in: X/Open XA XID */ mtr_t* mtr) /* in: mtr */ { trx_upagef_t* page_hdr; @@ -530,11 +534,25 @@ trx_undo_header_create( mach_write_to_8(log_hdr + TRX_UNDO_TRX_ID, trx_id); mach_write_to_2(log_hdr + TRX_UNDO_LOG_START, new_free); - mach_write_to_2(log_hdr + TRX_UNDO_DICT_OPERATION, FALSE); - + /* If X/Open XID exists in the log header we store a + flag of it in the upper byte of the dict operation flag.
*/ + + if (xid != NULL && xid->formatID != -1) { + mach_write_to_1(log_hdr + TRX_UNDO_XID_EXISTS, TRUE); + } else { + mach_write_to_1(log_hdr + TRX_UNDO_XID_EXISTS, FALSE); + } + + mach_write_to_1(log_hdr + TRX_UNDO_DICT_TRANS, FALSE); mach_write_to_2(log_hdr + TRX_UNDO_NEXT_LOG, 0); mach_write_to_2(log_hdr + TRX_UNDO_PREV_LOG, prev_log); - + + /* Write X/Open XA transaction identification if it exists */ + + if (xid && xid->formatID != -1) { + trx_undo_write_xid(log_hdr, xid); + } + trx_undo_header_create_log(undo_page, trx_id, mtr); return(free); @@ -569,6 +587,11 @@ trx_undo_parse_page_header( mtr_t* mtr) /* in: mtr or NULL */ { dulint trx_id; + XID xid; + + /* Set X/Open XA transaction identification to NULL */ + memset(&xid, 0, sizeof(xid)); + xid.formatID = -1; ptr = mach_dulint_parse_compressed(ptr, end_ptr, &trx_id); @@ -579,10 +602,10 @@ if (page) { if (type == MLOG_UNDO_HDR_CREATE) { - trx_undo_header_create(page, trx_id, mtr); + trx_undo_header_create(page, trx_id, &xid, mtr); } else { ut_ad(type == MLOG_UNDO_HDR_REUSE); - trx_undo_insert_header_reuse(page, trx_id, mtr); + trx_undo_insert_header_reuse(page, trx_id, &xid, mtr); } } @@ -599,6 +622,7 @@ trx_undo_insert_header_reuse( page_t* undo_page, /* in: insert undo log segment header page, x-latched */ dulint trx_id, /* in: transaction id */ + XID* xid, /* in: X/Open XA transaction identification */ mtr_t* mtr) /* in: mtr */ { trx_upagef_t* page_hdr; @@ -636,8 +660,18 @@ trx_undo_insert_header_reuse( mach_write_to_8(log_hdr + TRX_UNDO_TRX_ID, trx_id); mach_write_to_2(log_hdr + TRX_UNDO_LOG_START, new_free); - mach_write_to_2(log_hdr + TRX_UNDO_DICT_OPERATION, FALSE); + /* If X/Open XID exists in the log header we store it + to the log header. */ + if (xid && xid->formatID != -1) { + mach_write_to_1(log_hdr + TRX_UNDO_XID_EXISTS, TRUE); + + trx_undo_write_xid(log_hdr, xid); + } else { + mach_write_to_1(log_hdr + TRX_UNDO_XID_EXISTS, FALSE); + } + + mach_write_to_1(log_hdr + TRX_UNDO_DICT_TRANS, FALSE); trx_undo_insert_header_reuse_log(undo_page, trx_id, mtr); return(free); @@ -717,6 +751,52 @@ trx_undo_discard_latest_update_undo( trx_undo_discard_latest_log(undo_page, mtr); } +/************************************************************************ +Write X/Open XA Transaction Identification (XID) to undo log header */ + +void +trx_undo_write_xid( +/*===============*/ + trx_ulogf_t* log_hdr,/* in: undo log header */ + XID* xid) /* in: X/Open XA Transaction Identification */ +{ + ulint i; + + mach_write_to_4(log_hdr + TRX_UNDO_XA_FORMAT, xid->formatID); + + mach_write_to_4(log_hdr + TRX_UNDO_XA_TRID_LEN, xid->gtrid_length); + + mach_write_to_4(log_hdr + TRX_UNDO_XA_BQUAL_LEN, xid->bqual_length); + + for (i = 0; i < XIDDATASIZE; i++) { + mach_write_to_1(log_hdr + TRX_UNDO_XA_XID + i, + (ulint)(xid->data[i])); + } +} + +/************************************************************************ +Read X/Open XA Transaction Identification (XID) from undo log header */ + +void +trx_undo_read_xid( +/*==============*/ + trx_ulogf_t* log_hdr,/* in: undo log header */ + XID* xid) /* out: X/Open XA Transaction Identification */ +{ + ulint i; + + xid->formatID = mach_read_from_4(log_hdr + TRX_UNDO_XA_FORMAT); + + xid->gtrid_length = mach_read_from_4(log_hdr + TRX_UNDO_XA_TRID_LEN); + + xid->bqual_length = mach_read_from_4(log_hdr + TRX_UNDO_XA_BQUAL_LEN); + + for (i = 0; i < XIDDATASIZE; i++) { + xid->data[i] = (char)mach_read_from_1(log_hdr + + TRX_UNDO_XA_XID + i); + } +} +
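/* Illustrative sketch, not part of the patch above: trx_undo_write_xid() and
trx_undo_read_xid() serialize an XID into the undo log header at fixed byte
offsets so that a prepared transaction can be re-identified during XA
recovery.  The standalone program below mimics that round trip under stated
assumptions: xid_sketch_t is a simplified copy of the X/Open xa.h XID layout,
the offsets XA_FORMAT/XA_TRID_LEN/XA_BQUAL_LEN/XA_DATA are made-up stand-ins
for the real TRX_UNDO_XA_* constants, and put_u32()/get_u32() stand in for
mach_write_to_4()/mach_read_from_4() (InnoDB stores integers big-endian).
The final check compares lengths and data bytes the same way
trx_get_trx_by_xid() does. */

#include <assert.h>
#include <stdio.h>
#include <string.h>

#define XIDDATASIZE	128	/* as in X/Open xa.h */

typedef struct {
	long	formatID;	/* -1 means "no XID present" */
	long	gtrid_length;
	long	bqual_length;
	char	data[XIDDATASIZE];
} xid_sketch_t;

/* hypothetical header offsets; the real values live in trx0undo.h */
enum { XA_FORMAT = 0, XA_TRID_LEN = 4, XA_BQUAL_LEN = 8, XA_DATA = 12 };

static void put_u32(unsigned char* p, unsigned long n)
{
	p[0] = (unsigned char)(n >> 24);
	p[1] = (unsigned char)(n >> 16);
	p[2] = (unsigned char)(n >> 8);
	p[3] = (unsigned char) n;
}

static unsigned long get_u32(const unsigned char* p)
{
	return(((unsigned long) p[0] << 24) | ((unsigned long) p[1] << 16)
	       | ((unsigned long) p[2] << 8) | (unsigned long) p[3]);
}

int main(void)
{
	unsigned char	hdr[XA_DATA + XIDDATASIZE];
	xid_sketch_t	in, out;

	memset(&in, 0, sizeof(in));
	in.formatID = 1;
	in.gtrid_length = 3;
	in.bqual_length = 2;
	memcpy(in.data, "gtrbq", 5);	/* gtrid bytes, then bqual bytes */

	/* "write": store format, lengths and data at fixed offsets */
	put_u32(hdr + XA_FORMAT, (unsigned long) in.formatID);
	put_u32(hdr + XA_TRID_LEN, (unsigned long) in.gtrid_length);
	put_u32(hdr + XA_BQUAL_LEN, (unsigned long) in.bqual_length);
	memcpy(hdr + XA_DATA, in.data, XIDDATASIZE);

	/* "read": recover the same XID from the buffer */
	memset(&out, 0, sizeof(out));
	out.formatID = (long) get_u32(hdr + XA_FORMAT);
	out.gtrid_length = (long) get_u32(hdr + XA_TRID_LEN);
	out.bqual_length = (long) get_u32(hdr + XA_BQUAL_LEN);
	memcpy(out.data, hdr + XA_DATA, XIDDATASIZE);

	assert(out.gtrid_length == in.gtrid_length
	       && out.bqual_length == in.bqual_length
	       && memcmp(out.data, in.data,
			 (size_t) (in.gtrid_length + in.bqual_length)) == 0);
	printf("XID round trip OK\n");
	return(0);
}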
/************************************************************************ Tries to add a page to the undo log segment where the undo log is placed. */ @@ -800,7 +880,6 @@ trx_undo_free_page( list */ ulint space, /* in: space */ ulint hdr_page_no, /* in: header page number */ - ulint hdr_offset, /* in: header offset */ ulint page_no, /* in: page number to free: must not be the header page */ mtr_t* mtr) /* in: mtr which does not have a latch to any @@ -813,7 +892,6 @@ trx_undo_free_page( trx_rsegf_t* rseg_header; ulint hist_size; - UT_NOT_USED(hdr_offset); ut_a(hdr_page_no != page_no); #ifdef UNIV_SYNC_DEBUG ut_ad(!mutex_own(&kernel_mutex)); @@ -870,8 +948,7 @@ trx_undo_free_page_in_rollback( #endif /* UNIV_SYNC_DEBUG */ last_page_no = trx_undo_free_page(undo->rseg, FALSE, undo->space, - undo->hdr_page_no, undo->hdr_offset, - page_no, mtr); + undo->hdr_page_no, page_no, mtr); undo->last_page_no = last_page_no; undo->size--; @@ -1039,7 +1116,7 @@ loop: trx_undo_empty_header_page(space, hdr_page_no, hdr_offset, &mtr); } else { - trx_undo_free_page(rseg, TRUE, space, hdr_page_no, hdr_offset, + trx_undo_free_page(rseg, TRUE, space, hdr_page_no, page_no, &mtr); } @@ -1123,7 +1200,9 @@ trx_undo_mem_create_at_db_start( fil_addr_t last_addr; page_t* last_page; trx_undo_rec_t* rec; - + XID xid; + ibool xid_exists = FALSE; + if (id >= TRX_RSEG_N_SLOTS) { fprintf(stderr, "InnoDB: Error: undo->id is %lu\n", (ulong) id); @@ -1145,15 +1224,31 @@ trx_undo_mem_create_at_db_start( undo_header = undo_page + offset; trx_id = mtr_read_dulint(undo_header + TRX_UNDO_TRX_ID, mtr); + + xid_exists = mtr_read_ulint(undo_header + TRX_UNDO_XID_EXISTS, + MLOG_1BYTE, mtr); + + /* Read X/Open XA transaction identification if exists or + set it to NULL. */ + + memset(&xid, 0, sizeof(xid)); + xid.formatID = -1; + + if (xid_exists == TRUE) { + trx_undo_read_xid(undo_header, &xid); + } + mutex_enter(&(rseg->mutex)); - undo = trx_undo_mem_create(rseg, id, type, trx_id, page_no, offset); + undo = trx_undo_mem_create(rseg, id, type, trx_id, &xid, + page_no, offset); mutex_exit(&(rseg->mutex)); - undo->dict_operation = mtr_read_ulint( - undo_header + TRX_UNDO_DICT_OPERATION, - MLOG_2BYTES, mtr); + undo->dict_operation = mtr_read_ulint( + undo_header + TRX_UNDO_DICT_TRANS, + MLOG_1BYTE, mtr); + undo->table_id = mtr_read_dulint(undo_header + TRX_UNDO_TABLE_ID, mtr); undo->state = state; undo->size = flst_get_len(seg_header + TRX_UNDO_PAGE_LIST, mtr); @@ -1272,7 +1367,8 @@ trx_undo_mem_create( ulint type, /* in: type of the log: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */ dulint trx_id, /* in: id of the trx for which the undo log - is created */ + is created */ + XID* xid, /* in: X/Open transaction identification */ ulint page_no,/* in: undo log header page number */ ulint offset) /* in: undo log header byte offset on page */ { @@ -1295,6 +1391,7 @@ trx_undo_mem_create( undo->state = TRX_UNDO_ACTIVE; undo->del_marks = FALSE; undo->trx_id = trx_id; + undo->xid = *xid; undo->dict_operation = FALSE; @@ -1322,6 +1419,7 @@ trx_undo_mem_init_for_reuse( trx_undo_t* undo, /* in: undo log to init */ dulint trx_id, /* in: id of the trx for which the undo log is created */ + XID* xid, /* in: X/Open XA transaction identification*/ ulint offset) /* in: undo log header byte offset on page */ { #ifdef UNIV_SYNC_DEBUG @@ -1339,6 +1437,7 @@ trx_undo_mem_init_for_reuse( undo->state = TRX_UNDO_ACTIVE; undo->del_marks = FALSE; undo->trx_id = trx_id; + undo->xid = *xid; undo->dict_operation = FALSE; @@ -1376,6 +1475,7 @@ trx_undo_create( TRX_UNDO_UPDATE */ 
dulint trx_id, /* in: id of the trx for which the undo log is created */ + XID* xid, /* in: X/Open transaction identification*/ mtr_t* mtr) /* in: mtr */ { trx_rsegf_t* rseg_header; @@ -1410,9 +1510,10 @@ trx_undo_create( page_no = buf_frame_get_page_no(undo_page); - offset = trx_undo_header_create(undo_page, trx_id, mtr); + offset = trx_undo_header_create(undo_page, trx_id, xid, mtr); - undo = trx_undo_mem_create(rseg, id, type, trx_id, page_no, offset); + undo = trx_undo_mem_create(rseg, id, type, trx_id, xid , + page_no, offset); return(undo); } @@ -1432,6 +1533,7 @@ trx_undo_reuse_cached( TRX_UNDO_UPDATE */ dulint trx_id, /* in: id of the trx for which the undo log is used */ + XID* xid, /* in: X/Open XA transaction identification*/ mtr_t* mtr) /* in: mtr */ { trx_undo_t* undo; @@ -1475,16 +1577,17 @@ trx_undo_reuse_cached( undo_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr); if (type == TRX_UNDO_INSERT) { - offset = trx_undo_insert_header_reuse(undo_page, trx_id, mtr); + offset = trx_undo_insert_header_reuse(undo_page, trx_id, + xid, mtr); } else { ut_a(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_UPDATE); - offset = trx_undo_header_create(undo_page, trx_id, mtr); + offset = trx_undo_header_create(undo_page, trx_id, xid, mtr); } - trx_undo_mem_init_for_reuse(undo, trx_id, offset); + trx_undo_mem_init_for_reuse(undo, trx_id, xid, offset); return(undo); } @@ -1506,9 +1609,10 @@ trx_undo_mark_as_dict_operation( hdr_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr); - mlog_write_ulint(hdr_page + undo->hdr_offset + TRX_UNDO_DICT_OPERATION, - trx->dict_operation, MLOG_2BYTES, mtr); - + mlog_write_ulint(hdr_page + undo->hdr_offset + + TRX_UNDO_DICT_TRANS, + trx->dict_operation, MLOG_1BYTE, mtr); + mlog_write_dulint(hdr_page + undo->hdr_offset + TRX_UNDO_TABLE_ID, trx->table_id, mtr); @@ -1548,10 +1652,10 @@ trx_undo_assign_undo( #endif /* UNIV_SYNC_DEBUG */ mutex_enter(&(rseg->mutex)); - undo = trx_undo_reuse_cached(rseg, type, trx->id, &mtr); + undo = trx_undo_reuse_cached(rseg, type, trx->id, &trx->xid, &mtr); if (undo == NULL) { - undo = trx_undo_create(rseg, type, trx->id, &mtr); + undo = trx_undo_create(rseg, type, trx->id, &trx->xid, &mtr); if (undo == NULL) { /* Did not succeed */ @@ -1632,6 +1736,56 @@ trx_undo_set_state_at_finish( return(undo_page); } +/********************************************************************** +Sets the state of the undo log segment at a transaction prepare. 
*/ + +page_t* +trx_undo_set_state_at_prepare( +/*==========================*/ + /* out: undo log segment header page, + x-latched */ + trx_t* trx, /* in: transaction */ + trx_undo_t* undo, /* in: undo log memory copy */ + mtr_t* mtr) /* in: mtr */ +{ + trx_usegf_t* seg_hdr; + trx_upagef_t* page_hdr; + trx_ulogf_t* undo_header; + page_t* undo_page; + ulint offset; + + ut_ad(trx && undo && mtr); + + if (undo->id >= TRX_RSEG_N_SLOTS) { + fprintf(stderr, "InnoDB: Error: undo->id is %lu\n", + (ulong) undo->id); + mem_analyze_corruption((byte*)undo); + ut_error; + } + + undo_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr); + + seg_hdr = undo_page + TRX_UNDO_SEG_HDR; + page_hdr = undo_page + TRX_UNDO_PAGE_HDR; + + /*------------------------------*/ + undo->state = TRX_UNDO_PREPARED; + undo->xid = trx->xid; + /*------------------------------*/ + + mlog_write_ulint(seg_hdr + TRX_UNDO_STATE, undo->state, + MLOG_2BYTES, mtr); + + offset = mach_read_from_2(seg_hdr + TRX_UNDO_LAST_LOG); + undo_header = undo_page + offset; + + mlog_write_ulint(undo_header + TRX_UNDO_XID_EXISTS, + TRUE, MLOG_1BYTE, mtr); + + trx_undo_write_xid(undo_header, &undo->xid); + return(undo_page); +} + /************************************************************************** Adds the update undo log header as the first in the history list, and frees the memory object, or puts it to the list of cached update undo log diff --git a/mysql-test/r/archive.result b/mysql-test/r/archive.result index 4adb8a5410e..e215e72d43f 100644 --- a/mysql-test/r/archive.result +++ b/mysql-test/r/archive.result @@ -2601,4 +2601,1215 @@ auto fld1 companynr fld3 fld4 fld5 fld6 2 011401 37 breaking dreaded Steinberg W 3 011402 37 Romans scholastics jarring 4 011403 37 intercepted audiology tinily +INSERT INTO t2 VALUES (1,000001,00,'Omaha','teethe','neat','') , (2,011401,37,'breaking','dreaded','Steinberg','W') , (3,011402,37,'Romans','scholastics','jarring','') , (4,011403,37,'intercepted','audiology','tinily',''); +SELECT * FROM t2; +auto fld1 companynr fld3 fld4 fld5 fld6 +1 000001 00 Omaha teethe neat +2 011401 37 breaking dreaded Steinberg W +3 011402 37 Romans scholastics jarring +4 011403 37 intercepted audiology tinily +5 011501 37 bewilderingly wallet balled +6 011701 37 astound parters persist W +7 011702 37 admonishing eschew attainments +8 011703 37 sumac quitter fanatic +9 012001 37 flanking neat measures FAS +10 012003 37 combed Steinberg rightfulness +11 012004 37 subjective jarring capably +12 012005 37 scatterbrain tinily impulsive +13 012301 37 Eulerian balled starlet +14 012302 36 dubbed persist terminators +15 012303 37 Kane attainments untying +16 012304 37 overlay fanatic announces FAS +17 012305 37 perturb measures featherweight FAS +18 012306 37 goblins rightfulness pessimist FAS +19 012501 37 annihilates capably daughter +20 012602 37 Wotan impulsive decliner FAS +21 012603 37 snatching starlet lawgiver +22 012604 37 concludes terminators stated +23 012605 37 laterally untying readable +24 012606 37 yelped announces attrition +25 012701 37 grazing featherweight cascade FAS +26 012702 37 Baird pessimist motors FAS +27 012703 37 celery daughter interrogate +28 012704 37 misunderstander decliner pests W +29 013601 37 handgun lawgiver stairway +30 013602 37 foldout stated dopers FAS +31 013603 37 mystic readable testicle W +32 013604 37 succumbed attrition Parsifal W +33 013605 37 Nabisco cascade leavings +34 013606 37 fingerings motors postulation W +35 013607 37 aging interrogate squeaking +36 013608 37 afield 
pests contrasted +37 013609 37 ammonium stairway leftover +38 013610 37 boat dopers whiteners +39 013801 37 intelligibility testicle erases W +40 013802 37 Augustine Parsifal Punjab W +41 013803 37 teethe leavings Merritt +42 013804 37 dreaded postulation Quixotism +43 013901 37 scholastics squeaking sweetish FAS +44 016001 37 audiology contrasted dogging FAS +45 016201 37 wallet leftover scornfully FAS +46 016202 37 parters whiteners bellow +47 016301 37 eschew erases bills +48 016302 37 quitter Punjab cupboard FAS +49 016303 37 neat Merritt sureties FAS +50 016304 37 Steinberg Quixotism puddings +51 018001 37 jarring sweetish tapestry +52 018002 37 tinily dogging fetters +53 018003 37 balled scornfully bivalves +54 018004 37 persist bellow incurring +55 018005 37 attainments bills Adolph +56 018007 37 fanatic cupboard pithed +57 018008 37 measures sureties emergency +58 018009 37 rightfulness puddings Miles +59 018010 37 capably tapestry trimmings +60 018012 37 impulsive fetters tragedies W +61 018013 37 starlet bivalves skulking W +62 018014 37 terminators incurring flint +63 018015 37 untying Adolph flopping W +64 018016 37 announces pithed relaxing FAS +65 018017 37 featherweight emergency offload FAS +66 018018 37 pessimist Miles suites W +67 018019 37 daughter trimmings lists FAS +68 018020 37 decliner tragedies animized FAS +69 018021 37 lawgiver skulking multilayer W +70 018022 37 stated flint standardizes FAS +71 018023 37 readable flopping Judas +72 018024 37 attrition relaxing vacuuming W +73 018025 37 cascade offload dentally W +74 018026 37 motors suites humanness W +75 018027 37 interrogate lists inch W +76 018028 37 pests animized Weissmuller W +77 018029 37 stairway multilayer irresponsibly W +78 018030 37 dopers standardizes luckily FAS +79 018032 37 testicle Judas culled W +80 018033 37 Parsifal vacuuming medical FAS +81 018034 37 leavings dentally bloodbath FAS +82 018035 37 postulation humanness subschema W +83 018036 37 squeaking inch animals W +84 018037 37 contrasted Weissmuller Micronesia +85 018038 37 leftover irresponsibly repetitions +86 018039 37 whiteners luckily Antares +87 018040 37 erases culled ventilate W +88 018041 37 Punjab medical pityingly +89 018042 37 Merritt bloodbath interdependent +90 018043 37 Quixotism subschema Graves FAS +91 018044 37 sweetish animals neonatal +92 018045 37 dogging Micronesia scribbled FAS +93 018046 37 scornfully repetitions chafe W +94 018048 37 bellow Antares honoring +95 018049 37 bills ventilate realtor +96 018050 37 cupboard pityingly elite +97 018051 37 sureties interdependent funereal +98 018052 37 puddings Graves abrogating +99 018053 50 tapestry neonatal sorters +100 018054 37 fetters scribbled Conley +101 018055 37 bivalves chafe lectured +102 018056 37 incurring honoring Abraham +103 018057 37 Adolph realtor Hawaii W +104 018058 37 pithed elite cage +105 018059 36 emergency funereal hushes +106 018060 37 Miles abrogating Simla +107 018061 37 trimmings sorters reporters +108 018101 37 tragedies Conley Dutchman FAS +109 018102 37 skulking lectured descendants FAS +110 018103 37 flint Abraham groupings FAS +111 018104 37 flopping Hawaii dissociate +112 018201 37 relaxing cage coexist W +113 018202 37 offload hushes Beebe +114 018402 37 suites Simla Taoism +115 018403 37 lists reporters Connally +116 018404 37 animized Dutchman fetched FAS +117 018405 37 multilayer descendants checkpoints FAS +118 018406 37 standardizes groupings rusting +119 018409 37 Judas dissociate galling +120 018601 37 vacuuming coexist 
obliterates +121 018602 37 dentally Beebe traitor +122 018603 37 humanness Taoism resumes FAS +123 018801 37 inch Connally analyzable FAS +124 018802 37 Weissmuller fetched terminator FAS +125 018803 37 irresponsibly checkpoints gritty FAS +126 018804 37 luckily rusting firearm W +127 018805 37 culled galling minima +128 018806 37 medical obliterates Selfridge +129 018807 37 bloodbath traitor disable +130 018808 37 subschema resumes witchcraft W +131 018809 37 animals analyzable betroth W +132 018810 37 Micronesia terminator Manhattanize +133 018811 37 repetitions gritty imprint +134 018812 37 Antares firearm peeked +135 019101 37 ventilate minima swelling +136 019102 37 pityingly Selfridge interrelationships W +137 019103 37 interdependent disable riser +138 019201 37 Graves witchcraft Gandhian W +139 030501 37 neonatal betroth peacock A +140 030502 50 scribbled Manhattanize bee A +141 030503 37 chafe imprint kanji +142 030504 37 honoring peeked dental +143 031901 37 realtor swelling scarf FAS +144 036001 37 elite interrelationships chasm A +145 036002 37 funereal riser insolence A +146 036004 37 abrogating Gandhian syndicate +147 036005 37 sorters peacock alike +148 038001 37 Conley bee imperial A +149 038002 37 lectured kanji convulsion A +150 038003 37 Abraham dental railway A +151 038004 37 Hawaii scarf validate A +152 038005 37 cage chasm normalizes A +153 038006 37 hushes insolence comprehensive +154 038007 37 Simla syndicate chewing +155 038008 37 reporters alike denizen +156 038009 37 Dutchman imperial schemer +157 038010 37 descendants convulsion chronicle +158 038011 37 groupings railway Kline +159 038012 37 dissociate validate Anatole +160 038013 37 coexist normalizes partridges +161 038014 37 Beebe comprehensive brunch +162 038015 37 Taoism chewing recruited +163 038016 37 Connally denizen dimensions W +164 038017 37 fetched schemer Chicana W +165 038018 37 checkpoints chronicle announced +166 038101 37 rusting Kline praised FAS +167 038102 37 galling Anatole employing +168 038103 37 obliterates partridges linear +169 038104 37 traitor brunch quagmire +170 038201 37 resumes recruited western A +171 038202 37 analyzable dimensions relishing +172 038203 37 terminator Chicana serving A +173 038204 37 gritty announced scheduling +174 038205 37 firearm praised lore +175 038206 37 minima employing eventful +176 038208 37 Selfridge linear arteriole A +177 042801 37 disable quagmire disentangle +178 042802 37 witchcraft western cured A +179 046101 37 betroth relishing Fenton W +180 048001 37 Manhattanize serving avoidable A +181 048002 37 imprint scheduling drains A +182 048003 37 peeked lore detectably FAS +183 048004 37 swelling eventful husky +184 048005 37 interrelationships arteriole impelling +185 048006 37 riser disentangle undoes +186 048007 37 Gandhian cured evened +187 048008 37 peacock Fenton squeezes +188 048101 37 bee avoidable destroyer FAS +189 048102 37 kanji drains rudeness +190 048201 37 dental detectably beaner FAS +191 048202 37 scarf husky boorish +192 048203 37 chasm impelling Everhart +193 048204 37 insolence undoes encompass A +194 048205 37 syndicate evened mushrooms +195 048301 37 alike squeezes Alison A +196 048302 37 imperial destroyer externally FAS +197 048303 37 convulsion rudeness pellagra +198 048304 37 railway beaner cult +199 048305 37 validate boorish creek A +200 048401 37 normalizes Everhart Huffman +201 048402 37 comprehensive encompass Majorca FAS +202 048403 37 chewing mushrooms governing A +203 048404 37 denizen Alison gadfly FAS +204 048405 
37 schemer externally reassigned FAS +205 048406 37 chronicle pellagra intentness W +206 048407 37 Kline cult craziness +207 048408 37 Anatole creek psychic +208 048409 37 partridges Huffman squabbled +209 048410 37 brunch Majorca burlesque +210 048411 37 recruited governing capped +211 048412 37 dimensions gadfly extracted A +212 048413 37 Chicana reassigned DiMaggio +213 048601 37 announced intentness exclamation FAS +214 048602 37 praised craziness subdirectory +215 048603 37 employing psychic fangs +216 048604 37 linear squabbled buyer A +217 048801 37 quagmire burlesque pithing A +218 050901 37 western capped transistorizing A +219 051201 37 relishing extracted nonbiodegradable +220 056002 37 serving DiMaggio dislocate +221 056003 37 scheduling exclamation monochromatic FAS +222 056004 37 lore subdirectory batting +223 056102 37 eventful fangs postcondition A +224 056203 37 arteriole buyer catalog FAS +225 056204 37 disentangle pithing Remus +226 058003 37 cured transistorizing devices A +227 058004 37 Fenton nonbiodegradable bike A +228 058005 37 avoidable dislocate qualify +229 058006 37 drains monochromatic detained +230 058007 37 detectably batting commended +231 058101 37 husky postcondition civilize +232 058102 37 impelling catalog Elmhurst +233 058103 37 undoes Remus anesthetizing +234 058105 37 evened devices deaf +235 058111 37 squeezes bike Brigham +236 058112 37 destroyer qualify title +237 058113 37 rudeness detained coarse +238 058114 37 beaner commended combinations +239 058115 37 boorish civilize grayness +240 058116 37 Everhart Elmhurst innumerable FAS +241 058117 37 encompass anesthetizing Caroline A +242 058118 37 mushrooms deaf fatty FAS +243 058119 37 Alison Brigham eastbound +244 058120 37 externally title inexperienced +245 058121 37 pellagra coarse hoarder A +246 058122 37 cult combinations scotch W +247 058123 37 creek grayness passport A +248 058124 37 Huffman innumerable strategic FAS +249 058125 37 Majorca Caroline gated +250 058126 37 governing fatty flog +251 058127 37 gadfly eastbound Pipestone +252 058128 37 reassigned inexperienced Dar +253 058201 37 intentness hoarder Corcoran +254 058202 37 craziness scotch flyers A +255 058303 37 psychic passport competitions W +256 058304 37 squabbled strategic suppliers FAS +257 058602 37 burlesque gated skips +258 058603 37 capped flog institutes +259 058604 37 extracted Pipestone troop A +260 058605 37 DiMaggio Dar connective W +261 058606 37 exclamation Corcoran denies +262 058607 37 subdirectory flyers polka +263 060401 36 fangs competitions observations FAS +264 061701 36 buyer suppliers askers +265 066201 36 pithing skips homeless FAS +266 066501 36 transistorizing institutes Anna +267 068001 36 nonbiodegradable troop subdirectories W +268 068002 36 dislocate connective decaying FAS +269 068005 36 monochromatic denies outwitting W +270 068006 36 batting polka Harpy W +271 068007 36 postcondition observations crazed +272 068008 36 catalog askers suffocate +273 068009 36 Remus homeless provers FAS +274 068010 36 devices Anna technically +275 068011 36 bike subdirectories Franklinizations +276 068202 36 qualify decaying considered +277 068302 36 detained outwitting tinnily +278 068303 36 commended Harpy uninterruptedly +279 068401 36 civilize crazed whistled A +280 068501 36 Elmhurst suffocate automate +281 068502 36 anesthetizing provers gutting W +282 068503 36 deaf technically surreptitious +283 068602 36 Brigham Franklinizations Choctaw +284 068603 36 title considered cooks +285 068701 36 coarse tinnily 
millivolt FAS +286 068702 36 combinations uninterruptedly counterpoise +287 068703 36 grayness whistled Gothicism +288 076001 36 innumerable automate feminine +289 076002 36 Caroline gutting metaphysically W +290 076101 36 fatty surreptitious sanding A +291 076102 36 eastbound Choctaw contributorily +292 076103 36 inexperienced cooks receivers FAS +293 076302 36 hoarder millivolt adjourn +294 076303 36 scotch counterpoise straggled A +295 076304 36 passport Gothicism druggists +296 076305 36 strategic feminine thanking FAS +297 076306 36 gated metaphysically ostrich +298 076307 36 flog sanding hopelessness FAS +299 076402 36 Pipestone contributorily Eurydice +300 076501 36 Dar receivers excitation W +301 076502 36 Corcoran adjourn presumes FAS +302 076701 36 flyers straggled imaginable FAS +303 078001 36 competitions druggists concoct W +304 078002 36 suppliers thanking peering W +305 078003 36 skips ostrich Phelps FAS +306 078004 36 institutes hopelessness ferociousness FAS +307 078005 36 troop Eurydice sentences +308 078006 36 connective excitation unlocks +309 078007 36 denies presumes engrossing W +310 078008 36 polka imaginable Ruth +311 078101 36 observations concoct tying +312 078103 36 askers peering exclaimers +313 078104 36 homeless Phelps synergy +314 078105 36 Anna ferociousness Huey W +315 082101 36 subdirectories sentences merging +316 083401 36 decaying unlocks judges A +317 084001 36 outwitting engrossing Shylock W +318 084002 36 Harpy Ruth Miltonism +319 086001 36 crazed tying hen W +320 086102 36 suffocate exclaimers honeybee FAS +321 086201 36 provers synergy towers +322 088001 36 technically Huey dilutes W +323 088002 36 Franklinizations merging numerals FAS +324 088003 36 considered judges democracy FAS +325 088004 36 tinnily Shylock Ibero- +326 088101 36 uninterruptedly Miltonism invalids +327 088102 36 whistled hen behavior +328 088103 36 automate honeybee accruing +329 088104 36 gutting towers relics A +330 088105 36 surreptitious dilutes rackets +331 088106 36 Choctaw numerals Fischbein W +332 088201 36 cooks democracy phony W +333 088203 36 millivolt Ibero- cross FAS +334 088204 36 counterpoise invalids cleanup +335 088302 37 Gothicism behavior conspirator +336 088303 37 feminine accruing label FAS +337 088305 37 metaphysically relics university +338 088402 37 sanding rackets cleansed FAS +339 088501 36 contributorily Fischbein ballgown +340 088502 36 receivers phony starlet +341 088503 36 adjourn cross aqueous +342 098001 58 straggled cleanup portrayal A +343 098002 58 druggists conspirator despising W +344 098003 58 thanking label distort W +345 098004 58 ostrich university palmed +346 098005 58 hopelessness cleansed faced +347 098006 58 Eurydice ballgown silverware +348 141903 29 excitation starlet assessor +349 098008 58 presumes aqueous spiders +350 098009 58 imaginable portrayal artificially +351 098010 58 concoct despising reminiscence +352 098011 58 peering distort Mexican +353 098012 58 Phelps palmed obnoxious +354 098013 58 ferociousness faced fragile +355 098014 58 sentences silverware apprehensible +356 098015 58 unlocks assessor births +357 098016 58 engrossing spiders garages +358 098017 58 Ruth artificially panty +359 098018 58 tying reminiscence anteater +360 098019 58 exclaimers Mexican displacement A +361 098020 58 synergy obnoxious drovers A +362 098021 58 Huey fragile patenting A +363 098022 58 merging apprehensible far A +364 098023 58 judges births shrieks +365 098024 58 Shylock garages aligning W +366 098025 37 Miltonism panty pragmatism 
+367 106001 36 hen anteater fevers W +368 108001 36 honeybee displacement reexamines A +369 108002 36 towers drovers occupancies +370 108003 36 dilutes patenting sweats FAS +371 108004 36 numerals far modulators +372 108005 36 democracy shrieks demand W +373 108007 36 Ibero- aligning Madeira +374 108008 36 invalids pragmatism Viennese W +375 108009 36 behavior fevers chillier W +376 108010 36 accruing reexamines wildcats FAS +377 108011 36 relics occupancies gentle +378 108012 36 rackets sweats Angles W +379 108101 36 Fischbein modulators accuracies +380 108102 36 phony demand toggle +381 108103 36 cross Madeira Mendelssohn W +382 108111 50 cleanup Viennese behaviorally +383 108105 36 conspirator chillier Rochford +384 108106 36 label wildcats mirror W +385 108107 36 university gentle Modula +386 108108 50 cleansed Angles clobbering +387 108109 36 ballgown accuracies chronography +388 108110 36 starlet toggle Eskimoizeds +389 108201 36 aqueous Mendelssohn British W +390 108202 36 portrayal behaviorally pitfalls +391 108203 36 despising Rochford verify W +392 108204 36 distort mirror scatter FAS +393 108205 36 palmed Modula Aztecan +394 108301 36 faced clobbering acuity W +395 108302 36 silverware chronography sinking W +396 112101 36 assessor Eskimoizeds beasts FAS +397 112102 36 spiders British Witt W +398 113701 36 artificially pitfalls physicists FAS +399 116001 36 reminiscence verify folksong A +400 116201 36 Mexican scatter strokes FAS +401 116301 36 obnoxious Aztecan crowder +402 116302 36 fragile acuity merry +403 116601 36 apprehensible sinking cadenced +404 116602 36 births beasts alimony A +405 116603 36 garages Witt principled A +406 116701 36 panty physicists golfing +407 116702 36 anteater folksong undiscovered +408 118001 36 displacement strokes irritates +409 118002 36 drovers crowder patriots A +410 118003 36 patenting merry rooms FAS +411 118004 36 far cadenced towering W +412 118005 36 shrieks alimony displease +413 118006 36 aligning principled photosensitive +414 118007 36 pragmatism golfing inking +415 118008 36 fevers undiscovered gainers +416 118101 36 reexamines irritates leaning A +417 118102 36 occupancies patriots hydrant A +418 118103 36 sweats rooms preserve +419 118202 36 modulators towering blinded A +420 118203 36 demand displease interactions A +421 118204 36 Madeira photosensitive Barry +422 118302 36 Viennese inking whiteness A +423 118304 36 chillier gainers pastimes W +424 118305 36 wildcats leaning Edenization +425 118306 36 gentle hydrant Muscat +426 118307 36 Angles preserve assassinated +427 123101 36 accuracies blinded labeled +428 123102 36 toggle interactions glacial A +429 123301 36 Mendelssohn Barry implied W +430 126001 36 behaviorally whiteness bibliographies W +431 126002 36 Rochford pastimes Buchanan +432 126003 36 mirror Edenization forgivably FAS +433 126101 36 Modula Muscat innuendo A +434 126301 36 clobbering assassinated den FAS +435 126302 36 chronography labeled submarines W +436 126402 36 Eskimoizeds glacial mouthful A +437 126601 36 British implied expiring +438 126602 36 pitfalls bibliographies unfulfilled FAS +439 126702 36 verify Buchanan precession +440 128001 36 scatter forgivably nullified +441 128002 36 Aztecan innuendo affects +442 128003 36 acuity den Cynthia +443 128004 36 sinking submarines Chablis A +444 128005 36 beasts mouthful betterments FAS +445 128007 36 Witt expiring advertising +446 128008 36 physicists unfulfilled rubies A +447 128009 36 folksong precession southwest FAS +448 128010 36 strokes nullified 
superstitious A +449 128011 36 crowder affects tabernacle W +450 128012 36 merry Cynthia silk A +451 128013 36 cadenced Chablis handsomest A +452 128014 36 alimony betterments Persian A +453 128015 36 principled advertising analog W +454 128016 36 golfing rubies complex W +455 128017 36 undiscovered southwest Taoist +456 128018 36 irritates superstitious suspend +457 128019 36 patriots tabernacle relegated +458 128020 36 rooms silk awesome W +459 128021 36 towering handsomest Bruxelles +460 128022 36 displease Persian imprecisely A +461 128023 36 photosensitive analog televise +462 128101 36 inking complex braking +463 128102 36 gainers Taoist true FAS +464 128103 36 leaning suspend disappointing FAS +465 128104 36 hydrant relegated navally W +466 128106 36 preserve awesome circus +467 128107 36 blinded Bruxelles beetles +468 128108 36 interactions imprecisely trumps +469 128202 36 Barry televise fourscore W +470 128203 36 whiteness braking Blackfoots +471 128301 36 pastimes true Grady +472 128302 36 Edenization disappointing quiets FAS +473 128303 36 Muscat navally floundered FAS +474 128304 36 assassinated circus profundity W +475 128305 36 labeled beetles Garrisonian W +476 128307 36 glacial trumps Strauss +477 128401 36 implied fourscore cemented FAS +478 128502 36 bibliographies Blackfoots contrition A +479 128503 36 Buchanan Grady mutations +480 128504 36 forgivably quiets exhibits W +481 128505 36 innuendo floundered tits +482 128601 36 den profundity mate A +483 128603 36 submarines Garrisonian arches +484 128604 36 mouthful Strauss Moll +485 128702 36 expiring cemented ropers +486 128703 36 unfulfilled contrition bombast +487 128704 36 precession mutations difficultly A +488 138001 36 nullified exhibits adsorption +489 138002 36 affects tits definiteness FAS +490 138003 36 Cynthia mate cultivation A +491 138004 36 Chablis arches heals A +492 138005 36 betterments Moll Heusen W +493 138006 36 advertising ropers target FAS +494 138007 36 rubies bombast cited A +495 138008 36 southwest difficultly congresswoman W +496 138009 36 superstitious adsorption Katherine +497 138102 36 tabernacle definiteness titter A +498 138103 36 silk cultivation aspire A +499 138104 36 handsomest heals Mardis +500 138105 36 Persian Heusen Nadia W +501 138201 36 analog target estimating FAS +502 138302 36 complex cited stuck A +503 138303 36 Taoist congresswoman fifteenth A +504 138304 36 suspend Katherine Colombo +505 138401 29 relegated titter survey A +506 140102 29 awesome aspire staffing +507 140103 29 Bruxelles Mardis obtain +508 140104 29 imprecisely Nadia loaded +509 140105 29 televise estimating slaughtered +510 140201 29 braking stuck lights A +511 140701 29 true fifteenth circumference +512 141501 29 disappointing Colombo dull A +513 141502 29 navally survey weekly A +514 141901 29 circus staffing wetness +515 141902 29 beetles obtain visualized +516 142101 29 trumps loaded Tannenbaum +517 142102 29 fourscore slaughtered moribund +518 142103 29 Blackfoots lights demultiplex +519 142701 29 Grady circumference lockings +520 143001 29 quiets dull thugs FAS +521 143501 29 floundered weekly unnerves +522 143502 29 profundity wetness abut +523 148001 29 Garrisonian visualized Chippewa A +524 148002 29 Strauss Tannenbaum stratifications A +525 148003 29 cemented moribund signaled +526 148004 29 contrition demultiplex Italianizes A +527 148005 29 mutations lockings algorithmic A +528 148006 29 exhibits thugs paranoid FAS +529 148007 29 tits unnerves camping A +530 148009 29 mate abut signifying A +531 
148010 29 arches Chippewa Patrice W +532 148011 29 Moll stratifications search A +533 148012 29 ropers signaled Angeles A +534 148013 29 bombast Italianizes semblance +535 148023 36 difficultly algorithmic taxed +536 148015 29 adsorption paranoid Beatrice +537 148016 29 definiteness camping retrace +538 148017 29 cultivation signifying lockout +539 148018 29 heals Patrice grammatic +540 148019 29 Heusen search helmsman +541 148020 29 target Angeles uniform W +542 148021 29 cited semblance hamming +543 148022 29 congresswoman taxed disobedience +544 148101 29 Katherine Beatrice captivated A +545 148102 29 titter retrace transferals A +546 148201 29 aspire lockout cartographer A +547 148401 29 Mardis grammatic aims FAS +548 148402 29 Nadia helmsman Pakistani +549 148501 29 estimating uniform burglarized FAS +550 148502 29 stuck hamming saucepans A +551 148503 29 fifteenth disobedience lacerating A +552 148504 29 Colombo captivated corny +553 148601 29 survey transferals megabytes FAS +554 148602 29 staffing cartographer chancellor +555 150701 29 obtain aims bulk A +556 152101 29 loaded Pakistani commits A +557 152102 29 slaughtered burglarized meson W +558 155202 36 lights saucepans deputies +559 155203 29 circumference lacerating northeaster A +560 155204 29 dull corny dipole +561 155205 29 weekly megabytes machining 0 +562 156001 29 wetness chancellor therefore +563 156002 29 visualized bulk Telefunken +564 156102 29 Tannenbaum commits salvaging +565 156301 29 moribund meson Corinthianizes A +566 156302 29 demultiplex deputies restlessly A +567 156303 29 lockings northeaster bromides +568 156304 29 thugs dipole generalized A +569 156305 29 unnerves machining mishaps +570 156306 29 abut therefore quelling +571 156501 29 Chippewa Telefunken spiritual A +572 158001 29 stratifications salvaging beguiles FAS +573 158002 29 signaled Corinthianizes Trobriand FAS +574 158101 29 Italianizes restlessly fleeing A +575 158102 29 algorithmic bromides Armour A +576 158103 29 paranoid generalized chin A +577 158201 29 camping mishaps provers A +578 158202 29 signifying quelling aeronautic A +579 158203 29 Patrice spiritual voltage W +580 158204 29 search beguiles sash +581 158301 29 Angeles Trobriand anaerobic A +582 158302 29 semblance fleeing simultaneous A +583 158303 29 taxed Armour accumulating A +584 158304 29 Beatrice chin Medusan A +585 158305 29 retrace provers shouted A +586 158306 29 lockout aeronautic freakish +587 158501 29 grammatic voltage index FAS +588 160301 29 helmsman sash commercially +589 166101 50 uniform anaerobic mistiness A +590 166102 50 hamming simultaneous endpoint +591 168001 29 disobedience accumulating straight A +592 168002 29 captivated Medusan flurried +593 168003 29 transferals shouted denotative A +594 168101 29 cartographer freakish coming FAS +595 168102 29 aims index commencements FAS +596 168103 29 Pakistani commercially gentleman +597 168104 29 burglarized mistiness gifted +598 168202 29 saucepans endpoint Shanghais +599 168301 29 lacerating straight sportswriting A +600 168502 29 corny flurried sloping A +601 168503 29 megabytes denotative navies +602 168601 29 chancellor coming leaflet A +603 173001 40 bulk commencements shooter +604 173701 40 commits gentleman Joplin FAS +605 173702 40 meson gifted babies +606 176001 40 deputies Shanghais subdivision FAS +607 176101 40 northeaster sportswriting burstiness W +608 176201 40 dipole sloping belted FAS +609 176401 40 machining navies assails FAS +610 176501 40 therefore leaflet admiring W +611 176601 40 Telefunken 
shooter swaying 0 +612 176602 40 salvaging Joplin Goldstine FAS +613 176603 40 Corinthianizes babies fitting +614 178001 40 restlessly subdivision Norwalk W +615 178002 40 bromides burstiness weakening W +616 178003 40 generalized belted analogy FAS +617 178004 40 mishaps assails deludes +618 178005 40 quelling admiring cokes +619 178006 40 spiritual swaying Clayton +620 178007 40 beguiles Goldstine exhausts +621 178008 40 Trobriand fitting causality +622 178101 40 fleeing Norwalk sating FAS +623 178102 40 Armour weakening icon +624 178103 40 chin analogy throttles +625 178201 40 provers deludes communicants FAS +626 178202 40 aeronautic cokes dehydrate FAS +627 178301 40 voltage Clayton priceless FAS +628 178302 40 sash exhausts publicly +629 178401 40 anaerobic causality incidentals FAS +630 178402 40 simultaneous sating commonplace +631 178403 40 accumulating icon mumbles +632 178404 40 Medusan throttles furthermore W +633 178501 40 shouted communicants cautioned W +634 186002 37 freakish dehydrate parametrized A +635 186102 37 index priceless registration A +636 186201 40 commercially publicly sadly FAS +637 186202 40 mistiness incidentals positioning +638 186203 40 endpoint commonplace babysitting +639 186302 37 straight mumbles eternal A +640 188007 37 flurried furthermore hoarder +641 188008 37 denotative cautioned congregates +642 188009 37 coming parametrized rains +643 188010 37 commencements registration workers W +644 188011 37 gentleman sadly sags A +645 188012 37 gifted positioning unplug W +646 188013 37 Shanghais babysitting garage A +647 188014 37 sportswriting eternal boulder A +648 188015 37 sloping hoarder hollowly A +649 188016 37 navies congregates specifics +650 188017 37 leaflet rains Teresa +651 188102 37 shooter workers Winsett +652 188103 37 Joplin sags convenient A +653 188202 37 babies unplug buckboards FAS +654 188301 40 subdivision garage amenities +655 188302 40 burstiness boulder resplendent FAS +656 188303 40 belted hollowly priding FAS +657 188401 37 assails specifics configurations +658 188402 37 admiring Teresa untidiness A +659 188503 37 swaying Winsett Brice W +660 188504 37 Goldstine convenient sews FAS +661 188505 37 fitting buckboards participated +662 190701 37 Norwalk amenities Simon FAS +663 190703 50 weakening resplendent certificates +664 191701 37 analogy priding Fitzpatrick +665 191702 37 deludes configurations Evanston A +666 191703 37 cokes untidiness misted +667 196001 37 Clayton Brice textures A +668 196002 37 exhausts sews save +669 196003 37 causality participated count +670 196101 37 sating Simon rightful A +671 196103 37 icon certificates chaperone +672 196104 37 throttles Fitzpatrick Lizzy A +673 196201 37 communicants Evanston clenched A +674 196202 37 dehydrate misted effortlessly +675 196203 37 priceless textures accessed +676 198001 37 publicly save beaters A +677 198003 37 incidentals count Hornblower FAS +678 198004 37 commonplace rightful vests A +679 198005 37 mumbles chaperone indulgences FAS +680 198006 37 furthermore Lizzy infallibly A +681 198007 37 cautioned clenched unwilling FAS +682 198008 37 parametrized effortlessly excrete FAS +683 198009 37 registration accessed spools A +684 198010 37 sadly beaters crunches FAS +685 198011 37 positioning Hornblower overestimating FAS +686 198012 37 babysitting vests ineffective +687 198013 37 eternal indulgences humiliation A +688 198014 37 hoarder infallibly sophomore +689 198015 37 congregates unwilling star +690 198017 37 rains excrete rifles +691 198018 37 workers spools 
dialysis +692 198019 37 sags crunches arriving +693 198020 37 unplug overestimating indulge +694 198021 37 garage ineffective clockers +695 198022 37 boulder humiliation languages +696 198023 50 hollowly sophomore Antarctica A +697 198024 37 specifics star percentage +698 198101 37 Teresa rifles ceiling A +699 198103 37 Winsett dialysis specification +700 198105 37 convenient arriving regimented A +701 198106 37 buckboards indulge ciphers +702 198201 37 amenities clockers pictures A +703 198204 37 resplendent languages serpents A +704 198301 53 priding Antarctica allot A +705 198302 53 configurations percentage realized A +706 198303 53 untidiness ceiling mayoral A +707 198304 53 Brice specification opaquely A +708 198401 37 sews regimented hostess FAS +709 198402 37 participated ciphers fiftieth +710 198403 37 Simon pictures incorrectly +711 202101 37 certificates serpents decomposition FAS +712 202301 37 Fitzpatrick allot stranglings +713 202302 37 Evanston realized mixture FAS +714 202303 37 misted mayoral electroencephalography FAS +715 202304 37 textures opaquely similarities FAS +716 202305 37 save hostess charges W +717 202601 37 count fiftieth freest FAS +718 202602 37 rightful incorrectly Greenberg FAS +719 202605 37 chaperone decomposition tinting +720 202606 37 Lizzy stranglings expelled W +721 202607 37 clenched mixture warm +722 202901 37 effortlessly electroencephalography smoothed +723 202902 37 accessed similarities deductions FAS +724 202903 37 beaters charges Romano W +725 202904 37 Hornblower freest bitterroot +726 202907 37 vests Greenberg corset +727 202908 37 indulgences tinting securing +728 203101 37 infallibly expelled environing FAS +729 203103 37 unwilling warm cute +730 203104 37 excrete smoothed Crays +731 203105 37 spools deductions heiress FAS +732 203401 37 crunches Romano inform FAS +733 203402 37 overestimating bitterroot avenge +734 203404 37 ineffective corset universals +735 203901 37 humiliation securing Kinsey W +736 203902 37 sophomore environing ravines FAS +737 203903 37 star cute bestseller +738 203906 37 rifles Crays equilibrium +739 203907 37 dialysis heiress extents 0 +740 203908 37 arriving inform relatively +741 203909 37 indulge avenge pressure FAS +742 206101 37 clockers universals critiques FAS +743 206201 37 languages Kinsey befouled +744 206202 37 Antarctica ravines rightfully FAS +745 206203 37 percentage bestseller mechanizing FAS +746 206206 37 ceiling equilibrium Latinizes +747 206207 37 specification extents timesharing +748 206208 37 regimented relatively Aden +749 208001 37 ciphers pressure embassies +750 208002 37 pictures critiques males FAS +751 208003 37 serpents befouled shapelessly FAS +752 208004 37 allot rightfully genres FAS +753 208008 37 realized mechanizing mastering +754 208009 37 mayoral Latinizes Newtonian +755 208010 37 opaquely timesharing finishers FAS +756 208011 37 hostess Aden abates +757 208101 37 fiftieth embassies teem +758 208102 37 incorrectly males kiting FAS +759 208103 37 decomposition shapelessly stodgy FAS +760 208104 37 stranglings genres scalps FAS +761 208105 37 mixture mastering feed FAS +762 208110 37 electroencephalography Newtonian guitars +763 208111 37 similarities finishers airships +764 208112 37 charges abates store +765 208113 37 freest teem denounces +766 208201 37 Greenberg kiting Pyle FAS +767 208203 37 tinting stodgy Saxony +768 208301 37 expelled scalps serializations FAS +769 208302 37 warm feed Peruvian FAS +770 208305 37 smoothed guitars taxonomically FAS +771 208401 37 
deductions airships kingdom A +772 208402 37 Romano store stint A +773 208403 37 bitterroot denounces Sault A +774 208404 37 corset Pyle faithful +775 208501 37 securing Saxony Ganymede FAS +776 208502 37 environing serializations tidiness FAS +777 208503 37 cute Peruvian gainful FAS +778 208504 37 Crays taxonomically contrary FAS +779 208505 37 heiress kingdom Tipperary FAS +780 210101 37 inform stint tropics W +781 210102 37 avenge Sault theorizers +782 210103 37 universals faithful renew 0 +783 210104 37 Kinsey Ganymede already +784 210105 37 ravines tidiness terminal +785 210106 37 bestseller gainful Hegelian +786 210107 37 equilibrium contrary hypothesizer +787 210401 37 extents Tipperary warningly FAS +788 213201 37 relatively tropics journalizing FAS +789 213203 37 pressure theorizers nested +790 213204 37 critiques renew Lars +791 213205 37 befouled already saplings +792 213206 37 rightfully terminal foothill +793 213207 37 mechanizing Hegelian labeled +794 216101 37 Latinizes hypothesizer imperiously FAS +795 216103 37 timesharing warningly reporters FAS +796 218001 37 Aden journalizing furnishings FAS +797 218002 37 embassies nested precipitable FAS +798 218003 37 males Lars discounts FAS +799 218004 37 shapelessly saplings excises FAS +800 143503 50 genres foothill Stalin +801 218006 37 mastering labeled despot FAS +802 218007 37 Newtonian imperiously ripeness FAS +803 218008 37 finishers reporters Arabia +804 218009 37 abates furnishings unruly +805 218010 37 teem precipitable mournfulness +806 218011 37 kiting discounts boom FAS +807 218020 37 stodgy excises slaughter A +808 218021 50 scalps Stalin Sabine +809 218022 37 feed despot handy FAS +810 218023 37 guitars ripeness rural +811 218024 37 airships Arabia organizer +812 218101 37 store unruly shipyard FAS +813 218102 37 denounces mournfulness civics FAS +814 218103 37 Pyle boom inaccuracy FAS +815 218201 37 Saxony slaughter rules FAS +816 218202 37 serializations Sabine juveniles FAS +817 218203 37 Peruvian handy comprised W +818 218204 37 taxonomically rural investigations +819 218205 37 kingdom organizer stabilizes A +820 218301 37 stint shipyard seminaries FAS +821 218302 37 Sault civics Hunter A +822 218401 37 faithful inaccuracy sporty FAS +823 218402 37 Ganymede rules test FAS +824 218403 37 tidiness juveniles weasels +825 218404 37 gainful comprised CERN +826 218407 37 contrary investigations tempering +827 218408 37 Tipperary stabilizes afore FAS +828 218409 37 tropics seminaries Galatean +829 218410 37 theorizers Hunter techniques W +830 226001 37 renew sporty error +831 226002 37 already test veranda +832 226003 37 terminal weasels severely +833 226004 37 Hegelian CERN Cassites FAS +834 226005 37 hypothesizer tempering forthcoming +835 226006 37 warningly afore guides +836 226007 37 journalizing Galatean vanish FAS +837 226008 37 nested techniques lied A +838 226203 37 Lars error sawtooth FAS +839 226204 37 saplings veranda fated FAS +840 226205 37 foothill severely gradually +841 226206 37 labeled Cassites widens +842 226207 37 imperiously forthcoming preclude +843 226208 37 reporters guides Jobrel +844 226209 37 furnishings vanish hooker +845 226210 37 precipitable lied rainstorm +846 226211 37 discounts sawtooth disconnects +847 228001 37 excises fated cruelty +848 228004 37 Stalin gradually exponentials A +849 228005 37 despot widens affective A +850 228006 37 ripeness preclude arteries +851 228007 37 Arabia Jobrel Crosby FAS +852 228008 37 unruly hooker acquaint +853 228009 37 mournfulness rainstorm 
evenhandedly +854 228101 37 boom disconnects percentage +855 228108 37 slaughter cruelty disobedience +856 228109 37 Sabine exponentials humility +857 228110 37 handy affective gleaning A +858 228111 37 rural arteries petted A +859 228112 37 organizer Crosby bloater A +860 228113 37 shipyard acquaint minion A +861 228114 37 civics evenhandedly marginal A +862 228115 37 inaccuracy percentage apiary A +863 228116 37 rules disobedience measures +864 228117 37 juveniles humility precaution +865 228118 37 comprised gleaning repelled +866 228119 37 investigations petted primary FAS +867 228120 37 stabilizes bloater coverings +868 228121 37 seminaries minion Artemia A +869 228122 37 Hunter marginal navigate +870 228201 37 sporty apiary spatial +871 228206 37 test measures Gurkha +872 228207 37 weasels precaution meanwhile A +873 228208 37 CERN repelled Melinda A +874 228209 37 tempering primary Butterfield +875 228210 37 afore coverings Aldrich A +876 228211 37 Galatean Artemia previewing A +877 228212 37 techniques navigate glut A +878 228213 37 error spatial unaffected +879 228214 37 veranda Gurkha inmate +880 228301 37 severely meanwhile mineral +881 228305 37 Cassites Melinda impending A +882 228306 37 forthcoming Butterfield meditation A +883 228307 37 guides Aldrich ideas +884 228308 37 vanish previewing miniaturizes W +885 228309 37 lied glut lewdly +886 228310 37 sawtooth unaffected title +887 228311 37 fated inmate youthfulness +888 228312 37 gradually mineral creak FAS +889 228313 37 widens impending Chippewa +890 228314 37 preclude meditation clamored +891 228401 65 Jobrel ideas freezes +892 228402 65 hooker miniaturizes forgivably FAS +893 228403 65 rainstorm lewdly reduce FAS +894 228404 65 disconnects title McGovern W +895 228405 65 cruelty youthfulness Nazis W +896 228406 65 exponentials creak epistle W +897 228407 65 affective Chippewa socializes W +898 228408 65 arteries clamored conceptions +899 228409 65 Crosby freezes Kevin +900 228410 65 acquaint forgivably uncovering +901 230301 37 evenhandedly reduce chews FAS +902 230302 37 percentage McGovern appendixes FAS +903 230303 37 disobedience Nazis raining +904 018062 37 humility epistle infest +905 230501 37 gleaning socializes compartment +906 230502 37 petted conceptions minting +907 230503 37 bloater Kevin ducks +908 230504 37 minion uncovering roped A +909 230505 37 marginal chews waltz +910 230506 37 apiary appendixes Lillian +911 230507 37 measures raining repressions A +912 230508 37 precaution infest chillingly +913 230509 37 repelled compartment noncritical +914 230901 37 primary minting lithograph +915 230902 37 coverings ducks spongers +916 230903 37 Artemia roped parenthood +917 230904 37 navigate waltz posed +918 230905 37 spatial Lillian instruments +919 230906 37 Gurkha repressions filial +920 230907 37 meanwhile chillingly fixedly +921 230908 37 Melinda noncritical relives +922 230909 37 Butterfield lithograph Pandora +923 230910 37 Aldrich spongers watering A +924 230911 37 previewing parenthood ungrateful +925 230912 37 glut posed secures +926 230913 37 unaffected instruments chastisers +927 230914 37 inmate filial icon +928 231304 37 mineral fixedly reuniting A +929 231305 37 impending relives imagining A +930 231306 37 meditation Pandora abiding A +931 231307 37 ideas watering omnisciently +932 231308 37 miniaturizes ungrateful Britannic +933 231309 37 lewdly secures scholastics A +934 231310 37 title chastisers mechanics A +935 231311 37 youthfulness icon humidly A +936 231312 37 creak reuniting masterpiece 
+937 231313 37 Chippewa imagining however +938 231314 37 clamored abiding Mendelian +939 231315 37 freezes omnisciently jarred +940 232102 37 forgivably Britannic scolds +941 232103 37 reduce scholastics infatuate +942 232104 37 McGovern mechanics willed A +943 232105 37 Nazis humidly joyfully +944 232106 37 epistle masterpiece Microsoft +945 232107 37 socializes however fibrosities +946 232108 37 conceptions Mendelian Baltimorean +947 232601 37 Kevin jarred equestrian +948 232602 37 uncovering scolds Goodrich +949 232603 37 chews infatuate apish A +950 232605 37 appendixes willed Adlerian +5950 1232605 37 appendixes willed Adlerian +5951 1232606 37 appendixes willed Adlerian +5952 1232607 37 appendixes willed Adlerian +5953 1232608 37 appendixes willed Adlerian +5954 1232609 37 appendixes willed Adlerian +951 232606 37 raining joyfully Tropez +952 232607 37 infest Microsoft nouns +953 232608 37 compartment fibrosities distracting +954 232609 37 minting Baltimorean mutton +955 236104 37 ducks equestrian bridgeable A +956 236105 37 roped Goodrich stickers A +957 236106 37 waltz apish transcontinental A +958 236107 37 Lillian Adlerian amateurish +959 236108 37 repressions Tropez Gandhian +960 236109 37 chillingly nouns stratified +961 236110 37 noncritical distracting chamberlains +962 236111 37 lithograph mutton creditably +963 236112 37 spongers bridgeable philosophic +964 236113 37 parenthood stickers ores +965 238005 37 posed transcontinental Carleton +966 238006 37 instruments amateurish tape A +967 238007 37 filial Gandhian afloat A +968 238008 37 fixedly stratified goodness A +969 238009 37 relives chamberlains welcoming +970 238010 37 Pandora creditably Pinsky FAS +971 238011 37 watering philosophic halting +972 238012 37 ungrateful ores bibliography +973 238013 37 secures Carleton decoding +974 240401 41 chastisers tape variance A +975 240402 41 icon afloat allowed A +976 240901 41 reuniting goodness dire A +977 240902 41 imagining welcoming dub A +978 241801 41 abiding Pinsky poisoning +979 242101 41 omnisciently halting Iraqis A +980 242102 41 Britannic bibliography heaving +981 242201 41 scholastics decoding population A +982 242202 41 mechanics variance bomb A +983 242501 41 humidly allowed Majorca A +984 242502 41 masterpiece dire Gershwins +985 246201 41 however dub explorers +986 246202 41 Mendelian poisoning libretto A +987 246203 41 jarred Iraqis occurred +988 246204 41 scolds heaving Lagos +989 246205 41 infatuate population rats +990 246301 41 willed bomb bankruptcies A +991 246302 41 joyfully Majorca crying +992 248001 41 Microsoft Gershwins unexpected +993 248002 41 fibrosities explorers accessed A +994 248003 41 Baltimorean libretto colorful A +995 248004 41 equestrian occurred versatility A +996 248005 41 Goodrich Lagos cosy +997 248006 41 apish rats Darius A +998 248007 41 Adlerian bankruptcies mastering A +999 248008 41 Tropez crying Asiaticizations A +1000 248009 41 nouns unexpected offerers A +1001 248010 41 distracting accessed uncles A +1002 248011 41 mutton colorful sleepwalk +1003 248012 41 bridgeable versatility Ernestine +1004 248013 41 stickers cosy checksumming +1005 248014 41 transcontinental Darius stopped +1006 248015 41 amateurish mastering sicker +1007 248016 41 Gandhian Asiaticizations Italianization +1008 248017 41 stratified offerers alphabetic +1009 248018 41 chamberlains uncles pharmaceutic +1010 248019 41 creditably sleepwalk creator +1011 248020 41 philosophic Ernestine chess +1012 248021 41 ores checksumming charcoal +1013 248101 41 Carleton 
stopped Epiphany A +1014 248102 41 tape sicker bulldozes A +1015 248201 41 afloat Italianization Pygmalion A +1016 248202 41 goodness alphabetic caressing A +1017 248203 41 welcoming pharmaceutic Palestine A +1018 248204 41 Pinsky creator regimented A +1019 248205 41 halting chess scars A +1020 248206 41 bibliography charcoal realest A +1021 248207 41 decoding Epiphany diffusing A +1022 248208 41 variance bulldozes clubroom A +1023 248209 41 allowed Pygmalion Blythe A +1024 248210 41 dire caressing ahead +1025 248211 50 dub Palestine reviver +1026 250501 34 poisoning regimented retransmitting A +1027 250502 34 Iraqis scars landslide +1028 250503 34 heaving realest Eiffel +1029 250504 34 population diffusing absentee +1030 250505 34 bomb clubroom aye +1031 250601 34 Majorca Blythe forked A +1032 250602 34 Gershwins ahead Peruvianizes +1033 250603 34 explorers reviver clerked +1034 250604 34 libretto retransmitting tutor +1035 250605 34 occurred landslide boulevard +1036 251001 34 Lagos Eiffel shuttered +1037 251002 34 rats absentee quotes A +1038 251003 34 bankruptcies aye Caltech +1039 251004 34 crying forked Mossberg +1040 251005 34 unexpected Peruvianizes kept +1041 251301 34 accessed clerked roundly +1042 251302 34 colorful tutor features A +1043 251303 34 versatility boulevard imaginable A +1044 251304 34 cosy shuttered controller +1045 251305 34 Darius quotes racial +1046 251401 34 mastering Caltech uprisings A +1047 251402 34 Asiaticizations Mossberg narrowed A +1048 251403 34 offerers kept cannot A +1049 251404 34 uncles roundly vest +1050 251405 34 sleepwalk features famine +1051 251406 34 Ernestine imaginable sugars +1052 251801 34 checksumming controller exterminated A +1053 251802 34 stopped racial belays +1054 252101 34 sicker uprisings Hodges A +1055 252102 34 Italianization narrowed translatable +1056 252301 34 alphabetic cannot duality A +1057 252302 34 pharmaceutic vest recording A +1058 252303 34 creator famine rouses A +1059 252304 34 chess sugars poison +1060 252305 34 charcoal exterminated attitude +1061 252306 34 Epiphany belays dusted +1062 252307 34 bulldozes Hodges encompasses +1063 252308 34 Pygmalion translatable presentation +1064 252309 34 caressing duality Kantian +1065 256001 34 Palestine recording imprecision A +1066 256002 34 regimented rouses saving +1067 256003 34 scars poison maternal +1068 256004 34 realest attitude hewed +1069 256005 34 diffusing dusted kerosene +1070 258001 34 clubroom encompasses Cubans +1071 258002 34 Blythe presentation photographers +1072 258003 34 ahead Kantian nymph A +1073 258004 34 reviver imprecision bedlam A +1074 258005 34 retransmitting saving north A +1075 258006 34 landslide maternal Schoenberg A +1076 258007 34 Eiffel hewed botany A +1077 258008 34 absentee kerosene curs +1078 258009 34 aye Cubans solidification +1079 258010 34 forked photographers inheritresses +1080 258011 34 Peruvianizes nymph stiller +1081 258101 68 clerked bedlam t1 A +1082 258102 68 tutor north suite A +1083 258103 34 boulevard Schoenberg ransomer +1084 258104 68 shuttered botany Willy +1085 258105 68 quotes curs Rena A +1086 258106 68 Caltech solidification Seattle A +1087 258107 68 Mossberg inheritresses relaxes A +1088 258108 68 kept stiller exclaim +1089 258109 68 roundly t1 implicated A +1090 258110 68 features suite distinguish +1091 258111 68 imaginable ransomer assayed +1092 258112 68 controller Willy homeowner +1093 258113 68 racial Rena and +1094 258201 34 uprisings Seattle stealth +1095 258202 34 narrowed relaxes coinciding A +1096 
258203 34 cannot exclaim founder A +1097 258204 34 vest implicated environing +1098 258205 34 famine distinguish jewelry +1099 258301 34 sugars assayed lemons A +1100 258401 34 exterminated homeowner brokenness A +1101 258402 34 belays and bedpost A +1102 258403 34 Hodges stealth assurers A +1103 258404 34 translatable coinciding annoyers +1104 258405 34 duality founder affixed +1105 258406 34 recording environing warbling +1106 258407 34 rouses jewelry seriously +1107 228123 37 poison lemons boasted +1108 250606 34 attitude brokenness Chantilly +1109 208405 37 dusted bedpost Iranizes +1110 212101 37 encompasses assurers violinist +1111 218206 37 presentation annoyers extramarital +1112 150401 37 Kantian affixed spates +1113 248212 41 imprecision warbling cloakroom +1114 128026 00 saving seriously gazer +1115 128024 00 maternal boasted hand +1116 128027 00 hewed Chantilly tucked +1117 128025 00 kerosene Iranizes gems +1118 128109 00 Cubans violinist clinker +1119 128705 00 photographers extramarital refiner +1120 126303 00 nymph spates callus +1121 128308 00 bedlam cloakroom leopards +1122 128204 00 north gazer comfortingly +1123 128205 00 Schoenberg hand generically +1124 128206 00 botany tucked getters +1125 128207 00 curs gems sexually +1126 118205 00 solidification clinker spear +1127 116801 00 inheritresses refiner serums +1128 116803 00 stiller callus Italianization +1129 116804 00 t1 leopards attendants +1130 116802 00 suite comfortingly spies +1131 128605 00 ransomer generically Anthony +1132 118308 00 Willy getters planar +1133 113702 00 Rena sexually cupped +1134 113703 00 Seattle spear cleanser +1135 112103 00 relaxes serums commuters +1136 118009 00 exclaim Italianization honeysuckle +5136 1118009 00 exclaim Italianization honeysuckle +1137 138011 00 implicated attendants orphanage +1138 138010 00 distinguish spies skies +1139 138012 00 assayed Anthony crushers +1140 068304 00 homeowner planar Puritan +1141 078009 00 and cupped squeezer +1142 108013 00 stealth cleanser bruises +1143 084004 00 coinciding commuters bonfire +1144 083402 00 founder honeysuckle Colombo +1145 084003 00 environing orphanage nondecreasing +1146 088504 00 jewelry skies innocents +1147 088005 00 lemons crushers masked +1148 088007 00 brokenness Puritan file +1149 088006 00 bedpost squeezer brush +1150 148025 00 assurers bruises mutilate +1151 148024 00 annoyers bonfire mommy +1152 138305 00 affixed Colombo bulkheads +1153 138306 00 warbling nondecreasing undeclared +1154 152701 00 seriously innocents displacements +1155 148505 00 boasted masked nieces +1156 158003 00 Chantilly file coeducation +1157 156201 00 Iranizes brush brassy +1158 156202 00 violinist mutilate authenticator +1159 158307 00 extramarital mommy Washoe +1160 158402 00 spates bulkheads penny +1161 158401 00 cloakroom undeclared Flagler +1162 068013 00 gazer displacements stoned +1163 068012 00 hand nieces cranes +1164 068203 00 tucked coeducation masterful +1165 088205 00 gems brassy biracial +1166 068704 00 clinker authenticator steamships +1167 068604 00 refiner Washoe windmills +1168 158502 00 callus penny exploit +1169 123103 00 leopards Flagler riverfront +1170 148026 00 comfortingly stoned sisterly +1171 123302 00 generically cranes sharpshoot +1172 076503 00 getters masterful mittens +1173 126304 00 sexually biracial interdependency +1174 068306 00 spear steamships policy +1175 143504 00 serums windmills unleashing +1176 160201 00 Italianization exploit pretenders +1177 148028 00 attendants riverfront overstatements +1178 148027 
00 spies sisterly birthed +1179 143505 00 Anthony sharpshoot opportunism +1180 108014 00 planar mittens showroom +1181 076104 00 cupped interdependency compromisingly +1182 078106 00 cleanser policy Medicare +1183 126102 00 commuters unleashing corresponds +1184 128029 00 honeysuckle pretenders hardware +1185 128028 00 orphanage overstatements implant +1186 018410 00 skies birthed Alicia +1187 128110 00 crushers opportunism requesting +1188 148506 00 Puritan showroom produced +1189 123303 00 squeezer compromisingly criticizes +1190 123304 00 bruises Medicare backer +1191 068504 00 bonfire corresponds positively +1192 068305 00 Colombo hardware colicky +1193 000000 00 nondecreasing implant thrillingly +1 000001 00 Omaha teethe neat +2 011401 37 breaking dreaded Steinberg W +3 011402 37 Romans scholastics jarring +4 011403 37 intercepted audiology tinily +1 000001 00 Omaha teethe neat +2 011401 37 breaking dreaded Steinberg W +3 011402 37 Romans scholastics jarring +4 011403 37 intercepted audiology tinily +INSERT DELAYED INTO t2 VALUES (4,011403,37,'intercepted','audiology','tinily',''); drop table t1, t2; diff --git a/mysql-test/r/flush_read_lock_kill.result b/mysql-test/r/flush_read_lock_kill.result new file mode 100644 index 00000000000..dfdcf51457a --- /dev/null +++ b/mysql-test/r/flush_read_lock_kill.result @@ -0,0 +1,9 @@ +drop table if exists t1; +create table t1 (kill_id int); +insert into t1 values(connection_id()); + flush tables with read lock; +select ((@id := kill_id) - kill_id) from t1; +((@id := kill_id) - kill_id) +0 +kill connection @id; +drop table t1; diff --git a/mysql-test/r/mysqldump.result b/mysql-test/r/mysqldump.result index 2c8b8816da5..18387107b2f 100644 --- a/mysql-test/r/mysqldump.result +++ b/mysql-test/r/mysqldump.result @@ -367,3 +367,45 @@ CREATE ALGORITHM=UNDEFINED VIEW `test`.`v1` AS select `test`.`t1`.`a` AS `a` fro drop view v1; drop table t1; + +/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */; +/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */; +/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */; +/*!40101 SET NAMES utf8 */; +/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */; +/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */; +/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE="NO_AUTO_VALUE_ON_ZERO" */; + +CREATE DATABASE /*!32312 IF NOT EXISTS*/ `test` /*!40100 DEFAULT CHARACTER SET latin1 */; + +USE `test`; + +/*!40101 SET SQL_MODE=@OLD_SQL_MODE */; +/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */; +/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */; +/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */; +/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */; +/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */; + +create database mysqldump_test_db character set latin2 collate latin2_bin; + +/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */; +/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */; +/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */; +/*!40101 SET NAMES utf8 */; +/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */; +/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */; +/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE="NO_AUTO_VALUE_ON_ZERO" */; + +CREATE DATABASE /*!32312 IF NOT EXISTS*/ `mysqldump_test_db` /*!40100 DEFAULT CHARACTER SET latin2 COLLATE latin2_bin */; + +USE `mysqldump_test_db`; + +/*!40101 SET 
SQL_MODE=@OLD_SQL_MODE */; +/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */; +/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */; +/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */; +/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */; +/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */; + +drop database mysqldump_test_db; diff --git a/mysql-test/r/ndb_charset.result b/mysql-test/r/ndb_charset.result index 501bec99ea3..00bc36a7c0d 100644 --- a/mysql-test/r/ndb_charset.result +++ b/mysql-test/r/ndb_charset.result @@ -189,3 +189,13 @@ p a 5 aaa 6 AAA drop table t1; +create table t1 ( +a varchar(10) primary key +) engine=ndb; +insert into t1 values ('jonas % '); +replace into t1 values ('jonas % '); +replace into t1 values ('jonas % '); +select * from t1; +a +jonas % +drop table t1; diff --git a/mysql-test/r/ps_7ndb.result b/mysql-test/r/ps_7ndb.result index 70118509d0b..a0da0b5c7bb 100644 --- a/mysql-test/r/ps_7ndb.result +++ b/mysql-test/r/ps_7ndb.result @@ -2,20 +2,19 @@ use test; drop table if exists t1, t9 ; create table t1 ( -a int not null, b varchar(30), +a int, b varchar(30), primary key(a) ) engine = 'NDB' ; -drop table if exists t9; create table t9 ( -c1 tinyint not null, c2 smallint, c3 mediumint, c4 int, +c1 tinyint, c2 smallint, c3 mediumint, c4 int, c5 integer, c6 bigint, c7 float, c8 double, c9 double precision, c10 real, c11 decimal(7, 4), c12 numeric(8, 4), c13 date, c14 datetime, c15 timestamp(14), c16 time, c17 year, c18 bit, c19 bool, c20 char, -c21 char(10), c22 varchar(30), c23 char(100), c24 char(100), -c25 char(100), c26 char(100), c27 char(100), c28 char(100), -c29 char(100), c30 char(100), c31 enum('one', 'two', 'three'), +c21 char(10), c22 varchar(30), c23 tinyblob, c24 tinytext, +c25 blob, c26 text, c27 mediumblob, c28 mediumtext, +c29 longblob, c30 longtext, c31 enum('one', 'two', 'three'), c32 set('monday', 'tuesday', 'wednesday'), primary key(c1) ) engine = 'NDB' ; @@ -72,14 +71,14 @@ def test t9 t9 c19 c19 1 1 1 Y 32768 0 63 def test t9 t9 c20 c20 254 1 1 Y 0 0 8 def test t9 t9 c21 c21 253 10 10 Y 0 0 8 def test t9 t9 c22 c22 253 30 30 Y 0 0 8 -def test t9 t9 c23 c23 253 100 8 Y 0 0 8 -def test t9 t9 c24 c24 253 100 8 Y 0 0 8 -def test t9 t9 c25 c25 253 100 4 Y 0 0 8 -def test t9 t9 c26 c26 253 100 4 Y 0 0 8 -def test t9 t9 c27 c27 253 100 10 Y 0 0 8 -def test t9 t9 c28 c28 253 100 10 Y 0 0 8 -def test t9 t9 c29 c29 253 100 8 Y 0 0 8 -def test t9 t9 c30 c30 253 100 8 Y 0 0 8 +def test t9 t9 c23 c23 252 255 8 Y 144 0 63 +def test t9 t9 c24 c24 252 255 8 Y 16 0 8 +def test t9 t9 c25 c25 252 65535 4 Y 144 0 63 +def test t9 t9 c26 c26 252 65535 4 Y 16 0 8 +def test t9 t9 c27 c27 252 16777215 10 Y 144 0 63 +def test t9 t9 c28 c28 252 16777215 10 Y 16 0 8 +def test t9 t9 c29 c29 252 16777215 8 Y 144 0 63 +def test t9 t9 c30 c30 252 16777215 8 Y 16 0 8 def test t9 t9 c31 c31 254 5 3 Y 256 0 8 def test t9 t9 c32 c32 254 24 7 Y 2048 0 8 c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 c11 c12 c13 c14 c15 c16 c17 c18 c19 c20 c21 c22 c23 c24 c25 c26 c27 c28 c29 c30 c31 c32 @@ -1152,7 +1151,7 @@ def table 253 64 2 N 1 31 8 def type 253 10 3 N 1 31 8 def possible_keys 253 4096 0 Y 0 31 8 def key 253 64 0 Y 0 31 8 -def key_len 8 3 0 Y 32800 0 8 +def key_len 253 4096 0 Y 128 31 63 def ref 253 1024 0 Y 0 31 8 def rows 8 10 1 N 32801 0 8 def Extra 253 255 0 N 1 31 8 @@ -1188,7 +1187,7 @@ c29= 'longblob', c30= 'longtext', c31='two', c32= 'tuesday'; commit ; prepare stmt1 from 'delete from t1 where a=2' ; execute stmt1; -select a,b from t1 where a=2 
order by b; +select a,b from t1 where a=2; a b execute stmt1; insert into t1 values(0,NULL); @@ -1203,7 +1202,6 @@ execute stmt1 using @arg00; select a,b from t1 where b=@arg00; a b prepare stmt1 from 'truncate table t1' ; -ERROR HY000: This command is not supported in the prepared statement protocol yet test_sequence ------ update tests ------ delete from t1 ; @@ -1270,18 +1268,23 @@ execute stmt1 using @arg00, @arg00; select a,b from t1 where a=@arg00; a b 2 two +execute stmt1 using @arg01, @arg00; select a,b from t1 where a=@arg01; a b +22 two execute stmt1 using @arg00, @arg01; select a,b from t1 where a=@arg00; a b 2 two set @arg00=NULL; set @arg01=2; +execute stmt1 using @arg00, @arg01; +Warnings: +Warning 1263 Column set to default value; NULL supplied to NOT NULL column 'a' at row 1 select a,b from t1 order by a; a b +0 two 1 one -2 two 3 three 4 four set @arg00=0; @@ -1302,15 +1305,19 @@ create table t2 as select a,b from t1 ; prepare stmt1 from 'update t1 set a=? where b=? and a in (select ? from t2 where b = ? or a = ?)'; +execute stmt1 using @arg00, @arg01, @arg02, @arg03, @arg04 ; +affected rows: 1 +info: Rows matched: 1 Changed: 1 Warnings: 0 select a,b from t1 where a = @arg00 ; a b +23 two prepare stmt1 from 'update t1 set a=? where b=? and a not in (select ? from t2 where b = ? or a = ?)'; execute stmt1 using @arg04, @arg01, @arg02, @arg03, @arg00 ; -affected rows: 0 -info: Rows matched: 0 Changed: 0 Warnings: 0 -select a,b from t1 order by a; +affected rows: 1 +info: Rows matched: 1 Changed: 1 Warnings: 0 +select a,b from t1 order by a ; a b 1 one 2 two @@ -1319,21 +1326,25 @@ a b drop table t2 ; create table t2 ( -a int not null, b varchar(30), +a int, b varchar(30), primary key(a) ) engine = 'NDB' ; insert into t2(a,b) select a, b from t1 ; prepare stmt1 from 'update t1 set a=? where b=? and a in (select ? from t2 where b = ? or a = ?)'; +execute stmt1 using @arg00, @arg01, @arg02, @arg03, @arg04 ; +affected rows: 1 +info: Rows matched: 1 Changed: 1 Warnings: 0 select a,b from t1 where a = @arg00 ; a b +23 two prepare stmt1 from 'update t1 set a=? where b=? and a not in (select ? from t2 where b = ? 
or a = ?)'; execute stmt1 using @arg04, @arg01, @arg02, @arg03, @arg00 ; -affected rows: 0 -info: Rows matched: 0 Changed: 0 Warnings: 0 +affected rows: 1 +info: Rows matched: 1 Changed: 1 Warnings: 0 select a,b from t1 order by a ; a b 1 one @@ -1474,7 +1485,7 @@ set @arg02=82 ; set @arg03='8-2' ; prepare stmt1 from 'insert into t1 values(?,?),(?,?)'; execute stmt1 using @arg00, @arg01, @arg02, @arg03 ; -select a,b from t1 where a in (@arg00,@arg02) order by a ; +select a,b from t1 where a in (@arg00,@arg02) ; a b 81 8-1 82 8-2 @@ -1489,6 +1500,7 @@ set @arg00=6 ; set @arg01=1 ; prepare stmt1 from 'insert into t1 set a=?, b=''sechs'' on duplicate key update a=a + ?, b=concat(b,''modified'') '; +execute stmt1 using @arg00, @arg01; select * from t1 order by a; a b 0 NULL @@ -1497,13 +1509,15 @@ a b 3 three 4 four 5 five -6 six +7 sixmodified 8 eight 9 nine 81 8-1 82 8-2 set @arg00=81 ; set @arg01=1 ; +execute stmt1 using @arg00, @arg01; +ERROR 23000: Duplicate entry '82' for key 1 drop table if exists t2 ; create table t2 (id int auto_increment primary key) ENGINE= 'NDB' ; @@ -1526,23 +1540,32 @@ set @x1100="x1100" ; set @100=100 ; set @updated="updated" ; insert into t1 values(1000,'x1000_1') ; +insert into t1 values(@1000,@x1000_2),(@1000,@x1000_3) +on duplicate key update a = a + @100, b = concat(b,@updated) ; select a,b from t1 where a >= 1000 order by a ; a b -1000 x1000_1 +1000 x1000_3 +1100 x1000_1updated delete from t1 where a >= 1000 ; insert into t1 values(1000,'x1000_1') ; prepare stmt1 from ' insert into t1 values(?,?),(?,?) on duplicate key update a = a + ?, b = concat(b,?) '; +execute stmt1 using @1000, @x1000_2, @1000, @x1000_3, @100, @updated ; select a,b from t1 where a >= 1000 order by a ; a b -1000 x1000_1 +1000 x1000_3 +1100 x1000_1updated delete from t1 where a >= 1000 ; insert into t1 values(1000,'x1000_1') ; +execute stmt1 using @1000, @x1000_2, @1100, @x1000_3, @100, @updated ; select a,b from t1 where a >= 1000 order by a ; a b -1000 x1000_1 +1200 x1000_1updatedupdated delete from t1 where a >= 1000 ; prepare stmt1 from ' replace into t1 (a,b) select 100, ''hundred'' '; +execute stmt1; +execute stmt1; +execute stmt1; test_sequence ------ multi table tests ------ delete from t1 ; @@ -1891,13 +1914,13 @@ def @arg19 254 20 1 Y 128 31 63 def @arg20 254 8192 1 Y 0 31 8 def @arg21 254 8192 10 Y 0 31 8 def @arg22 254 8192 30 Y 0 31 8 -def @arg23 254 8192 8 Y 0 31 8 +def @arg23 254 8192 8 Y 128 31 63 def @arg24 254 8192 8 Y 0 31 8 -def @arg25 254 8192 4 Y 0 31 8 +def @arg25 254 8192 4 Y 128 31 63 def @arg26 254 8192 4 Y 0 31 8 -def @arg27 254 8192 10 Y 0 31 8 +def @arg27 254 8192 10 Y 128 31 63 def @arg28 254 8192 10 Y 0 31 8 -def @arg29 254 8192 8 Y 0 31 8 +def @arg29 254 8192 8 Y 128 31 63 def @arg30 254 8192 8 Y 0 31 8 def @arg31 254 8192 3 Y 0 31 8 def @arg32 254 8192 6 Y 128 31 63 @@ -1938,13 +1961,13 @@ def @arg19 254 20 0 Y 128 31 63 def @arg20 254 8192 0 Y 0 31 8 def @arg21 254 8192 0 Y 0 31 8 def @arg22 254 8192 0 Y 0 31 8 -def @arg23 254 8192 0 Y 0 31 8 +def @arg23 254 8192 0 Y 128 31 63 def @arg24 254 8192 0 Y 0 31 8 -def @arg25 254 8192 0 Y 0 31 8 +def @arg25 254 8192 0 Y 128 31 63 def @arg26 254 8192 0 Y 0 31 8 -def @arg27 254 8192 0 Y 0 31 8 +def @arg27 254 8192 0 Y 128 31 63 def @arg28 254 8192 0 Y 0 31 8 -def @arg29 254 8192 0 Y 0 31 8 +def @arg29 254 8192 0 Y 128 31 63 def @arg30 254 8192 0 Y 0 31 8 def @arg31 254 8192 0 Y 0 31 8 def @arg32 254 8192 0 Y 0 31 8 @@ -1988,13 +2011,13 @@ def @arg19 254 20 1 Y 128 31 63 def @arg20 254 8192 1 Y 0 31 8 def @arg21 
254 8192 10 Y 0 31 8 def @arg22 254 8192 30 Y 0 31 8 -def @arg23 254 8192 8 Y 0 31 8 +def @arg23 254 8192 8 Y 128 31 63 def @arg24 254 8192 8 Y 0 31 8 -def @arg25 254 8192 4 Y 0 31 8 +def @arg25 254 8192 4 Y 128 31 63 def @arg26 254 8192 4 Y 0 31 8 -def @arg27 254 8192 10 Y 0 31 8 +def @arg27 254 8192 10 Y 128 31 63 def @arg28 254 8192 10 Y 0 31 8 -def @arg29 254 8192 8 Y 0 31 8 +def @arg29 254 8192 8 Y 128 31 63 def @arg30 254 8192 8 Y 0 31 8 def @arg31 254 8192 3 Y 0 31 8 def @arg32 254 8192 6 Y 128 31 63 @@ -2028,13 +2051,13 @@ def @arg19 254 20 0 Y 128 31 63 def @arg20 254 8192 0 Y 0 31 8 def @arg21 254 8192 0 Y 0 31 8 def @arg22 254 8192 0 Y 0 31 8 -def @arg23 254 8192 0 Y 0 31 8 +def @arg23 254 8192 0 Y 128 31 63 def @arg24 254 8192 0 Y 0 31 8 -def @arg25 254 8192 0 Y 0 31 8 +def @arg25 254 8192 0 Y 128 31 63 def @arg26 254 8192 0 Y 0 31 8 -def @arg27 254 8192 0 Y 0 31 8 +def @arg27 254 8192 0 Y 128 31 63 def @arg28 254 8192 0 Y 0 31 8 -def @arg29 254 8192 0 Y 0 31 8 +def @arg29 254 8192 0 Y 128 31 63 def @arg30 254 8192 0 Y 0 31 8 def @arg31 254 8192 0 Y 0 31 8 def @arg32 254 8192 0 Y 0 31 8 @@ -2076,13 +2099,13 @@ def @arg19 254 20 1 Y 128 31 63 def @arg20 254 8192 1 Y 0 31 8 def @arg21 254 8192 10 Y 0 31 8 def @arg22 254 8192 30 Y 0 31 8 -def @arg23 254 8192 8 Y 0 31 8 +def @arg23 254 8192 8 Y 128 31 63 def @arg24 254 8192 8 Y 0 31 8 -def @arg25 254 8192 4 Y 0 31 8 +def @arg25 254 8192 4 Y 128 31 63 def @arg26 254 8192 4 Y 0 31 8 -def @arg27 254 8192 10 Y 0 31 8 +def @arg27 254 8192 10 Y 128 31 63 def @arg28 254 8192 10 Y 0 31 8 -def @arg29 254 8192 8 Y 0 31 8 +def @arg29 254 8192 8 Y 128 31 63 def @arg30 254 8192 8 Y 0 31 8 def @arg31 254 8192 3 Y 0 31 8 def @arg32 254 8192 6 Y 128 31 63 @@ -2120,13 +2143,13 @@ def @arg19 254 20 0 Y 128 31 63 def @arg20 254 8192 0 Y 0 31 8 def @arg21 254 8192 0 Y 0 31 8 def @arg22 254 8192 0 Y 0 31 8 -def @arg23 254 8192 0 Y 0 31 8 +def @arg23 254 8192 0 Y 128 31 63 def @arg24 254 8192 0 Y 0 31 8 -def @arg25 254 8192 0 Y 0 31 8 +def @arg25 254 8192 0 Y 128 31 63 def @arg26 254 8192 0 Y 0 31 8 -def @arg27 254 8192 0 Y 0 31 8 +def @arg27 254 8192 0 Y 128 31 63 def @arg28 254 8192 0 Y 0 31 8 -def @arg29 254 8192 0 Y 0 31 8 +def @arg29 254 8192 0 Y 128 31 63 def @arg30 254 8192 0 Y 0 31 8 def @arg31 254 8192 0 Y 0 31 8 def @arg32 254 8192 0 Y 0 31 8 @@ -2166,13 +2189,13 @@ def @arg19 254 20 1 Y 128 31 63 def @arg20 254 8192 1 Y 0 31 8 def @arg21 254 8192 10 Y 0 31 8 def @arg22 254 8192 30 Y 0 31 8 -def @arg23 254 8192 8 Y 0 31 8 +def @arg23 254 8192 8 Y 128 31 63 def @arg24 254 8192 8 Y 0 31 8 -def @arg25 254 8192 4 Y 0 31 8 +def @arg25 254 8192 4 Y 128 31 63 def @arg26 254 8192 4 Y 0 31 8 -def @arg27 254 8192 10 Y 0 31 8 +def @arg27 254 8192 10 Y 128 31 63 def @arg28 254 8192 10 Y 0 31 8 -def @arg29 254 8192 8 Y 0 31 8 +def @arg29 254 8192 8 Y 128 31 63 def @arg30 254 8192 8 Y 0 31 8 def @arg31 254 8192 3 Y 0 31 8 def @arg32 254 8192 6 Y 128 31 63 @@ -2204,13 +2227,13 @@ def @arg19 254 20 0 Y 128 31 63 def @arg20 254 8192 0 Y 0 31 8 def @arg21 254 8192 0 Y 0 31 8 def @arg22 254 8192 0 Y 0 31 8 -def @arg23 254 8192 0 Y 0 31 8 +def @arg23 254 8192 0 Y 128 31 63 def @arg24 254 8192 0 Y 0 31 8 -def @arg25 254 8192 0 Y 0 31 8 +def @arg25 254 8192 0 Y 128 31 63 def @arg26 254 8192 0 Y 0 31 8 -def @arg27 254 8192 0 Y 0 31 8 +def @arg27 254 8192 0 Y 128 31 63 def @arg28 254 8192 0 Y 0 31 8 -def @arg29 254 8192 0 Y 0 31 8 +def @arg29 254 8192 0 Y 128 31 63 def @arg30 254 8192 0 Y 0 31 8 def @arg31 254 8192 0 Y 0 31 8 def @arg32 254 8192 0 Y 0 31 8 @@ 
-2524,12 +2547,12 @@ set @arg00= 9223372036854775807 ; execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00 ; Warnings: -Warning 1264 Data truncated; out of range for column 'c1' at row 1 -Warning 1264 Data truncated; out of range for column 'c2' at row 1 -Warning 1264 Data truncated; out of range for column 'c3' at row 1 -Warning 1264 Data truncated; out of range for column 'c4' at row 1 -Warning 1264 Data truncated; out of range for column 'c5' at row 1 -Warning 1264 Data truncated; out of range for column 'c12' at row 1 +Warning 1264 Out of range value adjusted for column 'c1' at row 1 +Warning 1264 Out of range value adjusted for column 'c2' at row 1 +Warning 1264 Out of range value adjusted for column 'c3' at row 1 +Warning 1264 Out of range value adjusted for column 'c4' at row 1 +Warning 1264 Out of range value adjusted for column 'c5' at row 1 +Warning 1264 Out of range value adjusted for column 'c12' at row 1 execute my_select ; c1 127 c2 32767 @@ -2547,12 +2570,12 @@ set @arg00= '9223372036854775807' ; execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00 ; Warnings: -Warning 1264 Data truncated; out of range for column 'c1' at row 1 -Warning 1264 Data truncated; out of range for column 'c2' at row 1 -Warning 1264 Data truncated; out of range for column 'c3' at row 1 -Warning 1265 Data truncated for column 'c4' at row 1 -Warning 1265 Data truncated for column 'c5' at row 1 -Warning 1264 Data truncated; out of range for column 'c12' at row 1 +Warning 1264 Out of range value adjusted for column 'c1' at row 1 +Warning 1264 Out of range value adjusted for column 'c2' at row 1 +Warning 1264 Out of range value adjusted for column 'c3' at row 1 +Warning 1264 Out of range value adjusted for column 'c4' at row 1 +Warning 1264 Out of range value adjusted for column 'c5' at row 1 +Warning 1264 Out of range value adjusted for column 'c12' at row 1 execute my_select ; c1 127 c2 32767 @@ -2570,12 +2593,12 @@ set @arg00= -9223372036854775808 ; execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00 ; Warnings: -Warning 1264 Data truncated; out of range for column 'c1' at row 1 -Warning 1264 Data truncated; out of range for column 'c2' at row 1 -Warning 1264 Data truncated; out of range for column 'c3' at row 1 -Warning 1264 Data truncated; out of range for column 'c4' at row 1 -Warning 1264 Data truncated; out of range for column 'c5' at row 1 -Warning 1264 Data truncated; out of range for column 'c12' at row 1 +Warning 1264 Out of range value adjusted for column 'c1' at row 1 +Warning 1264 Out of range value adjusted for column 'c2' at row 1 +Warning 1264 Out of range value adjusted for column 'c3' at row 1 +Warning 1264 Out of range value adjusted for column 'c4' at row 1 +Warning 1264 Out of range value adjusted for column 'c5' at row 1 +Warning 1264 Out of range value adjusted for column 'c12' at row 1 execute my_select ; c1 -128 c2 -32768 @@ -2593,12 +2616,12 @@ set @arg00= '-9223372036854775808' ; execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00 ; Warnings: -Warning 1264 Data truncated; out of range for column 'c1' at row 1 -Warning 1264 Data truncated; out of range for column 'c2' at row 1 -Warning 1264 Data truncated; out of range for column 'c3' at row 1 -Warning 1265 Data truncated for column 'c4' at row 1 -Warning 1265 Data truncated for column 'c5' at row 
1 -Warning 1264 Data truncated; out of range for column 'c12' at row 1 +Warning 1264 Out of range value adjusted for column 'c1' at row 1 +Warning 1264 Out of range value adjusted for column 'c2' at row 1 +Warning 1264 Out of range value adjusted for column 'c3' at row 1 +Warning 1264 Out of range value adjusted for column 'c4' at row 1 +Warning 1264 Out of range value adjusted for column 'c5' at row 1 +Warning 1264 Out of range value adjusted for column 'c12' at row 1 execute my_select ; c1 -128 c2 -32768 @@ -2616,14 +2639,14 @@ set @arg00= 1.11111111111111111111e+50 ; execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00 ; Warnings: -Warning 1264 Data truncated; out of range for column 'c1' at row 1 -Warning 1264 Data truncated; out of range for column 'c2' at row 1 -Warning 1264 Data truncated; out of range for column 'c3' at row 1 -Warning 1264 Data truncated; out of range for column 'c4' at row 1 -Warning 1264 Data truncated; out of range for column 'c5' at row 1 -Warning 1264 Data truncated; out of range for column 'c6' at row 1 -Warning 1264 Data truncated; out of range for column 'c7' at row 1 -Warning 1264 Data truncated; out of range for column 'c12' at row 1 +Warning 1264 Out of range value adjusted for column 'c1' at row 1 +Warning 1264 Out of range value adjusted for column 'c2' at row 1 +Warning 1264 Out of range value adjusted for column 'c3' at row 1 +Warning 1264 Out of range value adjusted for column 'c4' at row 1 +Warning 1264 Out of range value adjusted for column 'c5' at row 1 +Warning 1264 Out of range value adjusted for column 'c6' at row 1 +Warning 1264 Out of range value adjusted for column 'c7' at row 1 +Warning 1264 Out of range value adjusted for column 'c12' at row 1 execute my_select ; c1 127 c2 32767 @@ -2647,8 +2670,8 @@ Warning 1265 Data truncated for column 'c3' at row 1 Warning 1265 Data truncated for column 'c4' at row 1 Warning 1265 Data truncated for column 'c5' at row 1 Warning 1265 Data truncated for column 'c6' at row 1 -Warning 1264 Data truncated; out of range for column 'c7' at row 1 -Warning 1264 Data truncated; out of range for column 'c12' at row 1 +Warning 1264 Out of range value adjusted for column 'c7' at row 1 +Warning 1264 Out of range value adjusted for column 'c12' at row 1 execute my_select ; c1 1 c2 1 @@ -2666,14 +2689,14 @@ set @arg00= -1.11111111111111111111e+50 ; execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00 ; Warnings: -Warning 1264 Data truncated; out of range for column 'c1' at row 1 -Warning 1264 Data truncated; out of range for column 'c2' at row 1 -Warning 1264 Data truncated; out of range for column 'c3' at row 1 -Warning 1264 Data truncated; out of range for column 'c4' at row 1 -Warning 1264 Data truncated; out of range for column 'c5' at row 1 -Warning 1264 Data truncated; out of range for column 'c6' at row 1 -Warning 1264 Data truncated; out of range for column 'c7' at row 1 -Warning 1264 Data truncated; out of range for column 'c12' at row 1 +Warning 1264 Out of range value adjusted for column 'c1' at row 1 +Warning 1264 Out of range value adjusted for column 'c2' at row 1 +Warning 1264 Out of range value adjusted for column 'c3' at row 1 +Warning 1264 Out of range value adjusted for column 'c4' at row 1 +Warning 1264 Out of range value adjusted for column 'c5' at row 1 +Warning 1264 Out of range value adjusted for column 'c6' at row 1 +Warning 1264 Out of range value adjusted for column 'c7' at row 
1 +Warning 1264 Out of range value adjusted for column 'c12' at row 1 execute my_select ; c1 -128 c2 -32768 @@ -2697,8 +2720,8 @@ Warning 1265 Data truncated for column 'c3' at row 1 Warning 1265 Data truncated for column 'c4' at row 1 Warning 1265 Data truncated for column 'c5' at row 1 Warning 1265 Data truncated for column 'c6' at row 1 -Warning 1264 Data truncated; out of range for column 'c7' at row 1 -Warning 1264 Data truncated; out of range for column 'c12' at row 1 +Warning 1264 Out of range value adjusted for column 'c7' at row 1 +Warning 1264 Out of range value adjusted for column 'c12' at row 1 execute my_select ; c1 -1 c2 -1 @@ -2770,14 +2793,14 @@ c1 c20 c21 c22 c23 c24 c25 c26 c27 c28 c29 c30 41 4 41 41 41 41 41 41 41 41 41 41 42 4 42 42 42 42 42 42 42 42 42 42 43 4 43 43 43 43 43 43 43 43 43 43 -50 5 50 50 50 50 50 50 50 50 50 50 +50 5 50 50 50.00 50.00 50.00 50.00 50.00 50.00 50.00 50.00 51 5 51 51 51 51 51 51 51 51 51 51 -52 5 52 52 52 52 52 52 52 52 52 52 -53 5 53 53 53 53 53 53 53 53 53 53 -54 5 54 54 54 54 54 54 54 54 54 54 +52 5 52 52 52.00 52.00 52.00 52.00 52.00 52.00 52.00 52.00 +53 5 53 53 53.00 53.00 53.00 53.00 53.00 53.00 53.00 53.00 +54 5 54 54 54.00 54.00 54.00 54.00 54.00 54.00 54.00 54.00 55 5 55 55 55 55 55 55 55 55 55 55 -56 6 56 56 56 56 56 56 56 56 56 56 -57 6 57 57 57 57 57 57 57 57 57 57 +56 6 56 56 56.00 56.00 56.00 56.00 56.00 56.00 56.00 56.00 +57 6 57 57 57.00 57.00 57.00 57.00 57.00 57.00 57.00 57.00 60 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 61 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 62 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL @@ -2929,45 +2952,45 @@ Warning 1265 Data truncated for column 'c17' at row 1 Warnings: Warning 1265 Data truncated for column 'c17' at row 1 Warnings: -Warning 1264 Data truncated; out of range for column 'c13' at row 1 +Warning 1264 Out of range value adjusted for column 'c13' at row 1 Warning 1265 Data truncated for column 'c14' at row 1 Warning 1265 Data truncated for column 'c15' at row 1 -Warning 1264 Data truncated; out of range for column 'c16' at row 1 -Warning 1264 Data truncated; out of range for column 'c17' at row 1 +Warning 1264 Out of range value adjusted for column 'c16' at row 1 +Warning 1264 Out of range value adjusted for column 'c17' at row 1 Warnings: -Warning 1264 Data truncated; out of range for column 'c13' at row 1 +Warning 1264 Out of range value adjusted for column 'c13' at row 1 Warning 1265 Data truncated for column 'c14' at row 1 Warning 1265 Data truncated for column 'c15' at row 1 -Warning 1264 Data truncated; out of range for column 'c16' at row 1 -Warning 1264 Data truncated; out of range for column 'c17' at row 1 +Warning 1264 Out of range value adjusted for column 'c16' at row 1 +Warning 1264 Out of range value adjusted for column 'c17' at row 1 Warnings: -Warning 1264 Data truncated; out of range for column 'c13' at row 1 +Warning 1264 Out of range value adjusted for column 'c13' at row 1 Warning 1265 Data truncated for column 'c14' at row 1 Warning 1265 Data truncated for column 'c15' at row 1 -Warning 1264 Data truncated; out of range for column 'c16' at row 1 -Warning 1264 Data truncated; out of range for column 'c17' at row 1 +Warning 1264 Out of range value adjusted for column 'c16' at row 1 +Warning 1264 Out of range value adjusted for column 'c17' at row 1 Warnings: -Warning 1264 Data truncated; out of range for column 'c13' at row 1 +Warning 1264 Out of range value adjusted for column 'c13' at row 1 Warning 1265 Data truncated for 
column 'c14' at row 1 Warning 1265 Data truncated for column 'c15' at row 1 -Warning 1264 Data truncated; out of range for column 'c16' at row 1 -Warning 1264 Data truncated; out of range for column 'c17' at row 1 +Warning 1264 Out of range value adjusted for column 'c16' at row 1 +Warning 1264 Out of range value adjusted for column 'c17' at row 1 Warnings: Warning 1265 Data truncated for column 'c15' at row 1 -Warning 1264 Data truncated; out of range for column 'c16' at row 1 -Warning 1264 Data truncated; out of range for column 'c17' at row 1 +Warning 1264 Out of range value adjusted for column 'c16' at row 1 +Warning 1264 Out of range value adjusted for column 'c17' at row 1 Warnings: Warning 1265 Data truncated for column 'c15' at row 1 -Warning 1264 Data truncated; out of range for column 'c16' at row 1 -Warning 1264 Data truncated; out of range for column 'c17' at row 1 +Warning 1264 Out of range value adjusted for column 'c16' at row 1 +Warning 1264 Out of range value adjusted for column 'c17' at row 1 Warnings: Warning 1265 Data truncated for column 'c15' at row 1 -Warning 1264 Data truncated; out of range for column 'c16' at row 1 -Warning 1264 Data truncated; out of range for column 'c17' at row 1 +Warning 1264 Out of range value adjusted for column 'c16' at row 1 +Warning 1264 Out of range value adjusted for column 'c17' at row 1 select c1, c13, c14, c15, c16, c17 from t9 order by c1 ; c1 c13 c14 c15 c16 c17 20 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991 diff --git a/mysql-test/r/strict.result b/mysql-test/r/strict.result index 3d735be9423..a3a8c5e9fb2 100644 --- a/mysql-test/r/strict.result +++ b/mysql-test/r/strict.result @@ -531,6 +531,7 @@ Warning 1264 Out of range value adjusted for column 'col1' at row 1 Warning 1264 Out of range value adjusted for column 'col2' at row 1 Warning 1264 Out of range value adjusted for column 'col1' at row 2 Warning 1264 Out of range value adjusted for column 'col2' at row 2 +Warning 1264 Out of range value adjusted for column 'col2' at row 2 UPDATE IGNORE t1 SET col2=1/NULL where col1=0; SELECT * FROM t1; col1 col2 diff --git a/mysql-test/t/archive.test b/mysql-test/t/archive.test index f55aea6e104..e710de7b35e 100644 --- a/mysql-test/t/archive.test +++ b/mysql-test/t/archive.test @@ -1299,4 +1299,15 @@ INSERT INTO t2 VALUES (4,011403,37,'intercepted','audiology','tinily',''); SELECT * FROM t2; OPTIMIZE TABLE t2; SELECT * FROM t2; + +# +# Test bulk inserts +INSERT INTO t2 VALUES (1,000001,00,'Omaha','teethe','neat','') , (2,011401,37,'breaking','dreaded','Steinberg','W') , (3,011402,37,'Romans','scholastics','jarring','') , (4,011403,37,'intercepted','audiology','tinily',''); +SELECT * FROM t2; + +# Just test syntax; we will never know if the output is right or wrong +INSERT DELAYED INTO t2 VALUES (4,011403,37,'intercepted','audiology','tinily',''); +# +# Cleanup, test is over +# drop table t1, t2; diff --git a/mysql-test/t/flush_read_lock_kill-master.opt b/mysql-test/t/flush_read_lock_kill-master.opt new file mode 100644 index 00000000000..e7fe203239c --- /dev/null +++ b/mysql-test/t/flush_read_lock_kill-master.opt @@ -0,0 +1 @@ +--debug=d,make_global_read_lock_block_commit_loop diff
--git a/mysql-test/t/flush_read_lock_kill.test b/mysql-test/t/flush_read_lock_kill.test new file mode 100644 index 00000000000..b711bc63e0e --- /dev/null +++ b/mysql-test/t/flush_read_lock_kill.test @@ -0,0 +1,46 @@ +# Let's see if FLUSH TABLES WITH READ LOCK can be killed when waiting +# for running commits to finish (in the past it could not). +# This will not be a meaningful test on non-debug servers, so it will be +# skipped. +# If running mysql-test-run --debug, the --debug added by +# mysql-test-run to the mysqld command line will override the one from +# -master.opt. But this test is designed to still pass then (though it +# won't test anything interesting). + +-- source include/have_debug.inc + +connect (con1,localhost,root,,); +connect (con2,localhost,root,,); +connection con1; + +--disable_warnings +drop table if exists t1; +--enable_warnings +create table t1 (kill_id int); +insert into t1 values(connection_id()); + +# Thanks to the parameter we passed to --debug, this FLUSH will +# block on a debug build running with our --debug=make_global... It +# will block until killed. In other cases (non-debug build or other +# --debug) it will succeed immediately. + +connection con1; +send flush tables with read lock; + +# kill con1 +connection con2; +select ((@id := kill_id) - kill_id) from t1; + +--sleep 2; # leave time for FLUSH to block +kill connection @id; + +connection con1; +# On debug builds it will be error 1053 (killed); on a non-debug build, or a +# debug build running without our --debug=make_global..., it will be +# error 0 (no error). The only important thing to test is that on +# debug builds with our --debug=make_global... we don't hang forever. +--error 0,1053 +reap; + +connection con2; +drop table t1; diff --git a/mysql-test/t/mysqldump.test b/mysql-test/t/mysqldump.test index e8bb98d5ae0..7a811e69147 100644 --- a/mysql-test/t/mysqldump.test +++ b/mysql-test/t/mysqldump.test @@ -136,3 +136,13 @@ create view v1 as select * from t1; --exec $MYSQL_DUMP --skip-comments test drop view v1; drop table t1; + +# +# Bug #6101: create database problem +# + +--exec $MYSQL_DUMP --skip-comments --databases test + +create database mysqldump_test_db character set latin2 collate latin2_bin; +--exec $MYSQL_DUMP --skip-comments --databases mysqldump_test_db; +drop database mysqldump_test_db; diff --git a/mysql-test/t/ndb_charset.test b/mysql-test/t/ndb_charset.test index f1ec0485e12..1b9e7e8bfcc 100644 --- a/mysql-test/t/ndb_charset.test +++ b/mysql-test/t/ndb_charset.test @@ -157,3 +157,13 @@ select * from t1 where a = 'AaA' order by p; # 6 select * from t1 where a = 'AAA' order by p; drop table t1; + +# bug +create table t1 ( + a varchar(10) primary key +) engine=ndb; +insert into t1 values ('jonas % '); +replace into t1 values ('jonas % '); +replace into t1 values ('jonas % '); +select * from t1; +drop table t1; diff --git a/mysql-test/t/ps_7ndb.test b/mysql-test/t/ps_7ndb.test index 22370a7f3ac..b558f2f3c21 100644 --- a/mysql-test/t/ps_7ndb.test +++ b/mysql-test/t/ps_7ndb.test @@ -12,365 +12,11 @@ use test; -- source include/have_ndb.inc let $type= 'NDB' ; ---disable_warnings -drop table if exists t1, t9 ; ---enable_warnings -eval create table t1 -( - a int not null, b varchar(30), - primary key(a) -) engine = $type ; - ---disable_warnings -drop table if exists t9; ---enable_warnings -# The used table type doesn't support BLOB/TEXT columns. -# (The server would send error 1163 .) -# So we use char(100) instead.
-eval create table t9 -( - c1 tinyint not null, c2 smallint, c3 mediumint, c4 int, - c5 integer, c6 bigint, c7 float, c8 double, - c9 double precision, c10 real, c11 decimal(7, 4), c12 numeric(8, 4), - c13 date, c14 datetime, c15 timestamp(14), c16 time, - c17 year, c18 bit, c19 bool, c20 char, - c21 char(10), c22 varchar(30), c23 char(100), c24 char(100), - c25 char(100), c26 char(100), c27 char(100), c28 char(100), - c29 char(100), c30 char(100), c31 enum('one', 'two', 'three'), - c32 set('monday', 'tuesday', 'wednesday'), - primary key(c1) -) engine = $type ; +-- source include/ps_create.inc -- source include/ps_renew.inc -- source include/ps_query.inc -# The following line is deactivated, because the ndb storage engine is not able -# to do primary key column updates . -#-- source include/ps_modify.inc -# let's include all statements which will work ---disable_query_log -select '------ delete tests ------' as test_sequence ; ---enable_query_log ---source include/ps_renew.inc - -## delete without parameter -prepare stmt1 from 'delete from t1 where a=2' ; -execute stmt1; -select a,b from t1 where a=2 order by b; -# delete with row not found -execute stmt1; - -## delete with one parameter in the where clause -insert into t1 values(0,NULL); -set @arg00=NULL; -prepare stmt1 from 'delete from t1 where b=?' ; -execute stmt1 using @arg00; -select a,b from t1 where b is NULL ; -set @arg00='one'; -execute stmt1 using @arg00; -select a,b from t1 where b=@arg00; - -## truncate a table ---error 1295 -prepare stmt1 from 'truncate table t1' ; - - ---disable_query_log -select '------ update tests ------' as test_sequence ; ---enable_query_log ---source include/ps_renew.inc - -## update without parameter -prepare stmt1 from 'update t1 set b=''a=two'' where a=2' ; -execute stmt1; -select a,b from t1 where a=2; -# dummy update -execute stmt1; -select a,b from t1 where a=2; - -## update with one parameter in the set clause -set @arg00=NULL; -prepare stmt1 from 'update t1 set b=? where a=2' ; -execute stmt1 using @arg00; -select a,b from t1 where a=2; -set @arg00='two'; -execute stmt1 using @arg00; -select a,b from t1 where a=2; - -## update with one parameter in the where cause -set @arg00=2; -prepare stmt1 from 'update t1 set b=NULL where a=?' ; -execute stmt1 using @arg00; -select a,b from t1 where a=@arg00; -update t1 set b='two' where a=@arg00; -# row not found in update -set @arg00=2000; -execute stmt1 using @arg00; -select a,b from t1 where a=@arg00; - -## update on primary key column (two parameters) -set @arg00=2; -set @arg01=22; -prepare stmt1 from 'update t1 set a=? where a=?' ; -# dummy update -execute stmt1 using @arg00, @arg00; -select a,b from t1 where a=@arg00; -# deactivated primary key column update -# execute stmt1 using @arg01, @arg00; -select a,b from t1 where a=@arg01; -execute stmt1 using @arg00, @arg01; -select a,b from t1 where a=@arg00; -set @arg00=NULL; -set @arg01=2; -# deactivated primary key column update -# execute stmt1 using @arg00, @arg01; -select a,b from t1 order by a; -set @arg00=0; -execute stmt1 using @arg01, @arg00; -select a,b from t1 order by a; - -## update with subquery and several parameters -set @arg00=23; -set @arg01='two'; -set @arg02=2; -set @arg03='two'; -set @arg04=2; ---disable_warnings -drop table if exists t2; ---enable_warnings -# t2 will be of table type 'MYISAM' -create table t2 as select a,b from t1 ; -prepare stmt1 from 'update t1 set a=? where b=? - and a in (select ? from t2 - where b = ? 
or a = ?)'; ---enable_info -# deactivated primary key column update -# execute stmt1 using @arg00, @arg01, @arg02, @arg03, @arg04 ; ---disable_info -select a,b from t1 where a = @arg00 ; -prepare stmt1 from 'update t1 set a=? where b=? - and a not in (select ? from t2 - where b = ? or a = ?)'; ---enable_info -execute stmt1 using @arg04, @arg01, @arg02, @arg03, @arg00 ; ---disable_info -select a,b from t1 order by a; -drop table t2 ; -# t2 is now of table type '$type' -# The test battery for table type 'MERGE' gets here only a 'MYISAM' table -eval create table t2 -( - a int not null, b varchar(30), - primary key(a) -) engine = $type ; -insert into t2(a,b) select a, b from t1 ; -prepare stmt1 from 'update t1 set a=? where b=? - and a in (select ? from t2 - where b = ? or a = ?)'; ---enable_info -# deactivated primary key column update -# execute stmt1 using @arg00, @arg01, @arg02, @arg03, @arg04 ; ---disable_info -select a,b from t1 where a = @arg00 ; -prepare stmt1 from 'update t1 set a=? where b=? - and a not in (select ? from t2 - where b = ? or a = ?)'; ---enable_info -execute stmt1 using @arg04, @arg01, @arg02, @arg03, @arg00 ; ---disable_info -select a,b from t1 order by a ; -drop table t2 ; - -## update with parameters in limit -set @arg00=1; -prepare stmt1 from 'update t1 set b=''bla'' -where a=2 -limit 1'; -execute stmt1 ; -select a,b from t1 where b = 'bla' ; -# currently (May 2004, Version 4.1) it is impossible --- error 1064 -prepare stmt1 from 'update t1 set b=''bla'' -where a=2 -limit ?'; - ---disable_query_log -select '------ insert tests ------' as test_sequence ; ---enable_query_log ---source include/ps_renew.inc - -## insert without parameter -prepare stmt1 from 'insert into t1 values(5, ''five'' )'; -execute stmt1; -select a,b from t1 where a = 5; - -## insert with one parameter in values part -set @arg00='six' ; -prepare stmt1 from 'insert into t1 values(6, ? )'; -execute stmt1 using @arg00; -select a,b from t1 where b = @arg00; -# the second insert fails, because the first column is primary key ---error 1062 -execute stmt1 using @arg00; -set @arg00=NULL ; -prepare stmt1 from 'insert into t1 values(0, ? )'; -execute stmt1 using @arg00; -select a,b from t1 where b is NULL; - -## insert with two parameter in values part -set @arg00=8 ; -set @arg01='eight' ; -prepare stmt1 from 'insert into t1 values(?, ? 
)'; -execute stmt1 using @arg00, @arg01 ; -select a,b from t1 where b = @arg01; -# cases derived from client_test.c: test_null() -set @NULL= null ; -set @arg00= 'abc' ; -# execute must fail, because first column is primary key (-> not null) ---error 1048 -execute stmt1 using @NULL, @NULL ; ---error 1048 -execute stmt1 using @NULL, @NULL ; ---error 1048 -execute stmt1 using @NULL, @arg00 ; ---error 1048 -execute stmt1 using @NULL, @arg00 ; -let $1 = 2; -while ($1) -{ - eval set @arg01= 10000 + $1 ; - execute stmt1 using @arg01, @arg00 ; - dec $1; -} -select * from t1 where a > 10000 order by a ; -delete from t1 where a > 10000 ; -let $1 = 2; -while ($1) -{ - eval set @arg01= 10000 + $1 ; - execute stmt1 using @arg01, @NULL ; - dec $1; -} -select * from t1 where a > 10000 order by a ; -delete from t1 where a > 10000 ; -let $1 = 10; -while ($1) -{ - eval set @arg01= 10000 + $1 ; - execute stmt1 using @arg01, @arg01 ; - dec $1; -} -select * from t1 where a > 10000 order by a ; -delete from t1 where a > 10000 ; - - -## insert with two rows in values part -set @arg00=81 ; -set @arg01='8-1' ; -set @arg02=82 ; -set @arg03='8-2' ; -prepare stmt1 from 'insert into t1 values(?,?),(?,?)'; -execute stmt1 using @arg00, @arg01, @arg02, @arg03 ; -select a,b from t1 where a in (@arg00,@arg02) order by a ; - -## insert with two parameter in the set part -set @arg00=9 ; -set @arg01='nine' ; -prepare stmt1 from 'insert into t1 set a=?, b=? '; -execute stmt1 using @arg00, @arg01 ; -select a,b from t1 where a = @arg00 ; - -## insert with parameters in the ON DUPLICATE KEY part -set @arg00=6 ; -set @arg01=1 ; -prepare stmt1 from 'insert into t1 set a=?, b=''sechs'' - on duplicate key update a=a + ?, b=concat(b,''modified'') '; -# There is no primary key collision, so there will be no key column update -# If a key column update would be necessary occurs BUG#4312 -# deactivated, activate when BUG#4312: is solved -# execute stmt1 using @arg00, @arg01; -select * from t1 order by a; -set @arg00=81 ; -set @arg01=1 ; -# deactivated, activate when BUG#4312: is solved -# execute stmt1 using @arg00, @arg01; - -## insert, autoincrement column and ' SELECT LAST_INSERT_ID() ' -# cases derived from client_test.c: test_bug3117() ---disable_warnings -drop table if exists t2 ; ---enable_warnings -# The test battery for table type 'MERGE' gets here only a 'MYISAM' table -eval create table t2 (id int auto_increment primary key) -ENGINE= $type ; -prepare stmt1 from ' select last_insert_id() ' ; -insert into t2 values (NULL) ; -execute stmt1 ; -insert into t2 values (NULL) ; -execute stmt1 ; -drop table t2 ; - -## many parameters -set @1000=1000 ; -set @x1000_2="x1000_2" ; -set @x1000_3="x1000_3" ; - -set @x1000="x1000" ; -set @1100=1100 ; -set @x1100="x1100" ; -set @100=100 ; -set @updated="updated" ; -insert into t1 values(1000,'x1000_1') ; -# deactivated, activate when BUG#4312: is solved -# insert into t1 values(@1000,@x1000_2),(@1000,@x1000_3) -# on duplicate key update a = a + @100, b = concat(b,@updated) ; -select a,b from t1 where a >= 1000 order by a ; -delete from t1 where a >= 1000 ; -insert into t1 values(1000,'x1000_1') ; -prepare stmt1 from ' insert into t1 values(?,?),(?,?) - on duplicate key update a = a + ?, b = concat(b,?) 
'; -# deactivated, activate when BUG#4312: is solved -# execute stmt1 using @1000, @x1000_2, @1000, @x1000_3, @100, @updated ; -select a,b from t1 where a >= 1000 order by a ; -delete from t1 where a >= 1000 ; -insert into t1 values(1000,'x1000_1') ; -# deactivated, activate when BUG#4312: is solved -# execute stmt1 using @1000, @x1000_2, @1100, @x1000_3, @100, @updated ; -select a,b from t1 where a >= 1000 order by a ; -delete from t1 where a >= 1000 ; - -## replace -prepare stmt1 from ' replace into t1 (a,b) select 100, ''hundred'' '; ---error 1031 - -## multi table statements ---disable_query_log -select '------ multi table tests ------' as test_sequence ; ---enable_query_log -# cases derived from client_test.c: test_multi -delete from t1 ; -delete from t9 ; -insert into t1(a,b) values (1, 'one'), (2, 'two'), (3, 'three') ; -insert into t9 (c1,c21) - values (1, 'one'), (2, 'two'), (3, 'three') ; -prepare stmt_delete from " delete t1, t9 - from t1, t9 where t1.a=t9.c1 and t1.b='updated' "; -prepare stmt_update from " update t1, t9 - set t1.b='updated', t9.c21='updated' - where t1.a=t9.c1 and t1.a=? "; -prepare stmt_select1 from " select a, b from t1 order by a" ; -prepare stmt_select2 from " select c1, c21 from t9 order by c1" ; -set @arg00= 1 ; -let $1= 3 ; -while ($1) -{ - execute stmt_update using @arg00 ; - execute stmt_delete ; - execute stmt_select1 ; - execute stmt_select2 ; - set @arg00= @arg00 + 1 ; - dec $1 ; -} - +-- source include/ps_modify.inc -- source include/ps_modify1.inc -- source include/ps_conv.inc diff --git a/ndb/include/kernel/signaldata/BackupImpl.hpp b/ndb/include/kernel/signaldata/BackupImpl.hpp index 1872069daa7..2ac91570aad 100644 --- a/ndb/include/kernel/signaldata/BackupImpl.hpp +++ b/ndb/include/kernel/signaldata/BackupImpl.hpp @@ -78,15 +78,15 @@ public: STATIC_CONST( SignalLength = 3 ); enum ErrorCode { - Undefined = 200, - FailedToAllocateBuffers = 202, - FailedToSetupFsBuffers = 203, - FailedToAllocateTables = 204, - FailedInsertFileHeader = 205, - FailedInsertTableList = 206, - FailedAllocateTableMem = 207, - FailedToAllocateFileRecord = 208, - FailedToAllocateAttributeRecord = 209 + Undefined = 1340, + FailedToAllocateBuffers = 1342, + FailedToSetupFsBuffers = 1343, + FailedToAllocateTables = 1344, + FailedInsertFileHeader = 1345, + FailedInsertTableList = 1346, + FailedAllocateTableMem = 1347, + FailedToAllocateFileRecord = 1348, + FailedToAllocateAttributeRecord = 1349 }; private: Uint32 backupId; diff --git a/ndb/include/kernel/signaldata/BackupSignalData.hpp b/ndb/include/kernel/signaldata/BackupSignalData.hpp index 42eb8464d53..fb018026a49 100644 --- a/ndb/include/kernel/signaldata/BackupSignalData.hpp +++ b/ndb/include/kernel/signaldata/BackupSignalData.hpp @@ -119,12 +119,13 @@ public: private: enum ErrorCodes { - Undefined = 100, - IAmNotMaster = 101, - OutOfBackupRecord = 102, - OutOfResources = 103, - SequenceFailure = 104, - BackupDefinitionNotImplemented = 105 + Undefined = 1300, + IAmNotMaster = 1301, + OutOfBackupRecord = 1302, + OutOfResources = 1303, + SequenceFailure = 1304, + BackupDefinitionNotImplemented = 1305, + CannotBackupDiskless = 1306 }; Uint32 senderData; Uint32 errorCode; @@ -232,13 +233,13 @@ public: STATIC_CONST( SignalLength = 3 ); enum RequestType { - ClientAbort = 1, - BackupComplete = 2, - BackupFailure = 3, // General backup failure coordinator -> slave - LogBufferFull = 4, // slave -> coordinator - FileOrScanError = 5, // slave -> coordinator - BackupFailureDueToNodeFail = 6, // slave -> slave - OkToClean = 7 // 
master -> slave + ClientAbort = 1321, + BackupComplete = 1322, + BackupFailure = 1323, // General backup failure coordinator -> slave + LogBufferFull = 1324, // slave -> coordinator + FileOrScanError = 1325, // slave -> coordinator + BackupFailureDueToNodeFail = 1326, // slave -> slave + OkToClean = 1327 // master -> slave }; private: Uint32 requestType; diff --git a/ndb/include/mgmapi/mgmapi.h b/ndb/include/mgmapi/mgmapi.h index a23417f153a..dc4f745adb2 100644 --- a/ndb/include/mgmapi/mgmapi.h +++ b/ndb/include/mgmapi/mgmapi.h @@ -375,6 +375,7 @@ extern "C" { int ndb_mgm_get_configuration_nodeid(NdbMgmHandle handle); int ndb_mgm_get_connected_port(NdbMgmHandle handle); const char *ndb_mgm_get_connected_host(NdbMgmHandle handle); + const char *ndb_mgm_get_connectstring(NdbMgmHandle handle, char *buf, int buf_sz); /** * Destroy a management server handle @@ -746,6 +747,7 @@ extern "C" { int ndb_mgm_get_string_parameter(const ndb_mgm_configuration_iterator*, int param, const char ** value); int ndb_mgm_purge_stale_sessions(NdbMgmHandle handle, char **); + int ndb_mgm_check_connection(NdbMgmHandle handle); #ifdef __cplusplus } #endif diff --git a/ndb/include/mgmcommon/ConfigRetriever.hpp b/ndb/include/mgmcommon/ConfigRetriever.hpp index 80449628867..8461658748e 100644 --- a/ndb/include/mgmcommon/ConfigRetriever.hpp +++ b/ndb/include/mgmcommon/ConfigRetriever.hpp @@ -72,6 +72,7 @@ public: Uint32 get_mgmd_port() const; const char *get_mgmd_host() const; + const char *get_connectstring(char *buf, int buf_sz) const; Uint32 get_configuration_nodeid() const; private: diff --git a/ndb/include/ndbapi/ndb_cluster_connection.hpp b/ndb/include/ndbapi/ndb_cluster_connection.hpp index 59d5a038844..6fa25caf5d0 100644 --- a/ndb/include/ndbapi/ndb_cluster_connection.hpp +++ b/ndb/include/ndbapi/ndb_cluster_connection.hpp @@ -30,12 +30,14 @@ class Ndb_cluster_connection { public: Ndb_cluster_connection(const char * connect_string = 0); ~Ndb_cluster_connection(); - int connect(int reconnect= 0); + int connect(int no_retries, int retry_delay_in_seconds, int verbose); int start_connect_thread(int (*connect_callback)(void)= 0); + const char *get_connectstring(char *buf, int buf_sz) const; + int get_connected_port() const; + const char *get_connected_host() const; private: friend void* run_ndb_cluster_connection_connect_thread(void*); void connect_thread(); - char *m_connect_string; TransporterFacade *m_facade; ConfigRetriever *m_config_retriever; NdbThread *m_connect_thread; diff --git a/ndb/include/ndbapi/ndberror.h b/ndb/include/ndbapi/ndberror.h index 5c2d85b82a6..ceb1881a4cc 100644 --- a/ndb/include/ndbapi/ndberror.h +++ b/ndb/include/ndbapi/ndberror.h @@ -46,7 +46,8 @@ typedef enum ndberror_cl_internal_error = 12, ndberror_cl_function_not_implemented = 13, ndberror_cl_unknown_error_code = 14, - ndberror_cl_node_shutdown = 15 + ndberror_cl_node_shutdown = 15, + ndberror_cl_configuration = 16 } ndberror_classification_enum; diff --git a/ndb/src/common/logger/Logger.cpp b/ndb/src/common/logger/Logger.cpp index 1dc3bd43716..f6f70fbeff7 100644 --- a/ndb/src/common/logger/Logger.cpp +++ b/ndb/src/common/logger/Logger.cpp @@ -174,7 +174,7 @@ Logger::addHandler(const BaseString &logstring) { logstring.split(logdest, ";"); for(i = 0; i < logdest.size(); i++) { - DBUG_PRINT("info",("adding: %s",logdest[i])); + DBUG_PRINT("info",("adding: %s",logdest[i].c_str())); Vector v_type_args; logdest[i].split(v_type_args, ":", 2); diff --git a/ndb/src/common/mgmcommon/ConfigRetriever.cpp 
b/ndb/src/common/mgmcommon/ConfigRetriever.cpp index 0af5eb2f83c..744412870f5 100644 --- a/ndb/src/common/mgmcommon/ConfigRetriever.cpp +++ b/ndb/src/common/mgmcommon/ConfigRetriever.cpp @@ -90,6 +90,11 @@ const char *ConfigRetriever::get_mgmd_host() const return ndb_mgm_get_connected_host(m_handle); } +const char *ConfigRetriever::get_connectstring(char *buf, int buf_sz) const +{ + return ndb_mgm_get_connectstring(m_handle, buf, buf_sz); +} + //**************************************************************************** //**************************************************************************** diff --git a/ndb/src/kernel/blocks/backup/Backup.cpp b/ndb/src/kernel/blocks/backup/Backup.cpp index e6fe63d9014..9fc00883792 100644 --- a/ndb/src/kernel/blocks/backup/Backup.cpp +++ b/ndb/src/kernel/blocks/backup/Backup.cpp @@ -863,6 +863,13 @@ Backup::execBACKUP_REQ(Signal* signal) sendBackupRef(senderRef, signal, senderData, BackupRef::IAmNotMaster); return; }//if + + if (m_diskless) + { + sendBackupRef(senderRef, signal, senderData, + BackupRef::CannotBackupDiskless); + return; + } if(dataLen32 != 0) { jam(); diff --git a/ndb/src/kernel/blocks/backup/Backup.hpp b/ndb/src/kernel/blocks/backup/Backup.hpp index 4dc2cd13ae0..fb29cb03b96 100644 --- a/ndb/src/kernel/blocks/backup/Backup.hpp +++ b/ndb/src/kernel/blocks/backup/Backup.hpp @@ -526,6 +526,7 @@ public: NdbNodeBitmask c_aliveNodes; DLList c_backups; Config c_defaults; + Uint32 m_diskless; STATIC_CONST(NO_OF_PAGES_META_FILE = 2); diff --git a/ndb/src/kernel/blocks/backup/BackupInit.cpp b/ndb/src/kernel/blocks/backup/BackupInit.cpp index 8daad05558b..d98541f2ea8 100644 --- a/ndb/src/kernel/blocks/backup/BackupInit.cpp +++ b/ndb/src/kernel/blocks/backup/BackupInit.cpp @@ -42,6 +42,7 @@ Backup::Backup(const Configuration & conf) : ndbrequire(p != 0); Uint32 noBackups = 0, noTables = 0, noAttribs = 0; + ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DB_DISCLESS, &m_diskless)); ndb_mgm_get_int_parameter(p, CFG_DB_PARALLEL_BACKUPS, &noBackups); ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DB_NO_TABLES, &noTables)); ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DB_NO_ATTRIBUTES, &noAttribs)); diff --git a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp index 9278a6b16e3..21be6dbfe34 100644 --- a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp +++ b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp @@ -7286,6 +7286,8 @@ void Dblqh::closeScanRequestLab(Signal* signal) scanptr.p->m_curr_batch_size_rows = 0; scanptr.p->m_curr_batch_size_bytes= 0; sendScanFragConf(signal, ZTRUE); + abort_scan(signal, scanptr.i, 0); + return; break; case TcConnectionrec::SCAN_TUPKEY: case TcConnectionrec::SCAN_FIRST_STOPPED: @@ -7675,14 +7677,18 @@ void Dblqh::abort_scan(Signal* signal, Uint32 scan_ptr_i, Uint32 errcode){ releaseScanrec(signal); tcConnectptr.p->transactionState = TcConnectionrec::IDLE; tcConnectptr.p->abortState = TcConnectionrec::ABORT_ACTIVE; - - ScanFragRef * ref = (ScanFragRef*)&signal->theData[0]; - ref->senderData = tcConnectptr.p->clientConnectrec; - ref->transId1 = tcConnectptr.p->transid[0]; - ref->transId2 = tcConnectptr.p->transid[1]; - ref->errorCode = errcode; - sendSignal(tcConnectptr.p->clientBlockref, GSN_SCAN_FRAGREF, signal, - ScanFragRef::SignalLength, JBB); + + if(errcode) + { + jam(); + ScanFragRef * ref = (ScanFragRef*)&signal->theData[0]; + ref->senderData = tcConnectptr.p->clientConnectrec; + ref->transId1 = tcConnectptr.p->transid[0]; + ref->transId2 = tcConnectptr.p->transid[1]; + ref->errorCode = 
errcode; + sendSignal(tcConnectptr.p->clientBlockref, GSN_SCAN_FRAGREF, signal, + ScanFragRef::SignalLength, JBB); + } deleteTransidHash(signal); releaseOprec(signal); releaseTcrec(signal, tcConnectptr); diff --git a/ndb/src/kernel/blocks/dbtup/DbtupRoutines.cpp b/ndb/src/kernel/blocks/dbtup/DbtupRoutines.cpp index b2c3634ae9e..8c1ba24d710 100644 --- a/ndb/src/kernel/blocks/dbtup/DbtupRoutines.cpp +++ b/ndb/src/kernel/blocks/dbtup/DbtupRoutines.cpp @@ -706,7 +706,10 @@ Dbtup::checkUpdateOfPrimaryKey(Uint32* updateBuffer, Tablerec* const regTabPtr) tOutBufIndex = 0; tMaxRead = MAX_KEY_SIZE_IN_WORDS; + bool tmp = tXfrmFlag; + tXfrmFlag = false; ndbrequire((this->*f)(&keyReadBuffer[0], ahOut, attrDescriptor, attributeOffset)); + tXfrmFlag = tmp; ndbrequire(tOutBufIndex == ahOut->getDataSize()); if (ahIn.getDataSize() != ahOut->getDataSize()) { ljam(); diff --git a/ndb/src/mgmapi/LocalConfig.cpp b/ndb/src/mgmapi/LocalConfig.cpp index 8f1e2ee8100..1dc805557ee 100644 --- a/ndb/src/mgmapi/LocalConfig.cpp +++ b/ndb/src/mgmapi/LocalConfig.cpp @@ -298,13 +298,21 @@ char * LocalConfig::makeConnectString(char *buf, int sz) { int p= BaseString::snprintf(buf,sz,"nodeid=%d", _ownNodeId); - for (int i = 0; (i < ids.size()) && (sz-p > 0); i++) - { - if (ids[i].type != MgmId_TCP) - continue; - p+=BaseString::snprintf(buf+p,sz-p,",%s:%d", - ids[i].name.c_str(), ids[i].port); - } + if (p < sz) + for (unsigned i = 0; i < ids.size(); i++) + { + if (ids[i].type != MgmId_TCP) + continue; + int new_p= p+BaseString::snprintf(buf+p,sz-p,",%s:%d", + ids[i].name.c_str(), ids[i].port); + if (new_p < sz) + p= new_p; + else + { + buf[p]= 0; + break; + } + } buf[sz-1]=0; return buf; } diff --git a/ndb/src/mgmapi/mgmapi.cpp b/ndb/src/mgmapi/mgmapi.cpp index 831d14eac52..651a4a8a725 100644 --- a/ndb/src/mgmapi/mgmapi.cpp +++ b/ndb/src/mgmapi/mgmapi.cpp @@ -84,7 +84,6 @@ typedef Parser Parser_t; #define NDB_MGM_MAX_ERR_DESC_SIZE 256 struct ndb_mgm_handle { - char * connectstring; int cfg_i; int connected; @@ -1677,6 +1676,12 @@ const char *ndb_mgm_get_connected_host(NdbMgmHandle handle) return handle->cfg.ids[handle->cfg_i].name.c_str(); } +extern "C" +const char *ndb_mgm_get_connectstring(NdbMgmHandle handle, char *buf, int buf_sz) +{ + return handle->cfg.makeConnectString(buf,buf_sz); +} + extern "C" int ndb_mgm_alloc_nodeid(NdbMgmHandle handle, unsigned int version, int nodetype) @@ -1934,4 +1939,38 @@ ndb_mgm_purge_stale_sessions(NdbMgmHandle handle, char **purged){ return res; } +extern "C" +int +ndb_mgm_check_connection(NdbMgmHandle handle){ + CHECK_HANDLE(handle, 0); + CHECK_CONNECTED(handle, 0); + SocketOutputStream out(handle->socket); + SocketInputStream in(handle->socket, handle->read_timeout); + char buf[32]; + + if (out.println("check connection")) + goto ndb_mgm_check_connection_error; + + if (out.println("")) + goto ndb_mgm_check_connection_error; + + in.gets(buf, sizeof(buf)); + if(strcmp("check connection reply\n", buf)) + goto ndb_mgm_check_connection_error; + + in.gets(buf, sizeof(buf)); + if(strcmp("result: Ok\n", buf)) + goto ndb_mgm_check_connection_error; + + in.gets(buf, sizeof(buf)); + if(strcmp("\n", buf)) + goto ndb_mgm_check_connection_error; + + return 0; + +ndb_mgm_check_connection_error: + ndb_mgm_disconnect(handle); + return -1; +} + template class Vector*>; diff --git a/ndb/src/mgmclient/CommandInterpreter.cpp b/ndb/src/mgmclient/CommandInterpreter.cpp index bfe8b6786b4..7ef62da9bb3 100644 --- a/ndb/src/mgmclient/CommandInterpreter.cpp +++ b/ndb/src/mgmclient/CommandInterpreter.cpp @@ -54,10 
+54,11 @@ public: * * @return true until quit/bye/exit has been typed */ - int execute(const char *_line, int _try_reconnect=-1); + int execute(const char *_line, int _try_reconnect=-1, int *error= 0); private: void printError(); + int execute_impl(const char *_line); /** * Analyse the command line, after the first token. @@ -96,7 +97,7 @@ private: void executeShow(char* parameters); void executeConnect(char* parameters); void executePurge(char* parameters); - void executeShutdown(char* parameters); + int executeShutdown(char* parameters); void executeRun(char* parameters); void executeInfo(char* parameters); void executeClusterLog(char* parameters); @@ -121,7 +122,7 @@ public: void executeStatus(int processId, const char* parameters, bool all); void executeEventReporting(int processId, const char* parameters, bool all); void executeDumpState(int processId, const char* parameters, bool all); - void executeStartBackup(char * parameters); + int executeStartBackup(char * parameters); void executeAbortBackup(char * parameters); void executeRep(char* parameters); @@ -156,6 +157,7 @@ private: bool connected; int m_verbose; int try_reconnect; + int m_error; #ifdef HAVE_GLOBAL_REPLICATION NdbRepHandle m_repserver; const char *rep_host; @@ -179,9 +181,9 @@ Ndb_mgmclient::~Ndb_mgmclient() { delete m_cmd; } -int Ndb_mgmclient::execute(const char *_line, int _try_reconnect) +int Ndb_mgmclient::execute(const char *_line, int _try_reconnect, int *error) { - return m_cmd->execute(_line,_try_reconnect); + return m_cmd->execute(_line,_try_reconnect,error); } int Ndb_mgmclient::disconnect() @@ -227,7 +229,7 @@ extern "C" { #include #include -int Ndb_mgmclient::execute(int argc, char** argv, int _try_reconnect) +int Ndb_mgmclient::execute(int argc, char** argv, int _try_reconnect, int *error) { if (argc <= 0) return 0; @@ -236,7 +238,7 @@ int Ndb_mgmclient::execute(int argc, char** argv, int _try_reconnect) { _line.appfmt(" %s", argv[i]); } - return m_cmd->execute(_line.c_str(),_try_reconnect); + return m_cmd->execute(_line.c_str(),_try_reconnect, error); } /***************************************************************************** @@ -277,7 +279,7 @@ static const char* helpText = "REP CONNECT Connect to REP server on host:port\n" #endif "PURGE STALE SESSIONS Reset reserved nodeid's in the mgmt server\n" -"CONNECT Connect to management server (reconnect if already connected)\n" +"CONNECT [] Connect to management server (reconnect if already connected)\n" "QUIT Quit management client\n" ; @@ -427,6 +429,8 @@ emptyString(const char* s) void CommandInterpreter::printError() { + if (ndb_mgm_check_connection(m_mgmsrv)) + connected= false; ndbout_c("* %5d: %s", ndb_mgm_get_latest_error(m_mgmsrv), ndb_mgm_get_latest_error_msg(m_mgmsrv)); @@ -469,13 +473,24 @@ CommandInterpreter::disconnect() //***************************************************************************** int -CommandInterpreter::execute(const char *_line, int _try_reconnect) +CommandInterpreter::execute(const char *_line, int _try_reconnect, + int *error) { - DBUG_ENTER("CommandInterpreter::execute"); - DBUG_PRINT("info",("line=\"%s\"",_line)); - if (_try_reconnect >= 0) try_reconnect=_try_reconnect; + int result= execute_impl(_line); + if (error) + *error= m_error; + return result; +} + +int +CommandInterpreter::execute_impl(const char *_line) +{ + DBUG_ENTER("CommandInterpreter::execute_impl"); + DBUG_PRINT("enter",("line=\"%s\"",_line)); + m_error= 0; + char * line; if(_line == NULL) { // ndbout << endl; @@ -488,84 +503,80 @@ 
CommandInterpreter::execute(const char *_line, int _try_reconnect) DBUG_RETURN(true); } - for (unsigned int i = 0; i < strlen(line); ++i) { - line[i] = toupper(line[i]); - } - // if there is anything in the line proceed char* firstToken = strtok(line, " "); char* allAfterFirstToken = strtok(NULL, ""); - if (strcmp(firstToken, "HELP") == 0 || - strcmp(firstToken, "?") == 0) { + if (strcasecmp(firstToken, "HELP") == 0 || + strcasecmp(firstToken, "?") == 0) { executeHelp(allAfterFirstToken); DBUG_RETURN(true); } - else if (strcmp(firstToken, "CONNECT") == 0) { + else if (strcasecmp(firstToken, "CONNECT") == 0) { executeConnect(allAfterFirstToken); DBUG_RETURN(true); } + else if((strcasecmp(firstToken, "QUIT") == 0 || + strcasecmp(firstToken, "EXIT") == 0 || + strcasecmp(firstToken, "BYE") == 0) && + allAfterFirstToken == NULL){ + DBUG_RETURN(false); + } if (!connect()) DBUG_RETURN(true); - if (strcmp(firstToken, "SHOW") == 0) { + if (strcasecmp(firstToken, "SHOW") == 0) { executeShow(allAfterFirstToken); DBUG_RETURN(true); } - else if (strcmp(firstToken, "SHUTDOWN") == 0) { - executeShutdown(allAfterFirstToken); + else if (strcasecmp(firstToken, "SHUTDOWN") == 0) { + m_error= executeShutdown(allAfterFirstToken); DBUG_RETURN(true); } - else if (strcmp(firstToken, "CLUSTERLOG") == 0){ + else if (strcasecmp(firstToken, "CLUSTERLOG") == 0){ executeClusterLog(allAfterFirstToken); DBUG_RETURN(true); } - else if(strcmp(firstToken, "START") == 0 && + else if(strcasecmp(firstToken, "START") == 0 && allAfterFirstToken != NULL && - strncmp(allAfterFirstToken, "BACKUP", sizeof("BACKUP") - 1) == 0){ - executeStartBackup(allAfterFirstToken); + strncasecmp(allAfterFirstToken, "BACKUP", sizeof("BACKUP") - 1) == 0){ + m_error= executeStartBackup(allAfterFirstToken); DBUG_RETURN(true); } - else if(strcmp(firstToken, "ABORT") == 0 && + else if(strcasecmp(firstToken, "ABORT") == 0 && allAfterFirstToken != NULL && - strncmp(allAfterFirstToken, "BACKUP", sizeof("BACKUP") - 1) == 0){ + strncasecmp(allAfterFirstToken, "BACKUP", sizeof("BACKUP") - 1) == 0){ executeAbortBackup(allAfterFirstToken); DBUG_RETURN(true); } - else if (strcmp(firstToken, "PURGE") == 0) { + else if (strcasecmp(firstToken, "PURGE") == 0) { executePurge(allAfterFirstToken); DBUG_RETURN(true); } #ifdef HAVE_GLOBAL_REPLICATION - else if(strcmp(firstToken, "REPLICATION") == 0 || - strcmp(firstToken, "REP") == 0) { + else if(strcasecmp(firstToken, "REPLICATION") == 0 || + strcasecmp(firstToken, "REP") == 0) { executeRep(allAfterFirstToken); DBUG_RETURN(true); } #endif // HAVE_GLOBAL_REPLICATION - else if(strcmp(firstToken, "ENTER") == 0 && + else if(strcasecmp(firstToken, "ENTER") == 0 && allAfterFirstToken != NULL && - strncmp(allAfterFirstToken, "SINGLE USER MODE ", + strncasecmp(allAfterFirstToken, "SINGLE USER MODE ", sizeof("SINGLE USER MODE") - 1) == 0){ executeEnterSingleUser(allAfterFirstToken); DBUG_RETURN(true); } - else if(strcmp(firstToken, "EXIT") == 0 && + else if(strcasecmp(firstToken, "EXIT") == 0 && allAfterFirstToken != NULL && - strncmp(allAfterFirstToken, "SINGLE USER MODE ", + strncasecmp(allAfterFirstToken, "SINGLE USER MODE ", sizeof("SINGLE USER MODE") - 1) == 0){ executeExitSingleUser(allAfterFirstToken); DBUG_RETURN(true); } - else if (strcmp(firstToken, "ALL") == 0) { + else if (strcasecmp(firstToken, "ALL") == 0) { analyseAfterFirstToken(-1, allAfterFirstToken); - } - else if((strcmp(firstToken, "QUIT") == 0 || - strcmp(firstToken, "EXIT") == 0 || - strcmp(firstToken, "BYE") == 0) && - allAfterFirstToken == NULL){ - 
DBUG_RETURN(false); } else { /** * First token should be a digit, node ID @@ -573,12 +584,12 @@ CommandInterpreter::execute(const char *_line, int _try_reconnect) int nodeId; if (! convert(firstToken, nodeId)) { - ndbout << "Invalid command: " << line << endl; + ndbout << "Invalid command: " << _line << endl; ndbout << "Type HELP for help." << endl << endl; DBUG_RETURN(true); } - if (nodeId < 0) { + if (nodeId <= 0) { ndbout << "Invalid node ID: " << firstToken << "." << endl; DBUG_RETURN(true); } @@ -639,7 +650,7 @@ CommandInterpreter::analyseAfterFirstToken(int processId, ExecuteFunction fun = 0; const char * command = 0; for(int i = 0; i*fun)(processId, allAfterSecondToken, false); ndbout << endl; @@ -705,10 +716,10 @@ CommandInterpreter::executeForAll(const char * cmd, ExecuteFunction fun, const char * allAfterSecondToken) { int nodeId = 0; - if(strcmp(cmd, "STOP") == 0) { + if(strcasecmp(cmd, "STOP") == 0) { ndbout_c("Executing STOP on all nodes."); (this->*fun)(nodeId, allAfterSecondToken, true); - } else if(strcmp(cmd, "RESTART") == 0) { + } else if(strcasecmp(cmd, "RESTART") == 0) { ndbout_c("Executing RESTART on all nodes."); ndbout_c("Starting shutdown. This may take a while. Please wait..."); (this->*fun)(nodeId, allAfterSecondToken, true); @@ -723,7 +734,7 @@ CommandInterpreter::executeForAll(const char * cmd, ExecuteFunction fun, } NdbAutoPtr ap1((char*)cl); while(get_next_nodeid(cl, &nodeId, NDB_MGM_NODE_TYPE_NDB)) { - if(strcmp(cmd, "STATUS") != 0) + if(strcasecmp(cmd, "STATUS") != 0) ndbout_c("Executing %s on node %d.", cmd, nodeId); (this->*fun)(nodeId, allAfterSecondToken, true); ndbout << endl; @@ -751,7 +762,7 @@ CommandInterpreter::parseBlockSpecification(const char* allAfterLog, firstTokenAfterLog[i] = toupper(firstTokenAfterLog[i]); } - if (strcmp(firstTokenAfterLog, "BLOCK") != 0) { + if (strcasecmp(firstTokenAfterLog, "BLOCK") != 0) { ndbout << "Unexpected value: " << firstTokenAfterLog << ". Expected BLOCK." << endl; return false; @@ -764,7 +775,7 @@ CommandInterpreter::parseBlockSpecification(const char* allAfterLog, } char* secondTokenAfterLog = strtok(allAfterFirstToken, " "); - if (strcmp(secondTokenAfterLog, "=") != 0) { + if (strcasecmp(secondTokenAfterLog, "=") != 0) { ndbout << "Unexpected value: " << secondTokenAfterLog << ". Expected =." 
<< endl; return false; @@ -772,7 +783,7 @@ CommandInterpreter::parseBlockSpecification(const char* allAfterLog, char* blockName = strtok(NULL, " "); bool all = false; - if (blockName != NULL && (strcmp(blockName, "ALL") == 0)) { + if (blockName != NULL && (strcasecmp(blockName, "ALL") == 0)) { all = true; } while (blockName != NULL) { @@ -823,15 +834,15 @@ CommandInterpreter::executeHelp(char* parameters) ndbout << " = " << "0 - 15" << endl; ndbout << " = " << "ALL | Any database node id" << endl; ndbout << endl; - } else if (strcmp(parameters, "SHOW") == 0) { + } else if (strcasecmp(parameters, "SHOW") == 0) { ndbout << helpTextShow; #ifdef HAVE_GLOBAL_REPLICATION - } else if (strcmp(parameters, "REPLICATION") == 0 || - strcmp(parameters, "REP") == 0) { + } else if (strcasecmp(parameters, "REPLICATION") == 0 || + strcasecmp(parameters, "REP") == 0) { ndbout << helpTextRep; #endif // HAVE_GLOBAL_REPLICATION #ifdef VM_TRACE // DEBUG ONLY - } else if (strcmp(parameters, "DEBUG") == 0) { + } else if (strcasecmp(parameters, "DEBUG") == 0) { ndbout << helpTextDebug; #endif } else { @@ -845,23 +856,23 @@ CommandInterpreter::executeHelp(char* parameters) * SHUTDOWN *****************************************************************************/ -void +int CommandInterpreter::executeShutdown(char* parameters) { ndb_mgm_cluster_state *state = ndb_mgm_get_status(m_mgmsrv); if(state == NULL) { ndbout_c("Could not get status"); printError(); - return; + return 1; } NdbAutoPtr ap1((char*)state); int result = 0; result = ndb_mgm_stop(m_mgmsrv, 0, 0); if (result < 0) { - ndbout << "Shutdown failed." << endl; + ndbout << "Shutdown of NDB Cluster storage node(s) failed." << endl; printError(); - return; + return result; } ndbout << result << " NDB Cluster storage node(s) have shutdown." << endl; @@ -876,21 +887,23 @@ CommandInterpreter::executeShutdown(char* parameters) ndbout << "Unable to locate management server, " << "shutdown manually with STOP" << endl; - return; + return 1; } } } - result = 0; result = ndb_mgm_stop(m_mgmsrv, 1, &mgm_id); if (result <= 0) { - ndbout << "Shutdown failed." << endl; + ndbout << "Shutdown of NDB Cluster management server failed." << endl; printError(); - return; + if (result == 0) + return 1; + return result; } + connected = false; ndbout << "NDB Cluster management server shutdown."
<< endl; - exit(0); + return 0; } /***************************************************************************** @@ -939,7 +952,7 @@ print_nodes(ndb_mgm_cluster_state *state, ndb_mgm_configuration_iterator *it, const char *hostname= node_state->connect_address; if (hostname == 0 || strlen(hostname) == 0 - || strcmp(hostname,"0.0.0.0") == 0) + || strcasecmp(hostname,"0.0.0.0") == 0) ndbout << " "; else ndbout << "\t@" << hostname; @@ -984,9 +997,9 @@ CommandInterpreter::executePurge(char* parameters) break; char* firstToken = strtok(parameters, " "); char* nextToken = strtok(NULL, " \0"); - if (strcmp(firstToken,"STALE") == 0 && + if (strcasecmp(firstToken,"STALE") == 0 && nextToken && - strcmp(nextToken, "SESSIONS") == 0) { + strcasecmp(nextToken, "SESSIONS") == 0) { command_ok= 1; break; } @@ -1019,9 +1032,6 @@ CommandInterpreter::executeShow(char* parameters) { int i; if (emptyString(parameters)) { - ndbout << "Cluster Configuration" << endl - << "---------------------" << endl; - ndb_mgm_cluster_state *state = ndb_mgm_get_status(m_mgmsrv); if(state == NULL) { ndbout_c("Could not get status"); @@ -1081,22 +1091,24 @@ CommandInterpreter::executeShow(char* parameters) } } + ndbout << "Cluster Configuration" << endl + << "---------------------" << endl; print_nodes(state, it, "ndbd", ndb_nodes, NDB_MGM_NODE_TYPE_NDB, master_id); print_nodes(state, it, "ndb_mgmd", mgm_nodes, NDB_MGM_NODE_TYPE_MGM, 0); print_nodes(state, it, "mysqld", api_nodes, NDB_MGM_NODE_TYPE_API, 0); // ndbout << helpTextShow; return; - } else if (strcmp(parameters, "PROPERTIES") == 0 || - strcmp(parameters, "PROP") == 0) { + } else if (strcasecmp(parameters, "PROPERTIES") == 0 || + strcasecmp(parameters, "PROP") == 0) { ndbout << "SHOW PROPERTIES is not yet implemented." << endl; // ndbout << "_mgmtSrvr.getConfig()->print();" << endl; /* XXX */ - } else if (strcmp(parameters, "CONFIGURATION") == 0 || - strcmp(parameters, "CONFIG") == 0){ + } else if (strcasecmp(parameters, "CONFIGURATION") == 0 || + strcasecmp(parameters, "CONFIG") == 0){ ndbout << "SHOW CONFIGURATION is not yet implemented." << endl; //nbout << "_mgmtSrvr.getConfig()->printConfigFile();" << endl; /* XXX */ - } else if (strcmp(parameters, "PARAMETERS") == 0 || - strcmp(parameters, "PARAMS") == 0 || - strcmp(parameters, "PARAM") == 0) { + } else if (strcasecmp(parameters, "PARAMETERS") == 0 || + strcasecmp(parameters, "PARAMS") == 0 || + strcasecmp(parameters, "PARAM") == 0) { ndbout << "SHOW PARAMETERS is not yet implemented." 
<< endl; // ndbout << "_mgmtSrvr.getConfig()->getConfigInfo()->print();" // << endl; /* XXX */ @@ -1109,6 +1121,14 @@ void CommandInterpreter::executeConnect(char* parameters) { disconnect(); + if (!emptyString(parameters)) { + if (ndb_mgm_set_connectstring(m_mgmsrv, + BaseString(parameters).trim().c_str())) + { + printError(); + return; + } + } connect(); } @@ -1132,7 +1152,7 @@ CommandInterpreter::executeClusterLog(char* parameters) /******************** * CLUSTERLOG FILTER ********************/ - if (strcmp(item, "FILTER") == 0) { + if (strcasecmp(item, "FILTER") == 0) { item = strtok_r(NULL, " ", &tmpPtr); if (item == NULL) { @@ -1141,21 +1161,21 @@ CommandInterpreter::executeClusterLog(char* parameters) while (item != NULL) { snprintf(name, sizeof(name), item); - if (strcmp(item, "ALL") == 0) { + if (strcasecmp(item, "ALL") == 0) { severity = NDB_MGM_CLUSTERLOG_ALL; - } else if (strcmp(item, "ALERT") == 0) { + } else if (strcasecmp(item, "ALERT") == 0) { severity = NDB_MGM_CLUSTERLOG_ALERT; - } else if (strcmp(item, "CRITICAL") == 0) { + } else if (strcasecmp(item, "CRITICAL") == 0) { severity = NDB_MGM_CLUSTERLOG_CRITICAL; - } else if (strcmp(item, "ERROR") == 0) { + } else if (strcasecmp(item, "ERROR") == 0) { severity = NDB_MGM_CLUSTERLOG_ERROR; - } else if (strcmp(item, "WARNING") == 0) { + } else if (strcasecmp(item, "WARNING") == 0) { severity = NDB_MGM_CLUSTERLOG_WARNING; - } else if (strcmp(item, "INFO") == 0) { + } else if (strcasecmp(item, "INFO") == 0) { severity = NDB_MGM_CLUSTERLOG_INFO; - } else if (strcmp(item, "DEBUG") == 0) { + } else if (strcasecmp(item, "DEBUG") == 0) { severity = NDB_MGM_CLUSTERLOG_DEBUG; - } else if (strcmp(item, "OFF") == 0) { + } else if (strcasecmp(item, "OFF") == 0) { severity = NDB_MGM_CLUSTERLOG_OFF; } else { isOk = false; @@ -1168,17 +1188,17 @@ CommandInterpreter::executeClusterLog(char* parameters) ndbout << "Missing argument(s)." << endl; } else if (isOk) { if(ndb_mgm_filter_clusterlog(m_mgmsrv, severity, NULL)) { - if(strcmp(name, "ALL") == 0 || strcmp(name, "all") == 0) { + if(strcasecmp(name, "ALL") == 0) { ndbout << "All severities levels enabled." << endl; - } else if(strcmp(name, "OFF") == 0 || strcmp(name, "off") == 0) { + } else if(strcasecmp(name, "OFF") == 0) { ndbout << "Cluster logging enabled." << endl; } else { ndbout << name << " events disabled." << endl; } } else { - if(strcmp(name, "ALL") == 0) { + if(strcasecmp(name, "ALL") == 0) { ndbout << "All severities levels disabled." << endl; - } else if(strcmp(name, "OFF") == 0) { + } else if(strcasecmp(name, "OFF") == 0) { ndbout << "Cluster logging disabled." << endl; } else { ndbout << name << " events enabled." 
<< endl; @@ -1191,7 +1211,7 @@ CommandInterpreter::executeClusterLog(char* parameters) /******************** * CLUSTERLOG INFO ********************/ - } else if (strcmp(item, "INFO") == 0) { + } else if (strcasecmp(item, "INFO") == 0) { Uint32 *enabled = ndb_mgm_get_logfilter(m_mgmsrv); if(enabled == NULL) { ndbout << "Couldn't get status" << endl; @@ -1216,7 +1236,7 @@ CommandInterpreter::executeClusterLog(char* parameters) /******************** * CLUSTERLOG OFF ********************/ - } else if (strcmp(item, "OFF") == 0) { + } else if (strcasecmp(item, "OFF") == 0) { Uint32 *enabled = ndb_mgm_get_logfilter(m_mgmsrv); if(enabled == NULL) { ndbout << "Couldn't get status" << endl; @@ -1234,7 +1254,7 @@ CommandInterpreter::executeClusterLog(char* parameters) /******************** * CLUSTERLOG ON ********************/ - } else if (strcmp(item, "ON") == 0) { + } else if (strcasecmp(item, "ON") == 0) { Uint32 *enabled = ndb_mgm_get_logfilter(m_mgmsrv); if(enabled == NULL) { ndbout << "Could not get status" << endl; @@ -1358,11 +1378,11 @@ CommandInterpreter::executeRestart(int processId, const char* parameters, char * tmpPtr = 0; char * item = strtok_r(tmpString, " ", &tmpPtr); while(item != NULL){ - if(strcmp(item, "-N") == 0) + if(strcasecmp(item, "-N") == 0) nostart = 1; - if(strcmp(item, "-I") == 0) + if(strcasecmp(item, "-I") == 0) initialstart = 1; - if(strcmp(item, "-A") == 0) + if(strcasecmp(item, "-A") == 0) abort = 1; item = strtok_r(NULL, " ", &tmpPtr); } @@ -1591,7 +1611,7 @@ CommandInterpreter::executeTrace(int /*processId*/, int result = _mgmtSrvr.setTraceNo(processId, traceNo); if (result != 0) { - ndbout << _mgmtSrvr.getErrorText(result) << endl; + ndbout << get_error_text(result) << endl; } #endif } @@ -1751,7 +1771,7 @@ CommandInterpreter::executeSet(int /*processId*/, } } else { - ndbout << _mgmtSrvr.getErrorText(result) << endl; + ndbout << get_error_text(result) << endl; if (configBackupFileUpdated && configPrimaryFileUpdated) { ndbout << "The configuration files are however updated and " << "the value will be used next time the process is restarted." 
@@ -1786,7 +1806,7 @@ void CommandInterpreter::executeGetStat(int /*processId*/, MgmtSrvr::Statistics statistics; int result = _mgmtSrvr.getStatistics(processId, statistics); if (result != 0) { - ndbout << _mgmtSrvr.getErrorText(result) << endl; + ndbout << get_error_text(result) << endl; return; } #endif @@ -1856,7 +1876,7 @@ CommandInterpreter::executeEventReporting(int processId, /***************************************************************************** * Backup *****************************************************************************/ -void +int CommandInterpreter::executeStartBackup(char* /*parameters*/) { struct ndb_mgm_reply reply; @@ -1869,7 +1889,7 @@ CommandInterpreter::executeStartBackup(char* /*parameters*/) ndbout << "Start of backup failed" << endl; printError(); close(fd); - return; + return result; } char *tmp; @@ -1900,6 +1920,7 @@ CommandInterpreter::executeStartBackup(char* /*parameters*/) } while(tmp && tmp[0] != 0); close(fd); + return 0; } void @@ -1966,7 +1987,7 @@ CommandInterpreter::executeRep(char* parameters) unsigned int repId; - if (!strcmp(firstToken, "CONNECT")) { + if (!strcasecmp(firstToken, "CONNECT")) { char * host = strtok(NULL, "\0"); for (unsigned int i = 0; i < strlen(host); ++i) { host[i] = tolower(host[i]); @@ -2001,30 +2022,30 @@ CommandInterpreter::executeRep(char* parameters) /******** * START ********/ - if (!strcmp(firstToken, "START")) { + if (!strcasecmp(firstToken, "START")) { unsigned int req; char *startType = strtok(NULL, "\0"); if (startType == NULL) { req = GrepReq::START; - } else if (!strcmp(startType, "SUBSCRIPTION")) { + } else if (!strcasecmp(startType, "SUBSCRIPTION")) { req = GrepReq::START_SUBSCR; - } else if (!strcmp(startType, "METALOG")) { + } else if (!strcasecmp(startType, "METALOG")) { req = GrepReq::START_METALOG; - } else if (!strcmp(startType, "METASCAN")) { + } else if (!strcasecmp(startType, "METASCAN")) { req = GrepReq::START_METASCAN; - } else if (!strcmp(startType, "DATALOG")) { + } else if (!strcasecmp(startType, "DATALOG")) { req = GrepReq::START_DATALOG; - } else if (!strcmp(startType, "DATASCAN")) { + } else if (!strcasecmp(startType, "DATASCAN")) { req = GrepReq::START_DATASCAN; - } else if (!strcmp(startType, "REQUESTOR")) { + } else if (!strcasecmp(startType, "REQUESTOR")) { req = GrepReq::START_REQUESTOR; - } else if (!strcmp(startType, "TRANSFER")) { + } else if (!strcasecmp(startType, "TRANSFER")) { req = GrepReq::START_TRANSFER; - } else if (!strcmp(startType, "APPLY")) { + } else if (!strcasecmp(startType, "APPLY")) { req = GrepReq::START_APPLY; - } else if (!strcmp(startType, "DELETE")) { + } else if (!strcasecmp(startType, "DELETE")) { req = GrepReq::START_DELETE; } else { ndbout_c("Illegal argument to command 'REPLICATION START'"); @@ -2044,7 +2065,7 @@ CommandInterpreter::executeRep(char* parameters) /******** * STOP ********/ - if (!strcmp(firstToken, "STOP")) { + if (!strcasecmp(firstToken, "STOP")) { unsigned int req; char *startType = strtok(NULL, " "); unsigned int epoch = 0; @@ -2054,7 +2075,7 @@ CommandInterpreter::executeRep(char* parameters) * Stop immediately */ req = GrepReq::STOP; - } else if (!strcmp(startType, "EPOCH")) { + } else if (!strcasecmp(startType, "EPOCH")) { char *strEpoch = strtok(NULL, "\0"); if(strEpoch == NULL) { ndbout_c("Epoch expected!"); @@ -2062,23 +2083,23 @@ CommandInterpreter::executeRep(char* parameters) } req = GrepReq::STOP; epoch=atoi(strEpoch); - } else if (!strcmp(startType, "SUBSCRIPTION")) { + } else if (!strcasecmp(startType, "SUBSCRIPTION")) { 
req = GrepReq::STOP_SUBSCR; - } else if (!strcmp(startType, "METALOG")) { + } else if (!strcasecmp(startType, "METALOG")) { req = GrepReq::STOP_METALOG; - } else if (!strcmp(startType, "METASCAN")) { + } else if (!strcasecmp(startType, "METASCAN")) { req = GrepReq::STOP_METASCAN; - } else if (!strcmp(startType, "DATALOG")) { + } else if (!strcasecmp(startType, "DATALOG")) { req = GrepReq::STOP_DATALOG; - } else if (!strcmp(startType, "DATASCAN")) { + } else if (!strcasecmp(startType, "DATASCAN")) { req = GrepReq::STOP_DATASCAN; - } else if (!strcmp(startType, "REQUESTOR")) { + } else if (!strcasecmp(startType, "REQUESTOR")) { req = GrepReq::STOP_REQUESTOR; - } else if (!strcmp(startType, "TRANSFER")) { + } else if (!strcasecmp(startType, "TRANSFER")) { req = GrepReq::STOP_TRANSFER; - } else if (!strcmp(startType, "APPLY")) { + } else if (!strcasecmp(startType, "APPLY")) { req = GrepReq::STOP_APPLY; - } else if (!strcmp(startType, "DELETE")) { + } else if (!strcasecmp(startType, "DELETE")) { req = GrepReq::STOP_DELETE; } else { ndbout_c("Illegal argument to command 'REPLICATION STOP'"); @@ -2097,7 +2118,7 @@ CommandInterpreter::executeRep(char* parameters) /********* * STATUS *********/ - if (!strcmp(firstToken, "STATUS")) { + if (!strcasecmp(firstToken, "STATUS")) { struct rep_state repstate; int result = ndb_rep_get_status(m_repserver, &repId, &reply, &repstate); @@ -2117,7 +2138,7 @@ CommandInterpreter::executeRep(char* parameters) /********* * QUERY (see repapi.h for querable counters) *********/ - if (!strcmp(firstToken, "QUERY")) { + if (!strcasecmp(firstToken, "QUERY")) { char *query = strtok(NULL, "\0"); int queryCounter=-1; if(query != NULL) { diff --git a/ndb/src/mgmclient/main.cpp b/ndb/src/mgmclient/main.cpp index 08d5d60cfab..84e27790705 100644 --- a/ndb/src/mgmclient/main.cpp +++ b/ndb/src/mgmclient/main.cpp @@ -60,10 +60,15 @@ static const char default_prompt[]= "ndb_mgm> "; static unsigned _try_reconnect; static char *opt_connect_str= 0; static const char *prompt= default_prompt; +static char *opt_execute_str= 0; static struct my_option my_long_options[] = { NDB_STD_OPTS("ndb_mgm"), + { "execute", 'e', + "execute command and exit", + (gptr*) &opt_execute_str, (gptr*) &opt_execute_str, 0, + GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0 }, { "try-reconnect", 't', "Specify number of tries for connecting to ndb_mgmd (0 = infinite)", (gptr*) &_try_reconnect, (gptr*) &_try_reconnect, 0, @@ -156,19 +161,25 @@ int main(int argc, char** argv){ opt_connect_str= buf; } - if (!isatty(0)) + if (!isatty(0) || opt_execute_str) { prompt= 0; } - ndbout << "-- NDB Cluster -- Management Client --" << endl; - signal(SIGPIPE, handler); - com = new Ndb_mgmclient(opt_connect_str,1); - while(read_and_execute(_try_reconnect)); + int ret= 0; + if (!opt_execute_str) + { + ndbout << "-- NDB Cluster -- Management Client --" << endl; + while(read_and_execute(_try_reconnect)); + } + else + { + com->execute(opt_execute_str,_try_reconnect, &ret); + } delete com; - return 0; + return ret; } diff --git a/ndb/src/mgmclient/ndb_mgmclient.hpp b/ndb/src/mgmclient/ndb_mgmclient.hpp index ea592dfdf4e..bffdf69f920 100644 --- a/ndb/src/mgmclient/ndb_mgmclient.hpp +++ b/ndb/src/mgmclient/ndb_mgmclient.hpp @@ -23,8 +23,8 @@ class Ndb_mgmclient public: Ndb_mgmclient(const char*,int verbose=0); ~Ndb_mgmclient(); - int execute(const char *_line, int _try_reconnect=-1); - int execute(int argc, char** argv, int _try_reconnect=-1); + int execute(const char *_line, int _try_reconnect=-1, int *error= 0); + int execute(int argc, 
char** argv, int _try_reconnect=-1, int *error= 0); int disconnect(); private: CommandInterpreter *m_cmd; diff --git a/ndb/src/mgmsrv/CommandInterpreter.cpp b/ndb/src/mgmsrv/CommandInterpreter.cpp index 2a054a01f1e..02bf24f1d9c 100644 --- a/ndb/src/mgmsrv/CommandInterpreter.cpp +++ b/ndb/src/mgmsrv/CommandInterpreter.cpp @@ -113,6 +113,11 @@ private: void * m_ptr; }; +const char *CommandInterpreter::get_error_text(int err_no) +{ + return _mgmtSrvr.getErrorText(err_no, m_err_str, sizeof(m_err_str)); +} + //***************************************************************************** //***************************************************************************** int CommandInterpreter::readAndExecute() { @@ -600,8 +605,9 @@ stopCallback(int nodeId, void * anyData, int errCode){ ndbout << "\nNode " << nodeId << " has shutdown" << endl; } else { MgmtSrvr * mgm = (MgmtSrvr *)anyData; + char err_str[1024]; ndbout << "Node " << nodeId << " has not shutdown: " - << mgm->getErrorText(errCode) << endl; + << mgm->getErrorText(errCode,err_str,sizeof(err_str)) << endl; } } @@ -653,7 +659,8 @@ versionCallback(int nodeId, int version, void * anyData, int errCode){ } else { MgmtSrvr * mgm = (MgmtSrvr *)anyData; - ndbout << mgm->getErrorText(errCode) << endl; + char err_str[1024]; + ndbout << mgm->getErrorText(errCode,err_str,sizeof(err_str)) << endl; } } @@ -671,7 +678,7 @@ void CommandInterpreter::executeStop(int processId, result = _mgmtSrvr.stopNode(processId, false, stopCallback, this); if(result != 0) - ndbout << _mgmtSrvr.getErrorText(result) << endl; + ndbout << get_error_text(result) << endl; } @@ -686,7 +693,7 @@ void CommandInterpreter::executeStart(int processId, const char* parameters, int result = _mgmtSrvr.start(processId); if (result != 0) { - ndbout << _mgmtSrvr.getErrorText(result) << endl; + ndbout << get_error_text(result) << endl; } } @@ -719,7 +726,7 @@ CommandInterpreter::executeRestart(int processId, const char* parameters, stopCallback, this); if (result != 0) { - ndbout << _mgmtSrvr.getErrorText(result) << endl; + ndbout << get_error_text(result) << endl; } } @@ -760,7 +767,7 @@ CommandInterpreter::executeDumpState(int processId, const char* parameters, free(tmpString); int result = _mgmtSrvr.dumpState(processId, pars, no); if (result != 0) { - ndbout << _mgmtSrvr.getErrorText(result) << endl; + ndbout << get_error_text(result) << endl; } } @@ -781,7 +788,7 @@ void CommandInterpreter::executeStatus(int processId, &status, &version, &startPhase, &system, &dynamicId, &nodeGroup, &connectCount); if(result != 0){ - ndbout << _mgmtSrvr.getErrorText(result) << endl; + ndbout << get_error_text(result) << endl; return; } @@ -875,7 +882,7 @@ void CommandInterpreter::executeLogLevel(int processId, int result = _mgmtSrvr.setNodeLogLevel(processId, logLevel); if (result != 0) { - ndbout << _mgmtSrvr.getErrorText(result) << endl; + ndbout << get_error_text(result) << endl; } #endif } @@ -913,7 +920,7 @@ void CommandInterpreter::executeError(int processId, int result = _mgmtSrvr.insertError(processId, errorNo); if (result != 0) { - ndbout << _mgmtSrvr.getErrorText(result) << endl; + ndbout << get_error_text(result) << endl; } free(newpar); } @@ -953,7 +960,7 @@ void CommandInterpreter::executeTrace(int processId, int result = _mgmtSrvr.setTraceNo(processId, traceNo); if (result != 0) { - ndbout << _mgmtSrvr.getErrorText(result) << endl; + ndbout << get_error_text(result) << endl; } free(newpar); } @@ -974,7 +981,7 @@ void CommandInterpreter::executeLog(int processId, int result = 
_mgmtSrvr.setSignalLoggingMode(processId, MgmtSrvr::InOut, blocks); if (result != 0) { - ndbout << _mgmtSrvr.getErrorText(result) << endl; + ndbout << get_error_text(result) << endl; } } @@ -995,7 +1002,7 @@ void CommandInterpreter::executeLogIn(int processId, int result = _mgmtSrvr.setSignalLoggingMode(processId, MgmtSrvr::In, blocks); if (result != 0) { - ndbout << _mgmtSrvr.getErrorText(result) << endl; + ndbout << get_error_text(result) << endl; } } @@ -1014,7 +1021,7 @@ void CommandInterpreter::executeLogOut(int processId, int result = _mgmtSrvr.setSignalLoggingMode(processId, MgmtSrvr::Out, blocks); if (result != 0) { - ndbout << _mgmtSrvr.getErrorText(result) << endl; + ndbout << get_error_text(result) << endl; } } @@ -1035,7 +1042,7 @@ void CommandInterpreter::executeLogOff(int processId, int result = _mgmtSrvr.setSignalLoggingMode(processId, MgmtSrvr::Off, blocks); if (result != 0) { - ndbout << _mgmtSrvr.getErrorText(result) << endl; + ndbout << get_error_text(result) << endl; } } @@ -1054,7 +1061,7 @@ void CommandInterpreter::executeTestOn(int processId, int result = _mgmtSrvr.startSignalTracing(processId); if (result != 0) { - ndbout << _mgmtSrvr.getErrorText(result) << endl; + ndbout << get_error_text(result) << endl; } } @@ -1073,7 +1080,7 @@ void CommandInterpreter::executeTestOff(int processId, int result = _mgmtSrvr.stopSignalTracing(processId); if (result != 0) { - ndbout << _mgmtSrvr.getErrorText(result) << endl; + ndbout << get_error_text(result) << endl; } } @@ -1126,7 +1133,7 @@ void CommandInterpreter::executeEventReporting(int processId, ndbout_c("processId %d", processId); int result = _mgmtSrvr.setEventReportingLevel(processId, logLevel); if (result != 0) { - ndbout << _mgmtSrvr.getErrorText(result) << endl; + ndbout << get_error_text(result) << endl; } #endif } @@ -1136,7 +1143,7 @@ CommandInterpreter::executeStartBackup(char* parameters) { Uint32 backupId; int result = _mgmtSrvr.startBackup(backupId); if (result != 0) { - ndbout << _mgmtSrvr.getErrorText(result) << endl; + ndbout << get_error_text(result) << endl; } else { // ndbout << "Start of backup ordered" << endl; } @@ -1153,7 +1160,7 @@ CommandInterpreter::executeAbortBackup(char* parameters) { } int result = _mgmtSrvr.abortBackup(bid); if (result != 0) { - ndbout << _mgmtSrvr.getErrorText(result) << endl; + ndbout << get_error_text(result) << endl; } else { ndbout << "Abort of backup " << bid << " ordered" << endl; } @@ -1174,7 +1181,7 @@ CommandInterpreter::executeEnterSingleUser(char* parameters) { } int result = _mgmtSrvr.enterSingleUser(0, nodeId,0,0); if (result != 0) { - ndbout << _mgmtSrvr.getErrorText(result) << endl; + ndbout << get_error_text(result) << endl; } else { ndbout << "Entering single user mode, granting access for node " << nodeId << " OK." << endl; diff --git a/ndb/src/mgmsrv/CommandInterpreter.hpp b/ndb/src/mgmsrv/CommandInterpreter.hpp index db23f76a5bd..1a5184361d6 100644 --- a/ndb/src/mgmsrv/CommandInterpreter.hpp +++ b/ndb/src/mgmsrv/CommandInterpreter.hpp @@ -55,6 +55,9 @@ public: int readAndExecute(); private: + char m_err_str[1024]; + const char *get_error_text(int err_no); + /** * Read a string, and return a pointer to it. 
* diff --git a/ndb/src/mgmsrv/MgmtSrvr.cpp b/ndb/src/mgmsrv/MgmtSrvr.cpp index 986da71a8e8..061aa2e0cb8 100644 --- a/ndb/src/mgmsrv/MgmtSrvr.cpp +++ b/ndb/src/mgmsrv/MgmtSrvr.cpp @@ -49,6 +49,8 @@ #include +#include + #include #include #include @@ -264,16 +266,6 @@ MgmtSrvr::isEventLogFilterEnabled(int severity) static ErrorItem errorTable[] = { - {200, "Backup undefined error"}, - {202, "Backup failed to allocate buffers (check configuration)"}, - {203, "Backup failed to setup fs buffers (check configuration)"}, - {204, "Backup failed to allocate tables (check configuration)"}, - {205, "Backup failed to insert file header (check configuration)"}, - {206, "Backup failed to insert table list (check configuration)"}, - {207, "Backup failed to allocate table memory (check configuration)"}, - {208, "Backup failed to allocate file record (check configuration)"}, - {209, "Backup failed to allocate attribute record (check configuration)"}, - {MgmtSrvr::NO_CONTACT_WITH_PROCESS, "No contact with the process (dead ?)."}, {MgmtSrvr::PROCESS_NOT_CONFIGURED, "The process is not configured."}, {MgmtSrvr::WRONG_PROCESS_TYPE, @@ -1856,18 +1848,21 @@ MgmtSrvr::dumpState(int processId, const Uint32 args[], Uint32 no) //**************************************************************************** //**************************************************************************** -const char* MgmtSrvr::getErrorText(int errorCode) +const char* MgmtSrvr::getErrorText(int errorCode, char *buf, int buf_sz) { - static char text[255]; for (int i = 0; i < noOfErrorCodes; ++i) { if (errorCode == errorTable[i]._errorCode) { - return errorTable[i]._errorText; + BaseString::snprintf(buf, buf_sz, errorTable[i]._errorText); + buf[buf_sz-1]= 0; + return buf; } } - - BaseString::snprintf(text, 255, "Unknown management server error code %d", errorCode); - return text; + + ndb_error_string(errorCode, buf, buf_sz); + buf[buf_sz-1]= 0; + + return buf; } void diff --git a/ndb/src/mgmsrv/MgmtSrvr.hpp b/ndb/src/mgmsrv/MgmtSrvr.hpp index 2ab11250d81..1afb0848ecc 100644 --- a/ndb/src/mgmsrv/MgmtSrvr.hpp +++ b/ndb/src/mgmsrv/MgmtSrvr.hpp @@ -466,7 +466,7 @@ public: * @param errorCode: Error code to get a match error text for. * @return The error text. 
*/ - const char* getErrorText(int errorCode); + const char* getErrorText(int errorCode, char *buf, int buf_sz); /** * Get configuration diff --git a/ndb/src/mgmsrv/Services.cpp b/ndb/src/mgmsrv/Services.cpp index 0394c4e80bb..5834d40cc78 100644 --- a/ndb/src/mgmsrv/Services.cpp +++ b/ndb/src/mgmsrv/Services.cpp @@ -244,6 +244,8 @@ ParserRow commands[] = { MGM_CMD("purge stale sessions", &MgmApiSession::purge_stale_sessions, ""), + MGM_CMD("check connection", &MgmApiSession::check_connection, ""), + MGM_END() }; @@ -579,7 +581,7 @@ MgmApiSession::insertError(Parser::Context &, m_output->println("insert error reply"); if(result != 0) - m_output->println("result: %s", m_mgmsrv.getErrorText(result)); + m_output->println("result: %s", get_error_text(result)); else m_output->println("result: Ok"); m_output->println(""); @@ -597,7 +599,7 @@ MgmApiSession::setTrace(Parser::Context &, m_output->println("set trace reply"); if(result != 0) - m_output->println("result: %s", m_mgmsrv.getErrorText(result)); + m_output->println("result: %s", get_error_text(result)); else m_output->println("result: Ok"); m_output->println(""); @@ -665,7 +667,7 @@ MgmApiSession::startBackup(Parser::Context &, m_output->println("start backup reply"); if(result != 0) - m_output->println("result: %s(%d)", m_mgmsrv.getErrorText(result), result); + m_output->println("result: %s(%d)", get_error_text(result), result); else{ m_output->println("result: Ok"); m_output->println("id: %d", backupId); @@ -685,7 +687,7 @@ MgmApiSession::startBackup(Parser::Context &, m_output->println("start backup reply"); if(result != 0) - m_output->println("result: %s", m_mgmsrv.getErrorText(result)); + m_output->println("result: %s", get_error_text(result)); else{ m_output->println("result: Ok"); m_output->println("id: %d", backupId); @@ -705,7 +707,7 @@ MgmApiSession::abortBackup(Parser::Context &, m_output->println("abort backup reply"); if(result != 0) - m_output->println("result: %s", m_mgmsrv.getErrorText(result)); + m_output->println("result: %s", get_error_text(result)); else m_output->println("result: Ok"); m_output->println(""); @@ -727,7 +729,7 @@ MgmApiSession::repCommand(Parser::Context &, m_output->println("global replication reply"); if(result != 0) - m_output->println("result: %s", m_mgmsrv.getErrorText(result)); + m_output->println("result: %s", get_error_text(result)); else{ m_output->println("result: Ok"); m_output->println("id: %d", repReqId); @@ -749,7 +751,7 @@ MgmApiSession::dumpState(Parser::Context &, int result = m_mgmsrv.dumpState(node, args_str.c_str()); m_output->println("dump state reply"); if(result != 0) - m_output->println("result: %s", m_mgmsrv.getErrorText(result)); + m_output->println("result: %s", get_error_text(result)); else m_output->println("result: Ok"); m_output->println(""); @@ -834,7 +836,7 @@ MgmApiSession::stopSignalLog(Parser::Context &, m_output->println("stop signallog"); if(result != 0) - m_output->println("result: %s", m_mgmsrv.getErrorText(result)); + m_output->println("result: %s", get_error_text(result)); else m_output->println("result: Ok"); m_output->println(""); @@ -874,7 +876,7 @@ MgmApiSession::restart(Parser::Context &, m_output->println("restart reply"); if(result != 0){ - m_output->println("result: %d-%s", result, m_mgmsrv.getErrorText(result)); + m_output->println("result: %d-%s", result, get_error_text(result)); } else m_output->println("result: Ok"); m_output->println("restarted: %d", restarted); @@ -898,7 +900,7 @@ MgmApiSession::restartAll(Parser::Context &, m_output->println("restart 
reply"); if(result != 0) - m_output->println("result: %s", m_mgmsrv.getErrorText(result)); + m_output->println("result: %s", get_error_text(result)); else m_output->println("result: Ok"); m_output->println("restarted: %d", count); @@ -1029,7 +1031,7 @@ MgmApiSession::stop(Parser::Context &, m_output->println("stop reply"); if(result != 0) - m_output->println("result: %s", m_mgmsrv.getErrorText(result)); + m_output->println("result: %s", get_error_text(result)); else m_output->println("result: Ok"); m_output->println("stopped: %d", stopped); @@ -1051,7 +1053,7 @@ MgmApiSession::stopAll(Parser::Context &, m_output->println("stop reply"); if(result != 0) - m_output->println("result: %s", m_mgmsrv.getErrorText(result)); + m_output->println("result: %s", get_error_text(result)); else m_output->println("result: Ok"); m_output->println("stopped: %d", stopped); @@ -1067,7 +1069,7 @@ MgmApiSession::enterSingleUser(Parser::Context &, int result = m_mgmsrv.enterSingleUser(&stopped, nodeId); m_output->println("enter single user reply"); if(result != 0) { - m_output->println("result: %s", m_mgmsrv.getErrorText(result)); + m_output->println("result: %s", get_error_text(result)); } else { m_output->println("result: Ok"); @@ -1082,7 +1084,7 @@ MgmApiSession::exitSingleUser(Parser::Context &, int result = m_mgmsrv.exitSingleUser(&stopped, false); m_output->println("exit single user reply"); if(result != 0) - m_output->println("result: %s", m_mgmsrv.getErrorText(result)); + m_output->println("result: %s", get_error_text(result)); else m_output->println("result: Ok"); m_output->println(""); @@ -1100,7 +1102,7 @@ MgmApiSession::startSignalLog(Parser::Context &, m_output->println("start signallog reply"); if(result != 0) - m_output->println("result: %s", m_mgmsrv.getErrorText(result)); + m_output->println("result: %s", get_error_text(result)); else m_output->println("result: Ok"); m_output->println(""); @@ -1145,7 +1147,7 @@ MgmApiSession::logSignals(Parser::Context &, m_output->println("log signals reply"); if(result != 0) - m_output->println("result: %s", m_mgmsrv.getErrorText(result)); + m_output->println("result: %s", get_error_text(result)); else m_output->println("result: Ok"); m_output->println(""); @@ -1162,7 +1164,7 @@ MgmApiSession::start(Parser::Context &, m_output->println("start reply"); if(result != 0) - m_output->println("result: %s", m_mgmsrv.getErrorText(result)); + m_output->println("result: %s", get_error_text(result)); else m_output->println("result: Ok"); m_output->println(""); @@ -1454,6 +1456,15 @@ MgmApiSession::purge_stale_sessions(Parser_t::Context &ctx, m_output->println(""); } +void +MgmApiSession::check_connection(Parser_t::Context &ctx, + const class Properties &args) +{ + m_output->println("check connection reply"); + m_output->println("result: Ok"); + m_output->println(""); +} + template class MutexVector; template class Vector const*>; template class Vector; diff --git a/ndb/src/mgmsrv/Services.hpp b/ndb/src/mgmsrv/Services.hpp index bfc915f18f1..6a5f06a659e 100644 --- a/ndb/src/mgmsrv/Services.hpp +++ b/ndb/src/mgmsrv/Services.hpp @@ -39,10 +39,13 @@ private: OutputStream *m_output; Parser_t *m_parser; MgmtSrvr::Allocated_resources *m_allocated_resources; + char m_err_str[1024]; void getConfig_common(Parser_t::Context &ctx, const class Properties &args, bool compat = false); + const char *get_error_text(int err_no) + { return m_mgmsrv.getErrorText(err_no, m_err_str, sizeof(m_err_str)); } public: MgmApiSession(class MgmtSrvr & mgm, NDB_SOCKET_TYPE sock); @@ -88,6 +91,7 @@ 
public: void listen_event(Parser_t::Context &ctx, const class Properties &args); void purge_stale_sessions(Parser_t::Context &ctx, const class Properties &args); + void check_connection(Parser_t::Context &ctx, const class Properties &args); void repCommand(Parser_t::Context &ctx, const class Properties &args); }; diff --git a/ndb/src/ndbapi/Ndbinit.cpp b/ndb/src/ndbapi/Ndbinit.cpp index 698bbcde4c6..48e62c36a5f 100644 --- a/ndb/src/ndbapi/Ndbinit.cpp +++ b/ndb/src/ndbapi/Ndbinit.cpp @@ -58,7 +58,7 @@ Ndb::Ndb( const char* aDataBase , const char* aSchema) { theNoOfNdbObjects++; if (global_ndb_cluster_connection == 0) { global_ndb_cluster_connection= new Ndb_cluster_connection(ndbConnectString); - global_ndb_cluster_connection->connect(); + global_ndb_cluster_connection->connect(12,5,1); } setup(global_ndb_cluster_connection, aDataBase, aSchema); DBUG_VOID_RETURN; diff --git a/ndb/src/ndbapi/ndb_cluster_connection.cpp b/ndb/src/ndbapi/ndb_cluster_connection.cpp index b2043b2c2c1..f436ee56ede 100644 --- a/ndb/src/ndbapi/ndb_cluster_connection.cpp +++ b/ndb/src/ndbapi/ndb_cluster_connection.cpp @@ -40,10 +40,7 @@ Ndb_cluster_connection::Ndb_cluster_connection(const char *connect_string) DBUG_ENTER("Ndb_cluster_connection"); DBUG_PRINT("enter",("Ndb_cluster_connection this=0x%x", this)); m_facade= TransporterFacade::theFacadeInstance= new TransporterFacade(); - if (connect_string) - m_connect_string= my_strdup(connect_string,MYF(MY_WME)); - else - m_connect_string= 0; + m_config_retriever= 0; m_connect_thread= 0; m_connect_callback= 0; @@ -58,9 +55,39 @@ Ndb_cluster_connection::Ndb_cluster_connection(const char *connect_string) ndb_print_state_mutex= NdbMutex_Create(); } #endif + m_config_retriever= + new ConfigRetriever(connect_string, NDB_VERSION, NODE_TYPE_API); + if (m_config_retriever->hasError()) + { + printf("Could not connect initialize handle to management server: %s", + m_config_retriever->getErrorString()); + delete m_config_retriever; + m_config_retriever= 0; + } DBUG_VOID_RETURN; } +int Ndb_cluster_connection::get_connected_port() const +{ + if (m_config_retriever) + return m_config_retriever->get_mgmd_port(); + return -1; +} + +const char *Ndb_cluster_connection::get_connected_host() const +{ + if (m_config_retriever) + return m_config_retriever->get_mgmd_host(); + return 0; +} + +const char *Ndb_cluster_connection::get_connectstring(char *buf, int buf_sz) const +{ + if (m_config_retriever) + return m_config_retriever->get_connectstring(buf,buf_sz); + return 0; +} + extern "C" pthread_handler_decl(run_ndb_cluster_connection_connect_thread, me) { my_thread_init(); @@ -77,7 +104,7 @@ void Ndb_cluster_connection::connect_thread() int r; do { NdbSleep_SecSleep(1); - if ((r = connect(1)) == 0) + if ((r = connect(0,0,0)) == 0) break; if (r == -1) { printf("Ndb_cluster_connection::connect_thread error\n"); @@ -98,7 +125,7 @@ int Ndb_cluster_connection::start_connect_thread(int (*connect_callback)(void)) int r; DBUG_ENTER("Ndb_cluster_connection::start_connect_thread"); m_connect_callback= connect_callback; - if ((r = connect(1)) == 1) + if ((r = connect(0,0,0)) == 1) { DBUG_PRINT("info",("starting thread")); m_connect_thread= @@ -117,36 +144,15 @@ int Ndb_cluster_connection::start_connect_thread(int (*connect_callback)(void)) DBUG_RETURN(0); } -int Ndb_cluster_connection::connect(int reconnect) +int Ndb_cluster_connection::connect(int no_retries, int retry_delay_in_seconds, int verbose) { DBUG_ENTER("Ndb_cluster_connection::connect"); const char* error = 0; do { if (m_config_retriever == 
0) - { - m_config_retriever= - new ConfigRetriever(m_connect_string, NDB_VERSION, NODE_TYPE_API); - if (m_config_retriever->hasError()) - { - printf("Could not connect initialize handle to management server", - m_config_retriever->getErrorString()); - DBUG_RETURN(-1); - } - } - else - if (reconnect == 0) - DBUG_RETURN(0); - if (reconnect) - { - int r= m_config_retriever->do_connect(0,0,0); - if (r == 1) - DBUG_RETURN(1); // mgmt server not up yet - if (r == -1) - break; - } - else - if(m_config_retriever->do_connect(12,5,1) == -1) - break; + DBUG_RETURN(-1); + if (m_config_retriever->do_connect(no_retries,retry_delay_in_seconds,verbose)) + DBUG_RETURN(1); // mgmt server not up yet Uint32 nodeId = m_config_retriever->allocNodeId(4/*retries*/,3/*delay*/); if(nodeId == 0) @@ -189,7 +195,6 @@ Ndb_cluster_connection::~Ndb_cluster_connection() abort(); TransporterFacade::theFacadeInstance= 0; } - my_free(m_connect_string,MYF(MY_ALLOW_ZERO_PTR)); if (m_config_retriever) delete m_config_retriever; DBUG_VOID_RETURN; diff --git a/ndb/src/ndbapi/ndberror.c b/ndb/src/ndbapi/ndberror.c index 6744f4c1640..c0a6b6ba122 100644 --- a/ndb/src/ndbapi/ndberror.c +++ b/ndb/src/ndbapi/ndberror.c @@ -35,6 +35,7 @@ typedef struct ErrorBundle { #define NE ndberror_cl_none #define AE ndberror_cl_application +#define CE ndberror_cl_configuration #define ND ndberror_cl_no_data_found #define CV ndberror_cl_constraint_violation #define SE ndberror_cl_schema_error @@ -58,6 +59,27 @@ static const char REDO_BUFFER_MSG[]= static const char* empty_string = ""; +/* + * Error code ranges are reserved for respective block + * + * 200 - TC + * 300 - DIH + * 400 - LQH + * 600 - ACC + * 700 - DICT + * 800 - TUP + * 1200 - LQH + * 1300 - BACKUP + * 4000 - API + * 4100 - "" + * 4200 - "" + * 4300 - "" + * 4400 - "" + * 4500 - "" + * 4600 - "" + * 5000 - Management server + */ + static const ErrorBundle ErrorCodes[] = { @@ -303,6 +325,36 @@ ErrorBundle ErrorCodes[] = { */ { 4003, NI, "Function not implemented yet" }, + /** + * Backup error codes + */ + + { 1300, IE, "Undefined error" }, + { 1301, IE, "Backup issued to not master (reissue command to master)" }, + { 1302, IE, "Out of backup record" }, + { 1303, IS, "Out of resources" }, + { 1304, IE, "Sequence failure" }, + { 1305, IE, "Backup definition not implemented" }, + { 1306, AE, "Backup not supported in diskless mode (change Diskless)" }, + + { 1321, IE, "Backup aborted by application" }, + { 1322, IE, "Backup already completed" }, + { 1323, IE, "1323" }, + { 1324, IE, "Backup log buffer full" }, + { 1325, IE, "File or scan error" }, + { 1326, IE, "Backup abortet due to node failure" }, + { 1327, IE, "1327" }, + + { 1340, IE, "Backup undefined error" }, + { 1342, AE, "Backup failed to allocate buffers (check configuration)" }, + { 1343, AE, "Backup failed to setup fs buffers (check configuration)" }, + { 1344, AE, "Backup failed to allocate tables (check configuration)" }, + { 1345, AE, "Backup failed to insert file header (check configuration)" }, + { 1346, AE, "Backup failed to insert table list (check configuration)" }, + { 1347, AE, "Backup failed to allocate table memory (check configuration)" }, + { 1348, AE, "Backup failed to allocate file record (check configuration)" }, + { 1349, AE, "Backup failed to allocate attribute record (check configuration)" }, + /** * Still uncategorized */ @@ -467,6 +519,7 @@ const ErrorStatusClassification StatusClassificationMapping[] = { { ST_S, NE, "No error"}, { ST_P, AE, "Application error"}, + { ST_P, CE, "Configuration or application 
error"}, { ST_P, ND, "No data found"}, { ST_P, CV, "Constraint violation"}, { ST_P, SE, "Schema error"}, diff --git a/ndb/tools/listTables.cpp b/ndb/tools/listTables.cpp index b923207a4fe..ccb6967e2dc 100644 --- a/ndb/tools/listTables.cpp +++ b/ndb/tools/listTables.cpp @@ -228,10 +228,11 @@ int main(int argc, char** argv){ _tabname = argv[0]; ndb_cluster_connection = new Ndb_cluster_connection(opt_connect_str); + if (ndb_cluster_connection->connect(12,5,1)) + fatal("unable to connect"); ndb = new Ndb(ndb_cluster_connection, _dbname); if (ndb->init() != 0) fatal("init"); - ndb_cluster_connection->connect(); if (ndb->waitUntilReady(30) < 0) fatal("waitUntilReady"); dic = ndb->getDictionary(); diff --git a/sql/examples/ha_archive.cc b/sql/examples/ha_archive.cc index b03e655fef7..3217327dc3d 100644 --- a/sql/examples/ha_archive.cc +++ b/sql/examples/ha_archive.cc @@ -305,7 +305,6 @@ ARCHIVE_SHARE *ha_archive::get_share(const char *table_name, TABLE *table) share->use_count= 0; share->table_name_length= length; share->table_name= tmp_name; - share->delayed= FALSE; fn_format(share->data_file_name,table_name,"",ARZ,MY_REPLACE_EXT|MY_UNPACK_FILENAME); fn_format(meta_file_name,table_name,"",ARM,MY_REPLACE_EXT|MY_UNPACK_FILENAME); strmov(share->table_name,table_name); @@ -536,11 +535,9 @@ int ha_archive::write_row(byte * buf) table->timestamp_field->set_time(); pthread_mutex_lock(&share->mutex); written= gzwrite(share->archive_write, buf, table->reclength); - DBUG_PRINT("ha_archive::get_row", ("Wrote %d bytes expected %d", written, table->reclength)); - if (!delayed_insert) + DBUG_PRINT("ha_archive::write_row", ("Wrote %d bytes expected %d", written, table->reclength)); + if (!delayed_insert || !bulk_insert) share->dirty= TRUE; - else - share->delayed= TRUE; if (written != table->reclength) goto error; @@ -599,7 +596,6 @@ int ha_archive::rnd_init(bool scan) { gzflush(share->archive_write, Z_SYNC_FLUSH); share->dirty= FALSE; - share->delayed= FALSE; } pthread_mutex_unlock(&share->mutex); } @@ -634,12 +630,12 @@ int ha_archive::get_row(gzFile file_to_read, byte *buf) if (read == 0) DBUG_RETURN(HA_ERR_END_OF_FILE); - /* If the record is the wrong size, the file is probably damaged, unless - we are dealing with a delayed insert. In that case we can assume the file is ok, - but our row count doesn't match our data since the file has not been flushed. + /* + If the record is the wrong size, the file is probably damaged, unless + we are dealing with a delayed insert or a bulk insert. */ if ((ulong) read != table->reclength) - DBUG_RETURN(share->delayed ? HA_ERR_END_OF_FILE : HA_ERR_CRASHED_ON_USAGE); + DBUG_RETURN(HA_ERR_END_OF_FILE); /* Calculate blob length, we use this for our buffer */ for (field=table->blob_field; *field ; field++) @@ -657,7 +653,7 @@ int ha_archive::get_row(gzFile file_to_read, byte *buf) { read= gzread(file_to_read, last, size); if ((size_t) read != size) - DBUG_RETURN(share->delayed ? HA_ERR_END_OF_FILE : HA_ERR_CRASHED_ON_USAGE); + DBUG_RETURN(HA_ERR_END_OF_FILE); (*field)->set_ptr(size, last); last += size; } @@ -886,14 +882,47 @@ THR_LOCK_DATA **ha_archive::store_lock(THD *thd, return to; } + +/* + Hints for optimizer, see ha_tina for more information +*/ void ha_archive::info(uint flag) { DBUG_ENTER("ha_archive::info"); - /* This is a lie, but you don't want the optimizer to see zero or 1 */ + /* + This should be an accurate number now, though bulk and delayed inserts can + cause the number to be inaccurate. 
+ */ records= share->rows_recorded; deleted= 0; DBUG_VOID_RETURN; } + + +/* + This method tells us that a bulk insert operation is about to occur. We set + a flag which will keep write_row from saying that its data is dirty. This in + turn will keep selects from causing a sync to occur. + Basically, yet another optimizations to keep compression working well. +*/ +void ha_archive::start_bulk_insert(ha_rows rows) +{ + DBUG_ENTER("ha_archive::info"); + bulk_insert= TRUE; + DBUG_VOID_RETURN; +} + + +/* + Other side of start_bulk_insert, is end_bulk_insert. Here we turn off the bulk insert + flag, and set the share dirty so that the next select will call sync for us. +*/ +int ha_archive::end_bulk_insert() +{ + bulk_insert= FALSE; + share->dirty= TRUE; + DBUG_RETURN(0); +} #endif /* HAVE_ARCHIVE_DB */ diff --git a/sql/examples/ha_archive.h b/sql/examples/ha_archive.h index a3f2327b4a7..856513b0f92 100644 --- a/sql/examples/ha_archive.h +++ b/sql/examples/ha_archive.h @@ -36,7 +36,6 @@ typedef struct st_archive_share { gzFile archive_write; /* Archive file we are working with */ bool dirty; /* Flag for if a flush should occur */ ulonglong rows_recorded; /* Number of rows in tables */ - bool delayed; /* If a delayed insert has happened since opena */ } ARCHIVE_SHARE; /* @@ -55,9 +54,10 @@ class ha_archive: public handler String buffer; /* Buffer used for blob storage */ ulonglong scan_rows; /* Number of rows left in scan */ bool delayed_insert; /* If the insert is delayed */ + bool bulk_insert; /* If we are performing a bulk insert */ public: - ha_archive(TABLE *table): handler(table), delayed_insert(0) + ha_archive(TABLE *table): handler(table), delayed_insert(0), bulk_insert(0) { /* Set our original buffer from pre-allocated memory */ buffer.set(byte_buffer, IO_SIZE, system_charset_info); @@ -99,6 +99,8 @@ public: int external_lock(THD *thd, int lock_type); int create(const char *name, TABLE *form, HA_CREATE_INFO *create_info); int optimize(THD* thd, HA_CHECK_OPT* check_opt); + void start_bulk_insert(ha_rows rows); + int end_bulk_insert(); THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to, enum thr_lock_type lock_type); }; diff --git a/sql/field.cc b/sql/field.cc index e573f6528f4..5e76c590aac 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -1866,6 +1866,7 @@ int Field_long::store(double nr) else if (nr > (double) UINT_MAX32) { res= UINT_MAX32; + set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_OUT_OF_RANGE, 1); error= 1; } else diff --git a/sql/ha_innodb.cc b/sql/ha_innodb.cc index c7384857d79..c6ad646be4d 100644 --- a/sql/ha_innodb.cc +++ b/sql/ha_innodb.cc @@ -80,6 +80,7 @@ extern "C" { #include "../innobase/include/fsp0fsp.h" #include "../innobase/include/sync0sync.h" #include "../innobase/include/fil0fil.h" +#include "../innobase/include/trx0xa.h" } #define HA_INNOBASE_ROWS_IN_TABLE 10000 /* to get optimization right */ @@ -149,6 +150,14 @@ static mysql_byte* innobase_get_key(INNOBASE_SHARE *share,uint *length, static INNOBASE_SHARE *get_share(const char *table_name); static void free_share(INNOBASE_SHARE *share); +/********************************************************************* +Commits a transaction in an InnoDB database. 
*/ + +void +innobase_commit_low( +/*================*/ + trx_t* trx); /* in: transaction handle */ + struct show_var_st innodb_status_variables[]= { {"buffer_pool_pages_data", (char*) &export_vars.innodb_buffer_pool_pages_data, SHOW_LONG}, @@ -511,6 +520,25 @@ innobase_mysql_print_thd( putc('\n', f); } +/********************************************************************** +Determines whether the given character set is of variable length. + +NOTE that the exact prototype of this function has to be in +/innobase/data/data0type.ic! */ +extern "C" +ibool +innobase_is_mb_cset( +/*================*/ + ulint cset) /* in: MySQL charset-collation code */ +{ + CHARSET_INFO* cs; + ut_ad(cset < 256); + + cs = all_charsets[cset]; + + return(cs && cs->mbminlen != cs->mbmaxlen); +} + /********************************************************************** Compares NUL-terminated UTF-8 strings case insensitively. @@ -1336,7 +1364,7 @@ innobase_commit( if (trx_handle != (void*)&innodb_dummy_stmt_trx_handle || (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))) { - /* We were instructed to commit the whole transaction, or + /* We were instructed to commit the whole transaction, or this is an SQL statement end and autocommit is on */ innobase_commit_low(trx); @@ -1477,6 +1505,39 @@ innobase_rollback( DBUG_RETURN(convert_error_code_to_mysql(error, NULL)); } +/********************************************************************* +Rolls back a transaction */ + +int +innobase_rollback_trx( +/*==================*/ + /* out: 0 or error number */ + trx_t* trx) /* in: transaction */ +{ + int error = 0; + + DBUG_ENTER("innobase_rollback_trx"); + DBUG_PRINT("trans", ("aborting transaction")); + + /* Release a possible FIFO ticket and search latch. Since we will + reserve the kernel mutex, we have to release the search system latch + first to obey the latching order. */ + + innobase_release_stat_resources(trx); + + if (trx->auto_inc_lock) { + /* If we had reserved the auto-inc lock for some table (if + we come here to roll back the latest SQL statement) we + release it now before a possibly lengthy rollback */ + + row_unlock_table_autoinc_for_mysql(trx); + } + + error = trx_rollback_for_mysql(trx); + + DBUG_RETURN(convert_error_code_to_mysql(error, NULL)); +} + /********************************************************************* Rolls back a transaction to a savepoint. */ @@ -2208,7 +2269,13 @@ build_template( ulint n_fields; ulint n_requested_fields = 0; ibool fetch_all_in_key = FALSE; - ibool fetch_primary_key_cols = FALSE; + ibool fetch_primary_key_cols = TRUE; /* The ROR code in + opt_range.cc assumes that the + primary key cols are always + retrieved. Starting from + MySQL-5.0.2, let us always + fetch them, even though it + wastes some CPU. */ ulint i; if (prebuilt->select_lock_type == LOCK_X) { @@ -2408,20 +2475,58 @@ ha_innobase::write_row( position in the source table need not be adjusted after the intermediate COMMIT, since writes by other transactions are being blocked by a MySQL table lock TL_WRITE_ALLOW_READ. */ - ut_a(prebuilt->trx->mysql_n_tables_locked == 2); - ut_a(UT_LIST_GET_LEN(prebuilt->trx->trx_locks) >= 2); - dict_table_t* table = lock_get_ix_table( - UT_LIST_GET_FIRST(prebuilt->trx->trx_locks)); + + dict_table_t* src_table; + ibool mode; + num_write_row = 0; + /* Commit the transaction. This will release the table locks, so they have to be acquired again. */ - innobase_commit(user_thd, prebuilt->trx); - /* Note that this transaction is still active. 
*/ - user_thd->transaction.all.innodb_active_trans = 1; - /* Re-acquire the IX table lock on the source table. */ - row_lock_table_for_mysql(prebuilt, table); - /* We will need an IX lock on the destination table. */ - prebuilt->sql_stat_start = TRUE; + + /* Altering an InnoDB table */ + /* Get the source table. */ + src_table = lock_get_src_table( + prebuilt->trx, prebuilt->table, &mode); + if (!src_table) { + no_commit: + /* Unknown situation: do not commit */ + /* + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB error: ALTER TABLE is holding lock" + " on %lu tables!\n", + prebuilt->trx->mysql_n_tables_locked); + */ + ; + } else if (src_table == prebuilt->table) { + /* Source table is not in InnoDB format: + no need to re-acquire locks on it. */ + + /* Altering to InnoDB format */ + innobase_commit(user_thd, prebuilt->trx); + /* Note that this transaction is still active. */ + user_thd->transaction.all.innodb_active_trans = 1; + /* We will need an IX lock on the destination table. */ + prebuilt->sql_stat_start = TRUE; + } else { + /* Ensure that there are no other table locks than + LOCK_IX and LOCK_AUTO_INC on the destination table. */ + if (!lock_is_table_exclusive(prebuilt->table, + prebuilt->trx)) { + goto no_commit; + } + + /* Commit the transaction. This will release the table + locks, so they have to be acquired again. */ + innobase_commit(user_thd, prebuilt->trx); + /* Note that this transaction is still active. */ + user_thd->transaction.all.innodb_active_trans = 1; + /* Re-acquire the table lock on the source table. */ + row_lock_table_for_mysql(prebuilt, src_table, mode); + /* We will need an IX lock on the destination table. */ + prebuilt->sql_stat_start = TRUE; + } } num_write_row++; @@ -3490,7 +3595,7 @@ create_table_def( TABLE* form, /* in: information on table columns and indexes */ const char* table_name, /* in: table name */ - const char* path_of_temp_table)/* in: if this is a table explicitly + const char* path_of_temp_table,/* in: if this is a table explicitly created by the user with the TEMPORARY keyword, then this parameter is the dir path where the @@ -3498,6 +3603,7 @@ create_table_def( an .ibd file for it (no .ibd extension in the path, though); otherwise this is NULL */ + ibool comp) /* in: TRUE=compact record format */ { Field* field; dict_table_t* table; @@ -3518,7 +3624,7 @@ create_table_def( /* We pass 0 as the space id, and determine at a lower level the space id where to store the table */ - table = dict_mem_table_create((char*) table_name, 0, n_cols); + table = dict_mem_table_create(table_name, 0, n_cols, comp); if (path_of_temp_table) { table->dir_path_of_temp_table = @@ -3782,12 +3888,9 @@ ha_innobase::create( /* Create the table definition in InnoDB */ - if (create_info->options & HA_LEX_CREATE_TMP_TABLE) { - - error = create_table_def(trx, form, norm_name, name2); - } else { - error = create_table_def(trx, form, norm_name, NULL); - } + error = create_table_def(trx, form, norm_name, + create_info->options & HA_LEX_CREATE_TMP_TABLE ? 
name2 : NULL, + !(form->db_options_in_use & HA_OPTION_PACK_RECORD)); if (error) { innobase_commit_low(trx); @@ -5145,7 +5248,8 @@ ha_innobase::external_lock( if (thd->in_lock_tables && thd->variables.innodb_table_locks) { ulint error; - error = row_lock_table_for_mysql(prebuilt, 0); + error = row_lock_table_for_mysql(prebuilt, + NULL, LOCK_TABLE_EXP); if (error != DB_SUCCESS) { error = convert_error_code_to_mysql( @@ -5756,4 +5860,158 @@ innobase_query_is_replace(void) } } +/*********************************************************************** +This function is used to prepare X/Open XA distributed transaction */ + +int innobase_xa_prepare( +/*====================*/ + /* out: 0 or error number */ + THD* thd, /* in: handle to the MySQL thread of the user + whose XA transaction should be prepared */ + bool all) /* in: TRUE - commit transaction + FALSE - the current SQL statement ended */ +{ + int error = 0; + trx_t* trx; + + trx = check_trx_exists(thd); + + /* TODO: Get X/Open XA Transaction Identification from MySQL*/ + memset(&trx->xid, 0, sizeof(trx->xid)); + trx->xid.formatID = -1; + + /* Release a possible FIFO ticket and search latch. Since we will + reserve the kernel mutex, we have to release the search system latch + first to obey the latching order. */ + + innobase_release_stat_resources(trx); + + if (trx->active_trans == 0 && trx->conc_state != TRX_NOT_STARTED) { + + fprintf(stderr, +"InnoDB: Error: thd->transaction.all.innodb_active_trans == 0\n" +"InnoDB: but trx->conc_state != TRX_NOT_STARTED\n"); + } + + if (all || (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))) { + + /* We were instructed to prepare the whole transaction, or + this is an SQL statement end and autocommit is on */ + + error = trx_prepare_for_mysql(trx); + } else { + /* We just mark the SQL statement ended and do not do a + transaction prepare */ + + if (trx->auto_inc_lock) { + /* If we had reserved the auto-inc lock for some + table in this SQL statement we release it now */ + + row_unlock_table_autoinc_for_mysql(trx); + } + /* Store the current undo_no of the transaction so that we + know where to roll back if we have to roll back the next + SQL statement */ + + trx_mark_sql_stat_end(trx); + } + + /* Tell the InnoDB server that there might be work for utility + threads: */ + + srv_active_wake_master_thread(); + + return error; +} + +/*********************************************************************** +This function is used to recover X/Open XA distributed transactions */ + +int innobase_xa_recover( + /* out: number of prepared transactions + stored in xid_list */ + XID* xid_list, /* in/out: prepared transactions */ + uint len) /* in: number of slots in xid_list */ +/*====================*/ +{ + if (len == 0 || xid_list == NULL) { + return 0; + } + + return (trx_recover_for_mysql(xid_list, len)); +} + +/*********************************************************************** +This function is used to commit one X/Open XA distributed transaction +which is in the prepared state */ + +int innobase_commit_by_xid( +/*=======================*/ + /* out: 0 or error number */ + XID* xid) /* in: X/Open XA Transaction Identification */ +{ + trx_t* trx; + + trx = trx_get_trx_by_xid(xid); + + if (trx) { + innobase_commit_low(trx); + + return(XA_OK); + } else { + return(XAER_NOTA); + } +} + +/*********************************************************************** +This function is used to rollback one X/Open XA distributed transaction +which is in the prepared state */ + +int innobase_rollback_by_xid( + /* 
out: 0 or error number */ + XID *xid) /* in : X/Open XA Transaction Idenfification */ +{ + trx_t* trx; + + trx = trx_get_trx_by_xid(xid); + + if (trx) { + return(innobase_rollback_trx(trx)); + } else { + return(XAER_NOTA); + } +} + +/*********************************************************************** +This function is used to test commit/rollback of XA transactions */ + +int innobase_xa_end( +/*================*/ + THD* thd) /* in: MySQL thread handle of the user for whom + transactions should be recovered */ +{ + DBUG_ENTER("innobase_xa_end"); + + XID trx_list[100]; + int trx_num, trx_num_max = 100; + int i; + XID xid; + + while((trx_num = innobase_xa_recover(trx_list, trx_num_max))) { + + for(i=0;i < trx_num; i++) { + xid = trx_list[i]; + + if ( i % 2) { + innobase_commit_by_xid(&xid); + } else { + innobase_rollback_by_xid(&xid); + } + } + } + + free(trx_list); + + DBUG_RETURN(0); +} #endif /* HAVE_INNOBASE_DB */ diff --git a/sql/ha_innodb.h b/sql/ha_innodb.h index 7bdd3208df3..a64bbc665c1 100644 --- a/sql/ha_innodb.h +++ b/sql/ha_innodb.h @@ -245,3 +245,46 @@ void innobase_release_temporary_latches(void* innobase_tid); void innobase_store_binlog_offset_and_flush_log(char *binlog_name,longlong offset); int innobase_start_trx_and_assign_read_view(THD* thd); + +/*********************************************************************** +This function is used to prepare X/Open XA distributed transaction */ + +int innobase_xa_prepare( +/*====================*/ + /* out: 0 or error number */ + THD* thd, /* in: handle to the MySQL thread of the user + whose XA transaction should be prepared */ + bool all); /* in: TRUE - commit transaction + FALSE - the current SQL statement ended */ + +/*********************************************************************** +This function is used to recover X/Open XA distributed transactions */ + +int innobase_xa_recover( +/*====================*/ + /* out: number of prepared transactions + stored in xid_list */ + XID* xid_list, /* in/out: prepared transactions */ + uint len); /* in: number of slots in xid_list */ + +/*********************************************************************** +This function is used to commit one X/Open XA distributed transaction +which is in the prepared state */ + +int innobase_commit_by_xid( +/*=======================*/ + /* out: 0 or error number */ + XID* xid); /* in : X/Open XA Transaction Identification */ + +/*********************************************************************** +This function is used to rollback one X/Open XA distributed transaction +which is in the prepared state */ + +int innobase_rollback_by_xid( + /* out: 0 or error number */ + XID *xid); /* in : X/Open XA Transaction Idenfification */ + + +int innobase_xa_end(THD *thd); + + diff --git a/sql/ha_ndbcluster.cc b/sql/ha_ndbcluster.cc index 29ceffa8b7e..9b0b84aa0b2 100644 --- a/sql/ha_ndbcluster.cc +++ b/sql/ha_ndbcluster.cc @@ -4248,7 +4248,7 @@ bool ndbcluster_init() new Ndb_cluster_connection(ndbcluster_connectstring)) == 0) { DBUG_PRINT("error",("Ndb_cluster_connection(%s)",ndbcluster_connectstring)); - DBUG_RETURN(TRUE); + goto ndbcluster_init_error; } // Create a Ndb object to open the connection to NDB @@ -4257,25 +4257,33 @@ bool ndbcluster_init() if (g_ndb->init() != 0) { ERR_PRINT (g_ndb->getNdbError()); - DBUG_RETURN(TRUE); + goto ndbcluster_init_error; } - if ((res= g_ndb_cluster_connection->connect(1)) == 0) + if ((res= g_ndb_cluster_connection->connect(0,0,0)) == 0) { + DBUG_PRINT("info",("NDBCLUSTER storage engine at %s on port %d", + 
g_ndb_cluster_connection->get_connected_host(), + g_ndb_cluster_connection->get_connected_port())); g_ndb->waitUntilReady(10); } else if(res == 1) { if (g_ndb_cluster_connection->start_connect_thread()) { DBUG_PRINT("error", ("g_ndb_cluster_connection->start_connect_thread()")); - DBUG_RETURN(TRUE); + goto ndbcluster_init_error; + } + { + char buf[1024]; + DBUG_PRINT("info",("NDBCLUSTER storage engine not started, will connect using %s", + g_ndb_cluster_connection->get_connectstring(buf,sizeof(buf)))); } } else { DBUG_ASSERT(res == -1); DBUG_PRINT("error", ("permanent error")); - DBUG_RETURN(TRUE); + goto ndbcluster_init_error; } (void) hash_init(&ndbcluster_open_tables,system_charset_info,32,0,0, @@ -4285,9 +4293,12 @@ bool ndbcluster_init() ndbcluster_inited= 1; #ifdef USE_DISCOVER_ON_STARTUP if (ndb_discover_tables() != 0) - DBUG_RETURN(TRUE); + goto ndbcluster_init_error; #endif DBUG_RETURN(FALSE); + ndbcluster_init_error: + ndbcluster_end(); + DBUG_RETURN(TRUE); } diff --git a/sql/handler.h b/sql/handler.h index 4c91f5abe0e..c70ea266734 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -194,6 +194,41 @@ typedef struct st_thd_trans { void *ndb_tid; } THD_TRANS; +#ifndef XIDDATASIZE /* no xa.h included */ + +/* XXX - may be we should disable xa completely in this case ? */ +#define XIDDATASIZE 128 +#define MAXGTRIDSIZE 64 +#define MAXBQUALSIZE 64 + +struct xid_t { + long formatID; + long gtrid_length; + long bqual_length; + char data[XIDDATASIZE]; +}; + +typedef struct xid_t XID; + + +#endif + +typedef struct +{ + byte slot; + uint savepoint_offset; + int (*close_connection)(THD *thd); + int (*savepoint_set)(THD *thd, void *sv); + int (*savepoint_rollback)(THD *thd, void *sv); + int (*savepoint_release)(THD *thd, void *sv); + int (*commit)(THD *thd, bool all); + int (*rollback)(THD *thd, bool all); + int (*prepare)(THD *thd, bool all); + int (*recover)(XID *xid_list, uint len); + int (*commit_by_xid)(XID *xid); + int (*rollback_by_xid)(XID *xid); +} handlerton; + enum enum_tx_isolation { ISO_READ_UNCOMMITTED, ISO_READ_COMMITTED, ISO_REPEATABLE_READ, ISO_SERIALIZABLE}; diff --git a/sql/lock.cc b/sql/lock.cc index c4f1d681b76..3367c6a2900 100644 --- a/sql/lock.cc +++ b/sql/lock.cc @@ -840,19 +840,33 @@ void start_waiting_global_read_lock(THD *thd) } -void make_global_read_lock_block_commit(THD *thd) +bool make_global_read_lock_block_commit(THD *thd) { + bool error; + const char *old_message; + DBUG_ENTER("make_global_read_lock_block_commit"); /* If we didn't succeed lock_global_read_lock(), or if we already suceeded make_global_read_lock_block_commit(), do nothing. 
*/ if (thd->global_read_lock != GOT_GLOBAL_READ_LOCK) - return; + DBUG_RETURN(1); pthread_mutex_lock(&LOCK_open); /* increment this BEFORE waiting on cond (otherwise race cond) */ global_read_lock_blocks_commit++; - while (protect_against_global_read_lock) + /* For testing we set up some blocking, to see if we can be killed */ + DBUG_EXECUTE_IF("make_global_read_lock_block_commit_loop", + protect_against_global_read_lock++;); + old_message= thd->enter_cond(&COND_refresh, &LOCK_open, + "Waiting for all running commits to finish"); + while (protect_against_global_read_lock && !thd->killed) pthread_cond_wait(&COND_refresh, &LOCK_open); - pthread_mutex_unlock(&LOCK_open); - thd->global_read_lock= MADE_GLOBAL_READ_LOCK_BLOCK_COMMIT; + DBUG_EXECUTE_IF("make_global_read_lock_block_commit_loop", + protect_against_global_read_lock--;); + if (error= thd->killed) + global_read_lock_blocks_commit--; // undo what we did + else + thd->global_read_lock= MADE_GLOBAL_READ_LOCK_BLOCK_COMMIT; + thd->exit_cond(old_message); + DBUG_RETURN(error); } diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h index babb0c1aa6f..09bec0a9323 100644 --- a/sql/mysql_priv.h +++ b/sql/mysql_priv.h @@ -1096,7 +1096,7 @@ void unlock_global_read_lock(THD *thd); bool wait_if_global_read_lock(THD *thd, bool abort_on_refresh, bool is_not_commit); void start_waiting_global_read_lock(THD *thd); -void make_global_read_lock_block_commit(THD *thd); +bool make_global_read_lock_block_commit(THD *thd); /* Lock based on name */ int lock_and_wait_for_table_name(THD *thd, TABLE_LIST *table_list); diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 382042e2b42..27248ebaec2 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -37,6 +37,28 @@ #ifdef HAVE_NDBCLUSTER_DB #include "ha_ndbcluster.h" #endif + +#ifdef HAVE_INNOBASE_DB +#define OPT_INNODB_DEFAULT 1 +#else +#define OPT_INNODB_DEFAULT 0 +#endif +#ifdef HAVE_BERKLEY_DB +#define OPT_BDB_DEFAULT 1 +#else +#define OPT_BDB_DEFAULT 0 +#endif +#ifdef HAVE_ISAM_DB +#define OPT_ISAM_DEFAULT 1 +#else +#define OPT_ISAM_DEFAULT 0 +#endif +#ifdef HAVE_NDBCLUSTER_DB +#define OPT_NDBCLUSTER_DEFAULT 0 +#else +#define OPT_NDBCLUSTER_DEFAULT 0 +#endif + #include #include #include @@ -1539,14 +1561,13 @@ void mysql_down_server_cb(void *, void *) // destroy callback resources void mysql_cb_destroy(void *) -{ - UnRegisterEventNotification(eh); // cleanup down event notification +{ + UnRegisterEventNotification(eh); // cleanup down event notification NX_UNWRAP_INTERFACE(ref); - - /* Deregister NSS volume deactivation event */ - NX_UNWRAP_INTERFACE(refneb); + /* Deregister NSS volume deactivation event */ + NX_UNWRAP_INTERFACE(refneb); if (neb_consumer_id) - UnRegisterConsumer(neb_consumer_id, NULL); + UnRegisterConsumer(neb_consumer_id, NULL); } @@ -1665,6 +1686,7 @@ ulong neb_event_callback(struct EventBlock *eblock) nw_panic = TRUE; event_flag= TRUE; kill_server(0); + } } return 0; @@ -1738,8 +1760,8 @@ static void init_signals(void) for (uint i=0 ; i < sizeof(signals)/sizeof(int) ; i++) signal(signals[i], kill_server); mysql_cb_init(); // initialize callbacks -} +} static void start_signal_handler(void) { @@ -2262,7 +2284,13 @@ extern "C" pthread_handler_decl(handle_shutdown,arg) #endif -const char *load_default_groups[]= { "mysqld","server",MYSQL_BASE_VERSION,0,0}; +const char *load_default_groups[]= { +#ifdef HAVE_NDBCLUSTER_DB +"mysql_cluster", +#endif +"mysqld","server",MYSQL_BASE_VERSION,0,0}; +static const int load_default_groups_sz= +sizeof(load_default_groups)/sizeof(load_default_groups[0]); bool 
open_log(MYSQL_LOG *log, const char *hostname, const char *opt_name, const char *extension, @@ -2884,6 +2912,7 @@ int win_main(int argc, char **argv) int main(int argc, char **argv) #endif { + DEBUGGER_OFF; MY_INIT(argv[0]); // init my_sys library & pthreads @@ -3079,7 +3108,7 @@ we force server id to 2, but this MySQL server will not act as a slave."); #endif /* __NT__ */ /* (void) pthread_attr_destroy(&connection_attrib); */ - + DBUG_PRINT("quit",("Exiting main thread")); #ifndef __WIN__ @@ -3129,6 +3158,7 @@ we force server id to 2, but this MySQL server will not act as a slave."); #endif clean_up_mutexes(); my_end(opt_endinfo ? MY_CHECK_ERROR | MY_GIVE_INFO : 0); + exit(0); return(0); /* purecov: deadcode */ } @@ -3256,7 +3286,7 @@ int main(int argc, char **argv) and we are now stuck with it. */ if (my_strcasecmp(system_charset_info, argv[1],"mysql")) - load_default_groups[3]= argv[1]; + load_default_groups[load_default_groups_sz-2]= argv[1]; start_mode= 1; Service.Init(argv[1], mysql_service); return 0; @@ -3277,7 +3307,7 @@ int main(int argc, char **argv) opt_argv=argv; start_mode= 1; if (my_strcasecmp(system_charset_info, argv[2],"mysql")) - load_default_groups[3]= argv[2]; + load_default_groups[load_default_groups_sz-2]= argv[2]; Service.Init(argv[2], mysql_service); return 0; } @@ -4189,7 +4219,7 @@ struct my_option my_long_options[] = 0, 0, 0, 0, 0, 0}, {"bdb", OPT_BDB, "Enable Berkeley DB (if this version of MySQL supports it). \ Disable with --skip-bdb (will save memory).", - (gptr*) &opt_bdb, (gptr*) &opt_bdb, 0, GET_BOOL, NO_ARG, 1, 0, 0, + (gptr*) &opt_bdb, (gptr*) &opt_bdb, 0, GET_BOOL, NO_ARG, OPT_BDB_DEFAULT, 0, 0, 0, 0, 0}, #ifdef HAVE_BERKELEY_DB {"bdb-home", OPT_BDB_HOME, "Berkeley home directory.", (gptr*) &berkeley_home, @@ -4326,7 +4356,7 @@ Disable with --skip-bdb (will save memory).", REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"innodb", OPT_INNODB, "Enable InnoDB (if this version of MySQL supports it). \ Disable with --skip-innodb (will save memory).", - (gptr*) &opt_innodb, (gptr*) &opt_innodb, 0, GET_BOOL, NO_ARG, 1, 0, 0, + (gptr*) &opt_innodb, (gptr*) &opt_innodb, 0, GET_BOOL, NO_ARG, OPT_INNODB_DEFAULT, 0, 0, 0, 0, 0}, {"innodb_data_file_path", OPT_INNODB_DATA_FILE_PATH, "Path to individual files and their sizes.", @@ -4386,7 +4416,7 @@ Disable with --skip-innodb (will save memory).", #endif /* End HAVE_INNOBASE_DB */ {"isam", OPT_ISAM, "Enable ISAM (if this version of MySQL supports it). \ Disable with --skip-isam.", - (gptr*) &opt_isam, (gptr*) &opt_isam, 0, GET_BOOL, NO_ARG, 1, 0, 0, + (gptr*) &opt_isam, (gptr*) &opt_isam, 0, GET_BOOL, NO_ARG, OPT_ISAM_DEFAULT, 0, 0, 0, 0, 0}, {"language", 'L', "Client error messages in given language. May be given as a full path.", @@ -4514,8 +4544,8 @@ master-ssl", GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0}, {"ndbcluster", OPT_NDBCLUSTER, "Enable NDB Cluster (if this version of MySQL supports it). 
\ Disable with --skip-ndbcluster (will save memory).", - (gptr*) &opt_ndbcluster, (gptr*) &opt_ndbcluster, 0, GET_BOOL, NO_ARG, 1, 0, 0, - 0, 0, 0}, + (gptr*) &opt_ndbcluster, (gptr*) &opt_ndbcluster, 0, GET_BOOL, NO_ARG, + OPT_NDBCLUSTER_DEFAULT, 0, 0, 0, 0, 0}, #ifdef HAVE_NDBCLUSTER_DB {"ndb-connectstring", OPT_NDB_CONNECTSTRING, "Connect string for ndbcluster.", @@ -6400,6 +6430,24 @@ static void get_options(int argc,char **argv) if ((ho_error= handle_options(&argc, &argv, my_long_options, get_one_option))) exit(ho_error); + +#ifndef HAVE_NDBCLUSTER_DB + if (opt_ndbcluster) + sql_print_warning("this binary does not contain NDBCLUSTER storage engine"); +#endif +#ifndef HAVE_INNOBASE_DB + if (opt_innodb) + sql_print_warning("this binary does not contain INNODB storage engine"); +#endif +#ifndef HAVE_ISAM + if (opt_isam) + sql_print_warning("this binary does not contain ISAM storage engine"); +#endif +#ifndef HAVE_BERKELEY_DB + if (opt_bdb) + sql_print_warning("this binary does not contain BDB storage engine"); +#endif + if (argc > 0) { fprintf(stderr, "%s: Too many arguments (first extra is '%s').\nUse --help to get a list of available options\n", my_progname, *argv); diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index c04312f6ded..e27cd20e15e 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -5746,7 +5746,12 @@ bool reload_acl_and_cache(THD *thd, ulong options, TABLE_LIST *tables, return 1; result=close_cached_tables(thd,(options & REFRESH_FAST) ? 0 : 1, tables); - make_global_read_lock_block_commit(thd); + if (make_global_read_lock_block_commit(thd)) + { + /* Don't leave things in a half-locked state */ + unlock_global_read_lock(thd); + return 1; + } } else result=close_cached_tables(thd,(options & REFRESH_FAST) ? 0 : 1, tables); diff --git a/strings/decimal.c b/strings/decimal.c index b88be6116a6..224d75f6844 100644 --- a/strings/decimal.c +++ b/strings/decimal.c @@ -544,7 +544,7 @@ int decimal2longlong(decimal *from, longlong *to) so we can convert -9223372036854775808 correctly */ x=x*DIG_BASE - *buf++; - if (unlikely(y < (LONGLONG_MAX/DIG_BASE) || x > y)) + if (unlikely(y < (LONGLONG_MIN/DIG_BASE) || x > y)) { *to= from->sign ? y : -y; return E_DEC_OVERFLOW; @@ -585,7 +585,7 @@ int decimal2bin(decimal *from, char *to, int precision, int frac) { dec1 mask=from->sign ? 
-1 : 0, *buf1=from->buf, *stop1; int error=E_DEC_OK, intg=precision-frac, - isize1, intg1, intg1x=from->intg, + isize1, intg1, intg1x, from_intg=from->intg, intg0=intg/DIG_PER_DEC1, frac0=frac/DIG_PER_DEC1, intg0x=intg-intg0*DIG_PER_DEC1, @@ -597,33 +597,33 @@ int decimal2bin(decimal *from, char *to, int precision, int frac) fsize1=frac1*sizeof(dec1)+dig2bytes[frac1x]; /* removing leading zeroes */ - intg1=((intg1x-1) % DIG_PER_DEC1)+1; - while (intg1x > 0 && *buf1 == 0) + intg1=((from_intg-1) % DIG_PER_DEC1)+1; + while (from_intg > 0 && *buf1 == 0) { - intg1x-=intg1; + from_intg-=intg1; intg1=DIG_PER_DEC1; buf1++; } - if (intg1x > 0) + if (from_intg > 0) { - for (intg1=(intg1x-1) % DIG_PER_DEC1; *buf1 < powers10[intg1--]; intg1x--) ; - DBUG_ASSERT(intg1x > 0); + for (intg1=(from_intg-1) % DIG_PER_DEC1; *buf1 < powers10[intg1--]; from_intg--) ; + DBUG_ASSERT(from_intg > 0); } else - intg1x=0; + from_intg=0; - if (unlikely(intg1x+fsize1==0)) + if (unlikely(from_intg+fsize1==0)) { mask=0; /* just in case */ intg=1; buf1=&mask; } - intg1=intg1x/DIG_PER_DEC1; - intg1x=intg1x-intg1*DIG_PER_DEC1; + intg1=from_intg/DIG_PER_DEC1; + intg1x=from_intg-intg1*DIG_PER_DEC1; isize1=intg1*sizeof(dec1)+dig2bytes[intg1x]; - if (isize0 < isize1) + if (intg < from_intg) { buf1+=intg1-intg0+(intg1x>0)-(intg0x>0); intg1=intg0; intg1x=intg0x; @@ -873,7 +873,7 @@ int decimal_round(decimal *from, decimal *to, int scale, decimal_round_mode mode error=E_DEC_TRUNCATED; } - if (scale+from->intg <= 0) + if (scale+from->intg < 0) { decimal_make_zero(to); return E_DEC_OK; @@ -921,6 +921,12 @@ int decimal_round(decimal *from, decimal *to, int scale, decimal_round_mode mode x+=10; *buf1=powers10[pos]*(x-y); } + if (frac0 < 0) + { + dec1 *end=to->buf+intg0, *buf=buf1+1; + while (buf < end) + *buf++=0; + } if (*buf1 >= DIG_BASE) { carry=1; @@ -944,6 +950,16 @@ int decimal_round(decimal *from, decimal *to, int scale, decimal_round_mode mode to->intg++; } } + else + { + while (unlikely(*buf1 == 0) && buf1 >= to->buf) + buf1--; + if (buf1 < to->buf) + { + decimal_make_zero(to); + return E_DEC_OK; + } + } if (scale<0) scale=0; done: @@ -1637,7 +1653,7 @@ void dump_decimal(decimal *d) printf("%09d} */ ", d->buf[i]); } -void print_decimal(decimal *d) +void print_decimal(decimal *d, char *orig) { char s[100]; int slen=sizeof(s); @@ -1645,6 +1661,11 @@ void print_decimal(decimal *d) if (full) dump_decimal(d); decimal2string(d, s, &slen); printf("'%s'", s); + if (orig && strcmp(orig, s)) + { + printf("\n^^^^^^^^^^^^^ must've been '%s'\n", orig); + exit(1); + } } void test_d2s() @@ -1683,12 +1704,12 @@ void test_d2s() dump_decimal(&a); printf(" --> res=%d str='%s' len=%d\n", res, s, slen); } -void test_s2d(char *s) +void test_s2d(char *s, char *orig) { char s1[100]; sprintf(s1, "'%s'", s); printf("len=%2d %-30s => res=%d ", a.len, s1, string2decimal(s, &a, 0)); - print_decimal(&a); + print_decimal(&a, orig); printf("\n"); } @@ -1705,7 +1726,7 @@ void test_d2f(char *s) printf("%-40s => res=%d %.*g\n", s1, res, a.intg+a.frac, x); } -void test_d2b2d(char *str, int p, int s) +void test_d2b2d(char *str, int p, int s, char *orig) { char s1[100], buf[100]; double x; @@ -1723,7 +1744,7 @@ void test_d2b2d(char *str, int p, int s) } res=bin2decimal(buf, &a, p, s); printf(" => res=%d ", res); - print_decimal(&a); + print_decimal(&a, orig); printf("\n"); } void test_f2d(double from) @@ -1732,11 +1753,11 @@ void test_f2d(double from) res=double2decimal(from, &a); printf("%-40.*f => res=%d ", DBL_DIG-2, from, res); - print_decimal(&a); + 
print_decimal(&a, 0); printf("\n"); } -void test_ull2d(ulonglong from) +void test_ull2d(ulonglong from, char *orig) { char s[100]; int res; @@ -1744,11 +1765,11 @@ void test_ull2d(ulonglong from) res=ulonglong2decimal(from, &a); longlong10_to_str(from,s,10); printf("%-40s => res=%d ", s, res); - print_decimal(&a); + print_decimal(&a, orig); printf("\n"); } -void test_ll2d(longlong from) +void test_ll2d(longlong from, char *orig) { char s[100]; int res; @@ -1756,11 +1777,11 @@ void test_ll2d(longlong from) res=longlong2decimal(from, &a); longlong10_to_str(from,s,-10); printf("%-40s => res=%d ", s, res); - print_decimal(&a); + print_decimal(&a, orig); printf("\n"); } -void test_d2ull(char *s) +void test_d2ull(char *s, char *orig) { char s1[100]; ulonglong x; @@ -1771,9 +1792,14 @@ void test_d2ull(char *s) if (full) dump_decimal(&a); longlong10_to_str(x,s1,10); printf("%-40s => res=%d %s\n", s, res, s1); + if (orig && strcmp(orig, s1)) + { + printf("\n^^^^^^^^^^^^^ must've been '%s'\n", orig); + exit(1); + } } -void test_d2ll(char *s) +void test_d2ll(char *s, char *orig) { char s1[100]; longlong x; @@ -1784,9 +1810,14 @@ void test_d2ll(char *s) if (full) dump_decimal(&a); longlong10_to_str(x,s1,-10); printf("%-40s => res=%d %s\n", s, res, s1); + if (orig && strcmp(orig, s1)) + { + printf("\n^^^^^^^^^^^^^ must've been '%s'\n", orig); + exit(1); + } } -void test_da(char *s1, char *s2) +void test_da(char *s1, char *s2, char *orig) { char s[100]; int res; @@ -1795,11 +1826,11 @@ void test_da(char *s1, char *s2) string2decimal(s2, &b, 0); res=decimal_add(&a, &b, &c); printf("%-40s => res=%d ", s, res); - print_decimal(&c); + print_decimal(&c, orig); printf("\n"); } -void test_ds(char *s1, char *s2) +void test_ds(char *s1, char *s2, char *orig) { char s[100]; int res; @@ -1808,11 +1839,11 @@ void test_ds(char *s1, char *s2) string2decimal(s2, &b, 0); res=decimal_sub(&a, &b, &c); printf("%-40s => res=%d ", s, res); - print_decimal(&c); + print_decimal(&c, orig); printf("\n"); } -void test_dc(char *s1, char *s2) +void test_dc(char *s1, char *s2, int orig) { char s[100]; int res; @@ -1821,9 +1852,14 @@ void test_dc(char *s1, char *s2) string2decimal(s2, &b, 0); res=decimal_cmp(&a, &b); printf("%-40s => res=%d\n", s, res); + if (orig != res) + { + printf("\n^^^^^^^^^^^^^ must've been %d\n", orig); + exit(1); + } } -void test_dm(char *s1, char *s2) +void test_dm(char *s1, char *s2, char *orig) { char s[100]; int res; @@ -1832,11 +1868,11 @@ void test_dm(char *s1, char *s2) string2decimal(s2, &b, 0); res=decimal_mul(&a, &b, &c); printf("%-40s => res=%d ", s, res); - print_decimal(&c); + print_decimal(&c, orig); printf("\n"); } -void test_dv(char *s1, char *s2) +void test_dv(char *s1, char *s2, char *orig) { char s[100]; int res; @@ -1848,11 +1884,11 @@ void test_dv(char *s1, char *s2) if (res == E_DEC_DIV_ZERO) printf("E_DEC_DIV_ZERO"); else - print_decimal(&c); + print_decimal(&c, orig); printf("\n"); } -void test_md(char *s1, char *s2) +void test_md(char *s1, char *s2, char *orig) { char s[100]; int res; @@ -1864,13 +1900,13 @@ void test_md(char *s1, char *s2) if (res == E_DEC_DIV_ZERO) printf("E_DEC_DIV_ZERO"); else - print_decimal(&c); + print_decimal(&c, orig); printf("\n"); } char *round_mode[]={"TRUNCATE", "HALF_EVEN", "HALF_UP", "CEILING", "FLOOR"}; -void test_ro(char *s1, int n, decimal_round_mode mode) +void test_ro(char *s1, int n, decimal_round_mode mode, char *orig) { char s[100]; int res; @@ -1878,7 +1914,7 @@ void test_ro(char *s1, int n, decimal_round_mode mode) string2decimal(s1, &a, 0); 
res=decimal_round(&a, &b, n, mode); printf("%-40s => res=%d ", s, res); - print_decimal(&b); + print_decimal(&b, orig); printf("\n"); } @@ -1895,17 +1931,17 @@ main() test_d2s(); printf("==== string2decimal ====\n"); - test_s2d("12345"); - test_s2d("12345."); - test_s2d("123.45"); - test_s2d("-123.45"); - test_s2d(".00012345000098765"); - test_s2d(".12345000098765"); - test_s2d("-.000000012345000098765"); - test_s2d("1234500009876.5"); + test_s2d("12345", "12345"); + test_s2d("12345.", "12345"); + test_s2d("123.45", "123.45"); + test_s2d("-123.45", "-123.45"); + test_s2d(".00012345000098765", ".00012345000098765"); + test_s2d(".12345000098765", ".12345000098765"); + test_s2d("-.000000012345000098765", "-.000000012345000098765"); + test_s2d("1234500009876.5", "1234500009876.5"); a.len=1; - test_s2d("123450000098765"); - test_s2d("123450.000098765"); + test_s2d("123450000098765", "98765"); + test_s2d("123450.000098765", "123450"); a.len=sizeof(buf1)/sizeof(dec1); printf("==== decimal2double ====\n"); @@ -1923,159 +1959,160 @@ main() test_f2d(1234500009876.5); printf("==== ulonglong2decimal ====\n"); - test_ull2d(ULL(12345)); - test_ull2d(ULL(0)); - test_ull2d(ULL(18446744073709551615)); + test_ull2d(ULL(12345), "12345"); + test_ull2d(ULL(0), "0"); + test_ull2d(ULL(18446744073709551615), "18446744073709551615"); printf("==== decimal2ulonglong ====\n"); - test_d2ull("12345"); - test_d2ull("0"); - test_d2ull("18446744073709551615"); - test_d2ull("18446744073709551616"); - test_d2ull("-1"); - test_d2ull("1.23"); - test_d2ull("9999999999999999999999999.000"); + test_d2ull("12345", "12345"); + test_d2ull("0", "0"); + test_d2ull("18446744073709551615", "18446744073709551615"); + test_d2ull("18446744073709551616", "18446744073"); + test_d2ull("-1", "0"); + test_d2ull("1.23", "1"); + test_d2ull("9999999999999999999999999.000", "9999999999999999"); printf("==== longlong2decimal ====\n"); - test_ll2d(LL(-12345)); - test_ll2d(LL(-1)); - test_ll2d(LL(-9223372036854775807)); - test_ll2d(ULL(9223372036854775808)); + test_ll2d(LL(-12345), "-12345"); + test_ll2d(LL(-1), "-1"); + test_ll2d(LL(-9223372036854775807), "-9223372036854775807"); + test_ll2d(ULL(9223372036854775808), "-9223372036854775808"); printf("==== decimal2longlong ====\n"); - test_d2ll("18446744073709551615"); - test_d2ll("-1"); - test_d2ll("-1.23"); - test_d2ll("-9223372036854775807"); - test_d2ll("-9223372036854775808"); - test_d2ll("9223372036854775808"); + test_d2ll("18446744073709551615", "18446744073"); + test_d2ll("-1", "-1"); + test_d2ll("-1.23", "-1"); + test_d2ll("-9223372036854775807", "-9223372036854775807"); + test_d2ll("-9223372036854775808", "-9223372036854775808"); + test_d2ll("9223372036854775808", "9223372036854775807"); printf("==== do_add ====\n"); - test_da(".00012345000098765" ,"123.45"); - test_da(".1" ,".45"); - test_da("1234500009876.5" ,".00012345000098765"); - test_da("9999909999999.5" ,".555"); - test_da("99999999" ,"1"); - test_da("989999999" ,"1"); - test_da("999999999" ,"1"); - test_da("12345" ,"123.45"); - test_da("-12345" ,"-123.45"); - test_ds("-12345" ,"123.45"); - test_ds("12345" ,"-123.45"); + test_da(".00012345000098765" ,"123.45", "123.45012345000098765"); + test_da(".1" ,".45", ".55"); + test_da("1234500009876.5" ,".00012345000098765", "1234500009876.50012345000098765"); + test_da("9999909999999.5" ,".555", "9999910000000.055"); + test_da("99999999" ,"1", "100000000"); + test_da("989999999" ,"1", "990000000"); + test_da("999999999" ,"1", "1000000000"); + test_da("12345" ,"123.45", "12468.45"); + 
test_da("-12345" ,"-123.45", "-12468.45"); + test_ds("-12345" ,"123.45", "-12468.45"); + test_ds("12345" ,"-123.45", "12468.45"); printf("==== do_sub ====\n"); - test_ds(".00012345000098765", "123.45"); - test_ds("1234500009876.5", ".00012345000098765"); - test_ds("9999900000000.5", ".555"); - test_ds("1111.5551", "1111.555"); - test_ds(".555", ".555"); - test_ds("10000000", "1"); - test_ds("1000001000", ".1"); - test_ds("1000000000", ".1"); - test_ds("12345", "123.45"); - test_ds("-12345", "-123.45"); - test_da("-12345", "123.45"); - test_da("12345", "-123.45"); - test_ds("123.45", "12345"); - test_ds("-123.45", "-12345"); - test_da("123.45", "-12345"); - test_da("-123.45", "12345"); - test_da("5", "-6.0"); + test_ds(".00012345000098765", "123.45","-123.44987654999901235"); + test_ds("1234500009876.5", ".00012345000098765","1234500009876.49987654999901235"); + test_ds("9999900000000.5", ".555","9999899999999.945"); + test_ds("1111.5551", "1111.555",".0001"); + test_ds(".555", ".555","0"); + test_ds("10000000", "1","9999999"); + test_ds("1000001000", ".1","1000000999.9"); + test_ds("1000000000", ".1","999999999.9"); + test_ds("12345", "123.45","12221.55"); + test_ds("-12345", "-123.45","-12221.55"); + test_da("-12345", "123.45","-12221.55"); + test_da("12345", "-123.45","12221.55"); + test_ds("123.45", "12345","-12221.55"); + test_ds("-123.45", "-12345","12221.55"); + test_da("123.45", "-12345","-12221.55"); + test_da("-123.45", "12345","12221.55"); + test_da("5", "-6.0","-1.0"); printf("==== decimal_mul ====\n"); - test_dm("12", "10"); - test_dm("-123.456", "98765.4321"); - test_dm("-123456000000", "98765432100000"); - test_dm("123456", "987654321"); - test_dm("123456", "9876543210"); - test_dm("123", "0.01"); - test_dm("123", "0"); + test_dm("12", "10","120"); + test_dm("-123.456", "98765.4321","-12193185.1853376"); + test_dm("-123456000000", "98765432100000","-12193185185337600000000000"); + test_dm("123456", "987654321","121931851853376"); + test_dm("123456", "9876543210","1219318518533760"); + test_dm("123", "0.01","1.23"); + test_dm("123", "0","0"); printf("==== decimal_div ====\n"); - test_dv("120", "10"); - test_dv("123", "0.01"); - test_dv("120", "100000000000.00000"); - test_dv("123", "0"); - test_dv("-12193185.1853376", "98765.4321"); - test_dv("121931851853376", "987654321"); - test_dv("0", "987"); - test_dv("1", "3"); - test_dv("1.000000000000", "3"); - test_dv("1", "1"); - test_dv("0.0123456789012345678912345", "9999999999"); + test_dv("120", "10","12.000000000"); + test_dv("123", "0.01","12300.000000000"); + test_dv("120", "100000000000.00000",".000000001200000000"); + test_dv("123", "0",""); + test_dv("-12193185.1853376", "98765.4321","-123.456000000000000000"); + test_dv("121931851853376", "987654321","123456.000000000"); + test_dv("0", "987","0"); + test_dv("1", "3",".333333333"); + test_dv("1.000000000000", "3",".333333333333333333"); + test_dv("1", "1","1.000000000"); + test_dv("0.0123456789012345678912345", "9999999999",".000000000001234567890246913578148141"); printf("==== decimal_mod ====\n"); - test_md("234","10"); - test_md("234.567","10.555"); - test_md("-234.567","10.555"); - test_md("234.567","-10.555"); + test_md("234","10","4"); + test_md("234.567","10.555","2.357"); + test_md("-234.567","10.555","-2.357"); + test_md("234.567","-10.555","2.357"); if (full) { c.buf[1]=0x3ABECA; - test_md("99999999999999999999999999999999999999","3"); + test_md("99999999999999999999999999999999999999","3","0"); printf("%X\n", c.buf[1]); } printf("==== decimal2bin/bin2decimal 
====\n"); - test_d2b2d("-10.55", 4, 2); - test_d2b2d("0.0123456789012345678912345", 30, 25); - test_d2b2d("12345", 5, 0); - test_d2b2d("12345", 10, 3); - test_d2b2d("123.45", 10, 3); - test_d2b2d("-123.45", 20, 10); - test_d2b2d(".00012345000098765", 15, 14); - test_d2b2d(".00012345000098765", 22, 20); - test_d2b2d(".12345000098765", 30, 20); - test_d2b2d("-.000000012345000098765", 30, 20); - test_d2b2d("1234500009876.5", 30, 5); + test_d2b2d("-10.55", 4, 2,"-10.55"); + test_d2b2d("0.0123456789012345678912345", 30, 25,".0123456789012345678912345"); + test_d2b2d("12345", 5, 0,"12345"); + test_d2b2d("12345", 10, 3,"12345.000"); + test_d2b2d("123.45", 10, 3,"123.450"); + test_d2b2d("-123.45", 20, 10,"-123.4500000000"); + test_d2b2d(".00012345000098765", 15, 14,".00012345000098"); + test_d2b2d(".00012345000098765", 22, 20,".00012345000098765000"); + test_d2b2d(".12345000098765", 30, 20,".12345000098765000000"); + test_d2b2d("-.000000012345000098765", 30, 20,"-.00000001234500009876"); + test_d2b2d("1234500009876.5", 30, 5,"1234500009876.50000"); + test_d2b2d("111111111.11", 10, 2,"11111111.11"); printf("==== decimal_cmp ====\n"); - test_dc("12","13"); - test_dc("13","12"); - test_dc("-10","10"); - test_dc("10","-10"); - test_dc("-12","-13"); - test_dc("0","12"); - test_dc("-10","0"); - test_dc("4","4"); + test_dc("12","13",-1); + test_dc("13","12",1); + test_dc("-10","10",-1); + test_dc("10","-10",1); + test_dc("-12","-13",1); + test_dc("0","12",-1); + test_dc("-10","0",-1); + test_dc("4","4",0); printf("==== decimal_round ====\n"); - test_ro("5678.123451",-4,TRUNCATE); - test_ro("5678.123451",-3,TRUNCATE); - test_ro("5678.123451",-2,TRUNCATE); - test_ro("5678.123451",-1,TRUNCATE); - test_ro("5678.123451",0,TRUNCATE); - test_ro("5678.123451",1,TRUNCATE); - test_ro("5678.123451",2,TRUNCATE); - test_ro("5678.123451",3,TRUNCATE); - test_ro("5678.123451",4,TRUNCATE); - test_ro("5678.123451",5,TRUNCATE); - test_ro("5678.123451",6,TRUNCATE); - test_ro("-5678.123451",-4,TRUNCATE); - test_ro("99999999999999999999999999999999999999",-31,TRUNCATE); - test_ro("15.1",0,HALF_UP); - test_ro("15.5",0,HALF_UP); - test_ro("15.9",0,HALF_UP); - test_ro("-15.1",0,HALF_UP); - test_ro("-15.5",0,HALF_UP); - test_ro("-15.9",0,HALF_UP); - test_ro("15.1",1,HALF_UP); - test_ro("-15.1",1,HALF_UP); - test_ro("15.17",1,HALF_UP); - test_ro("15.4",-1,HALF_UP); - test_ro("-15.4",-1,HALF_UP); - test_ro("5.4",-1,HALF_UP); - test_ro("15.1",0,HALF_EVEN); - test_ro("15.5",0,HALF_EVEN); - test_ro("14.5",0,HALF_EVEN); - test_ro("15.9",0,HALF_EVEN); - test_ro("15.1",0,CEILING); - test_ro("-15.1",0,CEILING); - test_ro("15.1",0,FLOOR); - test_ro("-15.1",0,FLOOR); - test_ro("999999999999999999999.999", 0, CEILING); - test_ro("-999999999999999999999.999", 0, FLOOR); - + test_ro("5678.123451",-4,TRUNCATE,"0"); + test_ro("5678.123451",-3,TRUNCATE,"5000"); + test_ro("5678.123451",-2,TRUNCATE,"5600"); + test_ro("5678.123451",-1,TRUNCATE,"5670"); + test_ro("5678.123451",0,TRUNCATE,"5678"); + test_ro("5678.123451",1,TRUNCATE,"5678.1"); + test_ro("5678.123451",2,TRUNCATE,"5678.12"); + test_ro("5678.123451",3,TRUNCATE,"5678.123"); + test_ro("5678.123451",4,TRUNCATE,"5678.1234"); + test_ro("5678.123451",5,TRUNCATE,"5678.12345"); + test_ro("5678.123451",6,TRUNCATE,"5678.123451"); + test_ro("-5678.123451",-4,TRUNCATE,"0"); + memset(buf2, 33, sizeof(buf2)); + test_ro("99999999999999999999999999999999999999",-31,TRUNCATE,"99999990000000000000000000000000000000"); + test_ro("15.1",0,HALF_UP,"15"); + test_ro("15.5",0,HALF_UP,"16"); + 
test_ro("15.9",0,HALF_UP,"16"); + test_ro("-15.1",0,HALF_UP,"-15"); + test_ro("-15.5",0,HALF_UP,"-16"); + test_ro("-15.9",0,HALF_UP,"-16"); + test_ro("15.1",1,HALF_UP,"15.1"); + test_ro("-15.1",1,HALF_UP,"-15.1"); + test_ro("15.17",1,HALF_UP,"15.2"); + test_ro("15.4",-1,HALF_UP,"20"); + test_ro("-15.4",-1,HALF_UP,"-20"); + test_ro("5.4",-1,HALF_UP,"10"); + test_ro("15.1",0,HALF_EVEN,"15"); + test_ro("15.5",0,HALF_EVEN,"16"); + test_ro("14.5",0,HALF_EVEN,"14"); + test_ro("15.9",0,HALF_EVEN,"16"); + test_ro("15.1",0,CEILING,"16"); + test_ro("-15.1",0,CEILING,"-15"); + test_ro("15.1",0,FLOOR,"15"); + test_ro("-15.1",0,FLOOR,"-16"); + test_ro("999999999999999999999.999", 0, CEILING,"1000000000000000000000"); + test_ro("-999999999999999999999.999", 0, FLOOR,"-1000000000000000000000"); return 0; }