mirror of
https://github.com/MariaDB/server.git
synced 2025-07-29 05:21:33 +03:00
srv0srv.h Support raw disk partitions as data files
srv0start.c Support raw disk partitions as data files srv0srv.c Support raw disk partitions as data files row0purge.c < 4 GB rows, doublewrite, hang fixes row0row.c < 4 GB rows, doublewrite, hang fixes row0sel.c < 4 GB rows, doublewrite, hang fixes row0uins.c < 4 GB rows, doublewrite, hang fixes row0umod.c < 4 GB rows, doublewrite, hang fixes row0undo.c < 4 GB rows, doublewrite, hang fixes row0upd.c < 4 GB rows, doublewrite, hang fixes srv0srv.c < 4 GB rows, doublewrite, hang fixes srv0start.c < 4 GB rows, doublewrite, hang fixes sync0rw.c < 4 GB rows, doublewrite, hang fixes sync0sync.c < 4 GB rows, doublewrite, hang fixes trx0purge.c < 4 GB rows, doublewrite, hang fixes trx0rec.c < 4 GB rows, doublewrite, hang fixes trx0sys.c < 4 GB rows, doublewrite, hang fixes btr0btr.c < 4 GB rows, doublewrite, hang fixes btr0cur.c < 4 GB rows, doublewrite, hang fixes buf0buf.c < 4 GB rows, doublewrite, hang fixes buf0flu.c < 4 GB rows, doublewrite, hang fixes buf0rea.c < 4 GB rows, doublewrite, hang fixes data0data.c < 4 GB rows, doublewrite, hang fixes fil0fil.c < 4 GB rows, doublewrite, hang fixes fsp0fsp.c < 4 GB rows, doublewrite, hang fixes ibuf0ibuf.c < 4 GB rows, doublewrite, hang fixes lock0lock.c < 4 GB rows, doublewrite, hang fixes log0log.c < 4 GB rows, doublewrite, hang fixes log0recv.c < 4 GB rows, doublewrite, hang fixes os0file.c < 4 GB rows, doublewrite, hang fixes page0cur.c < 4 GB rows, doublewrite, hang fixes pars0pars.c < 4 GB rows, doublewrite, hang fixes rem0cmp.c < 4 GB rows, doublewrite, hang fixes rem0rec.c < 4 GB rows, doublewrite, hang fixes row0ins.c < 4 GB rows, doublewrite, hang fixes row0mysql.c < 4 GB rows, doublewrite, hang fixes univ.i < 4 GB rows, doublewrite, hang fixes data0data.ic < 4 GB rows, doublewrite, hang fixes mach0data.ic < 4 GB rows, doublewrite, hang fixes rem0rec.ic < 4 GB rows, doublewrite, hang fixes row0upd.ic < 4 GB rows, doublewrite, hang fixes trx0rec.ic < 4 GB rows, doublewrite, hang fixes rem0cmp.h < 4 GB rows, 
doublewrite, hang fixes rem0rec.h < 4 GB rows, doublewrite, hang fixes row0ins.h < 4 GB rows, doublewrite, hang fixes row0mysql.h < 4 GB rows, doublewrite, hang fixes row0row.h < 4 GB rows, doublewrite, hang fixes row0upd.h < 4 GB rows, doublewrite, hang fixes srv0srv.h < 4 GB rows, doublewrite, hang fixes sync0sync.h < 4 GB rows, doublewrite, hang fixes trx0rec.h < 4 GB rows, doublewrite, hang fixes trx0sys.h < 4 GB rows, doublewrite, hang fixes trx0types.h < 4 GB rows, doublewrite, hang fixes trx0undo.h < 4 GB rows, doublewrite, hang fixes ut0dbg.h < 4 GB rows, doublewrite, hang fixes ut0ut.h < 4 GB rows, doublewrite, hang fixes btr0btr.h < 4 GB rows, doublewrite, hang fixes btr0cur.h < 4 GB rows, doublewrite, hang fixes buf0buf.h < 4 GB rows, doublewrite, hang fixes buf0flu.h < 4 GB rows, doublewrite, hang fixes data0data.h < 4 GB rows, doublewrite, hang fixes dict0mem.h < 4 GB rows, doublewrite, hang fixes fil0fil.h < 4 GB rows, doublewrite, hang fixes fsp0fsp.h < 4 GB rows, doublewrite, hang fixes os0file.h < 4 GB rows, doublewrite, hang fixes innobase/include/btr0btr.h: < 4 GB rows, doublewrite, hang fixes innobase/include/btr0cur.h: < 4 GB rows, doublewrite, hang fixes innobase/include/buf0buf.h: < 4 GB rows, doublewrite, hang fixes innobase/include/buf0flu.h: < 4 GB rows, doublewrite, hang fixes innobase/include/data0data.h: < 4 GB rows, doublewrite, hang fixes innobase/include/dict0mem.h: < 4 GB rows, doublewrite, hang fixes innobase/include/fil0fil.h: < 4 GB rows, doublewrite, hang fixes innobase/include/fsp0fsp.h: < 4 GB rows, doublewrite, hang fixes innobase/include/os0file.h: < 4 GB rows, doublewrite, hang fixes innobase/include/rem0cmp.h: < 4 GB rows, doublewrite, hang fixes innobase/include/rem0rec.h: < 4 GB rows, doublewrite, hang fixes innobase/include/row0ins.h: < 4 GB rows, doublewrite, hang fixes innobase/include/row0mysql.h: < 4 GB rows, doublewrite, hang fixes innobase/include/row0row.h: < 4 GB rows, doublewrite, hang fixes 
innobase/include/row0upd.h: < 4 GB rows, doublewrite, hang fixes innobase/include/sync0sync.h: < 4 GB rows, doublewrite, hang fixes innobase/include/trx0rec.h: < 4 GB rows, doublewrite, hang fixes innobase/include/trx0sys.h: < 4 GB rows, doublewrite, hang fixes innobase/include/trx0types.h: < 4 GB rows, doublewrite, hang fixes innobase/include/trx0undo.h: < 4 GB rows, doublewrite, hang fixes innobase/include/ut0dbg.h: < 4 GB rows, doublewrite, hang fixes innobase/include/ut0ut.h: < 4 GB rows, doublewrite, hang fixes innobase/include/data0data.ic: < 4 GB rows, doublewrite, hang fixes innobase/include/mach0data.ic: < 4 GB rows, doublewrite, hang fixes innobase/include/rem0rec.ic: < 4 GB rows, doublewrite, hang fixes innobase/include/row0upd.ic: < 4 GB rows, doublewrite, hang fixes innobase/include/trx0rec.ic: < 4 GB rows, doublewrite, hang fixes innobase/include/univ.i: < 4 GB rows, doublewrite, hang fixes innobase/btr/btr0btr.c: < 4 GB rows, doublewrite, hang fixes innobase/btr/btr0cur.c: < 4 GB rows, doublewrite, hang fixes innobase/buf/buf0buf.c: < 4 GB rows, doublewrite, hang fixes innobase/buf/buf0flu.c: < 4 GB rows, doublewrite, hang fixes innobase/buf/buf0rea.c: < 4 GB rows, doublewrite, hang fixes innobase/data/data0data.c: < 4 GB rows, doublewrite, hang fixes innobase/fil/fil0fil.c: < 4 GB rows, doublewrite, hang fixes innobase/fsp/fsp0fsp.c: < 4 GB rows, doublewrite, hang fixes innobase/ibuf/ibuf0ibuf.c: < 4 GB rows, doublewrite, hang fixes innobase/lock/lock0lock.c: < 4 GB rows, doublewrite, hang fixes innobase/log/log0log.c: < 4 GB rows, doublewrite, hang fixes innobase/log/log0recv.c: < 4 GB rows, doublewrite, hang fixes innobase/os/os0file.c: < 4 GB rows, doublewrite, hang fixes innobase/page/page0cur.c: < 4 GB rows, doublewrite, hang fixes innobase/pars/pars0pars.c: < 4 GB rows, doublewrite, hang fixes innobase/rem/rem0cmp.c: < 4 GB rows, doublewrite, hang fixes innobase/rem/rem0rec.c: < 4 GB rows, doublewrite, hang fixes innobase/row/row0ins.c: < 4 GB 
rows, doublewrite, hang fixes innobase/row/row0mysql.c: < 4 GB rows, doublewrite, hang fixes innobase/row/row0purge.c: < 4 GB rows, doublewrite, hang fixes innobase/row/row0row.c: < 4 GB rows, doublewrite, hang fixes innobase/row/row0sel.c: < 4 GB rows, doublewrite, hang fixes innobase/row/row0uins.c: < 4 GB rows, doublewrite, hang fixes innobase/row/row0umod.c: < 4 GB rows, doublewrite, hang fixes innobase/row/row0undo.c: < 4 GB rows, doublewrite, hang fixes innobase/row/row0upd.c: < 4 GB rows, doublewrite, hang fixes innobase/sync/sync0rw.c: < 4 GB rows, doublewrite, hang fixes innobase/sync/sync0sync.c: < 4 GB rows, doublewrite, hang fixes innobase/trx/trx0purge.c: < 4 GB rows, doublewrite, hang fixes innobase/trx/trx0rec.c: < 4 GB rows, doublewrite, hang fixes innobase/trx/trx0sys.c: < 4 GB rows, doublewrite, hang fixes innobase/srv/srv0srv.c: Support raw disk partitions as data files innobase/srv/srv0start.c: Support raw disk partitions as data files innobase/include/srv0srv.h: Support raw disk partitions as data files
This commit is contained in:
@ -71,30 +71,6 @@ btr_page_create(
|
|||||||
dict_tree_t* tree, /* in: index tree */
|
dict_tree_t* tree, /* in: index tree */
|
||||||
mtr_t* mtr); /* in: mtr */
|
mtr_t* mtr); /* in: mtr */
|
||||||
/******************************************************************
|
/******************************************************************
|
||||||
Allocates a new file page to be used in an index tree. */
|
|
||||||
static
|
|
||||||
page_t*
|
|
||||||
btr_page_alloc(
|
|
||||||
/*===========*/
|
|
||||||
/* out: new allocated page,
|
|
||||||
x-latched */
|
|
||||||
dict_tree_t* tree, /* in: index tree */
|
|
||||||
ulint hint_page_no, /* in: hint of a good page */
|
|
||||||
byte file_direction, /* in: direction where a possible
|
|
||||||
page split is made */
|
|
||||||
ulint level, /* in: level where the page is placed
|
|
||||||
in the tree */
|
|
||||||
mtr_t* mtr); /* in: mtr */
|
|
||||||
/******************************************************************
|
|
||||||
Frees a file page used in an index tree. */
|
|
||||||
static
|
|
||||||
void
|
|
||||||
btr_page_free(
|
|
||||||
/*==========*/
|
|
||||||
dict_tree_t* tree, /* in: index tree */
|
|
||||||
page_t* page, /* in, own: page to be freed */
|
|
||||||
mtr_t* mtr); /* in: mtr */
|
|
||||||
/******************************************************************
|
|
||||||
Sets the child node file address in a node pointer. */
|
Sets the child node file address in a node pointer. */
|
||||||
UNIV_INLINE
|
UNIV_INLINE
|
||||||
void
|
void
|
||||||
@ -319,11 +295,12 @@ btr_page_alloc_for_ibuf(
|
|||||||
/******************************************************************
|
/******************************************************************
|
||||||
Allocates a new file page to be used in an index tree. NOTE: we assume
|
Allocates a new file page to be used in an index tree. NOTE: we assume
|
||||||
that the caller has made the reservation for free extents! */
|
that the caller has made the reservation for free extents! */
|
||||||
static
|
|
||||||
page_t*
|
page_t*
|
||||||
btr_page_alloc(
|
btr_page_alloc(
|
||||||
/*===========*/
|
/*===========*/
|
||||||
/* out: new allocated page, x-latched */
|
/* out: new allocated page, x-latched;
|
||||||
|
NULL if out of space */
|
||||||
dict_tree_t* tree, /* in: index tree */
|
dict_tree_t* tree, /* in: index tree */
|
||||||
ulint hint_page_no, /* in: hint of a good page */
|
ulint hint_page_no, /* in: hint of a good page */
|
||||||
byte file_direction, /* in: direction where a possible
|
byte file_direction, /* in: direction where a possible
|
||||||
@ -358,7 +335,10 @@ btr_page_alloc(
|
|||||||
|
|
||||||
new_page_no = fseg_alloc_free_page_general(seg_header, hint_page_no,
|
new_page_no = fseg_alloc_free_page_general(seg_header, hint_page_no,
|
||||||
file_direction, TRUE, mtr);
|
file_direction, TRUE, mtr);
|
||||||
ut_a(new_page_no != FIL_NULL);
|
if (new_page_no == FIL_NULL) {
|
||||||
|
|
||||||
|
return(NULL);
|
||||||
|
}
|
||||||
|
|
||||||
new_page = buf_page_get(dict_tree_get_space(tree), new_page_no,
|
new_page = buf_page_get(dict_tree_get_space(tree), new_page_no,
|
||||||
RW_X_LATCH, mtr);
|
RW_X_LATCH, mtr);
|
||||||
@ -435,20 +415,22 @@ btr_page_free_for_ibuf(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/******************************************************************
|
/******************************************************************
|
||||||
Frees a file page used in an index tree. */
|
Frees a file page used in an index tree. Can also be used to free (BLOB)
|
||||||
static
|
external storage pages, because the page level 0 can be given as an
|
||||||
|
argument. */
|
||||||
|
|
||||||
void
|
void
|
||||||
btr_page_free(
|
btr_page_free_low(
|
||||||
/*==========*/
|
/*==============*/
|
||||||
dict_tree_t* tree, /* in: index tree */
|
dict_tree_t* tree, /* in: index tree */
|
||||||
page_t* page, /* in: page to be freed, x-latched */
|
page_t* page, /* in: page to be freed, x-latched */
|
||||||
|
ulint level, /* in: page level */
|
||||||
mtr_t* mtr) /* in: mtr */
|
mtr_t* mtr) /* in: mtr */
|
||||||
{
|
{
|
||||||
fseg_header_t* seg_header;
|
fseg_header_t* seg_header;
|
||||||
page_t* root;
|
page_t* root;
|
||||||
ulint space;
|
ulint space;
|
||||||
ulint page_no;
|
ulint page_no;
|
||||||
ulint level;
|
|
||||||
|
|
||||||
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
|
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
|
||||||
MTR_MEMO_PAGE_X_FIX));
|
MTR_MEMO_PAGE_X_FIX));
|
||||||
@ -465,8 +447,6 @@ btr_page_free(
|
|||||||
}
|
}
|
||||||
|
|
||||||
root = btr_root_get(tree, mtr);
|
root = btr_root_get(tree, mtr);
|
||||||
|
|
||||||
level = btr_page_get_level(page, mtr);
|
|
||||||
|
|
||||||
if (level == 0) {
|
if (level == 0) {
|
||||||
seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
|
seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
|
||||||
@ -480,6 +460,26 @@ btr_page_free(
|
|||||||
fseg_free_page(seg_header, space, page_no, mtr);
|
fseg_free_page(seg_header, space, page_no, mtr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/******************************************************************
|
||||||
|
Frees a file page used in an index tree. NOTE: cannot free field external
|
||||||
|
storage pages because the page must contain info on its level. */
|
||||||
|
|
||||||
|
void
|
||||||
|
btr_page_free(
|
||||||
|
/*==========*/
|
||||||
|
dict_tree_t* tree, /* in: index tree */
|
||||||
|
page_t* page, /* in: page to be freed, x-latched */
|
||||||
|
mtr_t* mtr) /* in: mtr */
|
||||||
|
{
|
||||||
|
ulint level;
|
||||||
|
|
||||||
|
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
|
||||||
|
MTR_MEMO_PAGE_X_FIX));
|
||||||
|
level = btr_page_get_level(page, mtr);
|
||||||
|
|
||||||
|
btr_page_free_low(tree, page, level, mtr);
|
||||||
|
}
|
||||||
|
|
||||||
/******************************************************************
|
/******************************************************************
|
||||||
Sets the child node file address in a node pointer. */
|
Sets the child node file address in a node pointer. */
|
||||||
UNIV_INLINE
|
UNIV_INLINE
|
||||||
@ -1276,6 +1276,7 @@ btr_insert_on_non_leaf_level(
|
|||||||
dtuple_t* tuple, /* in: the record to be inserted */
|
dtuple_t* tuple, /* in: the record to be inserted */
|
||||||
mtr_t* mtr) /* in: mtr */
|
mtr_t* mtr) /* in: mtr */
|
||||||
{
|
{
|
||||||
|
big_rec_t* dummy_big_rec;
|
||||||
btr_cur_t cursor;
|
btr_cur_t cursor;
|
||||||
ulint err;
|
ulint err;
|
||||||
rec_t* rec;
|
rec_t* rec;
|
||||||
@ -1294,7 +1295,7 @@ btr_insert_on_non_leaf_level(
|
|||||||
| BTR_KEEP_SYS_FLAG
|
| BTR_KEEP_SYS_FLAG
|
||||||
| BTR_NO_UNDO_LOG_FLAG,
|
| BTR_NO_UNDO_LOG_FLAG,
|
||||||
&cursor, tuple,
|
&cursor, tuple,
|
||||||
&rec, NULL, mtr);
|
&rec, &dummy_big_rec, NULL, mtr);
|
||||||
ut_a(err == DB_SUCCESS);
|
ut_a(err == DB_SUCCESS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -216,14 +216,44 @@ buf_calc_page_checksum(
|
|||||||
/* out: checksum */
|
/* out: checksum */
|
||||||
byte* page) /* in: buffer page */
|
byte* page) /* in: buffer page */
|
||||||
{
|
{
|
||||||
ulint checksum;
|
ulint checksum;
|
||||||
|
|
||||||
checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN);
|
checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN);
|
||||||
+ ut_fold_binary(page + FIL_PAGE_DATA, UNIV_PAGE_SIZE - FIL_PAGE_DATA
|
+ ut_fold_binary(page + FIL_PAGE_DATA,
|
||||||
- FIL_PAGE_END_LSN);
|
UNIV_PAGE_SIZE - FIL_PAGE_DATA
|
||||||
checksum = checksum & 0xFFFFFFFF;
|
- FIL_PAGE_END_LSN);
|
||||||
|
checksum = checksum & 0xFFFFFFFF;
|
||||||
|
|
||||||
return(checksum);
|
return(checksum);
|
||||||
|
}
|
||||||
|
|
||||||
|
/************************************************************************
|
||||||
|
Checks if a page is corrupt. */
|
||||||
|
|
||||||
|
ibool
|
||||||
|
buf_page_is_corrupted(
|
||||||
|
/*==================*/
|
||||||
|
/* out: TRUE if corrupted */
|
||||||
|
byte* read_buf) /* in: a database page */
|
||||||
|
{
|
||||||
|
ulint checksum;
|
||||||
|
|
||||||
|
checksum = buf_calc_page_checksum(read_buf);
|
||||||
|
|
||||||
|
if ((mach_read_from_4(read_buf + FIL_PAGE_LSN + 4)
|
||||||
|
!= mach_read_from_4(read_buf + UNIV_PAGE_SIZE
|
||||||
|
- FIL_PAGE_END_LSN + 4))
|
||||||
|
|| (checksum != mach_read_from_4(read_buf
|
||||||
|
+ UNIV_PAGE_SIZE
|
||||||
|
- FIL_PAGE_END_LSN)
|
||||||
|
&& mach_read_from_4(read_buf + FIL_PAGE_LSN)
|
||||||
|
!= mach_read_from_4(read_buf
|
||||||
|
+ UNIV_PAGE_SIZE
|
||||||
|
- FIL_PAGE_END_LSN))) {
|
||||||
|
return(TRUE);
|
||||||
|
}
|
||||||
|
|
||||||
|
return(FALSE);
|
||||||
}
|
}
|
||||||
|
|
||||||
/************************************************************************
|
/************************************************************************
|
||||||
@ -1265,34 +1295,22 @@ buf_page_io_complete(
|
|||||||
dulint id;
|
dulint id;
|
||||||
dict_index_t* index;
|
dict_index_t* index;
|
||||||
ulint io_type;
|
ulint io_type;
|
||||||
ulint checksum;
|
|
||||||
|
|
||||||
ut_ad(block);
|
ut_ad(block);
|
||||||
|
|
||||||
io_type = block->io_fix;
|
io_type = block->io_fix;
|
||||||
|
|
||||||
if (io_type == BUF_IO_READ) {
|
if (io_type == BUF_IO_READ) {
|
||||||
checksum = buf_calc_page_checksum(block->frame);
|
|
||||||
|
|
||||||
/* From version 3.23.38 up we store the page checksum
|
/* From version 3.23.38 up we store the page checksum
|
||||||
to the 4 upper bytes of the page end lsn field */
|
to the 4 upper bytes of the page end lsn field */
|
||||||
|
|
||||||
if ((mach_read_from_4(block->frame + FIL_PAGE_LSN + 4)
|
if (buf_page_is_corrupted(block->frame)) {
|
||||||
!= mach_read_from_4(block->frame + UNIV_PAGE_SIZE
|
fprintf(stderr,
|
||||||
- FIL_PAGE_END_LSN + 4))
|
|
||||||
|| (checksum != mach_read_from_4(block->frame
|
|
||||||
+ UNIV_PAGE_SIZE
|
|
||||||
- FIL_PAGE_END_LSN)
|
|
||||||
&& mach_read_from_4(block->frame + FIL_PAGE_LSN)
|
|
||||||
!= mach_read_from_4(block->frame
|
|
||||||
+ UNIV_PAGE_SIZE
|
|
||||||
- FIL_PAGE_END_LSN))) {
|
|
||||||
fprintf(stderr,
|
|
||||||
"InnoDB: Database page corruption or a failed\n"
|
"InnoDB: Database page corruption or a failed\n"
|
||||||
"InnoDB: file read of page %lu.\n", block->offset);
|
"InnoDB: file read of page %lu.\n", block->offset);
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
"InnoDB: You may have to recover from a backup.\n");
|
"InnoDB: You may have to recover from a backup.\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (recv_recovery_is_on()) {
|
if (recv_recovery_is_on()) {
|
||||||
@ -1601,11 +1619,28 @@ void
|
|||||||
buf_print_io(void)
|
buf_print_io(void)
|
||||||
/*==============*/
|
/*==============*/
|
||||||
{
|
{
|
||||||
|
ulint size;
|
||||||
|
|
||||||
ut_ad(buf_pool);
|
ut_ad(buf_pool);
|
||||||
|
|
||||||
mutex_enter(&(buf_pool->mutex));
|
size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE;
|
||||||
|
|
||||||
printf("pages read %lu, created %lu, written %lu\n",
|
mutex_enter(&(buf_pool->mutex));
|
||||||
|
|
||||||
|
printf("LRU list length %lu \n", UT_LIST_GET_LEN(buf_pool->LRU));
|
||||||
|
printf("Free list length %lu \n", UT_LIST_GET_LEN(buf_pool->free));
|
||||||
|
printf("Flush list length %lu \n",
|
||||||
|
UT_LIST_GET_LEN(buf_pool->flush_list));
|
||||||
|
printf("Buffer pool size in pages %lu\n", size);
|
||||||
|
|
||||||
|
printf("Pending reads %lu \n", buf_pool->n_pend_reads);
|
||||||
|
|
||||||
|
printf("Pending writes: LRU %lu, flush list %lu, single page %lu\n",
|
||||||
|
buf_pool->n_flush[BUF_FLUSH_LRU],
|
||||||
|
buf_pool->n_flush[BUF_FLUSH_LIST],
|
||||||
|
buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);
|
||||||
|
|
||||||
|
printf("Pages read %lu, created %lu, written %lu\n",
|
||||||
buf_pool->n_pages_read, buf_pool->n_pages_created,
|
buf_pool->n_pages_read, buf_pool->n_pages_created,
|
||||||
buf_pool->n_pages_written);
|
buf_pool->n_pages_written);
|
||||||
mutex_exit(&(buf_pool->mutex));
|
mutex_exit(&(buf_pool->mutex));
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/******************************************************
|
/******************************************************
|
||||||
The database buffer buf_pool flush algorithm
|
The database buffer buf_pool flush algorithm
|
||||||
|
|
||||||
(c) 1995 Innobase Oy
|
(c) 1995-2001 Innobase Oy
|
||||||
|
|
||||||
Created 11/11/1995 Heikki Tuuri
|
Created 11/11/1995 Heikki Tuuri
|
||||||
*******************************************************/
|
*******************************************************/
|
||||||
@ -15,7 +15,6 @@ Created 11/11/1995 Heikki Tuuri
|
|||||||
#include "ut0byte.h"
|
#include "ut0byte.h"
|
||||||
#include "ut0lst.h"
|
#include "ut0lst.h"
|
||||||
#include "fil0fil.h"
|
#include "fil0fil.h"
|
||||||
|
|
||||||
#include "buf0buf.h"
|
#include "buf0buf.h"
|
||||||
#include "buf0lru.h"
|
#include "buf0lru.h"
|
||||||
#include "buf0rea.h"
|
#include "buf0rea.h"
|
||||||
@ -195,9 +194,145 @@ buf_flush_write_complete(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/************************************************************************
|
/************************************************************************
|
||||||
Does an asynchronous write of a buffer page. NOTE: in simulated aio we must
|
Flushes possible buffered writes from the doublewrite memory buffer to disk,
|
||||||
call os_aio_simulated_wake_handler_threads after we have posted a batch
|
and also wakes up the aio thread if simulated aio is used. It is very
|
||||||
of writes! */
|
important to call this function after a batch of writes has been posted,
|
||||||
|
and also when we may have to wait for a page latch! Otherwise a deadlock
|
||||||
|
of threads can occur. */
|
||||||
|
static
|
||||||
|
void
|
||||||
|
buf_flush_buffered_writes(void)
|
||||||
|
/*===========================*/
|
||||||
|
{
|
||||||
|
buf_block_t* block;
|
||||||
|
ulint len;
|
||||||
|
ulint i;
|
||||||
|
|
||||||
|
if (trx_doublewrite == NULL) {
|
||||||
|
os_aio_simulated_wake_handler_threads();
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
mutex_enter(&(trx_doublewrite->mutex));
|
||||||
|
|
||||||
|
/* Write first to doublewrite buffer blocks. We use synchronous
|
||||||
|
aio and thus know that file write has been completed when the
|
||||||
|
control returns. */
|
||||||
|
|
||||||
|
if (trx_doublewrite->first_free == 0) {
|
||||||
|
|
||||||
|
mutex_exit(&(trx_doublewrite->mutex));
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (trx_doublewrite->first_free > TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
|
||||||
|
len = TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
|
||||||
|
} else {
|
||||||
|
len = trx_doublewrite->first_free * UNIV_PAGE_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
fil_io(OS_FILE_WRITE,
|
||||||
|
TRUE, TRX_SYS_SPACE,
|
||||||
|
trx_doublewrite->block1, 0, len,
|
||||||
|
(void*)trx_doublewrite->write_buf, NULL);
|
||||||
|
|
||||||
|
if (trx_doublewrite->first_free > TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
|
||||||
|
len = (trx_doublewrite->first_free
|
||||||
|
- TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) * UNIV_PAGE_SIZE;
|
||||||
|
|
||||||
|
fil_io(OS_FILE_WRITE,
|
||||||
|
TRUE, TRX_SYS_SPACE,
|
||||||
|
trx_doublewrite->block2, 0, len,
|
||||||
|
(void*)(trx_doublewrite->write_buf
|
||||||
|
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE),
|
||||||
|
NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Now flush the doublewrite buffer data to disk */
|
||||||
|
|
||||||
|
fil_flush(TRX_SYS_SPACE);
|
||||||
|
|
||||||
|
/* We know that the writes have been flushed to disk now
|
||||||
|
and in recovery we will find them in the doublewrite buffer
|
||||||
|
blocks. Next do the writes to the intended positions. */
|
||||||
|
|
||||||
|
for (i = 0; i < trx_doublewrite->first_free; i++) {
|
||||||
|
block = trx_doublewrite->buf_block_arr[i];
|
||||||
|
|
||||||
|
fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
|
||||||
|
FALSE, block->space, block->offset, 0, UNIV_PAGE_SIZE,
|
||||||
|
(void*)block->frame, (void*)block);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Wake possible simulated aio thread to actually post the
|
||||||
|
writes to the operating system */
|
||||||
|
|
||||||
|
os_aio_simulated_wake_handler_threads();
|
||||||
|
|
||||||
|
/* Wait until all async writes to tablespaces have been posted to
|
||||||
|
the OS */
|
||||||
|
|
||||||
|
os_aio_wait_until_no_pending_writes();
|
||||||
|
|
||||||
|
/* Now we flush the data to disk (for example, with fsync) */
|
||||||
|
|
||||||
|
fil_flush_file_spaces(FIL_TABLESPACE);
|
||||||
|
|
||||||
|
/* We can now reuse the doublewrite memory buffer: */
|
||||||
|
|
||||||
|
trx_doublewrite->first_free = 0;
|
||||||
|
|
||||||
|
mutex_exit(&(trx_doublewrite->mutex));
|
||||||
|
}
|
||||||
|
|
||||||
|
/************************************************************************
|
||||||
|
Posts a buffer page for writing. If the doublewrite memory buffer is
|
||||||
|
full, calls buf_flush_buffered_writes and waits for free space to
|
||||||
|
appear. */
|
||||||
|
static
|
||||||
|
void
|
||||||
|
buf_flush_post_to_doublewrite_buf(
|
||||||
|
/*==============================*/
|
||||||
|
buf_block_t* block) /* in: buffer block to write */
|
||||||
|
{
|
||||||
|
try_again:
|
||||||
|
mutex_enter(&(trx_doublewrite->mutex));
|
||||||
|
|
||||||
|
if (trx_doublewrite->first_free
|
||||||
|
>= 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
|
||||||
|
mutex_exit(&(trx_doublewrite->mutex));
|
||||||
|
|
||||||
|
buf_flush_buffered_writes();
|
||||||
|
|
||||||
|
goto try_again;
|
||||||
|
}
|
||||||
|
|
||||||
|
ut_memcpy(trx_doublewrite->write_buf
|
||||||
|
+ UNIV_PAGE_SIZE * trx_doublewrite->first_free,
|
||||||
|
block->frame, UNIV_PAGE_SIZE);
|
||||||
|
|
||||||
|
trx_doublewrite->buf_block_arr[trx_doublewrite->first_free] = block;
|
||||||
|
|
||||||
|
trx_doublewrite->first_free++;
|
||||||
|
|
||||||
|
if (trx_doublewrite->first_free
|
||||||
|
>= 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
|
||||||
|
mutex_exit(&(trx_doublewrite->mutex));
|
||||||
|
|
||||||
|
buf_flush_buffered_writes();
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
mutex_exit(&(trx_doublewrite->mutex));
|
||||||
|
}
|
||||||
|
|
||||||
|
/************************************************************************
|
||||||
|
Does an asynchronous write of a buffer page. NOTE: in simulated aio and
|
||||||
|
also when the doublewrite buffer is used, we must call
|
||||||
|
buf_flush_buffered_writes after we have posted a batch of writes! */
|
||||||
static
|
static
|
||||||
void
|
void
|
||||||
buf_flush_write_block_low(
|
buf_flush_write_block_low(
|
||||||
@ -222,15 +357,24 @@ buf_flush_write_block_low(
|
|||||||
mach_write_to_8(block->frame + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN,
|
mach_write_to_8(block->frame + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN,
|
||||||
block->newest_modification);
|
block->newest_modification);
|
||||||
|
|
||||||
|
/* Write to the page the space id and page number */
|
||||||
|
|
||||||
|
mach_write_to_4(block->frame + FIL_PAGE_SPACE, block->space);
|
||||||
|
mach_write_to_4(block->frame + FIL_PAGE_OFFSET, block->offset);
|
||||||
|
|
||||||
/* We overwrite the first 4 bytes of the end lsn field to store
|
/* We overwrite the first 4 bytes of the end lsn field to store
|
||||||
a page checksum */
|
a page checksum */
|
||||||
|
|
||||||
mach_write_to_4(block->frame + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN,
|
mach_write_to_4(block->frame + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN,
|
||||||
buf_calc_page_checksum(block->frame));
|
buf_calc_page_checksum(block->frame));
|
||||||
|
|
||||||
fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
|
if (!trx_doublewrite) {
|
||||||
FALSE, block->space, block->offset, 0, UNIV_PAGE_SIZE,
|
fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
|
||||||
|
FALSE, block->space, block->offset, 0, UNIV_PAGE_SIZE,
|
||||||
(void*)block->frame, (void*)block);
|
(void*)block->frame, (void*)block);
|
||||||
|
} else {
|
||||||
|
buf_flush_post_to_doublewrite_buf(block);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/************************************************************************
|
/************************************************************************
|
||||||
@ -251,14 +395,14 @@ buf_flush_try_page(
|
|||||||
buf_block_t* block;
|
buf_block_t* block;
|
||||||
ibool locked;
|
ibool locked;
|
||||||
|
|
||||||
ut_ad((flush_type == BUF_FLUSH_LRU) || (flush_type == BUF_FLUSH_LIST)
|
ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST
|
||||||
|| (flush_type == BUF_FLUSH_SINGLE_PAGE));
|
|| flush_type == BUF_FLUSH_SINGLE_PAGE);
|
||||||
|
|
||||||
mutex_enter(&(buf_pool->mutex));
|
mutex_enter(&(buf_pool->mutex));
|
||||||
|
|
||||||
block = buf_page_hash_get(space, offset);
|
block = buf_page_hash_get(space, offset);
|
||||||
|
|
||||||
if ((flush_type == BUF_FLUSH_LIST)
|
if (flush_type == BUF_FLUSH_LIST
|
||||||
&& block && buf_flush_ready_for_flush(block, flush_type)) {
|
&& block && buf_flush_ready_for_flush(block, flush_type)) {
|
||||||
|
|
||||||
block->io_fix = BUF_IO_WRITE;
|
block->io_fix = BUF_IO_WRITE;
|
||||||
@ -286,7 +430,7 @@ buf_flush_try_page(
|
|||||||
mutex_exit(&(buf_pool->mutex));
|
mutex_exit(&(buf_pool->mutex));
|
||||||
|
|
||||||
if (!locked) {
|
if (!locked) {
|
||||||
os_aio_simulated_wake_handler_threads();
|
buf_flush_buffered_writes();
|
||||||
|
|
||||||
rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);
|
rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);
|
||||||
}
|
}
|
||||||
@ -300,7 +444,7 @@ buf_flush_try_page(
|
|||||||
|
|
||||||
return(1);
|
return(1);
|
||||||
|
|
||||||
} else if ((flush_type == BUF_FLUSH_LRU) && block
|
} else if (flush_type == BUF_FLUSH_LRU && block
|
||||||
&& buf_flush_ready_for_flush(block, flush_type)) {
|
&& buf_flush_ready_for_flush(block, flush_type)) {
|
||||||
|
|
||||||
/* VERY IMPORTANT:
|
/* VERY IMPORTANT:
|
||||||
@ -328,7 +472,7 @@ buf_flush_try_page(
|
|||||||
|
|
||||||
return(1);
|
return(1);
|
||||||
|
|
||||||
} else if ((flush_type == BUF_FLUSH_SINGLE_PAGE) && block
|
} else if (flush_type == BUF_FLUSH_SINGLE_PAGE && block
|
||||||
&& buf_flush_ready_for_flush(block, flush_type)) {
|
&& buf_flush_ready_for_flush(block, flush_type)) {
|
||||||
|
|
||||||
block->io_fix = BUF_IO_WRITE;
|
block->io_fix = BUF_IO_WRITE;
|
||||||
@ -385,6 +529,14 @@ buf_flush_try_neighbors(
|
|||||||
/* If there is little space, it is better not to flush any
|
/* If there is little space, it is better not to flush any
|
||||||
block except from the end of the LRU list */
|
block except from the end of the LRU list */
|
||||||
|
|
||||||
|
low = offset;
|
||||||
|
high = offset + 1;
|
||||||
|
} else if (flush_type == BUF_FLUSH_LIST) {
|
||||||
|
/* Since semaphore waits require us to flush the
|
||||||
|
doublewrite buffer to disk, it is best that the
|
||||||
|
search area is just the page itself, to minimize
|
||||||
|
chances for semaphore waits */
|
||||||
|
|
||||||
low = offset;
|
low = offset;
|
||||||
high = offset + 1;
|
high = offset + 1;
|
||||||
}
|
}
|
||||||
@ -418,13 +570,6 @@ buf_flush_try_neighbors(
|
|||||||
|
|
||||||
mutex_exit(&(buf_pool->mutex));
|
mutex_exit(&(buf_pool->mutex));
|
||||||
|
|
||||||
/* In simulated aio we wake up the i/o-handler threads now that
|
|
||||||
we have posted a batch of writes: */
|
|
||||||
|
|
||||||
/* printf("Flush count %lu ; Waking i/o handlers\n", count); */
|
|
||||||
|
|
||||||
os_aio_simulated_wake_handler_threads();
|
|
||||||
|
|
||||||
return(count);
|
return(count);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -565,13 +710,15 @@ buf_flush_batch(
|
|||||||
|
|
||||||
mutex_exit(&(buf_pool->mutex));
|
mutex_exit(&(buf_pool->mutex));
|
||||||
|
|
||||||
if (buf_debug_prints && (page_count > 0)) {
|
buf_flush_buffered_writes();
|
||||||
|
|
||||||
|
if (buf_debug_prints && page_count > 0) {
|
||||||
if (flush_type == BUF_FLUSH_LRU) {
|
if (flush_type == BUF_FLUSH_LRU) {
|
||||||
printf("To flush %lu pages in LRU flush\n",
|
printf("Flushed %lu pages in LRU flush\n",
|
||||||
page_count);
|
page_count);
|
||||||
} else if (flush_type == BUF_FLUSH_LIST) {
|
} else if (flush_type == BUF_FLUSH_LIST) {
|
||||||
printf("To flush %lu pages in flush list flush\n",
|
printf("Flushed %lu pages in flush list flush\n",
|
||||||
page_count, flush_type);
|
page_count);
|
||||||
} else {
|
} else {
|
||||||
ut_error;
|
ut_error;
|
||||||
}
|
}
|
||||||
|
@ -49,7 +49,9 @@ ulint
|
|||||||
buf_read_page_low(
|
buf_read_page_low(
|
||||||
/*==============*/
|
/*==============*/
|
||||||
/* out: 1 if a read request was queued, 0 if the page
|
/* out: 1 if a read request was queued, 0 if the page
|
||||||
already resided in buf_pool */
|
already resided in buf_pool or if the page is in
|
||||||
|
the doublewrite buffer blocks in which case it is never
|
||||||
|
read into the pool */
|
||||||
ibool sync, /* in: TRUE if synchronous aio is desired */
|
ibool sync, /* in: TRUE if synchronous aio is desired */
|
||||||
ulint mode, /* in: BUF_READ_IBUF_PAGES_ONLY, ...,
|
ulint mode, /* in: BUF_READ_IBUF_PAGES_ONLY, ...,
|
||||||
ORed to OS_AIO_SIMULATED_WAKE_LATER (see below
|
ORed to OS_AIO_SIMULATED_WAKE_LATER (see below
|
||||||
@ -63,6 +65,16 @@ buf_read_page_low(
|
|||||||
wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER;
|
wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER;
|
||||||
mode = mode & ~OS_AIO_SIMULATED_WAKE_LATER;
|
mode = mode & ~OS_AIO_SIMULATED_WAKE_LATER;
|
||||||
|
|
||||||
|
if (trx_doublewrite && space == TRX_SYS_SPACE
|
||||||
|
&& ( (offset >= trx_doublewrite->block1
|
||||||
|
&& offset < trx_doublewrite->block1
|
||||||
|
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
|
||||||
|
|| (offset >= trx_doublewrite->block2
|
||||||
|
&& offset < trx_doublewrite->block2
|
||||||
|
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE))) {
|
||||||
|
return(0);
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef UNIV_LOG_DEBUG
|
#ifdef UNIV_LOG_DEBUG
|
||||||
if (space % 2 == 1) {
|
if (space % 2 == 1) {
|
||||||
/* We are updating a replicate space while holding the
|
/* We are updating a replicate space while holding the
|
||||||
|
@ -13,7 +13,10 @@ Created 5/30/1994 Heikki Tuuri
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "ut0rnd.h"
|
#include "ut0rnd.h"
|
||||||
|
#include "rem0rec.h"
|
||||||
|
#include "page0page.h"
|
||||||
|
#include "dict0dict.h"
|
||||||
|
#include "btr0cur.h"
|
||||||
|
|
||||||
byte data_error; /* data pointers of tuple fields are initialized
|
byte data_error; /* data pointers of tuple fields are initialized
|
||||||
to point here for error checking */
|
to point here for error checking */
|
||||||
@ -378,6 +381,172 @@ dtuple_sprintf(
|
|||||||
return(len);
|
return(len);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/******************************************************************
|
||||||
|
Moves parts of long fields in entry to the big record vector so that
|
||||||
|
the size of tuple drops below the maximum record size allowed in the
|
||||||
|
database. Moves data only from those fields which are not necessary
|
||||||
|
to determine uniquely the insertion place of the tuple in the index. */
|
||||||
|
|
||||||
|
big_rec_t*
|
||||||
|
dtuple_convert_big_rec(
|
||||||
|
/*===================*/
|
||||||
|
/* out, own: created big record vector,
|
||||||
|
NULL if we are not able to shorten
|
||||||
|
the entry enough, i.e., if there are
|
||||||
|
too many short fields in entry */
|
||||||
|
dict_index_t* index, /* in: index */
|
||||||
|
dtuple_t* entry) /* in: index entry */
|
||||||
|
{
|
||||||
|
mem_heap_t* heap;
|
||||||
|
big_rec_t* vector;
|
||||||
|
dfield_t* dfield;
|
||||||
|
ulint size;
|
||||||
|
ulint n_fields;
|
||||||
|
ulint longest;
|
||||||
|
ulint longest_i;
|
||||||
|
ulint i;
|
||||||
|
|
||||||
|
size = rec_get_converted_size(entry);
|
||||||
|
|
||||||
|
heap = mem_heap_create(size + dtuple_get_n_fields(entry)
|
||||||
|
* sizeof(big_rec_field_t) + 1000);
|
||||||
|
|
||||||
|
vector = mem_heap_alloc(heap, sizeof(big_rec_t));
|
||||||
|
|
||||||
|
vector->heap = heap;
|
||||||
|
vector->fields = mem_heap_alloc(heap, dtuple_get_n_fields(entry)
|
||||||
|
* sizeof(big_rec_field_t));
|
||||||
|
|
||||||
|
/* Decide which fields to shorten: the algorithm is to look for
|
||||||
|
the longest field which does not occur in the ordering part
|
||||||
|
of any index on the table */
|
||||||
|
|
||||||
|
n_fields = 0;
|
||||||
|
|
||||||
|
while ((rec_get_converted_size(entry)
|
||||||
|
>= page_get_free_space_of_empty() / 2)
|
||||||
|
|| rec_get_converted_size(entry) >= REC_MAX_DATA_SIZE) {
|
||||||
|
|
||||||
|
longest = 0;
|
||||||
|
for (i = dict_index_get_n_unique_in_tree(index);
|
||||||
|
i < dtuple_get_n_fields(entry); i++) {
|
||||||
|
|
||||||
|
/* Skip over fields which are ordering in some index */
|
||||||
|
|
||||||
|
if (dict_field_get_col(
|
||||||
|
dict_index_get_nth_field(index, i))
|
||||||
|
->ord_part == 0) {
|
||||||
|
|
||||||
|
dfield = dtuple_get_nth_field(entry, i);
|
||||||
|
|
||||||
|
if (dfield->len != UNIV_SQL_NULL &&
|
||||||
|
dfield->len > longest) {
|
||||||
|
|
||||||
|
longest = dfield->len;
|
||||||
|
|
||||||
|
longest_i = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (longest < BTR_EXTERN_FIELD_REF_SIZE + 10) {
|
||||||
|
|
||||||
|
/* Cannot shorten more */
|
||||||
|
|
||||||
|
mem_heap_free(heap);
|
||||||
|
|
||||||
|
return(NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Move data from field longest_i to big rec vector,
|
||||||
|
but do not let data size of the remaining entry
|
||||||
|
drop below 128 which is the limit for the 2-byte
|
||||||
|
offset storage format in a physical record */
|
||||||
|
|
||||||
|
dfield = dtuple_get_nth_field(entry, longest_i);
|
||||||
|
vector->fields[n_fields].field_no = longest_i;
|
||||||
|
|
||||||
|
if (dtuple_get_data_size(entry) - dfield->len
|
||||||
|
<= REC_1BYTE_OFFS_LIMIT) {
|
||||||
|
vector->fields[n_fields].len =
|
||||||
|
dtuple_get_data_size(entry)
|
||||||
|
- REC_1BYTE_OFFS_LIMIT;
|
||||||
|
/* Since dfield will contain at least
|
||||||
|
a 20-byte reference to the extern storage,
|
||||||
|
we know that the data size of entry will be
|
||||||
|
> REC_1BYTE_OFFS_LIMIT */
|
||||||
|
} else {
|
||||||
|
vector->fields[n_fields].len = dfield->len;
|
||||||
|
}
|
||||||
|
|
||||||
|
vector->fields[n_fields].data = mem_heap_alloc(heap,
|
||||||
|
vector->fields[n_fields].len);
|
||||||
|
|
||||||
|
/* Copy data (from the end of field) to big rec vector */
|
||||||
|
|
||||||
|
ut_memcpy(vector->fields[n_fields].data,
|
||||||
|
((byte*)dfield->data) + dfield->len
|
||||||
|
- vector->fields[n_fields].len,
|
||||||
|
vector->fields[n_fields].len);
|
||||||
|
dfield->len = dfield->len - vector->fields[n_fields].len
|
||||||
|
+ BTR_EXTERN_FIELD_REF_SIZE;
|
||||||
|
|
||||||
|
/* Set the extern field reference in dfield to zero */
|
||||||
|
memset(((byte*)dfield->data)
|
||||||
|
+ dfield->len - BTR_EXTERN_FIELD_REF_SIZE,
|
||||||
|
0, BTR_EXTERN_FIELD_REF_SIZE);
|
||||||
|
n_fields++;
|
||||||
|
}
|
||||||
|
|
||||||
|
vector->n_fields = n_fields;
|
||||||
|
return(vector);
|
||||||
|
}
|
||||||
|
|
||||||
|
/******************************************************************
|
||||||
|
Puts back to entry the data stored in vector. Note that to ensure the
|
||||||
|
fields in entry can accommodate the data, vector must have been created
|
||||||
|
from entry with dtuple_convert_big_rec. */
|
||||||
|
|
||||||
|
void
|
||||||
|
dtuple_convert_back_big_rec(
|
||||||
|
/*========================*/
|
||||||
|
dict_index_t* index, /* in: index */
|
||||||
|
dtuple_t* entry, /* in: entry whose data was put to vector */
|
||||||
|
big_rec_t* vector) /* in, own: big rec vector; it is
|
||||||
|
freed in this function */
|
||||||
|
{
|
||||||
|
dfield_t* dfield;
|
||||||
|
ulint i;
|
||||||
|
|
||||||
|
for (i = 0; i < vector->n_fields; i++) {
|
||||||
|
|
||||||
|
dfield = dtuple_get_nth_field(entry,
|
||||||
|
vector->fields[i].field_no);
|
||||||
|
/* Copy data from big rec vector */
|
||||||
|
|
||||||
|
ut_memcpy(((byte*)dfield->data)
|
||||||
|
+ dfield->len - BTR_EXTERN_FIELD_REF_SIZE,
|
||||||
|
vector->fields[i].data,
|
||||||
|
vector->fields[i].len);
|
||||||
|
dfield->len = dfield->len + vector->fields[i].len
|
||||||
|
- BTR_EXTERN_FIELD_REF_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
mem_heap_free(vector->heap);
|
||||||
|
}
|
||||||
|
|
||||||
|
/******************************************************************
|
||||||
|
Frees the memory in a big rec vector. */
|
||||||
|
|
||||||
|
void
|
||||||
|
dtuple_big_rec_free(
|
||||||
|
/*================*/
|
||||||
|
big_rec_t* vector) /* in, own: big rec vector; it is
|
||||||
|
freed in this function */
|
||||||
|
{
|
||||||
|
mem_heap_free(vector->heap);
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef notdefined
|
#ifdef notdefined
|
||||||
|
|
||||||
/******************************************************************
|
/******************************************************************
|
||||||
|
@ -90,6 +90,9 @@ struct fil_node_struct {
|
|||||||
is ignored) */
|
is ignored) */
|
||||||
ulint n_pending;
|
ulint n_pending;
|
||||||
/* count of pending i/o-ops on this file */
|
/* count of pending i/o-ops on this file */
|
||||||
|
ibool is_modified; /* this is set to TRUE when we write
|
||||||
|
to the file and FALSE when we call fil_flush
|
||||||
|
for this file space */
|
||||||
UT_LIST_NODE_T(fil_node_t) chain;
|
UT_LIST_NODE_T(fil_node_t) chain;
|
||||||
/* link field for the file chain */
|
/* link field for the file chain */
|
||||||
UT_LIST_NODE_T(fil_node_t) LRU;
|
UT_LIST_NODE_T(fil_node_t) LRU;
|
||||||
@ -301,6 +304,8 @@ fil_node_create(
|
|||||||
node->size = size;
|
node->size = size;
|
||||||
node->magic_n = FIL_NODE_MAGIC_N;
|
node->magic_n = FIL_NODE_MAGIC_N;
|
||||||
node->n_pending = 0;
|
node->n_pending = 0;
|
||||||
|
|
||||||
|
node->is_modified = FALSE;
|
||||||
|
|
||||||
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
|
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
|
||||||
|
|
||||||
@ -720,6 +725,47 @@ fil_space_get_size(
|
|||||||
return(size);
|
return(size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/***********************************************************************
|
||||||
|
Checks if the pair space, page_no refers to an existing page in a
|
||||||
|
tablespace file space. */
|
||||||
|
|
||||||
|
ibool
|
||||||
|
fil_check_adress_in_tablespace(
|
||||||
|
/*===========================*/
|
||||||
|
/* out: TRUE if the address is meaningful */
|
||||||
|
ulint id, /* in: space id */
|
||||||
|
ulint page_no)/* in: page number */
|
||||||
|
{
|
||||||
|
fil_space_t* space;
|
||||||
|
fil_system_t* system = fil_system;
|
||||||
|
ulint size;
|
||||||
|
ibool ret;
|
||||||
|
|
||||||
|
ut_ad(system);
|
||||||
|
|
||||||
|
mutex_enter(&(system->mutex));
|
||||||
|
|
||||||
|
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
|
||||||
|
|
||||||
|
if (space == NULL) {
|
||||||
|
ret = FALSE;
|
||||||
|
} else {
|
||||||
|
size = space->size;
|
||||||
|
|
||||||
|
if (page_no > size) {
|
||||||
|
ret = FALSE;
|
||||||
|
} else if (space->purpose != FIL_TABLESPACE) {
|
||||||
|
ret = FALSE;
|
||||||
|
} else {
|
||||||
|
ret = TRUE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
mutex_exit(&(system->mutex));
|
||||||
|
|
||||||
|
return(ret);
|
||||||
|
}
|
||||||
|
|
||||||
/***********************************************************************
|
/***********************************************************************
|
||||||
Tries to reserve free extents in a file space. */
|
Tries to reserve free extents in a file space. */
|
||||||
|
|
||||||
@ -812,8 +858,14 @@ fil_node_prepare_for_io(
|
|||||||
fil_node_close(last_node, system);
|
fil_node_close(last_node, system);
|
||||||
}
|
}
|
||||||
|
|
||||||
node->handle = os_file_create(node->name, OS_FILE_OPEN,
|
if (space->purpose == FIL_LOG) {
|
||||||
OS_FILE_AIO, &ret);
|
node->handle = os_file_create(node->name, OS_FILE_OPEN,
|
||||||
|
OS_FILE_AIO, OS_LOG_FILE, &ret);
|
||||||
|
} else {
|
||||||
|
node->handle = os_file_create(node->name, OS_FILE_OPEN,
|
||||||
|
OS_FILE_AIO, OS_DATA_FILE, &ret);
|
||||||
|
}
|
||||||
|
|
||||||
ut_a(ret);
|
ut_a(ret);
|
||||||
|
|
||||||
node->open = TRUE;
|
node->open = TRUE;
|
||||||
@ -851,7 +903,8 @@ void
|
|||||||
fil_node_complete_io(
|
fil_node_complete_io(
|
||||||
/*=================*/
|
/*=================*/
|
||||||
fil_node_t* node, /* in: file node */
|
fil_node_t* node, /* in: file node */
|
||||||
fil_system_t* system) /* in: file system */
|
fil_system_t* system, /* in: file system */
|
||||||
|
ulint type) /* in: OS_FILE_WRITE or ..._READ */
|
||||||
{
|
{
|
||||||
ut_ad(node);
|
ut_ad(node);
|
||||||
ut_ad(system);
|
ut_ad(system);
|
||||||
@ -860,6 +913,10 @@ fil_node_complete_io(
|
|||||||
|
|
||||||
node->n_pending--;
|
node->n_pending--;
|
||||||
|
|
||||||
|
if (type != OS_FILE_READ) {
|
||||||
|
node->is_modified = TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
if (node->n_pending == 0) {
|
if (node->n_pending == 0) {
|
||||||
/* The node must be put back to the LRU list */
|
/* The node must be put back to the LRU list */
|
||||||
UT_LIST_ADD_FIRST(LRU, system->LRU, node);
|
UT_LIST_ADD_FIRST(LRU, system->LRU, node);
|
||||||
@ -1016,7 +1073,7 @@ loop:
|
|||||||
|
|
||||||
mutex_enter(&(system->mutex));
|
mutex_enter(&(system->mutex));
|
||||||
|
|
||||||
fil_node_complete_io(node, system);
|
fil_node_complete_io(node, system, type);
|
||||||
|
|
||||||
mutex_exit(&(system->mutex));
|
mutex_exit(&(system->mutex));
|
||||||
|
|
||||||
@ -1090,12 +1147,14 @@ fil_aio_wait(
|
|||||||
fil_node_t* fil_node;
|
fil_node_t* fil_node;
|
||||||
fil_system_t* system = fil_system;
|
fil_system_t* system = fil_system;
|
||||||
void* message;
|
void* message;
|
||||||
|
ulint type;
|
||||||
|
|
||||||
ut_ad(fil_validate());
|
ut_ad(fil_validate());
|
||||||
|
|
||||||
if (os_aio_use_native_aio) {
|
if (os_aio_use_native_aio) {
|
||||||
#ifdef WIN_ASYNC_IO
|
#ifdef WIN_ASYNC_IO
|
||||||
ret = os_aio_windows_handle(segment, 0, &fil_node, &message);
|
ret = os_aio_windows_handle(segment, 0, &fil_node, &message,
|
||||||
|
&type);
|
||||||
#elif defined(POSIX_ASYNC_IO)
|
#elif defined(POSIX_ASYNC_IO)
|
||||||
ret = os_aio_posix_handle(segment, &fil_node, &message);
|
ret = os_aio_posix_handle(segment, &fil_node, &message);
|
||||||
#else
|
#else
|
||||||
@ -1103,14 +1162,14 @@ fil_aio_wait(
|
|||||||
#endif
|
#endif
|
||||||
} else {
|
} else {
|
||||||
ret = os_aio_simulated_handle(segment, (void**) &fil_node,
|
ret = os_aio_simulated_handle(segment, (void**) &fil_node,
|
||||||
&message);
|
&message, &type);
|
||||||
}
|
}
|
||||||
|
|
||||||
ut_a(ret);
|
ut_a(ret);
|
||||||
|
|
||||||
mutex_enter(&(system->mutex));
|
mutex_enter(&(system->mutex));
|
||||||
|
|
||||||
fil_node_complete_io(fil_node, fil_system);
|
fil_node_complete_io(fil_node, fil_system, type);
|
||||||
|
|
||||||
mutex_exit(&(system->mutex));
|
mutex_exit(&(system->mutex));
|
||||||
|
|
||||||
@ -1149,8 +1208,10 @@ fil_flush(
|
|||||||
node = UT_LIST_GET_FIRST(space->chain);
|
node = UT_LIST_GET_FIRST(space->chain);
|
||||||
|
|
||||||
while (node) {
|
while (node) {
|
||||||
if (node->open) {
|
if (node->open && node->is_modified) {
|
||||||
file = node->handle;
|
file = node->handle;
|
||||||
|
|
||||||
|
node->is_modified = FALSE;
|
||||||
|
|
||||||
mutex_exit(&(system->mutex));
|
mutex_exit(&(system->mutex));
|
||||||
|
|
||||||
@ -1159,9 +1220,11 @@ fil_flush(
|
|||||||
handle is still open: we assume that the OS
|
handle is still open: we assume that the OS
|
||||||
will not crash or trap even if we pass a handle
|
will not crash or trap even if we pass a handle
|
||||||
to a closed file below in os_file_flush! */
|
to a closed file below in os_file_flush! */
|
||||||
|
|
||||||
|
/* printf("Flushing to file %s\n", node->name); */
|
||||||
|
|
||||||
os_file_flush(file);
|
os_file_flush(file);
|
||||||
|
|
||||||
mutex_enter(&(system->mutex));
|
mutex_enter(&(system->mutex));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3239,8 +3239,8 @@ fsp_validate(
|
|||||||
|
|
||||||
ut_a(descr_count * FSP_EXTENT_SIZE == free_limit);
|
ut_a(descr_count * FSP_EXTENT_SIZE == free_limit);
|
||||||
ut_a(n_used + n_full_frag_pages
|
ut_a(n_used + n_full_frag_pages
|
||||||
== n_used2 + (free_limit + XDES_DESCRIBED_PER_PAGE - 1)
|
== n_used2 + 2* ((free_limit + XDES_DESCRIBED_PER_PAGE - 1)
|
||||||
/ XDES_DESCRIBED_PER_PAGE
|
/ XDES_DESCRIBED_PER_PAGE)
|
||||||
+ seg_inode_len_full + seg_inode_len_free);
|
+ seg_inode_len_full + seg_inode_len_free);
|
||||||
ut_a(frag_n_used == n_used);
|
ut_a(frag_n_used == n_used);
|
||||||
|
|
||||||
|
@ -1946,6 +1946,7 @@ ibuf_insert_low(
|
|||||||
ulint page_no,/* in: page number where to insert */
|
ulint page_no,/* in: page number where to insert */
|
||||||
que_thr_t* thr) /* in: query thread */
|
que_thr_t* thr) /* in: query thread */
|
||||||
{
|
{
|
||||||
|
big_rec_t* dummy_big_rec;
|
||||||
ulint entry_size;
|
ulint entry_size;
|
||||||
btr_pcur_t pcur;
|
btr_pcur_t pcur;
|
||||||
btr_cur_t* cursor;
|
btr_cur_t* cursor;
|
||||||
@ -2101,7 +2102,8 @@ ibuf_insert_low(
|
|||||||
|
|
||||||
if (mode == BTR_MODIFY_PREV) {
|
if (mode == BTR_MODIFY_PREV) {
|
||||||
err = btr_cur_optimistic_insert(BTR_NO_LOCKING_FLAG, cursor,
|
err = btr_cur_optimistic_insert(BTR_NO_LOCKING_FLAG, cursor,
|
||||||
ibuf_entry, &ins_rec, thr,
|
ibuf_entry, &ins_rec,
|
||||||
|
&dummy_big_rec, thr,
|
||||||
&mtr);
|
&mtr);
|
||||||
if (err == DB_SUCCESS) {
|
if (err == DB_SUCCESS) {
|
||||||
/* Update the page max trx id field */
|
/* Update the page max trx id field */
|
||||||
@ -2121,7 +2123,8 @@ ibuf_insert_low(
|
|||||||
err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG
|
err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG
|
||||||
| BTR_NO_UNDO_LOG_FLAG,
|
| BTR_NO_UNDO_LOG_FLAG,
|
||||||
cursor,
|
cursor,
|
||||||
ibuf_entry, &ins_rec, thr,
|
ibuf_entry, &ins_rec,
|
||||||
|
&dummy_big_rec, thr,
|
||||||
&mtr);
|
&mtr);
|
||||||
if (err == DB_SUCCESS) {
|
if (err == DB_SUCCESS) {
|
||||||
/* Update the page max trx id field */
|
/* Update the page max trx id field */
|
||||||
|
@ -357,6 +357,44 @@ btr_get_size(
|
|||||||
/* out: number of pages */
|
/* out: number of pages */
|
||||||
dict_index_t* index, /* in: index */
|
dict_index_t* index, /* in: index */
|
||||||
ulint flag); /* in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
|
ulint flag); /* in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
|
||||||
|
/******************************************************************
|
||||||
|
Allocates a new file page to be used in an index tree. NOTE: we assume
|
||||||
|
that the caller has made the reservation for free extents! */
|
||||||
|
|
||||||
|
page_t*
|
||||||
|
btr_page_alloc(
|
||||||
|
/*===========*/
|
||||||
|
/* out: new allocated page, x-latched;
|
||||||
|
NULL if out of space */
|
||||||
|
dict_tree_t* tree, /* in: index tree */
|
||||||
|
ulint hint_page_no, /* in: hint of a good page */
|
||||||
|
byte file_direction, /* in: direction where a possible
|
||||||
|
page split is made */
|
||||||
|
ulint level, /* in: level where the page is placed
|
||||||
|
in the tree */
|
||||||
|
mtr_t* mtr); /* in: mtr */
|
||||||
|
/******************************************************************
|
||||||
|
Frees a file page used in an index tree. NOTE: cannot free field external
|
||||||
|
storage pages because the page must contain info on its level. */
|
||||||
|
|
||||||
|
void
|
||||||
|
btr_page_free(
|
||||||
|
/*==========*/
|
||||||
|
dict_tree_t* tree, /* in: index tree */
|
||||||
|
page_t* page, /* in: page to be freed, x-latched */
|
||||||
|
mtr_t* mtr); /* in: mtr */
|
||||||
|
/******************************************************************
|
||||||
|
Frees a file page used in an index tree. Can also be used to free BLOB
|
||||||
|
external storage pages, because the page level 0 can be given as an
|
||||||
|
argument. */
|
||||||
|
|
||||||
|
void
|
||||||
|
btr_page_free_low(
|
||||||
|
/*==============*/
|
||||||
|
dict_tree_t* tree, /* in: index tree */
|
||||||
|
page_t* page, /* in: page to be freed, x-latched */
|
||||||
|
ulint level, /* in: page level */
|
||||||
|
mtr_t* mtr); /* in: mtr */
|
||||||
/*****************************************************************
|
/*****************************************************************
|
||||||
Prints size info of a B-tree. */
|
Prints size info of a B-tree. */
|
||||||
|
|
||||||
|
@ -151,11 +151,14 @@ btr_cur_optimistic_insert(
|
|||||||
ulint flags, /* in: undo logging and locking flags: if not
|
ulint flags, /* in: undo logging and locking flags: if not
|
||||||
zero, the parameters index and thr should be
|
zero, the parameters index and thr should be
|
||||||
specified */
|
specified */
|
||||||
btr_cur_t* cursor, /* in: cursor on page after which
|
btr_cur_t* cursor, /* in: cursor on page after which to insert;
|
||||||
to insert; cursor stays valid */
|
cursor stays valid */
|
||||||
dtuple_t* entry, /* in: entry to insert */
|
dtuple_t* entry, /* in: entry to insert */
|
||||||
rec_t** rec, /* out: pointer to inserted record if
|
rec_t** rec, /* out: pointer to inserted record if
|
||||||
succeed */
|
succeed */
|
||||||
|
big_rec_t** big_rec,/* out: big rec vector whose fields have to
|
||||||
|
be stored externally by the caller, or
|
||||||
|
NULL */
|
||||||
que_thr_t* thr, /* in: query thread or NULL */
|
que_thr_t* thr, /* in: query thread or NULL */
|
||||||
mtr_t* mtr); /* in: mtr */
|
mtr_t* mtr); /* in: mtr */
|
||||||
/*****************************************************************
|
/*****************************************************************
|
||||||
@ -169,13 +172,19 @@ btr_cur_pessimistic_insert(
|
|||||||
/*=======================*/
|
/*=======================*/
|
||||||
/* out: DB_SUCCESS or error number */
|
/* out: DB_SUCCESS or error number */
|
||||||
ulint flags, /* in: undo logging and locking flags: if not
|
ulint flags, /* in: undo logging and locking flags: if not
|
||||||
zero, the parameters index and thr should be
|
zero, the parameter thr should be
|
||||||
specified */
|
specified; if no undo logging is specified,
|
||||||
|
then the caller must have reserved enough
|
||||||
|
free extents in the file space so that the
|
||||||
|
insertion will certainly succeed */
|
||||||
btr_cur_t* cursor, /* in: cursor after which to insert;
|
btr_cur_t* cursor, /* in: cursor after which to insert;
|
||||||
cursor does not stay valid */
|
cursor stays valid */
|
||||||
dtuple_t* entry, /* in: entry to insert */
|
dtuple_t* entry, /* in: entry to insert */
|
||||||
rec_t** rec, /* out: pointer to inserted record if
|
rec_t** rec, /* out: pointer to inserted record if
|
||||||
succeed */
|
succeed */
|
||||||
|
big_rec_t** big_rec,/* out: big rec vector whose fields have to
|
||||||
|
be stored externally by the caller, or
|
||||||
|
NULL */
|
||||||
que_thr_t* thr, /* in: query thread or NULL */
|
que_thr_t* thr, /* in: query thread or NULL */
|
||||||
mtr_t* mtr); /* in: mtr */
|
mtr_t* mtr); /* in: mtr */
|
||||||
/*****************************************************************
|
/*****************************************************************
|
||||||
@ -228,8 +237,9 @@ btr_cur_pessimistic_update(
|
|||||||
/* out: DB_SUCCESS or error code */
|
/* out: DB_SUCCESS or error code */
|
||||||
ulint flags, /* in: undo logging, locking, and rollback
|
ulint flags, /* in: undo logging, locking, and rollback
|
||||||
flags */
|
flags */
|
||||||
btr_cur_t* cursor, /* in: cursor on the record to update;
|
btr_cur_t* cursor, /* in: cursor on the record to update */
|
||||||
cursor does not stay valid */
|
big_rec_t** big_rec,/* out: big rec vector whose fields have to
|
||||||
|
be stored externally by the caller, or NULL */
|
||||||
upd_t* update, /* in: update vector; this is allowed also
|
upd_t* update, /* in: update vector; this is allowed also
|
||||||
contain trx id and roll ptr fields, but
|
contain trx id and roll ptr fields, but
|
||||||
the values in update vector have no effect */
|
the values in update vector have no effect */
|
||||||
@ -407,6 +417,92 @@ btr_estimate_number_of_different_key_vals(
|
|||||||
/*======================================*/
|
/*======================================*/
|
||||||
/* out: estimated number of key values */
|
/* out: estimated number of key values */
|
||||||
dict_index_t* index); /* in: index */
|
dict_index_t* index); /* in: index */
|
||||||
|
/***********************************************************************
|
||||||
|
Stores the fields in big_rec_vec to the tablespace and puts pointers to
|
||||||
|
them in rec. The fields are stored on pages allocated from leaf node
|
||||||
|
file segment of the index tree. */
|
||||||
|
|
||||||
|
ulint
|
||||||
|
btr_store_big_rec_extern_fields(
|
||||||
|
/*============================*/
|
||||||
|
/* out: DB_SUCCESS or error */
|
||||||
|
dict_index_t* index, /* in: index of rec; the index tree
|
||||||
|
MUST be X-latched */
|
||||||
|
rec_t* rec, /* in: record */
|
||||||
|
big_rec_t* big_rec_vec, /* in: vector containing fields
|
||||||
|
to be stored externally */
|
||||||
|
mtr_t* local_mtr); /* in: mtr containing the latch to
|
||||||
|
rec and to the tree */
|
||||||
|
/***********************************************************************
|
||||||
|
Frees the space in an externally stored field to the file space
|
||||||
|
management. */
|
||||||
|
|
||||||
|
void
|
||||||
|
btr_free_externally_stored_field(
|
||||||
|
/*=============================*/
|
||||||
|
dict_index_t* index, /* in: index of the data, the index
|
||||||
|
tree MUST be X-latched */
|
||||||
|
byte* data, /* in: internally stored data
|
||||||
|
+ reference to the externally
|
||||||
|
stored part */
|
||||||
|
ulint local_len, /* in: length of data */
|
||||||
|
mtr_t* local_mtr); /* in: mtr containing the latch to
|
||||||
|
data and an X-latch to the index
|
||||||
|
tree */
|
||||||
|
/***************************************************************
|
||||||
|
Frees the externally stored fields for a record. */
|
||||||
|
|
||||||
|
void
|
||||||
|
btr_rec_free_externally_stored_fields(
|
||||||
|
/*==================================*/
|
||||||
|
dict_index_t* index, /* in: index of the data, the index
|
||||||
|
tree MUST be X-latched */
|
||||||
|
rec_t* rec, /* in: record */
|
||||||
|
mtr_t* mtr); /* in: mini-transaction handle which contains
|
||||||
|
an X-latch to record page and to the index
|
||||||
|
tree */
|
||||||
|
/***********************************************************************
|
||||||
|
Copies an externally stored field of a record to mem heap. */
|
||||||
|
|
||||||
|
byte*
|
||||||
|
btr_rec_copy_externally_stored_field(
|
||||||
|
/*=================================*/
|
||||||
|
/* out: the field copied to heap */
|
||||||
|
rec_t* rec, /* in: record */
|
||||||
|
ulint no, /* in: field number */
|
||||||
|
ulint* len, /* out: length of the field */
|
||||||
|
mem_heap_t* heap); /* in: mem heap */
|
||||||
|
/***********************************************************************
|
||||||
|
Copies an externally stored field of a record to mem heap. Parameter
|
||||||
|
data contains a pointer to 'internally' stored part of the field:
|
||||||
|
possibly some data, and the reference to the externally stored part in
|
||||||
|
the last 20 bytes of data. */
|
||||||
|
|
||||||
|
byte*
|
||||||
|
btr_copy_externally_stored_field(
|
||||||
|
/*=============================*/
|
||||||
|
/* out: the whole field copied to heap */
|
||||||
|
ulint* len, /* out: length of the whole field */
|
||||||
|
byte* data, /* in: 'internally' stored part of the
|
||||||
|
field containing also the reference to
|
||||||
|
the external part */
|
||||||
|
ulint local_len,/* in: length of data */
|
||||||
|
mem_heap_t* heap); /* in: mem heap */
|
||||||
|
/***********************************************************************
|
||||||
|
Stores the positions of the fields marked as extern storage in the update
|
||||||
|
vector, and also those fields which are marked as extern storage in rec
|
||||||
|
and not mentioned in updated fields. We use this function to remember
|
||||||
|
which fields we must mark as extern storage in a record inserted for an
|
||||||
|
update. */
|
||||||
|
|
||||||
|
ulint
|
||||||
|
btr_push_update_extern_fields(
|
||||||
|
/*==========================*/
|
||||||
|
/* out: number of values stored in ext_vect */
|
||||||
|
ulint* ext_vect, /* in: array of ulints, must be preallocated
|
||||||
|
to have place for all fields in rec */
|
||||||
|
rec_t* rec, /* in: record */
|
||||||
|
upd_t* update); /* in: update vector */
|
||||||
|
|
||||||
|
|
||||||
/*######################################################################*/
|
/*######################################################################*/
|
||||||
@ -516,6 +612,19 @@ and sleep this many microseconds in between */
|
|||||||
#define BTR_CUR_RETRY_DELETE_N_TIMES 100
|
#define BTR_CUR_RETRY_DELETE_N_TIMES 100
|
||||||
#define BTR_CUR_RETRY_SLEEP_TIME 50000
|
#define BTR_CUR_RETRY_SLEEP_TIME 50000
|
||||||
|
|
||||||
|
/* The reference in a field of which data is stored on a different page */
|
||||||
|
/*--------------------------------------*/
|
||||||
|
#define BTR_EXTERN_SPACE_ID 0 /* space id where stored */
|
||||||
|
#define BTR_EXTERN_PAGE_NO 4 /* page no where stored */
|
||||||
|
#define BTR_EXTERN_OFFSET 8 /* offset of BLOB header
|
||||||
|
on that page */
|
||||||
|
#define BTR_EXTERN_LEN 12 /* 8 bytes containing the
|
||||||
|
length of the externally
|
||||||
|
stored part of the BLOB */
|
||||||
|
/*--------------------------------------*/
|
||||||
|
#define BTR_EXTERN_FIELD_REF_SIZE 20
|
||||||
|
|
||||||
|
|
||||||
extern ulint btr_cur_n_non_sea;
|
extern ulint btr_cur_n_non_sea;
|
||||||
|
|
||||||
#ifndef UNIV_NONINL
|
#ifndef UNIV_NONINL
|
||||||
|
@ -378,6 +378,14 @@ buf_calc_page_checksum(
|
|||||||
/*===================*/
|
/*===================*/
|
||||||
/* out: checksum */
|
/* out: checksum */
|
||||||
byte* page); /* in: buffer page */
|
byte* page); /* in: buffer page */
|
||||||
|
/************************************************************************
|
||||||
|
Checks if a page is corrupt. */
|
||||||
|
|
||||||
|
ibool
|
||||||
|
buf_page_is_corrupted(
|
||||||
|
/*==================*/
|
||||||
|
/* out: TRUE if corrupted */
|
||||||
|
byte* read_buf); /* in: a database page */
|
||||||
/**************************************************************************
|
/**************************************************************************
|
||||||
Gets the page number of a pointer pointing within a buffer frame containing
|
Gets the page number of a pointer pointing within a buffer frame containing
|
||||||
a file page. */
|
a file page. */
|
||||||
|
@ -101,7 +101,7 @@ make sure that a read-ahead batch can be read efficiently in a single
|
|||||||
sweep). */
|
sweep). */
|
||||||
|
|
||||||
#define BUF_FLUSH_FREE_BLOCK_MARGIN (5 + BUF_READ_AHEAD_AREA)
|
#define BUF_FLUSH_FREE_BLOCK_MARGIN (5 + BUF_READ_AHEAD_AREA)
|
||||||
#define BUF_FLUSH_EXTRA_MARGIN (BUF_FLUSH_FREE_BLOCK_MARGIN / 4)
|
#define BUF_FLUSH_EXTRA_MARGIN (BUF_FLUSH_FREE_BLOCK_MARGIN / 4 + 100)
|
||||||
|
|
||||||
#ifndef UNIV_NONINL
|
#ifndef UNIV_NONINL
|
||||||
#include "buf0flu.ic"
|
#include "buf0flu.ic"
|
||||||
|
@ -14,6 +14,9 @@ Created 5/30/1994 Heikki Tuuri
|
|||||||
#include "data0types.h"
|
#include "data0types.h"
|
||||||
#include "data0type.h"
|
#include "data0type.h"
|
||||||
#include "mem0mem.h"
|
#include "mem0mem.h"
|
||||||
|
#include "dict0types.h"
|
||||||
|
|
||||||
|
typedef struct big_rec_struct big_rec_t;
|
||||||
|
|
||||||
/* Some non-inlined functions used in the MySQL interface: */
|
/* Some non-inlined functions used in the MySQL interface: */
|
||||||
void
|
void
|
||||||
@ -312,6 +315,41 @@ dtuple_sprintf(
|
|||||||
char* buf, /* in: print buffer */
|
char* buf, /* in: print buffer */
|
||||||
ulint buf_len,/* in: buf length in bytes */
|
ulint buf_len,/* in: buf length in bytes */
|
||||||
dtuple_t* tuple); /* in: tuple */
|
dtuple_t* tuple); /* in: tuple */
|
||||||
|
/******************************************************************
|
||||||
|
Moves parts of long fields in entry to the big record vector so that
|
||||||
|
the size of tuple drops below the maximum record size allowed in the
|
||||||
|
database. Moves data only from those fields which are not necessary
|
||||||
|
to determine uniquely the insertion place of the tuple in the index. */
|
||||||
|
|
||||||
|
big_rec_t*
|
||||||
|
dtuple_convert_big_rec(
|
||||||
|
/*===================*/
|
||||||
|
/* out, own: created big record vector,
|
||||||
|
NULL if we are not able to shorten
|
||||||
|
the entry enough, i.e., if there are
|
||||||
|
too many short fields in entry */
|
||||||
|
dict_index_t* index, /* in: index */
|
||||||
|
dtuple_t* entry); /* in: index entry */
|
||||||
|
/******************************************************************
|
||||||
|
Puts back to entry the data stored in vector. Note that to ensure the
|
||||||
|
fields in entry can accommodate the data, vector must have been created
|
||||||
|
from entry with dtuple_convert_big_rec. */
|
||||||
|
|
||||||
|
void
|
||||||
|
dtuple_convert_back_big_rec(
|
||||||
|
/*========================*/
|
||||||
|
dict_index_t* index, /* in: index */
|
||||||
|
dtuple_t* entry, /* in: entry whose data was put to vector */
|
||||||
|
big_rec_t* vector);/* in, own: big rec vector; it is
|
||||||
|
freed in this function */
|
||||||
|
/******************************************************************
|
||||||
|
Frees the memory in a big rec vector. */
|
||||||
|
|
||||||
|
void
|
||||||
|
dtuple_big_rec_free(
|
||||||
|
/*================*/
|
||||||
|
big_rec_t* vector); /* in, own: big rec vector; it is
|
||||||
|
freed in this function */
|
||||||
/***************************************************************
|
/***************************************************************
|
||||||
Generates a random tuple. */
|
Generates a random tuple. */
|
||||||
|
|
||||||
@ -396,7 +434,7 @@ dtuple_gen_search_tuple_TPC_C(
|
|||||||
/* Structure for an SQL data field */
|
/* Structure for an SQL data field */
|
||||||
struct dfield_struct{
|
struct dfield_struct{
|
||||||
void* data; /* pointer to data */
|
void* data; /* pointer to data */
|
||||||
ulint len; /* data length; UNIV_SQL_NULL if SQL null */
|
ulint len; /* data length; UNIV_SQL_NULL if SQL null; */
|
||||||
dtype_t type; /* type of data */
|
dtype_t type; /* type of data */
|
||||||
ulint col_no; /* when building index entries, the column
|
ulint col_no; /* when building index entries, the column
|
||||||
number can be stored here */
|
number can be stored here */
|
||||||
@ -423,6 +461,24 @@ struct dtuple_struct {
|
|||||||
};
|
};
|
||||||
#define DATA_TUPLE_MAGIC_N 65478679
|
#define DATA_TUPLE_MAGIC_N 65478679
|
||||||
|
|
||||||
|
/* A slot for a field in a big rec vector */
|
||||||
|
|
||||||
|
typedef struct big_rec_field_struct big_rec_field_t;
|
||||||
|
struct big_rec_field_struct {
|
||||||
|
ulint field_no; /* field number in record */
|
||||||
|
ulint len; /* stored data len */
|
||||||
|
byte* data; /* stored data */
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Storage format for overflow data in a big record, that is, a record
|
||||||
|
which needs external storage of data fields */
|
||||||
|
|
||||||
|
struct big_rec_struct {
|
||||||
|
mem_heap_t* heap; /* memory heap from which allocated */
|
||||||
|
ulint n_fields; /* number of stored fields */
|
||||||
|
big_rec_field_t* fields; /* stored fields */
|
||||||
|
};
|
||||||
|
|
||||||
#ifndef UNIV_NONINL
|
#ifndef UNIV_NONINL
|
||||||
#include "data0data.ic"
|
#include "data0data.ic"
|
||||||
#endif
|
#endif
|
||||||
|
@ -307,12 +307,13 @@ dtuple_create(
|
|||||||
|
|
||||||
/**************************************************************
|
/**************************************************************
|
||||||
The following function returns the sum of data lengths of a tuple. The space
|
The following function returns the sum of data lengths of a tuple. The space
|
||||||
occupied by the field structs or the tuple struct is not counted. */
|
occupied by the field structs or the tuple struct is not counted. Neither
|
||||||
|
is possible space in externally stored parts of the field. */
|
||||||
UNIV_INLINE
|
UNIV_INLINE
|
||||||
ulint
|
ulint
|
||||||
dtuple_get_data_size(
|
dtuple_get_data_size(
|
||||||
/*=================*/
|
/*=================*/
|
||||||
/* out: sum of data lens */
|
/* out: sum of data lengths */
|
||||||
dtuple_t* tuple) /* in: typed data tuple */
|
dtuple_t* tuple) /* in: typed data tuple */
|
||||||
{
|
{
|
||||||
dfield_t* field;
|
dfield_t* field;
|
||||||
@ -382,7 +383,7 @@ dtuple_datas_are_equal(
|
|||||||
|
|
||||||
field2 = dtuple_get_nth_field(tuple2, i);
|
field2 = dtuple_get_nth_field(tuple2, i);
|
||||||
data2 = (byte*) dfield_get_data(field2);
|
data2 = (byte*) dfield_get_data(field2);
|
||||||
len2 = dfield_get_len(field2);
|
len2 = dfield_get_len(field2);
|
||||||
|
|
||||||
if (len1 != len2) {
|
if (len1 != len2) {
|
||||||
|
|
||||||
|
@ -143,7 +143,7 @@ struct dict_col_struct{
|
|||||||
ulint clust_pos;/* position of the column in the
|
ulint clust_pos;/* position of the column in the
|
||||||
clustered index */
|
clustered index */
|
||||||
ulint ord_part;/* count of how many times this column
|
ulint ord_part;/* count of how many times this column
|
||||||
appears in an ordering fields of an index */
|
appears in ordering fields of an index */
|
||||||
char* name; /* name */
|
char* name; /* name */
|
||||||
dtype_t type; /* data type */
|
dtype_t type; /* data type */
|
||||||
dict_table_t* table; /* back pointer to table of this column */
|
dict_table_t* table; /* back pointer to table of this column */
|
||||||
|
@ -196,6 +196,16 @@ fil_space_get_size(
|
|||||||
/* out: space size */
|
/* out: space size */
|
||||||
ulint id); /* in: space id */
|
ulint id); /* in: space id */
|
||||||
/***********************************************************************
|
/***********************************************************************
|
||||||
|
Checks if the pair space, page_no refers to an existing page in a
|
||||||
|
tablespace file space. */
|
||||||
|
|
||||||
|
ibool
|
||||||
|
fil_check_adress_in_tablespace(
|
||||||
|
/*===========================*/
|
||||||
|
/* out: TRUE if the address is meaningful */
|
||||||
|
ulint id, /* in: space id */
|
||||||
|
ulint page_no);/* in: page number */
|
||||||
|
/***********************************************************************
|
||||||
Appends a new file to the chain of files of a space.
|
Appends a new file to the chain of files of a space.
|
||||||
File must be closed. */
|
File must be closed. */
|
||||||
|
|
||||||
|
@ -70,7 +70,7 @@ page_t*
|
|||||||
fseg_create(
|
fseg_create(
|
||||||
/*========*/
|
/*========*/
|
||||||
/* out: the page where the segment header is placed,
|
/* out: the page where the segment header is placed,
|
||||||
x-latched, FIL_NULL if could not create segment
|
x-latched, NULL if could not create segment
|
||||||
because of lack of space */
|
because of lack of space */
|
||||||
ulint space, /* in: space id */
|
ulint space, /* in: space id */
|
||||||
ulint page, /* in: page where the segment header is placed: if
|
ulint page, /* in: page where the segment header is placed: if
|
||||||
|
@ -115,7 +115,7 @@ mach_write_to_4(
|
|||||||
{
|
{
|
||||||
ut_ad(b);
|
ut_ad(b);
|
||||||
|
|
||||||
#if notdefined && !defined(__STDC__) && defined(UNIV_INTEL) && (UNIV_WORD_SIZE == 4) && defined(UNIV_VISUALC)
|
#if (0 == 1) && !defined(__STDC__) && defined(UNIV_INTEL) && (UNIV_WORD_SIZE == 4) && defined(UNIV_VISUALC)
|
||||||
|
|
||||||
/* We do not use this even on Intel, because unaligned accesses may
|
/* We do not use this even on Intel, because unaligned accesses may
|
||||||
be slow */
|
be slow */
|
||||||
@ -143,7 +143,7 @@ mach_read_from_4(
|
|||||||
/* out: ulint integer */
|
/* out: ulint integer */
|
||||||
byte* b) /* in: pointer to four bytes */
|
byte* b) /* in: pointer to four bytes */
|
||||||
{
|
{
|
||||||
#if notdefined && !defined(__STDC__) && defined(UNIV_INTEL) && (UNIV_WORD_SIZE == 4) && defined(UNIV_VISUALC)
|
#if (0 == 1) && !defined(__STDC__) && defined(UNIV_INTEL) && (UNIV_WORD_SIZE == 4) && defined(UNIV_VISUALC)
|
||||||
/* We do not use this even on Intel, because unaligned accesses may
|
/* We do not use this even on Intel, because unaligned accesses may
|
||||||
be slow */
|
be slow */
|
||||||
|
|
||||||
|
@ -59,6 +59,10 @@ log. */
|
|||||||
#define OS_FILE_AIO 61
|
#define OS_FILE_AIO 61
|
||||||
#define OS_FILE_NORMAL 62
|
#define OS_FILE_NORMAL 62
|
||||||
|
|
||||||
|
/* Types for file create */
|
||||||
|
#define OS_DATA_FILE 100
|
||||||
|
#define OS_LOG_FILE 101
|
||||||
|
|
||||||
/* Error codes from os_file_get_last_error */
|
/* Error codes from os_file_get_last_error */
|
||||||
#define OS_FILE_NOT_FOUND 71
|
#define OS_FILE_NOT_FOUND 71
|
||||||
#define OS_FILE_DISK_FULL 72
|
#define OS_FILE_DISK_FULL 72
|
||||||
@ -125,6 +129,7 @@ os_file_create(
|
|||||||
if a new file is created or an old overwritten */
|
if a new file is created or an old overwritten */
|
||||||
ulint purpose,/* in: OS_FILE_AIO, if asynchronous, non-buffered i/o
|
ulint purpose,/* in: OS_FILE_AIO, if asynchronous, non-buffered i/o
|
||||||
is desired, OS_FILE_NORMAL, if any normal file */
|
is desired, OS_FILE_NORMAL, if any normal file */
|
||||||
|
ulint type, /* in: OS_DATA_FILE or OS_LOG_FILE */
|
||||||
ibool* success);/* out: TRUE if succeed, FALSE if error */
|
ibool* success);/* out: TRUE if succeed, FALSE if error */
|
||||||
/***************************************************************************
|
/***************************************************************************
|
||||||
Closes a file handle. In case of error, error number can be retrieved with
|
Closes a file handle. In case of error, error number can be retrieved with
|
||||||
@ -263,6 +268,13 @@ os_aio(
|
|||||||
operation); if mode is OS_AIO_SYNC, these
|
operation); if mode is OS_AIO_SYNC, these
|
||||||
are ignored */
|
are ignored */
|
||||||
void* message2);
|
void* message2);
|
||||||
|
/****************************************************************************
|
||||||
|
Waits until there are no pending writes in os_aio_write_array. There can
|
||||||
|
be other, synchronous, pending writes. */
|
||||||
|
|
||||||
|
void
|
||||||
|
os_aio_wait_until_no_pending_writes(void);
|
||||||
|
/*=====================================*/
|
||||||
/**************************************************************************
|
/**************************************************************************
|
||||||
Wakes up simulated aio i/o-handler threads if they have something to do. */
|
Wakes up simulated aio i/o-handler threads if they have something to do. */
|
||||||
|
|
||||||
@ -298,7 +310,8 @@ os_aio_windows_handle(
|
|||||||
the aio operation failed, these output
|
the aio operation failed, these output
|
||||||
parameters are valid and can be used to
|
parameters are valid and can be used to
|
||||||
restart the operation, for example */
|
restart the operation, for example */
|
||||||
void** message2);
|
void** message2,
|
||||||
|
ulint* type); /* out: OS_FILE_WRITE or ..._READ */
|
||||||
#endif
|
#endif
|
||||||
#ifdef POSIX_ASYNC_IO
|
#ifdef POSIX_ASYNC_IO
|
||||||
/**************************************************************************
|
/**************************************************************************
|
||||||
@ -335,7 +348,8 @@ os_aio_simulated_handle(
|
|||||||
the aio operation failed, these output
|
the aio operation failed, these output
|
||||||
parameters are valid and can be used to
|
parameters are valid and can be used to
|
||||||
restart the operation, for example */
|
restart the operation, for example */
|
||||||
void** message2);
|
void** message2,
|
||||||
|
ulint* type); /* out: OS_FILE_WRITE or ..._READ */
|
||||||
/**************************************************************************
|
/**************************************************************************
|
||||||
Validates the consistency of the aio system. */
|
Validates the consistency of the aio system. */
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/***********************************************************************
|
/***********************************************************************
|
||||||
Comparison services for records
|
Comparison services for records
|
||||||
|
|
||||||
(c) 1994-1996 Innobase Oy
|
(c) 1994-2001 Innobase Oy
|
||||||
|
|
||||||
Created 7/1/1994 Heikki Tuuri
|
Created 7/1/1994 Heikki Tuuri
|
||||||
************************************************************************/
|
************************************************************************/
|
||||||
@ -31,14 +31,18 @@ This function is used to compare a data tuple to a physical record.
|
|||||||
Only dtuple->n_fields_cmp first fields are taken into account for
|
Only dtuple->n_fields_cmp first fields are taken into account for
|
||||||
the data tuple! If we denote by n = n_fields_cmp, then rec must
|
the data tuple! If we denote by n = n_fields_cmp, then rec must
|
||||||
have either m >= n fields, or it must differ from dtuple in some of
|
have either m >= n fields, or it must differ from dtuple in some of
|
||||||
the m fields rec has. */
|
the m fields rec has. If rec has an externally stored field we do not
|
||||||
|
compare it but return with value 0 if such a comparison should be
|
||||||
|
made. */
|
||||||
|
|
||||||
int
|
int
|
||||||
cmp_dtuple_rec_with_match(
|
cmp_dtuple_rec_with_match(
|
||||||
/*======================*/
|
/*======================*/
|
||||||
/* out: 1, 0, -1, if dtuple is greater, equal,
|
/* out: 1, 0, -1, if dtuple is greater, equal,
|
||||||
less than rec, respectively, when only the
|
less than rec, respectively, when only the
|
||||||
common first fields are compared */
|
common first fields are compared, or
|
||||||
|
until the first externally stored field in
|
||||||
|
rec */
|
||||||
dtuple_t* dtuple, /* in: data tuple */
|
dtuple_t* dtuple, /* in: data tuple */
|
||||||
rec_t* rec, /* in: physical record which differs from
|
rec_t* rec, /* in: physical record which differs from
|
||||||
dtuple in some of the common fields, or which
|
dtuple in some of the common fields, or which
|
||||||
@ -89,7 +93,8 @@ cmp_dtuple_rec_prefix_equal(
|
|||||||
fields in dtuple */
|
fields in dtuple */
|
||||||
/*****************************************************************
|
/*****************************************************************
|
||||||
This function is used to compare two physical records. Only the common
|
This function is used to compare two physical records. Only the common
|
||||||
first fields are compared. */
|
first fields are compared, and if an externally stored field is
|
||||||
|
encountered, then 0 is returned. */
|
||||||
|
|
||||||
int
|
int
|
||||||
cmp_rec_rec_with_match(
|
cmp_rec_rec_with_match(
|
||||||
|
@ -12,6 +12,7 @@ Created 5/30/1994 Heikki Tuuri
|
|||||||
#include "univ.i"
|
#include "univ.i"
|
||||||
#include "data0data.h"
|
#include "data0data.h"
|
||||||
#include "rem0types.h"
|
#include "rem0types.h"
|
||||||
|
#include "mtr0types.h"
|
||||||
|
|
||||||
/* Maximum values for various fields (for non-blob tuples) */
|
/* Maximum values for various fields (for non-blob tuples) */
|
||||||
#define REC_MAX_N_FIELDS (1024 - 1)
|
#define REC_MAX_N_FIELDS (1024 - 1)
|
||||||
@ -162,6 +163,49 @@ rec_get_nth_field_size(
|
|||||||
/* out: field size in bytes */
|
/* out: field size in bytes */
|
||||||
rec_t* rec, /* in: record */
|
rec_t* rec, /* in: record */
|
||||||
ulint n); /* in: index of the field */
|
ulint n); /* in: index of the field */
|
||||||
|
/***************************************************************
|
||||||
|
Gets the value of the ith field extern storage bit. If it is TRUE
|
||||||
|
it means that the field is stored on another page. */
|
||||||
|
UNIV_INLINE
|
||||||
|
ibool
|
||||||
|
rec_get_nth_field_extern_bit(
|
||||||
|
/*=========================*/
|
||||||
|
/* out: TRUE or FALSE */
|
||||||
|
rec_t* rec, /* in: record */
|
||||||
|
ulint i); /* in: ith field */
|
||||||
|
/**********************************************************
|
||||||
|
Returns TRUE if the extern bit is set in any of the fields
|
||||||
|
of rec. */
|
||||||
|
UNIV_INLINE
|
||||||
|
ibool
|
||||||
|
rec_contains_externally_stored_field(
|
||||||
|
/*=================================*/
|
||||||
|
/* out: TRUE if a field is stored externally */
|
||||||
|
rec_t* rec); /* in: record */
|
||||||
|
/***************************************************************
|
||||||
|
Sets the value of the ith field extern storage bit. */
|
||||||
|
|
||||||
|
void
|
||||||
|
rec_set_nth_field_extern_bit(
|
||||||
|
/*=========================*/
|
||||||
|
rec_t* rec, /* in: record */
|
||||||
|
ulint i, /* in: ith field */
|
||||||
|
ibool val, /* in: value to set */
|
||||||
|
mtr_t* mtr); /* in: mtr holding an X-latch to the page where
|
||||||
|
rec is, or NULL; in the NULL case we do not
|
||||||
|
write to log about the change */
|
||||||
|
/***************************************************************
|
||||||
|
Sets TRUE the extern storage bits of fields mentioned in an array. */
|
||||||
|
|
||||||
|
void
|
||||||
|
rec_set_field_extern_bits(
|
||||||
|
/*======================*/
|
||||||
|
rec_t* rec, /* in: record */
|
||||||
|
ulint* vec, /* in: array of field numbers */
|
||||||
|
ulint n_fields, /* in: number of field numbers */
|
||||||
|
mtr_t* mtr); /* in: mtr holding an X-latch to the page
|
||||||
|
where rec is, or NULL; in the NULL case we
|
||||||
|
do not write to log about the change */
|
||||||
/****************************************************************
|
/****************************************************************
|
||||||
The following function is used to get a copy of the nth
|
The following function is used to get a copy of the nth
|
||||||
data field in the record to a buffer. */
|
data field in the record to a buffer. */
|
||||||
@ -350,6 +394,15 @@ rec_sprintf(
|
|||||||
|
|
||||||
#define REC_INFO_BITS 6 /* This is single byte bit-field */
|
#define REC_INFO_BITS 6 /* This is single byte bit-field */
|
||||||
|
|
||||||
|
/* Maximum lengths for the data in a physical record if the offsets
|
||||||
|
are given in one byte (resp. two byte) format. */
|
||||||
|
#define REC_1BYTE_OFFS_LIMIT 0x7F
|
||||||
|
#define REC_2BYTE_OFFS_LIMIT 0x7FFF
|
||||||
|
|
||||||
|
/* The data size of record must be smaller than this because we reserve
|
||||||
|
the two uppermost bits in a two-byte offset for special purposes */
|
||||||
|
#define REC_MAX_DATA_SIZE (16 * 1024)
|
||||||
|
|
||||||
#ifndef UNIV_NONINL
|
#ifndef UNIV_NONINL
|
||||||
#include "rem0rec.ic"
|
#include "rem0rec.ic"
|
||||||
#endif
|
#endif
|
||||||
|
@ -25,12 +25,6 @@ significant bytes and bits are written below less significant.
|
|||||||
4 bits info bits
|
4 bits info bits
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
/* Maximum lengths for the data in a physical record if the offsets
|
|
||||||
are given as one byte (resp. two byte) format. */
|
|
||||||
#define REC_1BYTE_OFFS_LIMIT 0x7F
|
|
||||||
#define REC_2BYTE_OFFS_LIMIT 0x7FFF
|
|
||||||
|
|
||||||
/* We list the byte offsets from the origin of the record, the mask,
|
/* We list the byte offsets from the origin of the record, the mask,
|
||||||
and the shift needed to obtain each bit-field of the record. */
|
and the shift needed to obtain each bit-field of the record. */
|
||||||
|
|
||||||
@ -66,6 +60,11 @@ one-byte and two-byte offsets */
|
|||||||
#define REC_1BYTE_SQL_NULL_MASK 0x80
|
#define REC_1BYTE_SQL_NULL_MASK 0x80
|
||||||
#define REC_2BYTE_SQL_NULL_MASK 0x8000
|
#define REC_2BYTE_SQL_NULL_MASK 0x8000
|
||||||
|
|
||||||
|
/* In a 2-byte offset the second most significant bit denotes
|
||||||
|
a field stored to another page: */
|
||||||
|
|
||||||
|
#define REC_2BYTE_EXTERN_MASK 0x4000
|
||||||
|
|
||||||
/***************************************************************
|
/***************************************************************
|
||||||
Sets the value of the ith field SQL null bit. */
|
Sets the value of the ith field SQL null bit. */
|
||||||
|
|
||||||
@ -489,7 +488,7 @@ ulint
|
|||||||
rec_2_get_field_end_info(
|
rec_2_get_field_end_info(
|
||||||
/*=====================*/
|
/*=====================*/
|
||||||
/* out: offset of the start of the field, SQL null
|
/* out: offset of the start of the field, SQL null
|
||||||
flag ORed */
|
flag and extern storage flag ORed */
|
||||||
rec_t* rec, /* in: record */
|
rec_t* rec, /* in: record */
|
||||||
ulint n) /* in: field index */
|
ulint n) /* in: field index */
|
||||||
{
|
{
|
||||||
@ -499,6 +498,63 @@ rec_2_get_field_end_info(
|
|||||||
return(mach_read_from_2(rec - (REC_N_EXTRA_BYTES + 2 * n + 2)));
|
return(mach_read_from_2(rec - (REC_N_EXTRA_BYTES + 2 * n + 2)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/***************************************************************
|
||||||
|
Gets the value of the ith field extern storage bit. If it is TRUE
|
||||||
|
it means that the field is stored on another page. */
|
||||||
|
UNIV_INLINE
|
||||||
|
ibool
|
||||||
|
rec_get_nth_field_extern_bit(
|
||||||
|
/*=========================*/
|
||||||
|
/* out: TRUE or FALSE */
|
||||||
|
rec_t* rec, /* in: record */
|
||||||
|
ulint i) /* in: ith field */
|
||||||
|
{
|
||||||
|
ulint info;
|
||||||
|
|
||||||
|
if (rec_get_1byte_offs_flag(rec)) {
|
||||||
|
|
||||||
|
return(FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
|
info = rec_2_get_field_end_info(rec, i);
|
||||||
|
|
||||||
|
if (info & REC_2BYTE_EXTERN_MASK) {
|
||||||
|
return(TRUE);
|
||||||
|
}
|
||||||
|
|
||||||
|
return(FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**********************************************************
|
||||||
|
Returns TRUE if the extern bit is set in any of the fields
|
||||||
|
of rec. */
|
||||||
|
UNIV_INLINE
|
||||||
|
ibool
|
||||||
|
rec_contains_externally_stored_field(
|
||||||
|
/*=================================*/
|
||||||
|
/* out: TRUE if a field is stored externally */
|
||||||
|
rec_t* rec) /* in: record */
|
||||||
|
{
|
||||||
|
ulint n;
|
||||||
|
ulint i;
|
||||||
|
|
||||||
|
if (rec_get_1byte_offs_flag(rec)) {
|
||||||
|
|
||||||
|
return(FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
|
n = rec_get_n_fields(rec);
|
||||||
|
|
||||||
|
for (i = 0; i < n; i++) {
|
||||||
|
if (rec_get_nth_field_extern_bit(rec, i)) {
|
||||||
|
|
||||||
|
return(TRUE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return(FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
/**********************************************************
|
/**********************************************************
|
||||||
Returns the offset of n - 1th field end if the record is stored in the 1-byte
|
Returns the offset of n - 1th field end if the record is stored in the 1-byte
|
||||||
offsets form. If the field is SQL null, the flag is ORed in the returned
|
offsets form. If the field is SQL null, the flag is ORed in the returned
|
||||||
@ -616,7 +672,7 @@ rec_2_get_field_start_offs(
|
|||||||
}
|
}
|
||||||
|
|
||||||
return(rec_2_get_prev_field_end_info(rec, n)
|
return(rec_2_get_prev_field_end_info(rec, n)
|
||||||
& ~REC_2BYTE_SQL_NULL_MASK);
|
& ~(REC_2BYTE_SQL_NULL_MASK | REC_2BYTE_EXTERN_MASK));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**********************************************************
|
/**********************************************************
|
||||||
|
@ -56,6 +56,9 @@ row_ins_index_entry_low(
|
|||||||
pessimistic descent down the index tree */
|
pessimistic descent down the index tree */
|
||||||
dict_index_t* index, /* in: index */
|
dict_index_t* index, /* in: index */
|
||||||
dtuple_t* entry, /* in: index entry to insert */
|
dtuple_t* entry, /* in: index entry to insert */
|
||||||
|
ulint* ext_vec,/* in: array containing field numbers of
|
||||||
|
externally stored fields in entry, or NULL */
|
||||||
|
ulint n_ext_vec,/* in: number of fields in ext_vec */
|
||||||
que_thr_t* thr); /* in: query thread */
|
que_thr_t* thr); /* in: query thread */
|
||||||
/*******************************************************************
|
/*******************************************************************
|
||||||
Inserts an index entry to index. Tries first optimistic, then pessimistic
|
Inserts an index entry to index. Tries first optimistic, then pessimistic
|
||||||
@ -70,6 +73,9 @@ row_ins_index_entry(
|
|||||||
DB_DUPLICATE_KEY, or some other error code */
|
DB_DUPLICATE_KEY, or some other error code */
|
||||||
dict_index_t* index, /* in: index */
|
dict_index_t* index, /* in: index */
|
||||||
dtuple_t* entry, /* in: index entry to insert */
|
dtuple_t* entry, /* in: index entry to insert */
|
||||||
|
ulint* ext_vec,/* in: array containing field numbers of
|
||||||
|
externally stored fields in entry, or NULL */
|
||||||
|
ulint n_ext_vec,/* in: number of fields in ext_vec */
|
||||||
que_thr_t* thr); /* in: query thread */
|
que_thr_t* thr); /* in: query thread */
|
||||||
/***************************************************************
|
/***************************************************************
|
||||||
Inserts a row to a table. */
|
Inserts a row to a table. */
|
||||||
|
@ -189,7 +189,9 @@ row_update_for_mysql(
|
|||||||
row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL
|
row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL
|
||||||
handle */
|
handle */
|
||||||
/*************************************************************************
|
/*************************************************************************
|
||||||
Does a table creation operation for MySQL. */
|
Does a table creation operation for MySQL. If the name of the created
|
||||||
|
table ends with the characters INNODB_MONITOR, then this also starts
|
||||||
|
printing of monitor output by the master thread. */
|
||||||
|
|
||||||
int
|
int
|
||||||
row_create_table_for_mysql(
|
row_create_table_for_mysql(
|
||||||
@ -209,7 +211,9 @@ row_create_index_for_mysql(
|
|||||||
dict_index_t* index, /* in: index definition */
|
dict_index_t* index, /* in: index definition */
|
||||||
trx_t* trx); /* in: transaction handle */
|
trx_t* trx); /* in: transaction handle */
|
||||||
/*************************************************************************
|
/*************************************************************************
|
||||||
Drops a table for MySQL. */
|
Drops a table for MySQL. If the name of the dropped table ends with the
|
||||||
|
characters INNODB_MONITOR, then this also stops printing of monitor
|
||||||
|
output by the master thread. */
|
||||||
|
|
||||||
int
|
int
|
||||||
row_drop_table_for_mysql(
|
row_drop_table_for_mysql(
|
||||||
|
@ -250,6 +250,7 @@ row_search_index_entry(
|
|||||||
|
|
||||||
#define ROW_COPY_DATA 1
|
#define ROW_COPY_DATA 1
|
||||||
#define ROW_COPY_POINTERS 2
|
#define ROW_COPY_POINTERS 2
|
||||||
|
#define ROW_COPY_ALSO_EXTERNALS 3
|
||||||
|
|
||||||
/* The allowed latching order of index records is the following:
|
/* The allowed latching order of index records is the following:
|
||||||
(1) a secondary index record ->
|
(1) a secondary index record ->
|
||||||
|
@ -147,6 +147,9 @@ row_upd_build_difference(
|
|||||||
fields, excluding roll ptr and trx id */
|
fields, excluding roll ptr and trx id */
|
||||||
dict_index_t* index, /* in: clustered index */
|
dict_index_t* index, /* in: clustered index */
|
||||||
dtuple_t* entry, /* in: entry to insert */
|
dtuple_t* entry, /* in: entry to insert */
|
||||||
|
ulint* ext_vec,/* in: array containing field numbers of
|
||||||
|
externally stored fields in entry, or NULL */
|
||||||
|
ulint n_ext_vec,/* in: number of fields in ext_vec */
|
||||||
rec_t* rec, /* in: clustered index record */
|
rec_t* rec, /* in: clustered index record */
|
||||||
mem_heap_t* heap); /* in: memory heap from which allocated */
|
mem_heap_t* heap); /* in: memory heap from which allocated */
|
||||||
/***************************************************************
|
/***************************************************************
|
||||||
@ -262,6 +265,9 @@ struct upd_field_struct{
|
|||||||
constants in the symbol table of the
|
constants in the symbol table of the
|
||||||
query graph */
|
query graph */
|
||||||
dfield_t new_val; /* new value for the column */
|
dfield_t new_val; /* new value for the column */
|
||||||
|
ibool extern_storage; /* this is set to TRUE if dfield
|
||||||
|
actually contains a reference to
|
||||||
|
an externally stored field */
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Update vector structure */
|
/* Update vector structure */
|
||||||
@ -318,6 +324,10 @@ struct upd_node_struct{
|
|||||||
dtuple_t* row; /* NULL, or a copy (also fields copied to
|
dtuple_t* row; /* NULL, or a copy (also fields copied to
|
||||||
heap) of the row to update; this must be reset
|
heap) of the row to update; this must be reset
|
||||||
to NULL after a successful update */
|
to NULL after a successful update */
|
||||||
|
ulint* ext_vec;/* array describing which fields are stored
|
||||||
|
externally in the clustered index record of
|
||||||
|
row */
|
||||||
|
ulint n_ext_vec;/* number of fields in ext_vec */
|
||||||
mem_heap_t* heap; /* memory heap used as auxiliary storage for
|
mem_heap_t* heap; /* memory heap used as auxiliary storage for
|
||||||
row; this must be emptied after a successful
|
row; this must be emptied after a successful
|
||||||
update if node->row != NULL */
|
update if node->row != NULL */
|
||||||
@ -349,7 +359,7 @@ struct upd_node_struct{
|
|||||||
looked at and updated if an ordering
|
looked at and updated if an ordering
|
||||||
field changed */
|
field changed */
|
||||||
|
|
||||||
/* Compilation info flags: these must fit within one byte */
|
/* Compilation info flags: these must fit within 3 bits; see trx0rec.h */
|
||||||
#define UPD_NODE_NO_ORD_CHANGE 1 /* no secondary index record will be
|
#define UPD_NODE_NO_ORD_CHANGE 1 /* no secondary index record will be
|
||||||
changed in the update and no ordering
|
changed in the update and no ordering
|
||||||
field of the clustered index */
|
field of the clustered index */
|
||||||
|
@ -23,6 +23,7 @@ upd_create(
|
|||||||
mem_heap_t* heap) /* in: heap from which memory allocated */
|
mem_heap_t* heap) /* in: heap from which memory allocated */
|
||||||
{
|
{
|
||||||
upd_t* update;
|
upd_t* update;
|
||||||
|
ulint i;
|
||||||
|
|
||||||
update = mem_heap_alloc(heap, sizeof(upd_t));
|
update = mem_heap_alloc(heap, sizeof(upd_t));
|
||||||
|
|
||||||
@ -30,6 +31,10 @@ upd_create(
|
|||||||
update->n_fields = n;
|
update->n_fields = n;
|
||||||
update->fields = mem_heap_alloc(heap, sizeof(upd_field_t) * n);
|
update->fields = mem_heap_alloc(heap, sizeof(upd_field_t) * n);
|
||||||
|
|
||||||
|
for (i = 0; i < n; i++) {
|
||||||
|
update->fields[i].extern_storage = 0;
|
||||||
|
}
|
||||||
|
|
||||||
return(update);
|
return(update);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -27,6 +27,9 @@ extern char** srv_data_file_names;
|
|||||||
extern ulint* srv_data_file_sizes;
|
extern ulint* srv_data_file_sizes;
|
||||||
extern ulint* srv_data_file_is_raw_partition;
|
extern ulint* srv_data_file_is_raw_partition;
|
||||||
|
|
||||||
|
#define SRV_NEW_RAW 1
|
||||||
|
#define SRV_OLD_RAW 2
|
||||||
|
|
||||||
extern char** srv_log_group_home_dirs;
|
extern char** srv_log_group_home_dirs;
|
||||||
|
|
||||||
extern ulint srv_n_log_groups;
|
extern ulint srv_n_log_groups;
|
||||||
@ -52,10 +55,14 @@ extern ulint srv_lock_wait_timeout;
|
|||||||
extern char* srv_unix_file_flush_method_str;
|
extern char* srv_unix_file_flush_method_str;
|
||||||
extern ulint srv_unix_file_flush_method;
|
extern ulint srv_unix_file_flush_method;
|
||||||
|
|
||||||
|
extern ibool srv_use_doublewrite_buf;
|
||||||
|
|
||||||
extern ibool srv_set_thread_priorities;
|
extern ibool srv_set_thread_priorities;
|
||||||
extern int srv_query_thread_priority;
|
extern int srv_query_thread_priority;
|
||||||
|
|
||||||
/*-------------------------------------------*/
|
/*-------------------------------------------*/
|
||||||
|
|
||||||
|
extern ibool srv_print_innodb_monitor;
|
||||||
extern ulint srv_n_spin_wait_rounds;
|
extern ulint srv_n_spin_wait_rounds;
|
||||||
extern ulint srv_spin_wait_delay;
|
extern ulint srv_spin_wait_delay;
|
||||||
extern ibool srv_priority_boost;
|
extern ibool srv_priority_boost;
|
||||||
@ -104,26 +111,13 @@ typedef struct srv_sys_struct srv_sys_t;
|
|||||||
/* The server system */
|
/* The server system */
|
||||||
extern srv_sys_t* srv_sys;
|
extern srv_sys_t* srv_sys;
|
||||||
|
|
||||||
/* Alternatives for file flush option in Unix; see the InnoDB manual about
|
/* Alternatives for file flush option in Unix; see the InnoDB manual about
|
||||||
what these mean */
|
what these mean */
|
||||||
#define SRV_UNIX_FDATASYNC 1
|
#define SRV_UNIX_FDATASYNC 1
|
||||||
#define SRV_UNIX_O_DSYNC 2
|
#define SRV_UNIX_O_DSYNC 2
|
||||||
#define SRV_UNIX_LITTLESYNC 3
|
#define SRV_UNIX_LITTLESYNC 3
|
||||||
#define SRV_UNIX_NOSYNC 4
|
#define SRV_UNIX_NOSYNC 4
|
||||||
|
|
||||||
/* Raw partition flags */
|
|
||||||
#define SRV_OLD_RAW 1
|
|
||||||
#define SRV_NEW_RAW 2
|
|
||||||
|
|
||||||
void
|
|
||||||
srv_mysql_thread_release(void);
|
|
||||||
/*==========================*/
|
|
||||||
os_event_t
|
|
||||||
srv_mysql_thread_event_get(void);
|
|
||||||
void
|
|
||||||
srv_mysql_thread_slot_free(
|
|
||||||
/*==========================*/
|
|
||||||
os_event_t event);
|
|
||||||
/*************************************************************************
|
/*************************************************************************
|
||||||
Boots Innobase server. */
|
Boots Innobase server. */
|
||||||
|
|
||||||
|
@ -393,6 +393,7 @@ Memory pool mutex */
|
|||||||
#define SYNC_RSEG_HEADER_NEW 591
|
#define SYNC_RSEG_HEADER_NEW 591
|
||||||
#define SYNC_RSEG_HEADER 590
|
#define SYNC_RSEG_HEADER 590
|
||||||
#define SYNC_TRX_UNDO_PAGE 570
|
#define SYNC_TRX_UNDO_PAGE 570
|
||||||
|
#define SYNC_EXTERN_STORAGE 500
|
||||||
#define SYNC_FSP 400
|
#define SYNC_FSP 400
|
||||||
#define SYNC_FSP_PAGE 395
|
#define SYNC_FSP_PAGE 395
|
||||||
/*------------------------------------- Insert buffer headers */
|
/*------------------------------------- Insert buffer headers */
|
||||||
@ -415,6 +416,7 @@ Memory pool mutex */
|
|||||||
the level is SYNC_MEM_HASH. */
|
the level is SYNC_MEM_HASH. */
|
||||||
#define SYNC_BUF_POOL 150
|
#define SYNC_BUF_POOL 150
|
||||||
#define SYNC_BUF_BLOCK 149
|
#define SYNC_BUF_BLOCK 149
|
||||||
|
#define SYNC_DOUBLEWRITE 140
|
||||||
#define SYNC_ANY_LATCH 135
|
#define SYNC_ANY_LATCH 135
|
||||||
#define SYNC_MEM_HASH 131
|
#define SYNC_MEM_HASH 131
|
||||||
#define SYNC_MEM_POOL 130
|
#define SYNC_MEM_POOL 130
|
||||||
|
@ -45,6 +45,14 @@ trx_undo_rec_get_cmpl_info(
|
|||||||
/* out: compiler info */
|
/* out: compiler info */
|
||||||
trx_undo_rec_t* undo_rec); /* in: undo log record */
|
trx_undo_rec_t* undo_rec); /* in: undo log record */
|
||||||
/**************************************************************************
|
/**************************************************************************
|
||||||
|
Returns TRUE if an undo log record contains an extern storage field. */
|
||||||
|
UNIV_INLINE
|
||||||
|
ibool
|
||||||
|
trx_undo_rec_get_extern_storage(
|
||||||
|
/*============================*/
|
||||||
|
/* out: TRUE if extern */
|
||||||
|
trx_undo_rec_t* undo_rec); /* in: undo log record */
|
||||||
|
/**************************************************************************
|
||||||
Reads the undo log record number. */
|
Reads the undo log record number. */
|
||||||
UNIV_INLINE
|
UNIV_INLINE
|
||||||
dulint
|
dulint
|
||||||
@ -65,6 +73,8 @@ trx_undo_rec_get_pars(
|
|||||||
TRX_UNDO_INSERT_REC, ... */
|
TRX_UNDO_INSERT_REC, ... */
|
||||||
ulint* cmpl_info, /* out: compiler info, relevant only
|
ulint* cmpl_info, /* out: compiler info, relevant only
|
||||||
for update type records */
|
for update type records */
|
||||||
|
ibool* updated_extern, /* out: TRUE if we updated an
|
||||||
|
externally stored field */
|
||||||
dulint* undo_no, /* out: undo log record number */
|
dulint* undo_no, /* out: undo log record number */
|
||||||
dulint* table_id); /* out: table id */
|
dulint* table_id); /* out: table id */
|
||||||
/***********************************************************************
|
/***********************************************************************
|
||||||
@ -272,7 +282,11 @@ record */
|
|||||||
do not change */
|
do not change */
|
||||||
#define TRX_UNDO_CMPL_INFO_MULT 16 /* compilation info is multiplied by
|
#define TRX_UNDO_CMPL_INFO_MULT 16 /* compilation info is multiplied by
|
||||||
this and ORed to the type above */
|
this and ORed to the type above */
|
||||||
|
#define TRX_UNDO_UPD_EXTERN 128 /* This bit can be ORed to type_cmpl
|
||||||
|
to denote that we updated external
|
||||||
|
storage fields: used by purge to
|
||||||
|
free the external storage */
|
||||||
|
|
||||||
/* Operation type flags used in trx_undo_report_row_operation */
|
/* Operation type flags used in trx_undo_report_row_operation */
|
||||||
#define TRX_UNDO_INSERT_OP 1
|
#define TRX_UNDO_INSERT_OP 1
|
||||||
#define TRX_UNDO_MODIFY_OP 2
|
#define TRX_UNDO_MODIFY_OP 2
|
||||||
|
@ -30,6 +30,23 @@ trx_undo_rec_get_cmpl_info(
|
|||||||
return(mach_read_from_1(undo_rec + 2) / TRX_UNDO_CMPL_INFO_MULT);
|
return(mach_read_from_1(undo_rec + 2) / TRX_UNDO_CMPL_INFO_MULT);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**************************************************************************
|
||||||
|
Returns TRUE if an undo log record contains an extern storage field. */
|
||||||
|
UNIV_INLINE
|
||||||
|
ibool
|
||||||
|
trx_undo_rec_get_extern_storage(
|
||||||
|
/*============================*/
|
||||||
|
/* out: TRUE if extern */
|
||||||
|
trx_undo_rec_t* undo_rec) /* in: undo log record */
|
||||||
|
{
|
||||||
|
if (mach_read_from_1(undo_rec + 2) & TRX_UNDO_UPD_EXTERN) {
|
||||||
|
|
||||||
|
return(TRUE);
|
||||||
|
}
|
||||||
|
|
||||||
|
return(FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
/**************************************************************************
|
/**************************************************************************
|
||||||
Reads the undo log record number. */
|
Reads the undo log record number. */
|
||||||
UNIV_INLINE
|
UNIV_INLINE
|
||||||
|
@ -27,6 +27,23 @@ Created 3/26/1996 Heikki Tuuri
|
|||||||
/* The transaction system */
|
/* The transaction system */
|
||||||
extern trx_sys_t* trx_sys;
|
extern trx_sys_t* trx_sys;
|
||||||
|
|
||||||
|
/* Doublewrite system */
|
||||||
|
extern trx_doublewrite_t* trx_doublewrite;
|
||||||
|
|
||||||
|
/********************************************************************
|
||||||
|
Creates the doublewrite buffer at a database start. The header of the
|
||||||
|
doublewrite buffer is placed on the trx system header page. */
|
||||||
|
|
||||||
|
void
|
||||||
|
trx_sys_create_doublewrite_buf(void);
|
||||||
|
/*================================*/
|
||||||
|
/********************************************************************
|
||||||
|
At a database startup uses a possible doublewrite buffer to restore
|
||||||
|
half-written pages in the data files. */
|
||||||
|
|
||||||
|
void
|
||||||
|
trx_sys_doublewrite_restore_corrupt_pages(void);
|
||||||
|
/*===========================================*/
|
||||||
/*******************************************************************
|
/*******************************************************************
|
||||||
Checks if a page address is the trx sys header page. */
|
Checks if a page address is the trx sys header page. */
|
||||||
UNIV_INLINE
|
UNIV_INLINE
|
||||||
@ -235,6 +252,59 @@ therefore 256 */
|
|||||||
segment specification slots */
|
segment specification slots */
|
||||||
/*-------------------------------------------------------------*/
|
/*-------------------------------------------------------------*/
|
||||||
|
|
||||||
|
/* The offset of the doublewrite buffer header on the trx system header page */
|
||||||
|
#define TRX_SYS_DOUBLEWRITE (UNIV_PAGE_SIZE - 200)
|
||||||
|
/*-------------------------------------------------------------*/
|
||||||
|
#define TRX_SYS_DOUBLEWRITE_FSEG 0 /* fseg header of the fseg
|
||||||
|
containing the doublewrite
|
||||||
|
buffer */
|
||||||
|
#define TRX_SYS_DOUBLEWRITE_MAGIC FSEG_HEADER_SIZE
|
||||||
|
/* 4-byte magic number which
|
||||||
|
shows if we already have
|
||||||
|
created the doublewrite
|
||||||
|
buffer */
|
||||||
|
#define TRX_SYS_DOUBLEWRITE_BLOCK1 (4 + FSEG_HEADER_SIZE)
|
||||||
|
/* page number of the
|
||||||
|
first page in the first
|
||||||
|
sequence of 64
|
||||||
|
(= FSP_EXTENT_SIZE) consecutive
|
||||||
|
pages in the doublewrite
|
||||||
|
buffer */
|
||||||
|
#define TRX_SYS_DOUBLEWRITE_BLOCK2 (8 + FSEG_HEADER_SIZE)
|
||||||
|
/* page number of the
|
||||||
|
first page in the second
|
||||||
|
sequence of 64 consecutive
|
||||||
|
pages in the doublewrite
|
||||||
|
buffer */
|
||||||
|
#define TRX_SYS_DOUBLEWRITE_REPEAT 12 /* we repeat the above 3
|
||||||
|
numbers so that if the trx
|
||||||
|
sys header is half-written
|
||||||
|
to disk, we still may be able
|
||||||
|
to recover the information */
|
||||||
|
/*-------------------------------------------------------------*/
|
||||||
|
#define TRX_SYS_DOUBLEWRITE_MAGIC_N 536853855
|
||||||
|
|
||||||
|
#define TRX_SYS_DOUBLEWRITE_BLOCK_SIZE FSP_EXTENT_SIZE
|
||||||
|
|
||||||
|
/* Doublewrite control struct */
|
||||||
|
struct trx_doublewrite_struct{
|
||||||
|
mutex_t mutex; /* mutex protecting the first_free field and
|
||||||
|
write_buf */
|
||||||
|
ulint block1; /* the page number of the first
|
||||||
|
doublewrite block (64 pages) */
|
||||||
|
ulint block2; /* page number of the second block */
|
||||||
|
ulint first_free; /* first free position in write_buf measured
|
||||||
|
in units of UNIV_PAGE_SIZE */
|
||||||
|
byte* write_buf; /* write buffer used in writing to the
|
||||||
|
doublewrite buffer, aligned to an
|
||||||
|
address divisible by UNIV_PAGE_SIZE
|
||||||
|
(which is required by Windows aio) */
|
||||||
|
byte* write_buf_unaligned; /* pointer to write_buf, but unaligned */
|
||||||
|
buf_block_t**
|
||||||
|
buf_block_arr; /* array to store pointers to the buffer
|
||||||
|
blocks which have been cached to write_buf */
|
||||||
|
};
|
||||||
|
|
||||||
/* The transaction system central memory data structure; protected by the
|
/* The transaction system central memory data structure; protected by the
|
||||||
kernel mutex */
|
kernel mutex */
|
||||||
struct trx_sys_struct{
|
struct trx_sys_struct{
|
||||||
|
@ -15,6 +15,7 @@ Created 3/26/1996 Heikki Tuuri
|
|||||||
/* Memory objects */
|
/* Memory objects */
|
||||||
typedef struct trx_struct trx_t;
|
typedef struct trx_struct trx_t;
|
||||||
typedef struct trx_sys_struct trx_sys_t;
|
typedef struct trx_sys_struct trx_sys_t;
|
||||||
|
typedef struct trx_doublewrite_struct trx_doublewrite_t;
|
||||||
typedef struct trx_sig_struct trx_sig_t;
|
typedef struct trx_sig_struct trx_sig_t;
|
||||||
typedef struct trx_rseg_struct trx_rseg_t;
|
typedef struct trx_rseg_struct trx_rseg_t;
|
||||||
typedef struct trx_undo_struct trx_undo_t;
|
typedef struct trx_undo_struct trx_undo_t;
|
||||||
|
@ -341,7 +341,9 @@ struct trx_undo_struct{
|
|||||||
have delete marked records, because of
|
have delete marked records, because of
|
||||||
a delete of a row or an update of an
|
a delete of a row or an update of an
|
||||||
indexed field; purge is then
|
indexed field; purge is then
|
||||||
necessary. */
|
necessary; also TRUE if the transaction
|
||||||
|
has updated an externally stored
|
||||||
|
field */
|
||||||
dulint trx_id; /* id of the trx assigned to the undo
|
dulint trx_id; /* id of the trx assigned to the undo
|
||||||
log */
|
log */
|
||||||
ibool dict_operation; /* TRUE if a dict operation trx */
|
ibool dict_operation; /* TRUE if a dict operation trx */
|
||||||
|
@ -9,11 +9,12 @@ Created 1/20/1994 Heikki Tuuri
|
|||||||
#ifndef univ_i
|
#ifndef univ_i
|
||||||
#define univ_i
|
#define univ_i
|
||||||
|
|
||||||
#undef UNIV_INTEL_X86
|
#if (defined(_WIN32) || defined(_WIN64))
|
||||||
|
|
||||||
#if (defined(_WIN32) || defined(_WIN64)) && !defined(MYSQL_SERVER)
|
|
||||||
#define __WIN__
|
#define __WIN__
|
||||||
|
|
||||||
|
#ifndef MYSQL_SERVER
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
/* If you want to check for errors with compiler level -W4,
|
/* If you want to check for errors with compiler level -W4,
|
||||||
comment out the above include of windows.h and let the following defines
|
comment out the above include of windows.h and let the following defines
|
||||||
@ -40,10 +41,8 @@ subdirectory of 'mysql'. */
|
|||||||
#include <global.h>
|
#include <global.h>
|
||||||
#include <my_pthread.h>
|
#include <my_pthread.h>
|
||||||
|
|
||||||
#ifndef __WIN__
|
|
||||||
/* Include <sys/stat.h> to get S_I... macros defined for os0file.c */
|
/* Include <sys/stat.h> to get S_I... macros defined for os0file.c */
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
#endif
|
|
||||||
|
|
||||||
#undef PACKAGE
|
#undef PACKAGE
|
||||||
#undef VERSION
|
#undef VERSION
|
||||||
@ -63,19 +62,21 @@ subdirectory of 'mysql'. */
|
|||||||
|
|
||||||
/* DEBUG VERSION CONTROL
|
/* DEBUG VERSION CONTROL
|
||||||
===================== */
|
===================== */
|
||||||
|
|
||||||
|
/*
|
||||||
|
#define UNIV_SYNC_DEBUG
|
||||||
|
*/
|
||||||
|
|
||||||
/* Make a non-inline debug version */
|
/* Make a non-inline debug version */
|
||||||
/*
|
/*
|
||||||
#define UNIV_DEBUG
|
#define UNIV_DEBUG
|
||||||
#define UNIV_MEM_DEBUG
|
#define UNIV_MEM_DEBUG
|
||||||
#define UNIV_SYNC_DEBUG
|
|
||||||
#define UNIV_SEARCH_DEBUG
|
#define UNIV_SEARCH_DEBUG
|
||||||
|
|
||||||
#define UNIV_IBUF_DEBUG
|
#define UNIV_IBUF_DEBUG
|
||||||
|
|
||||||
#define UNIV_SYNC_PERF_STAT
|
#define UNIV_SYNC_PERF_STAT
|
||||||
#define UNIV_SEARCH_PERF_STAT
|
#define UNIV_SEARCH_PERF_STAT
|
||||||
|
|
||||||
#define UNIV_DEBUG_FILE_ACCESSES
|
|
||||||
*/
|
*/
|
||||||
#define UNIV_LIGHT_MEM_DEBUG
|
#define UNIV_LIGHT_MEM_DEBUG
|
||||||
|
|
||||||
@ -192,6 +193,13 @@ headers may define 'bool' differently. Do not assume that 'bool' is a ulint! */
|
|||||||
has the SQL NULL as its value. */
|
has the SQL NULL as its value. */
|
||||||
#define UNIV_SQL_NULL ULINT_UNDEFINED
|
#define UNIV_SQL_NULL ULINT_UNDEFINED
|
||||||
|
|
||||||
|
/* Lengths which are not UNIV_SQL_NULL, but bigger than the following
|
||||||
|
number indicate that a field contains a reference to an externally
|
||||||
|
stored part of the field in the tablespace. The length field then
|
||||||
|
contains the sum of the following flag and the locally stored len. */
|
||||||
|
|
||||||
|
#define UNIV_EXTERN_STORAGE_FIELD (UNIV_SQL_NULL - UNIV_PAGE_SIZE)
|
||||||
|
|
||||||
/* The following definition of __FILE__ removes compiler warnings
|
/* The following definition of __FILE__ removes compiler warnings
|
||||||
associated with const char* / char* mismatches with __FILE__ */
|
associated with const char* / char* mismatches with __FILE__ */
|
||||||
|
|
||||||
|
@ -41,7 +41,7 @@ extern ulint* ut_dbg_null_ptr;
|
|||||||
}\
|
}\
|
||||||
if (ut_dbg_stop_threads) {\
|
if (ut_dbg_stop_threads) {\
|
||||||
fprintf(stderr,\
|
fprintf(stderr,\
|
||||||
"Innobase: Thread %lu stopped in file %s line %lu\n",\
|
"InnoDB: Thread %lu stopped in file %s line %lu\n",\
|
||||||
os_thread_get_curr_id(), IB__FILE__, (ulint)__LINE__);\
|
os_thread_get_curr_id(), IB__FILE__, (ulint)__LINE__);\
|
||||||
os_thread_sleep(1000000000);\
|
os_thread_sleep(1000000000);\
|
||||||
}\
|
}\
|
||||||
@ -50,19 +50,17 @@ extern ulint* ut_dbg_null_ptr;
|
|||||||
#define ut_error {\
|
#define ut_error {\
|
||||||
ulint dbg_i;\
|
ulint dbg_i;\
|
||||||
fprintf(stderr,\
|
fprintf(stderr,\
|
||||||
"Innobase: Assertion failure in thread %lu in file %s line %lu\n",\
|
"InnoDB: Assertion failure in thread %lu in file %s line %lu\n",\
|
||||||
os_thread_get_curr_id(), IB__FILE__, (ulint)__LINE__);\
|
os_thread_get_curr_id(), IB__FILE__, (ulint)__LINE__);\
|
||||||
fprintf(stderr,\
|
fprintf(stderr,\
|
||||||
"Innobase: we intentionally generate a memory trap.\n");\
|
"InnoDB: We intentionally generate a memory trap.\n");\
|
||||||
fprintf(stderr,\
|
fprintf(stderr,\
|
||||||
"Innobase: Send a bug report to mysql@lists.mysql.com\n");\
|
"InnoDB: Send a detailed bug report to mysql@lists.mysql.com\n");\
|
||||||
ut_dbg_stop_threads = TRUE;\
|
ut_dbg_stop_threads = TRUE;\
|
||||||
dbg_i = *(ut_dbg_null_ptr);\
|
dbg_i = *(ut_dbg_null_ptr);\
|
||||||
printf("%lu", dbg_i);\
|
printf("%lu", dbg_i);\
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef UNIV_DEBUG
|
#ifdef UNIV_DEBUG
|
||||||
#define ut_ad(EXPR) ut_a(EXPR)
|
#define ut_ad(EXPR) ut_a(EXPR)
|
||||||
#define ut_d(EXPR) {EXPR;}
|
#define ut_d(EXPR) {EXPR;}
|
||||||
|
@ -11,8 +11,7 @@ Created 1/20/1994 Heikki Tuuri
|
|||||||
|
|
||||||
#include "univ.i"
|
#include "univ.i"
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
#include <m_ctype.h>
|
#include <ctype.h>
|
||||||
|
|
||||||
|
|
||||||
typedef time_t ib_time_t;
|
typedef time_t ib_time_t;
|
||||||
|
|
||||||
|
@ -3219,6 +3219,7 @@ lock_rec_print(
|
|||||||
ulint space;
|
ulint space;
|
||||||
ulint page_no;
|
ulint page_no;
|
||||||
ulint i;
|
ulint i;
|
||||||
|
ulint count = 0;
|
||||||
mtr_t mtr;
|
mtr_t mtr;
|
||||||
|
|
||||||
ut_ad(mutex_own(&kernel_mutex));
|
ut_ad(mutex_own(&kernel_mutex));
|
||||||
@ -3230,7 +3231,8 @@ lock_rec_print(
|
|||||||
printf("\nRECORD LOCKS space id %lu page no %lu n bits %lu",
|
printf("\nRECORD LOCKS space id %lu page no %lu n bits %lu",
|
||||||
space, page_no, lock_rec_get_n_bits(lock));
|
space, page_no, lock_rec_get_n_bits(lock));
|
||||||
|
|
||||||
printf(" index %s trx id %lu %lu", (lock->index)->name,
|
printf(" table %s index %s trx id %lu %lu",
|
||||||
|
lock->index->table->name, lock->index->name,
|
||||||
(lock->trx)->id.high, (lock->trx)->id.low);
|
(lock->trx)->id.high, (lock->trx)->id.low);
|
||||||
|
|
||||||
if (lock_get_mode(lock) == LOCK_S) {
|
if (lock_get_mode(lock) == LOCK_S) {
|
||||||
@ -3281,10 +3283,18 @@ lock_rec_print(
|
|||||||
rec_print(page_find_rec_with_heap_no(page, i));
|
rec_print(page_find_rec_with_heap_no(page, i));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
count++;
|
||||||
|
|
||||||
printf("\n");
|
printf("\n");
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
|
if (count >= 3) {
|
||||||
|
printf(
|
||||||
|
"3 LOCKS PRINTED FOR THIS TRX AND PAGE: SUPPRESSING FURTHER PRINTS\n");
|
||||||
|
goto end_prints;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
end_prints:
|
||||||
mtr_commit(&mtr);
|
mtr_commit(&mtr);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3335,7 +3345,6 @@ lock_print_info(void)
|
|||||||
|
|
||||||
lock_mutex_enter_kernel();
|
lock_mutex_enter_kernel();
|
||||||
|
|
||||||
printf("------------------------------------\n");
|
|
||||||
printf("LOCK INFO:\n");
|
printf("LOCK INFO:\n");
|
||||||
printf("Number of locks in the record hash table %lu\n",
|
printf("Number of locks in the record hash table %lu\n",
|
||||||
lock_get_n_rec_locks());
|
lock_get_n_rec_locks());
|
||||||
@ -3352,7 +3361,7 @@ loop:
|
|||||||
if (trx == NULL) {
|
if (trx == NULL) {
|
||||||
lock_mutex_exit_kernel();
|
lock_mutex_exit_kernel();
|
||||||
|
|
||||||
lock_validate();
|
/* lock_validate(); */
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -3360,6 +3369,19 @@ loop:
|
|||||||
if (nth_lock == 0) {
|
if (nth_lock == 0) {
|
||||||
printf("\nLOCKS FOR TRANSACTION ID %lu %lu\n", trx->id.high,
|
printf("\nLOCKS FOR TRANSACTION ID %lu %lu\n", trx->id.high,
|
||||||
trx->id.low);
|
trx->id.low);
|
||||||
|
if (trx->que_state == TRX_QUE_LOCK_WAIT) {
|
||||||
|
printf(
|
||||||
|
"################# TRX IS WAITING FOR THE LOCK: ###\n");
|
||||||
|
|
||||||
|
if (lock_get_type(trx->wait_lock) == LOCK_REC) {
|
||||||
|
lock_rec_print(trx->wait_lock);
|
||||||
|
} else {
|
||||||
|
lock_table_print(trx->wait_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
printf(
|
||||||
|
"##################################################\n");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
i = 0;
|
i = 0;
|
||||||
@ -3409,6 +3431,16 @@ loop:
|
|||||||
|
|
||||||
nth_lock++;
|
nth_lock++;
|
||||||
|
|
||||||
|
if (nth_lock >= 25) {
|
||||||
|
printf(
|
||||||
|
"25 LOCKS PRINTED FOR THIS TRX: SUPPRESSING FURTHER PRINTS\n");
|
||||||
|
|
||||||
|
nth_trx++;
|
||||||
|
nth_lock = 0;
|
||||||
|
|
||||||
|
goto loop;
|
||||||
|
}
|
||||||
|
|
||||||
goto loop;
|
goto loop;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -838,7 +838,9 @@ log_io_complete(
|
|||||||
/* It was a checkpoint write */
|
/* It was a checkpoint write */
|
||||||
group = (log_group_t*)((ulint)group - 1);
|
group = (log_group_t*)((ulint)group - 1);
|
||||||
|
|
||||||
if (srv_unix_file_flush_method == SRV_UNIX_LITTLESYNC) {
|
if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
|
||||||
|
&& srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
|
||||||
|
|
||||||
fil_flush(group->space_id);
|
fil_flush(group->space_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -847,7 +849,9 @@ log_io_complete(
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (srv_unix_file_flush_method == SRV_UNIX_LITTLESYNC) {
|
if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
|
||||||
|
&& srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
|
||||||
|
|
||||||
fil_flush(group->space_id);
|
fil_flush(group->space_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1478,7 +1482,7 @@ log_checkpoint(
|
|||||||
recv_apply_hashed_log_recs(TRUE);
|
recv_apply_hashed_log_recs(TRUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (srv_unix_file_flush_method == SRV_UNIX_LITTLESYNC) {
|
if (srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
|
||||||
fil_flush_file_spaces(FIL_TABLESPACE);
|
fil_flush_file_spaces(FIL_TABLESPACE);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1885,10 +1889,11 @@ loop:
|
|||||||
fil_reserve_right_to_open();
|
fil_reserve_right_to_open();
|
||||||
|
|
||||||
file_handle = os_file_create(name, open_mode, OS_FILE_AIO,
|
file_handle = os_file_create(name, open_mode, OS_FILE_AIO,
|
||||||
&ret);
|
OS_DATA_FILE, &ret);
|
||||||
|
|
||||||
if (!ret && (open_mode == OS_FILE_CREATE)) {
|
if (!ret && (open_mode == OS_FILE_CREATE)) {
|
||||||
file_handle = os_file_create(name, OS_FILE_OPEN,
|
file_handle = os_file_create(name, OS_FILE_OPEN,
|
||||||
OS_FILE_AIO, &ret);
|
OS_FILE_AIO, OS_DATA_FILE, &ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!ret) {
|
if (!ret) {
|
||||||
|
@ -2234,7 +2234,8 @@ try_open_again:
|
|||||||
|
|
||||||
fil_reserve_right_to_open();
|
fil_reserve_right_to_open();
|
||||||
|
|
||||||
file_handle = os_file_create(name, OS_FILE_OPEN, OS_FILE_AIO, &ret);
|
file_handle = os_file_create(name, OS_FILE_OPEN,
|
||||||
|
OS_FILE_LOG, OS_FILE_AIO, &ret);
|
||||||
|
|
||||||
if (ret == FALSE) {
|
if (ret == FALSE) {
|
||||||
fil_release_right_to_open();
|
fil_release_right_to_open();
|
||||||
|
@ -10,6 +10,7 @@ Created 10/21/1995 Heikki Tuuri
|
|||||||
#include "os0sync.h"
|
#include "os0sync.h"
|
||||||
#include "ut0mem.h"
|
#include "ut0mem.h"
|
||||||
#include "srv0srv.h"
|
#include "srv0srv.h"
|
||||||
|
#include "trx0sys.h"
|
||||||
|
|
||||||
#undef HAVE_FDATASYNC
|
#undef HAVE_FDATASYNC
|
||||||
|
|
||||||
@ -74,9 +75,12 @@ typedef struct os_aio_array_struct os_aio_array_t;
|
|||||||
|
|
||||||
struct os_aio_array_struct{
|
struct os_aio_array_struct{
|
||||||
os_mutex_t mutex; /* the mutex protecting the aio array */
|
os_mutex_t mutex; /* the mutex protecting the aio array */
|
||||||
os_event_t not_full; /* The event which is set to signaled
|
os_event_t not_full; /* The event which is set to the signaled
|
||||||
state when there is space in the aio
|
state when there is space in the aio
|
||||||
outside the ibuf segment */
|
outside the ibuf segment */
|
||||||
|
os_event_t is_empty; /* The event which is set to the signaled
|
||||||
|
state when there are no pending i/os
|
||||||
|
in this array */
|
||||||
ulint n_slots; /* Total number of slots in the aio array.
|
ulint n_slots; /* Total number of slots in the aio array.
|
||||||
This must be divisible by n_threads. */
|
This must be divisible by n_threads. */
|
||||||
ulint n_segments;/* Number of segments in the aio array of
|
ulint n_segments;/* Number of segments in the aio array of
|
||||||
@ -254,6 +258,7 @@ os_file_create(
|
|||||||
if a new is created or an old overwritten */
|
if a new is created or an old overwritten */
|
||||||
ulint purpose,/* in: OS_FILE_AIO, if asynchronous, non-buffered i/o
|
ulint purpose,/* in: OS_FILE_AIO, if asynchronous, non-buffered i/o
|
||||||
is desired, OS_FILE_NORMAL, if any normal file */
|
is desired, OS_FILE_NORMAL, if any normal file */
|
||||||
|
ulint type, /* in: OS_DATA_FILE or OS_LOG_FILE */
|
||||||
ibool* success)/* out: TRUE if succeed, FALSE if error */
|
ibool* success)/* out: TRUE if succeed, FALSE if error */
|
||||||
{
|
{
|
||||||
#ifdef __WIN__
|
#ifdef __WIN__
|
||||||
@ -347,11 +352,10 @@ try_again:
|
|||||||
|
|
||||||
UT_NOT_USED(purpose);
|
UT_NOT_USED(purpose);
|
||||||
|
|
||||||
/* Currently use only O_SYNC because there may be a bug in
|
|
||||||
Linux O_DSYNC! */
|
|
||||||
|
|
||||||
#ifdef O_SYNC
|
#ifdef O_SYNC
|
||||||
if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
|
if ((!srv_use_doublewrite_buf || type != OS_DATA_FILE)
|
||||||
|
&& srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
|
||||||
|
|
||||||
create_flag = create_flag | O_SYNC;
|
create_flag = create_flag | O_SYNC;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -551,12 +555,6 @@ os_file_flush(
|
|||||||
#else
|
#else
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
#ifdef O_DSYNC
|
|
||||||
if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
|
|
||||||
return(TRUE);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef HAVE_FDATASYNC
|
#ifdef HAVE_FDATASYNC
|
||||||
ret = fdatasync(file);
|
ret = fdatasync(file);
|
||||||
#else
|
#else
|
||||||
@ -637,7 +635,8 @@ os_file_pwrite(
|
|||||||
ret = pwrite(file, buf, n, offs);
|
ret = pwrite(file, buf, n, offs);
|
||||||
|
|
||||||
if (srv_unix_file_flush_method != SRV_UNIX_LITTLESYNC
|
if (srv_unix_file_flush_method != SRV_UNIX_LITTLESYNC
|
||||||
&& srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
|
&& srv_unix_file_flush_method != SRV_UNIX_NOSYNC
|
||||||
|
&& !trx_doublewrite) {
|
||||||
|
|
||||||
/* Always do fsync to reduce the probability that when
|
/* Always do fsync to reduce the probability that when
|
||||||
the OS crashes, a database page is only partially
|
the OS crashes, a database page is only partially
|
||||||
@ -666,7 +665,8 @@ os_file_pwrite(
|
|||||||
ret = write(file, buf, n);
|
ret = write(file, buf, n);
|
||||||
|
|
||||||
if (srv_unix_file_flush_method != SRV_UNIX_LITTLESYNC
|
if (srv_unix_file_flush_method != SRV_UNIX_LITTLESYNC
|
||||||
&& srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
|
&& srv_unix_file_flush_method != SRV_UNIX_NOSYNC
|
||||||
|
&& !trx_doublewrite) {
|
||||||
|
|
||||||
/* Always do fsync to reduce the probability that when
|
/* Always do fsync to reduce the probability that when
|
||||||
the OS crashes, a database page is only partially
|
the OS crashes, a database page is only partially
|
||||||
@ -825,7 +825,9 @@ try_again:
|
|||||||
/* Always do fsync to reduce the probability that when the OS crashes,
|
/* Always do fsync to reduce the probability that when the OS crashes,
|
||||||
a database page is only partially physically written to disk. */
|
a database page is only partially physically written to disk. */
|
||||||
|
|
||||||
ut_a(TRUE == os_file_flush(file));
|
if (!trx_doublewrite) {
|
||||||
|
ut_a(TRUE == os_file_flush(file));
|
||||||
|
}
|
||||||
|
|
||||||
os_mutex_exit(os_file_seek_mutexes[i]);
|
os_mutex_exit(os_file_seek_mutexes[i]);
|
||||||
|
|
||||||
@ -900,6 +902,10 @@ os_aio_array_create(
|
|||||||
|
|
||||||
array->mutex = os_mutex_create(NULL);
|
array->mutex = os_mutex_create(NULL);
|
||||||
array->not_full = os_event_create(NULL);
|
array->not_full = os_event_create(NULL);
|
||||||
|
array->is_empty = os_event_create(NULL);
|
||||||
|
|
||||||
|
os_event_set(array->is_empty);
|
||||||
|
|
||||||
array->n_slots = n;
|
array->n_slots = n;
|
||||||
array->n_segments = n_segments;
|
array->n_segments = n_segments;
|
||||||
array->n_reserved = 0;
|
array->n_reserved = 0;
|
||||||
@ -999,6 +1005,17 @@ os_aio_init(
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/****************************************************************************
|
||||||
|
Waits until there are no pending writes in os_aio_write_array. There can
|
||||||
|
be other, synchronous, pending writes. */
|
||||||
|
|
||||||
|
void
|
||||||
|
os_aio_wait_until_no_pending_writes(void)
|
||||||
|
/*=====================================*/
|
||||||
|
{
|
||||||
|
os_event_wait(os_aio_write_array->is_empty);
|
||||||
|
}
|
||||||
|
|
||||||
/**************************************************************************
|
/**************************************************************************
|
||||||
Calculates segment number for a slot. */
|
Calculates segment number for a slot. */
|
||||||
static
|
static
|
||||||
@ -1191,6 +1208,10 @@ loop:
|
|||||||
|
|
||||||
array->n_reserved++;
|
array->n_reserved++;
|
||||||
|
|
||||||
|
if (array->n_reserved == 1) {
|
||||||
|
os_event_reset(array->is_empty);
|
||||||
|
}
|
||||||
|
|
||||||
if (array->n_reserved == array->n_slots) {
|
if (array->n_reserved == array->n_slots) {
|
||||||
os_event_reset(array->not_full);
|
os_event_reset(array->not_full);
|
||||||
}
|
}
|
||||||
@ -1264,6 +1285,10 @@ os_aio_array_free_slot(
|
|||||||
os_event_set(array->not_full);
|
os_event_set(array->not_full);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (array->n_reserved == 0) {
|
||||||
|
os_event_set(array->is_empty);
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef WIN_ASYNC_IO
|
#ifdef WIN_ASYNC_IO
|
||||||
os_event_reset(slot->control.hEvent);
|
os_event_reset(slot->control.hEvent);
|
||||||
#endif
|
#endif
|
||||||
@ -1377,6 +1402,7 @@ os_aio(
|
|||||||
DWORD len = n;
|
DWORD len = n;
|
||||||
void* dummy_mess1;
|
void* dummy_mess1;
|
||||||
void* dummy_mess2;
|
void* dummy_mess2;
|
||||||
|
ulint dummy_type;
|
||||||
#endif
|
#endif
|
||||||
ulint err = 0;
|
ulint err = 0;
|
||||||
ibool retry;
|
ibool retry;
|
||||||
@ -1489,8 +1515,9 @@ try_again:
|
|||||||
use the same wait mechanism as for async i/o */
|
use the same wait mechanism as for async i/o */
|
||||||
|
|
||||||
return(os_aio_windows_handle(ULINT_UNDEFINED,
|
return(os_aio_windows_handle(ULINT_UNDEFINED,
|
||||||
slot->pos,
|
slot->pos,
|
||||||
&dummy_mess1, &dummy_mess2));
|
&dummy_mess1, &dummy_mess2,
|
||||||
|
&dummy_type));
|
||||||
}
|
}
|
||||||
|
|
||||||
return(TRUE);
|
return(TRUE);
|
||||||
@ -1547,7 +1574,8 @@ os_aio_windows_handle(
|
|||||||
the aio operation failed, these output
|
the aio operation failed, these output
|
||||||
parameters are valid and can be used to
|
parameters are valid and can be used to
|
||||||
restart the operation, for example */
|
restart the operation, for example */
|
||||||
void** message2)
|
void** message2,
|
||||||
|
ulint* type) /* out: OS_FILE_WRITE or ..._READ */
|
||||||
{
|
{
|
||||||
os_aio_array_t* array;
|
os_aio_array_t* array;
|
||||||
os_aio_slot_t* slot;
|
os_aio_slot_t* slot;
|
||||||
@ -1592,10 +1620,12 @@ os_aio_windows_handle(
|
|||||||
*message1 = slot->message1;
|
*message1 = slot->message1;
|
||||||
*message2 = slot->message2;
|
*message2 = slot->message2;
|
||||||
|
|
||||||
|
*type = slot->type;
|
||||||
|
|
||||||
if (ret && len == slot->len) {
|
if (ret && len == slot->len) {
|
||||||
ret_val = TRUE;
|
ret_val = TRUE;
|
||||||
|
|
||||||
if (slot->type == OS_FILE_WRITE) {
|
if (slot->type == OS_FILE_WRITE && !trx_doublewrite) {
|
||||||
ut_a(TRUE == os_file_flush(slot->file));
|
ut_a(TRUE == os_file_flush(slot->file));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -1679,7 +1709,7 @@ os_aio_posix_handle(
|
|||||||
*message1 = slot->message1;
|
*message1 = slot->message1;
|
||||||
*message2 = slot->message2;
|
*message2 = slot->message2;
|
||||||
|
|
||||||
if (slot->type == OS_FILE_WRITE) {
|
if (slot->type == OS_FILE_WRITE && !trx_doublewrite) {
|
||||||
ut_a(TRUE == os_file_flush(slot->file));
|
ut_a(TRUE == os_file_flush(slot->file));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1709,7 +1739,8 @@ os_aio_simulated_handle(
|
|||||||
the aio operation failed, these output
|
the aio operation failed, these output
|
||||||
parameters are valid and can be used to
|
parameters are valid and can be used to
|
||||||
restart the operation, for example */
|
restart the operation, for example */
|
||||||
void** message2)
|
void** message2,
|
||||||
|
ulint* type) /* out: OS_FILE_WRITE or ..._READ */
|
||||||
{
|
{
|
||||||
os_aio_array_t* array;
|
os_aio_array_t* array;
|
||||||
ulint segment;
|
ulint segment;
|
||||||
@ -1906,6 +1937,8 @@ slot_io_done:
|
|||||||
*message1 = slot->message1;
|
*message1 = slot->message1;
|
||||||
*message2 = slot->message2;
|
*message2 = slot->message2;
|
||||||
|
|
||||||
|
*type = slot->type;
|
||||||
|
|
||||||
os_mutex_exit(array->mutex);
|
os_mutex_exit(array->mutex);
|
||||||
|
|
||||||
os_aio_array_free_slot(array, slot);
|
os_aio_array_free_slot(array, slot);
|
||||||
@ -1989,13 +2022,13 @@ os_aio_print(void)
|
|||||||
os_aio_slot_t* slot;
|
os_aio_slot_t* slot;
|
||||||
ulint n_reserved;
|
ulint n_reserved;
|
||||||
ulint i;
|
ulint i;
|
||||||
|
|
||||||
|
printf("Pending normal aio reads:\n");
|
||||||
|
|
||||||
array = os_aio_read_array;
|
array = os_aio_read_array;
|
||||||
loop:
|
loop:
|
||||||
ut_a(array);
|
ut_a(array);
|
||||||
|
|
||||||
printf("INFO OF AN AIO ARRAY\n");
|
|
||||||
|
|
||||||
os_mutex_enter(array->mutex);
|
os_mutex_enter(array->mutex);
|
||||||
|
|
||||||
ut_a(array->n_slots > 0);
|
ut_a(array->n_slots > 0);
|
||||||
@ -2022,24 +2055,29 @@ loop:
|
|||||||
os_mutex_exit(array->mutex);
|
os_mutex_exit(array->mutex);
|
||||||
|
|
||||||
if (array == os_aio_read_array) {
|
if (array == os_aio_read_array) {
|
||||||
|
printf("Pending aio writes:\n");
|
||||||
|
|
||||||
array = os_aio_write_array;
|
array = os_aio_write_array;
|
||||||
|
|
||||||
goto loop;
|
goto loop;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (array == os_aio_write_array) {
|
if (array == os_aio_write_array) {
|
||||||
|
printf("Pending insert buffer aio reads:\n");
|
||||||
array = os_aio_ibuf_array;
|
array = os_aio_ibuf_array;
|
||||||
|
|
||||||
goto loop;
|
goto loop;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (array == os_aio_ibuf_array) {
|
if (array == os_aio_ibuf_array) {
|
||||||
|
printf("Pending log writes or reads:\n");
|
||||||
array = os_aio_log_array;
|
array = os_aio_log_array;
|
||||||
|
|
||||||
goto loop;
|
goto loop;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (array == os_aio_log_array) {
|
if (array == os_aio_log_array) {
|
||||||
|
printf("Pending synchronous reads or writes:\n");
|
||||||
array = os_aio_sync_array;
|
array = os_aio_sync_array;
|
||||||
|
|
||||||
goto loop;
|
goto loop;
|
||||||
|
@ -1019,16 +1019,16 @@ page_cur_delete_rec(
|
|||||||
page_cur_t* cursor, /* in: a page cursor */
|
page_cur_t* cursor, /* in: a page cursor */
|
||||||
mtr_t* mtr) /* in: mini-transaction handle */
|
mtr_t* mtr) /* in: mini-transaction handle */
|
||||||
{
|
{
|
||||||
|
page_dir_slot_t* cur_dir_slot;
|
||||||
|
page_dir_slot_t* prev_slot;
|
||||||
page_t* page;
|
page_t* page;
|
||||||
rec_t* current_rec;
|
rec_t* current_rec;
|
||||||
rec_t* prev_rec = NULL;
|
rec_t* prev_rec = NULL;
|
||||||
rec_t* next_rec;
|
rec_t* next_rec;
|
||||||
ulint cur_slot_no;
|
ulint cur_slot_no;
|
||||||
page_dir_slot_t* cur_dir_slot;
|
|
||||||
page_dir_slot_t* prev_slot;
|
|
||||||
ulint cur_n_owned;
|
ulint cur_n_owned;
|
||||||
rec_t* rec;
|
rec_t* rec;
|
||||||
|
|
||||||
ut_ad(cursor && mtr);
|
ut_ad(cursor && mtr);
|
||||||
|
|
||||||
page = page_cur_get_page(cursor);
|
page = page_cur_get_page(cursor);
|
||||||
@ -1037,7 +1037,7 @@ page_cur_delete_rec(
|
|||||||
/* The record must not be the supremum or infimum record. */
|
/* The record must not be the supremum or infimum record. */
|
||||||
ut_ad(current_rec != page_get_supremum_rec(page));
|
ut_ad(current_rec != page_get_supremum_rec(page));
|
||||||
ut_ad(current_rec != page_get_infimum_rec(page));
|
ut_ad(current_rec != page_get_infimum_rec(page));
|
||||||
|
|
||||||
/* Save to local variables some data associated with current_rec */
|
/* Save to local variables some data associated with current_rec */
|
||||||
cur_slot_no = page_dir_find_owner_slot(current_rec);
|
cur_slot_no = page_dir_find_owner_slot(current_rec);
|
||||||
cur_dir_slot = page_dir_get_nth_slot(page, cur_slot_no);
|
cur_dir_slot = page_dir_get_nth_slot(page, cur_slot_no);
|
||||||
|
@ -2028,11 +2028,7 @@ pars_complete_graph_for_exec(
|
|||||||
|
|
||||||
que_node_set_parent(node, thr);
|
que_node_set_parent(node, thr);
|
||||||
|
|
||||||
mutex_enter(&kernel_mutex);
|
|
||||||
|
|
||||||
trx->graph = NULL;
|
trx->graph = NULL;
|
||||||
|
|
||||||
mutex_exit(&kernel_mutex);
|
|
||||||
|
|
||||||
return(thr);
|
return(thr);
|
||||||
}
|
}
|
||||||
|
@ -295,14 +295,18 @@ This function is used to compare a data tuple to a physical record.
|
|||||||
Only dtuple->n_fields_cmp first fields are taken into account for
|
Only dtuple->n_fields_cmp first fields are taken into account for
|
||||||
the data tuple! If we denote by n = n_fields_cmp, then rec must
|
the data tuple! If we denote by n = n_fields_cmp, then rec must
|
||||||
have either m >= n fields, or it must differ from dtuple in some of
|
have either m >= n fields, or it must differ from dtuple in some of
|
||||||
the m fields rec has. */
|
the m fields rec has. If rec has an externally stored field we do not
|
||||||
|
compare it but return with value 0 if such a comparison should be
|
||||||
|
made. */
|
||||||
|
|
||||||
int
|
int
|
||||||
cmp_dtuple_rec_with_match(
|
cmp_dtuple_rec_with_match(
|
||||||
/*======================*/
|
/*======================*/
|
||||||
/* out: 1, 0, -1, if dtuple is greater, equal,
|
/* out: 1, 0, -1, if dtuple is greater, equal,
|
||||||
less than rec, respectively, when only the
|
less than rec, respectively, when only the
|
||||||
common first fields are compared */
|
common first fields are compared, or
|
||||||
|
until the first externally stored field in
|
||||||
|
rec */
|
||||||
dtuple_t* dtuple, /* in: data tuple */
|
dtuple_t* dtuple, /* in: data tuple */
|
||||||
rec_t* rec, /* in: physical record which differs from
|
rec_t* rec, /* in: physical record which differs from
|
||||||
dtuple in some of the common fields, or which
|
dtuple in some of the common fields, or which
|
||||||
@ -344,7 +348,8 @@ cmp_dtuple_rec_with_match(
|
|||||||
ut_ad(cur_field <= dtuple_get_n_fields_cmp(dtuple));
|
ut_ad(cur_field <= dtuple_get_n_fields_cmp(dtuple));
|
||||||
ut_ad(cur_field <= rec_get_n_fields(rec));
|
ut_ad(cur_field <= rec_get_n_fields(rec));
|
||||||
|
|
||||||
/* Match fields in a loop; stop if we run out of fields in dtuple */
|
/* Match fields in a loop; stop if we run out of fields in dtuple
|
||||||
|
or find an externally stored field */
|
||||||
|
|
||||||
while (cur_field < dtuple_get_n_fields_cmp(dtuple)) {
|
while (cur_field < dtuple_get_n_fields_cmp(dtuple)) {
|
||||||
|
|
||||||
@ -357,7 +362,8 @@ cmp_dtuple_rec_with_match(
|
|||||||
|
|
||||||
/* If we have matched yet 0 bytes, it may be that one or
|
/* If we have matched yet 0 bytes, it may be that one or
|
||||||
both the fields are SQL null, or the record or dtuple may be
|
both the fields are SQL null, or the record or dtuple may be
|
||||||
the predefined minimum record */
|
the predefined minimum record, or the field is externally
|
||||||
|
stored */
|
||||||
|
|
||||||
if (cur_bytes == 0) {
|
if (cur_bytes == 0) {
|
||||||
if (cur_field == 0) {
|
if (cur_field == 0) {
|
||||||
@ -384,6 +390,15 @@ cmp_dtuple_rec_with_match(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (rec_get_nth_field_extern_bit(rec, cur_field)) {
|
||||||
|
/* We do not compare to an externally
|
||||||
|
stored field */
|
||||||
|
|
||||||
|
ret = 0;
|
||||||
|
|
||||||
|
goto order_resolved;
|
||||||
|
}
|
||||||
|
|
||||||
if (dtuple_f_len == UNIV_SQL_NULL
|
if (dtuple_f_len == UNIV_SQL_NULL
|
||||||
|| rec_f_len == UNIV_SQL_NULL) {
|
|| rec_f_len == UNIV_SQL_NULL) {
|
||||||
|
|
||||||
@ -604,7 +619,8 @@ cmp_dtuple_rec_prefix_equal(
|
|||||||
|
|
||||||
/*****************************************************************
|
/*****************************************************************
|
||||||
This function is used to compare two physical records. Only the common
|
This function is used to compare two physical records. Only the common
|
||||||
first fields are compared. */
|
first fields are compared, and if an externally stored field is
|
||||||
|
encountered, then 0 is returned. */
|
||||||
|
|
||||||
int
|
int
|
||||||
cmp_rec_rec_with_match(
|
cmp_rec_rec_with_match(
|
||||||
@ -688,8 +704,18 @@ cmp_rec_rec_with_match(
|
|||||||
|
|
||||||
goto order_resolved;
|
goto order_resolved;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (rec_get_nth_field_extern_bit(rec1, cur_field)
|
||||||
|
|| rec_get_nth_field_extern_bit(rec2, cur_field)) {
|
||||||
|
/* We do not compare to an externally
|
||||||
|
stored field */
|
||||||
|
|
||||||
|
ret = 0;
|
||||||
|
|
||||||
|
goto order_resolved;
|
||||||
|
}
|
||||||
|
|
||||||
if (rec1_f_len == UNIV_SQL_NULL
|
if (rec1_f_len == UNIV_SQL_NULL
|
||||||
|| rec2_f_len == UNIV_SQL_NULL) {
|
|| rec2_f_len == UNIV_SQL_NULL) {
|
||||||
|
|
||||||
@ -812,7 +838,8 @@ order_resolved:
|
|||||||
Used in debug checking of cmp_dtuple_... .
|
Used in debug checking of cmp_dtuple_... .
|
||||||
This function is used to compare a data tuple to a physical record. If
|
This function is used to compare a data tuple to a physical record. If
|
||||||
dtuple has n fields then rec must have either m >= n fields, or it must
|
dtuple has n fields then rec must have either m >= n fields, or it must
|
||||||
differ from dtuple in some of the m fields rec has. */
|
differ from dtuple in some of the m fields rec has. If it encounters an
|
||||||
|
externally stored field, returns 0. */
|
||||||
static
|
static
|
||||||
int
|
int
|
||||||
cmp_debug_dtuple_rec_with_match(
|
cmp_debug_dtuple_rec_with_match(
|
||||||
@ -882,6 +909,14 @@ cmp_debug_dtuple_rec_with_match(
|
|||||||
|
|
||||||
rec_f_data = rec_get_nth_field(rec, cur_field, &rec_f_len);
|
rec_f_data = rec_get_nth_field(rec, cur_field, &rec_f_len);
|
||||||
|
|
||||||
|
if (rec_get_nth_field_extern_bit(rec, cur_field)) {
|
||||||
|
/* We do not compare to an externally stored field */
|
||||||
|
|
||||||
|
ret = 0;
|
||||||
|
|
||||||
|
goto order_resolved;
|
||||||
|
}
|
||||||
|
|
||||||
ret = cmp_data_data(cur_type, dtuple_f_data, dtuple_f_len,
|
ret = cmp_data_data(cur_type, dtuple_f_data, dtuple_f_len,
|
||||||
rec_f_data, rec_f_len);
|
rec_f_data, rec_f_len);
|
||||||
if (ret != 0) {
|
if (ret != 0) {
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/************************************************************************
|
/************************************************************************
|
||||||
Record manager
|
Record manager
|
||||||
|
|
||||||
(c) 1994-1996 Innobase Oy
|
(c) 1994-2001 Innobase Oy
|
||||||
|
|
||||||
Created 5/30/1994 Heikki Tuuri
|
Created 5/30/1994 Heikki Tuuri
|
||||||
*************************************************************************/
|
*************************************************************************/
|
||||||
@ -12,6 +12,9 @@ Created 5/30/1994 Heikki Tuuri
|
|||||||
#include "rem0rec.ic"
|
#include "rem0rec.ic"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include "mtr0mtr.h"
|
||||||
|
#include "mtr0log.h"
|
||||||
|
|
||||||
/* PHYSICAL RECORD
|
/* PHYSICAL RECORD
|
||||||
===============
|
===============
|
||||||
|
|
||||||
@ -21,7 +24,10 @@ found in index pages of the database, has the following format
|
|||||||
represented on a higher text line):
|
represented on a higher text line):
|
||||||
|
|
||||||
| offset of the end of the last field of data, the most significant
|
| offset of the end of the last field of data, the most significant
|
||||||
bit is set to 1 if and only if the field is SQL-null |
|
bit is set to 1 if and only if the field is SQL-null,
|
||||||
|
if the offset is 2-byte, then the second most significant
|
||||||
|
bit is set to 1 if the field is stored on another page:
|
||||||
|
mostly this will occur in the case of big BLOB fields |
|
||||||
...
|
...
|
||||||
| offset of the end of the first field of data + the SQL-null bit |
|
| offset of the end of the first field of data + the SQL-null bit |
|
||||||
| 4 bits used to delete mark a record, and mark a predefined
|
| 4 bits used to delete mark a record, and mark a predefined
|
||||||
@ -122,7 +128,8 @@ rec_get_nth_field(
|
|||||||
return(rec + os);
|
return(rec + os);
|
||||||
}
|
}
|
||||||
|
|
||||||
next_os = next_os & ~REC_2BYTE_SQL_NULL_MASK;
|
next_os = next_os & ~(REC_2BYTE_SQL_NULL_MASK
|
||||||
|
| REC_2BYTE_EXTERN_MASK);
|
||||||
}
|
}
|
||||||
|
|
||||||
*len = next_os - os;
|
*len = next_os - os;
|
||||||
@ -170,6 +177,60 @@ rec_set_nth_field_null_bit(
|
|||||||
rec_2_set_field_end_info(rec, i, info);
|
rec_2_set_field_end_info(rec, i, info);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/***************************************************************
|
||||||
|
Sets the value of the ith field extern storage bit. */
|
||||||
|
|
||||||
|
void
|
||||||
|
rec_set_nth_field_extern_bit(
|
||||||
|
/*=========================*/
|
||||||
|
rec_t* rec, /* in: record */
|
||||||
|
ulint i, /* in: ith field */
|
||||||
|
ibool val, /* in: value to set */
|
||||||
|
mtr_t* mtr) /* in: mtr holding an X-latch to the page where
|
||||||
|
rec is, or NULL; in the NULL case we do not
|
||||||
|
write to log about the change */
|
||||||
|
{
|
||||||
|
ulint info;
|
||||||
|
|
||||||
|
ut_a(!rec_get_1byte_offs_flag(rec));
|
||||||
|
ut_a(i < rec_get_n_fields(rec));
|
||||||
|
|
||||||
|
info = rec_2_get_field_end_info(rec, i);
|
||||||
|
|
||||||
|
if (val) {
|
||||||
|
info = info | REC_2BYTE_EXTERN_MASK;
|
||||||
|
} else {
|
||||||
|
info = info & ~REC_2BYTE_EXTERN_MASK;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (mtr) {
|
||||||
|
mlog_write_ulint(rec - REC_N_EXTRA_BYTES - 2 * (i + 1), info,
|
||||||
|
MLOG_2BYTES, mtr);
|
||||||
|
} else {
|
||||||
|
rec_2_set_field_end_info(rec, i, info);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/***************************************************************
|
||||||
|
Sets TRUE the extern storage bits of fields mentioned in an array. */
|
||||||
|
|
||||||
|
void
|
||||||
|
rec_set_field_extern_bits(
|
||||||
|
/*======================*/
|
||||||
|
rec_t* rec, /* in: record */
|
||||||
|
ulint* vec, /* in: array of field numbers */
|
||||||
|
ulint n_fields, /* in: number of fields numbers */
|
||||||
|
mtr_t* mtr) /* in: mtr holding an X-latch to the page
|
||||||
|
where rec is, or NULL; in the NULL case we
|
||||||
|
do not write to log about the change */
|
||||||
|
{
|
||||||
|
ulint i;
|
||||||
|
|
||||||
|
for (i = 0; i < n_fields; i++) {
|
||||||
|
rec_set_nth_field_extern_bit(rec, vec[i], TRUE, mtr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/***************************************************************
|
/***************************************************************
|
||||||
Sets a record field to SQL null. The physical size of the field is not
|
Sets a record field to SQL null. The physical size of the field is not
|
||||||
changed. */
|
changed. */
|
||||||
|
@ -234,7 +234,13 @@ row_ins_clust_index_entry_by_modify(
|
|||||||
depending on whether mtr holds just a leaf
|
depending on whether mtr holds just a leaf
|
||||||
latch or also a tree latch */
|
latch or also a tree latch */
|
||||||
btr_cur_t* cursor, /* in: B-tree cursor */
|
btr_cur_t* cursor, /* in: B-tree cursor */
|
||||||
|
big_rec_t** big_rec,/* out: possible big rec vector of fields
|
||||||
|
which have to be stored externally by the
|
||||||
|
caller */
|
||||||
dtuple_t* entry, /* in: index entry to insert */
|
dtuple_t* entry, /* in: index entry to insert */
|
||||||
|
ulint* ext_vec,/* in: array containing field numbers of
|
||||||
|
externally stored fields in entry, or NULL */
|
||||||
|
ulint n_ext_vec,/* in: number of fields in ext_vec */
|
||||||
que_thr_t* thr, /* in: query thread */
|
que_thr_t* thr, /* in: query thread */
|
||||||
mtr_t* mtr) /* in: mtr */
|
mtr_t* mtr) /* in: mtr */
|
||||||
{
|
{
|
||||||
@ -243,8 +249,10 @@ row_ins_clust_index_entry_by_modify(
|
|||||||
upd_t* update;
|
upd_t* update;
|
||||||
ulint err;
|
ulint err;
|
||||||
|
|
||||||
ut_ad((cursor->index)->type & DICT_CLUSTERED);
|
ut_ad(cursor->index->type & DICT_CLUSTERED);
|
||||||
|
|
||||||
|
*big_rec = NULL;
|
||||||
|
|
||||||
rec = btr_cur_get_rec(cursor);
|
rec = btr_cur_get_rec(cursor);
|
||||||
|
|
||||||
ut_ad(rec_get_deleted_flag(rec));
|
ut_ad(rec_get_deleted_flag(rec));
|
||||||
@ -254,21 +262,21 @@ row_ins_clust_index_entry_by_modify(
|
|||||||
/* Build an update vector containing all the fields to be modified;
|
/* Build an update vector containing all the fields to be modified;
|
||||||
NOTE that this vector may contain also system columns! */
|
NOTE that this vector may contain also system columns! */
|
||||||
|
|
||||||
update = row_upd_build_difference(cursor->index, entry, rec, heap);
|
update = row_upd_build_difference(cursor->index, entry, ext_vec,
|
||||||
|
n_ext_vec, rec, heap);
|
||||||
if (mode == BTR_MODIFY_LEAF) {
|
if (mode == BTR_MODIFY_LEAF) {
|
||||||
/* Try optimistic updating of the record, keeping changes
|
/* Try optimistic updating of the record, keeping changes
|
||||||
within the page */
|
within the page */
|
||||||
|
|
||||||
err = btr_cur_optimistic_update(0, cursor, update, 0, thr,
|
err = btr_cur_optimistic_update(0, cursor, update, 0, thr, mtr);
|
||||||
mtr);
|
|
||||||
if ((err == DB_OVERFLOW) || (err == DB_UNDERFLOW)) {
|
if (err == DB_OVERFLOW || err == DB_UNDERFLOW) {
|
||||||
err = DB_FAIL;
|
err = DB_FAIL;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
ut_ad(mode == BTR_MODIFY_TREE);
|
ut_a(mode == BTR_MODIFY_TREE);
|
||||||
err = btr_cur_pessimistic_update(0, cursor, update, 0, thr,
|
err = btr_cur_pessimistic_update(0, cursor, big_rec, update,
|
||||||
mtr);
|
0, thr, mtr);
|
||||||
}
|
}
|
||||||
|
|
||||||
mem_heap_free(heap);
|
mem_heap_free(heap);
|
||||||
@ -597,14 +605,18 @@ row_ins_index_entry_low(
|
|||||||
pessimistic descent down the index tree */
|
pessimistic descent down the index tree */
|
||||||
dict_index_t* index, /* in: index */
|
dict_index_t* index, /* in: index */
|
||||||
dtuple_t* entry, /* in: index entry to insert */
|
dtuple_t* entry, /* in: index entry to insert */
|
||||||
|
ulint* ext_vec,/* in: array containing field numbers of
|
||||||
|
externally stored fields in entry, or NULL */
|
||||||
|
ulint n_ext_vec,/* in: number of fields in ext_vec */
|
||||||
que_thr_t* thr) /* in: query thread */
|
que_thr_t* thr) /* in: query thread */
|
||||||
{
|
{
|
||||||
btr_cur_t cursor;
|
btr_cur_t cursor;
|
||||||
ulint modify;
|
ulint modify;
|
||||||
rec_t* dummy_rec;
|
rec_t* insert_rec;
|
||||||
rec_t* rec;
|
rec_t* rec;
|
||||||
ulint err;
|
ulint err;
|
||||||
ulint n_unique;
|
ulint n_unique;
|
||||||
|
big_rec_t* big_rec = NULL;
|
||||||
mtr_t mtr;
|
mtr_t mtr;
|
||||||
|
|
||||||
log_free_check();
|
log_free_check();
|
||||||
@ -682,24 +694,54 @@ row_ins_index_entry_low(
|
|||||||
|
|
||||||
if (index->type & DICT_CLUSTERED) {
|
if (index->type & DICT_CLUSTERED) {
|
||||||
err = row_ins_clust_index_entry_by_modify(mode,
|
err = row_ins_clust_index_entry_by_modify(mode,
|
||||||
&cursor, entry,
|
&cursor, &big_rec,
|
||||||
thr, &mtr);
|
entry,
|
||||||
|
ext_vec, n_ext_vec,
|
||||||
|
thr, &mtr);
|
||||||
} else {
|
} else {
|
||||||
err = row_ins_sec_index_entry_by_modify(&cursor,
|
err = row_ins_sec_index_entry_by_modify(&cursor,
|
||||||
thr, &mtr);
|
thr, &mtr);
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if (mode == BTR_MODIFY_LEAF) {
|
|
||||||
err = btr_cur_optimistic_insert(0, &cursor, entry,
|
|
||||||
&dummy_rec, thr, &mtr);
|
|
||||||
} else {
|
} else {
|
||||||
ut_ad(mode == BTR_MODIFY_TREE);
|
if (mode == BTR_MODIFY_LEAF) {
|
||||||
err = btr_cur_pessimistic_insert(0, &cursor, entry,
|
err = btr_cur_optimistic_insert(0, &cursor, entry,
|
||||||
&dummy_rec, thr, &mtr);
|
&insert_rec, &big_rec, thr, &mtr);
|
||||||
|
} else {
|
||||||
|
ut_a(mode == BTR_MODIFY_TREE);
|
||||||
|
err = btr_cur_pessimistic_insert(0, &cursor, entry,
|
||||||
|
&insert_rec, &big_rec, thr, &mtr);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (err == DB_SUCCESS) {
|
||||||
|
if (ext_vec) {
|
||||||
|
rec_set_field_extern_bits(insert_rec,
|
||||||
|
ext_vec, n_ext_vec, &mtr);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function_exit:
|
function_exit:
|
||||||
mtr_commit(&mtr);
|
mtr_commit(&mtr);
|
||||||
|
|
||||||
|
if (big_rec) {
|
||||||
|
mtr_start(&mtr);
|
||||||
|
|
||||||
|
btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
|
||||||
|
BTR_MODIFY_TREE, &cursor, 0, &mtr);
|
||||||
|
|
||||||
|
err = btr_store_big_rec_extern_fields(index,
|
||||||
|
btr_cur_get_rec(&cursor),
|
||||||
|
big_rec, &mtr);
|
||||||
|
if (modify) {
|
||||||
|
dtuple_big_rec_free(big_rec);
|
||||||
|
} else {
|
||||||
|
dtuple_convert_back_big_rec(index, entry, big_rec);
|
||||||
|
}
|
||||||
|
|
||||||
|
mtr_commit(&mtr);
|
||||||
|
}
|
||||||
|
|
||||||
return(err);
|
return(err);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -716,14 +758,17 @@ row_ins_index_entry(
|
|||||||
DB_DUPLICATE_KEY, or some other error code */
|
DB_DUPLICATE_KEY, or some other error code */
|
||||||
dict_index_t* index, /* in: index */
|
dict_index_t* index, /* in: index */
|
||||||
dtuple_t* entry, /* in: index entry to insert */
|
dtuple_t* entry, /* in: index entry to insert */
|
||||||
|
ulint* ext_vec,/* in: array containing field numbers of
|
||||||
|
externally stored fields in entry, or NULL */
|
||||||
|
ulint n_ext_vec,/* in: number of fields in ext_vec */
|
||||||
que_thr_t* thr) /* in: query thread */
|
que_thr_t* thr) /* in: query thread */
|
||||||
{
|
{
|
||||||
ulint err;
|
ulint err;
|
||||||
|
|
||||||
/* Try first optimistic descent to the B-tree */
|
/* Try first optimistic descent to the B-tree */
|
||||||
|
|
||||||
err = row_ins_index_entry_low(BTR_MODIFY_LEAF, index, entry, thr);
|
err = row_ins_index_entry_low(BTR_MODIFY_LEAF, index, entry,
|
||||||
|
ext_vec, n_ext_vec, thr);
|
||||||
if (err != DB_FAIL) {
|
if (err != DB_FAIL) {
|
||||||
|
|
||||||
return(err);
|
return(err);
|
||||||
@ -731,8 +776,8 @@ row_ins_index_entry(
|
|||||||
|
|
||||||
/* Try then pessimistic descent to the B-tree */
|
/* Try then pessimistic descent to the B-tree */
|
||||||
|
|
||||||
err = row_ins_index_entry_low(BTR_MODIFY_TREE, index, entry, thr);
|
err = row_ins_index_entry_low(BTR_MODIFY_TREE, index, entry,
|
||||||
|
ext_vec, n_ext_vec, thr);
|
||||||
return(err);
|
return(err);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -784,7 +829,7 @@ row_ins_index_entry_step(
|
|||||||
|
|
||||||
ut_ad(dtuple_check_typed(node->entry));
|
ut_ad(dtuple_check_typed(node->entry));
|
||||||
|
|
||||||
err = row_ins_index_entry(node->index, node->entry, thr);
|
err = row_ins_index_entry(node->index, node->entry, NULL, 0, thr);
|
||||||
|
|
||||||
return(err);
|
return(err);
|
||||||
}
|
}
|
||||||
|
@ -625,7 +625,8 @@ row_update_for_mysql(
|
|||||||
|
|
||||||
ut_ad(prebuilt && trx);
|
ut_ad(prebuilt && trx);
|
||||||
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
|
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
|
||||||
|
UT_NOT_USED(mysql_rec);
|
||||||
|
|
||||||
node = prebuilt->upd_node;
|
node = prebuilt->upd_node;
|
||||||
|
|
||||||
clust_index = dict_table_get_first_index(table);
|
clust_index = dict_table_get_first_index(table);
|
||||||
@ -777,7 +778,9 @@ row_get_mysql_key_number_for_index(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*************************************************************************
|
/*************************************************************************
|
||||||
Does a table creation operation for MySQL. */
|
Does a table creation operation for MySQL. If the name of the created
|
||||||
|
table ends with the characters INNODB_MONITOR, then this also starts
|
||||||
|
printing of monitor output by the master thread. */
|
||||||
|
|
||||||
int
|
int
|
||||||
row_create_table_for_mysql(
|
row_create_table_for_mysql(
|
||||||
@ -789,6 +792,8 @@ row_create_table_for_mysql(
|
|||||||
tab_node_t* node;
|
tab_node_t* node;
|
||||||
mem_heap_t* heap;
|
mem_heap_t* heap;
|
||||||
que_thr_t* thr;
|
que_thr_t* thr;
|
||||||
|
ulint namelen;
|
||||||
|
ulint keywordlen;
|
||||||
ulint err;
|
ulint err;
|
||||||
|
|
||||||
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
|
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
|
||||||
@ -833,6 +838,20 @@ row_create_table_for_mysql(
|
|||||||
}
|
}
|
||||||
|
|
||||||
trx->error_state = DB_SUCCESS;
|
trx->error_state = DB_SUCCESS;
|
||||||
|
} else {
|
||||||
|
namelen = ut_strlen(table->name);
|
||||||
|
|
||||||
|
keywordlen = ut_strlen("innodb_monitor");
|
||||||
|
|
||||||
|
if (namelen >= keywordlen
|
||||||
|
&& 0 == ut_memcmp(table->name + namelen - keywordlen,
|
||||||
|
"innodb_monitor", keywordlen)) {
|
||||||
|
|
||||||
|
/* Table name ends with the characters innodb_monitor:
|
||||||
|
start monitor prints */
|
||||||
|
|
||||||
|
srv_print_innodb_monitor = TRUE;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mutex_exit(&(dict_sys->mutex));
|
mutex_exit(&(dict_sys->mutex));
|
||||||
@ -900,7 +919,9 @@ row_create_index_for_mysql(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*************************************************************************
|
/*************************************************************************
|
||||||
Drops a table for MySQL. */
|
Drops a table for MySQL. If the name of the dropped table ends with the
|
||||||
|
characters INNODB_MONITOR, then this also stops printing of monitor
|
||||||
|
output by the master thread. */
|
||||||
|
|
||||||
int
|
int
|
||||||
row_drop_table_for_mysql(
|
row_drop_table_for_mysql(
|
||||||
@ -918,11 +939,26 @@ row_drop_table_for_mysql(
|
|||||||
char* str1;
|
char* str1;
|
||||||
char* str2;
|
char* str2;
|
||||||
ulint len;
|
ulint len;
|
||||||
|
ulint namelen;
|
||||||
|
ulint keywordlen;
|
||||||
char buf[10000];
|
char buf[10000];
|
||||||
|
|
||||||
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
|
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
|
||||||
ut_a(name != NULL);
|
ut_a(name != NULL);
|
||||||
|
|
||||||
|
namelen = ut_strlen(name);
|
||||||
|
keywordlen = ut_strlen("innodb_monitor");
|
||||||
|
|
||||||
|
if (namelen >= keywordlen
|
||||||
|
&& 0 == ut_memcmp(name + namelen - keywordlen,
|
||||||
|
"innodb_monitor", keywordlen)) {
|
||||||
|
|
||||||
|
/* Table name ends with the characters innodb_monitor:
|
||||||
|
stop monitor prints */
|
||||||
|
|
||||||
|
srv_print_innodb_monitor = FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
/* We use the private SQL parser of Innobase to generate the
|
/* We use the private SQL parser of Innobase to generate the
|
||||||
query graphs needed in deleting the dictionary data from system
|
query graphs needed in deleting the dictionary data from system
|
||||||
tables in Innobase. Deleting a row from SYS_INDEXES table also
|
tables in Innobase. Deleting a row from SYS_INDEXES table also
|
||||||
|
@ -347,20 +347,36 @@ row_purge_del_mark(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/***************************************************************
|
/***************************************************************
|
||||||
Purges an update of an existing record. */
|
Purges an update of an existing record. Also purges an update of a delete
|
||||||
|
marked record if that record contained an externally stored field. */
|
||||||
static
|
static
|
||||||
void
|
void
|
||||||
row_purge_upd_exist(
|
row_purge_upd_exist_or_extern(
|
||||||
/*================*/
|
/*==========================*/
|
||||||
purge_node_t* node, /* in: row purge node */
|
purge_node_t* node, /* in: row purge node */
|
||||||
que_thr_t* thr) /* in: query thread */
|
que_thr_t* thr) /* in: query thread */
|
||||||
{
|
{
|
||||||
mem_heap_t* heap;
|
mem_heap_t* heap;
|
||||||
dtuple_t* entry;
|
dtuple_t* entry;
|
||||||
dict_index_t* index;
|
dict_index_t* index;
|
||||||
|
upd_field_t* ufield;
|
||||||
|
ibool is_insert;
|
||||||
|
ulint rseg_id;
|
||||||
|
ulint page_no;
|
||||||
|
ulint offset;
|
||||||
|
ulint internal_offset;
|
||||||
|
byte* data_field;
|
||||||
|
ulint data_field_len;
|
||||||
|
ulint i;
|
||||||
|
mtr_t mtr;
|
||||||
|
|
||||||
ut_ad(node && thr);
|
ut_ad(node && thr);
|
||||||
|
|
||||||
|
if (node->rec_type == TRX_UNDO_UPD_DEL_REC) {
|
||||||
|
|
||||||
|
goto skip_secondaries;
|
||||||
|
}
|
||||||
|
|
||||||
heap = mem_heap_create(1024);
|
heap = mem_heap_create(1024);
|
||||||
|
|
||||||
while (node->index != NULL) {
|
while (node->index != NULL) {
|
||||||
@ -378,6 +394,53 @@ row_purge_upd_exist(
|
|||||||
}
|
}
|
||||||
|
|
||||||
mem_heap_free(heap);
|
mem_heap_free(heap);
|
||||||
|
|
||||||
|
skip_secondaries:
|
||||||
|
/* Free possible externally stored fields */
|
||||||
|
for (i = 0; i < upd_get_n_fields(node->update); i++) {
|
||||||
|
|
||||||
|
ufield = upd_get_nth_field(node->update, i);
|
||||||
|
|
||||||
|
if (ufield->extern_storage) {
|
||||||
|
/* We use the fact that new_val points to
|
||||||
|
node->undo_rec and get thus the offset of
|
||||||
|
dfield data inside the undo record. Then we
|
||||||
|
can calculate from node->roll_ptr the file
|
||||||
|
address of the new_val data */
|
||||||
|
|
||||||
|
internal_offset = ((byte*)ufield->new_val.data)
|
||||||
|
- node->undo_rec;
|
||||||
|
|
||||||
|
ut_a(internal_offset < UNIV_PAGE_SIZE);
|
||||||
|
|
||||||
|
trx_undo_decode_roll_ptr(node->roll_ptr,
|
||||||
|
&is_insert, &rseg_id,
|
||||||
|
&page_no, &offset);
|
||||||
|
mtr_start(&mtr);
|
||||||
|
|
||||||
|
/* We have to acquire an X-latch to the clustered
|
||||||
|
index tree */
|
||||||
|
|
||||||
|
index = dict_table_get_first_index(node->table);
|
||||||
|
|
||||||
|
mtr_x_lock(dict_tree_get_lock(index->tree), &mtr);
|
||||||
|
|
||||||
|
/* We assume in purge of externally stored fields
|
||||||
|
that the space id of the undo log record is 0! */
|
||||||
|
|
||||||
|
data_field = buf_page_get(0, page_no, RW_X_LATCH, &mtr)
|
||||||
|
+ offset + internal_offset;
|
||||||
|
|
||||||
|
buf_page_dbg_add_level(buf_frame_align(data_field),
|
||||||
|
SYNC_TRX_UNDO_PAGE);
|
||||||
|
|
||||||
|
data_field_len = ufield->new_val.len;
|
||||||
|
|
||||||
|
btr_free_externally_stored_field(index, data_field,
|
||||||
|
data_field_len, &mtr);
|
||||||
|
mtr_commit(&mtr);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/***************************************************************
|
/***************************************************************
|
||||||
@ -388,6 +451,9 @@ row_purge_parse_undo_rec(
|
|||||||
/*=====================*/
|
/*=====================*/
|
||||||
/* out: TRUE if purge operation required */
|
/* out: TRUE if purge operation required */
|
||||||
purge_node_t* node, /* in: row undo node */
|
purge_node_t* node, /* in: row undo node */
|
||||||
|
ibool* updated_extern,
|
||||||
|
/* out: TRUE if an externally stored field
|
||||||
|
was updated */
|
||||||
que_thr_t* thr) /* in: query thread */
|
que_thr_t* thr) /* in: query thread */
|
||||||
{
|
{
|
||||||
dict_index_t* clust_index;
|
dict_index_t* clust_index;
|
||||||
@ -403,10 +469,10 @@ row_purge_parse_undo_rec(
|
|||||||
ut_ad(node && thr);
|
ut_ad(node && thr);
|
||||||
|
|
||||||
ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info,
|
ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info,
|
||||||
&undo_no, &table_id);
|
updated_extern, &undo_no, &table_id);
|
||||||
node->rec_type = type;
|
node->rec_type = type;
|
||||||
|
|
||||||
if (type == TRX_UNDO_UPD_DEL_REC) {
|
if (type == TRX_UNDO_UPD_DEL_REC && !(*updated_extern)) {
|
||||||
|
|
||||||
return(FALSE);
|
return(FALSE);
|
||||||
}
|
}
|
||||||
@ -416,7 +482,7 @@ row_purge_parse_undo_rec(
|
|||||||
node->table = NULL;
|
node->table = NULL;
|
||||||
|
|
||||||
if (type == TRX_UNDO_UPD_EXIST_REC
|
if (type == TRX_UNDO_UPD_EXIST_REC
|
||||||
&& cmpl_info & UPD_NODE_NO_ORD_CHANGE) {
|
&& cmpl_info & UPD_NODE_NO_ORD_CHANGE && !(*updated_extern)) {
|
||||||
|
|
||||||
/* Purge requires no changes to indexes: we may return */
|
/* Purge requires no changes to indexes: we may return */
|
||||||
|
|
||||||
@ -455,8 +521,11 @@ row_purge_parse_undo_rec(
|
|||||||
|
|
||||||
/* Read to the partial row the fields that occur in indexes */
|
/* Read to the partial row the fields that occur in indexes */
|
||||||
|
|
||||||
ptr = trx_undo_rec_get_partial_row(ptr, clust_index, &(node->row),
|
if (!cmpl_info & UPD_NODE_NO_ORD_CHANGE) {
|
||||||
node->heap);
|
ptr = trx_undo_rec_get_partial_row(ptr, clust_index,
|
||||||
|
&(node->row), node->heap);
|
||||||
|
}
|
||||||
|
|
||||||
return(TRUE);
|
return(TRUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -475,6 +544,7 @@ row_purge(
|
|||||||
{
|
{
|
||||||
dulint roll_ptr;
|
dulint roll_ptr;
|
||||||
ibool purge_needed;
|
ibool purge_needed;
|
||||||
|
ibool updated_extern;
|
||||||
|
|
||||||
ut_ad(node && thr);
|
ut_ad(node && thr);
|
||||||
|
|
||||||
@ -494,7 +564,8 @@ row_purge(
|
|||||||
if (node->undo_rec == &trx_purge_dummy_rec) {
|
if (node->undo_rec == &trx_purge_dummy_rec) {
|
||||||
purge_needed = FALSE;
|
purge_needed = FALSE;
|
||||||
} else {
|
} else {
|
||||||
purge_needed = row_purge_parse_undo_rec(node, thr);
|
purge_needed = row_purge_parse_undo_rec(node, &updated_extern,
|
||||||
|
thr);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (purge_needed) {
|
if (purge_needed) {
|
||||||
@ -503,11 +574,13 @@ row_purge(
|
|||||||
node->index = dict_table_get_next_index(
|
node->index = dict_table_get_next_index(
|
||||||
dict_table_get_first_index(node->table));
|
dict_table_get_first_index(node->table));
|
||||||
|
|
||||||
if (node->rec_type == TRX_UNDO_UPD_EXIST_REC) {
|
if (node->rec_type == TRX_UNDO_DEL_MARK_REC) {
|
||||||
row_purge_upd_exist(node, thr);
|
|
||||||
} else {
|
|
||||||
ut_ad(node->rec_type == TRX_UNDO_DEL_MARK_REC);
|
|
||||||
row_purge_del_mark(node, thr);
|
row_purge_del_mark(node, thr);
|
||||||
|
|
||||||
|
} else if (updated_extern
|
||||||
|
|| node->rec_type == TRX_UNDO_UPD_EXIST_REC) {
|
||||||
|
|
||||||
|
row_purge_upd_exist_or_extern(node, thr);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (node->found_clust) {
|
if (node->found_clust) {
|
||||||
|
@ -146,15 +146,17 @@ row_build_index_entry(
|
|||||||
|
|
||||||
/***********************************************************************
|
/***********************************************************************
|
||||||
An inverse function to dict_row_build_index_entry. Builds a row from a
|
An inverse function to dict_row_build_index_entry. Builds a row from a
|
||||||
record in a clustered index. */
|
record in a clustered index. NOTE that externally stored (often big)
|
||||||
|
fields are always copied to heap. */
|
||||||
|
|
||||||
dtuple_t*
|
dtuple_t*
|
||||||
row_build(
|
row_build(
|
||||||
/*======*/
|
/*======*/
|
||||||
/* out, own: row built; see the NOTE below! */
|
/* out, own: row built; see the NOTE below! */
|
||||||
ulint type, /* in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
|
ulint type, /* in: ROW_COPY_POINTERS, ROW_COPY_DATA, or
|
||||||
the former copies also the data fields to
|
ROW_COPY_ALSO_EXTERNALS,
|
||||||
heap as the latter only places pointers to
|
the two last copy also the data fields to
|
||||||
|
heap as the first only places pointers to
|
||||||
data fields on the index page, and thus is
|
data fields on the index page, and thus is
|
||||||
more efficient */
|
more efficient */
|
||||||
dict_index_t* index, /* in: clustered index */
|
dict_index_t* index, /* in: clustered index */
|
||||||
@ -170,19 +172,19 @@ row_build(
|
|||||||
{
|
{
|
||||||
dtuple_t* row;
|
dtuple_t* row;
|
||||||
dict_table_t* table;
|
dict_table_t* table;
|
||||||
ulint n_fields;
|
dict_col_t* col;
|
||||||
ulint i;
|
|
||||||
dfield_t* dfield;
|
dfield_t* dfield;
|
||||||
|
ulint n_fields;
|
||||||
byte* field;
|
byte* field;
|
||||||
ulint len;
|
ulint len;
|
||||||
ulint row_len;
|
ulint row_len;
|
||||||
dict_col_t* col;
|
|
||||||
byte* buf;
|
byte* buf;
|
||||||
|
ulint i;
|
||||||
|
|
||||||
ut_ad(index && rec && heap);
|
ut_ad(index && rec && heap);
|
||||||
ut_ad(index->type & DICT_CLUSTERED);
|
ut_ad(index->type & DICT_CLUSTERED);
|
||||||
|
|
||||||
if (type == ROW_COPY_DATA) {
|
if (type != ROW_COPY_POINTERS) {
|
||||||
/* Take a copy of rec to heap */
|
/* Take a copy of rec to heap */
|
||||||
buf = mem_heap_alloc(heap, rec_get_size(rec));
|
buf = mem_heap_alloc(heap, rec_get_size(rec));
|
||||||
rec = rec_copy(buf, rec);
|
rec = rec_copy(buf, rec);
|
||||||
@ -207,6 +209,13 @@ row_build(
|
|||||||
dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
|
dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
|
||||||
field = rec_get_nth_field(rec, i, &len);
|
field = rec_get_nth_field(rec, i, &len);
|
||||||
|
|
||||||
|
if (type == ROW_COPY_ALSO_EXTERNALS
|
||||||
|
&& rec_get_nth_field_extern_bit(rec, i)) {
|
||||||
|
|
||||||
|
field = btr_rec_copy_externally_stored_field(rec,
|
||||||
|
i, &len, heap);
|
||||||
|
}
|
||||||
|
|
||||||
dfield_set_data(dfield, field, len);
|
dfield_set_data(dfield, field, len);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -215,6 +224,7 @@ row_build(
|
|||||||
return(row);
|
return(row);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef notdefined
|
||||||
/***********************************************************************
|
/***********************************************************************
|
||||||
An inverse function to dict_row_build_index_entry. Builds a row from a
|
An inverse function to dict_row_build_index_entry. Builds a row from a
|
||||||
record in a clustered index. */
|
record in a clustered index. */
|
||||||
@ -229,7 +239,9 @@ row_build_to_tuple(
|
|||||||
directly into this record, therefore,
|
directly into this record, therefore,
|
||||||
the buffer page of this record must be
|
the buffer page of this record must be
|
||||||
at least s-latched and the latch held
|
at least s-latched and the latch held
|
||||||
as long as the row dtuple is used! */
|
as long as the row dtuple is used!
|
||||||
|
NOTE 2: does not work with externally
|
||||||
|
stored fields! */
|
||||||
{
|
{
|
||||||
dict_table_t* table;
|
dict_table_t* table;
|
||||||
ulint n_fields;
|
ulint n_fields;
|
||||||
@ -265,9 +277,11 @@ row_build_to_tuple(
|
|||||||
|
|
||||||
ut_ad(dtuple_check_typed(row));
|
ut_ad(dtuple_check_typed(row));
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/***********************************************************************
|
/***********************************************************************
|
||||||
Converts an index record to a typed data tuple. */
|
Converts an index record to a typed data tuple. NOTE that externally
|
||||||
|
stored (often big) fields are NOT copied to heap. */
|
||||||
|
|
||||||
dtuple_t*
|
dtuple_t*
|
||||||
row_rec_to_index_entry(
|
row_rec_to_index_entry(
|
||||||
|
@ -2036,7 +2036,8 @@ row_sel_store_mysql_rec(
|
|||||||
which was described in prebuilt's
|
which was described in prebuilt's
|
||||||
template */
|
template */
|
||||||
{
|
{
|
||||||
mysql_row_templ_t* templ;
|
mysql_row_templ_t* templ;
|
||||||
|
mem_heap_t* extern_field_heap = NULL;
|
||||||
byte* data;
|
byte* data;
|
||||||
ulint len;
|
ulint len;
|
||||||
byte* blob_buf;
|
byte* blob_buf;
|
||||||
@ -2059,6 +2060,24 @@ row_sel_store_mysql_rec(
|
|||||||
|
|
||||||
data = rec_get_nth_field(rec, templ->rec_field_no, &len);
|
data = rec_get_nth_field(rec, templ->rec_field_no, &len);
|
||||||
|
|
||||||
|
if (rec_get_nth_field_extern_bit(rec, templ->rec_field_no)) {
|
||||||
|
/* Copy an externally stored field to the temporary
|
||||||
|
heap */
|
||||||
|
|
||||||
|
if (prebuilt->trx->has_search_latch) {
|
||||||
|
rw_lock_s_unlock(&btr_search_latch);
|
||||||
|
prebuilt->trx->has_search_latch = FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
extern_field_heap = mem_heap_create(UNIV_PAGE_SIZE);
|
||||||
|
|
||||||
|
data = btr_rec_copy_externally_stored_field(rec,
|
||||||
|
templ->rec_field_no, &len,
|
||||||
|
extern_field_heap);
|
||||||
|
|
||||||
|
ut_a(len != UNIV_SQL_NULL);
|
||||||
|
}
|
||||||
|
|
||||||
if (len != UNIV_SQL_NULL) {
|
if (len != UNIV_SQL_NULL) {
|
||||||
if (templ->type == DATA_BLOB) {
|
if (templ->type == DATA_BLOB) {
|
||||||
|
|
||||||
@ -2081,6 +2100,10 @@ row_sel_store_mysql_rec(
|
|||||||
mysql_rec + templ->mysql_col_offset,
|
mysql_rec + templ->mysql_col_offset,
|
||||||
templ->mysql_col_len, data, len,
|
templ->mysql_col_len, data, len,
|
||||||
templ->type, templ->is_unsigned);
|
templ->type, templ->is_unsigned);
|
||||||
|
|
||||||
|
if (extern_field_heap) {
|
||||||
|
mem_heap_free(extern_field_heap);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
mysql_rec[templ->mysql_null_byte_offset] |=
|
mysql_rec[templ->mysql_null_byte_offset] |=
|
||||||
(byte) (templ->mysql_null_bit_mask);
|
(byte) (templ->mysql_null_bit_mask);
|
||||||
@ -2450,6 +2473,7 @@ row_search_for_mysql(
|
|||||||
ibool unique_search_from_clust_index = FALSE;
|
ibool unique_search_from_clust_index = FALSE;
|
||||||
ibool mtr_has_extra_clust_latch = FALSE;
|
ibool mtr_has_extra_clust_latch = FALSE;
|
||||||
ibool moves_up = FALSE;
|
ibool moves_up = FALSE;
|
||||||
|
ulint cnt = 0;
|
||||||
mtr_t mtr;
|
mtr_t mtr;
|
||||||
|
|
||||||
ut_ad(index && pcur && search_tuple);
|
ut_ad(index && pcur && search_tuple);
|
||||||
@ -2457,6 +2481,11 @@ row_search_for_mysql(
|
|||||||
|
|
||||||
ut_ad(sync_thread_levels_empty_gen(FALSE));
|
ut_ad(sync_thread_levels_empty_gen(FALSE));
|
||||||
|
|
||||||
|
/* printf("Match mode %lu\n search tuple ", match_mode);
|
||||||
|
dtuple_print(search_tuple);
|
||||||
|
|
||||||
|
printf("N tables locked %lu\n", trx->mysql_n_tables_locked);
|
||||||
|
*/
|
||||||
if (direction == 0) {
|
if (direction == 0) {
|
||||||
prebuilt->n_rows_fetched = 0;
|
prebuilt->n_rows_fetched = 0;
|
||||||
prebuilt->n_fetch_cached = 0;
|
prebuilt->n_fetch_cached = 0;
|
||||||
@ -2528,6 +2557,8 @@ row_search_for_mysql(
|
|||||||
|
|
||||||
mtr_commit(&mtr);
|
mtr_commit(&mtr);
|
||||||
|
|
||||||
|
/* printf("%s record not found 1\n", index->name); */
|
||||||
|
|
||||||
return(DB_RECORD_NOT_FOUND);
|
return(DB_RECORD_NOT_FOUND);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2565,17 +2596,18 @@ row_search_for_mysql(
|
|||||||
|
|
||||||
mtr_commit(&mtr);
|
mtr_commit(&mtr);
|
||||||
|
|
||||||
|
/* printf("%s shortcut\n", index->name); */
|
||||||
|
|
||||||
return(DB_SUCCESS);
|
return(DB_SUCCESS);
|
||||||
|
|
||||||
} else if (shortcut == SEL_EXHAUSTED) {
|
} else if (shortcut == SEL_EXHAUSTED) {
|
||||||
|
|
||||||
mtr_commit(&mtr);
|
mtr_commit(&mtr);
|
||||||
|
|
||||||
|
/* printf("%s record not found 2\n",
|
||||||
|
index->name); */
|
||||||
return(DB_RECORD_NOT_FOUND);
|
return(DB_RECORD_NOT_FOUND);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Commit the mini-transaction since it can
|
|
||||||
hold latches */
|
|
||||||
|
|
||||||
mtr_commit(&mtr);
|
mtr_commit(&mtr);
|
||||||
mtr_start(&mtr);
|
mtr_start(&mtr);
|
||||||
@ -2659,7 +2691,12 @@ rec_loop:
|
|||||||
cons_read_requires_clust_rec = FALSE;
|
cons_read_requires_clust_rec = FALSE;
|
||||||
|
|
||||||
rec = btr_pcur_get_rec(pcur);
|
rec = btr_pcur_get_rec(pcur);
|
||||||
|
/*
|
||||||
|
printf("Using index %s cnt %lu ", index->name, cnt);
|
||||||
|
printf("; Page no %lu\n",
|
||||||
|
buf_frame_get_page_no(buf_frame_align(rec)));
|
||||||
|
rec_print(rec);
|
||||||
|
*/
|
||||||
if (rec == page_get_infimum_rec(buf_frame_align(rec))) {
|
if (rec == page_get_infimum_rec(buf_frame_align(rec))) {
|
||||||
|
|
||||||
/* The infimum record on a page cannot be in the result set,
|
/* The infimum record on a page cannot be in the result set,
|
||||||
@ -2700,12 +2737,15 @@ rec_loop:
|
|||||||
/* Test if the index record matches completely to search_tuple
|
/* Test if the index record matches completely to search_tuple
|
||||||
in prebuilt: if not, then we return with DB_RECORD_NOT_FOUND */
|
in prebuilt: if not, then we return with DB_RECORD_NOT_FOUND */
|
||||||
|
|
||||||
|
/* printf("Comparing rec and search tuple\n"); */
|
||||||
|
|
||||||
if (0 != cmp_dtuple_rec(search_tuple, rec)) {
|
if (0 != cmp_dtuple_rec(search_tuple, rec)) {
|
||||||
|
|
||||||
btr_pcur_store_position(pcur, &mtr);
|
btr_pcur_store_position(pcur, &mtr);
|
||||||
|
|
||||||
ret = DB_RECORD_NOT_FOUND;
|
ret = DB_RECORD_NOT_FOUND;
|
||||||
|
/* printf("%s record not found 3\n", index->name); */
|
||||||
|
|
||||||
goto normal_return;
|
goto normal_return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2716,6 +2756,7 @@ rec_loop:
|
|||||||
btr_pcur_store_position(pcur, &mtr);
|
btr_pcur_store_position(pcur, &mtr);
|
||||||
|
|
||||||
ret = DB_RECORD_NOT_FOUND;
|
ret = DB_RECORD_NOT_FOUND;
|
||||||
|
/* printf("%s record not found 4\n", index->name); */
|
||||||
|
|
||||||
goto normal_return;
|
goto normal_return;
|
||||||
}
|
}
|
||||||
@ -2884,6 +2925,8 @@ next_rec:
|
|||||||
moved = sel_restore_position_for_mysql(BTR_SEARCH_LEAF, pcur,
|
moved = sel_restore_position_for_mysql(BTR_SEARCH_LEAF, pcur,
|
||||||
moves_up, &mtr);
|
moves_up, &mtr);
|
||||||
if (moved) {
|
if (moved) {
|
||||||
|
cnt++;
|
||||||
|
|
||||||
goto rec_loop;
|
goto rec_loop;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2906,6 +2949,8 @@ next_rec:
|
|||||||
goto normal_return;
|
goto normal_return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cnt++;
|
||||||
|
|
||||||
goto rec_loop;
|
goto rec_loop;
|
||||||
/*-------------------------------------------------------------*/
|
/*-------------------------------------------------------------*/
|
||||||
lock_wait_or_error:
|
lock_wait_or_error:
|
||||||
@ -2931,7 +2976,9 @@ lock_wait_or_error:
|
|||||||
|
|
||||||
goto rec_loop;
|
goto rec_loop;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* printf("Using index %s cnt %lu ret value %lu err\n", index->name,
|
||||||
|
cnt, err); */
|
||||||
return(err);
|
return(err);
|
||||||
|
|
||||||
normal_return:
|
normal_return:
|
||||||
@ -2945,5 +2992,7 @@ normal_return:
|
|||||||
ret = DB_SUCCESS;
|
ret = DB_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* printf("Using index %s cnt %lu ret value %lu\n", index->name,
|
||||||
|
cnt, err); */
|
||||||
return(ret);
|
return(ret);
|
||||||
}
|
}
|
||||||
|
@ -242,11 +242,12 @@ row_undo_ins_parse_undo_rec(
|
|||||||
dulint table_id;
|
dulint table_id;
|
||||||
ulint type;
|
ulint type;
|
||||||
ulint dummy;
|
ulint dummy;
|
||||||
|
ibool dummy_extern;
|
||||||
|
|
||||||
ut_ad(node && thr);
|
ut_ad(node && thr);
|
||||||
|
|
||||||
ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &dummy, &undo_no,
|
ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &dummy,
|
||||||
&table_id);
|
&dummy_extern, &undo_no, &table_id);
|
||||||
ut_ad(type == TRX_UNDO_INSERT_REC);
|
ut_ad(type == TRX_UNDO_INSERT_REC);
|
||||||
node->rec_type = type;
|
node->rec_type = type;
|
||||||
|
|
||||||
@ -284,9 +285,9 @@ row_undo_ins(
|
|||||||
row_undo_ins_parse_undo_rec(node, thr);
|
row_undo_ins_parse_undo_rec(node, thr);
|
||||||
|
|
||||||
if (node->table == NULL) {
|
if (node->table == NULL) {
|
||||||
found = FALSE;
|
found = FALSE;
|
||||||
} else {
|
} else {
|
||||||
found = row_undo_search_clust_to_pcur(node, thr);
|
found = row_undo_search_clust_to_pcur(node, thr);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!found) {
|
if (!found) {
|
||||||
|
@ -94,12 +94,12 @@ row_undo_mod_clust_low(
|
|||||||
mtr_t* mtr, /* in: mtr */
|
mtr_t* mtr, /* in: mtr */
|
||||||
ulint mode) /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
|
ulint mode) /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
|
||||||
{
|
{
|
||||||
|
big_rec_t* dummy_big_rec;
|
||||||
dict_index_t* index;
|
dict_index_t* index;
|
||||||
btr_pcur_t* pcur;
|
btr_pcur_t* pcur;
|
||||||
btr_cur_t* btr_cur;
|
btr_cur_t* btr_cur;
|
||||||
ulint err;
|
ulint err;
|
||||||
ibool success;
|
ibool success;
|
||||||
ibool do_remove;
|
|
||||||
|
|
||||||
index = dict_table_get_first_index(node->table);
|
index = dict_table_get_first_index(node->table);
|
||||||
|
|
||||||
@ -110,49 +110,80 @@ row_undo_mod_clust_low(
|
|||||||
|
|
||||||
ut_ad(success);
|
ut_ad(success);
|
||||||
|
|
||||||
|
if (mode == BTR_MODIFY_LEAF) {
|
||||||
|
|
||||||
|
err = btr_cur_optimistic_update(BTR_NO_LOCKING_FLAG
|
||||||
|
| BTR_NO_UNDO_LOG_FLAG
|
||||||
|
| BTR_KEEP_SYS_FLAG,
|
||||||
|
btr_cur, node->update,
|
||||||
|
node->cmpl_info, thr, mtr);
|
||||||
|
} else {
|
||||||
|
ut_ad(mode == BTR_MODIFY_TREE);
|
||||||
|
|
||||||
|
err = btr_cur_pessimistic_update(BTR_NO_LOCKING_FLAG
|
||||||
|
| BTR_NO_UNDO_LOG_FLAG
|
||||||
|
| BTR_KEEP_SYS_FLAG,
|
||||||
|
btr_cur, &dummy_big_rec, node->update,
|
||||||
|
node->cmpl_info, thr, mtr);
|
||||||
|
}
|
||||||
|
|
||||||
|
return(err);
|
||||||
|
}
|
||||||
|
|
||||||
|
/***************************************************************
|
||||||
|
Removes a clustered index record after undo if possible. */
|
||||||
|
static
|
||||||
|
ulint
|
||||||
|
row_undo_mod_remove_clust_low(
|
||||||
|
/*==========================*/
|
||||||
|
/* out: DB_SUCCESS, DB_FAIL, or error code:
|
||||||
|
we may run out of file space */
|
||||||
|
undo_node_t* node, /* in: row undo node */
|
||||||
|
que_thr_t* thr, /* in: query thread */
|
||||||
|
mtr_t* mtr, /* in: mtr */
|
||||||
|
ulint mode) /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
|
||||||
|
{
|
||||||
|
btr_pcur_t* pcur;
|
||||||
|
btr_cur_t* btr_cur;
|
||||||
|
ulint err;
|
||||||
|
ibool success;
|
||||||
|
|
||||||
|
pcur = &(node->pcur);
|
||||||
|
btr_cur = btr_pcur_get_btr_cur(pcur);
|
||||||
|
|
||||||
|
success = btr_pcur_restore_position(mode, pcur, mtr);
|
||||||
|
|
||||||
|
if (!success) {
|
||||||
|
|
||||||
|
return(DB_SUCCESS);
|
||||||
|
}
|
||||||
|
|
||||||
/* Find out if we can remove the whole clustered index record */
|
/* Find out if we can remove the whole clustered index record */
|
||||||
|
|
||||||
if (node->rec_type == TRX_UNDO_UPD_DEL_REC
|
if (node->rec_type == TRX_UNDO_UPD_DEL_REC
|
||||||
&& !row_vers_must_preserve_del_marked(node->new_trx_id, mtr)) {
|
&& !row_vers_must_preserve_del_marked(node->new_trx_id, mtr)) {
|
||||||
|
|
||||||
do_remove = TRUE;
|
/* Ok, we can remove */
|
||||||
} else {
|
} else {
|
||||||
do_remove = FALSE;
|
return(DB_SUCCESS);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mode == BTR_MODIFY_LEAF) {
|
if (mode == BTR_MODIFY_LEAF) {
|
||||||
|
success = btr_cur_optimistic_delete(btr_cur, mtr);
|
||||||
|
|
||||||
if (do_remove) {
|
if (success) {
|
||||||
success = btr_cur_optimistic_delete(btr_cur, mtr);
|
err = DB_SUCCESS;
|
||||||
|
|
||||||
if (success) {
|
|
||||||
err = DB_SUCCESS;
|
|
||||||
} else {
|
|
||||||
err = DB_FAIL;
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
err = btr_cur_optimistic_update(BTR_NO_LOCKING_FLAG
|
err = DB_FAIL;
|
||||||
| BTR_NO_UNDO_LOG_FLAG
|
|
||||||
| BTR_KEEP_SYS_FLAG,
|
|
||||||
btr_cur, node->update,
|
|
||||||
node->cmpl_info, thr, mtr);
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
ut_ad(mode == BTR_MODIFY_TREE);
|
ut_ad(mode == BTR_MODIFY_TREE);
|
||||||
|
|
||||||
if (do_remove) {
|
btr_cur_pessimistic_delete(&err, FALSE, btr_cur, mtr);
|
||||||
btr_cur_pessimistic_delete(&err, FALSE, btr_cur, mtr);
|
|
||||||
|
|
||||||
/* The delete operation may fail if we have little
|
/* The delete operation may fail if we have little
|
||||||
file space left: TODO: easiest to crash the database
|
file space left: TODO: easiest to crash the database
|
||||||
and restart with more file space */
|
and restart with more file space */
|
||||||
} else {
|
|
||||||
err = btr_cur_pessimistic_update(BTR_NO_LOCKING_FLAG
|
|
||||||
| BTR_NO_UNDO_LOG_FLAG
|
|
||||||
| BTR_KEEP_SYS_FLAG,
|
|
||||||
btr_cur, node->update,
|
|
||||||
node->cmpl_info, thr, mtr);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return(err);
|
return(err);
|
||||||
@ -204,10 +235,31 @@ row_undo_mod_clust(
|
|||||||
err = row_undo_mod_clust_low(node, thr, &mtr, BTR_MODIFY_TREE);
|
err = row_undo_mod_clust_low(node, thr, &mtr, BTR_MODIFY_TREE);
|
||||||
}
|
}
|
||||||
|
|
||||||
node->state = UNDO_NODE_FETCH_NEXT;
|
|
||||||
|
|
||||||
btr_pcur_commit_specify_mtr(pcur, &mtr);
|
btr_pcur_commit_specify_mtr(pcur, &mtr);
|
||||||
|
|
||||||
|
if (err == DB_SUCCESS && node->rec_type == TRX_UNDO_UPD_DEL_REC) {
|
||||||
|
|
||||||
|
mtr_start(&mtr);
|
||||||
|
|
||||||
|
err = row_undo_mod_remove_clust_low(node, thr, &mtr,
|
||||||
|
BTR_MODIFY_LEAF);
|
||||||
|
if (err != DB_SUCCESS) {
|
||||||
|
btr_pcur_commit_specify_mtr(pcur, &mtr);
|
||||||
|
|
||||||
|
/* We may have to modify tree structure: do a
|
||||||
|
pessimistic descent down the index tree */
|
||||||
|
|
||||||
|
mtr_start(&mtr);
|
||||||
|
|
||||||
|
err = row_undo_mod_remove_clust_low(node, thr, &mtr,
|
||||||
|
BTR_MODIFY_TREE);
|
||||||
|
}
|
||||||
|
|
||||||
|
btr_pcur_commit_specify_mtr(pcur, &mtr);
|
||||||
|
}
|
||||||
|
|
||||||
|
node->state = UNDO_NODE_FETCH_NEXT;
|
||||||
|
|
||||||
trx_undo_rec_release(node->trx, node->undo_no);
|
trx_undo_rec_release(node->trx, node->undo_no);
|
||||||
|
|
||||||
if (more_vers && err == DB_SUCCESS) {
|
if (more_vers && err == DB_SUCCESS) {
|
||||||
@ -388,7 +440,6 @@ row_undo_mod_del_unmark_sec(
|
|||||||
mem_free(err_buf);
|
mem_free(err_buf);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
btr_cur = btr_pcur_get_btr_cur(&pcur);
|
btr_cur = btr_pcur_get_btr_cur(&pcur);
|
||||||
|
|
||||||
err = btr_cur_del_mark_set_sec_rec(BTR_NO_LOCKING_FLAG,
|
err = btr_cur_del_mark_set_sec_rec(BTR_NO_LOCKING_FLAG,
|
||||||
@ -546,11 +597,12 @@ row_undo_mod_parse_undo_rec(
|
|||||||
ulint info_bits;
|
ulint info_bits;
|
||||||
ulint type;
|
ulint type;
|
||||||
ulint cmpl_info;
|
ulint cmpl_info;
|
||||||
|
ibool dummy_extern;
|
||||||
|
|
||||||
ut_ad(node && thr);
|
ut_ad(node && thr);
|
||||||
|
|
||||||
ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info,
|
ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info,
|
||||||
&undo_no, &table_id);
|
&dummy_extern, &undo_no, &table_id);
|
||||||
node->rec_type = type;
|
node->rec_type = type;
|
||||||
|
|
||||||
node->table = dict_table_get_on_id(table_id, thr_get_trx(thr));
|
node->table = dict_table_get_on_id(table_id, thr_get_trx(thr));
|
||||||
@ -598,10 +650,9 @@ row_undo_mod(
|
|||||||
row_undo_mod_parse_undo_rec(node, thr);
|
row_undo_mod_parse_undo_rec(node, thr);
|
||||||
|
|
||||||
if (node->table == NULL) {
|
if (node->table == NULL) {
|
||||||
found = FALSE;
|
found = FALSE;
|
||||||
} else {
|
} else {
|
||||||
|
found = row_undo_search_clust_to_pcur(node, thr);
|
||||||
found = row_undo_search_clust_to_pcur(node, thr);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!found) {
|
if (!found) {
|
||||||
|
@ -124,6 +124,8 @@ row_undo_node_create(
|
|||||||
undo->state = UNDO_NODE_FETCH_NEXT;
|
undo->state = UNDO_NODE_FETCH_NEXT;
|
||||||
undo->trx = trx;
|
undo->trx = trx;
|
||||||
|
|
||||||
|
btr_pcur_init(&(undo->pcur));
|
||||||
|
|
||||||
undo->heap = mem_heap_create(256);
|
undo->heap = mem_heap_create(256);
|
||||||
|
|
||||||
return(undo);
|
return(undo);
|
||||||
@ -303,6 +305,16 @@ row_undo_step(
|
|||||||
if (err != DB_SUCCESS) {
|
if (err != DB_SUCCESS) {
|
||||||
/* SQL error detected */
|
/* SQL error detected */
|
||||||
|
|
||||||
|
fprintf(stderr, "InnoDB: Fatal error %lu in rollback.\n", err);
|
||||||
|
|
||||||
|
if (err == DB_OUT_OF_FILE_SPACE) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: Error 13 means out of tablespace.\n"
|
||||||
|
"InnoDB: Consider increasing your tablespace.\n");
|
||||||
|
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
ut_a(0);
|
ut_a(0);
|
||||||
|
|
||||||
return(NULL);
|
return(NULL);
|
||||||
|
@ -90,8 +90,10 @@ upd_node_create(
|
|||||||
node->in_mysql_interface = FALSE;
|
node->in_mysql_interface = FALSE;
|
||||||
|
|
||||||
node->row = NULL;
|
node->row = NULL;
|
||||||
|
node->ext_vec = NULL;
|
||||||
node->index = NULL;
|
node->index = NULL;
|
||||||
|
node->update = NULL;
|
||||||
|
|
||||||
node->select = NULL;
|
node->select = NULL;
|
||||||
|
|
||||||
node->heap = mem_heap_create(128);
|
node->heap = mem_heap_create(128);
|
||||||
@ -160,7 +162,8 @@ row_upd_index_entry_sys_field(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/***************************************************************
|
/***************************************************************
|
||||||
Returns TRUE if row update changes size of some field in index. */
|
Returns TRUE if row update changes size of some field in index
|
||||||
|
or if some field to be updated is stored externally in rec or update. */
|
||||||
|
|
||||||
ibool
|
ibool
|
||||||
row_upd_changes_field_size(
|
row_upd_changes_field_size(
|
||||||
@ -199,6 +202,16 @@ row_upd_changes_field_size(
|
|||||||
|
|
||||||
return(TRUE);
|
return(TRUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (rec_get_nth_field_extern_bit(rec, upd_field->field_no)) {
|
||||||
|
|
||||||
|
return(TRUE);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (upd_field->extern_storage) {
|
||||||
|
|
||||||
|
return(TRUE);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return(FALSE);
|
return(FALSE);
|
||||||
@ -441,6 +454,34 @@ row_upd_index_parse(
|
|||||||
|
|
||||||
return(ptr);
|
return(ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*******************************************************************
|
||||||
|
Returns TRUE if ext_vec contains i. */
|
||||||
|
UNIV_INLINE
|
||||||
|
ibool
|
||||||
|
upd_ext_vec_contains(
|
||||||
|
/*=================*/
|
||||||
|
/* out: TRUE if i is in ext_vec */
|
||||||
|
ulint* ext_vec, /* in: array of indexes or NULL */
|
||||||
|
ulint n_ext_vec, /* in: number of numbers in ext_vec */
|
||||||
|
ulint i) /* in: a number */
|
||||||
|
{
|
||||||
|
ulint j;
|
||||||
|
|
||||||
|
if (ext_vec == NULL) {
|
||||||
|
|
||||||
|
return(FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (j = 0; j < n_ext_vec; j++) {
|
||||||
|
if (ext_vec[j] == i) {
|
||||||
|
|
||||||
|
return(TRUE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return(FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
/*******************************************************************
|
/*******************************************************************
|
||||||
Builds an update vector from those fields, excluding the roll ptr and
|
Builds an update vector from those fields, excluding the roll ptr and
|
||||||
@ -454,6 +495,9 @@ row_upd_build_difference(
|
|||||||
fields, excluding roll ptr and trx id */
|
fields, excluding roll ptr and trx id */
|
||||||
dict_index_t* index, /* in: clustered index */
|
dict_index_t* index, /* in: clustered index */
|
||||||
dtuple_t* entry, /* in: entry to insert */
|
dtuple_t* entry, /* in: entry to insert */
|
||||||
|
ulint* ext_vec,/* in: array containing field numbers of
|
||||||
|
externally stored fields in entry, or NULL */
|
||||||
|
ulint n_ext_vec,/* in: number of fields in ext_vec */
|
||||||
rec_t* rec, /* in: clustered index record */
|
rec_t* rec, /* in: clustered index record */
|
||||||
mem_heap_t* heap) /* in: memory heap from which allocated */
|
mem_heap_t* heap) /* in: memory heap from which allocated */
|
||||||
{
|
{
|
||||||
@ -480,16 +524,25 @@ row_upd_build_difference(
|
|||||||
for (i = 0; i < dtuple_get_n_fields(entry); i++) {
|
for (i = 0; i < dtuple_get_n_fields(entry); i++) {
|
||||||
|
|
||||||
data = rec_get_nth_field(rec, i, &len);
|
data = rec_get_nth_field(rec, i, &len);
|
||||||
|
|
||||||
dfield = dtuple_get_nth_field(entry, i);
|
dfield = dtuple_get_nth_field(entry, i);
|
||||||
|
|
||||||
if ((i != trx_id_pos) && (i != roll_ptr_pos)
|
if ((rec_get_nth_field_extern_bit(rec, i)
|
||||||
&& !dfield_data_is_equal(dfield, len, data)) {
|
!= upd_ext_vec_contains(ext_vec, n_ext_vec, i))
|
||||||
|
|| ((i != trx_id_pos) && (i != roll_ptr_pos)
|
||||||
|
&& !dfield_data_is_equal(dfield, len, data))) {
|
||||||
|
|
||||||
upd_field = upd_get_nth_field(update, n_diff);
|
upd_field = upd_get_nth_field(update, n_diff);
|
||||||
|
|
||||||
dfield_copy(&(upd_field->new_val), dfield);
|
dfield_copy(&(upd_field->new_val), dfield);
|
||||||
|
|
||||||
upd_field_set_field_no(upd_field, i, index);
|
upd_field_set_field_no(upd_field, i, index);
|
||||||
|
|
||||||
|
if (upd_ext_vec_contains(ext_vec, n_ext_vec, i)) {
|
||||||
|
upd_field->extern_storage = TRUE;
|
||||||
|
} else {
|
||||||
|
upd_field->extern_storage = FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
n_diff++;
|
n_diff++;
|
||||||
}
|
}
|
||||||
@ -630,9 +683,7 @@ row_upd_changes_ord_field(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/***************************************************************
|
/***************************************************************
|
||||||
Checks if an update vector changes an ordering field of an index record.
|
Checks if an update vector changes an ordering field of an index record. */
|
||||||
This function is fast if the update vector is short or the number of ordering
|
|
||||||
fields in the index is small. Otherwise, this can be quadratic. */
|
|
||||||
|
|
||||||
ibool
|
ibool
|
||||||
row_upd_changes_some_index_ord_field(
|
row_upd_changes_some_index_ord_field(
|
||||||
@ -642,19 +693,24 @@ row_upd_changes_some_index_ord_field(
|
|||||||
dict_table_t* table, /* in: table */
|
dict_table_t* table, /* in: table */
|
||||||
upd_t* update) /* in: update vector for the row */
|
upd_t* update) /* in: update vector for the row */
|
||||||
{
|
{
|
||||||
|
upd_field_t* upd_field;
|
||||||
dict_index_t* index;
|
dict_index_t* index;
|
||||||
|
ulint i;
|
||||||
|
|
||||||
index = dict_table_get_first_index(table);
|
index = dict_table_get_first_index(table);
|
||||||
|
|
||||||
while (index) {
|
for (i = 0; i < upd_get_n_fields(update); i++) {
|
||||||
if (row_upd_changes_ord_field(NULL, index, update)) {
|
|
||||||
|
|
||||||
return(TRUE);
|
upd_field = upd_get_nth_field(update, i);
|
||||||
|
|
||||||
|
if (dict_field_get_col(dict_index_get_nth_field(index,
|
||||||
|
upd_field->field_no))
|
||||||
|
->ord_part) {
|
||||||
|
|
||||||
|
return(TRUE);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
index = dict_table_get_next_index(index);
|
|
||||||
}
|
|
||||||
|
|
||||||
return(FALSE);
|
return(FALSE);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -710,15 +766,17 @@ row_upd_eval_new_vals(
|
|||||||
|
|
||||||
/***************************************************************
|
/***************************************************************
|
||||||
Stores to the heap the row on which the node->pcur is positioned. */
|
Stores to the heap the row on which the node->pcur is positioned. */
|
||||||
UNIV_INLINE
|
static
|
||||||
void
|
void
|
||||||
row_upd_store_row(
|
row_upd_store_row(
|
||||||
/*==============*/
|
/*==============*/
|
||||||
upd_node_t* node) /* in: row update node */
|
upd_node_t* node) /* in: row update node */
|
||||||
{
|
{
|
||||||
dict_index_t* clust_index;
|
dict_index_t* clust_index;
|
||||||
|
upd_t* update;
|
||||||
|
rec_t* rec;
|
||||||
|
|
||||||
ut_ad((node->pcur)->latch_mode != BTR_NO_LATCHES);
|
ut_ad(node->pcur->latch_mode != BTR_NO_LATCHES);
|
||||||
|
|
||||||
if (node->row != NULL) {
|
if (node->row != NULL) {
|
||||||
mem_heap_empty(node->heap);
|
mem_heap_empty(node->heap);
|
||||||
@ -727,8 +785,20 @@ row_upd_store_row(
|
|||||||
|
|
||||||
clust_index = dict_table_get_first_index(node->table);
|
clust_index = dict_table_get_first_index(node->table);
|
||||||
|
|
||||||
node->row = row_build(ROW_COPY_DATA, clust_index,
|
rec = btr_pcur_get_rec(node->pcur);
|
||||||
btr_pcur_get_rec(node->pcur), node->heap);
|
|
||||||
|
node->row = row_build(ROW_COPY_DATA, clust_index, rec, node->heap);
|
||||||
|
|
||||||
|
node->ext_vec = mem_heap_alloc(node->heap, rec_get_n_fields(rec));
|
||||||
|
|
||||||
|
if (node->is_delete) {
|
||||||
|
update = NULL;
|
||||||
|
} else {
|
||||||
|
update = node->update;
|
||||||
|
}
|
||||||
|
|
||||||
|
node->n_ext_vec = btr_push_update_extern_fields(node->ext_vec,
|
||||||
|
rec, update);
|
||||||
}
|
}
|
||||||
|
|
||||||
/***************************************************************
|
/***************************************************************
|
||||||
@ -812,7 +882,7 @@ row_upd_sec_index_entry(
|
|||||||
row_upd_index_replace_new_col_vals(entry, index, node->update);
|
row_upd_index_replace_new_col_vals(entry, index, node->update);
|
||||||
|
|
||||||
/* Insert new index entry */
|
/* Insert new index entry */
|
||||||
err = row_ins_index_entry(index, entry, thr);
|
err = row_ins_index_entry(index, entry, NULL, 0, thr);
|
||||||
|
|
||||||
mem_heap_free(heap);
|
mem_heap_free(heap);
|
||||||
|
|
||||||
@ -870,6 +940,8 @@ row_upd_clust_rec_by_insert(
|
|||||||
dict_table_t* table;
|
dict_table_t* table;
|
||||||
mem_heap_t* heap;
|
mem_heap_t* heap;
|
||||||
dtuple_t* entry;
|
dtuple_t* entry;
|
||||||
|
ulint* ext_vec;
|
||||||
|
ulint n_ext_vec;
|
||||||
ulint err;
|
ulint err;
|
||||||
|
|
||||||
ut_ad(node);
|
ut_ad(node);
|
||||||
@ -897,14 +969,18 @@ row_upd_clust_rec_by_insert(
|
|||||||
|
|
||||||
heap = mem_heap_create(1024);
|
heap = mem_heap_create(1024);
|
||||||
|
|
||||||
|
ext_vec = mem_heap_alloc(heap,
|
||||||
|
sizeof(ulint) * dtuple_get_n_fields(node->row));
|
||||||
|
n_ext_vec = 0;
|
||||||
|
|
||||||
entry = row_build_index_entry(node->row, index, heap);
|
entry = row_build_index_entry(node->row, index, heap);
|
||||||
|
|
||||||
row_upd_clust_index_replace_new_col_vals(entry, node->update);
|
row_upd_clust_index_replace_new_col_vals(entry, node->update);
|
||||||
|
|
||||||
row_upd_index_entry_sys_field(entry, index, DATA_TRX_ID, trx->id);
|
row_upd_index_entry_sys_field(entry, index, DATA_TRX_ID, trx->id);
|
||||||
|
|
||||||
err = row_ins_index_entry(index, entry, thr);
|
err = row_ins_index_entry(index, entry, node->ext_vec,
|
||||||
|
node->n_ext_vec, thr);
|
||||||
mem_heap_free(heap);
|
mem_heap_free(heap);
|
||||||
|
|
||||||
return(err);
|
return(err);
|
||||||
@ -924,6 +1000,7 @@ row_upd_clust_rec(
|
|||||||
que_thr_t* thr, /* in: query thread */
|
que_thr_t* thr, /* in: query thread */
|
||||||
mtr_t* mtr) /* in: mtr; gets committed here */
|
mtr_t* mtr) /* in: mtr; gets committed here */
|
||||||
{
|
{
|
||||||
|
big_rec_t* big_rec = NULL;
|
||||||
btr_pcur_t* pcur;
|
btr_pcur_t* pcur;
|
||||||
btr_cur_t* btr_cur;
|
btr_cur_t* btr_cur;
|
||||||
ulint err;
|
ulint err;
|
||||||
@ -973,9 +1050,24 @@ row_upd_clust_rec(
|
|||||||
ut_ad(FALSE == rec_get_deleted_flag(btr_pcur_get_rec(pcur)));
|
ut_ad(FALSE == rec_get_deleted_flag(btr_pcur_get_rec(pcur)));
|
||||||
|
|
||||||
err = btr_cur_pessimistic_update(BTR_NO_LOCKING_FLAG, btr_cur,
|
err = btr_cur_pessimistic_update(BTR_NO_LOCKING_FLAG, btr_cur,
|
||||||
node->update, node->cmpl_info, thr, mtr);
|
&big_rec, node->update,
|
||||||
|
node->cmpl_info, thr, mtr);
|
||||||
mtr_commit(mtr);
|
mtr_commit(mtr);
|
||||||
|
|
||||||
|
if (err == DB_SUCCESS && big_rec) {
|
||||||
|
mtr_start(mtr);
|
||||||
|
ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr));
|
||||||
|
|
||||||
|
err = btr_store_big_rec_extern_fields(index,
|
||||||
|
btr_cur_get_rec(btr_cur),
|
||||||
|
big_rec, mtr);
|
||||||
|
mtr_commit(mtr);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (big_rec) {
|
||||||
|
dtuple_big_rec_free(big_rec);
|
||||||
|
}
|
||||||
|
|
||||||
return(err);
|
return(err);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1194,10 +1286,12 @@ row_upd(
|
|||||||
ut_ad(node && thr);
|
ut_ad(node && thr);
|
||||||
|
|
||||||
if (node->in_mysql_interface) {
|
if (node->in_mysql_interface) {
|
||||||
|
|
||||||
/* We do not get the cmpl_info value from the MySQL
|
/* We do not get the cmpl_info value from the MySQL
|
||||||
interpreter: we must calculate it on the fly: */
|
interpreter: we must calculate it on the fly: */
|
||||||
|
|
||||||
if (row_upd_changes_some_index_ord_field(node->table,
|
if (node->is_delete ||
|
||||||
|
row_upd_changes_some_index_ord_field(node->table,
|
||||||
node->update)) {
|
node->update)) {
|
||||||
node->cmpl_info = 0;
|
node->cmpl_info = 0;
|
||||||
} else {
|
} else {
|
||||||
@ -1239,6 +1333,7 @@ function_exit:
|
|||||||
if (node->row != NULL) {
|
if (node->row != NULL) {
|
||||||
mem_heap_empty(node->heap);
|
mem_heap_empty(node->heap);
|
||||||
node->row = NULL;
|
node->row = NULL;
|
||||||
|
node->n_ext_vec = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
node->state = UPD_NODE_UPDATE_CLUSTERED;
|
node->state = UPD_NODE_UPDATE_CLUSTERED;
|
||||||
|
@ -93,6 +93,8 @@ ulint srv_lock_wait_timeout = 1024 * 1024 * 1024;
|
|||||||
char* srv_unix_file_flush_method_str = NULL;
|
char* srv_unix_file_flush_method_str = NULL;
|
||||||
ulint srv_unix_file_flush_method = 0;
|
ulint srv_unix_file_flush_method = 0;
|
||||||
|
|
||||||
|
ibool srv_use_doublewrite_buf = TRUE;
|
||||||
|
|
||||||
ibool srv_set_thread_priorities = TRUE;
|
ibool srv_set_thread_priorities = TRUE;
|
||||||
int srv_query_thread_priority = 0;
|
int srv_query_thread_priority = 0;
|
||||||
/*-------------------------------------------*/
|
/*-------------------------------------------*/
|
||||||
@ -109,6 +111,8 @@ ibool srv_print_buf_io = FALSE;
|
|||||||
ibool srv_print_log_io = FALSE;
|
ibool srv_print_log_io = FALSE;
|
||||||
ibool srv_print_latch_waits = FALSE;
|
ibool srv_print_latch_waits = FALSE;
|
||||||
|
|
||||||
|
ibool srv_print_innodb_monitor = FALSE;
|
||||||
|
|
||||||
/* The parameters below are obsolete: */
|
/* The parameters below are obsolete: */
|
||||||
|
|
||||||
ibool srv_print_parsed_sql = FALSE;
|
ibool srv_print_parsed_sql = FALSE;
|
||||||
@ -1492,7 +1496,6 @@ srv_init(void)
|
|||||||
slot = srv_mysql_table + i;
|
slot = srv_mysql_table + i;
|
||||||
slot->in_use = FALSE;
|
slot->in_use = FALSE;
|
||||||
slot->event = os_event_create(NULL);
|
slot->event = os_event_create(NULL);
|
||||||
slot->suspended = FALSE;
|
|
||||||
ut_a(slot->event);
|
ut_a(slot->event);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1661,7 +1664,6 @@ srv_suspend_mysql_thread(
|
|||||||
slot->thr = thr;
|
slot->thr = thr;
|
||||||
|
|
||||||
os_event_reset(event);
|
os_event_reset(event);
|
||||||
slot->suspended = TRUE;
|
|
||||||
|
|
||||||
slot->suspend_time = ut_time();
|
slot->suspend_time = ut_time();
|
||||||
|
|
||||||
@ -1693,27 +1695,6 @@ srv_suspend_mysql_thread(
|
|||||||
return(FALSE);
|
return(FALSE);
|
||||||
}
|
}
|
||||||
|
|
||||||
os_event_t
|
|
||||||
srv_mysql_thread_event_get(void)
|
|
||||||
{
|
|
||||||
srv_slot_t* slot;
|
|
||||||
os_event_t event;
|
|
||||||
|
|
||||||
mutex_enter(&kernel_mutex);
|
|
||||||
|
|
||||||
slot = srv_table_reserve_slot_for_mysql();
|
|
||||||
|
|
||||||
event = slot->event;
|
|
||||||
|
|
||||||
os_event_reset(event);
|
|
||||||
|
|
||||||
slot->suspended = TRUE;
|
|
||||||
|
|
||||||
mutex_exit(&kernel_mutex);
|
|
||||||
|
|
||||||
return(event);
|
|
||||||
}
|
|
||||||
|
|
||||||
/************************************************************************
|
/************************************************************************
|
||||||
Releases a MySQL OS thread waiting for a lock to be released, if the
|
Releases a MySQL OS thread waiting for a lock to be released, if the
|
||||||
thread is already suspended. */
|
thread is already suspended. */
|
||||||
@ -1737,7 +1718,6 @@ srv_release_mysql_thread_if_suspended(
|
|||||||
/* Found */
|
/* Found */
|
||||||
|
|
||||||
os_event_set(slot->event);
|
os_event_set(slot->event);
|
||||||
slot->suspended = FALSE;
|
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -1746,59 +1726,6 @@ srv_release_mysql_thread_if_suspended(
|
|||||||
/* not found */
|
/* not found */
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
|
||||||
srv_mysql_thread_release(void)
|
|
||||||
/*==========================*/
|
|
||||||
{
|
|
||||||
srv_slot_t* slot;
|
|
||||||
ulint i;
|
|
||||||
|
|
||||||
mutex_enter(&kernel_mutex);
|
|
||||||
|
|
||||||
for (i = 0; i < OS_THREAD_MAX_N; i++) {
|
|
||||||
|
|
||||||
slot = srv_mysql_table + i;
|
|
||||||
|
|
||||||
if (slot->in_use && slot->suspended) {
|
|
||||||
/* Found */
|
|
||||||
slot->suspended = FALSE;
|
|
||||||
mutex_exit(&kernel_mutex);
|
|
||||||
|
|
||||||
os_event_set(slot->event);
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ut_a(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
srv_mysql_thread_slot_free(
|
|
||||||
/*==========================*/
|
|
||||||
os_event_t event)
|
|
||||||
{
|
|
||||||
srv_slot_t* slot;
|
|
||||||
ulint i;
|
|
||||||
|
|
||||||
mutex_enter(&kernel_mutex);
|
|
||||||
|
|
||||||
for (i = 0; i < OS_THREAD_MAX_N; i++) {
|
|
||||||
|
|
||||||
slot = srv_mysql_table + i;
|
|
||||||
|
|
||||||
if (slot->in_use && slot->event == event) {
|
|
||||||
/* Found */
|
|
||||||
slot->in_use = FALSE;
|
|
||||||
mutex_exit(&kernel_mutex);
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ut_a(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************************************
|
/*************************************************************************
|
||||||
A thread which wakes up threads whose lock wait may have lasted too long. */
|
A thread which wakes up threads whose lock wait may have lasted too long. */
|
||||||
|
|
||||||
@ -1924,6 +1851,7 @@ srv_master_thread(
|
|||||||
ulint i;
|
ulint i;
|
||||||
time_t last_flush_time;
|
time_t last_flush_time;
|
||||||
time_t current_time;
|
time_t current_time;
|
||||||
|
time_t last_monitor_time;
|
||||||
|
|
||||||
UT_NOT_USED(arg);
|
UT_NOT_USED(arg);
|
||||||
|
|
||||||
@ -1936,6 +1864,8 @@ srv_master_thread(
|
|||||||
mutex_exit(&kernel_mutex);
|
mutex_exit(&kernel_mutex);
|
||||||
|
|
||||||
os_event_set(srv_sys->operational);
|
os_event_set(srv_sys->operational);
|
||||||
|
|
||||||
|
last_monitor_time = time(NULL);
|
||||||
loop:
|
loop:
|
||||||
mutex_enter(&kernel_mutex);
|
mutex_enter(&kernel_mutex);
|
||||||
|
|
||||||
@ -1975,8 +1905,18 @@ loop:
|
|||||||
while (n_pages_purged) {
|
while (n_pages_purged) {
|
||||||
/* TODO: replace this by a check if we are running
|
/* TODO: replace this by a check if we are running
|
||||||
out of file space! */
|
out of file space! */
|
||||||
|
if (srv_print_innodb_monitor) {
|
||||||
|
ut_print_timestamp(stdout);
|
||||||
|
printf(" InnoDB starts purge\n");
|
||||||
|
}
|
||||||
|
|
||||||
n_pages_purged = trx_purge();
|
n_pages_purged = trx_purge();
|
||||||
|
|
||||||
|
if (srv_print_innodb_monitor) {
|
||||||
|
ut_print_timestamp(stdout);
|
||||||
|
printf(" InnoDB purged %lu pages\n", n_pages_purged);
|
||||||
|
}
|
||||||
|
|
||||||
current_time = time(NULL);
|
current_time = time(NULL);
|
||||||
|
|
||||||
if (difftime(current_time, last_flush_time) > 1) {
|
if (difftime(current_time, last_flush_time) > 1) {
|
||||||
@ -1986,14 +1926,40 @@ loop:
|
|||||||
}
|
}
|
||||||
|
|
||||||
background_loop:
|
background_loop:
|
||||||
/*
|
|
||||||
sync_array_print_info(sync_primary_wait_array);
|
|
||||||
os_aio_print();
|
|
||||||
buf_print_io();
|
|
||||||
*/
|
|
||||||
/* In this loop we run background operations while the server
|
/* In this loop we run background operations while the server
|
||||||
is quiet */
|
is quiet */
|
||||||
|
|
||||||
|
current_time = time(NULL);
|
||||||
|
|
||||||
|
if (srv_print_innodb_monitor
|
||||||
|
&& difftime(current_time, last_monitor_time) > 8) {
|
||||||
|
|
||||||
|
printf("================================\n");
|
||||||
|
last_monitor_time = time(NULL);
|
||||||
|
ut_print_timestamp(stdout);
|
||||||
|
|
||||||
|
printf(" INNODB MONITOR OUTPUT\n"
|
||||||
|
"================================\n");
|
||||||
|
printf("--------------------------\n"
|
||||||
|
"LOCKS HELD BY TRANSACTIONS\n"
|
||||||
|
"--------------------------\n");
|
||||||
|
lock_print_info();
|
||||||
|
printf("-----------------------------------------------\n"
|
||||||
|
"CURRENT SEMAPHORES RESERVED AND SEMAPHORE WAITS\n"
|
||||||
|
"-----------------------------------------------\n");
|
||||||
|
sync_print();
|
||||||
|
printf("CURRENT PENDING FILE I/O'S\n"
|
||||||
|
"--------------------------\n");
|
||||||
|
os_aio_print();
|
||||||
|
printf("-----------\n"
|
||||||
|
"BUFFER POOL\n"
|
||||||
|
"-----------\n");
|
||||||
|
buf_print_io();
|
||||||
|
printf("----------------------------\n"
|
||||||
|
"END OF INNODB MONITOR OUTPUT\n"
|
||||||
|
"============================\n");
|
||||||
|
}
|
||||||
|
|
||||||
mutex_enter(&kernel_mutex);
|
mutex_enter(&kernel_mutex);
|
||||||
if (srv_activity_count != old_activity_count) {
|
if (srv_activity_count != old_activity_count) {
|
||||||
mutex_exit(&kernel_mutex);
|
mutex_exit(&kernel_mutex);
|
||||||
@ -2005,8 +1971,18 @@ background_loop:
|
|||||||
/* The server has been quiet for a while: start running background
|
/* The server has been quiet for a while: start running background
|
||||||
operations */
|
operations */
|
||||||
|
|
||||||
|
if (srv_print_innodb_monitor) {
|
||||||
|
ut_print_timestamp(stdout);
|
||||||
|
printf(" InnoDB starts purge\n");
|
||||||
|
}
|
||||||
|
|
||||||
n_pages_purged = trx_purge();
|
n_pages_purged = trx_purge();
|
||||||
|
|
||||||
|
if (srv_print_innodb_monitor) {
|
||||||
|
ut_print_timestamp(stdout);
|
||||||
|
printf(" InnoDB purged %lu pages\n", n_pages_purged);
|
||||||
|
}
|
||||||
|
|
||||||
mutex_enter(&kernel_mutex);
|
mutex_enter(&kernel_mutex);
|
||||||
if (srv_activity_count != old_activity_count) {
|
if (srv_activity_count != old_activity_count) {
|
||||||
mutex_exit(&kernel_mutex);
|
mutex_exit(&kernel_mutex);
|
||||||
@ -2014,8 +1990,18 @@ background_loop:
|
|||||||
}
|
}
|
||||||
mutex_exit(&kernel_mutex);
|
mutex_exit(&kernel_mutex);
|
||||||
|
|
||||||
|
if (srv_print_innodb_monitor) {
|
||||||
|
ut_print_timestamp(stdout);
|
||||||
|
printf(" InnoDB starts insert buffer merge\n");
|
||||||
|
}
|
||||||
|
|
||||||
n_bytes_merged = ibuf_contract(TRUE);
|
n_bytes_merged = ibuf_contract(TRUE);
|
||||||
|
|
||||||
|
if (srv_print_innodb_monitor) {
|
||||||
|
ut_print_timestamp(stdout);
|
||||||
|
printf(" InnoDB merged %lu bytes\n", n_bytes_merged);
|
||||||
|
}
|
||||||
|
|
||||||
mutex_enter(&kernel_mutex);
|
mutex_enter(&kernel_mutex);
|
||||||
if (srv_activity_count != old_activity_count) {
|
if (srv_activity_count != old_activity_count) {
|
||||||
mutex_exit(&kernel_mutex);
|
mutex_exit(&kernel_mutex);
|
||||||
@ -2023,7 +2009,7 @@ background_loop:
|
|||||||
}
|
}
|
||||||
mutex_exit(&kernel_mutex);
|
mutex_exit(&kernel_mutex);
|
||||||
|
|
||||||
n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 20, ut_dulint_max);
|
n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max);
|
||||||
|
|
||||||
mutex_enter(&kernel_mutex);
|
mutex_enter(&kernel_mutex);
|
||||||
if (srv_activity_count != old_activity_count) {
|
if (srv_activity_count != old_activity_count) {
|
||||||
@ -2052,14 +2038,12 @@ background_loop:
|
|||||||
|
|
||||||
/* mem_print_new_info();
|
/* mem_print_new_info();
|
||||||
*/
|
*/
|
||||||
|
/*
|
||||||
/* fsp_print(0); */
|
fsp_print(0);
|
||||||
|
fprintf(stderr, "Validating tablespace\n");
|
||||||
/* fprintf(stderr, "Validating tablespace\n");
|
|
||||||
fsp_validate(0);
|
fsp_validate(0);
|
||||||
fprintf(stderr, "Validation ok\n");
|
fprintf(stderr, "Validation ok\n");
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifdef UNIV_SEARCH_PERF_STAT
|
#ifdef UNIV_SEARCH_PERF_STAT
|
||||||
/* btr_search_print_info(); */
|
/* btr_search_print_info(); */
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/************************************************************************
|
/************************************************************************
|
||||||
Starts the InnoDB database server
|
Starts the InnoDB database server
|
||||||
|
|
||||||
(c) 1996-2000 InnoDB Oy
|
(c) 1996-2000 Innobase Oy
|
||||||
|
|
||||||
Created 2/16/1996 Heikki Tuuri
|
Created 2/16/1996 Heikki Tuuri
|
||||||
*************************************************************************/
|
*************************************************************************/
|
||||||
@ -203,8 +203,8 @@ open_or_create_log_file(
|
|||||||
|
|
||||||
sprintf(name, "%s%s%lu", srv_log_group_home_dirs[k], "ib_logfile", i);
|
sprintf(name, "%s%s%lu", srv_log_group_home_dirs[k], "ib_logfile", i);
|
||||||
|
|
||||||
files[i] = os_file_create(name, OS_FILE_CREATE, OS_FILE_NORMAL, &ret);
|
files[i] = os_file_create(name, OS_FILE_CREATE, OS_FILE_NORMAL,
|
||||||
|
OS_LOG_FILE, &ret);
|
||||||
if (ret == FALSE) {
|
if (ret == FALSE) {
|
||||||
if (os_file_get_last_error() != OS_FILE_ALREADY_EXISTS) {
|
if (os_file_get_last_error() != OS_FILE_ALREADY_EXISTS) {
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
@ -214,7 +214,8 @@ open_or_create_log_file(
|
|||||||
}
|
}
|
||||||
|
|
||||||
files[i] = os_file_create(
|
files[i] = os_file_create(
|
||||||
name, OS_FILE_OPEN, OS_FILE_AIO, &ret);
|
name, OS_FILE_OPEN, OS_FILE_AIO,
|
||||||
|
OS_LOG_FILE, &ret);
|
||||||
if (!ret) {
|
if (!ret) {
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
"InnoDB: Error in opening %s\n", name);
|
"InnoDB: Error in opening %s\n", name);
|
||||||
@ -239,7 +240,7 @@ open_or_create_log_file(
|
|||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
"InnoDB: Log file %s did not exist: new to be created\n",
|
"InnoDB: Log file %s did not exist: new to be created\n",
|
||||||
name);
|
name);
|
||||||
printf("InnoDB: Setting log file %s size to %lu\n",
|
fprintf(stderr, "InnoDB: Setting log file %s size to %lu\n",
|
||||||
name, UNIV_PAGE_SIZE * srv_log_file_size);
|
name, UNIV_PAGE_SIZE * srv_log_file_size);
|
||||||
|
|
||||||
ret = os_file_set_size(name, files[i],
|
ret = os_file_set_size(name, files[i],
|
||||||
@ -330,27 +331,28 @@ open_or_create_data_files(
|
|||||||
|
|
||||||
sprintf(name, "%s%s", srv_data_home, srv_data_file_names[i]);
|
sprintf(name, "%s%s", srv_data_home, srv_data_file_names[i]);
|
||||||
|
|
||||||
if (srv_data_file_is_raw_partition[i] == 0) {
|
files[i] = os_file_create(name, OS_FILE_CREATE,
|
||||||
|
OS_FILE_NORMAL, OS_DATA_FILE, &ret);
|
||||||
|
|
||||||
files[i] = os_file_create(name, OS_FILE_CREATE,
|
if (srv_data_file_is_raw_partition[i] == SRV_NEW_RAW) {
|
||||||
OS_FILE_NORMAL, &ret);
|
/* The partition is opened, not created; then it is
|
||||||
} else if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
|
written over */
|
||||||
ret = FALSE;
|
|
||||||
} else if (srv_data_file_is_raw_partition[i] == SRV_NEW_RAW) {
|
|
||||||
|
|
||||||
files[i] = os_file_create(
|
files[i] = os_file_create(
|
||||||
name, OS_FILE_OPEN, OS_FILE_NORMAL, &ret);
|
name, OS_FILE_OPEN, OS_FILE_NORMAL,
|
||||||
|
OS_DATA_FILE, &ret);
|
||||||
if (!ret) {
|
if (!ret) {
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
"InnoDB: Error in opening %s\n", name);
|
"InnoDB: Error in opening %s\n", name);
|
||||||
|
|
||||||
return(DB_ERROR);
|
return(DB_ERROR);
|
||||||
}
|
}
|
||||||
|
} else if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
|
||||||
|
ret = FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ret == FALSE) {
|
if (ret == FALSE) {
|
||||||
if (srv_data_file_is_raw_partition[i] == 0
|
if (srv_data_file_is_raw_partition[i] != SRV_OLD_RAW
|
||||||
&& os_file_get_last_error() !=
|
&& os_file_get_last_error() !=
|
||||||
OS_FILE_ALREADY_EXISTS) {
|
OS_FILE_ALREADY_EXISTS) {
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
@ -370,8 +372,8 @@ open_or_create_data_files(
|
|||||||
}
|
}
|
||||||
|
|
||||||
files[i] = os_file_create(
|
files[i] = os_file_create(
|
||||||
name, OS_FILE_OPEN, OS_FILE_NORMAL, &ret);
|
name, OS_FILE_OPEN, OS_FILE_NORMAL,
|
||||||
|
OS_DATA_FILE, &ret);
|
||||||
if (!ret) {
|
if (!ret) {
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
"InnoDB: Error in opening %s\n", name);
|
"InnoDB: Error in opening %s\n", name);
|
||||||
@ -379,18 +381,21 @@ open_or_create_data_files(
|
|||||||
return(DB_ERROR);
|
return(DB_ERROR);
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = os_file_get_size(files[i], &size, &size_high);
|
if (srv_data_file_is_raw_partition[i] != SRV_OLD_RAW) {
|
||||||
ut_a(ret);
|
|
||||||
|
ret = os_file_get_size(files[i], &size,
|
||||||
|
&size_high);
|
||||||
|
ut_a(ret);
|
||||||
|
|
||||||
if (srv_data_file_is_raw_partition[i] == 0
|
if (size !=
|
||||||
&& (size != UNIV_PAGE_SIZE * srv_data_file_sizes[i]
|
UNIV_PAGE_SIZE * srv_data_file_sizes[i]
|
||||||
|| size_high != 0)) {
|
|| size_high != 0) {
|
||||||
|
fprintf(stderr,
|
||||||
fprintf(stderr,
|
|
||||||
"InnoDB: Error: data file %s is of different size\n"
|
"InnoDB: Error: data file %s is of different size\n"
|
||||||
"InnoDB: than specified in the .cnf file!\n", name);
|
"InnoDB: than specified in the .cnf file!\n", name);
|
||||||
|
|
||||||
return(DB_ERROR);
|
return(DB_ERROR);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fil_read_flushed_lsn_and_arch_log_no(files[i],
|
fil_read_flushed_lsn_and_arch_log_no(files[i],
|
||||||
@ -403,7 +408,8 @@ open_or_create_data_files(
|
|||||||
|
|
||||||
if (i > 0) {
|
if (i > 0) {
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
"InnoDB: Data file %s did not exist: new to be created\n", name);
|
"InnoDB: Data file %s did not exist: new to be created\n",
|
||||||
|
name);
|
||||||
} else {
|
} else {
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
"InnoDB: The first specified data file %s did not exist:\n"
|
"InnoDB: The first specified data file %s did not exist:\n"
|
||||||
@ -411,10 +417,10 @@ open_or_create_data_files(
|
|||||||
*create_new_db = TRUE;
|
*create_new_db = TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("InnoDB: Setting file %s size to %lu\n",
|
fprintf(stderr, "InnoDB: Setting file %s size to %lu\n",
|
||||||
name, UNIV_PAGE_SIZE * srv_data_file_sizes[i]);
|
name, UNIV_PAGE_SIZE * srv_data_file_sizes[i]);
|
||||||
|
|
||||||
printf(
|
fprintf(stderr,
|
||||||
"InnoDB: Database physically writes the file full: wait...\n");
|
"InnoDB: Database physically writes the file full: wait...\n");
|
||||||
|
|
||||||
ret = os_file_set_size(name, files[i],
|
ret = os_file_set_size(name, files[i],
|
||||||
@ -555,19 +561,22 @@ innobase_start_or_create_for_mysql(void)
|
|||||||
srv_startup_is_before_trx_rollback_phase = TRUE;
|
srv_startup_is_before_trx_rollback_phase = TRUE;
|
||||||
|
|
||||||
if (0 == ut_strcmp(srv_unix_file_flush_method_str, "fdatasync")) {
|
if (0 == ut_strcmp(srv_unix_file_flush_method_str, "fdatasync")) {
|
||||||
srv_unix_file_flush_method = SRV_UNIX_FDATASYNC;
|
srv_unix_file_flush_method = SRV_UNIX_FDATASYNC;
|
||||||
|
|
||||||
} else if (0 == ut_strcmp(srv_unix_file_flush_method_str, "O_DSYNC")) {
|
} else if (0 == ut_strcmp(srv_unix_file_flush_method_str, "O_DSYNC")) {
|
||||||
srv_unix_file_flush_method = SRV_UNIX_O_DSYNC;
|
srv_unix_file_flush_method = SRV_UNIX_O_DSYNC;
|
||||||
|
|
||||||
} else if (0 == ut_strcmp(srv_unix_file_flush_method_str,
|
} else if (0 == ut_strcmp(srv_unix_file_flush_method_str,
|
||||||
"littlesync")) {
|
"littlesync")) {
|
||||||
srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
|
srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
|
||||||
} else if (0 == ut_strcmp(srv_unix_file_flush_method_str, "nosync")) {
|
|
||||||
srv_unix_file_flush_method = SRV_UNIX_NOSYNC;
|
|
||||||
} else {
|
|
||||||
fprintf(stderr,
|
|
||||||
"InnoDB: Unrecognized value for innodb_unix_file_flush_method\n");
|
|
||||||
|
|
||||||
return(DB_ERROR);
|
} else if (0 == ut_strcmp(srv_unix_file_flush_method_str, "nosync")) {
|
||||||
|
srv_unix_file_flush_method = SRV_UNIX_NOSYNC;
|
||||||
|
} else {
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: Unrecognized value %s for innodb_flush_method\n",
|
||||||
|
srv_unix_file_flush_method_str);
|
||||||
|
return(DB_ERROR);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -593,14 +602,15 @@ innobase_start_or_create_for_mysql(void)
|
|||||||
#ifdef __WIN__
|
#ifdef __WIN__
|
||||||
if (os_get_os_version() == OS_WIN95
|
if (os_get_os_version() == OS_WIN95
|
||||||
|| os_get_os_version() == OS_WIN31) {
|
|| os_get_os_version() == OS_WIN31) {
|
||||||
/* On Win 95, 98, ME, and Win32 subsystem for Windows 3.1 use
|
|
||||||
simulated aio */
|
|
||||||
|
|
||||||
os_aio_use_native_aio = FALSE;
|
/* On Win 95, 98, ME, and Win32 subsystem for Windows 3.1 use
|
||||||
srv_n_file_io_threads = 4;
|
simulated aio */
|
||||||
|
|
||||||
|
os_aio_use_native_aio = FALSE;
|
||||||
|
srv_n_file_io_threads = 4;
|
||||||
} else {
|
} else {
|
||||||
/* On NT and Win 2000 always use aio */
|
/* On NT and Win 2000 always use aio */
|
||||||
os_aio_use_native_aio = TRUE;
|
os_aio_use_native_aio = TRUE;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
if (!os_aio_use_native_aio) {
|
if (!os_aio_use_native_aio) {
|
||||||
@ -652,14 +662,21 @@ innobase_start_or_create_for_mysql(void)
|
|||||||
sum_of_new_sizes = 0;
|
sum_of_new_sizes = 0;
|
||||||
|
|
||||||
for (i = 0; i < srv_n_data_files; i++) {
|
for (i = 0; i < srv_n_data_files; i++) {
|
||||||
sum_of_new_sizes += srv_data_file_sizes[i];
|
if (srv_data_file_sizes[i] >= 262144) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: Error: file size must be < 4 GB, or on some OS's < 2 GB\n");
|
||||||
|
|
||||||
|
return(DB_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
|
sum_of_new_sizes += srv_data_file_sizes[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sum_of_new_sizes < 640) {
|
if (sum_of_new_sizes < 640) {
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
"InnoDB: Error: tablespace size must be at least 10 MB\n");
|
"InnoDB: Error: tablespace size must be at least 10 MB\n");
|
||||||
|
|
||||||
return(DB_ERROR);
|
return(DB_ERROR);
|
||||||
}
|
}
|
||||||
|
|
||||||
err = open_or_create_data_files(&create_new_db,
|
err = open_or_create_data_files(&create_new_db,
|
||||||
@ -673,6 +690,15 @@ innobase_start_or_create_for_mysql(void)
|
|||||||
return((int) err);
|
return((int) err);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!create_new_db) {
|
||||||
|
/* If we are using the doublewrite method, we will
|
||||||
|
check if there are half-written pages in data files,
|
||||||
|
and restore them from the doublewrite buffer if
|
||||||
|
possible */
|
||||||
|
|
||||||
|
trx_sys_doublewrite_restore_corrupt_pages();
|
||||||
|
}
|
||||||
|
|
||||||
srv_normalize_path_for_win(srv_arch_dir);
|
srv_normalize_path_for_win(srv_arch_dir);
|
||||||
srv_arch_dir = srv_add_path_separator_if_needed(srv_arch_dir);
|
srv_arch_dir = srv_add_path_separator_if_needed(srv_arch_dir);
|
||||||
|
|
||||||
@ -742,7 +768,6 @@ innobase_start_or_create_for_mysql(void)
|
|||||||
mutex_exit(&(log_sys->mutex));
|
mutex_exit(&(log_sys->mutex));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* mutex_create(&row_mysql_thread_mutex); */
|
|
||||||
sess_sys_init_at_db_start();
|
sess_sys_init_at_db_start();
|
||||||
|
|
||||||
if (create_new_db) {
|
if (create_new_db) {
|
||||||
@ -834,7 +859,7 @@ innobase_start_or_create_for_mysql(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (srv_measure_contention) {
|
if (srv_measure_contention) {
|
||||||
/* os_thread_create(&test_measure_cont, NULL, thread_ids +
|
/* os_thread_create(&test_measure_cont, NULL, thread_ids +
|
||||||
SRV_MAX_N_IO_THREADS); */
|
SRV_MAX_N_IO_THREADS); */
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -849,16 +874,20 @@ innobase_start_or_create_for_mysql(void)
|
|||||||
/* Create the thread which watches the timeouts for lock waits */
|
/* Create the thread which watches the timeouts for lock waits */
|
||||||
os_thread_create(&srv_lock_timeout_monitor_thread, NULL,
|
os_thread_create(&srv_lock_timeout_monitor_thread, NULL,
|
||||||
thread_ids + 2 + SRV_MAX_N_IO_THREADS);
|
thread_ids + 2 + SRV_MAX_N_IO_THREADS);
|
||||||
ut_print_timestamp(stderr);
|
|
||||||
fprintf(stderr, " InnoDB: Started\n");
|
|
||||||
|
|
||||||
srv_was_started = TRUE;
|
srv_was_started = TRUE;
|
||||||
srv_is_being_started = FALSE;
|
srv_is_being_started = FALSE;
|
||||||
|
|
||||||
sync_order_checks_on = TRUE;
|
sync_order_checks_on = TRUE;
|
||||||
|
|
||||||
|
if (srv_use_doublewrite_buf && trx_doublewrite == NULL) {
|
||||||
|
trx_sys_create_doublewrite_buf();
|
||||||
|
}
|
||||||
|
|
||||||
/* buf_debug_prints = TRUE; */
|
/* buf_debug_prints = TRUE; */
|
||||||
|
|
||||||
|
ut_print_timestamp(stderr);
|
||||||
|
fprintf(stderr, " InnoDB: Started\n");
|
||||||
|
|
||||||
return((int) DB_SUCCESS);
|
return((int) DB_SUCCESS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -810,11 +810,10 @@ rw_lock_print(
|
|||||||
ulint count = 0;
|
ulint count = 0;
|
||||||
rw_lock_debug_t* info;
|
rw_lock_debug_t* info;
|
||||||
|
|
||||||
printf("----------------------------------------------\n");
|
printf("-------------------------------------------------\n");
|
||||||
printf("RW-LOCK INFO\n");
|
printf("RW-LOCK INFO\n");
|
||||||
printf("RW-LOCK: %lx ", (ulint)lock);
|
printf("RW-LOCK: %lx ", (ulint)lock);
|
||||||
|
|
||||||
mutex_enter(&(lock->mutex));
|
|
||||||
if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED)
|
if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED)
|
||||||
|| (rw_lock_get_reader_count(lock) != 0)
|
|| (rw_lock_get_reader_count(lock) != 0)
|
||||||
|| (rw_lock_get_waiters(lock) != 0)) {
|
|| (rw_lock_get_waiters(lock) != 0)) {
|
||||||
@ -831,8 +830,6 @@ rw_lock_print(
|
|||||||
info = UT_LIST_GET_NEXT(list, info);
|
info = UT_LIST_GET_NEXT(list, info);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mutex_exit(&(lock->mutex));
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -158,7 +158,7 @@ struct sync_thread_struct{
|
|||||||
};
|
};
|
||||||
|
|
||||||
/* Number of slots reserved for each OS thread in the sync level array */
|
/* Number of slots reserved for each OS thread in the sync level array */
|
||||||
#define SYNC_THREAD_N_LEVELS 256
|
#define SYNC_THREAD_N_LEVELS 10000
|
||||||
|
|
||||||
struct sync_level_struct{
|
struct sync_level_struct{
|
||||||
void* latch; /* pointer to a mutex or an rw-lock; NULL means that
|
void* latch; /* pointer to a mutex or an rw-lock; NULL means that
|
||||||
@ -768,6 +768,9 @@ sync_thread_levels_g(
|
|||||||
thread */
|
thread */
|
||||||
ulint limit) /* in: level limit */
|
ulint limit) /* in: level limit */
|
||||||
{
|
{
|
||||||
|
char* file_name;
|
||||||
|
ulint line;
|
||||||
|
ulint thread_id;
|
||||||
sync_level_t* slot;
|
sync_level_t* slot;
|
||||||
rw_lock_t* lock;
|
rw_lock_t* lock;
|
||||||
mutex_t* mutex;
|
mutex_t* mutex;
|
||||||
@ -783,8 +786,29 @@ sync_thread_levels_g(
|
|||||||
lock = slot->latch;
|
lock = slot->latch;
|
||||||
mutex = slot->latch;
|
mutex = slot->latch;
|
||||||
|
|
||||||
ut_error;
|
printf(
|
||||||
|
"InnoDB error: sync levels should be > %lu but a level is %lu\n",
|
||||||
|
limit, slot->level);
|
||||||
|
|
||||||
|
if (mutex->magic_n == MUTEX_MAGIC_N) {
|
||||||
|
printf("Mutex created at %s %lu\n", &(mutex->cfile_name),
|
||||||
|
mutex->cline);
|
||||||
|
|
||||||
|
if (mutex_get_lock_word(mutex) != 0) {
|
||||||
|
|
||||||
|
mutex_get_debug_info(mutex,
|
||||||
|
&file_name, &line, &thread_id);
|
||||||
|
|
||||||
|
printf("InnoDB: Locked mutex: addr %lx thread %ld file %s line %ld\n",
|
||||||
|
(ulint)mutex, thread_id,
|
||||||
|
file_name, line);
|
||||||
|
} else {
|
||||||
|
printf("Not locked\n");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
rw_lock_print(lock);
|
||||||
|
}
|
||||||
|
|
||||||
return(FALSE);
|
return(FALSE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -973,6 +997,8 @@ sync_thread_add_level(
|
|||||||
ut_a(sync_thread_levels_g(array, SYNC_ANY_LATCH));
|
ut_a(sync_thread_levels_g(array, SYNC_ANY_LATCH));
|
||||||
} else if (level == SYNC_TRX_SYS_HEADER) {
|
} else if (level == SYNC_TRX_SYS_HEADER) {
|
||||||
ut_a(sync_thread_levels_contain(array, SYNC_KERNEL));
|
ut_a(sync_thread_levels_contain(array, SYNC_KERNEL));
|
||||||
|
} else if (level == SYNC_DOUBLEWRITE) {
|
||||||
|
ut_a(sync_thread_levels_g(array, SYNC_DOUBLEWRITE));
|
||||||
} else if (level == SYNC_BUF_BLOCK) {
|
} else if (level == SYNC_BUF_BLOCK) {
|
||||||
ut_a((sync_thread_levels_contain(array, SYNC_BUF_POOL)
|
ut_a((sync_thread_levels_contain(array, SYNC_BUF_POOL)
|
||||||
&& sync_thread_levels_g(array, SYNC_BUF_BLOCK - 1))
|
&& sync_thread_levels_g(array, SYNC_BUF_BLOCK - 1))
|
||||||
@ -1000,6 +1026,8 @@ sync_thread_add_level(
|
|||||||
} else if (level == SYNC_FSP) {
|
} else if (level == SYNC_FSP) {
|
||||||
ut_a(sync_thread_levels_contain(array, SYNC_FSP)
|
ut_a(sync_thread_levels_contain(array, SYNC_FSP)
|
||||||
|| sync_thread_levels_g(array, SYNC_FSP));
|
|| sync_thread_levels_g(array, SYNC_FSP));
|
||||||
|
} else if (level == SYNC_EXTERN_STORAGE) {
|
||||||
|
ut_a(TRUE);
|
||||||
} else if (level == SYNC_TRX_UNDO_PAGE) {
|
} else if (level == SYNC_TRX_UNDO_PAGE) {
|
||||||
ut_a(sync_thread_levels_contain(array, SYNC_TRX_UNDO)
|
ut_a(sync_thread_levels_contain(array, SYNC_TRX_UNDO)
|
||||||
|| sync_thread_levels_contain(array, SYNC_RSEG)
|
|| sync_thread_levels_contain(array, SYNC_RSEG)
|
||||||
@ -1221,10 +1249,10 @@ void
|
|||||||
sync_print(void)
|
sync_print(void)
|
||||||
/*============*/
|
/*============*/
|
||||||
{
|
{
|
||||||
printf("SYNC INFO:------------------------------------------\n");
|
printf("SYNC INFO:\n");
|
||||||
mutex_list_print_info();
|
mutex_list_print_info();
|
||||||
rw_lock_list_print_info();
|
rw_lock_list_print_info();
|
||||||
sync_array_print_info(sync_primary_wait_array);
|
sync_array_print_info(sync_primary_wait_array);
|
||||||
sync_print_wait_info();
|
sync_print_wait_info();
|
||||||
printf("----------------------------------------------------\n");
|
printf("-----------------------------------------------------\n");
|
||||||
}
|
}
|
||||||
|
@ -692,6 +692,9 @@ trx_purge_choose_next_log(void)
|
|||||||
min_rseg = rseg;
|
min_rseg = rseg;
|
||||||
min_trx_no = rseg->last_trx_no;
|
min_trx_no = rseg->last_trx_no;
|
||||||
space = rseg->space;
|
space = rseg->space;
|
||||||
|
ut_a(space == 0); /* We assume in purge of
|
||||||
|
externally stored fields
|
||||||
|
that space id == 0 */
|
||||||
page_no = rseg->last_page_no;
|
page_no = rseg->last_page_no;
|
||||||
offset = rseg->last_offset;
|
offset = rseg->last_offset;
|
||||||
}
|
}
|
||||||
@ -820,6 +823,10 @@ trx_purge_get_next_rec(
|
|||||||
}
|
}
|
||||||
|
|
||||||
cmpl_info = trx_undo_rec_get_cmpl_info(rec2);
|
cmpl_info = trx_undo_rec_get_cmpl_info(rec2);
|
||||||
|
|
||||||
|
if (trx_undo_rec_get_extern_storage(rec2)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if ((type == TRX_UNDO_UPD_EXIST_REC)
|
if ((type == TRX_UNDO_UPD_EXIST_REC)
|
||||||
&& !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
|
&& !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
|
||||||
|
@ -292,6 +292,8 @@ trx_undo_rec_get_pars(
|
|||||||
TRX_UNDO_INSERT_REC, ... */
|
TRX_UNDO_INSERT_REC, ... */
|
||||||
ulint* cmpl_info, /* out: compiler info, relevant only
|
ulint* cmpl_info, /* out: compiler info, relevant only
|
||||||
for update type records */
|
for update type records */
|
||||||
|
ibool* updated_extern, /* out: TRUE if we updated an
|
||||||
|
externally stored field */
|
||||||
dulint* undo_no, /* out: undo log record number */
|
dulint* undo_no, /* out: undo log record number */
|
||||||
dulint* table_id) /* out: table id */
|
dulint* table_id) /* out: table id */
|
||||||
{
|
{
|
||||||
@ -303,7 +305,14 @@ trx_undo_rec_get_pars(
|
|||||||
|
|
||||||
type_cmpl = mach_read_from_1(ptr);
|
type_cmpl = mach_read_from_1(ptr);
|
||||||
ptr++;
|
ptr++;
|
||||||
|
|
||||||
|
if (type_cmpl & TRX_UNDO_UPD_EXTERN) {
|
||||||
|
*updated_extern = TRUE;
|
||||||
|
type_cmpl -= TRX_UNDO_UPD_EXTERN;
|
||||||
|
} else {
|
||||||
|
*updated_extern = FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
*type = type_cmpl & (TRX_UNDO_CMPL_INFO_MULT - 1);
|
*type = type_cmpl & (TRX_UNDO_CMPL_INFO_MULT - 1);
|
||||||
*cmpl_info = type_cmpl / TRX_UNDO_CMPL_INFO_MULT;
|
*cmpl_info = type_cmpl / TRX_UNDO_CMPL_INFO_MULT;
|
||||||
|
|
||||||
@ -336,7 +345,11 @@ trx_undo_rec_get_col_val(
|
|||||||
*field = ptr;
|
*field = ptr;
|
||||||
|
|
||||||
if (*len != UNIV_SQL_NULL) {
|
if (*len != UNIV_SQL_NULL) {
|
||||||
ptr += *len;
|
if (*len >= UNIV_EXTERN_STORAGE_FIELD) {
|
||||||
|
ptr += (*len - UNIV_EXTERN_STORAGE_FIELD);
|
||||||
|
} else {
|
||||||
|
ptr += *len;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return(ptr);
|
return(ptr);
|
||||||
@ -452,6 +465,7 @@ trx_undo_page_report_modify(
|
|||||||
ulint col_no;
|
ulint col_no;
|
||||||
byte* old_ptr;
|
byte* old_ptr;
|
||||||
ulint type_cmpl;
|
ulint type_cmpl;
|
||||||
|
byte* type_cmpl_ptr;
|
||||||
ulint i;
|
ulint i;
|
||||||
|
|
||||||
ut_ad(index->type & DICT_CLUSTERED);
|
ut_ad(index->type & DICT_CLUSTERED);
|
||||||
@ -491,6 +505,8 @@ trx_undo_page_report_modify(
|
|||||||
|
|
||||||
mach_write_to_1(ptr, type_cmpl);
|
mach_write_to_1(ptr, type_cmpl);
|
||||||
|
|
||||||
|
type_cmpl_ptr = ptr;
|
||||||
|
|
||||||
ptr++;
|
ptr++;
|
||||||
len = mach_dulint_write_much_compressed(ptr, trx->undo_no);
|
len = mach_dulint_write_much_compressed(ptr, trx->undo_no);
|
||||||
ptr += len;
|
ptr += len;
|
||||||
@ -577,7 +593,23 @@ trx_undo_page_report_modify(
|
|||||||
return(0);
|
return(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
len = mach_write_compressed(ptr, flen);
|
if (rec_get_nth_field_extern_bit(rec, pos)) {
|
||||||
|
/* If a field has external storage, we add to
|
||||||
|
flen the flag */
|
||||||
|
|
||||||
|
len = mach_write_compressed(ptr,
|
||||||
|
UNIV_EXTERN_STORAGE_FIELD + flen);
|
||||||
|
|
||||||
|
/* Notify purge that it eventually has to free the old
|
||||||
|
externally stored field */
|
||||||
|
|
||||||
|
(trx->update_undo)->del_marks = TRUE;
|
||||||
|
|
||||||
|
*type_cmpl_ptr = *type_cmpl_ptr | TRX_UNDO_UPD_EXTERN;
|
||||||
|
} else {
|
||||||
|
len = mach_write_compressed(ptr, flen);
|
||||||
|
}
|
||||||
|
|
||||||
ptr += len;
|
ptr += len;
|
||||||
|
|
||||||
if (flen != UNIV_SQL_NULL) {
|
if (flen != UNIV_SQL_NULL) {
|
||||||
@ -825,6 +857,13 @@ trx_undo_update_rec_get_update(
|
|||||||
|
|
||||||
upd_field_set_field_no(upd_field, field_no, index);
|
upd_field_set_field_no(upd_field, field_no, index);
|
||||||
|
|
||||||
|
if (len != UNIV_SQL_NULL && len >= UNIV_EXTERN_STORAGE_FIELD) {
|
||||||
|
|
||||||
|
upd_field->extern_storage = TRUE;
|
||||||
|
|
||||||
|
len -= UNIV_EXTERN_STORAGE_FIELD;
|
||||||
|
}
|
||||||
|
|
||||||
dfield_set_data(&(upd_field->new_val), field, len);
|
dfield_set_data(&(upd_field->new_val), field, len);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1222,8 +1261,10 @@ trx_undo_prev_version_build(
|
|||||||
byte* ptr;
|
byte* ptr;
|
||||||
ulint info_bits;
|
ulint info_bits;
|
||||||
ulint cmpl_info;
|
ulint cmpl_info;
|
||||||
|
ibool dummy_extern;
|
||||||
byte* buf;
|
byte* buf;
|
||||||
ulint err;
|
ulint err;
|
||||||
|
ulint i;
|
||||||
|
|
||||||
ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
|
ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
|
||||||
ut_ad(mtr_memo_contains(index_mtr, buf_block_align(index_rec),
|
ut_ad(mtr_memo_contains(index_mtr, buf_block_align(index_rec),
|
||||||
@ -1252,8 +1293,9 @@ trx_undo_prev_version_build(
|
|||||||
return(err);
|
return(err);
|
||||||
}
|
}
|
||||||
|
|
||||||
ptr = trx_undo_rec_get_pars(undo_rec, &type, &cmpl_info, &undo_no,
|
ptr = trx_undo_rec_get_pars(undo_rec, &type, &cmpl_info,
|
||||||
&table_id);
|
&dummy_extern, &undo_no, &table_id);
|
||||||
|
|
||||||
ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
|
ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
|
||||||
&info_bits);
|
&info_bits);
|
||||||
ptr = trx_undo_rec_skip_row_ref(ptr, index);
|
ptr = trx_undo_rec_skip_row_ref(ptr, index);
|
||||||
@ -1278,5 +1320,15 @@ trx_undo_prev_version_build(
|
|||||||
row_upd_rec_in_place(*old_vers, update);
|
row_upd_rec_in_place(*old_vers, update);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < upd_get_n_fields(update); i++) {
|
||||||
|
|
||||||
|
if (upd_get_nth_field(update, i)->extern_storage) {
|
||||||
|
|
||||||
|
rec_set_nth_field_extern_bit(*old_vers,
|
||||||
|
upd_get_nth_field(update, i)->field_no,
|
||||||
|
TRUE, NULL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return(DB_SUCCESS);
|
return(DB_SUCCESS);
|
||||||
}
|
}
|
||||||
|
@ -19,9 +19,326 @@ Created 3/26/1996 Heikki Tuuri
|
|||||||
#include "trx0undo.h"
|
#include "trx0undo.h"
|
||||||
#include "srv0srv.h"
|
#include "srv0srv.h"
|
||||||
#include "trx0purge.h"
|
#include "trx0purge.h"
|
||||||
|
#include "log0log.h"
|
||||||
|
|
||||||
/* The transaction system */
|
/* The transaction system */
|
||||||
trx_sys_t* trx_sys = NULL;
|
trx_sys_t* trx_sys = NULL;
|
||||||
|
trx_doublewrite_t* trx_doublewrite = NULL;
|
||||||
|
|
||||||
|
/********************************************************************
|
||||||
|
Creates or initializes the doublewrite buffer at a database start. */
|
||||||
|
static
|
||||||
|
void
|
||||||
|
trx_doublewrite_init(
|
||||||
|
/*=================*/
|
||||||
|
byte* doublewrite) /* in: pointer to the doublewrite buf
|
||||||
|
header on trx sys page */
|
||||||
|
{
|
||||||
|
trx_doublewrite = mem_alloc(sizeof(trx_doublewrite_t));
|
||||||
|
|
||||||
|
mutex_create(&(trx_doublewrite->mutex));
|
||||||
|
mutex_set_level(&(trx_doublewrite->mutex), SYNC_DOUBLEWRITE);
|
||||||
|
|
||||||
|
trx_doublewrite->first_free = 0;
|
||||||
|
|
||||||
|
trx_doublewrite->block1 = mach_read_from_4(
|
||||||
|
doublewrite
|
||||||
|
+ TRX_SYS_DOUBLEWRITE_BLOCK1);
|
||||||
|
trx_doublewrite->block2 = mach_read_from_4(
|
||||||
|
doublewrite
|
||||||
|
+ TRX_SYS_DOUBLEWRITE_BLOCK2);
|
||||||
|
trx_doublewrite->write_buf_unaligned =
|
||||||
|
ut_malloc(
|
||||||
|
(1 + 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
|
||||||
|
* UNIV_PAGE_SIZE);
|
||||||
|
|
||||||
|
trx_doublewrite->write_buf = ut_align(
|
||||||
|
trx_doublewrite->write_buf_unaligned,
|
||||||
|
UNIV_PAGE_SIZE);
|
||||||
|
trx_doublewrite->buf_block_arr = mem_alloc(
|
||||||
|
2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
|
||||||
|
* sizeof(void*));
|
||||||
|
}
|
||||||
|
|
||||||
|
/********************************************************************
|
||||||
|
Creates the doublewrite buffer at a database start. The header of the
|
||||||
|
doublewrite buffer is placed on the trx system header page. */
|
||||||
|
|
||||||
|
void
|
||||||
|
trx_sys_create_doublewrite_buf(void)
|
||||||
|
/*================================*/
|
||||||
|
{
|
||||||
|
page_t* page;
|
||||||
|
page_t* page2;
|
||||||
|
page_t* new_page;
|
||||||
|
byte* doublewrite;
|
||||||
|
byte* fseg_header;
|
||||||
|
ulint page_no;
|
||||||
|
ulint prev_page_no;
|
||||||
|
ulint i;
|
||||||
|
mtr_t mtr;
|
||||||
|
|
||||||
|
if (trx_doublewrite) {
|
||||||
|
/* Already inited */
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
start_again:
|
||||||
|
mtr_start(&mtr);
|
||||||
|
|
||||||
|
page = buf_page_get(TRX_SYS_SPACE, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
|
||||||
|
buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);
|
||||||
|
|
||||||
|
doublewrite = page + TRX_SYS_DOUBLEWRITE;
|
||||||
|
|
||||||
|
if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
|
||||||
|
== TRX_SYS_DOUBLEWRITE_MAGIC_N) {
|
||||||
|
|
||||||
|
/* The doublewrite buffer has already been created:
|
||||||
|
just read in some numbers */
|
||||||
|
|
||||||
|
trx_doublewrite_init(doublewrite);
|
||||||
|
|
||||||
|
mtr_commit(&mtr);
|
||||||
|
} else {
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: Doublewrite buffer not found: creating new\n");
|
||||||
|
|
||||||
|
if (buf_pool_get_curr_size() <
|
||||||
|
(2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
|
||||||
|
+ FSP_EXTENT_SIZE / 2 + 100)
|
||||||
|
* UNIV_PAGE_SIZE) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: Cannot create doublewrite buffer: you must\n"
|
||||||
|
"InnoDB: increase your buffer pool size.\n"
|
||||||
|
"InnoDB: Cannot continue operation.\n");
|
||||||
|
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
page2 = fseg_create(TRX_SYS_SPACE, TRX_SYS_PAGE_NO,
|
||||||
|
TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_FSEG, &mtr);
|
||||||
|
|
||||||
|
/* fseg_create acquires a second latch on the page,
|
||||||
|
therefore we must declare it: */
|
||||||
|
|
||||||
|
buf_page_dbg_add_level(page2, SYNC_NO_ORDER_CHECK);
|
||||||
|
|
||||||
|
if (page2 == NULL) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: Cannot create doublewrite buffer: you must\n"
|
||||||
|
"InnoDB: increase your tablespace size.\n"
|
||||||
|
"InnoDB: Cannot continue operation.\n");
|
||||||
|
|
||||||
|
/* We exit without committing the mtr to prevent
|
||||||
|
its modifications to the database getting to disk */
|
||||||
|
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
fseg_header = page + TRX_SYS_DOUBLEWRITE
|
||||||
|
+ TRX_SYS_DOUBLEWRITE_FSEG;
|
||||||
|
prev_page_no = 0;
|
||||||
|
|
||||||
|
for (i = 0; i < 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
|
||||||
|
+ FSP_EXTENT_SIZE / 2; i++) {
|
||||||
|
page_no = fseg_alloc_free_page(fseg_header,
|
||||||
|
prev_page_no + 1,
|
||||||
|
FSP_UP, &mtr);
|
||||||
|
if (page_no == FIL_NULL) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: Cannot create doublewrite buffer: you must\n"
|
||||||
|
"InnoDB: increase your tablespace size.\n"
|
||||||
|
"InnoDB: Cannot continue operation.\n");
|
||||||
|
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We read the allocated pages to the buffer pool;
|
||||||
|
when they are written to disk in a flush, the space
|
||||||
|
id and page number fields are also written to the
|
||||||
|
pages. When we at database startup read pages
|
||||||
|
from the doublewrite buffer, we know that if the
|
||||||
|
space id and page number in them are the same as
|
||||||
|
the page position in the tablespace, then the page
|
||||||
|
has not been written to in doublewrite. */
|
||||||
|
|
||||||
|
new_page = buf_page_get(TRX_SYS_SPACE, page_no,
|
||||||
|
RW_X_LATCH, &mtr);
|
||||||
|
buf_page_dbg_add_level(new_page, SYNC_NO_ORDER_CHECK);
|
||||||
|
|
||||||
|
/* Make a dummy change to the page to ensure it will
|
||||||
|
be written to disk in a flush */
|
||||||
|
|
||||||
|
mlog_write_ulint(new_page + FIL_PAGE_DATA,
|
||||||
|
TRX_SYS_DOUBLEWRITE_MAGIC_N,
|
||||||
|
MLOG_4BYTES, &mtr);
|
||||||
|
|
||||||
|
if (i == FSP_EXTENT_SIZE / 2) {
|
||||||
|
mlog_write_ulint(doublewrite
|
||||||
|
+ TRX_SYS_DOUBLEWRITE_BLOCK1,
|
||||||
|
page_no, MLOG_4BYTES, &mtr);
|
||||||
|
mlog_write_ulint(doublewrite
|
||||||
|
+ TRX_SYS_DOUBLEWRITE_REPEAT
|
||||||
|
+ TRX_SYS_DOUBLEWRITE_BLOCK1,
|
||||||
|
page_no, MLOG_4BYTES, &mtr);
|
||||||
|
} else if (i == FSP_EXTENT_SIZE / 2
|
||||||
|
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
|
||||||
|
mlog_write_ulint(doublewrite
|
||||||
|
+ TRX_SYS_DOUBLEWRITE_BLOCK2,
|
||||||
|
page_no, MLOG_4BYTES, &mtr);
|
||||||
|
mlog_write_ulint(doublewrite
|
||||||
|
+ TRX_SYS_DOUBLEWRITE_REPEAT
|
||||||
|
+ TRX_SYS_DOUBLEWRITE_BLOCK2,
|
||||||
|
page_no, MLOG_4BYTES, &mtr);
|
||||||
|
} else if (i > FSP_EXTENT_SIZE / 2) {
|
||||||
|
ut_a(page_no == prev_page_no + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
prev_page_no = page_no;
|
||||||
|
}
|
||||||
|
|
||||||
|
mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC,
|
||||||
|
TRX_SYS_DOUBLEWRITE_MAGIC_N, MLOG_4BYTES, &mtr);
|
||||||
|
mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC
|
||||||
|
+ TRX_SYS_DOUBLEWRITE_REPEAT,
|
||||||
|
TRX_SYS_DOUBLEWRITE_MAGIC_N, MLOG_4BYTES, &mtr);
|
||||||
|
mtr_commit(&mtr);
|
||||||
|
|
||||||
|
/* Flush the modified pages to disk and make a checkpoint */
|
||||||
|
log_make_checkpoint_at(ut_dulint_max, TRUE);
|
||||||
|
|
||||||
|
fprintf(stderr, "InnoDB: Doublewrite buffer created\n");
|
||||||
|
|
||||||
|
goto start_again;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/********************************************************************
|
||||||
|
At a database startup uses a possible doublewrite buffer to restore
|
||||||
|
half-written pages in the data files. */
|
||||||
|
|
||||||
|
void
|
||||||
|
trx_sys_doublewrite_restore_corrupt_pages(void)
|
||||||
|
/*===========================================*/
|
||||||
|
{
|
||||||
|
byte* buf;
|
||||||
|
byte* read_buf;
|
||||||
|
byte* unaligned_read_buf;
|
||||||
|
ulint block1;
|
||||||
|
ulint block2;
|
||||||
|
byte* page;
|
||||||
|
byte* doublewrite;
|
||||||
|
ulint space_id;
|
||||||
|
ulint page_no;
|
||||||
|
ulint i;
|
||||||
|
|
||||||
|
/* We do the file i/o past the buffer pool */
|
||||||
|
|
||||||
|
unaligned_read_buf = ut_malloc(2 * UNIV_PAGE_SIZE);
|
||||||
|
read_buf = ut_align(unaligned_read_buf, UNIV_PAGE_SIZE);
|
||||||
|
|
||||||
|
/* Read the trx sys header to check if we are using the
|
||||||
|
doublewrite buffer */
|
||||||
|
|
||||||
|
fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, TRX_SYS_PAGE_NO, 0,
|
||||||
|
UNIV_PAGE_SIZE, read_buf, NULL);
|
||||||
|
|
||||||
|
doublewrite = read_buf + TRX_SYS_DOUBLEWRITE;
|
||||||
|
|
||||||
|
if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
|
||||||
|
== TRX_SYS_DOUBLEWRITE_MAGIC_N) {
|
||||||
|
/* The doublewrite buffer has been created */
|
||||||
|
|
||||||
|
trx_doublewrite_init(doublewrite);
|
||||||
|
|
||||||
|
block1 = trx_doublewrite->block1;
|
||||||
|
block2 = trx_doublewrite->block2;
|
||||||
|
|
||||||
|
buf = trx_doublewrite->write_buf;
|
||||||
|
} else {
|
||||||
|
goto leave_func;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Read the pages from the doublewrite buffer to memory */
|
||||||
|
|
||||||
|
fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, block1, 0,
|
||||||
|
TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
|
||||||
|
buf, NULL);
|
||||||
|
fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, block2, 0,
|
||||||
|
TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
|
||||||
|
buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
|
||||||
|
NULL);
|
||||||
|
/* Check if any of these pages is half-written in data files, in the
|
||||||
|
intended position */
|
||||||
|
|
||||||
|
page = buf;
|
||||||
|
|
||||||
|
for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) {
|
||||||
|
|
||||||
|
space_id = mach_read_from_4(page + FIL_PAGE_SPACE);
|
||||||
|
page_no = mach_read_from_4(page + FIL_PAGE_OFFSET);
|
||||||
|
|
||||||
|
if (!fil_check_adress_in_tablespace(space_id, page_no)) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: Warning: an inconsistent page in the doublewrite buffer\n"
|
||||||
|
"InnoDB: space id %lu page number %lu, %lu'th page in dblwr buf.\n",
|
||||||
|
space_id, page_no, i);
|
||||||
|
|
||||||
|
} else if (space_id == TRX_SYS_SPACE
|
||||||
|
&& ( (page_no >= block1
|
||||||
|
&& page_no
|
||||||
|
< block1 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
|
||||||
|
|| (page_no >= block2
|
||||||
|
&& page_no
|
||||||
|
< block2 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE))) {
|
||||||
|
|
||||||
|
/* It is an unwritten doublewrite buffer page:
|
||||||
|
do nothing */
|
||||||
|
|
||||||
|
} else {
|
||||||
|
/* Read in the actual page from the data files */
|
||||||
|
|
||||||
|
fil_io(OS_FILE_READ, TRUE, space_id, page_no, 0,
|
||||||
|
UNIV_PAGE_SIZE, read_buf, NULL);
|
||||||
|
/* Check if the page is corrupt */
|
||||||
|
|
||||||
|
if (buf_page_is_corrupted(read_buf)) {
|
||||||
|
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: Warning: database page corruption or a failed\n"
|
||||||
|
"InnoDB: file read of page %lu.\n", page_no);
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: Trying to recover it from the doublewrite buffer.\n");
|
||||||
|
|
||||||
|
if (buf_page_is_corrupted(page)) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: Also the page in the doublewrite buffer is corrupt.\n"
|
||||||
|
"InnoDB: Cannot continue operation.\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Write the good page from the
|
||||||
|
doublewrite buffer to the intended
|
||||||
|
position */
|
||||||
|
|
||||||
|
fil_io(OS_FILE_WRITE, TRUE, space_id,
|
||||||
|
page_no, 0,
|
||||||
|
UNIV_PAGE_SIZE, page, NULL);
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: Recovered the page from the doublewrite buffer.\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
page += UNIV_PAGE_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
fil_flush_file_spaces(FIL_TABLESPACE);
|
||||||
|
|
||||||
|
leave_func:
|
||||||
|
ut_free(unaligned_read_buf);
|
||||||
|
}
|
||||||
|
|
||||||
/********************************************************************
|
/********************************************************************
|
||||||
Checks that trx is in the trx list. */
|
Checks that trx is in the trx list. */
|
||||||
|
Reference in New Issue
Block a user