mirror of
https://github.com/MariaDB/server.git
synced 2025-07-30 16:24:05 +03:00
Many files:
Remove potential starvation of a full log buffer flush: only flush up to the lsn that was the largest at the time the full log buffer flush was requested.
os0sync.h, os0sync.c: Fix a bug in os_event on Unix: even though the event had been signaled, some threads could continue waiting if the event quickly became nonsignaled again; this made group commit less efficient than it should be.
This commit is contained in:
@ -822,9 +822,16 @@ btr_page_reorganize_low(
|
|||||||
{
|
{
|
||||||
page_t* new_page;
|
page_t* new_page;
|
||||||
ulint log_mode;
|
ulint log_mode;
|
||||||
|
ulint data_size1;
|
||||||
|
ulint data_size2;
|
||||||
|
ulint max_ins_size1;
|
||||||
|
ulint max_ins_size2;
|
||||||
|
|
||||||
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
|
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
|
||||||
MTR_MEMO_PAGE_X_FIX));
|
MTR_MEMO_PAGE_X_FIX));
|
||||||
|
data_size1 = page_get_data_size(page);
|
||||||
|
max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1);
|
||||||
|
|
||||||
/* Write the log record */
|
/* Write the log record */
|
||||||
mlog_write_initial_log_record(page, MLOG_PAGE_REORGANIZE, mtr);
|
mlog_write_initial_log_record(page, MLOG_PAGE_REORGANIZE, mtr);
|
||||||
|
|
||||||
@ -859,6 +866,19 @@ btr_page_reorganize_low(
|
|||||||
lock_move_reorganize_page(page, new_page);
|
lock_move_reorganize_page(page, new_page);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
data_size2 = page_get_data_size(page);
|
||||||
|
max_ins_size2 = page_get_max_insert_size_after_reorganize(page, 1);
|
||||||
|
|
||||||
|
if (data_size1 != data_size2 || max_ins_size1 != max_ins_size2) {
|
||||||
|
buf_page_print(page);
|
||||||
|
buf_page_print(new_page);
|
||||||
|
fprintf(stderr,
|
||||||
|
"InnoDB: Error: page old data size %lu new data size %lu\n"
|
||||||
|
"InnoDB: Error: page old max ins size %lu new max ins size %lu\n"
|
||||||
|
"InnoDB: Make a detailed bug report and send it to mysql@lists.mysql.com\n",
|
||||||
|
data_size1, data_size2, max_ins_size1, max_ins_size2);
|
||||||
|
}
|
||||||
|
|
||||||
buf_frame_free(new_page);
|
buf_frame_free(new_page);
|
||||||
|
|
||||||
/* Restore logging mode */
|
/* Restore logging mode */
|
||||||
@ -1945,11 +1965,20 @@ btr_compress(
|
|||||||
|
|
||||||
btr_page_reorganize(merge_page, mtr);
|
btr_page_reorganize(merge_page, mtr);
|
||||||
|
|
||||||
|
max_ins_size = page_get_max_insert_size(merge_page, n_recs);
|
||||||
|
|
||||||
ut_ad(page_validate(merge_page, cursor->index));
|
ut_ad(page_validate(merge_page, cursor->index));
|
||||||
ut_ad(page_get_max_insert_size(merge_page, n_recs)
|
ut_ad(page_get_max_insert_size(merge_page, n_recs)
|
||||||
== max_ins_size_reorg);
|
== max_ins_size_reorg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (data_size > max_ins_size) {
|
||||||
|
|
||||||
|
/* Add fault tolerance, though this should never happen */
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
btr_search_drop_page_hash_index(page);
|
btr_search_drop_page_hash_index(page);
|
||||||
|
|
||||||
/* Remove the page from the level list */
|
/* Remove the page from the level list */
|
||||||
|
@ -173,6 +173,12 @@ log_write_up_to(
|
|||||||
/* in: TRUE if we want the written log also to be
|
/* in: TRUE if we want the written log also to be
|
||||||
flushed to disk */
|
flushed to disk */
|
||||||
/********************************************************************
|
/********************************************************************
|
||||||
|
Does a syncronous flush of the log buffer to disk. */
|
||||||
|
|
||||||
|
void
|
||||||
|
log_buffer_flush_to_disk(void);
|
||||||
|
/*==========================*/
|
||||||
|
/********************************************************************
|
||||||
Advances the smallest lsn for which there are unflushed dirty blocks in the
|
Advances the smallest lsn for which there are unflushed dirty blocks in the
|
||||||
buffer pool and also may make a new checkpoint. NOTE: this function may only
|
buffer pool and also may make a new checkpoint. NOTE: this function may only
|
||||||
be called if the calling thread owns no synchronization objects! */
|
be called if the calling thread owns no synchronization objects! */
|
||||||
|
@ -36,8 +36,12 @@ typedef os_event_struct_t* os_event_t;
|
|||||||
struct os_event_struct {
|
struct os_event_struct {
|
||||||
os_fast_mutex_t os_mutex; /* this mutex protects the next
|
os_fast_mutex_t os_mutex; /* this mutex protects the next
|
||||||
fields */
|
fields */
|
||||||
ibool is_set; /* this is TRUE if the next mutex is
|
ibool is_set; /* this is TRUE when the event is
|
||||||
not reserved */
|
in the signaled state, i.e., a thread
|
||||||
|
does not stop if it tries to wait for
|
||||||
|
this event */
|
||||||
|
ib_longlong signal_count; /* this is incremented each time
|
||||||
|
the event becomes signaled */
|
||||||
pthread_cond_t cond_var; /* condition variable is used in
|
pthread_cond_t cond_var; /* condition variable is used in
|
||||||
waiting for the event */
|
waiting for the event */
|
||||||
UT_LIST_NODE_T(os_event_struct_t) os_event_list;
|
UT_LIST_NODE_T(os_event_struct_t) os_event_list;
|
||||||
|
@ -178,7 +178,8 @@ loop:
|
|||||||
|
|
||||||
/* Not enough free space, do a syncronous flush of the log
|
/* Not enough free space, do a syncronous flush of the log
|
||||||
buffer */
|
buffer */
|
||||||
log_write_up_to(ut_dulint_max, LOG_WAIT_ALL_GROUPS, TRUE);
|
|
||||||
|
log_buffer_flush_to_disk();
|
||||||
|
|
||||||
count++;
|
count++;
|
||||||
|
|
||||||
@ -1364,6 +1365,24 @@ do_waits:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/********************************************************************
|
||||||
|
Does a syncronous flush of the log buffer to disk. */
|
||||||
|
|
||||||
|
void
|
||||||
|
log_buffer_flush_to_disk(void)
|
||||||
|
/*==========================*/
|
||||||
|
{
|
||||||
|
dulint lsn;
|
||||||
|
|
||||||
|
mutex_enter(&(log_sys->mutex));
|
||||||
|
|
||||||
|
lsn = log_sys->lsn;
|
||||||
|
|
||||||
|
mutex_exit(&(log_sys->mutex));
|
||||||
|
|
||||||
|
log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS, TRUE);
|
||||||
|
}
|
||||||
|
|
||||||
/********************************************************************
|
/********************************************************************
|
||||||
Tries to establish a big enough margin of free space in the log buffer, such
|
Tries to establish a big enough margin of free space in the log buffer, such
|
||||||
that a new log entry can be catenated without an immediate need for a flush. */
|
that a new log entry can be catenated without an immediate need for a flush. */
|
||||||
@ -1374,6 +1393,7 @@ log_flush_margin(void)
|
|||||||
{
|
{
|
||||||
ibool do_flush = FALSE;
|
ibool do_flush = FALSE;
|
||||||
log_t* log = log_sys;
|
log_t* log = log_sys;
|
||||||
|
dulint lsn;
|
||||||
|
|
||||||
mutex_enter(&(log->mutex));
|
mutex_enter(&(log->mutex));
|
||||||
|
|
||||||
@ -1384,13 +1404,14 @@ log_flush_margin(void)
|
|||||||
free space */
|
free space */
|
||||||
} else {
|
} else {
|
||||||
do_flush = TRUE;
|
do_flush = TRUE;
|
||||||
|
lsn = log->lsn;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mutex_exit(&(log->mutex));
|
mutex_exit(&(log->mutex));
|
||||||
|
|
||||||
if (do_flush) {
|
if (do_flush) {
|
||||||
log_write_up_to(ut_dulint_max, LOG_NO_WAIT, FALSE);
|
log_write_up_to(lsn, LOG_NO_WAIT, FALSE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -143,6 +143,7 @@ os_event_create(
|
|||||||
ut_a(0 == pthread_cond_init(&(event->cond_var), NULL));
|
ut_a(0 == pthread_cond_init(&(event->cond_var), NULL));
|
||||||
#endif
|
#endif
|
||||||
event->is_set = FALSE;
|
event->is_set = FALSE;
|
||||||
|
event->signal_count = 0;
|
||||||
#endif /* __WIN__ */
|
#endif /* __WIN__ */
|
||||||
|
|
||||||
/* Put to the list of events */
|
/* Put to the list of events */
|
||||||
@ -218,6 +219,7 @@ os_event_set(
|
|||||||
/* Do nothing */
|
/* Do nothing */
|
||||||
} else {
|
} else {
|
||||||
event->is_set = TRUE;
|
event->is_set = TRUE;
|
||||||
|
event->signal_count += 1;
|
||||||
ut_a(0 == pthread_cond_broadcast(&(event->cond_var)));
|
ut_a(0 == pthread_cond_broadcast(&(event->cond_var)));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -310,9 +312,15 @@ os_event_wait(
|
|||||||
os_thread_exit(NULL);
|
os_thread_exit(NULL);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
ib_longlong old_signal_count;
|
||||||
|
|
||||||
os_fast_mutex_lock(&(event->os_mutex));
|
os_fast_mutex_lock(&(event->os_mutex));
|
||||||
|
|
||||||
|
old_signal_count = event->signal_count;
|
||||||
loop:
|
loop:
|
||||||
if (event->is_set == TRUE) {
|
if (event->is_set == TRUE
|
||||||
|
|| event->signal_count != old_signal_count) {
|
||||||
|
|
||||||
os_fast_mutex_unlock(&(event->os_mutex));
|
os_fast_mutex_unlock(&(event->os_mutex));
|
||||||
|
|
||||||
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
|
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
|
||||||
@ -326,8 +334,9 @@ loop:
|
|||||||
|
|
||||||
pthread_cond_wait(&(event->cond_var), &(event->os_mutex));
|
pthread_cond_wait(&(event->cond_var), &(event->os_mutex));
|
||||||
|
|
||||||
/* Solaris manual said that spurious wakeups may occur: we have
|
/* Solaris manual said that spurious wakeups may occur: we have to
|
||||||
to check the 'is_set' variable again */
|
check if the event really has been signaled after we came here to
|
||||||
|
wait */
|
||||||
|
|
||||||
goto loop;
|
goto loop;
|
||||||
#endif
|
#endif
|
||||||
|
@ -1655,7 +1655,7 @@ row_drop_table_for_mysql_in_background(
|
|||||||
the InnoDB data dictionary get out-of-sync if the user runs
|
the InnoDB data dictionary get out-of-sync if the user runs
|
||||||
with innodb_flush_log_at_trx_commit = 0 */
|
with innodb_flush_log_at_trx_commit = 0 */
|
||||||
|
|
||||||
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE);
|
log_buffer_flush_to_disk();
|
||||||
|
|
||||||
trx_commit_for_mysql(trx);
|
trx_commit_for_mysql(trx);
|
||||||
|
|
||||||
|
@ -2836,7 +2836,7 @@ loop:
|
|||||||
at transaction commit */
|
at transaction commit */
|
||||||
|
|
||||||
srv_main_thread_op_info = (char*)"flushing log";
|
srv_main_thread_op_info = (char*)"flushing log";
|
||||||
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE);
|
log_buffer_flush_to_disk();
|
||||||
|
|
||||||
/* If there were less than 5 i/os during the
|
/* If there were less than 5 i/os during the
|
||||||
one second sleep, we assume that there is free
|
one second sleep, we assume that there is free
|
||||||
@ -2852,10 +2852,9 @@ loop:
|
|||||||
(char*)"doing insert buffer merge";
|
(char*)"doing insert buffer merge";
|
||||||
ibuf_contract_for_n_pages(TRUE, 5);
|
ibuf_contract_for_n_pages(TRUE, 5);
|
||||||
|
|
||||||
srv_main_thread_op_info =
|
srv_main_thread_op_info = (char*)"flushing log";
|
||||||
(char*)"flushing log";
|
|
||||||
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP,
|
log_buffer_flush_to_disk();
|
||||||
TRUE);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (buf_get_modified_ratio_pct() >
|
if (buf_get_modified_ratio_pct() >
|
||||||
@ -2905,7 +2904,7 @@ loop:
|
|||||||
buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max);
|
buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max);
|
||||||
|
|
||||||
srv_main_thread_op_info = (char*) "flushing log";
|
srv_main_thread_op_info = (char*) "flushing log";
|
||||||
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE);
|
log_buffer_flush_to_disk();
|
||||||
}
|
}
|
||||||
|
|
||||||
/* We run a batch of insert buffer merge every 10 seconds,
|
/* We run a batch of insert buffer merge every 10 seconds,
|
||||||
@ -2915,7 +2914,7 @@ loop:
|
|||||||
ibuf_contract_for_n_pages(TRUE, 5);
|
ibuf_contract_for_n_pages(TRUE, 5);
|
||||||
|
|
||||||
srv_main_thread_op_info = (char*)"flushing log";
|
srv_main_thread_op_info = (char*)"flushing log";
|
||||||
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE);
|
log_buffer_flush_to_disk();
|
||||||
|
|
||||||
/* We run a full purge every 10 seconds, even if the server
|
/* We run a full purge every 10 seconds, even if the server
|
||||||
were active */
|
were active */
|
||||||
@ -2939,8 +2938,7 @@ loop:
|
|||||||
if (difftime(current_time, last_flush_time) > 1) {
|
if (difftime(current_time, last_flush_time) > 1) {
|
||||||
srv_main_thread_op_info = (char*) "flushing log";
|
srv_main_thread_op_info = (char*) "flushing log";
|
||||||
|
|
||||||
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP,
|
log_buffer_flush_to_disk();
|
||||||
TRUE);
|
|
||||||
last_flush_time = current_time;
|
last_flush_time = current_time;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -3060,6 +3058,10 @@ flush_loop:
|
|||||||
(char*) "waiting for buffer pool flush to end";
|
(char*) "waiting for buffer pool flush to end";
|
||||||
buf_flush_wait_batch_end(BUF_FLUSH_LIST);
|
buf_flush_wait_batch_end(BUF_FLUSH_LIST);
|
||||||
|
|
||||||
|
srv_main_thread_op_info = (char*) "flushing log";
|
||||||
|
|
||||||
|
log_buffer_flush_to_disk();
|
||||||
|
|
||||||
srv_main_thread_op_info = (char*)"making checkpoint";
|
srv_main_thread_op_info = (char*)"making checkpoint";
|
||||||
|
|
||||||
log_checkpoint(TRUE, FALSE);
|
log_checkpoint(TRUE, FALSE);
|
||||||
|
@ -1524,6 +1524,8 @@ trx_commit_complete_for_mysql(
|
|||||||
dulint lsn = trx->commit_lsn;
|
dulint lsn = trx->commit_lsn;
|
||||||
|
|
||||||
ut_a(trx);
|
ut_a(trx);
|
||||||
|
|
||||||
|
trx->op_info = (char*)"flushing log";
|
||||||
|
|
||||||
if (srv_flush_log_at_trx_commit == 0) {
|
if (srv_flush_log_at_trx_commit == 0) {
|
||||||
/* Do nothing */
|
/* Do nothing */
|
||||||
@ -1547,6 +1549,8 @@ trx_commit_complete_for_mysql(
|
|||||||
ut_a(0);
|
ut_a(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
trx->op_info = (char*)"";
|
||||||
|
|
||||||
return(0);
|
return(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -915,7 +915,7 @@ innobase_flush_logs(void)
|
|||||||
|
|
||||||
DBUG_ENTER("innobase_flush_logs");
|
DBUG_ENTER("innobase_flush_logs");
|
||||||
|
|
||||||
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE);
|
log_buffer_flush_to_disk();
|
||||||
|
|
||||||
DBUG_RETURN(result);
|
DBUG_RETURN(result);
|
||||||
}
|
}
|
||||||
@ -3538,7 +3538,7 @@ ha_innobase::create(
|
|||||||
the InnoDB data dictionary get out-of-sync if the user runs
|
the InnoDB data dictionary get out-of-sync if the user runs
|
||||||
with innodb_flush_log_at_trx_commit = 0 */
|
with innodb_flush_log_at_trx_commit = 0 */
|
||||||
|
|
||||||
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE);
|
log_buffer_flush_to_disk();
|
||||||
|
|
||||||
innobase_table = dict_table_get(norm_name, NULL);
|
innobase_table = dict_table_get(norm_name, NULL);
|
||||||
|
|
||||||
@ -3613,7 +3613,7 @@ ha_innobase::delete_table(
|
|||||||
the InnoDB data dictionary get out-of-sync if the user runs
|
the InnoDB data dictionary get out-of-sync if the user runs
|
||||||
with innodb_flush_log_at_trx_commit = 0 */
|
with innodb_flush_log_at_trx_commit = 0 */
|
||||||
|
|
||||||
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE);
|
log_buffer_flush_to_disk();
|
||||||
|
|
||||||
/* Tell the InnoDB server that there might be work for
|
/* Tell the InnoDB server that there might be work for
|
||||||
utility threads: */
|
utility threads: */
|
||||||
@ -3683,7 +3683,7 @@ innobase_drop_database(
|
|||||||
the InnoDB data dictionary get out-of-sync if the user runs
|
the InnoDB data dictionary get out-of-sync if the user runs
|
||||||
with innodb_flush_log_at_trx_commit = 0 */
|
with innodb_flush_log_at_trx_commit = 0 */
|
||||||
|
|
||||||
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE);
|
log_buffer_flush_to_disk();
|
||||||
|
|
||||||
/* Tell the InnoDB server that there might be work for
|
/* Tell the InnoDB server that there might be work for
|
||||||
utility threads: */
|
utility threads: */
|
||||||
@ -3755,7 +3755,7 @@ ha_innobase::rename_table(
|
|||||||
the InnoDB data dictionary get out-of-sync if the user runs
|
the InnoDB data dictionary get out-of-sync if the user runs
|
||||||
with innodb_flush_log_at_trx_commit = 0 */
|
with innodb_flush_log_at_trx_commit = 0 */
|
||||||
|
|
||||||
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE);
|
log_buffer_flush_to_disk();
|
||||||
|
|
||||||
/* Tell the InnoDB server that there might be work for
|
/* Tell the InnoDB server that there might be work for
|
||||||
utility threads: */
|
utility threads: */
|
||||||
|
Reference in New Issue
Block a user