From c7fd61814f6014e90d90f4919ea4024f77c7e14c Mon Sep 17 00:00:00 2001 From: Annamalai Gurusami Date: Thu, 20 Dec 2012 11:59:36 +0530 Subject: [PATCH] Bug #14556349 RENAME OF COMPRESSED TABLE AND INSERT BUFFER MERGE CAUSE HANG Problem Statement: When the operation RENAME TABLE is about rename the tablespace of the table, it will stop all i/o operations on the tablespace temporarily. For this the fil_space_t::stop_ios member is used. Once the fil_space_t::stop_ios member is set to TRUE in the RENAME TABLE operation, it is expected that no new i/o operation will be done on the tablespace and all pending i/o operation can be completed on the tablespace. If the pending i/o operations initiate any new i/o operations then there will be deadlock. The RENAME TABLE operation will be waiting for pending i/o on the tablespace to be completed, and the pending i/o operations will be waiting on the RENAME TABLE operation to set the file_space_t::stop_ios flag to be set to FALSE. But in the given scenario the pending i/o operations did not initiate new i/o. But they where still unnecessarily checking the fil_space_t::stop_ios flag. This resulted in deadlock. Solution: I noticed that this deadlock happens in fil_space_get_size() and fil_space_get_zip_size() in the i/o threads. These functions check the stop_ios flag even when no i/o will be initiated. I modified these functions to ensure that they check the stop_ios flag only when they will be initiating an i/o operation. This solves the problem. rb://1635 (mysql-5.5) rb://1660 (mysql-trunk) approved by Inaam, Jimmy, and ima. --- storage/innobase/fil/fil0fil.c | 38 ++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c index 0e817942908..2f875663039 100644 --- a/storage/innobase/fil/fil0fil.c +++ b/storage/innobase/fil/fil0fil.c @@ -1466,7 +1466,7 @@ fil_space_get_size( ut_ad(fil_system); - fil_mutex_enter_and_prepare_for_io(id); + mutex_enter(&fil_system->mutex); space = fil_space_get_by_id(id); @@ -1481,6 +1481,23 @@ fil_space_get_size( ut_a(1 == UT_LIST_GET_LEN(space->chain)); + mutex_exit(&fil_system->mutex); + + /* It is possible that the space gets evicted at this point + before the fil_mutex_enter_and_prepare_for_io() acquires + the fil_system->mutex. Check for this after completing the + call to fil_mutex_enter_and_prepare_for_io(). */ + fil_mutex_enter_and_prepare_for_io(id); + + /* We are still holding the fil_system->mutex. Check if + the space is still in memory cache. */ + space = fil_space_get_by_id(id); + + if (space == NULL) { + mutex_exit(&fil_system->mutex); + return(0); + } + node = UT_LIST_GET_FIRST(space->chain); /* It must be a single-table tablespace and we have not opened @@ -1518,7 +1535,7 @@ fil_space_get_flags( return(0); } - fil_mutex_enter_and_prepare_for_io(id); + mutex_enter(&fil_system->mutex); space = fil_space_get_by_id(id); @@ -1533,6 +1550,23 @@ fil_space_get_flags( ut_a(1 == UT_LIST_GET_LEN(space->chain)); + mutex_exit(&fil_system->mutex); + + /* It is possible that the space gets evicted at this point + before the fil_mutex_enter_and_prepare_for_io() acquires + the fil_system->mutex. Check for this after completing the + call to fil_mutex_enter_and_prepare_for_io(). */ + fil_mutex_enter_and_prepare_for_io(id); + + /* We are still holding the fil_system->mutex. Check if + the space is still in memory cache. */ + space = fil_space_get_by_id(id); + + if (space == NULL) { + mutex_exit(&fil_system->mutex); + return(0); + } + node = UT_LIST_GET_FIRST(space->chain); /* It must be a single-table tablespace and we have not opened