1
0
mirror of https://github.com/codership/wsrep-lib.git synced 2025-07-20 01:03:16 +03:00

Fixes to streaming rollback processing

* Count certified fragments and stored fragments separately in the
  streaming context. Storing a fragment may ultimately fail
  due to a BF abort even if the fragment was successfully certified.
  Therefore we need a separate counter for certified fragments
  to determine if the transaction is streaming, and separately the
  seqnos of fragments which have been successfully stored.
* Provider release is called only after successful fragment certification
  and fragment store.
* Fixed handling of write sets with rollback flag set in apply_write_set()
This commit is contained in:
Teemu Ollakka
2018-07-16 10:07:46 +03:00
parent 21ae2c849e
commit 9f153be277
5 changed files with 125 additions and 56 deletions

View File

@ -236,7 +236,7 @@ int wsrep::client_state::enable_streaming(
size_t fragment_size)
{
assert(mode_ == m_local);
if (transaction_.streaming_context().fragments_certified() &&
if (transaction_.is_streaming() &&
transaction_.streaming_context().fragment_unit() !=
fragment_unit)
{

View File

@ -120,13 +120,45 @@ static int apply_write_set(wsrep::server_state& server_state,
const wsrep::const_buffer& data)
{
int ret(0);
if (wsrep::starts_transaction(ws_meta.flags()) &&
wsrep::commits_transaction(ws_meta.flags()) &&
wsrep::rolls_back_transaction(ws_meta.flags()))
// wsrep::log_info() << "apply_write_set: " << ws_meta;
if (wsrep::rolls_back_transaction(ws_meta.flags()))
{
// Non streaming rollback (certification failed)
ret = high_priority_service.log_dummy_write_set(
ws_handle, ws_meta);
if (wsrep::starts_transaction(ws_meta.flags()))
{
// No transaction existed before, log a dummy write set
ret = high_priority_service.log_dummy_write_set(
ws_handle, ws_meta);
}
else
{
wsrep::high_priority_service* sa(
server_state.find_streaming_applier(
ws_meta.server_id(), ws_meta.transaction_id()));
if (sa == 0)
{
// It is possible that rapid group membership changes
// may cause streaming transaction be rolled back before
// commit fragment comes in. Although this is a valid
// situation, log a warning if a sac cannot be found as
// it may be an indication of a bug too.
wsrep::log_warning()
<< "Could not find applier context for "
<< ws_meta.server_id()
<< ": " << ws_meta.transaction_id();
ret = high_priority_service.log_dummy_write_set(
ws_handle, ws_meta);
}
else
{
// rollback_fragment() consumes sa
ret = rollback_fragment(server_state,
high_priority_service,
sa,
ws_handle,
ws_meta,
data);
}
}
}
else if (wsrep::starts_transaction(ws_meta.flags()) &&
wsrep::commits_transaction(ws_meta.flags()))
@ -225,36 +257,6 @@ static int apply_write_set(wsrep::server_state& server_state,
}
}
}
else if (wsrep::rolls_back_transaction(ws_meta.flags()))
{
wsrep::high_priority_service* sa(
server_state.find_streaming_applier(
ws_meta.server_id(), ws_meta.transaction_id()));
if (sa == 0)
{
// It is possible that rapid group membership changes
// may cause streaming transaction be rolled back before
// commit fragment comes in. Although this is a valid
// situation, log a warning if a sac cannot be found as
// it may be an indication of a bug too.
wsrep::log_warning()
<< "Could not find applier context for "
<< ws_meta.server_id()
<< ": " << ws_meta.transaction_id();
ret = high_priority_service.log_dummy_write_set(
ws_handle, ws_meta);
}
else
{
// Rollback fragment consumes sa
ret = rollback_fragment(server_state,
high_priority_service,
sa,
ws_handle,
ws_meta,
data);
}
}
else
{
assert(0);

View File

@ -434,6 +434,7 @@ int wsrep::transaction::before_commit()
if (ret)
{
state(lock, s_must_abort);
state(lock, s_aborting);
}
break;
default:
@ -459,8 +460,22 @@ int wsrep::transaction::ordered_commit()
// aborted anymore
// 3) The provider should always guarantee that the transactions which
// have been ordered for commit can finish committing.
assert(ret == 0);
state(lock, s_ordered_commit);
//
// The exception here is a storage service transaction which is running
// in high priority mode. The fragment storage commit may get BF
// aborted in the provider after commit ordering has been
// established since the transaction is operating in streaming
// mode.
if (ret)
{
assert(client_state_.mode() == wsrep::client_state::m_high_priority);
state(lock, s_must_abort);
state(lock, s_aborting);
}
else
{
state(lock, s_ordered_commit);
}
debug_log_state("ordered_commit_leave");
return ret;
}
@ -1024,9 +1039,11 @@ int wsrep::transaction::certify_fragment(
{
case wsrep::provider::success:
assert(sr_ws_meta.seqno().is_undefined() == false);
streaming_context_.certified(sr_ws_meta.seqno(), data.size());
streaming_context_.certified(data.size());
if (storage_service.update_fragment_meta(sr_ws_meta))
{
storage_service.rollback(wsrep::ws_handle(),
wsrep::ws_meta());
ret = 1;
break;
}
@ -1034,9 +1051,16 @@ int wsrep::transaction::certify_fragment(
{
ret = 1;
}
else
{
streaming_context_.stored(sr_ws_meta.seqno());
}
break;
default:
storage_service.rollback(ws_handle_, sr_ws_meta);
// Storage service rollback must be done out of order,
// otherwise there may be a deadlock between BF aborter
// and the rollback process.
storage_service.rollback(wsrep::ws_handle(), wsrep::ws_meta());
ret = 1;
break;
}
@ -1044,8 +1068,13 @@ int wsrep::transaction::certify_fragment(
// Note: This does not release the handle in the provider
// since streaming is still on. However it is needed to
// make provider internal state to transition for the
// next fragment.
provider().release(ws_handle_);
// next fragment. If any of the operations above failed,
// the handle needs to be left unreleased for the following
// rollback process.
if (ret == 0)
{
provider().release(ws_handle_);
}
lock.lock();
if (ret)
{
@ -1152,8 +1181,8 @@ int wsrep::transaction::certify_commit(
break;
case s_must_abort:
// We got BF aborted after succesful certification
// and before acquiring client context lock. This means that
// the trasaction must be replayed.
// and before acquiring client state lock. The trasaction
// must be replayed.
client_service_.will_replay();
state(lock, s_must_replay);
break;
@ -1266,6 +1295,7 @@ void wsrep::transaction::streaming_rollback()
debug_log_state("streaming_rollback enter");
assert(state_ != s_must_replay);
assert(streaming_context_.rolled_back() == false);
assert(is_streaming());
if (bf_aborted_in_total_order_)
{