1
0
mirror of https://github.com/codership/wsrep-lib.git synced 2025-06-04 09:02:12 +03:00

Cache rollback events that failed to replicate for later retry

This patch introduces a queue to store ids of transactions that failed
to send a rollback fragment in streaming_rollback(). This is to avoid
potentially  missed rollback fragments when a cluster splits and then
later reforms. Rollback fragments would be missing if a node rolled
back a transaction locally (either BFed or voluntary rollback) while
non-primary, and the attempt to send rollback fragment failed in
transaction::streaming_rollback().
Transaction that fail to send rollback fragment can proceed to
rollback locally.  However we must ensure that rollback fragments for
those transactions are eventually delivered by the cluster. This must
be done before a potentially conflicting writeset causes BF-BF
conflicts in the rest of the cluster.
This commit is contained in:
Daniele Sciascia 2021-09-24 10:45:34 +02:00
parent efb4aab090
commit 22921e7082
7 changed files with 443 additions and 28 deletions

View File

@ -92,6 +92,7 @@
#include "compiler.hpp"
#include "xid.hpp"
#include <deque>
#include <vector>
#include <string>
#include <map>
@ -271,6 +272,17 @@ namespace wsrep
wsrep::high_priority_service* find_streaming_applier(
const wsrep::xid& xid) const;
/**
* Queue a rollback fragment the transaction with given id
*/
void queue_rollback_event(const wsrep::transaction_id& id);
/**
* Send rollback fragments for previously queued events via
* queue_rollback_event()
*/
enum wsrep::provider::status send_pending_rollback_events();
/**
* Load WSRep provider.
*
@ -632,6 +644,7 @@ namespace wsrep
, previous_primary_view_()
, current_view_()
, last_committed_gtid_()
, rollback_event_queue_()
{ }
private:
@ -670,6 +683,11 @@ namespace wsrep
void go_final(wsrep::unique_lock<wsrep::mutex>&,
const wsrep::view&, wsrep::high_priority_service*);
// Send rollback fragments for all transactions in
// rollback_event_queue_
enum wsrep::provider::status send_pending_rollback_events(
wsrep::unique_lock<wsrep::mutex>& lock);
wsrep::mutex& mutex_;
wsrep::condition_variable& cond_;
wsrep::server_service& server_service_;
@ -693,20 +711,6 @@ namespace wsrep
streaming_clients_map streaming_clients_;
typedef std::map<std::pair<wsrep::id, wsrep::transaction_id>,
wsrep::high_priority_service*> streaming_appliers_map;
class server_id_cmp
{
public:
server_id_cmp(const wsrep::id& server_id)
: server_id_(server_id)
{ }
bool operator()(const std::vector<wsrep::view::member>::value_type& vt) const
{
return (vt.id() == server_id_);
}
private:
wsrep::id server_id_;
};
streaming_appliers_map streaming_appliers_;
bool streaming_appliers_recovered_;
wsrep::provider* provider_;
@ -722,9 +726,9 @@ namespace wsrep
wsrep::view previous_primary_view_;
wsrep::view current_view_;
wsrep::gtid last_committed_gtid_;
std::deque<wsrep::transaction_id> rollback_event_queue_;
};
static inline const char* to_c_string(
enum wsrep::server_state::state state)
{

View File

@ -110,7 +110,9 @@ namespace wsrep
int protocol_version() const
{ return protocol_version_; }
const std::vector<member>& members() const { return members_; }
const std::vector<member>& members() const
{ return members_; }
/**
* Return true if the view is final
@ -128,6 +130,14 @@ namespace wsrep
*/
int member_index(const wsrep::id& member_id) const;
/**
* Return true if id is member of this view
*/
bool is_member(const wsrep::id& id) const
{
return member_index(id) != -1;
}
void print(std::ostream& os) const;
private:

View File

@ -1113,6 +1113,14 @@ void wsrep::server_state::on_sync()
}
}
init_synced_ = true;
enum wsrep::provider::status status(send_pending_rollback_events(lock));
if (status)
{
// TODO should be retried?
wsrep::log_warning()
<< "Failed to flush rollback event cache: " << status;
}
}
int wsrep::server_state::on_apply(
@ -1456,6 +1464,10 @@ void wsrep::server_state::close_orphaned_sr_transactions(
// - (1 non-primary) and (2 non-primary)
// - (1,2 primary)
// We need to rollback SRs owned by both 1 and 2.
// Notice that since the introduction of rollback_event_queue_,
// checking for equal consecutive views is no longer needed.
// However, we must keep it here for the time being, for backwards
// compatibility.
const bool equal_consecutive_views =
current_view_.equal_membership(previous_primary_view_);
@ -1504,10 +1516,8 @@ void wsrep::server_state::close_orphaned_sr_transactions(
if ((streaming_applier->transaction().state() !=
wsrep::transaction::s_prepared) &&
(equal_consecutive_views ||
(std::find_if(current_view_.members().begin(),
current_view_.members().end(),
server_id_cmp(i->first.first)) ==
current_view_.members().end())))
not current_view_.is_member(
streaming_applier->transaction().server_id())))
{
WSREP_LOG_DEBUG(wsrep::log::debug_log_level(),
wsrep::log::debug_level_server_state,
@ -1580,3 +1590,50 @@ void wsrep::server_state::close_transactions_at_disconnect(
}
streaming_appliers_recovered_ = false;
}
//
// Rollback event queue
//
void wsrep::server_state::queue_rollback_event(
const wsrep::transaction_id& id)
{
wsrep::unique_lock<wsrep::mutex> lock(mutex_);
#ifndef NDEBUG
// Make sure we don't have duplicate
// transaction ids in rollback event queue.
// There is no need to do this in release
// build given that caller (streaming_rollback())
// should avoid duplicates.
for (auto i : rollback_event_queue_)
{
assert(id != i);
}
#endif
rollback_event_queue_.push_back(id);
}
enum wsrep::provider::status
wsrep::server_state::send_pending_rollback_events(
wsrep::unique_lock<wsrep::mutex>& lock WSREP_UNUSED)
{
assert(lock.owns_lock());
while (not rollback_event_queue_.empty())
{
const wsrep::transaction_id& id(rollback_event_queue_.front());
const enum wsrep::provider::status status(provider().rollback(id));
if (status)
{
return status;
}
rollback_event_queue_.pop_front();
}
return wsrep::provider::success;
}
enum wsrep::provider::status
wsrep::server_state::send_pending_rollback_events()
{
wsrep::unique_lock<wsrep::mutex> lock(mutex_);
return send_pending_rollback_events(lock);
}

View File

@ -1452,6 +1452,19 @@ int wsrep::transaction::certify_fragment(
lock.unlock();
client_service_.debug_sync("wsrep_before_fragment_certification");
enum wsrep::provider::status status(
client_state_.server_state_.send_pending_rollback_events());
if (status)
{
wsrep::log_warning()
<< "Failed to replicate pending rollback events: "
<< status << " ("
<< wsrep::provider::to_string(status) << ")";
lock.lock();
state(lock, s_must_abort);
return 1;
}
wsrep::mutable_buffer data;
size_t log_position(0);
if (client_service_.prepare_fragment_for_replication(data, log_position))
@ -1684,6 +1697,32 @@ int wsrep::transaction::certify_commit(
state(lock, s_certifying);
lock.unlock();
enum wsrep::provider::status status(
client_state_.server_state_.send_pending_rollback_events());
if (status)
{
wsrep::log_warning()
<< "Failed to replicate pending rollback events: "
<< status << " ("
<< wsrep::provider::to_string(status) << ")";
// We failed to replicate some pending rollback fragment.
// Meaning that some transaction that was rolled back
// locally might still be active out there in the cluster.
// To avoid a potential BF-BF conflict, we need to abort
// and give up on this one.
// Notice that we can't abort a prepared XA that wants to
// commit. Fortunately, there is no need to in this case:
// the commit fragment for XA does not cause any changes and
// can't possibly conflict with other transactions out there.
if (!is_xa())
{
lock.lock();
state(lock, s_must_abort);
return 1;
}
}
if (is_streaming())
{
if (!is_xa())
@ -1912,12 +1951,16 @@ void wsrep::transaction::streaming_rollback(
lock.lock();
streaming_context_.cleanup();
enum wsrep::provider::status ret;
if ((ret = provider().rollback(id_)))
enum wsrep::provider::status status(provider().rollback(id_));
if (status)
{
lock.unlock();
client_state_.server_state_.queue_rollback_event(id_);
lock.lock();
wsrep::log_debug()
<< "Failed to replicate rollback fragment for "
<< id_ << ": " << ret;
<< "Failed to replicate rollback fragment for " << id_
<< ": " << status << " ( "
<< wsrep::provider::to_string(status) << ")";
}
}

View File

@ -51,6 +51,7 @@ int wsrep::mock_high_priority_service::apply_write_set(
{
assert(client_state_->toi_meta().seqno().is_undefined());
assert(client_state_->transaction().state() == wsrep::transaction::s_executing ||
client_state_->transaction().state() == wsrep::transaction::s_prepared ||
client_state_->transaction().state() == wsrep::transaction::s_replaying);
if (fail_next_applying_)
{
@ -62,7 +63,18 @@ int wsrep::mock_high_priority_service::apply_write_set(
}
else
{
return 0;
int ret(0);
if (!(meta.flags() & wsrep::provider::flag::commit))
{
client_state_->fragment_applied(meta.seqno());
}
if ((meta.flags() & wsrep::provider::flag::prepare))
{
client_state_->assign_xid(wsrep::xid(1, 3, 1, "xid"));
ret = client_state_->before_prepare() ||
client_state_->after_prepare();
}
return ret;
};
}
@ -77,9 +89,19 @@ int wsrep::mock_high_priority_service::commit(
ret = client_state_->before_prepare() ||
client_state_->after_prepare();
}
return (ret || client_state_->before_commit() ||
client_state_->ordered_commit() ||
client_state_->after_commit());
const bool is_ordered= !ws_meta.seqno().is_undefined();
if (!is_ordered)
{
client_state_->before_rollback();
client_state_->after_rollback();
return 0;
}
else
{
return (ret || client_state_->before_commit() ||
client_state_->ordered_commit() ||
client_state_->after_commit());
}
}
int wsrep::mock_high_priority_service::rollback(

View File

@ -649,3 +649,266 @@ BOOST_FIXTURE_TEST_CASE(
ss.disconnect();
BOOST_REQUIRE(ss.state() == wsrep::server_state::s_disconnected);
}
/////////////////////////////////////////////////////////////////////////////
// Orphaned SR //
/////////////////////////////////////////////////////////////////////////////
// Test the behavior of server_state::close_orphaned_sr_transactions().
// In this test we check the scenario where we initially have 3 nodes in
// the cluster (s1, s2, s3), and this server_state delivers one streaming
// fragment from s2 and s3 each, followed by view changes:
//
// view 1: primary (s1, s2, s3)
// view 2: primary (s1, s2)
// view 3: non-primary (s1)
// view 4: non-primary (s1, s3)
// view 5: primary (s1, s2, s3)
//
// We expect that on view 2, transaction originated from s3 is considered
// orphaned, so it should be rolled back.
// Transaction from s2 should never be considered orphaned in this scenario,
// we expect it to survive until the end of the test. That's because
// transactions are rolled back in primary views only, and because s2
// is member of all primary views in this scenario.
BOOST_FIXTURE_TEST_CASE(server_state_close_orphaned_transactions,
sst_first_server_fixture)
{
connect_in_view(third_view);
server_service.logged_view(third_view);
sst_received_action();
ss.on_view(third_view, &hps);
// initially we have members (s1, s2, s3)
std::vector<wsrep::view::member> members(ss.current_view().members());
// apply a fragment coming from s2
wsrep::ws_meta meta_s2(wsrep::gtid(wsrep::id("s2"), wsrep::seqno(1)),
wsrep::stid(wsrep::id("s2"),
wsrep::transaction_id(1),
wsrep::client_id(1)),
wsrep::seqno(1),
wsrep::provider::flag::start_transaction);
BOOST_REQUIRE(ss.on_apply(hps, ws_handle, meta_s2,
wsrep::const_buffer("1", 1)) == 0);
BOOST_REQUIRE(ss.find_streaming_applier(
meta_s2.server_id(), meta_s2.transaction_id()));
// apply a fragment coming from s3
wsrep::ws_meta meta_s3(wsrep::gtid(wsrep::id("s3"), wsrep::seqno(2)),
wsrep::stid(wsrep::id("s3"),
wsrep::transaction_id(1),
wsrep::client_id(1)),
wsrep::seqno(2),
wsrep::provider::flag::start_transaction);
BOOST_REQUIRE(ss.on_apply(hps, ws_handle, meta_s3,
wsrep::const_buffer("1", 1)) == 0);
BOOST_REQUIRE(ss.find_streaming_applier(
meta_s3.server_id(), meta_s3.transaction_id()));
// s3 drops out of the cluster, deliver primary view (s1, s2)
wsrep::view::member s3(members.back());
members.pop_back();
ss.on_view(wsrep::view(ss.current_view().state_id(),
ss.current_view().view_seqno() + 1,
wsrep::view::primary,
0, // capabilities
0, // own index
1, // protocol version
members), &hps);
// transaction from s2 is still present
BOOST_REQUIRE(ss.find_streaming_applier(
meta_s2.server_id(), meta_s2.transaction_id()));
// transaction from s3 is gone
BOOST_REQUIRE(not ss.find_streaming_applier(
meta_s3.server_id(), meta_s3.transaction_id()));
// s2 drops out of the cluster, deliver non-primary view (s1)
wsrep::view::member s2(members.back());
members.pop_back();
ss.on_view(wsrep::view(ss.current_view().state_id(),
ss.current_view().view_seqno(),
wsrep::view::non_primary,
0, // capabilities
0, // own index
1, // protocol version
members), &hps);
// no streaming appliers are closed on non-primary view,
// so transaction from s2 is still present
BOOST_REQUIRE(ss.find_streaming_applier(
meta_s2.server_id(), meta_s2.transaction_id()));
// s3 comes back, deliver non-primary view (s1, s3)
members.push_back(s3);
ss.on_view(wsrep::view(ss.current_view().state_id(),
ss.current_view().view_seqno() + 1,
wsrep::view::non_primary,
0, // capabilities
0, // own index
1, // protocol version
members), &hps);
// transaction s2 is still present after non-primary view
BOOST_REQUIRE(ss.find_streaming_applier(
meta_s2.server_id(), meta_s2.transaction_id()));
// s2 comes back, deliver primary-view (s1, s2, s3)
members.push_back(s2);
ss.on_view(wsrep::view(ss.current_view().state_id(),
ss.current_view().view_seqno() + 1,
wsrep::view::primary,
0, // capabilities
0, // own index
1, // protocol version
members), &hps);
// finally, transaction from s2 is still present (part of primary view)
// and transaction from s3 is gone
BOOST_REQUIRE(ss.find_streaming_applier(
meta_s2.server_id(), meta_s2.transaction_id()));
BOOST_REQUIRE(not ss.find_streaming_applier(
meta_s3.server_id(), meta_s3.transaction_id()));
// cleanup
wsrep::ws_meta meta_commit_s2(wsrep::gtid(wsrep::id("s2"), wsrep::seqno(3)),
wsrep::stid(wsrep::id("s2"),
wsrep::transaction_id(1),
wsrep::client_id(1)),
wsrep::seqno(3),
wsrep::provider::flag::commit);
BOOST_REQUIRE(ss.on_apply(hps, ws_handle, meta_commit_s2,
wsrep::const_buffer("1", 1)) == 0);
BOOST_REQUIRE(not ss.find_streaming_applier(
meta_commit_s2.server_id(), meta_commit_s2.transaction_id()));
}
// Test the case where two consecutive primary views with the
// same members are delivered (provider may do so).
// Expect SR transactions to be rolled back on equal consecutive views
BOOST_FIXTURE_TEST_CASE(server_state_equal_consecutive_views,
sst_first_server_fixture)
{
connect_in_view(third_view);
server_service.logged_view(third_view);
sst_received_action();
ss.on_view(third_view, &hps);
// apply a fragment coming from s2
wsrep::ws_meta meta_s2(wsrep::gtid(wsrep::id("s2"), wsrep::seqno(1)),
wsrep::stid(wsrep::id("s2"),
wsrep::transaction_id(1),
wsrep::client_id(1)),
wsrep::seqno(1),
wsrep::provider::flag::start_transaction);
BOOST_REQUIRE(ss.on_apply(hps, ws_handle, meta_s2,
wsrep::const_buffer("1", 1)) == 0);
BOOST_REQUIRE(ss.find_streaming_applier(
meta_s2.server_id(), meta_s2.transaction_id()));
// apply a fragment coming from s3
wsrep::ws_meta meta_s3(wsrep::gtid(wsrep::id("s3"), wsrep::seqno(2)),
wsrep::stid(wsrep::id("s3"),
wsrep::transaction_id(1),
wsrep::client_id(1)),
wsrep::seqno(2),
wsrep::provider::flag::start_transaction);
BOOST_REQUIRE(ss.on_apply(hps, ws_handle, meta_s3,
wsrep::const_buffer("1", 1)) == 0);
BOOST_REQUIRE(ss.find_streaming_applier(
meta_s3.server_id(), meta_s3.transaction_id()));
// deliver primary view with the same members (s1, s2, s3)
ss.on_view(wsrep::view(ss.current_view().state_id(),
ss.current_view().view_seqno() + 1,
wsrep::view::primary,
0, // capabilities
0, // own index
1, // protocol version
ss.current_view().members()), &hps);
// transaction from s2 and s3 are gone
BOOST_REQUIRE(not ss.find_streaming_applier(
meta_s2.server_id(), meta_s2.transaction_id()));
BOOST_REQUIRE(not ss.find_streaming_applier(
meta_s3.server_id(), meta_s3.transaction_id()));
}
// Verify that prepared XA transactions are not rolled back
// by close_orphaned_transactions()
BOOST_FIXTURE_TEST_CASE(server_state_xa_not_orphaned,
sst_first_server_fixture)
{
connect_in_view(third_view);
server_service.logged_view(third_view);
sst_received_action();
ss.on_view(third_view, &hps);
// initially we have members (s1, s2, s3)
std::vector<wsrep::view::member> members(ss.current_view().members());
wsrep::ws_meta meta_s3(wsrep::gtid(wsrep::id("s3"), wsrep::seqno(1)),
wsrep::stid(wsrep::id("s3"),
wsrep::transaction_id(1),
wsrep::client_id(1)),
wsrep::seqno(1),
wsrep::provider::flag::start_transaction |
wsrep::provider::flag::prepare);
BOOST_REQUIRE(ss.on_apply(hps, ws_handle, meta_s3,
wsrep::const_buffer("1", 1)) == 0);
BOOST_REQUIRE(ss.find_streaming_applier(
meta_s3.server_id(), meta_s3.transaction_id()));
// s3 drops out of the cluster, deliver primary view (s1, s2)
wsrep::view::member s3(members.back());
members.pop_back();
ss.on_view(wsrep::view(ss.current_view().state_id(),
ss.current_view().view_seqno() + 1,
wsrep::view::primary,
0, // capabilities
0, // own index
1, // protocol version
members), &hps);
// transaction from s3 is still present
BOOST_REQUIRE(ss.find_streaming_applier(
meta_s3.server_id(), meta_s3.transaction_id()));
// s3 comes back, deliver primary view (s1, s2, s3)
members.push_back(s3);
ss.on_view(wsrep::view(ss.current_view().state_id(),
ss.current_view().view_seqno() + 1,
wsrep::view::primary,
0, // capabilities
0, // own index
1, // protocol version
members), &hps);
// transaction from s3 is still present
BOOST_REQUIRE(ss.find_streaming_applier(
meta_s3.server_id(), meta_s3.transaction_id()));
// cleanup
wsrep::ws_meta meta_commit_s3(wsrep::gtid(wsrep::id("s3"), wsrep::seqno(3)),
wsrep::stid(wsrep::id("s3"),
wsrep::transaction_id(1),
wsrep::client_id(1)),
wsrep::seqno(3),
wsrep::provider::flag::commit);
BOOST_REQUIRE(ss.on_apply(hps, ws_handle, meta_commit_s3,
wsrep::const_buffer("1", 1)) == 0);
BOOST_REQUIRE(not ss.find_streaming_applier(
meta_commit_s3.server_id(), meta_commit_s3.transaction_id()));
}

View File

@ -87,3 +87,19 @@ BOOST_AUTO_TEST_CASE(view_test_equal_membership)
BOOST_REQUIRE(!v1.equal_membership(v3));
BOOST_REQUIRE(!v3.equal_membership(v1));
}
BOOST_AUTO_TEST_CASE(view_test_is_member)
{
wsrep::view view(wsrep::gtid(wsrep::id("cluster"), wsrep::seqno(1)),
wsrep::seqno(1),
wsrep::view::primary,
0,
1,
0,
{ wsrep::view::member(wsrep::id("1"), "", ""),
wsrep::view::member(wsrep::id("2"), "", "") });
BOOST_REQUIRE(view.is_member(wsrep::id("2")));
BOOST_REQUIRE(view.is_member(wsrep::id("1")));
BOOST_REQUIRE(not view.is_member(wsrep::id("0")));
}