1
0
mirror of https://github.com/codership/wsrep-lib.git synced 2025-07-02 05:22:26 +03:00

close SR transactions on equal consecutive views

Fixes a bug where the fact that an SR master leaves the primary view
gets missed. When two consecutive primary views have the same
membership we now assume that every SR needs to be rolled back, as the
system may have been through a state of only non-primary components.
This commit is contained in:
Leandro Pacheco
2019-03-05 09:41:48 +01:00
parent 56f605c607
commit 71f3fb2d01
5 changed files with 107 additions and 11 deletions

View File

@ -355,6 +355,10 @@ namespace wsrep
* Return current view * Return current view
*/ */
const wsrep::view& current_view() const { return current_view_; } const wsrep::view& current_view() const { return current_view_; }
/**
* Return the previous primary view
*/
const wsrep::view& previous_primary_view() const { return previous_primary_view_; }
/** /**
* Set last committed GTID. * Set last committed GTID.
*/ */
@ -603,6 +607,7 @@ namespace wsrep
, max_protocol_version_(max_protocol_version) , max_protocol_version_(max_protocol_version)
, rollback_mode_(rollback_mode) , rollback_mode_(rollback_mode)
, connected_gtid_() , connected_gtid_()
, previous_primary_view_()
, current_view_() , current_view_()
, last_committed_gtid_() , last_committed_gtid_()
{ } { }
@ -692,6 +697,7 @@ namespace wsrep
int max_protocol_version_; int max_protocol_version_;
enum rollback_mode rollback_mode_; enum rollback_mode rollback_mode_;
wsrep::gtid connected_gtid_; wsrep::gtid connected_gtid_;
wsrep::view previous_primary_view_;
wsrep::view current_view_; wsrep::view current_view_;
wsrep::gtid last_committed_gtid_; wsrep::gtid last_committed_gtid_;
}; };

View File

@ -103,6 +103,11 @@ namespace wsrep
ssize_t own_index() const ssize_t own_index() const
{ return own_index_; } { return own_index_; }
/**
* Return true if the two views have the same membership
*/
bool equal_membership(const wsrep::view& other) const;
int protocol_version() const int protocol_version() const
{ return protocol_version_; } { return protocol_version_; }
const std::vector<member>& members() const { return members_; } const std::vector<member>& members() const { return members_; }

View File

@ -612,6 +612,9 @@ void wsrep::server_state::sst_received(wsrep::client_service& cs,
throw wsrep::runtime_error(msg.str()); throw wsrep::runtime_error(msg.str());
} }
if (current_view_.status() == wsrep::view::primary) {
previous_primary_view_ = current_view_;
}
current_view_ = v; current_view_ = v;
server_service_.log_view(NULL /* this view is stored already */, v); server_service_.log_view(NULL /* this view is stored already */, v);
} }
@ -898,6 +901,9 @@ void wsrep::server_state::on_view(const wsrep::view& view,
<< "name: " << i->name(); << "name: " << i->name();
} }
wsrep::log_info() << "================================================="; wsrep::log_info() << "=================================================";
if (current_view_.status() == wsrep::view::primary) {
previous_primary_view_ = current_view_;
}
current_view_ = view; current_view_ = view;
switch (view.status()) switch (view.status())
{ {
@ -1256,7 +1262,20 @@ void wsrep::server_state::close_orphaned_sr_transactions(
wsrep::high_priority_service& high_priority_service) wsrep::high_priority_service& high_priority_service)
{ {
assert(lock.owns_lock()); assert(lock.owns_lock());
if (current_view_.own_index() == -1)
// When the originator of an SR transaction leaves the primary
// component of the cluster, that SR must be rolled back. When two
// consecutive primary views have the same membership, the system
// may have been in a state with no primary components.
// Example with 2 node cluster:
// - (1,2 primary)
// - (1 non-primary) and (2 non-primary)
// - (1,2 primary)
// We need to rollback SRs owned by both 1 and 2.
const bool equal_consecutive_views =
current_view_.equal_membership(previous_primary_view_);
if (current_view_.own_index() == -1 || equal_consecutive_views)
{ {
while (streaming_clients_.empty() == false) while (streaming_clients_.empty() == false)
{ {
@ -1285,20 +1304,21 @@ void wsrep::server_state::close_orphaned_sr_transactions(
} }
} }
streaming_appliers_map::iterator i(streaming_appliers_.begin()); streaming_appliers_map::iterator i(streaming_appliers_.begin());
while (i != streaming_appliers_.end()) while (i != streaming_appliers_.end())
{ {
if (std::find_if(current_view_.members().begin(), bool origin_not_in_view = std::find_if(current_view_.members().begin(),
current_view_.members().end(), current_view_.members().end(),
server_id_cmp(i->first.first)) == server_id_cmp(i->first.first)) ==
current_view_.members().end()) current_view_.members().end();
if (origin_not_in_view || equal_consecutive_views)
{ {
WSREP_LOG_DEBUG(wsrep::log::debug_log_level(), WSREP_LOG_DEBUG(wsrep::log::debug_log_level(),
wsrep::log::debug_level_server_state, wsrep::log::debug_level_server_state,
"Removing SR fragments for " "Removing SR fragments for "
<< i->first.first << i->first.first
<< ", " << i->first.second); << ", " << i->first.second);
wsrep::id server_id(i->first.first); wsrep::id server_id(i->first.first);
wsrep::transaction_id transaction_id(i->first.second); wsrep::transaction_id transaction_id(i->first.second);
wsrep::high_priority_service* streaming_applier(i->second); wsrep::high_priority_service* streaming_applier(i->second);

View File

@ -33,6 +33,23 @@ int wsrep::view::member_index(const wsrep::id& member_id) const
return -1; return -1;
} }
/**
 * Compare the member set of this view against another view.
 *
 * @param other View to compare against.
 *
 * @return True when both views hold the same number of members and
 *         every member id of this view is found in the other view,
 *         false otherwise.
 */
bool wsrep::view::equal_membership(const wsrep::view& other) const
{
    // Views of different sizes can never have the same membership.
    const std::vector<member>& theirs(other.members_);
    if (members_.size() != theirs.size())
    {
        return false;
    }
    // Member ordering may differ between the views, so every member
    // of this view is looked up by id in the other one. The scan
    // stops at the first id which the other view does not know.
    std::vector<member>::const_iterator it(members_.begin());
    while (it != members_.end() && other.member_index(it->id()) != -1)
    {
        ++it;
    }
    // Reaching the end means that no lookup failed.
    return it == members_.end();
}
void wsrep::view::print(std::ostream& os) const void wsrep::view::print(std::ostream& os) const
{ {
os << " id: " << state_id() << "\n" os << " id: " << state_id() << "\n"

View File

@ -39,3 +39,51 @@ BOOST_AUTO_TEST_CASE(view_test_member_index)
BOOST_REQUIRE(view.member_index(wsrep::id("3")) == 2); BOOST_REQUIRE(view.member_index(wsrep::id("3")) == 2);
BOOST_REQUIRE(view.member_index(wsrep::id("4")) == -1); BOOST_REQUIRE(view.member_index(wsrep::id("4")) == -1);
} }
// Verify that equal_membership() ignores member ordering, is symmetric,
// and rejects views whose member lists differ in size.
BOOST_AUTO_TEST_CASE(view_test_equal_membership)
{
    // Baseline view with members 1, 2, 3.
    std::vector<wsrep::view::member> members_123;
    members_123.push_back(wsrep::view::member(wsrep::id("1"), "", ""));
    members_123.push_back(wsrep::view::member(wsrep::id("2"), "", ""));
    members_123.push_back(wsrep::view::member(wsrep::id("3"), "", ""));
    const wsrep::view view_123(
        wsrep::gtid(wsrep::id("cluster"), wsrep::seqno(1)),
        wsrep::seqno(1),
        wsrep::view::primary,
        0,
        1,
        0,
        members_123);

    // Same member ids as the baseline, listed in a different order.
    std::vector<wsrep::view::member> members_231;
    members_231.push_back(wsrep::view::member(wsrep::id("2"), "", ""));
    members_231.push_back(wsrep::view::member(wsrep::id("3"), "", ""));
    members_231.push_back(wsrep::view::member(wsrep::id("1"), "", ""));
    const wsrep::view view_231(
        wsrep::gtid(wsrep::id("cluster"), wsrep::seqno(1)),
        wsrep::seqno(1),
        wsrep::view::primary,
        0,
        1,
        0,
        members_231);

    // Baseline members plus one additional member.
    std::vector<wsrep::view::member> members_1234;
    members_1234.push_back(wsrep::view::member(wsrep::id("1"), "", ""));
    members_1234.push_back(wsrep::view::member(wsrep::id("2"), "", ""));
    members_1234.push_back(wsrep::view::member(wsrep::id("3"), "", ""));
    members_1234.push_back(wsrep::view::member(wsrep::id("4"), "", ""));
    const wsrep::view view_1234(
        wsrep::gtid(wsrep::id("cluster"), wsrep::seqno(1)),
        wsrep::seqno(1),
        wsrep::view::primary,
        0,
        1,
        0,
        members_1234);

    // Ordering must not matter, in either direction.
    BOOST_REQUIRE(view_123.equal_membership(view_231));
    BOOST_REQUIRE(view_231.equal_membership(view_123));

    // A differing member count must be reported as unequal both ways.
    BOOST_REQUIRE(!view_123.equal_membership(view_1234));
    BOOST_REQUIRE(!view_1234.equal_membership(view_123));
}