1
0
mirror of https://github.com/codership/wsrep-lib.git synced 2025-07-28 20:02:00 +03:00

Recover current view from state after SST.

When a member joins the group and needs to receive an SST, it won't
receive the corresponding membership view event, because the SST
happens after the event and will already include the effects of
all events ordered before it. The view must then be recovered from
the received state.

Minor renames and cleanups.

References codership/wsrep-lib#18
This commit is contained in:
Alexey Yurchenko
2018-11-11 21:59:24 +02:00
parent c7e8bfbdb5
commit fb14883547
12 changed files with 198 additions and 27 deletions

View File

@ -49,6 +49,48 @@ wsrep::provider* wsrep::provider::make_provider(
return 0;
}
/**
 * Render a capability bitmask as a human readable, comma separated list.
 *
 * Every known capability bit that is set in caps contributes its name to
 * the result, in the order the capabilities are listed below. Bits that
 * remain set after all known capabilities have been consumed are reported
 * as one trailing "UNKNOWN(<value>)" entry; in builds with assertions
 * enabled this case also trips an assert so that a missing capability
 * name is noticed.
 *
 * @param caps Bitwise OR of capability flags.
 *
 * @return Capability names joined with ", ", or an empty string if no
 *         bits are set.
 */
std::string wsrep::provider::capability::str(int caps)
{
    std::ostringstream os;

    // Append the name of cap_value if it is set and clear the bit, so that
    // whatever is left in caps afterwards is known to be unrecognized.
    // cap_string must be a string literal: it is concatenated with the
    // ", " separator at compile time.
#define WSREP_PRINT_CAPABILITY(cap_value, cap_string)   \
    do {                                                \
        if (caps & (cap_value)) {                       \
            os << cap_string ", ";                      \
            caps &= ~(cap_value);                       \
        }                                               \
    } while (0)

    WSREP_PRINT_CAPABILITY(multi_master,         "MULTI-MASTER");
    WSREP_PRINT_CAPABILITY(certification,        "CERTIFICATION");
    WSREP_PRINT_CAPABILITY(parallel_applying,    "PARALLEL_APPLYING");
    WSREP_PRINT_CAPABILITY(transaction_replay,   "REPLAY");
    WSREP_PRINT_CAPABILITY(isolation,            "ISOLATION");
    WSREP_PRINT_CAPABILITY(pause,                "PAUSE");
    WSREP_PRINT_CAPABILITY(causal_reads,         "CAUSAL_READ");
    WSREP_PRINT_CAPABILITY(causal_transaction,   "CAUSAL_TRX");
    WSREP_PRINT_CAPABILITY(incremental_writeset, "INCREMENTAL_WS");
    WSREP_PRINT_CAPABILITY(session_locks,        "SESSION_LOCK");
    WSREP_PRINT_CAPABILITY(distributed_locks,    "DISTRIBUTED_LOCK");
    WSREP_PRINT_CAPABILITY(consistency_check,    "CONSISTENCY_CHECK");
    WSREP_PRINT_CAPABILITY(unordered,            "UNORDERED");
    WSREP_PRINT_CAPABILITY(annotation,           "ANNOTATION");
    WSREP_PRINT_CAPABILITY(preordered,           "PREORDERED");
    WSREP_PRINT_CAPABILITY(streaming,            "STREAMING");
    WSREP_PRINT_CAPABILITY(snapshot,             "SNAPSHOT");
    WSREP_PRINT_CAPABILITY(nbo,                  "NBO");

#undef WSREP_PRINT_CAPABILITY

    if (caps)
    {
        assert(caps == 0); // to catch missed capabilities
        // Terminate with the same ", " separator as every other entry so
        // that the trailing-separator trim below does not cut off the
        // closing parenthesis.
        os << "UNKNOWN(" << caps << "), ";
    }

    std::string ret(os.str());
    // Any non-empty result ends with ", "; drop that trailing separator.
    if (ret.size() > 2) ret.erase(ret.size() - 2);
    return ret;
}
std::string wsrep::flags_to_string(int flags)
{
std::ostringstream oss;

View File

@ -491,11 +491,13 @@ void wsrep::server_state::sst_sent(const wsrep::gtid& gtid, int error)
}
}
void wsrep::server_state::sst_received(const wsrep::gtid& gtid, int error)
void wsrep::server_state::sst_received(wsrep::client_service& cs,
const wsrep::gtid& gtid, int error)
{
wsrep::log_info() << "SST received: " << gtid;
wsrep::unique_lock<wsrep::mutex> lock(mutex_);
assert(state_ == s_joiner || state_ == s_initialized);
if (server_service_.sst_before_init())
{
if (init_initialized_ == false)
@ -504,20 +506,40 @@ void wsrep::server_state::sst_received(const wsrep::gtid& gtid, int error)
wait_until_state(lock, s_initialized);
assert(init_initialized_);
}
state(lock, s_joined);
lock.unlock();
if (provider().sst_received(gtid, error))
{
throw wsrep::runtime_error("SST received failed");
}
}
else
state(lock, s_joined);
lock.unlock();
if (id_.is_undefined())
{
state(lock, s_joined);
if (provider().sst_received(gtid, error))
{
throw wsrep::runtime_error("SST received failed");
}
assert(0);
throw wsrep::runtime_error(
"wsrep::sst_received() called before connection to cluster");
}
wsrep::view const v(server_service_.get_view(cs, id_));
wsrep::log_info() << "Recovered view from SST:\n" << v;
if (v.state_id().id() != gtid.id() ||
v.state_id().seqno() > gtid.seqno())
{
/* Since IN GENERAL we may not be able to recover SST GTID from
* the state data, we have to rely on SST script passing the GTID
* value explicitly.
* Here we check if the passed GTID makes any sense: it should
* have the same UUID and greater or equal seqno than the last
* logged view. */
std::ostringstream msg;
msg << "SST script passed bogus GTID: " << gtid
<< ". Preceeding view GTID: " << v.state_id();
throw wsrep::runtime_error(msg.str());
}
current_view_ = v;
if (provider().sst_received(gtid, error))
{
throw wsrep::runtime_error("wsrep::sst_received() failed");
}
}
@ -676,7 +698,7 @@ void wsrep::server_state::on_view(const wsrep::view& view,
assert(high_priority_service);
if (high_priority_service)
{
close_foreign_sr_transactions(lock, *high_priority_service);
close_orphaned_sr_transactions(lock, *high_priority_service);
}
if (server_service_.sst_before_init())
{
@ -1009,7 +1031,7 @@ void wsrep::server_state::wait_until_state(
cond_.notify_all();
}
void wsrep::server_state::close_foreign_sr_transactions(
void wsrep::server_state::close_orphaned_sr_transactions(
wsrep::unique_lock<wsrep::mutex>& lock,
wsrep::high_priority_service& high_priority_service)
{

View File

@ -18,13 +18,45 @@
*/
#include "wsrep/view.hpp"
#include "wsrep/provider.hpp"
/**
 * Find the position of a member in this view's member list.
 *
 * The entry at own_index_ is tried first as a quick guess; if it does not
 * match, the remaining entries of the member list are scanned in order.
 *
 * @param member_id Identifier of the member to look up.
 *
 * @return Zero based index of the first member whose id equals member_id,
 *         or -1 if there is no such member.
 */
int wsrep::view::member_index(const wsrep::id& member_id) const
{
    // First, a quick guess. own_index_ is checked against both ends of
    // the member list so that a stale or inconsistent index can never
    // cause an out of bounds access.
    if (own_index_ >= 0 &&
        static_cast<unsigned long>(own_index_) < members_.size() &&
        members_[own_index_].id() == member_id)
    {
        return own_index_;
    }

    // Guessing didn't work, scan the list. The entry at own_index_ is
    // skipped because it was either already compared above or is not a
    // valid position.
    for (unsigned int i(0); i < members_.size(); ++i)
    {
        // Compare as signed values: a negative own_index_ must not be
        // silently converted to a large unsigned number.
        if (static_cast<int>(i) != own_index_ &&
            members_[i].id() == member_id)
        {
            return static_cast<int>(i);
        }
    }

    return -1;
}
/**
 * Translate a view status into the label used when printing a view.
 *
 * @param s View status value.
 *
 * @return Statically allocated string naming the status. A value that
 *         matches none of the handled enumerators trips an assert and,
 *         when assertions are compiled out, yields "invalid status".
 */
static const char* view_status_str(enum wsrep::view::status s)
{
    const char* label = 0;

    // No default label on purpose: the fallback after the switch handles
    // values outside of the listed enumerators.
    switch (s)
    {
    case wsrep::view::primary:
        label = "PRIMARY";
        break;
    case wsrep::view::non_primary:
        label = "NON-PRIMARY";
        break;
    case wsrep::view::disconnected:
        label = "DISCONNECTED";
        break;
    }

    if (label == 0)
    {
        assert(0);
        label = "invalid status";
    }
    return label;
}
void wsrep::view::print(std::ostream& os) const
{
os << " id: " << state_id() << "\n"
<< " status: " << status() << "\n"
<< " status: " << view_status_str(status()) << "\n"
<< " prococol_version: " << protocol_version() << "\n"
<< " final: " << final() << "\n"
<< " capabilities: " << provider::capability::str(capabilities())<<"\n"
<< " final: " << (final() ? "yes" : "no") << "\n"
<< " own_index: " << own_index() << "\n"
<< " members(" << members().size() << "):\n";
@ -32,7 +64,7 @@ void wsrep::view::print(std::ostream& os) const
i != members().end(); ++i)
{
os << "\t" << (i - members().begin()) /* ordinal index */
<< ") id: " << i->id()
<< ", name: " << i->name() << "\n";
<< ": " << i->id()
<< ", " << i->name() << "\n";
}
}