mirror of
https://github.com/MariaDB/server.git
synced 2025-08-01 03:47:19 +03:00
Fix for the following bugs:
- BUG#22306: STOP INSTANCE can not be applied for instances in Crashed, Failed and Abandoned; - BUG#23476: DROP INSTANCE does not work - BUG#23215: STOP INSTANCE takes too much time BUG#22306: The problem was that STOP INSTANCE checked that mysqld is up and running. If it was not so, STOP INSTANCE reported an error. Now, STOP INSTANCE reports an error if the instance has been started (mysqld can be down). BUG#23476: The problem was that DROP INSTANCE tried to stop inactive instance. The fix is trivial. BUG#23215: The problem was that locks were not acquired properly, so the instance-monitoring thread could not acquire the mutex, holded by the query-processing thread. The fix is to simplify locking scheme by moving instance-related information to Instance-class out of Guardian-class. This allows to get rid of storing a separate list of Instance-information in Guardian and keeping it synchronized with the original list in Instance_map. server-tools/instance-manager/commands.cc: 1. Introduce Instance_cmd class -- base class for the commands that deal with the one instance; 2. Remove Instance_map argument from command constructors; 3. Ensure, that Instance Map and Instance are locked in the proper order; 4. Polishing. server-tools/instance-manager/commands.h: 1. Introduce Instance_cmd class -- base class for the commands that deal with the one instance; 2. Remove Instance_map argument from command constructors; 3. Polishing. server-tools/instance-manager/guardian.cc: 1. Move "extended" instance information to the Instance-class. That allows to get rid of storing instance-related container and data in Guardian class, that significantly simplifies locking schema. 2. Polishing. server-tools/instance-manager/guardian.h: 1. Move "extended" instance information to the Instance-class. That allows to get rid of storing instance-related container and data in Guardian class, that significantly simplifies locking schema. 2. Polishing. server-tools/instance-manager/instance.cc: 1. Move "extended" instance information to the Instance-class. 2. Introduce new state STOPPED to mark that guarded instance is stopped and should not be restarted by Guardian. 3. Polishing. server-tools/instance-manager/instance.h: 1. Move "extended" instance information to the Instance-class. 2. Introduce new state STOPPED to mark that guarded instance is stopped and should not be restarted by Guardian. 3. Polishing. server-tools/instance-manager/instance_map.cc: 1. Move flush_instances() from Instance_map to Manager. 2. Polishing. server-tools/instance-manager/instance_map.h: 1. Move flush_instances() from Instance_map to Manager. 2. Polishing. server-tools/instance-manager/instance_options.h: Polishing. server-tools/instance-manager/manager.cc: 1. Move flush_instances() from Instance_map to Manager. 2. Polishing. server-tools/instance-manager/manager.h: 1. Move flush_instances() from Instance_map to Manager. 2. Polishing. server-tools/instance-manager/user_map.cc: Polishing.
This commit is contained in:
@ -28,101 +28,126 @@
|
||||
#include "instance_map.h"
|
||||
#include "log.h"
|
||||
#include "mysql_manager_error.h"
|
||||
#include "options.h"
|
||||
|
||||
const char *
|
||||
Guardian::get_instance_state_name(enum_instance_state state)
|
||||
{
|
||||
switch (state) {
|
||||
case NOT_STARTED:
|
||||
return "offline";
|
||||
|
||||
case STARTING:
|
||||
return "starting";
|
||||
/*************************************************************************
|
||||
{{{ Constructor & destructor.
|
||||
*************************************************************************/
|
||||
|
||||
case STARTED:
|
||||
return "online";
|
||||
/**
|
||||
Guardian constructor.
|
||||
|
||||
case JUST_CRASHED:
|
||||
return "failed";
|
||||
SYNOPSIS
|
||||
Guardian()
|
||||
thread_registry_arg
|
||||
instance_map_arg
|
||||
|
||||
case CRASHED:
|
||||
return "crashed";
|
||||
|
||||
case CRASHED_AND_ABANDONED:
|
||||
return "abandoned";
|
||||
|
||||
case STOPPING:
|
||||
return "stopping";
|
||||
}
|
||||
|
||||
return NULL; /* just to ignore compiler warning. */
|
||||
}
|
||||
|
||||
/* {{{ Constructor & destructor. */
|
||||
DESCRIPTION
|
||||
Nominal contructor intended for assigning references and initialize
|
||||
trivial objects. Real initialization is made by init() method.
|
||||
*/
|
||||
|
||||
Guardian::Guardian(Thread_registry *thread_registry_arg,
|
||||
Instance_map *instance_map_arg,
|
||||
uint monitoring_interval_arg)
|
||||
:stopped(FALSE),
|
||||
monitoring_interval(monitoring_interval_arg),
|
||||
Instance_map *instance_map_arg)
|
||||
:shutdown_requested(FALSE),
|
||||
stopped(FALSE),
|
||||
thread_registry(thread_registry_arg),
|
||||
instance_map(instance_map_arg),
|
||||
shutdown_requested(FALSE)
|
||||
instance_map(instance_map_arg)
|
||||
{
|
||||
pthread_mutex_init(&LOCK_guardian, 0);
|
||||
pthread_cond_init(&COND_guardian, 0);
|
||||
init_alloc_root(&alloc, MEM_ROOT_BLOCK_SIZE, 0);
|
||||
}
|
||||
|
||||
|
||||
Guardian::~Guardian()
|
||||
{
|
||||
/* delay guardian destruction to the moment when no one needs it */
|
||||
pthread_mutex_lock(&LOCK_guardian);
|
||||
free_root(&alloc, MYF(0));
|
||||
pthread_mutex_unlock(&LOCK_guardian);
|
||||
/*
|
||||
NOTE: it's necessary to synchronize here, because Guiardian thread can be
|
||||
still alive an hold the mutex (because it is detached and we have no
|
||||
control over it).
|
||||
*/
|
||||
|
||||
lock();
|
||||
unlock();
|
||||
|
||||
pthread_mutex_destroy(&LOCK_guardian);
|
||||
pthread_cond_destroy(&COND_guardian);
|
||||
}
|
||||
|
||||
/* }}} */
|
||||
/*************************************************************************
|
||||
}}}
|
||||
*************************************************************************/
|
||||
|
||||
|
||||
/**
|
||||
Send request to stop Guardian.
|
||||
|
||||
SYNOPSIS
|
||||
request_shutdown()
|
||||
*/
|
||||
|
||||
void Guardian::request_shutdown()
|
||||
{
|
||||
pthread_mutex_lock(&LOCK_guardian);
|
||||
/* STOP Instances or just clean up Guardian repository */
|
||||
stop_instances();
|
||||
|
||||
lock();
|
||||
shutdown_requested= TRUE;
|
||||
pthread_mutex_unlock(&LOCK_guardian);
|
||||
unlock();
|
||||
|
||||
ping();
|
||||
}
|
||||
|
||||
|
||||
void Guardian::process_instance(Instance *instance,
|
||||
GUARD_NODE *current_node,
|
||||
LIST **guarded_instances,
|
||||
LIST *node)
|
||||
/**
|
||||
Process an instance.
|
||||
|
||||
SYNOPSIS
|
||||
process_instance()
|
||||
instance a pointer to the instance for processing
|
||||
|
||||
MT-NOTE:
|
||||
- the given instance must be locked before calling this operation;
|
||||
- Guardian must be locked before calling this operation.
|
||||
*/
|
||||
|
||||
void Guardian::process_instance(Instance *instance)
|
||||
{
|
||||
uint waitchild= (uint) Instance::DEFAULT_SHUTDOWN_DELAY;
|
||||
/* The amount of times, Guardian attempts to restart an instance */
|
||||
int restart_retry= 100;
|
||||
time_t current_time= time(NULL);
|
||||
|
||||
if (current_node->state == STOPPING)
|
||||
if (instance->get_state() == Instance::STOPPING)
|
||||
{
|
||||
waitchild= instance->options.get_shutdown_delay();
|
||||
/* This brach is executed during shutdown. */
|
||||
|
||||
/* this returns TRUE if and only if an instance was stopped for sure */
|
||||
/* This returns TRUE if and only if an instance was stopped for sure. */
|
||||
if (instance->is_crashed())
|
||||
*guarded_instances= list_delete(*guarded_instances, node);
|
||||
else if ( (uint) (current_time - current_node->last_checked) > waitchild)
|
||||
{
|
||||
log_info("Guardian: '%s' stopped.",
|
||||
(const char *) instance->get_name()->str);
|
||||
|
||||
instance->set_state(Instance::STOPPED);
|
||||
}
|
||||
else if ((uint) (current_time - instance->last_checked) >=
|
||||
instance->options.get_shutdown_delay())
|
||||
{
|
||||
log_info("Guardian: '%s' hasn't stopped within %d secs.",
|
||||
(const char *) instance->get_name()->str,
|
||||
(int) instance->options.get_shutdown_delay());
|
||||
|
||||
instance->kill_mysqld(SIGKILL);
|
||||
/*
|
||||
Later we do node= node->next. This is ok, as we are only removing
|
||||
the node from the list. The pointer to the next one is still valid.
|
||||
*/
|
||||
*guarded_instances= list_delete(*guarded_instances, node);
|
||||
|
||||
log_info("Guardian: pretend that '%s' is killed.",
|
||||
(const char *) instance->get_name()->str);
|
||||
|
||||
instance->set_state(Instance::STOPPED);
|
||||
}
|
||||
else
|
||||
{
|
||||
log_info("Guardian: waiting for '%s' to stop (%d secs left).",
|
||||
(const char *) instance->get_name()->str,
|
||||
(int) (instance->options.get_shutdown_delay() -
|
||||
current_time + instance->last_checked));
|
||||
}
|
||||
|
||||
return;
|
||||
@ -133,83 +158,90 @@ void Guardian::process_instance(Instance *instance,
|
||||
/* The instance can be contacted on it's port */
|
||||
|
||||
/* If STARTING also check that pidfile has been created */
|
||||
if (current_node->state == STARTING &&
|
||||
current_node->instance->options.load_pid() == 0)
|
||||
if (instance->get_state() == Instance::STARTING &&
|
||||
instance->options.load_pid() == 0)
|
||||
{
|
||||
/* Pid file not created yet, don't go to STARTED state yet */
|
||||
}
|
||||
else if (current_node->state != STARTED)
|
||||
else if (instance->get_state() != Instance::STARTED)
|
||||
{
|
||||
/* clear status fields */
|
||||
log_info("Guardian: '%s' is running, set state to STARTED.",
|
||||
(const char *) instance->options.instance_name.str);
|
||||
current_node->restart_counter= 0;
|
||||
current_node->crash_moment= 0;
|
||||
current_node->state= STARTED;
|
||||
instance->reset_stat();
|
||||
instance->set_state(Instance::STARTED);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
switch (current_node->state) {
|
||||
case NOT_STARTED:
|
||||
switch (instance->get_state()) {
|
||||
case Instance::NOT_STARTED:
|
||||
log_info("Guardian: starting '%s'...",
|
||||
(const char *) instance->options.instance_name.str);
|
||||
|
||||
/* NOTE, set state to STARTING _before_ start() is called */
|
||||
current_node->state= STARTING;
|
||||
instance->start();
|
||||
current_node->last_checked= current_time;
|
||||
break;
|
||||
case STARTED: /* fallthrough */
|
||||
case STARTING: /* let the instance start or crash */
|
||||
if (instance->is_crashed())
|
||||
{
|
||||
current_node->crash_moment= current_time;
|
||||
current_node->last_checked= current_time;
|
||||
current_node->state= JUST_CRASHED;
|
||||
/* fallthrough -- restart an instance immediately */
|
||||
}
|
||||
else
|
||||
break;
|
||||
case JUST_CRASHED:
|
||||
if (current_time - current_node->crash_moment <= 2)
|
||||
/* NOTE: set state to STARTING _before_ start() is called. */
|
||||
instance->set_state(Instance::STARTING);
|
||||
instance->last_checked= current_time;
|
||||
|
||||
instance->start_mysqld();
|
||||
|
||||
return;
|
||||
|
||||
case Instance::STARTED: /* fallthrough */
|
||||
case Instance::STARTING: /* let the instance start or crash */
|
||||
if (!instance->is_crashed())
|
||||
return;
|
||||
|
||||
instance->crash_moment= current_time;
|
||||
instance->last_checked= current_time;
|
||||
instance->set_state(Instance::JUST_CRASHED);
|
||||
/* fallthrough -- restart an instance immediately */
|
||||
|
||||
case Instance::JUST_CRASHED:
|
||||
if (current_time - instance->crash_moment <= 2)
|
||||
{
|
||||
if (instance->is_crashed())
|
||||
{
|
||||
instance->start();
|
||||
instance->start_mysqld();
|
||||
log_info("Guardian: starting '%s'...",
|
||||
(const char *) instance->options.instance_name.str);
|
||||
}
|
||||
}
|
||||
else
|
||||
current_node->state= CRASHED;
|
||||
break;
|
||||
case CRASHED: /* just regular restarts */
|
||||
if (current_time - current_node->last_checked >
|
||||
monitoring_interval)
|
||||
instance->set_state(Instance::CRASHED);
|
||||
|
||||
return;
|
||||
|
||||
case Instance::CRASHED: /* just regular restarts */
|
||||
if (current_time - instance->last_checked <=
|
||||
Options::Main::monitoring_interval)
|
||||
return;
|
||||
|
||||
if (instance->restart_counter < restart_retry)
|
||||
{
|
||||
if ((current_node->restart_counter < restart_retry))
|
||||
if (instance->is_crashed())
|
||||
{
|
||||
if (instance->is_crashed())
|
||||
{
|
||||
instance->start();
|
||||
current_node->last_checked= current_time;
|
||||
current_node->restart_counter++;
|
||||
log_info("Guardian: restarting '%s'...",
|
||||
(const char *) instance->options.instance_name.str);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
log_info("Guardian: can not start '%s'. "
|
||||
"Abandoning attempts to (re)start it",
|
||||
instance->start_mysqld();
|
||||
instance->last_checked= current_time;
|
||||
|
||||
log_info("Guardian: restarting '%s'...",
|
||||
(const char *) instance->options.instance_name.str);
|
||||
current_node->state= CRASHED_AND_ABANDONED;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case CRASHED_AND_ABANDONED:
|
||||
break; /* do nothing */
|
||||
else
|
||||
{
|
||||
log_info("Guardian: can not start '%s'. "
|
||||
"Abandoning attempts to (re)start it",
|
||||
(const char *) instance->options.instance_name.str);
|
||||
|
||||
instance->set_state(Instance::CRASHED_AND_ABANDONED);
|
||||
}
|
||||
|
||||
return;
|
||||
|
||||
case Instance::CRASHED_AND_ABANDONED:
|
||||
return; /* do nothing */
|
||||
|
||||
default:
|
||||
DBUG_ASSERT(0);
|
||||
}
|
||||
@ -217,56 +249,78 @@ void Guardian::process_instance(Instance *instance,
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
/**
|
||||
Main function of Guardian thread.
|
||||
|
||||
SYNOPSIS
|
||||
run()
|
||||
|
||||
DESCRIPTION
|
||||
Check for all guarded instances and restart them if needed. If everything
|
||||
is fine go and sleep for some time.
|
||||
Check for all guarded instances and restart them if needed.
|
||||
*/
|
||||
|
||||
void Guardian::run()
|
||||
{
|
||||
Instance *instance;
|
||||
LIST *node;
|
||||
struct timespec timeout;
|
||||
|
||||
log_info("Guardian: started.");
|
||||
|
||||
thread_registry->register_thread(&thread_info);
|
||||
|
||||
pthread_mutex_lock(&LOCK_guardian);
|
||||
/* Loop, until all instances were shut down at the end. */
|
||||
|
||||
/* loop, until all instances were shut down at the end */
|
||||
while (!(shutdown_requested && (guarded_instances == NULL)))
|
||||
while (true)
|
||||
{
|
||||
node= guarded_instances;
|
||||
Instance_map::Iterator instances_it(instance_map);
|
||||
Instance *instance;
|
||||
bool all_instances_stopped= TRUE;
|
||||
|
||||
while (node != NULL)
|
||||
instance_map->lock();
|
||||
|
||||
while ((instance= instances_it.next()))
|
||||
{
|
||||
GUARD_NODE *current_node= (GUARD_NODE *) node->data;
|
||||
instance= ((GUARD_NODE *) node->data)->instance;
|
||||
process_instance(instance, current_node, &guarded_instances, node);
|
||||
instance->lock();
|
||||
|
||||
node= node->next;
|
||||
if (!instance->is_guarded() ||
|
||||
instance->get_state() == Instance::STOPPED)
|
||||
{
|
||||
instance->unlock();
|
||||
continue;
|
||||
}
|
||||
|
||||
process_instance(instance);
|
||||
|
||||
if (instance->get_state() != Instance::STOPPED)
|
||||
all_instances_stopped= FALSE;
|
||||
|
||||
instance->unlock();
|
||||
}
|
||||
timeout.tv_sec= time(NULL) + monitoring_interval;
|
||||
|
||||
instance_map->unlock();
|
||||
|
||||
lock();
|
||||
|
||||
if (shutdown_requested && all_instances_stopped)
|
||||
{
|
||||
log_info("Guardian: all guarded mysqlds stopped.");
|
||||
|
||||
stopped= TRUE;
|
||||
unlock();
|
||||
break;
|
||||
}
|
||||
|
||||
timeout.tv_sec= time(NULL) + Options::Main::monitoring_interval;
|
||||
timeout.tv_nsec= 0;
|
||||
|
||||
/* check the loop predicate before sleeping */
|
||||
if (!(shutdown_requested && (!(guarded_instances))))
|
||||
thread_registry->cond_timedwait(&thread_info, &COND_guardian,
|
||||
&LOCK_guardian, &timeout);
|
||||
thread_registry->cond_timedwait(&thread_info, &COND_guardian,
|
||||
&LOCK_guardian, &timeout);
|
||||
unlock();
|
||||
}
|
||||
|
||||
log_info("Guardian: stopped.");
|
||||
|
||||
stopped= TRUE;
|
||||
pthread_mutex_unlock(&LOCK_guardian);
|
||||
/* now, when the Guardian is stopped we can stop the IM */
|
||||
/* Now, when the Guardian is stopped we can stop the IM. */
|
||||
|
||||
thread_registry->unregister_thread(&thread_info);
|
||||
thread_registry->request_shutdown();
|
||||
|
||||
@ -274,129 +328,65 @@ void Guardian::run()
|
||||
}
|
||||
|
||||
|
||||
int Guardian::is_stopped()
|
||||
/**
|
||||
Return the value of stopped flag.
|
||||
*/
|
||||
|
||||
bool Guardian::is_stopped()
|
||||
{
|
||||
int var;
|
||||
pthread_mutex_lock(&LOCK_guardian);
|
||||
|
||||
lock();
|
||||
var= stopped;
|
||||
pthread_mutex_unlock(&LOCK_guardian);
|
||||
unlock();
|
||||
|
||||
return var;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Initialize the list of guarded instances: loop through the Instance_map and
|
||||
add all of the instances, which don't have 'nonguarded' option specified.
|
||||
/**
|
||||
Wake up Guardian thread.
|
||||
|
||||
SYNOPSIS
|
||||
Guardian::init()
|
||||
|
||||
NOTE: The operation should be invoked with the following locks acquired:
|
||||
- Guardian;
|
||||
- Instance_map;
|
||||
|
||||
RETURN
|
||||
0 - ok
|
||||
1 - error occurred
|
||||
MT-NOTE: though usually the mutex associated with condition variable should
|
||||
be acquired before signalling the variable, here this is not needed.
|
||||
Signalling under locked mutex is used to avoid lost signals. In the current
|
||||
logic however locking mutex does not guarantee that the signal will not be
|
||||
lost.
|
||||
*/
|
||||
|
||||
int Guardian::init()
|
||||
void Guardian::ping()
|
||||
{
|
||||
pthread_cond_signal(&COND_guardian);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
Prepare list of instances.
|
||||
|
||||
SYNOPSIS
|
||||
init()
|
||||
|
||||
MT-NOTE: Instance Map must be locked before calling the operation.
|
||||
*/
|
||||
|
||||
void Guardian::init()
|
||||
{
|
||||
Instance *instance;
|
||||
Instance_map::Iterator iterator(instance_map);
|
||||
|
||||
/* clear the list of guarded instances */
|
||||
free_root(&alloc, MYF(0));
|
||||
init_alloc_root(&alloc, MEM_ROOT_BLOCK_SIZE, 0);
|
||||
guarded_instances= NULL;
|
||||
|
||||
while ((instance= iterator.next()))
|
||||
{
|
||||
if (instance->options.nonguarded)
|
||||
continue;
|
||||
instance->lock();
|
||||
|
||||
if (guard(instance, TRUE)) /* do not lock guardian */
|
||||
return 1;
|
||||
instance->reset_stat();
|
||||
instance->set_state(Instance::NOT_STARTED);
|
||||
|
||||
instance->unlock();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Add instance to the Guardian list
|
||||
|
||||
SYNOPSIS
|
||||
guard()
|
||||
instance the instance to be guarded
|
||||
nolock whether we prefer do not lock Guardian here,
|
||||
but use external locking instead
|
||||
|
||||
DESCRIPTION
|
||||
|
||||
The instance is added to the guarded instances list. Usually guard() is
|
||||
called after we start an instance.
|
||||
|
||||
RETURN
|
||||
0 - ok
|
||||
1 - error occurred
|
||||
*/
|
||||
|
||||
int Guardian::guard(Instance *instance, bool nolock)
|
||||
{
|
||||
LIST *node;
|
||||
GUARD_NODE *content;
|
||||
|
||||
node= (LIST *) alloc_root(&alloc, sizeof(LIST));
|
||||
content= (GUARD_NODE *) alloc_root(&alloc, sizeof(GUARD_NODE));
|
||||
|
||||
if ((!(node)) || (!(content)))
|
||||
return 1;
|
||||
/* we store the pointers to instances from the instance_map's MEM_ROOT */
|
||||
content->instance= instance;
|
||||
content->restart_counter= 0;
|
||||
content->crash_moment= 0;
|
||||
content->state= NOT_STARTED;
|
||||
node->data= (void*) content;
|
||||
|
||||
if (nolock)
|
||||
guarded_instances= list_add(guarded_instances, node);
|
||||
else
|
||||
{
|
||||
pthread_mutex_lock(&LOCK_guardian);
|
||||
guarded_instances= list_add(guarded_instances, node);
|
||||
pthread_mutex_unlock(&LOCK_guardian);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
TODO: perhaps it would make sense to create a pool of the LIST nodeents
|
||||
and give them upon request. Now we are loosing a bit of memory when
|
||||
guarded instance was stopped and then restarted (since we cannot free just
|
||||
a piece of the MEM_ROOT).
|
||||
*/
|
||||
|
||||
int Guardian::stop_guard(Instance *instance)
|
||||
{
|
||||
LIST *node;
|
||||
|
||||
pthread_mutex_lock(&LOCK_guardian);
|
||||
|
||||
node= find_instance_node(instance);
|
||||
|
||||
if (node != NULL)
|
||||
guarded_instances= list_delete(guarded_instances, node);
|
||||
|
||||
pthread_mutex_unlock(&LOCK_guardian);
|
||||
|
||||
/* if there is nothing to delete it is also fine */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
/**
|
||||
An internal method which is called at shutdown to unregister instances and
|
||||
attempt to stop them if requested.
|
||||
|
||||
@ -409,86 +399,71 @@ int Guardian::stop_guard(Instance *instance)
|
||||
accordingly.
|
||||
|
||||
NOTE
|
||||
Guardian object should be locked by the calling function.
|
||||
Guardian object should be locked by the caller.
|
||||
|
||||
RETURN
|
||||
0 - ok
|
||||
1 - error occurred
|
||||
*/
|
||||
|
||||
int Guardian::stop_instances()
|
||||
void Guardian::stop_instances()
|
||||
{
|
||||
LIST *node;
|
||||
node= guarded_instances;
|
||||
while (node != NULL)
|
||||
Instance_map::Iterator instances_it(instance_map);
|
||||
Instance *instance;
|
||||
|
||||
instance_map->lock();
|
||||
|
||||
while ((instance= instances_it.next()))
|
||||
{
|
||||
GUARD_NODE *current_node= (GUARD_NODE *) node->data;
|
||||
instance->lock();
|
||||
|
||||
if (!instance->is_guarded() ||
|
||||
instance->get_state() == Instance::STOPPED)
|
||||
{
|
||||
instance->unlock();
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
If instance is running or was running (and now probably hanging),
|
||||
request stop.
|
||||
*/
|
||||
if (current_node->instance->is_mysqld_running() ||
|
||||
(current_node->state == STARTED))
|
||||
|
||||
if (instance->is_mysqld_running() ||
|
||||
instance->get_state() == Instance::STARTED)
|
||||
{
|
||||
current_node->state= STOPPING;
|
||||
current_node->last_checked= time(NULL);
|
||||
instance->set_state(Instance::STOPPING);
|
||||
instance->last_checked= time(NULL);
|
||||
}
|
||||
else
|
||||
/* otherwise remove it from the list */
|
||||
guarded_instances= list_delete(guarded_instances, node);
|
||||
/* But try to kill it anyway. Just in case */
|
||||
current_node->instance->kill_mysqld(SIGTERM);
|
||||
node= node->next;
|
||||
{
|
||||
/* Otherwise mark it as STOPPED. */
|
||||
instance->set_state(Instance::STOPPED);
|
||||
}
|
||||
|
||||
/* Request mysqld to stop. */
|
||||
|
||||
instance->kill_mysqld(SIGTERM);
|
||||
|
||||
instance->unlock();
|
||||
}
|
||||
return 0;
|
||||
|
||||
instance_map->unlock();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
Lock Guardian.
|
||||
*/
|
||||
|
||||
void Guardian::lock()
|
||||
{
|
||||
pthread_mutex_lock(&LOCK_guardian);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
Unlock Guardian.
|
||||
*/
|
||||
|
||||
void Guardian::unlock()
|
||||
{
|
||||
pthread_mutex_unlock(&LOCK_guardian);
|
||||
}
|
||||
|
||||
|
||||
LIST *Guardian::find_instance_node(Instance *instance)
|
||||
{
|
||||
LIST *node= guarded_instances;
|
||||
|
||||
while (node != NULL)
|
||||
{
|
||||
/*
|
||||
We compare only pointers, as we always use pointers from the
|
||||
instance_map's MEM_ROOT.
|
||||
*/
|
||||
if (((GUARD_NODE *) node->data)->instance == instance)
|
||||
return node;
|
||||
|
||||
node= node->next;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
bool Guardian::is_active(Instance *instance)
|
||||
{
|
||||
bool guarded;
|
||||
|
||||
lock();
|
||||
|
||||
guarded= find_instance_node(instance) != NULL;
|
||||
|
||||
/* is_running() can take a long time, so let's unlock mutex first. */
|
||||
unlock();
|
||||
|
||||
if (guarded)
|
||||
return true;
|
||||
|
||||
return instance->is_mysqld_running();
|
||||
}
|
||||
|
Reference in New Issue
Block a user