1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-29 00:08:14 +03:00
Files
mariadb/server-tools/instance-manager/guardian.cc
unknown 8b4fcf6e3b This is an implementation of two WL items:
- WL#3158: IM: Instance configuration extensions;
  - WL#3159: IM: --bootstrap and --start-default-instance modes

The following new statements have been added:
  - CREATE INSTANCE;
  - DROP INSTANCE;

The behaviour of the following statements have been changed:
  - SET;
  - UNSET;
  - FLUSH INSTANCES;
  - SHOW INSTANCES;
  - SHOW INSTANCE OPTIONS;


BitKeeper/deleted/.del-im_options_set.imtest~b53d9d60e5684833:
  Delete: mysql-test/t/im_options_set.imtest
BitKeeper/deleted/.del-im_options_set.result~59278f56be61d921:
  Delete: mysql-test/r/im_options_set.result
BitKeeper/deleted/.del-im_options_unset.imtest~768eb186b51d0048:
  Delete: mysql-test/t/im_options_unset.imtest
BitKeeper/deleted/.del-im_options_unset.result~20a4790cd3c70a4f:
  Delete: mysql-test/r/im_options_unset.result
client/get_password.c:
  Change prototype to avoid casting when using C-strings (char *).
include/m_string.h:
  Moved LEX_STRING to global header from sql/ to be accessible
  from all components (IM for one).
include/my_sys.h:
  Added constants for modify_defaults_file().
include/mysql_com.h:
  Removed duplicated declarations. my_sys.h should be used instead.
libmysql/get_password.c:
  Change prototype to avoid casting when using C-strings (char *).
mysql-test/mysql-test-run.pl:
  Added environment variables to be used from tests.
mysql-test/r/im_daemon_life_cycle.result:
  Column name has been changed in SHOW INSTANCES.
mysql-test/r/im_life_cycle.result:
  1. Column name has been changed in SHOW INSTANCES.
  2. Removed redundant SHOW INSTANCE STATUS statements.
mysql-test/r/im_utils.result:
  Updated the result file.
mysql-test/t/im_daemon_life_cycle-im.opt:
  Set minimal monitoring interval for Instance Manager to speed up testing.
mysql-test/t/im_daemon_life_cycle.imtest:
  Get Instance Manager and managed mysqld-instances enough time to start.
mysql-test/t/im_life_cycle.imtest:
  1. Polishing;
  2. Fixed a test error in 1.1.2.
mysql-test/t/im_utils.imtest:
  Get Instance Manager and managed mysqld-instances enough time to start.
mysys/default.c:
  Pass the name of the section to the handler function as well.
mysys/default_modify.c:
  Added REMOVE_SECTION functionality.
server-tools/instance-manager/IMService.cpp:
  Polishing: be more verbose.
server-tools/instance-manager/IMService.h:
  Polishing: added copyright.
server-tools/instance-manager/Makefile.am:
  Added new files.
server-tools/instance-manager/WindowsService.cpp:
  Polishing: according to The Coding Style, TRUE/FALSE must be
  used instead of true/false.
server-tools/instance-manager/WindowsService.h:
  Polishing: added copyright.
server-tools/instance-manager/command.h:
  Polishing: provide a comment for the main operation of "Command" class.
server-tools/instance-manager/commands.cc:
  1. Added support for CREATE INSTANCE, DROP INSTANCE statements;
  2. Added "deprecated" column in output of SHOW INSTANCE OPTIONS;
  3. Modified the behaviour of SET/UNSET, FLUSH INSTANCES statements;
server-tools/instance-manager/commands.h:
  1. Added support for CREATE INSTANCE, DROP INSTANCE statements;
  2. Added "deprecated" column in output of SHOW INSTANCE OPTIONS;
  3. Modified the behaviour of SET/UNSET, FLUSH INSTANCES statements;
server-tools/instance-manager/guardian.cc:
  Added operations to retrieve state of managed instances.
server-tools/instance-manager/guardian.h:
  Added operations to retrieve state of managed instances.
server-tools/instance-manager/instance.cc:
  1. Provided an operation to check validity of instance name.
  2. Added an attribute to distiguish mysqld-instances,
     whose configuration should be kept backward-compatible.
server-tools/instance-manager/instance.h:
  1. Provided an operation to check validity of instance name.
  2. Added an attribute to distiguish mysqld-instances,
     whose configuration should be kept backward-compatible.
server-tools/instance-manager/instance_map.cc:
  1. Used the operation to check validity of instance name;
  2. Added operations to manage instances.
server-tools/instance-manager/instance_map.h:
  Added operations to manage instances.
server-tools/instance-manager/instance_options.cc:
  Changed Instance_options so that it will be possible to manage
  options on the fly.
server-tools/instance-manager/instance_options.h:
  Changed Instance_options so that it will be possible to manage
  options on the fly.
server-tools/instance-manager/listener.cc:
  1. Remove reference to the instance of Options;
  2. Use new Options naming scheme.
server-tools/instance-manager/listener.h:
  Remove reference to the instance of Options;
server-tools/instance-manager/log.cc:
  Polishing: use TRUE/FALSE instead of true/false.
server-tools/instance-manager/manager.cc:
  Added a common for IM operation to work with configuration file.
server-tools/instance-manager/manager.h:
  Added a common for IM operation to work with configuration file.
server-tools/instance-manager/messages.cc:
  Added messages for new errors.
server-tools/instance-manager/mysql_connection.cc:
  1. Move a constant to common place.
  2. Polishing.
server-tools/instance-manager/mysql_manager_error.h:
  Added new errors.
server-tools/instance-manager/mysqlmanager.cc:
  1. Use error code from Options::load();
  2. Eliminate type-casting warning on Windows.
server-tools/instance-manager/options.cc:
  Added support for user-management command-line options.
server-tools/instance-manager/options.h:
  Added support for user-management command-line options.
server-tools/instance-manager/parse.cc:
  1. Added support of new statements:
     - CREATE INSTANCE;
     - DROP INSTANCE.
  2. Modified SET/UNSET.
server-tools/instance-manager/parse.h:
  1. Added support of new statements:
     - CREATE INSTANCE;
     - DROP INSTANCE.
  2. Modified SET/UNSET.
server-tools/instance-manager/parse_output.cc:
  Sorted out header files.
server-tools/instance-manager/parse_output.h:
  Sorted out header files.
server-tools/instance-manager/portability.h:
  1. Added constants for Windows.
  2. Moved system-dependent defines from instance_options.cc.
server-tools/instance-manager/priv.cc:
  Updated version.
server-tools/instance-manager/priv.h:
  Added some global constants.
server-tools/instance-manager/protocol.cc:
  Replaced NAME_WITH_LENGTH by LEX_STRING.
server-tools/instance-manager/protocol.h:
  Replaced NAME_WITH_LENGTH by LEX_STRING.
server-tools/instance-manager/thread_registry.cc:
  Polishing: use TRUE/FALSE instead of true/false.
server-tools/instance-manager/user_map.cc:
  Added support for managing password database.
server-tools/instance-manager/user_map.h:
  Added support for managing password database.
sql/sp.cc:
  Replaced LEX_STRING_WITH_INIT by LEX_STRING + struct initialization.
sql/sp_head.cc:
  Replaced LEX_STRING_WITH_INIT by LEX_STRING + struct initialization.
sql/spatial.cc:
  Removed LEX_STRING_WITH_INIT.
sql/spatial.h:
  Removed LEX_STRING_WITH_INIT.
sql/sql_string.h:
  Moved STRING_WITH_LEN() macro out from sql (to m_string.h).
sql/sql_trigger.cc:
  Moved STRING_WITH_LEN() macro out from sql (to m_string.h).
sql/structs.h:
  Removed LEX_STRING_WITH_INIT.
support-files/mysql.server.sh:
  Instruct Instance Manager to work in mysqld-safe compatible mode
  for backward compatibility.
mysql-test/r/im_cmd_line.result:
  Added result file.
mysql-test/r/im_instance_conf.result:
  Added result file.
mysql-test/r/im_options.result:
  Added result file.
mysql-test/t/im_cmd_line.imtest:
  IM command-line options test.
mysql-test/t/im_instance_conf-im.opt:
  Set minimal monitoring interval for Instance Manager to speed up testing.
mysql-test/t/im_instance_conf.imtest:
  Added a new test case for checking instance-management.
mysql-test/t/im_life_cycle-im.opt:
  Set minimal monitoring interval for Instance Manager to speed up testing.
mysql-test/t/im_options.imtest:
  Join im_options_set and im_options_unset and add new tests.
mysql-test/t/im_utils-im.opt:
  Set minimal monitoring interval for Instance Manager to speed up testing.
server-tools/instance-manager/exit_codes.h:
  New file for defining exit codes for user-management mode.
server-tools/instance-manager/user_management_commands.cc:
  User-management commands implementation.
server-tools/instance-manager/user_management_commands.h:
  User-management command declarations.
2006-05-18 18:57:50 +04:00

500 lines
12 KiB
C++

/* Copyright (C) 2004 MySQL AB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
#if defined(__GNUC__) && defined(USE_PRAGMA_IMPLEMENTATION)
#pragma implementation
#endif
#include "guardian.h"
#include <string.h>
#include <sys/types.h>
#include <signal.h>
#include "instance.h"
#include "instance_map.h"
#include "log.h"
#include "mysql_manager_error.h"
pthread_handler_t guardian(void *arg)
{
Guardian_thread *guardian_thread= (Guardian_thread *) arg;
guardian_thread->run();
return 0;
}
const char *
Guardian_thread::get_instance_state_name(enum_instance_state state)
{
switch (state) {
case NOT_STARTED:
return "offline";
case STARTING:
return "starting";
case STARTED:
return "online";
case JUST_CRASHED:
return "failed";
case CRASHED:
return "crashed";
case CRASHED_AND_ABANDONED:
return "abandoned";
case STOPPING:
return "stopping";
}
return NULL; /* just to ignore compiler warning. */
}
Guardian_thread::Guardian_thread(Thread_registry &thread_registry_arg,
Instance_map *instance_map_arg,
uint monitoring_interval_arg) :
Guardian_thread_args(thread_registry_arg, instance_map_arg,
monitoring_interval_arg),
thread_info(pthread_self()), guarded_instances(0)
{
pthread_mutex_init(&LOCK_guardian, 0);
pthread_cond_init(&COND_guardian, 0);
shutdown_requested= FALSE;
stopped= FALSE;
init_alloc_root(&alloc, MEM_ROOT_BLOCK_SIZE, 0);
}
Guardian_thread::~Guardian_thread()
{
/* delay guardian destruction to the moment when no one needs it */
pthread_mutex_lock(&LOCK_guardian);
free_root(&alloc, MYF(0));
pthread_mutex_unlock(&LOCK_guardian);
pthread_mutex_destroy(&LOCK_guardian);
pthread_cond_destroy(&COND_guardian);
}
void Guardian_thread::request_shutdown(bool stop_instances_arg)
{
pthread_mutex_lock(&LOCK_guardian);
/* stop instances or just clean up Guardian repository */
stop_instances(stop_instances_arg);
shutdown_requested= TRUE;
pthread_mutex_unlock(&LOCK_guardian);
}
void Guardian_thread::process_instance(Instance *instance,
GUARD_NODE *current_node,
LIST **guarded_instances,
LIST *node)
{
uint waitchild= (uint) Instance::DEFAULT_SHUTDOWN_DELAY;
/* The amount of times, Guardian attempts to restart an instance */
int restart_retry= 100;
time_t current_time= time(NULL);
if (current_node->state == STOPPING)
{
/* this brach is executed during shutdown */
if (instance->options.shutdown_delay)
{
/*
NOTE: it is important to check shutdown_delay here, but use
shutdown_delay_val. The idea is that if the option is unset,
shutdown_delay will be NULL, but shutdown_delay_val will not be reset.
*/
waitchild= instance->options.shutdown_delay_val;
}
/* this returns TRUE if and only if an instance was stopped for sure */
if (instance->is_crashed())
*guarded_instances= list_delete(*guarded_instances, node);
else if ( (uint) (current_time - current_node->last_checked) > waitchild)
{
instance->kill_instance(SIGKILL);
/*
Later we do node= node->next. This is ok, as we are only removing
the node from the list. The pointer to the next one is still valid.
*/
*guarded_instances= list_delete(*guarded_instances, node);
}
return;
}
if (instance->is_running())
{
/* clear status fields */
current_node->restart_counter= 0;
current_node->crash_moment= 0;
current_node->state= STARTED;
}
else
{
switch (current_node->state) {
case NOT_STARTED:
instance->start();
current_node->last_checked= current_time;
log_info("guardian: starting instance %s",
instance->options.instance_name);
current_node->state= STARTING;
break;
case STARTED: /* fallthrough */
case STARTING: /* let the instance start or crash */
if (instance->is_crashed())
{
current_node->crash_moment= current_time;
current_node->last_checked= current_time;
current_node->state= JUST_CRASHED;
/* fallthrough -- restart an instance immediately */
}
else
break;
case JUST_CRASHED:
if (current_time - current_node->crash_moment <= 2)
{
instance->start();
log_info("guardian: starting instance %s",
instance->options.instance_name);
}
else
current_node->state= CRASHED;
break;
case CRASHED: /* just regular restarts */
if (current_time - current_node->last_checked >
monitoring_interval)
{
if ((current_node->restart_counter < restart_retry))
{
instance->start();
current_node->last_checked= current_time;
current_node->restart_counter++;
log_info("guardian: restarting instance %s",
instance->options.instance_name);
}
else
{
log_info("guardian: cannot start instance %s. Abandoning attempts "
"to (re)start it", instance->options.instance_name);
current_node->state= CRASHED_AND_ABANDONED;
}
}
break;
case CRASHED_AND_ABANDONED:
break; /* do nothing */
default:
DBUG_ASSERT(0);
}
}
}
/*
Run guardian thread
SYNOPSYS
run()
DESCRIPTION
Check for all guarded instances and restart them if needed. If everything
is fine go and sleep for some time.
*/
void Guardian_thread::run()
{
Instance *instance;
LIST *node;
struct timespec timeout;
thread_registry.register_thread(&thread_info);
my_thread_init();
pthread_mutex_lock(&LOCK_guardian);
/* loop, until all instances were shut down at the end */
while (!(shutdown_requested && (guarded_instances == NULL)))
{
node= guarded_instances;
while (node != NULL)
{
GUARD_NODE *current_node= (GUARD_NODE *) node->data;
instance= ((GUARD_NODE *) node->data)->instance;
process_instance(instance, current_node, &guarded_instances, node);
node= node->next;
}
timeout.tv_sec= time(NULL) + monitoring_interval;
timeout.tv_nsec= 0;
/* check the loop predicate before sleeping */
if (!(shutdown_requested && (!(guarded_instances))))
thread_registry.cond_timedwait(&thread_info, &COND_guardian,
&LOCK_guardian, &timeout);
}
stopped= TRUE;
pthread_mutex_unlock(&LOCK_guardian);
/* now, when the Guardian is stopped we can stop the IM */
thread_registry.unregister_thread(&thread_info);
thread_registry.request_shutdown();
my_thread_end();
}
int Guardian_thread::is_stopped()
{
int var;
pthread_mutex_lock(&LOCK_guardian);
var= stopped;
pthread_mutex_unlock(&LOCK_guardian);
return var;
}
/*
Initialize the list of guarded instances: loop through the Instance_map and
add all of the instances, which don't have 'nonguarded' option specified.
SYNOPSYS
Guardian_thread::init()
NOTE: The operation should be invoked with the following locks acquired:
- Guardian_thread;
- Instance_map;
RETURN
0 - ok
1 - error occured
*/
int Guardian_thread::init()
{
Instance *instance;
Instance_map::Iterator iterator(instance_map);
/* clear the list of guarded instances */
free_root(&alloc, MYF(0));
init_alloc_root(&alloc, MEM_ROOT_BLOCK_SIZE, 0);
guarded_instances= NULL;
while ((instance= iterator.next()))
{
if (instance->options.nonguarded)
continue;
if (guard(instance, TRUE)) /* do not lock guardian */
return 1;
}
return 0;
}
/*
Add instance to the Guardian list
SYNOPSYS
guard()
instance the instance to be guarded
nolock whether we prefer do not lock Guardian here,
but use external locking instead
DESCRIPTION
The instance is added to the guarded instances list. Usually guard() is
called after we start an instance.
RETURN
0 - ok
1 - error occured
*/
int Guardian_thread::guard(Instance *instance, bool nolock)
{
LIST *node;
GUARD_NODE *content;
node= (LIST *) alloc_root(&alloc, sizeof(LIST));
content= (GUARD_NODE *) alloc_root(&alloc, sizeof(GUARD_NODE));
if ((!(node)) || (!(content)))
return 1;
/* we store the pointers to instances from the instance_map's MEM_ROOT */
content->instance= instance;
content->restart_counter= 0;
content->crash_moment= 0;
content->state= NOT_STARTED;
node->data= (void*) content;
if (nolock)
guarded_instances= list_add(guarded_instances, node);
else
{
pthread_mutex_lock(&LOCK_guardian);
guarded_instances= list_add(guarded_instances, node);
pthread_mutex_unlock(&LOCK_guardian);
}
return 0;
}
/*
TODO: perhaps it would make sense to create a pool of the LIST nodeents
and give them upon request. Now we are loosing a bit of memory when
guarded instance was stopped and then restarted (since we cannot free just
a piece of the MEM_ROOT).
*/
int Guardian_thread::stop_guard(Instance *instance)
{
LIST *node;
pthread_mutex_lock(&LOCK_guardian);
node= find_instance_node(instance);
if (node != NULL)
guarded_instances= list_delete(guarded_instances, node);
pthread_mutex_unlock(&LOCK_guardian);
/* if there is nothing to delete it is also fine */
return 0;
}
/*
An internal method which is called at shutdown to unregister instances and
attempt to stop them if requested.
SYNOPSYS
stop_instances()
stop_instances_arg whether we should stop instances at shutdown
DESCRIPTION
Loops through the guarded_instances list and prepares them for shutdown.
If stop_instances was requested, we need to issue a stop command and change
the state accordingly. Otherwise we simply delete an entry.
NOTE
Guardian object should be locked by the calling function.
RETURN
0 - ok
1 - error occured
*/
int Guardian_thread::stop_instances(bool stop_instances_arg)
{
LIST *node;
node= guarded_instances;
while (node != NULL)
{
if (!stop_instances_arg)
{
/* just forget about an instance */
guarded_instances= list_delete(guarded_instances, node);
/*
This should still work fine, as we have only removed the
node from the list. The pointer to the next one is still valid
*/
node= node->next;
}
else
{
GUARD_NODE *current_node= (GUARD_NODE *) node->data;
/*
If instance is running or was running (and now probably hanging),
request stop.
*/
if (current_node->instance->is_running() ||
(current_node->state == STARTED))
{
current_node->state= STOPPING;
current_node->last_checked= time(NULL);
}
else
/* otherwise remove it from the list */
guarded_instances= list_delete(guarded_instances, node);
/* But try to kill it anyway. Just in case */
current_node->instance->kill_instance(SIGTERM);
node= node->next;
}
}
return 0;
}
void Guardian_thread::lock()
{
pthread_mutex_lock(&LOCK_guardian);
}
void Guardian_thread::unlock()
{
pthread_mutex_unlock(&LOCK_guardian);
}
LIST *Guardian_thread::find_instance_node(Instance *instance)
{
LIST *node= guarded_instances;
while (node != NULL)
{
/*
We compare only pointers, as we always use pointers from the
instance_map's MEM_ROOT.
*/
if (((GUARD_NODE *) node->data)->instance == instance)
return node;
node= node->next;
}
return NULL;
}
bool Guardian_thread::is_active(Instance *instance)
{
bool guarded;
lock();
guarded= find_instance_node(instance) != NULL;
/* is_running() can take a long time, so let's unlock mutex first. */
unlock();
if (guarded)
return true;
return instance->is_running();
}