Latest commit (tracing improvements):
- Don't log trace_params in the tracing logger; it already has all this data.
- Don't print span attrs; they can contain lots of headers.
- Save a small part of the response into the span if the response was a JSON string.
- Added JSON logging of trace details into a separate file (to not spam the main log with machine-readable stuff).
- Record part of the response into the span.
- Set the duration attribute in server spans.
- Log 404 errors.
- Colorize the traces (each span slightly changes the color of the parent span).
- Improve trace visualization with duration formatting and notes for request/response pairs.
256 lines · 8.8 KiB · Python
#!/usr/bin/env python3
"""
CherryPy-based webservice daemon with background threads
"""
import logging
import os
import threading
import time
from datetime import datetime

import cherrypy
from cherrypy.process import plugins

# TODO: fix the dispatcher selection logic: endpoints.py executes code at
#       import time, which causes module logger misconfiguration.
from cmapi_server.logging_management import config_cmapi_server_logging
from tracing.sentry import maybe_init_sentry
from tracing.traceparent_backend import TraceparentBackend
from tracing.tracer import get_tracer
config_cmapi_server_logging()
from tracing.trace_tool import register_tracing_tools

from cmapi_server import helpers
from cmapi_server.constants import DEFAULT_MCS_CONF_PATH, CMAPI_CONF_PATH
from cmapi_server.controllers.dispatcher import dispatcher, jsonify_error, jsonify_404
from cmapi_server.failover_agent import FailoverAgent
from cmapi_server.managers.application import AppManager
from cmapi_server.managers.process import MCSProcessManager
from cmapi_server.managers.certificate import CertificateManager
from failover.node_monitor import NodeMonitor
from mcs_node_control.models.dbrm_socket import SOCK_TIMEOUT, DBRMSocketHandler
from mcs_node_control.models.node_config import NodeConfig


def worker(app):
    """Background Timer that runs clean_txn_by_timeout() every 5 seconds.

    TODO: this needs to be fixed/optimized. I don't like creating the thread
    repeatedly.
    """
    while True:
        t = threading.Timer(5.0, clean_txn_by_timeout, args=(app,))
        t.start()
        t.join()


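# Roll back an expired transaction: if the 'txn' section of the app config has
# passed its timeout, undo any staged config change and reset the section to
# empty defaults.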
def clean_txn_by_timeout(app):
    txn_section = app.config.get('txn', None)
    timeout_timestamp = txn_section.get('timeout') if txn_section is not None else None
    current_timestamp = int(datetime.now().timestamp())
    if timeout_timestamp is not None and current_timestamp > timeout_timestamp:
        txn_config_changed = txn_section.get('config_changed', None)
        if txn_config_changed is True:
            node_config = NodeConfig()
            node_config.rollback_config()
            node_config.apply_config(
                xml_string=node_config.get_current_config()
            )
        app.config.update({
            'txn': {
                'id': 0,
                'timeout': 0,
                'manager_address': '',
                'config_changed': False,
            },
        })


class TxnBackgroundThread(plugins.SimplePlugin):
    """CherryPy plugin to create a background worker thread"""
    app = None

    def __init__(self, bus, app):
        super(TxnBackgroundThread, self).__init__(bus)
        self.t = None
        self.app = app

    def start(self):
        """Plugin entrypoint"""

        self.t = threading.Thread(
            target=worker, name='TxnBackgroundThread', args=(self.app,)
        )
        self.t.daemon = True
        self.t.start()

    # Start at a higher priority than "Daemonize" (which we're not using
    # yet but may in the future)
    start.priority = 85


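# Failover monitoring is driven through the CherryPy bus: other components
# publish True/False on the 'failover' channel and this plugin starts or
# stops the NodeMonitor accordingly (see _subscriber below).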
class FailoverBackgroundThread(plugins.SimplePlugin):
    """CherryPy plugin to start the thread for failover monitoring."""

    def __init__(self, bus, turned_on):
        super().__init__(bus)
        self.node_monitor = NodeMonitor(agent=FailoverAgent())
        self.running = False
        self.turned_on = turned_on
        if self.turned_on:
            logging.info(
                'Failover is turned ON by default or in CMAPI config file.'
            )
        else:
            logging.info('Failover is turned OFF in CMAPI config file.')

    def _start(self):
        if self.running:
            return
        self.bus.log('Starting Failover monitor thread.')
        self.node_monitor.start()
        self.running = True

    def _stop(self):
        if not self.running:
            return
        self.bus.log('Stopping Failover monitor thread.')
        self.node_monitor.stop()
        self.running = False

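    # Bus subscriber for the 'failover' channel: expects a single bool;
    # True starts the monitor, False stops it. Ignored entirely when failover
    # is turned off in the config.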
    def _subscriber(self, run_failover: bool):
        if not self.turned_on:
            return
        if not isinstance(run_failover, bool):
            self.bus.log(f'Got wrong obj in failover channel {run_failover}')
            return
        if run_failover:
            self._start()
        else:
            self._stop()

    def start(self):
        self.bus.subscribe('failover', self._subscriber)

    def stop(self):
        cherrypy.engine.unsubscribe('failover', self._subscriber)
        self._stop()


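# Daemon entrypoint: configure tracing and certificates, mount the CherryPy
# app, subscribe the background plugins, sync the MCS config from the cluster
# (unless in maintenance state) and finally block on the CherryPy engine.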
if __name__ == '__main__':
    logging.info(f'CMAPI Version: {AppManager.get_version()}')

    # TODO: read cmapi config filepath as an argument
    helpers.cmapi_config_check()

    register_tracing_tools()
    get_tracer().register_backend(TraceparentBackend())  # Register default tracing backend
    maybe_init_sentry()  # Init Sentry if DSN is present

    CertificateManager.create_self_signed_certificate_if_not_exist()
    CertificateManager.renew_certificate()

    app = cherrypy.tree.mount(root=None, config=CMAPI_CONF_PATH)
    root_config = {
        "request.dispatch": dispatcher,
        "error_page.default": jsonify_error,
        "error_page.404": jsonify_404,
        # Enable tracing tools
        'tools.trace.on': True,
        'tools.trace_end.on': True,
    }

    app.config.update({
        '/': root_config,
        'config': {
            'path': CMAPI_CONF_PATH,
        },
    })

    cherrypy.config.update(CMAPI_CONF_PATH)
    cfg_parser = helpers.get_config_parser(CMAPI_CONF_PATH)
    dispatcher_name, dispatcher_path = helpers.get_dispatcher_name_and_path(
        cfg_parser
    )
    MCSProcessManager.detect(dispatcher_name, dispatcher_path)
    # If the auto_failover flag is absent from the config, turn it ON by default.
    turn_on_failover = cfg_parser.getboolean(
        'application', 'auto_failover', fallback=True
    )
    TxnBackgroundThread(cherrypy.engine, app).subscribe()
    # Subscribe the FailoverBackgroundThread plugin to the bus channels.
    # This does not yet start the "real" failover background thread.
    FailoverBackgroundThread(cherrypy.engine, turn_on_failover).subscribe()
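    # Run certificate renewal once an hour in a CherryPy BackgroundTask.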
    cherrypy.engine.certificate_monitor = plugins.BackgroundTask(
        3600, CertificateManager.renew_certificate
    )
    cherrypy.engine.certificate_monitor.start()
    cherrypy.engine.start()
    cherrypy.engine.wait(cherrypy.engine.states.STARTED)

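    # Unless this node is in maintenance state, pull the current MCS config
    # from the other nodes, retrying until it succeeds or a put_config bumps
    # the local config file's mtime.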
    success = False
    config_mtime = os.path.getmtime(DEFAULT_MCS_CONF_PATH)
    # If the mtime changed, we infer that a put_config was run on this node,
    # and we now have a current config file.
    # TODO: Research all affected cases and remove/rewrite the loop below.
    #       Previously it caused an endless wait while starting the
    #       application after an upgrade.
    #       Do we have any cases where we need to keep retrying endlessly to
    #       sync the config with other nodes?
    if not helpers.in_maintenance_state(DEFAULT_MCS_CONF_PATH):
        while (
            not success
            and config_mtime == os.path.getmtime(DEFAULT_MCS_CONF_PATH)
        ):
            try:
                success = helpers.get_current_config_file()
            except Exception:
                logging.info(
                    'Main got exception while get_current_config_file',
                    exc_info=True
                )
                success = False
            if not success:
                delay = 10
                logging.warning(
                    'Failed to fetch the current config file, '
                    f'retrying in {delay}s'
                )
                time.sleep(delay)

        config_mtime = os.path.getmtime(DEFAULT_MCS_CONF_PATH)
        helpers.wait_for_deactivation_or_put_config(config_mtime)

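        # Probe the DBRM socket once now so the protocol is detected early;
        # if controllernode is not up yet this only logs a warning and the
        # detection is retried on the first status check.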
        dbrm_socket = DBRMSocketHandler()
        # TODO: fix the DBRM message shown on node restarts.
        #       Use the DBRM() context manager.
        try:
            dbrm_socket.connect()
            dbrm_socket._detect_protocol()
            dbrm_socket.close()
        except Exception:
            logging.warning(
                'Something went wrong while trying to detect the DBRM protocol.\n'
                'It seems the "controllernode" process isn\'t started.\n'
                'This is just a notification, not a problem.\n'
                'The next detection attempt happens at the first node/cluster '
                'status check.\n'
                f'This can cause an extra {SOCK_TIMEOUT} seconds delay during\n'
                'that first attempt to get the status.',
                exc_info=True
            )
    else:
        logging.info(
            'In maintenance state, not syncing config from other nodes.'
        )

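    # Activate failover monitoring through the bus channel subscribed above,
    # unless the node is in maintenance state.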
    if turn_on_failover:
        if not helpers.in_maintenance_state(DEFAULT_MCS_CONF_PATH):
            cherrypy.engine.publish('failover', True)
        else:
            logging.info('In maintenance state, not starting Failover.')

    AppManager.started = True
    cherrypy.engine.block()