You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-08-05 16:15:50 +03:00
fix(cmapi): MCOL-5638 Improve Client Error Messaging and async broadcasting config.
* fix(cmapi): broadcast_new_config function using aiohttp and improved errors handling mechanics * fix(cmapi): requirements updating aiohttp version and it's dependencies * fix(cmapi, deps): unused imports * fix(cmapi, tests): test_broadcast_new_config
This commit is contained in:
committed by
Alan Mologorsky
parent
89c892ab2f
commit
55b09a71fc
@@ -47,17 +47,7 @@ def toggle_cluster_state(
|
|||||||
|
|
||||||
switch_node_maintenance(maintainance_flag)
|
switch_node_maintenance(maintainance_flag)
|
||||||
update_revision_and_manager()
|
update_revision_and_manager()
|
||||||
|
broadcast_new_config(config)
|
||||||
# TODO: move this from multiple places to one
|
|
||||||
try:
|
|
||||||
broadcast_successful = broadcast_new_config(config)
|
|
||||||
except Exception as err:
|
|
||||||
raise CMAPIBasicError(
|
|
||||||
'Error while distributing config file.'
|
|
||||||
) from err
|
|
||||||
|
|
||||||
if not broadcast_successful:
|
|
||||||
raise CMAPIBasicError('Config distribution isn\'t successful.')
|
|
||||||
|
|
||||||
|
|
||||||
class ClusterHandler():
|
class ClusterHandler():
|
||||||
@@ -189,16 +179,7 @@ class ClusterHandler():
|
|||||||
input_config_filename=config, output_config_filename=config
|
input_config_filename=config, output_config_filename=config
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
broadcast_new_config(config)
|
||||||
broadcast_successful = broadcast_new_config(config)
|
|
||||||
except Exception as err:
|
|
||||||
raise CMAPIBasicError(
|
|
||||||
'Error while distributing config file.'
|
|
||||||
) from err
|
|
||||||
|
|
||||||
if not broadcast_successful:
|
|
||||||
raise CMAPIBasicError('Config distribution isn\'t successful.')
|
|
||||||
|
|
||||||
logger.debug(f'Successfully finished adding node {node}.')
|
logger.debug(f'Successfully finished adding node {node}.')
|
||||||
return response
|
return response
|
||||||
|
|
||||||
@@ -242,17 +223,7 @@ class ClusterHandler():
|
|||||||
update_revision_and_manager(
|
update_revision_and_manager(
|
||||||
input_config_filename=config, output_config_filename=config
|
input_config_filename=config, output_config_filename=config
|
||||||
)
|
)
|
||||||
try:
|
broadcast_new_config(config, nodes=active_nodes)
|
||||||
broadcast_successful = broadcast_new_config(
|
|
||||||
config, nodes=active_nodes
|
|
||||||
)
|
|
||||||
except Exception as err:
|
|
||||||
raise CMAPIBasicError(
|
|
||||||
'Error while distributing config file.'
|
|
||||||
) from err
|
|
||||||
if not broadcast_successful:
|
|
||||||
raise CMAPIBasicError('Config distribution isn\'t successful.')
|
|
||||||
|
|
||||||
logger.debug(f'Successfully finished removing node {node}.')
|
logger.debug(f'Successfully finished removing node {node}.')
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
@@ -4,7 +4,6 @@ TODO: remove NodeConfig usage and move to arguments (eg. nc or root)
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import concurrent
|
|
||||||
import configparser
|
import configparser
|
||||||
import datetime
|
import datetime
|
||||||
import logging
|
import logging
|
||||||
@@ -18,6 +17,7 @@ from shutil import copyfile
|
|||||||
from typing import Tuple, Optional
|
from typing import Tuple, Optional
|
||||||
from urllib.parse import urlencode, urlunparse
|
from urllib.parse import urlencode, urlunparse
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
import lxml.objectify
|
import lxml.objectify
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
@@ -291,8 +291,8 @@ def broadcast_new_config(
|
|||||||
test_mode: bool = False,
|
test_mode: bool = False,
|
||||||
nodes: Optional[list] = None,
|
nodes: Optional[list] = None,
|
||||||
timeout: Optional[int] = None
|
timeout: Optional[int] = None
|
||||||
) -> bool:
|
) -> None:
|
||||||
"""Send new config to nodes. Now in async way.
|
"""Send new config to nodes in async way.
|
||||||
|
|
||||||
:param cs_config_filename: Columnstore.xml path,
|
:param cs_config_filename: Columnstore.xml path,
|
||||||
defaults to DEFAULT_MCS_CONF_PATH
|
defaults to DEFAULT_MCS_CONF_PATH
|
||||||
@@ -309,9 +309,8 @@ def broadcast_new_config(
|
|||||||
:type nodes: Optional[list], optional
|
:type nodes: Optional[list], optional
|
||||||
:param timeout: timeout passing to gracefully stop DMLProc TODO: for next
|
:param timeout: timeout passing to gracefully stop DMLProc TODO: for next
|
||||||
releases. Could affect all logic of broadcacting new config
|
releases. Could affect all logic of broadcacting new config
|
||||||
:type timeout: int
|
:type timeout: Optional[int], optional
|
||||||
:return: success state
|
:raises CMAPIBasicError: If Broadcasting config to nodes failed with errors
|
||||||
:rtype: bool
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# TODO: move this from multiple places to one, eg to helpers
|
# TODO: move this from multiple places to one, eg to helpers
|
||||||
@@ -343,72 +342,75 @@ def broadcast_new_config(
|
|||||||
if test_mode:
|
if test_mode:
|
||||||
body['test'] = True
|
body['test'] = True
|
||||||
|
|
||||||
failed_nodes = []
|
async def update_config(node: str, headers: dict, body: dict) -> None:
|
||||||
success_nodes = []
|
"""Update remote node config asyncronously.
|
||||||
|
|
||||||
async def update_config(node, success_nodes, failed_nodes, headers, body):
|
:param node: node ip address or hostname
|
||||||
|
:type node: str
|
||||||
|
:param headers: headers for request
|
||||||
|
:type headers: dict
|
||||||
|
:param body: request body
|
||||||
|
:type body: dict
|
||||||
|
:raises CMAPIBasicError: If request failed by status code
|
||||||
|
:raises CMAPIBasicError: If request failed by some undefined error
|
||||||
|
:raises CMAPIBasicError: If request failed by timeout
|
||||||
|
:raises CMAPIBasicError: If undefined error happened
|
||||||
|
"""
|
||||||
url = f'https://{node}:8640/cmapi/{version}/node/config'
|
url = f'https://{node}:8640/cmapi/{version}/node/config'
|
||||||
# TODO: investigate about hardcoded 120 seconds timeout
|
|
||||||
# Check e1242eed47b61276ebc86136f124f6d974655515 in cmapi old
|
|
||||||
# repo to get more info. Patric made it because:
|
|
||||||
# "Made the timeout for a CS process restart 120s, since
|
|
||||||
# the container dispatcher waits up to 60s for SM to stop"
|
|
||||||
request_put = partial(
|
|
||||||
requests.put, url, verify=False, headers=headers, json=body,
|
|
||||||
timeout=120
|
|
||||||
)
|
|
||||||
success = False
|
|
||||||
executor = concurrent.futures.ThreadPoolExecutor()
|
|
||||||
loop = asyncio.get_event_loop()
|
|
||||||
|
|
||||||
# TODO: remove this retry, it cause retries and long waiting time
|
async with aiohttp.ClientSession() as session:
|
||||||
# for eg if some of mcs processes couldn't properly start/stop.
|
|
||||||
# Fix error handling, could be raising error instead of returning
|
|
||||||
# bool value
|
|
||||||
for retry in range(5):
|
|
||||||
try:
|
try:
|
||||||
r = await loop.run_in_executor(executor, request_put)
|
async with session.put(
|
||||||
r.raise_for_status()
|
url, headers=headers, json=body, ssl=False, timeout=120
|
||||||
except requests.Timeout as e:
|
) as response:
|
||||||
logging.warning(
|
response.raise_for_status()
|
||||||
f'Timeout while pushing new config to "{node}"'
|
logging.info(f'Node "{node}" config put successfull.')
|
||||||
|
except aiohttp.ClientResponseError as err:
|
||||||
|
message = (
|
||||||
|
f'Node "{node}" config put failed with status: '
|
||||||
|
f'"{err.status}" and message: {err.message}'
|
||||||
)
|
)
|
||||||
except requests.exceptions.RequestException as e:
|
logging.error(message)
|
||||||
logging.warning(
|
raise CMAPIBasicError(message)
|
||||||
'Error while pushing new config to "%s": %s"', node, str(e)
|
except aiohttp.ClientError as err:
|
||||||
|
message = (
|
||||||
|
f'Node "{node}" config put failed with ClientError: '
|
||||||
|
f'{str(err)}'
|
||||||
|
)
|
||||||
|
logging.error(message)
|
||||||
|
raise CMAPIBasicError(message)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
message = f'Node "{node}" config put failed by Timeout: '
|
||||||
|
logging.error(message)
|
||||||
|
raise CMAPIBasicError(message)
|
||||||
|
except Exception as err:
|
||||||
|
message = (
|
||||||
|
f'Node "{node}" config put failed by undefined exception: '
|
||||||
|
f'{str(err)}'
|
||||||
)
|
)
|
||||||
logging.debug('Response: %s', r.text)
|
logging.error(message)
|
||||||
except Exception as e:
|
raise CMAPIBasicError(message)
|
||||||
logging.warning(
|
|
||||||
f'Got an unexpected error pushing new config to "{node}"',
|
|
||||||
exc_info=True
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
success_nodes.append(node)
|
|
||||||
success = True
|
|
||||||
break
|
|
||||||
if not success:
|
|
||||||
failed_nodes.append(node)
|
|
||||||
|
|
||||||
loop = asyncio.new_event_loop()
|
loop = asyncio.new_event_loop()
|
||||||
asyncio.set_event_loop(loop)
|
asyncio.set_event_loop(loop)
|
||||||
tasks = [
|
tasks = [
|
||||||
update_config(node, success_nodes, failed_nodes, headers, body)
|
update_config(node, headers, body)
|
||||||
for node in nodes
|
for node in nodes
|
||||||
]
|
]
|
||||||
loop.run_until_complete(asyncio.wait(tasks))
|
error_messages = []
|
||||||
|
results = loop.run_until_complete(
|
||||||
|
asyncio.gather(*tasks, return_exceptions=True)
|
||||||
|
)
|
||||||
|
for result in results:
|
||||||
|
if isinstance(result, Exception):
|
||||||
|
error_messages.append(result.message)
|
||||||
loop.close()
|
loop.close()
|
||||||
|
if error_messages:
|
||||||
if len(success_nodes) > 0:
|
errors_str = ', '.join(error_messages)
|
||||||
logging.info(
|
final_message = (
|
||||||
f'Successfully pushed new config file to {success_nodes}'
|
f'Broadcasting config to nodes failed with errors: {errors_str}'
|
||||||
)
|
)
|
||||||
if len(failed_nodes) > 0:
|
raise CMAPIBasicError(final_message)
|
||||||
logging.error(
|
|
||||||
f'Failed to push the new config to {failed_nodes}'
|
|
||||||
)
|
|
||||||
return False
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
|
||||||
# Might be more appropriate to put these in node_manipulation?
|
# Might be more appropriate to put these in node_manipulation?
|
||||||
|
@@ -115,7 +115,10 @@ class TestTransactions(unittest.TestCase):
|
|||||||
node_manipulation.add_node(myaddr, mcs_config_filename, mcs_config_filename)
|
node_manipulation.add_node(myaddr, mcs_config_filename, mcs_config_filename)
|
||||||
|
|
||||||
# Note, 1.2.3.4 is intentional -> doesn't exist, so shouldn't end up in the node list returned
|
# Note, 1.2.3.4 is intentional -> doesn't exist, so shouldn't end up in the node list returned
|
||||||
print("\n\nNOTE! This is expected to pause here for ~10s, this isn't an error, yet.\n")
|
print(
|
||||||
|
"\n\nNOTE! This is expected to pause here for ~10s, "
|
||||||
|
"this isn't an error, yet.\n"
|
||||||
|
)
|
||||||
result = helpers.start_transaction(
|
result = helpers.start_transaction(
|
||||||
cmapi_config_filename, mcs_config_filename,
|
cmapi_config_filename, mcs_config_filename,
|
||||||
optional_nodes = ['1.2.3.4']
|
optional_nodes = ['1.2.3.4']
|
||||||
@@ -123,17 +126,24 @@ class TestTransactions(unittest.TestCase):
|
|||||||
self.assertTrue(result[0])
|
self.assertTrue(result[0])
|
||||||
self.assertEqual(len(result[2]), 1)
|
self.assertEqual(len(result[2]), 1)
|
||||||
self.assertEqual(result[2][0], myaddr)
|
self.assertEqual(result[2][0], myaddr)
|
||||||
success = helpers.broadcast_new_config(
|
try:
|
||||||
mcs_config_filename,
|
helpers.broadcast_new_config(
|
||||||
cmapi_config_filename=cmapi_config_filename,
|
mcs_config_filename,
|
||||||
test_mode=True,
|
cmapi_config_filename=cmapi_config_filename,
|
||||||
nodes = result[2]
|
test_mode=True,
|
||||||
)
|
nodes = result[2]
|
||||||
# not specifying nodes here to exercise the nodes = None path
|
)
|
||||||
helpers.commit_transaction(
|
# not specifying nodes here to exercise the
|
||||||
result[1], cmapi_config_filename, mcs_config_filename
|
# nodes = None path
|
||||||
)
|
helpers.commit_transaction(
|
||||||
self.assertTrue(success)
|
result[1], cmapi_config_filename, mcs_config_filename
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
self.fail(
|
||||||
|
'Unexpected broadcast_new_config failure with error: '
|
||||||
|
f'{exc}'
|
||||||
|
)
|
||||||
|
raise
|
||||||
except:
|
except:
|
||||||
cherrypy.engine.exit()
|
cherrypy.engine.exit()
|
||||||
cherrypy.engine.block()
|
cherrypy.engine.block()
|
||||||
|
@@ -8,15 +8,15 @@ psutil==5.9.1
|
|||||||
pyotp==2.6.0
|
pyotp==2.6.0
|
||||||
requests==2.27.1
|
requests==2.27.1
|
||||||
typer==0.15.1
|
typer==0.15.1
|
||||||
|
aiohttp==3.11.12
|
||||||
|
|
||||||
# indirect dependencies
|
# indirect dependencies
|
||||||
aiohttp==3.8.1
|
aiohappyeyeballs==2.4.4
|
||||||
aiosignal==1.2.0
|
aiosignal==1.3.2
|
||||||
argcomplete==2.0.0
|
argcomplete==2.0.0
|
||||||
async-timeout==4.0.2
|
async-timeout==5.0.1
|
||||||
asynctest==0.13.0
|
asynctest==0.13.0
|
||||||
attrs==22.1.0
|
attrs==25.1.0
|
||||||
boto==2.49.0
|
boto==2.49.0
|
||||||
boto3==1.24.55
|
boto3==1.24.55
|
||||||
botocore==1.27.55
|
botocore==1.27.55
|
||||||
@@ -30,7 +30,7 @@ colorama==0.4.4
|
|||||||
crcmod==1.7
|
crcmod==1.7
|
||||||
docutils==0.16
|
docutils==0.16
|
||||||
fasteners==0.17.3
|
fasteners==0.17.3
|
||||||
frozenlist==1.3.1
|
frozenlist==1.5.0
|
||||||
gcs-oauth2-boto-plugin==3.0
|
gcs-oauth2-boto-plugin==3.0
|
||||||
google-apitools==0.5.32
|
google-apitools==0.5.32
|
||||||
google-auth==2.10.0
|
google-auth==2.10.0
|
||||||
@@ -49,10 +49,11 @@ markdown-it-py==3.0.0
|
|||||||
mdurl==0.1.2
|
mdurl==0.1.2
|
||||||
monotonic==1.6
|
monotonic==1.6
|
||||||
more-itertools==8.12.0
|
more-itertools==8.12.0
|
||||||
multidict==6.0.2
|
multidict==6.1.0
|
||||||
oauth2client==4.1.3
|
oauth2client==4.1.3
|
||||||
orderedmultidict==1.0.1
|
orderedmultidict==1.0.1
|
||||||
portend==3.1.0
|
portend==3.1.0
|
||||||
|
propcache==0.2.1
|
||||||
pyasn1-modules==0.2.8
|
pyasn1-modules==0.2.8
|
||||||
pyasn1==0.4.8
|
pyasn1==0.4.8
|
||||||
pycparser==2.21
|
pycparser==2.21
|
||||||
@@ -74,6 +75,6 @@ six==1.16.0
|
|||||||
tempora==5.0.1
|
tempora==5.0.1
|
||||||
typing_extensions==4.12.2
|
typing_extensions==4.12.2
|
||||||
urllib3==1.26.8
|
urllib3==1.26.8
|
||||||
yarl==1.8.1
|
yarl==1.18.3
|
||||||
zc.lockfile==2.0
|
zc.lockfile==2.0
|
||||||
zipp==3.7.0
|
zipp==3.7.0
|
||||||
|
Reference in New Issue
Block a user