
MCOL-5496: Merge CMAPI code to engine repo.

[add] cmapi code to engine
This commit is contained in:
mariadb-AlanMologorsky 2022-11-18 15:18:40 +02:00 committed by Alan Mologorsky
parent 77eedd1756
commit a079a2c944
93 changed files with 15218 additions and 0 deletions

cmapi/.gitignore vendored Normal file

@@ -0,0 +1,89 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/
venv/
.python-version
# Translations
*.mo
*.pot
# Django stuff:
*.log
# Sphinx documentation
docs/_build/
# PyBuilder
target/
#Ipython Notebook
.ipynb_checkpoints
*.py.swp
python/
deps/
engine/
cmapi_server/test/tmp.xml
systemd.env
mariadb-columnstore-cmapi.service
prerm
postinst
conffiles
CMakeCache.txt
CMakeFiles
CMakeScripts
Makefile
cmake_install.cmake
install_manifest.txt
*CPack*
*.rpm
*.deb
result
centos8
ubuntu20.04
buildinfo.txt

cmapi/CMakeLists.txt Normal file

@@ -0,0 +1,107 @@
CMAKE_MINIMUM_REQUIRED(VERSION 3.11)
PROJECT(cmapi NONE)
SET(CPACK_PACKAGE_NAME "MariaDB-columnstore-cmapi")
FILE(STRINGS VERSION CRUDE_CMAPI_VERSION_MAJOR REGEX "^CMAPI_VERSION_MAJOR=")
FILE(STRINGS VERSION CRUDE_CMAPI_VERSION_MINOR REGEX "^CMAPI_VERSION_MINOR=")
FILE(STRINGS VERSION CRUDE_CMAPI_VERSION_PATCH REGEX "^CMAPI_VERSION_PATCH=")
STRING(REPLACE "CMAPI_VERSION_MAJOR=" "" CMAPI_VERSION_MAJOR ${CRUDE_CMAPI_VERSION_MAJOR})
STRING(REPLACE "CMAPI_VERSION_MINOR=" "" CMAPI_VERSION_MINOR ${CRUDE_CMAPI_VERSION_MINOR})
STRING(REPLACE "CMAPI_VERSION_PATCH=" "" CMAPI_VERSION_PATCH ${CRUDE_CMAPI_VERSION_PATCH})
SET(PACKAGE_VERSION "${CMAPI_VERSION_MAJOR}.${CMAPI_VERSION_MINOR}.${CMAPI_VERSION_PATCH}")
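# For illustration: with the VERSION file shown later in this commit
# (CMAPI_VERSION_MAJOR=23, CMAPI_VERSION_MINOR=03, CMAPI_VERSION_PATCH=1b),
# PACKAGE_VERSION evaluates to "23.03.1b".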
SET(CMAPI_USER "root")
SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "MariaDB ColumnStore CMAPI: cluster management API and command line tool.")
SET(CPACK_PACKAGE_URL "http://www.mariadb.com")
SET(CPACK_PACKAGE_CONTACT "MariaDB Corporation Ab")
SET(CPACK_PACKAGE_SUMMARY "MariaDB ColumnStore CMAPI: cluster management API and command line tool.")
SET(CPACK_PACKAGE_VENDOR "MariaDB Corporation Ab")
SET(CPACK_PACKAGE_LICENSE "Copyright (c) 2023 MariaDB Corporation Ab.; redistributable under the terms of the GPLv2, see the file LICENSE.GPL2 for details.")
SET(BIN_DIR "/usr/bin")
SET(ETC_DIR "/etc/columnstore")
SET(SHARE_DIR "/usr/share/columnstore")
SET(CMAPI_DIR "${SHARE_DIR}/cmapi")
SET(SYSTEMD_UNIT_DIR "/usr/lib/systemd/system")
SET(SYSTEMD_ENGINE_UNIT_NAME "mariadb-columnstore")
SET(CMAPI_CONF_FILEPATH "${ETC_DIR}/cmapi_server.conf")
STRING(TOLOWER ${CPACK_PACKAGE_NAME} SYSTEMD_UNIT_NAME)
CONFIGURE_FILE(service.template ${SYSTEMD_UNIT_NAME}.service)
CONFIGURE_FILE(systemd.env.template systemd.env)
CONFIGURE_FILE(postinst.template postinst)
CONFIGURE_FILE(prerm.template prerm)
CONFIGURE_FILE(conffiles.template conffiles)
CONFIGURE_FILE(mcs.template mcs)
INSTALL(DIRECTORY python deps mcs_node_control failover cmapi_server engine_files mcs_cluster_tool
DESTINATION ${CMAPI_DIR}
USE_SOURCE_PERMISSIONS
PATTERN "test" EXCLUDE
PATTERN "cmapi_server.conf" EXCLUDE)
INSTALL(FILES LICENSE.GPL2 VERSION
DESTINATION ${CMAPI_DIR})
INSTALL(FILES check_ready.sh
PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ
DESTINATION ${CMAPI_DIR})
INSTALL(FILES cmapi_server/cmapi_server.conf systemd.env
DESTINATION ${ETC_DIR})
INSTALL(FILES ${SYSTEMD_UNIT_NAME}.service
DESTINATION ${SYSTEMD_UNIT_DIR})
INSTALL(FILES mcs
PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ
DESTINATION ${BIN_DIR})
OPTION(RPM "Build an RPM" OFF)
IF(RPM)
SET(CPACK_GENERATOR "RPM")
SET(CPACK_RPM_PACKAGE_VERSION ${PACKAGE_VERSION})
SET(CPACK_RPM_PACKAGE_NAME ${CPACK_PACKAGE_NAME})
SET(CPACK_RPM_PACKAGE_LICENSE "GPLv2")
SET(CPACK_RPM_PACKAGE_GROUP "Applications/Databases")
SET(CPACK_RPM_PACKAGE_URL ${CPACK_PACKAGE_URL})
SET(CPACK_RPM_PACKAGE_SUMMARY ${CPACK_PACKAGE_SUMMARY})
SET(CPACK_RPM_PACKAGE_VENDOR ${CPACK_PACKAGE_VENDOR})
SET(CPACK_RPM_PACKAGE_LICENSE ${CPACK_PACKAGE_LICENSE})
SET(CPACK_RPM_PACKAGE_DESCRIPTION ${CPACK_PACKAGE_DESCRIPTION_SUMMARY})
SET(CPACK_RPM_SPEC_MORE_DEFINE "%undefine __brp_mangle_shebangs")
SET(CPACK_RPM_PACKAGE_AUTOREQ "no")
SET(CPACK_RPM_POST_INSTALL_SCRIPT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/postinst)
SET(CPACK_RPM_PRE_UNINSTALL_SCRIPT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/prerm)
SET(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION ${ETC_DIR} ${SHARE_DIR})
SET(CPACK_RPM_USER_FILELIST "%config(noreplace) ${CMAPI_CONF_FILEPATH}")
SET(CPACK_RPM_PACKAGE_OBSOLETES "mariadb-columnstore-cmapi")
SET(CPACK_RPM_PACKAGE_REQUIRES "curl")
ENDIF()
OPTION(DEB "Build a DEB" OFF)
IF(DEB)
SET(CPACK_GENERATOR "DEB")
SET(CPACK_DEBIAN_PACKAGE_VERSION ${PACKAGE_VERSION})
# TODO: different names in deb and rpm packages, fix it in next releases.
STRING(TOLOWER ${CPACK_PACKAGE_NAME} CPACK_DEBIAN_PACKAGE_NAME)
STRING(TOLOWER ${CPACK_PACKAGE_NAME} CPACK_PACKAGE_NAME)
SET(CPACK_DEBIAN_PACKAGE_LICENSE "GPLv2")
SET(CPACK_DEBIAN_PACKAGE_URL ${CPACK_PACKAGE_URL})
SET(CPACK_DEBIAN_PACKAGE_SUMMARY ${CPACK_PACKAGE_SUMMARY})
SET(CPACK_DEBIAN_PACKAGE_VENDOR ${CPACK_PACKAGE_VENDOR})
SET(CPACK_DEBIAN_PACKAGE_LICENSE ${CPACK_PACKAGE_LICENSE})
SET(CPACK_DEBIAN_PACKAGE_DESCRIPTION ${CPACK_PACKAGE_DESCRIPTION_SUMMARY})
SET(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${CMAKE_CURRENT_SOURCE_DIR}/prerm;${CMAKE_CURRENT_SOURCE_DIR}/postinst;${CMAKE_CURRENT_SOURCE_DIR}/conffiles")
SET(CPACK_DEBIAN_PACKAGE_REPLACES "mariadb-columnstore-cmapi")
SET(CPACK_DEBIAN_PACKAGE_DEPENDS "curl")
ENDIF()
SET(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_NAME}-${PACKAGE_VERSION}.${CMAKE_HOST_SYSTEM_PROCESSOR}")
INCLUDE (CPack)

cmapi/LICENSE.GPL2 Normal file

@@ -0,0 +1,339 @@
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Lesser General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) year name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License.

cmapi/README.md Normal file

@@ -0,0 +1,61 @@
# CMAPI REST server
[![Build Status](https://ci.columnstore.mariadb.net/api/badges/mariadb-corporation/mariadb-columnstore-cmapi/status.svg)](https://ci.columnstore.mariadb.net/mariadb-corporation/mariadb-columnstore-cmapi)
## Overview
This RESTful server enables multi-node setups for MCS.
## Requirements
See the requirements.txt file.
All the Python package prerequisites are shipped with a pre-built Python interpreter.
## Usage
To run the server using defaults call:
```sh
python3 -m cmapi_server
```
There is a configuration file inside cmapi_server.
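For a quick readiness probe against a running server, a minimal sketch using only the Python standard library (this assumes the default port 8640 from cmapi_server.conf and the bundled self-signed certificate, so verification is disabled):
```python
import ssl
import urllib.request

# CMAPI serves HTTPS with a self-signed certificate, so skip verification.
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

# The /cmapi/ready endpoint answers once the server can handle requests.
with urllib.request.urlopen('https://127.0.0.1:8640/cmapi/ready', context=ctx) as resp:
    print(resp.status, resp.read().decode())
```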
## Testing
To launch the integration and unit tests use unittest discovery mode.
```sh
python3 -m unittest discover -v mcs_node_control
python3 -m unittest discover -v cmapi_server
python3 -m unittest discover -v failover
```
The mcs_node_control unit tests require root privileges and an additional
systemd unit to run smoothly.
## Build packages
Packages bundle a Python interpreter and the Python dependencies.
## Get dependencies
```sh
# get portable python
wget -qO- https://cspkg.s3.amazonaws.com/python-dist-no-nis.tar.gz | tar xzf - -C ./
# install python dependencies
python/bin/pip3 install -t deps --only-binary=:all: -r requirements.txt
```
## RPM
```sh
./cleanup.sh
yum install -y wget cmake make rpm-build
cmake -DRPM=1 .
make package
```
## DEB
```sh
./cleanup.sh
DEBIAN_FRONTEND=noninteractive apt update && apt install -y cmake make
cmake -DDEB=1 .
make package
```

cmapi/VERSION Normal file

@@ -0,0 +1,3 @@
CMAPI_VERSION_MAJOR=23
CMAPI_VERSION_MINOR=03
CMAPI_VERSION_PATCH=1b

cmapi/check_ready.sh Executable file

@@ -0,0 +1,19 @@
#!/bin/bash
SEC_TO_WAIT=15
echo -n "Waiting for CMAPI to finish startup"
success=false
for i in $(seq 1 $SEC_TO_WAIT); do
echo -n "..$i"
if ! curl -k -s --output /dev/null --fail https://127.0.0.1:8640/cmapi/ready; then
sleep 1
else
success=true
break
fi
done
echo
if $success; then
echo "CMAPI ready to handle requests."
else
echo "CMAPI not ready after waiting $SEC_TO_WAIT seconds. Check log file for further details."
fi

cmapi/cleanup.sh Executable file

@@ -0,0 +1,21 @@
#!/bin/bash
set -euo pipefail
IFS=$'\n\t'
rm -rf \
cmapi_server/test/tmp.xml \
systemd.env \
*.service \
prerm \
postinst \
CMakeCache.txt \
CMakeFiles \
CMakeScripts \
Makefile \
cmake_install.cmake \
install_manifest.txt \
*CPack* \
# buildinfo.txt
find . -type d -name __pycache__ -exec rm -rf {} +
find . -type f -iname '*.swp' -exec rm -rf {} +


@@ -0,0 +1,249 @@
<Columnstore Version="V1.0.0">
<!--
WARNING: Do not make changes to this file unless directed to do so by
MariaDB service engineers. Incorrect settings can render your system
unusable and will require a service call to correct.
-->
<ExeMgr1>
<IPAddr>127.0.0.1</IPAddr>
<Port>8601</Port>
<Module>unassigned</Module>
</ExeMgr1>
<pm1_WriteEngineServer>
<IPAddr>127.0.0.1</IPAddr>
<Port>8630</Port>
</pm1_WriteEngineServer>
<DDLProc>
<IPAddr>127.0.0.1</IPAddr>
<Port>8612</Port>
</DDLProc>
<DMLProc>
<IPAddr>127.0.0.1</IPAddr>
<Port>8614</Port>
</DMLProc>
<BatchInsert>
<RowsPerBatch>10000</RowsPerBatch>
</BatchInsert>
<PrimitiveServers>
<Count>1</Count>
<ConnectionsPerPrimProc>2</ConnectionsPerPrimProc>
<ProcessorThreshold>128</ProcessorThreshold>
<ProcessorQueueSize>10K</ProcessorQueueSize> <!-- minimum of extent size 8192 -->
<DebugLevel>0</DebugLevel>
<ColScanBufferSizeBlocks>512</ColScanBufferSizeBlocks>
<ColScanReadAheadBlocks>512</ColScanReadAheadBlocks> <!-- s/b factor of extent size 8192 -->
<!-- <BPPCount>16</BPPCount> --> <!-- Default num cores * 2. A cap on the number of simultaneous primitives per jobstep -->
<PrefetchThreshold>1</PrefetchThreshold>
<PTTrace>0</PTTrace>
<RotatingDestination>n</RotatingDestination> <!-- Iterate thru UM ports; set to 'n' if UM/PM on same server -->
<!-- <HighPriorityPercentage>60</HighPriorityPercentage> -->
<!-- <MediumPriorityPercentage>30</MediumPriorityPercentage> -->
<!-- <LowPriorityPercentage>10</LowPriorityPercentage> -->
<DirectIO>y</DirectIO>
<HighPriorityPercentage/>
<MediumPriorityPercentage/>
<LowPriorityPercentage/>
</PrimitiveServers>
<PMS1>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS1>
<SystemConfig>
<SystemName>columnstore-1</SystemName>
<ParentOAMModuleName>pm1</ParentOAMModuleName>
<PrimaryUMModuleName>pm1</PrimaryUMModuleName>
<!-- Warning: Do not change this value once database is built -->
<DBRootCount>1</DBRootCount>
<DBRoot1>/var/lib/columnstore/data1</DBRoot1>
<DBRMRoot>/var/lib/columnstore/data1/systemFiles/dbrm/BRM_saves</DBRMRoot>
<TableLockSaveFile>/var/lib/columnstore/data1/systemFiles/dbrm/tablelocks</TableLockSaveFile>
<DBRMTimeOut>15</DBRMTimeOut> <!-- in seconds -->
<DBRMSnapshotInterval>100000</DBRMSnapshotInterval>
<WaitPeriod>10</WaitPeriod> <!-- in seconds -->
<MemoryCheckPercent>95</MemoryCheckPercent> <!-- Max real memory to limit growth of buffers to -->
<DataFileLog>OFF</DataFileLog>
<!-- enable if you want to limit how much memory may be used for hdfs read/write memory buffers.
<hdfsRdwrBufferMaxSize>8G</hdfsRdwrBufferMaxSize>
-->
<hdfsRdwrScratch>/rdwrscratch</hdfsRdwrScratch> <!-- Do not set to an hdfs file path -->
<!-- Be careful modifying SystemTempFileDir! On start, ExeMgr deletes
the entire "joins" & "aggregates" subdirectories and recreates them to make sure no
files are left behind. -->
<SystemTempFileDir>/tmp/columnstore_tmp_files</SystemTempFileDir>
</SystemConfig>
<SystemModuleConfig>
<ModuleType2>um</ModuleType2>
<ModuleDesc2>User Module</ModuleDesc2>
<ModuleCount2>0</ModuleCount2>
<ModuleIPAddr1-1-2>0.0.0.0</ModuleIPAddr1-1-2>
<ModuleHostName1-1-2>unassigned</ModuleHostName1-1-2>
<ModuleDisableState1-2>ENABLED</ModuleDisableState1-2>
<ModuleCPUCriticalThreshold2>0</ModuleCPUCriticalThreshold2>
<ModuleCPUMajorThreshold2>0</ModuleCPUMajorThreshold2>
<ModuleCPUMinorThreshold2>0</ModuleCPUMinorThreshold2>
<ModuleCPUMinorClearThreshold2>0</ModuleCPUMinorClearThreshold2>
<ModuleDiskCriticalThreshold2>90</ModuleDiskCriticalThreshold2>
<ModuleDiskMajorThreshold2>80</ModuleDiskMajorThreshold2>
<ModuleDiskMinorThreshold2>70</ModuleDiskMinorThreshold2>
<ModuleMemCriticalThreshold2>90</ModuleMemCriticalThreshold2>
<ModuleMemMajorThreshold2>0</ModuleMemMajorThreshold2>
<ModuleMemMinorThreshold2>0</ModuleMemMinorThreshold2>
<ModuleSwapCriticalThreshold2>90</ModuleSwapCriticalThreshold2>
<ModuleSwapMajorThreshold2>80</ModuleSwapMajorThreshold2>
<ModuleSwapMinorThreshold2>70</ModuleSwapMinorThreshold2>
<ModuleDiskMonitorFileSystem1-2>/</ModuleDiskMonitorFileSystem1-2>
<ModuleDBRootCount1-2>unassigned</ModuleDBRootCount1-2>
<ModuleDBRootID1-1-2>unassigned</ModuleDBRootID1-1-2>
<ModuleType3>pm</ModuleType3>
<ModuleDesc3>Performance Module</ModuleDesc3>
<ModuleCount3>1</ModuleCount3>
<ModuleIPAddr1-1-3>127.0.0.1</ModuleIPAddr1-1-3>
<ModuleHostName1-1-3>localhost</ModuleHostName1-1-3>
<ModuleDisableState1-3>ENABLED</ModuleDisableState1-3>
<ModuleCPUCriticalThreshold3>0</ModuleCPUCriticalThreshold3>
<ModuleCPUMajorThreshold3>0</ModuleCPUMajorThreshold3>
<ModuleCPUMinorThreshold3>0</ModuleCPUMinorThreshold3>
<ModuleCPUMinorClearThreshold3>0</ModuleCPUMinorClearThreshold3>
<ModuleDiskCriticalThreshold3>90</ModuleDiskCriticalThreshold3>
<ModuleDiskMajorThreshold3>80</ModuleDiskMajorThreshold3>
<ModuleDiskMinorThreshold3>70</ModuleDiskMinorThreshold3>
<ModuleMemCriticalThreshold3>90</ModuleMemCriticalThreshold3>
<ModuleMemMajorThreshold3>0</ModuleMemMajorThreshold3>
<ModuleMemMinorThreshold3>0</ModuleMemMinorThreshold3>
<ModuleSwapCriticalThreshold3>90</ModuleSwapCriticalThreshold3>
<ModuleSwapMajorThreshold3>80</ModuleSwapMajorThreshold3>
<ModuleSwapMinorThreshold3>70</ModuleSwapMinorThreshold3>
<ModuleDiskMonitorFileSystem1-3>/</ModuleDiskMonitorFileSystem1-3>
<ModuleDBRootCount1-3>1</ModuleDBRootCount1-3>
<ModuleDBRootID1-1-3>1</ModuleDBRootID1-1-3>
</SystemModuleConfig>
<SessionManager>
<MaxConcurrentTransactions>1000</MaxConcurrentTransactions>
<TxnIDFile>/var/lib/columnstore/data1/systemFiles/dbrm/SMTxnID</TxnIDFile>
</SessionManager>
<VersionBuffer>
<!-- VersionBufferFileSize must be a multiple of 8192.
One version buffer file will be put on each DB root. -->
<VersionBufferFileSize>1GB</VersionBufferFileSize>
</VersionBuffer>
<OIDManager>
<!-- Do not change this file after database built -->
<OIDBitmapFile>/var/lib/columnstore/data1/systemFiles/dbrm/oidbitmap</OIDBitmapFile>
<!-- Do not change this value after database built -->
<FirstOID>3000</FirstOID>
</OIDManager>
<WriteEngine>
<BulkRoot>/var/log/mariadb/columnstore/data/bulk</BulkRoot>
<BulkRollbackDir>/var/lib/columnstore/data1/systemFiles/bulkRollback</BulkRollbackDir>
<MaxFileSystemDiskUsagePct>98</MaxFileSystemDiskUsagePct>
<CompressedPaddingBlocks>1</CompressedPaddingBlocks> <!-- Number of blocks used to pad compressed chunks -->
<FastDelete>n</FastDelete>
</WriteEngine>
<DBRM_Controller>
<NumWorkers>1</NumWorkers>
<IPAddr>127.0.0.1</IPAddr>
<Port>8616</Port>
</DBRM_Controller>
<!-- Worker Port: 8700 - 8720 is reserved to support External Modules-->
<DBRM_Worker1>
<IPAddr>127.0.0.1</IPAddr>
<Port>8700</Port>
<Module>pm1</Module>
</DBRM_Worker1>
<DBBC>
<!-- The percentage of RAM to use for the disk block cache. Defaults to 70% -->
<!-- Alternatively, this can be specified in absolute terms using
the suffixes 'm' or 'g' to denote size in megabytes or gigabytes.-->
<!-- <NumBlocksPct>95</NumBlocksPct> -->
<!-- <NumThreads>16</NumThreads> --> <!-- 1-256. Default is 16. -->
<NumCaches>1</NumCaches><!-- # of parallel caches to instantiate -->
<IOMTracing>0</IOMTracing>
<BRPTracing>0</BRPTracing>
<ReportFrequency>65536</ReportFrequency>
<MaxOpenFiles>2K</MaxOpenFiles>
<DecreaseOpenFilesCount>200</DecreaseOpenFilesCount>
<FDCacheTrace>0</FDCacheTrace>
<NumBlocksPct>50</NumBlocksPct>
</DBBC>
<Installation>
<ServerTypeInstall>2</ServerTypeInstall>
<PMwithUM>y</PMwithUM>
<MySQLRep>n</MySQLRep>
<DBRootStorageType>internal</DBRootStorageType>
<UMStorageType>internal</UMStorageType>
<ProfileFile>/etc/profile.d/columnstoreAlias.sh</ProfileFile>
<DataRedundancyNetworkType/>
</Installation>
<ExtentMap>
<!--
WARNING: these can only be changed on an empty system. Once any object has been allocated
it cannot be changed! Extent size is 8M rows.
-->
<FilesPerColumnPartition>4</FilesPerColumnPartition> <!-- should be multiple of DBRootCount -->
<BRM_UID>0x0</BRM_UID>
</ExtentMap>
<HashJoin>
<MaxBuckets>128</MaxBuckets>
<MaxElems>128K</MaxElems> <!-- 128 buckets * 128K * 16 = 256 MB -->
<PmMaxMemorySmallSide>1G</PmMaxMemorySmallSide>
<TotalUmMemory>25%</TotalUmMemory>
<CPUniqueLimit>100</CPUniqueLimit>
<AllowDiskBasedJoin>N</AllowDiskBasedJoin>
<TempFileCompression>Y</TempFileCompression>
<TempFileCompressionType>Snappy</TempFileCompressionType> <!-- LZ4, Snappy -->
</HashJoin>
<JobList>
<FlushInterval>16K</FlushInterval>
<FifoSize>16</FifoSize>
<RequestSize>1</RequestSize> <!-- Number of extents per request, should be
less than MaxOutstandingRequests. Otherwise, default value 1 is used. -->
<!-- ProcessorThreadsPerScan is the number of jobs issued to process
each extent. The default is 16. MaxOutstandingRequests is the size of
the window of work in terms of extents. A value of 20 means there
is 20 extents worth of work for the PMs to process at any given time.
ProcessorThreadsPerScan * MaxOutstandingRequests should be at least
as many threads are available across all PMs. -->
<!-- <ProcessorThreadsPerScan>16</ProcessorThreadsPerScan> -->
<!-- MaxOutstandingRequests is going to default to the num of cores available
across all performance modules * 4 divided by the ProcessorThreadsPerScan,
but will be lower bounded by 20 -->
<!-- <MaxOutstandingRequests>20</MaxOutstandingRequests> -->
<ThreadPoolSize>100</ThreadPoolSize>
</JobList>
<RowAggregation>
<!-- <RowAggrThreads>4</RowAggrThreads> --> <!-- Default value is the number of cores -->
<!-- <RowAggrBuckets>32</RowAggrBuckets> --> <!-- Default value is number of cores * 4 -->
<!-- <RowAggrRowGroupsPerThread>20</RowAggrRowGroupsPerThread> --> <!-- Default value is 20 -->
<AllowDiskBasedAggregation>N</AllowDiskBasedAggregation>
</RowAggregation>
<CrossEngineSupport>
<Host>127.0.0.1</Host>
<Port>3306</Port>
<User>root</User>
<Password/>
<TLSCA/>
<TLSClientCert/>
<TLSClientKey/>
</CrossEngineSupport>
<QueryStats>
<Enabled>N</Enabled>
</QueryStats>
<UserPriority>
<Enabled>N</Enabled>
</UserPriority>
<NetworkCompression>
<Enabled>Y</Enabled>
<NetworkCompressionType>Snappy</NetworkCompressionType> <!-- LZ4, Snappy -->
</NetworkCompression>
<QueryTele>
<Host>127.0.0.1</Host>
<Port>0</Port>
</QueryTele>
<StorageManager>
<MaxSockets>30</MaxSockets>
<Enabled>N</Enabled>
</StorageManager>
<DataRedundancyConfig>
<DBRoot1PMs/>
</DataRedundancyConfig>
</Columnstore>


@@ -0,0 +1,293 @@
#!/usr/bin/env python3
"""
CherryPy-based webservice daemon with background threads
"""
import logging
import os
import threading
import time
from datetime import datetime, timedelta
import cherrypy
from cherrypy.process import plugins
from cryptography import x509
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import serialization, hashes
from cryptography.hazmat.primitives.asymmetric import rsa
from cryptography.x509.oid import NameOID
# TODO: fix dispatcher selection logic: code in endpoints.py executes at
# import time, which causes module logger misconfiguration
from cmapi_server.logging_management import config_cmapi_server_logging
config_cmapi_server_logging()
from cmapi_server import helpers
from cmapi_server.constants import DEFAULT_MCS_CONF_PATH, CMAPI_CONF_PATH
from cmapi_server.controllers.dispatcher import dispatcher, jsonify_error
from cmapi_server.failover_agent import FailoverAgent
from cmapi_server.managers.process import MCSProcessManager
from cmapi_server.managers.application import AppManager
from failover.node_monitor import NodeMonitor
from mcs_node_control.models.dbrm_socket import SOCK_TIMEOUT, DBRMSocketHandler
from mcs_node_control.models.node_config import NodeConfig
cert_filename = './cmapi_server/self-signed.crt'
def worker():
"""Background Timer that runs clean_txn_by_timeout() every 5 seconds
TODO: this needs to be fixed/optimized. I don't like creating the thread
repeatedly.
"""
while True:
t = threading.Timer(5.0, clean_txn_by_timeout)
t.start()
t.join()
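# A plainer equivalent for the TODO above (a sketch, not the shipped code):
# sleep in a loop instead of constructing a new Timer every iteration:
#
#     while True:
#         time.sleep(5.0)
#         clean_txn_by_timeout()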
def clean_txn_by_timeout():
txn_section = app.config.get('txn', None)
timeout_timestamp = txn_section.get('timeout') if txn_section is not None else None
current_timestamp = int(datetime.now().timestamp())
if timeout_timestamp is not None and current_timestamp > timeout_timestamp:
txn_config_changed = txn_section.get('config_changed', None)
if txn_config_changed is True:
node_config = NodeConfig()
node_config.rollback_config()
node_config.apply_config(
xml_string=node_config.get_current_config()
)
app.config.update({
'txn': {
'id': 0,
'timeout': 0,
'manager_address': '',
'config_changed': False,
},
})
class TxnBackgroundThread(plugins.SimplePlugin):
"""CherryPy plugin to create a background worker thread"""
app = None
def __init__(self, bus, app):
super(TxnBackgroundThread, self).__init__(bus)
self.t = None
self.app = app
def start(self):
"""Plugin entrypoint"""
self.t = threading.Thread(target=worker, name='TxnBackgroundThread')
self.t.daemon = True
self.t.start()
# Start at a higher priority than "Daemonize" (which we're not using
# yet but may in the future)
start.priority = 85
class FailoverBackgroundThread(plugins.SimplePlugin):
"""CherryPy plugin to start the thread for failover monitoring."""
def __init__(self, bus, turned_on):
super().__init__(bus)
self.node_monitor = NodeMonitor(agent=FailoverAgent())
self.running = False
self.turned_on = turned_on
if self.turned_on:
logging.info(
'Failover is turned ON by default or in CMAPI config file.'
)
else:
logging.info('Failover is turned OFF in CMAPI config file.')
def _start(self):
if self.running:
return
self.bus.log('Starting Failover monitor thread.')
self.node_monitor.start()
self.running = True
def _stop(self):
if not self.running:
return
self.bus.log('Stopping Failover monitor thread.')
self.node_monitor.stop()
self.running = False
def _subscriber(self, run_failover: bool):
if not self.turned_on:
return
if not isinstance(run_failover, bool):
self.bus.log(f'Got wrong obj in failover channel {run_failover}')
return
if run_failover:
self._start()
else:
self._stop()
def start(self):
self.bus.subscribe('failover', self._subscriber)
def stop(self):
cherrypy.engine.unsubscribe('failover', self._subscriber)
self._stop()
def create_self_signed_certificate():
key_filename = './cmapi_server/self-signed.key'
key = rsa.generate_private_key(
public_exponent=65537,
key_size=2048,
backend=default_backend()
)
with open(key_filename, "wb") as f:
f.write(key.private_bytes(
encoding=serialization.Encoding.PEM,
format=serialization.PrivateFormat.TraditionalOpenSSL,
encryption_algorithm=serialization.NoEncryption()),
)
subject = issuer = x509.Name([
x509.NameAttribute(NameOID.COUNTRY_NAME, 'US'),
x509.NameAttribute(NameOID.STATE_OR_PROVINCE_NAME, 'California'),
x509.NameAttribute(NameOID.LOCALITY_NAME, 'Redwood City'),
x509.NameAttribute(NameOID.ORGANIZATION_NAME, 'MariaDB'),
x509.NameAttribute(NameOID.COMMON_NAME, 'mariadb.com'),
])
basic_constraints = x509.BasicConstraints(ca=True, path_length=0)
cert = x509.CertificateBuilder(
).subject_name(
subject
).issuer_name(
issuer
).public_key(
key.public_key()
).serial_number(
x509.random_serial_number()
).not_valid_before(
datetime.utcnow()
).not_valid_after(
datetime.utcnow() + timedelta(days=365)
).add_extension(
basic_constraints,
False
).add_extension(
x509.SubjectAlternativeName([x509.DNSName('localhost')]),
critical=False
).sign(key, hashes.SHA256(), default_backend())
with open(cert_filename, 'wb') as f:
f.write(cert.public_bytes(serialization.Encoding.PEM))
if __name__ == '__main__':
logging.info(f'CMAPI Version: {AppManager.get_version()}')
# TODO: read cmapi config filepath as an argument
helpers.cmapi_config_check()
if not os.path.exists(cert_filename):
create_self_signed_certificate()
app = cherrypy.tree.mount(root=None, config=CMAPI_CONF_PATH)
app.config.update({
'/': {
'request.dispatch': dispatcher,
'error_page.default': jsonify_error,
},
'config': {
'path': CMAPI_CONF_PATH,
},
})
cherrypy.config.update(CMAPI_CONF_PATH)
cfg_parser = helpers.get_config_parser(CMAPI_CONF_PATH)
dispatcher_name, dispatcher_path = helpers.get_dispatcher_name_and_path(
cfg_parser
)
MCSProcessManager.detect(dispatcher_name, dispatcher_path)
# If we don't have auto_failover flag in the config turn it ON by default.
turn_on_failover = cfg_parser.getboolean(
'application', 'auto_failover', fallback=True
)
TxnBackgroundThread(cherrypy.engine, app).subscribe()
# subscribe FailoverBackgroundThread plugin code to bus channels;
# the code below does not start the "real" failover background thread
FailoverBackgroundThread(cherrypy.engine, turn_on_failover).subscribe()
cherrypy.engine.start()
cherrypy.engine.wait(cherrypy.engine.states.STARTED)
success = False
config_mtime = os.path.getmtime(DEFAULT_MCS_CONF_PATH)
# if the mtime changed, we infer that a put_config was run on this node,
# and we now have a current config file.
# TODO: Research all affected cases and remove/rewrite this loop below.
# Previously this caused an endless wait while starting the
# application after an upgrade.
# Do we have any cases where we need to keep retrying endlessly to
# sync the config with other nodes?
if not helpers.in_maintenance_state(DEFAULT_MCS_CONF_PATH):
while (
not success
and config_mtime == os.path.getmtime(DEFAULT_MCS_CONF_PATH)
):
try:
success = helpers.get_current_config_file()
except Exception:
logging.info(
'Main got exception while get_current_config_file',
exc_info=True
)
success = False
if not success:
delay = 10
logging.warning(
'Failed to fetch the current config file, '
f'retrying in {delay}s'
)
time.sleep(delay)
config_mtime = os.path.getmtime(DEFAULT_MCS_CONF_PATH)
helpers.wait_for_deactivation_or_put_config(config_mtime)
dbrm_socket = DBRMSocketHandler()
# TODO: fix DBRM message show on nodes restart.
# Use DBRM() context manager.
try:
dbrm_socket.connect()
dbrm_socket._detect_protocol()
dbrm_socket.close()
except Exception:
logging.warning(
'Something went wrong while trying to detect dbrm protocol.\n'
'Seems the "controllernode" process isn\'t started.\n'
'This is just a notification, not a problem.\n'
'Next detection will start at the first node/cluster '
'status check.\n'
f'This can cause an extra {SOCK_TIMEOUT} seconds delay on the\n'
'first attempt to get status.',
exc_info=True
)
else:
logging.info(
'In maintenance state, not syncing config from other nodes.'
)
if turn_on_failover:
if not helpers.in_maintenance_state(DEFAULT_MCS_CONF_PATH):
cherrypy.engine.publish('failover', True)
else:
logging.info('In maintenance state, not starting Failover.')
AppManager.started = True
cherrypy.engine.block()


@@ -0,0 +1,83 @@
{
"version": 1,
"filters": {
"add_ip_filter": {
"()": "cmapi_server.logging_management.AddIpFilter"
}
},
"formatters": {
"cmapi_server": {
"format": "%(asctime)s [%(levelname)s] (%(name)s) {%(threadName)s} %(ip)s %(message)s",
"datefmt": "%d/%b/%Y %H:%M:%S"
},
"default": {
"format": "%(asctime)s [%(levelname)s] (%(name)s) {%(threadName)s} %(message)s",
"datefmt": "%d/%b/%Y %H:%M:%S"
},
"container_sh": {
"format" : "`%(asctime)s`: %(message)s",
"datefmt": "%a %d %b %Y %I:%M:%S %p %Z"
}
},
"handlers": {
"cmapi_server": {
"level": "DEBUG",
"class": "logging.StreamHandler",
"filters": ["add_ip_filter"],
"formatter": "cmapi_server",
"stream": "ext://sys.stdout"
},
"console": {
"level": "DEBUG",
"class": "logging.StreamHandler",
"formatter": "default",
"stream": "ext://sys.stdout"
},
"file": {
"level": "DEBUG",
"class": "logging.handlers.RotatingFileHandler",
"formatter": "default",
"filename": "/var/log/mariadb/columnstore/cmapi_server.log",
"mode": "a",
"maxBytes": 1048576,
"backupCount": 10,
"encoding": "utf8"
},
"container_sh_file": {
"level": "DEBUG",
"class": "logging.handlers.RotatingFileHandler",
"formatter": "container_sh",
"filename": "/var/log/mariadb/columnstore/container-sh.log",
"mode": "a",
"maxBytes": 1024,
"backupCount": 3,
"encoding": "utf8"
}
},
"loggers": {
"cherrypy.access": {
"handlers": ["console", "file"],
"level": "INFO",
"propagate": false
},
"cherrypy.error": {
"handlers": ["console", "file"],
"level": "INFO",
"propagate": false
},
"cmapi_server": {
"handlers": ["cmapi_server", "file"],
"level": "DEBUG",
"propagate": false
},
"container_sh": {
"handlers": ["file", "container_sh_file"],
"level": "DEBUG",
"propagate": false
},
"": {
"handlers": ["console", "file"],
"level": "DEBUG"
}
}
}
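For reference, a dictConfig-style JSON like the one above is applied with the standard library; a minimal sketch (the path is an assumption, derived from CMAPI_LOG_CONF_PATH in constants.py and the install prefix):
```python
import json
import logging.config

# Load the JSON logging config shown above and hand it to dictConfig.
with open('/usr/share/columnstore/cmapi/cmapi_server/cmapi_logger.conf') as f:
    logging.config.dictConfig(json.load(f))

logging.info('logging configured')
```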


@@ -0,0 +1,9 @@
[global]
server.socket_host = '0.0.0.0'
server.socket_port = 8640
server.ssl_module = 'builtin'
server.ssl_certificate = './cmapi_server/self-signed.crt'
server.ssl_private_key = './cmapi_server/self-signed.key'
engine.autoreload.on = False
log.access_file = ''
log.error_file = ''


@@ -0,0 +1,84 @@
"""Module contains constants values for cmapi, failover and other .py files.
TODO: move main constant paths here and replace in files in next releases.
"""
import os
from typing import NamedTuple
# default MARIADB ColumnStore config path
MCS_ETC_PATH = '/etc/columnstore'
DEFAULT_MCS_CONF_PATH = os.path.join(MCS_ETC_PATH, 'Columnstore.xml')
# default Storage Manager config path
DEFAULT_SM_CONF_PATH = os.path.join(MCS_ETC_PATH, 'storagemanager.cnf')
# MCSDATADIR (in mcs engine code) and related paths
MCS_DATA_PATH = '/var/lib/columnstore'
MCS_MODULE_FILE_PATH = os.path.join(MCS_DATA_PATH, 'local/module')
EM_PATH_SUFFIX = 'data1/systemFiles/dbrm'
MCS_EM_PATH = os.path.join(MCS_DATA_PATH, EM_PATH_SUFFIX)
MCS_BRM_CURRENT_PATH = os.path.join(MCS_EM_PATH, 'BRM_saves_current')
S3_BRM_CURRENT_PATH = os.path.join(EM_PATH_SUFFIX, 'BRM_saves_current')
# keys file for CEJ password encryption/decryption
# (CrossEngineSupport section in Columnstore.xml)
MCS_SECRETS_FILE_PATH = os.path.join(MCS_DATA_PATH, '.secrets')
# CMAPI SERVER
CMAPI_CONFIG_FILENAME = 'cmapi_server.conf'
CMAPI_ROOT_PATH = os.path.dirname(__file__)
PROJECT_PATH = os.path.dirname(CMAPI_ROOT_PATH)
# path to VERSION file
VERSION_PATH = os.path.join(PROJECT_PATH, 'VERSION')
CMAPI_LOG_CONF_PATH = os.path.join(CMAPI_ROOT_PATH, 'cmapi_logger.conf')
# path to CMAPI default config
CMAPI_DEFAULT_CONF_PATH = os.path.join(CMAPI_ROOT_PATH, CMAPI_CONFIG_FILENAME)
# CMAPI config path
CMAPI_CONF_PATH = os.path.join(MCS_ETC_PATH, CMAPI_CONFIG_FILENAME)
# TOTP secret key
SECRET_KEY = 'MCSIsTheBestEver' # not just a random string! (base32)
# network constants
LOCALHOSTS = ('localhost', '127.0.0.1', '::1')
CMAPI_INSTALL_PATH = '/usr/share/columnstore/cmapi/'
CMAPI_PYTHON_BIN = os.path.join(CMAPI_INSTALL_PATH, "python/bin/python3")
CMAPI_PYTHON_DEPS_PATH = os.path.join(CMAPI_INSTALL_PATH, "deps")
CMAPI_PYTHON_BINARY_DEPS_PATH = os.path.join(CMAPI_PYTHON_DEPS_PATH, "bin")
CMAPI_SINGLE_NODE_XML = os.path.join(
CMAPI_INSTALL_PATH, 'cmapi_server/SingleNode.xml'
)
# constants for dispatchers
class ProgInfo(NamedTuple):
"""NamedTuple for some additional info about handling mcs processes."""
stop_priority: int # priority for building stop sequence
service_name: str # systemd service name
subcommand: str # subcommand for process run in docker container
only_primary: bool # use this process only on primary
delay: int = 0 # delay after process start in docker container
# mcs-loadbrm and mcs-savebrm are dependencies for workernode and resolved
# on top level of process handling
# mcs-storagemanager starts conditionally inside mcs-loadbrm, but should be
# stopped using cmapi
ALL_MCS_PROGS = {
# workernode starts on primary and non primary node with 1 or 2 added
# to subcommand (DBRM_Worker1 - on primary, DBRM_Worker2 - non primary)
'StorageManager': ProgInfo(15, 'mcs-storagemanager', '', False, 1),
'workernode': ProgInfo(13, 'mcs-workernode', 'DBRM_Worker{}', False, 1),
'controllernode': ProgInfo(11, 'mcs-controllernode', 'fg', True),
'PrimProc': ProgInfo(5, 'mcs-primproc', '', False, 1),
'ExeMgr': ProgInfo(9, 'mcs-exemgr', '', False, 1),
'WriteEngineServer': ProgInfo(7, 'mcs-writeengineserver', '', False, 3),
'DMLProc': ProgInfo(3, 'mcs-dmlproc', '', False),
'DDLProc': ProgInfo(1, 'mcs-ddlproc', '', False),
}
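# For illustration (a sketch): a shutdown order can be derived from
# stop_priority, e.g.
#   sorted(ALL_MCS_PROGS, key=lambda p: ALL_MCS_PROGS[p].stop_priority)
# yields DDLProc, DMLProc, PrimProc, WriteEngineServer, ExeMgr,
# controllernode, workernode, StorageManager: query-facing processes first.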
# constants for docker container dispatcher
MCS_INSTALL_BIN = '/usr/bin'
IFLAG = os.path.join(MCS_ETC_PATH, 'container-initialized')
LIBJEMALLOC_DEFAULT_PATH = os.path.join(MCS_DATA_PATH, 'libjemalloc.so.2')
MCS_LOG_PATH = '/var/log/mariadb/columnstore'


@@ -0,0 +1,262 @@
import json
import cherrypy
from cmapi_server.controllers.endpoints import (
StatusController, ConfigController, BeginController, CommitController,
RollbackController, StartController, ShutdownController,
ExtentMapController, ClusterController, ApiKeyController,
LoggingConfigController, AppController
)
from cmapi_server.controllers.s3dataload import S3DataLoadController
_version = '0.4.0'
dispatcher = cherrypy.dispatch.RoutesDispatcher()
# /_version/status (GET)
dispatcher.connect(name = 'status',
route = f'/cmapi/{_version}/node/status',
action = 'get_status',
controller = StatusController(),
conditions = {'method': ['GET']})
# /_version/primary (GET)
dispatcher.connect(name = 'get_primary',
route = f'/cmapi/{_version}/node/primary',
action = 'get_primary',
controller = StatusController(),
conditions = {'method': ['GET']})
# /_version/new_primary (GET)
dispatcher.connect(name = 'get_new_primary',
route = f'/cmapi/{_version}/node/new_primary',
action = 'get_new_primary',
controller = StatusController(),
conditions = {'method': ['GET']})
# /_version/config/ (GET)
dispatcher.connect(name = 'get_config', # what is this name used for?
route = f'/cmapi/{_version}/node/config',
action = 'get_config',
controller = ConfigController(),
conditions = {'method': ['GET']})
# /_version/config/ (PUT)
dispatcher.connect(name = 'put_config',
route = f'/cmapi/{_version}/node/config',
action = 'put_config',
controller = ConfigController(),
conditions = {'method': ['PUT']})
# /_version/begin/ (PUT)
dispatcher.connect(name = 'put_begin',
route = f'/cmapi/{_version}/node/begin',
action = 'put_begin',
controller = BeginController(),
conditions = {'method': ['PUT']})
# /_version/rollback/ (PUT)
dispatcher.connect(name = 'put_rollback',
route = f'/cmapi/{_version}/node/rollback',
action = 'put_rollback',
controller = RollbackController(),
conditions = {'method': ['PUT']})
# /_version/commit/ (PUT)
dispatcher.connect(name = 'put_commit',
route = f'/cmapi/{_version}/node/commit',
action = 'put_commit',
controller = CommitController(),
conditions = {'method': ['PUT']})
# /_version/start/ (PUT)
dispatcher.connect(name = 'start',
route = f'/cmapi/{_version}/node/start',
action = 'put_start',
controller = StartController(),
conditions = {'method': ['PUT']})
# /_version/shutdown/ (PUT)
dispatcher.connect(name = 'shutdown',
route = f'/cmapi/{_version}/node/shutdown',
action = 'put_shutdown',
controller = ShutdownController(),
conditions = {'method': ['PUT']})
# /_version/meta/em/ (GET)
dispatcher.connect(name = 'get_em',
route = f'/cmapi/{_version}/node/meta/em',
action = 'get_em',
controller = ExtentMapController(),
conditions = {'method': ['GET']})
# /_version/meta/journal/ (GET)
dispatcher.connect(name = 'get_journal',
route = f'/cmapi/{_version}/node/meta/journal',
action = 'get_journal',
controller = ExtentMapController(),
conditions = {'method': ['GET']})
# /_version/meta/vss/ (GET)
dispatcher.connect(name = 'get_vss',
route = f'/cmapi/{_version}/node/meta/vss',
action = 'get_vss',
controller = ExtentMapController(),
conditions = {'method': ['GET']})
# /_version/meta/vbbm/ (GET)
dispatcher.connect(name = 'get_vbbm',
route = f'/cmapi/{_version}/node/meta/vbbm',
action = 'get_vbbm',
controller = ExtentMapController(),
conditions = {'method': ['GET']})
# /_version/meta/footprint/ (GET)
dispatcher.connect(name = 'get_footprint',
route = f'/cmapi/{_version}/node/meta/footprint',
action = 'get_footprint',
controller = ExtentMapController(),
conditions = {'method': ['GET']})
# /_version/cluster/start/ (PUT)
dispatcher.connect(name = 'cluster_start',
route = f'/cmapi/{_version}/cluster/start',
action = 'put_start',
controller = ClusterController(),
conditions = {'method': ['PUT']})
# /_version/cluster/shutdown/ (PUT)
dispatcher.connect(name = 'cluster_shutdown',
route = f'/cmapi/{_version}/cluster/shutdown',
action = 'put_shutdown',
controller = ClusterController(),
conditions = {'method': ['PUT']})
# /_version/cluster/mode-set/ (PUT)
dispatcher.connect(name = 'cluster_mode_set',
route = f'/cmapi/{_version}/cluster/mode-set',
action = 'put_mode_set',
controller = ClusterController(),
conditions = {'method': ['PUT']})
# /_version/cluster/node/ (POST, PUT)
dispatcher.connect(name = 'cluster_add_node',
route = f'/cmapi/{_version}/cluster/node',
action = 'put_add_node',
controller = ClusterController(),
conditions = {'method': ['POST', 'PUT']})
# /_version/cluster/node/ (DELETE)
dispatcher.connect(name = 'cluster_remove_node',
route = f'/cmapi/{_version}/cluster/node',
action = 'delete_remove_node',
controller = ClusterController(),
conditions = {'method': ['DELETE']})
# /_version/cluster/status/ (GET)
dispatcher.connect(name = 'cluster_status',
route = f'/cmapi/{_version}/cluster/status',
action = 'get_status',
controller = ClusterController(),
conditions = {'method': ['GET']})
# /_version/node/apikey-set/ (PUT)
dispatcher.connect(
name = 'node_set_api_key',
route = f'/cmapi/{_version}/node/apikey-set',
action = 'set_api_key',
controller = ApiKeyController(),
conditions = {'method': ['PUT']}
)
# /_version/cluster/apikey-set/ (PUT)
dispatcher.connect(
name = 'cluster_set_api_key',
route = f'/cmapi/{_version}/cluster/apikey-set',
action = 'set_api_key',
controller = ClusterController(),
conditions = {'method': ['PUT']}
)
# /_version/cluster/load_s3data (POST, PUT)
dispatcher.connect(name = 'cluster_load_s3data',
route = f'/cmapi/{_version}/cluster/load_s3data',
action = 'load_s3data',
controller = S3DataLoadController(),
conditions = {'method': ['POST', 'PUT']})
# /_version/node/log-level (PUT)
dispatcher.connect(
name = 'node_set_log_level',
route = f'/cmapi/{_version}/node/log-level',
action = 'set_log_level',
controller = LoggingConfigController(),
conditions = {'method': ['PUT']}
)
# /_version/cluster/log-level (PUT)
dispatcher.connect(
name = 'cluster_set_log_level',
route = f'/cmapi/{_version}/cluster/log-level',
action = 'set_log_level',
controller = ClusterController(),
conditions = {'method': ['PUT']}
)
# /ready (GET)
dispatcher.connect(
name = 'app_ready',
route = '/cmapi/ready',
action = 'ready',
controller = AppController(),
conditions = {'method': ['GET']}
)
def jsonify_error(status, message, traceback, version): \
# pylint: disable=unused-argument
"""JSONify all CherryPy error responses (created by raising the
cherrypy.HTTPError exception)
"""
cherrypy.response.headers['Content-Type'] = 'application/json'
response_body = json.dumps(
{
'error': {
'http_status': status,
'message': message,
}
}
)
cherrypy.response.status = status
return response_body
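# A minimal wiring sketch (an assumption, not part of this diff): CherryPy
# calls the 'error_page.default' callable with the status, message,
# traceback and version keyword arguments that jsonify_error() accepts,
# so hooking it up would look like:
#
#     cherrypy.config.update({'error_page.default': jsonify_error})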

File diff suppressed because it is too large

View File

@ -0,0 +1,12 @@
import json
import cherrypy as cp
class APIError(cp.HTTPError):
def __init__(self, status: int = 500, message: str = ''):
super().__init__(status=status)
self._error_message = message
def set_response(self):
super().set_response()
response = cp.serving.response
response.body = json.dumps({'error': self._error_message}).encode()
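# Usage sketch (hypothetical handler, shown for illustration only):
# raising APIError from any exposed handler makes CherryPy return the
# given HTTP status with a JSON body such as {"error": "no such node"}.
#
#     @cp.expose
#     def get_node(self):
#         raise APIError(404, 'no such node')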

View File

@ -0,0 +1,335 @@
import logging
import os
import re
import selectors
import tempfile
import uuid
from subprocess import PIPE, Popen, run, CalledProcessError
import cherrypy
import furl
from cmapi_server.constants import (
CMAPI_PYTHON_BIN, CMAPI_PYTHON_BINARY_DEPS_PATH, CMAPI_PYTHON_DEPS_PATH
)
from cmapi_server.controllers.endpoints import raise_422_error
module_logger = logging.getLogger('cmapi_server')
def response_error(text):
raise_422_error(module_logger, 'load_s3data', text)
class S3DataLoadController:
@cherrypy.tools.json_in()
@cherrypy.tools.json_out()
@cherrypy.tools.validate_api_key() # pylint: disable=no-member
def load_s3data(self):
"""
Handler for /cluster/load_s3data (POST, PUT)
Invokes cpimport with passed params
This is internal columnstore engine handler
Not targeted for manual usage
Waits for json dictionary params in request
bucket - S3 bucket with table data
table - table name to load data into
filename - name of file in S3 with table data
key - S3 secret key
secret - S3 secret
region - S3 region
database - db name to load data into
"""
def checkShellParamsAreOK(param, paramname):
"""Check shell params for dangerous symbols.
As this params will be passed to shell, we should check,
there is no shell injection
AWS Access Key ID is 20 alpha-numeric characters
like022QF06E7MXBSH9DHM02
AWS Secret Access Key is 40 alpha-numeric-slash-plus characters
like kWcrlUX5JEDGM/LtmEENI/aVmYvHNif5zB+d9+ct
AWS buckets names are alpha-numeric-dot-underscore
like log-delivery-march-2020.com
AWS regions names, table names, file names are also not allowed
for dangerous symbols so just raise error for injection dangerous
symbols in params.
"""
dangerous_symbols = ' &|;\n\r`$'
for symbol in dangerous_symbols:
if symbol in param:
response_error(
f'S3 configuration parameters wrong: {paramname} '
f'cannot contain "{symbol}"'
)
def getKey(keyname, request_body, skip_check=False, required=True):
value = request_body.get(keyname, None)
if not value and required:
response_error(
f'Some S3 configuration parameters missing: {keyname} '
'not provided'
)
if value and not skip_check:
checkShellParamsAreOK(value, keyname)
return value
def prepare_aws(bucket, filename, secret, key, region):
"""Prepare aws_cli popen object.
Invoke aws_cli download, and return proc for further
use with cpimport.
:param bucket: bucket name
:type bucket: str
:param filename: filename in bucket
:type filename: str
:param secret: aws secret
:type secret: str
:param key: aws key
:type key: str
:param region: aws region
:type region: str
:return: popen aws_cli object
:rtype: subprocess.Popen
"""
my_env = os.environ.copy()
my_env['AWS_ACCESS_KEY_ID'] = key
my_env['AWS_SECRET_ACCESS_KEY'] = secret
my_env['PYTHONPATH'] = CMAPI_PYTHON_DEPS_PATH
aws_cli_binary = os.path.join(CMAPI_PYTHON_BINARY_DEPS_PATH, 'aws')
s3_url = furl.furl(bucket).add(path=filename).url
aws_command_line = [
CMAPI_PYTHON_BIN, aws_cli_binary,
"s3", "cp", "--source-region", region, s3_url, "-"
]
module_logger.debug(
f'AWS commandline: {" ".join(aws_command_line)}')
try:
aws_proc = Popen(
aws_command_line, env=my_env, stdout=PIPE,
stderr=PIPE, shell=False, encoding='utf-8'
)
except CalledProcessError as exc:
response_error(exc.stderr.split('\n')[0])
return aws_proc
def prepare_google_storage(
bucket, filename, secret, key, temporary_config
):
"""Prepare gsutil popen object.
Invoke gsutil download, and return proc for further use
with cpimport.
:param bucket: bucket name
:type bucket: str
:param filename: filename in bucket
:type filename: str
:param secret: gsutil secret
:type secret: str
:param key: gsutil key
:type key: str
:param temporary_config: temp config for gsutil
:type temporary_config: str
:return: popen gsutil object
:rtype: subprocess.Popen
"""
project_id = 'project_id'
gs_cli_binary = os.path.join(
CMAPI_PYTHON_BINARY_DEPS_PATH, 'gsutil'
)
commandline = (
f'/usr/bin/bash -c '
f'\'echo -e "{key}\n{secret}\n{project_id}"\' | '
f'{CMAPI_PYTHON_BIN} {gs_cli_binary} '
f'config -a -o {temporary_config}'
)
module_logger.debug(
f'gsutil config commandline: '
f'{commandline.encode("unicode_escape").decode("utf-8")}'
)
my_env = os.environ.copy()
my_env['PYTHONPATH'] = CMAPI_PYTHON_DEPS_PATH
my_env['BOTO_CONFIG'] = temporary_config
try:
p = run(
commandline, capture_output=True,
shell=True, encoding='utf-8', check=True, env=my_env
)
except CalledProcessError as exc:
response_error(exc.stderr.split('\n')[0])
try:
check_commandline = [
CMAPI_PYTHON_BIN, gs_cli_binary, 'version', '-l'
]
p = run(
check_commandline, capture_output=True,
shell=False, encoding='utf-8', check=True, env=my_env
)
module_logger.debug(
f'gsutil config check commandline : '
f'{" ".join(check_commandline)}'
)
module_logger.debug(f'gsutil config : {p.stdout}')
except CalledProcessError as exc:
response_error(exc.stderr.split('\n')[0])
gs_url = furl.furl(bucket).add(path=filename).url
gs_command_line = [
CMAPI_PYTHON_BIN, gs_cli_binary, 'cat', gs_url
]
module_logger.debug(
f'gsutil cat commandline : {" ".join(gs_command_line)}'
)
try:
gs_process = Popen(
gs_command_line, env=my_env, stdout=PIPE, stderr=PIPE,
shell=False, encoding='utf-8'
)
except CalledProcessError as exc:
response_error(exc.stderr.split('\n')[0])
return gs_process
module_logger.debug('LOAD S3 Data')
request = cherrypy.request
request_body = request.json
bucket = getKey('bucket', request_body)
if bucket.startswith(r's3://'):
storage = 'aws'
elif bucket.startswith(r'gs://'):
storage = 'gs'
else:
error = (
'Incorrect bucket. Should start with s3:// for AWS S3 or '
'gs:// for Google Storage'
)
response_error(error)
table = getKey('table', request_body)
filename = getKey('filename', request_body)
key = getKey('key', request_body)
secret = getKey('secret', request_body)
region = getKey('region', request_body, required=storage=='aws')
database = getKey('database', request_body)
terminated_by = getKey('terminated_by', request_body, skip_check=True)
enclosed_by = getKey(
'enclosed_by', request_body, skip_check=True, required=False
)
escaped_by = getKey(
'escaped_by', request_body, skip_check=True, required=False
)
if storage == 'aws':
download_proc = prepare_aws(bucket, filename, secret, key, region)
elif storage == 'gs':
temporary_config = os.path.join(
tempfile.gettempdir(), '.boto.' + str(uuid.uuid4())
)
download_proc = prepare_google_storage(
bucket, filename, secret, key, temporary_config
)
else:
response_error('Unknown storage detected. Internal error')
cpimport_command_line = [
'cpimport', database, table, '-s', terminated_by
]
if escaped_by:
cpimport_command_line += ['-C', escaped_by]
if enclosed_by:
cpimport_command_line += ['-E', enclosed_by]
module_logger.debug(
f'cpimport command line: {" ".join(cpimport_command_line)}'
)
cpimport_proc = Popen(
cpimport_command_line, shell=False, stdin=download_proc.stdout,
stdout=PIPE, stderr=PIPE, encoding='utf-8'
)
selector = selectors.DefaultSelector()
for stream in [
download_proc.stderr, cpimport_proc.stderr, cpimport_proc.stdout
]:
os.set_blocking(stream.fileno(), False)
selector.register(
download_proc.stderr, selectors.EVENT_READ, data='downloader_error'
)
selector.register(
cpimport_proc.stderr, selectors.EVENT_READ, data='cpimport_error'
)
selector.register(
cpimport_proc.stdout, selectors.EVENT_READ, data='cpimport_output'
)
downloader_error = ''
cpimport_error = ''
cpimport_output = ''
alive = 3
while alive > 0:
events = selector.select()
for key, mask in events:
name = key.data
line = key.fileobj.readline().rstrip()
if not line:
# EOF
alive -= 1
selector.unregister(key.fileobj)
continue
if name == 'downloader_error':
downloader_error += line + '\n'
if name == 'cpimport_error':
cpimport_error += line + '\n'
if name == 'cpimport_output':
cpimport_output += line + '\n'
# clean up the temporary gsutil config created by prepare_google_storage
if storage == 'gs' and os.path.exists(temporary_config):
os.remove(temporary_config)
if downloader_error:
response_error(downloader_error)
if cpimport_error:
response_error(cpimport_error)
module_logger.debug(f'LOAD S3 Data stdout: {cpimport_output}')
pattern = '([0-9]+) rows processed and ([0-9]+) rows inserted'
match = re.search(pattern, cpimport_output)
if not match:
return {
'success': False,
'inserted': 0,
'processed': 0
}
return {
'success': True,
'inserted': match.group(2),
'processed': match.group(1)
}

View File

@ -0,0 +1,22 @@
"""Module contains custom exceptions."""
class CMAPIBasicError(Exception):
"""Basic exception raised for CMAPI related processes.
Attributes:
message -- explanation of the error
"""
def __init__(self, message: str) -> None:
self.message = message
super().__init__(self.message)
def __str__(self) -> str:
return self.message
class CEJError(CMAPIBasicError):
"""Exception raised for CEJ related processes.
Attributes:
message -- explanation of the error
"""

View File

@ -0,0 +1,185 @@
'''
This class implements the interface used by the failover module to notify
the cluster of events like node-up / node-down, etc.
'''
import logging
import time
import requests
from cmapi_server import helpers, node_manipulation
from cmapi_server.constants import DEFAULT_MCS_CONF_PATH
from cmapi_server.exceptions import CMAPIBasicError
from cmapi_server.managers.process import MCSProcessManager
from failover.agent_comm import AgentBase
from mcs_node_control.models.node_config import NodeConfig
# Bug in pylint https://github.com/PyCQA/pylint/issues/4584
requests.packages.urllib3.disable_warnings() # pylint: disable=no-member
logger = logging.getLogger('failover_agent')
class FailoverAgent(AgentBase):
def activateNodes(
self, nodes, input_config_filename=DEFAULT_MCS_CONF_PATH,
output_config_filename=None, test_mode=False
):
logger.info(f'FA.activateNodes(): activating nodes: {nodes}')
new_node_count = 0
for node in nodes:
try:
logger.info(f'FA.activateNodes(): adding node {node}')
node_manipulation.add_node(
node, input_config_filename, output_config_filename
)
new_node_count += 1
except Exception:
logger.error(f'FA.activateNodes(): failed to add node {node}')
raise
return new_node_count
def deactivateNodes(
self, nodes, input_config_filename=DEFAULT_MCS_CONF_PATH,
output_config_filename=None, test_mode=False
):
logger.info(f'FA.deactivateNodes(): deactivating nodes: {nodes}')
removed_node_count = 0
for node in nodes:
try:
logger.info(f'FA.deactivateNodes(): deactivating node {node}')
node_manipulation.remove_node(
node, input_config_filename, output_config_filename,
deactivate_only=True, test_mode=test_mode
)
removed_node_count += 1
except Exception as err:
logger.error(
f'FA.deactivateNodes(): failed to deactivate node {node}, '
f'got {str(err)}'
)
raise
return removed_node_count
# The 'hack' parameter is a placeholder. When run by agent_comm, this function gets an
# empty tuple as its first parameter; if that were taken as input_config_filename,
# that would be bad. Needs a proper fix.
def movePrimaryNode(self, hack, input_config_filename = None, output_config_filename = None, test_mode = False):
logger.info(f"FA.movePrimaryNode(): moving primary node functionality")
# to save a little typing in testing
kwargs = {
"cs_config_filename": input_config_filename,
"input_config_filename" : input_config_filename,
"output_config_filename" : output_config_filename,
"test_mode" : test_mode
}
try:
node_manipulation.move_primary_node(**kwargs)
except Exception as e:
logger.error(f"FA.movePrimaryNode(): failed to move primary node, got {str(e)}")
raise
def enterStandbyMode(self, test_mode = False):
nc = NodeConfig()
node_name = nc.get_module_net_address(nc.get_current_config_root())
logger.info(
f'FA.enterStandbyMode(): shutting down node "{node_name}"'
)
# this gets retried by the caller on error
try:
# TODO: remove test_mode condition and add mock for testing
if not test_mode:
MCSProcessManager.stop_node(is_primary=nc.is_primary_node())
logger.info(
'FA.enterStandbyMode(): successfully stopped node.'
)
except CMAPIBasicError as err:
logger.error(
'FA.enterStandbyMode(): caught error while stopping node. '
f'{err.message}'
)
def raiseAlarm(self, msg):
logger.critical(msg)
# The start/commit/rollback transaction functions use the active list to decide which
# nodes to send to; when we're adding a node, the new node isn't in the active list yet.
# extra_nodes gives us additional hostnames/addrs to send the transaction to.
# Likewise for removing a node: presumably that node is not reachable, so it must be
# removed from the list to send to.
def startTransaction(self, extra_nodes=None, remove_nodes=None):
got_txn = False
count = 0
while not got_txn:
msg = None
try:
(got_txn, txn_id, nodes) = helpers.start_transaction(
extra_nodes=extra_nodes, remove_nodes=remove_nodes
)
except Exception as e:
got_txn = False
msg = (
f'FA.start_transaction(): attempt #{count+1}, '
f'failed to get a transaction, got {str(e)}'
)
if not got_txn:
if msg is None:
msg = (
f'FA.start_transaction(): attempt #{count+1}, '
'failed to get a transaction'
)
if count < 5:
logger.warning(msg)
else:
logger.error(msg)
time.sleep(1)
count += 1
logger.info(f'FA.startTransaction(): started transaction {txn_id}')
return (txn_id, nodes)
# These shouldn't throw for now
def commitTransaction(self, txn_id, nodes, **kwargs):
try:
helpers.update_revision_and_manager()
# broadcasting new config invokes node restart
helpers.broadcast_new_config(nodes=nodes)
helpers.commit_transaction(txn_id, nodes=nodes)
except Exception:
logger.error(
(
'FA.commitTransaction(): failed to commit transaction '
f'{txn_id}'
),
exc_info=True
)
else:
logger.info(
f'FA.commitTransaction(): committed transaction {txn_id}'
)
def rollbackTransaction(self, txn_id, nodes):
try:
helpers.rollback_transaction(txn_id, nodes = nodes)
except Exception:
logger.error(
(
'FA.rollbackTransaction(): failed to rollback transaction '
f'{txn_id}. Got unrecognised error.'
),
exc_info=True
)
else:
logger.info(
f'FA.rollbackTransaction(): rolled back transaction {txn_id}'
)

View File

View File

@ -0,0 +1,119 @@
"""Module contains all things related to working with .secrets file."""
import json
import logging
import os
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.primitives import padding
from cmapi_server.constants import MCS_SECRETS_FILE_PATH
from cmapi_server.exceptions import CEJError
AES_BLOCK_SIZE_BITS = algorithms.AES.block_size
AES_IV_BIN_SIZE = int(AES_BLOCK_SIZE_BITS/8)
# two hex chars for each byte
AES_IV_HEX_SIZE = AES_IV_BIN_SIZE * 2
class CEJPasswordHandler():
"""Handler for CrossEngineSupport password decryption."""
@classmethod
def secretsfile_exists(cls):
"""Check the .secrets file in MCS_SECRETS_FILE_PATH.
:return: True if file exists and not empty.
:rtype: bool
"""
try:
if (
os.path.isfile(MCS_SECRETS_FILE_PATH) and
os.path.getsize(MCS_SECRETS_FILE_PATH) > 0
):
return True
except Exception:
# TODO: remove once Python 3.8+ is guaranteed to be in the
# package, because isfile and getsize do not raise
# exceptions on 3.8+
logging.warning(
'Something went wrong while detecting the .secrets file.',
exc_info=True
)
return False
@classmethod
def get_secrets_json(cls):
"""Get json from .secrets file.
:raises CEJError: on an empty/corrupted/wrongly formatted .secrets file
:return: json from .secrets file
:rtype: dict
"""
if not cls.secretsfile_exists():
raise CEJError(f'{MCS_SECRETS_FILE_PATH} file does not exist.')
with open(MCS_SECRETS_FILE_PATH) as secrets_file:
try:
secrets_json = json.load(secrets_file)
except Exception:
logging.error(
'Something went wrong while loading json from '
f'{MCS_SECRETS_FILE_PATH}',
exc_info=True
)
raise CEJError(
f'Looks like file {MCS_SECRETS_FILE_PATH} is corrupted or '
'has wrong format.'
) from None
return secrets_json
@classmethod
def decrypt_password(cls, enc_data:str):
"""Decrypt CEJ password if needed.
:param enc_data: encrypted initialization vector + password in hex str
:type enc_data: str
:return: decrypted CEJ password
:rtype: str
"""
if not cls.secretsfile_exists():
logging.warning('Unencrypted CrossEngineSupport password used.')
return enc_data
logging.info('Encrypted CrossEngineSupport password found.')
try:
iv = bytes.fromhex(enc_data[:AES_IV_HEX_SIZE])
encrypted_passwd = bytes.fromhex(enc_data[AES_IV_HEX_SIZE:])
except ValueError as value_error:
raise CEJError(
'Non-hexadecimal number found in encrypted CEJ password.'
) from value_error
secrets_json = cls.get_secrets_json()
encryption_key_hex = secrets_json.get('encryption_key')
if not encryption_key_hex:
raise CEJError(
f'Empty "encryption key" found in {MCS_SECRETS_FILE_PATH}'
)
try:
encryption_key = bytes.fromhex(encryption_key_hex)
except ValueError as value_error:
raise CEJError(
'Non-hexadecimal number found in encryption key from '
f'{MCS_SECRETS_FILE_PATH} file.'
) from value_error
cipher = Cipher(
algorithms.AES(encryption_key),
modes.CBC(iv)
)
decryptor = cipher.decryptor()
unpadder = padding.PKCS7(AES_BLOCK_SIZE_BITS).unpadder()
padded_passwd_bytes = (
decryptor.update(encrypted_passwd)
+ decryptor.finalize()
)
passwd_bytes = (
unpadder.update(padded_passwd_bytes) + unpadder.finalize()
)
return passwd_bytes.decode()
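# For reference, a hedged sketch of the matching encryption side, inferred
# from decrypt_password() above (hex-encoded IV prefix, then AES-CBC
# ciphertext with PKCS7 padding). This helper is illustrative and is not
# part of the original module:
def encrypt_password_sketch(passwd: str, encryption_key: bytes) -> str:
    """Encrypt a password the way decrypt_password() expects it."""
    # random IV of the same binary size the decryptor slices off as a hex prefix
    iv = os.urandom(AES_IV_BIN_SIZE)
    padder = padding.PKCS7(AES_BLOCK_SIZE_BITS).padder()
    padded = padder.update(passwd.encode()) + padder.finalize()
    encryptor = Cipher(
        algorithms.AES(encryption_key), modes.CBC(iv)
    ).encryptor()
    encrypted = encryptor.update(padded) + encryptor.finalize()
    # hex IV first, then hex ciphertext, matching decrypt_password()
    return iv.hex() + encrypted.hex()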

View File

@ -0,0 +1,579 @@
"""Module contains Cluster business logic functions."""
import logging
from datetime import datetime
import requests
from cmapi_server.constants import (
CMAPI_CONF_PATH, DEFAULT_MCS_CONF_PATH,
)
from cmapi_server.exceptions import CMAPIBasicError
from cmapi_server.helpers import (
broadcast_new_config, commit_transaction, get_active_nodes, get_dbroots,
get_config_parser, get_current_key, get_id, get_version, start_transaction,
rollback_transaction, update_revision_and_manager,
)
from cmapi_server.node_manipulation import (
add_node, add_dbroot, remove_node, switch_node_maintenance,
)
from mcs_node_control.models.misc import get_dbrm_master
from mcs_node_control.models.node_config import NodeConfig
class ClusterHandler():
"""Class for handling MCS Cluster operations."""
@staticmethod
def status(
config: str = DEFAULT_MCS_CONF_PATH,
logger: logging.Logger = logging.getLogger('cmapi_server')
) -> dict:
"""Method to get MCS Cluster status information
:param config: columnstore xml config file path,
defaults to DEFAULT_MCS_CONF_PATH
:type config: str, optional
:param logger: logger, defaults to logging.getLogger('cmapi_server')
:type logger: logging.Logger, optional
:raises CMAPIBasicError: if an exception is caught while getting status
from each node separately
:return: status result
:rtype: dict
"""
logger.debug('Cluster status command called. Getting status.')
response = {'timestamp': str(datetime.now())}
active_nodes = get_active_nodes(config)
cmapi_cfg_parser = get_config_parser(CMAPI_CONF_PATH)
api_key = get_current_key(cmapi_cfg_parser)
headers = {'x-api-key': api_key}
num_nodes = 0
for node in active_nodes:
url = f'https://{node}:8640/cmapi/{get_version()}/node/status'
try:
r = requests.get(url, verify=False, headers=headers)
r.raise_for_status()
r_json = r.json()
if len(r_json.get('services', [])) == 0:
r_json['dbrm_mode'] = 'offline'
response[f'{str(node)}'] = r_json
num_nodes += 1
except Exception as err:
raise CMAPIBasicError(
f'Got an error retrieving status from node {node}'
) from err
response['num_nodes'] = num_nodes
logger.debug('Successfully finished getting cluster status.')
return response
@staticmethod
def start(
config: str = DEFAULT_MCS_CONF_PATH,
logger: logging.Logger = logging.getLogger('cmapi_server')
) -> dict:
"""Method to start MCS Cluster.
:param config: columnstore xml config file path,
defaults to DEFAULT_MCS_CONF_PATH
:type config: str, optional
:param logger: logger, defaults to logging.getLogger('cmapi_server')
:type logger: logging.Logger, optional
:raises CMAPIBasicError: on exception while starting transaction
:raises CMAPIBasicError: if transaction start isn't successful
:raises CMAPIBasicError: if no nodes in the cluster
:raises CMAPIBasicError: on exception while distributing new config
:raises CMAPIBasicError: on unsuccessful config file distribution
:raises CMAPIBasicError: on exception while committing transaction
:return: start timestamp
:rtype: dict
"""
logger.debug('Cluster start command called. Starting the cluster.')
start_time = str(datetime.now())
transaction_id = get_id()
try:
succeeded, transaction_id, successes = start_transaction(
cs_config_filename=config, id=transaction_id
)
except Exception as err:
rollback_transaction(transaction_id, cs_config_filename=config)
raise CMAPIBasicError(
'Error while starting the transaction.'
) from err
if not succeeded:
rollback_transaction(transaction_id, cs_config_filename=config)
raise CMAPIBasicError('Starting transaction isn\'t successful.')
if succeeded and len(successes) == 0:
rollback_transaction(transaction_id, cs_config_filename=config)
raise CMAPIBasicError('There are no nodes in the cluster.')
switch_node_maintenance(False)
update_revision_and_manager()
# TODO: move this from multiple places to one, eg to helpers
try:
broadcast_successful = broadcast_new_config(config)
except Exception as err:
rollback_transaction(transaction_id, cs_config_filename=config)
raise CMAPIBasicError(
'Error while distributing config file.'
) from err
if not broadcast_successful:
rollback_transaction(transaction_id, cs_config_filename=config)
raise CMAPIBasicError('Config distribution isn\'t successful.')
try:
commit_transaction(transaction_id, cs_config_filename=config)
except Exception as err:
rollback_transaction(transaction_id, cs_config_filename=config)
raise CMAPIBasicError(
'Error while committing transaction.'
) from err
logger.debug('Successfully finished cluster start.')
return {'timestamp': start_time}
@staticmethod
def shutdown(
config: str = DEFAULT_MCS_CONF_PATH,
logger: logging.Logger = logging.getLogger('cmapi_server')
) -> dict:
"""Method to stop the MCS Cluster.
:param config: columnstore xml config file path,
defaults to DEFAULT_MCS_CONF_PATH
:type config: str, optional
:param logger: logger, defaults to logging.getLogger('cmapi_server')
:type logger: logging.Logger, optional
:raises CMAPIBasicError: on exception while starting transaction
:raises CMAPIBasicError: if transaction start isn't successful
:raises CMAPIBasicError: if no nodes in the cluster
:raises CMAPIBasicError: on exception while distributing new config
:raises CMAPIBasicError: on unsuccessful config file distribution
:raises CMAPIBasicError: on exception while committing transaction
:return: shutdown timestamp
:rtype: dict
"""
logger.debug(
'Cluster shutdown command called. Shutting down the cluster.'
)
start_time = str(datetime.now())
transaction_id = get_id()
try:
succeeded, transaction_id, successes = start_transaction(
cs_config_filename=config, id=transaction_id
)
except Exception as err:
rollback_transaction(transaction_id, cs_config_filename=config)
raise CMAPIBasicError(
'Error while starting the transaction.'
) from err
if not succeeded:
rollback_transaction(transaction_id, cs_config_filename=config)
raise CMAPIBasicError('Starting transaction isn\'t successful.')
if succeeded and len(successes) == 0:
rollback_transaction(transaction_id, cs_config_filename=config)
raise CMAPIBasicError('There are no nodes in the cluster.')
switch_node_maintenance(True)
update_revision_and_manager()
# TODO: move this from multiple places to one, eg to helpers
try:
broadcast_successful = broadcast_new_config(config)
except Exception as err:
rollback_transaction(transaction_id, cs_config_filename=config)
raise CMAPIBasicError(
'Error while distributing config file.'
) from err
if not broadcast_successful:
rollback_transaction(transaction_id, cs_config_filename=config)
raise CMAPIBasicError('Config distribution isn\'t successful.')
try:
commit_transaction(transaction_id, cs_config_filename=config)
except Exception as err:
rollback_transaction(transaction_id, cs_config_filename=config)
raise CMAPIBasicError(
'Error while committing transaction.'
) from err
logger.debug('Successfully finished shutting down the cluster.')
return {'timestamp': start_time}
@staticmethod
def add_node(
node: str, config: str = DEFAULT_MCS_CONF_PATH,
logger: logging.Logger = logging.getLogger('cmapi_server')
) -> dict:
"""Method to add node to MCS CLuster.
:param node: node IP or name or FQDN
:type node: str
:param config: columnstore xml config file path,
defaults to DEFAULT_MCS_CONF_PATH
:type config: str, optional
:param logger: logger, defaults to logging.getLogger('cmapi_server')
:type logger: logging.Logger, optional
:raises CMAPIBasicError: on exception while starting transaction
:raises CMAPIBasicError: if transaction start isn't successful
:raises CMAPIBasicError: on exception while adding node
:raises CMAPIBasicError: on exception while distributing new config
:raises CMAPIBasicError: on unsuccessful config file distribution
:raises CMAPIBasicError: on exception while committing transaction
:return: result of adding node
:rtype: dict
"""
logger.debug(f'Cluster add node command called. Adding node {node}.')
response = {'timestamp': str(datetime.now())}
transaction_id = get_id()
try:
succeeded, transaction_id, successes = start_transaction(
cs_config_filename=config, extra_nodes=[node],
id=transaction_id
)
except Exception as err:
rollback_transaction(transaction_id, cs_config_filename=config)
raise CMAPIBasicError(
'Error while starting the transaction.'
) from err
if not succeeded:
rollback_transaction(transaction_id, cs_config_filename=config)
raise CMAPIBasicError('Starting transaction isn\'t successful.')
try:
add_node(
node, input_config_filename=config,
output_config_filename=config
)
if not get_dbroots(node, config):
add_dbroot(
host=node, input_config_filename=config,
output_config_filename=config
)
except Exception as err:
rollback_transaction(transaction_id, cs_config_filename=config)
raise CMAPIBasicError('Error while adding node.') from err
response['node_id'] = node
update_revision_and_manager(
input_config_filename=config, output_config_filename=config
)
try:
broadcast_successful = broadcast_new_config(config)
except Exception as err:
rollback_transaction(transaction_id, cs_config_filename=config)
raise CMAPIBasicError(
'Error while distributing config file.'
) from err
if not broadcast_successful:
rollback_transaction(transaction_id, cs_config_filename=config)
raise CMAPIBasicError('Config distribution isn\'t successful.')
try:
commit_transaction(transaction_id, cs_config_filename=config)
except Exception as err:
rollback_transaction(transaction_id, cs_config_filename=config)
raise CMAPIBasicError(
'Error while committing transaction.'
) from err
logger.debug(f'Successfully finished adding node {node}.')
return response
@staticmethod
def remove_node(
node: str, config: str = DEFAULT_MCS_CONF_PATH,
logger: logging.Logger = logging.getLogger('cmapi_server')
) -> dict:
"""Method to remove node from MCS CLuster.
:param node: node IP or name or FQDN
:type node: str
:param config: columnstore xml config file path,
defaults to DEFAULT_MCS_CONF_PATH
:type config: str, optional
:param logger: logger, defaults to logging.getLogger('cmapi_server')
:type logger: logging.Logger, optional
:raises CMAPIBasicError: on exception while starting transaction
:raises CMAPIBasicError: if transaction start isn't successful
:raises CMAPIBasicError: on exception while removing node
:raises CMAPIBasicError: on exception while distributing new config
:raises CMAPIBasicError: on unsuccessful config file distribution
:raises CMAPIBasicError: on exception while committing transaction
:return: result of node removal
:rtype: dict
"""
logger.debug(
f'Cluster remove node command called. Removing node {node}.'
)
response = {'timestamp': str(datetime.now())}
transaction_id = get_id()
try:
succeeded, transaction_id, txn_nodes = start_transaction(
cs_config_filename=config, remove_nodes=[node],
id=transaction_id
)
except Exception as err:
rollback_transaction(transaction_id, cs_config_filename=config)
raise CMAPIBasicError(
'Error while starting the transaction.'
) from err
if not succeeded:
rollback_transaction(transaction_id, cs_config_filename=config)
raise CMAPIBasicError('Starting transaction isn\'t successful.')
try:
remove_node(
node, input_config_filename=config,
output_config_filename=config
)
except Exception as err:
rollback_transaction(
transaction_id, nodes=txn_nodes, cs_config_filename=config
)
raise CMAPIBasicError('Error while removing node.') from err
response['node_id'] = node
if len(txn_nodes) > 0:
update_revision_and_manager(
input_config_filename=config, output_config_filename=config
)
try:
broadcast_successful = broadcast_new_config(
config, nodes=txn_nodes
)
except Exception as err:
rollback_transaction(
transaction_id, nodes=txn_nodes, cs_config_filename=config
)
raise CMAPIBasicError(
'Error while distributing config file.'
) from err
if not broadcast_successful:
rollback_transaction(
transaction_id, nodes=txn_nodes, cs_config_filename=config
)
raise CMAPIBasicError('Config distribution isn\'t successful.')
try:
commit_transaction(transaction_id, cs_config_filename=config)
except Exception as err:
rollback_transaction(
transaction_id, nodes=txn_nodes, cs_config_filename=config
)
raise CMAPIBasicError(
'Error while committing transaction.'
) from err
logger.debug(f'Successfully finished removing node {node}.')
return response
@staticmethod
def set_mode(
mode: str, timeout:int = 60, config: str = DEFAULT_MCS_CONF_PATH,
logger: logging.Logger = logging.getLogger('cmapi_server')
) -> dict:
"""Method to set MCS CLuster mode.
:param mode: cluster mode to set, can be only "readonly" or "readwrite"
:type mode: str
:param config: columnstore xml config file path,
defaults to DEFAULT_MCS_CONF_PATH
:type config: str, optional
:param logger: logger, defaults to logging.getLogger('cmapi_server')
:type logger: logging.Logger, optional
:raises CMAPIBasicError: if no master found in the cluster
:raises CMAPIBasicError: on exception while starting transaction
:raises CMAPIBasicError: if transaction start isn't successful
:raises CMAPIBasicError: on exception while adding node
:raises CMAPIBasicError: on exception while distributing new config
:raises CMAPIBasicError: on unsuccessful distibuting config file
:raises CMAPIBasicError: on exception while committing transaction
:return: result of adding node
:rtype: dict
"""
logger.debug(
f'Cluster mode set command called. Setting mode to {mode}.'
)
response = {'timestamp': str(datetime.now())}
cmapi_cfg_parser = get_config_parser(CMAPI_CONF_PATH)
api_key = get_current_key(cmapi_cfg_parser)
headers = {'x-api-key': api_key}
transaction_id = get_id()
master = None
if len(get_active_nodes(config)) != 0:
master = get_dbrm_master(config)
if master is None:
raise CMAPIBasicError('No master found in the cluster.')
else:
master = master['IPAddr']
payload = {'cluster_mode': mode}
url = f'https://{master}:8640/cmapi/{get_version()}/node/config'
try:
succeeded, transaction_id, successes = start_transaction(
cs_config_filename=config, id=transaction_id
)
except Exception as err:
rollback_transaction(transaction_id, cs_config_filename=config)
raise CMAPIBasicError(
'Error while starting the transaction.'
) from err
if not succeeded:
rollback_transaction(transaction_id, cs_config_filename=config)
raise CMAPIBasicError('Starting transaction isn\'t successful.')
nc = NodeConfig()
root = nc.get_current_config_root(config_filename=config)
payload['manager'] = root.find('./ClusterManager').text
payload['revision'] = root.find('./ConfigRevision').text
payload['timeout'] = timeout
payload['cluster_mode'] = mode
try:
r = requests.put(url, headers=headers, json=payload, verify=False)
r.raise_for_status()
response['cluster-mode'] = mode
except Exception as err:
rollback_transaction(transaction_id, cs_config_filename=config)
raise CMAPIBasicError(
f'Error while setting cluster mode to {mode}'
) from err
try:
commit_transaction(transaction_id, cs_config_filename=config)
except Exception as err:
rollback_transaction(transaction_id, cs_config_filename=config)
raise CMAPIBasicError(
'Error while committing transaction.'
) from err
logger.debug(f'Successfully set cluster mode to {mode}.')
return response
@staticmethod
def set_api_key(
api_key: str, verification_key: str,
config: str = DEFAULT_MCS_CONF_PATH,
logger: logging.Logger = logging.getLogger('cmapi_server')
) -> dict:
"""Method to set API key for each CMAPI node in cluster.
:param api_key: API key to set
:type api_key: str
:param verification_key: TOTP key to verify
:type verification_key: str
:param config: columnstore xml config file path,
defaults to DEFAULT_MCS_CONF_PATH
:type config: str, optional
:param logger: logger, defaults to logging.getLogger('cmapi_server')
:type logger: logging.Logger, optional
:raises CMAPIBasicError: if an exception is caught while setting the API key
on each node
:return: status result
:rtype: dict
"""
logger.debug('Cluster set API key command called.')
active_nodes = get_active_nodes(config)
body = {
'api_key': api_key,
'verification_key': verification_key
}
response = {}
# used only to shape the response object below
active_nodes_count = len(active_nodes)
if not active_nodes:
# set api key in configuration file on this node
logger.debug(
'No active nodes found, set API key into current CMAPI conf.'
)
active_nodes.append('localhost')
for node in active_nodes:
logger.debug(f'Setting new api key to "{node}".')
url = f'https://{node}:8640/cmapi/{get_version()}/node/apikey-set'
try:
resp = requests.put(url, verify=False, json=body)
resp.raise_for_status()
r_json = resp.json()
if active_nodes_count > 0:
response[str(node)] = r_json
except Exception as err:
raise CMAPIBasicError(
f'Got an error setting API key to "{node}".'
) from err
logger.debug(f'Successfully set new api key to "{node}".')
response['timestamp'] = str(datetime.now())
logger.debug(
'Successfully finished setting new API key to all nodes.'
)
return response
@staticmethod
def set_log_level(
level: str, config: str = DEFAULT_MCS_CONF_PATH,
logger: logging.Logger = logging.getLogger('cmapi_server')
) -> dict:
"""Method to set level for loggers on each CMAPI node in cluster.
:param level: logging level, including custom
:type level: str
:param config: columnstore xml config file path,
defaults to DEFAULT_MCS_CONF_PATH
:type config: str, optional
:param logger: logger, defaults to logging.getLogger('cmapi_server')
:type logger: logging.Logger, optional
:return: status result
:rtype: dict
"""
logger.debug('Cluster set new logging level called.')
active_nodes = get_active_nodes(config)
body = {'level': level}
response = {}
# used only to shape the response object below
active_nodes_count = len(active_nodes)
if not active_nodes:
# set log level only on this node
logger.debug(
'No active nodes found, set log level only for current node.'
)
active_nodes.append('localhost')
for node in active_nodes:
logger.debug(f'Setting new log level to "{node}".')
url = f'https://{node}:8640/cmapi/{get_version()}/node/log-level'
try:
resp = requests.put(url, verify=False, json=body)
resp.raise_for_status()
r_json = resp.json()
if active_nodes_count > 0:
response[str(node)] = r_json
except Exception as err:
raise CMAPIBasicError(
f'Got an error setting log level to "{node}".'
) from err
logger.debug(f'Successfully set new log level to "{node}".')
response['timestamp'] = str(datetime.now())
logger.debug(
'Successfully finished setting new log level to all nodes.'
)
return response
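# Response shape sketch for ClusterHandler.status() (values illustrative;
# the per-node payload comes from each node's /node/status endpoint):
#
#     {
#         'timestamp': '2023-01-01 12:00:00.000000',
#         '10.0.0.1': {...per-node status JSON...},
#         'num_nodes': 1,
#     }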

View File

@ -0,0 +1,847 @@
"""Module with helpers functions.
TODO: remove NodeConfig usage and move to arguments (eg. nc or root)
"""
import asyncio
import concurrent
import configparser
import datetime
import logging
import os
import socket
import time
from functools import partial
from random import random
from shutil import copyfile
from typing import Tuple, Optional
import lxml.objectify
import requests
from cmapi_server.exceptions import CMAPIBasicError
# Bug in pylint https://github.com/PyCQA/pylint/issues/4584
requests.packages.urllib3.disable_warnings() # pylint: disable=no-member
from cmapi_server.constants import (
CMAPI_CONF_PATH, CMAPI_DEFAULT_CONF_PATH, DEFAULT_MCS_CONF_PATH,
DEFAULT_SM_CONF_PATH, LOCALHOSTS
)
from cmapi_server.handlers.cej import CEJPasswordHandler
from cmapi_server.managers.process import MCSProcessManager
from mcs_node_control.models.node_config import NodeConfig
def get_id():
return int(random() * 1000000)
def start_transaction(
config_filename=CMAPI_CONF_PATH,
cs_config_filename=DEFAULT_MCS_CONF_PATH,
extra_nodes=None,
remove_nodes=None,
optional_nodes=None,
id=None
):
"""Start internal CMAPI transaction.
Returns (success, txnid, nodes). success = True means it successfully
started a transaction, False means it didn't. If True, then txnid will have
the transaction ID and the list of nodes the transaction was started on.
If False, the txnid and nodes have undefined values.
:param config_filename: cmapi config filepath,
defaults to CMAPI_CONF_PATH
:type config_filename: str
:param cs_config_filename: columnstore xml config filepath,
defaults to DEFAULT_MCS_CONF_PATH
:type cs_config_filename: str, optional
:param extra_nodes: extra nodes, defaults to None
:type extra_nodes: list, optional
:param remove_nodes: remove nodes, defaults to None
:type remove_nodes: list, optional
:param optional_nodes: optional nodes, defaults to None
:type optional_nodes: list, optional
:param id: transaction id, a random one is generated if not given
:type id: int, optional
:return: (success, txnid, nodes)
:rtype: tuple
"""
# TODO: Somehow change that logic for eg using several input types
# (str\list\set) and detect which one we got.
if id is None:
id = get_id()
extra_nodes = extra_nodes or []
remove_nodes = remove_nodes or []
optional_nodes = optional_nodes or []
cfg_parser = get_config_parser(config_filename)
api_key = get_current_key(cfg_parser)
version = get_version()
headers = {'x-api-key': api_key}
body = {'id' : id}
final_time = datetime.datetime.now() + datetime.timedelta(seconds=300)
success = False
while datetime.datetime.now() < final_time and not success:
successes = []
# it's painful to look at, but if this call fails to get a lock on
# every server, it may be because a node went down, and the config file
# was updated. So, update the list on every iteration.
#
# There is a race here between reading the config and getting the txn.
# What can stop it with the current design is using a mutex here,
# and having config updates come from only one node.
# For changes coming from failover, this will be true.
#
# There is also a race on the config file in general.
# Need to read it before you can get a lock, and need to lock it before
# it can be read reliably. Resolution TBD. File locking? Retries?
# TODO: need to work with data types of nodes here
unfiltered_nodes = [
*get_active_nodes(cs_config_filename),
*extra_nodes,
*optional_nodes
]
tmp_active_nodes = {
node for node in unfiltered_nodes
if node not in remove_nodes
}
active_nodes = set()
# resolve localhost addrs
for node in tmp_active_nodes:
if node in ['127.0.0.1', 'localhost', '::1']:
active_nodes.add(socket.gethostbyname(socket.gethostname()))
else:
active_nodes.add(node)
# this copy will be updated if an optional node can't be reached
real_active_nodes = set(active_nodes)
logging.trace(f'Active nodes on start transaction {active_nodes}')
for node in active_nodes:
url = f'https://{node}:8640/cmapi/{version}/node/begin'
node_success = False
logging.trace(f'Processing node "{node}"')
for retry in range(5):
logging.trace(
f'In {retry} attempt for node {node} and active nodes var '
f'is {active_nodes} and real active nodes var is '
f'{real_active_nodes}'
)
try:
# who knows how much time has gone by...
# Update timeout to keep nodes in sync +/-
body['timeout'] = (
final_time - datetime.datetime.now()
).seconds
r = requests.put(
url, verify=False, headers=headers, json=body,
timeout=10
)
# a 4xx error from our endpoint;
# likely another txn is running
# Breaking here will cause a rollback on nodes we have
# successfully started a txn on so far. Then it will try
# again to get a transaction on all nodes. Put all
# conditions where that is the desired behavior here.
if int(r.status_code / 100) == 4:
logging.debug(
'Got a 4xx error while beginning transaction '
f'with response text {r.text}'
)
break # TODO: useless, got break in finally statement
# TODO: is there any case to separate 4xx
# from all other error codes
r.raise_for_status()
node_success = True
break
except requests.Timeout:
logging.warning(
f'start_transaction(): timeout on node {node}'
)
except Exception:
logging.warning(
'start_transaction(): got error during request '
f'to node {node}',
exc_info=True
)
finally:
if not node_success and node in optional_nodes:
logging.info(
f'start_transaction(): node {node} is optional;'
'ignoring the error'
)
real_active_nodes.remove(node)
break
# wait 1 sec and try on this node again
time.sleep(1)
if not node_success and node not in optional_nodes:
rollback_txn_attempt(api_key, version, id, successes)
# wait up to 5 secs and try the whole thing again
time.sleep(random() * 5)
break
elif node_success:
successes.append(node)
# TODO: a little more work needs to be done here. If not all of the active-nodes
# are up when start is called, this will fail. It should succeed if 'enough' nodes
# are up (> 50%).
success = (len(successes) == len(real_active_nodes))
return (success, id, successes)
def rollback_txn_attempt(key, version, txnid, nodes):
headers = {'x-api-key': key}
body = {'id': txnid}
for node in nodes:
url = f"https://{node}:8640/cmapi/{version}/node/rollback"
for retry in range(5):
try:
r = requests.put(
url, verify=False, headers=headers, json=body, timeout=5
)
r.raise_for_status()
except requests.Timeout:
logging.warning(
f'rollback_txn_attempt(): timeout on node "{node}"'
)
except Exception:
logging.error(
(
f'rollback_txn_attempt(): got unrecognised error '
f'during request to "{node}".'
),
exc_info=True
)
else:
break
time.sleep(1)
# on a failure to rollback or commit a txn on a subset of nodes, what are the options?
# - open a new txn and revert the changes on the nodes that respond
# - go forward with the subset. If those nodes are still up, they will have a config that is out of sync.
# -> for now, going to assume that the node went down, and that when it comes back up, its config
# will be sync'd
def rollback_transaction(
id, config_filename=CMAPI_CONF_PATH,
cs_config_filename=DEFAULT_MCS_CONF_PATH, nodes=None
):
cfg_parser = get_config_parser(config_filename)
key = get_current_key(cfg_parser)
version = get_version()
if nodes is None:
nodes = get_active_nodes(cs_config_filename)
rollback_txn_attempt(key, version, id, nodes)
def commit_transaction(
id, config_filename=CMAPI_CONF_PATH,
cs_config_filename=DEFAULT_MCS_CONF_PATH, nodes = None
):
cfg_parser = get_config_parser(config_filename)
key = get_current_key(cfg_parser)
version = get_version()
if nodes is None:
nodes = get_active_nodes(cs_config_filename)
headers = {'x-api-key': key}
body = {'id': id}
for node in nodes:
url = f"https://{node}:8640/cmapi/{version}/node/commit"
for retry in range(5):
try:
r = requests.put(
url, verify=False, headers=headers, json=body, timeout=5
)
r.raise_for_status()
except requests.Timeout:
logging.warning(f'commit_transaction(): timeout on node {node}')
except Exception as e:
logging.warning(
f'commit_transaction(): got error during request to {node}: '
f'{str(e)}'
)
else:
break
time.sleep(1)
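# Typical calling pattern for the transaction helpers above, mirroring
# how ClusterHandler and FailoverAgent use them elsewhere in this change:
#
#     success, txn_id, nodes = start_transaction()
#     if not success:
#         rollback_transaction(txn_id, nodes=nodes)
#     else:
#         try:
#             # ...mutate Columnstore.xml here...
#             update_revision_and_manager()
#             broadcast_new_config(nodes=nodes)
#             commit_transaction(txn_id, nodes=nodes)
#         except Exception:
#             rollback_transaction(txn_id, nodes=nodes)
#             raise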
def broadcast_new_config(
cs_config_filename: str = DEFAULT_MCS_CONF_PATH,
cmapi_config_filename: str = CMAPI_CONF_PATH,
sm_config_filename: str = DEFAULT_SM_CONF_PATH,
test_mode: bool = False,
nodes: Optional[list] = None,
) -> bool:
"""Send new config to nodes. Now in async way.
:param cs_config_filename: Columnstore.xml path,
defaults to DEFAULT_MCS_CONF_PATH
:type cs_config_filename: str, optional
:param cmapi_config_filename: cmapi config path,
defaults to CMAPI_CONF_PATH
:type cmapi_config_filename: str, optional
:param sm_config_filename: storage manager config path,
defaults to DEFAULT_SM_CONF_PATH
:type sm_config_filename: str, optional
:param test_mode: for test purposes, defaults to False. TODO: remove
:type test_mode: bool, optional
:param nodes: nodes list for config put, defaults to None
:type nodes: Optional[list], optional
:return: success state
:rtype: bool
"""
cfg_parser = get_config_parser(cmapi_config_filename)
key = get_current_key(cfg_parser)
version = get_version()
if nodes is None:
nodes = get_active_nodes(cs_config_filename)
nc = NodeConfig()
root = nc.get_current_config_root(config_filename=cs_config_filename)
with open(cs_config_filename) as f:
config_text = f.read()
with open(sm_config_filename) as f:
sm_config_text = f.read()
headers = {'x-api-key': key}
body = {
'manager': root.find('./ClusterManager').text,
'revision': root.find('./ConfigRevision').text,
'timeout': 300,
'config': config_text,
'cs_config_filename': cs_config_filename,
'sm_config_filename': sm_config_filename,
'sm_config': sm_config_text
}
# TODO: remove test mode here and replace it by mock in tests
if test_mode:
body['test'] = True
failed_nodes = []
success_nodes = []
async def update_config(node, success_nodes, failed_nodes, headers, body):
url = f'https://{node}:8640/cmapi/{version}/node/config'
request_put = partial(
requests.put, url, verify=False, headers=headers, json=body,
timeout=120
)
success = False
executor = concurrent.futures.ThreadPoolExecutor()
loop = asyncio.get_event_loop()
# TODO: remove this retry; it causes retries and long waiting times,
# e.g. if some of the mcs processes couldn't properly start/stop.
# Fix error handling; an error could be raised instead of returning
# a bool value
for retry in range(5):
try:
r = await loop.run_in_executor(executor, request_put)
r.raise_for_status()
except requests.Timeout:
logging.warning(
f'Timeout while pushing new config to "{node}"'
)
except Exception:
logging.warning(
f'Got an unexpected error pushing new config to "{node}"',
exc_info=True
)
else:
success_nodes.append(node)
success = True
break
if not success:
failed_nodes.append(node)
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
tasks = [
update_config(node, success_nodes, failed_nodes, headers, body)
for node in nodes
]
loop.run_until_complete(asyncio.wait(tasks))
loop.close()
if len(success_nodes) > 0:
logging.info(
f'Successfully pushed new config file to {success_nodes}'
)
if len(failed_nodes) > 0:
logging.error(
f'Failed to push the new config to {failed_nodes}'
)
return False
return True
# Might be more appropriate to put these in node_manipulation?
def update_revision_and_manager(
input_config_filename: Optional[str] = None,
output_config_filename: Optional[str] = None
):
"""Update MCS xml config revision and cluster manager tags.
:param input_config_filename: input MCS xml config filepath, defaults to None
:type input_config_filename: Optional[str], optional
:param output_config_filename: output MCS xml config filepath, defaults to None
:type output_config_filename: Optional[str], optional
"""
nc = NodeConfig()
if input_config_filename is None:
root = nc.get_current_config_root()
else:
root = nc.get_current_config_root(input_config_filename)
try:
rev_node = root.find('./ConfigRevision')
cur_revision = int(rev_node.text) + 1
rev_node.text = str(cur_revision)
root.find('./ClusterManager').text = str(
nc.get_module_net_address(root=root, module_id=1)
)
except Exception:
logging.error(
'Caught exception while updating MCS config revision and cluster '
'manager tags, will not write new config',
exc_info=True
)
else:
if output_config_filename is None:
nc.write_config(root)
else:
nc.write_config(root, filename=output_config_filename)
def get_config_parser(
config_filepath: str = CMAPI_CONF_PATH
) -> configparser.ConfigParser:
"""Get config parser from cmapi server ini config file.
:param config_filepath: cmapi server conf path, defaults to CMAPI_CONF_PATH
:type config_filepath: str, optional
:return: config parser
:rtype: configparser.ConfigParser
"""
cfg_parser = configparser.ConfigParser()
try:
with open(config_filepath, 'r', encoding='utf-8') as cfg_file:
cfg_parser.read_file(cfg_file)
except PermissionError as e:
# TODO: looks useless here, because the config is created
# from the default one on cmapi server startup.
# Anyway, it probably has to raise an error and then
# return a 500 error
logging.error(
'CMAPI cannot read configuration file. '
'API key stored in memory only.',
exc_info=True
)
return cfg_parser
def save_cmapi_conf_file(cfg_parser, config_filepath: str = CMAPI_CONF_PATH):
"""Save config file from config parser.
:param cfg_parser: config parser to save
:type cfg_parser: configparser.ConfigParser
:param config_filepath: cmapi config filepath, defaults to CMAPI_CONF_PATH
:type config_filepath: str, optional
"""
try:
with open(config_filepath, 'w', encoding='utf-8') as cfg_file:
cfg_parser.write(cfg_file)
except PermissionError:
logging.error(
'CMAPI cannot save configuration file due to permissions. '
'Some values still can be stored in memory.',
exc_info=True
)
def get_active_nodes(config: str = DEFAULT_MCS_CONF_PATH) -> list:
"""Get active nodes from Columnstore.xml.
These are just the names under which the nodes were added.
:param config: xml config path, defaults to DEFAULT_MCS_CONF_PATH
:type config: str, optional
:return: active nodes
:rtype: list
"""
nc = NodeConfig()
root = nc.get_current_config_root(config, upgrade=False)
nodes = root.findall('./ActiveNodes/Node')
return [ node.text for node in nodes ]
def get_desired_nodes(config=DEFAULT_MCS_CONF_PATH):
nc = NodeConfig()
root = nc.get_current_config_root(config, upgrade=False)
nodes = root.findall("./DesiredNodes/Node")
return [ node.text for node in nodes ]
def in_maintenance_state(config=DEFAULT_MCS_CONF_PATH):
nc = NodeConfig()
root = nc.get_current_config_root(config, upgrade=False)
raw_state = root.find('./Maintenance')
# if no Maintenance tag is found in the xml config
state = False
if raw_state is not None:
# returns True on "true" string else return false
state = lxml.objectify.BoolElement(raw_state.text)
return state
def get_current_key(config_parser):
"""Get API key for cmapi server endpoints from ini config.
:param config_parser: config parser
:type config_parser: configparser.ConfigParser
:return: api key
:rtype: str
"""
# ConfigParser reads the value as is, e.g. including quotes
return config_parser.get('Authentication', 'x-api-key', fallback='')
def get_version():
from cmapi_server.controllers.dispatcher import _version
return _version
def get_dbroots(node, config=DEFAULT_MCS_CONF_PATH):
# TODO: somehow duplicated with NodeConfig.get_all_dbroots?
nc = NodeConfig()
root = nc.get_current_config_root(config)
dbroots = []
smc_node = root.find('./SystemModuleConfig')
mod_count = int(smc_node.find('./ModuleCount3').text)
for i in range(1, mod_count+1):
ip_addr = smc_node.find(f'./ModuleIPAddr{i}-1-3').text
hostname = smc_node.find(f'./ModuleHostName{i}-1-3').text
node_fqdn = socket.gethostbyaddr(hostname)[0]
if node in LOCALHOSTS and hostname != 'localhost':
node = socket.gethostbyaddr(socket.gethostname())[0]
elif node not in LOCALHOSTS and hostname == 'localhost':
# hostname will only be localhost if we are in a one-node cluster
hostname = socket.gethostbyaddr(socket.gethostname())[0]
if node == ip_addr or node == hostname or node == node_fqdn:
for j in range(
1, int(smc_node.find(f"./ModuleDBRootCount{i}-3").text) + 1
):
dbroots.append(
smc_node.find(f"./ModuleDBRootID{i}-{j}-3").text
)
return dbroots
def get_current_config_file(
config_filename=DEFAULT_MCS_CONF_PATH,
cmapi_config_filename=CMAPI_CONF_PATH
):
"""Start a transaction on all DesiredNodes, which are all optional.
- the transaction prevents config changes from being made at the same time
- get the config from each node
- discard config files for different clusters
- call put_config on the config file with the highest revision number found
- end the transaction
"""
logging.info('get_current_config_file(): seeking the current config file')
cfg_parser = get_config_parser(cmapi_config_filename)
key = get_current_key(cfg_parser)
nc = NodeConfig()
root = nc.get_current_config_root(config_filename = config_filename)
# TODO: here we got set of ip addresses of DesiredNodes
# but after that we convert them to list and send as
# an optional_nodes argument to start_transaction()
# So need to work with data type of nodes.
desired_nodes = {
node.text for node in root.findall('./DesiredNodes/Node')
}
if len(desired_nodes) <= 1:
return True
current_rev = int(root.find('ConfigRevision').text)
cluster_name = root.find('ClusterName').text
highest_rev = current_rev
highest_node = 'localhost'
highest_config = nc.to_string(root)
# TODO: data type of optional_nodes set -> list
# Need to work with it inside and outside of start_transaction
(success, txn_id, nodes) = start_transaction(
cs_config_filename=config_filename,
optional_nodes=list(desired_nodes)
)
localhost_aliases = set(nc.get_network_addresses_and_names())
other_nodes = set(nodes) - localhost_aliases
if not success or len(other_nodes) == 0:
if success:
commit_transaction(txn_id, nodes = nodes)
return False
nodes_in_same_cluster = 0
for node in nodes:
if node in localhost_aliases:
continue
headers = {'x-api-key' : key}
url = f'https://{node}:8640/cmapi/{get_version()}/node/config'
try:
r = requests.get(url, verify=False, headers=headers, timeout=5)
r.raise_for_status()
config = r.json()['config']
except Exception as e:
logging.warning(
'get_current_config_file(): got an error fetching the '
f'config file from {node}: {str(e)}'
)
continue
tmp_root = nc.get_root_from_string(config)
name_node = tmp_root.find('ClusterName')
if name_node is None or name_node.text != cluster_name:
continue
nodes_in_same_cluster += 1
rev_node = tmp_root.find('ConfigRevision')
if rev_node is None or int(rev_node.text) <= highest_rev:
continue
highest_rev = int(rev_node.text)
highest_config = config
highest_node = node
nc.apply_config(config_filename=config_filename, xml_string=highest_config)
# TODO: do we need restart node here?
commit_transaction(txn_id, cs_config_filename=config_filename, nodes=nodes)
# todo, we might want stronger criteria for a large cluster.
# Right now we want to reach at least one other node
# (if there is another node)
if len(desired_nodes) > 1 and nodes_in_same_cluster < 1:
logging.error(
'get_current_config_file(): failed to contact enough nodes '
f'in my cluster ({cluster_name}) to reliably retrieve a current '
'configuration file. Manual intervention may be required.'
)
# TODO: addition error handling.
try:
MCSProcessManager.stop_node(is_primary=nc.is_primary_node())
except CMAPIBasicError as err:
logging.error(err.message)
return False
if highest_rev != current_rev:
logging.info(
'get_current_config_file(): Accepted the config file from'
f' {highest_node}'
)
else:
logging.info(
'get_current_config_file(): This node has the current config file'
)
return True
def wait_for_deactivation_or_put_config(
config_mtime, config_filename=DEFAULT_MCS_CONF_PATH
):
'''
if a multi-node cluster...
Wait for either a put_config operation (as determined by monitoring the mtime of config_filename),
or wait for this node to be removed from active_nodes,
or wait for a period long enough for this to be considered a 'long' outage
(30s right now, as determined by the failover code).
TODO: make that time period configurable...
Activating failover after one of these three events should allow this node to join the cluster either as part
of the failover behavior, or as part of the cluster-wide start cmd.
'''
my_names = set(NodeConfig().get_network_addresses_and_names())
desired_nodes = get_desired_nodes(config_filename)
if len(desired_nodes) == 1 and desired_nodes[0] in my_names:
logging.info("wait_for_deactivation_or_put_config: Single-node cluster, safe to continue")
return
final_time = datetime.datetime.now() + datetime.timedelta(seconds = 40)
while config_mtime == os.path.getmtime(config_filename) and \
len(my_names.intersection(set(get_active_nodes(config_filename)))) > 0 and \
datetime.datetime.now() < final_time:
logging.info("wait_for_deactivation_or_put_config: Waiting...")
time.sleep(5)
if config_mtime != os.path.getmtime(config_filename):
logging.info("wait_for_deactivation_or_put_config: A new config was received, safe to continue.")
elif len(my_names.intersection(set(get_active_nodes(config_filename)))) == 0:
logging.info("wait_for_deactivation_or_put_config: Was removed from the cluster, safe to continue.")
else:
logging.info("wait_for_deactivation_or_put_config: Time limit reached, continuing.")
# This isn't used currently. Remove once we decide there is no need for it.
def if_primary_restart(
config_filename=DEFAULT_MCS_CONF_PATH,
cmapi_config_filename=CMAPI_CONF_PATH
):
nc = NodeConfig()
root = nc.get_current_config_root(config_filename = config_filename)
primary_node = root.find("./PrimaryNode").text
if primary_node not in nc.get_network_addresses_and_names():
return
cfg_parser = get_config_parser(cmapi_config_filename)
key = get_current_key(cfg_parser)
headers = { "x-api-key" : key }
body = { "config": config_filename }
logging.info("if_primary_restart(): restarting the cluster.")
url = f"https://{primary_node}:8640/cmapi/{get_version()}/cluster/start"
endtime = datetime.datetime.now() + datetime.timedelta(seconds = 600) # questionable how long to retry
success = False
while not success and datetime.datetime.now() < endtime:
try:
response = requests.put(url, verify = False, headers = headers, json = body, timeout = 60)
response.raise_for_status()
success = True
except Exception as e:
logging.warning(f"if_primary_restart(): failed to start the cluster, got {str(e)}")
time.sleep(10)
if not success:
logging.error(f"if_primary_restart(): failed to start the cluster. Manual intervention is required.")
def get_cej_info(config_root):
"""Get CEJ (Cross Engine Join) info.
Get credentials from the CrossEngineSupport section in Columnstore.xml.
Decrypt CEJ user password if needed.
:param config_root: config root element from Columnstore.xml file
:type config_root: lxml.Element
:return: cej_host, cej_port, cej_username, cej_password
:rtype: tuple
"""
cej_node = config_root.find('./CrossEngineSupport')
cej_host = cej_node.find('Host').text or '127.0.0.1'
cej_port = cej_node.find('Port').text or '3306'
cej_username = cej_node.find('./User').text
cej_password = cej_node.find('./Password').text or ''
if not cej_username:
logging.error(
'Columnstore.xml has an empty CrossEngineSupport.User tag'
)
if not cej_password:
logging.warning(
'Columnstore.xml has an empty CrossEngineSupport.Password tag'
)
if CEJPasswordHandler.secretsfile_exists():
cej_password = CEJPasswordHandler.decrypt_password(cej_password)
return cej_host, cej_port, cej_username, cej_password
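# Illustrative only: the stock Columnstore.xml in this commit ships a
# CrossEngineSupport section shaped like
#   <CrossEngineSupport>
#     <Host>127.0.0.1</Host>
#     <Port>3306</Port>
#     <User>root</User>
#     <Password/>
#   </CrossEngineSupport>
# so, assuming no secrets file is present, a sketch of the call is:
#   root = NodeConfig().get_current_config_root(DEFAULT_MCS_CONF_PATH)
#   get_cej_info(root)  # -> ('127.0.0.1', '3306', 'root', '')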
def system_ready(config_filename=DEFAULT_MCS_CONF_PATH):
"""Indicates whether the node is ready to accept queries.
:param config_filename: columnstore xml config filepath,
defaults to DEFAULT_MCS_CONF_PATH
:type config_filename: str, optional
:return: tuple of 2 booleans: (ready, should_retry)
:rtype: tuple
"""
nc = NodeConfig()
root = nc.get_current_config_root(config_filename)
host, port, username, password = get_cej_info(root)
if username is None:
# Second False indicates not to retry inside calling function's
# retry loop
return False, False
cmd = (
f"/usr/bin/mariadb -h '{host}' "
f"-P '{port}' "
f"-u '{username}' "
f"--password='{password}' "
"-sN -e "
"\"SELECT mcssystemready();\""
)
import subprocess
ret = subprocess.run(cmd, stdout=subprocess.PIPE, shell = True)
if ret.returncode == 0:
response = ret.stdout.decode("utf-8").strip()
if response == '1':
return True, False
else:
return False, True
return False, False
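# Hypothetical polling sketch: the first boolean reports readiness, the
# second tells the caller's retry loop whether another attempt is worthwhile.
#   ready, retry = system_ready()
#   while not ready and retry:
#       time.sleep(1)
#       ready, retry = system_ready()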
def cmapi_config_check(cmapi_conf_path: str = CMAPI_CONF_PATH):
"""Check if cmapi config file exists and copy default config if not.
:param cmapi_conf_path: cmapi conf path, defaults to CMAPI_CONF_PATH
:type cmapi_conf_path: str, optional
"""
if not os.path.exists(cmapi_conf_path):
logging.info(
f'No config file at "{cmapi_conf_path}". Copying the default '
f'config from "{CMAPI_DEFAULT_CONF_PATH}".'
)
copyfile(CMAPI_DEFAULT_CONF_PATH, cmapi_conf_path)
def dequote(input_str: str) -> str:
"""Dequote input string.
If a string has single or double quotes around it, remove them.
Make sure the pair of quotes match.
If a matching pair of quotes is not found, return the string unchanged.
:param input_str: possibly quoted input string
:type input_str: str
:return: unquoted string
:rtype: str
"""
if (
len(input_str) >= 2 and
input_str[0] == input_str[-1]
) and input_str.startswith(("'", '"')):
return input_str[1:-1]
return input_str
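# Behaviour sketch (doctest style, illustrative values):
#   >>> dequote('"systemd"')
#   'systemd'
#   >>> dequote("'systemd'")
#   'systemd'
#   >>> dequote('"mismatched\'')   # quotes don't match -> unchanged
#   '"mismatched\''
#   >>> dequote('plain')
#   'plain'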
def get_dispatcher_name_and_path(
config_parser: configparser.ConfigParser
) -> Tuple[str, str]:
"""Get dispatcher name and path from cmapi conf file.
:param config_parser: cmapi conf file parser
:type config_parser: configparser.ConfigParser
:return: dispatcher name and path strings
:rtype: tuple[str, str]
"""
dispatcher_name = dequote(
config_parser.get('Dispatcher', 'name', fallback='systemd')
)
# TODO: reserved for future releases for a CustomDispatcher class;
# remove if it proves useless
dispatcher_path = dequote(
config_parser.get('Dispatcher', 'path', fallback='')
)
return dispatcher_name, dispatcher_path
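# Minimal sketch, assuming a cmapi_server.conf whose [Dispatcher] section
# reads `name = 'systemd'` (values in this file are quoted, hence dequote):
#   cfg_parser = get_config_parser(CMAPI_CONF_PATH)
#   get_dispatcher_name_and_path(cfg_parser)  # -> ('systemd', '')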

View File

@ -0,0 +1,131 @@
import json
import logging
import logging.config
from functools import partial, partialmethod
import cherrypy
from cherrypy import _cperror
from cmapi_server.constants import CMAPI_LOG_CONF_PATH
class AddIpFilter(logging.Filter):
"""Filter to add IP address to logging record."""
def filter(self, record):
record.ip = cherrypy.request.remote.name or cherrypy.request.remote.ip
return True
def custom_cherrypy_error(
self, msg='', context='', severity=logging.INFO, traceback=False
):
"""Write the given ``msg`` to the error log. [now without hardcoded time]
This is not just for errors! [looks awful, but cherrypy realisation as is]
Applications may call this at any time to log application-specific
information.
If ``traceback`` is True, the traceback of the current exception
(if any) will be appended to ``msg``.
..Note:
All information
"""
exc_info = None
if traceback:
exc_info = _cperror._exc_info()
self.error_log.log(severity, ' '.join((context, msg)), exc_info=exc_info)
def dict_config(config_filepath: str):
with open(config_filepath, 'r', encoding='utf-8') as json_config:
config_dict = json.load(json_config)
logging.config.dictConfig(config_dict)
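# A minimal example of the dictConfig-schema JSON this function expects
# (illustrative; the real file ships at CMAPI_LOG_CONF_PATH):
#   {
#     "version": 1,
#     "formatters": {"default": {"format": "%(asctime)s %(levelname)s %(message)s"}},
#     "handlers": {"console": {"class": "logging.StreamHandler",
#                              "formatter": "default"}},
#     "root": {"level": "INFO", "handlers": ["console"]}
#   }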
def add_logging_level(level_name, level_num, method_name=None):
"""
Comprehensively adds a new logging level to the `logging` module and the
currently configured logging class.
`level_name` becomes an attribute of the `logging` module with the value
`level_num`.
`method_name` becomes a convenience method for both `logging` itself
and the class returned by `logging.getLoggerClass()` (usually just
`logging.Logger`).
If `method_name` is not specified, `level_name.lower()` is used.
To avoid accidentally clobbering existing attributes, this method will
raise an `AttributeError` if the level name is already an attribute of the
`logging` module or if the method name is already present.
Example
-------
>>> add_logging_level('TRACE', logging.DEBUG - 5)
>>> logging.getLogger(__name__).setLevel('TRACE')
>>> logging.getLogger(__name__).trace('that worked')
>>> logging.trace('so did this')
>>> logging.TRACE
5
"""
if not method_name:
method_name = level_name.lower()
if hasattr(logging, level_name):
raise AttributeError(f'{level_name} already defined in logging module')
if hasattr(logging, method_name):
raise AttributeError(
f'{method_name} already defined in logging module'
)
if hasattr(logging.getLoggerClass(), method_name):
raise AttributeError(f'{method_name} already defined in logger class')
# This method was inspired by the answers to Stack Overflow post
# http://stackoverflow.com/q/2183233/2988730, especially
# https://stackoverflow.com/a/35804945
# https://stackoverflow.com/a/55276759
logging.addLevelName(level_num, level_name)
setattr(logging, level_name, level_num)
setattr(
logging.getLoggerClass(), method_name,
partialmethod(logging.getLoggerClass().log, level_num)
)
setattr(logging, method_name, partial(logging.log, level_num))
def config_cmapi_server_logging():
# add custom TRACE level for development purposes only;
# it can be activated via API endpoints or the cli tool without relaunching
add_logging_level('TRACE', 5)
cherrypy._cplogging.LogManager.error = custom_cherrypy_error
# reconfigure cherrypy.access log message format
# Default access_log_format '{h} {l} {u} {t} "{r}" {s} {b} "{f}" "{a}"'
# h - remote.name or remote.ip, l - "-",
# u - getattr(request, 'login', None) or '-', t - self.time(),
# r - request.request_line, s - status,
# b - dict.get(outheaders, 'Content-Length', '') or '-',
# f - dict.get(inheaders, 'Referer', ''),
# a - dict.get(inheaders, 'User-Agent', ''),
# o - dict.get(inheaders, 'Host', '-'),
# i - request.unique_id, z - LazyRfc3339UtcTime()
cherrypy._cplogging.LogManager.access_log_format = (
'{h} ACCESS "{r}" code {s}, bytes {b}, user-agent "{a}"'
)
dict_config(CMAPI_LOG_CONF_PATH)
def change_loggers_level(level: str):
"""Set level for each custom logger except cherrypy library.
:param level: logging level to set
:type level: str
"""
loggers = [
logging.getLogger(name) for name in logging.root.manager.loggerDict
if 'cherrypy' not in name
]
loggers.append(logging.getLogger()) # add RootLogger
for logger in loggers:
logger.setLevel(level)
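# Hypothetical runtime usage, once config_cmapi_server_logging() has
# installed the custom TRACE level:
#   change_loggers_level('TRACE')  # verbose, everything except cherrypy
#   change_loggers_level('INFO')   # back to normal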

View File

View File

@ -0,0 +1,29 @@
import logging
from typing import Optional
from cmapi_server.constants import VERSION_PATH
class AppManager:
started: bool = False
version: Optional[str] = None
@classmethod
def get_version(cls) -> str:
"""Get CMAPI version.
:return: cmapi version
:rtype: str
"""
if cls.version:
return cls.version
with open(VERSION_PATH, encoding='utf-8') as version_file:
version = '.'.join([
i.strip().split('=')[1]
for i in version_file.read().splitlines() if i
])
if not version:
logging.error('Couldn\'t detect version from VERSION file!')
version = 'Undefined'
cls.version = version
return cls.version
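# Parsing sketch: a VERSION file of KEY=value lines, e.g. (values are
# illustrative)
#   CMAPI_VERSION_MAJOR=23
#   CMAPI_VERSION_MINOR=2
#   CMAPI_VERSION_PATCH=1
# makes AppManager.get_version() return '23.2.1' (cached afterwards).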

View File

@ -0,0 +1,439 @@
from __future__ import annotations
import logging
import os.path
import socket
from time import sleep
import psutil
from cmapi_server.exceptions import CMAPIBasicError
from cmapi_server.constants import MCS_INSTALL_BIN, ALL_MCS_PROGS
from cmapi_server.process_dispatchers.systemd import SystemdDispatcher
from cmapi_server.process_dispatchers.container import (
ContainerDispatcher
)
from mcs_node_control.models.dbrm import DBRM
from mcs_node_control.models.dbrm_socket import SOCK_TIMEOUT
from mcs_node_control.models.misc import get_workernodes
from mcs_node_control.models.process import Process
PROCESS_DISPATCHERS = {
'systemd': SystemdDispatcher,
# could be used in docker containers and OSes w/o systemd
'container': ContainerDispatcher,
}
PRIMARY_PROGS = ('controllernode', 'DMLProc', 'DDLProc')
class MCSProcessManager:
"""Class to run process operations.
e.g. re/-start or stop systemd services, run executable.
"""
CONTROLLER_MAX_RETRY = 30
mcs_progs = {}
mcs_version_info = None
dispatcher_name = None
process_dispatcher = None
@classmethod
def _get_prog_name(cls, name: str) -> str:
"""Get proper service name for systemd or non-systemd installations.
:param name: service name
:type name: str
:return: correct service name
:rtype: str
"""
if cls.dispatcher_name == 'systemd':
return ALL_MCS_PROGS[name].service_name
return name
@classmethod
def _get_sorted_progs(
cls, is_primary: bool, reverse: bool = False
) -> dict:
"""Get sorted services dict.
:param is_primary: is primary node or not
:type is_primary: bool
:param reverse: reverse sort order, defaults to False
:type reverse: bool, optional
:return: dict with sorted services in correct start/stop order
:rtype: dict
"""
unsorted_progs: dict
if is_primary:
unsorted_progs = cls.mcs_progs
else:
unsorted_progs = {
prog_name: prog_info
for prog_name, prog_info in cls.mcs_progs.items()
if prog_name not in PRIMARY_PROGS
}
if reverse:
# the stop sequence is built using the stop_priority property
return dict(
sorted(
unsorted_progs.items(),
key=lambda item: item[1].stop_priority,
)
)
# the start-up sequence is the dict's natural insertion order
return unsorted_progs
@classmethod
def _detect_processes(cls) -> None:
"""Detect existing mcs services. Depends on MCS version."""
if cls.mcs_progs:
logging.warning('MCSProcessManager has already detected processes.')
for prog_name, prog_info in ALL_MCS_PROGS.items():
if os.path.exists(os.path.join(MCS_INSTALL_BIN, prog_name)):
cls.mcs_progs[prog_name] = prog_info
@classmethod
def detect(cls, dispatcher_name: str, dispatcher_path: str = None) -> None:
"""Detect mcs version info and installed processes.
:param dispatcher_name: process dispatcher name
:type dispatcher_name: str
:param dispatcher_path: path to custom dispatcher,
for next releases, defaults to None
:type dispatcher_path: str, optional
:raises CMAPIBasicError: if custom dispatcher path doesn't exist
:raises CMAPIBasicError: Not implemented custom dispatcher error
"""
cls._detect_processes()
# detect mcs version info by processes
if len(cls.mcs_progs) == 8:
cls.mcs_version_info = '6.4.x and lower'
elif len(cls.mcs_progs) == 7 and 'ExeMgr' not in cls.mcs_progs:
cls.mcs_version_info = '22.08.x and higher'
else:
cls.mcs_version_info = 'Undefined'
logging.warning(
'MCS version hasn\'t been detected properly. '
'Please try to update your CMAPI version or contact support.'
)
logging.info(
f'Detected {len(cls.mcs_progs)} MCS services. '
f'MCS version is {cls.mcs_version_info}.'
)
# TODO: For next releases. Do we really need custom dispatchers?
if dispatcher_name not in PROCESS_DISPATCHERS:
logging.warning(
f'Custom process dispatcher with name "{dispatcher_name}" '
f'and path "{dispatcher_path}" used.'
)
if not dispatcher_path or not os.path.exists(dispatcher_path):
err_msg = 'Wrong dispatcher path in cmapi_config file.'
logging.error(err_msg)
raise CMAPIBasicError(err_msg)
cls.dispatcher_name = 'custom'
raise CMAPIBasicError('Custom dispatchers yet not implemented!')
cls.dispatcher_name = dispatcher_name
cls.process_dispatcher = PROCESS_DISPATCHERS[dispatcher_name]
cls.process_dispatcher.init()
@classmethod
def _wait_for_workernodes(cls) -> bool:
"""Wait for workernodes processes.
Waiting for all workernodes to come up before starting
controllernode on a primary.
:return: True on success
:rtype: bool
"""
logging.debug(
'Waiting for all workernodes to come up before starting '
'controllernode on a primary.'
)
workernodes = get_workernodes()
attempts = cls.CONTROLLER_MAX_RETRY
while attempts > 0 and len(workernodes) > 0:
logging.debug(f'Waiting for "{list(workernodes)}"....{attempts}')
# create a separate list of workernode names
# so items can be safely deleted from the source dict
for name in list(workernodes):
try:
sock = socket.socket(
socket.AF_INET, socket.SOCK_STREAM
)
sock.settimeout(SOCK_TIMEOUT)
sock.connect(
(
workernodes[name]['IPAddr'],
workernodes[name]['Port']
)
)
except (socket.timeout, ConnectionRefusedError):
logging.debug(
f'"{name}" {workernodes[name]["IPAddr"]}:'
f'{workernodes[name]["Port"]} not started yet.'
)
else:
# delete started workernode from workernodes dict
del workernodes[name]
finally:
sock.close()
attempts -= 1
if workernodes:
logging.error(
f'Some workernodes "{workernodes}" are not reachable after '
f'{cls.CONTROLLER_MAX_RETRY} attempts to connect with a '
f'{SOCK_TIMEOUT} seconds timeout. '
'Starting mcs-controllernode anyway.'
)
return False
return True
@classmethod
def _wait_for_controllernode(cls) -> bool:
"""Waiting for controllernode to come up on a primary.
:return: True on success
:rtype: bool
"""
logging.debug(
'Waiting for controllernode to come up before starting '
'ddlproc/dmlproc on non-primary nodes.'
)
attempts = cls.CONTROLLER_MAX_RETRY
success = False
while attempts > 0:
try:
with DBRM():
# check connection
success = True
except (ConnectionRefusedError, RuntimeError, socket.error):
logging.info(
'Cannot establish a connection to the controllernode. '
f'Controller node still not started. Waiting...{attempts}'
)
else:
break
attempts -= 1
if not success:
logging.error(
'Controllernode is not reachable after '
f'{cls.CONTROLLER_MAX_RETRY} attempts to connect with a '
f'{SOCK_TIMEOUT} seconds timeout. '
'Starting mcs-dmlproc/mcs-ddlproc anyway.'
)
return False
return True
@classmethod
def _wait_for_DMLProc_stop(cls, timeout: int = 10) -> bool:
"""Waiting DMLProc process to stop.
:param timeout: timeout to wait, defaults to 10
:type timeout: int, optional
:return: True on success
:rtype: bool
"""
logging.info(f'Waiting for DMLProc to stop within {timeout} seconds.')
total_timeout = timeout
dmlproc_stopped = False
while timeout > 0:
logging.info(
f'Waiting for DMLProc to stop. Seconds left {timeout}.'
)
if not Process.check_process_alive('DMLProc'):
logging.info('DMLProc gracefully stopped by DBRM command.')
dmlproc_stopped = True
break
sleep(1)
timeout -= 1
else:
logging.error(
'DMLProc did not stop gracefully by DBRM command within '
f'{total_timeout} seconds. It will be stopped directly.'
)
return dmlproc_stopped
@classmethod
def noop(cls, *args, **kwargs):
"""No operation. TODO: looks like useless."""
cls.process_dispatcher.noop()
@classmethod
def start(cls, name: str, is_primary: bool, use_sudo: bool) -> bool:
"""Start mcs process.
:param name: mcs process name
:type name: str
:param is_primary: is node primary or not
:type is_primary: bool
:param use_sudo: use sudo or not
:type use_sudo: bool
:return: True if process started successfully
:rtype: bool
"""
return cls.process_dispatcher.start(
cls._get_prog_name(name), is_primary, use_sudo
)
@classmethod
def stop(
cls, name: str, is_primary: bool, use_sudo: bool, timeout: int = 10
) -> bool:
"""Stop mcs process.
:param name: mcs process name
:type name: str
:param is_primary: is node primary or not
:type is_primary: bool
:param use_sudo: use sudo or not
:type use_sudo: bool
:param timeout: timeout for DMLProc gracefully stop using DBRM, seconds
:type timeout: int
:return: True if process stopped successfully
:rtype: bool
"""
# TODO: do we need here force stop DMLProc as a method argument?
if is_primary and name == 'DMLProc':
logging.info(
'Trying to gracefully stop DMLProc using DBRM commands.'
)
try:
with DBRM() as dbrm:
dbrm.set_system_state(
['SS_ROLLBACK', 'SS_SHUTDOWN_PENDING']
)
except (ConnectionRefusedError, RuntimeError):
logging.error(
'Cannot set SS_ROLLBACK and SS_SHUTDOWN_PENDING '
'using DBRM while trying to gracefully auto stop DMLProc. '
'Continuing with the regular stop method.'
)
# stop DMLProc using regular signals or systemd
return cls.process_dispatcher.stop(
cls._get_prog_name(name), is_primary, use_sudo
)
# DMLProc stops gracefully via the DBRM commands above; otherwise
# fall back to the regular stop method below
if cls._wait_for_DMLProc_stop(timeout):
return True
return cls.process_dispatcher.stop(
cls._get_prog_name(name), is_primary, use_sudo
)
@classmethod
def restart(cls, name: str, is_primary: bool, use_sudo: bool) -> bool:
"""Restart mcs process.
:param name: mcs process name
:type name: str
:param is_primary: is node primary or not
:type is_primary: bool
:param use_sudo: use sudo or not
:type use_sudo: bool
:return: True if process restarted successfully
:rtype: bool
"""
return cls.process_dispatcher.restart(
cls._get_prog_name(name), is_primary, use_sudo
)
@classmethod
def get_running_mcs_procs(cls) -> list[dict]:
"""Search for mcs processes.
The method returns PIDs of MCS services in both container and systemd
environments.
:return: list of dicts with name and pid of mcs process
:rtype: list[dict]
"""
return [
{'name': proc.name(), 'pid': proc.pid}
for proc in psutil.process_iter(['pid', 'name'])
if proc.name() in cls.mcs_progs
]
@classmethod
def is_node_processes_ok(
cls, is_primary: bool, node_stopped: bool
) -> bool:
"""Check if needed processes exists or not.
:param is_primary: is node primary or not
:type is_primary: bool
:param node_stopped: is node stopped or started
:type node_stopped: bool
:return: True if the expected set of processes is running, else False
:rtype: bool
..NOTE: For next releases. Now only used in tests.
"""
running_procs = cls.get_running_mcs_procs()
if node_stopped:
return len(running_procs) == 0
node_progs = cls._get_sorted_progs(is_primary)
return set(node_progs) == set(p['name'] for p in running_procs)
@classmethod
def start_node(cls, is_primary: bool, use_sudo: bool = True):
"""Start mcs node processes.
:param is_primary: is node primary or not
:type is_primary: bool
:param use_sudo: use sudo or not, defaults to True
:type use_sudo: bool, optional
:raises CMAPIBasicError: immediately if an MCS process fails to start
"""
for prog_name in cls._get_sorted_progs(is_primary):
if (
cls.dispatcher_name == 'systemd'
and prog_name == 'StorageManager'
):
# TODO: MCOL-5458
logging.info(
f'Skip starting {prog_name} with systemd dispatcher.'
)
continue
# TODO: additional error handling
if prog_name == 'controllernode':
cls._wait_for_workernodes()
if prog_name in ('DMLProc', 'DDLProc'):
cls._wait_for_controllernode()
if not cls.start(prog_name, is_primary, use_sudo):
logging.error(f'Process "{prog_name}" not started properly.')
raise CMAPIBasicError(f'Error while starting "{prog_name}".')
@classmethod
def stop_node(
cls, is_primary: bool, use_sudo: bool = True, timeout: int = 10
):
"""Stop mcs node processes.
:param is_primary: is node primary or not
:type is_primary: bool
:param use_sudo: use sudo or not, defaults to True
:type use_sudo: bool, optional
:param timeout: timeout for DMLProc gracefully stop using DBRM, seconds
:type timeout: int
:raises CMAPIBasicError: immediately if an MCS process fails to stop
"""
# Always try to stop all processes, no matter whether this node is
# primary or a replica, so use the full list of processes. Otherwise, it
# could cause undefined behaviour when the primary goes down and then
# recovers (failover triggered 2 times).
for prog_name in cls._get_sorted_progs(True, reverse=True):
if not cls.stop(prog_name, is_primary, use_sudo):
logging.error(f'Process "{prog_name}" not stopped properly.')
raise CMAPIBasicError(f'Error while stopping "{prog_name}"')
@classmethod
def restart_node(cls, is_primary: bool, use_sudo: bool):
"""TODO: For next releases."""
if cls.get_running_mcs_procs():
cls.stop_node(is_primary, use_sudo)
cls.start_node(is_primary, use_sudo)
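# Illustrative lifecycle sketch (arguments assumed, error handling omitted):
#   MCSProcessManager.detect('systemd')  # or 'container'
#   MCSProcessManager.start_node(is_primary=True, use_sudo=True)
#   ...
#   MCSProcessManager.stop_node(is_primary=True, use_sudo=True, timeout=10)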

File diff suppressed because it is too large

View File

@ -0,0 +1,146 @@
"""Module contains base process dispatcher class implementation.
Formally, this is the required interface for subclasses.
"""
import logging
import os
import shlex
import subprocess
from datetime import datetime
from pathlib import Path
from typing import Dict, Optional, TextIO, Tuple
from cmapi_server.constants import MCS_INSTALL_BIN, MCS_LOG_PATH
class BaseDispatcher:
"""Class with base interfaces for dispatchers."""
@staticmethod
def _create_mcs_process_logfile(filename: str) -> str:
"""Create log file by name.
:param filename: log filename
:type filename: str
:return: full path of created log file
:rtype: str
"""
log_fullpath = os.path.join(MCS_LOG_PATH, filename)
# mode is octal: 0o666, not decimal 666
Path(log_fullpath).touch(mode=0o666)
return log_fullpath
@staticmethod
def exec_command(
command: str, daemonize: bool = False, silent: bool = False,
stdout: TextIO = subprocess.PIPE, env: Optional[Dict] = None
) -> Tuple[bool, str]:
"""Run command using subprocess.
:param command: command to run
:type command: str
:param daemonize: run command in detached mode, defaults to False
:type daemonize: bool, optional
:param silent: prevent error logs on non-zero exit status,
defaults to False
:type silent: bool, optional
:param stdout: stdout argument for Popen, defaults to subprocess.PIPE
:type stdout: TextIO, optional
:param env: environment argument for Popen, defaults to None
:type env: Optional[Dict], optional
:return: tuple of success status and output string from the subprocess;
multi-line output is returned as a single string for the caller to split
:rtype: Tuple[bool, str]
"""
output: str = ''
result: Tuple = (False, output)
try:
proc = subprocess.Popen(
shlex.split(command),
stdout=stdout,
stderr=subprocess.STDOUT,
start_new_session=daemonize,
env=env,
encoding='utf-8'
)
except Exception:
logging.error(f'Failed on run command "{command}".', exc_info=True)
# TODO: should cmapi exit with an exception here
# to stop the docker container?
# raise
return result
if daemonize:
# Remove the Popen object (optionally gc.collect() could be invoked).
# This prevents eventually spawning duplicated "defunct"
# (zombie) Python-parented processes, which previously could
# happen after a cluster restart. It didn't affect cluster
# condition, it only made the "mcs cluster status" command output
# confusing and ugly.
del proc
result = (True, output)
else:
logging.debug('Waiting command to finish.')
stdout_str, _ = proc.communicate()
returncode = proc.wait()
if stdout_str is not None:
# output guaranteed to be empty string not None
output = stdout_str
result = (True, output)
if returncode != 0:
if not silent:
logging.error(
f'Calling "{command}" finished with return code: '
f'"{returncode}" and stderr+stdout "{output}".'
)
result = (False, output)
return result
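# Usage sketch; the command strings and paths here are hypothetical:
#   ok, out = BaseDispatcher.exec_command('ls /var/lib/columnstore')
#   # daemonized commands detach immediately and return no captured output:
#   ok, _ = BaseDispatcher.exec_command('/usr/bin/some-daemon', daemonize=True)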
@classmethod
def _run_dbbuilder(cls, use_su=False) -> None:
# attempt to run dbbuilder on the primary node,
# e.g. when S3 storage was set up after the ColumnStore install
logging.info('Attempt to run dbbuilder on primary node')
dbbuilder_path = os.path.join(MCS_INSTALL_BIN, 'dbbuilder')
dbbuilder_arg = '7'
dbb_command = f'{dbbuilder_path} {dbbuilder_arg}'
if use_su:
# TODO: move mysql user to constants
dbb_command = f'su -s /bin/sh -c "{dbb_command}" mysql'
dbb_log_path = cls._create_mcs_process_logfile('dbbuilder.log')
with open(dbb_log_path, 'a', encoding='utf-8') as dbb_log_fh:
dbb_start_time = datetime.now().strftime('%d/%b/%Y %H:%M:%S')
dbb_log_fh.write(f'-----Started at {dbb_start_time}.-----\n')
# TODO: error handling?
# check if exist for next releases?
success, _ = cls.exec_command(dbb_command, stdout=dbb_log_fh)
dbb_log_fh.write('-----Finished run.-----\n\n')
@classmethod
def init(cls):
"""Method for dispatcher initialisation."""
pass
@classmethod
def is_service_running(cls, service: str, use_sudo: bool) -> bool:
"""Check if systemd proceess/service is running."""
raise NotImplementedError
@classmethod
def start(cls, service: str, is_primary: bool, use_sudo: bool) -> bool:
"""Start process/service."""
raise NotImplementedError
@classmethod
def stop(cls, service: str, is_primary: bool, use_sudo: bool) -> bool:
"""Stop process/service."""
raise NotImplementedError
@classmethod
def restart(cls, service: str, is_primary: bool, use_sudo: bool) -> bool:
"""Restart process/service."""
raise NotImplementedError
@classmethod
def reload(cls, service: str, is_primary: bool, use_sudo: bool) -> bool:
"""Reload process/service."""
raise NotImplementedError

View File

@ -0,0 +1,294 @@
"""
Module contains non-systemd/container process dispatcher class implementation.
"""
import logging
import os.path
import re
from pathlib import Path
from time import sleep
import psutil
from cmapi_server.constants import (
IFLAG, LIBJEMALLOC_DEFAULT_PATH, MCS_INSTALL_BIN, ALL_MCS_PROGS
)
from cmapi_server.exceptions import CMAPIBasicError
from cmapi_server.process_dispatchers.base import BaseDispatcher
class ContainerDispatcher(BaseDispatcher):
"""Manipulates processes in docker container.
It can be used in any OS or container environment where systemd
is unavailable or not wanted.
"""
libjemalloc_path = None
@staticmethod
def _set_iflag():
"""Create IFLAG file.
Means Columnstore container init finished.
"""
Path(IFLAG).touch()
@classmethod
def _get_proc_object(cls, name: str) -> psutil.Process:
"""Getting psutil Process object by service name.
:param name: process name
:type name: str
:raises psutil.NoSuchProcess: if no process with that name is present
:return: Process object with specified name
:rtype: psutil.Process
...TODO: add types-psutil to requirements for mypy checks
"""
for proc in psutil.process_iter(['pid', 'name', 'username']):
if proc.name().lower() == name.lower():
return proc
raise psutil.NoSuchProcess(pid=None, name=name)
@classmethod
def get_libjemalloc_path(cls) -> str:
"""Get libjemalloc.so path.
:raises CMAPIBasicError: if ldconfig execution returned a non-zero status
:raises FileNotFoundError: if no libjemalloc.so.2 found
:return: libjemalloc.so.2 path
:rtype: str
"""
logger = logging.getLogger('container_sh')
if cls.libjemalloc_path:
return cls.libjemalloc_path
# pylint: disable=line-too-long
# for reference: https://github.com/pyinstaller/pyinstaller/blob/f29b577df4e1659cf65aacb797034763308fd298/PyInstaller/depend/utils.py#L304
splitlines_count = 1
pattern = re.compile(r'^\s+(\S+)(\s.*)? => (\S+)')
success, result = cls.exec_command('ldconfig -p')
if not success:
raise CMAPIBasicError('Failed executing ldconfig.')
text = result.strip().splitlines()[splitlines_count:]
for line in text:
# this assumes library names do not contain whitespace
p_match = pattern.match(line)
# Sanitize away any abnormal lines of output.
if p_match is None:
continue
lib_path = p_match.groups()[-1]
lib_name = p_match.group(1)
if 'libjemalloc' in lib_name:
# use the first entry
# TODO: do we need path or name here?
# $(ldconfig -p | grep -m1 libjemalloc | awk '{print $1}')
cls.libjemalloc_path = lib_path
break
if not cls.libjemalloc_path:
if not os.path.exists(LIBJEMALLOC_DEFAULT_PATH):
logger.error('No libjemalloc.so.2 found.')
raise FileNotFoundError
cls.libjemalloc_path = LIBJEMALLOC_DEFAULT_PATH
return cls.libjemalloc_path
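# Example of an 'ldconfig -p' line the regex above matches (the path is
# distro-specific and illustrative):
#   libjemalloc.so.2 (libc6,x86-64) => /usr/lib/x86_64-linux-gnu/libjemalloc.so.2
# group(1) is the library name, groups()[-1] the resolved path that gets
# cached in cls.libjemalloc_path.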
@classmethod
def is_service_running(cls, service: str, use_sudo: bool = True) -> bool:
"""Check if mcs process is running.
:param service: service name
:type service: str
:param use_sudo: interface requirement, unused here, defaults to True
:type use_sudo: bool, optional
:return: True if service is running, otherwise False
:rtype: bool
"""
try:
cls._get_proc_object(service)
except psutil.NoSuchProcess:
return False
return True
@staticmethod
def _make_cmd(service: str) -> str:
"""Make shell command by service name.
:param service: service name
:type service: str
:return: command with arguments if needed
:rtype: str
"""
service_info = ALL_MCS_PROGS[service]
command = os.path.join(MCS_INSTALL_BIN, service)
if service_info.subcommand:
subcommand = service_info.subcommand
command = f'{command} {subcommand}'
return command
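# Sketch, assuming ALL_MCS_PROGS gives 'workernode' the subcommand
# 'DBRM_Worker{}' (consistent with the format() call in start() below):
#   _make_cmd('workernode')  # -> '<MCS_INSTALL_BIN>/workernode DBRM_Worker{}'
# Plain services without a subcommand map to just the binary path.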
@classmethod
def start(
cls, service: str, is_primary: bool, use_sudo: bool = True
) -> bool:
"""Start process in docker container.
:param service: process name
:type service: str
:param is_primary: is node primary or not
:type is_primary: bool, optional
:param use_sudo: interface requirement, unused here, defaults to True
:type use_sudo: bool, optional
:return: True if service started successfully
:rtype: bool
"""
logger = logging.getLogger('container_sh')
if cls.is_service_running(service):
return True
logger.debug(f'Starting {service}')
env_vars = {"LD_PRELOAD": cls.get_libjemalloc_path()}
command = cls._make_cmd(service)
if service == 'workernode':
# workernode starts on primary and non-primary nodes with 1 or 2
# appended to the end of the argument:
# DBRM_Worker1 - on primary, DBRM_Worker2 - on non-primary
command = command.format(1 if is_primary else 2)
# start mcs-loadbrm.py before workernode
logger.debug('Waiting to load BRM.')
loadbrm_path = os.path.join(MCS_INSTALL_BIN, 'mcs-loadbrm.py')
loadbrm_logpath = cls._create_mcs_process_logfile(
'mcs-loadbrm.log'
)
with open(loadbrm_logpath, 'a', encoding='utf-8') as loadbrm_logfh:
success, _ = cls.exec_command(
f'{loadbrm_path} no', stdout=loadbrm_logfh, env=env_vars
)
if not success:
logger.error('Error while loading BRM.')
else:
logger.debug('Successfully loaded BRM.')
service_log_path = cls._create_mcs_process_logfile(
f'{service.lower()}.log'
)
success, _ = cls.exec_command(
command, daemonize=True,
stdout=open(service_log_path, 'a', encoding='utf-8'),
env=env_vars
)
# TODO: any other way to detect service finished its initialisation?
sleep(ALL_MCS_PROGS[service].delay)
logger.debug(f'Started "{service}".')
if is_primary and service == 'DDLProc':
cls._run_dbbuilder()
return cls.is_service_running(service)
@classmethod
def stop(
cls, service: str, is_primary: bool, use_sudo: bool = True
) -> bool:
"""Stop process in docker container.
:param service: process name
:type service: str
:param is_primary: is node primary or not
:type is_primary: bool, optional
:param use_sudo: interface requirement, unused here, defaults to True
:type use_sudo: bool, optional
:return: True if service stopped successfully
:rtype: bool
"""
logger = logging.getLogger('container_sh')
if not cls.is_service_running(service):
return True
logger.debug(f'Stopping {service}')
service_proc = cls._get_proc_object(service)
if service == 'workernode':
# run mcs-savebrm.py before stopping workernode
logger.debug('Waiting to save BRM.')
savebrm_path = os.path.join(MCS_INSTALL_BIN, 'mcs-savebrm.py')
savebrm_logpath = cls._create_mcs_process_logfile(
'mcs-savebrm.log'
)
with open(savebrm_logpath, 'a', encoding='utf-8') as savebrm_logfh:
success, _ = cls.exec_command(
savebrm_path, stdout=savebrm_logfh
)
if not success:
logger.error('Error while saving BRM.')
else:
logger.debug('Successfully saved BRM.')
logger.debug('Start clearing SHM.')
clearshm_path = os.path.join(MCS_INSTALL_BIN, 'clearShm')
success, _ = cls.exec_command(clearshm_path)
if not success:
logger.error('Error while clearing SHM.')
else:
logger.debug('Successfully cleared SHM.')
service_proc.terminate()
# timeout taken from the old container.sh
# TODO: this is still not enough for the controllernode process;
# it is always stopped by SIGKILL, need to investigate.
timeout = 3
if service == 'StorageManager':
timeout = 300 # 5 minutes
logger.debug(f'Waiting to gracefully stop "{service}".')
# This function will return as soon as all processes terminate
# or when timeout (seconds) occurs.
gone, alive = psutil.wait_procs([service_proc], timeout=timeout)
if alive:
logger.debug(
f'{service} not terminated with SIGTERM, sending SIGKILL.'
)
# only one process could be in a list
alive[0].kill()
gone, alive = psutil.wait_procs([service_proc], timeout=timeout)
if gone:
logger.debug(f'Successfully killed "{service}".')
else:
logger.warning(
f'Service "{service}" still alive after sending "kill -9" '
f'and waiting {timeout} seconds.'
)
else:
logger.debug(f'Gracefully stopped "{service}".')
return not cls.is_service_running(service)
@classmethod
def restart(
cls, service: str, is_primary: bool, use_sudo: bool = True
) -> bool:
"""Restart process in docker container.
:param service: process name
:type service: str
:param is_primary: is node primary or not
:type is_primary: bool, optional
:param use_sudo: interface requirement, unused here, defaults to True
:type use_sudo: bool, optional
:return: True if service restarted successfully
:rtype: bool
...TODO: for next releases. Additional error handling.
"""
# stop_success must default to True, otherwise it is unbound when
# the service is not running and the return below raises NameError
stop_success = True
if cls.is_service_running(service):
# TODO: retry?
stop_success = cls.stop(service, is_primary, use_sudo)
start_success = cls.start(service, is_primary, use_sudo)
return stop_success and start_success

View File

@ -0,0 +1,231 @@
"""Module contains systemd process dispatcher class implementation."""
import logging
import re
from typing import Union, Tuple
from cmapi_server.process_dispatchers.base import BaseDispatcher
class SystemdDispatcher(BaseDispatcher):
"""Manipulates with systemd services."""
systemctl_version: int = 219  # CentOS 7 version
@classmethod
def _systemctl_call(
cls, command: str, service: str, use_sudo: bool = True,
return_output=False, *args, **kwargs
) -> Union[Tuple[bool, str], bool]:
"""Run "systemctl" with arguments.
:param command: command for systemctl
:type command: str
:param service: systemd service name
:type service: str
:param use_sudo: use sudo or not, defaults to True
:type use_sudo: bool, optional
:return: status of the operation, True on success, otherwise False;
a (success, output) tuple when return_output=True
:rtype: Union[Tuple[bool, str], bool]
"""
cmd = f'systemctl {command} {service}'
if use_sudo:
cmd = f'sudo {cmd}'
logging.debug(f'Call "{command}" on service "{service}" with "{cmd}".')
success, output = cls.exec_command(cmd, *args, **kwargs)
if return_output:
return success, output
return success
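# Illustrative expansion: _systemctl_call('stop', 'mcs-controllernode')
# runs "sudo systemctl stop mcs-controllernode" and returns only the
# success flag; pass return_output=True to also get stdout/stderr.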
@classmethod
def init(cls):
cmd = 'systemctl --version'
success, output = cls.exec_command(cmd)
if success:
# raw result will be like
# "systemd 239 (245.4-4ubuntu3.17)\n <string with compile flags>"
cls.systemctl_version = int(
re.search(r'systemd (\d+)', output).group(1)
)
logging.info(f'Detected {cls.systemctl_version} SYSTEMD version.')
else:
logging.error('Couldn\'t detect SYSTEMD version')
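# Example: version output such as
#   systemd 239 (245.4-4ubuntu3.17)
# is reduced by the regex above to systemctl_version == 239.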
@classmethod
def is_service_running(cls, service: str, use_sudo: bool = True) -> bool:
"""Check if systemd service is running.
:param service: service name
:type service: str, optional
:param use_sudo: use sudo or not, defaults to True
:type use_sudo: bool, optional
:return: True if service is running, otherwise False
:rtype: bool
..Note:
Not working with multiple services at a time.
"""
logging.debug(f'Checking "{service}" is running.')
# TODO: remove conditions below when we'll drop CentOS 7 support
cmd = 'show -p ActiveState --value'
if cls.systemctl_version < 230:  # --value not supported in old versions
cmd = 'show -p ActiveState'
_, output = cls._systemctl_call(
cmd,
service, use_sudo, return_output=True
)
service_state = output.strip()
if cls.systemctl_version < 230: # result like 'ActiveState=active'
service_state = service_state.split('=')[1]
logging.debug(f'Service "{service}" is in "{service_state}" state')
# interpret non "active" state as not running service
if service_state == 'active':
return True
# output could be inactive, activating or even empty if
# command execution was unsuccessful
return False
@staticmethod
def _workernode_get_service_name(is_primary: bool) -> str:
"""Get proper workernode service name based on primary status.
:param is_primary: is the node we are running on primary?
:type is_primary: bool
:return: correct workernode service name
:rtype: str
"""
service = 'mcs-workernode'
return f'{service}@1.service' if is_primary else f'{service}@2.service'
@classmethod
def _workernode_enable(cls, enable: bool, use_sudo: bool = True) -> None:
"""Enable workernode service.
:param enable: enable or disable
:type enable: bool
:param use_sudo: use sudo or not, defaults to True
:type use_sudo: bool, optional
"""
sub_cmd = 'enable' if enable else 'disable'
service = 'mcs-workernode@1.service'
if not cls._systemctl_call(sub_cmd, service, use_sudo):
# enabling/disabling the service is not critical, just log the failure
logging.warning(f'Failed to {sub_cmd} {service}')
@classmethod
def start(
cls, service: str, is_primary: bool = True, use_sudo: bool = True
) -> bool:
"""Start systemd service.
:param service: service name
:type service: str, optional
:param is_primary: is node primary or not
:type is_primary: bool, optional
:param use_sudo: use sudo or not, defaults to True
:type use_sudo: bool, optional
:return: True if service started successfully
:rtype: bool
"""
service_name = service
if service_name == 'mcs-workernode':
service_name = cls._workernode_get_service_name(is_primary)
if is_primary:
cls._workernode_enable(True, use_sudo)
if cls.is_service_running(service_name, use_sudo):
return True
logging.debug(f'Starting "{service_name}".')
if not cls._systemctl_call('start', service_name, use_sudo):
logging.error(f'Failed while starting "{service_name}".')
return False
if is_primary and service == 'mcs-ddlproc':
cls._run_dbbuilder(use_su=True)
logging.debug(f'Successfully started {service_name}.')
return cls.is_service_running(service_name, use_sudo)
@classmethod
def stop(
cls, service: str, is_primary: bool = True, use_sudo: bool = True
) -> bool:
"""Stop systemd service.
:param service: service name
:type service: str, optional
:param is_primary: is node primary or not
:type is_primary: bool, optional
:param use_sudo: use sudo or not, defaults to True
:type use_sudo: bool, optional
:return: True if service stopped successfully
:rtype: bool
"""
service_name = service
if service_name == 'mcs-workernode':
service_name = f'{service_name}@1.service {service_name}@2.service'
cls._workernode_enable(False, use_sudo)
logging.debug(f'Stopping "{service_name}".')
if not cls._systemctl_call('stop', service_name, use_sudo):
logging.error(f'Failed while stopping "{service_name}".')
return False
return not cls.is_service_running(service, use_sudo)
@classmethod
def restart(
cls, service: str, is_primary: bool = True, use_sudo: bool = True
) -> bool:
"""Restart systemd service.
:param service: service name
:type service: str, optional
:param is_primary: is node primary or not, defaults to True
:type is_primary: bool, optional
:param use_sudo: use sudo or not, defaults to True
:type use_sudo: bool, optional
:return: True if service restarted successfully
:rtype: bool
"""
service_name = service
if service_name == 'mcs-workernode':
service_name = cls._workernode_get_service_name(is_primary)
logging.debug(f'Restarting "{service_name}".')
if not cls._systemctl_call('restart', service_name, use_sudo):
logging.error(f'Failed while restarting "{service_name}".')
return False
return cls.is_service_running(service, use_sudo)
@classmethod
def reload(
cls, service: str, is_primary: bool = True, use_sudo: bool=True
) -> bool:
"""Reload systemd service.
:param service: service name
:type service: str, optional
:param is_primary: is node primary or not, defaults to True
:type is_primary: bool, optional
:param use_sudo: use sudo or not, defaults to True
:type use_sudo: bool, optional
:return: True if service reloaded successfully
:rtype: bool
..NOTE: For next releases. It should become important when we teach
MCS to add/remove nodes without a whole-cluster restart.
Additional error handling?
"""
service_name = service
if service_name == 'mcs-workernode':
service_name = cls._workernode_get_service_name(is_primary)
logging.debug(f'Reloading "{service_name}".')
if not cls._systemctl_call('reload', service_name, use_sudo):
logging.error(f'Failed while reloading "{service_name}".')
return False
# a successfully reloaded service keeps running
return cls.is_service_running(service, use_sudo)

View File

@ -0,0 +1,501 @@
<Columnstore Version="V1.0.0">
<!--
WARNING: Do not make changes to this file unless directed to do so by
MariaDB service engineers. Incorrect settings can render your system
unusable and will require a service call to correct.
-->
<ExeMgr1>
<IPAddr>127.0.0.1</IPAddr>
<Port>8601</Port>
<Module>unassigned</Module>
</ExeMgr1>
<JobProc>
<IPAddr>0.0.0.0</IPAddr>
<Port>8602</Port>
</JobProc>
<ProcMgr>
<IPAddr>127.0.0.1</IPAddr>
<Port>8603</Port>
</ProcMgr>
<ProcMgr_Alarm>
<IPAddr>127.0.0.1</IPAddr>
<Port>8606</Port>
</ProcMgr_Alarm>
<ProcStatusControl>
<IPAddr>127.0.0.1</IPAddr>
<Port>8604</Port>
</ProcStatusControl>
<ProcStatusControlStandby>
<IPAddr>0.0.0.0</IPAddr>
<Port>8605</Port>
</ProcStatusControlStandby>
<!-- Disabled
<ProcHeartbeatControl>
<IPAddr>0.0.0.0</IPAddr>
<Port>8605</Port>
</ProcHeartbeatControl>
-->
<!-- ProcessMonitor Port: 8800 - 8820 is reserved to support External Modules-->
<localhost_ProcessMonitor>
<IPAddr>127.0.0.1</IPAddr>
<Port>8800</Port>
</localhost_ProcessMonitor>
<dm1_ProcessMonitor>
<IPAddr>0.0.0.0</IPAddr>
<Port>8800</Port>
</dm1_ProcessMonitor>
<um1_ProcessMonitor>
<IPAddr>0.0.0.0</IPAddr>
<Port>8800</Port>
</um1_ProcessMonitor>
<pm1_ProcessMonitor>
<IPAddr>127.0.0.1</IPAddr>
<Port>8800</Port>
</pm1_ProcessMonitor>
<dm1_ServerMonitor>
<IPAddr>0.0.0.0</IPAddr>
<Port>8622</Port>
</dm1_ServerMonitor>
<um1_ServerMonitor>
<IPAddr>0.0.0.0</IPAddr>
<Port>8622</Port>
</um1_ServerMonitor>
<pm1_ServerMonitor>
<IPAddr>127.0.0.1</IPAddr>
<Port>8622</Port>
</pm1_ServerMonitor>
<pm1_WriteEngineServer>
<IPAddr>127.0.0.1</IPAddr>
<Port>8630</Port>
</pm1_WriteEngineServer>
<DDLProc>
<IPAddr>127.0.0.1</IPAddr>
<Port>8612</Port>
</DDLProc>
<DMLProc>
<IPAddr>127.0.0.1</IPAddr>
<Port>8614</Port>
</DMLProc>
<BatchInsert>
<RowsPerBatch>10000</RowsPerBatch>
</BatchInsert>
<PrimitiveServers>
<Count>1</Count>
<ConnectionsPerPrimProc>2</ConnectionsPerPrimProc>
<ProcessorThreshold>128</ProcessorThreshold>
<ProcessorQueueSize>10K</ProcessorQueueSize> <!-- minimum of extent size 8192 -->
<DebugLevel>0</DebugLevel>
<ColScanBufferSizeBlocks>512</ColScanBufferSizeBlocks>
<ColScanReadAheadBlocks>512</ColScanReadAheadBlocks> <!-- s/b factor of extent size 8192 -->
<!-- <BPPCount>16</BPPCount> --> <!-- Default num cores * 2. A cap on the number of simultaneous primitives per jobstep -->
<PrefetchThreshold>1</PrefetchThreshold>
<PTTrace>0</PTTrace>
<RotatingDestination>n</RotatingDestination> <!-- Iterate thru UM ports; set to 'n' if UM/PM on same server -->
<!-- <HighPriorityPercentage>60</HighPriorityPercentage> -->
<!-- <MediumPriorityPercentage>30</MediumPriorityPercentage> -->
<!-- <LowPriorityPercentage>10</LowPriorityPercentage> -->
<DirectIO>y</DirectIO>
<HighPriorityPercentage/>
<MediumPriorityPercentage/>
<LowPriorityPercentage/>
</PrimitiveServers>
<PMS1>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS1>
<PMS2>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS2>
<PMS3>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS3>
<PMS4>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS4>
<PMS5>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS5>
<PMS6>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS6>
<PMS7>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS7>
<PMS8>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS8>
<PMS9>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS9>
<PMS10>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS10>
<PMS11>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS11>
<PMS12>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS12>
<PMS13>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS13>
<PMS14>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS14>
<PMS15>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS15>
<PMS16>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS16>
<PMS17>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS17>
<PMS18>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS18>
<PMS19>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS19>
<PMS20>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS20>
<PMS21>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS21>
<PMS22>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS22>
<PMS23>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS23>
<PMS24>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS24>
<PMS25>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS25>
<PMS26>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS26>
<PMS27>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS27>
<PMS28>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS28>
<PMS29>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS29>
<PMS30>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS30>
<PMS31>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS31>
<PMS32>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS32>
<SystemConfig>
<SystemName>columnstore-1</SystemName>
<ParentOAMModuleName>pm1</ParentOAMModuleName>
<PrimaryUMModuleName>pm1</PrimaryUMModuleName>
<!-- Warning: Do not change this value once database is built -->
<DBRootCount>1</DBRootCount>
<DBRoot1>/var/lib/columnstore/data1</DBRoot1>
<DBRMRoot>/var/lib/columnstore/data1/systemFiles/dbrm/BRM_saves</DBRMRoot>
<TableLockSaveFile>/var/lib/columnstore/data1/systemFiles/dbrm/tablelocks</TableLockSaveFile>
<DBRMTimeOut>15</DBRMTimeOut> <!-- in seconds -->
<DBRMSnapshotInterval>100000</DBRMSnapshotInterval>
<WaitPeriod>10</WaitPeriod> <!-- in seconds -->
<MemoryCheckPercent>95</MemoryCheckPercent> <!-- Max real memory to limit growth of buffers to -->
<DataFileLog>OFF</DataFileLog>
<!-- enable if you want to limit how much memory may be used for hdfs read/write memory buffers.
<hdfsRdwrBufferMaxSize>8G</hdfsRdwrBufferMaxSize>
-->
<hdfsRdwrScratch>/rdwrscratch</hdfsRdwrScratch> <!-- Do not set to an hdfs file path -->
<!-- Be careful modifying SystemTempFileDir! On start, ExeMgr deletes
the entire "joins" & "aggregates" subdirectories and recreates them to make sure no
files are left behind. -->
<SystemTempFileDir>/tmp/columnstore_tmp_files</SystemTempFileDir>
</SystemConfig>
<SystemModuleConfig>
<ModuleType1>dm</ModuleType1>
<ModuleDesc1>Director Module</ModuleDesc1>
<ModuleCount1>0</ModuleCount1>
<ModuleIPAddr1-1-1>0.0.0.0</ModuleIPAddr1-1-1>
<ModuleHostName1-1-1>unassigned</ModuleHostName1-1-1>
<ModuleDisableState1-1>ENABLED</ModuleDisableState1-1>
<ModuleCPUCriticalThreshold1>0</ModuleCPUCriticalThreshold1>
<ModuleCPUMajorThreshold1>0</ModuleCPUMajorThreshold1>
<ModuleCPUMinorThreshold1>0</ModuleCPUMinorThreshold1>
<ModuleCPUMinorClearThreshold1>0</ModuleCPUMinorClearThreshold1>
<ModuleDiskCriticalThreshold1>90</ModuleDiskCriticalThreshold1>
<ModuleDiskMajorThreshold1>80</ModuleDiskMajorThreshold1>
<ModuleDiskMinorThreshold1>70</ModuleDiskMinorThreshold1>
<ModuleMemCriticalThreshold1>90</ModuleMemCriticalThreshold1>
<ModuleMemMajorThreshold1>0</ModuleMemMajorThreshold1>
<ModuleMemMinorThreshold1>0</ModuleMemMinorThreshold1>
<ModuleSwapCriticalThreshold1>90</ModuleSwapCriticalThreshold1>
<ModuleSwapMajorThreshold1>80</ModuleSwapMajorThreshold1>
<ModuleSwapMinorThreshold1>70</ModuleSwapMinorThreshold1>
<ModuleDiskMonitorFileSystem1-1>/</ModuleDiskMonitorFileSystem1-1>
<ModuleDBRootCount1-1>unassigned</ModuleDBRootCount1-1>
<ModuleDBRootID1-1-1>unassigned</ModuleDBRootID1-1-1>
<ModuleType2>um</ModuleType2>
<ModuleDesc2>User Module</ModuleDesc2>
<ModuleCount2>0</ModuleCount2>
<ModuleIPAddr1-1-2>0.0.0.0</ModuleIPAddr1-1-2>
<ModuleHostName1-1-2>unassigned</ModuleHostName1-1-2>
<ModuleDisableState1-2>ENABLED</ModuleDisableState1-2>
<ModuleCPUCriticalThreshold2>0</ModuleCPUCriticalThreshold2>
<ModuleCPUMajorThreshold2>0</ModuleCPUMajorThreshold2>
<ModuleCPUMinorThreshold2>0</ModuleCPUMinorThreshold2>
<ModuleCPUMinorClearThreshold2>0</ModuleCPUMinorClearThreshold2>
<ModuleDiskCriticalThreshold2>90</ModuleDiskCriticalThreshold2>
<ModuleDiskMajorThreshold2>80</ModuleDiskMajorThreshold2>
<ModuleDiskMinorThreshold2>70</ModuleDiskMinorThreshold2>
<ModuleMemCriticalThreshold2>90</ModuleMemCriticalThreshold2>
<ModuleMemMajorThreshold2>0</ModuleMemMajorThreshold2>
<ModuleMemMinorThreshold2>0</ModuleMemMinorThreshold2>
<ModuleSwapCriticalThreshold2>90</ModuleSwapCriticalThreshold2>
<ModuleSwapMajorThreshold2>80</ModuleSwapMajorThreshold2>
<ModuleSwapMinorThreshold2>70</ModuleSwapMinorThreshold2>
<ModuleDiskMonitorFileSystem1-2>/</ModuleDiskMonitorFileSystem1-2>
<ModuleDBRootCount1-2>unassigned</ModuleDBRootCount1-2>
<ModuleDBRootID1-1-2>unassigned</ModuleDBRootID1-1-2>
<ModuleType3>pm</ModuleType3>
<ModuleDesc3>Performance Module</ModuleDesc3>
<ModuleCount3>1</ModuleCount3>
<ModuleIPAddr1-1-3>127.0.0.1</ModuleIPAddr1-1-3>
<ModuleHostName1-1-3>localhost</ModuleHostName1-1-3>
<ModuleDisableState1-3>ENABLED</ModuleDisableState1-3>
<ModuleCPUCriticalThreshold3>0</ModuleCPUCriticalThreshold3>
<ModuleCPUMajorThreshold3>0</ModuleCPUMajorThreshold3>
<ModuleCPUMinorThreshold3>0</ModuleCPUMinorThreshold3>
<ModuleCPUMinorClearThreshold3>0</ModuleCPUMinorClearThreshold3>
<ModuleDiskCriticalThreshold3>90</ModuleDiskCriticalThreshold3>
<ModuleDiskMajorThreshold3>80</ModuleDiskMajorThreshold3>
<ModuleDiskMinorThreshold3>70</ModuleDiskMinorThreshold3>
<ModuleMemCriticalThreshold3>90</ModuleMemCriticalThreshold3>
<ModuleMemMajorThreshold3>0</ModuleMemMajorThreshold3>
<ModuleMemMinorThreshold3>0</ModuleMemMinorThreshold3>
<ModuleSwapCriticalThreshold3>90</ModuleSwapCriticalThreshold3>
<ModuleSwapMajorThreshold3>80</ModuleSwapMajorThreshold3>
<ModuleSwapMinorThreshold3>70</ModuleSwapMinorThreshold3>
<ModuleDiskMonitorFileSystem1-3>/</ModuleDiskMonitorFileSystem1-3>
<ModuleDBRootCount1-3>1</ModuleDBRootCount1-3>
<ModuleDBRootID1-1-3>1</ModuleDBRootID1-1-3>
</SystemModuleConfig>
<SystemExtDeviceConfig>
<Count>0</Count>
<Name1>unassigned</Name1>
<IPAddr1>0.0.0.0</IPAddr1>
<DisableState1>ENABLED</DisableState1>
</SystemExtDeviceConfig>
<SessionManager>
<MaxConcurrentTransactions>1000</MaxConcurrentTransactions>
<TxnIDFile>/var/lib/columnstore/data1/systemFiles/dbrm/SMTxnID</TxnIDFile>
</SessionManager>
<VersionBuffer>
<!-- VersionBufferFileSize must be a multiple of 8192.
One version buffer file will be put on each DB root. -->
<VersionBufferFileSize>1GB</VersionBufferFileSize>
</VersionBuffer>
<OIDManager>
<!-- Do not change this file after database built -->
<OIDBitmapFile>/var/lib/columnstore/data1/systemFiles/dbrm/oidbitmap</OIDBitmapFile>
<!-- Do not change this value after database built -->
<FirstOID>3000</FirstOID>
</OIDManager>
<WriteEngine>
<BulkRoot>/var/log/mariadb/columnstore/data/bulk</BulkRoot>
<BulkRollbackDir>/var/lib/columnstore/data1/systemFiles/bulkRollback</BulkRollbackDir>
<MaxFileSystemDiskUsagePct>98</MaxFileSystemDiskUsagePct>
<CompressedPaddingBlocks>1</CompressedPaddingBlocks> <!-- Number of blocks used to pad compressed chunks -->
<FastDelete>n</FastDelete>
</WriteEngine>
<DBRM_Controller>
<NumWorkers>1</NumWorkers>
<IPAddr>127.0.0.1</IPAddr>
<Port>8616</Port>
</DBRM_Controller>
<!-- Worker Port: 8700 - 8720 is reserved to support External Modules-->
<DBRM_Worker1>
<IPAddr>127.0.0.1</IPAddr>
<Port>8700</Port>
<Module>pm1</Module>
</DBRM_Worker1>
<DBRM_Worker2>
<IPAddr>0.0.0.0</IPAddr>
<Port>8700</Port>
<Module>unassigned</Module>
</DBRM_Worker2>
<DBRM_Worker3>
<IPAddr>0.0.0.0</IPAddr>
<Port>8700</Port>
<Module>unassigned</Module>
</DBRM_Worker3>
<DBRM_Worker4>
<IPAddr>0.0.0.0</IPAddr>
<Port>8700</Port>
<Module>unassigned</Module>
</DBRM_Worker4>
<DBRM_Worker5>
<IPAddr>0.0.0.0</IPAddr>
<Port>8700</Port>
<Module>unassigned</Module>
</DBRM_Worker5>
<DBRM_Worker6>
<IPAddr>0.0.0.0</IPAddr>
<Port>8700</Port>
<Module>unassigned</Module>
</DBRM_Worker6>
<DBRM_Worker7>
<IPAddr>0.0.0.0</IPAddr>
<Port>8700</Port>
<Module>unassigned</Module>
</DBRM_Worker7>
<DBRM_Worker8>
<IPAddr>0.0.0.0</IPAddr>
<Port>8700</Port>
<Module>unassigned</Module>
</DBRM_Worker8>
<DBRM_Worker9>
<IPAddr>0.0.0.0</IPAddr>
<Port>8700</Port>
<Module>unassigned</Module>
</DBRM_Worker9>
<DBRM_Worker10>
<IPAddr>0.0.0.0</IPAddr>
<Port>8700</Port>
<Module>unassigned</Module>
</DBRM_Worker10>
<DBBC>
<!-- The percentage of RAM to use for the disk block cache. Defaults to 70% -->
<!-- Alternatively, this can be specified in absolute terms using
the suffixes 'm' or 'g' to denote size in megabytes or gigabytes.-->
<!-- <NumBlocksPct>95</NumBlocksPct> -->
<!-- <NumThreads>16</NumThreads> --> <!-- 1-256. Default is 16. -->
<NumCaches>1</NumCaches><!-- # of parallel caches to instantiate -->
<IOMTracing>0</IOMTracing>
<BRPTracing>0</BRPTracing>
<ReportFrequency>65536</ReportFrequency>
<MaxOpenFiles>2K</MaxOpenFiles>
<DecreaseOpenFilesCount>200</DecreaseOpenFilesCount>
<FDCacheTrace>0</FDCacheTrace>
<NumBlocksPct>50</NumBlocksPct>
</DBBC>
<Installation>
<ServerTypeInstall>2</ServerTypeInstall>
<PMwithUM>n</PMwithUM>
<MySQLRep>n</MySQLRep>
<DBRootStorageType>internal</DBRootStorageType>
<UMStorageType>internal</UMStorageType>
<ProfileFile>/etc/profile.d/columnstoreAlias.sh</ProfileFile>
<DataRedundancyNetworkType/>
</Installation>
<ExtentMap>
<!--
WARNING: these can only be changed on an empty system. Once any object has been allocated
it cannot be changed! Extent size is 8M rows.
-->
<FilesPerColumnPartition>4</FilesPerColumnPartition> <!-- should be multiple of DBRootCount -->
<BRM_UID>0x0</BRM_UID>
</ExtentMap>
<HashJoin>
<MaxBuckets>128</MaxBuckets>
<MaxElems>128K</MaxElems> <!-- 128 buckets * 128K * 16 = 256 MB -->
<PmMaxMemorySmallSide>1G</PmMaxMemorySmallSide>
<TotalUmMemory>25%</TotalUmMemory>
<CPUniqueLimit>100</CPUniqueLimit>
<AllowDiskBasedJoin>N</AllowDiskBasedJoin>
<TempFileCompression>Y</TempFileCompression>
<TempFileCompressionType>Snappy</TempFileCompressionType> <!-- LZ4, Snappy -->
</HashJoin>
<JobList>
<FlushInterval>16K</FlushInterval>
<FifoSize>16</FifoSize>
<RequestSize>1</RequestSize> <!-- Number of extents per request, should be
less than MaxOutstandingRequests. Otherwise, default value 1 is used. -->
<!-- ProcessorThreadsPerScan is the number of jobs issued to process
each extent. The default is 16. MaxOutstandingRequests is the size of
the window of work in terms of extents. A value of 20 means there
is 20 extents worth of work for the PMs to process at any given time.
ProcessorThreadsPerScan * MaxOutstandingRequests should be at least
as many threads are available across all PMs. -->
<!-- <ProcessorThreadsPerScan>16</ProcessorThreadsPerScan> -->
<!-- MaxOutstandingRequests is going to default to the num of cores available
across all performance modules * 4 divided by the ProcessorThreadsPerScan,
but will be lower bounded by 20 -->
<!-- <MaxOutstandingRequests>20</MaxOutstandingRequests> -->
<ThreadPoolSize>100</ThreadPoolSize>
</JobList>
<RowAggregation>
<!-- <RowAggrThreads>4</RowAggrThreads> --> <!-- Default value is the number of cores -->
<!-- <RowAggrBuckets>32</RowAggrBuckets> --> <!-- Default value is number of cores * 4 -->
<!-- <RowAggrRowGroupsPerThread>20</RowAggrRowGroupsPerThread> --> <!-- Default value is 20 -->
<AllowDiskBasedAggregation>N</AllowDiskBasedAggregation>
</RowAggregation>
<CrossEngineSupport>
<Host>127.0.0.1</Host>
<Port>3306</Port>
<User>root</User>
<Password/>
<TLSCA/>
<TLSClientCert/>
<TLSClientKey/>
</CrossEngineSupport>
<QueryStats>
<Enabled>N</Enabled>
</QueryStats>
<UserPriority>
<Enabled>N</Enabled>
</UserPriority>
<NetworkCompression>
<Enabled>Y</Enabled>
<NetworkCompressionType>Snappy</NetworkCompressionType> <!-- LZ4, Snappy -->
</NetworkCompression>
<QueryTele>
<Host>127.0.0.1</Host>
<Port>0</Port>
</QueryTele>
<StorageManager>
<MaxSockets>30</MaxSockets>
<Enabled>N</Enabled>
</StorageManager>
<DataRedundancyConfig>
<DBRoot1PMs/>
</DataRedundancyConfig>
</Columnstore>

View File

@ -0,0 +1,535 @@
<Columnstore Version="V1.0.0">
<!--
WARNING: Do not make changes to this file unless directed to do so by
MariaDB service engineers. Incorrect settings can render your system
unusable and will require a service call to correct.
-->
<ClusterManager>0.0.0.0</ClusterManager>
<ConfigRevision>2</ConfigRevision>
<NextNodeId>3</NextNodeId>
<NextDBRootId>5</NextDBRootId>
<ExeMgr1>
<IPAddr>10.128.0.23</IPAddr>
<Port>8601</Port>
<Module>pm1</Module>
</ExeMgr1>
<JobProc>
<IPAddr>0.0.0.0</IPAddr>
<Port>8602</Port>
</JobProc>
<ProcMgr>
<IPAddr>10.128.0.23</IPAddr>
<Port>8603</Port>
</ProcMgr>
<ProcMgr_Alarm>
<IPAddr>10.128.0.23</IPAddr>
<Port>8606</Port>
</ProcMgr_Alarm>
<ProcStatusControl>
<IPAddr>10.128.0.23</IPAddr>
<Port>8604</Port>
</ProcStatusControl>
<ProcStatusControlStandby>
<IPAddr>0.0.0.0</IPAddr>
<Port>8605</Port>
</ProcStatusControlStandby>
<!-- Disabled
<ProcHeartbeatControl>
<IPAddr>0.0.0.0</IPAddr>
<Port>8605</Port>
</ProcHeartbeatControl>
-->
<!-- ProcessMonitor Port: 8800 - 8820 is reserved to support External Modules-->
<localhost_ProcessMonitor>
<IPAddr>127.0.0.1</IPAddr>
<Port>8800</Port>
</localhost_ProcessMonitor>
<dm1_ProcessMonitor>
<IPAddr>0.0.0.0</IPAddr>
<Port>8800</Port>
</dm1_ProcessMonitor>
<um1_ProcessMonitor>
<IPAddr>0.0.0.0</IPAddr>
<Port>8800</Port>
</um1_ProcessMonitor>
<pm1_ProcessMonitor>
<IPAddr>10.128.0.23</IPAddr>
<Port>8800</Port>
</pm1_ProcessMonitor>
<dm1_ServerMonitor>
<IPAddr>0.0.0.0</IPAddr>
<Port>8622</Port>
</dm1_ServerMonitor>
<um1_ServerMonitor>
<IPAddr>0.0.0.0</IPAddr>
<Port>8622</Port>
</um1_ServerMonitor>
<pm1_ServerMonitor>
<IPAddr>10.128.0.23</IPAddr>
<Port>8622</Port>
</pm1_ServerMonitor>
<pm1_WriteEngineServer>
<IPAddr>10.128.0.23</IPAddr>
<Port>8630</Port>
</pm1_WriteEngineServer>
<DDLProc>
<IPAddr>10.128.0.23</IPAddr>
<Port>8612</Port>
</DDLProc>
<DMLProc>
<IPAddr>10.128.0.23</IPAddr>
<Port>8614</Port>
</DMLProc>
<BatchInsert>
<RowsPerBatch>10000</RowsPerBatch>
</BatchInsert>
<PrimitiveServers>
<Count>2</Count>
<ConnectionsPerPrimProc>2</ConnectionsPerPrimProc>
<ProcessorThreshold>128</ProcessorThreshold>
<ProcessorQueueSize>10K</ProcessorQueueSize> <!-- minimum is the extent size, 8192 -->
<DebugLevel>0</DebugLevel>
<ColScanBufferSizeBlocks>512</ColScanBufferSizeBlocks>
<ColScanReadAheadBlocks>512</ColScanReadAheadBlocks> <!-- should be a factor of extent size 8192 -->
<!-- <BPPCount>16</BPPCount> --> <!-- Default num cores * 2. A cap on the number of simultaneous primitives per jobstep -->
<PrefetchThreshold>1</PrefetchThreshold>
<PTTrace>0</PTTrace>
<RotatingDestination>n</RotatingDestination> <!-- Iterate through UM ports; set to 'n' if UM/PM are on the same server -->
<!-- <HighPriorityPercentage>60</HighPriorityPercentage> -->
<!-- <MediumPriorityPercentage>30</MediumPriorityPercentage> -->
<!-- <LowPriorityPercentage>10</LowPriorityPercentage> -->
<DirectIO>y</DirectIO>
</PrimitiveServers>
<PMS1>
<IPAddr>10.128.0.23</IPAddr>
<Port>8620</Port>
</PMS1>
<PMS2>
<IPAddr>172.30.0.144</IPAddr>
<Port>8620</Port>
</PMS2>
<PMS3>
<IPAddr>10.128.0.23</IPAddr>
<Port>8620</Port>
</PMS3>
<PMS4>
<IPAddr>172.30.0.144</IPAddr>
<Port>8620</Port>
</PMS4>
<PMS5>
<IPAddr>10.128.0.23</IPAddr>
<Port>8620</Port>
</PMS5>
<PMS6>
<IPAddr>172.30.0.144</IPAddr>
<Port>8620</Port>
</PMS6>
<PMS7>
<IPAddr>10.128.0.23</IPAddr>
<Port>8620</Port>
</PMS7>
<PMS8>
<IPAddr>172.30.0.144</IPAddr>
<Port>8620</Port>
</PMS8>
<PMS9>
<IPAddr>10.128.0.23</IPAddr>
<Port>8620</Port>
</PMS9>
<PMS10>
<IPAddr>172.30.0.144</IPAddr>
<Port>8620</Port>
</PMS10>
<PMS11>
<IPAddr>10.128.0.23</IPAddr>
<Port>8620</Port>
</PMS11>
<PMS12>
<IPAddr>172.30.0.144</IPAddr>
<Port>8620</Port>
</PMS12>
<PMS13>
<IPAddr>10.128.0.23</IPAddr>
<Port>8620</Port>
</PMS13>
<PMS14>
<IPAddr>172.30.0.144</IPAddr>
<Port>8620</Port>
</PMS14>
<PMS15>
<IPAddr>10.128.0.23</IPAddr>
<Port>8620</Port>
</PMS15>
<PMS16>
<IPAddr>172.30.0.144</IPAddr>
<Port>8620</Port>
</PMS16>
<PMS17>
<IPAddr>10.128.0.23</IPAddr>
<Port>8620</Port>
</PMS17>
<PMS18>
<IPAddr>172.30.0.144</IPAddr>
<Port>8620</Port>
</PMS18>
<PMS19>
<IPAddr>10.128.0.23</IPAddr>
<Port>8620</Port>
</PMS19>
<PMS20>
<IPAddr>172.30.0.144</IPAddr>
<Port>8620</Port>
</PMS20>
<PMS21>
<IPAddr>10.128.0.23</IPAddr>
<Port>8620</Port>
</PMS21>
<PMS22>
<IPAddr>172.30.0.144</IPAddr>
<Port>8620</Port>
</PMS22>
<PMS23>
<IPAddr>10.128.0.23</IPAddr>
<Port>8620</Port>
</PMS23>
<PMS24>
<IPAddr>172.30.0.144</IPAddr>
<Port>8620</Port>
</PMS24>
<PMS25>
<IPAddr>10.128.0.23</IPAddr>
<Port>8620</Port>
</PMS25>
<PMS26>
<IPAddr>172.30.0.144</IPAddr>
<Port>8620</Port>
</PMS26>
<PMS27>
<IPAddr>10.128.0.23</IPAddr>
<Port>8620</Port>
</PMS27>
<PMS28>
<IPAddr>172.30.0.144</IPAddr>
<Port>8620</Port>
</PMS28>
<PMS29>
<IPAddr>10.128.0.23</IPAddr>
<Port>8620</Port>
</PMS29>
<PMS30>
<IPAddr>172.30.0.144</IPAddr>
<Port>8620</Port>
</PMS30>
<PMS31>
<IPAddr>10.128.0.23</IPAddr>
<Port>8620</Port>
</PMS31>
<PMS32>
<IPAddr>172.30.0.144</IPAddr>
<Port>8620</Port>
</PMS32>
<SystemConfig>
<SystemLang>C</SystemLang>
<SystemName>columnstore-1</SystemName>
<ParentOAMModuleName>pm1</ParentOAMModuleName>
<PrimaryUMModuleName>pm1</PrimaryUMModuleName>
<!-- Warning: Do not change this value once the database is built -->
<DBRootCount>2</DBRootCount>
<DBRoot1>/var/lib/columnstore/data1</DBRoot1>
<DBRMRoot>/var/lib/columnstore/data1/systemFiles/dbrm/BRM_saves</DBRMRoot>
<TableLockSaveFile>/var/lib/columnstore/data1/systemFiles/dbrm/tablelocks</TableLockSaveFile>
<DBRMTimeOut>20</DBRMTimeOut> <!-- in seconds -->
<DBRMSnapshotInterval>100000</DBRMSnapshotInterval>
<WaitPeriod>10</WaitPeriod> <!-- in seconds -->
<MemoryCheckPercent>95</MemoryCheckPercent> <!-- Maximum percentage of real memory that buffers may grow to -->
<DataFileLog>OFF</DataFileLog>
<!-- enable if you want to limit how much memory may be used for hdfs read/write memory buffers.
<hdfsRdwrBufferMaxSize>8G</hdfsRdwrBufferMaxSize>
-->
<hdfsRdwrScratch>/rdwrscratch</hdfsRdwrScratch> <!-- Do not set to an hdfs file path -->
<!-- Be careful modifying SystemTempFileDir! On start, ExeMgr deletes the
entire "joins" & "aggregates" subdirectories and recreates them to make sure
no files are left behind. -->
<SystemTempFileDir>/tmp/columnstore_tmp_files</SystemTempFileDir>
<DataFilePlugin/>
<DBRoot2>/var/lib/columnstore/data2</DBRoot2>
</SystemConfig>
<SystemModuleConfig>
<ModuleType1>dm</ModuleType1>
<ModuleDesc1>Director Module</ModuleDesc1>
<ModuleCount1>0</ModuleCount1>
<ModuleIPAddr1-1-1>0.0.0.0</ModuleIPAddr1-1-1>
<ModuleHostName1-1-1>unassigned</ModuleHostName1-1-1>
<ModuleDisableState1-1>ENABLED</ModuleDisableState1-1>
<ModuleCPUCriticalThreshold1>0</ModuleCPUCriticalThreshold1>
<ModuleCPUMajorThreshold1>0</ModuleCPUMajorThreshold1>
<ModuleCPUMinorThreshold1>0</ModuleCPUMinorThreshold1>
<ModuleCPUMinorClearThreshold1>0</ModuleCPUMinorClearThreshold1>
<ModuleDiskCriticalThreshold1>90</ModuleDiskCriticalThreshold1>
<ModuleDiskMajorThreshold1>80</ModuleDiskMajorThreshold1>
<ModuleDiskMinorThreshold1>70</ModuleDiskMinorThreshold1>
<ModuleMemCriticalThreshold1>90</ModuleMemCriticalThreshold1>
<ModuleMemMajorThreshold1>0</ModuleMemMajorThreshold1>
<ModuleMemMinorThreshold1>0</ModuleMemMinorThreshold1>
<ModuleSwapCriticalThreshold1>90</ModuleSwapCriticalThreshold1>
<ModuleSwapMajorThreshold1>80</ModuleSwapMajorThreshold1>
<ModuleSwapMinorThreshold1>70</ModuleSwapMinorThreshold1>
<ModuleDiskMonitorFileSystem1-1>/</ModuleDiskMonitorFileSystem1-1>
<ModuleDBRootCount1-1>unassigned</ModuleDBRootCount1-1>
<ModuleDBRootID1-1-1>unassigned</ModuleDBRootID1-1-1>
<ModuleType2>um</ModuleType2>
<ModuleDesc2>User Module</ModuleDesc2>
<ModuleCount2>0</ModuleCount2>
<ModuleIPAddr1-1-2>0.0.0.0</ModuleIPAddr1-1-2>
<ModuleHostName1-1-2>unassigned</ModuleHostName1-1-2>
<ModuleDisableState1-2>ENABLED</ModuleDisableState1-2>
<ModuleCPUCriticalThreshold2>0</ModuleCPUCriticalThreshold2>
<ModuleCPUMajorThreshold2>0</ModuleCPUMajorThreshold2>
<ModuleCPUMinorThreshold2>0</ModuleCPUMinorThreshold2>
<ModuleCPUMinorClearThreshold2>0</ModuleCPUMinorClearThreshold2>
<ModuleDiskCriticalThreshold2>90</ModuleDiskCriticalThreshold2>
<ModuleDiskMajorThreshold2>80</ModuleDiskMajorThreshold2>
<ModuleDiskMinorThreshold2>70</ModuleDiskMinorThreshold2>
<ModuleMemCriticalThreshold2>90</ModuleMemCriticalThreshold2>
<ModuleMemMajorThreshold2>0</ModuleMemMajorThreshold2>
<ModuleMemMinorThreshold2>0</ModuleMemMinorThreshold2>
<ModuleSwapCriticalThreshold2>90</ModuleSwapCriticalThreshold2>
<ModuleSwapMajorThreshold2>80</ModuleSwapMajorThreshold2>
<ModuleSwapMinorThreshold2>70</ModuleSwapMinorThreshold2>
<ModuleDiskMonitorFileSystem1-2>/</ModuleDiskMonitorFileSystem1-2>
<ModuleDBRootCount1-2>unassigned</ModuleDBRootCount1-2>
<ModuleDBRootID1-1-2>unassigned</ModuleDBRootID1-1-2>
<ModuleType3>pm</ModuleType3>
<ModuleDesc3>Performance Module</ModuleDesc3>
<ModuleCount3>2</ModuleCount3>
<ModuleIPAddr1-1-3>10.128.0.23</ModuleIPAddr1-1-3>
<ModuleHostName1-1-3>localhost</ModuleHostName1-1-3>
<ModuleDisableState1-3>ENABLED</ModuleDisableState1-3>
<ModuleCPUCriticalThreshold3>0</ModuleCPUCriticalThreshold3>
<ModuleCPUMajorThreshold3>0</ModuleCPUMajorThreshold3>
<ModuleCPUMinorThreshold3>0</ModuleCPUMinorThreshold3>
<ModuleCPUMinorClearThreshold3>0</ModuleCPUMinorClearThreshold3>
<ModuleDiskCriticalThreshold3>90</ModuleDiskCriticalThreshold3>
<ModuleDiskMajorThreshold3>80</ModuleDiskMajorThreshold3>
<ModuleDiskMinorThreshold3>70</ModuleDiskMinorThreshold3>
<ModuleMemCriticalThreshold3>90</ModuleMemCriticalThreshold3>
<ModuleMemMajorThreshold3>0</ModuleMemMajorThreshold3>
<ModuleMemMinorThreshold3>0</ModuleMemMinorThreshold3>
<ModuleSwapCriticalThreshold3>90</ModuleSwapCriticalThreshold3>
<ModuleSwapMajorThreshold3>80</ModuleSwapMajorThreshold3>
<ModuleSwapMinorThreshold3>70</ModuleSwapMinorThreshold3>
<ModuleDiskMonitorFileSystem1-3>/</ModuleDiskMonitorFileSystem1-3>
<ModuleDBRootCount1-3>1</ModuleDBRootCount1-3>
<ModuleDBRootID1-1-3>1</ModuleDBRootID1-1-3>
<ModuleHostName1-2-3>unassigned</ModuleHostName1-2-3>
<ModuleIPAddr1-2-3>0.0.0.0</ModuleIPAddr1-2-3>
<ModuleHostName1-3-3>unassigned</ModuleHostName1-3-3>
<ModuleIPAddr1-3-3>0.0.0.0</ModuleIPAddr1-3-3>
<ModuleDisableState2-3>ENABLED</ModuleDisableState2-3>
<ModuleHostName2-1-3>pm2</ModuleHostName2-1-3>
<ModuleIPAddr2-1-3>172.30.0.144</ModuleIPAddr2-1-3>
<ModuleHostName2-2-3>unassigned</ModuleHostName2-2-3>
<ModuleIPAddr2-2-3>0.0.0.0</ModuleIPAddr2-2-3>
<ModuleDBRootID2-1-3>3</ModuleDBRootID2-1-3>
<ModuleDBRootID2-2-3>4</ModuleDBRootID2-2-3>
<ModuleDBRootID2-3-3>5</ModuleDBRootID2-3-3>
<ModuleDBRootCount2-3>3</ModuleDBRootCount2-3>
</SystemModuleConfig>
<SystemExtDeviceConfig>
<Count>0</Count>
<Name1>unassigned</Name1>
<IPAddr1>0.0.0.0</IPAddr1>
<DisableState1>ENABLED</DisableState1>
</SystemExtDeviceConfig>
<SessionManager>
<MaxConcurrentTransactions>1000</MaxConcurrentTransactions>
<TxnIDFile>/var/lib/columnstore/data1/systemFiles/dbrm/SMTxnID</TxnIDFile>
</SessionManager>
<VersionBuffer>
<!-- VersionBufferFileSize must be a multiple of 8192.
One version buffer file will be put on each DB root. -->
<VersionBufferFileSize>1GB</VersionBufferFileSize>
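<!-- e.g. the 1GB default equals 131072 * 8192 bytes, satisfying the rule above -->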
</VersionBuffer>
<OIDManager>
<!-- Do not change this file after the database is built -->
<OIDBitmapFile>/var/lib/columnstore/data1/systemFiles/dbrm/oidbitmap</OIDBitmapFile>
<!-- Do not change this value after the database is built -->
<FirstOID>3000</FirstOID>
</OIDManager>
<WriteEngine>
<BulkRoot>/var/log/mariadb/columnstore/data/bulk</BulkRoot>
<BulkRollbackDir>/var/lib/columnstore/data1/systemFiles/bulkRollback</BulkRollbackDir>
<MaxFileSystemDiskUsagePct>98</MaxFileSystemDiskUsagePct>
<CompressedPaddingBlocks>1</CompressedPaddingBlocks> <!-- Number of blocks used to pad compressed chunks -->
<FastDelete>n</FastDelete>
</WriteEngine>
<DBRM_Controller>
<NumWorkers>2</NumWorkers>
<IPAddr>10.128.0.23</IPAddr>
<Port>8616</Port>
</DBRM_Controller>
<!-- Worker Port: 8700 - 8720 is reserved to support External Modules-->
<DBRM_Worker1>
<IPAddr>10.128.0.23</IPAddr>
<Port>8700</Port>
<Module>pm1</Module>
</DBRM_Worker1>
<DBRM_Worker2>
<IPAddr>172.30.0.144</IPAddr>
<Port>8700</Port>
<Module>pm2</Module>
</DBRM_Worker2>
<DBRM_Worker3>
<IPAddr>0.0.0.0</IPAddr>
<Port>8700</Port>
<Module>unassigned</Module>
</DBRM_Worker3>
<DBRM_Worker4>
<IPAddr>0.0.0.0</IPAddr>
<Port>8700</Port>
<Module>unassigned</Module>
</DBRM_Worker4>
<DBRM_Worker5>
<IPAddr>0.0.0.0</IPAddr>
<Port>8700</Port>
<Module>unassigned</Module>
</DBRM_Worker5>
<DBRM_Worker6>
<IPAddr>0.0.0.0</IPAddr>
<Port>8700</Port>
<Module>unassigned</Module>
</DBRM_Worker6>
<DBRM_Worker7>
<IPAddr>0.0.0.0</IPAddr>
<Port>8700</Port>
<Module>unassigned</Module>
</DBRM_Worker7>
<DBRM_Worker8>
<IPAddr>0.0.0.0</IPAddr>
<Port>8700</Port>
<Module>unassigned</Module>
</DBRM_Worker8>
<DBRM_Worker9>
<IPAddr>0.0.0.0</IPAddr>
<Port>8700</Port>
<Module>unassigned</Module>
</DBRM_Worker9>
<DBRM_Worker10>
<IPAddr>0.0.0.0</IPAddr>
<Port>8700</Port>
<Module>unassigned</Module>
</DBRM_Worker10>
<DBBC>
<!-- The percentage of RAM to use for the disk block cache. Defaults to 70% -->
<!-- Alternatively, this can be specified in absolute terms using
the suffixes 'm' or 'g' to denote size in megabytes or gigabytes.-->
<!-- <NumBlocksPct>70</NumBlocksPct> -->
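<!-- e.g. a (hypothetical) absolute setting: <NumBlocksPct>4g</NumBlocksPct> for a 4 GB cache -->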
<!-- <NumThreads>16</NumThreads> --> <!-- 1-256. Default is 16. -->
<NumCaches>1</NumCaches><!-- # of parallel caches to instantiate -->
<IOMTracing>0</IOMTracing>
<BRPTracing>0</BRPTracing>
<ReportFrequency>65536</ReportFrequency>
<MaxOpenFiles>2K</MaxOpenFiles>
<DecreaseOpenFilesCount>200</DecreaseOpenFilesCount>
<FDCacheTrace>0</FDCacheTrace>
<NumBlocksPct>50</NumBlocksPct>
</DBBC>
<Installation>
<ServerTypeInstall>2</ServerTypeInstall>
<PMwithUM>n</PMwithUM>
<MySQLRep>y</MySQLRep>
<DBRootStorageType>internal</DBRootStorageType>
<UMStorageType>internal</UMStorageType>
<ProfileFile>/etc/profile.d/columnstoreAlias.sh</ProfileFile>
</Installation>
<ExtentMap>
<!--
WARNING: these settings can only be changed on an empty system. Once any object
has been allocated they cannot be changed! Extent size is 8M rows.
-->
<FilesPerColumnPartition>4</FilesPerColumnPartition> <!-- should be a multiple of DBRootCount -->
<BRM_UID>0x0</BRM_UID>
</ExtentMap>
<HashJoin>
<MaxBuckets>128</MaxBuckets>
<MaxElems>128K</MaxElems> <!-- 128 buckets * 128K * 16 = 256 MB -->
<PmMaxMemorySmallSide>1G</PmMaxMemorySmallSide>
<TotalUmMemory>25%</TotalUmMemory>
<CPUniqueLimit>100</CPUniqueLimit>
<AllowDiskBasedJoin>N</AllowDiskBasedJoin>
<TempFileCompression>Y</TempFileCompression>
<TempFileCompressionType>Snappy</TempFileCompressionType> <!-- LZ4, Snappy -->
</HashJoin>
<JobList>
<FlushInterval>16K</FlushInterval>
<FifoSize>16</FifoSize>
<RequestSize>1</RequestSize> <!-- Number of extents per request; must be
less than MaxOutstandingRequests, otherwise the default value of 1 is used. -->
<!-- ProcessorThreadsPerScan is the number of jobs issued to process
each extent. The default is 16. MaxOutstandingRequests is the size of
the window of work in terms of extents. A value of 20 means there
are 20 extents' worth of work for the PMs to process at any given time.
ProcessorThreadsPerScan * MaxOutstandingRequests should be at least as
large as the number of threads available across all PMs. -->
<!-- <ProcessorThreadsPerScan>16</ProcessorThreadsPerScan> -->
<!-- MaxOutstandingRequests defaults to (total cores across all performance
modules * 4) / ProcessorThreadsPerScan, with a lower bound of 20 -->
<!-- <MaxOutstandingRequests>20</MaxOutstandingRequests> -->
<ThreadPoolSize>100</ThreadPoolSize>
</JobList>
<RowAggregation>
<!-- <RowAggrThreads>4</RowAggrThreads> --> <!-- Default value is the number of cores -->
<!-- <RowAggrBuckets>32</RowAggrBuckets> --> <!-- Default value is number of cores * 4 -->
<!-- <RowAggrRowGroupsPerThread>20</RowAggrRowGroupsPerThread> --> <!-- Default value is 20 -->
<AllowDiskBasedAggregation>N</AllowDiskBasedAggregation>
</RowAggregation>
<CrossEngineSupport>
<Host>127.0.0.1</Host>
<Port>3306</Port>
<User>root</User>
<Password/>
<TLSCA/>
<TLSClientCert/>
<TLSClientKey/>
</CrossEngineSupport>
<QueryStats>
<Enabled>N</Enabled>
</QueryStats>
<UserPriority>
<Enabled>N</Enabled>
</UserPriority>
<NetworkCompression>
<Enabled>Y</Enabled>
<NetworkCompressionType>Snappy</NetworkCompressionType> <!-- LZ4, Snappy -->
</NetworkCompression>
<QueryTele>
<Host>127.0.0.1</Host>
<Port>0</Port>
</QueryTele>
<StorageManager>
<MaxSockets>30</MaxSockets>
<Enabled>N</Enabled>
</StorageManager>
<ProcHeartbeatControl>
<IPAddr>10.128.0.23</IPAddr>
</ProcHeartbeatControl>
<pm2_ProcessMonitor>
<IPAddr>172.30.0.144</IPAddr>
<Port>8800</Port>
</pm2_ProcessMonitor>
<pm2_ServerMonitor>
<IPAddr>172.30.0.144</IPAddr>
<Port>8622</Port>
</pm2_ServerMonitor>
<pm2_WriteEngineServer>
<IPAddr>172.30.0.144</IPAddr>
<Port>8630</Port>
</pm2_WriteEngineServer>
<ExeMgr2>
<IPAddr>172.30.0.144</IPAddr>
<Port>8601</Port>
<Module>pm2</Module>
</ExeMgr2>
</Columnstore>

View File

@@ -0,0 +1,50 @@
import configparser
from pathlib import Path
from shutil import copyfile
import requests
from cmapi_server.controllers.dispatcher import _version
config_filename = './cmapi_server/cmapi_server.conf'
url = f"https://localhost:8640/cmapi/{_version}/node/config"
begin_url = f"https://localhost:8640/cmapi/{_version}/node/begin"
config_path = './cmapi_server/test/Columnstore_apply_config.xml'
# create tmp dir
tmp_prefix = '/tmp/mcs_config_test'
tmp_path = Path(tmp_prefix)
tmp_path.mkdir(parents=True, exist_ok=True)
copyfile(config_path, tmp_prefix + '/Columnstore.xml')
def get_current_key():
app_config = configparser.ConfigParser()
try:
with open(config_filename, 'r') as _config_file:
app_config.read_file(_config_file)
except FileNotFoundError:
return ''
if 'Authentication' not in app_config.sections():
return ''
return app_config['Authentication'].get('x-api-key', '')
headers = {'x-api-key': get_current_key()}
body = {'id': 42, 'timeout': 120}
r = requests.put(begin_url, verify=False, headers=headers, json=body)
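# The PUT to /node/begin above opens a config transaction on the node; the
# commented-out PUT to /node/config below would push the new Columnstore.xml
# within that transaction.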
config_file = Path(config_path)
config = config_file.read_text()
body = {
'revision': 42,
'manager': '1.1.1.1',
'timeout': 0,
'config': config,
}
#print(config)
#r = requests.put(url, verify=False, headers=headers, json=body)

View File

@@ -0,0 +1,35 @@
"""Tests for all the CEJ (Cross Engine Join) related stuff."""
import os
import subprocess
import sys
import unittest
from shutil import which
from cmapi_server.handlers.cej import CEJPasswordHandler
from cmapi_server.constants import MCS_SECRETS_FILE_PATH
class SecretsTestCase(unittest.TestCase):
"""Test case for checking .secrets file related stuff."""
@unittest.skipIf(which('cskeys') is None,
'This MCS version doesn\'t provide "cskeys" tool.')
def test_cspasswd_decrypt_algorithm(self) -> None:
"""Test to check decrypt algorithm.
Check that CEJ password decrypting algorithm is the same between
"cspasswd" tool in MCS and in CMAPI.
"""
test_passwd = 'columstore is the best'
# create .secrets file using cskeys util
subprocess.run(['cskeys'], stdout=subprocess.PIPE, check=True)
encrypted_passwd = subprocess.check_output(
['cspasswd', test_passwd]
).decode(sys.stdout.encoding).strip()
self.assertEqual(
test_passwd, CEJPasswordHandler.decrypt_password(encrypted_passwd)
)
os.remove(MCS_SECRETS_FILE_PATH)

View File

@@ -0,0 +1,236 @@
import logging
import os
import socket
import subprocess
from shutil import copyfile
import requests
from cmapi_server.controllers.dispatcher import _version
from cmapi_server.managers.process import MCSProcessManager
from cmapi_server.test.unittest_global import (
BaseServerTestCase, MCS_CONFIG_FILEPATH, COPY_MCS_CONFIG_FILEPATH,
TEST_MCS_CONFIG_FILEPATH,
)
logging.basicConfig(level='DEBUG')
requests.urllib3.disable_warnings()
class BaseClusterTestCase(BaseServerTestCase):
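# The real MCS config is backed up once per class and restored afterwards;
# each test starts from a known test config and restarts the node.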
@classmethod
def setUpClass(cls) -> None:
copyfile(MCS_CONFIG_FILEPATH, COPY_MCS_CONFIG_FILEPATH)
return super().setUpClass()
@classmethod
def tearDownClass(cls) -> None:
copyfile(COPY_MCS_CONFIG_FILEPATH, MCS_CONFIG_FILEPATH)
os.remove(os.path.abspath(COPY_MCS_CONFIG_FILEPATH))
MCSProcessManager.stop_node(is_primary=True)
MCSProcessManager.start_node(is_primary=True)
return super().tearDownClass()
def setUp(self) -> None:
copyfile(TEST_MCS_CONFIG_FILEPATH, MCS_CONFIG_FILEPATH)
MCSProcessManager.stop_node(is_primary=True)
MCSProcessManager.start_node(is_primary=True)
return super().setUp()
class ClusterStartTestCase(BaseClusterTestCase):
URL = f'https://localhost:8640/cmapi/{_version}/cluster/start'
def test_endpoint_with_no_api_key(self):
r = requests.put(
self.URL, verify=False, headers=self.NO_AUTH_HEADERS,
json={}
)
self.assertEqual(r.status_code, 401)
def test_endpoint_with_no_nodes_in_cluster(self):
r = requests.put(
self.URL, verify=False, headers=self.HEADERS,
json={}
)
error = r.json()['error']
self.assertEqual(r.status_code, 422)
self.assertEqual(error, 'There are no nodes in the cluster.')
def test_start_after_adding_a_node(self):
payload = {'node': socket.gethostname()}
resp = requests.post(
ClusterAddNodeTestCase.URL, verify=False, headers=self.HEADERS,
json=payload
)
self.assertEqual(resp.status_code, 200)
payload = {'node': None}
resp = requests.put(
self.URL, verify=False, headers=self.HEADERS, json=payload
)
self.assertEqual(resp.status_code, 200)
# check that Columnstore started
controllernode = subprocess.check_output(['pgrep', 'controllernode'])
self.assertIsNotNone(controllernode)
class ClusterShutdownTestCase(BaseClusterTestCase):
URL = f'https://localhost:8640/cmapi/{_version}/cluster/shutdown'
def test_endpoint_with_no_api_key(self):
r = requests.put(
self.URL, verify=False, headers=self.NO_AUTH_HEADERS,
json={}
)
self.assertEqual(r.status_code, 401)
def test_endpoint_with_no_nodes_in_cluster(self):
resp = requests.put(self.URL, verify=False, headers=self.HEADERS,
json={}
)
error = resp.json()['error']
self.assertEqual(resp.status_code, 422)
self.assertEqual(error, 'There are no nodes in the cluster.')
def test_add_node_and_shutdown(self):
payload = {'node': socket.gethostname()}
resp = requests.post(
ClusterAddNodeTestCase.URL, verify=False, headers=self.HEADERS,
json=payload
)
self.assertEqual(resp.status_code, 200)
# note: POST node starts up node
try:
controllernode = subprocess.check_output(
['pgrep', 'controllernode']
)
except subprocess.CalledProcessError:
    controllernode = None
self.assertIsNotNone(controllernode)
payload = {'timeout': 60}
resp = requests.put(
self.URL, verify=False, headers=self.HEADERS,
json=payload
)
self.assertEqual(resp.status_code, 200)
# Check columnstore stopped
try:
controllernode = subprocess.check_output(
['pgrep', 'controllernode']
)
except subprocess.CalledProcessError:
    controllernode = None
self.assertIsNone(controllernode)
class ClusterModesetTestCase(BaseClusterTestCase):
URL = f'https://localhost:8640/cmapi/{_version}/cluster/mode-set'
def test_endpoint_with_no_api_key(self):
resp = requests.put(
self.URL, verify=False, headers=self.NO_AUTH_HEADERS,
json={}
)
self.assertEqual(resp.status_code, 401)
def test_endpoint_with_no_nodes_in_cluster(self):
resp = requests.put(
self.URL, verify=False, headers=self.HEADERS,
json={}
)
error = resp.json()['error']
self.assertEqual(resp.status_code, 422)
self.assertEqual(error, 'No master found in the cluster.')
def test_add_node_and_set_readonly(self):
payload = {'node': socket.gethostname()}
resp = requests.post(
ClusterAddNodeTestCase.URL, verify=False, headers=self.HEADERS,
json=payload
)
self.assertEqual(resp.status_code, 200)
payload = {'mode': 'readonly'}
resp = requests.put(
self.URL, verify=False, headers=self.HEADERS, json=payload
)
self.assertEqual(resp.status_code, 200)
# return readwrite mode back
payload = {'mode': 'readwrite'}
resp = requests.put(
self.URL, verify=False, headers=self.HEADERS, json=payload
)
self.assertEqual(resp.status_code, 200)
class ClusterAddNodeTestCase(BaseClusterTestCase):
URL = f'https://localhost:8640/cmapi/{_version}/cluster/node'
def test_endpoint_with_no_apikey(self):
resp = requests.post(
self.URL, verify=False, headers=self.NO_AUTH_HEADERS,
json={}
)
self.assertEqual(resp.status_code, 401)
def test_endpoint_with_missing_node_parameter(self):
resp = requests.put(
self.URL, verify=False, headers=self.HEADERS,
json={}
)
error = resp.json()['error']
self.assertEqual(resp.status_code, 422)
self.assertEqual(error, 'missing node argument')
def test_endpoint(self):
payload = {'node': socket.gethostname()}
resp = requests.put(
self.URL, verify=False, headers=self.HEADERS,
json=payload
)
self.assertEqual(resp.status_code, 200)
# Check Columnstore started
controllernode = subprocess.check_output(
['pgrep', 'controllernode'])
self.assertIsNotNone(controllernode)
class ClusterRemoveNodeTestCase(BaseClusterTestCase):
URL = ClusterAddNodeTestCase.URL
def test_endpoint_with_no_apikey(self):
resp = requests.delete(
self.URL, verify=False, headers=self.NO_AUTH_HEADERS,
json={}
)
self.assertEqual(resp.status_code, 401)
def test_endpoint_with_missing_node_parameter(self):
resp = requests.delete(
self.URL, verify=False, headers=self.HEADERS,
json={}
)
error = resp.json()['error']
self.assertEqual(resp.status_code, 422)
self.assertEqual(error, 'missing node argument')
def test_add_node_and_remove(self):
payload = {'node': socket.gethostname()}
resp = requests.post(
ClusterAddNodeTestCase.URL, verify=False, headers=self.HEADERS,
json=payload
)
self.assertEqual(resp.status_code, 200)
resp = requests.delete(
self.URL, verify=False, headers=self.HEADERS, json=payload
)
self.assertEqual(resp.status_code, 200)

View File

@@ -0,0 +1,217 @@
import configparser
import subprocess
import unittest
from contextlib import contextmanager
from os import path, remove
from pathlib import Path
from shutil import copyfile
import cherrypy
import requests
requests.packages.urllib3.disable_warnings()
from cmapi_server.constants import (
EM_PATH_SUFFIX, MCS_EM_PATH, MCS_BRM_CURRENT_PATH, S3_BRM_CURRENT_PATH
)
from cmapi_server.controllers.dispatcher import (
dispatcher, jsonify_error, _version
)
from cmapi_server.test.unittest_global import (
create_self_signed_certificate, cert_filename, cmapi_config_filename,
tmp_cmapi_config_filename
)
from mcs_node_control.models.node_config import NodeConfig
@contextmanager
def run_server():
if not path.exists(cert_filename):
create_self_signed_certificate()
cherrypy.engine.start()
cherrypy.engine.wait(cherrypy.engine.states.STARTED)
yield
cherrypy.engine.exit()
cherrypy.engine.block()
def get_current_key():
app_config = configparser.ConfigParser()
try:
with open(cmapi_config_filename, 'r') as _config_file:
app_config.read_file(_config_file)
except FileNotFoundError:
return ''
if 'Authentication' not in app_config.sections():
return ''
return app_config['Authentication'].get('x-api-key', '')
class TestEMEndpoints(unittest.TestCase):
@classmethod
def setUpClass(cls):
if not path.exists(tmp_cmapi_config_filename):
f = open(tmp_cmapi_config_filename, 'x')
f.close()
copyfile(cmapi_config_filename, tmp_cmapi_config_filename)
@classmethod
def tearDownClass(cls):
if path.exists(tmp_cmapi_config_filename):
copyfile(tmp_cmapi_config_filename, cmapi_config_filename)
remove(tmp_cmapi_config_filename)
def get_examplar_bytes(self, element: str):
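# Read the current BRM snapshot for `element` ('em', 'journal', 'vss' or
# 'vbbm'): via StorageManager's smcat when S3 is enabled, otherwise straight
# from the local BRM files. Used to compare against the API response body.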
node_config = NodeConfig()
if node_config.s3_enabled():
ret = subprocess.run(
["smcat", S3_BRM_CURRENT_PATH], stdout=subprocess.PIPE
)
element_current_suffix = ret.stdout.decode("utf-8").rstrip()
element_current_filename = f'{EM_PATH_SUFFIX}/{element_current_suffix}_{element}'
ret = subprocess.run(
["smcat", element_current_filename], stdout=subprocess.PIPE
)
result = ret.stdout
else:
element_current_name = Path(MCS_BRM_CURRENT_PATH)
element_current_filename = element_current_name.read_text().rstrip()
element_current_file = Path(
f'{MCS_EM_PATH}/{element_current_filename}_{element}'
)
result = element_current_file.read_bytes()
return result
def test_em(self):
app = cherrypy.tree.mount(root=None,
config=cmapi_config_filename)
app.config.update({
'/': {
'request.dispatch': dispatcher,
'error_page.default': jsonify_error,
},
'config': {
'path': cmapi_config_filename,
},
})
cherrypy.config.update(cmapi_config_filename)
api_key = get_current_key()
try:
with run_server():
url = f"https://localhost:8640/cmapi/{_version}/node/meta/em"
# Auth failure
headers = {'x-api-key': None}
r = requests.get(url, verify=False, headers=headers)
self.assertEqual(r.status_code, 401)
# OK
headers = {'x-api-key': api_key}
r = requests.get(url, verify=False, headers=headers)
extent_map = self.get_examplar_bytes('em')
self.assertEqual(r.status_code, 200)
self.assertEqual(r.content, extent_map)
except:
cherrypy.engine.exit()
cherrypy.engine.block()
raise
def test_journal(self):
app = cherrypy.tree.mount(root=None,
config=cmapi_config_filename)
app.config.update({
'/': {
'request.dispatch': dispatcher,
'error_page.default': jsonify_error,
},
'config': {
'path': cmapi_config_filename,
},
})
cherrypy.config.update(cmapi_config_filename)
api_key = get_current_key()
try:
with run_server():
url = f"https://localhost:8640/cmapi/{_version}/node/meta/journal"
# Auth failure
headers = {'x-api-key': None}
r = requests.get(url, verify=False, headers=headers)
self.assertEqual(r.status_code, 401)
# OK
headers = {'x-api-key': api_key}
r = requests.get(url, verify=False, headers=headers)
journal = self.get_examplar_bytes('journal')
self.assertEqual(r.status_code, 200)
self.assertEqual(r.content, journal)
except:
cherrypy.engine.exit()
cherrypy.engine.block()
raise
def test_vss(self):
app = cherrypy.tree.mount(root=None,
config=cmapi_config_filename)
app.config.update({
'/': {
'request.dispatch': dispatcher,
'error_page.default': jsonify_error,
},
'config': {
'path': cmapi_config_filename,
},
})
cherrypy.config.update(cmapi_config_filename)
api_key = get_current_key()
try:
with run_server():
url = f"https://localhost:8640/cmapi/{_version}/node/meta/vss"
# Auth failure
headers = {'x-api-key': None}
r = requests.get(url, verify=False, headers=headers)
self.assertEqual(r.status_code, 401)
# OK
headers = {'x-api-key': api_key}
r = requests.get(url, verify=False, headers=headers)
vss = self.get_examplar_bytes('vss')
self.assertEqual(r.status_code, 200)
self.assertEqual(r.content, vss)
except:
cherrypy.engine.exit()
cherrypy.engine.block()
raise
def test_vbbm(self):
app = cherrypy.tree.mount(root=None,
config=cmapi_config_filename)
app.config.update({
'/': {
'request.dispatch': dispatcher,
'error_page.default': jsonify_error,
},
'config': {
'path': cmapi_config_filename,
},
})
cherrypy.config.update(cmapi_config_filename)
api_key = get_current_key()
try:
with run_server():
url = f"https://localhost:8640/cmapi/{_version}/node/meta/vbbm"
# Auth failure
headers = {'x-api-key': None}
r = requests.get(url, verify=False, headers=headers)
self.assertEqual(r.status_code, 401)
# OK
headers = {'x-api-key': api_key}
r = requests.get(url, verify=False, headers=headers)
vbbm = self.get_examplar_bytes('vbbm')
self.assertEqual(r.status_code, 200)
self.assertEqual(r.content, vbbm)
except:
cherrypy.engine.exit()
cherrypy.engine.block()
raise

View File

@@ -0,0 +1,124 @@
import logging
import socket
from cmapi_server.failover_agent import FailoverAgent
from cmapi_server.node_manipulation import add_node, remove_node
from mcs_node_control.models.node_config import NodeConfig
from cmapi_server.test.unittest_global import (
tmp_mcs_config_filename, BaseNodeManipTestCase
)
logging.basicConfig(level='DEBUG')
class TestFailoverAgent(BaseNodeManipTestCase):
def test_activateNodes(self):
self.tmp_files = ('./activate0.xml', './activate1.xml')
hostaddr = socket.gethostbyname(socket.gethostname())
fa = FailoverAgent()
fa.activateNodes(
[self.NEW_NODE_NAME], tmp_mcs_config_filename, self.tmp_files[0],
test_mode=True
)
add_node(
hostaddr, self.tmp_files[0], self.tmp_files[1]
)
nc = NodeConfig()
root = nc.get_current_config_root(self.tmp_files[1])
pm_count = int(root.find('./PrimitiveServers/Count').text)
self.assertEqual(pm_count, 2)
node = root.find('./PMS1/IPAddr')
self.assertEqual(node.text, self.NEW_NODE_NAME)
node = root.find('./pm1_WriteEngineServer/IPAddr')
self.assertEqual(node.text, self.NEW_NODE_NAME)
node = root.find('./PMS2/IPAddr')
self.assertEqual(node.text, hostaddr)
node = root.find('./pm2_WriteEngineServer/IPAddr')
self.assertEqual(node.text, hostaddr)
remove_node(self.NEW_NODE_NAME, self.tmp_files[1], self.tmp_files[1])
def test_deactivateNodes(self):
self.tmp_files = (
'./deactivate0.xml', './deactivate1.xml', './deactivate2.xml'
)
fa = FailoverAgent()
hostname = socket.gethostname()
hostaddr = socket.gethostbyname(hostname)
add_node(
hostaddr, tmp_mcs_config_filename, self.tmp_files[0]
)
fa.activateNodes(
[self.NEW_NODE_NAME], self.tmp_files[0], self.tmp_files[1],
test_mode=True
)
fa.deactivateNodes(
[self.NEW_NODE_NAME], self.tmp_files[1], self.tmp_files[2],
test_mode=True
)
nc = NodeConfig()
root = nc.get_current_config_root(self.tmp_files[2])
pm_count = int(root.find('./PrimitiveServers/Count').text)
self.assertEqual(pm_count, 1)
node = root.find('./PMS1/IPAddr')
self.assertEqual(node.text, hostaddr)
# TODO: Fix node_manipulation add_node logic and _replace_localhost
# node = root.find('./PMS2/IPAddr')
# self.assertEqual(node, None)
node = root.find('./pm1_WriteEngineServer/IPAddr')
self.assertEqual(node.text, hostaddr)
node = root.find('./pm2_WriteEngineServer/IPAddr')
self.assertIsNone(node)
#node = root.find("./ConfigRevision")
#self.assertEqual(node.text, "3")
# make sure there are no traces of mysql.com,
# or an ip addr that isn't localhost or 127.0.0.1
all_nodes = root.findall('./')
for node in all_nodes:
self.assertNotEqual(node.text, self.NEW_NODE_NAME)
if node.tag in ['IPAddr', 'Node']:
self.assertIn(node.text, [hostname, hostaddr])
def test_designatePrimaryNode(self):
self.tmp_files = (
'./primary-node0.xml', './primary-node1.xml', './primary-node2.xml'
)
fa = FailoverAgent()
hostaddr = socket.gethostbyname(socket.gethostname())
fa.activateNodes(
[self.NEW_NODE_NAME], tmp_mcs_config_filename, self.tmp_files[0],
test_mode=True
)
add_node(
hostaddr, self.tmp_files[0], self.tmp_files[1]
)
fa.movePrimaryNode(
'placeholder', self.tmp_files[1], self.tmp_files[2], test_mode=True
)
nc = NodeConfig()
root = nc.get_current_config_root(self.tmp_files[2])
pm_count = int(root.find('./PrimitiveServers/Count').text)
self.assertEqual(pm_count, 2)
node = root.find('./PMS1/IPAddr')
self.assertEqual(node.text, self.NEW_NODE_NAME)
node = root.find('./PMS2/IPAddr')
self.assertEqual(node.text, hostaddr)
node = root.find('./pm1_WriteEngineServer/IPAddr')
self.assertEqual(node.text, self.NEW_NODE_NAME)
node = root.find('./pm2_WriteEngineServer/IPAddr')
self.assertEqual(node.text, hostaddr)
for tag in ['ExeMgr1', 'DMLProc', 'DDLProc']:
node = root.find(f'./{tag}/IPAddr')
self.assertEqual(node.text, self.NEW_NODE_NAME)
self.assertEqual(self.NEW_NODE_NAME, root.find('./PrimaryNode').text)
def test_enterStandbyMode(self):
fa = FailoverAgent()
fa.enterStandbyMode(test_mode=True)

View File

@@ -0,0 +1,117 @@
import os
from cmapi_server.managers.process import MCSProcessManager
from cmapi_server.process_dispatchers.systemd import SystemdDispatcher
from cmapi_server.test.unittest_global import (
DDL_SERVICE, CONTROLLERNODE_SERVICE, SYSTEMCTL,
BaseProcessDispatcherCase
)
class SystemdTest(BaseProcessDispatcherCase):
def test_systemd_status_start(self):
os.system(f'{SYSTEMCTL} stop {DDL_SERVICE}')
self.assertFalse(
SystemdDispatcher.is_service_running(DDL_SERVICE)
)
self.assertTrue(SystemdDispatcher.start(DDL_SERVICE))
os.system(f'{SYSTEMCTL} stop {CONTROLLERNODE_SERVICE}')
self.assertFalse(
SystemdDispatcher.is_service_running(CONTROLLERNODE_SERVICE)
)
result = SystemdDispatcher.start(CONTROLLERNODE_SERVICE)
self.assertTrue(result)
self.assertTrue(
SystemdDispatcher.is_service_running(CONTROLLERNODE_SERVICE)
)
def test_systemd_status_stop(self):
os.system(f'{SYSTEMCTL} start {CONTROLLERNODE_SERVICE}')
self.assertTrue(
SystemdDispatcher.is_service_running(CONTROLLERNODE_SERVICE)
)
self.assertTrue(SystemdDispatcher.stop(CONTROLLERNODE_SERVICE))
self.assertFalse(
SystemdDispatcher.is_service_running(CONTROLLERNODE_SERVICE)
)
def test_systemd_status_restart(self):
os.system(f'{SYSTEMCTL} start {CONTROLLERNODE_SERVICE}')
self.assertTrue(
SystemdDispatcher.is_service_running(CONTROLLERNODE_SERVICE)
)
self.assertTrue(SystemdDispatcher.restart(CONTROLLERNODE_SERVICE))
self.assertTrue(
SystemdDispatcher.is_service_running(CONTROLLERNODE_SERVICE)
)
os.system(f'{SYSTEMCTL} stop {CONTROLLERNODE_SERVICE}')
self.assertFalse(
SystemdDispatcher.is_service_running(CONTROLLERNODE_SERVICE)
)
self.assertTrue(SystemdDispatcher.restart(CONTROLLERNODE_SERVICE))
self.assertTrue(
SystemdDispatcher.is_service_running(CONTROLLERNODE_SERVICE)
)
class MCSProcessManagerTest(BaseProcessDispatcherCase):
def get_systemd_serv_name(self, service_name):
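# mcs-workernode is a systemd template unit, so the '@1' instance suffix is
# appended; every other service name is used as-is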
if service_name == 'mcs-workernode':
return f'{service_name}@1'
return service_name
def test_mcs_process_manager(self):
MCSProcessManager.detect('systemd', '')
for prog in MCSProcessManager._get_sorted_progs(True, True).values():
serv_name = self.get_systemd_serv_name(prog.service_name)
os.system(f'{SYSTEMCTL} stop {serv_name}')
self.assertIsNone(MCSProcessManager.start_node(True))
for prog in MCSProcessManager.mcs_progs.values():
serv_name = self.get_systemd_serv_name(prog.service_name)
if serv_name == 'mcs-storagemanager':
continue
self.assertTrue(
MCSProcessManager.process_dispatcher.is_service_running(
serv_name
)
)
self.assertIsNone(MCSProcessManager.stop_node(is_primary=True))
for prog in MCSProcessManager.mcs_progs.values():
serv_name = self.get_systemd_serv_name(prog.service_name)
self.assertFalse(
MCSProcessManager.process_dispatcher.is_service_running(
serv_name
)
)
self.assertEqual(len(MCSProcessManager.get_running_mcs_procs()), 0)
self.assertTrue(
MCSProcessManager.is_node_processes_ok(
is_primary=True, node_stopped=True
)
)
for prog in MCSProcessManager._get_sorted_progs(True).values():
serv_name = self.get_systemd_serv_name(prog.service_name)
os.system(f'{SYSTEMCTL} start {serv_name}')
for prog in MCSProcessManager.mcs_progs.values():
serv_name = self.get_systemd_serv_name(prog.service_name)
self.assertTrue(
MCSProcessManager.process_dispatcher.is_service_running(
serv_name
)
)
self.assertEqual(
len(MCSProcessManager.get_running_mcs_procs()),
len(MCSProcessManager.mcs_progs.keys())
)
self.assertTrue(
MCSProcessManager.is_node_processes_ok(
is_primary=True, node_stopped=False
)
)

View File

@@ -0,0 +1,211 @@
import logging
import socket
from lxml import etree
from cmapi_server import node_manipulation
from cmapi_server.constants import MCS_DATA_PATH
from cmapi_server.test.unittest_global import (
tmp_mcs_config_filename, BaseNodeManipTestCase
)
from mcs_node_control.models.node_config import NodeConfig
logging.basicConfig(level='DEBUG')
class NodeManipTester(BaseNodeManipTestCase):
def test_add_remove_node(self):
self.tmp_files = (
'./test-output0.xml', './test-output1.xml', './test-output2.xml'
)
hostaddr = socket.gethostbyname(socket.gethostname())
node_manipulation.add_node(
self.NEW_NODE_NAME, tmp_mcs_config_filename, self.tmp_files[0]
)
node_manipulation.add_node(
hostaddr, self.tmp_files[0], self.tmp_files[1]
)
# get a NodeConfig, read test.xml
# look for some of the expected changes.
# Total verification will take too long to code up right now.
nc = NodeConfig()
root = nc.get_current_config_root(self.tmp_files[1])
pms_node_ipaddr = root.find('./PMS1/IPAddr')
self.assertEqual(pms_node_ipaddr.text, self.NEW_NODE_NAME)
pms_node_ipaddr = root.find('./PMS2/IPAddr')
self.assertEqual(pms_node_ipaddr.text, hostaddr)
node = root.find("./ExeMgr2/IPAddr")
self.assertEqual(node.text, hostaddr)
node_manipulation.remove_node(
self.NEW_NODE_NAME, self.tmp_files[1], self.tmp_files[2],
test_mode=True
)
nc = NodeConfig()
root = nc.get_current_config_root(self.tmp_files[2])
node = root.find('./PMS1/IPAddr')
self.assertEqual(node.text, hostaddr)
# TODO: Fix node_manipulation add_node logic and _replace_localhost
# node = root.find('./PMS2/IPAddr')
# self.assertEqual(node, None)
def test_add_dbroots_nodes_rebalance(self):
self.tmp_files = (
'./extra-dbroots-0.xml', './extra-dbroots-1.xml',
'./extra-dbroots-2.xml'
)
# add 2 dbroots and see what happens
nc = NodeConfig()
root = nc.get_current_config_root(tmp_mcs_config_filename)
sysconf_node = root.find('./SystemConfig')
dbroot_count_node = sysconf_node.find('./DBRootCount')
dbroot_count = int(dbroot_count_node.text) + 2
dbroot_count_node.text = str(dbroot_count)
etree.SubElement(sysconf_node, 'DBRoot2').text = '/dummy_path/data2'
etree.SubElement(sysconf_node, 'DBRoot10').text = '/dummy_path/data10'
nc.write_config(root, self.tmp_files[0])
node_manipulation.add_node(
self.NEW_NODE_NAME, self.tmp_files[0], self.tmp_files[1]
)
# get a NodeConfig, read test.xml
# look for some of the expected changes.
# Total verification will take too long to code up right now.
# Do eyeball verification for now.
nc = NodeConfig()
root = nc.get_current_config_root(self.tmp_files[1])
node = root.find("./PMS2/IPAddr")
self.assertEqual(node.text, self.NEW_NODE_NAME)
hostname = socket.gethostname()
# dbroots 1 and 10 get assigned to node 1 and dbroot 2 to node 2
# (eyeball-verified); now remove node 1 (hostname) and check the result
node_manipulation.remove_node(
hostname, self.tmp_files[1], self.tmp_files[2],
test_mode=True
)
def test_add_dbroot(self):
self.tmp_files = (
'./dbroot-test0.xml', './dbroot-test1.xml', './dbroot-test2.xml',
'./dbroot-test3.xml', './dbroot-test4.xml'
)
# add a dbroot, verify it exists
dbroot_id = node_manipulation.add_dbroot(
    tmp_mcs_config_filename, self.tmp_files[0]
)
self.assertEqual(dbroot_id, 2)
nc = NodeConfig()
root = nc.get_current_config_root(self.tmp_files[0])
self.assertEqual(2, int(root.find('./SystemConfig/DBRootCount').text))
self.assertEqual(
f'{MCS_DATA_PATH}/data2',
root.find('./SystemConfig/DBRoot2').text
)
# add a node, verify we can add a dbroot to each of them
hostname = socket.gethostname()
node_manipulation.add_node(
hostname, tmp_mcs_config_filename, self.tmp_files[1]
)
node_manipulation.add_node(
self.NEW_NODE_NAME, self.tmp_files[1], self.tmp_files[2]
)
id1 = node_manipulation.add_dbroot(
self.tmp_files[2], self.tmp_files[3], host=self.NEW_NODE_NAME
)
id2 = node_manipulation.add_dbroot(
self.tmp_files[3], self.tmp_files[4], host=hostname
)
self.assertEqual(id1, 2)
self.assertEqual(id2, 3)
root = nc.get_current_config_root(self.tmp_files[4])
dbroot_count1 = int(
root.find('./SystemModuleConfig/ModuleDBRootCount1-3').text
)
dbroot_count2 = int(
root.find('./SystemModuleConfig/ModuleDBRootCount2-3').text
)
self.assertEqual(dbroot_count1 + dbroot_count2, 3)
unique_dbroots = set()
for i in range(1, dbroot_count1 + 1):
unique_dbroots.add(int(
root.find(f'./SystemModuleConfig/ModuleDBRootID1-{i}-3').text)
)
for i in range(1, dbroot_count2 + 1):
unique_dbroots.add(int(
root.find(f'./SystemModuleConfig/ModuleDBRootID2-{i}-3').text)
)
self.assertEqual(sorted(unique_dbroots), [1, 2, 3])
def test_change_primary_node(self):
# add a node, make it the primary, verify expected result
self.tmp_files = ('./primary-node0.xml', './primary-node1.xml')
node_manipulation.add_node(
self.NEW_NODE_NAME, tmp_mcs_config_filename, self.tmp_files[0]
)
node_manipulation.move_primary_node(
self.tmp_files[0], self.tmp_files[1]
)
root = NodeConfig().get_current_config_root(self.tmp_files[1])
self.assertEqual(
root.find('./ExeMgr1/IPAddr').text, self.NEW_NODE_NAME
)
self.assertEqual(
root.find('./DMLProc/IPAddr').text, self.NEW_NODE_NAME
)
self.assertEqual(
root.find('./DDLProc/IPAddr').text, self.NEW_NODE_NAME
)
# This version doesn't support IPv6
dbrm_controller_ip = root.find("./DBRM_Controller/IPAddr").text
self.assertEqual(dbrm_controller_ip, self.NEW_NODE_NAME)
self.assertEqual(root.find('./PrimaryNode').text, self.NEW_NODE_NAME)
def test_unassign_dbroot1(self):
self.tmp_files = (
'./tud-0.xml', './tud-1.xml', './tud-2.xml', './tud-3.xml',
)
node_manipulation.add_node(
self.NEW_NODE_NAME, tmp_mcs_config_filename, self.tmp_files[0]
)
root = NodeConfig().get_current_config_root(self.tmp_files[0])
(name, addr) = node_manipulation.find_dbroot1(root)
self.assertEqual(name, self.NEW_NODE_NAME)
# add a second node and more dbroots to make the test slightly more robust
node_manipulation.add_node(
socket.gethostname(), self.tmp_files[0], self.tmp_files[1]
)
node_manipulation.add_dbroot(
self.tmp_files[1], self.tmp_files[2], socket.gethostname()
)
node_manipulation.add_dbroot(
self.tmp_files[2], self.tmp_files[3], self.NEW_NODE_NAME
)
root = NodeConfig().get_current_config_root(self.tmp_files[3])
(name, addr) = node_manipulation.find_dbroot1(root)
self.assertEqual(name, self.NEW_NODE_NAME)
node_manipulation.unassign_dbroot1(root)
with self.assertRaises(node_manipulation.NodeNotFoundException):
    node_manipulation.find_dbroot1(root)

View File

@@ -0,0 +1,388 @@
import logging
from datetime import datetime
from pathlib import Path
import requests
from cmapi_server.controllers.dispatcher import _version
from cmapi_server.test.unittest_global import BaseServerTestCase
from mcs_node_control.models.dbrm import DBRM
logging.basicConfig(level='DEBUG')
requests.urllib3.disable_warnings()
class ConfigTestCase(BaseServerTestCase):
URL = f'https://localhost:8640/cmapi/{_version}/node/config'
def test_config(self):
for msg, headers, status_code in self.TEST_PARAMS:
with self.subTest(
msg=msg, headers=headers, status_code=status_code
):
r = requests.get(self.URL, verify=False, headers=headers)
self.assertEqual(r.status_code, status_code)
class StatusTestCase(BaseServerTestCase):
URL = f'https://localhost:8640/cmapi/{_version}/node/status'
def test_status(self):
for msg, headers, status_code in self.TEST_PARAMS:
with self.subTest(
msg=msg, headers=headers, status_code=status_code
):
r = requests.get(self.URL, verify=False, headers=headers)
self.assertEqual(r.status_code, status_code)
class BeginTestCase(BaseServerTestCase):
URL = f'https://localhost:8640/cmapi/{_version}/node/begin'
def test_wrong_content_type(self):
r = requests.put(self.URL, verify=False, headers=self.HEADERS)
self.assertEqual(r.status_code, 415)
def test_no_timeout(self):
body = {'id': 42}
r = requests.put(
self.URL, verify=False, headers=self.HEADERS, json=body
)
self.assertEqual(r.status_code, 422)
self.assertEqual(r.json(), {'error': 'id or timeout is not set.'})
def test_no_auth(self):
body = {'id': 42, 'timeout': 300}
r = requests.put(
self.URL, verify=False, headers=self.NO_AUTH_HEADERS, json=body
)
self.assertEqual(r.status_code, 401)
def test_ok(self):
txn_id_local = 42
txn_timeout = 300
txn_timeout_local = 300 + int(datetime.now().timestamp())
body = {'id': txn_id_local, 'timeout': txn_timeout}
r = requests.put(
self.URL, verify=False, headers=self.HEADERS, json=body
)
self.assertEqual(r.status_code, 200)
txn_section = self.app.config.get('txn', None)
self.assertTrue(txn_section is not None)
txn_id = txn_section.get('id', None)
txn_timeout = txn_section.get('timeout', None)
txn_manager_address = txn_section.get('manager_address', None)
txn_config_changed = txn_section.get('config_changed', None)
txn = [txn_id, txn_timeout, txn_manager_address, txn_config_changed]
self.assertNotIn(None, txn)
self.assertEqual(txn_id, txn_id_local)
self.assertLessEqual(txn_timeout - txn_timeout_local, 2)
def test_multiple_begin(self):
txn_id_local = 42
txn_timeout = 300
body = {'id': txn_id_local, 'timeout': txn_timeout}
_ = requests.put(
self.URL, verify=False, headers=self.HEADERS, json=body
)
r = requests.put(
self.URL, verify=False, headers=self.HEADERS, json=body
)
self.assertEqual(r.status_code, 422)
self.assertEqual(
r.json(), {'error': 'There is an active operation.'}
)
class CommitTestCase(BaseServerTestCase):
URL = f'https://localhost:8640/cmapi/{_version}/node/commit'
def test_wrong_content_type(self):
r = requests.put(self.URL, verify=False, headers=self.HEADERS)
self.assertEqual(r.status_code, 415)
def test_no_operation(self):
body = {'id': 42}
r = requests.put(
self.URL, verify=False, headers=self.HEADERS, json=body
)
self.assertEqual(r.status_code, 422)
self.assertEqual(r.json(), {'error': 'No operation to commit.'})
def test_begin_and_commit(self):
txn_timeout = 300
txn_id = 42
body = {'id': txn_id, 'timeout': txn_timeout}
r = requests.put(
BeginTestCase.URL, verify=False, headers=self.HEADERS, json=body
)
txn_section = self.app.config.get('txn', None)
self.assertTrue(txn_section is not None)
self.assertEqual(r.status_code, 200)
body = {'id': 42}
r = requests.put(self.URL, verify=False, headers=self.HEADERS, json=body)
self.assertEqual(r.status_code, 200)
txn_id = txn_section.get('id', None)
txn_timeout = txn_section.get('timeout', None)
txn_manager_address = txn_section.get('manager_address', None)
txn_config_changed = txn_section.get('config_changed', None)
self.assertEqual(txn_id, 0)
self.assertEqual(txn_timeout, 0)
self.assertEqual(txn_manager_address, '')
self.assertFalse(txn_config_changed)
def test_multiple_commit(self):
body = {'id': 42}
_ = requests.put(
self.URL, verify=False, headers=self.HEADERS, json=body
)
r = requests.put(
self.URL, verify=False, headers=self.HEADERS, json=body
)
self.assertEqual(r.status_code, 422)
class RollbackTestCase(BaseServerTestCase):
URL = f"https://localhost:8640/cmapi/{_version}/node/rollback"
def test_wrong_content_type(self):
r = requests.put(self.URL, verify=False, headers=self.HEADERS)
self.assertEqual(r.status_code, 415)
def test_no_operation(self):
body = {'id': 42}
r = requests.put(
self.URL, verify=False, headers=self.HEADERS, json=body
)
self.assertEqual(r.status_code, 422)
self.assertEqual(r.json(), {'error': 'No operation to rollback.'})
def test_begin_and_rollback(self):
txn_timeout = 300
txn_id = 42
body = {'id': txn_id, 'timeout': txn_timeout}
r = requests.put(
BeginTestCase.URL, verify=False, headers=self.HEADERS, json=body
)
txn_section = self.app.config.get('txn', None)
self.assertTrue(txn_section is not None)
self.assertEqual(r.status_code, 200)
body = {'id': 42}
r = requests.put(
self.URL, verify=False, headers=self.HEADERS, json=body
)
self.assertEqual(r.status_code, 200)
txn_id = txn_section.get('id', None)
txn_timeout = txn_section.get('timeout', None)
txn_manager_address = txn_section.get('manager_address', None)
txn_config_changed = txn_section.get('config_changed', None)
self.assertEqual(txn_id, 0)
self.assertEqual(txn_timeout, 0)
self.assertEqual(txn_manager_address, '')
self.assertFalse(txn_config_changed)
def test_no_operation_again(self):
body = {'id': 42}
_ = requests.put(
self.URL, verify=False, headers=self.HEADERS, json=body
)
r = requests.put(
self.URL, verify=False, headers=self.HEADERS, json=body
)
self.assertEqual(r.status_code, 422)
class ConfigPutTestCase(BaseServerTestCase):
URL = ConfigTestCase.URL
def setUp(self):
if 'skip_setUp' not in self.shortDescription():
body = {'id': 42, 'timeout': 42}
_ = requests.put(
BeginTestCase.URL,
verify=False, headers=self.HEADERS, json=body
)
return super().setUp()
def tearDown(self):
body = {'id': 42}
_ = requests.put(
RollbackTestCase.URL, verify=False, headers=self.HEADERS, json=body
)
return super().tearDown()
def test_wrong_content_type(self):
"""Test wrong Content-Type."""
r = requests.put(self.URL, verify=False, headers=self.HEADERS)
self.assertEqual(r.status_code, 415)
def test_no_active_operation(self):
"""Test no active operation. skip_setUp"""
body = {
'revision': 42,
'manager': '1.1.1.1',
'timeout': 42,
'config': "<Columnstore>...</Columnstore>",
'mcs_config_filename': self.mcs_config_filename
}
r = requests.put(
self.URL, verify=False, headers=self.HEADERS, json=body
)
self.assertEqual(r.status_code, 422)
self.assertEqual(
r.json(), {'error': 'PUT /config called outside of an operation.'}
)
def test_no_mandatory_attributes(self):
"""Test no mandatory attributes. skip_setUp"""
body = {'id': 42, 'timeout': 42}
r = requests.put(
BeginTestCase.URL, verify=False, headers=self.HEADERS, json=body
)
self.assertEqual(r.status_code, 200)
body = {
'revision': 42,
'timeout': 42,
'config': "<Columnstore>...</Columnstore>",
'mcs_config_filename': self.mcs_config_filename
}
r = requests.put(
self.URL, verify=False, headers=self.HEADERS, json=body
)
self.assertEqual(r.status_code, 422)
self.assertEqual(
r.json(), {'error': 'Mandatory attribute is missing.'}
)
body = {
'manager': '1.1.1.1',
'revision': 42,
'config': "<Columnstore>...</Columnstore>",
'mcs_config_filename': self.mcs_config_filename
}
r = requests.put(
self.URL, verify=False, headers=self.HEADERS, json=body
)
self.assertEqual(r.status_code, 422)
self.assertEqual(
r.json(), {'error': 'Mandatory attribute is missing.'}
)
body = {
'manager': '1.1.1.1',
'revision': 42,
'timeout': 42,
'mcs_config_filename': self.mcs_config_filename
}
r = requests.put(
self.URL, verify=False, headers=self.HEADERS, json=body
)
self.assertEqual(r.status_code, 422)
self.assertEqual(
r.json(), {'error': 'Mandatory attribute is missing.'}
)
def test_no_auth(self):
"""Test no auth."""
body = {
'revision': 42,
'manager': '1.1.1.1',
'timeout': 42,
'config': "<Columnstore>...</Columnstore>",
'mcs_config_filename': self.mcs_config_filename
}
r = requests.put(
self.URL, verify=False, headers=self.NO_AUTH_HEADERS, json=body
)
self.assertEqual(r.status_code, 401)
def test_send_rollback(self):
"""Test send rollback."""
body = {'id': 42}
r = requests.put(
RollbackTestCase.URL, verify=False, headers=self.HEADERS, json=body
)
self.assertEqual(r.status_code, 200)
def test_wrong_cluster_mode(self):
"""Test wrong cluster mode."""
body = {
'revision': 42,
'manager': '1.1.1.1',
'timeout': 42,
'cluster_mode': 'somemode',
'mcs_config_filename': self.mcs_config_filename
}
r = requests.put(
self.URL, verify=False, headers=self.HEADERS, json=body
)
self.assertEqual(r.status_code, 422)
self.assertTrue(
"Error occured setting cluster" in r.content.decode('ASCII')
)
def test_set_mode(self):
"""Test set mode."""
mode = 'readwrite'
body = {
'revision': 42,
'manager': '1.1.1.1',
'timeout': 42,
'cluster_mode': mode,
'mcs_config_filename': self.mcs_config_filename
}
r = requests.put(
self.URL, verify=False, headers=self.HEADERS, json=body
)
# DBRM controller must be up and running
self.assertEqual(r.status_code, 200)
r = requests.get(
StatusTestCase.URL, verify=False, headers=self.HEADERS
)
self.assertEqual(r.status_code, 200)
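# a node that is not the DBRM master reports 'readonly' in its status even
# when the cluster mode was set to 'readwrite', so adjust the expectation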
fake_mode = mode
with DBRM() as dbrm:
if dbrm.get_dbrm_status() != 'master':
fake_mode = 'readonly'
self.assertEqual(r.json()['cluster_mode'], fake_mode)
self.assertEqual(dbrm._get_cluster_mode(), mode)
def test_apply_config(self):
"""Test apply config."""
body = {'id': 42, 'timeout': 42}
_ = requests.put(
BeginTestCase.URL,
verify=False, headers=self.HEADERS, json=body
)
config_file = Path(self.mcs_config_filename)
config = config_file.read_text()
body = {
'revision': 42,
'manager': '1.1.1.1',
'timeout': 15,
'config': config,
'mcs_config_filename': self.mcs_config_filename
}
r = requests.put(
self.URL, verify=False, headers=self.HEADERS, json=body
)
self.assertEqual(r.status_code, 200)
txn_section = self.app.config.get('txn', None)
self.assertTrue(txn_section is not None)
txn_config_changed = txn_section.get('config_changed', None)
self.assertEqual(txn_config_changed, True)
r = requests.get(
ConfigTestCase.URL, verify=False, headers=self.HEADERS
)
self.assertEqual(r.status_code, 200)
# commenting this out until we get global config
# self.assertEqual(r.json()['config'], config)
class PrimaryTestCase(BaseServerTestCase):
URL = f'https://localhost:8640/cmapi/{_version}/node/primary'
def test_is_primary(self):
r = requests.get(self.URL, verify=False)
self.assertEqual(r.status_code, 200)

View File

@@ -0,0 +1,160 @@
import cherrypy
import unittest
import os
import socket
from shutil import copyfile
from contextlib import contextmanager
from cmapi_server import helpers, node_manipulation
from mcs_node_control.models.node_config import NodeConfig
from cmapi_server.controllers.dispatcher import dispatcher, jsonify_error
from cmapi_server.test.unittest_global import (
    create_self_signed_certificate, cert_filename, mcs_config_filename,
    cmapi_config_filename, tmp_mcs_config_filename, tmp_cmapi_config_filename
)
@contextmanager
def start_server():
if not os.path.exists(cert_filename):
create_self_signed_certificate()
app = cherrypy.tree.mount(root=None, config=cmapi_config_filename)
app.config.update({
'/': {
'request.dispatch': dispatcher,
'error_page.default': jsonify_error,
},
'config': {
'path': cmapi_config_filename,
},
})
cherrypy.config.update(cmapi_config_filename)
cherrypy.engine.start()
cherrypy.engine.wait(cherrypy.engine.states.STARTED)
yield
cherrypy.engine.exit()
cherrypy.engine.block()
class TestTransactions(unittest.TestCase):
def setUp(self):
if not os.path.exists(tmp_mcs_config_filename):
f = open(tmp_mcs_config_filename, 'x')
f.close()
copyfile(mcs_config_filename, tmp_mcs_config_filename)
def tearDown(self):
if os.path.exists(tmp_mcs_config_filename):
copyfile(tmp_mcs_config_filename, mcs_config_filename)
os.remove(tmp_mcs_config_filename)
@classmethod
def setUpClass(cls):
if not os.path.exists(tmp_cmapi_config_filename):
f = open(tmp_cmapi_config_filename, 'x')
f.close()
copyfile(cmapi_config_filename, tmp_cmapi_config_filename)
@classmethod
def tearDownClass(cls):
if os.path.exists(tmp_cmapi_config_filename):
copyfile(tmp_cmapi_config_filename, cmapi_config_filename)
os.remove(tmp_cmapi_config_filename)
def test_start_commit(self):
print(" ******** Running TestTransactions.test_start_commit()")
with start_server():
try:
hostname = socket.gethostname()
myaddr = socket.gethostbyname(hostname)
node_manipulation.add_node(
myaddr, mcs_config_filename, mcs_config_filename
)
result = helpers.start_transaction(
cmapi_config_filename, mcs_config_filename,
optional_nodes = [myaddr]
)
self.assertTrue(result[0])
self.assertEqual(len(result[2]), 1)
self.assertEqual(result[2][0], myaddr)
helpers.commit_transaction(result[1], cmapi_config_filename, mcs_config_filename, nodes = result[2])
except:
cherrypy.engine.exit()
cherrypy.engine.block()
raise
def test_start_rollback(self):
print(" ******** Running TestTransactions.test_start_rollback()")
with start_server():
try:
hostname = socket.gethostname()
myaddr = socket.gethostbyname(hostname)
node_manipulation.add_node(
myaddr, mcs_config_filename, mcs_config_filename
)
result = helpers.start_transaction(
cmapi_config_filename, mcs_config_filename,
optional_nodes = [myaddr]
)
self.assertTrue(result[0])
self.assertEqual(len(result[2]), 1)
self.assertEqual(result[2][0], myaddr)
helpers.rollback_transaction(result[1], cmapi_config_filename, mcs_config_filename) # not specifying nodes here to exercise the nodes = None path
except:
cherrypy.engine.exit()
cherrypy.engine.block()
raise
def test_broadcast_new_config(self):
print(" ******** Running TestTransactions.test_broadcast_new_config()")
with start_server():
try:
myaddr = socket.gethostbyname(socket.gethostname())
node_manipulation.add_node(myaddr, mcs_config_filename, mcs_config_filename)
# Note, 1.2.3.4 is intentional -> doesn't exist, so shouldn't end up in the node list returned
print("\n\nNOTE! This is expected to pause here for ~10s, this isn't an error, yet.\n")
result = helpers.start_transaction(
cmapi_config_filename, mcs_config_filename,
optional_nodes = ['1.2.3.4']
)
self.assertTrue(result[0])
self.assertEqual(len(result[2]), 1)
self.assertEqual(result[2][0], myaddr)
success = helpers.broadcast_new_config(
mcs_config_filename,
cmapi_config_filename=cmapi_config_filename,
test_mode=True,
nodes = result[2]
)
# not specifying nodes here to exercise the nodes = None path
helpers.commit_transaction(
result[1], cmapi_config_filename, mcs_config_filename
)
self.assertTrue(success)
except:
cherrypy.engine.exit()
cherrypy.engine.block()
raise
def test_update_rev_and_manager(self):
print(" ******** Running TestTransactions.test_update_rev_and_manager()")
with start_server():
try:
myaddr = socket.gethostbyname(socket.gethostname())
node_manipulation.add_node(
myaddr, mcs_config_filename, mcs_config_filename
)
helpers.update_revision_and_manager(mcs_config_filename, "./update_rev1.xml")
nc = NodeConfig()
root = nc.get_current_config_root("./update_rev1.xml")
self.assertEqual(root.find("./ConfigRevision").text, "2")
self.assertEqual(root.find("./ClusterManager").text, socket.gethostbyname(socket.gethostname()))
except:
cherrypy.engine.exit()
cherrypy.engine.block()
raise
os.remove("./update_rev1.xml")

View File

@ -0,0 +1,203 @@
import logging
import os
import unittest
from contextlib import contextmanager
from datetime import datetime, timedelta
from shutil import copyfile
from tempfile import TemporaryDirectory
import cherrypy
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.asymmetric import rsa
from cryptography import x509
from cryptography.x509.oid import NameOID
from cryptography.hazmat.primitives import hashes
from cmapi_server import helpers
from cmapi_server.constants import CMAPI_CONF_PATH
from cmapi_server.controllers.dispatcher import dispatcher, jsonify_error
from cmapi_server.managers.process import MCSProcessManager
TEST_API_KEY = 'somekey123'
cert_filename = './cmapi_server/self-signed.crt'
MCS_CONFIG_FILEPATH = '/etc/columnstore/Columnstore.xml'
COPY_MCS_CONFIG_FILEPATH = './cmapi_server/test/original_Columnstore.xml'
TEST_MCS_CONFIG_FILEPATH = './cmapi_server/test/CS-config-test.xml'
# TODO:
# - rename after fix in all places
# - fix path to abs
mcs_config_filename = './cmapi_server/test/CS-config-test.xml'
tmp_mcs_config_filename = './cmapi_server/test/tmp.xml'
cmapi_config_filename = './cmapi_server/cmapi_server.conf'
tmp_cmapi_config_filename = './cmapi_server/test/tmp.conf'
# constants for process dispatchers
DDL_SERVICE = 'mcs-ddlproc'
CONTROLLERNODE_SERVICE = 'mcs-controllernode.service'
UNKNOWN_SERVICE = 'unknown_service'
SYSTEMCTL = 'sudo systemctl'
logging.basicConfig(level=logging.DEBUG)
def create_self_signed_certificate():
key_filename = './cmapi_server/self-signed.key'
key = rsa.generate_private_key(
public_exponent=65537,
key_size=2048,
backend=default_backend()
)
with open(key_filename, "wb") as f:
f.write(key.private_bytes(
encoding=serialization.Encoding.PEM,
format=serialization.PrivateFormat.TraditionalOpenSSL,
encryption_algorithm=serialization.NoEncryption()),
)
subject = issuer = x509.Name([
x509.NameAttribute(NameOID.COUNTRY_NAME, u"US"),
x509.NameAttribute(NameOID.STATE_OR_PROVINCE_NAME, u"California"),
x509.NameAttribute(NameOID.LOCALITY_NAME, u"Redwood City"),
x509.NameAttribute(NameOID.ORGANIZATION_NAME, u"MariaDB"),
x509.NameAttribute(NameOID.COMMON_NAME, u"mariadb.com"),
])
basic_constraints = x509.BasicConstraints(ca=True, path_length=0)
cert = x509.CertificateBuilder(
).subject_name(
subject
).issuer_name(
issuer
).public_key(
key.public_key()
).serial_number(
x509.random_serial_number()
).not_valid_before(
datetime.utcnow()
).not_valid_after(
datetime.utcnow() + timedelta(days=365)
).add_extension(
basic_constraints,
False
).add_extension(
x509.SubjectAlternativeName([x509.DNSName(u"localhost")]),
critical=False
).sign(key, hashes.SHA256(), default_backend())
with open(cert_filename, "wb") as f:
f.write(cert.public_bytes(serialization.Encoding.PEM))
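# A minimal, illustrative sketch (not used by the tests) of re-reading the
# certificate written above to sanity-check its subject; it relies on the
# x509 loader from the cryptography package already imported in this module.
def _sketch_verify_self_signed_cert():
    with open(cert_filename, 'rb') as f:
        cert = x509.load_pem_x509_certificate(f.read(), default_backend())
    # expects CN=mariadb.com among the subject attributes
    return cert.subject.rfc4514_string()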
def run_detect_processes():
cfg_parser = helpers.get_config_parser(CMAPI_CONF_PATH)
d_name, d_path = helpers.get_dispatcher_name_and_path(cfg_parser)
MCSProcessManager.detect(d_name, d_path)
@contextmanager
def run_server():
if not os.path.exists(cert_filename):
create_self_signed_certificate()
cherrypy.engine.start()
cherrypy.engine.wait(cherrypy.engine.states.STARTED)
run_detect_processes() #TODO: Move cause slow down each test for 5s
yield
cherrypy.engine.exit()
cherrypy.engine.block()
class BaseServerTestCase(unittest.TestCase):
HEADERS = {'x-api-key': TEST_API_KEY}
NO_AUTH_HEADERS = {'x-api-key': None}
TEST_PARAMS = (
('auth ok', HEADERS, 200),
('no auth', NO_AUTH_HEADERS, 401)
)
def run(self, result=None):
with TemporaryDirectory() as tmp_dir:
self.tmp_dir = tmp_dir
self.cmapi_config_filename = os.path.join(
tmp_dir, 'tmp_cmapi_config.conf'
)
self.mcs_config_filename = os.path.join(
tmp_dir, 'tmp_mcs_config.xml'
)
copyfile(cmapi_config_filename, self.cmapi_config_filename)
copyfile(TEST_MCS_CONFIG_FILEPATH, self.mcs_config_filename)
self.app = cherrypy.tree.mount(
root=None, config=self.cmapi_config_filename
)
self.app.config.update({
'/': {
'request.dispatch': dispatcher,
'error_page.default': jsonify_error,
},
'config': {
'path': self.cmapi_config_filename,
},
'Authentication' : self.HEADERS
})
cherrypy.config.update(self.cmapi_config_filename)
with run_server():
return super().run(result=result)
class BaseNodeManipTestCase(unittest.TestCase):
NEW_NODE_NAME = 'mysql.com' # something that has a DNS entry everywhere
def setUp(self):
self.tmp_files = []
copyfile(TEST_MCS_CONFIG_FILEPATH, tmp_mcs_config_filename)
def tearDown(self):
for tmp_file in self.tmp_files:
if os.path.exists(tmp_file):
os.remove(tmp_file)
if os.path.exists(tmp_mcs_config_filename):
os.remove(tmp_mcs_config_filename)
class BaseProcessDispatcherCase(unittest.TestCase):
node_started = None
@classmethod
def setUpClass(cls) -> None:
run_detect_processes()
cls.node_started = MCSProcessManager.get_running_mcs_procs() != 0
return super().setUpClass()
@classmethod
def tearDownClass(cls) -> None:
if (MCSProcessManager.get_running_mcs_procs() != 0) == cls.node_started:
return super().tearDownClass()
if cls.node_started:
MCSProcessManager.start_node(is_primary=True)
else:
MCSProcessManager.stop_node(is_primary=True)
return super().tearDownClass()
def setUp(self) -> None:
if MCSProcessManager.process_dispatcher.is_service_running(
CONTROLLERNODE_SERVICE
):
self.controller_node_cmd = 'start'
else:
self.controller_node_cmd = 'stop'
# prevent the 'start-limit-hit' systemd error, see MCOL-5186
os.system(f'{SYSTEMCTL} reset-failed')
return super().setUp()
def tearDown(self) -> None:
os.system(
f'{SYSTEMCTL} {self.controller_node_cmd} {CONTROLLERNODE_SERVICE}'
)
return super().tearDown()

cmapi/conffiles.template Normal file

@ -0,0 +1 @@
${ETC_DIR}/cmapi_server.conf


@ -0,0 +1,13 @@
#!/bin/bash
# This script gracefully starts MCS
/bin/systemctl start mcs-workernode
/bin/systemctl start mcs-controllernode
/bin/systemctl start mcs-primproc
/bin/systemctl start mcs-writeengineserver
/bin/systemctl start mcs-exemgr
/bin/systemctl start mcs-dmlproc
/bin/systemctl start mcs-ddlproc
exit 0


@ -0,0 +1,14 @@
#!/bin/bash
# This script gracefully shuts down MCS
/bin/systemctl stop mcs-dmlproc
/bin/systemctl stop mcs-ddlproc
/bin/systemctl stop mcs-exemgr
/bin/systemctl stop mcs-writeengineserver
/bin/systemctl stop mcs-primproc
/bin/systemctl stop mcs-controllernode
/bin/systemctl stop mcs-workernode
/bin/systemctl stop mcs-storagemanager
exit 0

cmapi/engine_files/mcs-loadbrm.py Executable file

@ -0,0 +1,155 @@
#!/usr/bin/env python3
import subprocess
import sys
import xml.etree.ElementTree as ET
from pathlib import Path
import time
import configparser
import os
import datetime
API_CONFIG_PATH = '/etc/columnstore/cmapi_server.conf'
BYPASS_SM_PATH = '/tmp/columnstore_tmp_files/rdwrscratch/BRM_saves'
def get_key():
cmapi_config = configparser.ConfigParser()
cmapi_config.read(API_CONFIG_PATH)
if 'Authentication' not in cmapi_config.sections():
return ''
return cmapi_config['Authentication'].get('x-api-key', '')
def get_version():
return '0.4.0'
def get_port():
return '8640'
if __name__ == '__main__':
# Skip systemd in container environments
use_systemd = True
if len(sys.argv) > 1:
use_systemd = not sys.argv[1] == 'no'
sm_config = configparser.ConfigParser()
sm_config.read('/etc/columnstore/storagemanager.cnf')
cs_config = ET.parse('/etc/columnstore/Columnstore.xml')
config_root = cs_config.getroot()
storage = sm_config.get('ObjectStorage', 'service', fallback='LocalStorage')
bucket = sm_config.get('S3', 'bucket', fallback='some_bucket')
dbrmroot = config_root.find('./SystemConfig/DBRMRoot').text
pmCount = int(config_root.find('./SystemModuleConfig/ModuleCount3').text)
loadbrm = '/usr/bin/load_brm'
brm_saves_current = ''
if storage.lower() == 's3' and not bucket.lower() == 'some_bucket':
# start SM using systemd
if use_systemd is True:
cmd = 'systemctl start mcs-storagemanager'
retcode = subprocess.call(cmd, shell=True)
if retcode < 0:
print('Failed to start storagemanager. '
      '{} exits with {}.'.format(cmd, retcode))
sys.exit(1)
# delay to allow storagemanager to init
time.sleep(1)
brm = 'data1/systemFiles/dbrm/BRM_saves_current'
config_root.find('./Installation/DBRootStorageType').text = "StorageManager"
config_root.find('./StorageManager/Enabled').text = "Y"
if config_root.find('./SystemConfig/DataFilePlugin') is None:
config_root.find('./SystemConfig').append(ET.Element("DataFilePlugin"))
config_root.find('./SystemConfig/DataFilePlugin').text = "libcloudio.so"
cs_config.write('/etc/columnstore/Columnstore.xml.loadbrm')
os.replace('/etc/columnstore/Columnstore.xml.loadbrm', '/etc/columnstore/Columnstore.xml') # atomic replacement
# Single-node on S3
if storage.lower() == 's3' and not bucket.lower() == 'some_bucket' and pmCount == 1:
try:
print("Running smcat")
brm_saves_current = subprocess.check_output(['smcat', brm])
except subprocess.CalledProcessError as e:
# will happen when brm file does not exist
print('{} does not exist.'.format(brm), file=sys.stderr)
else:
brm = '{}_current'.format(dbrmroot)
# Multi-node
if pmCount > 1:
try:
import requests
requests.packages.urllib3.disable_warnings()
except ImportError as e:
print('requests Python module does not exist. '
      'Please install CMAPI first.', file=sys.stderr)
sys.exit(1)
try:
primary_address = config_root.find('./DBRM_Controller/IPAddr').text
api_key = get_key()
if len(api_key) == 0:
print('Failed to find API key in {}.'.format(API_CONFIG_PATH), \
file=sys.stderr)
sys.exit(1)
headers = {'x-api-key': api_key}
api_version = get_version()
api_port = get_port()
elems = ['em', 'journal', 'vbbm', 'vss']
for e in elems:
print("Pulling {} from the primary node.".format(e))
url = "https://{}:{}/cmapi/{}/node/meta/{}".format(primary_address, \
api_port, api_version, e)
r = requests.get(url, verify=False, headers=headers, timeout=30)
if (r.status_code != 200):
raise RuntimeError('Error requesting {} from the '
                   'primary node.'.format(e))
# To avoid SM storing BRM files
if storage.lower() == 's3' and bucket.lower() != 'some_bucket':
dbrmroot = BYPASS_SM_PATH
if not os.path.exists(dbrmroot):
os.makedirs(dbrmroot)
current_name = '{}_{}'.format(dbrmroot, e)
print ("Saving {} to {}".format(e, current_name))
path = Path(current_name)
path.write_bytes(r.content)
except Exception as e:
print(str(e))
print('Failed to load BRM data from the primary '
      'node {}.'.format(primary_address), file=sys.stderr)
sys.exit(1)
brm_saves_current = b"BRM_saves\n"
else:
# load local dbrm
try:
brm_saves_current = subprocess.check_output(['cat', brm])
except subprocess.CalledProcessError as e:
# will happen when brm file does not exist
print('{} does not exist.'.format(brm), file=sys.stderr)
if brm_saves_current:
cmd = '{} {}{}'.format(loadbrm, dbrmroot, \
brm_saves_current.decode("utf-8").replace("BRM_saves", ""))
print(f"{datetime.datetime.now()} : Running {cmd}")
try:
retcode = subprocess.call(cmd, shell=True)
if retcode < 0:
print('{} exits with {}.'.format(cmd, retcode))
sys.exit(1)
except OSError as e:
sys.exit(1)


@ -0,0 +1,84 @@
#!/usr/bin/env python3
import subprocess
import sys
import xml.etree.ElementTree as ET
import configparser
XML_CONFIG_PATH = '/etc/columnstore/Columnstore.xml'
SM_CONFIG_PATH = '/etc/columnstore/storagemanager.cnf'
REST_REQUEST_TO = 2
def get_version():
return '0.4.0'
def get_port():
return '8640'
if __name__ == '__main__':
master_addr = ''
pm_count = 0
try:
cs_config = ET.parse(XML_CONFIG_PATH)
config_root = cs_config.getroot()
master_addr = config_root.find('./DBRM_Controller/IPAddr').text
pm_count = int(config_root.find('./SystemModuleConfig/ModuleCount3').text)
except (FileNotFoundError, AttributeError, ValueError) as e:
print("Exception had been raised. Continue anyway")
print(str(e))
storage = 'LocalStorage'
sm_config = configparser.ConfigParser()
files_read = len(sm_config.read(SM_CONFIG_PATH))
if files_read == 1:
storage = sm_config.get('ObjectStorage', 'service')
default_addr = '127.0.0.1'
savebrm = 'save_brm'
is_primary = False
# For multi-node with local storage or default installations
if (storage.lower() != 's3' and master_addr != default_addr) or \
master_addr == default_addr:
is_primary = True
print('Multi-node with local-storage detected.')
else:
has_requests = False
try:
import requests
requests.packages.urllib3.disable_warnings()
has_requests = True
except ImportError as e:
print('requests Python module does not exist. '
      'Please install CMAPI first.')
if has_requests is True:
try:
print('Requesting for the primary node status.')
api_version = get_version()
api_port = get_port()
url = "https://{}:{}/cmapi/{}/node/primary".format(default_addr, \
api_port, api_version)
resp = requests.get(url,
verify=False,
timeout=REST_REQUEST_TO)
if (resp.status_code != 200):
print("Error sending GET /node/primary.")
else:
is_primary = resp.json()['is_primary'] == 'True'
except Exception as e:
print('Failed to request.')
print(str(e))
if is_primary is True:
try:
retcode = subprocess.call(savebrm, shell=True)
if retcode < 0:
print('{} exits with {}.'.format(savebrm, retcode))
sys.exit(0)
except OSError as e:
print(str(e))
sys.exit(0)
sys.exit(0)


@ -0,0 +1,13 @@
[Unit]
Description=mcs-workernode
After=mcs-loadbrm.service
[Service]
Type=simple
ExecStart=/usr/bin/workernode DBRM_Worker1 fg
Restart=on-failure
ExecStop=/usr/bin/env bash -c "kill -15 $MAINPID"
ExecStopPost=-/usr/bin/mcs-savebrm.py
ExecStopPost=/usr/bin/env bash -c "clearShm > /dev/null 2>&1"
TimeoutStopSec=120
EnvironmentFile=-/etc/columnstore/systemd.env


@ -0,0 +1,266 @@
# this class handles the comm with the agent; whatever it will be
import datetime
import logging
import threading
import time
logger = logging.getLogger('agent_comm')
# First an agent base class
class AgentBase:
def activateNodes(self, nodes):
print("AgentBase: Got activateNodes({})".format(nodes))
def deactivateNodes(self, nodes):
print("AgentBase: Got deactivateNodes({})".format(nodes))
def movePrimaryNode(self, placeholder):
print("AgentBase: Got movePrimaryNode()")
def enterStandbyMode(self):
print("AgentBase: Got enterStandbyMode()")
def getNodeHealth(self):
print("AgentBase: Got getNodeHealth()")
return 0
def raiseAlarm(self, msg):
print("AgentBase: Got raiseAlarm({})".format(msg))
def startTransaction(self, extra_nodes = [], remove_nodes = []):
print(f"AgentBase: Got startTransaction, extra_nodes={extra_nodes}, remove_nodes={remove_nodes}")
return 0
def commitTransaction(self, txnid, nodes):
print("AgentBase: Got commitTransaction")
def rollbackTransaction(self, txnid, nodes):
print("AgentBase: Got abortTransaction")
class OpAndArgs:
name = None # a callable in AgentBase
args = None # a tuple containing the args for the callable
def __init__(self, name, *args):
self.name = name
self.args = args
def __str__(self):
return f"{str(self.name.__qualname__)}{str(self.args)}"
def __hash__(self):
return hash((self.name.__qualname__, str(self.args)))
def __eq__(self, other):
return self.name == other.name and self.args == other.args
def __ne__(self, other):
return not self.__eq__(other)
def run(self):
self.name(*self.args)
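# A minimal, illustrative sketch of the dedup behavior this enables: two
# operations with the same callable and args hash and compare equal, so
# AgentComm._addEvent() below will queue only the first one.
def _sketch_opandargs_dedup():
    base = AgentBase()
    op1 = OpAndArgs(base.activateNodes, ['node1'])
    op2 = OpAndArgs(base.activateNodes, ['node1'])
    assert op1 == op2 and hash(op1) == hash(op2)
    deduper = {op1: None}
    assert op2 in deduper   # the duplicate would not be queued again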
# The AgentComm class
# Doesn't do anything but pass along events to the Agent yet
# TODO: implement an event queue and a thread to pluck events and issue them
# to the agent. Done?
# TODO: de-dup events as they come in from the node monitor,
# add to the event queue.
# TODO: rewrite using builtin Queue class
class AgentComm:
def __init__(self, agent = None):
if agent is None:
self._agent = AgentBase()
else:
self._agent = agent
# deduper contains queue contents, events in progress, and finished
# events up to 10s after they finished
self._deduper = {}
self._die = False
self._queue = []
self._mutex = threading.Lock()
self._thread = None
def __del__(self):
self.die()
def start(self):
self._die = False
self._thread = threading.Thread(target=self._runner, name='AgentComm')
self._thread.start()
# TODO: rename to stop
def die(self):
self._die = True
self._thread.join()
# returns (len-of-event-queue, len-of-deduper)
def getQueueSize(self):
self._mutex.acquire()
ret = (len(self._queue), len(self._deduper))
self._mutex.release()
return ret
def activateNodes(self, nodes):
self._addEvent(self._agent.activateNodes, (nodes))
def deactivateNodes(self, nodes):
self._addEvent(self._agent.deactivateNodes, (nodes))
def movePrimaryNode(self):
self._addEvent(self._agent.movePrimaryNode, ())
def enterStandbyMode(self):
# The other events are moot if this node has to enter standby mode
self._mutex.acquire()
op = OpAndArgs(self._agent.enterStandbyMode)
self._queue = [ op ]
self._deduper = { op : datetime.datetime.now() }
self._mutex.release()
def getNodeHealth(self):
return self._agent.getNodeHealth()
def raiseAlarm(self, msg):
self._agent.raiseAlarm(msg)
def _addEvent(self, name, args):
"""Interface to the event queue."""
op = OpAndArgs(name, args)
self._mutex.acquire()
if op not in self._deduper:
self._deduper[op] = None
self._queue.append(op)
self._mutex.release()
def _getEvents(self):
"""
This gets all queued events at once and prunes events older than
10 seconds from the deduper.
"""
self._mutex.acquire()
ret = self._queue
self._queue = []
# prune events that finished more than 10 secs ago from the deduper
tenSecsAgo = datetime.datetime.now() - datetime.timedelta(seconds = 10)
for (op, finishTime) in list(self._deduper.items()):
if finishTime is not None and finishTime < tenSecsAgo:
del self._deduper[op]
self._mutex.release()
return ret
def _requeueEvents(self, events):
self._mutex.acquire()
# events has commands issued before what is currently in _queue
events.extend(self._queue)
self._queue = events
self._mutex.release()
def _markEventsFinished(self, events):
self._mutex.acquire()
now = datetime.datetime.now()
for event in events:
self._deduper[event] = now
self._mutex.release()
def _runner(self):
while not self._die:
try:
self.__runner()
except Exception:
logger.error(
'AgentComm.runner(): got an unrecognised exception.',
exc_info=True
)
if not self._die:
time.sleep(1)
logger.info('AgentComm.runner() exiting normally...')
def __runner(self):
while not self._die:
events = self._getEvents()
logger.trace(f'Got events from queue: "{events}".')
if len(events) == 0:
time.sleep(5)
continue
nextPollTime = datetime.datetime.now() + datetime.timedelta(seconds = 5)
nodes_added = set()
nodes_removed = set()
# scan the list of events, put together the extra_nodes and remove_nodes parameters to
# startTransaction(). Note, we could consolidate the activate / deactivate calls here,
# but that's a minor optimization not worth doing yet.
needs_transaction = False
for event in events: # TODO: combine with loop below.
#print(f"got event: {event}")
# determine whether we need a transaction at all.
# List the fcns that require a txn here.
if not needs_transaction and event.name in (
self._agent.activateNodes,
self._agent.deactivateNodes,
self._agent.movePrimaryNode):
needs_transaction = True
if event.name == self._agent.activateNodes:
nodes = event.args[0]
for node in nodes:
nodes_added.add(node)
elif event.name == self._agent.deactivateNodes:
nodes = event.args[0]
for node in nodes:
nodes_removed.add(node)
if needs_transaction:
logger.debug(
'Failover starts transaction to run upcoming event.'
)
(txn_id, nodes) = self._agent.startTransaction(
extra_nodes=list(nodes_added),
remove_nodes=list(nodes_removed)
)
# The problem with this is that it's all-or-nothing
# It would be preferable to commit what has been done up to the point of failure
# and discard the event that failed.
# If the problem is with the event itself, then it may keep happening and block all
# progress.
try:
for event in events:
#print(f"Running {event}")
event.run()
except Exception as e:
logger.error(
'AgentComm.runner(): got an unrecognised exception.',
exc_info=True
)
if needs_transaction:
logger.warning(
f'Aborting transaction {txn_id}',
exc_info=True
)
self._agent.rollbackTransaction(txn_id, nodes=nodes)
# on failure, requeue the events in this batch to pick them up
# again on the next iteration
self._requeueEvents(events)
else:
if needs_transaction:
self._agent.commitTransaction(txn_id, nodes = nodes)
self._markEventsFinished(events)
finishTime = datetime.datetime.now()
if nextPollTime > finishTime:
time.sleep((nextPollTime - finishTime).seconds)

cmapi/failover/config.py Normal file

@ -0,0 +1,177 @@
import configparser
import logging
import threading
from os.path import getmtime
from cmapi_server.constants import DEFAULT_MCS_CONF_PATH, DEFAULT_SM_CONF_PATH
from mcs_node_control.models.node_config import NodeConfig
class Config:
config_file = ''
# params read from the config file
_desired_nodes = []
_active_nodes = []
_inactive_nodes = []
_primary_node = ''
_my_name = None # derived from config file
config_lock = threading.Lock()
last_mtime = 0
die = False
logger = None
def __init__(self, config_file=DEFAULT_MCS_CONF_PATH):
self.config_file = config_file
self.logger = logging.getLogger()
def getDesiredNodes(self):
self.config_lock.acquire()
self.check_reload()
ret = self._desired_nodes
self.config_lock.release()
return ret
def getActiveNodes(self):
self.config_lock.acquire()
self.check_reload()
ret = self._active_nodes
self.config_lock.release()
return ret
def getInactiveNodes(self):
self.config_lock.acquire()
self.check_reload()
ret = self._inactive_nodes
self.config_lock.release()
return ret
def getAllNodes(self):
"""Returns a 3-element tuple describing the status of all nodes.
index 0 = all nodes in the cluster
index 1 = all active nodes
index 2 = all inactive nodes
"""
self.config_lock.acquire()
self.check_reload()
ret = (self._desired_nodes, self._active_nodes, self._inactive_nodes)
self.config_lock.release()
return ret
def getPrimaryNode(self):
self.config_lock.acquire()
self.check_reload()
ret = self._primary_node
self.config_lock.release()
return ret
def is_shared_storage(self, sm_config_file=DEFAULT_SM_CONF_PATH):
"""Check if SM is S3 or not.
:param sm_config_file: path to SM config,
defaults to DEFAULT_SM_CONF_PATH
:type sm_config_file: str, optional
:return: True if SM is S3 otherwise False
:rtype: bool
TODO: remove in next releases, useless?
"""
sm_config = configparser.ConfigParser()
sm_config.read(sm_config_file)
# only LocalStorage or S3 can be returned for now
storage = sm_config.get(
'ObjectStorage', 'service', fallback='LocalStorage'
)
return storage.lower() == 's3'
def check_reload(self):
"""Check config reload.
Returns True if reload happened, False otherwise.
"""
if self.last_mtime != getmtime(self.config_file):
self.load_config()
return True
return False
def who_am_I(self):
self.config_lock.acquire()
self.check_reload()
ret = self._my_name
self.config_lock.release()
return ret
def load_config(self):
try:
node_config = NodeConfig()
root = node_config.get_current_config_root(self.config_file)
last_mtime = getmtime(self.config_file)
except Exception:
self.logger.warning(
f'Failed to parse config file {self.config_file}.',
exc_info=True
)
return False
node_tmp = root.findall('./DesiredNodes/Node')
if len(node_tmp) == 0:
self.logger.warning(
f'The config file {self.config_file} is missing entries '
'in the DesiredNodes section'
)
return False
desired_nodes = [node.text for node in node_tmp]
active_nodes = [
node.text for node in root.findall('./ActiveNodes/Node')
]
inactive_nodes = [
node.text for node in root.findall('./InactiveNodes/Node')
]
node_tmp = root.find('./PrimaryNode')
if node_tmp is None or len(node_tmp.text) == 0:
self.logger.warning(
f'The config file {self.config_file} is missing a valid '
'PrimaryNode entry'
)
return False
primary_node = node_tmp.text
# find my name in this cluster
names = set(node_config.get_network_addresses_and_names())
all_nodes = set(desired_nodes)
intersection = all_nodes & names
if len(intersection) > 1:
my_name = intersection.pop()
self.logger.warning(
'This node has multiple names in the list of desired nodes, '
'was it added more than once? Some things may not work in '
f'this configuration. Using {my_name} as the name for this '
'node.'
)
elif len(intersection) == 0:
self.logger.warning(
'This node has no entry in the list of desired nodes.'
)
my_name = None
elif len(intersection) == 1:
my_name = intersection.pop()
# handles the initial 0-node special case
if my_name == '127.0.0.1':
my_name = None
self.logger.info(f'Loaded the config file, my name is {my_name}')
desired_nodes.sort()
active_nodes.sort()
inactive_nodes.sort()
self._desired_nodes = desired_nodes
self._active_nodes = active_nodes
self._inactive_nodes = inactive_nodes
self._primary_node = primary_node
self.last_mtime = last_mtime
self._my_name = my_name
return True


@ -0,0 +1,95 @@
from array import array
from threading import Lock
# for tracking the history of heartbeat responses
class InvalidNode:
pass
class HBHistory:
# consts to denote state of the responses
NoResponse = 1
GoodResponse = 2
LateResponse = -1
NewNode = 0
# By default, keep a 600 heartbeat history for each node (10 mins @ 1hb/s)
# and consider a response late if it arrives 3+ ticks late. 3 is an arbitrary small value.
def __init__(self, tickWindow=600, lateWindow=3):
# heartbeat history per node: key = node name (str), value = array of int;
# history is flushed each time the thread restarts
self.nodeHistory = {}
# current tick resets to zero each time the thread restarts
self.currentTick = 0
self.lateWindow = lateWindow
self.mutex = Lock()
self.tickWindow = tickWindow
def _initNode(self, node, defaultValue = GoodResponse):
self.nodeHistory[node] = array(
'b', [ defaultValue for _ in range(self.tickWindow) ]
)
def removeNode(self, node):
self.mutex.acquire()
if node in self.nodeHistory:
del self.nodeHistory[node]
self.mutex.release()
def keepOnlyTheseNodes(self, nodes):
self.mutex.acquire()
nodesToKeep = set(nodes)
historicalNodes = set(self.nodeHistory.keys())
for node in historicalNodes:
if node not in nodesToKeep:
del self.nodeHistory[node]
self.mutex.release()
def setCurrentTick(self, tick):
self.mutex.acquire()
self.currentTick = tick
for pongs in self.nodeHistory.values():
pongs[tick % self.tickWindow] = self.NoResponse
self.mutex.release()
def gotHeartbeat(self, node, tickID):
if tickID <= self.currentTick - self.lateWindow:
status = self.LateResponse
else:
status = self.GoodResponse
self.mutex.acquire()
if node not in self.nodeHistory:
self._initNode(node)
self.nodeHistory[node][tickID % self.tickWindow] = status
self.mutex.release()
# defaultValue is used to init a fake history for a node this code is learning about
# 'now'. If a node is inserted into the active list, we do not want to remove
# it right away b/c it hasn't responded to any pings yet. Likewise,
# if a node is inserted into the inactive list, we do not want to activate it
# right away b/c it has responded to all pings sent so far (0). TBD if we want
# to add logic to handle an 'init' value in the history.
def getNodeHistory(self, node, tickInterval, defaultValue = GoodResponse):
self.mutex.acquire()
if node not in self.nodeHistory:
self._initNode(node, defaultValue = defaultValue)
# We don't want to return values in the range where we are likely to be
# gathering responses.
# The return value is the range of heartbeat responses from node from
# tickInterval + lateWindow ticks ago to lateWindow ticks ago
lastIndex = (self.currentTick - self.lateWindow) % self.tickWindow
firstIndex = lastIndex - tickInterval
history = self.nodeHistory[node]
if firstIndex < 0:
ret = history[firstIndex:]
ret.extend(history[:lastIndex])
else:
ret = history[firstIndex:lastIndex]
self.mutex.release()
return ret
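The slice arithmetic in getNodeHistory() stitches the ring buffer back together whenever the read window crosses index 0. A worked sketch with small made-up numbers:
# tickWindow=10, currentTick=3, lateWindow=3, tickInterval=4 gives
# lastIndex = (3 - 3) % 10 = 0 and firstIndex = -4, so the read takes
# the tail of the array plus an empty head slice.
history = list(range(10))   # stand-in for one node's response array
window = history[-4:] + history[:0]
assert window == [6, 7, 8, 9]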


@ -0,0 +1,121 @@
import logging
import threading
import time
from socket import socket, SOCK_DGRAM
from struct import pack, unpack_from
class HeartBeater:
port = 9051
dieMsg = b'die!00'
areYouThereMsg = b'AYTM'
yesIAmMsg = b'YIAM'
def __init__(self, config, history):
self.config = config
self.die = False
self.history = history
self.sequenceNum = 0
self.responseThread = None
self.sock = None
self.sockMutex = threading.Lock()
self.logger = logging.getLogger('heartbeater')
def start(self):
self.initSockets()
self.die = False
self.responseThread = threading.Thread(
target=self.listenAndRespond, name='HeartBeater'
)
self.responseThread.start()
def stop(self):
self.die = True
# break out of the recv loop
sock = socket(type=SOCK_DGRAM)
sock.sendto(self.dieMsg, ('localhost', self.port))
time.sleep(1)
self.sock.close()
self.responseThread.join()
def initSockets(self):
self.sock = socket(type=SOCK_DGRAM)
self.sock.bind(('0.0.0.0', self.port))
def listenAndRespond(self):
self.logger.info('Starting the heartbeat listener.')
while not self.die:
try:
self._listenAndRespond()
except Exception:
self.logger.warning(
'Caught an exception while listening and responding.',
exc_info=True
)
time.sleep(1)
self.logger.info('Heartbeat listener exiting normally...')
def _listenAndRespond(self):
(data, remote) = self.sock.recvfrom(300)
if len(data) < 6:
return
(msg_type, seq) = unpack_from('4sH', data, 0)
if msg_type == self.areYouThereMsg:
self.logger.trace(f'Got "are you there?" from {remote[0]}')
name = self.config.who_am_I()
if name is None:
self.logger.warning(
'Heartbeater: got an "are you there?" msg from '
f'{remote[0]}, but this node is not in the list of '
'desired nodes for the cluster. '
'This node needs a config update.'
)
return
bname = name.encode('ascii')
if len(bname) > 255:
bname = bname[:255]
msg = pack(f'4sH{len(bname)}s', self.yesIAmMsg, seq, bname)
self.send(msg, remote[0])
self.logger.trace(f'Send "yes I Am" to {remote[0]}')
elif msg_type == self.yesIAmMsg:
if len(data) > 6:
name = data[6:].decode('ascii')
self.logger.trace(f'Got "yes I am" from {name}')
self.history.gotHeartbeat(name, seq)
def send(self, msg, destaddr):
self.sockMutex.acquire()
try:
self.sock.sendto(msg, (destaddr, self.port))
except Exception:
self.logger.warning(
f'Heartbeater.send(): caught error sending msg to {destaddr}',
exc_info=True
)
finally:
self.sockMutex.release()
def sendHeartbeats(self):
nodes = self.config.getDesiredNodes()
my_name = self.config.who_am_I()
msg = pack('4sH', self.areYouThereMsg, self.sequenceNum)
self.sockMutex.acquire()
for node in nodes:
if node == my_name:
continue
try:
self.logger.trace(f'Send "are you there" node {node}')
self.sock.sendto(msg, (node, self.port))
except Exception as e:
pass
# Suppressing these logs.
# In docker the whole dns entry gets removed when a container
# goes away.
# Ends up spamming the logs until the node is removed from
# the cluster via the rest endpoint, or the node comes back up.
# self.logger.warning("Heartbeater.sendHeartbeats():
# caught an exception sending heartbeat to {}: {}".
# format(node, e))
self.sockMutex.release()
self.sequenceNum = (self.sequenceNum + 1) % 65535
self.history.setCurrentTick(self.sequenceNum)
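The UDP payloads above have a fixed layout: a 4-byte message type, an unsigned 16-bit sequence number, and (in replies) an ASCII node name; with native alignment the header occupies bytes 0-5, which is what the data[6:] slice relies on. A socket-free sketch of the round trip:
from struct import pack, unpack_from

msg = pack('4sH9s', b'YIAM', 42, b'node1.lan')   # a 'yes I am' reply
msg_type, seq = unpack_from('4sH', msg, 0)
name = msg[6:].decode('ascii')
assert (msg_type, seq, name) == (b'YIAM', 42, 'node1.lan')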


@ -0,0 +1,230 @@
import logging
import time
import threading
from .heartbeater import HeartBeater
from .config import Config
from .heartbeat_history import HBHistory
from .agent_comm import AgentComm
class NodeMonitor:
def __init__(
self, agent=None, config=None, samplingInterval=30,
flakyNodeThreshold=0.5
):
self._agentComm = AgentComm(agent)
self._die = False
self._inStandby = False
self._testMode = False # TODO: remove
self._hbHistory = HBHistory()
self._logger = logging.getLogger('node_monitor')
self._runner = None
if config is not None:
self._config = config
else:
self._config = Config()
self._hb = HeartBeater(self._config, self._hbHistory)
self.samplingInterval = samplingInterval
# not used yet, KI-V-SS for V1 [old comment from Patrick]
self.flakyNodeThreshold = flakyNodeThreshold
self.myName = self._config.who_am_I()
#self._logger.info("Using {} as my name".format(self.myName))
def __del__(self):
self.stop()
def start(self):
self._agentComm.start()
self._hb.start()
self._die = False
self._runner = threading.Thread(
target=self.monitor, name='NodeMonitor'
)
self._runner.start()
def stop(self):
self._die = True
self._agentComm.die()
if not self._testMode:
self._hb.stop()
self._runner.join()
def _removeRemovedNodes(self, desiredNodes):
self._hbHistory.keepOnlyTheseNodes(desiredNodes)
def _pickNewActor(self, nodes):
if not nodes:
return
if self.myName == nodes[0]:
self._isActorOfCohort = True
else:
self._isActorOfCohort = False
def _chooseNewPrimaryNode(self):
self._agentComm.movePrimaryNode()
def monitor(self):
while not self._die:
try:
self._logger.info('Starting the monitor logic')
self._monitor()
except Exception:
self._logger.error(
'monitor() caught an exception.',
exc_info=True
)
if not self._die:
time.sleep(1)
self._logger.info("node monitor logic exiting normally...")
def _monitor(self):
"""
This works like the main loop of a game.
1) check current state
2) identify the differences
3) update based on the differences
"""
(desiredNodes, activeNodes, inactiveNodes) = self._config.getAllNodes()
self._pickNewActor(activeNodes)
logged_idleness_msg = False
logged_active_msg = False
inStandbyMode = False
while not self._die:
# these things would normally go at the end of the loop; doing it here
# to reduce line count & chance of missing something as we add more code
oldActiveNodes = activeNodes
wasActorOfCohort = self._isActorOfCohort
self._logger.trace(
f'Previous actor of cohort state is {wasActorOfCohort}'
)
time.sleep(1)
# get config updates
(desiredNodes, activeNodes, inactiveNodes) = self._config.getAllNodes()
self.myName = self._config.who_am_I()
self.primaryNode = self._config.getPrimaryNode()
# remove nodes from history that have been removed from the cluster
self._removeRemovedNodes(desiredNodes)
# if there are less than 3 nodes in the cluster, do nothing
if len(desiredNodes) < 3:
if not logged_idleness_msg:
self._logger.info(
'Failover support is inactive; '
'requires at least 3 nodes and a shared storage system'
)
logged_idleness_msg = True
logged_active_msg = False
elif not logged_active_msg:
self._logger.info(
'Failover support is active, '
f'monitoring nodes {desiredNodes}'
)
logged_active_msg = True
logged_idleness_msg = False
# nothing to do in this case
if len(desiredNodes) == 1:
continue
# has this node been reactivated?
if self.myName in activeNodes:
#TODO: remove useless flag or use it in future releases
self._inStandby = False
# has it been deactivated?
else:
self._logger.trace('Node not in active nodes, do nothing.')
self._inStandby = True
continue # wait to be activated
# send heartbeats
self._hb.sendHeartbeats()
# decide if action is necessary based on config changes
# get the list of nodes no longer responding to heartbeats
# V1: only remove a node that hasn't responded to any pings in the sampling period
deactivateSet = set()
for node in activeNodes:
if node == self.myName:
continue
history = self._hbHistory.getNodeHistory(node, self.samplingInterval, HBHistory.GoodResponse)
self._logger.trace(f'Get history "{history}" for node {node}')
noResponses = [ x for x in history if x == HBHistory.NoResponse ]
if len(noResponses) == self.samplingInterval:
deactivateSet.add(node)
# get the list of nodes that have started responding
# reactivate live nodes that have begun responding to heartbeats
# V1: only reactivate a node if we have good responses for the whole sampling period
activateSet = set()
for node in inactiveNodes:
history = self._hbHistory.getNodeHistory(node, self.samplingInterval, HBHistory.NoResponse)
goodResponses = [ x for x in history if x == HBHistory.GoodResponse ]
if len(goodResponses) == self.samplingInterval:
activateSet.add(node)
# effectiveActiveNodeList can be described as activeNodes after pending config changes
# have been applied. Another way to view it is that it reflects current reality, whereas
# the config file reflects a fixed point in the recent past.
effectiveActiveNodeList = sorted((set(activeNodes) - deactivateSet) | activateSet)
# if there was a change to the list of active nodes
# decide if this node is the effective actor in the cohort.
if effectiveActiveNodeList != activeNodes:
self._pickNewActor(effectiveActiveNodeList)
self._logger.trace(
f'Effective list changed, actor state is {self._isActorOfCohort}'
)
elif oldActiveNodes != activeNodes:
self._pickNewActor(activeNodes)
self._logger.trace(
f'Active list changed, actor state is {self._isActorOfCohort}'
)
# if we are in a cohort that has <= 50% of the desired nodes, enter standby
if len(activeNodes)/len(desiredNodes) <= 0.5 and len(effectiveActiveNodeList)/len(desiredNodes) <= 0.5:
if not inStandbyMode:
msg = "Only {} out of {} nodes are active. At least {} are required. Entering standby mode to protect the system."\
.format(len(activeNodes), len(desiredNodes), int(len(desiredNodes)/2) + 1)
self._agentComm.raiseAlarm(msg)
self._logger.critical(msg)
self._agentComm.enterStandbyMode()
inStandbyMode = True
continue
elif inStandbyMode and len(effectiveActiveNodeList)/len(desiredNodes) > 0.5:
self._logger.info("Exiting standby mode, waiting for config update")
inStandbyMode = False
# (wasActorOfCohort and not isActorOfCohort) indicates that a new Actor has come online.
# To hand over the crown, perform one last act as Actor to add it back to the cluster
# and synchronize its config file.
# if not the actor, nothing else for this node to do
if not self._isActorOfCohort and not wasActorOfCohort:
continue
# as of here, this node is the actor of its quorum
if len(deactivateSet) > 0:
self._agentComm.deactivateNodes(list(deactivateSet))
if len(activateSet) > 0:
self._agentComm.activateNodes(activateSet)
# if the primary node is in this list to be deactivated, or it's already on the inactive list
# choose a new primary node. The deadNode list is a sanity check for cases like the cluster
# starting with the primary node already in inactive-nodes.
deadNodeList = list(deactivateSet) + inactiveNodes
if self.primaryNode in deadNodeList:
self._chooseNewPrimaryNode()
# methods for testing
def turnOffHBResponder(self):
self.stop()
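The standby rule in _monitor() is a plain majority test. A worked sketch of the arithmetic with made-up node counts:
# With 5 desired nodes, at least int(5/2) + 1 = 3 must stay active.
desired, active = 5, 2
assert active / desired <= 0.5          # 2 of 5 -> enter standby
assert (active + 1) / desired > 0.5     # 3 of 5 -> eligible to exit standby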


@ -0,0 +1,18 @@
<root>
<DesiredNodes>
<Node>node1</Node>
<Node>node2</Node>
<Node>node3</Node>
<Node>node4</Node>
</DesiredNodes>
<ActiveNodes>
<Node>node1</Node>
<Node>node2</Node>
<Node>node3</Node>
</ActiveNodes>
<InactiveNodes>
<Node>node4</Node>
</InactiveNodes>
<PrimaryNode>node2</PrimaryNode>
<ConfigRevision>1</ConfigRevision>
</root>


@ -0,0 +1,129 @@
import unittest
import time
import socket
import datetime
import cherrypy
import os.path
from contextlib import contextmanager
from ..agent_comm import AgentComm
from cmapi_server.failover_agent import FailoverAgent
from mcs_node_control.models.node_config import NodeConfig
from cmapi_server.controllers.dispatcher import dispatcher, jsonify_error
from cmapi_server.test.unittest_global import create_self_signed_certificate, cert_filename
from cmapi_server import helpers, node_manipulation
config_filename = './cmapi_server/cmapi_server.conf'
@contextmanager
def start_server():
if not os.path.exists(cert_filename):
create_self_signed_certificate()
app = cherrypy.tree.mount(root = None, config = config_filename)
app.config.update({
'/': {
'request.dispatch': dispatcher,
'error_page.default': jsonify_error,
},
'config': {
'path': config_filename,
},
})
cherrypy.config.update(config_filename)
cherrypy.engine.start()
cherrypy.engine.wait(cherrypy.engine.states.STARTED)
yield
cherrypy.engine.exit()
cherrypy.engine.block()
class TestAgentComm(unittest.TestCase):
def test_with_agent_base(self):
agent = AgentComm()
# Add events except for enterStandbyMode
agent.activateNodes(["mysql.com"])
agent.activateNodes(["mysql.com"]) # an intentional dup
agent.designatePrimaryNode("mysql.com")
agent.deactivateNodes(["mysql.com"])
agent.deactivateNodes(["mysql.com"])
agent.movePrimaryNode()
health = agent.getNodeHealth()
agent.raiseAlarm("Hello world!")
print("Waiting up to 20s for queued events to be processed and removed")
stop_time = datetime.datetime.now() + datetime.timedelta(seconds = 20)
success = False
while datetime.datetime.now() < stop_time and not success:
sizes = agent.getQueueSize()
if sizes != (0, 0):
time.sleep(1)
else:
print("Event queue & deduper are now empty")
success = True
print("Waiting for the agent comm thread to die.")
agent.die()
self.assertTrue(success)
# This is the beginning of an integration test; it needs permission to modify the real config file
def test_with_failover_agent(self):
print("\n\n") # make a little whitespace between tests
# check for existence of and permissions to write to the real config file
try:
f = open("/etc/columnstore/Columnstore.xml", "a")
f.close()
except PermissionError:
print(f"Skipping {__name__}, got a permissions error opening /etc/columnstore/Columnstore.xml for writing")
return
success = False
with start_server():
try:
agent = FailoverAgent()
agentcomm = AgentComm(agent)
# make sure the AC thread has a chance to start before we start issuing cmds.
# If it grabs jobs in the middle of this block, we'll try to send the config file
# to mysql.com. :D
time.sleep(1)
# do the same as above.
agentcomm.activateNodes(["mysql.com"])
agentcomm.activateNodes(["mysql.com"]) # an intentional dup
agentcomm.designatePrimaryNode("mysql.com")
agentcomm.deactivateNodes(["mysql.com"])
agentcomm.deactivateNodes(["mysql.com"])
agentcomm.movePrimaryNode()
health = agent.getNodeHealth()
agent.raiseAlarm("Hello world!")
print("Waiting up to 30s for queued events to be processed and removed")
stop_time = datetime.datetime.now() + datetime.timedelta(seconds = 30)
while datetime.datetime.now() < stop_time and not success:
sizes = agentcomm.getQueueSize()
if sizes != (0, 0):
time.sleep(1)
else:
print("Event queue & deduper are now empty")
success = True
if not success:
raise Exception("The event queue or de-duper did not empty within 30s")
agentcomm.die()
except Exception as e:
agentcomm.die()
cherrypy.engine.exit()
cherrypy.engine.block()
raise
# clean up the config file, remove mysql.com
txnid = helpers.start_transaction()
node_manipulation.remove_node("mysql.com")
helpers.update_revision_and_manager()
helpers.broadcast_new_config()
helpers.commit_transaction(txnid)


@ -0,0 +1,83 @@
from .. import config
import time
from socket import *
import struct
import sys
_config = config.Config("failover/test/config-test.xml")
print("got desired_nodes = {}".format(_config.getDesiredNodes()))
print("got active_nodes = {}".format(_config.getActiveNodes()))
print("got inacive_nodes = {}".format(_config.getInactiveNodes()))
print("got all nodes = {}".format(_config.getAllNodes()))
print("got primarynode = {}".format(_config.getPrimaryNode()))
print()
from ..heartbeater import HeartBeater
from ..heartbeat_history import HBHistory
hbh = HBHistory()
hb = HeartBeater(_config, hbh)
hb.start()
sock = socket(type = SOCK_DGRAM)
sock.bind(('localhost', 12345))
# Updated heartbeater to send the reply to its own port, rather than to
# the port of the sending socket. Need to update this.
#msg = struct.pack("4sH", hb.areYouThereMsg, 1234)
#sock.sendto(msg, ('localhost', hb.port))
#print("sent the are-you-there msg")
#(data, remote) = sock.recvfrom(6)
#(data, seq) = struct.unpack("4sH", data)
#if data == hb.yesIAmMsg:
# print("got the yes-i-am msg, seq = {}".format(seq))
#else:
# print("got something other than the yes-i-am-msg")
hb.stop()
#from heartbeat_history import HBHistory
#hbh = HBHistory()
hbh.setCurrentTick(0)
hbh.gotHeartbeat("node1", 0)
hbh.setCurrentTick(1)
hbh.gotHeartbeat("node2", 1)
hbh.setCurrentTick(2)
hbh.setCurrentTick(10)
hbh.gotHeartbeat("node1", 9)
hbh.gotHeartbeat("node1", 2)
pongs = hbh.getNodeHistory("node1", 20)
print("Got pongs: {}".format(pongs))
print('''
This is currently a 'manual' test, meaning the user should watch for the expected output
In this case, because NM's identity checker will return 'node1', and that does not match
node[2-4], those nodes will appear to NodeMonitor to be offline. Our starting condition
is that nodes 1-3 are active, and node4 is inactive. After 15s, nodes 2 & 3
should be deactivated, a new primary node will be chosen, and our AgentBase will start
printing these events.
''')
def testNodeMonitor1(nm):
nm.start()
print("Waiting for 20 secs, watch for output from AgentBase")
time.sleep(20)
nm.stop()
time.sleep(1)
print("NodeMonitor was stopped, did it produce the right output?")
from ..node_monitor import NodeMonitor
nm = NodeMonitor(config = _config, samplingInterval = 10)
# check whether node[1-4] are in the /etc/hosts file as localhost
addr1 = gethostbyname("node1")
addr2 = gethostbyname("node2")
addr3 = gethostbyname("node3")
addr4 = gethostbyname("node4")
if addr1 == '127.0.0.1' and addr2 == '127.0.0.1' and addr3 == '127.0.0.1' and addr4 == '127.0.0.1':
testNodeMonitor1(nm)
else:
print("Skipping testNodeMonitor1(). node[1-4] needs to be defined as 127.0.0.1 in /etc/hosts")
print("tester is finished")

cmapi/mcs.template Executable file

@ -0,0 +1 @@
PYTHONPATH="${CMAPI_DIR}:${CMAPI_DIR}/deps" ${CMAPI_DIR}/python/bin/python3 -m mcs_cluster_tool "$@"


@ -0,0 +1,30 @@
import logging
import sys
import typer
from cmapi_server.logging_management import dict_config, add_logging_level
from mcs_cluster_tool import cluster_app
from mcs_cluster_tool.constants import MCS_CLI_LOG_CONF_PATH
# don't show --install-completion and --show-completion options in help message
app = typer.Typer(
add_completion=False,
help=(
'The MCS Command Line Interface is a unified tool to manage your '
'MCS services'
),
)
app.add_typer(cluster_app.app, name="cluster")
if __name__ == "__main__":
add_logging_level('TRACE', 5)  # TODO: remove when standalone mode is added.
dict_config(MCS_CLI_LOG_CONF_PATH)
logger = logging.getLogger('mcs_cli')
# add separator between cli commands logging
logger.debug(f'{"-":-^80}')
cl_args_line = ' '.join(sys.argv[1:])
logger.debug(f'Called "mcs {cl_args_line}"')
app(prog_name='mcs')


@ -0,0 +1,140 @@
"""Cluster typer application.
Formally this module contains all subcommands for "mcs cluster" cli command.
"""
import logging
from typing import List, Optional
import pyotp
import typer
from cmapi_server.constants import SECRET_KEY
from cmapi_server.handlers.cluster import ClusterHandler
from mcs_cluster_tool.decorators import handle_output
logger = logging.getLogger('mcs_cli')
app = typer.Typer(
help='MariaDB Columnstore cluster management command line tool.'
)
node_app = typer.Typer(help='Cluster nodes management.')
app.add_typer(node_app, name='node')
set_app = typer.Typer(help='Set cluster parameters.')
app.add_typer(set_app, name='set')
@app.command()
@handle_output
def status():
"""Get status information."""
return ClusterHandler.status(logger=logger)
@app.command()
@handle_output
def stop():
"""Stop the Columnstore cluster."""
return ClusterHandler.shutdown(logger=logger)
@app.command()
@handle_output
def start():
"""Start the Columnstore cluster."""
return ClusterHandler.start(logger=logger)
@app.command()
@handle_output
def restart():
"""Restart the Columnstore cluster."""
stop_result = ClusterHandler.shutdown(logger=logger)
if 'error' in stop_result:
return stop_result
result = ClusterHandler.start(logger=logger)
result['stop_timestamp'] = stop_result['timestamp']
return result
@node_app.command()
@handle_output
def add(
nodes: Optional[List[str]] = typer.Option(
...,
'--node', # command line argument name
help=(
'node IP, name or FQDN. '
'Can be used multiple times to add several nodes at a time.'
)
)
):
"""Add nodes to the Columnstore cluster."""
result = []
for node in nodes:
result.append(ClusterHandler.add_node(node, logger=logger))
return result
@node_app.command()
@handle_output
def remove(nodes: Optional[List[str]] = typer.Option(
...,
'--node', # command line argument name
help=(
'node IP, name or FQDN. '
'Can be used multiple times to remove several nodes at a time.'
)
)
):
"""Remove nodes from the Columnstore cluster."""
result = []
for node in nodes:
result.append(ClusterHandler.remove_node(node, logger=logger))
return result
@set_app.command()
@handle_output
def mode(cluster_mode: str = typer.Option(
...,
'--mode',
help=(
'cluster mode to set. '
'"readonly" or "readwrite" are the only acceptable values.'
)
)
):
"""Set Columnstore cluster mode."""
if cluster_mode not in ('readonly', 'readwrite'):
raise typer.BadParameter(
'"readonly" or "readwrite" are the only acceptable modes now.'
)
return ClusterHandler.set_mode(cluster_mode, logger=logger)
@set_app.command()
@handle_output
def api_key(key: str = typer.Option(..., help='API key to set.')):
"""Set API key for communication with cluster nodes via API.
WARNING: this command will affect API key value on all cluster nodes.
"""
if not key:
raise typer.BadParameter('Empty API key not allowed.')
totp = pyotp.TOTP(SECRET_KEY)
return ClusterHandler.set_api_key(key, totp.now(), logger=logger)
@set_app.command()
@handle_output
def log_level(level: str = typer.Option(..., help='Logging level to set.')):
"""Set logging level on all cluster nodes for develop purposes.
WARNING: this could dramatically affect the number of log lines.
"""
if not level:
raise typer.BadParameter('Empty log level not allowed.')
return ClusterHandler.set_log_level(level, logger=logger)
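The api_key command above pairs the new key with a one-time TOTP token derived from the shared SECRET_KEY; the receiving nodes can presumably verify it the same way. A sketch of that handshake, using a random base32 secret as a stand-in for cmapi_server.constants.SECRET_KEY:
import pyotp

secret = pyotp.random_base32()          # placeholder for the shared SECRET_KEY
token = pyotp.TOTP(secret).now()
assert pyotp.TOTP(secret).verify(token)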


@ -0,0 +1,4 @@
import os
MCS_CLI_ROOT_PATH = os.path.dirname(__file__)
MCS_CLI_LOG_CONF_PATH = os.path.join(MCS_CLI_ROOT_PATH, 'mcs_cli_log.conf')


@ -0,0 +1,35 @@
"""Module contains decorators for typer cli commands."""
import json
import logging
from functools import wraps
import typer
from cmapi_server.exceptions import CMAPIBasicError
def handle_output(func):
"""Decorator for handling output errors and add result to log file."""
@wraps(func)
def wrapper(*args, **kwargs):
logger = logging.getLogger('mcs_cli')
return_code = 1
try:
result = func(*args, **kwargs)
typer.echo(json.dumps(result, indent=2))
logger.debug(f'Command returned: {result}')
return_code = 0
except CMAPIBasicError as err:
typer.echo(err.message, err=True)
logger.error('Error while command execution', exc_info=True)
except typer.BadParameter as err:
logger.error('Bad command line parameter.')
raise err
except Exception:
logger.error(
'Undefined error while command execution',
exc_info=True
)
typer.echo('Unknown error, check the log file.', err=True)
raise typer.Exit(return_code)
return wrapper


@ -0,0 +1,31 @@
{
"version": 1,
"disable_existing_loggers": true,
"formatters": {
"default": {
"format": "%(asctime)s [%(levelname)s] (%(name)s) %(message)s",
"datefmt": "%d/%b/%Y %H:%M:%S"
}
},
"handlers": {
"file": {
"class" : "logging.handlers.RotatingFileHandler",
"formatter": "default",
"filename": "/var/log/mariadb/columnstore/mcs_cli.log",
"mode": "a",
"maxBytes": 1048576,
"backupCount": 10
}
},
"loggers": {
"": {
"level": "DEBUG",
"handlers": ["file"]
},
"mcs_cli": {
"level": "DEBUG",
"handlers": ["file"],
"propagate": false
}
}
}


@ -0,0 +1,269 @@
#!/bin/bash
# TODO: remove in next releases
programname=$0
function usage {
echo "usage: $programname op [service_name] [is_primary]"
echo " op - operation name [start|stop]"
echo " service_name - [mcs-controllernode|mcs-workernode etc]"
echo " is_primary - [0|1]"
exit 1
}
operation=$1
service_name=$2
is_primary=$3
if [[ -z "$operation" || -z "$service_name" || $is_primary -ne 0 && $is_primary -ne 1 ]]; then
usage
fi
LOG_FILE=/var/log/mariadb/columnstore/container-sh.log
start_up_to_workernode() {
# Set Variables
IFLAG=/etc/columnstore/container-initialized
LOG_PREFIX=/var/log/mariadb/columnstore
MCS_INSTALL_PATH=/var/lib/columnstore
MCS_INSTALL_BIN=/usr/bin
PROGS='StorageManager mcs-loadbrm.py workernode'
JEMALLOC_PATH=$(ldconfig -p | grep -m1 libjemalloc | awk '{print $1}')
if [ -z "$JEMALLOC_PATH" && -f $MCS_INSTALL_PATH/libjemalloc.so.2 ]; then
JEMALLOC_PATH=$MCS_INSTALL_PATH/libjemalloc.so.2
fi
export LD_PRELOAD=$JEMALLOC_PATH
# Initialize Container If Necessary
if [ ! -e $IFLAG ]; then
$MCS_INSTALL_BIN/columnstore-init &>> $LOG_PREFIX/columnstore-init.log
fi
# Verify All Programs Are Available
for i in $PROGS ; do
if [ ! -x $MCS_INSTALL_BIN/$i ] ; then
echo "$i doesn't exist."
exit 1
fi
done
# Start System
echo `date`: start_up_to_workernode\(\)... >> $LOG_FILE
touch $LOG_PREFIX/storagemanager.log && chmod 666 $LOG_PREFIX/storagemanager.log
$MCS_INSTALL_BIN/StorageManager &>> $LOG_PREFIX/storagemanager.log &
echo `date`: StorageManager PID = $! >> $LOG_FILE
sleep 1
echo `date`: loading BRM >> $LOG_FILE
touch $LOG_PREFIX/mcs-loadbrm.log && chmod 666 $LOG_PREFIX/mcs-loadbrm.log
# Argument "no" here means don't use systemd to start SM
$MCS_INSTALL_BIN/mcs-loadbrm.py no >> $LOG_PREFIX/mcs-loadbrm.log 2>&1
touch $LOG_PREFIX/workernode.log && chmod 666 $LOG_PREFIX/workernode.log
$MCS_INSTALL_BIN/workernode DBRM_Worker1 &>> $LOG_PREFIX/workernode.log &
echo `date`: workernode PID = $! >> $LOG_FILE
exit 0
}
start_those_left_at_master() {
# Set Variables
LOG_PREFIX=/var/log/mariadb/columnstore
MCS_INSTALL_PATH=/var/lib/columnstore
MCS_INSTALL_BIN=/usr/bin
# TODO: remove fast fix
# skip the binary check for ExeMgr
PROGS='controllernode PrimProc WriteEngineServer DMLProc DDLProc'
JEMALLOC_PATH=$(ldconfig -p | grep -m1 libjemalloc | awk '{print $1}')
if [ -z "$JEMALLOC_PATH" && -f $MCS_INSTALL_PATH/libjemalloc.so.2 ]; then
JEMALLOC_PATH=$MCS_INSTALL_PATH/libjemalloc.so.2
fi
export LD_PRELOAD=$JEMALLOC_PATH
# Verify All Programs Are Available (except ExeMgr)
for i in $PROGS ; do
if [ ! -x $MCS_INSTALL_BIN/$i ] ; then
echo "$i doesn't exist."
exit 1
fi
done
echo `date`: start_those_left_at_master\(\) >> $LOG_FILE
if [[ $is_primary -eq 1 ]]; then
touch $LOG_PREFIX/controllernode.log && chmod 666 $LOG_PREFIX/controllernode.log
$MCS_INSTALL_BIN/controllernode fg &>> $LOG_PREFIX/controllernode.log &
echo `date`: controllernode PID = $! >> $LOG_FILE
fi
touch $LOG_PREFIX/primproc.log && chmod 666 $LOG_PREFIX/primproc.log
$MCS_INSTALL_BIN/PrimProc &>> $LOG_PREFIX/primproc.log &
echo `date`: PrimProc PID = $! >> $LOG_FILE
sleep 1
if [ -e $MCS_INSTALL_BIN/ExeMgr ] ; then
touch $LOG_PREFIX/exemgr.log && chmod 666 $LOG_PREFIX/exemgr.log
$MCS_INSTALL_BIN/ExeMgr &>> $LOG_PREFIX/exemgr.log &
echo `date`: ExeMgr PID = $! >> $LOG_FILE
fi
touch $LOG_PREFIX/writeengineserver.log && chmod 666 $LOG_PREFIX/writeengineserver.log
$MCS_INSTALL_BIN/WriteEngineServer &>> $LOG_PREFIX/writeengineserver.log &
echo `date`: WriteEngineServer PID = $! >> $LOG_FILE
sleep 3
touch $LOG_PREFIX/dmlproc.log && chmod 666 $LOG_PREFIX/dmlproc.log
$MCS_INSTALL_BIN/DMLProc &>> $LOG_PREFIX/dmlproc.log &
echo `date`: DMLProc PID = $! >> $LOG_FILE
touch $LOG_PREFIX/ddlproc.log && chmod 666 $LOG_PREFIX/ddlproc.log
$MCS_INSTALL_BIN/DDLProc &>> $LOG_PREFIX/ddlproc.log &
echo `date`: DDLProc PID = $! >> $LOG_FILE
exit 0
}
start() {
# Set Variables
IFLAG=/etc/columnstore/container-initialized
LOG_PREFIX=/var/log/mariadb/columnstore
MCS_INSTALL_PATH=/var/lib/columnstore
MCS_INSTALL_BIN=/usr/bin
# TODO: remove fast fix
# skip check binary for ExeMgr
PROGS='StorageManager load_brm workernode controllernode PrimProc WriteEngineServer DMLProc DDLProc'
JEMALLOC_PATH=$(ldconfig -p | grep -m1 libjemalloc | awk '{print $1}')
if [ -z "$JEMALLOC_PATH" && -f $MCS_INSTALL_PATH/libjemalloc.so.2 ]; then
JEMALLOC_PATH=$MCS_INSTALL_PATH/libjemalloc.so.2
fi
export LD_PRELOAD=$JEMALLOC_PATH
# Initialize Container If Necessary
if [ ! -e $IFLAG ]; then
$MCS_INSTALL_BIN/columnstore-init &>> $LOG_PREFIX/columnstore-init.log
fi
# Verify All Programs Are Available (except ExeMgr)
for i in $PROGS ; do
if [ ! -x $MCS_INSTALL_BIN/$i ] ; then
echo "$i doesn't exist."
exit 1
fi
done
# Start System
echo `date`: start\(\)... >> $LOG_FILE
touch $LOG_PREFIX/storagemanager.log && chmod 666 $LOG_PREFIX/storagemanager.log
$MCS_INSTALL_BIN/StorageManager &>> $LOG_PREFIX/storagemanager.log &
echo `date`: StorageManager PID = $! >> $LOG_FILE
sleep 1
echo `date`: loading BRM >> $LOG_FILE
touch $LOG_PREFIX/mcs-loadbrm.log && chmod 666 $LOG_PREFIX/mcs-loadbrm.log
# Argument "no" here means don't use systemd to start SM
$MCS_INSTALL_BIN/mcs-loadbrm.py no >> $LOG_PREFIX/mcs-loadbrm.log 2>&1
touch $LOG_PREFIX/workernode.log && chmod 666 $LOG_PREFIX/workernode.log
$MCS_INSTALL_BIN/workernode DBRM_Worker2 &>> $LOG_PREFIX/workernode.log &
echo `date`: workernode PID = $! >> $LOG_FILE
sleep 2
if [[ $is_primary -eq 1 ]]; then
touch $LOG_PREFIX/controllernode.log && chmod 666 $LOG_PREFIX/controllernode.log
$MCS_INSTALL_BIN/controllernode fg &>> $LOG_PREFIX/controllernode.log &
echo `date`: controllernode PID = $! >> $LOG_FILE
fi
touch $LOG_PREFIX/primproc.log && chmod 666 $LOG_PREFIX/primproc.log
$MCS_INSTALL_BIN/PrimProc &>> $LOG_PREFIX/primproc.log &
echo `date`: PrimProc PID = $! >> $LOG_FILE
sleep 1
if [ -e $MCS_INSTALL_BIN/ExeMgr ] ; then
touch $LOG_PREFIX/exemgr.log && chmod 666 $LOG_PREFIX/exemgr.log
$MCS_INSTALL_BIN/ExeMgr &>> $LOG_PREFIX/exemgr.log &
echo `date`: ExeMgr PID = $! >> $LOG_FILE
fi
touch $LOG_PREFIX/writeengineserver.log && chmod 666 $LOG_PREFIX/writeengineserver.log
$MCS_INSTALL_BIN/WriteEngineServer &>> $LOG_PREFIX/writeengineserver.log &
echo `date`: WriteEngineServer PID = $! >> $LOG_FILE
sleep 3
if [[ $is_primary -eq 1 ]]; then
touch $LOG_PREFIX/dmlproc.log && chmod 666 $LOG_PREFIX/dmlproc.log
$MCS_INSTALL_BIN/DMLProc &>> $LOG_PREFIX/dmlproc.log &
echo `date`: DMLProc PID = $! >> $LOG_FILE
touch $LOG_PREFIX/ddlproc.log && chmod 666 $LOG_PREFIX/ddlproc.log
$MCS_INSTALL_BIN/DDLProc &>> $LOG_PREFIX/ddlproc.log &
echo `date`: DDLProc PID = $! >> $LOG_FILE
fi
exit 0
}
stop() {
# TODO: remove fast fix
# skip check binary for ExeMgr
PROGS='DMLProc DDLProc WriteEngineServer PrimProc workernode controllernode StorageManager'
MCS_INSTALL_BIN=/usr/bin
LOG_PREFIX=/var/log/mariadb/columnstore
# Stop System
echo `date`: Stopping... >> $LOG_FILE
if [[ ! -z "$(pidof $PROGS)" ]]; then
# Save BRM only on the primary node now.
if [[ ! -z "$(pidof controllernode)" ]]; then
$MCS_INSTALL_BIN/mcs-savebrm.py &>> $LOG_PREFIX/savebrm.log
fi
echo `date`: Sending SIGTERM >> $LOG_FILE
kill $(pidof $PROGS) > /dev/null
sleep 3
# Make sure StorageManager had a chance to shutdown clean
counter=1
while [ -n "$(pidof StorageManager)" -a $counter -le 60 ]
do
sleep 1
((counter++))
done
echo `date`: Sending SIGKILL >> $LOG_FILE
kill -9 $(pidof $PROGS) > /dev/null
fi
echo `date`: Clearing SHM >> $LOG_FILE
$MCS_INSTALL_BIN/clearShm
exit 0
}
case "$operation" in
'start')
# We start everything when controllernode starts at primary node and with workernode at non-primary
if [[ $is_primary -eq 1 && "mcs-workernode" == "$service_name" ]]; then
start_up_to_workernode $is_primary
elif [[ $is_primary -eq 1 && "mcs-controllernode" == "$service_name" ]]; then
start_those_left_at_master $is_primary
elif [[ $is_primary -eq 0 && "mcs-workernode" == "$service_name" ]]; then
start $is_primary
fi
;;
'stop')
if [[ $is_primary -eq 1 && "mcs-controllernode" == "$service_name" || $is_primary -eq 0 && "mcs-workernode" == "$service_name" ]]; then
stop
fi
;;
esac
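For reference, a hedged sketch of how this container script is expected to be invoked; the script name below is an assumption, the service names follow the usage text above:

# primary node: the workernode unit triggers StorageManager, BRM load and workernode
./container.sh start mcs-workernode 1
# primary node: the controllernode unit then starts the remaining services
./container.sh start mcs-controllernode 1
# non-primary node: a single call starts everything, and stop mirrors it
./container.sh start mcs-workernode 0
./container.sh stop mcs-workernode 0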

View File

@ -0,0 +1,2 @@
from mcs_node_control.models.node_status import NodeStatus

View File

@ -0,0 +1,220 @@
import logging
import socket
from cmapi_server.constants import DEFAULT_MCS_CONF_PATH
from mcs_node_control.models.dbrm_socket import (
DBRM_COMMAND_BYTES, DEFAULT_HOST, DEFAULT_PORT, DBRMSocketHandler
)
from mcs_node_control.models.node_config import NodeConfig
from mcs_node_control.models.process import Process
# TODO: why do we need bitwise shifts here? Maybe plain constant values?
SYSTEM_STATE_FLAGS = {
"SS_READY": 1 << 0, # 1
"SS_SUSPENDED": 1 << 1, # 2
"SS_SUSPEND_PENDING": 1 << 2, # 4
"SS_SHUTDOWN_PENDING": 1 << 3, # 8
"SS_ROLLBACK": 1 << 4, # 16
"SS_FORCE": 1 << 5, # 32
"SS_QUERY_READY": 1 << 6, # 64
}
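# Illustrative note (not in the original source): each flag is a distinct
# power of two, so several states can be packed into a single integer and
# tested independently, e.g.:
#   SS_READY | SS_QUERY_READY == 1 + 64 == 65
#   bool(65 & SYSTEM_STATE_FLAGS['SS_READY']) -> True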
module_logger = logging.getLogger()
class DBRM:
"""Class DBRM commands"""
def __init__(
self, root=None, config_filename: str = DEFAULT_MCS_CONF_PATH
):
self.dbrm_socket = DBRMSocketHandler()
self.root = root
self.config_filename = config_filename
def connect(self):
node_config = NodeConfig()
root = self.root or node_config.get_current_config_root(
self.config_filename
)
master_conn_info = node_config.get_dbrm_conn_info(root)
if master_conn_info is None:
module_logger.warning(
'DBRM.connect: No DBRM info in the Columnstore.xml.'
)
dbrm_host = master_conn_info['IPAddr'] or DEFAULT_HOST
dbrm_port = int(master_conn_info['Port']) or DEFAULT_PORT
self.dbrm_socket.connect(dbrm_host, dbrm_port)
def close(self):
self.dbrm_socket.close()
def __enter__(self):
self.connect()
return self
def __exit__(self, exc_type, exc_val, exc_tb):
self.close()
if exc_type:
return False
return True
def _send_command(self, command_name, command_value=None):
if command_name not in DBRM_COMMAND_BYTES:
module_logger.warning(
f'DBRM._send_command: Wrong command requested {command_name}'
)
return None
module_logger.info(
f'DBRM._send_command: Command {command_name} '
f'was requested with value {command_value}'
)
self.dbrm_socket.send(command_name, command_value)
response_value_bytes = self.dbrm_socket.receive()
if command_name == 'readonly':
reply = int.from_bytes(response_value_bytes, 'little')
else:
# the first byte is an error code
err = int.from_bytes(response_value_bytes[:1], 'little')
if err != 0:
module_logger.warning(
f'DBRM._send_command: Command {command_name} '
'returned error on server'
)
raise RuntimeError(
f'Controller Node replied error with code {err} '
f'for command {command_name}'
)
if len(response_value_bytes) < 2:
return None
reply = int.from_bytes(response_value_bytes[1:], 'little')
return reply
def get_system_state(self):
state = self._send_command('get_system_state')
return [
flag_name for flag_name, flag_value in SYSTEM_STATE_FLAGS.items()
# TODO: the logic here looks odd and is hard to read.
if flag_value & state
]
def _edit_system_state(self, states: list, command: str):
state = 0
# TODO: why do we need this? The states type is list.
# Maybe accepting a plain str without the loop would be more appropriate.
if isinstance(states, str):
states = (states,)
for state_name in states:
if state_name not in SYSTEM_STATE_FLAGS:
module_logger.warning(
f'DBRM.{command}: Wrong system state requested: '
f'{state_name}'
)
continue
# TODO: for distinct flags this is the same as simple addition,
# so why use bitwise OR?
state |= SYSTEM_STATE_FLAGS[state_name]
self._send_command(command, state)
def set_system_state(self, states: list):
self._edit_system_state(states, 'set_system_state')
def clear_system_state(self, states: list):
self._edit_system_state(states, 'clear_system_state')
@staticmethod
def get_dbrm_status():
"""Reads DBRM status
DBRM Block Resolution Manager operates in two modes:
- master
- slave
This method returns the mode of this DBRM node by
looking for a running controllernode process.
:return: mode of this DBRM node
:rtype: string
"""
if Process.check_process_alive('controllernode'):
return 'master'
return 'slave'
def _get_cluster_mode(self):
"""Get DBRM cluster mode for internal usage.
Returns real DBRM cluster mode from socket response.
"""
# state can be 1 (readonly) or 0 (readwrite), otherwise an exception is raised
state = self._send_command('readonly')
if state == 1:
return 'readonly'
elif state == 0:
return 'readwrite'
def get_cluster_mode(self):
"""Get DBRM cluster mode for external usage.
There is some kind of odd logic here.
It was requested by management.
TODO: Here we can cause a logic error.
E.g. setting a non-master node to "readwrite"
still returns "readonly".
:return: DBRM cluster mode
:rtype: str
"""
real_mode = self._get_cluster_mode()
if self.get_dbrm_status() == 'master':
return real_mode
else:
return 'readonly'
def set_cluster_mode(self, mode):
"""Set cluster mode requested
Connects to the DBRM master's socket and
send a command to set cluster mode.
:rtype: str :error or cluster mode set
"""
if mode == 'readonly':
command = 'set_readonly'
elif mode == 'readwrite':
command = 'set_readwrite'
else:
return ''
_ = self._send_command(command)
return self.get_cluster_mode()
def set_cluster_mode(
mode: str, root=None, config_filename: str = DEFAULT_MCS_CONF_PATH
):
"""Set cluster mode requested
Connects to the DBRM master's socket and send a command to
set cluster mode.
:rtype: str :error or cluster mode set
"""
try:
with DBRM(root, config_filename) as dbrm:
return dbrm.set_cluster_mode(mode)
except (ConnectionRefusedError, RuntimeError, socket.error):
module_logger.warning(
'Cannot establish DBRM connection.', exc_info=True
)
return 'readonly'
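A short usage sketch for the DBRM class above (illustrative only; it assumes a reachable DBRM master at the host/port configured in Columnstore.xml):

from mcs_node_control.models.dbrm import DBRM

with DBRM() as dbrm:
    # e.g. ['SS_READY', 'SS_QUERY_READY'] on a healthy cluster
    print(dbrm.get_system_state())
    print(dbrm.get_cluster_mode())  # 'readwrite' or 'readonly'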

View File

@ -0,0 +1,248 @@
import logging
import socket
MAGIC_BYTES = 0x14fbc137.to_bytes(4, 'little')
# value is tuple(command_bytes, command_value_length)
DBRM_COMMAND_BYTES = {
'readonly': ((20).to_bytes(1, 'little'), 0),
'set_readonly': ((14).to_bytes(1, 'little'), 0),
'set_readwrite': ((15).to_bytes(1, 'little'), 0),
'set_system_state': ((55).to_bytes(1, 'little'), 4),
'get_system_state': ((54).to_bytes(1, 'little'), 4),
'clear_system_state': ((57).to_bytes(1, 'little'), 4),
}
DEFAULT_HOST = 'localhost'
DEFAULT_PORT = 8616
SOCK_TIMEOUT = 5
class DBRMSocketHandler():
"""Class for stream socket operations.
Include all logic for detecting bytestream protocol version, reading and
parsing magic inside, getting command bytes and command value length
by command name.
"""
long_strings = None
def __init__(
self, family=socket.AF_INET, type=socket.SOCK_STREAM, proto=0,
fileno=None
) -> None:
self._socket = None
self._family = family
self._type = type
self._proto = proto
self._fileno = fileno
self._host = None
self._port = None
self._recreate_socket()
@property
def _connect_called(self):
"""Is connect method called previously.
This is the instance state to determine if "connect" method called
previously. This is not quaranteed that connection still alive.
:return: connected state
:rtype: bool
"""
if self._host and self._port:
return True
return False
def _recreate_socket(self) -> None:
"""Create new internal _socket object.
Create/recreate a new _socket object and reconnect if it was already
connected.
"""
if self._socket is not None:
self._socket.close()
self._socket = socket.socket(
family=self._family, type=self._type,
proto=self._proto, fileno=self._fileno
)
if self._connect_called:
self.connect(self._host, self._port)
def _detect_protocol(self) -> None:
"""Detect dbrm socket bytestream version.
This method normally will be called only in first instance
at first "send" method call.
After that header will be formed and parsed depending on
"long_strings" class variable value.
Sends "readonly" message with "old" protocol version (before MCS 6.2.1)
If timeout error raised, sends message with "new" protocol version
(after MCS 6.2.1) with extra 4 bytes in header.
If both attempts fail, raise RuntimeError and return the
"long_strings" variable to initial state - None.
:raises RuntimeError: if the protocol version cannot be detected
"""
success = False
# check at first old protocol because 5.x.x version got an issue if
# we try to send new format packages.
for long_strings in (False, True):
DBRMSocketHandler.long_strings = long_strings
self.send('readonly')
try:
_ = self.receive()
success = True
break
except (socket.timeout, TimeoutError):
# a wrong packet sent could cause errors on the mcs engine side
self._recreate_socket()
continue
if not success:
# something went wrong so return to unknown protocol state
DBRMSocketHandler.long_strings = None
raise RuntimeError(
'Can\'t detect DBRM bytestream protocol version.'
)
else:
dbrm_protocol_version = (
'new' if DBRMSocketHandler.long_strings else 'old'
)
logging.info(
f'Detected "{dbrm_protocol_version}" DBRM bytestream protocol'
)
def _make_msg(self, command_name: str, command_value: int) -> bytes:
"""Make bytes msg by command name and value.
:param command_name: name of a command
:type command_name: str
:param command_value: command value
:type command_value: int or None
:return: msg to send through the socket
:rtype: bytes
"""
command_bytes, command_value_length = DBRM_COMMAND_BYTES[command_name]
data_length = (
command_value_length + len(command_bytes)
).to_bytes(4, 'little')
# bytestream protocol before MCS 6.2.1 version
package_header = MAGIC_BYTES + data_length
if DBRMSocketHandler.long_strings:
# bytestream protocol after MCS 6.2.1 version
long_strings_count = (0).to_bytes(4, 'little')
package_header += long_strings_count
msg_bytes = package_header + command_bytes
if command_value is not None:
msg_bytes += command_value.to_bytes(
command_value_length, 'little'
)
return msg_bytes
def _receive_magic(self):
"""Reads the stream up to the uncompressed magic.
The magic is a constant delimiter that occurs at the beginning
of the stream.
"""
data: bytes
recv_data: bytes = b''
while recv_data != MAGIC_BYTES:
data = self._socket.recv(1)
# TODO: advanced error handling
if data == b'':
raise RuntimeError(
'Socket connection broken while receiving magic'
)
recv_data += data
if not MAGIC_BYTES.startswith(recv_data):
recv_data = data
continue
def _receive(self, length: int):
"""Receive raw data from socket by length.
:param length: length in bytes to receive
:type length: int
:raises RuntimeError: if socket connection is broken while receiving
:return: received bytes
:rtype: bytes
"""
chunks = []
bytes_recd = 0
while bytes_recd < length:
chunk = self._socket.recv(min(length - bytes_recd, 2048))
if chunk == b'':
raise RuntimeError(
'Socket connection broken while receiving data.'
)
chunks.append(chunk)
bytes_recd += len(chunk)
return b''.join(chunks)
def _send(self, msg: bytes):
"""Send msg in bytes through the socket.
:param msg: string in bytes to send
:type msg: bytes
:raises RuntimeError: if connection is broken while sending
"""
totalsent = 0
while totalsent < len(msg):
sent = self._socket.send(msg[totalsent:])
if sent == 0:
raise RuntimeError(
'DBRM socket connection broken while sending.'
)
totalsent = totalsent + sent
def connect(self, host: str = DEFAULT_HOST, port: int = DEFAULT_PORT):
"""Connect to socket.
By default it connects to the DBRM master.
"""
self._host = host
self._port = port
self._socket.settimeout(SOCK_TIMEOUT)
self._socket.connect((host, port))
def close(self):
"""Closing the socket.
Set _host and _port instance variables to None to change state to
not connected. Then close the _socket.
"""
self._host = None
self._port = None
self._socket.close()
def send(self, command_name: str, command_value: int = None):
"""Top level send by command name and value.
:param command_name: name of a command
:type command_name: str
:param command_value: command value, defaults to None
:type command_value: int, optional
"""
if DBRMSocketHandler.long_strings is None:
self._detect_protocol()
msg_bytes = self._make_msg(command_name, command_value)
self._send(msg_bytes)
def receive(self):
"""Top level method to receive data from socket.
Automatically reads the magic and data length from data header.
:return: received bytes without header
:rtype: bytes
"""
self._receive_magic()
data_length = int.from_bytes(self._receive(4), 'little')
if DBRMSocketHandler.long_strings:
# receive long strings count to meet new bytestream protocol
# requirements (after MCS 6.2.1 release)
long_strings_count_bytes = self._receive(4)
data_bytes = self._receive(data_length)
return data_bytes
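To make the framing concrete, a sketch of the bytes _make_msg produces for set_system_state with value SS_READY (old protocol, long_strings disabled); the layout follows directly from the code above:

# magic        : 37 c1 fb 14   (0x14fbc137, little-endian)
# data length  : 05 00 00 00   (1 command byte + 4 value bytes)
# command byte : 37            (55 == set_system_state)
# value        : 01 00 00 00   (SS_READY)
# With the post-6.2.1 protocol a 4-byte long-strings count (00 00 00 00)
# is inserted between the data length and the command byte.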

View File

@ -0,0 +1,114 @@
from __future__ import annotations
import logging
from pathlib import Path
from lxml import etree
from cmapi_server.constants import (
DEFAULT_MCS_CONF_PATH, MCS_DATA_PATH, MCS_MODULE_FILE_PATH,
)
module_logger = logging.getLogger()
def read_module_id():
"""Retrieves module ID from MCS_MODULE_FILE_PATH.
:rtype: int : module id
"""
module_file = Path(MCS_MODULE_FILE_PATH)
return int(module_file.read_text()[2:])
# TODO: unused for now, never called in the code;
# NodeConfig.apply_config does this.
def set_module_id(module_id: int = 1):
"""Sets current module ID from MCS_MODULE_FILE_PATH.
:rtype: int : seconds
"""
module_file = Path(MCS_MODULE_FILE_PATH)
return module_file.write_text(f'pm{module_id}\n')
def get_dbroots_list(path: str = MCS_DATA_PATH):
"""searches for services
The method returns numeric ids of dbroots available.
:rtype: generator of ints
"""
func_name = 'get_dbroots_list'
path = Path(path)
for child in path.glob('data[1-9]*'):
dir_list = str(child).split('/') # presume Linux only
dbroot_id = int(''.join(list(filter(str.isdigit, dir_list[-1]))))
module_logger.debug(f'{func_name} The node has dbroot {dbroot_id}')
yield dbroot_id
def get_workernodes() -> dict[str, dict[str, int]]:
"""Get workernodes list.
Returns a dict with the network address of each workernode.
This is an equivalent of all nodes.
:return: workernodes dict
:rtype: dict[str, dict[str, int]]
"""
# TODO: fix in MCOL-5147, get xml path from class that will handle xml
root = current_config_root()
workernodes = {}
# searches for all tags starts with DBRM_Worker, eg DBRM_Worker1
workernodes_elements = root.xpath(
"//*[starts-with(local-name(), 'DBRM_Worker')]"
)
for workernode_el in workernodes_elements:
workernode_ip = workernode_el.find('./IPAddr').text
if workernode_ip == '0.0.0.0':
# skip unassigned entries (placeholder ip 0.0.0.0)
continue
try:
workernode_port = int(workernode_el.find('./Port').text)
except (AttributeError, ValueError):
# AttributeError for not found Port tag, so got None.text
# ValueError for non numeric values in tag text
module_logger.error(
'No Port tag found or wrong Port value for tag '
f'"{workernode_el.tag}".'
)
workernode_port = 8700
workernodes[workernode_el.tag] = {
'IPAddr': workernode_ip, 'Port': workernode_port
}
return workernodes
def get_dbrm_master(config_filename: str = DEFAULT_MCS_CONF_PATH) -> dict:
"""Get DBRM master ip and port.
:param config_filename: path to xml conf, defaults to DEFAULT_MCS_CONF_PATH
:type config_filename: str, optional
:return: ipaddress and port of DBRM master
:rtype: dict
"""
# TODO: fix in MCOL-5147, get xml path from class that will handle xml
# Use NodeConfig class as a template?
root = current_config_root(config_filename)
return {
'IPAddr': root.find("./DBRM_Controller/IPAddr").text,
'Port': root.find("./DBRM_Controller/Port").text
}
def current_config_root(config_filename: str = DEFAULT_MCS_CONF_PATH):
"""Retrievs current configuration
Read the config and returns Element
:rtype: lxml.Element
"""
parser = etree.XMLParser(load_dtd=True)
tree = etree.parse(config_filename, parser=parser)
return tree.getroot()
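An illustrative usage sketch for the helpers above; the printed values are examples, not guaranteed output:

from mcs_node_control.models.misc import get_dbrm_master, get_workernodes

print(get_dbrm_master())  # e.g. {'IPAddr': '192.168.0.102', 'Port': '8616'}
for name, conn in get_workernodes().items():
    print(name, conn['IPAddr'], conn['Port'])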

View File

@ -0,0 +1,114 @@
# Based on https://gist.github.com/provegard/1536682, which was
# Based on getifaddrs.py from pydlnadms [http://code.google.com/p/pydlnadms/].
# Only tested on Linux!
# WARNING: Not working on Mac OS (tested on 10.12 Sierra)
# TODO: move to psutil lib
from socket import AF_INET, AF_INET6, inet_ntop
from ctypes import (
Structure, Union, POINTER,
pointer, get_errno, cast,
c_ushort, c_byte, c_void_p, c_char_p, c_uint, c_int, c_uint16, c_uint32
)
import ctypes.util
import ctypes
class struct_sockaddr(Structure):
_fields_ = [
('sa_family', c_ushort),
('sa_data', c_byte * 14),]
class struct_sockaddr_in(Structure):
_fields_ = [
('sin_family', c_ushort),
('sin_port', c_uint16),
('sin_addr', c_byte * 4)]
class struct_sockaddr_in6(Structure):
_fields_ = [
('sin6_family', c_ushort),
('sin6_port', c_uint16),
('sin6_flowinfo', c_uint32),
('sin6_addr', c_byte * 16),
('sin6_scope_id', c_uint32)]
class union_ifa_ifu(Union):
_fields_ = [
('ifu_broadaddr', POINTER(struct_sockaddr)),
('ifu_dstaddr', POINTER(struct_sockaddr)),]
class struct_ifaddrs(Structure):
pass
struct_ifaddrs._fields_ = [
('ifa_next', POINTER(struct_ifaddrs)),
('ifa_name', c_char_p),
('ifa_flags', c_uint),
('ifa_addr', POINTER(struct_sockaddr)),
('ifa_netmask', POINTER(struct_sockaddr)),
('ifa_ifu', union_ifa_ifu),
('ifa_data', c_void_p),]
libc = ctypes.CDLL(ctypes.util.find_library('c'))
def ifap_iter(ifap):
ifa = ifap.contents
while True:
yield ifa
if not ifa.ifa_next:
break
ifa = ifa.ifa_next.contents
def getfamaddr(sa):
family = sa.sa_family
addr = None
if family == AF_INET:
sa = cast(pointer(sa), POINTER(struct_sockaddr_in)).contents
addr = inet_ntop(family, sa.sin_addr)
elif family == AF_INET6:
sa = cast(pointer(sa), POINTER(struct_sockaddr_in6)).contents
addr = inet_ntop(family, sa.sin6_addr)
return family, addr
class NetworkInterface(object):
def __init__(self, name):
self.name = name
self.index = libc.if_nametoindex(name)
self.addresses = {}
def __str__(self):
return "%s [index=%d, IPv4=%s, IPv6=%s]" % (
self.name, self.index,
self.addresses.get(AF_INET),
self.addresses.get(AF_INET6))
def get_network_interfaces():
ifap = POINTER(struct_ifaddrs)()
result = libc.getifaddrs(pointer(ifap))
if result != 0:
raise OSError(get_errno())
del result
try:
retval = {}
for ifa in ifap_iter(ifap):
name = ifa.ifa_name.decode("UTF-8")
i = retval.get(name)
if not i:
i = retval[name] = NetworkInterface(name)
family, addr = getfamaddr(ifa.ifa_addr.contents)
if addr:
if family not in i.addresses:
i.addresses[family] = list()
i.addresses[family].append(addr)
return retval.values()
finally:
libc.freeifaddrs(ifap)
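A quick usage sketch (Linux only, per the warning at the top of the file):

if __name__ == '__main__':
    for ni in get_network_interfaces():
        # e.g. lo [index=1, IPv4=['127.0.0.1'], IPv6=['::1']]
        print(ni)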

View File

@ -0,0 +1,574 @@
import configparser
import grp
import logging
import pwd
import re
import socket
from os import mkdir, replace, chown
from pathlib import Path
from shutil import copyfile
from xml.dom import minidom # to pick up pretty printing functionality
from lxml import etree
from cmapi_server.constants import (
DEFAULT_MCS_CONF_PATH, DEFAULT_SM_CONF_PATH,
MCS_MODULE_FILE_PATH,
)
# from cmapi_server.managers.process import MCSProcessManager
from mcs_node_control.models.misc import (
read_module_id, get_dbroots_list
)
from mcs_node_control.models.network_ifaces import get_network_interfaces
module_logger = logging.getLogger()
class NodeConfig:
"""Class to operate with the configuration file.
The class instance applies a new config or retrieves the current one.
config_filename and output_filename allow tests to override
the input & output of these functions.
The output in this case may be a config file upgraded to version 1.
"""
def get_current_config_root(
self, config_filename: str = DEFAULT_MCS_CONF_PATH, upgrade=True
):
"""Retrievs current configuration.
Read the config and returns Element.
TODO: pretty the same function in misc.py - review
:rtype: lxml.Element
"""
parser = etree.XMLParser(load_dtd=True)
tree = etree.parse(config_filename, parser=parser)
self.upgrade_config(tree=tree, upgrade=upgrade)
return tree.getroot()
def get_root_from_string(self, config_string: str):
root = etree.fromstring(config_string)
self.upgrade_config(root=root)
return root
def upgrade_from_v0(self, root):
revision = etree.SubElement(root, 'ConfigRevision')
revision.text = '1'
cluster_manager = etree.SubElement(root, 'ClusterManager')
cluster_manager.text = str(self.get_module_net_address(root=root))
cluster_name = etree.SubElement(root, 'ClusterName')
cluster_name.text = 'MyCluster'
# Need to get the addresses/host names of all nodes.
# Should all be listed as DBRM_worker nodes
addrs = set()
num = 1
max_node = 1
while True:
node = root.find(f'./DBRM_Worker{num}/IPAddr')
if node is None:
break
if node.text != '0.0.0.0':
addrs.add(node.text)
if max_node < num:
max_node = num
num += 1
# NextNodeId can be derived from the max DBRM_worker entry with non-0
# ip address
next_node_id = etree.SubElement(root, 'NextNodeId')
next_node_id.text = str(max_node + 1)
# NextDBRootId is the max current dbroot in use + 1
num = 1
max_dbroot = 1
while num < 100:
node = root.find(f'./SystemConfig/DBRoot{num}')
if node is not None:
max_dbroot = num
num += 1
next_dbroot_id = etree.SubElement(root, 'NextDBRootId')
next_dbroot_id.text = str(max_dbroot + 1)
# The current primary node is listed under DBRMControllerNode.
# Might as well start with that.
primary_node_addr = root.find('./DBRM_Controller/IPAddr').text
# Put them all in the DesiredNodes and ActiveNodes sections
desired_nodes = etree.SubElement(root, 'DesiredNodes')
active_nodes = etree.SubElement(root, 'ActiveNodes')
for addr in addrs:
node = etree.SubElement(desired_nodes, 'Node')
node.text = addr
node = etree.SubElement(active_nodes, 'Node')
node.text = addr
# Add an empty InactiveNodes section and set the primary node addr
inactive_nodes = etree.SubElement(root, 'InactiveNodes')
primary_node = etree.SubElement(root, 'PrimaryNode')
primary_node.text = primary_node_addr
# Add Maintenance tag and set to False
maintenance = etree.SubElement(root, 'Maintenance')
maintenance.text = str(False).lower()
def upgrade_config(self, tree=None, root=None, upgrade=True):
"""
Add the parts that might be missing after an upgrade from an earlier
version.
.. note:: one or the other optional parameter should be specified (?)
"""
if root is None and tree is not None:
root = tree.getroot()
rev_node = root.find('./ConfigRevision')
if rev_node is None and upgrade:
self.upgrade_from_v0(root)
# as we add revisions, add add'l checks on rev_node.text here
def write_config(self, tree, filename=DEFAULT_MCS_CONF_PATH):
tmp_filename = filename + ".cmapi.tmp"
with open(tmp_filename, "w") as f:
f.write(self.to_string(tree))
replace(tmp_filename, filename) # atomic replacement
def to_string(self, tree):
# TODO: try to use lxml to do this to avoid the add'l dependency
xmlstr = minidom.parseString(etree.tostring(tree)).toprettyxml(
indent=" "
)
# fix annoying issue of extra newlines added by toprettyxml()
xmlstr = '\n'.join([
line.rstrip() for line in xmlstr.split('\n') if line.strip() != ""
])
return xmlstr
def get_dbrm_conn_info(self, root=None):
"""Retrievs current DBRM master IP and port
Read the config and returns a dict with the connection information.
:rtype: dict
"""
if root is None:
return None
addr = ''
port = 0
for el in root:
if el.tag == 'DBRM_Controller':
for subel in el:
if subel.tag == 'IPAddr':
addr = subel.text
elif subel.tag == 'Port':
port = subel.text
return {'IPAddr': addr, 'Port': port}
return None
def apply_config(
self, config_filename: str = DEFAULT_MCS_CONF_PATH,
xml_string: str = None, sm_config_filename: str = None,
sm_config_string: str = None
):
"""Applies the configuration WIP.
Instance iterates over the xml nodes.
: param config_filename: string 4 testing
: param xml_string: string
:rtype: bool
"""
if xml_string is None:
return
current_root = self.get_current_config_root(config_filename)
parser = etree.XMLParser(load_dtd=True)
new_root = etree.fromstring(xml_string, parser=parser)
try:
# We don't change module ids for non-single nodes.
# if self.is_single_node(root=current_root):
# set_module_id(self.get_new_module_id(new_root))
# make sure all of the dbroot directories exist on this node
for dbroot in self.get_all_dbroots(new_root):
try:
node = new_root.find(f'./SystemConfig/DBRoot{dbroot}')
mkdir(node.text, mode=0o755)
# if we are using the systemd dispatcher we need to change
# ownership on any created dirs to mysql:mysql
# TODO: remove conditional once container dispatcher will
# use non-root by default
# TODO: what happened if we change ownership in container?
# check the container installations works as expected
# from cmapi_server.managers.process import MCSProcessManager
# if MCSProcessManager.dispatcher_name == 'systemd':
uid = pwd.getpwnam('mysql').pw_uid
gid = grp.getgrnam('mysql').gr_gid
chown(node.text, uid, gid)
except FileExistsError:
pass
# Save current config
config_file = Path(config_filename)
config_dir = config_file.resolve().parent
copyfile(
config_file, f'{config_dir}/{config_file.name}.cmapi.save'
)
# Save new config
self.write_config(tree=new_root, filename=config_filename)
# Save current and new storagemanager config
if sm_config_string and sm_config_filename:
sm_config_file = Path(sm_config_filename)
sm_config_dir = sm_config_file.resolve().parent
copyfile(
sm_config_file,
f'{sm_config_dir}/{sm_config_file.name}.cmapi.save'
)
with open(sm_config_filename, 'w') as sm_config_file:
sm_config_file.write(sm_config_string)
# TODO: review
# figure out what to put in the 'module' file to make
# the old oam library happy
module_file = None
try:
pm_num = self.get_current_pm_num(new_root)
with open(MCS_MODULE_FILE_PATH, 'w') as module_file:
module_file.write(f'pm{pm_num}\n')
module_logger.info(
f'Wrote "pm{pm_num}" to {MCS_MODULE_FILE_PATH}'
)
except Exception:
module_logger.error(
'Failed to get or set this node\'s pm number.\n'
'You may observe add\'l errors as a result.\n',
exc_info=True
)
except:
# Raise an appropriate exception
module_logger.error(
f'{self.apply_config.__name__} throws an exception.'
'The original config must be restored by '
'explicit ROLLBACK command or timeout.',
exc_info=True
)
raise
def in_active_nodes(self, root):
my_names = set(self.get_network_addresses_and_names())
active_nodes = [
node.text for node in root.findall("./ActiveNodes/Node")
]
for node in active_nodes:
if node in my_names:
return True
return False
def get_current_pm_num(self, root):
# Find this node in the Module* tags, return the module number
my_names = set(self.get_network_addresses_and_names())
smc_node = root.find("./SystemModuleConfig")
pm_count = int(smc_node.find("./ModuleCount3").text)
for pm_num in range(1, pm_count + 1):
ip_addr = smc_node.find(f"./ModuleIPAddr{pm_num}-1-3").text
name = smc_node.find(f"./ModuleHostName{pm_num}-1-3").text
if ip_addr in my_names:
module_logger.info(f" -- Matching against ModuleIPAddr{pm_num}-1-3, which says {ip_addr}")
return pm_num
if name in my_names:
module_logger.info(f" -- Matching against ModuleHostName{pm_num}-1-3, which says {name}")
return pm_num
raise Exception("Did not find my IP addresses or names in the SystemModuleConfig section")
def rollback_config(self, config_filename: str = DEFAULT_MCS_CONF_PATH):
"""Rollback the configuration.
Copies back the saved copy of the configuration file.
:param config_filename: Columnstore config file path
"""
# TODO: Rollback doesn't restart needed processes?
config_file = Path(config_filename)
config_dir = config_file.resolve().parent
backup_path = f"{config_dir}/{config_file.name}.cmapi.save"
config_file_copy = Path(backup_path)
if config_file_copy.exists():
replace(backup_path, config_file) # atomic replacement
def get_current_config(self, config_filename: str = DEFAULT_MCS_CONF_PATH):
"""Retrievs current configuration.
Read the config and convert it into bytes string.
:rtype: string
..TODO: fix using self.get_current_config_root()
"""
parser = etree.XMLParser(load_dtd=True)
tree = etree.parse(config_filename, parser=parser)
self.upgrade_config(tree=tree)
# TODO: Unicode? Maybe UTF-8?
return etree.tostring(
tree.getroot(), pretty_print=True, encoding='unicode'
)
def get_current_sm_config(
self, config_filename: str = DEFAULT_SM_CONF_PATH
) -> str:
"""Retrievs current SM configuration
Read the config and convert it into a string.
:rtype: str
"""
func_name = 'get_current_sm_config'
sm_config_path = Path(config_filename)
try:
return sm_config_path.read_text(encoding='utf-8')
except FileNotFoundError:
module_logger.error(f"{func_name} SM config {config_filename} not found.")
return ''
def s3_enabled(self, config_filename: str = DEFAULT_SM_CONF_PATH) -> bool:
"""Checks if SM is enabled
Reads SM config and checks if storage set to S3.
It also checks for additional settings in the XML that must be set too.
:rtype: bool
"""
func_name = 's3_enabled'
sm_config = configparser.ConfigParser()
if len(sm_config.read(config_filename)) > 0:
storage = sm_config.get('ObjectStorage', 'service')
if storage is None:
storage = 'LocalStorage'
if storage.lower() == 's3':
config_root = self.get_current_config_root()
if not config_root.find('./Installation/DBRootStorageType').text.lower() == "storagemanager":
module_logger.error(f"{func_name} DBRootStorageType.lower() != storagemanager")
if not config_root.find('./StorageManager/Enabled').text.lower() == "y":
module_logger.error(f"{func_name} StorageManager/Enabled.lower() != y")
if not config_root.find('./SystemConfig/DataFilePlugin').text == "libcloudio.so":
module_logger.error(f"{func_name} SystemConfig/DataFilePlugin != libcloudio.so")
return True
else:
module_logger.error(f"{func_name} SM config {config_filename} not found.")
return False
def get_network_addresses(self):
"""Retrievs the list of the network addresses
Generator that yields network interface addresses.
:rtype: str
"""
for ni in get_network_interfaces():
for fam in [socket.AF_INET, socket.AF_INET6]:
addrs = ni.addresses.get(fam)
if addrs is not None:
for addr in addrs:
yield(addr)
def get_network_addresses_and_names(self):
"""Retrievs the list of the network addresses, hostnames, and aliases
Generator that yields network interface addresses, hostnames, and aliases
:rtype: str
"""
for ni in get_network_interfaces():
for fam in [socket.AF_INET, socket.AF_INET6]:
addrs = ni.addresses.get(fam)
if addrs is not None:
for addr in addrs:
yield(addr)
try:
(host, aliases, _) = socket.gethostbyaddr(addr)
except:
continue
yield host
for alias in aliases:
yield alias
def is_primary_node(self, root=None):
"""Checks if this node is the primary node.
Reads the config and compares DBRM_Controller IP or
hostname with this node's IP and hostname.
:rtype: bool
"""
if root is None:
root = self.get_current_config_root()
primary_address = self.get_dbrm_conn_info(root)['IPAddr']
return primary_address in self.get_network_addresses_and_names()
def is_single_node(self,
root=None):
"""Checks if this node is the single node.
Reads the config and compares the DBRM master IP with the predefined localhost addresses.
:rtype: bool
"""
if root is None:
root = self.get_current_config_root()
master_address = self.get_dbrm_conn_info(root)['IPAddr']
if master_address in ['127.0.0.1', 'localhost', '::1']:
return True
return False
def get_new_module_id(self, new_root=None):
"""Retrieves new module id.
Reads the new XML config and searches for an IP belonging to this host in SystemModuleConfig.ModuleIPAddrX-1-3, where X is the new module id.
:rtype: int
"""
func_name = 'get_new_module_id'
current_module_id = read_module_id()
if new_root is None:
module_logger.error(f'{func_name} Empty new XML tree root.')
return current_module_id
net_address = self.get_module_net_address(new_root, current_module_id)
# Use getaddrinfo in case of IPv6
if net_address is None:
module_logger.error(f'{func_name} Columnstore.xml has unknown value in SystemModuleConfig.\
ModuleIPAddr{current_module_id}-1-3.')
raise RuntimeError('net_address is None.')
if socket.gethostbyname(net_address) in self.get_network_addresses():
return current_module_id
# Use getaddrinfo in case of IPv6
# This fires for an added node when the node id changes from 1 to something else
for module_entry in self.get_modules_addresses(new_root):
if module_entry['addr'] is not None:
net_addr = socket.gethostbyname(module_entry['addr'])
if net_addr in self.get_network_addresses():
module_logger.debug(f'{func_name} New module id \
{module_entry["id"]}')
return int(module_entry['id'])
module_logger.error(f'{func_name} Cannot find new module id for \
the node.')
raise RuntimeError('Fail to find module id.')
def get_module_net_address(self, root=None, module_id: int = None):
"""Retrieves the module network address.
Reads new XML config and returns IP or
hostname from SystemModuleConfig.ModuleIPAddrX-1-3.
:rtype: string
"""
func_name = 'get_module_net_address'
if module_id is None:
module_id = read_module_id()
if root is None:
module_logger.error(f'{func_name} Empty XML root.')
return
for el in root:
if el.tag == 'SystemModuleConfig':
for subel in el:
if subel.tag == f'ModuleIPAddr{module_id}-1-3':
module_logger.debug(
f'{func_name} Module {module_id} '
f'network address {subel.text}'
)
return subel.text
module_logger.error(f'{func_name} Module {module_id} was not found.')
return
def get_modules_addresses(self, root=None):
"""Retrieves the modules network addresses.
Reads new XML config and returns IP or hostname from
SystemModuleConfig.ModuleIPAddrX-1-3 with X being a node id.
:rtype: generator of dicts
"""
func_name = 'get_modules_addresses'
if root is None:
module_logger.error(f'{func_name} Empty XML root.')
return None
regex_string = 'ModuleIPAddr[0-9]+-1-3'
for el in root:
if el.tag == 'SystemModuleConfig':
for subel in el:
module_ip_m = re.match(regex_string, subel.tag)
if module_ip_m is not None:
id_m = re.search('[0-9]+', module_ip_m.group(0))
module_id = id_m.group(0)
module_logger.debug(
f'{func_name} Module {module_id} '
f'network address {subel.text}'
)
yield {'addr': subel.text, 'id': module_id}
module_logger.error(f'{func_name} Module {module_id} was not found.')
return None
def dbroots_to_create(self, root=None, module_id:int=None):
"""Generates dbroot ids if there are new dbroots to be created/renamed
Reads the new XML config and yields dbroot ids where the on-disk dbroots differ from the config's set.
:rtype: generator of strings
"""
func_name = 'dbroots_to_create'
if module_id is None:
module_id = read_module_id()
if root is None:
module_logger.error(f'{func_name} Empty XML root.')
return
current_dbroot_list = get_dbroots_list()
regex_string = f'ModuleDBRootID{module_id}-[0-9]+-3'
for el in root:
if el.tag == 'SystemModuleConfig':
for subel in el:
if re.match(regex_string, subel.tag) is not None and \
int(subel.text) not in current_dbroot_list:
module_logger.debug(f'{func_name} Module {module_id} \
has dbroot {subel.text}')
yield int(subel.text)
return
def get_all_dbroots(self, root):
dbroots = []
smc_node = root.find("./SystemModuleConfig")
mod_count = int(smc_node.find("./ModuleCount3").text)
for i in range(1, mod_count+1):
for j in range(1, int(smc_node.find(f"./ModuleDBRootCount{i}-3").text) + 1):
dbroots.append(smc_node.find(f"./ModuleDBRootID{i}-{j}-3").text)
return dbroots
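An illustrative read-only usage sketch for NodeConfig (no config is modified):

from mcs_node_control.models.node_config import NodeConfig

nc = NodeConfig()
root = nc.get_current_config_root()  # parses Columnstore.xml
print(nc.is_primary_node(root))      # True on the DBRM master node
print(nc.get_dbrm_conn_info(root))   # {'IPAddr': ..., 'Port': ...}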

View File

@ -0,0 +1,91 @@
import logging
import socket
from cmapi_server.constants import MCS_DATA_PATH, MCS_MODULE_FILE_PATH
from mcs_node_control.models.dbrm import DBRM
from mcs_node_control.models.misc import get_dbroots_list, read_module_id
from mcs_node_control.models.process import get_host_uptime
PROC_NAMES = ['ExeMgr', 'PrimProc', 'WriteEngine', 'controllernode',
'workernode', 'cmagent', 'DMLProc', 'DDLProc']
module_logger = logging.getLogger()
class NodeStatus:
"""Class to tell the status of the node.
Inspects runtime of the cluster and OS and returns its observations.
"""
def get_cluster_mode(self):
"""Reads cluster mode.
Cluster can be in readwrite or readonly modes. It can be also ready or
not ready but it is not important at this point. We pesume if there is
no connection with DBRM master then the cluster is readonly.
TODO:
- Is it ok to have those method here in NodeStatus?
Move to DBRM.
- pass 'root' and config_filename arguments
(likewise dbrm.set_cluster_mode)
:rtype: string
"""
try:
with DBRM() as dbrm:
return dbrm.get_cluster_mode()
except (ConnectionRefusedError, RuntimeError, socket.error):
module_logger.error(
'Cannot establish or use DBRM connection.',
exc_info=True
)
return 'readonly'
def get_dbrm_status(self):
"""reads DBRM status
DBRM Block Resolution Manager operates in two modes:
master and slave. This m() returns the mode of this node
looking for controllernode process running.
:rtype: string
"""
return DBRM.get_dbrm_status()
def get_dbroots(self, path:str = MCS_DATA_PATH):
"""searches for services
The method returns numeric ids of dbroots available.
:rtype: generator of ints
"""
for id in get_dbroots_list(path):
yield id
def get_host_uptime(self):
"""Retrieves uptime in seconds.
:rtype: int : seconds
"""
return get_host_uptime()
def get_module_id(self):
"""Retrieves module ID from MCS_MODULE_FILE_PATH.
:rtype: int : module id
"""
func_name = 'get_module_id'
try:
module_id = read_module_id()
except FileNotFoundError:
module_id = 0
module_logger.error(
f'{func_name} {MCS_MODULE_FILE_PATH} file is absent.'
)
return module_id
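An illustrative usage sketch for NodeStatus:

from mcs_node_control.models.node_status import NodeStatus

status = NodeStatus()
print(status.get_cluster_mode())   # 'readwrite' or 'readonly'
print(status.get_dbrm_status())    # 'master' or 'slave'
print(list(status.get_dbroots()))  # e.g. [1]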

View File

@ -0,0 +1,110 @@
import os
import time
import psutil
PROCFS_PATH = '/proc/' # Linux only
def open_binary(fname, **kwargs):
return open(fname, "rb", **kwargs)
def get_host_uptime():
"""
Return the host uptime in seconds (time since boot).
:rtype: int : diff b/w current epoch and boot epoch
"""
path = f'{PROCFS_PATH}stat'
boot_time = 0
with open_binary(path) as f:
for line in f:
if line.startswith(b'btime'):
boot_time = float(line.strip().split()[1])
return int(time.time() - int(boot_time))
return 0
class Process():
"""An interface to retrieve data from proc."""
def get_proc_iterator(self):
for pid in self.pids():
yield pid
def pids(self):
"""Returns a list of PIDs currently running on the system."""
return [int(x) for x in os.listdir(PROCFS_PATH) if x.isdigit()]
def name(self, pid: int):
"""Method to retrive name associated with the pid."""
return self.parse_stat_file(pid)['name']
def parse_stat_file(self, pid: int):
"""Parse /proc/{pid}/stat file and return a dict with various
process info.
Using "man proc" as a reference: where "man proc" refers to
position N always subtract 3 (e.g. ppid position 4 in
'man proc' == position 1 in here).
"""
ret = {}
try:
with open_binary(f"{PROCFS_PATH}{pid}/stat") as f:
data = f.read()
# Process name is between parentheses. It can contain spaces and
# other parentheses. This is taken into account by looking for
# the first occurrence of "(" and the last occurence of ")".
rpar = data.rfind(b')')
name = data[data.find(b'(') + 1:rpar]
fields = data[rpar + 2:].split()
ret['name'] = name
ret['status'] = fields[0]
ret['ppid'] = fields[1]
ret['ttynr'] = fields[4]
ret['utime'] = fields[11]
ret['stime'] = fields[12]
ret['children_utime'] = fields[13]
ret['children_stime'] = fields[14]
ret['create_time'] = fields[19]
ret['cpu_num'] = fields[36]
ret['blkio_ticks'] = fields[39] # aka 'delayacct_blkio_ticks'
except (PermissionError, ProcessLookupError, FileNotFoundError):
ret['name'] = ''
ret['status'] = ''
ret['ppid'] = ''
ret['ttynr'] = ''
ret['utime'] = ''
ret['stime'] = ''
ret['children_utime'] = ''
ret['children_stime'] = ''
ret['create_time'] = ''
ret['cpu_num'] = ''
ret['blkio_ticks'] = '' # aka 'delayacct_blkio_ticks'
return ret
@staticmethod
def check_process_alive(proc_name: str) -> bool:
"""Check process running.
:param proc_name: process name
:type proc_name: str
:return: True if process running, otherwise False
:rtype: bool
"""
# Iterate over the all the running process
for proc in psutil.process_iter():
try:
# Check if process name equals to the given name string.
if proc_name.lower() == proc.name().lower():
return True
except (
psutil.NoSuchProcess, psutil.AccessDenied,
psutil.ZombieProcess
):
pass
return False
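An illustrative usage sketch for the process helpers above:

from mcs_node_control.models.process import Process, get_host_uptime

print(get_host_uptime())                        # seconds since boot
print(Process.check_process_alive('PrimProc'))  # True if running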

View File

@ -0,0 +1,587 @@
<Columnstore Version="V1.0.1">
<!--
WARNING: Do not make changes to this file unless directed to do so by
MariaDB service engineers. Incorrect settings can render your system
unusable and will require a service call to correct.
-->
<Manager>MaxScale IP</Manager>
<Sequence>42</Sequence>
<ExeMgr1>
<IPAddr>192.168.0.101</IPAddr>
<Port>8601</Port>
<Module>um1</Module>
</ExeMgr1>
<JobProc>
<IPAddr>0.0.0.0</IPAddr>
<Port>8602</Port>
</JobProc>
<ProcMgr>
<IPAddr>192.168.0.102</IPAddr>
<Port>8603</Port>
</ProcMgr>
<ProcMgr_Alarm>
<IPAddr>192.168.0.102</IPAddr>
<Port>8606</Port>
</ProcMgr_Alarm>
<ProcStatusControl>
<IPAddr>192.168.0.102</IPAddr>
<Port>8604</Port>
</ProcStatusControl>
<ProcStatusControlStandby>
<IPAddr>192.168.0.103</IPAddr>
<Port>8605</Port>
</ProcStatusControlStandby>
<!-- Disabled
<ProcHeartbeatControl>
<IPAddr>0.0.0.0</IPAddr>
<Port>8605</Port>
</ProcHeartbeatControl>
-->
<!-- ProcessMonitor Port: 8800 - 8820 is reserved to support External Modules-->
<localhost_ProcessMonitor>
<IPAddr>127.0.0.1</IPAddr>
<Port>8800</Port>
</localhost_ProcessMonitor>
<dm1_ProcessMonitor>
<IPAddr>0.0.0.0</IPAddr>
<Port>8800</Port>
</dm1_ProcessMonitor>
<um1_ProcessMonitor>
<IPAddr>192.168.0.101</IPAddr>
<Port>8800</Port>
</um1_ProcessMonitor>
<pm1_ProcessMonitor>
<IPAddr>192.168.0.102</IPAddr>
<Port>8800</Port>
</pm1_ProcessMonitor>
<dm1_ServerMonitor>
<IPAddr>0.0.0.0</IPAddr>
<Port>8622</Port>
</dm1_ServerMonitor>
<um1_ServerMonitor>
<IPAddr>192.168.0.101</IPAddr>
<Port>8622</Port>
</um1_ServerMonitor>
<pm1_ServerMonitor>
<IPAddr>192.168.0.102</IPAddr>
<Port>8622</Port>
</pm1_ServerMonitor>
<pm1_WriteEngineServer>
<IPAddr>192.168.0.102</IPAddr>
<Port>8630</Port>
</pm1_WriteEngineServer>
<DDLProc>
<IPAddr>192.168.0.101</IPAddr>
<Port>8612</Port>
</DDLProc>
<DMLProc>
<IPAddr>192.168.0.101</IPAddr>
<Port>8614</Port>
</DMLProc>
<BatchInsert>
<RowsPerBatch>10000</RowsPerBatch>
</BatchInsert>
<PrimitiveServers>
<Count>4</Count>
<ConnectionsPerPrimProc>2</ConnectionsPerPrimProc>
<ProcessorThreshold>128</ProcessorThreshold>
<ProcessorQueueSize>10K</ProcessorQueueSize> <!-- minimum of extent size 8192 -->
<DebugLevel>0</DebugLevel>
<ColScanBufferSizeBlocks>512</ColScanBufferSizeBlocks>
<ColScanReadAheadBlocks>512</ColScanReadAheadBlocks> <!-- s/b factor of extent size 8192 -->
<!-- <BPPCount>16</BPPCount> --> <!-- Default num cores * 2. A cap on the number of simultaneous primitives per jobstep -->
<PrefetchThreshold>1</PrefetchThreshold>
<PTTrace>0</PTTrace>
<RotatingDestination>y</RotatingDestination> <!-- Iterate thru UM ports; set to 'n' if UM/PM on same server -->
<!-- <HighPriorityPercentage>60</HighPriorityPercentage> -->
<!-- <MediumPriorityPercentage>30</MediumPriorityPercentage> -->
<!-- <LowPriorityPercentage>10</LowPriorityPercentage> -->
<DirectIO>y</DirectIO>
<HighPriorityPercentage/>
<MediumPriorityPercentage/>
<LowPriorityPercentage/>
</PrimitiveServers>
<PMS1>
<IPAddr>192.168.0.102</IPAddr>
<Port>8620</Port>
</PMS1>
<PMS2>
<IPAddr>192.168.0.103</IPAddr>
<Port>8620</Port>
</PMS2>
<PMS3>
<IPAddr>192.168.0.105</IPAddr>
<Port>8620</Port>
</PMS3>
<PMS4>
<IPAddr>192.168.0.106</IPAddr>
<Port>8620</Port>
</PMS4>
<PMS5>
<IPAddr>192.168.0.102</IPAddr>
<Port>8620</Port>
</PMS5>
<PMS6>
<IPAddr>192.168.0.103</IPAddr>
<Port>8620</Port>
</PMS6>
<PMS7>
<IPAddr>192.168.0.105</IPAddr>
<Port>8620</Port>
</PMS7>
<PMS8>
<IPAddr>192.168.0.106</IPAddr>
<Port>8620</Port>
</PMS8>
<PMS9>
<IPAddr>192.168.0.102</IPAddr>
<Port>8620</Port>
</PMS9>
<PMS10>
<IPAddr>192.168.0.103</IPAddr>
<Port>8620</Port>
</PMS10>
<PMS11>
<IPAddr>192.168.0.105</IPAddr>
<Port>8620</Port>
</PMS11>
<PMS12>
<IPAddr>192.168.0.106</IPAddr>
<Port>8620</Port>
</PMS12>
<PMS13>
<IPAddr>192.168.0.102</IPAddr>
<Port>8620</Port>
</PMS13>
<PMS14>
<IPAddr>192.168.0.103</IPAddr>
<Port>8620</Port>
</PMS14>
<PMS15>
<IPAddr>192.168.0.105</IPAddr>
<Port>8620</Port>
</PMS15>
<PMS16>
<IPAddr>192.168.0.106</IPAddr>
<Port>8620</Port>
</PMS16>
<PMS17>
<IPAddr>192.168.0.102</IPAddr>
<Port>8620</Port>
</PMS17>
<PMS18>
<IPAddr>192.168.0.103</IPAddr>
<Port>8620</Port>
</PMS18>
<PMS19>
<IPAddr>192.168.0.105</IPAddr>
<Port>8620</Port>
</PMS19>
<PMS20>
<IPAddr>192.168.0.106</IPAddr>
<Port>8620</Port>
</PMS20>
<PMS21>
<IPAddr>192.168.0.102</IPAddr>
<Port>8620</Port>
</PMS21>
<PMS22>
<IPAddr>192.168.0.103</IPAddr>
<Port>8620</Port>
</PMS22>
<PMS23>
<IPAddr>192.168.0.105</IPAddr>
<Port>8620</Port>
</PMS23>
<PMS24>
<IPAddr>192.168.0.106</IPAddr>
<Port>8620</Port>
</PMS24>
<PMS25>
<IPAddr>192.168.0.102</IPAddr>
<Port>8620</Port>
</PMS25>
<PMS26>
<IPAddr>192.168.0.103</IPAddr>
<Port>8620</Port>
</PMS26>
<PMS27>
<IPAddr>192.168.0.105</IPAddr>
<Port>8620</Port>
</PMS27>
<PMS28>
<IPAddr>192.168.0.106</IPAddr>
<Port>8620</Port>
</PMS28>
<PMS29>
<IPAddr>192.168.0.102</IPAddr>
<Port>8620</Port>
</PMS29>
<PMS30>
<IPAddr>192.168.0.103</IPAddr>
<Port>8620</Port>
</PMS30>
<PMS31>
<IPAddr>192.168.0.105</IPAddr>
<Port>8620</Port>
</PMS31>
<PMS32>
<IPAddr>192.168.0.106</IPAddr>
<Port>8620</Port>
</PMS32>
<SystemConfig>
<SystemLang>en_US.utf8</SystemLang>
<SystemName>columnstore-1</SystemName>
<ParentOAMModuleName>pm1</ParentOAMModuleName>
<PrimaryUMModuleName>um1</PrimaryUMModuleName>
<!-- Warning: Do not change this value once database is built -->
<DBRootCount>4</DBRootCount>
<DBRoot1>/usr/local/mariadb/columnstore/data1</DBRoot1>
<DBRMRoot>$INSTALLDIR/data1/systemFiles/dbrm/BRM_saves</DBRMRoot>
<TableLockSaveFile>$INSTALLDIR/data1/systemFiles/dbrm/tablelocks</TableLockSaveFile>
<DBRMTimeOut>20</DBRMTimeOut> <!-- in seconds -->
<DBRMSnapshotInterval>100000</DBRMSnapshotInterval>
<WaitPeriod>10</WaitPeriod> <!-- in seconds -->
<CalpontHome>$INSTALLDIR</CalpontHome>
<MemoryCheckPercent>95</MemoryCheckPercent> <!-- Max real memory to limit growth of buffers to -->
<DataFileLog>OFF</DataFileLog>
<!-- enable if you want to limit how much memory may be used for hdfs read/write memory buffers.
<hdfsRdwrBufferMaxSize>8G</hdfsRdwrBufferMaxSize>
-->
<hdfsRdwrScratch>/rdwrscratch</hdfsRdwrScratch> <!-- Do not set to an hdfs file path -->
<!-- Be careful modifying SystemTempFileDir! On start, ExeMgr deletes
the entire subdirectories "joins" & "aggregates" and recreates it to make sure no
files are left behind. -->
<SystemTempFileDir>/tmp/columnstore_tmp_files</SystemTempFileDir>
<DBRoot2>/usr/local/mariadb/columnstore/data2</DBRoot2>
<DBRoot3>/usr/local/mariadb/columnstore/data3</DBRoot3>
<DBRoot4>/usr/local/mariadb/columnstore/data4</DBRoot4>
</SystemConfig>
<SystemModuleConfig>
<ModuleType1>dm</ModuleType1>
<ModuleDesc1>Director Module</ModuleDesc1>
<ModuleCount1>0</ModuleCount1>
<ModuleIPAddr1-1-1>0.0.0.0</ModuleIPAddr1-1-1>
<ModuleHostName1-1-1>unassigned</ModuleHostName1-1-1>
<ModuleDisableState1-1>ENABLED</ModuleDisableState1-1>
<ModuleCPUCriticalThreshold1>0</ModuleCPUCriticalThreshold1>
<ModuleCPUMajorThreshold1>0</ModuleCPUMajorThreshold1>
<ModuleCPUMinorThreshold1>0</ModuleCPUMinorThreshold1>
<ModuleCPUMinorClearThreshold1>0</ModuleCPUMinorClearThreshold1>
<ModuleDiskCriticalThreshold1>90</ModuleDiskCriticalThreshold1>
<ModuleDiskMajorThreshold1>80</ModuleDiskMajorThreshold1>
<ModuleDiskMinorThreshold1>70</ModuleDiskMinorThreshold1>
<ModuleMemCriticalThreshold1>90</ModuleMemCriticalThreshold1>
<ModuleMemMajorThreshold1>0</ModuleMemMajorThreshold1>
<ModuleMemMinorThreshold1>0</ModuleMemMinorThreshold1>
<ModuleSwapCriticalThreshold1>90</ModuleSwapCriticalThreshold1>
<ModuleSwapMajorThreshold1>80</ModuleSwapMajorThreshold1>
<ModuleSwapMinorThreshold1>70</ModuleSwapMinorThreshold1>
<ModuleDiskMonitorFileSystem1-1>/</ModuleDiskMonitorFileSystem1-1>
<ModuleDBRootCount1-1>unassigned</ModuleDBRootCount1-1>
<ModuleDBRootID1-1-1>unassigned</ModuleDBRootID1-1-1>
<ModuleType2>um</ModuleType2>
<ModuleDesc2>User Module</ModuleDesc2>
<ModuleCount2>2</ModuleCount2>
<ModuleIPAddr1-1-2>192.168.0.101</ModuleIPAddr1-1-2>
<ModuleHostName1-1-2>nvm002314</ModuleHostName1-1-2>
<ModuleDisableState1-2>ENABLED</ModuleDisableState1-2>
<ModuleCPUCriticalThreshold2>0</ModuleCPUCriticalThreshold2>
<ModuleCPUMajorThreshold2>0</ModuleCPUMajorThreshold2>
<ModuleCPUMinorThreshold2>0</ModuleCPUMinorThreshold2>
<ModuleCPUMinorClearThreshold2>0</ModuleCPUMinorClearThreshold2>
<ModuleDiskCriticalThreshold2>90</ModuleDiskCriticalThreshold2>
<ModuleDiskMajorThreshold2>80</ModuleDiskMajorThreshold2>
<ModuleDiskMinorThreshold2>70</ModuleDiskMinorThreshold2>
<ModuleMemCriticalThreshold2>90</ModuleMemCriticalThreshold2>
<ModuleMemMajorThreshold2>0</ModuleMemMajorThreshold2>
<ModuleMemMinorThreshold2>0</ModuleMemMinorThreshold2>
<ModuleSwapCriticalThreshold2>90</ModuleSwapCriticalThreshold2>
<ModuleSwapMajorThreshold2>80</ModuleSwapMajorThreshold2>
<ModuleSwapMinorThreshold2>70</ModuleSwapMinorThreshold2>
<ModuleDiskMonitorFileSystem1-2>/</ModuleDiskMonitorFileSystem1-2>
<ModuleDBRootCount1-2>unassigned</ModuleDBRootCount1-2>
<ModuleDBRootID1-1-2>unassigned</ModuleDBRootID1-1-2>
<ModuleType3>pm</ModuleType3>
<ModuleDesc3>Performance Module</ModuleDesc3>
<ModuleCount3>4</ModuleCount3>
<ModuleIPAddr1-1-3>192.168.0.102</ModuleIPAddr1-1-3>
<ModuleHostName1-1-3>nvm002315</ModuleHostName1-1-3>
<ModuleDisableState1-3>ENABLED</ModuleDisableState1-3>
<ModuleCPUCriticalThreshold3>0</ModuleCPUCriticalThreshold3>
<ModuleCPUMajorThreshold3>0</ModuleCPUMajorThreshold3>
<ModuleCPUMinorThreshold3>0</ModuleCPUMinorThreshold3>
<ModuleCPUMinorClearThreshold3>0</ModuleCPUMinorClearThreshold3>
<ModuleDiskCriticalThreshold3>90</ModuleDiskCriticalThreshold3>
<ModuleDiskMajorThreshold3>80</ModuleDiskMajorThreshold3>
<ModuleDiskMinorThreshold3>70</ModuleDiskMinorThreshold3>
<ModuleMemCriticalThreshold3>90</ModuleMemCriticalThreshold3>
<ModuleMemMajorThreshold3>0</ModuleMemMajorThreshold3>
<ModuleMemMinorThreshold3>0</ModuleMemMinorThreshold3>
<ModuleSwapCriticalThreshold3>90</ModuleSwapCriticalThreshold3>
<ModuleSwapMajorThreshold3>80</ModuleSwapMajorThreshold3>
<ModuleSwapMinorThreshold3>70</ModuleSwapMinorThreshold3>
<ModuleDiskMonitorFileSystem1-3>/</ModuleDiskMonitorFileSystem1-3>
<ModuleDBRootCount1-3>1</ModuleDBRootCount1-3>
<ModuleDBRootID1-1-3>1</ModuleDBRootID1-1-3>
<ModuleDisableState2-2>ENABLED</ModuleDisableState2-2>
<ModuleHostName2-1-2>192.168.0.104</ModuleHostName2-1-2>
<ModuleIPAddr2-1-2>192.168.0.104</ModuleIPAddr2-1-2>
<ModuleDBRootCount2-3>1</ModuleDBRootCount2-3>
<ModuleDBRootID2-1-3>2</ModuleDBRootID2-1-3>
<ModuleDBRootCount3-3>1</ModuleDBRootCount3-3>
<ModuleDBRootID3-1-3>3</ModuleDBRootID3-1-3>
<ModuleDBRootCount4-3>1</ModuleDBRootCount4-3>
<ModuleDBRootID4-1-3>4</ModuleDBRootID4-1-3>
<ModuleDisableState2-3>ENABLED</ModuleDisableState2-3>
<ModuleHostName2-1-3>nvm002316</ModuleHostName2-1-3>
<ModuleIPAddr2-1-3>192.168.0.103</ModuleIPAddr2-1-3>
<ModuleDisableState3-3>ENABLED</ModuleDisableState3-3>
<ModuleHostName3-1-3>nvm002980</ModuleHostName3-1-3>
<ModuleIPAddr3-1-3>192.168.0.105</ModuleIPAddr3-1-3>
<ModuleDisableState4-3>ENABLED</ModuleDisableState4-3>
<ModuleHostName4-1-3>nvm002981</ModuleHostName4-1-3>
<ModuleIPAddr4-1-3>192.168.0.106</ModuleIPAddr4-1-3>
<ModuleHostName1-2-2>unassigned</ModuleHostName1-2-2>
<ModuleIPAddr1-2-2>0.0.0.0</ModuleIPAddr1-2-2>
<ModuleHostName2-2-2>unassigned</ModuleHostName2-2-2>
<ModuleIPAddr2-2-2>0.0.0.0</ModuleIPAddr2-2-2>
<ModuleHostName1-2-3>unassigned</ModuleHostName1-2-3>
<ModuleIPAddr1-2-3>0.0.0.0</ModuleIPAddr1-2-3>
<ModuleHostName2-2-3>unassigned</ModuleHostName2-2-3>
<ModuleIPAddr2-2-3>0.0.0.0</ModuleIPAddr2-2-3>
<ModuleHostName3-2-3>unassigned</ModuleHostName3-2-3>
<ModuleIPAddr3-2-3>0.0.0.0</ModuleIPAddr3-2-3>
<ModuleHostName4-2-3>unassigned</ModuleHostName4-2-3>
<ModuleIPAddr4-2-3>0.0.0.0</ModuleIPAddr4-2-3>
</SystemModuleConfig>
<SystemExtDeviceConfig>
<Count>0</Count>
<Name1>unassigned</Name1>
<IPAddr1>0.0.0.0</IPAddr1>
<DisableState1>ENABLED</DisableState1>
</SystemExtDeviceConfig>
<SessionManager>
<MaxConcurrentTransactions>1000</MaxConcurrentTransactions>
<TxnIDFile>$INSTALLDIR/data1/systemFiles/dbrm/SMTxnID</TxnIDFile>
</SessionManager>
<VersionBuffer>
<!-- VersionBufferFileSize must be a multiple of 8192.
One version buffer file will be put on each DB root. -->
<VersionBufferFileSize>1GB</VersionBufferFileSize>
</VersionBuffer>
<OIDManager>
<!-- Do not change this file after database built -->
<OIDBitmapFile>$INSTALLDIR/data1/systemFiles/dbrm/oidbitmap</OIDBitmapFile>
<!-- Do not change this value after database built -->
<FirstOID>3000</FirstOID>
</OIDManager>
<WriteEngine>
<BulkRoot>$INSTALLDIR/data/bulk</BulkRoot>
<BulkRollbackDir>$INSTALLDIR/data1/systemFiles/bulkRollback</BulkRollbackDir>
<MaxFileSystemDiskUsagePct>98</MaxFileSystemDiskUsagePct>
<CompressedPaddingBlocks>1</CompressedPaddingBlocks> <!-- Number of blocks used to pad compressed chunks -->
<FastDelete>n</FastDelete>
</WriteEngine>
<DBRM_Controller>
<NumWorkers>6</NumWorkers>
<IPAddr>192.168.0.102</IPAddr>
<Port>8616</Port>
</DBRM_Controller>
<!-- Worker Port: 8700 - 8720 is reserved to support External Modules-->
<DBRM_Worker1>
<IPAddr>192.168.0.102</IPAddr>
<Port>8700</Port>
<Module>pm1</Module>
</DBRM_Worker1>
<DBRM_Worker2>
<IPAddr>192.168.0.101</IPAddr>
<Port>8700</Port>
<Module>um1</Module>
</DBRM_Worker2>
<DBRM_Worker3>
<IPAddr>192.168.0.104</IPAddr>
<Port>8700</Port>
<Module>um2</Module>
</DBRM_Worker3>
<DBRM_Worker4>
<IPAddr>192.168.0.103</IPAddr>
<Port>8700</Port>
<Module>pm2</Module>
</DBRM_Worker4>
<DBRM_Worker5>
<IPAddr>192.168.0.105</IPAddr>
<Port>8700</Port>
<Module>pm3</Module>
</DBRM_Worker5>
<DBRM_Worker6>
<IPAddr>192.168.0.106</IPAddr>
<Port>8700</Port>
<Module>pm4</Module>
</DBRM_Worker6>
<DBRM_Worker7>
<IPAddr>0.0.0.0</IPAddr>
<Port>8700</Port>
<Module>unassigned</Module>
</DBRM_Worker7>
<DBRM_Worker8>
<IPAddr>0.0.0.0</IPAddr>
<Port>8700</Port>
<Module>unassigned</Module>
</DBRM_Worker8>
<DBRM_Worker9>
<IPAddr>0.0.0.0</IPAddr>
<Port>8700</Port>
<Module>unassigned</Module>
</DBRM_Worker9>
<DBRM_Worker10>
<IPAddr>0.0.0.0</IPAddr>
<Port>8700</Port>
<Module>unassigned</Module>
</DBRM_Worker10>
<DBBC>
<!-- The percentage of RAM to use for the disk block cache. Defaults to 70% -->
<!-- Alternatively, this can be specified in absolute terms using
the suffixes 'm' or 'g' to denote size in megabytes or gigabytes.-->
<!-- <NumBlocksPct>70</NumBlocksPct> -->
<!-- <NumThreads>16</NumThreads> --> <!-- 1-256. Default is 16. -->
<NumCaches>1</NumCaches><!-- # of parallel caches to instantiate -->
<IOMTracing>0</IOMTracing>
<BRPTracing>0</BRPTracing>
<ReportFrequency>65536</ReportFrequency>
<MaxOpenFiles>2K</MaxOpenFiles>
<DecreaseOpenFilesCount>200</DecreaseOpenFilesCount>
<FDCacheTrace>0</FDCacheTrace>
<NumBlocksPct>65</NumBlocksPct>
</DBBC>
<Installation>
<ServerTypeInstall>1</ServerTypeInstall>
<PMwithUM>n</PMwithUM>
<MySQLRep>y</MySQLRep>
<DBRootStorageType>external</DBRootStorageType>
<UMStorageType>internal</UMStorageType>
<DistributedInstall>y</DistributedInstall>
<ProfileFile>/etc/profile.d/columnstoreAlias.sh</ProfileFile>
<DataRedundancyNetworkType/>
</Installation>
<ExtentMap>
<!--
WARNING: these can only be changed on an empty system. Once any object has been allocated
it cannot be changed! Extent size is 8M rows.
-->
<FilesPerColumnPartition>8</FilesPerColumnPartition> <!-- should be a multiple of DBRootCount -->
<BRM_UID>0x0</BRM_UID>
</ExtentMap>
<HashJoin>
<MaxBuckets>128</MaxBuckets>
<MaxElems>128K</MaxElems> <!-- 128 buckets * 128K * 16 = 256 MB -->
<PmMaxMemorySmallSide>64M</PmMaxMemorySmallSide><!-- divide by 48 to get approximate row count -->
<TotalUmMemory>25%</TotalUmMemory>
<CPUniqueLimit>100</CPUniqueLimit>
<AllowDiskBasedJoin>N</AllowDiskBasedJoin>
<TempFileCompression>Y</TempFileCompression>
<TempFileCompressionType>Snappy</TempFileCompressionType> <!-- LZ4, Snappy -->
</HashJoin>
<JobList>
<FlushInterval>16K</FlushInterval>
<FifoSize>32</FifoSize>
<RequestSize>1</RequestSize> <!-- Number of extents per request, should be
less than MaxOutstandingRequests. Otherwise, default value 1 is used. -->
<!-- ProcessorThreadsPerScan is the number of jobs issued to process
each extent. The default is 16. MaxOutstandingRequests is the size of
the window of work in terms of extents. A value of 20 means there
are 20 extents' worth of work for the PMs to process at any given time.
ProcessorThreadsPerScan * MaxOutstandingRequests should be at least
as large as the number of threads available across all PMs. -->
<!-- <ProcessorThreadsPerScan>16</ProcessorThreadsPerScan> -->
<MaxOutstandingRequests>40</MaxOutstandingRequests>
<ThreadPoolSize>100</ThreadPoolSize>
</JobList>
<RowAggregation>
<!-- <RowAggrThreads>4</RowAggrThreads> --> <!-- Default value is 4 or number of cores when less than 4 -->
<!-- <RowAggrBuckets>32</RowAggrBuckets> --> <!-- Default value is number of cores * 4 -->
<!-- <RowAggrRowGroupsPerThread>20</RowAggrRowGroupsPerThread> --> <!-- Default value is 20 -->
<AllowDiskBasedAggregation>N</AllowDiskBasedAggregation>
</RowAggregation>
<CrossEngineSupport>
<Host>127.0.0.1</Host>
<Port>3306</Port>
<User>root</User>
<Password/>
<TLSCA/>
<TLSClientCert/>
<TLSClientKey/>
</CrossEngineSupport>
<QueryStats>
<Enabled>Y</Enabled>
</QueryStats>
<UserPriority>
<Enabled>N</Enabled>
</UserPriority>
<NetworkCompression>
<Enabled>Y</Enabled>
<NetworkCompressionType>Snappy</NetworkCompressionType> <!-- LZ4, Snappy -->
</NetworkCompression>
<QueryTele>
<Host>127.0.0.1</Host>
<Port>0</Port>
</QueryTele>
<um2_ProcessMonitor>
<IPAddr>192.168.0.104</IPAddr>
<Port>8800</Port>
</um2_ProcessMonitor>
<um2_ServerMonitor>
<IPAddr>192.168.0.104</IPAddr>
<Port>8622</Port>
</um2_ServerMonitor>
<ExeMgr2>
<IPAddr>192.168.0.104</IPAddr>
<Port>8601</Port>
<Module>um2</Module>
</ExeMgr2>
<pm2_ProcessMonitor>
<IPAddr>192.168.0.103</IPAddr>
<Port>8800</Port>
</pm2_ProcessMonitor>
<pm2_ServerMonitor>
<IPAddr>192.168.0.103</IPAddr>
<Port>8622</Port>
</pm2_ServerMonitor>
<pm2_WriteEngineServer>
<IPAddr>192.168.0.103</IPAddr>
<Port>8630</Port>
</pm2_WriteEngineServer>
<pm3_ProcessMonitor>
<IPAddr>192.168.0.105</IPAddr>
<Port>8800</Port>
</pm3_ProcessMonitor>
<pm3_ServerMonitor>
<IPAddr>192.168.0.105</IPAddr>
<Port>8622</Port>
</pm3_ServerMonitor>
<pm3_WriteEngineServer>
<IPAddr>192.168.0.105</IPAddr>
<Port>8630</Port>
</pm3_WriteEngineServer>
<pm4_ProcessMonitor>
<IPAddr>192.168.0.106</IPAddr>
<Port>8800</Port>
</pm4_ProcessMonitor>
<pm4_ServerMonitor>
<IPAddr>192.168.0.106</IPAddr>
<Port>8622</Port>
</pm4_ServerMonitor>
<pm4_WriteEngineServer>
<IPAddr>192.168.0.106</IPAddr>
<Port>8630</Port>
</pm4_WriteEngineServer>
<DataRedundancyConfig>
<DBRoot1PMs/>
<DBRoot2PMs/>
<DBRoot3PMs/>
<DBRoot4PMs/>
</DataRedundancyConfig>
<ProcHeartbeatControl>
<IPAddr>192.168.0.102</IPAddr>
</ProcHeartbeatControl>
</Columnstore>
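Note: the DBRM_Controller block above is what the node-control code looks up at runtime (see test_get_dbrm_conn_info later in this commit). A minimal sketch of that lookup with Python's standard ElementTree, assuming the config above is saved as Columnstore.xml in the working directory (an illustrative path, not a guaranteed location):

import xml.etree.ElementTree as ET

# Read the DBRM controller endpoint the same way the tests do.
tree = ET.parse('Columnstore.xml')
ip = tree.find('./DBRM_Controller/IPAddr').text    # '192.168.0.102' above
port = tree.find('./DBRM_Controller/Port').text    # '8616' above
print(f'DBRM controller at {ip}:{port}')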

View File

@@ -0,0 +1,531 @@
<Columnstore Version="V1.0.0">
<!--
WARNING: Do not make changes to this file unless directed to do so by
MariaDB service engineers. Incorrect settings can render your system
unusable and will require a service call to correct.
-->
<ExeMgr1>
<IPAddr>127.0.0.1</IPAddr>
<Port>8601</Port>
<Module>unassigned</Module>
</ExeMgr1>
<JobProc>
<IPAddr>0.0.0.0</IPAddr>
<Port>8602</Port>
</JobProc>
<ProcMgr>
<IPAddr>127.0.0.1</IPAddr>
<Port>8603</Port>
</ProcMgr>
<ProcMgr_Alarm>
<IPAddr>127.0.0.1</IPAddr>
<Port>8606</Port>
</ProcMgr_Alarm>
<ProcStatusControl>
<IPAddr>127.0.0.1</IPAddr>
<Port>8604</Port>
</ProcStatusControl>
<ProcStatusControlStandby>
<IPAddr>0.0.0.0</IPAddr>
<Port>8605</Port>
</ProcStatusControlStandby>
<!-- Disabled
<ProcHeartbeatControl>
<IPAddr>0.0.0.0</IPAddr>
<Port>8605</Port>
</ProcHeartbeatControl>
-->
<!-- ProcessMonitor Port: 8800 - 8820 is reserved to support External Modules-->
<localhost_ProcessMonitor>
<IPAddr>127.0.0.1</IPAddr>
<Port>8800</Port>
</localhost_ProcessMonitor>
<dm1_ProcessMonitor>
<IPAddr>0.0.0.0</IPAddr>
<Port>8800</Port>
</dm1_ProcessMonitor>
<um1_ProcessMonitor>
<IPAddr>0.0.0.0</IPAddr>
<Port>8800</Port>
</um1_ProcessMonitor>
<pm1_ProcessMonitor>
<IPAddr>127.0.0.1</IPAddr>
<Port>8800</Port>
</pm1_ProcessMonitor>
<dm1_ServerMonitor>
<IPAddr>0.0.0.0</IPAddr>
<Port>8622</Port>
</dm1_ServerMonitor>
<um1_ServerMonitor>
<IPAddr>0.0.0.0</IPAddr>
<Port>8622</Port>
</um1_ServerMonitor>
<pm1_ServerMonitor>
<IPAddr>127.0.0.1</IPAddr>
<Port>8622</Port>
</pm1_ServerMonitor>
<pm1_WriteEngineServer>
<IPAddr>127.0.0.1</IPAddr>
<Port>8630</Port>
</pm1_WriteEngineServer>
<DDLProc>
<IPAddr>127.0.0.1</IPAddr>
<Port>8612</Port>
</DDLProc>
<DMLProc>
<IPAddr>127.0.0.1</IPAddr>
<Port>8614</Port>
</DMLProc>
<BatchInsert>
<RowsPerBatch>10000</RowsPerBatch>
</BatchInsert>
<PrimitiveServers>
<Count>1</Count>
<ConnectionsPerPrimProc>2</ConnectionsPerPrimProc>
<ProcessorThreshold>128</ProcessorThreshold>
<ProcessorQueueSize>10K</ProcessorQueueSize>
<!-- minimum of extent size 8192 -->
<DebugLevel>0</DebugLevel>
<ColScanBufferSizeBlocks>512</ColScanBufferSizeBlocks>
<ColScanReadAheadBlocks>512</ColScanReadAheadBlocks>
<!-- s/b factor of extent size 8192 -->
<!-- <BPPCount>16</BPPCount> -->
<!-- Default num cores * 2. A cap on the number of simultaneous primitives per jobstep -->
<PrefetchThreshold>1</PrefetchThreshold>
<PTTrace>0</PTTrace>
<RotatingDestination>n</RotatingDestination>
<!-- Iterate thru UM ports; set to 'n' if UM/PM on same server -->
<!-- <HighPriorityPercentage>60</HighPriorityPercentage> -->
<!-- <MediumPriorityPercentage>30</MediumPriorityPercentage> -->
<!-- <LowPriorityPercentage>10</LowPriorityPercentage> -->
<DirectIO>y</DirectIO>
<HighPriorityPercentage/>
<MediumPriorityPercentage/>
<LowPriorityPercentage/>
</PrimitiveServers>
<PMS1>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS1>
<PMS2>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS2>
<PMS3>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS3>
<PMS4>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS4>
<PMS5>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS5>
<PMS6>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS6>
<PMS7>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS7>
<PMS8>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS8>
<PMS9>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS9>
<PMS10>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS10>
<PMS11>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS11>
<PMS12>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS12>
<PMS13>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS13>
<PMS14>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS14>
<PMS15>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS15>
<PMS16>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS16>
<PMS17>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS17>
<PMS18>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS18>
<PMS19>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS19>
<PMS20>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS20>
<PMS21>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS21>
<PMS22>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS22>
<PMS23>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS23>
<PMS24>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS24>
<PMS25>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS25>
<PMS26>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS26>
<PMS27>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS27>
<PMS28>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS28>
<PMS29>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS29>
<PMS30>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS30>
<PMS31>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS31>
<PMS32>
<IPAddr>127.0.0.1</IPAddr>
<Port>8620</Port>
</PMS32>
<SystemConfig>
<SystemLang>C</SystemLang>
<SystemName>columnstore-1</SystemName>
<ParentOAMModuleName>pm1</ParentOAMModuleName>
<PrimaryUMModuleName>pm1</PrimaryUMModuleName>
<!-- Warning: Do not change this value once database is built -->
<DBRootCount>1</DBRootCount>
<DBRoot1>/var/lib/columnstore/data1</DBRoot1>
<DBRMRoot>/var/lib/columnstore/data1/systemFiles/dbrm/BRM_saves</DBRMRoot>
<TableLockSaveFile>/var/lib/columnstore/data1/systemFiles/dbrm/tablelocks</TableLockSaveFile>
<DBRMTimeOut>20</DBRMTimeOut>
<!-- in seconds -->
<DBRMSnapshotInterval>100000</DBRMSnapshotInterval>
<!-- default SWSDL max element save size -->
<WaitPeriod>10</WaitPeriod>
<!-- in seconds -->
<MemoryCheckPercent>95</MemoryCheckPercent>
<!-- Max real memory to limit growth of buffers to -->
<DataFileLog>OFF</DataFileLog>
<!-- enable if you want to limit how much memory may be used for hdfs read/write memory buffers.
<hdfsRdwrBufferMaxSize>8G</hdfsRdwrBufferMaxSize>
-->
<hdfsRdwrScratch>/rdwrscratch</hdfsRdwrScratch> <!-- Do not set to an hdfs file path -->
<!-- Be careful modifying SystemTempFileDir! On start, ExeMgr deletes
the entire subdirectories "joins" & "aggregates" and recreates them to make sure no
files are left behind. -->
<SystemTempFileDir>/tmp/columnstore_tmp_files</SystemTempFileDir>
</SystemConfig>
<SystemModuleConfig>
<ModuleType1>dm</ModuleType1>
<ModuleDesc1>Director Module</ModuleDesc1>
<ModuleCount1>0</ModuleCount1>
<ModuleIPAddr1-1-1>0.0.0.0</ModuleIPAddr1-1-1>
<ModuleHostName1-1-1>unassigned</ModuleHostName1-1-1>
<ModuleDisableState1-1>ENABLED</ModuleDisableState1-1>
<ModuleCPUCriticalThreshold1>0</ModuleCPUCriticalThreshold1>
<ModuleCPUMajorThreshold1>0</ModuleCPUMajorThreshold1>
<ModuleCPUMinorThreshold1>0</ModuleCPUMinorThreshold1>
<ModuleCPUMinorClearThreshold1>0</ModuleCPUMinorClearThreshold1>
<ModuleDiskCriticalThreshold1>90</ModuleDiskCriticalThreshold1>
<ModuleDiskMajorThreshold1>80</ModuleDiskMajorThreshold1>
<ModuleDiskMinorThreshold1>70</ModuleDiskMinorThreshold1>
<ModuleMemCriticalThreshold1>90</ModuleMemCriticalThreshold1>
<ModuleMemMajorThreshold1>0</ModuleMemMajorThreshold1>
<ModuleMemMinorThreshold1>0</ModuleMemMinorThreshold1>
<ModuleSwapCriticalThreshold1>90</ModuleSwapCriticalThreshold1>
<ModuleSwapMajorThreshold1>80</ModuleSwapMajorThreshold1>
<ModuleSwapMinorThreshold1>70</ModuleSwapMinorThreshold1>
<ModuleDiskMonitorFileSystem1-1>/</ModuleDiskMonitorFileSystem1-1>
<ModuleDBRootCount1-1>unassigned</ModuleDBRootCount1-1>
<ModuleDBRootID1-1-1>unassigned</ModuleDBRootID1-1-1>
<ModuleType2>um</ModuleType2>
<ModuleDesc2>User Module</ModuleDesc2>
<ModuleCount2>0</ModuleCount2>
<ModuleIPAddr1-1-2>0.0.0.0</ModuleIPAddr1-1-2>
<ModuleHostName1-1-2>unassigned</ModuleHostName1-1-2>
<ModuleDisableState1-2>ENABLED</ModuleDisableState1-2>
<ModuleCPUCriticalThreshold2>0</ModuleCPUCriticalThreshold2>
<ModuleCPUMajorThreshold2>0</ModuleCPUMajorThreshold2>
<ModuleCPUMinorThreshold2>0</ModuleCPUMinorThreshold2>
<ModuleCPUMinorClearThreshold2>0</ModuleCPUMinorClearThreshold2>
<ModuleDiskCriticalThreshold2>90</ModuleDiskCriticalThreshold2>
<ModuleDiskMajorThreshold2>80</ModuleDiskMajorThreshold2>
<ModuleDiskMinorThreshold2>70</ModuleDiskMinorThreshold2>
<ModuleMemCriticalThreshold2>90</ModuleMemCriticalThreshold2>
<ModuleMemMajorThreshold2>0</ModuleMemMajorThreshold2>
<ModuleMemMinorThreshold2>0</ModuleMemMinorThreshold2>
<ModuleSwapCriticalThreshold2>90</ModuleSwapCriticalThreshold2>
<ModuleSwapMajorThreshold2>80</ModuleSwapMajorThreshold2>
<ModuleSwapMinorThreshold2>70</ModuleSwapMinorThreshold2>
<ModuleDiskMonitorFileSystem1-2>/</ModuleDiskMonitorFileSystem1-2>
<ModuleDBRootCount1-2>unassigned</ModuleDBRootCount1-2>
<ModuleDBRootID1-1-2>unassigned</ModuleDBRootID1-1-2>
<ModuleType3>pm</ModuleType3>
<ModuleDesc3>Performance Module</ModuleDesc3>
<ModuleCount3>1</ModuleCount3>
<ModuleIPAddr1-1-3>127.0.0.1</ModuleIPAddr1-1-3>
<ModuleHostName1-1-3>localhost</ModuleHostName1-1-3>
<ModuleDisableState1-3>ENABLED</ModuleDisableState1-3>
<ModuleCPUCriticalThreshold3>0</ModuleCPUCriticalThreshold3>
<ModuleCPUMajorThreshold3>0</ModuleCPUMajorThreshold3>
<ModuleCPUMinorThreshold3>0</ModuleCPUMinorThreshold3>
<ModuleCPUMinorClearThreshold3>0</ModuleCPUMinorClearThreshold3>
<ModuleDiskCriticalThreshold3>90</ModuleDiskCriticalThreshold3>
<ModuleDiskMajorThreshold3>80</ModuleDiskMajorThreshold3>
<ModuleDiskMinorThreshold3>70</ModuleDiskMinorThreshold3>
<ModuleMemCriticalThreshold3>90</ModuleMemCriticalThreshold3>
<ModuleMemMajorThreshold3>0</ModuleMemMajorThreshold3>
<ModuleMemMinorThreshold3>0</ModuleMemMinorThreshold3>
<ModuleSwapCriticalThreshold3>90</ModuleSwapCriticalThreshold3>
<ModuleSwapMajorThreshold3>80</ModuleSwapMajorThreshold3>
<ModuleSwapMinorThreshold3>70</ModuleSwapMinorThreshold3>
<ModuleDiskMonitorFileSystem1-3>/</ModuleDiskMonitorFileSystem1-3>
<ModuleDBRootCount1-3>1</ModuleDBRootCount1-3>
<ModuleDBRootID1-1-3>1</ModuleDBRootID1-1-3>
</SystemModuleConfig>
<SystemExtDeviceConfig>
<Count>0</Count>
<Name1>unassigned</Name1>
<IPAddr1>0.0.0.0</IPAddr1>
<DisableState1>ENABLED</DisableState1>
</SystemExtDeviceConfig>
<SessionManager>
<MaxConcurrentTransactions>1000</MaxConcurrentTransactions>
<TxnIDFile>/var/lib/columnstore/data1/systemFiles/dbrm/SMTxnID</TxnIDFile>
</SessionManager>
<VersionBuffer>
<!-- VersionBufferFileSize must be a multiple of 8192.
One version buffer file will be put on each DB root. -->
<VersionBufferFileSize>1GB</VersionBufferFileSize>
</VersionBuffer>
<OIDManager>
<!-- Do not change this file after database built -->
<OIDBitmapFile>/var/lib/columnstore/data1/systemFiles/dbrm/oidbitmap</OIDBitmapFile>
<!-- Do not change this value after database built -->
<FirstOID>3000</FirstOID>
</OIDManager>
<WriteEngine>
<BulkRoot>/var/log/mariadb/columnstore/data/bulk</BulkRoot>
<BulkRollbackDir>/var/lib/columnstore/data1/systemFiles/bulkRollback</BulkRollbackDir>
<MaxFileSystemDiskUsagePct>98</MaxFileSystemDiskUsagePct>
<CompressedPaddingBlocks>1</CompressedPaddingBlocks> <!-- Number of blocks used to pad compressed chunks -->
<FastDelete>n</FastDelete>
</WriteEngine>
<DBRM_Controller>
<NumWorkers>1</NumWorkers>
<IPAddr>127.0.0.1</IPAddr>
<Port>8616</Port>
</DBRM_Controller>
<!-- Worker Port: 8700 - 8720 is reserved to support External Modules-->
<DBRM_Worker1>
<IPAddr>127.0.0.1</IPAddr>
<Port>8700</Port>
<Module>pm1</Module>
</DBRM_Worker1>
<DBRM_Worker2>
<IPAddr>0.0.0.0</IPAddr>
<Port>8700</Port>
<Module>unassigned</Module>
</DBRM_Worker2>
<DBRM_Worker3>
<IPAddr>0.0.0.0</IPAddr>
<Port>8700</Port>
<Module>unassigned</Module>
</DBRM_Worker3>
<DBRM_Worker4>
<IPAddr>0.0.0.0</IPAddr>
<Port>8700</Port>
<Module>unassigned</Module>
</DBRM_Worker4>
<DBRM_Worker5>
<IPAddr>0.0.0.0</IPAddr>
<Port>8700</Port>
<Module>unassigned</Module>
</DBRM_Worker5>
<DBRM_Worker6>
<IPAddr>0.0.0.0</IPAddr>
<Port>8700</Port>
<Module>unassigned</Module>
</DBRM_Worker6>
<DBRM_Worker7>
<IPAddr>0.0.0.0</IPAddr>
<Port>8700</Port>
<Module>unassigned</Module>
</DBRM_Worker7>
<DBRM_Worker8>
<IPAddr>0.0.0.0</IPAddr>
<Port>8700</Port>
<Module>unassigned</Module>
</DBRM_Worker8>
<DBRM_Worker9>
<IPAddr>0.0.0.0</IPAddr>
<Port>8700</Port>
<Module>unassigned</Module>
</DBRM_Worker9>
<DBRM_Worker10>
<IPAddr>0.0.0.0</IPAddr>
<Port>8700</Port>
<Module>unassigned</Module>
</DBRM_Worker10>
<DBBC>
<!-- The percentage of RAM to use for the disk block cache. Defaults to 70% -->
<!-- Alternatively, this can be specified in absolute terms using
the suffixes 'm' or 'g' to denote size in megabytes or gigabytes.-->
<!-- <NumBlocksPct>70</NumBlocksPct> -->
<!-- <NumThreads>16</NumThreads> -->
<!-- 1-256. Default is 16. -->
<NumCaches>1</NumCaches>
<!-- # of parallel caches to instantiate -->
<IOMTracing>0</IOMTracing>
<BRPTracing>0</BRPTracing>
<ReportFrequency>65536</ReportFrequency>
<MaxOpenFiles>2K</MaxOpenFiles>
<DecreaseOpenFilesCount>200</DecreaseOpenFilesCount>
<FDCacheTrace>0</FDCacheTrace>
<NumBlocksPct>50</NumBlocksPct>
</DBBC>
<Installation>
<ServerTypeInstall>2</ServerTypeInstall>
<PMwithUM>n</PMwithUM>
<MySQLRep>n</MySQLRep>
<DBRootStorageType>internal</DBRootStorageType>
<UMStorageType>internal</UMStorageType>
<ProfileFile>/etc/profile.d/columnstoreAlias.sh</ProfileFile>
<DataRedundancyNetworkType/>
</Installation>
<ExtentMap>
<!--
WARNING: these can only be changed on an empty system. Once any object has been allocated
it cannot be changed! Extent size is 8M rows.
-->
<FilesPerColumnPartition>4</FilesPerColumnPartition>
<!-- should be a multiple of DBRootCount -->
<BRM_UID>0x0</BRM_UID>
</ExtentMap>
<HashJoin>
<MaxBuckets>128</MaxBuckets>
<MaxElems>128K</MaxElems>
<!-- 128 buckets * 128K * 16 = 256 MB -->
<PmMaxMemorySmallSide>1G</PmMaxMemorySmallSide>
<TotalUmMemory>25%</TotalUmMemory>
<CPUniqueLimit>100</CPUniqueLimit>
<AllowDiskBasedJoin>N</AllowDiskBasedJoin>
<TempFileCompression>Y</TempFileCompression>
<TempFileCompressionType>Snappy</TempFileCompressionType> <!-- LZ4, Snappy -->
</HashJoin>
<JobList>
<FlushInterval>16K</FlushInterval>
<FifoSize>16</FifoSize>
<RequestSize>1</RequestSize>
<!-- Number of extents per request, should be
less than MaxOutstandingRequests. Otherwise, default value 1 is used. -->
<!-- ProcessorThreadsPerScan is the number of jobs issued to process
each extent. The default is 16. MaxOutstandingRequests is the size of
the window of work in terms of extents. A value of 20 means there
are 20 extents' worth of work for the PMs to process at any given time.
ProcessorThreadsPerScan * MaxOutstandingRequests should be at least
as large as the number of threads available across all PMs. -->
<!-- <ProcessorThreadsPerScan>16</ProcessorThreadsPerScan> -->
<!-- MaxOutstandingRequests defaults to (cores available across all
performance modules * 4) / ProcessorThreadsPerScan, with a lower
bound of 20 -->
<!-- <MaxOutstandingRequests>20</MaxOutstandingRequests> -->
<ThreadPoolSize>100</ThreadPoolSize>
</JobList>
<RowAggregation>
<!-- <RowAggrThreads>4</RowAggrThreads> -->
<!-- Default value is the number of cores -->
<!-- <RowAggrBuckets>32</RowAggrBuckets> -->
<!-- Default value is number of cores * 4 -->
<!-- <RowAggrRowGroupsPerThread>20</RowAggrRowGroupsPerThread> -->
<!-- Default value is 20 -->
<AllowDiskBasedAggregation>N</AllowDiskBasedAggregation>
</RowAggregation>
<CrossEngineSupport>
<Host>127.0.0.1</Host>
<Port>3306</Port>
<User>root</User>
<Password/>
<TLSCA/>
<TLSClientCert/>
<TLSClientKey/>
</CrossEngineSupport>
<QueryStats>
<Enabled>N</Enabled>
</QueryStats>
<UserPriority>
<Enabled>N</Enabled>
</UserPriority>
<NetworkCompression>
<Enabled>Y</Enabled>
<NetworkCompressionType>Snappy</NetworkCompressionType> <!-- LZ4, Snappy -->
</NetworkCompression>
<QueryTele>
<Host>127.0.0.1</Host>
<Port>0</Port>
</QueryTele>
<StorageManager>
<MaxSockets>30</MaxSockets>
<Enabled>N</Enabled>
</StorageManager>
<DataRedundancyConfig>
<DBRoot1PMs/>
</DataRedundancyConfig>
<ConfigRevision>1</ConfigRevision>
<ClusterManager>127.0.0.1</ClusterManager>
<ClusterName>MyCluster</ClusterName>
<NextNodeId>2</NextNodeId>
<NextDBRootId>2</NextDBRootId>
<DesiredNodes>
<Node>127.0.0.1</Node>
</DesiredNodes>
<ActiveNodes>
<Node>127.0.0.1</Node>
</ActiveNodes>
<InactiveNodes/>
<PrimaryNode>127.0.0.1</PrimaryNode>
</Columnstore>
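Note: unlike the multi-node config earlier, this single-node file carries the CMAPI topology tags (ClusterManager, DesiredNodes, ActiveNodes, PrimaryNode). A sketch of reading them with ElementTree, under the same illustrative-path assumption as above:

import xml.etree.ElementTree as ET

root = ET.parse('Columnstore.xml').getroot()
primary = root.findtext('PrimaryNode')                         # '127.0.0.1'
desired = [n.text for n in root.findall('./DesiredNodes/Node')]
active = [n.text for n in root.findall('./ActiveNodes/Node')]
print(primary, desired, active)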

View File

@@ -0,0 +1,7 @@
import os
from cmapi_server.constants import CMAPI_DEFAULT_CONF_PATH
CONFIG_PATH_NEW = './mcs_node_control/test/Columnstore_new.xml'
CONFIG_PATH_OLD = './mcs_node_control/test/Columnstore_old.xml'

View File

@@ -0,0 +1,29 @@
import io
import logging
import unittest
from mcs_node_control.models.dbrm_socket import MAGIC_BYTES, DBRMSocketHandler
logging.basicConfig(level='DEBUG')
class TestDBRMSocketHandler(unittest.TestCase):
def test_myreceive_to_magic(self):
response_data = b'\x01\x00\x00\x00\x00'
valid_magic = b'%s%s' % (MAGIC_BYTES, response_data)
first_unknown = b'A%s%s' % (MAGIC_BYTES, response_data)
partial_first_magic = b'%s%s%s' % (
MAGIC_BYTES[:3], MAGIC_BYTES, response_data
)
sock_responses = [valid_magic, first_unknown, partial_first_magic]
for sock_response in sock_responses:
with self.subTest(sock_response=sock_response):
data_stream = io.BytesIO(sock_response)
data_stream.recv = data_stream.read
dbrm_socket = DBRMSocketHandler()
# pylint: disable=protected-access
dbrm_socket._socket = data_stream
dbrm_socket._receive_magic()
self.assertEqual(data_stream.read(), response_data)
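Note: the three sock_responses above cover the cases a magic-byte scanner must handle: the marker at the start of the stream, junk before the marker, and a false partial marker before the real one. A simplified sketch of that scanning logic (the real DBRMSocketHandler._receive_magic may differ; the MAGIC_BYTES value here is a stand-in, the actual one lives in dbrm_socket):

import io

MAGIC_BYTES = b'\xde\xad\xbe\xef'  # stand-in value, not the real constant

def receive_magic(recv):
    """Consume bytes one at a time until a full MAGIC_BYTES marker has passed."""
    window = b''
    while window != MAGIC_BYTES:
        byte = recv(1)
        if not byte:
            raise ConnectionError('stream closed before magic was found')
        window = (window + byte)[-len(MAGIC_BYTES):]  # sliding window of last 4 bytes

# Mirrors the test's second case: one junk byte, then magic, then payload.
stream = io.BytesIO(b'A' + MAGIC_BYTES + b'\x01\x00')
receive_magic(stream.read)
assert stream.read() == b'\x01\x00'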

View File

@@ -0,0 +1,13 @@
import unittest
class MiscTest(unittest.TestCase):
def test_read_module_id(self):
pass
def test_set_module_id(self):
pass
if __name__ == '__main__':
unittest.main()
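Note: these stubs leave the module-id read/set behaviour untested. Judging from test_module_id further down (which parses the module file with read_text()[2:]), the file appears to hold an identifier such as 'pm1'. A hypothetical sketch under that assumption, with a stand-in path in place of MCS_MODULE_FILE_PATH:

from pathlib import Path

MODULE_FILE = Path('/tmp/module')  # stand-in for MCS_MODULE_FILE_PATH

def read_module_id() -> int:
    # 'pm1' -> 1; assumes a two-character type prefix, as the test implies
    return int(MODULE_FILE.read_text()[2:])

def set_module_id(module_id: int) -> None:
    MODULE_FILE.write_text(f'pm{module_id}')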

View File

@@ -0,0 +1,288 @@
import logging
import os
import subprocess
import unittest
import xml.etree.ElementTree as ET
from pathlib import Path
from shutil import copyfile
from tempfile import TemporaryDirectory
from unittest import TestCase, mock
from lxml import etree
from cmapi_server.constants import CMAPI_DEFAULT_CONF_PATH
from mcs_node_control.models.dbrm import (
DBRM, set_cluster_mode
)
from mcs_node_control.models.node_config import NodeConfig
from mcs_node_control.models.misc import read_module_id
from mcs_node_control.models.node_status import NodeStatus
from mcs_node_control.test.settings import CONFIG_PATH_NEW, CONFIG_PATH_OLD
MCS_NODE_MODELS = 'mcs_node_control.models'
NODE_CONFIG_MODULE = f'{MCS_NODE_MODELS}.node_config'
logging.basicConfig(level='DEBUG')
# These tests need a working DBRM worker.
class NodeConfigTest(TestCase):
@mock.patch(f'{NODE_CONFIG_MODULE}.mkdir')
@mock.patch(f'{NODE_CONFIG_MODULE}.chown')
@mock.patch(f'{NODE_CONFIG_MODULE}.read_module_id', return_value=1)
@mock.patch(
f'{NODE_CONFIG_MODULE}.NodeConfig.in_active_nodes',
return_value=False
)
def test_apply_config(self, *_args):
"""Test apply configuration file."""
with TemporaryDirectory() as tmp_dirname:
config_filepath = os.path.join(tmp_dirname, 'Columnstore.xml')
copyfile(CONFIG_PATH_OLD, config_filepath)
# change config
parser = etree.XMLParser(load_dtd=True)
# new_tree = etree.parse('/etc/columnstore/Columnstore.xml', parser=parser)
new_tree = etree.parse(CONFIG_PATH_NEW, parser=parser)
node_config = NodeConfig()
xml_string = node_config.to_string(new_tree)
node_config.apply_config(config_filepath, xml_string)
# compare configurations
config_file = Path(config_filepath)
xml_string_written = config_file.read_text()
self.assertEqual(xml_string_written, xml_string)
# a copy must exist
config_file_copy = Path(f"{config_filepath}.cmapi.save")
self.assertTrue(config_file_copy.exists())
@mock.patch(f'{NODE_CONFIG_MODULE}.mkdir')
@mock.patch(f'{NODE_CONFIG_MODULE}.chown')
@mock.patch(f'{NODE_CONFIG_MODULE}.read_module_id', return_value=1)
@mock.patch(
f'{NODE_CONFIG_MODULE}.NodeConfig.in_active_nodes',
return_value=False
)
def test_rollback_config(self, *_args):
""""Test rollback applied configuration file."""
with TemporaryDirectory() as tmp_dirname:
config_filepath = os.path.join(tmp_dirname, 'Columnstore.xml')
copyfile(CONFIG_PATH_OLD, config_filepath)
old_config_file = Path(CONFIG_PATH_OLD)
old_xml_string = old_config_file.read_text()
new_config_file = Path(CONFIG_PATH_NEW)
new_xml_string = new_config_file.read_text()
node_config = NodeConfig()
node_config.apply_config(config_filepath, new_xml_string)
node_config.rollback_config(config_filepath)
config_file = Path(config_filepath)
xml_string_restored = config_file.read_text()
self.assertEqual(xml_string_restored, old_xml_string)
def test_get_current_config(self):
"""Test get current config from file."""
config_file = Path(CONFIG_PATH_OLD)
node_config = NodeConfig()
self.assertEqual(
node_config.get_current_config(CONFIG_PATH_OLD),
config_file.read_text()
)
def test_set_cluster_mode(self):
"""Test set cluster mode.
TODO:
- move from here. There is no set_cluster_mode in NodeConfig
- split into unit and integration tests
- add unit tests for the raised exceptions
"""
for mode in ['readonly', 'readwrite']:
with self.subTest(mode=mode):
fake_mode = mode
set_cluster_mode(mode)
with DBRM() as dbrm:
if dbrm.get_dbrm_status() != 'master':
fake_mode = 'readonly'
self.assertEqual(dbrm.get_cluster_mode(), fake_mode)
self.assertEqual(dbrm._get_cluster_mode(), mode)
def test_get_dbrm_conn_info(self):
node_config = NodeConfig()
root = node_config.get_current_config_root(CONFIG_PATH_OLD)
master_conn_info = node_config.get_dbrm_conn_info(root)
tree = ET.parse(CONFIG_PATH_OLD)
master_ip = tree.find('./DBRM_Controller/IPAddr').text
master_port = tree.find('./DBRM_Controller/Port').text
self.assertEqual(master_conn_info['IPAddr'], master_ip)
self.assertEqual(master_conn_info['Port'], master_port)
def test_is_primary_node(self):
try:
current_master = None
node_config = NodeConfig()
root = node_config.get_current_config_root()
current_master = node_config.get_dbrm_conn_info(root)['IPAddr']
list_ips = "ip -4 -o addr | awk '!/^[0-9]*: ?lo|link\/ether/ {print $4}'"
result = subprocess.run(list_ips,
shell=True,
stdout=subprocess.PIPE)
local_addresses = result.stdout.decode('ASCII').split('\n')
local_addresses = [addr.split('/')[0] for addr in local_addresses if len(addr)]
os.system(f"mcsSetConfig DBRM_Controller IPAddr {local_addresses[0]}")
self.assertTrue(node_config.is_primary_node())
os.system(f"mcsSetConfig DBRM_Controller IPAddr 8.8.8.8")
self.assertFalse(node_config.is_primary_node())
os.system(f"mcsSetConfig DBRM_Controller IPAddr {current_master}")
except AssertionError as e:
if current_master is not None:
os.system(f"mcsSetConfig DBRM_Controller IPAddr \
{current_master}")
raise e
def test_get_network_interfaces(self):
node_config = NodeConfig()
addresses = list(node_config.get_network_addresses())
exemplar_addresses = []
list_ips = "ip -4 -o addr | awk '!/^[0-9]*: ?lo|link\/ether/ {print $4}'"
result = subprocess.run(list_ips,
shell=True,
stdout=subprocess.PIPE)
exemplar_addresses += result.stdout.decode('ASCII').split('\n')
list_ips = "ip -6 -o addr | awk '!/^[0-9]*: ?lo|link\/ether/ {print $4}'"
result = subprocess.run(list_ips,
shell=True,
stdout=subprocess.PIPE)
exemplar_addresses += result.stdout.decode('ASCII').split('\n')
golden_addresses = [addr.split('/')[0] for addr in exemplar_addresses if len(addr) > 0]
for addr in golden_addresses:
self.assertTrue(addr in addresses)
def test_is_single_node(self):
try:
current_master = None
node_config = NodeConfig()
root = node_config.get_current_config_root()
current_master = node_config.get_dbrm_conn_info(root)['IPAddr']
os.system(f"mcsSetConfig DBRM_Controller IPAddr 127.0.0.1")
self.assertTrue(node_config.is_single_node())
os.system(f"mcsSetConfig DBRM_Controller IPAddr 8.8.8.8")
self.assertFalse(node_config.is_single_node())
os.system(f"mcsSetConfig DBRM_Controller IPAddr {current_master}")
except AssertionError as e:
if current_master is not None:
os.system(f"mcsSetConfig DBRM_Controller IPAddr \
{current_master}")
raise e
@mock.patch(f'{NODE_CONFIG_MODULE}.read_module_id', return_value=1)
def test_get_module_net_address(self, *args):
with TemporaryDirectory() as tmp_dirname:
config_filepath = os.path.join(tmp_dirname, 'Columnstore.xml')
copyfile(CONFIG_PATH_OLD, config_filepath)
module_address = None
node_config = NodeConfig()
current_module_id = read_module_id()
module_address_sh = (
f'mcsGetConfig -c {config_filepath} '
f'SystemModuleConfig ModuleIPAddr{current_module_id}-1-3'
)
result = subprocess.run(
module_address_sh, shell=True, stdout=subprocess.PIPE
)
module_address = result.stdout.decode('ASCII').split('\n')[0]
dummy_address = '8.8.8.8'
os.system(
f'mcsSetConfig -c {config_filepath} '
f'SystemModuleConfig ModuleIPAddr{current_module_id}-1-3 '
f'{dummy_address}'
)
root = node_config.get_current_config_root(config_filepath)
self.assertEqual(
dummy_address, node_config.get_module_net_address(root)
)
self.assertNotEqual(
module_address, node_config.get_module_net_address(root)
)
os.system(
f'mcsSetConfig -c {config_filepath} SystemModuleConfig '
f'ModuleIPAddr{current_module_id}-1-3 {module_address}'
)
root = node_config.get_current_config_root(config_filepath)
self.assertEqual(
module_address, node_config.get_module_net_address(root)
)
def test_get_new_module_id(self):
try:
current_module_id = None
current_module_address = None
node_config = NodeConfig()
current_module_id = read_module_id()
root = node_config.get_current_config_root()
current_module_address = node_config.get_module_net_address(root)
os.system(f"mcsSetConfig SystemModuleConfig \
ModuleIPAddr{current_module_id}-1-3 8.8.8.8")
os.system(f"mcsSetConfig SystemModuleConfig \
ModuleIPAddr{current_module_id+42}-1-3 {current_module_address}")
root = node_config.get_current_config_root()
self.assertEqual(current_module_id+42,
node_config.get_new_module_id(root))
self.assertNotEqual(current_module_id,
node_config.get_new_module_id(root))
os.system(f"mcsSetConfig SystemModuleConfig \
ModuleIPAddr{current_module_id}-1-3 {current_module_address}")
os.system(f"mcsSetConfig -x SystemModuleConfig \
ModuleIPAddr{current_module_id+42}-1-3 {current_module_address}")
root = node_config.get_current_config_root()
self.assertEqual(current_module_id,
node_config.get_new_module_id(root))
except AssertionError as e:
if current_module_id is not None and current_module_address is not None:
os.system(f"mcsSetConfig SystemModuleConfig \
ModuleIPAddr{current_module_id}-1-3 {current_module_address}")
os.system(f"mcsSetConfig -x SystemModuleConfig \
ModuleIPAddr{current_module_id+42}-1-3 {current_module_address}")
def test_dbroots_to_create(self):
try:
node_config = NodeConfig()
current_module_id = read_module_id()
dummy_dbroots = [42, 43]
dbroot_seq_id = 2
for d in dummy_dbroots:
os.system(f"mcsSetConfig SystemModuleConfig \
ModuleDBRootID{current_module_id}-{dbroot_seq_id}-3 {d}")
dbroot_seq_id += 1
root = node_config.get_current_config_root()
dbroots_to_create = list(node_config.dbroots_to_create(root=root, module_id=current_module_id))
for d in dbroots_to_create:
self.assertTrue(d in dummy_dbroots)
except AssertionError as e:
dbroot_seq_id = 2
for d in dummy_dbroots:
os.system(f"mcsSetConfig -x SystemModuleConfig \
ModuleDBRootID{current_module_id}-{dbroot_seq_id}-3 {d}")
dbroot_seq_id += 1
raise e
dbroot_seq_id = 2
for d in dummy_dbroots:
os.system(f"mcsSetConfig -x SystemModuleConfig \
ModuleDBRootID{current_module_id}-{dbroot_seq_id}-3 {d}")
dbroot_seq_id += 1
if __name__ == '__main__':
unittest.main()
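Note: the tests above drive mcsGetConfig/mcsSetConfig through os.system and subprocess.run. A small helper in the same spirit, assuming (as the tests themselves do) that the binaries are on PATH; the helper names are illustrative, not part of the CMAPI code:

import subprocess
from typing import Optional

def mcs_get_config(section: str, name: str, config: Optional[str] = None) -> str:
    """Return a single config value via mcsGetConfig, optionally for a given file."""
    cmd = ['mcsGetConfig']
    if config:
        cmd += ['-c', config]
    cmd += [section, name]
    result = subprocess.run(cmd, stdout=subprocess.PIPE, check=True)
    return result.stdout.decode('ascii').split('\n')[0]

def mcs_set_config(section: str, name: str, value: str) -> None:
    subprocess.run(['mcsSetConfig', section, name, value], check=True)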

View File

@@ -0,0 +1,50 @@
import logging
import os
import unittest
from pathlib import Path
from shutil import rmtree
from cmapi_server.constants import MCS_MODULE_FILE_PATH
from mcs_node_control.models.node_status import NodeStatus
logging.basicConfig(level='DEBUG')
class NodeStatusTest(unittest.TestCase):
def test_dbrm_cluster_mode(self):
node_status = NodeStatus()
# use subprocess.run to capture stdout
os.system('/usr/bin/dbrmctl readwrite')
self.assertEqual(node_status.get_cluster_mode(), 'readwrite')
os.system('/usr/bin/dbrmctl readonly')
self.assertEqual(node_status.get_cluster_mode(), 'readonly')
# kill controllernode and test it
def test_dbrm_status(self):
node_status = NodeStatus()
self.assertEqual(node_status.get_dbrm_status(), 'master')
def test_dbroots(self):
try:
node_status = NodeStatus()
dbroot_ids = [1, 2, 3]
path = '/tmp/dbroots/'
for e in dbroot_ids:
p = Path(path + 'data' + str(e))
p.mkdir(parents = True, exist_ok = True)
for e in node_status.get_dbroots(path=path):
self.assertEqual(e in dbroot_ids, True)
except AssertionError as e:
rmtree(path)
raise e
def test_module_id(self):
node_status = NodeStatus()
module_file = Path(MCS_MODULE_FILE_PATH)
exemplar_id = int(module_file.read_text()[2:])
self.assertEqual(exemplar_id, node_status.get_module_id())
if __name__ == '__main__':
unittest.main()
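Note: test_dbroots above expects dbroot ids to be recoverable from 'data<N>' directory names under a given path. A sketch of that discovery, as an illustration of the test's expectations rather than the actual NodeStatus source:

import re
from pathlib import Path

def get_dbroots(path='/tmp/dbroots/'):
    """Yield dbroot ids inferred from 'data<N>' directories under path."""
    for entry in Path(path).iterdir():
        match = re.fullmatch(r'data(\d+)', entry.name)
        if match and entry.is_dir():
            yield int(match.group(1))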

16
cmapi/postinst.template Executable file
View File

@@ -0,0 +1,16 @@
#!/usr/bin/env bash
# only for postinstall in CentOS
if [ -f ${CMAPI_CONF_FILEPATH}.rpmsave ]; then
echo "warning: found previously saved configuration file ${CMAPI_CONF_FILEPATH}.rpmsave"
mv ${CMAPI_CONF_FILEPATH} ${CMAPI_CONF_FILEPATH}.rpmnew
echo "warning: newly installed configuration file ${CMAPI_CONF_FILEPATH} saved as ${CMAPI_CONF_FILEPATH}.rpmnew"
mv ${CMAPI_CONF_FILEPATH}.rpmsave ${CMAPI_CONF_FILEPATH}
echo "warning: previously saved configuration file ${CMAPI_CONF_FILEPATH}.rpmsave applied as current config file ${CMAPI_CONF_FILEPATH}"
fi
systemctl enable ${SYSTEMD_UNIT_NAME}
systemctl start ${SYSTEMD_UNIT_NAME}
systemctl mask ${SYSTEMD_ENGINE_UNIT_NAME}

9
cmapi/prerm.template Executable file
View File

@@ -0,0 +1,9 @@
#!/usr/bin/env bash
systemctl stop ${SYSTEMD_UNIT_NAME}
find ${CMAPI_DIR} -type d -name __pycache__ -print0 | xargs --null --no-run-if-empty rm -rf
systemctl disable ${SYSTEMD_UNIT_NAME}
systemctl unmask ${SYSTEMD_ENGINE_UNIT_NAME}

73
cmapi/requirements.txt Normal file
View File

@@ -0,0 +1,73 @@
awscli==1.25.56
CherryPy==18.6.1
cryptography==36.0.1
furl==2.1.3
gsutil==5.12
lxml==4.7.1
psutil==5.9.1
pyotp==2.6.0
requests==2.27.1
typer==0.4.1
# indirect dependencies
aiohttp==3.8.1
aiosignal==1.2.0
argcomplete==2.0.0
async-timeout==4.0.2
asynctest==0.13.0
attrs==22.1.0
boto==2.49.0
boto3==1.24.55
botocore==1.27.55
cachetools==5.2.0
certifi==2021.10.8
cffi==1.15.0
charset-normalizer==2.0.12
cheroot==8.6.0
click==8.1.3
colorama==0.4.4
crcmod==1.7
docutils==0.16
fasteners==0.17.3
frozenlist==1.3.1
gcs-oauth2-boto-plugin==3.0
google-apitools==0.5.32
google-auth==2.10.0
google-reauth==0.1.1
httplib2==0.20.4
idna==3.3
importlib-resources==5.4.0
importlib-metadata==4.12.0
jaraco.classes==3.2.1
jaraco.collections==3.5.1
jaraco.context==4.1.1
jaraco.functools==3.5.0
jaraco.text==3.7.0
jmespath==1.0.1
monotonic==1.6
more-itertools==8.12.0
multidict==6.0.2
oauth2client==4.1.3
orderedmultidict==1.0.1
portend==3.1.0
pyasn1-modules==0.2.8
pyasn1==0.4.8
pycparser==2.21
pyOpenSSL==22.0.0
pyparsing==3.0.9
python-dateutil==2.8.2
pytz==2021.3
pyu2f==0.1.5
PyYAML==5.4.1
repoze.lru==0.7
retry-decorator==1.1.1
Routes==2.5.1
rsa==4.7.2
s3transfer==0.6.0
six==1.16.0
tempora==5.0.1
typing-extensions==4.3.0
urllib3==1.26.8
yarl==1.8.1
zc.lockfile==2.0
zipp==3.7.0

41
cmapi/run_tests.py Normal file
View File

@@ -0,0 +1,41 @@
import datetime
import logging
import sys
import unittest
from cmapi_server.logging_management import add_logging_level
class DatedTextTestResult(unittest.TextTestResult):
def startTest(self, test: unittest.case.TestCase):
self.stream.write('\n')
self.stream.write(
datetime.datetime.now().strftime("[%Y-%m-%d %H:%M:%S]: ")
)
return super().startTest(test)
def run_tests_from_package(p_name: str):
logging.info(f'Starting tests from package {p_name}')
loader = unittest.TestLoader()
testsuite = loader.discover(
pattern='test_*.py', start_dir=p_name, top_level_dir='./'
)
runner = unittest.runner.TextTestRunner(
verbosity=3, failfast=True, resultclass=DatedTextTestResult
)
result = runner.run(testsuite)
failed = False
if not result.wasSuccessful():
failed = True
if failed:
    sys.exit(1)
logging.info(f'Finished tests from package {p_name}')
if __name__ == "__main__":
add_logging_level('TRACE', 5)
logging.basicConfig(
level=logging.DEBUG,
format='%(asctime)s [%(levelname)s] (%(name)s) %(message)s'
)
run_tests_from_package('cmapi_server')
run_tests_from_package('mcs_node_control')
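Note: run_tests.py discovers both packages in one process; when iterating on a single module, the same discovery call can be scoped down. A usage sketch built from the calls above, assuming it is run from the cmapi directory:

import unittest

suite = unittest.TestLoader().discover(
    pattern='test_*.py', start_dir='mcs_node_control', top_level_dir='./'
)
unittest.TextTestRunner(verbosity=3).run(suite)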

151
cmapi/service.sh Executable file
View File

@@ -0,0 +1,151 @@
#!/bin/bash
CPACK_PACKAGE_DESCRIPTION_SUMMARY="Mariadb Columnstore Cluster Manager API"
SVC_NAME="mariadb-columnstore-cmapi"
SVC_CMD=$1
arg_2=${2}
UNIT_PATH=/usr/lib/systemd/system/${SVC_NAME}.service
TEMPLATE_PATH=./service.template
TEMP_PATH=./service.temp
SYSTEMD_ENV_FILE=/etc/columnstore/systemd.env
CMAPI_DIR=$(pwd)
CMAPI_USER=root
CONFIG_FOLDER=/etc/columnstore
CONFIG_FILENAME=cmapi_server.conf
user_id=$(id -u)
# systemctl must run as sudo
if [ $user_id -ne 0 ]; then
echo "Must run as sudo"
exit 1
fi
function failed()
{
local error=${1:-Undefined error}
echo "Failed: $error" >&2
exit 1
}
if [ ! -f "${TEMPLATE_PATH}" ]; then
failed "Must run from package folder or install is corrupt"
fi
# check if we run as root
if [[ $(id -u) != "0" ]]; then
echo "Failed: This script requires to run with sudo." >&2
exit 1
fi
function install()
{
echo "Creating service in ${UNIT_PATH}"
if [ -f "${UNIT_PATH}" ]; then
failed "error: exists ${UNIT_PATH}"
fi
if [ -f "${TEMP_PATH}" ]; then
rm "${TEMP_PATH}" || failed "failed to delete ${TEMP_PATH}"
fi
# can optionally use username supplied
#run_as_user=${arg_2:-$SUDO_USER}
#echo "Run as user: ${run_as_user}"
#run_as_uid=$(id -u ${run_as_user}) || failed "User does not exist"
#echo "Run as uid: ${run_as_uid}"
#run_as_gid=$(id -g ${run_as_user}) || failed "Group not available"
#echo "gid: ${run_as_gid}"
sed "s/\${CPACK_PACKAGE_DESCRIPTION_SUMMARY}/${CPACK_PACKAGE_DESCRIPTION_SUMMARY}/g; s/\${CMAPI_USER}/${CMAPI_USER}/g; s/\${CMAPI_DIR}/$(echo ${CMAPI_DIR} | sed -e 's/[\/&]/\\&/g')/g;" "${TEMPLATE_PATH}" > "${TEMP_PATH}" || failed "failed to create replacement temp file"
mv "${TEMP_PATH}" "${UNIT_PATH}" || failed "failed to copy unit file"
if [ ! -d "${CONFIG_FOLDER}" ]; then
mkdir $CONFIG_FOLDER || failed "failed to create configuration folder"
fi
if [ ! -f "${CONFIG_FOLDER}/${CONFIG_FILENAME}" ]; then
cp cmapi_server/cmapi_server.conf.default "${CONFIG_FOLDER}/${CONFIG_FILENAME}" || failed "failed to copy config file"
fi
# Unit file should not be executable and world writable
chmod 664 ${UNIT_PATH} || failed "failed to set permissions on ${UNIT_PATH}"
# Since we started with sudo, files will be owned by root. Change this to specific user
#chown -R ${run_as_uid}:${run_as_gid} $CMAPI_DIR || failed "failed to set owner for $CMAPI_DIR"
systemctl enable ${SVC_NAME} || failed "failed to enable ${SVC_NAME}"
# chown ${run_as_uid}:${run_as_gid} ${CONFIG_FOLDER}/${CONFIG_FILENAME} || failed "failed to set permission for ${CONFIG_FOLDER}/${CONFIG_FILENAME}"
echo PYTHONPATH=${CMAPI_DIR}/deps > ${SYSTEMD_ENV_FILE}
systemctl daemon-reload || failed "failed to reload daemons"
}
function start()
{
systemctl start ${SVC_NAME} || failed "failed to start ${SVC_NAME}"
status
}
function stop()
{
systemctl stop ${SVC_NAME} || failed "failed to stop ${SVC_NAME}"
status
}
function uninstall()
{
stop
systemctl disable ${SVC_NAME} || failed "failed to disable ${SVC_NAME}"
rm "${UNIT_PATH}" || failed "failed to delete ${UNIT_PATH}"
rm "${SYSTEMD_ENV_FILE}" || failed "failed to delete ${SYSTEMD_ENV_FILE}"
systemctl daemon-reload || failed "failed to reload daemons"
}
function status()
{
if [ -f "${UNIT_PATH}" ]; then
echo
echo "${UNIT_PATH}"
else
echo
echo "not installed"
echo
return
fi
systemctl --no-pager status ${SVC_NAME}
}
function usage()
{
echo
echo Usage:
echo "./install.sh [install, start, stop, status, uninstall]"
echo "Commands:"
#echo " install [user]: Install as Root or specified user"
echo " install: Install"
echo " start: Manually start"
echo " stop: Manually stop"
echo " status: Display intallation status"
echo " uninstall: Uninstall"
echo
}
case $SVC_CMD in
"install") install;;
"status") status;;
"uninstall") uninstall;;
"start") start;;
"stop") stop;;
"status") status;;
*) usage;;
esac
exit 0

12
cmapi/service.template Normal file
View File

@@ -0,0 +1,12 @@
[Unit]
Description=${CPACK_PACKAGE_DESCRIPTION_SUMMARY}
[Service]
Environment=PYTHONPATH=${CMAPI_DIR}/deps
ExecStart=${CMAPI_DIR}/python/bin/python3 -m cmapi_server
ExecStartPost=/bin/sh -c ${CMAPI_DIR}/check_ready.sh
User=${CMAPI_USER}
WorkingDirectory=${CMAPI_DIR}
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1 @@
PYTHONPATH=${CMAPI_DIR}/deps
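Note: service.template and the one-line environment template above both use ${VAR} placeholders that the build, or the sed call in service.sh, fills in at install time. Python's string.Template happens to use the same syntax, so a rough equivalent of that substitution step looks like this; the values are illustrative defaults taken from service.sh, and the CMAPI_DIR path is an assumed install location:

from string import Template

values = {
    'CPACK_PACKAGE_DESCRIPTION_SUMMARY': 'Mariadb Columnstore Cluster Manager API',
    'CMAPI_USER': 'root',
    'CMAPI_DIR': '/usr/share/columnstore/cmapi',  # assumed install location
}

with open('service.template') as src:
    rendered = Template(src.read()).substitute(values)
with open('mariadb-columnstore-cmapi.service', 'w') as dst:
    dst.write(rendered)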