Mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git (synced 2025-04-18 21:44:02 +03:00)
MCOL-5496: Merge CMAPI code to engine repo.
[add] cmapi code to engine
This commit is contained in:
parent 77eedd1756
commit a079a2c944
89 cmapi/.gitignore vendored Normal file
@@ -0,0 +1,89 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/
venv/
.python-version

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints

*.py.swp
python/
deps/
engine/
cmapi_server/test/tmp.xml
systemd.env
mariadb-columnstore-cmapi.service
prerm
postinst
conffiles

CMakeCache.txt
CMakeFiles
CMakeScripts
Makefile
cmake_install.cmake
install_manifest.txt
*CPack*
*.rpm
*.deb
result
centos8
ubuntu20.04
buildinfo.txt
107 cmapi/CMakeLists.txt Normal file
@@ -0,0 +1,107 @@
CMAKE_MINIMUM_REQUIRED(VERSION 3.11)
PROJECT(cmapi NONE)

SET(CPACK_PACKAGE_NAME "MariaDB-columnstore-cmapi")

FILE(STRINGS VERSION CRUDE_CMAPI_VERSION_MAJOR REGEX "^CMAPI_VERSION_MAJOR=")
FILE(STRINGS VERSION CRUDE_CMAPI_VERSION_MINOR REGEX "^CMAPI_VERSION_MINOR=")
FILE(STRINGS VERSION CRUDE_CMAPI_VERSION_PATCH REGEX "^CMAPI_VERSION_PATCH=")
STRING(REPLACE "CMAPI_VERSION_MAJOR=" "" CMAPI_VERSION_MAJOR ${CRUDE_CMAPI_VERSION_MAJOR})
STRING(REPLACE "CMAPI_VERSION_MINOR=" "" CMAPI_VERSION_MINOR ${CRUDE_CMAPI_VERSION_MINOR})
STRING(REPLACE "CMAPI_VERSION_PATCH=" "" CMAPI_VERSION_PATCH ${CRUDE_CMAPI_VERSION_PATCH})
SET(PACKAGE_VERSION "${CMAPI_VERSION_MAJOR}.${CMAPI_VERSION_MINOR}.${CMAPI_VERSION_PATCH}")
SET(CMAPI_USER "root")

SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "MariaDB ColumnStore CMAPI: cluster management API and command line tool.")
SET(CPACK_PACKAGE_URL "http://www.mariadb.com")
SET(CPACK_PACKAGE_CONTACT "MariaDB Corporation Ab")
SET(CPACK_PACKAGE_SUMMARY "MariaDB ColumnStore CMAPI: cluster management API and command line tool.")
SET(CPACK_PACKAGE_VENDOR "MariaDB Corporation Ab")
SET(CPACK_PACKAGE_LICENSE "Copyright (c) 2023 MariaDB Corporation Ab.; redistributable under the terms of the GPLv2, see the file LICENSE.GPL2 for details.")

SET(BIN_DIR "/usr/bin")
SET(ETC_DIR "/etc/columnstore")
SET(SHARE_DIR "/usr/share/columnstore")
SET(CMAPI_DIR "${SHARE_DIR}/cmapi")
SET(SYSTEMD_UNIT_DIR "/usr/lib/systemd/system")
SET(SYSTEMD_ENGINE_UNIT_NAME "mariadb-columnstore")
SET(CMAPI_CONF_FILEPATH "${ETC_DIR}/cmapi_server.conf")

STRING(TOLOWER ${CPACK_PACKAGE_NAME} SYSTEMD_UNIT_NAME)

CONFIGURE_FILE(service.template ${SYSTEMD_UNIT_NAME}.service)
CONFIGURE_FILE(systemd.env.template systemd.env)
CONFIGURE_FILE(postinst.template postinst)
CONFIGURE_FILE(prerm.template prerm)
CONFIGURE_FILE(conffiles.template conffiles)
CONFIGURE_FILE(mcs.template mcs)

INSTALL(DIRECTORY python deps mcs_node_control failover cmapi_server engine_files mcs_cluster_tool
        DESTINATION ${CMAPI_DIR}
        USE_SOURCE_PERMISSIONS
        PATTERN "test" EXCLUDE
        PATTERN "cmapi_server.conf" EXCLUDE)
INSTALL(FILES LICENSE.GPL2 VERSION
        DESTINATION ${CMAPI_DIR})
INSTALL(FILES check_ready.sh
        PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ
        DESTINATION ${CMAPI_DIR})
INSTALL(FILES cmapi_server/cmapi_server.conf systemd.env
        DESTINATION ${ETC_DIR})
INSTALL(FILES ${SYSTEMD_UNIT_NAME}.service
        DESTINATION ${SYSTEMD_UNIT_DIR})
INSTALL(FILES mcs
        PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ
        DESTINATION ${BIN_DIR})

OPTION(RPM "Build an RPM" OFF)
IF(RPM)
    SET(CPACK_GENERATOR "RPM")

    SET(CPACK_RPM_PACKAGE_VERSION ${PACKAGE_VERSION})
    SET(CPACK_RPM_PACKAGE_NAME ${CPACK_PACKAGE_NAME})

    SET(CPACK_RPM_PACKAGE_LICENSE "GPLv2")
    SET(CPACK_RPM_PACKAGE_GROUP "Applications/Databases")
    SET(CPACK_RPM_PACKAGE_URL ${CPACK_PACKAGE_URL})
    SET(CPACK_RPM_PACKAGE_SUMMARY ${CPACK_PACKAGE_SUMMARY})
    SET(CPACK_RPM_PACKAGE_VENDOR ${CPACK_PACKAGE_VENDOR})
    SET(CPACK_RPM_PACKAGE_LICENSE ${CPACK_PACKAGE_LICENSE})
    SET(CPACK_RPM_PACKAGE_DESCRIPTION ${CPACK_PACKAGE_DESCRIPTION_SUMMARY})

    SET(CPACK_RPM_SPEC_MORE_DEFINE "%undefine __brp_mangle_shebangs")
    SET(CPACK_RPM_PACKAGE_AUTOREQ "no")

    SET(CPACK_RPM_POST_INSTALL_SCRIPT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/postinst)
    SET(CPACK_RPM_PRE_UNINSTALL_SCRIPT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/prerm)
    SET(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION ${ETC_DIR} ${SHARE_DIR})
    SET(CPACK_RPM_USER_FILELIST "%config(noreplace) ${CMAPI_CONF_FILEPATH}")

    SET(CPACK_RPM_PACKAGE_OBSOLETES "mariadb-columnstore-cmapi")
    # NOTE: the original commit set CPACK_DEBIAN_PACKAGE_DEPENDS here and
    # CPACK_RPM_PACKAGE_REQUIRES in the DEB block; the two were swapped.
    SET(CPACK_RPM_PACKAGE_REQUIRES "curl")
ENDIF()

OPTION(DEB "Build a DEB" OFF)
IF(DEB)

    SET(CPACK_GENERATOR "DEB")

    SET(CPACK_DEBIAN_PACKAGE_VERSION ${PACKAGE_VERSION})
    # TODO: different names in deb and rpm packages, fix it in next releases.
    STRING(TOLOWER ${CPACK_PACKAGE_NAME} CPACK_DEBIAN_PACKAGE_NAME)
    STRING(TOLOWER ${CPACK_PACKAGE_NAME} CPACK_PACKAGE_NAME)
    SET(CPACK_DEBIAN_PACKAGE_LICENSE "GPLv2")
    SET(CPACK_DEBIAN_PACKAGE_URL ${CPACK_PACKAGE_URL})
    SET(CPACK_DEBIAN_PACKAGE_SUMMARY ${CPACK_PACKAGE_SUMMARY})
    SET(CPACK_DEBIAN_PACKAGE_VENDOR ${CPACK_PACKAGE_VENDOR})
    SET(CPACK_DEBIAN_PACKAGE_LICENSE ${CPACK_PACKAGE_LICENSE})
    SET(CPACK_DEBIAN_PACKAGE_DESCRIPTION ${CPACK_PACKAGE_DESCRIPTION_SUMMARY})

    SET(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${CMAKE_CURRENT_SOURCE_DIR}/prerm;${CMAKE_CURRENT_SOURCE_DIR}/postinst;${CMAKE_CURRENT_SOURCE_DIR}/conffiles")

    SET(CPACK_DEBIAN_PACKAGE_REPLACES "mariadb-columnstore-cmapi")
    SET(CPACK_DEBIAN_PACKAGE_DEPENDS "curl")
ENDIF()

SET(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_NAME}-${PACKAGE_VERSION}.${CMAKE_HOST_SYSTEM_PROCESSOR}")
INCLUDE(CPack)
339 cmapi/LICENSE.GPL2 Normal file
@@ -0,0 +1,339 @@
                    GNU GENERAL PUBLIC LICENSE
                       Version 2, June 1991

 Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 Everyone is permitted to copy and distribute verbatim copies
 of this license document, but changing it is not allowed.

                            Preamble

  The licenses for most software are designed to take away your
freedom to share and change it.  By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users.  This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it.  (Some other Free Software Foundation software is covered by
the GNU Lesser General Public License instead.)  You can apply it to
your programs, too.

  When we speak of free software, we are referring to freedom, not
price.  Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.

  To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.

  For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have.  You must make sure that they, too, receive or can get the
source code.  And you must show them these terms so they know their
rights.

  We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.

  Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software.  If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.

  Finally, any free program is threatened constantly by software
patents.  We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary.  To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.

  The precise terms and conditions for copying, distribution and
modification follow.

                    GNU GENERAL PUBLIC LICENSE
   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION

  0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License.  The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language.  (Hereinafter, translation is included without limitation in
the term "modification".)  Each licensee is addressed as "you".

Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope.  The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.

  1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.

You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.

  2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:

    a) You must cause the modified files to carry prominent notices
    stating that you changed the files and the date of any change.

    b) You must cause any work that you distribute or publish, that in
    whole or in part contains or is derived from the Program or any
    part thereof, to be licensed as a whole at no charge to all third
    parties under the terms of this License.

    c) If the modified program normally reads commands interactively
    when run, you must cause it, when started running for such
    interactive use in the most ordinary way, to print or display an
    announcement including an appropriate copyright notice and a
    notice that there is no warranty (or else, saying that you provide
    a warranty) and that users may redistribute the program under
    these conditions, and telling the user how to view a copy of this
    License.  (Exception: if the Program itself is interactive but
    does not normally print such an announcement, your work based on
    the Program is not required to print an announcement.)

These requirements apply to the modified work as a whole.  If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works.  But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.

Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.

In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.

  3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:

    a) Accompany it with the complete corresponding machine-readable
    source code, which must be distributed under the terms of Sections
    1 and 2 above on a medium customarily used for software interchange; or,

    b) Accompany it with a written offer, valid for at least three
    years, to give any third party, for a charge no more than your
    cost of physically performing source distribution, a complete
    machine-readable copy of the corresponding source code, to be
    distributed under the terms of Sections 1 and 2 above on a medium
    customarily used for software interchange; or,

    c) Accompany it with the information you received as to the offer
    to distribute corresponding source code.  (This alternative is
    allowed only for noncommercial distribution and only if you
    received the program in object code or executable form with such
    an offer, in accord with Subsection b above.)

The source code for a work means the preferred form of the work for
making modifications to it.  For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable.  However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.

If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.

  4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License.  Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.

  5. You are not required to accept this License, since you have not
signed it.  However, nothing else grants you permission to modify or
distribute the Program or its derivative works.  These actions are
prohibited by law if you do not accept this License.  Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.

  6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions.  You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.

  7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License.  If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all.  For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.

If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.

It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices.  Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.

This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.

  8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded.  In such case, this License incorporates
the limitation as if written in the body of this License.

  9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time.  Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.

Each version is given a distinguishing version number.  If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation.  If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.

  10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission.  For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this.  Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.

                            NO WARRANTY

  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.

  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.

                     END OF TERMS AND CONDITIONS

            How to Apply These Terms to Your New Programs

  If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.

  To do so, attach the following notices to the program.  It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.

    <one line to give the program's name and a brief idea of what it does.>
    Copyright (C) <year>  <name of author>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

Also add information on how to contact you by electronic and paper mail.

If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:

    Gnomovision version 69, Copyright (C) year name of author
    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
    This is free software, and you are welcome to redistribute it
    under certain conditions; type `show c' for details.

The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License.  Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.

You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary.  Here is a sample; alter the names:

  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
  `Gnomovision' (which makes passes at compilers) written by James Hacker.

  <signature of Ty Coon>, 1 April 1989
  Ty Coon, President of Vice

This General Public License does not permit incorporating your program into
proprietary programs.  If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library.  If this is what you want to do, use the GNU Lesser General
Public License instead of this License.
61 cmapi/README.md Normal file
@@ -0,0 +1,61 @@
# CMAPI REST server
[Build Status](https://ci.columnstore.mariadb.net/mariadb-corporation/mariadb-columnstore-cmapi)

## Overview
This RESTful server enables multi-node setups for MCS.

## Requirements

See the requirements.txt file.

All the Python package prerequisites are shipped with a pre-built Python interpreter.

## Usage

To run the server using defaults call:
```sh
python3 -m cmapi_server
```
There is a configuration file inside cmapi_server.

## Testing

To launch the integration and unit tests, use unittest discovery mode.
```sh
python3 -m unittest discover -v mcs_node_control
python3 -m unittest discover -v cmapi_server
python3 -m unittest discover -v failover
```

The mcs_node_control unit tests require root privileges and an additional systemd unit
to run smoothly.

## Build packages

Packages have a bundled Python interpreter and Python dependencies.

## Get dependencies

```sh
# get portable python
wget -qO- https://cspkg.s3.amazonaws.com/python-dist-no-nis.tar.gz | tar xzf - -C ./

# install python dependencies
python/bin/pip3 install -t deps --only-binary :all: -r requirements.txt
```

## RPM

```sh
./cleanup.sh
yum install -y wget cmake make rpm-build
cmake -DRPM=1 .
make package
```

## DEB

```sh
./cleanup.sh
DEBIAN_FRONTEND=noninteractive apt update && apt install -y cmake make
cmake -DDEB=1 .
make package
```
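For a quick smoke test of a running server, the readiness endpoint used by check_ready.sh further down can also be polled from Python. A minimal sketch using only the standard library; the endpoint and port come from this commit (cmapi_server.conf and check_ready.sh), and certificate verification is disabled only because the server generates a self-signed certificate on first start:

```python
import ssl
import urllib.request

# The certificate is self-signed, so verification is turned off here.
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

try:
    with urllib.request.urlopen(
        'https://127.0.0.1:8640/cmapi/ready', context=ctx, timeout=5
    ) as resp:
        print('CMAPI ready:', resp.status == 200)
except OSError as exc:  # URLError/HTTPError are OSError subclasses
    print('CMAPI not reachable:', exc)
```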
3 cmapi/VERSION Normal file
@@ -0,0 +1,3 @@
CMAPI_VERSION_MAJOR=23
CMAPI_VERSION_MINOR=03
CMAPI_VERSION_PATCH=1b
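The CMakeLists.txt above derives PACKAGE_VERSION from these three lines with FILE(STRINGS) and STRING(REPLACE). The same parse, sketched in Python for reference (parse_cmapi_version is an illustrative name, not a function from this repo):

```python
import re

def parse_cmapi_version(path: str = 'VERSION') -> str:
    """Mirror the FILE(STRINGS)/STRING(REPLACE) logic from CMakeLists.txt."""
    fields = {}
    with open(path) as fh:
        for line in fh:
            match = re.match(r'CMAPI_VERSION_(MAJOR|MINOR|PATCH)=(.+)', line.strip())
            if match:
                fields[match.group(1)] = match.group(2)
    return '{MAJOR}.{MINOR}.{PATCH}'.format(**fields)

# With the VERSION file above this returns '23.03.1b'.
```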
19 cmapi/check_ready.sh Executable file
@@ -0,0 +1,19 @@
SEC_TO_WAIT=15
echo -n "Waiting for CMAPI to finish startup"
success=false
for i in $(seq 1 $SEC_TO_WAIT); do
    echo -n "..$i"
    if ! curl -k -s --output /dev/null --fail https://127.0.0.1:8640/cmapi/ready; then
        sleep 1
    else
        success=true
        break
    fi
done

echo
if $success; then
    echo "CMAPI is ready to handle requests."
else
    echo "CMAPI is not ready after waiting $SEC_TO_WAIT seconds. Check the log file for further details."
fi
21 cmapi/cleanup.sh Executable file
@@ -0,0 +1,21 @@
#!/bin/bash
set -euo pipefail
IFS=$'\n\t'

rm -rf \
    cmapi_server/test/tmp.xml \
    systemd.env \
    *.service \
    prerm \
    postinst \
    CMakeCache.txt \
    CMakeFiles \
    CMakeScripts \
    Makefile \
    cmake_install.cmake \
    install_manifest.txt \
    *CPack*
    # buildinfo.txt

find . -type d -name __pycache__ -exec rm -rf {} +
find . -type f -iname '*.swp' -exec rm -rf {} +
249 cmapi/cmapi_server/SingleNode.xml Normal file
@@ -0,0 +1,249 @@
<Columnstore Version="V1.0.0">
<!--
  WARNING: Do not make changes to this file unless directed to do so by
  MariaDB service engineers. Incorrect settings can render your system
  unusable and will require a service call to correct.
-->
  <ExeMgr1>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8601</Port>
    <Module>unassigned</Module>
  </ExeMgr1>
  <pm1_WriteEngineServer>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8630</Port>
  </pm1_WriteEngineServer>
  <DDLProc>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8612</Port>
  </DDLProc>
  <DMLProc>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8614</Port>
  </DMLProc>
  <BatchInsert>
    <RowsPerBatch>10000</RowsPerBatch>
  </BatchInsert>
  <PrimitiveServers>
    <Count>1</Count>
    <ConnectionsPerPrimProc>2</ConnectionsPerPrimProc>
    <ProcessorThreshold>128</ProcessorThreshold>
    <ProcessorQueueSize>10K</ProcessorQueueSize> <!-- minimum of extent size 8192 -->
    <DebugLevel>0</DebugLevel>
    <ColScanBufferSizeBlocks>512</ColScanBufferSizeBlocks>
    <ColScanReadAheadBlocks>512</ColScanReadAheadBlocks> <!-- s/b factor of extent size 8192 -->
    <!-- <BPPCount>16</BPPCount> --> <!-- Default num cores * 2. A cap on the number of simultaneous primitives per jobstep -->
    <PrefetchThreshold>1</PrefetchThreshold>
    <PTTrace>0</PTTrace>
    <RotatingDestination>n</RotatingDestination> <!-- Iterate thru UM ports; set to 'n' if UM/PM on same server -->
    <!-- <HighPriorityPercentage>60</HighPriorityPercentage> -->
    <!-- <MediumPriorityPercentage>30</MediumPriorityPercentage> -->
    <!-- <LowPriorityPercentage>10</LowPriorityPercentage> -->
    <DirectIO>y</DirectIO>
    <HighPriorityPercentage/>
    <MediumPriorityPercentage/>
    <LowPriorityPercentage/>
  </PrimitiveServers>
  <PMS1>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8620</Port>
  </PMS1>
  <SystemConfig>
    <SystemName>columnstore-1</SystemName>
    <ParentOAMModuleName>pm1</ParentOAMModuleName>
    <PrimaryUMModuleName>pm1</PrimaryUMModuleName>
    <!-- Warning: Do not change this value once database is built -->
    <DBRootCount>1</DBRootCount>
    <DBRoot1>/var/lib/columnstore/data1</DBRoot1>
    <DBRMRoot>/var/lib/columnstore/data1/systemFiles/dbrm/BRM_saves</DBRMRoot>
    <TableLockSaveFile>/var/lib/columnstore/data1/systemFiles/dbrm/tablelocks</TableLockSaveFile>
    <DBRMTimeOut>15</DBRMTimeOut> <!-- in seconds -->
    <DBRMSnapshotInterval>100000</DBRMSnapshotInterval>
    <WaitPeriod>10</WaitPeriod> <!-- in seconds -->
    <MemoryCheckPercent>95</MemoryCheckPercent> <!-- Max real memory to limit growth of buffers to -->
    <DataFileLog>OFF</DataFileLog>
    <!-- enable if you want to limit how much memory may be used for hdfs read/write memory buffers.
    <hdfsRdwrBufferMaxSize>8G</hdfsRdwrBufferMaxSize>
    -->
    <hdfsRdwrScratch>/rdwrscratch</hdfsRdwrScratch> <!-- Do not set to an hdfs file path -->
    <!-- Be careful modifying SystemTempFileDir! On start, ExeMgr deletes
    the entire subdirectories "joins" & "aggregates" and recreates it to make sure no
    files are left behind. -->
    <SystemTempFileDir>/tmp/columnstore_tmp_files</SystemTempFileDir>
  </SystemConfig>
  <SystemModuleConfig>
    <ModuleType2>um</ModuleType2>
    <ModuleDesc2>User Module</ModuleDesc2>
    <ModuleCount2>0</ModuleCount2>
    <ModuleIPAddr1-1-2>0.0.0.0</ModuleIPAddr1-1-2>
    <ModuleHostName1-1-2>unassigned</ModuleHostName1-1-2>
    <ModuleDisableState1-2>ENABLED</ModuleDisableState1-2>
    <ModuleCPUCriticalThreshold2>0</ModuleCPUCriticalThreshold2>
    <ModuleCPUMajorThreshold2>0</ModuleCPUMajorThreshold2>
    <ModuleCPUMinorThreshold2>0</ModuleCPUMinorThreshold2>
    <ModuleCPUMinorClearThreshold2>0</ModuleCPUMinorClearThreshold2>
    <ModuleDiskCriticalThreshold2>90</ModuleDiskCriticalThreshold2>
    <ModuleDiskMajorThreshold2>80</ModuleDiskMajorThreshold2>
    <ModuleDiskMinorThreshold2>70</ModuleDiskMinorThreshold2>
    <ModuleMemCriticalThreshold2>90</ModuleMemCriticalThreshold2>
    <ModuleMemMajorThreshold2>0</ModuleMemMajorThreshold2>
    <ModuleMemMinorThreshold2>0</ModuleMemMinorThreshold2>
    <ModuleSwapCriticalThreshold2>90</ModuleSwapCriticalThreshold2>
    <ModuleSwapMajorThreshold2>80</ModuleSwapMajorThreshold2>
    <ModuleSwapMinorThreshold2>70</ModuleSwapMinorThreshold2>
    <ModuleDiskMonitorFileSystem1-2>/</ModuleDiskMonitorFileSystem1-2>
    <ModuleDBRootCount1-2>unassigned</ModuleDBRootCount1-2>
    <ModuleDBRootID1-1-2>unassigned</ModuleDBRootID1-1-2>
    <ModuleType3>pm</ModuleType3>
    <ModuleDesc3>Performance Module</ModuleDesc3>
    <ModuleCount3>1</ModuleCount3>
    <ModuleIPAddr1-1-3>127.0.0.1</ModuleIPAddr1-1-3>
    <ModuleHostName1-1-3>localhost</ModuleHostName1-1-3>
    <ModuleDisableState1-3>ENABLED</ModuleDisableState1-3>
    <ModuleCPUCriticalThreshold3>0</ModuleCPUCriticalThreshold3>
    <ModuleCPUMajorThreshold3>0</ModuleCPUMajorThreshold3>
    <ModuleCPUMinorThreshold3>0</ModuleCPUMinorThreshold3>
    <ModuleCPUMinorClearThreshold3>0</ModuleCPUMinorClearThreshold3>
    <ModuleDiskCriticalThreshold3>90</ModuleDiskCriticalThreshold3>
    <ModuleDiskMajorThreshold3>80</ModuleDiskMajorThreshold3>
    <ModuleDiskMinorThreshold3>70</ModuleDiskMinorThreshold3>
    <ModuleMemCriticalThreshold3>90</ModuleMemCriticalThreshold3>
    <ModuleMemMajorThreshold3>0</ModuleMemMajorThreshold3>
    <ModuleMemMinorThreshold3>0</ModuleMemMinorThreshold3>
    <ModuleSwapCriticalThreshold3>90</ModuleSwapCriticalThreshold3>
    <ModuleSwapMajorThreshold3>80</ModuleSwapMajorThreshold3>
    <ModuleSwapMinorThreshold3>70</ModuleSwapMinorThreshold3>
    <ModuleDiskMonitorFileSystem1-3>/</ModuleDiskMonitorFileSystem1-3>
    <ModuleDBRootCount1-3>1</ModuleDBRootCount1-3>
    <ModuleDBRootID1-1-3>1</ModuleDBRootID1-1-3>
  </SystemModuleConfig>
  <SessionManager>
    <MaxConcurrentTransactions>1000</MaxConcurrentTransactions>
    <TxnIDFile>/var/lib/columnstore/data1/systemFiles/dbrm/SMTxnID</TxnIDFile>
  </SessionManager>
  <VersionBuffer>
    <!-- VersionBufferFileSize must be a multiple of 8192.
    One version buffer file will be put on each DB root. -->
    <VersionBufferFileSize>1GB</VersionBufferFileSize>
  </VersionBuffer>
  <OIDManager>
    <!-- Do not change this file after database built -->
    <OIDBitmapFile>/var/lib/columnstore/data1/systemFiles/dbrm/oidbitmap</OIDBitmapFile>
    <!-- Do not change this value after database built -->
    <FirstOID>3000</FirstOID>
  </OIDManager>
  <WriteEngine>
    <BulkRoot>/var/log/mariadb/columnstore/data/bulk</BulkRoot>
    <BulkRollbackDir>/var/lib/columnstore/data1/systemFiles/bulkRollback</BulkRollbackDir>
    <MaxFileSystemDiskUsagePct>98</MaxFileSystemDiskUsagePct>
    <CompressedPaddingBlocks>1</CompressedPaddingBlocks> <!-- Number of blocks used to pad compressed chunks -->
    <FastDelete>n</FastDelete>
  </WriteEngine>
  <DBRM_Controller>
    <NumWorkers>1</NumWorkers>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8616</Port>
  </DBRM_Controller>
  <!-- Worker Port: 8700 - 8720 is reserved to support External Modules-->
  <DBRM_Worker1>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8700</Port>
    <Module>pm1</Module>
  </DBRM_Worker1>
  <DBBC>
    <!-- The percentage of RAM to use for the disk block cache. Defaults to 70% -->
    <!-- Alternatively, this can be specified in absolute terms using
    the suffixes 'm' or 'g' to denote size in megabytes or gigabytes.-->
    <!-- <NumBlocksPct>95</NumBlocksPct> -->
    <!-- <NumThreads>16</NumThreads> --> <!-- 1-256. Default is 16. -->
    <NumCaches>1</NumCaches><!-- # of parallel caches to instantiate -->
    <IOMTracing>0</IOMTracing>
    <BRPTracing>0</BRPTracing>
    <ReportFrequency>65536</ReportFrequency>
    <MaxOpenFiles>2K</MaxOpenFiles>
    <DecreaseOpenFilesCount>200</DecreaseOpenFilesCount>
    <FDCacheTrace>0</FDCacheTrace>
    <NumBlocksPct>50</NumBlocksPct>
  </DBBC>
  <Installation>
    <ServerTypeInstall>2</ServerTypeInstall>
    <PMwithUM>y</PMwithUM>
    <MySQLRep>n</MySQLRep>
    <DBRootStorageType>internal</DBRootStorageType>
    <UMStorageType>internal</UMStorageType>
    <ProfileFile>/etc/profile.d/columnstoreAlias.sh</ProfileFile>
    <DataRedundancyNetworkType/>
  </Installation>
  <ExtentMap>
    <!--
      WARNING: these can only be changed on an empty system. Once any object has been allocated
      it cannot be changed! Extent size is 8M rows.
    -->
    <FilesPerColumnPartition>4</FilesPerColumnPartition> <!-- should be multiple of DBRootCount -->
    <BRM_UID>0x0</BRM_UID>
  </ExtentMap>
  <HashJoin>
    <MaxBuckets>128</MaxBuckets>
    <MaxElems>128K</MaxElems> <!-- 128 buckets * 128K * 16 = 256 MB -->
    <PmMaxMemorySmallSide>1G</PmMaxMemorySmallSide>
    <TotalUmMemory>25%</TotalUmMemory>
    <CPUniqueLimit>100</CPUniqueLimit>
    <AllowDiskBasedJoin>N</AllowDiskBasedJoin>
    <TempFileCompression>Y</TempFileCompression>
    <TempFileCompressionType>Snappy</TempFileCompressionType> <!-- LZ4, Snappy -->
  </HashJoin>
  <JobList>
    <FlushInterval>16K</FlushInterval>
    <FifoSize>16</FifoSize>
    <RequestSize>1</RequestSize> <!-- Number of extents per request, should be
    less than MaxOutstandingRequests. Otherwise, default value 1 is used. -->
    <!-- ProcessorThreadsPerScan is the number of jobs issued to process
    each extent. The default is 16. MaxOutstandingRequests is the size of
    the window of work in terms of extents. A value of 20 means there
    is 20 extents worth of work for the PMs to process at any given time.
    ProcessorThreadsPerScan * MaxOutstandingRequests should be at least
    as many as the threads available across all PMs. -->
    <!-- <ProcessorThreadsPerScan>16</ProcessorThreadsPerScan> -->
    <!-- MaxOutstandingRequests is going to default to the num of cores available
    across all performance modules * 4 divided by the ProcessorThreadsPerScan,
    but will be lower bounded by 20 -->
    <!-- <MaxOutstandingRequests>20</MaxOutstandingRequests> -->
    <ThreadPoolSize>100</ThreadPoolSize>
  </JobList>
  <RowAggregation>
    <!-- <RowAggrThreads>4</RowAggrThreads> --> <!-- Default value is the number of cores -->
    <!-- <RowAggrBuckets>32</RowAggrBuckets> --> <!-- Default value is number of cores * 4 -->
    <!-- <RowAggrRowGroupsPerThread>20</RowAggrRowGroupsPerThread> --> <!-- Default value is 20 -->
    <AllowDiskBasedAggregation>N</AllowDiskBasedAggregation>
  </RowAggregation>
  <CrossEngineSupport>
    <Host>127.0.0.1</Host>
    <Port>3306</Port>
    <User>root</User>
    <Password/>
    <TLSCA/>
    <TLSClientCert/>
    <TLSClientKey/>
  </CrossEngineSupport>
  <QueryStats>
    <Enabled>N</Enabled>
  </QueryStats>
  <UserPriority>
    <Enabled>N</Enabled>
  </UserPriority>
  <NetworkCompression>
    <Enabled>Y</Enabled>
    <NetworkCompressionType>Snappy</NetworkCompressionType> <!-- LZ4, Snappy -->
  </NetworkCompression>
  <QueryTele>
    <Host>127.0.0.1</Host>
    <Port>0</Port>
  </QueryTele>
  <StorageManager>
    <MaxSockets>30</MaxSockets>
    <Enabled>N</Enabled>
  </StorageManager>
  <DataRedundancyConfig>
    <DBRoot1PMs/>
  </DataRedundancyConfig>
</Columnstore>
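CMAPI reads and rewrites this configuration through the NodeConfig class (see cmapi_server/__main__.py below). A read-only sketch with the standard library, assuming the file is installed at the default /etc/columnstore/Columnstore.xml path; the real code also applies and rolls back changes:

```python
import xml.etree.ElementTree as ET

# Default path, matching DEFAULT_MCS_CONF_PATH in cmapi_server/constants.py.
MCS_CONF = '/etc/columnstore/Columnstore.xml'

root = ET.parse(MCS_CONF).getroot()
# Read two of the values this single-node template defines.
print('DBRoot count:', root.findtext('SystemConfig/DBRootCount'))
print('Primary module:', root.findtext('SystemConfig/PrimaryUMModuleName'))
```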
0 cmapi/cmapi_server/__init__.py Normal file
293 cmapi/cmapi_server/__main__.py Normal file
@@ -0,0 +1,293 @@
#!/usr/bin/env python3

"""
CherryPy-based webservice daemon with background threads
"""

import logging
import os
import threading
import time
from datetime import datetime, timedelta

import cherrypy
from cherrypy.process import plugins
from cryptography import x509
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import serialization, hashes
from cryptography.hazmat.primitives.asymmetric import rsa
from cryptography.x509.oid import NameOID

# TODO: fix the dispatcher choice logic: code in endpoints.py executes at
# import time, which causes module logger misconfiguration
from cmapi_server.logging_management import config_cmapi_server_logging
config_cmapi_server_logging()

from cmapi_server import helpers
from cmapi_server.constants import DEFAULT_MCS_CONF_PATH, CMAPI_CONF_PATH
from cmapi_server.controllers.dispatcher import dispatcher, jsonify_error
from cmapi_server.failover_agent import FailoverAgent
from cmapi_server.managers.process import MCSProcessManager
from cmapi_server.managers.application import AppManager
from failover.node_monitor import NodeMonitor
from mcs_node_control.models.dbrm_socket import SOCK_TIMEOUT, DBRMSocketHandler
from mcs_node_control.models.node_config import NodeConfig


cert_filename = './cmapi_server/self-signed.crt'


def worker():
    """Background Timer that runs clean_txn_by_timeout() every 5 seconds

    TODO: this needs to be fixed/optimized. I don't like creating the thread
    repeatedly.
    """
    while True:
        t = threading.Timer(5.0, clean_txn_by_timeout)
        t.start()
        t.join()


def clean_txn_by_timeout():
    txn_section = app.config.get('txn', None)
    timeout_timestamp = txn_section.get('timeout') if txn_section is not None else None
    current_timestamp = int(datetime.now().timestamp())
    if timeout_timestamp is not None and current_timestamp > timeout_timestamp:
        txn_config_changed = txn_section.get('config_changed', None)
        if txn_config_changed is True:
            node_config = NodeConfig()
            node_config.rollback_config()
            node_config.apply_config(
                xml_string=node_config.get_current_config()
            )
        app.config.update({
            'txn': {
                'id': 0,
                'timeout': 0,
                'manager_address': '',
                'config_changed': False,
            },
        })


class TxnBackgroundThread(plugins.SimplePlugin):
    """CherryPy plugin to create a background worker thread"""
    app = None

    def __init__(self, bus, app):
        super(TxnBackgroundThread, self).__init__(bus)
        self.t = None
        self.app = app

    def start(self):
        """Plugin entrypoint"""

        self.t = threading.Thread(target=worker, name='TxnBackgroundThread')
        self.t.daemon = True
        self.t.start()

    # Start at a higher priority than "Daemonize" (which we're not using
    # yet but may in the future)
    start.priority = 85


class FailoverBackgroundThread(plugins.SimplePlugin):
    """CherryPy plugin to start the thread for failover monitoring."""

    def __init__(self, bus, turned_on):
        super().__init__(bus)
        self.node_monitor = NodeMonitor(agent=FailoverAgent())
        self.running = False
        self.turned_on = turned_on
        if self.turned_on:
            logging.info(
                'Failover is turned ON by default or in CMAPI config file.'
            )
        else:
            logging.info('Failover is turned OFF in CMAPI config file.')

    def _start(self):
        if self.running:
            return
        self.bus.log('Starting Failover monitor thread.')
        self.node_monitor.start()
        self.running = True

    def _stop(self):
        if not self.running:
            return
        self.bus.log('Stopping Failover monitor thread.')
        self.node_monitor.stop()
        self.running = False

    def _subscriber(self, run_failover: bool):
        if not self.turned_on:
            return
        if not isinstance(run_failover, bool):
            self.bus.log(f'Got wrong obj in failover channel {run_failover}')
            return
        if run_failover:
            self._start()
        else:
            self._stop()

    def start(self):
        self.bus.subscribe('failover', self._subscriber)

    def stop(self):
        cherrypy.engine.unsubscribe('failover', self._subscriber)
        self._stop()


def create_self_signed_certificate():
    key_filename = './cmapi_server/self-signed.key'

    key = rsa.generate_private_key(
        public_exponent=65537,
        key_size=2048,
        backend=default_backend()
    )

    with open(key_filename, "wb") as f:
        f.write(key.private_bytes(
            encoding=serialization.Encoding.PEM,
            format=serialization.PrivateFormat.TraditionalOpenSSL,
            encryption_algorithm=serialization.NoEncryption()),
        )

    subject = issuer = x509.Name([
        x509.NameAttribute(NameOID.COUNTRY_NAME, 'US'),
        x509.NameAttribute(NameOID.STATE_OR_PROVINCE_NAME, 'California'),
        x509.NameAttribute(NameOID.LOCALITY_NAME, 'Redwood City'),
        x509.NameAttribute(NameOID.ORGANIZATION_NAME, 'MariaDB'),
        x509.NameAttribute(NameOID.COMMON_NAME, 'mariadb.com'),
    ])

    basic_constraints = x509.BasicConstraints(ca=True, path_length=0)

    cert = x509.CertificateBuilder(
    ).subject_name(
        subject
    ).issuer_name(
        issuer
    ).public_key(
        key.public_key()
    ).serial_number(
        x509.random_serial_number()
    ).not_valid_before(
        datetime.utcnow()
    ).not_valid_after(
        datetime.utcnow() + timedelta(days=365)
    ).add_extension(
        basic_constraints,
        False
    ).add_extension(
        x509.SubjectAlternativeName([x509.DNSName('localhost')]),
        critical=False
    ).sign(key, hashes.SHA256(), default_backend())

    with open(cert_filename, 'wb') as f:
        f.write(cert.public_bytes(serialization.Encoding.PEM))


if __name__ == '__main__':
    logging.info(f'CMAPI Version: {AppManager.get_version()}')

    # TODO: read cmapi config filepath as an argument
    helpers.cmapi_config_check()

    if not os.path.exists(cert_filename):
        create_self_signed_certificate()

    app = cherrypy.tree.mount(root=None, config=CMAPI_CONF_PATH)
    app.config.update({
        '/': {
            'request.dispatch': dispatcher,
            'error_page.default': jsonify_error,
        },
        'config': {
            'path': CMAPI_CONF_PATH,
        },
    })

    cherrypy.config.update(CMAPI_CONF_PATH)
    cfg_parser = helpers.get_config_parser(CMAPI_CONF_PATH)
    dispatcher_name, dispatcher_path = helpers.get_dispatcher_name_and_path(
        cfg_parser
    )
    MCSProcessManager.detect(dispatcher_name, dispatcher_path)
    # If there is no auto_failover flag in the config, turn it ON by default.
    turn_on_failover = cfg_parser.getboolean(
        'application', 'auto_failover', fallback=True
    )
    TxnBackgroundThread(cherrypy.engine, app).subscribe()
    # Subscribe the FailoverBackgroundThread plugin to bus channels.
    # The code below does not start the "real" failover background thread.
    FailoverBackgroundThread(cherrypy.engine, turn_on_failover).subscribe()
    cherrypy.engine.start()
    cherrypy.engine.wait(cherrypy.engine.states.STARTED)

    success = False
    config_mtime = os.path.getmtime(DEFAULT_MCS_CONF_PATH)
    # If the mtime changed, we infer that a put_config was run on this node,
    # and we now have a current config file.
    # TODO: Research all affected cases and remove/rewrite the loop below.
    # Previously it caused an endless wait while starting the
    # application after an upgrade.
    # Do we have any cases when we need to keep syncing the config with
    # other nodes in an endless retry?
    if not helpers.in_maintenance_state(DEFAULT_MCS_CONF_PATH):
        while (
            not success
            and config_mtime == os.path.getmtime(DEFAULT_MCS_CONF_PATH)
        ):
            try:
                success = helpers.get_current_config_file()
            except Exception:
                logging.info(
                    'Main got exception while get_current_config_file',
                    exc_info=True
                )
                success = False
            if not success:
                delay = 10
                logging.warning(
                    'Failed to fetch the current config file, '
                    f'retrying in {delay}s'
                )
                time.sleep(delay)

        config_mtime = os.path.getmtime(DEFAULT_MCS_CONF_PATH)
        helpers.wait_for_deactivation_or_put_config(config_mtime)

        dbrm_socket = DBRMSocketHandler()
        # TODO: fix the DBRM message shown on node restart.
        # Use the DBRM() context manager.
        try:
            dbrm_socket.connect()
            dbrm_socket._detect_protocol()
            dbrm_socket.close()
        except Exception:
            logging.warning(
                'Something went wrong while trying to detect the dbrm protocol.\n'
                'It seems the "controllernode" process isn\'t started.\n'
                'This is just a notification, not a problem.\n'
                'The next detection will start at the first node/cluster '
                'status check.\n'
                f'This can cause an extra {SOCK_TIMEOUT} seconds delay on the\n'
                'first attempt to get status.',
                exc_info=True
            )
    else:
        logging.info(
            'In maintenance state, not syncing config from other nodes.'
        )

    if turn_on_failover:
        if not helpers.in_maintenance_state(DEFAULT_MCS_CONF_PATH):
            cherrypy.engine.publish('failover', True)
        else:
            logging.info('In maintenance state, not starting Failover.')

    AppManager.started = True
    cherrypy.engine.block()
83 cmapi/cmapi_server/cmapi_logger.conf Normal file
@@ -0,0 +1,83 @@
{
  "version": 1,
  "filters": {
    "add_ip_filter": {
      "()": "cmapi_server.logging_management.AddIpFilter"
    }
  },
  "formatters": {
    "cmapi_server": {
      "format": "%(asctime)s [%(levelname)s] (%(name)s) {%(threadName)s} %(ip)s %(message)s",
      "datefmt": "%d/%b/%Y %H:%M:%S"
    },
    "default": {
      "format": "%(asctime)s [%(levelname)s] (%(name)s) {%(threadName)s} %(message)s",
      "datefmt": "%d/%b/%Y %H:%M:%S"
    },
    "container_sh": {
      "format": "`%(asctime)s`: %(message)s",
      "datefmt": "%a %d %b %Y %I:%M:%S %p %Z"
    }
  },
  "handlers": {
    "cmapi_server": {
      "level": "DEBUG",
      "class": "logging.StreamHandler",
      "filters": ["add_ip_filter"],
      "formatter": "cmapi_server",
      "stream": "ext://sys.stdout"
    },
    "console": {
      "level": "DEBUG",
      "class": "logging.StreamHandler",
      "formatter": "default",
      "stream": "ext://sys.stdout"
    },
    "file": {
      "level": "DEBUG",
      "class": "logging.handlers.RotatingFileHandler",
      "formatter": "default",
      "filename": "/var/log/mariadb/columnstore/cmapi_server.log",
      "mode": "a",
      "maxBytes": 1048576,
      "backupCount": 10,
      "encoding": "utf8"
    },
    "container_sh_file": {
      "level": "DEBUG",
      "class": "logging.handlers.RotatingFileHandler",
      "formatter": "container_sh",
      "filename": "/var/log/mariadb/columnstore/container-sh.log",
      "mode": "a",
      "maxBytes": 1024,
      "backupCount": 3,
      "encoding": "utf8"
    }
  },
  "loggers": {
    "cherrypy.access": {
      "handlers": ["console", "file"],
      "level": "INFO",
      "propagate": false
    },
    "cherrypy.error": {
      "handlers": ["console", "file"],
      "level": "INFO",
      "propagate": false
    },
    "cmapi_server": {
      "handlers": ["cmapi_server", "file"],
      "level": "DEBUG",
      "propagate": false
    },
    "container_sh": {
      "handlers": ["file", "container_sh_file"],
      "level": "DEBUG",
      "propagate": false
    },
    "": {
      "handlers": ["console", "file"],
      "level": "DEBUG"
    }
  }
}
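This file is dictConfig-style JSON (note "version": 1 and the "()" filter factory). A sketch of how such a file can be loaded, assuming the cmapi_server package is importable so the AddIpFilter factory referenced in the JSON resolves and that the /var/log/mariadb/columnstore directory the file handlers write to exists; the repo's actual loader lives in cmapi_server.logging_management, which is outside this commit excerpt:

```python
import json
import logging
import logging.config

# CMAPI_LOG_CONF_PATH in cmapi_server/constants.py points at this file.
with open('cmapi_server/cmapi_logger.conf') as fh:
    logging.config.dictConfig(json.load(fh))

logging.getLogger('cmapi_server').info('logging configured')
```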
9 cmapi/cmapi_server/cmapi_server.conf Normal file
@@ -0,0 +1,9 @@
[global]
server.socket_host = '0.0.0.0'
server.socket_port = 8640
server.ssl_module = 'builtin'
server.ssl_certificate = './cmapi_server/self-signed.crt'
server.ssl_private_key = './cmapi_server/self-signed.key'
engine.autoreload.on = False
log.access_file = ''
log.error_file = ''
84 cmapi/cmapi_server/constants.py Normal file
@ -0,0 +1,84 @@
"""Module contains constants values for cmapi, failover and other .py files.

TODO: move main constant paths here and replace in files in next releases.
"""
import os
from typing import NamedTuple


# default MariaDB ColumnStore config path
MCS_ETC_PATH = '/etc/columnstore'
DEFAULT_MCS_CONF_PATH = os.path.join(MCS_ETC_PATH, 'Columnstore.xml')

# default Storage Manager config path
DEFAULT_SM_CONF_PATH = os.path.join(MCS_ETC_PATH, 'storagemanager.cnf')

# MCSDATADIR (in mcs engine code) and related paths
MCS_DATA_PATH = '/var/lib/columnstore'
MCS_MODULE_FILE_PATH = os.path.join(MCS_DATA_PATH, 'local/module')
EM_PATH_SUFFIX = 'data1/systemFiles/dbrm'
MCS_EM_PATH = os.path.join(MCS_DATA_PATH, EM_PATH_SUFFIX)
MCS_BRM_CURRENT_PATH = os.path.join(MCS_EM_PATH, 'BRM_saves_current')
S3_BRM_CURRENT_PATH = os.path.join(EM_PATH_SUFFIX, 'BRM_saves_current')
# keys file for CEJ password encryption/decryption
# (CrossEngineSupport section in Columnstore.xml)
MCS_SECRETS_FILE_PATH = os.path.join(MCS_DATA_PATH, '.secrets')

# CMAPI SERVER
CMAPI_CONFIG_FILENAME = 'cmapi_server.conf'
CMAPI_ROOT_PATH = os.path.dirname(__file__)
PROJECT_PATH = os.path.dirname(CMAPI_ROOT_PATH)
# path to VERSION file
VERSION_PATH = os.path.join(PROJECT_PATH, 'VERSION')
CMAPI_LOG_CONF_PATH = os.path.join(CMAPI_ROOT_PATH, 'cmapi_logger.conf')
# path to CMAPI default config
CMAPI_DEFAULT_CONF_PATH = os.path.join(CMAPI_ROOT_PATH, CMAPI_CONFIG_FILENAME)
# CMAPI config path
CMAPI_CONF_PATH = os.path.join(MCS_ETC_PATH, CMAPI_CONFIG_FILENAME)

# TOTP secret key
SECRET_KEY = 'MCSIsTheBestEver'  # not just a random string! (base32)


# network constants
LOCALHOSTS = ('localhost', '127.0.0.1', '::1')

CMAPI_INSTALL_PATH = '/usr/share/columnstore/cmapi/'
CMAPI_PYTHON_BIN = os.path.join(CMAPI_INSTALL_PATH, "python/bin/python3")
CMAPI_PYTHON_DEPS_PATH = os.path.join(CMAPI_INSTALL_PATH, "deps")
CMAPI_PYTHON_BINARY_DEPS_PATH = os.path.join(CMAPI_PYTHON_DEPS_PATH, "bin")
CMAPI_SINGLE_NODE_XML = os.path.join(
    CMAPI_INSTALL_PATH, 'cmapi_server/SingleNode.xml'
)

# constants for dispatchers
class ProgInfo(NamedTuple):
    """NamedTuple for some additional info about handling mcs processes."""
    stop_priority: int  # priority for building stop sequence
    service_name: str  # systemd service name
    subcommand: str  # subcommand for process run in docker container
    only_primary: bool  # use this process only on primary
    delay: int = 0  # delay after process start in docker container

# mcs-loadbrm and mcs-savebrm are dependencies for workernode and resolved
# on top level of process handling
# mcs-storagemanager starts conditionally inside mcs-loadbrm, but should be
# stopped using cmapi
ALL_MCS_PROGS = {
    # workernode starts on primary and non primary node with 1 or 2 added
    # to subcommand (DBRM_Worker1 - on primary, DBRM_Worker2 - non primary)
    'StorageManager': ProgInfo(15, 'mcs-storagemanager', '', False, 1),
    'workernode': ProgInfo(13, 'mcs-workernode', 'DBRM_Worker{}', False, 1),
    'controllernode': ProgInfo(11, 'mcs-controllernode', 'fg', True),
    'PrimProc': ProgInfo(5, 'mcs-primproc', '', False, 1),
    'ExeMgr': ProgInfo(9, 'mcs-exemgr', '', False, 1),
    'WriteEngineServer': ProgInfo(7, 'mcs-writeengineserver', '', False, 3),
    'DMLProc': ProgInfo(3, 'mcs-dmlproc', '', False),
    'DDLProc': ProgInfo(1, 'mcs-ddlproc', '', False),
}

# constants for docker container dispatcher
MCS_INSTALL_BIN = '/usr/bin'
IFLAG = os.path.join(MCS_ETC_PATH, 'container-initialized')
LIBJEMALLOC_DEFAULT_PATH = os.path.join(MCS_DATA_PATH, 'libjemalloc.so.2')
MCS_LOG_PATH = '/var/log/mariadb/columnstore'
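The stop_priority field orders shutdown: sorting ALL_MCS_PROGS by it yields a sequence where DDLProc (priority 1) stops first and StorageManager (priority 15) stops last. A small illustration of that use (not engine code):

from cmapi_server.constants import ALL_MCS_PROGS

# Build the stop sequence: lowest stop_priority stops first.
stop_order = sorted(ALL_MCS_PROGS.items(), key=lambda kv: kv[1].stop_priority)
for name, info in stop_order:
    print(name, '->', info.service_name)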
0
cmapi/cmapi_server/controllers/__init__.py
Normal file
262
cmapi/cmapi_server/controllers/dispatcher.py
Normal file
@ -0,0 +1,262 @@
import json

import cherrypy

from cmapi_server.controllers.endpoints import (
    StatusController, ConfigController, BeginController, CommitController,
    RollbackController, StartController, ShutdownController,
    ExtentMapController, ClusterController, ApiKeyController,
    LoggingConfigController, AppController
)

from cmapi_server.controllers.s3dataload import S3DataLoadController

_version = '0.4.0'
dispatcher = cherrypy.dispatch.RoutesDispatcher()


# /_version/status (GET)
dispatcher.connect(name = 'status',
                   route = f'/cmapi/{_version}/node/status',
                   action = 'get_status',
                   controller = StatusController(),
                   conditions = {'method': ['GET']})


# /_version/primary (GET)
dispatcher.connect(name = 'get_primary',
                   route = f'/cmapi/{_version}/node/primary',
                   action = 'get_primary',
                   controller = StatusController(),
                   conditions = {'method': ['GET']})


# /_version/new_primary (GET)
dispatcher.connect(name = 'get_new_primary',
                   route = f'/cmapi/{_version}/node/new_primary',
                   action = 'get_new_primary',
                   controller = StatusController(),
                   conditions = {'method': ['GET']})


# /_version/config/ (GET)
dispatcher.connect(name = 'get_config',  # what is this name used for?
                   route = f'/cmapi/{_version}/node/config',
                   action = 'get_config',
                   controller = ConfigController(),
                   conditions = {'method': ['GET']})


# /_version/config/ (PUT)
dispatcher.connect(name = 'put_config',
                   route = f'/cmapi/{_version}/node/config',
                   action = 'put_config',
                   controller = ConfigController(),
                   conditions = {'method': ['PUT']})


# /_version/begin/ (PUT)
dispatcher.connect(name = 'put_begin',
                   route = f'/cmapi/{_version}/node/begin',
                   action = 'put_begin',
                   controller = BeginController(),
                   conditions = {'method': ['PUT']})


# /_version/rollback/ (PUT)
dispatcher.connect(name = 'put_rollback',
                   route = f'/cmapi/{_version}/node/rollback',
                   action = 'put_rollback',
                   controller = RollbackController(),
                   conditions = {'method': ['PUT']})


# /_version/commit/ (PUT)
dispatcher.connect(name = 'put_commit',
                   route = f'/cmapi/{_version}/node/commit',
                   action = 'put_commit',
                   controller = CommitController(),
                   conditions = {'method': ['PUT']})


# /_version/start/ (PUT)
dispatcher.connect(name = 'start',
                   route = f'/cmapi/{_version}/node/start',
                   action = 'put_start',
                   controller = StartController(),
                   conditions = {'method': ['PUT']})


# /_version/shutdown/ (PUT)
dispatcher.connect(name = 'shutdown',
                   route = f'/cmapi/{_version}/node/shutdown',
                   action = 'put_shutdown',
                   controller = ShutdownController(),
                   conditions = {'method': ['PUT']})


# /_version/meta/em/ (GET)
dispatcher.connect(name = 'get_em',
                   route = f'/cmapi/{_version}/node/meta/em',
                   action = 'get_em',
                   controller = ExtentMapController(),
                   conditions = {'method': ['GET']})


# /_version/meta/journal/ (GET)
dispatcher.connect(name = 'get_journal',
                   route = f'/cmapi/{_version}/node/meta/journal',
                   action = 'get_journal',
                   controller = ExtentMapController(),
                   conditions = {'method': ['GET']})


# /_version/meta/vss/ (GET)
dispatcher.connect(name = 'get_vss',
                   route = f'/cmapi/{_version}/node/meta/vss',
                   action = 'get_vss',
                   controller = ExtentMapController(),
                   conditions = {'method': ['GET']})


# /_version/meta/vbbm/ (GET)
dispatcher.connect(name = 'get_vbbm',
                   route = f'/cmapi/{_version}/node/meta/vbbm',
                   action = 'get_vbbm',
                   controller = ExtentMapController(),
                   conditions = {'method': ['GET']})


# /_version/meta/footprint/ (GET)
dispatcher.connect(name = 'get_footprint',
                   route = f'/cmapi/{_version}/node/meta/footprint',
                   action = 'get_footprint',
                   controller = ExtentMapController(),
                   conditions = {'method': ['GET']})


# /_version/cluster/start/ (PUT)
dispatcher.connect(name = 'cluster_start',
                   route = f'/cmapi/{_version}/cluster/start',
                   action = 'put_start',
                   controller = ClusterController(),
                   conditions = {'method': ['PUT']})


# /_version/cluster/shutdown/ (PUT)
dispatcher.connect(name = 'cluster_shutdown',
                   route = f'/cmapi/{_version}/cluster/shutdown',
                   action = 'put_shutdown',
                   controller = ClusterController(),
                   conditions = {'method': ['PUT']})


# /_version/cluster/mode-set/ (PUT)
dispatcher.connect(name = 'cluster_mode_set',
                   route = f'/cmapi/{_version}/cluster/mode-set',
                   action = 'put_mode_set',
                   controller = ClusterController(),
                   conditions = {'method': ['PUT']})


# /_version/cluster/node/ (POST, PUT)
dispatcher.connect(name = 'cluster_add_node',
                   route = f'/cmapi/{_version}/cluster/node',
                   action = 'put_add_node',
                   controller = ClusterController(),
                   conditions = {'method': ['POST', 'PUT']})


# /_version/cluster/node/ (DELETE)
dispatcher.connect(name = 'cluster_remove_node',
                   route = f'/cmapi/{_version}/cluster/node',
                   action = 'delete_remove_node',
                   controller = ClusterController(),
                   conditions = {'method': ['DELETE']})


# /_version/cluster/status/ (GET)
dispatcher.connect(name = 'cluster_status',
                   route = f'/cmapi/{_version}/cluster/status',
                   action = 'get_status',
                   controller = ClusterController(),
                   conditions = {'method': ['GET']})


# /_version/node/apikey-set/ (PUT)
dispatcher.connect(
    name = 'node_set_api_key',
    route = f'/cmapi/{_version}/node/apikey-set',
    action = 'set_api_key',
    controller = ApiKeyController(),
    conditions = {'method': ['PUT']}
)


# /_version/cluster/apikey-set/ (PUT)
dispatcher.connect(
    name = 'cluster_set_api_key',
    route = f'/cmapi/{_version}/cluster/apikey-set',
    action = 'set_api_key',
    controller = ClusterController(),
    conditions = {'method': ['PUT']}
)


# /_version/cluster/load_s3data/ (POST, PUT)
dispatcher.connect(name = 'cluster_load_s3data',
                   route = f'/cmapi/{_version}/cluster/load_s3data',
                   action = 'load_s3data',
                   controller = S3DataLoadController(),
                   conditions = {'method': ['POST', 'PUT']})


# /_version/node/log-level/ (PUT)
dispatcher.connect(
    name = 'node_set_log_level',
    route = f'/cmapi/{_version}/node/log-level',
    action = 'set_log_level',
    controller = LoggingConfigController(),
    conditions = {'method': ['PUT']}
)


# /_version/cluster/log-level/ (PUT)
dispatcher.connect(
    name = 'cluster_set_log_level',
    route = f'/cmapi/{_version}/cluster/log-level',
    action = 'set_log_level',
    controller = ClusterController(),
    conditions = {'method': ['PUT']}
)


# /ready (GET)
dispatcher.connect(
    name = 'app_ready',
    route = '/cmapi/ready',
    action = 'ready',
    controller = AppController(),
    conditions = {'method': ['GET']}
)


def jsonify_error(status, message, traceback, version):  # pylint: disable=unused-argument
    """JSONify all CherryPy error responses (created by raising the
    cherrypy.HTTPError exception)
    """

    cherrypy.response.headers['Content-Type'] = 'application/json'
    response_body = json.dumps(
        {
            'error': {
                'http_status': status,
                'message': message,
            }
        }
    )

    cherrypy.response.status = status

    return response_body
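The module only builds the RoutesDispatcher; a sketch of how it could be wired into a CherryPy application (the actual startup code lives elsewhere in this commit, so the config below is illustrative, not the server's own bootstrap):

import cherrypy

from cmapi_server.controllers.dispatcher import dispatcher, jsonify_error

app_config = {
    '/': {
        # route all requests through the RoutesDispatcher built above
        'request.dispatch': dispatcher,
        # render every HTTPError as the JSON body produced by jsonify_error
        'error_page.default': jsonify_error,
    }
}
cherrypy.tree.mount(root=None, config=app_config)
cherrypy.engine.start()
cherrypy.engine.block()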
1139
cmapi/cmapi_server/controllers/endpoints.py
Normal file
File diff suppressed because it is too large
12
cmapi/cmapi_server/controllers/error.py
Normal file
@ -0,0 +1,12 @@
import json
import cherrypy as cp

class APIError(cp.HTTPError):
    def __init__(self, status: int = 500, message: str = ''):
        super().__init__(status=status)
        self._error_message = message

    def set_response(self):
        super().set_response()
        response = cp.serving.response
        response.body = json.dumps({'error': self._error_message}).encode()
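Usage sketch: raising APIError from any handler produces a JSON error body instead of CherryPy's default HTML error page:

from cmapi_server.controllers.error import APIError

def handler():
    # the client receives status 422 with body {"error": "bad input"}
    raise APIError(422, 'bad input')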
335
cmapi/cmapi_server/controllers/s3dataload.py
Normal file
@ -0,0 +1,335 @@
import logging
import os
import re
import selectors
import tempfile
import uuid
from subprocess import PIPE, Popen, run, CalledProcessError

import cherrypy
import furl
from cmapi_server.constants import (
    CMAPI_PYTHON_BIN, CMAPI_PYTHON_BINARY_DEPS_PATH, CMAPI_PYTHON_DEPS_PATH
)

from cmapi_server.controllers.endpoints import raise_422_error


module_logger = logging.getLogger('cmapi_server')


def response_error(text):
    raise_422_error(module_logger, 'load_s3data', text)


class S3DataLoadController:
    @cherrypy.tools.json_in()
    @cherrypy.tools.json_out()
    @cherrypy.tools.validate_api_key()  # pylint: disable=no-member
    def load_s3data(self):
        """
        Handler for /cluster/load_s3data (POST, PUT)
        Invokes cpimport with passed params
        This is an internal ColumnStore engine handler
        Not targeted for manual usage

        Waits for json dictionary params in request
        bucket - S3 bucket with table data
        table - table name to load data into
        filename - name of file in S3 with table data
        key - S3 secret key
        secret - S3 secret
        region - S3 region
        database - db name to load data into
        """

        def checkShellParamsAreOK(param, paramname):
            """Check shell params for dangerous symbols.

            As these params will be passed to the shell, we should check
            that there is no shell injection.
            AWS Access Key ID is 20 alpha-numeric characters
            like 022QF06E7MXBSH9DHM02
            AWS Secret Access Key is 40 alpha-numeric-slash-plus characters
            like kWcrlUX5JEDGM/LtmEENI/aVmYvHNif5zB+d9+ct
            AWS bucket names are alpha-numeric-dot-underscore
            like log-delivery-march-2020.com
            AWS region names, table names and file names also do not allow
            dangerous symbols, so just raise an error for injection-dangerous
            symbols in params.
            """
            dangerous_symbols = ' &|;\n\r`$'
            for symbol in dangerous_symbols:
                if symbol in param:
                    response_error(
                        f'S3 configuration parameters wrong: {paramname} '
                        f'cannot contain "{symbol}"'
                    )

        def getKey(keyname, request_body, skip_check=False, required=True):
            value = request_body.get(keyname, None)

            if not value and required:
                response_error(
                    f'Some S3 configuration parameters missing: {keyname} '
                    'not provided'
                )

            if not skip_check:
                checkShellParamsAreOK(value, keyname)

            return value

        def prepare_aws(bucket, filename, secret, key, region):
            """Prepare aws_cli popen object.

            Invoke aws_cli download, and return proc for further
            use with cpimport.

            :param bucket: bucket name
            :type bucket: str
            :param filename: filename in bucket
            :type filename: str
            :param secret: aws secret
            :type secret: str
            :param key: aws key
            :type key: str
            :param region: aws region
            :type region: str
            :return: popen aws_cli object
            :rtype: subprocess.Popen
            """
            my_env = os.environ.copy()
            my_env['AWS_ACCESS_KEY_ID'] = key
            my_env['AWS_SECRET_ACCESS_KEY'] = secret
            my_env['PYTHONPATH'] = CMAPI_PYTHON_DEPS_PATH

            aws_cli_binary = os.path.join(CMAPI_PYTHON_BINARY_DEPS_PATH, 'aws')
            s3_url = furl.furl(bucket).add(path=filename).url
            aws_command_line = [
                CMAPI_PYTHON_BIN, aws_cli_binary,
                "s3", "cp", "--source-region", region, s3_url, "-"
            ]
            module_logger.debug(
                f'AWS commandline: {" ".join(aws_command_line)}')
            try:
                aws_proc = Popen(
                    aws_command_line, env=my_env, stdout=PIPE,
                    stderr=PIPE, shell=False, encoding='utf-8'
                )
            except CalledProcessError as exc:
                response_error(exc.stderr.split('\n')[0])

            return aws_proc

        def prepare_google_storage(
            bucket, filename, secret, key, temporary_config
        ):
            """Prepare gsutil popen object.

            Invoke gsutil download, and return proc for further use
            with cpimport.

            :param bucket: bucket name
            :type bucket: str
            :param filename: filename in bucket
            :type filename: str
            :param secret: gsutil secret
            :type secret: str
            :param key: gsutil key
            :type key: str
            :param temporary_config: temp config for gsutil
            :type temporary_config: str
            :return: popen gsutil object
            :rtype: subprocess.Popen
            """
            project_id = 'project_id'
            gs_cli_binary = os.path.join(
                CMAPI_PYTHON_BINARY_DEPS_PATH, 'gsutil'
            )

            commandline = (
                f'/usr/bin/bash -c '
                f'\'echo -e "{key}\n{secret}\n{project_id}"\' | '
                f'{CMAPI_PYTHON_BIN} {gs_cli_binary} '
                f'config -a -o {temporary_config}'
            )

            module_logger.debug(
                f'gsutil config commandline: '
                f'{commandline.encode("unicode_escape").decode("utf-8")}'
            )

            my_env = os.environ.copy()
            my_env['PYTHONPATH'] = CMAPI_PYTHON_DEPS_PATH
            my_env['BOTO_CONFIG'] = temporary_config

            try:
                p = run(
                    commandline, capture_output=True,
                    shell=True, encoding='utf-8', check=True, env=my_env
                )
            except CalledProcessError as exc:
                response_error(exc.stderr.split('\n')[0])

            try:
                check_commandline = [
                    CMAPI_PYTHON_BIN, gs_cli_binary, 'version', '-l'
                ]
                p = run(
                    check_commandline, capture_output=True,
                    shell=False, encoding='utf-8', check=True, env=my_env
                )
                module_logger.debug(
                    f'gsutil config check commandline: '
                    f'{" ".join(check_commandline)}'
                )
                module_logger.debug(f'gsutil config: {p.stdout}')

            except CalledProcessError as exc:
                response_error(exc.stderr.split('\n')[0])

            gs_url = furl.furl(bucket).add(path=filename).url
            gs_command_line = [
                CMAPI_PYTHON_BIN, gs_cli_binary, 'cat', gs_url
            ]
            module_logger.debug(
                f'gsutil cat commandline: {" ".join(gs_command_line)}'
            )

            try:
                gs_process = Popen(
                    gs_command_line, env=my_env, stdout=PIPE, stderr=PIPE,
                    shell=False, encoding='utf-8'
                )
            except CalledProcessError as exc:
                response_error(exc.stderr.split('\n')[0])

            return gs_process

        module_logger.debug('LOAD S3 Data')
        request = cherrypy.request
        request_body = request.json

        bucket = getKey('bucket', request_body)

        if bucket.startswith(r's3://'):
            storage = 'aws'
        elif bucket.startswith(r'gs://'):
            storage = 'gs'
        else:
            error = (
                'Incorrect bucket. Should start with s3:// for AWS S3 or '
                'gs:// for Google Storage'
            )
            response_error(error)

        table = getKey('table', request_body)
        filename = getKey('filename', request_body)
        key = getKey('key', request_body)
        secret = getKey('secret', request_body)
        region = getKey('region', request_body, required=storage=='aws')
        database = getKey('database', request_body)
        terminated_by = getKey('terminated_by', request_body, skip_check=True)
        enclosed_by = getKey(
            'enclosed_by', request_body, skip_check=True, required=False
        )
        escaped_by = getKey(
            'escaped_by', request_body, skip_check=True, required=False
        )

        if storage == 'aws':
            download_proc = prepare_aws(bucket, filename, secret, key, region)
        elif storage == 'gs':
            temporary_config = os.path.join(
                tempfile.gettempdir(), '.boto.' + str(uuid.uuid4())
            )

            download_proc = prepare_google_storage(
                bucket, filename, secret, key, temporary_config
            )
        else:
            response_error('Unknown storage detected. Internal error')

        cpimport_command_line = [
            'cpimport', database, table, '-s', terminated_by
        ]
        if escaped_by:
            cpimport_command_line += ['-C', escaped_by]
        if enclosed_by:
            cpimport_command_line += ['-E', enclosed_by]

        module_logger.debug(
            f'cpimport command line: {" ".join(cpimport_command_line)}'
        )

        cpimport_proc = Popen(
            cpimport_command_line, shell=False, stdin=download_proc.stdout,
            stdout=PIPE, stderr=PIPE, encoding='utf-8'
        )

        selector = selectors.DefaultSelector()
        for stream in [
            download_proc.stderr, cpimport_proc.stderr, cpimport_proc.stdout
        ]:
            os.set_blocking(stream.fileno(), False)

        selector.register(
            download_proc.stderr, selectors.EVENT_READ, data='downloader_error'
        )
        selector.register(
            cpimport_proc.stderr, selectors.EVENT_READ, data='cpimport_error'
        )
        selector.register(
            cpimport_proc.stdout, selectors.EVENT_READ, data='cpimport_output'
        )

        downloader_error = ''
        cpimport_error = ''
        cpimport_output = ''

        alive = 3
        while alive > 0:
            events = selector.select()
            for key, mask in events:
                name = key.data
                line = key.fileobj.readline().rstrip()
                if not line:
                    # EOF
                    alive -= 1
                    selector.unregister(key.fileobj)
                    continue
                if name == 'downloader_error':
                    downloader_error += line + '\n'
                if name == 'cpimport_error':
                    cpimport_error += line + '\n'
                if name == 'cpimport_output':
                    cpimport_output += line + '\n'

        # clean up after prepare_google_storage
        if storage == 'gs' and os.path.exists(temporary_config):
            os.remove(temporary_config)

        if downloader_error:
            response_error(downloader_error)

        if cpimport_error:
            response_error(cpimport_error)

        module_logger.debug(f'LOAD S3 Data stdout: {cpimport_output}')

        pattern = '([0-9]+) rows processed and ([0-9]+) rows inserted'
        match = re.search(pattern, cpimport_output)

        if not match:
            return {
                'success': False,
                'inserted': 0,
                'processed': 0
            }

        return {
            'success': True,
            'inserted': match.group(2),
            'processed': match.group(1)
        }
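A hedged example of invoking this internal endpoint with the body keys from the docstring above (host, API key, and all S3 values are placeholders):

import requests

payload = {
    'bucket': 's3://example-bucket',
    'table': 'orders',
    'filename': 'orders.csv',
    'key': '<aws-access-key-id>',
    'secret': '<aws-secret-access-key>',
    'region': 'us-east-1',
    'database': 'mydb',
    'terminated_by': ',',
}
r = requests.put(
    'https://localhost:8640/cmapi/0.4.0/cluster/load_s3data',
    json=payload, headers={'x-api-key': '<api-key>'}, verify=False,
)
# e.g. {'success': True, 'processed': '100', 'inserted': '100'}
print(r.json())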
22
cmapi/cmapi_server/exceptions.py
Normal file
@ -0,0 +1,22 @@
"""Module contains custom exceptions."""


class CMAPIBasicError(Exception):
    """Basic exception raised for CMAPI related processes.

    Attributes:
        message -- explanation of the error
    """
    def __init__(self, message: str) -> None:
        self.message = message
        super().__init__(self.message)

    def __str__(self) -> str:
        return self.message


class CEJError(CMAPIBasicError):
    """Exception raised for CEJ related processes.

    Attributes:
        message -- explanation of the error
    """
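Usage sketch: CEJError subclasses CMAPIBasicError, so callers can catch either:

from cmapi_server.exceptions import CEJError, CMAPIBasicError

try:
    raise CEJError('cannot read .secrets')
except CMAPIBasicError as err:
    print(err.message)  # 'cannot read .secrets'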
185
cmapi/cmapi_server/failover_agent.py
Normal file
@ -0,0 +1,185 @@
'''
This class implements the interface used by the failover module to notify
the cluster of events like node-up / node-down, etc.
'''

import logging
import time

import requests

from cmapi_server import helpers, node_manipulation
from cmapi_server.constants import DEFAULT_MCS_CONF_PATH
from cmapi_server.exceptions import CMAPIBasicError
from cmapi_server.managers.process import MCSProcessManager
from failover.agent_comm import AgentBase
from mcs_node_control.models.node_config import NodeConfig


# Bug in pylint https://github.com/PyCQA/pylint/issues/4584
requests.packages.urllib3.disable_warnings()  # pylint: disable=no-member
logger = logging.getLogger('failover_agent')


class FailoverAgent(AgentBase):

    def activateNodes(
        self, nodes, input_config_filename=DEFAULT_MCS_CONF_PATH,
        output_config_filename=None, test_mode=False
    ):
        logger.info(f'FA.activateNodes(): activating nodes: {nodes}')
        new_node_count = 0
        for node in nodes:
            try:
                logger.info(f'FA.activateNodes(): adding node {node}')
                node_manipulation.add_node(
                    node, input_config_filename, output_config_filename
                )
                new_node_count += 1
            except Exception:
                logger.error(f'FA.activateNodes(): failed to add node {node}')
                raise
        return new_node_count

    def deactivateNodes(
        self, nodes, input_config_filename=DEFAULT_MCS_CONF_PATH,
        output_config_filename=None, test_mode=False
    ):
        logger.info(f'FA.deactivateNodes(): deactivating nodes: {nodes}')

        removed_node_count = 0
        for node in nodes:
            try:
                logger.info(f'FA.deactivateNodes(): deactivating node {node}')
                node_manipulation.remove_node(
                    node, input_config_filename, output_config_filename,
                    deactivate_only=True, test_mode=test_mode
                )
                removed_node_count += 1
            except Exception as err:
                logger.error(
                    f'FA.deactivateNodes(): failed to deactivate node {node}, '
                    f'got {str(err)}'
                )
                raise
        return removed_node_count


    # The 'hack' parameter is a placeholder. When run by agent_comm, this
    # function gets a first parameter of (). When that is the
    # input_config_filename, that's bad. Needs to be fixed.
    def movePrimaryNode(self, hack, input_config_filename = None, output_config_filename = None, test_mode = False):
        logger.info('FA.movePrimaryNode(): moving primary node functionality')

        # to save a little typing in testing
        kwargs = {
            "cs_config_filename": input_config_filename,
            "input_config_filename" : input_config_filename,
            "output_config_filename" : output_config_filename,
            "test_mode" : test_mode
        }

        try:
            node_manipulation.move_primary_node(**kwargs)
        except Exception as e:
            logger.error(f"FA.movePrimaryNode(): failed to move primary node, got {str(e)}")
            raise

    def enterStandbyMode(self, test_mode = False):
        nc = NodeConfig()
        node_name = nc.get_module_net_address(nc.get_current_config_root())
        logger.info(
            f'FA.enterStandbyMode(): shutting down node "{node_name}"'
        )

        # this gets retried by the caller on error
        try:
            # TODO: remove test_mode condition and add mock for testing
            if not test_mode:
                MCSProcessManager.stop_node(is_primary=nc.is_primary_node())
            logger.info(
                'FA.enterStandbyMode(): successfully stopped node.'
            )
        except CMAPIBasicError as err:
            logger.error(
                'FA.enterStandbyMode(): caught error while stopping node. '
                f'{err.message}'
            )


    def raiseAlarm(self, msg):
        logger.critical(msg)


    # The start/commit/rollback transaction fcns use the active list to decide
    # which nodes to send to; when we're adding a node, the new node isn't in
    # the active list yet, so extra_nodes gives us additional hostnames/addrs
    # to send the transaction to.
    # Likewise for removing a node. Presumably that node is not reachable, so
    # it must be removed from the list to send to.
    def startTransaction(self, extra_nodes = [], remove_nodes = []):
        got_txn = False
        count = 0
        while not got_txn:
            msg = None
            try:
                (got_txn, txn_id, nodes) = helpers.start_transaction(
                    extra_nodes=extra_nodes, remove_nodes=remove_nodes
                )
            except Exception as e:
                got_txn = False
                msg = (
                    f'FA.start_transaction(): attempt #{count+1}, '
                    f'failed to get a transaction, got {str(e)}'
                )

            if not got_txn:
                if msg is None:
                    msg = (
                        f'FA.start_transaction(): attempt #{count+1}, '
                        'failed to get a transaction'
                    )
                if count < 5:
                    logger.warning(msg)
                else:
                    logger.error(msg)
                time.sleep(1)
                count += 1
        logger.info(f'FA.startTransaction(): started transaction {txn_id}')
        return (txn_id, nodes)


    # These shouldn't throw for now
    def commitTransaction(self, txn_id, nodes, **kwargs):
        try:
            helpers.update_revision_and_manager()
            # broadcasting new config invokes node restart
            helpers.broadcast_new_config(nodes=nodes)
            helpers.commit_transaction(txn_id, nodes=nodes)
        except Exception:
            logger.error(
                (
                    'FA.commitTransaction(): failed to commit transaction '
                    f'{txn_id}'
                ),
                exc_info=True
            )
        else:
            logger.info(
                f'FA.commitTransaction(): committed transaction {txn_id}'
            )


    def rollbackTransaction(self, txn_id, nodes):
        try:
            helpers.rollback_transaction(txn_id, nodes = nodes)
        except Exception:
            logger.error(
                (
                    'FA.rollbackTransaction(): failed to rollback transaction '
                    f'{txn_id}. Got unrecognised error.'
                ),
                exc_info=True
            )
        else:
            logger.info(
                f'FA.rollbackTransaction(): rolled back transaction {txn_id}'
            )
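A sketch of the transaction pattern a caller of this agent presumably follows (the node address is a placeholder):

from cmapi_server.failover_agent import FailoverAgent

fa = FailoverAgent()
# a new node is not in the active list yet, so pass it as an extra node
txn_id, nodes = fa.startTransaction(extra_nodes=['10.0.0.5'])
try:
    fa.activateNodes(['10.0.0.5'])
    fa.commitTransaction(txn_id, nodes)
except Exception:
    fa.rollbackTransaction(txn_id, nodes)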
0
cmapi/cmapi_server/handlers/__init__.py
Normal file
119
cmapi/cmapi_server/handlers/cej.py
Normal file
@ -0,0 +1,119 @@
"""Module contains all things related to working with .secrets file."""
import json
import logging
import os

from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.primitives import padding

from cmapi_server.constants import MCS_SECRETS_FILE_PATH
from cmapi_server.exceptions import CEJError


AES_BLOCK_SIZE_BITS = algorithms.AES.block_size
AES_IV_BIN_SIZE = int(AES_BLOCK_SIZE_BITS/8)
# two hex chars for each byte
AES_IV_HEX_SIZE = AES_IV_BIN_SIZE * 2


class CEJPasswordHandler():
    """Handler for CrossEngineSupport password decryption."""

    @classmethod
    def secretsfile_exists(cls):
        """Check the .secrets file in MCS_SECRETS_FILE_PATH.

        :return: True if file exists and not empty.
        :rtype: bool
        """
        try:
            if (
                os.path.isfile(MCS_SECRETS_FILE_PATH) and
                os.path.getsize(MCS_SECRETS_FILE_PATH) > 0
            ):
                return True
        except Exception:
            # TODO: remove once it is verified that Python 3.8+ always
            # exists in the package, because isfile and getsize are not
            # raising exceptions after 3.8
            logging.warning(
                'Something went wrong while detecting the .secrets file.',
                exc_info=True
            )
        return False

    @classmethod
    def get_secrets_json(cls):
        """Get json from .secrets file.

        :raises CEJError: on empty/corrupted/wrong-format .secrets file
        :return: json from .secrets file
        :rtype: dict
        """
        if not cls.secretsfile_exists():
            raise CEJError(f'{MCS_SECRETS_FILE_PATH} file does not exist.')
        with open(MCS_SECRETS_FILE_PATH) as secrets_file:
            try:
                secrets_json = json.load(secrets_file)
            except Exception:
                logging.error(
                    'Something went wrong while loading json from '
                    f'{MCS_SECRETS_FILE_PATH}',
                    exc_info=True
                )
                raise CEJError(
                    f'Looks like file {MCS_SECRETS_FILE_PATH} is corrupted '
                    'or has wrong format.'
                ) from None
        return secrets_json

    @classmethod
    def decrypt_password(cls, enc_data: str):
        """Decrypt CEJ password if needed.

        :param enc_data: encrypted initialization vector + password in hex str
        :type enc_data: str
        :return: decrypted CEJ password
        :rtype: str
        """
        if not cls.secretsfile_exists():
            logging.warning('Unencrypted CrossEngineSupport password used.')
            return enc_data

        logging.info('Encrypted CrossEngineSupport password found.')

        try:
            iv = bytes.fromhex(enc_data[:AES_IV_HEX_SIZE])
            encrypted_passwd = bytes.fromhex(enc_data[AES_IV_HEX_SIZE:])
        except ValueError as value_error:
            raise CEJError(
                'Non-hexadecimal number found in encrypted CEJ password.'
            ) from value_error

        secrets_json = cls.get_secrets_json()
        encryption_key_hex = secrets_json.get('encryption_key')
        if not encryption_key_hex:
            raise CEJError(
                f'Empty "encryption_key" found in {MCS_SECRETS_FILE_PATH}'
            )
        try:
            encryption_key = bytes.fromhex(encryption_key_hex)
        except ValueError as value_error:
            raise CEJError(
                'Non-hexadecimal number found in encryption key from '
                f'{MCS_SECRETS_FILE_PATH} file.'
            ) from value_error
        cipher = Cipher(
            algorithms.AES(encryption_key),
            modes.CBC(iv)
        )
        decryptor = cipher.decryptor()
        unpadder = padding.PKCS7(AES_BLOCK_SIZE_BITS).unpadder()
        padded_passwd_bytes = (
            decryptor.update(encrypted_passwd)
            + decryptor.finalize()
        )
        passwd_bytes = (
            unpadder.update(padded_passwd_bytes) + unpadder.finalize()
        )
        return passwd_bytes.decode()
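For reference, a sketch of the inverse operation producing the hex(iv) + hex(ciphertext) layout that decrypt_password expects; this illustrates the format only and is not the engine's own encryption code:

import os

from cryptography.hazmat.primitives import padding
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes

def encrypt_password(passwd: str, encryption_key: bytes) -> str:
    iv = os.urandom(16)  # one AES block
    # PKCS7-pad the password to the AES block size, as the unpadder assumes
    padder = padding.PKCS7(algorithms.AES.block_size).padder()
    padded = padder.update(passwd.encode()) + padder.finalize()
    encryptor = Cipher(
        algorithms.AES(encryption_key), modes.CBC(iv)
    ).encryptor()
    ciphertext = encryptor.update(padded) + encryptor.finalize()
    # decrypt_password slices the first AES_IV_HEX_SIZE hex chars as the IV
    return iv.hex() + ciphertext.hex()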
579
cmapi/cmapi_server/handlers/cluster.py
Normal file
@ -0,0 +1,579 @@
"""Module contains Cluster business logic functions."""
import logging
from datetime import datetime

import requests

from cmapi_server.constants import (
    CMAPI_CONF_PATH, DEFAULT_MCS_CONF_PATH,
)
from cmapi_server.exceptions import CMAPIBasicError
from cmapi_server.helpers import (
    broadcast_new_config, commit_transaction, get_active_nodes, get_dbroots,
    get_config_parser, get_current_key, get_id, get_version, start_transaction,
    rollback_transaction, update_revision_and_manager,
)
from cmapi_server.node_manipulation import (
    add_node, add_dbroot, remove_node, switch_node_maintenance,
)
from mcs_node_control.models.misc import get_dbrm_master
from mcs_node_control.models.node_config import NodeConfig


class ClusterHandler():
    """Class for handling MCS Cluster operations."""

    @staticmethod
    def status(
        config: str = DEFAULT_MCS_CONF_PATH,
        logger: logging.Logger = logging.getLogger('cmapi_server')
    ) -> dict:
        """Method to get MCS Cluster status information.

        :param config: columnstore xml config file path,
                       defaults to DEFAULT_MCS_CONF_PATH
        :type config: str, optional
        :param logger: logger, defaults to logging.getLogger('cmapi_server')
        :type logger: logging.Logger, optional
        :raises CMAPIBasicError: if some exception is caught while getting
                                 status from each node separately
        :return: status result
        :rtype: dict
        """
        logger.debug('Cluster status command called. Getting status.')

        response = {'timestamp': str(datetime.now())}
        active_nodes = get_active_nodes(config)
        cmapi_cfg_parser = get_config_parser(CMAPI_CONF_PATH)
        api_key = get_current_key(cmapi_cfg_parser)
        headers = {'x-api-key': api_key}
        num_nodes = 0

        for node in active_nodes:
            url = f'https://{node}:8640/cmapi/{get_version()}/node/status'
            try:
                r = requests.get(url, verify=False, headers=headers)
                r.raise_for_status()
                r_json = r.json()
                if len(r_json.get('services', [])) == 0:
                    r_json['dbrm_mode'] = 'offline'

                response[str(node)] = r_json
                num_nodes += 1
            except Exception as err:
                raise CMAPIBasicError(
                    f'Got an error retrieving status from node {node}'
                ) from err

        response['num_nodes'] = num_nodes
        logger.debug('Successfully finished getting cluster status.')
        return response

    @staticmethod
    def start(
        config: str = DEFAULT_MCS_CONF_PATH,
        logger: logging.Logger = logging.getLogger('cmapi_server')
    ) -> dict:
        """Method to start MCS Cluster.

        :param config: columnstore xml config file path,
                       defaults to DEFAULT_MCS_CONF_PATH
        :type config: str, optional
        :param logger: logger, defaults to logging.getLogger('cmapi_server')
        :type logger: logging.Logger, optional
        :raises CMAPIBasicError: on exception while starting transaction
        :raises CMAPIBasicError: if transaction start isn't successful
        :raises CMAPIBasicError: if no nodes in the cluster
        :raises CMAPIBasicError: on exception while distributing new config
        :raises CMAPIBasicError: on unsuccessful distributing config file
        :raises CMAPIBasicError: on exception while committing transaction
        :return: start timestamp
        :rtype: dict
        """
        logger.debug('Cluster start command called. Starting the cluster.')
        start_time = str(datetime.now())
        transaction_id = get_id()

        try:
            succeeded, transaction_id, successes = start_transaction(
                cs_config_filename=config, id=transaction_id
            )
        except Exception as err:
            rollback_transaction(transaction_id, cs_config_filename=config)
            raise CMAPIBasicError(
                'Error while starting the transaction.'
            ) from err
        if not succeeded:
            rollback_transaction(transaction_id, cs_config_filename=config)
            raise CMAPIBasicError('Starting transaction isn\'t successful.')

        if succeeded and len(successes) == 0:
            rollback_transaction(transaction_id, cs_config_filename=config)
            raise CMAPIBasicError('There are no nodes in the cluster.')

        switch_node_maintenance(False)
        update_revision_and_manager()

        # TODO: move this from multiple places to one, eg to helpers
        try:
            broadcast_successful = broadcast_new_config(config)
        except Exception as err:
            rollback_transaction(transaction_id, cs_config_filename=config)
            raise CMAPIBasicError(
                'Error while distributing config file.'
            ) from err

        if not broadcast_successful:
            rollback_transaction(transaction_id, cs_config_filename=config)
            raise CMAPIBasicError('Config distribution isn\'t successful.')

        try:
            commit_transaction(transaction_id, cs_config_filename=config)
        except Exception as err:
            rollback_transaction(transaction_id, cs_config_filename=config)
            raise CMAPIBasicError(
                'Error while committing transaction.'
            ) from err

        logger.debug('Successfully finished cluster start.')
        return {'timestamp': start_time}

    @staticmethod
    def shutdown(
        config: str = DEFAULT_MCS_CONF_PATH,
        logger: logging.Logger = logging.getLogger('cmapi_server')
    ) -> dict:
        """Method to stop the MCS Cluster.

        :param config: columnstore xml config file path,
                       defaults to DEFAULT_MCS_CONF_PATH
        :type config: str, optional
        :param logger: logger, defaults to logging.getLogger('cmapi_server')
        :type logger: logging.Logger, optional
        :raises CMAPIBasicError: if no nodes in the cluster
        :return: start timestamp
        :rtype: dict
        """
        logger.debug(
            'Cluster shutdown command called. Shutting down the cluster.'
        )

        start_time = str(datetime.now())
        transaction_id = get_id()

        try:
            succeeded, transaction_id, successes = start_transaction(
                cs_config_filename=config, id=transaction_id
            )
        except Exception as err:
            rollback_transaction(transaction_id, cs_config_filename=config)
            raise CMAPIBasicError(
                'Error while starting the transaction.'
            ) from err
        if not succeeded:
            rollback_transaction(transaction_id, cs_config_filename=config)
            raise CMAPIBasicError('Starting transaction isn\'t successful.')

        if succeeded and len(successes) == 0:
            rollback_transaction(transaction_id, cs_config_filename=config)
            raise CMAPIBasicError('There are no nodes in the cluster.')

        switch_node_maintenance(True)
        update_revision_and_manager()

        # TODO: move this from multiple places to one, eg to helpers
        try:
            broadcast_successful = broadcast_new_config(config)
        except Exception as err:
            rollback_transaction(transaction_id, cs_config_filename=config)
            raise CMAPIBasicError(
                'Error while distributing config file.'
            ) from err

        if not broadcast_successful:
            rollback_transaction(transaction_id, cs_config_filename=config)
            raise CMAPIBasicError('Config distribution isn\'t successful.')

        try:
            commit_transaction(transaction_id, cs_config_filename=config)
        except Exception as err:
            rollback_transaction(transaction_id, cs_config_filename=config)
            raise CMAPIBasicError(
                'Error while committing transaction.'
            ) from err

        logger.debug('Successfully finished shutting down the cluster.')
        return {'timestamp': start_time}

    @staticmethod
    def add_node(
        node: str, config: str = DEFAULT_MCS_CONF_PATH,
        logger: logging.Logger = logging.getLogger('cmapi_server')
    ) -> dict:
        """Method to add node to MCS Cluster.

        :param node: node IP or name or FQDN
        :type node: str
        :param config: columnstore xml config file path,
                       defaults to DEFAULT_MCS_CONF_PATH
        :type config: str, optional
        :param logger: logger, defaults to logging.getLogger('cmapi_server')
        :type logger: logging.Logger, optional
        :raises CMAPIBasicError: on exception while starting transaction
        :raises CMAPIBasicError: if transaction start isn't successful
        :raises CMAPIBasicError: on exception while adding node
        :raises CMAPIBasicError: on exception while distributing new config
        :raises CMAPIBasicError: on unsuccessful distributing config file
        :raises CMAPIBasicError: on exception while committing transaction
        :return: result of adding node
        :rtype: dict
        """
        logger.debug(f'Cluster add node command called. Adding node {node}.')

        response = {'timestamp': str(datetime.now())}
        transaction_id = get_id()

        try:
            succeeded, transaction_id, successes = start_transaction(
                cs_config_filename=config, extra_nodes=[node],
                id=transaction_id
            )
        except Exception as err:
            rollback_transaction(transaction_id, cs_config_filename=config)
            raise CMAPIBasicError(
                'Error while starting the transaction.'
            ) from err
        if not succeeded:
            rollback_transaction(transaction_id, cs_config_filename=config)
            raise CMAPIBasicError('Starting transaction isn\'t successful.')

        try:
            add_node(
                node, input_config_filename=config,
                output_config_filename=config
            )
            if not get_dbroots(node, config):
                add_dbroot(
                    host=node, input_config_filename=config,
                    output_config_filename=config
                )
        except Exception as err:
            rollback_transaction(transaction_id, cs_config_filename=config)
            raise CMAPIBasicError('Error while adding node.') from err

        response['node_id'] = node
        update_revision_and_manager(
            input_config_filename=config, output_config_filename=config
        )

        try:
            broadcast_successful = broadcast_new_config(config)
        except Exception as err:
            rollback_transaction(transaction_id, cs_config_filename=config)
            raise CMAPIBasicError(
                'Error while distributing config file.'
            ) from err

        if not broadcast_successful:
            rollback_transaction(transaction_id, cs_config_filename=config)
            raise CMAPIBasicError('Config distribution isn\'t successful.')

        try:
            commit_transaction(transaction_id, cs_config_filename=config)
        except Exception as err:
            rollback_transaction(transaction_id, cs_config_filename=config)
            raise CMAPIBasicError(
                'Error while committing transaction.'
            ) from err

        logger.debug(f'Successfully finished adding node {node}.')
        return response

    @staticmethod
    def remove_node(
        node: str, config: str = DEFAULT_MCS_CONF_PATH,
        logger: logging.Logger = logging.getLogger('cmapi_server')
    ) -> dict:
        """Method to remove node from MCS Cluster.

        :param node: node IP or name or FQDN
        :type node: str
        :param config: columnstore xml config file path,
                       defaults to DEFAULT_MCS_CONF_PATH
        :type config: str, optional
        :param logger: logger, defaults to logging.getLogger('cmapi_server')
        :type logger: logging.Logger, optional
        :raises CMAPIBasicError: on exception while starting transaction
        :raises CMAPIBasicError: if transaction start isn't successful
        :raises CMAPIBasicError: on exception while removing node
        :raises CMAPIBasicError: on exception while distributing new config
        :raises CMAPIBasicError: on unsuccessful distributing config file
        :raises CMAPIBasicError: on exception while committing transaction
        :return: result of node removing
        :rtype: dict
        """
        logger.debug(
            f'Cluster remove node command called. Removing node {node}.'
        )
        response = {'timestamp': str(datetime.now())}
        transaction_id = get_id()

        try:
            succeeded, transaction_id, txn_nodes = start_transaction(
                cs_config_filename=config, remove_nodes=[node],
                id=transaction_id
            )
        except Exception as err:
            rollback_transaction(transaction_id, cs_config_filename=config)
            raise CMAPIBasicError(
                'Error while starting the transaction.'
            ) from err
        if not succeeded:
            rollback_transaction(transaction_id, cs_config_filename=config)
            raise CMAPIBasicError('Starting transaction isn\'t successful.')

        try:
            remove_node(
                node, input_config_filename=config,
                output_config_filename=config
            )
        except Exception as err:
            rollback_transaction(
                transaction_id, nodes=txn_nodes, cs_config_filename=config
            )
            raise CMAPIBasicError('Error while removing node.') from err

        response['node_id'] = node
        if len(txn_nodes) > 0:
            update_revision_and_manager(
                input_config_filename=config, output_config_filename=config
            )
            try:
                broadcast_successful = broadcast_new_config(
                    config, nodes=txn_nodes
                )
            except Exception as err:
                rollback_transaction(
                    transaction_id, nodes=txn_nodes, cs_config_filename=config
                )
                raise CMAPIBasicError(
                    'Error while distributing config file.'
                ) from err
            if not broadcast_successful:
                rollback_transaction(
                    transaction_id, nodes=txn_nodes, cs_config_filename=config
                )
                raise CMAPIBasicError('Config distribution isn\'t successful.')

        try:
            commit_transaction(transaction_id, cs_config_filename=config)
        except Exception as err:
            rollback_transaction(
                transaction_id, nodes=txn_nodes, cs_config_filename=config
            )
            raise CMAPIBasicError(
                'Error while committing transaction.'
            ) from err

        logger.debug(f'Successfully finished removing node {node}.')
        return response

    @staticmethod
    def set_mode(
        mode: str, timeout: int = 60, config: str = DEFAULT_MCS_CONF_PATH,
        logger: logging.Logger = logging.getLogger('cmapi_server')
    ) -> dict:
        """Method to set MCS Cluster mode.

        :param mode: cluster mode to set, can be only "readonly" or "readwrite"
        :type mode: str
        :param config: columnstore xml config file path,
                       defaults to DEFAULT_MCS_CONF_PATH
        :type config: str, optional
        :param logger: logger, defaults to logging.getLogger('cmapi_server')
        :type logger: logging.Logger, optional
        :raises CMAPIBasicError: if no master found in the cluster
        :raises CMAPIBasicError: on exception while starting transaction
        :raises CMAPIBasicError: if transaction start isn't successful
        :raises CMAPIBasicError: on exception while setting mode
        :raises CMAPIBasicError: on exception while distributing new config
        :raises CMAPIBasicError: on unsuccessful distributing config file
        :raises CMAPIBasicError: on exception while committing transaction
        :return: result of setting mode
        :rtype: dict
        """
        logger.debug(
            f'Cluster mode set command called. Setting mode to {mode}.'
        )

        response = {'timestamp': str(datetime.now())}
        cmapi_cfg_parser = get_config_parser(CMAPI_CONF_PATH)
        api_key = get_current_key(cmapi_cfg_parser)
        headers = {'x-api-key': api_key}
        transaction_id = get_id()

        master = None
        if len(get_active_nodes(config)) != 0:
            master = get_dbrm_master(config)

        if master is None:
            raise CMAPIBasicError('No master found in the cluster.')
        else:
            master = master['IPAddr']
            payload = {'cluster_mode': mode}
            url = f'https://{master}:8640/cmapi/{get_version()}/node/config'

        try:
            succeeded, transaction_id, successes = start_transaction(
                cs_config_filename=config, id=transaction_id
            )
        except Exception as err:
            rollback_transaction(transaction_id, cs_config_filename=config)
            raise CMAPIBasicError(
                'Error while starting the transaction.'
            ) from err
        if not succeeded:
            rollback_transaction(transaction_id, cs_config_filename=config)
            raise CMAPIBasicError('Starting transaction isn\'t successful.')

        nc = NodeConfig()
        root = nc.get_current_config_root(config_filename=config)
        payload['manager'] = root.find('./ClusterManager').text
        payload['revision'] = root.find('./ConfigRevision').text
        payload['timeout'] = timeout
        payload['cluster_mode'] = mode

        try:
            r = requests.put(url, headers=headers, json=payload, verify=False)
            r.raise_for_status()
            response['cluster-mode'] = mode
        except Exception as err:
            rollback_transaction(transaction_id, cs_config_filename=config)
            raise CMAPIBasicError(
                f'Error while setting cluster mode to {mode}'
            ) from err

        try:
            commit_transaction(transaction_id, cs_config_filename=config)
        except Exception as err:
            rollback_transaction(transaction_id, cs_config_filename=config)
            raise CMAPIBasicError(
                'Error while committing transaction.'
            ) from err

        logger.debug(f'Successfully set cluster mode to {mode}.')
        return response

    @staticmethod
    def set_api_key(
        api_key: str, verification_key: str,
        config: str = DEFAULT_MCS_CONF_PATH,
        logger: logging.Logger = logging.getLogger('cmapi_server')
    ) -> dict:
        """Method to set API key for each CMAPI node in cluster.

        :param api_key: API key to set
        :type api_key: str
        :param verification_key: TOTP key to verify
        :type verification_key: str
        :param config: columnstore xml config file path,
                       defaults to DEFAULT_MCS_CONF_PATH
        :type config: str, optional
        :param logger: logger, defaults to logging.getLogger('cmapi_server')
        :type logger: logging.Logger, optional
        :raises CMAPIBasicError: if some exception is caught while setting
                                 the API key on a node
        :return: status result
        :rtype: dict
        """
        logger.debug('Cluster set API key command called.')

        active_nodes = get_active_nodes(config)
        body = {
            'api_key': api_key,
            'verification_key': verification_key
        }
        response = {}
        # only for changing response object below
        active_nodes_count = len(active_nodes)

        if not active_nodes:
            # set api key in configuration file on this node
            logger.debug(
                'No active nodes found, set API key into current CMAPI conf.'
            )
            active_nodes.append('localhost')

        for node in active_nodes:
            logger.debug(f'Setting new api key to "{node}".')
            url = f'https://{node}:8640/cmapi/{get_version()}/node/apikey-set'
            try:
                resp = requests.put(url, verify=False, json=body)
                resp.raise_for_status()
                r_json = resp.json()
                if active_nodes_count > 0:
                    response[str(node)] = r_json
            except Exception as err:
                raise CMAPIBasicError(
                    f'Got an error setting API key to "{node}".'
                ) from err
            logger.debug(f'Successfully set new api key to "{node}".')

        response['timestamp'] = str(datetime.now())
        logger.debug(
            'Successfully finished setting new API key to all nodes.'
        )
        return response

    @staticmethod
    def set_log_level(
        level: str, config: str = DEFAULT_MCS_CONF_PATH,
        logger: logging.Logger = logging.getLogger('cmapi_server')
    ) -> dict:
        """Method to set level for loggers on each CMAPI node in cluster.

        :param level: logging level, including custom
        :type level: str
        :param config: columnstore xml config file path,
                       defaults to DEFAULT_MCS_CONF_PATH
        :type config: str, optional
        :param logger: logger, defaults to logging.getLogger('cmapi_server')
        :type logger: logging.Logger, optional
        :return: status result
        :rtype: dict
        """
        logger.debug('Cluster set new logging level called.')

        active_nodes = get_active_nodes(config)
        body = {'level': level}
        response = {}
        # only for changing response object below
        active_nodes_count = len(active_nodes)

        if not active_nodes:
            # set log level in configuration file on this node
            logger.debug(
                'No active nodes found, set log level only for current node.'
            )
            active_nodes.append('localhost')

        for node in active_nodes:
            logger.debug(f'Setting new log level to "{node}".')
            url = f'https://{node}:8640/cmapi/{get_version()}/node/log-level'
            try:
                resp = requests.put(url, verify=False, json=body)
                resp.raise_for_status()
                r_json = resp.json()
                if active_nodes_count > 0:
                    response[str(node)] = r_json
            except Exception as err:
                raise CMAPIBasicError(
                    f'Got an error setting log level to "{node}".'
                ) from err
            logger.debug(f'Successfully set new log level to "{node}".')

        response['timestamp'] = str(datetime.now())
        logger.debug(
            'Successfully finished setting new log level to all nodes.'
        )
        return response
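The handlers above are plain staticmethods, so besides the REST endpoints they can be driven directly from a node. A short sketch (addresses are placeholders):

from cmapi_server.handlers.cluster import ClusterHandler

status = ClusterHandler.status()       # per-node status plus num_nodes
ClusterHandler.add_node('10.0.0.6')    # starts, broadcasts, commits a txn
ClusterHandler.set_mode('readonly')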
847
cmapi/cmapi_server/helpers.py
Normal file
@ -0,0 +1,847 @@
|
||||
"""Module with helpers functions.
|
||||
|
||||
TODO: remove NodeConfig usage and move to arguments (eg. nc or root)
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import concurrent.futures
|
||||
import configparser
|
||||
import datetime
|
||||
import logging
|
||||
import os
|
||||
import socket
|
||||
import time
|
||||
from functools import partial
|
||||
from random import random
|
||||
from shutil import copyfile
|
||||
from typing import Tuple, Optional
|
||||
|
||||
import lxml.objectify
|
||||
import requests
|
||||
|
||||
from cmapi_server.exceptions import CMAPIBasicError
|
||||
# Bug in pylint https://github.com/PyCQA/pylint/issues/4584
|
||||
requests.packages.urllib3.disable_warnings() # pylint: disable=no-member
|
||||
|
||||
from cmapi_server.constants import (
|
||||
CMAPI_CONF_PATH, CMAPI_DEFAULT_CONF_PATH, DEFAULT_MCS_CONF_PATH,
|
||||
DEFAULT_SM_CONF_PATH, LOCALHOSTS
|
||||
)
|
||||
from cmapi_server.handlers.cej import CEJPasswordHandler
|
||||
from cmapi_server.managers.process import MCSProcessManager
|
||||
from mcs_node_control.models.node_config import NodeConfig
|
||||
|
||||
|
||||
def get_id():
|
||||
return int(random() * 1000000)
|
||||
|
||||
|
||||
def start_transaction(
|
||||
config_filename=CMAPI_CONF_PATH,
|
||||
cs_config_filename=DEFAULT_MCS_CONF_PATH,
|
||||
extra_nodes=None,
|
||||
remove_nodes=None,
|
||||
optional_nodes=None,
|
||||
id=None
|
||||
):
|
||||
"""Start internal CMAPI transaction.
|
||||
|
||||
Returns (success, txnid, nodes). success = True means it successfully
|
||||
started a transaction, False means it didn't. If True, txnid holds the
transaction ID and nodes holds the list of nodes the transaction was
started on. If False, txnid and nodes have undefined values.
|
||||
|
||||
:param config_filename: cmapi config filepath,
|
||||
defaults to CMAPI_CONF_PATH
|
||||
:type config_filename: str
|
||||
:param cs_config_filename: columnstore xml config filepath,
|
||||
defaults to DEFAULT_MCS_CONF_PATH
|
||||
:type cs_config_filename: str, optional
|
||||
:param extra_nodes: extra nodes, defaults to None
|
||||
:type extra_nodes: list, optional
|
||||
:param remove_nodes: remove nodes, defaults to None
|
||||
:type remove_nodes: list, optional
|
||||
:param optional_nodes: optional nodes, defaults to None
|
||||
:type optional_nodes: list, optional
|
||||
:return: (success, txnid, nodes)
|
||||
:rtype: tuple
|
||||
"""
|
||||
# TODO: somehow change this logic, e.g. accept several input types
# (str/list/set) and detect which one we got.
|
||||
if id is None:
    # generate a fresh transaction id per call; a get_id() default in the
    # signature would be evaluated only once, at import time
    id = get_id()
extra_nodes = extra_nodes or []
|
||||
remove_nodes = remove_nodes or []
|
||||
optional_nodes = optional_nodes or []
|
||||
|
||||
cfg_parser = get_config_parser(config_filename)
|
||||
api_key = get_current_key(cfg_parser)
|
||||
|
||||
version = get_version()
|
||||
|
||||
headers = {'x-api-key': api_key}
|
||||
body = {'id' : id}
|
||||
final_time = datetime.datetime.now() + datetime.timedelta(seconds=300)
|
||||
|
||||
success = False
|
||||
while datetime.datetime.now() < final_time and not success:
|
||||
successes = []
|
||||
|
||||
# it's painful to look at, but if this call fails to get a lock on
|
||||
# every server, it may be because a node went down, and the config file
|
||||
# was updated. So, update the list on every iteration.
|
||||
#
|
||||
# There is a race here between reading the config and getting the txn.
|
||||
# What can stop it with the current design is using a mutex here,
|
||||
# and having config updates come from only one node.
|
||||
# For changes coming from failover, this will be true.
|
||||
#
|
||||
# There is also a race on the config file in general.
|
||||
# Need to read it before you can get a lock, and need to lock it before
|
||||
# it can be read reliably. Resolution TBD. File locking? Retries?
|
||||
|
||||
# TODO: need to work with data types of nodes here
|
||||
unfiltered_nodes = [
|
||||
*get_active_nodes(cs_config_filename),
|
||||
*extra_nodes,
|
||||
*optional_nodes
|
||||
]
|
||||
tmp_active_nodes = {
|
||||
node for node in unfiltered_nodes
|
||||
if node not in remove_nodes
|
||||
}
|
||||
active_nodes = set()
|
||||
|
||||
# resolve localhost addrs
|
||||
for node in tmp_active_nodes:
|
||||
if node in ['127.0.0.1', 'localhost', '::1']:
|
||||
active_nodes.add(socket.gethostbyname(socket.gethostname()))
|
||||
else:
|
||||
active_nodes.add(node)
|
||||
# this copy will be updated if an optional node can't be reached
|
||||
real_active_nodes = set(active_nodes)
|
||||
logging.trace(f'Active nodes on start transaction {active_nodes}')
|
||||
for node in active_nodes:
|
||||
url = f'https://{node}:8640/cmapi/{version}/node/begin'
|
||||
node_success = False
|
||||
logging.trace(f'Processing node "{node}"')
|
||||
for retry in range(5):
|
||||
logging.trace(
|
||||
f'In {retry} attempt for node {node} and active nodes var '
|
||||
f'is {active_nodes} and real active nodes var is '
|
||||
f'{real_active_nodes}'
|
||||
)
|
||||
try:
|
||||
# who knows how much time has gone by...
|
||||
# Update timeout to keep nodes in sync +/-
|
||||
body['timeout'] = (
|
||||
final_time - datetime.datetime.now()
|
||||
).seconds
|
||||
r = requests.put(
|
||||
url, verify=False, headers=headers, json=body,
|
||||
timeout=10
|
||||
)
|
||||
|
||||
# a 4xx error from our endpoint;
|
||||
# likely another txn is running
|
||||
# Breaking here will cause a rollback on nodes we have
|
||||
# successfully started a txn on so far. Then it will try
|
||||
# again to get a transaction on all nodes. Put all
|
||||
# conditions where that is the desired behavior here.
|
||||
if r.status_code // 100 == 4:
|
||||
logging.debug(
|
||||
'Got a 4xx error while beginning transaction '
|
||||
f'with response text {r.text}'
|
||||
)
|
||||
break # TODO: useless, got break in finally statement
|
||||
# TODO: is there any case to separate 4xx
|
||||
# from all other error codes
|
||||
r.raise_for_status()
|
||||
node_success = True
|
||||
break
|
||||
except requests.Timeout:
|
||||
logging.warning(
|
||||
f'start_transaction(): timeout on node {node}'
|
||||
)
|
||||
except Exception:
|
||||
logging.warning(
|
||||
'start_transaction(): got error during request '
|
||||
f'to node {node}',
|
||||
exc_info=True
|
||||
)
|
||||
finally:
|
||||
if not node_success and node in optional_nodes:
|
||||
logging.info(
|
||||
f'start_transaction(): node {node} is optional; '
'ignoring the error'
|
||||
)
|
||||
real_active_nodes.remove(node)
|
||||
break
|
||||
|
||||
# wait 1 sec and try on this node again
|
||||
time.sleep(1)
|
||||
|
||||
if not node_success and node not in optional_nodes:
|
||||
rollback_txn_attempt(api_key, version, id, successes)
|
||||
# wait up to 5 secs and try the whole thing again
|
||||
time.sleep(random() * 5)
|
||||
break
|
||||
elif node_success:
|
||||
successes.append(node)
|
||||
|
||||
# TODO: a little more work needs to be done here. If not all of the active-nodes
|
||||
# are up when start is called, this will fail. It should succeed if 'enough' nodes
|
||||
# are up (> 50%).
|
||||
success = (len(successes) == len(real_active_nodes))
|
||||
|
||||
return (success, id, successes)
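# Lifecycle sketch (illustrative, assumes a reachable cluster and the default
# config paths): start_transaction() pairs with commit_transaction() or
# rollback_transaction() defined below; the config broadcast happens between.
def example_config_change():
    success, txn_id, nodes = start_transaction()
    if not success:
        return False  # txn_id and nodes are undefined on failure
    try:
        # ... update and broadcast the new config here ...
        commit_transaction(txn_id, nodes=nodes)
    except Exception:
        rollback_transaction(txn_id, nodes=nodes)
        raise
    return True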
|
||||
|
||||
def rollback_txn_attempt(key, version, txnid, nodes):
|
||||
headers = {'x-api-key': key}
|
||||
body = {'id': txnid}
|
||||
for node in nodes:
|
||||
url = f"https://{node}:8640/cmapi/{version}/node/rollback"
|
||||
for retry in range(5):
|
||||
try:
|
||||
r = requests.put(
|
||||
url, verify=False, headers=headers, json=body, timeout=5
|
||||
)
|
||||
r.raise_for_status()
|
||||
except requests.Timeout:
|
||||
logging.warning(
|
||||
f'rollback_txn_attempt(): timeout on node "{node}"'
|
||||
)
|
||||
except Exception:
|
||||
logging.error(
|
||||
(
|
||||
f'rollback_txn_attempt(): got an unexpected error '
f'during request to "{node}".'
|
||||
),
|
||||
exc_info=True
|
||||
)
|
||||
else:
|
||||
break
|
||||
time.sleep(1)
|
||||
|
||||
# on a failure to rollback or commit a txn on a subset of nodes, what are the options?
|
||||
# - open a new txn and revert the changes on the nodes that respond
|
||||
# - go forward with the subset. If those nodes are still up, they will have a config that is out of sync.
|
||||
# -> for now, going to assume that the node went down, and that when it comes back up, its config
|
||||
# will be sync'd
|
||||
|
||||
def rollback_transaction(
|
||||
id, config_filename=CMAPI_CONF_PATH,
|
||||
cs_config_filename=DEFAULT_MCS_CONF_PATH, nodes=None
|
||||
):
|
||||
cfg_parser = get_config_parser(config_filename)
|
||||
key = get_current_key(cfg_parser)
|
||||
version = get_version()
|
||||
if nodes is None:
|
||||
nodes = get_active_nodes(cs_config_filename)
|
||||
rollback_txn_attempt(key, version, id, nodes)
|
||||
|
||||
|
||||
def commit_transaction(
|
||||
id, config_filename=CMAPI_CONF_PATH,
|
||||
cs_config_filename=DEFAULT_MCS_CONF_PATH, nodes=None
|
||||
):
|
||||
cfg_parser = get_config_parser(config_filename)
|
||||
key = get_current_key(cfg_parser)
|
||||
version = get_version()
|
||||
if nodes is None:
|
||||
nodes = get_active_nodes(cs_config_filename)
|
||||
|
||||
headers = {'x-api-key': key}
|
||||
body = {'id': id}
|
||||
|
||||
for node in nodes:
|
||||
url = f"https://{node}:8640/cmapi/{version}/node/commit"
|
||||
for retry in range(5):
|
||||
try:
|
||||
r = requests.put(url, verify=False, headers=headers, json=body, timeout=5)
|
||||
r.raise_for_status()
|
||||
except requests.Timeout:
|
||||
logging.warning(f"commit_transaction(): timeout on node {node}")
|
||||
except Exception as e:
|
||||
logging.warning(f"commit_transaction(): got error during request to {node}: {str(e)}")
|
||||
else:
|
||||
break
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
def broadcast_new_config(
|
||||
cs_config_filename: str = DEFAULT_MCS_CONF_PATH,
|
||||
cmapi_config_filename: str = CMAPI_CONF_PATH,
|
||||
sm_config_filename: str = DEFAULT_SM_CONF_PATH,
|
||||
test_mode: bool = False,
|
||||
nodes: Optional[list] = None,
|
||||
) -> bool:
|
||||
"""Send new config to nodes. Now in async way.
|
||||
|
||||
:param cs_config_filename: Columnstore.xml path,
|
||||
defaults to DEFAULT_MCS_CONF_PATH
|
||||
:type cs_config_filename: str, optional
|
||||
:param cmapi_config_filename: cmapi config path,
|
||||
defaults to CMAPI_CONF_PATH
|
||||
:type cmapi_config_filename: str, optional
|
||||
:param sm_config_filename: storage manager config path,
|
||||
defaults to DEFAULT_SM_CONF_PATH
|
||||
:type sm_config_filename: str, optional
|
||||
:param test_mode: for test purposes, defaults to False TODO: remove
|
||||
:type test_mode: bool, optional
|
||||
:param nodes: nodes list for config put, defaults to None
|
||||
:type nodes: Optional[list], optional
|
||||
:return: success state
|
||||
:rtype: bool
|
||||
"""
|
||||
|
||||
cfg_parser = get_config_parser(cmapi_config_filename)
|
||||
key = get_current_key(cfg_parser)
|
||||
version = get_version()
|
||||
if nodes is None:
|
||||
nodes = get_active_nodes(cs_config_filename)
|
||||
|
||||
nc = NodeConfig()
|
||||
root = nc.get_current_config_root(config_filename=cs_config_filename)
|
||||
with open(cs_config_filename) as f:
|
||||
config_text = f.read()
|
||||
|
||||
with open(sm_config_filename) as f:
|
||||
sm_config_text = f.read()
|
||||
|
||||
headers = {'x-api-key': key}
|
||||
body = {
|
||||
'manager': root.find('./ClusterManager').text,
|
||||
'revision': root.find('./ConfigRevision').text,
|
||||
'timeout': 300,
|
||||
'config': config_text,
|
||||
'cs_config_filename': cs_config_filename,
|
||||
'sm_config_filename': sm_config_filename,
|
||||
'sm_config': sm_config_text
|
||||
}
|
||||
# TODO: remove test mode here and replace it by mock in tests
|
||||
if test_mode:
|
||||
body['test'] = True
|
||||
|
||||
failed_nodes = []
|
||||
success_nodes = []
|
||||
|
||||
async def update_config(node, success_nodes, failed_nodes, headers, body):
|
||||
url = f'https://{node}:8640/cmapi/{version}/node/config'
|
||||
request_put = partial(
|
||||
requests.put, url, verify=False, headers=headers, json=body,
|
||||
timeout=120
|
||||
)
|
||||
success = False
|
||||
executor = concurrent.futures.ThreadPoolExecutor()
|
||||
loop = asyncio.get_event_loop()
|
||||
|
||||
# TODO: remove this retry; it causes long waits, e.g. if some of the mcs
# processes couldn't start/stop properly. Fix error handling: consider
# raising an error instead of returning a bool.
|
||||
for retry in range(5):
|
||||
try:
|
||||
r = await loop.run_in_executor(executor, request_put)
|
||||
r.raise_for_status()
|
||||
except requests.Timeout as e:
|
||||
logging.warning(
|
||||
f'Timeout while pushing new config to "{node}"'
|
||||
)
|
||||
except Exception as e:
|
||||
logging.warning(
|
||||
f'Got an unexpected error pushing new config to "{node}"',
|
||||
exc_info=True
|
||||
)
|
||||
else:
|
||||
success_nodes.append(node)
|
||||
success = True
|
||||
break
|
||||
if not success:
|
||||
failed_nodes.append(node)
|
||||
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
tasks = [
|
||||
update_config(node, success_nodes, failed_nodes, headers, body)
|
||||
for node in nodes
|
||||
]
|
||||
loop.run_until_complete(asyncio.wait(tasks))
|
||||
loop.close()
|
||||
|
||||
if len(success_nodes) > 0:
|
||||
logging.info(
|
||||
f'Successfully pushed new config file to {success_nodes}'
|
||||
)
|
||||
if len(failed_nodes) > 0:
|
||||
logging.error(
|
||||
f'Failed to push the new config to {failed_nodes}'
|
||||
)
|
||||
return False
|
||||
return True
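# The fan-out above wraps each blocking requests call in functools.partial
# and runs it on a thread pool through the event loop. A minimal standalone
# sketch of the same pattern, reusing this module's imports (the URL list is
# hypothetical):
async def _fetch_one(loop, executor, url):
    # requests is blocking, so hand the call off to the executor
    return await loop.run_in_executor(
        executor, partial(requests.get, url, verify=False, timeout=5)
    )

def fetch_all(urls):
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    executor = concurrent.futures.ThreadPoolExecutor()
    try:
        return loop.run_until_complete(
            asyncio.gather(*(_fetch_one(loop, executor, u) for u in urls))
        )
    finally:
        executor.shutdown(wait=False)
        loop.close()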
|
||||
|
||||
|
||||
# Might be more appropriate to put these in node_manipulation?
|
||||
def update_revision_and_manager(
|
||||
input_config_filename: Optional[str] = None,
|
||||
output_config_filename: Optional[str] = None
|
||||
):
|
||||
"""Update MCS xml config revision and cluster manager tags.
|
||||
|
||||
:param input_config_filename: input MCS xml config filepath, defaults to None
|
||||
:type input_config_filename: Optional[str], optional
|
||||
:param output_config_filename: output MCS xml config filepath, defaults to None
|
||||
:type output_config_filename: Optional[str], optional
|
||||
"""
|
||||
nc = NodeConfig()
|
||||
|
||||
if input_config_filename is None:
|
||||
root = nc.get_current_config_root()
|
||||
else:
|
||||
root = nc.get_current_config_root(input_config_filename)
|
||||
|
||||
try:
|
||||
rev_node = root.find('./ConfigRevision')
|
||||
cur_revision = int(rev_node.text) + 1
|
||||
rev_node.text = str(cur_revision)
|
||||
root.find('./ClusterManager').text = str(
|
||||
nc.get_module_net_address(root=root, module_id=1)
|
||||
)
|
||||
except Exception:
|
||||
logging.error(
|
||||
'Caught exception while updating MCS config revision and cluster '
'manager tags, will not write new config',
|
||||
exc_info=True
|
||||
)
|
||||
else:
|
||||
if output_config_filename is None:
|
||||
nc.write_config(root)
|
||||
else:
|
||||
nc.write_config(root, filename=output_config_filename)
|
||||
|
||||
|
||||
def get_config_parser(
|
||||
config_filepath: str = CMAPI_CONF_PATH
|
||||
) -> configparser.ConfigParser:
|
||||
"""Get config parser from cmapi server ini config file.
|
||||
|
||||
:param config_filepath: cmapi server conf path, defaults to CMAPI_CONF_PATH
:type config_filepath: str, optional
|
||||
:return: config parser
|
||||
:rtype: configparser.ConfigParser
|
||||
"""
|
||||
cfg_parser = configparser.ConfigParser()
|
||||
try:
|
||||
with open(config_filepath, 'r', encoding='utf-8') as cfg_file:
|
||||
cfg_parser.read_file(cfg_file)
|
||||
except PermissionError as e:
|
||||
# TODO: looks like this is useless here, because the config is created
# from the default one on cmapi server startup.
# Anyway, it probably has to raise an error and then return a 500.
|
||||
logging.error(
|
||||
'CMAPI cannot read the configuration file. '
|
||||
'API key stored in memory only.',
|
||||
exc_info=True
|
||||
)
|
||||
return cfg_parser
|
||||
|
||||
|
||||
def save_cmapi_conf_file(cfg_parser, config_filepath: str = CMAPI_CONF_PATH):
|
||||
"""Save config file from config parser.
|
||||
|
||||
:param cfg_parser: config parser to save
|
||||
:type cfg_parser: configparser.ConfigParser
|
||||
:param config_filepath: cmapi config filepath, defaults to CMAPI_CONF_PATH
|
||||
:type config_filepath: str, optional
|
||||
"""
|
||||
try:
|
||||
with open(config_filepath, 'w', encoding='utf-8') as cfg_file:
|
||||
cfg_parser.write(cfg_file)
|
||||
except PermissionError:
|
||||
logging.error(
|
||||
'CMAPI cannot save configuration file due to permissions. '
|
||||
'Some values still can be stored in memory.',
|
||||
exc_info=True
|
||||
)
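# Round-trip sketch (assumes the CMAPI ini layout with an [Authentication]
# section holding x-api-key; the key value below is hypothetical):
#
#     cfg = get_config_parser()       # read the CMAPI ini config
#     if not get_current_key(cfg):
#         cfg['Authentication'] = {'x-api-key': 'example-key'}
#         save_cmapi_conf_file(cfg)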
|
||||
|
||||
|
||||
def get_active_nodes(config: str = DEFAULT_MCS_CONF_PATH) -> list:
|
||||
"""Get active nodes from Columnstore.xml.
|
||||
|
||||
Actually, these are only the names under which the nodes were added.
|
||||
|
||||
:param config: xml config path, defaults to DEFAULT_MCS_CONF_PATH
|
||||
:type config: str, optional
|
||||
:return: active nodes
|
||||
:rtype: list
|
||||
"""
|
||||
nc = NodeConfig()
|
||||
root = nc.get_current_config_root(config, upgrade=False)
|
||||
nodes = root.findall('./ActiveNodes/Node')
|
||||
return [node.text for node in nodes]
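# The readers above and below expect Columnstore.xml fragments shaped roughly
# like this (a hand-written sketch, not a complete config):
#
#   <ActiveNodes>
#       <Node>mcs1.example.com</Node>
#   </ActiveNodes>
#   <DesiredNodes>
#       <Node>mcs1.example.com</Node>
#       <Node>mcs2.example.com</Node>
#   </DesiredNodes>
#   <Maintenance>false</Maintenance>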
|
||||
|
||||
|
||||
def get_desired_nodes(config=DEFAULT_MCS_CONF_PATH):
|
||||
nc = NodeConfig()
|
||||
root = nc.get_current_config_root(config, upgrade=False)
|
||||
nodes = root.findall("./DesiredNodes/Node")
|
||||
return [node.text for node in nodes]
|
||||
|
||||
|
||||
def in_maintenance_state(config=DEFAULT_MCS_CONF_PATH):
|
||||
nc = NodeConfig()
|
||||
root = nc.get_current_config_root(config, upgrade=False)
|
||||
raw_state = root.find('./Maintenance')
|
||||
# if no Maintenance tag is found in the xml config
|
||||
state = False
|
||||
if raw_state is not None:
|
||||
# returns True on "true" string else return false
|
||||
state = lxml.objectify.BoolElement(raw_state.text)
|
||||
return state
|
||||
|
||||
|
||||
def get_current_key(config_parser):
|
||||
"""Get API key for cmapi server endpoints from ini config.
|
||||
|
||||
:param config_parser: config parser
|
||||
:type config_parser: configparser.ConfigParser
|
||||
:return: api key
|
||||
:rtype: str
|
||||
"""
|
||||
# ConfigParser reads the value as is, e.g. with surrounding quotes
|
||||
return config_parser.get('Authentication', 'x-api-key', fallback='')
|
||||
|
||||
|
||||
def get_version():
|
||||
from cmapi_server.controllers.dispatcher import _version
|
||||
return _version
|
||||
|
||||
|
||||
def get_dbroots(node, config=DEFAULT_MCS_CONF_PATH):
|
||||
# TODO: somehow duplicated with NodeConfig.get_all_dbroots?
|
||||
nc = NodeConfig()
|
||||
root = nc.get_current_config_root(config)
|
||||
dbroots = []
|
||||
smc_node = root.find('./SystemModuleConfig')
|
||||
mod_count = int(smc_node.find('./ModuleCount3').text)
|
||||
for i in range(1, mod_count+1):
|
||||
ip_addr = smc_node.find(f'./ModuleIPAddr{i}-1-3').text
|
||||
hostname = smc_node.find(f'./ModuleHostName{i}-1-3').text
|
||||
node_fqdn = socket.gethostbyaddr(hostname)[0]
|
||||
|
||||
if node in LOCALHOSTS and hostname != 'localhost':
|
||||
node = socket.gethostbyaddr(socket.gethostname())[0]
|
||||
elif node not in LOCALHOSTS and hostname == 'localhost':
|
||||
# hostname will only be localhost in a single-node cluster
|
||||
hostname = socket.gethostbyaddr(socket.gethostname())[0]
|
||||
|
||||
|
||||
if node == ip_addr or node == hostname or node == node_fqdn:
|
||||
for j in range(
|
||||
1, int(smc_node.find(f"./ModuleDBRootCount{i}-3").text) + 1
|
||||
):
|
||||
dbroots.append(
|
||||
smc_node.find(f"./ModuleDBRootID{i}-{j}-3").text
|
||||
)
|
||||
return dbroots
|
||||
|
||||
|
||||
def get_current_config_file(
|
||||
config_filename=DEFAULT_MCS_CONF_PATH,
|
||||
cmapi_config_filename=CMAPI_CONF_PATH
|
||||
):
|
||||
"""Start a transaction on all DesiredNodes, which are all optional.
|
||||
|
||||
- the transaction prevents config changes from being made at the same time
|
||||
- get the config from each node
|
||||
- discard config files for different clusters
|
||||
- call put_config on the config file with the highest revision number found
|
||||
- end the transaction
|
||||
"""
|
||||
|
||||
logging.info('get_current_config_file(): seeking the current config file')
|
||||
|
||||
cfg_parser = get_config_parser(cmapi_config_filename)
|
||||
key = get_current_key(cfg_parser)
|
||||
nc = NodeConfig()
|
||||
root = nc.get_current_config_root(config_filename = config_filename)
|
||||
# TODO: here we got set of ip addresses of DesiredNodes
|
||||
# but after that we convert them to list and send as
|
||||
# an optional_nodes argument to start_transaction()
|
||||
# So need to work with data type of nodes.
|
||||
desired_nodes = {
|
||||
node.text for node in root.findall('./DesiredNodes/Node')
|
||||
}
|
||||
if len(desired_nodes) <= 1:
|
||||
return True
|
||||
|
||||
current_rev = int(root.find('ConfigRevision').text)
|
||||
cluster_name = root.find('ClusterName').text
|
||||
highest_rev = current_rev
|
||||
highest_node = 'localhost'
|
||||
highest_config = nc.to_string(root)
|
||||
|
||||
# TODO: data type of optional_nodes set -> list
|
||||
# Need to work with it inside and outside of start_transaction
|
||||
(success, txn_id, nodes) = start_transaction(
|
||||
cs_config_filename=config_filename,
|
||||
optional_nodes=list(desired_nodes)
|
||||
)
|
||||
localhost_aliases = set(nc.get_network_addresses_and_names())
|
||||
other_nodes = set(nodes) - localhost_aliases
|
||||
if not success or len(other_nodes) == 0:
|
||||
if success:
|
||||
commit_transaction(txn_id, nodes=nodes)
|
||||
return False
|
||||
|
||||
nodes_in_same_cluster = 0
|
||||
for node in nodes:
|
||||
if node in localhost_aliases:
|
||||
continue
|
||||
|
||||
headers = {'x-api-key' : key}
|
||||
url = f'https://{node}:8640/cmapi/{get_version()}/node/config'
|
||||
try:
|
||||
r = requests.get(url, verify=False, headers=headers, timeout=5)
|
||||
r.raise_for_status()
|
||||
config = r.json()['config']
|
||||
except Exception as e:
|
||||
logging.warning(
|
||||
'get_current_config_file(): got an error fetching the '
|
||||
f'config file from {node}: {str(e)}'
|
||||
)
|
||||
continue
|
||||
tmp_root = nc.get_root_from_string(config)
|
||||
name_node = tmp_root.find('ClusterName')
|
||||
if name_node is None or name_node.text != cluster_name:
|
||||
continue
|
||||
nodes_in_same_cluster += 1
|
||||
rev_node = tmp_root.find('ConfigRevision')
|
||||
if rev_node is None or int(rev_node.text) <= highest_rev:
|
||||
continue
|
||||
highest_rev = int(rev_node.text)
|
||||
highest_config = config
|
||||
highest_node = node
|
||||
|
||||
nc.apply_config(config_filename=config_filename, xml_string=highest_config)
|
||||
# TODO: do we need restart node here?
|
||||
commit_transaction(txn_id, cs_config_filename=config_filename, nodes=nodes)
|
||||
|
||||
# todo, we might want stronger criteria for a large cluster.
|
||||
# Right now we want to reach at least one other node
|
||||
# (if there is another node)
|
||||
if len(desired_nodes) > 1 and nodes_in_same_cluster < 1:
|
||||
logging.error(
|
||||
'get_current_config_file(): failed to contact enough nodes '
|
||||
f'in my cluster ({cluster_name}) to reliably retrieve a current '
|
||||
'configuration file. Manual intervention may be required.'
|
||||
)
|
||||
# TODO: additional error handling.
|
||||
try:
|
||||
MCSProcessManager.stop_node(is_primary=nc.is_primary_node())
|
||||
except CMAPIBasicError as err:
|
||||
logging.error(err.message)
|
||||
return False
|
||||
|
||||
if highest_rev != current_rev:
|
||||
logging.info(
|
||||
'get_current_config_file(): Accepted the config file from'
|
||||
f' {highest_node}'
|
||||
)
|
||||
else:
|
||||
logging.info(
|
||||
'get_current_config_file(): This node has the current config file'
|
||||
)
|
||||
return True
|
||||
|
||||
|
||||
def wait_for_deactivation_or_put_config(
|
||||
config_mtime, config_filename=DEFAULT_MCS_CONF_PATH
|
||||
):
|
||||
'''
|
||||
if a multi-node cluster...
|
||||
Wait for either a put_config operation (as determined by monitoring the mtime of config_filename),
|
||||
or wait for this node to be removed from active_nodes,
|
||||
or wait for a period long enough for this to be considered a 'long' outage (30s right now, as determined
by the failover code). TODO: make that time period configurable.
|
||||
|
||||
Activating failover after one of these three events should allow this node to join the cluster either as part
|
||||
of the failover behavior, or as part of the cluster-wide start cmd.
|
||||
'''
|
||||
|
||||
my_names = set(NodeConfig().get_network_addresses_and_names())
|
||||
desired_nodes = get_desired_nodes(config_filename)
|
||||
if len(desired_nodes) == 1 and desired_nodes[0] in my_names:
|
||||
logging.info("wait_for_deactivation_or_put_config: Single-node cluster, safe to continue")
|
||||
return
|
||||
|
||||
final_time = datetime.datetime.now() + datetime.timedelta(seconds = 40)
|
||||
while config_mtime == os.path.getmtime(config_filename) and \
|
||||
len(my_names.intersection(set(get_active_nodes(config_filename)))) > 0 and \
|
||||
datetime.datetime.now() < final_time:
|
||||
logging.info("wait_for_deactivation_or_put_config: Waiting...")
|
||||
time.sleep(5)
|
||||
|
||||
if config_mtime != os.path.getmtime(config_filename):
|
||||
logging.info("wait_for_deactivation_or_put_config: A new config was received, safe to continue.")
|
||||
elif len(my_names.intersection(set(get_active_nodes(config_filename)))) == 0:
|
||||
logging.info("wait_for_deactivation_or_put_config: Was removed from the cluster, safe to continue.")
|
||||
else:
|
||||
logging.info("wait_for_deactivation_or_put_config: Time limit reached, continuing.")
|
||||
|
||||
|
||||
# This isn't used currently. Remove once we decide there is no need for it.
|
||||
def if_primary_restart(
|
||||
config_filename=DEFAULT_MCS_CONF_PATH,
|
||||
cmapi_config_filename=CMAPI_CONF_PATH
|
||||
):
|
||||
nc = NodeConfig()
|
||||
root = nc.get_current_config_root(config_filename = config_filename)
|
||||
primary_node = root.find("./PrimaryNode").text
|
||||
|
||||
if primary_node not in nc.get_network_addresses_and_names():
|
||||
return
|
||||
|
||||
cfg_parser = get_config_parser(cmapi_config_filename)
|
||||
key = get_current_key(cfg_parser)
|
||||
headers = { "x-api-key" : key }
|
||||
body = { "config": config_filename }
|
||||
|
||||
logging.info("if_primary_restart(): restarting the cluster.")
|
||||
url = f"https://{primary_node}:8640/cmapi/{get_version()}/cluster/start"
|
||||
endtime = datetime.datetime.now() + datetime.timedelta(seconds=600)  # questionable how long to retry
|
||||
success = False
|
||||
while not success and datetime.datetime.now() < endtime:
|
||||
try:
|
||||
response = requests.put(url, verify=False, headers=headers, json=body, timeout=60)
|
||||
response.raise_for_status()
|
||||
success = True
|
||||
except Exception as e:
|
||||
logging.warning(f"if_primary_restart(): failed to start the cluster, got {str(e)}")
|
||||
time.sleep(10)
|
||||
if not success:
|
||||
logging.error(f"if_primary_restart(): failed to start the cluster. Manual intervention is required.")
|
||||
|
||||
|
||||
def get_cej_info(config_root):
|
||||
"""Get CEJ (Cross Engine Join) info.
|
||||
|
||||
Get credentials from CrossEngineSupport section in Columnstore.xml.
|
||||
Decrypt CEJ user password if needed.
|
||||
|
||||
:param config_root: config root element from Columnstore.xml file
|
||||
:type config_root: lxml.Element
|
||||
:return: cej_host, cej_port, cej_username, cej_password
|
||||
:rtype: tuple
|
||||
"""
|
||||
cej_node = config_root.find('./CrossEngineSupport')
|
||||
cej_host = cej_node.find('Host').text or '127.0.0.1'
|
||||
cej_port = cej_node.find('Port').text or '3306'
|
||||
cej_username = cej_node.find('./User').text
|
||||
cej_password = cej_node.find('./Password').text or ''
|
||||
|
||||
if not cej_username:
|
||||
logging.error(
|
||||
'Columnstore.xml has an empty CrossEngineSupport.User tag'
|
||||
)
|
||||
if not cej_password:
|
||||
logging.warning(
|
||||
'Columnstore.xml has an empty CrossEngineSupport.Password tag'
|
||||
)
|
||||
|
||||
if CEJPasswordHandler.secretsfile_exists():
|
||||
cej_password = CEJPasswordHandler.decrypt_password(cej_password)
|
||||
|
||||
return cej_host, cej_port, cej_username, cej_password
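# get_cej_info() reads a section shaped roughly like this (values are
# illustrative):
#
#   <CrossEngineSupport>
#       <Host>127.0.0.1</Host>
#       <Port>3306</Port>
#       <User>cej_user</User>
#       <Password>encrypted-or-plaintext</Password>
#   </CrossEngineSupport>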
|
||||
|
||||
|
||||
def system_ready(config_filename=DEFAULT_MCS_CONF_PATH):
|
||||
"""Indicates whether the node is ready to accept queries.
|
||||
|
||||
:param config_filename: columnstore xml config filepath,
|
||||
defaults to DEFAULT_MCS_CONF_PATH
|
||||
:type config_filename: str, optional
|
||||
:return: tuple of 2 booleans
|
||||
:rtype: tuple
|
||||
"""
|
||||
nc = NodeConfig()
|
||||
root = nc.get_current_config_root(config_filename)
|
||||
host, port, username, password = get_cej_info(root)
|
||||
|
||||
if username is None:
|
||||
# Second False indicates not to retry inside calling function's
|
||||
# retry loop
|
||||
return False, False
|
||||
|
||||
cmd = (
|
||||
f"/usr/bin/mariadb -h '{host}' "
|
||||
f"-P '{port}' "
|
||||
f"-u '{username}' "
|
||||
f"--password='{password}' "
|
||||
"-sN -e "
|
||||
"\"SELECT mcssystemready();\""
|
||||
)
|
||||
|
||||
import subprocess
|
||||
ret = subprocess.run(cmd, stdout=subprocess.PIPE, shell=True)
|
||||
if ret.returncode == 0:
|
||||
response = ret.stdout.decode("utf-8").strip()
|
||||
if response == '1':
|
||||
return True, False
|
||||
else:
|
||||
return False, True
|
||||
return False, False
|
||||
|
||||
|
||||
def cmapi_config_check(cmapi_conf_path: str = CMAPI_CONF_PATH):
|
||||
"""Check if cmapi config file exists and copy default config if not.
|
||||
|
||||
:param cmapi_conf_path: cmapi conf path, defaults to CMAPI_CONF_PATH
|
||||
:type cmapi_conf_path: str, optional
|
||||
"""
|
||||
if not os.path.exists(cmapi_conf_path):
|
||||
logging.info(
|
||||
f'There is no config file at "{cmapi_conf_path}". '
f'Copying the default config from {CMAPI_DEFAULT_CONF_PATH}.'
|
||||
)
|
||||
copyfile(CMAPI_DEFAULT_CONF_PATH, cmapi_conf_path)
|
||||
|
||||
|
||||
def dequote(input_str: str) -> str:
|
||||
"""Dequote input string.
|
||||
|
||||
If a string has single or double quotes around it, remove them.
|
||||
Make sure the pair of quotes match.
|
||||
If a matching pair of quotes is not found, return the string unchanged.
|
||||
|
||||
:param input_str: input probably quoted string
|
||||
:type input_str: str
|
||||
:return: unquoted string
|
||||
:rtype: str
|
||||
"""
|
||||
if (
|
||||
len(input_str) >= 2 and
|
||||
input_str[0] == input_str[-1]
|
||||
) and input_str.startswith(("'", '"')):
|
||||
return input_str[1:-1]
|
||||
return input_str
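# A few illustrative cases:
#
#     >>> dequote('"hello"')
#     'hello'
#     >>> dequote("'hello'")
#     'hello'
#     >>> dequote('"abc')    # no matching closing quote: returned unchanged
#     '"abc'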
|
||||
|
||||
|
||||
def get_dispatcher_name_and_path(
|
||||
config_parser: configparser.ConfigParser
|
||||
) -> Tuple[str, str]:
|
||||
"""Get dispatcher name and path from cmapi conf file.
|
||||
|
||||
:param config_parser: cmapi conf file parser
|
||||
:type config_parser: configparser.ConfigParser
|
||||
:return: dispatcher name and path strings
|
||||
:rtype: tuple[str, str]
|
||||
"""
|
||||
dispatcher_name = dequote(
|
||||
config_parser.get('Dispatcher', 'name', fallback='systemd')
|
||||
)
|
||||
# TODO: used only for next releases for CustomDispatcher class
|
||||
# remove if useless
|
||||
dispatcher_path = dequote(
|
||||
config_parser.get('Dispatcher', 'path', fallback='')
|
||||
)
|
||||
return dispatcher_name, dispatcher_path
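# The expected ini fragment looks roughly like this (values may carry quotes,
# which is why dequote() is applied):
#
#   [Dispatcher]
#   name = 'systemd'
#   path = ''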
|
131
cmapi/cmapi_server/logging_management.py
Normal file
@ -0,0 +1,131 @@
|
||||
import json
|
||||
import logging
|
||||
import logging.config
|
||||
from functools import partial, partialmethod
|
||||
|
||||
import cherrypy
|
||||
from cherrypy import _cperror
|
||||
|
||||
from cmapi_server.constants import CMAPI_LOG_CONF_PATH
|
||||
|
||||
|
||||
class AddIpFilter(logging.Filter):
|
||||
"""Filter to add IP address to logging record."""
|
||||
def filter(self, record):
|
||||
record.ip = cherrypy.request.remote.name or cherrypy.request.remote.ip
|
||||
return True
|
||||
|
||||
|
||||
def custom_cherrypy_error(
|
||||
self, msg='', context='', severity=logging.INFO, traceback=False
|
||||
):
|
||||
"""Write the given ``msg`` to the error log. [now without hardcoded time]
|
||||
|
||||
This is not just for errors! [looks awful, but kept as in the cherrypy implementation]
|
||||
Applications may call this at any time to log application-specific
|
||||
information.
|
||||
|
||||
If ``traceback`` is True, the traceback of the current exception
|
||||
(if any) will be appended to ``msg``.
|
||||
|
||||
..Note:
|
||||
All informatio
|
||||
"""
|
||||
exc_info = None
|
||||
if traceback:
|
||||
exc_info = _cperror._exc_info()
|
||||
|
||||
self.error_log.log(severity, ' '.join((context, msg)), exc_info=exc_info)
|
||||
|
||||
|
||||
def dict_config(config_filepath: str):
|
||||
with open(config_filepath, 'r', encoding='utf-8') as json_config:
|
||||
config_dict = json.load(json_config)
|
||||
logging.config.dictConfig(config_dict)
|
||||
|
||||
|
||||
def add_logging_level(level_name, level_num, method_name=None):
|
||||
"""
|
||||
Comprehensively adds a new logging level to the `logging` module and the
|
||||
currently configured logging class.
|
||||
|
||||
`level_name` becomes an attribute of the `logging` module with the value
|
||||
`level_num`.
|
||||
`method_name` becomes a convenience method for both `logging` itself
|
||||
and the class returned by `logging.getLoggerClass()` (usually just
|
||||
`logging.Logger`).
|
||||
If `method_name` is not specified, `level_name.lower()` is used.
|
||||
|
||||
To avoid accidental clobberings of existing attributes, this method will
|
||||
raise an `AttributeError` if the level name is already an attribute of the
|
||||
`logging` module or if the method name is already present
|
||||
|
||||
Example
|
||||
-------
|
||||
>>> add_logging_level('TRACE', logging.DEBUG - 5)
|
||||
>>> logging.getLogger(__name__).setLevel('TRACE')
|
||||
>>> logging.getLogger(__name__).trace('that worked')
|
||||
>>> logging.trace('so did this')
|
||||
>>> logging.TRACE
|
||||
5
|
||||
|
||||
"""
|
||||
if not method_name:
|
||||
method_name = level_name.lower()
|
||||
|
||||
if hasattr(logging, level_name):
|
||||
raise AttributeError(f'{level_name} already defined in logging module')
|
||||
if hasattr(logging, method_name):
|
||||
raise AttributeError(
|
||||
f'{method_name} already defined in logging module'
|
||||
)
|
||||
if hasattr(logging.getLoggerClass(), method_name):
|
||||
raise AttributeError(f'{method_name} already defined in logger class')
|
||||
|
||||
# This method was inspired by the answers to Stack Overflow post
|
||||
# http://stackoverflow.com/q/2183233/2988730, especially
|
||||
# https://stackoverflow.com/a/35804945
|
||||
# https://stackoverflow.com/a/55276759
|
||||
logging.addLevelName(level_num, level_name)
|
||||
setattr(logging, level_name, level_num)
|
||||
setattr(
|
||||
logging.getLoggerClass(), method_name,
|
||||
partialmethod(logging.getLoggerClass().log, level_num)
|
||||
)
|
||||
setattr(logging, method_name, partial(logging.log, level_num))
|
||||
|
||||
|
||||
def config_cmapi_server_logging():
|
||||
# add custom level TRACE, only for development purposes
|
||||
# could be activated using API endpoints or cli tool without relaunching
|
||||
add_logging_level('TRACE', 5)
|
||||
cherrypy._cplogging.LogManager.error = custom_cherrypy_error
|
||||
# reconfigure cherrypy.access log message format
|
||||
# Default access_log_format '{h} {l} {u} {t} "{r}" {s} {b} "{f}" "{a}"'
|
||||
# h - remote.name or remote.ip, l - "-",
|
||||
# u - getattr(request, 'login', None) or '-', t - self.time(),
|
||||
# r - request.request_line, s - status,
|
||||
# b - dict.get(outheaders, 'Content-Length', '') or '-',
|
||||
# f - dict.get(inheaders, 'Referer', ''),
|
||||
# a - dict.get(inheaders, 'User-Agent', ''),
|
||||
# o - dict.get(inheaders, 'Host', '-'),
|
||||
# i - request.unique_id, z - LazyRfc3339UtcTime()
|
||||
cherrypy._cplogging.LogManager.access_log_format = (
|
||||
'{h} ACCESS "{r}" code {s}, bytes {b}, user-agent "{a}"'
|
||||
)
|
||||
dict_config(CMAPI_LOG_CONF_PATH)
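# With the access_log_format above, an access record renders roughly like
# this (request line, status, sizes and version are hypothetical):
#
#   192.0.2.10 ACCESS "PUT /cmapi/0.4.0/node/config HTTP/1.1" code 200, bytes 42, user-agent "python-requests/2.28.1"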
|
||||
|
||||
|
||||
def change_loggers_level(level: str):
|
||||
"""Set level for each custom logger except cherrypy library.
|
||||
|
||||
:param level: logging level to set
|
||||
:type level: str
|
||||
"""
|
||||
loggers = [
|
||||
logging.getLogger(name) for name in logging.root.manager.loggerDict
|
||||
if 'cherrypy' not in name
|
||||
]
|
||||
loggers.append(logging.getLogger()) # add RootLogger
|
||||
for logger in loggers:
|
||||
logger.setLevel(level)
|
0
cmapi/cmapi_server/managers/__init__.py
Normal file
29
cmapi/cmapi_server/managers/application.py
Normal file
@ -0,0 +1,29 @@
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
from cmapi_server.constants import VERSION_PATH
|
||||
|
||||
|
||||
class AppManager:
|
||||
started: bool = False
|
||||
version: Optional[str] = None
|
||||
|
||||
@classmethod
|
||||
def get_version(cls) -> str:
|
||||
"""Get CMAPI version.
|
||||
|
||||
:return: cmapi version
|
||||
:rtype: str
|
||||
"""
|
||||
if cls.version:
|
||||
return cls.version
|
||||
with open(VERSION_PATH, encoding='utf-8') as version_file:
|
||||
version = '.'.join([
|
||||
i.strip().split('=')[1]
|
||||
for i in version_file.read().splitlines() if i
|
||||
])
|
||||
if not version:
|
||||
logging.error('Couldn\'t detect version from VERSION file!')
|
||||
version = 'Undefined'
|
||||
cls.version = version
|
||||
return cls.version
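# get_version() expects a VERSION file of KEY=VALUE lines and joins the
# values with dots; e.g. (illustrative numbers) a file containing
#
#   CMAPI_VERSION_MAJOR=23
#   CMAPI_VERSION_MINOR=2
#   CMAPI_VERSION_PATCH=1
#
# would yield the string '23.2.1'.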
|
439
cmapi/cmapi_server/managers/process.py
Normal file
@ -0,0 +1,439 @@
|
||||
from __future__ import annotations
|
||||
import logging
|
||||
import os.path
|
||||
import socket
|
||||
from time import sleep
|
||||
|
||||
import psutil
|
||||
|
||||
from cmapi_server.exceptions import CMAPIBasicError
|
||||
from cmapi_server.constants import MCS_INSTALL_BIN, ALL_MCS_PROGS
|
||||
from cmapi_server.process_dispatchers.systemd import SystemdDispatcher
|
||||
from cmapi_server.process_dispatchers.container import (
|
||||
ContainerDispatcher
|
||||
)
|
||||
from mcs_node_control.models.dbrm import DBRM
|
||||
from mcs_node_control.models.dbrm_socket import SOCK_TIMEOUT
|
||||
from mcs_node_control.models.misc import get_workernodes
|
||||
from mcs_node_control.models.process import Process
|
||||
|
||||
|
||||
PROCESS_DISPATCHERS = {
|
||||
'systemd': SystemdDispatcher,
|
||||
# could be used in docker containers and OSes w/o systemd
|
||||
'container': ContainerDispatcher,
|
||||
}
|
||||
PRIMARY_PROGS = ('controllernode', 'DMLProc', 'DDLProc')
|
||||
|
||||
|
||||
class MCSProcessManager:
|
||||
"""Class to run process operations.
|
||||
|
||||
e.g. start, restart or stop systemd services, or run executables.
|
||||
"""
|
||||
CONTROLLER_MAX_RETRY = 30
|
||||
mcs_progs = {}
|
||||
mcs_version_info = None
|
||||
dispatcher_name = None
|
||||
process_dispatcher = None
|
||||
|
||||
@classmethod
|
||||
def _get_prog_name(cls, name: str) -> str:
|
||||
"""Get proper service name for systemd or non-systemd installations.
|
||||
|
||||
:param name: service name
|
||||
:type name: str
|
||||
:return: correct service name
|
||||
:rtype: str
|
||||
"""
|
||||
if cls.dispatcher_name == 'systemd':
|
||||
return ALL_MCS_PROGS[name].service_name
|
||||
return name
|
||||
|
||||
@classmethod
|
||||
def _get_sorted_progs(
|
||||
cls, is_primary: bool, reverse: bool = False
|
||||
) -> dict:
|
||||
"""Get sorted services dict.
|
||||
|
||||
:param is_primary: is primary node or not
|
||||
:type is_primary: bool
|
||||
:param reverse: reverse sort order, defaults to False
|
||||
:type reverse: bool, optional
|
||||
:return: dict with sorted services in correct start/stop order
|
||||
:rtype: dict
|
||||
"""
|
||||
unsorted_progs: dict
|
||||
if is_primary:
|
||||
unsorted_progs = cls.mcs_progs
|
||||
else:
|
||||
unsorted_progs = {
|
||||
prog_name: prog_info
|
||||
for prog_name, prog_info in cls.mcs_progs.items()
|
||||
if prog_name not in PRIMARY_PROGS
|
||||
}
|
||||
if reverse:
|
||||
# the stop sequence is built using the stop_priority property
|
||||
return dict(
|
||||
sorted(
|
||||
unsorted_progs.items(),
|
||||
key=lambda item: item[1].stop_priority,
|
||||
)
|
||||
)
|
||||
# the start-up sequence is the dict's default order
|
||||
return unsorted_progs
|
||||
|
||||
@classmethod
|
||||
def _detect_processes(cls) -> None:
|
||||
"""Detect existing mcs services. Depends on MCS version."""
|
||||
if cls.mcs_progs:
|
||||
logging.warning('MCSProcessManager already detected processes.')
|
||||
|
||||
for prog_name, prog_info in ALL_MCS_PROGS.items():
|
||||
if os.path.exists(os.path.join(MCS_INSTALL_BIN, prog_name)):
|
||||
cls.mcs_progs[prog_name] = prog_info
|
||||
|
||||
@classmethod
|
||||
def detect(cls, dispatcher_name: str, dispatcher_path: str = None) -> None:
|
||||
"""Detect mcs version info and installed processes.
|
||||
|
||||
:param dispatcher_name: process dispatcher name
|
||||
:type dispatcher_name: str
|
||||
:param dispatcher_path: path to custom dispatcher,
|
||||
for next releases, defaults to None
|
||||
:type dispatcher_path: str, optional
|
||||
:raises CMAPIBasicError: if custom dispatcher path doesn't exist
|
||||
:raises CMAPIBasicError: Not implemented custom dispatcher error
|
||||
"""
|
||||
cls._detect_processes()
|
||||
# detect mcs version info by processes
|
||||
if len(cls.mcs_progs) == 8:
|
||||
cls.mcs_version_info = '6.4.x and lower'
|
||||
elif len(cls.mcs_progs) == 7 and 'ExeMgr' not in cls.mcs_progs:
|
||||
cls.mcs_version_info = '22.08.x and higher'
|
||||
else:
|
||||
cls.mcs_version_info = 'Undefined'
|
||||
logging.warning(
|
||||
'MCS version hasn\'t been detected properly. '
'Please try to update your CMAPI version or contact support.'
|
||||
)
|
||||
logging.info(
|
||||
f'Detected {len(cls.mcs_progs)} MCS services. '
f'MCS version is {cls.mcs_version_info}.'
|
||||
)
|
||||
# TODO: For next releases. Do we really need custom dispatchers?
|
||||
if dispatcher_name not in PROCESS_DISPATCHERS:
|
||||
logging.warning(
|
||||
f'Custom process dispatcher with name "{dispatcher_name}" '
|
||||
f'and path "{dispatcher_path}" used.'
|
||||
)
|
||||
if not dispatcher_path or not os.path.exists(dispatcher_path):
|
||||
err_msg = 'Wrong dispatcher path in cmapi_config file.'
|
||||
logging.error(err_msg)
|
||||
raise CMAPIBasicError(err_msg)
|
||||
cls.dispatcher_name = 'custom'
|
||||
raise CMAPIBasicError('Custom dispatchers yet not implemented!')
|
||||
|
||||
cls.dispatcher_name = dispatcher_name
|
||||
cls.process_dispatcher = PROCESS_DISPATCHERS[dispatcher_name]
|
||||
cls.process_dispatcher.init()
|
||||
|
||||
@classmethod
|
||||
def _wait_for_workernodes(cls) -> bool:
|
||||
"""Wait for workernodes processes.
|
||||
|
||||
Waiting for all workernodes to come up before starting
|
||||
controllernode on a primary.
|
||||
|
||||
:return: True on success
|
||||
:rtype: bool
|
||||
"""
|
||||
logging.debug(
|
||||
'Waiting for all workernodes to come up before starting '
|
||||
'controllernode on a primary.'
|
||||
)
|
||||
workernodes = get_workernodes()
|
||||
attempts = cls.CONTROLLER_MAX_RETRY
|
||||
while attempts > 0 and len(workernodes) > 0:
|
||||
logging.debug(f'Waiting for "{list(workernodes)}"....{attempts}')
|
||||
# creating a separated list with workernode names
|
||||
# for safe deleting items from source dict
|
||||
for name in list(workernodes):
|
||||
try:
|
||||
sock = socket.socket(
|
||||
socket.AF_INET, socket.SOCK_STREAM
|
||||
)
|
||||
sock.settimeout(SOCK_TIMEOUT)
|
||||
sock.connect(
|
||||
(
|
||||
workernodes[name]['IPAddr'],
|
||||
workernodes[name]['Port']
|
||||
)
|
||||
)
|
||||
except socket.timeout:
|
||||
logging.debug(
|
||||
f'"{name}" {workernodes[name]["IPAddr"]}:'
|
||||
f'{workernodes[name]["Port"]} not started yet.'
|
||||
)
|
||||
else:
|
||||
# delete started workernode from workernodes dict
|
||||
del workernodes[name]
|
||||
finally:
|
||||
sock.close()
|
||||
attempts -= 1
|
||||
|
||||
if workernodes:
|
||||
logging.error(
|
||||
f'Some workernodes: "{workernodes}" are not reachable after '
|
||||
f'{cls.CONTROLLER_MAX_RETRY} attempts to connect with '
|
||||
f'{SOCK_TIMEOUT} seconds timeout. '
|
||||
'Starting mcs-controllernode anyway.'
|
||||
)
|
||||
return False
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
def _wait_for_controllernode(cls) -> bool:
|
||||
"""Waiting for controllernode to come up on a primary.
|
||||
|
||||
:return: True on success
|
||||
:rtype: bool
|
||||
"""
|
||||
logging.debug(
|
||||
'Waiting for controllernode to come up before starting '
|
||||
'ddlproc/dmlproc on non-primary nodes.'
|
||||
)
|
||||
attempts = cls.CONTROLLER_MAX_RETRY
|
||||
success = False
|
||||
while attempts > 0:
|
||||
try:
|
||||
with DBRM():
|
||||
# check connection
|
||||
success = True
|
||||
except (ConnectionRefusedError, RuntimeError, socket.error):
|
||||
logging.info(
|
||||
'Cannot establish connection to controllernode. '
|
||||
f'Controller node still not started. Waiting...{attempts}'
|
||||
)
|
||||
else:
|
||||
break
|
||||
attempts -= 1
|
||||
|
||||
if not success:
|
||||
logging.error(
|
||||
'Controllernode is not reachable after '
|
||||
f'{cls.CONTROLLER_MAX_RETRY} attempts to connect with '
|
||||
f'{SOCK_TIMEOUT} seconds timeout. '
|
||||
'Starting mcs-dmlproc/mcs-ddlproc anyway.'
|
||||
)
|
||||
return False
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
def _wait_for_DMLProc_stop(cls, timeout: int = 10) -> bool:
|
||||
"""Waiting DMLProc process to stop.
|
||||
|
||||
:param timeout: timeout to wait, defaults to 10
|
||||
:type timeout: int, optional
|
||||
:return: True on success
|
||||
:rtype: bool
|
||||
"""
|
||||
logging.info(f'Waiting for DMLProc to stop in {timeout} seconds')
|
||||
dmlproc_stopped = False
|
||||
while timeout > 0:
|
||||
logging.info(
|
||||
f'Waiting for DMLProc to stop. Seconds left {timeout}.'
|
||||
)
|
||||
if not Process.check_process_alive('DMLProc'):
|
||||
logging.info('DMLProc gracefully stopped by DBRM command.')
|
||||
dmlproc_stopped = True
|
||||
break
|
||||
sleep(1)
|
||||
timeout -= 1
|
||||
else:
|
||||
logging.error(
|
||||
'DMLProc did not stop gracefully via DBRM command within '
'the given timeout. It will be stopped directly.'
|
||||
)
|
||||
return dmlproc_stopped
|
||||
|
||||
@classmethod
|
||||
def noop(cls, *args, **kwargs):
|
||||
"""No operation. TODO: looks like useless."""
|
||||
cls.process_dispatcher.noop()
|
||||
|
||||
@classmethod
|
||||
def start(cls, name: str, is_primary: bool, use_sudo: bool) -> bool:
|
||||
"""Start mcs process.
|
||||
|
||||
:param name: mcs process name
|
||||
:type name: str
|
||||
:param is_primary: is node primary or not
|
||||
:type is_primary: bool
|
||||
:param use_sudo: use sudo or not
|
||||
:type use_sudo: bool
|
||||
:return: True if process started successfully
|
||||
:rtype: bool
|
||||
"""
|
||||
return cls.process_dispatcher.start(
|
||||
cls._get_prog_name(name), is_primary, use_sudo
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def stop(
|
||||
cls, name: str, is_primary: bool, use_sudo: bool, timeout: int = 10
|
||||
) -> bool:
|
||||
"""Stop mcs process.
|
||||
|
||||
:param name: mcs process name
|
||||
:type name: str
|
||||
:param is_primary: is node primary or not
|
||||
:type is_primary: bool
|
||||
:param use_sudo: use sudo or not
|
||||
:type use_sudo: bool
|
||||
:param timeout: timeout for DMLProc gracefully stop using DBRM, seconds
|
||||
:type timeout: int
|
||||
:return: True if process stopped successfully
|
||||
:rtype: bool
|
||||
"""
|
||||
# TODO: do we need here force stop DMLProc as a method argument?
|
||||
|
||||
if is_primary and name == 'DMLProc':
|
||||
logging.info(
|
||||
'Trying to gracefully stop DMLProc using DBRM commands.'
|
||||
)
|
||||
try:
|
||||
with DBRM() as dbrm:
|
||||
dbrm.set_system_state(
|
||||
['SS_ROLLBACK', 'SS_SHUTDOWN_PENDING']
|
||||
)
|
||||
except (ConnectionRefusedError, RuntimeError):
|
||||
logging.error(
|
||||
'Cannot set SS_ROLLBACK and SS_SHUTDOWN_PENDING '
|
||||
'using DBRM while trying to gracefully auto stop DMLProc. '
'Continuing with a regular stop method.'
|
||||
)
|
||||
# stop DMLProc using regular signals or systemd
|
||||
return cls.process_dispatcher.stop(
|
||||
cls._get_prog_name(name), is_primary, use_sudo
|
||||
)
|
||||
# DMLProc gracefully stopped using DBRM commands otherwise
|
||||
# continue with a regular stop method
|
||||
if cls._wait_for_DMLProc_stop(timeout):
|
||||
return True
|
||||
return cls.process_dispatcher.stop(
|
||||
cls._get_prog_name(name), is_primary, use_sudo
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def restart(cls, name: str, is_primary: bool, use_sudo: bool) -> bool:
|
||||
"""Restart mcs process.
|
||||
|
||||
:param name: mcs process name
|
||||
:type name: str
|
||||
:param is_primary: is node primary or not
|
||||
:type is_primary: bool
|
||||
:param use_sudo: use sudo or not
|
||||
:type use_sudo: bool
|
||||
:return: True if process restarted successfully
|
||||
:rtype: bool
|
||||
"""
|
||||
return cls.process_dispatcher.restart(
|
||||
cls._get_prog_name(name), is_primary, use_sudo
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def get_running_mcs_procs(cls) -> list[dict]:
|
||||
"""Search for mcs processes.
|
||||
|
||||
The method returns PIDs of MCS services in both container or systemd
|
||||
environments.
|
||||
|
||||
:return: list of dicts with name and pid of mcs process
|
||||
:rtype: list[dict]
|
||||
"""
|
||||
return [
|
||||
{'name': proc.name(), 'pid': proc.pid}
|
||||
for proc in psutil.process_iter(['pid', 'name'])
|
||||
if proc.name() in cls.mcs_progs
|
||||
]
|
||||
|
||||
@classmethod
|
||||
def is_node_processes_ok(
|
||||
cls, is_primary: bool, node_stopped: bool
|
||||
) -> bool:
|
||||
"""Check if needed processes exists or not.
|
||||
|
||||
:param is_primary: is node primary or not
|
||||
:type is_primary: bool
|
||||
:param node_stopped: is node stopped or started
|
||||
:type node_stopped: bool
|
||||
:return: True if there are expected value of processes, else False
|
||||
:rtype: bool
|
||||
|
||||
..NOTE: For next releases. Now only used in tests.
|
||||
"""
|
||||
running_procs = cls.get_running_mcs_procs()
|
||||
if node_stopped:
|
||||
return len(running_procs) == 0
|
||||
node_progs = cls._get_sorted_progs(is_primary)
|
||||
return set(node_progs) == set(p['name'] for p in running_procs)
|
||||
|
||||
@classmethod
|
||||
def start_node(cls, is_primary: bool, use_sudo: bool = True):
|
||||
"""Start mcs node processes.
|
||||
|
||||
:param is_primary: is node primary or not, defaults to True
|
||||
:type is_primary: bool
|
||||
:param use_sudo: use sudo or not, defaults to True
|
||||
:type use_sudo: bool, optional
|
||||
:raises CMAPIBasicError: immediately if one mcs process not started
|
||||
"""
|
||||
for prog_name in cls._get_sorted_progs(is_primary):
|
||||
if (
|
||||
cls.dispatcher_name == 'systemd'
|
||||
and prog_name == 'StorageManager'
|
||||
):
|
||||
# TODO: MCOL-5458
|
||||
logging.info(
|
||||
f'Skip starting {prog_name} with systemd dispatcher.'
|
||||
)
|
||||
continue
|
||||
# TODO: additional error handling
|
||||
if prog_name == 'controllernode':
|
||||
cls._wait_for_workernodes()
|
||||
if prog_name in ('DMLProc', 'DDLProc'):
|
||||
cls._wait_for_controllernode()
|
||||
if not cls.start(prog_name, is_primary, use_sudo):
|
||||
logging.error(f'Process "{prog_name}" not started properly.')
|
||||
raise CMAPIBasicError(f'Error while starting "{prog_name}".')
|
||||
|
||||
@classmethod
|
||||
def stop_node(
|
||||
cls, is_primary: bool, use_sudo: bool = True, timeout: int = 10
|
||||
):
|
||||
"""Stop mcs node processes.
|
||||
|
||||
:param is_primary: is node primary or not, defaults to True
|
||||
:type is_primary: bool
|
||||
:param use_sudo: use sudo or not, defaults to True
|
||||
:type use_sudo: bool, optional
|
||||
:param timeout: timeout for DMLProc gracefully stop using DBRM, seconds
|
||||
:type timeout: int
|
||||
:raises CMAPIBasicError: immediately if one mcs process not stopped
|
||||
"""
|
||||
# Every time try to stop all processes, no matter whether this node is
# primary or a replica, so use the full list of available processes.
# Otherwise it could cause undefined behaviour when the primary goes
# down and then recovers (failover triggered twice).
|
||||
for prog_name in cls._get_sorted_progs(True, reverse=True):
|
||||
if not cls.stop(prog_name, is_primary, use_sudo):
|
||||
logging.error(f'Process "{prog_name}" not stopped properly.')
|
||||
raise CMAPIBasicError(f'Error while stopping "{prog_name}"')
|
||||
|
||||
@classmethod
|
||||
def restart_node(cls, is_primary: bool, use_sudo: bool):
|
||||
"""TODO: For next releases."""
|
||||
if cls.get_running_mcs_procs():
|
||||
cls.stop_node(is_primary, use_sudo)
|
||||
cls.start_node(is_primary, use_sudo)
|
1124
cmapi/cmapi_server/node_manipulation.py
Normal file
File diff suppressed because it is too large
0
cmapi/cmapi_server/process_dispatchers/__init__.py
Normal file
146
cmapi/cmapi_server/process_dispatchers/base.py
Normal file
@ -0,0 +1,146 @@
|
||||
"""Module contains base process dispatcher class implementation.
|
||||
|
||||
Formally, this is the required interface for subclasses.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import shlex
|
||||
import subprocess
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Dict, Optional, TextIO, Tuple
|
||||
|
||||
from cmapi_server.constants import MCS_INSTALL_BIN, MCS_LOG_PATH
|
||||
|
||||
|
||||
class BaseDispatcher:
|
||||
"""Class with base interfaces for dispatchers."""
|
||||
|
||||
@staticmethod
|
||||
def _create_mcs_process_logfile(filename: str) -> str:
|
||||
"""Create log file by name.
|
||||
|
||||
:param filename: log filename
|
||||
:type filename: str
|
||||
:return: full path of created log file
|
||||
:rtype: str
|
||||
"""
|
||||
log_fullpath = os.path.join(MCS_LOG_PATH, filename)
|
||||
Path(log_fullpath).touch(mode=0o666)  # octal mode; a plain 666 was a bug
|
||||
return log_fullpath
|
||||
|
||||
@staticmethod
|
||||
def exec_command(
|
||||
command: str, daemonize: bool = False, silent: bool = False,
|
||||
stdout: TextIO = subprocess.PIPE, env: Optional[Dict] = None
|
||||
) -> Tuple[bool, str]:
|
||||
"""Run command using subprocess.
|
||||
|
||||
:param command: command to run
|
||||
:type command: str
|
||||
:param daemonize: run command in detached mode, defaults to False
|
||||
:type daemonize: bool, optional
|
||||
:param silent: prevent error logs on non-zero exit status,
|
||||
defaults to False
|
||||
:type silent: bool, optional
|
||||
:param stdout: stdout argument for Popen, defaults to subprocess.PIPE
|
||||
:type stdout: TextIO, optional
|
||||
:param env: environment argument for Popen, defaults to None
|
||||
:type env: Optional[Dict], optional
|
||||
:return: tuple with success status and output string from subprocess,
|
||||
if there are multiple lines in the output they should be split
|
||||
:rtype: Tuple[bool, str]
|
||||
"""
|
||||
output: str = ''
|
||||
result: Tuple = (False, output)
|
||||
try:
|
||||
proc = subprocess.Popen(
|
||||
shlex.split(command),
|
||||
stdout=stdout,
|
||||
stderr=subprocess.STDOUT,
|
||||
start_new_session=daemonize,
|
||||
env=env,
|
||||
encoding='utf-8'
|
||||
)
|
||||
except Exception:
|
||||
logging.error(f'Failed on run command "{command}".', exc_info=True)
|
||||
# TODO: cmapi has to close with an exception here
|
||||
# to stop docker container?
|
||||
# raise
|
||||
return result
|
||||
if daemonize:
|
||||
# remove Popen object. optionally gc.collect could be invoked.
|
||||
# this is done to prevent eventually spawning duplicated "defunct"
# (zombie) python-parented processes. This could happen previously
# after a cluster restart. It didn't affect the cluster's condition,
# it only made the "mcs cluster status" command output confusing
# and ugly.
|
||||
del proc
|
||||
result = (True, output)
|
||||
else:
|
||||
logging.debug('Waiting command to finish.')
|
||||
stdout_str, _ = proc.communicate()
|
||||
returncode = proc.wait()
|
||||
if stdout_str is not None:
|
||||
# output guaranteed to be empty string not None
|
||||
output = stdout_str
|
||||
result = (True, output)
|
||||
if returncode != 0:
|
||||
if not silent:
|
||||
logging.error(
|
||||
f'Calling "{command}" finished with return code: '
|
||||
f'"{returncode}" and stderr+stdout "{output}".'
|
||||
)
|
||||
result = (False, output)
|
||||
return result
|
||||
|
||||
@classmethod
|
||||
def _run_dbbuilder(cls, use_su=False) -> None:
|
||||
# attempt to run dbbuilder on primary node
|
||||
# e.g., s3 was setup after columnstore install
|
||||
logging.info('Attempt to run dbbuilder on primary node')
|
||||
dbbuilder_path = os.path.join(MCS_INSTALL_BIN, 'dbbuilder')
|
||||
dbbuilder_arg = '7'
|
||||
dbb_command = f'{dbbuilder_path} {dbbuilder_arg}'
|
||||
if use_su:
|
||||
# TODO: move mysql user to constants
|
||||
dbb_command = f'su -s /bin/sh -c "{dbb_command}" mysql'
|
||||
dbb_log_path = cls._create_mcs_process_logfile('dbbuilder.log')
|
||||
with open(dbb_log_path, 'a', encoding='utf-8') as dbb_log_fh:
|
||||
dbb_start_time = datetime.now().strftime('%d/%b/%Y %H:%M:%S')
|
||||
dbb_log_fh.write(f'-----Started at {dbb_start_time}.-----\n')
|
||||
# TODO: error handling?
|
||||
# check if exist for next releases?
|
||||
success, _ = cls.exec_command(dbb_command, stdout=dbb_log_fh)
|
||||
dbb_log_fh.write('-----Finished run.-----\n\n')
|
||||
|
||||
@classmethod
|
||||
def init(cls):
|
||||
"""Method for dispatcher initialisation."""
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def is_service_running(cls, service: str, use_sudo: bool) -> bool:
|
||||
"""Check if systemd proceess/service is running."""
|
||||
raise NotImplementedError
|
||||
|
||||
@classmethod
|
||||
def start(cls, service: str, is_primary: bool, use_sudo: bool) -> bool:
|
||||
"""Start process/service."""
|
||||
raise NotImplementedError
|
||||
|
||||
@classmethod
|
||||
def stop(cls, service: str, is_primary: bool, use_sudo: bool) -> bool:
|
||||
"""Stop process/service."""
|
||||
raise NotImplementedError
|
||||
|
||||
@classmethod
|
||||
def restart(cls, service: str, is_primary: bool, use_sudo: bool) -> bool:
|
||||
"""Restart process/service."""
|
||||
raise NotImplementedError
|
||||
|
||||
@classmethod
|
||||
def reload(cls, service: str, is_primary: bool, use_sudo: bool) -> bool:
|
||||
"""Reload process/service."""
|
||||
raise NotImplementedError
|
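A minimal sketch of how a subclass is expected to reuse exec_command() from the base class above; the DummyDispatcher name and the pgrep-based liveness check are illustrative assumptions, not part of this commit.

from cmapi_server.process_dispatchers.base import BaseDispatcher

class DummyDispatcher(BaseDispatcher):
    @classmethod
    def is_service_running(cls, service: str, use_sudo: bool = True) -> bool:
        # 'pgrep -x' exits non-zero when nothing matches, so the success
        # flag returned by exec_command doubles as a liveness check.
        # (Hypothetical sketch, not the dispatcher cmapi ships.)
        success, _ = cls.exec_command(f'pgrep -x {service}', silent=True)
        return success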
294
cmapi/cmapi_server/process_dispatchers/container.py
Normal file
@ -0,0 +1,294 @@
"""
Module contains non-systemd/container process dispatcher class implementation.
"""

import logging
import os.path
import re
from pathlib import Path
from time import sleep

import psutil

from cmapi_server.constants import (
    IFLAG, LIBJEMALLOC_DEFAULT_PATH, MCS_INSTALL_BIN, ALL_MCS_PROGS
)
from cmapi_server.exceptions import CMAPIBasicError
from cmapi_server.process_dispatchers.base import BaseDispatcher


class ContainerDispatcher(BaseDispatcher):
    """Manipulates processes in a docker container.

    Usable in any OS/container environment in cases when we don't want
    to use systemd or don't have it.
    """
    libjemalloc_path = None

    @staticmethod
    def _set_iflag():
        """Create the IFLAG file.

        Means Columnstore container init finished.
        """
        Path(IFLAG).touch()

    @classmethod
    def _get_proc_object(cls, name: str) -> psutil.Process:
        """Get a psutil Process object by service name.

        :param name: process name
        :type name: str
        :raises psutil.NoSuchProcess: if no process with such a name is present
        :return: Process object with the specified name
        :rtype: psutil.Process

        ...TODO: add types-psutil to requirements for mypy checks
        """
        for proc in psutil.process_iter(['pid', 'name', 'username']):
            if proc.name().lower() == name.lower():
                return proc
        raise psutil.NoSuchProcess(pid=None, name=name)

    @classmethod
    def get_libjemalloc_path(cls) -> str:
        """Get libjemalloc.so path.

        :raises CMAPIBasicError: if ldconfig execution returned non-zero
        :raises FileNotFoundError: if no libjemalloc.so.2 found
        :return: libjemalloc.so.2 path
        :rtype: str
        """
        logger = logging.getLogger('container_sh')
        if cls.libjemalloc_path:
            return cls.libjemalloc_path
        # pylint: disable=line-too-long
        # for reference: https://github.com/pyinstaller/pyinstaller/blob/f29b577df4e1659cf65aacb797034763308fd298/PyInstaller/depend/utils.py#L304

        splitlines_count = 1
        pattern = re.compile(r'^\s+(\S+)(\s.*)? => (\S+)')
        success, result = cls.exec_command('ldconfig -p')
        if not success:
            raise CMAPIBasicError('Failed executing ldconfig.')

        text = result.strip().splitlines()[splitlines_count:]

        for line in text:
            # this assumes library names do not contain whitespace
            p_match = pattern.match(line)
            # Sanitize away any abnormal lines of output.
            if p_match is None:
                continue

            lib_path = p_match.groups()[-1]
            lib_name = p_match.group(1)
            if 'libjemalloc' in lib_name:
                # use the first entry
                # TODO: do we need path or name here?
                #       $(ldconfig -p | grep -m1 libjemalloc | awk '{print $1}')
                cls.libjemalloc_path = lib_path
                break

        if not cls.libjemalloc_path:
            if not os.path.exists(LIBJEMALLOC_DEFAULT_PATH):
                logger.error('No libjemalloc.so.2 found.')
                raise FileNotFoundError
            cls.libjemalloc_path = LIBJEMALLOC_DEFAULT_PATH

        return cls.libjemalloc_path

    @classmethod
    def is_service_running(cls, service: str, use_sudo: bool = True) -> bool:
        """Check if an mcs process is running.

        :param service: service name
        :type service: str
        :param use_sudo: interface requirement, unused here, defaults to True
        :type use_sudo: bool, optional
        :return: True if service is running, otherwise False
        :rtype: bool
        """
        try:
            cls._get_proc_object(service)
        except psutil.NoSuchProcess:
            return False
        return True

    @staticmethod
    def _make_cmd(service: str) -> str:
        """Make a shell command by service name.

        :param service: service name
        :type service: str
        :return: command with arguments if needed
        :rtype: str
        """
        service_info = ALL_MCS_PROGS[service]
        command = os.path.join(MCS_INSTALL_BIN, service)

        if service_info.subcommand:
            subcommand = service_info.subcommand
            command = f'{command} {subcommand}'

        return command

    @classmethod
    def start(
        cls, service: str, is_primary: bool, use_sudo: bool = True
    ) -> bool:
        """Start a process in a docker container.

        :param service: process name
        :type service: str
        :param is_primary: is node primary or not
        :type is_primary: bool, optional
        :param use_sudo: interface requirement, unused here, defaults to True
        :type use_sudo: bool, optional
        :return: True if service started successfully
        :rtype: bool
        """
        logger = logging.getLogger('container_sh')
        if cls.is_service_running(service):
            return True

        logger.debug(f'Starting {service}')
        env_vars = {"LD_PRELOAD": cls.get_libjemalloc_path()}
        command = cls._make_cmd(service)

        if service == 'workernode':
            # workernode starts on primary and non-primary nodes with 1 or 2
            # appended to the end of the argument:
            # DBRM_Worker1 - on primary, DBRM_Worker2 - non-primary
            command = command.format(1 if is_primary else 2)

            # run mcs-loadbrm.py before workernode
            logger.debug('Waiting to load BRM.')
            loadbrm_path = os.path.join(MCS_INSTALL_BIN, 'mcs-loadbrm.py')
            loadbrm_logpath = cls._create_mcs_process_logfile(
                'mcs-loadbrm.log'
            )
            with open(loadbrm_logpath, 'a', encoding='utf-8') as loadbrm_logfh:
                success, _ = cls.exec_command(
                    f'{loadbrm_path} no', stdout=loadbrm_logfh, env=env_vars
                )
                if not success:
                    logger.error('Error while loading BRM.')
                else:
                    logger.debug('Successfully loaded BRM.')

        service_log_path = cls._create_mcs_process_logfile(
            f'{service.lower()}.log'
        )
        success, _ = cls.exec_command(
            command, daemonize=True,
            stdout=open(service_log_path, 'a', encoding='utf-8'),
            env=env_vars
        )
        # TODO: any other way to detect that the service finished its initialisation?
        sleep(ALL_MCS_PROGS[service].delay)
        logger.debug(f'Started "{service}".')

        if is_primary and service == 'DDLProc':
            cls._run_dbbuilder()

        return cls.is_service_running(service)

    @classmethod
    def stop(
        cls, service: str, is_primary: bool, use_sudo: bool = True
    ) -> bool:
        """Stop a process in a docker container.

        :param service: process name
        :type service: str
        :param is_primary: is node primary or not
        :type is_primary: bool, optional
        :param use_sudo: interface requirement, unused here, defaults to True
        :type use_sudo: bool, optional
        :return: True if service stopped successfully
        :rtype: bool
        """
        logger = logging.getLogger('container_sh')
        if not cls.is_service_running(service):
            return True

        logger.debug(f'Stopping {service}')
        service_proc = cls._get_proc_object(service)

        if service == 'workernode':
            # run mcs-savebrm.py before stopping workernode
            logger.debug('Waiting to save BRM.')
            savebrm_path = os.path.join(MCS_INSTALL_BIN, 'mcs-savebrm.py')
            savebrm_logpath = cls._create_mcs_process_logfile(
                'mcs-savebrm.log'
            )
            with open(savebrm_logpath, 'a', encoding='utf-8') as savebrm_logfh:
                success, _ = cls.exec_command(
                    savebrm_path, stdout=savebrm_logfh
                )
                if not success:
                    logger.error('Error while saving BRM.')
                else:
                    logger.debug('Successfully saved BRM.')

            logger.debug('Start clearing SHM.')
            clearshm_path = os.path.join(MCS_INSTALL_BIN, 'clearShm')
            success, _ = cls.exec_command(clearshm_path)
            if not success:
                logger.error('Error while clearing SHM.')
            else:
                logger.debug('Successfully cleared SHM.')

        service_proc.terminate()
        # timeout taken from old container.sh
        # TODO: this is still not enough for the controllernode process,
        #       it always has to be stopped with SIGKILL; needs investigation.
        timeout = 3
        if service == 'StorageManager':
            timeout = 300  # 5 minutes
        logger.debug(f'Waiting to gracefully stop "{service}".')
        # This function will return as soon as all processes terminate
        # or when the timeout (in seconds) occurs.
        gone, alive = psutil.wait_procs([service_proc], timeout=timeout)
        if alive:
            logger.debug(
                f'{service} not terminated with SIGTERM, sending SIGKILL.'
            )
            # only one process can be in the list
            alive[0].kill()
            gone, alive = psutil.wait_procs([service_proc], timeout=timeout)
            if gone:
                logger.debug(f'Successfully killed "{service}".')
            else:
                logger.warning(
                    f'Service "{service}" still alive after sending "kill -9" '
                    f'and waiting {timeout} seconds.'
                )
        else:
            logger.debug(f'Gracefully stopped "{service}".')

        return not cls.is_service_running(service)

    @classmethod
    def restart(
        cls, service: str, is_primary: bool, use_sudo: bool = True
    ) -> bool:
        """Restart a process in a docker container.

        :param service: process name
        :type service: str
        :param is_primary: is node primary or not
        :type is_primary: bool, optional
        :param use_sudo: interface requirement, unused here, defaults to True
        :type use_sudo: bool, optional
        :return: True if service restarted successfully
        :rtype: bool

        ...TODO: for next releases. Additional error handling.
        """
        # stop() is skipped for a service that is not running, so default
        # its status to True to avoid referencing an unbound variable
        stop_success = True
        if cls.is_service_running(service):
            # TODO: retry?
            stop_success = cls.stop(service, is_primary, use_sudo)
        start_success = cls.start(service, is_primary, use_sudo)

        return stop_success and start_success
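For context, a hedged usage sketch of the class above; it assumes it runs inside a ColumnStore container where 'workernode' is a key of ALL_MCS_PROGS, and it is not code from this commit.

from cmapi_server.process_dispatchers.container import ContainerDispatcher

# On the primary node the workernode command is formatted as DBRM_Worker1,
# on replicas as DBRM_Worker2 (see start() above).
if ContainerDispatcher.start('workernode', is_primary=True):
    print('workernode is up')
# stop() reports True only once the process is really gone.
ContainerDispatcher.stop('workernode', is_primary=True)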
231
cmapi/cmapi_server/process_dispatchers/systemd.py
Normal file
@ -0,0 +1,231 @@
"""Module contains systemd process dispatcher class implementation."""

import logging
import re
from typing import Union, Tuple

from cmapi_server.process_dispatchers.base import BaseDispatcher


class SystemdDispatcher(BaseDispatcher):
    """Manipulates systemd services."""
    systemctl_version: int = 219  # CentOS 7 version

    @classmethod
    def _systemctl_call(
        cls, command: str, service: str, use_sudo: bool = True,
        return_output=False, *args, **kwargs
    ) -> Union[Tuple[bool, str], bool]:
        """Run "systemctl" with arguments.

        :param command: command for systemctl
        :type command: str
        :param service: systemd service name
        :type service: str
        :param use_sudo: use sudo or not, defaults to True
        :type use_sudo: bool, optional
        :return: status of the operation, True if successful, otherwise False
        :rtype: Union[Tuple[bool, str], bool]
        """
        cmd = f'systemctl {command} {service}'
        if use_sudo:
            cmd = f'sudo {cmd}'
        logging.debug(f'Call "{command}" on service "{service}" with "{cmd}".')
        success, output = cls.exec_command(cmd, *args, **kwargs)
        if return_output:
            return success, output
        return success

    @classmethod
    def init(cls):
        cmd = 'systemctl --version'
        success, output = cls.exec_command(cmd)
        if success:
            # raw result will be like
            # "systemd 239 (245.4-4ubuntu3.17)\n <string with compile flags>"
            cls.systemctl_version = int(
                re.search(r'systemd (\d+)', output).group(1)
            )
            logging.info(f'Detected {cls.systemctl_version} SYSTEMD version.')
        else:
            logging.error('Couldn\'t detect SYSTEMD version')

    @classmethod
    def is_service_running(cls, service: str, use_sudo: bool = True) -> bool:
        """Check if a systemd service is running.

        :param service: service name
        :type service: str
        :param use_sudo: use sudo or not, defaults to True
        :type use_sudo: bool, optional
        :return: True if service is running, otherwise False
        :rtype: bool

        ..Note:
            Not working with multiple services at a time.
        """
        logging.debug(f'Checking "{service}" is running.')
        # TODO: remove the conditions below when we drop CentOS 7 support
        cmd = 'show -p ActiveState --value'
        if cls.systemctl_version < 230:  # --value not supported in old versions
            cmd = 'show -p ActiveState'
        _, output = cls._systemctl_call(
            cmd,
            service, use_sudo, return_output=True
        )
        service_state = output.strip()
        if cls.systemctl_version < 230:  # result like 'ActiveState=active'
            service_state = service_state.split('=')[1]
        logging.debug(f'Service "{service}" is in "{service_state}" state')
        # interpret any non-"active" state as a service that is not running
        if service_state == 'active':
            return True
        # output could be inactive, activating or even empty if
        # command execution was unsuccessful
        return False

    @staticmethod
    def _workernode_get_service_name(is_primary: bool) -> str:
        """Get the proper workernode service name based on primary status.

        :param is_primary: is the node we are running on primary?
        :type is_primary: bool
        :return: correct workernode service name
        :rtype: str
        """
        service = 'mcs-workernode'
        return f'{service}@1.service' if is_primary else f'{service}@2.service'

    @classmethod
    def _workernode_enable(cls, enable: bool, use_sudo: bool = True) -> None:
        """Enable or disable the workernode service.

        :param enable: enable or disable
        :type enable: bool
        :param use_sudo: use sudo or not, defaults to True
        :type use_sudo: bool, optional
        """
        sub_cmd = 'enable' if enable else 'disable'
        service = 'mcs-workernode@1.service'

        if not cls._systemctl_call(sub_cmd, service, use_sudo):
            # enabling/disabling the service is not critical, just log failure
            logging.warning(f'Failed to {sub_cmd} {service}')

    @classmethod
    def start(
        cls, service: str, is_primary: bool = True, use_sudo: bool = True
    ) -> bool:
        """Start a systemd service.

        :param service: service name
        :type service: str
        :param is_primary: is node primary or not, defaults to True
        :type is_primary: bool, optional
        :param use_sudo: use sudo or not, defaults to True
        :type use_sudo: bool, optional
        :return: True if service started successfully
        :rtype: bool
        """
        service_name = service
        if service_name == 'mcs-workernode':
            service_name = cls._workernode_get_service_name(is_primary)
            if is_primary:
                cls._workernode_enable(True, use_sudo)

        if cls.is_service_running(service_name, use_sudo):
            return True

        logging.debug(f'Starting "{service_name}".')
        if not cls._systemctl_call('start', service_name, use_sudo):
            logging.error(f'Failed while starting "{service_name}".')
            return False

        if is_primary and service == 'mcs-ddlproc':
            cls._run_dbbuilder(use_su=True)

        logging.debug(f'Successfully started {service_name}.')
        return cls.is_service_running(service_name, use_sudo)

    @classmethod
    def stop(
        cls, service: str, is_primary: bool = True, use_sudo: bool = True
    ) -> bool:
        """Stop a systemd service.

        :param service: service name
        :type service: str
        :param is_primary: is node primary or not, defaults to True
        :type is_primary: bool, optional
        :param use_sudo: use sudo or not, defaults to True
        :type use_sudo: bool, optional
        :return: True if service stopped successfully
        :rtype: bool
        """
        service_name = service
        if service_name == 'mcs-workernode':
            service_name = f'{service_name}@1.service {service_name}@2.service'
            cls._workernode_enable(False, use_sudo)

        logging.debug(f'Stopping "{service_name}".')
        if not cls._systemctl_call('stop', service_name, use_sudo):
            logging.error(f'Failed while stopping "{service_name}".')
            return False

        return not cls.is_service_running(service, use_sudo)

    @classmethod
    def restart(
        cls, service: str, is_primary: bool = True, use_sudo: bool = True
    ) -> bool:
        """Restart a systemd service.

        :param service: service name
        :type service: str
        :param is_primary: is node primary or not, defaults to True
        :type is_primary: bool, optional
        :param use_sudo: use sudo or not, defaults to True
        :type use_sudo: bool, optional
        :return: True if service restarted successfully
        :rtype: bool
        """
        service_name = service
        if service_name == 'mcs-workernode':
            service_name = cls._workernode_get_service_name(is_primary)

        logging.debug(f'Restarting "{service_name}".')
        if not cls._systemctl_call('restart', service_name, use_sudo):
            logging.error(f'Failed while restarting "{service_name}".')
            return False

        return cls.is_service_running(service, use_sudo)

    @classmethod
    def reload(
        cls, service: str, is_primary: bool = True, use_sudo: bool = True
    ) -> bool:
        """Reload a systemd service.

        :param service: service name
        :type service: str
        :param is_primary: is node primary or not, defaults to True
        :type is_primary: bool, optional
        :param use_sudo: use sudo or not, defaults to True
        :type use_sudo: bool, optional
        :return: True if service reloaded successfully
        :rtype: bool

        ..NOTE: For next releases. It should become important when we teach
            MCS to add/remove nodes w/o a whole cluster restart.
            Additional error handling?
        """
        service_name = service
        if service_name == 'mcs-workernode':
            service_name = cls._workernode_get_service_name(is_primary)

        logging.debug(f'Reloading "{service_name}".')
        if not cls._systemctl_call('reload', service_name, use_sudo):
            logging.error(f'Failed while reloading "{service_name}".')
            return False

        # a successfully reloaded service stays active
        return cls.is_service_running(service, use_sudo)
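A sketch of how a caller might pick between the two dispatchers above; the PID 1 heuristic is an assumption made for illustration, not the selection logic cmapi actually ships.

from cmapi_server.process_dispatchers.container import ContainerDispatcher
from cmapi_server.process_dispatchers.systemd import SystemdDispatcher

def pick_dispatcher():
    # Assumption for illustration: treat a systemd PID 1 as "use systemd".
    with open('/proc/1/comm', encoding='utf-8') as fh:
        pid1 = fh.read().strip()
    cls = SystemdDispatcher if pid1 == 'systemd' else ContainerDispatcher
    cls.init()  # SystemdDispatcher.init() probes the installed systemctl version
    return cls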
501
cmapi/cmapi_server/test/CS-config-test.xml
Normal file
@ -0,0 +1,501 @@
<Columnstore Version="V1.0.0">
    <!--
        WARNING: Do not make changes to this file unless directed to do so by
        MariaDB service engineers. Incorrect settings can render your system
        unusable and will require a service call to correct.
    -->
    <ExeMgr1>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8601</Port>
        <Module>unassigned</Module>
    </ExeMgr1>
    <JobProc>
        <IPAddr>0.0.0.0</IPAddr>
        <Port>8602</Port>
    </JobProc>
    <ProcMgr>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8603</Port>
    </ProcMgr>
    <ProcMgr_Alarm>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8606</Port>
    </ProcMgr_Alarm>
    <ProcStatusControl>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8604</Port>
    </ProcStatusControl>
    <ProcStatusControlStandby>
        <IPAddr>0.0.0.0</IPAddr>
        <Port>8605</Port>
    </ProcStatusControlStandby>
    <!-- Disabled
    <ProcHeartbeatControl>
        <IPAddr>0.0.0.0</IPAddr>
        <Port>8605</Port>
    </ProcHeartbeatControl>
    -->
    <!-- ProcessMonitor Port: 8800 - 8820 is reserved to support External Modules-->
    <localhost_ProcessMonitor>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8800</Port>
    </localhost_ProcessMonitor>
    <dm1_ProcessMonitor>
        <IPAddr>0.0.0.0</IPAddr>
        <Port>8800</Port>
    </dm1_ProcessMonitor>
    <um1_ProcessMonitor>
        <IPAddr>0.0.0.0</IPAddr>
        <Port>8800</Port>
    </um1_ProcessMonitor>
    <pm1_ProcessMonitor>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8800</Port>
    </pm1_ProcessMonitor>
    <dm1_ServerMonitor>
        <IPAddr>0.0.0.0</IPAddr>
        <Port>8622</Port>
    </dm1_ServerMonitor>
    <um1_ServerMonitor>
        <IPAddr>0.0.0.0</IPAddr>
        <Port>8622</Port>
    </um1_ServerMonitor>
    <pm1_ServerMonitor>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8622</Port>
    </pm1_ServerMonitor>
    <pm1_WriteEngineServer>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8630</Port>
    </pm1_WriteEngineServer>
    <DDLProc>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8612</Port>
    </DDLProc>
    <DMLProc>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8614</Port>
    </DMLProc>
    <BatchInsert>
        <RowsPerBatch>10000</RowsPerBatch>
    </BatchInsert>
    <PrimitiveServers>
        <Count>1</Count>
        <ConnectionsPerPrimProc>2</ConnectionsPerPrimProc>
        <ProcessorThreshold>128</ProcessorThreshold>
        <ProcessorQueueSize>10K</ProcessorQueueSize> <!-- minimum of extent size 8192 -->
        <DebugLevel>0</DebugLevel>
        <ColScanBufferSizeBlocks>512</ColScanBufferSizeBlocks>
        <ColScanReadAheadBlocks>512</ColScanReadAheadBlocks> <!-- s/b factor of extent size 8192 -->
        <!-- <BPPCount>16</BPPCount> --> <!-- Default num cores * 2. A cap on the number of simultaneous primitives per jobstep -->
        <PrefetchThreshold>1</PrefetchThreshold>
        <PTTrace>0</PTTrace>
        <RotatingDestination>n</RotatingDestination> <!-- Iterate thru UM ports; set to 'n' if UM/PM on same server -->
        <!-- <HighPriorityPercentage>60</HighPriorityPercentage> -->
        <!-- <MediumPriorityPercentage>30</MediumPriorityPercentage> -->
        <!-- <LowPriorityPercentage>10</LowPriorityPercentage> -->
        <DirectIO>y</DirectIO>
        <HighPriorityPercentage/>
        <MediumPriorityPercentage/>
        <LowPriorityPercentage/>
    </PrimitiveServers>
    <PMS1>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8620</Port>
    </PMS1>
    <PMS2>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8620</Port>
    </PMS2>
    <PMS3>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8620</Port>
    </PMS3>
    <PMS4>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8620</Port>
    </PMS4>
    <PMS5>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8620</Port>
    </PMS5>
    <PMS6>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8620</Port>
    </PMS6>
    <PMS7>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8620</Port>
    </PMS7>
    <PMS8>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8620</Port>
    </PMS8>
    <PMS9>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8620</Port>
    </PMS9>
    <PMS10>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8620</Port>
    </PMS10>
    <PMS11>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8620</Port>
    </PMS11>
    <PMS12>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8620</Port>
    </PMS12>
    <PMS13>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8620</Port>
    </PMS13>
    <PMS14>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8620</Port>
    </PMS14>
    <PMS15>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8620</Port>
    </PMS15>
    <PMS16>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8620</Port>
    </PMS16>
    <PMS17>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8620</Port>
    </PMS17>
    <PMS18>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8620</Port>
    </PMS18>
    <PMS19>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8620</Port>
    </PMS19>
    <PMS20>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8620</Port>
    </PMS20>
    <PMS21>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8620</Port>
    </PMS21>
    <PMS22>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8620</Port>
    </PMS22>
    <PMS23>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8620</Port>
    </PMS23>
    <PMS24>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8620</Port>
    </PMS24>
    <PMS25>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8620</Port>
    </PMS25>
    <PMS26>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8620</Port>
    </PMS26>
    <PMS27>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8620</Port>
    </PMS27>
    <PMS28>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8620</Port>
    </PMS28>
    <PMS29>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8620</Port>
    </PMS29>
    <PMS30>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8620</Port>
    </PMS30>
    <PMS31>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8620</Port>
    </PMS31>
    <PMS32>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8620</Port>
    </PMS32>
    <SystemConfig>
        <SystemName>columnstore-1</SystemName>
        <ParentOAMModuleName>pm1</ParentOAMModuleName>
        <PrimaryUMModuleName>pm1</PrimaryUMModuleName>
        <!-- Warning: Do not change this value once database is built -->
        <DBRootCount>1</DBRootCount>
        <DBRoot1>/var/lib/columnstore/data1</DBRoot1>
        <DBRMRoot>/var/lib/columnstore/data1/systemFiles/dbrm/BRM_saves</DBRMRoot>
        <TableLockSaveFile>/var/lib/columnstore/data1/systemFiles/dbrm/tablelocks</TableLockSaveFile>
        <DBRMTimeOut>15</DBRMTimeOut> <!-- in seconds -->
        <DBRMSnapshotInterval>100000</DBRMSnapshotInterval>
        <WaitPeriod>10</WaitPeriod> <!-- in seconds -->
        <MemoryCheckPercent>95</MemoryCheckPercent> <!-- Max real memory to limit growth of buffers to -->
        <DataFileLog>OFF</DataFileLog>
        <!-- enable if you want to limit how much memory may be used for hdfs read/write memory buffers.
        <hdfsRdwrBufferMaxSize>8G</hdfsRdwrBufferMaxSize>
        -->
        <hdfsRdwrScratch>/rdwrscratch</hdfsRdwrScratch> <!-- Do not set to an hdfs file path -->
        <!-- Be careful modifying SystemTempFileDir! On start, ExeMgr deletes
        the entire subdirectories "joins" & "aggregates" and recreates it to make sure no
        files are left behind. -->
        <SystemTempFileDir>/tmp/columnstore_tmp_files</SystemTempFileDir>
    </SystemConfig>
    <SystemModuleConfig>
        <ModuleType1>dm</ModuleType1>
        <ModuleDesc1>Director Module</ModuleDesc1>
        <ModuleCount1>0</ModuleCount1>
        <ModuleIPAddr1-1-1>0.0.0.0</ModuleIPAddr1-1-1>
        <ModuleHostName1-1-1>unassigned</ModuleHostName1-1-1>
        <ModuleDisableState1-1>ENABLED</ModuleDisableState1-1>
        <ModuleCPUCriticalThreshold1>0</ModuleCPUCriticalThreshold1>
        <ModuleCPUMajorThreshold1>0</ModuleCPUMajorThreshold1>
        <ModuleCPUMinorThreshold1>0</ModuleCPUMinorThreshold1>
        <ModuleCPUMinorClearThreshold1>0</ModuleCPUMinorClearThreshold1>
        <ModuleDiskCriticalThreshold1>90</ModuleDiskCriticalThreshold1>
        <ModuleDiskMajorThreshold1>80</ModuleDiskMajorThreshold1>
        <ModuleDiskMinorThreshold1>70</ModuleDiskMinorThreshold1>
        <ModuleMemCriticalThreshold1>90</ModuleMemCriticalThreshold1>
        <ModuleMemMajorThreshold1>0</ModuleMemMajorThreshold1>
        <ModuleMemMinorThreshold1>0</ModuleMemMinorThreshold1>
        <ModuleSwapCriticalThreshold1>90</ModuleSwapCriticalThreshold1>
        <ModuleSwapMajorThreshold1>80</ModuleSwapMajorThreshold1>
        <ModuleSwapMinorThreshold1>70</ModuleSwapMinorThreshold1>
        <ModuleDiskMonitorFileSystem1-1>/</ModuleDiskMonitorFileSystem1-1>
        <ModuleDBRootCount1-1>unassigned</ModuleDBRootCount1-1>
        <ModuleDBRootID1-1-1>unassigned</ModuleDBRootID1-1-1>
        <ModuleType2>um</ModuleType2>
        <ModuleDesc2>User Module</ModuleDesc2>
        <ModuleCount2>0</ModuleCount2>
        <ModuleIPAddr1-1-2>0.0.0.0</ModuleIPAddr1-1-2>
        <ModuleHostName1-1-2>unassigned</ModuleHostName1-1-2>
        <ModuleDisableState1-2>ENABLED</ModuleDisableState1-2>
        <ModuleCPUCriticalThreshold2>0</ModuleCPUCriticalThreshold2>
        <ModuleCPUMajorThreshold2>0</ModuleCPUMajorThreshold2>
        <ModuleCPUMinorThreshold2>0</ModuleCPUMinorThreshold2>
        <ModuleCPUMinorClearThreshold2>0</ModuleCPUMinorClearThreshold2>
        <ModuleDiskCriticalThreshold2>90</ModuleDiskCriticalThreshold2>
        <ModuleDiskMajorThreshold2>80</ModuleDiskMajorThreshold2>
        <ModuleDiskMinorThreshold2>70</ModuleDiskMinorThreshold2>
        <ModuleMemCriticalThreshold2>90</ModuleMemCriticalThreshold2>
        <ModuleMemMajorThreshold2>0</ModuleMemMajorThreshold2>
        <ModuleMemMinorThreshold2>0</ModuleMemMinorThreshold2>
        <ModuleSwapCriticalThreshold2>90</ModuleSwapCriticalThreshold2>
        <ModuleSwapMajorThreshold2>80</ModuleSwapMajorThreshold2>
        <ModuleSwapMinorThreshold2>70</ModuleSwapMinorThreshold2>
        <ModuleDiskMonitorFileSystem1-2>/</ModuleDiskMonitorFileSystem1-2>
        <ModuleDBRootCount1-2>unassigned</ModuleDBRootCount1-2>
        <ModuleDBRootID1-1-2>unassigned</ModuleDBRootID1-1-2>
        <ModuleType3>pm</ModuleType3>
        <ModuleDesc3>Performance Module</ModuleDesc3>
        <ModuleCount3>1</ModuleCount3>
        <ModuleIPAddr1-1-3>127.0.0.1</ModuleIPAddr1-1-3>
        <ModuleHostName1-1-3>localhost</ModuleHostName1-1-3>
        <ModuleDisableState1-3>ENABLED</ModuleDisableState1-3>
        <ModuleCPUCriticalThreshold3>0</ModuleCPUCriticalThreshold3>
        <ModuleCPUMajorThreshold3>0</ModuleCPUMajorThreshold3>
        <ModuleCPUMinorThreshold3>0</ModuleCPUMinorThreshold3>
        <ModuleCPUMinorClearThreshold3>0</ModuleCPUMinorClearThreshold3>
        <ModuleDiskCriticalThreshold3>90</ModuleDiskCriticalThreshold3>
        <ModuleDiskMajorThreshold3>80</ModuleDiskMajorThreshold3>
        <ModuleDiskMinorThreshold3>70</ModuleDiskMinorThreshold3>
        <ModuleMemCriticalThreshold3>90</ModuleMemCriticalThreshold3>
        <ModuleMemMajorThreshold3>0</ModuleMemMajorThreshold3>
        <ModuleMemMinorThreshold3>0</ModuleMemMinorThreshold3>
        <ModuleSwapCriticalThreshold3>90</ModuleSwapCriticalThreshold3>
        <ModuleSwapMajorThreshold3>80</ModuleSwapMajorThreshold3>
        <ModuleSwapMinorThreshold3>70</ModuleSwapMinorThreshold3>
        <ModuleDiskMonitorFileSystem1-3>/</ModuleDiskMonitorFileSystem1-3>
        <ModuleDBRootCount1-3>1</ModuleDBRootCount1-3>
        <ModuleDBRootID1-1-3>1</ModuleDBRootID1-1-3>
    </SystemModuleConfig>
    <SystemExtDeviceConfig>
        <Count>0</Count>
        <Name1>unassigned</Name1>
        <IPAddr1>0.0.0.0</IPAddr1>
        <DisableState1>ENABLED</DisableState1>
    </SystemExtDeviceConfig>
    <SessionManager>
        <MaxConcurrentTransactions>1000</MaxConcurrentTransactions>
        <TxnIDFile>/var/lib/columnstore/data1/systemFiles/dbrm/SMTxnID</TxnIDFile>
    </SessionManager>
    <VersionBuffer>
        <!-- VersionBufferFileSize must be a multiple of 8192.
        One version buffer file will be put on each DB root. -->
        <VersionBufferFileSize>1GB</VersionBufferFileSize>
    </VersionBuffer>
    <OIDManager>
        <!-- Do not change this file after database built -->
        <OIDBitmapFile>/var/lib/columnstore/data1/systemFiles/dbrm/oidbitmap</OIDBitmapFile>
        <!-- Do not change this value after database built -->
        <FirstOID>3000</FirstOID>
    </OIDManager>
    <WriteEngine>
        <BulkRoot>/var/log/mariadb/columnstore/data/bulk</BulkRoot>
        <BulkRollbackDir>/var/lib/columnstore/data1/systemFiles/bulkRollback</BulkRollbackDir>
        <MaxFileSystemDiskUsagePct>98</MaxFileSystemDiskUsagePct>
        <CompressedPaddingBlocks>1</CompressedPaddingBlocks> <!-- Number of blocks used to pad compressed chunks -->
        <FastDelete>n</FastDelete>
    </WriteEngine>
    <DBRM_Controller>
        <NumWorkers>1</NumWorkers>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8616</Port>
    </DBRM_Controller>
    <!-- Worker Port: 8700 - 8720 is reserved to support External Modules-->
    <DBRM_Worker1>
        <IPAddr>127.0.0.1</IPAddr>
        <Port>8700</Port>
        <Module>pm1</Module>
    </DBRM_Worker1>
    <DBRM_Worker2>
        <IPAddr>0.0.0.0</IPAddr>
        <Port>8700</Port>
        <Module>unassigned</Module>
    </DBRM_Worker2>
    <DBRM_Worker3>
        <IPAddr>0.0.0.0</IPAddr>
        <Port>8700</Port>
        <Module>unassigned</Module>
    </DBRM_Worker3>
    <DBRM_Worker4>
        <IPAddr>0.0.0.0</IPAddr>
        <Port>8700</Port>
        <Module>unassigned</Module>
    </DBRM_Worker4>
    <DBRM_Worker5>
        <IPAddr>0.0.0.0</IPAddr>
        <Port>8700</Port>
        <Module>unassigned</Module>
    </DBRM_Worker5>
    <DBRM_Worker6>
        <IPAddr>0.0.0.0</IPAddr>
        <Port>8700</Port>
        <Module>unassigned</Module>
    </DBRM_Worker6>
    <DBRM_Worker7>
        <IPAddr>0.0.0.0</IPAddr>
        <Port>8700</Port>
        <Module>unassigned</Module>
    </DBRM_Worker7>
    <DBRM_Worker8>
        <IPAddr>0.0.0.0</IPAddr>
        <Port>8700</Port>
        <Module>unassigned</Module>
    </DBRM_Worker8>
    <DBRM_Worker9>
        <IPAddr>0.0.0.0</IPAddr>
        <Port>8700</Port>
        <Module>unassigned</Module>
    </DBRM_Worker9>
    <DBRM_Worker10>
        <IPAddr>0.0.0.0</IPAddr>
        <Port>8700</Port>
        <Module>unassigned</Module>
    </DBRM_Worker10>
    <DBBC>
        <!-- The percentage of RAM to use for the disk block cache. Defaults to 70% -->
        <!-- Alternatively, this can be specified in absolute terms using
        the suffixes 'm' or 'g' to denote size in megabytes or gigabytes.-->
        <!-- <NumBlocksPct>95</NumBlocksPct> -->
        <!-- <NumThreads>16</NumThreads> --> <!-- 1-256. Default is 16. -->
        <NumCaches>1</NumCaches><!-- # of parallel caches to instantiate -->
        <IOMTracing>0</IOMTracing>
        <BRPTracing>0</BRPTracing>
        <ReportFrequency>65536</ReportFrequency>
        <MaxOpenFiles>2K</MaxOpenFiles>
        <DecreaseOpenFilesCount>200</DecreaseOpenFilesCount>
        <FDCacheTrace>0</FDCacheTrace>
        <NumBlocksPct>50</NumBlocksPct>
    </DBBC>
    <Installation>
        <ServerTypeInstall>2</ServerTypeInstall>
        <PMwithUM>n</PMwithUM>
        <MySQLRep>n</MySQLRep>
        <DBRootStorageType>internal</DBRootStorageType>
        <UMStorageType>internal</UMStorageType>
        <ProfileFile>/etc/profile.d/columnstoreAlias.sh</ProfileFile>
        <DataRedundancyNetworkType/>
    </Installation>
    <ExtentMap>
        <!--
            WARNING: these can only be changed on an empty system. Once any object has been allocated
            it cannot be changed!. Extent size is 8M rows.
        -->
        <FilesPerColumnPartition>4</FilesPerColumnPartition> <!-- should be multiple of DBRootCount -->
        <BRM_UID>0x0</BRM_UID>
    </ExtentMap>
    <HashJoin>
        <MaxBuckets>128</MaxBuckets>
        <MaxElems>128K</MaxElems> <!-- 128 buckets * 128K * 16 = 256 MB -->
        <PmMaxMemorySmallSide>1G</PmMaxMemorySmallSide>
        <TotalUmMemory>25%</TotalUmMemory>
        <CPUniqueLimit>100</CPUniqueLimit>
        <AllowDiskBasedJoin>N</AllowDiskBasedJoin>
        <TempFileCompression>Y</TempFileCompression>
        <TempFileCompressionType>Snappy</TempFileCompressionType> <!-- LZ4, Snappy -->
    </HashJoin>
    <JobList>
        <FlushInterval>16K</FlushInterval>
        <FifoSize>16</FifoSize>
        <RequestSize>1</RequestSize> <!-- Number of extents per request, should be
        less than MaxOutstandingRequests. Otherwise, default value 1 is used. -->
        <!-- ProcessorThreadsPerScan is the number of jobs issued to process
        each extent. The default is 16. MaxOutstandingRequests is the size of
        the window of work in terms of extents. A value of 20 means there
        is 20 extents worth of work for the PMs to process at any given time.
        ProcessorThreadsPerScan * MaxOutstandingRequests should be at least
        as many threads are available across all PMs. -->
        <!-- <ProcessorThreadsPerScan>16</ProcessorThreadsPerScan> -->
        <!-- MaxOutstandingRequests is going to default to the num of cores available
        across all performance modules * 4 divided by the ProcessorThreadsPerScan,
        but will be lower bounded by 20 -->
        <!-- <MaxOutstandingRequests>20</MaxOutstandingRequests> -->
        <ThreadPoolSize>100</ThreadPoolSize>
    </JobList>
    <RowAggregation>
        <!-- <RowAggrThreads>4</RowAggrThreads> --> <!-- Default value is the number of cores -->
        <!-- <RowAggrBuckets>32</RowAggrBuckets> --> <!-- Default value is number of cores * 4 -->
        <!-- <RowAggrRowGroupsPerThread>20</RowAggrRowGroupsPerThread> --> <!-- Default value is 20 -->
        <AllowDiskBasedAggregation>N</AllowDiskBasedAggregation>
    </RowAggregation>
    <CrossEngineSupport>
        <Host>127.0.0.1</Host>
        <Port>3306</Port>
        <User>root</User>
        <Password/>
        <TLSCA/>
        <TLSClientCert/>
        <TLSClientKey/>
    </CrossEngineSupport>
    <QueryStats>
        <Enabled>N</Enabled>
    </QueryStats>
    <UserPriority>
        <Enabled>N</Enabled>
    </UserPriority>
    <NetworkCompression>
        <Enabled>Y</Enabled>
        <NetworkCompressionType>Snappy</NetworkCompressionType> <!-- LZ4, Snappy -->
    </NetworkCompression>
    <QueryTele>
        <Host>127.0.0.1</Host>
        <Port>0</Port>
    </QueryTele>
    <StorageManager>
        <MaxSockets>30</MaxSockets>
        <Enabled>N</Enabled>
    </StorageManager>
    <DataRedundancyConfig>
        <DBRoot1PMs/>
    </DataRedundancyConfig>
</Columnstore>
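Because the test fixture above is plain XML, a test can read any value with the Python standard library; this is a hedged sketch, assuming it is run from the repository root.

import xml.etree.ElementTree as ET

tree = ET.parse('cmapi/cmapi_server/test/CS-config-test.xml')
# findtext() takes a relative element path; DBRM_Controller/Port is 8616 above.
assert tree.getroot().findtext('DBRM_Controller/Port') == '8616'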
535
cmapi/cmapi_server/test/Columnstore_apply_config.xml
Normal file
@ -0,0 +1,535 @@
|
||||
<Columnstore Version="V1.0.0">
|
||||
<!--
|
||||
WARNING: Do not make changes to this file unless directed to do so by
|
||||
MariaDB service engineers. Incorrect settings can render your system
|
||||
unusable and will require a service call to correct.
|
||||
-->
|
||||
<ClusterManager>0.0.0.0</ClusterManager>
|
||||
<ConfigRevision>2</ConfigRevision>
|
||||
<NextNodeId>3</NextNodeId>
|
||||
<NextDBRootId>5</NextDBRootId>
|
||||
<ExeMgr1>
|
||||
<IPAddr>10.128.0.23</IPAddr>
|
||||
<Port>8601</Port>
|
||||
<Module>pm1</Module>
|
||||
</ExeMgr1>
|
||||
<JobProc>
|
||||
<IPAddr>0.0.0.0</IPAddr>
|
||||
<Port>8602</Port>
|
||||
</JobProc>
|
||||
<ProcMgr>
|
||||
<IPAddr>10.128.0.23</IPAddr>
|
||||
<Port>8603</Port>
|
||||
</ProcMgr>
|
||||
<ProcMgr_Alarm>
|
||||
<IPAddr>10.128.0.23</IPAddr>
|
||||
<Port>8606</Port>
|
||||
</ProcMgr_Alarm>
|
||||
<ProcStatusControl>
|
||||
<IPAddr>10.128.0.23</IPAddr>
|
||||
<Port>8604</Port>
|
||||
</ProcStatusControl>
|
||||
<ProcStatusControlStandby>
|
||||
<IPAddr>0.0.0.0</IPAddr>
|
||||
<Port>8605</Port>
|
||||
</ProcStatusControlStandby>
|
||||
<!-- Disabled
|
||||
<ProcHeartbeatControl>
|
||||
<IPAddr>0.0.0.0</IPAddr>
|
||||
<Port>8605</Port>
|
||||
</ProcHeartbeatControl>
|
||||
-->
|
||||
<!-- ProcessMonitor Port: 8800 - 8820 is reserved to support External Modules-->
|
||||
<localhost_ProcessMonitor>
|
||||
<IPAddr>127.0.0.1</IPAddr>
|
||||
<Port>8800</Port>
|
||||
</localhost_ProcessMonitor>
|
||||
<dm1_ProcessMonitor>
|
||||
<IPAddr>0.0.0.0</IPAddr>
|
||||
<Port>8800</Port>
|
||||
</dm1_ProcessMonitor>
|
||||
<um1_ProcessMonitor>
|
||||
<IPAddr>0.0.0.0</IPAddr>
|
||||
<Port>8800</Port>
|
||||
</um1_ProcessMonitor>
|
||||
<pm1_ProcessMonitor>
|
||||
<IPAddr>10.128.0.23</IPAddr>
|
||||
<Port>8800</Port>
|
||||
</pm1_ProcessMonitor>
|
||||
<dm1_ServerMonitor>
|
||||
<IPAddr>0.0.0.0</IPAddr>
|
||||
<Port>8622</Port>
|
||||
</dm1_ServerMonitor>
|
||||
<um1_ServerMonitor>
|
||||
<IPAddr>0.0.0.0</IPAddr>
|
||||
<Port>8622</Port>
|
||||
</um1_ServerMonitor>
|
||||
<pm1_ServerMonitor>
|
||||
<IPAddr>10.128.0.23</IPAddr>
|
||||
<Port>8622</Port>
|
||||
</pm1_ServerMonitor>
|
||||
<pm1_WriteEngineServer>
|
||||
<IPAddr>10.128.0.23</IPAddr>
|
||||
<Port>8630</Port>
|
||||
</pm1_WriteEngineServer>
|
||||
<DDLProc>
|
||||
<IPAddr>10.128.0.23</IPAddr>
|
||||
<Port>8612</Port>
|
||||
</DDLProc>
|
||||
<DMLProc>
|
||||
<IPAddr>10.128.0.23</IPAddr>
|
||||
<Port>8614</Port>
|
||||
</DMLProc>
|
||||
<BatchInsert>
|
||||
<RowsPerBatch>10000</RowsPerBatch>
|
||||
</BatchInsert>
|
||||
<PrimitiveServers>
|
||||
<Count>2</Count>
|
||||
<ConnectionsPerPrimProc>2</ConnectionsPerPrimProc>
|
||||
<ProcessorThreshold>128</ProcessorThreshold>
|
||||
<ProcessorQueueSize>10K</ProcessorQueueSize> <!-- minimum of extent size 8192 -->
|
||||
<DebugLevel>0</DebugLevel>
|
||||
<ColScanBufferSizeBlocks>512</ColScanBufferSizeBlocks>
|
||||
<ColScanReadAheadBlocks>512</ColScanReadAheadBlocks> <!-- s/b factor of extent size 8192 -->
|
||||
<!-- <BPPCount>16</BPPCount> --> <!-- Default num cores * 2. A cap on the number of simultaneous primitives per jobstep -->
|
||||
<PrefetchThreshold>1</PrefetchThreshold>
|
||||
<PTTrace>0</PTTrace>
|
||||
<RotatingDestination>n</RotatingDestination> <!-- Iterate thru UM ports; set to 'n' if UM/PM on same server -->
|
||||
<!-- <HighPriorityPercentage>60</HighPriorityPercentage> -->
|
||||
<!-- <MediumPriorityPercentage>30</MediumPriorityPercentage> -->
|
||||
<!-- <LowPriorityPercentage>10</LowPriorityPercentage> -->
|
||||
<DirectIO>y</DirectIO>
|
||||
</PrimitiveServers>
|
||||
<PMS1>
|
||||
<IPAddr>10.128.0.23</IPAddr>
|
||||
<Port>8620</Port>
|
||||
</PMS1>
|
||||
<PMS2>
|
||||
<IPAddr>172.30.0.144</IPAddr>
|
||||
<Port>8620</Port>
|
||||
</PMS2>
|
||||
<PMS3>
|
||||
<IPAddr>10.128.0.23</IPAddr>
|
||||
<Port>8620</Port>
|
||||
</PMS3>
|
||||
<PMS4>
|
||||
<IPAddr>172.30.0.144</IPAddr>
|
||||
<Port>8620</Port>
|
||||
</PMS4>
|
||||
<PMS5>
|
||||
<IPAddr>10.128.0.23</IPAddr>
|
||||
<Port>8620</Port>
|
||||
</PMS5>
|
||||
<PMS6>
|
||||
<IPAddr>172.30.0.144</IPAddr>
|
||||
<Port>8620</Port>
|
||||
</PMS6>
|
||||
<PMS7>
|
||||
<IPAddr>10.128.0.23</IPAddr>
|
||||
<Port>8620</Port>
|
||||
</PMS7>
|
||||
<PMS8>
|
||||
<IPAddr>172.30.0.144</IPAddr>
|
||||
<Port>8620</Port>
|
||||
</PMS8>
|
||||
<PMS9>
|
||||
<IPAddr>10.128.0.23</IPAddr>
|
||||
<Port>8620</Port>
|
||||
</PMS9>
|
||||
<PMS10>
|
||||
<IPAddr>172.30.0.144</IPAddr>
|
||||
<Port>8620</Port>
|
||||
</PMS10>
|
||||
<PMS11>
|
||||
<IPAddr>10.128.0.23</IPAddr>
|
||||
<Port>8620</Port>
|
||||
</PMS11>
|
||||
<PMS12>
|
||||
<IPAddr>172.30.0.144</IPAddr>
|
||||
<Port>8620</Port>
|
||||
</PMS12>
|
||||
<PMS13>
|
||||
<IPAddr>10.128.0.23</IPAddr>
|
||||
<Port>8620</Port>
|
||||
</PMS13>
|
||||
<PMS14>
|
||||
<IPAddr>172.30.0.144</IPAddr>
|
||||
<Port>8620</Port>
|
||||
</PMS14>
|
||||
<PMS15>
|
||||
<IPAddr>10.128.0.23</IPAddr>
|
||||
<Port>8620</Port>
|
||||
</PMS15>
|
||||
<PMS16>
|
||||
<IPAddr>172.30.0.144</IPAddr>
|
||||
<Port>8620</Port>
|
||||
</PMS16>
|
||||
<PMS17>
|
||||
<IPAddr>10.128.0.23</IPAddr>
|
||||
<Port>8620</Port>
|
||||
</PMS17>
|
||||
<PMS18>
|
||||
<IPAddr>172.30.0.144</IPAddr>
|
||||
<Port>8620</Port>
|
||||
</PMS18>
|
||||
<PMS19>
|
||||
<IPAddr>10.128.0.23</IPAddr>
|
||||
<Port>8620</Port>
|
||||
</PMS19>
|
||||
<PMS20>
|
||||
<IPAddr>172.30.0.144</IPAddr>
|
||||
<Port>8620</Port>
|
||||
</PMS20>
|
||||
<PMS21>
|
||||
<IPAddr>10.128.0.23</IPAddr>
|
||||
<Port>8620</Port>
|
||||
</PMS21>
|
||||
<PMS22>
|
||||
<IPAddr>172.30.0.144</IPAddr>
|
||||
<Port>8620</Port>
|
||||
</PMS22>
|
||||
<PMS23>
|
||||
<IPAddr>10.128.0.23</IPAddr>
|
||||
<Port>8620</Port>
|
||||
</PMS23>
|
||||
<PMS24>
|
||||
<IPAddr>172.30.0.144</IPAddr>
|
||||
<Port>8620</Port>
|
||||
</PMS24>
|
||||
<PMS25>
|
||||
<IPAddr>10.128.0.23</IPAddr>
|
||||
<Port>8620</Port>
|
||||
</PMS25>
|
||||
<PMS26>
|
||||
<IPAddr>172.30.0.144</IPAddr>
|
||||
<Port>8620</Port>
|
||||
</PMS26>
|
||||
<PMS27>
|
||||
<IPAddr>10.128.0.23</IPAddr>
|
||||
<Port>8620</Port>
|
||||
</PMS27>
|
||||
<PMS28>
|
||||
<IPAddr>172.30.0.144</IPAddr>
|
||||
<Port>8620</Port>
|
||||
</PMS28>
|
||||
<PMS29>
|
||||
<IPAddr>10.128.0.23</IPAddr>
|
||||
<Port>8620</Port>
|
||||
</PMS29>
|
||||
<PMS30>
|
||||
<IPAddr>172.30.0.144</IPAddr>
|
||||
<Port>8620</Port>
|
||||
</PMS30>
|
||||
<PMS31>
|
||||
<IPAddr>10.128.0.23</IPAddr>
|
||||
<Port>8620</Port>
|
||||
</PMS31>
|
||||
<PMS32>
|
||||
<IPAddr>172.30.0.144</IPAddr>
|
||||
<Port>8620</Port>
|
||||
</PMS32>
|
||||
<SystemConfig>
|
||||
<SystemLang>C</SystemLang>
|
||||
<SystemName>columnstore-1</SystemName>
|
||||
<ParentOAMModuleName>pm1</ParentOAMModuleName>
|
||||
<PrimaryUMModuleName>pm1</PrimaryUMModuleName>
|
||||
<!-- Warning: Do not change this value once database is built -->
|
||||
<DBRootCount>2</DBRootCount>
|
||||
<DBRoot1>/var/lib/columnstore/data1</DBRoot1>
|
||||
<DBRMRoot>/var/lib/columnstore/data1/systemFiles/dbrm/BRM_saves</DBRMRoot>
|
||||
<TableLockSaveFile>/var/lib/columnstore/data1/systemFiles/dbrm/tablelocks</TableLockSaveFile>
|
||||
<DBRMTimeOut>20</DBRMTimeOut> <!-- in seconds -->
|
||||
<DBRMSnapshotInterval>100000</DBRMSnapshotInterval>
|
||||
<WaitPeriod>10</WaitPeriod> <!-- in seconds -->
|
||||
<MemoryCheckPercent>95</MemoryCheckPercent> <!-- Max real memory to limit growth of buffers to -->
|
||||
<DataFileLog>OFF</DataFileLog>
|
||||
<!-- enable if you want to limit how much memory may be used for hdfs read/write memory buffers.
|
||||
<hdfsRdwrBufferMaxSize>8G</hdfsRdwrBufferMaxSize>
|
||||
-->
|
||||
<hdfsRdwrScratch>/rdwrscratch</hdfsRdwrScratch> <!-- Do not set to an hdfs file path -->
|
||||
<!-- Be careful modifying SystemTempFileDir! On start, ExeMgr deletes
|
||||
the entire subdirectories "joins" & "aggregates" and recreates it to make sure no
|
||||
files are left behind. -->
|
||||
<SystemTempFileDir>/tmp/columnstore_tmp_files</SystemTempFileDir>
|
||||
<DataFilePlugin/>
|
||||
<DBRoot2>/var/lib/columnstore/data2</DBRoot2>
|
||||
</SystemConfig>
|
||||
<SystemModuleConfig>
|
||||
<ModuleType1>dm</ModuleType1>
|
||||
<ModuleDesc1>Director Module</ModuleDesc1>
|
||||
<ModuleCount1>0</ModuleCount1>
|
||||
<ModuleIPAddr1-1-1>0.0.0.0</ModuleIPAddr1-1-1>
|
||||
<ModuleHostName1-1-1>unassigned</ModuleHostName1-1-1>
|
||||
<ModuleDisableState1-1>ENABLED</ModuleDisableState1-1>
|
||||
<ModuleCPUCriticalThreshold1>0</ModuleCPUCriticalThreshold1>
|
||||
<ModuleCPUMajorThreshold1>0</ModuleCPUMajorThreshold1>
|
||||
<ModuleCPUMinorThreshold1>0</ModuleCPUMinorThreshold1>
|
||||
<ModuleCPUMinorClearThreshold1>0</ModuleCPUMinorClearThreshold1>
|
||||
<ModuleDiskCriticalThreshold1>90</ModuleDiskCriticalThreshold1>
|
||||
<ModuleDiskMajorThreshold1>80</ModuleDiskMajorThreshold1>
|
||||
<ModuleDiskMinorThreshold1>70</ModuleDiskMinorThreshold1>
|
||||
<ModuleMemCriticalThreshold1>90</ModuleMemCriticalThreshold1>
|
||||
<ModuleMemMajorThreshold1>0</ModuleMemMajorThreshold1>
|
||||
<ModuleMemMinorThreshold1>0</ModuleMemMinorThreshold1>
|
||||
<ModuleSwapCriticalThreshold1>90</ModuleSwapCriticalThreshold1>
|
||||
<ModuleSwapMajorThreshold1>80</ModuleSwapMajorThreshold1>
|
||||
<ModuleSwapMinorThreshold1>70</ModuleSwapMinorThreshold1>
|
||||
<ModuleDiskMonitorFileSystem1-1>/</ModuleDiskMonitorFileSystem1-1>
|
||||
<ModuleDBRootCount1-1>unassigned</ModuleDBRootCount1-1>
|
||||
<ModuleDBRootID1-1-1>unassigned</ModuleDBRootID1-1-1>
|
||||
<ModuleType2>um</ModuleType2>
|
||||
<ModuleDesc2>User Module</ModuleDesc2>
|
||||
<ModuleCount2>0</ModuleCount2>
|
||||
<ModuleIPAddr1-1-2>0.0.0.0</ModuleIPAddr1-1-2>
|
||||
<ModuleHostName1-1-2>unassigned</ModuleHostName1-1-2>
|
||||
<ModuleDisableState1-2>ENABLED</ModuleDisableState1-2>
|
||||
<ModuleCPUCriticalThreshold2>0</ModuleCPUCriticalThreshold2>
|
||||
<ModuleCPUMajorThreshold2>0</ModuleCPUMajorThreshold2>
|
||||
<ModuleCPUMinorThreshold2>0</ModuleCPUMinorThreshold2>
|
||||
<ModuleCPUMinorClearThreshold2>0</ModuleCPUMinorClearThreshold2>
|
||||
<ModuleDiskCriticalThreshold2>90</ModuleDiskCriticalThreshold2>
|
||||
<ModuleDiskMajorThreshold2>80</ModuleDiskMajorThreshold2>
|
||||
<ModuleDiskMinorThreshold2>70</ModuleDiskMinorThreshold2>
|
||||
<ModuleMemCriticalThreshold2>90</ModuleMemCriticalThreshold2>
|
||||
<ModuleMemMajorThreshold2>0</ModuleMemMajorThreshold2>
|
||||
<ModuleMemMinorThreshold2>0</ModuleMemMinorThreshold2>
|
||||
<ModuleSwapCriticalThreshold2>90</ModuleSwapCriticalThreshold2>
|
||||
<ModuleSwapMajorThreshold2>80</ModuleSwapMajorThreshold2>
|
||||
<ModuleSwapMinorThreshold2>70</ModuleSwapMinorThreshold2>
|
||||
<ModuleDiskMonitorFileSystem1-2>/</ModuleDiskMonitorFileSystem1-2>
|
||||
<ModuleDBRootCount1-2>unassigned</ModuleDBRootCount1-2>
|
||||
<ModuleDBRootID1-1-2>unassigned</ModuleDBRootID1-1-2>
|
||||
|
||||
<ModuleType3>pm</ModuleType3>
|
||||
<ModuleDesc3>Performance Module</ModuleDesc3>
|
||||
<ModuleCount3>2</ModuleCount3>
|
||||
<ModuleIPAddr1-1-3>10.128.0.23</ModuleIPAddr1-1-3>
|
||||
<ModuleHostName1-1-3>localhost</ModuleHostName1-1-3>
|
||||
<ModuleDisableState1-3>ENABLED</ModuleDisableState1-3>
|
||||
<ModuleCPUCriticalThreshold3>0</ModuleCPUCriticalThreshold3>
|
||||
<ModuleCPUMajorThreshold3>0</ModuleCPUMajorThreshold3>
|
||||
<ModuleCPUMinorThreshold3>0</ModuleCPUMinorThreshold3>
|
||||
<ModuleCPUMinorClearThreshold3>0</ModuleCPUMinorClearThreshold3>
|
||||
<ModuleDiskCriticalThreshold3>90</ModuleDiskCriticalThreshold3>
|
||||
<ModuleDiskMajorThreshold3>80</ModuleDiskMajorThreshold3>
|
||||
<ModuleDiskMinorThreshold3>70</ModuleDiskMinorThreshold3>
|
||||
<ModuleMemCriticalThreshold3>90</ModuleMemCriticalThreshold3>
|
||||
<ModuleMemMajorThreshold3>0</ModuleMemMajorThreshold3>
|
||||
<ModuleMemMinorThreshold3>0</ModuleMemMinorThreshold3>
|
||||
<ModuleSwapCriticalThreshold3>90</ModuleSwapCriticalThreshold3>
|
||||
<ModuleSwapMajorThreshold3>80</ModuleSwapMajorThreshold3>
|
||||
<ModuleSwapMinorThreshold3>70</ModuleSwapMinorThreshold3>
|
||||
<ModuleDiskMonitorFileSystem1-3>/</ModuleDiskMonitorFileSystem1-3>
|
||||
<ModuleDBRootCount1-3>1</ModuleDBRootCount1-3>
|
||||
<ModuleDBRootID1-1-3>1</ModuleDBRootID1-1-3>
|
||||
<ModuleHostName1-2-3>unassigned</ModuleHostName1-2-3>
|
||||
<ModuleIPAddr1-2-3>0.0.0.0</ModuleIPAddr1-2-3>
|
||||
<ModuleHostName1-3-3>unassigned</ModuleHostName1-3-3>
|
||||
<ModuleIPAddr1-3-3>0.0.0.0</ModuleIPAddr1-3-3>
|
||||
<ModuleDisableState2-3>ENABLED</ModuleDisableState2-3>
|
||||
<ModuleHostName2-1-3>pm2</ModuleHostName2-1-3>
|
||||
<ModuleIPAddr2-1-3>172.30.0.144</ModuleIPAddr2-1-3>
|
||||
<ModuleHostName2-2-3>unassigned</ModuleHostName2-2-3>
|
||||
<ModuleIPAddr2-2-3>0.0.0.0</ModuleIPAddr2-2-3>
|
||||
<ModuleDBRootID2-1-3>3</ModuleDBRootID2-1-3>
|
||||
<ModuleDBRootID2-2-3>4</ModuleDBRootID2-2-3>
|
||||
<ModuleDBRootID2-3-3>5</ModuleDBRootID2-3-3>
|
||||
<ModuleDBRootCount2-3>3</ModuleDBRootCount2-3>
|
||||
</SystemModuleConfig>
|
||||
<SystemExtDeviceConfig>
|
||||
<Count>0</Count>
|
||||
<Name1>unassigned</Name1>
|
||||
<IPAddr1>0.0.0.0</IPAddr1>
|
||||
<DisableState1>ENABLED</DisableState1>
|
||||
</SystemExtDeviceConfig>
|
||||
<SessionManager>
|
||||
<MaxConcurrentTransactions>1000</MaxConcurrentTransactions>
|
||||
<TxnIDFile>/var/lib/columnstore/data1/systemFiles/dbrm/SMTxnID</TxnIDFile>
|
||||
</SessionManager>
|
||||
<VersionBuffer>
|
||||
<!-- VersionBufferFileSize must be a multiple of 8192.
|
||||
One version buffer file will be put on each DB root. -->
|
||||
<VersionBufferFileSize>1GB</VersionBufferFileSize>
|
||||
</VersionBuffer>
|
||||
<OIDManager>
    <!-- Do not change this file after the database is built -->
    <OIDBitmapFile>/var/lib/columnstore/data1/systemFiles/dbrm/oidbitmap</OIDBitmapFile>
    <!-- Do not change this value after the database is built -->
    <FirstOID>3000</FirstOID>
</OIDManager>
<WriteEngine>
    <BulkRoot>/var/log/mariadb/columnstore/data/bulk</BulkRoot>
    <BulkRollbackDir>/var/lib/columnstore/data1/systemFiles/bulkRollback</BulkRollbackDir>
    <MaxFileSystemDiskUsagePct>98</MaxFileSystemDiskUsagePct>
    <CompressedPaddingBlocks>1</CompressedPaddingBlocks> <!-- Number of blocks used to pad compressed chunks -->
    <FastDelete>n</FastDelete>
</WriteEngine>
<DBRM_Controller>
    <NumWorkers>2</NumWorkers>
    <IPAddr>10.128.0.23</IPAddr>
    <Port>8616</Port>
</DBRM_Controller>
<!-- Worker ports 8700 - 8720 are reserved to support external modules -->
<DBRM_Worker1>
    <IPAddr>10.128.0.23</IPAddr>
    <Port>8700</Port>
    <Module>pm1</Module>
</DBRM_Worker1>
<DBRM_Worker2>
    <IPAddr>172.30.0.144</IPAddr>
    <Port>8700</Port>
    <Module>pm2</Module>
</DBRM_Worker2>
<DBRM_Worker3>
    <IPAddr>0.0.0.0</IPAddr>
    <Port>8700</Port>
    <Module>unassigned</Module>
</DBRM_Worker3>
<DBRM_Worker4>
    <IPAddr>0.0.0.0</IPAddr>
    <Port>8700</Port>
    <Module>unassigned</Module>
</DBRM_Worker4>
<DBRM_Worker5>
    <IPAddr>0.0.0.0</IPAddr>
    <Port>8700</Port>
    <Module>unassigned</Module>
</DBRM_Worker5>
<DBRM_Worker6>
    <IPAddr>0.0.0.0</IPAddr>
    <Port>8700</Port>
    <Module>unassigned</Module>
</DBRM_Worker6>
<DBRM_Worker7>
    <IPAddr>0.0.0.0</IPAddr>
    <Port>8700</Port>
    <Module>unassigned</Module>
</DBRM_Worker7>
<DBRM_Worker8>
    <IPAddr>0.0.0.0</IPAddr>
    <Port>8700</Port>
    <Module>unassigned</Module>
</DBRM_Worker8>
<DBRM_Worker9>
    <IPAddr>0.0.0.0</IPAddr>
    <Port>8700</Port>
    <Module>unassigned</Module>
</DBRM_Worker9>
<DBRM_Worker10>
    <IPAddr>0.0.0.0</IPAddr>
    <Port>8700</Port>
    <Module>unassigned</Module>
</DBRM_Worker10>
<DBBC>
    <!-- The percentage of RAM to use for the disk block cache. Defaults to 70%. -->
    <!-- Alternatively, this can be specified in absolute terms using
         the suffixes 'm' or 'g' to denote size in megabytes or gigabytes. -->
    <!-- <NumBlocksPct>70</NumBlocksPct> -->
    <!-- <NumThreads>16</NumThreads> --> <!-- 1-256. Default is 16. -->
    <NumCaches>1</NumCaches> <!-- number of parallel caches to instantiate -->
    <IOMTracing>0</IOMTracing>
    <BRPTracing>0</BRPTracing>
    <ReportFrequency>65536</ReportFrequency>
    <MaxOpenFiles>2K</MaxOpenFiles>
    <DecreaseOpenFilesCount>200</DecreaseOpenFilesCount>
    <FDCacheTrace>0</FDCacheTrace>
    <NumBlocksPct>50</NumBlocksPct>
</DBBC>
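<!-- Sizing example (illustrative host size, not a shipped default): on a node
     with 64 GB of RAM, <NumBlocksPct>50</NumBlocksPct> caps the block cache
     at roughly 32 GB. The same budget could be pinned absolutely, independent
     of installed RAM, as <NumBlocksPct>32g</NumBlocksPct>. -->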
<Installation>
    <ServerTypeInstall>2</ServerTypeInstall>
    <PMwithUM>n</PMwithUM>
    <MySQLRep>y</MySQLRep>
    <DBRootStorageType>internal</DBRootStorageType>
    <UMStorageType>internal</UMStorageType>
    <ProfileFile>/etc/profile.d/columnstoreAlias.sh</ProfileFile>
</Installation>
<ExtentMap>
    <!--
        WARNING: these can only be changed on an empty system. Once any object
        has been allocated they cannot be changed! Extent size is 8M rows.
    -->
    <FilesPerColumnPartition>4</FilesPerColumnPartition> <!-- should be a multiple of DBRootCount -->
    <BRM_UID>0x0</BRM_UID>
</ExtentMap>
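<!-- Worked example of the rule above (illustrative DBRoot counts): with
     2 DBRoots, valid FilesPerColumnPartition values are 2, 4, 6, ..., so the
     4 above is fine; with 3 DBRoots, 4 would break the rule and 3 or 6 should
     be used instead. -->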
<HashJoin>
    <MaxBuckets>128</MaxBuckets>
    <MaxElems>128K</MaxElems> <!-- 128 buckets * 128K * 16 = 256 MB -->
    <PmMaxMemorySmallSide>1G</PmMaxMemorySmallSide>
    <TotalUmMemory>25%</TotalUmMemory>
    <CPUniqueLimit>100</CPUniqueLimit>
    <AllowDiskBasedJoin>N</AllowDiskBasedJoin>
    <TempFileCompression>Y</TempFileCompression>
    <TempFileCompressionType>Snappy</TempFileCompressionType> <!-- LZ4, Snappy -->
</HashJoin>
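<!-- Expanding the MaxElems arithmetic above (reading the 16 as bytes per
     element, which is an assumption): 128 buckets x 131072 (128K) elements
     x 16 bytes = 268435456 bytes, i.e. exactly 256 MB. -->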
<JobList>
    <FlushInterval>16K</FlushInterval>
    <FifoSize>16</FifoSize>
    <RequestSize>1</RequestSize> <!-- Number of extents per request; should be
        less than MaxOutstandingRequests, otherwise the default value 1 is used. -->
    <!-- ProcessorThreadsPerScan is the number of jobs issued to process
         each extent. The default is 16. MaxOutstandingRequests is the size of
         the window of work in terms of extents. A value of 20 means there
         are 20 extents' worth of work for the PMs to process at any given time.
         ProcessorThreadsPerScan * MaxOutstandingRequests should be at least
         as large as the number of threads available across all PMs. -->
    <!-- <ProcessorThreadsPerScan>16</ProcessorThreadsPerScan> -->
    <!-- MaxOutstandingRequests defaults to the number of cores available
         across all performance modules * 4, divided by ProcessorThreadsPerScan,
         with a lower bound of 20. -->
    <!-- <MaxOutstandingRequests>20</MaxOutstandingRequests> -->
    <ThreadPoolSize>100</ThreadPoolSize>
</JobList>
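<!-- Worked example of the defaults above (illustrative cluster size): with
     2 PMs of 16 cores each, MaxOutstandingRequests defaults to
     max(20, 32 * 4 / 16) = 20, and ProcessorThreadsPerScan *
     MaxOutstandingRequests = 16 * 20 = 320, comfortably above the 32 threads
     available across the PMs. -->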
<RowAggregation>
    <!-- <RowAggrThreads>4</RowAggrThreads> --> <!-- Default value is the number of cores -->
    <!-- <RowAggrBuckets>32</RowAggrBuckets> --> <!-- Default value is number of cores * 4 -->
    <!-- <RowAggrRowGroupsPerThread>20</RowAggrRowGroupsPerThread> --> <!-- Default value is 20 -->
    <AllowDiskBasedAggregation>N</AllowDiskBasedAggregation>
</RowAggregation>
<CrossEngineSupport>
    <Host>127.0.0.1</Host>
    <Port>3306</Port>
    <User>root</User>
    <Password/>
    <TLSCA/>
    <TLSClientCert/>
    <TLSClientKey/>
</CrossEngineSupport>
<QueryStats>
    <Enabled>N</Enabled>
</QueryStats>
<UserPriority>
    <Enabled>N</Enabled>
</UserPriority>
<NetworkCompression>
    <Enabled>Y</Enabled>
    <NetworkCompressionType>Snappy</NetworkCompressionType> <!-- LZ4, Snappy -->
</NetworkCompression>
<QueryTele>
    <Host>127.0.0.1</Host>
    <Port>0</Port>
</QueryTele>
<StorageManager>
    <MaxSockets>30</MaxSockets>
    <Enabled>N</Enabled>
</StorageManager>
<ProcHeartbeatControl>
    <IPAddr>10.128.0.23</IPAddr>
</ProcHeartbeatControl>
<pm2_ProcessMonitor>
    <IPAddr>172.30.0.144</IPAddr>
    <Port>8800</Port>
</pm2_ProcessMonitor>
<pm2_ServerMonitor>
    <IPAddr>172.30.0.144</IPAddr>
    <Port>8622</Port>
</pm2_ServerMonitor>
<pm2_WriteEngineServer>
    <IPAddr>172.30.0.144</IPAddr>
    <Port>8630</Port>
</pm2_WriteEngineServer>
<ExeMgr2>
    <IPAddr>172.30.0.144</IPAddr>
    <Port>8601</Port>
    <Module>pm2</Module>
</ExeMgr2>
</Columnstore>
0 cmapi/cmapi_server/test/__init__.py Normal file

50 cmapi/cmapi_server/test/config_apply_example.py Normal file
@ -0,0 +1,50 @@
import configparser
from pathlib import Path
from shutil import copyfile

import requests

from cmapi_server.controllers.dispatcher import _version


config_filename = './cmapi_server/cmapi_server.conf'

url = f"https://localhost:8640/cmapi/{_version}/node/config"
begin_url = f"https://localhost:8640/cmapi/{_version}/node/begin"
config_path = './cmapi_server/test/Columnstore_apply_config.xml'

# create a tmp dir and keep a pristine copy of the config there
tmp_prefix = '/tmp/mcs_config_test'
tmp_path = Path(tmp_prefix)
tmp_path.mkdir(parents=True, exist_ok=True)
copyfile(config_path, tmp_prefix + '/Columnstore.xml')


def get_current_key():
    app_config = configparser.ConfigParser()
    try:
        with open(config_filename, 'r') as _config_file:
            app_config.read_file(_config_file)
    except FileNotFoundError:
        return ''
    if 'Authentication' not in app_config.sections():
        return ''
    return app_config['Authentication'].get('x-api-key', '')


headers = {'x-api-key': get_current_key()}
body = {'id': 42, 'timeout': 120}
r = requests.put(begin_url, verify=False, headers=headers, json=body)

config_file = Path(config_path)
config = config_file.read_text()

body = {
    'revision': 42,
    'manager': '1.1.1.1',
    'timeout': 0,
    'config': config,
}

#print(config)

#r = requests.put(url, verify=False, headers=headers, json=body)
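# A minimal sketch of finishing the flow above (the /node/rollback endpoint is
# the one exercised by the server tests in this commit; ids and values are the
# same illustrative ones used here):
def apply_and_rollback():
    r = requests.put(url, verify=False, headers=headers, json=body)
    print(r.status_code, r.json())
    # undo the operation opened against /node/begin above
    rollback_url = f"https://localhost:8640/cmapi/{_version}/node/rollback"
    requests.put(rollback_url, verify=False, headers=headers, json={'id': 42})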
35 cmapi/cmapi_server/test/test_cej.py Normal file
@ -0,0 +1,35 @@
"""Tests for all the CEJ (Cross Engine Join) related stuff."""
import os
import subprocess
import sys
import unittest
from shutil import which

from cmapi_server.handlers.cej import CEJPasswordHandler
from cmapi_server.constants import MCS_SECRETS_FILE_PATH


class SecretsTestCase(unittest.TestCase):
    """Test case for checking .secrets file related stuff."""

    @unittest.skipIf(
        which('cskeys') is None,
        'This MCS version doesn\'t provide "cskeys" tool.'
    )
    def test_cspasswd_decrypt_algorithm(self) -> None:
        """Test to check the decrypt algorithm.

        Check that the CEJ password decrypting algorithm is the same between
        the "cspasswd" tool in MCS and in CMAPI.
        """
        test_passwd = 'columstore is the best'
        # create the .secrets file using the cskeys util
        subprocess.run(
            'cskeys', shell=True, stdout=subprocess.PIPE, check=True
        )
        encrypted_passwd = subprocess.check_output(
            ['cspasswd', test_passwd]
        ).decode(sys.stdout.encoding).strip()
        self.assertEqual(
            test_passwd, CEJPasswordHandler.decrypt_password(encrypted_passwd)
        )
        os.remove(MCS_SECRETS_FILE_PATH)
236 cmapi/cmapi_server/test/test_cluster.py Normal file
@ -0,0 +1,236 @@
import logging
import os
import socket
import subprocess
from shutil import copyfile

import requests

from cmapi_server.controllers.dispatcher import _version
from cmapi_server.managers.process import MCSProcessManager
from cmapi_server.test.unittest_global import (
    BaseServerTestCase, MCS_CONFIG_FILEPATH, COPY_MCS_CONFIG_FILEPATH,
    TEST_MCS_CONFIG_FILEPATH,
)


logging.basicConfig(level='DEBUG')
requests.urllib3.disable_warnings()


class BaseClusterTestCase(BaseServerTestCase):

    @classmethod
    def setUpClass(cls) -> None:
        copyfile(MCS_CONFIG_FILEPATH, COPY_MCS_CONFIG_FILEPATH)
        return super().setUpClass()

    @classmethod
    def tearDownClass(cls) -> None:
        copyfile(COPY_MCS_CONFIG_FILEPATH, MCS_CONFIG_FILEPATH)
        os.remove(os.path.abspath(COPY_MCS_CONFIG_FILEPATH))
        MCSProcessManager.stop_node(is_primary=True)
        MCSProcessManager.start_node(is_primary=True)
        return super().tearDownClass()

    def setUp(self) -> None:
        copyfile(TEST_MCS_CONFIG_FILEPATH, MCS_CONFIG_FILEPATH)
        MCSProcessManager.stop_node(is_primary=True)
        MCSProcessManager.start_node(is_primary=True)
        return super().setUp()


class ClusterStartTestCase(BaseClusterTestCase):
    URL = f'https://localhost:8640/cmapi/{_version}/cluster/start'

    def test_endpoint_with_no_api_key(self):
        r = requests.put(
            self.URL, verify=False, headers=self.NO_AUTH_HEADERS,
            json={}
        )
        self.assertEqual(r.status_code, 401)

    def test_endpoint_with_no_nodes_in_cluster(self):
        r = requests.put(
            self.URL, verify=False, headers=self.HEADERS,
            json={}
        )
        error = r.json()['error']
        self.assertEqual(r.status_code, 422)
        self.assertEqual(error, 'There are no nodes in the cluster.')

    def test_start_after_adding_a_node(self):
        payload = {'node': socket.gethostname()}
        resp = requests.post(
            ClusterAddNodeTestCase.URL, verify=False, headers=self.HEADERS,
            json=payload
        )
        self.assertEqual(resp.status_code, 200)

        payload = {'node': None}
        resp = requests.put(
            self.URL, verify=False, headers=self.HEADERS, json=payload
        )
        self.assertEqual(resp.status_code, 200)

        # check that ColumnStore started
        controllernode = subprocess.check_output(['pgrep', 'controllernode'])
        self.assertIsNotNone(controllernode)


class ClusterShutdownTestCase(BaseClusterTestCase):
    URL = f'https://localhost:8640/cmapi/{_version}/cluster/shutdown'

    def test_endpoint_with_no_api_key(self):
        r = requests.put(
            self.URL, verify=False, headers=self.NO_AUTH_HEADERS,
            json={}
        )
        self.assertEqual(r.status_code, 401)

    def test_endpoint_with_no_nodes_in_cluster(self):
        resp = requests.put(
            self.URL, verify=False, headers=self.HEADERS,
            json={}
        )
        error = resp.json()['error']
        self.assertEqual(resp.status_code, 422)
        self.assertEqual(error, 'There are no nodes in the cluster.')

    def test_add_node_and_shutdown(self):
        payload = {'node': socket.gethostname()}
        resp = requests.post(
            ClusterAddNodeTestCase.URL, verify=False, headers=self.HEADERS,
            json=payload
        )
        self.assertEqual(resp.status_code, 200)

        # note: POST /cluster/node also starts up the node
        try:
            controllernode = subprocess.check_output(
                ['pgrep', 'controllernode']
            )
        except subprocess.CalledProcessError:
            controllernode = None
        self.assertIsNotNone(controllernode)

        payload = {'timeout': 60}
        resp = requests.put(
            self.URL, verify=False, headers=self.HEADERS,
            json=payload
        )
        self.assertEqual(resp.status_code, 200)

        # check that ColumnStore stopped
        try:
            controllernode = subprocess.check_output(
                ['pgrep', 'controllernode']
            )
        except subprocess.CalledProcessError:
            controllernode = None
        self.assertIsNone(controllernode)


class ClusterModesetTestCase(BaseClusterTestCase):
    URL = f'https://localhost:8640/cmapi/{_version}/cluster/mode-set'

    def test_endpoint_with_no_api_key(self):
        resp = requests.put(
            self.URL, verify=False, headers=self.NO_AUTH_HEADERS,
            json={}
        )
        self.assertEqual(resp.status_code, 401)

    def test_endpoint_with_no_nodes_in_cluster(self):
        resp = requests.put(
            self.URL, verify=False, headers=self.HEADERS,
            json={}
        )
        error = resp.json()['error']
        self.assertEqual(resp.status_code, 422)
        self.assertEqual(error, 'No master found in the cluster.')

    def test_add_node_and_set_readonly(self):
        payload = {'node': socket.gethostname()}
        resp = requests.post(
            ClusterAddNodeTestCase.URL, verify=False, headers=self.HEADERS,
            json=payload
        )
        self.assertEqual(resp.status_code, 200)

        payload = {'mode': 'readonly'}
        resp = requests.put(
            self.URL, verify=False, headers=self.HEADERS, json=payload
        )
        self.assertEqual(resp.status_code, 200)

        # return readwrite mode back
        payload = {'mode': 'readwrite'}
        resp = requests.put(
            self.URL, verify=False, headers=self.HEADERS, json=payload
        )
        self.assertEqual(resp.status_code, 200)


class ClusterAddNodeTestCase(BaseClusterTestCase):
    URL = f'https://localhost:8640/cmapi/{_version}/cluster/node'

    def test_endpoint_with_no_apikey(self):
        resp = requests.post(
            self.URL, verify=False, headers=self.NO_AUTH_HEADERS,
            json={}
        )
        self.assertEqual(resp.status_code, 401)

    def test_endpoint_with_missing_node_parameter(self):
        resp = requests.put(
            self.URL, verify=False, headers=self.HEADERS,
            json={}
        )
        error = resp.json()['error']
        self.assertEqual(resp.status_code, 422)
        self.assertEqual(error, 'missing node argument')

    def test_endpoint(self):
        payload = {'node': socket.gethostname()}
        resp = requests.put(
            self.URL, verify=False, headers=self.HEADERS,
            json=payload
        )
        self.assertEqual(resp.status_code, 200)

        # check that ColumnStore started
        controllernode = subprocess.check_output(['pgrep', 'controllernode'])
        self.assertIsNotNone(controllernode)


class ClusterRemoveNodeTestCase(BaseClusterTestCase):
    URL = ClusterAddNodeTestCase.URL

    def test_endpoint_with_no_apikey(self):
        resp = requests.delete(
            self.URL, verify=False, headers=self.NO_AUTH_HEADERS,
            json={}
        )
        self.assertEqual(resp.status_code, 401)

    def test_endpoint_with_missing_node_parameter(self):
        resp = requests.delete(
            self.URL, verify=False, headers=self.HEADERS,
            json={}
        )
        error = resp.json()['error']
        self.assertEqual(resp.status_code, 422)
        self.assertEqual(error, 'missing node argument')

    def test_add_node_and_remove(self):
        payload = {'node': socket.gethostname()}
        resp = requests.post(
            ClusterAddNodeTestCase.URL, verify=False, headers=self.HEADERS,
            json=payload
        )
        self.assertEqual(resp.status_code, 200)

        resp = requests.delete(
            self.URL, verify=False, headers=self.HEADERS, json=payload
        )
        self.assertEqual(resp.status_code, 200)
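# A condensed view of the lifecycle the cases above exercise, as a minimal
# sketch (same endpoints and headers as the tests; the node name and timeout
# are illustrative, and the api key must match cmapi_server.conf):
def cluster_lifecycle_sketch(api_key: str, node: str = 'my-host'):
    base = f'https://localhost:8640/cmapi/{_version}/cluster'
    headers = {'x-api-key': api_key}
    # add a node; as noted above, POST /cluster/node also starts it
    requests.post(f'{base}/node', verify=False, headers=headers,
                  json={'node': node})
    # flip the cluster between readonly and readwrite
    requests.put(f'{base}/mode-set', verify=False, headers=headers,
                 json={'mode': 'readonly'})
    # stop every node in the cluster
    requests.put(f'{base}/shutdown', verify=False, headers=headers,
                 json={'timeout': 60})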
217 cmapi/cmapi_server/test/test_em_endpoints.py Normal file
@ -0,0 +1,217 @@
import configparser
import subprocess
import unittest
from contextlib import contextmanager
from os import path, remove
from pathlib import Path
from shutil import copyfile

import cherrypy
import requests
requests.packages.urllib3.disable_warnings()

from cmapi_server.constants import (
    EM_PATH_SUFFIX, MCS_EM_PATH, MCS_BRM_CURRENT_PATH, S3_BRM_CURRENT_PATH
)
from cmapi_server.controllers.dispatcher import (
    dispatcher, jsonify_error, _version
)
from cmapi_server.test.unittest_global import (
    create_self_signed_certificate, cert_filename, cmapi_config_filename,
    tmp_cmapi_config_filename
)
from mcs_node_control.models.node_config import NodeConfig


@contextmanager
def run_server():
    if not path.exists(cert_filename):
        create_self_signed_certificate()
    cherrypy.engine.start()
    cherrypy.engine.wait(cherrypy.engine.states.STARTED)
    yield
    cherrypy.engine.exit()
    cherrypy.engine.block()


def get_current_key():
    app_config = configparser.ConfigParser()
    try:
        with open(cmapi_config_filename, 'r') as _config_file:
            app_config.read_file(_config_file)
    except FileNotFoundError:
        return ''

    if 'Authentication' not in app_config.sections():
        return ''
    return app_config['Authentication'].get('x-api-key', '')


class TestEMEndpoints(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        if not path.exists(tmp_cmapi_config_filename):
            f = open(tmp_cmapi_config_filename, 'x')
            f.close()
        copyfile(cmapi_config_filename, tmp_cmapi_config_filename)

    @classmethod
    def tearDownClass(cls):
        if path.exists(tmp_cmapi_config_filename):
            copyfile(tmp_cmapi_config_filename, cmapi_config_filename)
            remove(tmp_cmapi_config_filename)

    def get_examplar_bytes(self, element: str):
        """Read the current BRM element (em/journal/vss/vbbm) bytes directly,
        either through StorageManager or from the local filesystem."""
        node_config = NodeConfig()
        if node_config.s3_enabled():
            ret = subprocess.run(
                ["smcat", S3_BRM_CURRENT_PATH], stdout=subprocess.PIPE
            )
            element_current_suffix = ret.stdout.decode("utf-8").rstrip()
            element_current_filename = f'{EM_PATH_SUFFIX}/{element_current_suffix}_{element}'
            ret = subprocess.run(
                ["smcat", element_current_filename], stdout=subprocess.PIPE
            )
            result = ret.stdout
        else:
            element_current_name = Path(MCS_BRM_CURRENT_PATH)
            element_current_filename = element_current_name.read_text().rstrip()
            element_current_file = Path(
                f'{MCS_EM_PATH}/{element_current_filename}_{element}'
            )
            result = element_current_file.read_bytes()
        return result

    def test_em(self):
        app = cherrypy.tree.mount(root=None, config=cmapi_config_filename)
        app.config.update({
            '/': {
                'request.dispatch': dispatcher,
                'error_page.default': jsonify_error,
            },
            'config': {
                'path': cmapi_config_filename,
            },
        })
        cherrypy.config.update(cmapi_config_filename)

        api_key = get_current_key()
        try:
            with run_server():
                url = f"https://localhost:8640/cmapi/{_version}/node/meta/em"
                # Auth failure
                headers = {'x-api-key': None}
                r = requests.get(url, verify=False, headers=headers)
                self.assertEqual(r.status_code, 401)
                # OK
                headers = {'x-api-key': api_key}
                r = requests.get(url, verify=False, headers=headers)
                extent_map = self.get_examplar_bytes('em')
                self.assertEqual(r.status_code, 200)
                self.assertEqual(r.content, extent_map)
        except:
            cherrypy.engine.exit()
            cherrypy.engine.block()
            raise

    def test_journal(self):
        app = cherrypy.tree.mount(root=None, config=cmapi_config_filename)
        app.config.update({
            '/': {
                'request.dispatch': dispatcher,
                'error_page.default': jsonify_error,
            },
            'config': {
                'path': cmapi_config_filename,
            },
        })
        cherrypy.config.update(cmapi_config_filename)

        api_key = get_current_key()
        try:
            with run_server():
                url = f"https://localhost:8640/cmapi/{_version}/node/meta/journal"
                # Auth failure
                headers = {'x-api-key': None}
                r = requests.get(url, verify=False, headers=headers)
                self.assertEqual(r.status_code, 401)
                # OK
                headers = {'x-api-key': api_key}
                r = requests.get(url, verify=False, headers=headers)
                journal = self.get_examplar_bytes('journal')
                self.assertEqual(r.status_code, 200)
                self.assertEqual(r.content, journal)
        except:
            cherrypy.engine.exit()
            cherrypy.engine.block()
            raise

    def test_vss(self):
        app = cherrypy.tree.mount(root=None, config=cmapi_config_filename)
        app.config.update({
            '/': {
                'request.dispatch': dispatcher,
                'error_page.default': jsonify_error,
            },
            'config': {
                'path': cmapi_config_filename,
            },
        })
        cherrypy.config.update(cmapi_config_filename)

        api_key = get_current_key()
        try:
            with run_server():
                url = f"https://localhost:8640/cmapi/{_version}/node/meta/vss"
                # Auth failure
                headers = {'x-api-key': None}
                r = requests.get(url, verify=False, headers=headers)
                self.assertEqual(r.status_code, 401)
                # OK
                headers = {'x-api-key': api_key}
                r = requests.get(url, verify=False, headers=headers)
                vss = self.get_examplar_bytes('vss')
                self.assertEqual(r.status_code, 200)
                self.assertEqual(r.content, vss)
        except:
            cherrypy.engine.exit()
            cherrypy.engine.block()
            raise

    def test_vbbm(self):
        app = cherrypy.tree.mount(root=None, config=cmapi_config_filename)
        app.config.update({
            '/': {
                'request.dispatch': dispatcher,
                'error_page.default': jsonify_error,
            },
            'config': {
                'path': cmapi_config_filename,
            },
        })
        cherrypy.config.update(cmapi_config_filename)

        api_key = get_current_key()
        try:
            with run_server():
                url = f"https://localhost:8640/cmapi/{_version}/node/meta/vbbm"
                # Auth failure
                headers = {'x-api-key': None}
                r = requests.get(url, verify=False, headers=headers)
                self.assertEqual(r.status_code, 401)
                # OK
                headers = {'x-api-key': api_key}
                r = requests.get(url, verify=False, headers=headers)
                vbbm = self.get_examplar_bytes('vbbm')
                self.assertEqual(r.status_code, 200)
                self.assertEqual(r.content, vbbm)
        except:
            cherrypy.engine.exit()
            cherrypy.engine.block()
            raise
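# The four tests above differ only in the element name. A possible
# deduplication, sketched here without changing behavior (the cherrypy
# mount/config boilerplate would move with it), is a single helper driven by
# the element string; `case` is expected to be a TestEMEndpoints instance:
def _check_meta_endpoint(case, element):
    api_key = get_current_key()
    with run_server():
        url = f"https://localhost:8640/cmapi/{_version}/node/meta/{element}"
        r = requests.get(url, verify=False, headers={'x-api-key': None})
        case.assertEqual(r.status_code, 401)    # auth failure
        r = requests.get(url, verify=False, headers={'x-api-key': api_key})
        case.assertEqual(r.status_code, 200)    # byte-for-byte match
        case.assertEqual(r.content, case.get_examplar_bytes(element))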
124 cmapi/cmapi_server/test/test_failover_agent.py Normal file
@ -0,0 +1,124 @@
import logging
import socket

from cmapi_server.failover_agent import FailoverAgent
from cmapi_server.node_manipulation import add_node, remove_node
from mcs_node_control.models.node_config import NodeConfig
from cmapi_server.test.unittest_global import (
    tmp_mcs_config_filename, BaseNodeManipTestCase
)


logging.basicConfig(level='DEBUG')


class TestFailoverAgent(BaseNodeManipTestCase):

    def test_activateNodes(self):
        self.tmp_files = ('./activate0.xml', './activate1.xml')
        hostaddr = socket.gethostbyname(socket.gethostname())
        fa = FailoverAgent()
        fa.activateNodes(
            [self.NEW_NODE_NAME], tmp_mcs_config_filename, self.tmp_files[0],
            test_mode=True
        )
        add_node(
            hostaddr, self.tmp_files[0], self.tmp_files[1]
        )

        nc = NodeConfig()
        root = nc.get_current_config_root(self.tmp_files[1])
        pm_count = int(root.find('./PrimitiveServers/Count').text)
        self.assertEqual(pm_count, 2)
        node = root.find('./PMS1/IPAddr')
        self.assertEqual(node.text, self.NEW_NODE_NAME)
        node = root.find('./pm1_WriteEngineServer/IPAddr')
        self.assertEqual(node.text, self.NEW_NODE_NAME)
        node = root.find('./PMS2/IPAddr')
        self.assertEqual(node.text, hostaddr)
        node = root.find('./pm2_WriteEngineServer/IPAddr')
        self.assertEqual(node.text, hostaddr)
        remove_node(self.NEW_NODE_NAME, self.tmp_files[1], self.tmp_files[1])

    def test_deactivateNodes(self):
        self.tmp_files = (
            './deactivate0.xml', './deactivate1.xml', './deactivate2.xml'
        )
        fa = FailoverAgent()
        hostname = socket.gethostname()
        hostaddr = socket.gethostbyname(hostname)
        add_node(
            hostaddr, tmp_mcs_config_filename, self.tmp_files[0]
        )
        fa.activateNodes(
            [self.NEW_NODE_NAME], self.tmp_files[0], self.tmp_files[1],
            test_mode=True
        )
        fa.deactivateNodes(
            [self.NEW_NODE_NAME], self.tmp_files[1], self.tmp_files[2],
            test_mode=True
        )

        nc = NodeConfig()
        root = nc.get_current_config_root(self.tmp_files[2])
        pm_count = int(root.find('./PrimitiveServers/Count').text)
        self.assertEqual(pm_count, 1)
        node = root.find('./PMS1/IPAddr')
        self.assertEqual(node.text, hostaddr)
        # TODO: Fix node_manipulation add_node logic and _replace_localhost
        # node = root.find('./PMS2/IPAddr')
        # self.assertEqual(node, None)
        node = root.find('./pm1_WriteEngineServer/IPAddr')
        self.assertEqual(node.text, hostaddr)
        node = root.find('./pm2_WriteEngineServer/IPAddr')
        self.assertIsNone(node)
        #node = root.find("./ConfigRevision")
        #self.assertEqual(node.text, "3")

        # make sure there are no traces of mysql.com,
        # or an ip addr that isn't localhost or 127.0.0.1
        all_nodes = root.findall('./')
        for node in all_nodes:
            self.assertFalse(node.text == self.NEW_NODE_NAME)
            if node.tag in ['IPAddr', 'Node']:
                self.assertTrue(node.text in [hostname, hostaddr])

    def test_designatePrimaryNode(self):
        self.tmp_files = (
            './primary-node0.xml', './primary-node1.xml', './primary-node2.xml'
        )
        fa = FailoverAgent()
        hostaddr = socket.gethostbyname(socket.gethostname())
        fa.activateNodes(
            [self.NEW_NODE_NAME], tmp_mcs_config_filename, self.tmp_files[0],
            test_mode=True
        )
        add_node(
            hostaddr, self.tmp_files[0], self.tmp_files[1]
        )
        fa.movePrimaryNode(
            'placeholder', self.tmp_files[1], self.tmp_files[2], test_mode=True
        )

        nc = NodeConfig()
        root = nc.get_current_config_root(self.tmp_files[2])
        pm_count = int(root.find('./PrimitiveServers/Count').text)
        self.assertEqual(pm_count, 2)
        node = root.find('./PMS1/IPAddr')
        self.assertEqual(node.text, self.NEW_NODE_NAME)
        node = root.find('./PMS2/IPAddr')
        self.assertEqual(node.text, hostaddr)
        node = root.find('./pm1_WriteEngineServer/IPAddr')
        self.assertEqual(node.text, self.NEW_NODE_NAME)
        node = root.find('./pm2_WriteEngineServer/IPAddr')
        self.assertEqual(node.text, hostaddr)

        for tag in ['ExeMgr1', 'DMLProc', 'DDLProc']:
            node = root.find(f'./{tag}/IPAddr')
            self.assertEqual(node.text, self.NEW_NODE_NAME)

        self.assertEqual(self.NEW_NODE_NAME, root.find('./PrimaryNode').text)

    def test_enterStandbyMode(self):
        fa = FailoverAgent()
        fa.enterStandbyMode(test_mode=True)
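# Taken together, the cases above walk the config rewrites a failover goes
# through. A condensed sketch of that sequence (file names and node names are
# illustrative; test_mode=True keeps it from touching a live cluster):
def failover_sketch():
    fa = FailoverAgent()
    # bring a replacement node into the config
    fa.activateNodes(['new-pm'], tmp_mcs_config_filename, './step0.xml',
                     test_mode=True)
    # repoint ExeMgr/DMLProc/DDLProc and PrimaryNode at it
    fa.movePrimaryNode('placeholder', './step0.xml', './step1.xml',
                       test_mode=True)
    # drop the failed node from the config
    fa.deactivateNodes(['old-pm'], './step1.xml', './step2.xml',
                       test_mode=True)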
117 cmapi/cmapi_server/test/test_mcs_process_operations.py Normal file
@ -0,0 +1,117 @@
import os

from cmapi_server.managers.process import MCSProcessManager
from cmapi_server.process_dispatchers.systemd import SystemdDispatcher
from cmapi_server.test.unittest_global import (
    DDL_SERVICE, CONTROLLERNODE_SERVICE, SYSTEMCTL,
    BaseProcessDispatcherCase
)


class SystemdTest(BaseProcessDispatcherCase):

    def test_systemd_status_start(self):
        os.system(f'{SYSTEMCTL} stop {DDL_SERVICE}')
        self.assertFalse(
            SystemdDispatcher.is_service_running(DDL_SERVICE)
        )
        self.assertTrue(SystemdDispatcher.start(DDL_SERVICE))

        os.system(f'{SYSTEMCTL} stop {CONTROLLERNODE_SERVICE}')
        self.assertFalse(
            SystemdDispatcher.is_service_running(CONTROLLERNODE_SERVICE)
        )
        result = SystemdDispatcher.start(CONTROLLERNODE_SERVICE)
        self.assertTrue(result)
        self.assertTrue(
            SystemdDispatcher.is_service_running(CONTROLLERNODE_SERVICE)
        )

    def test_systemd_status_stop(self):
        os.system(f'{SYSTEMCTL} start {CONTROLLERNODE_SERVICE}')
        self.assertTrue(
            SystemdDispatcher.is_service_running(CONTROLLERNODE_SERVICE)
        )
        self.assertTrue(SystemdDispatcher.stop(CONTROLLERNODE_SERVICE))
        self.assertFalse(
            SystemdDispatcher.is_service_running(CONTROLLERNODE_SERVICE)
        )

    def test_systemd_status_restart(self):
        os.system(f'{SYSTEMCTL} start {CONTROLLERNODE_SERVICE}')
        self.assertTrue(
            SystemdDispatcher.is_service_running(CONTROLLERNODE_SERVICE)
        )
        self.assertTrue(SystemdDispatcher.restart(CONTROLLERNODE_SERVICE))
        self.assertTrue(
            SystemdDispatcher.is_service_running(CONTROLLERNODE_SERVICE)
        )

        os.system(f'{SYSTEMCTL} stop {CONTROLLERNODE_SERVICE}')
        self.assertFalse(
            SystemdDispatcher.is_service_running(CONTROLLERNODE_SERVICE)
        )
        self.assertTrue(SystemdDispatcher.restart(CONTROLLERNODE_SERVICE))
        self.assertTrue(
            SystemdDispatcher.is_service_running(CONTROLLERNODE_SERVICE)
        )


class MCSProcessManagerTest(BaseProcessDispatcherCase):

    def get_systemd_serv_name(self, service_name):
        if service_name == 'mcs-workernode':
            return f'{service_name}@1'
        return service_name

    def test_mcs_process_manager(self):
        MCSProcessManager.detect('systemd', '')
        for prog in MCSProcessManager._get_sorted_progs(True, True).values():
            serv_name = self.get_systemd_serv_name(prog.service_name)
            os.system(f'{SYSTEMCTL} stop {serv_name}')
        self.assertIsNone(MCSProcessManager.start_node(True))

        for prog in MCSProcessManager.mcs_progs.values():
            serv_name = self.get_systemd_serv_name(prog.service_name)
            if serv_name == 'mcs-storagemanager':
                continue
            self.assertTrue(
                MCSProcessManager.process_dispatcher.is_service_running(
                    serv_name
                )
            )

        self.assertIsNone(MCSProcessManager.stop_node(is_primary=True))
        for prog in MCSProcessManager.mcs_progs.values():
            serv_name = self.get_systemd_serv_name(prog.service_name)
            self.assertFalse(
                MCSProcessManager.process_dispatcher.is_service_running(
                    serv_name
                )
            )
        self.assertEqual(len(MCSProcessManager.get_running_mcs_procs()), 0)
        self.assertTrue(
            MCSProcessManager.is_node_processes_ok(
                is_primary=True, node_stopped=True
            )
        )

        for prog in MCSProcessManager._get_sorted_progs(True).values():
            serv_name = self.get_systemd_serv_name(prog.service_name)
            os.system(f'{SYSTEMCTL} start {serv_name}')

        for prog in MCSProcessManager.mcs_progs.values():
            serv_name = self.get_systemd_serv_name(prog.service_name)
            self.assertTrue(
                MCSProcessManager.process_dispatcher.is_service_running(
                    serv_name
                )
            )
        self.assertEqual(
            len(MCSProcessManager.get_running_mcs_procs()),
            len(MCSProcessManager.mcs_progs.keys())
        )
        self.assertTrue(
            MCSProcessManager.is_node_processes_ok(
                is_primary=True, node_stopped=False
            )
        )
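# mcs-workernode above is a templated systemd unit, which is why
# get_systemd_serv_name() appends '@1'. A standalone mirror of that mapping
# (sketch; the instance number 1 is an assumption for single-node setups):
def systemd_unit_name(service: str, instance: int = 1) -> str:
    if service == 'mcs-workernode':
        return f'{service}@{instance}'
    return service

# e.g. systemd_unit_name('mcs-workernode') -> 'mcs-workernode@1'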
211 cmapi/cmapi_server/test/test_node_manip.py Normal file
@ -0,0 +1,211 @@
import logging
import socket

from lxml import etree

from cmapi_server import node_manipulation
from cmapi_server.constants import MCS_DATA_PATH
from cmapi_server.test.unittest_global import (
    tmp_mcs_config_filename, BaseNodeManipTestCase
)
from mcs_node_control.models.node_config import NodeConfig


logging.basicConfig(level='DEBUG')


class NodeManipTester(BaseNodeManipTestCase):

    def test_add_remove_node(self):
        self.tmp_files = (
            './test-output0.xml', './test-output1.xml', './test-output2.xml'
        )
        hostaddr = socket.gethostbyname(socket.gethostname())
        node_manipulation.add_node(
            self.NEW_NODE_NAME, tmp_mcs_config_filename, self.tmp_files[0]
        )
        node_manipulation.add_node(
            hostaddr, self.tmp_files[0], self.tmp_files[1]
        )

        # get a NodeConfig, read test.xml, and look for some of the expected
        # changes. Total verification will take too long to code up right now.
        nc = NodeConfig()
        root = nc.get_current_config_root(self.tmp_files[1])
        pms_node_ipaddr = root.find('./PMS1/IPAddr')
        self.assertEqual(pms_node_ipaddr.text, self.NEW_NODE_NAME)
        pms_node_ipaddr = root.find('./PMS2/IPAddr')
        self.assertEqual(pms_node_ipaddr.text, hostaddr)
        node = root.find("./ExeMgr2/IPAddr")
        self.assertEqual(node.text, hostaddr)

        node_manipulation.remove_node(
            self.NEW_NODE_NAME, self.tmp_files[1], self.tmp_files[2],
            test_mode=True
        )
        nc = NodeConfig()
        root = nc.get_current_config_root(self.tmp_files[2])
        node = root.find('./PMS1/IPAddr')
        self.assertEqual(node.text, hostaddr)
        # TODO: Fix node_manipulation add_node logic and _replace_localhost
        # node = root.find('./PMS2/IPAddr')
        # self.assertEqual(node, None)

    def test_add_dbroots_nodes_rebalance(self):
        self.tmp_files = (
            './extra-dbroots-0.xml', './extra-dbroots-1.xml',
            './extra-dbroots-2.xml'
        )
        # add 2 dbroots and see what happens
        nc = NodeConfig()
        root = nc.get_current_config_root(tmp_mcs_config_filename)

        sysconf_node = root.find('./SystemConfig')
        dbroot_count_node = sysconf_node.find('./DBRootCount')
        dbroot_count = int(dbroot_count_node.text) + 2
        dbroot_count_node.text = str(dbroot_count)
        etree.SubElement(sysconf_node, 'DBRoot2').text = '/dummy_path/data2'
        etree.SubElement(sysconf_node, 'DBRoot10').text = '/dummy_path/data10'
        nc.write_config(root, self.tmp_files[0])

        node_manipulation.add_node(
            self.NEW_NODE_NAME, self.tmp_files[0], self.tmp_files[1]
        )

        # get a NodeConfig, read test.xml, and look for some of the expected
        # changes. Total verification will take too long to code up right now.
        # Do eyeball verification for now.
        nc = NodeConfig()
        root = nc.get_current_config_root(self.tmp_files[1])
        node = root.find("./PMS2/IPAddr")
        self.assertEqual(node.text, self.NEW_NODE_NAME)

        hostname = socket.gethostname()
        # dbroots 1 and 10 get assigned to node 1, and dbroot 2 gets assigned
        # to node 2; now remove node 1 (hostname) and see what we get
        node_manipulation.remove_node(
            hostname, self.tmp_files[1], self.tmp_files[2],
            test_mode=True
        )

    def test_add_dbroot(self):
        self.tmp_files = (
            './dbroot-test0.xml', './dbroot-test1.xml', './dbroot-test2.xml',
            './dbroot-test3.xml', './dbroot-test4.xml'
        )
        # add a dbroot, verify it exists
        id = node_manipulation.add_dbroot(
            tmp_mcs_config_filename, self.tmp_files[0]
        )
        self.assertEqual(id, 2)
        nc = NodeConfig()
        root = nc.get_current_config_root(self.tmp_files[0])
        self.assertEqual(2, int(root.find('./SystemConfig/DBRootCount').text))
        self.assertEqual(
            f'{MCS_DATA_PATH}/data2',
            root.find('./SystemConfig/DBRoot2').text
        )

        # add a node, verify we can add a dbroot to each of them
        hostname = socket.gethostname()
        node_manipulation.add_node(
            hostname, tmp_mcs_config_filename, self.tmp_files[1]
        )
        node_manipulation.add_node(
            self.NEW_NODE_NAME, self.tmp_files[1], self.tmp_files[2]
        )
        id1 = node_manipulation.add_dbroot(
            self.tmp_files[2], self.tmp_files[3], host=self.NEW_NODE_NAME
        )
        id2 = node_manipulation.add_dbroot(
            self.tmp_files[3], self.tmp_files[4], host=hostname
        )
        self.assertEqual(id1, 2)
        self.assertEqual(id2, 3)

        root = nc.get_current_config_root(self.tmp_files[4])
        dbroot_count1 = int(
            root.find('./SystemModuleConfig/ModuleDBRootCount1-3').text
        )
        dbroot_count2 = int(
            root.find('./SystemModuleConfig/ModuleDBRootCount2-3').text
        )
        self.assertEqual(dbroot_count1 + dbroot_count2, 3)

        unique_dbroots = set()
        for i in range(1, dbroot_count1 + 1):
            unique_dbroots.add(int(
                root.find(f'./SystemModuleConfig/ModuleDBRootID1-{i}-3').text
            ))
        for i in range(1, dbroot_count2 + 1):
            unique_dbroots.add(int(
                root.find(f'./SystemModuleConfig/ModuleDBRootID2-{i}-3').text
            ))

        # set iteration order is arbitrary, so compare the sorted values
        self.assertEqual(sorted(unique_dbroots), [1, 2, 3])

    def test_change_primary_node(self):
        # add a node, make it the primary, verify the expected result
        self.tmp_files = ('./primary-node0.xml', './primary-node1.xml')
        node_manipulation.add_node(
            self.NEW_NODE_NAME, tmp_mcs_config_filename, self.tmp_files[0]
        )
        node_manipulation.move_primary_node(
            self.tmp_files[0], self.tmp_files[1]
        )

        root = NodeConfig().get_current_config_root(self.tmp_files[1])

        self.assertEqual(
            root.find('./ExeMgr1/IPAddr').text, self.NEW_NODE_NAME
        )
        self.assertEqual(
            root.find('./DMLProc/IPAddr').text, self.NEW_NODE_NAME
        )
        self.assertEqual(
            root.find('./DDLProc/IPAddr').text, self.NEW_NODE_NAME
        )
        # This version doesn't support IPv6
        dbrm_controller_ip = root.find("./DBRM_Controller/IPAddr").text
        self.assertEqual(dbrm_controller_ip, self.NEW_NODE_NAME)
        self.assertEqual(root.find('./PrimaryNode').text, self.NEW_NODE_NAME)

    def test_unassign_dbroot1(self):
        self.tmp_files = (
            './tud-0.xml', './tud-1.xml', './tud-2.xml', './tud-3.xml',
        )
        node_manipulation.add_node(
            self.NEW_NODE_NAME, tmp_mcs_config_filename, self.tmp_files[0]
        )
        root = NodeConfig().get_current_config_root(self.tmp_files[0])
        (name, addr) = node_manipulation.find_dbroot1(root)
        self.assertEqual(name, self.NEW_NODE_NAME)

        # add a second node and more dbroots to make the test
        # slightly more robust
        node_manipulation.add_node(
            socket.gethostname(), self.tmp_files[0], self.tmp_files[1]
        )
        node_manipulation.add_dbroot(
            self.tmp_files[1], self.tmp_files[2], socket.gethostname()
        )
        node_manipulation.add_dbroot(
            self.tmp_files[2], self.tmp_files[3], self.NEW_NODE_NAME
        )

        root = NodeConfig().get_current_config_root(self.tmp_files[3])
        (name, addr) = node_manipulation.find_dbroot1(root)
        self.assertEqual(name, self.NEW_NODE_NAME)

        node_manipulation.unassign_dbroot1(root)
        caught_it = False
        try:
            node_manipulation.find_dbroot1(root)
        except node_manipulation.NodeNotFoundException:
            caught_it = True

        self.assertTrue(caught_it)
388 cmapi/cmapi_server/test/test_server.py Normal file
@ -0,0 +1,388 @@
import logging
from datetime import datetime
from pathlib import Path

import requests

from cmapi_server.controllers.dispatcher import _version
from cmapi_server.test.unittest_global import BaseServerTestCase
from mcs_node_control.models.dbrm import DBRM


logging.basicConfig(level='DEBUG')
requests.urllib3.disable_warnings()


class ConfigTestCase(BaseServerTestCase):
    URL = f'https://localhost:8640/cmapi/{_version}/node/config'

    def test_config(self):
        for msg, headers, status_code in self.TEST_PARAMS:
            with self.subTest(
                msg=msg, headers=headers, status_code=status_code
            ):
                r = requests.get(self.URL, verify=False, headers=headers)
                self.assertEqual(r.status_code, status_code)


class StatusTestCase(BaseServerTestCase):
    URL = f'https://localhost:8640/cmapi/{_version}/node/status'

    def test_status(self):
        for msg, headers, status_code in self.TEST_PARAMS:
            with self.subTest(
                msg=msg, headers=headers, status_code=status_code
            ):
                r = requests.get(self.URL, verify=False, headers=headers)
                self.assertEqual(r.status_code, status_code)


class BeginTestCase(BaseServerTestCase):
    URL = f'https://localhost:8640/cmapi/{_version}/node/begin'

    def test_wrong_content_type(self):
        r = requests.put(self.URL, verify=False, headers=self.HEADERS)
        self.assertEqual(r.status_code, 415)

    def test_no_timeout(self):
        body = {'id': 42}
        r = requests.put(
            self.URL, verify=False, headers=self.HEADERS, json=body
        )
        self.assertEqual(r.status_code, 422)
        self.assertEqual(r.json(), {'error': 'id or timeout is not set.'})

    def test_no_auth(self):
        body = {'id': 42, 'timeout': 300}
        r = requests.put(
            self.URL, verify=False, headers=self.NO_AUTH_HEADERS, json=body
        )
        self.assertEqual(r.status_code, 401)

    def test_ok(self):
        txn_id_local = 42
        txn_timeout = 300
        txn_timeout_local = 300 + int(datetime.now().timestamp())
        body = {'id': txn_id_local, 'timeout': txn_timeout}
        r = requests.put(
            self.URL, verify=False, headers=self.HEADERS, json=body
        )
        self.assertEqual(r.status_code, 200)
        txn_section = self.app.config.get('txn', None)
        self.assertIsNotNone(txn_section)
        txn_id = txn_section.get('id', None)
        txn_timeout = txn_section.get('timeout', None)
        txn_manager_address = txn_section.get('manager_address', None)
        txn_config_changed = txn_section.get('config_changed', None)
        txn = [txn_id, txn_timeout, txn_manager_address, txn_config_changed]
        self.assertTrue(None not in txn)
        self.assertEqual(txn_id, txn_id_local)
        self.assertLessEqual(txn_timeout - txn_timeout_local, 2)

    def test_multiple_begin(self):
        txn_id_local = 42
        txn_timeout = 300
        body = {'id': txn_id_local, 'timeout': txn_timeout}
        _ = requests.put(
            self.URL, verify=False, headers=self.HEADERS, json=body
        )
        r = requests.put(
            self.URL, verify=False, headers=self.HEADERS, json=body
        )
        self.assertEqual(r.status_code, 422)
        self.assertEqual(
            r.json(), {'error': 'There is an active operation.'}
        )


class CommitTestCase(BaseServerTestCase):
    URL = f'https://localhost:8640/cmapi/{_version}/node/commit'

    def test_wrong_content_type(self):
        r = requests.put(self.URL, verify=False, headers=self.HEADERS)
        self.assertEqual(r.status_code, 415)

    def test_no_operation(self):
        body = {'id': 42}
        r = requests.put(
            self.URL, verify=False, headers=self.HEADERS, json=body
        )
        self.assertEqual(r.status_code, 422)
        self.assertEqual(r.json(), {'error': 'No operation to commit.'})

    def test_begin_and_commit(self):
        txn_timeout = 300
        txn_id = 42
        body = {'id': txn_id, 'timeout': txn_timeout}
        r = requests.put(
            BeginTestCase.URL, verify=False, headers=self.HEADERS, json=body
        )
        txn_section = self.app.config.get('txn', None)
        self.assertIsNotNone(txn_section)
        self.assertEqual(r.status_code, 200)
        body = {'id': 42}
        r = requests.put(
            self.URL, verify=False, headers=self.HEADERS, json=body
        )
        self.assertEqual(r.status_code, 200)
        txn_id = txn_section.get('id', None)
        txn_timeout = txn_section.get('timeout', None)
        txn_manager_address = txn_section.get('manager_address', None)
        txn_config_changed = txn_section.get('config_changed', None)
        self.assertEqual(txn_id, 0)
        self.assertEqual(txn_timeout, 0)
        self.assertEqual(txn_manager_address, '')
        self.assertFalse(txn_config_changed)

    def test_multiple_commit(self):
        body = {'id': 42}
        _ = requests.put(
            self.URL, verify=False, headers=self.HEADERS, json=body
        )
        r = requests.put(
            self.URL, verify=False, headers=self.HEADERS, json=body
        )
        self.assertEqual(r.status_code, 422)


class RollbackTestCase(BaseServerTestCase):
    URL = f"https://localhost:8640/cmapi/{_version}/node/rollback"

    def test_wrong_content_type(self):
        r = requests.put(self.URL, verify=False, headers=self.HEADERS)
        self.assertEqual(r.status_code, 415)

    def test_no_operation(self):
        body = {'id': 42}
        r = requests.put(
            self.URL, verify=False, headers=self.HEADERS, json=body
        )
        self.assertEqual(r.status_code, 422)
        self.assertEqual(r.json(), {'error': 'No operation to rollback.'})

    def test_begin_and_rollback(self):
        txn_timeout = 300
        txn_id = 42
        body = {'id': txn_id, 'timeout': txn_timeout}
        r = requests.put(
            BeginTestCase.URL, verify=False, headers=self.HEADERS, json=body
        )
        txn_section = self.app.config.get('txn', None)
        self.assertIsNotNone(txn_section)
        self.assertEqual(r.status_code, 200)
        body = {'id': 42}
        r = requests.put(
            self.URL, verify=False, headers=self.HEADERS, json=body
        )
        self.assertEqual(r.status_code, 200)
        txn_id = txn_section.get('id', None)
        txn_timeout = txn_section.get('timeout', None)
        txn_manager_address = txn_section.get('manager_address', None)
        txn_config_changed = txn_section.get('config_changed', None)
        self.assertEqual(txn_id, 0)
        self.assertEqual(txn_timeout, 0)
        self.assertEqual(txn_manager_address, '')
        self.assertFalse(txn_config_changed)

    def test_no_operation_again(self):
        body = {'id': 42}
        _ = requests.put(
            self.URL, verify=False, headers=self.HEADERS, json=body
        )
        r = requests.put(
            self.URL, verify=False, headers=self.HEADERS, json=body
        )
        self.assertEqual(r.status_code, 422)


class ConfigPutTestCase(BaseServerTestCase):
    URL = ConfigTestCase.URL

    def setUp(self):
        if 'skip_setUp' not in self.shortDescription():
            body = {'id': 42, 'timeout': 42}
            _ = requests.put(
                BeginTestCase.URL,
                verify=False, headers=self.HEADERS, json=body
            )
        return super().setUp()

    def tearDown(self):
        body = {'id': 42}
        _ = requests.put(
            RollbackTestCase.URL, verify=False, headers=self.HEADERS, json=body
        )
        return super().tearDown()

    def test_wrong_content_type(self):
        """Test wrong Content-Type."""
        r = requests.put(self.URL, verify=False, headers=self.HEADERS)
        self.assertEqual(r.status_code, 415)

    def test_no_active_operation(self):
        """Test no active operation. skip_setUp"""
        body = {
            'revision': 42,
            'manager': '1.1.1.1',
            'timeout': 42,
            'config': "<Columnstore>...</Columnstore>",
            'mcs_config_filename': self.mcs_config_filename
        }

        r = requests.put(
            self.URL, verify=False, headers=self.HEADERS, json=body
        )
        self.assertEqual(r.status_code, 422)
        self.assertEqual(
            r.json(), {'error': 'PUT /config called outside of an operation.'}
        )

    def test_no_mandatory_attributes(self):
        """Test no mandatory attributes. skip_setUp"""
        body = {'id': 42, 'timeout': 42}
        r = requests.put(
            BeginTestCase.URL, verify=False, headers=self.HEADERS, json=body
        )
        self.assertEqual(r.status_code, 200)
        # missing 'manager'
        body = {
            'revision': 42,
            'timeout': 42,
            'config': "<Columnstore>...</Columnstore>",
            'mcs_config_filename': self.mcs_config_filename
        }
        r = requests.put(
            self.URL, verify=False, headers=self.HEADERS, json=body
        )
        self.assertEqual(r.status_code, 422)
        self.assertEqual(
            r.json(), {'error': 'Mandatory attribute is missing.'}
        )
        # missing 'timeout'
        body = {
            'manager': '1.1.1.1',
            'revision': 42,
            'config': "<Columnstore>...</Columnstore>",
            'mcs_config_filename': self.mcs_config_filename
        }
        r = requests.put(
            self.URL, verify=False, headers=self.HEADERS, json=body
        )
        self.assertEqual(r.status_code, 422)
        self.assertEqual(
            r.json(), {'error': 'Mandatory attribute is missing.'}
        )
        # missing 'config'
        body = {
            'manager': '1.1.1.1',
            'revision': 42,
            'timeout': 42,
            'mcs_config_filename': self.mcs_config_filename
        }
        r = requests.put(
            self.URL, verify=False, headers=self.HEADERS, json=body
        )
        self.assertEqual(r.status_code, 422)
        self.assertEqual(
            r.json(), {'error': 'Mandatory attribute is missing.'}
        )

    def test_no_auth(self):
        """Test no auth."""
        body = {
            'revision': 42,
            'manager': '1.1.1.1',
            'timeout': 42,
            'config': "<Columnstore>...</Columnstore>",
            'mcs_config_filename': self.mcs_config_filename
        }
        r = requests.put(
            self.URL, verify=False, headers=self.NO_AUTH_HEADERS, json=body
        )
        self.assertEqual(r.status_code, 401)

    def test_send_rollback(self):
        """Test send rollback."""
        body = {'id': 42}
        r = requests.put(
            RollbackTestCase.URL, verify=False, headers=self.HEADERS, json=body
        )
        self.assertEqual(r.status_code, 200)

    def test_wrong_cluster_mode(self):
        """Test wrong cluster mode."""
        body = {
            'revision': 42,
            'manager': '1.1.1.1',
            'timeout': 42,
            'cluster_mode': 'somemode',
            'mcs_config_filename': self.mcs_config_filename
        }
        r = requests.put(
            self.URL, verify=False, headers=self.HEADERS, json=body
        )
        self.assertEqual(r.status_code, 422)
        # 'occured' below matches the spelling in the server's error message
        self.assertTrue(
            "Error occured setting cluster" in r.content.decode('ASCII')
        )

    def test_set_mode(self):
        """Test set mode."""
        mode = 'readwrite'
        body = {
            'revision': 42,
            'manager': '1.1.1.1',
            'timeout': 42,
            'cluster_mode': mode,
            'mcs_config_filename': self.mcs_config_filename
        }
        r = requests.put(
            self.URL, verify=False, headers=self.HEADERS, json=body
        )
        # DBRM controller must be up and running
        self.assertEqual(r.status_code, 200)
        r = requests.get(
            StatusTestCase.URL, verify=False, headers=self.HEADERS
        )
        self.assertEqual(r.status_code, 200)

        fake_mode = mode
        with DBRM() as dbrm:
            if dbrm.get_dbrm_status() != 'master':
                fake_mode = 'readonly'
            self.assertEqual(r.json()['cluster_mode'], fake_mode)
            self.assertEqual(dbrm._get_cluster_mode(), mode)

    def test_apply_config(self):
        """Test apply config."""
        body = {'id': 42, 'timeout': 42}
        _ = requests.put(
            BeginTestCase.URL,
            verify=False, headers=self.HEADERS, json=body
        )
        config_file = Path(self.mcs_config_filename)
        config = config_file.read_text()
        body = {
            'revision': 42,
            'manager': '1.1.1.1',
            'timeout': 15,
            'config': config,
            'mcs_config_filename': self.mcs_config_filename
        }
        r = requests.put(
            self.URL, verify=False, headers=self.HEADERS, json=body
        )
        self.assertEqual(r.status_code, 200)
        txn_section = self.app.config.get('txn', None)
        self.assertIsNotNone(txn_section)
        txn_config_changed = txn_section.get('config_changed', None)
        self.assertEqual(txn_config_changed, True)
        r = requests.get(
            ConfigTestCase.URL, verify=False, headers=self.HEADERS
        )
        self.assertEqual(r.status_code, 200)
        # commenting this out until we get global config
        # self.assertEqual(r.json()['config'], config)


class PrimaryTestCase(BaseServerTestCase):
    URL = f'https://localhost:8640/cmapi/{_version}/node/primary'

    def test_is_primary(self):
        r = requests.get(self.URL, verify=False)
        self.assertEqual(r.status_code, 200)
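# End to end, the operation protocol the cases above exercise piece by piece,
# as a minimal sketch (id, revision, and timeout values are the illustrative
# ones used in the tests; `headers` carries the x-api-key and JSON content
# type, as self.HEADERS does above):
def operation_protocol_sketch(headers):
    base = f'https://localhost:8640/cmapi/{_version}/node'
    # open an operation
    requests.put(f'{base}/begin', verify=False, headers=headers,
                 json={'id': 42, 'timeout': 300})
    # push a config change inside the operation
    requests.put(f'{base}/config', verify=False, headers=headers,
                 json={'revision': 42, 'manager': '1.1.1.1', 'timeout': 15,
                       'config': '<Columnstore>...</Columnstore>'})
    # seal it; a failure path would PUT /rollback with the same id instead
    requests.put(f'{base}/commit', verify=False, headers=headers,
                 json={'id': 42})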
160 cmapi/cmapi_server/test/test_txns.py Normal file
@ -0,0 +1,160 @@
import cherrypy
import unittest
import os
import socket
from shutil import copyfile
from contextlib import contextmanager

from cmapi_server import helpers, node_manipulation
from mcs_node_control.models.node_config import NodeConfig
from cmapi_server.controllers.dispatcher import dispatcher, jsonify_error
from cmapi_server.test.unittest_global import create_self_signed_certificate, \
    cert_filename, mcs_config_filename, cmapi_config_filename, \
    tmp_mcs_config_filename, tmp_cmapi_config_filename


@contextmanager
def start_server():
    if not os.path.exists(cert_filename):
        create_self_signed_certificate()

    app = cherrypy.tree.mount(root=None, config=cmapi_config_filename)
    app.config.update({
        '/': {
            'request.dispatch': dispatcher,
            'error_page.default': jsonify_error,
        },
        'config': {
            'path': cmapi_config_filename,
        },
    })
    cherrypy.config.update(cmapi_config_filename)

    cherrypy.engine.start()
    cherrypy.engine.wait(cherrypy.engine.states.STARTED)
    yield

    cherrypy.engine.exit()
    cherrypy.engine.block()


class TestTransactions(unittest.TestCase):
    def setUp(self):
        if not os.path.exists(tmp_mcs_config_filename):
            f = open(tmp_mcs_config_filename, 'x')
            f.close()
            copyfile(mcs_config_filename, tmp_mcs_config_filename)

    def tearDown(self):
        if os.path.exists(tmp_mcs_config_filename):
            copyfile(tmp_mcs_config_filename, mcs_config_filename)
            os.remove(tmp_mcs_config_filename)

    @classmethod
    def setUpClass(cls):
        if not os.path.exists(tmp_cmapi_config_filename):
            f = open(tmp_cmapi_config_filename, 'x')
            f.close()
            copyfile(cmapi_config_filename, tmp_cmapi_config_filename)

    @classmethod
    def tearDownClass(cls):
        if os.path.exists(tmp_cmapi_config_filename):
            copyfile(tmp_cmapi_config_filename, cmapi_config_filename)
            os.remove(tmp_cmapi_config_filename)

    def test_start_commit(self):
        print(" ******** Running TestTransactions.test_start_commit()")
        with start_server():
            try:
                hostname = socket.gethostname()
                myaddr = socket.gethostbyname(hostname)
                node_manipulation.add_node(
                    myaddr, mcs_config_filename, mcs_config_filename
                )
                result = helpers.start_transaction(
                    cmapi_config_filename, mcs_config_filename,
                    optional_nodes=[myaddr]
                )
                self.assertTrue(result[0])
                self.assertEqual(len(result[2]), 1)
                self.assertEqual(result[2][0], myaddr)
                helpers.commit_transaction(
                    result[1], cmapi_config_filename, mcs_config_filename,
                    nodes=result[2]
                )
            except:
                cherrypy.engine.exit()
                cherrypy.engine.block()
                raise

    def test_start_rollback(self):
        print(" ******** Running TestTransactions.test_start_rollback()")
        with start_server():
            try:
                hostname = socket.gethostname()
                myaddr = socket.gethostbyname(hostname)
                node_manipulation.add_node(
                    myaddr, mcs_config_filename, mcs_config_filename
                )
                result = helpers.start_transaction(
                    cmapi_config_filename, mcs_config_filename,
                    optional_nodes=[myaddr]
                )
                self.assertTrue(result[0])
                self.assertEqual(len(result[2]), 1)
                self.assertEqual(result[2][0], myaddr)
                # not specifying nodes here to exercise the nodes = None path
                helpers.rollback_transaction(
                    result[1], cmapi_config_filename, mcs_config_filename
                )
            except:
                cherrypy.engine.exit()
                cherrypy.engine.block()
                raise

    def test_broadcast_new_config(self):
        print(" ******** Running TestTransactions.test_broadcast_new_config()")
        with start_server():
            try:
                myaddr = socket.gethostbyname(socket.gethostname())
                node_manipulation.add_node(myaddr, mcs_config_filename, mcs_config_filename)

                # Note, 1.2.3.4 is intentional -> doesn't exist, so it
                # shouldn't end up in the node list returned
                print("\n\nNOTE! This is expected to pause here for ~10s, this isn't an error, yet.\n")
                result = helpers.start_transaction(
                    cmapi_config_filename, mcs_config_filename,
                    optional_nodes=['1.2.3.4']
                )
                self.assertTrue(result[0])
                self.assertEqual(len(result[2]), 1)
                self.assertEqual(result[2][0], myaddr)
                success = helpers.broadcast_new_config(
                    mcs_config_filename,
                    cmapi_config_filename=cmapi_config_filename,
                    test_mode=True,
                    nodes=result[2]
                )
                # not specifying nodes here to exercise the nodes = None path
                helpers.commit_transaction(
                    result[1], cmapi_config_filename, mcs_config_filename
                )
                self.assertTrue(success)
            except:
                cherrypy.engine.exit()
                cherrypy.engine.block()
                raise

    def test_update_rev_and_manager(self):
        print(" ******** Running TestTransactions.test_update_rev_and_manager()")
        with start_server():
            try:
                myaddr = socket.gethostbyname(socket.gethostname())
                node_manipulation.add_node(
                    myaddr, mcs_config_filename, mcs_config_filename
                )
                helpers.update_revision_and_manager(mcs_config_filename, "./update_rev1.xml")
                nc = NodeConfig()
                root = nc.get_current_config_root("./update_rev1.xml")
                self.assertEqual(root.find("./ConfigRevision").text, "2")
                self.assertEqual(
                    root.find("./ClusterManager").text,
                    socket.gethostbyname(socket.gethostname())
                )
            except:
                cherrypy.engine.exit()
                cherrypy.engine.block()
                raise

        os.remove("./update_rev1.xml")
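The tests above use relative paths to the test configs, so they assume the cmapi source root as the working directory. A minimal sketch of driving just this suite programmatically (the dotted module path is inferred from the file layout, not confirmed by the source):

# Hypothetical runner; assumes the cmapi source root is the current
# working directory so the relative config paths above resolve.
import unittest

suite = unittest.defaultTestLoader.loadTestsFromName(
    'cmapi_server.test.test_txns')  # module path is an assumption
unittest.TextTestRunner(verbosity=2).run(suite)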
203
cmapi/cmapi_server/test/unittest_global.py
Normal file
@ -0,0 +1,203 @@
import logging
import os
import unittest
from contextlib import contextmanager
from datetime import datetime, timedelta
from shutil import copyfile
from tempfile import TemporaryDirectory

import cherrypy
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.asymmetric import rsa
from cryptography import x509
from cryptography.x509.oid import NameOID
from cryptography.hazmat.primitives import hashes

from cmapi_server import helpers
from cmapi_server.constants import CMAPI_CONF_PATH
from cmapi_server.controllers.dispatcher import dispatcher, jsonify_error
from cmapi_server.managers.process import MCSProcessManager


TEST_API_KEY = 'somekey123'
cert_filename = './cmapi_server/self-signed.crt'
MCS_CONFIG_FILEPATH = '/etc/columnstore/Columnstore.xml'
COPY_MCS_CONFIG_FILEPATH = './cmapi_server/test/original_Columnstore.xml'
TEST_MCS_CONFIG_FILEPATH = './cmapi_server/test/CS-config-test.xml'
# TODO:
#   - rename after fix in all places
#   - fix path to abs
mcs_config_filename = './cmapi_server/test/CS-config-test.xml'
tmp_mcs_config_filename = './cmapi_server/test/tmp.xml'
cmapi_config_filename = './cmapi_server/cmapi_server.conf'
tmp_cmapi_config_filename = './cmapi_server/test/tmp.conf'
# constants for process dispatchers
DDL_SERVICE = 'mcs-ddlproc'
CONTROLLERNODE_SERVICE = 'mcs-controllernode.service'
UNKNOWN_SERVICE = 'unknown_service'
SYSTEMCTL = 'sudo systemctl'


logging.basicConfig(level=logging.DEBUG)


def create_self_signed_certificate():
    key_filename = './cmapi_server/self-signed.key'

    key = rsa.generate_private_key(
        public_exponent=65537,
        key_size=2048,
        backend=default_backend()
    )

    with open(key_filename, "wb") as f:
        f.write(key.private_bytes(
            encoding=serialization.Encoding.PEM,
            format=serialization.PrivateFormat.TraditionalOpenSSL,
            encryption_algorithm=serialization.NoEncryption()),
        )

    subject = issuer = x509.Name([
        x509.NameAttribute(NameOID.COUNTRY_NAME, u"US"),
        x509.NameAttribute(NameOID.STATE_OR_PROVINCE_NAME, u"California"),
        x509.NameAttribute(NameOID.LOCALITY_NAME, u"Redwood City"),
        x509.NameAttribute(NameOID.ORGANIZATION_NAME, u"MariaDB"),
        x509.NameAttribute(NameOID.COMMON_NAME, u"mariadb.com"),
    ])

    basic_constraints = x509.BasicConstraints(ca=True, path_length=0)

    cert = x509.CertificateBuilder(
    ).subject_name(
        subject
    ).issuer_name(
        issuer
    ).public_key(
        key.public_key()
    ).serial_number(
        x509.random_serial_number()
    ).not_valid_before(
        datetime.utcnow()
    ).not_valid_after(
        datetime.utcnow() + timedelta(days=365)
    ).add_extension(
        basic_constraints,
        False
    ).add_extension(
        x509.SubjectAlternativeName([x509.DNSName(u"localhost")]),
        critical=False
    ).sign(key, hashes.SHA256(), default_backend())

    with open(cert_filename, "wb") as f:
        f.write(cert.public_bytes(serialization.Encoding.PEM))


def run_detect_processes():
    cfg_parser = helpers.get_config_parser(CMAPI_CONF_PATH)
    d_name, d_path = helpers.get_dispatcher_name_and_path(cfg_parser)
    MCSProcessManager.detect(d_name, d_path)


@contextmanager
def run_server():
    if not os.path.exists(cert_filename):
        create_self_signed_certificate()

    cherrypy.engine.start()
    cherrypy.engine.wait(cherrypy.engine.states.STARTED)
    run_detect_processes()  # TODO: move; this slows down each test by ~5s
    yield

    cherrypy.engine.exit()
    cherrypy.engine.block()


class BaseServerTestCase(unittest.TestCase):
    HEADERS = {'x-api-key': TEST_API_KEY}
    NO_AUTH_HEADERS = {'x-api-key': None}
    TEST_PARAMS = (
        ('auth ok', HEADERS, 200),
        ('no auth', NO_AUTH_HEADERS, 401)
    )

    def run(self, result=None):
        with TemporaryDirectory() as tmp_dir:
            self.tmp_dir = tmp_dir
            self.cmapi_config_filename = os.path.join(
                tmp_dir, 'tmp_cmapi_config.conf'
            )
            self.mcs_config_filename = os.path.join(
                tmp_dir, 'tmp_mcs_config.xml'
            )
            copyfile(cmapi_config_filename, self.cmapi_config_filename)
            copyfile(TEST_MCS_CONFIG_FILEPATH, self.mcs_config_filename)
            self.app = cherrypy.tree.mount(
                root=None, config=self.cmapi_config_filename
            )
            self.app.config.update({
                '/': {
                    'request.dispatch': dispatcher,
                    'error_page.default': jsonify_error,
                },
                'config': {
                    'path': self.cmapi_config_filename,
                },
                'Authentication': self.HEADERS
            })
            cherrypy.config.update(self.cmapi_config_filename)

            with run_server():
                return super().run(result=result)


class BaseNodeManipTestCase(unittest.TestCase):
    NEW_NODE_NAME = 'mysql.com'  # something that has a DNS entry everywhere

    def setUp(self):
        self.tmp_files = []
        copyfile(TEST_MCS_CONFIG_FILEPATH, tmp_mcs_config_filename)

    def tearDown(self):
        for tmp_file in self.tmp_files:
            if os.path.exists(tmp_file):
                os.remove(tmp_file)
        if os.path.exists(tmp_mcs_config_filename):
            os.remove(tmp_mcs_config_filename)


class BaseProcessDispatcherCase(unittest.TestCase):
    node_started = None

    @classmethod
    def setUpClass(cls) -> None:
        run_detect_processes()
        cls.node_started = MCSProcessManager.get_running_mcs_procs() != 0
        return super().setUpClass()

    @classmethod
    def tearDownClass(cls) -> None:
        if (MCSProcessManager.get_running_mcs_procs() != 0) == cls.node_started:
            return super().tearDownClass()
        if cls.node_started:
            MCSProcessManager.start_node(is_primary=True)
        else:
            MCSProcessManager.stop_node(is_primary=True)
        return super().tearDownClass()

    def setUp(self) -> None:
        if MCSProcessManager.process_dispatcher.is_service_running(
            CONTROLLERNODE_SERVICE
        ):
            self.controller_node_cmd = 'start'
        else:
            self.controller_node_cmd = 'stop'
        # prevent the 'start-limit-hit' systemd error, see MCOL-5186
        os.system(f'{SYSTEMCTL} reset-failed')
        return super().setUp()

    def tearDown(self) -> None:
        os.system(
            f'{SYSTEMCTL} {self.controller_node_cmd} {CONTROLLERNODE_SERVICE}'
        )
        return super().tearDown()
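BaseServerTestCase.TEST_PARAMS pairs a label, a header set, and the expected HTTP status, which lets one test method cover both the authenticated and unauthenticated paths. A sketch of how a derived test might consume it with subTest (the endpoint URL here is illustrative, not taken from the source):

# Hypothetical derived test case built on BaseServerTestCase above.
import requests

class StatusTestCase(BaseServerTestCase):
    URL = 'https://localhost:8640/cmapi/0.4.0/status'  # illustrative URL

    def test_auth_matrix(self):
        # iterate the (label, headers, expected status) triples
        for name, headers, expected_status in self.TEST_PARAMS:
            with self.subTest(name=name):
                r = requests.get(self.URL, verify=False, headers=headers)
                self.assertEqual(r.status_code, expected_status)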
1
cmapi/conffiles.template
Normal file
@ -0,0 +1 @@
${ETC_DIR}/cmapi_server.conf
13
cmapi/engine_files/mariadb-columnstore-start.sh
Normal file
@ -0,0 +1,13 @@
#!/bin/bash

# This script gracefully starts MCS

/bin/systemctl start mcs-workernode
/bin/systemctl start mcs-controllernode
/bin/systemctl start mcs-primproc
/bin/systemctl start mcs-writeengineserver
/bin/systemctl start mcs-exemgr
/bin/systemctl start mcs-dmlproc
/bin/systemctl start mcs-ddlproc

exit 0
14
cmapi/engine_files/mariadb-columnstore-stop.sh
Normal file
@ -0,0 +1,14 @@
#!/bin/bash

# This script gracefully shuts down MCS

/bin/systemctl stop mcs-dmlproc
/bin/systemctl stop mcs-ddlproc
/bin/systemctl stop mcs-exemgr
/bin/systemctl stop mcs-writeengineserver
/bin/systemctl stop mcs-primproc
/bin/systemctl stop mcs-controllernode
/bin/systemctl stop mcs-workernode
/bin/systemctl stop mcs-storagemanager

exit 0
155
cmapi/engine_files/mcs-loadbrm.py
Executable file
@ -0,0 +1,155 @@
#!/usr/bin/env python3
import subprocess
import sys
import xml.etree.ElementTree as ET
from pathlib import Path
import time
import configparser
import os
import datetime

API_CONFIG_PATH = '/etc/columnstore/cmapi_server.conf'
BYPASS_SM_PATH = '/tmp/columnstore_tmp_files/rdwrscratch/BRM_saves'


def get_key():
    cmapi_config = configparser.ConfigParser()
    cmapi_config.read(API_CONFIG_PATH)
    if 'Authentication' not in cmapi_config.sections():
        return ''
    return cmapi_config['Authentication'].get('x-api-key', '')


def get_version():
    return '0.4.0'


def get_port():
    return '8640'


if __name__ == '__main__':
    # To avoid systemd in container environment
    use_systemd = True
    if len(sys.argv) > 1:
        use_systemd = sys.argv[1] != 'no'
    sm_config = configparser.ConfigParser()

    sm_config.read('/etc/columnstore/storagemanager.cnf')
    cs_config = ET.parse('/etc/columnstore/Columnstore.xml')
    config_root = cs_config.getroot()

    storage = sm_config.get('ObjectStorage', 'service')
    if storage is None:
        storage = 'LocalStorage'
    bucket = sm_config.get('S3', 'bucket')
    if bucket is None:
        bucket = 'some_bucket'

    dbrmroot = config_root.find('./SystemConfig/DBRMRoot').text
    pmCount = int(config_root.find('./SystemModuleConfig/ModuleCount3').text)
    loadbrm = '/usr/bin/load_brm'

    brm_saves_current = ''

    if storage.lower() == 's3' and not bucket.lower() == 'some_bucket':
        # start SM using systemd
        if use_systemd is True:
            cmd = 'systemctl start mcs-storagemanager'
            retcode = subprocess.call(cmd, shell=True)
            if retcode < 0:
                print('Failed to start storagemanager. '
                      '{} exits with {}.'.format(cmd, retcode))
                sys.exit(1)
            # delay to allow storagemanager to init
            time.sleep(1)

        brm = 'data1/systemFiles/dbrm/BRM_saves_current'
        config_root.find('./Installation/DBRootStorageType').text = "StorageManager"
        config_root.find('./StorageManager/Enabled').text = "Y"

        if config_root.find('./SystemConfig/DataFilePlugin') is None:
            config_root.find('./SystemConfig').append(ET.Element("DataFilePlugin"))

        config_root.find('./SystemConfig/DataFilePlugin').text = "libcloudio.so"

        cs_config.write('/etc/columnstore/Columnstore.xml.loadbrm')
        # atomic replacement
        os.replace('/etc/columnstore/Columnstore.xml.loadbrm',
                   '/etc/columnstore/Columnstore.xml')

    # Single-node on S3
    if storage.lower() == 's3' and not bucket.lower() == 'some_bucket' and pmCount == 1:
        try:
            print("Running smcat")
            brm_saves_current = subprocess.check_output(['smcat', brm])
        except subprocess.CalledProcessError as e:
            # will happen when brm file does not exist
            print('{} does not exist.'.format(brm), file=sys.stderr)
    else:
        brm = '{}_current'.format(dbrmroot)
        # Multi-node
        if pmCount > 1:
            try:
                import requests
                requests.packages.urllib3.disable_warnings()
            except ImportError as e:
                print('requests Python module does not exist. '
                      'Please install CMAPI first.', file=sys.stderr)
                sys.exit(1)
            try:
                primary_address = config_root.find('./DBRM_Controller/IPAddr').text
                api_key = get_key()
                if len(api_key) == 0:
                    print('Failed to find API key in {}.'.format(API_CONFIG_PATH),
                          file=sys.stderr)
                    sys.exit(1)
                headers = {'x-api-key': api_key}
                api_version = get_version()
                api_port = get_port()
                elems = ['em', 'journal', 'vbbm', 'vss']
                for e in elems:
                    print("Pulling {} from the primary node.".format(e))
                    url = "https://{}:{}/cmapi/{}/node/meta/{}".format(
                        primary_address, api_port, api_version, e)
                    r = requests.get(url, verify=False, headers=headers, timeout=30)
                    if r.status_code != 200:
                        raise RuntimeError("Error requesting {} from the primary "
                                           "node.".format(e))

                    # To avoid SM storing BRM files
                    if storage.lower() == 's3' and bucket.lower() != 'some_bucket':
                        dbrmroot = BYPASS_SM_PATH

                        if not os.path.exists(dbrmroot):
                            os.makedirs(dbrmroot)

                    current_name = '{}_{}'.format(dbrmroot, e)

                    print("Saving {} to {}".format(e, current_name))
                    path = Path(current_name)
                    path.write_bytes(r.content)
            except Exception as e:
                print(str(e))
                print('Failed to load BRM data from the primary '
                      'node {}.'.format(primary_address), file=sys.stderr)
                sys.exit(1)

            brm_saves_current = b"BRM_saves\n"
        else:
            # load local dbrm
            try:
                brm_saves_current = subprocess.check_output(['cat', brm])
            except subprocess.CalledProcessError as e:
                # will happen when brm file does not exist
                print('{} does not exist.'.format(brm), file=sys.stderr)

    if brm_saves_current:
        cmd = '{} {}{}'.format(
            loadbrm, dbrmroot,
            brm_saves_current.decode("utf-8").replace("BRM_saves", ""))
        print(f"{datetime.datetime.now()} : Running {cmd}")
        try:
            retcode = subprocess.call(cmd, shell=True)
            if retcode < 0:
                print('{} exits with {}.'.format(cmd, retcode))
                sys.exit(1)
        except OSError as e:
            sys.exit(1)
84
cmapi/engine_files/mcs-savebrm.py
Executable file
@ -0,0 +1,84 @@
#!/usr/bin/env python3
import subprocess
import sys
import xml.etree.ElementTree as ET
import configparser

XML_CONFIG_PATH = '/etc/columnstore/Columnstore.xml'
SM_CONFIG_PATH = '/etc/columnstore/storagemanager.cnf'
REST_REQUEST_TO = 2


def get_version():
    return '0.4.0'


def get_port():
    return '8640'


if __name__ == '__main__':
    master_addr = ''
    pm_count = 0
    try:
        cs_config = ET.parse(XML_CONFIG_PATH)
        config_root = cs_config.getroot()
        master_addr = config_root.find('./DBRM_Controller/IPAddr').text
        pm_count = int(config_root.find('./SystemModuleConfig/ModuleCount3').text)
    except (FileNotFoundError, AttributeError, ValueError) as e:
        print("An exception was raised. Continuing anyway.")
        print(str(e))

    storage = 'LocalStorage'
    sm_config = configparser.ConfigParser()
    files_read = len(sm_config.read(SM_CONFIG_PATH))
    if files_read == 1:
        storage = sm_config.get('ObjectStorage', 'service')

    default_addr = '127.0.0.1'
    savebrm = 'save_brm'
    is_primary = False

    # For multi-node with local storage or default installations
    if (storage.lower() != 's3' and master_addr != default_addr) or \
            master_addr == default_addr:
        is_primary = True
        print('Multi-node with local-storage detected.')
    else:
        has_requests = False
        try:
            import requests
            requests.packages.urllib3.disable_warnings()
            has_requests = True
        except ImportError as e:
            print('requests Python module does not exist. '
                  'Please install CMAPI first.')
        if has_requests is True:
            try:
                print('Requesting the primary node status.')
                api_version = get_version()
                api_port = get_port()
                url = "https://{}:{}/cmapi/{}/node/primary".format(
                    default_addr, api_port, api_version)
                resp = requests.get(url,
                                    verify=False,
                                    timeout=REST_REQUEST_TO)
                if resp.status_code != 200:
                    print("Error sending GET /node/primary.")
                else:
                    is_primary = resp.json()['is_primary'] == 'True'
            except Exception as e:
                print('Failed to request.')
                print(str(e))

    if is_primary is True:
        try:
            retcode = subprocess.call(savebrm, shell=True)
            if retcode < 0:
                print('{} exits with {}.'.format(savebrm, retcode))
                sys.exit(0)
        except OSError as e:
            print(str(e))
            sys.exit(0)

    sys.exit(0)
13
cmapi/engine_files/mcs-workernode.service
Normal file
@ -0,0 +1,13 @@
[Unit]
Description=mcs-workernode
After=mcs-loadbrm.service

[Service]
Type=simple
ExecStart=/usr/bin/workernode DBRM_Worker1 fg
Restart=on-failure
ExecStop=/usr/bin/env bash -c "kill -15 $MAINPID"
ExecStopPost=-/usr/bin/mcs-savebrm.py
ExecStopPost=/usr/bin/env bash -c "clearShm > /dev/null 2>&1"
TimeoutStopSec=120
EnvironmentFile=-/etc/columnstore/systemd.env
0
cmapi/failover/__init__.py
Normal file
266
cmapi/failover/agent_comm.py
Normal file
@ -0,0 +1,266 @@
# this class handles the comm with the agent, whatever it will be

import datetime
import logging
import threading
import time


logger = logging.getLogger('agent_comm')


# First an agent base class
class AgentBase:

    def activateNodes(self, nodes):
        print("AgentBase: Got activateNodes({})".format(nodes))

    def deactivateNodes(self, nodes):
        print("AgentBase: Got deactivateNodes({})".format(nodes))

    def movePrimaryNode(self, placeholder):
        print("AgentBase: Got movePrimaryNode()")

    def enterStandbyMode(self):
        print("AgentBase: Got enterStandbyMode()")

    def getNodeHealth(self):
        print("AgentBase: Got getNodeHealth()")
        return 0

    def raiseAlarm(self, msg):
        print("AgentBase: Got raiseAlarm({})".format(msg))

    def startTransaction(self, extra_nodes=[], remove_nodes=[]):
        print(f"AgentBase: Got startTransaction, extra_nodes={extra_nodes}, "
              f"remove_nodes={remove_nodes}")
        return 0

    def commitTransaction(self, txnid, nodes):
        print("AgentBase: Got commitTransaction")

    def rollbackTransaction(self, txnid, nodes):
        print("AgentBase: Got rollbackTransaction")


class OpAndArgs:
    name = None  # a callable in AgentBase
    args = None  # a tuple containing the args for the callable

    def __init__(self, name, *args):
        self.name = name
        self.args = args

    def __str__(self):
        return f"{str(self.name.__qualname__)}{str(self.args)}"

    def __hash__(self):
        return hash((self.name.__qualname__, str(self.args)))

    def __eq__(self, other):
        return self.name == other.name and self.args == other.args

    def __ne__(self, other):
        return not self.__eq__(other)

    def run(self):
        self.name(*self.args)


# The AgentComm class
# Doesn't do anything but pass along events to the Agent yet
# TODO: implement an event queue and a thread to pluck events and issue them
#       to the agent. Done?
# TODO: de-dup events as they come in from the node monitor,
#       add to the event queue
# TODO: rewrite using builtin Queue class
class AgentComm:

    def __init__(self, agent=None):
        if agent is None:
            self._agent = AgentBase()
        else:
            self._agent = agent

        # deduper contains queue contents, events in progress, and finished
        # events up to 10s after they finished
        self._deduper = {}
        self._die = False
        self._queue = []
        self._mutex = threading.Lock()
        self._thread = None

    def __del__(self):
        self.die()

    def start(self):
        self._die = False
        self._thread = threading.Thread(target=self._runner, name='AgentComm')
        self._thread.start()

    # TODO: rename to stop
    def die(self):
        self._die = True
        self._thread.join()

    # returns (len-of-event-queue, len-of-deduper)
    def getQueueSize(self):
        self._mutex.acquire()
        ret = (len(self._queue), len(self._deduper))
        self._mutex.release()
        return ret

    def activateNodes(self, nodes):
        self._addEvent(self._agent.activateNodes, (nodes))

    def deactivateNodes(self, nodes):
        self._addEvent(self._agent.deactivateNodes, (nodes))

    def movePrimaryNode(self):
        self._addEvent(self._agent.movePrimaryNode, ())

    def enterStandbyMode(self):
        # The other events are moot if this node has to enter standby mode
        self._mutex.acquire()
        op = OpAndArgs(self._agent.enterStandbyMode, ())
        self._queue = [op]
        self._deduper = {op: datetime.datetime.now()}
        self._mutex.release()

    def getNodeHealth(self):
        return self._agent.getNodeHealth()

    def raiseAlarm(self, msg):
        self._agent.raiseAlarm(msg)

    def _addEvent(self, name, args):
        """Interface to the event queue."""
        op = OpAndArgs(name, args)

        self._mutex.acquire()
        if op not in self._deduper:
            self._deduper[op] = None
            self._queue.append(op)
        self._mutex.release()

    def _getEvents(self):
        """
        This gets all queued events at once and prunes events older than
        10 seconds from the deduper.
        """
        self._mutex.acquire()
        ret = self._queue
        self._queue = []

        # prune events that finished more than 10 secs ago from the deduper
        tenSecsAgo = datetime.datetime.now() - datetime.timedelta(seconds=10)
        for (op, finishTime) in list(self._deduper.items()):
            if finishTime is not None and finishTime < tenSecsAgo:
                del self._deduper[op]

        self._mutex.release()
        return ret

    def _requeueEvents(self, events):
        self._mutex.acquire()
        # events has commands issued before what is currently in _queue
        events.extend(self._queue)
        self._queue = events
        self._mutex.release()

    def _markEventsFinished(self, events):
        self._mutex.acquire()
        now = datetime.datetime.now()
        for event in events:
            self._deduper[event] = now
        self._mutex.release()

    def _runner(self):
        while not self._die:
            try:
                self.__runner()
            except Exception:
                logger.error(
                    'AgentComm.runner(): got an unrecognised exception.',
                    exc_info=True
                )
                if not self._die:
                    time.sleep(1)
        logger.info('AgentComm.runner() exiting normally...')

    def __runner(self):
        while not self._die:
            events = self._getEvents()
            logger.trace(f'Got events from queue "{events}".')
            if len(events) == 0:
                time.sleep(5)
                continue

            nextPollTime = datetime.datetime.now() + datetime.timedelta(seconds=5)

            nodes_added = set()
            nodes_removed = set()

            # scan the list of events, put together the extra_nodes and
            # remove_nodes parameters to startTransaction(). Note, we could
            # consolidate the activate / deactivate calls here, but that's a
            # minor optimization not worth doing yet.
            needs_transaction = False
            for event in events:  # TODO: combine with loop below.
                #print(f"got event: {event}")

                # determine whether we need a transaction at all.
                # List the fcns that require a txn here.
                if not needs_transaction and event.name in (
                        self._agent.activateNodes,
                        self._agent.deactivateNodes,
                        self._agent.movePrimaryNode):
                    needs_transaction = True

                if event.name == self._agent.activateNodes:
                    nodes = event.args[0]
                    for node in nodes:
                        nodes_added.add(node)
                elif event.name == self._agent.deactivateNodes:
                    nodes = event.args[0]
                    for node in nodes:
                        nodes_removed.add(node)

            if needs_transaction:
                logger.debug(
                    'Failover starts transaction to run upcoming event.'
                )
                (txn_id, nodes) = self._agent.startTransaction(
                    extra_nodes=list(nodes_added),
                    remove_nodes=list(nodes_removed)
                )

            # The problem with this is that it's all-or-nothing.
            # It would be preferable to commit what has been done up to the
            # point of failure and discard the event that failed.
            # If the problem is with the event itself, then it may keep
            # happening and block all progress.
            try:
                for event in events:
                    #print(f"Running {event}")
                    event.run()
            except Exception as e:
                logger.error(
                    'AgentComm.runner(): got an unrecognised exception.',
                    exc_info=True
                )
                if needs_transaction:
                    logger.warning(
                        f'Aborting transaction {txn_id}',
                        exc_info=True
                    )
                    self._agent.rollbackTransaction(txn_id, nodes=nodes)
                # on failure, requeue the events in this batch to pick them up
                # again on the next iteration
                self._requeueEvents(events)
            else:
                if needs_transaction:
                    self._agent.commitTransaction(txn_id, nodes=nodes)
                self._markEventsFinished(events)
            finishTime = datetime.datetime.now()
            if nextPollTime > finishTime:
                time.sleep((nextPollTime - finishTime).seconds)
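OpAndArgs hashes on the callable's qualified name plus the stringified args, which is what lets _addEvent() drop duplicate events while they sit in the deduper. A small sketch of that behavior (the `failover` import path is inferred from the file layout above, not confirmed by the source):

# Assumes the failover package is importable from the cmapi source root.
from failover.agent_comm import AgentBase, OpAndArgs

agent = AgentBase()
a = OpAndArgs(agent.activateNodes, ['node2'])
b = OpAndArgs(agent.activateNodes, ['node2'])  # same call, same args

assert a == b and hash(a) == hash(b)  # equal, so a dict keeps one copy
deduper = {a: None}
assert b in deduper  # this is the membership test _addEvent() relies on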
177
cmapi/failover/config.py
Normal file
@ -0,0 +1,177 @@
import configparser
import logging
import threading
from os.path import getmtime

from cmapi_server.constants import DEFAULT_MCS_CONF_PATH, DEFAULT_SM_CONF_PATH
from mcs_node_control.models.node_config import NodeConfig


class Config:
    config_file = ''

    # params read from the config file
    _desired_nodes = []
    _active_nodes = []
    _inactive_nodes = []
    _primary_node = ''
    _my_name = None  # derived from config file

    config_lock = threading.Lock()
    last_mtime = 0
    die = False
    logger = None

    def __init__(self, config_file=DEFAULT_MCS_CONF_PATH):
        self.config_file = config_file
        self.logger = logging.getLogger()

    def getDesiredNodes(self):
        self.config_lock.acquire()
        self.check_reload()
        ret = self._desired_nodes
        self.config_lock.release()
        return ret

    def getActiveNodes(self):
        self.config_lock.acquire()
        self.check_reload()
        ret = self._active_nodes
        self.config_lock.release()
        return ret

    def getInactiveNodes(self):
        self.config_lock.acquire()
        self.check_reload()
        ret = self._inactive_nodes
        self.config_lock.release()
        return ret

    def getAllNodes(self):
        """Returns a 3-element tuple describing the status of all nodes.

        index 0 = all nodes in the cluster
        index 1 = all active nodes
        index 2 = all inactive nodes
        """
        self.config_lock.acquire()
        self.check_reload()
        ret = (self._desired_nodes, self._active_nodes, self._inactive_nodes)
        self.config_lock.release()
        return ret

    def getPrimaryNode(self):
        self.config_lock.acquire()
        self.check_reload()
        ret = self._primary_node
        self.config_lock.release()
        return ret

    def is_shared_storage(self, sm_config_file=DEFAULT_SM_CONF_PATH):
        """Check if SM is S3 or not.

        :param sm_config_file: path to SM config,
            defaults to DEFAULT_SM_CONF_PATH
        :type sm_config_file: str, optional
        :return: True if SM is S3 otherwise False
        :rtype: bool

        TODO: remove in next releases, useless?
        """
        sm_config = configparser.ConfigParser()
        sm_config.read(sm_config_file)
        # only LocalStorage or S3 can be returned for now
        storage = sm_config.get(
            'ObjectStorage', 'service', fallback='LocalStorage'
        )
        return storage.lower() == 's3'

    def check_reload(self):
        """Check config reload.

        Returns True if reload happened, False otherwise.
        """
        if self.last_mtime != getmtime(self.config_file):
            self.load_config()
            return True
        return False

    def who_am_I(self):
        self.config_lock.acquire()
        self.check_reload()
        ret = self._my_name
        self.config_lock.release()
        return ret

    def load_config(self):
        try:
            node_config = NodeConfig()
            root = node_config.get_current_config_root(self.config_file)
            last_mtime = getmtime(self.config_file)
        except Exception:
            self.logger.warning(
                f'Failed to parse config file {self.config_file}.',
                exc_info=True
            )
            return False

        node_tmp = root.findall('./DesiredNodes/Node')
        if len(node_tmp) == 0:
            self.logger.warning(
                f'The config file {self.config_file} is missing entries '
                'in the DesiredNodes section'
            )
            return False

        desired_nodes = [node.text for node in node_tmp]
        active_nodes = [
            node.text for node in root.findall('./ActiveNodes/Node')
        ]
        inactive_nodes = [
            node.text for node in root.findall('./InactiveNodes/Node')
        ]

        node_tmp = root.find('./PrimaryNode')
        if node_tmp is None or len(node_tmp.text) == 0:
            self.logger.warning(
                f'The config file {self.config_file} is missing a valid '
                'PrimaryNode entry'
            )
            return False
        primary_node = node_tmp.text

        # find my name in this cluster
        names = set(node_config.get_network_addresses_and_names())
        all_nodes = set(desired_nodes)
        intersection = all_nodes & names
        if len(intersection) > 1:
            my_name = intersection.pop()
            self.logger.warning(
                'This node has multiple names in the list of desired nodes, '
                'was it added more than once? Some things may not work in '
                f'this configuration. Using {my_name} as the name for this '
                'node.'
            )
        elif len(intersection) == 0:
            self.logger.warning(
                'This node has no entry in the list of desired nodes.'
            )
            my_name = None
        elif len(intersection) == 1:
            my_name = intersection.pop()
            # handles the initial 0-node special case
            if my_name == '127.0.0.1':
                my_name = None

        self.logger.info(f'Loaded the config file, my name is {my_name}')

        desired_nodes.sort()
        active_nodes.sort()
        inactive_nodes.sort()
        self._desired_nodes = desired_nodes
        self._active_nodes = active_nodes
        self._inactive_nodes = inactive_nodes
        self._primary_node = primary_node
        self.last_mtime = last_mtime
        self._my_name = my_name
        return True
95
cmapi/failover/heartbeat_history.py
Normal file
@ -0,0 +1,95 @@
from array import array
from threading import Lock

# for tracking the history of heartbeat responses


class InvalidNode:
    pass


class HBHistory:
    # consts to denote state of the responses
    NoResponse = 1
    GoodResponse = 2
    LateResponse = -1
    NewNode = 0

    # By default, keep a 600-heartbeat history for each node (10 mins @ 1 hb/s)
    # and consider a response late if it arrives 3+ ticks late.
    # 3 is an arbitrary small value.
    def __init__(self, tickWindow=600, lateWindow=3):
        # a list of heartbeats for each node. index = str, value = array of
        # int; history flushes each time the thread is restarted
        self.nodeHistory = {}
        # current tick resets to zero each time the thread is restarted
        self.currentTick = 0
        self.lateWindow = lateWindow
        self.mutex = Lock()
        self.tickWindow = tickWindow

    def _initNode(self, node, defaultValue=GoodResponse):
        self.nodeHistory[node] = array(
            'b', [defaultValue for _ in range(self.tickWindow)]
        )

    def removeNode(self, node):
        self.mutex.acquire()
        if node in self.nodeHistory:
            del self.nodeHistory[node]
        self.mutex.release()

    def keepOnlyTheseNodes(self, nodes):
        self.mutex.acquire()
        nodesToKeep = set(nodes)
        historicalNodes = set(self.nodeHistory.keys())
        for node in historicalNodes:
            if node not in nodesToKeep:
                del self.nodeHistory[node]
        self.mutex.release()

    def setCurrentTick(self, tick):
        self.mutex.acquire()

        self.currentTick = tick
        for pongs in self.nodeHistory.values():
            pongs[tick % self.tickWindow] = self.NoResponse

        self.mutex.release()

    def gotHeartbeat(self, node, tickID):
        if tickID <= self.currentTick - self.lateWindow:
            status = self.LateResponse
        else:
            status = self.GoodResponse

        self.mutex.acquire()
        if node not in self.nodeHistory:
            self._initNode(node)
        self.nodeHistory[node][tickID % self.tickWindow] = status
        self.mutex.release()

    # defaultValue is used to init a fake history for a node this code is
    # learning about 'now'. If a node is inserted into the active list, we do
    # not want to remove it right away b/c it hasn't responded to any pings
    # yet. Likewise, if a node is inserted into the inactive list, we do not
    # want to activate it right away b/c it has responded to all pings sent
    # so far (0). TBD if we want to add logic to handle an 'init' value in
    # the history.
    def getNodeHistory(self, node, tickInterval, defaultValue=GoodResponse):
        self.mutex.acquire()
        if node not in self.nodeHistory:
            self._initNode(node, defaultValue=defaultValue)

        # We don't want to return values in the range where we are likely to
        # be gathering responses.
        # The return value is the range of heartbeat responses from node from
        # tickInterval + lateWindow ticks ago to lateWindow ticks ago
        lastIndex = (self.currentTick - self.lateWindow) % self.tickWindow
        firstIndex = lastIndex - tickInterval
        history = self.nodeHistory[node]
        if firstIndex < 0:
            ret = history[firstIndex:]
            ret.extend(history[:lastIndex])
        else:
            ret = history[firstIndex:lastIndex]

        self.mutex.release()
        return ret
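getNodeHistory() reads a slice of the ring buffer that ends lateWindow ticks behind the current tick, so slots still collecting responses are never counted. A quick walkthrough with a small window (import path inferred from the file layout above):

# Small-window walkthrough of the ring-buffer semantics; assumes the
# failover package is importable from the cmapi source root.
from failover.heartbeat_history import HBHistory

hbh = HBHistory(tickWindow=10, lateWindow=3)
hbh.setCurrentTick(0)          # marks slot 0 NoResponse for known nodes
hbh.gotHeartbeat('node1', 0)   # on time -> GoodResponse (2)
hbh.setCurrentTick(5)
hbh.gotHeartbeat('node1', 1)   # 4 ticks late -> LateResponse (-1)

# lastIndex = (5 - 3) % 10 = 2, so this returns slots 0..1:
# array('b', [2, -1]) -> one good response, one late response
print(hbh.getNodeHistory('node1', 2))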
121
cmapi/failover/heartbeater.py
Normal file
@ -0,0 +1,121 @@
import logging
import threading
import time
from socket import socket, SOCK_DGRAM
from struct import pack, unpack_from


class HeartBeater:
    port = 9051
    dieMsg = b'die!00'
    areYouThereMsg = b'AYTM'
    yesIAmMsg = b'YIAM'

    def __init__(self, config, history):
        self.config = config
        self.die = False
        self.history = history
        self.sequenceNum = 0
        self.responseThread = None
        self.sock = None
        self.sockMutex = threading.Lock()
        self.logger = logging.getLogger('heartbeater')

    def start(self):
        self.initSockets()
        self.die = False
        self.responseThread = threading.Thread(
            target=self.listenAndRespond, name='HeartBeater'
        )
        self.responseThread.start()

    def stop(self):
        self.die = True
        # break out of the recv loop
        sock = socket(type=SOCK_DGRAM)
        sock.sendto(self.dieMsg, ('localhost', self.port))
        time.sleep(1)
        self.sock.close()
        self.responseThread.join()

    def initSockets(self):
        self.sock = socket(type=SOCK_DGRAM)
        self.sock.bind(('0.0.0.0', self.port))

    def listenAndRespond(self):
        self.logger.info('Starting the heartbeat listener.')
        while not self.die:
            try:
                self._listenAndRespond()
            except Exception:
                self.logger.warning(
                    'Caught an exception while listening and responding.',
                    exc_info=True
                )
                time.sleep(1)
        self.logger.info('Heartbeat listener exiting normally...')

    def _listenAndRespond(self):
        (data, remote) = self.sock.recvfrom(300)
        if len(data) < 6:
            return
        (msg_type, seq) = unpack_from('4sH', data, 0)
        if msg_type == self.areYouThereMsg:
            self.logger.trace(f'Got "are you there?" from {remote[0]}')
            name = self.config.who_am_I()
            if name is None:
                self.logger.warning(
                    'Heartbeater: got an "are you there?" msg from '
                    f'{remote[0]}, but this node is not in the list of '
                    'desired nodes for the cluster. '
                    'This node needs a config update.'
                )
                return
            bname = name.encode('ascii')
            if len(bname) > 255:
                bname = bname[:255]
            msg = pack(f'4sH{len(bname)}s', self.yesIAmMsg, seq, bname)
            self.send(msg, remote[0])
            self.logger.trace(f'Sent "yes I am" to {remote[0]}')
        elif msg_type == self.yesIAmMsg:
            if len(data) > 6:
                name = data[6:].decode('ascii')
                self.logger.trace(f'Got "yes I am" from {name}')
                self.history.gotHeartbeat(name, seq)

    def send(self, msg, destaddr):
        self.sockMutex.acquire()
        try:
            self.sock.sendto(msg, (destaddr, self.port))
        except Exception:
            self.logger.warning(
                f'Heartbeater.send(): caught error sending msg to {destaddr}',
                exc_info=True
            )
        finally:
            self.sockMutex.release()

    def sendHeartbeats(self):
        nodes = self.config.getDesiredNodes()
        my_name = self.config.who_am_I()
        msg = pack('4sH', self.areYouThereMsg, self.sequenceNum)
        self.sockMutex.acquire()
        for node in nodes:
            if node == my_name:
                continue
            try:
                self.logger.trace(f'Sending "are you there" to node {node}')
                self.sock.sendto(msg, (node, self.port))
            except Exception:
                # Suppressing these logs.
                # In docker the whole dns entry gets removed when a container
                # goes away. Ends up spamming the logs until the node is
                # removed from the cluster via the rest endpoint, or the node
                # comes back up.
                pass
        self.sockMutex.release()
        self.sequenceNum = (self.sequenceNum + 1) % 65535
        self.history.setCurrentTick(self.sequenceNum)
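The heartbeat datagrams carry a 6-byte header, a 4-byte message tag plus an unsigned 16-bit sequence number, with the responder's name appended to YIAM replies. A standalone round-trip of that framing, using only the struct calls the class above already relies on:

# Standalone round-trip of the heartbeat wire format used above.
from struct import pack, unpack_from

seq = 42
name = b'node1'
msg = pack(f'4sH{len(name)}s', b'YIAM', seq, name)  # YIAM reply framing

(msg_type, got_seq) = unpack_from('4sH', msg, 0)    # the 6-byte header
assert (msg_type, got_seq) == (b'YIAM', 42)
assert msg[6:] == b'node1'                          # trailing name payload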
230
cmapi/failover/node_monitor.py
Normal file
@ -0,0 +1,230 @@
import logging
import time
import threading

from .heartbeater import HeartBeater
from .config import Config
from .heartbeat_history import HBHistory
from .agent_comm import AgentComm


class NodeMonitor:

    def __init__(
        self, agent=None, config=None, samplingInterval=30,
        flakyNodeThreshold=0.5
    ):
        self._agentComm = AgentComm(agent)
        self._die = False
        self._inStandby = False
        self._testMode = False  # TODO: remove
        self._hbHistory = HBHistory()
        self._logger = logging.getLogger('node_monitor')
        self._runner = None
        if config is not None:
            self._config = config
        else:
            self._config = Config()
        self._hb = HeartBeater(self._config, self._hbHistory)
        self.samplingInterval = samplingInterval
        # not used yet, KI-V-SS for V1 [old comment from Patrick]
        self.flakyNodeThreshold = flakyNodeThreshold
        self.myName = self._config.who_am_I()
        #self._logger.info("Using {} as my name".format(self.myName))

    def __del__(self):
        self.stop()

    def start(self):
        self._agentComm.start()
        self._hb.start()
        self._die = False
        self._runner = threading.Thread(
            target=self.monitor, name='NodeMonitor'
        )
        self._runner.start()

    def stop(self):
        self._die = True
        self._agentComm.die()
        if not self._testMode:
            self._hb.stop()
        self._runner.join()

    def _removeRemovedNodes(self, desiredNodes):
        self._hbHistory.keepOnlyTheseNodes(desiredNodes)

    def _pickNewActor(self, nodes):
        if not nodes:
            return
        if self.myName == nodes[0]:
            self._isActorOfCohort = True
        else:
            self._isActorOfCohort = False

    def _chooseNewPrimaryNode(self):
        self._agentComm.movePrimaryNode()

    def monitor(self):
        while not self._die:
            try:
                self._logger.info('Starting the monitor logic')
                self._monitor()
            except Exception:
                self._logger.error(
                    'monitor() caught an exception.',
                    exc_info=True
                )
                if not self._die:
                    time.sleep(1)
        self._logger.info("node monitor logic exiting normally...")

    def _monitor(self):
        """
        This works like the main loop of a game.
        1) check current state
        2) identify the differences
        3) update based on the differences
        """
        (desiredNodes, activeNodes, inactiveNodes) = self._config.getAllNodes()
        self._pickNewActor(activeNodes)

        logged_idleness_msg = False
        logged_active_msg = False
        inStandbyMode = False
        while not self._die:
            # these things would normally go at the end of the loop; doing it
            # here to reduce line count & chance of missing something as we
            # add more code
            oldActiveNodes = activeNodes
            wasActorOfCohort = self._isActorOfCohort
            self._logger.trace(
                f'Previous actor of cohort state is {wasActorOfCohort}'
            )
            time.sleep(1)

            # get config updates
            (desiredNodes, activeNodes, inactiveNodes) = self._config.getAllNodes()
            self.myName = self._config.who_am_I()
            self.primaryNode = self._config.getPrimaryNode()

            # remove nodes from history that have been removed from the cluster
            self._removeRemovedNodes(desiredNodes)

            # if there are fewer than 3 nodes in the cluster, do nothing
            if len(desiredNodes) < 3:
                if not logged_idleness_msg:
                    self._logger.info(
                        'Failover support is inactive; '
                        'requires at least 3 nodes and a shared storage system'
                    )
                    logged_idleness_msg = True
                    logged_active_msg = False
            elif not logged_active_msg:
                self._logger.info(
                    'Failover support is active, '
                    f'monitoring nodes {desiredNodes}'
                )
                logged_active_msg = True
                logged_idleness_msg = False

            # nothing to do in this case
            if len(desiredNodes) == 1:
                continue

            # has this node been reactivated?
            if self.myName in activeNodes:
                # TODO: remove useless flag or use it in future releases
                self._inStandby = False
            # has it been deactivated?
            else:
                self._logger.trace('Node not in active nodes, do nothing.')
                self._inStandby = True
                continue  # wait to be activated

            # send heartbeats
            self._hb.sendHeartbeats()

            # decide if action is necessary based on config changes

            # get the list of nodes no longer responding to heartbeats
            # V1: only remove a node that hasn't responded to any pings in
            # the sampling period
            deactivateSet = set()
            for node in activeNodes:
                if node == self.myName:
                    continue
                history = self._hbHistory.getNodeHistory(
                    node, self.samplingInterval, HBHistory.GoodResponse
                )
                self._logger.trace(f'Got history "{history}" for node {node}')
                noResponses = [x for x in history if x == HBHistory.NoResponse]
                if len(noResponses) == self.samplingInterval:
                    deactivateSet.add(node)

            # get the list of nodes that have started responding
            # reactivate live nodes that have begun responding to heartbeats
            # V1: only reactivate a node if we have good responses for the
            # whole sampling period
            activateSet = set()
            for node in inactiveNodes:
                history = self._hbHistory.getNodeHistory(
                    node, self.samplingInterval, HBHistory.NoResponse
                )
                goodResponses = [x for x in history if x == HBHistory.GoodResponse]
                if len(goodResponses) == self.samplingInterval:
                    activateSet.add(node)

            # effectiveActiveNodeList can be described as activeNodes after
            # pending config changes have been applied. Another way to view
            # it is that it reflects current reality, whereas the config file
            # reflects a fixed point in the recent past.
            effectiveActiveNodeList = sorted(
                (set(activeNodes) - deactivateSet) | activateSet
            )

            # if there was a change to the list of active nodes
            # decide if this node is the effective actor in the cohort.
            if effectiveActiveNodeList != activeNodes:
                self._pickNewActor(effectiveActiveNodeList)
                self._logger.trace(
                    f'Effective list changed, actor state is {self._isActorOfCohort}'
                )
            elif oldActiveNodes != activeNodes:
                self._pickNewActor(activeNodes)
                self._logger.trace(
                    f'Active list changed, actor state is {self._isActorOfCohort}'
                )

            # if we are in a cohort that has <= 50% of the desired nodes,
            # enter standby
            if (len(activeNodes) / len(desiredNodes) <= 0.5
                    and len(effectiveActiveNodeList) / len(desiredNodes) <= 0.5):
                if not inStandbyMode:
                    msg = (
                        "Only {} out of {} nodes are active. At least {} are "
                        "required. Entering standby mode to protect the system."
                        .format(len(activeNodes), len(desiredNodes),
                                int(len(desiredNodes) / 2) + 1)
                    )
                    self._agentComm.raiseAlarm(msg)
                    self._logger.critical(msg)
                    self._agentComm.enterStandbyMode()
                    inStandbyMode = True
                continue
            elif inStandbyMode and len(effectiveActiveNodeList) / len(desiredNodes) > 0.5:
                self._logger.info("Exiting standby mode, waiting for config update")
                inStandbyMode = False

            # (wasActorOfCohort and not isActorOfCohort) indicates that a new
            # Actor has come online. To hand over the crown, perform one last
            # act as Actor to add it back to the cluster and synchronize its
            # config file.

            # if not the actor, nothing else for this node to do
            if not self._isActorOfCohort and not wasActorOfCohort:
                continue

            # as of here, this node is the actor of its quorum

            if len(deactivateSet) > 0:
                self._agentComm.deactivateNodes(list(deactivateSet))

            if len(activateSet) > 0:
                self._agentComm.activateNodes(activateSet)

            # if the primary node is in this list to be deactivated, or it's
            # already on the inactive list, choose a new primary node. The
            # deadNode list is a sanity check for cases like the cluster
            # starting with the primary node already in inactive-nodes.
            deadNodeList = list(deactivateSet) + inactiveNodes
            if self.primaryNode in deadNodeList:
                self._chooseNewPrimaryNode()

    # methods for testing
    def turnOffHBResponder(self):
        self.stop()
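The standby rule above keeps an actor running only while its cohort holds a strict majority of the desired nodes. A tiny numeric check of that threshold, mirroring the formula used in the standby message (plain arithmetic, no cmapi imports needed):

# Numeric check of the quorum rule used in _monitor().
def majority_needed(desired):
    return int(desired / 2) + 1  # same formula as the standby message

for desired, active in [(3, 2), (3, 1), (4, 2), (5, 3)]:
    in_standby = active / desired <= 0.5
    print(desired, active, majority_needed(desired), in_standby)
# 3 nodes: 2 active keeps quorum; 1 of 3 (or 2 of 4) enters standby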
18
cmapi/failover/test/config-test.xml
Normal file
@ -0,0 +1,18 @@
<root>
    <DesiredNodes>
        <Node>node1</Node>
        <Node>node2</Node>
        <Node>node3</Node>
        <Node>node4</Node>
    </DesiredNodes>
    <ActiveNodes>
        <Node>node1</Node>
        <Node>node2</Node>
        <Node>node3</Node>
    </ActiveNodes>
    <InactiveNodes>
        <Node>node4</Node>
    </InactiveNodes>
    <PrimaryNode>node2</PrimaryNode>
    <ConfigRevision>1</ConfigRevision>
</root>
129
cmapi/failover/test/test_agent_comm.py
Normal file
@ -0,0 +1,129 @@
import unittest
import time
import socket
import datetime
import cherrypy
import os.path
from contextlib import contextmanager
from ..agent_comm import AgentComm
from cmapi_server.failover_agent import FailoverAgent
from mcs_node_control.models.node_config import NodeConfig
from cmapi_server.controllers.dispatcher import dispatcher, jsonify_error
from cmapi_server.test.unittest_global import create_self_signed_certificate, cert_filename
from cmapi_server import helpers, node_manipulation

config_filename = './cmapi_server/cmapi_server.conf'


@contextmanager
def start_server():
    if not os.path.exists(cert_filename):
        create_self_signed_certificate()

    app = cherrypy.tree.mount(root=None, config=config_filename)
    app.config.update({
        '/': {
            'request.dispatch': dispatcher,
            'error_page.default': jsonify_error,
        },
        'config': {
            'path': config_filename,
        },
    })
    cherrypy.config.update(config_filename)

    cherrypy.engine.start()
    cherrypy.engine.wait(cherrypy.engine.states.STARTED)
    yield
    cherrypy.engine.exit()
    cherrypy.engine.block()


class TestAgentComm(unittest.TestCase):

    def test_with_agent_base(self):
        agent = AgentComm()
        # Add events except for enterStandbyMode
        agent.activateNodes(["mysql.com"])
        agent.activateNodes(["mysql.com"])  # an intentional dup
        agent.designatePrimaryNode("mysql.com")
        agent.deactivateNodes(["mysql.com"])
        agent.deactivateNodes(["mysql.com"])
        agent.designatePrimaryNode(socket.gethostname())

        health = agent.getNodeHealth()
        agent.raiseAlarm("Hello world!")
        print("Waiting up to 20s for queued events to be processed and removed")
        stop_time = datetime.datetime.now() + datetime.timedelta(seconds=20)
        success = False
        while datetime.datetime.now() < stop_time and not success:
            sizes = agent.getQueueSize()
            if sizes != (0, 0):
                time.sleep(1)
            else:
                print("Event queue & deduper are now empty")
                success = True

        print("Waiting for the agent comm thread to die.")
        agent.die()
        self.assertTrue(success)

    # This is the beginnings of an integration test; it needs permission to
    # modify the real config file
    def test_with_failover_agent(self):
        print("\n\n")  # make a little whitespace between tests

        # check for existence of and permissions to write to the real config file
        try:
            f = open("/etc/columnstore/Columnstore.xml", "a")
            f.close()
        except PermissionError:
            print(f"Skipping {__name__}, got a permissions error opening "
                  "/etc/columnstore/Columnstore.xml for writing")
            return

        success = False
        with start_server():
            try:
                agent = FailoverAgent()
                agentcomm = AgentComm(agent)

                # make sure the AC thread has a chance to start before we
                # start issuing cmds. If it grabs jobs in the middle of this
                # block, we'll try to send the config file to mysql.com. :D
                time.sleep(1)

                # do the same as above.
                agentcomm.activateNodes(["mysql.com"])
                agentcomm.activateNodes(["mysql.com"])  # an intentional dup
                agentcomm.designatePrimaryNode("mysql.com")
                agentcomm.deactivateNodes(["mysql.com"])
                agentcomm.deactivateNodes(["mysql.com"])
                agentcomm.designatePrimaryNode(socket.gethostname())

                health = agent.getNodeHealth()
                agent.raiseAlarm("Hello world!")
                print("Waiting up to 30s for queued events to be processed and removed")
                stop_time = datetime.datetime.now() + datetime.timedelta(seconds=30)

                while datetime.datetime.now() < stop_time and not success:
                    sizes = agentcomm.getQueueSize()
                    if sizes != (0, 0):
                        time.sleep(1)
                    else:
                        print("Event queue & deduper are now empty")
                        success = True
                if not success:
                    raise Exception("The event queue or de-duper did not empty within 30s")
                agentcomm.die()
            except Exception as e:
                agentcomm.die()
                cherrypy.engine.exit()
                cherrypy.engine.block()
                raise

        # clean up the config file, remove mysql.com
        txnid = helpers.start_transaction()
        node_manipulation.remove_node("mysql.com")
        helpers.update_revision_and_manager()
        helpers.broadcast_new_config()
        helpers.commit_transaction(txnid)
83
cmapi/failover/test/tester.py
Normal file
@ -0,0 +1,83 @@
|
||||
from .. import config
|
||||
import time
|
||||
from socket import *
|
||||
import struct
|
||||
import sys
|
||||
|
||||
_config = config.Config("failover/test/config-test.xml")
|
||||
print("got desired_nodes = {}".format(_config.getDesiredNodes()))
|
||||
print("got active_nodes = {}".format(_config.getActiveNodes()))
|
||||
print("got inacive_nodes = {}".format(_config.getInactiveNodes()))
|
||||
print("got all nodes = {}".format(_config.getAllNodes()))
|
||||
print("got primarynode = {}".format(_config.getPrimaryNode()))
|
||||
print()
|
||||
|
||||
from ..heartbeater import HeartBeater
|
||||
from ..heartbeat_history import HBHistory
|
||||
|
||||
hbh = HBHistory()
|
||||
hb = HeartBeater(_config, hbh)
|
||||
hb.start()
|
||||
sock = socket(type = SOCK_DGRAM)
|
||||
sock.bind(('localhost', 12345))
|
||||
|
||||
# Updated heartbeater to send the reply to its own port, rather than to
|
||||
# the port of the sending socket. Need to update this.
|
||||
#msg = struct.pack("4sH", hb.areYouThereMsg, 1234)
|
||||
#sock.sendto(msg, ('localhost', hb.port))
|
||||
#print("sent the are-you-there msg")
|
||||
#(data, remote) = sock.recvfrom(6)
|
||||
#(data, seq) = struct.unpack("4sH", data)
|
||||
#if data == hb.yesIAmMsg:
|
||||
# print("got the yes-i-am msg, seq = {}".format(seq))
|
||||
#else:
|
||||
# print("got something other than the yes-i-am-msg")
|
||||
|
||||
hb.stop()
|
||||
|
||||
#from heartbeat_history import HBHistory
|
||||
|
||||
#hbh = HBHistory()
|
||||
hbh.setCurrentTick(0)
|
||||
hbh.gotHeartbeat("node1", 0)
|
||||
hbh.setCurrentTick(1)
|
||||
hbh.gotHeartbeat("node2", 1)
|
||||
hbh.setCurrentTick(2)
|
||||
hbh.setCurrentTick(10)
|
||||
hbh.gotHeartbeat("node1", 9)
|
||||
hbh.gotHeartbeat("node1", 2)
|
||||
pongs = hbh.getNodeHistory("node1", 20)
|
||||
print("Got pongs: {}".format(pongs))
|
||||
|
||||
print('''
|
||||
This is currently a 'manual' test, meaning the user should watch for the expected output
|
||||
In this case, because NM's identity checker will return 'node1', and that does not match
|
||||
node[2-4], those nodes will appear to NodeMonitor to be offline. Our starting condition
|
||||
is that nodes 1-3 are active, and node4 is inactive. After 15s, nodes 2 & 3
|
||||
should be deactivated, a new primary node will be chosen, and our AgentBase will start
|
||||
printing these events.
|
||||
''')
|
||||
def testNodeMonitor1(nm):
|
||||
nm.start()
|
||||
print("Waiting for 20 secs, watch for output from AgentBase")
|
||||
time.sleep(20)
|
||||
nm.stop()
|
||||
time.sleep(1)
|
||||
print("NodeMonitor was stopped, did it produce the right output?")
|
||||
|
||||
from ..node_monitor import NodeMonitor
|
||||
nm = NodeMonitor(config = _config, samplingInterval = 10)
|
||||
# check whether node[1-4] are in the /etc/hosts file as localhost
|
||||
addr1 = gethostbyname("node1")
|
||||
addr2 = gethostbyname("node2")
|
||||
addr3 = gethostbyname("node3")
|
||||
addr4 = gethostbyname("node4")
|
||||
if addr1 == '127.0.0.1' and addr2 == '127.0.0.1' and addr3 == '127.0.0.1' and addr4 == '127.0.0.1':
|
||||
testNodeMonitor1(nm)
|
||||
else:
|
||||
print("Skipping testNodeMonitor1(). node[1-4] needs to be defined as 127.0.0.1 in /etc/hosts")
|
||||
|
||||
|
||||
|
||||
print("tester is finished")
|
||||
|
1
cmapi/mcs.template
Executable file
@ -0,0 +1 @@
|
||||
PYTHONPATH="${CMAPI_DIR}:${CMAPI_DIR}/deps" ${CMAPI_DIR}/python/bin/python3 -m mcs_cluster_tool $@
|
0
cmapi/mcs_cluster_tool/__init__.py
Normal file
30
cmapi/mcs_cluster_tool/__main__.py
Normal file
@ -0,0 +1,30 @@
|
||||
import logging
|
||||
import sys
|
||||
|
||||
import typer
|
||||
|
||||
from cmapi_server.logging_management import dict_config, add_logging_level
|
||||
from mcs_cluster_tool import cluster_app
|
||||
from mcs_cluster_tool.constants import MCS_CLI_LOG_CONF_PATH
|
||||
|
||||
|
||||
# don't show --install-completion and --show-completion options in help message
|
||||
app = typer.Typer(
|
||||
add_completion=False,
|
||||
help=(
|
||||
'The MCS Command Line Interface is a unified tool to manage your '
|
||||
'MCS services'
|
||||
),
|
||||
)
|
||||
app.add_typer(cluster_app.app, name="cluster")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
add_logging_level('TRACE', 5)  # TODO: remove when standalone mode is added.
|
||||
dict_config(MCS_CLI_LOG_CONF_PATH)
|
||||
logger = logging.getLogger('mcs_cli')
|
||||
# add separator between cli commands logging
|
||||
logger.debug(f'{"-":-^80}')
|
||||
cl_args_line = ' '.join(sys.argv[1:])
|
||||
logger.debug(f'Called "mcs {cl_args_line}"')
|
||||
app(prog_name='mcs')
|
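A minimal sketch of driving the same Typer app in-process with Typer's test runner instead of the mcs shell wrapper (assumes typer and the cmapi packages are importable; the command still talks to a live CMAPI server):

from typer.testing import CliRunner

from mcs_cluster_tool.__main__ import app

runner = CliRunner()
# equivalent to running "mcs cluster status" from the shell
result = runner.invoke(app, ['cluster', 'status'])
print(result.exit_code, result.output)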
140
cmapi/mcs_cluster_tool/cluster_app.py
Normal file
@ -0,0 +1,140 @@
|
||||
"""Cluster typer application.
|
||||
|
||||
Formally, this module contains all subcommands of the "mcs cluster" CLI command.
|
||||
"""
|
||||
import logging
|
||||
from typing import List, Optional
|
||||
|
||||
import pyotp
|
||||
import typer
|
||||
|
||||
from cmapi_server.constants import SECRET_KEY
|
||||
from cmapi_server.handlers.cluster import ClusterHandler
|
||||
from mcs_cluster_tool.decorators import handle_output
|
||||
|
||||
|
||||
logger = logging.getLogger('mcs_cli')
|
||||
app = typer.Typer(
|
||||
help='MariaDB Columnstore cluster management command line tool.'
|
||||
)
|
||||
node_app = typer.Typer(help='Cluster nodes management.')
|
||||
app.add_typer(node_app, name='node')
|
||||
set_app = typer.Typer(help='Set cluster parameters.')
|
||||
app.add_typer(set_app, name='set')
|
||||
|
||||
|
||||
@app.command()
|
||||
@handle_output
|
||||
def status():
|
||||
"""Get status information."""
|
||||
return ClusterHandler.status(logger=logger)
|
||||
|
||||
|
||||
@app.command()
|
||||
@handle_output
|
||||
def stop():
|
||||
"""Stop the Columnstore cluster."""
|
||||
return ClusterHandler.shutdown(logger=logger)
|
||||
|
||||
|
||||
@app.command()
|
||||
@handle_output
|
||||
def start():
|
||||
"""Start the Columnstore cluster."""
|
||||
return ClusterHandler.start(logger=logger)
|
||||
|
||||
|
||||
@app.command()
|
||||
@handle_output
|
||||
def restart():
|
||||
"""Restart the Columnstore cluster."""
|
||||
stop_result = ClusterHandler.shutdown(logger=logger)
|
||||
if 'error' in stop_result:
|
||||
return stop_result
|
||||
result = ClusterHandler.start(logger=logger)
|
||||
result['stop_timestamp'] = stop_result['timestamp']
|
||||
return result
|
||||
|
||||
|
||||
@node_app.command()
|
||||
@handle_output
|
||||
def add(
|
||||
nodes: Optional[List[str]] = typer.Option(
|
||||
...,
|
||||
'--node', # command line argument name
|
||||
help=(
|
||||
'node IP, name or FQDN. '
|
||||
'Can be used multiple times to add several nodes at a time.'
|
||||
)
|
||||
)
|
||||
):
|
||||
"""Add nodes to the Columnstore cluster."""
|
||||
result = []
|
||||
for node in nodes:
|
||||
result.append(ClusterHandler.add_node(node, logger=logger))
|
||||
return result
|
||||
|
||||
|
||||
@node_app.command()
|
||||
@handle_output
|
||||
def remove(nodes: Optional[List[str]] = typer.Option(
|
||||
...,
|
||||
'--node', # command line argument name
|
||||
help=(
|
||||
'node IP, name or FQDN. '
|
||||
'Can be used multiple times to remove several nodes at a time.'
|
||||
)
|
||||
)
|
||||
):
|
||||
"""Remove nodes from the Columnstore cluster."""
|
||||
result = []
|
||||
for node in nodes:
|
||||
result.append(ClusterHandler.remove_node(node, logger=logger))
|
||||
return result
|
||||
|
||||
|
||||
@set_app.command()
|
||||
@handle_output
|
||||
def mode(cluster_mode: str = typer.Option(
|
||||
...,
|
||||
'--mode',
|
||||
help=(
|
||||
'cluster mode to set. '
|
||||
'"readonly" or "readwrite" are the only acceptable values.'
|
||||
)
|
||||
)
|
||||
):
|
||||
"""Set Columnstore cluster mode."""
|
||||
if cluster_mode not in ('readonly', 'readwrite'):
|
||||
raise typer.BadParameter(
|
||||
'"readonly" or "readwrite" are the only acceptable modes now.'
|
||||
)
|
||||
return ClusterHandler.set_mode(cluster_mode, logger=logger)
|
||||
|
||||
|
||||
@set_app.command()
|
||||
@handle_output
|
||||
def api_key(key: str = typer.Option(..., help='API key to set.')):
|
||||
"""Set API key for communication with cluster nodes via API.
|
||||
|
||||
WARNING: this command will affect API key value on all cluster nodes.
|
||||
"""
|
||||
if not key:
|
||||
raise typer.BadParameter('Empty API key not allowed.')
|
||||
|
||||
totp = pyotp.TOTP(SECRET_KEY)
|
||||
|
||||
return ClusterHandler.set_api_key(key, totp.now(), logger=logger)
|
||||
|
||||
|
||||
@set_app.command()
|
||||
@handle_output
|
||||
def log_level(level: str = typer.Option(..., help='Logging level to set.')):
|
||||
"""Set logging level on all cluster nodes for develop purposes.
|
||||
|
||||
WARNING: this could dramatically affect the number of log lines.
|
||||
"""
|
||||
if not level:
|
||||
raise typer.BadParameter('Empty log level not allowed.')
|
||||
|
||||
return ClusterHandler.set_log_level(level, logger=logger)
|
4
cmapi/mcs_cluster_tool/constants.py
Normal file
@ -0,0 +1,4 @@
|
||||
import os
|
||||
|
||||
MCS_CLI_ROOT_PATH = os.path.dirname(__file__)
|
||||
MCS_CLI_LOG_CONF_PATH = os.path.join(MCS_CLI_ROOT_PATH, 'mcs_cli_log.conf')
|
35
cmapi/mcs_cluster_tool/decorators.py
Normal file
@ -0,0 +1,35 @@
|
||||
"""Module contains decorators for typer cli commands."""
|
||||
import json
|
||||
import logging
|
||||
from functools import wraps
|
||||
|
||||
import typer
|
||||
|
||||
from cmapi_server.exceptions import CMAPIBasicError
|
||||
|
||||
|
||||
def handle_output(func):
|
||||
"""Decorator for handling output errors and add result to log file."""
|
||||
@wraps(func)
|
||||
def wrapper(*args, **kwargs):
|
||||
logger = logging.getLogger('mcs_cli')
|
||||
return_code = 1
|
||||
try:
|
||||
result = func(*args, **kwargs)
|
||||
typer.echo(json.dumps(result, indent=2))
|
||||
logger.debug(f'Command returned: {result}')
|
||||
return_code = 0
|
||||
except CMAPIBasicError as err:
|
||||
typer.echo(err.message, err=True)
|
||||
logger.error('Error during command execution', exc_info=True)
|
||||
except typer.BadParameter as err:
|
||||
logger.error('Bad command line parameter.')
|
||||
raise err
|
||||
except Exception:
|
||||
logger.error(
|
||||
'Unexpected error during command execution',
|
||||
exc_info=True
|
||||
)
|
||||
typer.echo('Unknown error, check the log file.', err=True)
|
||||
raise typer.Exit(return_code)
|
||||
return wrapper
|
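A minimal sketch of how handle_output wraps a Typer command (the "ping" command here is hypothetical): the command returns a JSON-serializable value, and the decorator echoes it, logs it, and exits with code 0 on success:

import typer

from mcs_cluster_tool.decorators import handle_output

app = typer.Typer()

@app.command()
@handle_output
def ping():
    """Hypothetical example command."""
    return {'pong': True}

if __name__ == '__main__':
    app()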
31
cmapi/mcs_cluster_tool/mcs_cli_log.conf
Normal file
@ -0,0 +1,31 @@
|
||||
{
|
||||
"version": 1,
|
||||
"disable_existing_loggers": true,
|
||||
"formatters": {
|
||||
"default": {
|
||||
"format": "%(asctime)s [%(levelname)s] (%(name)s) %(message)s",
|
||||
"datefmt": "%d/%b/%Y %H:%M:%S"
|
||||
}
|
||||
},
|
||||
"handlers": {
|
||||
"file": {
|
||||
"class" : "logging.handlers.RotatingFileHandler",
|
||||
"formatter": "default",
|
||||
"filename": "/var/log/mariadb/columnstore/mcs_cli.log",
|
||||
"mode": "a",
|
||||
"maxBytes": 1048576,
|
||||
"backupCount": 10
|
||||
}
|
||||
},
|
||||
"loggers": {
|
||||
"": {
|
||||
"level": "DEBUG",
|
||||
"handlers": ["file"]
|
||||
},
|
||||
"mcs_cli": {
|
||||
"level": "DEBUG",
|
||||
"handlers": ["file"],
|
||||
"propagate": false
|
||||
}
|
||||
}
|
||||
}
|
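The file above is a standard logging dictConfig schema serialized as JSON, so it can also be loaded with the stock library; a minimal sketch (assumes the log directory exists and is writable):

import json
import logging.config

from mcs_cluster_tool.constants import MCS_CLI_LOG_CONF_PATH

with open(MCS_CLI_LOG_CONF_PATH) as conf_file:
    logging.config.dictConfig(json.load(conf_file))

logging.getLogger('mcs_cli').debug('logging configured')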
0
cmapi/mcs_node_control/__init__.py
Normal file
269
cmapi/mcs_node_control/custom_dispatchers/container.sh
Executable file
@ -0,0 +1,269 @@
|
||||
#!/bin/bash
|
||||
|
||||
# TODO: remove in next releases
|
||||
|
||||
programname=$0
|
||||
|
||||
function usage {
|
||||
echo "usage: $programname op [service_name] [is_primary]"
|
||||
echo " op - operation name [start|stop]"
|
||||
echo " service_name - [mcs-controllernode|mcs-workernode etc]"
|
||||
echo " is_primary - [0|1]"
|
||||
exit 1
|
||||
}
|
||||
|
||||
operation=$1
|
||||
service_name=$2
|
||||
is_primary=$3
|
||||
|
||||
if [[ -z "$operation" || -z "$service_name" || $is_primary -ne 0 && $is_primary -ne 1 ]]; then
|
||||
usage
|
||||
fi
|
||||
|
||||
LOG_FILE=/var/log/mariadb/columnstore/container-sh.log
|
||||
|
||||
start_up_to_workernode() {
|
||||
# Set Variables
|
||||
IFLAG=/etc/columnstore/container-initialized
|
||||
LOG_PREFIX=/var/log/mariadb/columnstore
|
||||
MCS_INSTALL_PATH=/var/lib/columnstore
|
||||
MCS_INSTALL_BIN=/usr/bin
|
||||
PROGS='StorageManager mcs-loadbrm.py workernode'
|
||||
JEMALLOC_PATH=$(ldconfig -p | grep -m1 libjemalloc | awk '{print $1}')
|
||||
if [ -z "$JEMALLOC_PATH" && -f $MCS_INSTALL_PATH/libjemalloc.so.2 ]; then
|
||||
JEMALLOC_PATH=$MCS_INSTALL_PATH/libjemalloc.so.2
|
||||
fi
|
||||
export LD_PRELOAD=$JEMALLOC_PATH
|
||||
|
||||
# Initialize Container If Necessary
|
||||
if [ ! -e $IFLAG ]; then
|
||||
$MCS_INSTALL_BIN/columnstore-init &>> $LOG_PREFIX/columnstore-init.log
|
||||
fi
|
||||
|
||||
# Verify All Programs Are Available
|
||||
for i in $PROGS ; do
|
||||
if [ ! -x $MCS_INSTALL_BIN/$i ] ; then
|
||||
echo "$i doesn't exist."
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
|
||||
# Start System
|
||||
echo `date`: start_up_to_workernode\(\)... >> $LOG_FILE
|
||||
|
||||
touch $LOG_PREFIX/storagemanager.log && chmod 666 $LOG_PREFIX/storagemanager.log
|
||||
$MCS_INSTALL_BIN/StorageManager &>> $LOG_PREFIX/storagemanager.log &
|
||||
echo `date`: StorageManager PID = $! >> $LOG_FILE
|
||||
|
||||
sleep 1
|
||||
|
||||
echo `date`: loading BRM >> $LOG_FILE
|
||||
touch $LOG_PREFIX/mcs-loadbrm.log && chmod 666 $LOG_PREFIX/mcs-loadbrm.log
|
||||
# Argument "no" here means don't use systemd to start SM
|
||||
$MCS_INSTALL_BIN/mcs-loadbrm.py no >> $LOG_PREFIX/mcs-loadbrm.log 2>&1
|
||||
|
||||
touch $LOG_PREFIX/workernode.log && chmod 666 $LOG_PREFIX/workernode.log
|
||||
$MCS_INSTALL_BIN/workernode DBRM_Worker1 &>> $LOG_PREFIX/workernode.log &
|
||||
echo `date`: workernode PID = $! >> $LOG_FILE
|
||||
|
||||
exit 0
|
||||
}
|
||||
|
||||
start_those_left_at_master() {
|
||||
# Set Variables
|
||||
LOG_PREFIX=/var/log/mariadb/columnstore
|
||||
MCS_INSTALL_PATH=/var/lib/columnstore
|
||||
MCS_INSTALL_BIN=/usr/bin
|
||||
# TODO: remove fast fix
|
||||
# skip check binary for ExeMgr
|
||||
PROGS='controllernode PrimProc WriteEngineServer DMLProc DDLProc'
|
||||
JEMALLOC_PATH=$(ldconfig -p | grep -m1 libjemalloc | awk '{print $1}')
|
||||
if [ -z "$JEMALLOC_PATH" && -f $MCS_INSTALL_PATH/libjemalloc.so.2 ]; then
|
||||
JEMALLOC_PATH=$MCS_INSTALL_PATH/libjemalloc.so.2
|
||||
fi
|
||||
export LD_PRELOAD=$JEMALLOC_PATH
|
||||
|
||||
# Verify All Programs Are Available (except ExeMgr)
|
||||
for i in $PROGS ; do
|
||||
if [ ! -x $MCS_INSTALL_BIN/$i ] ; then
|
||||
echo "$i doesn't exist."
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
|
||||
echo `date`: start_those_left_at_master\(\) >> $LOG_FILE
|
||||
|
||||
if [[ $is_primary -eq 1 ]]; then
|
||||
touch $LOG_PREFIX/controllernode.log && chmod 666 $LOG_PREFIX/controllernode.log
|
||||
$MCS_INSTALL_BIN/controllernode fg &>> $LOG_PREFIX/controllernode.log &
|
||||
echo `date`: controllernode PID = $! >> $LOG_FILE
|
||||
fi
|
||||
|
||||
touch $LOG_PREFIX/primproc.log && chmod 666 $LOG_PREFIX/primproc.log
|
||||
$MCS_INSTALL_BIN/PrimProc &>> $LOG_PREFIX/primproc.log &
|
||||
echo `date`: PrimProc PID = $! >> $LOG_FILE
|
||||
|
||||
sleep 1
|
||||
|
||||
if [ -e $MCS_INSTALL_BIN/ExeMgr ] ; then
|
||||
touch $LOG_PREFIX/exemgr.log && chmod 666 $LOG_PREFIX/exemgr.log
|
||||
$MCS_INSTALL_BIN/ExeMgr &>> $LOG_PREFIX/exemgr.log &
|
||||
echo `date`: ExeMgr PID = $! >> $LOG_FILE
|
||||
fi
|
||||
|
||||
touch $LOG_PREFIX/writeengineserver.log && chmod 666 $LOG_PREFIX/writeengineserver.log
|
||||
$MCS_INSTALL_BIN/WriteEngineServer &>> $LOG_PREFIX/writeengineserver.log &
|
||||
echo `date`: WriteEngineServer PID = $! >> $LOG_FILE
|
||||
|
||||
sleep 3
|
||||
|
||||
touch $LOG_PREFIX/dmlproc.log && chmod 666 $LOG_PREFIX/dmlproc.log
|
||||
$MCS_INSTALL_BIN/DMLProc &>> $LOG_PREFIX/dmlproc.log &
|
||||
echo `date`: DMLProc PID = $! >> $LOG_FILE
|
||||
|
||||
touch $LOG_PREFIX/ddlproc.log && chmod 666 $LOG_PREFIX/ddlproc.log
|
||||
$MCS_INSTALL_BIN/DDLProc &>> $LOG_PREFIX/ddlproc.log &
|
||||
echo `date`: DDLProc PID = $! >> $LOG_FILE
|
||||
|
||||
exit 0
|
||||
}
|
||||
|
||||
|
||||
|
||||
start() {
|
||||
# Set Variables
|
||||
IFLAG=/etc/columnstore/container-initialized
|
||||
LOG_PREFIX=/var/log/mariadb/columnstore
|
||||
MCS_INSTALL_PATH=/var/lib/columnstore
|
||||
MCS_INSTALL_BIN=/usr/bin
|
||||
# TODO: remove fast fix
|
||||
# skip check binary for ExeMgr
|
||||
PROGS='StorageManager load_brm workernode controllernode PrimProc WriteEngineServer DMLProc DDLProc'
|
||||
JEMALLOC_PATH=$(ldconfig -p | grep -m1 libjemalloc | awk '{print $1}')
|
||||
if [ -z "$JEMALLOC_PATH" && -f $MCS_INSTALL_PATH/libjemalloc.so.2 ]; then
|
||||
JEMALLOC_PATH=$MCS_INSTALL_PATH/libjemalloc.so.2
|
||||
fi
|
||||
export LD_PRELOAD=$JEMALLOC_PATH
|
||||
|
||||
# Initialize Container If Necessary
|
||||
if [ ! -e $IFLAG ]; then
|
||||
$MCS_INSTALL_BIN/columnstore-init &>> $LOG_PREFIX/columnstore-init.log
|
||||
fi
|
||||
|
||||
# Verify All Programs Are Available (except ExeMgr)
|
||||
for i in $PROGS ; do
|
||||
if [ ! -x $MCS_INSTALL_BIN/$i ] ; then
|
||||
echo "$i doesn't exist."
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
|
||||
# Start System
|
||||
echo `date`: start\(\)... >> $LOG_FILE
|
||||
|
||||
touch $LOG_PREFIX/storagemanager.log && chmod 666 $LOG_PREFIX/storagemanager.log
|
||||
$MCS_INSTALL_BIN/StorageManager &>> $LOG_PREFIX/storagemanager.log &
|
||||
echo `date`: StorageManager PID = $! >> $LOG_FILE
|
||||
sleep 1
|
||||
|
||||
echo `date`: loading BRM >> $LOG_FILE
|
||||
touch $LOG_PREFIX/mcs-loadbrm.log && chmod 666 $LOG_PREFIX/mcs-loadbrm.log
|
||||
# Argument "no" here means don't use systemd to start SM
|
||||
$MCS_INSTALL_BIN/mcs-loadbrm.py no >> $LOG_PREFIX/mcs-loadbrm.log 2>&1
|
||||
|
||||
touch $LOG_PREFIX/workernode.log && chmod 666 $LOG_PREFIX/workernode.log
|
||||
$MCS_INSTALL_BIN/workernode DBRM_Worker2 &>> $LOG_PREFIX/workernode.log &
|
||||
echo `date`: workernode PID = $! >> $LOG_FILE
|
||||
|
||||
sleep 2
|
||||
|
||||
if [[ $is_primary -eq 1 ]]; then
|
||||
touch $LOG_PREFIX/controllernode.log && chmod 666 $LOG_PREFIX/controllernode.log
|
||||
$MCS_INSTALL_BIN/controllernode fg &>> $LOG_PREFIX/controllernode.log &
|
||||
echo `date`: controllernode PID = $! >> $LOG_FILE
|
||||
fi
|
||||
|
||||
touch $LOG_PREFIX/primproc.log && chmod 666 $LOG_PREFIX/primproc.log
|
||||
$MCS_INSTALL_BIN/PrimProc &>> $LOG_PREFIX/primproc.log &
|
||||
echo `date`: PrimProc PID = $! >> $LOG_FILE
|
||||
|
||||
sleep 1
|
||||
|
||||
if [ -e $MCS_INSTALL_BIN/ExeMgr ] ; then
|
||||
touch $LOG_PREFIX/exemgr.log && chmod 666 $LOG_PREFIX/exemgr.log
|
||||
$MCS_INSTALL_BIN/ExeMgr &>> $LOG_PREFIX/exemgr.log &
|
||||
echo `date`: ExeMgr PID = $! >> $LOG_FILE
|
||||
fi
|
||||
|
||||
touch $LOG_PREFIX/writeengineserver.log && chmod 666 $LOG_PREFIX/writeengineserver.log
|
||||
$MCS_INSTALL_BIN/WriteEngineServer &>> $LOG_PREFIX/writeengineserver.log &
|
||||
echo `date`: WriteEngineServer PID = $! >> $LOG_FILE
|
||||
|
||||
sleep 3
|
||||
|
||||
if [[ $is_primary -eq 1 ]]; then
|
||||
touch $LOG_PREFIX/dmlproc.log && chmod 666 $LOG_PREFIX/dmlproc.log
|
||||
$MCS_INSTALL_BIN/DMLProc &>> $LOG_PREFIX/dmlproc.log &
|
||||
echo `date`: DMLProc PID = $! >> $LOG_FILE
|
||||
touch $LOG_PREFIX/ddlproc.log && chmod 666 $LOG_PREFIX/ddlproc.log
|
||||
$MCS_INSTALL_BIN/DDLProc &>> $LOG_PREFIX/ddlproc.log &
|
||||
echo `date`: DDLProc PID = $! >> $LOG_FILE
|
||||
fi
|
||||
|
||||
exit 0
|
||||
}
|
||||
|
||||
stop() {
|
||||
# TODO: remove fast fix
|
||||
# skip check binary for ExeMgr
|
||||
PROGS='DMLProc DDLProc WriteEngineServer PrimProc workernode controllernode StorageManager'
|
||||
MCS_INSTALL_BIN=/usr/bin
|
||||
LOG_PREFIX=/var/log/mariadb/columnstore
|
||||
|
||||
# Stop System
|
||||
echo `date`: Stopping... >> $LOG_FILE
|
||||
|
||||
if [[ ! -z "$(pidof $PROGS)" ]]; then
|
||||
# Save BRM only on the primary node now.
|
||||
if [[ ! -z "$(pidof controllernode)" ]]; then
|
||||
$MCS_INSTALL_BIN/mcs-savebrm.py &>> $LOG_PREFIX/savebrm.log
|
||||
fi
|
||||
|
||||
echo `date`: Sending SIGTERM >> $LOG_FILE
|
||||
kill $(pidof $PROGS) > /dev/null
|
||||
sleep 3
|
||||
# Make sure StorageManager had a chance to shutdown clean
|
||||
counter=1
|
||||
while [ -n "$(pidof StorageManager)" -a $counter -le 60 ]
|
||||
do
|
||||
sleep 1
|
||||
((counter++))
|
||||
done
|
||||
echo `date`: Sending SIGKILL >> $LOG_FILE
|
||||
kill -9 $(pidof $PROGS) > /dev/null
|
||||
fi
|
||||
|
||||
echo `date`: Clearing SHM >> $LOG_FILE
|
||||
$MCS_INSTALL_BIN/clearShm
|
||||
|
||||
exit 0
|
||||
}
|
||||
|
||||
case "$operation" in
|
||||
'start')
|
||||
# We start everything when controllernode starts at primary node and with workernode at non-primary
|
||||
if [[ $is_primary -eq 1 && "mcs-workernode" == "$service_name" ]]; then
|
||||
start_up_to_workernode $is_primary
|
||||
elif [[ $is_primary -eq 1 && "mcs-controllernode" == "$service_name" ]]; then
|
||||
start_those_left_at_master $is_primary
|
||||
elif [[ $is_primary -eq 0 && "mcs-workernode" == "$service_name" ]]; then
|
||||
start $is_primary
|
||||
fi
|
||||
;;
|
||||
|
||||
'stop')
|
||||
if [[ $is_primary -eq 1 && "mcs-controllernode" == "$service_name" || $is_primary -eq 0 && "mcs-workernode" == "$service_name" ]]; then
|
||||
stop
|
||||
fi
|
||||
;;
|
||||
esac
|
2
cmapi/mcs_node_control/models/__init__.py
Normal file
@ -0,0 +1,2 @@
|
||||
|
||||
from mcs_node_control.models.node_status import NodeStatus
|
220
cmapi/mcs_node_control/models/dbrm.py
Normal file
@ -0,0 +1,220 @@
|
||||
import logging
|
||||
import socket
|
||||
|
||||
from cmapi_server.constants import DEFAULT_MCS_CONF_PATH
|
||||
from mcs_node_control.models.dbrm_socket import (
|
||||
DBRM_COMMAND_BYTES, DEFAULT_HOST, DEFAULT_PORT, DBRMSocketHandler
|
||||
)
|
||||
from mcs_node_control.models.node_config import NodeConfig
|
||||
from mcs_node_control.models.process import Process
|
||||
|
||||
|
||||
# TODO: why do we need bitwise shifts here? Maybe use plain constant values?
|
||||
SYSTEM_STATE_FLAGS = {
|
||||
"SS_READY": 1 << 0, # 1
|
||||
"SS_SUSPENDED": 1 << 1, # 2
|
||||
"SS_SUSPEND_PENDING": 1 << 2, # 4
|
||||
"SS_SHUTDOWN_PENDING": 1 << 3, # 8
|
||||
"SS_ROLLBACK": 1 << 4, # 16
|
||||
"SS_FORCE": 1 << 5, # 32
|
||||
"SS_QUERY_READY": 1 << 6, # 64
|
||||
}
|
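For the TODO above: each flag occupies its own bit, so states combine with bitwise OR and decode with bitwise AND. A minimal illustration using the table above:

# 1 | 16 == 17: SS_READY and SS_ROLLBACK both set
state = SYSTEM_STATE_FLAGS['SS_READY'] | SYSTEM_STATE_FLAGS['SS_ROLLBACK']
set_flags = [name for name, bit in SYSTEM_STATE_FLAGS.items() if bit & state]
assert set_flags == ['SS_READY', 'SS_ROLLBACK']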
||||
|
||||
|
||||
module_logger = logging.getLogger()
|
||||
|
||||
|
||||
class DBRM:
|
||||
"""Class DBRM commands"""
|
||||
def __init__(
|
||||
self, root=None, config_filename: str = DEFAULT_MCS_CONF_PATH
|
||||
):
|
||||
self.dbrm_socket = DBRMSocketHandler()
|
||||
self.root = root
|
||||
self.config_filename = config_filename
|
||||
|
||||
def connect(self):
|
||||
node_config = NodeConfig()
|
||||
root = self.root or node_config.get_current_config_root(
|
||||
self.config_filename
|
||||
)
|
||||
master_conn_info = node_config.get_dbrm_conn_info(root)
|
||||
if master_conn_info is None:
|
||||
module_logger.warning(
|
||||
'DBRM.connect: No DBRM info in the Columnstore.xml.'
|
||||
)
|
||||
dbrm_host = master_conn_info['IPAddr'] or DEFAULT_HOST
|
||||
dbrm_port = int(master_conn_info['Port']) or DEFAULT_PORT
|
||||
self.dbrm_socket.connect(dbrm_host, dbrm_port)
|
||||
|
||||
def close(self):
|
||||
self.dbrm_socket.close()
|
||||
|
||||
def __enter__(self):
|
||||
self.connect()
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
self.close()
|
||||
if exc_type:
|
||||
return False
|
||||
return True
|
||||
|
||||
def _send_command(self, command_name, command_value=None):
|
||||
if command_name not in DBRM_COMMAND_BYTES:
|
||||
module_logger.warning(
|
||||
f'DBRM._send_command: Wrong command requested {command_name}'
|
||||
)
|
||||
return None
|
||||
|
||||
module_logger.info(
|
||||
f'DBRM._send_command: Command {command_name} '
|
||||
f'was requested with value {command_value}'
|
||||
)
|
||||
|
||||
self.dbrm_socket.send(command_name, command_value)
|
||||
response_value_bytes = self.dbrm_socket.receive()
|
||||
|
||||
if command_name == 'readonly':
|
||||
reply = int.from_bytes(response_value_bytes, 'little')
|
||||
else:
|
||||
# get first byte, it's an error message
|
||||
err = int.from_bytes(response_value_bytes[:1], 'little')
|
||||
|
||||
if err != 0:
|
||||
module_logger.warning(
|
||||
f'DBRM._send_command: Command {command_name} '
|
||||
'returned error on server'
|
||||
)
|
||||
raise RuntimeError(
|
||||
f'Controller Node replied error with code {err} '
|
||||
f'for command {command_name}'
|
||||
)
|
||||
|
||||
if len(response_value_bytes) < 2:
|
||||
return None
|
||||
|
||||
reply = int.from_bytes(response_value_bytes[1:], 'little')
|
||||
return reply
|
||||
|
||||
def get_system_state(self):
|
||||
state = self._send_command('get_system_state')
|
||||
return [
|
||||
flag_name for flag_name, flag_value in SYSTEM_STATE_FLAGS.items()
|
||||
# TODO: this logic looks odd and is hard to read; consider refactoring.
|
||||
if flag_value & state
|
||||
]
|
||||
|
||||
def _edit_system_state(self, states: list, command: str):
|
||||
state = 0
|
||||
# TODO: why do we need this? The states parameter is typed as a list.
|
||||
# Maybe a plain str, without the loop, would be more appropriate here.
|
||||
if isinstance(states, str):
|
||||
states = (states,)
|
||||
|
||||
for state_name in states:
|
||||
if state_name not in SYSTEM_STATE_FLAGS:
|
||||
module_logger.warning(
|
||||
f'DBRM.{command}: Wrong system state requested: '
|
||||
f'{state_name}'
|
||||
)
|
||||
continue
|
||||
# TODO: for this case it's the same as simple addition,
|
||||
# so why do we need bitwise OR?
|
||||
state |= SYSTEM_STATE_FLAGS[state_name]
|
||||
|
||||
self._send_command(command, state)
|
||||
|
||||
def set_system_state(self, states: list):
|
||||
self._edit_system_state(states, 'set_system_state')
|
||||
|
||||
def clear_system_state(self, states: list):
|
||||
self._edit_system_state(states, 'clear_system_state')
|
||||
|
||||
@staticmethod
|
||||
def get_dbrm_status():
|
||||
"""Reads DBRM status
|
||||
|
||||
DBRM (Block Resolution Manager) operates in two modes:
|
||||
- master
|
||||
- slave
|
||||
|
||||
This method returns the mode of this DBRM node
|
||||
by checking whether the controllernode process is running.
|
||||
|
||||
:return: mode of this DBRM node
|
||||
:rtype: string
|
||||
"""
|
||||
if Process.check_process_alive('controllernode'):
|
||||
return 'master'
|
||||
return 'slave'
|
||||
|
||||
def _get_cluster_mode(self):
|
||||
"""Get DBRM cluster mode for internal usage.
|
||||
|
||||
Returns real DBRM cluster mode from socket response.
|
||||
"""
|
||||
# state can be 1(readonly) or 0(readwrite) or exception raised
|
||||
state = self._send_command('readonly')
|
||||
if state == 1:
|
||||
return 'readonly'
|
||||
elif state == 0:
|
||||
return 'readwrite'
|
||||
|
||||
def get_cluster_mode(self):
|
||||
"""Get DBRM cluster mode for external usage.
|
||||
|
||||
There is some weird logic here.
|
||||
It was requested by management.
|
||||
TODO: here we can cause a logic error.
|
||||
E.g. set a non-master node to "readwrite" and
|
||||
we get "readonly" as the return value.
|
||||
|
||||
:return: DBRM cluster mode
|
||||
:rtype: str
|
||||
"""
|
||||
real_mode = self._get_cluster_mode()
|
||||
if self.get_dbrm_status() == 'master':
|
||||
return real_mode
|
||||
else:
|
||||
return 'readonly'
|
||||
|
||||
def set_cluster_mode(self, mode):
|
||||
"""Set cluster mode requested
|
||||
|
||||
Connects to the DBRM master's socket and
|
||||
send a command to set cluster mode.
|
||||
|
||||
:rtype: str : error or the cluster mode that was set
|
||||
"""
|
||||
|
||||
if mode == 'readonly':
|
||||
command = 'set_readonly'
|
||||
elif mode == 'readwrite':
|
||||
command = 'set_readwrite'
|
||||
else:
|
||||
return ''
|
||||
|
||||
_ = self._send_command(command)
|
||||
|
||||
return self.get_cluster_mode()
|
||||
|
||||
|
||||
def set_cluster_mode(
|
||||
mode: str, root=None, config_filename: str = DEFAULT_MCS_CONF_PATH
|
||||
):
|
||||
"""Set cluster mode requested
|
||||
|
||||
Connects to the DBRM master's socket and send a command to
|
||||
set cluster mode.
|
||||
|
||||
:rtype: str : error or the cluster mode that was set
|
||||
"""
|
||||
try:
|
||||
with DBRM(root, config_filename) as dbrm:
|
||||
return dbrm.set_cluster_mode(mode)
|
||||
except (ConnectionRefusedError, RuntimeError, socket.error):
|
||||
module_logger.warning(
|
||||
'Cannot establish DBRM connection.', exc_info=True
|
||||
)
|
||||
return 'readonly'
|
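A minimal usage sketch (assumes a reachable DBRM master, mirroring the module-level helper above): DBRM is a context manager, so connect/close are handled automatically:

with DBRM() as dbrm:
    print(dbrm.get_system_state())  # e.g. ['SS_READY', 'SS_QUERY_READY']
    print(dbrm.get_cluster_mode())  # 'readonly' or 'readwrite'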
248
cmapi/mcs_node_control/models/dbrm_socket.py
Normal file
@ -0,0 +1,248 @@
|
||||
import logging
|
||||
import socket
|
||||
|
||||
|
||||
MAGIC_BYTES = 0x14fbc137.to_bytes(4, 'little')
|
||||
# value is tuple(command_bytes, command_value_length)
|
||||
DBRM_COMMAND_BYTES = {
|
||||
'readonly': ((20).to_bytes(1, 'little'), 0),
|
||||
'set_readonly': ((14).to_bytes(1, 'little'), 0),
|
||||
'set_readwrite': ((15).to_bytes(1, 'little'), 0),
|
||||
'set_system_state': ((55).to_bytes(1, 'little'), 4),
|
||||
'get_system_state': ((54).to_bytes(1, 'little'), 4),
|
||||
'clear_system_state': ((57).to_bytes(1, 'little'), 4),
|
||||
}
|
||||
DEFAULT_HOST = 'localhost'
|
||||
DEFAULT_PORT = 8616
|
||||
SOCK_TIMEOUT = 5
|
||||
|
||||
|
||||
class DBRMSocketHandler():
|
||||
"""Class for stream socket operations.
|
||||
|
||||
Includes all logic for detecting the bytestream protocol version, reading and
|
||||
parsing magic inside, getting command bytes and command value length
|
||||
by command name.
|
||||
|
||||
"""
|
||||
long_strings = None
|
||||
|
||||
def __init__(
|
||||
self, family=socket.AF_INET, type=socket.SOCK_STREAM, proto=0,
|
||||
fileno=None
|
||||
) -> None:
|
||||
self._socket = None
|
||||
self._family = family
|
||||
self._type = type
|
||||
self._proto = proto
|
||||
self._fileno = fileno
|
||||
self._host = None
|
||||
self._port = None
|
||||
self._recreate_socket()
|
||||
|
||||
@property
|
||||
def _connect_called(self):
|
||||
"""Is connect method called previously.
|
||||
|
||||
This is the instance state used to determine whether the "connect" method was
|
||||
called previously. It is not guaranteed that the connection is still alive.
|
||||
:return: connected state
|
||||
:rtype: bool
|
||||
"""
|
||||
if self._host and self._port:
|
||||
return True
|
||||
return False
|
||||
|
||||
def _recreate_socket(self) -> None:
|
||||
"""Create new internal _socket object.
|
||||
|
||||
Create/recreate the _socket object and connect it if it was already
|
||||
connected.
|
||||
"""
|
||||
if self._socket is not None:
|
||||
self._socket.close()
|
||||
self._socket = socket.socket(
|
||||
family=self._family, type=self._type,
|
||||
proto=self._proto, fileno=self._fileno
|
||||
)
|
||||
if self._connect_called:
|
||||
self.connect(self._host, self._port)
|
||||
|
||||
def _detect_protocol(self) -> None:
|
||||
"""Detect dbrm socket bytestream version.
|
||||
|
||||
This method is normally called only once, on the first
|
||||
"send" method call.
|
||||
After that, the header is formed and parsed according to the
|
||||
"long_strings" class variable value.
|
||||
|
||||
Sends "readonly" message with "old" protocol version (before MCS 6.2.1)
|
||||
If timeout error raised, sends message with "new" protocol version
|
||||
(after MCS 6.2.1) with extra 4 bytes in header.
|
||||
If both attemts are failed raise RuntimeError and return the
|
||||
"long_strings" variable to initial state - None.
|
||||
|
||||
:raises RuntimeError: if the DBRM bytestream protocol version cannot be detected
|
||||
"""
|
||||
success = False
|
||||
# check the old protocol first because 5.x.x versions have an issue if
|
||||
# we try to send new-format packets.
|
||||
for long_strings in (False, True):
|
||||
DBRMSocketHandler.long_strings = long_strings
|
||||
self.send('readonly')
|
||||
try:
|
||||
_ = self.receive()
|
||||
success = True
|
||||
break
|
||||
except (socket.timeout, TimeoutError):
|
||||
# a wrongly formed packet could cause errors on the mcs engine side
|
||||
self._recreate_socket()
|
||||
continue
|
||||
if not success:
|
||||
# something went wrong so return to unknown protocol state
|
||||
DBRMSocketHandler.long_strings = None
|
||||
raise RuntimeError(
|
||||
'Can\'t detect DBRM bytestream protocol version.'
|
||||
)
|
||||
else:
|
||||
dbrm_protocol_version = (
|
||||
'new' if DBRMSocketHandler.long_strings else 'old'
|
||||
)
|
||||
logging.info(
|
||||
f'Detected "{dbrm_protocol_version}" DBRM bytestream protocol'
|
||||
)
|
||||
|
||||
def _make_msg(self, command_name: str, command_value: int) -> bytes:
|
||||
"""Make bytes msg by command name and value.
|
||||
|
||||
:param command_name: name of a command
|
||||
:type command_name: str
|
||||
:param command_value: command value
|
||||
:type command_value: int or None
|
||||
:return: msg to send through the socket
|
||||
:rtype: bytes
|
||||
"""
|
||||
command_bytes, command_value_length = DBRM_COMMAND_BYTES[command_name]
|
||||
data_length = (
|
||||
command_value_length + len(command_bytes)
|
||||
).to_bytes(4, 'little')
|
||||
# bytestream protocol before MCS 6.2.1 version
|
||||
package_header = MAGIC_BYTES + data_length
|
||||
if DBRMSocketHandler.long_strings:
|
||||
# bytestream protocol after MCS 6.2.1 version
|
||||
long_strings_count = (0).to_bytes(4, 'little')
|
||||
package_header += long_strings_count
|
||||
|
||||
msg_bytes = package_header + command_bytes
|
||||
if command_value is not None:
|
||||
msg_bytes += command_value.to_bytes(
|
||||
command_value_length, 'little'
|
||||
)
|
||||
return msg_bytes
|
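A minimal sketch of the resulting wire layout for the 'readonly' command under the pre-6.2.1 ("old") protocol, derived from the constants above; data_length is 1 here, i.e. the single command byte with no value bytes:

# magic (4 bytes) + data length (4 bytes) + command byte
msg = MAGIC_BYTES + (1).to_bytes(4, 'little') + (20).to_bytes(1, 'little')
assert msg == b'\x37\xc1\xfb\x14\x01\x00\x00\x00\x14'
# the "new" (post-6.2.1) protocol inserts 4 extra bytes (the long-strings
# count) between the length and the command byte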
||||
|
||||
def _receive_magic(self):
|
||||
"""Reads the stream up to the uncompressed magic.
|
||||
|
||||
The magic is a constant delimiter that occurs at the beginning
|
||||
of the stream.
|
||||
"""
|
||||
data: bytes
|
||||
recv_data: bytes = b''
|
||||
while recv_data != MAGIC_BYTES:
|
||||
data = self._socket.recv(1)
|
||||
# TODO: advanced error handling
|
||||
if data == b'':
|
||||
raise RuntimeError(
|
||||
'Socket connection broken while receiving magic'
|
||||
)
|
||||
recv_data += data
|
||||
if not MAGIC_BYTES.startswith(recv_data):
|
||||
recv_data = data
|
||||
continue
|
||||
|
||||
def _receive(self, length: int):
|
||||
"""Receive raw data from socket by length.
|
||||
|
||||
:param length: length in bytes to receive
|
||||
:type length: int
|
||||
:raises RuntimeError: if socket connection is broken while receiving
|
||||
:return: received bytes
|
||||
:rtype: bytes
|
||||
"""
|
||||
chunks = []
|
||||
bytes_recd = 0
|
||||
while bytes_recd < length:
|
||||
chunk = self._socket.recv(min(length - bytes_recd, 2048))
|
||||
if chunk == b'':
|
||||
raise RuntimeError(
|
||||
'Socket connection broken while receiving data.'
|
||||
)
|
||||
chunks.append(chunk)
|
||||
bytes_recd += len(chunk)
|
||||
return b''.join(chunks)
|
||||
|
||||
def _send(self, msg: bytes):
|
||||
"""Send msg in bytes through the socket.
|
||||
|
||||
:param msg: string in bytes to send
|
||||
:type msg: bytes
|
||||
:raises RuntimeError: if connection is broken while sending
|
||||
"""
|
||||
totalsent = 0
|
||||
while totalsent < len(msg):
|
||||
sent = self._socket.send(msg[totalsent:])
|
||||
if sent == 0:
|
||||
raise RuntimeError(
|
||||
'DBRM socket connection broken while sending.'
|
||||
)
|
||||
totalsent = totalsent + sent
|
||||
|
||||
def connect(self, host: str = DEFAULT_HOST, port: int = DEFAULT_PORT):
|
||||
"""Connect to socket.
|
||||
|
||||
By default it connects with DBRM master.
|
||||
"""
|
||||
self._host = host
|
||||
self._port = port
|
||||
self._socket.settimeout(SOCK_TIMEOUT)
|
||||
self._socket.connect((host, port))
|
||||
|
||||
def close(self):
|
||||
"""Closing the socket.
|
||||
|
||||
Set _host and _port instance variables to None to change state to
|
||||
not connected. Then close the _socket.
|
||||
"""
|
||||
self._host = None
|
||||
self._port = None
|
||||
self._socket.close()
|
||||
|
||||
def send(self, command_name: str, command_value: int = None):
|
||||
"""Top level send by command name and value.
|
||||
|
||||
:param command_name: name of a command
|
||||
:type command_name: str
|
||||
:param command_value: command value, defaults to None
|
||||
:type command_value: int, optional
|
||||
"""
|
||||
if DBRMSocketHandler.long_strings is None:
|
||||
self._detect_protocol()
|
||||
msg_bytes = self._make_msg(command_name, command_value)
|
||||
self._send(msg_bytes)
|
||||
|
||||
def receive(self):
|
||||
"""Top level method to receive data from socket.
|
||||
|
||||
Automatically reads the magic and data length from data header.
|
||||
|
||||
:return: received bytes without header
|
||||
:rtype: bytes
|
||||
"""
|
||||
self._receive_magic()
|
||||
data_length = int.from_bytes(self._receive(4), 'little')
|
||||
if DBRMSocketHandler.long_strings:
|
||||
# receive long strings count to meet new bytestream protocol
|
||||
# requirements (after MCS 6.2.1 release)
|
||||
long_strings_count_bytes = self._receive(4)
|
||||
data_bytes = self._receive(data_length)
|
||||
return data_bytes
|
114
cmapi/mcs_node_control/models/misc.py
Normal file
@ -0,0 +1,114 @@
|
||||
from __future__ import annotations
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from cmapi_server.constants import (
|
||||
DEFAULT_MCS_CONF_PATH, MCS_DATA_PATH, MCS_MODULE_FILE_PATH,
|
||||
)
|
||||
|
||||
|
||||
module_logger = logging.getLogger()
|
||||
|
||||
|
||||
def read_module_id():
|
||||
"""Retrieves module ID from MCS_MODULE_FILE_PATH.
|
||||
|
||||
:rtype: int
|
||||
"""
|
||||
module_file = Path(MCS_MODULE_FILE_PATH)
|
||||
return int(module_file.read_text()[2:])
|
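A minimal illustration (assumes the module file holds the "pm<N>" form that set_module_id below writes): the [2:] slice drops the "pm" prefix, and int() tolerates the trailing newline:

# 'pm1\n' -> read_module_id() == 1
assert int('pm1\n'[2:]) == 1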
||||
|
||||
|
||||
# TODO: unused for now, never called in code;
|
||||
# NodeConfig.apply_config does this.
|
||||
def set_module_id(module_id: int = 1):
|
||||
"""Sets current module ID from MCS_MODULE_FILE_PATH.
|
||||
|
||||
:rtype: int : number of characters written
|
||||
"""
|
||||
module_file = Path(MCS_MODULE_FILE_PATH)
|
||||
return module_file.write_text(f'pm{module_id}\n')
|
||||
|
||||
|
||||
def get_dbroots_list(path: str = MCS_DATA_PATH):
|
||||
"""searches for services
|
||||
|
||||
The method returns numeric ids of dbroots available.
|
||||
|
||||
:rtype: generator of ints
|
||||
"""
|
||||
func_name = 'get_dbroots_list'
|
||||
path = Path(path)
|
||||
for child in path.glob('data[1-9]*'):
|
||||
dir_list = str(child).split('/') # presume Linux only
|
||||
dbroot_id = int(''.join(list(filter(str.isdigit, dir_list[-1]))))
|
||||
module_logger.debug(f'{func_name} The node has dbroot {dbroot_id}')
|
||||
yield dbroot_id
|
||||
|
||||
|
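A minimal illustration of the directory convention get_dbroots_list relies on (the path below is hypothetical): each data<N> directory under the data path maps to dbroot id N.

# with /var/lib/columnstore/data1 and /var/lib/columnstore/data2 present:
# sorted(get_dbroots_list('/var/lib/columnstore')) == [1, 2]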
||||
def get_workernodes() -> dict[str, dict]:
|
||||
"""Get workernodes list.
|
||||
|
||||
Returns the network addresses of all workernodes.
|
||||
This is equivalent to the set of all nodes.
|
||||
|
||||
:return: workernodes dict
|
||||
:rtype: dict[str, dict]
|
||||
"""
|
||||
# TODO: fix in MCOL-5147, get xml path from class that will handle xml
|
||||
root = current_config_root()
|
||||
workernodes = {}
|
||||
# searches for all tags starts with DBRM_Worker, eg DBRM_Worker1
|
||||
workernodes_elements = root.xpath(
|
||||
"//*[starts-with(local-name(), 'DBRM_Worker')]"
|
||||
)
|
||||
for workernode_el in workernodes_elements:
|
||||
workernode_ip = workernode_el.find('./IPAddr').text
|
||||
if workernode_ip == '0.0.0.0':
|
||||
# skip elements with specific ip
|
||||
continue
|
||||
try:
|
||||
workernode_port = int(workernode_el.find('./Port').text)
|
||||
except (AttributeError, ValueError):
|
||||
# AttributeError for not found Port tag, so got None.text
|
||||
# ValueError for non numeric values in tag text
|
||||
module_logger.error(
|
||||
'No Port tag found or wrong Port value for tag '
|
||||
f'"{workernode_el.tag}".'
|
||||
)
|
||||
workernode_port = 8700
|
||||
workernodes[workernode_el.tag] = {
|
||||
'IPAddr': workernode_ip, 'Port': workernode_port
|
||||
}
|
||||
return workernodes
|
||||
|
||||
|
||||
def get_dbrm_master(config_filename: str = DEFAULT_MCS_CONF_PATH) -> dict:
|
||||
"""Get DBRM master ip and port.
|
||||
|
||||
:param config_filename: path to xml conf, defaults to DEFAULT_MCS_CONF_PATH
|
||||
:type config_filename: str, optional
|
||||
:return: ipaddress and port of DBRM master
|
||||
:rtype: dict
|
||||
"""
|
||||
# TODO: fix in MCOL-5147, get xml path from class that will handle xml
|
||||
# Use NodeConfig class as a template?
|
||||
root = current_config_root(config_filename)
|
||||
return {
|
||||
'IPAddr': root.find("./DBRM_Controller/IPAddr").text,
|
||||
'Port': root.find("./DBRM_Controller/Port").text
|
||||
}
|
||||
|
||||
|
||||
def current_config_root(config_filename: str = DEFAULT_MCS_CONF_PATH):
|
||||
"""Retrievs current configuration
|
||||
|
||||
Reads the config and returns the root Element.
|
||||
|
||||
:rtype: lxml.Element
|
||||
"""
|
||||
parser = etree.XMLParser(load_dtd=True)
|
||||
tree = etree.parse(config_filename, parser=parser)
|
||||
return tree.getroot()
|
114
cmapi/mcs_node_control/models/network_ifaces.py
Normal file
@ -0,0 +1,114 @@
|
||||
# Based on https://gist.github.com/provegard/1536682, which was
|
||||
# Based on getifaddrs.py from pydlnadms [http://code.google.com/p/pydlnadms/].
|
||||
# Only tested on Linux!
|
||||
# WARNING: Not working on Mac OS (tested on 10.12 Sierra)
|
||||
# TODO: move to psutil lib
|
||||
|
||||
|
||||
from socket import AF_INET, AF_INET6, inet_ntop
|
||||
from ctypes import (
|
||||
Structure, Union, POINTER,
|
||||
pointer, get_errno, cast,
|
||||
c_ushort, c_byte, c_void_p, c_char_p, c_uint, c_int, c_uint16, c_uint32
|
||||
)
|
||||
import ctypes.util
|
||||
import ctypes
|
||||
|
||||
|
||||
class struct_sockaddr(Structure):
|
||||
_fields_ = [
|
||||
('sa_family', c_ushort),
|
||||
('sa_data', c_byte * 14),]
|
||||
|
||||
|
||||
class struct_sockaddr_in(Structure):
|
||||
_fields_ = [
|
||||
('sin_family', c_ushort),
|
||||
('sin_port', c_uint16),
|
||||
('sin_addr', c_byte * 4)]
|
||||
|
||||
|
||||
class struct_sockaddr_in6(Structure):
|
||||
_fields_ = [
|
||||
('sin6_family', c_ushort),
|
||||
('sin6_port', c_uint16),
|
||||
('sin6_flowinfo', c_uint32),
|
||||
('sin6_addr', c_byte * 16),
|
||||
('sin6_scope_id', c_uint32)]
|
||||
|
||||
|
||||
class union_ifa_ifu(Union):
|
||||
_fields_ = [
|
||||
('ifu_broadaddr', POINTER(struct_sockaddr)),
|
||||
('ifu_dstaddr', POINTER(struct_sockaddr)),]
|
||||
|
||||
|
||||
class struct_ifaddrs(Structure):
|
||||
pass
|
||||
struct_ifaddrs._fields_ = [
|
||||
('ifa_next', POINTER(struct_ifaddrs)),
|
||||
('ifa_name', c_char_p),
|
||||
('ifa_flags', c_uint),
|
||||
('ifa_addr', POINTER(struct_sockaddr)),
|
||||
('ifa_netmask', POINTER(struct_sockaddr)),
|
||||
('ifa_ifu', union_ifa_ifu),
|
||||
('ifa_data', c_void_p),]
|
||||
|
||||
libc = ctypes.CDLL(ctypes.util.find_library('c'))
|
||||
|
||||
|
||||
def ifap_iter(ifap):
|
||||
ifa = ifap.contents
|
||||
while True:
|
||||
yield ifa
|
||||
if not ifa.ifa_next:
|
||||
break
|
||||
ifa = ifa.ifa_next.contents
|
||||
|
||||
|
||||
def getfamaddr(sa):
|
||||
family = sa.sa_family
|
||||
addr = None
|
||||
if family == AF_INET:
|
||||
sa = cast(pointer(sa), POINTER(struct_sockaddr_in)).contents
|
||||
addr = inet_ntop(family, sa.sin_addr)
|
||||
elif family == AF_INET6:
|
||||
sa = cast(pointer(sa), POINTER(struct_sockaddr_in6)).contents
|
||||
addr = inet_ntop(family, sa.sin6_addr)
|
||||
return family, addr
|
||||
|
||||
|
||||
class NetworkInterface(object):
|
||||
def __init__(self, name):
|
||||
self.name = name
|
||||
self.index = libc.if_nametoindex(name)
|
||||
self.addresses = {}
|
||||
|
||||
def __str__(self):
|
||||
return "%s [index=%d, IPv4=%s, IPv6=%s]" % (
|
||||
self.name, self.index,
|
||||
self.addresses.get(AF_INET),
|
||||
self.addresses.get(AF_INET6))
|
||||
|
||||
|
||||
def get_network_interfaces():
|
||||
ifap = POINTER(struct_ifaddrs)()
|
||||
result = libc.getifaddrs(pointer(ifap))
|
||||
if result != 0:
|
||||
raise OSError(get_errno())
|
||||
del result
|
||||
try:
|
||||
retval = {}
|
||||
for ifa in ifap_iter(ifap):
|
||||
name = ifa.ifa_name.decode("UTF-8")
|
||||
i = retval.get(name)
|
||||
if not i:
|
||||
i = retval[name] = NetworkInterface(name)
|
||||
family, addr = getfamaddr(ifa.ifa_addr.contents)
|
||||
if addr:
|
||||
if family not in i.addresses:
|
||||
i.addresses[family] = list()
|
||||
i.addresses[family].append(addr)
|
||||
return retval.values()
|
||||
finally:
|
||||
libc.freeifaddrs(ifap)
|
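A minimal usage sketch (Linux only, per the warning above): enumerate the detected interfaces; __str__ reports the index and per-family addresses:

for iface in get_network_interfaces():
    # e.g. "lo [index=1, IPv4=['127.0.0.1'], IPv6=['::1']]"
    print(iface)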
574
cmapi/mcs_node_control/models/node_config.py
Normal file
@ -0,0 +1,574 @@
|
||||
import configparser
|
||||
import grp
|
||||
import logging
|
||||
import pwd
|
||||
import re
|
||||
import socket
|
||||
from os import mkdir, replace, chown
|
||||
from pathlib import Path
|
||||
from shutil import copyfile
|
||||
from xml.dom import minidom # to pick up pretty printing functionality
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from cmapi_server.constants import (
|
||||
DEFAULT_MCS_CONF_PATH, DEFAULT_SM_CONF_PATH,
|
||||
MCS_MODULE_FILE_PATH,
|
||||
)
|
||||
# from cmapi_server.managers.process import MCSProcessManager
|
||||
from mcs_node_control.models.misc import (
|
||||
read_module_id, get_dbroots_list
|
||||
)
|
||||
from mcs_node_control.models.network_ifaces import get_network_interfaces
|
||||
|
||||
|
||||
module_logger = logging.getLogger()
|
||||
|
||||
|
||||
class NodeConfig:
|
||||
"""Class to operate with the configuration file.
|
||||
|
||||
The class instance applies a new config or retrieves the current one.
|
||||
|
||||
config_filename and output_filename allow tests to override
|
||||
the input & output of this fcn
|
||||
The output in this case may be a config file upgraded to version 1.
|
||||
"""
|
||||
def get_current_config_root(
|
||||
self, config_filename: str = DEFAULT_MCS_CONF_PATH, upgrade=True
|
||||
):
|
||||
"""Retrievs current configuration.
|
||||
|
||||
Reads the config and returns the root Element.
|
||||
TODO: pretty much the same function exists in misc.py - review
|
||||
|
||||
:rtype: lxml.Element
|
||||
"""
|
||||
parser = etree.XMLParser(load_dtd=True)
|
||||
tree = etree.parse(config_filename, parser=parser)
|
||||
self.upgrade_config(tree=tree, upgrade=upgrade)
|
||||
return tree.getroot()
|
||||
|
||||
def get_root_from_string(self, config_string: str):
|
||||
root = etree.fromstring(config_string)
|
||||
self.upgrade_config(root=root)
|
||||
return root
|
||||
|
||||
def upgrade_from_v0(self, root):
|
||||
revision = etree.SubElement(root, 'ConfigRevision')
|
||||
revision.text = '1'
|
||||
cluster_manager = etree.SubElement(root, 'ClusterManager')
|
||||
cluster_manager.text = str(self.get_module_net_address(root=root))
|
||||
cluster_name = etree.SubElement(root, 'ClusterName')
|
||||
cluster_name.text = 'MyCluster'
|
||||
|
||||
# Need to get the addresses/host names of all nodes.
|
||||
# Should all be listed as DBRM_worker nodes
|
||||
addrs = set()
|
||||
num = 1
|
||||
max_node = 1
|
||||
while True:
|
||||
node = root.find(f'./DBRM_Worker{num}/IPAddr')
|
||||
if node is None:
|
||||
break
|
||||
if node.text != '0.0.0.0':
|
||||
addrs.add(node.text)
|
||||
if max_node < num:
|
||||
max_node = num
|
||||
num += 1
|
||||
|
||||
# NextNodeId can be derived from the max DBRM_worker entry with non-0
|
||||
# ip address
|
||||
next_node_id = etree.SubElement(root, 'NextNodeId')
|
||||
next_node_id.text = str(max_node + 1)
|
||||
|
||||
# NextDBRootId is the max current dbroot in use + 1
|
||||
num = 1
|
||||
max_dbroot = 1
|
||||
while num < 100:
|
||||
node = root.find(f'./SystemConfig/DBRoot{num}')
|
||||
if node is not None:
|
||||
max_dbroot = num
|
||||
num += 1
|
||||
next_dbroot_id = etree.SubElement(root, 'NextDBRootId')
|
||||
next_dbroot_id.text = str(max_dbroot + 1)
|
||||
|
||||
# The current primary node is listed under DBRMControllerNode.
|
||||
# Might as well start with that.
|
||||
primary_node_addr = root.find('./DBRM_Controller/IPAddr').text
|
||||
|
||||
# Put them all in the DesiredNodes and ActiveNodes sections
|
||||
desired_nodes = etree.SubElement(root, 'DesiredNodes')
|
||||
active_nodes = etree.SubElement(root, 'ActiveNodes')
|
||||
for addr in addrs:
|
||||
node = etree.SubElement(desired_nodes, 'Node')
|
||||
node.text = addr
|
||||
node = etree.SubElement(active_nodes, 'Node')
|
||||
node.text = addr
|
||||
|
||||
# Add an empty InactiveNodes section and set the primary node addr
|
||||
inactive_nodes = etree.SubElement(root, 'InactiveNodes')
|
||||
primary_node = etree.SubElement(root, 'PrimaryNode')
|
||||
primary_node.text = primary_node_addr
|
||||
|
||||
# Add Maintenance tag and set to False
|
||||
maintenance = etree.SubElement(root, 'Maintenance')
|
||||
maintenance.text = str(False).lower()
|
||||
|
||||
|
||||
def upgrade_config(self, tree=None, root=None, upgrade=True):
|
||||
"""
|
||||
Add the parts that might be missing after an upgrade from an earlier
|
||||
version.
|
||||
|
||||
.. note:: one or the other optional parameter should be specified (?)
|
||||
"""
|
||||
if root is None and tree is not None:
|
||||
root = tree.getroot()
|
||||
|
||||
rev_node = root.find('./ConfigRevision')
|
||||
|
||||
if rev_node is None and upgrade:
|
||||
self.upgrade_from_v0(root)
|
||||
# as we add revisions, add add'l checks on rev_node.text here
|
||||
|
||||
def write_config(self, tree, filename=DEFAULT_MCS_CONF_PATH):
|
||||
tmp_filename = filename + ".cmapi.tmp"
|
||||
with open(tmp_filename, "w") as f:
|
||||
f.write(self.to_string(tree))
|
||||
replace(tmp_filename, filename) # atomic replacement
|
||||
|
||||
def to_string(self, tree):
|
||||
# TODO: try to use lxml to do this to avoid the add'l dependency
|
||||
xmlstr = minidom.parseString(etree.tostring(tree)).toprettyxml(
|
||||
indent=" "
|
||||
)
|
||||
# fix annoying issue of extra newlines added by toprettyxml()
|
||||
xmlstr = '\n'.join([
|
||||
line.rstrip() for line in xmlstr.split('\n') if line.strip() != ""
|
||||
])
|
||||
return xmlstr
|
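A minimal sketch of the atomic-write pattern used by write_config above (standard library only): write to a sibling temp file, then os.replace() it over the target so readers never see a half-written config:

import os

def atomic_write(path: str, text: str) -> None:
    tmp_path = path + '.cmapi.tmp'
    with open(tmp_path, 'w') as f:
        f.write(text)
    os.replace(tmp_path, path)  # atomic on POSIX within one filesystem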
||||
|
||||
def get_dbrm_conn_info(self, root=None):
|
||||
"""Retrievs current DBRM master IP and port
|
||||
|
||||
Read the config and returns a dict with the connection information.
|
||||
|
||||
:rtype: dict
|
||||
"""
|
||||
if root is None:
|
||||
return None
|
||||
addr = ''
|
||||
port = 0
|
||||
for el in root:
|
||||
if el.tag == 'DBRM_Controller':
|
||||
for subel in el:
|
||||
if subel.tag == 'IPAddr':
|
||||
addr = subel.text
|
||||
elif subel.tag == 'Port':
|
||||
port = subel.text
|
||||
return {'IPAddr': addr, 'Port': port}
|
||||
|
||||
return None
|
||||
|
||||
def apply_config(
|
||||
self, config_filename: str = DEFAULT_MCS_CONF_PATH,
|
||||
xml_string: str = None, sm_config_filename: str = None,
|
||||
sm_config_string: str = None
|
||||
):
|
||||
"""Applies the configuration WIP.
|
||||
|
||||
The instance iterates over the XML nodes.
|
||||
|
||||
:param config_filename: string, for testing
|
||||
:param xml_string: string
|
||||
|
||||
:rtype: bool
|
||||
"""
|
||||
if xml_string is None:
|
||||
return
|
||||
|
||||
current_root = self.get_current_config_root(config_filename)
|
||||
parser = etree.XMLParser(load_dtd=True)
|
||||
new_root = etree.fromstring(xml_string, parser=parser)
|
||||
|
||||
try:
|
||||
# We don't change module ids for non-single nodes.
|
||||
# if self.is_single_node(root=current_root):
|
||||
# set_module_id(self.get_new_module_id(new_root))
|
||||
|
||||
# make sure all of the dbroot directories exist on this node
|
||||
for dbroot in self.get_all_dbroots(new_root):
|
||||
try:
|
||||
node = new_root.find(f'./SystemConfig/DBRoot{dbroot}')
|
||||
mkdir(node.text, mode=0o755)
|
||||
|
||||
# if we are using the systemd dispatcher we need to change
|
||||
# ownership on any created dirs to mysql:mysql
|
||||
# TODO: remove conditional once container dispatcher will
|
||||
# use non-root by default
|
||||
# TODO: what happened if we change ownership in container?
|
||||
# check the container installations works as expected
|
||||
# from cmapi_server.managers.process import MCSProcessManager
|
||||
# if MCSProcessManager.dispatcher_name == 'systemd':
|
||||
uid = pwd.getpwnam('mysql').pw_uid
|
||||
gid = grp.getgrnam('mysql').gr_gid
|
||||
chown(node.text, uid, gid)
|
||||
except FileExistsError:
|
||||
pass
|
||||
# Save current config
|
||||
config_file = Path(config_filename)
|
||||
config_dir = config_file.resolve().parent
|
||||
copyfile(
|
||||
config_file, f'{config_dir}/{config_file.name}.cmapi.save'
|
||||
)
|
||||
|
||||
# Save new config
|
||||
self.write_config(tree=new_root, filename=config_filename)
|
||||
|
||||
# Save current and new storagemanager config
|
||||
if sm_config_string and sm_config_filename:
|
||||
sm_config_file = Path(sm_config_filename)
|
||||
sm_config_dir = sm_config_file.resolve().parent
|
||||
copyfile(
|
||||
sm_config_file,
|
||||
f'{sm_config_dir}/{sm_config_file.name}.cmapi.save'
|
||||
)
|
||||
with open(sm_config_filename, 'w') as sm_config_file:
|
||||
sm_config_file.write(sm_config_string)
|
||||
# TODO: review
|
||||
# figure out what to put in the 'module' file to make
|
||||
# the old oam library happy
|
||||
module_file = None
|
||||
try:
|
||||
pm_num = self.get_current_pm_num(new_root)
|
||||
with open(MCS_MODULE_FILE_PATH, 'w') as module_file:
|
||||
module_file.write(f'pm{pm_num}\n')
|
||||
module_logger.info(
|
||||
f'Wrote "pm{pm_num}" to {MCS_MODULE_FILE_PATH}'
|
||||
)
|
||||
except Exception:
|
||||
module_logger.error(
|
||||
'Failed to get or set this node\'s pm number.\n'
|
||||
'You may observe add\'l errors as a result.\n',
|
||||
exc_info=True
|
||||
)
|
||||
except:
|
||||
# Raise an appropriate exception
|
||||
module_logger.error(
|
||||
f'{self.apply_config.__name__} throws an exception.'
|
||||
'The original config must be restored by '
|
||||
'explicit ROLLBACK command or timeout.',
|
||||
exc_info=True
|
||||
)
|
||||
raise
|
||||
|
||||
def in_active_nodes(self, root):
|
||||
my_names = set(self.get_network_addresses_and_names())
|
||||
active_nodes = [
|
||||
node.text for node in root.findall("./ActiveNodes/Node")
|
||||
]
|
||||
for node in active_nodes:
|
||||
if node in my_names:
|
||||
return True
|
||||
return False
|
||||
|
||||
def get_current_pm_num(self, root):
|
||||
# Find this node in the Module* tags, return the module number
|
||||
|
||||
my_names = set(self.get_network_addresses_and_names())
|
||||
smc_node = root.find("./SystemModuleConfig")
|
||||
pm_count = int(smc_node.find("./ModuleCount3").text)
|
||||
for pm_num in range(1, pm_count + 1):
|
||||
ip_addr = smc_node.find(f"./ModuleIPAddr{pm_num}-1-3").text
|
||||
name = smc_node.find(f"./ModuleHostName{pm_num}-1-3").text
|
||||
if ip_addr in my_names:
|
||||
module_logger.info(f" -- Matching against ModuleIPAddr{pm_num}-1-3, which says {ip_addr}")
|
||||
return pm_num
|
||||
if name in my_names:
|
||||
module_logger.info(f" -- Matching against ModuleHostName{pm_num}-1-3, which says {name}")
|
||||
return pm_num
|
||||
raise Exception("Did not find my IP addresses or names in the SystemModuleConfig section")
|
||||
|
||||
|
||||
def rollback_config(self, config_filename: str = DEFAULT_MCS_CONF_PATH):
|
||||
"""Rollback the configuration.
|
||||
|
||||
Copies back the saved copy of the configuration file.
|
||||
|
||||
:param config_filename: Columnstore config file path
|
||||
:rtype: None
|
||||
"""
|
||||
# TODO: Rollback doesn't restart needed processes?
|
||||
config_file = Path(config_filename)
|
||||
config_dir = config_file.resolve().parent
|
||||
backup_path = f"{config_dir}/{config_file.name}.cmapi.save"
|
||||
config_file_copy = Path(backup_path)
|
||||
if config_file_copy.exists():
|
||||
replace(backup_path, config_file) # atomic replacement
|
||||
|
||||
|
||||
def get_current_config(self, config_filename: str = DEFAULT_MCS_CONF_PATH):
|
||||
"""Retrievs current configuration.
|
||||
|
||||
Reads the config and converts it into a string.
|
||||
|
||||
:rtype: string
|
||||
|
||||
..TODO: fix using self.get_current_config_root()
|
||||
"""
|
||||
parser = etree.XMLParser(load_dtd=True)
|
||||
tree = etree.parse(config_filename, parser=parser)
|
||||
self.upgrade_config(tree=tree)
|
||||
# TODO: Unicode? UTF-8 may be?
|
||||
return etree.tostring(
|
||||
tree.getroot(), pretty_print=True, encoding='unicode'
|
||||
)
|
||||
|
||||
|
||||
    def get_current_sm_config(
        self, config_filename: str = DEFAULT_SM_CONF_PATH
    ) -> str:
        """Retrieves the current SM configuration.

        Read the config and convert it into a string.

        :rtype: str
        """
        func_name = 'get_current_sm_config'
        sm_config_path = Path(config_filename)
        try:
            return sm_config_path.read_text(encoding='utf-8')
        except FileNotFoundError:
            module_logger.error(
                f'{func_name} SM config {config_filename} not found.'
            )
            return ''

    def s3_enabled(self, config_filename: str = DEFAULT_SM_CONF_PATH) -> bool:
        """Checks if S3 storage is enabled.

        Reads the SM config and checks if storage is set to S3.
        It also checks for additional settings in the XML that must be
        set too.

        :rtype: bool
        """
        func_name = 's3_enabled'
        sm_config = configparser.ConfigParser()
        if len(sm_config.read(config_filename)) > 0:
            # fallback covers a missing section or option; plain get()
            # would raise instead of returning None
            storage = sm_config.get(
                'ObjectStorage', 'service', fallback='LocalStorage'
            )
            if storage.lower() == 's3':
                config_root = self.get_current_config_root()
                if not config_root.find(
                    './Installation/DBRootStorageType'
                ).text.lower() == 'storagemanager':
                    module_logger.error(
                        f'{func_name} DBRootStorageType.lower() '
                        '!= storagemanager'
                    )
                if not config_root.find(
                    './StorageManager/Enabled'
                ).text.lower() == 'y':
                    module_logger.error(
                        f'{func_name} StorageManager/Enabled.lower() != y'
                    )
                if not config_root.find(
                    './SystemConfig/DataFilePlugin'
                ).text == 'libcloudio.so':
                    module_logger.error(
                        f'{func_name} SystemConfig/DataFilePlugin '
                        '!= libcloudio.so'
                    )
                return True
        else:
            module_logger.error(
                f'{func_name} SM config {config_filename} not found.'
            )
        return False

    def get_network_addresses(self):
        """Retrieves the network addresses.

        Generator that yields network interface addresses.

        :rtype: str
        """
        for ni in get_network_interfaces():
            for fam in [socket.AF_INET, socket.AF_INET6]:
                addrs = ni.addresses.get(fam)
                if addrs is not None:
                    for addr in addrs:
                        yield addr

    def get_network_addresses_and_names(self):
        """Retrieves network addresses, hostnames, and aliases.

        Generator that yields network interface addresses, hostnames,
        and aliases.

        :rtype: str
        """
        for ni in get_network_interfaces():
            for fam in [socket.AF_INET, socket.AF_INET6]:
                addrs = ni.addresses.get(fam)
                if addrs is not None:
                    for addr in addrs:
                        yield addr
                        try:
                            (host, aliases, _) = socket.gethostbyaddr(addr)
                        except socket.error:
                            continue
                        yield host
                        for alias in aliases:
                            yield alias

    def is_primary_node(self, root=None):
        """Checks if this node is the primary node.

        Reads the config and compares the DBRM_Controller IP or
        hostname with this node's IP and hostname.

        :rtype: bool
        """
        if root is None:
            root = self.get_current_config_root()

        primary_address = self.get_dbrm_conn_info(root)['IPAddr']
        return primary_address in self.get_network_addresses_and_names()

    def is_single_node(self, root=None):
        """Checks if this node is the single node.

        Reads the config and compares the DBRM master IP with the
        predefined localhost addresses.

        :rtype: bool
        """
        if root is None:
            root = self.get_current_config_root()

        master_address = self.get_dbrm_conn_info(root)['IPAddr']
        return master_address in ('127.0.0.1', 'localhost', '::1')

    def get_new_module_id(self, new_root=None):
        """Retrieves the new module id.

        Reads the new XML config and searches
        SystemModuleConfig.ModuleIPAddrX-1-3 for an IP that belongs to
        this host; X is the new module id.

        :rtype: int
        """
        func_name = 'get_new_module_id'
        current_module_id = read_module_id()

        if new_root is None:
            module_logger.error(f'{func_name} Empty new XML tree root.')
            return current_module_id

        net_address = self.get_module_net_address(new_root, current_module_id)
        # Use getaddrinfo in case of IPv6
        if net_address is None:
            module_logger.error(
                f'{func_name} Columnstore.xml has unknown value in '
                f'SystemModuleConfig.ModuleIPAddr{current_module_id}-1-3.'
            )
            raise RuntimeError('net_address is None.')
        if socket.gethostbyname(net_address) in self.get_network_addresses():
            return current_module_id

        # Use getaddrinfo in case of IPv6
        # This fires for an added node when the node id changes from 1
        # to something else
        for module_entry in self.get_modules_addresses(new_root):
            if module_entry['addr'] is not None:
                net_addr = socket.gethostbyname(module_entry['addr'])
                if net_addr in self.get_network_addresses():
                    module_logger.debug(
                        f'{func_name} New module id {module_entry["id"]}'
                    )
                    return int(module_entry['id'])

        module_logger.error(
            f'{func_name} Cannot find a new module id for the node.'
        )
        raise RuntimeError('Failed to find module id.')

    def get_module_net_address(self, root=None, module_id: int = None):
        """Retrieves the module network address.

        Reads the XML config and returns the IP or hostname from
        SystemModuleConfig.ModuleIPAddrX-1-3.

        :rtype: str
        """
        func_name = 'get_module_net_address'
        if module_id is None:
            module_id = read_module_id()

        if root is None:
            module_logger.error(f'{func_name} Empty XML root.')
            return

        for el in root:
            if el.tag == 'SystemModuleConfig':
                for subel in el:
                    if subel.tag == f'ModuleIPAddr{module_id}-1-3':
                        module_logger.debug(
                            f'{func_name} Module {module_id} '
                            f'network address {subel.text}'
                        )
                        return subel.text

        module_logger.error(f'{func_name} Module {module_id} was not found.')
        return

    def get_modules_addresses(self, root=None):
        """Retrieves the modules' network addresses.

        Reads the XML config and yields the IP or hostname from
        SystemModuleConfig.ModuleIPAddrX-1-3, with X being a node id.

        :rtype: generator of dicts
        """
        func_name = 'get_modules_addresses'
        if root is None:
            module_logger.error(f'{func_name} Empty XML root.')
            return None

        regex_string = 'ModuleIPAddr[0-9]+-1-3'
        for el in root:
            if el.tag == 'SystemModuleConfig':
                for subel in el:
                    module_ip_m = re.match(regex_string, subel.tag)
                    if module_ip_m is not None:
                        id_m = re.search('[0-9]+', module_ip_m.group(0))
                        module_id = id_m.group(0)
                        module_logger.debug(
                            f'{func_name} Module {module_id} '
                            f'network address {subel.text}'
                        )
                        yield {'addr': subel.text, 'id': module_id}

    def dbroots_to_create(self, root=None, module_id: int = None):
        """Generates dbroot ids if there are new dbroots to be
        created/renamed.

        Reads the new XML config and yields dbroot ids if the on-disk
        dbroots differ from the config's set.

        :rtype: generator of ints
        """
        func_name = 'dbroots_to_create'
        if module_id is None:
            module_id = read_module_id()

        if root is None:
            module_logger.error(f'{func_name} Empty XML root.')
            return

        current_dbroot_list = get_dbroots_list()

        regex_string = f'ModuleDBRootID{module_id}-[0-9]+-3'
        for el in root:
            if el.tag == 'SystemModuleConfig':
                for subel in el:
                    if re.match(regex_string, subel.tag) is not None and \
                            int(subel.text) not in current_dbroot_list:
                        module_logger.debug(
                            f'{func_name} Module {module_id} '
                            f'has dbroot {subel.text}'
                        )
                        yield int(subel.text)
        return

    def get_all_dbroots(self, root):
        """Returns all dbroot ids listed in the SystemModuleConfig."""
        dbroots = []
        smc_node = root.find("./SystemModuleConfig")
        mod_count = int(smc_node.find("./ModuleCount3").text)
        for i in range(1, mod_count + 1):
            dbroot_count = int(
                smc_node.find(f"./ModuleDBRootCount{i}-3").text
            )
            for j in range(1, dbroot_count + 1):
                dbroots.append(smc_node.find(f"./ModuleDBRootID{i}-{j}-3").text)
        return dbroots
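
For orientation, a minimal sketch of how the apply/rollback pair above is meant to be driven. The class name NodeConfig matches this file; the driver flow itself is illustrative, not CMAPI's actual transaction handling:

    # Hypothetical recovery flow: apply_config() (earlier in this file)
    # writes <config>.cmapi.save next to the config before overwriting
    # it, and rollback_config() moves that backup back into place.
    from mcs_node_control.models.node_config import NodeConfig

    nc = NodeConfig()
    current_xml = nc.get_current_config()  # pretty-printed str of the config
    try:
        pass  # ... apply a new config here and restart services ...
    except Exception:
        # Restore Columnstore.xml from Columnstore.xml.cmapi.save
        nc.rollback_config()
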
91
cmapi/mcs_node_control/models/node_status.py
Normal file
@ -0,0 +1,91 @@
import logging
import socket

from cmapi_server.constants import MCS_DATA_PATH, MCS_MODULE_FILE_PATH
from mcs_node_control.models.dbrm import DBRM
from mcs_node_control.models.misc import get_dbroots_list, read_module_id
from mcs_node_control.models.process import get_host_uptime


PROC_NAMES = ['ExeMgr', 'PrimProc', 'WriteEngine', 'controllernode',
              'workernode', 'cmagent', 'DMLProc', 'DDLProc']


module_logger = logging.getLogger()


class NodeStatus:
    """Class to tell the status of the node.

    Inspects the runtime of the cluster and OS and returns its
    observations.
    """
    def get_cluster_mode(self):
        """Reads the cluster mode.

        The cluster can be in readwrite or readonly mode. It can also be
        ready or not ready, but that is not important at this point. We
        presume that if there is no connection with the DBRM master then
        the cluster is readonly.

        TODO:
            - Is it ok to have this method here in NodeStatus?
              Move to DBRM.
            - pass 'root' and config_filename arguments
              (likewise dbrm.set_cluster_mode)

        :rtype: str
        """
        try:
            with DBRM() as dbrm:
                return dbrm.get_cluster_mode()
        except (ConnectionRefusedError, RuntimeError, socket.error):
            module_logger.error(
                'Cannot establish or use DBRM connection.',
                exc_info=True
            )
            return 'readonly'

    def get_dbrm_status(self):
        """Reads the DBRM status.

        DBRM (the Block Resolution Manager) operates in two modes:
        master and slave. This method returns the mode of this node by
        looking for a running controllernode process.

        :rtype: str
        """
        return DBRM.get_dbrm_status()

    def get_dbroots(self, path: str = MCS_DATA_PATH):
        """Searches for dbroots.

        The method yields the numeric ids of the dbroots available.

        :rtype: generator of ints
        """
        for id in get_dbroots_list(path):
            yield id

    def get_host_uptime(self):
        """Retrieves host uptime in seconds.

        :rtype: int : seconds
        """
        return get_host_uptime()

    def get_module_id(self):
        """Retrieves the module ID from MCS_MODULE_FILE_PATH.

        :rtype: int
        """
        func_name = 'get_module_id'
        try:
            module_id = read_module_id()
        except FileNotFoundError:
            module_id = 0
            module_logger.error(
                f'{func_name} {MCS_MODULE_FILE_PATH} file is absent.'
            )
        return module_id
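
A minimal sketch of how a caller might aggregate these probes into one status payload. The dict shape and key names are assumptions for illustration, not the CMAPI wire format:

    # Hypothetical aggregation of the NodeStatus probes above.
    from mcs_node_control.models.node_status import NodeStatus

    status = NodeStatus()
    payload = {
        'cluster_mode': status.get_cluster_mode(),  # 'readwrite'/'readonly'
        'dbrm_mode': status.get_dbrm_status(),      # 'master' or 'slave'
        'dbroots': list(status.get_dbroots()),
        'uptime': status.get_host_uptime(),
        'module_id': status.get_module_id(),
    }
    print(payload)
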
110
cmapi/mcs_node_control/models/process.py
Normal file
@ -0,0 +1,110 @@
import os
import time

import psutil

PROCFS_PATH = '/proc/'  # Linux only


def open_binary(fname, **kwargs):
    return open(fname, "rb", **kwargs)


def get_host_uptime():
    """Return the system uptime expressed in seconds since boot.

    :rtype: int : diff b/w the current epoch and the boot epoch
    """
    path = f'{PROCFS_PATH}stat'
    boot_time = 0
    with open_binary(path) as f:
        for line in f:
            if line.startswith(b'btime'):
                boot_time = float(line.strip().split()[1])
                return int(time.time() - int(boot_time))
    return 0


class Process():
    """An interface to retrieve data from proc."""
    def get_proc_iterator(self):
        for pid in self.pids():
            yield pid

    def pids(self):
        """Returns a list of PIDs currently running on the system."""
        return [int(x) for x in os.listdir(PROCFS_PATH) if x.isdigit()]

    def name(self, pid: int):
        """Retrieves the name associated with the pid."""
        return self.parse_stat_file(pid)['name']

    def parse_stat_file(self, pid: int):
        """Parse /proc/{pid}/stat file and return a dict with various
        process info.

        Using "man proc" as a reference: where "man proc" refers to
        position N, always subtract 3 (e.g. ppid is position 4 in
        "man proc" == position 1 here).
        """
        ret = {}
        try:
            with open_binary(f"{PROCFS_PATH}{pid}/stat") as f:
                data = f.read()
            # The process name is between parentheses. It can contain
            # spaces and other parentheses. This is taken into account by
            # looking for the first occurrence of "(" and the last
            # occurrence of ")".
            rpar = data.rfind(b')')
            name = data[data.find(b'(') + 1:rpar]
            fields = data[rpar + 2:].split()

            ret['name'] = name
            ret['status'] = fields[0]
            ret['ppid'] = fields[1]
            ret['ttynr'] = fields[4]
            ret['utime'] = fields[11]
            ret['stime'] = fields[12]
            ret['children_utime'] = fields[13]
            ret['children_stime'] = fields[14]
            ret['create_time'] = fields[19]
            ret['cpu_num'] = fields[36]
            ret['blkio_ticks'] = fields[39]  # aka 'delayacct_blkio_ticks'
        except (PermissionError, ProcessLookupError, FileNotFoundError):
            # Process vanished or is unreadable: return empty fields.
            for key in (
                'name', 'status', 'ppid', 'ttynr', 'utime', 'stime',
                'children_utime', 'children_stime', 'create_time',
                'cpu_num', 'blkio_ticks'
            ):
                ret[key] = ''
        return ret

    @staticmethod
    def check_process_alive(proc_name: str) -> bool:
        """Check whether a process is running.

        :param proc_name: process name
        :type proc_name: str
        :return: True if the process is running, otherwise False
        :rtype: bool
        """
        # Iterate over all running processes
        for proc in psutil.process_iter():
            try:
                # Check if the process name equals the given name string.
                if proc_name.lower() == proc.name().lower():
                    return True
            except (
                psutil.NoSuchProcess, psutil.AccessDenied,
                psutil.ZombieProcess
            ):
                pass
        return False
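
A small usage sketch tying the PROC_NAMES-style checks from node_status.py to this class; the loop below is illustrative, the actual callers live elsewhere in cmapi:

    # Hypothetical liveness check over a few of the usual ColumnStore
    # daemons, using the static method defined above.
    from mcs_node_control.models.process import Process

    for proc_name in ('PrimProc', 'controllernode', 'workernode'):
        alive = Process.check_process_alive(proc_name)
        print(f'{proc_name}: {"running" if alive else "not running"}')
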
587
cmapi/mcs_node_control/test/Columnstore_new.xml
Normal file
@ -0,0 +1,587 @@
<Columnstore Version="V1.0.1">
  <!--
    WARNING: Do not make changes to this file unless directed to do so by
    MariaDB service engineers. Incorrect settings can render your system
    unusable and will require a service call to correct.
  -->
  <Manager>MaxScale IP</Manager>
  <Sequence>42</Sequence>
  <ExeMgr1>
    <IPAddr>192.168.0.101</IPAddr>
    <Port>8601</Port>
    <Module>um1</Module>
  </ExeMgr1>
  <JobProc>
    <IPAddr>0.0.0.0</IPAddr>
    <Port>8602</Port>
  </JobProc>
  <ProcMgr>
    <IPAddr>192.168.0.102</IPAddr>
    <Port>8603</Port>
  </ProcMgr>
  <ProcMgr_Alarm>
    <IPAddr>192.168.0.102</IPAddr>
    <Port>8606</Port>
  </ProcMgr_Alarm>
  <ProcStatusControl>
    <IPAddr>192.168.0.102</IPAddr>
    <Port>8604</Port>
  </ProcStatusControl>
  <ProcStatusControlStandby>
    <IPAddr>192.168.0.103</IPAddr>
    <Port>8605</Port>
  </ProcStatusControlStandby>
  <!-- Disabled
  <ProcHeartbeatControl>
    <IPAddr>0.0.0.0</IPAddr>
    <Port>8605</Port>
  </ProcHeartbeatControl>
  -->
  <!-- ProcessMonitor Port: 8800 - 8820 is reserved to support External Modules-->
  <localhost_ProcessMonitor>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8800</Port>
  </localhost_ProcessMonitor>
  <dm1_ProcessMonitor>
    <IPAddr>0.0.0.0</IPAddr>
    <Port>8800</Port>
  </dm1_ProcessMonitor>
  <um1_ProcessMonitor>
    <IPAddr>192.168.0.101</IPAddr>
    <Port>8800</Port>
  </um1_ProcessMonitor>
  <pm1_ProcessMonitor>
    <IPAddr>192.168.0.102</IPAddr>
    <Port>8800</Port>
  </pm1_ProcessMonitor>
  <dm1_ServerMonitor>
    <IPAddr>0.0.0.0</IPAddr>
    <Port>8622</Port>
  </dm1_ServerMonitor>
  <um1_ServerMonitor>
    <IPAddr>192.168.0.101</IPAddr>
    <Port>8622</Port>
  </um1_ServerMonitor>
  <pm1_ServerMonitor>
    <IPAddr>192.168.0.102</IPAddr>
    <Port>8622</Port>
  </pm1_ServerMonitor>
  <pm1_WriteEngineServer>
    <IPAddr>192.168.0.102</IPAddr>
    <Port>8630</Port>
  </pm1_WriteEngineServer>
  <DDLProc>
    <IPAddr>192.168.0.101</IPAddr>
    <Port>8612</Port>
  </DDLProc>
  <DMLProc>
    <IPAddr>192.168.0.101</IPAddr>
    <Port>8614</Port>
  </DMLProc>
  <BatchInsert>
    <RowsPerBatch>10000</RowsPerBatch>
  </BatchInsert>
  <PrimitiveServers>
    <Count>4</Count>
    <ConnectionsPerPrimProc>2</ConnectionsPerPrimProc>
    <ProcessorThreshold>128</ProcessorThreshold>
    <ProcessorQueueSize>10K</ProcessorQueueSize> <!-- minimum of extent size 8192 -->
    <DebugLevel>0</DebugLevel>
    <ColScanBufferSizeBlocks>512</ColScanBufferSizeBlocks>
    <ColScanReadAheadBlocks>512</ColScanReadAheadBlocks> <!-- s/b factor of extent size 8192 -->
    <!-- <BPPCount>16</BPPCount> --> <!-- Default num cores * 2. A cap on the number of simultaneous primitives per jobstep -->
    <PrefetchThreshold>1</PrefetchThreshold>
    <PTTrace>0</PTTrace>
    <RotatingDestination>y</RotatingDestination> <!-- Iterate thru UM ports; set to 'n' if UM/PM on same server -->
    <!-- <HighPriorityPercentage>60</HighPriorityPercentage> -->
    <!-- <MediumPriorityPercentage>30</MediumPriorityPercentage> -->
    <!-- <LowPriorityPercentage>10</LowPriorityPercentage> -->
    <DirectIO>y</DirectIO>
    <HighPriorityPercentage/>
    <MediumPriorityPercentage/>
    <LowPriorityPercentage/>
  </PrimitiveServers>
  <PMS1>
    <IPAddr>192.168.0.102</IPAddr>
    <Port>8620</Port>
  </PMS1>
  <PMS2>
    <IPAddr>192.168.0.103</IPAddr>
    <Port>8620</Port>
  </PMS2>
  <PMS3>
    <IPAddr>192.168.0.105</IPAddr>
    <Port>8620</Port>
  </PMS3>
  <PMS4>
    <IPAddr>192.168.0.106</IPAddr>
    <Port>8620</Port>
  </PMS4>
  <PMS5>
    <IPAddr>192.168.0.102</IPAddr>
    <Port>8620</Port>
  </PMS5>
  <PMS6>
    <IPAddr>192.168.0.103</IPAddr>
    <Port>8620</Port>
  </PMS6>
  <PMS7>
    <IPAddr>192.168.0.105</IPAddr>
    <Port>8620</Port>
  </PMS7>
  <PMS8>
    <IPAddr>192.168.0.106</IPAddr>
    <Port>8620</Port>
  </PMS8>
  <PMS9>
    <IPAddr>192.168.0.102</IPAddr>
    <Port>8620</Port>
  </PMS9>
  <PMS10>
    <IPAddr>192.168.0.103</IPAddr>
    <Port>8620</Port>
  </PMS10>
  <PMS11>
    <IPAddr>192.168.0.105</IPAddr>
    <Port>8620</Port>
  </PMS11>
  <PMS12>
    <IPAddr>192.168.0.106</IPAddr>
    <Port>8620</Port>
  </PMS12>
  <PMS13>
    <IPAddr>192.168.0.102</IPAddr>
    <Port>8620</Port>
  </PMS13>
  <PMS14>
    <IPAddr>192.168.0.103</IPAddr>
    <Port>8620</Port>
  </PMS14>
  <PMS15>
    <IPAddr>192.168.0.105</IPAddr>
    <Port>8620</Port>
  </PMS15>
  <PMS16>
    <IPAddr>192.168.0.106</IPAddr>
    <Port>8620</Port>
  </PMS16>
  <PMS17>
    <IPAddr>192.168.0.102</IPAddr>
    <Port>8620</Port>
  </PMS17>
  <PMS18>
    <IPAddr>192.168.0.103</IPAddr>
    <Port>8620</Port>
  </PMS18>
  <PMS19>
    <IPAddr>192.168.0.105</IPAddr>
    <Port>8620</Port>
  </PMS19>
  <PMS20>
    <IPAddr>192.168.0.106</IPAddr>
    <Port>8620</Port>
  </PMS20>
  <PMS21>
    <IPAddr>192.168.0.102</IPAddr>
    <Port>8620</Port>
  </PMS21>
  <PMS22>
    <IPAddr>192.168.0.103</IPAddr>
    <Port>8620</Port>
  </PMS22>
  <PMS23>
    <IPAddr>192.168.0.105</IPAddr>
    <Port>8620</Port>
  </PMS23>
  <PMS24>
    <IPAddr>192.168.0.106</IPAddr>
    <Port>8620</Port>
  </PMS24>
  <PMS25>
    <IPAddr>192.168.0.102</IPAddr>
    <Port>8620</Port>
  </PMS25>
  <PMS26>
    <IPAddr>192.168.0.103</IPAddr>
    <Port>8620</Port>
  </PMS26>
  <PMS27>
    <IPAddr>192.168.0.105</IPAddr>
    <Port>8620</Port>
  </PMS27>
  <PMS28>
    <IPAddr>192.168.0.106</IPAddr>
    <Port>8620</Port>
  </PMS28>
  <PMS29>
    <IPAddr>192.168.0.102</IPAddr>
    <Port>8620</Port>
  </PMS29>
  <PMS30>
    <IPAddr>192.168.0.103</IPAddr>
    <Port>8620</Port>
  </PMS30>
  <PMS31>
    <IPAddr>192.168.0.105</IPAddr>
    <Port>8620</Port>
  </PMS31>
  <PMS32>
    <IPAddr>192.168.0.106</IPAddr>
    <Port>8620</Port>
  </PMS32>
  <SystemConfig>
    <SystemLang>en_US.utf8</SystemLang>
    <SystemName>columnstore-1</SystemName>
    <ParentOAMModuleName>pm1</ParentOAMModuleName>
    <PrimaryUMModuleName>um1</PrimaryUMModuleName>
    <!-- Warning: Do not change this value once database is built -->
    <DBRootCount>4</DBRootCount>
    <DBRoot1>/usr/local/mariadb/columnstore/data1</DBRoot1>
    <DBRMRoot>$INSTALLDIR/data1/systemFiles/dbrm/BRM_saves</DBRMRoot>
    <TableLockSaveFile>$INSTALLDIR/data1/systemFiles/dbrm/tablelocks</TableLockSaveFile>
    <DBRMTimeOut>20</DBRMTimeOut> <!-- in seconds -->
    <DBRMSnapshotInterval>100000</DBRMSnapshotInterval>
    <WaitPeriod>10</WaitPeriod> <!-- in seconds -->
    <CalpontHome>$INSTALLDIR</CalpontHome>
    <MemoryCheckPercent>95</MemoryCheckPercent> <!-- Max real memory to limit growth of buffers to -->
    <DataFileLog>OFF</DataFileLog>
    <!-- enable if you want to limit how much memory may be used for hdfs read/write memory buffers.
    <hdfsRdwrBufferMaxSize>8G</hdfsRdwrBufferMaxSize>
    -->
    <hdfsRdwrScratch>/rdwrscratch</hdfsRdwrScratch> <!-- Do not set to an hdfs file path -->
    <!-- Be careful modifying SystemTempFileDir! On start, ExeMgr deletes
    the entire subdirectories "joins" & "aggregates" and recreates it to make sure no
    files are left behind. -->
    <SystemTempFileDir>/tmp/columnstore_tmp_files</SystemTempFileDir>
    <DBRoot2>/usr/local/mariadb/columnstore/data2</DBRoot2>
    <DBRoot3>/usr/local/mariadb/columnstore/data3</DBRoot3>
    <DBRoot4>/usr/local/mariadb/columnstore/data4</DBRoot4>
  </SystemConfig>
  <SystemModuleConfig>
    <ModuleType1>dm</ModuleType1>
    <ModuleDesc1>Director Module</ModuleDesc1>
    <ModuleCount1>0</ModuleCount1>
    <ModuleIPAddr1-1-1>0.0.0.0</ModuleIPAddr1-1-1>
    <ModuleHostName1-1-1>unassigned</ModuleHostName1-1-1>
    <ModuleDisableState1-1>ENABLED</ModuleDisableState1-1>
    <ModuleCPUCriticalThreshold1>0</ModuleCPUCriticalThreshold1>
    <ModuleCPUMajorThreshold1>0</ModuleCPUMajorThreshold1>
    <ModuleCPUMinorThreshold1>0</ModuleCPUMinorThreshold1>
    <ModuleCPUMinorClearThreshold1>0</ModuleCPUMinorClearThreshold1>
    <ModuleDiskCriticalThreshold1>90</ModuleDiskCriticalThreshold1>
    <ModuleDiskMajorThreshold1>80</ModuleDiskMajorThreshold1>
    <ModuleDiskMinorThreshold1>70</ModuleDiskMinorThreshold1>
    <ModuleMemCriticalThreshold1>90</ModuleMemCriticalThreshold1>
    <ModuleMemMajorThreshold1>0</ModuleMemMajorThreshold1>
    <ModuleMemMinorThreshold1>0</ModuleMemMinorThreshold1>
    <ModuleSwapCriticalThreshold1>90</ModuleSwapCriticalThreshold1>
    <ModuleSwapMajorThreshold1>80</ModuleSwapMajorThreshold1>
    <ModuleSwapMinorThreshold1>70</ModuleSwapMinorThreshold1>
    <ModuleDiskMonitorFileSystem1-1>/</ModuleDiskMonitorFileSystem1-1>
    <ModuleDBRootCount1-1>unassigned</ModuleDBRootCount1-1>
    <ModuleDBRootID1-1-1>unassigned</ModuleDBRootID1-1-1>
    <ModuleType2>um</ModuleType2>
    <ModuleDesc2>User Module</ModuleDesc2>
    <ModuleCount2>2</ModuleCount2>
    <ModuleIPAddr1-1-2>192.168.0.101</ModuleIPAddr1-1-2>
    <ModuleHostName1-1-2>nvm002314</ModuleHostName1-1-2>
    <ModuleDisableState1-2>ENABLED</ModuleDisableState1-2>
    <ModuleCPUCriticalThreshold2>0</ModuleCPUCriticalThreshold2>
    <ModuleCPUMajorThreshold2>0</ModuleCPUMajorThreshold2>
    <ModuleCPUMinorThreshold2>0</ModuleCPUMinorThreshold2>
    <ModuleCPUMinorClearThreshold2>0</ModuleCPUMinorClearThreshold2>
    <ModuleDiskCriticalThreshold2>90</ModuleDiskCriticalThreshold2>
    <ModuleDiskMajorThreshold2>80</ModuleDiskMajorThreshold2>
    <ModuleDiskMinorThreshold2>70</ModuleDiskMinorThreshold2>
    <ModuleMemCriticalThreshold2>90</ModuleMemCriticalThreshold2>
    <ModuleMemMajorThreshold2>0</ModuleMemMajorThreshold2>
    <ModuleMemMinorThreshold2>0</ModuleMemMinorThreshold2>
    <ModuleSwapCriticalThreshold2>90</ModuleSwapCriticalThreshold2>
    <ModuleSwapMajorThreshold2>80</ModuleSwapMajorThreshold2>
    <ModuleSwapMinorThreshold2>70</ModuleSwapMinorThreshold2>
    <ModuleDiskMonitorFileSystem1-2>/</ModuleDiskMonitorFileSystem1-2>
    <ModuleDBRootCount1-2>unassigned</ModuleDBRootCount1-2>
    <ModuleDBRootID1-1-2>unassigned</ModuleDBRootID1-1-2>
    <ModuleType3>pm</ModuleType3>
    <ModuleDesc3>Performance Module</ModuleDesc3>
    <ModuleCount3>4</ModuleCount3>
    <ModuleIPAddr1-1-3>192.168.0.102</ModuleIPAddr1-1-3>
    <ModuleHostName1-1-3>nvm002315</ModuleHostName1-1-3>
    <ModuleDisableState1-3>ENABLED</ModuleDisableState1-3>
    <ModuleCPUCriticalThreshold3>0</ModuleCPUCriticalThreshold3>
    <ModuleCPUMajorThreshold3>0</ModuleCPUMajorThreshold3>
    <ModuleCPUMinorThreshold3>0</ModuleCPUMinorThreshold3>
    <ModuleCPUMinorClearThreshold3>0</ModuleCPUMinorClearThreshold3>
    <ModuleDiskCriticalThreshold3>90</ModuleDiskCriticalThreshold3>
    <ModuleDiskMajorThreshold3>80</ModuleDiskMajorThreshold3>
    <ModuleDiskMinorThreshold3>70</ModuleDiskMinorThreshold3>
    <ModuleMemCriticalThreshold3>90</ModuleMemCriticalThreshold3>
    <ModuleMemMajorThreshold3>0</ModuleMemMajorThreshold3>
    <ModuleMemMinorThreshold3>0</ModuleMemMinorThreshold3>
    <ModuleSwapCriticalThreshold3>90</ModuleSwapCriticalThreshold3>
    <ModuleSwapMajorThreshold3>80</ModuleSwapMajorThreshold3>
    <ModuleSwapMinorThreshold3>70</ModuleSwapMinorThreshold3>
    <ModuleDiskMonitorFileSystem1-3>/</ModuleDiskMonitorFileSystem1-3>
    <ModuleDBRootCount1-3>1</ModuleDBRootCount1-3>
    <ModuleDBRootID1-1-3>1</ModuleDBRootID1-1-3>
    <ModuleDisableState2-2>ENABLED</ModuleDisableState2-2>
    <ModuleHostName2-1-2>192.168.0.104</ModuleHostName2-1-2>
    <ModuleIPAddr2-1-2>192.168.0.104</ModuleIPAddr2-1-2>
    <ModuleDBRootCount2-3>1</ModuleDBRootCount2-3>
    <ModuleDBRootID2-1-3>2</ModuleDBRootID2-1-3>
    <ModuleDBRootCount3-3>1</ModuleDBRootCount3-3>
    <ModuleDBRootID3-1-3>3</ModuleDBRootID3-1-3>
    <ModuleDBRootCount4-3>1</ModuleDBRootCount4-3>
    <ModuleDBRootID4-1-3>4</ModuleDBRootID4-1-3>
    <ModuleDisableState2-3>ENABLED</ModuleDisableState2-3>
    <ModuleHostName2-1-3>nvm002316</ModuleHostName2-1-3>
    <ModuleIPAddr2-1-3>192.168.0.103</ModuleIPAddr2-1-3>
    <ModuleDisableState3-3>ENABLED</ModuleDisableState3-3>
    <ModuleHostName3-1-3>nvm002980</ModuleHostName3-1-3>
    <ModuleIPAddr3-1-3>192.168.0.105</ModuleIPAddr3-1-3>
    <ModuleDisableState4-3>ENABLED</ModuleDisableState4-3>
    <ModuleHostName4-1-3>nvm002981</ModuleHostName4-1-3>
    <ModuleIPAddr4-1-3>192.168.0.106</ModuleIPAddr4-1-3>
    <ModuleHostName1-2-2>unassigned</ModuleHostName1-2-2>
    <ModuleIPAddr1-2-2>0.0.0.0</ModuleIPAddr1-2-2>
    <ModuleHostName2-2-2>unassigned</ModuleHostName2-2-2>
    <ModuleIPAddr2-2-2>0.0.0.0</ModuleIPAddr2-2-2>
    <ModuleHostName1-2-3>unassigned</ModuleHostName1-2-3>
    <ModuleIPAddr1-2-3>0.0.0.0</ModuleIPAddr1-2-3>
    <ModuleHostName2-2-3>unassigned</ModuleHostName2-2-3>
    <ModuleIPAddr2-2-3>0.0.0.0</ModuleIPAddr2-2-3>
    <ModuleHostName3-2-3>unassigned</ModuleHostName3-2-3>
    <ModuleIPAddr3-2-3>0.0.0.0</ModuleIPAddr3-2-3>
    <ModuleHostName4-2-3>unassigned</ModuleHostName4-2-3>
    <ModuleIPAddr4-2-3>0.0.0.0</ModuleIPAddr4-2-3>
  </SystemModuleConfig>
  <SystemExtDeviceConfig>
    <Count>0</Count>
    <Name1>unassigned</Name1>
    <IPAddr1>0.0.0.0</IPAddr1>
    <DisableState1>ENABLED</DisableState1>
  </SystemExtDeviceConfig>
  <SessionManager>
    <MaxConcurrentTransactions>1000</MaxConcurrentTransactions>
    <TxnIDFile>$INSTALLDIR/data1/systemFiles/dbrm/SMTxnID</TxnIDFile>
  </SessionManager>
  <VersionBuffer>
    <!-- VersionBufferFileSize must be a multiple of 8192.
    One version buffer file will be put on each DB root. -->
    <VersionBufferFileSize>1GB</VersionBufferFileSize>
  </VersionBuffer>
  <OIDManager>
    <!-- Do not change this file after database built -->
    <OIDBitmapFile>$INSTALLDIR/data1/systemFiles/dbrm/oidbitmap</OIDBitmapFile>
    <!-- Do not change this value after database built -->
    <FirstOID>3000</FirstOID>
  </OIDManager>
  <WriteEngine>
    <BulkRoot>$INSTALLDIR/data/bulk</BulkRoot>
    <BulkRollbackDir>$INSTALLDIR/data1/systemFiles/bulkRollback</BulkRollbackDir>
    <MaxFileSystemDiskUsagePct>98</MaxFileSystemDiskUsagePct>
    <CompressedPaddingBlocks>1</CompressedPaddingBlocks> <!-- Number of blocks used to pad compressed chunks -->
    <FastDelete>n</FastDelete>
  </WriteEngine>
  <DBRM_Controller>
    <NumWorkers>6</NumWorkers>
    <IPAddr>192.168.0.102</IPAddr>
    <Port>8616</Port>
  </DBRM_Controller>
  <!-- Worker Port: 8700 - 8720 is reserved to support External Modules-->
  <DBRM_Worker1>
    <IPAddr>192.168.0.102</IPAddr>
    <Port>8700</Port>
    <Module>pm1</Module>
  </DBRM_Worker1>
  <DBRM_Worker2>
    <IPAddr>192.168.0.101</IPAddr>
    <Port>8700</Port>
    <Module>um1</Module>
  </DBRM_Worker2>
  <DBRM_Worker3>
    <IPAddr>192.168.0.104</IPAddr>
    <Port>8700</Port>
    <Module>um2</Module>
  </DBRM_Worker3>
  <DBRM_Worker4>
    <IPAddr>192.168.0.103</IPAddr>
    <Port>8700</Port>
    <Module>pm2</Module>
  </DBRM_Worker4>
  <DBRM_Worker5>
    <IPAddr>192.168.0.105</IPAddr>
    <Port>8700</Port>
    <Module>pm3</Module>
  </DBRM_Worker5>
  <DBRM_Worker6>
    <IPAddr>192.168.0.106</IPAddr>
    <Port>8700</Port>
    <Module>pm4</Module>
  </DBRM_Worker6>
  <DBRM_Worker7>
    <IPAddr>0.0.0.0</IPAddr>
    <Port>8700</Port>
    <Module>unassigned</Module>
  </DBRM_Worker7>
  <DBRM_Worker8>
    <IPAddr>0.0.0.0</IPAddr>
    <Port>8700</Port>
    <Module>unassigned</Module>
  </DBRM_Worker8>
  <DBRM_Worker9>
    <IPAddr>0.0.0.0</IPAddr>
    <Port>8700</Port>
    <Module>unassigned</Module>
  </DBRM_Worker9>
  <DBRM_Worker10>
    <IPAddr>0.0.0.0</IPAddr>
    <Port>8700</Port>
    <Module>unassigned</Module>
  </DBRM_Worker10>
  <DBBC>
    <!-- The percentage of RAM to use for the disk block cache. Defaults to 70% -->
    <!-- Alternatively, this can be specified in absolute terms using
    the suffixes 'm' or 'g' to denote size in megabytes or gigabytes.-->
    <!-- <NumBlocksPct>70</NumBlocksPct> -->
    <!-- <NumThreads>16</NumThreads> --> <!-- 1-256. Default is 16. -->
    <NumCaches>1</NumCaches> <!-- # of parallel caches to instantiate -->
    <IOMTracing>0</IOMTracing>
    <BRPTracing>0</BRPTracing>
    <ReportFrequency>65536</ReportFrequency>
    <MaxOpenFiles>2K</MaxOpenFiles>
    <DecreaseOpenFilesCount>200</DecreaseOpenFilesCount>
    <FDCacheTrace>0</FDCacheTrace>
    <NumBlocksPct>65</NumBlocksPct>
  </DBBC>
  <Installation>
    <ServerTypeInstall>1</ServerTypeInstall>
    <PMwithUM>n</PMwithUM>
    <MySQLRep>y</MySQLRep>
    <DBRootStorageType>external</DBRootStorageType>
    <UMStorageType>internal</UMStorageType>
    <DistributedInstall>y</DistributedInstall>
    <ProfileFile>/etc/profile.d/columnstoreAlias.sh</ProfileFile>
    <DataRedundancyNetworkType/>
  </Installation>
  <ExtentMap>
    <!--
    WARNING: these can only be changed on an empty system. Once any object has been allocated
    it cannot be changed! Extent size is 8M rows.
    -->
    <FilesPerColumnPartition>8</FilesPerColumnPartition> <!-- should be multiple of DBRootCount -->
    <BRM_UID>0x0</BRM_UID>
  </ExtentMap>
  <HashJoin>
    <MaxBuckets>128</MaxBuckets>
    <MaxElems>128K</MaxElems> <!-- 128 buckets * 128K * 16 = 256 MB -->
    <PmMaxMemorySmallSide>64M</PmMaxMemorySmallSide> <!-- divide by 48 to get approximate row count -->
    <TotalUmMemory>25%</TotalUmMemory>
    <CPUniqueLimit>100</CPUniqueLimit>
    <AllowDiskBasedJoin>N</AllowDiskBasedJoin>
    <TempFileCompression>Y</TempFileCompression>
    <TempFileCompressionType>Snappy</TempFileCompressionType> <!-- LZ4, Snappy -->
  </HashJoin>
  <JobList>
    <FlushInterval>16K</FlushInterval>
    <FifoSize>32</FifoSize>
    <RequestSize>1</RequestSize> <!-- Number of extents per request, should be
    less than MaxOutstandingRequests. Otherwise, default value 1 is used. -->
    <!-- ProcessorThreadsPerScan is the number of jobs issued to process
    each extent. The default is 16. MaxOutstandingRequests is the size of
    the window of work in terms of extents. A value of 20 means there
    is 20 extents worth of work for the PMs to process at any given time.
    ProcessorThreadsPerScan * MaxOutstandingRequests should be at least
    as many threads are available across all PMs. -->
    <!-- <ProcessorThreadsPerScan>16</ProcessorThreadsPerScan> -->
    <MaxOutstandingRequests>40</MaxOutstandingRequests>
    <ThreadPoolSize>100</ThreadPoolSize>
  </JobList>
  <RowAggregation>
    <!-- <RowAggrThreads>4</RowAggrThreads> --> <!-- Default value is 4 or number of cores when less than 4 -->
    <!-- <RowAggrBuckets>32</RowAggrBuckets> --> <!-- Default value is number of cores * 4 -->
    <!-- <RowAggrRowGroupsPerThread>20</RowAggrRowGroupsPerThread> --> <!-- Default value is 20 -->
    <AllowDiskBasedAggregation>N</AllowDiskBasedAggregation>
  </RowAggregation>
  <CrossEngineSupport>
    <Host>127.0.0.1</Host>
    <Port>3306</Port>
    <User>root</User>
    <Password/>
    <TLSCA/>
    <TLSClientCert/>
    <TLSClientKey/>
  </CrossEngineSupport>
  <QueryStats>
    <Enabled>Y</Enabled>
  </QueryStats>
  <UserPriority>
    <Enabled>N</Enabled>
  </UserPriority>
  <NetworkCompression>
    <Enabled>Y</Enabled>
    <NetworkCompressionType>Snappy</NetworkCompressionType> <!-- LZ4, Snappy -->
  </NetworkCompression>
  <QueryTele>
    <Host>127.0.0.1</Host>
    <Port>0</Port>
  </QueryTele>
  <um2_ProcessMonitor>
    <IPAddr>192.168.0.104</IPAddr>
    <Port>8800</Port>
  </um2_ProcessMonitor>
  <um2_ServerMonitor>
    <IPAddr>192.168.0.104</IPAddr>
    <Port>8622</Port>
  </um2_ServerMonitor>
  <ExeMgr2>
    <IPAddr>192.168.0.104</IPAddr>
    <Port>8601</Port>
    <Module>um2</Module>
  </ExeMgr2>
  <pm2_ProcessMonitor>
    <IPAddr>192.168.0.103</IPAddr>
    <Port>8800</Port>
  </pm2_ProcessMonitor>
  <pm2_ServerMonitor>
    <IPAddr>192.168.0.103</IPAddr>
    <Port>8622</Port>
  </pm2_ServerMonitor>
  <pm2_WriteEngineServer>
    <IPAddr>192.168.0.103</IPAddr>
    <Port>8630</Port>
  </pm2_WriteEngineServer>
  <pm3_ProcessMonitor>
    <IPAddr>192.168.0.105</IPAddr>
    <Port>8800</Port>
  </pm3_ProcessMonitor>
  <pm3_ServerMonitor>
    <IPAddr>192.168.0.105</IPAddr>
    <Port>8622</Port>
  </pm3_ServerMonitor>
  <pm3_WriteEngineServer>
    <IPAddr>192.168.0.105</IPAddr>
    <Port>8630</Port>
  </pm3_WriteEngineServer>
  <pm4_ProcessMonitor>
    <IPAddr>192.168.0.106</IPAddr>
    <Port>8800</Port>
  </pm4_ProcessMonitor>
  <pm4_ServerMonitor>
    <IPAddr>192.168.0.106</IPAddr>
    <Port>8622</Port>
  </pm4_ServerMonitor>
  <pm4_WriteEngineServer>
    <IPAddr>192.168.0.106</IPAddr>
    <Port>8630</Port>
  </pm4_WriteEngineServer>
  <DataRedundancyConfig>
    <DBRoot1PMs/>
    <DBRoot2PMs/>
    <DBRoot3PMs/>
    <DBRoot4PMs/>
  </DataRedundancyConfig>
  <ProcHeartbeatControl>
    <IPAddr>192.168.0.102</IPAddr>
  </ProcHeartbeatControl>
</Columnstore>
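
This fixture exercises the ModuleIPAddrX-1-3 / ModuleHostNameX-1-3 naming that get_current_pm_num() walks. A standalone sketch of that lookup against this file (stdlib ElementTree here, while the node code uses lxml; the relative file path is assumed):

    # Sketch of the pm-number lookup over this fixture; mirrors
    # NodeConfig.get_current_pm_num() but takes the names to match as input.
    import xml.etree.ElementTree as ET

    def find_pm_num(xml_path, my_names):
        smc = ET.parse(xml_path).getroot().find('./SystemModuleConfig')
        pm_count = int(smc.find('./ModuleCount3').text)
        for pm_num in range(1, pm_count + 1):
            ip_addr = smc.find(f'./ModuleIPAddr{pm_num}-1-3').text
            name = smc.find(f'./ModuleHostName{pm_num}-1-3').text
            if ip_addr in my_names or name in my_names:
                return pm_num
        return None

    # ModuleIPAddr2-1-3 above is 192.168.0.103, so this prints 2.
    print(find_pm_num('Columnstore_new.xml', {'192.168.0.103'}))
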
531
cmapi/mcs_node_control/test/Columnstore_old.xml
Normal file
@ -0,0 +1,531 @@
<Columnstore Version="V1.0.0">
  <!--
    WARNING: Do not make changes to this file unless directed to do so by
    MariaDB service engineers. Incorrect settings can render your system
    unusable and will require a service call to correct.
  -->
  <ExeMgr1>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8601</Port>
    <Module>unassigned</Module>
  </ExeMgr1>
  <JobProc>
    <IPAddr>0.0.0.0</IPAddr>
    <Port>8602</Port>
  </JobProc>
  <ProcMgr>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8603</Port>
  </ProcMgr>
  <ProcMgr_Alarm>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8606</Port>
  </ProcMgr_Alarm>
  <ProcStatusControl>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8604</Port>
  </ProcStatusControl>
  <ProcStatusControlStandby>
    <IPAddr>0.0.0.0</IPAddr>
    <Port>8605</Port>
  </ProcStatusControlStandby>
  <!-- Disabled
  <ProcHeartbeatControl>
    <IPAddr>0.0.0.0</IPAddr>
    <Port>8605</Port>
  </ProcHeartbeatControl>
  -->
  <!-- ProcessMonitor Port: 8800 - 8820 is reserved to support External Modules-->
  <localhost_ProcessMonitor>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8800</Port>
  </localhost_ProcessMonitor>
  <dm1_ProcessMonitor>
    <IPAddr>0.0.0.0</IPAddr>
    <Port>8800</Port>
  </dm1_ProcessMonitor>
  <um1_ProcessMonitor>
    <IPAddr>0.0.0.0</IPAddr>
    <Port>8800</Port>
  </um1_ProcessMonitor>
  <pm1_ProcessMonitor>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8800</Port>
  </pm1_ProcessMonitor>
  <dm1_ServerMonitor>
    <IPAddr>0.0.0.0</IPAddr>
    <Port>8622</Port>
  </dm1_ServerMonitor>
  <um1_ServerMonitor>
    <IPAddr>0.0.0.0</IPAddr>
    <Port>8622</Port>
  </um1_ServerMonitor>
  <pm1_ServerMonitor>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8622</Port>
  </pm1_ServerMonitor>
  <pm1_WriteEngineServer>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8630</Port>
  </pm1_WriteEngineServer>
  <DDLProc>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8612</Port>
  </DDLProc>
  <DMLProc>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8614</Port>
  </DMLProc>
  <BatchInsert>
    <RowsPerBatch>10000</RowsPerBatch>
  </BatchInsert>
  <PrimitiveServers>
    <Count>1</Count>
    <ConnectionsPerPrimProc>2</ConnectionsPerPrimProc>
    <ProcessorThreshold>128</ProcessorThreshold>
    <ProcessorQueueSize>10K</ProcessorQueueSize>
    <!-- minimum of extent size 8192 -->
    <DebugLevel>0</DebugLevel>
    <ColScanBufferSizeBlocks>512</ColScanBufferSizeBlocks>
    <ColScanReadAheadBlocks>512</ColScanReadAheadBlocks>
    <!-- s/b factor of extent size 8192 -->
    <!-- <BPPCount>16</BPPCount> -->
    <!-- Default num cores * 2. A cap on the number of simultaneous primitives per jobstep -->
    <PrefetchThreshold>1</PrefetchThreshold>
    <PTTrace>0</PTTrace>
    <RotatingDestination>n</RotatingDestination>
    <!-- Iterate thru UM ports; set to 'n' if UM/PM on same server -->
    <!-- <HighPriorityPercentage>60</HighPriorityPercentage> -->
    <!-- <MediumPriorityPercentage>30</MediumPriorityPercentage> -->
    <!-- <LowPriorityPercentage>10</LowPriorityPercentage> -->
    <DirectIO>y</DirectIO>
    <HighPriorityPercentage/>
    <MediumPriorityPercentage/>
    <LowPriorityPercentage/>
  </PrimitiveServers>
  <PMS1>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8620</Port>
  </PMS1>
  <PMS2>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8620</Port>
  </PMS2>
  <PMS3>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8620</Port>
  </PMS3>
  <PMS4>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8620</Port>
  </PMS4>
  <PMS5>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8620</Port>
  </PMS5>
  <PMS6>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8620</Port>
  </PMS6>
  <PMS7>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8620</Port>
  </PMS7>
  <PMS8>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8620</Port>
  </PMS8>
  <PMS9>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8620</Port>
  </PMS9>
  <PMS10>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8620</Port>
  </PMS10>
  <PMS11>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8620</Port>
  </PMS11>
  <PMS12>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8620</Port>
  </PMS12>
  <PMS13>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8620</Port>
  </PMS13>
  <PMS14>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8620</Port>
  </PMS14>
  <PMS15>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8620</Port>
  </PMS15>
  <PMS16>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8620</Port>
  </PMS16>
  <PMS17>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8620</Port>
  </PMS17>
  <PMS18>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8620</Port>
  </PMS18>
  <PMS19>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8620</Port>
  </PMS19>
  <PMS20>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8620</Port>
  </PMS20>
  <PMS21>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8620</Port>
  </PMS21>
  <PMS22>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8620</Port>
  </PMS22>
  <PMS23>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8620</Port>
  </PMS23>
  <PMS24>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8620</Port>
  </PMS24>
  <PMS25>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8620</Port>
  </PMS25>
  <PMS26>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8620</Port>
  </PMS26>
  <PMS27>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8620</Port>
  </PMS27>
  <PMS28>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8620</Port>
  </PMS28>
  <PMS29>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8620</Port>
  </PMS29>
  <PMS30>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8620</Port>
  </PMS30>
  <PMS31>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8620</Port>
  </PMS31>
  <PMS32>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8620</Port>
  </PMS32>
  <SystemConfig>
    <SystemLang>C</SystemLang>
    <SystemName>columnstore-1</SystemName>
    <ParentOAMModuleName>pm1</ParentOAMModuleName>
    <PrimaryUMModuleName>pm1</PrimaryUMModuleName>
    <!-- Warning: Do not change this value once database is built -->
    <DBRootCount>1</DBRootCount>
    <DBRoot1>/var/lib/columnstore/data1</DBRoot1>
    <DBRMRoot>/var/lib/columnstore/data1/systemFiles/dbrm/BRM_saves</DBRMRoot>
    <TableLockSaveFile>/var/lib/columnstore/data1/systemFiles/dbrm/tablelocks</TableLockSaveFile>
    <DBRMTimeOut>20</DBRMTimeOut>
    <!-- in seconds -->
    <DBRMSnapshotInterval>100000</DBRMSnapshotInterval>
    <!-- default SWSDL max element save size -->
    <WaitPeriod>10</WaitPeriod>
    <!-- in seconds -->
    <MemoryCheckPercent>95</MemoryCheckPercent>
    <!-- Max real memory to limit growth of buffers to -->
    <DataFileLog>OFF</DataFileLog>
    <!-- enable if you want to limit how much memory may be used for hdfs read/write memory buffers.
    <hdfsRdwrBufferMaxSize>8G</hdfsRdwrBufferMaxSize>
    -->
    <hdfsRdwrScratch>/rdwrscratch</hdfsRdwrScratch> <!-- Do not set to an hdfs file path -->
    <!-- Be careful modifying SystemTempFileDir! On start, ExeMgr deletes
    the entire subdirectories "joins" & "aggregates" and recreates it to make sure no
    files are left behind. -->
    <SystemTempFileDir>/tmp/columnstore_tmp_files</SystemTempFileDir>
  </SystemConfig>
  <SystemModuleConfig>
    <ModuleType1>dm</ModuleType1>
    <ModuleDesc1>Director Module</ModuleDesc1>
    <ModuleCount1>0</ModuleCount1>
    <ModuleIPAddr1-1-1>0.0.0.0</ModuleIPAddr1-1-1>
    <ModuleHostName1-1-1>unassigned</ModuleHostName1-1-1>
    <ModuleDisableState1-1>ENABLED</ModuleDisableState1-1>
    <ModuleCPUCriticalThreshold1>0</ModuleCPUCriticalThreshold1>
    <ModuleCPUMajorThreshold1>0</ModuleCPUMajorThreshold1>
    <ModuleCPUMinorThreshold1>0</ModuleCPUMinorThreshold1>
    <ModuleCPUMinorClearThreshold1>0</ModuleCPUMinorClearThreshold1>
    <ModuleDiskCriticalThreshold1>90</ModuleDiskCriticalThreshold1>
    <ModuleDiskMajorThreshold1>80</ModuleDiskMajorThreshold1>
    <ModuleDiskMinorThreshold1>70</ModuleDiskMinorThreshold1>
    <ModuleMemCriticalThreshold1>90</ModuleMemCriticalThreshold1>
    <ModuleMemMajorThreshold1>0</ModuleMemMajorThreshold1>
    <ModuleMemMinorThreshold1>0</ModuleMemMinorThreshold1>
    <ModuleSwapCriticalThreshold1>90</ModuleSwapCriticalThreshold1>
    <ModuleSwapMajorThreshold1>80</ModuleSwapMajorThreshold1>
    <ModuleSwapMinorThreshold1>70</ModuleSwapMinorThreshold1>
    <ModuleDiskMonitorFileSystem1-1>/</ModuleDiskMonitorFileSystem1-1>
    <ModuleDBRootCount1-1>unassigned</ModuleDBRootCount1-1>
    <ModuleDBRootID1-1-1>unassigned</ModuleDBRootID1-1-1>
    <ModuleType2>um</ModuleType2>
    <ModuleDesc2>User Module</ModuleDesc2>
    <ModuleCount2>0</ModuleCount2>
    <ModuleIPAddr1-1-2>0.0.0.0</ModuleIPAddr1-1-2>
    <ModuleHostName1-1-2>unassigned</ModuleHostName1-1-2>
    <ModuleDisableState1-2>ENABLED</ModuleDisableState1-2>
    <ModuleCPUCriticalThreshold2>0</ModuleCPUCriticalThreshold2>
    <ModuleCPUMajorThreshold2>0</ModuleCPUMajorThreshold2>
    <ModuleCPUMinorThreshold2>0</ModuleCPUMinorThreshold2>
    <ModuleCPUMinorClearThreshold2>0</ModuleCPUMinorClearThreshold2>
    <ModuleDiskCriticalThreshold2>90</ModuleDiskCriticalThreshold2>
    <ModuleDiskMajorThreshold2>80</ModuleDiskMajorThreshold2>
    <ModuleDiskMinorThreshold2>70</ModuleDiskMinorThreshold2>
    <ModuleMemCriticalThreshold2>90</ModuleMemCriticalThreshold2>
    <ModuleMemMajorThreshold2>0</ModuleMemMajorThreshold2>
    <ModuleMemMinorThreshold2>0</ModuleMemMinorThreshold2>
    <ModuleSwapCriticalThreshold2>90</ModuleSwapCriticalThreshold2>
    <ModuleSwapMajorThreshold2>80</ModuleSwapMajorThreshold2>
    <ModuleSwapMinorThreshold2>70</ModuleSwapMinorThreshold2>
    <ModuleDiskMonitorFileSystem1-2>/</ModuleDiskMonitorFileSystem1-2>
    <ModuleDBRootCount1-2>unassigned</ModuleDBRootCount1-2>
    <ModuleDBRootID1-1-2>unassigned</ModuleDBRootID1-1-2>
    <ModuleType3>pm</ModuleType3>
    <ModuleDesc3>Performance Module</ModuleDesc3>
    <ModuleCount3>1</ModuleCount3>
    <ModuleIPAddr1-1-3>127.0.0.1</ModuleIPAddr1-1-3>
    <ModuleHostName1-1-3>localhost</ModuleHostName1-1-3>
    <ModuleDisableState1-3>ENABLED</ModuleDisableState1-3>
    <ModuleCPUCriticalThreshold3>0</ModuleCPUCriticalThreshold3>
    <ModuleCPUMajorThreshold3>0</ModuleCPUMajorThreshold3>
    <ModuleCPUMinorThreshold3>0</ModuleCPUMinorThreshold3>
    <ModuleCPUMinorClearThreshold3>0</ModuleCPUMinorClearThreshold3>
    <ModuleDiskCriticalThreshold3>90</ModuleDiskCriticalThreshold3>
    <ModuleDiskMajorThreshold3>80</ModuleDiskMajorThreshold3>
    <ModuleDiskMinorThreshold3>70</ModuleDiskMinorThreshold3>
    <ModuleMemCriticalThreshold3>90</ModuleMemCriticalThreshold3>
    <ModuleMemMajorThreshold3>0</ModuleMemMajorThreshold3>
    <ModuleMemMinorThreshold3>0</ModuleMemMinorThreshold3>
    <ModuleSwapCriticalThreshold3>90</ModuleSwapCriticalThreshold3>
    <ModuleSwapMajorThreshold3>80</ModuleSwapMajorThreshold3>
    <ModuleSwapMinorThreshold3>70</ModuleSwapMinorThreshold3>
    <ModuleDiskMonitorFileSystem1-3>/</ModuleDiskMonitorFileSystem1-3>
    <ModuleDBRootCount1-3>1</ModuleDBRootCount1-3>
    <ModuleDBRootID1-1-3>1</ModuleDBRootID1-1-3>
  </SystemModuleConfig>
  <SystemExtDeviceConfig>
    <Count>0</Count>
    <Name1>unassigned</Name1>
    <IPAddr1>0.0.0.0</IPAddr1>
    <DisableState1>ENABLED</DisableState1>
  </SystemExtDeviceConfig>
  <SessionManager>
    <MaxConcurrentTransactions>1000</MaxConcurrentTransactions>
    <TxnIDFile>/var/lib/columnstore/data1/systemFiles/dbrm/SMTxnID</TxnIDFile>
  </SessionManager>
  <VersionBuffer>
    <!-- VersionBufferFileSize must be a multiple of 8192.
    One version buffer file will be put on each DB root. -->
    <VersionBufferFileSize>1GB</VersionBufferFileSize>
  </VersionBuffer>
  <OIDManager>
    <!-- Do not change this file after database built -->
    <OIDBitmapFile>/var/lib/columnstore/data1/systemFiles/dbrm/oidbitmap</OIDBitmapFile>
    <!-- Do not change this value after database built -->
    <FirstOID>3000</FirstOID>
  </OIDManager>
  <WriteEngine>
    <BulkRoot>/var/log/mariadb/columnstore/data/bulk</BulkRoot>
    <BulkRollbackDir>/var/lib/columnstore/data1/systemFiles/bulkRollback</BulkRollbackDir>
    <MaxFileSystemDiskUsagePct>98</MaxFileSystemDiskUsagePct>
    <CompressedPaddingBlocks>1</CompressedPaddingBlocks> <!-- Number of blocks used to pad compressed chunks -->
    <FastDelete>n</FastDelete>
  </WriteEngine>
  <DBRM_Controller>
    <NumWorkers>1</NumWorkers>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8616</Port>
  </DBRM_Controller>
  <!-- Worker Port: 8700 - 8720 is reserved to support External Modules-->
  <DBRM_Worker1>
    <IPAddr>127.0.0.1</IPAddr>
    <Port>8700</Port>
    <Module>pm1</Module>
  </DBRM_Worker1>
  <DBRM_Worker2>
    <IPAddr>0.0.0.0</IPAddr>
    <Port>8700</Port>
    <Module>unassigned</Module>
  </DBRM_Worker2>
  <DBRM_Worker3>
    <IPAddr>0.0.0.0</IPAddr>
    <Port>8700</Port>
    <Module>unassigned</Module>
  </DBRM_Worker3>
  <DBRM_Worker4>
    <IPAddr>0.0.0.0</IPAddr>
    <Port>8700</Port>
    <Module>unassigned</Module>
  </DBRM_Worker4>
  <DBRM_Worker5>
    <IPAddr>0.0.0.0</IPAddr>
    <Port>8700</Port>
    <Module>unassigned</Module>
  </DBRM_Worker5>
  <DBRM_Worker6>
    <IPAddr>0.0.0.0</IPAddr>
    <Port>8700</Port>
    <Module>unassigned</Module>
  </DBRM_Worker6>
  <DBRM_Worker7>
    <IPAddr>0.0.0.0</IPAddr>
    <Port>8700</Port>
    <Module>unassigned</Module>
  </DBRM_Worker7>
  <DBRM_Worker8>
    <IPAddr>0.0.0.0</IPAddr>
    <Port>8700</Port>
    <Module>unassigned</Module>
  </DBRM_Worker8>
  <DBRM_Worker9>
    <IPAddr>0.0.0.0</IPAddr>
    <Port>8700</Port>
    <Module>unassigned</Module>
  </DBRM_Worker9>
  <DBRM_Worker10>
    <IPAddr>0.0.0.0</IPAddr>
    <Port>8700</Port>
    <Module>unassigned</Module>
  </DBRM_Worker10>
  <DBBC>
    <!-- The percentage of RAM to use for the disk block cache. Defaults to 70% -->
    <!-- Alternatively, this can be specified in absolute terms using
    the suffixes 'm' or 'g' to denote size in megabytes or gigabytes.-->
    <!-- <NumBlocksPct>70</NumBlocksPct> -->
    <!-- <NumThreads>16</NumThreads> -->
    <!-- 1-256. Default is 16. -->
    <NumCaches>1</NumCaches>
    <!-- # of parallel caches to instantiate -->
    <IOMTracing>0</IOMTracing>
    <BRPTracing>0</BRPTracing>
    <ReportFrequency>65536</ReportFrequency>
    <MaxOpenFiles>2K</MaxOpenFiles>
    <DecreaseOpenFilesCount>200</DecreaseOpenFilesCount>
    <FDCacheTrace>0</FDCacheTrace>
    <NumBlocksPct>50</NumBlocksPct>
  </DBBC>
  <Installation>
    <ServerTypeInstall>2</ServerTypeInstall>
    <PMwithUM>n</PMwithUM>
    <MySQLRep>n</MySQLRep>
    <DBRootStorageType>internal</DBRootStorageType>
    <UMStorageType>internal</UMStorageType>
    <ProfileFile>/etc/profile.d/columnstoreAlias.sh</ProfileFile>
    <DataRedundancyNetworkType/>
  </Installation>
  <ExtentMap>
    <!--
    WARNING: these can only be changed on an empty system. Once any object has been allocated
    it cannot be changed! Extent size is 8M rows.
    -->
    <FilesPerColumnPartition>4</FilesPerColumnPartition>
    <!-- should be multiple of DBRootCount -->
    <BRM_UID>0x0</BRM_UID>
  </ExtentMap>
  <HashJoin>
    <MaxBuckets>128</MaxBuckets>
    <MaxElems>128K</MaxElems>
    <!-- 128 buckets * 128K * 16 = 256 MB -->
    <PmMaxMemorySmallSide>1G</PmMaxMemorySmallSide>
    <TotalUmMemory>25%</TotalUmMemory>
    <CPUniqueLimit>100</CPUniqueLimit>
    <AllowDiskBasedJoin>N</AllowDiskBasedJoin>
    <TempFileCompression>Y</TempFileCompression>
    <TempFileCompressionType>Snappy</TempFileCompressionType> <!-- LZ4, Snappy -->
  </HashJoin>
  <JobList>
    <FlushInterval>16K</FlushInterval>
    <FifoSize>16</FifoSize>
    <RequestSize>1</RequestSize>
    <!-- Number of extents per request, should be
    less than MaxOutstandingRequests. Otherwise, default value 1 is used. -->
    <!-- ProcessorThreadsPerScan is the number of jobs issued to process
    each extent. The default is 16. MaxOutstandingRequests is the size of
    the window of work in terms of extents. A value of 20 means there
    is 20 extents worth of work for the PMs to process at any given time.
    ProcessorThreadsPerScan * MaxOutstandingRequests should be at least
    as many threads are available across all PMs. -->
    <!-- <ProcessorThreadsPerScan>16</ProcessorThreadsPerScan> -->
    <!-- MaxOutstandingRequests is going to default to the num of cores available
    across all performance modules * 4 divided by the ProcessorThreadsPerScan,
    but will be lower bounded by 20 -->
    <!-- <MaxOutstandingRequests>20</MaxOutstandingRequests> -->
    <ThreadPoolSize>100</ThreadPoolSize>
  </JobList>
  <RowAggregation>
    <!-- <RowAggrThreads>4</RowAggrThreads> -->
    <!-- Default value is the number of cores -->
    <!-- <RowAggrBuckets>32</RowAggrBuckets> -->
    <!-- Default value is number of cores * 4 -->
    <!-- <RowAggrRowGroupsPerThread>20</RowAggrRowGroupsPerThread> -->
    <!-- Default value is 20 -->
    <AllowDiskBasedAggregation>N</AllowDiskBasedAggregation>
  </RowAggregation>
  <CrossEngineSupport>
    <Host>127.0.0.1</Host>
    <Port>3306</Port>
    <User>root</User>
    <Password/>
    <TLSCA/>
    <TLSClientCert/>
    <TLSClientKey/>
  </CrossEngineSupport>
  <QueryStats>
    <Enabled>N</Enabled>
  </QueryStats>
  <UserPriority>
    <Enabled>N</Enabled>
  </UserPriority>
  <NetworkCompression>
    <Enabled>Y</Enabled>
    <NetworkCompressionType>Snappy</NetworkCompressionType> <!-- LZ4, Snappy -->
  </NetworkCompression>
  <QueryTele>
    <Host>127.0.0.1</Host>
    <Port>0</Port>
  </QueryTele>
  <StorageManager>
    <MaxSockets>30</MaxSockets>
    <Enabled>N</Enabled>
  </StorageManager>
  <DataRedundancyConfig>
    <DBRoot1PMs/>
  </DataRedundancyConfig>
  <ConfigRevision>1</ConfigRevision>
  <ClusterManager>127.0.0.1</ClusterManager>
  <ClusterName>MyCluster</ClusterName>
  <NextNodeId>2</NextNodeId>
  <NextDBRootId>2</NextDBRootId>
  <DesiredNodes>
    <Node>127.0.0.1</Node>
  </DesiredNodes>
  <ActiveNodes>
    <Node>127.0.0.1</Node>
  </ActiveNodes>
  <InactiveNodes/>
  <PrimaryNode>127.0.0.1</PrimaryNode>
</Columnstore>
|
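The DBBC comments above describe two ways to size the block cache: NumBlocksPct as a bare number (a percentage of RAM) or an absolute size with an 'm'/'g' suffix. A minimal sketch of changing that setting with lxml, which is already a cmapi dependency; the helper name, the config path, and the '4g' value are illustrative and not part of this commit:

# Sketch only: override DBBC/NumBlocksPct in Columnstore.xml with lxml.
# Assumes the <DBBC> section exists; per the comments above, the value may
# be a percentage ("50") or an absolute size with an 'm'/'g' suffix ("4g").
from lxml import etree

def set_num_blocks_pct(config_path: str, value: str) -> None:
    tree = etree.parse(config_path)
    node = tree.find('./DBBC/NumBlocksPct')
    if node is None:
        # create the element if the config only has it commented out
        node = etree.SubElement(tree.find('./DBBC'), 'NumBlocksPct')
    node.text = value
    tree.write(config_path)

# set_num_blocks_pct('/etc/columnstore/Columnstore.xml', '4g')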
0
cmapi/mcs_node_control/test/__init__.py
Normal file
7
cmapi/mcs_node_control/test/settings.py
Normal file
@ -0,0 +1,7 @@
import os

from cmapi_server.constants import CMAPI_DEFAULT_CONF_PATH


CONFIG_PATH_NEW = './mcs_node_control/test/Columnstore_new.xml'
CONFIG_PATH_OLD = './mcs_node_control/test/Columnstore_old.xml'
29
cmapi/mcs_node_control/test/test_dbrm_socket.py
Normal file
@ -0,0 +1,29 @@
import io
import logging
import unittest

from mcs_node_control.models.dbrm_socket import MAGIC_BYTES, DBRMSocketHandler


logging.basicConfig(level='DEBUG')


class TestDBRMSocketHandler(unittest.TestCase):

    def test_myreceive_to_magic(self):
        response_data = b'\x01\x00\x00\x00\x00'
        valid_magic = b'%s%s' % (MAGIC_BYTES, response_data)
        first_unknown = b'A%s%s' % (MAGIC_BYTES, response_data)
        partial_first_magic = b'%s%s%s' % (
            MAGIC_BYTES[:3], MAGIC_BYTES, response_data
        )
        sock_responses = [valid_magic, first_unknown, partial_first_magic]
        for sock_response in sock_responses:
            with self.subTest(sock_response=sock_response):
                data_stream = io.BytesIO(sock_response)
                data_stream.recv = data_stream.read
                dbrm_socket = DBRMSocketHandler()
                # pylint: disable=protected-access
                dbrm_socket._socket = data_stream
                dbrm_socket._receive_magic()
                self.assertEqual(data_stream.read(), response_data)
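The test feeds _receive_magic three streams: a clean magic prefix, a garbage byte before the magic, and a truncated magic followed by the real one. In every case the handler must consume bytes until the full MAGIC_BYTES sequence has been matched, leaving the stream positioned at the payload. A minimal sketch of such a scanning loop, assuming a recv-style callable; this is not the actual DBRMSocketHandler code, and the magic value here is a placeholder:

# Sketch of the scanning behavior the test above exercises: read one byte
# at a time, tracking how much of MAGIC_BYTES has been matched so far, and
# return once the whole sequence has been seen.
MAGIC_BYTES = b'\xef\xbe\xad\xde'  # placeholder; the real value lives in dbrm_socket

def receive_to_magic(recv) -> None:
    matched = 0
    while matched < len(MAGIC_BYTES):
        byte = recv(1)
        if not byte:
            raise ConnectionError('stream ended before magic was found')
        if byte == MAGIC_BYTES[matched:matched + 1]:
            matched += 1
        elif byte == MAGIC_BYTES[:1]:
            matched = 1  # a failed match may restart at this byte
        else:
            matched = 0
    # a fully general scanner would track prefix overlaps KMP-style; this
    # simple restart is enough for the patterns the test uses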
13
cmapi/mcs_node_control/test/test_misc.py
Normal file
@ -0,0 +1,13 @@
import unittest


class MiscTest(unittest.TestCase):
    def test_read_module_id(self):
        pass

    def test_set_module_id(self):
        pass


if __name__ == '__main__':
    unittest.main()
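The stubs above are placeholders for read_module_id/set_module_id. A hedged sketch of what those helpers are expected to do, inferred from test_node_status.py later in this commit, which parses the module id as int(module_file.read_text()[2:]), i.e. a file containing something like "pm1"; the path and both helpers here are illustrative, not the real mcs_node_control.models.misc implementations:

# Sketch only: module-id helpers inferred from how the module file is read
# in test_node_status.py below ("pm1" -> 1).
from pathlib import Path

MCS_MODULE_FILE_PATH = '/var/lib/columnstore/local/module'  # assumed location

def read_module_id(path: str = MCS_MODULE_FILE_PATH) -> int:
    # strip the two-character module-type prefix, e.g. "pm1" -> 1
    return int(Path(path).read_text()[2:])

def set_module_id(module_id: int, path: str = MCS_MODULE_FILE_PATH) -> None:
    Path(path).write_text(f'pm{module_id}\n')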
288
cmapi/mcs_node_control/test/test_node_config.py
Normal file
@ -0,0 +1,288 @@
import logging
import os
import subprocess
import unittest
import xml.etree.ElementTree as ET
from pathlib import Path
from shutil import copyfile
from tempfile import TemporaryDirectory
from unittest import TestCase, mock

from lxml import etree

from cmapi_server.constants import CMAPI_DEFAULT_CONF_PATH
from mcs_node_control.models.dbrm import (
    DBRM, set_cluster_mode
)
from mcs_node_control.models.node_config import NodeConfig
from mcs_node_control.models.misc import read_module_id
from mcs_node_control.models.node_status import NodeStatus
from mcs_node_control.test.settings import CONFIG_PATH_NEW, CONFIG_PATH_OLD


MCS_NODE_MODELS = 'mcs_node_control.models'
NODE_CONFIG_MODULE = f'{MCS_NODE_MODELS}.node_config'


logging.basicConfig(level='DEBUG')


# These tests need a working DBRM worker.
class NodeConfigTest(TestCase):

    @mock.patch(f'{NODE_CONFIG_MODULE}.mkdir')
    @mock.patch(f'{NODE_CONFIG_MODULE}.chown')
    @mock.patch(f'{NODE_CONFIG_MODULE}.read_module_id', return_value=1)
    @mock.patch(
        f'{NODE_CONFIG_MODULE}.NodeConfig.in_active_nodes',
        return_value=False
    )
    def test_apply_config(self, *_args):
        """Test applying a configuration file."""
        with TemporaryDirectory() as tmp_dirname:
            config_filepath = os.path.join(tmp_dirname, 'Columnstore.xml')

            copyfile(CONFIG_PATH_OLD, config_filepath)
            # change config
            parser = etree.XMLParser(load_dtd=True)
            # new_tree = etree.parse('/etc/columnstore/Columnstore.xml', parser=parser)
            new_tree = etree.parse(CONFIG_PATH_NEW, parser=parser)

            node_config = NodeConfig()
            xml_string = node_config.to_string(new_tree)

            node_config.apply_config(config_filepath, xml_string)

            # compare configurations
            config_file = Path(config_filepath)
            xml_string_written = config_file.read_text()
            self.assertEqual(xml_string_written, xml_string)
            # a backup copy must exist
            config_file_copy = Path(f"{config_filepath}.cmapi.save")
            self.assertTrue(config_file_copy.exists())

    @mock.patch(f'{NODE_CONFIG_MODULE}.mkdir')
    @mock.patch(f'{NODE_CONFIG_MODULE}.chown')
    @mock.patch(f'{NODE_CONFIG_MODULE}.read_module_id', return_value=1)
    @mock.patch(
        f'{NODE_CONFIG_MODULE}.NodeConfig.in_active_nodes',
        return_value=False
    )
    def test_rollback_config(self, *_args):
        """Test rolling back an applied configuration file."""
        with TemporaryDirectory() as tmp_dirname:
            config_filepath = os.path.join(tmp_dirname, 'Columnstore.xml')
            copyfile(CONFIG_PATH_OLD, config_filepath)

            old_config_file = Path(CONFIG_PATH_OLD)
            old_xml_string = old_config_file.read_text()
            new_config_file = Path(CONFIG_PATH_NEW)
            new_xml_string = new_config_file.read_text()

            node_config = NodeConfig()
            node_config.apply_config(config_filepath, new_xml_string)
            node_config.rollback_config(config_filepath)

            config_file = Path(config_filepath)
            xml_string_restored = config_file.read_text()
            self.assertEqual(xml_string_restored, old_xml_string)

    def test_get_current_config(self):
        """Test getting the current config from file."""
        config_file = Path(CONFIG_PATH_OLD)
        node_config = NodeConfig()
        self.assertEqual(
            node_config.get_current_config(CONFIG_PATH_OLD),
            config_file.read_text()
        )

    def test_set_cluster_mode(self):
        """Test setting the cluster mode.

        TODO:
            - move from here: there is no set_cluster_mode in NodeConfig
            - split into unit and integration tests
            - add unit tests for the exception-raising paths
        """
        for mode in ['readonly', 'readwrite']:
            with self.subTest(mode=mode):
                fake_mode = mode
                set_cluster_mode(mode)
                with DBRM() as dbrm:
                    if dbrm.get_dbrm_status() != 'master':
                        fake_mode = 'readonly'
                    self.assertEqual(dbrm.get_cluster_mode(), fake_mode)
                    self.assertEqual(dbrm._get_cluster_mode(), mode)

    def test_get_dbrm_conn_info(self):
        node_config = NodeConfig()
        root = node_config.get_current_config_root(CONFIG_PATH_OLD)
        master_conn_info = node_config.get_dbrm_conn_info(root)

        tree = ET.parse(CONFIG_PATH_OLD)
        master_ip = tree.find('./DBRM_Controller/IPAddr').text
        master_port = tree.find('./DBRM_Controller/Port').text

        self.assertEqual(master_conn_info['IPAddr'], master_ip)
        self.assertEqual(master_conn_info['Port'], master_port)

    def test_is_primary_node(self):
        try:
            current_master = None
            node_config = NodeConfig()
            root = node_config.get_current_config_root()
            current_master = node_config.get_dbrm_conn_info(root)['IPAddr']
            list_ips = r"ip -4 -o addr | awk '!/^[0-9]*: ?lo|link\/ether/ {print $4}'"
            result = subprocess.run(list_ips,
                                    shell=True,
                                    stdout=subprocess.PIPE)
            local_addresses = result.stdout.decode('ASCII').split('\n')
            local_addresses = [
                addr.split('/')[0] for addr in local_addresses if len(addr)
            ]
            os.system(
                f'mcsSetConfig DBRM_Controller IPAddr {local_addresses[0]}'
            )
            self.assertTrue(node_config.is_primary_node())
            os.system('mcsSetConfig DBRM_Controller IPAddr 8.8.8.8')
            self.assertFalse(node_config.is_primary_node())
            os.system(f'mcsSetConfig DBRM_Controller IPAddr {current_master}')
        except AssertionError as e:
            if current_master is not None:
                os.system(
                    f'mcsSetConfig DBRM_Controller IPAddr {current_master}'
                )
            raise e

    def test_get_network_interfaces(self):
        node_config = NodeConfig()
        addresses = list(node_config.get_network_addresses())
        exemplar_addresses = []
        list_ips = r"ip -4 -o addr | awk '!/^[0-9]*: ?lo|link\/ether/ {print $4}'"
        result = subprocess.run(list_ips,
                                shell=True,
                                stdout=subprocess.PIPE)
        exemplar_addresses += result.stdout.decode('ASCII').split('\n')
        list_ips = r"ip -6 -o addr | awk '!/^[0-9]*: ?lo|link\/ether/ {print $4}'"
        result = subprocess.run(list_ips,
                                shell=True,
                                stdout=subprocess.PIPE)
        exemplar_addresses += result.stdout.decode('ASCII').split('\n')
        golden_addresses = [
            addr.split('/')[0] for addr in exemplar_addresses if len(addr) > 0
        ]
        for addr in golden_addresses:
            self.assertTrue(addr in addresses)

    def test_is_single_node(self):
        try:
            current_master = None
            node_config = NodeConfig()
            root = node_config.get_current_config_root()
            current_master = node_config.get_dbrm_conn_info(root)['IPAddr']
            os.system('mcsSetConfig DBRM_Controller IPAddr 127.0.0.1')
            self.assertTrue(node_config.is_single_node())
            os.system('mcsSetConfig DBRM_Controller IPAddr 8.8.8.8')
            self.assertFalse(node_config.is_single_node())
            os.system(f'mcsSetConfig DBRM_Controller IPAddr {current_master}')
        except AssertionError as e:
            if current_master is not None:
                os.system(
                    f'mcsSetConfig DBRM_Controller IPAddr {current_master}'
                )
            raise e

    @mock.patch(f'{NODE_CONFIG_MODULE}.read_module_id', return_value=1)
    def test_get_module_net_address(self, *args):
        with TemporaryDirectory() as tmp_dirname:
            config_filepath = os.path.join(tmp_dirname, 'Columnstore.xml')
            copyfile(CONFIG_PATH_OLD, config_filepath)

            module_address = None
            node_config = NodeConfig()
            current_module_id = read_module_id()
            module_address_sh = (
                f'mcsGetConfig -c {config_filepath} '
                f'SystemModuleConfig ModuleIPAddr{current_module_id}-1-3'
            )
            result = subprocess.run(
                module_address_sh, shell=True, stdout=subprocess.PIPE
            )
            module_address = result.stdout.decode('ASCII').split('\n')[0]
            dummy_address = '8.8.8.8'
            os.system(
                f'mcsSetConfig -c {config_filepath} '
                f'SystemModuleConfig ModuleIPAddr{current_module_id}-1-3 '
                f'{dummy_address}'
            )
            root = node_config.get_current_config_root(config_filepath)
            self.assertEqual(
                dummy_address, node_config.get_module_net_address(root)
            )
            self.assertNotEqual(
                module_address, node_config.get_module_net_address(root)
            )
            os.system(
                f'mcsSetConfig -c {config_filepath} SystemModuleConfig '
                f'ModuleIPAddr{current_module_id}-1-3 {module_address}'
            )
            root = node_config.get_current_config_root(config_filepath)
            self.assertEqual(
                module_address, node_config.get_module_net_address(root)
            )

    def test_get_new_module_id(self):
        try:
            current_module_id = None
            current_module_address = None
            node_config = NodeConfig()
            current_module_id = read_module_id()
            root = node_config.get_current_config_root()
            current_module_address = node_config.get_module_net_address(root)
            os.system(
                f'mcsSetConfig SystemModuleConfig '
                f'ModuleIPAddr{current_module_id}-1-3 8.8.8.8'
            )
            os.system(
                f'mcsSetConfig SystemModuleConfig '
                f'ModuleIPAddr{current_module_id + 42}-1-3 '
                f'{current_module_address}'
            )
            root = node_config.get_current_config_root()
            self.assertEqual(current_module_id + 42,
                             node_config.get_new_module_id(root))
            self.assertNotEqual(current_module_id,
                                node_config.get_new_module_id(root))
            os.system(
                f'mcsSetConfig SystemModuleConfig '
                f'ModuleIPAddr{current_module_id}-1-3 '
                f'{current_module_address}'
            )
            os.system(
                f'mcsSetConfig -x SystemModuleConfig '
                f'ModuleIPAddr{current_module_id + 42}-1-3 '
                f'{current_module_address}'
            )
            root = node_config.get_current_config_root()
            self.assertEqual(current_module_id,
                             node_config.get_new_module_id(root))
        except AssertionError as e:
            if (current_module_id is not None
                    and current_module_address is not None):
                os.system(
                    f'mcsSetConfig SystemModuleConfig '
                    f'ModuleIPAddr{current_module_id}-1-3 '
                    f'{current_module_address}'
                )
                os.system(
                    f'mcsSetConfig -x SystemModuleConfig '
                    f'ModuleIPAddr{current_module_id + 42}-1-3 '
                    f'{current_module_address}'
                )
            raise e

    def test_dbroots_to_create(self):
        dummy_dbroots = [42, 43]
        try:
            node_config = NodeConfig()
            current_module_id = read_module_id()
            dbroot_seq_id = 2
            for d in dummy_dbroots:
                os.system(
                    f'mcsSetConfig SystemModuleConfig '
                    f'ModuleDBRootID{current_module_id}-{dbroot_seq_id}-3 {d}'
                )
                dbroot_seq_id += 1
            root = node_config.get_current_config_root()
            dbroots_to_create = list(
                node_config.dbroots_to_create(
                    root=root, module_id=current_module_id
                )
            )
            for d in dbroots_to_create:
                self.assertTrue(d in dummy_dbroots)
        except AssertionError as e:
            dbroot_seq_id = 2
            for d in dummy_dbroots:
                os.system(
                    f'mcsSetConfig -x SystemModuleConfig '
                    f'ModuleDBRootID{current_module_id}-{dbroot_seq_id}-3 {d}'
                )
                dbroot_seq_id += 1
            raise e

        dbroot_seq_id = 2
        for d in dummy_dbroots:
            os.system(
                f'mcsSetConfig -x SystemModuleConfig '
                f'ModuleDBRootID{current_module_id}-{dbroot_seq_id}-3 {d}'
            )
            dbroot_seq_id += 1


if __name__ == '__main__':
    unittest.main()
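test_apply_config and test_rollback_config pin down a small contract: apply_config first saves the current file next to itself as <path>.cmapi.save and then writes the new XML, while rollback_config restores the saved copy. A minimal sketch of just that contract, assuming nothing beyond what the tests assert; the real NodeConfig also handles ownership, directories, module ids and active-node checks, which are exactly the pieces the tests mock out:

# Sketch of the save/write/restore behavior asserted by the tests above;
# not the real NodeConfig methods.
from pathlib import Path
from shutil import copyfile

BACKUP_SUFFIX = '.cmapi.save'  # suffix asserted in test_apply_config

def apply_config(config_filepath: str, xml_string: str) -> None:
    # keep a rollback copy of the current config, then write the new one
    copyfile(config_filepath, config_filepath + BACKUP_SUFFIX)
    Path(config_filepath).write_text(xml_string)

def rollback_config(config_filepath: str) -> None:
    # restore the config saved by the last apply_config call
    copyfile(config_filepath + BACKUP_SUFFIX, config_filepath)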
50
cmapi/mcs_node_control/test/test_node_status.py
Normal file
@ -0,0 +1,50 @@
import logging
import os
import unittest
from pathlib import Path
from shutil import rmtree

from cmapi_server.constants import MCS_MODULE_FILE_PATH
from mcs_node_control.models.node_status import NodeStatus


logging.basicConfig(level='DEBUG')


class NodeStatusTest(unittest.TestCase):
    def test_dbrm_cluster_mode(self):
        node_status = NodeStatus()
        # TODO: use subprocess.run to capture stdout
        os.system('/usr/bin/dbrmctl readwrite')
        self.assertEqual(node_status.get_cluster_mode(), 'readwrite')
        os.system('/usr/bin/dbrmctl readonly')
        self.assertEqual(node_status.get_cluster_mode(), 'readonly')
        # TODO: kill controllernode and test it

    def test_dbrm_status(self):
        node_status = NodeStatus()
        self.assertEqual(node_status.get_dbrm_status(), 'master')

    def test_dbroots(self):
        try:
            node_status = NodeStatus()
            dbroot_ids = [1, 2, 3]
            path = '/tmp/dbroots/'
            for e in dbroot_ids:
                p = Path(path + 'data' + str(e))
                p.mkdir(parents=True, exist_ok=True)
            for e in node_status.get_dbroots(path=path):
                self.assertTrue(e in dbroot_ids)
        except AssertionError as e:
            rmtree(path)
            raise e

    def test_module_id(self):
        node_status = NodeStatus()
        module_file = Path(MCS_MODULE_FILE_PATH)
        exemplar_id = int(module_file.read_text()[2:])
        self.assertEqual(exemplar_id, node_status.get_module_id())


if __name__ == '__main__':
    unittest.main()
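test_dbroots creates /tmp/dbroots/data1..data3 and expects get_dbroots to yield the ids 1..3, so dbroots are evidently discovered as 'dataN' directories under a storage path. A hedged sketch of such a scanner; this is not the real NodeStatus method, and the default path is only a placeholder:

# Sketch of the dbroot discovery the test above relies on: yield N for each
# 'dataN' directory under the given path.
import re
from pathlib import Path

def get_dbroots(path: str = '/tmp/dbroots/'):
    for entry in Path(path).iterdir():
        match = re.fullmatch(r'data(\d+)', entry.name)
        if match and entry.is_dir():
            yield int(match.group(1))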
16
cmapi/postinst.template
Executable file
@ -0,0 +1,16 @@
#!/usr/bin/env bash

# only needed for post-install on CentOS (rpmsave handling)
if [ -f ${CMAPI_CONF_FILEPATH}.rpmsave ]; then
    echo "warning: found previously saved configuration file ${CMAPI_CONF_FILEPATH}.rpmsave"
    mv ${CMAPI_CONF_FILEPATH} ${CMAPI_CONF_FILEPATH}.rpmnew
    echo "warning: newly installed configuration file ${CMAPI_CONF_FILEPATH} saved as ${CMAPI_CONF_FILEPATH}.rpmnew"
    mv ${CMAPI_CONF_FILEPATH}.rpmsave ${CMAPI_CONF_FILEPATH}
    echo "warning: previously saved configuration file ${CMAPI_CONF_FILEPATH}.rpmsave applied as current config file ${CMAPI_CONF_FILEPATH}"
fi

systemctl enable ${SYSTEMD_UNIT_NAME}

systemctl start ${SYSTEMD_UNIT_NAME}

systemctl mask ${SYSTEMD_ENGINE_UNIT_NAME}
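The postinst above implements the usual RPM config-preservation shuffle: if the package manager set the admin's edited config aside as .rpmsave, the freshly installed default is renamed to .rpmnew and the saved file is restored as the live config. The same invariant restated as a Python sketch for clarity (the paths and helper are illustrative only):

# Illustrative restatement of the rpmsave handling: the admin's saved
# config wins; the freshly installed default is kept as .rpmnew.
import os

def restore_saved_config(conf: str) -> None:
    if os.path.isfile(conf + '.rpmsave'):
        os.replace(conf, conf + '.rpmnew')   # keep the new default for reference
        os.replace(conf + '.rpmsave', conf)  # restore the saved config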
9
cmapi/prerm.template
Executable file
@ -0,0 +1,9 @@
#!/usr/bin/env bash

systemctl stop ${SYSTEMD_UNIT_NAME}

find ${CMAPI_DIR} -type d -name __pycache__ -print0 | xargs --null --no-run-if-empty rm -rf

systemctl disable ${SYSTEMD_UNIT_NAME}

systemctl unmask ${SYSTEMD_ENGINE_UNIT_NAME}
73
cmapi/requirements.txt
Normal file
@ -0,0 +1,73 @@
awscli==1.25.56
CherryPy==18.6.1
cryptography==36.0.1
furl==2.1.3
gsutil==5.12
lxml==4.7.1
psutil==5.9.1
pyotp==2.6.0
requests==2.27.1
typer==0.4.1

# indirect dependencies
aiohttp==3.8.1
aiosignal==1.2.0
argcomplete==2.0.0
async-timeout==4.0.2
asynctest==0.13.0
attrs==22.1.0
boto==2.49.0
boto3==1.24.55
botocore==1.27.55
cachetools==5.2.0
certifi==2021.10.8
cffi==1.15.0
charset-normalizer==2.0.12
cheroot==8.6.0
click==8.1.3
colorama==0.4.4
crcmod==1.7
docutils==0.16
fasteners==0.17.3
frozenlist==1.3.1
gcs-oauth2-boto-plugin==3.0
google-apitools==0.5.32
google-auth==2.10.0
google-reauth==0.1.1
httplib2==0.20.4
idna==3.3
importlib-resources==5.4.0
importlib-metadata==4.12.0
jaraco.classes==3.2.1
jaraco.collections==3.5.1
jaraco.context==4.1.1
jaraco.functools==3.5.0
jaraco.text==3.7.0
jmespath==1.0.1
monotonic==1.6
more-itertools==8.12.0
multidict==6.0.2
oauth2client==4.1.3
orderedmultidict==1.0.1
portend==3.1.0
pyasn1-modules==0.2.8
pyasn1==0.4.8
pycparser==2.21
pyOpenSSL==22.0.0
pyparsing==3.0.9
python-dateutil==2.8.2
pytz==2021.3
pyu2f==0.1.5
PyYAML==5.4.1
repoze.lru==0.7
retry-decorator==1.1.1
Routes==2.5.1
rsa==4.7.2
s3transfer==0.6.0
six==1.16.0
tempora==5.0.1
typing-extensions==4.3.0
urllib3==1.26.8
yarl==1.8.1
zc.lockfile==2.0
zipp==3.7.0
41
cmapi/run_tests.py
Normal file
@ -0,0 +1,41 @@
import datetime
import logging
import sys
import unittest

from cmapi_server.logging_management import add_logging_level


class DatedTextTestResult(unittest.TextTestResult):
    def startTest(self, test: unittest.case.TestCase):
        self.stream.write('\n')
        self.stream.write(
            datetime.datetime.now().strftime("[%Y-%m-%d %H:%M:%S]: ")
        )
        return super().startTest(test)


def run_tests_from_package(p_name: str):
    logging.info(f'Starting tests from package {p_name}')
    loader = unittest.TestLoader()
    testsuite = loader.discover(
        pattern='test_*.py', start_dir=p_name, top_level_dir='./'
    )
    runner = unittest.runner.TextTestRunner(
        verbosity=3, failfast=True, resultclass=DatedTextTestResult
    )
    result = runner.run(testsuite)
    # exit with a non-zero status as soon as a package's tests fail;
    # on success fall through so the next package can run
    if not result.wasSuccessful():
        sys.exit(1)
    logging.info(f'Finished tests from package {p_name}')


if __name__ == "__main__":
    add_logging_level('TRACE', 5)
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s [%(levelname)s] (%(name)s) %(message)s'
    )
    run_tests_from_package('cmapi_server')
    run_tests_from_package('mcs_node_control')
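The runner calls add_logging_level('TRACE', 5) from cmapi_server.logging_management, whose implementation is not part of this hunk. A hedged sketch of what such a helper typically does, registering the level name with the logging module and attaching a convenience method to Logger; this is an assumption about the helper, not its actual code:

# Sketch of an add_logging_level-style helper: register a custom level and
# attach a logger method named after it (e.g. logger.trace(...)).
import logging

def add_logging_level(level_name: str, level_num: int) -> None:
    method_name = level_name.lower()

    def log_for_level(self, message, *args, **kwargs):
        if self.isEnabledFor(level_num):
            self._log(level_num, message, args, **kwargs)

    logging.addLevelName(level_num, level_name)
    setattr(logging, level_name, level_num)
    setattr(logging.Logger, method_name, log_for_level)

# after add_logging_level('TRACE', 5):
# logging.getLogger(__name__).trace('very verbose message')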
151
cmapi/service.sh
Executable file
@ -0,0 +1,151 @@
#!/bin/bash

CPACK_PACKAGE_DESCRIPTION_SUMMARY="MariaDB ColumnStore Cluster Manager API"
SVC_NAME="mariadb-columnstore-cmapi"

SVC_CMD=$1
arg_2=${2}

UNIT_PATH=/usr/lib/systemd/system/${SVC_NAME}.service
TEMPLATE_PATH=./service.template
TEMP_PATH=./service.temp
SYSTEMD_ENV_FILE=/etc/columnstore/systemd.env

CMAPI_DIR=$(pwd)
CMAPI_USER=root
CONFIG_FOLDER=/etc/columnstore
CONFIG_FILENAME=cmapi_server.conf

user_id=$(id -u)

# systemctl must run as root
if [ $user_id -ne 0 ]; then
    echo "Failed: this script must be run as root (try sudo)." >&2
    exit 1
fi

function failed()
{
    local error=${1:-Undefined error}
    echo "Failed: $error" >&2
    exit 1
}

if [ ! -f "${TEMPLATE_PATH}" ]; then
    failed "Must run from package folder or install is corrupt"
fi

function install()
{
    echo "Creating service in ${UNIT_PATH}"
    if [ -f "${UNIT_PATH}" ]; then
        failed "error: ${UNIT_PATH} already exists"
    fi

    if [ -f "${TEMP_PATH}" ]; then
        rm "${TEMP_PATH}" || failed "failed to delete ${TEMP_PATH}"
    fi

    # can optionally use the username supplied
    #run_as_user=${arg_2:-$SUDO_USER}
    #echo "Run as user: ${run_as_user}"

    #run_as_uid=$(id -u ${run_as_user}) || failed "User does not exist"
    #echo "Run as uid: ${run_as_uid}"

    #run_as_gid=$(id -g ${run_as_user}) || failed "Group not available"
    #echo "gid: ${run_as_gid}"

    sed "s/\${CPACK_PACKAGE_DESCRIPTION_SUMMARY}/${CPACK_PACKAGE_DESCRIPTION_SUMMARY}/g; s/\${CMAPI_USER}/${CMAPI_USER}/g; s/\${CMAPI_DIR}/$(echo ${CMAPI_DIR} | sed -e 's/[\/&]/\\&/g')/g;" "${TEMPLATE_PATH}" > "${TEMP_PATH}" || failed "failed to create replacement temp file"
    mv "${TEMP_PATH}" "${UNIT_PATH}" || failed "failed to copy unit file"

    if [ ! -d "${CONFIG_FOLDER}" ]; then
        mkdir $CONFIG_FOLDER || failed "failed to create configuration folder"
    fi

    if [ ! -f "${CONFIG_FOLDER}/${CONFIG_FILENAME}" ]; then
        cp cmapi_server/cmapi_server.conf.default "${CONFIG_FOLDER}/${CONFIG_FILENAME}" || failed "failed to copy config file"
    fi

    # Unit file should not be executable or world writable
    chmod 664 ${UNIT_PATH} || failed "failed to set permissions on ${UNIT_PATH}"

    # Since we started with sudo, files will be owned by root. Change this to a specific user
    #chown -R ${run_as_uid}:${run_as_gid} $CMAPI_DIR || failed "failed to set owner for $CMAPI_DIR"

    systemctl enable ${SVC_NAME} || failed "failed to enable ${SVC_NAME}"

    # chown ${run_as_uid}:${run_as_gid} ${CONFIG_FOLDER}/${CONFIG_FILENAME} || failed "failed to set permission for ${CONFIG_FOLDER}/${CONFIG_FILENAME}"
    echo PYTHONPATH=${CMAPI_DIR}/deps > ${SYSTEMD_ENV_FILE}

    systemctl daemon-reload || failed "failed to reload daemons"
}

function start()
{
    systemctl start ${SVC_NAME} || failed "failed to start ${SVC_NAME}"
    status
}

function stop()
{
    systemctl stop ${SVC_NAME} || failed "failed to stop ${SVC_NAME}"
    status
}

function uninstall()
{
    stop
    systemctl disable ${SVC_NAME} || failed "failed to disable ${SVC_NAME}"
    rm "${UNIT_PATH}" || failed "failed to delete ${UNIT_PATH}"
    rm "${SYSTEMD_ENV_FILE}" || failed "failed to delete ${SYSTEMD_ENV_FILE}"
    systemctl daemon-reload || failed "failed to reload daemons"
}

function status()
{
    if [ -f "${UNIT_PATH}" ]; then
        echo
        echo "${UNIT_PATH}"
    else
        echo
        echo "not installed"
        echo
        return
    fi

    systemctl --no-pager status ${SVC_NAME}
}

function usage()
{
    echo
    echo "Usage:"
    echo "./service.sh [install, start, stop, status, uninstall]"
    echo "Commands:"
    #echo "  install [user]: Install as root or the specified user"
    echo "  install: Install"
    echo "  start: Manually start"
    echo "  stop: Manually stop"
    echo "  status: Display installation status"
    echo "  uninstall: Uninstall"
    echo
}

case $SVC_CMD in
    "install") install;;
    "status") status;;
    "uninstall") uninstall;;
    "start") start;;
    "stop") stop;;
    *) usage;;
esac

exit 0
12
cmapi/service.template
Normal file
@ -0,0 +1,12 @@
[Unit]
Description=${CPACK_PACKAGE_DESCRIPTION_SUMMARY}

[Service]
Environment=PYTHONPATH=${CMAPI_DIR}/deps
ExecStart=${CMAPI_DIR}/python/bin/python3 -m cmapi_server
ExecStartPost=/bin/sh -c ${CMAPI_DIR}/check_ready.sh
User=${CMAPI_USER}
WorkingDirectory=${CMAPI_DIR}

[Install]
WantedBy=multi-user.target
1
cmapi/systemd.env.template
Normal file
@ -0,0 +1 @@
PYTHONPATH=${CMAPI_DIR}/deps