From 3fea9bf82570054ad10e0b58a5c20650db6a8c49 Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Thu, 28 Aug 2025 06:48:12 +0000 Subject: [PATCH 01/18] fix(installation): set selinux policy handling to the existing build/postInstall_storage_engine.sh and build/preUn_storage_engine.sh --- build/postInstall_storage_engine.sh | 16 +++++++++++++ build/preUn_storage_engine.sh | 7 ++++++ build/selinux_policy_rpm_post.sh | 28 ----------------------- build/selinux_policy_rpm_postun.sh | 15 ------------- cmake/selinux_policy.cmake | 35 ----------------------------- 5 files changed, 23 insertions(+), 78 deletions(-) delete mode 100644 build/selinux_policy_rpm_post.sh delete mode 100644 build/selinux_policy_rpm_postun.sh diff --git a/build/postInstall_storage_engine.sh b/build/postInstall_storage_engine.sh index dc5d0e96d..a98db2f9a 100644 --- a/build/postInstall_storage_engine.sh +++ b/build/postInstall_storage_engine.sh @@ -8,3 +8,19 @@ fi mkdir -p /var/lib/columnstore/local columnstore-post-install --rpmmode=$rpmmode +# Attempt to load ColumnStore SELinux policy (best-effort, no hard dependency) +POLICY_PATH="/usr/share/columnstore/policy/selinux/columnstore.pp" +if command -v getenforce >/dev/null 2>&1 && command -v semodule >/dev/null 2>&1; then + MODE=$(getenforce 2>/dev/null || echo Disabled) + case "$MODE" in + Enforcing|Permissive) + if [ -r "$POLICY_PATH" ]; then + semodule -i "$POLICY_PATH" || true + fi + ;; + *) + : + ;; + esac +fi + diff --git a/build/preUn_storage_engine.sh b/build/preUn_storage_engine.sh index b0e6fd721..75ffaea19 100644 --- a/build/preUn_storage_engine.sh +++ b/build/preUn_storage_engine.sh @@ -10,6 +10,13 @@ fi if [ $rpmmode = erase ]; then columnstore-pre-uninstall + + # Best-effort removal of ColumnStore SELinux policy on erase + if command -v semodule >/dev/null 2>&1; then + if semodule -l 2>/dev/null | grep -q '^columnstore\b'; then + semodule -r columnstore || true + fi + fi fi exit 0 diff --git a/build/selinux_policy_rpm_post.sh b/build/selinux_policy_rpm_post.sh deleted file mode 100644 index 0e77e2465..000000000 --- a/build/selinux_policy_rpm_post.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/sh -# Post-install script to load ColumnStore SELinux policy if SELinux is enabled -# This script must not introduce new runtime dependencies; it only uses coreutils and typical SELinux tools if present. - -set -e - -POLICY_PATH="/usr/share/columnstore/policy/selinux/columnstore.pp" - -# If SELinux tooling is not present, or policy file missing, silently exit -command -v getenforce >/dev/null 2>&1 || exit 0 -command -v semodule >/dev/null 2>&1 || exit 0 - -# Only attempt to install when SELinux is enforcing or permissive -MODE=$(getenforce 2>/dev/null || echo Disabled) -case "$MODE" in - Enforcing|Permissive) - if [ -r "$POLICY_PATH" ]; then - # Install or upgrade the module; do not fail the entire package if this fails - semodule -i "$POLICY_PATH" || true - fi - ;; - *) - # Disabled or unknown, do nothing - : - ;; -esac - -exit 0 diff --git a/build/selinux_policy_rpm_postun.sh b/build/selinux_policy_rpm_postun.sh deleted file mode 100644 index 10b8df5a0..000000000 --- a/build/selinux_policy_rpm_postun.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/sh -# Post-uninstall script to remove ColumnStore SELinux policy module if present -# No new runtime dependencies; use SELinux tools only if available. - -set -e - -# If SELinux tooling is not present, silently exit -command -v semodule >/dev/null 2>&1 || exit 0 - -# Remove the module if it is installed; do not fail package removal if this fails -if semodule -l 2>/dev/null | grep -q '^columnstore\b'; then - semodule -r columnstore || true -fi - -exit 0 diff --git a/cmake/selinux_policy.cmake b/cmake/selinux_policy.cmake index 7382660da..5818121a9 100644 --- a/cmake/selinux_policy.cmake +++ b/cmake/selinux_policy.cmake @@ -63,38 +63,3 @@ install( COMPONENT columnstore-engine ) -# Register RPM post-install and post-uninstall scripts for the component -set(_selinux_post "${CMAKE_CURRENT_LIST_DIR}/../build/selinux_policy_rpm_post.sh") -set(_selinux_postun "${CMAKE_CURRENT_LIST_DIR}/../build/selinux_policy_rpm_postun.sh") - -# POST_INSTALL: preserve existing script if set by wrapping it -if(EXISTS "${_selinux_post}") - if(DEFINED CPACK_RPM_columnstore-engine_POST_INSTALL_SCRIPT_FILE - AND CPACK_RPM_columnstore-engine_POST_INSTALL_SCRIPT_FILE - ) - set(_orig_post "${CPACK_RPM_columnstore-engine_POST_INSTALL_SCRIPT_FILE}") - set(_wrap_post "${SELINUX_BUILD_DIR}/post_install_wrapper.sh") - file(WRITE "${_wrap_post}" "#!/bin/sh\n\n'${_orig_post}' \"$@\" || true\n'${_selinux_post}' \"$@\" || true\n") - execute_process(COMMAND ${CMAKE_COMMAND} -E chmod +x "${_wrap_post}") - set(CPACK_RPM_columnstore-engine_POST_INSTALL_SCRIPT_FILE "${_wrap_post}") - else() - set(CPACK_RPM_columnstore-engine_POST_INSTALL_SCRIPT_FILE "${_selinux_post}") - endif() -endif() - -# POST_UNINSTALL: preserve existing script if set by wrapping it -if(EXISTS "${_selinux_postun}") - if(DEFINED CPACK_RPM_columnstore-engine_POST_UNINSTALL_SCRIPT_FILE - AND CPACK_RPM_columnstore-engine_POST_UNINSTALL_SCRIPT_FILE - ) - set(_orig_postun "${CPACK_RPM_columnstore-engine_POST_UNINSTALL_SCRIPT_FILE}") - set(_wrap_postun "${SELINUX_BUILD_DIR}/post_uninstall_wrapper.sh") - file(WRITE "${_wrap_postun}" - "#!/bin/sh\n\n'${_orig_postun}' \"$@\" || true\n'${_selinux_postun}' \"$@\" || true\n" - ) - execute_process(COMMAND ${CMAKE_COMMAND} -E chmod +x "${_wrap_postun}") - set(CPACK_RPM_columnstore-engine_POST_UNINSTALL_SCRIPT_FILE "${_wrap_postun}") - else() - set(CPACK_RPM_columnstore-engine_POST_UNINSTALL_SCRIPT_FILE "${_selinux_postun}") - endif() -endif() From 6e464bcd182d1e1c8b5269f8bfda26cf4b559113 Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Mon, 1 Sep 2025 16:52:11 +0000 Subject: [PATCH 02/18] fix(crash, plugin), chore(ci): catch missed exception, set FE identity, install dgb_symbols --- build/prepare_test_container.sh | 3 ++- dbcon/mysql/is_columnstore_columns.cpp | 21 +++++++++++++++++---- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/build/prepare_test_container.sh b/build/prepare_test_container.sh index f4cae8cdd..590eb2391 100755 --- a/build/prepare_test_container.sh +++ b/build/prepare_test_container.sh @@ -130,7 +130,8 @@ prepare_container() { if [[ "$RESULT" == *rocky* ]]; then execInnerDockerWithRetry "$CONTAINER_NAME" 'yum install -y MariaDB-columnstore-engine MariaDB-test' else - execInnerDockerWithRetry "$CONTAINER_NAME" 'apt update -y && apt install -y mariadb-plugin-columnstore mariadb-test mariadb-test-data mariadb-plugin-columnstore-dbgsym' + #TODO think about server version! + execInnerDockerWithRetry "$CONTAINER_NAME" 'apt update -y && apt install -y mariadb-plugin-columnstore mariadb-test mariadb-test-data mariadb-plugin-columnstore-dbgsym mariadb-client-10.6-dbgsym mariadb-client-core-10.6-dbgsym mariadb-server-10.6-dbgsym mariadb-server-core-10.6-dbgsym mariadb-test-dbgsym ' fi sleep 5 diff --git a/dbcon/mysql/is_columnstore_columns.cpp b/dbcon/mysql/is_columnstore_columns.cpp index d5bc8acaf..4039c87f2 100644 --- a/dbcon/mysql/is_columnstore_columns.cpp +++ b/dbcon/mysql/is_columnstore_columns.cpp @@ -62,12 +62,25 @@ static int is_columnstore_columns_fill(THD* thd, TABLE_LIST* tables, COND* cond) InformationSchemaCond isCond; execplan::CalpontSystemCatalog csc; - const std::vector< - std::pair > - catalog_tables = csc.getTables(); - + // Use FE path for syscat queries issued from mysqld csc.identity(execplan::CalpontSystemCatalog::FE); + std::vector< + std::pair > + catalog_tables; + try + { + catalog_tables = csc.getTables("", lower_case_table_names); + } + catch (IDBExcept&) + { + return 1; + } + catch (std::exception&) + { + return 1; + } + if (cond) { isCond.getCondItems(cond); From c2bf3e6b2a18a52ac3498ea70d41a150b55ba21e Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Mon, 1 Sep 2025 17:00:43 +0000 Subject: [PATCH 03/18] fix(tests): fix mcol_2000 --- .../columnstore/basic/r/mcol_2000.result | 50 +++++++++---------- mysql-test/columnstore/basic/t/mcol_2000.test | 10 ++-- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/mysql-test/columnstore/basic/r/mcol_2000.result b/mysql-test/columnstore/basic/r/mcol_2000.result index cb315b653..2769504d4 100644 --- a/mysql-test/columnstore/basic/r/mcol_2000.result +++ b/mysql-test/columnstore/basic/r/mcol_2000.result @@ -20,17 +20,17 @@ l date, m datetime, o time, s char(17) character set utf8, -t varchar(17) character set utf8mb4, +t varchar(17) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin, w blob(10), x tinyblob, y blob, z mediumblob, aa longblob, bb text(17) character set utf8, -cc tinytext character set utf8mb4, -dd text character set utf8mb4, -ee mediumtext character set utf8mb4, -ff longtext character set utf8mb4 +cc tinytext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin, +dd text CHARACTER SET utf8mb4 COLLATE utf8mb4_bin, +ee mediumtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin, +ff longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin ) default charset=koi8r ENGINE=InnoDB; create table copy1 like orig; alter table copy1 engine=columnstore; @@ -53,17 +53,17 @@ orig CREATE TABLE `orig` ( `m` datetime DEFAULT NULL, `o` time DEFAULT NULL, `s` char(17) CHARACTER SET utf8mb3 DEFAULT NULL, - `t` varchar(17) CHARACTER SET utf8mb4 DEFAULT NULL, + `t` varchar(17) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL, `w` tinyblob DEFAULT NULL, `x` tinyblob DEFAULT NULL, `y` blob DEFAULT NULL, `z` mediumblob DEFAULT NULL, `aa` longblob DEFAULT NULL, `bb` tinytext CHARACTER SET utf8mb3 DEFAULT NULL, - `cc` tinytext CHARACTER SET utf8mb4 DEFAULT NULL, - `dd` text CHARACTER SET utf8mb4 DEFAULT NULL, - `ee` mediumtext CHARACTER SET utf8mb4 DEFAULT NULL, - `ff` longtext CHARACTER SET utf8mb4 DEFAULT NULL + `cc` tinytext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL, + `dd` text CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL, + `ee` mediumtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL, + `ff` longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL ) ENGINE=InnoDB DEFAULT CHARSET=koi8r show create table copy1; Table Create Table @@ -82,17 +82,17 @@ copy1 CREATE TABLE `copy1` ( `m` datetime DEFAULT NULL, `o` time DEFAULT NULL, `s` char(17) CHARACTER SET utf8mb3 DEFAULT NULL, - `t` varchar(17) CHARACTER SET utf8mb4 DEFAULT NULL, + `t` varchar(17) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL, `w` tinyblob DEFAULT NULL, `x` tinyblob DEFAULT NULL, `y` blob DEFAULT NULL, `z` mediumblob DEFAULT NULL, `aa` longblob DEFAULT NULL, `bb` tinytext CHARACTER SET utf8mb3 DEFAULT NULL, - `cc` tinytext CHARACTER SET utf8mb4 DEFAULT NULL, - `dd` text CHARACTER SET utf8mb4 DEFAULT NULL, - `ee` mediumtext CHARACTER SET utf8mb4 DEFAULT NULL, - `ff` longtext CHARACTER SET utf8mb4 DEFAULT NULL + `cc` tinytext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL, + `dd` text CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL, + `ee` mediumtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL, + `ff` longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL ) ENGINE=Columnstore DEFAULT CHARSET=koi8r show create table copy2; Table Create Table @@ -111,17 +111,17 @@ copy2 CREATE TABLE `copy2` ( `m` datetime DEFAULT NULL, `o` time DEFAULT NULL, `s` char(17) CHARACTER SET utf8mb3 DEFAULT NULL, - `t` varchar(17) CHARACTER SET utf8mb4 DEFAULT NULL, + `t` varchar(17) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL, `w` tinyblob DEFAULT NULL, `x` tinyblob DEFAULT NULL, `y` blob DEFAULT NULL, `z` mediumblob DEFAULT NULL, `aa` longblob DEFAULT NULL, `bb` tinytext CHARACTER SET utf8mb3 DEFAULT NULL, - `cc` tinytext CHARACTER SET utf8mb4 DEFAULT NULL, - `dd` text CHARACTER SET utf8mb4 DEFAULT NULL, - `ee` mediumtext CHARACTER SET utf8mb4 DEFAULT NULL, - `ff` longtext CHARACTER SET utf8mb4 DEFAULT NULL + `cc` tinytext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL, + `dd` text CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL, + `ee` mediumtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL, + `ff` longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL ) ENGINE=Columnstore DEFAULT CHARSET=koi8r show create table copy3; Table Create Table @@ -140,17 +140,17 @@ copy3 CREATE TABLE `copy3` ( `m` datetime DEFAULT NULL, `o` time DEFAULT NULL, `s` char(17) DEFAULT NULL, - `t` varchar(17) CHARACTER SET utf8mb4 DEFAULT NULL, + `t` varchar(17) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL, `w` tinyblob DEFAULT NULL, `x` tinyblob DEFAULT NULL, `y` blob DEFAULT NULL, `z` mediumblob DEFAULT NULL, `aa` longblob DEFAULT NULL, `bb` tinytext DEFAULT NULL, - `cc` tinytext CHARACTER SET utf8mb4 DEFAULT NULL, - `dd` text CHARACTER SET utf8mb4 DEFAULT NULL, - `ee` mediumtext CHARACTER SET utf8mb4 DEFAULT NULL, - `ff` longtext CHARACTER SET utf8mb4 DEFAULT NULL + `cc` tinytext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL, + `dd` text CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL, + `ee` mediumtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL, + `ff` longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL ) ENGINE=Columnstore DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci drop table orig; drop table copy1; diff --git a/mysql-test/columnstore/basic/t/mcol_2000.test b/mysql-test/columnstore/basic/t/mcol_2000.test index 5dc47ad55..8f422df20 100644 --- a/mysql-test/columnstore/basic/t/mcol_2000.test +++ b/mysql-test/columnstore/basic/t/mcol_2000.test @@ -46,17 +46,17 @@ create table orig (a integer not null, m datetime, o time, s char(17) character set utf8, - t varchar(17) character set utf8mb4, + t varchar(17) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin, w blob(10), x tinyblob, y blob, z mediumblob, aa longblob, bb text(17) character set utf8, - cc tinytext character set utf8mb4, - dd text character set utf8mb4, - ee mediumtext character set utf8mb4, - ff longtext character set utf8mb4 + cc tinytext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin, + dd text CHARACTER SET utf8mb4 COLLATE utf8mb4_bin, + ee mediumtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin, + ff longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin ) default charset=koi8r ENGINE=InnoDB; create table copy1 like orig; From 027d09310becd6d9495481fbabb49874ae439b56 Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Mon, 1 Sep 2025 17:01:17 +0000 Subject: [PATCH 04/18] fix(tests): disable tests for 11.4 due to known bug --- .../autopilot/t/mcs4010_autopilot_cross_engine_join.test | 1 + .../devregression/t/mcs7134_regression_bug3997.test | 1 + .../devregression/t/mcs7135_regression_bug3998.test | 1 + .../devregression/t/mcs7136_regression_bug4027.test | 1 + mysql-test/columnstore/include/disable_for_11.4_and_later.inc | 4 ++++ 5 files changed, 8 insertions(+) create mode 100644 mysql-test/columnstore/include/disable_for_11.4_and_later.inc diff --git a/mysql-test/columnstore/autopilot/t/mcs4010_autopilot_cross_engine_join.test b/mysql-test/columnstore/autopilot/t/mcs4010_autopilot_cross_engine_join.test index 4649a4d2f..a15980d1c 100755 --- a/mysql-test/columnstore/autopilot/t/mcs4010_autopilot_cross_engine_join.test +++ b/mysql-test/columnstore/autopilot/t/mcs4010_autopilot_cross_engine_join.test @@ -4,6 +4,7 @@ # Author: Daniel Lee, daniel.lee@mariadb.com # -------------------------------------------------------------- # # +--source ../include/disable_for_11.4_and_later.inc --source ../include/have_columnstore.inc --source ../include/detect_maxscale.inc # diff --git a/mysql-test/columnstore/devregression/t/mcs7134_regression_bug3997.test b/mysql-test/columnstore/devregression/t/mcs7134_regression_bug3997.test index 63c23bf5d..12edd67c0 100644 --- a/mysql-test/columnstore/devregression/t/mcs7134_regression_bug3997.test +++ b/mysql-test/columnstore/devregression/t/mcs7134_regression_bug3997.test @@ -4,6 +4,7 @@ # Author: Daniel Lee, daniel.lee@mariadb.com # -------------------------------------------------------------- # # +--source ../include/disable_for_11.4_and_later.inc --source ../include/have_columnstore.inc # USE tpch1; diff --git a/mysql-test/columnstore/devregression/t/mcs7135_regression_bug3998.test b/mysql-test/columnstore/devregression/t/mcs7135_regression_bug3998.test index fec8f605e..757460e7a 100644 --- a/mysql-test/columnstore/devregression/t/mcs7135_regression_bug3998.test +++ b/mysql-test/columnstore/devregression/t/mcs7135_regression_bug3998.test @@ -4,6 +4,7 @@ # Author: Daniel Lee, daniel.lee@mariadb.com # -------------------------------------------------------------- # # +--source ../include/disable_for_11.4_and_later.inc --source ../include/have_columnstore.inc # USE tpch1; diff --git a/mysql-test/columnstore/devregression/t/mcs7136_regression_bug4027.test b/mysql-test/columnstore/devregression/t/mcs7136_regression_bug4027.test index 1d04ad609..923aef3fa 100644 --- a/mysql-test/columnstore/devregression/t/mcs7136_regression_bug4027.test +++ b/mysql-test/columnstore/devregression/t/mcs7136_regression_bug4027.test @@ -4,6 +4,7 @@ # Author: Daniel Lee, daniel.lee@mariadb.com # -------------------------------------------------------------- # # +--source ../include/disable_for_11.4_and_later.inc --source ../include/have_columnstore.inc # USE tpch1; diff --git a/mysql-test/columnstore/include/disable_for_11.4_and_later.inc b/mysql-test/columnstore/include/disable_for_11.4_and_later.inc new file mode 100644 index 000000000..744b16155 --- /dev/null +++ b/mysql-test/columnstore/include/disable_for_11.4_and_later.inc @@ -0,0 +1,4 @@ +if (`SELECT (sys.version_major(), sys.version_minor(), sys.version_patch()) >= (11, 4, 0)`) +{ + skip Known multiupdate bug; +} From 30d6b82d48fbbbdd7a14cb728208ee80d62c591b Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Mon, 1 Sep 2025 17:05:27 +0000 Subject: [PATCH 05/18] chore(ci): add server 11.4 and apply format to drone.jsonnet --- .drone.jsonnet | 280 ++++++++++++++++++++++++++----------------------- 1 file changed, 151 insertions(+), 129 deletions(-) diff --git a/.drone.jsonnet b/.drone.jsonnet index 05847d37a..001b7ed9c 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -7,6 +7,11 @@ local servers = { [current_branch]: ["10.6-enterprise"], }; +local extra_servers = { + [current_branch]: ["11.4-enterprise"], +}; + + local platforms = { [current_branch]: ["rockylinux:8", "rockylinux:9", "rockylinux:10", "debian:12", "ubuntu:22.04", "ubuntu:24.04"], }; @@ -19,12 +24,13 @@ local builddir = "verylongdirnameforverystrangecpackbehavior"; local get_build_command(command) = "bash /mdb/" + builddir + "/storage/columnstore/columnstore/build/" + command + " "; -local clang(version) = [get_build_command("install_clang_deb.sh " + version), - get_build_command("update-clang-version.sh " + version + " 100"), - get_build_command("install_libc++.sh " + version), - "export CC=/usr/bin/clang", - "export CXX=/usr/bin/clang++" - ]; +local clang(version) = [ + get_build_command("install_clang_deb.sh " + version), + get_build_command("update-clang-version.sh " + version + " 100"), + get_build_command("install_libc++.sh " + version), + "export CC=/usr/bin/clang", + "export CXX=/usr/bin/clang++", +]; local customEnvCommandsMap = { "clang-20": clang("20"), @@ -36,9 +42,9 @@ local customEnvCommands(envkey, builddir) = local customBootstrapParamsForExisitingPipelines(envkey) = - # errorprone if we pass --custom-cmake-flags twice, the last one will win + // errorprone if we pass --custom-cmake-flags twice, the last one will win local customBootstrapMap = { - "ubuntu:24.04": "--custom-cmake-flags '-DCOLUMNSTORE_ASAN_FOR_UNITTESTS=YES'", + //'ubuntu:24.04': "--custom-cmake-flags '-DCOLUMNSTORE_ASAN_FOR_UNITTESTS=YES'", }; (if (std.objectHas(customBootstrapMap, envkey)) then customBootstrapMap[envkey] else ""); @@ -48,8 +54,8 @@ local customBootstrapParamsForAdditionalPipelinesMap = { TSAN: "--tsan", UBSan: "--ubsan", MSan: "--msan", - "libcpp": "--libcpp --skip-unit-tests", - "gcc-toolset": "--gcc-toolset-for-rocky-8" + libcpp: "--libcpp --skip-unit-tests", + "gcc-toolset": "--gcc-toolset-for-rocky-8", }; local customBuildFlags(buildKey) = @@ -91,9 +97,11 @@ local upgrade_test_lists = { }, }; -local make_clickable_link(link) = "echo -e '\\e]8;;" + link + "\\e\\\\" + link + "\\e]8;;\\e\\\\'"; -local echo_running_on = ["echo running on ${DRONE_STAGE_MACHINE}", - make_clickable_link("https://us-east-1.console.aws.amazon.com/ec2/home?region=us-east-1#Instances:search=:${DRONE_STAGE_MACHINE};v=3;$case=tags:true%5C,client:false;$regex=tags:false%5C,client:false;sort=desc:launchTime")]; +local make_clickable_link(link) = "echo -e '\\e]8;;" + link + "\\e\\\\" + link + "\\e]8;;\\e\\\\'"; +local echo_running_on = [ + "echo running on ${DRONE_STAGE_MACHINE}", + make_clickable_link("https://us-east-1.console.aws.amazon.com/ec2/home?region=us-east-1#Instances:search=:${DRONE_STAGE_MACHINE};v=3;$case=tags:true%5C,client:false;$regex=tags:false%5C,client:false;sort=desc:launchTime"), +]; local Pipeline(branch, platform, event, arch="amd64", server="10.6-enterprise", customBootstrapParamsKey="", customBuildEnvCommandsMapKey="", ignoreFailureStepList=[]) = { local pkg_format = if (std.split(platform, ":")[0] == "rockylinux") then "rpm" else "deb", @@ -104,8 +112,8 @@ local Pipeline(branch, platform, event, arch="amd64", server="10.6-enterprise", local brancht = if (branch == "**") then "" else branch + "-", local platformKey = std.strReplace(std.strReplace(platform, ":", ""), "/", "-"), local result = platformKey + - (if customBuildEnvCommandsMapKey != "" then "_" + customBuildEnvCommandsMapKey else "") + - (if customBootstrapParamsKey != "" then "_" + customBootstrapParamsKey else ""), + (if customBuildEnvCommandsMapKey != "" then "_" + customBuildEnvCommandsMapKey else "") + + (if customBootstrapParamsKey != "" then "_" + customBootstrapParamsKey else ""), local packages_url = "https://cspkg.s3.amazonaws.com/" + branchp + event + "/${DRONE_BUILD_NUMBER}/" + server, local publish_pkg_url = "https://cspkg.s3.amazonaws.com/index.html?prefix=" + branchp + event + "/${DRONE_BUILD_NUMBER}/" + server + "/" + arch + "/" + result + "/", @@ -140,19 +148,19 @@ local Pipeline(branch, platform, event, arch="amd64", server="10.6-enterprise", "sleep 10", "ls -lR " + result, - //clean old versions of .deb/.rpm files - "source /mdb/" + builddir + "/storage/columnstore/columnstore/VERSION && " + - "CURRENT_VERSION=${COLUMNSTORE_VERSION_MAJOR}.${COLUMNSTORE_VERSION_MINOR}.${COLUMNSTORE_VERSION_PATCH} && " + - "aws s3 rm s3://cspkg/" + branchp + eventp + "/" + server + "/" + arch + "/" + result + "/ " + - "--recursive " + - "--exclude \"*\" " + - // include only debs/rpms with columnstore in names - "--include \"*columnstore*.deb\" " + - "--include \"*columnstore*.rpm\" " + - // but do not delete the ones matching CURRENT_VERSION - "--exclude \"*${CURRENT_VERSION}*.deb\" " + - "--exclude \"*${CURRENT_VERSION}*.rpm\" " + - "--only-show-errors", + //clean old versions of .deb/.rpm files + "source /mdb/" + builddir + "/storage/columnstore/columnstore/VERSION && " + + "CURRENT_VERSION=${COLUMNSTORE_VERSION_MAJOR}.${COLUMNSTORE_VERSION_MINOR}.${COLUMNSTORE_VERSION_PATCH} && " + + "aws s3 rm s3://cspkg/" + branchp + eventp + "/" + server + "/" + arch + "/" + result + "/ " + + "--recursive " + + '--exclude "*" ' + + // include only debs/rpms with columnstore in names + '--include "*columnstore*.deb" ' + + '--include "*columnstore*.rpm" ' + + // but do not delete the ones matching CURRENT_VERSION + '--exclude "*${CURRENT_VERSION}*.deb" ' + + '--exclude "*${CURRENT_VERSION}*.rpm" ' + + "--only-show-errors", "aws s3 sync " + result + "/" + " s3://cspkg/" + branchp + eventp + "/" + server + "/" + arch + "/" + result + " --only-show-errors", 'echo "Data uploaded to: ' + publish_pkg_url + '"', @@ -207,14 +215,14 @@ local Pipeline(branch, platform, event, arch="amd64", server="10.6-enterprise", " --result-path " + result + " --packages-url " + packages_url + " --do-setup " + std.toString(do_setup) + - (if result=="ubuntu24.04_clang-20_libcpp" then " --install-libcpp " else "") + + (if result == "ubuntu24.04_clang-20_libcpp" then " --install-libcpp " else "") + '"', local reportTestStage(containerName, result, stage) = 'sh -c "apk add bash && ' + get_build_command("report_test_stage.sh") + - ' --container-name ' + containerName + - ' --result-path ' + result + - ' --stage ' + stage + '"', + " --container-name " + containerName + + " --result-path " + result + + " --stage " + stage + '"', _volumes:: { @@ -235,7 +243,7 @@ local Pipeline(branch, platform, event, arch="amd64", server="10.6-enterprise", commands: [ prepareTestContainer(getContainerName("smoke"), result, true), get_build_command("run_smoke.sh") + - ' --container-name ' + getContainerName("smoke"), + " --container-name " + getContainerName("smoke"), ], }, smokelog:: { @@ -263,14 +271,15 @@ local Pipeline(branch, platform, event, arch="amd64", server="10.6-enterprise", commands: [ prepareTestContainer(getContainerName("upgrade") + version, result, false), - execInnerDocker('bash -c "./upgrade_setup_' + pkg_format + '.sh ' - + version + ' ' - + result + ' ' - + arch + ' ' - + repo_pkg_url_no_res - + ' $${UPGRADE_TOKEN}"', + execInnerDocker( + 'bash -c "./upgrade_setup_' + pkg_format + ".sh " + + version + " " + + result + " " + + arch + " " + + repo_pkg_url_no_res + + ' $${UPGRADE_TOKEN}"', getContainerName("upgrade") + version - ) + ), ], }, upgradelog:: { @@ -279,16 +288,16 @@ local Pipeline(branch, platform, event, arch="amd64", server="10.6-enterprise", image: "docker:28.2.2", volumes: [pipeline._volumes.docker, pipeline._volumes.mdb], commands: - ["echo"] + - std.map( - function(ver) - reportTestStage( - getContainerName("upgrade") + ver, - result, - "upgrade_" + ver - ), - mdb_server_versions - ), + ["echo"] + + std.map( + function(ver) + reportTestStage( + getContainerName("upgrade") + ver, + result, + "upgrade_" + ver + ), + mdb_server_versions + ), when: { status: ["success", "failure"], }, @@ -306,13 +315,13 @@ local Pipeline(branch, platform, event, arch="amd64", server="10.6-enterprise", prepareTestContainer(getContainerName("mtr"), result, true), 'MTR_SUITE_LIST=$([ "$MTR_FULL_SUITE" == true ] && echo "' + mtr_full_set + '" || echo "$MTR_SUITE_LIST")', - 'apk add bash &&' + + "apk add bash &&" + get_build_command("run_mtr.sh") + - ' --container-name ' + getContainerName("mtr") + - ' --distro ' + platform + - ' --suite-list $${MTR_SUITE_LIST}' + - ' --triggering-event ' + event - + if std.endsWith(result, 'ASan') then ' --run-as-extern' else '', + " --container-name " + getContainerName("mtr") + + " --distro " + platform + + " --suite-list $${MTR_SUITE_LIST}" + + " --triggering-event " + event + + if std.endsWith(result, "ASan") then " --run-as-extern" else "", ], [if (std.member(ignoreFailureStepList, "mtr")) then "failure"]: "ignore", @@ -331,7 +340,7 @@ local Pipeline(branch, platform, event, arch="amd64", server="10.6-enterprise", [if (std.member(ignoreFailureStepList, "mtr")) then "failure"]: "ignore", }, - regression(name, depends_on, ):: { + regression(name, depends_on,):: { name: name, depends_on: depends_on, image: "docker:git", @@ -506,18 +515,18 @@ local Pipeline(branch, platform, event, arch="amd64", server="10.6-enterprise", SERVER_SHA: "${SERVER_SHA:-" + server + "}", }, commands: echo_running_on + - [ - "echo $$SERVER_REF", - "echo $$SERVER_REMOTE", - "mkdir -p /mdb/" + builddir + " && cd /mdb/" + builddir, - 'git config --global url."https://github.com/".insteadOf git@github.com:', - 'git -c submodule."storage/rocksdb/rocksdb".update=none -c submodule."wsrep-lib".update=none -c submodule."storage/columnstore/columnstore".update=none clone --recurse-submodules --depth 200 --branch $$SERVER_REF $$SERVER_REMOTE .', - "git reset --hard $$SERVER_SHA", - "git rev-parse --abbrev-ref HEAD && git rev-parse HEAD", - "git config cmake.update-submodules no", - "rm -rf storage/columnstore/columnstore", - "cp -r /drone/src /mdb/" + builddir + "/storage/columnstore/columnstore", - ], + [ + "echo $$SERVER_REF", + "echo $$SERVER_REMOTE", + "mkdir -p /mdb/" + builddir + " && cd /mdb/" + builddir, + 'git config --global url."https://github.com/".insteadOf git@github.com:', + 'git -c submodule."storage/rocksdb/rocksdb".update=none -c submodule."wsrep-lib".update=none -c submodule."storage/columnstore/columnstore".update=none clone --recurse-submodules --depth 200 --branch $$SERVER_REF $$SERVER_REMOTE .', + "git reset --hard $$SERVER_SHA", + "git rev-parse --abbrev-ref HEAD && git rev-parse HEAD", + "git config cmake.update-submodules no", + "rm -rf storage/columnstore/columnstore", + "cp -r /drone/src /mdb/" + builddir + "/storage/columnstore/columnstore", + ], }, { name: "build", @@ -540,22 +549,22 @@ local Pipeline(branch, platform, event, arch="amd64", server="10.6-enterprise", SCCACHE_S3_KEY_PREFIX: result + branch + server + arch, }, - # errorprone if we pass --custom-cmake-flags twice, the last one will win + // errorprone if we pass --custom-cmake-flags twice, the last one will win commands: [ "mkdir /mdb/" + builddir + "/" + result, ] + customEnvCommands(customBuildEnvCommandsMapKey, builddir) + [ - 'bash -c "set -o pipefail && ' + + 'bash -c "set -o pipefail && ' + get_build_command("bootstrap_mcs.sh") + "--build-type RelWithDebInfo " + "--distro " + platform + " " + "--build-packages --install-deps --sccache " + "--build-path " + "/mdb/" + builddir + "/builddir " + - " " + customBootstrapParamsForExisitingPipelines(platform) + - " " + customBuildFlags(customBootstrapParamsKey) + - " | " + get_build_command("ansi2txt.sh") + - "/mdb/" + builddir + "/" + result + '/build.log "', + " " + customBootstrapParamsForExisitingPipelines(platform) + + " " + customBuildFlags(customBootstrapParamsKey) + + " | " + get_build_command("ansi2txt.sh") + + "/mdb/" + builddir + "/" + result + '/build.log "', ], }, { @@ -636,59 +645,73 @@ local Pipeline(branch, platform, event, arch="amd64", server="10.6-enterprise", }; -local AllPipelines = [ - Pipeline(b, p, e, a, s) - for b in std.objectFields(platforms) - for p in platforms[b] - for s in servers[b] - for e in events - for a in archs -] + -[ - Pipeline(any_branch, p, "custom", a, server) - for p in platforms[current_branch] - for server in servers[current_branch] - for a in archs -] + -[ - Pipeline(b, platform, triggeringEvent, a, server, "", buildenv) - for a in ["amd64"] - for b in std.objectFields(platforms) - for platform in ["ubuntu:24.04"] - for buildenv in std.objectFields(customEnvCommandsMap) - for triggeringEvent in events - for server in servers[current_branch] -] + -// last argument is to ignore mtr and regression failures -[ - Pipeline(b, platform, triggeringEvent, a, server, flag, envcommand, ["regression", "mtr"]) - for a in ["amd64"] - for b in std.objectFields(platforms) - for platform in ["ubuntu:24.04"] - for flag in ["libcpp"] - for envcommand in ["clang-20"] - for triggeringEvent in events - for server in servers[current_branch] -] + -// last argument is to ignore mtr and regression failures -[ - Pipeline(b, platform, triggeringEvent, a, server, flag, "", ["regression", "mtr"]) - for a in ["amd64"] - for b in std.objectFields(platforms) - for platform in ["ubuntu:24.04"] - for flag in ["ASan", "UBSan"] - for triggeringEvent in events - for server in servers[current_branch] -] + -[ - Pipeline(b, platform, triggeringEvent, a, server, flag, "") - for a in ["amd64"] - for b in std.objectFields(platforms) - for platform in ["rockylinux:8"] - for flag in ["gcc-toolset"] - for triggeringEvent in events - for server in servers[current_branch] -]; +local AllPipelines = + [ + Pipeline(b, platform, triggeringEvent, a, server, flag, "") + for a in ["amd64"] + for b in std.objectFields(platforms) + for platform in ["rockylinux:8"] + for flag in ["gcc-toolset"] + for triggeringEvent in events + for server in servers[current_branch] + ] + + [ + Pipeline(b, p, e, a, s) + for b in std.objectFields(platforms) + for p in platforms[b] + for s in servers[b] + for e in events + for a in archs + ] + + [ + Pipeline(any_branch, p, "custom", a, server) + for p in platforms[current_branch] + for server in servers[current_branch] + for a in archs + ] + + // clang + [ + Pipeline(b, platform, triggeringEvent, a, server, "", buildenv) + for a in ["amd64"] + for b in std.objectFields(platforms) + for platform in ["ubuntu:24.04"] + for buildenv in std.objectFields(customEnvCommandsMap) + for triggeringEvent in events + for server in servers[current_branch] + ] + + // last argument is to ignore mtr and regression failures + [ + Pipeline(b, platform, triggeringEvent, a, server, "", "", ["regression", "mtr"]) + for a in ["amd64"] + for b in std.objectFields(platforms) + for platform in ["ubuntu:24.04", "rockylinux:9"] + for triggeringEvent in events + for server in extra_servers[current_branch] + ] + + // // last argument is to ignore mtr and regression failures + [ + Pipeline(b, platform, triggeringEvent, a, server, flag, envcommand, ["regression", "mtr"]) + for a in ["amd64"] + for b in std.objectFields(platforms) + for platform in ["ubuntu:24.04"] + for flag in ["libcpp"] + for envcommand in ["clang-20"] + for triggeringEvent in events + for server in servers[current_branch] + ] + + // last argument is to ignore mtr and regression failures + [ + Pipeline(b, platform, triggeringEvent, a, server, flag, "", ["regression", "mtr"]) + for a in ["amd64"] + for b in std.objectFields(platforms) + for platform in ["ubuntu:24.04"] + for flag in ["ASan", "UBSan"] + for triggeringEvent in events + for server in servers[current_branch] + ] + + + []; + local FinalPipeline(branch, event) = { kind: "pipeline", @@ -718,7 +741,6 @@ local FinalPipeline(branch, event) = { }; - AllPipelines + [ FinalPipeline(b, "cron") From 0fc41e03877132728b54effbac476ac403a940d4 Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Wed, 3 Sep 2025 10:15:56 +0000 Subject: [PATCH 06/18] fix(ci): install proper dbg symbols according server version --- build/prepare_test_container.sh | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/build/prepare_test_container.sh b/build/prepare_test_container.sh index 590eb2391..06a22e347 100755 --- a/build/prepare_test_container.sh +++ b/build/prepare_test_container.sh @@ -5,10 +5,11 @@ set -eo pipefail SCRIPT_LOCATION=$(dirname "$0") COLUMNSTORE_SOURCE_PATH=$(realpath "$SCRIPT_LOCATION"/../) +MDB_SOURCE_PATH=$(realpath "$SCRIPT_LOCATION"/../../../..) source "$SCRIPT_LOCATION"/utils.sh -echo "Arguments received: $@" +message "Arguments received: $@" optparse.define short=c long=container-name desc="Name of the Docker container to run tests in" variable=CONTAINER_NAME optparse.define short=i long=docker-image desc="Docker image name to start container from" variable=DOCKER_IMAGE @@ -25,7 +26,7 @@ if [[ "$EUID" -ne 0 ]]; then fi if [[ -z "${CONTAINER_NAME:-}" || -z "${DOCKER_IMAGE:-}" || -z "${RESULT:-}" || -z "${DO_SETUP:-}" || -z "${PACKAGES_URL:-}" ]]; then - echo "Please provide --container-name, --docker-image, --result-path, --packages-url and --do-setup parameters, e.g. ./prepare_test_stage.sh --container-name smoke11212 --docker-image detravi/ubuntu:24.04 --result-path ubuntu24.04 --packages-url https://cspkg.s3.amazonaws.com/stable-23.10/pull_request/91/10.6-enterprise --do-setup true" + warn "Please provide --container-name, --docker-image, --result-path, --packages-url and --do-setup parameters, e.g. ./prepare_test_stage.sh --container-name smoke11212 --docker-image detravi/ubuntu:24.04 --result-path ubuntu24.04 --packages-url https://cspkg.s3.amazonaws.com/stable-23.10/pull_request/91/10.6-enterprise --do-setup true" exit 1 fi @@ -62,7 +63,7 @@ start_container() { elif [[ "$CONTAINER_NAME" == *regression* ]]; then docker_run_args+=(--shm-size=500m --memory 15g) else - echo "Unknown container type: $CONTAINER_NAME" + error "Unknown container type: $CONTAINER_NAME" exit 1 fi @@ -126,16 +127,24 @@ prepare_container() { execInnerDocker "$CONTAINER_NAME" 'sysctl -w kernel.core_pattern="/core/%E_${RESULT}_core_dump.%p"' #Install columnstore in container - echo "Installing columnstore..." + message "Installing columnstore..." + SERVER_VERSION=$(grep -E 'MYSQL_VERSION_(MAJOR|MINOR)' $MDB_SOURCE_PATH/VERSION | cut -d'=' -f2 | paste -sd. -) + message "Server version of build is $SERVER_VERSION" + if [[ "$RESULT" == *rocky* ]]; then execInnerDockerWithRetry "$CONTAINER_NAME" 'yum install -y MariaDB-columnstore-engine MariaDB-test' else - #TODO think about server version! - execInnerDockerWithRetry "$CONTAINER_NAME" 'apt update -y && apt install -y mariadb-plugin-columnstore mariadb-test mariadb-test-data mariadb-plugin-columnstore-dbgsym mariadb-client-10.6-dbgsym mariadb-client-core-10.6-dbgsym mariadb-server-10.6-dbgsym mariadb-server-core-10.6-dbgsym mariadb-test-dbgsym ' + + execInnerDockerWithRetry "$CONTAINER_NAME" 'apt update -y && apt install -y mariadb-plugin-columnstore mariadb-test mariadb-test-data mariadb-plugin-columnstore-dbgsym mariadb-test-dbgsym' + if [[ $SERVER_VERSION == '10.6' ]]; then + execInnerDockerWithRetry "$CONTAINER_NAME" 'apt install -y mariadb-client-10.6-dbgsym mariadb-client-core-10.6-dbgsym mariadb-server-10.6-dbgsym mariadb-server-core-10.6-dbgsym' + else + execInnerDockerWithRetry "$CONTAINER_NAME" 'apt install -y mariadb-client-dbgsym mariadb-client-core-dbgsym mariadb-server-dbgsym mariadb-server-core-dbgsym' + fi fi sleep 5 - echo "PrepareTestStage completed in $CONTAINER_NAME" + message "PrepareTestStage completed in $CONTAINER_NAME" } if [[ -z $(docker ps -q --filter "name=${CONTAINER_NAME}") ]]; then From a0b4bcd1cec5fe927be00376aad8e39ddc935430 Mon Sep 17 00:00:00 2001 From: Alexander Presnyakov Date: Tue, 26 Aug 2025 02:57:09 +0000 Subject: [PATCH 07/18] Basic request tracer Tracing requests Custom log factory adds all trace values as one log record parameter (it will be empty if trace values are empty, like in MainThread where there are no incoming requests) --- cmapi/.gitignore | 4 + cmapi/cmapi_server/__main__.py | 11 +- cmapi/cmapi_server/controllers/api_clients.py | 17 +- cmapi/cmapi_server/handlers/cluster.py | 10 +- cmapi/cmapi_server/helpers.py | 30 ++- cmapi/cmapi_server/logging_management.py | 25 +++ cmapi/cmapi_server/node_manipulation.py | 6 +- cmapi/cmapi_server/trace_tool.py | 51 +++++ cmapi/cmapi_server/traced_aiohttp.py | 43 ++++ cmapi/cmapi_server/traced_session.py | 52 +++++ cmapi/cmapi_server/tracer.py | 210 ++++++++++++++++++ 11 files changed, 432 insertions(+), 27 deletions(-) create mode 100644 cmapi/cmapi_server/trace_tool.py create mode 100644 cmapi/cmapi_server/traced_aiohttp.py create mode 100644 cmapi/cmapi_server/traced_session.py create mode 100644 cmapi/cmapi_server/tracer.py diff --git a/cmapi/.gitignore b/cmapi/.gitignore index bdbe67f3f..4b87c115f 100644 --- a/cmapi/.gitignore +++ b/cmapi/.gitignore @@ -87,3 +87,7 @@ result centos8 ubuntu20.04 buildinfo.txt + +# Self-signed certificates +cmapi_server/self-signed.crt +cmapi_server/self-signed.key \ No newline at end of file diff --git a/cmapi/cmapi_server/__main__.py b/cmapi/cmapi_server/__main__.py index d8bf3892b..5a8a2de58 100644 --- a/cmapi/cmapi_server/__main__.py +++ b/cmapi/cmapi_server/__main__.py @@ -18,6 +18,7 @@ from cherrypy.process import plugins from cmapi_server.logging_management import config_cmapi_server_logging from cmapi_server.sentry import maybe_init_sentry, register_sentry_cherrypy_tool config_cmapi_server_logging() +from cmapi_server.trace_tool import register_tracing_tools from cmapi_server import helpers from cmapi_server.constants import DEFAULT_MCS_CONF_PATH, CMAPI_CONF_PATH @@ -141,6 +142,7 @@ if __name__ == '__main__': # TODO: read cmapi config filepath as an argument helpers.cmapi_config_check() + register_tracing_tools() # Init Sentry if DSN is present sentry_active = maybe_init_sentry() if sentry_active: @@ -153,6 +155,9 @@ if __name__ == '__main__': root_config = { "request.dispatch": dispatcher, "error_page.default": jsonify_error, + # Enable tracing tools + 'tools.trace.on': True, + 'tools.trace_end.on': True, } if sentry_active: root_config["tools.sentry.on"] = True @@ -230,10 +235,10 @@ if __name__ == '__main__': 'Something went wrong while trying to detect dbrm protocol.\n' 'Seems "controllernode" process isn\'t started.\n' 'This is just a notification, not a problem.\n' - 'Next detection will started at first node\\cluster ' + 'Next detection will start at first node\\cluster ' 'status check.\n' - f'This can cause extra {SOCK_TIMEOUT} seconds delay while\n' - 'first attempt to get status.', + f'This can cause extra {SOCK_TIMEOUT} seconds delay during\n' + 'this first attempt to get the status.', exc_info=True ) else: diff --git a/cmapi/cmapi_server/controllers/api_clients.py b/cmapi/cmapi_server/controllers/api_clients.py index 7b7e69622..74c8723a9 100644 --- a/cmapi/cmapi_server/controllers/api_clients.py +++ b/cmapi/cmapi_server/controllers/api_clients.py @@ -3,6 +3,7 @@ from typing import Any, Dict, Optional, Union import pyotp import requests +from cmapi_server.traced_session import get_traced_session from cmapi_server.controllers.dispatcher import _version from cmapi_server.constants import ( @@ -141,7 +142,7 @@ class ClusterControllerClient: headers['Content-Type'] = 'application/json' data = {'in_transaction': True, **(data or {})} try: - response = requests.request( + response = get_traced_session().request( method, url, headers=headers, json=data, timeout=self.request_timeout, verify=False ) @@ -151,24 +152,26 @@ class ClusterControllerClient: except requests.HTTPError as exc: resp = exc.response error_msg = str(exc) - if resp.status_code == 422: + if resp is not None and resp.status_code == 422: # in this case we think cmapi server returned some value but # had error during running endpoint handler code try: - resp_json = response.json() + resp_json = resp.json() error_msg = resp_json.get('error', resp_json) except requests.exceptions.JSONDecodeError: - error_msg = response.text + error_msg = resp.text message = ( - f'API client got an exception in request to {exc.request.url} ' - f'with code {resp.status_code} and error: {error_msg}' + f'API client got an exception in request to {exc.request.url if exc.request else url} ' + f'with code {resp.status_code if resp is not None else "?"} and error: {error_msg}' ) logging.error(message) raise CMAPIBasicError(message) except requests.exceptions.RequestException as exc: + request_url = getattr(exc.request, 'url', url) + response_status = getattr(getattr(exc, 'response', None), 'status_code', '?') message = ( 'API client got an undefined error in request to ' - f'{exc.request.url} with code {exc.response.status_code} and ' + f'{request_url} with code {response_status} and ' f'error: {str(exc)}' ) logging.error(message) diff --git a/cmapi/cmapi_server/handlers/cluster.py b/cmapi/cmapi_server/handlers/cluster.py index f2d8f892b..0f0d08606 100644 --- a/cmapi/cmapi_server/handlers/cluster.py +++ b/cmapi/cmapi_server/handlers/cluster.py @@ -4,7 +4,7 @@ from datetime import datetime from enum import Enum from typing import Optional -import requests +from cmapi_server.traced_session import get_traced_session from cmapi_server.constants import ( CMAPI_CONF_PATH, DEFAULT_MCS_CONF_PATH, @@ -78,7 +78,7 @@ class ClusterHandler(): for node in active_nodes: url = f'https://{node}:8640/cmapi/{get_version()}/node/status' try: - r = requests.get(url, verify=False, headers=headers) + r = get_traced_session().request('GET', url, verify=False, headers=headers) r.raise_for_status() r_json = r.json() if len(r_json.get('services', 0)) == 0: @@ -277,7 +277,7 @@ class ClusterHandler(): payload['cluster_mode'] = mode try: - r = requests.put(url, headers=headers, json=payload, verify=False) + r = get_traced_session().request('PUT', url, headers=headers, json=payload, verify=False) r.raise_for_status() response['cluster-mode'] = mode except Exception as err: @@ -330,7 +330,7 @@ class ClusterHandler(): logger.debug(f'Setting new api key to "{node}".') url = f'https://{node}:8640/cmapi/{get_version()}/node/apikey-set' try: - resp = requests.put(url, verify=False, json=body) + resp = get_traced_session().request('PUT', url, verify=False, json=body, headers={}) resp.raise_for_status() r_json = resp.json() if active_nodes_count > 0: @@ -383,7 +383,7 @@ class ClusterHandler(): logger.debug(f'Setting new log level to "{node}".') url = f'https://{node}:8640/cmapi/{get_version()}/node/log-level' try: - resp = requests.put(url, verify=False, json=body) + resp = get_traced_session().request('PUT', url, verify=False, json=body, headers={}) resp.raise_for_status() r_json = resp.json() if active_nodes_count > 0: diff --git a/cmapi/cmapi_server/helpers.py b/cmapi/cmapi_server/helpers.py index df58889a1..be38db2f5 100644 --- a/cmapi/cmapi_server/helpers.py +++ b/cmapi/cmapi_server/helpers.py @@ -11,7 +11,6 @@ import os import socket import time from collections import namedtuple -from functools import partial from random import random from shutil import copyfile from typing import Tuple, Optional @@ -20,6 +19,8 @@ from urllib.parse import urlencode, urlunparse import aiohttp import lxml.objectify import requests +from cmapi_server.traced_session import get_traced_session +from cmapi_server.traced_aiohttp import create_traced_async_session from cmapi_server.exceptions import CMAPIBasicError # Bug in pylint https://github.com/PyCQA/pylint/issues/4584 @@ -153,9 +154,9 @@ def start_transaction( body['timeout'] = ( final_time - datetime.datetime.now() ).seconds - r = requests.put( - url, verify=False, headers=headers, json=body, - timeout=10 + r = get_traced_session().request( + 'PUT', url, verify=False, headers=headers, + json=body, timeout=10 ) # a 4xx error from our endpoint; @@ -219,8 +220,9 @@ def rollback_txn_attempt(key, version, txnid, nodes): url = f"https://{node}:8640/cmapi/{version}/node/rollback" for retry in range(5): try: - r = requests.put( - url, verify=False, headers=headers, json=body, timeout=5 + r = get_traced_session().request( + 'PUT', url, verify=False, headers=headers, + json=body, timeout=5 ) r.raise_for_status() except requests.Timeout: @@ -274,7 +276,10 @@ def commit_transaction( url = f"https://{node}:8640/cmapi/{version}/node/commit" for retry in range(5): try: - r = requests.put(url, verify = False, headers = headers, json = body, timeout = 5) + r = get_traced_session().request( + 'PUT', url, verify=False, headers=headers, + json=body, timeout=5 + ) r.raise_for_status() except requests.Timeout as e: logging.warning(f"commit_transaction(): timeout on node {node}") @@ -373,7 +378,7 @@ def broadcast_new_config( url = f'https://{node}:8640/cmapi/{version}/node/config' resp_json: dict = dict() - async with aiohttp.ClientSession() as session: + async with create_traced_async_session() as session: try: async with session.put( url, headers=headers, json=body, ssl=False, timeout=120 @@ -656,7 +661,7 @@ def get_current_config_file( headers = {'x-api-key' : key} url = f'https://{node}:8640/cmapi/{get_version()}/node/config' try: - r = requests.get(url, verify=False, headers=headers, timeout=5) + r = get_traced_session().request('GET', url, verify=False, headers=headers, timeout=5) r.raise_for_status() config = r.json()['config'] except Exception as e: @@ -767,14 +772,17 @@ def if_primary_restart( success = False while not success and datetime.datetime.now() < endtime: try: - response = requests.put(url, verify = False, headers = headers, json = body, timeout = 60) + response = get_traced_session().request( + 'PUT', url, verify=False, headers=headers, + json=body, timeout=60 + ) response.raise_for_status() success = True except Exception as e: logging.warning(f"if_primary_restart(): failed to start the cluster, got {str(e)}") time.sleep(10) if not success: - logging.error(f"if_primary_restart(): failed to start the cluster. Manual intervention is required.") + logging.error("if_primary_restart(): failed to start the cluster. Manual intervention is required.") def get_cej_info(config_root): diff --git a/cmapi/cmapi_server/logging_management.py b/cmapi/cmapi_server/logging_management.py index cffcae122..837b4267f 100644 --- a/cmapi/cmapi_server/logging_management.py +++ b/cmapi/cmapi_server/logging_management.py @@ -7,6 +7,7 @@ import cherrypy from cherrypy import _cperror from cmapi_server.constants import CMAPI_LOG_CONF_PATH +from cmapi_server.tracer import get_tracer class AddIpFilter(logging.Filter): @@ -16,6 +17,28 @@ class AddIpFilter(logging.Filter): return True +def install_trace_record_factory() -> None: + """Install a LogRecord factory that adds 'trace_params' field. + 'trace_params' will be an empty string if there is no active trace/span + (like in MainThread, where there is no incoming requests). + Otherwise it will contain trace parameters. + """ + current_factory = logging.getLogRecordFactory() + + def factory(*args, **kwargs): # type: ignore[no-untyped-def] + record = current_factory(*args, **kwargs) + try: + trace_id, span_id, parent_span_id = get_tracer().current_trace_ids() + record.trace_params = ( + f" rid={trace_id} sid={span_id} psid={parent_span_id}" + ) + except Exception: + record.trace_params = " rid= sid= psid=" + return record + + logging.setLogRecordFactory(factory) + + def custom_cherrypy_error( self, msg='', context='', severity=logging.INFO, traceback=False ): @@ -119,6 +142,8 @@ def config_cmapi_server_logging(): cherrypy._cplogging.LogManager.access_log_format = ( '{h} ACCESS "{r}" code {s}, bytes {b}, user-agent "{a}"' ) + # Ensure trace_params is available on every record + install_trace_record_factory() dict_config(CMAPI_LOG_CONF_PATH) diff --git a/cmapi/cmapi_server/node_manipulation.py b/cmapi/cmapi_server/node_manipulation.py index bccdb142a..ff0d5259c 100644 --- a/cmapi/cmapi_server/node_manipulation.py +++ b/cmapi/cmapi_server/node_manipulation.py @@ -20,7 +20,9 @@ from cmapi_server.constants import ( CMAPI_CONF_PATH, CMAPI_SINGLE_NODE_XML, DEFAULT_MCS_CONF_PATH, LOCALHOSTS, MCS_DATA_PATH, ) +from cmapi_server.traced_session import get_traced_session from cmapi_server.managers.network import NetworkManager +from cmapi_server.tracer import get_tracer from mcs_node_control.models.node_config import NodeConfig @@ -617,7 +619,9 @@ def _rebalance_dbroots(root, test_mode=False): headers = {'x-api-key': key} url = f"https://{node_ip}:8640/cmapi/{version}/node/new_primary" try: - r = requests.get(url, verify = False, headers = headers, timeout = 10) + r = get_traced_session().request( + 'GET', url, verify=False, headers=headers, timeout=10 + ) r.raise_for_status() r = r.json() is_primary = r['is_primary'] diff --git a/cmapi/cmapi_server/trace_tool.py b/cmapi/cmapi_server/trace_tool.py new file mode 100644 index 000000000..7164373d6 --- /dev/null +++ b/cmapi/cmapi_server/trace_tool.py @@ -0,0 +1,51 @@ +""" +CherryPy tool that uses the tracer to start a span for each request. + +If traceparent header is present in the request, the tool will continue this trace chain. +Otherwise, it will start a new trace (with a new trace_id). +""" +from typing import Dict + +import cherrypy + +from cmapi_server.tracer import get_tracer + + +def _on_request_start() -> None: + """CherryPy tool hook: extract incoming context and start a SERVER span.""" + req = cherrypy.request + tracer = get_tracer() + + headers: Dict[str, str] = dict(req.headers or {}) + trace_id, parent_span_id = tracer.extract_traceparent(headers) + tracer.set_incoming_context(trace_id, parent_span_id) + + span_name = f"{getattr(req, 'method', 'HTTP')} {getattr(req, 'path_info', '/')}" + + ctx = tracer.start_as_current_span(span_name, kind="SERVER") + span = ctx.__enter__() + setattr(req, "_trace_span_ctx", ctx) + setattr(req, "_trace_span", span) + + # Echo current traceparent to the client + tracer.inject_traceparent(cherrypy.response.headers) # type: ignore[arg-type] + + +def _on_request_end() -> None: + """CherryPy tool hook: end the SERVER span started at request start.""" + req = cherrypy.request + ctx = getattr(req, "_trace_span_ctx", None) + if ctx is not None: + try: + ctx.__exit__(None, None, None) + finally: + setattr(req, "_trace_span_ctx", None) + setattr(req, "_trace_span", None) + + +def register_tracing_tools() -> None: + """Register CherryPy tools for request tracing.""" + cherrypy.tools.trace = cherrypy.Tool("on_start_resource", _on_request_start, priority=10) + cherrypy.tools.trace_end = cherrypy.Tool("on_end_resource", _on_request_end, priority=80) + + diff --git a/cmapi/cmapi_server/traced_aiohttp.py b/cmapi/cmapi_server/traced_aiohttp.py new file mode 100644 index 000000000..4643e217d --- /dev/null +++ b/cmapi/cmapi_server/traced_aiohttp.py @@ -0,0 +1,43 @@ +"""Async sibling of TracedSession""" + +from typing import Any + +import aiohttp + +from cmapi_server.tracer import get_tracer + + +class TracedAsyncSession(aiohttp.ClientSession): + async def _request( + self, method: str, str_or_url: Any, *args: Any, **kwargs: Any + ) -> aiohttp.ClientResponse: + tracer = get_tracer() + + headers = kwargs.get("headers") or {} + if headers is None: + headers = {} + kwargs["headers"] = headers + + url_text = str(str_or_url) + span_name = f"HTTP {method} {url_text}" + with tracer.start_as_current_span(span_name, kind="CLIENT") as span: + span.set_attribute("http.method", method) + span.set_attribute("http.url", url_text) + try: + tracer.inject_traceparent(headers) + except Exception: + pass + try: + response = await super()._request(method, str_or_url, *args, **kwargs) + except Exception as exc: + span.set_status("ERROR", str(exc)) + raise + else: + span.set_attribute("http.status_code", response.status) + return response + + +def create_traced_async_session(**kwargs: Any) -> TracedAsyncSession: + return TracedAsyncSession(**kwargs) + + diff --git a/cmapi/cmapi_server/traced_session.py b/cmapi/cmapi_server/traced_session.py new file mode 100644 index 000000000..120e2379e --- /dev/null +++ b/cmapi/cmapi_server/traced_session.py @@ -0,0 +1,52 @@ +"""Our own customized requests.Session that automatically traces outbound HTTP calls. + +Creates a CLIENT span per outbound HTTP request, injects traceparent, +records method/url/status, and closes the span when the request finishes. +""" + +from typing import Any, Optional + +import requests + +from cmapi_server.tracer import get_tracer + + +class TracedSession(requests.Session): + """requests.Session that automatically traces outbound HTTP calls.""" + + def request(self, method: str, url: str, *args: Any, **kwargs: Any) -> requests.Response: + tracer = get_tracer() + + headers = kwargs.get("headers") or {} + if headers is None: + headers = {} + kwargs["headers"] = headers + + span_name = f"HTTP {method} {url}" + with tracer.start_as_current_span(span_name, kind="CLIENT") as span: + span.set_attribute("http.method", method) + span.set_attribute("http.url", url) + + tracer.inject_traceparent(headers) + try: + response = super().request(method, url, *args, **kwargs) + except Exception as exc: + span.set_status("ERROR", str(exc)) + raise + else: + # Record status code + span.set_attribute("http.status_code", response.status_code) + return response + + +_default_session: Optional[TracedSession] = None + + +def get_traced_session() -> TracedSession: + """Return a process-wide TracedSession singleton.""" + global _default_session + if _default_session is None: + _default_session = TracedSession() + return _default_session + + diff --git a/cmapi/cmapi_server/tracer.py b/cmapi/cmapi_server/tracer.py new file mode 100644 index 000000000..1b85b2e4d --- /dev/null +++ b/cmapi/cmapi_server/tracer.py @@ -0,0 +1,210 @@ +"""Support distributed request tracing via W3C Trace Context. +See https://www.w3.org/TR/trace-context/ for the official spec. + +There are 3 and a half main components: +- trace_id: a unique identifier for a trace. + It is a 32-hex string, passed in the outbound HTTP requests headers, so that we can + trace the request chain through the system. +- span_id: a unique identifier for a span (the current operation within a trace chain). + It is a 16-hex string. For example, when we we receive a request to add a host, the addition + of the host is a separate span within the request chain. +- parent_span_id: a unique identifier for the parent span of the current span. + Continuing the example above, when we add a host, first we start a transaction, + then we add the host. If we are already adding a host, then creation of the transaction + is the parent span of the current span. +- traceparent: a header that combines trace_id and span_id in one value. + It has the format "00---". + +A system that calls CMAPI can pass the traceparent header in the request, so that we can pass + the trace_id through the system, changing span_id as we enter new sub-operations. + +We can reconstruct the trace tree from the set of the logged traceparent attributes, + representing how the request was processed, which nodes were involved, + how much time did each op take, etc. + +This module implements a tracer class that creates spans, injects/extracts traceparent headers. +It uses contextvars to store the trace/span/parent_span ids and start time for each context. +""" + +from __future__ import annotations + +import contextvars +import logging +import os +import time +from collections.abc import Iterator +from contextlib import contextmanager +from dataclasses import dataclass +from typing import Any, Dict, Optional + +logger = logging.getLogger(__name__) + +# Contextvars containing trace/span/parent_span ids and start time for this thread +# (contextvars are something like TLS, they contain variables that are local to the context) +_current_trace_id = contextvars.ContextVar[str]("trace_id", default="") +_current_span_id = contextvars.ContextVar[str]("span_id", default="") +_current_parent_span_id = contextvars.ContextVar[str]("parent_span_id", default="") +_current_span_start_ns = contextvars.ContextVar[int]("span_start_ns", default=0) + + +def _rand_16_hex() -> str: + # 16 hex bytes (32 hex chars) + return os.urandom(16).hex() + +def _rand_8_hex() -> str: + # 8 hex bytes (16 hex chars) + return os.urandom(8).hex() + +def format_traceparent(trace_id: str, span_id: str, flags: str = "01") -> str: + """W3C traceparent: version 00""" + # version(2)-trace_id(32)-span_id(16)-flags(2) + return f"00-{trace_id}-{span_id}-{flags}" + +def parse_traceparent(header: str) -> Optional[tuple[str, str, str]]: + """Return (trace_id, span_id, flags) or None if invalid.""" + try: + parts = header.strip().split("-") + if len(parts) != 4 or parts[0] != "00": + logger.error(f"Invalid traceparent: {header}") + return None + trace_id, span_id, flags = parts[1], parts[2], parts[3] + if len(trace_id) != 32 or len(span_id) != 16 or len(flags) != 2: + return None + # W3C: all zero trace_id/span_id are invalid + if set(trace_id) == {"0"} or set(span_id) == {"0"}: + return None + return trace_id, span_id, flags + except Exception: + logger.error(f"Failed to parse traceparent: {header}") + return None + + +@dataclass +class TraceSpan: + """Lightweight span handle; keeps attributes in memory (for logging only).""" + name: str + kind: str # "SERVER" | "CLIENT" | "INTERNAL" + start_ns: int + trace_id: str + span_id: str + parent_span_id: str + attributes: Dict[str, Any] + + def set_attribute(self, key: str, value: Any) -> None: + self.attributes[key] = value + + def add_event(self, name: str, **attrs: Any) -> None: + # For simplicity we just log events immediately + logger.info( + "event name=%s trace_id=%s span_id=%s attrs=%s", + name, self.trace_id, self.span_id, attrs + ) + + def set_status(self, code: str, description: str = "") -> None: + self.attributes["status.code"] = code + if description: + self.attributes["status.description"] = description + + def record_exception(self, exc: BaseException) -> None: + self.add_event("exception", type=type(exc).__name__, msg=str(exc)) + + +class Tracer: + """Encapsulates everything related to tracing (span creation, logging, etc)""" + @contextmanager + def start_as_current_span(self, name: str, kind: str = "INTERNAL") -> Iterator[TraceSpan]: + trace_id = _current_trace_id.get() or _rand_16_hex() + parent_span = _current_span_id.get() + new_span_id = _rand_8_hex() + + # Push new context + tok_tid = _current_trace_id.set(trace_id) + tok_sid = _current_span_id.set(new_span_id) + tok_pid = _current_parent_span_id.set(parent_span) + tok_start = _current_span_start_ns.set(time.time_ns()) + + span = TraceSpan( + name=name, + kind=kind, + start_ns=_current_span_start_ns.get(), + trace_id=trace_id, + span_id=new_span_id, + parent_span_id=parent_span, + attributes={"span.kind": kind, "span.name": name}, + ) + + try: + logger.info( + "span_begin name=%s kind=%s trace_id=%s span_id=%s parent_span_id=%s attrs=%s", + span.name, span.kind, span.trace_id, span.span_id, span.parent_span_id, span.attributes + ) + yield span + except BaseException as exc: + span.record_exception(exc) + span.set_status("ERROR", str(exc)) + raise + finally: + # Pop the span from the context (restore parent context) + duration_ms = (time.time_ns() - span.start_ns) / 1_000_000 + logger.info( + "span_end name=%s kind=%s trace_id=%s span_id=%s parent_span_id=%s duration_ms=%.3f attrs=%s", + span.name, span.kind, span.trace_id, span.span_id, span.parent_span_id, duration_ms, span.attributes + ) + # Restore previous context + _current_span_start_ns.reset(tok_start) + _current_parent_span_id.reset(tok_pid) + _current_span_id.reset(tok_sid) + _current_trace_id.reset(tok_tid) + + def set_incoming_context( + self, + trace_id: Optional[str] = None, + parent_span_id: Optional[str] = None, + ) -> None: + """Seed current context with incoming W3C traceparent values. + + Only non-empty values are applied. + """ + if trace_id: + _current_trace_id.set(trace_id) + if parent_span_id: + _current_parent_span_id.set(parent_span_id) + + def current_trace_ids(self) -> tuple[str, str, str]: + return _current_trace_id.get(), _current_span_id.get(), _current_parent_span_id.get() + + def inject_traceparent(self, headers: Dict[str, str]) -> None: + """Inject W3C traceparent into outbound headers.""" + trace_id, span_id, _ = self.current_trace_ids() + if not trace_id or not span_id: + # If called outside of a span, create a short-lived span just to carry IDs + trace_id = trace_id or _rand_16_hex() + span_id = span_id or _rand_8_hex() + headers["traceparent"] = format_traceparent(trace_id, span_id) + + def extract_traceparent(self, headers: Dict[str, str]) -> tuple[str, str]: + """Extract parent trace/span; returns (trace_id, parent_span_id).""" + raw_traceparent = (headers.get("traceparent") + or headers.get("Traceparent") + or headers.get("TRACEPARENT")) + if not raw_traceparent: + return "", "" + parsed = parse_traceparent(raw_traceparent) + if not parsed: + return "", "" + return parsed[0], parsed[1] + # No incoming context + return "", "" + +# Tracer singleton for the process (not thread) +_tracer = Tracer() + +def get_tracer() -> Tracer: + return _tracer + + +class TraceLogFilter(logging.Filter): + """Inject trace/span ids into LogRecord for formatting.""" + def filter(self, record: logging.LogRecord) -> bool: + record.traceID, record.spanID, record.parentSpanID = get_tracer().current_trace_ids() + return True From 9b98c5c20a2e7fdda094ab08f92534947d0220b7 Mon Sep 17 00:00:00 2001 From: Alexander Presnyakov Date: Fri, 29 Aug 2025 23:04:24 +0000 Subject: [PATCH 08/18] Created a separate package for tracing-related stuff Added mirroring of spans into Sentry Tracer is a facade that redirects actions to tracing backends --- cmapi/CMakeLists.txt | 1 + cmapi/cmapi_server/__main__.py | 14 +- cmapi/cmapi_server/cmapi_logger.conf | 6 +- cmapi/cmapi_server/controllers/api_clients.py | 8 +- cmapi/cmapi_server/handlers/cluster.py | 25 ++- cmapi/cmapi_server/helpers.py | 4 +- cmapi/cmapi_server/logging_management.py | 16 +- cmapi/cmapi_server/node_manipulation.py | 11 +- cmapi/cmapi_server/sentry.py | 197 ---------------- cmapi/cmapi_server/trace_tool.py | 51 ----- cmapi/cmapi_server/tracer.py | 210 ------------------ cmapi/tracing/__init__.py | 49 ++++ cmapi/tracing/backend.py | 37 +++ cmapi/tracing/sentry.py | 65 ++++++ cmapi/tracing/sentry_backend.py | 114 ++++++++++ cmapi/tracing/span.py | 46 ++++ cmapi/tracing/trace_tool.py | 67 ++++++ .../traced_aiohttp.py | 11 +- .../traced_session.py | 16 +- cmapi/tracing/traceparent_backend.py | 38 ++++ cmapi/tracing/tracer.py | 155 +++++++++++++ cmapi/tracing/utils.py | 54 +++++ 22 files changed, 681 insertions(+), 514 deletions(-) delete mode 100644 cmapi/cmapi_server/sentry.py delete mode 100644 cmapi/cmapi_server/trace_tool.py delete mode 100644 cmapi/cmapi_server/tracer.py create mode 100644 cmapi/tracing/__init__.py create mode 100644 cmapi/tracing/backend.py create mode 100644 cmapi/tracing/sentry.py create mode 100644 cmapi/tracing/sentry_backend.py create mode 100644 cmapi/tracing/span.py create mode 100644 cmapi/tracing/trace_tool.py rename cmapi/{cmapi_server => tracing}/traced_aiohttp.py (84%) rename cmapi/{cmapi_server => tracing}/traced_session.py (69%) create mode 100644 cmapi/tracing/traceparent_backend.py create mode 100644 cmapi/tracing/tracer.py create mode 100644 cmapi/tracing/utils.py diff --git a/cmapi/CMakeLists.txt b/cmapi/CMakeLists.txt index 43f44816d..5130ec85f 100644 --- a/cmapi/CMakeLists.txt +++ b/cmapi/CMakeLists.txt @@ -72,6 +72,7 @@ install( cmapi_server engine_files mcs_cluster_tool + tracing DESTINATION ${CMAPI_DIR} USE_SOURCE_PERMISSIONS PATTERN "test" EXCLUDE diff --git a/cmapi/cmapi_server/__main__.py b/cmapi/cmapi_server/__main__.py index 5a8a2de58..e037f4f7f 100644 --- a/cmapi/cmapi_server/__main__.py +++ b/cmapi/cmapi_server/__main__.py @@ -16,9 +16,11 @@ from cherrypy.process import plugins # TODO: fix dispatcher choose logic because code executing in endpoints.py # while import process, this cause module logger misconfiguration from cmapi_server.logging_management import config_cmapi_server_logging -from cmapi_server.sentry import maybe_init_sentry, register_sentry_cherrypy_tool +from tracing.sentry import maybe_init_sentry +from tracing.traceparent_backend import TraceparentBackend +from tracing.tracer import get_tracer config_cmapi_server_logging() -from cmapi_server.trace_tool import register_tracing_tools +from tracing.trace_tool import register_tracing_tools from cmapi_server import helpers from cmapi_server.constants import DEFAULT_MCS_CONF_PATH, CMAPI_CONF_PATH @@ -143,10 +145,8 @@ if __name__ == '__main__': helpers.cmapi_config_check() register_tracing_tools() - # Init Sentry if DSN is present - sentry_active = maybe_init_sentry() - if sentry_active: - register_sentry_cherrypy_tool() + get_tracer().register_backend(TraceparentBackend()) # Register default tracing backend + maybe_init_sentry() # Init Sentry if DSN is present CertificateManager.create_self_signed_certificate_if_not_exist() CertificateManager.renew_certificate() @@ -159,8 +159,6 @@ if __name__ == '__main__': 'tools.trace.on': True, 'tools.trace_end.on': True, } - if sentry_active: - root_config["tools.sentry.on"] = True app.config.update({ '/': root_config, diff --git a/cmapi/cmapi_server/cmapi_logger.conf b/cmapi/cmapi_server/cmapi_logger.conf index 2bc3d383d..301eed718 100644 --- a/cmapi/cmapi_server/cmapi_logger.conf +++ b/cmapi/cmapi_server/cmapi_logger.conf @@ -7,11 +7,11 @@ }, "formatters": { "cmapi_server": { - "format": "%(asctime)s [%(levelname)s] (%(name)s) {%(threadName)s} %(ip)s %(message)s", + "format": "%(asctime)s [%(levelname)s] (%(name)s) {%(threadName)s} %(ip)s %(message)s %(trace_params)s", "datefmt": "%d/%b/%Y %H:%M:%S" }, "default": { - "format": "%(asctime)s [%(levelname)s] (%(name)s) {%(threadName)s} %(message)s", + "format": "%(asctime)s [%(levelname)s] (%(name)s) {%(threadName)s} %(message)s %(trace_params)s", "datefmt": "%d/%b/%Y %H:%M:%S" }, "container_sh": { @@ -75,7 +75,7 @@ "level": "DEBUG", "propagate": false }, - "": { + "root": { "handlers": ["console", "file"], "level": "DEBUG" } diff --git a/cmapi/cmapi_server/controllers/api_clients.py b/cmapi/cmapi_server/controllers/api_clients.py index 74c8723a9..27063543e 100644 --- a/cmapi/cmapi_server/controllers/api_clients.py +++ b/cmapi/cmapi_server/controllers/api_clients.py @@ -3,14 +3,16 @@ from typing import Any, Dict, Optional, Union import pyotp import requests -from cmapi_server.traced_session import get_traced_session -from cmapi_server.controllers.dispatcher import _version from cmapi_server.constants import ( - CMAPI_CONF_PATH, CURRENT_NODE_CMAPI_URL, SECRET_KEY, + CMAPI_CONF_PATH, + CURRENT_NODE_CMAPI_URL, + SECRET_KEY, ) +from cmapi_server.controllers.dispatcher import _version from cmapi_server.exceptions import CMAPIBasicError from cmapi_server.helpers import get_config_parser, get_current_key +from tracing.traced_session import get_traced_session class ClusterControllerClient: diff --git a/cmapi/cmapi_server/handlers/cluster.py b/cmapi/cmapi_server/handlers/cluster.py index 0f0d08606..c78f5350b 100644 --- a/cmapi/cmapi_server/handlers/cluster.py +++ b/cmapi/cmapi_server/handlers/cluster.py @@ -4,21 +4,30 @@ from datetime import datetime from enum import Enum from typing import Optional -from cmapi_server.traced_session import get_traced_session +from mcs_node_control.models.misc import get_dbrm_master +from mcs_node_control.models.node_config import NodeConfig from cmapi_server.constants import ( - CMAPI_CONF_PATH, DEFAULT_MCS_CONF_PATH, + CMAPI_CONF_PATH, + DEFAULT_MCS_CONF_PATH, ) from cmapi_server.exceptions import CMAPIBasicError from cmapi_server.helpers import ( - broadcast_new_config, get_active_nodes, get_dbroots, get_config_parser, - get_current_key, get_version, update_revision_and_manager, + broadcast_new_config, + get_active_nodes, + get_config_parser, + get_current_key, + get_dbroots, + get_version, + update_revision_and_manager, ) from cmapi_server.node_manipulation import ( - add_node, add_dbroot, remove_node, switch_node_maintenance, + add_dbroot, + add_node, + remove_node, + switch_node_maintenance, ) -from mcs_node_control.models.misc import get_dbrm_master -from mcs_node_control.models.node_config import NodeConfig +from tracing.traced_session import get_traced_session class ClusterAction(Enum): @@ -50,7 +59,7 @@ def toggle_cluster_state( broadcast_new_config(config, distribute_secrets=True) -class ClusterHandler(): +class ClusterHandler: """Class for handling MCS Cluster operations.""" @staticmethod diff --git a/cmapi/cmapi_server/helpers.py b/cmapi/cmapi_server/helpers.py index be38db2f5..cb9baf1df 100644 --- a/cmapi/cmapi_server/helpers.py +++ b/cmapi/cmapi_server/helpers.py @@ -19,8 +19,8 @@ from urllib.parse import urlencode, urlunparse import aiohttp import lxml.objectify import requests -from cmapi_server.traced_session import get_traced_session -from cmapi_server.traced_aiohttp import create_traced_async_session +from tracing.traced_session import get_traced_session +from tracing.traced_aiohttp import create_traced_async_session from cmapi_server.exceptions import CMAPIBasicError # Bug in pylint https://github.com/PyCQA/pylint/issues/4584 diff --git a/cmapi/cmapi_server/logging_management.py b/cmapi/cmapi_server/logging_management.py index 837b4267f..9949db431 100644 --- a/cmapi/cmapi_server/logging_management.py +++ b/cmapi/cmapi_server/logging_management.py @@ -7,7 +7,7 @@ import cherrypy from cherrypy import _cperror from cmapi_server.constants import CMAPI_LOG_CONF_PATH -from cmapi_server.tracer import get_tracer +from tracing.tracer import get_tracer class AddIpFilter(logging.Filter): @@ -27,13 +27,13 @@ def install_trace_record_factory() -> None: def factory(*args, **kwargs): # type: ignore[no-untyped-def] record = current_factory(*args, **kwargs) - try: - trace_id, span_id, parent_span_id = get_tracer().current_trace_ids() - record.trace_params = ( - f" rid={trace_id} sid={span_id} psid={parent_span_id}" - ) - except Exception: - record.trace_params = " rid= sid= psid=" + trace_id, span_id, parent_span_id = get_tracer().current_trace_ids() + if trace_id and span_id: + record.trace_params = f'rid={trace_id} sid={span_id}' + if parent_span_id: + record.trace_params += f' psid={parent_span_id}' + else: + record.trace_params = "" return record logging.setLogRecordFactory(factory) diff --git a/cmapi/cmapi_server/node_manipulation.py b/cmapi/cmapi_server/node_manipulation.py index ff0d5259c..4d2d9e5dd 100644 --- a/cmapi/cmapi_server/node_manipulation.py +++ b/cmapi/cmapi_server/node_manipulation.py @@ -14,17 +14,18 @@ from typing import Optional import requests from lxml import etree +from mcs_node_control.models.node_config import NodeConfig from cmapi_server import helpers from cmapi_server.constants import ( - CMAPI_CONF_PATH, CMAPI_SINGLE_NODE_XML, DEFAULT_MCS_CONF_PATH, LOCALHOSTS, + CMAPI_CONF_PATH, + CMAPI_SINGLE_NODE_XML, + DEFAULT_MCS_CONF_PATH, + LOCALHOSTS, MCS_DATA_PATH, ) -from cmapi_server.traced_session import get_traced_session from cmapi_server.managers.network import NetworkManager -from cmapi_server.tracer import get_tracer -from mcs_node_control.models.node_config import NodeConfig - +from tracing.traced_session import get_traced_session PMS_NODE_PORT = '8620' EXEMGR_NODE_PORT = '8601' diff --git a/cmapi/cmapi_server/sentry.py b/cmapi/cmapi_server/sentry.py deleted file mode 100644 index 7777ee8fc..000000000 --- a/cmapi/cmapi_server/sentry.py +++ /dev/null @@ -1,197 +0,0 @@ -import logging -import socket - -import cherrypy -import sentry_sdk -from sentry_sdk.integrations.aiohttp import AioHttpIntegration -from sentry_sdk.integrations.logging import LoggingIntegration - -from cmapi_server import helpers -from cmapi_server.constants import CMAPI_CONF_PATH - -SENTRY_ACTIVE = False - -logger = logging.getLogger(__name__) - -def maybe_init_sentry() -> bool: - """Initialize Sentry from CMAPI configuration. - - Reads config and initializes Sentry only if dsn parameter is present in corresponding section. - The initialization enables the following integrations: - - LoggingIntegration: capture warning-level logs as Sentry events and use - lower-level logs as breadcrumbs. - - AioHttpIntegration: propagate trace headers for outbound requests made - with `aiohttp`. - - The function is a no-op if the DSN is missing. - - Returns: True if Sentry is initialized, False otherwise. - """ - global SENTRY_ACTIVE - try: - cfg_parser = helpers.get_config_parser(CMAPI_CONF_PATH) - dsn = helpers.dequote( - cfg_parser.get('Sentry', 'dsn', fallback='').strip() - ) - if not dsn: - return False - - environment = helpers.dequote( - cfg_parser.get('Sentry', 'environment', fallback='development').strip() - ) - traces_sample_rate_str = helpers.dequote( - cfg_parser.get('Sentry', 'traces_sample_rate', fallback='1.0').strip() - ) - except Exception: - logger.exception('Failed to initialize Sentry.') - return False - - try: - sentry_logging = LoggingIntegration( - level=logging.INFO, - event_level=logging.WARNING, - ) - - try: - traces_sample_rate = float(traces_sample_rate_str) - except ValueError: - logger.error('Invalid traces_sample_rate: %s', traces_sample_rate_str) - traces_sample_rate = 1.0 - - sentry_sdk.init( - dsn=dsn, - environment=environment, - traces_sample_rate=traces_sample_rate, - integrations=[sentry_logging, AioHttpIntegration()], - ) - SENTRY_ACTIVE = True - logger.info('Sentry initialized for CMAPI via config.') - except Exception: - logger.exception('Failed to initialize Sentry.') - return False - - logger.info('Sentry successfully initialized.') - return True - -def _sentry_on_start_resource(): - """Start or continue a Sentry transaction for the current CherryPy request. - - - Continues an incoming distributed trace using Sentry trace headers if - present; otherwise starts a new transaction with `op='http.server'`. - - Pushes the transaction into the current Sentry scope and attaches useful - request metadata as tags and context (HTTP method, path, client IP, - hostname, request ID, and a filtered subset of headers). - - Stores the transaction on the CherryPy request object for later finishing - in `_sentry_on_end_request`. - """ - if not SENTRY_ACTIVE: - return - try: - request = cherrypy.request - headers = dict(getattr(request, 'headers', {}) or {}) - name = f"{request.method} {request.path_info}" - transaction = sentry_sdk.start_transaction( - op='http.server', name=name, continue_from_headers=headers - ) - sentry_sdk.Hub.current.scope.set_span(transaction) - - # Add request-level context/tags - scope = sentry_sdk.Hub.current.scope - scope.set_tag('http.method', request.method) - scope.set_tag('http.path', request.path_info) - scope.set_tag('client.ip', getattr(request.remote, 'ip', '')) - scope.set_tag('instance.hostname', socket.gethostname()) - request_id = getattr(request, 'unique_id', None) - if request_id: - scope.set_tag('request.id', request_id) - # Optionally add headers as context without sensitive values - safe_headers = {k: v for k, v in headers.items() - if k.lower() not in {'authorization', 'x-api-key'}} - scope.set_context('headers', safe_headers) - - request.sentry_transaction = transaction - except Exception: - logger.exception('Failed to start Sentry transaction.') - - -def _sentry_before_error_response(): - """Capture the current exception (if any) to Sentry before error response. - - This hook runs when CherryPy prepares an error response. If an exception is - available in the current context, it will be sent to Sentry. - """ - if not SENTRY_ACTIVE: - return - try: - sentry_sdk.capture_exception() - except Exception: - logger.exception('Failed to capture exception to Sentry.') - - -def _sentry_on_end_request(): - """Finish the Sentry transaction for the current CherryPy request. - - Attempts to set the HTTP status code on the active transaction and then - finishes it. If no transaction was started on this request, the function is - a no-op. - """ - if not SENTRY_ACTIVE: - return - try: - request = cherrypy.request - transaction = getattr(request, 'sentry_transaction', None) - if transaction is None: - return - status = cherrypy.response.status - try: - status_code = int(str(status).split()[0]) - except Exception: - status_code = None - try: - if status_code is not None and hasattr(transaction, 'set_http_status'): - transaction.set_http_status(status_code) - except Exception: - logger.exception('Failed to set HTTP status code on Sentry transaction.') - transaction.finish() - except Exception: - logger.exception('Failed to finish Sentry transaction.') - - -class SentryTool(cherrypy.Tool): - """CherryPy Tool that wires Sentry request lifecycle hooks. - - The tool attaches handlers for `on_start_resource`, `before_error_response`, - and `on_end_request` in order to manage Sentry transactions and error - capture across the request lifecycle. - """ - def __init__(self): - cherrypy.Tool.__init__(self, 'on_start_resource', self._tool_callback, priority=50) - - @staticmethod - def _tool_callback(): - """Attach Sentry lifecycle callbacks to the current CherryPy request.""" - cherrypy.request.hooks.attach( - 'on_start_resource', _sentry_on_start_resource, priority=50 - ) - cherrypy.request.hooks.attach( - 'before_error_response', _sentry_before_error_response, priority=60 - ) - cherrypy.request.hooks.attach( - 'on_end_request', _sentry_on_end_request, priority=70 - ) - - -def register_sentry_cherrypy_tool() -> None: - """Register the Sentry CherryPy tool under `tools.sentry`. - - This function is safe to call multiple times; failures are silently ignored - to avoid impacting the application startup. - """ - if not SENTRY_ACTIVE: - return - - try: - cherrypy.tools.sentry = SentryTool() - except Exception: - logger.exception('Failed to register Sentry CherryPy tool.') - diff --git a/cmapi/cmapi_server/trace_tool.py b/cmapi/cmapi_server/trace_tool.py deleted file mode 100644 index 7164373d6..000000000 --- a/cmapi/cmapi_server/trace_tool.py +++ /dev/null @@ -1,51 +0,0 @@ -""" -CherryPy tool that uses the tracer to start a span for each request. - -If traceparent header is present in the request, the tool will continue this trace chain. -Otherwise, it will start a new trace (with a new trace_id). -""" -from typing import Dict - -import cherrypy - -from cmapi_server.tracer import get_tracer - - -def _on_request_start() -> None: - """CherryPy tool hook: extract incoming context and start a SERVER span.""" - req = cherrypy.request - tracer = get_tracer() - - headers: Dict[str, str] = dict(req.headers or {}) - trace_id, parent_span_id = tracer.extract_traceparent(headers) - tracer.set_incoming_context(trace_id, parent_span_id) - - span_name = f"{getattr(req, 'method', 'HTTP')} {getattr(req, 'path_info', '/')}" - - ctx = tracer.start_as_current_span(span_name, kind="SERVER") - span = ctx.__enter__() - setattr(req, "_trace_span_ctx", ctx) - setattr(req, "_trace_span", span) - - # Echo current traceparent to the client - tracer.inject_traceparent(cherrypy.response.headers) # type: ignore[arg-type] - - -def _on_request_end() -> None: - """CherryPy tool hook: end the SERVER span started at request start.""" - req = cherrypy.request - ctx = getattr(req, "_trace_span_ctx", None) - if ctx is not None: - try: - ctx.__exit__(None, None, None) - finally: - setattr(req, "_trace_span_ctx", None) - setattr(req, "_trace_span", None) - - -def register_tracing_tools() -> None: - """Register CherryPy tools for request tracing.""" - cherrypy.tools.trace = cherrypy.Tool("on_start_resource", _on_request_start, priority=10) - cherrypy.tools.trace_end = cherrypy.Tool("on_end_resource", _on_request_end, priority=80) - - diff --git a/cmapi/cmapi_server/tracer.py b/cmapi/cmapi_server/tracer.py deleted file mode 100644 index 1b85b2e4d..000000000 --- a/cmapi/cmapi_server/tracer.py +++ /dev/null @@ -1,210 +0,0 @@ -"""Support distributed request tracing via W3C Trace Context. -See https://www.w3.org/TR/trace-context/ for the official spec. - -There are 3 and a half main components: -- trace_id: a unique identifier for a trace. - It is a 32-hex string, passed in the outbound HTTP requests headers, so that we can - trace the request chain through the system. -- span_id: a unique identifier for a span (the current operation within a trace chain). - It is a 16-hex string. For example, when we we receive a request to add a host, the addition - of the host is a separate span within the request chain. -- parent_span_id: a unique identifier for the parent span of the current span. - Continuing the example above, when we add a host, first we start a transaction, - then we add the host. If we are already adding a host, then creation of the transaction - is the parent span of the current span. -- traceparent: a header that combines trace_id and span_id in one value. - It has the format "00---". - -A system that calls CMAPI can pass the traceparent header in the request, so that we can pass - the trace_id through the system, changing span_id as we enter new sub-operations. - -We can reconstruct the trace tree from the set of the logged traceparent attributes, - representing how the request was processed, which nodes were involved, - how much time did each op take, etc. - -This module implements a tracer class that creates spans, injects/extracts traceparent headers. -It uses contextvars to store the trace/span/parent_span ids and start time for each context. -""" - -from __future__ import annotations - -import contextvars -import logging -import os -import time -from collections.abc import Iterator -from contextlib import contextmanager -from dataclasses import dataclass -from typing import Any, Dict, Optional - -logger = logging.getLogger(__name__) - -# Contextvars containing trace/span/parent_span ids and start time for this thread -# (contextvars are something like TLS, they contain variables that are local to the context) -_current_trace_id = contextvars.ContextVar[str]("trace_id", default="") -_current_span_id = contextvars.ContextVar[str]("span_id", default="") -_current_parent_span_id = contextvars.ContextVar[str]("parent_span_id", default="") -_current_span_start_ns = contextvars.ContextVar[int]("span_start_ns", default=0) - - -def _rand_16_hex() -> str: - # 16 hex bytes (32 hex chars) - return os.urandom(16).hex() - -def _rand_8_hex() -> str: - # 8 hex bytes (16 hex chars) - return os.urandom(8).hex() - -def format_traceparent(trace_id: str, span_id: str, flags: str = "01") -> str: - """W3C traceparent: version 00""" - # version(2)-trace_id(32)-span_id(16)-flags(2) - return f"00-{trace_id}-{span_id}-{flags}" - -def parse_traceparent(header: str) -> Optional[tuple[str, str, str]]: - """Return (trace_id, span_id, flags) or None if invalid.""" - try: - parts = header.strip().split("-") - if len(parts) != 4 or parts[0] != "00": - logger.error(f"Invalid traceparent: {header}") - return None - trace_id, span_id, flags = parts[1], parts[2], parts[3] - if len(trace_id) != 32 or len(span_id) != 16 or len(flags) != 2: - return None - # W3C: all zero trace_id/span_id are invalid - if set(trace_id) == {"0"} or set(span_id) == {"0"}: - return None - return trace_id, span_id, flags - except Exception: - logger.error(f"Failed to parse traceparent: {header}") - return None - - -@dataclass -class TraceSpan: - """Lightweight span handle; keeps attributes in memory (for logging only).""" - name: str - kind: str # "SERVER" | "CLIENT" | "INTERNAL" - start_ns: int - trace_id: str - span_id: str - parent_span_id: str - attributes: Dict[str, Any] - - def set_attribute(self, key: str, value: Any) -> None: - self.attributes[key] = value - - def add_event(self, name: str, **attrs: Any) -> None: - # For simplicity we just log events immediately - logger.info( - "event name=%s trace_id=%s span_id=%s attrs=%s", - name, self.trace_id, self.span_id, attrs - ) - - def set_status(self, code: str, description: str = "") -> None: - self.attributes["status.code"] = code - if description: - self.attributes["status.description"] = description - - def record_exception(self, exc: BaseException) -> None: - self.add_event("exception", type=type(exc).__name__, msg=str(exc)) - - -class Tracer: - """Encapsulates everything related to tracing (span creation, logging, etc)""" - @contextmanager - def start_as_current_span(self, name: str, kind: str = "INTERNAL") -> Iterator[TraceSpan]: - trace_id = _current_trace_id.get() or _rand_16_hex() - parent_span = _current_span_id.get() - new_span_id = _rand_8_hex() - - # Push new context - tok_tid = _current_trace_id.set(trace_id) - tok_sid = _current_span_id.set(new_span_id) - tok_pid = _current_parent_span_id.set(parent_span) - tok_start = _current_span_start_ns.set(time.time_ns()) - - span = TraceSpan( - name=name, - kind=kind, - start_ns=_current_span_start_ns.get(), - trace_id=trace_id, - span_id=new_span_id, - parent_span_id=parent_span, - attributes={"span.kind": kind, "span.name": name}, - ) - - try: - logger.info( - "span_begin name=%s kind=%s trace_id=%s span_id=%s parent_span_id=%s attrs=%s", - span.name, span.kind, span.trace_id, span.span_id, span.parent_span_id, span.attributes - ) - yield span - except BaseException as exc: - span.record_exception(exc) - span.set_status("ERROR", str(exc)) - raise - finally: - # Pop the span from the context (restore parent context) - duration_ms = (time.time_ns() - span.start_ns) / 1_000_000 - logger.info( - "span_end name=%s kind=%s trace_id=%s span_id=%s parent_span_id=%s duration_ms=%.3f attrs=%s", - span.name, span.kind, span.trace_id, span.span_id, span.parent_span_id, duration_ms, span.attributes - ) - # Restore previous context - _current_span_start_ns.reset(tok_start) - _current_parent_span_id.reset(tok_pid) - _current_span_id.reset(tok_sid) - _current_trace_id.reset(tok_tid) - - def set_incoming_context( - self, - trace_id: Optional[str] = None, - parent_span_id: Optional[str] = None, - ) -> None: - """Seed current context with incoming W3C traceparent values. - - Only non-empty values are applied. - """ - if trace_id: - _current_trace_id.set(trace_id) - if parent_span_id: - _current_parent_span_id.set(parent_span_id) - - def current_trace_ids(self) -> tuple[str, str, str]: - return _current_trace_id.get(), _current_span_id.get(), _current_parent_span_id.get() - - def inject_traceparent(self, headers: Dict[str, str]) -> None: - """Inject W3C traceparent into outbound headers.""" - trace_id, span_id, _ = self.current_trace_ids() - if not trace_id or not span_id: - # If called outside of a span, create a short-lived span just to carry IDs - trace_id = trace_id or _rand_16_hex() - span_id = span_id or _rand_8_hex() - headers["traceparent"] = format_traceparent(trace_id, span_id) - - def extract_traceparent(self, headers: Dict[str, str]) -> tuple[str, str]: - """Extract parent trace/span; returns (trace_id, parent_span_id).""" - raw_traceparent = (headers.get("traceparent") - or headers.get("Traceparent") - or headers.get("TRACEPARENT")) - if not raw_traceparent: - return "", "" - parsed = parse_traceparent(raw_traceparent) - if not parsed: - return "", "" - return parsed[0], parsed[1] - # No incoming context - return "", "" - -# Tracer singleton for the process (not thread) -_tracer = Tracer() - -def get_tracer() -> Tracer: - return _tracer - - -class TraceLogFilter(logging.Filter): - """Inject trace/span ids into LogRecord for formatting.""" - def filter(self, record: logging.LogRecord) -> bool: - record.traceID, record.spanID, record.parentSpanID = get_tracer().current_trace_ids() - return True diff --git a/cmapi/tracing/__init__.py b/cmapi/tracing/__init__.py new file mode 100644 index 000000000..4c9ad4334 --- /dev/null +++ b/cmapi/tracing/__init__.py @@ -0,0 +1,49 @@ +"""Tracing support for CMAPI + +Despite having many files, the idea of this package is simple: MCS is a distributed system, + and we need to be able to trace requests across the system. +We need to understand: +* how one incoming request caused many others +* how long each request took +* which request each log line corresponds to +etc + +The basic high-level mechanism is this: +1. Each incoming request is assigned a trace ID (or it may already have one, see point 2). +2. This trace ID is propagated to all other outbound requests that are caused by this request. +3. Each sub-operation is assigned a span ID. Request ID stays the same, but the span ID changes. +4. Each span can have a parent span ID, which is the span ID of the request that caused this span. +5. So basically, we have a tree of spans, and the trace ID identifies the root of the tree. + +TraceID/SpanID/ParentSpanID are added to each log line, so we can identify which request each log line corresponds to. + +Trace attributes are passed through the system via request headers, and here it becomes a bit more complicated. +There are two technologies that we use to pass these ids: +1. W3C TraceContext. This is a standard, it has a fixed header and its format. + The header is called `traceparent`. It encapsulates trace id and span id. +2. Sentry. For historical reasons, it has different headers. And in our setup it is optional. + But Sentry is very useful, we also use it to monitor the errors, and it has a powerful UI, so we support it too. + +How is it implemented? +1. We have a global tracer object, that is used to create spans and pass them through the system. +2. It is a facade that hides two tracing backends with the same interface: TraceparentBackend and SentryBackend. +3. We have CherryPy tool that processes incoming requests, extracts traceparent header (or generates its parts), + creates a span for each request, injects traceparent header into the response. +4. For each outcoming request, we ask tracer to create a new span and to inject tracing headers into the request. + To avoid boilerplate, there is a TracedSession, an extension to requests that does all that. + For async requests, there is a TracedAsyncSession, that does the same. +5. When the request is finished, we ask tracer to finish/pop the current span. + +Logging: +There is a trace record factory, that adds a new field trace_params to each log record. +trace_params is a string representation of trace id, span id and parent span id. +If in current context they are empty (like in MainThread that doesn't process requests), trace_params is an empty string. + +Sentry reporting: +If Sentry is enabled, we send info about errors and exceptions into it. We also send logs that preceded the problem + as breadcrumbs to understand context of the error. +As we keep Sentry updated about the current trace and span, when an error happens, info about the trace will be sent to Sentry. +So we will know which chain of requests caused the error. +""" + + diff --git a/cmapi/tracing/backend.py b/cmapi/tracing/backend.py new file mode 100644 index 000000000..2d37dca43 --- /dev/null +++ b/cmapi/tracing/backend.py @@ -0,0 +1,37 @@ +from abc import ABC, abstractmethod +from typing import Any, Dict, Optional + +from tracing.span import TraceSpan + + +class TracerBackend(ABC): + @abstractmethod + def on_span_start(self, span: TraceSpan) -> None: + raise NotImplementedError + + @abstractmethod + def on_span_end(self, span: TraceSpan, exc: Optional[BaseException]) -> None: + raise NotImplementedError + + def on_span_event(self, span: TraceSpan, name: str, attrs: Dict[str, Any]) -> None: + return + + def on_span_status(self, span: TraceSpan, code: str, description: str) -> None: + return + + def on_span_exception(self, span: TraceSpan, exc: BaseException) -> None: + return + + def on_span_attribute(self, span: TraceSpan, key: str, value: Any) -> None: + return + + def on_inject_headers(self, headers: Dict[str, str]) -> None: + return + + def on_incoming_request(self, headers: Dict[str, str], method: str, path: str) -> None: + return + + def on_request_finished(self, status_code: Optional[int]) -> None: + return + + diff --git a/cmapi/tracing/sentry.py b/cmapi/tracing/sentry.py new file mode 100644 index 000000000..a28d455ee --- /dev/null +++ b/cmapi/tracing/sentry.py @@ -0,0 +1,65 @@ +import logging + +import sentry_sdk +from sentry_sdk.integrations.logging import LoggingIntegration + +from cmapi_server import helpers +from cmapi_server.constants import CMAPI_CONF_PATH +from tracing.sentry_backend import SentryBackend +from tracing.tracer import get_tracer + +SENTRY_ACTIVE = False + +logger = logging.getLogger(__name__) + +def maybe_init_sentry() -> bool: + """Initialize Sentry from CMAPI configuration. + + Reads config and initializes Sentry only if dsn parameter is present + in corresponding section of cmapi config. + The initialization enables LoggingIntegration: it captures error-level logs as Sentry events + and uses lower-level logs as breadcrumbs. + + Returns: True if Sentry is initialized, False otherwise. + """ + global SENTRY_ACTIVE + try: + cfg_parser = helpers.get_config_parser(CMAPI_CONF_PATH) + dsn = helpers.dequote( + cfg_parser.get('Sentry', 'dsn', fallback='').strip() + ) + if not dsn: + return False + + environment = helpers.dequote( + cfg_parser.get('Sentry', 'environment', fallback='development').strip() + ) + except Exception: + logger.exception('Failed to initialize Sentry.') + return False + + try: + sentry_logging = LoggingIntegration( + level=logging.INFO, + event_level=logging.ERROR, + ) + + sentry_sdk.init( + dsn=dsn, + environment=environment, + sample_rate=1.0, + traces_sample_rate=1.0, + integrations=[sentry_logging], + debug=True, + ) + SENTRY_ACTIVE = True + # Register backend to mirror our internal spans into Sentry + get_tracer().register_backend(SentryBackend()) + logger.info('Sentry initialized for CMAPI via config.') + except Exception: + logger.exception('Failed to initialize Sentry.') + return False + + logger.info('Sentry successfully initialized.') + return True + diff --git a/cmapi/tracing/sentry_backend.py b/cmapi/tracing/sentry_backend.py new file mode 100644 index 000000000..cb343fe5f --- /dev/null +++ b/cmapi/tracing/sentry_backend.py @@ -0,0 +1,114 @@ +import logging +import contextvars +from typing import Any, Dict, Optional + +import sentry_sdk +from sentry_sdk.tracing import Transaction + +from tracing.tracer import TraceSpan, TracerBackend +from tracing.utils import swallow_exceptions + + +logger = logging.getLogger(__name__) + + +class SentryBackend(TracerBackend): + """Mirror spans and events from our Tracer into Sentry SDK.""" + + def __init__(self) -> None: + self._active_spans: Dict[str, Any] = {} + self._current_transaction = contextvars.ContextVar[Optional[Transaction]]("sentry_transaction", default=None) + + @swallow_exceptions + def on_span_start(self, span: TraceSpan) -> None: + kind_to_op = { + 'SERVER': 'http.server', + 'CLIENT': 'http.client', + 'INTERNAL': 'internal', + } + op = kind_to_op.get(span.kind.upper(), 'internal') + sdk_span = sentry_sdk.start_span(op=op, description=span.name) + sdk_span.set_tag('w3c.trace_id', span.trace_id) + sdk_span.set_tag('w3c.span_id', span.span_id) + if span.parent_span_id: + sdk_span.set_tag('w3c.parent_span_id', span.parent_span_id) + if span.attributes: + sdk_span.set_data('cmapi.span_attributes', dict(span.attributes)) + sdk_span.__enter__() + self._active_spans[span.span_id] = sdk_span + + @swallow_exceptions + def on_span_end(self, span: TraceSpan, exc: Optional[BaseException]) -> None: + sdk_span = self._active_spans.pop(span.span_id, None) + if sdk_span is None: + return + if exc is not None: + sdk_span.set_status('internal_error') + sdk_span.__exit__( + type(exc) if exc else None, + exc, + exc.__traceback__ if exc else None + ) + + @swallow_exceptions + def on_span_event(self, span: TraceSpan, name: str, attrs: Dict[str, Any]) -> None: + sentry_sdk.add_breadcrumb(category='event', message=name, data=dict(attrs)) + + @swallow_exceptions + def on_span_status(self, span: TraceSpan, code: str, description: str) -> None: + sdk_span = self._active_spans.get(span.span_id) + if sdk_span is not None: + sdk_span.set_status(code) + + @swallow_exceptions + def on_span_exception(self, span: TraceSpan, exc: BaseException) -> None: + sentry_sdk.capture_exception(exc) + + @swallow_exceptions + def on_span_attribute(self, span: TraceSpan, key: str, value: Any) -> None: + sdk_span = self._active_spans.get(span.span_id) + if sdk_span is not None: + sdk_span.set_data(f'attr.{key}', value) + + @swallow_exceptions + def on_inject_headers(self, headers: Dict[str, str]) -> None: + traceparent = sentry_sdk.get_traceparent() + baggage = sentry_sdk.get_baggage() + if traceparent: + headers['sentry-trace'] = traceparent + if baggage: + headers['baggage'] = baggage + + @swallow_exceptions + def on_incoming_request(self, headers: Dict[str, str], method: str, path: str) -> None: + name = f"{method} {path}" if method or path else "http.server" + transaction = sentry_sdk.continue_trace(headers or {}, op='http.server', name=name) + # Store transaction in context var to ensure we can finish it later + self._current_transaction.set(transaction) + transaction.__enter__() + scope = sentry_sdk.Hub.current.scope + if method: + scope.set_tag('http.method', method) + if path: + scope.set_tag('http.path', path) + # TODO: remove this + logger.info( + "Sentry transaction started name=%s sampled=%s", + name, getattr(transaction, 'sampled', None) + ) + + @swallow_exceptions + def on_request_finished(self, status_code: Optional[int]) -> None: + transaction = self._current_transaction.get() + if transaction is None: + return + if status_code is not None: + transaction.set_http_status(status_code) + # Exit to restore parent and finish the transaction + transaction.__exit__(None, None, None) + # Clear transaction in this context + self._current_transaction.set(None) + # TODO: remove this + logger.info("Sentry transaction finished status=%s", status_code) + + diff --git a/cmapi/tracing/span.py b/cmapi/tracing/span.py new file mode 100644 index 000000000..cc1ac8996 --- /dev/null +++ b/cmapi/tracing/span.py @@ -0,0 +1,46 @@ +from typing import TYPE_CHECKING +from dataclasses import dataclass +from typing import Any, Dict +from tracing.utils import swallow_exceptions + +if TYPE_CHECKING: + from tracing.tracer import Tracer + +@dataclass +class TraceSpan: + """Span handle bound to a tracer. + + Provides helpers to add attributes/events/status/exception that + will never propagate exceptions. + """ + + name: str + kind: str # "SERVER" | "CLIENT" | "INTERNAL" + start_ns: int + trace_id: str + span_id: str + parent_span_id: str + attributes: Dict[str, Any] + tracer: "Tracer" + + @swallow_exceptions + def set_attribute(self, key: str, value: Any) -> None: + self.attributes[key] = value + self.tracer._notify_attribute(self, key, value) + + @swallow_exceptions + def add_event(self, name: str, **attrs: Any) -> None: + self.tracer._notify_event(self, name, attrs) + + @swallow_exceptions + def set_status(self, code: str, description: str = "") -> None: + self.attributes["status.code"] = code + if description: + self.attributes["status.description"] = description + self.tracer._notify_status(self, code, description) + + @swallow_exceptions + def record_exception(self, exc: BaseException) -> None: + self.tracer._notify_exception(self, exc) + + diff --git a/cmapi/tracing/trace_tool.py b/cmapi/tracing/trace_tool.py new file mode 100644 index 000000000..5d0c278db --- /dev/null +++ b/cmapi/tracing/trace_tool.py @@ -0,0 +1,67 @@ +""" +CherryPy tool that uses the tracer to start a span for each request. +""" +import socket +from typing import Dict + +import cherrypy + +from tracing.tracer import get_tracer + + +def _on_request_start() -> None: + req = cherrypy.request + tracer = get_tracer() + + headers: Dict[str, str] = dict(req.headers or {}) + tracer.notify_incoming_request( + headers=headers, + method=getattr(req, 'method', ''), + path=getattr(req, 'path_info', '') + ) + trace_id, parent_span_id = tracer.extract_traceparent(headers) + tracer.set_incoming_context(trace_id, parent_span_id) + + span_name = f"{getattr(req, 'method', 'HTTP')} {getattr(req, 'path_info', '/')}" + + ctx = tracer.start_as_current_span(span_name, kind="SERVER") + span = ctx.__enter__() + span.set_attribute('http.method', getattr(req, 'method', '')) + span.set_attribute('http.path', getattr(req, 'path_info', '')) + span.set_attribute('client.ip', getattr(getattr(req, 'remote', None), 'ip', '')) + span.set_attribute('instance.hostname', socket.gethostname()) + safe_headers = {k: v for k, v in headers.items() if k.lower() not in {'authorization', 'x-api-key'}} + span.set_attribute('sentry.incoming_headers', safe_headers) + req._trace_span_ctx = ctx + req._trace_span = span + + tracer.inject_traceparent(cherrypy.response.headers) # type: ignore[arg-type] + + +def _on_request_end() -> None: + req = cherrypy.request + try: + status_str = str(cherrypy.response.status) + status_code = int(status_str.split()[0]) + except Exception: + status_code = None + tracer = get_tracer() + tracer.notify_request_finished(status_code) + span = getattr(req, "_trace_span", None) + if span is not None and status_code is not None: + span.set_attribute('http.status_code', status_code) + ctx = getattr(req, "_trace_span_ctx", None) + if ctx is not None: + try: + ctx.__exit__(None, None, None) + finally: + req._trace_span_ctx = None + req._trace_span = None + + +def register_tracing_tools() -> None: + cherrypy.tools.trace = cherrypy.Tool("on_start_resource", _on_request_start, priority=10) + cherrypy.tools.trace_end = cherrypy.Tool("on_end_resource", _on_request_end, priority=80) + + + diff --git a/cmapi/cmapi_server/traced_aiohttp.py b/cmapi/tracing/traced_aiohttp.py similarity index 84% rename from cmapi/cmapi_server/traced_aiohttp.py rename to cmapi/tracing/traced_aiohttp.py index 4643e217d..d9468e0b1 100644 --- a/cmapi/cmapi_server/traced_aiohttp.py +++ b/cmapi/tracing/traced_aiohttp.py @@ -1,10 +1,9 @@ -"""Async sibling of TracedSession""" - +"""Async sibling of TracedSession.""" from typing import Any import aiohttp -from cmapi_server.tracer import get_tracer +from tracing.tracer import get_tracer class TracedAsyncSession(aiohttp.ClientSession): @@ -23,10 +22,7 @@ class TracedAsyncSession(aiohttp.ClientSession): with tracer.start_as_current_span(span_name, kind="CLIENT") as span: span.set_attribute("http.method", method) span.set_attribute("http.url", url_text) - try: - tracer.inject_traceparent(headers) - except Exception: - pass + tracer.inject_outbound_headers(headers) try: response = await super()._request(method, str_or_url, *args, **kwargs) except Exception as exc: @@ -41,3 +37,4 @@ def create_traced_async_session(**kwargs: Any) -> TracedAsyncSession: return TracedAsyncSession(**kwargs) + diff --git a/cmapi/cmapi_server/traced_session.py b/cmapi/tracing/traced_session.py similarity index 69% rename from cmapi/cmapi_server/traced_session.py rename to cmapi/tracing/traced_session.py index 120e2379e..823023a26 100644 --- a/cmapi/cmapi_server/traced_session.py +++ b/cmapi/tracing/traced_session.py @@ -1,19 +1,12 @@ -"""Our own customized requests.Session that automatically traces outbound HTTP calls. - -Creates a CLIENT span per outbound HTTP request, injects traceparent, -records method/url/status, and closes the span when the request finishes. -""" - +"""Customized requests.Session that automatically traces outbound HTTP calls.""" from typing import Any, Optional import requests -from cmapi_server.tracer import get_tracer +from tracing.tracer import get_tracer class TracedSession(requests.Session): - """requests.Session that automatically traces outbound HTTP calls.""" - def request(self, method: str, url: str, *args: Any, **kwargs: Any) -> requests.Response: tracer = get_tracer() @@ -27,14 +20,13 @@ class TracedSession(requests.Session): span.set_attribute("http.method", method) span.set_attribute("http.url", url) - tracer.inject_traceparent(headers) + tracer.inject_outbound_headers(headers) try: response = super().request(method, url, *args, **kwargs) except Exception as exc: span.set_status("ERROR", str(exc)) raise else: - # Record status code span.set_attribute("http.status_code", response.status_code) return response @@ -43,10 +35,10 @@ _default_session: Optional[TracedSession] = None def get_traced_session() -> TracedSession: - """Return a process-wide TracedSession singleton.""" global _default_session if _default_session is None: _default_session = TracedSession() return _default_session + diff --git a/cmapi/tracing/traceparent_backend.py b/cmapi/tracing/traceparent_backend.py new file mode 100644 index 000000000..89bda3ac4 --- /dev/null +++ b/cmapi/tracing/traceparent_backend.py @@ -0,0 +1,38 @@ +import logging +import time +from typing import Any, Dict, Optional + +from tracing.tracer import TracerBackend, TraceSpan +from tracing.utils import swallow_exceptions + +logger = logging.getLogger(__name__) + + +class TraceparentBackend(TracerBackend): + """Default backend that logs span lifecycle and mirrors events/status.""" + + @swallow_exceptions + def on_span_start(self, span: TraceSpan) -> None: + logger.info( + "span_begin name=%s kind=%s trace_id=%s span_id=%s parent=%s attrs=%s", + span.name, span.kind, span.trace_id, span.span_id, + span.parent_span_id, span.attributes, + ) + + @swallow_exceptions + def on_span_end(self, span: TraceSpan, exc: Optional[BaseException]) -> None: + duration_ms = (time.time_ns() - span.start_ns) / 1_000_000 + logger.info( + "span_end name=%s kind=%s trace_id=%s span_id=%s parent=%s duration_ms=%.3f attrs=%s", + span.name, span.kind, span.trace_id, span.span_id, + span.parent_span_id, duration_ms, span.attributes, + ) + + @swallow_exceptions + def on_span_event(self, span: TraceSpan, name: str, attrs: Dict[str, Any]) -> None: + logger.info( + "span_event name=%s trace_id=%s span_id=%s attrs=%s", + name, span.trace_id, span.span_id, attrs, + ) + + diff --git a/cmapi/tracing/tracer.py b/cmapi/tracing/tracer.py new file mode 100644 index 000000000..e73bb3f58 --- /dev/null +++ b/cmapi/tracing/tracer.py @@ -0,0 +1,155 @@ +"""This module implements a tracer facade that creates spans, injects/extracts traceparent headers, +and delegates span lifecycle and enrichment to pluggable backends (e.g., Traceparent and Sentry). +It uses contextvars to store the trace/span/parent_span ids and start time for each context. +""" + +import contextvars +import logging +import time +from collections.abc import Iterator +from contextlib import contextmanager +from typing import Any, Dict, List, Optional, Tuple + +from tracing.backend import TracerBackend +from tracing.span import TraceSpan +from tracing.utils import ( + rand_16_hex, + rand_8_hex, + format_traceparent, + parse_traceparent, +) + +logger = logging.getLogger(__name__) + + +# Context vars are something like thread-local storage, they are context-local variables +_current_trace_id = contextvars.ContextVar[str]("trace_id", default="") +_current_span_id = contextvars.ContextVar[str]("span_id", default="") +_current_parent_span_id = contextvars.ContextVar[str]("parent_span_id", default="") +_current_span_start_ns = contextvars.ContextVar[int]("span_start_ns", default=0) + + +class Tracer: + def __init__(self) -> None: + self._backends: List[TracerBackend] = [] + + def register_backend(self, backend: TracerBackend) -> None: + try: + self._backends.append(backend) + logger.info( + "Tracing backend registered: %s", backend.__class__.__name__ + ) + except Exception: + logger.exception("Failed to register tracing backend") + + def clear_backends(self) -> None: + self._backends.clear() + + @contextmanager + def start_as_current_span(self, name: str, kind: str = "INTERNAL") -> Iterator[TraceSpan]: + trace_id = _current_trace_id.get() or rand_16_hex() + parent_span = _current_span_id.get() + new_span_id = rand_8_hex() + + tok_tid = _current_trace_id.set(trace_id) + tok_sid = _current_span_id.set(new_span_id) + tok_pid = _current_parent_span_id.set(parent_span) + tok_start = _current_span_start_ns.set(time.time_ns()) + + span = TraceSpan( + name=name, + kind=kind, + start_ns=_current_span_start_ns.get(), + trace_id=trace_id, + span_id=new_span_id, + parent_span_id=parent_span, + attributes={"span.kind": kind, "span.name": name}, + tracer=self, + ) + + caught_exc: Optional[BaseException] = None + try: + for backend in list(self._backends): + backend.on_span_start(span) + yield span + except BaseException as exc: + span.record_exception(exc) + span.set_status("ERROR", str(exc)) + caught_exc = exc + raise + finally: + for backend in list(self._backends): + backend.on_span_end(span, caught_exc) + _current_span_start_ns.reset(tok_start) + _current_parent_span_id.reset(tok_pid) + _current_span_id.reset(tok_sid) + _current_trace_id.reset(tok_tid) + + def set_incoming_context( + self, + trace_id: Optional[str] = None, + parent_span_id: Optional[str] = None, + ) -> None: + if trace_id: + _current_trace_id.set(trace_id) + if parent_span_id: + _current_parent_span_id.set(parent_span_id) + + def current_trace_ids(self) -> Tuple[str, str, str]: + return _current_trace_id.get(), _current_span_id.get(), _current_parent_span_id.get() + + def inject_traceparent(self, headers: Dict[str, str]) -> None: + trace_id, span_id, _ = self.current_trace_ids() + if not trace_id or not span_id: + trace_id = trace_id or rand_16_hex() + span_id = span_id or rand_8_hex() + headers["traceparent"] = format_traceparent(trace_id, span_id) + + def inject_outbound_headers(self, headers: Dict[str, str]) -> None: + self.inject_traceparent(headers) + for backend in list(self._backends): + backend.on_inject_headers(headers) + + def notify_incoming_request(self, headers: Dict[str, str], method: str, path: str) -> None: + for backend in list(self._backends): + backend.on_incoming_request(headers, method, path) + + def notify_request_finished(self, status_code: Optional[int]) -> None: + for backend in list(self._backends): + backend.on_request_finished(status_code) + + def extract_traceparent(self, headers: Dict[str, str]) -> Tuple[str, str]: + raw_traceparent = (headers.get("traceparent") + or headers.get("Traceparent") + or headers.get("TRACEPARENT")) + if not raw_traceparent: + return "", "" + parsed = parse_traceparent(raw_traceparent) + if not parsed: + return "", "" + return parsed[0], parsed[1] + + def _notify_event(self, span: TraceSpan, name: str, attrs: Dict[str, Any]) -> None: + for backend in list(self._backends): + backend.on_span_event(span, name, attrs) + + def _notify_status(self, span: TraceSpan, code: str, description: str) -> None: + for backend in list(self._backends): + backend.on_span_status(span, code, description) + + def _notify_exception(self, span: TraceSpan, exc: BaseException) -> None: + for backend in list(self._backends): + backend.on_span_exception(span, exc) + + def _notify_attribute(self, span: TraceSpan, key: str, value: Any) -> None: + for backend in list(self._backends): + backend.on_span_attribute(span, key, value) + + +_tracer = Tracer() + + +def get_tracer() -> Tracer: + return _tracer + + diff --git a/cmapi/tracing/utils.py b/cmapi/tracing/utils.py new file mode 100644 index 000000000..26e9d4a0c --- /dev/null +++ b/cmapi/tracing/utils.py @@ -0,0 +1,54 @@ +import logging +import os +from functools import wraps +from typing import Optional, Tuple + +logger = logging.getLogger("tracing") + +def swallow_exceptions(method): + """Decorator that logs exceptions and prevents them from propagating up.""" + + @wraps(method) + def _wrapper(*args, **kwargs): + try: + return method(*args, **kwargs) + except Exception: + logger.exception("%s failed", getattr(method, "__qualname__", repr(method))) + return None + + return _wrapper + +def rand_16_hex() -> str: + """Return 16 random bytes as a 32-char hex string (trace_id size).""" + return os.urandom(16).hex() + + +def rand_8_hex() -> str: + """Return 8 random bytes as a 16-char hex string (span_id size).""" + return os.urandom(8).hex() + + +def format_traceparent(trace_id: str, span_id: str, flags: str = "01") -> str: + """Build a W3C traceparent header (version 00).""" + return f"00-{trace_id}-{span_id}-{flags}" + + +def parse_traceparent(header: str) -> Optional[Tuple[str, str, str]]: + """Parse W3C traceparent and return (trace_id, span_id, flags) or None.""" + try: + parts = header.strip().split("-") + if len(parts) != 4 or parts[0] != "00": + logger.error("Invalid traceparent: %s", header) + return None + trace_id, span_id, flags = parts[1], parts[2], parts[3] + if len(trace_id) != 32 or len(span_id) != 16 or len(flags) != 2: + return None + # W3C: all zero trace_id/span_id are invalid + if set(trace_id) == {"0"} or set(span_id) == {"0"}: + return None + return trace_id, span_id, flags + except Exception: + logger.exception("Failed to parse traceparent: %s", header) + return None + + From 9821f14d0f870a9ffe0ed91b6b7310c62d7bddcb Mon Sep 17 00:00:00 2001 From: Alexander Presnyakov Date: Mon, 1 Sep 2025 16:58:22 +0000 Subject: [PATCH 09/18] Enable loggers that were not explicitly mentioned in cmapi_logger.conf --- cmapi/cmapi_server/cmapi_logger.conf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmapi/cmapi_server/cmapi_logger.conf b/cmapi/cmapi_server/cmapi_logger.conf index 301eed718..e0edb52f6 100644 --- a/cmapi/cmapi_server/cmapi_logger.conf +++ b/cmapi/cmapi_server/cmapi_logger.conf @@ -79,5 +79,6 @@ "handlers": ["console", "file"], "level": "DEBUG" } - } + }, + "disable_existing_loggers": false } From 3ca5a2e6bb698ac4e48a0b9a837e5cb504dd1ef9 Mon Sep 17 00:00:00 2001 From: Alexander Presnyakov Date: Mon, 1 Sep 2025 17:10:03 +0000 Subject: [PATCH 10/18] Fixed Sentry transaction continuation --- cmapi/tracing/sentry_backend.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cmapi/tracing/sentry_backend.py b/cmapi/tracing/sentry_backend.py index cb343fe5f..38ee5da20 100644 --- a/cmapi/tracing/sentry_backend.py +++ b/cmapi/tracing/sentry_backend.py @@ -82,8 +82,10 @@ class SentryBackend(TracerBackend): @swallow_exceptions def on_incoming_request(self, headers: Dict[str, str], method: str, path: str) -> None: name = f"{method} {path}" if method or path else "http.server" - transaction = sentry_sdk.continue_trace(headers or {}, op='http.server', name=name) - # Store transaction in context var to ensure we can finish it later + # Continue from incoming headers, then START the transaction per SDK v2 + continued = sentry_sdk.continue_trace(headers or {}, op='http.server', name=name) + transaction = sentry_sdk.start_transaction(transaction=continued) + # Store started transaction in context var and make current (enter) self._current_transaction.set(transaction) transaction.__enter__() scope = sentry_sdk.Hub.current.scope From 08d46a9e700f23734139b50fc880363e8a208712 Mon Sep 17 00:00:00 2001 From: Alexander Presnyakov Date: Mon, 1 Sep 2025 19:19:28 +0000 Subject: [PATCH 11/18] Pass git revision from the build process -> cmapi - Sentry (to be able to understand which revision caused some error) --- build/build_cmapi.sh | 12 +++- cmapi/CMakeLists.txt | 11 ++++ cmapi/VERSION.template | 1 + cmapi/cmapi_server/managers/application.py | 66 +++++++++++++++++----- cmapi/tracing/sentry.py | 13 ++++- 5 files changed, 87 insertions(+), 16 deletions(-) diff --git a/build/build_cmapi.sh b/build/build_cmapi.sh index 47ed35e26..5f5d24ca3 100755 --- a/build/build_cmapi.sh +++ b/build/build_cmapi.sh @@ -41,6 +41,15 @@ on_exit() { } trap on_exit EXIT +get_cmapi_git_revision() { + # Prefer explicit CMAPI_GIT_REVISION; fallback to DRONE_COMMIT; otherwise read current repo revision + local rev="${CMAPI_GIT_REVISION:-${DRONE_COMMIT:-}}" + if [[ -z "$rev" ]]; then + rev="$(git -C "$COLUMNSTORE_SOURCE_PATH" rev-parse --short=12 HEAD 2>/dev/null || echo unknown)" + fi + echo "$rev" +} + install_deps() { echo "Installing dependencies..." @@ -90,7 +99,8 @@ install_deps() { build_cmapi() { cd "$COLUMNSTORE_SOURCE_PATH"/cmapi ./cleanup.sh - cmake -D"${PKG_FORMAT^^}"=1 -DSERVER_DIR="$MDB_SOURCE_PATH" . && make package + CMAPI_GIT_REVISION_ARG="$(get_cmapi_git_revision)" + cmake -D"${PKG_FORMAT^^}"=1 -DSERVER_DIR="$MDB_SOURCE_PATH" -DCMAPI_GIT_REVISION="$CMAPI_GIT_REVISION_ARG" . && make package } install_deps build_cmapi diff --git a/cmapi/CMakeLists.txt b/cmapi/CMakeLists.txt index 5130ec85f..6f8441c0c 100644 --- a/cmapi/CMakeLists.txt +++ b/cmapi/CMakeLists.txt @@ -21,6 +21,17 @@ include(columnstore_version) set(CMAPI_PACKAGE_VERSION "${CMAPI_VERSION_MAJOR}.${CMAPI_VERSION_MINOR}.${CMAPI_VERSION_PATCH}") +# Git revision to embed into VERSION (may be provided via -DCMAPI_GIT_REVISION or env) +if(NOT DEFINED CMAPI_GIT_REVISION OR "${CMAPI_GIT_REVISION}" STREQUAL "") + if(DEFINED ENV{CMAPI_GIT_REVISION} AND NOT "$ENV{CMAPI_GIT_REVISION}" STREQUAL "") + set(CMAPI_GIT_REVISION "$ENV{CMAPI_GIT_REVISION}") + elseif(DEFINED ENV{DRONE_COMMIT} AND NOT "$ENV{DRONE_COMMIT}" STREQUAL "") + set(CMAPI_GIT_REVISION "$ENV{DRONE_COMMIT}") + else() + set(CMAPI_GIT_REVISION "unknown") + endif() +endif() + set(CMAPI_USER "root") set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "MariaDB ColumnStore CMAPI: cluster management API and command line tool.") diff --git a/cmapi/VERSION.template b/cmapi/VERSION.template index e69dcaf42..f2e18d0a2 100644 --- a/cmapi/VERSION.template +++ b/cmapi/VERSION.template @@ -2,3 +2,4 @@ CMAPI_VERSION_MAJOR=${CMAPI_VERSION_MAJOR} CMAPI_VERSION_MINOR=${CMAPI_VERSION_MINOR} CMAPI_VERSION_PATCH=${CMAPI_VERSION_PATCH} CMAPI_VERSION_RELEASE=${CMAPI_VERSION_RELEASE} +CMAPI_GIT_REVISION=${CMAPI_GIT_REVISION} diff --git a/cmapi/cmapi_server/managers/application.py b/cmapi/cmapi_server/managers/application.py index d0adeab17..bf45baa03 100644 --- a/cmapi/cmapi_server/managers/application.py +++ b/cmapi/cmapi_server/managers/application.py @@ -1,5 +1,5 @@ import logging -from typing import Optional +from typing import Optional, Tuple, Dict from cmapi_server.constants import VERSION_PATH @@ -7,23 +7,61 @@ from cmapi_server.constants import VERSION_PATH class AppManager: started: bool = False version: Optional[str] = None + git_revision: Optional[str] = None @classmethod def get_version(cls) -> str: - """Get CMAPI version. - - :return: cmapi version - :rtype: str - """ if cls.version: return cls.version - with open(VERSION_PATH, encoding='utf-8') as version_file: - version = '.'.join([ - i.strip().split('=')[1] - for i in version_file.read().splitlines() if i - ]) - if not version: - logging.error('Couldn\'t detect version from VERSION file!') - version = 'Undefined' + version, revision = cls._read_version_file() cls.version = version + cls.git_revision = revision return cls.version + + @classmethod + def get_git_revision(cls) -> Optional[str]: + if cls.git_revision is not None: + return cls.git_revision + _, revision = cls._read_version_file() + cls.git_revision = revision + return cls.git_revision + + @classmethod + def _read_version_file(cls) -> Tuple[str, Optional[str]]: + """Read structured values from VERSION file. + + Returns tuple: (semantic_version, git_revision or None) + """ + values: Dict[str, str] = {} + try: + with open(VERSION_PATH, encoding='utf-8') as version_file: + for line in version_file.read().splitlines(): + if not line or '=' not in line: + continue + key, val = line.strip().split('=', 1) + values[key.strip()] = val.strip() + except Exception: + logging.exception("Failed to read VERSION file") + return 'Undefined', None + + # Release (build) part is optional + release = values.get('CMAPI_VERSION_RELEASE') + revision = values.get('CMAPI_GIT_REVISION') + + required_keys = ( + 'CMAPI_VERSION_MAJOR', + 'CMAPI_VERSION_MINOR', + 'CMAPI_VERSION_PATCH', + ) + if not all(k in values and values[k] for k in required_keys): + logging.error("Couldn't detect version from VERSION file!") + return 'Undefined', revision + + version = '.'.join([ + values['CMAPI_VERSION_MAJOR'], + values['CMAPI_VERSION_MINOR'], + values['CMAPI_VERSION_PATCH'], + ]) + if release: + version = f"{version}.{release}" + return version, revision diff --git a/cmapi/tracing/sentry.py b/cmapi/tracing/sentry.py index a28d455ee..8b69d3bf9 100644 --- a/cmapi/tracing/sentry.py +++ b/cmapi/tracing/sentry.py @@ -7,6 +7,7 @@ from cmapi_server import helpers from cmapi_server.constants import CMAPI_CONF_PATH from tracing.sentry_backend import SentryBackend from tracing.tracer import get_tracer +from cmapi_server.managers.application import AppManager SENTRY_ACTIVE = False @@ -44,14 +45,24 @@ def maybe_init_sentry() -> bool: event_level=logging.ERROR, ) + # Compose release string: cmapi-version(+shortrev) + version = AppManager.get_version() + shortrev = (AppManager.get_git_revision() or '').strip() + shortrev = shortrev[:12] if shortrev else '' + release = version if not shortrev else f"{version}+{shortrev}" + sentry_sdk.init( dsn=dsn, environment=environment, sample_rate=1.0, traces_sample_rate=1.0, integrations=[sentry_logging], - debug=True, + release=release, ) + # Add tags for easier filtering in Sentry + if shortrev: + sentry_sdk.set_tag("git_revision", shortrev) + sentry_sdk.set_tag("cmapi_version", version) SENTRY_ACTIVE = True # Register backend to mirror our internal spans into Sentry get_tracer().register_backend(SentryBackend()) From 113d6cc41b327b350a32a4910857e9593729e904 Mon Sep 17 00:00:00 2001 From: Alexander Presnyakov Date: Mon, 1 Sep 2025 21:35:58 +0000 Subject: [PATCH 12/18] Disabled sentry backend debug prints Added couple of debug prints for host readiness --- cmapi/cmapi_server/controllers/endpoints.py | 6 +++--- cmapi/cmapi_server/logging_management.py | 4 ++++ cmapi/mcs_node_control/models/node_config.py | 13 +++++-------- cmapi/tracing/sentry_backend.py | 7 ------- cmapi/tracing/traceparent_backend.py | 2 +- 5 files changed, 13 insertions(+), 19 deletions(-) diff --git a/cmapi/cmapi_server/controllers/endpoints.py b/cmapi/cmapi_server/controllers/endpoints.py index 9df64fe0d..2eede858d 100644 --- a/cmapi/cmapi_server/controllers/endpoints.py +++ b/cmapi/cmapi_server/controllers/endpoints.py @@ -481,8 +481,8 @@ class ConfigController: attempts = 0 # TODO: FIX IT. If got (False, False) result, for eg in case - # when there are no special CEJ user set, this check loop - # is useless and do nothing. + # when special CEJ user is not set, this check loop + # is useless and does nothing. try: ready, retry = system_ready(mcs_config_filename) except CEJError as cej_error: @@ -495,7 +495,7 @@ class ConfigController: attempts +=1 if attempts >= 10: module_logger.debug( - 'Timed out waiting for node to be ready.' + 'Timed out waiting for this node to become ready.' ) break time.sleep(1) diff --git a/cmapi/cmapi_server/logging_management.py b/cmapi/cmapi_server/logging_management.py index 9949db431..8406bfb1f 100644 --- a/cmapi/cmapi_server/logging_management.py +++ b/cmapi/cmapi_server/logging_management.py @@ -145,6 +145,7 @@ def config_cmapi_server_logging(): # Ensure trace_params is available on every record install_trace_record_factory() dict_config(CMAPI_LOG_CONF_PATH) + disable_unwanted_loggers() def change_loggers_level(level: str): @@ -160,3 +161,6 @@ def change_loggers_level(level: str): loggers.append(logging.getLogger()) # add RootLogger for logger in loggers: logger.setLevel(level) + +def disable_unwanted_loggers(): + logging.getLogger("urllib3").setLevel(logging.WARNING) diff --git a/cmapi/mcs_node_control/models/node_config.py b/cmapi/mcs_node_control/models/node_config.py index 7dac18bce..e83f1e541 100644 --- a/cmapi/mcs_node_control/models/node_config.py +++ b/cmapi/mcs_node_control/models/node_config.py @@ -262,15 +262,12 @@ class NodeConfig: ) raise - def in_active_nodes(self, root): + def in_active_nodes(self, root) -> bool: my_names = set(self.get_network_addresses_and_names()) - active_nodes = [ - node.text for node in root.findall("./ActiveNodes/Node") - ] - for node in active_nodes: - if node in my_names: - return True - return False + active_nodes = {node.text for node in root.findall("./ActiveNodes/Node")} + am_i_active = bool(my_names.intersection(active_nodes)) + module_logger.debug("Active nodes: %s, my names: %s, am i active: %s", active_nodes, my_names, am_i_active) + return am_i_active def get_current_pm_num(self, root): # Find this node in the Module* tags, return the module number diff --git a/cmapi/tracing/sentry_backend.py b/cmapi/tracing/sentry_backend.py index 38ee5da20..05e25f023 100644 --- a/cmapi/tracing/sentry_backend.py +++ b/cmapi/tracing/sentry_backend.py @@ -93,11 +93,6 @@ class SentryBackend(TracerBackend): scope.set_tag('http.method', method) if path: scope.set_tag('http.path', path) - # TODO: remove this - logger.info( - "Sentry transaction started name=%s sampled=%s", - name, getattr(transaction, 'sampled', None) - ) @swallow_exceptions def on_request_finished(self, status_code: Optional[int]) -> None: @@ -110,7 +105,5 @@ class SentryBackend(TracerBackend): transaction.__exit__(None, None, None) # Clear transaction in this context self._current_transaction.set(None) - # TODO: remove this - logger.info("Sentry transaction finished status=%s", status_code) diff --git a/cmapi/tracing/traceparent_backend.py b/cmapi/tracing/traceparent_backend.py index 89bda3ac4..e486f9bd3 100644 --- a/cmapi/tracing/traceparent_backend.py +++ b/cmapi/tracing/traceparent_backend.py @@ -5,7 +5,7 @@ from typing import Any, Dict, Optional from tracing.tracer import TracerBackend, TraceSpan from tracing.utils import swallow_exceptions -logger = logging.getLogger(__name__) +logger = logging.getLogger("tracing") class TraceparentBackend(TracerBackend): From 873eeb172c626567d08620610fe50fa4971d3a8a Mon Sep 17 00:00:00 2001 From: Alexander Presnyakov Date: Wed, 3 Sep 2025 16:13:20 +0000 Subject: [PATCH 13/18] CLI command to enable/disable Sentry logging --- cmapi/mcs_cluster_tool/__main__.py | 5 +- cmapi/mcs_cluster_tool/tools_commands.py | 113 +++++++++++++++++++++++ 2 files changed, 116 insertions(+), 2 deletions(-) diff --git a/cmapi/mcs_cluster_tool/__main__.py b/cmapi/mcs_cluster_tool/__main__.py index a2260d0c6..09086d51a 100644 --- a/cmapi/mcs_cluster_tool/__main__.py +++ b/cmapi/mcs_cluster_tool/__main__.py @@ -56,8 +56,9 @@ app.command( 'Provides useful functions to review and troubleshoot the MCS cluster.' ) )(tools_commands.review) - - +app.add_typer( + tools_commands.sentry_app, name='sentry', rich_help_panel='Tools commands', hidden=True +) @app.command( name='help-all', help='Show help for all commands in man page style.', add_help_option=False diff --git a/cmapi/mcs_cluster_tool/tools_commands.py b/cmapi/mcs_cluster_tool/tools_commands.py index 742f13d13..42fc1e4be 100644 --- a/cmapi/mcs_cluster_tool/tools_commands.py +++ b/cmapi/mcs_cluster_tool/tools_commands.py @@ -11,10 +11,12 @@ from typing_extensions import Annotated from cmapi_server.constants import ( MCS_DATA_PATH, MCS_SECRETS_FILENAME, REQUEST_TIMEOUT, TRANSACTION_TIMEOUT, + CMAPI_CONF_PATH, ) from cmapi_server.controllers.api_clients import ClusterControllerClient from cmapi_server.exceptions import CEJError from cmapi_server.handlers.cej import CEJPasswordHandler +from cmapi_server.helpers import get_config_parser from cmapi_server.managers.transaction import TransactionManager from cmapi_server.process_dispatchers.base import BaseDispatcher from mcs_cluster_tool.constants import MCS_COLUMNSTORE_REVIEW_SH @@ -379,4 +381,115 @@ def review( success, _ = BaseDispatcher.exec_command(cmd, stdout=sys.stdout) if not success: raise typer.Exit(code=1) + raise typer.Exit(code=0) + + +# Sentry subcommand app +sentry_app = typer.Typer(help='Manage Sentry DSN configuration for error tracking.') + + +@sentry_app.command() +@handle_output +def show(): + """Show current Sentry DSN configuration.""" + try: + # Read existing config + cfg_parser = get_config_parser(CMAPI_CONF_PATH) + + if not cfg_parser.has_section('Sentry'): + typer.echo('Sentry is disabled (no configuration found).', color='yellow') + raise typer.Exit(code=0) + + dsn = cfg_parser.get('Sentry', 'dsn', fallback='').strip().strip("'\"") + environment = cfg_parser.get('Sentry', 'environment', fallback='development').strip().strip("'\"") + + if not dsn: + typer.echo('Sentry is disabled (DSN is empty).', color='yellow') + else: + typer.echo('Sentry is enabled:', color='green') + typer.echo(f' DSN: {dsn}') + typer.echo(f' Environment: {environment}') + + except Exception as e: + typer.echo(f'Error reading configuration: {str(e)}', color='red') + raise typer.Exit(code=1) + + raise typer.Exit(code=0) + + +@sentry_app.command() +@handle_output +def enable( + dsn: Annotated[ + str, + typer.Argument( + help='Sentry DSN URL to enable for error tracking.', + ) + ], + environment: Annotated[ + str, + typer.Option( + '--environment', '-e', + help='Sentry environment name (default: development).', + ) + ] = 'development' +): + """Enable Sentry error tracking with the provided DSN.""" + if not dsn: + typer.echo('DSN cannot be empty.', color='red') + raise typer.Exit(code=1) + + try: + # Read existing config + cfg_parser = get_config_parser(CMAPI_CONF_PATH) + + # Add or update Sentry section + if not cfg_parser.has_section('Sentry'): + cfg_parser.add_section('Sentry') + + cfg_parser.set('Sentry', 'dsn', f"'{dsn}'") + cfg_parser.set('Sentry', 'environment', f"'{environment}'") + + # Write config back to file + with open(CMAPI_CONF_PATH, 'w') as config_file: + cfg_parser.write(config_file) + + typer.echo('Sentry error tracking enabled successfully.', color='green') + typer.echo(f'DSN: {dsn}', color='green') + typer.echo(f'Environment: {environment}', color='green') + typer.echo('Note: Restart cmapi service for changes to take effect.', color='yellow') + + except Exception as e: + typer.echo(f'Error updating configuration: {str(e)}', color='red') + raise typer.Exit(code=1) + + raise typer.Exit(code=0) + + +@sentry_app.command() +@handle_output +def disable(): + """Disable Sentry error tracking by removing the configuration.""" + try: + # Read existing config + cfg_parser = get_config_parser(CMAPI_CONF_PATH) + + if not cfg_parser.has_section('Sentry'): + typer.echo('Sentry is already disabled (no configuration found).', color='yellow') + raise typer.Exit(code=0) + + # Remove the entire Sentry section + cfg_parser.remove_section('Sentry') + + # Write config back to file + with open(CMAPI_CONF_PATH, 'w') as config_file: + cfg_parser.write(config_file) + + typer.echo('Sentry error tracking disabled successfully.', color='green') + typer.echo('Note: Restart cmapi service for changes to take effect.', color='yellow') + + except Exception as e: + typer.echo(f'Error updating configuration: {str(e)}', color='red') + raise typer.Exit(code=1) + raise typer.Exit(code=0) \ No newline at end of file From 83c293303989e15c5db0d359b96372dcade0472f Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Wed, 3 Sep 2025 18:12:42 +0000 Subject: [PATCH 14/18] fix(ci): do not build debuginfo packages for RPMS, as it cost 40min of packaging time --- build/bootstrap_mcs.sh | 21 ++++++++++++++++++--- cmake/cpack_manage.cmake | 27 ++++++++++++++++++++++++--- cmake/cpack_overrides.cmake | 31 +++++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+), 6 deletions(-) create mode 100644 cmake/cpack_overrides.cmake diff --git a/build/bootstrap_mcs.sh b/build/bootstrap_mcs.sh index 65a0d98b5..f6fb95ae0 100755 --- a/build/bootstrap_mcs.sh +++ b/build/bootstrap_mcs.sh @@ -442,7 +442,15 @@ construct_cmake_flags() { if [[ $SCCACHE = true ]]; then warn "Use sccache" - MDB_CMAKE_FLAGS+=(-DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache) + # Use full path to ensure sccache is found during RPM builds + MDB_CMAKE_FLAGS+=(-DCMAKE_C_COMPILER_LAUNCHER=/usr/local/bin/sccache -DCMAKE_CXX_COMPILER_LAUNCHER=/usr/local/bin/sccache) + + message "Sccache binary check:" + ls -la /usr/local/bin/sccache || warn "sccache binary not found" + /usr/local/bin/sccache --version || warn "sccache version failed" + + message "Starting sccache server:" + /usr/local/bin/sccache --start-server 2>&1 || warn "Failed to start sccache server" fi if [[ $RUN_BENCHMARKS = true ]]; then @@ -531,7 +539,13 @@ build_package() { cd $MDB_SOURCE_PATH if [[ $PKG_FORMAT == "rpm" ]]; then - command="cmake ${MDB_CMAKE_FLAGS[@]} && make -j\$(nproc) package" + message "Configuring cmake for RPM package" + MDB_CMAKE_FLAGS+=(-DCPACK_PACKAGE_DIRECTORY=$MARIA_BUILD_PATH/..) + + cmake "${MDB_CMAKE_FLAGS[@]}" -S"$MDB_SOURCE_PATH" -B"$MARIA_BUILD_PATH" + check_errorcode + message "Building RPM package" + command="cmake --build \"$MARIA_BUILD_PATH\" -j$(nproc) --target package" else export DEBIAN_FRONTEND="noninteractive" export DEB_BUILD_OPTIONS="parallel=$(nproc)" @@ -827,7 +841,8 @@ if [[ $BUILD_PACKAGES = true ]]; then exit_code=$? if [[ $SCCACHE = true ]]; then - sccache --show-adv-stats + message "Final sccache statistics:" + /usr/local/bin/sccache --show-adv-stats fi exit $exit_code diff --git a/cmake/cpack_manage.cmake b/cmake/cpack_manage.cmake index e463db9cf..ee51c011a 100644 --- a/cmake/cpack_manage.cmake +++ b/cmake/cpack_manage.cmake @@ -28,6 +28,27 @@ macro(columnstore_add_rpm_deps) columnstore_append_for_cpack(CPACK_RPM_columnstore-engine_PACKAGE_REQUIRES ${ARGN}) endmacro() -if(RPM) - columnstore_add_rpm_deps("snappy" "jemalloc" "procps-ng" "gawk") -endif() +# Columnstore-specific RPM packaging overrides 1) Use fast compression to speed up packaging +set(CPACK_RPM_COMPRESSION_TYPE + "zstd" + CACHE STRING "RPM payload compression" FORCE +) +# 2) Disable debuginfo/debugsource to avoid slow packaging and duplicate file warnings +set(CPACK_RPM_DEBUGINFO_PACKAGE + OFF + CACHE BOOL "Disable debuginfo package" FORCE +) +set(CPACK_RPM_PACKAGE_DEBUG + 0 + CACHE STRING "Disable RPM debug package" FORCE +) +unset(CPACK_RPM_BUILD_SOURCE_DIRS_PREFIX CACHE) + +# Ensure our overrides are applied by CPack at packaging time CPACK_PROJECT_CONFIG_FILE is included by cpack after +# CPackConfig.cmake is loaded +set(CPACK_PROJECT_CONFIG_FILE + "${CMAKE_CURRENT_LIST_DIR}/cpack_overrides.cmake" + CACHE FILEPATH "Columnstore CPack overrides" FORCE +) + +columnstore_add_rpm_deps("snappy" "jemalloc" "procps-ng" "gawk") diff --git a/cmake/cpack_overrides.cmake b/cmake/cpack_overrides.cmake new file mode 100644 index 000000000..fcc92e09b --- /dev/null +++ b/cmake/cpack_overrides.cmake @@ -0,0 +1,31 @@ +# Columnstore-specific CPack overrides applied at package time +# This file is referenced via CPACK_PROJECT_CONFIG_FILE and is included by CPack +# after it reads the generated CPackConfig.cmake, letting these settings win. + +# Faster payload compression +set(CPACK_RPM_COMPRESSION_TYPE "zstd") + +# Control debuginfo generation (symbols) without debugsource (sources) +option(CS_RPM_DEBUGINFO "Build Columnstore -debuginfo RPM (symbols only)" OFF) + +if(CS_RPM_DEBUGINFO) + # Generate debuginfo RPM (symbols) + set(CPACK_RPM_DEBUGINFO_PACKAGE ON) + set(CPACK_RPM_PACKAGE_DEBUG 1) +else() + # No debuginfo RPM + set(CPACK_RPM_DEBUGINFO_PACKAGE OFF) + set(CPACK_RPM_PACKAGE_DEBUG 0) + set(CPACK_STRIP_FILES OFF) + # Prevent rpmbuild from stripping binaries and running debug post scripts. + # CPACK_STRIP_FILES only affects CPack's own stripping; rpmbuild still + # executes brp-strip and find-debuginfo by default unless we override macros. + if(DEFINED CPACK_RPM_SPEC_MORE_DEFINE) + set(CPACK_RPM_SPEC_MORE_DEFINE "${CPACK_RPM_SPEC_MORE_DEFINE}\n%define __strip /bin/true\n%define __objdump /bin/true\n%define __os_install_post %nil\n%define __debug_install_post %nil") + else() + set(CPACK_RPM_SPEC_MORE_DEFINE "%define __strip /bin/true\n%define __objdump /bin/true\n%define __os_install_post %nil\n%define __debug_install_post %nil") + endif() +endif() + +# Always disable debugsource by not mapping sources +unset(CPACK_RPM_BUILD_SOURCE_DIRS_PREFIX) From 5f1c2e17450caa0eea1155dc3f57d0656599cc18 Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Wed, 3 Sep 2025 18:13:51 +0000 Subject: [PATCH 15/18] chore(build): build only required boost parts, silence external_project logs --- cmake/boost.cmake | 26 +++++++++++++++++++------- cmake/cpack_manage.cmake | 4 ++++ cmake/thrift.cmake | 1 + tests/CMakeLists.txt | 1 + 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/cmake/boost.cmake b/cmake/boost.cmake index 614f723ae..8de017e49 100644 --- a/cmake/boost.cmake +++ b/cmake/boost.cmake @@ -1,4 +1,7 @@ -find_package(Boost 1.88.0 COMPONENTS chrono filesystem program_options regex system thread) +# Single source of truth for Boost components we need +set(BOOST_COMPONENTS chrono filesystem program_options regex system thread) + +find_package(Boost 1.88.0 COMPONENTS ${BOOST_COMPONENTS}) if(Boost_FOUND) add_custom_target(external_boost) @@ -35,11 +38,20 @@ elseif(COLUMNSTORE_WITH_LIBCPP) endif() set(_b2args cxxflags=${_cxxargs};cflags=-fPIC;threading=multi;${_extra};toolset=${_toolset} - --without-mpi;--without-charconv;--without-python;--prefix=${INSTALL_LOCATION} linkflags=${_linkflags} + --prefix=${INSTALL_LOCATION} linkflags=${_linkflags} ) +# Derived helper strings from BOOST_COMPONENTS +set(_boost_with_libs_list ${BOOST_COMPONENTS}) +string(REPLACE ";" "," _boost_with_libs_csv "${_boost_with_libs_list}") +set(_boost_b2_with_args) +foreach(_lib ${BOOST_COMPONENTS}) + list(APPEND _boost_b2_with_args "--with-${_lib}") +endforeach() +string(REPLACE ";" " " _boost_b2_with_args_str "${_boost_b2_with_args}") + set(byproducts) -foreach(name chrono filesystem program_options regex system thread) +foreach(name ${BOOST_COMPONENTS}) set(lib boost_${name}) add_library(${lib} STATIC IMPORTED GLOBAL) add_dependencies(${lib} external_boost) @@ -50,7 +62,7 @@ endforeach() set(LOG_BOOST_INSTEAD_OF_SCREEN "") if(COLUMNSTORE_MAINTAINER_MODE) - set(LOG_BOOST_INSTEAD_OF_SCREEN "LOG_BUILD TRUE LOG_INSTALL TRUE") + set(LOG_BOOST_INSTEAD_OF_SCREEN "LOG_BUILD TRUE LOG_INSTALL TRUE LOG_CONFIGURE TRUE") endif() ExternalProject_Add( @@ -58,13 +70,13 @@ ExternalProject_Add( PREFIX .boost URL https://archives.boost.io/release/1.88.0/source/boost_1_88_0.tar.gz URL_HASH SHA256=3621533e820dcab1e8012afd583c0c73cf0f77694952b81352bf38c1488f9cb4 - CONFIGURE_COMMAND ./bootstrap.sh + CONFIGURE_COMMAND ./bootstrap.sh --with-libraries=${_boost_with_libs_csv} UPDATE_COMMAND "" PATCH_COMMAND ${CMAKE_COMMAND} -E chdir patch -p1 -i ${CMAKE_SOURCE_DIR}/storage/columnstore/columnstore/cmake/boost.1.88.named_proxy.hpp.patch - BUILD_COMMAND ./b2 -q ${_b2args} + BUILD_COMMAND ./b2 -q -d0 ${_b2args} ${_boost_b2_with_args} BUILD_IN_SOURCE TRUE - INSTALL_COMMAND ./b2 -q install ${_b2args} + INSTALL_COMMAND ./b2 -q -d0 install ${_b2args} ${_boost_b2_with_args} ${LOG_BOOST_INSTEAD_OF_SCREEN} EXCLUDE_FROM_ALL TRUE ${byproducts} diff --git a/cmake/cpack_manage.cmake b/cmake/cpack_manage.cmake index ee51c011a..c324028d8 100644 --- a/cmake/cpack_manage.cmake +++ b/cmake/cpack_manage.cmake @@ -1,3 +1,7 @@ +if(NOT COLUMNSTORE_MAINTAINER_MODE) + return() +endif() + macro(columnstore_append_for_cpack var_name) # Get current value from parent scope or use empty string if(DEFINED ${var_name}) diff --git a/cmake/thrift.cmake b/cmake/thrift.cmake index 514e590ac..d078346d9 100644 --- a/cmake/thrift.cmake +++ b/cmake/thrift.cmake @@ -68,6 +68,7 @@ ExternalProject_Add( -DCMAKE_EXE_LINKER_FLAGS=${linkflags} -DCMAKE_SHARED_LINKER_FLAGS=${linkflags} -DCMAKE_MODULE_LINKER_FLAGS=${linkflags} + -DCMAKE_INSTALL_MESSAGE=NEVER BUILD_BYPRODUCTS "${THRIFT_LIBRARY_DIRS}/${CMAKE_STATIC_LIBRARY_PREFIX}thrift${CMAKE_STATIC_LIBRARY_SUFFIX}" EXCLUDE_FROM_ALL TRUE ) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index a70782061..5e0972f27 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -22,6 +22,7 @@ if(WITH_UNITTESTS) GIT_REPOSITORY https://github.com/google/googletest GIT_TAG release-1.12.0 CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${EXTERNAL_INSTALL_LOCATION} -DBUILD_SHARED_LIBS=ON + -DCMAKE_INSTALL_MESSAGE=NEVER -DCMAKE_CXX_FLAGS:STRING=${cxxflags} -DCMAKE_EXE_LINKER_FLAGS=${linkflags} -DCMAKE_SHARED_LINKER_FLAGS=${linkflags} -DCMAKE_MODULE_LINKER_FLAGS=${linkflags} ) From d4d0a80d19a2c11b3a5fb92a8747cc7f7653e910 Mon Sep 17 00:00:00 2001 From: Leonid Fedorov <79837786+mariadb-LeonidFedorov@users.noreply.github.com> Date: Wed, 3 Sep 2025 22:48:48 +0400 Subject: [PATCH 16/18] Update cpack_manage.cmake --- cmake/cpack_manage.cmake | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cmake/cpack_manage.cmake b/cmake/cpack_manage.cmake index c324028d8..c91e4b097 100644 --- a/cmake/cpack_manage.cmake +++ b/cmake/cpack_manage.cmake @@ -1,7 +1,3 @@ -if(NOT COLUMNSTORE_MAINTAINER_MODE) - return() -endif() - macro(columnstore_append_for_cpack var_name) # Get current value from parent scope or use empty string if(DEFINED ${var_name}) @@ -32,6 +28,10 @@ macro(columnstore_add_rpm_deps) columnstore_append_for_cpack(CPACK_RPM_columnstore-engine_PACKAGE_REQUIRES ${ARGN}) endmacro() +if(NOT COLUMNSTORE_MAINTAINER_MODE) + return() +endif() + # Columnstore-specific RPM packaging overrides 1) Use fast compression to speed up packaging set(CPACK_RPM_COMPRESSION_TYPE "zstd" From cbf74fe252d1ea190a7c696f899aaadda4e8f0cd Mon Sep 17 00:00:00 2001 From: Leonid Fedorov <79837786+mariadb-LeonidFedorov@users.noreply.github.com> Date: Wed, 3 Sep 2025 23:18:07 +0400 Subject: [PATCH 17/18] Update cpack_manage.cmake --- cmake/cpack_manage.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/cpack_manage.cmake b/cmake/cpack_manage.cmake index c91e4b097..743e7d974 100644 --- a/cmake/cpack_manage.cmake +++ b/cmake/cpack_manage.cmake @@ -28,7 +28,7 @@ macro(columnstore_add_rpm_deps) columnstore_append_for_cpack(CPACK_RPM_columnstore-engine_PACKAGE_REQUIRES ${ARGN}) endmacro() -if(NOT COLUMNSTORE_MAINTAINER_MODE) +if(NOT COLUMNSTORE_MAINTAINER) return() endif() From 77c3aea6eba565673c3dcb370c7bd59111d410e9 Mon Sep 17 00:00:00 2001 From: Alexander Presnyakov Date: Wed, 3 Sep 2025 19:59:33 +0000 Subject: [PATCH 18/18] Save packaging stderr to the log --- .drone.jsonnet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.drone.jsonnet b/.drone.jsonnet index 001b7ed9c..290f6bdb3 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -563,7 +563,7 @@ local Pipeline(branch, platform, event, arch="amd64", server="10.6-enterprise", "--build-path " + "/mdb/" + builddir + "/builddir " + " " + customBootstrapParamsForExisitingPipelines(platform) + " " + customBuildFlags(customBootstrapParamsKey) + - " | " + get_build_command("ansi2txt.sh") + + " 2>&1 | " + get_build_command("ansi2txt.sh") + "/mdb/" + builddir + "/" + result + '/build.log "', ], },