1
0
mirror of https://github.com/MariaDB/server.git synced 2025-04-17 10:37:22 +03:00

MDEV-34979 generate SBOM from server builds

This commit adds the capability to generate a Software Bill of Materials
(SBOM) from server builds.

It introduces a new WITH_SBOM variable, which defaults to ON for package
builds (i.e., if BUILD_CONFIG is used) and to OFF otherwise.

When enabled, the build process will produce an sbom.json document in
CycloneDX format, capturing information about various dependencies,
which is gathered from various sources.

We use git submodule information and CMake external project properties
to gather version information for third-party code, and we also handle
dependencies whose source code is vendored in our repository
(zlib, or the Connect storage engine's minizip).

The SBOM document is stored in the root build directory, in the sbom.json file,
but is not currently installed.
This commit is contained in:
Vladislav Vaintroub 2025-01-15 14:37:43 +01:00
parent 18dbeae1b8
commit 2563839853
7 changed files with 453 additions and 0 deletions

2
.gitignore vendored
View File

@ -33,6 +33,7 @@ TAGS
Testing/
tmp/
VERSION.dep
cmake/submodule_info.cmake
configure
client/async_example
client/mysql
@ -113,6 +114,7 @@ plugin/auth_pam/config_auth_pam.h
plugin/aws_key_management/aws-sdk-cpp
plugin/aws_key_management/aws_sdk_cpp
plugin/aws_key_management/aws_sdk_cpp-prefix
sbom.json
scripts/comp_sql
scripts/make_binary_distribution
scripts/msql2mysql

View File

@ -581,6 +581,13 @@ ENDIF()
INCLUDE(build_depends)
# Optional generation of a Software Bill of Materials (SBOM) for this build.
# The default value of the option is taken from SBOM_DEFAULT, if that
# variable has been set before this point; the option is hidden from the
# non-advanced cache view.
OPTION(WITH_SBOM "Generate Software Bill of Materials (SBOM)" "${SBOM_DEFAULT}")
MARK_AS_ADVANCED(WITH_SBOM)
IF(WITH_SBOM)
# Load the SBOM module and run the generator at configure time
INCLUDE(generate_sbom)
GENERATE_SBOM()
ENDIF()
INCLUDE(CPack)
IF(WIN32 AND SIGNCODE)

View File

@ -85,6 +85,8 @@ ENDIF()
SET(WITH_INNODB_SNAPPY OFF CACHE STRING "")
SET(WITH_NUMA 0 CACHE BOOL "")
SET(CPU_LEVEL1_DCACHE_LINESIZE 0)
# Generate a Software Bill of Materials (SBOM) by default in this build
# configuration; SBOM_DEFAULT is used as the default of the WITH_SBOM option.
SET(SBOM_DEFAULT 1)
IF(NOT EXISTS ${CMAKE_SOURCE_DIR}/.git)
SET(GIT_EXECUTABLE GIT_EXECUTABLE-NOTFOUND CACHE FILEPATH "")

304
cmake/generate_sbom.cmake Normal file
View File

@ -0,0 +1,304 @@
INCLUDE(generate_submodule_info)
INCLUDE(ExternalProject)
# Extract the user (organization) name and the repository name from a
# git hosting URL (typically github).
#
# Arguments:
#   repo_url      - repository URL. Accepted forms are
#                     https://host/user/repo[.git]
#                     git@host:user/repo[.git]      (scp-like syntax)
#                     <scheme>://[login@]host/user/repo[.git]
#   repo_name_var - name of the variable that receives everything after the
#                   last "/" of the URL, with a trailing ".git" removed
#   repo_user_var - name of the variable that receives the first path
#                   component after the host name
#
# Both output variables are set in the caller's scope.
# If the URL has none of the forms above, the user is returned as the
# unmodified URL (this is the historical behaviour and is kept as is).
FUNCTION (EXTRACT_REPO_NAME_AND_USER repo_url repo_name_var repo_user_var)
  IF(repo_url MATCHES "^git@")
    # normalize scp-like syntax to https-style URLs
    STRING(REGEX REPLACE "^git@([^:]+):(.*)$" "https://\\1/\\2" repo_url "${repo_url}")
  ENDIF()
  # Extract the repository user.
  # Any URL scheme is accepted (https, http, ssh, git ...), as well as an
  # optional "login@" prefix in front of the host name, so that e.g.
  # ssh://git@github.com/user/repo.git yields "user" rather than the whole URL.
  STRING(REGEX REPLACE
    "^[A-Za-z][A-Za-z0-9+.-]*://([^/@]+@)?[^/]+/([^/]+)/.*" "\\2"
    repo_user "${repo_url}")
  # Repository name is whatever follows the last slash, minus ".git" suffix
  STRING(REGEX REPLACE ".*/([^/]*)$" "\\1" repo_name "${repo_url}")
  STRING(REGEX REPLACE "\\.git$" "" repo_name "${repo_name}")
  # Quote the values, so that the outputs are always defined (possibly empty)
  SET(${repo_name_var} "${repo_name}" PARENT_SCOPE)
  SET(${repo_user_var} "${repo_user}" PARENT_SCOPE)
ENDFUNCTION()
# Register a known third-party component for SBOM generation.
# Used for "vendored" source code (code that is part of our repository,
# such as zlib), as well as for ExternalProject_Add() projects.
#
# Arguments:
#   name        - unique key of the component; a fatal error is raised if the
#                 key is already present in ALL_THIRD_PARTY
#   url         - repository URL
#   tag         - tag of the component
#   rev         - revision of the component
#   version     - version string
#   description - free text description
#
# This is a macro on purpose: ALL_THIRD_PARTY and the per-component
# <name>_URL, <name>_TAG, <name>_REVISION, <name>_DESCRIPTION and
# <name>_VERSION variables are set in the scope of the caller.
macro(ADD_THIRD_PARTY_DEPENDENCY name url tag rev version description)
  list(FIND ALL_THIRD_PARTY ${name} idx)
  if(NOT idx EQUAL -1)
    message(FATAL_ERROR "${name} is already in ALL_THIRD_PARTY")
  endif()
  list(APPEND ALL_THIRD_PARTY ${name})
  set(${name}_VERSION "${version}")
  set(${name}_DESCRIPTION "${description}")
  set(${name}_REVISION ${rev})
  set(${name}_TAG ${tag})
  set(${name}_URL ${url})
endmacro()
# Get CPE ID ( https://en.wikipedia.org/wiki/Common_Platform_Enumeration )
# for given project name and version.
# Only "known" CPEs are handled here, e.g. currently there is no CPE for rocksdb.
#
# Arguments:
#   name    - (lowercase) project name, looked up in the table below
#   version - project version or tag; everything except digits and dots
#             is removed from it, e.g. "v1.3.1" becomes "1.3.1"
#   var     - name of the output variable, set in the caller's scope to the
#             CPE 2.3 formatted string, or to an empty string if the project
#             is not known, or if no version number is left after cleaning
FUNCTION(SBOM_GET_CPE name version var)
  # Flat list of pairs: project name, followed by "vendor:product" CPE part
  SET(cpe_prefix_map
    "zlib" "zlib:zlib"
    "mariadb-connector-c" "mariadb:connector\\\\/c"
    "wolfssl" "wolfssl:wolfssl"
    "minizip" "zlib:zlib"
    "pcre2" "pcre:pcre2"
    "fmt" "fmt:fmt"
    "boost" "boost:boost"
    "thrift" "apache:thrift"
  )
  SET(${var} "" PARENT_SCOPE)
  LIST(LENGTH cpe_prefix_map map_length)
  MATH(EXPR last_key_idx "${map_length} - 2")
  # Compare against the keys (even positions) only, so that a name that
  # happens to be equal to one of the "vendor:product" values is not
  # mistaken for a known project.
  FOREACH(key_idx RANGE 0 ${last_key_idx} 2)
    LIST(GET cpe_prefix_map ${key_idx} key)
    IF("${key}" STREQUAL "${name}")
      MATH(EXPR value_idx "${key_idx} + 1")
      LIST(GET cpe_prefix_map ${value_idx} cpe_name_and_vendor)
      STRING(REGEX REPLACE "[^0-9\\.]" "" cleaned_version "${version}")
      IF(NOT "${cleaned_version}" STREQUAL "")
        SET(${var}
          "cpe:2.3:a:${cpe_name_and_vendor}:${cleaned_version}:*:*:*:*:*:*:*"
          PARENT_SCOPE)
      ENDIF()
      # A CPE with an empty version field would be malformed, in this
      # case the output stays empty.
      RETURN()
    ENDIF()
  ENDFOREACH()
ENDFUNCTION()
# Add dependency on a CMake ExternalProject.
# Currently, this only works for github hosted projects; the URL property
# of the external project needs to point to a release source download.
# Two URL forms are understood:
#   https://github.com/<user>/<repo>/releases/download/<tag>/<name>-<version>.<ext>
#   https://github.com/<user>/<repo>/archive/refs/tags/<tag>.(tar.gz|zip)
# In the second form the version is the tag, reduced to digits and dots.
# Any other URL is a fatal error.
#
# Argument:
#   name - name of the external project target
#
# This is a macro: the dependency is registered in the caller's scope via
# ADD_THIRD_PARTY_DEPENDENCY, and the helper variables URL, parsed,
# parsed_length, project_path, tag and ver are set in the caller's scope, too.
MACRO(ADD_CMAKE_EXTERNAL_PROJECT_DEPENDENCY name)
  ExternalProject_GET_PROPERTY(${name} URL)
  STRING(REGEX REPLACE "https://github.com/([^/]+/[^/]+)/releases/download/([^/]+)/.*-([^-]+)\\..*" "\\1;\\2;\\3" parsed "${URL}")
  # Split the result into components
  LIST(LENGTH parsed parsed_length)
  IF(parsed_length EQUAL 3)
    LIST(GET parsed 0 project_path)
    LIST(GET parsed 1 tag)
    LIST(GET parsed 2 ver)
    # The regular expression above removes the last file extension only.
    # For double extensions, e.g. foo-1.2.3.tar.gz, remove the leftover
    # ".tar" so that the version is 1.2.3 rather than 1.2.3.tar
    STRING(REGEX REPLACE "\\.tar$" "" ver "${ver}")
  ELSE()
    STRING(REGEX REPLACE "https://github.com/([^/]+/[^/]+)/archive/refs/tags/([^/]+)\\.(tar\\.gz|zip)$" "\\1;\\2;\\3" parsed "${URL}")
    LIST(LENGTH parsed parsed_length)
    IF(parsed_length GREATER 1)
      LIST(GET parsed 0 project_path)
      LIST(GET parsed 1 tag)
      STRING(REGEX REPLACE "[^0-9.]" "" ver "${tag}")
    ELSE()
      MESSAGE(FATAL_ERROR "Unexpected format for the download URL of project ${name} : (${URL})")
    ENDIF()
  ENDIF()
  ADD_THIRD_PARTY_DEPENDENCY(${name} "https://github.com/${project_path}" "${tag}" "${tag}" "${ver}" "")
ENDMACRO()
# Match third party component with supplier.
# CycloneDX documentation says it is
# "The organization that supplied the component.
# The supplier may often be the manufacturer, but may also be a distributor or repackager."
#
# Perhaps it can always be "MariaDB", but security team recommendation is
# different, more towards "author".
#
# Arguments:
#   repo_name - (lowercase) repository name of the component
#   repo_user - (lowercase) user or organization that owns the repository
#   varname   - name of the output variable, set in the caller's scope
#
# The supplier is determined as follows, first match wins:
#   1. the value of variable <repo_name>_SUPPLIER, if it is set and not empty
#   2. "MariaDB" for zlib and minizip, "Boost.org" for boost
#   3. repo_user with the first letter capitalized
#      ("MariaDB", if repo_user contains "mariadb")
FUNCTION (sbom_get_supplier repo_name repo_user varname)
  # Explicit per-component override. The variable name is composed from the
  # *value* of repo_name, e.g. ncurses_SUPPLIER
  IF(NOT "${${repo_name}_SUPPLIER}" STREQUAL "")
    SET(${varname} "${${repo_name}_SUPPLIER}" PARENT_SCOPE)
  ELSEIF (repo_name MATCHES "zlib|minizip")
    # stuff that is checked into our repos
    SET(${varname} "MariaDB" PARENT_SCOPE)
  ELSEIF (repo_name MATCHES "boost")
    SET(${varname} "Boost.org" PARENT_SCOPE)
  ELSEIF ("${repo_user}" STREQUAL "")
    # Nothing to derive the supplier from.
    # (STRING(SUBSTRING) below would fail on an empty string)
    SET(${varname} "" PARENT_SCOPE)
  ELSE()
    IF(repo_user MATCHES "mariadb-corporation|mariadb")
      SET(repo_user "MariaDB")
    ENDIF()
    # Capitalize just first letter in repo_user
    STRING(SUBSTRING "${repo_user}" 0 1 first_letter)
    STRING(SUBSTRING "${repo_user}" 1 -1 rest)
    STRING(TOUPPER "${first_letter}" first_letter_upper)
    SET(${varname} "${first_letter_upper}${rest}" PARENT_SCOPE)
  ENDIF()
ENDFUNCTION()
# Generate sbom.json (a CycloneDX document) in the top-level build directory.
#
# The list of third-party components is collected from
#  - git submodule information (cmake/submodule_info.cmake, taken from the
#    source directory if it exists there, otherwise generated into the
#    build directory), reduced to the submodules used by the current build
#  - EXTRA_SBOM_DEPENDENCIES, see the example inside the function
#  - CMake external projects (libfmt, pcre2, and, with columnstore,
#    boost and thrift)
#  - vendored code (zlib, and minizip in the connect engine)
#
# The document is produced from the template cmake/sbom.json.in, which uses
# UUID, TIMESTAMP, sbom_components, sbom_dependencies, GITHUB_REPO_USER,
# GITHUB_REPO_NAME, GIT_REV_SHORT and CPACK_PACKAGE_* variables.
FUNCTION(GENERATE_SBOM)
  IF(EXISTS ${PROJECT_SOURCE_DIR}/cmake/submodule_info.cmake)
    INCLUDE(${PROJECT_SOURCE_DIR}/cmake/submodule_info.cmake)
  ELSE()
    GENERATE_SUBMODULE_INFO(${PROJECT_BINARY_DIR}/cmake/submodule_info.cmake)
    INCLUDE(${PROJECT_BINARY_DIR}/cmake/submodule_info.cmake)
  ENDIF()

  # Remove submodule information that is irrelevant for the current build.
  # That is, if we do not build say columnstore, do not include
  # dependency info into SBOM
  IF(NOT TARGET wolfssl)
    # using openssl, rather than wolfssl
    LIST(FILTER ALL_SUBMODULES EXCLUDE REGEX wolfssl)
  ENDIF()
  IF(NOT WITH_WSREP)
    # wsrep is not compiled
    LIST(FILTER ALL_SUBMODULES EXCLUDE REGEX wsrep)
  ENDIF()
  IF(NOT TARGET columnstore)
    LIST(FILTER ALL_SUBMODULES EXCLUDE REGEX columnstore)
  ENDIF()
  IF(NOT TARGET rocksdb)
    # Rocksdb is not compiled
    LIST(FILTER ALL_SUBMODULES EXCLUDE REGEX rocksdb)
  ENDIF()
  IF(NOT TARGET s3)
    # S3 aria is not compiled
    LIST(FILTER ALL_SUBMODULES EXCLUDE REGEX storage/maria/libmarias3)
  ENDIF()
  # libmariadb/docs is not a library, so remove it
  LIST(FILTER ALL_SUBMODULES EXCLUDE REGEX libmariadb/docs)

  # It is possible to provide EXTRA_SBOM_DEPENDENCIES
  # and accompanying per-dependency data, to extend the generated sbom
  # document.
  # Example below injects an extra "ncurses" dependency using several
  # command line parameters for CMake.
  # -DEXTRA_SBOM_DEPENDENCIES=ncurses
  # -Dncurses_URL=https://github.com/mirror/ncurses
  # -Dncurses_TAG=v6.4
  # -Dncurses_VERSION=6.4
  # -Dncurses_DESCRIPTION="A fake extra dependency"
  SET(ALL_THIRD_PARTY ${ALL_SUBMODULES} ${EXTRA_SBOM_DEPENDENCIES})

  # Add dependencies on cmake ExternalProjects
  FOREACH(ext_proj libfmt pcre2)
    IF(TARGET ${ext_proj})
      ADD_CMAKE_EXTERNAL_PROJECT_DEPENDENCY(${ext_proj})
    ENDIF()
  ENDFOREACH()

  # ZLIB, and minizip that is taken from zlib's contributions.
  # ha_connect is checked here as well, to agree with the minizip
  # condition further below.
  IF(TARGET zlib OR TARGET connect OR TARGET ha_connect)
    # Path to the zlib.h file
    SET(ZLIB_HEADER_PATH "${PROJECT_SOURCE_DIR}/zlib/zlib.h")
    # Variable to store the extracted version
    SET(ZLIB_VERSION "")
    # Read the version string from the file
    FILE(STRINGS "${ZLIB_HEADER_PATH}" ZLIB_VERSION_LINE REGEX "#define ZLIB_VERSION.*")
    IF(NOT ZLIB_VERSION_LINE)
      MESSAGE(FATAL_ERROR "ZLIB_VERSION definition not found in ${ZLIB_HEADER_PATH}")
    ENDIF()
    # Extract the version number (the quoted part of the line) using a regex
    STRING(REGEX MATCH "\"([^\"]+)\"" ZLIB_VERSION_MATCH "${ZLIB_VERSION_LINE}")
    IF(NOT ZLIB_VERSION_MATCH)
      MESSAGE(FATAL_ERROR "Could not extract ZLIB version from the line: ${ZLIB_VERSION_LINE}")
    ENDIF()
    STRING(REPLACE "\"" "" ZLIB_VERSION "${ZLIB_VERSION_MATCH}")
    IF(NOT ("${ZLIB_VERSION}" MATCHES "[0-9]+\\.[0-9]+\\.[0-9]+"))
      MESSAGE(FATAL_ERROR "Unexpected zlib version '${ZLIB_VERSION}' parsed from ${ZLIB_HEADER_PATH}")
    ENDIF()
    IF(TARGET zlib)
      ADD_THIRD_PARTY_DEPENDENCY(zlib "https://github.com/madler/zlib"
        "v${ZLIB_VERSION}" "v${ZLIB_VERSION}" "${ZLIB_VERSION}" "Vendored zlib included into server source")
    ENDIF()
    IF(TARGET ha_connect OR TARGET connect)
      # minizip_PURL overrides the purl that would otherwise be computed
      # from the URL in the loop below
      SET(minizip_PURL "pkg:github/madler/zlib@${ZLIB_VERSION}?path=contrib/minizip")
      ADD_THIRD_PARTY_DEPENDENCY(minizip "https://github.com/madler/zlib?path=contrib/minizip"
        "v${ZLIB_VERSION}-minizip" "v${ZLIB_VERSION}-minizip" "${ZLIB_VERSION}"
        "Vendored minizip (zip.c, unzip.c, ioapi.c) in connect engine, copied from zlib/contributions")
    ENDIF()
  ENDIF()

  IF(TARGET columnstore)
    # Determining if Columnstore builds Boost is tricky.
    # The presence of the external_boost target isn't reliable, it is always
    # present. Instead, we check indirectly by verifying if one of the libraries
    # built by the external project exists in the build directory.
    IF(TARGET external_boost AND TARGET boost_filesystem)
      GET_TARGET_PROPERTY(boost_filesystem_loc boost_filesystem IMPORTED_LOCATION)
      STRING(FIND "${boost_filesystem_loc}" "${CMAKE_BINARY_DIR}" idx)
      IF(idx EQUAL 0)
        # Now we can be reasonably sure, external_boost is indeed an external project
        ExternalProject_GET_PROPERTY(external_boost URL)
        # Extract the version from the URL using string manipulation.
        # URL is quoted, it must stay a single argument.
        STRING(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" BOOST_VERSION "${URL}")
        SET(tag boost-${BOOST_VERSION})
        ADD_THIRD_PARTY_DEPENDENCY(boost
          "https://github.com/boostorg/boost" "${tag}" "${tag}" "${BOOST_VERSION}"
          "Boost library, linked with columnstore engine")
      ENDIF()
    ENDIF()
    IF(TARGET external_thrift)
      ADD_CMAKE_EXTERNAL_PROJECT_DEPENDENCY(external_thrift)
    ENDIF()
  ENDIF()

  # JSON fragments, inserted into the template
  #  sbom_components   - elements of the "components" array
  #  sbom_dependencies - elements of the "dependencies" array: the server,
  #                      that depends on every component, followed by one
  #                      {"ref": ...} entry per component (reflist)
  SET(sbom_components "")
  SET(sbom_dependencies "\n    {
      \"ref\": \"${CPACK_PACKAGE_NAME}\",
      \"dependsOn\": [" )
  # Initialize explicitly, a variable with this name in the calling scope
  # must not leak into the document
  SET(reflist "")
  SET(first ON)
  FOREACH(dep ${ALL_THIRD_PARTY})
    SET(revision "${${dep}_REVISION}")
    SET(tag "${${dep}_TAG}")
    SET(desc "${${dep}_DESCRIPTION}")
    # Tag and revision substitute each other, if one of them is missing
    IF((tag STREQUAL "no-tag") OR (NOT tag))
      SET(tag "${revision}")
    ENDIF()
    IF(NOT revision AND tag)
      SET(revision "${tag}")
    ENDIF()
    # Version is either given explicitly, or it is the tag, or the revision
    SET(version "${${dep}_VERSION}")
    IF(NOT version)
      IF(tag)
        SET(version "${tag}")
      ELSEIF(revision)
        SET(version "${revision}")
      ENDIF()
    ENDIF()

    EXTRACT_REPO_NAME_AND_USER("${${dep}_URL}" repo_name repo_user)

    IF(first)
      SET(first OFF)
    ELSE()
      STRING(APPEND sbom_components ",")
      STRING(APPEND sbom_dependencies ",")
    ENDIF()
    SET(bom_ref "${repo_name}-${version}")
    IF(desc)
      # Description is free text, escape backslash and double quote for JSON
      STRING(REPLACE "\\" "\\\\" desc "${desc}")
      STRING(REPLACE "\"" "\\\"" desc "${desc}")
      SET(desc_line "\n      \"description\": \"${desc}\",")
    ELSE()
      SET(desc_line "")
    ENDIF()
    STRING(TOLOWER "${repo_user}" repo_user_lower)
    STRING(TOLOWER "${repo_name}" repo_name_lower)
    # <name>_PURL, if set, takes precedence over the computed github purl
    IF (${repo_name_lower}_PURL)
      SET(purl "${${repo_name_lower}_PURL}")
    ELSE()
      SET(purl "pkg:github/${repo_user_lower}/${repo_name_lower}@${revision}")
    ENDIF()
    SBOM_GET_SUPPLIER("${repo_name_lower}" "${repo_user_lower}" supplier)
    SBOM_GET_CPE("${repo_name_lower}" "${version}" cpe)
    IF(cpe)
      SET(cpe_line "\n      \"cpe\": \"${cpe}\",")
    ELSE()
      SET(cpe_line "")
    ENDIF()
    STRING(APPEND sbom_components "
    {
      \"bom-ref\": \"${bom_ref}\",
      \"type\": \"library\",
      \"name\": \"${repo_name}\",
      \"version\": \"${version}\",${desc_line}
      \"purl\": \"${purl}\",${cpe_line}
      \"supplier\": {
        \"name\": \"${supplier}\"
      }
    }")
    STRING(APPEND sbom_dependencies "
        \"${bom_ref}\"")
    STRING(APPEND reflist ",\n    {\"ref\": \"${bom_ref}\"}")
  ENDFOREACH()
  STRING(APPEND sbom_dependencies "\n      ]\n    }${reflist}\n")

  STRING(TIMESTAMP TIMESTAMP "%Y-%m-%dT%H:%M:%SZ" UTC)
  EXTRACT_REPO_NAME_AND_USER("${GIT_REMOTE_ORIGIN_URL}" GITHUB_REPO_NAME GITHUB_REPO_USER)
  #github-purl needs lowercased user and project names
  STRING(TOLOWER "${GITHUB_REPO_NAME}" GITHUB_REPO_NAME)
  STRING(TOLOWER "${GITHUB_REPO_USER}" GITHUB_REPO_USER)
  IF(NOT DEFINED CPACK_PACKAGE_VERSION)
    SET(CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}")
  ENDIF()
  # Serial number of the document. The name the UUID is derived from
  # includes package, version, source revision and timestamp, so that
  # different SBOMs do not all share one and the same serial number.
  STRING(UUID UUID NAMESPACE ee390ca3-e70f-4b35-808e-a512489156f5
    NAME "SBOM-${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION}-${GIT_REV_SHORT}-${TIMESTAMP}"
    TYPE SHA1)
  # PROJECT_SOURCE_DIR is used for the template location (like for
  # submodule_info.cmake above), so that the path does not depend on the
  # directory of the file that calls this function.
  CONFIGURE_FILE(${PROJECT_SOURCE_DIR}/cmake/sbom.json.in ${CMAKE_BINARY_DIR}/sbom.json)
ENDFUNCTION()

View File

@ -0,0 +1,90 @@
# Generate a cmake script containing git submodule version information.
# This is typically done during "make dist", and is needed for SBOM generation.
# During the build from source package, git info is no longer available.
#
# What is in the generated script
# - Variable ALL_SUBMODULES, a list of submodule subdirectories
# - for every entry in this list, there are 3 related variables defined
#   ${name}_TAG, set to the exact git tag of the submodule, or to "no-tag",
#                if the submodule commit is not tagged
#   ${name}_REVISION, set to the abbreviated git hash of the submodule
#   ${name}_URL, set to the git URL of the submodule ("origin" remote)
#   For example, following will be generated for wolfssl
#     SET(extra/wolfssl/wolfssl_TAG v5.7.2-stable)
#     SET(extra/wolfssl/wolfssl_REVISION <abbreviated commit hash>)
#     SET(extra/wolfssl/wolfssl_URL https://github.com/wolfSSL/wolfssl.git)
# - GIT_REMOTE_ORIGIN_URL, URL of the "origin" remote of the server
#   repository. A value already set by the caller is kept, and
#   https://github.com/mariadb/server.git is used if it can't be determined
# - GIT_REV_SHORT, abbreviated git hash of the server source
#
# Argument:
#   outfile - full path of the script to generate
FUNCTION(generate_submodule_info outfile)
  FIND_PACKAGE(Git REQUIRED)
  # Executed by "git submodule foreach" for every submodule.
  # Prints 3 lines: tag (or no-tag), abbreviated hash, origin URL
  SET(git_cmd "(git describe --tags --exact 2>/dev/null || echo no-tag) && git rev-parse --short HEAD && git remote get-url origin")
  # Run git with LC_ALL=C, the "Entering '<submodule>'" lines are parsed
  # below, and therefore must not be translated.
  SET(ENV_LC_ALL "$ENV{LC_ALL}")
  SET(ENV{LC_ALL} C)
  EXECUTE_PROCESS(
    COMMAND
      ${GIT_EXECUTABLE} submodule foreach --recursive ${git_cmd}
    OUTPUT_VARIABLE outvar
    RESULT_VARIABLE res
    WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
  )
  # Restore the environment. Setting an empty value clears the variable,
  # which is what is needed if LC_ALL was not set before.
  SET(ENV{LC_ALL} "${ENV_LC_ALL}")
  IF(NOT "${res}" EQUAL 0)
    MESSAGE(FATAL_ERROR "'git submodule foreach' failed")
  ENDIF()

  # The output consists of groups of 4 lines per submodule,
  # counter is the position of the line inside its group
  #   0: Entering '<submodule path>'
  #   1: tag
  #   2: revision
  #   3: origin url
  STRING(REPLACE "\n" ";" out_list "${outvar}")
  SET(out_string)
  SET(all_submodules)
  SET(counter 0)
  FOREACH(s ${out_list})
    IF(${counter} EQUAL 0)
      IF (NOT("${s}" MATCHES "^Entering '"))
        MESSAGE(FATAL_ERROR "Unexpected output ${outvar}")
      ENDIF()
      # Cut off the leading "Entering '" (10 characters) and the trailing "'"
      STRING(LENGTH "${s}" slen)
      MATH(EXPR substr_len "${slen} - 11")
      STRING(SUBSTRING "${s}" 10 ${substr_len} submodule)
      LIST(APPEND all_submodules ${submodule})
    ELSEIF(${counter} EQUAL 1)
      # tag
      STRING(APPEND out_string "SET(${submodule}_TAG ${s})\n")
    ELSEIF(${counter} EQUAL 2)
      # get revision
      STRING(APPEND out_string "SET(${submodule}_REVISION ${s})\n")
    ELSEIF(${counter} EQUAL 3)
      # origin url
      STRING(APPEND out_string "SET(${submodule}_URL ${s})\n")
    ELSE()
      MESSAGE(FATAL_ERROR "should never happen")
    ENDIF()
    MATH(EXPR counter "(${counter}+1)%4")
  ENDFOREACH()
  IF(NOT counter EQUAL 0)
    # Last group is incomplete
    MESSAGE(FATAL_ERROR "Unexpected output ${outvar}")
  ENDIF()
  STRING(APPEND out_string "SET(ALL_SUBMODULES \"${all_submodules}\")\n")

  # Also while not strictly "submodule" info, get the origin url
  IF(NOT GIT_REMOTE_ORIGIN_URL)
    EXECUTE_PROCESS(
      COMMAND
        ${GIT_EXECUTABLE} remote get-url origin
      OUTPUT_VARIABLE GIT_REMOTE_ORIGIN_URL
      OUTPUT_STRIP_TRAILING_WHITESPACE
      RESULT_VARIABLE res
      WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
    )
    IF(("${GIT_REMOTE_ORIGIN_URL}" STREQUAL "") OR NOT("${res}" EQUAL 0))
      # Meh, origin is not called "origin", and there is no GIT_REMOTE_ORIGIN_URL
      # set. Fallback to hardcoded default
      SET(GIT_REMOTE_ORIGIN_URL https://github.com/mariadb/server.git)
    ENDIF()
  ENDIF()
  STRING(APPEND out_string "SET(GIT_REMOTE_ORIGIN_URL \"${GIT_REMOTE_ORIGIN_URL}\")\n")

  # Abbreviated hash of the server source itself.
  # The result of the command is not checked, GIT_REV_SHORT is left empty
  # in the generated script if the hash can't be determined.
  EXECUTE_PROCESS(
    COMMAND
      ${GIT_EXECUTABLE} rev-parse --short HEAD
    OUTPUT_VARIABLE outvar
    OUTPUT_STRIP_TRAILING_WHITESPACE
    RESULT_VARIABLE res
    WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
  )
  STRING(APPEND out_string "SET(GIT_REV_SHORT \"${outvar}\")\n")

  # Quoted, the content has semicolons (ALL_SUBMODULES list) and must be
  # passed on as a single value
  SET(CMAKE_CONFIGURABLE_FILE_CONTENT "${out_string}")
  CONFIGURE_FILE(${CMAKE_SOURCE_DIR}/cmake/configurable_file_content.in ${outfile} @ONLY)
ENDFUNCTION()

View File

@ -147,6 +147,9 @@ IF(NOT EXISTS ${PACKAGE_DIR}/Docs/INFO_SRC)
CREATE_INFO_SRC(${PACKAGE_DIR}/Docs)
ENDIF()
# Store git submodule information (tag, revision and URL of every submodule)
# as cmake/submodule_info.cmake inside the package directory.
INCLUDE(${CMAKE_SOURCE_DIR}/cmake/generate_submodule_info.cmake)
GENERATE_SUBMODULE_INFO(${PACKAGE_DIR}/cmake/submodule_info.cmake)
# In case we used CPack, it could have copied some
# extra files that are not usable on different machines.
FILE(REMOVE ${PACKAGE_DIR}/CMakeCache.txt)

45
cmake/sbom.json.in Normal file
View File

@ -0,0 +1,45 @@
{
"serialNumber": "urn:uuid:@UUID@",
"version": 1,
"$schema": "http://cyclonedx.org/schema/bom-1.6.schema.json",
"bomFormat": "CycloneDX",
"specVersion": "1.6",
"metadata": {
"lifecycles": [
{"phase": "build"}
],
"component": {
"bom-ref": "@CPACK_PACKAGE_NAME@",
"type": "application",
"name": "@CPACK_PACKAGE_NAME@",
"version": "@CPACK_PACKAGE_VERSION@",
"supplier": {
"name": "@CPACK_PACKAGE_VENDOR@",
"url": [
"@CPACK_PACKAGE_URL@"
]
},
"purl": "pkg:github/@GITHUB_REPO_USER@/@GITHUB_REPO_NAME@@@GIT_REV_SHORT@",
"cpe": "cpe:2.3:a:mariadb:mariadb:@CPACK_PACKAGE_VERSION@:*:*:*:*:*:*:*"
},
"authors": [
{
"email": "info@mariadb.com",
"name": "MariaDB"
}
],
"properties": [
{
"name": "package_name",
"value": "@CPACK_PACKAGE_FILE_NAME@"
}
],
"timestamp": "@TIMESTAMP@"
},
"components": [
@sbom_components@
],
"dependencies": [
@sbom_dependencies@
]
}