MCOL-5175 Increase the maximum effective length of S3 secret used as SHA256 key producing S3 signature (#2860)

Co-authored-by: Roman Nozdrin <rnozdrin@mariadb.com>
2025-07-30 19:23:07 +03:00 · 2023-06-07 13:24:42 +01:00
parent 727170dc66
commit 23a969dbe2
3 changed files with 55 additions and 45 deletions
--- a/storage-manager/src/S3Storage.cpp
+++ b/storage-manager/src/S3Storage.cpp
@ -28,14 +28,14 @@
 #include <boost/uuid/random_generator.hpp>
 #define BOOST_SPIRIT_THREADSAFE
 #ifndef __clang__
-  #pragma GCC diagnostic push
-  #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
 #endif

 #include <boost/property_tree/ptree.hpp>

 #ifndef __clang__
-  #pragma GCC diagnostic pop
+#pragma GCC diagnostic pop
 #endif
 #include <boost/property_tree/json_parser.hpp>
 #include "Utilities.h"
@ -61,8 +61,7 @@ static size_t WriteCallback(void* contents, size_t size, size_t nmemb, void* use
 inline bool retryable_error(uint8_t s3err)
 {
  return (s3err == MS3_ERR_RESPONSE_PARSE || s3err == MS3_ERR_REQUEST_ERROR || s3err == MS3_ERR_OOM ||
-          s3err == MS3_ERR_IMPOSSIBLE || s3err == MS3_ERR_SERVER ||
-          s3err == MS3_ERR_AUTH_ROLE);
+          s3err == MS3_ERR_IMPOSSIBLE || s3err == MS3_ERR_SERVER || s3err == MS3_ERR_AUTH_ROLE);
 }

 // Best effort to map the errors returned by the ms3 API to linux errnos
@ -76,7 +75,7 @@ const int s3err_to_errno[] = {
    EBADMSG,       // 4 MS3_ERR_RESPONSE_PARSE
    ECOMM,         // 5 MS3_ERR_REQUEST_ERROR
    ENOMEM,        // 6 MS3_ERR_OOM
-    EINVAL,        // 7 MS3_ERR_IMPOSSIBLE.  Will have to look through the code to find out what this is exactly.
+    EINVAL,  // 7 MS3_ERR_IMPOSSIBLE.  Will have to look through the code to find out what this is exactly.
    EKEYREJECTED,  // 8 MS3_ERR_AUTH
    ENOENT,        // 9 MS3_ERR_NOT_FOUND
    EPROTO,        // 10 MS3_ERR_SERVER
@ -131,6 +130,7 @@ S3Storage::S3Storage(bool skipRetry) : skipRetryableErrors(skipRetry)
  string use_http = tolower(config->getValue("S3", "use_http"));
  string ssl_verify = tolower(config->getValue("S3", "ssl_verify"));
  string port_number = config->getValue("S3", "port_number");
+  string libs3_debug = config->getValue("S3", "libs3_debug");

  bool keyMissing = false;
  isEC2Instance = false;
@ -212,7 +212,10 @@ S3Storage::S3Storage(bool skipRetry) : skipRetryableErrors(skipRetry)
  endpoint = config->getValue("S3", "endpoint");

  ms3_library_init();
-  // ms3_debug();
+  if (libs3_debug == "enabled")
+  {
+    ms3_debug();
+  }
  testConnectivityAndPerms();
 }

@ -308,7 +311,8 @@ void S3Storage::testConnectivityAndPerms()
  err = exists(testObjKey, &_exists);
  if (err)
  {
-    logger->log(LOG_CRIT, "S3Storage::exists() failed on nonexistent object. Check 'ListBucket' permissions.");
+    logger->log(LOG_CRIT,
+                "S3Storage::exists() failed on nonexistent object. Check 'ListBucket' permissions.");
    FAIL(HEAD)
  }
  logger->log(LOG_INFO, "S3Storage: S3 connectivity & permissions are OK");
@ -518,8 +522,10 @@ int S3Storage::putObject(const std::shared_ptr<uint8_t[]> data, size_t len, cons
                  s3err_msgs[s3err], bucket.c_str(), destKey.c_str());
    errno = s3err_to_errno[s3err];
    if (s3err == MS3_ERR_ENDPOINT)
-      logger->log(LOG_ERR, "S3Storage::putObject(): Bucket location not match provided endpoint:, bucket = %s, endpoint = %s.",
-                  bucket.c_str(), endpoint.c_str());
+      logger->log(
+          LOG_ERR,
+          "S3Storage::putObject(): Bucket location not match provided endpoint:, bucket = %s, endpoint = %s.",
+          bucket.c_str(), endpoint.c_str());
    return -1;
  }
  return 0;
--- a/storage-manager/storagemanager.cnf.in
+++ b/storage-manager/storagemanager.cnf.in
@ -1,9 +1,9 @@
 # This is the configuration file for StorageManager (SM)

 [ObjectStorage]
-# 'service' is the module that SM will use for cloud IO.  
+# 'service' is the module that SM will use for cloud IO.
 # Current options are "LocalStorage" and "S3".
-# "LocalStorage" will use a directory on the local filesystem as if it 
+# "LocalStorage" will use a directory on the local filesystem as if it
 # were cloud storage.  "S3" is the module that uses real cloud storage.
 # Both modules have their own sections below.
 #
@ -20,19 +20,19 @@ service = LocalStorage
 # Regarding tuning, object stores do not support modifying stored data;
 # entire objects must be replaced on modification, and entire
 # objects are fetched on read.  This results in read and write amplification.
-# 
+#
 # Network characteristics are important to consider.  If the machine is
-# in the cloud and has low-latency and high-bandwidth access to the object 
-# store, then the limiting factor may be the get/put rate imposed by your 
+# in the cloud and has low-latency and high-bandwidth access to the object
+# store, then the limiting factor may be the get/put rate imposed by your
 # cloud provider.  In that case, using a larger object size will reduce
-# the number of get/put ops required to perform a task.  If the machine has 
-# low-bandwidth access to the object store, a lower value will reduce 
-# the degree of read/write amplification, reducing the total amount of data 
+# the number of get/put ops required to perform a task.  If the machine has
+# low-bandwidth access to the object store, a lower value will reduce
+# the degree of read/write amplification, reducing the total amount of data
 # to transfer.
 #
-# Of course, you will only really know how a complex system works by 
+# Of course, you will only really know how a complex system works by
 # experimentation.  If you experience poor performance using the default,
-# our suggestion is to reduce it to 2M and try again, then increase it to 
+# our suggestion is to reduce it to 2M and try again, then increase it to
 # 10M and try again.
 #
 # object_size should not be changed after you have run postConfigure.
@ -56,33 +56,33 @@ journal_path = @ENGINE_DATADIR@/storagemanager/journal
 # max_concurrent_downloads is what it sounds like, per node.
 # This is not a global setting.
 max_concurrent_downloads = 21
- 
+
 # max_concurrent_uploads is what it sounds like, per node.
-# This is not a global setting.  Currently, a file is locked while 
+# This is not a global setting.  Currently, a file is locked while
 # modifications to it are synchronized with cloud storage.  If your network
 # has low upstream bandwidth, consider lowering this value to the minimum
-# necessary to saturate your network.  This will reduce the latency of certain 
-# operations and improve your experience. 
+# necessary to saturate your network.  This will reduce the latency of certain
+# operations and improve your experience.
 max_concurrent_uploads = 21

-# common_prefix_depth is the depth of the common prefix that all files 
-# managed by SM have.  Ex: /var/lib/columnstore/data1, and 
+# common_prefix_depth is the depth of the common prefix that all files
+# managed by SM have.  Ex: /var/lib/columnstore/data1, and
 # /var/lib/columnstore/data2 differ at the 4th directory element,
-# so they have a common prefix depth of 3.  For Columnstore, it should 
+# so they have a common prefix depth of 3.  For Columnstore, it should
 # be set to the number of directories that precede the data* directories.
-# The default value of 3 works with package installations, where data* would be in 
+# The default value of 3 works with package installations, where data* would be in
 # /var/lib/columnstore
 #
 # This value is used to manage the ownership of prefixes between
 # StorageManager instances that share a filesystem.  For example,
 # if you have SM data stored on a distributed filesystem for fault-tolerance
-# reasons, and all nodes have it mounted, SM instances will be able to 
-# negotiate ownership of data from a failed instance. 
+# reasons, and all nodes have it mounted, SM instances will be able to
+# negotiate ownership of data from a failed instance.
 common_prefix_depth = 3

 [S3]
 # These should be self-explanatory.  Region can be blank or commented
-# if using a private cloud storage system.  Bucket has to be set to 
+# if using a private cloud storage system.  Bucket has to be set to
 # something though.  Obviously, do not change these after running
 # postConfigure, or SM will not be able to find your data.
 region = some_region
@ -102,29 +102,29 @@ bucket = some_bucket
 # prefix as short as possible for performance reasons.
 # prefix = cs/

-# Put your HMAC access keys here.  Keys can also be set through the 
+# Put your HMAC access keys here.  Keys can also be set through the
 # environment vars AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY.
 # If set, SM will use these values and ignore the envvars.
 # aws_access_key_id =
-# aws_secret_access_key = 
+# aws_secret_access_key =

 # If you want StorageManager to assume an IAM role to use for its S3
 # accesses, specify the name of the role in iam_role_name.  The name
 # should be only the name, rather than the full path.
 #
-# The specified role must already exist and have permission to get, put, 
+# The specified role must already exist and have permission to get, put,
 # delete, and 'head' on the specified S3 bucket.
-# iam_role_name = 
+# iam_role_name =

 # If an IAM role is specified, sts_endpoint and sts_region are used to specify
 # which STS server & region to use to assume the role.  The default for
 # sts_endpoint is 'sts.amazonaws.com', and the default for sts_region is
 # 'us-east-1'.
-# sts_region = 
+# sts_region =
 # sts_endpoint =

 # If running on AWS EC2 instance the value ec2_iam_mode can be set
-# 'enabled' and allow StorageManager to detect IAM role assigned 
+# 'enabled' and allow StorageManager to detect IAM role assigned
 # to EC2 instances. This will then use the temporary credentials
 # provided by EC2 metadata for S3 authentication access/secret keys.
 # ec2_iam_mode=enabled
@ -137,6 +137,10 @@ bucket = some_bucket
 # Default is ssl_verify = enabled
 # ssl_verify = disabled

+# libs3_debug controls the S3 library's debugging printouts; set it to exactly 'enabled' (lowercase) to turn them on.
+# Default is libs3_debug = disabled
+# libs3_debug = disabled
+
 # The LocalStorage section configures the 'local storage' module
 # if specified by ObjectStorage/service.
 [LocalStorage]
@ -154,15 +158,15 @@ max_latency = 50000
 [Cache]

 # cache_size can be specified in terms of tera-, giga-, mega-, kilo-
-# bytes using T/t G/g M/m K/k.  Drive manufacturers use a power-of-10 
-# notion of what that means, which means 1m = 1,000,000 bytes.  These 
-# settings use the programmer's power-of-2 notion, which means 
-# 1m = 1,048,576 bytes. 
+# bytes using T/t G/g M/m K/k.  Drive manufacturers use a power-of-10
+# notion of what that means, which means 1m = 1,000,000 bytes.  These
+# settings use the programmer's power-of-2 notion, which means
+# 1m = 1,048,576 bytes.
 #
 # This number will include space used by journal files, but does not
-# include space used by metadata files.  In this version, journal data 
-# currently being written and downloads in progress are also not accounted 
-# for, so disk usage can temporarily go above this number.  You will want to 
+# include space used by metadata files.  In this version, journal data
+# currently being written and downloads in progress are also not accounted
+# for, so disk usage can temporarily go above this number.  You will want to
 # leave a little space available on the mount for those operations.
 cache_size = 2g