1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-08-08 14:22:09 +03:00
Files
mariadb-columnstore-engine/storage-manager/storagemanager.cnf.in
Aleksei Antipovskii 4aa281645e feat(SM): MCOL-5785 S3Storage improvements
Update libmarias3
fix build with the recent libmarias3

feat(SM): MCOL-5785 Add timeout options for S3Storage

    In some unfortunate situations StorageManager may get stuck on
    network operations. This commit adds the ability to set network
    timeouts which will help to ensure that the system is more
    responsive.

feat(SM): MCOL-5785 Add smps & smkill tools

    * `smps` shows all active S3 network operations
    * `smkill` terminates S3 network operations

    NB! At the moment smkill is able to terminate operations
    that are stuck on retries, but not hang inside the libcurl
    call. In other words if you want to terminate all operations
    you should configure `connect_timeout` & `timeout`

Install smkill & smps

Add install for new binaries
2024-08-21 20:45:38 +04:00

186 lines
8.1 KiB
Plaintext

# This is the configuration file for StorageManager (SM)
[ObjectStorage]
# 'service' is the module that SM will use for cloud IO.
# Current options are "LocalStorage" and "S3".
# "LocalStorage" will use a directory on the local filesystem as if it
# were cloud storage. "S3" is the module that uses real cloud storage.
# Both modules have their own sections below.
#
# Note, changing this after running postConfigure will leave you with
# an inconsistent view of the data.
service = LocalStorage
# object_size is a tuneable value, but also implies a maximum capacity.
# Each file managed by StorageManager is broken into chunks of
# object_size bytes. Each chunk is stored in the cache as a file,
# so the filesystem the cache is put on needs to have enough inodes to
# support at least cache_size/object_size files.
#
# Regarding tuning, object stores do not support modifying stored data;
# entire objects must be replaced on modification, and entire
# objects are fetched on read. This results in read and write amplification.
#
# Network characteristics are important to consider. If the machine is
# in the cloud and has low-latency and high-bandwidth access to the object
# store, then the limiting factor may be the get/put rate imposed by your
# cloud provider. In that case, using a larger object size will reduce
# the number of get/put ops required to perform a task. If the machine has
# low-bandwidth access to the object store, a lower value will reduce
# the degree of read/write amplification, reducing the total amount of data
# to transfer.
#
# Of course, you will only really know how a complex system works by
# experimentation. If you experience poor performance using the default,
# our suggestion is to reduce it to 2M and try again, then increase it to
# 10M and try again.
#
# object_size should not be changed after you have run postConfigure.
# Things may fail that normally wouldn't. This is a temporary limitation which
# will be addressed in later versions.
object_size = 5M
# metadata_path is where SM will put its metadata. From the caller's
# perspective, each file will be represented by a metadata file in this
# path. A metadata file is a small json document enumerating the objects
# that compose the file.
metadata_path = @ENGINE_DATADIR@/storagemanager/metadata
# journal_path is where SM will store deltas to apply to objects.
# If an existing object is modified, that modification (aka delta) will
# be written to a journal file corresponding to that object. Periodically,
# those deltas will be merged with the object they apply to, resulting
# in a new object.
journal_path = @ENGINE_DATADIR@/storagemanager/journal
# max_concurrent_downloads is what it sounds like, per node.
# This is not a global setting.
max_concurrent_downloads = 21
# max_concurrent_uploads is what it sounds like, per node.
# This is not a global setting. Currently, a file is locked while
# modifications to it are synchronized with cloud storage. If your network
# has low upstream bandwidth, consider lowering this value to the minimum
# necessary to saturate your network. This will reduce the latency of certain
# operations and improve your experience.
max_concurrent_uploads = 21
# common_prefix_depth is the depth of the common prefix that all files
# managed by SM have. Ex: /var/lib/columnstore/data1, and
# /var/lib/columnstore/data2 differ at the 4th directory element,
# so they have a common prefix depth of 3. For Columnstore, it should
# be set to the number of directories that precede the data* directories.
# The default value of 3 works with package installations, where data* would be in
# /var/lib/columnstore
#
# This value is used to manage the ownership of prefixes between
# StorageManager instances that share a filesystem. For example,
# if you have SM data stored on a distributed filesystem for fault-tolerance
# reasons, and all nodes have it mounted, SM instances will be able to
# negotiate ownership of data from a failed instance.
common_prefix_depth = 3
[S3]
# These should be self-explanatory. Region can be blank or commented
# out if using a private cloud storage system. Bucket has to be set to
# something though. Obviously, do not change these after running
# postConfigure, or SM will not be able to find your data.
region = some_region
bucket = some_bucket
# Specify the endpoint to connect to if using an S3 compatible object
# store like Google Cloud Storage or IBM's Cleversafe.
# The default endpoint if left unchanged is "s3.amazonaws.com"
# The default endpoint is only valid for amazon buckets located in
# region us-east-1. All other regions require setting this to region
# specific endpoints. Format for this is usually s3.[region].amazonaws.com
# endpoint = storage.googleapis.com
# For the best performance do not specify a prefix. It may be useful,
# however, if you must use a bucket with other data in it. Because
# of the way object stores route data and requests, keep the
# prefix as short as possible for performance reasons.
# prefix = cs/
# Put your HMAC access keys here. Keys can also be set through the
# environment vars AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY.
# If set, SM will use these values and ignore the envvars.
# aws_access_key_id =
# aws_secret_access_key =
# If you want StorageManager to assume an IAM role to use for its S3
# accesses, specify the name of the role in iam_role_name. The name
# should be only the name, rather than the full path.
#
# The specified role must already exist and have permission to get, put,
# delete, and 'head' on the specified S3 bucket.
# iam_role_name =
# If an IAM role is specified, sts_endpoint and sts_region are used to specify
# which STS server & region to use to assume the role. The default for
# sts_endpoint is 'sts.amazonaws.com', and the default for sts_region is
# 'us-east-1'.
# sts_region =
# sts_endpoint =
# If running on an AWS EC2 instance, ec2_iam_mode can be set to
# 'enabled' to allow StorageManager to detect the IAM role assigned
# to the EC2 instance. StorageManager will then use the temporary
# credentials provided by EC2 metadata as the access/secret keys for
# S3 authentication.
# ec2_iam_mode=enabled
# Set use_http to 'enabled' to connect to the host using http instead of https.
# The default is use_http = disabled (https)
# use_http = enabled
# Set ssl_verify to 'disabled' to turn off SSL verification.
# Default is ssl_verify = enabled
# ssl_verify = disabled
# libs3_debug setting controls S3 library debugging printouts
# Default is libs3_debug = disabled
# libs3_debug = disabled
# Sets the maximum time in seconds for the connection phase to take. This
# timeout only limits the connection phase; it has no impact once the connection
# is established. If no value is set, the default libcurl connect timeout
# (300 seconds, per the libcurl documentation) will be used.
# connect_timeout = 5.5
# Sets the maximum time in seconds for the entire transfer operation to take.
# Default (no value) - no timeout at all.
# timeout = 7.5
# The LocalStorage section configures the 'local storage' module
# if specified by ObjectStorage/service.
[LocalStorage]
# path specifies where the module should store object data.
path = @ENGINE_DATADIR@/storagemanager/fake-cloud
# fake_latency introduces artificial latency to fake-cloud operations.
# Useful for debugging.
fake_latency = n
# max_latency specifies how much latency should be added to fake-cloud
# ops. Values are randomized between 1 and max_latency in microseconds.
max_latency = 50000
[Cache]
# cache_size can be specified in terms of tera-, giga-, mega-, kilo-
# bytes using T/t G/g M/m K/k. Drive manufacturers use a power-of-10
# notion of what that means, which means 1m = 1,000,000 bytes. These
# settings use the programmer's power-of-2 notion, which means
# 1m = 1,048,576 bytes.
#
# This number will include space used by journal files, but does not
# include space used by metadata files. In this version, journal data
# currently being written and downloads in progress are also not accounted
# for, so disk usage can temporarily go above this number. You will want to
# leave a little space available on the mount for those operations.
cache_size = 2g
# Cache/path is where cached objects get stored.
path = @ENGINE_DATADIR@/storagemanager/cache