From f9036aecd4ccc1886a303c0e1cff730aa74c0a32 Mon Sep 17 00:00:00 2001 From: Quentin Gliech Date: Wed, 13 Oct 2021 16:48:16 +0200 Subject: [PATCH] Make telemetry configurable Also allows opting-out of the OTLP exporter to remove the dependency to protoc when building. --- .dockerignore | 1 + Cargo.lock | 1 + Dockerfile | 4 +- crates/cli/Cargo.toml | 8 +++- crates/cli/src/main.rs | 34 ++++++++++--- crates/cli/src/telemetry.rs | 87 +++++++++++++++++++++++++++------- crates/config/src/lib.rs | 7 +++ crates/config/src/telemetry.rs | 79 ++++++++++++++++++++++++++++++ 8 files changed, 195 insertions(+), 26 deletions(-) create mode 100644 crates/config/src/telemetry.rs diff --git a/.dockerignore b/.dockerignore index 3ea08526..759985e5 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,2 +1,3 @@ target/ +crates/*/target .git/ diff --git a/Cargo.lock b/Cargo.lock index ddc410fc..ddbd99c3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1407,6 +1407,7 @@ dependencies = [ "tracing", "tracing-opentelemetry", "tracing-subscriber", + "url", "warp", ] diff --git a/Dockerfile b/Dockerfile index 11888c27..fc2ca413 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,8 @@ ARG RUSTC_VERSION=1.55.0 # cargo-chef helps with caching dependencies between builds -FROM lukemathwalker/cargo-chef:latest-rust-${RUSTC_VERSION}-alpine AS chef -WORKDIR app +FROM lukemathwalker/cargo-chef:latest-rust-${RUSTC_VERSION} AS chef +WORKDIR /app FROM chef AS planner COPY . . diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml index 0c9ad4ba..02bf453c 100644 --- a/crates/cli/Cargo.toml +++ b/crates/cli/Cargo.toml @@ -21,12 +21,18 @@ serde_yaml = "0.8.21" warp = "0.3.1" argon2 = { version = "0.3.1", features = ["password-hash"] } opentelemetry = { version = "0.16.0", features = ["trace", "metrics", "rt-tokio"] } -opentelemetry-otlp = { version = "0.9.0", features = ["trace", "metrics"] } +opentelemetry-otlp = { version = "0.9.0", features = ["trace", "metrics"], optional = true } opentelemetry-semantic-conventions = "0.8.0" tracing-opentelemetry = "0.15.0" +url = "2.2.2" mas-config = { path = "../config" } mas-core = { path = "../core" } [dev-dependencies] indoc = "1.0.3" + +[features] +default = ["otlp"] +# Enable Opentelemetry OTLP exporter. Requires "protoc" +otlp = ["opentelemetry-otlp"] diff --git a/crates/cli/src/main.rs b/crates/cli/src/main.rs index 1dcefff7..e43f70a7 100644 --- a/crates/cli/src/main.rs +++ b/crates/cli/src/main.rs @@ -22,8 +22,11 @@ use std::path::PathBuf; use anyhow::Context; use clap::Clap; -use mas_config::ConfigurationSection; -use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter, Registry}; +use mas_config::{ConfigurationSection, TelemetryConfig}; +use tracing_subscriber::{ + filter::LevelFilter, layer::SubscriberExt, reload, util::SubscriberInitExt, EnvFilter, Layer, + Registry, +}; use self::{ config::ConfigCommand, database::DatabaseCommand, manage::ManageCommand, server::ServerCommand, @@ -107,14 +110,17 @@ async fn try_main() -> anyhow::Result<()> { // Display the error if it is something other than the .env file not existing .or_else(|e| if e.not_found() { Ok(None) } else { Err(e) })?; - // Setup logging & tracing - let (tracer, _meter) = telemetry::setup()?; - let telemetry_layer = tracing_opentelemetry::layer().with_tracer(tracer); - + // Setup logging // This writes logs to stderr let fmt_layer = tracing_subscriber::fmt::layer().with_writer(std::io::stderr); let filter_layer = EnvFilter::try_from_default_env().or_else(|_| EnvFilter::try_new("info"))?; + // Don't fill the telemetry layer for now, we want to configure it based on the + // app config, so we need to delay that a bit + let (telemetry_layer, handle) = reload::Layer::new(None); + // We only want "INFO" level spans to go through OpenTelemetry + let telemetry_layer = telemetry_layer.with_filter(LevelFilter::INFO); + let subscriber = Registry::default() .with(telemetry_layer) .with(filter_layer) @@ -132,6 +138,22 @@ async fn try_main() -> anyhow::Result<()> { // Parse the CLI arguments let opts = RootCommand::parse(); + // Telemetry config could fail to load, but that's probably OK, since the whole + // config will be loaded afterwards, and crash if there is a problem. + // Falling back to default. + let telemetry_config: TelemetryConfig = opts.load_config().unwrap_or_default(); + + // Setup OpenTelemtry tracing and metrics + let tracer = telemetry::setup(&telemetry_config)?; + if let Some(tracer) = tracer { + // Now we can swap out the actual opentelemetry tracing layer + handle.reload( + tracing_opentelemetry::layer() + .with_tracer(tracer) + .with_tracked_inactivity(false), + )?; + } + // And run the command tracing::trace!(?opts, "Running command"); opts.run().await?; diff --git a/crates/cli/src/telemetry.rs b/crates/cli/src/telemetry.rs index 5e54381d..470973e6 100644 --- a/crates/cli/src/telemetry.rs +++ b/crates/cli/src/telemetry.rs @@ -15,29 +15,33 @@ use std::time::Duration; use futures::stream::{Stream, StreamExt}; +use mas_config::{MetricsConfig, TelemetryConfig, TracingConfig}; use opentelemetry::{ global, - sdk::{ - self, - metrics::{self, PushController}, - trace::{self, Tracer}, - Resource, - }, + sdk::{self, trace::Tracer, Resource}, }; use opentelemetry_semantic_conventions as semcov; -pub fn setup() -> anyhow::Result<(Tracer, PushController)> { +pub fn setup(config: &TelemetryConfig) -> anyhow::Result> { global::set_error_handler(|e| tracing::error!("{}", e))?; - Ok((tracer()?, meter()?)) + let tracer = tracer(&config.tracing)?; + meter(&config.metrics)?; + Ok(tracer) } pub fn shutdown() { global::shutdown_tracer_provider(); } -fn tracer() -> anyhow::Result { - let exporter = opentelemetry_otlp::new_exporter().tonic(); +#[cfg(feature = "otlp")] +fn otlp_tracer(endpoint: &Option) -> anyhow::Result { + use opentelemetry_otlp::WithExportConfig; + + let mut exporter = opentelemetry_otlp::new_exporter().tonic(); + if let Some(endpoint) = endpoint { + exporter = exporter.with_endpoint(endpoint.to_string()); + } let tracer = opentelemetry_otlp::new_pipeline() .tracing() @@ -48,25 +52,74 @@ fn tracer() -> anyhow::Result { Ok(tracer) } +#[cfg(not(feature = "otlp"))] +fn otlp_tracer(_endpoint: &Option) -> anyhow::Result { + anyhow::bail!("The service was compiled without OTLP exporter support, but config exports traces via OTLP.") +} + +fn stdout_tracer() -> Tracer { + sdk::export::trace::stdout::new_pipeline() + .with_pretty_print(true) + .with_trace_config(trace_config()) + .install_simple() +} + +fn tracer(config: &TracingConfig) -> anyhow::Result> { + let tracer = match config { + TracingConfig::None => return Ok(None), + TracingConfig::Stdout => stdout_tracer(), + TracingConfig::Otlp { endpoint } => otlp_tracer(endpoint)?, + }; + + Ok(Some(tracer)) +} + fn interval(duration: Duration) -> impl Stream { // Skip first immediate tick from tokio opentelemetry::util::tokio_interval_stream(duration).skip(1) } -fn meter() -> anyhow::Result { - let exporter = opentelemetry_otlp::new_exporter().tonic(); +#[cfg(feature = "otlp")] +fn otlp_meter(endpoint: &Option) -> anyhow::Result<()> { + use opentelemetry_otlp::WithExportConfig; - let meter = opentelemetry_otlp::new_pipeline() + let mut exporter = opentelemetry_otlp::new_exporter().tonic(); + if let Some(endpoint) = endpoint { + exporter = exporter.with_endpoint(endpoint.to_string()); + } + + opentelemetry_otlp::new_pipeline() .metrics(tokio::spawn, interval) .with_exporter(exporter) - .with_aggregator_selector(metrics::selectors::simple::Selector::Exact) + .with_aggregator_selector(sdk::metrics::selectors::simple::Selector::Exact) .build()?; - Ok(meter) + Ok(()) } -fn trace_config() -> trace::Config { - trace::config().with_resource(resource()) +#[cfg(not(feature = "otlp"))] +fn otlp_meter(_endpoint: &Option) -> anyhow::Result<()> { + anyhow::bail!("The service was compiled without OTLP exporter support, but config exports metrics via OTLP.") +} + +fn stdout_meter() { + sdk::export::metrics::stdout(tokio::spawn, interval) + .with_pretty_print(true) + .init(); +} + +fn meter(config: &MetricsConfig) -> anyhow::Result<()> { + match config { + MetricsConfig::None => {} + MetricsConfig::Stdout => stdout_meter(), + MetricsConfig::Otlp { endpoint } => otlp_meter(endpoint)?, + }; + + Ok(()) +} + +fn trace_config() -> sdk::trace::Config { + sdk::trace::config().with_resource(resource()) } fn resource() -> Resource { diff --git a/crates/config/src/lib.rs b/crates/config/src/lib.rs index 8a6631be..0301850e 100644 --- a/crates/config/src/lib.rs +++ b/crates/config/src/lib.rs @@ -21,6 +21,7 @@ mod csrf; mod database; mod http; mod oauth2; +mod telemetry; mod templates; mod util; @@ -30,6 +31,7 @@ pub use self::{ database::DatabaseConfig, http::HttpConfig, oauth2::{Algorithm, KeySet, OAuth2ClientConfig, OAuth2Config}, + telemetry::{MetricsConfig, TelemetryConfig, TracingConfig}, templates::TemplatesConfig, util::ConfigurationSection, }; @@ -46,6 +48,9 @@ pub struct RootConfig { pub cookies: CookiesConfig, + #[serde(default)] + pub telemetry: TelemetryConfig, + #[serde(default)] pub templates: TemplatesConfig, @@ -65,6 +70,7 @@ impl ConfigurationSection<'_> for RootConfig { http: HttpConfig::generate().await?, database: DatabaseConfig::generate().await?, cookies: CookiesConfig::generate().await?, + telemetry: TelemetryConfig::generate().await?, templates: TemplatesConfig::generate().await?, csrf: CsrfConfig::generate().await?, }) @@ -76,6 +82,7 @@ impl ConfigurationSection<'_> for RootConfig { http: HttpConfig::test(), database: DatabaseConfig::test(), cookies: CookiesConfig::test(), + telemetry: TelemetryConfig::test(), templates: TemplatesConfig::test(), csrf: CsrfConfig::test(), } diff --git a/crates/config/src/telemetry.rs b/crates/config/src/telemetry.rs new file mode 100644 index 00000000..06ad8f01 --- /dev/null +++ b/crates/config/src/telemetry.rs @@ -0,0 +1,79 @@ +// Copyright 2021 The Matrix.org Foundation C.I.C. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use async_trait::async_trait; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use serde_with::skip_serializing_none; + +use super::ConfigurationSection; + +#[skip_serializing_none] +#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] +#[serde(tag = "exporter", rename_all = "lowercase")] +pub enum TracingConfig { + None, + Stdout, + Otlp { + #[serde(default)] + endpoint: Option, + }, +} + +impl Default for TracingConfig { + fn default() -> Self { + Self::None + } +} + +#[skip_serializing_none] +#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] +#[serde(tag = "exporter", rename_all = "lowercase")] +pub enum MetricsConfig { + None, + Stdout, + Otlp { + #[serde(default)] + endpoint: Option, + }, +} + +impl Default for MetricsConfig { + fn default() -> Self { + Self::None + } +} + +#[derive(Clone, Debug, Default, Serialize, Deserialize, JsonSchema)] +pub struct TelemetryConfig { + #[serde(default)] + pub tracing: TracingConfig, + #[serde(default)] + pub metrics: MetricsConfig, +} + +#[async_trait] +impl ConfigurationSection<'_> for TelemetryConfig { + fn path() -> &'static str { + "telemetry" + } + + async fn generate() -> anyhow::Result { + Ok(Default::default()) + } + + fn test() -> Self { + Default::default() + } +}