diff --git a/Cargo.lock b/Cargo.lock index a3e44aaf..84097f1d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3018,6 +3018,19 @@ dependencies = [ "writeable", ] +[[package]] +name = "mas-i18n-scan" +version = "0.2.0" +dependencies = [ + "camino", + "clap", + "mas-i18n", + "serde_json", + "tera", + "tracing", + "tracing-subscriber", +] + [[package]] name = "mas-iana" version = "0.2.0" diff --git a/Cargo.toml b/Cargo.toml index 56e59e87..91ab57a7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,15 @@ version = "0.4.31" default-features = false features = ["serde", "clock"] +# UTF-8 paths +[workspace.dependencies.camino] +version = "1.1.6" + +# CLI argument parsing +[workspace.dependencies.clap] +version = "4.4.4" +features = ["derive"] + # HTTP request/response [workspace.dependencies.http] version = "0.2.9" @@ -50,6 +59,11 @@ version = "0.1.37" [workspace.dependencies.tracing-subscriber] version = "0.3.17" +# Templates +[workspace.dependencies.tera] +version = "1.19.1" +default-features = false + # URL manipulation [workspace.dependencies.url] version = "2.4.1" diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml index 973ff9a4..3760b8cb 100644 --- a/crates/cli/Cargo.toml +++ b/crates/cli/Cargo.toml @@ -10,8 +10,8 @@ repository.workspace = true [dependencies] anyhow.workspace = true axum = "0.6.20" -camino = "1.1.6" -clap = { version = "4.4.4", features = ["derive"] } +camino.workspace = true +clap.workspace = true dotenvy = "0.15.7" httpdate = "1.0.3" hyper = { version = "0.14.27", features = ["full"] } diff --git a/crates/config/Cargo.toml b/crates/config/Cargo.toml index 3fe7faa9..a1a42d51 100644 --- a/crates/config/Cargo.toml +++ b/crates/config/Cargo.toml @@ -15,7 +15,7 @@ async-trait = "0.1.73" thiserror.workspace = true anyhow.workspace = true -camino = { version = "1.1.6", features = ["serde1"] } +camino = { workspace = true, features = ["serde1"] } chrono.workspace = true figment = { version = "0.10.10", features = ["env", "yaml", "test"] } 
ipnetwork = { version = "0.20.0", features = ["serde", "schemars"] } diff --git a/crates/handlers/Cargo.toml b/crates/handlers/Cargo.toml index 9ed79e8e..5d9b8740 100644 --- a/crates/handlers/Cargo.toml +++ b/crates/handlers/Cargo.toml @@ -51,7 +51,7 @@ pbkdf2 = { version = "0.12.2", features = ["password-hash", "std", "simple", "pa zeroize = "1.6.0" # Various data types and utilities -camino = "1.1.6" +camino.workspace = true chrono.workspace = true psl = "2.1.4" time = "0.3.29" diff --git a/crates/i18n-scan/Cargo.toml b/crates/i18n-scan/Cargo.toml new file mode 100644 index 00000000..f710f711 --- /dev/null +++ b/crates/i18n-scan/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "mas-i18n-scan" +version.workspace = true +license.workspace = true +authors.workspace = true +edition.workspace = true +homepage.workspace = true +repository.workspace = true + +[dependencies] +camino.workspace = true +clap.workspace = true +tera.workspace = true +tracing.workspace = true +tracing-subscriber.workspace = true +serde_json.workspace = true + +mas-i18n = { path = "../i18n" } \ No newline at end of file diff --git a/crates/i18n-scan/src/main.rs b/crates/i18n-scan/src/main.rs new file mode 100644 index 00000000..21a7ee4b --- /dev/null +++ b/crates/i18n-scan/src/main.rs @@ -0,0 +1,68 @@ +// Copyright 2023 The Matrix.org Foundation C.I.C. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#![deny(clippy::all)] +#![warn(clippy::pedantic)] + +use std::fs::File; + +use ::tera::Tera; +use camino::Utf8PathBuf; +use clap::Parser; +use mas_i18n::translations::TranslationTree; + +use crate::tera::{add_missing, find_keys}; + +mod tera; + +/// Scan a directory of templates for usage of the translation function and +/// output a translation tree. +#[derive(Parser)] +struct Options { + /// The directory containing the templates + templates: Utf8PathBuf, + + /// Path of the existing translation file + existing: Option, + + /// The name of the translation function + #[clap(long, default_value = "t")] + function: String, +} + +fn main() { + tracing_subscriber::fmt::init(); + + let options = Options::parse(); + let glob = format!("{base}/**/*.{{html,txt,subject}}", base = options.templates); + tracing::debug!("Scanning templates in {}", glob); + let tera = Tera::new(&glob).expect("Failed to load templates"); + + let keys = find_keys(&tera, &options.function).unwrap(); + + let mut tree = if let Some(path) = options.existing { + let mut file = File::open(path).expect("Failed to open existing translation file"); + serde_json::from_reader(&mut file).expect("Failed to parse existing translation file") + } else { + TranslationTree::default() + }; + + add_missing(&mut tree, &keys); + + serde_json::to_writer_pretty(std::io::stdout(), &tree) + .expect("Failed to write translation tree"); + + // Just to make sure we end up with a trailing newline + println!(); +} diff --git a/crates/i18n-scan/src/tera.rs b/crates/i18n-scan/src/tera.rs new file mode 100644 index 00000000..812c2c04 --- /dev/null +++ b/crates/i18n-scan/src/tera.rs @@ -0,0 +1,434 @@ +// Copyright 2023 The Matrix.org Foundation C.I.C. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use mas_i18n::{translations::TranslationTree, Message}; +use tera::{ + ast::{Block, Expr, ExprVal, FunctionCall, MacroDefinition, Node}, + Error, Template, Tera, +}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum KeyKind { + Message, + Plural, +} + +pub struct Key { + kind: KeyKind, + key: String, +} + +impl Key { + fn default_value(&self) -> String { + match self.kind { + KeyKind::Message => self.key.clone(), + KeyKind::Plural => format!("%(count)d {}", self.key), + } + } +} + +pub fn add_missing(translation_tree: &mut TranslationTree, keys: &[Key]) { + for translatable in keys { + let message = Message::from_literal(translatable.default_value()); + let key = translatable + .key + .split('.') + .chain(if translatable.kind == KeyKind::Plural { + Some("other") + } else { + None + }); + + translation_tree.set_if_not_defined(key, message); + } +} + +/// Find all translatable strings in a Tera instance. +/// +/// This is not particularly efficient in terms of allocations, but as it is +/// only meant to be used in a utility, it should be fine. +/// +/// # Parameters +/// +/// * `tera` - The Tera instance to scan. +/// * `function_name` - The name of the translation function. Usually `t`. +/// +/// # Errors +/// +/// This function will return an error if it encounters an invalid template. +pub fn find_keys(tera: &Tera, function_name: &str) -> Result, tera::Error> { + let names = tera.get_template_names(); + let mut keys = Vec::new(); + + for name in names { + tracing::trace!("Scanning {}", name); + // This should never fail, but who knows. 
+ let template = tera.get_template(name)?; + keys.extend(find_in_template(template, function_name)?); + } + + Ok(keys) +} + +fn find_in_template(template: &Template, function_name: &str) -> Result, tera::Error> { + let mut keys = Vec::new(); + + for node in &template.ast { + keys.extend(find_in_node(node, function_name)?); + } + + for block in template.blocks.values() { + keys.extend(find_in_block(block, function_name)?); + } + + for block_definition in template.blocks_definitions.values() { + for (_, block) in block_definition { + keys.extend(find_in_block(block, function_name)?); + } + } + + for macro_definition in template.macros.values() { + keys.extend(find_in_macro_definition(macro_definition, function_name)?); + } + + Ok(keys) +} + +fn find_in_block(block: &Block, function_name: &str) -> Result, tera::Error> { + let mut keys = Vec::new(); + + for node in &block.body { + keys.extend(find_in_node(node, function_name)?); + } + + Ok(keys) +} + +fn find_in_node(node: &Node, function_name: &str) -> Result, tera::Error> { + let mut keys = Vec::new(); + + match node { + Node::VariableBlock(_, expr) => keys.extend(find_in_expr(expr, function_name)?), + + Node::MacroDefinition(_, definition, _) => { + keys.extend(find_in_macro_definition(definition, function_name)?); + } + + Node::Set(_, set) => keys.extend(find_in_expr(&set.value, function_name)?), + + Node::FilterSection(_, filter_section, _) => { + keys.extend(find_in_function_call( + &filter_section.filter, + function_name, + )?); + + for node in &filter_section.body { + keys.extend(find_in_node(node, function_name)?); + } + } + + Node::Block(_, block, _) => keys.extend(find_in_block(block, function_name)?), + + Node::Forloop(_, for_loop, _) => { + keys.extend(find_in_expr(&for_loop.container, function_name)?); + + for node in &for_loop.body { + keys.extend(find_in_node(node, function_name)?); + } + + if let Some(empty_body) = &for_loop.empty_body { + for node in empty_body { + keys.extend(find_in_node(node, 
function_name)?); + } + } + } + Node::If(if_block, _) => { + for (_ws, condition, expr) in &if_block.conditions { + keys.extend(find_in_expr(condition, function_name)?); + + for node in expr { + keys.extend(find_in_node(node, function_name)?); + } + } + + if let Some((_ws, expr)) = &if_block.otherwise { + for node in expr { + keys.extend(find_in_node(node, function_name)?); + } + } + } + + Node::Super + | Node::Text(_) + | Node::Extends(_, _) + | Node::Include(_, _, _) + | Node::ImportMacro(_, _, _) + | Node::Raw(_, _, _) + | Node::Break(_) + | Node::Continue(_) + | Node::Comment(_, _) => {} + }; + + Ok(keys) +} + +fn find_in_macro_definition( + definition: &MacroDefinition, + function_name: &str, +) -> Result, Error> { + let mut keys = Vec::new(); + + // Walk through argument defaults + for expr in definition.args.values().flatten() { + keys.extend(find_in_expr(expr, function_name)?); + } + + // Walk through the macro body + for node in &definition.body { + keys.extend(find_in_node(node, function_name)?); + } + + Ok(keys) +} + +fn find_in_expr_val(expr_val: &ExprVal, function_name: &str) -> Result, tera::Error> { + let mut keys = Vec::new(); + + match expr_val { + ExprVal::String(_) + | ExprVal::Int(_) + | ExprVal::Float(_) + | ExprVal::Bool(_) + | ExprVal::Ident(_) => {} + + ExprVal::Math(math_expr) => { + keys.extend(find_in_expr(&math_expr.lhs, function_name)?); + keys.extend(find_in_expr(&math_expr.rhs, function_name)?); + } + + ExprVal::Logic(logic_expr) => { + keys.extend(find_in_expr(&logic_expr.lhs, function_name)?); + keys.extend(find_in_expr(&logic_expr.rhs, function_name)?); + } + + ExprVal::Test(test_expr) => { + for arg in &test_expr.args { + keys.extend(find_in_expr(arg, function_name)?); + } + } + + ExprVal::MacroCall(macro_call) => { + for arg in macro_call.args.values() { + keys.extend(find_in_expr(arg, function_name)?); + } + } + + ExprVal::FunctionCall(function_call) => { + keys.extend(find_in_function_call(function_call, function_name)?); + } + 
+ ExprVal::Array(array) => { + for expr in array { + keys.extend(find_in_expr(expr, function_name)?); + } + } + + ExprVal::StringConcat(string_concat) => { + for value in &string_concat.values { + keys.extend(find_in_expr_val(value, function_name)?); + } + } + + ExprVal::In(in_expr) => { + keys.extend(find_in_expr(&in_expr.lhs, function_name)?); + keys.extend(find_in_expr(&in_expr.rhs, function_name)?); + } + } + + Ok(keys) +} + +fn find_in_expr(expr: &Expr, function_name: &str) -> Result, tera::Error> { + let mut keys = Vec::new(); + + keys.extend(find_in_expr_val(&expr.val, function_name)?); + + for filter in &expr.filters { + keys.extend(find_in_function_call(filter, function_name)?); + } + + Ok(keys) +} + +fn find_in_function_call( + function_call: &FunctionCall, + function_name: &str, +) -> Result, tera::Error> { + tracing::trace!("Checking function call: {:?}", function_call); + let mut keys = Vec::new(); + + // Regardless of if it is the function we are looking for, we still need to + // check the arguments + for expr in function_call.args.values() { + keys.extend(find_in_expr(expr, function_name)?); + } + + // If it is the function we are looking for, we need to extract the key + if function_call.name == function_name { + let key = function_call + .args + .get("key") + .ok_or(tera::Error::msg("Missing key argument"))?; + if !key.filters.is_empty() { + return Err(tera::Error::msg("Key argument must not have filters")); + } + + if key.negated { + return Err(tera::Error::msg("Key argument must not be negated")); + } + + let key = match &key.val { + tera::ast::ExprVal::String(s) => s.clone(), + _ => return Err(tera::Error::msg("Key argument must be a string")), + }; + + let kind = if function_call.args.contains_key("count") { + KeyKind::Plural + } else { + KeyKind::Message + }; + + keys.push(Key { kind, key }); + } + + Ok(keys) +} + +#[cfg(test)] +mod tests { + use tera::Tera; + + use super::*; + + #[test] + fn test_find_keys() { + let mut tera = 
Tera::default(); + tera.add_raw_templates([ + ("hello.txt", r#"Hello {{ t(key="world") }}"#), + ("existing.txt", r#"{{ t(key="hello") }}"#), + ("plural.txt", r#"{{ t(key="plural", count=4) }}"#), + // Kitchen sink to make sure we're going through the whole AST + ( + "macros.txt", + r#" + {% macro test(arg="foo") %} + {% if function() == foo is test(t(key="nested.1")) %} + {% set foo = t(key="nested.2", arg=5 + 2) ~ "foo" in test %} + {{ foo | bar }} + {% else %} + {% for i in [t(key="nested.3", extra=t(key="nested.4")), "foo"] %} + {{ i | foo }} + {% else %} + {{ t(key="nested.5") }} + {% endfor %} + {% endif %} + {% endmacro %} + "#, + ), + ( + "nested.txt", + r#" + {% import "macros.txt" as macros %} + {% block test %} + {% filter upper %} + {{ macros::test(arg=t(key="nested.6")) }} + {% endfilter %} + {% endblock test %} + "#, + ), + ]) + .unwrap(); + + let mut tree = serde_json::from_value(serde_json::json!({ + "hello": "Hello!", + })) + .unwrap(); + + let keys = find_keys(&tera, "t").unwrap(); + add_missing(&mut tree, &keys); + let tree = serde_json::to_value(&tree).unwrap(); + assert_eq!( + tree, + serde_json::json!({ + "hello": "Hello!", + "world": "world", + "plural": { + "other": "%(count)d plural" + }, + "nested": { + "1": "nested.1", + "2": "nested.2", + "3": "nested.3", + "4": "nested.4", + "5": "nested.5", + "6": "nested.6", + }, + }) + ); + } + + #[test] + fn test_invalid_key_not_string() { + let mut tera = Tera::default(); + // This is invalid because the key is not a string + tera.add_raw_template("invalid.txt", r#"{{ t(key=5) }}"#) + .unwrap(); + + let keys = find_keys(&tera, "t"); + assert!(keys.is_err()); + } + + #[test] + fn test_invalid_key_filtered() { + let mut tera = Tera::default(); + // This is invalid because the key argument has a filter + tera.add_raw_template("invalid.txt", r#"{{ t(key="foo" | bar) }}"#) + .unwrap(); + + let keys = find_keys(&tera, "t"); + assert!(keys.is_err()); + } + + #[test] + fn test_invalid_key_missing() { + let 
mut tera = Tera::default(); + // This is invalid because the key argument is missing + tera.add_raw_template("invalid.txt", r#"{{ t() }}"#) + .unwrap(); + + let keys = find_keys(&tera, "t"); + assert!(keys.is_err()); + } + + #[test] + fn test_invalid_key_negated() { + let mut tera = Tera::default(); + // This is invalid because the key argument is negated + tera.add_raw_template("invalid.txt", r#"{{ t(key=not "foo") }}"#) + .unwrap(); + + let keys = find_keys(&tera, "t"); + assert!(keys.is_err()); + } +} diff --git a/crates/i18n/Cargo.toml b/crates/i18n/Cargo.toml index f7da32f3..e2c292c8 100644 --- a/crates/i18n/Cargo.toml +++ b/crates/i18n/Cargo.toml @@ -8,7 +8,7 @@ homepage.workspace = true repository.workspace = true [dependencies] -camino = "1.1.6" +camino.workspace = true icu_list = { version = "1.3.0", features = ["compiled_data", "std"] } icu_locid = { version = "1.3.0", features = ["std"] } icu_locid_transform = { version = "1.3.0", features = ["compiled_data", "std"] } diff --git a/crates/i18n/src/sprintf/message.rs b/crates/i18n/src/sprintf/message.rs index 2ef09c3c..b4e3d7d8 100644 --- a/crates/i18n/src/sprintf/message.rs +++ b/crates/i18n/src/sprintf/message.rs @@ -237,6 +237,14 @@ impl Message { pub(crate) fn parts(&self) -> std::slice::Iter<'_, Part> { self.parts.iter() } + + /// Create a message from a literal string, without any placeholders. + #[must_use] + pub fn from_literal(literal: String) -> Message { + Message { + parts: vec![Part::Text(literal)], + } + } } impl Serialize for Message { diff --git a/crates/i18n/src/translations.rs b/crates/i18n/src/translations.rs index 1dbdd180..111c7d22 100644 --- a/crates/i18n/src/translations.rs +++ b/crates/i18n/src/translations.rs @@ -37,6 +37,12 @@ pub enum TranslationTree { Children(BTreeMap), } +impl Default for TranslationTree { + fn default() -> Self { + Self::Children(BTreeMap::new()) + } +} + impl TranslationTree { /// Get a message from the tree by key. 
/// @@ -74,6 +80,33 @@ impl TranslationTree { } } + #[doc(hidden)] + pub fn set_if_not_defined, I: IntoIterator>( + &mut self, + path: I, + value: Message, + ) { + let mut path = path.into_iter(); + let Some(next) = path.next() else { + if let TranslationTree::Message(_) = self { + return; + } + + *self = TranslationTree::Message(value); + return; + }; + + match self { + TranslationTree::Message(_) => panic!("cannot set a value on a message node"), + TranslationTree::Children(children) => { + children + .entry(next.deref().to_owned()) + .or_default() + .set_if_not_defined(path, value); + } + } + } + fn walk_path, I: IntoIterator>( &self, path: I, diff --git a/crates/iana-codegen/Cargo.toml b/crates/iana-codegen/Cargo.toml index efb7b4a4..c7deb721 100644 --- a/crates/iana-codegen/Cargo.toml +++ b/crates/iana-codegen/Cargo.toml @@ -10,7 +10,7 @@ repository.workspace = true [dependencies] anyhow.workspace = true async-trait = "0.1.73" -camino = "1.1.6" +camino.workspace = true convert_case = "0.6.0" csv = "1.2.2" futures-util = "0.3.28" diff --git a/crates/spa/Cargo.toml b/crates/spa/Cargo.toml index 710c3990..5f123b26 100644 --- a/crates/spa/Cargo.toml +++ b/crates/spa/Cargo.toml @@ -10,5 +10,5 @@ repository.workspace = true [dependencies] serde.workspace = true thiserror.workspace = true -camino = { version = "1.1.6", features = ["serde1"] } +camino = { workspace = true, features = ["serde1"] } diff --git a/crates/templates/Cargo.toml b/crates/templates/Cargo.toml index a026f6d5..6162be77 100644 --- a/crates/templates/Cargo.toml +++ b/crates/templates/Cargo.toml @@ -14,12 +14,12 @@ tokio = { version = "1.32.0", features = ["macros", "rt", "fs"] } anyhow.workspace = true thiserror.workspace = true -tera = { version = "1.19.1", default-features = false } +tera.workspace = true serde.workspace = true serde_json.workspace = true serde_urlencoded = "0.7.1" -camino = "1.1.6" +camino.workspace = true chrono.workspace = true url.workspace = true http.workspace = true