diff --git a/Cargo.lock b/Cargo.lock index 01190ac2..fc475e3e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4755,9 +4755,9 @@ checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" [[package]] name = "libc" -version = "0.2.177" +version = "0.2.178" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" +checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" [[package]] name = "libloading" @@ -5743,6 +5743,12 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "pastey" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57d6c094ee800037dff99e02cab0eaf3142826586742a270ab3d7a62656bd27a" + [[package]] name = "path-absolutize" version = "3.1.1" @@ -6733,15 +6739,15 @@ dependencies = [ [[package]] name = "rmcp" -version = "0.9.1" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaa07b85b779d1e1df52dd79f6c6bffbe005b191f07290136cc42a142da3409a" +checksum = "38b18323edc657390a6ed4d7a9110b0dec2dc3ed128eb2a123edfbafabdbddc5" dependencies = [ "async-trait", "base64 0.22.1", "chrono", "futures", - "paste", + "pastey", "pin-project-lite", "rmcp-macros", "schemars 1.1.0", @@ -6755,9 +6761,9 @@ dependencies = [ [[package]] name = "rmcp-macros" -version = "0.9.1" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f6fa09933cac0d0204c8a5d647f558425538ed6a0134b1ebb1ae4dc00c96db3" +checksum = "c75d0a62676bf8c8003c4e3c348e2ceb6a7b3e48323681aaf177fdccdac2ce50" dependencies = [ "darling 0.21.3", "proc-macro2", @@ -7032,9 +7038,11 @@ dependencies = [ "rand 0.10.0-rc.5", "reqwest", "rustfs-common", + "rustfs-config", "rustfs-ecstore", "rustfs-filemeta", "rustfs-madmin", + "rustfs-utils", 
"s3s", "serde", "serde_json", @@ -9563,23 +9571,23 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.18.1" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2" +checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a" dependencies = [ "getrandom 0.3.4", "js-sys", "rand 0.9.2", - "serde", + "serde_core", "uuid-macro-internal", "wasm-bindgen", ] [[package]] name = "uuid-macro-internal" -version = "1.18.1" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9384a660318abfbd7f8932c34d67e4d1ec511095f95972ddc01e19d7ba8413f" +checksum = "39d11901c36b3650df7acb0f9ebe624f35b5ac4e1922ecd3c57f444648429594" dependencies = [ "proc-macro2", "quote", @@ -10265,18 +10273,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.30" +version = "0.8.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ea879c944afe8a2b25fef16bb4ba234f47c694565e97383b36f3a878219065c" +checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.30" +version = "0.8.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf955aa904d6040f70dc8e9384444cb1030aed272ba3cb09bbc4ab9e7c1f34f5" +checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index ae11f9fc..01a089eb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -129,7 +129,7 @@ flatbuffers = "25.9.23" form_urlencoded = "1.2.2" prost = "0.14.1" quick-xml = "0.38.4" -rmcp = { version = "0.9.1" } +rmcp = { version = "0.10.0" } rmp = { version = "0.8.14" } rmp-serde = { version = "1.3.0" } serde = { version = "1.0.228", features = ["derive"] } 
@@ -193,7 +193,7 @@ hex-simd = "0.8.0" highway = { version = "1.3.0" } ipnetwork = { version = "0.21.1", features = ["serde"] } lazy_static = "1.5.0" -libc = "0.2.177" +libc = "0.2.178" libsystemd = "0.7.2" local-ip-address = "0.6.5" lz4 = "1.28.1" @@ -244,7 +244,7 @@ tracing-subscriber = { version = "0.3.22", features = ["env-filter", "time"] } transform-stream = "0.3.1" url = "2.5.7" urlencoding = "2.1.3" -uuid = { version = "1.18.1", features = ["v4", "fast-rng", "macro-diagnostics"] } +uuid = { version = "1.19.0", features = ["v4", "fast-rng", "macro-diagnostics"] } vaultrs = { version = "0.7.4" } walkdir = "2.5.0" wildmatch = { version = "2.6.1", features = ["serde"] } diff --git a/crates/ahm/Cargo.toml b/crates/ahm/Cargo.toml index 1b6eb36e..1eadff89 100644 --- a/crates/ahm/Cargo.toml +++ b/crates/ahm/Cargo.toml @@ -13,10 +13,12 @@ keywords = ["RustFS", "AHM", "health-management", "scanner", "Minio"] categories = ["web-programming", "development-tools", "filesystem"] [dependencies] +rustfs-config = { workspace = true } rustfs-ecstore = { workspace = true } rustfs-common = { workspace = true } rustfs-filemeta = { workspace = true } rustfs-madmin = { workspace = true } +rustfs-utils = { workspace = true } tokio = { workspace = true, features = ["full"] } tokio-util = { workspace = true } tracing = { workspace = true } diff --git a/crates/ahm/src/heal/manager.rs b/crates/ahm/src/heal/manager.rs index 95556386..c2717ef2 100644 --- a/crates/ahm/src/heal/manager.rs +++ b/crates/ahm/src/heal/manager.rs @@ -195,12 +195,28 @@ pub struct HealConfig { impl Default for HealConfig { fn default() -> Self { + let queue_size: usize = + rustfs_utils::get_env_usize(rustfs_config::ENV_HEAL_QUEUE_SIZE, rustfs_config::DEFAULT_HEAL_QUEUE_SIZE); + let heal_interval = Duration::from_secs(rustfs_utils::get_env_u64( + rustfs_config::ENV_HEAL_INTERVAL_SECS, + rustfs_config::DEFAULT_HEAL_INTERVAL_SECS, + )); + let enable_auto_heal = + 
rustfs_utils::get_env_bool(rustfs_config::ENV_HEAL_AUTO_HEAL_ENABLE, rustfs_config::DEFAULT_HEAL_AUTO_HEAL_ENABLE); + let task_timeout = Duration::from_secs(rustfs_utils::get_env_u64( + rustfs_config::ENV_HEAL_TASK_TIMEOUT_SECS, + rustfs_config::DEFAULT_HEAL_TASK_TIMEOUT_SECS, + )); + let max_concurrent_heals = rustfs_utils::get_env_usize( + rustfs_config::ENV_HEAL_MAX_CONCURRENT_HEALS, + rustfs_config::DEFAULT_HEAL_MAX_CONCURRENT_HEALS, + ); Self { - enable_auto_heal: true, - heal_interval: Duration::from_secs(10), // 10 seconds - max_concurrent_heals: 4, - task_timeout: Duration::from_secs(300), // 5 minutes - queue_size: 1000, + enable_auto_heal, + heal_interval, // default: 10 seconds + max_concurrent_heals, // default: 4 + task_timeout, // default: 5 minutes + queue_size, } } } diff --git a/crates/config/src/constants/heal.rs b/crates/config/src/constants/heal.rs new file mode 100644 index 00000000..728806be --- /dev/null +++ b/crates/config/src/constants/heal.rs @@ -0,0 +1,88 @@ +// Copyright 2024 RustFS Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/// Environment variable name that enables or disables auto-heal functionality. +/// - Purpose: Control whether the system automatically performs heal operations. +/// - Valid values: "true" or "false" (case insensitive). 
+/// - Semantics: When set to "true", auto-heal is enabled and the system will automatically attempt to heal detected issues; when set to "false", auto-heal is disabled and healing must be triggered manually. +/// - Example: `export RUSTFS_HEAL_AUTO_HEAL_ENABLE=true` +/// - Note: Enabling auto-heal can improve system resilience by automatically addressing issues, but may increase resource usage; evaluate based on your operational requirements. +pub const ENV_HEAL_AUTO_HEAL_ENABLE: &str = "RUSTFS_HEAL_AUTO_HEAL_ENABLE"; + +/// Environment variable name that specifies the heal queue size. +/// +/// - Purpose: Set the maximum number of heal requests that can be queued. +/// - Unit: number of requests (usize). +/// - Valid values: any positive integer. +/// - Semantics: When the heal queue reaches this size, new heal requests may be rejected or blocked until space is available; tune according to expected heal workload and system capacity. +/// - Example: `export RUSTFS_HEAL_QUEUE_SIZE=10000` +/// - Note: A larger queue size can accommodate bursts of heal requests but may increase memory usage. +pub const ENV_HEAL_QUEUE_SIZE: &str = "RUSTFS_HEAL_QUEUE_SIZE"; +/// Environment variable name that specifies the heal interval in seconds. +/// - Purpose: Define the time interval between successive heal operations. +/// - Unit: seconds (u64). +/// - Valid values: any positive integer. +/// - Semantics: This interval controls how frequently the heal manager checks for and processes heal requests; shorter intervals lead to more responsive healing but may increase system load. +/// - Example: `export RUSTFS_HEAL_INTERVAL_SECS=10` +/// - Note: Choose an interval that balances healing responsiveness with overall system performance. +pub const ENV_HEAL_INTERVAL_SECS: &str = "RUSTFS_HEAL_INTERVAL_SECS"; + +/// Environment variable name that specifies the heal task timeout in seconds. +/// - Purpose: Set the maximum duration allowed for a heal task to complete. 
+/// - Unit: seconds (u64). +/// - Valid values: any positive integer. +/// - Semantics: If a heal task exceeds this timeout, it may be aborted or retried; tune according to the expected duration of heal operations and system performance characteristics. +/// - Example: `export RUSTFS_HEAL_TASK_TIMEOUT_SECS=300` +/// - Note: Setting an appropriate timeout helps prevent long-running heal tasks from impacting system stability. +pub const ENV_HEAL_TASK_TIMEOUT_SECS: &str = "RUSTFS_HEAL_TASK_TIMEOUT_SECS"; + +/// Environment variable name that specifies the maximum number of concurrent heal operations. +/// - Purpose: Limit the number of heal operations that can run simultaneously. +/// - Unit: number of operations (usize). +/// - Valid values: any positive integer. +/// - Semantics: This limit helps control resource usage during healing; tune according to system capacity and expected heal workload. +/// - Example: `export RUSTFS_HEAL_MAX_CONCURRENT_HEALS=4` +/// - Note: A higher concurrency limit can speed up healing but may lead to resource contention. +pub const ENV_HEAL_MAX_CONCURRENT_HEALS: &str = "RUSTFS_HEAL_MAX_CONCURRENT_HEALS"; + +/// Default value for whether auto-heal is enabled if not specified in the environment variable. +/// - Value: true (auto-heal enabled). +/// - Rationale: Enabling auto-heal by default lets the system attempt to heal detected issues without manual intervention. +/// - Adjustments: Users may disable this feature via the `RUSTFS_HEAL_AUTO_HEAL_ENABLE` environment variable based on their operational requirements. +pub const DEFAULT_HEAL_AUTO_HEAL_ENABLE: bool = true; + +/// Default heal queue size if not specified in the environment variable. +/// +/// - Value: 10,000 requests. +/// - Rationale: This default size balances the need to handle typical heal workloads without excessive memory consumption. 
+/// - Adjustments: Users may modify this value via the `RUSTFS_HEAL_QUEUE_SIZE` environment variable based on their specific use cases and system capabilities. +pub const DEFAULT_HEAL_QUEUE_SIZE: usize = 10_000; + +/// Default heal interval in seconds if not specified in the environment variable. +/// - Value: 10 seconds. +/// - Rationale: This default interval provides a reasonable balance between healing responsiveness and system load for most deployments. +/// - Adjustments: Users may modify this value via the `RUSTFS_HEAL_INTERVAL_SECS` environment variable based on their specific healing requirements and system performance. +pub const DEFAULT_HEAL_INTERVAL_SECS: u64 = 10; + +/// Default heal task timeout in seconds if not specified in the environment variable. +/// - Value: 300 seconds (5 minutes). +/// - Rationale: This default timeout allows sufficient time for most heal operations to complete while preventing excessively long-running tasks. +/// - Adjustments: Users may modify this value via the `RUSTFS_HEAL_TASK_TIMEOUT_SECS` environment variable based on their specific heal operation characteristics and system performance. +pub const DEFAULT_HEAL_TASK_TIMEOUT_SECS: u64 = 300; // 5 minutes + +/// Default maximum number of concurrent heal operations if not specified in the environment variable. +/// - Value: 4 concurrent heal operations. +/// - Rationale: This default concurrency limit helps balance healing speed with resource usage, preventing system overload. +/// - Adjustments: Users may modify this value via the `RUSTFS_HEAL_MAX_CONCURRENT_HEALS` environment variable based on their system capacity and expected heal workload. 
+pub const DEFAULT_HEAL_MAX_CONCURRENT_HEALS: usize = 4; diff --git a/crates/config/src/constants/mod.rs b/crates/config/src/constants/mod.rs index 1badf48b..3c68f472 100644 --- a/crates/config/src/constants/mod.rs +++ b/crates/config/src/constants/mod.rs @@ -15,6 +15,7 @@ pub(crate) mod app; pub(crate) mod console; pub(crate) mod env; +pub(crate) mod heal; pub(crate) mod object; pub(crate) mod profiler; pub(crate) mod runtime; diff --git a/crates/config/src/lib.rs b/crates/config/src/lib.rs index 46f4e332..0202d6dd 100644 --- a/crates/config/src/lib.rs +++ b/crates/config/src/lib.rs @@ -21,6 +21,8 @@ pub use constants::console::*; #[cfg(feature = "constants")] pub use constants::env::*; #[cfg(feature = "constants")] +pub use constants::heal::*; +#[cfg(feature = "constants")] pub use constants::object::*; #[cfg(feature = "constants")] pub use constants::profiler::*; diff --git a/crates/ecstore/src/set_disk.rs b/crates/ecstore/src/set_disk.rs index 696c8c71..054934e6 100644 --- a/crates/ecstore/src/set_disk.rs +++ b/crates/ecstore/src/set_disk.rs @@ -6145,54 +6145,54 @@ impl StorageAPI for SetDisks { version_id: &str, opts: &HealOpts, ) -> Result<(HealResultItem, Option)> { - let mut effective_object = object.to_string(); - - // Optimization: Only attempt correction if the name looks suspicious (quotes or URL encoded) - // and the original object does NOT exist. - let has_quotes = (effective_object.starts_with('\'') && effective_object.ends_with('\'')) - || (effective_object.starts_with('"') && effective_object.ends_with('"')); - let has_percent = effective_object.contains('%'); - - if has_quotes || has_percent { - let disks = self.disks.read().await; - // 1. Check if the original object exists (lightweight check) - let (_, errs) = Self::read_all_fileinfo(&disks, "", bucket, &effective_object, version_id, false, false).await?; - - if DiskError::is_all_not_found(&errs) { - // Original not found. Try candidates. 
- let mut candidates = Vec::new(); - - // Candidate 1: URL Decoded (Priority for web access issues) - if has_percent { - if let Ok(decoded) = urlencoding::decode(&effective_object) { - if decoded != effective_object { - candidates.push(decoded.to_string()); - } - } - } - - // Candidate 2: Quote Stripped (For shell copy-paste issues) - if has_quotes && effective_object.len() >= 2 { - candidates.push(effective_object[1..effective_object.len() - 1].to_string()); - } - - // Check candidates - for candidate in candidates { - let (_, errs_cand) = - Self::read_all_fileinfo(&disks, "", bucket, &candidate, version_id, false, false).await?; - - if !DiskError::is_all_not_found(&errs_cand) { - info!( - "Heal request for object '{}' failed (not found). Auto-corrected to '{}'.", - effective_object, candidate - ); - effective_object = candidate; - break; // Found a match, stop searching - } - } - } - } - let object = effective_object.as_str(); + // let mut effective_object = object.to_string(); + // + // // Optimization: Only attempt correction if the name looks suspicious (quotes or URL encoded) + // // and the original object does NOT exist. + // let has_quotes = (effective_object.starts_with('\'') && effective_object.ends_with('\'')) + // || (effective_object.starts_with('"') && effective_object.ends_with('"')); + // let has_percent = effective_object.contains('%'); + // + // if has_quotes || has_percent { + // let disks = self.disks.read().await; + // // 1. Check if the original object exists (lightweight check) + // let (_, errs) = Self::read_all_fileinfo(&disks, "", bucket, &effective_object, version_id, false, false).await?; + // + // if DiskError::is_all_not_found(&errs) { + // // Original not found. Try candidates. 
+ // let mut candidates = Vec::new(); + // + // // Candidate 1: URL Decoded (Priority for web access issues) + // if has_percent { + // if let Ok(decoded) = urlencoding::decode(&effective_object) { + // if decoded != effective_object { + // candidates.push(decoded.to_string()); + // } + // } + // } + // + // // Candidate 2: Quote Stripped (For shell copy-paste issues) + // if has_quotes && effective_object.len() >= 2 { + // candidates.push(effective_object[1..effective_object.len() - 1].to_string()); + // } + // + // // Check candidates + // for candidate in candidates { + // let (_, errs_cand) = + // Self::read_all_fileinfo(&disks, "", bucket, &candidate, version_id, false, false).await?; + // + // if !DiskError::is_all_not_found(&errs_cand) { + // info!( + // "Heal request for object '{}' failed (not found). Auto-corrected to '{}'.", + // effective_object, candidate + // ); + // effective_object = candidate; + // break; // Found a match, stop searching + // } + // } + // } + // } + // let object = effective_object.as_str(); let _write_lock_guard = if !opts.no_lock { let key = rustfs_lock::fast_lock::types::ObjectKey::new(bucket, object); diff --git a/scripts/run.sh b/scripts/run.sh index 2af7b0c6..2b75d326 100755 --- a/scripts/run.sh +++ b/scripts/run.sh @@ -122,6 +122,9 @@ export RUSTFS_OBJECT_CACHE_ENABLE=true # Profiling configuration export RUSTFS_ENABLE_PROFILING=false +# Heal configuration queue size +export RUSTFS_HEAL_QUEUE_SIZE=10000 + if [ -n "$1" ]; then export RUSTFS_VOLUMES="$1" fi