upgrade crate version and improve heal config (#963)

This commit is contained in:
houseme
2025-12-03 18:49:11 +08:00
committed by GitHub
parent a8b7b28fd0
commit 5b0a3a0764
9 changed files with 192 additions and 72 deletions

40
Cargo.lock generated
View File

@@ -4755,9 +4755,9 @@ checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7"
[[package]]
name = "libc"
version = "0.2.177"
version = "0.2.178"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976"
checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091"
[[package]]
name = "libloading"
@@ -5743,6 +5743,12 @@ version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
[[package]]
name = "pastey"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57d6c094ee800037dff99e02cab0eaf3142826586742a270ab3d7a62656bd27a"
[[package]]
name = "path-absolutize"
version = "3.1.1"
@@ -6733,15 +6739,15 @@ dependencies = [
[[package]]
name = "rmcp"
version = "0.9.1"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eaa07b85b779d1e1df52dd79f6c6bffbe005b191f07290136cc42a142da3409a"
checksum = "38b18323edc657390a6ed4d7a9110b0dec2dc3ed128eb2a123edfbafabdbddc5"
dependencies = [
"async-trait",
"base64 0.22.1",
"chrono",
"futures",
"paste",
"pastey",
"pin-project-lite",
"rmcp-macros",
"schemars 1.1.0",
@@ -6755,9 +6761,9 @@ dependencies = [
[[package]]
name = "rmcp-macros"
version = "0.9.1"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f6fa09933cac0d0204c8a5d647f558425538ed6a0134b1ebb1ae4dc00c96db3"
checksum = "c75d0a62676bf8c8003c4e3c348e2ceb6a7b3e48323681aaf177fdccdac2ce50"
dependencies = [
"darling 0.21.3",
"proc-macro2",
@@ -7032,9 +7038,11 @@ dependencies = [
"rand 0.10.0-rc.5",
"reqwest",
"rustfs-common",
"rustfs-config",
"rustfs-ecstore",
"rustfs-filemeta",
"rustfs-madmin",
"rustfs-utils",
"s3s",
"serde",
"serde_json",
@@ -9563,23 +9571,23 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "uuid"
version = "1.18.1"
version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2"
checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a"
dependencies = [
"getrandom 0.3.4",
"js-sys",
"rand 0.9.2",
"serde",
"serde_core",
"uuid-macro-internal",
"wasm-bindgen",
]
[[package]]
name = "uuid-macro-internal"
version = "1.18.1"
version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9384a660318abfbd7f8932c34d67e4d1ec511095f95972ddc01e19d7ba8413f"
checksum = "39d11901c36b3650df7acb0f9ebe624f35b5ac4e1922ecd3c57f444648429594"
dependencies = [
"proc-macro2",
"quote",
@@ -10265,18 +10273,18 @@ dependencies = [
[[package]]
name = "zerocopy"
version = "0.8.30"
version = "0.8.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ea879c944afe8a2b25fef16bb4ba234f47c694565e97383b36f3a878219065c"
checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.8.30"
version = "0.8.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf955aa904d6040f70dc8e9384444cb1030aed272ba3cb09bbc4ab9e7c1f34f5"
checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a"
dependencies = [
"proc-macro2",
"quote",

View File

@@ -129,7 +129,7 @@ flatbuffers = "25.9.23"
form_urlencoded = "1.2.2"
prost = "0.14.1"
quick-xml = "0.38.4"
rmcp = { version = "0.9.1" }
rmcp = { version = "0.10.0" }
rmp = { version = "0.8.14" }
rmp-serde = { version = "1.3.0" }
serde = { version = "1.0.228", features = ["derive"] }
@@ -193,7 +193,7 @@ hex-simd = "0.8.0"
highway = { version = "1.3.0" }
ipnetwork = { version = "0.21.1", features = ["serde"] }
lazy_static = "1.5.0"
libc = "0.2.177"
libc = "0.2.178"
libsystemd = "0.7.2"
local-ip-address = "0.6.5"
lz4 = "1.28.1"
@@ -244,7 +244,7 @@ tracing-subscriber = { version = "0.3.22", features = ["env-filter", "time"] }
transform-stream = "0.3.1"
url = "2.5.7"
urlencoding = "2.1.3"
uuid = { version = "1.18.1", features = ["v4", "fast-rng", "macro-diagnostics"] }
uuid = { version = "1.19.0", features = ["v4", "fast-rng", "macro-diagnostics"] }
vaultrs = { version = "0.7.4" }
walkdir = "2.5.0"
wildmatch = { version = "2.6.1", features = ["serde"] }

View File

@@ -13,10 +13,12 @@ keywords = ["RustFS", "AHM", "health-management", "scanner", "Minio"]
categories = ["web-programming", "development-tools", "filesystem"]
[dependencies]
rustfs-config = { workspace = true }
rustfs-ecstore = { workspace = true }
rustfs-common = { workspace = true }
rustfs-filemeta = { workspace = true }
rustfs-madmin = { workspace = true }
rustfs-utils = { workspace = true }
tokio = { workspace = true, features = ["full"] }
tokio-util = { workspace = true }
tracing = { workspace = true }

View File

@@ -195,12 +195,28 @@ pub struct HealConfig {
impl Default for HealConfig {
// Builds the default configuration from environment variables, passing the
// matching `rustfs_config::DEFAULT_HEAL_*` constant as the fallback argument.
// NOTE(review): presumably the `rustfs_utils::get_env_*` helpers return that
// fallback when the variable is unset or cannot be parsed — confirm in rustfs_utils.
fn default() -> Self {
let queue_size: usize =
rustfs_utils::get_env_usize(rustfs_config::ENV_HEAL_QUEUE_SIZE, rustfs_config::DEFAULT_HEAL_QUEUE_SIZE);
let heal_interval = Duration::from_secs(rustfs_utils::get_env_u64(
rustfs_config::ENV_HEAL_INTERVAL_SECS,
rustfs_config::DEFAULT_HEAL_INTERVAL_SECS,
));
let enable_auto_heal =
rustfs_utils::get_env_bool(rustfs_config::ENV_HEAL_AUTO_HEAL_ENABLE, rustfs_config::DEFAULT_HEAL_AUTO_HEAL_ENABLE);
let task_timeout = Duration::from_secs(rustfs_utils::get_env_u64(
rustfs_config::ENV_HEAL_TASK_TIMEOUT_SECS,
rustfs_config::DEFAULT_HEAL_TASK_TIMEOUT_SECS,
));
let max_concurrent_heals = rustfs_utils::get_env_usize(
rustfs_config::ENV_HEAL_MAX_CONCURRENT_HEALS,
rustfs_config::DEFAULT_HEAL_MAX_CONCURRENT_HEALS,
);
Self {
enable_auto_heal: true,
heal_interval: Duration::from_secs(10), // 10 seconds
max_concurrent_heals: 4,
task_timeout: Duration::from_secs(300), // 5 minutes
queue_size: 1000,
enable_auto_heal,
heal_interval, // seconds from ENV_HEAL_INTERVAL_SECS; fallback DEFAULT_HEAL_INTERVAL_SECS
max_concurrent_heals, // from ENV_HEAL_MAX_CONCURRENT_HEALS; fallback DEFAULT_HEAL_MAX_CONCURRENT_HEALS
task_timeout, // seconds from ENV_HEAL_TASK_TIMEOUT_SECS; fallback DEFAULT_HEAL_TASK_TIMEOUT_SECS
queue_size,
}
}
}

View File

@@ -0,0 +1,88 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/// Environment variable name that enables or disables auto-heal functionality.
/// - Purpose: Control whether the system automatically performs heal operations.
/// - Valid values: "true" or "false" (case insensitive).
/// - Semantics: When set to "true", auto-heal is enabled and the system will automatically attempt to heal detected issues; when set to "false", auto-heal is disabled and healing must be triggered manually.
/// - Example: `export RUSTFS_HEAL_AUTO_HEAL_ENABLE=true`
/// - Note: Enabling auto-heal can improve system resilience by automatically addressing issues, but may increase resource usage; evaluate based on your operational requirements.
pub const ENV_HEAL_AUTO_HEAL_ENABLE: &str = "RUSTFS_HEAL_AUTO_HEAL_ENABLE";
/// Environment variable name that specifies the heal queue size.
///
/// - Purpose: Set the maximum number of heal requests that can be queued.
/// - Unit: number of requests (usize).
/// - Valid values: any positive integer.
/// - Semantics: When the heal queue reaches this size, new heal requests may be rejected or blocked until space is available; tune according to expected heal workload and system capacity.
/// - Example: `export RUSTFS_HEAL_QUEUE_SIZE=10000`
/// - Note: A larger queue size can accommodate bursts of heal requests but may increase memory usage.
pub const ENV_HEAL_QUEUE_SIZE: &str = "RUSTFS_HEAL_QUEUE_SIZE";
/// Environment variable name that specifies the heal interval in seconds.
/// - Purpose: Define the time interval between successive heal operations.
/// - Unit: seconds (u64).
/// - Valid values: any positive integer.
/// - Semantics: This interval controls how frequently the heal manager checks for and processes heal requests; shorter intervals lead to more responsive healing but may increase system load.
/// - Example: `export RUSTFS_HEAL_INTERVAL_SECS=10`
/// - Note: Choose an interval that balances healing responsiveness with overall system performance.
pub const ENV_HEAL_INTERVAL_SECS: &str = "RUSTFS_HEAL_INTERVAL_SECS";
/// Environment variable name that specifies the heal task timeout in seconds.
/// - Purpose: Set the maximum duration allowed for a heal task to complete.
/// - Unit: seconds (u64).
/// - Valid values: any positive integer.
/// - Semantics: If a heal task exceeds this timeout, it may be aborted or retried; tune according to the expected duration of heal operations and system performance characteristics.
/// - Example: `export RUSTFS_HEAL_TASK_TIMEOUT_SECS=300`
/// - Note: Setting an appropriate timeout helps prevent long-running heal tasks from impacting system stability.
pub const ENV_HEAL_TASK_TIMEOUT_SECS: &str = "RUSTFS_HEAL_TASK_TIMEOUT_SECS";
/// Environment variable name that specifies the maximum number of concurrent heal operations.
/// - Purpose: Limit the number of heal operations that can run simultaneously.
/// - Unit: number of operations (usize).
/// - Valid values: any positive integer.
/// - Semantics: This limit helps control resource usage during healing; tune according to system capacity and expected heal workload.
/// - Example: `export RUSTFS_HEAL_MAX_CONCURRENT_HEALS=4`
/// - Note: A higher concurrency limit can speed up healing but may lead to resource contention.
pub const ENV_HEAL_MAX_CONCURRENT_HEALS: &str = "RUSTFS_HEAL_MAX_CONCURRENT_HEALS";
/// Default value for the auto-heal switch if not specified in the environment variable.
/// - Value: true (auto-heal enabled).
/// - Rationale: Enabling auto-heal by default lets the system attempt to heal detected issues without manual intervention.
/// - Adjustments: Users may disable this feature via the `RUSTFS_HEAL_AUTO_HEAL_ENABLE` environment variable based on their operational requirements.
pub const DEFAULT_HEAL_AUTO_HEAL_ENABLE: bool = true;
/// Default heal queue size if not specified in the environment variable.
///
/// - Value: 10,000 requests.
/// - Rationale: This default size balances the need to handle typical heal workloads without excessive memory consumption.
/// - Adjustments: Users may modify this value via the `RUSTFS_HEAL_QUEUE_SIZE` environment variable based on their specific use cases and system capabilities.
pub const DEFAULT_HEAL_QUEUE_SIZE: usize = 10_000;
/// Default heal interval in seconds if not specified in the environment variable.
/// - Value: 10 seconds.
/// - Rationale: This default interval provides a reasonable balance between healing responsiveness and system load for most deployments.
/// - Adjustments: Users may modify this value via the `RUSTFS_HEAL_INTERVAL_SECS` environment variable based on their specific healing requirements and system performance.
pub const DEFAULT_HEAL_INTERVAL_SECS: u64 = 10;
/// Default heal task timeout in seconds if not specified in the environment variable.
/// - Value: 300 seconds (5 minutes).
/// - Rationale: This default timeout allows sufficient time for most heal operations to complete while preventing excessively long-running tasks.
/// - Adjustments: Users may modify this value via the `RUSTFS_HEAL_TASK_TIMEOUT_SECS` environment variable based on their specific heal operation characteristics and system performance.
pub const DEFAULT_HEAL_TASK_TIMEOUT_SECS: u64 = 300; // 5 minutes
/// Default maximum number of concurrent heal operations if not specified in the environment variable.
/// - Value: 4 concurrent heal operations.
/// - Rationale: This default concurrency limit helps balance healing speed with resource usage, preventing system overload.
/// - Adjustments: Users may modify this value via the `RUSTFS_HEAL_MAX_CONCURRENT_HEALS` environment variable based on their system capacity and expected heal workload.
pub const DEFAULT_HEAL_MAX_CONCURRENT_HEALS: usize = 4;

View File

@@ -15,6 +15,7 @@
pub(crate) mod app;
pub(crate) mod console;
pub(crate) mod env;
pub(crate) mod heal;
pub(crate) mod object;
pub(crate) mod profiler;
pub(crate) mod runtime;

View File

@@ -21,6 +21,8 @@ pub use constants::console::*;
#[cfg(feature = "constants")]
pub use constants::env::*;
#[cfg(feature = "constants")]
pub use constants::heal::*;
#[cfg(feature = "constants")]
pub use constants::object::*;
#[cfg(feature = "constants")]
pub use constants::profiler::*;

View File

@@ -6145,54 +6145,54 @@ impl StorageAPI for SetDisks {
version_id: &str,
opts: &HealOpts,
) -> Result<(HealResultItem, Option<Error>)> {
let mut effective_object = object.to_string();
// Optimization: Only attempt correction if the name looks suspicious (quotes or URL encoded)
// and the original object does NOT exist.
let has_quotes = (effective_object.starts_with('\'') && effective_object.ends_with('\''))
|| (effective_object.starts_with('"') && effective_object.ends_with('"'));
let has_percent = effective_object.contains('%');
if has_quotes || has_percent {
let disks = self.disks.read().await;
// 1. Check if the original object exists (lightweight check)
let (_, errs) = Self::read_all_fileinfo(&disks, "", bucket, &effective_object, version_id, false, false).await?;
if DiskError::is_all_not_found(&errs) {
// Original not found. Try candidates.
let mut candidates = Vec::new();
// Candidate 1: URL Decoded (Priority for web access issues)
if has_percent {
if let Ok(decoded) = urlencoding::decode(&effective_object) {
if decoded != effective_object {
candidates.push(decoded.to_string());
}
}
}
// Candidate 2: Quote Stripped (For shell copy-paste issues)
if has_quotes && effective_object.len() >= 2 {
candidates.push(effective_object[1..effective_object.len() - 1].to_string());
}
// Check candidates
for candidate in candidates {
let (_, errs_cand) =
Self::read_all_fileinfo(&disks, "", bucket, &candidate, version_id, false, false).await?;
if !DiskError::is_all_not_found(&errs_cand) {
info!(
"Heal request for object '{}' failed (not found). Auto-corrected to '{}'.",
effective_object, candidate
);
effective_object = candidate;
break; // Found a match, stop searching
}
}
}
}
let object = effective_object.as_str();
// let mut effective_object = object.to_string();
//
// // Optimization: Only attempt correction if the name looks suspicious (quotes or URL encoded)
// // and the original object does NOT exist.
// let has_quotes = (effective_object.starts_with('\'') && effective_object.ends_with('\''))
// || (effective_object.starts_with('"') && effective_object.ends_with('"'));
// let has_percent = effective_object.contains('%');
//
// if has_quotes || has_percent {
// let disks = self.disks.read().await;
// // 1. Check if the original object exists (lightweight check)
// let (_, errs) = Self::read_all_fileinfo(&disks, "", bucket, &effective_object, version_id, false, false).await?;
//
// if DiskError::is_all_not_found(&errs) {
// // Original not found. Try candidates.
// let mut candidates = Vec::new();
//
// // Candidate 1: URL Decoded (Priority for web access issues)
// if has_percent {
// if let Ok(decoded) = urlencoding::decode(&effective_object) {
// if decoded != effective_object {
// candidates.push(decoded.to_string());
// }
// }
// }
//
// // Candidate 2: Quote Stripped (For shell copy-paste issues)
// if has_quotes && effective_object.len() >= 2 {
// candidates.push(effective_object[1..effective_object.len() - 1].to_string());
// }
//
// // Check candidates
// for candidate in candidates {
// let (_, errs_cand) =
// Self::read_all_fileinfo(&disks, "", bucket, &candidate, version_id, false, false).await?;
//
// if !DiskError::is_all_not_found(&errs_cand) {
// info!(
// "Heal request for object '{}' failed (not found). Auto-corrected to '{}'.",
// effective_object, candidate
// );
// effective_object = candidate;
// break; // Found a match, stop searching
// }
// }
// }
// }
// let object = effective_object.as_str();
let _write_lock_guard = if !opts.no_lock {
let key = rustfs_lock::fast_lock::types::ObjectKey::new(bucket, object);

View File

@@ -122,6 +122,9 @@ export RUSTFS_OBJECT_CACHE_ENABLE=true
# Profiling configuration
export RUSTFS_ENABLE_PROFILING=false
# Heal configuration: maximum number of queued heal requests
export RUSTFS_HEAL_QUEUE_SIZE=10000
if [ -n "$1" ]; then
export RUSTFS_VOLUMES="$1"
fi