diff --git a/Cargo.lock b/Cargo.lock index e47c6646..155c7eef 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -216,9 +216,12 @@ dependencies = [ [[package]] name = "arc-swap" -version = "1.7.1" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" +checksum = "51d03449bb8ca2cc2ef70869af31463d1ae5ccc8fa3e334b307203fbf815207e" +dependencies = [ + "rustversion", +] [[package]] name = "argon2" @@ -515,9 +518,9 @@ dependencies = [ [[package]] name = "async-lock" -version = "3.4.1" +version = "3.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fd03604047cee9b6ce9de9f70c6cd540a0520c813cbd49bae61f33ab80ed1dc" +checksum = "290f7f2596bd5b78a9fec8088ccd89180d7f9f55b94b0576823bbbdc72ee8311" dependencies = [ "event-listener", "event-listener-strategy", @@ -691,9 +694,9 @@ dependencies = [ [[package]] name = "aws-sdk-s3" -version = "1.118.0" +version = "1.119.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3e6b7079f85d9ea9a70643c9f89f50db70f5ada868fa9cfe08c1ffdf51abc13" +checksum = "1d65fddc3844f902dfe1864acb8494db5f9342015ee3ab7890270d36fbd2e01c" dependencies = [ "aws-credential-types", "aws-runtime", @@ -942,9 +945,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.9.6" +version = "1.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65fda37911905ea4d3141a01364bc5509a0f32ae3f3b22d6e330c0abfb62d247" +checksum = "a392db6c583ea4a912538afb86b7be7c5d8887d91604f50eb55c262ee1b4a5f5" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -4843,9 +4846,9 @@ dependencies = [ [[package]] name = "libz-rs-sys" -version = "0.5.4" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15413ef615ad868d4d65dce091cb233b229419c7c0c4bcaa746c0901c49ff39c" +checksum = "c10501e7805cee23da17c7790e59df2870c0d4043ec6d03f67d31e2b53e77415" dependencies = [ "zlib-rs", ] @@ -6725,9 +6728,9 @@ checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" [[package]] name = "reqwest" -version = "0.12.26" +version = "0.12.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b4c14b2d9afca6a60277086b0cc6a6ae0b568f6f7916c943a8cdc79f8be240f" +checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" dependencies = [ "base64", "bytes", @@ -6851,22 +6854,19 @@ dependencies = [ [[package]] name = "rmp" -version = "0.8.14" +version = "0.8.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "228ed7c16fa39782c3b3468e974aec2795e9089153cd08ee2e9aefb3613334c4" +checksum = "4ba8be72d372b2c9b35542551678538b562e7cf86c3315773cae48dfbfe7790c" dependencies = [ - "byteorder", "num-traits", - "paste", ] [[package]] name = "rmp-serde" -version = "1.3.0" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52e599a477cf9840e92f2cde9a7189e67b42c57532749bf90aea6ec10facd4db" +checksum = "72f81bee8c8ef9b577d1681a70ebbc962c232461e397b22c208c43c04b67a155" dependencies = [ - "byteorder", "rmp", "serde", ] @@ -7040,6 +7040,7 @@ dependencies = [ "hex-simd", "http 1.4.0", "http-body 1.0.1", + "http-body-util", "hyper 1.8.1", "hyper-util", "jemalloc_pprof", @@ -7787,9 +7788,9 @@ dependencies = [ [[package]] name = "rustix" -version = "1.1.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" +checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" dependencies = [ "bitflags 2.10.0", "errno", @@ -7904,8 +7905,8 @@ checksum = "62049b2877bf12821e8f9ad256ee38fdc31db7387ec2d3b3f403024de2034aea" [[package]] name = "s3s" -version = "0.12.0-rc.6" -source = "git+https://github.com/s3s-project/s3s.git?branch=main#df2434d7ad2f0b774e68f25cae90c053dcb84f24" +version = "0.13.0-alpha" +source = "git+https://github.com/s3s-project/s3s.git?branch=main#f6198bbf49abe60066fe47cbbefcb7078863b3e9" dependencies = [ "arrayvec", "async-trait", @@ -8157,15 +8158,15 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.145" +version = "1.0.147" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" +checksum = "6af14725505314343e673e9ecb7cd7e8a36aa9791eb936235a3567cc31447ae4" dependencies = [ "itoa", "memchr", - "ryu", "serde", "serde_core", + "zmij", ] [[package]] @@ -8885,14 +8886,14 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.23.0" +version = "3.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" +checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c" dependencies = [ "fastrand", "getrandom 0.3.4", "once_cell", - "rustix 1.1.2", + "rustix 1.1.3", "windows-sys 0.61.2", ] @@ -10282,7 +10283,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32e45ad4206f6d2479085147f02bc2ef834ac85886624a23575ae137c8aa8156" dependencies = [ "libc", - "rustix 1.1.2", + "rustix 1.1.3", ] [[package]] @@ -10459,9 +10460,15 @@ dependencies = [ [[package]] name = "zlib-rs" -version = "0.5.4" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51f936044d677be1a1168fae1d03b583a285a5dd9d8cbf7b24c23aa1fc775235" +checksum = "40990edd51aae2c2b6907af74ffb635029d5788228222c4bb811e9351c0caad3" + +[[package]] +name = "zmij" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e404bcd8afdaf006e529269d3e85a743f9480c3cef60034d77860d02964f3ba" [[package]] name = "zopfli" diff --git a/Cargo.toml b/Cargo.toml index c7796a35..9ea702d2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -111,7 +111,8 @@ hyper-rustls = { version = "0.27.7", default-features = false, features = ["nati hyper-util = { version = "0.1.19", features = ["tokio", "server-auto", "server-graceful"] } http = "1.4.0" http-body = "1.0.1" -reqwest = { version = "0.12.26", default-features = false, features = ["rustls-tls-webpki-roots", "charset", "http2", "system-proxy", "stream", "json", "blocking"] } +http-body-util = "0.1.3" +reqwest = { version = "0.12.28", default-features = false, features = ["rustls-tls-webpki-roots", "charset", "http2", "system-proxy", "stream", "json", "blocking"] } socket2 = "0.6.1" tokio = { version = "1.48.0", features = ["fs", "rt-multi-thread"] } tokio-rustls = { version = "0.26.4", default-features = false, features = ["logging", "tls12", "ring"] } @@ -133,10 +134,10 @@ form_urlencoded = "1.2.2" prost = "0.14.1" quick-xml = "0.38.4" rmcp = { version = "0.12.0" } -rmp = { version = "0.8.14" } -rmp-serde = { version = "1.3.0" } +rmp = { version = "0.8.15" } +rmp-serde = { version = "1.3.1" } serde = { version = "1.0.228", features = ["derive"] } -serde_json = { version = "1.0.145", features = ["raw_value"] } 
+serde_json = { version = "1.0.147", features = ["raw_value"] } serde_urlencoded = "0.7.1" schemars = "1.1.0" @@ -165,13 +166,13 @@ time = { version = "0.3.44", features = ["std", "parsing", "formatting", "macros # Utilities and Tools anyhow = "1.0.100" -arc-swap = "1.7.1" +arc-swap = "1.8.0" astral-tokio-tar = "0.5.6" atoi = "2.0.0" atomic_enum = "0.3.0" aws-config = { version = "1.8.12" } aws-credential-types = { version = "1.2.11" } -aws-sdk-s3 = { version = "1.117.0", default-features = false, features = ["sigv4a", "rustls", "rt-tokio"] } +aws-sdk-s3 = { version = "1.119.0", default-features = false, features = ["sigv4a", "rustls", "rt-tokio"] } aws-smithy-types = { version = "1.3.5" } base64 = "0.22.1" base64-simd = "0.8.0" @@ -224,7 +225,7 @@ regex = { version = "1.12.2" } rumqttc = { version = "0.25.1" } rust-embed = { version = "8.9.0" } rustc-hash = { version = "2.1.1" } -s3s = { version = "0.12.0-rc.6", features = ["minio"], git = "https://github.com/s3s-project/s3s.git", branch = "main" } +s3s = { version = "0.13.0-alpha", features = ["minio"], git = "https://github.com/s3s-project/s3s.git", branch = "main" } serial_test = "3.2.0" shadow-rs = { version = "1.5.0", default-features = false } siphasher = "1.0.1" @@ -237,7 +238,7 @@ strum = { version = "0.27.2", features = ["derive"] } sysctl = "0.7.1" sysinfo = "0.37.2" temp-env = "0.3.6" -tempfile = "3.23.0" +tempfile = "3.24.0" test-case = "3.3.1" thiserror = "2.0.17" tracing = { version = "0.1.44" } diff --git a/README.md b/README.md index a5e0dca4..3bf4d0e4 100644 --- a/README.md +++ b/README.md @@ -174,7 +174,7 @@ nix run ### Accessing RustFS -5. **Access the Console**: Open your web browser and navigate to `http://localhost:9000` to access the RustFS console. +5. **Access the Console**: Open your web browser and navigate to `http://localhost:9001` to access the RustFS console. * Default credentials: `rustfsadmin` / `rustfsadmin` 6. **Create a Bucket**: Use the console to create a new bucket for your objects. 7. **Upload Objects**: You can upload files directly through the console or use S3-compatible APIs/clients to interact with your RustFS instance. diff --git a/crates/common/src/lib.rs b/crates/common/src/lib.rs index 09dc164a..c239d4b3 100644 --- a/crates/common/src/lib.rs +++ b/crates/common/src/lib.rs @@ -19,6 +19,10 @@ pub mod globals; pub mod heal_channel; pub mod last_minute; pub mod metrics; +mod readiness; + +pub use globals::*; +pub use readiness::{GlobalReadiness, SystemStage}; // is ',' pub static DEFAULT_DELIMITER: u8 = 44; diff --git a/crates/common/src/readiness.rs b/crates/common/src/readiness.rs new file mode 100644 index 00000000..1a0b50d3 --- /dev/null +++ b/crates/common/src/readiness.rs @@ -0,0 +1,136 @@ +// Copyright 2024 RustFS Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+use std::sync::atomic::{AtomicU8, Ordering};
+
+/// Represents the various stages of system startup
+#[repr(u8)]
+pub enum SystemStage {
+    Booting = 0,
+    StorageReady = 1, // Disks online, Quorum met
+    IamReady = 2,     // Users and Policies loaded into cache
+    FullReady = 3,    // System ready to serve all traffic
+}
+
+/// Global readiness tracker for the service.
+/// This struct uses atomic operations to track the readiness status of various components
+/// of the service in a thread-safe manner.
+pub struct GlobalReadiness {
+    status: AtomicU8,
+}
+
+impl Default for GlobalReadiness {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl GlobalReadiness {
+    /// Create a new GlobalReadiness instance with the initial stage set to Booting
+    /// # Returns
+    /// A new instance of GlobalReadiness
+    pub fn new() -> Self {
+        Self {
+            status: AtomicU8::new(SystemStage::Booting as u8),
+        }
+    }
+
+    /// Advance the system to a new stage
+    ///
+    /// # Arguments
+    /// * `step` - The SystemStage step to mark as reached
+    pub fn mark_stage(&self, step: SystemStage) {
+        self.status.fetch_max(step as u8, Ordering::SeqCst);
+    }
+
+    /// Check if the service is fully ready
+    /// # Returns
+    /// `true` if the service is fully ready, `false` otherwise
+    pub fn is_ready(&self) -> bool {
+        self.status.load(Ordering::SeqCst) == SystemStage::FullReady as u8
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::sync::Arc;
+    use std::thread;
+
+    #[test]
+    fn test_initial_state() {
+        let readiness = GlobalReadiness::new();
+        assert!(!readiness.is_ready());
+        assert_eq!(readiness.status.load(Ordering::SeqCst), SystemStage::Booting as u8);
+    }
+
+    #[test]
+    fn test_mark_stage_progression() {
+        let readiness = GlobalReadiness::new();
+        readiness.mark_stage(SystemStage::StorageReady);
+        assert!(!readiness.is_ready());
+        assert_eq!(readiness.status.load(Ordering::SeqCst), SystemStage::StorageReady as u8);
+
+        readiness.mark_stage(SystemStage::IamReady);
+        assert!(!readiness.is_ready());
+        assert_eq!(readiness.status.load(Ordering::SeqCst), SystemStage::IamReady as u8);
+
+        readiness.mark_stage(SystemStage::FullReady);
+        assert!(readiness.is_ready());
+    }
+
+    #[test]
+    fn test_no_regression() {
+        let readiness = GlobalReadiness::new();
+        readiness.mark_stage(SystemStage::FullReady);
+        readiness.mark_stage(SystemStage::IamReady); // Should not regress
+        assert!(readiness.is_ready());
+    }
+
+    #[test]
+    fn test_concurrent_marking() {
+        let readiness = Arc::new(GlobalReadiness::new());
+        let mut handles = vec![];
+
+        for _ in 0..10 {
+            let r = Arc::clone(&readiness);
+            handles.push(thread::spawn(move || {
+                r.mark_stage(SystemStage::StorageReady);
+                r.mark_stage(SystemStage::IamReady);
+                r.mark_stage(SystemStage::FullReady);
+            }));
+        }
+
+        for h in handles {
+            h.join().unwrap();
+        }
+
+        assert!(readiness.is_ready());
+    }
+
+    #[test]
+    fn test_is_ready_only_at_full_ready() {
+        let readiness = GlobalReadiness::new();
+        assert!(!readiness.is_ready());
+
+        readiness.mark_stage(SystemStage::StorageReady);
+        assert!(!readiness.is_ready());
+
+        readiness.mark_stage(SystemStage::IamReady);
+        assert!(!readiness.is_ready());
+
+        readiness.mark_stage(SystemStage::FullReady);
+        assert!(readiness.is_ready());
+    }
+}
diff --git a/crates/config/src/constants/runtime.rs b/crates/config/src/constants/runtime.rs
index b9fa5862..04afaf84 100644
--- a/crates/config/src/constants/runtime.rs
+++ b/crates/config/src/constants/runtime.rs
@@ -39,3 +39,10 @@ pub const DEFAULT_MAX_IO_EVENTS_PER_TICK: usize = 1024;
 /// Event polling default (Tokio default
61) pub const DEFAULT_EVENT_INTERVAL: u32 = 61; pub const DEFAULT_RNG_SEED: Option = None; // None means random + +/// Threshold for small object seek support in megabytes. +/// +/// When an object is smaller than this size, rustfs will provide seek support. +/// +/// Default is set to 10MB. +pub const DEFAULT_OBJECT_SEEK_SUPPORT_THRESHOLD: usize = 10 * 1024 * 1024; diff --git a/crates/ecstore/src/admin_server_info.rs b/crates/ecstore/src/admin_server_info.rs index 9117f8c0..324ec388 100644 --- a/crates/ecstore/src/admin_server_info.rs +++ b/crates/ecstore/src/admin_server_info.rs @@ -23,7 +23,7 @@ use crate::{ }; use crate::data_usage::load_data_usage_cache; -use rustfs_common::{globals::GLOBAL_LOCAL_NODE_NAME, heal_channel::DriveState}; +use rustfs_common::{GLOBAL_LOCAL_NODE_NAME, heal_channel::DriveState}; use rustfs_madmin::{ BackendDisks, Disk, ErasureSetInfo, ITEM_INITIALIZING, ITEM_OFFLINE, ITEM_ONLINE, InfoMessage, ServerProperties, }; diff --git a/crates/ecstore/src/checksum.rs b/crates/ecstore/src/checksum.rs deleted file mode 100644 index dd8be1e6..00000000 --- a/crates/ecstore/src/checksum.rs +++ /dev/null @@ -1,350 +0,0 @@ -#![allow(clippy::map_entry)] -// Copyright 2024 RustFS Team -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#![allow(unused_imports)] -#![allow(unused_variables)] -#![allow(unused_mut)] -#![allow(unused_assignments)] -#![allow(unused_must_use)] -#![allow(clippy::all)] - -use lazy_static::lazy_static; -use rustfs_checksums::ChecksumAlgorithm; -use std::collections::HashMap; - -use crate::client::{api_put_object::PutObjectOptions, api_s3_datatypes::ObjectPart}; -use crate::{disk::DiskAPI, store_api::GetObjectReader}; -use rustfs_utils::crypto::{base64_decode, base64_encode}; -use s3s::header::{ - X_AMZ_CHECKSUM_ALGORITHM, X_AMZ_CHECKSUM_CRC32, X_AMZ_CHECKSUM_CRC32C, X_AMZ_CHECKSUM_SHA1, X_AMZ_CHECKSUM_SHA256, -}; - -use enumset::{EnumSet, EnumSetType, enum_set}; - -#[derive(Debug, EnumSetType, Default)] -#[enumset(repr = "u8")] -pub enum ChecksumMode { - #[default] - ChecksumNone, - ChecksumSHA256, - ChecksumSHA1, - ChecksumCRC32, - ChecksumCRC32C, - ChecksumCRC64NVME, - ChecksumFullObject, -} - -lazy_static! 
{ - static ref C_ChecksumMask: EnumSet = { - let mut s = EnumSet::all(); - s.remove(ChecksumMode::ChecksumFullObject); - s - }; - static ref C_ChecksumFullObjectCRC32: EnumSet = - enum_set!(ChecksumMode::ChecksumCRC32 | ChecksumMode::ChecksumFullObject); - static ref C_ChecksumFullObjectCRC32C: EnumSet = - enum_set!(ChecksumMode::ChecksumCRC32C | ChecksumMode::ChecksumFullObject); -} -const AMZ_CHECKSUM_CRC64NVME: &str = "x-amz-checksum-crc64nvme"; - -impl ChecksumMode { - //pub const CRC64_NVME_POLYNOMIAL: i64 = 0xad93d23594c93659; - - pub fn base(&self) -> ChecksumMode { - let s = EnumSet::from(*self).intersection(*C_ChecksumMask); - match s.as_u8() { - 1_u8 => ChecksumMode::ChecksumNone, - 2_u8 => ChecksumMode::ChecksumSHA256, - 4_u8 => ChecksumMode::ChecksumSHA1, - 8_u8 => ChecksumMode::ChecksumCRC32, - 16_u8 => ChecksumMode::ChecksumCRC32C, - 32_u8 => ChecksumMode::ChecksumCRC64NVME, - _ => panic!("enum err."), - } - } - - pub fn is(&self, t: ChecksumMode) -> bool { - *self & t == t - } - - pub fn key(&self) -> String { - //match c & checksumMask { - match self { - ChecksumMode::ChecksumCRC32 => { - return X_AMZ_CHECKSUM_CRC32.to_string(); - } - ChecksumMode::ChecksumCRC32C => { - return X_AMZ_CHECKSUM_CRC32C.to_string(); - } - ChecksumMode::ChecksumSHA1 => { - return X_AMZ_CHECKSUM_SHA1.to_string(); - } - ChecksumMode::ChecksumSHA256 => { - return X_AMZ_CHECKSUM_SHA256.to_string(); - } - ChecksumMode::ChecksumCRC64NVME => { - return AMZ_CHECKSUM_CRC64NVME.to_string(); - } - _ => { - return "".to_string(); - } - } - } - - pub fn can_composite(&self) -> bool { - let s = EnumSet::from(*self).intersection(*C_ChecksumMask); - match s.as_u8() { - 2_u8 => true, - 4_u8 => true, - 8_u8 => true, - 16_u8 => true, - _ => false, - } - } - - pub fn can_merge_crc(&self) -> bool { - let s = EnumSet::from(*self).intersection(*C_ChecksumMask); - match s.as_u8() { - 8_u8 => true, - 16_u8 => true, - 32_u8 => true, - _ => false, - } - } - - pub fn full_object_requested(&self) -> bool { - let s = EnumSet::from(*self).intersection(*C_ChecksumMask); - match s.as_u8() { - //C_ChecksumFullObjectCRC32 as u8 => true, - //C_ChecksumFullObjectCRC32C as u8 => true, - 32_u8 => true, - _ => false, - } - } - - pub fn key_capitalized(&self) -> String { - self.key() - } - - pub fn raw_byte_len(&self) -> usize { - let u = EnumSet::from(*self).intersection(*C_ChecksumMask).as_u8(); - if u == ChecksumMode::ChecksumCRC32 as u8 || u == ChecksumMode::ChecksumCRC32C as u8 { - 4 - } else if u == ChecksumMode::ChecksumSHA1 as u8 { - use sha1::Digest; - sha1::Sha1::output_size() as usize - } else if u == ChecksumMode::ChecksumSHA256 as u8 { - use sha2::Digest; - sha2::Sha256::output_size() as usize - } else if u == ChecksumMode::ChecksumCRC64NVME as u8 { - 8 - } else { - 0 - } - } - - pub fn hasher(&self) -> Result, std::io::Error> { - match /*C_ChecksumMask & **/self { - ChecksumMode::ChecksumCRC32 => { - return Ok(ChecksumAlgorithm::Crc32.into_impl()); - } - ChecksumMode::ChecksumCRC32C => { - return Ok(ChecksumAlgorithm::Crc32c.into_impl()); - } - ChecksumMode::ChecksumSHA1 => { - return Ok(ChecksumAlgorithm::Sha1.into_impl()); - } - ChecksumMode::ChecksumSHA256 => { - return Ok(ChecksumAlgorithm::Sha256.into_impl()); - } - ChecksumMode::ChecksumCRC64NVME => { - return Ok(ChecksumAlgorithm::Crc64Nvme.into_impl()); - } - _ => return Err(std::io::Error::other("unsupported checksum type")), - } - } - - pub fn is_set(&self) -> bool { - let s = EnumSet::from(*self).intersection(*C_ChecksumMask); - s.len() == 1 - } - - pub fn 
set_default(&mut self, t: ChecksumMode) { - if !self.is_set() { - *self = t; - } - } - - pub fn encode_to_string(&self, b: &[u8]) -> Result { - if !self.is_set() { - return Ok("".to_string()); - } - let mut h = self.hasher()?; - h.update(b); - let hash = h.finalize(); - Ok(base64_encode(hash.as_ref())) - } - - pub fn to_string(&self) -> String { - //match c & checksumMask { - match self { - ChecksumMode::ChecksumCRC32 => { - return "CRC32".to_string(); - } - ChecksumMode::ChecksumCRC32C => { - return "CRC32C".to_string(); - } - ChecksumMode::ChecksumSHA1 => { - return "SHA1".to_string(); - } - ChecksumMode::ChecksumSHA256 => { - return "SHA256".to_string(); - } - ChecksumMode::ChecksumNone => { - return "".to_string(); - } - ChecksumMode::ChecksumCRC64NVME => { - return "CRC64NVME".to_string(); - } - _ => { - return "".to_string(); - } - } - } - - // pub fn check_sum_reader(&self, r: GetObjectReader) -> Result { - // let mut h = self.hasher()?; - // Ok(Checksum::new(self.clone(), h.sum().as_bytes())) - // } - - // pub fn check_sum_bytes(&self, b: &[u8]) -> Result { - // let mut h = self.hasher()?; - // Ok(Checksum::new(self.clone(), h.sum().as_bytes())) - // } - - pub fn composite_checksum(&self, p: &mut [ObjectPart]) -> Result { - if !self.can_composite() { - return Err(std::io::Error::other("cannot do composite checksum")); - } - p.sort_by(|i, j| { - if i.part_num < j.part_num { - std::cmp::Ordering::Less - } else if i.part_num > j.part_num { - std::cmp::Ordering::Greater - } else { - std::cmp::Ordering::Equal - } - }); - let c = self.base(); - let crc_bytes = Vec::::with_capacity(p.len() * self.raw_byte_len() as usize); - let mut h = self.hasher()?; - h.update(crc_bytes.as_ref()); - let hash = h.finalize(); - Ok(Checksum { - checksum_type: self.clone(), - r: hash.as_ref().to_vec(), - computed: false, - }) - } - - pub fn full_object_checksum(&self, p: &mut [ObjectPart]) -> Result { - todo!(); - } -} - -#[derive(Default)] -pub struct Checksum { - checksum_type: ChecksumMode, - r: Vec, - computed: bool, -} - -#[allow(dead_code)] -impl Checksum { - fn new(t: ChecksumMode, b: &[u8]) -> Checksum { - if t.is_set() && b.len() == t.raw_byte_len() { - return Checksum { - checksum_type: t, - r: b.to_vec(), - computed: false, - }; - } - Checksum::default() - } - - #[allow(dead_code)] - fn new_checksum_string(t: ChecksumMode, s: &str) -> Result { - let b = match base64_decode(s.as_bytes()) { - Ok(b) => b, - Err(err) => return Err(std::io::Error::other(err.to_string())), - }; - if t.is_set() && b.len() == t.raw_byte_len() { - return Ok(Checksum { - checksum_type: t, - r: b, - computed: false, - }); - } - Ok(Checksum::default()) - } - - fn is_set(&self) -> bool { - self.checksum_type.is_set() && self.r.len() == self.checksum_type.raw_byte_len() - } - - fn encoded(&self) -> String { - if !self.is_set() { - return "".to_string(); - } - base64_encode(&self.r) - } - - #[allow(dead_code)] - fn raw(&self) -> Option> { - if !self.is_set() { - return None; - } - Some(self.r.clone()) - } -} - -pub fn add_auto_checksum_headers(opts: &mut PutObjectOptions) { - opts.user_metadata - .insert("X-Amz-Checksum-Algorithm".to_string(), opts.auto_checksum.to_string()); - if opts.auto_checksum.full_object_requested() { - opts.user_metadata - .insert("X-Amz-Checksum-Type".to_string(), "FULL_OBJECT".to_string()); - } -} - -pub fn apply_auto_checksum(opts: &mut PutObjectOptions, all_parts: &mut [ObjectPart]) -> Result<(), std::io::Error> { - if opts.auto_checksum.can_composite() && 
!opts.auto_checksum.is(ChecksumMode::ChecksumFullObject) { - let crc = opts.auto_checksum.composite_checksum(all_parts)?; - opts.user_metadata = { - let mut hm = HashMap::new(); - hm.insert(opts.auto_checksum.key(), crc.encoded()); - hm - } - } else if opts.auto_checksum.can_merge_crc() { - let crc = opts.auto_checksum.full_object_checksum(all_parts)?; - opts.user_metadata = { - let mut hm = HashMap::new(); - hm.insert(opts.auto_checksum.key_capitalized(), crc.encoded()); - hm.insert("X-Amz-Checksum-Type".to_string(), "FULL_OBJECT".to_string()); - hm - } - } - - Ok(()) -} diff --git a/crates/ecstore/src/chunk_stream.rs b/crates/ecstore/src/chunk_stream.rs deleted file mode 100644 index 41b3b2d9..00000000 --- a/crates/ecstore/src/chunk_stream.rs +++ /dev/null @@ -1,270 +0,0 @@ -// Copyright 2024 RustFS Team -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// use crate::error::StdError; -// use bytes::Bytes; -// use futures::pin_mut; -// use futures::stream::{Stream, StreamExt}; -// use std::future::Future; -// use std::pin::Pin; -// use std::task::{Context, Poll}; -// use transform_stream::AsyncTryStream; - -// pub type SyncBoxFuture<'a, T> = Pin + Send + Sync + 'a>>; - -// pub struct ChunkedStream<'a> { -// /// inner -// inner: AsyncTryStream>>, - -// remaining_length: usize, -// } - -// impl<'a> ChunkedStream<'a> { -// pub fn new(body: S, content_length: usize, chunk_size: usize, need_padding: bool) -> Self -// where -// S: Stream> + Send + Sync + 'a, -// { -// let inner = AsyncTryStream::<_, _, SyncBoxFuture<'a, Result<(), StdError>>>::new(|mut y| { -// #[allow(clippy::shadow_same)] // necessary for `pin_mut!` -// Box::pin(async move { -// pin_mut!(body); -// // Data left over from the previous call -// let mut prev_bytes = Bytes::new(); -// let mut read_size = 0; - -// loop { -// let data: Vec = { -// // Read a fixed-size chunk -// match Self::read_data(body.as_mut(), prev_bytes, chunk_size).await { -// None => break, -// Some(Err(e)) => return Err(e), -// Some(Ok((data, remaining_bytes))) => { -// // debug!( -// // "content_length:{},read_size:{}, read_data data:{}, remaining_bytes: {} ", -// // content_length, -// // read_size, -// // data.len(), -// // remaining_bytes.len() -// // ); - -// prev_bytes = remaining_bytes; -// data -// } -// } -// }; - -// for bytes in data { -// read_size += bytes.len(); -// // debug!("read_size {}, content_length {}", read_size, content_length,); -// y.yield_ok(bytes).await; -// } - -// if read_size + prev_bytes.len() >= content_length { -// // debug!( -// // "Finished reading: read_size:{} + prev_bytes.len({}) == content_length {}", -// // read_size, -// // prev_bytes.len(), -// // content_length, -// // ); - -// // Pad with zeros? 
-// if !need_padding { -// y.yield_ok(prev_bytes).await; -// break; -// } - -// let mut bytes = vec![0u8; chunk_size]; -// let (left, _) = bytes.split_at_mut(prev_bytes.len()); -// left.copy_from_slice(&prev_bytes); - -// y.yield_ok(Bytes::from(bytes)).await; - -// break; -// } -// } - -// // debug!("chunked stream exit"); - -// Ok(()) -// }) -// }); -// Self { -// inner, -// remaining_length: content_length, -// } -// } -// /// read data and return remaining bytes -// async fn read_data( -// mut body: Pin<&mut S>, -// prev_bytes: Bytes, -// data_size: usize, -// ) -> Option, Bytes), StdError>> -// where -// S: Stream> + Send, -// { -// let mut bytes_buffer = Vec::new(); - -// // Run only once -// let mut push_data_bytes = |mut bytes: Bytes| { -// // debug!("read from body {} split per {}, prev_bytes: {}", bytes.len(), data_size, prev_bytes.len()); - -// if bytes.is_empty() { -// return None; -// } - -// if data_size == 0 { -// return Some(bytes); -// } - -// // Merge with the previous data -// if !prev_bytes.is_empty() { -// let need_size = data_size.wrapping_sub(prev_bytes.len()); -// // debug!( -// // "Previous leftover {}, take {} now, total: {}", -// // prev_bytes.len(), -// // need_size, -// // prev_bytes.len() + need_size -// // ); -// if bytes.len() >= need_size { -// let data = bytes.split_to(need_size); -// let mut combined = Vec::new(); -// combined.extend_from_slice(&prev_bytes); -// combined.extend_from_slice(&data); - -// // debug!( -// // "Fetched more bytes than needed: {}, merged result {}, remaining bytes {}", -// // need_size, -// // combined.len(), -// // bytes.len(), -// // ); - -// bytes_buffer.push(Bytes::from(combined)); -// } else { -// let mut combined = Vec::new(); -// combined.extend_from_slice(&prev_bytes); -// combined.extend_from_slice(&bytes); - -// // debug!( -// // "Fetched fewer bytes than needed: {}, merged result {}, remaining bytes {}, return immediately", -// // need_size, -// // combined.len(), -// // bytes.len(), -// // ); - -// return Some(Bytes::from(combined)); -// } -// } - -// // If the fetched data exceeds the chunk, slice the required size -// if data_size <= bytes.len() { -// let n = bytes.len() / data_size; - -// for _ in 0..n { -// let data = bytes.split_to(data_size); - -// // println!("bytes_buffer.push: {}, remaining: {}", data.len(), bytes.len()); -// bytes_buffer.push(data); -// } - -// Some(bytes) -// } else { -// // Insufficient data -// Some(bytes) -// } -// }; - -// // Remaining data -// let remaining_bytes = 'outer: { -// // // Exit if the previous data was sufficient -// // if let Some(remaining_bytes) = push_data_bytes(prev_bytes) { -// // println!("Consuming leftovers"); -// // break 'outer remaining_bytes; -// // } - -// loop { -// match body.next().await? 
{ -// Err(e) => return Some(Err(e)), -// Ok(bytes) => { -// if let Some(remaining_bytes) = push_data_bytes(bytes) { -// break 'outer remaining_bytes; -// } -// } -// } -// } -// }; - -// Some(Ok((bytes_buffer, remaining_bytes))) -// } - -// fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll>> { -// let ans = Pin::new(&mut self.inner).poll_next(cx); -// if let Poll::Ready(Some(Ok(ref bytes))) = ans { -// self.remaining_length = self.remaining_length.saturating_sub(bytes.len()); -// } -// ans -// } - -// // pub fn exact_remaining_length(&self) -> usize { -// // self.remaining_length -// // } -// } - -// impl Stream for ChunkedStream<'_> { -// type Item = Result; - -// fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { -// self.poll(cx) -// } - -// fn size_hint(&self) -> (usize, Option) { -// (0, None) -// } -// } - -// #[cfg(test)] -// mod test { - -// use super::*; - -// #[tokio::test] -// async fn test_chunked_stream() { -// let chunk_size = 4; - -// let data1 = vec![1u8; 7777]; // 65536 -// let data2 = vec![1u8; 7777]; // 65536 - -// let content_length = data1.len() + data2.len(); - -// let chunk1 = Bytes::from(data1); -// let chunk2 = Bytes::from(data2); - -// let chunk_results: Vec> = vec![Ok(chunk1), Ok(chunk2)]; - -// let stream = futures::stream::iter(chunk_results); - -// let mut chunked_stream = ChunkedStream::new(stream, content_length, chunk_size, true); - -// loop { -// let ans1 = chunked_stream.next().await; -// if ans1.is_none() { -// break; -// } - -// let bytes = ans1.unwrap().unwrap(); -// assert!(bytes.len() == chunk_size) -// } - -// // assert_eq!(ans1.unwrap(), chunk1_data.as_slice()); -// } -// } diff --git a/crates/ecstore/src/client/hook_reader.rs b/crates/ecstore/src/client/hook_reader.rs deleted file mode 100644 index 38d2c3f8..00000000 --- a/crates/ecstore/src/client/hook_reader.rs +++ /dev/null @@ -1,59 +0,0 @@ -#![allow(clippy::map_entry)] -// Copyright 2024 RustFS Team -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use std::{collections::HashMap, sync::Arc}; - -use crate::{ - disk::{ - error::{is_unformatted_disk, DiskError}, - format::{DistributionAlgoVersion, FormatV3}, - new_disk, DiskAPI, DiskInfo, DiskOption, DiskStore, - }, - store_api::{ - BucketInfo, BucketOptions, CompletePart, DeleteBucketOptions, DeletedObject, GetObjectReader, HTTPRangeSpec, - ListMultipartsInfo, ListObjectVersionsInfo, ListObjectsV2Info, MakeBucketOptions, MultipartInfo, MultipartUploadResult, - ObjectIO, ObjectInfo, ObjectOptions, ObjectToDelete, PartInfo, PutObjReader, StorageAPI, - }, - credentials::{Credentials, SignatureType,}, - api_put_object_multipart::UploadPartParams, -}; - -use http::HeaderMap; -use tokio_util::sync::CancellationToken; -use tracing::warn; -use tracing::{error, info}; -use url::Url; - -struct HookReader { - source: GetObjectReader, - hook: GetObjectReader, -} - -impl HookReader { - pub fn new(source: GetObjectReader, hook: GetObjectReader) -> HookReader { - HookReader { - source, - hook, - } - } - - fn seek(&self, offset: i64, whence: i64) -> Result { - todo!(); - } - - fn read(&self, b: &[u8]) -> Result { - todo!(); - } -} \ No newline at end of file diff --git a/crates/ecstore/src/erasure.rs b/crates/ecstore/src/erasure.rs deleted file mode 100644 index 2939fe13..00000000 --- a/crates/ecstore/src/erasure.rs +++ /dev/null @@ -1,586 +0,0 @@ -// Copyright 2024 RustFS Team -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use crate::bitrot::{BitrotReader, BitrotWriter}; -use crate::disk::error::{Error, Result}; -use crate::disk::error_reduce::{reduce_write_quorum_errs, OBJECT_OP_IGNORED_ERRS}; -use crate::io::Etag; -use bytes::{Bytes, BytesMut}; -use futures::future::join_all; -use reed_solomon_erasure::galois_8::ReedSolomon; -use smallvec::SmallVec; -use std::any::Any; -use std::io::ErrorKind; -use std::sync::{mpsc, Arc}; -use tokio::io::{AsyncRead, AsyncWrite}; -use tokio::io::{AsyncReadExt, AsyncWriteExt}; -use tokio::sync::mpsc; -use tracing::warn; -use tracing::{error, info}; -use uuid::Uuid; - -use crate::disk::error::DiskError; - -#[derive(Default)] -pub struct Erasure { - data_shards: usize, - parity_shards: usize, - encoder: Option, - pub block_size: usize, - _id: Uuid, - _buf: Vec, -} - -impl Erasure { - pub fn new(data_shards: usize, parity_shards: usize, block_size: usize) -> Self { - // debug!( - // "Erasure new data_shards {},parity_shards {} block_size {} ", - // data_shards, parity_shards, block_size - // ); - let mut encoder = None; - if parity_shards > 0 { - encoder = Some(ReedSolomon::new(data_shards, parity_shards).unwrap()); - } - - Erasure { - data_shards, - parity_shards, - block_size, - encoder, - _id: Uuid::new_v4(), - _buf: vec![0u8; block_size], - } - } - - #[tracing::instrument(level = "info", skip(self, reader, writers))] - pub async fn encode( - self: Arc, - mut reader: S, - writers: &mut [Option], - // block_size: usize, - total_size: usize, - write_quorum: usize, - ) -> Result<(usize, String)> - where - S: AsyncRead + Etag + Unpin + Send + 'static, - { - let (tx, mut rx) = mpsc::channel(5); - let task = tokio::spawn(async move { - let mut buf = vec![0u8; self.block_size]; - let mut total: usize = 0; - loop { - if total_size > 0 { - let new_len = { - let remain = total_size - total; - if remain > self.block_size { self.block_size } else { remain } - }; - - if new_len == 0 && total > 0 { - break; - } - - buf.resize(new_len, 0u8); - match reader.read_exact(&mut buf).await { - Ok(res) => res, - Err(e) => { - if let ErrorKind::UnexpectedEof = e.kind() { - break; - } else { - return Err(e.into()); - } - } - }; - total += buf.len(); - } - let blocks = Arc::new(Box::pin(self.clone().encode_data(&buf)?)); - let _ = tx.send(blocks).await; - if total_size == 0 { - break; - } - } - let etag = reader.etag().await; - Ok((total, etag)) - }); - - while let Some(blocks) = rx.recv().await { - let write_futures = writers.iter_mut().enumerate().map(|(i, w_op)| { - let i_inner = i; - let blocks_inner = blocks.clone(); - async move { - if let Some(w) = w_op { - w.write(blocks_inner[i_inner].clone()).await.err() - } else { - Some(DiskError::DiskNotFound) - } - } - }); - let errs = join_all(write_futures).await; - let none_count = errs.iter().filter(|&x| x.is_none()).count(); - if none_count >= write_quorum { - if total_size == 0 { - break; - } - continue; - } - - if let Some(err) = reduce_write_quorum_errs(&errs, OBJECT_OP_IGNORED_ERRS, write_quorum) { - warn!("Erasure encode errs {:?}", &errs); - return Err(err); - } - } - task.await? 
- } - - pub async fn decode( - &self, - writer: &mut W, - readers: Vec>, - offset: usize, - length: usize, - total_length: usize, - ) -> (usize, Option) - where - W: AsyncWriteExt + Send + Unpin + 'static, - { - if length == 0 { - return (0, None); - } - - let mut reader = ShardReader::new(readers, self, offset, total_length); - - // debug!("ShardReader {:?}", &reader); - - let start_block = offset / self.block_size; - let end_block = (offset + length) / self.block_size; - - // debug!("decode block from {} to {}", start_block, end_block); - - let mut bytes_written = 0; - - for block_idx in start_block..=end_block { - let (block_offset, block_length) = if start_block == end_block { - (offset % self.block_size, length) - } else if block_idx == start_block { - let block_offset = offset % self.block_size; - (block_offset, self.block_size - block_offset) - } else if block_idx == end_block { - (0, (offset + length) % self.block_size) - } else { - (0, self.block_size) - }; - - if block_length == 0 { - // debug!("block_length == 0 break"); - break; - } - - // debug!("decode {} block_offset {},block_length {} ", block_idx, block_offset, block_length); - - let mut bufs = match reader.read().await { - Ok(bufs) => bufs, - Err(err) => return (bytes_written, Some(err)), - }; - - if self.parity_shards > 0 { - if let Err(err) = self.decode_data(&mut bufs) { - return (bytes_written, Some(err)); - } - } - - let written_n = match self - .write_data_blocks(writer, bufs, self.data_shards, block_offset, block_length) - .await - { - Ok(n) => n, - Err(err) => { - error!("write_data_blocks err {:?}", &err); - return (bytes_written, Some(err)); - } - }; - - bytes_written += written_n; - - // debug!("decode {} written_n {}, total_written: {} ", block_idx, written_n, bytes_written); - } - - if bytes_written != length { - // debug!("bytes_written != length: {} != {} ", bytes_written, length); - return (bytes_written, Some(Error::other("erasure decode less data"))); - } - - (bytes_written, None) - } - - async fn write_data_blocks( - &self, - writer: &mut W, - bufs: Vec>>, - data_blocks: usize, - offset: usize, - length: usize, - ) -> Result - where - W: AsyncWrite + Send + Unpin + 'static, - { - if bufs.len() < data_blocks { - return Err(Error::other("read bufs not match data_blocks")); - } - - let data_len: usize = bufs - .iter() - .take(data_blocks) - .filter(|v| v.is_some()) - .map(|v| v.as_ref().unwrap().len()) - .sum(); - if data_len < length { - return Err(Error::other(format!("write_data_blocks data_len < length {} < {}", data_len, length))); - } - - let mut offset = offset; - - // debug!("write_data_blocks offset {}, length {}", offset, length); - - let mut write = length; - let mut total_written = 0; - - for opt_buf in bufs.iter().take(data_blocks) { - let buf = opt_buf.as_ref().unwrap(); - - if offset >= buf.len() { - offset -= buf.len(); - continue; - } - - let buf = &buf[offset..]; - - offset = 0; - - // debug!("write_data_blocks write buf len {}", buf.len()); - - if write < buf.len() { - let buf = &buf[..write]; - - // debug!("write_data_blocks write buf less len {}", buf.len()); - writer.write_all(buf).await?; - // debug!("write_data_blocks write done len {}", buf.len()); - total_written += buf.len(); - break; - } - - writer.write_all(buf).await?; - let n = buf.len(); - - // debug!("write_data_blocks write done len {}", n); - write -= n; - total_written += n; - } - - Ok(total_written) - } - - pub fn total_shard_count(&self) -> usize { - self.data_shards + self.parity_shards - } - - 
#[tracing::instrument(level = "info", skip_all, fields(data_len=data.len()))] - pub fn encode_data(self: Arc, data: &[u8]) -> Result> { - let (shard_size, total_size) = self.need_size(data.len()); - - // Generate the total length required for all shards - let mut data_buffer = BytesMut::with_capacity(total_size); - - // Copy the source data - data_buffer.extend_from_slice(data); - data_buffer.resize(total_size, 0u8); - - { - // Perform EC encoding; the results go into data_buffer - let data_slices: SmallVec<[&mut [u8]; 16]> = data_buffer.chunks_exact_mut(shard_size).collect(); - - // Only perform EC encoding when parity shards are present - if self.parity_shards > 0 { - self.encoder.as_ref().unwrap().encode(data_slices).map_err(Error::other)?; - } - } - - // Zero-copy shards: every shard references data_buffer - let mut data_buffer = data_buffer.freeze(); - let mut shards = Vec::with_capacity(self.total_shard_count()); - for _ in 0..self.total_shard_count() { - let shard = data_buffer.split_to(shard_size); - shards.push(shard); - } - - Ok(shards) - } - - pub fn decode_data(&self, shards: &mut [Option>]) -> Result<()> { - if self.parity_shards > 0 { - self.encoder.as_ref().unwrap().reconstruct(shards).map_err(Error::other)?; - } - - Ok(()) - } - - // The length per shard and the total required length - fn need_size(&self, data_size: usize) -> (usize, usize) { - let shard_size = self.shard_size(data_size); - (shard_size, shard_size * (self.total_shard_count())) - } - - // Compute each shard size - pub fn shard_size(&self, data_size: usize) -> usize { - data_size.div_ceil(self.data_shards) - } - // returns final erasure size from original size. - pub fn shard_file_size(&self, total_size: usize) -> usize { - if total_size == 0 { - return 0; - } - - let num_shards = total_size / self.block_size; - let last_block_size = total_size % self.block_size; - let last_shard_size = last_block_size.div_ceil(self.data_shards); - num_shards * self.shard_size(self.block_size) + last_shard_size - - // When writing, EC pads the data so the last shard length should match - // if last_block_size != 0 { - // num_shards += 1 - // } - // num_shards * self.shard_size(self.block_size) - } - - // where erasure reading begins. 
- pub fn shard_file_offset(&self, start_offset: usize, length: usize, total_length: usize) -> usize { - let shard_size = self.shard_size(self.block_size); - let shard_file_size = self.shard_file_size(total_length); - let end_shard = (start_offset + length) / self.block_size; - let mut till_offset = end_shard * shard_size + shard_size; - if till_offset > shard_file_size { - till_offset = shard_file_size; - } - - till_offset - } - - pub async fn heal( - &self, - writers: &mut [Option], - readers: Vec>, - total_length: usize, - _prefer: &[bool], - ) -> Result<()> { - info!( - "Erasure heal, writers len: {}, readers len: {}, total_length: {}", - writers.len(), - readers.len(), - total_length - ); - if writers.len() != self.parity_shards + self.data_shards { - return Err(Error::other("invalid argument")); - } - let mut reader = ShardReader::new(readers, self, 0, total_length); - - let start_block = 0; - let mut end_block = total_length / self.block_size; - if total_length % self.block_size != 0 { - end_block += 1; - } - - let mut errs = Vec::new(); - for _ in start_block..end_block { - let mut bufs = reader.read().await?; - - if self.parity_shards > 0 { - self.encoder.as_ref().unwrap().reconstruct(&mut bufs).map_err(Error::other)?; - } - - let shards = bufs.into_iter().flatten().map(Bytes::from).collect::>(); - if shards.len() != self.parity_shards + self.data_shards { - return Err(Error::other("can not reconstruct data")); - } - - for (i, w) in writers.iter_mut().enumerate() { - if w.is_none() { - continue; - } - match w.as_mut().unwrap().write(shards[i].clone()).await { - Ok(_) => {} - Err(e) => { - info!("write failed, err: {:?}", e); - errs.push(e); - } - } - } - } - if !errs.is_empty() { - return Err(errs[0].clone().into()); - } - - Ok(()) - } -} - -#[async_trait::async_trait] -pub trait Writer { - fn as_any(&self) -> &dyn Any; - async fn write(&mut self, buf: Bytes) -> Result<()>; - async fn close(&mut self) -> Result<()> { - Ok(()) - } -} - -#[async_trait::async_trait] -pub trait ReadAt { - async fn read_at(&mut self, offset: usize, length: usize) -> Result<(Vec, usize)>; -} - -pub struct ShardReader { - readers: Vec>, // Disk readers - data_block_count: usize, // Total number of shards - parity_block_count: usize, - shard_size: usize, // Block size per shard (read one block at a time) - shard_file_size: usize, // Total size of the shard file - offset: usize, // Offset within the shard -} - -impl ShardReader { - pub fn new(readers: Vec>, ec: &Erasure, offset: usize, total_length: usize) -> Self { - Self { - readers, - data_block_count: ec.data_shards, - parity_block_count: ec.parity_shards, - shard_size: ec.shard_size(ec.block_size), - shard_file_size: ec.shard_file_size(total_length), - offset: (offset / ec.block_size) * ec.shard_size(ec.block_size), - } - } - - pub async fn read(&mut self) -> Result>>> { - // let mut disks = self.readers; - let reader_length = self.readers.len(); - // Length of the block to read - let mut read_length = self.shard_size; - if self.offset + read_length > self.shard_file_size { - read_length = self.shard_file_size - self.offset - } - - if read_length == 0 { - return Ok(vec![None; reader_length]); - } - - // debug!("shard reader read offset {}, shard_size {}", self.offset, read_length); - - let mut futures = Vec::with_capacity(reader_length); - let mut errors = Vec::with_capacity(reader_length); - - let mut ress = Vec::with_capacity(reader_length); - - for disk in self.readers.iter_mut() { - // if disk.is_none() { - // ress.push(None); - // 
errors.push(Some(Error::new(DiskError::DiskNotFound))); - // continue; - // } - - // let disk: &mut BitrotReader = disk.as_mut().unwrap(); - let offset = self.offset; - futures.push(async move { - if let Some(disk) = disk { - disk.read_at(offset, read_length).await - } else { - Err(DiskError::DiskNotFound) - } - }); - } - - let results = join_all(futures).await; - for result in results { - match result { - Ok((res, _)) => { - ress.push(Some(res)); - errors.push(None); - } - Err(e) => { - ress.push(None); - errors.push(Some(e)); - } - } - } - - if !self.can_decode(&ress) { - warn!("ec decode read ress {:?}", &ress); - warn!("ec decode read errors {:?}", &errors); - - return Err(Error::other("shard reader read failed")); - } - - self.offset += self.shard_size; - - Ok(ress) - } - - fn can_decode(&self, bufs: &[Option>]) -> bool { - let c = bufs.iter().filter(|v| v.is_some()).count(); - if self.parity_block_count > 0 { - c >= self.data_block_count - } else { - c == self.data_block_count - } - } -} - -// fn shards_to_option_shards(shards: &[Vec]) -> Vec>> { -// let mut result = Vec::with_capacity(shards.len()); - -// for v in shards.iter() { -// let inner: Vec = v.clone(); -// result.push(Some(inner)); -// } -// result -// } - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn test_erasure() { - let data_shards = 3; - let parity_shards = 2; - let data: &[u8] = &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]; - let ec = Erasure::new(data_shards, parity_shards, 1); - let shards = Arc::new(ec).encode_data(data).unwrap(); - println!("shards:{:?}", shards); - - let mut s: Vec<_> = shards - .iter() - .map(|d| if d.is_empty() { None } else { Some(d.to_vec()) }) - .collect(); - - // let mut s = shards_to_option_shards(&shards); - - // s[0] = None; - s[4] = None; - s[3] = None; - - println!("sss:{:?}", &s); - - let ec = Erasure::new(data_shards, parity_shards, 1); - ec.decode_data(&mut s).unwrap(); - // ec.encoder.reconstruct(&mut s).unwrap(); - - println!("sss:{:?}", &s); - } -} diff --git a/crates/ecstore/src/lib.rs b/crates/ecstore/src/lib.rs index 3194f2b8..d8ea3440 100644 --- a/crates/ecstore/src/lib.rs +++ b/crates/ecstore/src/lib.rs @@ -20,7 +20,6 @@ pub mod batch_processor; pub mod bitrot; pub mod bucket; pub mod cache_value; -mod chunk_stream; pub mod compress; pub mod config; pub mod data_usage; diff --git a/crates/ecstore/src/metrics_realtime.rs b/crates/ecstore/src/metrics_realtime.rs index 4d938a48..2bbe6456 100644 --- a/crates/ecstore/src/metrics_realtime.rs +++ b/crates/ecstore/src/metrics_realtime.rs @@ -19,11 +19,7 @@ use crate::{ // utils::os::get_drive_stats, }; use chrono::Utc; -use rustfs_common::{ - globals::{GLOBAL_LOCAL_NODE_NAME, GLOBAL_RUSTFS_ADDR}, - heal_channel::DriveState, - metrics::global_metrics, -}; +use rustfs_common::{GLOBAL_LOCAL_NODE_NAME, GLOBAL_RUSTFS_ADDR, heal_channel::DriveState, metrics::global_metrics}; use rustfs_madmin::metrics::{DiskIOStats, DiskMetric, RealtimeMetrics}; use rustfs_utils::os::get_drive_stats; use serde::{Deserialize, Serialize}; diff --git a/crates/ecstore/src/sets.rs b/crates/ecstore/src/sets.rs index d321948f..bfec7c43 100644 --- a/crates/ecstore/src/sets.rs +++ b/crates/ecstore/src/sets.rs @@ -40,7 +40,7 @@ use futures::future::join_all; use http::HeaderMap; use rustfs_common::heal_channel::HealOpts; use rustfs_common::{ - globals::GLOBAL_LOCAL_NODE_NAME, + GLOBAL_LOCAL_NODE_NAME, heal_channel::{DriveState, HealItemType}, }; use rustfs_filemeta::FileInfo; diff --git a/crates/ecstore/src/store.rs b/crates/ecstore/src/store.rs index 
df0b6c66..06d2bd4d 100644 --- a/crates/ecstore/src/store.rs +++ b/crates/ecstore/src/store.rs @@ -55,8 +55,8 @@ use futures::future::join_all; use http::HeaderMap; use lazy_static::lazy_static; use rand::Rng as _; -use rustfs_common::globals::{GLOBAL_LOCAL_NODE_NAME, GLOBAL_RUSTFS_HOST, GLOBAL_RUSTFS_PORT}; use rustfs_common::heal_channel::{HealItemType, HealOpts}; +use rustfs_common::{GLOBAL_LOCAL_NODE_NAME, GLOBAL_RUSTFS_HOST, GLOBAL_RUSTFS_PORT}; use rustfs_filemeta::FileInfo; use rustfs_lock::FastLockGuard; use rustfs_madmin::heal_commands::HealResultItem; diff --git a/crates/ecstore/src/tier/warm_backend_azure2.rs b/crates/ecstore/src/tier/warm_backend_azure2.rs deleted file mode 100644 index 338a475d..00000000 --- a/crates/ecstore/src/tier/warm_backend_azure2.rs +++ /dev/null @@ -1,231 +0,0 @@ -// Copyright 2024 RustFS Team -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#![allow(unused_imports)] -#![allow(unused_variables)] -#![allow(unused_mut)] -#![allow(unused_assignments)] -#![allow(unused_must_use)] -#![allow(clippy::all)] - -use std::collections::HashMap; -use std::sync::Arc; - -use azure_core::http::{Body, ClientOptions, RequestContent}; -use azure_storage::StorageCredentials; -use azure_storage_blobs::prelude::*; - -use crate::client::{ - admin_handler_utils::AdminError, - api_put_object::PutObjectOptions, - transition_api::{Options, ReadCloser, ReaderImpl}, -}; -use crate::tier::{ - tier_config::TierAzure, - warm_backend::{WarmBackend, WarmBackendGetOpts}, -}; -use tracing::warn; - -const MAX_MULTIPART_PUT_OBJECT_SIZE: i64 = 1024 * 1024 * 1024 * 1024 * 5; -const MAX_PARTS_COUNT: i64 = 10000; -const _MAX_PART_SIZE: i64 = 1024 * 1024 * 1024 * 5; -const MIN_PART_SIZE: i64 = 1024 * 1024 * 128; - -pub struct WarmBackendAzure { - pub client: Arc, - pub bucket: String, - pub prefix: String, - pub storage_class: String, -} - -impl WarmBackendAzure { - pub async fn new(conf: &TierAzure, tier: &str) -> Result { - if conf.access_key == "" || conf.secret_key == "" { - return Err(std::io::Error::other("both access and secret keys are required")); - } - - if conf.bucket == "" { - return Err(std::io::Error::other("no bucket name was provided")); - } - - let creds = StorageCredentials::access_key(conf.access_key.clone(), conf.secret_key.clone()); - let client = ClientBuilder::new(conf.access_key.clone(), creds) - //.endpoint(conf.endpoint) - .blob_service_client(); - let client = Arc::new(client); - Ok(Self { - client, - bucket: conf.bucket.clone(), - prefix: conf.prefix.strip_suffix("/").unwrap_or(&conf.prefix).to_owned(), - storage_class: "".to_string(), - }) - } - - /*pub fn tier(&self) -> *blob.AccessTier { - if self.storage_class == "" { - return None; - } - for t in blob.PossibleAccessTierValues() { - if strings.EqualFold(self.storage_class, t) { - return &t - } - } - None - }*/ - - pub fn get_dest(&self, object: &str) -> String { - let mut dest_obj = object.to_string(); - if self.prefix != "" { - dest_obj = format!("{}/{}", &self.prefix, object); - } - return dest_obj; - } 
-} - -#[async_trait::async_trait] -impl WarmBackend for WarmBackendAzure { - async fn put_with_meta( - &self, - object: &str, - r: ReaderImpl, - length: i64, - meta: HashMap, - ) -> Result { - let part_size = length; - let client = self.client.clone(); - let container_client = client.container_client(self.bucket.clone()); - let blob_client = container_client.blob_client(self.get_dest(object)); - /*let res = blob_client - .upload( - RequestContent::from(match r { - ReaderImpl::Body(content_body) => content_body.to_vec(), - ReaderImpl::ObjectBody(mut content_body) => content_body.read_all().await?, - }), - false, - length as u64, - None, - ) - .await - else { - return Err(std::io::Error::other("upload error")); - };*/ - - let Ok(res) = blob_client - .put_block_blob(match r { - ReaderImpl::Body(content_body) => content_body.to_vec(), - ReaderImpl::ObjectBody(mut content_body) => content_body.read_all().await?, - }) - .content_type("text/plain") - .into_future() - .await - else { - return Err(std::io::Error::other("put_block_blob error")); - }; - - //self.ToObjectError(err, object) - Ok(res.request_id.to_string()) - } - - async fn put(&self, object: &str, r: ReaderImpl, length: i64) -> Result { - self.put_with_meta(object, r, length, HashMap::new()).await - } - - async fn get(&self, object: &str, rv: &str, opts: WarmBackendGetOpts) -> Result { - let client = self.client.clone(); - let container_client = client.container_client(self.bucket.clone()); - let blob_client = container_client.blob_client(self.get_dest(object)); - blob_client.get(); - todo!(); - } - - async fn remove(&self, object: &str, rv: &str) -> Result<(), std::io::Error> { - let client = self.client.clone(); - let container_client = client.container_client(self.bucket.clone()); - let blob_client = container_client.blob_client(self.get_dest(object)); - blob_client.delete(); - todo!(); - } - - async fn in_use(&self) -> Result { - /*let result = self.client - .list_objects_v2(&self.bucket, &self.prefix, "", "", SLASH_SEPARATOR, 1) - .await?; - - Ok(result.common_prefixes.len() > 0 || result.contents.len() > 0)*/ - Ok(false) - } -} - -/*fn azure_to_object_error(err: Error, params: Vec) -> Option { - if err == nil { - return nil - } - - bucket := "" - object := "" - if len(params) >= 1 { - bucket = params[0] - } - if len(params) == 2 { - object = params[1] - } - - azureErr, ok := err.(*azcore.ResponseError) - if !ok { - // We don't interpret non Azure errors. As azure errors will - // have StatusCode to help to convert to object errors. 
- return err - } - - serviceCode := azureErr.ErrorCode - statusCode := azureErr.StatusCode - - azureCodesToObjectError(err, serviceCode, statusCode, bucket, object) -}*/ - -/*fn azure_codes_to_object_error(err: Error, service_code: String, status_code: i32, bucket: String, object: String) -> Option { - switch serviceCode { - case "ContainerNotFound", "ContainerBeingDeleted": - err = BucketNotFound{Bucket: bucket} - case "ContainerAlreadyExists": - err = BucketExists{Bucket: bucket} - case "InvalidResourceName": - err = BucketNameInvalid{Bucket: bucket} - case "RequestBodyTooLarge": - err = PartTooBig{} - case "InvalidMetadata": - err = UnsupportedMetadata{} - case "BlobAccessTierNotSupportedForAccountType": - err = NotImplemented{} - case "OutOfRangeInput": - err = ObjectNameInvalid{ - Bucket: bucket, - Object: object, - } - default: - switch statusCode { - case http.StatusNotFound: - if object != "" { - err = ObjectNotFound{ - Bucket: bucket, - Object: object, - } - } else { - err = BucketNotFound{Bucket: bucket} - } - case http.StatusBadRequest: - err = BucketNameInvalid{Bucket: bucket} - } - } - return err -}*/ diff --git a/crates/filemeta/src/headers.rs b/crates/filemeta/src/headers.rs deleted file mode 100644 index 687198a0..00000000 --- a/crates/filemeta/src/headers.rs +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright 2024 RustFS Team -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -pub const AMZ_META_UNENCRYPTED_CONTENT_LENGTH: &str = "X-Amz-Meta-X-Amz-Unencrypted-Content-Length"; -pub const AMZ_META_UNENCRYPTED_CONTENT_MD5: &str = "X-Amz-Meta-X-Amz-Unencrypted-Content-Md5"; - -pub const AMZ_STORAGE_CLASS: &str = "x-amz-storage-class"; - -pub const RESERVED_METADATA_PREFIX: &str = "X-RustFS-Internal-"; -pub const RESERVED_METADATA_PREFIX_LOWER: &str = "x-rustfs-internal-"; - -pub const RUSTFS_HEALING: &str = "X-Rustfs-Internal-healing"; -// pub const RUSTFS_DATA_MOVE: &str = "X-Rustfs-Internal-data-mov"; - -// pub const X_RUSTFS_INLINE_DATA: &str = "x-rustfs-inline-data"; - -pub const VERSION_PURGE_STATUS_KEY: &str = "X-Rustfs-Internal-purgestatus"; - -pub const X_RUSTFS_HEALING: &str = "X-Rustfs-Internal-healing"; -pub const X_RUSTFS_DATA_MOV: &str = "X-Rustfs-Internal-data-mov"; - -pub const AMZ_OBJECT_TAGGING: &str = "X-Amz-Tagging"; -pub const AMZ_BUCKET_REPLICATION_STATUS: &str = "X-Amz-Replication-Status"; -pub const AMZ_DECODED_CONTENT_LENGTH: &str = "X-Amz-Decoded-Content-Length"; - -pub const RUSTFS_DATA_MOVE: &str = "X-Rustfs-Internal-data-mov"; - -// Server-side encryption headers -pub const AMZ_SERVER_SIDE_ENCRYPTION: &str = "x-amz-server-side-encryption"; -pub const AMZ_SERVER_SIDE_ENCRYPTION_AWS_KMS_KEY_ID: &str = "x-amz-server-side-encryption-aws-kms-key-id"; -pub const AMZ_SERVER_SIDE_ENCRYPTION_CONTEXT: &str = "x-amz-server-side-encryption-context"; -pub const AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_ALGORITHM: &str = "x-amz-server-side-encryption-customer-algorithm"; -pub const AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY: &str = "x-amz-server-side-encryption-customer-key"; -pub const AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY_MD5: &str = "x-amz-server-side-encryption-customer-key-md5"; - -// SSE-C copy source headers -pub const AMZ_COPY_SOURCE_SERVER_SIDE_ENCRYPTION_CUSTOMER_ALGORITHM: &str = - "x-amz-copy-source-server-side-encryption-customer-algorithm"; -pub const AMZ_COPY_SOURCE_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY: &str = "x-amz-copy-source-server-side-encryption-customer-key"; -pub const AMZ_COPY_SOURCE_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY_MD5: &str = - "x-amz-copy-source-server-side-encryption-customer-key-md5"; diff --git a/crates/iam/src/error.rs b/crates/iam/src/error.rs index 2a654c43..82d4bbc4 100644 --- a/crates/iam/src/error.rs +++ b/crates/iam/src/error.rs @@ -109,6 +109,9 @@ pub enum Error { #[error("io error: {0}")] Io(std::io::Error), + + #[error("system already initialized")] + IamSysAlreadyInitialized, } impl PartialEq for Error { @@ -162,6 +165,7 @@ impl Clone for Error { Error::PolicyTooLarge => Error::PolicyTooLarge, Error::ConfigNotFound => Error::ConfigNotFound, Error::Io(e) => Error::Io(std::io::Error::new(e.kind(), e.to_string())), + Error::IamSysAlreadyInitialized => Error::IamSysAlreadyInitialized, } } } @@ -226,6 +230,7 @@ impl From for Error { rustfs_policy::error::Error::StringError(s) => Error::StringError(s), rustfs_policy::error::Error::CryptoError(e) => Error::CryptoError(e), rustfs_policy::error::Error::ErrCredMalformed => Error::ErrCredMalformed, + rustfs_policy::error::Error::IamSysAlreadyInitialized => Error::IamSysAlreadyInitialized, } } } diff --git a/crates/iam/src/lib.rs b/crates/iam/src/lib.rs index 592695d6..f217b84e 100644 --- a/crates/iam/src/lib.rs +++ b/crates/iam/src/lib.rs @@ -18,30 +18,58 @@ use rustfs_ecstore::store::ECStore; use std::sync::{Arc, OnceLock}; use store::object::ObjectStore; use sys::IamSys; -use tracing::{debug, instrument}; +use tracing::{error, info, instrument}; pub mod cache; pub mod 
error; pub mod manager; pub mod store; -pub mod utils; - pub mod sys; +pub mod utils; static IAM_SYS: OnceLock>> = OnceLock::new(); #[instrument(skip(ecstore))] pub async fn init_iam_sys(ecstore: Arc) -> Result<()> { - debug!("init iam system"); - let s = IamCache::new(ObjectStore::new(ecstore).await).await; + if IAM_SYS.get().is_some() { + info!("IAM system already initialized, skipping."); + return Ok(()); + } - IAM_SYS.get_or_init(move || IamSys::new(s).into()); + info!("Starting IAM system initialization sequence..."); + + // 1. Create the persistent storage adapter + let storage_adapter = ObjectStore::new(ecstore); + + // 2. Create the cache manager. + // The `new` method now performs a blocking initial load from disk. + let cache_manager = IamCache::new(storage_adapter).await; + + // 3. Construct the system interface + let iam_instance = Arc::new(IamSys::new(cache_manager)); + + // 4. Securely set the global singleton + if IAM_SYS.set(iam_instance).is_err() { + error!("Critical: Race condition detected during IAM initialization!"); + return Err(Error::IamSysAlreadyInitialized); + } + + info!("IAM system initialization completed successfully."); Ok(()) } #[inline] pub fn get() -> Result>> { - IAM_SYS.get().map(Arc::clone).ok_or(Error::IamSysNotInitialized) + let sys = IAM_SYS.get().map(Arc::clone).ok_or(Error::IamSysNotInitialized)?; + + // Double-check the internal readiness state. The OnceLock is only set + // after initialization and data loading complete, so this is a defensive + // guard to ensure callers never operate on a partially initialized system. + if !sys.is_ready() { + return Err(Error::IamSysNotInitialized); + } + + Ok(sys) } pub fn get_global_iam_sys() -> Option>> { diff --git a/crates/iam/src/manager.rs b/crates/iam/src/manager.rs index fbcb1185..5fa5220b 100644 --- a/crates/iam/src/manager.rs +++ b/crates/iam/src/manager.rs @@ -37,6 +37,7 @@ use rustfs_policy::{ use rustfs_utils::path::path_join_buf; use serde::{Deserialize, Serialize}; use serde_json::Value; +use std::sync::atomic::AtomicU8; use std::{ collections::{HashMap, HashSet}, sync::{ @@ -76,9 +77,19 @@ fn get_iam_format_file_path() -> String { path_join_buf(&[&IAM_CONFIG_PREFIX, IAM_FORMAT_FILE]) } +#[repr(u8)] +#[derive(Debug, PartialEq)] +pub enum IamState { + Uninitialized = 0, + Loading = 1, + Ready = 2, + Error = 3, +} + pub struct IamCache { pub cache: Cache, pub api: T, + pub state: Arc, pub loading: Arc, pub roles: HashMap>, pub send_chan: Sender, @@ -89,12 +100,19 @@ impl IamCache where T: Store, { + /// Create a new IAM system instance + /// # Arguments + /// * `api` - The storage backend implementing the Store trait + /// + /// # Returns + /// An Arc-wrapped instance of IamSystem pub(crate) async fn new(api: T) -> Arc { let (sender, receiver) = mpsc::channel::(100); let sys = Arc::new(Self { api, cache: Cache::default(), + state: Arc::new(AtomicU8::new(IamState::Uninitialized as u8)), loading: Arc::new(AtomicBool::new(false)), send_chan: sender, roles: HashMap::new(), @@ -105,10 +123,32 @@ where sys } + /// Initialize the IAM system async fn init(self: Arc, receiver: Receiver) -> Result<()> { + self.state.store(IamState::Loading as u8, Ordering::SeqCst); + // Ensure the IAM format file is persisted first self.clone().save_iam_formatter().await?; - self.clone().load().await?; + // Critical: Load all existing users/policies into memory cache + const MAX_RETRIES: usize = 3; + for attempt in 0..MAX_RETRIES { + if let Err(e) = self.clone().load().await { + if attempt == MAX_RETRIES - 1 { + 
self.state.store(IamState::Error as u8, Ordering::SeqCst); + error!("IAM failed to load initial data after {} attempts: {:?}", MAX_RETRIES, e); + return Err(e); + } else { + warn!("IAM load failed, retrying... attempt {}", attempt + 1); + tokio::time::sleep(Duration::from_secs(1)).await; + } + } else { + break; + } + } + self.state.store(IamState::Ready as u8, Ordering::SeqCst); + info!("IAM System successfully initialized and marked as READY"); + + // Background ticker for synchronization // Check if environment variable is set let skip_background_task = std::env::var("RUSTFS_SKIP_BACKGROUND_TASK").is_ok(); @@ -152,6 +192,11 @@ where Ok(()) } + /// Check if IAM system is ready + pub fn is_ready(&self) -> bool { + self.state.load(Ordering::SeqCst) == IamState::Ready as u8 + } + async fn _notify(&self) { self.send_chan.send(OffsetDateTime::now_utc().unix_timestamp()).await.unwrap(); } diff --git a/crates/iam/src/store/object.rs b/crates/iam/src/store/object.rs index 05f2f3d3..5479465b 100644 --- a/crates/iam/src/store/object.rs +++ b/crates/iam/src/store/object.rs @@ -38,7 +38,7 @@ use std::sync::LazyLock; use std::{collections::HashMap, sync::Arc}; use tokio::sync::mpsc::{self, Sender}; use tokio_util::sync::CancellationToken; -use tracing::{info, warn}; +use tracing::{debug, error, info, warn}; pub static IAM_CONFIG_PREFIX: LazyLock = LazyLock::new(|| format!("{RUSTFS_CONFIG_PREFIX}/iam")); pub static IAM_CONFIG_USERS_PREFIX: LazyLock = LazyLock::new(|| format!("{RUSTFS_CONFIG_PREFIX}/iam/users/")); @@ -120,52 +120,18 @@ fn split_path(s: &str, last_index: bool) -> (&str, &str) { #[derive(Clone)] pub struct ObjectStore { object_api: Arc, - prev_cred: Option, } impl ObjectStore { const BUCKET_NAME: &'static str = ".rustfs.sys"; - const PREV_CRED_FILE: &'static str = "config/iam/prev_cred.json"; - /// Load previous credentials from persistent storage in .rustfs.sys bucket - async fn load_prev_cred(object_api: Arc) -> Option { - match read_config(object_api, Self::PREV_CRED_FILE).await { - Ok(data) => serde_json::from_slice::(&data).ok(), - Err(_) => None, - } + pub fn new(object_api: Arc) -> Self { + Self { object_api } } - /// Save previous credentials to persistent storage in .rustfs.sys bucket - async fn save_prev_cred(object_api: Arc, cred: &Option) -> Result<()> { - match cred { - Some(c) => { - let data = serde_json::to_vec(c).map_err(|e| Error::other(format!("Failed to serialize cred: {}", e)))?; - save_config(object_api, Self::PREV_CRED_FILE, data) - .await - .map_err(|e| Error::other(format!("Failed to write cred to storage: {}", e))) - } - None => { - // If no credentials, remove the config - match delete_config(object_api, Self::PREV_CRED_FILE).await { - Ok(_) => Ok(()), - Err(e) => { - // Ignore ConfigNotFound error when trying to delete non-existent config - if matches!(e, rustfs_ecstore::error::StorageError::ConfigNotFound) { - Ok(()) - } else { - Err(Error::other(format!("Failed to delete cred from storage: {}", e))) - } - } - } - } - } - } - - pub async fn new(object_api: Arc) -> Self { - // Load previous credentials from persistent storage in .rustfs.sys bucket - let prev_cred = Self::load_prev_cred(object_api.clone()).await.or_else(get_global_action_cred); - - Self { object_api, prev_cred } + fn decrypt_data(data: &[u8]) -> Result> { + let de = rustfs_crypto::decrypt_data(get_global_action_cred().unwrap_or_default().secret_key.as_bytes(), data)?; + Ok(de) } fn encrypt_data(data: &[u8]) -> Result> { @@ -173,65 +139,10 @@ impl ObjectStore { Ok(en) } - /// Decrypt data with
credential fallback mechanism - /// First tries current credentials, then falls back to previous credentials if available - async fn decrypt_fallback(&self, data: &[u8], path: &str) -> Result> { - let current_cred = get_global_action_cred().unwrap_or_default(); - - // Try current credentials first - match rustfs_crypto::decrypt_data(current_cred.secret_key.as_bytes(), data) { - Ok(decrypted) => { - // Update persistent storage with current credentials for consistency - let _ = Self::save_prev_cred(self.object_api.clone(), &Some(current_cred)).await; - Ok(decrypted) - } - Err(_) => { - // Current credentials failed, try previous credentials - if let Some(ref prev_cred) = self.prev_cred { - match rustfs_crypto::decrypt_data(prev_cred.secret_key.as_bytes(), data) { - Ok(prev_decrypted) => { - warn!("Decryption succeeded with previous credentials, path: {}", path); - - // Re-encrypt with current credentials - match rustfs_crypto::encrypt_data(current_cred.secret_key.as_bytes(), &prev_decrypted) { - Ok(re_encrypted) => { - let _ = save_config(self.object_api.clone(), path, re_encrypted).await; - } - Err(e) => { - warn!("Failed to re-encrypt with current credentials: {}, path: {}", e, path); - } - } - - // Update persistent storage with current credentials - let _ = Self::save_prev_cred(self.object_api.clone(), &Some(current_cred)).await; - Ok(prev_decrypted) - } - Err(_) => { - // Both attempts failed - warn!("Decryption failed with both current and previous credentials, deleting config: {}", path); - let _ = self.delete_iam_config(path).await; - Err(Error::ConfigNotFound) - } - } - } else { - // No previous credentials available - warn!( - "Decryption failed with current credentials and no previous credentials available, deleting config: {}", - path - ); - let _ = self.delete_iam_config(path).await; - Err(Error::ConfigNotFound) - } - } - } - } - async fn load_iamconfig_bytes_with_metadata(&self, path: impl AsRef + Send) -> Result<(Vec, ObjectInfo)> { let (data, obj) = read_config_with_metadata(self.object_api.clone(), path.as_ref(), &ObjectOptions::default()).await?; - let decrypted_data = self.decrypt_fallback(&data, path.as_ref()).await?; - - Ok((decrypted_data, obj)) + Ok((Self::decrypt_data(&data)?, obj)) } async fn list_iam_config_items(&self, prefix: &str, ctx: CancellationToken, sender: Sender) { @@ -430,6 +341,27 @@ impl ObjectStore { Ok(policies) } + /// Checks if the underlying ECStore is ready for metadata operations. + /// This prevents silent failures during the storage boot-up phase. + /// + /// Performs a lightweight probe by attempting to read a known configuration object. + /// If the object is not found, it indicates the storage metadata is not ready. + /// The upper-level caller should handle retries if needed. + async fn check_storage_readiness(&self) -> Result<()> { + // Probe path for a fixed object under the IAM root prefix. + // If it doesn't exist, the system bucket or metadata is not ready. 
+ let probe_path = format!("{}/format.json", *IAM_CONFIG_PREFIX); + + match read_config(self.object_api.clone(), &probe_path).await { + Ok(_) => Ok(()), + Err(rustfs_ecstore::error::StorageError::ConfigNotFound) => Err(Error::other(format!( + "Storage metadata not ready: probe object '{}' not found (expected IAM config to be initialized)", + probe_path + ))), + Err(e) => Err(e.into()), + } + } + // async fn load_policy(&self, name: &str) -> Result { // let mut policy = self // .load_iam_config::(&format!("config/iam/policies/{name}/policy.json")) // .await @@ -475,17 +407,62 @@ impl Store for ObjectStore { async fn load_iam_config(&self, path: impl AsRef + Send) -> Result { let mut data = read_config(self.object_api.clone(), path.as_ref()).await?; - data = self.decrypt_fallback(&data, path.as_ref()).await?; + data = match Self::decrypt_data(&data) { + Ok(v) => v, + Err(err) => { + warn!("decrypt failed, deleting the config file: {}, path: {}", err, path.as_ref()); + // delete the config file when decrypt failed + let _ = self.delete_iam_config(path.as_ref()).await; + return Err(Error::ConfigNotFound); + } + }; Ok(serde_json::from_slice(&data)?) } + /// Saves IAM configuration with a retry mechanism on failure. + /// + /// Retries the save up to 5 times after the initial attempt if the write fails, + /// using exponential backoff between attempts (starting at 400ms, doubling each retry). + /// + /// # Arguments + /// + /// * `item` - The IAM configuration item to save, must implement `Serialize` and `Send`. + /// * `path` - The path where the configuration will be saved. + /// + /// # Returns + /// + /// * `Result<()>` - `Ok(())` on success, or an `Error` if all attempts fail. #[tracing::instrument(level = "debug", skip(self, item, path))] async fn save_iam_config(&self, item: Item, path: impl AsRef + Send) -> Result<()> { let mut data = serde_json::to_vec(&item)?; data = Self::encrypt_data(&data)?; - save_config(self.object_api.clone(), path.as_ref(), data).await?; - Ok(()) + let mut attempts = 0; + let max_attempts = 5; + let path_ref = path.as_ref(); + + loop { + match save_config(self.object_api.clone(), path_ref, data.clone()).await { + Ok(_) => { + debug!("Successfully saved IAM config to {}", path_ref); + return Ok(()); + } + Err(e) if attempts < max_attempts => { + attempts += 1; + // Exponential backoff: 400ms, 800ms, 1600ms... + let wait_ms = 200 * (1 << attempts); + warn!( + "Storage layer not ready for IAM write (attempt {}/{}). Retrying in {}ms.
Path: {}, Error: {:?}", + attempts, max_attempts, wait_ms, path_ref, e + ); + tokio::time::sleep(std::time::Duration::from_millis(wait_ms)).await; + } + Err(e) => { + error!("Final failure saving IAM config to {}: {:?}", path_ref, e); + return Err(e.into()); + } + } + } } async fn delete_iam_config(&self, path: impl AsRef + Send) -> Result<()> { delete_config(self.object_api.clone(), path.as_ref()).await?; @@ -499,8 +476,16 @@ impl Store for ObjectStore { user_identity: UserIdentity, _ttl: Option, ) -> Result<()> { - self.save_iam_config(user_identity, get_user_identity_path(name, user_type)) - .await + // Pre-check storage health + self.check_storage_readiness().await?; + + let path = get_user_identity_path(name, user_type); + debug!("Saving IAM identity to path: {}", path); + + self.save_iam_config(user_identity, path).await.map_err(|e| { + error!("ObjectStore save failure for {}: {:?}", name, e); + e + }) } async fn delete_user_identity(&self, name: &str, user_type: UserType) -> Result<()> { self.delete_iam_config(get_user_identity_path(name, user_type)) diff --git a/crates/iam/src/sys.rs b/crates/iam/src/sys.rs index f5b931d9..a05cdb6b 100644 --- a/crates/iam/src/sys.rs +++ b/crates/iam/src/sys.rs @@ -67,6 +67,13 @@ pub struct IamSys { } impl IamSys { + /// Create a new IamSys instance with the given IamCache store + /// + /// # Arguments + /// * `store` - An Arc to the IamCache instance + /// + /// # Returns + /// A new instance of IamSys pub fn new(store: Arc>) -> Self { tokio::spawn(async move { match opa::lookup_config().await { @@ -87,6 +94,11 @@ impl IamSys { roles_map: HashMap::new(), } } + + /// Check if the IamSys has a watcher configured + /// + /// # Returns + /// `true` if a watcher is configured, `false` otherwise pub fn has_watcher(&self) -> bool { self.store.api.has_watcher() } @@ -859,6 +871,11 @@ impl IamSys { self.get_combined_policy(&policies).await.is_allowed(args).await } + + /// Check if the underlying store is ready + pub fn is_ready(&self) -> bool { + self.store.is_ready() + } } fn is_allowed_by_session_policy(args: &Args<'_>) -> (bool, bool) { diff --git a/crates/lock/src/fast_lock/benchmarks.rs b/crates/lock/src/fast_lock/benchmarks.rs deleted file mode 100644 index 930a5a81..00000000 --- a/crates/lock/src/fast_lock/benchmarks.rs +++ /dev/null @@ -1,325 +0,0 @@ -// Copyright 2024 RustFS Team -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
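The retry loop added to save_iam_config above hard-codes its policy at the call site. For readers who want the shape of that logic in isolation, the same behavior extracted into a generic helper would look roughly like this; a sketch assuming a tokio runtime, not part of the diff.

use std::future::Future;
use std::time::Duration;

// Sketch: retry an async operation up to `max_retries` times after the first
// attempt, doubling the delay between tries, then surface the final error.
async fn retry_with_backoff<T, E, F, Fut>(max_retries: u32, first_delay: Duration, mut op: F) -> Result<T, E>
where
    F: FnMut() -> Fut,
    Fut: Future<Output = Result<T, E>>,
{
    let mut delay = first_delay;
    for _ in 0..max_retries {
        match op().await {
            Ok(v) => return Ok(v),
            Err(_) => {
                tokio::time::sleep(delay).await;
                delay *= 2; // exponential backoff, as in save_iam_config
            }
        }
    }
    op().await // final attempt; its error propagates to the caller
}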
- -// Benchmarks comparing fast lock vs old lock performance - -#[cfg(test)] -#[allow(dead_code)] // Temporarily disable benchmark tests -mod benchmarks { - use super::super::*; - use std::sync::Arc; - use std::time::{Duration, Instant}; - use tokio::task; - - /// Benchmark single-threaded lock operations - #[tokio::test] - async fn bench_single_threaded_fast_locks() { - let manager = Arc::new(FastObjectLockManager::new()); - let iterations = 10000; - - // Warm up - for i in 0..100 { - let _guard = manager - .acquire_write_lock("bucket", &format!("warm_{}", i), "owner") - .await - .unwrap(); - } - - // Benchmark write locks - let start = Instant::now(); - for i in 0..iterations { - let _guard = manager - .acquire_write_lock("bucket", &format!("object_{}", i), "owner") - .await - .unwrap(); - } - let duration = start.elapsed(); - - println!("Fast locks: {} write locks in {:?}", iterations, duration); - println!("Average: {:?} per lock", duration / iterations); - - let metrics = manager.get_metrics(); - println!("Fast path rate: {:.2}%", metrics.shard_metrics.fast_path_rate() * 100.0); - - // Should be much faster than old implementation - assert!(duration.as_millis() < 1000, "Should complete 10k locks in <1s"); - assert!(metrics.shard_metrics.fast_path_rate() > 0.95, "Should have >95% fast path rate"); - } - - /// Benchmark concurrent lock operations - #[tokio::test] - async fn bench_concurrent_fast_locks() { - let manager = Arc::new(FastObjectLockManager::new()); - let concurrent_tasks = 100; - let iterations_per_task = 100; - - let start = Instant::now(); - - let mut handles = Vec::new(); - for task_id in 0..concurrent_tasks { - let manager_clone = manager.clone(); - let handle = task::spawn(async move { - for i in 0..iterations_per_task { - let object_name = format!("obj_{}_{}", task_id, i); - let _guard = manager_clone - .acquire_write_lock("bucket", &object_name, &format!("owner_{}", task_id)) - .await - .unwrap(); - - // Simulate some work - tokio::task::yield_now().await; - } - }); - handles.push(handle); - } - - // Wait for all tasks - for handle in handles { - handle.await.unwrap(); - } - - let duration = start.elapsed(); - let total_ops = concurrent_tasks * iterations_per_task; - - println!("Concurrent fast locks: {} operations across {} tasks in {:?}", - total_ops, concurrent_tasks, duration); - println!("Throughput: {:.2} ops/sec", total_ops as f64 / duration.as_secs_f64()); - - let metrics = manager.get_metrics(); - println!("Fast path rate: {:.2}%", metrics.shard_metrics.fast_path_rate() * 100.0); - println!("Contention events: {}", metrics.shard_metrics.contention_events); - - // Should maintain high throughput even with concurrency - assert!(duration.as_millis() < 5000, "Should complete concurrent ops in <5s"); - } - - /// Benchmark contended lock operations - #[tokio::test] - async fn bench_contended_locks() { - let manager = Arc::new(FastObjectLockManager::new()); - let concurrent_tasks = 50; - let shared_objects = 10; // High contention on few objects - let iterations_per_task = 50; - - let start = Instant::now(); - - let mut handles = Vec::new(); - for task_id in 0..concurrent_tasks { - let manager_clone = manager.clone(); - let handle = task::spawn(async move { - for i in 0..iterations_per_task { - let object_name = format!("shared_{}", i % shared_objects); - - // Mix of read and write operations - if i % 3 == 0 { - // Write operation - if let Ok(_guard) = manager_clone - .acquire_write_lock("bucket", &object_name, &format!("owner_{}", task_id)) - .await - { - 
tokio::task::yield_now().await; - } - } else { - // Read operation - if let Ok(_guard) = manager_clone - .acquire_read_lock("bucket", &object_name, &format!("owner_{}", task_id)) - .await - { - tokio::task::yield_now().await; - } - } - } - }); - handles.push(handle); - } - - // Wait for all tasks - for handle in handles { - handle.await.unwrap(); - } - - let duration = start.elapsed(); - - println!("Contended locks: {} tasks on {} objects in {:?}", - concurrent_tasks, shared_objects, duration); - - let metrics = manager.get_metrics(); - println!("Total acquisitions: {}", metrics.shard_metrics.total_acquisitions()); - println!("Fast path rate: {:.2}%", metrics.shard_metrics.fast_path_rate() * 100.0); - println!("Average wait time: {:?}", metrics.shard_metrics.avg_wait_time()); - println!("Timeout rate: {:.2}%", metrics.shard_metrics.timeout_rate() * 100.0); - - // Even with contention, should maintain reasonable performance - assert!(metrics.shard_metrics.timeout_rate() < 0.1, "Should have <10% timeout rate"); - assert!(metrics.shard_metrics.avg_wait_time() < Duration::from_millis(100), "Avg wait should be <100ms"); - } - - /// Benchmark batch operations - #[tokio::test] - async fn bench_batch_operations() { - let manager = FastObjectLockManager::new(); - let batch_sizes = vec![10, 50, 100, 500]; - - for batch_size in batch_sizes { - // Create batch request - let mut batch = BatchLockRequest::new("batch_owner"); - for i in 0..batch_size { - batch = batch.add_write_lock("bucket", &format!("batch_obj_{}", i)); - } - - let start = Instant::now(); - let result = manager.acquire_locks_batch(batch).await; - let duration = start.elapsed(); - - assert!(result.all_acquired, "Batch should succeed"); - println!("Batch size {}: {:?} ({:.2} μs per lock)", - batch_size, - duration, - duration.as_micros() as f64 / batch_size as f64); - - // Batch should be much faster than individual acquisitions - assert!(duration.as_millis() < batch_size as u128 / 10, - "Batch should be 10x+ faster than individual locks"); - } - } - - /// Benchmark version-specific locks - #[tokio::test] - async fn bench_versioned_locks() { - let manager = Arc::new(FastObjectLockManager::new()); - let objects = 100; - let versions_per_object = 10; - - let start = Instant::now(); - - let mut handles = Vec::new(); - for obj_id in 0..objects { - let manager_clone = manager.clone(); - let handle = task::spawn(async move { - for version in 0..versions_per_object { - let _guard = manager_clone - .acquire_write_lock_versioned( - "bucket", - &format!("obj_{}", obj_id), - &format!("v{}", version), - "version_owner" - ) - .await - .unwrap(); - } - }); - handles.push(handle); - } - - for handle in handles { - handle.await.unwrap(); - } - - let duration = start.elapsed(); - let total_ops = objects * versions_per_object; - - println!("Versioned locks: {} version locks in {:?}", total_ops, duration); - println!("Throughput: {:.2} locks/sec", total_ops as f64 / duration.as_secs_f64()); - - let metrics = manager.get_metrics(); - println!("Fast path rate: {:.2}%", metrics.shard_metrics.fast_path_rate() * 100.0); - - // Versioned locks should not interfere with each other - assert!(metrics.shard_metrics.fast_path_rate() > 0.9, "Should maintain high fast path rate"); - } - - /// Compare with theoretical maximum performance - #[tokio::test] - async fn bench_theoretical_maximum() { - let manager = Arc::new(FastObjectLockManager::new()); - let iterations = 100000; - - // Measure pure fast path performance (no contention) - let start = Instant::now(); - for i 
in 0..iterations { - let _guard = manager - .acquire_write_lock("bucket", &format!("unique_{}", i), "owner") - .await - .unwrap(); - } - let duration = start.elapsed(); - - println!("Theoretical maximum: {} unique locks in {:?}", iterations, duration); - println!("Rate: {:.2} locks/sec", iterations as f64 / duration.as_secs_f64()); - println!("Latency: {:?} per lock", duration / iterations); - - let metrics = manager.get_metrics(); - println!("Fast path rate: {:.2}%", metrics.shard_metrics.fast_path_rate() * 100.0); - - // Should achieve very high performance with no contention - assert!(metrics.shard_metrics.fast_path_rate() > 0.99, "Should be nearly 100% fast path"); - assert!(duration.as_secs_f64() / (iterations as f64) < 0.0001, "Should be <100μs per lock"); - } - - /// Performance regression test - #[tokio::test] - async fn performance_regression_test() { - let manager = Arc::new(FastObjectLockManager::new()); - - // This test ensures we maintain performance targets - let test_cases = vec![ - ("single_thread", 1, 10000), - ("low_contention", 10, 1000), - ("high_contention", 100, 100), - ]; - - for (test_name, threads, ops_per_thread) in test_cases { - let start = Instant::now(); - - let mut handles = Vec::new(); - for thread_id in 0..threads { - let manager_clone = manager.clone(); - let handle = task::spawn(async move { - for op_id in 0..ops_per_thread { - let object = if threads == 1 { - format!("obj_{}_{}", thread_id, op_id) - } else { - format!("obj_{}", op_id % 100) // Create contention - }; - - let owner = format!("owner_{}", thread_id); - let _guard = manager_clone - .acquire_write_lock("bucket", object, owner) - .await - .unwrap(); - } - }); - handles.push(handle); - } - - for handle in handles { - handle.await.unwrap(); - } - - let duration = start.elapsed(); - let total_ops = threads * ops_per_thread; - let ops_per_sec = total_ops as f64 / duration.as_secs_f64(); - - println!("{}: {:.2} ops/sec", test_name, ops_per_sec); - - // Performance targets (adjust based on requirements) - match test_name { - "single_thread" => assert!(ops_per_sec > 50000.0, "Single thread should exceed 50k ops/sec"), - "low_contention" => assert!(ops_per_sec > 20000.0, "Low contention should exceed 20k ops/sec"), - "high_contention" => assert!(ops_per_sec > 5000.0, "High contention should exceed 5k ops/sec"), - _ => {} - } - } - } -} \ No newline at end of file diff --git a/crates/lock/src/fast_lock/mod.rs b/crates/lock/src/fast_lock/mod.rs index d6e89243..3cd4b9c9 100644 --- a/crates/lock/src/fast_lock/mod.rs +++ b/crates/lock/src/fast_lock/mod.rs @@ -37,9 +37,6 @@ pub mod shard; pub mod state; pub mod types; -// #[cfg(test)] -// pub mod benchmarks; // Temporarily disabled due to compilation issues - // Re-export main types pub use disabled_manager::DisabledLockManager; pub use guard::FastLockGuard; diff --git a/crates/mcp/Dockerfile b/crates/mcp/Dockerfile index 5ec9501c..d9c95e94 100644 --- a/crates/mcp/Dockerfile +++ b/crates/mcp/Dockerfile @@ -12,4 +12,6 @@ WORKDIR /app COPY --from=builder /build/target/release/rustfs-mcp /app/ -ENTRYPOINT ["/app/rustfs-mcp"] \ No newline at end of file +RUN apt-get update && apt-get install -y ca-certificates && update-ca-certificates + +ENTRYPOINT ["/app/rustfs-mcp"] diff --git a/crates/policy/src/error.rs b/crates/policy/src/error.rs index 04c58a02..5a0adce1 100644 --- a/crates/policy/src/error.rs +++ b/crates/policy/src/error.rs @@ -89,6 +89,7 @@ pub enum Error { #[error("invalid access_key")] InvalidAccessKey, + #[error("action not allowed")] 
IAMActionNotAllowed, @@ -106,6 +107,9 @@ pub enum Error { #[error("io error: {0}")] Io(std::io::Error), + + #[error("system already initialized")] + IamSysAlreadyInitialized, } impl Error { diff --git a/crates/protos/src/lib.rs b/crates/protos/src/lib.rs index 42fab1f4..e54f1edf 100644 --- a/crates/protos/src/lib.rs +++ b/crates/protos/src/lib.rs @@ -16,7 +16,7 @@ mod generated; use proto_gen::node_service::node_service_client::NodeServiceClient; -use rustfs_common::globals::{GLOBAL_CONN_MAP, GLOBAL_ROOT_CERT, evict_connection}; +use rustfs_common::{GLOBAL_CONN_MAP, GLOBAL_ROOT_CERT, evict_connection}; use std::{error::Error, time::Duration}; use tonic::{ Request, Status, diff --git a/crates/s3select-api/src/lib.rs b/crates/s3select-api/src/lib.rs index 3cee17e7..322bb436 100644 --- a/crates/s3select-api/src/lib.rs +++ b/crates/s3select-api/src/lib.rs @@ -12,10 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::fmt::Display; - use datafusion::{common::DataFusionError, sql::sqlparser::parser::ParserError}; use snafu::{Backtrace, Location, Snafu}; +use std::fmt::Display; pub mod object_store; pub mod query; diff --git a/crates/s3select-api/src/query/datasource/mod.rs b/crates/s3select-api/src/query/datasource/mod.rs deleted file mode 100644 index 6238cfff..00000000 --- a/crates/s3select-api/src/query/datasource/mod.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2024 RustFS Team -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. diff --git a/crates/s3select-api/src/query/mod.rs b/crates/s3select-api/src/query/mod.rs index f21da83a..d83af94b 100644 --- a/crates/s3select-api/src/query/mod.rs +++ b/crates/s3select-api/src/query/mod.rs @@ -12,13 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::sync::Arc; - use s3s::dto::SelectObjectContentInput; +use std::sync::Arc; pub mod analyzer; pub mod ast; -pub mod datasource; pub mod dispatcher; pub mod execution; pub mod function; diff --git a/crates/s3select-api/src/query/session.rs b/crates/s3select-api/src/query/session.rs index e96bc638..ab790542 100644 --- a/crates/s3select-api/src/query/session.rs +++ b/crates/s3select-api/src/query/session.rs @@ -12,20 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License. 
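Returning to the error plumbing at the top of this hunk: IamSysAlreadyInitialized now exists in both rustfs_policy and rustfs_iam, bridged by the From conversion shown earlier in crates/iam/src/error.rs. In miniature, and with illustrative crate-local names rather than the real types, the pattern is:

use thiserror::Error;

#[derive(Debug, Error)]
enum PolicyError {
    #[error("system already initialized")]
    IamSysAlreadyInitialized,
}

#[derive(Debug, Error)]
enum IamError {
    #[error("system already initialized")]
    IamSysAlreadyInitialized,
}

// Mirrors the new match arm added to the From impl in crates/iam/src/error.rs.
impl From<PolicyError> for IamError {
    fn from(e: PolicyError) -> Self {
        match e {
            PolicyError::IamSysAlreadyInitialized => IamError::IamSysAlreadyInitialized,
        }
    }
}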
-use std::sync::Arc; - +use crate::query::Context; +use crate::{QueryError, QueryResult, object_store::EcObjectStore}; use datafusion::{ execution::{SessionStateBuilder, context::SessionState, runtime_env::RuntimeEnvBuilder}, parquet::data_type::AsBytes, prelude::SessionContext, }; use object_store::{ObjectStore, memory::InMemory, path::Path}; +use std::sync::Arc; use tracing::error; -use crate::{QueryError, QueryResult, object_store::EcObjectStore}; - -use super::Context; - #[derive(Clone)] pub struct SessionCtx { _desc: Arc, diff --git a/flake.nix b/flake.nix index 675c1f57..be6b90b2 100644 --- a/flake.nix +++ b/flake.nix @@ -37,7 +37,10 @@ src = ./.; - cargoLock.lockFile = ./Cargo.lock; + cargoLock = { + lockFile = ./Cargo.lock; + allowBuiltinFetchGit = true; + }; nativeBuildInputs = with pkgs; [ pkg-config diff --git a/helm/README.md b/helm/README.md index 3ff09825..95515d27 100644 --- a/helm/README.md +++ b/helm/README.md @@ -44,7 +44,7 @@ RustFS helm chart supports **standalone and distributed mode**. For standalone m | imageRegistryCredentials.username | string | `""` | The username to pull rustfs image from private registry. | | ingress.className | string | `"traefik"` | Specify the ingress class, traefik or nginx. | | ingress.enabled | bool | `true` | | -| ingress.hosts[0].host | string | `"your.rustfs.com"` | | +| ingress.hosts[0].host | string | `"example.rustfs.com"` | | | ingress.hosts[0].paths[0].path | string | `"/"` | | | ingress.hosts[0].paths[0].pathType | string | `"ImplementationSpecific"` | | | ingress.nginxAnnotations."nginx.ingress.kubernetes.io/affinity" | string | `"cookie"` | | @@ -52,6 +52,7 @@ RustFS helm chart supports **standalone and distributed mode**. For standalone m | ingress.nginxAnnotations."nginx.ingress.kubernetes.io/session-cookie-hash" | string | `"sha1"` | | | ingress.nginxAnnotations."nginx.ingress.kubernetes.io/session-cookie-max-age" | string | `"3600"` | | | ingress.nginxAnnotations."nginx.ingress.kubernetes.io/session-cookie-name" | string | `"rustfs"` | | +| ingress.customAnnotations | dict | `{}` |Customize annotations. | | ingress.traefikAnnotations."traefik.ingress.kubernetes.io/service.sticky.cookie" | string | `"true"` | | | ingress.traefikAnnotations."traefik.ingress.kubernetes.io/service.sticky.cookie.httponly" | string | `"true"` | | | ingress.traefikAnnotations."traefik.ingress.kubernetes.io/service.sticky.cookie.name" | string | `"rustfs"` | | @@ -59,8 +60,6 @@ RustFS helm chart supports **standalone and distributed mode**. For standalone m | ingress.traefikAnnotations."traefik.ingress.kubernetes.io/service.sticky.cookie.secure" | string | `"true"` | | | ingress.tls.enabled | bool | `false` | Enable tls and access rustfs via https. | | ingress.tls.certManager.enabled | string | `false` | Enable cert manager support to generate certificate automatically. | -| ingress.tls.certManager.issuer.name | string | `false` | The name of cert manager issuer. | -| ingress.tls.certManager.issuer.kind | string | `false` | The kind of cert manager issuer, issuer or cluster-issuer. | | ingress.tls.crt | string | "" | The content of certificate file. | | ingress.tls.key | string | "" | The content of key file. | | livenessProbe.failureThreshold | int | `3` | | @@ -94,9 +93,11 @@ RustFS helm chart supports **standalone and distributed mode**. For standalone m | secret.existingSecret | string | `""` | Use existing secret with a credentials. 
| | secret.rustfs.access_key | string | `"rustfsadmin"` | RustFS Access Key ID | | secret.rustfs.secret_key | string | `"rustfsadmin"` | RustFS Secret Key ID | -| service.console_port | int | `9001` | | -| service.ep_port | int | `9000` | | | service.type | string | `"NodePort"` | | +| service.console.nodePort | int | `32001` | | +| service.console.port | int | `9001` | | +| service.endpoint.nodePort | int | `32000` | | +| service.endpoint.port | int | `9000` | | | serviceAccount.annotations | object | `{}` | | | serviceAccount.automount | bool | `true` | | | serviceAccount.create | bool | `true` | | @@ -179,12 +180,12 @@ Check the ingress status ``` kubectl -n rustfs get ing NAME CLASS HOSTS ADDRESS PORTS AGE -rustfs nginx your.rustfs.com 10.43.237.152 80, 443 29m +rustfs nginx example.rustfs.com 10.43.237.152 80, 443 29m ``` -Access the rustfs cluster via `https://your.rustfs.com` with the default username and password `rustfsadmin`. +Access the rustfs cluster via `https://example.rustfs.com` with the default username and password `rustfsadmin`. -> Replace the `your.rustfs.com` with your own domain as well as the certificates. +> Replace the `example.rustfs.com` with your own domain as well as the certificates. # TLS configuration diff --git a/helm/rustfs/templates/_helpers.tpl b/helm/rustfs/templates/_helpers.tpl index 0b4a1b4f..c9ab646b 100644 --- a/helm/rustfs/templates/_helpers.tpl +++ b/helm/rustfs/templates/_helpers.tpl @@ -104,10 +104,10 @@ Render RUSTFS_VOLUMES */}} {{- define "rustfs.volumes" -}} {{- if eq (int .Values.replicaCount) 4 }} -{{- printf "http://%s-{0...%d}.%s-headless:%d/data/rustfs{0...%d}" (include "rustfs.fullname" .) (sub (.Values.replicaCount | int) 1) (include "rustfs.fullname" . ) (.Values.service.ep_port | int) (sub (.Values.replicaCount | int) 1) }} +{{- printf "http://%s-{0...%d}.%s-headless:%d/data/rustfs{0...%d}" (include "rustfs.fullname" .) (sub (.Values.replicaCount | int) 1) (include "rustfs.fullname" . ) (.Values.service.endpoint.port | int) (sub (.Values.replicaCount | int) 1) }} {{- end }} {{- if eq (int .Values.replicaCount) 16 }} -{{- printf "http://%s-{0...%d}.%s-headless:%d/data" (include "rustfs.fullname" .) (sub (.Values.replicaCount | int) 1) (include "rustfs.fullname" .) (.Values.service.ep_port | int) }} +{{- printf "http://%s-{0...%d}.%s-headless:%d/data" (include "rustfs.fullname" .) (sub (.Values.replicaCount | int) 1) (include "rustfs.fullname" .) (.Values.service.endpoint.port | int) }} {{- end }} {{- end }} diff --git a/helm/rustfs/templates/certificate.yml b/helm/rustfs/templates/certificate.yml deleted file mode 100644 index 7eaf6a33..00000000 --- a/helm/rustfs/templates/certificate.yml +++ /dev/null @@ -1,15 +0,0 @@ -{{- if and .Values.ingress.tls.enabled .Values.ingress.tls.certManager.enabled }} -{{- $host := index .Values.ingress.hosts 0 }} -apiVersion: cert-manager.io/v1 -kind: Certificate -metadata: - name: {{ include "rustfs.fullname" . 
}}-tls - namespace: {{ .Release.Namespace }} -spec: - secretName: {{ .Values.ingress.tls.secretName }} - issuerRef: - name: {{ .Values.ingress.tls.certManager.issuer.name }} - kind: {{ .Values.ingress.tls.certManager.issuer.kind }} - dnsNames: - - {{ $host.host }} -{{- end }} diff --git a/helm/rustfs/templates/deployment.yaml b/helm/rustfs/templates/deployment.yaml index d19fc0a3..1a2672b3 100644 --- a/helm/rustfs/templates/deployment.yaml +++ b/helm/rustfs/templates/deployment.yaml @@ -57,11 +57,10 @@ spec: {{- end }} initContainers: - name: init-step - image: busybox - imagePullPolicy: {{ .Values.image.pullPolicy }} + image: "{{ .Values.initStep.image.repository }}:{{ .Values.initStep.image.tag }}" + imagePullPolicy: {{ .Values.initStep.image.pullPolicy }} securityContext: - runAsUser: 0 - runAsGroup: 0 + {{- toYaml .Values.initStep.containerSecurityContext | nindent 12 }} command: - sh - -c @@ -83,10 +82,10 @@ spec: {{- toYaml .Values.containerSecurityContext | nindent 12 }} {{- end }} ports: - - containerPort: {{ .Values.service.ep_port }} - name: endpoint - - containerPort: {{ .Values.service.console_port }} - name: console + - name: endpoint + containerPort: {{ .Values.service.endpoint.port }} + - name: console + containerPort: {{ .Values.service.console.port }} envFrom: - configMapRef: name: {{ include "rustfs.fullname" . }}-config diff --git a/helm/rustfs/templates/ingress.yaml b/helm/rustfs/templates/ingress.yaml index bbb7b9d7..89f99c4d 100644 --- a/helm/rustfs/templates/ingress.yaml +++ b/helm/rustfs/templates/ingress.yaml @@ -2,12 +2,9 @@ {{- $secretName := .Values.ingress.tls.secretName }} {{- $ingressAnnotations := dict }} {{- if eq .Values.ingress.className "nginx" }} - {{- $ingressAnnotations = merge $ingressAnnotations (.Values.ingress.nginxAnnotations | default dict) }} +{{- $ingressAnnotations = .Values.ingress.nginxAnnotations }} {{- else if eq .Values.ingress.className "" }} - {{- $ingressAnnotations = merge $ingressAnnotations (.Values.ingress.customAnnoations | default dict) }} -{{- end }} -{{- if .Values.ingress.tls.certManager.enabled }} - {{- $ingressAnnotations = merge $ingressAnnotations (.Values.ingress.certManagerAnnotations | default dict) }} +{{- $ingressAnnotations = .Values.ingress.customAnnotations }} {{- end }} apiVersion: networking.k8s.io/v1 kind: Ingress diff --git a/helm/rustfs/templates/statefulset.yaml b/helm/rustfs/templates/statefulset.yaml index a9b07b54..5fcfcc7d 100644 --- a/helm/rustfs/templates/statefulset.yaml +++ b/helm/rustfs/templates/statefulset.yaml @@ -42,6 +42,7 @@ spec: {{- else }} {} {{- if .Values.affinity.podAntiAffinity.enabled }} + {{- end }} podAntiAffinity: requiredDuringSchedulingIgnoredDuringExecution: - labelSelector: @@ -52,7 +53,6 @@ spec: - {{ include "rustfs.name" . 
}} topologyKey: {{ .Values.affinity.podAntiAffinity.topologyKey }} {{- end }} - {{- end }} {{- end }} {{- if .Values.tolerations }} tolerations: @@ -68,11 +68,10 @@ spec: {{- end }} initContainers: - name: init-step - image: busybox - imagePullPolicy: {{ .Values.image.pullPolicy }} + image: "{{ .Values.initStep.image.repository }}:{{ .Values.initStep.image.tag }}" + imagePullPolicy: {{ .Values.initStep.image.pullPolicy }} securityContext: - runAsUser: 0 - runAsGroup: 0 + {{- toYaml .Values.initStep.containerSecurityContext | nindent 12 }} env: - name: REPLICA_COUNT value: {{ .Values.replicaCount | quote }} @@ -111,10 +110,10 @@ spec: {{- toYaml .Values.containerSecurityContext | nindent 12 }} {{- end }} ports: - - containerPort: {{ .Values.service.ep_port }} - name: endpoint - - containerPort: {{ .Values.service.console_port }} - name: console + - name: endpoint + containerPort: {{ .Values.service.endpoint.port }} + - name: console + containerPort: {{ .Values.service.console.port }} envFrom: - configMapRef: name: {{ include "rustfs.fullname" . }}-config diff --git a/helm/rustfs/templates/tests/test-connection.yaml b/helm/rustfs/templates/tests/test-connection.yaml index 428fc9b5..ee879f85 100644 --- a/helm/rustfs/templates/tests/test-connection.yaml +++ b/helm/rustfs/templates/tests/test-connection.yaml @@ -11,5 +11,5 @@ spec: - name: wget image: busybox command: ['wget'] - args: ['-O', '/dev/null', '{{ include "rustfs.fullname" . }}-svc:{{ .Values.service.ep_port }}/health'] + args: ['-O', '/dev/null', '{{ include "rustfs.fullname" . }}-svc:{{ .Values.service.endpoint.port }}/health'] restartPolicy: Never diff --git a/helm/rustfs/values.yaml b/helm/rustfs/values.yaml index 898e17cd..5159b478 100644 --- a/helm/rustfs/values.yaml +++ b/helm/rustfs/values.yaml @@ -114,13 +114,10 @@ ingress: nginx.ingress.kubernetes.io/session-cookie-hash: sha1 nginx.ingress.kubernetes.io/session-cookie-max-age: "3600" nginx.ingress.kubernetes.io/session-cookie-name: rustfs - certManagerAnnotations: - {} # Specify cert manager issuer annotations,cert-manager.io/issuer or cert-manager.io/cluster-issuer. - # cert-manager.io/issuer: "letsencrypt-staging" customAnnotations: # Specify custom annotations {} # Customize annotations hosts: - - host: xmg.rustfs.com + - host: example.rustfs.com paths: - path: / pathType: Prefix @@ -128,9 +125,6 @@ ingress: enabled: false # Enable tls and access rustfs via https. certManager: enabled: false # Enable certmanager to generate certificate for rustfs, default false. - issuer: - name: letsencrypt-staging # Specify cert manager issuer name - kind: Issuer # Specify cert manager issuer kind, Issuer or ClusterIssuer. secretName: secret-tls crt: tls.crt key: tls.key @@ -183,4 +177,16 @@ storageclass: dataStorageSize: 256Mi logStorageSize: 256Mi +# Init container parameters. 
+initStep: + image: + repository: busybox + pullPolicy: IfNotPresent + tag: "latest" + containerSecurityContext: + runAsUser: 0 + runAsGroup: 0 + + + extraManifests: [] diff --git a/rustfs/Cargo.toml b/rustfs/Cargo.toml index c0fb1776..73438f1a 100644 --- a/rustfs/Cargo.toml +++ b/rustfs/Cargo.toml @@ -73,6 +73,7 @@ hyper.workspace = true hyper-util.workspace = true http.workspace = true http-body.workspace = true +http-body-util.workspace = true reqwest = { workspace = true } socket2 = { workspace = true } tokio = { workspace = true, features = ["rt-multi-thread", "macros", "net", "signal", "process", "io-util"] } diff --git a/rustfs/src/admin/auth.rs b/rustfs/src/admin/auth.rs index 8b994097..2f101099 100644 --- a/rustfs/src/admin/auth.rs +++ b/rustfs/src/admin/auth.rs @@ -1,6 +1,18 @@ -use std::collections::HashMap; -use std::sync::Arc; +// Copyright 2024 RustFS Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +use crate::auth::get_condition_values; use http::HeaderMap; use rustfs_iam::store::object::ObjectStore; use rustfs_iam::sys::IamSys; @@ -9,8 +21,8 @@ use rustfs_policy::policy::Args; use rustfs_policy::policy::action::Action; use s3s::S3Result; use s3s::s3_error; - -use crate::auth::get_condition_values; +use std::collections::HashMap; +use std::sync::Arc; pub async fn validate_admin_request( headers: &HeaderMap, diff --git a/rustfs/src/admin/console.rs b/rustfs/src/admin/console.rs index 0fe66040..b541edf1 100644 --- a/rustfs/src/admin/console.rs +++ b/rustfs/src/admin/console.rs @@ -14,6 +14,7 @@ use crate::config::build; use crate::license::get_license; +use crate::server::{CONSOLE_PREFIX, FAVICON_PATH, HEALTH_PREFIX, RUSTFS_ADMIN_PREFIX}; use axum::{ Router, body::Body, @@ -45,9 +46,6 @@ use tower_http::timeout::TimeoutLayer; use tower_http::trace::TraceLayer; use tracing::{debug, error, info, instrument, warn}; -pub(crate) const CONSOLE_PREFIX: &str = "/rustfs/console"; -const RUSTFS_ADMIN_PREFIX: &str = "/rustfs/admin/v3"; - #[derive(RustEmbed)] #[folder = "$CARGO_MANIFEST_DIR/static"] struct StaticFiles; @@ -457,7 +455,7 @@ fn get_console_config_from_env() -> (bool, u32, u64, String) { /// # Returns: /// - `true` if the path is for console access, `false` otherwise. 
pub fn is_console_path(path: &str) -> bool { - path == "/favicon.ico" || path.starts_with(CONSOLE_PREFIX) + path == FAVICON_PATH || path.starts_with(CONSOLE_PREFIX) } /// Setup comprehensive middleware stack with tower-http features @@ -477,11 +475,11 @@ fn setup_console_middleware_stack( auth_timeout: u64, ) -> Router { let mut app = Router::new() - .route("/favicon.ico", get(static_handler)) + .route(FAVICON_PATH, get(static_handler)) .route(&format!("{CONSOLE_PREFIX}/license"), get(license_handler)) .route(&format!("{CONSOLE_PREFIX}/config.json"), get(config_handler)) .route(&format!("{CONSOLE_PREFIX}/version"), get(version_handler)) - .route(&format!("{CONSOLE_PREFIX}/health"), get(health_check).head(health_check)) + .route(&format!("{CONSOLE_PREFIX}{HEALTH_PREFIX}"), get(health_check).head(health_check)) .nest(CONSOLE_PREFIX, Router::new().fallback_service(get(static_handler))) .fallback_service(get(static_handler)); diff --git a/rustfs/src/admin/handlers.rs b/rustfs/src/admin/handlers.rs index ec9c9198..91bb86a3 100644 --- a/rustfs/src/admin/handlers.rs +++ b/rustfs/src/admin/handlers.rs @@ -72,7 +72,6 @@ use tokio_stream::wrappers::ReceiverStream; use tracing::debug; use tracing::{error, info, warn}; use url::Host; -// use url::UrlQuery; pub mod bucket_meta; pub mod event; diff --git a/rustfs/src/admin/handlers/rebalance.rs b/rustfs/src/admin/handlers/rebalance.rs index ca5b60f5..736c8754 100644 --- a/rustfs/src/admin/handlers/rebalance.rs +++ b/rustfs/src/admin/handlers/rebalance.rs @@ -12,8 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. +use crate::{ + admin::{auth::validate_admin_request, router::Operation}, + auth::{check_key_valid, get_session_token}, +}; use http::{HeaderMap, StatusCode}; use matchit::Params; +use rustfs_ecstore::rebalance::RebalanceMeta; use rustfs_ecstore::{ StorageAPI, error::StorageError, @@ -33,12 +38,6 @@ use std::time::Duration; use time::OffsetDateTime; use tracing::warn; -use crate::{ - admin::{auth::validate_admin_request, router::Operation}, - auth::{check_key_valid, get_session_token}, -}; -use rustfs_ecstore::rebalance::RebalanceMeta; - #[derive(Debug, Clone, Deserialize, Serialize)] pub struct RebalanceResp { pub id: String, diff --git a/rustfs/src/admin/handlers/trace.rs b/rustfs/src/admin/handlers/trace.rs index 8b1e0b84..1b9577a1 100644 --- a/rustfs/src/admin/handlers/trace.rs +++ b/rustfs/src/admin/handlers/trace.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
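Several hunks above and below import their route paths from crate::server, whose definition sits outside this diff. Judging from the literals those constants replace, the shared module plausibly reads as follows; the values come straight from this diff, while the module location and visibilities are assumptions:

// Route-path constants consolidated under rustfs/src/server (inferred sketch;
// every value matches a literal replaced elsewhere in this diff).
pub(crate) const CONSOLE_PREFIX: &str = "/rustfs/console";
pub(crate) const RUSTFS_ADMIN_PREFIX: &str = "/rustfs/admin/v3";
pub(crate) const ADMIN_PREFIX: &str = "/rustfs/admin";
pub(crate) const RPC_PREFIX: &str = "/rustfs/rpc";
pub(crate) const FAVICON_PATH: &str = "/favicon.ico";
pub(crate) const HEALTH_PREFIX: &str = "/health";
pub(crate) const PROFILE_CPU_PATH: &str = "/profile/cpu";
pub(crate) const PROFILE_MEMORY_PATH: &str = "/profile/memory";

The LOGO banner removed from main.rs further down presumably moved into the same module, since main.rs now prints server::LOGO.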
+use crate::admin::router::Operation; use http::StatusCode; use hyper::Uri; use matchit::Params; @@ -20,8 +21,6 @@ use rustfs_madmin::service_commands::ServiceTraceOpts; use s3s::{Body, S3Request, S3Response, S3Result, s3_error}; use tracing::warn; -use crate::admin::router::Operation; - #[allow(dead_code)] fn extract_trace_options(uri: &Uri) -> S3Result { let mut st_opts = ServiceTraceOpts::default(); diff --git a/rustfs/src/admin/mod.rs b/rustfs/src/admin/mod.rs index 01d4942c..22f6a881 100644 --- a/rustfs/src/admin/mod.rs +++ b/rustfs/src/admin/mod.rs @@ -22,6 +22,7 @@ pub mod utils; #[cfg(test)] mod console_test; +use crate::server::{ADMIN_PREFIX, HEALTH_PREFIX, PROFILE_CPU_PATH, PROFILE_MEMORY_PATH}; use handlers::{ GetReplicationMetricsHandler, HealthCheckHandler, IsAdminHandler, ListRemoteTargetHandler, RemoveRemoteTargetHandler, SetRemoteTargetHandler, bucket_meta, @@ -37,17 +38,21 @@ use router::{AdminOperation, S3Router}; use rpc::register_rpc_route; use s3s::route::S3Route; -const ADMIN_PREFIX: &str = "/rustfs/admin"; -// const ADMIN_PREFIX: &str = "/minio/admin"; - +/// Create admin router +/// +/// # Arguments +/// * `console_enabled` - Whether the console is enabled +/// +/// # Returns +/// An instance of S3Route for admin operations pub fn make_admin_route(console_enabled: bool) -> std::io::Result { let mut r: S3Router = S3Router::new(console_enabled); // Health check endpoint for monitoring and orchestration - r.insert(Method::GET, "/health", AdminOperation(&HealthCheckHandler {}))?; - r.insert(Method::HEAD, "/health", AdminOperation(&HealthCheckHandler {}))?; - r.insert(Method::GET, "/profile/cpu", AdminOperation(&TriggerProfileCPU {}))?; - r.insert(Method::GET, "/profile/memory", AdminOperation(&TriggerProfileMemory {}))?; + r.insert(Method::GET, HEALTH_PREFIX, AdminOperation(&HealthCheckHandler {}))?; + r.insert(Method::HEAD, HEALTH_PREFIX, AdminOperation(&HealthCheckHandler {}))?; + r.insert(Method::GET, PROFILE_CPU_PATH, AdminOperation(&TriggerProfileCPU {}))?; + r.insert(Method::GET, PROFILE_MEMORY_PATH, AdminOperation(&TriggerProfileMemory {}))?; // 1 r.insert(Method::POST, "/", AdminOperation(&sts::AssumeRoleHandle {}))?; diff --git a/rustfs/src/admin/router.rs b/rustfs/src/admin/router.rs index fd3c3306..09c390cf 100644 --- a/rustfs/src/admin/router.rs +++ b/rustfs/src/admin/router.rs @@ -12,10 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
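With the health route registered through the shared constant, an external liveness probe stays a plain GET or HEAD. An illustrative check follows; the address is an assumption for local testing, and reqwest is already a workspace dependency:

// Illustrative probe of the /health endpoint registered above.
async fn probe_health() -> reqwest::Result<bool> {
    let resp = reqwest::get("http://127.0.0.1:9000/health").await?;
    Ok(resp.status().is_success())
}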
-use crate::admin::ADMIN_PREFIX; use crate::admin::console::is_console_path; use crate::admin::console::make_console_server; -use crate::admin::rpc::RPC_PREFIX; +use crate::server::{ADMIN_PREFIX, HEALTH_PREFIX, PROFILE_CPU_PATH, PROFILE_MEMORY_PATH, RPC_PREFIX}; use hyper::HeaderMap; use hyper::Method; use hyper::StatusCode; @@ -86,12 +85,12 @@ where fn is_match(&self, method: &Method, uri: &Uri, headers: &HeaderMap, _: &mut Extensions) -> bool { let path = uri.path(); // Profiling endpoints - if method == Method::GET && (path == "/profile/cpu" || path == "/profile/memory") { + if method == Method::GET && (path == PROFILE_CPU_PATH || path == PROFILE_MEMORY_PATH) { return true; } // Health check - if (method == Method::HEAD || method == Method::GET) && path == "/health" { + if (method == Method::HEAD || method == Method::GET) && path == HEALTH_PREFIX { return true; } @@ -117,12 +116,12 @@ where let path = req.uri.path(); // Profiling endpoints - if req.method == Method::GET && (path == "/profile/cpu" || path == "/profile/memory") { + if req.method == Method::GET && (path == PROFILE_CPU_PATH || path == PROFILE_MEMORY_PATH) { return Ok(()); } // Health check - if (req.method == Method::HEAD || req.method == Method::GET) && path == "/health" { + if (req.method == Method::HEAD || req.method == Method::GET) && path == HEALTH_PREFIX { return Ok(()); } diff --git a/rustfs/src/admin/rpc.rs b/rustfs/src/admin/rpc.rs index 7df37404..8098236d 100644 --- a/rustfs/src/admin/rpc.rs +++ b/rustfs/src/admin/rpc.rs @@ -15,6 +15,7 @@ use super::router::AdminOperation; use super::router::Operation; use super::router::S3Router; +use crate::server::RPC_PREFIX; use futures::StreamExt; use http::StatusCode; use hyper::Method; @@ -36,8 +37,6 @@ use tokio::io::AsyncWriteExt; use tokio_util::io::ReaderStream; use tracing::warn; -pub const RPC_PREFIX: &str = "/rustfs/rpc"; - pub fn register_rpc_route(r: &mut S3Router) -> std::io::Result<()> { r.insert( Method::GET, diff --git a/rustfs/src/auth.rs b/rustfs/src/auth.rs index cc2d24c2..79cb2922 100644 --- a/rustfs/src/auth.rs +++ b/rustfs/src/auth.rs @@ -66,7 +66,7 @@ const SIGN_V2_ALGORITHM: &str = "AWS "; const SIGN_V4_ALGORITHM: &str = "AWS4-HMAC-SHA256"; const STREAMING_CONTENT_SHA256: &str = "STREAMING-AWS4-HMAC-SHA256-PAYLOAD"; const STREAMING_CONTENT_SHA256_TRAILER: &str = "STREAMING-AWS4-HMAC-SHA256-PAYLOAD-TRAILER"; -pub const UNSIGNED_PAYLOAD_TRAILER: &str = "STREAMING-UNSIGNED-PAYLOAD-TRAILER"; +pub(crate) const UNSIGNED_PAYLOAD_TRAILER: &str = "STREAMING-UNSIGNED-PAYLOAD-TRAILER"; const ACTION_HEADER: &str = "Action"; const AMZ_CREDENTIAL: &str = "X-Amz-Credential"; const AMZ_ACCESS_KEY_ID: &str = "AWSAccessKeyId"; diff --git a/rustfs/src/init.rs b/rustfs/src/init.rs index 397829ea..1db6eca7 100644 --- a/rustfs/src/init.rs +++ b/rustfs/src/init.rs @@ -13,7 +13,8 @@ // limitations under the License. 
use crate::storage::ecfs::{process_lambda_configurations, process_queue_configurations, process_topic_configurations}; -use crate::{admin, config}; +use crate::{admin, config, version}; +use chrono::Datelike; use rustfs_config::{DEFAULT_UPDATE_CHECK, ENV_UPDATE_CHECK}; use rustfs_ecstore::bucket::metadata_sys; use rustfs_notify::notifier_global; @@ -23,6 +24,21 @@ use std::env; use std::io::Error; use tracing::{debug, error, info, instrument, warn}; +#[instrument] +pub(crate) fn print_server_info() { + let current_year = chrono::Utc::now().year(); + // Use custom macros to print server information + info!("RustFS Object Storage Server"); + info!("Copyright: 2024-{} RustFS, Inc", current_year); + info!("License: Apache-2.0 https://www.apache.org/licenses/LICENSE-2.0"); + info!("Version: {}", version::get_version()); + info!("Docs: https://rustfs.com/docs/"); +} + +/// Initialize the asynchronous update check system. +/// This function checks if update checking is enabled via +/// environment variable or default configuration. If enabled, +/// it spawns an asynchronous task to check for updates with a timeout. pub(crate) fn init_update_check() { let update_check_enable = env::var(ENV_UPDATE_CHECK) .unwrap_or_else(|_| DEFAULT_UPDATE_CHECK.to_string()) @@ -70,6 +86,12 @@ pub(crate) fn init_update_check() { }); } +/// Add existing bucket notification configurations to the global notifier system. +/// This function retrieves notification configurations for each bucket +/// and registers the corresponding event rules with the notifier system. +/// It processes queue, topic, and lambda configurations and maps them to event rules. +/// # Arguments +/// * `buckets` - A vector of bucket names to process #[instrument(skip_all)] pub(crate) async fn add_bucket_notification_configuration(buckets: Vec) { let region_opt = rustfs_ecstore::global::get_global_region(); @@ -128,6 +150,15 @@ pub(crate) async fn add_bucket_notification_configuration(buckets: Vec) } /// Initialize KMS system and configure if enabled +/// +/// This function initializes the global KMS service manager. If KMS is enabled +/// via command line options, it configures and starts the service accordingly. +/// If not enabled, it attempts to load any persisted KMS configuration from +/// cluster storage and starts the service if found. 
+/// # Arguments
+/// * `opt` - The application configuration options
+///
+/// Returns `std::io::Result<()>` indicating success or failure
 #[instrument(skip(opt))]
 pub(crate) async fn init_kms_system(opt: &config::Opt) -> std::io::Result<()> {
     // Initialize global KMS service manager (starts in NotConfigured state)
diff --git a/rustfs/src/main.rs b/rustfs/src/main.rs
index 58b55a2f..5b5a0d2e 100644
--- a/rustfs/src/main.rs
+++ b/rustfs/src/main.rs
@@ -25,16 +25,17 @@ mod update;
 mod version;

 // Ensure the correct path for parse_license is imported
-use crate::init::{add_bucket_notification_configuration, init_buffer_profile_system, init_kms_system, init_update_check};
+use crate::init::{
+    add_bucket_notification_configuration, init_buffer_profile_system, init_kms_system, init_update_check, print_server_info,
+};
 use crate::server::{
     SHUTDOWN_TIMEOUT, ServiceState, ServiceStateManager, ShutdownSignal, init_cert, init_event_notifier,
     shutdown_event_notifier, start_audit_system, start_http_server, stop_audit_system, wait_for_shutdown,
 };
-use chrono::Datelike;
 use clap::Parser;
 use license::init_license;
 use rustfs_ahm::{create_ahm_services_cancel_token, heal::storage::ECStoreHealStorage, init_heal_manager, shutdown_ahm_services};
-use rustfs_common::globals::set_global_addr;
+use rustfs_common::{GlobalReadiness, SystemStage, set_global_addr};
 use rustfs_ecstore::{
     StorageAPI,
     bucket::metadata_sys::init_bucket_metadata_sys,
@@ -67,25 +68,6 @@ static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
 #[global_allocator]
 static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;

-const LOGO: &str = r#"
-
-░█▀▄░█░█░█▀▀░▀█▀░█▀▀░█▀▀
-░█▀▄░█░█░▀▀█░░█░░█▀▀░▀▀█
-░▀░▀░▀▀▀░▀▀▀░░▀░░▀░░░▀▀▀
-
-"#;
-
-#[instrument]
-fn print_server_info() {
-    let current_year = chrono::Utc::now().year();
-    // Use custom macros to print server information
-    info!("RustFS Object Storage Server");
-    info!("Copyright: 2024-{} RustFS, Inc", current_year);
-    info!("License: Apache-2.0 https://www.apache.org/licenses/LICENSE-2.0");
-    info!("Version: {}", version::get_version());
-    info!("Docs: https://rustfs.com/docs/");
-}
-
 fn main() -> Result<()> {
     let runtime = server::get_tokio_runtime_builder()
         .build()
@@ -118,7 +100,7 @@ async fn async_main() -> Result<()> {
     }

     // print startup logo
-    info!("{}", LOGO);
+    info!("{}", server::LOGO);

     // Initialize performance profiling if enabled
     profiling::init_from_env().await;
@@ -141,6 +123,8 @@
 #[instrument(skip(opt))]
 async fn run(opt: config::Opt) -> Result<()> {
     debug!("opt: {:?}", &opt);
+    // 1. Initialize global readiness tracker
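+    // The tracker is shared with every HTTP server started below; its
+    // ReadinessGateLayer answers 503 until FullReady is marked at the end of run().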
+    let readiness = Arc::new(GlobalReadiness::new());

     if let Some(region) = &opt.region {
         rustfs_ecstore::global::set_global_region(region.clone());
@@ -212,14 +196,14 @@ async fn run(opt: config::Opt) -> Result<()> {
     let s3_shutdown_tx = {
         let mut s3_opt = opt.clone();
         s3_opt.console_enable = false;
-        let s3_shutdown_tx = start_http_server(&s3_opt, state_manager.clone()).await?;
+        let s3_shutdown_tx = start_http_server(&s3_opt, state_manager.clone(), readiness.clone()).await?;
         Some(s3_shutdown_tx)
     };

     let console_shutdown_tx = if opt.console_enable && !opt.console_address.is_empty() {
         let mut console_opt = opt.clone();
         console_opt.address = console_opt.console_address.clone();
-        let console_shutdown_tx = start_http_server(&console_opt, state_manager.clone()).await?;
+        let console_shutdown_tx = start_http_server(&console_opt, state_manager.clone(), readiness.clone()).await?;
         Some(console_shutdown_tx)
     } else {
         None
@@ -234,6 +218,7 @@ async fn run(opt: config::Opt) -> Result<()> {
     let ctx = CancellationToken::new();

     // init store
+    // 2. Start Storage Engine (ECStore)
     let store = ECStore::new(server_addr, endpoint_pools.clone(), ctx.clone())
         .await
         .inspect_err(|err| {
         })?;
     ecconfig::init();

-    // config system configuration
-    GLOBAL_CONFIG_SYS.init(store.clone()).await?;
-    // init replication_pool
+    // Initialize global configuration system
+    let mut retry_count = 0;
+    while let Err(e) = GLOBAL_CONFIG_SYS.init(store.clone()).await {
+        error!("GLOBAL_CONFIG_SYS.init failed {:?}", e);
+        // TODO: check error type
+        retry_count += 1;
+        if retry_count > 15 {
+            return Err(Error::other("GLOBAL_CONFIG_SYS.init failed"));
+        }
+        tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;
+    }
+    readiness.mark_stage(SystemStage::StorageReady);
+
+    // init replication_pool
     init_background_replication(store.clone()).await;
     // Initialize KMS system if enabled
     init_kms_system(&opt).await?;
@@ -277,7 +272,10 @@

     init_bucket_metadata_sys(store.clone(), buckets.clone()).await;

+    // 3. Initialize IAM System (Blocking load)
+    // This ensures data is in memory before moving forward
     init_iam_sys(store.clone()).await.map_err(Error::other)?;
+    readiness.mark_stage(SystemStage::IamReady);

     add_bucket_notification_configuration(buckets.clone()).await;

@@ -335,6 +333,15 @@

     init_update_check();

+    println!(
+        "RustFS server started successfully at {}, current time: {}",
+        &server_address,
+        chrono::offset::Utc::now()
+    );
+    info!(target: "rustfs::main::run", "server started successfully at {}", &server_address);
+    // 4. Mark as Full Ready now that critical components are warm
+    readiness.mark_stage(SystemStage::FullReady);
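+    // The HTTP listeners were already bound earlier in run(); until this point the
+    // readiness gate answered non-probe requests with 503 Service Unavailable.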
+
     // Perform hibernation for 1 second
     tokio::time::sleep(SHUTDOWN_TIMEOUT).await;

     // listen to the shutdown signal
diff --git a/rustfs/src/server/cert.rs b/rustfs/src/server/cert.rs
index 6dba5c05..93013be0 100644
--- a/rustfs/src/server/cert.rs
+++ b/rustfs/src/server/cert.rs
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-use rustfs_common::globals::set_global_root_cert;
+use rustfs_common::set_global_root_cert;
 use rustfs_config::{RUSTFS_CA_CERT, RUSTFS_PUBLIC_CERT, RUSTFS_TLS_CERT};
 use tracing::{debug, info};
diff --git a/rustfs/src/server/http.rs b/rustfs/src/server/http.rs
index 2b4484cb..53a03bca 100644
--- a/rustfs/src/server/http.rs
+++ b/rustfs/src/server/http.rs
@@ -17,7 +17,7 @@
 use super::compress::{CompressionConfig, CompressionPredicate};
 use crate::admin;
 use crate::auth::IAMAuth;
 use crate::config;
-use crate::server::{ServiceState, ServiceStateManager, hybrid::hybrid, layer::RedirectLayer};
+use crate::server::{ReadinessGateLayer, ServiceState, ServiceStateManager, hybrid::hybrid, layer::RedirectLayer};
 use crate::storage;
 use crate::storage::tonic_service::make_server;
 use bytes::Bytes;
@@ -29,6 +29,7 @@ use hyper_util::{
     service::TowerToHyperService,
 };
 use metrics::{counter, histogram};
+use rustfs_common::GlobalReadiness;
 use rustfs_config::{DEFAULT_ACCESS_KEY, DEFAULT_SECRET_KEY, MI_B, RUSTFS_TLS_CERT, RUSTFS_TLS_KEY};
 use rustfs_protos::proto_gen::node_service::node_service_server::NodeServiceServer;
 use rustfs_utils::net::parse_and_resolve_address;
@@ -112,6 +113,7 @@ fn get_cors_allowed_origins() -> String {
 pub async fn start_http_server(
     opt: &config::Opt,
     worker_state_manager: ServiceStateManager,
+    readiness: Arc<GlobalReadiness>,
 ) -> Result<tokio::sync::broadcast::Sender<()>> {
     let server_addr = parse_and_resolve_address(opt.address.as_str()).map_err(Error::other)?;
     let server_port = server_addr.port();
@@ -208,7 +210,7 @@
         println!("Console WebUI (localhost): {protocol}://127.0.0.1:{server_port}/rustfs/console/index.html",);
     } else {
         info!(target: "rustfs::main::startup","RustFS API: {api_endpoints} {localhost_endpoint}");
-        println!("RustFS API: {api_endpoints} {localhost_endpoint}");
+        println!("RustFS Http API: {api_endpoints} {localhost_endpoint}");
         println!("RustFS Start Time: {now_time}");
         if DEFAULT_ACCESS_KEY.eq(&opt.access_key) && DEFAULT_SECRET_KEY.eq(&opt.secret_key) {
             warn!(
@@ -388,6 +390,7 @@
                     cors_layer: cors_layer.clone(),
                     compression_config: compression_config.clone(),
                     is_console,
+                    readiness: readiness.clone(),
                 };

                 process_connection(socket, tls_acceptor.clone(), connection_ctx, graceful.clone());
@@ -490,6 +493,7 @@ struct ConnectionContext {
     cors_layer: CorsLayer,
     compression_config: CompressionConfig,
     is_console: bool,
+    readiness: Arc<GlobalReadiness>,
 }

 /// Process a single incoming TCP connection.
@@ -513,6 +517,7 @@ fn process_connection(
         cors_layer,
         compression_config,
         is_console,
+        readiness,
     } = context;

     // Build services inside each connected task to avoid passing complex service types across tasks,
     let hybrid_service = ServiceBuilder::new()
         .layer(SetRequestIdLayer::x_request_id(MakeRequestUuid))
         .layer(CatchPanicLayer::new())
+        // CRITICAL: Insert ReadinessGateLayer before business logic
+        // This stops requests from hitting IAMAuth or Storage if they are not ready.
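+        // Probe and management routes (health, profiling, favicon, admin,
+        // console, RPC) are exempted inside the layer; see server/readiness.rs.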
+        .layer(ReadinessGateLayer::new(readiness))
         .layer(
             TraceLayer::new_for_http()
                 .make_span_with(|request: &HttpRequest<_>| {
diff --git a/rustfs/src/server/mod.rs b/rustfs/src/server/mod.rs
index 630f6f94..28af0093 100644
--- a/rustfs/src/server/mod.rs
+++ b/rustfs/src/server/mod.rs
@@ -19,6 +19,8 @@ mod event;
 mod http;
 mod hybrid;
 mod layer;
+mod prefix;
+mod readiness;
 mod runtime;
 mod service_state;

@@ -26,6 +28,8 @@ pub(crate) use audit::{start_audit_system, stop_audit_system};
 pub(crate) use cert::init_cert;
 pub(crate) use event::{init_event_notifier, shutdown_event_notifier};
 pub(crate) use http::start_http_server;
+pub(crate) use prefix::*;
+pub(crate) use readiness::ReadinessGateLayer;
 pub(crate) use runtime::get_tokio_runtime_builder;
 pub(crate) use service_state::SHUTDOWN_TIMEOUT;
 pub(crate) use service_state::ServiceState;
diff --git a/rustfs/src/server/prefix.rs b/rustfs/src/server/prefix.rs
new file mode 100644
index 00000000..bdb8216a
--- /dev/null
+++ b/rustfs/src/server/prefix.rs
@@ -0,0 +1,55 @@
+// Copyright 2024 RustFS Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/// Predefined CPU profiling path for RustFS server.
+/// This path is used to access CPU profiling data.
+pub(crate) const PROFILE_CPU_PATH: &str = "/profile/cpu";
+
+/// Predefined memory profiling path for RustFS server.
+/// This path is used to access memory profiling data.
+pub(crate) const PROFILE_MEMORY_PATH: &str = "/profile/memory";
+
+/// Favicon path to handle browser requests for the favicon.
+/// This path serves the favicon.ico file.
+pub(crate) const FAVICON_PATH: &str = "/favicon.ico";
+
+/// Predefined health check path for RustFS server.
+/// This path is used to check the health status of the server.
+pub(crate) const HEALTH_PREFIX: &str = "/health";
+
+/// Predefined administrative prefix for RustFS server routes.
+/// This prefix is used for endpoints that handle administrative tasks
+/// such as configuration, monitoring, and management.
+pub(crate) const ADMIN_PREFIX: &str = "/rustfs/admin";
+
+/// Versioned administrative prefix for RustFS server routes
+/// (the v3 admin API under `ADMIN_PREFIX`).
+pub(crate) const RUSTFS_ADMIN_PREFIX: &str = "/rustfs/admin/v3";
+
+/// Predefined console prefix for RustFS server routes.
+/// This prefix is used for endpoints that handle console-related tasks
+/// such as user interface and management.
+pub(crate) const CONSOLE_PREFIX: &str = "/rustfs/console";
+
+/// Predefined RPC prefix for RustFS server routes.
+/// This prefix is used for endpoints that handle remote procedure calls (RPC).
+pub(crate) const RPC_PREFIX: &str = "/rustfs/rpc";
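+
+// NOTE: the exact-match paths above (profiling, health, favicon) and the
+// ADMIN/CONSOLE/RPC prefixes are precisely the routes that ReadinessGateLayer
+// exempts from the startup readiness check (see server/readiness.rs).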
+
+/// LOGO art for RustFS server.
+pub(crate) const LOGO: &str = r#"
+
+░█▀▄░█░█░█▀▀░▀█▀░█▀▀░█▀▀
+░█▀▄░█░█░▀▀█░░█░░█▀▀░▀▀█
+░▀░▀░▀▀▀░▀▀▀░░▀░░▀░░░▀▀▀
+
+"#;
diff --git a/rustfs/src/server/readiness.rs b/rustfs/src/server/readiness.rs
new file mode 100644
index 00000000..a79ad083
--- /dev/null
+++ b/rustfs/src/server/readiness.rs
@@ -0,0 +1,129 @@
+// Copyright 2024 RustFS Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use bytes::Bytes;
+use http::{Request as HttpRequest, Response, StatusCode};
+use http_body::Body;
+use http_body_util::{BodyExt, Full};
+use hyper::body::Incoming;
+use rustfs_common::GlobalReadiness;
+use std::future::Future;
+use std::pin::Pin;
+use std::sync::Arc;
+use std::task::{Context, Poll};
+use tower::{Layer, Service};
+
+/// ReadinessGateLayer ensures that the system components (IAM, Storage)
+/// are fully initialized before allowing any request to proceed.
+#[derive(Clone)]
+pub struct ReadinessGateLayer {
+    readiness: Arc<GlobalReadiness>,
+}
+
+impl ReadinessGateLayer {
+    /// Create a new ReadinessGateLayer
+    /// # Arguments
+    /// * `readiness` - An Arc to the GlobalReadiness instance
+    ///
+    /// # Returns
+    /// A new instance of ReadinessGateLayer
+    pub fn new(readiness: Arc<GlobalReadiness>) -> Self {
+        Self { readiness }
+    }
+}
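+
+// Illustrative wiring (assumed form; the real wiring lives in server/http.rs):
+//
+//     let readiness = Arc::new(GlobalReadiness::new());
+//     let service = ServiceBuilder::new()
+//         .layer(ReadinessGateLayer::new(readiness.clone()))
+//         .service(inner_service);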
+
+impl<S> Layer<S> for ReadinessGateLayer {
+    type Service = ReadinessGateService<S>;
+
+    /// Wrap the inner service with ReadinessGateService
+    /// # Arguments
+    /// * `inner` - The inner service to wrap
+    /// # Returns
+    /// An instance of ReadinessGateService
+    fn layer(&self, inner: S) -> Self::Service {
+        ReadinessGateService {
+            inner,
+            readiness: self.readiness.clone(),
+        }
+    }
+}
+
+#[derive(Clone)]
+pub struct ReadinessGateService<S> {
+    inner: S,
+    readiness: Arc<GlobalReadiness>,
+}
+
+type BoxError = Box<dyn std::error::Error + Send + Sync>;
+type BoxBody = http_body_util::combinators::UnsyncBoxBody<Bytes, BoxError>;
+
+impl<S, B> Service<HttpRequest<Incoming>> for ReadinessGateService<S>
+where
+    S: Service<HttpRequest<Incoming>, Response = Response<B>> + Clone + Send + 'static,
+    S::Future: Send + 'static,
+    S::Error: Send + 'static,
+    B: Body<Data = Bytes> + Send + 'static,
+    B::Error: Into<BoxError> + Send + 'static,
+{
+    type Response = Response<BoxBody>;
+    type Error = S::Error;
+    type Future = Pin<Box<dyn Future<Output = Result<Self::Response, Self::Error>> + Send>>;
+
+    fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
+        self.inner.poll_ready(cx)
+    }
+
+    fn call(&mut self, req: HttpRequest<Incoming>) -> Self::Future {
+        let mut inner = self.inner.clone();
+        let readiness = self.readiness.clone();
+        Box::pin(async move {
+            let path = req.uri().path();
+            // 1) Exact match: fixed probe/resource path
+            let is_exact_probe = matches!(
+                path,
+                crate::server::PROFILE_MEMORY_PATH
+                    | crate::server::PROFILE_CPU_PATH
+                    | crate::server::HEALTH_PREFIX
+                    | crate::server::FAVICON_PATH
+            );
+
+            // 2) Prefix matching: the entire set of route prefixes (including their subpaths)
+            let is_prefix_probe = path.starts_with(crate::server::RUSTFS_ADMIN_PREFIX)
+                || path.starts_with(crate::server::CONSOLE_PREFIX)
+                || path.starts_with(crate::server::RPC_PREFIX)
+                || path.starts_with(crate::server::ADMIN_PREFIX);
+
+            let is_probe = is_exact_probe || is_prefix_probe;
+            if !is_probe && !readiness.is_ready() {
+                let body: BoxBody = Full::new(Bytes::from_static(b"Service not ready"))
+                    .map_err(|e| -> BoxError { Box::new(e) })
+                    .boxed_unsync();
+
+                let resp = Response::builder()
+                    .status(StatusCode::SERVICE_UNAVAILABLE)
+                    .header(http::header::RETRY_AFTER, "5")
+                    .header(http::header::CONTENT_TYPE, "text/plain; charset=utf-8")
+                    .header(http::header::CACHE_CONTROL, "no-store")
+                    .body(body)
+                    .expect("failed to build not ready response");
+                return Ok(resp);
+            }
+
+            // System is ready, forward to the actual S3/RPC handlers
+            let resp = inner.call(req).await?;
+            // Transparently convert any response body into a BoxBody so Trace/Cors/Compression layers continue to work
+            let (parts, body) = resp.into_parts();
+            let body: BoxBody = body.map_err(Into::into).boxed_unsync();
+            Ok(Response::from_parts(parts, body))
+        })
+    }
+}
diff --git a/rustfs/src/storage/ecfs.rs b/rustfs/src/storage/ecfs.rs
index 183ccb26..b4949454 100644
--- a/rustfs/src/storage/ecfs.rs
+++ b/rustfs/src/storage/ecfs.rs
@@ -134,7 +134,10 @@ use std::{
     sync::{Arc, LazyLock},
 };
 use time::{OffsetDateTime, format_description::well_known::Rfc3339};
-use tokio::{io::AsyncRead, sync::mpsc};
+use tokio::{
+    io::{AsyncRead, AsyncSeek},
+    sync::mpsc,
+};
 use tokio_stream::wrappers::ReceiverStream;
 use tokio_tar::Archive;
 use tokio_util::io::{ReaderStream, StreamReader};
@@ -398,6 +401,19 @@ impl AsyncRead for InMemoryAsyncReader {
     }
 }

+impl AsyncSeek for InMemoryAsyncReader {
+    fn start_seek(mut self: std::pin::Pin<&mut Self>, position: std::io::SeekFrom) -> std::io::Result<()> {
+        // std::io::Cursor natively supports negative SeekCurrent offsets
+        // It will automatically handle validation and return an error if the final position would be negative
+        std::io::Seek::seek(&mut self.cursor, position)?;
+        Ok(())
+    }
+
+    fn poll_complete(self: std::pin::Pin<&mut Self>, _cx: &mut std::task::Context<'_>) -> std::task::Poll<std::io::Result<u64>> {
+        std::task::Poll::Ready(Ok(self.cursor.position()))
+    }
+}
+
 async fn decrypt_multipart_managed_stream(
     mut encrypted_stream: Box<dyn AsyncRead + Unpin + Send>,
     parts: &[ObjectPartInfo],
@@ -2264,11 +2280,55 @@ impl S3 for FS {
             );
             Some(StreamingBlob::wrap(ReaderStream::with_capacity(final_stream, optimal_buffer_size)))
         } else {
-            // Standard streaming path for large objects or range/part requests
-            Some(StreamingBlob::wrap(bytes_stream(
-                ReaderStream::with_capacity(final_stream, optimal_buffer_size),
-                response_content_length as usize,
-            )))
+            let seekable_object_size_threshold = rustfs_config::DEFAULT_OBJECT_SEEK_SUPPORT_THRESHOLD;
+
+            let should_provide_seek_support = response_content_length > 0
+                && response_content_length <= seekable_object_size_threshold as i64
+                && part_number.is_none()
+                && rs.is_none();
+
+            if should_provide_seek_support {
+                debug!(
+                    "Reading small object into memory for seek support: key={} size={}",
+                    cache_key, response_content_length
+                );
+
+                // Read the stream into memory
+                let mut buf = Vec::with_capacity(response_content_length as usize);
+                match tokio::io::AsyncReadExt::read_to_end(&mut final_stream, &mut buf).await {
+                    Ok(_) => {
+                        // Verify we read the expected amount
+                        if buf.len() != response_content_length as usize {
+                            warn!(
+                                "Object size mismatch during seek support read: expected={} actual={}",
+                                response_content_length,
+                                buf.len()
+                            );
+                        }
+
+                        // Create seekable in-memory reader (similar to MinIO SDK's bytes.Reader)
+                        let mem_reader = InMemoryAsyncReader::new(buf);
+                        Some(StreamingBlob::wrap(bytes_stream(
+                            ReaderStream::with_capacity(Box::new(mem_reader), optimal_buffer_size),
+                            response_content_length as usize,
+                        )))
+                    }
+                    Err(e) => {
+                        error!("Failed to read object into memory for seek support: {}", e);
+                        // Fallback to streaming if read fails
+                        Some(StreamingBlob::wrap(bytes_stream(
+                            ReaderStream::with_capacity(final_stream, optimal_buffer_size),
+                            response_content_length as usize,
+                        )))
+                    }
+                }
+            } else {
+                // Standard streaming path for large objects or range/part requests
+                Some(StreamingBlob::wrap(bytes_stream(
+                    ReaderStream::with_capacity(final_stream, optimal_buffer_size),
+                    response_content_length as usize,
+                )))
+            }
         };

         // Extract SSE information from metadata for response
diff --git a/rustfs/src/storage/error.rs b/rustfs/src/storage/error.rs
deleted file mode 100644
index e3b10cde..00000000
--- a/rustfs/src/storage/error.rs
+++ /dev/null
@@ -1,499 +0,0 @@
-// Copyright 2024 RustFS Team
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use ecstore::error::StorageError;
-use rustfs_common::error::Error;
-use s3s::{s3_error, S3Error, S3ErrorCode};
-pub fn to_s3_error(err: Error) -> S3Error {
-    if let Some(storage_err) = err.downcast_ref::<StorageError>() {
-        return match storage_err {
-            StorageError::NotImplemented => s3_error!(NotImplemented),
-            StorageError::InvalidArgument(bucket, object, version_id) => {
-                s3_error!(InvalidArgument, "Invalid arguments provided for {}/{}-{}", bucket, object, version_id)
-            }
-            StorageError::MethodNotAllowed => s3_error!(MethodNotAllowed),
-            StorageError::BucketNotFound(bucket) => {
-                s3_error!(NoSuchBucket, "bucket not found {}", bucket)
-            }
-            StorageError::BucketNotEmpty(bucket) => s3_error!(BucketNotEmpty, "bucket not empty {}", bucket),
-            StorageError::BucketNameInvalid(bucket) => s3_error!(InvalidBucketName, "invalid bucket name {}", bucket),
-            StorageError::ObjectNameInvalid(bucket, object) => {
-                s3_error!(InvalidArgument, "invalid object name {}/{}", bucket, object)
-            }
-            StorageError::BucketExists(bucket) => s3_error!(BucketAlreadyExists, "{}", bucket),
-            StorageError::StorageFull => s3_error!(ServiceUnavailable, "Storage reached its minimum free drive threshold."),
-            StorageError::SlowDown => s3_error!(SlowDown, "Please reduce your request rate"),
-            StorageError::PrefixAccessDenied(bucket, object) => {
-                s3_error!(AccessDenied, "PrefixAccessDenied {}/{}", bucket, object)
-            }
-            StorageError::InvalidUploadIDKeyCombination(bucket, object) => {
-                s3_error!(InvalidArgument, "Invalid UploadID KeyCombination: {}/{}", bucket, object)
-            }
-            StorageError::MalformedUploadID(bucket) => s3_error!(InvalidArgument, "Malformed UploadID: {}", bucket),
-            StorageError::ObjectNameTooLong(bucket, object) => {
-                s3_error!(InvalidArgument, "Object name too long: {}/{}", bucket, object)
-            }
-            StorageError::ObjectNamePrefixAsSlash(bucket, object) => {
-                s3_error!(InvalidArgument, "Object name contains forward slash as prefix: {}/{}", bucket, object)
-            }
-            StorageError::ObjectNotFound(bucket, object) => s3_error!(NoSuchKey, "{}/{}", bucket, object),
-            StorageError::VersionNotFound(bucket, object, version_id) => {
-                s3_error!(NoSuchVersion, "{}/{}/{}", bucket, object, version_id)
-            }
-            StorageError::InvalidUploadID(bucket, object, version_id) => {
-                s3_error!(InvalidPart, "Invalid upload id: {}/{}-{}", bucket, object, version_id)
-            }
-            StorageError::InvalidVersionID(bucket, object, version_id) => {
-                s3_error!(InvalidArgument, "Invalid version id: {}/{}-{}", bucket, object, version_id)
-            }
-            // extended
-            StorageError::DataMovementOverwriteErr(bucket, object, version_id) => s3_error!(
-                InvalidArgument,
-                "invalid data movement operation, source and destination pool are the same for : {}/{}-{}",
-                bucket,
-                object,
-                version_id
-            ),
-
-            // extended
-            StorageError::ObjectExistsAsDirectory(bucket, object) => {
-                s3_error!(InvalidArgument, "Object exists on :{} as directory {}", bucket, object)
-            }
-            StorageError::InvalidPart(bucket, object, version_id) => {
-                s3_error!(
-                    InvalidPart,
-                    "Specified part could not be found. PartNumber {}, Expected {}, got {}",
-                    bucket,
-                    object,
-                    version_id
-                )
-            }
-            StorageError::DoneForNow => s3_error!(InternalError, "DoneForNow"),
-        };
-    }
-
-    if is_err_file_not_found(&err) {
-        return S3Error::with_message(S3ErrorCode::NoSuchKey, format!(" ec err {}", err));
-    }
-
-    S3Error::with_message(S3ErrorCode::InternalError, format!(" ec err {}", err))
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use s3s::S3ErrorCode;
-
-    #[test]
-    fn test_to_s3_error_not_implemented() {
-        let storage_err = StorageError::NotImplemented;
-        let err = Error::new(storage_err);
-        let s3_err = to_s3_error(err);
-
-        assert_eq!(*s3_err.code(), S3ErrorCode::NotImplemented);
-    }
-
-    #[test]
-    fn test_to_s3_error_invalid_argument() {
-        let storage_err =
-            StorageError::InvalidArgument("test-bucket".to_string(), "test-object".to_string(), "test-version".to_string());
-        let err = Error::new(storage_err);
-        let s3_err = to_s3_error(err);
-
-        assert_eq!(*s3_err.code(), S3ErrorCode::InvalidArgument);
-        assert!(s3_err.message().unwrap().contains("Invalid arguments provided"));
-        assert!(s3_err.message().unwrap().contains("test-bucket"));
-        assert!(s3_err.message().unwrap().contains("test-object"));
-        assert!(s3_err.message().unwrap().contains("test-version"));
-    }
-
-    #[test]
-    fn test_to_s3_error_method_not_allowed() {
-        let storage_err = StorageError::MethodNotAllowed;
-        let err = Error::new(storage_err);
-        let s3_err = to_s3_error(err);
-
-        assert_eq!(*s3_err.code(), S3ErrorCode::MethodNotAllowed);
-    }
-
-    #[test]
-    fn test_to_s3_error_bucket_not_found() {
-        let storage_err = StorageError::BucketNotFound("test-bucket".to_string());
-        let err = Error::new(storage_err);
-        let s3_err = to_s3_error(err);
-
-        assert_eq!(*s3_err.code(), S3ErrorCode::NoSuchBucket);
-        assert!(s3_err.message().unwrap().contains("bucket not found"));
-        assert!(s3_err.message().unwrap().contains("test-bucket"));
-    }
-
-    #[test]
-    fn test_to_s3_error_bucket_not_empty() {
-        let storage_err = StorageError::BucketNotEmpty("test-bucket".to_string());
-        let err = Error::new(storage_err);
-        let s3_err = to_s3_error(err);
-
-        assert_eq!(*s3_err.code(), S3ErrorCode::BucketNotEmpty);
-        assert!(s3_err.message().unwrap().contains("bucket not empty"));
-        assert!(s3_err.message().unwrap().contains("test-bucket"));
-    }
-
-    #[test]
-    fn test_to_s3_error_bucket_name_invalid() {
-        let storage_err = StorageError::BucketNameInvalid("invalid-bucket-name".to_string());
-        let err = Error::new(storage_err);
-        let s3_err = to_s3_error(err);
-
-        assert_eq!(*s3_err.code(), S3ErrorCode::InvalidBucketName);
-        assert!(s3_err.message().unwrap().contains("invalid bucket name"));
-        assert!(s3_err.message().unwrap().contains("invalid-bucket-name"));
-    }
-
-    #[test]
-    fn test_to_s3_error_object_name_invalid() {
-        let storage_err = StorageError::ObjectNameInvalid("test-bucket".to_string(), "invalid-object".to_string());
-        let err = Error::new(storage_err);
-        let s3_err = to_s3_error(err);
-
-        assert_eq!(*s3_err.code(), S3ErrorCode::InvalidArgument);
-        assert!(s3_err.message().unwrap().contains("invalid object name"));
-        assert!(s3_err.message().unwrap().contains("test-bucket"));
-        assert!(s3_err.message().unwrap().contains("invalid-object"));
-    }
-
-    #[test]
-    fn test_to_s3_error_bucket_exists() {
-        let storage_err = StorageError::BucketExists("existing-bucket".to_string());
-        let err = Error::new(storage_err);
-        let s3_err = to_s3_error(err);
-
-        assert_eq!(*s3_err.code(), S3ErrorCode::BucketAlreadyExists);
-        assert!(s3_err.message().unwrap().contains("existing-bucket"));
-    }
-
-    #[test]
-    fn test_to_s3_error_storage_full() {
-        let storage_err = StorageError::StorageFull;
-        let err = Error::new(storage_err);
-        let s3_err = to_s3_error(err);
-
-        assert_eq!(*s3_err.code(), S3ErrorCode::ServiceUnavailable);
-        assert!(
-            s3_err
-                .message()
-                .unwrap()
-                .contains("Storage reached its minimum free drive threshold")
-        );
-    }
-
-    #[test]
-    fn test_to_s3_error_slow_down() {
-        let storage_err = StorageError::SlowDown;
-        let err = Error::new(storage_err);
-        let s3_err = to_s3_error(err);
-
-        assert_eq!(*s3_err.code(), S3ErrorCode::SlowDown);
-        assert!(s3_err.message().unwrap().contains("Please reduce your request rate"));
-    }
-
-    #[test]
-    fn test_to_s3_error_prefix_access_denied() {
-        let storage_err = StorageError::PrefixAccessDenied("test-bucket".to_string(), "test-prefix".to_string());
-        let err = Error::new(storage_err);
-        let s3_err = to_s3_error(err);
-
-        assert_eq!(*s3_err.code(), S3ErrorCode::AccessDenied);
-        assert!(s3_err.message().unwrap().contains("PrefixAccessDenied"));
-        assert!(s3_err.message().unwrap().contains("test-bucket"));
-        assert!(s3_err.message().unwrap().contains("test-prefix"));
-    }
-
-    #[test]
-    fn test_to_s3_error_invalid_upload_id_key_combination() {
-        let storage_err = StorageError::InvalidUploadIDKeyCombination("test-bucket".to_string(), "test-object".to_string());
-        let err = Error::new(storage_err);
-        let s3_err = to_s3_error(err);
-
-        assert_eq!(*s3_err.code(), S3ErrorCode::InvalidArgument);
-        assert!(s3_err.message().unwrap().contains("Invalid UploadID KeyCombination"));
-        assert!(s3_err.message().unwrap().contains("test-bucket"));
-        assert!(s3_err.message().unwrap().contains("test-object"));
-    }
-
-    #[test]
-    fn test_to_s3_error_malformed_upload_id() {
-        let storage_err = StorageError::MalformedUploadID("malformed-id".to_string());
-        let err = Error::new(storage_err);
-        let s3_err = to_s3_error(err);
-
-        assert_eq!(*s3_err.code(), S3ErrorCode::InvalidArgument);
-        assert!(s3_err.message().unwrap().contains("Malformed UploadID"));
-        assert!(s3_err.message().unwrap().contains("malformed-id"));
-    }
-
-    #[test]
-    fn test_to_s3_error_object_name_too_long() {
-        let storage_err = StorageError::ObjectNameTooLong("test-bucket".to_string(), "very-long-object-name".to_string());
-        let err = Error::new(storage_err);
-        let s3_err = to_s3_error(err);
-
-        assert_eq!(*s3_err.code(), S3ErrorCode::InvalidArgument);
-        assert!(s3_err.message().unwrap().contains("Object name too long"));
-        assert!(s3_err.message().unwrap().contains("test-bucket"));
-        assert!(s3_err.message().unwrap().contains("very-long-object-name"));
-    }
-
-    #[test]
-    fn test_to_s3_error_object_name_prefix_as_slash() {
-        let storage_err = StorageError::ObjectNamePrefixAsSlash("test-bucket".to_string(), "/invalid-object".to_string());
-        let err = Error::new(storage_err);
-        let s3_err = to_s3_error(err);
-
-        assert_eq!(*s3_err.code(), S3ErrorCode::InvalidArgument);
-        assert!(
-            s3_err
-                .message()
-                .unwrap()
-                .contains("Object name contains forward slash as prefix")
-        );
-        assert!(s3_err.message().unwrap().contains("test-bucket"));
-        assert!(s3_err.message().unwrap().contains("/invalid-object"));
-    }
-
-    #[test]
-    fn test_to_s3_error_object_not_found() {
-        let storage_err = StorageError::ObjectNotFound("test-bucket".to_string(), "missing-object".to_string());
-        let err = Error::new(storage_err);
-        let s3_err = to_s3_error(err);
-
-        assert_eq!(*s3_err.code(), S3ErrorCode::NoSuchKey);
-        assert!(s3_err.message().unwrap().contains("test-bucket"));
-        assert!(s3_err.message().unwrap().contains("missing-object"));
-    }
-
-    #[test]
-    fn test_to_s3_error_version_not_found() {
-        let storage_err =
-            StorageError::VersionNotFound("test-bucket".to_string(), "test-object".to_string(), "missing-version".to_string());
-        let err = Error::new(storage_err);
-        let s3_err = to_s3_error(err);
-
-        assert_eq!(*s3_err.code(), S3ErrorCode::NoSuchVersion);
-        assert!(s3_err.message().unwrap().contains("test-bucket"));
-        assert!(s3_err.message().unwrap().contains("test-object"));
-        assert!(s3_err.message().unwrap().contains("missing-version"));
-    }
-
-    #[test]
-    fn test_to_s3_error_invalid_upload_id() {
-        let storage_err =
-            StorageError::InvalidUploadID("test-bucket".to_string(), "test-object".to_string(), "invalid-upload-id".to_string());
-        let err = Error::new(storage_err);
-        let s3_err = to_s3_error(err);
-
-        assert_eq!(*s3_err.code(), S3ErrorCode::InvalidPart);
-        assert!(s3_err.message().unwrap().contains("Invalid upload id"));
-        assert!(s3_err.message().unwrap().contains("test-bucket"));
-        assert!(s3_err.message().unwrap().contains("test-object"));
-        assert!(s3_err.message().unwrap().contains("invalid-upload-id"));
-    }
-
-    #[test]
-    fn test_to_s3_error_invalid_version_id() {
-        let storage_err = StorageError::InvalidVersionID(
-            "test-bucket".to_string(),
-            "test-object".to_string(),
-            "invalid-version-id".to_string(),
-        );
-        let err = Error::new(storage_err);
-        let s3_err = to_s3_error(err);
-
-        assert_eq!(*s3_err.code(), S3ErrorCode::InvalidArgument);
-        assert!(s3_err.message().unwrap().contains("Invalid version id"));
-        assert!(s3_err.message().unwrap().contains("test-bucket"));
-        assert!(s3_err.message().unwrap().contains("test-object"));
-        assert!(s3_err.message().unwrap().contains("invalid-version-id"));
-    }
-
-    #[test]
-    fn test_to_s3_error_data_movement_overwrite_err() {
-        let storage_err = StorageError::DataMovementOverwriteErr(
-            "test-bucket".to_string(),
-            "test-object".to_string(),
-            "test-version".to_string(),
-        );
-        let err = Error::new(storage_err);
-        let s3_err = to_s3_error(err);
-
-        assert_eq!(*s3_err.code(), S3ErrorCode::InvalidArgument);
-        assert!(s3_err.message().unwrap().contains("invalid data movement operation"));
-        assert!(s3_err.message().unwrap().contains("source and destination pool are the same"));
-        assert!(s3_err.message().unwrap().contains("test-bucket"));
-        assert!(s3_err.message().unwrap().contains("test-object"));
-        assert!(s3_err.message().unwrap().contains("test-version"));
-    }
-
-    #[test]
-    fn test_to_s3_error_object_exists_as_directory() {
"directory-object".to_string()); - let err = Error::new(storage_err); - let s3_err = to_s3_error(err); - - assert_eq!(*s3_err.code(), S3ErrorCode::InvalidArgument); - assert!(s3_err.message().unwrap().contains("Object exists on")); - assert!(s3_err.message().unwrap().contains("as directory")); - assert!(s3_err.message().unwrap().contains("test-bucket")); - assert!(s3_err.message().unwrap().contains("directory-object")); - } - - #[test] - fn test_to_s3_error_insufficient_read_quorum() { - let storage_err = StorageError::InsufficientReadQuorum; - let err = Error::new(storage_err); - let s3_err = to_s3_error(err); - - assert_eq!(*s3_err.code(), S3ErrorCode::SlowDown); - assert!( - s3_err - .message() - .unwrap() - .contains("Storage resources are insufficient for the read operation") - ); - } - - #[test] - fn test_to_s3_error_insufficient_write_quorum() { - let storage_err = StorageError::InsufficientWriteQuorum; - let err = Error::new(storage_err); - let s3_err = to_s3_error(err); - - assert_eq!(*s3_err.code(), S3ErrorCode::SlowDown); - assert!( - s3_err - .message() - .unwrap() - .contains("Storage resources are insufficient for the write operation") - ); - } - - #[test] - fn test_to_s3_error_decommission_not_started() { - let storage_err = StorageError::DecommissionNotStarted; - let err = Error::new(storage_err); - let s3_err = to_s3_error(err); - - assert_eq!(*s3_err.code(), S3ErrorCode::InvalidArgument); - assert!(s3_err.message().unwrap().contains("Decommission Not Started")); - } - - #[test] - fn test_to_s3_error_decommission_already_running() { - let storage_err = StorageError::DecommissionAlreadyRunning; - let err = Error::new(storage_err); - let s3_err = to_s3_error(err); - - assert_eq!(*s3_err.code(), S3ErrorCode::InternalError); - assert!(s3_err.message().unwrap().contains("Decommission already running")); - } - - #[test] - fn test_to_s3_error_volume_not_found() { - let storage_err = StorageError::VolumeNotFound("test-volume".to_string()); - let err = Error::new(storage_err); - let s3_err = to_s3_error(err); - - assert_eq!(*s3_err.code(), S3ErrorCode::NoSuchBucket); - assert!(s3_err.message().unwrap().contains("bucket not found")); - assert!(s3_err.message().unwrap().contains("test-volume")); - } - - #[test] - fn test_to_s3_error_invalid_part() { - let storage_err = StorageError::InvalidPart(1, "expected-part".to_string(), "got-part".to_string()); - let err = Error::new(storage_err); - let s3_err = to_s3_error(err); - - assert_eq!(*s3_err.code(), S3ErrorCode::InvalidPart); - assert!(s3_err.message().unwrap().contains("Specified part could not be found")); - assert!(s3_err.message().unwrap().contains("PartNumber")); - assert!(s3_err.message().unwrap().contains("expected-part")); - assert!(s3_err.message().unwrap().contains("got-part")); - } - - #[test] - fn test_to_s3_error_done_for_now() { - let storage_err = StorageError::DoneForNow; - let err = Error::new(storage_err); - let s3_err = to_s3_error(err); - - assert_eq!(*s3_err.code(), S3ErrorCode::InternalError); - assert!(s3_err.message().unwrap().contains("DoneForNow")); - } - - #[test] - fn test_to_s3_error_non_storage_error() { - // Test with a non-StorageError - let err = Error::from_string("Generic error message".to_string()); - let s3_err = to_s3_error(err); - - assert_eq!(*s3_err.code(), S3ErrorCode::InternalError); - assert!(s3_err.message().unwrap().contains("ec err")); - assert!(s3_err.message().unwrap().contains("Generic error message")); - } - - #[test] - fn test_to_s3_error_with_unicode_strings() { - let storage_err = 
-        let storage_err = StorageError::BucketNotFound("test-bucket".to_string());
-        let err = Error::new(storage_err);
-        let s3_err = to_s3_error(err);
-
-        assert_eq!(*s3_err.code(), S3ErrorCode::NoSuchBucket);
-        assert!(s3_err.message().unwrap().contains("bucket not found"));
-        assert!(s3_err.message().unwrap().contains("test-bucket"));
-    }
-
-    #[test]
-    fn test_to_s3_error_with_special_characters() {
-        let storage_err = StorageError::ObjectNameInvalid("bucket-with-@#$%".to_string(), "object-with-!@#$%^&*()".to_string());
-        let err = Error::new(storage_err);
-        let s3_err = to_s3_error(err);
-
-        assert_eq!(*s3_err.code(), S3ErrorCode::InvalidArgument);
-        assert!(s3_err.message().unwrap().contains("invalid object name"));
-        assert!(s3_err.message().unwrap().contains("bucket-with-@#$%"));
-        assert!(s3_err.message().unwrap().contains("object-with-!@#$%^&*()"));
-    }
-
-    #[test]
-    fn test_to_s3_error_with_empty_strings() {
-        let storage_err = StorageError::BucketNotFound("".to_string());
-        let err = Error::new(storage_err);
-        let s3_err = to_s3_error(err);
-
-        assert_eq!(*s3_err.code(), S3ErrorCode::NoSuchBucket);
-        assert!(s3_err.message().unwrap().contains("bucket not found"));
-    }
-
-    #[test]
-    fn test_to_s3_error_with_very_long_strings() {
-        let long_bucket_name = "a".repeat(1000);
-        let storage_err = StorageError::BucketNotFound(long_bucket_name.clone());
-        let err = Error::new(storage_err);
-        let s3_err = to_s3_error(err);
-
-        assert_eq!(*s3_err.code(), S3ErrorCode::NoSuchBucket);
-        assert!(s3_err.message().unwrap().contains("bucket not found"));
-        assert!(s3_err.message().unwrap().contains(&long_bucket_name));
-    }
-}
diff --git a/rustfs/src/storage/tonic_service.rs b/rustfs/src/storage/tonic_service.rs
index 22a7434f..70e8b6fa 100644
--- a/rustfs/src/storage/tonic_service.rs
+++ b/rustfs/src/storage/tonic_service.rs
@@ -16,7 +16,7 @@ use bytes::Bytes;
 use futures::Stream;
 use futures_util::future::join_all;
 use rmp_serde::{Deserializer, Serializer};
-use rustfs_common::{globals::GLOBAL_LOCAL_NODE_NAME, heal_channel::HealOpts};
+use rustfs_common::{GLOBAL_LOCAL_NODE_NAME, heal_channel::HealOpts};
 use rustfs_ecstore::{
     admin_server_info::get_local_server_property,
     bucket::{metadata::load_bucket_metadata, metadata_sys},