From 90f21a91021b18a467a03c79115c507edaaf0cd1 Mon Sep 17 00:00:00 2001 From: weisd Date: Fri, 26 Sep 2025 14:27:53 +0800 Subject: [PATCH] refactor: Reimplement bucket replication system with enhanced architecture (#590) * feat:refactor replication * use aws sdk for replication client * refactor/replication * merge main * fix lifecycle test --- Cargo.lock | 14 + crates/ahm/src/scanner/data_scanner.rs | 24 +- crates/ahm/src/scanner/lifecycle.rs | 14 +- crates/ahm/src/scanner/local_scan/mod.rs | 2 +- crates/ahm/tests/endpoint_index_test.rs | 3 +- crates/ahm/tests/heal_integration_test.rs | 5 +- .../ahm/tests/lifecycle_integration_test.rs | 15 +- crates/ahm/tests/optimized_scanner_tests.rs | 5 +- crates/audit/src/observability.rs | 11 +- crates/audit/src/registry.rs | 6 +- crates/audit/src/system.rs | 6 +- crates/e2e_test/src/common.rs | 10 +- crates/e2e_test/src/kms/common.rs | 42 +- .../src/kms/encryption_metadata_test.rs | 12 +- .../e2e_test/src/kms/kms_edge_cases_test.rs | 10 +- .../src/kms/kms_fault_recovery_test.rs | 6 +- crates/e2e_test/src/kms/kms_local_test.rs | 2 +- crates/e2e_test/src/kms/kms_vault_test.rs | 22 +- .../src/kms/multipart_encryption_test.rs | 2 +- crates/e2e_test/src/kms/test_runner.rs | 4 +- crates/ecstore/Cargo.toml | 3 + .../ecstore/src/bucket/bucket_target_sys.rs | 1400 +++++++++ crates/ecstore/src/bucket/metadata.rs | 168 +- crates/ecstore/src/bucket/metadata_sys.rs | 13 +- crates/ecstore/src/bucket/mod.rs | 1 + .../ecstore/src/bucket/replication/config.rs | 233 ++ .../src/bucket/replication/datatypes.rs | 52 +- crates/ecstore/src/bucket/replication/mod.rs | 13 + .../bucket/replication/replication_pool.rs | 1035 +++++++ .../replication/replication_resyncer.rs | 2403 +++++++++++++++ .../bucket/replication/replication_state.rs | 1201 ++++++++ .../bucket/replication/replication_type.rs | 470 +++ crates/ecstore/src/bucket/replication/rule.rs | 51 + crates/ecstore/src/bucket/tagging/mod.rs | 16 + crates/ecstore/src/bucket/target/arn.rs | 66 + .../src/bucket/target/bucket_target.rs | 800 +++++ crates/ecstore/src/bucket/target/mod.rs | 124 +- .../ecstore/src/cache_value/metacache_set.rs | 15 +- crates/ecstore/src/client/api_get_options.rs | 2 + crates/ecstore/src/client/api_remove.rs | 15 +- crates/ecstore/src/client/api_stat.rs | 72 +- crates/ecstore/src/client/bucket_cache.rs | 1 + crates/ecstore/src/client/transition_api.rs | 18 +- crates/ecstore/src/cmd/bucket_replication.rs | 2736 ----------------- .../src/cmd/bucket_replication_utils.rs | 69 - crates/ecstore/src/cmd/bucket_targets.rs | 890 ------ .../src/cmd/bucketreplicationhandler.rs | 14 - crates/ecstore/src/cmd/mod.rs | 16 - .../ecstore/src/data_usage/local_snapshot.rs | 2 +- crates/ecstore/src/disk/local.rs | 9 +- crates/ecstore/src/disk/mod.rs | 14 +- crates/ecstore/src/lib.rs | 1 - crates/ecstore/src/pools.rs | 26 +- crates/ecstore/src/rebalance.rs | 29 +- crates/ecstore/src/rpc/remote_disk.rs | 70 +- crates/ecstore/src/rpc/tonic_service.rs | 36 +- crates/ecstore/src/set_disk.rs | 255 +- crates/ecstore/src/sets.rs | 49 +- crates/ecstore/src/store.rs | 278 +- crates/ecstore/src/store_api.rs | 215 +- crates/ecstore/src/store_list_objects.rs | 67 +- crates/ecstore/src/store_utils.rs | 4 +- crates/filemeta/Cargo.toml | 2 +- crates/filemeta/src/fileinfo.rs | 29 +- crates/filemeta/src/filemeta.rs | 400 ++- crates/filemeta/src/lib.rs | 6 +- crates/filemeta/src/replication.rs | 494 +++ crates/filemeta/src/test_data.rs | 4 +- crates/iam/Cargo.toml | 1 + crates/iam/src/store/object.rs | 54 +- 
crates/kms/src/backends/local.rs | 22 +- crates/kms/src/backends/vault.rs | 21 +- crates/kms/src/config.rs | 2 +- crates/kms/src/encryption/service.rs | 23 +- crates/kms/src/error.rs | 4 +- crates/kms/src/service_manager.rs | 4 +- crates/lock/src/fast_lock/manager.rs | 4 + crates/mcp/src/s3_client.rs | 6 +- crates/rio/src/lib.rs | 1 + crates/rio/src/limit_reader.rs | 4 +- crates/utils/Cargo.toml | 4 +- crates/utils/src/hash.rs | 2 + crates/utils/src/http/headers.rs | 277 ++ crates/utils/src/http/mod.rs | 3 + crates/utils/src/lib.rs | 3 + crates/utils/src/string.rs | 11 + rustfs/src/admin/handlers.rs | 377 ++- rustfs/src/admin/handlers/pools.rs | 7 +- rustfs/src/main.rs | 50 +- rustfs/src/storage/ecfs.rs | 433 ++- rustfs/src/storage/options.rs | 34 +- 91 files changed, 10532 insertions(+), 4917 deletions(-) create mode 100644 crates/ecstore/src/bucket/bucket_target_sys.rs create mode 100644 crates/ecstore/src/bucket/replication/config.rs create mode 100644 crates/ecstore/src/bucket/replication/replication_pool.rs create mode 100644 crates/ecstore/src/bucket/replication/replication_resyncer.rs create mode 100644 crates/ecstore/src/bucket/replication/replication_state.rs create mode 100644 crates/ecstore/src/bucket/replication/replication_type.rs create mode 100644 crates/ecstore/src/bucket/replication/rule.rs create mode 100644 crates/ecstore/src/bucket/target/arn.rs create mode 100644 crates/ecstore/src/bucket/target/bucket_target.rs delete mode 100644 crates/ecstore/src/cmd/bucket_replication.rs delete mode 100644 crates/ecstore/src/cmd/bucket_replication_utils.rs delete mode 100644 crates/ecstore/src/cmd/bucket_targets.rs delete mode 100644 crates/ecstore/src/cmd/bucketreplicationhandler.rs delete mode 100644 crates/ecstore/src/cmd/mod.rs create mode 100644 crates/filemeta/src/replication.rs create mode 100644 crates/utils/src/http/headers.rs create mode 100644 crates/utils/src/http/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 4a6d662f..3c2e0756 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1654,6 +1654,15 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" +[[package]] +name = "convert_case" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baaaa0ecca5b51987b9423ccdc971514dd8b0bb7b4060b983d3664dad3f1f89f" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "core-foundation" version = "0.9.4" @@ -6384,7 +6393,10 @@ dependencies = [ "async-channel", "async-recursion", "async-trait", + "aws-credential-types", "aws-sdk-s3", + "aws-smithy-runtime-api", + "aws-smithy-types", "base64 0.22.1", "byteorder", "bytes", @@ -6497,6 +6509,7 @@ dependencies = [ "thiserror 2.0.16", "time", "tokio", + "tokio-util", "tracing", ] @@ -6802,6 +6815,7 @@ dependencies = [ "blake3", "brotli", "bytes", + "convert_case", "crc32fast", "flate2", "futures", diff --git a/crates/ahm/src/scanner/data_scanner.rs b/crates/ahm/src/scanner/data_scanner.rs index d502ed24..6dbc2d58 100644 --- a/crates/ahm/src/scanner/data_scanner.rs +++ b/crates/ahm/src/scanner/data_scanner.rs @@ -29,6 +29,7 @@ use rustfs_ecstore::{ data_usage::{aggregate_local_snapshots, store_data_usage_in_backend}, }; use rustfs_filemeta::{MetacacheReader, VersionType}; +use s3s::dto::{BucketVersioningStatus, VersioningConfiguration}; use tokio::sync::{Mutex, RwLock}; use tokio_util::sync::CancellationToken; use tracing::{debug, error, info, warn}; @@ -50,7 +51,6 @@ use 
rustfs_common::data_usage::{DataUsageInfo, SizeSummary}; use rustfs_common::metrics::{Metric, Metrics, globalMetrics}; use rustfs_ecstore::bucket::versioning::VersioningApi; use rustfs_ecstore::bucket::versioning_sys::BucketVersioningSys; -use rustfs_ecstore::cmd::bucket_targets::VersioningConfig; use rustfs_ecstore::disk::RUSTFS_META_BUCKET; use uuid; @@ -300,8 +300,13 @@ impl Scanner { .map(|(c, _)| Arc::new(c)); // Get bucket versioning configuration - let versioning_config = Arc::new(VersioningConfig { - enabled: bucket_info.versioning, + let versioning_config = Arc::new(VersioningConfiguration { + status: if bucket_info.versioning { + Some(BucketVersioningStatus::from_static(BucketVersioningStatus::ENABLED)) + } else { + None + }, + ..Default::default() }); let records = match bucket_objects_map.get(bucket_name) { @@ -1825,7 +1830,16 @@ impl Scanner { } }; let bucket_info = ecstore.get_bucket_info(bucket, &Default::default()).await.ok(); - let versioning_config = bucket_info.map(|bi| Arc::new(VersioningConfig { enabled: bi.versioning })); + let versioning_config = bucket_info.map(|bi| { + Arc::new(VersioningConfiguration { + status: if bi.versioning { + Some(BucketVersioningStatus::from_static(BucketVersioningStatus::ENABLED)) + } else { + None + }, + ..Default::default() + }) + }); let lifecycle_config = rustfs_ecstore::bucket::metadata_sys::get_lifecycle_config(bucket) .await .ok() @@ -2651,7 +2665,7 @@ mod tests { // create ECStore with dynamic port let port = port.unwrap_or(9000); let server_addr: SocketAddr = format!("127.0.0.1:{port}").parse().expect("Invalid server address format"); - let ecstore = ECStore::new(server_addr, endpoint_pools) + let ecstore = ECStore::new(server_addr, endpoint_pools, CancellationToken::new()) .await .expect("Failed to create ECStore"); diff --git a/crates/ahm/src/scanner/lifecycle.rs b/crates/ahm/src/scanner/lifecycle.rs index 00808e31..d2466157 100644 --- a/crates/ahm/src/scanner/lifecycle.rs +++ b/crates/ahm/src/scanner/lifecycle.rs @@ -28,10 +28,9 @@ use rustfs_ecstore::bucket::metadata_sys::get_object_lock_config; use rustfs_ecstore::bucket::object_lock::objectlock_sys::{BucketObjectLockSys, enforce_retention_for_deletion}; use rustfs_ecstore::bucket::versioning::VersioningApi; use rustfs_ecstore::bucket::versioning_sys::BucketVersioningSys; -use rustfs_ecstore::cmd::bucket_targets::VersioningConfig; use rustfs_ecstore::store_api::{ObjectInfo, ObjectToDelete}; use rustfs_filemeta::FileInfo; -use s3s::dto::BucketLifecycleConfiguration as LifecycleConfig; +use s3s::dto::{BucketLifecycleConfiguration as LifecycleConfig, VersioningConfiguration}; use time::OffsetDateTime; use tracing::info; @@ -43,11 +42,15 @@ pub struct ScannerItem { pub bucket: String, pub object_name: String, pub lifecycle: Option>, - pub versioning: Option>, + pub versioning: Option>, } impl ScannerItem { - pub fn new(bucket: String, lifecycle: Option>, versioning: Option>) -> Self { + pub fn new( + bucket: String, + lifecycle: Option>, + versioning: Option>, + ) -> Self { Self { bucket, object_name: "".to_string(), @@ -145,6 +148,7 @@ impl ScannerItem { to_del.push(ObjectToDelete { object_name: obj.name, version_id: obj.version_id, + ..Default::default() }); } @@ -233,7 +237,7 @@ impl ScannerItem { IlmAction::DeleteAction => { info!("apply_lifecycle: Object {} marked for deletion", oi.name); if let Some(vcfg) = &self.versioning { - if !vcfg.is_enabled() { + if !vcfg.enabled() { info!("apply_lifecycle: Versioning disabled, setting new_size=0"); new_size = 0; } diff --git 
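// A minimal sketch, not part of this patch: the hunks above replace the old
// in-crate `VersioningConfig` with `s3s::dto::VersioningConfiguration`. This
// illustrative helper (`versioning_from_flag` is a made-up name) shows the same
// mapping in isolation; the `enabled()` check used in lifecycle.rs comes from
// rustfs's `VersioningApi` extension trait, not from s3s itself.
use s3s::dto::{BucketVersioningStatus, VersioningConfiguration};

fn versioning_from_flag(enabled: bool) -> VersioningConfiguration {
    VersioningConfiguration {
        // `status: None` is how an unversioned bucket is represented.
        status: enabled.then(|| BucketVersioningStatus::from_static(BucketVersioningStatus::ENABLED)),
        ..Default::default()
    }
}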
a/crates/ahm/src/scanner/local_scan/mod.rs b/crates/ahm/src/scanner/local_scan/mod.rs index 96d2b6f7..97e4b687 100644 --- a/crates/ahm/src/scanner/local_scan/mod.rs +++ b/crates/ahm/src/scanner/local_scan/mod.rs @@ -444,7 +444,7 @@ mod tests { let delete_marker = MetaDeleteMarker { version_id: Some(Uuid::new_v4()), mod_time: Some(OffsetDateTime::now_utc()), - meta_sys: None, + meta_sys: HashMap::new(), }; let version = FileMetaVersion { diff --git a/crates/ahm/tests/endpoint_index_test.rs b/crates/ahm/tests/endpoint_index_test.rs index 2f32640e..7da3d31d 100644 --- a/crates/ahm/tests/endpoint_index_test.rs +++ b/crates/ahm/tests/endpoint_index_test.rs @@ -18,6 +18,7 @@ use rustfs_ecstore::disk::endpoint::Endpoint; use rustfs_ecstore::endpoints::{EndpointServerPools, Endpoints, PoolEndpoints}; use std::net::SocketAddr; use tempfile::TempDir; +use tokio_util::sync::CancellationToken; #[tokio::test(flavor = "multi_thread", worker_threads = 4)] async fn test_endpoint_index_settings() -> anyhow::Result<()> { @@ -73,7 +74,7 @@ async fn test_endpoint_index_settings() -> anyhow::Result<()> { rustfs_ecstore::store::init_local_disks(endpoint_pools.clone()).await?; let server_addr: SocketAddr = "127.0.0.1:0".parse().unwrap(); - let ecstore = rustfs_ecstore::store::ECStore::new(server_addr, endpoint_pools).await?; + let ecstore = rustfs_ecstore::store::ECStore::new(server_addr, endpoint_pools, CancellationToken::new()).await?; println!("ECStore initialized successfully with {} pools", ecstore.pools.len()); diff --git a/crates/ahm/tests/heal_integration_test.rs b/crates/ahm/tests/heal_integration_test.rs index bb1a89e6..70ea5398 100644 --- a/crates/ahm/tests/heal_integration_test.rs +++ b/crates/ahm/tests/heal_integration_test.rs @@ -29,6 +29,7 @@ use std::sync::Once; use std::sync::OnceLock; use std::{path::PathBuf, sync::Arc, time::Duration}; use tokio::fs; +use tokio_util::sync::CancellationToken; use tracing::info; use walkdir::WalkDir; @@ -98,7 +99,9 @@ async fn setup_test_env() -> (Vec, Arc, Arc (Vec, Arc) { // create ECStore with dynamic port 0 (let OS assign) or fixed 9002 if free let port = 9002; // for simplicity let server_addr: std::net::SocketAddr = format!("127.0.0.1:{port}").parse().unwrap(); - let ecstore = ECStore::new(server_addr, endpoint_pools).await.unwrap(); + let ecstore = ECStore::new(server_addr, endpoint_pools, CancellationToken::new()) + .await + .unwrap(); // init bucket metadata system let buckets_list = ecstore @@ -124,7 +127,7 @@ async fn setup_test_env() -> (Vec, Arc) { } /// Test helper: Create a test bucket -async fn create_test_bucket(ecstore: &Arc, bucket_name: &str) { +async fn _create_test_bucket(ecstore: &Arc, bucket_name: &str) { (**ecstore) .make_bucket(bucket_name, &Default::default()) .await @@ -312,7 +315,7 @@ mod serial_tests { let object_name = "test/object.txt"; // Match the lifecycle rule prefix "test/" let test_data = b"Hello, this is test data for lifecycle expiry!"; - create_test_bucket(&ecstore, bucket_name).await; + create_test_lock_bucket(&ecstore, bucket_name).await; upload_test_object(&ecstore, bucket_name, object_name, test_data).await; // Verify object exists initially @@ -458,7 +461,7 @@ mod serial_tests { let check_result = object_exists(&ecstore, bucket_name, object_name).await; println!("Object exists after lifecycle processing: {check_result}"); - if !check_result { + if check_result { println!("❌ Object was not deleted by lifecycle processing"); // Let's try to get object info to see its details match ecstore @@ -479,7 +482,7 @@ mod 
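// A sketch, with assumed wiring not shown in the patch: `ECStore::new` now
// takes a third argument, a `tokio_util::sync::CancellationToken` (the tests
// above pass a fresh token). One plausible production shape is a root token
// owned by main, with a child token handed to the store and cancelled on
// shutdown so background tasks can wind down.
use tokio_util::sync::CancellationToken;

async fn start_store(
    server_addr: std::net::SocketAddr,
    endpoint_pools: rustfs_ecstore::endpoints::EndpointServerPools,
) -> anyhow::Result<()> {
    let shutdown = CancellationToken::new();
    let _ecstore = rustfs_ecstore::store::ECStore::new(server_addr, endpoint_pools, shutdown.child_token()).await?;
    tokio::spawn(async move {
        let _ = tokio::signal::ctrl_c().await;
        shutdown.cancel(); // observed by every child token inside the store
    });
    Ok(())
}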
serial_tests { println!("✅ Object was successfully deleted by lifecycle processing"); } - assert!(check_result); + assert!(!check_result); println!("✅ Object successfully expired"); // Stop scanner @@ -501,7 +504,7 @@ mod serial_tests { let object_name = "test/object.txt"; // Match the lifecycle rule prefix "test/" let test_data = b"Hello, this is test data for lifecycle expiry!"; - create_test_bucket(&ecstore, bucket_name).await; + create_test_lock_bucket(&ecstore, bucket_name).await; upload_test_object(&ecstore, bucket_name, object_name, test_data).await; // Verify object exists initially diff --git a/crates/ahm/tests/optimized_scanner_tests.rs b/crates/ahm/tests/optimized_scanner_tests.rs index f4631f05..cd6a23a1 100644 --- a/crates/ahm/tests/optimized_scanner_tests.rs +++ b/crates/ahm/tests/optimized_scanner_tests.rs @@ -14,6 +14,7 @@ use std::{fs, net::SocketAddr, sync::Arc, sync::OnceLock, time::Duration}; use tempfile::TempDir; +use tokio_util::sync::CancellationToken; use serial_test::serial; @@ -89,7 +90,9 @@ async fn prepare_test_env(test_dir: Option<&str>, port: Option) -> (Vec Err(TargetError::Configuration(format!("Unknown target type: {}", target_type))), + _ => Err(TargetError::Configuration(format!("Unknown target type: {target_type}"))), } } @@ -352,7 +352,7 @@ fn parse_webhook_args(_id: &str, config: &KVS) -> Result Result { .filter(|s| !s.is_empty()) .ok_or_else(|| TargetError::Configuration("MQTT broker is required".to_string()))?; - let broker_url = Url::parse(&broker).map_err(|e| TargetError::Configuration(format!("invalid MQTT broker URL: {}", e)))?; + let broker_url = Url::parse(&broker).map_err(|e| TargetError::Configuration(format!("invalid MQTT broker URL: {e}")))?; let topic = config .lookup(MQTT_TOPIC) diff --git a/crates/audit/src/system.rs b/crates/audit/src/system.rs index 8d4d1291..05b6b07c 100644 --- a/crates/audit/src/system.rs +++ b/crates/audit/src/system.rs @@ -461,7 +461,7 @@ impl AuditSystem { info!(target_id = %target_id, "Target enabled"); Ok(()) } else { - Err(AuditError::Configuration(format!("Target not found: {}", target_id))) + Err(AuditError::Configuration(format!("Target not found: {target_id}"))) } } @@ -474,7 +474,7 @@ impl AuditSystem { info!(target_id = %target_id, "Target disabled"); Ok(()) } else { - Err(AuditError::Configuration(format!("Target not found: {}", target_id))) + Err(AuditError::Configuration(format!("Target not found: {target_id}"))) } } @@ -488,7 +488,7 @@ impl AuditSystem { info!(target_id = %target_id, "Target removed"); Ok(()) } else { - Err(AuditError::Configuration(format!("Target not found: {}", target_id))) + Err(AuditError::Configuration(format!("Target not found: {target_id}"))) } } diff --git a/crates/e2e_test/src/common.rs b/crates/e2e_test/src/common.rs index b459128d..a3cf1371 100644 --- a/crates/e2e_test/src/common.rs +++ b/crates/e2e_test/src/common.rs @@ -94,7 +94,7 @@ fn build_rustfs_binary() { if !output.status.success() { let stderr = String::from_utf8_lossy(&output.stderr); - panic!("Failed to build RustFS binary. Error: {}", stderr); + panic!("Failed to build RustFS binary. 
Error: {stderr}"); } info!("✅ RustFS binary built successfully"); @@ -134,8 +134,8 @@ impl RustFSTestEnvironment { // Use a unique port for each test environment let port = Self::find_available_port().await?; - let address = format!("127.0.0.1:{}", port); - let url = format!("http://{}", address); + let address = format!("127.0.0.1:{port}"); + let url = format!("http://{address}"); Ok(Self { temp_dir, @@ -152,7 +152,7 @@ impl RustFSTestEnvironment { let temp_dir = format!("/tmp/rustfs_e2e_test_{}", Uuid::new_v4()); fs::create_dir_all(&temp_dir).await?; - let url = format!("http://{}", address); + let url = format!("http://{address}"); Ok(Self { temp_dir, @@ -327,7 +327,7 @@ pub async fn execute_awscurl( if !output.status.success() { let stderr = String::from_utf8_lossy(&output.stderr); - return Err(format!("awscurl failed: {}", stderr).into()); + return Err(format!("awscurl failed: {stderr}").into()); } let response = String::from_utf8_lossy(&output.stdout).to_string(); diff --git a/crates/e2e_test/src/kms/common.rs b/crates/e2e_test/src/kms/common.rs index 29828751..daeafb9c 100644 --- a/crates/e2e_test/src/kms/common.rs +++ b/crates/e2e_test/src/kms/common.rs @@ -59,7 +59,7 @@ pub async fn configure_kms( access_key: &str, secret_key: &str, ) -> Result<(), Box> { - let url = format!("{}/rustfs/admin/v3/kms/configure", base_url); + let url = format!("{base_url}/rustfs/admin/v3/kms/configure"); awscurl_post(&url, config_json, access_key, secret_key).await?; info!("KMS configured successfully"); Ok(()) @@ -71,7 +71,7 @@ pub async fn start_kms( access_key: &str, secret_key: &str, ) -> Result<(), Box> { - let url = format!("{}/rustfs/admin/v3/kms/start", base_url); + let url = format!("{base_url}/rustfs/admin/v3/kms/start"); awscurl_post(&url, "{}", access_key, secret_key).await?; info!("KMS started successfully"); Ok(()) @@ -83,7 +83,7 @@ pub async fn get_kms_status( access_key: &str, secret_key: &str, ) -> Result> { - let url = format!("{}/rustfs/admin/v3/kms/status", base_url); + let url = format!("{base_url}/rustfs/admin/v3/kms/status"); let status = awscurl_get(&url, access_key, secret_key).await?; info!("KMS status retrieved: {}", status); Ok(status) @@ -101,7 +101,7 @@ pub async fn create_default_key( }) .to_string(); - let url = format!("{}/rustfs/admin/v3/kms/keys", base_url); + let url = format!("{base_url}/rustfs/admin/v3/kms/keys"); let response = awscurl_post(&url, &create_key_body, access_key, secret_key).await?; // Parse response to get the actual key ID @@ -141,7 +141,7 @@ pub async fn create_key_with_specific_id(key_dir: &str, key_id: &str) -> Result< }); // Write the key to file with the specified ID as JSON - let key_path = format!("{}/{}.key", key_dir, key_id); + let key_path = format!("{key_dir}/{key_id}.key"); let content = serde_json::to_vec_pretty(&stored_key)?; fs::write(&key_path, &content).await?; @@ -281,13 +281,8 @@ pub async fn test_kms_key_management( }) .to_string(); - let create_response = awscurl_post( - &format!("{}/rustfs/admin/v3/kms/keys", base_url), - &create_key_body, - access_key, - secret_key, - ) - .await?; + let create_response = + awscurl_post(&format!("{base_url}/rustfs/admin/v3/kms/keys"), &create_key_body, access_key, secret_key).await?; let create_result: serde_json::Value = serde_json::from_str(&create_response)?; let key_id = create_result["key_id"] @@ -296,8 +291,7 @@ pub async fn test_kms_key_management( info!("Created key with ID: {}", key_id); // Test DescribeKey - let describe_response = - 
awscurl_get(&format!("{}/rustfs/admin/v3/kms/keys/{}", base_url, key_id), access_key, secret_key).await?; + let describe_response = awscurl_get(&format!("{base_url}/rustfs/admin/v3/kms/keys/{key_id}"), access_key, secret_key).await?; info!("DescribeKey response: {}", describe_response); let describe_result: serde_json::Value = serde_json::from_str(&describe_response)?; @@ -306,7 +300,7 @@ pub async fn test_kms_key_management( info!("Successfully described key: {}", key_id); // Test ListKeys - let list_response = awscurl_get(&format!("{}/rustfs/admin/v3/kms/keys", base_url), access_key, secret_key).await?; + let list_response = awscurl_get(&format!("{base_url}/rustfs/admin/v3/kms/keys"), access_key, secret_key).await?; let list_result: serde_json::Value = serde_json::from_str(&list_response)?; let keys = list_result["keys"] @@ -412,7 +406,7 @@ impl VaultTestEnvironment { let port_check = TcpStream::connect(VAULT_ADDRESS).await.is_ok(); if port_check { // Additional check by making a health request - if let Ok(response) = reqwest::get(&format!("{}/v1/sys/health", VAULT_URL)).await { + if let Ok(response) = reqwest::get(&format!("{VAULT_URL}/v1/sys/health")).await { if response.status().is_success() { info!("Vault server is ready after {} seconds", i); return Ok(()); @@ -438,7 +432,7 @@ impl VaultTestEnvironment { // Enable transit secrets engine let enable_response = client - .post(format!("{}/v1/sys/mounts/{}", VAULT_URL, VAULT_TRANSIT_PATH)) + .post(format!("{VAULT_URL}/v1/sys/mounts/{VAULT_TRANSIT_PATH}")) .header("X-Vault-Token", VAULT_TOKEN) .json(&serde_json::json!({ "type": "transit" @@ -448,14 +442,14 @@ impl VaultTestEnvironment { if !enable_response.status().is_success() && enable_response.status() != 400 { let error_text = enable_response.text().await?; - return Err(format!("Failed to enable transit engine: {}", error_text).into()); + return Err(format!("Failed to enable transit engine: {error_text}").into()); } info!("Creating Vault encryption key"); // Create encryption key let key_response = client - .post(format!("{}/v1/{}/keys/{}", VAULT_URL, VAULT_TRANSIT_PATH, VAULT_KEY_NAME)) + .post(format!("{VAULT_URL}/v1/{VAULT_TRANSIT_PATH}/keys/{VAULT_KEY_NAME}")) .header("X-Vault-Token", VAULT_TOKEN) .json(&serde_json::json!({ "type": "aes256-gcm96" @@ -465,7 +459,7 @@ impl VaultTestEnvironment { if !key_response.status().is_success() && key_response.status() != 400 { let error_text = key_response.text().await?; - return Err(format!("Failed to create encryption key: {}", error_text).into()); + return Err(format!("Failed to create encryption key: {error_text}").into()); } info!("Vault transit engine setup completed"); @@ -713,10 +707,10 @@ pub async fn test_all_multipart_encryption_types( // Test configurations for all encryption types let test_configs = vec![ - MultipartTestConfig::new(format!("{}-no-encryption", base_object_key), part_size, total_parts, EncryptionType::None), - MultipartTestConfig::new(format!("{}-sse-s3", base_object_key), part_size, total_parts, EncryptionType::SSES3), - MultipartTestConfig::new(format!("{}-sse-kms", base_object_key), part_size, total_parts, EncryptionType::SSEKMS), - MultipartTestConfig::new(format!("{}-sse-c", base_object_key), part_size, total_parts, create_sse_c_config()), + MultipartTestConfig::new(format!("{base_object_key}-no-encryption"), part_size, total_parts, EncryptionType::None), + MultipartTestConfig::new(format!("{base_object_key}-sse-s3"), part_size, total_parts, EncryptionType::SSES3), + 
MultipartTestConfig::new(format!("{base_object_key}-sse-kms"), part_size, total_parts, EncryptionType::SSEKMS), + MultipartTestConfig::new(format!("{base_object_key}-sse-c"), part_size, total_parts, create_sse_c_config()), ]; // Run tests for each encryption type diff --git a/crates/e2e_test/src/kms/encryption_metadata_test.rs b/crates/e2e_test/src/kms/encryption_metadata_test.rs index cc458fc3..8f155550 100644 --- a/crates/e2e_test/src/kms/encryption_metadata_test.rs +++ b/crates/e2e_test/src/kms/encryption_metadata_test.rs @@ -33,11 +33,10 @@ fn assert_encryption_metadata(metadata: &HashMap, expected_size: "x-rustfs-encryption-context", "x-rustfs-encryption-original-size", ] { - assert!(metadata.contains_key(key), "expected managed encryption metadata '{}' to be present", key); + assert!(metadata.contains_key(key), "expected managed encryption metadata '{key}' to be present"); assert!( !metadata.get(key).unwrap().is_empty(), - "managed encryption metadata '{}' should not be empty", - key + "managed encryption metadata '{key}' should not be empty" ); } @@ -84,10 +83,7 @@ fn assert_storage_encrypted(storage_root: &std::path::Path, bucket: &str, key: & assert!( scanned > 0, - "Failed to locate stored data files for bucket '{}' and key '{}' under {:?}", - bucket, - key, - storage_root + "Failed to locate stored data files for bucket '{bucket}' and key '{key}' under {storage_root:?}" ); assert!(plaintext_path.is_none(), "Plaintext detected on disk at {:?}", plaintext_path.unwrap()); } @@ -220,7 +216,7 @@ async fn test_head_reports_managed_metadata_for_sse_kms_and_copy() -> Result<(), assert_encryption_metadata(source_metadata, payload.len()); let dest_key = "metadata-sse-kms-object-copy"; - let copy_source = format!("{}/{}", TEST_BUCKET, source_key); + let copy_source = format!("{TEST_BUCKET}/{source_key}"); s3_client .copy_object() diff --git a/crates/e2e_test/src/kms/kms_edge_cases_test.rs b/crates/e2e_test/src/kms/kms_edge_cases_test.rs index 1d369799..1960c066 100644 --- a/crates/e2e_test/src/kms/kms_edge_cases_test.rs +++ b/crates/e2e_test/src/kms/kms_edge_cases_test.rs @@ -389,8 +389,8 @@ async fn test_kms_concurrent_encryption() -> Result<(), Box Result<(), Box { // SSE-C - let key = format!("testkey{:026}", i); // 32-byte key + let key = format!("testkey{i:026}"); // 32-byte key let key_b64 = base64::engine::general_purpose::STANDARD.encode(&key); let key_md5 = format!("{:x}", md5::compute(&key)); @@ -459,9 +459,7 @@ async fn test_kms_concurrent_encryption() -> Result<(), Box= num_concurrent - 1, - "Most concurrent uploads should succeed (got {}/{})", - successful_uploads, - num_concurrent + "Most concurrent uploads should succeed (got {successful_uploads}/{num_concurrent})" ); info!("✅ Successfully completed {}/{} concurrent uploads", successful_uploads, num_concurrent); diff --git a/crates/e2e_test/src/kms/kms_fault_recovery_test.rs b/crates/e2e_test/src/kms/kms_fault_recovery_test.rs index 25c79e49..2325281e 100644 --- a/crates/e2e_test/src/kms/kms_fault_recovery_test.rs +++ b/crates/e2e_test/src/kms/kms_fault_recovery_test.rs @@ -152,7 +152,7 @@ async fn test_kms_corrupted_key_files() -> Result<(), Box Result<(), Box Result<(), Box Result<(), Box 0 { - return Err(format!("Critical test suite failed: {} tests failed", failed_count).into()); + return Err(format!("Critical test suite failed: {failed_count} tests failed").into()); } info!("✅ 所有关键测试通过"); @@ -498,7 +498,7 @@ async fn test_kms_full_suite() -> Result<(), Box = OnceLock::new(); + +#[derive(Debug, Clone)] +pub struct 
ArnTarget { + pub client: Option>, + pub last_refresh: OffsetDateTime, +} + +impl Default for ArnTarget { + fn default() -> Self { + Self { + client: None, + last_refresh: OffsetDateTime::UNIX_EPOCH, + } + } +} + +impl ArnTarget { + pub fn with_client(client: Arc) -> Self { + Self { + client: Some(client), + last_refresh: OffsetDateTime::now_utc(), + } + } +} + +#[derive(Debug, Clone, Default)] +pub struct ArnErrs { + pub count: i64, + pub update_in_progress: bool, + pub bucket: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct LastMinuteLatency { + times: Vec, + #[serde(skip, default = "instant_now")] + start_time: Instant, +} + +fn instant_now() -> Instant { + Instant::now() +} + +impl Default for LastMinuteLatency { + fn default() -> Self { + Self { + times: Vec::new(), + start_time: Instant::now(), + } + } +} + +impl LastMinuteLatency { + pub fn new() -> Self { + Self::default() + } + + pub fn add(&mut self, duration: Duration) { + let now = Instant::now(); + // Remove entries older than 1 minute + self.times + .retain(|_| now.duration_since(self.start_time) < Duration::from_secs(60)); + self.times.push(duration); + } + + pub fn get_total(&self) -> LatencyAverage { + if self.times.is_empty() { + return LatencyAverage { + avg: Duration::from_secs(0), + }; + } + let total: Duration = self.times.iter().sum(); + LatencyAverage { + avg: total / self.times.len() as u32, + } + } +} + +#[derive(Debug, Clone)] +pub struct LatencyAverage { + pub avg: Duration, +} + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct LatencyStat { + pub lastmin: LastMinuteLatency, + pub curr: Duration, + pub avg: Duration, + pub peak: Duration, + pub n: i64, +} + +impl LatencyStat { + pub fn new() -> Self { + Self::default() + } + + pub fn update(&mut self, duration: Duration) { + self.lastmin.add(duration); + self.n += 1; + if duration > self.peak { + self.peak = duration; + } + self.curr = self.lastmin.get_total().avg; + self.avg = Duration::from_nanos( + (self.avg.as_nanos() as i64 * (self.n - 1) + self.curr.as_nanos() as i64) as u64 / self.n as u64, + ); + } +} + +#[derive(Debug, Clone)] +pub struct EpHealth { + pub endpoint: String, + pub scheme: String, + pub online: bool, + pub last_online: Option, + pub last_hc_at: Option, + pub offline_duration: Duration, + pub latency: LatencyStat, +} + +impl Default for EpHealth { + fn default() -> Self { + Self { + endpoint: String::new(), + scheme: String::new(), + online: true, + last_online: None, + last_hc_at: None, + offline_duration: Duration::from_secs(0), + latency: LatencyStat::new(), + } + } +} + +#[derive(Debug, Default)] +pub struct BucketTargetSys { + pub arn_remotes_map: Arc>>, + pub targets_map: Arc>>>, + pub h_mutex: Arc>>, + pub hc_client: Arc, + pub a_mutex: Arc>>, + pub arn_errs_map: Arc>>, +} + +impl BucketTargetSys { + pub fn get() -> &'static Self { + GLOBAL_BUCKET_TARGET_SYS.get_or_init(Self::new) + } + + fn new() -> Self { + Self { + arn_remotes_map: Arc::new(RwLock::new(HashMap::new())), + targets_map: Arc::new(RwLock::new(HashMap::new())), + h_mutex: Arc::new(RwLock::new(HashMap::new())), + hc_client: Arc::new(HttpClient::new()), + a_mutex: Arc::new(Mutex::new(HashMap::new())), + arn_errs_map: Arc::new(RwLock::new(HashMap::new())), + } + } + + pub async fn is_offline(&self, url: &Url) -> bool { + { + let health_map = self.h_mutex.read().await; + if let Some(health) = health_map.get(url.host_str().unwrap_or("")) { + return !health.online; + } + } + // Initialize health check if not exists + 
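// A sketch, not part of this patch: `LastMinuteLatency::add` above retains
// samples with a predicate that never inspects the sample itself, so the
// window keeps everything or drops everything at once. A per-sample variant
// with illustrative names (`MinuteWindow` is made up) ages entries out
// individually by pairing each latency with its arrival time:
use std::collections::VecDeque;

struct MinuteWindow {
    samples: VecDeque<(Instant, Duration)>,
}

impl MinuteWindow {
    fn add(&mut self, latency: Duration) {
        let now = Instant::now();
        // Drop entries individually once they pass the one-minute mark.
        while self
            .samples
            .front()
            .is_some_and(|(t, _)| now.duration_since(*t) >= Duration::from_secs(60))
        {
            self.samples.pop_front();
        }
        self.samples.push_back((now, latency));
    }

    fn avg(&self) -> Duration {
        if self.samples.is_empty() {
            return Duration::ZERO;
        }
        let total: Duration = self.samples.iter().map(|(_, d)| *d).sum();
        total / self.samples.len() as u32
    }
}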
self.init_hc(url).await; + false + } + + pub async fn mark_offline(&self, url: &Url) { + let mut health_map = self.h_mutex.write().await; + if let Some(health) = health_map.get_mut(url.host_str().unwrap_or("")) { + health.online = false; + } + } + + pub async fn init_hc(&self, url: &Url) { + let mut health_map = self.h_mutex.write().await; + let host = url.host_str().unwrap_or("").to_string(); + health_map.insert( + host.clone(), + EpHealth { + endpoint: host, + scheme: url.scheme().to_string(), + online: true, + ..Default::default() + }, + ); + } + + pub async fn heartbeat(&self) { + let mut interval = tokio::time::interval(DEFAULT_HEALTH_CHECK_DURATION); + loop { + interval.tick().await; + + let endpoints = { + let health_map = self.h_mutex.read().await; + health_map.keys().cloned().collect::>() + }; + + for endpoint in endpoints { + // Perform health check + let start = Instant::now(); + let online = self.check_endpoint_health(&endpoint).await; + let duration = start.elapsed(); + + { + let mut health_map = self.h_mutex.write().await; + if let Some(health) = health_map.get_mut(&endpoint) { + let prev_online = health.online; + health.online = online; + health.last_hc_at = Some(OffsetDateTime::now_utc()); + health.latency.update(duration); + + if online { + health.last_online = Some(OffsetDateTime::now_utc()); + } else if prev_online { + // Just went offline + health.offline_duration += duration; + } + } + } + } + } + } + + async fn check_endpoint_health(&self, _endpoint: &str) -> bool { + true + // TODO: Health check + + // // Simple health check implementation + // // In a real implementation, you would make actual HTTP requests + // match self + // .hc_client + // .get(format!("https://{}/rustfs/health/ready", endpoint)) + // .timeout(Duration::from_secs(3)) + // .send() + // .await + // { + // Ok(response) => response.status().is_success(), + // Err(_) => false, + // } + } + + pub async fn health_stats(&self) -> HashMap { + let health_map = self.h_mutex.read().await; + health_map.clone() + } + + pub async fn list_targets(&self, bucket: &str, arn_type: &str) -> Vec { + let health_stats = self.health_stats().await; + let mut targets = Vec::new(); + + if !bucket.is_empty() { + if let Ok(bucket_targets) = self.list_bucket_targets(bucket).await { + for mut target in bucket_targets.targets { + if arn_type.is_empty() || target.target_type.to_string() == arn_type { + if let Some(health) = health_stats.get(&target.endpoint) { + target.total_downtime = health.offline_duration; + target.online = health.online; + target.last_online = health.last_online; + target.latency = target::LatencyStat { + curr: health.latency.curr, + avg: health.latency.avg, + max: health.latency.peak, + }; + } + targets.push(target); + } + } + } + return targets; + } + + let targets_map = self.targets_map.read().await; + for bucket_targets in targets_map.values() { + for mut target in bucket_targets.iter().cloned() { + if arn_type.is_empty() || target.target_type.to_string() == arn_type { + if let Some(health) = health_stats.get(&target.endpoint) { + target.total_downtime = health.offline_duration; + target.online = health.online; + target.last_online = health.last_online; + target.latency = target::LatencyStat { + curr: health.latency.curr, + avg: health.latency.avg, + max: health.latency.peak, + }; + } + targets.push(target); + } + } + } + + targets + } + + pub async fn list_bucket_targets(&self, bucket: &str) -> Result { + let targets_map = self.targets_map.read().await; + if let Some(targets) = targets_map.get(bucket) 
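// A sketch of the probe the TODO in `check_endpoint_health` points at,
// following the commented-out reqwest code above. The `/rustfs/health/ready`
// path and 3-second timeout are taken from that comment; the scheme would come
// from the stored `EpHealth.scheme` rather than being hard-coded, and
// `probe_ready` is an illustrative name, not part of the patch.
async fn probe_ready(client: &reqwest::Client, scheme: &str, endpoint: &str) -> bool {
    client
        .get(format!("{scheme}://{endpoint}/rustfs/health/ready"))
        .timeout(Duration::from_secs(3))
        .send()
        .await
        .map(|resp| resp.status().is_success())
        .unwrap_or(false)
}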
{ + Ok(BucketTargets { + targets: targets.clone(), + }) + } else { + Err(BucketTargetError::BucketRemoteTargetNotFound { + bucket: bucket.to_string(), + }) + } + } + + pub async fn delete(&self, bucket: &str) { + let mut targets_map = self.targets_map.write().await; + let mut arn_remotes_map = self.arn_remotes_map.write().await; + + if let Some(targets) = targets_map.remove(bucket) { + for target in targets { + arn_remotes_map.remove(&target.arn); + } + } + } + + pub async fn set_target(&self, bucket: &str, target: &BucketTarget, update: bool) -> Result<(), BucketTargetError> { + if !target.target_type.is_valid() && !update { + return Err(BucketTargetError::BucketRemoteArnTypeInvalid { + bucket: bucket.to_string(), + }); + } + + let target_client = self.get_remote_target_client_internal(target).await?; + + // Validate target credentials + if !self.validate_target_credentials(target).await? { + return Err(BucketTargetError::BucketRemoteTargetNotFound { + bucket: target.target_bucket.clone(), + }); + } + + match target_client.bucket_exists(&target.target_bucket).await { + Ok(false) => { + return Err(BucketTargetError::BucketRemoteTargetNotFound { + bucket: target.target_bucket.clone(), + }); + } + Err(e) => { + return Err(BucketTargetError::RemoteTargetConnectionErr { + bucket: target.target_bucket.clone(), + access_key: target.credentials.as_ref().map(|c| c.access_key.clone()).unwrap_or_default(), + error: e.to_string(), + }); + } + Ok(true) => {} + } + + if target.target_type == BucketTargetType::ReplicationService { + if !BucketVersioningSys::enabled(bucket).await { + return Err(BucketTargetError::BucketReplicationSourceNotVersioned { + bucket: bucket.to_string(), + }); + } + + let versioning = target_client + .get_bucket_versioning(&target.target_bucket) + .await + .map_err(|_e| BucketTargetError::BucketReplicationSourceNotVersioned { + bucket: bucket.to_string(), + })?; + + if versioning.is_none() { + return Err(BucketTargetError::BucketReplicationSourceNotVersioned { + bucket: bucket.to_string(), + }); + } + } + + { + let mut targets_map = self.targets_map.write().await; + let bucket_targets = targets_map.entry(bucket.to_string()).or_insert_with(Vec::new); + let mut found = false; + + for (idx, existing_target) in bucket_targets.iter().enumerate() { + if existing_target.target_type.to_string() == target.target_type.to_string() { + if existing_target.arn == target.arn { + if !update { + return Err(BucketTargetError::BucketRemoteAlreadyExists { + bucket: existing_target.target_bucket.clone(), + }); + } + bucket_targets[idx] = target.clone(); + found = true; + break; + } + if existing_target.endpoint == target.endpoint { + return Err(BucketTargetError::BucketRemoteAlreadyExists { + bucket: existing_target.target_bucket.clone(), + }); + } + } + } + + if !found && !update { + bucket_targets.push(target.clone()); + } + } + + { + let mut arn_remotes_map = self.arn_remotes_map.write().await; + arn_remotes_map.insert( + target.arn.clone(), + ArnTarget { + client: Some(Arc::new(target_client)), + last_refresh: OffsetDateTime::now_utc(), + }, + ); + } + + self.update_bandwidth_limit(bucket, &target.arn, target.bandwidth_limit); + Ok(()) + } + + pub async fn remove_target(&self, bucket: &str, arn_str: &str) -> Result<(), BucketTargetError> { + if arn_str.is_empty() { + return Err(BucketTargetError::BucketRemoteArnInvalid { + bucket: bucket.to_string(), + }); + } + + let arn = ARN::from_str(arn_str).map_err(|_e| BucketTargetError::BucketRemoteArnInvalid { + bucket: bucket.to_string(), + })?; + + if 
arn.arn_type == BucketTargetType::ReplicationService { + if let Ok((config, _)) = get_replication_config(bucket).await { + for rule in config.filter_target_arns(&ObjectOpts { + op_type: ReplicationType::All, + ..Default::default() + }) { + if rule == arn_str || config.role == arn_str { + let arn_remotes_map = self.arn_remotes_map.read().await; + if arn_remotes_map.get(arn_str).is_some() { + return Err(BucketTargetError::BucketRemoteRemoveDisallowed { + bucket: bucket.to_string(), + }); + } + } + } + } + } + + { + let mut targets_map = self.targets_map.write().await; + + let Some(targets) = targets_map.get(bucket) else { + return Err(BucketTargetError::BucketRemoteTargetNotFound { + bucket: bucket.to_string(), + }); + }; + + let new_targets: Vec = targets.iter().filter(|t| t.arn != arn_str).cloned().collect(); + + if new_targets.len() == targets.len() { + return Err(BucketTargetError::BucketRemoteTargetNotFound { + bucket: bucket.to_string(), + }); + } + + targets_map.insert(bucket.to_string(), new_targets); + } + + { + self.arn_remotes_map.write().await.remove(arn_str); + } + + self.update_bandwidth_limit(bucket, arn_str, 0); + + Ok(()) + } + + pub async fn mark_refresh_in_progress(&self, bucket: &str, arn: &str) { + let mut arn_errs = self.arn_errs_map.write().await; + arn_errs.entry(arn.to_string()).or_insert_with(|| ArnErrs { + bucket: bucket.to_string(), + update_in_progress: true, + count: 1, + }); + } + + pub async fn mark_refresh_done(&self, bucket: &str, arn: &str) { + let mut arn_errs = self.arn_errs_map.write().await; + if let Some(err) = arn_errs.get_mut(arn) { + err.update_in_progress = false; + err.bucket = bucket.to_string(); + } + } + + pub async fn is_reloading_target(&self, _bucket: &str, arn: &str) -> bool { + let arn_errs = self.arn_errs_map.read().await; + arn_errs.get(arn).map(|err| err.update_in_progress).unwrap_or(false) + } + + pub async fn inc_arn_errs(&self, _bucket: &str, arn: &str) { + let mut arn_errs = self.arn_errs_map.write().await; + if let Some(err) = arn_errs.get_mut(arn) { + err.count += 1; + } + } + + pub async fn get_remote_target_client(&self, bucket: &str, arn: &str) -> Option> { + let (cli, last_refresh) = { + self.arn_remotes_map + .read() + .await + .get(arn) + .map(|target| (target.client.clone(), Some(target.last_refresh))) + .unwrap_or((None, None)) + }; + + if let Some(cli) = cli { + return Some(cli.clone()); + } + + // TODO: spawn a task to reload the target + if self.is_reloading_target(bucket, arn).await { + return None; + } + + if let Some(last_refresh) = last_refresh { + let now = OffsetDateTime::now_utc(); + if now - last_refresh > Duration::from_secs(60 * 5) { + return None; + } + } + + match get_bucket_targets_config(bucket).await { + Ok(bucket_targets) => { + self.mark_refresh_in_progress(bucket, arn).await; + self.update_all_targets(bucket, Some(&bucket_targets)).await; + self.mark_refresh_done(bucket, arn).await; + } + Err(e) => { + error!("get bucket targets config error:{}", e); + } + }; + + self.inc_arn_errs(bucket, arn).await; + None + } + + pub async fn get_remote_target_client_internal(&self, target: &BucketTarget) -> Result { + let Some(credentials) = &target.credentials else { + return Err(BucketTargetError::BucketRemoteTargetNotFound { + bucket: target.target_bucket.clone(), + }); + }; + + let creds = SdkCredentials::builder() + .access_key_id(credentials.access_key.clone()) + .secret_access_key(credentials.secret_key.clone()) + .account_id(target.reset_id.clone()) + .provider_name("bucket_target_sys") + .build(); + + let 
endpoint = if target.secure { + format!("https://{}", target.endpoint) + } else { + format!("http://{}", target.endpoint) + }; + + let config = S3Config::builder() + .endpoint_url(endpoint.clone()) + .credentials_provider(SharedCredentialsProvider::new(creds)) + .region(SdkRegion::new(target.region.clone())) + .behavior_version(aws_sdk_s3::config::BehaviorVersion::latest()) + .build(); + + Ok(TargetClient { + endpoint, + credentials: target.credentials.clone(), + bucket: target.target_bucket.clone(), + storage_class: target.storage_class.clone(), + disable_proxy: target.disable_proxy, + arn: target.arn.clone(), + reset_id: target.reset_id.clone(), + secure: target.secure, + health_check_duration: target.health_check_duration, + replicate_sync: target.replication_sync, + client: Arc::new(S3Client::from_conf(config)), + }) + } + + async fn validate_target_credentials(&self, _target: &BucketTarget) -> Result { + // In a real implementation, you would validate the credentials + // by making actual API calls to the target + Ok(true) + } + + fn update_bandwidth_limit(&self, _bucket: &str, _arn: &str, _limit: i64) { + // Implementation for bandwidth limit update + // This would interact with the global bucket monitor + } + + pub async fn get_remote_target_client_by_arn(&self, _bucket: &str, arn: &str) -> Option> { + let arn_remotes_map = self.arn_remotes_map.read().await; + arn_remotes_map.get(arn).and_then(|target| target.client.clone()) + } + + pub async fn get_remote_bucket_target_by_arn(&self, bucket: &str, arn: &str) -> Option { + let targets_map = self.targets_map.read().await; + targets_map + .get(bucket) + .and_then(|targets| targets.iter().find(|t| t.arn == arn).cloned()) + } + + pub async fn update_all_targets(&self, bucket: &str, targets: Option<&BucketTargets>) { + let mut targets_map = self.targets_map.write().await; + let mut arn_remotes_map = self.arn_remotes_map.write().await; + // Remove existing targets + if let Some(existing_targets) = targets_map.remove(bucket) { + for target in existing_targets { + arn_remotes_map.remove(&target.arn); + } + } + + // Add new targets + if let Some(new_targets) = targets { + if !new_targets.is_empty() { + for target in &new_targets.targets { + if let Ok(client) = self.get_remote_target_client_internal(target).await { + arn_remotes_map.insert( + target.arn.clone(), + ArnTarget { + client: Some(Arc::new(client)), + last_refresh: OffsetDateTime::now_utc(), + }, + ); + self.update_bandwidth_limit(bucket, &target.arn, target.bandwidth_limit); + } + } + targets_map.insert(bucket.to_string(), new_targets.targets.clone()); + } + } + } + + pub async fn set(&self, bucket: &str, meta: &BucketMetadata) { + let Some(config) = &meta.bucket_target_config else { + return; + }; + + if config.is_empty() { + return; + } + + for target in config.targets.iter() { + let cli = match self.get_remote_target_client_internal(target).await { + Ok(cli) => cli, + Err(e) => { + warn!("get_remote_target_client_internal error:{}", e); + continue; + } + }; + + { + let arn_target = ArnTarget::with_client(Arc::new(cli)); + let mut arn_remotes_map = self.arn_remotes_map.write().await; + arn_remotes_map.insert(target.arn.clone(), arn_target); + } + self.update_bandwidth_limit(bucket, &target.arn, target.bandwidth_limit); + } + + let mut targets_map = self.targets_map.write().await; + targets_map.insert(bucket.to_string(), config.targets.clone()); + } + + // getRemoteARN gets existing ARN for an endpoint or generates a new one. 
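// The client construction in `get_remote_target_client_internal`, reduced to
// its essentials as a sketch; inputs and the `remote_client` name are
// illustrative. Depending on the remote, `.force_path_style(true)` may also be
// needed for S3-compatible endpoints that don't support virtual-hosted
// addressing; the patch leaves the SDK default in place.
use aws_credential_types::{Credentials as StaticCredentials, provider::SharedCredentialsProvider};

fn remote_client(endpoint_url: &str, access_key: &str, secret_key: &str, region: &str) -> S3Client {
    let creds = StaticCredentials::new(access_key, secret_key, None, None, "bucket_target_sys");
    let config = S3Config::builder()
        .endpoint_url(endpoint_url)
        .credentials_provider(SharedCredentialsProvider::new(creds))
        .region(SdkRegion::new(region.to_string()))
        .behavior_version(aws_sdk_s3::config::BehaviorVersion::latest())
        .build();
    S3Client::from_conf(config)
}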
+ pub async fn get_remote_arn(&self, bucket: &str, target: Option<&BucketTarget>, depl_id: &str) -> (String, bool) { + let Some(target) = target else { + return (String::new(), false); + }; + + { + let targets_map = self.targets_map.read().await; + if let Some(targets) = targets_map.get(bucket) { + for tgt in targets { + if tgt.target_type == target.target_type + && tgt.target_bucket == target.target_bucket + && target.endpoint == tgt.endpoint + && tgt + .credentials + .as_ref() + .map(|c| c.access_key == target.credentials.as_ref().unwrap_or(&Credentials::default()).access_key) + .unwrap_or(false) + { + return (tgt.arn.clone(), true); + } + } + } + } + + if !target.target_type.is_valid() { + return (String::new(), false); + } + let arn = generate_arn(target, depl_id); + (arn, false) + } +} + +// generate ARN that is unique to this target type +fn generate_arn(t: &BucketTarget, depl_id: &str) -> String { + let uuid = if depl_id.is_empty() { + Uuid::new_v4().to_string() + } else { + depl_id.to_string() + }; + let arn = ARN { + arn_type: t.target_type.clone(), + id: uuid, + region: t.region.clone(), + bucket: t.target_bucket.clone(), + }; + arn.to_string() +} + +pub struct RemoveObjectOptions { + pub force_delete: bool, + pub governance_bypass: bool, + pub replication_delete_marker: bool, + pub replication_mtime: Option, + pub replication_status: ReplicationStatusType, + pub replication_request: bool, + pub replication_validity_check: bool, +} + +#[derive(Debug, Clone)] +pub struct AdvancedPutOptions { + pub source_version_id: String, + pub source_etag: String, + pub replication_status: ReplicationStatusType, + pub source_mtime: OffsetDateTime, + pub replication_request: bool, + pub retention_timestamp: OffsetDateTime, + pub tagging_timestamp: OffsetDateTime, + pub legalhold_timestamp: OffsetDateTime, + pub replication_validity_check: bool, +} + +impl Default for AdvancedPutOptions { + fn default() -> Self { + Self { + source_version_id: "".to_string(), + source_etag: "".to_string(), + replication_status: ReplicationStatusType::Pending, + source_mtime: OffsetDateTime::now_utc(), + replication_request: false, + retention_timestamp: OffsetDateTime::now_utc(), + tagging_timestamp: OffsetDateTime::now_utc(), + legalhold_timestamp: OffsetDateTime::now_utc(), + replication_validity_check: false, + } + } +} + +#[derive(Clone)] +pub struct PutObjectOptions { + pub user_metadata: HashMap, + pub user_tags: HashMap, + //pub progress: ReaderImpl, + pub content_type: String, + pub content_encoding: String, + pub content_disposition: String, + pub content_language: String, + pub cache_control: String, + pub expires: OffsetDateTime, + pub mode: Option, + pub retain_until_date: OffsetDateTime, + //pub server_side_encryption: encrypt::ServerSide, + pub num_threads: u64, + pub storage_class: String, + pub website_redirect_location: String, + pub part_size: u64, + pub legalhold: Option, + pub send_content_md5: bool, + pub disable_content_sha256: bool, + pub disable_multipart: bool, + pub auto_checksum: Option, + pub checksum: Option, + pub concurrent_stream_parts: bool, + pub internal: AdvancedPutOptions, + pub custom_header: HeaderMap, +} + +impl Default for PutObjectOptions { + fn default() -> Self { + Self { + user_metadata: HashMap::new(), + user_tags: HashMap::new(), + //progress: ReaderImpl::Body(Bytes::new()), + content_type: "".to_string(), + content_encoding: "".to_string(), + content_disposition: "".to_string(), + content_language: "".to_string(), + cache_control: "".to_string(), + expires: 
OffsetDateTime::UNIX_EPOCH, + mode: None, + retain_until_date: OffsetDateTime::UNIX_EPOCH, + //server_side_encryption: encrypt.ServerSide::default(), + num_threads: 0, + storage_class: "".to_string(), + website_redirect_location: "".to_string(), + part_size: 0, + legalhold: None, + send_content_md5: false, + disable_content_sha256: false, + disable_multipart: false, + auto_checksum: None, + checksum: None, + concurrent_stream_parts: false, + internal: AdvancedPutOptions::default(), + custom_header: HeaderMap::new(), + } + } +} + +#[allow(dead_code)] +impl PutObjectOptions { + fn set_match_etag(&mut self, etag: &str) { + if etag == "*" { + self.custom_header + .insert("If-Match", HeaderValue::from_str("*").expect("err")); + } else { + self.custom_header + .insert("If-Match", HeaderValue::from_str(&format!("\"{etag}\"")).expect("err")); + } + } + + fn set_match_etag_except(&mut self, etag: &str) { + if etag == "*" { + self.custom_header + .insert("If-None-Match", HeaderValue::from_str("*").expect("err")); + } else { + self.custom_header + .insert("If-None-Match", HeaderValue::from_str(&format!("\"{etag}\"")).expect("err")); + } + } + + pub fn header(&self) -> HeaderMap { + let mut header = HeaderMap::new(); + + let mut content_type = self.content_type.clone(); + if content_type.is_empty() { + content_type = "application/octet-stream".to_string(); + } + header.insert("Content-Type", HeaderValue::from_str(&content_type).expect("err")); + + if !self.content_encoding.is_empty() { + header.insert("Content-Encoding", HeaderValue::from_str(&self.content_encoding).expect("err")); + } + if !self.content_disposition.is_empty() { + header.insert("Content-Disposition", HeaderValue::from_str(&self.content_disposition).expect("err")); + } + if !self.content_language.is_empty() { + header.insert("Content-Language", HeaderValue::from_str(&self.content_language).expect("err")); + } + if !self.cache_control.is_empty() { + header.insert("Cache-Control", HeaderValue::from_str(&self.cache_control).expect("err")); + } + + if self.expires.unix_timestamp() != 0 { + header.insert("Expires", HeaderValue::from_str(&self.expires.format(&Rfc3339).unwrap()).expect("err")); //rustfs invalid header + } + + if let Some(mode) = &self.mode { + header.insert(AMZ_OBJECT_LOCK_MODE, HeaderValue::from_str(mode.as_str()).expect("err")); + } + + if self.retain_until_date.unix_timestamp() != 0 { + header.insert( + AMZ_OBJECT_LOCK_RETAIN_UNTIL_DATE, + HeaderValue::from_str(&self.retain_until_date.format(&Rfc3339).unwrap()).expect("err"), + ); + } + + if let Some(legalhold) = &self.legalhold { + header.insert(AMZ_OBJECT_LOCK_LEGAL_HOLD, HeaderValue::from_str(legalhold.as_str()).expect("err")); + } + + if !self.storage_class.is_empty() { + header.insert(AMZ_STORAGE_CLASS, HeaderValue::from_str(&self.storage_class).expect("err")); + } + + if !self.website_redirect_location.is_empty() { + header.insert( + AMZ_WEBSITE_REDIRECT_LOCATION, + HeaderValue::from_str(&self.website_redirect_location).expect("err"), + ); + } + + if !self.internal.replication_status.as_str().is_empty() { + header.insert( + AMZ_BUCKET_REPLICATION_STATUS, + HeaderValue::from_str(self.internal.replication_status.as_str()).expect("err"), + ); + } + + for (k, v) in &self.user_metadata { + if is_amz_header(k) || is_standard_header(k) || is_storageclass_header(k) || is_rustfs_header(k) || is_minio_header(k) + { + if let Ok(header_name) = HeaderName::from_bytes(k.as_bytes()) { + header.insert(header_name, HeaderValue::from_str(v).unwrap()); + } + } else if let 
Ok(header_name) = HeaderName::from_bytes(format!("x-amz-meta-{k}").as_bytes()) {
+                header.insert(header_name, HeaderValue::from_str(v).unwrap());
+            }
+        }
+
+        for (k, v) in self.custom_header.iter() {
+            header.insert(k.clone(), v.clone());
+        }
+
+        if !self.internal.source_version_id.is_empty() {
+            header.insert(
+                RUSTFS_BUCKET_SOURCE_VERSION_ID,
+                HeaderValue::from_str(&self.internal.source_version_id).expect("err"),
+            );
+        }
+        if !self.internal.source_etag.is_empty() {
+            header.insert(RUSTFS_BUCKET_SOURCE_ETAG, HeaderValue::from_str(&self.internal.source_etag).expect("err"));
+        }
+        if self.internal.source_mtime.unix_timestamp() != 0 {
+            header.insert(
+                RUSTFS_BUCKET_SOURCE_MTIME,
+                HeaderValue::from_str(&self.internal.source_mtime.unix_timestamp().to_string()).expect("err"),
+            );
+        }
+
+        header
+    }
+
+    fn validate(&self, _c: Arc<TargetClient>) -> Result<(), std::io::Error> {
+        //if self.checksum.is_set() {
+        /*if !self.trailing_header_support {
+            return Err(Error::from(err_invalid_argument("Checksum requires Client with TrailingHeaders enabled")));
+        }*/
+        /*else if self.override_signer_type == SignatureType::SignatureV2 {
+            return Err(Error::from(err_invalid_argument("Checksum cannot be used with v2 signatures")));
+        }*/
+        //}
+
+        Ok(())
+    }
+}
+
+#[derive(Debug, Clone, Default)]
+pub struct PutObjectPartOptions {
+    pub md5_base64: String,
+    pub sha256_hex: String,
+    //pub sse: encrypt.ServerSide,
+    pub custom_header: HeaderMap,
+    pub trailer: HeaderMap,
+    pub disable_content_sha256: bool,
+}
+
+#[derive(Debug)]
+pub struct S3ClientError {
+    pub error: String,
+    pub status_code: Option<u16>,
+    pub code: Option<String>,
+    pub message: Option<String>,
+}
+impl S3ClientError {
+    pub fn new(value: impl Into<String>) -> Self {
+        S3ClientError {
+            error: value.into(),
+            status_code: None,
+            code: None,
+            message: None,
+        }
+    }
+
+    pub fn add_message(self, message: impl Into<String>) -> Self {
+        S3ClientError {
+            error: format!("{}: {}", message.into(), self.error),
+            status_code: self.status_code,
+            code: self.code,
+            message: self.message,
+        }
+    }
+}
+
+impl<T: ProvideErrorMetadata> From<T> for S3ClientError {
+    fn from(value: T) -> Self {
+        S3ClientError {
+            error: format!(
+                "{}: {}",
+                value.code().map(String::from).unwrap_or("unknown code".into()),
+                value.message().map(String::from).unwrap_or("missing reason".into()),
+            ),
+            status_code: None,
+            code: None,
+            message: None,
+        }
+    }
+}
+
+impl std::error::Error for S3ClientError {}
+
+impl std::fmt::Display for S3ClientError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.error)
+    }
+}
+
+#[derive(Debug)]
+pub struct TargetClient {
+    pub endpoint: String,
+    pub credentials: Option<Credentials>,
+    pub bucket: String,
+    pub storage_class: String,
+    pub disable_proxy: bool,
+    pub arn: String,
+    pub reset_id: String,
+    pub secure: bool,
+    pub health_check_duration: Duration,
+    pub replicate_sync: bool,
+    pub client: Arc<S3Client>,
+}
+
+impl TargetClient {
+    pub fn to_url(&self) -> Url {
+        let scheme = if self.secure { "https" } else { "http" };
+        Url::parse(&format!("{scheme}://{}", self.endpoint)).unwrap()
+    }
+
+    pub async fn bucket_exists(&self, bucket: &str) -> Result<bool, S3ClientError> {
+        match self.client.head_bucket().bucket(bucket).send().await {
+            Ok(_) => Ok(true),
+            Err(e) => match e {
+                SdkError::ServiceError(oe) => match oe.into_err() {
+                    HeadBucketError::NotFound(_) => Ok(false),
+                    other => Err(other.into()),
+                },
+                _ => Err(e.into()),
+            },
+        }
+    }
+
+    pub async fn get_bucket_versioning(&self, bucket: &str) -> Result<Option<BucketVersioningStatus>, S3ClientError> {
+        match
self.client.get_bucket_versioning().bucket(bucket).send().await { + Ok(res) => Ok(res.status), + Err(e) => Err(e.into()), + } + } + + pub async fn head_object( + &self, + bucket: &str, + object: &str, + version_id: Option, + ) -> Result> { + match self + .client + .head_object() + .bucket(bucket) + .key(object) + .set_version_id(version_id) + .send() + .await + { + Ok(res) => Ok(res), + Err(e) => Err(e), + } + } + + pub async fn put_object( + &self, + bucket: &str, + object: &str, + size: i64, + body: ByteStream, + opts: &PutObjectOptions, + ) -> Result<(), S3ClientError> { + let headers = opts.header(); + + let builder = self.client.put_object(); + + match builder + .bucket(bucket) + .key(object) + .content_length(size) + .body(body) + .customize() + .map_request(move |mut req| { + for (k, v) in headers.clone().into_iter() { + let key_str = k.unwrap().as_str().to_string(); + let value_str = v.to_str().unwrap_or("").to_string(); + req.headers_mut().insert(key_str, value_str); + } + + Result::<_, aws_smithy_types::error::operation::BuildError>::Ok(req) + }) + .send() + .await + { + Ok(_) => Ok(()), + Err(e) => Err(e.into()), + } + } + + pub async fn create_multipart_upload( + &self, + bucket: &str, + object: &str, + _opts: &PutObjectOptions, + ) -> Result { + match self.client.create_multipart_upload().bucket(bucket).key(object).send().await { + Ok(res) => Ok(res.upload_id.unwrap_or_default()), + Err(e) => Err(e.into()), + } + } + + #[allow(clippy::too_many_arguments)] + pub async fn put_object_part( + &self, + bucket: &str, + object: &str, + upload_id: &str, + part_id: i32, + size: i64, + body: ByteStream, + opts: &PutObjectPartOptions, + ) -> Result { + let headers = opts.custom_header.clone(); + + match self + .client + .upload_part() + .bucket(bucket) + .key(object) + .upload_id(upload_id) + .part_number(part_id) + .content_length(size) + .body(body) + .customize() + .map_request(move |mut req| { + for (k, v) in headers.clone().into_iter() { + let key_str = k.unwrap().as_str().to_string(); + let value_str = v.to_str().unwrap_or("").to_string(); + req.headers_mut().insert(key_str, value_str); + } + Result::<_, aws_smithy_types::error::operation::BuildError>::Ok(req) + }) + .send() + .await + { + Ok(res) => Ok(res), + Err(e) => Err(e.into()), + } + } + + pub async fn complete_multipart_upload( + &self, + bucket: &str, + object: &str, + upload_id: &str, + parts: Vec, + opts: &PutObjectOptions, + ) -> Result { + let multipart_upload = CompletedMultipartUpload::builder().set_parts(Some(parts)).build(); + + let headers = opts.header(); + + match self + .client + .complete_multipart_upload() + .bucket(bucket) + .key(object) + .upload_id(upload_id) + .multipart_upload(multipart_upload) + .customize() + .map_request(move |mut req| { + for (k, v) in headers.clone().into_iter() { + let key_str = k.unwrap().as_str().to_string(); + let value_str = v.to_str().unwrap_or("").to_string(); + req.headers_mut().insert(key_str, value_str); + } + Result::<_, aws_smithy_types::error::operation::BuildError>::Ok(req) + }) + .send() + .await + { + Ok(res) => Ok(res), + Err(e) => Err(e.into()), + } + } + + pub async fn remove_object( + &self, + bucket: &str, + object: &str, + version_id: Option, + opts: RemoveObjectOptions, + ) -> Result<(), S3ClientError> { + let mut headers = HeaderMap::new(); + if opts.force_delete { + headers.insert(RUSTFS_FORCE_DELETE, "true".parse().unwrap()); + } + if opts.governance_bypass { + headers.insert(AMZ_OBJECT_LOCK_BYPASS_GOVERNANCE, "true".parse().unwrap()); + } + + if 
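// The header-injection pattern shared by `put_object`, `put_object_part`,
// `complete_multipart_upload`, and `remove_object` above, reduced to its core
// as a sketch (`put_with_headers` is illustrative): the AWS SDK has no typed
// fields for RustFS's replication headers, so `.customize().map_request(..)`
// splices a prepared `HeaderMap` onto the raw HTTP request.
async fn put_with_headers(
    client: &S3Client,
    bucket: &str,
    key: &str,
    body: ByteStream,
    headers: HeaderMap,
) -> Result<(), S3ClientError> {
    client
        .put_object()
        .bucket(bucket)
        .key(key)
        .body(body)
        .customize()
        .map_request(move |mut req| {
            for (name, value) in headers.iter() {
                // aws-smithy request headers accept plain strings.
                req.headers_mut()
                    .insert(name.as_str().to_string(), value.to_str().unwrap_or("").to_string());
            }
            Result::<_, aws_smithy_types::error::operation::BuildError>::Ok(req)
        })
        .send()
        .await
        .map(|_| ())
        .map_err(Into::into)
}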
opts.replication_delete_marker { + headers.insert(RUSTFS_BUCKET_REPLICATION_DELETE_MARKER, "true".parse().unwrap()); + } + + if let Some(t) = opts.replication_mtime { + headers.insert( + RUSTFS_BUCKET_SOURCE_MTIME, + t.format(&Rfc3339).unwrap_or_default().as_str().parse().unwrap(), + ); + } + + if !opts.replication_status.is_empty() { + headers.insert(AMZ_BUCKET_REPLICATION_STATUS, opts.replication_status.as_str().parse().unwrap()); + } + + if opts.replication_request { + headers.insert(RUSTFS_BUCKET_REPLICATION_REQUEST, "true".parse().unwrap()); + } + if opts.replication_validity_check { + headers.insert(RUSTFS_BUCKET_REPLICATION_CHECK, "true".parse().unwrap()); + } + + match self + .client + .delete_object() + .bucket(bucket) + .key(object) + .set_version_id(version_id) + .customize() + .map_request(move |mut req| { + for (k, v) in headers.clone().into_iter() { + let key_str = k.unwrap().as_str().to_string(); + let value_str = v.to_str().unwrap_or("").to_string(); + req.headers_mut().insert(key_str, value_str); + } + Result::<_, aws_smithy_types::error::operation::BuildError>::Ok(req) + }) + .send() + .await + { + Ok(_res) => Ok(()), + Err(e) => Err(e.into()), + } + } +} + +#[derive(Debug)] +pub enum BucketTargetError { + BucketRemoteTargetNotFound { + bucket: String, + }, + BucketRemoteArnTypeInvalid { + bucket: String, + }, + BucketRemoteAlreadyExists { + bucket: String, + }, + BucketRemoteArnInvalid { + bucket: String, + }, + RemoteTargetConnectionErr { + bucket: String, + access_key: String, + error: String, + }, + BucketReplicationSourceNotVersioned { + bucket: String, + }, + BucketRemoteTargetNotVersioned { + bucket: String, + }, + BucketRemoteRemoveDisallowed { + bucket: String, + }, + + Io(std::io::Error), +} + +impl fmt::Display for BucketTargetError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + BucketTargetError::BucketRemoteTargetNotFound { bucket } => { + write!(f, "Remote target not found for bucket: {bucket}") + } + BucketTargetError::BucketRemoteArnTypeInvalid { bucket } => { + write!(f, "Invalid ARN type for bucket: {bucket}") + } + BucketTargetError::BucketRemoteAlreadyExists { bucket } => { + write!(f, "Remote target already exists for bucket: {bucket}") + } + BucketTargetError::BucketRemoteArnInvalid { bucket } => { + write!(f, "Invalid ARN for bucket: {bucket}") + } + BucketTargetError::RemoteTargetConnectionErr { + bucket, + access_key, + error, + } => { + write!(f, "Connection error for bucket: {bucket}, access key: {access_key}, error: {error}") + } + BucketTargetError::BucketReplicationSourceNotVersioned { bucket } => { + write!(f, "Replication source bucket not versioned: {bucket}") + } + BucketTargetError::BucketRemoteTargetNotVersioned { bucket } => { + write!(f, "Remote target bucket not versioned: {bucket}") + } + BucketTargetError::BucketRemoteRemoveDisallowed { bucket } => { + write!(f, "Remote target removal disallowed for bucket: {bucket}") + } + BucketTargetError::Io(e) => write!(f, "IO error: {e}"), + } + } +} + +impl From for BucketTargetError { + fn from(e: std::io::Error) -> Self { + BucketTargetError::Io(e) + } +} + +impl Error for BucketTargetError {} diff --git a/crates/ecstore/src/bucket/metadata.rs b/crates/ecstore/src/bucket/metadata.rs index 63b158f5..f388cd0c 100644 --- a/crates/ecstore/src/bucket/metadata.rs +++ b/crates/ecstore/src/bucket/metadata.rs @@ -16,6 +16,10 @@ use super::{quota::BucketQuota, target::BucketTargets}; use super::object_lock::ObjectLockApi; use super::versioning::VersioningApi; +use 
crate::bucket::utils::deserialize; +use crate::config::com::{read_config, save_config}; +use crate::error::{Error, Result}; +use crate::new_object_layer_fn; use byteorder::{BigEndian, ByteOrder, LittleEndian}; use rmp_serde::Serializer as rmpSerializer; use rustfs_policy::policy::BucketPolicy; @@ -30,12 +34,6 @@ use std::sync::Arc; use time::OffsetDateTime; use tracing::error; -use crate::bucket::target::BucketTarget; -use crate::bucket::utils::deserialize; -use crate::config::com::{read_config, save_config}; -use crate::error::{Error, Result}; -use crate::new_object_layer_fn; - use crate::disk::BUCKET_META_PREFIX; use crate::store::ECStore; @@ -322,7 +320,9 @@ impl BucketMetadata { LittleEndian::write_u16(&mut buf[2..4], BUCKET_METADATA_VERSION); - let data = self.marshal_msg()?; + let data = self + .marshal_msg() + .map_err(|e| Error::other(format!("save bucket metadata failed: {e}")))?; buf.extend_from_slice(&data); @@ -362,8 +362,8 @@ impl BucketMetadata { } //let temp = self.bucket_targets_config_json.clone(); if !self.bucket_targets_config_json.is_empty() { - let arr: Vec = serde_json::from_slice(&self.bucket_targets_config_json)?; - self.bucket_target_config = Some(BucketTargets { targets: arr }); + let bucket_targets: BucketTargets = serde_json::from_slice(&self.bucket_targets_config_json)?; + self.bucket_target_config = Some(bucket_targets); } else { self.bucket_target_config = Some(BucketTargets::default()) } @@ -451,4 +451,154 @@ mod test { assert_eq!(bm.name, new.name); } + + #[tokio::test] + async fn marshal_msg_complete_example() { + // Create a complete BucketMetadata with various configurations + let mut bm = BucketMetadata::new("test-bucket"); + + // Set creation time to current time + bm.created = OffsetDateTime::now_utc(); + bm.lock_enabled = true; + + // Add policy configuration + let policy_json = r#"{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Principal":"*","Action":"s3:GetObject","Resource":"arn:aws:s3:::test-bucket/*"}]}"#; + bm.policy_config_json = policy_json.as_bytes().to_vec(); + bm.policy_config_updated_at = OffsetDateTime::now_utc(); + + // Add lifecycle configuration + let lifecycle_xml = r#"rule1Enabled30"#; + bm.lifecycle_config_xml = lifecycle_xml.as_bytes().to_vec(); + bm.lifecycle_config_updated_at = OffsetDateTime::now_utc(); + + // Add versioning configuration + let versioning_xml = r#"Enabled"#; + bm.versioning_config_xml = versioning_xml.as_bytes().to_vec(); + bm.versioning_config_updated_at = OffsetDateTime::now_utc(); + + // Add encryption configuration + let encryption_xml = r#"AES256"#; + bm.encryption_config_xml = encryption_xml.as_bytes().to_vec(); + bm.encryption_config_updated_at = OffsetDateTime::now_utc(); + + // Add tagging configuration + let tagging_xml = r#"EnvironmentTestOwnerRustFS"#; + bm.tagging_config_xml = tagging_xml.as_bytes().to_vec(); + bm.tagging_config_updated_at = OffsetDateTime::now_utc(); + + // Add quota configuration + let quota_json = r#"{"quota":1073741824,"quotaType":"hard"}"#; // 1GB quota + bm.quota_config_json = quota_json.as_bytes().to_vec(); + bm.quota_config_updated_at = OffsetDateTime::now_utc(); + + // Add object lock configuration + let object_lock_xml = r#"EnabledGOVERNANCE7"#; + bm.object_lock_config_xml = object_lock_xml.as_bytes().to_vec(); + bm.object_lock_config_updated_at = OffsetDateTime::now_utc(); + + // Add notification configuration + let notification_xml = r#"notification1s3:ObjectCreated:*test-log-group"#; + bm.notification_config_xml = notification_xml.as_bytes().to_vec(); + 
bm.notification_config_updated_at = OffsetDateTime::now_utc(); + + // Add replication configuration + let replication_xml = r#"arn:aws:iam::123456789012:role/replication-rolerule1Enableddocuments/arn:aws:s3:::destination-bucket"#; + bm.replication_config_xml = replication_xml.as_bytes().to_vec(); + bm.replication_config_updated_at = OffsetDateTime::now_utc(); + + // Add bucket targets configuration + let bucket_targets_json = r#"[{"endpoint":"http://target1.example.com","credentials":{"accessKey":"key1","secretKey":"secret1"},"targetBucket":"target-bucket-1","region":"us-east-1"},{"endpoint":"http://target2.example.com","credentials":{"accessKey":"key2","secretKey":"secret2"},"targetBucket":"target-bucket-2","region":"us-west-2"}]"#; + bm.bucket_targets_config_json = bucket_targets_json.as_bytes().to_vec(); + bm.bucket_targets_config_updated_at = OffsetDateTime::now_utc(); + + // Add bucket targets meta configuration + let bucket_targets_meta_json = r#"{"replicationId":"repl-123","syncMode":"async","bandwidth":"100MB"}"#; + bm.bucket_targets_config_meta_json = bucket_targets_meta_json.as_bytes().to_vec(); + bm.bucket_targets_config_meta_updated_at = OffsetDateTime::now_utc(); + + // Test serialization + let buf = bm.marshal_msg().unwrap(); + assert!(!buf.is_empty(), "Serialized buffer should not be empty"); + + // Test deserialization + let deserialized_bm = BucketMetadata::unmarshal(&buf).unwrap(); + + // Verify all fields are correctly serialized and deserialized + assert_eq!(bm.name, deserialized_bm.name); + assert_eq!(bm.created.unix_timestamp(), deserialized_bm.created.unix_timestamp()); + assert_eq!(bm.lock_enabled, deserialized_bm.lock_enabled); + + // Verify configuration data + assert_eq!(bm.policy_config_json, deserialized_bm.policy_config_json); + assert_eq!(bm.lifecycle_config_xml, deserialized_bm.lifecycle_config_xml); + assert_eq!(bm.versioning_config_xml, deserialized_bm.versioning_config_xml); + assert_eq!(bm.encryption_config_xml, deserialized_bm.encryption_config_xml); + assert_eq!(bm.tagging_config_xml, deserialized_bm.tagging_config_xml); + assert_eq!(bm.quota_config_json, deserialized_bm.quota_config_json); + assert_eq!(bm.object_lock_config_xml, deserialized_bm.object_lock_config_xml); + assert_eq!(bm.notification_config_xml, deserialized_bm.notification_config_xml); + assert_eq!(bm.replication_config_xml, deserialized_bm.replication_config_xml); + assert_eq!(bm.bucket_targets_config_json, deserialized_bm.bucket_targets_config_json); + assert_eq!(bm.bucket_targets_config_meta_json, deserialized_bm.bucket_targets_config_meta_json); + + // Verify timestamps (comparing unix timestamps to avoid precision issues) + assert_eq!( + bm.policy_config_updated_at.unix_timestamp(), + deserialized_bm.policy_config_updated_at.unix_timestamp() + ); + assert_eq!( + bm.lifecycle_config_updated_at.unix_timestamp(), + deserialized_bm.lifecycle_config_updated_at.unix_timestamp() + ); + assert_eq!( + bm.versioning_config_updated_at.unix_timestamp(), + deserialized_bm.versioning_config_updated_at.unix_timestamp() + ); + assert_eq!( + bm.encryption_config_updated_at.unix_timestamp(), + deserialized_bm.encryption_config_updated_at.unix_timestamp() + ); + assert_eq!( + bm.tagging_config_updated_at.unix_timestamp(), + deserialized_bm.tagging_config_updated_at.unix_timestamp() + ); + assert_eq!( + bm.quota_config_updated_at.unix_timestamp(), + deserialized_bm.quota_config_updated_at.unix_timestamp() + ); + assert_eq!( + bm.object_lock_config_updated_at.unix_timestamp(), + 
deserialized_bm.object_lock_config_updated_at.unix_timestamp() + ); + assert_eq!( + bm.notification_config_updated_at.unix_timestamp(), + deserialized_bm.notification_config_updated_at.unix_timestamp() + ); + assert_eq!( + bm.replication_config_updated_at.unix_timestamp(), + deserialized_bm.replication_config_updated_at.unix_timestamp() + ); + assert_eq!( + bm.bucket_targets_config_updated_at.unix_timestamp(), + deserialized_bm.bucket_targets_config_updated_at.unix_timestamp() + ); + assert_eq!( + bm.bucket_targets_config_meta_updated_at.unix_timestamp(), + deserialized_bm.bucket_targets_config_meta_updated_at.unix_timestamp() + ); + + // Test that the serialized data contains expected content + let buf_str = String::from_utf8_lossy(&buf); + assert!(buf_str.contains("test-bucket"), "Serialized data should contain bucket name"); + + // Verify the buffer size is reasonable (should be larger due to all the config data) + assert!(buf.len() > 1000, "Buffer should be substantial in size due to all configurations"); + + println!("✅ Complete BucketMetadata serialization test passed"); + println!(" - Bucket name: {}", deserialized_bm.name); + println!(" - Lock enabled: {}", deserialized_bm.lock_enabled); + println!(" - Policy config size: {} bytes", deserialized_bm.policy_config_json.len()); + println!(" - Lifecycle config size: {} bytes", deserialized_bm.lifecycle_config_xml.len()); + println!(" - Serialized buffer size: {} bytes", buf.len()); + } } diff --git a/crates/ecstore/src/bucket/metadata_sys.rs b/crates/ecstore/src/bucket/metadata_sys.rs index b192cd2f..395a8b76 100644 --- a/crates/ecstore/src/bucket/metadata_sys.rs +++ b/crates/ecstore/src/bucket/metadata_sys.rs @@ -12,19 +12,20 @@ // See the License for the specific language governing permissions and // limitations under the License. 
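For readers tracing the on-disk format exercised by the round-trip test above: `save` writes a four-byte header ahead of the msgpack payload. A minimal sketch of splitting such a buffer, assuming the format word occupies bytes [0..2] (only the version write at [2..4] is visible in this hunk):

    // Sketch only: split a serialized BucketMetadata buffer into
    // (format, version, msgpack payload). The format field location is assumed.
    fn split_bucket_metadata(buf: &[u8]) -> Option<(u16, u16, &[u8])> {
        if buf.len() <= 4 {
            return None;
        }
        let format = u16::from_le_bytes([buf[0], buf[1]]);
        let version = u16::from_le_bytes([buf[2], buf[3]]);
        Some((format, version, &buf[4..]))
    }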
-use crate::StorageAPI; +use crate::StorageAPI as _; +use crate::bucket::bucket_target_sys::BucketTargetSys; use crate::bucket::metadata::{BUCKET_LIFECYCLE_CONFIG, load_bucket_metadata_parse}; use crate::bucket::utils::{deserialize, is_meta_bucketname}; -use crate::cmd::bucket_targets; use crate::error::{Error, Result, is_err_bucket_not_found}; use crate::global::{GLOBAL_Endpoints, is_dist_erasure, is_erasure, new_object_layer_fn}; use crate::store::ECStore; use futures::future::join_all; use rustfs_common::heal_channel::HealOpts; use rustfs_policy::policy::BucketPolicy; +use s3s::dto::ReplicationConfiguration; use s3s::dto::{ - BucketLifecycleConfiguration, NotificationConfiguration, ObjectLockConfiguration, ReplicationConfiguration, - ServerSideEncryptionConfiguration, Tagging, VersioningConfiguration, + BucketLifecycleConfiguration, NotificationConfiguration, ObjectLockConfiguration, ServerSideEncryptionConfiguration, Tagging, + VersioningConfiguration, }; use std::collections::HashSet; use std::sync::OnceLock; @@ -261,7 +262,8 @@ impl BucketMetadataSys { if let Some(bucket) = buckets.get(idx) { let x = Arc::new(res); mp.insert(bucket.clone(), x.clone()); - bucket_targets::init_bucket_targets(bucket, x.clone()).await; + // TODO:EventNotifier,BucketTargetSys + BucketTargetSys::get().set(bucket, &x).await; } } Err(e) => { @@ -348,6 +350,7 @@ impl BucketMetadataSys { if !is_erasure().await && !is_dist_erasure().await && is_err_bucket_not_found(&err) { BucketMetadata::new(bucket) } else { + error!("load bucket metadata failed: {}", err); return Err(err); } } diff --git a/crates/ecstore/src/bucket/mod.rs b/crates/ecstore/src/bucket/mod.rs index efae304a..18fe8050 100644 --- a/crates/ecstore/src/bucket/mod.rs +++ b/crates/ecstore/src/bucket/mod.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +pub mod bucket_target_sys; pub mod error; pub mod lifecycle; pub mod metadata; diff --git a/crates/ecstore/src/bucket/replication/config.rs b/crates/ecstore/src/bucket/replication/config.rs new file mode 100644 index 00000000..88b3a8ed --- /dev/null +++ b/crates/ecstore/src/bucket/replication/config.rs @@ -0,0 +1,233 @@ +// Copyright 2024 RustFS Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
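The `TargetClient` introduced earlier in this patch wraps an aws-sdk-s3 client for the remote end of a replication pair. A hedged usage sketch, not code from this patch (the health-check logic and the `BucketVersioningStatus` enum from aws-sdk-s3 are assumptions here):

    // Sketch: verify a replication target is usable. Replication requires the
    // remote bucket to exist and have versioning enabled.
    async fn probe_target(tc: &TargetClient) -> Result<(), S3ClientError> {
        if !tc.bucket_exists(&tc.bucket).await? {
            return Err(S3ClientError::new("remote bucket not found"));
        }
        match tc.get_bucket_versioning(&tc.bucket).await? {
            Some(BucketVersioningStatus::Enabled) => Ok(()),
            _ => Err(S3ClientError::new("remote bucket is not versioned")),
        }
    }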
+
+use super::ReplicationRuleExt as _;
+use crate::bucket::tagging::decode_tags_to_map;
+use rustfs_filemeta::ReplicationType;
+use s3s::dto::DeleteMarkerReplicationStatus;
+use s3s::dto::DeleteReplicationStatus;
+use s3s::dto::Destination;
+use s3s::dto::{ExistingObjectReplicationStatus, ReplicationConfiguration, ReplicationRuleStatus, ReplicationRules};
+use serde::{Deserialize, Serialize};
+use std::collections::HashSet;
+use uuid::Uuid;
+
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+pub struct ObjectOpts {
+    pub name: String,
+    pub user_tags: String,
+    pub version_id: Option<Uuid>,
+    pub delete_marker: bool,
+    pub ssec: bool,
+    pub op_type: ReplicationType,
+    pub replica: bool,
+    pub existing_object: bool,
+    pub target_arn: String,
+}
+
+pub trait ReplicationConfigurationExt {
+    fn replicate(&self, opts: &ObjectOpts) -> bool;
+    fn has_existing_object_replication(&self, arn: &str) -> (bool, bool);
+    fn filter_actionable_rules(&self, obj: &ObjectOpts) -> ReplicationRules;
+    fn get_destination(&self) -> Destination;
+    fn has_active_rules(&self, prefix: &str, recursive: bool) -> bool;
+    fn filter_target_arns(&self, obj: &ObjectOpts) -> Vec<String>;
+}
+
+impl ReplicationConfigurationExt for ReplicationConfiguration {
+    /// Reports whether any rule (or the legacy role) references the given target
+    /// ARN, and whether existing-object replication is enabled for that target.
+    fn has_existing_object_replication(&self, arn: &str) -> (bool, bool) {
+        let mut has_arn = false;
+
+        for rule in &self.rules {
+            if rule.destination.bucket == arn || self.role == arn {
+                has_arn = true;
+                if let Some(status) = &rule.existing_object_replication {
+                    if status.status == ExistingObjectReplicationStatus::from_static(ExistingObjectReplicationStatus::ENABLED) {
+                        return (true, true);
+                    }
+                }
+            }
+        }
+        (has_arn, false)
+    }
+
+    fn filter_actionable_rules(&self, obj: &ObjectOpts) -> ReplicationRules {
+        if obj.name.is_empty() && obj.op_type != ReplicationType::Resync && obj.op_type != ReplicationType::All {
+            return vec![];
+        }
+
+        let mut rules = ReplicationRules::default();
+
+        for rule in &self.rules {
+            if rule.status == ReplicationRuleStatus::from_static(ReplicationRuleStatus::DISABLED) {
+                continue;
+            }
+
+            if !obj.target_arn.is_empty() && rule.destination.bucket != obj.target_arn && self.role != obj.target_arn {
+                continue;
+            }
+
+            if obj.op_type == ReplicationType::Resync || obj.op_type == ReplicationType::All {
+                rules.push(rule.clone());
+                continue;
+            }
+
+            if let Some(status) = &rule.existing_object_replication {
+                if obj.existing_object
+                    && status.status == ExistingObjectReplicationStatus::from_static(ExistingObjectReplicationStatus::DISABLED)
+                {
+                    continue;
+                }
+            }
+
+            if !obj.name.starts_with(rule.prefix()) {
+                continue;
+            }
+
+            // Rules without a filter are unconditionally actionable; rules with
+            // a filter must also match the object's tags.
+            if let Some(filter) = &rule.filter {
+                let object_tags = decode_tags_to_map(&obj.user_tags);
+                if !filter.test_tags(&object_tags) {
+                    continue;
+                }
+            }
+            rules.push(rule.clone());
+        }
+
+        // Among rules sharing a destination, prefer the higher priority.
+        rules.sort_by(|a, b| {
+            if a.destination == b.destination {
+                b.priority.cmp(&a.priority)
+            } else {
+                std::cmp::Ordering::Equal
+            }
+        });
+
+        rules
+    }
+
+    /// Returns the destination of the first rule, or an empty destination if
+    /// the configuration has no rules.
+    fn get_destination(&self) -> Destination {
+        if !self.rules.is_empty() {
+            self.rules[0].destination.clone()
+        } else {
+            Destination {
+                account: None,
+                bucket: "".to_string(),
+                encryption_configuration: None,
+                metrics: None,
+                replication_time: None,
+                access_control_translation: None,
+                storage_class: None,
+            }
+        }
+    }
+
+    /// Reports whether the object described by `obj` should be replicated.
+    fn replicate(&self, obj: &ObjectOpts) -> bool {
+        let rules = self.filter_actionable_rules(obj);
+
+        for rule in rules.iter() {
+            if rule.status == ReplicationRuleStatus::from_static(ReplicationRuleStatus::DISABLED) {
+                continue;
+            }
+
+            if let Some(status) = &rule.existing_object_replication {
+                if obj.existing_object
+                    && status.status == ExistingObjectReplicationStatus::from_static(ExistingObjectReplicationStatus::DISABLED)
+                {
+                    return false;
+                }
+            }
+
+            if obj.op_type == ReplicationType::Delete {
+                if obj.version_id.is_some() {
+                    return rule
+                        .delete_replication
+                        .clone()
+                        .is_some_and(|d| d.status == DeleteReplicationStatus::from_static(DeleteReplicationStatus::ENABLED));
+                } else {
+                    return rule.delete_marker_replication.clone().is_some_and(|d| {
+                        d.status == Some(DeleteMarkerReplicationStatus::from_static(DeleteMarkerReplicationStatus::ENABLED))
+                    });
+                }
+            }
+
+            // Regular object / metadata replication.
+            return rule.metadata_replicate(obj);
+        }
+        false
+    }
+
+    /// Reports whether there are any active rules, optionally scoped to a prefix.
+    /// If `recursive` is true, this also returns true when any level under the
+    /// prefix has an active rule. With an empty prefix, the check is effectively
+    /// recursive.
+    fn has_active_rules(&self, prefix: &str, recursive: bool) -> bool {
+        if self.rules.is_empty() {
+            return false;
+        }
+
+        for rule in &self.rules {
+            if rule.status == ReplicationRuleStatus::from_static(ReplicationRuleStatus::DISABLED) {
+                continue;
+            }
+
+            if let Some(filter) = &rule.filter {
+                if let Some(filter_prefix) = &filter.prefix {
+                    if !prefix.is_empty() && !filter_prefix.is_empty() {
+                        // The supplied prefix must fall under the rule prefix.
+                        if !recursive && !prefix.starts_with(filter_prefix) {
+                            continue;
+                        }
+                    }
+
+                    // For recursive checks, skip the rule when neither prefix
+                    // contains the other.
+                    if recursive && !rule.prefix().starts_with(prefix) && !prefix.starts_with(rule.prefix()) {
+                        continue;
+                    }
+                }
+            }
+            return true;
+        }
+        false
+    }
+
+    /// Returns the distinct destination ARNs that apply to `obj`.
+    fn filter_target_arns(&self, obj: &ObjectOpts) -> Vec<String> {
+        let mut arns = Vec::new();
+        let mut targets_map: HashSet<String> = HashSet::new();
+        let rules = self.filter_actionable_rules(obj);
+
+        for rule in rules {
+            if rule.status == ReplicationRuleStatus::from_static(ReplicationRuleStatus::DISABLED) {
+                continue;
+            }
+
+            if !self.role.is_empty() {
+                // Prefer the legacy RoleArn when present.
+                arns.push(self.role.clone());
+                return arns;
+            }
+
+            if !targets_map.contains(&rule.destination.bucket) {
+                targets_map.insert(rule.destination.bucket.clone());
+            }
+        }
+
+        for arn in targets_map {
+            arns.push(arn);
+        }
+        arns
+    }
+}
diff --git a/crates/ecstore/src/bucket/replication/datatypes.rs b/crates/ecstore/src/bucket/replication/datatypes.rs
index 274c0dfe..79ff131c 100644
--- a/crates/ecstore/src/bucket/replication/datatypes.rs
+++ b/crates/ecstore/src/bucket/replication/datatypes.rs
@@ -12,30 +12,36 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
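A minimal usage sketch for the extension trait above, assuming `cfg` is an already-parsed `ReplicationConfiguration` (the object name is hypothetical):

    // Decide whether a freshly written object should be queued for replication.
    let opts = ObjectOpts {
        name: "documents/report.pdf".to_string(),
        ..Default::default()
    };
    if cfg.replicate(&opts) {
        // Distinct destination ARNs for the applicable rules.
        let _arns = cfg.filter_target_arns(&opts);
        // ... enqueue replication work here ...
    }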
-// Replication status type for x-amz-replication-status header -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum StatusType { - Pending, - Completed, - CompletedLegacy, - Failed, - Replica, +use serde::{Deserialize, Serialize}; +use std::fmt; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)] +pub enum ResyncStatusType { + #[default] + NoResync, + ResyncPending, + ResyncCanceled, + ResyncStarted, + ResyncCompleted, + ResyncFailed, } -impl StatusType { - // Converts the enum variant to its string representation - pub fn as_str(&self) -> &'static str { - match self { - StatusType::Pending => "PENDING", - StatusType::Completed => "COMPLETED", - StatusType::CompletedLegacy => "COMPLETE", - StatusType::Failed => "FAILED", - StatusType::Replica => "REPLICA", - } - } - - // Checks if the status is empty (not set) - pub fn is_empty(&self) -> bool { - matches!(self, StatusType::Pending) // Adjust this as needed +impl ResyncStatusType { + pub fn is_valid(&self) -> bool { + *self != ResyncStatusType::NoResync + } +} + +impl fmt::Display for ResyncStatusType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let s = match self { + ResyncStatusType::ResyncStarted => "Ongoing", + ResyncStatusType::ResyncCompleted => "Completed", + ResyncStatusType::ResyncFailed => "Failed", + ResyncStatusType::ResyncPending => "Pending", + ResyncStatusType::ResyncCanceled => "Canceled", + ResyncStatusType::NoResync => "", + }; + write!(f, "{s}") } } diff --git a/crates/ecstore/src/bucket/replication/mod.rs b/crates/ecstore/src/bucket/replication/mod.rs index 7dbb177b..a8a9baa8 100644 --- a/crates/ecstore/src/bucket/replication/mod.rs +++ b/crates/ecstore/src/bucket/replication/mod.rs @@ -12,4 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License. 
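How the new resync status type behaves, shown as a few assertions grounded in the Display and is_valid implementations above:

    // Display maps internal states to admin-facing strings;
    // is_valid() treats everything except NoResync as a real resync state.
    assert_eq!(ResyncStatusType::ResyncStarted.to_string(), "Ongoing");
    assert_eq!(ResyncStatusType::NoResync.to_string(), "");
    assert!(ResyncStatusType::ResyncFailed.is_valid());
    assert!(!ResyncStatusType::NoResync.is_valid());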
+mod config;
 pub mod datatypes;
+mod replication_pool;
+mod replication_resyncer;
+mod replication_state;
+mod replication_type;
+mod rule;
+
+pub use config::*;
+pub use datatypes::*;
+pub use replication_pool::*;
+pub use replication_resyncer::*;
+pub use replication_type::*;
+pub use rule::*;
diff --git a/crates/ecstore/src/bucket/replication/replication_pool.rs b/crates/ecstore/src/bucket/replication/replication_pool.rs
new file mode 100644
index 00000000..313282b5
--- /dev/null
+++ b/crates/ecstore/src/bucket/replication/replication_pool.rs
@@ -0,0 +1,1035 @@
+use crate::StorageAPI;
+use crate::bucket::replication::MrfReplicateEntry;
+use crate::bucket::replication::ReplicateDecision;
+use crate::bucket::replication::ReplicateObjectInfo;
+use crate::bucket::replication::ReplicationWorkerOperation;
+use crate::bucket::replication::ResyncDecision;
+use crate::bucket::replication::ResyncOpts;
+use crate::bucket::replication::ResyncStatusType;
+use crate::bucket::replication::replicate_delete;
+use crate::bucket::replication::replicate_object;
+use crate::disk::BUCKET_META_PREFIX;
+use std::any::Any;
+use std::sync::Arc;
+use std::sync::atomic::AtomicI32;
+use std::sync::atomic::Ordering;
+
+use crate::bucket::replication::replication_resyncer::{
+    BucketReplicationResyncStatus, DeletedObjectReplicationInfo, ReplicationResyncer,
+};
+use crate::bucket::replication::replication_state::ReplicationStats;
+use crate::bucket::replication::replication_statuses_map;
+use crate::bucket::replication::version_purge_statuses_map;
+use crate::config::com::read_config;
+use crate::error::Error as EcstoreError;
+use crate::store::ECStore;
+use crate::store_api::ObjectInfo;
+
+use lazy_static::lazy_static;
+use rustfs_filemeta::ReplicatedTargetInfo;
+use rustfs_filemeta::ReplicationStatusType;
+use rustfs_filemeta::ReplicationType;
+use rustfs_utils::http::RESERVED_METADATA_PREFIX_LOWER;
+use time::OffsetDateTime;
+use time::format_description::well_known::Rfc3339;
+use tokio::sync::Mutex;
+use tokio::sync::RwLock;
+use tokio::sync::mpsc;
+use tokio::sync::mpsc::Receiver;
+use tokio::sync::mpsc::Sender;
+use tokio::task::JoinHandle;
+use tokio::time::Duration;
+use tokio_util::sync::CancellationToken;
+use tracing::info;
+use tracing::warn;
+
+// Worker limits
+pub const WORKER_MAX_LIMIT: usize = 500;
+pub const WORKER_MIN_LIMIT: usize = 50;
+pub const WORKER_AUTO_DEFAULT: usize = 100;
+pub const MRF_WORKER_MAX_LIMIT: usize = 8;
+pub const MRF_WORKER_MIN_LIMIT: usize = 2;
+pub const MRF_WORKER_AUTO_DEFAULT: usize = 4;
+pub const LARGE_WORKER_COUNT: usize = 10;
+pub const MIN_LARGE_OBJ_SIZE: i64 = 128 * 1024 * 1024; // 128MiB
+
+/// Priority levels for replication
+#[derive(Debug, Clone, PartialEq)]
+pub enum ReplicationPriority {
+    Fast,
+    Slow,
+    Auto,
+}
+
+impl std::str::FromStr for ReplicationPriority {
+    type Err = ();
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "fast" => Ok(ReplicationPriority::Fast),
+            "slow" => Ok(ReplicationPriority::Slow),
+            "auto" => Ok(ReplicationPriority::Auto),
+            _ => Ok(ReplicationPriority::Auto), // Default to Auto for unknown values
+        }
+    }
+}
+
+impl ReplicationPriority {
+    pub fn as_str(&self) -> &'static str {
+        match self {
+            ReplicationPriority::Fast => "fast",
+            ReplicationPriority::Slow => "slow",
+            ReplicationPriority::Auto => "auto",
+        }
+    }
+}
+
+/// Enum for different types of replication operations
+#[derive(Debug)]
+pub enum ReplicationOperation {
+    Object(Box<ReplicateObjectInfo>),
+    Delete(Box<DeletedObjectReplicationInfo>),
+}
+
+impl ReplicationWorkerOperation for ReplicationOperation {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn to_mrf_entry(&self) -> MrfReplicateEntry {
+        match self {
+            ReplicationOperation::Object(obj) => obj.to_mrf_entry(),
+            ReplicationOperation::Delete(del) => del.to_mrf_entry(),
+        }
+    }
+
+    fn get_bucket(&self) -> &str {
+        match self {
+            ReplicationOperation::Object(obj) => obj.get_bucket(),
+            ReplicationOperation::Delete(del) => del.get_bucket(),
+        }
+    }
+
+    fn get_object(&self) -> &str {
+        match self {
+            ReplicationOperation::Object(obj) => obj.get_object(),
+            ReplicationOperation::Delete(del) => del.get_object(),
+        }
+    }
+
+    fn get_size(&self) -> i64 {
+        match self {
+            ReplicationOperation::Object(obj) => obj.get_size(),
+            ReplicationOperation::Delete(del) => del.get_size(),
+        }
+    }
+
+    fn is_delete_marker(&self) -> bool {
+        match self {
+            ReplicationOperation::Object(obj) => obj.is_delete_marker(),
+            ReplicationOperation::Delete(del) => del.is_delete_marker(),
+        }
+    }
+
+    fn get_op_type(&self) -> ReplicationType {
+        match self {
+            ReplicationOperation::Object(obj) => obj.get_op_type(),
+            ReplicationOperation::Delete(del) => del.get_op_type(),
+        }
+    }
+}
+
+/// Replication pool options
+#[derive(Debug, Clone)]
+pub struct ReplicationPoolOpts {
+    pub priority: ReplicationPriority,
+    pub max_workers: Option<usize>,
+    pub max_l_workers: Option<usize>,
+}
+
+impl Default for ReplicationPoolOpts {
+    fn default() -> Self {
+        Self {
+            priority: ReplicationPriority::Auto,
+            max_workers: None,
+            max_l_workers: None,
+        }
+    }
+}
+
+/// Main replication pool structure
+#[derive(Debug)]
+pub struct ReplicationPool {
+    // Atomic counters for active workers
+    active_workers: Arc<AtomicI32>,
+    active_lrg_workers: Arc<AtomicI32>,
+    active_mrf_workers: Arc<AtomicI32>,
+
+    storage: Arc<ECStore>,
+
+    // Configuration
+    priority: RwLock<ReplicationPriority>,
+    max_workers: RwLock<usize>,
+    max_l_workers: RwLock<usize>,
+
+    // Statistics
+    stats: Arc<ReplicationStats>,
+
+    // Worker channels
+    workers: RwLock<Vec<Sender<ReplicationOperation>>>,
+    lrg_workers: RwLock<Vec<Sender<ReplicationOperation>>>,
+
+    // MRF (Most Recent Failures) channels
+    mrf_replica_tx: Sender<ReplicationOperation>,
+    mrf_replica_rx: Mutex<Option<Receiver<ReplicationOperation>>>,
+    mrf_save_tx: Sender<MrfReplicateEntry>,
+    mrf_save_rx: Mutex<Option<Receiver<MrfReplicateEntry>>>,
+
+    // Control channels
+    mrf_worker_kill_tx: Sender<()>,
+    mrf_stop_tx: Sender<()>,
+
+    // Worker size tracking
+    mrf_worker_size: AtomicI32,
+
+    // Task handles for cleanup
+    task_handles: Mutex<Vec<JoinHandle<()>>>,
+
+    // Replication resyncer for handling bucket resync operations
+    resyncer: Arc<ReplicationResyncer>,
+}
+
+impl ReplicationPool {
+    /// Creates a new replication pool with specified options
+    pub async fn new(opts: ReplicationPoolOpts, stats: Arc<ReplicationStats>, storage: Arc<ECStore>) -> Arc<Self> {
+        let max_workers = opts.max_workers.unwrap_or(WORKER_MAX_LIMIT);
+
+        let (workers, failed_workers) = match opts.priority {
+            ReplicationPriority::Fast => (WORKER_MAX_LIMIT, MRF_WORKER_MAX_LIMIT),
+            ReplicationPriority::Slow => (WORKER_MIN_LIMIT, MRF_WORKER_MIN_LIMIT),
+            ReplicationPriority::Auto => (WORKER_AUTO_DEFAULT, MRF_WORKER_AUTO_DEFAULT),
+        };
+
+        let workers = std::cmp::min(workers, max_workers);
+        let failed_workers = std::cmp::min(failed_workers, max_workers);
+
+        let max_l_workers = opts.max_l_workers.unwrap_or(LARGE_WORKER_COUNT);
+
+        // Create MRF channels
+        let (mrf_replica_tx, mrf_replica_rx) = mpsc::channel(100000);
+        let (mrf_save_tx, mrf_save_rx) = mpsc::channel(100000);
+        let (mrf_worker_kill_tx, _mrf_worker_kill_rx) = mpsc::channel(failed_workers);
+        let (mrf_stop_tx, _mrf_stop_rx) = mpsc::channel(1);
+
+        let pool = Arc::new(Self {
+            active_workers: Arc::new(AtomicI32::new(0)),
+            active_lrg_workers: Arc::new(AtomicI32::new(0)),
+            active_mrf_workers: Arc::new(AtomicI32::new(0)),
+            priority: RwLock::new(opts.priority),
+            max_workers: RwLock::new(max_workers),
+            max_l_workers: RwLock::new(max_l_workers),
+            stats,
+            storage,
+            workers: RwLock::new(Vec::new()),
+            lrg_workers: RwLock::new(Vec::new()),
+            mrf_replica_tx,
+            mrf_replica_rx: Mutex::new(Some(mrf_replica_rx)),
+            mrf_save_tx,
+            mrf_save_rx: Mutex::new(Some(mrf_save_rx)),
+            mrf_worker_kill_tx,
+            mrf_stop_tx,
+            mrf_worker_size: AtomicI32::new(0),
+            task_handles: Mutex::new(Vec::new()),
+            resyncer: Arc::new(ReplicationResyncer::new().await),
+        });
+
+        // Initialize workers
+        pool.resize_lrg_workers(max_l_workers, 0).await;
+        pool.resize_workers(workers, 0).await;
+        pool.resize_failed_workers(failed_workers as i32).await;
+
+        // Start background tasks
+        pool.start_mrf_processor().await;
+        pool.start_mrf_persister().await;
+
+        pool
+    }
+
+    /// Returns the number of active workers handling replication traffic
+    pub fn active_workers(&self) -> i32 {
+        self.active_workers.load(Ordering::SeqCst)
+    }
+
+    /// Returns the number of active workers handling replication failures
+    pub fn active_mrf_workers(&self) -> i32 {
+        self.active_mrf_workers.load(Ordering::SeqCst)
+    }
+
+    /// Returns the number of active workers handling traffic > 128MiB object size
+    pub fn active_lrg_workers(&self) -> i32 {
+        self.active_lrg_workers.load(Ordering::SeqCst)
+    }
+
+    /// Resizes the large workers pool
+    pub async fn resize_lrg_workers(&self, n: usize, check_old: usize) {
+        let mut lrg_workers = self.lrg_workers.write().await;
+
+        if (check_old > 0 && lrg_workers.len() != check_old) || n == lrg_workers.len() || n < 1 {
+            return;
+        }
+
+        // Add workers if needed
+        while lrg_workers.len() < n {
+            let (tx, rx) = mpsc::channel(100000);
+            lrg_workers.push(tx);
+
+            let active_counter = self.active_lrg_workers.clone();
+            let storage = self.storage.clone();
+
+            let handle = tokio::spawn(async move {
+                let mut rx = rx;
+                while let Some(operation) = rx.recv().await {
+                    active_counter.fetch_add(1, Ordering::SeqCst);
+
+                    match operation {
+                        ReplicationOperation::Object(obj_info) => {
+                            replicate_object(*obj_info, storage.clone()).await;
+                        }
+                        ReplicationOperation::Delete(del_info) => {
+                            replicate_delete(*del_info, storage.clone()).await;
+                        }
+                    }
+
+                    active_counter.fetch_sub(1, Ordering::SeqCst);
+                }
+            });
+
+            self.task_handles.lock().await.push(handle);
+        }
+
+        // Remove workers if needed
+        while lrg_workers.len() > n {
+            if let Some(worker) = lrg_workers.pop() {
+                drop(worker); // Closing the channel will terminate the worker
+            }
+        }
+    }
+
+    /// Resizes the regular workers pool
+    pub async fn resize_workers(&self, n: usize, check_old: usize) {
+        let mut workers = self.workers.write().await;
+
+        if (check_old > 0 && workers.len() != check_old) || n == workers.len() || n < 1 {
+            warn!(
+                "resize_workers: skipping resize - check_old_mismatch={}, same_size={}, invalid_n={}",
+                check_old > 0 && workers.len() != check_old,
+                n == workers.len(),
+                n < 1
+            );
+            return;
+        }
+
+        // Add workers if needed
+        if workers.len() < n {
+            info!("resize_workers: adding workers from {} to {}", workers.len(), n);
+        }
+
+        while workers.len() < n {
+            let (tx, rx) = mpsc::channel(10000);
+            workers.push(tx);
+
+            let active_counter = self.active_workers.clone();
+            let stats = self.stats.clone();
+            let storage = self.storage.clone();
+
+            let handle = tokio::spawn(async move {
+                let mut rx = rx;
+                while let Some(operation) = rx.recv().await {
+                    active_counter.fetch_add(1, Ordering::SeqCst);
+
+                    match operation {
+                        ReplicationOperation::Object(obj_info) => {
+                            stats
+                                .inc_q(&obj_info.bucket, obj_info.size, obj_info.delete_marker, obj_info.op_type)
+                                .await;
+
+                            // Replicate the object to its targets.
+                            replicate_object(obj_info.as_ref().clone(), storage.clone()).await;
+
+                            stats
+                                .dec_q(&obj_info.bucket, obj_info.size, obj_info.delete_marker, obj_info.op_type)
+                                .await;
+                        }
+                        ReplicationOperation::Delete(del_info) => {
+                            stats.inc_q(&del_info.bucket, 0, true, del_info.op_type).await;
+
+                            // Replicate the delete to its targets.
+                            replicate_delete(del_info.as_ref().clone(), storage.clone()).await;
+
+                            stats.dec_q(&del_info.bucket, 0, true, del_info.op_type).await;
+                        }
+                    }
+
+                    active_counter.fetch_sub(1, Ordering::SeqCst);
+                }
+            });
+
+            self.task_handles.lock().await.push(handle);
+        }
+
+        // Remove workers if needed
+        if workers.len() > n {
+            warn!("resize_workers: removing workers from {} to {}", workers.len(), n);
+        }
+
+        while workers.len() > n {
+            if let Some(worker) = workers.pop() {
+                drop(worker); // Closing the channel will terminate the worker
+            }
+        }
+    }
+
+    /// Resizes the failed workers pool
+    pub async fn resize_failed_workers(&self, n: i32) {
+        // Add workers if needed
+        while self.mrf_worker_size.load(Ordering::SeqCst) < n {
+            self.mrf_worker_size.fetch_add(1, Ordering::SeqCst);
+
+            let active_counter = self.active_mrf_workers.clone();
+            let stats = self.stats.clone();
+            let storage = self.storage.clone();
+            let mrf_rx = self.mrf_replica_rx.lock().await.take();
+
+            if let Some(rx) = mrf_rx {
+                let handle = tokio::spawn(async move {
+                    let mut rx = rx;
+                    while let Some(operation) = rx.recv().await {
+                        active_counter.fetch_add(1, Ordering::SeqCst);
+
+                        match operation {
+                            ReplicationOperation::Object(obj_info) => {
+                                stats
+                                    .inc_q(&obj_info.bucket, obj_info.size, obj_info.delete_marker, obj_info.op_type)
+                                    .await;
+
+                                replicate_object(obj_info.as_ref().clone(), storage.clone()).await;
+
+                                stats
+                                    .dec_q(&obj_info.bucket, obj_info.size, obj_info.delete_marker, obj_info.op_type)
+                                    .await;
+                            }
+                            ReplicationOperation::Delete(del_info) => {
+                                replicate_delete(*del_info, storage.clone()).await;
+                            }
+                        }
+
+                        active_counter.fetch_sub(1, Ordering::SeqCst);
+                    }
+                });
+                self.task_handles.lock().await.push(handle);
+                break; // Only one receiver can be taken
+            }
+        }
+
+        // Remove workers if needed
+        while self.mrf_worker_size.load(Ordering::SeqCst) > n {
+            self.mrf_worker_size.fetch_sub(1, Ordering::SeqCst);
+            let _ = self.mrf_worker_kill_tx.try_send(()); // Signal worker to stop
+        }
+    }
+
+    /// Resizes worker priority and counts
+    pub async fn resize_worker_priority(
+        &self,
+        pri: ReplicationPriority,
+        max_workers: Option<usize>,
+        max_l_workers: Option<usize>,
+    ) {
+        let (workers, mrf_workers) = match pri {
+            ReplicationPriority::Fast => (WORKER_MAX_LIMIT, MRF_WORKER_MAX_LIMIT),
+            ReplicationPriority::Slow => (WORKER_MIN_LIMIT, MRF_WORKER_MIN_LIMIT),
+            ReplicationPriority::Auto => {
+                let mut workers = WORKER_AUTO_DEFAULT;
+                let mut mrf_workers = MRF_WORKER_AUTO_DEFAULT;
+
+                let current_workers = self.workers.read().await.len();
+                if current_workers < WORKER_AUTO_DEFAULT {
+                    workers = std::cmp::min(current_workers + 1, WORKER_AUTO_DEFAULT);
+                }
+
+                let current_mrf = self.mrf_worker_size.load(Ordering::SeqCst) as usize;
+                if current_mrf < MRF_WORKER_AUTO_DEFAULT {
+                    mrf_workers = std::cmp::min(current_mrf + 1, MRF_WORKER_AUTO_DEFAULT);
+                }
+                (workers, mrf_workers)
+            }
+        };
+
+        let (final_workers, final_mrf_workers) = if let Some(max_w) = max_workers {
+            *self.max_workers.write().await = max_w;
+            (std::cmp::min(workers, max_w), std::cmp::min(mrf_workers, max_w))
+        } else {
+            (workers, mrf_workers)
+        };
+
+        let max_l_workers_val = max_l_workers.unwrap_or(LARGE_WORKER_COUNT);
+        *self.max_l_workers.write().await = max_l_workers_val;
+        *self.priority.write().await = pri;
+
+        self.resize_workers(final_workers, 0).await;
+        self.resize_failed_workers(final_mrf_workers as i32).await;
+        self.resize_lrg_workers(max_l_workers_val, 0).await;
+    }
+
+    /// Gets a worker channel deterministically based on bucket and object names
+    async fn get_worker_ch(&self, bucket: &str, object: &str, _size: i64) -> Option<Sender<ReplicationOperation>> {
+        use std::collections::hash_map::DefaultHasher;
+        use std::hash::{Hash, Hasher};
+
+        let mut hasher = DefaultHasher::new();
+        format!("{bucket}{object}").hash(&mut hasher);
+        let hash = hasher.finish();
+
+        let workers = self.workers.read().await;
+        if workers.is_empty() {
+            return None;
+        }
+
+        let index = (hash as usize) % workers.len();
+        workers.get(index).cloned()
+    }
+
+    /// Queues a replica task
+    pub async fn queue_replica_task(&self, ri: ReplicateObjectInfo) {
+        // If object is large, queue it to a static set of large workers
+        if ri.size >= MIN_LARGE_OBJ_SIZE {
+            use std::collections::hash_map::DefaultHasher;
+            use std::hash::{Hash, Hasher};
+
+            let mut hasher = DefaultHasher::new();
+            format!("{}{}", ri.bucket, ri.name).hash(&mut hasher);
+            let hash = hasher.finish();
+
+            let lrg_workers = self.lrg_workers.read().await;
+
+            if !lrg_workers.is_empty() {
+                let index = (hash as usize) % lrg_workers.len();
+
+                if let Some(worker) = lrg_workers.get(index) {
+                    if worker.try_send(ReplicationOperation::Object(Box::new(ri.clone()))).is_err() {
+                        // Queue to MRF if worker is busy
+                        let _ = self.mrf_save_tx.try_send(ri.to_mrf_entry());
+
+                        // Try to add more workers if possible
+                        let max_l_workers = *self.max_l_workers.read().await;
+                        let existing = lrg_workers.len();
+                        if self.active_lrg_workers() < std::cmp::min(max_l_workers, LARGE_WORKER_COUNT) as i32 {
+                            let workers = std::cmp::min(existing + 1, max_l_workers);
+
+                            drop(lrg_workers);
+                            self.resize_lrg_workers(workers, existing).await;
+                        }
+                    }
+                }
+            }
+            return;
+        }
+
+        // Handle regular sized objects
+        let ch = match ri.op_type {
+            ReplicationType::Heal | ReplicationType::ExistingObject => Some(self.mrf_replica_tx.clone()),
+            _ => self.get_worker_ch(&ri.bucket, &ri.name, ri.size).await,
+        };
+
+        if let Some(channel) = ch {
+            if channel.try_send(ReplicationOperation::Object(Box::new(ri.clone()))).is_err() {
+                // Queue to MRF if all workers are busy
+                let _ = self.mrf_save_tx.try_send(ri.to_mrf_entry());
+
+                // Try to scale up workers based on priority
+                let priority = self.priority.read().await.clone();
+                let max_workers = *self.max_workers.read().await;
+
+                match priority {
+                    ReplicationPriority::Fast => {
+                        warn!("Unable to keep up with incoming traffic");
+                    }
+                    ReplicationPriority::Slow => {
+                        warn!("Unable to keep up with incoming traffic - recommend increasing replication priority to auto");
+                    }
+                    ReplicationPriority::Auto => {
+                        let max_w = std::cmp::min(max_workers, WORKER_MAX_LIMIT);
+                        let active_workers = self.active_workers();
+
+                        if active_workers < max_w as i32 {
+                            let workers = self.workers.read().await;
+                            let new_count = std::cmp::min(workers.len() + 1, max_w);
+                            let existing = workers.len();
+
+                            drop(workers);
+                            self.resize_workers(new_count, existing).await;
+                        }
+
+                        let max_mrf_workers = std::cmp::min(max_workers, MRF_WORKER_MAX_LIMIT);
+                        let active_mrf = self.active_mrf_workers();
+
+                        if active_mrf < max_mrf_workers as i32 {
+                            let current_mrf = self.mrf_worker_size.load(Ordering::SeqCst);
+                            let new_mrf = std::cmp::min(current_mrf + 1, max_mrf_workers as i32);
+
+                            self.resize_failed_workers(new_mrf).await;
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    /// Queues a replica delete task
+    pub async fn queue_replica_delete_task(&self, doi: DeletedObjectReplicationInfo) {
+        let ch = match doi.op_type {
+            ReplicationType::Heal | ReplicationType::ExistingObject => Some(self.mrf_replica_tx.clone()),
+            _ => self.get_worker_ch(&doi.bucket, &doi.delete_object.object_name, 0).await,
+        };
+
+        if let Some(channel) = ch {
+            if channel.try_send(ReplicationOperation::Delete(Box::new(doi.clone()))).is_err() {
+                let _ = self.mrf_save_tx.try_send(doi.to_mrf_entry());
+
+                let priority = self.priority.read().await.clone();
+                let max_workers = *self.max_workers.read().await;
+
+                match priority {
+                    ReplicationPriority::Fast => {
+                        warn!("Unable to keep up with incoming deletes");
+                    }
+                    ReplicationPriority::Slow => {
+                        warn!("Unable to keep up with incoming deletes - recommend increasing replication priority to auto");
+                    }
+                    ReplicationPriority::Auto => {
+                        let max_w = std::cmp::min(max_workers, WORKER_MAX_LIMIT);
+                        if self.active_workers() < max_w as i32 {
+                            let workers = self.workers.read().await;
+                            let new_count = std::cmp::min(workers.len() + 1, max_w);
+                            let existing = workers.len();
+                            drop(workers);
+                            self.resize_workers(new_count, existing).await;
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    /// Queues an MRF save operation
+    async fn queue_mrf_save(&self, entry: MrfReplicateEntry) {
+        let _ = self.mrf_save_tx.try_send(entry);
+    }
+
+    /// Starts the MRF processor background task
+    async fn start_mrf_processor(&self) {
+        // This would start a background task to process MRF entries
+        // Implementation depends on the actual MRF processing logic
+    }
+
+    /// Starts the MRF persister background task
+    async fn start_mrf_persister(&self) {
+        // This would start a background task to persist MRF entries to disk
+        // Implementation depends on the actual persistence logic
+    }
+
+    /// Worker function for handling regular replication operations
+    async fn add_worker(
+        &self,
+        mut rx: Receiver<ReplicationOperation>,
+        active_counter: Arc<AtomicI32>,
+        stats: Arc<ReplicationStats>,
+    ) {
+        while let Some(operation) = rx.recv().await {
+            active_counter.fetch_add(1, Ordering::SeqCst);
+
+            match operation {
+                ReplicationOperation::Object(obj_info) => {
+                    stats
+                        .inc_q(&obj_info.bucket, obj_info.size, obj_info.delete_marker, obj_info.op_type)
+                        .await;
+
+                    // Replicate the object to its targets.
+                    replicate_object(obj_info.as_ref().clone(), self.storage.clone()).await;
+
+                    stats
+                        .dec_q(&obj_info.bucket, obj_info.size, obj_info.delete_marker, obj_info.op_type)
+                        .await;
+                }
+                ReplicationOperation::Delete(del_info) => {
+                    stats.inc_q(&del_info.bucket, 0, true, del_info.op_type).await;
+
+                    // Replicate the delete to its targets.
+                    replicate_delete(del_info.as_ref().clone(), self.storage.clone()).await;
+
+                    stats.dec_q(&del_info.bucket, 0, true, del_info.op_type).await;
+                }
+            }
+
+            active_counter.fetch_sub(1, Ordering::SeqCst);
+        }
+    }
+
+    /// Worker function for handling large object replication operations
+    async fn add_large_worker(
+        &self,
+        mut rx: Receiver<ReplicationOperation>,
+        active_counter: Arc<AtomicI32>,
+        storage: Arc<ECStore>,
+    ) {
+        while let Some(operation) = rx.recv().await {
+            active_counter.fetch_add(1, Ordering::SeqCst);
+
+            match operation {
+                ReplicationOperation::Object(obj_info) => {
+                    replicate_object(*obj_info, storage.clone()).await;
+                }
+                ReplicationOperation::Delete(del_info) => {
+                    replicate_delete(*del_info, storage.clone()).await;
+ } + } + + active_counter.fetch_sub(1, Ordering::SeqCst); + } + } + + /// Worker function for handling MRF (Most Recent Failures) operations + async fn add_mrf_worker( + &self, + mut rx: Receiver, + active_counter: Arc, + stats: Arc, + ) { + while let Some(operation) = rx.recv().await { + active_counter.fetch_add(1, Ordering::SeqCst); + + match operation { + ReplicationOperation::Object(obj_info) => { + stats + .inc_q(&obj_info.bucket, obj_info.size, obj_info.delete_marker, obj_info.op_type) + .await; + + replicate_object(obj_info.as_ref().clone(), self.storage.clone()).await; + + stats + .dec_q(&obj_info.bucket, obj_info.size, obj_info.delete_marker, obj_info.op_type) + .await; + } + ReplicationOperation::Delete(del_info) => { + replicate_delete(*del_info, self.storage.clone()).await; + } + } + + active_counter.fetch_sub(1, Ordering::SeqCst); + } + } + + /// Delete resync metadata from replication resync state in memory + pub async fn delete_resync_metadata(&self, bucket: &str) { + let mut status_map = self.resyncer.status_map.write().await; + status_map.remove(bucket); + // Note: global site resync metrics deletion would be handled here + // global_site_resync_metrics.delete_bucket(bucket); + } + + /// Initialize bucket replication resync for all buckets + pub async fn init_resync_internal( + self: Arc, + cancellation_token: CancellationToken, + buckets: Vec, + ) -> Result<(), EcstoreError> { + // Load bucket metadata system in background + let pool_clone = self.clone(); + + tokio::spawn(async move { + pool_clone.start_resync_routine(buckets, cancellation_token).await; + }); + + Ok(()) + } + + /// Start the resync routine that runs in a loop + async fn start_resync_routine(self: Arc, buckets: Vec, cancellation_token: CancellationToken) { + // Run the replication resync in a loop + loop { + let self_clone = self.clone(); + let ctx = cancellation_token.clone(); + tokio::select! 
{ + _ = cancellation_token.cancelled() => { + return; + } + result = self_clone.load_resync(&buckets, ctx) => { + if result.is_ok() { + return; + } + } + } + + // Generate random duration between 0 and 1 minute + use rand::Rng; + let duration_millis = rand::rng().random_range(0..60_000); + let mut duration = Duration::from_millis(duration_millis); + + // Make sure to sleep at least a second to avoid high CPU ticks + if duration < Duration::from_secs(1) { + duration = Duration::from_secs(1); + } + + tokio::time::sleep(duration).await; + } + } + + /// Load bucket replication resync statuses into memory + async fn load_resync(self: Arc, buckets: &[String], cancellation_token: CancellationToken) -> Result<(), EcstoreError> { + // TODO: add leader_lock + // Make sure only one node running resync on the cluster + // Note: Leader lock implementation would be needed here + // let _lock_guard = global_leader_lock.get_lock().await?; + + for bucket in buckets { + let meta = match load_bucket_resync_metadata(bucket, self.storage.clone()).await { + Ok(meta) => meta, + Err(err) => { + if !matches!(err, EcstoreError::VolumeNotFound) { + warn!("Error loading resync metadata for bucket {bucket}: {err:?}"); + } + continue; + } + }; + + // Store metadata in resyncer + { + let mut status_map = self.resyncer.status_map.write().await; + status_map.insert(bucket.clone(), meta.clone()); + } + + // Process target statistics + let target_stats = meta.clone_tgt_stats(); + for (arn, stats) in target_stats { + match stats.resync_status { + ResyncStatusType::ResyncFailed | ResyncStatusType::ResyncStarted | ResyncStatusType::ResyncPending => { + // Note: This would spawn a resync task in a real implementation + // For now, we just log the resync request + + let ctx = cancellation_token.clone(); + let bucket_clone = bucket.clone(); + let resync = self.resyncer.clone(); + let storage = self.storage.clone(); + tokio::spawn(async move { + resync + .resync_bucket( + ctx, + storage, + true, + ResyncOpts { + bucket: bucket_clone, + arn, + resync_id: stats.resync_id, + resync_before: stats.resync_before_date, + }, + ) + .await; + }); + } + _ => {} + } + } + } + + Ok(()) + } +} + +/// Load bucket resync metadata from disk +async fn load_bucket_resync_metadata( + bucket: &str, + obj_api: Arc, +) -> Result { + use std::convert::TryInto; + + let mut brs = BucketReplicationResyncStatus::new(); + + // Constants that would be defined elsewhere + const REPLICATION_DIR: &str = "replication"; + const RESYNC_FILE_NAME: &str = "resync.bin"; + const RESYNC_META_FORMAT: u16 = 1; + const RESYNC_META_VERSION: u16 = 1; + const RESYNC_META_VERSION_V1: u16 = 1; + + let resync_dir_path = format!("{BUCKET_META_PREFIX}/{bucket}/{REPLICATION_DIR}"); + let resync_file_path = format!("{resync_dir_path}/{RESYNC_FILE_NAME}"); + + let data = match read_config(obj_api, &resync_file_path).await { + Ok(data) => data, + Err(EcstoreError::ConfigNotFound) => return Ok(brs), + Err(err) => return Err(err), + }; + + if data.is_empty() { + // Seems to be empty + return Ok(brs); + } + + if data.len() <= 4 { + return Err(EcstoreError::CorruptedFormat); + } + + // Read resync meta header + let format = u16::from_le_bytes(data[0..2].try_into().unwrap()); + if format != RESYNC_META_FORMAT { + return Err(EcstoreError::CorruptedFormat); + } + + let version = u16::from_le_bytes(data[2..4].try_into().unwrap()); + if version != RESYNC_META_VERSION { + return Err(EcstoreError::CorruptedFormat); + } + + // Parse data + brs = 
BucketReplicationResyncStatus::unmarshal_msg(&data[4..])?; + + if brs.version != RESYNC_META_VERSION_V1 { + return Err(EcstoreError::CorruptedFormat); + } + + Ok(brs) +} + +// Define a trait object type for the replication pool +pub type DynReplicationPool = dyn ReplicationPoolTrait + Send + Sync; + +/// Trait that abstracts the replication pool operations +#[async_trait::async_trait] +pub trait ReplicationPoolTrait: std::fmt::Debug { + async fn queue_replica_task(&self, ri: ReplicateObjectInfo); + async fn queue_replica_delete_task(&self, ri: DeletedObjectReplicationInfo); + async fn resize(&self, priority: ReplicationPriority, max_workers: usize, max_l_workers: usize); + async fn init_resync( + self: Arc, + cancellation_token: CancellationToken, + buckets: Vec, + ) -> Result<(), EcstoreError>; +} + +// Implement the trait for ReplicationPool +#[async_trait::async_trait] +impl ReplicationPoolTrait for ReplicationPool { + async fn queue_replica_task(&self, ri: ReplicateObjectInfo) { + self.queue_replica_task(ri).await; + } + + async fn queue_replica_delete_task(&self, ri: DeletedObjectReplicationInfo) { + self.queue_replica_delete_task(ri).await; + } + + async fn resize(&self, priority: ReplicationPriority, max_workers: usize, max_l_workers: usize) { + self.resize(priority, max_workers, max_l_workers).await; + } + + async fn init_resync( + self: Arc, + cancellation_token: CancellationToken, + buckets: Vec, + ) -> Result<(), EcstoreError> { + self.init_resync_internal(cancellation_token, buckets).await + } +} + +lazy_static! { + pub static ref GLOBAL_REPLICATION_POOL: tokio::sync::OnceCell> = tokio::sync::OnceCell::new(); + pub static ref GLOBAL_REPLICATION_STATS: tokio::sync::OnceCell> = tokio::sync::OnceCell::new(); +} + +/// Initializes background replication with the given options +pub async fn init_background_replication(storage: Arc) { + let stats = GLOBAL_REPLICATION_STATS + .get_or_init(|| async { + let stats = Arc::new(ReplicationStats::new()); + stats.start_background_tasks().await; + stats + }) + .await; + + let _pool = GLOBAL_REPLICATION_POOL + .get_or_init(|| async { + let pool = ReplicationPool::new(ReplicationPoolOpts::default(), stats.clone(), storage).await; + pool as Arc + }) + .await; + + assert!(GLOBAL_REPLICATION_STATS.get().is_some()); + assert!(GLOBAL_REPLICATION_POOL.get().is_some()); +} + +pub async fn schedule_replication(oi: ObjectInfo, o: Arc, dsc: ReplicateDecision, op_type: ReplicationType) { + let tgt_statuses = replication_statuses_map(&oi.replication_status_internal.clone().unwrap_or_default()); + let purge_statuses = version_purge_statuses_map(&oi.version_purge_status_internal.clone().unwrap_or_default()); + let tm = oi + .user_defined + .get(&format!("{}{}", RESERVED_METADATA_PREFIX_LOWER, "replication-timestamp")) + .map(|v| OffsetDateTime::parse(v, &Rfc3339).unwrap_or(OffsetDateTime::UNIX_EPOCH)); + let mut rstate = oi.replication_state(); + rstate.replicate_decision_str = dsc.to_string(); + let asz = oi.get_actual_size().unwrap_or_default(); + + let mut ri = ReplicateObjectInfo { + name: oi.name, + size: oi.size, + actual_size: asz, + bucket: oi.bucket, + version_id: oi.version_id, + etag: oi.etag, + mod_time: oi.mod_time, + replication_status: oi.replication_status, + replication_status_internal: oi.replication_status_internal, + delete_marker: oi.delete_marker, + version_purge_status_internal: oi.version_purge_status_internal, + version_purge_status: oi.version_purge_status, + + replication_state: Some(rstate), + op_type, + dsc: dsc.clone(), + 
target_statuses: tgt_statuses, + target_purge_statuses: purge_statuses, + replication_timestamp: tm, + user_tags: oi.user_tags, + checksum: vec![], + retry_count: 0, + event_type: "".to_string(), + existing_obj_resync: ResyncDecision::default(), + ssec: false, + }; + + if ri.ssec { + ri.checksum = oi.checksum + } + if dsc.is_synchronous() { + replicate_object(ri, o).await + } else if let Some(pool) = GLOBAL_REPLICATION_POOL.get() { + pool.queue_replica_task(ri).await; + } +} + +pub async fn schedule_replication_delete(dv: DeletedObjectReplicationInfo) { + if let Some(pool) = GLOBAL_REPLICATION_POOL.get() { + pool.queue_replica_delete_task(dv.clone()).await; + } + + if let (Some(rs), Some(stats)) = (dv.delete_object.replication_state, GLOBAL_REPLICATION_STATS.get()) { + for (k, _v) in rs.targets.iter() { + let ri = ReplicatedTargetInfo { + arn: k.clone(), + size: 0, + duration: Duration::default(), + op_type: ReplicationType::Delete, + ..Default::default() + }; + stats + .update(&dv.bucket, &ri, ReplicationStatusType::Pending, ReplicationStatusType::Empty) + .await; + } + } +} diff --git a/crates/ecstore/src/bucket/replication/replication_resyncer.rs b/crates/ecstore/src/bucket/replication/replication_resyncer.rs new file mode 100644 index 00000000..f3578955 --- /dev/null +++ b/crates/ecstore/src/bucket/replication/replication_resyncer.rs @@ -0,0 +1,2403 @@ +use crate::bucket::bucket_target_sys::{ + AdvancedPutOptions, BucketTargetSys, PutObjectOptions, PutObjectPartOptions, RemoveObjectOptions, TargetClient, +}; +use crate::bucket::metadata_sys; +use crate::bucket::replication::{MrfReplicateEntry, ReplicationWorkerOperation, ResyncStatusType}; +use crate::bucket::replication::{ + ObjectOpts, REPLICATE_EXISTING, REPLICATE_EXISTING_DELETE, REPLICATION_RESET, ReplicateObjectInfo, + ReplicationConfigurationExt as _, ResyncTargetDecision, get_replication_state, parse_replicate_decision, + replication_statuses_map, target_reset_header, version_purge_statuses_map, +}; +use crate::bucket::tagging::decode_tags_to_map; +use crate::bucket::target::BucketTargets; +use crate::bucket::versioning_sys::BucketVersioningSys; +use crate::client::api_get_options::{AdvancedGetOptions, StatObjectOptions}; +use crate::config::com::save_config; +use crate::disk::BUCKET_META_PREFIX; +use crate::error::{Error, Result, is_err_object_not_found, is_err_version_not_found}; +use crate::event::name::EventName; +use crate::event_notification::{EventArgs, send_event}; +use crate::global::GLOBAL_LocalNodeName; +use crate::store_api::{DeletedObject, ObjectInfo, ObjectOptions, ObjectToDelete, WalkOptions}; +use crate::{StorageAPI, new_object_layer_fn}; + +use aws_sdk_s3::error::SdkError; +use aws_sdk_s3::operation::head_object::HeadObjectOutput; +use aws_sdk_s3::primitives::ByteStream; +use aws_sdk_s3::types::{CompletedPart, ObjectLockLegalHoldStatus}; +use byteorder::ByteOrder; +use futures::future::join_all; +use http::HeaderMap; + +use rustfs_filemeta::{ + ReplicatedInfos, ReplicatedTargetInfo, ReplicationAction, ReplicationState, ReplicationStatusType, ReplicationType, + VersionPurgeStatusType, +}; +use rustfs_utils::http::{ + AMZ_BUCKET_REPLICATION_STATUS, AMZ_OBJECT_TAGGING, AMZ_TAGGING_DIRECTIVE, CONTENT_ENCODING, HeaderExt as _, + RESERVED_METADATA_PREFIX, RESERVED_METADATA_PREFIX_LOWER, RUSTFS_REPLICATION_AUTUAL_OBJECT_SIZE, SSEC_ALGORITHM_HEADER, + SSEC_KEY_HEADER, SSEC_KEY_MD5_HEADER, headers, +}; +use rustfs_utils::path::path_join_buf; +use rustfs_utils::string::strings_has_prefix_fold; +use 
rustfs_utils::{DEFAULT_SIP_HASH_KEY, sip_hash};
+use s3s::dto::ReplicationConfiguration;
+use serde::Deserialize;
+use serde::Serialize;
+use std::any::Any;
+use std::collections::HashMap;
+use std::sync::Arc;
+use time::OffsetDateTime;
+use time::format_description::well_known::Rfc3339;
+use tokio::io::{AsyncRead, AsyncReadExt};
+use tokio::sync::RwLock;
+use tokio::task::JoinSet;
+use tokio::time::Duration as TokioDuration;
+use tokio_util::sync::CancellationToken;
+use tracing::{error, info, warn};
+
+use super::replication_type::{ReplicateDecision, ReplicateTargetDecision, ResyncDecision};
+use regex::Regex;
+
+const REPLICATION_DIR: &str = ".replication";
+const RESYNC_FILE_NAME: &str = "resync.bin";
+const RESYNC_META_FORMAT: u16 = 1;
+const RESYNC_META_VERSION: u16 = 1;
+const RESYNC_TIME_INTERVAL: TokioDuration = TokioDuration::from_secs(60);
+
+#[derive(Debug, Clone, Default)]
+pub struct ResyncOpts {
+    pub bucket: String,
+    pub arn: String,
+    pub resync_id: String,
+    pub resync_before: Option<OffsetDateTime>,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+pub struct TargetReplicationResyncStatus {
+    pub start_time: Option<OffsetDateTime>,
+    pub last_update: Option<OffsetDateTime>,
+    pub resync_id: String,
+    pub resync_before_date: Option<OffsetDateTime>,
+    pub resync_status: ResyncStatusType,
+    pub failed_size: i64,
+    pub failed_count: i64,
+    pub replicated_size: i64,
+    pub replicated_count: i64,
+    pub bucket: String,
+    pub object: String,
+    pub error: Option<String>,
+}
+
+impl TargetReplicationResyncStatus {
+    pub fn new() -> Self {
+        Self::default()
+    }
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+pub struct BucketReplicationResyncStatus {
+    pub version: u16,
+    pub targets_map: HashMap<String, TargetReplicationResyncStatus>,
+    pub id: i32,
+    pub last_update: Option<OffsetDateTime>,
+}
+
+impl BucketReplicationResyncStatus {
+    pub fn new() -> Self {
+        Self {
+            version: RESYNC_META_VERSION,
+            ..Default::default()
+        }
+    }
+
+    pub fn clone_tgt_stats(&self) -> HashMap<String, TargetReplicationResyncStatus> {
+        self.targets_map.clone()
+    }
+
+    pub fn marshal_msg(&self) -> Result<Vec<u8>> {
+        Ok(rmp_serde::to_vec(&self)?)
+    }
+
+    pub fn unmarshal_msg(data: &[u8]) -> Result<Self> {
+        Ok(rmp_serde::from_slice(data)?)
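+        // On-disk layout (written by `save_resync_status` below): two LE u16
+        // words, format then version, followed by this MessagePack payload,
+        // so readers skip the 4-byte header first, e.g.:
+        //
+        //   let brs = BucketReplicationResyncStatus::unmarshal_msg(&data[4..])?;
+        //   assert_eq!(brs.version, RESYNC_META_VERSION);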
+ } +} + +static RESYNC_WORKER_COUNT: usize = 10; + +#[derive(Debug)] +pub struct ReplicationResyncer { + pub status_map: Arc>>, + pub worker_size: usize, + pub resync_cancel_tx: CancellationToken, + pub resync_cancel_rx: CancellationToken, + pub worker_tx: tokio::sync::broadcast::Sender<()>, + pub worker_rx: tokio::sync::broadcast::Receiver<()>, +} + +impl ReplicationResyncer { + pub async fn new() -> Self { + let resync_cancel_tx = CancellationToken::new(); + let resync_cancel_rx = resync_cancel_tx.clone(); + let (worker_tx, worker_rx) = tokio::sync::broadcast::channel(RESYNC_WORKER_COUNT); + + for _ in 0..RESYNC_WORKER_COUNT { + if let Err(err) = worker_tx.send(()) { + error!("Failed to send worker message: {}", err); + } + } + + Self { + status_map: Arc::new(RwLock::new(HashMap::new())), + worker_size: RESYNC_WORKER_COUNT, + resync_cancel_tx, + resync_cancel_rx, + worker_tx, + worker_rx, + } + } + + pub async fn mark_status(&self, status: ResyncStatusType, opts: ResyncOpts, obj_layer: Arc) -> Result<()> { + let bucket_status = { + let mut status_map = self.status_map.write().await; + + let bucket_status = if let Some(bucket_status) = status_map.get_mut(&opts.bucket) { + bucket_status + } else { + let mut bucket_status = BucketReplicationResyncStatus::new(); + bucket_status.id = 0; + status_map.insert(opts.bucket.clone(), bucket_status); + status_map.get_mut(&opts.bucket).unwrap() + }; + + let state = if let Some(state) = bucket_status.targets_map.get_mut(&opts.arn) { + state + } else { + let state = TargetReplicationResyncStatus::new(); + bucket_status.targets_map.insert(opts.arn.clone(), state); + bucket_status.targets_map.get_mut(&opts.arn).unwrap() + }; + + state.resync_status = status; + state.last_update = Some(OffsetDateTime::now_utc()); + + bucket_status.last_update = Some(OffsetDateTime::now_utc()); + + bucket_status.clone() + }; + + save_resync_status(&opts.bucket, &bucket_status, obj_layer).await?; + + Ok(()) + } + + pub async fn inc_stats(&self, status: &TargetReplicationResyncStatus, opts: ResyncOpts) { + let mut status_map = self.status_map.write().await; + + let bucket_status = if let Some(bucket_status) = status_map.get_mut(&opts.bucket) { + bucket_status + } else { + let mut bucket_status = BucketReplicationResyncStatus::new(); + bucket_status.id = 0; + status_map.insert(opts.bucket.clone(), bucket_status); + status_map.get_mut(&opts.bucket).unwrap() + }; + + let state = if let Some(state) = bucket_status.targets_map.get_mut(&opts.arn) { + state + } else { + let state = TargetReplicationResyncStatus::new(); + bucket_status.targets_map.insert(opts.arn.clone(), state); + bucket_status.targets_map.get_mut(&opts.arn).unwrap() + }; + + state.object = status.object.clone(); + state.replicated_count += status.replicated_count; + state.replicated_size += status.replicated_size; + state.failed_count += status.failed_count; + state.failed_size += status.failed_size; + state.last_update = Some(OffsetDateTime::now_utc()); + bucket_status.last_update = Some(OffsetDateTime::now_utc()); + } + + pub async fn persist_to_disk(&self, cancel_token: CancellationToken, api: Arc) { + let mut interval = tokio::time::interval(RESYNC_TIME_INTERVAL); + + let mut last_update_times = HashMap::new(); + + loop { + tokio::select! 
{ + _ = cancel_token.cancelled() => { + return; + } + _ = interval.tick() => { + + let status_map = self.status_map.read().await; + + let mut update = false; + for (bucket, status) in status_map.iter() { + for target in status.targets_map.values() { + if target.last_update.is_none() { + update = true; + break; + } + } + + + + if let Some(last_update) = status.last_update { + if last_update > *last_update_times.get(bucket).unwrap_or(&OffsetDateTime::UNIX_EPOCH) { + update = true; + } + } + + if update { + if let Err(err) = save_resync_status(bucket, status, api.clone()).await { + error!("Failed to save resync status: {}", err); + } else { + last_update_times.insert(bucket.clone(), status.last_update.unwrap()); + } + } + } + + interval.reset(); + } + } + } + } + + async fn resync_bucket_mark_status(&self, status: ResyncStatusType, opts: ResyncOpts, storage: Arc) { + if let Err(err) = self.mark_status(status, opts.clone(), storage.clone()).await { + error!("Failed to mark resync status: {}", err); + } + if let Err(err) = self.worker_tx.send(()) { + error!("Failed to send worker message: {}", err); + } + // TODO: Metrics + } + + pub async fn resync_bucket( + self: Arc, + cancellation_token: CancellationToken, + storage: Arc, + heal: bool, + opts: ResyncOpts, + ) { + let mut worker_rx = self.worker_rx.resubscribe(); + + tokio::select! { + _ = cancellation_token.cancelled() => { + return; + } + + _ = worker_rx.recv() => {} + } + + let cfg = match get_replication_config(&opts.bucket).await { + Ok(cfg) => cfg, + Err(err) => { + error!("Failed to get replication config: {}", err); + self.resync_bucket_mark_status(ResyncStatusType::ResyncFailed, opts.clone(), storage.clone()) + .await; + return; + } + }; + + let targets = match BucketTargetSys::get().list_bucket_targets(&opts.bucket).await { + Ok(targets) => targets, + Err(err) => { + warn!("Failed to list bucket targets: {}", err); + self.resync_bucket_mark_status(ResyncStatusType::ResyncFailed, opts.clone(), storage.clone()) + .await; + return; + } + }; + + let rcfg = ReplicationConfig::new(cfg.clone(), Some(targets)); + + let target_arns = if let Some(cfg) = cfg { + cfg.filter_target_arns(&ObjectOpts { + op_type: ReplicationType::Resync, + target_arn: opts.arn.clone(), + ..Default::default() + }) + } else { + vec![] + }; + + if target_arns.len() != 1 { + error!( + "replication resync failed for {} - arn specified {} is missing in the replication config", + opts.bucket, opts.arn + ); + self.resync_bucket_mark_status(ResyncStatusType::ResyncFailed, opts.clone(), storage.clone()) + .await; + return; + } + + let Some(target_client) = BucketTargetSys::get() + .get_remote_target_client(&opts.bucket, &target_arns[0]) + .await + else { + error!( + "replication resync failed for {} - arn specified {} is missing in the bucket targets", + opts.bucket, opts.arn + ); + self.resync_bucket_mark_status(ResyncStatusType::ResyncFailed, opts.clone(), storage.clone()) + .await; + return; + }; + + if !heal { + if let Err(e) = self + .mark_status(ResyncStatusType::ResyncStarted, opts.clone(), storage.clone()) + .await + { + error!("Failed to mark resync status: {}", e); + } + } + + let (tx, mut rx) = tokio::sync::mpsc::channel(100); + + if let Err(err) = storage + .clone() + .walk(cancellation_token.clone(), &opts.bucket, "", tx.clone(), WalkOptions::default()) + .await + { + error!("Failed to walk bucket {}: {}", opts.bucket, err); + self.resync_bucket_mark_status(ResyncStatusType::ResyncFailed, opts.clone(), storage.clone()) + .await; + return; + } + + let status = 
{ + self.status_map + .read() + .await + .get(&opts.bucket) + .and_then(|status| status.targets_map.get(&opts.arn)) + .cloned() + .unwrap_or_default() + }; + + let mut last_checkpoint = if status.resync_status == ResyncStatusType::ResyncStarted + || status.resync_status == ResyncStatusType::ResyncFailed + { + Some(status.object) + } else { + None + }; + + let mut worker_txs = Vec::new(); + let (results_tx, mut results_rx) = tokio::sync::broadcast::channel::(1); + + let opts_clone = opts.clone(); + let self_clone = self.clone(); + + let mut futures = Vec::new(); + + let results_fut = tokio::spawn(async move { + while let Ok(st) = results_rx.recv().await { + self_clone.inc_stats(&st, opts_clone.clone()).await; + } + }); + + futures.push(results_fut); + + for _ in 0..RESYNC_WORKER_COUNT { + let (tx, mut rx) = tokio::sync::mpsc::channel::(100); + worker_txs.push(tx); + + let cancel_token = cancellation_token.clone(); + let target_client = target_client.clone(); + let resync_cancel_rx = self.resync_cancel_rx.clone(); + let storage = storage.clone(); + let results_tx = results_tx.clone(); + let bucket_name = opts.bucket.clone(); + + let f = tokio::spawn(async move { + while let Some(mut roi) = rx.recv().await { + if cancel_token.is_cancelled() { + return; + } + + if roi.delete_marker || !roi.version_purge_status.is_empty() { + let (version_id, dm_version_id) = if roi.version_purge_status.is_empty() { + (None, roi.version_id) + } else { + (roi.version_id, None) + }; + + let doi = DeletedObjectReplicationInfo { + delete_object: DeletedObject { + object_name: roi.name.clone(), + delete_marker_version_id: dm_version_id, + version_id, + replication_state: roi.replication_state.clone(), + delete_marker: roi.delete_marker, + delete_marker_mtime: roi.mod_time, + ..Default::default() + }, + bucket: roi.bucket.clone(), + event_type: REPLICATE_EXISTING_DELETE.to_string(), + op_type: ReplicationType::ExistingObject, + ..Default::default() + }; + replicate_delete(doi, storage.clone()).await; + } else { + roi.op_type = ReplicationType::ExistingObject; + roi.event_type = REPLICATE_EXISTING.to_string(); + replicate_object(roi.clone(), storage.clone()).await; + } + + let mut st = TargetReplicationResyncStatus { + object: roi.name.clone(), + bucket: roi.bucket.clone(), + ..Default::default() + }; + + let reset_id = target_client.reset_id.clone(); + + let (size, err) = if let Err(err) = target_client + .head_object(&target_client.bucket, &roi.name, roi.version_id.map(|v| v.to_string())) + .await + { + if roi.delete_marker { + st.replicated_count += 1; + } else { + st.failed_count += 1; + } + (0, Some(err)) + } else { + st.replicated_count += 1; + st.replicated_size += roi.size; + (roi.size, None) + }; + + info!( + "resynced reset_id:{} object: {}/{}-{} size:{} err:{:?}", + reset_id, + bucket_name, + roi.name, + roi.version_id.unwrap_or_default(), + size, + err, + ); + + if resync_cancel_rx.is_cancelled() { + return; + } + + if cancel_token.is_cancelled() { + return; + } + + if let Err(err) = results_tx.send(st) { + error!("Failed to send resync status: {}", err); + } + } + }); + + futures.push(f); + } + + let resync_cancel_rx = self.resync_cancel_rx.clone(); + + while let Some(res) = rx.recv().await { + if let Some(err) = res.err { + error!("Failed to get object info: {}", err); + self.resync_bucket_mark_status(ResyncStatusType::ResyncFailed, opts.clone(), storage.clone()) + .await; + return; + } + + if resync_cancel_rx.is_cancelled() { + self.resync_bucket_mark_status(ResyncStatusType::ResyncCanceled, 
opts.clone(), storage.clone())
+                    .await;
+                return;
+            }
+
+            if cancellation_token.is_cancelled() {
+                self.resync_bucket_mark_status(ResyncStatusType::ResyncFailed, opts.clone(), storage.clone())
+                    .await;
+                return;
+            }
+
+            let Some(object) = res.item else {
+                continue;
+            };
+
+            if heal
+                && let Some(checkpoint) = &last_checkpoint
+                && &object.name != checkpoint
+            {
+                continue;
+            }
+            last_checkpoint = None;
+
+            let roi = get_heal_replicate_object_info(&object, &rcfg).await;
+            if !roi.existing_obj_resync.must_resync() {
+                continue;
+            }
+
+            if resync_cancel_rx.is_cancelled() {
+                self.resync_bucket_mark_status(ResyncStatusType::ResyncCanceled, opts.clone(), storage.clone())
+                    .await;
+                return;
+            }
+
+            if cancellation_token.is_cancelled() {
+                self.resync_bucket_mark_status(ResyncStatusType::ResyncFailed, opts.clone(), storage.clone())
+                    .await;
+                return;
+            }
+
+            let worker_idx = sip_hash(&roi.name, RESYNC_WORKER_COUNT, &DEFAULT_SIP_HASH_KEY) as usize;
+
+            if let Err(err) = worker_txs[worker_idx].send(roi).await {
+                error!("Failed to send object info to worker: {}", err);
+                self.resync_bucket_mark_status(ResyncStatusType::ResyncFailed, opts.clone(), storage.clone())
+                    .await;
+                return;
+            }
+        }
+
+        for worker_tx in worker_txs {
+            drop(worker_tx);
+        }
+
+        join_all(futures).await;
+
+        self.resync_bucket_mark_status(ResyncStatusType::ResyncCompleted, opts.clone(), storage.clone())
+            .await;
+    }
+}
+
+pub async fn get_heal_replicate_object_info(oi: &ObjectInfo, rcfg: &ReplicationConfig) -> ReplicateObjectInfo {
+    let mut oi = oi.clone();
+    let mut user_defined = oi.user_defined.clone();
+
+    if let Some(rc) = rcfg.config.as_ref()
+        && !rc.role.is_empty()
+    {
+        if !oi.replication_status.is_empty() {
+            oi.replication_status_internal = Some(format!("{}={};", rc.role, oi.replication_status.as_str()));
+        }
+
+        if !oi.version_purge_status.is_empty() {
+            oi.version_purge_status_internal = Some(format!("{}={};", rc.role, oi.version_purge_status.as_str()));
+        }
+
+        let keys_to_update: Vec<_> = user_defined
+            .iter()
+            .filter(|(k, _)| k.eq_ignore_ascii_case(format!("{RESERVED_METADATA_PREFIX_LOWER}{REPLICATION_RESET}").as_str()))
+            .map(|(k, v)| (k.clone(), v.clone()))
+            .collect();
+
+        for (k, v) in keys_to_update {
+            user_defined.remove(&k);
+            user_defined.insert(target_reset_header(rc.role.as_str()), v);
+        }
+    }
+
+    let dsc = if oi.delete_marker || !oi.replication_status.is_empty() {
+        check_replicate_delete(
+            oi.bucket.as_str(),
+            &ObjectToDelete {
+                object_name: oi.name.clone(),
+                version_id: oi.version_id,
+                ..Default::default()
+            },
+            &oi,
+            &ObjectOptions {
+                versioned: BucketVersioningSys::prefix_enabled(&oi.bucket, &oi.name).await,
+                version_suspended: BucketVersioningSys::prefix_suspended(&oi.bucket, &oi.name).await,
+                ..Default::default()
+            },
+            None,
+        )
+        .await
+    } else {
+        must_replicate(
+            oi.bucket.as_str(),
+            &oi.name,
+            MustReplicateOptions::new(
+                &user_defined,
+                oi.user_tags.clone(),
+                ReplicationStatusType::Empty,
+                ReplicationType::Heal,
+                ObjectOptions::default(),
+            ),
+        )
+        .await
+    };
+
+    let target_statuses = replication_statuses_map(&oi.replication_status_internal.clone().unwrap_or_default());
+    let target_purge_statuses = version_purge_statuses_map(&oi.version_purge_status_internal.clone().unwrap_or_default());
+    let existing_obj_resync = rcfg.resync(oi.clone(), dsc.clone(), &target_statuses).await;
+    let mut replication_state = oi.replication_state();
+    replication_state.replicate_decision_str = dsc.to_string();
+    let actual_size = oi.get_actual_size().unwrap_or_default();
+
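+    // The struct below snapshots everything the heal/resync workers need:
+    // per-target replication and purge status maps, the resync decision, and
+    // the logical (actual) size, so a queued entry never has to re-read
+    // bucket metadata.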
ReplicateObjectInfo { + name: oi.name.clone(), + size: oi.size, + actual_size, + bucket: oi.bucket.clone(), + version_id: oi.version_id, + etag: oi.etag.clone(), + mod_time: oi.mod_time, + replication_status: oi.replication_status, + replication_status_internal: oi.replication_status_internal.clone(), + delete_marker: oi.delete_marker, + version_purge_status_internal: oi.version_purge_status_internal.clone(), + version_purge_status: oi.version_purge_status, + replication_state: Some(replication_state), + op_type: ReplicationType::Heal, + event_type: "".to_string(), + dsc, + existing_obj_resync, + target_statuses, + target_purge_statuses, + replication_timestamp: None, + ssec: false, // TODO: add ssec support + user_tags: oi.user_tags.clone(), + checksum: Vec::new(), + retry_count: 0, + } +} + +async fn save_resync_status(bucket: &str, status: &BucketReplicationResyncStatus, api: Arc) -> Result<()> { + let buf = status.marshal_msg()?; + + let mut data = Vec::new(); + + let mut major = [0u8; 2]; + byteorder::LittleEndian::write_u16(&mut major, RESYNC_META_FORMAT); + data.extend_from_slice(&major); + + let mut minor = [0u8; 2]; + byteorder::LittleEndian::write_u16(&mut minor, RESYNC_META_VERSION); + data.extend_from_slice(&minor); + + data.extend_from_slice(&buf); + + let config_file = path_join_buf(&[BUCKET_META_PREFIX, bucket, REPLICATION_DIR, RESYNC_FILE_NAME]); + save_config(api, &config_file, data).await?; + + Ok(()) +} + +async fn get_replication_config(bucket: &str) -> Result> { + let config = match metadata_sys::get_replication_config(bucket).await { + Ok((config, _)) => Some(config), + Err(err) => { + if err != Error::ConfigNotFound { + return Err(err); + } + None + } + }; + Ok(config) +} + +#[derive(Debug, Clone, Default)] +pub struct DeletedObjectReplicationInfo { + pub delete_object: DeletedObject, + pub bucket: String, + pub event_type: String, + pub op_type: ReplicationType, + pub reset_id: String, + pub target_arn: String, +} + +impl ReplicationWorkerOperation for DeletedObjectReplicationInfo { + fn as_any(&self) -> &dyn Any { + self + } + + fn to_mrf_entry(&self) -> MrfReplicateEntry { + MrfReplicateEntry { + bucket: self.bucket.clone(), + object: self.delete_object.object_name.clone(), + version_id: None, + retry_count: 0, + size: 0, + } + } + + fn get_bucket(&self) -> &str { + &self.bucket + } + + fn get_object(&self) -> &str { + &self.delete_object.object_name + } + + fn get_size(&self) -> i64 { + 0 + } + + fn is_delete_marker(&self) -> bool { + true + } + + fn get_op_type(&self) -> ReplicationType { + self.op_type + } +} + +#[derive(Debug, Clone, Default)] +pub struct ReplicationConfig { + pub config: Option, + pub remotes: Option, +} + +impl ReplicationConfig { + pub fn new(config: Option, remotes: Option) -> Self { + Self { config, remotes } + } + + pub fn is_empty(&self) -> bool { + self.config.is_none() + } + + pub fn replicate(&self, obj: &ObjectOpts) -> bool { + self.config.as_ref().is_some_and(|config| config.replicate(obj)) + } + + pub async fn resync( + &self, + oi: ObjectInfo, + dsc: ReplicateDecision, + status: &HashMap, + ) -> ResyncDecision { + if self.is_empty() { + return ResyncDecision::default(); + } + + let mut dsc = dsc; + + if oi.delete_marker { + let opts = ObjectOpts { + name: oi.name.clone(), + version_id: oi.version_id, + delete_marker: true, + op_type: ReplicationType::Delete, + existing_object: true, + ..Default::default() + }; + let arns = self + .config + .as_ref() + .map(|config| config.filter_target_arns(&opts)) + .unwrap_or_default(); + + if 
arns.is_empty() { + return ResyncDecision::default(); + } + + for arn in arns { + let mut opts = opts.clone(); + opts.target_arn = arn; + + dsc.set(ReplicateTargetDecision::new(opts.target_arn.clone(), self.replicate(&opts), false)); + } + + return self.resync_internal(oi, dsc, status); + } + + let mut user_defined = oi.user_defined.clone(); + user_defined.remove(AMZ_BUCKET_REPLICATION_STATUS); + + let dsc = must_replicate( + oi.bucket.as_str(), + &oi.name, + MustReplicateOptions::new( + &user_defined, + oi.user_tags.clone(), + ReplicationStatusType::Empty, + ReplicationType::ExistingObject, + ObjectOptions::default(), + ), + ) + .await; + + self.resync_internal(oi, dsc, status) + } + + fn resync_internal( + &self, + oi: ObjectInfo, + dsc: ReplicateDecision, + status: &HashMap, + ) -> ResyncDecision { + let Some(remotes) = self.remotes.as_ref() else { + return ResyncDecision::default(); + }; + + if remotes.is_empty() { + return ResyncDecision::default(); + } + + let mut resync_decision = ResyncDecision::default(); + + for target in remotes.targets.iter() { + if let Some(decision) = dsc.targets_map.get(&target.arn) + && decision.replicate + { + resync_decision.targets.insert( + decision.arn.clone(), + ResyncTargetDecision::resync_target( + &oi, + &target.arn, + &target.reset_id, + target.reset_before_date, + status.get(&decision.arn).unwrap_or(&ReplicationStatusType::Empty).clone(), + ), + ); + } + } + + resync_decision + } +} + +pub struct MustReplicateOptions { + meta: HashMap, + status: ReplicationStatusType, + op_type: ReplicationType, + replication_request: bool, +} + +impl MustReplicateOptions { + pub fn new( + meta: &HashMap, + user_tags: String, + status: ReplicationStatusType, + op_type: ReplicationType, + opts: ObjectOptions, + ) -> Self { + let mut meta = meta.clone(); + if !user_tags.is_empty() { + meta.insert(AMZ_OBJECT_TAGGING.to_string(), user_tags); + } + + Self { + meta, + status, + op_type, + replication_request: opts.replication_request, + } + } + + pub fn from_object_info(oi: &ObjectInfo, op_type: ReplicationType, opts: ObjectOptions) -> Self { + Self::new(&oi.user_defined, oi.user_tags.clone(), oi.replication_status.clone(), op_type, opts) + } + + pub fn replication_status(&self) -> ReplicationStatusType { + if let Some(rs) = self.meta.get(AMZ_BUCKET_REPLICATION_STATUS) { + return ReplicationStatusType::from(rs.as_str()); + } + ReplicationStatusType::default() + } + + pub fn is_existing_object_replication(&self) -> bool { + self.op_type == ReplicationType::ExistingObject + } + + pub fn is_metadata_replication(&self) -> bool { + self.op_type == ReplicationType::Metadata + } +} + +pub fn get_must_replicate_options( + user_defined: &HashMap, + user_tags: String, + status: ReplicationStatusType, + op_type: ReplicationType, + opts: ObjectOptions, +) -> MustReplicateOptions { + MustReplicateOptions::new(user_defined, user_tags, status, op_type, opts) +} + +/// Returns whether object version is a delete marker and if object qualifies for replication +pub async fn check_replicate_delete( + bucket: &str, + dobj: &ObjectToDelete, + oi: &ObjectInfo, + del_opts: &ObjectOptions, + gerr: Option, +) -> ReplicateDecision { + let rcfg = match get_replication_config(bucket).await { + Ok(Some(config)) => config, + Ok(None) => { + warn!("No replication config found for bucket: {}", bucket); + return ReplicateDecision::default(); + } + Err(err) => { + error!("Failed to get replication config for bucket {}: {}", bucket, err); + return ReplicateDecision::default(); + } + }; + + // If incoming 
request is a replication request, it does not need to be re-replicated. + if del_opts.replication_request { + return ReplicateDecision::default(); + } + + // Skip replication if this object's prefix is excluded from being versioned. + if !del_opts.versioned { + return ReplicateDecision::default(); + } + + let opts = ObjectOpts { + name: dobj.object_name.clone(), + ssec: is_ssec_encrypted(&oi.user_defined), + user_tags: oi.user_tags.clone(), + delete_marker: oi.delete_marker, + version_id: dobj.version_id, + op_type: ReplicationType::Delete, + ..Default::default() + }; + + let tgt_arns = rcfg.filter_target_arns(&opts); + let mut dsc = ReplicateDecision::new(); + + if tgt_arns.is_empty() { + return dsc; + } + + for tgt_arn in tgt_arns { + let mut opts = opts.clone(); + opts.target_arn = tgt_arn.clone(); + let replicate = rcfg.replicate(&opts); + let sync = false; // Default sync value + + // When incoming delete is removal of a delete marker (a.k.a versioned delete), + // GetObjectInfo returns extra information even though it returns errFileNotFound + if gerr.is_some() { + let valid_repl_status = matches!( + oi.target_replication_status(&tgt_arn), + ReplicationStatusType::Pending | ReplicationStatusType::Completed | ReplicationStatusType::Failed + ); + + if oi.delete_marker && (valid_repl_status || replicate) { + dsc.set(ReplicateTargetDecision::new(tgt_arn, replicate, sync)); + continue; + } + + // Can be the case that other cluster is down and duplicate `mc rm --vid` + // is issued - this still needs to be replicated back to the other target + if oi.version_purge_status != VersionPurgeStatusType::default() { + let replicate = oi.version_purge_status == VersionPurgeStatusType::Pending + || oi.version_purge_status == VersionPurgeStatusType::Failed; + dsc.set(ReplicateTargetDecision::new(tgt_arn, replicate, sync)); + } + continue; + } + + let tgt = BucketTargetSys::get().get_remote_target_client(bucket, &tgt_arn).await; + // The target online status should not be used here while deciding + // whether to replicate deletes as the target could be temporarily down + let tgt_dsc = if let Some(tgt) = tgt { + ReplicateTargetDecision::new(tgt_arn, replicate, tgt.replicate_sync) + } else { + ReplicateTargetDecision::new(tgt_arn, false, false) + }; + dsc.set(tgt_dsc); + } + + dsc +} + +/// Check if the user-defined metadata contains SSEC encryption headers +fn is_ssec_encrypted(user_defined: &std::collections::HashMap) -> bool { + user_defined.contains_key(SSEC_ALGORITHM_HEADER) + || user_defined.contains_key(SSEC_KEY_HEADER) + || user_defined.contains_key(SSEC_KEY_MD5_HEADER) +} + +/// Extension trait for ObjectInfo to add replication-related methods +pub trait ObjectInfoExt { + fn target_replication_status(&self, arn: &str) -> ReplicationStatusType; + fn replication_state(&self) -> ReplicationState; +} + +impl ObjectInfoExt for ObjectInfo { + /// Returns replication status of a target + fn target_replication_status(&self, arn: &str) -> ReplicationStatusType { + lazy_static::lazy_static! 
{ + static ref REPL_STATUS_REGEX: Regex = Regex::new(r"([^=].*?)=([^,].*?);").unwrap(); + } + + let binding = self.replication_status_internal.clone().unwrap_or_default(); + let captures = REPL_STATUS_REGEX.captures_iter(&binding); + for cap in captures { + if cap.len() == 3 && &cap[1] == arn { + return ReplicationStatusType::from(&cap[2]); + } + } + ReplicationStatusType::default() + } + + fn replication_state(&self) -> ReplicationState { + ReplicationState { + replication_status_internal: self.replication_status_internal.clone(), + version_purge_status_internal: self.version_purge_status_internal.clone(), + replicate_decision_str: self.replication_decision.clone(), + targets: replication_statuses_map(&self.replication_status_internal.clone().unwrap_or_default()), + purge_targets: version_purge_statuses_map(&self.version_purge_status_internal.clone().unwrap_or_default()), + reset_statuses_map: self + .user_defined + .iter() + .filter_map(|(k, v)| { + if k.starts_with(&format!("{RESERVED_METADATA_PREFIX_LOWER}-{REPLICATION_RESET}")) { + Some(( + k.trim_start_matches(&format!("{RESERVED_METADATA_PREFIX_LOWER}-{REPLICATION_RESET}")) + .to_string(), + v.clone(), + )) + } else { + None + } + }) + .collect(), + ..Default::default() + } + } +} + +pub async fn must_replicate(bucket: &str, object: &str, mopts: MustReplicateOptions) -> ReplicateDecision { + if new_object_layer_fn().is_none() { + return ReplicateDecision::default(); + } + + if !BucketVersioningSys::prefix_enabled(bucket, object).await { + return ReplicateDecision::default(); + } + + let replication_status = mopts.replication_status(); + + if replication_status == ReplicationStatusType::Replica && !mopts.is_metadata_replication() { + return ReplicateDecision::default(); + } + + if mopts.replication_request { + return ReplicateDecision::default(); + } + + let cfg = match get_replication_config(bucket).await { + Ok(cfg) => { + if let Some(cfg) = cfg { + cfg + } else { + return ReplicateDecision::default(); + } + } + Err(_err) => { + return ReplicateDecision::default(); + } + }; + + let opts = ObjectOpts { + name: object.to_string(), + replica: replication_status == ReplicationStatusType::Replica, + existing_object: mopts.is_existing_object_replication(), + user_tags: mopts.meta.get(AMZ_OBJECT_TAGGING).map(|s| s.to_string()).unwrap_or_default(), + ..Default::default() + }; + + let arns = cfg.filter_target_arns(&opts); + + if arns.is_empty() { + return ReplicateDecision::default(); + } + + let mut dsc = ReplicateDecision::default(); + + for arn in arns { + let cli = BucketTargetSys::get().get_remote_target_client(bucket, &arn).await; + + let mut sopts = opts.clone(); + sopts.target_arn = arn.clone(); + + let replicate = cfg.replicate(&sopts); + let synchronous = if let Some(cli) = cli { cli.replicate_sync } else { false }; + + dsc.set(ReplicateTargetDecision::new(arn, replicate, synchronous)); + } + + dsc +} + +pub async fn replicate_delete(dobj: DeletedObjectReplicationInfo, storage: Arc) { + let bucket = dobj.bucket.clone(); + let version_id = if let Some(version_id) = &dobj.delete_object.delete_marker_version_id { + Some(version_id.to_owned()) + } else { + dobj.delete_object.version_id + }; + + let _rcfg = match get_replication_config(&bucket).await { + Ok(Some(config)) => config, + Ok(None) => { + warn!("No replication config found for bucket: {}", bucket); + send_event(EventArgs { + event_name: EventName::ObjectReplicationNotTracked.as_ref().to_string(), + bucket_name: bucket.clone(), + object: ObjectInfo { + bucket: bucket.clone(), 
+ name: dobj.delete_object.object_name.clone(), + version_id, + delete_marker: dobj.delete_object.delete_marker, + ..Default::default() + }, + user_agent: "Internal: [Replication]".to_string(), + host: GLOBAL_LocalNodeName.to_string(), + ..Default::default() + }); + + return; + } + Err(err) => { + warn!("replication config for bucket: {} error: {}", bucket, err); + send_event(EventArgs { + event_name: EventName::ObjectReplicationNotTracked.as_ref().to_string(), + bucket_name: bucket.clone(), + object: ObjectInfo { + bucket: bucket.clone(), + name: dobj.delete_object.object_name.clone(), + version_id, + delete_marker: dobj.delete_object.delete_marker, + ..Default::default() + }, + user_agent: "Internal: [Replication]".to_string(), + host: GLOBAL_LocalNodeName.to_string(), + ..Default::default() + }); + return; + } + }; + + let dsc = match parse_replicate_decision( + &bucket, + &dobj + .delete_object + .replication_state + .as_ref() + .map(|v| v.replicate_decision_str.clone()) + .unwrap_or_default(), + ) { + Ok(dsc) => dsc, + Err(err) => { + warn!( + "failed to parse replicate decision for bucket:{} arn:{} error:{}", + bucket, dobj.target_arn, err + ); + send_event(EventArgs { + event_name: EventName::ObjectReplicationNotTracked.as_ref().to_string(), + bucket_name: bucket.clone(), + object: ObjectInfo { + bucket: bucket.clone(), + name: dobj.delete_object.object_name.clone(), + version_id, + delete_marker: dobj.delete_object.delete_marker, + ..Default::default() + }, + user_agent: "Internal: [Replication]".to_string(), + host: GLOBAL_LocalNodeName.to_string(), + ..Default::default() + }); + return; + } + }; + + //TODO: nslock + + // Initialize replicated infos + let mut rinfos = ReplicatedInfos { + replication_timestamp: Some(OffsetDateTime::now_utc()), + targets: Vec::with_capacity(dsc.targets_map.len()), + }; + + let mut join_set = JoinSet::new(); + + // Process each target + for (_, tgt_entry) in dsc.targets_map.iter() { + // Skip targets that should not be replicated + if !tgt_entry.replicate { + continue; + } + + // If dobj.TargetArn is not empty string, this is a case of specific target being re-synced. 
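+        // e.g. an admin-triggered resync of a single target sets `target_arn`
+        // to that target's ARN (something like
+        // "arn:rustfs:replication::<id>:<bucket>", shown here only as a
+        // hypothetical shape), and every non-matching entry is skipped by the
+        // check below.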
+ if !dobj.target_arn.is_empty() && dobj.target_arn != tgt_entry.arn { + continue; + } + + // Get the remote target client + let Some(tgt_client) = BucketTargetSys::get().get_remote_target_client(&bucket, &tgt_entry.arn).await else { + warn!("failed to get target for bucket:{:?}, arn:{:?}", &bucket, &tgt_entry.arn); + send_event(EventArgs { + event_name: EventName::ObjectReplicationNotTracked.as_ref().to_string(), + bucket_name: bucket.clone(), + object: ObjectInfo { + bucket: bucket.clone(), + name: dobj.delete_object.object_name.clone(), + version_id, + delete_marker: dobj.delete_object.delete_marker, + ..Default::default() + }, + user_agent: "Internal: [Replication]".to_string(), + host: GLOBAL_LocalNodeName.to_string(), + ..Default::default() + }); + continue; + }; + + let dobj_clone = dobj.clone(); + + // Spawn task in the join set + join_set.spawn(async move { replicate_delete_to_target(&dobj_clone, tgt_client.clone()).await }); + } + + // Collect all results + while let Some(result) = join_set.join_next().await { + match result { + Ok(tgt_info) => { + rinfos.targets.push(tgt_info); + } + Err(e) => { + error!("replicate_delete task failed: {}", e); + send_event(EventArgs { + event_name: EventName::ObjectReplicationNotTracked.as_ref().to_string(), + bucket_name: bucket.clone(), + object: ObjectInfo { + bucket: bucket.clone(), + name: dobj.delete_object.object_name.clone(), + version_id, + delete_marker: dobj.delete_object.delete_marker, + ..Default::default() + }, + ..Default::default() + }); + } + } + } + + let (replication_status, prev_status) = if dobj.delete_object.version_id.is_none() { + ( + rinfos.replication_status(), + dobj.delete_object + .replication_state + .as_ref() + .map(|v| v.composite_replication_status()) + .unwrap_or(ReplicationStatusType::Empty), + ) + } else { + ( + ReplicationStatusType::from(rinfos.version_purge_status()), + ReplicationStatusType::from( + dobj.delete_object + .replication_state + .as_ref() + .map(|v| v.composite_version_purge_status()) + .unwrap_or(VersionPurgeStatusType::Empty), + ), + ) + }; + + for tgt in rinfos.targets.iter() { + if tgt.replication_status != tgt.prev_replication_status { + // TODO: update global replication status + } + } + + let mut drs = get_replication_state( + &rinfos, + &dobj.delete_object.replication_state.clone().unwrap_or_default(), + dobj.delete_object.version_id.map(|v| v.to_string()), + ); + if replication_status != prev_status { + drs.replica_timestamp = Some(OffsetDateTime::now_utc()); + } + + let event_name = if replication_status == ReplicationStatusType::Completed { + EventName::ObjectReplicationComplete.as_ref().to_string() + } else { + EventName::ObjectReplicationFailed.as_ref().to_string() + }; + + match storage + .delete_object( + &bucket, + &dobj.delete_object.object_name, + ObjectOptions { + version_id: version_id.map(|v| v.to_string()), + mod_time: dobj.delete_object.delete_marker_mtime, + delete_replication: Some(drs), + versioned: BucketVersioningSys::prefix_enabled(&bucket, &dobj.delete_object.object_name).await, + version_suspended: BucketVersioningSys::prefix_suspended(&bucket, &dobj.delete_object.object_name).await, + ..Default::default() + }, + ) + .await + { + Ok(object) => { + send_event(EventArgs { + event_name, + bucket_name: bucket.clone(), + object, + ..Default::default() + }); + } + Err(e) => { + error!("failed to delete object for bucket:{} arn:{} error:{}", bucket, dobj.target_arn, e); + send_event(EventArgs { + event_name, + bucket_name: bucket.clone(), + object: ObjectInfo { + bucket: 
bucket.clone(), + name: dobj.delete_object.object_name.clone(), + version_id, + delete_marker: dobj.delete_object.delete_marker, + ..Default::default() + }, + ..Default::default() + }); + } + } +} + +async fn replicate_delete_to_target(dobj: &DeletedObjectReplicationInfo, tgt_client: Arc) -> ReplicatedTargetInfo { + let version_id = if let Some(version_id) = &dobj.delete_object.delete_marker_version_id { + version_id.to_owned() + } else { + dobj.delete_object.version_id.unwrap_or_default() + }; + + let mut rinfo = dobj + .delete_object + .replication_state + .clone() + .unwrap_or_default() + .target_state(&tgt_client.arn); + rinfo.op_type = dobj.op_type; + rinfo.endpoint = tgt_client.endpoint.clone(); + rinfo.secure = tgt_client.secure; + + if dobj.delete_object.version_id.is_none() + && rinfo.prev_replication_status == ReplicationStatusType::Completed + && dobj.op_type != ReplicationType::ExistingObject + { + rinfo.replication_status = rinfo.prev_replication_status.clone(); + return rinfo; + } + + if dobj.delete_object.version_id.is_some() && rinfo.version_purge_status == VersionPurgeStatusType::Complete { + return rinfo; + } + + if BucketTargetSys::get().is_offline(&tgt_client.to_url()).await { + if dobj.delete_object.version_id.is_none() { + rinfo.replication_status = ReplicationStatusType::Failed; + } else { + rinfo.version_purge_status = VersionPurgeStatusType::Failed; + } + return rinfo; + } + + let version_id = if version_id.is_nil() { + None + } else { + Some(version_id.to_string()) + }; + + if dobj.delete_object.delete_marker_version_id.is_some() { + if let Err(e) = tgt_client + .head_object(&tgt_client.bucket, &dobj.delete_object.object_name, version_id.clone()) + .await + { + if let SdkError::ServiceError(service_err) = &e { + if !service_err.err().is_not_found() { + rinfo.replication_status = ReplicationStatusType::Failed; + rinfo.error = Some(e.to_string()); + + return rinfo; + } + } + }; + } + + match tgt_client + .remove_object( + &tgt_client.bucket, + &dobj.delete_object.object_name, + version_id.clone(), + RemoveObjectOptions { + force_delete: false, + governance_bypass: false, + replication_delete_marker: dobj.delete_object.delete_marker_version_id.is_some(), + replication_mtime: dobj.delete_object.delete_marker_mtime, + replication_status: ReplicationStatusType::Replica, + replication_request: true, + replication_validity_check: false, + }, + ) + .await + { + Ok(_) => { + if dobj.delete_object.version_id.is_none() { + rinfo.replication_status = ReplicationStatusType::Completed; + } else { + rinfo.version_purge_status = VersionPurgeStatusType::Complete; + } + } + Err(e) => { + rinfo.error = Some(e.to_string()); + if dobj.delete_object.version_id.is_none() { + rinfo.replication_status = ReplicationStatusType::Failed; + } else { + rinfo.version_purge_status = VersionPurgeStatusType::Failed; + } + // TODO: check offline + } + } + + rinfo +} + +pub async fn replicate_object(roi: ReplicateObjectInfo, storage: Arc) { + let bucket = roi.bucket.clone(); + let object = roi.name.clone(); + + let cfg = match get_replication_config(&bucket).await { + Ok(Some(config)) => config, + Ok(None) => { + warn!("No replication config found for bucket: {}", bucket); + send_event(EventArgs { + event_name: EventName::ObjectReplicationNotTracked.as_ref().to_string(), + bucket_name: bucket.clone(), + object: roi.to_object_info(), + host: GLOBAL_LocalNodeName.to_string(), + user_agent: "Internal: [Replication]".to_string(), + ..Default::default() + }); + return; + } + Err(err) => { + error!("Failed 
to get replication config for bucket {}: {}", bucket, err); + send_event(EventArgs { + event_name: EventName::ObjectReplicationNotTracked.as_ref().to_string(), + bucket_name: bucket.clone(), + object: roi.to_object_info(), + host: GLOBAL_LocalNodeName.to_string(), + user_agent: "Internal: [Replication]".to_string(), + ..Default::default() + }); + return; + } + }; + + let tgt_arns = cfg.filter_target_arns(&ObjectOpts { + name: object.clone(), + user_tags: roi.user_tags.clone(), + ssec: roi.ssec, + ..Default::default() + }); + + // TODO: NSLOCK + + let mut join_set = JoinSet::new(); + + for arn in tgt_arns { + let Some(tgt_client) = BucketTargetSys::get().get_remote_target_client(&bucket, &arn).await else { + warn!("failed to get target for bucket:{:?}, arn:{:?}", &bucket, &arn); + send_event(EventArgs { + event_name: EventName::ObjectReplicationNotTracked.as_ref().to_string(), + bucket_name: bucket.clone(), + object: roi.to_object_info(), + host: GLOBAL_LocalNodeName.to_string(), + user_agent: "Internal: [Replication]".to_string(), + ..Default::default() + }); + continue; + }; + + let roi_clone = roi.clone(); + let storage_clone = storage.clone(); + join_set.spawn(async move { + if roi.op_type == ReplicationType::Object { + roi_clone.replicate_object(storage_clone, tgt_client).await + } else { + roi_clone.replicate_all(storage_clone, tgt_client).await + } + }); + } + + let mut rinfos = ReplicatedInfos { + replication_timestamp: Some(OffsetDateTime::now_utc()), + targets: Vec::with_capacity(join_set.len()), + }; + + while let Some(result) = join_set.join_next().await { + match result { + Ok(tgt_info) => { + rinfos.targets.push(tgt_info); + } + Err(e) => { + error!("replicate_object task failed: {}", e); + send_event(EventArgs { + event_name: EventName::ObjectReplicationNotTracked.as_ref().to_string(), + bucket_name: bucket.clone(), + object: roi.to_object_info(), + host: GLOBAL_LocalNodeName.to_string(), + user_agent: "Internal: [Replication]".to_string(), + ..Default::default() + }); + } + } + } + + let replication_status = rinfos.replication_status(); + let new_replication_internal = rinfos.replication_status_internal(); + let mut object_info = roi.to_object_info(); + + if roi.replication_status_internal != new_replication_internal || rinfos.replication_resynced() { + let popts = ObjectOptions { + version_id: roi.version_id.map(|v| v.to_string()), + ..Default::default() + }; + + if let Ok(u) = storage.put_object_metadata(&bucket, &object, &popts).await { + object_info = u; + } + + // TODO: update stats + } + + let event_name = if replication_status == ReplicationStatusType::Completed { + EventName::ObjectReplicationComplete.as_ref().to_string() + } else { + EventName::ObjectReplicationFailed.as_ref().to_string() + }; + + send_event(EventArgs { + event_name, + bucket_name: bucket.clone(), + object: object_info, + host: GLOBAL_LocalNodeName.to_string(), + user_agent: "Internal: [Replication]".to_string(), + ..Default::default() + }); + + if rinfos.replication_status() != ReplicationStatusType::Completed { + // TODO: update stats + // pool + } +} + +trait ReplicateObjectInfoExt { + async fn replicate_object(&self, storage: Arc, tgt_client: Arc) -> ReplicatedTargetInfo; + async fn replicate_all(&self, storage: Arc, tgt_client: Arc) -> ReplicatedTargetInfo; + fn to_object_info(&self) -> ObjectInfo; +} + +impl ReplicateObjectInfoExt for ReplicateObjectInfo { + async fn replicate_object(&self, storage: Arc, tgt_client: Arc) -> ReplicatedTargetInfo { + let bucket = self.bucket.clone(); + let 
object = self.name.clone();
+
+        let replication_action = ReplicationAction::All;
+        let mut rinfo = ReplicatedTargetInfo {
+            arn: tgt_client.arn.clone(),
+            size: self.actual_size,
+            replication_action,
+            op_type: self.op_type,
+            replication_status: ReplicationStatusType::Failed,
+            prev_replication_status: self.target_replication_status(&tgt_client.arn),
+            endpoint: tgt_client.endpoint.clone(),
+            secure: tgt_client.secure,
+            ..Default::default()
+        };
+
+        if self.target_replication_status(&tgt_client.arn) == ReplicationStatusType::Completed
+            && !self.existing_obj_resync.is_empty()
+            && self.existing_obj_resync.must_resync_target(&tgt_client.arn)
+        {
+            rinfo.replication_status = ReplicationStatusType::Completed;
+            rinfo.replication_resynced = true;
+
+            return rinfo;
+        }
+
+        if BucketTargetSys::get().is_offline(&tgt_client.to_url()).await {
+            warn!("target is offline: {}", tgt_client.to_url());
+            send_event(EventArgs {
+                event_name: EventName::ObjectReplicationNotTracked.as_ref().to_string(),
+                bucket_name: bucket.clone(),
+                object: self.to_object_info(),
+                host: GLOBAL_LocalNodeName.to_string(),
+                user_agent: "Internal: [Replication]".to_string(),
+                ..Default::default()
+            });
+            return rinfo;
+        }
+
+        let versioned = BucketVersioningSys::prefix_enabled(&bucket, &object).await;
+        let version_suspended = BucketVersioningSys::prefix_suspended(&bucket, &object).await;
+
+        let mut gr = match storage
+            .get_object_reader(
+                &bucket,
+                &object,
+                None,
+                HeaderMap::new(),
+                &ObjectOptions {
+                    version_id: self.version_id.map(|v| v.to_string()),
+                    version_suspended,
+                    versioned,
+                    replication_request: true,
+                    ..Default::default()
+                },
+            )
+            .await
+        {
+            Ok(gr) => gr,
+            Err(e) => {
+                if !is_err_object_not_found(&e) && !is_err_version_not_found(&e) {
+                    warn!("failed to get object reader for bucket:{} arn:{} error:{}", bucket, tgt_client.arn, e);
+
+                    send_event(EventArgs {
+                        event_name: EventName::ObjectReplicationNotTracked.as_ref().to_string(),
+                        bucket_name: bucket.clone(),
+                        object: self.to_object_info(),
+                        host: GLOBAL_LocalNodeName.to_string(),
+                        user_agent: "Internal: [Replication]".to_string(),
+                        ..Default::default()
+                    });
+                }
+
+                return rinfo;
+            }
+        };
+
+        let object_info = gr.object_info.clone();
+
+        rinfo.prev_replication_status = object_info.target_replication_status(&tgt_client.arn);
+
+        let size = match object_info.get_actual_size() {
+            Ok(size) => size,
+            Err(e) => {
+                warn!("failed to get actual size for bucket:{} arn:{} error:{}", bucket, tgt_client.arn, e);
+                send_event(EventArgs {
+                    event_name: EventName::ObjectReplicationNotTracked.as_ref().to_string(),
+                    bucket_name: bucket.clone(),
+                    object: object_info,
+                    host: GLOBAL_LocalNodeName.to_string(),
+                    user_agent: "Internal: [Replication]".to_string(),
+                    ..Default::default()
+                });
+                return rinfo;
+            }
+        };
+
+        if tgt_client.bucket.is_empty() {
+            warn!("target bucket is empty: {}", tgt_client.bucket);
+            send_event(EventArgs {
+                event_name: EventName::ObjectReplicationNotTracked.as_ref().to_string(),
+                bucket_name: bucket.clone(),
+                object: object_info,
+                host: GLOBAL_LocalNodeName.to_string(),
+                user_agent: "Internal: [Replication]".to_string(),
+                ..Default::default()
+            });
+            return rinfo;
+        }
+
+        rinfo.replication_status = ReplicationStatusType::Completed;
+        rinfo.replication_resynced = true;
+        rinfo.size = size;
+        rinfo.replication_action = replication_action;
+
+        let (put_opts, is_multipart) = match put_replication_opts(&tgt_client.storage_class, &object_info) {
+            Ok((put_opts, is_mp)) => (put_opts, is_mp),
+            Err(e) => {
+                warn!(
+                    "failed to get put replication opts for bucket:{} arn:{} error:{}",
+                    bucket, tgt_client.arn, e
+                );
+                send_event(EventArgs {
+                    event_name: EventName::ObjectReplicationNotTracked.as_ref().to_string(),
+                    bucket_name: bucket.clone(),
+                    object: object_info,
+                    host: GLOBAL_LocalNodeName.to_string(),
+                    user_agent: "Internal: [Replication]".to_string(),
+                    ..Default::default()
+                });
+                return rinfo;
+            }
+        };
+
+        // TODO:bandwidth
+
+        if let Some(err) = if is_multipart {
+            replicate_object_with_multipart(tgt_client.clone(), &tgt_client.bucket, &object, gr.stream, &object_info, put_opts)
+                .await
+                .err()
+        } else {
+            // TODO: use stream
+            let body = match gr.read_all().await {
+                Ok(body) => body,
+                Err(e) => {
+                    rinfo.replication_status = ReplicationStatusType::Failed;
+                    rinfo.error = Some(e.to_string());
+                    warn!("failed to read object for bucket:{} arn:{} error:{}", bucket, tgt_client.arn, e);
+                    send_event(EventArgs {
+                        event_name: EventName::ObjectReplicationNotTracked.as_ref().to_string(),
+                        bucket_name: bucket.clone(),
+                        object: object_info.clone(),
+                        host: GLOBAL_LocalNodeName.to_string(),
+                        user_agent: "Internal: [Replication]".to_string(),
+                        ..Default::default()
+                    });
+                    return rinfo;
+                }
+            };
+            let reader = ByteStream::from(body);
+            tgt_client
+                .put_object(&tgt_client.bucket, &object, size, reader, &put_opts)
+                .await
+                .map_err(|e| std::io::Error::other(e.to_string()))
+                .err()
+        } {
+            rinfo.replication_status = ReplicationStatusType::Failed;
+            rinfo.error = Some(err.to_string());
+
+            // TODO: check offline
+            return rinfo;
+        }
+
+        rinfo.replication_status = ReplicationStatusType::Completed;
+
+        rinfo
+    }
+
+    async fn replicate_all(&self, storage: Arc<ECStore>, tgt_client: Arc<TargetClient>) -> ReplicatedTargetInfo {
+        let start_time = OffsetDateTime::now_utc();
+
+        let bucket = self.bucket.clone();
+        let object = self.name.clone();
+
+        let mut replication_action = ReplicationAction::Metadata;
+        let mut rinfo = ReplicatedTargetInfo {
+            arn: tgt_client.arn.clone(),
+            size: self.actual_size,
+            replication_action,
+            op_type: self.op_type,
+            replication_status: ReplicationStatusType::Failed,
+            prev_replication_status: self.target_replication_status(&tgt_client.arn),
+            endpoint: tgt_client.endpoint.clone(),
+            secure: tgt_client.secure,
+            ..Default::default()
+        };
+
+        if BucketTargetSys::get().is_offline(&tgt_client.to_url()).await {
+            warn!("target is offline: {}", tgt_client.to_url());
+            send_event(EventArgs {
+                event_name: EventName::ObjectReplicationNotTracked.as_ref().to_string(),
+                bucket_name: bucket.clone(),
+                object: self.to_object_info(),
+                host: GLOBAL_LocalNodeName.to_string(),
+                user_agent: "Internal: [Replication]".to_string(),
+                ..Default::default()
+            });
+            return rinfo;
+        }
+
+        let versioned = BucketVersioningSys::prefix_enabled(&bucket, &object).await;
+        let version_suspended = BucketVersioningSys::prefix_suspended(&bucket, &object).await;
+
+        let mut gr = match storage
+            .get_object_reader(
+                &bucket,
+                &object,
+                None,
+                HeaderMap::new(),
+                &ObjectOptions {
+                    version_id: self.version_id.map(|v| v.to_string()),
+                    version_suspended,
+                    versioned,
+                    replication_request: true,
+                    ..Default::default()
+                },
+            )
+            .await
+        {
+            Ok(gr) => gr,
+            Err(e) => {
+                if !is_err_object_not_found(&e) && !is_err_version_not_found(&e) {
+                    warn!("failed to get object reader for bucket:{} arn:{} error:{}", bucket, tgt_client.arn, e);
+                    send_event(EventArgs {
+                        event_name: EventName::ObjectReplicationNotTracked.as_ref().to_string(),
+                        bucket_name: bucket.clone(),
+                        object: self.to_object_info(),
+                        host: 
GLOBAL_LocalNodeName.to_string(), + user_agent: "Internal: [Replication]".to_string(), + ..Default::default() + }); + } + + return rinfo; + } + }; + + let object_info = gr.object_info.clone(); + + rinfo.prev_replication_status = object_info.target_replication_status(&tgt_client.arn); + + if rinfo.prev_replication_status == ReplicationStatusType::Completed + && !self.existing_obj_resync.is_empty() + && self.existing_obj_resync.must_resync_target(&tgt_client.arn) + { + rinfo.replication_status = ReplicationStatusType::Completed; + rinfo.replication_resynced = true; + return rinfo; + } + + let size = match object_info.get_actual_size() { + Ok(size) => size, + Err(e) => { + warn!("failed to get actual size for bucket:{} arn:{} error:{}", bucket, tgt_client.arn, e); + send_event(EventArgs { + event_name: EventName::ObjectReplicationNotTracked.as_ref().to_string(), + bucket_name: bucket.clone(), + object: object_info, + host: GLOBAL_LocalNodeName.to_string(), + user_agent: "Internal: [Replication]".to_string(), + ..Default::default() + }); + return rinfo; + } + }; + + // TODO: SSE + + if tgt_client.bucket.is_empty() { + warn!("target bucket is empty: {}", tgt_client.bucket); + send_event(EventArgs { + event_name: EventName::ObjectReplicationNotTracked.as_ref().to_string(), + bucket_name: bucket.clone(), + object: object_info, + host: GLOBAL_LocalNodeName.to_string(), + user_agent: "Internal: [Replication]".to_string(), + ..Default::default() + }); + return rinfo; + } + + let sopts = StatObjectOptions { + version_id: object_info.version_id.map(|v| v.to_string()).unwrap_or_default(), + internal: AdvancedGetOptions { + replication_proxy_request: "false".to_string(), + ..Default::default() + }, + ..Default::default() + }; + + sopts.set(AMZ_TAGGING_DIRECTIVE, "ACCESS"); + + match tgt_client + .head_object(&tgt_client.bucket, &object, self.version_id.map(|v| v.to_string())) + .await + { + Ok(oi) => { + replication_action = get_replication_action(&object_info, &oi, self.op_type); + rinfo.replication_status = ReplicationStatusType::Completed; + if replication_action == ReplicationAction::None { + if self.op_type == ReplicationType::ExistingObject + && object_info.mod_time + > oi.last_modified.map(|dt| { + time::OffsetDateTime::from_unix_timestamp(dt.secs()).unwrap_or(time::OffsetDateTime::UNIX_EPOCH) + }) + && object_info.version_id.is_none() + { + warn!( + "unable to replicate {}/{} Newer version exists on target {}", + bucket, + object, + tgt_client.to_url() + ); + send_event(EventArgs { + event_name: EventName::ObjectReplicationNotTracked.as_ref().to_string(), + bucket_name: bucket.clone(), + object: object_info.clone(), + host: GLOBAL_LocalNodeName.to_string(), + user_agent: "Internal: [Replication]".to_string(), + ..Default::default() + }); + } + + if object_info.target_replication_status(&tgt_client.arn) == ReplicationStatusType::Pending + || object_info.target_replication_status(&tgt_client.arn) == ReplicationStatusType::Failed + || self.op_type == ReplicationType::ExistingObject + { + rinfo.replication_action = replication_action; + rinfo.replication_status = ReplicationStatusType::Completed; + } + + if rinfo.replication_status == ReplicationStatusType::Completed + && self.op_type == ReplicationType::ExistingObject + && !tgt_client.reset_id.is_empty() + { + rinfo.resync_timestamp = + format!("{};{}", OffsetDateTime::now_utc().format(&Rfc3339).unwrap(), tgt_client.reset_id); + rinfo.replication_resynced = true; + } + + rinfo.duration = (OffsetDateTime::now_utc() - start_time).unsigned_abs(); + + 
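+                    // Data and metadata already match on the target
+                    // (ReplicationAction::None), so only status bookkeeping was
+                    // updated above; record the round-trip duration and return
+                    // without re-uploading.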
return rinfo; + } + } + Err(e) => { + if let Some(se) = e.as_service_error() { + if se.is_not_found() { + replication_action = ReplicationAction::All; + } else { + rinfo.error = Some(e.to_string()); + warn!("failed to head object for bucket:{} arn:{} error:{}", bucket, tgt_client.arn, e); + + send_event(EventArgs { + event_name: EventName::ObjectReplicationNotTracked.as_ref().to_string(), + bucket_name: bucket.clone(), + object: object_info, + host: GLOBAL_LocalNodeName.to_string(), + user_agent: "Internal: [Replication]".to_string(), + ..Default::default() + }); + + rinfo.duration = (OffsetDateTime::now_utc() - start_time).unsigned_abs(); + return rinfo; + } + } else { + rinfo.error = Some(e.to_string()); + warn!("failed to head object for bucket:{} arn:{} error:{}", bucket, tgt_client.arn, e); + + send_event(EventArgs { + event_name: EventName::ObjectReplicationNotTracked.as_ref().to_string(), + bucket_name: bucket.clone(), + object: object_info, + host: GLOBAL_LocalNodeName.to_string(), + user_agent: "Internal: [Replication]".to_string(), + ..Default::default() + }); + + rinfo.duration = (OffsetDateTime::now_utc() - start_time).unsigned_abs(); + return rinfo; + } + } + }; + + rinfo.replication_status = ReplicationStatusType::Completed; + rinfo.size = size; + rinfo.replication_action = replication_action; + + if replication_action != ReplicationAction::All { + // TODO: copy object + } else { + let (put_opts, is_multipart) = match put_replication_opts(&tgt_client.storage_class, &object_info) { + Ok((put_opts, is_mp)) => (put_opts, is_mp), + Err(e) => { + rinfo.error = Some(e.to_string()); + warn!( + "failed to get put replication opts for bucket:{} arn:{} error:{}", + bucket, tgt_client.arn, e + ); + send_event(EventArgs { + event_name: EventName::ObjectReplicationNotTracked.as_ref().to_string(), + bucket_name: bucket.clone(), + object: object_info, + host: GLOBAL_LocalNodeName.to_string(), + user_agent: "Internal: [Replication]".to_string(), + ..Default::default() + }); + + rinfo.duration = (OffsetDateTime::now_utc() - start_time).unsigned_abs(); + return rinfo; + } + }; + if let Some(err) = if is_multipart { + replicate_object_with_multipart( + tgt_client.clone(), + &tgt_client.bucket, + &object, + gr.stream, + &object_info, + put_opts, + ) + .await + .err() + } else { + let body = match gr.read_all().await { + Ok(body) => body, + Err(e) => { + rinfo.replication_status = ReplicationStatusType::Failed; + rinfo.error = Some(e.to_string()); + warn!("failed to read object for bucket:{} arn:{} error:{}", bucket, tgt_client.arn, e); + send_event(EventArgs { + event_name: EventName::ObjectReplicationNotTracked.as_ref().to_string(), + bucket_name: bucket.clone(), + object: object_info, + host: GLOBAL_LocalNodeName.to_string(), + user_agent: "Internal: [Replication]".to_string(), + ..Default::default() + }); + rinfo.duration = (OffsetDateTime::now_utc() - start_time).unsigned_abs(); + return rinfo; + } + }; + let reader = ByteStream::from(body); + tgt_client + .put_object(&tgt_client.bucket, &object, size, reader, &put_opts) + .await + .map_err(|e| std::io::Error::other(e.to_string())) + .err() + } { + rinfo.replication_status = ReplicationStatusType::Failed; + rinfo.error = Some(err.to_string()); + rinfo.duration = (OffsetDateTime::now_utc() - start_time).unsigned_abs(); + + // TODO: check offline + return rinfo; + } + } + + rinfo + } + + fn to_object_info(&self) -> ObjectInfo { + ObjectInfo { + bucket: self.bucket.clone(), + name: self.name.clone(), + mod_time: self.mod_time, + version_id: 
self.version_id, + size: self.size, + user_tags: self.user_tags.clone(), + actual_size: self.actual_size, + replication_status_internal: self.replication_status_internal.clone(), + replication_status: self.replication_status.clone(), + version_purge_status_internal: self.version_purge_status_internal.clone(), + version_purge_status: self.version_purge_status.clone(), + delete_marker: true, + checksum: self.checksum.clone(), + ..Default::default() + } + } +} + +// Standard headers that needs to be extracted from User metadata. +static STANDARD_HEADERS: &[&str] = &[ + headers::CONTENT_TYPE, + headers::CACHE_CONTROL, + headers::CONTENT_ENCODING, + headers::CONTENT_LANGUAGE, + headers::CONTENT_DISPOSITION, + headers::AMZ_STORAGE_CLASS, + headers::AMZ_OBJECT_TAGGING, + headers::AMZ_BUCKET_REPLICATION_STATUS, + headers::AMZ_OBJECT_LOCK_MODE, + headers::AMZ_OBJECT_LOCK_RETAIN_UNTIL_DATE, + headers::AMZ_OBJECT_LOCK_LEGAL_HOLD, + headers::AMZ_TAG_COUNT, + headers::AMZ_SERVER_SIDE_ENCRYPTION, +]; + +fn is_standard_header(k: &str) -> bool { + STANDARD_HEADERS.iter().any(|h| h.eq_ignore_ascii_case(k)) +} + +fn put_replication_opts(sc: &str, object_info: &ObjectInfo) -> Result<(PutObjectOptions, bool)> { + let mut meta = HashMap::new(); + + for (k, v) in object_info.user_defined.iter() { + if strings_has_prefix_fold(k, RESERVED_METADATA_PREFIX) { + continue; + } + + if is_standard_header(k) { + continue; + } + + meta.insert(k.to_string(), v.to_string()); + } + + let is_multipart = object_info.is_multipart(); + + let mut put_op = PutObjectOptions { + user_metadata: meta, + content_type: object_info.content_type.clone().unwrap_or_default(), + content_encoding: object_info.content_encoding.clone().unwrap_or_default(), + expires: object_info.expires.unwrap_or(OffsetDateTime::UNIX_EPOCH), + storage_class: sc.to_string(), + internal: AdvancedPutOptions { + source_version_id: object_info.version_id.map(|v| v.to_string()).unwrap_or_default(), + source_etag: object_info.etag.clone().unwrap_or_default(), + source_mtime: object_info.mod_time.unwrap_or(OffsetDateTime::UNIX_EPOCH), + replication_status: ReplicationStatusType::Pending, + replication_request: true, + ..Default::default() + }, + ..Default::default() + }; + + if !object_info.user_tags.is_empty() { + let tags = decode_tags_to_map(&object_info.user_tags); + + if !tags.is_empty() { + put_op.user_tags = tags; + put_op.internal.tagging_timestamp = if let Some(ts) = object_info + .user_defined + .get(&format!("{RESERVED_METADATA_PREFIX}tagging-timestamp")) + { + OffsetDateTime::parse(ts, &Rfc3339).unwrap_or(OffsetDateTime::UNIX_EPOCH) + } else { + object_info.mod_time.unwrap_or(OffsetDateTime::UNIX_EPOCH) + }; + } + } + + if let Some(lang) = object_info.user_defined.lookup(headers::CONTENT_LANGUAGE) { + put_op.content_language = lang.to_string(); + } + + if let Some(cd) = object_info.user_defined.lookup(headers::CONTENT_DISPOSITION) { + put_op.content_disposition = cd.to_string(); + } + + if let Some(v) = object_info.user_defined.lookup(headers::CACHE_CONTROL) { + put_op.cache_control = v.to_string(); + } + + if let Some(v) = object_info.user_defined.lookup(headers::AMZ_OBJECT_LOCK_MODE) { + let mode = v.to_string().to_uppercase(); + put_op.mode = Some(aws_sdk_s3::types::ObjectLockRetentionMode::from(mode.as_str())); + } + + if let Some(v) = object_info.user_defined.lookup(headers::AMZ_OBJECT_LOCK_RETAIN_UNTIL_DATE) { + put_op.retain_until_date = OffsetDateTime::parse(v, &Rfc3339).unwrap_or(OffsetDateTime::UNIX_EPOCH); + put_op.internal.retention_timestamp = 
if let Some(v) = object_info
+            .user_defined
+            .get(&format!("{RESERVED_METADATA_PREFIX_LOWER}objectlock-retention-timestamp"))
+        {
+            OffsetDateTime::parse(v, &Rfc3339).unwrap_or(OffsetDateTime::UNIX_EPOCH)
+        } else {
+            object_info.mod_time.unwrap_or(OffsetDateTime::UNIX_EPOCH)
+        };
+    }
+
+    if let Some(v) = object_info.user_defined.lookup(headers::AMZ_OBJECT_LOCK_LEGAL_HOLD) {
+        let hold = v.to_uppercase();
+        put_op.legalhold = Some(ObjectLockLegalHoldStatus::from(hold.as_str()));
+        put_op.internal.legalhold_timestamp = if let Some(v) = object_info
+            .user_defined
+            .get(&format!("{RESERVED_METADATA_PREFIX_LOWER}objectlock-legalhold-timestamp"))
+        {
+            OffsetDateTime::parse(v, &Rfc3339).unwrap_or(OffsetDateTime::UNIX_EPOCH)
+        } else {
+            object_info.mod_time.unwrap_or(OffsetDateTime::UNIX_EPOCH)
+        };
+    }
+
+    // TODO: is encrypted
+
+    Ok((put_op, is_multipart))
+}
+
+async fn replicate_object_with_multipart(
+    cli: Arc,
+    bucket: &str,
+    object: &str,
+    reader: Box,
+    object_info: &ObjectInfo,
+    opts: PutObjectOptions,
+) -> std::io::Result<()> {
+    let mut attempts = 1;
+    let upload_id = loop {
+        match cli.create_multipart_upload(bucket, object, &opts).await {
+            Ok(id) => {
+                break id;
+            }
+            Err(e) => {
+                attempts += 1;
+                if attempts > 3 {
+                    return Err(std::io::Error::other(e.to_string()));
+                }
+
+                tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
+
+                continue;
+            }
+        }
+    };
+
+    let mut uploaded_parts: Vec<CompletedPart> = Vec::new();
+
+    let mut reader = reader;
+    for part_info in object_info.parts.iter() {
+        let mut chunk = vec![0u8; part_info.actual_size as usize];
+        AsyncReadExt::read_exact(&mut *reader, &mut chunk).await?;
+
+        let object_part = cli
+            .put_object_part(
+                bucket,
+                object,
+                &upload_id,
+                part_info.number as i32,
+                part_info.actual_size,
+                ByteStream::from(chunk),
+                &PutObjectPartOptions { ..Default::default() },
+            )
+            .await
+            .map_err(|e| std::io::Error::other(e.to_string()))?;
+
+        let etag = object_part.e_tag.unwrap_or_default();
+
+        uploaded_parts.push(
+            CompletedPart::builder()
+                .part_number(part_info.number as i32)
+                .e_tag(etag)
+                .build(),
+        );
+    }
+
+    let mut user_metadata = HashMap::new();
+
+    user_metadata.insert(
+        RUSTFS_REPLICATION_AUTUAL_OBJECT_SIZE.to_string(),
+        object_info
+            .user_defined
+            .get(&format!("{RESERVED_METADATA_PREFIX}actual-size"))
+            .map(|v| v.to_string())
+            .unwrap_or_default(),
+    );
+
+    cli.complete_multipart_upload(
+        bucket,
+        object,
+        &upload_id,
+        uploaded_parts,
+        &PutObjectOptions {
+            user_metadata,
+            ..Default::default()
+        },
+    )
+    .await
+    .map_err(|e| std::io::Error::other(e.to_string()))?;
+
+    Ok(())
+}
+
+fn get_replication_action(oi1: &ObjectInfo, oi2: &HeadObjectOutput, op_type: ReplicationType) -> ReplicationAction {
+    if op_type == ReplicationType::ExistingObject
+        && oi1.mod_time
+            > oi2
+                .last_modified
+                .map(|dt| time::OffsetDateTime::from_unix_timestamp(dt.secs()).unwrap_or(time::OffsetDateTime::UNIX_EPOCH))
+        && oi1.version_id.is_none()
+    {
+        return ReplicationAction::None;
+    }
+
+    let size = oi1.get_actual_size().unwrap_or_default();
+
+    if oi1.etag != oi2.e_tag
+        || oi1.version_id.map(|v| v.to_string()) != oi2.version_id
+        || size != oi2.content_length.unwrap_or_default()
+        || oi1.delete_marker != oi2.delete_marker.unwrap_or_default()
+        || oi1.mod_time
+            != oi2
+                .last_modified
+                .map(|dt| time::OffsetDateTime::from_unix_timestamp(dt.secs()).unwrap_or(time::OffsetDateTime::UNIX_EPOCH))
+    {
+        return ReplicationAction::All;
+    }
+
+    if oi1.content_type != oi2.content_type {
+        return ReplicationAction::Metadata;
+    }
+
+    let empty_metadata = HashMap::new();
+    let metadata = oi2.metadata.as_ref().unwrap_or(&empty_metadata);
+
+    if let Some(content_encoding) = &oi1.content_encoding {
+        if let Some(enc) = metadata
+            .get(CONTENT_ENCODING)
+            .or_else(|| metadata.get(&CONTENT_ENCODING.to_lowercase()))
+        {
+            if enc != content_encoding {
+                return ReplicationAction::Metadata;
+            }
+        } else {
+            return ReplicationAction::Metadata;
+        }
+    }
+
+    let oi1_tags = decode_tags_to_map(&oi1.user_tags);
+    let oi2_tags = decode_tags_to_map(metadata.get(AMZ_OBJECT_TAGGING).cloned().unwrap_or_default().as_str());
+
+    if (oi2.tag_count.unwrap_or_default() > 0 && oi1_tags != oi2_tags)
+        || oi2.tag_count.unwrap_or_default() != oi1_tags.len() as i32
+    {
+        return ReplicationAction::Metadata;
+    }
+
+    // Compare only necessary headers
+    let compare_keys = vec![
+        "Expires",
+        "Cache-Control",
+        "Content-Language",
+        "Content-Disposition",
+        "X-Amz-Object-Lock-Mode",
+        "X-Amz-Object-Lock-Retain-Until-Date",
+        "X-Amz-Object-Lock-Legal-Hold",
+        "X-Amz-Website-Redirect-Location",
+        "X-Amz-Meta-",
+    ];
+
+    // compare metadata on both maps to see if meta is identical
+    let mut compare_meta1 = HashMap::new();
+    for (k, v) in &oi1.user_defined {
+        let mut found = false;
+        for prefix in &compare_keys {
+            if strings_has_prefix_fold(k, prefix) {
+                found = true;
+                break;
+            }
+        }
+        if found {
+            compare_meta1.insert(k.to_lowercase(), v.clone());
+        }
+    }
+
+    let mut compare_meta2 = HashMap::new();
+    for (k, v) in metadata {
+        let mut found = false;
+        for prefix in &compare_keys {
+            if strings_has_prefix_fold(k.as_str(), prefix) {
+                found = true;
+                break;
+            }
+        }
+        if found {
+            compare_meta2.insert(k.to_lowercase(), v.clone());
+        }
+    }
+
+    if compare_meta1 != compare_meta2 {
+        return ReplicationAction::Metadata;
+    }
+
+    ReplicationAction::None
+}
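+
+// Illustrative check (a sketch, not exhaustive): STANDARD_HEADERS matching is
+// case-insensitive, so standard headers are filtered out of replicated user
+// metadata regardless of header casing.
+#[cfg(test)]
+mod standard_header_tests {
+    use super::*;
+
+    #[test]
+    fn is_standard_header_ignores_ascii_case() {
+        assert!(is_standard_header("content-type"));
+        assert!(is_standard_header("CONTENT-TYPE"));
+        assert!(!is_standard_header("x-custom-header"));
+    }
+}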
diff --git a/crates/ecstore/src/bucket/replication/replication_state.rs b/crates/ecstore/src/bucket/replication/replication_state.rs
new file mode 100644
index 00000000..5de189be
--- /dev/null
+++ b/crates/ecstore/src/bucket/replication/replication_state.rs
@@ -0,0 +1,1201 @@
+use crate::error::Error;
+use rustfs_filemeta::{ReplicatedTargetInfo, ReplicationStatusType, ReplicationType};
+use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
+use std::sync::Arc;
+use std::sync::atomic::{AtomicI64, Ordering};
+use std::sync::atomic::{AtomicU64, Ordering as AtomicOrdering};
+use std::time::{Duration, SystemTime};
+use tokio::sync::{Mutex, RwLock};
+use tokio::time::interval;
+
+/// Exponential Moving Average with thread-safe interior mutability
+#[derive(Debug)]
+pub struct ExponentialMovingAverage {
+    pub alpha: f64,
+    pub value: AtomicU64, // Store f64 as u64 bits
+    pub last_update: Arc<Mutex<SystemTime>>,
+}
+
+impl ExponentialMovingAverage {
+    pub fn new() -> Self {
+        let now = SystemTime::now();
+        Self {
+            alpha: 0.1, // smoothing factor
+            value: AtomicU64::new(0_f64.to_bits()),
+            last_update: Arc::new(Mutex::new(now)),
+        }
+    }
+
+    pub fn add_value(&self, value: f64, timestamp: SystemTime) {
+        let current_value = f64::from_bits(self.value.load(AtomicOrdering::Relaxed));
+        let new_value = if current_value == 0.0 {
+            value
+        } else {
+            self.alpha * value + (1.0 - self.alpha) * current_value
+        };
+        self.value.store(new_value.to_bits(), AtomicOrdering::Relaxed);
+
+        // Update the timestamp (the mutex is async, so use try_lock to avoid blocking)
+        if let Ok(mut last_update) = self.last_update.try_lock() {
+            *last_update = timestamp;
+        }
+    }
+
+    pub fn get_current_average(&self) -> f64 {
+        f64::from_bits(self.value.load(AtomicOrdering::Relaxed))
+    }
+
+    pub fn update_exponential_moving_average(&self, now: SystemTime) {
+        if let Ok(mut last_update_guard) = self.last_update.try_lock() {
+            let last_update = *last_update_guard;
+            if let Ok(duration) = now.duration_since(last_update) {
+                if duration.as_secs() > 0 {
+                    let decay = (-duration.as_secs_f64() / 60.0).exp(); // 1 minute decay
+                    let current_value = f64::from_bits(self.value.load(AtomicOrdering::Relaxed));
+                    self.value.store((current_value * decay).to_bits(), AtomicOrdering::Relaxed);
+                    *last_update_guard = now;
+                }
+            }
+        }
+    }
+
+    pub fn merge(&self, other: &ExponentialMovingAverage) -> Self {
+        let now = SystemTime::now();
+        let self_value = f64::from_bits(self.value.load(AtomicOrdering::Relaxed));
+        let other_value = f64::from_bits(other.value.load(AtomicOrdering::Relaxed));
+        let merged_value = (self_value + other_value) / 2.0;
+
+        // Get timestamps (use current time as fallback)
+        let self_time = self.last_update.try_lock().map(|t| *t).unwrap_or(now);
+        let other_time = other.last_update.try_lock().map(|t| *t).unwrap_or(now);
+        let merged_time = self_time.max(other_time);
+
+        Self {
+            alpha: self.alpha,
+            value: AtomicU64::new(merged_value.to_bits()),
+            last_update: Arc::new(Mutex::new(merged_time)),
+        }
+    }
+}
+
+impl Clone for ExponentialMovingAverage {
+    fn clone(&self) -> Self {
+        let now = SystemTime::now();
+        let value = self.value.load(AtomicOrdering::Relaxed);
+        let last_update = self.last_update.try_lock().map(|t| *t).unwrap_or(now);
+
+        Self {
+            alpha: self.alpha,
+            value: AtomicU64::new(value),
+            last_update: Arc::new(Mutex::new(last_update)),
+        }
+    }
+}
+
+impl Default for ExponentialMovingAverage {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl Serialize for ExponentialMovingAverage {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        use serde::ser::SerializeStruct;
+        let mut state = serializer.serialize_struct("ExponentialMovingAverage", 3)?;
+        state.serialize_field("alpha", &self.alpha)?;
+        state.serialize_field("value", &f64::from_bits(self.value.load(AtomicOrdering::Relaxed)))?;
+        let last_update = self.last_update.try_lock().map(|t| *t).unwrap_or(SystemTime::UNIX_EPOCH);
+        state.serialize_field("last_update", &last_update)?;
+        state.end()
+    }
+}
+
+impl<'de> Deserialize<'de> for ExponentialMovingAverage {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: serde::Deserializer<'de>,
+    {
+        #[derive(Deserialize)]
+        struct ExponentialMovingAverageData {
+            alpha: f64,
+            value: f64,
+            last_update: SystemTime,
+        }
+
+        let data = ExponentialMovingAverageData::deserialize(deserializer)?;
+        Ok(Self {
+            alpha: data.alpha,
+            value: AtomicU64::new(data.value.to_bits()),
+            last_update: Arc::new(Mutex::new(data.last_update)),
+        })
+    }
+}
+
+/// Transfer statistics
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct XferStats {
+    pub avg: f64,
+    pub curr: f64,
+    pub peak: f64,
+    pub measure: ExponentialMovingAverage,
+}
+
+impl XferStats {
+    pub fn new() -> Self {
+        Self {
+            avg: 0.0,
+            curr: 0.0,
+            peak: 0.0,
+            measure: ExponentialMovingAverage::new(),
+        }
+    }
+
+    pub fn add_size(&mut self, size: i64, duration: Duration) {
+        if duration.as_nanos() > 0 {
+            let rate = (size as f64) / duration.as_secs_f64();
+            self.curr = rate;
+            if rate > self.peak {
+                self.peak = rate;
+            }
+            self.measure.add_value(rate, SystemTime::now());
+            self.avg = self.measure.get_current_average();
+        }
+    }
+
+    pub fn clone_stats(&self) -> Self {
+        Self {
+            avg: self.avg,
+            curr:
self.curr,
+            peak: self.peak,
+            measure: self.measure.clone(),
+        }
+    }
+
+    pub fn merge(&self, other: &XferStats) -> Self {
+        Self {
+            avg: (self.avg + other.avg) / 2.0,
+            curr: self.curr + other.curr,
+            peak: self.peak.max(other.peak),
+            measure: self.measure.merge(&other.measure),
+        }
+    }
+
+    pub fn update_exponential_moving_average(&mut self, now: SystemTime) {
+        self.measure.update_exponential_moving_average(now);
+        self.avg = self.measure.get_current_average();
+    }
+}
+
+impl Default for XferStats {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct ReplStat {
+    pub arn: String,
+    pub completed: bool,
+    pub pending: bool,
+    pub failed: bool,
+    pub op_type: ReplicationType,
+    pub transfer_size: i64,
+    pub transfer_duration: Duration,
+    pub endpoint: String,
+    pub secure: bool,
+    pub err: Option<Error>,
+}
+
+impl ReplStat {
+    pub fn new() -> Self {
+        Self {
+            arn: String::new(),
+            completed: false,
+            pending: false,
+            failed: false,
+            op_type: ReplicationType::default(),
+            transfer_size: 0,
+            transfer_duration: Duration::default(),
+            endpoint: String::new(),
+            secure: false,
+            err: None,
+        }
+    }
+
+    pub fn endpoint(&self) -> String {
+        let scheme = if self.secure { "https" } else { "http" };
+        format!("{}://{}", scheme, self.endpoint)
+    }
+
+    #[allow(clippy::too_many_arguments)]
+    pub fn set(
+        &mut self,
+        arn: String,
+        size: i64,
+        duration: Duration,
+        status: ReplicationStatusType,
+        op_type: ReplicationType,
+        endpoint: String,
+        secure: bool,
+        err: Option<Error>,
+    ) {
+        self.arn = arn;
+        self.transfer_size = size;
+        self.transfer_duration = duration;
+        self.op_type = op_type;
+        self.endpoint = endpoint;
+        self.secure = secure;
+        self.err = err;
+
+        // Reset status
+        self.completed = false;
+        self.pending = false;
+        self.failed = false;
+
+        match status {
+            ReplicationStatusType::Completed => self.completed = true,
+            ReplicationStatusType::Pending => self.pending = true,
+            ReplicationStatusType::Failed => self.failed = true,
+            _ => {}
+        }
+    }
+}
+
+impl Default for ReplStat {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+/// Site replication statistics
+#[derive(Debug, Default)]
+pub struct SRStats {
+    pub replica_size: AtomicI64,
+    pub replica_count: AtomicI64,
+    // More site replication related statistics fields can be added here
+}
+
+impl SRStats {
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    pub fn update(&self, rs: &ReplStat, _depl_id: &str) {
+        // Update site replication statistics
+        // In actual implementation, statistics would be updated based on deployment ID
+        if rs.completed {
+            self.replica_size.fetch_add(rs.transfer_size, Ordering::Relaxed);
+            self.replica_count.fetch_add(1, Ordering::Relaxed);
+        }
+    }
+
+    pub fn get(&self) -> HashMap<String, i64> {
+        // Return current statistics
+        let mut stats = HashMap::new();
+        stats.insert("replica_size".to_string(), self.replica_size.load(Ordering::Relaxed));
+        stats.insert("replica_count".to_string(), self.replica_count.load(Ordering::Relaxed));
+        stats
+    }
+}
+
+/// Statistics in queue
+#[derive(Debug, Default, Serialize, Deserialize)]
+pub struct InQueueStats {
+    pub bytes: i64,
+    pub count: i64,
+    #[serde(skip)]
+    pub now_bytes: AtomicI64,
+    #[serde(skip)]
+    pub now_count: AtomicI64,
+}
+
+impl Clone for InQueueStats {
+    fn clone(&self) -> Self {
+        Self {
+            bytes: self.bytes,
+            count: self.count,
+            now_bytes: AtomicI64::new(self.now_bytes.load(Ordering::Relaxed)),
+            now_count: AtomicI64::new(self.now_count.load(Ordering::Relaxed)),
+        }
+    }
+}
+
+impl InQueueStats {
+    pub fn new() -> Self {
Self::default()
+    }
+
+    pub fn get_current_bytes(&self) -> i64 {
+        self.now_bytes.load(Ordering::Relaxed)
+    }
+
+    pub fn get_current_count(&self) -> i64 {
+        self.now_count.load(Ordering::Relaxed)
+    }
+}
+
+/// Metrics in queue
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct InQueueMetric {
+    pub curr: InQueueStats,
+    pub avg: InQueueStats,
+    pub max: InQueueStats,
+}
+
+impl InQueueMetric {
+    pub fn merge(&self, other: &InQueueMetric) -> Self {
+        Self {
+            curr: InQueueStats {
+                bytes: self.curr.bytes + other.curr.bytes,
+                count: self.curr.count + other.curr.count,
+                now_bytes: AtomicI64::new(
+                    self.curr.now_bytes.load(Ordering::Relaxed) + other.curr.now_bytes.load(Ordering::Relaxed),
+                ),
+                now_count: AtomicI64::new(
+                    self.curr.now_count.load(Ordering::Relaxed) + other.curr.now_count.load(Ordering::Relaxed),
+                ),
+            },
+            avg: InQueueStats {
+                bytes: (self.avg.bytes + other.avg.bytes) / 2,
+                count: (self.avg.count + other.avg.count) / 2,
+                ..Default::default()
+            },
+            max: InQueueStats {
+                bytes: self.max.bytes.max(other.max.bytes),
+                count: self.max.count.max(other.max.count),
+                ..Default::default()
+            },
+        }
+    }
+}
+
+/// Queue cache
+#[derive(Debug, Default)]
+pub struct QueueCache {
+    pub bucket_stats: HashMap<String, InQueueStats>,
+    pub sr_queue_stats: InQueueStats,
+}
+
+impl QueueCache {
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    pub fn update(&mut self) {
+        // Update queue statistics cache
+        // In actual implementation, this would get latest statistics from queue system
+    }
+
+    pub fn get_bucket_stats(&self, bucket: &str) -> InQueueMetric {
+        if let Some(bucket_stat) = self.bucket_stats.get(bucket) {
+            InQueueMetric {
+                curr: InQueueStats {
+                    bytes: bucket_stat.now_bytes.load(Ordering::Relaxed),
+                    count: bucket_stat.now_count.load(Ordering::Relaxed),
+                    ..Default::default()
+                },
+                avg: InQueueStats::default(), // simplified implementation
+                max: InQueueStats::default(), // simplified implementation
+            }
+        } else {
+            InQueueMetric::default()
+        }
+    }
+
+    pub fn get_site_stats(&self) -> InQueueMetric {
+        InQueueMetric {
+            curr: InQueueStats {
+                bytes: self.sr_queue_stats.now_bytes.load(Ordering::Relaxed),
+                count: self.sr_queue_stats.now_count.load(Ordering::Relaxed),
+                ..Default::default()
+            },
+            avg: InQueueStats::default(), // simplified implementation
+            max: InQueueStats::default(), // simplified implementation
+        }
+    }
+}
+
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct ProxyMetric {
+    pub get_total: i64,
+    pub get_failed: i64,
+    pub put_total: i64,
+    pub put_failed: i64,
+    pub head_total: i64,
+    pub head_failed: i64,
+}
+
+impl ProxyMetric {
+    pub fn add(&mut self, other: &ProxyMetric) {
+        self.get_total += other.get_total;
+        self.get_failed += other.get_failed;
+        self.put_total += other.put_total;
+        self.put_failed += other.put_failed;
+        self.head_total += other.head_total;
+        self.head_failed += other.head_failed;
+    }
+}
+
+/// Proxy statistics cache
+#[derive(Debug, Clone, Default)]
+pub struct ProxyStatsCache {
+    bucket_stats: HashMap<String, ProxyMetric>,
+}
+
+impl ProxyStatsCache {
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    pub fn inc(&mut self, bucket: &str, api: &str, is_err: bool) {
+        let metric = self.bucket_stats.entry(bucket.to_string()).or_default();
+
+        match api {
+            "GetObject" => {
+                metric.get_total += 1;
+                if is_err {
+                    metric.get_failed += 1;
+                }
+            }
+            "PutObject" => {
+                metric.put_total += 1;
+                if is_err {
+                    metric.put_failed += 1;
+                }
+            }
+            "HeadObject" => {
+                metric.head_total += 1;
+                if is_err {
+                    metric.head_failed += 1;
+                }
+            }
+            _ => {}
+        }
+    }
+
+    pub fn get_bucket_stats(&self, bucket: &str) -> ProxyMetric {
+        self.bucket_stats.get(bucket).cloned().unwrap_or_default()
+    }
+
+    pub fn get_site_stats(&self) -> ProxyMetric {
+        let mut total = ProxyMetric::default();
+        for metric in self.bucket_stats.values() {
+            total.add(metric);
+        }
+        total
+    }
+}
+
+/// Failure statistics
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct FailStats {
+    pub count: i64,
+    pub size: i64,
+}
+
+impl FailStats {
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    pub fn add_size(&mut self, size: i64, _err: Option<&Error>) {
+        self.count += 1;
+        self.size += size;
+    }
+
+    pub fn merge(&self, other: &FailStats) -> Self {
+        Self {
+            count: self.count + other.count,
+            size: self.size + other.size,
+        }
+    }
+
+    pub fn to_metric(&self) -> FailedMetric {
+        FailedMetric {
+            count: self.count,
+            size: self.size,
+        }
+    }
+}
+
+/// Failed metric
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct FailedMetric {
+    pub count: i64,
+    pub size: i64,
+}
+
+/// Latency statistics
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct LatencyStats {
+    pub avg: f64,
+    pub curr: f64,
+    pub max: f64,
+}
+
+impl LatencyStats {
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    pub fn update(&mut self, _size: i64, duration: Duration) {
+        let latency = duration.as_millis() as f64;
+        self.curr = latency;
+        if latency > self.max {
+            self.max = latency;
+        }
+        // Simple moving average (simplified implementation)
+        self.avg = (self.avg + latency) / 2.0;
+    }
+
+    pub fn merge(&self, other: &LatencyStats) -> Self {
+        Self {
+            avg: (self.avg + other.avg) / 2.0,
+            curr: self.curr.max(other.curr),
+            max: self.max.max(other.max),
+        }
+    }
+}
+
+/// Bucket replication statistics for a single target
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct BucketReplicationStat {
+    pub replicated_size: i64,
+    pub replicated_count: i64,
+    pub failed: FailedMetric,
+    pub fail_stats: FailStats,
+    pub latency: LatencyStats,
+    pub xfer_rate_lrg: XferStats,
+    pub xfer_rate_sml: XferStats,
+}
+
+impl BucketReplicationStat {
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    pub fn update_xfer_rate(&mut self, size: i64, duration: Duration) {
+        // Classify as large or small transfer based on size
+        if size > 1024 * 1024 {
+            // > 1MB
+            self.xfer_rate_lrg.add_size(size, duration);
+        } else {
+            self.xfer_rate_sml.add_size(size, duration);
+        }
+    }
+}
+
+/// Queue statistics for nodes
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct QueueStats {
+    pub nodes: Vec<QueueNode>,
+}
+
+/// Queue node statistics
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct QueueNode {
+    pub q_stats: InQueueMetric,
+}
+
+/// Bucket replication statistics
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct BucketReplicationStats {
+    pub stats: HashMap<String, BucketReplicationStat>,
+    pub replica_size: i64,
+    pub replica_count: i64,
+    pub replicated_size: i64,
+    pub replicated_count: i64,
+    pub q_stat: InQueueMetric,
+}
+
+impl BucketReplicationStats {
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.stats.is_empty() && self.replica_size == 0 && self.replicated_size == 0
+    }
+
+    pub fn has_replication_usage(&self) -> bool {
+        self.replica_size > 0 || self.replicated_size > 0 || !self.stats.is_empty()
+    }
+
+    pub fn clone_stats(&self) -> Self {
+        self.clone()
+    }
+}
+
+/// Bucket statistics
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct BucketStats {
+    pub uptime: i64,
+    pub
replication_stats: BucketReplicationStats,
+    pub queue_stats: QueueStats,
+    pub proxy_stats: ProxyMetric,
+}
+
+/// Site replication metrics summary
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct SRMetricsSummary {
+    pub uptime: i64,
+    pub queued: InQueueMetric,
+    pub active_workers: ActiveWorkerStat,
+    pub metrics: HashMap<String, i64>,
+    pub proxied: ProxyMetric,
+    pub replica_size: i64,
+    pub replica_count: i64,
+}
+
+/// Active worker statistics
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct ActiveWorkerStat {
+    pub curr: i32,
+    pub max: i32,
+    pub avg: f64,
+}
+
+impl ActiveWorkerStat {
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    pub fn get(&self) -> Self {
+        self.clone()
+    }
+
+    pub fn update(&mut self) {
+        // Simulate worker statistics update logic
+        // In actual implementation, this would get current active count from worker pool
+    }
+}
+
+/// Global replication statistics
+#[derive(Debug)]
+pub struct ReplicationStats {
+    // Site replication statistics - maintain global level statistics
+    pub sr_stats: Arc<SRStats>,
+    // Active worker statistics
+    pub workers: Arc<Mutex<ActiveWorkerStat>>,
+    // Queue statistics cache
+    pub q_cache: Arc<Mutex<QueueCache>>,
+    // Proxy statistics cache
+    pub p_cache: Arc<Mutex<ProxyStatsCache>>,
+    // MRF backlog statistics (simplified)
+    pub mrf_stats: HashMap<String, i64>,
+    // Bucket replication cache
+    pub cache: Arc<RwLock<HashMap<String, BucketReplicationStats>>>,
+    pub most_recent_stats: Arc<Mutex<HashMap<String, BucketStats>>>,
+}
+
+impl ReplicationStats {
+    pub fn new() -> Self {
+        Self {
+            sr_stats: Arc::new(SRStats::new()),
+            workers: Arc::new(Mutex::new(ActiveWorkerStat::new())),
+            q_cache: Arc::new(Mutex::new(QueueCache::new())),
+            p_cache: Arc::new(Mutex::new(ProxyStatsCache::new())),
+            mrf_stats: HashMap::new(),
+            cache: Arc::new(RwLock::new(HashMap::new())),
+            most_recent_stats: Arc::new(Mutex::new(HashMap::new())),
+        }
+    }
+
+    /// Initialize background tasks
+    pub async fn start_background_tasks(&self) {
+        // Start moving average calculation task
+        let cache_clone = Arc::clone(&self.cache);
+        tokio::spawn(async move {
+            let mut interval = interval(Duration::from_secs(5));
+            loop {
+                interval.tick().await;
+                Self::update_moving_avg_static(&cache_clone).await;
+            }
+        });
+
+        // Start worker statistics collection task
+        let workers_clone = Arc::clone(&self.workers);
+        tokio::spawn(async move {
+            let mut interval = interval(Duration::from_secs(2));
+            loop {
+                interval.tick().await;
+                let mut workers = workers_clone.lock().await;
+                workers.update();
+            }
+        });
+
+        // Start queue statistics collection task
+        let q_cache_clone = Arc::clone(&self.q_cache);
+        tokio::spawn(async move {
+            let mut interval = interval(Duration::from_secs(2));
+            loop {
+                interval.tick().await;
+                let mut cache = q_cache_clone.lock().await;
+                cache.update();
+            }
+        });
+    }
+
+    async fn update_moving_avg_static(cache: &Arc<RwLock<HashMap<String, BucketReplicationStats>>>) {
+        // Simplified implementation: refresh the exponential moving averages in place
+        let now = SystemTime::now();
+
+        let cache_read = cache.read().await;
+        for (_bucket, stats) in cache_read.iter() {
+            for stat in stats.stats.values() {
+                // The moving averages use interior mutability, so a read lock suffices
+                stat.xfer_rate_lrg.measure.update_exponential_moving_average(now);
+                stat.xfer_rate_sml.measure.update_exponential_moving_average(now);
+            }
+        }
+    }
+
+    /// Check if bucket replication statistics have usage
+    pub fn has_replication_usage(&self, bucket: &str) -> bool {
+        if let Ok(cache) = self.cache.try_read() {
+            if let Some(stats) = cache.get(bucket) {
+                return stats.has_replication_usage();
+            }
+        }
+        false
+    }
+
+    /// Get active
worker statistics + pub fn active_workers(&self) -> ActiveWorkerStat { + // This should be called from an async context + // For now, use try_lock to avoid blocking + self.workers.try_lock().map(|w| w.get()).unwrap_or_default() + } + + /// Delete bucket's memory replication statistics + pub async fn delete(&self, bucket: &str) { + let mut cache = self.cache.write().await; + cache.remove(bucket); + } + + /// Update replica statistics + pub async fn update_replica_stat(&self, bucket: &str, size: i64) { + let mut cache = self.cache.write().await; + let stats = cache.entry(bucket.to_string()).or_insert_with(BucketReplicationStats::new); + + stats.replica_size += size; + stats.replica_count += 1; + + // Update site replication statistics + self.sr_stats.replica_size.fetch_add(size, Ordering::Relaxed); + self.sr_stats.replica_count.fetch_add(1, Ordering::Relaxed); + } + + /// Site replication update replica statistics + fn sr_update_replica_stat(&self, size: i64) { + self.sr_stats.replica_size.fetch_add(size, Ordering::Relaxed); + self.sr_stats.replica_count.fetch_add(1, Ordering::Relaxed); + } + + /// Site replication update + fn sr_update(&self, rs: &ReplStat) { + // In actual implementation, deployment ID would be obtained here + let depl_id = "default"; // simplified implementation + self.sr_stats.update(rs, depl_id); + } + + /// Update replication statistics + pub async fn update( + &self, + bucket: &str, + ri: &ReplicatedTargetInfo, + status: ReplicationStatusType, + prev_status: ReplicationStatusType, + ) { + let mut rs = ReplStat::new(); + + match status { + ReplicationStatusType::Pending => { + if ri.op_type.is_data_replication() && prev_status != status { + rs.set( + ri.arn.clone(), + ri.size, + Duration::default(), + status, + ri.op_type, + ri.endpoint.clone(), + ri.secure, + ri.error.as_ref().map(|e| crate::error::Error::other(e.clone())), + ); + } + } + ReplicationStatusType::Completed => { + if ri.op_type.is_data_replication() { + rs.set( + ri.arn.clone(), + ri.size, + ri.duration, + status, + ri.op_type, + ri.endpoint.clone(), + ri.secure, + ri.error.as_ref().map(|e| crate::error::Error::other(e.clone())), + ); + } + } + ReplicationStatusType::Failed => { + if ri.op_type.is_data_replication() && prev_status == ReplicationStatusType::Pending { + rs.set( + ri.arn.clone(), + ri.size, + ri.duration, + status, + ri.op_type, + ri.endpoint.clone(), + ri.secure, + ri.error.as_ref().map(|e| crate::error::Error::other(e.clone())), + ); + } + } + ReplicationStatusType::Replica => { + if ri.op_type == ReplicationType::Object { + rs.set( + ri.arn.clone(), + ri.size, + Duration::default(), + status, + ri.op_type, + String::new(), + false, + ri.error.as_ref().map(|e| crate::error::Error::other(e.clone())), + ); + } + } + _ => {} + } + + // Update site replication memory statistics + if rs.completed || rs.failed { + self.sr_update(&rs); + } + + // Update bucket replication memory statistics + let mut cache = self.cache.write().await; + let bucket_stats = cache.entry(bucket.to_string()).or_insert_with(BucketReplicationStats::new); + + let stat = bucket_stats + .stats + .entry(ri.arn.clone()) + .or_insert_with(|| BucketReplicationStat { + xfer_rate_lrg: XferStats::new(), + xfer_rate_sml: XferStats::new(), + ..Default::default() + }); + + match (rs.completed, rs.failed, rs.pending) { + (true, false, false) => { + stat.replicated_size += rs.transfer_size; + stat.replicated_count += 1; + if rs.transfer_duration > Duration::default() { + stat.latency.update(rs.transfer_size, rs.transfer_duration); + 
stat.update_xfer_rate(rs.transfer_size, rs.transfer_duration);
+                }
+            }
+            (false, true, false) => {
+                stat.fail_stats.add_size(rs.transfer_size, rs.err.as_ref());
+            }
+            (false, false, true) => {
+                // Pending status, no processing for now
+            }
+            _ => {}
+        }
+    }
+
+    /// Get replication metrics for all buckets
+    pub async fn get_all(&self) -> HashMap<String, BucketReplicationStats> {
+        let cache = self.cache.read().await;
+        let mut result = HashMap::new();
+
+        for (bucket, stats) in cache.iter() {
+            let mut cloned_stats = stats.clone_stats();
+            // Add queue statistics
+            let q_cache = self.q_cache.lock().await;
+            cloned_stats.q_stat = q_cache.get_bucket_stats(bucket);
+            result.insert(bucket.clone(), cloned_stats);
+        }
+
+        result
+    }
+
+    /// Get replication metrics for a single bucket
+    pub async fn get(&self, bucket: &str) -> BucketReplicationStats {
+        let cache = self.cache.read().await;
+        if let Some(stats) = cache.get(bucket) {
+            stats.clone_stats()
+        } else {
+            BucketReplicationStats::new()
+        }
+    }
+
+    /// Get metrics summary for site replication node
+    pub async fn get_sr_metrics_for_node(&self) -> SRMetricsSummary {
+        let boot_time = SystemTime::UNIX_EPOCH; // simplified implementation
+        let uptime = SystemTime::now().duration_since(boot_time).unwrap_or_default().as_secs() as i64;
+
+        let q_cache = self.q_cache.lock().await;
+        let queued = q_cache.get_site_stats();
+
+        let p_cache = self.p_cache.lock().await;
+        let proxied = p_cache.get_site_stats();
+
+        SRMetricsSummary {
+            uptime,
+            queued,
+            active_workers: self.active_workers(),
+            metrics: self.sr_stats.get(),
+            proxied,
+            replica_size: self.sr_stats.replica_size.load(Ordering::Relaxed),
+            replica_count: self.sr_stats.replica_count.load(Ordering::Relaxed),
+        }
+    }
+
+    /// Calculate bucket replication statistics
+    pub async fn calculate_bucket_replication_stats(&self, bucket: &str, bucket_stats: Vec<BucketStats>) -> BucketStats {
+        if bucket_stats.is_empty() {
+            return BucketStats {
+                uptime: 0,
+                replication_stats: BucketReplicationStats::new(),
+                queue_stats: Default::default(),
+                proxy_stats: ProxyMetric::default(),
+            };
+        }
+
+        // Accumulate cluster bucket statistics
+        let mut stats = HashMap::new();
+        let mut tot_replica_size = 0i64;
+        let mut tot_replica_count = 0i64;
+        let mut tot_replicated_size = 0i64;
+        let mut tot_replicated_count = 0i64;
+        let mut tq = InQueueMetric::default();
+
+        for bucket_stat in &bucket_stats {
+            tot_replica_size += bucket_stat.replication_stats.replica_size;
+            tot_replica_count += bucket_stat.replication_stats.replica_count;
+
+            for q in &bucket_stat.queue_stats.nodes {
+                tq = tq.merge(&q.q_stats);
+            }
+
+            for (arn, stat) in &bucket_stat.replication_stats.stats {
+                let old_stat = stats.entry(arn.clone()).or_insert_with(|| BucketReplicationStat {
+                    xfer_rate_lrg: XferStats::new(),
+                    xfer_rate_sml: XferStats::new(),
+                    ..Default::default()
+                });
+
+                let f_stats = stat.fail_stats.merge(&old_stat.fail_stats);
+                let lrg = old_stat.xfer_rate_lrg.merge(&stat.xfer_rate_lrg);
+                let sml = old_stat.xfer_rate_sml.merge(&stat.xfer_rate_sml);
+
+                *old_stat = BucketReplicationStat {
+                    failed: f_stats.to_metric(),
+                    fail_stats: f_stats,
+                    replicated_size: stat.replicated_size + old_stat.replicated_size,
+                    replicated_count: stat.replicated_count + old_stat.replicated_count,
+                    latency: stat.latency.merge(&old_stat.latency),
+                    xfer_rate_lrg: lrg,
+                    xfer_rate_sml: sml,
+                };
+
+                tot_replicated_size += stat.replicated_size;
+                tot_replicated_count += stat.replicated_count;
+            }
+        }
+
+        let s = BucketReplicationStats {
+            stats,
+            q_stat: tq,
+            replica_size:
tot_replica_size, + replica_count: tot_replica_count, + replicated_size: tot_replicated_size, + replicated_count: tot_replicated_count, + }; + + let qs = Default::default(); + let mut ps = ProxyMetric::default(); + + for bs in &bucket_stats { + // qs.nodes.extend(bs.queue_stats.nodes.clone()); // simplified implementation + ps.add(&bs.proxy_stats); + } + + let uptime = SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap_or_default() + .as_secs() as i64; + + let bs = BucketStats { + uptime, + replication_stats: s, + queue_stats: qs, + proxy_stats: ps, + }; + + // Update recent statistics + let mut recent_stats = self.most_recent_stats.lock().await; + if !bs.replication_stats.stats.is_empty() { + recent_stats.insert(bucket.to_string(), bs.clone()); + } + + bs + } + + /// Get latest replication statistics + pub async fn get_latest_replication_stats(&self, bucket: &str) -> BucketStats { + // In actual implementation, statistics would be obtained from cluster + // This is simplified to get from local cache + let cache = self.cache.read().await; + if let Some(stats) = cache.get(bucket) { + BucketStats { + uptime: SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap_or_default() + .as_secs() as i64, + replication_stats: stats.clone_stats(), + queue_stats: Default::default(), + proxy_stats: ProxyMetric::default(), + } + } else { + BucketStats { + uptime: 0, + replication_stats: BucketReplicationStats::new(), + queue_stats: Default::default(), + proxy_stats: ProxyMetric::default(), + } + } + } + + /// Increase queue statistics + pub async fn inc_q(&self, bucket: &str, size: i64, _is_delete_repl: bool, _op_type: ReplicationType) { + let mut q_cache = self.q_cache.lock().await; + let stats = q_cache + .bucket_stats + .entry(bucket.to_string()) + .or_insert_with(InQueueStats::default); + stats.now_bytes.fetch_add(size, Ordering::Relaxed); + stats.now_count.fetch_add(1, Ordering::Relaxed); + + q_cache.sr_queue_stats.now_bytes.fetch_add(size, Ordering::Relaxed); + q_cache.sr_queue_stats.now_count.fetch_add(1, Ordering::Relaxed); + } + + /// Decrease queue statistics + pub async fn dec_q(&self, bucket: &str, size: i64, _is_del_marker: bool, _op_type: ReplicationType) { + let mut q_cache = self.q_cache.lock().await; + let stats = q_cache + .bucket_stats + .entry(bucket.to_string()) + .or_insert_with(InQueueStats::default); + stats.now_bytes.fetch_sub(size, Ordering::Relaxed); + stats.now_count.fetch_sub(1, Ordering::Relaxed); + + q_cache.sr_queue_stats.now_bytes.fetch_sub(size, Ordering::Relaxed); + q_cache.sr_queue_stats.now_count.fetch_sub(1, Ordering::Relaxed); + } + + /// Increase proxy metrics + pub async fn inc_proxy(&self, bucket: &str, api: &str, is_err: bool) { + let mut p_cache = self.p_cache.lock().await; + p_cache.inc(bucket, api, is_err); + } + + /// Get proxy statistics + pub async fn get_proxy_stats(&self, bucket: &str) -> ProxyMetric { + let p_cache = self.p_cache.lock().await; + p_cache.get_bucket_stats(bucket) + } +} + +impl Default for ReplicationStats { + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_replication_stats_new() { + let stats = ReplicationStats::new(); + let workers = stats.active_workers(); + assert_eq!(workers.curr, 0); + } + + #[tokio::test] + async fn test_delete_bucket_stats() { + let stats = ReplicationStats::new(); + stats.delete("test-bucket").await; + + let bucket_stats = stats.get("test-bucket").await; + assert!(bucket_stats.is_empty()); + } + + 
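+    // Illustrative checks of the moving-average plumbing above: the EMA seeds
+    // with its first sample and then smooths with alpha = 0.1, while XferStats
+    // tracks the latest and peak transfer rates.
+    #[test]
+    fn test_ema_seed_then_smooth() {
+        let ema = ExponentialMovingAverage::new();
+        ema.add_value(100.0, SystemTime::now());
+        assert_eq!(ema.get_current_average(), 100.0);
+
+        ema.add_value(200.0, SystemTime::now());
+        // 0.1 * 200 + 0.9 * 100 = 110
+        assert!((ema.get_current_average() - 110.0).abs() < 1e-9);
+    }
+
+    #[test]
+    fn test_xfer_stats_curr_and_peak() {
+        let mut xfer = XferStats::new();
+        xfer.add_size(1024, Duration::from_secs(1));
+        xfer.add_size(512, Duration::from_secs(1));
+
+        assert_eq!(xfer.curr, 512.0);
+        assert_eq!(xfer.peak, 1024.0);
+    }
+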
#[tokio::test] + async fn test_update_replica_stat() { + let stats = ReplicationStats::new(); + stats.update_replica_stat("test-bucket", 1024).await; + + let bucket_stats = stats.get("test-bucket").await; + assert_eq!(bucket_stats.replica_size, 1024); + assert_eq!(bucket_stats.replica_count, 1); + } + + #[tokio::test] + async fn test_replication_stats_update() { + let stats = ReplicationStats::new(); + + let target_info = ReplicatedTargetInfo { + arn: "test-arn".to_string(), + size: 1024, + duration: Duration::from_secs(1), + op_type: ReplicationType::Object, + endpoint: "test.example.com".to_string(), + secure: true, + error: None, + ..Default::default() + }; + + stats + .update( + "test-bucket", + &target_info, + ReplicationStatusType::Completed, + ReplicationStatusType::Pending, + ) + .await; + + let bucket_stats = stats.get("test-bucket").await; + assert!(!bucket_stats.is_empty()); + assert!(bucket_stats.stats.contains_key("test-arn")); + + let stat = &bucket_stats.stats["test-arn"]; + assert_eq!(stat.replicated_size, 1024); + assert_eq!(stat.replicated_count, 1); + } + + #[test] + fn test_sr_stats() { + let sr_stats = SRStats::new(); + let initial_size = sr_stats.replica_size.load(Ordering::Relaxed); + let initial_count = sr_stats.replica_count.load(Ordering::Relaxed); + + assert_eq!(initial_size, 0); + assert_eq!(initial_count, 0); + + let stats_map = sr_stats.get(); + assert_eq!(stats_map["replica_size"], 0); + assert_eq!(stats_map["replica_count"], 0); + } +} diff --git a/crates/ecstore/src/bucket/replication/replication_type.rs b/crates/ecstore/src/bucket/replication/replication_type.rs new file mode 100644 index 00000000..c474d508 --- /dev/null +++ b/crates/ecstore/src/bucket/replication/replication_type.rs @@ -0,0 +1,470 @@ +// Copyright 2024 RustFS Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+use crate::error::{Error, Result};
+use crate::store_api::ObjectInfo;
+
+use regex::Regex;
+
+use rustfs_filemeta::VersionPurgeStatusType;
+use rustfs_filemeta::{ReplicatedInfos, ReplicationType};
+use rustfs_filemeta::{ReplicationState, ReplicationStatusType};
+use rustfs_utils::http::RESERVED_METADATA_PREFIX_LOWER;
+use rustfs_utils::http::RUSTFS_REPLICATION_RESET_STATUS;
+use serde::{Deserialize, Serialize};
+use std::any::Any;
+use std::collections::HashMap;
+use std::fmt;
+use time::OffsetDateTime;
+use uuid::Uuid;
+
+pub const REPLICATION_RESET: &str = "replication-reset";
+pub const REPLICATION_STATUS: &str = "replication-status";
+
+// ReplicateQueued - replication being queued trail
+pub const REPLICATE_QUEUED: &str = "replicate:queue";
+
+// ReplicateExisting - audit trail for existing objects replication
+pub const REPLICATE_EXISTING: &str = "replicate:existing";
+// ReplicateExistingDelete - audit trail for delete replication triggered for existing delete markers
+pub const REPLICATE_EXISTING_DELETE: &str = "replicate:existing:delete";
+
+// ReplicateMRF - audit trail for replication from Most Recent Failures (MRF) queue
+pub const REPLICATE_MRF: &str = "replicate:mrf";
+// ReplicateIncoming - audit trail of inline replication
+pub const REPLICATE_INCOMING: &str = "replicate:incoming";
+// ReplicateIncomingDelete - audit trail of inline replication of deletes.
+pub const REPLICATE_INCOMING_DELETE: &str = "replicate:incoming:delete";
+
+// ReplicateHeal - audit trail for healing of failed/pending replications
+pub const REPLICATE_HEAL: &str = "replicate:heal";
+// ReplicateHealDelete - audit trail of healing of failed/pending delete replications.
+pub const REPLICATE_HEAL_DELETE: &str = "replicate:heal:delete";
+
+#[derive(Serialize, Deserialize, Debug)]
+pub struct MrfReplicateEntry {
+    #[serde(rename = "bucket")]
+    pub bucket: String,
+
+    #[serde(rename = "object")]
+    pub object: String,
+
+    #[serde(skip_serializing, skip_deserializing)]
+    pub version_id: Option<Uuid>,
+
+    #[serde(rename = "retryCount")]
+    pub retry_count: i32,
+
+    #[serde(skip_serializing, skip_deserializing)]
+    pub size: i64,
+}
+
+pub trait ReplicationWorkerOperation: Any + Send + Sync {
+    fn to_mrf_entry(&self) -> MrfReplicateEntry;
+    fn as_any(&self) -> &dyn Any;
+    fn get_bucket(&self) -> &str;
+    fn get_object(&self) -> &str;
+    fn get_size(&self) -> i64;
+    fn is_delete_marker(&self) -> bool;
+    fn get_op_type(&self) -> ReplicationType;
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+pub struct ReplicateTargetDecision {
+    pub replicate: bool,
+    pub synchronous: bool,
+    pub arn: String,
+    pub id: String,
+}
+
+impl ReplicateTargetDecision {
+    pub fn new(arn: String, replicate: bool, sync: bool) -> Self {
+        Self {
+            replicate,
+            synchronous: sync,
+            arn,
+            id: String::new(),
+        }
+    }
+}
+
+impl fmt::Display for ReplicateTargetDecision {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{};{};{};{}", self.replicate, self.synchronous, self.arn, self.id)
+    }
+}
+
+/// ReplicateDecision represents the replication decision for each target
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ReplicateDecision {
+    pub targets_map: HashMap<String, ReplicateTargetDecision>,
+}
+
+impl ReplicateDecision {
+    pub fn new() -> Self {
+        Self {
+            targets_map: HashMap::new(),
+        }
+    }
+
+    /// Returns true if at least one target qualifies for replication
+    pub fn replicate_any(&self) -> bool {
+        self.targets_map.values().any(|t| t.replicate)
+    }
+
+    /// Returns true if at least one target qualifies for synchronous
replication
+    pub fn is_synchronous(&self) -> bool {
+        self.targets_map.values().any(|t| t.synchronous)
+    }
+
+    /// Updates ReplicateDecision with the target's replication decision
+    pub fn set(&mut self, target: ReplicateTargetDecision) {
+        self.targets_map.insert(target.arn.clone(), target);
+    }
+
+    /// Returns a stringified representation of the internal replication status with all targets marked as `PENDING`
+    pub fn pending_status(&self) -> Option<String> {
+        let mut result = String::new();
+        for target in self.targets_map.values() {
+            if target.replicate {
+                result.push_str(&format!("{}={};", target.arn, ReplicationStatusType::Pending.as_str()));
+            }
+        }
+        if result.is_empty() { None } else { Some(result) }
+    }
+}
+
+impl fmt::Display for ReplicateDecision {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let mut result = String::new();
+        for (key, value) in &self.targets_map {
+            result.push_str(&format!("{key}={value},"));
+        }
+        write!(f, "{}", result.trim_end_matches(','))
+    }
+}
+
+impl Default for ReplicateDecision {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+// Parses k-v pairs of target ARN to stringified ReplicateTargetDecision, delimited by ',', into a
+// ReplicateDecision struct
+pub fn parse_replicate_decision(_bucket: &str, s: &str) -> Result<ReplicateDecision> {
+    let mut decision = ReplicateDecision::new();
+
+    if s.is_empty() {
+        return Ok(decision);
+    }
+
+    for p in s.split(',') {
+        if p.is_empty() {
+            continue;
+        }
+
+        let slc = p.split('=').collect::<Vec<&str>>();
+        if slc.len() != 2 {
+            return Err(Error::other(format!("invalid replicate decision format: {s}")));
+        }
+
+        let tgt_str = slc[1].trim_matches('"');
+        let tgt = tgt_str.split(';').collect::<Vec<&str>>();
+        if tgt.len() != 4 {
+            return Err(Error::other(format!("invalid replicate decision format: {s}")));
+        }
+
+        let tgt = ReplicateTargetDecision {
+            replicate: tgt[0] == "true",
+            synchronous: tgt[1] == "true",
+            arn: tgt[2].to_string(),
+            id: tgt[3].to_string(),
+        };
+        decision.targets_map.insert(slc[0].to_string(), tgt);
+    }
+
+    Ok(decision)
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+pub struct ResyncTargetDecision {
+    pub replicate: bool,
+    pub reset_id: String,
+    pub reset_before_date: Option<OffsetDateTime>,
+}
+
+pub fn target_reset_header(arn: &str) -> String {
+    format!("{RESERVED_METADATA_PREFIX_LOWER}{REPLICATION_RESET}-{arn}")
+}
+
+impl ResyncTargetDecision {
+    pub fn resync_target(
+        oi: &ObjectInfo,
+        arn: &str,
+        reset_id: &str,
+        reset_before_date: Option<OffsetDateTime>,
+        status: ReplicationStatusType,
+    ) -> Self {
+        let rs = oi
+            .user_defined
+            .get(target_reset_header(arn).as_str())
+            .or(oi.user_defined.get(RUSTFS_REPLICATION_RESET_STATUS))
+            .map(|s| s.to_string());
+
+        let mut dec = Self::default();
+
+        let mod_time = oi.mod_time.unwrap_or(OffsetDateTime::UNIX_EPOCH);
+
+        if rs.is_none() {
+            let reset_before_date = reset_before_date.unwrap_or(OffsetDateTime::UNIX_EPOCH);
+            if !reset_id.is_empty() && mod_time < reset_before_date {
+                dec.replicate = true;
+                return dec;
+            }
+
+            dec.replicate = status == ReplicationStatusType::Empty;
+
+            return dec;
+        }
+
+        if reset_id.is_empty() || reset_before_date.is_none() {
+            return dec;
+        }
+
+        let rs = rs.unwrap();
+        let reset_before_date = reset_before_date.unwrap();
+
+        let parts: Vec<&str> = rs.splitn(2, ';').collect();
+
+        if parts.len() != 2 {
+            return dec;
+        }
+
+        let new_reset = parts[0] == reset_id;
+
+        if !new_reset && status == ReplicationStatusType::Completed {
+            return dec;
+        }
+
+        dec.replicate = new_reset && mod_time < reset_before_date;
+
+        dec
+    }
+}
+
+/// ResyncDecision is a struct representing a map with each target's individual resync decision
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ResyncDecision {
+    pub targets: HashMap<String, ResyncTargetDecision>,
+}
+
+impl ResyncDecision {
+    pub fn new() -> Self {
+        Self { targets: HashMap::new() }
+    }
+
+    /// Returns true if no target has a resync decision present
+    pub fn is_empty(&self) -> bool {
+        self.targets.is_empty()
+    }
+
+    pub fn must_resync(&self) -> bool {
+        self.targets.values().any(|v| v.replicate)
+    }
+
+    pub fn must_resync_target(&self, tgt_arn: &str) -> bool {
+        self.targets.get(tgt_arn).map(|v| v.replicate).unwrap_or(false)
+    }
+}
+
+impl Default for ResyncDecision {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ReplicateObjectInfo {
+    pub name: String,
+    pub size: i64,
+    pub actual_size: i64,
+    pub bucket: String,
+    pub version_id: Option<Uuid>,
+    pub etag: Option<String>,
+    pub mod_time: Option<OffsetDateTime>,
+    pub replication_status: ReplicationStatusType,
+    pub replication_status_internal: Option<String>,
+    pub delete_marker: bool,
+    pub version_purge_status_internal: Option<String>,
+    pub version_purge_status: VersionPurgeStatusType,
+    pub replication_state: Option<ReplicationState>,
+    pub op_type: ReplicationType,
+    pub event_type: String,
+    pub dsc: ReplicateDecision,
+    pub existing_obj_resync: ResyncDecision,
+    pub target_statuses: HashMap<String, ReplicationStatusType>,
+    pub target_purge_statuses: HashMap<String, VersionPurgeStatusType>,
+    pub replication_timestamp: Option<OffsetDateTime>,
+    pub ssec: bool,
+    pub user_tags: String,
+    pub checksum: Vec<u8>,
+    pub retry_count: u32,
+}
+
+impl ReplicationWorkerOperation for ReplicateObjectInfo {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn to_mrf_entry(&self) -> MrfReplicateEntry {
+        MrfReplicateEntry {
+            bucket: self.bucket.clone(),
+            object: self.name.clone(),
+            version_id: self.version_id,
+            retry_count: self.retry_count as i32,
+            size: self.size,
+        }
+    }
+
+    fn get_bucket(&self) -> &str {
+        &self.bucket
+    }
+
+    fn get_object(&self) -> &str {
+        &self.name
+    }
+
+    fn get_size(&self) -> i64 {
+        self.size
+    }
+
+    fn is_delete_marker(&self) -> bool {
+        self.delete_marker
+    }
+
+    fn get_op_type(&self) -> ReplicationType {
+        self.op_type
+    }
+}
+
+lazy_static::lazy_static!
{
+    static ref REPL_STATUS_REGEX: Regex = Regex::new(r"([^=].*?)=([^,].*?);").unwrap();
+}
+
+impl ReplicateObjectInfo {
+    /// Returns the replication status of a target
+    pub fn target_replication_status(&self, arn: &str) -> ReplicationStatusType {
+        let binding = self.replication_status_internal.clone().unwrap_or_default();
+        let captures = REPL_STATUS_REGEX.captures_iter(&binding);
+        for cap in captures {
+            if cap.len() == 3 && &cap[1] == arn {
+                return ReplicationStatusType::from(&cap[2]);
+            }
+        }
+        ReplicationStatusType::default()
+    }
+
+    /// Returns the relevant info needed by MRF
+    pub fn to_mrf_entry(&self) -> MrfReplicateEntry {
+        MrfReplicateEntry {
+            bucket: self.bucket.clone(),
+            object: self.name.clone(),
+            version_id: self.version_id,
+            retry_count: self.retry_count as i32,
+            size: self.size,
+        }
+    }
+}
+
+// Constructs a replication status map from its string representation
+pub fn replication_statuses_map(s: &str) -> HashMap<String, ReplicationStatusType> {
+    let mut targets = HashMap::new();
+    let rep_stat_matches = REPL_STATUS_REGEX.captures_iter(s).map(|c| c.extract());
+    for (_, [arn, status]) in rep_stat_matches {
+        if arn.is_empty() {
+            continue;
+        }
+        let status = ReplicationStatusType::from(status);
+        targets.insert(arn.to_string(), status);
+    }
+    targets
+}
+
+// Constructs a version purge status map from its string representation
+pub fn version_purge_statuses_map(s: &str) -> HashMap<String, VersionPurgeStatusType> {
+    let mut targets = HashMap::new();
+    let purge_status_matches = REPL_STATUS_REGEX.captures_iter(s).map(|c| c.extract());
+    for (_, [arn, status]) in purge_status_matches {
+        if arn.is_empty() {
+            continue;
+        }
+        let status = VersionPurgeStatusType::from(status);
+        targets.insert(arn.to_string(), status);
+    }
+    targets
+}
+
+pub fn get_replication_state(rinfos: &ReplicatedInfos, prev_state: &ReplicationState, _vid: Option<Uuid>) -> ReplicationState {
+    let reset_status_map: Vec<(String, String)> = rinfos
+        .targets
+        .iter()
+        .filter(|v| !v.resync_timestamp.is_empty())
+        .map(|t| (target_reset_header(t.arn.as_str()), t.resync_timestamp.clone()))
+        .collect();
+
+    let repl_statuses = rinfos.replication_status_internal();
+    let vpurge_statuses = rinfos.version_purge_status_internal();
+
+    let mut reset_statuses_map = prev_state.reset_statuses_map.clone();
+    for (key, value) in reset_status_map {
+        reset_statuses_map.insert(key, value);
+    }
+
+    ReplicationState {
+        replicate_decision_str: prev_state.replicate_decision_str.clone(),
+        reset_statuses_map,
+        replica_timestamp: prev_state.replica_timestamp,
+        replica_status: prev_state.replica_status.clone(),
+        targets: replication_statuses_map(&repl_statuses.clone().unwrap_or_default()),
+        replication_status_internal: repl_statuses,
+        replication_timestamp: rinfos.replication_timestamp,
+        purge_targets: version_purge_statuses_map(&vpurge_statuses.clone().unwrap_or_default()),
+        version_purge_status_internal: vpurge_statuses,
+
+        ..Default::default()
+    }
+}
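+
+// Minimal illustrative tests for the string encodings used above: the
+// "key=\"replicate;sync;arn;id\"" pairs consumed by parse_replicate_decision
+// and the "arn=STATUS;" form matched by REPL_STATUS_REGEX.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_parse_replicate_decision() {
+        let decision = parse_replicate_decision("bucket", "arn1=\"true;false;arn1;id1\"").unwrap();
+        let tgt = &decision.targets_map["arn1"];
+        assert!(tgt.replicate);
+        assert!(!tgt.synchronous);
+        assert_eq!(tgt.arn, "arn1");
+        assert_eq!(tgt.id, "id1");
+        assert!(decision.replicate_any());
+    }
+
+    #[test]
+    fn test_replication_statuses_map() {
+        let m = replication_statuses_map("arn1=COMPLETED;arn2=PENDING;");
+        assert_eq!(m.len(), 2);
+        assert!(m.contains_key("arn1"));
+        assert!(m.contains_key("arn2"));
+    }
+}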
diff --git a/crates/ecstore/src/bucket/replication/rule.rs b/crates/ecstore/src/bucket/replication/rule.rs
new file mode 100644
index 00000000..136c5480
--- /dev/null
+++ b/crates/ecstore/src/bucket/replication/rule.rs
@@ -0,0 +1,51 @@
+// Copyright 2024 RustFS Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use s3s::dto::ReplicaModificationsStatus;
+use s3s::dto::ReplicationRule;
+
+use super::ObjectOpts;
+
+pub trait ReplicationRuleExt {
+    fn prefix(&self) -> &str;
+    fn metadata_replicate(&self, obj: &ObjectOpts) -> bool;
+}
+
+impl ReplicationRuleExt for ReplicationRule {
+    fn prefix(&self) -> &str {
+        if let Some(filter) = &self.filter {
+            if let Some(prefix) = &filter.prefix {
+                prefix
+            } else if let Some(and) = &filter.and {
+                and.prefix.as_deref().unwrap_or("")
+            } else {
+                ""
+            }
+        } else {
+            ""
+        }
+    }
+
+    fn metadata_replicate(&self, obj: &ObjectOpts) -> bool {
+        if !obj.replica {
+            return true;
+        }
+
+        self.source_selection_criteria.as_ref().is_some_and(|s| {
+            s.replica_modifications
+                .clone()
+                .is_some_and(|r| r.status == ReplicaModificationsStatus::from_static(ReplicaModificationsStatus::ENABLED))
+        })
+    }
+}
diff --git a/crates/ecstore/src/bucket/tagging/mod.rs b/crates/ecstore/src/bucket/tagging/mod.rs
index bcce2377..62e428a4 100644
--- a/crates/ecstore/src/bucket/tagging/mod.rs
+++ b/crates/ecstore/src/bucket/tagging/mod.rs
@@ -12,6 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use std::collections::HashMap;
+
 use s3s::dto::Tag;
 use url::form_urlencoded;
 
@@ -34,6 +36,20 @@ pub fn decode_tags(tags: &str) -> Vec<Tag> {
     list
 }
 
+pub fn decode_tags_to_map(tags: &str) -> HashMap<String, String> {
+    let mut list = HashMap::new();
+
+    for (k, v) in form_urlencoded::parse(tags.as_bytes()) {
+        if k.is_empty() || v.is_empty() {
+            continue;
+        }
+
+        list.insert(k.to_string(), v.to_string());
+    }
+
+    list
+}
+
 pub fn encode_tags(tags: Vec<Tag>) -> String {
     let mut encoded = form_urlencoded::Serializer::new(String::new());
diff --git a/crates/ecstore/src/bucket/target/arn.rs b/crates/ecstore/src/bucket/target/arn.rs
new file mode 100644
index 00000000..a9104077
--- /dev/null
+++ b/crates/ecstore/src/bucket/target/arn.rs
@@ -0,0 +1,66 @@
+// Copyright 2024 RustFS Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use super::BucketTargetType;
+use std::fmt::Display;
+use std::str::FromStr;
+
+pub struct ARN {
+    pub arn_type: BucketTargetType,
+    pub id: String,
+    pub region: String,
+    pub bucket: String,
+}
+
+impl ARN {
+    pub fn new(arn_type: BucketTargetType, id: String, region: String, bucket: String) -> Self {
+        Self {
+            arn_type,
+            id,
+            region,
+            bucket,
+        }
+    }
+
+    pub fn is_empty(&self) -> bool {
+        !self.arn_type.is_valid()
+    }
+}
+
+impl Display for ARN {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "arn:rustfs:{}:{}:{}:{}", self.arn_type, self.region, self.id, self.bucket)
+    }
+}
+
+impl FromStr for ARN {
+    type Err = std::io::Error;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        if !s.starts_with("arn:rustfs:") {
+            return Err(std::io::Error::new(std::io::ErrorKind::InvalidInput, "Invalid ARN format"));
+        }
+
+        let parts: Vec<&str> = s.split(':').collect();
+        if parts.len() != 6 {
+            return Err(std::io::Error::new(std::io::ErrorKind::InvalidInput, "Invalid ARN format"));
+        }
+        Ok(ARN {
+            arn_type: BucketTargetType::from_str(parts[2]).unwrap_or_default(),
+            region: parts[3].to_string(),
+            id: parts[4].to_string(),
+            bucket: parts[5].to_string(),
+        })
+    }
+}
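+
+// Illustrative round-trip: Display renders "arn:rustfs:<type>:<region>:<id>:<bucket>"
+// and FromStr parses the same six-part form back.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_arn_display_from_str_round_trip() {
+        let arn = ARN::new(
+            BucketTargetType::ReplicationService,
+            "id-1".to_string(),
+            "us-east-1".to_string(),
+            "bkt".to_string(),
+        );
+        let s = arn.to_string();
+        assert_eq!(s, "arn:rustfs:replication:us-east-1:id-1:bkt");
+
+        let parsed = ARN::from_str(&s).unwrap();
+        assert_eq!(parsed.id, "id-1");
+        assert_eq!(parsed.region, "us-east-1");
+        assert_eq!(parsed.bucket, "bkt");
+    }
+}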
diff --git a/crates/ecstore/src/bucket/target/bucket_target.rs b/crates/ecstore/src/bucket/target/bucket_target.rs
new file mode 100644
index 00000000..779997f9
--- /dev/null
+++ b/crates/ecstore/src/bucket/target/bucket_target.rs
@@ -0,0 +1,800 @@
+// Copyright 2024 RustFS Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use crate::error::{Error, Result};
+use rmp_serde::Serializer as rmpSerializer;
+use serde::{Deserialize, Serialize};
+use std::{
+    fmt::{self, Display},
+    str::FromStr,
+    time::Duration,
+};
+use time::OffsetDateTime;
+use url::Url;
+
+#[derive(Debug, Deserialize, Serialize, Default, Clone)]
+pub struct Credentials {
+    #[serde(rename = "accessKey")]
+    pub access_key: String,
+    #[serde(rename = "secretKey")]
+    pub secret_key: String,
+    pub session_token: Option<String>,
+    pub expiration: Option<chrono::DateTime<chrono::Utc>>,
+}
+
+#[derive(Debug, Deserialize, Serialize, Default, Clone)]
+pub enum ServiceType {
+    #[default]
+    Replication,
+}
+
+#[derive(Debug, Deserialize, Serialize, Default, Clone)]
+pub struct LatencyStat {
+    #[serde(with = "duration_milliseconds")]
+    pub curr: Duration, // Current latency
+    #[serde(with = "duration_milliseconds")]
+    pub avg: Duration, // Average latency
+    #[serde(with = "duration_milliseconds")]
+    pub max: Duration, // Maximum latency
+}
+
+mod duration_milliseconds {
+    use serde::{Deserialize, Deserializer, Serializer};
+    use std::time::Duration;
+
+    pub fn serialize<S>(duration: &Duration, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        serializer.serialize_u64(duration.as_millis() as u64)
+    }
+
+    pub fn deserialize<'de, D>(deserializer: D) -> Result<Duration, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        let millis = u64::deserialize(deserializer)?;
+        Ok(Duration::from_millis(millis))
+    }
+}
+
+mod duration_seconds {
+    use serde::{Deserialize, Deserializer, Serializer};
+    use std::time::Duration;
+
+    pub fn serialize<S>(duration: &Duration, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        serializer.serialize_u64(duration.as_secs())
+    }
+
+    pub fn deserialize<'de, D>(deserializer: D) -> Result<Duration, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        let secs = u64::deserialize(deserializer)?;
+        Ok(Duration::from_secs(secs))
+    }
+}
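+
+// Illustrative sketch: the two serde helpers above store Duration as plain
+// integers (milliseconds and seconds respectively).
+#[cfg(test)]
+mod duration_serde_tests {
+    use serde::{Deserialize, Serialize};
+    use std::time::Duration;
+
+    #[derive(Serialize, Deserialize)]
+    struct Wrapper {
+        #[serde(with = "super::duration_milliseconds")]
+        ms: Duration,
+        #[serde(with = "super::duration_seconds")]
+        secs: Duration,
+    }
+
+    #[test]
+    fn test_duration_round_trip() {
+        let w = Wrapper {
+            ms: Duration::from_millis(1500),
+            secs: Duration::from_secs(30),
+        };
+        let json = serde_json::to_string(&w).unwrap();
+        assert_eq!(json, r#"{"ms":1500,"secs":30}"#);
+
+        let back: Wrapper = serde_json::from_str(&json).unwrap();
+        assert_eq!(back.ms, w.ms);
+        assert_eq!(back.secs, w.secs);
+    }
+}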
+ #[serde(rename = "replicationSync", default)] + pub replication_sync: bool, + #[serde(default)] + pub storage_class: String, + #[serde(rename = "healthCheckDuration", with = "duration_seconds", default)] + pub health_check_duration: Duration, + #[serde(rename = "disableProxy", default)] + pub disable_proxy: bool, + + #[serde(rename = "resetBeforeDate", with = "time::serde::rfc3339::option", default)] + pub reset_before_date: Option, + #[serde(default)] + pub reset_id: String, + #[serde(rename = "totalDowntime", with = "duration_seconds", default)] + pub total_downtime: Duration, + + #[serde(rename = "lastOnline", with = "time::serde::rfc3339::option", default)] + pub last_online: Option, + #[serde(rename = "isOnline", default)] + pub online: bool, + + #[serde(default)] + pub latency: LatencyStat, + + #[serde(default)] + pub deployment_id: String, + + #[serde(default)] + pub edge: bool, + #[serde(rename = "edgeSyncBeforeExpiry", default)] + pub edge_sync_before_expiry: bool, + #[serde(rename = "offlineCount", default)] + pub offline_count: u64, +} + +impl BucketTarget { + pub fn is_empty(self) -> bool { + self.target_bucket.is_empty() && self.endpoint.is_empty() && self.arn.is_empty() + } + pub fn url(&self) -> Result { + let scheme = if self.secure { "https" } else { "http" }; + Url::parse(&format!("{}://{}", scheme, self.endpoint)).map_err(Error::other) + } +} + +impl Display for BucketTarget { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{} ", self.endpoint)?; + write!(f, "{}", self.target_bucket.clone())?; + Ok(()) + } +} + +#[derive(Debug, Deserialize, Serialize, Default, Clone)] +pub struct BucketTargets { + pub targets: Vec, +} + +impl BucketTargets { + pub fn marshal_msg(&self) -> Result> { + let mut buf = Vec::new(); + + self.serialize(&mut rmpSerializer::new(&mut buf).with_struct_map())?; + + Ok(buf) + } + + pub fn unmarshal(buf: &[u8]) -> Result { + let t: BucketTargets = rmp_serde::from_slice(buf)?; + Ok(t) + } + + pub fn is_empty(&self) -> bool { + if self.targets.is_empty() { + return true; + } + + for target in &self.targets { + if !target.clone().is_empty() { + return false; + } + } + + true + } +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json; + use std::time::Duration; + use time::OffsetDateTime; + + #[test] + fn test_bucket_target_json_deserialize() { + let json = r#" + { + "sourcebucket": "source-bucket-name", + "endpoint": "s3.amazonaws.com", + "credentials": { + "accessKey": "test-access-key", + "secretKey": "test-secret-key", + "session_token": "test-session-token", + "expiration": "2024-12-31T23:59:59Z" + }, + "targetbucket": "target-bucket-name", + "secure": true, + "path": "/api/v1", + "api": "s3v4", + "arn": "arn:aws:s3:::target-bucket-name", + "type": "replication", + "region": "us-east-1", + "bandwidth_limit": 1000000, + "replicationSync": true, + "storage_class": "STANDARD", + "healthCheckDuration": 30, + "disableProxy": false, + "resetBeforeDate": null, + "reset_id": "reset-123", + "totalDowntime": 3600, + "last_online": null, + "isOnline": true, + "latency": { + "curr": 100, + "avg": 150, + "max": 300 + }, + "deployment_id": "deployment-456", + "edge": false, + "edgeSyncBeforeExpiry": true, + "offlineCount": 5 + } + "#; + + let result: std::result::Result = serde_json::from_str(json); + assert!(result.is_ok(), "Failed to deserialize BucketTarget: {:?}", result.err()); + + let target = result.unwrap(); + + // Verify basic fields + assert_eq!(target.source_bucket, "source-bucket-name"); + 
assert_eq!(target.endpoint, "s3.amazonaws.com"); + assert_eq!(target.target_bucket, "target-bucket-name"); + assert!(target.secure); + assert_eq!(target.path, "/api/v1"); + assert_eq!(target.api, "s3v4"); + assert_eq!(target.arn, "arn:aws:s3:::target-bucket-name"); + assert_eq!(target.target_type, BucketTargetType::ReplicationService); + assert_eq!(target.region, "us-east-1"); + assert_eq!(target.bandwidth_limit, 1000000); + assert!(target.replication_sync); + assert_eq!(target.storage_class, "STANDARD"); + assert_eq!(target.health_check_duration, Duration::from_secs(30)); + assert!(!target.disable_proxy); + assert_eq!(target.reset_id, "reset-123"); + assert_eq!(target.total_downtime, Duration::from_secs(3600)); + assert!(target.online); + assert_eq!(target.deployment_id, "deployment-456"); + assert!(!target.edge); + assert!(target.edge_sync_before_expiry); + assert_eq!(target.offline_count, 5); + + // Verify credentials + assert!(target.credentials.is_some()); + let credentials = target.credentials.unwrap(); + assert_eq!(credentials.access_key, "test-access-key"); + assert_eq!(credentials.secret_key, "test-secret-key"); + assert_eq!(credentials.session_token, Some("test-session-token".to_string())); + assert!(credentials.expiration.is_some()); + + // Verify latency statistics + assert_eq!(target.latency.curr, Duration::from_millis(100)); + assert_eq!(target.latency.avg, Duration::from_millis(150)); + assert_eq!(target.latency.max, Duration::from_millis(300)); + + // Verify time fields + assert!(target.reset_before_date.is_none()); + assert!(target.last_online.is_none()); + } + + #[test] + fn test_bucket_target_json_serialize_deserialize_roundtrip() { + let original = BucketTarget { + source_bucket: "test-source".to_string(), + endpoint: "rustfs.example.com".to_string(), + credentials: Some(Credentials { + access_key: "rustfsaccess".to_string(), + secret_key: "rustfssecret".to_string(), + session_token: None, + expiration: None, + }), + target_bucket: "test-target".to_string(), + secure: false, + path: "/".to_string(), + api: "s3v4".to_string(), + arn: "arn:rustfs:s3:::test-target".to_string(), + target_type: BucketTargetType::ReplicationService, + region: "us-west-2".to_string(), + bandwidth_limit: 500000, + replication_sync: false, + storage_class: "REDUCED_REDUNDANCY".to_string(), + health_check_duration: Duration::from_secs(60), + disable_proxy: true, + reset_before_date: Some(OffsetDateTime::now_utc()), + reset_id: "reset-456".to_string(), + total_downtime: Duration::from_secs(1800), + last_online: Some(OffsetDateTime::now_utc()), + online: false, + latency: LatencyStat { + curr: Duration::from_millis(250), + avg: Duration::from_millis(200), + max: Duration::from_millis(500), + }, + deployment_id: "deploy-789".to_string(), + edge: true, + edge_sync_before_expiry: false, + offline_count: 10, + }; + + // Serialize to JSON + let json = serde_json::to_string(&original).expect("Failed to serialize to JSON"); + + // Deserialize from JSON + let deserialized: BucketTarget = serde_json::from_str(&json).expect("Failed to deserialize from JSON"); + + // Verify key fields are equal + assert_eq!(original.source_bucket, deserialized.source_bucket); + assert_eq!(original.endpoint, deserialized.endpoint); + assert_eq!(original.target_bucket, deserialized.target_bucket); + assert_eq!(original.secure, deserialized.secure); + assert_eq!(original.target_type, deserialized.target_type); + assert_eq!(original.region, deserialized.region); + assert_eq!(original.bandwidth_limit, 
deserialized.bandwidth_limit); + assert_eq!(original.replication_sync, deserialized.replication_sync); + assert_eq!(original.health_check_duration, deserialized.health_check_duration); + assert_eq!(original.online, deserialized.online); + assert_eq!(original.edge, deserialized.edge); + assert_eq!(original.offline_count, deserialized.offline_count); + } + + #[test] + fn test_bucket_target_type_json_deserialize() { + // Test BucketTargetType JSON deserialization + let replication_json = r#""replication""#; + let ilm_json = r#""ilm""#; + + let replication_type: BucketTargetType = + serde_json::from_str(replication_json).expect("Failed to deserialize replication type"); + let ilm_type: BucketTargetType = serde_json::from_str(ilm_json).expect("Failed to deserialize ilm type"); + + assert_eq!(replication_type, BucketTargetType::ReplicationService); + assert_eq!(ilm_type, BucketTargetType::IlmService); + + // Verify type validity + assert!(replication_type.is_valid()); + assert!(ilm_type.is_valid()); + assert!(!BucketTargetType::None.is_valid()); + } + + #[test] + fn test_credentials_json_deserialize() { + let json = r#" + { + "accessKey": "AKIAIOSFODNN7EXAMPLE", + "secretKey": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", + "session_token": "AQoEXAMPLEH4aoAH0gNCAPyJxz4BlCFFxWNE1OPTgk5TthT", + "expiration": "2024-12-31T23:59:59Z" + } + "#; + + let credentials: Credentials = serde_json::from_str(json).expect("Failed to deserialize credentials"); + + assert_eq!(credentials.access_key, "AKIAIOSFODNN7EXAMPLE"); + assert_eq!(credentials.secret_key, "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"); + assert_eq!( + credentials.session_token, + Some("AQoEXAMPLEH4aoAH0gNCAPyJxz4BlCFFxWNE1OPTgk5TthT".to_string()) + ); + assert!(credentials.expiration.is_some()); + } + + #[test] + fn test_latency_stat_json_deserialize() { + let json = r#" + { + "curr": 50, + "avg": 75, + "max": 200 + } + "#; + + let latency: LatencyStat = serde_json::from_str(json).expect("Failed to deserialize latency stat"); + + assert_eq!(latency.curr, Duration::from_millis(50)); + assert_eq!(latency.avg, Duration::from_millis(75)); + assert_eq!(latency.max, Duration::from_millis(200)); + } + + #[test] + fn test_bucket_targets_json_deserialize() { + let json = r#" + { + "targets": [ + { + "sourcebucket": "bucket1", + "endpoint": "s3.amazonaws.com", + "targetbucket": "target1", + "secure": true, + "path": "/", + "api": "s3v4", + "arn": "arn:aws:s3:::target1", + "type": "replication", + "region": "us-east-1", + "bandwidth_limit": 0, + "replicationSync": false, + "storage_class": "", + "healthCheckDuration": 0, + "disableProxy": false, + "resetBeforeDate": null, + "reset_id": "", + "totalDowntime": 0, + "lastOnline": null, + "isOnline": false, + "latency": { + "curr": 0, + "avg": 0, + "max": 0 + }, + "deployment_id": "", + "edge": false, + "edgeSyncBeforeExpiry": false, + "offlineCount": 0 + } + ] + } + "#; + + let targets: BucketTargets = serde_json::from_str(json).expect("Failed to deserialize bucket targets"); + + assert_eq!(targets.targets.len(), 1); + assert_eq!(targets.targets[0].source_bucket, "bucket1"); + assert_eq!(targets.targets[0].endpoint, "s3.amazonaws.com"); + assert_eq!(targets.targets[0].target_bucket, "target1"); + assert!(!targets.is_empty()); + } + + #[test] + fn test_user_provided_json_deserialize() { + // Test the specific JSON provided by the user with missing required fields added + let json = r#" + { + "sourcebucket": "mc-test-bucket-22139", + "endpoint": "localhost:8000", + "credentials": { + "accessKey": "rustfsadmin", 
+ "secretKey": "rustfsadmin", + "expiration": "0001-01-01T00:00:00Z" + }, + "targetbucket": "test", + "secure": false, + "path": "auto", + "api": "s3v4", + "type": "replication", + "replicationSync": false, + "healthCheckDuration": 60, + "disableProxy": false, + "resetBeforeDate": "0001-01-01T00:00:00Z", + "totalDowntime": 0, + "lastOnline": "0001-01-01T00:00:00Z", + "isOnline": false, + "latency": { + "curr": 0, + "avg": 0, + "max": 0 + }, + "deployment_id": "", + "edge": false, + "edgeSyncBeforeExpiry": false, + "offlineCount": 0, + "bandwidth": 107374182400 + } + "#; + + let target: BucketTarget = serde_json::from_str(json).expect("Failed to deserialize user provided JSON to BucketTarget"); + + // Verify the deserialized values match the original JSON + assert_eq!(target.source_bucket, "mc-test-bucket-22139"); + assert_eq!(target.endpoint, "localhost:8000"); + assert_eq!(target.target_bucket, "test"); + assert!(!target.secure); + assert_eq!(target.path, "auto"); + assert_eq!(target.api, "s3v4"); + assert_eq!(target.target_type, BucketTargetType::ReplicationService); + assert!(!target.replication_sync); + assert_eq!(target.health_check_duration, Duration::from_secs(60)); + assert!(!target.disable_proxy); + assert!(!target.online); + assert!(!target.edge); + assert!(!target.edge_sync_before_expiry); + assert_eq!(target.bandwidth_limit, 107374182400); // bandwidth field mapped to bandwidth_limit + + // Verify credentials + assert!(target.credentials.is_some()); + let credentials = target.credentials.unwrap(); + assert_eq!(credentials.access_key, "rustfsadmin"); + assert_eq!(credentials.secret_key, "rustfsadmin"); + + // Verify latency statistics + assert_eq!(target.latency.curr, Duration::from_millis(0)); + assert_eq!(target.latency.avg, Duration::from_millis(0)); + assert_eq!(target.latency.max, Duration::from_millis(0)); + + // Verify time fields parsing (should handle "0001-01-01T00:00:00Z" as None due to being the zero time) + assert!(target.reset_before_date.is_some()); + assert!(target.last_online.is_some()); + + println!("✅ User provided JSON successfully deserialized to BucketTarget"); + } + + #[test] + fn test_user_provided_json_as_bucket_targets() { + // Test wrapping the user JSON in BucketTargets structure + let json = r#" + { + "targets": [ + { + "sourcebucket": "mc-test-bucket-22139", + "endpoint": "localhost:8000", + "credentials": { + "accessKey": "rustfsadmin", + "secretKey": "rustfsadmin", + "expiration": "0001-01-01T00:00:00Z" + }, + "targetbucket": "test", + "secure": false, + "path": "auto", + "api": "s3v4", + "arn": "", + "type": "replication", + "region": "", + "replicationSync": false, + "storage_class": "", + "healthCheckDuration": 60, + "disableProxy": false, + "resetBeforeDate": "0001-01-01T00:00:00Z", + "reset_id": "", + "totalDowntime": 0, + "lastOnline": "0001-01-01T00:00:00Z", + "isOnline": false, + "latency": { + "curr": 0, + "avg": 0, + "max": 0 + }, + "deployment_id": "", + "edge": false, + "edgeSyncBeforeExpiry": false, + "offlineCount": 0, + "bandwidth": 107374182400 + } + ] + } + "#; + + let bucket_targets: BucketTargets = + serde_json::from_str(json).expect("Failed to deserialize user provided JSON to BucketTargets"); + + assert_eq!(bucket_targets.targets.len(), 1); + assert!(!bucket_targets.is_empty()); + + let target = &bucket_targets.targets[0]; + assert_eq!(target.source_bucket, "mc-test-bucket-22139"); + assert_eq!(target.endpoint, "localhost:8000"); + assert_eq!(target.target_bucket, "test"); + assert_eq!(target.bandwidth_limit, 107374182400); + 
+ println!("✅ User provided JSON successfully deserialized to BucketTargets"); + } + + #[test] + fn test_bucket_target_minimal_json_with_defaults() { + // Test that BucketTarget can be deserialized with minimal JSON using defaults + let minimal_json = r#" + { + "sourcebucket": "test-source", + "endpoint": "localhost:9000", + "targetbucket": "test-target" + } + "#; + + let target: BucketTarget = + serde_json::from_str(minimal_json).expect("Failed to deserialize minimal JSON to BucketTarget"); + + // Verify required fields + assert_eq!(target.source_bucket, "test-source"); + assert_eq!(target.endpoint, "localhost:9000"); + assert_eq!(target.target_bucket, "test-target"); + + // Verify default values + assert!(!target.secure); // bool default is false + assert_eq!(target.path, ""); // String default is empty + assert_eq!(target.api, ""); // String default is empty + assert_eq!(target.arn, ""); // String default is empty + assert_eq!(target.target_type, BucketTargetType::None); // enum default + assert_eq!(target.region, ""); // String default is empty + assert_eq!(target.bandwidth_limit, 0); // i64 default is 0 + assert!(!target.replication_sync); // bool default is false + assert_eq!(target.storage_class, ""); // String default is empty + assert_eq!(target.health_check_duration, Duration::from_secs(0)); // Duration default + assert!(!target.disable_proxy); // bool default is false + assert!(target.reset_before_date.is_none()); // Option default is None + assert_eq!(target.reset_id, ""); // String default is empty + assert_eq!(target.total_downtime, Duration::from_secs(0)); // Duration default + assert!(target.last_online.is_none()); // Option default is None + assert!(!target.online); // bool default is false + assert_eq!(target.latency.curr, Duration::from_millis(0)); // LatencyStat default + assert_eq!(target.latency.avg, Duration::from_millis(0)); + assert_eq!(target.latency.max, Duration::from_millis(0)); + assert_eq!(target.deployment_id, ""); // String default is empty + assert!(!target.edge); // bool default is false + assert!(!target.edge_sync_before_expiry); // bool default is false + assert_eq!(target.offline_count, 0); // u64 default is 0 + assert!(target.credentials.is_none()); // Option default is None + + println!("✅ Minimal JSON with defaults successfully deserialized to BucketTarget"); + } + + #[test] + fn test_bucket_target_empty_json_with_defaults() { + // Test that BucketTarget can be deserialized with completely empty JSON using all defaults + let empty_json = r#"{}"#; + + let target: BucketTarget = serde_json::from_str(empty_json).expect("Failed to deserialize empty JSON to BucketTarget"); + + // Verify all fields use default values + assert_eq!(target.source_bucket, ""); + assert_eq!(target.endpoint, ""); + assert_eq!(target.target_bucket, ""); + assert!(!target.secure); + assert_eq!(target.path, ""); + assert_eq!(target.api, ""); + assert_eq!(target.arn, ""); + assert_eq!(target.target_type, BucketTargetType::None); + assert_eq!(target.region, ""); + assert_eq!(target.bandwidth_limit, 0); + assert!(!target.replication_sync); + assert_eq!(target.storage_class, ""); + assert_eq!(target.health_check_duration, Duration::from_secs(0)); + assert!(!target.disable_proxy); + assert!(target.reset_before_date.is_none()); + assert_eq!(target.reset_id, ""); + assert_eq!(target.total_downtime, Duration::from_secs(0)); + assert!(target.last_online.is_none()); + assert!(!target.online); + assert_eq!(target.latency.curr, Duration::from_millis(0)); + assert_eq!(target.latency.avg, 
Duration::from_millis(0));
+        assert_eq!(target.latency.max, Duration::from_millis(0));
+        assert_eq!(target.deployment_id, "");
+        assert!(!target.edge);
+        assert!(!target.edge_sync_before_expiry);
+        assert_eq!(target.offline_count, 0);
+        assert!(target.credentials.is_none());
+
+        println!("✅ Empty JSON with all defaults successfully deserialized to BucketTarget");
+    }
+
+    #[test]
+    fn test_original_user_json_with_defaults() {
+        // Test the original user JSON without extra required fields
+        let json = r#"
+        {
+            "sourcebucket": "mc-test-bucket-22139",
+            "endpoint": "localhost:8000",
+            "credentials": {
+                "accessKey": "rustfsadmin",
+                "secretKey": "rustfsadmin",
+                "expiration": "0001-01-01T00:00:00Z"
+            },
+            "targetbucket": "test",
+            "secure": false,
+            "path": "auto",
+            "api": "s3v4",
+            "type": "replication",
+            "replicationSync": false,
+            "healthCheckDuration": 60,
+            "disableProxy": false,
+            "resetBeforeDate": "0001-01-01T00:00:00Z",
+            "totalDowntime": 0,
+            "lastOnline": "0001-01-01T00:00:00Z",
+            "isOnline": false,
+            "latency": {
+                "curr": 0,
+                "avg": 0,
+                "max": 0
+            },
+            "edge": false,
+            "edgeSyncBeforeExpiry": false,
+            "bandwidth": 107374182400
+        }
+        "#;
+
+        let target: BucketTarget = serde_json::from_str(json).expect("Failed to deserialize original user JSON to BucketTarget");
+
+        // Verify the deserialized values
+        assert_eq!(target.source_bucket, "mc-test-bucket-22139");
+        assert_eq!(target.endpoint, "localhost:8000");
+        assert_eq!(target.target_bucket, "test");
+        assert!(!target.secure);
+        assert_eq!(target.path, "auto");
+        assert_eq!(target.api, "s3v4");
+        assert_eq!(target.target_type, BucketTargetType::ReplicationService);
+        assert!(!target.replication_sync);
+        assert_eq!(target.health_check_duration, Duration::from_secs(60));
+        assert!(!target.disable_proxy);
+        assert!(!target.online);
+        assert!(!target.edge);
+        assert!(!target.edge_sync_before_expiry);
+        assert_eq!(target.bandwidth_limit, 107374182400);
+
+        // Fields not specified should use defaults
+        assert_eq!(target.arn, ""); // default empty string
+        assert_eq!(target.region, ""); // default empty string
+        assert_eq!(target.storage_class, ""); // default empty string
+        assert_eq!(target.reset_id, ""); // default empty string
+        assert_eq!(target.deployment_id, ""); // default empty string
+        assert_eq!(target.offline_count, 0); // default u64
+
+        // Verify credentials
+        assert!(target.credentials.is_some());
+        let credentials = target.credentials.unwrap();
+        assert_eq!(credentials.access_key, "rustfsadmin");
+        assert_eq!(credentials.secret_key, "rustfsadmin");
+
+        println!("✅ Original user JSON with defaults successfully deserialized to BucketTarget");
+    }
+}
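That completes the new `bucket_target.rs`. Because `marshal_msg` encodes with `with_struct_map()`, the msgpack form carries field names, so snapshots written before a field existed still decode thanks to the `#[serde(default)]` annotations. A small round-trip sketch, assuming the types above are in scope and using the file's own `Result` alias (the bucket names are illustrative only):

```rust
fn roundtrip_demo() -> Result<()> {
    let targets = BucketTargets {
        targets: vec![BucketTarget {
            source_bucket: "src-bucket".to_string(),
            endpoint: "replica.example.com:9000".to_string(),
            target_bucket: "dst-bucket".to_string(),
            target_type: BucketTargetType::ReplicationService,
            ..Default::default()
        }],
    };

    // Struct-map msgpack: self-describing, tolerant of newly added fields.
    let buf = targets.marshal_msg()?;
    let decoded = BucketTargets::unmarshal(&buf)?;

    assert!(!decoded.is_empty());
    assert_eq!(decoded.targets[0].endpoint, "replica.example.com:9000");
    assert!(decoded.targets[0].target_type.is_valid());
    Ok(())
}
```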
diff --git a/crates/ecstore/src/bucket/target/mod.rs b/crates/ecstore/src/bucket/target/mod.rs
index e279848a..43f3fc1b 100644
--- a/crates/ecstore/src/bucket/target/mod.rs
+++ b/crates/ecstore/src/bucket/target/mod.rs
@@ -12,124 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use crate::error::Result;
-use rmp_serde::Serializer as rmpSerializer;
-use serde::{Deserialize, Serialize};
-use time::OffsetDateTime;
+mod arn;
+mod bucket_target;
 
-#[derive(Debug, Deserialize, Serialize, Default, Clone)]
-pub struct Credentials {
-    #[serde(rename = "accessKey")]
-    pub access_key: String,
-    #[serde(rename = "secretKey")]
-    pub secret_key: String,
-    pub session_token: Option<String>,
-    pub expiration: Option<chrono::DateTime<chrono::Utc>>,
-}
-
-#[derive(Debug, Deserialize, Serialize, Default, Clone)]
-pub enum ServiceType {
-    #[default]
-    Replication,
-}
-
-#[derive(Debug, Deserialize, Serialize, Default, Clone)]
-pub struct LatencyStat {
-    curr: u64, // current latency
-    avg: u64,  // average latency
-    max: u64,  // maximum latency
-}
-
-// Define BucketTarget struct
-#[derive(Debug, Deserialize, Serialize, Default, Clone)]
-pub struct BucketTarget {
-    #[serde(rename = "sourcebucket")]
-    pub source_bucket: String,
-
-    pub endpoint: String,
-
-    pub credentials: Option<Credentials>,
-    #[serde(rename = "targetbucket")]
-    pub target_bucket: String,
-
-    secure: bool,
-    pub path: Option<String>,
-
-    api: Option<String>,
-
-    pub arn: Option<String>,
-    #[serde(rename = "type")]
-    pub type_: Option<ServiceType>,
-
-    pub region: Option<String>,
-
-    bandwidth_limit: Option<i64>,
-
-    #[serde(rename = "replicationSync")]
-    replication_sync: bool,
-
-    storage_class: Option<String>,
-    #[serde(rename = "healthCheckDuration")]
-    health_check_duration: u64,
-    #[serde(rename = "disableProxy")]
-    disable_proxy: bool,
-
-    #[serde(rename = "resetBeforeDate")]
-    reset_before_date: String,
-    reset_id: Option<String>,
-    #[serde(rename = "totalDowntime")]
-    total_downtime: u64,
-
-    last_online: Option<OffsetDateTime>,
-    #[serde(rename = "isOnline")]
-    online: bool,
-
-    latency: Option<LatencyStat>,
-
-    deployment_id: Option<String>,
-
-    edge: bool,
-    #[serde(rename = "edgeSyncBeforeExpiry")]
-    edge_sync_before_expiry: bool,
-}
-
-impl BucketTarget {
-    pub fn is_empty(self) -> bool {
-        //self.target_bucket.is_empty() && self.endpoint.is_empty() && self.arn.is_empty()
-        self.target_bucket.is_empty() && self.endpoint.is_empty() && self.arn.is_none()
-    }
-}
-
-#[derive(Debug, Deserialize, Serialize, Default, Clone)]
-pub struct BucketTargets {
-    pub targets: Vec<BucketTarget>,
-}
-
-impl BucketTargets {
-    pub fn marshal_msg(&self) -> Result<Vec<u8>> {
-        let mut buf = Vec::new();
-
-        self.serialize(&mut rmpSerializer::new(&mut buf).with_struct_map())?;
-
-        Ok(buf)
-    }
-
-    pub fn unmarshal(buf: &[u8]) -> Result<BucketTargets> {
-        let t: BucketTargets = rmp_serde::from_slice(buf)?;
-        Ok(t)
-    }
-
-    pub fn is_empty(&self) -> bool {
-        if self.targets.is_empty() {
-            return true;
-        }
-
-        for target in &self.targets {
-            if !target.clone().is_empty() {
-                return false;
-            }
-        }
-
-        true
-    }
-}
+pub use arn::*;
+pub use bucket_target::*;
diff --git a/crates/ecstore/src/cache_value/metacache_set.rs b/crates/ecstore/src/cache_value/metacache_set.rs
index 0ac371a4..eef250eb 100644
--- a/crates/ecstore/src/cache_value/metacache_set.rs
+++ b/crates/ecstore/src/cache_value/metacache_set.rs
@@ -17,7 +17,8 @@ use crate::disk::{self, DiskAPI, DiskStore, WalkDirOptions};
 use futures::future::join_all;
 use rustfs_filemeta::{MetaCacheEntries, MetaCacheEntry, MetacacheReader, is_io_eof};
 use std::{future::Future, pin::Pin, sync::Arc};
-use tokio::{spawn, sync::broadcast::Receiver as B_Receiver};
+use tokio::spawn;
+use tokio_util::sync::CancellationToken;
 use tracing::{error, warn};
 
 pub type AgreedFn = Box<dyn Fn(MetaCacheEntry) -> Pin<Box<dyn Future<Output = ()> + Send>> + Send + 'static>;
@@ -63,7 +64,7 @@ impl Clone for ListPathRawOptions {
     }
 }
 
-pub async fn list_path_raw(mut rx: B_Receiver<bool>, opts: ListPathRawOptions) -> disk::error::Result<()> {
+pub async fn list_path_raw(rx: CancellationToken, opts: ListPathRawOptions) -> disk::error::Result<()> {
     if opts.disks.is_empty() {
         return Err(DiskError::other("list_path_raw: 0 drives provided"));
     }
@@ -72,13 +73,13 @@ pub async fn list_path_raw(mut rx: B_Receiver<bool>, opts: ListPathRawOptions) -> disk::error::Result<()> {
     let mut readers = Vec::with_capacity(opts.disks.len());
     let fds = Arc::new(opts.fallback_disks.clone());
 
-    let (cancel_tx, cancel_rx) = tokio::sync::broadcast::channel::<bool>(1);
+    let cancel_rx = CancellationToken::new();
 
     for disk in opts.disks.iter() {
         let opdisk = disk.clone();
         let opts_clone = opts.clone();
         let fds_clone = fds.clone();
-        let mut cancel_rx_clone = cancel_rx.resubscribe();
+        let cancel_rx_clone = cancel_rx.clone();
         let (rd, mut wr) = tokio::io::duplex(64);
         readers.push(MetacacheReader::new(rd));
         jobs.push(spawn(async move {
@@ -106,7 +107,7 @@ pub async fn list_path_raw(mut rx: B_Receiver<bool>, opts: ListPathRawOptions) -> disk::error::Result<()> {
                     need_fallback = true;
                 }
 
-                if cancel_rx_clone.try_recv().is_ok() {
+                if cancel_rx_clone.is_cancelled() {
                     // warn!("list_path_raw: cancel_rx_clone.try_recv().await.is_ok()");
                     return Ok(());
                 }
@@ -173,7 +174,7 @@ pub async fn list_path_raw(mut rx: B_Receiver<bool>, opts: ListPathRawOptions) -> disk::error::Result<()> {
        //     opts.bucket, opts.path, &current.name
        // );
 
-        if rx.try_recv().is_ok() {
+        if rx.is_cancelled() {
             return Err(DiskError::other("canceled"));
         }
@@ -351,7 +352,7 @@ pub async fn list_path_raw(mut rx: B_Receiver<bool>, opts: ListPathRawOptions) -> disk::error::Result<()> {
 
     if let Err(err) = revjob.await.map_err(std::io::Error::other)? {
         error!("list_path_raw: revjob err {:?}", err);
-        let _ = cancel_tx.send(true);
+        cancel_rx.cancel();
         return Err(err);
     }
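The walker cancellation in `list_path_raw` now rides on `tokio_util::sync::CancellationToken` instead of a `broadcast::channel::<bool>(1)` pair: tokens are cheaply cloneable, cancellation is idempotent and level-triggered (`is_cancelled()` keeps returning true), and there is no receiver to keep alive or resubscribe. A stripped-down sketch of the pattern, independent of this crate:

```rust
use std::time::Duration;
use tokio_util::sync::CancellationToken;

#[tokio::main]
async fn main() {
    let cancel = CancellationToken::new();

    // One clone per walker task, replacing broadcast::Receiver::resubscribe().
    let walker = {
        let cancel = cancel.clone();
        tokio::spawn(async move {
            loop {
                // Level-triggered check, replacing try_recv().is_ok().
                if cancel.is_cancelled() {
                    return;
                }
                tokio::time::sleep(Duration::from_millis(10)).await;
            }
        })
    };

    cancel.cancel(); // replaces `let _ = cancel_tx.send(true);`
    walker.await.unwrap();
}
```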
diff --git a/crates/ecstore/src/client/api_get_options.rs b/crates/ecstore/src/client/api_get_options.rs
index 3692b29b..3025018b 100644
--- a/crates/ecstore/src/client/api_get_options.rs
+++ b/crates/ecstore/src/client/api_get_options.rs
@@ -44,6 +44,8 @@ pub struct GetObjectOptions {
     pub internal: AdvancedGetOptions,
 }
 
+pub type StatObjectOptions = GetObjectOptions;
+
 impl Default for GetObjectOptions {
     fn default() -> Self {
         Self {
diff --git a/crates/ecstore/src/client/api_remove.rs b/crates/ecstore/src/client/api_remove.rs
index 9cb67d86..80188721 100644
--- a/crates/ecstore/src/client/api_remove.rs
+++ b/crates/ecstore/src/client/api_remove.rs
@@ -46,11 +46,11 @@ pub struct RemoveBucketOptions {
 #[derive(Debug)]
 #[allow(dead_code)]
 pub struct AdvancedRemoveOptions {
-    replication_delete_marker: bool,
-    replication_status: ReplicationStatus,
-    replication_mtime: OffsetDateTime,
-    replication_request: bool,
-    replication_validity_check: bool,
+    pub replication_delete_marker: bool,
+    pub replication_status: ReplicationStatus,
+    pub replication_mtime: Option<OffsetDateTime>,
+    pub replication_request: bool,
+    pub replication_validity_check: bool,
 }
 
 impl Default for AdvancedRemoveOptions {
@@ -58,7 +58,7 @@ impl Default for AdvancedRemoveOptions {
         Self {
             replication_delete_marker: false,
             replication_status: ReplicationStatus::from_static(ReplicationStatus::PENDING),
-            replication_mtime: OffsetDateTime::now_utc(),
+            replication_mtime: None,
             replication_request: false,
             replication_validity_check: false,
         }
@@ -140,8 +140,7 @@ impl TransitionClient {
     }
 
     pub async fn remove_object(&self, bucket_name: &str, object_name: &str, opts: RemoveObjectOptions) -> Option<std::io::Error> {
-        let res = self.remove_object_inner(bucket_name, object_name, opts).await.expect("err");
-        res.err
+        self.remove_object_inner(bucket_name, object_name, opts).await.err()
     }
 
     pub async fn remove_object_inner(
diff --git a/crates/ecstore/src/client/api_stat.rs b/crates/ecstore/src/client/api_stat.rs
index 8a064558..58d66b99 100644
--- a/crates/ecstore/src/client/api_stat.rs
+++ b/crates/ecstore/src/client/api_stat.rs
@@ -23,6 +23,7 @@ use http::{HeaderMap, HeaderValue};
 use rustfs_utils::EMPTY_STRING_SHA256_HASH;
 use std::{collections::HashMap, str::FromStr};
 use tokio::io::BufReader;
+use tracing::warn;
 use uuid::Uuid;
 
 use crate::client::{
@@ -30,7 +31,10 @@ use crate::client::{
     api_get_options::GetObjectOptions,
     transition_api::{ObjectInfo, ReadCloser, ReaderImpl, RequestMetadata, TransitionClient, to_object_info},
 };
-use s3s::header::{X_AMZ_DELETE_MARKER, X_AMZ_VERSION_ID};
+use s3s::{
+    dto::VersioningConfiguration,
+    header::{X_AMZ_DELETE_MARKER, X_AMZ_VERSION_ID},
+};
 
 impl TransitionClient {
     pub async fn bucket_exists(&self, bucket_name: &str) -> Result<bool, std::io::Error> {
@@ -58,8 +62,14 @@ impl TransitionClient {
             .await;
 
         if let Ok(resp) = resp {
+            if resp.status() != http::StatusCode::OK {
+                return Ok(false);
+            }
+
             let b = resp.body().bytes().expect("err").to_vec();
             let resperr = http_resp_to_error_response(&resp, b, bucket_name, "");
+
+            warn!("bucket exists, resp: {:?}, resperr: {:?}", resp, resperr);
             /*if to_error_response(resperr).code == "NoSuchBucket" {
                 return Ok(false);
             }
@@ -70,6 +80,46 @@ impl TransitionClient {
         Ok(true)
     }
 
+    pub async fn get_bucket_versioning(&self, bucket_name: &str) -> Result<VersioningConfiguration, std::io::Error> {
+        let mut query_values = HashMap::new();
+        query_values.insert("versioning".to_string(), "".to_string());
+        let resp = self
+            .execute_method(
+                http::Method::GET,
+                &mut RequestMetadata {
+                    bucket_name: bucket_name.to_string(),
+                    object_name: "".to_string(),
+                    query_values,
+                    custom_header: HeaderMap::new(),
+                    content_sha256_hex: EMPTY_STRING_SHA256_HASH.to_string(),
+                    content_md5_base64: "".to_string(),
+                    content_body: ReaderImpl::Body(Bytes::new()),
+                    content_length: 0,
+                    stream_sha256: false,
+                    trailer: HeaderMap::new(),
+                    pre_sign_url: Default::default(),
+                    add_crc: Default::default(),
+                    extra_pre_sign_header: Default::default(),
+                    bucket_location: Default::default(),
+                    expires: Default::default(),
+                },
+            )
+            .await;
+
+        match resp {
+            Ok(resp) => {
+                let b = resp.body().bytes().expect("get bucket versioning err").to_vec();
+                let resperr = http_resp_to_error_response(&resp, b, bucket_name, "");
+
+                warn!("get bucket versioning, resp: {:?}, resperr: {:?}", resp, resperr);
+
+                Ok(VersioningConfiguration::default())
+            }
+
+            Err(err) => Err(std::io::Error::other(err)),
+        }
+    }
+
     pub async fn stat_object(
         &self,
         bucket_name: &str,
@@ -131,24 +181,20 @@ impl TransitionClient {
                     ..Default::default()
                 };
                 return Ok(ObjectInfo {
-                    version_id: match Uuid::from_str(h.get(X_AMZ_VERSION_ID).unwrap().to_str().unwrap()) {
-                        Ok(v) => v,
-                        Err(e) => {
-                            return Err(std::io::Error::other(e));
-                        }
-                    },
+                    version_id: h
+                        .get(X_AMZ_VERSION_ID)
+                        .and_then(|v| v.to_str().ok())
+                        .and_then(|s| Uuid::from_str(s).ok()),
                     is_delete_marker: delete_marker,
                     ..Default::default()
                 }); //err_resp
             }
             return Ok(ObjectInfo {
-                version_id: match Uuid::from_str(h.get(X_AMZ_VERSION_ID).unwrap().to_str().unwrap()) {
-                    Ok(v) => v,
-                    Err(e) => {
-                        return Err(std::io::Error::other(e));
-                    }
-                },
+                version_id: h
+                    .get(X_AMZ_VERSION_ID)
+                    .and_then(|v| v.to_str().ok())
+                    .and_then(|s| Uuid::from_str(s).ok()),
                 is_delete_marker: delete_marker,
                 replication_ready: replication_ready,
                 ..Default::default()
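Because `ObjectInfo::version_id` is now `Option<Uuid>`, a missing or unparsable `x-amz-version-id` header degrades to `None` instead of panicking through `unwrap()`. A hedged sketch of the caller side (the full `stat_object` parameter list is abbreviated in the hunk above, so the signature used here is an assumption; `StatObjectOptions` is the alias added earlier in this patch):

```rust
// Assumed shape: stat_object(bucket, object, &StatObjectOptions)
//   -> Result<ObjectInfo, std::io::Error>
async fn latest_version_id(client: &TransitionClient, bucket: &str, key: &str) -> std::io::Result<String> {
    let info = client.stat_object(bucket, key, &StatObjectOptions::default()).await?;
    Ok(match info.version_id {
        Some(v) => v.to_string(),
        // S3 reports unversioned objects with the literal version id "null".
        None => "null".to_string(),
    })
}
```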
diff --git a/crates/ecstore/src/client/bucket_cache.rs b/crates/ecstore/src/client/bucket_cache.rs
index 2ddf860e..e2308e34 100644
--- a/crates/ecstore/src/client/bucket_cache.rs
+++ b/crates/ecstore/src/client/bucket_cache.rs
@@ -36,6 +36,7 @@ use s3s::S3ErrorCode;
 use super::constants::UNSIGNED_PAYLOAD;
 use super::credentials::SignatureType;
 
+#[derive(Debug, Clone)]
 pub struct BucketLocationCache {
     items: HashMap<String, String>,
 }
diff --git a/crates/ecstore/src/client/transition_api.rs b/crates/ecstore/src/client/transition_api.rs
index ed1754ab..14b6f85f 100644
--- a/crates/ecstore/src/client/transition_api.rs
+++ b/crates/ecstore/src/client/transition_api.rs
@@ -89,6 +89,7 @@ pub enum ReaderImpl {
 
 pub type ReadCloser = BufReader>>;
 
+#[derive(Debug)]
 pub struct TransitionClient {
     pub endpoint_url: Url,
     pub creds_provider: Arc>>,
@@ -809,6 +810,7 @@ impl TransitionCore {
     }
 }
 
+#[derive(Debug, Clone, Default)]
 pub struct PutObjectPartOptions {
     pub md5_base64: String,
     pub sha256_hex: String,
@@ -820,23 +822,23 @@ pub struct PutObjectPartOptions {
 
 #[derive(Debug, Clone, Deserialize, Serialize)]
 pub struct ObjectInfo {
-    pub etag: String,
+    pub etag: Option<String>,
     pub name: String,
-    pub mod_time: OffsetDateTime,
-    pub size: usize,
+    pub mod_time: Option<OffsetDateTime>,
+    pub size: i64,
     pub content_type: Option<String>,
     #[serde(skip)]
     pub metadata: HeaderMap,
     pub user_metadata: HashMap<String, String>,
     pub user_tags: String,
-    pub user_tag_count: i64,
+    pub user_tag_count: usize,
     #[serde(skip)]
     pub owner: Owner,
     //pub grant: Vec,
     pub storage_class: String,
     pub is_latest: bool,
     pub is_delete_marker: bool,
-    pub version_id: Uuid,
+    pub version_id: Option<Uuid>,
 
     #[serde(skip, default = "replication_status_default")]
     pub replication_status: ReplicationStatus,
@@ -862,9 +864,9 @@ fn replication_status_default() -> ReplicationStatus {
 impl Default for ObjectInfo {
     fn default() -> Self {
         Self {
-            etag: "".to_string(),
+            etag: None,
             name: "".to_string(),
-            mod_time: OffsetDateTime::now_utc(),
+            mod_time: None,
             size: 0,
             content_type: None,
             metadata: HeaderMap::new(),
@@ -875,7 +877,7 @@ impl Default for ObjectInfo {
             storage_class: "".to_string(),
             is_latest: false,
             is_delete_marker: false,
-            version_id: Uuid::nil(),
+            version_id: None,
             replication_status: ReplicationStatus::from_static(ReplicationStatus::PENDING),
             replication_ready: false,
             expiration: OffsetDateTime::now_utc(),
diff --git a/crates/ecstore/src/cmd/bucket_replication.rs b/crates/ecstore/src/cmd/bucket_replication.rs
deleted file mode 100644
index 5a1f815b..00000000
--- a/crates/ecstore/src/cmd/bucket_replication.rs
+++ /dev/null
@@ -1,2736 +0,0 @@
-#![allow(unused_variables)]
-// Copyright 2024 RustFS Team
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#![allow(dead_code)] -// use error::Error; -use crate::StorageAPI; -use crate::bucket::metadata_sys::get_replication_config; -use crate::bucket::versioning_sys::BucketVersioningSys; -use crate::error::Error; -use crate::new_object_layer_fn; -use crate::rpc::RemotePeerS3Client; -use crate::store; -use crate::store_api::ObjectIO; -use crate::store_api::ObjectInfo; -use crate::store_api::ObjectOptions; -use crate::store_api::ObjectToDelete; -use aws_sdk_s3::Client as S3Client; -use aws_sdk_s3::Config; -use aws_sdk_s3::config::BehaviorVersion; -use aws_sdk_s3::config::Credentials; -use aws_sdk_s3::config::Region; -use bytes::Bytes; -use chrono::DateTime; -use chrono::Duration; -use chrono::Utc; -use futures::StreamExt; -use futures::stream::FuturesUnordered; -use http::HeaderMap; -use http::Method; -use lazy_static::lazy_static; -// use std::time::SystemTime; -use once_cell::sync::Lazy; -use regex::Regex; -use rustfs_rsc::Minio; -use rustfs_rsc::provider::StaticProvider; -use s3s::dto::DeleteMarkerReplicationStatus; -use s3s::dto::DeleteReplicationStatus; -use s3s::dto::ExistingObjectReplicationStatus; -use s3s::dto::ReplicaModificationsStatus; -use s3s::dto::ReplicationRuleStatus; -use serde::{Deserialize, Serialize}; -use std::any::Any; -use std::collections::HashMap; -use std::collections::HashSet; -use std::fmt; -use std::iter::Iterator; -use std::str::FromStr; -use std::sync::Arc; -use std::sync::atomic::AtomicI32; -use std::sync::atomic::Ordering; -use std::vec; -use time::OffsetDateTime; -use tokio::sync::Mutex; -use tokio::sync::RwLock; -use tokio::sync::mpsc::{Receiver, Sender}; -use tokio::task; -use tracing::{debug, error, info, warn}; -use uuid::Uuid; -use xxhash_rust::xxh3::xxh3_64; -// use bucket_targets::{self, GLOBAL_Bucket_Target_Sys}; -use crate::bucket::lifecycle::bucket_lifecycle_ops::TransitionedObject; - -#[derive(Serialize, Deserialize, Debug)] -struct MRFReplicateEntry { - #[serde(rename = "bucket")] - bucket: String, - - #[serde(rename = "object")] - object: String, - - #[serde(skip_serializing, skip_deserializing)] - version_id: String, - - #[serde(rename = "retryCount")] - retry_count: i32, - - #[serde(skip_serializing, skip_deserializing)] - sz: i64, -} - -trait ReplicationWorkerOperation: Any + Send + Sync { - fn to_mrf_entry(&self) -> MRFReplicateEntry; - fn as_any(&self) -> &dyn Any; -} - -// WorkerMaxLimit max number of workers per node for "fast" mode -pub const WORKER_MAX_LIMIT: usize = 50; - -// WorkerMinLimit min number of workers per node for "slow" mode -pub const WORKER_MIN_LIMIT: usize = 5; - -// WorkerAutoDefault is default number of workers for "auto" mode -pub const WORKER_AUTO_DEFAULT: usize = 10; - -// MRFWorkerMaxLimit max number of mrf workers per node for "fast" mode -pub const MRF_WORKER_MAX_LIMIT: usize = 8; - -// MRFWorkerMinLimit min number of mrf workers per node for "slow" mode -pub const MRF_WORKER_MIN_LIMIT: usize = 2; - -// MRFWorkerAutoDefault is default number of mrf workers for "auto" mode -pub const MRF_WORKER_AUTO_DEFAULT: usize = 4; - -// LargeWorkerCount is default number of workers assigned to large uploads ( >= 128MiB) -pub const LARGE_WORKER_COUNT: usize = 2; - -pub const MIN_LARGE_OBJSIZE: u64 = 128 * 1024 * 1024; - -pub struct ReplicationPool { - // Atomic operations - active_workers: Arc, - active_lrg_workers: Arc, - active_mrf_workers: Arc, - - // Shared objects - obj_layer: Arc, - //ctx: Arc>, // Placeholder for context; replace as needed - priority: String, - max_workers: usize, - max_lworkers: usize, - //stats: Option>, 
- - // Synchronization primitives - //mu: RwLock<()>, - //mrf_mu: Mutex<()>, - //resyncer: Option>, - - // Workers - workers_sender: Vec>>, - workers_recever: Vec>>, - lrg_workers_sender: Vec>>, - lrg_workers_receiver: Vec>>, - - // MRF - //mrf_worker_kill_ch: Option>, - mrf_replica_ch_sender: Sender>, - mrf_replica_ch_receiver: Receiver>, - //mrf_save_ch: Sender, - //mrf_stop_ch: Sender<()>, - mrf_worker_size: usize, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)] -#[repr(u8)] // Explicitly indicate underlying value is u8 -pub enum ReplicationType { - #[default] - UnsetReplicationType = 0, - ObjectReplicationType = 1, - DeleteReplicationType = 2, - MetadataReplicationType = 3, - HealReplicationType = 4, - ExistingObjectReplicationType = 5, - ResyncReplicationType = 6, - AllReplicationType = 7, -} - -impl ReplicationType { - /// 从 u8 转换为枚举 - pub fn from_u8(value: u8) -> Option { - match value { - 0 => Some(Self::UnsetReplicationType), - 1 => Some(Self::ObjectReplicationType), - 2 => Some(Self::DeleteReplicationType), - 3 => Some(Self::MetadataReplicationType), - 4 => Some(Self::HealReplicationType), - 5 => Some(Self::ExistingObjectReplicationType), - 6 => Some(Self::ResyncReplicationType), - 7 => Some(Self::AllReplicationType), - _ => None, - } - } - - /// 获取枚举对应的 u8 值 - pub fn as_u8(self) -> u8 { - self as u8 - } - - pub fn is_data_replication(self) -> bool { - matches!( - self, - ReplicationType::ObjectReplicationType - | ReplicationType::HealReplicationType - | ReplicationType::ExistingObjectReplicationType - ) - } -} - -const SYSTEM_XML_OBJECT: &str = ".system-d26a9498-cb7c-4a87-a44a-8ae204f5ba6c/system.xml"; -const CAPACITY_XML_OBJECT: &str = ".system-d26a9498-cb7c-4a87-a44a-8ae204f5ba6c/capacity.xml"; -const VEEAM_AGENT_SUBSTR: &str = "APN/1.0 Veeam/1.0"; - -fn is_veeam_sos_api_object(object: &str) -> bool { - matches!(object, SYSTEM_XML_OBJECT | CAPACITY_XML_OBJECT) -} - -pub async fn queue_replication_heal( - bucket: &str, - oi: &ObjectInfo, - rcfg: &s3s::dto::ReplicationConfiguration, - _retry_count: u32, -) -> Option { - if oi.mod_time.is_none() || is_veeam_sos_api_object(&oi.name) { - return None; - } - - if rcfg.rules.is_empty() { - return None; - } - - let mut moi = oi.clone(); - - let mut roi = get_heal_replicate_object_info(&mut moi, rcfg).await; - //roi.retry_count = retry_count; - - if !roi.dsc.replicate_any() { - error!("Replication heal for object {} in bucket {} is not configured", oi.name, bucket); - return None; - } - - if oi.replication_status == ReplicationStatusType::Completed && !roi.existing_obj_resync.must_resync() { - return None; - } - - // Handle Delete Marker or VersionPurgeStatus cases - if roi.delete_marker || !roi.version_purge_status.is_empty() { - let (version_id, dm_version_id) = if roi.version_purge_status.is_empty() { - (String::new(), roi.version_id.clone()) - } else { - (roi.version_id.clone(), String::new()) - }; - - let dv = DeletedObjectReplicationInfo { - deleted_object: DeletedObject { - object_name: Some(roi.name.clone()), - delete_marker_version_id: Some(dm_version_id), - version_id: Some(roi.version_id.clone()), - replication_state: roi.replication_state.clone(), - delete_marker_mtime: roi.mod_time, - delete_marker: Some(roi.delete_marker), - }, - bucket: roi.bucket.clone(), - op_type: ReplicationType::HealReplicationType, - //event_type: ReplicationType::HealDeleteType, - event_type: "".to_string(), - reset_id: "".to_string(), - target_arn: "".to_string(), - }; - - if matches!(roi.replication_status, 
ReplicationStatusType::Pending | ReplicationStatusType::Failed) - || matches!(roi.version_purge_status, VersionPurgeStatusType::Failed | VersionPurgeStatusType::Pending) - { - let mut pool = GLOBAL_REPLICATION_POOL.write().await; - pool.as_mut().unwrap().queue_replica_task(roi).await; - //GLOBAL_REPLICATION_POOL().queue_replica_delete_task(dv); - return None; - } - - if roi.existing_obj_resync.must_resync() - && (roi.replication_status == ReplicationStatusType::Completed || roi.replication_status.is_empty()) - { - //queue_replicate_deletes_wrapper(dv, &roi.existing_obj_resync); - let mut pool = GLOBAL_REPLICATION_POOL.write().await; - pool.as_mut().unwrap().queue_replica_task(roi).await; - return None; - } - - return None; - } - - if roi.existing_obj_resync.must_resync() { - roi.op_type = ReplicationType::ExistingObjectReplicationType as i32; - } - - let mut pool = GLOBAL_REPLICATION_POOL.write().await; - - match roi.replication_status { - ReplicationStatusType::Pending | ReplicationStatusType::Failed => { - //roi.event_type = ReplicateEventType::Heal; - //roi.event_type = ReplicateEventType::Heal; - pool.as_mut().unwrap().queue_replica_task(roi.clone()).await; - return Some(roi); - } - _ => {} - } - - if roi.existing_obj_resync.must_resync() { - //roi.event_type = ReplicateEventType::Existing; - pool.as_mut().unwrap().queue_replica_task(roi.clone()).await; - } - - Some(roi) -} - -fn new_replicate_target_decision(arn: String, replicate: bool, sync: bool) -> ReplicateTargetDecision { - ReplicateTargetDecision { - id: String::new(), // Using a default value for the 'id' field is acceptable - replicate, - synchronous: sync, - arn, - } -} - -pub async fn check_replicate_delete( - bucket: &str, - dobj: &ObjectToDelete, - oi: &ObjectInfo, - del_opts: &ObjectOptions, - gerr: Option<&Error>, -) -> ReplicateDecision { - error!("check_replicate_delete"); - let mut dsc = ReplicateDecision::default(); - - let rcfg = match get_replication_config(bucket).await { - Ok((cfg, mod_time)) => cfg, - Err(e) => { - //repl_log_once_if(ctx, None, bucket); // 你需要实现这个日志函数 - error!("get replication config err:"); - return dsc; - } - }; - - if del_opts.replication_request { - return dsc; - } - - if !del_opts.versioned { - return dsc; - } - - let mut opts = ReplicationObjectOpts { - name: dobj.object_name.clone(), - ssec: false, - user_tags: Some(oi.user_tags.clone()), - delete_marker: oi.delete_marker, - //version_id: dobj.version_id.clone().map(|v| v.to_string()), - version_id: oi.version_id.map(|uuid| uuid.to_string()).unwrap_or_default(), - op_type: ReplicationType::DeleteReplicationType, - target_arn: None, - replica: true, - existing_object: true, - }; - - let tgt_arns = rcfg.filter_target_arns(&opts); - dsc.targets_map = HashMap::with_capacity(tgt_arns.len()); - - if tgt_arns.is_empty() { - return dsc; - } - - let sync = false; - let mut replicate; - - for tgt_arn in tgt_arns { - //let mut opts = opts.clone(); - opts.target_arn = Some(tgt_arn.clone()); - replicate = rcfg.replicate(&opts); - - if gerr.is_some() { - let valid_repl_status = matches!( - oi.target_replication_status(tgt_arn.clone()), - ReplicationStatusType::Pending | ReplicationStatusType::Completed | ReplicationStatusType::Failed - ); - - if oi.delete_marker && (valid_repl_status || replicate) { - dsc.set(new_replicate_target_decision(tgt_arn.clone(), replicate, sync)); - continue; - } - - if !oi.version_purge_status.is_empty() { - replicate = matches!(oi.version_purge_status, VersionPurgeStatusType::Pending | VersionPurgeStatusType::Failed); - 
dsc.set(new_replicate_target_decision(tgt_arn.clone(), replicate, sync)); - continue; - } - } - - let tgt = bucket_targets::get_bucket_target_client(bucket, &tgt_arn).await; - - let tgt_dsc = match tgt { - Ok(tgt) => new_replicate_target_decision(tgt_arn.clone(), replicate, tgt.replicate_sync), - Err(_) => new_replicate_target_decision(tgt_arn.clone(), false, false), - }; - - // let tgt_dsc = if let Some(tgt) = tgt { - // new_replicate_target_decision(tgt_arn.clone(), replicate, tgt.replicate_sync) - // } else { - // new_replicate_target_decision(tgt_arn.clone(), false, false) - // }; - - dsc.set(tgt_dsc); - } - - dsc -} -// use crate::replication::*; -// use crate::crypto; -// use crate::global::*; - -fn target_reset_header(arn: &str) -> String { - format!("{RESERVED_METADATA_PREFIX_LOWER}{REPLICATION_RESET}-{arn}") -} - -pub async fn get_heal_replicate_object_info( - oi: &mut ObjectInfo, - rcfg: &s3s::dto::ReplicationConfiguration, -) -> ReplicateObjectInfo { - let mut user_defined = oi.user_defined.clone(); - - if !rcfg.rules.is_empty() { - if !oi.replication_status.is_empty() { - oi.replication_status_internal = format!("{}={};", rcfg.role, oi.replication_status.as_str()); - } - - if !oi.version_purge_status.is_empty() { - oi.version_purge_status_internal = format!("{}={};", rcfg.role, oi.version_purge_status); - } - - // let to_replace: Vec<(String, String)> = user_defined - // .iter() - // .filter(|(k, _)| k.eq_ignore_ascii_case(&(RESERVED_METADATA_PREFIX_LOWER.to_owned() + REPLICATION_RESET))) - // .map(|(k, v)| (k.clone(), v.clone())) - // .collect::>() - // .collect(); - let to_replace: Vec<(String, String)> = user_defined - .iter() - .filter(|(k, _)| k.eq_ignore_ascii_case(&(RESERVED_METADATA_PREFIX_LOWER.to_owned() + REPLICATION_RESET))) - .map(|(k, v)| (k.clone(), v.clone())) - .collect(); - - // 第二步:apply 修改 - for (k, v) in to_replace { - user_defined.remove(&k); - user_defined.insert(target_reset_header(&rcfg.role), v); - } - } - //} - - //let dsc = if oi.delete_marker || !oi.version_purge_status.is_empty() { - let dsc = if oi.delete_marker { - check_replicate_delete( - &oi.bucket, - &ObjectToDelete { - object_name: oi.name.clone(), - version_id: oi.version_id, - }, - oi, - &ObjectOptions { - // versioned: global_bucket_versioning_sys::prefix_enabled(&oi.bucket, &oi.name), - // version_suspended: global_bucket_versioning_sys::prefix_suspended(&oi.bucket, &oi.name), - versioned: true, - version_suspended: false, - ..Default::default() - }, - None, - ) - .await - } else { - // let opts: ObjectOptions = put_opts(&bucket, &key, version_id, &req.headers, Some(mt)) - // .await - // .map_err(to_s3_error)?; - let mt = oi.user_defined.clone(); - let mt2 = oi.user_defined.clone(); - let opts = ObjectOptions { - user_defined: user_defined.clone(), - versioned: true, - version_id: oi.version_id.map(|uuid| uuid.to_string()), - mod_time: oi.mod_time, - ..Default::default() - }; - let repoptions = - get_must_replicate_options(&mt2, "", ReplicationStatusType::Unknown, ReplicationType::ObjectReplicationType, &opts); - - let decision = must_replicate(&oi.bucket, &oi.name, &repoptions).await; - error!("decision:"); - decision - }; - - let tgt_statuses = replication_statuses_map(&oi.replication_status_internal); - let purge_statuses = version_purge_statuses_map(&oi.version_purge_status_internal); - //let existing_obj_resync = rcfg.resync(&GLOBAL_CONTEXT, oi, &dsc, &tgt_statuses); - - // let tm = user_defined - // .get(&(RESERVED_METADATA_PREFIX_LOWER.to_owned() + REPLICATION_TIMESTAMP)) - // 
.and_then(|v| DateTime::parse_from_rfc3339(v).ok()) - // .map(|dt| dt.with_timezone(&Utc)); - - let tm = user_defined - .get(&(RESERVED_METADATA_PREFIX_LOWER.to_owned() + REPLICATION_TIMESTAMP)) - .and_then(|v| DateTime::parse_from_rfc3339(v).ok()) - .map(|dt| dt.with_timezone(&Utc)); - - let mut rstate = oi.replication_state(); - rstate.replicate_decision_str = dsc.to_string(); - - let asz = oi.get_actual_size().unwrap_or(0); - - let key = format!("{RESERVED_METADATA_PREFIX_LOWER}{REPLICATION_TIMESTAMP}"); - let tm: Option> = user_defined - .get(&key) - .and_then(|v| DateTime::parse_from_rfc3339(v).ok()) - .map(|dt| dt.with_timezone(&Utc)); - - let mut result = ReplicateObjectInfo { - name: oi.name.clone(), - size: oi.size, - actual_size: asz, - bucket: oi.bucket.clone(), - //version_id: oi.version_id.clone(), - version_id: oi - .version_id - .map(|uuid| uuid.to_string()) // 将 Uuid 转换为 String - .unwrap_or_default(), - etag: oi.etag.clone().unwrap(), - mod_time: convert_offsetdatetime_to_chrono(oi.mod_time).unwrap(), - replication_status: oi.replication_status.clone(), - replication_status_internal: oi.replication_status_internal.clone(), - delete_marker: oi.delete_marker, - version_purge_status_internal: oi.version_purge_status_internal.clone(), - version_purge_status: oi.version_purge_status.clone(), - replication_state: rstate, - op_type: 1, - dsc, - existing_obj_resync: Default::default(), - target_statuses: tgt_statuses, - target_purge_statuses: purge_statuses, - replication_timestamp: tm.unwrap_or_else(Utc::now), - //ssec: crypto::is_encrypted(&oi.user_defined), - ssec: false, - user_tags: oi.user_tags.clone(), - checksum: oi.checksum.clone(), - event_type: "".to_string(), - retry_count: 0, - reset_id: "".to_string(), - target_arn: "".to_string(), - }; - - if result.ssec { - result.checksum = oi.checksum.clone(); - } - - warn!( - "Replication heal for object {} in bucket {} is configured {:?}", - oi.name, oi.bucket, oi.version_id - ); - - result -} - -#[derive(Debug, Clone)] -pub struct MustReplicateOptions { - pub meta: HashMap, - pub status: ReplicationStatusType, - pub op_type: ReplicationType, - pub replication_request: bool, // Incoming request is a replication request -} - -impl MustReplicateOptions { - /// Get the replication status from metadata, if available. - pub fn replication_status(&self) -> ReplicationStatusType { - if let Some(rs) = self.meta.get("x-amz-bucket-replication-status") { - return match rs.as_str() { - "Pending" => ReplicationStatusType::Pending, - "Completed" => ReplicationStatusType::Completed, - "CompletedLegacy" => ReplicationStatusType::CompletedLegacy, - "Failed" => ReplicationStatusType::Failed, - "Replica" => ReplicationStatusType::Replica, - _ => ReplicationStatusType::Unknown, - }; - } - self.status.clone() - } - - /// Check if the operation type is existing object replication. - pub fn is_existing_object_replication(&self) -> bool { - self.op_type == ReplicationType::ExistingObjectReplicationType - } - - /// Check if the operation type is metadata replication. 
- pub fn is_metadata_replication(&self) -> bool { - self.op_type == ReplicationType::MetadataReplicationType - } -} - -use tokio::sync::mpsc; - -use crate::cmd::bucket_targets; - -// use super::bucket_targets::Client; -use super::bucket_targets::TargetClient; -//use crate::storage; - -// 模拟依赖的类型 -pub struct Context; // Used to replace Go's `context.Context` -#[derive(Default)] -pub struct ReplicationStats; - -#[derive(Default)] -pub struct ReplicationPoolOpts { - pub priority: String, - pub max_workers: usize, - pub max_l_workers: usize, -} - -//pub static GLOBAL_REPLICATION_POOL: OnceLock> = OnceLock::new(); - -pub static GLOBAL_REPLICATION_POOL: Lazy>> = Lazy::new(|| { - RwLock::new(None) // 允许延迟初始化 -}); - -impl ReplicationPool { - pub async fn init_bucket_replication_pool( - obj_layer: Arc, - opts: ReplicationPoolOpts, - stats: Arc, - ) { - let mut workers = 0; - let mut failed_workers = 0; - let mut priority = "auto".to_string(); - let mut max_workers = WORKER_MAX_LIMIT; - warn!("init_bucket_replication_pool {} {} {} {}", workers, failed_workers, priority, max_workers); - - let (sender, receiver) = mpsc::channel::>(10); - - // Self { - // mrf_replica_ch_sender: sender, - // } - - if !opts.priority.is_empty() { - priority = opts.priority.clone(); - } - if opts.max_workers > 0 { - max_workers = opts.max_workers; - } - - match priority.as_str() { - "fast" => { - workers = WORKER_MAX_LIMIT; - failed_workers = MRF_WORKER_MAX_LIMIT; - } - "slow" => { - workers = WORKER_MIN_LIMIT; - failed_workers = MRF_WORKER_MIN_LIMIT; - } - _ => { - workers = WORKER_AUTO_DEFAULT; - failed_workers = MRF_WORKER_AUTO_DEFAULT; - } - } - - if max_workers > 0 && workers > max_workers { - workers = max_workers; - } - if max_workers > 0 && failed_workers > max_workers { - failed_workers = max_workers; - } - - let max_l_workers = if opts.max_l_workers > 0 { - opts.max_l_workers - } else { - LARGE_WORKER_COUNT - }; - - // 初始化通道 - let (mrf_replica_tx, _) = mpsc::channel::(100_000); - let (mrf_worker_kill_tx, _) = mpsc::channel::(failed_workers); - let (mrf_save_tx, _) = mpsc::channel::(100_000); - let (mrf_stop_tx, _) = mpsc::channel::(1); - - let mut pool = Self { - workers_sender: Vec::with_capacity(workers), - workers_recever: Vec::with_capacity(workers), - lrg_workers_sender: Vec::with_capacity(max_l_workers), - lrg_workers_receiver: Vec::with_capacity(max_l_workers), - active_workers: Arc::new(AtomicI32::new(0)), - active_lrg_workers: Arc::new(AtomicI32::new(0)), - active_mrf_workers: Arc::new(AtomicI32::new(0)), - max_lworkers: max_l_workers, - //mrf_worker_kill_ch: None, - mrf_replica_ch_sender: sender, - mrf_replica_ch_receiver: receiver, - mrf_worker_size: workers, - priority, - max_workers, - obj_layer, - }; - - warn!("work size is: {}", workers); - pool.resize_lrg_workers(max_l_workers, Some(0)).await; - pool.resize_workers(workers, Some(0)).await; - pool.resize_failed_workers(failed_workers).await; - let obj_layer_clone = pool.obj_layer.clone(); - - // 启动后台任务 - let resyncer = Arc::new(RwLock::new(ReplicationResyncer::new())); - let x = Arc::new(RwLock::new(&pool)); - // tokio::spawn(async move { - // resyncer.lock().await.persist_to_disk(ctx_clone, obj_layer_clone).await; - // }); - - tokio::spawn(async move { - //pool4.process_mrf().await - }); - let pool5 = Arc::clone(&x); - tokio::spawn(async move { - //pool5.persist_mrf().await - }); - - let mut global_pool = GLOBAL_REPLICATION_POOL.write().await; - global_pool.replace(pool); - } - - pub async fn resize_lrg_workers(&mut self, n: usize, check_old: 
Option) { - //let mut lrg_workers = self.lrg_workers.lock().unwrap(); - if (check_old.is_some() && self.lrg_workers_sender.len() != check_old.unwrap()) - || n == self.lrg_workers_sender.len() - || n < 1 - { - // Either already satisfied or worker count changed while waiting for the lock. - return; - } - debug!("Resizing large workers pool"); - - let active_workers = Arc::clone(&self.active_lrg_workers); - let obj_layer = Arc::clone(&self.obj_layer); - let mut lrg_workers_sender = std::mem::take(&mut self.lrg_workers_sender); - - while lrg_workers_sender.len() < n { - let (sender, mut receiver) = mpsc::channel::>(100); - lrg_workers_sender.push(sender); - - let active_workers_clone = Arc::clone(&active_workers); - let obj_layer_clone = Arc::clone(&obj_layer); - - tokio::spawn(async move { - while let Some(operation) = receiver.recv().await { - debug!("Processing replication operation in worker"); - active_workers_clone.fetch_add(1, Ordering::SeqCst); - - if let Some(info) = operation.as_any().downcast_ref::() { - replicate_object(info.clone(), obj_layer_clone.clone()).await; - } else if let Some(info) = operation.as_any().downcast_ref::() { - replicate_delete(&info.clone(), obj_layer_clone.clone()).await; - } else { - eprintln!("Unknown replication type"); - } - - active_workers_clone.fetch_sub(1, Ordering::SeqCst); - } - }); - } - - // Add new workers if needed - // Remove excess workers if needed - while lrg_workers_sender.len() > n { - lrg_workers_sender.pop(); // Dropping the sender will close the channel - } - - self.lrg_workers_sender = lrg_workers_sender; - } - - pub async fn resize_workers(&mut self, n: usize, check_old: Option) { - debug!("resize worker"); - //let mut lrg_workers = self.lrg_workers.lock().unwrap(); - if (check_old.is_some() && self.workers_sender.len() != check_old.unwrap()) || n == self.workers_sender.len() || n < 1 { - // Either already satisfied or worker count changed while waiting for the lock. 
-            return;
-        }
-        debug!("resize worker");
-        // Add new workers if needed
-        let active_workers_clone = Arc::clone(&self.active_workers);
-        let mut vsender = std::mem::take(&mut self.workers_sender);
-        //let mut works_sender = std::mem::take(&mut self.workers_sender);
-        let layer = Arc::clone(&self.obj_layer);
-        while vsender.len() < n {
-            debug!("resize workers");
-            let (sender, mut receiver) = mpsc::channel::<Box<dyn ReplicationWorkerOperation>>(100);
-            vsender.push(sender);
-
-            let active_workers_clone = Arc::clone(&active_workers_clone);
-            // Spawn a new worker
-            let layer_clone = Arc::clone(&layer);
-            tokio::spawn(async move {
-                while let Some(operation) = receiver.recv().await {
-                    // Simulate work being processed
-                    active_workers_clone.fetch_add(1, Ordering::SeqCst);
-
-                    if let Some(info) = operation.as_any().downcast_ref::<ReplicateObjectInfo>() {
-                        //self.stats.inc_q(&info.bucket, info.size, info.delete_marker, &info.op_type);
-                        let _layer = Arc::clone(&layer_clone);
-                        replicate_object(info.clone(), _layer).await;
-                        //self.stats.dec_q(&info.bucket, info.size, info.delete_marker, &info.op_type);
-                    } else if let Some(info) = operation.as_any().downcast_ref::<DeletedObjectReplicationInfo>() {
-                        let _layer = Arc::clone(&layer_clone);
-                        replicate_delete(&info.clone(), _layer).await;
-                    } else {
-                        eprintln!("Unknown replication type");
-                    }
-
-                    active_workers_clone.fetch_sub(1, Ordering::SeqCst);
-                }
-            });
-        }
-        // Remove excess workers if needed
-        while vsender.len() > n {
-            vsender.pop(); // Dropping the sender will close the channel
-        }
-        self.workers_sender = vsender;
-        // warn!("self sender size is {:?}", self.workers_sender.len());
-    }
-
-    async fn resize_failed_workers(&self, _count: usize) {
-        // Initialization logic for the failed-replication (MRF) workers goes here.
-    }
-
-    // async fn process_mrf(&self) {
-    //     // MRF processing logic
-    // }
-
-    // async fn persist_mrf(&self) {
-    //     // MRF persistence logic
-    // }
-
-    fn get_worker_ch(&self, bucket: &str, object: &str, _sz: i64) -> Option<&Sender<Box<dyn ReplicationWorkerOperation>>> {
-        let h = xxh3_64(format!("{bucket}{object}").as_bytes()); // hash bucket + object
-
-        // needs a lock;
-        let workers = &self.workers_sender; // read access
-
-        if workers.is_empty() {
-            warn!("workers is empty");
-            return None;
-        }
-
-        let index = (h as usize) % workers.len(); // pick a worker
-        Some(&workers[index]) // return the matching Sender
-    }
-
-    async fn queue_replica_task(&mut self, ri: ReplicateObjectInfo) {
-        if ri.size >= MIN_LARGE_OBJSIZE as i64 {
-            let h = xxh3_64(format!("{}{}", ri.bucket, ri.name).as_bytes());
-            let workers = &self.lrg_workers_sender;
-            let worker_count = workers.len();
-
-            if worker_count > 0 {
-                let worker_index = (h as usize) % worker_count;
-                let sender = &workers[worker_index];
-
-                match sender.try_send(Box::new(ri.clone())) {
-                    Ok(_) => return,
-                    Err(_) => {
-                        // The task queue is full; fall back to MRF handling.
-                        //println!("Queue full, saving to MRF: {}", ri.to_mrf_entry());
-                        println!("Queue full, saving to MRF");
-                    }
-                }
-            }
-
-            // Check whether more workers should be added.
-            let existing = worker_count;
-            let max_workers = self.max_lworkers.min(LARGE_WORKER_COUNT);
-
-            if self.active_lrg_workers.load(Ordering::SeqCst) < max_workers as i32 {
-                let new_worker_count = (existing + 1).min(max_workers);
-                self.resize_lrg_workers(new_worker_count, Some(existing)).await;
-            }
-            return;
-        }
-        let mut ch: Option<&Sender<Box<dyn ReplicationWorkerOperation>>> = None;
-        let mut heal_ch: Option<&Sender<Box<dyn ReplicationWorkerOperation>>> = None;
-        warn!("enqueue object:{}", ch.is_none());
-
-        if ri.op_type == ReplicationType::HealReplicationType as i32
-            || ri.op_type == ReplicationType::ExistingObjectReplicationType as i32
-        {
-            ch = Some(&self.mrf_replica_ch_sender);
-            heal_ch = self.get_worker_ch(&ri.name, &ri.bucket, ri.size);
-        } else {
-            info!("get worker channel for replication");
-            ch = self.get_worker_ch(&ri.name, &ri.bucket, ri.size);
-        }
-
-        if ch.is_none() && heal_ch.is_none() {
-            error!("replicate channel empty");
-            return;
-        }
-
-        let mut sent = false;
-        tokio::select! {
-            //_ = self.ctx_done.closed() => {},
-            Some(h) = async { heal_ch } => {
-                //if let Some(h) = h {
-                if h.send(Box::new(ri.clone())).await.is_ok() {
-                    warn!("enqueue object");
-                    sent = true;
-                }
-                //}
-            }
-            Some(c) = async { ch } => {
-                //if let Some(c) = c {
-                if c.send(Box::new(ri.clone())).await.is_ok() {
-                    info!("enqueue object");
-                    sent = true;
-                }
-                //}
-            }
-        }
-
-        if !sent {
-            //todo!
-            //self.queue_mrf_save(ri).await;
-            let max_workers = self.max_workers;
-
-            match self.priority.as_str() {
-                "fast" => {
-                    println!("Warning: Unable to keep up with incoming traffic");
-                }
-                "slow" => {
-                    println!("Warning: Incoming traffic is too high. Increase replication priority.");
-                }
-                _ => {
-                    let worker_count = self.active_workers.load(Ordering::SeqCst);
-                    let max_workers = max_workers.min(WORKER_MAX_LIMIT);
-                    if worker_count < max_workers as i32 {
-                        //self.resize_workers((worker_count + 1 as usize).try_into().unwrap(), worker_count).await;
-                        self.resize_workers(worker_count as usize + 1_usize, Some(worker_count as usize))
-                            .await;
-                    }
-
-                    //let max_mrf_workers = max_workers.min(MRFWorkerMaxLimit);
-                    let max_mrf_workers = max_workers.min(MRF_WORKER_MAX_LIMIT);
-                    if self.mrf_worker_size < max_mrf_workers {
-                        self.resize_failed_workers(self.mrf_worker_size + 1).await;
-                    }
-                }
-            }
-        }
-    }
-}
-
-pub struct ReplicationResyncer;
-
-impl Default for ReplicationResyncer {
-    fn default() -> Self {
-        Self
-    }
-}
-
-impl ReplicationResyncer {
-    pub fn new() -> Self {
-        Self
-    }
-
-    pub async fn persist_to_disk(&self, _ctx: Arc<Context>, _obj_layer: Arc<ECStore>) {
-        // Persist-to-disk logic goes here.
-    }
-}
-
-pub async fn init_bucket_replication_pool() {
-    if let Some(store) = new_object_layer_fn() {
-        let opts = ReplicationPoolOpts::default();
-        let stats = ReplicationStats;
-        let stat = Arc::new(stats);
-        warn!("init bucket replication pool");
-        ReplicationPool::init_bucket_replication_pool(store, opts, stat).await;
-    } else {
-        // TODO: to be added
-    }
-}
-
-pub struct ReplicationClient {
-    pub s3cli: S3Client,
-    pub remote_peer_client: RemotePeerS3Client,
-    pub arn: String,
-}
-
-pub trait RemotePeerS3ClientExt {
-    fn putobject(remote_bucket: String, remote_object: String, size: i64);
-    fn multipart();
-}
-
-impl RemotePeerS3ClientExt for RemotePeerS3Client {
-    fn putobject(_remote_bucket: String, _remote_object: String, _size: i64) {}
-
-    fn multipart() {}
-}
-
-#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
-pub enum ReplicationStatusType {
-    #[default]
-    Pending,
-    Completed,
-    CompletedLegacy,
-    Failed,
-    Replica,
-    Unknown,
-}
-
-impl ReplicationStatusType {
-    // Converts the enum variant to its string representation
-    pub fn as_str(&self) -> &'static str {
-        match self {
-            ReplicationStatusType::Pending => "PENDING",
-            ReplicationStatusType::Completed => "COMPLETED",
-            ReplicationStatusType::CompletedLegacy => "COMPLETE",
-            ReplicationStatusType::Failed => "FAILED",
-            ReplicationStatusType::Replica => "REPLICA",
-            ReplicationStatusType::Unknown => "",
-        }
-    }
-
-    // Checks if the status is empty (not set)
-    pub fn is_empty(&self) -> bool {
-        matches!(self, ReplicationStatusType::Pending) // Adjust logic if needed
-    }
-
-    // Construct ReplicationStatusType enum from string
-    pub fn from(value: &str) -> Self {
-        match value.to_uppercase().as_str() {
-            "PENDING" => ReplicationStatusType::Pending,
-            "COMPLETED" => ReplicationStatusType::Completed,
-            "COMPLETE" => ReplicationStatusType::CompletedLegacy,
-            "FAILED" => ReplicationStatusType::Failed,
-            "REPLICA" => ReplicationStatusType::Replica,
-            _ => ReplicationStatusType::Unknown,
-        }
-    }
-}
-
-#[derive(Default, Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
-pub enum VersionPurgeStatusType {
-    Pending,
-    Complete,
-    Failed,
-    Empty,
-    #[default]
-    Unknown,
-}
-
-impl VersionPurgeStatusType {
-    // Check whether the status is Empty
-    pub fn is_empty(&self) -> bool {
-        matches!(self, VersionPurgeStatusType::Empty)
-    }
-
-    // Check if it's Pending (both Pending and Failed are considered Pending status)
-    pub fn is_pending(&self) -> bool {
-        matches!(self, VersionPurgeStatusType::Pending | VersionPurgeStatusType::Failed)
-    }
-}
-
-// Implement conversion from string (similar to Go's string comparison)
-impl From<&str> for VersionPurgeStatusType {
-    fn from(value: &str) -> Self {
-        match value.to_uppercase().as_str() {
-            "PENDING" => VersionPurgeStatusType::Pending,
-            "COMPLETE" => VersionPurgeStatusType::Complete,
-            "FAILED" => VersionPurgeStatusType::Failed,
-            _ => VersionPurgeStatusType::Empty,
-        }
-    }
-}
-
-impl fmt::Display for VersionPurgeStatusType {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        let s = match self {
-            VersionPurgeStatusType::Pending => "PENDING",
-            VersionPurgeStatusType::Complete => "COMPLETE",
-            VersionPurgeStatusType::Failed => "FAILED",
-            VersionPurgeStatusType::Empty => "",
-            VersionPurgeStatusType::Unknown => "UNKNOWN",
-        };
-        write!(f, "{s}")
-    }
-}
-
-pub fn get_composite_version_purge_status(status_map: &HashMap<String, VersionPurgeStatusType>) -> VersionPurgeStatusType {
-    if status_map.is_empty() {
-        return VersionPurgeStatusType::Unknown;
-    }
-
-    let mut completed_count = 0;
-
-    for status in status_map.values() {
-        match status {
-            VersionPurgeStatusType::Failed => return VersionPurgeStatusType::Failed,
-            VersionPurgeStatusType::Complete => completed_count += 1,
-            _ => {}
-        }
-    }
-
-    if completed_count == status_map.len() {
-        VersionPurgeStatusType::Complete
-    } else {
-        VersionPurgeStatusType::Pending
-    }
-}
-
-// The ReplicationAction enum
-#[derive(Debug, Default, PartialEq, Eq, Clone, Serialize, Deserialize)]
-pub enum ReplicationAction {
-    ReplicateMetadata,
-    #[default]
-    ReplicateNone,
-    ReplicateAll,
-}
-
-impl FromStr for ReplicationAction {
-    // Factory: build the matching variant from a string
-    type Err = ();
-    fn from_str(action: &str) -> Result<Self, Self::Err> {
-        match action.to_lowercase().as_str() {
-            "metadata" => Ok(ReplicationAction::ReplicateMetadata),
-            "none" => Ok(ReplicationAction::ReplicateNone),
-            "all" => Ok(ReplicationAction::ReplicateAll),
-            _ => Err(()),
-        }
-    }
-}
-
-// The ObjectInfo struct definition (kept for reference)
-// #[derive(Debug)]
-// pub struct ObjectInfo {
-//     pub e_tag: String,
-//     pub version_id: String,
-//     pub actual_size: i64,
-//     pub mod_time: DateTime<Utc>,
-//     pub delete_marker: bool,
-//     pub content_type: String,
-//     pub content_encoding: String,
-//     pub user_tags: HashMap<String, String>,
-//     pub user_defined: HashMap<String, String>,
-// }
-
-// impl ObjectInfo {
-//     // Actual-size accessor
-//     pub fn get_actual_size(&self) -> i64 {
-//         self.actual_size
-//     }
-// }
-
-// Case-insensitive comparison of a string against a list of keys
-// fn equals(k1: &str, keys: &[&str]) -> bool {
-//     keys.iter().any(|&k2| k1.eq_ignore_ascii_case(k2))
-// }
-
-// Compute the ReplicationAction by comparing two objects
-pub fn get_replication_action(oi1: &ObjectInfo, oi2: &ObjectInfo, op_type: &str) -> ReplicationAction {
-    let _null_version_id = "null";
-
-    // For existing-object replication, decide whether the sync can be skipped
-    if op_type == "existing" && oi1.mod_time > oi2.mod_time && oi1.version_id.is_none() {
-        return ReplicationAction::ReplicateNone;
-    }
-
-    let sz = oi1.get_actual_size();
-
-    // Conditions that force a full replication
-    if oi1.etag != oi2.etag
-        || oi1.version_id != oi2.version_id
-        || sz.unwrap() != oi2.size
-        || oi1.delete_marker != oi2.delete_marker
-        || oi1.mod_time != oi2.mod_time
-    {
-        return ReplicationAction::ReplicateAll;
-    }
-
-    // Conditions that only require metadata replication
-    if oi1.content_type != oi2.content_type {
-        return ReplicationAction::ReplicateMetadata;
-    }
-
-    // if oi1.content_encoding.is_some() {
-    //     if let Some(enc) = oi2
-    //         .metadata
-    //         .get("content-encoding")
-    //         .or_else(|| oi2.metadata.get("content-encoding".to_lowercase().as_str()))
-    //     {
-    //         if enc.join(",") != oi1.content_encoding {
-    //             return ReplicationAction::ReplicateMetadata;
-    //         }
-    //     } else {
-    //         return ReplicationAction::ReplicateMetadata;
-    //     }
-    // }
-
-    // if !oi2.user_tags.is_empty() && oi1.user_tags != oi2.user_tags {
-    //     return ReplicationAction::ReplicateMetadata;
-    // }
-
-    // Header prefixes that need to be compared
-    // let compare_keys = vec![
-    //     "expires",
-    //     "cache-control",
-    //     "content-language",
-    //     "content-disposition",
-    //     "x-amz-object-lock-mode",
-    //     "x-amz-object-lock-retain-until-date",
-    //     "x-amz-object-lock-legal-hold",
-    //     "x-amz-website-redirect-location",
-    //     "x-amz-meta-",
-    // ];
-
-    // Extract and compare the relevant metadata
-    // let compare_meta1: HashMap<String, String> = oi1
-    //     .user_defined
-    //     .iter()
-    //     .filter(|(k, _)| compare_keys.iter().any(|prefix| k.to_lowercase().starts_with(prefix)))
-    //     .map(|(k, v)| (k.to_lowercase(), v.clone()))
-    //     .collect();
-
-    // let compare_meta2: HashMap<String, String> = oi2
-    //     .metadata
-    //     .iter()
-    //     .filter(|(k, _)| compare_keys.iter().any(|prefix| k.to_lowercase().starts_with(prefix)))
-    //     .map(|(k, v)| (k.to_lowercase(), v.join(",")))
-    //     .collect();
-
-    // if compare_meta1 != compare_meta2 {
-    //     return ReplicationAction::ReplicateMetadata;
-    // }
-
-    ReplicationAction::ReplicateNone
-}
-
-/// Target replication decision structure
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct ReplicateTargetDecision {
-    pub replicate: bool,   // Whether to perform replication
-    pub synchronous: bool, // Whether it's synchronous replication
-    pub arn: String,       // ARN of the replication target
-    pub id: String,        // ID
-}
-
-impl ReplicateTargetDecision {
-    /// Create a new ReplicateTargetDecision
-    pub fn new(arn: &str, replicate: bool, synchronous: bool) -> Self {
-        Self {
-            id: String::new(),
-            replicate,
-            synchronous,
-            arn: arn.to_string(),
-        }
-    }
-}
-
-impl fmt::Display for ReplicateTargetDecision {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "{};{};{};{}", self.replicate, self.synchronous, self.arn, self.id)
-    }
-}
-
-/// Replication decision covering every configured target
-#[derive(Debug, Default, Clone, Serialize, Deserialize)]
-pub struct ReplicateDecision {
-    targets_map: HashMap<String, ReplicateTargetDecision>,
-}
-
-impl ReplicateDecision {
-    /// Create a new, empty ReplicateDecision
-    pub fn new() -> Self {
-        Self {
-            targets_map: HashMap::new(),
-        }
-    }
-
-    /// Whether any target requires replication
-    pub fn replicate_any(&self) -> bool {
-        self.targets_map.values().any(|t| t.replicate)
-    }
-
-    /// Whether any target requires synchronous replication
-    pub fn synchronous(&self) -> bool {
-        self.targets_map.values().any(|t| t.synchronous)
-    }
-
-    /// Insert one target's decision into the map
-    pub fn set(&mut self, decision: ReplicateTargetDecision) {
-        self.targets_map.insert(decision.arn.clone(), decision);
-    }
-
-    /// Render the Pending status string across all targets
-    pub fn pending_status(&self) -> String {
-        let mut result = String::new();
-        for target in self.targets_map.values() {
-            if target.replicate {
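-                // Builds the internal status string later consumed by
-                // replication_statuses_map, e.g. "arn1=PENDING;arn2=PENDING;"
-                // (illustrative ARN names).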
result.push_str(&format!("{}=PENDING;", target.arn)); - } - } - result - } -} - -impl fmt::Display for ReplicateDecision { - /// 将 ReplicateDecision 转换为字符串格式 - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let mut entries = Vec::new(); - for (key, value) in &self.targets_map { - entries.push(format!("{key}={value}")); - } - write!(f, "{}", entries.join(",")) - } -} - -/// ResyncTargetDecision 表示重同步决策 -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ResyncTargetDecision { - pub replicate: bool, - pub reset_id: String, - pub reset_before_date: DateTime, -} - -/// ResyncDecision 表示所有目标的重同步决策 -#[derive(Default, Debug, Clone, Serialize, Deserialize)] -pub struct ResyncDecision { - targets: HashMap, -} - -impl ResyncDecision { - /// 创建一个新的 ResyncDecision - pub fn new() -> Self { - Self { targets: HashMap::new() } - } - - /// 检查是否没有任何目标需要重同步 - pub fn is_empty(&self) -> bool { - self.targets.is_empty() - } - - /// 检查是否有至少一个目标需要重同步 - pub fn must_resync(&self) -> bool { - self.targets.values().any(|v| v.replicate) - } - - /// 检查指定目标是否需要重同步 - pub fn must_resync_target(&self, tgt_arn: &str) -> bool { - if let Some(target) = self.targets.get(tgt_arn) { - target.replicate - } else { - false - } - } -} - -/// 解析字符串为 ReplicateDecision 结构 -pub fn parse_replicate_decision(input: &str) -> Result { - let mut decision = ReplicateDecision::new(); - if input.is_empty() { - return Ok(decision); - } - - for pair in input.split(',') { - if pair.is_empty() { - continue; - } - let parts: Vec<&str> = pair.split('=').collect(); - if parts.len() != 2 { - return Err("Invalid replicate decision format"); - } - - let key = parts[0]; - let value = parts[1].trim_matches('"'); - let values: Vec<&str> = value.split(';').collect(); - - if values.len() != 4 { - return Err("Invalid replicate target decision format"); - } - - let replicate = values[0] == "true"; - let synchronous = values[1] == "true"; - let arn = values[2].to_string(); - let id = values[3].to_string(); - - decision.set(ReplicateTargetDecision { - replicate, - synchronous, - arn, - id, - }); - } - Ok(decision) -} - -#[derive(Debug, Default, Clone, Serialize, Deserialize)] -pub struct ReplicatedTargetInfo { - pub arn: String, - pub size: i64, - pub duration: Duration, - pub replication_action: ReplicationAction, // Complete or metadata only - pub op_type: i32, // Transfer type - pub replication_status: ReplicationStatusType, // Current replication status - pub prev_replication_status: ReplicationStatusType, // Previous replication status - pub version_purge_status: VersionPurgeStatusType, // Version purge status - pub resync_timestamp: String, // Resync timestamp - pub replication_resynced: bool, // Whether resynced - pub endpoint: String, // Target endpoint - pub secure: bool, // Whether secure connection - pub err: Option, // Error information -} - -// 实现 ReplicatedTargetInfo 方法 -impl ReplicatedTargetInfo { - /// 检查 arn 是否为空 - pub fn is_empty(&self) -> bool { - self.arn.is_empty() - } -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct DeletedObjectReplicationInfo { - #[serde(flatten)] // Use `flatten` to expand `DeletedObject` fields into current struct - pub deleted_object: DeletedObject, - - pub bucket: String, - pub event_type: String, - pub op_type: ReplicationType, // Assume `replication.Type` is `ReplicationType` enum - pub reset_id: String, - pub target_arn: String, -} - -pub fn get_composite_replication_status(m: &HashMap) -> ReplicationStatusType { - if m.is_empty() { - return ReplicationStatusType::Unknown; - } - - 
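-    // Folding rule, as implemented below: any Failed target makes the composite
-    // status Failed; it is Completed only when every target completed; otherwise
-    // it stays Pending.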
let mut completed_count = 0; - - for status in m.values() { - match status { - ReplicationStatusType::Failed => return ReplicationStatusType::Failed, - ReplicationStatusType::Completed => completed_count += 1, - _ => {} - } - } - - if completed_count == m.len() { - return ReplicationStatusType::Completed; - } - - ReplicationStatusType::Pending -} - -#[derive(Debug, Default, Clone, Serialize, Deserialize)] -pub struct ReplicationState { - pub replica_timestamp: DateTime, - pub replica_status: ReplicationStatusType, - pub delete_marker: bool, - pub replication_timestamp: DateTime, - pub replication_status_internal: String, - pub version_purge_status_internal: String, - pub replicate_decision_str: String, - pub targets: HashMap, - pub purge_targets: HashMap, - pub reset_statuses_map: HashMap, -} - -// impl Default for ReplicationState { -// fn default() -> Self { -// ReplicationState { -// replica_timestamp: Utc::now(), -// replica_status: ReplicationStatusType::default(), -// delete_marker: false, -// replication_timestamp: Utc::now(), -// replication_status_internal: String::new(), -// version_purge_status_internal: String::new(), -// replicate_decision_str: String::new(), -// targets: HashMap::new(), -// purge_targets: HashMap::new(), -// reset_statuses_map: HashMap::new(), -// } -// } -// } - -pub struct ReplicationObjectOpts { - pub name: String, - pub user_tags: Option, - pub version_id: String, - pub delete_marker: bool, - pub ssec: bool, - pub op_type: ReplicationType, - pub replica: bool, - pub existing_object: bool, - pub target_arn: Option, -} - -pub trait ConfigProcess { - fn filter_actionable_rules(&self, obj: &ReplicationObjectOpts) -> Vec; - - fn replicate(&self, obj: &ReplicationObjectOpts) -> bool; - fn filter_target_arns(&self, obj: &ReplicationObjectOpts) -> Vec; -} - -impl ConfigProcess for s3s::dto::ReplicationConfiguration { - fn filter_target_arns(&self, obj: &ReplicationObjectOpts) -> Vec { - let mut arns = Vec::new(); - let mut tgts_map = HashSet::new(); - - let rules = self.filter_actionable_rules(obj); - debug!("rule len is {}", rules.len()); - for rule in rules { - debug!("rule"); - - if rule.status == ReplicationRuleStatus::from_static(ReplicationRuleStatus::DISABLED) { - debug!("rule"); - continue; - } - - if !self.role.is_empty() { - debug!("rule"); - arns.push(self.role.clone()); // use legacy RoleArn if present - return arns; - } - - debug!("rule"); - if !tgts_map.contains(&rule.destination.bucket) { - tgts_map.insert(rule.destination.bucket.clone()); - } - } - - for arn in tgts_map { - arns.push(arn); - } - arns - } - - fn replicate(&self, obj: &ReplicationObjectOpts) -> bool { - for rule in self.filter_actionable_rules(obj) { - if rule.status == ReplicationRuleStatus::from_static(ReplicationRuleStatus::DISABLED) { - warn!("need replicate failed"); - continue; - } - if obj.existing_object - && rule.existing_object_replication.is_some() - && rule.existing_object_replication.unwrap().status - == ExistingObjectReplicationStatus::from_static(ExistingObjectReplicationStatus::DISABLED) - { - warn!("need replicate failed"); - return false; - } - - if obj.op_type == ReplicationType::DeleteReplicationType { - return if !obj.version_id.is_empty() { - // 扩展:检查版本化删除 - if rule.delete_replication.is_none() { - warn!("need replicate failed"); - return false; - } - rule.delete_replication.unwrap().status - == DeleteReplicationStatus::from_static(DeleteReplicationStatus::DISABLED) - } else { - if rule.delete_marker_replication.is_none() { - warn!("need replicate failed"); - 
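-                        // Without a DeleteMarkerReplication element the rule cannot express
-                        // a decision for delete markers, so replication is declined here.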
return false; - } - if rule.delete_marker_replication.as_ref().unwrap().status.clone().is_none() { - warn!("need replicate failed"); - return false; - } - rule.delete_marker_replication.as_ref().unwrap().status.clone().unwrap() - == DeleteMarkerReplicationStatus::from_static(DeleteMarkerReplicationStatus::DISABLED) - }; - } - // 处理常规对象/元数据复制 - if !obj.replica { - warn!("not need replicate {} {} ", obj.name, obj.version_id); - return true; - } - return obj.replica - && rule.source_selection_criteria.is_some() - && rule.source_selection_criteria.unwrap().replica_modifications.unwrap().status - == ReplicaModificationsStatus::from_static(ReplicaModificationsStatus::ENABLED); - } - warn!("need replicate failed"); - false - } - - fn filter_actionable_rules(&self, obj: &ReplicationObjectOpts) -> Vec { - if obj.name.is_empty() - && !matches!(obj.op_type, ReplicationType::ResyncReplicationType | ReplicationType::AllReplicationType) - { - warn!("filter"); - return vec![]; - } - - let mut rules: Vec = Vec::new(); - debug!("rule size is {}", &self.rules.len()); - - for rule in &self.rules { - if rule.status.as_str() == ReplicationRuleStatus::DISABLED { - debug!("rule size is"); - continue; - } - - if obj.target_arn.is_some() - && rule.destination.bucket != obj.target_arn.clone().unwrap() - && self.role != obj.target_arn.clone().unwrap() - { - debug!("rule size is"); - continue; - } - debug!("match {:?}", obj.op_type.clone()); - if matches!(obj.op_type, ReplicationType::ResyncReplicationType | ReplicationType::AllReplicationType) { - //println!("filter"); - rules.push(rule.clone()); - continue; - } - - if obj.existing_object { - if rule.existing_object_replication.is_none() { - continue; - } - - if rule.existing_object_replication.clone().unwrap().status.as_str() == ExistingObjectReplicationStatus::DISABLED - { - continue; - } - } - - if rule.prefix.is_some() && !obj.name.starts_with(rule.prefix.as_ref().unwrap()) { - continue; - } - - //if rule.filter.test_tags(&obj.user_tags) { - rules.push(rule.clone()); - //} - } - - rules.sort_by(|a, b| { - if a.priority == b.priority { - a.destination.bucket.to_string().cmp(&b.destination.bucket.to_string()) - } else { - b.priority.cmp(&a.priority) - } - }); - - rules - } -} - -fn replication_statuses_map(s: &str) -> HashMap { - let mut targets = HashMap::new(); - let repl_status_regex = Regex::new(r"(\w+):([\w-]+)").unwrap(); - - for cap in repl_status_regex.captures_iter(s) { - if let (Some(target), Some(status)) = (cap.get(1), cap.get(2)) { - let tp = ReplicationStatusType::from(status.as_str()); - targets.insert(target.as_str().to_string(), tp); - } - } - - targets -} - -fn version_purge_statuses_map(s: &str) -> HashMap { - let mut targets = HashMap::new(); - let repl_status_regex = Regex::new(r"(\w+):([\w-]+)").unwrap(); - - for cap in repl_status_regex.captures_iter(s) { - if let (Some(target), Some(status)) = (cap.get(1), cap.get(2)) { - let ptp = VersionPurgeStatusType::from(status.as_str()); - targets.insert(target.as_str().to_string(), ptp); - } - } - - targets -} - -pub trait TraitForObjectInfo { - fn replication_state(&self) -> ReplicationState; -} - -const RESERVED_METADATA_PREFIX: &str = "X-Rustfs-Internal-"; -const RESERVED_METADATA_PREFIX_LOWER: &str = "x-rustfs-internal-"; -lazy_static! 
{
-    static ref THROTTLE_DEADLINE: std::time::Duration = std::time::Duration::from_secs(3600);
-}
-
-// Replication-related string constants
-pub const REPLICATION_RESET: &str = "replication-reset";
-pub const REPLICATION_STATUS: &str = "replication-status";
-pub const REPLICATION_TIMESTAMP: &str = "replication-timestamp";
-pub const REPLICA_STATUS: &str = "replica-status";
-pub const REPLICA_TIMESTAMP: &str = "replica-timestamp";
-pub const TAGGING_TIMESTAMP: &str = "tagging-timestamp";
-pub const OBJECT_LOCK_RETENTION_TIMESTAMP: &str = "objectlock-retention-timestamp";
-pub const OBJECT_LOCK_LEGAL_HOLD_TIMESTAMP: &str = "objectlock-legalhold-timestamp";
-pub const REPLICATION_SSEC_CHECKSUM_HEADER: &str = "X-Rustfs-Replication-Ssec-Crc";
-
-impl TraitForObjectInfo for ObjectInfo {
-    fn replication_state(&self) -> ReplicationState {
-        let mut rs = ReplicationState {
-            replication_status_internal: self.replication_status_internal.clone(),
-            //version_purge_status_internal: self.version_purge_status_internal.clone(),
-            version_purge_status_internal: "".to_string(),
-            replicate_decision_str: self.replication_status_internal.clone(),
-            targets: HashMap::new(),
-            purge_targets: HashMap::new(),
-            reset_statuses_map: HashMap::new(),
-            replica_timestamp: Utc::now(),
-            replica_status: ReplicationStatusType::Pending,
-            delete_marker: false,
-            replication_timestamp: Utc::now(),
-        };
-
-        // Set targets and purge_targets using the respective parser functions
-        rs.targets = replication_statuses_map(&self.replication_status_internal);
-        //rs.purge_targets = version_purge_statuses_map(&self.version_purge_status_internal);
-        rs.purge_targets = version_purge_statuses_map("");
-
-        // Process the reset statuses map
-
-        for (k, v) in self.user_defined.iter() {
-            if k.starts_with(&(RESERVED_METADATA_PREFIX_LOWER.to_owned() + REPLICATION_RESET)) {
-                let arn = k.trim_start_matches(&(RESERVED_METADATA_PREFIX_LOWER.to_owned() + REPLICATION_RESET));
-                rs.reset_statuses_map.insert(arn.to_string(), v.clone());
-            }
-        }
-
-        rs
-    }
-}
-
-fn convert_offsetdatetime_to_chrono(offset_dt: Option<OffsetDateTime>) -> Option<DateTime<Utc>> {
-    //offset_dt.map(|odt| {
-    let tm = offset_dt.unwrap().unix_timestamp();
-    //let naive = NaiveDateTime::from_timestamp_opt(tm, 0).expect("Invalid timestamp");
-    DateTime::<Utc>::from_timestamp(tm, 0)
-    //DateTime::from_naive_utc_and_offset(naive, Utc) // Convert to Utc first
-    //})
-}
-
-pub async fn schedule_replication(oi: ObjectInfo, o: Arc<ECStore>, dsc: ReplicateDecision, op_type: i32) {
-    let tgt_statuses = replication_statuses_map(&oi.replication_status_internal);
-    // //let purge_statuses = version_purge_statuses_map(&oi.);
-    let replication_timestamp = Utc::now(); // Placeholder for timestamp parsing
-    let replication_state = oi.replication_state();
-
-    let actual_size = oi.actual_size;
-    //let ssec = oi.user_defined.contains_key("ssec");
-    let ssec = false;
-
-    let ri = ReplicateObjectInfo {
-        name: oi.name,
-        size: oi.size,
-        bucket: oi.bucket,
-        version_id: oi
-            .version_id
-            .map(|uuid| uuid.to_string()) // convert the Uuid into a String
-            .unwrap_or_default(),
-        etag: oi.etag.unwrap_or_default(),
-        mod_time: convert_offsetdatetime_to_chrono(oi.mod_time).unwrap(),
-        replication_status: oi.replication_status,
-        replication_status_internal: oi.replication_status_internal,
-        delete_marker: oi.delete_marker,
-        version_purge_status_internal: oi.version_purge_status_internal,
-        version_purge_status: oi.version_purge_status,
-        replication_state,
-        op_type,
-        dsc: dsc.clone(),
-        target_statuses: tgt_statuses,
-        target_purge_statuses: Default::default(),
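-        // The remaining fields snapshot per-request state for the async workers;
-        // values ObjectInfo does not carry (event_type, reset_id, target_arn) are
-        // filled with empty defaults below.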
-        replication_timestamp,
-        ssec,
-        user_tags: oi.user_tags,
-        checksum: if ssec { oi.checksum.clone() } else { Vec::new() },
-        event_type: "".to_string(),
-        retry_count: 0,
-        reset_id: "".to_string(),
-        existing_obj_resync: Default::default(),
-        target_arn: "".to_string(),
-        actual_size: 0,
-    };
-
-    if dsc.synchronous() {
-        warn!("object sync replication");
-        replicate_object(ri, o).await;
-    } else {
-        warn!("object needs async replication");
-        //GLOBAL_REPLICATION_POOL.lock().unwrap().queue_replica_task(ri);
-        let mut pool = GLOBAL_REPLICATION_POOL.write().await;
-        pool.as_mut().unwrap().queue_replica_task(ri).await;
-    }
-}
-
-pub async fn must_replicate(bucket: &str, object: &str, mopts: &MustReplicateOptions) -> ReplicateDecision {
-    let mut decision = ReplicateDecision::default();
-
-    // Bail out early when the object layer has not been initialized.
-    if new_object_layer_fn().is_none() {
-        return decision;
-    }
-
-    // Replication requires versioning to be enabled for this prefix.
-    if !BucketVersioningSys::prefix_enabled(bucket, object).await {
-        return decision;
-    }
-
-    let repl_status = mopts.replication_status();
-    if repl_status == ReplicationStatusType::Replica && !mopts.is_metadata_replication() {
-        return decision;
-    }
-
-    if mopts.replication_request {
-        return decision;
-    }
-
-    let cfg = match get_replication_config(bucket).await {
-        Ok((config, timestamp)) => config,
-        //Ok(None) => return decision,
-        Err(err) => {
-            //repl_log_once_if(err, bucket);
-            return decision;
-        }
-    };
-
-    let mut opts = ReplicationObjectOpts {
-        name: object.to_string(),
-        //ssec: crypto::is_ssec_encrypted(&mopts.meta),
-        ssec: false,
-        replica: repl_status == ReplicationStatusType::Replica,
-        existing_object: mopts.is_existing_object_replication(),
-        user_tags: None,
-        target_arn: None,
-        version_id: "0".to_string(),
-        delete_marker: false,
-        op_type: mopts.op_type,
-    };
-
-    if let Some(tag_str) = mopts.meta.get("x-amz-object-tagging") {
-        opts.user_tags = Some(tag_str.clone());
-    }
-
-    // let rules = cfg.filter_actionable_rules(&opts);
-    let tgt_arns = cfg.filter_target_arns(&opts);
-    info!("arn count:{}", tgt_arns.len());
-    for tgt_arn in tgt_arns {
-        let tgt = bucket_targets::get_bucket_target_client(bucket, &tgt_arn.clone()).await;
-        //let tgt = GLOBAL_Bucket_Target_Sys.get().unwrap().get_remote_target_client(tgt)
-
-        // Do not check the online state here: the target may only be temporarily unavailable.
-        opts.target_arn = Some(tgt_arn.clone());
-        let replicate = cfg.replicate(&opts);
-        info!("need replicate {}", &replicate);
-
-        let synchronous = tgt.is_ok_and(|t| t.replicate_sync);
-        //decision.set(ReplicateTargetDecision::new(replicate,synchronous));
-        info!("target decision arn is:{}", tgt_arn.clone());
-        decision.set(ReplicateTargetDecision {
-            replicate,
-            synchronous,
-            arn: tgt_arn.clone(),
-            id: 0.to_string(),
-        });
-    }
-    info!("must replicate");
-    decision
-}
-
-impl ReplicationState {
-    // equal: whether two replication states are identical
-    pub fn equal(&self, other: &ReplicationState) -> bool {
-        self.replica_status == other.replica_status
-            && self.replication_status_internal == other.replication_status_internal
-            && self.version_purge_status_internal == other.version_purge_status_internal
-    }
-
-    // composite_replication_status: the overall replication status
-    pub fn composite_replication_status(&self) -> ReplicationStatusType {
-        if !self.replication_status_internal.is_empty() {
-            let status = ReplicationStatusType::from(self.replication_status_internal.as_str());
-            match status {
-                ReplicationStatusType::Pending
-                | ReplicationStatusType::Completed
-                | ReplicationStatusType::Failed
-                | ReplicationStatusType::Replica => status,
-                _ => {
-                    let repl_status =
get_composite_replication_status(&self.targets); - if self.replica_timestamp == Utc::now() || self.replica_timestamp.timestamp() == 0 { - return repl_status; - } - if repl_status == ReplicationStatusType::Completed && self.replica_timestamp > self.replication_timestamp { - return self.replica_status.clone(); - } - repl_status - } - } - } else if !self.replica_status.is_empty() { - self.replica_status.clone() - } else { - ReplicationStatusType::Unknown - } - } - - // CompositeVersionPurgeStatus 方法:返回总体的版本清除状态 - pub fn composite_version_purge_status(&self) -> VersionPurgeStatusType { - let status = VersionPurgeStatusType::from(self.version_purge_status_internal.as_str()); - match status { - VersionPurgeStatusType::Pending | VersionPurgeStatusType::Complete | VersionPurgeStatusType::Failed => status, - _ => get_composite_version_purge_status(&self.purge_targets), - } - } - - // target_state 方法:返回目标状态 - pub fn target_state(&self, arn: &str) -> ReplicatedTargetInfo { - ReplicatedTargetInfo { - arn: arn.to_string(), - prev_replication_status: self.targets.get(arn).cloned().unwrap_or(ReplicationStatusType::Unknown), - version_purge_status: self - .purge_targets - .get(arn) - .cloned() - .unwrap_or(VersionPurgeStatusType::Unknown), - resync_timestamp: self.reset_statuses_map.get(arn).cloned().unwrap_or_default(), - size: 0, - replication_status: self.replica_status.clone(), - duration: Duration::zero(), - replication_action: ReplicationAction::ReplicateAll, - op_type: 0, - replication_resynced: false, - endpoint: "".to_string(), - secure: false, - err: None, - } - } -} - -lazy_static! { - static ref REPL_STATUS_REGEX: Regex = Regex::new(r"([^=].*?)=([^,].*?);").unwrap(); -} -pub trait ObjectInfoExt { - fn target_replication_status(&self, arn: String) -> ReplicationStatusType; - fn is_multipart(&self) -> bool; -} - -impl ObjectInfoExt for ObjectInfo { - fn target_replication_status(&self, arn: String) -> ReplicationStatusType { - let rep_stat_matches = REPL_STATUS_REGEX.captures_iter(&self.replication_status_internal); - for matched in rep_stat_matches { - if let Some(arn_match) = matched.get(1) { - if arn_match.as_str() == arn { - if let Some(status_match) = matched.get(2) { - return ReplicationStatusType::from(status_match.as_str()); - } - } - } - } - /* `ReplicationStatusType` value */ - ReplicationStatusType::Unknown - } - fn is_multipart(&self) -> bool { - match &self.etag { - Some(etgval) => etgval.len() != 32 && !etgval.is_empty(), - None => false, - } - } -} - -// Replication type enum (placeholder, as it's not clearly used in the Go code) -//#[derive(Debug, Clone, Copy, PartialEq, Eq)] -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ReplicateObjectInfo { - pub name: String, - pub bucket: String, - pub version_id: String, - pub etag: String, - pub size: i64, - pub actual_size: i64, - pub mod_time: DateTime, - pub user_tags: String, - pub ssec: bool, - pub replication_status: ReplicationStatusType, - pub replication_status_internal: String, - pub version_purge_status_internal: String, - pub version_purge_status: VersionPurgeStatusType, - pub replication_state: ReplicationState, - pub delete_marker: bool, - - pub op_type: i32, - pub event_type: String, - pub retry_count: u32, - pub reset_id: String, - pub dsc: ReplicateDecision, - pub existing_obj_resync: ResyncDecision, - pub target_arn: String, - pub target_statuses: HashMap, - pub target_purge_statuses: HashMap, - pub replication_timestamp: DateTime, - pub checksum: Vec, -} -impl ReplicateObjectInfo { - pub fn 
to_object_info(&self) -> ObjectInfo { - ObjectInfo { - bucket: self.bucket.clone(), - name: self.name.clone(), - mod_time: Some( - OffsetDateTime::from_unix_timestamp(self.mod_time.timestamp()).unwrap_or_else(|_| OffsetDateTime::now_utc()), - ), - size: self.size, - actual_size: self.actual_size, - is_dir: false, - user_defined: HashMap::new(), // 可以按需从别处导入 - parity_blocks: 0, - data_blocks: 0, - version_id: Uuid::try_parse(&self.version_id).ok(), - delete_marker: self.delete_marker, - transitioned_object: TransitionedObject::default(), - restore_ongoing: false, - restore_expires: Some(OffsetDateTime::now_utc()), - user_tags: self.user_tags.clone(), - parts: Vec::new(), - is_latest: true, - content_type: None, - content_encoding: None, - num_versions: 0, - successor_mod_time: None, - put_object_reader: None, - etag: Some(self.etag.clone()), - inlined: false, - metadata_only: false, - version_only: false, - replication_status_internal: self.replication_status_internal.clone(), - replication_status: self.replication_status.clone(), - version_purge_status_internal: self.version_purge_status_internal.clone(), - version_purge_status: self.version_purge_status.clone(), - checksum: self.checksum.clone(), - } - } -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct DeletedObject { - #[serde(rename = "DeleteMarker")] - pub delete_marker: Option, // Go's `bool` converted to Rust's `Option` to support `omitempty` - - #[serde(rename = "DeleteMarkerVersionId")] - pub delete_marker_version_id: Option, // `omitempty` converted to `Option` - - #[serde(rename = "Key")] - pub object_name: Option, // Similarly use `Option` to include `omitempty` - - #[serde(rename = "VersionId")] - pub version_id: Option, // Same as above - - // The following fields do not appear in XML serialization, so no serde annotation needed - #[serde(skip)] - pub delete_marker_mtime: DateTime, // Custom type, needs definition or import - #[serde(skip)] - pub replication_state: ReplicationState, // Custom type, needs definition or import -} - -// 假设 `DeleteMarkerMTime` 和 `ReplicationState` 的定义如下: -#[derive(Debug, Default, Clone)] -pub struct DeleteMarkerMTime { - time: chrono::NaiveDate, - // 填写具体字段类型 -} - -impl ReplicationWorkerOperation for ReplicateObjectInfo { - fn to_mrf_entry(&self) -> MRFReplicateEntry { - MRFReplicateEntry { - bucket: self.bucket.clone(), - object: self.name.clone(), - version_id: self.version_id.clone(), // 直接使用计算后的 version_id - retry_count: 0, - sz: self.size, - } - } - fn as_any(&self) -> &dyn Any { - self - } -} - -impl ReplicationWorkerOperation for DeletedObjectReplicationInfo { - fn to_mrf_entry(&self) -> MRFReplicateEntry { - MRFReplicateEntry { - bucket: self.bucket.clone(), - object: self.deleted_object.object_name.clone().unwrap().clone(), - version_id: self.deleted_object.delete_marker_version_id.clone().unwrap_or_default(), - retry_count: 0, - sz: 0, - } - } - fn as_any(&self) -> &dyn Any { - self - } -} - -pub fn get_s3client_from_para(ak: &str, sk: &str, url: &str, _region: &str) -> Result> { - let credentials = Credentials::new(ak, sk, None, None, ""); - let region = Region::new("us-east-1".to_string()); - - let config = Config::builder() - .region(region) - .endpoint_url(url.to_string()) - .credentials_provider(credentials) - .behavior_version(BehaviorVersion::latest()) // Adjust as necessary - .build(); - Ok(S3Client::from_conf(config)) -} - -// use hyper::body::Body; -// use s3s::Body; - -async fn replicate_object_with_multipart( - rep_obj: &ReplicateObjectInfo, - local_obj_info: 
&ObjectInfo, - target_info: &ReplicatedTargetInfo, - tgt_cli: &TargetClient, -) -> Result<(), Error> { - let store = new_object_layer_fn().unwrap(); - let provider = StaticProvider::new(&tgt_cli.ak, &tgt_cli.sk, None); - let rustfs_cli = Minio::builder() - .endpoint(target_info.endpoint.clone()) - .provider(provider) - .secure(false) - .build() - .map_err(|e| Error::other(format!("build rustfs client failed: {e}")))?; - - let ret = rustfs_cli - .create_multipart_upload_with_versionid(tgt_cli.bucket.clone(), local_obj_info.name.clone(), rep_obj.version_id.clone()) - .await; - match ret { - Ok(task) => { - let parts_len = local_obj_info.parts.len(); - let mut part_results = vec![None; parts_len]; - let version_id = local_obj_info.version_id.expect("missing version_id"); - let task = Arc::new(task); // clone safe - let store = Arc::new(store); - let rustfs_cli = Arc::new(rustfs_cli); - - let mut upload_futures = FuturesUnordered::new(); - - for (index, _) in local_obj_info.parts.iter().enumerate() { - let store = Arc::clone(&store); - let rustfs_cli = Arc::clone(&rustfs_cli); - let task = Arc::clone(&task); - let bucket = local_obj_info.bucket.clone(); - let name = local_obj_info.name.clone(); - - upload_futures.push(tokio::spawn(async move { - let get_opts = ObjectOptions { - version_id: Some(version_id.to_string()), - versioned: true, - part_number: Some(index + 1), - version_suspended: false, - ..Default::default() - }; - - let h = HeaderMap::new(); - match store.get_object_reader(&bucket, &name, None, h, &get_opts).await { - Ok(mut reader) => match reader.read_all().await { - Ok(ret) => { - debug!("readall suc:"); - let body = Bytes::from(ret); - match rustfs_cli.upload_part(&task, index + 1, body).await { - Ok(part) => { - debug!("multipar upload suc:"); - Ok((index, part)) - } - Err(err) => { - error!("upload part {} failed: {}", index + 1, err); - Err(Error::other(format!("upload error: {err}"))) - } - } - } - Err(err) => { - error!("read error for part {}: {}", index + 1, err); - Err(err) - } - }, - Err(err) => { - error!("reader error for part {}: {}", index + 1, err); - Err(Error::other(format!("reader error: {err}"))) - } - } - })); - } - - while let Some(result) = upload_futures.next().await { - match result { - Ok(Ok((index, part))) => { - part_results[index] = Some(part); - } - Ok(Err(err)) => { - error!("upload part failed: {}", err); - return Err(err); - } - Err(join_err) => { - error!("tokio join error: {}", join_err); - return Err(Error::other(format!("join error: {join_err}"))); - } - } - } - - let parts: Vec<_> = part_results.into_iter().flatten().collect(); - - let ret = rustfs_cli.complete_multipart_upload(&task, parts, None).await; - match ret { - Ok(res) => { - warn!("finish upload suc:{:?} version_id={:?}", res, local_obj_info.version_id); - } - Err(err) => { - error!("finish upload failed:{}", err); - return Err(Error::other(format!("finish upload failed:{err}"))); - } - } - } - Err(err) => { - return Err(Error::other(format!("finish upload failed:{err}"))); - } - } - Ok(()) -} - -impl ReplicateObjectInfo { - fn target_replication_status(&self, arn: &str) -> ReplicationStatusType { - // 定义正则表达式,匹配类似 `arn;status` 格式 - let repl_status_regex = Regex::new(r"(\w+);(\w+)").expect("Invalid regex"); - - // 遍历正则表达式的匹配项 - for caps in repl_status_regex.captures_iter(&self.replication_status_internal) { - if let (Some(matched_arn), Some(matched_status)) = (caps.get(1), caps.get(2)) { - // 如果 ARN 匹配,返回对应的状态 - if matched_arn.as_str() == arn { - return 
ReplicationStatusType::from(matched_status.as_str()); - } - } - } - - // 如果没有匹配到,返回默认的 `Unknown` 状态 - ReplicationStatusType::Unknown - } - - async fn replicate_object(&self, target: &TargetClient, _arn: String) -> ReplicatedTargetInfo { - let _start_time = Utc::now(); - - // 初始化 ReplicatedTargetInfo - warn!("replicate is {}", _arn.clone()); - let mut rinfo = ReplicatedTargetInfo { - size: self.actual_size, - arn: _arn.clone(), - prev_replication_status: self.target_replication_status(&_arn.clone()), - replication_status: ReplicationStatusType::Failed, - op_type: self.op_type, - replication_action: ReplicationAction::ReplicateAll, - endpoint: target.endpoint.clone(), - secure: target.endpoint.clone().contains("https://"), - resync_timestamp: Utc::now().to_string(), - replication_resynced: false, - duration: Duration::default(), - err: None, - version_purge_status: VersionPurgeStatusType::Pending, - }; - - if self.target_replication_status(&_arn) == ReplicationStatusType::Completed - && !self.existing_obj_resync.is_empty() - && !self.existing_obj_resync.must_resync_target(&_arn) - { - warn!("replication return"); - rinfo.replication_status = ReplicationStatusType::Completed; - rinfo.replication_resynced = true; - return rinfo; - } - - // 模拟远程目标离线的检查 - // if self.is_target_offline(&target.endpoint) { - // rinfo.err = Some(format!( - // "Target is offline for bucket: {} arn: {} retry: {}", - // self.bucket, - // _arn.clone(), - // self.retry_count - // )); - // return rinfo; - // } - - // versioned := globalBucketVersioningSys.PrefixEnabled(bucket, object) - // versionSuspended := globalBucketVersioningSys.PrefixSuspended(bucket, object) - - // 模拟对象获取和元数据检查 - let opt = ObjectOptions { - version_id: Some(self.version_id.clone()), - versioned: true, - version_suspended: false, - ..Default::default() - }; - - let object_info = match self.get_object_info(opt).await { - Ok(info) => info, - Err(err) => { - error!("get object info err:{}", err); - rinfo.err = Some(err.to_string()); - return rinfo; - } - }; - - rinfo.prev_replication_status = object_info.target_replication_status(_arn); - - // 设置对象大小 - //rinfo.size = object_info.actual_size.unwrap_or(0); - rinfo.size = object_info.actual_size; - //rinfo.replication_action = object_info. 
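-        // From here the flow is optimistic: the status is preset to Completed and
-        // downgraded to Failed by the multipart / single-PUT branches below on error.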
- - rinfo.replication_status = ReplicationStatusType::Completed; - rinfo.size = object_info.get_actual_size().unwrap_or(0) as i64; - rinfo.replication_action = ReplicationAction::ReplicateAll; - - let store = new_object_layer_fn().unwrap(); - //todo!() put replicationopts; - if object_info.is_multipart() { - debug!("version is multi part"); - match replicate_object_with_multipart(self, &object_info, &rinfo, target).await { - Ok(_) => { - rinfo.replication_status = ReplicationStatusType::Completed; - println!("Object replicated successfully."); - } - Err(e) => { - rinfo.replication_status = ReplicationStatusType::Failed; - error!("Failed to replicate object: {:?}", e); - // 你可以根据错误类型进一步分类处理 - } - } - //replicate_object_with_multipart(local_obj_info, target_info, tgt_cli) - } else { - let get_opts = ObjectOptions { - version_id: Some(object_info.version_id.expect("REASON").to_string()), - versioned: true, - version_suspended: false, - ..Default::default() - }; - warn!("version id is:{:?}", get_opts.version_id); - let h = HeaderMap::new(); - let gr = store - .get_object_reader(&object_info.bucket, &object_info.name, None, h, &get_opts) - .await; - - match gr { - Ok(mut reader) => { - warn!("endpoint is: {}", rinfo.endpoint); - let provider = StaticProvider::new(&target.ak, &target.sk, None); - let res = reader.read_all().await; - match res { - Ok(ret) => { - let body = rustfs_rsc::Data::from(ret); - let rustfs_cli = Minio::builder() - .endpoint(rinfo.endpoint.clone()) - .provider(provider) - .secure(false) - .build() - .unwrap(); - - let ex = rustfs_cli.executor(Method::PUT); - let ret = ex - .bucket_name(target.bucket.clone()) - .object_name(self.name.clone()) - .body(body) - .query("versionId", get_opts.version_id.clone().unwrap()) - .send_ok() - .await; - match ret { - Ok(_res) => { - warn!("replicate suc: {} {} {}", self.bucket, self.name, self.version_id); - rinfo.replication_status = ReplicationStatusType::Completed; - } - Err(err) => { - error!("replicate {} err:{}", target.bucket.clone(), err); - rinfo.replication_status = ReplicationStatusType::Failed; - } - } - } - Err(err) => { - error!("read_all err {}", err); - rinfo.replication_status = ReplicationStatusType::Failed; - return rinfo; - } - } - } - Err(err) => { - rinfo.replication_status = ReplicationStatusType::Failed; - error!("get client error {}", err); - } - } - } - rinfo - } - - fn is_target_offline(&self, endpoint: &str) -> bool { - // 模拟检查目标是否离线 - warn!("Checking if target {} is offline", endpoint); - false - } - - async fn get_object_info(&self, opts: ObjectOptions) -> Result { - let objectlayer = new_object_layer_fn(); - //let opts = ecstore::store_api::ObjectOptions { max_parity: (), mod_time: (), part_number: (), delete_prefix: (), version_id: (), no_lock: (), versioned: (), version_suspended: (), skip_decommissioned: (), skip_rebalancing: (), data_movement: (), src_pool_idx: (), user_defined: (), preserve_etag: (), metadata_chg: (), replication_request: (), delete_marker: () } - objectlayer.unwrap().get_object_info(&self.bucket, &self.name, &opts).await - } - - fn perform_replication(&self, target: &RemotePeerS3Client, object_info: &ObjectInfo) -> Result<(), String> { - // 模拟复制操作 - // println!( - // "Replicating object {} to target {}", - // //object_info.name, target.arn - // ); - Ok(()) - } - - fn current_timestamp() -> String { - // 返回当前时间戳 - "2024-12-18T00:00:00Z".to_string() - } -} - -//pub fn getvalidrule(cfg: ReplicationConfiguration) -> Vec { -// let mut arns = Vec::new(); -// let mut tgts_map = 
std::collections::HashSet::new();
-//    for rule in cfg.rules {
-//        if rule.status.as_str() == "Disable" {
-//            continue;
-//        }
-
-//        if tgts_map.insert(rule.clone()) {}
-//    }
-//    arns
-//}
-
-pub async fn replicate_delete(_ri: &DeletedObjectReplicationInfo, _object_api: Arc<ECStore>) {}
-
-pub fn clone_mss(v: &HashMap<String, String>) -> HashMap<String, String> {
-    let mut r = HashMap::with_capacity(v.len());
-    for (k, v) in v {
-        r.insert(k.clone(), v.clone());
-    }
-    r
-}
-
-pub fn get_must_replicate_options(
-    user_defined: &HashMap<String, String>,
-    user_tags: &str,
-    status: ReplicationStatusType, // current replication status
-    op: ReplicationType,           // operation type
-    opts: &ObjectOptions,
-) -> MustReplicateOptions {
-    let mut meta = clone_mss(user_defined);
-
-    if !user_tags.is_empty() {
-        meta.insert("xhttp.AmzObjectTagging".to_string(), user_tags.to_string());
-    }
-
-    MustReplicateOptions {
-        meta,
-        status,
-        op_type: op,
-        replication_request: opts.replication_request,
-    }
-}
-
-#[derive(Default)]
-struct ReplicatedInfos {
-    //replication_time_stamp: DateTime<Utc>,
-    targets: Vec<ReplicatedTargetInfo>,
-}
-
-// #[derive(Clone, Copy, PartialEq)]
-// enum ReplicationStatus {
-//     Completed,
-//     InProgress,
-//     Pending,
-// }
-
-impl ReplicatedInfos {
-    pub fn action(&self) -> ReplicationAction {
-        for target in &self.targets {
-            if target.is_empty() {
-                continue;
-            }
-            if target.prev_replication_status != ReplicationStatusType::Completed {
-                return target.replication_action.clone();
-            }
-        }
-        ReplicationAction::ReplicateNone
-    }
-
-    // fn completed_size(&self) -> i64 {
-    //     let mut sz = 0;
-    //     for t in &self.targets {
-    //         if t.empty() {
-    //             continue;
-    //         }
-    //         if t.replication_status == ReplicationStatusType::Completed
-    //             && t.prev_replication_status != ReplicationStatusType::Completed
-    //         {
-    //             sz += t.size;
-    //         }
-    //     }
-    //     sz
-    // }
-
-    pub fn replication_resynced(&self) -> bool {
-        // true as soon as any non-empty target reports replication_resynced
-        self.targets.iter().any(|t| !t.is_empty() && t.replication_resynced)
-    }
-
-    /// Mirrors Go's ReplicationStatusInternal
-    pub fn replication_status_internal(&self) -> String {
-        let mut buf = String::new();
-        for t in &self.targets {
-            if t.is_empty() {
-                continue;
-            }
-            // like fmt.Fprintf(b, "%s=%s;", t.Arn, t.ReplicationStatus.String())
-            buf.push_str(&format!("{}={};", t.arn, t.replication_status.as_str()));
-        }
-        buf
-    }
-
-    pub fn replication_status(&self) -> ReplicationStatusType {
-        // With no targets at all, return Unknown (Go's StatusType(""))
-        if self.targets.is_empty() {
-            return ReplicationStatusType::Unknown;
-        }
-
-        // Count the completed targets.
-        let mut completed = 0;
-
-        for t in &self.targets {
-            match t.replication_status {
-                ReplicationStatusType::Failed => {
-                    // A single failure makes the whole status Failed.
-                    return ReplicationStatusType::Failed;
-                }
-                ReplicationStatusType::Completed => {
-                    completed += 1;
-                }
-                _ => {}
-            }
-        }
-
-        // Completed only when every target completed, otherwise Pending.
-        if completed == self.targets.len() {
-            ReplicationStatusType::Completed
-        } else {
-            ReplicationStatusType::Pending
-        }
-    }
-}
-
-impl ReplicatedTargetInfo {
-    fn empty(&self) -> bool {
-        // Implement your logic to check if the target is empty
-        self.size == 0
-    }
-}
-
-pub async fn replicate_object(ri: ReplicateObjectInfo, object_api: Arc<ECStore>) {
-    let bucket = ri.bucket.clone();
-    let obj = ri.name.clone();
-    match get_replication_config(&bucket).await {
-        Ok((cfg, timestamp)) => {
-            info!(
-                "replicate object: {} {} and arn is: {}",
-                ri.name.clone(),
-                timestamp,
-                ri.target_arn.clone()
-            );
-            //let arns = getvalidrule(config);
-
-            //TODO: nslock
-
-            let objectlayer =
new_object_layer_fn(); - - let opts = ReplicationObjectOpts { - name: ri.name.clone(), - //ssec: crypto::is_ssec_encrypted(&mopts.meta), - ssec: false, - //replica: repl_status == ReplicationStatusType::Replica, - replica: ri.replication_status == ReplicationStatusType::Replica, - existing_object: ri.existing_obj_resync.must_resync(), - user_tags: None, - target_arn: Some(ri.target_arn.clone()), - version_id: ri.version_id.clone(), - delete_marker: false, - op_type: ReplicationType::from_u8(ri.op_type as u8).expect("REASON"), - }; - - let tgt_arns = cfg.filter_target_arns(&opts); - info!("target len:{}", tgt_arns.len()); - - let rinfos = Arc::new(Mutex::new(ReplicatedInfos::default())); - let cri = Arc::new(ri.clone()); - let mut tasks: Vec> = vec![]; - - for tgt_arn in tgt_arns { - let tgt = bucket_targets::get_bucket_target_client(&ri.bucket, &tgt_arn).await; - - if tgt.is_err() { - // repl_log_once_if(ctx, format!("failed to get target for bucket: {} arn: {}", bucket, tgt_arn), &tgt_arn).await; - // send_event(event_args { - // event_name: "ObjectReplicationNotTracked".to_string(), - // bucket_name: bucket.to_string(), - // object: ri.to_object_info(), - // user_agent: "Internal: [Replication]".to_string(), - // host: global_local_node_name.to_string(), - // }).await; - continue; - } - - let tgt = tgt.unwrap(); - let rinfos_clone = Arc::clone(&rinfos); - let lcri = Arc::clone(&cri); - let task = task::spawn(async move { - warn!("async task"); - let mut tgt_info: ReplicatedTargetInfo = Default::default(); - if lcri.op_type as u8 == ReplicationType::ObjectReplicationType.as_u8() { - warn!("object replication and arn is {}", tgt.arn.clone()); - // all incoming calls go through optimized path.`o` - - tgt_info = lcri.replicate_object(&tgt, tgt.arn.clone()).await; - } else { - warn!("async task"); - // tgt_info = ri.replicate_all(object_api, &tgt).await; - } - - let mut rinfos_locked = rinfos_clone.lock().await; - rinfos_locked.targets.push(tgt_info); - }); - - tasks.push(task); - } - //futures::future::join_all(tasks); - futures::future::join_all(tasks).await; - - let mut rs = rinfos.lock().await; - let replication_status = rs.replication_status(); - //rinfos - let new_repl_status_internal = rs.replication_status_internal(); - // ri.to_object_info() 假设... 
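-            // Sketch of the flow: once every per-target task has joined, the collected
-            // ReplicatedTargetInfo entries are folded into a composite status; when that
-            // composite differs from the stored internal status, it is persisted back
-            // onto the object via put_object_metadata below.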
- warn!("{} and {}", new_repl_status_internal, ri.replication_status_internal); - let obj_info = ri.to_object_info(); - if ri.replication_status_internal != new_repl_status_internal || rs.replication_resynced() { - warn!("save meta"); - let mut eval_metadata = HashMap::new(); - - eval_metadata.insert( - format!("{}{}", RESERVED_METADATA_PREFIX_LOWER, "replication-status"), - new_repl_status_internal.clone(), - ); - eval_metadata.insert( - format!("{}{}", RESERVED_METADATA_PREFIX_LOWER, "replication-timestamp"), - Utc::now().to_rfc3339_opts(chrono::SecondsFormat::Nanos, true), - ); - eval_metadata.insert("x-amz-bucket-replication-status".to_string(), replication_status.as_str().to_owned()); - - for rinfo in &rs.targets { - // if !rinfo.resync_timestamp.is_empty() { - // eval_metadata.insert( - // format!("x-rustfs-replication-reset-status-{}", rinfo.arn), - // rinfo.resync_timestamp.clone(), - // ); - // } - } - - if !ri.user_tags.is_empty() { - eval_metadata.insert("x-amz-tagging".to_string(), ri.user_tags.clone()); - } - - let popts = ObjectOptions { - //mod_time: Some(ri.mod_time), - mod_time: None, - version_id: Some(ri.version_id.clone()), - eval_metadata: Some(eval_metadata), - ..Default::default() - }; - - //let uobj_info = ; - match object_api.put_object_metadata(&ri.bucket, &ri.name, &popts).await { - Ok(info) => { - info!("Put metadata success: {:?}", info); - // 你可以访问 info 字段,例如 info.size, info.last_modified 等 - } - Err(e) => { - error!("Failed to put metadata: {}", e); - // 根据错误类型做不同处理 - // if let Some(CustomError::NotFound) = e.downcast_ref::() { ... } - } - } - - // if !uobj_info.name.is_empty() { - // obj_info = uobj_info; - // } - - let mut op_type = ReplicationType::MetadataReplicationType; - if rs.action() == ReplicationAction::ReplicateAll { - op_type = ReplicationType::ObjectReplicationType - } - - for rinfo in &mut rs.targets { - if rinfo.replication_status != rinfo.prev_replication_status { - //rinfo.op_type = Some(op_type.clone()); - //global_replication_stats::update(&bucket, rinfo); - } - } - debug!("op type: {:?}", op_type); - } - - // send_event(EventArgs { - // event_name: ri.event_name.clone(), - // bucket_name: bucket.into(), - // object: obj_info.clone(), - // user_agent: "Internal: [Replication]".into(), - // host: "local-node-name".into(), - // }); - - // 失败重试 - // if rs.replication_status() != ReplicationStatusType::Completed { - // //ri.op_type = "HealReplicationType".into(); - // ri.event_type = "ReplicateMRF".into(); - // //ri.replication_status_internal = rinfos.replication_status_internal(); - // ri.retry_count += 1; - // // global_replication_pool.get().queue_mrf_save(ri.to_mrf_entry()); - // } - } - Err(err) => { - println!("Failed to get replication config: {err:?}"); - } - } -} diff --git a/crates/ecstore/src/cmd/bucket_replication_utils.rs b/crates/ecstore/src/cmd/bucket_replication_utils.rs deleted file mode 100644 index fa76aa9f..00000000 --- a/crates/ecstore/src/cmd/bucket_replication_utils.rs +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright 2024 RustFS Team -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -use std::collections::HashMap; -use chrono::{DateTime, Utc}; - -// Representation of the replication status -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum StatusType { - Pending, - Completed, - CompletedLegacy, - Failed, - Replica, -} - -// Representation of version purge status type (customize as needed) -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum VersionPurgeStatusType { - Pending, - Completed, - Failed, -} - -// ReplicationState struct definition -#[derive(Debug, Clone)] -pub struct ReplicationState { - // Timestamp when the last replica update was received - pub replica_time_stamp: DateTime, - - // Replica status - pub replica_status: StatusType, - - // Represents DeleteMarker replication state - pub delete_marker: bool, - - // Timestamp when the last replication activity happened - pub replication_time_stamp: DateTime, - - // Stringified representation of all replication activity - pub replication_status_internal: String, - - // Stringified representation of all version purge statuses - // Example format: "arn1=PENDING;arn2=COMPLETED;" - pub version_purge_status_internal: String, - - // Stringified representation of replication decision for each target - pub replicate_decision_str: String, - - // Map of ARN -> replication status for ongoing replication activity - pub targets: HashMap, - - // Map of ARN -> VersionPurgeStatus for all the targets - pub purge_targets: HashMap, - - // Map of ARN -> stringified reset id and timestamp for all the targets - pub reset_statuses_map: HashMap, -} \ No newline at end of file diff --git a/crates/ecstore/src/cmd/bucket_targets.rs b/crates/ecstore/src/cmd/bucket_targets.rs deleted file mode 100644 index f51971e5..00000000 --- a/crates/ecstore/src/cmd/bucket_targets.rs +++ /dev/null @@ -1,890 +0,0 @@ -#![allow(unused_variables)] -// Copyright 2024 RustFS Team -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-#![allow(dead_code)] -use crate::{ - StorageAPI, - bucket::{metadata_sys, target::BucketTarget}, - endpoints::Node, - rpc::{PeerS3Client, RemotePeerS3Client}, -}; -use crate::{ - bucket::{self, target::BucketTargets}, - new_object_layer_fn, store_api, -}; -//use tokio::sync::RwLock; -use aws_sdk_s3::Client as S3Client; -use chrono::Utc; -use lazy_static::lazy_static; -use std::sync::Arc; -use std::{ - collections::HashMap, - time::{Duration, SystemTime}, -}; -use thiserror::Error; -use tokio::sync::RwLock; - -pub struct TClient { - pub s3cli: S3Client, - pub remote_peer_client: RemotePeerS3Client, - pub arn: String, -} -impl TClient { - pub fn new(s3cli: S3Client, remote_peer_client: RemotePeerS3Client, arn: String) -> Self { - TClient { - s3cli, - remote_peer_client, - arn, - } - } -} - -pub struct EpHealth { - pub endpoint: String, - pub scheme: String, - pub online: bool, - pub last_online: SystemTime, - pub last_hc_at: SystemTime, - pub offline_duration: Duration, - pub latency: LatencyStat, // Assuming LatencyStat is a custom struct -} - -impl EpHealth { - pub fn new( - endpoint: String, - scheme: String, - online: bool, - last_online: SystemTime, - last_hc_at: SystemTime, - offline_duration: Duration, - latency: LatencyStat, - ) -> Self { - EpHealth { - endpoint, - scheme, - online, - last_online, - last_hc_at, - offline_duration, - latency, - } - } -} - -pub struct LatencyStat { - // Define the fields of LatencyStat as per your requirements -} - -pub struct ArnTarget { - client: TargetClient, - last_refresh: chrono::DateTime, -} -impl ArnTarget { - pub fn new(bucket: String, endpoint: String, ak: String, sk: String) -> Self { - Self { - client: TargetClient { - bucket, - storage_class: "STANDARD".to_string(), - disable_proxy: false, - health_check_duration: Duration::from_secs(100), - endpoint, - reset_id: "0".to_string(), - replicate_sync: false, - secure: false, - arn: "".to_string(), - client: reqwest::Client::new(), - ak, - sk, - }, - last_refresh: Utc::now(), - } - } -} - -// pub fn get_s3client_from_para( -// ak: &str, -// sk: &str, -// url: &str, -// _region: &str, -// ) -> Result> { -// let credentials = Credentials::new(ak, sk, None, None, ""); -// let region = Region::new("us-east-1".to_string()); - -// let config = Config::builder() -// .region(region) -// .endpoint_url(url.to_string()) -// .credentials_provider(credentials) -// .behavior_version(BehaviorVersion::latest()) // Adjust as necessary -// .build(); -// Ok(S3Client::from_conf(config)) -// } - -pub struct BucketTargetSys { - arn_remote_map: Arc>>, - targets_map: Arc>>>, - hc: HashMap, - //store:Option>, -} - -lazy_static! { - pub static ref GLOBAL_Bucket_Target_Sys: std::sync::OnceLock = BucketTargetSys::new().into(); -} - -//#[derive(Debug)] -// pub enum SetTargetError { -// NotFound, -// } - -pub async fn get_bucket_target_client(bucket: &str, arn: &str) -> Result { - if let Some(sys) = GLOBAL_Bucket_Target_Sys.get() { - sys.get_remote_target_client2(arn).await - } else { - Err(SetTargetError::TargetNotFound(bucket.to_string())) - } -} - -#[derive(Debug)] -pub struct BucketRemoteTargetNotFound { - pub bucket: String, -} - -pub async fn init_bucket_targets(bucket: &str, meta: Arc) { - println!("140 {bucket}"); - if let Some(sys) = GLOBAL_Bucket_Target_Sys.get() { - if let Some(tgts) = meta.bucket_target_config.clone() { - for tgt in tgts.targets { - warn!("ak and sk is:{:?}", tgt.credentials); - let _ = sys.set_target(bucket, &tgt, false, true).await; - //sys.targets_map. 
- } - } - } -} - -pub async fn remove_bucket_target(bucket: &str, arn_str: &str) { - if let Some(sys) = GLOBAL_Bucket_Target_Sys.get() { - let _ = sys.remove_target(bucket, arn_str).await; - } -} - -pub async fn list_bucket_targets(bucket: &str) -> Result { - if let Some(sys) = GLOBAL_Bucket_Target_Sys.get() { - sys.list_bucket_targets(bucket).await - } else { - Err(BucketRemoteTargetNotFound { - bucket: bucket.to_string(), - }) - } -} - -impl Default for BucketTargetSys { - fn default() -> Self { - Self::new() - } -} - -impl BucketTargetSys { - pub fn new() -> Self { - BucketTargetSys { - arn_remote_map: Arc::new(RwLock::new(HashMap::new())), - targets_map: Arc::new(RwLock::new(HashMap::new())), - hc: HashMap::new(), - } - } - - pub async fn list_bucket_targets(&self, bucket: &str) -> Result { - let targets_map = self.targets_map.read().await; - if let Some(targets) = targets_map.get(bucket) { - Ok(BucketTargets { - targets: targets.clone(), - }) - } else { - Err(BucketRemoteTargetNotFound { - bucket: bucket.to_string(), - }) - } - } - - pub async fn list_targets(&self, bucket: Option<&str>, _arn_type: Option<&str>) -> Vec { - let _ = _arn_type; - //let health_stats = self.health_stats(); - - let mut targets = Vec::new(); - - if let Some(bucket_name) = bucket { - if let Ok(ts) = self.list_bucket_targets(bucket_name).await { - for t in ts.targets { - //if arn_type.map_or(true, |arn| t.target_type == arn) { - //if let Some(hs) = health_stats.get(&t.url().host) { - // t.total_downtime = hs.offline_duration; - // t.online = hs.online; - // t.last_online = hs.last_online; - // t.latency = LatencyStat { - // curr: hs.latency.curr, - // avg: hs.latency.avg, - // max: hs.latency.peak, - // }; - //} - targets.push(t.clone()); - //} - } - } - return targets; - } - - // Locking and iterating over all targets in the system - let targets_map = self.targets_map.read().await; - for tgts in targets_map.values() { - for t in tgts { - //if arn_type.map_or(true, |arn| t.target_type == arn) { - // if let Some(hs) = health_stats.get(&t.url().host) { - // t.total_downtime = hs.offline_duration; - // t.online = hs.online; - // t.last_online = hs.last_online; - // t.latency = LatencyStat { - // curr: hs.latency.curr, - // avg: hs.latency.avg, - // max: hs.latency.peak, - // }; - // } - targets.push(t.clone()); - //} - } - } - - targets - } - - pub async fn remove_target(&self, bucket: &str, arn_str: &str) -> Result<(), SetTargetError> { - //to do need lock; - let mut targets_map = self.targets_map.write().await; - let tgts = targets_map.get(bucket); - let mut arn_remotes_map = self.arn_remote_map.write().await; - if tgts.is_none() { - //Err(SetTargetError::TargetNotFound(bucket.to_string())); - return Ok(()); - } - - let tgts = tgts.unwrap(); // safe to unwrap: checked for None above - let mut targets = Vec::with_capacity(tgts.len()); - let mut found = false; - - // Walk the targets, keeping every entry whose ARN does not match - for tgt in tgts { - if tgt.arn != Some(arn_str.to_string()) { - targets.push(tgt.clone()); // clone the entries to keep - } else { - found = true; // found the matching ARN - } - } - - // If no matching ARN was found, return early - if !found { - return Ok(()); - } - - // Update targets_map - targets_map.insert(bucket.to_string(), targets); - arn_remotes_map.remove(arn_str); - - let targets = self.list_targets(Some(bucket), None).await; - println!("targets is {}", targets.len()); - match serde_json::to_vec(&targets) { - Ok(json) => { - let _ = metadata_sys::update(bucket, "bucket-targets.json", json).await; - } - Err(e) => { - println!("serialization failed: {e}"); - } - } - - Ok(()) - } - - pub async fn get_remote_arn(&self,
bucket: &str, target: Option<&BucketTarget>, depl_id: &str) -> (Option, bool) { - if target.is_none() { - return (None, false); - } - - let target = target.unwrap(); - - let targets_map = self.targets_map.read().await; - - // Take the read lock to access arn_remote_map - let mut _arn_remotes_map = self.arn_remote_map.read().await; - if let Some(tgts) = targets_map.get(bucket) { - for tgt in tgts { - if tgt.type_ == target.type_ - && tgt.target_bucket == target.target_bucket - && tgt.endpoint == target.endpoint - && tgt.credentials.as_ref().unwrap().access_key == target.credentials.as_ref().unwrap().access_key - { - return (tgt.arn.clone(), true); - } - } - } - - // if !target.type_.is_valid() { - // return (None, false); - // } - - println!("generate_arn"); - - (Some(generate_arn(target.clone(), depl_id.to_string())), false) - } - - pub async fn get_remote_target_client2(&self, arn: &str) -> Result { - let map = self.arn_remote_map.read().await; - info!("get remote target client and arn is: {}", arn); - if let Some(value) = map.get(arn) { - let mut x = value.client.clone(); - x.arn = arn.to_string(); - Ok(x) - } else { - error!("not find target"); - Err(SetTargetError::TargetNotFound(arn.to_string())) - } - } - - // pub async fn get_remote_target_client(&self, _tgt: &BucketTarget) -> Result { - // // Mocked implementation for obtaining a remote client - // let tcli = TargetClient { - // bucket: _tgt.target_bucket.clone(), - // storage_class: "STANDARD".to_string(), - // disable_proxy: false, - // health_check_duration: Duration::from_secs(100), - // endpoint: _tgt.endpoint.clone(), - // reset_id: "0".to_string(), - // replicate_sync: false, - // secure: false, - // arn: "".to_string(), - // client: reqwest::Client::new(), - // ak: _tgt. - - // }; - // Ok(tcli) - // } - // pub async fn get_remote_target_client_with_bucket(&self, _bucket: String) -> Result { - // // Mocked implementation for obtaining a remote client - // let tcli = TargetClient { - // bucket: _tgt.target_bucket.clone(), - // storage_class: "STANDARD".to_string(), - // disable_proxy: false, - // health_check_duration: Duration::from_secs(100), - // endpoint: _tgt.endpoint.clone(), - // reset_id: "0".to_string(), - // replicate_sync: false, - // secure: false, - // arn: "".to_string(), - // client: reqwest::Client::new(), - // }; - // Ok(tcli) - // } - - async fn local_is_bucket_versioned(&self, _bucket: &str) -> bool { - let Some(store) = new_object_layer_fn() else { - return false; - }; - //store.get_bucket_info(bucket, opts) - - // let binfo:BucketInfo = store - // .get_bucket_info(bucket, &ecstore::store_api::BucketOptions::default()).await; - match store.get_bucket_info(_bucket, &store_api::BucketOptions::default()).await { - Ok(info) => { - println!("Bucket Info: {info:?}"); - info.versioning - } - Err(err) => { - eprintln!("Error: {err:?}"); - false - } - } - } - - async fn is_bucket_versioned(&self, _bucket: &str) -> bool { - true - // let url_str = "http://127.0.0.1:9001"; - - // // Convert it to a Url - // let parsed_url = url::Url::parse(url_str).unwrap(); - - // let node = Node { - // url: parsed_url, - // pools: vec![], - // is_local: false, - // grid_host: "".to_string(), - // }; - // let cli = ecstore::peer::RemotePeerS3Client::new(Some(node), None); - - // match cli.get_bucket_info(_bucket, &ecstore::store_api::BucketOptions::default()).await - // { - // Ok(info) => { - // println!("Bucket Info: {:?}", info); - // info.versioning - // } - // Err(err) => { - // eprintln!("Error: {:?}", err); - // return false; - // } - // } - } - - pub async
fn set_target(&self, bucket: &str, tgt: &BucketTarget, update: bool, fromdisk: bool) -> Result<(), SetTargetError> { - // if !tgt.type_.is_valid() && !update { - // return Err(SetTargetError::InvalidTargetType(bucket.to_string())); - // } - - //let client = self.get_remote_target_client(tgt).await?; - if tgt.type_ == Some("replication".to_string()) && !fromdisk { - let versioning_config = self.local_is_bucket_versioned(bucket).await; - if !versioning_config { - // println!("111111111"); - return Err(SetTargetError::TargetNotVersioned(bucket.to_string())); - } - } - - let url_str = format!("http://{}", tgt.endpoint.clone()); - - println!("url str is {url_str}"); - // Convert it to a Url - let parsed_url = url::Url::parse(&url_str).unwrap(); - - let node = Node { - url: parsed_url, - pools: vec![], - is_local: false, - grid_host: "".to_string(), - }; - - let cli = RemotePeerS3Client::new(Some(node), None); - - match cli - .get_bucket_info(&tgt.target_bucket, &store_api::BucketOptions::default()) - .await - { - Ok(info) => { - println!("Bucket Info: {info:?}"); - if !info.versioning { - return Err(SetTargetError::TargetNotVersioned(tgt.target_bucket.to_string())); - } - } - Err(err) => { - println!("remote bucket 369 is:{}", tgt.target_bucket); - eprintln!("Error: {err:?}"); - return Err(SetTargetError::SourceNotVersioned(tgt.target_bucket.to_string())); - } - } - - //if tgt.target_type == BucketTargetType::ReplicationService { - // Check if target is a rustfs server and alive - // let hc_result = tokio::time::timeout(Duration::from_secs(3), client.health_check(&tgt.endpoint)).await; - // match hc_result { - // Ok(Ok(true)) => {} // Server is alive - // Ok(Ok(false)) | Ok(Err(_)) | Err(_) => { - // return Err(SetTargetError::HealthCheckFailed(tgt.target_bucket.clone())); - // } - // } - - //Lock and update target maps - let mut targets_map = self.targets_map.write().await; - let mut arn_remotes_map = self.arn_remote_map.write().await; - - let targets = targets_map.entry(bucket.to_string()).or_default(); - let mut found = false; - - for existing_target in targets.iter_mut() { - println!("418 exist:{}", existing_target.source_bucket.clone()); - if existing_target.type_ == tgt.type_ { - if existing_target.arn == tgt.arn { - if !update { - return Err(SetTargetError::TargetAlreadyExists(existing_target.target_bucket.clone())); - } - *existing_target = tgt.clone(); - found = true; - break; - } - - if existing_target.endpoint == tgt.endpoint { - println!("endpoint is same:{}", tgt.endpoint.clone()); - return Err(SetTargetError::TargetAlreadyExists(existing_target.target_bucket.clone())); - } - } - } - - if !found && !update { - println!("437 exist:{}", tgt.arn.clone().unwrap()); - targets.push(tgt.clone()); - } - let arntgt: ArnTarget = ArnTarget::new( - tgt.target_bucket.clone(), - tgt.endpoint.clone(), - tgt.credentials.clone().unwrap().access_key.clone(), - tgt.credentials.clone().unwrap().secret_key, - ); - - arn_remotes_map.insert(tgt.arn.clone().unwrap().clone(), arntgt); - //self.update_bandwidth_limit(bucket, &tgt.arn, tgt.bandwidth_limit).await; - - Ok(()) - } -} - -#[derive(Clone)] -pub struct TargetClient { - pub client: reqwest::Client, // Using reqwest HTTP client - pub health_check_duration: Duration, - pub bucket: String, // Remote bucket target - pub replicate_sync: bool, - pub storage_class: String, // Storage class on remote - pub disable_proxy: bool, - pub arn: String, // ARN to uniquely identify remote target - pub reset_id: String, - pub endpoint: String, - pub secure: bool, - pub ak:
String, - pub sk: String, -} - -#[allow(clippy::too_many_arguments)] -impl TargetClient { - #[allow(clippy::too_many_arguments)] - pub fn new( - client: reqwest::Client, - health_check_duration: Duration, - bucket: String, - replicate_sync: bool, - storage_class: String, - disable_proxy: bool, - arn: String, - reset_id: String, - endpoint: String, - secure: bool, - ak: String, - sk: String, - ) -> Self { - TargetClient { - client, - health_check_duration, - bucket, - replicate_sync, - storage_class, - disable_proxy, - arn, - reset_id, - endpoint, - secure, - ak, - sk, - } - } - pub async fn bucket_exists(&self, _bucket: &str) -> Result { - Ok(true) // Mocked implementation - } -} -use tracing::{error, info, warn}; -use uuid::Uuid; - -#[derive(Debug, Clone)] -pub struct VersioningConfig { - pub enabled: bool, -} - -impl VersioningConfig { - pub fn is_enabled(&self) -> bool { - self.enabled - } -} - -#[derive(Debug)] -pub struct Client; - -impl Client { - pub async fn bucket_exists(&self, _bucket: &str) -> Result { - Ok(true) // Mocked implementation - } - - pub async fn get_bucket_versioning(&self, _bucket: &str) -> Result { - Ok(VersioningConfig { enabled: true }) - } - - pub async fn health_check(&self, _endpoint: &str) -> Result { - Ok(true) // Mocked health check - } -} - -#[derive(Debug, PartialEq)] -pub struct ServiceType(String); - -impl ServiceType { - pub fn is_valid(&self) -> bool { - !self.0.is_empty() // Add concrete validation logic here as needed - } -} - -#[derive(Debug, PartialEq)] -pub struct ARN { - pub arn_type: String, - pub id: String, - pub region: String, - pub bucket: String, -} - -impl ARN { - /// Check whether the ARN is empty - pub fn is_empty(&self) -> bool { - //!self.arn_type.is_valid() - false - } - - // Parse an ARN from a string - pub fn parse(s: &str) -> Result { - // An ARN must have the format arn:rustfs:::: - if !s.starts_with("arn:rustfs:") { - return Err(format!("Invalid ARN {s}")); - } - - let tokens: Vec<&str> = s.split(':').collect(); - if tokens.len() != 6 || tokens[4].is_empty() || tokens[5].is_empty() { - return Err(format!("Invalid ARN {s}")); - } - - Ok(ARN { - arn_type: tokens[2].to_string(), - region: tokens[3].to_string(), - id: tokens[4].to_string(), - bucket: tokens[5].to_string(), - }) - } -} - -// Implement the `Display` trait so an ARN can be rendered directly with `format!` or `{}` -impl std::fmt::Display for ARN { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "arn:rustfs:{}:{}:{}:{}", self.arn_type, self.region, self.id, self.bucket) - } -} - -fn must_get_uuid() -> String { - Uuid::new_v4().to_string() - // match Uuid::new_v4() { - // Ok(uuid) => uuid.to_string(), - // Err(err) => { - // error!("Critical error: {}", err); - // panic!("Failed to generate UUID: {}", err); // Ensures similar behavior as Go's logger.CriticalIf - // } - // } -} -fn generate_arn(target: BucketTarget, depl_id: String) -> String { - let mut uuid: String = depl_id; - if uuid.is_empty() { - uuid = must_get_uuid(); - } - - let arn: ARN = ARN { - arn_type: target.type_.unwrap(), - id: (uuid), - region: "us-east-1".to_string(), - bucket: (target.target_bucket), - }; - arn.to_string() -} - -// use std::collections::HashMap; -// use std::sync::{Arc, Mutex, RwLock}; -// use std::time::Duration; -// use tokio::time::timeout; -// use tokio::sync::RwLock as AsyncRwLock; -// use serde::Deserialize; -// use thiserror::Error; - -// #[derive(Debug, Clone, PartialEq)] -// pub enum BucketTargetType { -// ReplicationService, -// // Add other service types as needed -// } - -// impl BucketTargetType { -// pub fn is_valid(&self) -> bool { -// matches!(self,
BucketTargetType::ReplicationService) -// } -// } - -// #[derive(Debug, Clone)] -// pub struct BucketTarget { -// pub arn: String, -// pub target_bucket: String, -// pub endpoint: String, -// pub credentials: Credentials, -// pub secure: bool, -// pub bandwidth_limit: Option, -// pub target_type: BucketTargetType, -// } - -// #[derive(Debug, Clone)] -// pub struct Credentials { -// pub access_key: String, -// pub secret_key: String, -// } - -// #[derive(Debug)] -// pub struct BucketTargetSys { -// targets_map: Arc>>>, -// arn_remotes_map: Arc>>, -// } - -// impl BucketTargetSys { -// pub fn new() -> Self { -// Self { -// targets_map: Arc::new(RwLock::new(HashMap::new())), -// arn_remotes_map: Arc::new(Mutex::new(HashMap::new())), -// } -// } - -// pub async fn set_target( -// &self, -// bucket: &str, -// tgt: &BucketTarget, -// update: bool, -// ) -> Result<(), SetTargetError> { -// if !tgt.target_type.is_valid() && !update { -// return Err(SetTargetError::InvalidTargetType(bucket.to_string())); -// } - -// let client = self.get_remote_target_client(tgt).await?; - -// // Validate if target credentials are OK -// let exists = client.bucket_exists(&tgt.target_bucket).await?; -// if !exists { -// return Err(SetTargetError::TargetNotFound(tgt.target_bucket.clone())); -// } - -// if tgt.target_type == BucketTargetType::ReplicationService { -// if !self.is_bucket_versioned(bucket).await { -// return Err(SetTargetError::SourceNotVersioned(bucket.to_string())); -// } - -// let versioning_config = client.get_bucket_versioning(&tgt.target_bucket).await?; -// if !versioning_config.is_enabled() { -// return Err(SetTargetError::TargetNotVersioned(tgt.target_bucket.clone())); -// } -// } - -// // Check if target is a rustfs server and alive -// let hc_result = timeout(Duration::from_secs(3), client.health_check(&tgt.endpoint)).await; -// match hc_result { -// Ok(Ok(true)) => {} // Server is alive -// Ok(Ok(false)) | Ok(Err(_)) | Err(_) => { -// return Err(SetTargetError::HealthCheckFailed(tgt.target_bucket.clone())); -// } -// } - -// // Lock and update target maps -// let mut targets_map = self.targets_map.write().await; -// let mut arn_remotes_map = self.arn_remotes_map.lock().unwrap(); - -// let targets = targets_map.entry(bucket.to_string()).or_default(); -// let mut found = false; - -// for existing_target in targets.iter_mut() { -// if existing_target.target_type == tgt.target_type { -// if existing_target.arn == tgt.arn { -// if !update { -// return Err(SetTargetError::TargetAlreadyExists(existing_target.target_bucket.clone())); -// } -// *existing_target = tgt.clone(); -// found = true; -// break; -// } - -// if existing_target.endpoint == tgt.endpoint { -// return Err(SetTargetError::TargetAlreadyExists(existing_target.target_bucket.clone())); -// } -// } -// } - -// if !found && !update { -// targets.push(tgt.clone()); -// } - -// arn_remotes_map.insert(tgt.arn.clone(), ArnTarget { client }); -// self.update_bandwidth_limit(bucket, &tgt.arn, tgt.bandwidth_limit).await; - -// Ok(()) -// } - -// async fn get_remote_target_client(&self, tgt: &BucketTarget) -> Result { -// // Mocked implementation for obtaining a remote client -// Ok(Client {}) -// } - -// async fn is_bucket_versioned(&self, bucket: &str) -> bool { -// // Mocked implementation for checking if a bucket is versioned -// true -// } - -// async fn update_bandwidth_limit( -// &self, -// bucket: &str, -// arn: &str, -// limit: Option, -// ) { -// // Mocked implementation for updating bandwidth limits -// } -// } - -// #[derive(Debug)] 
-// pub struct Client; - -// impl Client { -// pub async fn bucket_exists(&self, _bucket: &str) -> Result { -// Ok(true) // Mocked implementation -// } - -// pub async fn get_bucket_versioning( -// &self, -// _bucket: &str, -// ) -> Result { -// Ok(VersioningConfig { enabled: true }) -// } - -// pub async fn health_check(&self, _endpoint: &str) -> Result { -// Ok(true) // Mocked health check -// } -// } - -// #[derive(Debug, Clone)] -// pub struct ArnTarget { -// pub client: Client, -// } - -#[derive(Debug, Error)] -pub enum SetTargetError { - #[error("Invalid target type for bucket {0}")] - InvalidTargetType(String), - - #[error("Target bucket {0} not found")] - TargetNotFound(String), - - #[error("Source bucket {0} is not versioned")] - SourceNotVersioned(String), - - #[error("Target bucket {0} is not versioned")] - TargetNotVersioned(String), - - #[error("Health check failed for bucket {0}")] - HealthCheckFailed(String), - - #[error("Target bucket {0} already exists")] - TargetAlreadyExists(String), -} diff --git a/crates/ecstore/src/cmd/bucketreplicationhandler.rs b/crates/ecstore/src/cmd/bucketreplicationhandler.rs deleted file mode 100644 index 8e170c05..00000000 --- a/crates/ecstore/src/cmd/bucketreplicationhandler.rs +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright 2024 RustFS Team -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - diff --git a/crates/ecstore/src/cmd/mod.rs b/crates/ecstore/src/cmd/mod.rs deleted file mode 100644 index d58a08a9..00000000 --- a/crates/ecstore/src/cmd/mod.rs +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright 2024 RustFS Team -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -pub mod bucket_replication; -pub mod bucket_targets; diff --git a/crates/ecstore/src/data_usage/local_snapshot.rs b/crates/ecstore/src/data_usage/local_snapshot.rs index 7d81a140..fa232f73 100644 --- a/crates/ecstore/src/data_usage/local_snapshot.rs +++ b/crates/ecstore/src/data_usage/local_snapshot.rs @@ -88,7 +88,7 @@ impl LocalUsageSnapshot { /// Build the snapshot file name `.json`. pub fn snapshot_file_name(disk_id: &str) -> String { - format!("{}.json", disk_id) + format!("{disk_id}.json") } /// Build the object path relative to `RUSTFS_META_BUCKET`, e.g. `datausage/.json`. 
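
For reference, the bucket_targets.rs file deleted above encoded remote targets as six-token, colon-separated ARNs; its parse indices (type at token 2, region at 3, id at 4, bucket at 5) and its Display impl imply an arn:rustfs:type:region:id:bucket layout. The following is a minimal, self-contained round-trip sketch of that scheme as it appears in the removed code (illustrative names only, not the API of the replacement module):

#[derive(Debug, PartialEq)]
struct Arn {
    arn_type: String,
    region: String,
    id: String,
    bucket: String,
}

impl Arn {
    // Mirrors the removed parser: a fixed "arn:rustfs:" prefix and six colon-separated tokens.
    fn parse(s: &str) -> Result<Self, String> {
        if !s.starts_with("arn:rustfs:") {
            return Err(format!("Invalid ARN {s}"));
        }
        let tokens: Vec<&str> = s.split(':').collect();
        if tokens.len() != 6 || tokens[4].is_empty() || tokens[5].is_empty() {
            return Err(format!("Invalid ARN {s}"));
        }
        Ok(Arn {
            arn_type: tokens[2].to_string(),
            region: tokens[3].to_string(),
            id: tokens[4].to_string(),
            bucket: tokens[5].to_string(),
        })
    }
}

impl std::fmt::Display for Arn {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "arn:rustfs:{}:{}:{}:{}", self.arn_type, self.region, self.id, self.bucket)
    }
}

fn main() {
    // "replication" type, a deployment id as the id token, then the target bucket (sample values).
    let s = "arn:rustfs:replication:us-east-1:1f1d1450-93a5-4f6d-a380-dbbdadf2a9b4:my-bucket";
    let arn = Arn::parse(s).unwrap();
    assert_eq!(arn.to_string(), s);
}
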
diff --git a/crates/ecstore/src/disk/local.rs b/crates/ecstore/src/disk/local.rs index cf556f31..44ffe160 100644 --- a/crates/ecstore/src/disk/local.rs +++ b/crates/ecstore/src/disk/local.rs @@ -2349,12 +2349,7 @@ impl DiskAPI for LocalDisk { self.delete_file(&volume_dir, &xl_path, true, false).await } #[tracing::instrument(level = "debug", skip(self))] - async fn delete_versions( - &self, - volume: &str, - versions: Vec, - _opts: DeleteOptions, - ) -> Result>> { + async fn delete_versions(&self, volume: &str, versions: Vec, _opts: DeleteOptions) -> Vec> { let mut errs = Vec::with_capacity(versions.len()); for _ in 0..versions.len() { errs.push(None); @@ -2368,7 +2363,7 @@ impl DiskAPI for LocalDisk { } } - Ok(errs) + errs } #[tracing::instrument(skip(self))] diff --git a/crates/ecstore/src/disk/mod.rs b/crates/ecstore/src/disk/mod.rs index 63b3e49f..1e8af91c 100644 --- a/crates/ecstore/src/disk/mod.rs +++ b/crates/ecstore/src/disk/mod.rs @@ -201,12 +201,7 @@ impl DiskAPI for Disk { } #[tracing::instrument(skip(self))] - async fn delete_versions( - &self, - volume: &str, - versions: Vec, - opts: DeleteOptions, - ) -> Result>> { + async fn delete_versions(&self, volume: &str, versions: Vec, opts: DeleteOptions) -> Vec> { match self { Disk::Local(local_disk) => local_disk.delete_versions(volume, versions, opts).await, Disk::Remote(remote_disk) => remote_disk.delete_versions(volume, versions, opts).await, @@ -448,12 +443,7 @@ pub trait DiskAPI: Debug + Send + Sync + 'static { force_del_marker: bool, opts: DeleteOptions, ) -> Result<()>; - async fn delete_versions( - &self, - volume: &str, - versions: Vec, - opts: DeleteOptions, - ) -> Result>>; + async fn delete_versions(&self, volume: &str, versions: Vec, opts: DeleteOptions) -> Vec>; async fn delete_paths(&self, volume: &str, paths: &[String]) -> Result<()>; async fn write_metadata(&self, org_volume: &str, volume: &str, path: &str, fi: FileInfo) -> Result<()>; async fn update_metadata(&self, volume: &str, path: &str, fi: FileInfo, opts: &UpdateMetadataOpts) -> Result<()>; diff --git a/crates/ecstore/src/lib.rs b/crates/ecstore/src/lib.rs index b99424f8..43558d39 100644 --- a/crates/ecstore/src/lib.rs +++ b/crates/ecstore/src/lib.rs @@ -21,7 +21,6 @@ pub mod bitrot; pub mod bucket; pub mod cache_value; mod chunk_stream; -pub mod cmd; pub mod compress; pub mod config; pub mod data_usage; diff --git a/crates/ecstore/src/pools.rs b/crates/ecstore/src/pools.rs index 7bfde413..a879ba8b 100644 --- a/crates/ecstore/src/pools.rs +++ b/crates/ecstore/src/pools.rs @@ -48,7 +48,7 @@ use std::path::PathBuf; use std::sync::Arc; use time::{Duration, OffsetDateTime}; use tokio::io::{AsyncReadExt, BufReader}; -use tokio::sync::broadcast::Receiver as B_Receiver; +use tokio_util::sync::CancellationToken; use tracing::{error, info, warn}; pub const POOL_META_NAME: &str = "pool.bin"; @@ -651,7 +651,7 @@ impl ECStore { } #[tracing::instrument(skip(self, rx))] - pub async fn decommission(&self, rx: B_Receiver, indices: Vec) -> Result<()> { + pub async fn decommission(&self, rx: CancellationToken, indices: Vec) -> Result<()> { warn!("decommission: {:?}", indices); if indices.is_empty() { return Err(Error::other("InvalidArgument")); @@ -663,13 +663,14 @@ impl ECStore { self.start_decommission(indices.clone()).await?; + let rx_clone = rx.clone(); tokio::spawn(async move { let Some(store) = new_object_layer_fn() else { error!("store not init"); return; }; for idx in indices.iter() { - store.do_decommission_in_routine(rx.resubscribe(), *idx).await; + 
store.do_decommission_in_routine(rx_clone.clone(), *idx).await; } }); @@ -891,7 +892,7 @@ impl ECStore { #[tracing::instrument(skip(self, rx))] async fn decommission_pool( self: &Arc, - rx: B_Receiver, + rx: CancellationToken, idx: usize, pool: Arc, bi: DecomBucketInfo, @@ -936,20 +937,20 @@ impl ECStore { }); let set = set.clone(); - let mut rx = rx.resubscribe(); + let rx_clone = rx.clone(); let bi = bi.clone(); let set_id = set_idx; let wk_clone = wk.clone(); tokio::spawn(async move { loop { - if rx.try_recv().is_ok() { + if rx_clone.is_cancelled() { warn!("decommission_pool: cancel {}", set_id); break; } warn!("decommission_pool: list_objects_to_decommission {} {}", set_id, &bi.name); match set - .list_objects_to_decommission(rx.resubscribe(), bi.clone(), decommission_entry.clone()) + .list_objects_to_decommission(rx_clone.clone(), bi.clone(), decommission_entry.clone()) .await { Ok(_) => { @@ -982,7 +983,7 @@ impl ECStore { } #[tracing::instrument(skip(self, rx))] - pub async fn do_decommission_in_routine(self: &Arc, rx: B_Receiver, idx: usize) { + pub async fn do_decommission_in_routine(self: &Arc, rx: CancellationToken, idx: usize) { if let Err(err) = self.decommission_in_background(rx, idx).await { error!("decom err {:?}", &err); if let Err(er) = self.decommission_failed(idx).await { @@ -1060,7 +1061,7 @@ impl ECStore { } #[tracing::instrument(skip(self, rx))] - async fn decommission_in_background(self: &Arc, rx: B_Receiver, idx: usize) -> Result<()> { + async fn decommission_in_background(self: &Arc, rx: CancellationToken, idx: usize) -> Result<()> { let pool = self.pools[idx].clone(); let pending = { @@ -1090,10 +1091,7 @@ impl ECStore { warn!("decommission: currently on bucket {}", &bucket.name); - if let Err(err) = self - .decommission_pool(rx.resubscribe(), idx, pool.clone(), bucket.clone()) - .await - { + if let Err(err) = self.decommission_pool(rx.clone(), idx, pool.clone(), bucket.clone()).await { error!("decommission: decommission_pool err {:?}", &err); return Err(err); } else { @@ -1329,7 +1327,7 @@ impl SetDisks { #[tracing::instrument(skip(self, rx, cb_func))] async fn list_objects_to_decommission( self: &Arc, - rx: B_Receiver, + rx: CancellationToken, bucket_info: DecomBucketInfo, cb_func: ListCallback, ) -> Result<()> { diff --git a/crates/ecstore/src/rebalance.rs b/crates/ecstore/src/rebalance.rs index 6800060a..40b53c59 100644 --- a/crates/ecstore/src/rebalance.rs +++ b/crates/ecstore/src/rebalance.rs @@ -34,8 +34,8 @@ use std::io::Cursor; use std::sync::Arc; use time::OffsetDateTime; use tokio::io::{AsyncReadExt, BufReader}; -use tokio::sync::broadcast::{self, Receiver as B_Receiver}; use tokio::time::{Duration, Instant}; +use tokio_util::sync::CancellationToken; use tracing::{error, info}; use uuid::Uuid; @@ -151,7 +151,7 @@ pub struct DiskStat { #[derive(Debug, Default, Serialize, Deserialize, Clone)] pub struct RebalanceMeta { #[serde(skip)] - pub cancel: Option>, // To be invoked on rebalance-stop + pub cancel: Option, // To be invoked on rebalance-stop #[serde(skip)] pub last_refreshed_at: Option, #[serde(rename = "stopTs")] @@ -493,8 +493,8 @@ impl ECStore { pub async fn stop_rebalance(self: &Arc) -> Result<()> { let rebalance_meta = self.rebalance_meta.read().await; if let Some(meta) = rebalance_meta.as_ref() { - if let Some(tx) = meta.cancel.as_ref() { - let _ = tx.send(true); + if let Some(cancel_tx) = meta.cancel.as_ref() { + cancel_tx.cancel(); } } @@ -506,13 +506,14 @@ impl ECStore { info!("start_rebalance: start rebalance"); // let rebalance_meta = 
self.rebalance_meta.read().await; - let (tx, rx) = broadcast::channel::(1); + let cancel_tx = CancellationToken::new(); + let rx = cancel_tx.clone(); { let mut rebalance_meta = self.rebalance_meta.write().await; if let Some(meta) = rebalance_meta.as_mut() { - meta.cancel = Some(tx) + meta.cancel = Some(cancel_tx) } else { info!("start_rebalance: rebalance_meta is None exit"); return; @@ -565,9 +566,9 @@ impl ECStore { let pool_idx = idx; let store = self.clone(); - let rx = rx.resubscribe(); + let rx_clone = rx.clone(); tokio::spawn(async move { - if let Err(err) = store.rebalance_buckets(rx, pool_idx).await { + if let Err(err) = store.rebalance_buckets(rx_clone, pool_idx).await { error!("Rebalance failed for pool {}: {}", pool_idx, err); } else { info!("Rebalance completed for pool {}", pool_idx); @@ -579,7 +580,7 @@ impl ECStore { } #[tracing::instrument(skip(self, rx))] - async fn rebalance_buckets(self: &Arc, mut rx: B_Receiver, pool_index: usize) -> Result<()> { + async fn rebalance_buckets(self: &Arc, rx: CancellationToken, pool_index: usize) -> Result<()> { let (done_tx, mut done_rx) = tokio::sync::mpsc::channel::>(1); // Save rebalance metadata periodically @@ -651,7 +652,7 @@ impl ECStore { info!("Pool {} rebalancing is started", pool_index); loop { - if let Ok(true) = rx.try_recv() { + if rx.is_cancelled() { info!("Pool {} rebalancing is stopped", pool_index); done_tx.send(Err(Error::other("rebalance stopped canceled"))).await.ok(); break; @@ -660,7 +661,7 @@ impl ECStore { if let Some(bucket) = self.next_rebal_bucket(pool_index).await? { info!("Rebalancing bucket: start {}", bucket); - if let Err(err) = self.rebalance_bucket(rx.resubscribe(), bucket.clone(), pool_index).await { + if let Err(err) = self.rebalance_bucket(rx.clone(), bucket.clone(), pool_index).await { if err.to_string().contains("not initialized") { info!("rebalance_bucket: rebalance not initialized, continue"); continue; @@ -1033,7 +1034,7 @@ impl ECStore { } #[tracing::instrument(skip(self, rx))] - async fn rebalance_bucket(self: &Arc, rx: B_Receiver, bucket: String, pool_index: usize) -> Result<()> { + async fn rebalance_bucket(self: &Arc, rx: CancellationToken, bucket: String, pool_index: usize) -> Result<()> { // Placeholder for actual bucket rebalance logic info!("Rebalancing bucket {} in pool {}", bucket, pool_index); @@ -1072,7 +1073,7 @@ impl ECStore { }); let set = set.clone(); - let rx = rx.resubscribe(); + let rx = rx.clone(); let bucket = bucket.clone(); // let wk = wk.clone(); @@ -1144,7 +1145,7 @@ impl SetDisks { #[tracing::instrument(skip(self, rx, cb))] pub async fn list_objects_to_rebalance( self: &Arc, - rx: B_Receiver, + rx: CancellationToken, bucket: String, cb: ListCallback, ) -> Result<()> { diff --git a/crates/ecstore/src/rpc/remote_disk.rs b/crates/ecstore/src/rpc/remote_disk.rs index 5f2f078a..da8a11e1 100644 --- a/crates/ecstore/src/rpc/remote_disk.rs +++ b/crates/ecstore/src/rpc/remote_disk.rs @@ -345,21 +345,43 @@ impl DiskAPI for RemoteDisk { } #[tracing::instrument(skip(self))] - async fn delete_versions( - &self, - volume: &str, - versions: Vec, - opts: DeleteOptions, - ) -> Result>> { + async fn delete_versions(&self, volume: &str, versions: Vec, opts: DeleteOptions) -> Vec> { info!("delete_versions"); - let opts = serde_json::to_string(&opts)?; + + let opts = match serde_json::to_string(&opts) { + Ok(opts) => opts, + Err(err) => { + let mut errors = Vec::with_capacity(versions.len()); + for _ in 0..versions.len() { + errors.push(Some(Error::other(err.to_string()))); + } + return 
errors; + } + }; let mut versions_str = Vec::with_capacity(versions.len()); for file_info_versions in versions.iter() { - versions_str.push(serde_json::to_string(file_info_versions)?); + versions_str.push(match serde_json::to_string(file_info_versions) { + Ok(versions_str) => versions_str, + Err(err) => { + let mut errors = Vec::with_capacity(versions.len()); + for _ in 0..versions.len() { + errors.push(Some(Error::other(err.to_string()))); + } + return errors; + } + }); } - let mut client = node_service_time_out_client(&self.addr) - .await - .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; + let mut client = match node_service_time_out_client(&self.addr).await { + Ok(client) => client, + Err(err) => { + let mut errors = Vec::with_capacity(versions.len()); + for _ in 0..versions.len() { + errors.push(Some(Error::other(err.to_string()))); + } + return errors; + } + }; + let request = Request::new(DeleteVersionsRequest { disk: self.endpoint.to_string(), volume: volume.to_string(), @@ -368,11 +390,27 @@ impl DiskAPI for RemoteDisk { }); // TODO: use Error not string - let response = client.delete_versions(request).await?.into_inner(); + + let response = match client.delete_versions(request).await { + Ok(response) => response, + Err(err) => { + let mut errors = Vec::with_capacity(versions.len()); + for _ in 0..versions.len() { + errors.push(Some(Error::other(err.to_string()))); + } + return errors; + } + }; + + let response = response.into_inner(); if !response.success { - return Err(response.error.unwrap_or_default().into()); + let mut errors = Vec::with_capacity(versions.len()); + for _ in 0..versions.len() { + errors.push(Some(Error::other(response.error.clone().map(|e| e.error_info).unwrap_or_default()))); + } + return errors; } - let errors = response + response .errors .iter() .map(|error| { @@ -382,9 +420,7 @@ impl DiskAPI for RemoteDisk { Some(Error::other(error.to_string())) } }) - .collect(); - - Ok(errors) + .collect() } #[tracing::instrument(skip(self))] diff --git a/crates/ecstore/src/rpc/tonic_service.rs b/crates/ecstore/src/rpc/tonic_service.rs index cbf30dcc..522ffb84 100644 --- a/crates/ecstore/src/rpc/tonic_service.rs +++ b/crates/ecstore/src/rpc/tonic_service.rs @@ -1301,28 +1301,22 @@ impl Node for NodeService { })); } }; - match disk.delete_versions(&request.volume, versions, opts).await { - Ok(errors) => { - let errors = errors - .into_iter() - .map(|error| match error { - Some(e) => e.to_string(), - None => "".to_string(), - }) - .collect(); - Ok(tonic::Response::new(DeleteVersionsResponse { - success: true, - errors, - error: None, - })) - } - Err(err) => Ok(tonic::Response::new(DeleteVersionsResponse { - success: false, - errors: Vec::new(), - error: Some(err.into()), - })), - } + let errors = disk + .delete_versions(&request.volume, versions, opts) + .await + .into_iter() + .map(|error| match error { + Some(e) => e.to_string(), + None => "".to_string(), + }) + .collect(); + + Ok(tonic::Response::new(DeleteVersionsResponse { + success: true, + errors, + error: None, + })) } else { Ok(tonic::Response::new(DeleteVersionsResponse { success: false, diff --git a/crates/ecstore/src/set_disk.rs b/crates/ecstore/src/set_disk.rs index 4674906f..586773a8 100644 --- a/crates/ecstore/src/set_disk.rs +++ b/crates/ecstore/src/set_disk.rs @@ -18,6 +18,7 @@ use crate::batch_processor::{AsyncBatchProcessor, get_global_processors}; use crate::bitrot::{create_bitrot_reader, create_bitrot_writer}; use crate::bucket::lifecycle::lifecycle::TRANSITION_COMPLETE; +use 
crate::bucket::replication::check_replicate_delete; use crate::bucket::versioning::VersioningApi; use crate::bucket::versioning_sys::BucketVersioningSys; use crate::client::{object_api_utils::extract_etag, transition_api::ReaderImpl}; @@ -29,11 +30,12 @@ use crate::disk::{ }; use crate::erasure_coding; use crate::erasure_coding::bitrot_verify; -use crate::error::{Error, Result}; +use crate::error::{Error, Result, is_err_version_not_found}; use crate::error::{ObjectApiError, is_err_object_not_found}; use crate::global::{GLOBAL_LocalNodeName, GLOBAL_TierConfigMgr}; use crate::store_api::ListObjectVersionsInfo; -use crate::store_api::{ListPartsInfo, ObjectToDelete}; +use crate::store_api::{ListPartsInfo, ObjectOptions, ObjectToDelete}; +use crate::store_api::{ObjectInfoOrErr, WalkOptions}; use crate::{ bucket::lifecycle::bucket_lifecycle_ops::{gen_transition_objname, get_transitioned_object_reader, put_restore_opts}, cache_value::metacache_set::{ListPathRawOptions, list_path_raw}, @@ -50,7 +52,7 @@ use crate::{ store_api::{ BucketInfo, BucketOptions, CompletePart, DeleteBucketOptions, DeletedObject, GetObjectReader, HTTPRangeSpec, ListMultipartsInfo, ListObjectsV2Info, MakeBucketOptions, MultipartInfo, MultipartUploadResult, ObjectIO, ObjectInfo, - ObjectOptions, PartInfo, PutObjReader, StorageAPI, + PartInfo, PutObjReader, StorageAPI, }, store_init::load_format_erasure, }; @@ -64,16 +66,16 @@ use md5::{Digest as Md5Digest, Md5}; use rand::{Rng, seq::SliceRandom}; use regex::Regex; use rustfs_common::heal_channel::{DriveState, HealChannelPriority, HealItemType, HealOpts, HealScanMode, send_heal_disk}; -use rustfs_filemeta::headers::RESERVED_METADATA_PREFIX_LOWER; use rustfs_filemeta::{ FileInfo, FileMeta, FileMetaShallowVersion, MetaCacheEntries, MetaCacheEntry, MetadataResolutionParams, ObjectPartInfo, - RawFileInfo, file_info_from_raw, - headers::{AMZ_OBJECT_TAGGING, AMZ_STORAGE_CLASS}, - merge_file_meta_versions, + RawFileInfo, ReplicationStatusType, VersionPurgeStatusType, file_info_from_raw, merge_file_meta_versions, }; use rustfs_lock::fast_lock::types::LockResult; use rustfs_madmin::heal_commands::{HealDriveInfo, HealResultItem}; use rustfs_rio::{EtagResolvable, HashReader, TryGetIndex as _, WarpReader}; +use rustfs_utils::http::headers::AMZ_OBJECT_TAGGING; +use rustfs_utils::http::headers::AMZ_STORAGE_CLASS; +use rustfs_utils::http::headers::RESERVED_METADATA_PREFIX_LOWER; use rustfs_utils::{ HashAlgorithm, crypto::{base64_decode, base64_encode, hex}, @@ -102,6 +104,7 @@ use tokio::{ sync::mpsc::{self, Sender}, time::interval, }; +use tokio_util::sync::CancellationToken; use tracing::error; use tracing::{debug, info, warn}; use uuid::Uuid; @@ -3810,7 +3813,7 @@ impl ObjectIO for SetDisks { } } - fi.is_latest = true; + fi.replication_state_internal = Some(opts.put_replication_state()); // TODO: version support Ok(ObjectInfo::from_file_info(&fi, bucket, object, opts.versioned || opts.version_suspended)) @@ -3976,12 +3979,12 @@ impl StorageAPI for SetDisks { } #[tracing::instrument(skip(self))] async fn delete_object_version(&self, bucket: &str, object: &str, fi: &FileInfo, force_del_marker: bool) -> Result<()> { - // Guard lock for single object delete-version - let _lock_guard = self - .fast_lock_manager - .acquire_write_lock(bucket, object, self.locker_owner.as_str()) - .await - .map_err(|e| Error::other(self.format_lock_error(bucket, object, "write", &e)))?; + // // Guard lock for single object delete-version + // let _lock_guard = self + // .fast_lock_manager + // 
.acquire_write_lock("", object, self.locker_owner.as_str()) + // .await + // .map_err(|_| Error::other("can not get lock. please retry".to_string()))?; let disks = self.get_disks(0, 0).await?; let write_quorum = disks.len() / 2 + 1; @@ -4028,7 +4031,7 @@ bucket: &str, objects: Vec, opts: ObjectOptions, - ) -> Result<(Vec, Vec>)> { + ) -> (Vec, Vec>) { // Default return values let mut del_objects = vec![DeletedObject::default(); objects.len()]; @@ -4080,6 +4083,7 @@ name: dobj.object_name.clone(), version_id: dobj.version_id, idx: i, + replication_state_internal: Some(dobj.replication_state()), ..Default::default() }; @@ -4117,15 +4121,17 @@ if vr.deleted { del_objects[i] = DeletedObject { delete_marker: vr.deleted, - delete_marker_version_id: vr.version_id.map(|v| v.to_string()), + delete_marker_version_id: vr.version_id, delete_marker_mtime: vr.mod_time, object_name: vr.name.clone(), + replication_state: vr.replication_state_internal.clone(), ..Default::default() } } else { del_objects[i] = DeletedObject { object_name: vr.name.clone(), - version_id: vr.version_id.map(|v| v.to_string()), + version_id: vr.version_id, + replication_state: vr.replication_state_internal.clone(), ..Default::default() } } @@ -4163,25 +4169,73 @@ if let Some(disk) = disk { disk.delete_versions(bucket, vers, DeleteOptions::default()).await } else { - Err(DiskError::DiskNotFound) + let mut errs = Vec::with_capacity(vers.len()); + for _ in 0..vers.len() { + errs.push(Some(DiskError::DiskNotFound)); + } + errs } }); } let results = join_all(futures).await; - for errs in results.into_iter().flatten() { - // TODO: handle err reduceWriteQuorumErrs - for err in errs.iter().flatten() { - warn!("result err {:?}", err); + let mut del_obj_errs: Vec>> = vec![vec![None; objects.len()]; disks.len()]; + + // For each disk, delete all of the objects + for (disk_idx, errors) in results.into_iter().enumerate() { + // Per-object delete results for this disk + for idx in 0..vers.len() { + if errors[idx].is_some() { + for fi in vers[idx].versions.iter() { + del_obj_errs[disk_idx][fi.idx] = errors[idx].clone(); + } + } } } - Ok((del_objects, del_errs)) + for obj_idx in 0..objects.len() { + let mut disk_err = vec![None; disks.len()]; + + for disk_idx in 0..disks.len() { + if del_obj_errs[disk_idx][obj_idx].is_some() { + disk_err[disk_idx] = del_obj_errs[disk_idx][obj_idx].clone(); + } + } + + let mut has_err = reduce_write_quorum_errs(&disk_err, OBJECT_OP_IGNORED_ERRS, disks.len() / 2 + 1); + if let Some(err) = has_err.clone() { + let er = err.into(); + if (is_err_object_not_found(&er) || is_err_version_not_found(&er)) && !del_objects[obj_idx].delete_marker { + has_err = None; + } + } else { + del_objects[obj_idx].found = true; + } + + if let Some(err) = has_err { + if del_objects[obj_idx].version_id.is_some() { + del_errs[obj_idx] = Some(to_object_err( + err.into(), + vec![ + bucket, + &objects[obj_idx].object_name.clone(), + &objects[obj_idx].version_id.unwrap_or_default().to_string(), + ], + )); + } else { + del_errs[obj_idx] = Some(to_object_err(err.into(), vec![bucket, &objects[obj_idx].object_name.clone()])); + } + } + } + + // TODO: add_partial + + (del_objects, del_errs) } #[tracing::instrument(skip(self))] - async fn delete_object(&self, bucket: &str, object: &str, opts: ObjectOptions) -> Result { + async fn delete_object(&self, bucket: &str, object: &str, mut opts: ObjectOptions) -> Result { // Guard lock for single object delete let _lock_guard = if !opts.delete_prefix { Some(
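
The rewritten delete_objects above records one optional error per disk per object, then folds each object's per-disk errors through reduce_write_quorum_errs with a write quorum of disks.len() / 2 + 1. Below is a simplified, hypothetical sketch of that reduction idea only (plain string errors and majority counting; the real helper also classifies and ignores the error kinds listed in OBJECT_OP_IGNORED_ERRS):

use std::collections::HashMap;

// Returns the quorum outcome for one object: None if enough disks succeeded,
// the dominant error if it reached quorum, or a quorum-failure error otherwise.
fn reduce_quorum_errs(per_disk: &[Option<String>], quorum: usize) -> Option<String> {
    let mut counts: HashMap<Option<&String>, usize> = HashMap::new();
    for e in per_disk {
        *counts.entry(e.as_ref()).or_insert(0) += 1;
    }
    // Pick the most frequent outcome; `None` entries represent disks that succeeded.
    let (top, count) = counts.into_iter().max_by_key(|(_, c)| *c)?;
    if count >= quorum {
        top.cloned() // `None` here means the success outcome reached quorum.
    } else {
        Some("write quorum not reached".to_string())
    }
}

fn main() {
    // Three of four disks deleted the version; quorum is 4 / 2 + 1 = 3.
    let per_disk = vec![None, Some("disk not found".to_string()), None, None];
    assert_eq!(reduce_quorum_errs(&per_disk, 3), None);
}
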
@@ -4201,17 +4255,55 @@ impl StorageAPI for SetDisks { return Ok(ObjectInfo::default()); } - let (oi, write_quorum) = match self.get_object_info_and_quorum(bucket, object, &opts).await { - Ok((oi, wq)) => (oi, wq), - Err(e) => { - return Err(to_object_err(e, vec![bucket, object])); - } + let (mut goi, write_quorum, gerr) = match self.get_object_info_and_quorum(bucket, object, &opts).await { + Ok((oi, wq)) => (oi, wq, None), + Err(e) => (ObjectInfo::default(), 0, Some(e)), }; - let mark_delete = oi.version_id.is_some(); + let otd = ObjectToDelete { + object_name: object.to_string(), + version_id: opts + .version_id + .clone() + .map(|v| Uuid::parse_str(v.as_str()).ok().unwrap_or_default()), + ..Default::default() + }; + + let version_found = if opts.delete_marker { gerr.is_none() } else { true }; + + let dsc = check_replicate_delete(bucket, &otd, &goi, &opts, gerr.map(|e| e.to_string())).await; + + if dsc.replicate_any() { + opts.set_delete_replication_state(dsc); + goi.replication_decision = opts + .delete_replication + .as_ref() + .map(|v| v.replicate_decision_str.clone()) + .unwrap_or_default(); + } + + let mut mark_delete = goi.version_id.is_some(); let mut delete_marker = opts.versioned; + if opts.version_id.is_some() { + if version_found && opts.delete_marker_replication_status() == ReplicationStatusType::Replica { + mark_delete = false; + } + + if opts.version_purge_status().is_empty() && opts.delete_marker_replication_status().is_empty() { + mark_delete = false; + } + + if opts.version_purge_status() != VersionPurgeStatusType::Complete { + mark_delete = false; + } + + if version_found && (goi.version_purge_status.is_empty() || !goi.delete_marker) { + delete_marker = false; + } + } + let mod_time = if let Some(mt) = opts.mod_time { mt } else { @@ -4230,7 +4322,8 @@ impl StorageAPI for SetDisks { deleted: delete_marker, mark_deleted: mark_delete, mod_time: Some(mod_time), - ..Default::default() // TODO: replication + replication_state_internal: opts.delete_replication.clone(), + ..Default::default() // TODO: Transition }; fi.set_tier_free_version_id(&find_vid.to_string()); @@ -4257,88 +4350,27 @@ impl StorageAPI for SetDisks { let version_id = opts.version_id.as_ref().and_then(|v| Uuid::parse_str(v).ok()); // Create a single object deletion request - let mut vr = FileInfo { + let mut dfi = FileInfo { name: object.to_string(), version_id: opts.version_id.as_ref().and_then(|v| Uuid::parse_str(v).ok()), + mark_deleted: mark_delete, + deleted: delete_marker, + mod_time: Some(mod_time), + replication_state_internal: opts.delete_replication.clone(), ..Default::default() }; - // Handle versioning - let (suspended, versioned) = (opts.version_suspended, opts.versioned); - if opts.version_id.is_none() && (suspended || versioned) { - vr.mod_time = Some(OffsetDateTime::now_utc()); - vr.deleted = true; - if versioned { - vr.version_id = Some(Uuid::new_v4()); - } + dfi.set_tier_free_version_id(&find_vid.to_string()); + + if opts.skip_free_version { + dfi.set_skip_tier_free_version(); } - let vers = vec![FileInfoVersions { - name: vr.name.clone(), - versions: vec![vr.clone()], - ..Default::default() - }]; + self.delete_object_version(bucket, object, &dfi, opts.delete_marker) + .await + .map_err(|e| to_object_err(e, vec![bucket, object]))?; - let disks = self.disks.read().await; - let disks = disks.clone(); - let write_quorum = disks.len() / 2 + 1; - - let mut futures = Vec::with_capacity(disks.len()); - let mut errs = Vec::with_capacity(disks.len()); - - for disk in disks.iter() { - let vers = 
vers.clone(); - futures.push(async move { - if let Some(disk) = disk { - disk.delete_versions(bucket, vers, DeleteOptions::default()).await - } else { - Err(DiskError::DiskNotFound) - } - }); - } - - let results = join_all(futures).await; - - for result in results { - match result { - Ok(disk_errs) => { - // Handle errors from disk operations - for err in disk_errs.iter().flatten() { - warn!("delete_object disk error: {:?}", err); - } - errs.push(None); - } - Err(e) => { - errs.push(Some(e)); - } - } - } - - // Check write quorum - if let Some(err) = reduce_write_quorum_errs(&errs, OBJECT_OP_IGNORED_ERRS, write_quorum) { - return Err(to_object_err(err.into(), vec![bucket, object])); - } - - // Create result ObjectInfo - let result_info = if vr.deleted { - ObjectInfo { - bucket: bucket.to_string(), - name: object.to_string(), - delete_marker: true, - mod_time: vr.mod_time, - version_id: vr.version_id, - ..Default::default() - } - } else { - ObjectInfo { - bucket: bucket.to_string(), - name: object.to_string(), - version_id: vr.version_id, - ..Default::default() - } - }; - - Ok(result_info) + Ok(ObjectInfo::from_file_info(&dfi, bucket, object, opts.versioned || opts.version_suspended)) } #[tracing::instrument(skip(self))] @@ -4368,6 +4400,17 @@ impl StorageAPI for SetDisks { unimplemented!() } + async fn walk( + self: Arc, + _rx: CancellationToken, + _bucket: &str, + _prefix: &str, + _result: tokio::sync::mpsc::Sender, + _opts: WalkOptions, + ) -> Result<()> { + unimplemented!() + } + #[tracing::instrument(skip(self))] async fn get_object_info(&self, bucket: &str, object: &str, opts: &ObjectOptions) -> Result { // Acquire a shared read-lock to protect consistency during info fetch @@ -4994,7 +5037,7 @@ impl StorageAPI for SetDisks { // Extract storage class from metadata, default to STANDARD if not found let storage_class = fi .metadata - .get(rustfs_filemeta::headers::AMZ_STORAGE_CLASS) + .get(AMZ_STORAGE_CLASS) .cloned() .unwrap_or_else(|| storageclass::STANDARD.to_string()); diff --git a/crates/ecstore/src/sets.rs b/crates/ecstore/src/sets.rs index 7e2bd9ca..2ba142f0 100644 --- a/crates/ecstore/src/sets.rs +++ b/crates/ecstore/src/sets.rs @@ -17,7 +17,7 @@ use std::{collections::HashMap, sync::Arc}; use crate::disk::error_reduce::count_errs; use crate::error::{Error, Result}; -use crate::store_api::ListPartsInfo; +use crate::store_api::{ListPartsInfo, ObjectInfoOrErr, WalkOptions}; use crate::{ disk::{ DiskAPI, DiskInfo, DiskOption, DiskStore, @@ -48,6 +48,7 @@ use rustfs_filemeta::FileInfo; use rustfs_madmin::heal_commands::{HealDriveInfo, HealResultItem}; use rustfs_utils::{crc_hash, path::path_join_buf, sip_hash}; use tokio::sync::RwLock; +use tokio_util::sync::CancellationToken; use uuid::Uuid; use tokio::sync::broadcast::{Receiver, Sender}; @@ -459,6 +460,17 @@ impl StorageAPI for Sets { unimplemented!() } + async fn walk( + self: Arc, + _rx: CancellationToken, + _bucket: &str, + _prefix: &str, + _result: tokio::sync::mpsc::Sender, + _opts: WalkOptions, + ) -> Result<()> { + unimplemented!() + } + #[tracing::instrument(skip(self))] async fn get_object_info(&self, bucket: &str, object: &str, opts: &ObjectOptions) -> Result { self.get_disks_by_key(object).get_object_info(bucket, object, opts).await @@ -543,7 +555,7 @@ impl StorageAPI for Sets { bucket: &str, objects: Vec, opts: ObjectOptions, - ) -> Result<(Vec, Vec>)> { + ) -> (Vec, Vec>) { // Default return value let mut del_objects = vec![DeletedObject::default(); objects.len()]; @@ -576,38 +588,11 @@ impl StorageAPI for Sets { } } 
- // let semaphore = Arc::new(Semaphore::new(num_cpus::get())); - // let mut jhs = Vec::with_capacity(semaphore.available_permits()); - - // for (k, v) in set_obj_map { - // let disks = self.get_disks(k); - // let semaphore = semaphore.clone(); - // let opts = opts.clone(); - // let bucket = bucket.to_string(); - - // let jh = tokio::spawn(async move { - // let _permit = semaphore.acquire().await.unwrap(); - // let objs: Vec = v.iter().map(|v| v.obj.clone()).collect(); - // disks.delete_objects(&bucket, objs, opts).await - // }); - // jhs.push(jh); - // } - - // let mut results = Vec::with_capacity(jhs.len()); - // for jh in jhs { - // results.push(jh.await?.unwrap()); - // } - - // for (dobjects, errs) in results { - // del_objects.extend(dobjects); - // del_errs.extend(errs); - // } - - // TODO: Implement concurrency + // TODO: concurrency for (k, v) in set_obj_map { let disks = self.get_disks(k); let objs: Vec = v.iter().map(|v| v.obj.clone()).collect(); - let (dobjects, errs) = disks.delete_objects(bucket, objs, opts.clone()).await?; + let (dobjects, errs) = disks.delete_objects(bucket, objs, opts.clone()).await; for (i, err) in errs.into_iter().enumerate() { let obj = v.get(i).unwrap(); @@ -618,7 +603,7 @@ impl StorageAPI for Sets { } } - Ok((del_objects, del_errs)) + (del_objects, del_errs) } async fn list_object_parts( diff --git a/crates/ecstore/src/store.rs b/crates/ecstore/src/store.rs index f2bbd93b..41a15b2a 100644 --- a/crates/ecstore/src/store.rs +++ b/crates/ecstore/src/store.rs @@ -34,7 +34,9 @@ use crate::global::{ use crate::notification_sys::get_global_notification_sys; use crate::pools::PoolMeta; use crate::rebalance::RebalanceMeta; -use crate::store_api::{ListMultipartsInfo, ListObjectVersionsInfo, ListPartsInfo, MultipartInfo, ObjectIO}; +use crate::store_api::{ + ListMultipartsInfo, ListObjectVersionsInfo, ListPartsInfo, MultipartInfo, ObjectIO, ObjectInfoOrErr, WalkOptions, +}; use crate::store_init::{check_disk_fatal_errs, ec_drives_no_config}; use crate::{ bucket::{lifecycle::bucket_lifecycle_ops::TransitionState, metadata::BucketMetadata}, @@ -68,8 +70,9 @@ use std::time::SystemTime; use std::{collections::HashMap, sync::Arc, time::Duration}; use time::OffsetDateTime; use tokio::select; -use tokio::sync::{RwLock, broadcast}; +use tokio::sync::RwLock; use tokio::time::sleep; +use tokio_util::sync::CancellationToken; use tracing::{debug, info}; use tracing::{error, warn}; use uuid::Uuid; @@ -109,7 +112,7 @@ pub struct ECStore { impl ECStore { #[allow(clippy::new_ret_no_self)] #[tracing::instrument(level = "debug", skip(endpoint_pools))] - pub async fn new(address: SocketAddr, endpoint_pools: EndpointServerPools) -> Result> { + pub async fn new(address: SocketAddr, endpoint_pools: EndpointServerPools, ctx: CancellationToken) -> Result> { // let layouts = DisksLayout::from_volumes(endpoints.as_slice())?; let mut deployment_id = None; @@ -251,7 +254,7 @@ impl ECStore { let wait_sec = 5; let mut exit_count = 0; loop { - if let Err(err) = ec.init().await { + if let Err(err) = ec.init(ctx.clone()).await { error!("init err: {}", err); error!("retry after {} second", wait_sec); sleep(Duration::from_secs(wait_sec)).await; @@ -273,7 +276,7 @@ impl ECStore { Ok(ec) } - pub async fn init(self: &Arc) -> Result<()> { + pub async fn init(self: &Arc, rx: CancellationToken) -> Result<()> { GLOBAL_BOOT_TIME.get_or_init(|| async { SystemTime::now() }).await; if self.load_rebalance_meta().await.is_ok() { @@ -317,18 +320,16 @@ impl ECStore { if !pool_indices.is_empty() { let idx = 
pool_indices[0]; if endpoints.as_ref()[idx].endpoints.as_ref()[0].is_local { - let (_tx, rx) = broadcast::channel(1); let store = self.clone(); tokio::spawn(async move { // wait 3 minutes for cluster init tokio::time::sleep(Duration::from_secs(60 * 3)).await; - if let Err(err) = store.decommission(rx.resubscribe(), pool_indices.clone()).await { + if let Err(err) = store.decommission(rx.clone(), pool_indices.clone()).await { if err == StorageError::DecommissionAlreadyRunning { for i in pool_indices.iter() { - store.do_decommission_in_routine(rx.resubscribe(), *i).await; + store.do_decommission_in_routine(rx.clone(), *i).await; } return; } @@ -700,9 +701,13 @@ impl ECStore { opts: &ObjectOptions, ) -> Result<(PoolObjInfo, Vec)> { let mut futures = Vec::new(); for pool in self.pools.iter() { - futures.push(pool.get_object_info(bucket, object, opts)); + let mut pool_opts = opts.clone(); + if !pool_opts.metadata_chg { + pool_opts.version_id = None; + } + + futures.push(async move { pool.get_object_info(bucket, object, &pool_opts).await }); } let results = join_all(futures).await; @@ -1351,6 +1356,17 @@ impl StorageAPI for ECStore { .await } + async fn walk( + self: Arc<Self>, + rx: CancellationToken, + bucket: &str, + prefix: &str, + result: tokio::sync::mpsc::Sender<ObjectInfoOrErr>, + opts: WalkOptions, + ) -> Result<()> { + self.walk_internal(rx, bucket, prefix, result, opts).await + } + #[tracing::instrument(skip(self))] async fn get_object_info(&self, bucket: &str, object: &str, opts: &ObjectOptions) -> Result<ObjectInfo> { check_object_args(bucket, object)?; @@ -1450,9 +1466,12 @@ impl StorageAPI for ECStore { let object = encode_dir_object(object); let object = object.as_str(); + let mut gopts = opts.clone(); + gopts.no_lock = true; + // find which pool currently holds the object let (mut pinfo, errs) = self - .get_pool_info_existing_with_opts(bucket, object, &opts) + .get_pool_info_existing_with_opts(bucket, object, &gopts) .await .map_err(|e| { if is_err_read_quorum(&e) { @@ -1513,7 +1532,7 @@ impl StorageAPI for ECStore { bucket: &str, objects: Vec<ObjectToDelete>, opts: ObjectOptions, - ) -> Result<(Vec<DeletedObject>, Vec<Option<Error>>)> { + ) -> (Vec<DeletedObject>, Vec<Option<Error>>) { // encode object name let objects: Vec<ObjectToDelete> = objects .iter() @@ -1534,131 +1553,160 @@ impl StorageAPI for ECStore { // TODO: nslock - let mut futures = Vec::with_capacity(objects.len()); + let mut futures = Vec::with_capacity(self.pools.len()); - for obj in objects.iter() { - futures.push(async move { - self.internal_get_pool_info_existing_with_opts( - bucket, - &obj.object_name, - &ObjectOptions { - no_lock: true, - ..Default::default() - }, - ) - .await - }); + for pool in self.pools.iter() { + futures.push(pool.delete_objects(bucket, objects.clone(), opts.clone())); } let results = join_all(futures).await; - // let mut jhs = Vec::new(); - // let semaphore = Arc::new(Semaphore::new(num_cpus::get())); - // let pools = Arc::new(self.pools.clone()); + for idx in 0..del_objects.len() { + for (dels, errs) in results.iter() { + if errs[idx].is_none() && dels[idx].found { + del_errs[idx] = None; + del_objects[idx] = dels[idx].clone(); + break; + } + + if del_errs[idx].is_none() { + del_errs[idx] = errs[idx].clone(); + del_objects[idx] = dels[idx].clone(); + } + } + } + + del_objects.iter_mut().for_each(|v| { + v.object_name = decode_dir_object(&v.object_name); + }); + + (del_objects, del_errs) + + // let mut futures = Vec::with_capacity(objects.len()); // for obj in objects.iter() { - // let (semaphore, pools, bucket, object_name, opt) = ( - // semaphore.clone(), - // pools.clone(), - // bucket.to_string(), - // 
obj.object_name.to_string(), - // ObjectOptions::default(), - // ); - - // let jh = tokio::spawn(async move { - // let _permit = semaphore.acquire().await.unwrap(); - // self.internal_get_pool_info_existing_with_opts(pools.as_ref(), &bucket, &object_name, &opt) - // .await + // futures.push(async move { + // self.internal_get_pool_info_existing_with_opts( + // bucket, + // &obj.object_name, + // &ObjectOptions { + // no_lock: true, + // ..Default::default() + // }, + // ) + // .await // }); - // jhs.push(jh); - // } - // let mut results = Vec::new(); - // for jh in jhs { - // results.push(jh.await.unwrap()); // } - // 记录 pool Index 对应的 objects pool_idx -> objects idx - let mut pool_obj_idx_map = HashMap::new(); - let mut orig_index_map = HashMap::new(); + // let results = join_all(futures).await; - for (i, res) in results.into_iter().enumerate() { - match res { - Ok((pinfo, _)) => { - if let Some(obj) = objects.get(i) { - if pinfo.object_info.delete_marker && obj.version_id.is_none() { - del_objects[i] = DeletedObject { - delete_marker: pinfo.object_info.delete_marker, - delete_marker_version_id: pinfo.object_info.version_id.map(|v| v.to_string()), - object_name: decode_dir_object(&pinfo.object_info.name), - delete_marker_mtime: pinfo.object_info.mod_time, - ..Default::default() - }; - continue; - } + // // let mut jhs = Vec::new(); + // // let semaphore = Arc::new(Semaphore::new(num_cpus::get())); + // // let pools = Arc::new(self.pools.clone()); - if !pool_obj_idx_map.contains_key(&pinfo.index) { - pool_obj_idx_map.insert(pinfo.index, vec![obj.clone()]); - } else if let Some(val) = pool_obj_idx_map.get_mut(&pinfo.index) { - val.push(obj.clone()); - } + // // for obj in objects.iter() { + // // let (semaphore, pools, bucket, object_name, opt) = ( + // // semaphore.clone(), + // // pools.clone(), + // // bucket.to_string(), + // // obj.object_name.to_string(), + // // ObjectOptions::default(), + // // ); - if !orig_index_map.contains_key(&pinfo.index) { - orig_index_map.insert(pinfo.index, vec![i]); - } else if let Some(val) = orig_index_map.get_mut(&pinfo.index) { - val.push(i); - } - } - } - Err(e) => { - if !is_err_object_not_found(&e) && is_err_version_not_found(&e) { - del_errs[i] = Some(e) - } + // // let jh = tokio::spawn(async move { + // // let _permit = semaphore.acquire().await.unwrap(); + // // self.internal_get_pool_info_existing_with_opts(pools.as_ref(), &bucket, &object_name, &opt) + // // .await + // // }); + // // jhs.push(jh); + // // } + // // let mut results = Vec::new(); + // // for jh in jhs { + // // results.push(jh.await.unwrap()); + // // } - if let Some(obj) = objects.get(i) { - del_objects[i] = DeletedObject { - object_name: decode_dir_object(&obj.object_name), - version_id: obj.version_id.map(|v| v.to_string()), - ..Default::default() - } - } - } - } - } + // // 记录 pool Index 对应的 objects pool_idx -> objects idx + // let mut pool_obj_idx_map = HashMap::new(); + // let mut orig_index_map = HashMap::new(); - if !pool_obj_idx_map.is_empty() { - for (i, sets) in self.pools.iter().enumerate() { - // 取 pool idx 对应的 objects index - if let Some(objs) = pool_obj_idx_map.get(&i) { - // 取对应 obj,理论上不会 none - // let objs: Vec = obj_idxs.iter().filter_map(|&idx| objects.get(idx).cloned()).collect(); + // for (i, res) in results.into_iter().enumerate() { + // match res { + // Ok((pinfo, _)) => { + // if let Some(obj) = objects.get(i) { + // if pinfo.object_info.delete_marker && obj.version_id.is_none() { + // del_objects[i] = DeletedObject { + // delete_marker: 
pinfo.object_info.delete_marker, + // delete_marker_version_id: pinfo.object_info.version_id.map(|v| v.to_string()), + // object_name: decode_dir_object(&pinfo.object_info.name), + // delete_marker_mtime: pinfo.object_info.mod_time, + // ..Default::default() + // }; + // continue; + // } - if objs.is_empty() { - continue; - } + // if !pool_obj_idx_map.contains_key(&pinfo.index) { + // pool_obj_idx_map.insert(pinfo.index, vec![obj.clone()]); + // } else if let Some(val) = pool_obj_idx_map.get_mut(&pinfo.index) { + // val.push(obj.clone()); + // } - let (pdel_objs, perrs) = sets.delete_objects(bucket, objs.clone(), opts.clone()).await?; + // if !orig_index_map.contains_key(&pinfo.index) { + // orig_index_map.insert(pinfo.index, vec![i]); + // } else if let Some(val) = orig_index_map.get_mut(&pinfo.index) { + // val.push(i); + // } + // } + // } + // Err(e) => { + // if !is_err_object_not_found(&e) && is_err_version_not_found(&e) { + // del_errs[i] = Some(e) + // } - // 同时存入不可能为 none - let org_indexes = orig_index_map.get(&i).unwrap(); + // if let Some(obj) = objects.get(i) { + // del_objects[i] = DeletedObject { + // object_name: decode_dir_object(&obj.object_name), + // version_id: obj.version_id.map(|v| v.to_string()), + // ..Default::default() + // } + // } + // } + // } + // } - // perrs 的顺序理论上跟 obj_idxs 顺序一致 - for (i, err) in perrs.into_iter().enumerate() { - let obj_idx = org_indexes[i]; + // if !pool_obj_idx_map.is_empty() { + // for (i, sets) in self.pools.iter().enumerate() { + // // 取 pool idx 对应的 objects index + // if let Some(objs) = pool_obj_idx_map.get(&i) { + // // 取对应 obj,理论上不会 none + // // let objs: Vec = obj_idxs.iter().filter_map(|&idx| objects.get(idx).cloned()).collect(); - if err.is_some() { - del_errs[obj_idx] = err; - } + // if objs.is_empty() { + // continue; + // } - let mut dobj = pdel_objs.get(i).unwrap().clone(); - dobj.object_name = decode_dir_object(&dobj.object_name); + // let (pdel_objs, perrs) = sets.delete_objects(bucket, objs.clone(), opts.clone()).await?; - del_objects[obj_idx] = dobj; - } - } - } - } + // // 同时存入不可能为 none + // let org_indexes = orig_index_map.get(&i).unwrap(); - Ok((del_objects, del_errs)) + // // perrs 的顺序理论上跟 obj_idxs 顺序一致 + // for (i, err) in perrs.into_iter().enumerate() { + // let obj_idx = org_indexes[i]; + + // if err.is_some() { + // del_errs[obj_idx] = err; + // } + + // let mut dobj = pdel_objs.get(i).unwrap().clone(); + // dobj.object_name = decode_dir_object(&dobj.object_name); + + // del_objects[obj_idx] = dobj; + // } + // } + // } + // } + + // Ok((del_objects, del_errs)) } #[tracing::instrument(skip(self))] diff --git a/crates/ecstore/src/store_api.rs b/crates/ecstore/src/store_api.rs index bfc51d59..ce1a3cce 100644 --- a/crates/ecstore/src/store_api.rs +++ b/crates/ecstore/src/store_api.rs @@ -13,8 +13,10 @@ // limitations under the License. 
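For reference, the per-pool merge that the rewritten `ECStore::delete_objects` in store.rs above applies can be stated on its own: every pool receives the full batch, the first pool that deleted an object cleanly wins for that index, and otherwise the outcome keeps being overwritten until an error is captured. A standalone restatement with simplified stand-in types (not the crate's actual API):

    // One (found, error) pair per pool, all for the same object index.
    fn merge_one(per_pool: &[(bool, Option<String>)]) -> (bool, Option<String>) {
        let mut out = (false, None);
        for (found, err) in per_pool {
            if err.is_none() && *found {
                return (true, None); // first clean deletion wins
            }
            if out.1.is_none() {
                out = (*found, err.clone()); // keep overwriting until an error is captured
            }
        }
        out
    }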
use crate::bucket::metadata_sys::get_versioning_config; +use crate::bucket::replication::REPLICATION_RESET; +use crate::bucket::replication::REPLICATION_STATUS; +use crate::bucket::replication::{ReplicateDecision, replication_statuses_map, version_purge_statuses_map}; use crate::bucket::versioning::VersioningApi as _; -use crate::cmd::bucket_replication::{ReplicationStatusType, VersionPurgeStatusType}; use crate::disk::DiskStore; use crate::error::{Error, Result}; use crate::store_utils::clean_metadata; @@ -25,20 +27,25 @@ use crate::{ }; use http::{HeaderMap, HeaderValue}; use rustfs_common::heal_channel::HealOpts; -use rustfs_filemeta::headers::RESERVED_METADATA_PREFIX_LOWER; -use rustfs_filemeta::{FileInfo, MetaCacheEntriesSorted, ObjectPartInfo, headers::AMZ_OBJECT_TAGGING}; +use rustfs_filemeta::{ + FileInfo, MetaCacheEntriesSorted, ObjectPartInfo, ReplicationState, ReplicationStatusType, VersionPurgeStatusType, +}; use rustfs_madmin::heal_commands::HealResultItem; use rustfs_rio::{DecompressReader, HashReader, LimitReader, WarpReader}; use rustfs_utils::CompressionAlgorithm; +use rustfs_utils::http::headers::{AMZ_OBJECT_TAGGING, RESERVED_METADATA_PREFIX_LOWER}; use rustfs_utils::path::decode_dir_object; use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::fmt::Debug; use std::io::Cursor; +use std::pin::Pin; use std::str::FromStr as _; use std::sync::Arc; +use std::task::{Context, Poll}; use time::OffsetDateTime; -use tokio::io::{AsyncRead, AsyncReadExt}; +use tokio::io::{AsyncRead, AsyncReadExt, ReadBuf}; +use tokio_util::sync::CancellationToken; use tracing::warn; use uuid::Uuid; @@ -221,6 +228,12 @@ impl GetObjectReader { } } +impl AsyncRead for GetObjectReader { + fn poll_read(mut self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut ReadBuf<'_>) -> Poll> { + Pin::new(&mut self.stream).poll_read(cx, buf) + } +} + #[derive(Debug, Clone)] pub struct HTTPRangeSpec { pub is_suffix_length: bool, @@ -326,6 +339,7 @@ pub struct ObjectOptions { pub skip_decommissioned: bool, pub skip_rebalancing: bool, + pub skip_free_version: bool, pub data_movement: bool, pub src_pool_idx: usize, @@ -334,11 +348,10 @@ pub struct ObjectOptions { pub metadata_chg: bool, pub http_preconditions: Option, + pub delete_replication: Option, pub replication_request: bool, pub delete_marker: bool, - pub skip_free_version: bool, - pub transition: TransitionOptions, pub expiration: ExpirationOptions, pub lifecycle_audit_event: LcAuditEvent, @@ -346,15 +359,66 @@ pub struct ObjectOptions { pub eval_metadata: Option>, } -// impl Default for ObjectOptions { -// fn default() -> Self { -// Self { -// max_parity: Default::default(), -// mod_time: OffsetDateTime::UNIX_EPOCH, -// part_number: Default::default(), -// } -// } -// } +impl ObjectOptions { + pub fn set_delete_replication_state(&mut self, dsc: ReplicateDecision) { + let mut rs = ReplicationState { + replicate_decision_str: dsc.to_string(), + ..Default::default() + }; + if self.version_id.is_none() { + rs.replication_status_internal = dsc.pending_status(); + rs.targets = replication_statuses_map(rs.replication_status_internal.as_deref().unwrap_or_default()); + } else { + rs.version_purge_status_internal = dsc.pending_status(); + rs.purge_targets = version_purge_statuses_map(rs.version_purge_status_internal.as_deref().unwrap_or_default()); + } + + self.delete_replication = Some(rs) + } + + pub fn set_replica_status(&mut self, status: ReplicationStatusType) { + if let Some(rs) = self.delete_replication.as_mut() { + rs.replica_status = 
status; + rs.replica_timestamp = Some(OffsetDateTime::now_utc()); + } else { + self.delete_replication = Some(ReplicationState { + replica_status: status, + replica_timestamp: Some(OffsetDateTime::now_utc()), + ..Default::default() + }); + } + } + + pub fn version_purge_status(&self) -> VersionPurgeStatusType { + self.delete_replication + .as_ref() + .map(|v| v.composite_version_purge_status()) + .unwrap_or(VersionPurgeStatusType::Empty) + } + + pub fn delete_marker_replication_status(&self) -> ReplicationStatusType { + self.delete_replication + .as_ref() + .map(|v| v.composite_replication_status()) + .unwrap_or(ReplicationStatusType::Empty) + } + + pub fn put_replication_state(&self) -> ReplicationState { + let rs = match self + .user_defined + .get(format!("{RESERVED_METADATA_PREFIX_LOWER}{REPLICATION_STATUS}").as_str()) + { + Some(v) => v.to_string(), + None => return ReplicationState::default(), + }; + + ReplicationState { + replication_status_internal: Some(rs.to_string()), + targets: replication_statuses_map(rs.as_str()), + ..Default::default() + } + } +} #[derive(Debug, Default, Serialize, Deserialize)] pub struct BucketOptions { @@ -423,6 +487,7 @@ pub struct ObjectInfo { pub is_latest: bool, pub content_type: Option, pub content_encoding: Option, + pub expires: Option, pub num_versions: usize, pub successor_mod_time: Option, pub put_object_reader: Option, @@ -430,10 +495,11 @@ pub struct ObjectInfo { pub inlined: bool, pub metadata_only: bool, pub version_only: bool, - pub replication_status_internal: String, + pub replication_status_internal: Option, pub replication_status: ReplicationStatusType, - pub version_purge_status_internal: String, + pub version_purge_status_internal: Option, pub version_purge_status: VersionPurgeStatusType, + pub replication_decision: String, pub checksum: Vec, } @@ -470,7 +536,9 @@ impl Clone for ObjectInfo { replication_status: self.replication_status.clone(), version_purge_status_internal: self.version_purge_status_internal.clone(), version_purge_status: self.version_purge_status.clone(), + replication_decision: self.replication_decision.clone(), checksum: Default::default(), + expires: self.expires, } } } @@ -665,7 +733,10 @@ impl ObjectInfo { }; for fi in versions.iter() { - // TODO:VersionPurgeStatus + if !fi.version_purge_status().is_empty() { + continue; + } + let versioned = vcfg.clone().map(|v| v.0.versioned(&entry.name)).unwrap_or_default(); objects.push(ObjectInfo::from_file_info(fi, bucket, &entry.name, versioned)); } @@ -770,6 +841,32 @@ impl ObjectInfo { objects } + + pub fn replication_state(&self) -> ReplicationState { + ReplicationState { + replication_status_internal: self.replication_status_internal.clone(), + version_purge_status_internal: self.version_purge_status_internal.clone(), + replicate_decision_str: self.replication_decision.clone(), + targets: replication_statuses_map(self.replication_status_internal.clone().unwrap_or_default().as_str()), + purge_targets: version_purge_statuses_map(self.version_purge_status_internal.clone().unwrap_or_default().as_str()), + reset_statuses_map: self + .user_defined + .iter() + .filter_map(|(k, v)| { + if k.starts_with(&format!("{RESERVED_METADATA_PREFIX_LOWER}{REPLICATION_RESET}")) { + Some(( + k.trim_start_matches(&format!("{RESERVED_METADATA_PREFIX_LOWER}{REPLICATION_RESET}-")) + .to_string(), + v.clone(), + )) + } else { + None + } + }) + .collect(), + ..Default::default() + } + } } #[derive(Debug, Default)] @@ -927,17 +1024,52 @@ pub struct ListPartsInfo { pub struct ObjectToDelete { pub 
object_name: String, pub version_id: Option, + pub delete_marker_replication_status: Option, + pub version_purge_status: Option, + pub version_purge_statuses: Option, + pub replicate_decision_str: Option, } + +impl ObjectToDelete { + pub fn replication_state(&self) -> ReplicationState { + ReplicationState { + replication_status_internal: self.delete_marker_replication_status.clone(), + version_purge_status_internal: self.version_purge_statuses.clone(), + replicate_decision_str: self.replicate_decision_str.clone().unwrap_or_default(), + targets: replication_statuses_map(self.delete_marker_replication_status.as_deref().unwrap_or_default()), + purge_targets: version_purge_statuses_map(self.version_purge_statuses.as_deref().unwrap_or_default()), + ..Default::default() + } + } +} + #[derive(Debug, Default, Clone)] pub struct DeletedObject { pub delete_marker: bool, - pub delete_marker_version_id: Option, + pub delete_marker_version_id: Option, pub object_name: String, - pub version_id: Option, + pub version_id: Option, // MTime of DeleteMarker on source that needs to be propagated to replica pub delete_marker_mtime: Option, // to support delete marker replication - // pub replication_state: ReplicationState, + pub replication_state: Option, + pub found: bool, +} + +impl DeletedObject { + pub fn version_purge_status(&self) -> VersionPurgeStatusType { + self.replication_state + .as_ref() + .map(|v| v.composite_version_purge_status()) + .unwrap_or(VersionPurgeStatusType::Empty) + } + + pub fn delete_marker_replication_status(&self) -> ReplicationStatusType { + self.replication_state + .as_ref() + .map(|v| v.composite_replication_status()) + .unwrap_or(ReplicationStatusType::Empty) + } } #[derive(Debug, Default, Clone)] @@ -949,8 +1081,33 @@ pub struct ListObjectVersionsInfo { pub prefixes: Vec, } +type WalkFilter = fn(&FileInfo) -> bool; + +#[derive(Clone, Default)] +pub struct WalkOptions { + pub filter: Option, // return WalkFilter returns 'true/false' + pub marker: Option, // set to skip until this object + pub latest_only: bool, // returns only latest versions for all matching objects + pub ask_disks: String, // dictates how many disks are being listed + pub versions_sort: WalkVersionsSortOrder, // sort order for versions of the same object; default: Ascending order in ModTime + pub limit: usize, // maximum number of items, 0 means no limit +} + +#[derive(Clone, Default, PartialEq, Eq)] +pub enum WalkVersionsSortOrder { + #[default] + Ascending, + Descending, +} + +#[derive(Debug)] +pub struct ObjectInfoOrErr { + pub item: Option, + pub err: Option, +} + #[async_trait::async_trait] -pub trait ObjectIO: Send + Sync + 'static { +pub trait ObjectIO: Send + Sync + Debug + 'static { // GetObjectNInfo FIXME: async fn get_object_reader( &self, @@ -966,7 +1123,7 @@ pub trait ObjectIO: Send + Sync + 'static { #[async_trait::async_trait] #[allow(clippy::too_many_arguments)] -pub trait StorageAPI: ObjectIO { +pub trait StorageAPI: ObjectIO + Debug { // NewNSLock TODO: // Shutdown TODO: // NSScanner TODO: @@ -1000,7 +1157,15 @@ pub trait StorageAPI: ObjectIO { delimiter: Option, max_keys: i32, ) -> Result; - // Walk TODO: + + async fn walk( + self: Arc, + rx: CancellationToken, + bucket: &str, + prefix: &str, + result: tokio::sync::mpsc::Sender, + opts: WalkOptions, + ) -> Result<()>; async fn get_object_info(&self, bucket: &str, object: &str, opts: &ObjectOptions) -> Result; async fn verify_object_integrity(&self, bucket: &str, object: &str, opts: &ObjectOptions) -> Result<()>; @@ -1021,7 +1186,7 @@ pub 
trait StorageAPI: ObjectIO { bucket: &str, objects: Vec, opts: ObjectOptions, - ) -> Result<(Vec, Vec>)>; + ) -> (Vec, Vec>); // TransitionObject TODO: // RestoreTransitionedObject TODO: diff --git a/crates/ecstore/src/store_list_objects.rs b/crates/ecstore/src/store_list_objects.rs index 24c99870..4d267066 100644 --- a/crates/ecstore/src/store_list_objects.rs +++ b/crates/ecstore/src/store_list_objects.rs @@ -23,20 +23,23 @@ use crate::error::{ }; use crate::set_disk::SetDisks; use crate::store::check_list_objs_args; -use crate::store_api::{ListObjectVersionsInfo, ListObjectsInfo, ObjectInfo, ObjectOptions}; +use crate::store_api::{ + ListObjectVersionsInfo, ListObjectsInfo, ObjectInfo, ObjectInfoOrErr, ObjectOptions, WalkOptions, WalkVersionsSortOrder, +}; use crate::store_utils::is_reserved_or_invalid_bucket; use crate::{store::ECStore, store_api::ListObjectsV2Info}; use futures::future::join_all; use rand::seq::SliceRandom; use rustfs_filemeta::{ - FileInfo, MetaCacheEntries, MetaCacheEntriesSorted, MetaCacheEntriesSortedResult, MetaCacheEntry, MetadataResolutionParams, + MetaCacheEntries, MetaCacheEntriesSorted, MetaCacheEntriesSortedResult, MetaCacheEntry, MetadataResolutionParams, merge_file_meta_versions, }; use rustfs_utils::path::{self, SLASH_SEPARATOR, base_dir_from_prefix}; use std::collections::HashMap; use std::sync::Arc; -use tokio::sync::broadcast::{self, Receiver as B_Receiver}; +use tokio::sync::broadcast::{self}; use tokio::sync::mpsc::{self, Receiver, Sender}; +use tokio_util::sync::CancellationToken; use tracing::{error, info}; use uuid::Uuid; @@ -529,14 +532,15 @@ impl ECStore { } // cancel channel - let (cancel_tx, cancel_rx) = broadcast::channel(1); + let cancel = CancellationToken::new(); + let (err_tx, mut err_rx) = broadcast::channel::>(1); let (sender, recv) = mpsc::channel(o.limit as usize); let store = self.clone(); let opts = o.clone(); - let cancel_rx1 = cancel_rx.resubscribe(); + let cancel_rx1 = cancel.clone(); let err_tx1 = err_tx.clone(); let job1 = tokio::spawn(async move { let mut opts = opts; @@ -547,7 +551,7 @@ impl ECStore { } }); - let cancel_rx2 = cancel_rx.resubscribe(); + let cancel_rx2 = cancel.clone(); let (result_tx, mut result_rx) = mpsc::channel(1); let err_tx2 = err_tx.clone(); @@ -559,7 +563,7 @@ impl ECStore { } // cancel call exit spawns - let _ = cancel_tx.send(true); + cancel.cancel(); }); let mut result = { @@ -615,7 +619,7 @@ impl ECStore { // Read all async fn list_merged( &self, - rx: B_Receiver, + rx: CancellationToken, opts: ListPathOptions, sender: Sender, ) -> Result> { @@ -631,9 +635,8 @@ impl ECStore { inputs.push(recv); let opts = opts.clone(); - - let rx = rx.resubscribe(); - futures.push(set.list_path(rx, opts, send)); + let rx_clone = rx.clone(); + futures.push(set.list_path(rx_clone, opts, send)); } } @@ -695,9 +698,9 @@ impl ECStore { } #[allow(unused_assignments)] - pub async fn walk( + pub async fn walk_internal( self: Arc, - rx: B_Receiver, + rx: CancellationToken, bucket: &str, prefix: &str, result: Sender, @@ -711,11 +714,11 @@ impl ECStore { for eset in self.pools.iter() { for set in eset.disk_set.iter() { let (mut disks, infos, _) = set.get_online_disks_with_healing_and_info(true).await; - let rx = rx.resubscribe(); let opts = opts.clone(); let (sender, list_out_rx) = mpsc::channel::(1); inputs.push(list_out_rx); + let rx_clone = rx.clone(); futures.push(async move { let mut ask_disks = get_list_quorum(&opts.ask_disks, set.set_drive_count as i32); if ask_disks == -1 { @@ -770,7 +773,7 @@ impl ECStore { let tx2 = 
sender.clone(); list_path_raw( - rx.resubscribe(), + rx_clone, ListPathRawOptions { disks: disks.iter().cloned().map(Some).collect(), fallback_disks: fallback_disks.iter().cloned().map(Some).collect(), @@ -936,33 +939,8 @@ impl ECStore { } } -type WalkFilter = fn(&FileInfo) -> bool; - -#[derive(Clone, Default)] -pub struct WalkOptions { - pub filter: Option, // return WalkFilter returns 'true/false' - pub marker: Option, // set to skip until this object - pub latest_only: bool, // returns only latest versions for all matching objects - pub ask_disks: String, // dictates how many disks are being listed - pub versions_sort: WalkVersionsSortOrder, // sort order for versions of the same object; default: Ascending order in ModTime - pub limit: usize, // maximum number of items, 0 means no limit -} - -#[derive(Clone, Default, PartialEq, Eq)] -pub enum WalkVersionsSortOrder { - #[default] - Ascending, - Descending, -} - -#[derive(Debug)] -pub struct ObjectInfoOrErr { - pub item: Option, - pub err: Option, -} - async fn gather_results( - _rx: B_Receiver, + _rx: CancellationToken, opts: ListPathOptions, recv: Receiver, results_tx: Sender, @@ -1067,12 +1045,11 @@ async fn select_from( // TODO: exit when cancel async fn merge_entry_channels( - rx: B_Receiver, + rx: CancellationToken, in_channels: Vec>, out_channel: Sender, read_quorum: usize, ) -> Result<()> { - let mut rx = rx; let mut in_channels = in_channels; if in_channels.len() == 1 { loop { @@ -1085,7 +1062,7 @@ async fn merge_entry_channels( return Ok(()) } }, - _ = rx.recv()=>{ + _ = rx.cancelled()=>{ info!("merge_entry_channels rx.recv() cancel"); return Ok(()) }, @@ -1228,7 +1205,7 @@ async fn merge_entry_channels( } impl SetDisks { - pub async fn list_path(&self, rx: B_Receiver, opts: ListPathOptions, sender: Sender) -> Result<()> { + pub async fn list_path(&self, rx: CancellationToken, opts: ListPathOptions, sender: Sender) -> Result<()> { let (mut disks, infos, _) = self.get_online_disks_with_healing_and_info(true).await; let mut ask_disks = get_list_quorum(&opts.ask_disks, self.set_drive_count as i32); diff --git a/crates/ecstore/src/store_utils.rs b/crates/ecstore/src/store_utils.rs index 7cbcb6d0..f9be4316 100644 --- a/crates/ecstore/src/store_utils.rs +++ b/crates/ecstore/src/store_utils.rs @@ -15,8 +15,8 @@ use crate::config::storageclass::STANDARD; use crate::disk::RUSTFS_META_BUCKET; use regex::Regex; -use rustfs_filemeta::headers::AMZ_OBJECT_TAGGING; -use rustfs_filemeta::headers::AMZ_STORAGE_CLASS; +use rustfs_utils::http::headers::AMZ_OBJECT_TAGGING; +use rustfs_utils::http::headers::AMZ_STORAGE_CLASS; use std::collections::HashMap; use std::io::{Error, Result}; diff --git a/crates/filemeta/Cargo.toml b/crates/filemeta/Cargo.toml index e3ed2bce..5c7a3589 100644 --- a/crates/filemeta/Cargo.toml +++ b/crates/filemeta/Cargo.toml @@ -35,7 +35,7 @@ uuid = { workspace = true, features = ["v4", "fast-rng", "serde"] } tokio = { workspace = true, features = ["io-util", "macros", "sync"] } xxhash-rust = { workspace = true, features = ["xxh64"] } bytes.workspace = true -rustfs-utils = { workspace = true, features = ["hash"] } +rustfs-utils = { workspace = true, features = ["hash","http"] } byteorder = { workspace = true } tracing.workspace = true thiserror.workspace = true diff --git a/crates/filemeta/src/fileinfo.rs b/crates/filemeta/src/fileinfo.rs index e7ae3636..b6fefe5d 100644 --- a/crates/filemeta/src/fileinfo.rs +++ b/crates/filemeta/src/fileinfo.rs @@ -13,11 +13,11 @@ // limitations under the License. 
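The listing changes above (and the iam store changes later in this patch) complete the swap from tokio broadcast channels to `tokio_util::sync::CancellationToken`. A minimal sketch of the pattern, assuming only the tokio and tokio-util APIs already used in this diff:

    use std::time::Duration;
    use tokio_util::sync::CancellationToken;

    // Clones share one cancellation source; tasks select on `cancelled()`
    // instead of receiving from a broadcast channel.
    #[tokio::main]
    async fn main() {
        let root = CancellationToken::new();
        let child = root.clone();
        let task = tokio::spawn(async move {
            tokio::select! {
                _ = child.cancelled() => println!("walk cancelled"),
                _ = tokio::time::sleep(Duration::from_secs(60)) => println!("walk finished"),
            }
        });
        root.cancel(); // replaces `let _ = cancel_tx.send(true);`
        task.await.unwrap();
    }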
use crate::error::{Error, Result}; -use crate::headers::RESERVED_METADATA_PREFIX_LOWER; -use crate::headers::RUSTFS_HEALING; +use crate::{ReplicationState, ReplicationStatusType, VersionPurgeStatusType}; use bytes::Bytes; use rmp_serde::Serializer; use rustfs_utils::HashAlgorithm; +use rustfs_utils::http::headers::{RESERVED_METADATA_PREFIX_LOWER, RUSTFS_HEALING}; use serde::Deserialize; use serde::Serialize; use std::collections::HashMap; @@ -202,7 +202,7 @@ pub struct FileInfo { // MarkDeleted marks this version as deleted pub mark_deleted: bool, // ReplicationState - Internal replication state to be passed back in ObjectInfo - // pub replication_state: Option, // TODO: implement ReplicationState + pub replication_state_internal: Option, pub data: Option, pub num_versions: usize, pub successor_mod_time: Option, @@ -471,6 +471,29 @@ impl FileInfo { // TODO: Add replication_state comparison when implemented // && self.replication_state == other.replication_state } + + pub fn version_purge_status(&self) -> VersionPurgeStatusType { + self.replication_state_internal + .as_ref() + .map(|v| v.composite_version_purge_status()) + .unwrap_or(VersionPurgeStatusType::Empty) + } + pub fn replication_status(&self) -> ReplicationStatusType { + self.replication_state_internal + .as_ref() + .map(|v| v.composite_replication_status()) + .unwrap_or(ReplicationStatusType::Empty) + } + pub fn delete_marker_replication_status(&self) -> ReplicationStatusType { + if self.deleted { + self.replication_state_internal + .as_ref() + .map(|v| v.composite_replication_status()) + .unwrap_or(ReplicationStatusType::Empty) + } else { + ReplicationStatusType::Empty + } + } } #[derive(Debug, Default, Clone, Serialize, Deserialize)] diff --git a/crates/filemeta/src/filemeta.rs b/crates/filemeta/src/filemeta.rs index bccf59fe..37fb1542 100644 --- a/crates/filemeta/src/filemeta.rs +++ b/crates/filemeta/src/filemeta.rs @@ -15,12 +15,13 @@ use crate::error::{Error, Result}; use crate::fileinfo::{ErasureAlgo, ErasureInfo, FileInfo, FileInfoVersions, ObjectPartInfo, RawFileInfo}; use crate::filemeta_inline::InlineData; -use crate::headers::{ +use crate::{ReplicationStatusType, VersionPurgeStatusType}; +use byteorder::ByteOrder; +use bytes::Bytes; +use rustfs_utils::http::headers::{ self, AMZ_META_UNENCRYPTED_CONTENT_LENGTH, AMZ_META_UNENCRYPTED_CONTENT_MD5, AMZ_STORAGE_CLASS, RESERVED_METADATA_PREFIX, RESERVED_METADATA_PREFIX_LOWER, VERSION_PURGE_STATUS_KEY, }; -use byteorder::ByteOrder; -use bytes::Bytes; use s3s::header::X_AMZ_RESTORE; use serde::{Deserialize, Serialize}; use std::cmp::Ordering; @@ -30,6 +31,7 @@ use std::io::{Read, Write}; use std::{collections::HashMap, io::Cursor}; use time::OffsetDateTime; use tokio::io::AsyncRead; +use tracing::error; use uuid::Uuid; use xxhash_rust::xxh64; @@ -159,39 +161,57 @@ impl FileMeta { let i = buf.len() as u64; // check version, buf = buf[8..] 
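+ // On-disk layout parsed below, schematically: [magic + version][msgpack bin len][metadata bytes][msgpack u32 crc][optional inline data].
+ // The CRC comparison a few lines down reduces to this sketch (XXHASH_SEED and the helpers are the crate's own):
+ //     let computed = xxh64::xxh64(meta, XXHASH_SEED) as u32;
+ //     if computed != stored_crc { return Err(Error::other("xl file crc check failed")); }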
- let (buf, _, _) = Self::check_xl2_v1(buf)?; + let (buf, _, _) = Self::check_xl2_v1(buf).map_err(|e| { + error!("failed to check XL2 v1 format: {}", e); + e + })?; let (mut size_buf, buf) = buf.split_at(5); // Get meta data, buf = crc + data - let bin_len = rmp::decode::read_bin_len(&mut size_buf)?; + let bin_len = rmp::decode::read_bin_len(&mut size_buf).map_err(|e| { + error!("failed to read binary length for metadata: {}", e); + Error::other(format!("failed to read binary length for metadata: {e}")) + })?; if buf.len() < bin_len as usize { + error!("insufficient data for metadata: expected {} bytes, got {} bytes", bin_len, buf.len()); return Err(Error::other("insufficient data for metadata")); } let (meta, buf) = buf.split_at(bin_len as usize); if buf.len() < 5 { + error!("insufficient data for CRC: expected 5 bytes, got {} bytes", buf.len()); return Err(Error::other("insufficient data for CRC")); } let (mut crc_buf, buf) = buf.split_at(5); // crc check - let crc = rmp::decode::read_u32(&mut crc_buf)?; + let crc = rmp::decode::read_u32(&mut crc_buf).map_err(|e| { + error!("failed to read CRC value: {}", e); + Error::other(format!("failed to read CRC value: {e}")) + })?; let meta_crc = xxh64::xxh64(meta, XXHASH_SEED) as u32; if crc != meta_crc { + error!("xl file crc check failed: expected CRC {:#x}, got {:#x}", meta_crc, crc); return Err(Error::other("xl file crc check failed")); } if !buf.is_empty() { self.data.update(buf); - self.data.validate()?; + self.data.validate().map_err(|e| { + error!("data validation failed: {}", e); + e + })?; } // Parse meta if !meta.is_empty() { - let (versions_len, _, meta_ver, meta) = Self::decode_xl_headers(meta)?; + let (versions_len, _, meta_ver, meta) = Self::decode_xl_headers(meta).map_err(|e| { + error!("failed to decode XL headers: {}", e); + e + })?; // let (_, meta) = meta.split_at(read_size as usize); @@ -201,24 +221,30 @@ impl FileMeta { let mut cur: Cursor<&[u8]> = Cursor::new(meta); for _ in 0..versions_len { - let bin_len = rmp::decode::read_bin_len(&mut cur)? as usize; - let start = cur.position() as usize; - let end = start + bin_len; - let header_buf = &meta[start..end]; + let bin_len = rmp::decode::read_bin_len(&mut cur).map_err(|e| { + error!("failed to read binary length for version header: {}", e); + Error::other(format!("failed to read binary length for version header: {e}")) + })? as usize; + + let mut header_buf = vec![0u8; bin_len]; + + cur.read_exact(&mut header_buf)?; let mut ver = FileMetaShallowVersion::default(); - ver.header.unmarshal_msg(header_buf)?; + ver.header.unmarshal_msg(&header_buf).map_err(|e| { + error!("failed to unmarshal version header: {}", e); + e + })?; - cur.set_position(end as u64); + let bin_len = rmp::decode::read_bin_len(&mut cur).map_err(|e| { + error!("failed to read binary length for version metadata: {}", e); + Error::other(format!("failed to read binary length for version metadata: {e}")) + })? as usize; - let bin_len = rmp::decode::read_bin_len(&mut cur)? 
as usize; - let start = cur.position() as usize; - let end = start + bin_len; - let ver_meta_buf = &meta[start..end]; + let mut ver_meta_buf = vec![0u8; bin_len]; + cur.read_exact(&mut ver_meta_buf)?; - ver.meta.extend_from_slice(ver_meta_buf); - - cur.set_position(end as u64); + ver.meta.extend_from_slice(&ver_meta_buf); self.versions.push(ver); } @@ -487,39 +513,39 @@ impl FileMeta { let version = FileMetaVersion::from(fi); + self.add_version_filemata(version) + } + + pub fn add_version_filemata(&mut self, version: FileMetaVersion) -> Result<()> { if !version.valid() { return Err(Error::other("file meta version invalid")); } - // should replace - for (idx, ver) in self.versions.iter().enumerate() { - if ver.header.version_id != vid { - continue; - } - - return self.set_idx(idx, version); + // 1000 is the limit of versions TODO: make it configurable + if self.versions.len() + 1 > 1000 { + return Err(Error::other( + "You've exceeded the limit on the number of versions you can create on this object", + )); } - // TODO: version count limit ! + if self.versions.is_empty() { + self.versions.push(FileMetaShallowVersion::try_from(version)?); + return Ok(()); + } + + let vid = version.get_version_id(); + + if let Some(fidx) = self.versions.iter().position(|v| v.header.version_id == vid) { + return self.set_idx(fidx, version); + } let mod_time = version.get_mod_time(); - // puth a -1 mod time value , so we can relplace this - self.versions.push(FileMetaShallowVersion { - header: FileMetaVersionHeader { - mod_time: Some(OffsetDateTime::from_unix_timestamp(-1)?), - ..Default::default() - }, - ..Default::default() - }); - for (idx, exist) in self.versions.iter().enumerate() { if let Some(ref ex_mt) = exist.header.mod_time { if let Some(ref in_md) = mod_time { if ex_mt <= in_md { - // insert self.versions.insert(idx, FileMetaShallowVersion::try_from(version)?); - self.versions.pop(); return Ok(()); } } @@ -527,35 +553,33 @@ impl FileMeta { } Err(Error::other("add_version failed")) - } - pub fn add_version_filemata(&mut self, ver: FileMetaVersion) -> Result<()> { - if !ver.valid() { - return Err(Error::other("attempted to add invalid version")); - } + // if !ver.valid() { + // return Err(Error::other("attempted to add invalid version")); + // } - if self.versions.len() + 1 >= 100 { - return Err(Error::other( - "You've exceeded the limit on the number of versions you can create on this object", - )); - } + // if self.versions.len() + 1 >= 100 { + // return Err(Error::other( + // "You've exceeded the limit on the number of versions you can create on this object", + // )); + // } - let mod_time = ver.get_mod_time(); - let encoded = ver.marshal_msg()?; - let new_version = FileMetaShallowVersion { - header: ver.header(), - meta: encoded, - }; + // let mod_time = ver.get_mod_time(); + // let encoded = ver.marshal_msg()?; + // let new_version = FileMetaShallowVersion { + // header: ver.header(), + // meta: encoded, + // }; - // Find the insertion position: insert before the first element with mod_time >= new mod_time - // This maintains descending order by mod_time (newest first) - let insert_pos = self - .versions - .iter() - .position(|existing| existing.header.mod_time <= mod_time) - .unwrap_or(self.versions.len()); - self.versions.insert(insert_pos, new_version); - Ok(()) + // // Find the insertion position: insert before the first element with mod_time >= new mod_time + // // This maintains descending order by mod_time (newest first) + // let insert_pos = self + // .versions + // .iter() + // 
.position(|existing| existing.header.mod_time <= mod_time) + // .unwrap_or(self.versions.len()); + // self.versions.insert(insert_pos, new_version); + // Ok(()) } // delete_version deletes version, returns data_dir @@ -575,10 +599,97 @@ impl FileMeta { } let mut update_version = fi.mark_deleted; - /*if fi.version_purge_status().is_empty() + if fi.version_purge_status().is_empty() + && (fi.delete_marker_replication_status() == ReplicationStatusType::Replica + || fi.delete_marker_replication_status() == ReplicationStatusType::Empty) { update_version = fi.mark_deleted; - }*/ + } else { + if fi.deleted + && fi.version_purge_status() != VersionPurgeStatusType::Complete + && (!fi.version_purge_status().is_empty() || fi.delete_marker_replication_status().is_empty()) + { + update_version = true; + } + + if !fi.version_purge_status().is_empty() && fi.version_purge_status() != VersionPurgeStatusType::Complete { + update_version = true; + } + } + + if fi.deleted { + if !fi.delete_marker_replication_status().is_empty() { + if let Some(delete_marker) = ventry.delete_marker.as_mut() { + if fi.delete_marker_replication_status() == ReplicationStatusType::Replica { + delete_marker.meta_sys.insert( + format!("{}{}", RESERVED_METADATA_PREFIX_LOWER, "replica-status"), + fi.replication_state_internal + .as_ref() + .map(|v| v.replica_status.clone()) + .unwrap_or_default() + .as_str() + .as_bytes() + .to_vec(), + ); + delete_marker.meta_sys.insert( + format!("{}{}", RESERVED_METADATA_PREFIX_LOWER, "replica-timestamp"), + fi.replication_state_internal + .as_ref() + .map(|v| v.replica_timestamp.unwrap_or(OffsetDateTime::UNIX_EPOCH).to_string()) + .unwrap_or_default() + .as_bytes() + .to_vec(), + ); + } else { + delete_marker.meta_sys.insert( + format!("{}{}", RESERVED_METADATA_PREFIX_LOWER, "replication-status"), + fi.replication_state_internal + .as_ref() + .map(|v| v.replication_status_internal.clone().unwrap_or_default()) + .unwrap_or_default() + .as_bytes() + .to_vec(), + ); + delete_marker.meta_sys.insert( + format!("{}{}", RESERVED_METADATA_PREFIX_LOWER, "replication-timestamp"), + fi.replication_state_internal + .as_ref() + .map(|v| v.replication_timestamp.unwrap_or(OffsetDateTime::UNIX_EPOCH).to_string()) + .unwrap_or_default() + .as_bytes() + .to_vec(), + ); + } + } + } + + if !fi.version_purge_status().is_empty() { + if let Some(delete_marker) = ventry.delete_marker.as_mut() { + delete_marker.meta_sys.insert( + VERSION_PURGE_STATUS_KEY.to_string(), + fi.replication_state_internal + .as_ref() + .map(|v| v.version_purge_status_internal.clone().unwrap_or_default()) + .unwrap_or_default() + .as_bytes() + .to_vec(), + ); + } + } + + if let Some(delete_marker) = ventry.delete_marker.as_mut() { + for (k, v) in fi + .replication_state_internal + .as_ref() + .map(|v| v.reset_statuses_map.clone()) + .unwrap_or_default() + { + delete_marker.meta_sys.insert(k.clone(), v.clone().as_bytes().to_vec()); + } + } + } + + // ??? 
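+ // The check below keeps the pre-existing rule: a version whose tier transition is COMPLETE is not rewritten in place.
+ // For orientation, the reserved meta_sys entries written above follow this scheme (sketch; the exact prefix is the
+ // RESERVED_METADATA_PREFIX_LOWER constant from rustfs_utils):
+ //     "<prefix>replica-status"         -> ReplicationStatusType::as_str(), e.g. b"REPLICA"
+ //     "<prefix>replica-timestamp"      -> an OffsetDateTime rendered as a string
+ //     "<prefix>replication-status"     -> the internal "arn1=COMPLETED;arn2=PENDING;" form
+ //     "<prefix>replication-timestamp"  -> an OffsetDateTime rendered as a string
+ // plus VERSION_PURGE_STATUS_KEY for purge state and the per-target reset keys carried in reset_statuses_map.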
if fi.transition_status == TRANSITION_COMPLETE { update_version = false; } @@ -591,22 +702,111 @@ impl FileMeta { match ver.header.version_type { VersionType::Invalid | VersionType::Legacy => return Err(Error::other("invalid file meta version")), VersionType::Delete => { - self.versions.remove(i); - if fi.deleted && fi.version_id.is_none() { - self.add_version_filemata(ventry)?; + if update_version { + let mut v = self.get_idx(i)?; + if v.delete_marker.is_none() { + v.delete_marker = Some(MetaDeleteMarker { + version_id: fi.version_id, + mod_time: fi.mod_time, + meta_sys: HashMap::new(), + }); + } + + if let Some(delete_marker) = v.delete_marker.as_mut() { + if !fi.delete_marker_replication_status().is_empty() { + if fi.delete_marker_replication_status() == ReplicationStatusType::Replica { + delete_marker.meta_sys.insert( + format!("{}{}", RESERVED_METADATA_PREFIX_LOWER, "replica-status"), + fi.replication_state_internal + .as_ref() + .map(|v| v.replica_status.clone()) + .unwrap_or_default() + .as_str() + .as_bytes() + .to_vec(), + ); + delete_marker.meta_sys.insert( + format!("{}{}", RESERVED_METADATA_PREFIX_LOWER, "replica-timestamp"), + fi.replication_state_internal + .as_ref() + .map(|v| v.replica_timestamp.unwrap_or(OffsetDateTime::UNIX_EPOCH).to_string()) + .unwrap_or_default() + .as_bytes() + .to_vec(), + ); + } else { + delete_marker.meta_sys.insert( + format!("{}{}", RESERVED_METADATA_PREFIX_LOWER, "replication-status"), + fi.replication_state_internal + .as_ref() + .map(|v| v.replication_status_internal.clone().unwrap_or_default()) + .unwrap_or_default() + .as_bytes() + .to_vec(), + ); + delete_marker.meta_sys.insert( + format!("{}{}", RESERVED_METADATA_PREFIX_LOWER, "replication-timestamp"), + fi.replication_state_internal + .as_ref() + .map(|v| v.replication_timestamp.unwrap_or(OffsetDateTime::UNIX_EPOCH).to_string()) + .unwrap_or_default() + .as_bytes() + .to_vec(), + ); + } + } + + for (k, v) in fi + .replication_state_internal + .as_ref() + .map(|v| v.reset_statuses_map.clone()) + .unwrap_or_default() + { + delete_marker.meta_sys.insert(k.clone(), v.clone().as_bytes().to_vec()); + } + } + + self.set_idx(i, v)?; return Ok(None); } + self.versions.remove(i); + + if (fi.mark_deleted && fi.version_purge_status() != VersionPurgeStatusType::Complete) + || (fi.deleted && fi.version_id.is_none()) + { + self.add_version_filemata(ventry)?; + } return Ok(None); } VersionType::Object => { if update_version && !fi.deleted { - let v = self.get_idx(i)?; + let mut v = self.get_idx(i)?; - self.versions.remove(i); + if let Some(obj) = v.object.as_mut() { + obj.meta_sys.insert( + VERSION_PURGE_STATUS_KEY.to_string(), + fi.replication_state_internal + .as_ref() + .map(|v| v.version_purge_status_internal.clone().unwrap_or_default()) + .unwrap_or_default() + .as_bytes() + .to_vec(), + ); + for (k, v) in fi + .replication_state_internal + .as_ref() + .map(|v| v.reset_statuses_map.clone()) + .unwrap_or_default() + { + obj.meta_sys.insert(k.clone(), v.clone().as_bytes().to_vec()); + } + } - let a = v.object.map(|v| v.data_dir).unwrap_or_default(); - return Ok(a); + let old_dir = v.object.as_ref().map(|v| v.data_dir).unwrap_or_default(); + self.set_idx(i, v)?; + + return Ok(old_dir); } } } @@ -641,31 +841,37 @@ impl FileMeta { let obj_version_id = obj.version_id; let obj_data_dir = obj.data_dir; - if fi.expire_restored { + let mut err = if fi.expire_restored { obj.remove_restore_hdrs(); - self.set_idx(i, ver)?; + self.set_idx(i, ver).err() } else if fi.transition_status == TRANSITION_COMPLETE { 
obj.set_transition(fi); obj.reset_inline_data(); - self.set_idx(i, ver)?; + self.set_idx(i, ver).err() } else { self.versions.remove(i); let (free_version, to_free) = obj.init_free_version(fi); if to_free { - self.add_version_filemata(free_version)?; + self.add_version_filemata(free_version).err() + } else { + None } - } + }; if fi.deleted { - self.add_version_filemata(ventry)?; + err = self.add_version_filemata(ventry).err(); } if self.shared_data_dir_count(obj_version_id, obj_data_dir) > 0 { return Ok(None); } + if let Some(e) = err { + return Err(e); + } + Ok(obj_data_dir) } @@ -1642,17 +1848,15 @@ impl MetaObject { free_entry.delete_marker = Some(MetaDeleteMarker { version_id: Some(vid), mod_time: self.mod_time, - meta_sys: Some(HashMap::>::new()), + meta_sys: HashMap::>::new(), }); - free_entry - .delete_marker - .as_mut() - .unwrap() + let delete_marker = free_entry.delete_marker.as_mut().unwrap(); + + delete_marker .meta_sys - .as_mut() - .unwrap() .insert(format!("{RESERVED_METADATA_PREFIX_LOWER}{FREE_VERSION}"), vec![]); + let tier_key = format!("{RESERVED_METADATA_PREFIX_LOWER}{TRANSITION_TIER}"); let tier_obj_key = format!("{RESERVED_METADATA_PREFIX_LOWER}{TRANSITIONED_OBJECTNAME}"); let tier_obj_vid_key = format!("{RESERVED_METADATA_PREFIX_LOWER}{TRANSITIONED_VERSION_ID}"); @@ -1660,14 +1864,7 @@ impl MetaObject { let aa = [tier_key, tier_obj_key, tier_obj_vid_key]; for (k, v) in &self.meta_sys { if aa.contains(k) { - free_entry - .delete_marker - .as_mut() - .unwrap() - .meta_sys - .as_mut() - .unwrap() - .insert(k.clone(), v.clone()); + delete_marker.meta_sys.insert(k.clone(), v.clone()); } } return (free_entry, true); @@ -1737,19 +1934,16 @@ pub struct MetaDeleteMarker { #[serde(rename = "MTime")] pub mod_time: Option, // Object delete marker modified time #[serde(rename = "MetaSys")] - pub meta_sys: Option>>, // Delete marker internal metadata + pub meta_sys: HashMap>, // Delete marker internal metadata } impl MetaDeleteMarker { pub fn free_version(&self) -> bool { - self.meta_sys - .as_ref() - .map(|v| v.get(FREE_VERSION_META_HEADER).is_some()) - .unwrap_or_default() + self.meta_sys.contains_key(FREE_VERSION_META_HEADER) } pub fn into_fileinfo(&self, volume: &str, path: &str, _all_parts: bool) -> FileInfo { - let metadata = self.meta_sys.clone().unwrap_or_default(); + let metadata = self.meta_sys.clone(); FileInfo { version_id: self.version_id.filter(|&vid| !vid.is_nil()), @@ -1895,7 +2089,7 @@ impl From for MetaDeleteMarker { Self { version_id: value.version_id, mod_time: value.mod_time, - meta_sys: None, + meta_sys: HashMap::new(), } } } @@ -2794,7 +2988,7 @@ mod test { let delete_marker = MetaDeleteMarker { version_id: Some(Uuid::new_v4()), mod_time: Some(OffsetDateTime::now_utc()), - meta_sys: None, + meta_sys: HashMap::new(), }; let delete_version = FileMetaVersion { diff --git a/crates/filemeta/src/lib.rs b/crates/filemeta/src/lib.rs index 48b1bf16..dc7fa4fd 100644 --- a/crates/filemeta/src/lib.rs +++ b/crates/filemeta/src/lib.rs @@ -16,8 +16,9 @@ mod error; pub mod fileinfo; mod filemeta; mod filemeta_inline; -pub mod headers; -pub mod metacache; +// pub mod headers; +mod metacache; +mod replication; pub mod test_data; @@ -26,3 +27,4 @@ pub use fileinfo::*; pub use filemeta::*; pub use filemeta_inline::*; pub use metacache::*; +pub use replication::*; diff --git a/crates/filemeta/src/replication.rs b/crates/filemeta/src/replication.rs new file mode 100644 index 00000000..47eb7add --- /dev/null +++ b/crates/filemeta/src/replication.rs @@ -0,0 +1,494 @@ +use 
core::fmt; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use std::time::Duration; +use time::OffsetDateTime; + +/// StatusType of Replication for x-amz-replication-status header +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default, Hash)] +pub enum ReplicationStatusType { + /// Pending - replication is pending. + Pending, + /// Completed - replication completed ok. + Completed, + /// CompletedLegacy was called "COMPLETE" incorrectly. + CompletedLegacy, + /// Failed - replication failed. + Failed, + /// Replica - this is a replica. + Replica, + #[default] + Empty, +} + +impl ReplicationStatusType { + /// Returns string representation of status + pub fn as_str(&self) -> &'static str { + match self { + ReplicationStatusType::Pending => "PENDING", + ReplicationStatusType::Completed => "COMPLETED", + ReplicationStatusType::CompletedLegacy => "COMPLETE", + ReplicationStatusType::Failed => "FAILED", + ReplicationStatusType::Replica => "REPLICA", + ReplicationStatusType::Empty => "", + } + } + pub fn is_empty(&self) -> bool { + matches!(self, ReplicationStatusType::Empty) + } +} + +impl fmt::Display for ReplicationStatusType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.as_str()) + } +} + +impl From<&str> for ReplicationStatusType { + fn from(s: &str) -> Self { + match s { + "PENDING" => ReplicationStatusType::Pending, + "COMPLETED" => ReplicationStatusType::Completed, + "COMPLETE" => ReplicationStatusType::CompletedLegacy, + "FAILED" => ReplicationStatusType::Failed, + "REPLICA" => ReplicationStatusType::Replica, + _ => ReplicationStatusType::Empty, + } + } +} + +impl From for ReplicationStatusType { + fn from(status: VersionPurgeStatusType) -> Self { + match status { + VersionPurgeStatusType::Pending => ReplicationStatusType::Pending, + VersionPurgeStatusType::Complete => ReplicationStatusType::Completed, + VersionPurgeStatusType::Failed => ReplicationStatusType::Failed, + VersionPurgeStatusType::Empty => ReplicationStatusType::Empty, + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)] +pub enum VersionPurgeStatusType { + Pending, + Complete, + Failed, + #[default] + Empty, +} + +impl VersionPurgeStatusType { + /// Returns string representation of version purge status + pub fn as_str(&self) -> &'static str { + match self { + VersionPurgeStatusType::Pending => "PENDING", + VersionPurgeStatusType::Complete => "COMPLETE", + VersionPurgeStatusType::Failed => "FAILED", + VersionPurgeStatusType::Empty => "", + } + } + + /// Returns true if the version is pending purge. 
+ pub fn is_pending(&self) -> bool { + matches!(self, VersionPurgeStatusType::Pending | VersionPurgeStatusType::Failed) + } + + pub fn is_empty(&self) -> bool { + matches!(self, VersionPurgeStatusType::Empty) + } +} + +impl fmt::Display for VersionPurgeStatusType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.as_str()) + } +} + +impl From<&str> for VersionPurgeStatusType { + fn from(s: &str) -> Self { + match s { + "PENDING" => VersionPurgeStatusType::Pending, + "COMPLETE" => VersionPurgeStatusType::Complete, + "FAILED" => VersionPurgeStatusType::Failed, + _ => VersionPurgeStatusType::Empty, + } + } +} + +/// Type - replication type enum +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)] +pub enum ReplicationType { + #[default] + Unset, + Object, + Delete, + Metadata, + Heal, + ExistingObject, + Resync, + All, +} + +impl ReplicationType { + pub fn as_str(&self) -> &'static str { + match self { + ReplicationType::Unset => "", + ReplicationType::Object => "OBJECT", + ReplicationType::Delete => "DELETE", + ReplicationType::Metadata => "METADATA", + ReplicationType::Heal => "HEAL", + ReplicationType::ExistingObject => "EXISTING_OBJECT", + ReplicationType::Resync => "RESYNC", + ReplicationType::All => "ALL", + } + } + + pub fn is_valid(&self) -> bool { + matches!( + self, + ReplicationType::Object + | ReplicationType::Delete + | ReplicationType::Metadata + | ReplicationType::Heal + | ReplicationType::ExistingObject + | ReplicationType::Resync + | ReplicationType::All + ) + } + + pub fn is_data_replication(&self) -> bool { + matches!(self, ReplicationType::Object | ReplicationType::Delete | ReplicationType::Heal) + } +} + +impl fmt::Display for ReplicationType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.as_str()) + } +} + +impl From<&str> for ReplicationType { + fn from(s: &str) -> Self { + match s { + "UNSET" => ReplicationType::Unset, + "OBJECT" => ReplicationType::Object, + "DELETE" => ReplicationType::Delete, + "METADATA" => ReplicationType::Metadata, + "HEAL" => ReplicationType::Heal, + "EXISTING_OBJECT" => ReplicationType::ExistingObject, + "RESYNC" => ReplicationType::Resync, + "ALL" => ReplicationType::All, + _ => ReplicationType::Unset, + } + } +} + +/// ReplicationState represents internal replication state +#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)] +pub struct ReplicationState { + pub replica_timestamp: Option, + pub replica_status: ReplicationStatusType, + pub delete_marker: bool, + pub replication_timestamp: Option, + pub replication_status_internal: Option, + pub version_purge_status_internal: Option, + pub replicate_decision_str: String, + pub targets: HashMap, + pub purge_targets: HashMap, + pub reset_statuses_map: HashMap, +} + +impl ReplicationState { + pub fn new() -> Self { + Self::default() + } + + /// Returns true if replication state is identical for version purge statuses and replication statuses + pub fn equal(&self, other: &ReplicationState) -> bool { + self.replica_status == other.replica_status + && self.replication_status_internal == other.replication_status_internal + && self.version_purge_status_internal == other.version_purge_status_internal + } + + /// Returns overall replication status for the object version being replicated + pub fn composite_replication_status(&self) -> ReplicationStatusType { + if let Some(replication_status_internal) = &self.replication_status_internal { + match 
ReplicationStatusType::from(replication_status_internal.as_str()) { + ReplicationStatusType::Pending + | ReplicationStatusType::Completed + | ReplicationStatusType::Failed + | ReplicationStatusType::Replica => { + return ReplicationStatusType::from(replication_status_internal.as_str()); + } + _ => { + let repl_status = get_composite_replication_status(&self.targets); + + if self.replica_timestamp.is_none() { + return repl_status; + } + + if repl_status == ReplicationStatusType::Completed { + if let (Some(replica_timestamp), Some(replication_timestamp)) = + (self.replica_timestamp, self.replication_timestamp) + { + if replica_timestamp > replication_timestamp { + return self.replica_status.clone(); + } + } + } + + return repl_status; + } + } + } else if self.replica_status != ReplicationStatusType::default() { + return self.replica_status.clone(); + } + + ReplicationStatusType::default() + } + + /// Returns overall replication purge status for the permanent delete being replicated + pub fn composite_version_purge_status(&self) -> VersionPurgeStatusType { + match VersionPurgeStatusType::from(self.version_purge_status_internal.clone().unwrap_or_default().as_str()) { + VersionPurgeStatusType::Pending | VersionPurgeStatusType::Complete | VersionPurgeStatusType::Failed => { + VersionPurgeStatusType::from(self.version_purge_status_internal.clone().unwrap_or_default().as_str()) + } + _ => get_composite_version_purge_status(&self.purge_targets), + } + } + + /// Returns replicatedInfos struct initialized with the previous state of replication + pub fn target_state(&self, arn: &str) -> ReplicatedTargetInfo { + ReplicatedTargetInfo { + arn: arn.to_string(), + prev_replication_status: self.targets.get(arn).cloned().unwrap_or_default(), + version_purge_status: self.purge_targets.get(arn).cloned().unwrap_or_default(), + resync_timestamp: self.reset_statuses_map.get(arn).cloned().unwrap_or_default(), + ..Default::default() + } + } +} + +pub fn get_composite_replication_status(targets: &HashMap) -> ReplicationStatusType { + if targets.is_empty() { + return ReplicationStatusType::Empty; + } + + let mut completed = 0; + for status in targets.values() { + match status { + ReplicationStatusType::Failed => return ReplicationStatusType::Failed, + ReplicationStatusType::Completed => completed += 1, + _ => {} + } + } + + if completed == targets.len() { + ReplicationStatusType::Completed + } else { + ReplicationStatusType::Pending + } +} + +pub fn get_composite_version_purge_status(targets: &HashMap) -> VersionPurgeStatusType { + if targets.is_empty() { + return VersionPurgeStatusType::default(); + } + + let mut completed = 0; + for status in targets.values() { + match status { + VersionPurgeStatusType::Failed => return VersionPurgeStatusType::Failed, + VersionPurgeStatusType::Complete => completed += 1, + _ => {} + } + } + + if completed == targets.len() { + VersionPurgeStatusType::Complete + } else { + VersionPurgeStatusType::Pending + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)] +pub enum ReplicationAction { + /// Replicate all data + All, + /// Replicate only metadata + Metadata, + /// Do not replicate + #[default] + None, +} + +impl ReplicationAction { + /// Returns string representation of replication action + pub fn as_str(&self) -> &'static str { + match self { + ReplicationAction::All => "all", + ReplicationAction::Metadata => "metadata", + ReplicationAction::None => "none", + } + } +} + +impl fmt::Display for ReplicationAction { + fn fmt(&self, f: &mut 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
+pub enum ReplicationAction {
+    /// Replicate all data
+    All,
+    /// Replicate only metadata
+    Metadata,
+    /// Do not replicate
+    #[default]
+    None,
+}
+
+impl ReplicationAction {
+    /// Returns string representation of replication action
+    pub fn as_str(&self) -> &'static str {
+        match self {
+            ReplicationAction::All => "all",
+            ReplicationAction::Metadata => "metadata",
+            ReplicationAction::None => "none",
+        }
+    }
+}
+
+impl fmt::Display for ReplicationAction {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}", self.as_str())
+    }
+}
+
+impl From<&str> for ReplicationAction {
+    fn from(s: &str) -> Self {
+        match s {
+            "all" => ReplicationAction::All,
+            "metadata" => ReplicationAction::Metadata,
+            "none" => ReplicationAction::None,
+            _ => ReplicationAction::None,
+        }
+    }
+}
+
+/// ReplicatedTargetInfo struct represents replication info on a target
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+pub struct ReplicatedTargetInfo {
+    pub arn: String,
+    pub size: i64,
+    pub duration: Duration,
+    pub replication_action: ReplicationAction,
+    pub op_type: ReplicationType,
+    pub replication_status: ReplicationStatusType,
+    pub prev_replication_status: ReplicationStatusType,
+    pub version_purge_status: VersionPurgeStatusType,
+    pub resync_timestamp: String,
+    pub replication_resynced: bool,
+    pub endpoint: String,
+    pub secure: bool,
+    pub error: Option<String>,
+}
+
+impl ReplicatedTargetInfo {
+    /// Returns true for a target if arn is empty
+    pub fn is_empty(&self) -> bool {
+        self.arn.is_empty()
+    }
+}
+
+/// ReplicatedInfos struct contains replication information for multiple targets
+#[derive(Debug, Clone)]
+pub struct ReplicatedInfos {
+    pub replication_timestamp: Option<OffsetDateTime>,
+    pub targets: Vec<ReplicatedTargetInfo>,
+}
+
+impl ReplicatedInfos {
+    /// Returns the total size of completed replications
+    pub fn completed_size(&self) -> i64 {
+        let mut sz = 0i64;
+        for target in &self.targets {
+            if target.is_empty() {
+                continue;
+            }
+            if target.replication_status == ReplicationStatusType::Completed
+                && target.prev_replication_status != ReplicationStatusType::Completed
+            {
+                sz += target.size;
+            }
+        }
+        sz
+    }
+
+    /// Returns true if replication was attempted on any of the targets for the object version queued
+    pub fn replication_resynced(&self) -> bool {
+        for target in &self.targets {
+            if target.is_empty() || !target.replication_resynced {
+                continue;
+            }
+            return true;
+        }
+        false
+    }
+
+    /// Returns internal representation of replication status for all targets
+    pub fn replication_status_internal(&self) -> Option<String> {
+        let mut result = String::new();
+        for target in &self.targets {
+            if target.is_empty() {
+                continue;
+            }
+            result.push_str(&format!("{}={};", target.arn, target.replication_status));
+        }
+        if result.is_empty() { None } else { Some(result) }
+    }
+
+    /// Returns overall replication status across all targets
+    pub fn replication_status(&self) -> ReplicationStatusType {
+        if self.targets.is_empty() {
+            return ReplicationStatusType::Empty;
+        }
+
+        let mut completed = 0;
+        for target in &self.targets {
+            match target.replication_status {
+                ReplicationStatusType::Failed => return ReplicationStatusType::Failed,
+                ReplicationStatusType::Completed => completed += 1,
+                _ => {}
+            }
+        }
+
+        if completed == self.targets.len() {
+            ReplicationStatusType::Completed
+        } else {
+            ReplicationStatusType::Pending
+        }
+    }
+
+    /// Returns overall version purge status across all targets
+    pub fn version_purge_status(&self) -> VersionPurgeStatusType {
+        if self.targets.is_empty() {
+            return VersionPurgeStatusType::Empty;
+        }
+
+        let mut completed = 0;
+        for target in &self.targets {
+            match target.version_purge_status {
+                VersionPurgeStatusType::Failed => return VersionPurgeStatusType::Failed,
+                VersionPurgeStatusType::Complete => completed += 1,
+                _ => {}
+            }
+        }
+
+        if completed == self.targets.len() {
+            VersionPurgeStatusType::Complete
+        } else {
+            VersionPurgeStatusType::Pending
+        }
+    }
+
+    /// Returns internal representation of version purge status for all targets
+    pub fn version_purge_status_internal(&self) -> Option<String> {
+        let mut result = String::new();
+        for target in &self.targets {
+            if target.is_empty() || target.version_purge_status.is_empty() {
+                continue;
+            }
+            result.push_str(&format!("{}={};", target.arn, target.version_purge_status));
+        }
+        if result.is_empty() { None } else { Some(result) }
+    }
+
+    /// Returns replication action based on target that actually performed replication
+    pub fn action(&self) -> ReplicationAction {
+        for target in &self.targets {
+            if target.is_empty() {
+                continue;
+            }
+            // rely on replication action from target that actually performed replication now.
+            if target.prev_replication_status != ReplicationStatusType::Completed {
+                return target.replication_action;
+            }
+        }
+        ReplicationAction::None
+    }
+}
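The two `*_internal` methods above serialize per-target state as `arn=STATUS;` pairs packed into one string. A standalone sketch of that wire format, inferred only from the `format!` calls above (the inverse parser is assumed, not shown in this patch; the ARN value is illustrative):

use std::collections::HashMap;

// Encode per-target statuses as "arn=STATUS;" pairs, as replication_status_internal does.
fn encode(targets: &HashMap<String, String>) -> String {
    targets.iter().map(|(arn, st)| format!("{arn}={st};")).collect()
}

// Illustrative inverse of the encoding above.
fn decode(s: &str) -> HashMap<String, String> {
    s.split(';')
        .filter(|p| !p.is_empty())
        .filter_map(|p| p.split_once('='))
        .map(|(arn, st)| (arn.to_string(), st.to_string()))
        .collect()
}

fn main() {
    let mut t = HashMap::new();
    t.insert("arn:rustfs:replication::target1".to_string(), "COMPLETED".to_string());
    let wire = encode(&t);
    assert_eq!(decode(&wire), t);
}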
diff --git a/crates/filemeta/src/test_data.rs b/crates/filemeta/src/test_data.rs
index d1e8b25f..cf274255 100644
--- a/crates/filemeta/src/test_data.rs
+++ b/crates/filemeta/src/test_data.rs
@@ -67,7 +67,7 @@ pub fn create_real_xlmeta() -> Result<Vec<u8>> {
     let delete_marker = MetaDeleteMarker {
         version_id: Some(delete_version_id),
         mod_time: Some(OffsetDateTime::from_unix_timestamp(1705312260)?), // 1 minute later
-        meta_sys: None,
+        meta_sys: HashMap::new(),
     };
 
     let delete_file_version = FileMetaVersion {
@@ -151,7 +151,7 @@
         let delete_marker = MetaDeleteMarker {
             version_id: Some(delete_version_id),
             mod_time: Some(OffsetDateTime::from_unix_timestamp(1705312200 + i * 60 + 30)?),
-            meta_sys: None,
+            meta_sys: HashMap::new(),
         };
 
         let delete_file_version = FileMetaVersion {
diff --git a/crates/iam/Cargo.toml b/crates/iam/Cargo.toml
index f132c69b..e119fe34 100644
--- a/crates/iam/Cargo.toml
+++ b/crates/iam/Cargo.toml
@@ -46,5 +46,6 @@ jsonwebtoken = { workspace = true }
 tracing.workspace = true
 rustfs-madmin.workspace = true
 rustfs-utils = { workspace = true, features = ["path"] }
+tokio-util.workspace = true
 
 [dev-dependencies]
diff --git a/crates/iam/src/store/object.rs b/crates/iam/src/store/object.rs
index e9994406..9d630f31 100644
--- a/crates/iam/src/store/object.rs
+++ b/crates/iam/src/store/object.rs
@@ -20,6 +20,8 @@ use crate::{
     manager::{extract_jwt_claims, get_default_policyes},
 };
 use futures::future::join_all;
+use rustfs_ecstore::StorageAPI as _;
+use rustfs_ecstore::store_api::{ObjectInfoOrErr, WalkOptions};
 use rustfs_ecstore::{
     config::{
         RUSTFS_CONFIG_PREFIX,
@@ -28,15 +30,14 @@ use rustfs_ecstore::{
     global::get_global_action_cred,
     store::ECStore,
     store_api::{ObjectInfo, ObjectOptions},
-    store_list_objects::{ObjectInfoOrErr, WalkOptions},
 };
 use rustfs_policy::{auth::UserIdentity, policy::PolicyDoc};
 use rustfs_utils::path::{SLASH_SEPARATOR, path_join_buf};
 use serde::{Serialize, de::DeserializeOwned};
 use std::sync::LazyLock;
 use std::{collections::HashMap, sync::Arc};
-use tokio::sync::broadcast::{self, Receiver as B_Receiver};
 use tokio::sync::mpsc::{self, Sender};
+use tokio_util::sync::CancellationToken;
 use tracing::{debug, info, warn};
 
 pub static IAM_CONFIG_PREFIX: LazyLock<String> = LazyLock::new(|| format!("{RUSTFS_CONFIG_PREFIX}/iam"));
@@ -144,7 +145,7 @@ impl ObjectStore {
         Ok((Self::decrypt_data(&data)?, obj))
     }
 
-    async fn list_iam_config_items(&self, prefix: &str, ctx_rx: B_Receiver<bool>, sender: Sender<ObjectInfoOrErr>) {
+    async fn list_iam_config_items(&self, prefix: &str, ctx: CancellationToken, sender: Sender<ObjectInfoOrErr>) {
         // debug!("list iam config items, prefix: {}", &prefix);
 
         // TODO: Implement walk, use walk
@@ -156,7 +157,11 @@ impl ObjectStore {
         let (tx, mut rx) = mpsc::channel::<ObjectInfoOrErr>(100);
 
         let path = prefix.to_owned();
-        tokio::spawn(async move { store.walk(ctx_rx, Self::BUCKET_NAME, &path, tx, WalkOptions::default()).await });
+        tokio::spawn(async move {
+            store
+                .walk(ctx.clone(), Self::BUCKET_NAME, &path, tx, WalkOptions::default())
+                .await
+        });
 
         let prefix = prefix.to_owned();
         tokio::spawn(async move {
@@ -190,10 +195,11 @@ impl ObjectStore {
     }
 
     async fn list_all_iamconfig_items(&self) -> Result<HashMap<String, Vec<u8>>> {
-        let (ctx_tx, ctx_rx) = broadcast::channel(1);
         let (tx, mut rx) = mpsc::channel::<ObjectInfoOrErr>(100);
 
-        self.list_iam_config_items(format!("{}/", *IAM_CONFIG_PREFIX).as_str(), ctx_rx, tx)
+        let ctx = CancellationToken::new();
+
+        self.list_iam_config_items(format!("{}/", *IAM_CONFIG_PREFIX).as_str(), ctx.clone(), tx)
             .await;
 
         let mut res = HashMap::new();
@@ -201,7 +207,7 @@ impl ObjectStore {
         while let Some(v) = rx.recv().await {
             if let Some(err) = v.err {
                 warn!("list_iam_config_items {:?}", err);
-                let _ = ctx_tx.send(true);
+                ctx.cancel();
                 return Err(err);
             }
 
@@ -215,7 +221,7 @@
             }
         }
 
-        let _ = ctx_tx.send(true);
+        ctx.cancel();
 
         Ok(res)
     }
@@ -477,15 +483,15 @@ impl Store for ObjectStore {
             UserType::None => "",
         };
 
-        let (ctx_tx, ctx_rx) = broadcast::channel(1);
+        let ctx = CancellationToken::new();
         let (tx, mut rx) = mpsc::channel::<ObjectInfoOrErr>(100);
 
-        self.list_iam_config_items(base_prefix, ctx_rx, tx).await;
+        self.list_iam_config_items(base_prefix, ctx.clone(), tx).await;
 
         while let Some(v) = rx.recv().await {
             if let Some(err) = v.err {
                 warn!("list_iam_config_items {:?}", err);
-                let _ = ctx_tx.send(true);
+                let _ = ctx.cancel();
                 return Err(err);
             }
 
@@ -495,7 +501,7 @@
                 self.load_user(&name, user_type, m).await?;
             }
         }
-        let _ = ctx_tx.send(true);
+        let _ = ctx.cancel();
         Ok(())
     }
     async fn load_secret_key(&self, name: &str, user_type: UserType) -> Result<String> {
@@ -539,15 +545,15 @@
         Ok(())
     }
     async fn load_groups(&self, m: &mut HashMap<String, GroupInfo>) -> Result<()> {
-        let (ctx_tx, ctx_rx) = broadcast::channel(1);
+        let ctx = CancellationToken::new();
         let (tx, mut rx) = mpsc::channel::<ObjectInfoOrErr>(100);
 
-        self.list_iam_config_items(&IAM_CONFIG_GROUPS_PREFIX, ctx_rx, tx).await;
+        self.list_iam_config_items(&IAM_CONFIG_GROUPS_PREFIX, ctx.clone(), tx).await;
 
         while let Some(v) = rx.recv().await {
             if let Some(err) = v.err {
                 warn!("list_iam_config_items {:?}", err);
-                let _ = ctx_tx.send(true);
+                let _ = ctx.cancel();
                 return Err(err);
             }
 
@@ -557,7 +563,7 @@
                 self.load_group(&name, m).await?;
             }
         }
-        let _ = ctx_tx.send(true);
+        let _ = ctx.cancel();
 
         Ok(())
     }
@@ -603,15 +609,15 @@
         Ok(())
     }
     async fn load_policy_docs(&self, m: &mut HashMap<String, PolicyDoc>) -> Result<()> {
-        let (ctx_tx, ctx_rx) = broadcast::channel(1);
+        let ctx = CancellationToken::new();
         let (tx, mut rx) = mpsc::channel::<ObjectInfoOrErr>(100);
 
-        self.list_iam_config_items(&IAM_CONFIG_POLICIES_PREFIX, ctx_rx, tx).await;
+        self.list_iam_config_items(&IAM_CONFIG_POLICIES_PREFIX, ctx.clone(), tx).await;
 
         while let Some(v) = rx.recv().await {
             if let Some(err) = v.err {
                 warn!("list_iam_config_items {:?}", err);
-                let _ = ctx_tx.send(true);
+                let _ = ctx.cancel();
                 return Err(err);
             }
 
@@ -621,7 +627,7 @@
                 self.load_policy_doc(&name, m).await?;
             }
         }
-        let _ = ctx_tx.send(true);
+        let _ = ctx.cancel();
 
         Ok(())
     }
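These hunks all apply the same migration: a broadcast::channel used as a one-shot cancel signal becomes a tokio_util CancellationToken, which is cheaply clonable and idempotent. A minimal standalone sketch of the pattern (tokio and tokio-util assumed as dependencies):

use tokio_util::sync::CancellationToken;

#[tokio::main]
async fn main() {
    let ctx = CancellationToken::new();

    // Each worker gets a cheap clone of the token.
    let worker_ctx = ctx.clone();
    let worker = tokio::spawn(async move {
        tokio::select! {
            _ = worker_ctx.cancelled() => "cancelled",
            _ = tokio::time::sleep(std::time::Duration::from_secs(60)) => "finished",
        }
    });

    // cancel() returns (), so unlike broadcast::Sender::send there is
    // no error to ignore when all receivers are already gone.
    ctx.cancel();

    assert_eq!(worker.await.unwrap(), "cancelled");
}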
@@ -678,15 +684,15 @@
             }
         }
     };
-        let (ctx_tx, ctx_rx) = broadcast::channel(1);
+        let ctx = CancellationToken::new();
         let (tx, mut rx) = mpsc::channel::<ObjectInfoOrErr>(100);
 
-        self.list_iam_config_items(base_path, ctx_rx, tx).await;
+        self.list_iam_config_items(base_path, ctx.clone(), tx).await;
 
         while let Some(v) = rx.recv().await {
             if let Some(err) = v.err {
                 warn!("list_iam_config_items {:?}", err);
-                let _ = ctx_tx.send(true);
+                let _ = ctx.cancel();
                 return Err(err);
             }
 
@@ -696,7 +702,7 @@
                 self.load_mapped_policy(name, user_type, is_group, m).await?;
             }
         }
-        let _ = ctx_tx.send(true);
+        let _ = ctx.cancel();
         // TODO: check if this is needed
         Ok(())
     }
diff --git a/crates/kms/src/backends/local.rs b/crates/kms/src/backends/local.rs
index 970a71f3..3ea03c33 100644
--- a/crates/kms/src/backends/local.rs
+++ b/crates/kms/src/backends/local.rs
@@ -111,7 +111,7 @@ impl LocalKmsClient {
 
     /// Get the file path for a master key
     fn master_key_path(&self, key_id: &str) -> PathBuf {
-        self.config.key_dir.join(format!("{}.key", key_id))
+        self.config.key_dir.join(format!("{key_id}.key"))
     }
 
     /// Load a master key from disk
@@ -334,12 +334,11 @@ impl KmsClient for LocalKmsClient {
                 if let Some(actual_value) = envelope.encryption_context.get(key) {
                     if actual_value != expected_value {
                         return Err(KmsError::context_mismatch(format!(
-                            "Context mismatch for key '{}': expected '{}', got '{}'",
-                            key, expected_value, actual_value
+                            "Context mismatch for key '{key}': expected '{expected_value}', got '{actual_value}'"
                         )));
                     }
                 } else {
-                    return Err(KmsError::context_mismatch(format!("Missing context key '{}'", key)));
+                    return Err(KmsError::context_mismatch(format!("Missing context key '{key}'")));
                 }
             }
         }
@@ -720,14 +719,14 @@ impl KmsBackend for LocalKmsBackend {
             .client
             .load_master_key(key_id)
             .await
-            .map_err(|_| crate::error::KmsError::key_not_found(format!("Key {} not found", key_id)))?;
+            .map_err(|_| crate::error::KmsError::key_not_found(format!("Key {key_id} not found")))?;
 
         let (deletion_date_str, deletion_date_dt) = if request.force_immediate.unwrap_or(false) {
             // For immediate deletion, actually delete the key from filesystem
             let key_path = self.client.master_key_path(key_id);
             tokio::fs::remove_file(&key_path)
                 .await
-                .map_err(|e| crate::error::KmsError::internal_error(format!("Failed to delete key file: {}", e)))?;
+                .map_err(|e| crate::error::KmsError::internal_error(format!("Failed to delete key file: {e}")))?;
 
             // Remove from cache
             let mut cache = self.client.key_cache.write().await;
@@ -773,9 +772,9 @@
         let key_path = self.client.master_key_path(key_id);
         let content = tokio::fs::read(&key_path)
             .await
-            .map_err(|e| crate::error::KmsError::internal_error(format!("Failed to read key file: {}", e)))?;
+            .map_err(|e| crate::error::KmsError::internal_error(format!("Failed to read key file: {e}")))?;
         let stored_key: crate::backends::local::StoredMasterKey = serde_json::from_slice(&content)
-            .map_err(|e| crate::error::KmsError::internal_error(format!("Failed to parse stored key: {}", e)))?;
+            .map_err(|e| crate::error::KmsError::internal_error(format!("Failed to parse stored key: {e}")))?;
 
         // Decrypt the existing key material to preserve it
         let existing_key_material = if let Some(ref cipher) = self.client.master_cipher {
@@ -821,13 +820,10 @@
             .client
             .load_master_key(key_id)
             .await
-            .map_err(|_| crate::error::KmsError::key_not_found(format!("Key {} not found", key_id)))?;
+            .map_err(|_| crate::error::KmsError::key_not_found(format!("Key {key_id} not found")))?;
 
         if master_key.status != KeyStatus::PendingDeletion {
-            return Err(crate::error::KmsError::invalid_key_state(format!(
-                "Key {} is not pending deletion",
-                key_id
-            )));
+            return
Err(crate::error::KmsError::invalid_key_state(format!("Key {key_id} is not pending deletion"))); } // Cancel the deletion by resetting the state diff --git a/crates/kms/src/backends/vault.rs b/crates/kms/src/backends/vault.rs index c8841a9f..1d1768bf 100644 --- a/crates/kms/src/backends/vault.rs +++ b/crates/kms/src/backends/vault.rs @@ -89,10 +89,10 @@ impl VaultKmsClient { let settings = settings_builder .build() - .map_err(|e| KmsError::backend_error(format!("Failed to build Vault client settings: {}", e)))?; + .map_err(|e| KmsError::backend_error(format!("Failed to build Vault client settings: {e}")))?; let client = - VaultClient::new(settings).map_err(|e| KmsError::backend_error(format!("Failed to create Vault client: {}", e)))?; + VaultClient::new(settings).map_err(|e| KmsError::backend_error(format!("Failed to create Vault client: {e}")))?; info!("Successfully connected to Vault at {}", config.address); @@ -144,7 +144,7 @@ impl VaultKmsClient { kv2::set(&self.client, &self.kv_mount, &path, key_data) .await - .map_err(|e| KmsError::backend_error(format!("Failed to store key in Vault: {}", e)))?; + .map_err(|e| KmsError::backend_error(format!("Failed to store key in Vault: {e}")))?; debug!("Stored key {} in Vault at path {}", key_id, path); Ok(()) @@ -176,7 +176,7 @@ impl VaultKmsClient { let secret: VaultKeyData = kv2::read(&self.client, &self.kv_mount, &path).await.map_err(|e| match e { vaultrs::error::ClientError::ResponseWrapError => KmsError::key_not_found(key_id), vaultrs::error::ClientError::APIError { code: 404, .. } => KmsError::key_not_found(key_id), - _ => KmsError::backend_error(format!("Failed to read key from Vault: {}", e)), + _ => KmsError::backend_error(format!("Failed to read key from Vault: {e}")), })?; debug!("Retrieved key {} from Vault, tags: {:?}", key_id, secret.tags); @@ -200,7 +200,7 @@ impl VaultKmsClient { debug!("Key path doesn't exist in Vault (404), returning empty list"); Ok(Vec::new()) } - Err(e) => Err(KmsError::backend_error(format!("Failed to list keys in Vault: {}", e))), + Err(e) => Err(KmsError::backend_error(format!("Failed to list keys in Vault: {e}"))), } } @@ -214,7 +214,7 @@ impl VaultKmsClient { .await .map_err(|e| match e { vaultrs::error::ClientError::APIError { code: 404, .. 
} => KmsError::key_not_found(key_id), - _ => KmsError::backend_error(format!("Failed to delete key metadata from Vault: {}", e)), + _ => KmsError::backend_error(format!("Failed to delete key metadata from Vault: {e}")), })?; debug!("Permanently deleted key {} metadata from Vault at path {}", key_id, path); @@ -649,7 +649,7 @@ impl KmsBackend for VaultKmsBackend { let mut key_metadata = match self.describe_key(describe_request).await { Ok(response) => response.key_metadata, Err(_) => { - return Err(crate::error::KmsError::key_not_found(format!("Key {} not found", key_id))); + return Err(crate::error::KmsError::key_not_found(format!("Key {key_id} not found"))); } }; @@ -705,15 +705,12 @@ impl KmsBackend for VaultKmsBackend { let mut key_metadata = match self.describe_key(describe_request).await { Ok(response) => response.key_metadata, Err(_) => { - return Err(crate::error::KmsError::key_not_found(format!("Key {} not found", key_id))); + return Err(crate::error::KmsError::key_not_found(format!("Key {key_id} not found"))); } }; if key_metadata.key_state != KeyState::PendingDeletion { - return Err(crate::error::KmsError::invalid_key_state(format!( - "Key {} is not pending deletion", - key_id - ))); + return Err(crate::error::KmsError::invalid_key_state(format!("Key {key_id} is not pending deletion"))); } // Cancel the deletion by resetting the state diff --git a/crates/kms/src/config.rs b/crates/kms/src/config.rs index 47663c83..c3df3e88 100644 --- a/crates/kms/src/config.rs +++ b/crates/kms/src/config.rs @@ -315,7 +315,7 @@ impl KmsConfig { config.backend = match backend_type.to_lowercase().as_str() { "local" => KmsBackend::Local, "vault" => KmsBackend::Vault, - _ => return Err(KmsError::configuration_error(format!("Unknown KMS backend: {}", backend_type))), + _ => return Err(KmsError::configuration_error(format!("Unknown KMS backend: {backend_type}"))), }; } diff --git a/crates/kms/src/encryption/service.rs b/crates/kms/src/encryption/service.rs index 1139c7f0..94ed0c50 100644 --- a/crates/kms/src/encryption/service.rs +++ b/crates/kms/src/encryption/service.rs @@ -227,7 +227,7 @@ impl ObjectEncryptionService { self.kms_manager .create_key(create_req) .await - .map_err(|e| KmsError::backend_error(format!("Failed to auto-create SSE-S3 key {}: {}", actual_key_id, e)))?; + .map_err(|e| KmsError::backend_error(format!("Failed to auto-create SSE-S3 key {actual_key_id}: {e}")))?; } } else { // For SSE-KMS, key must exist @@ -235,7 +235,7 @@ impl ObjectEncryptionService { key_id: actual_key_id.to_string(), }; self.kms_manager.describe_key(describe_req).await.map_err(|_| { - KmsError::invalid_operation(format!("SSE-KMS key '{}' not found. Please create it first.", actual_key_id)) + KmsError::invalid_operation(format!("SSE-KMS key '{actual_key_id}' not found. 
Please create it first.")) })?; } @@ -250,7 +250,7 @@ impl ObjectEncryptionService { .kms_manager .generate_data_key(request) .await - .map_err(|e| KmsError::backend_error(format!("Failed to generate data key: {}", e)))?; + .map_err(|e| KmsError::backend_error(format!("Failed to generate data key: {e}")))?; let plaintext_key = data_key.plaintext_key; @@ -325,7 +325,7 @@ impl ObjectEncryptionService { .kms_manager .decrypt(decrypt_request) .await - .map_err(|e| KmsError::backend_error(format!("Failed to decrypt data key: {}", e)))?; + .map_err(|e| KmsError::backend_error(format!("Failed to decrypt data key: {e}")))?; // Create cipher let cipher = create_cipher(&algorithm, &decrypt_response.plaintext)?; @@ -379,7 +379,7 @@ impl ObjectEncryptionService { // Validate key MD5 if provided if let Some(expected_md5) = customer_key_md5 { let actual_md5 = md5::compute(customer_key); - let actual_md5_hex = format!("{:x}", actual_md5); + let actual_md5_hex = format!("{actual_md5:x}"); if actual_md5_hex != expected_md5.to_lowercase() { return Err(KmsError::validation_error("Customer key MD5 mismatch")); } @@ -487,12 +487,11 @@ impl ObjectEncryptionService { Some(actual_value) if actual_value == expected_value => continue, Some(actual_value) => { return Err(KmsError::context_mismatch(format!( - "Context mismatch for '{}': expected '{}', got '{}'", - key, expected_value, actual_value + "Context mismatch for '{key}': expected '{expected_value}', got '{actual_value}'" ))); } None => { - return Err(KmsError::context_mismatch(format!("Missing context key '{}'", key))); + return Err(KmsError::context_mismatch(format!("Missing context key '{key}'"))); } } } @@ -562,13 +561,13 @@ impl ObjectEncryptionService { .ok_or_else(|| KmsError::validation_error("Missing IV header"))?; let iv = base64::engine::general_purpose::STANDARD .decode(iv) - .map_err(|e| KmsError::validation_error(format!("Invalid IV: {}", e)))?; + .map_err(|e| KmsError::validation_error(format!("Invalid IV: {e}")))?; let tag = if let Some(tag_str) = headers.get("x-rustfs-encryption-tag") { Some( base64::engine::general_purpose::STANDARD .decode(tag_str) - .map_err(|e| KmsError::validation_error(format!("Invalid tag: {}", e)))?, + .map_err(|e| KmsError::validation_error(format!("Invalid tag: {e}")))?, ) } else { None @@ -577,14 +576,14 @@ impl ObjectEncryptionService { let encrypted_data_key = if let Some(key_str) = headers.get("x-rustfs-encryption-key") { base64::engine::general_purpose::STANDARD .decode(key_str) - .map_err(|e| KmsError::validation_error(format!("Invalid encrypted key: {}", e)))? + .map_err(|e| KmsError::validation_error(format!("Invalid encrypted key: {e}")))? } else { Vec::new() // Empty for SSE-C }; let encryption_context = if let Some(context_str) = headers.get("x-rustfs-encryption-context") { serde_json::from_str(context_str) - .map_err(|e| KmsError::validation_error(format!("Invalid encryption context: {}", e)))? + .map_err(|e| KmsError::validation_error(format!("Invalid encryption context: {e}")))? 
    } else {
        HashMap::new()
    };
diff --git a/crates/kms/src/error.rs b/crates/kms/src/error.rs
index 331641ce..740bed14 100644
--- a/crates/kms/src/error.rs
+++ b/crates/kms/src/error.rs
@@ -225,7 +225,7 @@ impl KmsError {
 impl From<url::ParseError> for KmsError {
     fn from(error: url::ParseError) -> Self {
         Self::ConfigurationError {
-            message: format!("Invalid URL: {}", error),
+            message: format!("Invalid URL: {error}"),
         }
     }
 }
@@ -233,7 +233,7 @@ impl From<url::ParseError> for KmsError {
 impl From<reqwest::Error> for KmsError {
     fn from(error: reqwest::Error) -> Self {
         Self::BackendError {
-            message: format!("HTTP request failed: {}", error),
+            message: format!("HTTP request failed: {error}"),
         }
     }
 }
diff --git a/crates/kms/src/service_manager.rs b/crates/kms/src/service_manager.rs
index 4afd515b..f877e010 100644
--- a/crates/kms/src/service_manager.rs
+++ b/crates/kms/src/service_manager.rs
@@ -137,7 +137,7 @@ impl KmsServiceManager {
                 Ok(())
             }
             Err(e) => {
-                let err_msg = format!("Failed to create KMS backend: {}", e);
+                let err_msg = format!("Failed to create KMS backend: {e}");
                 error!("{}", err_msg);
                 let mut status = self.status.write().await;
                 *status = KmsServiceStatus::Error(err_msg.clone());
@@ -218,7 +218,7 @@ impl KmsServiceManager {
                 error!("KMS health check error: {}", e);
                 // Update status to error
                 let mut status = self.status.write().await;
-                *status = KmsServiceStatus::Error(format!("Health check failed: {}", e));
+                *status = KmsServiceStatus::Error(format!("Health check failed: {e}"));
                 Err(e)
             }
         }
diff --git a/crates/lock/src/fast_lock/manager.rs b/crates/lock/src/fast_lock/manager.rs
index 4a8ee867..20b10bf6 100644
--- a/crates/lock/src/fast_lock/manager.rs
+++ b/crates/lock/src/fast_lock/manager.rs
@@ -106,6 +106,10 @@ impl FastObjectLockManager {
         object: impl Into<Arc<str>>,
         owner: impl Into<Arc<str>>,
     ) -> Result {
+        // let bucket = bucket.into();
+        // let object = object.into();
+        // let owner = owner.into();
+        // error!("acquire_write_lock: bucket={:?}, object={:?}, owner={:?}", bucket, object, owner);
         let request = ObjectLockRequest::new_write(bucket, object, owner);
         self.acquire_lock(request).await
     }
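The two From impls above are what let KMS code bubble url and reqwest failures up with `?`. A minimal sketch of the effect (the import path for KmsError is assumed; only the conversion shown above is relied on):

use rustfs_kms::error::KmsError; // assumed public path for the error type above

// With `impl From<url::ParseError> for KmsError`, `?` converts the error automatically
// into KmsError::ConfigurationError, so no explicit map_err is needed here.
fn parse_vault_address(addr: &str) -> Result<url::Url, KmsError> {
    let parsed = url::Url::parse(addr)?;
    Ok(parsed)
}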
diff --git a/crates/mcp/src/s3_client.rs b/crates/mcp/src/s3_client.rs
index e1d738b4..7d9a1489 100644
--- a/crates/mcp/src/s3_client.rs
+++ b/crates/mcp/src/s3_client.rs
@@ -296,11 +296,11 @@ impl S3Client {
             .context(format!("Failed to resolve file path: {local_path}"))?;
 
         if !canonical_path.exists() {
-            anyhow::bail!("File does not exist: {}", local_path);
+            anyhow::bail!("File does not exist: {local_path}");
         }
 
         if !canonical_path.is_file() {
-            anyhow::bail!("Path is not a file: {}", local_path);
+            anyhow::bail!("Path is not a file: {local_path}");
         }
 
         let metadata = tokio::fs::metadata(&canonical_path)
@@ -432,7 +432,7 @@ impl S3Client {
         while let Some(bytes_result) = byte_stream.try_next().await.context("Failed to read object content")? {
             if total_read + bytes_result.len() > max_size {
-                anyhow::bail!("Object size exceeds maximum allowed size of {} bytes", max_size);
+                anyhow::bail!("Object size exceeds maximum allowed size of {max_size} bytes");
             }
             content.extend_from_slice(&bytes_result);
             total_read += bytes_result.len();
diff --git a/crates/rio/src/lib.rs b/crates/rio/src/lib.rs
index 9342b7a0..aeb73e60 100644
--- a/crates/rio/src/lib.rs
+++ b/crates/rio/src/lib.rs
@@ -82,6 +82,7 @@ pub trait HashReaderDetector {
 impl Reader for crate::HashReader {}
 impl Reader for crate::HardLimitReader {}
 impl Reader for crate::EtagReader {}
+impl<R> Reader for crate::LimitReader<R> where R: Reader {}
 impl<R> Reader for crate::CompressReader<R> where R: Reader {}
 impl<R> Reader for crate::EncryptReader<R> where R: Reader {}
 impl<R> Reader for crate::DecryptReader<R> where R: Reader {}
diff --git a/crates/rio/src/limit_reader.rs b/crates/rio/src/limit_reader.rs
index c95e98b1..3c9a7ae3 100644
--- a/crates/rio/src/limit_reader.rs
+++ b/crates/rio/src/limit_reader.rs
@@ -37,7 +37,7 @@ use std::pin::Pin;
 use std::task::{Context, Poll};
 use tokio::io::{AsyncRead, ReadBuf};
 
-use crate::{EtagResolvable, HashReaderDetector, HashReaderMut};
+use crate::{EtagResolvable, HashReaderDetector, HashReaderMut, TryGetIndex};
 
 pin_project! {
     #[derive(Debug)]
@@ -118,6 +118,8 @@ where
     }
 }
 
+impl<R> TryGetIndex for LimitReader<R> where R: AsyncRead + Unpin + Send + Sync {}
+
 #[cfg(test)]
 mod tests {
     use std::io::Cursor;
diff --git a/crates/utils/Cargo.toml b/crates/utils/Cargo.toml
index 2526c41c..9c03dc54 100644
--- a/crates/utils/Cargo.toml
+++ b/crates/utils/Cargo.toml
@@ -53,6 +53,7 @@ s3s = { workspace = true, optional = true }
 serde = { workspace = true, optional = true }
 sha1 = { workspace = true, optional = true }
 sha2 = { workspace = true, optional = true }
+convert_case = "0.8.0"
 siphasher = { workspace = true, optional = true }
 snap = { workspace = true, optional = true }
 sysinfo = { workspace = true, optional = true }
@@ -90,4 +91,5 @@ hash = ["dep:highway", "dep:md-5", "dep:sha2", "dep:blake3", "dep:serde", "dep:s
 os = ["dep:nix", "dep:tempfile", "winapi"] # operating system utilities
 integration = [] # integration test features
 sys = ["dep:sysinfo"] # system information features
-full = ["ip", "tls", "net", "io", "hash", "os", "integration", "path", "crypto", "string", "compress", "sys", "notify"] # all features
+http = []
+full = ["ip", "tls", "net", "io", "hash", "os", "integration", "path", "crypto", "string", "compress", "sys", "notify", "http"] # all features
diff --git a/crates/utils/src/hash.rs b/crates/utils/src/hash.rs
index 3e06b232..975cdc46 100644
--- a/crates/utils/src/hash.rs
+++ b/crates/utils/src/hash.rs
@@ -109,6 +109,8 @@ use siphasher::sip::SipHasher;
 
 pub const EMPTY_STRING_SHA256_HASH: &str = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";
 
+pub const DEFAULT_SIP_HASH_KEY: [u8; 16] = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
+
 pub fn sip_hash(key: &str, cardinality: usize, id: &[u8; 16]) -> usize {
     // Your key, must be 16 bytes
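DEFAULT_SIP_HASH_KEY gives callers a fixed all-zero 16-byte SipHash key. A small usage sketch, based only on the sip_hash signature shown above (the shard count of 16 is arbitrary):

use rustfs_utils::hash::{sip_hash, DEFAULT_SIP_HASH_KEY};

fn main() {
    // Deterministically map an object name onto one of 16 shards.
    let shard = sip_hash("my-bucket/my-object", 16, &DEFAULT_SIP_HASH_KEY);
    assert!(shard < 16);
    println!("object goes to shard {shard}");
}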
diff --git a/crates/utils/src/http/headers.rs b/crates/utils/src/http/headers.rs
new file mode 100644
index 00000000..afe55497
--- /dev/null
+++ b/crates/utils/src/http/headers.rs
@@ -0,0 +1,277 @@
+// Copyright 2024 RustFS Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use convert_case::{Case, Casing};
+use std::collections::HashMap;
+use std::sync::LazyLock;
+
+pub const LAST_MODIFIED: &str = "Last-Modified";
+pub const DATE: &str = "Date";
+pub const ETAG: &str = "ETag";
+pub const CONTENT_TYPE: &str = "Content-Type";
+pub const CONTENT_MD5: &str = "Content-Md5";
+pub const CONTENT_ENCODING: &str = "Content-Encoding";
+pub const EXPIRES: &str = "Expires";
+pub const CONTENT_LENGTH: &str = "Content-Length";
+pub const CONTENT_LANGUAGE: &str = "Content-Language";
+pub const CONTENT_RANGE: &str = "Content-Range";
+pub const CONNECTION: &str = "Connection";
+pub const ACCEPT_RANGES: &str = "Accept-Ranges";
+pub const AMZ_BUCKET_REGION: &str = "X-Amz-Bucket-Region";
+pub const SERVER_INFO: &str = "Server";
+pub const RETRY_AFTER: &str = "Retry-After";
+pub const LOCATION: &str = "Location";
+pub const CACHE_CONTROL: &str = "Cache-Control";
+pub const CONTENT_DISPOSITION: &str = "Content-Disposition";
+pub const AUTHORIZATION: &str = "Authorization";
+pub const ACTION: &str = "Action";
+pub const RANGE: &str = "Range";
+
+// S3 storage class
+pub const AMZ_STORAGE_CLASS: &str = "x-amz-storage-class";
+
+// S3 object version ID
+pub const AMZ_VERSION_ID: &str = "x-amz-version-id";
+pub const AMZ_DELETE_MARKER: &str = "x-amz-delete-marker";
+
+// S3 object tagging
+pub const AMZ_OBJECT_TAGGING: &str = "X-Amz-Tagging";
+pub const AMZ_TAG_COUNT: &str = "x-amz-tagging-count";
+pub const AMZ_TAG_DIRECTIVE: &str = "X-Amz-Tagging-Directive";
+
+// S3 transition restore
+pub const AMZ_RESTORE: &str = "x-amz-restore";
+pub const AMZ_RESTORE_EXPIRY_DAYS: &str = "X-Amz-Restore-Expiry-Days";
+pub const AMZ_RESTORE_REQUEST_DATE: &str = "X-Amz-Restore-Request-Date";
+pub const AMZ_RESTORE_OUTPUT_PATH: &str = "x-amz-restore-output-path";
+
+// S3 extensions
+pub const AMZ_COPY_SOURCE_IF_MODIFIED_SINCE: &str = "x-amz-copy-source-if-modified-since";
+pub const AMZ_COPY_SOURCE_IF_UNMODIFIED_SINCE: &str = "x-amz-copy-source-if-unmodified-since";
+
+pub const AMZ_COPY_SOURCE_IF_NONE_MATCH: &str = "x-amz-copy-source-if-none-match";
+pub const AMZ_COPY_SOURCE_IF_MATCH: &str = "x-amz-copy-source-if-match";
+
+pub const AMZ_COPY_SOURCE: &str = "X-Amz-Copy-Source";
+pub const AMZ_COPY_SOURCE_VERSION_ID: &str = "X-Amz-Copy-Source-Version-Id";
+pub const AMZ_COPY_SOURCE_RANGE: &str = "X-Amz-Copy-Source-Range";
+pub const AMZ_METADATA_DIRECTIVE: &str = "X-Amz-Metadata-Directive";
+pub const AMZ_OBJECT_LOCK_MODE: &str = "X-Amz-Object-Lock-Mode";
+pub const AMZ_OBJECT_LOCK_RETAIN_UNTIL_DATE: &str = "X-Amz-Object-Lock-Retain-Until-Date";
+pub const AMZ_OBJECT_LOCK_LEGAL_HOLD: &str = "X-Amz-Object-Lock-Legal-Hold";
+pub const AMZ_OBJECT_LOCK_BYPASS_GOVERNANCE: &str = "X-Amz-Bypass-Governance-Retention";
+pub const AMZ_BUCKET_REPLICATION_STATUS: &str = "X-Amz-Replication-Status";
+
+// AmzSnowballExtract will trigger unpacking of an archive content
+pub const AMZ_SNOWBALL_EXTRACT: &str = "X-Amz-Meta-Snowball-Auto-Extract";
+
+// Object lock enabled
+pub const AMZ_OBJECT_LOCK_ENABLED: &str = "x-amz-bucket-object-lock-enabled";
+
+// Multipart parts count
+pub const
AMZ_MP_PARTS_COUNT: &str = "x-amz-mp-parts-count"; + +// Object date/time of expiration +pub const AMZ_EXPIRATION: &str = "x-amz-expiration"; + +// Dummy putBucketACL +pub const AMZ_ACL: &str = "x-amz-acl"; + +// Signature V4 related constants. +pub const AMZ_CONTENT_SHA256: &str = "X-Amz-Content-Sha256"; +pub const AMZ_DATE: &str = "X-Amz-Date"; +pub const AMZ_ALGORITHM: &str = "X-Amz-Algorithm"; +pub const AMZ_EXPIRES: &str = "X-Amz-Expires"; +pub const AMZ_SIGNED_HEADERS: &str = "X-Amz-SignedHeaders"; +pub const AMZ_SIGNATURE: &str = "X-Amz-Signature"; +pub const AMZ_CREDENTIAL: &str = "X-Amz-Credential"; +pub const AMZ_SECURITY_TOKEN: &str = "X-Amz-Security-Token"; +pub const AMZ_DECODED_CONTENT_LENGTH: &str = "X-Amz-Decoded-Content-Length"; +pub const AMZ_TRAILER: &str = "X-Amz-Trailer"; +pub const AMZ_MAX_PARTS: &str = "X-Amz-Max-Parts"; +pub const AMZ_PART_NUMBER_MARKER: &str = "X-Amz-Part-Number-Marker"; + +// Constants used for GetObjectAttributes and GetObjectVersionAttributes +pub const AMZ_OBJECT_ATTRIBUTES: &str = "X-Amz-Object-Attributes"; + +// AWS server-side encryption headers for SSE-S3, SSE-KMS and SSE-C. +pub const AMZ_SERVER_SIDE_ENCRYPTION: &str = "X-Amz-Server-Side-Encryption"; +pub const AMZ_SERVER_SIDE_ENCRYPTION_KMS_ID: &str = "X-Amz-Server-Side-Encryption-Aws-Kms-Key-Id"; +pub const AMZ_SERVER_SIDE_ENCRYPTION_KMS_CONTEXT: &str = "X-Amz-Server-Side-Encryption-Context"; +pub const AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_ALGORITHM: &str = "X-Amz-Server-Side-Encryption-Customer-Algorithm"; +pub const AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY: &str = "X-Amz-Server-Side-Encryption-Customer-Key"; +pub const AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY_MD5: &str = "X-Amz-Server-Side-Encryption-Customer-Key-Md5"; +pub const AMZ_SERVER_SIDE_ENCRYPTION_COPY_CUSTOMER_ALGORITHM: &str = + "X-Amz-Copy-Source-Server-Side-Encryption-Customer-Algorithm"; +pub const AMZ_SERVER_SIDE_ENCRYPTION_COPY_CUSTOMER_KEY: &str = "X-Amz-Copy-Source-Server-Side-Encryption-Customer-Key"; +pub const AMZ_SERVER_SIDE_ENCRYPTION_COPY_CUSTOMER_KEY_MD5: &str = "X-Amz-Copy-Source-Server-Side-Encryption-Customer-Key-Md5"; + +pub const AMZ_ENCRYPTION_AES: &str = "AES256"; +pub const AMZ_ENCRYPTION_KMS: &str = "aws:kms"; + +// Signature v2 related constants +pub const AMZ_SIGNATURE_V2: &str = "Signature"; +pub const AMZ_ACCESS_KEY_ID: &str = "AWSAccessKeyId"; + +// Response request id. 
+pub const AMZ_REQUEST_ID: &str = "x-amz-request-id";
+pub const AMZ_REQUEST_HOST_ID: &str = "x-amz-id-2";
+
+// Content Checksums
+pub const AMZ_CHECKSUM_ALGO: &str = "x-amz-checksum-algorithm";
+pub const AMZ_CHECKSUM_CRC32: &str = "x-amz-checksum-crc32";
+pub const AMZ_CHECKSUM_CRC32C: &str = "x-amz-checksum-crc32c";
+pub const AMZ_CHECKSUM_SHA1: &str = "x-amz-checksum-sha1";
+pub const AMZ_CHECKSUM_SHA256: &str = "x-amz-checksum-sha256";
+pub const AMZ_CHECKSUM_CRC64NVME: &str = "x-amz-checksum-crc64nvme";
+pub const AMZ_CHECKSUM_MODE: &str = "x-amz-checksum-mode";
+pub const AMZ_CHECKSUM_TYPE: &str = "x-amz-checksum-type";
+pub const AMZ_CHECKSUM_TYPE_FULL_OBJECT: &str = "FULL_OBJECT";
+pub const AMZ_CHECKSUM_TYPE_COMPOSITE: &str = "COMPOSITE";
+
+// Post Policy related
+pub const AMZ_META_UUID: &str = "X-Amz-Meta-Uuid";
+pub const AMZ_META_NAME: &str = "X-Amz-Meta-Name";
+
+pub const AMZ_META_UNENCRYPTED_CONTENT_LENGTH: &str = "X-Amz-Meta-X-Amz-Unencrypted-Content-Length";
+pub const AMZ_META_UNENCRYPTED_CONTENT_MD5: &str = "X-Amz-Meta-X-Amz-Unencrypted-Content-Md5";
+
+pub const RESERVED_METADATA_PREFIX: &str = "X-RustFS-Internal-";
+pub const RESERVED_METADATA_PREFIX_LOWER: &str = "x-rustfs-internal-";
+
+pub const RUSTFS_HEALING: &str = "X-Rustfs-Internal-healing";
+// pub const RUSTFS_DATA_MOVE: &str = "X-Rustfs-Internal-data-mov";
+
+// pub const X_RUSTFS_INLINE_DATA: &str = "x-rustfs-inline-data";
+
+pub const VERSION_PURGE_STATUS_KEY: &str = "X-Rustfs-Internal-purgestatus";
+
+pub const X_RUSTFS_HEALING: &str = "X-Rustfs-Internal-healing";
+pub const X_RUSTFS_DATA_MOV: &str = "X-Rustfs-Internal-data-mov";
+
+pub const AMZ_TAGGING_DIRECTIVE: &str = "X-Amz-Tagging-Directive";
+
+pub const RUSTFS_DATA_MOVE: &str = "X-Rustfs-Internal-data-mov";
+
+pub const RUSTFS_FORCE_DELETE: &str = "X-Rustfs-Force-Delete";
+
+pub const RUSTFS_REPLICATION_RESET_STATUS: &str = "X-Rustfs-Replication-Reset-Status";
+pub const RUSTFS_REPLICATION_AUTUAL_OBJECT_SIZE: &str = "X-Rustfs-Replication-Actual-Object-Size";
+
+pub const RUSTFS_BUCKET_SOURCE_VERSION_ID: &str = "X-Rustfs-Source-Version-Id";
+pub const RUSTFS_BUCKET_SOURCE_MTIME: &str = "X-Rustfs-Source-Mtime";
+pub const RUSTFS_BUCKET_SOURCE_ETAG: &str = "X-Rustfs-Source-Etag";
+pub const RUSTFS_BUCKET_REPLICATION_DELETE_MARKER: &str = "X-Rustfs-Source-DeleteMarker";
+pub const RUSTFS_BUCKET_REPLICATION_PROXY_REQUEST: &str = "X-Rustfs-Source-Proxy-Request";
+pub const RUSTFS_BUCKET_REPLICATION_REQUEST: &str = "X-Rustfs-Source-Replication-Request";
+pub const RUSTFS_BUCKET_REPLICATION_CHECK: &str = "X-Rustfs-Source-Replication-Check";
+
+// SSEC encryption header constants
+pub const SSEC_ALGORITHM_HEADER: &str = "x-amz-server-side-encryption-customer-algorithm";
+pub const SSEC_KEY_HEADER: &str = "x-amz-server-side-encryption-customer-key";
+pub const SSEC_KEY_MD5_HEADER: &str = "x-amz-server-side-encryption-customer-key-md5";
+
+pub const AMZ_WEBSITE_REDIRECT_LOCATION: &str = "x-amz-website-redirect-location";
+
+pub trait HeaderExt {
+    fn lookup(&self, s: &str) -> Option<&str>;
+}
+
+impl HeaderExt for HashMap<String, String> {
+    fn lookup(&self, s: &str) -> Option<&str> {
+        let train = s.to_case(Case::Train);
+        let lower = s.to_ascii_lowercase();
+        let keys = [s, lower.as_str(), train.as_str()];
+
+        for key in keys {
+            if let Some(v) = self.get(key) {
+                return Some(v);
+            }
+        }
+
+        None
+    }
+}
+
+static SUPPORTED_QUERY_VALUES: LazyLock<HashMap<String, bool>> = LazyLock::new(|| {
+    let mut m = HashMap::new();
+    m.insert("attributes".to_string(), true);
+    m.insert("partNumber".to_string(), true);
+    m.insert("versionId".to_string(), true);
+    m.insert("response-cache-control".to_string(), true);
+    m.insert("response-content-disposition".to_string(), true);
+    m.insert("response-content-encoding".to_string(), true);
+    m.insert("response-content-language".to_string(), true);
+    m.insert("response-content-type".to_string(), true);
+    m.insert("response-expires".to_string(), true);
+    m
+});
+static SUPPORTED_HEADERS: LazyLock<HashMap<String, bool>> = LazyLock::new(|| {
+    let mut m = HashMap::new();
+    m.insert("content-type".to_string(), true);
+    m.insert("cache-control".to_string(), true);
+    m.insert("content-encoding".to_string(), true);
+    m.insert("content-disposition".to_string(), true);
+    m.insert("content-language".to_string(), true);
+    m.insert("x-amz-website-redirect-location".to_string(), true);
+    m.insert("x-amz-object-lock-mode".to_string(), true);
+    m.insert("x-amz-metadata-directive".to_string(), true);
+    m.insert("x-amz-object-lock-retain-until-date".to_string(), true);
+    m.insert("expires".to_string(), true);
+    m.insert("x-amz-replication-status".to_string(), true);
+    m
+});
+static SSE_HEADERS: LazyLock<HashMap<String, bool>> = LazyLock::new(|| {
+    let mut m = HashMap::new();
+    m.insert("x-amz-server-side-encryption".to_string(), true);
+    m.insert("x-amz-server-side-encryption-aws-kms-key-id".to_string(), true);
+    m.insert("x-amz-server-side-encryption-context".to_string(), true);
+    m.insert("x-amz-server-side-encryption-customer-algorithm".to_string(), true);
+    m.insert("x-amz-server-side-encryption-customer-key".to_string(), true);
+    m.insert("x-amz-server-side-encryption-customer-key-md5".to_string(), true);
+    m
+});
+
+pub fn is_standard_query_value(qs_key: &str) -> bool {
+    *SUPPORTED_QUERY_VALUES.get(qs_key).unwrap_or(&false)
+}
+
+pub fn is_storageclass_header(header_key: &str) -> bool {
+    header_key.to_lowercase() == AMZ_STORAGE_CLASS.to_lowercase()
+}
+
+pub fn is_standard_header(header_key: &str) -> bool {
+    *SUPPORTED_HEADERS.get(&header_key.to_lowercase()).unwrap_or(&false)
+}
+
+pub fn is_sse_header(header_key: &str) -> bool {
+    *SSE_HEADERS.get(&header_key.to_lowercase()).unwrap_or(&false)
+}
+
+pub fn is_amz_header(header_key: &str) -> bool {
+    let key = header_key.to_lowercase();
+    key.starts_with("x-amz-meta-")
+        || key.starts_with("x-amz-grant-")
+        || key == "x-amz-acl"
+        || is_sse_header(header_key)
+        || key.starts_with("x-amz-checksum-")
+}
+
+pub fn is_rustfs_header(header_key: &str) -> bool {
+    header_key.to_lowercase().starts_with("x-rustfs-")
+}
+
+pub fn is_minio_header(header_key: &str) -> bool {
+    header_key.to_lowercase().starts_with("x-minio-")
+}
diff --git a/crates/utils/src/http/mod.rs b/crates/utils/src/http/mod.rs
new file mode 100644
index 00000000..ce2459d6
--- /dev/null
+++ b/crates/utils/src/http/mod.rs
@@ -0,0 +1,3 @@
+pub mod headers;
+
+pub use headers::*;
diff --git a/crates/utils/src/lib.rs b/crates/utils/src/lib.rs
index 2b636f26..b8970d08 100644
--- a/crates/utils/src/lib.rs
+++ b/crates/utils/src/lib.rs
@@ -21,6 +21,9 @@ pub mod ip;
 #[cfg(feature = "net")]
 pub mod net;
 
+#[cfg(feature = "http")]
+pub mod http;
+
 #[cfg(feature = "net")]
 pub use dns_resolver::*;
 #[cfg(feature = "net")]
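HeaderExt::lookup tries the key verbatim, then lowercased, then in Train-Case, so metadata maps keyed either way still resolve. A short sketch (assuming rustfs-utils is built with the new `http` feature enabled):

use rustfs_utils::http::headers::{is_amz_header, HeaderExt};
use std::collections::HashMap;

fn main() {
    let mut meta = HashMap::new();
    meta.insert("x-amz-meta-name".to_string(), "demo".to_string());

    // The Train-Case query misses verbatim but hits via the lowercase fallback.
    assert_eq!(meta.lookup("X-Amz-Meta-Name"), Some("demo"));
    assert!(is_amz_header("X-Amz-Meta-Name"));
    assert!(!is_amz_header("content-type"));
}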
diff --git a/crates/utils/src/string.rs b/crates/utils/src/string.rs
index a0da9ac9..fc27f9d5 100644
--- a/crates/utils/src/string.rs
+++ b/crates/utils/src/string.rs
@@ -354,6 +354,17 @@ pub fn gen_secret_key(length: usize) -> Result<String> {
     Ok(key_str)
 }
 
+/// Tests whether the string s begins with prefix ignoring case
+pub fn strings_has_prefix_fold(s: &str, prefix: &str) -> bool {
+    if s.len() < prefix.len() {
+        return false;
+    }
+
+    let s_prefix = &s[..prefix.len()];
+    // Test match with case first, then case-insensitive
+    s_prefix == prefix || s_prefix.to_lowercase() == prefix.to_lowercase()
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/rustfs/src/admin/handlers.rs b/rustfs/src/admin/handlers.rs
index ddb9227f..3521d378 100644
--- a/rustfs/src/admin/handlers.rs
+++ b/rustfs/src/admin/handlers.rs
@@ -23,18 +23,19 @@ use futures::{Stream, StreamExt};
 use http::{HeaderMap, Uri};
 use hyper::StatusCode;
 use matchit::Params;
-use percent_encoding::{AsciiSet, CONTROLS, percent_encode};
 use rustfs_common::heal_channel::HealOpts;
 use rustfs_ecstore::admin_server_info::get_server_info;
-use rustfs_ecstore::bucket::metadata_sys::{self, get_replication_config};
+use rustfs_ecstore::bucket::bucket_target_sys::BucketTargetSys;
+use rustfs_ecstore::bucket::metadata::BUCKET_TARGETS_FILE;
+use rustfs_ecstore::bucket::metadata_sys;
 use rustfs_ecstore::bucket::target::BucketTarget;
 use rustfs_ecstore::bucket::versioning_sys::BucketVersioningSys;
-use rustfs_ecstore::cmd::bucket_targets::{self, GLOBAL_Bucket_Target_Sys};
 use rustfs_ecstore::data_usage::{
     aggregate_local_snapshots, compute_bucket_usage, load_data_usage_from_backend, store_data_usage_in_backend,
 };
 use rustfs_ecstore::error::StorageError;
 use rustfs_ecstore::global::get_global_action_cred;
+use rustfs_ecstore::global::global_rustfs_port;
 use rustfs_ecstore::metrics_realtime::{CollectMetricsOpts, MetricType, collect_local_metrics};
 use rustfs_ecstore::new_object_layer_fn;
 use rustfs_ecstore::pools::{get_total_usable_capacity, get_total_usable_capacity_free};
@@ -57,7 +58,6 @@ use s3s::stream::{ByteStream, DynByteStream};
 use s3s::{Body, S3Error, S3Request, S3Response, S3Result, s3_error};
 use s3s::{S3ErrorCode, StdError};
 use serde::{Deserialize, Serialize};
-// use serde_json::to_vec;
 use std::collections::{HashMap, HashSet};
 use std::path::PathBuf;
 use std::pin::Pin;
@@ -70,6 +70,7 @@ use tokio::{select, spawn};
 use tokio_stream::wrappers::ReceiverStream;
 use tracing::debug;
 use tracing::{error, info, warn};
+use url::Host;
 // use url::UrlQuery;
 
 pub mod bucket_meta;
@@ -88,7 +89,6 @@ pub mod trace;
 pub mod user;
 #[cfg(not(target_os = "windows"))]
 use pprof::protos::Message;
-use urlencoding::decode;
 
 #[allow(dead_code)]
 #[derive(Debug, Serialize, Default)]
@@ -971,128 +971,135 @@ impl Operation for GetReplicationMetricsHandler {
 pub struct SetRemoteTargetHandler {}
 #[async_trait::async_trait]
 impl Operation for SetRemoteTargetHandler {
-    async fn call(&self, mut _req: S3Request<Body>, _params: Params<'_, '_>) -> S3Result<S3Response<(StatusCode, Body)>> {
-        //return Ok(S3Response::new((StatusCode::OK, Body::from("OK".to_string()))));
-        debug!("Processing SetRemoteTargetHandler request");
-        info!("SetRemoteTargetHandler credentials: {:?}", _req.credentials);
-        let queries = extract_query_params(&_req.uri);
-        let Some(_cred) = _req.credentials else {
-            error!("credentials null");
-            return Err(s3_error!(InvalidRequest, "get cred failed"));
+    async fn call(&self, req: S3Request<Body>, _params: Params<'_, '_>) -> S3Result<S3Response<(StatusCode, Body)>> {
+        let queries = extract_query_params(&req.uri);
+
+        let Some(bucket) = queries.get("bucket") else {
+            return Err(s3_error!(InvalidRequest, "bucket is required"));
         };
-        let _is_owner = true; // Treat as true for now, decide based on request later
-        let body = _req.input.store_all_unlimited().await.unwrap();
-        debug!("Request body received, size: {} bytes", body.len());
-        if let Some(bucket) =
queries.get("bucket") { - if bucket.is_empty() { - info!("have bucket: {}", bucket); - return Err(S3Error::with_message(S3ErrorCode::InternalError, "No buckets found".to_string())); + let update = queries.get("update").is_some_and(|v| v == "true"); + + warn!("set remote target, bucket: {}, update: {}", bucket, update); + + if bucket.is_empty() { + return Err(s3_error!(InvalidRequest, "bucket is required")); + } + + let Some(store) = new_object_layer_fn() else { + return Err(S3Error::with_message(S3ErrorCode::InternalError, "Not init".to_string())); + }; + + store + .get_bucket_info(bucket, &rustfs_ecstore::store_api::BucketOptions::default()) + .await + .map_err(ApiError::from)?; + + let mut input = req.input; + let body = match input.store_all_unlimited().await { + Ok(b) => b, + Err(e) => { + warn!("get body failed, e: {:?}", e); + return Err(s3_error!(InvalidRequest, "get body failed")); } - let Some(store) = new_object_layer_fn() else { - return Err(S3Error::with_message(S3ErrorCode::InternalError, "Not init".to_string())); - }; + }; - // let binfo:BucketInfo = store - // .get_bucket_info(bucket, &rustfs_ecstore::store_api::BucketOptions::default()).await; - match store - .get_bucket_info(bucket, &rustfs_ecstore::store_api::BucketOptions::default()) - .await - { - Ok(info) => { - info!("Bucket Info: {:?}", info); - if !info.versioning { - return Ok(S3Response::new((StatusCode::FORBIDDEN, Body::from("bucket need versioned".to_string())))); - } - } - Err(err) => { - error!("Error: {:?}", err); - return Ok(S3Response::new((StatusCode::BAD_REQUEST, Body::from("empty bucket".to_string())))); - } - } + let mut remote_target: BucketTarget = serde_json::from_slice(&body).map_err(|e| { + tracing::error!("Failed to parse BucketTarget from body: {}", e); + ApiError::other(e) + })?; - tracing::debug!("body is: {}", std::str::from_utf8(&body).unwrap_or("Invalid UTF-8")); + let Ok(target_url) = remote_target.url() else { + return Err(S3Error::with_message(S3ErrorCode::InternalError, "Invalid target url".to_string())); + }; - let mut remote_target: BucketTarget = serde_json::from_slice(&body).map_err(|e| { - tracing::error!("Failed to parse BucketTarget from body: {}", e); - ApiError::other(e) - })?; - remote_target.source_bucket = bucket.clone(); + let same_target = rustfs_utils::net::is_local_host( + target_url.host().unwrap_or(Host::Domain("localhost")), + target_url.port().unwrap_or(80), + global_rustfs_port(), + ) + .unwrap_or_default(); - info!("remote target {} And arn is:", remote_target.source_bucket.clone()); + if same_target && bucket == &remote_target.target_bucket { + return Err(S3Error::with_message(S3ErrorCode::IncorrectEndpoint, "Same target".to_string())); + } - if let Some(val) = remote_target.arn.clone() { - info!("arn is {}", val); - } + remote_target.source_bucket = bucket.clone(); - if let Some(sys) = GLOBAL_Bucket_Target_Sys.get() { - let (arn, exist) = sys.get_remote_arn(bucket, Some(&remote_target), "").await; - info!("exist: {} {}", exist, arn.clone().unwrap_or_default()); - if exist && arn.is_some() { - let jsonarn = serde_json::to_string(&arn).expect("failed to serialize"); - //Ok(S3Response::new) - return Ok(S3Response::new((StatusCode::OK, Body::from(jsonarn)))); - } else { - remote_target.arn = arn; - match sys.set_target(bucket, &remote_target, false, false).await { - Ok(_) => { - { - //todo various persistence work - let targets = sys.list_targets(Some(bucket), None).await; - info!("targets is {}", targets.len()); - match serde_json::to_vec(&targets) { - Ok(json) => { 
- debug!("Serialized targets configuration, size: {} bytes", json.len()); - //metadata_sys::GLOBAL_BucketMetadataSys:: - //BUCKET_TARGETS_FILE: &str = "bucket-targets.json" - let _ = metadata_sys::update(bucket, "bucket-targets.json", json).await; - // if let Err(err) = metadata_sys::GLOBAL_BucketMetadataSys.get(). - // .update(ctx, bucket, "bucketTargetsFile", tgt_bytes) - // .await - // { - // write_error_response(ctx, &err)?; - // return Err(err); - // } - } - Err(e) => { - error!("Serialization failed: {}", e); - } - } - } + let bucket_target_sys = BucketTargetSys::get(); - let jsonarn = serde_json::to_string(&remote_target.arn.clone()).expect("failed to serialize"); - return Ok(S3Response::new((StatusCode::OK, Body::from(jsonarn)))); - } - Err(e) => { - error!("set target error {}", e); - return Ok(S3Response::new(( - StatusCode::BAD_REQUEST, - Body::from("remote target not ready".to_string()), - ))); - } - } - } - } else { - error!("GLOBAL_BUCKET _TARGET_SYS is not initialized"); - return Err(S3Error::with_message( - S3ErrorCode::InternalError, - "GLOBAL_BUCKET_TARGET_SYS is not initialized".to_string(), - )); + if !update { + let (arn, exist) = bucket_target_sys.get_remote_arn(bucket, Some(&remote_target), "").await; + remote_target.arn = arn.clone(); + if exist && !arn.is_empty() { + let arn_str = serde_json::to_string(&arn).unwrap_or_default(); + + warn!("return exists, arn: {}", arn_str); + return Ok(S3Response::new((StatusCode::OK, Body::from(arn_str)))); } } - // return Err(s3_error!(InvalidArgument)); - return Ok(S3Response::new((StatusCode::OK, Body::from("Ok".to_string())))); + + if remote_target.arn.is_empty() { + return Err(S3Error::with_message(S3ErrorCode::InternalError, "ARN is empty".to_string())); + } + + if update { + let Some(mut target) = bucket_target_sys + .get_remote_bucket_target_by_arn(bucket, &remote_target.arn) + .await + else { + return Err(S3Error::with_message(S3ErrorCode::InternalError, "Target not found".to_string())); + }; + + target.credentials = remote_target.credentials; + target.endpoint = remote_target.endpoint; + target.secure = remote_target.secure; + target.target_bucket = remote_target.target_bucket; + + target.path = remote_target.path; + target.replication_sync = remote_target.replication_sync; + target.bandwidth_limit = remote_target.bandwidth_limit; + target.health_check_duration = remote_target.health_check_duration; + + warn!("update target, target: {:?}", target); + remote_target = target; + } + + let arn = remote_target.arn.clone(); + + bucket_target_sys + .set_target(bucket, &remote_target, update) + .await + .map_err(|e| S3Error::with_message(S3ErrorCode::InternalError, e.to_string()))?; + + let targets = bucket_target_sys.list_bucket_targets(bucket).await.map_err(|e| { + error!("Failed to list bucket targets: {}", e); + S3Error::with_message(S3ErrorCode::InternalError, "Failed to list bucket targets".to_string()) + })?; + let json_targets = serde_json::to_vec(&targets).map_err(|e| { + error!("Serialization error: {}", e); + S3Error::with_message(S3ErrorCode::InternalError, "Failed to serialize targets".to_string()) + })?; + + metadata_sys::update(bucket, BUCKET_TARGETS_FILE, json_targets) + .await + .map_err(|e| { + error!("Failed to update bucket targets: {}", e); + S3Error::with_message(S3ErrorCode::InternalError, format!("Failed to update bucket targets: {e}")) + })?; + + let arn_str = serde_json::to_string(&arn).unwrap_or_default(); + + Ok(S3Response::new((StatusCode::OK, Body::from(arn_str)))) } } pub struct 
ListRemoteTargetHandler {}
 #[async_trait::async_trait]
 impl Operation for ListRemoteTargetHandler {
-    async fn call(&self, _req: S3Request<Body>, _params: Params<'_, '_>) -> S3Result<S3Response<(StatusCode, Body)>> {
-        warn!("list GetRemoteTargetHandler, params: {:?}", _req.credentials);
-
-        let queries = extract_query_params(&_req.uri);
-        let Some(_cred) = _req.credentials else {
+    async fn call(&self, req: S3Request<Body>, _params: Params<'_, '_>) -> S3Result<S3Response<(StatusCode, Body)>> {
+        let queries = extract_query_params(&req.uri);
+        let Some(_cred) = req.credentials else {
             error!("credentials null");
             return Err(s3_error!(InvalidRequest, "get cred failed"));
         };
@@ -1110,65 +1117,48 @@ impl Operation for ListRemoteTargetHandler {
             return Err(S3Error::with_message(S3ErrorCode::InternalError, "Not initialized".to_string()));
         };
 
-        match store
+        if let Err(err) = store
             .get_bucket_info(bucket, &rustfs_ecstore::store_api::BucketOptions::default())
             .await
         {
-            Ok(info) => {
-                info!("Bucket Info: {:?}", info);
-                if !info.versioning {
-                    return Ok(S3Response::new((
-                        StatusCode::FORBIDDEN,
-                        Body::from("Bucket needs versioning".to_string()),
-                    )));
-                }
-            }
-            Err(err) => {
-                error!("Error fetching bucket info: {:?}", err);
-                return Ok(S3Response::new((StatusCode::BAD_REQUEST, Body::from("Invalid bucket".to_string()))));
-            }
+            error!("Error fetching bucket info: {:?}", err);
+            return Ok(S3Response::new((StatusCode::BAD_REQUEST, Body::from("Invalid bucket".to_string()))));
         }
 
-        if let Some(sys) = GLOBAL_Bucket_Target_Sys.get() {
-            let targets = sys.list_targets(Some(bucket), None).await;
-            info!("target sys len {}", targets.len());
-            if targets.is_empty() {
-                return Ok(S3Response::new((
-                    StatusCode::NOT_FOUND,
-                    Body::from("No remote targets found".to_string()),
-                )));
-            }
+        let sys = BucketTargetSys::get();
+        let targets = sys.list_targets(bucket, "").await;
 
-            let json_targets = serde_json::to_string(&targets).map_err(|e| {
-                error!("Serialization error: {}", e);
-                S3Error::with_message(S3ErrorCode::InternalError, "Failed to serialize targets".to_string())
-            })?;
+        let json_targets = serde_json::to_vec(&targets).map_err(|e| {
+            error!("Serialization error: {}", e);
+            S3Error::with_message(S3ErrorCode::InternalError, "Failed to serialize targets".to_string())
+        })?;
 
-            return Ok(S3Response::new((StatusCode::OK, Body::from(json_targets))));
-        } else {
-            error!("GLOBAL_BUCKET_TARGET_SYS is not initialized");
-            return Err(S3Error::with_message(
-                S3ErrorCode::InternalError,
-                "GLOBAL_BUCKET_TARGET_SYS is not initialized".to_string(),
-            ));
-        }
+        let mut header = HeaderMap::new();
+        header.insert(CONTENT_TYPE, "application/json".parse().unwrap());
+
+        return Ok(S3Response::with_headers((StatusCode::OK, Body::from(json_targets)), header));
     }
 
-    warn!("Bucket parameter is missing in request");
-    Ok(S3Response::new((
-        StatusCode::BAD_REQUEST,
-        Body::from("Bucket parameter is required".to_string()),
-    )))
-    //return Err(s3_error!(NotImplemented));
+    let targets: Vec<BucketTarget> = Vec::new();
+
+    let json_targets = serde_json::to_vec(&targets).map_err(|e| {
+        error!("Serialization error: {}", e);
+        S3Error::with_message(S3ErrorCode::InternalError, "Failed to serialize targets".to_string())
+    })?;
+
+    let mut header = HeaderMap::new();
+    header.insert(CONTENT_TYPE, "application/json".parse().unwrap());
+
+    return Ok(S3Response::with_headers((StatusCode::OK, Body::from(json_targets)), header));
 }
 }
-const COLON: AsciiSet = CONTROLS.add(b':');
+
 pub struct RemoveRemoteTargetHandler {}
 #[async_trait::async_trait]
 impl Operation for RemoveRemoteTargetHandler {
-    async fn call(&self, _req: S3Request<Body>, _params: Params<'_, '_>) -> S3Result<S3Response<(StatusCode, Body)>> {
+    async fn call(&self, req: S3Request<Body>, _params: Params<'_, '_>) -> S3Result<S3Response<(StatusCode, Body)>> {
         debug!("remove remote target called");
-        let queries = extract_query_params(&_req.uri);
+        let queries = extract_query_params(&req.uri);
         let Some(bucket) = queries.get("bucket") else {
             return Ok(S3Response::new((
                 StatusCode::BAD_REQUEST,
@@ -1176,54 +1166,45 @@ impl Operation for RemoveRemoteTargetHandler {
             )));
         };
 
-        let mut need_delete = true;
+        let Some(arn_str) = queries.get("arn") else {
+            return Ok(S3Response::new((StatusCode::BAD_REQUEST, Body::from("ARN is required".to_string()))));
+        };
 
-        if let Some(arnstr) = queries.get("arn") {
-            let _arn = bucket_targets::ARN::parse(arnstr);
+        let Some(store) = new_object_layer_fn() else {
+            return Err(S3Error::with_message(S3ErrorCode::InternalError, "Not initialized".to_string()));
+        };
 
-            match get_replication_config(bucket).await {
-                Ok((conf, _ts)) => {
-                    for ru in conf.rules {
-                        let encoded = percent_encode(ru.destination.bucket.as_bytes(), &COLON);
-                        let encoded_str = encoded.to_string();
-                        if *arnstr == encoded_str {
-                            //error!("target in use");
-                            //return Ok(S3Response::new((StatusCode::OK, Body::from("Ok".to_string()))));
-                            need_delete = false;
-                            break;
-                        }
-                        //info!("bucket: {} and arn str is {} ", encoded_str, arnstr);
-                    }
-                }
-                Err(err) => {
-                    error!("get replication config err: {}", err);
-                    return Ok(S3Response::new((StatusCode::NOT_FOUND, Body::from(err.to_string()))));
-                }
-            }
-            if need_delete {
-                info!("arn {} is in use, cannot delete", arnstr);
-                let decoded_str = decode(arnstr).unwrap();
-                error!("need delete target is {}", decoded_str);
-                bucket_targets::remove_bucket_target(bucket, arnstr).await;
-            }
+        if let Err(err) = store
+            .get_bucket_info(bucket, &rustfs_ecstore::store_api::BucketOptions::default())
+            .await
+        {
+            error!("Error fetching bucket info: {:?}", err);
+            return Ok(S3Response::new((StatusCode::BAD_REQUEST, Body::from("Invalid bucket".to_string()))));
         }
-        // List bucket targets and return as JSON to client
-        // match bucket_targets::list_bucket_targets(bucket).await {
-        //     Ok(targets) => {
-        //         let json_targets = serde_json::to_string(&targets).map_err(|e| {
-        //             error!("Serialization error: {}", e);
-        //             S3Error::with_message(S3ErrorCode::InternalError, "Failed to serialize targets".to_string())
-        //         })?;
-        //         return Ok(S3Response::new((StatusCode::OK, Body::from(json_targets))));
-        //     }
-        //     Err(e) => {
-        //         error!("list bucket targets failed: {:?}", e);
-        //         return Err(S3Error::with_message(
-        //             S3ErrorCode::InternalError,
-        //             "list bucket targets failed".to_string(),
-        //         ));
-        //     }
-        // }
+
+        let sys = BucketTargetSys::get();
+
+        sys.remove_target(bucket, arn_str).await.map_err(|e| {
+            error!("Failed to remove target: {}", e);
+            S3Error::with_message(S3ErrorCode::InternalError, "Failed to remove target".to_string())
+        })?;
+
+        let targets = sys.list_bucket_targets(bucket).await.map_err(|e| {
+            error!("Failed to list bucket targets: {}", e);
+            S3Error::with_message(S3ErrorCode::InternalError, "Failed to list bucket targets".to_string())
+        })?;
+
+        let json_targets = serde_json::to_vec(&targets).map_err(|e| {
+            error!("Serialization error: {}", e);
+            S3Error::with_message(S3ErrorCode::InternalError, "Failed to serialize targets".to_string())
+        })?;
+
+        metadata_sys::update(bucket, BUCKET_TARGETS_FILE, json_targets)
+            .await
+            .map_err(|e| {
+                error!("Failed to update bucket targets: {}", e);
+                S3Error::with_message(S3ErrorCode::InternalError, format!("Failed to update bucket targets: {e}"))
+            })?;
 
         return Ok(S3Response::new((StatusCode::NO_CONTENT, Body::from("".to_string()))));
     }
 }
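The set/list/remove handlers above all exchange JSON-encoded BucketTarget values. A hypothetical request-body sketch assembled from the fields this diff touches (exact serde wire names depend on the BucketTarget definition in crates/ecstore/src/bucket/target and may differ):

use serde_json::json;

fn main() {
    // Field names mirror the struct fields used in the handlers above;
    // values are purely illustrative.
    let body = json!({
        "source_bucket": "photos",
        "target_bucket": "photos-dr",
        "endpoint": "replica.example.com:9000",
        "secure": true,
        "replication_sync": false,
        "arn": ""
    });
    println!("{body}");
}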
diff --git a/rustfs/src/admin/handlers/pools.rs b/rustfs/src/admin/handlers/pools.rs
index a1b60aab..c7e4017d 100644
--- a/rustfs/src/admin/handlers/pools.rs
+++ b/rustfs/src/admin/handlers/pools.rs
@@ -19,7 +19,7 @@ use rustfs_policy::policy::action::{Action, AdminAction};
 use s3s::{Body, S3Error, S3ErrorCode, S3Request, S3Response, S3Result, header::CONTENT_TYPE, s3_error};
 use serde::Deserialize;
 use serde_urlencoded::from_bytes;
-use tokio::sync::broadcast;
+use tokio_util::sync::CancellationToken;
 use tracing::warn;
 
 use crate::{
@@ -232,8 +232,7 @@ impl Operation for StartDecommission {
         let pools: Vec<&str> = query.pool.split(",").collect();
         let mut pools_indices = Vec::with_capacity(pools.len());
 
-        // TODO: ctx
-        let (_ctx_tx, ctx_rx) = broadcast::channel::<bool>(1);
+        let ctx = CancellationToken::new();
 
         for pool in pools.iter() {
             let idx = {
@@ -264,7 +263,7 @@ impl Operation for StartDecommission {
         }
 
         if !pools_indices.is_empty() {
-            store.decommission(ctx_rx, pools_indices).await.map_err(ApiError::from)?;
+            store.decommission(ctx.clone(), pools_indices).await.map_err(ApiError::from)?;
         }
 
         Ok(S3Response::new((StatusCode::OK, Body::default())))
diff --git a/rustfs/src/main.rs b/rustfs/src/main.rs
index 5f39827e..8d860ddd 100644
--- a/rustfs/src/main.rs
+++ b/rustfs/src/main.rs
@@ -39,21 +39,22 @@ use rustfs_ahm::{
     scanner::data_scanner::ScannerConfig, shutdown_ahm_services,
 };
 use rustfs_common::globals::set_global_addr;
-use rustfs_config::{DEFAULT_UPDATE_CHECK, ENV_UPDATE_CHECK};
+use rustfs_config::DEFAULT_UPDATE_CHECK;
+use rustfs_config::ENV_UPDATE_CHECK;
+use rustfs_ecstore::bucket::metadata_sys;
+use rustfs_ecstore::bucket::metadata_sys::init_bucket_metadata_sys;
+use rustfs_ecstore::bucket::replication::{GLOBAL_REPLICATION_POOL, init_background_replication};
+use rustfs_ecstore::config as ecconfig;
+use rustfs_ecstore::config::GLOBAL_CONFIG_SYS;
+use rustfs_ecstore::store_api::BucketOptions;
 use rustfs_ecstore::{
     StorageAPI,
-    bucket::metadata_sys,
-    bucket::metadata_sys::init_bucket_metadata_sys,
-    cmd::bucket_replication::init_bucket_replication_pool,
-    config as ecconfig,
-    config::GLOBAL_CONFIG_SYS,
     endpoints::EndpointServerPools,
     global::{set_global_rustfs_port, shutdown_background_services},
     notification_sys::new_global_notification_sys,
     set_global_endpoints,
     store::ECStore,
     store::init_local_disks,
-    store_api::BucketOptions,
     update_erasure_type,
 };
 use rustfs_iam::init_iam_sys;
@@ -65,6 +66,7 @@ use s3s::s3_error;
 use std::io::{Error, Result};
 use std::str::FromStr;
 use std::sync::Arc;
+use tokio_util::sync::CancellationToken;
 use tracing::{debug, error, info, instrument, warn};
 
 #[cfg(all(target_os = "linux", target_env = "gnu"))]
@@ -276,15 +278,21 @@ async fn run(opt: config::Opt) -> Result<()> {
     // Initialize the local disk
     init_local_disks(endpoint_pools.clone()).await.map_err(Error::other)?;
 
+    let ctx = CancellationToken::new();
+
     // init store
-    let store = ECStore::new(server_addr, endpoint_pools.clone()).await.inspect_err(|err| {
-        error!("ECStore::new {:?}", err);
-    })?;
+    let store = ECStore::new(server_addr, endpoint_pools.clone(), ctx.clone())
+        .await
+        .inspect_err(|err| {
+            error!("ECStore::new {:?}", err);
+        })?;
 
     ecconfig::init();
     // config system configuration
     GLOBAL_CONFIG_SYS.init(store.clone()).await?;
 
+    // init replication_pool
+    init_background_replication(store.clone()).await;
 
     // Initialize KMS system if enabled
     init_kms_system(&opt).await?;
 
@@ 
diff --git a/rustfs/src/main.rs b/rustfs/src/main.rs
index 5f39827e..8d860ddd 100644
--- a/rustfs/src/main.rs
+++ b/rustfs/src/main.rs
@@ -39,21 +39,22 @@ use rustfs_ahm::{
     scanner::data_scanner::ScannerConfig, shutdown_ahm_services,
 };
 use rustfs_common::globals::set_global_addr;
-use rustfs_config::{DEFAULT_UPDATE_CHECK, ENV_UPDATE_CHECK};
+use rustfs_config::DEFAULT_UPDATE_CHECK;
+use rustfs_config::ENV_UPDATE_CHECK;
+use rustfs_ecstore::bucket::metadata_sys;
+use rustfs_ecstore::bucket::metadata_sys::init_bucket_metadata_sys;
+use rustfs_ecstore::bucket::replication::{GLOBAL_REPLICATION_POOL, init_background_replication};
+use rustfs_ecstore::config as ecconfig;
+use rustfs_ecstore::config::GLOBAL_CONFIG_SYS;
+use rustfs_ecstore::store_api::BucketOptions;
 use rustfs_ecstore::{
     StorageAPI,
-    bucket::metadata_sys,
-    bucket::metadata_sys::init_bucket_metadata_sys,
-    cmd::bucket_replication::init_bucket_replication_pool,
-    config as ecconfig,
-    config::GLOBAL_CONFIG_SYS,
     endpoints::EndpointServerPools,
     global::{set_global_rustfs_port, shutdown_background_services},
     notification_sys::new_global_notification_sys,
     set_global_endpoints,
     store::ECStore,
     store::init_local_disks,
-    store_api::BucketOptions,
     update_erasure_type,
 };
 use rustfs_iam::init_iam_sys;
@@ -65,6 +66,7 @@ use s3s::s3_error;
 use std::io::{Error, Result};
 use std::str::FromStr;
 use std::sync::Arc;
+use tokio_util::sync::CancellationToken;
 use tracing::{debug, error, info, instrument, warn};
 
 #[cfg(all(target_os = "linux", target_env = "gnu"))]
@@ -276,15 +278,21 @@ async fn run(opt: config::Opt) -> Result<()> {
     // Initialize the local disk
     init_local_disks(endpoint_pools.clone()).await.map_err(Error::other)?;
 
+    let ctx = CancellationToken::new();
+
     // init store
-    let store = ECStore::new(server_addr, endpoint_pools.clone()).await.inspect_err(|err| {
-        error!("ECStore::new {:?}", err);
-    })?;
+    let store = ECStore::new(server_addr, endpoint_pools.clone(), ctx.clone())
+        .await
+        .inspect_err(|err| {
+            error!("ECStore::new {:?}", err);
+        })?;
 
     ecconfig::init();
     // config system configuration
     GLOBAL_CONFIG_SYS.init(store.clone()).await?;
 
+    // init replication_pool
+    init_background_replication(store.clone()).await;
 
     // Initialize KMS system if enabled
     init_kms_system(&opt).await?;
@@ -307,6 +315,10 @@ async fn run(opt: config::Opt) -> Result<()> {
     // Collect bucket names into a vector
     let buckets: Vec<String> = buckets_list.into_iter().map(|v| v.name).collect();
 
+    if let Some(pool) = GLOBAL_REPLICATION_POOL.get() {
+        pool.clone().init_resync(ctx.clone(), buckets.clone()).await?;
+    }
+
     init_bucket_metadata_sys(store.clone(), buckets.clone()).await;
 
     init_iam_sys(store.clone()).await.map_err(Error::other)?;
@@ -358,8 +370,6 @@ async fn run(opt: config::Opt) -> Result<()> {
     // print server info
     print_server_info();
 
-    // initialize bucket replication pool
-    init_bucket_replication_pool().await;
 
     init_update_check();
 
@@ -369,11 +379,11 @@ async fn run(opt: config::Opt) -> Result<()> {
     match wait_for_shutdown().await {
         #[cfg(unix)]
         ShutdownSignal::CtrlC | ShutdownSignal::Sigint | ShutdownSignal::Sigterm => {
-            handle_shutdown(&state_manager, &shutdown_tx).await;
+            handle_shutdown(&state_manager, &shutdown_tx, ctx.clone()).await;
         }
         #[cfg(not(unix))]
         ShutdownSignal::CtrlC => {
-            handle_shutdown(&state_manager, &shutdown_tx).await;
+            handle_shutdown(&state_manager, &shutdown_tx, ctx.clone()).await;
         }
     }
 
@@ -393,7 +403,13 @@ fn parse_bool_env_var(var_name: &str, default: bool) -> bool {
 }
 
 /// Handles the shutdown process of the server
-async fn handle_shutdown(state_manager: &ServiceStateManager, shutdown_tx: &tokio::sync::broadcast::Sender<()>) {
+async fn handle_shutdown(
+    state_manager: &ServiceStateManager,
+    shutdown_tx: &tokio::sync::broadcast::Sender<()>,
+    ctx: CancellationToken,
+) {
+    ctx.cancel();
+
     info!(
         target: "rustfs::main::handle_shutdown",
         "Shutdown signal received in main thread"
@@ -630,13 +646,13 @@ async fn init_kms_system(opt: &config::Opt) -> Result<()> {
         service_manager
             .configure(kms_config)
             .await
-            .map_err(|e| Error::other(format!("Failed to configure KMS: {}", e)))?;
+            .map_err(|e| Error::other(format!("Failed to configure KMS: {e}")))?;
 
         // Start the KMS service
         service_manager
             .start()
             .await
-            .map_err(|e| Error::other(format!("Failed to start KMS: {}", e)))?;
+            .map_err(|e| Error::other(format!("Failed to start KMS: {e}")))?;
 
         info!("KMS service configured and started successfully");
     } else {
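Taken together, the main.rs hunks pin down a startup and shutdown order for the replication machinery. Condensed, with error handling elided:

    let ctx = CancellationToken::new();
    let store = ECStore::new(server_addr, endpoint_pools.clone(), ctx.clone()).await?;
    GLOBAL_CONFIG_SYS.init(store.clone()).await?;
    init_background_replication(store.clone()).await; // expected to populate GLOBAL_REPLICATION_POOL
    if let Some(pool) = GLOBAL_REPLICATION_POOL.get() {
        pool.clone().init_resync(ctx.clone(), buckets.clone()).await?; // resume pending resyncs
    }
    // On shutdown, handle_shutdown() calls ctx.cancel(), and the store and
    // replication workers stop through their token clones.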
diff --git a/rustfs/src/storage/ecfs.rs b/rustfs/src/storage/ecfs.rs
index c21d8435..a786e7ab 100644
--- a/rustfs/src/storage/ecfs.rs
+++ b/rustfs/src/storage/ecfs.rs
@@ -29,6 +29,33 @@ use chrono::Utc;
 use datafusion::arrow::csv::WriterBuilder as CsvWriterBuilder;
 use datafusion::arrow::json::WriterBuilder as JsonWriterBuilder;
 use datafusion::arrow::json::writer::JsonArray;
+use http::StatusCode;
+use rustfs_ecstore::bucket::metadata_sys::get_replication_config;
+use rustfs_ecstore::bucket::object_lock::objectlock_sys::BucketObjectLockSys;
+use rustfs_ecstore::bucket::replication::DeletedObjectReplicationInfo;
+use rustfs_ecstore::bucket::replication::REPLICATE_INCOMING_DELETE;
+use rustfs_ecstore::bucket::replication::ReplicationConfigurationExt;
+use rustfs_ecstore::bucket::replication::check_replicate_delete;
+use rustfs_ecstore::bucket::replication::get_must_replicate_options;
+use rustfs_ecstore::bucket::replication::must_replicate;
+use rustfs_ecstore::bucket::replication::schedule_replication;
+use rustfs_ecstore::bucket::replication::schedule_replication_delete;
+use rustfs_ecstore::bucket::versioning::VersioningApi;
+use rustfs_ecstore::disk::error::DiskError;
+use rustfs_ecstore::disk::error_reduce::is_all_buckets_not_found;
+use rustfs_ecstore::error::is_err_bucket_not_found;
+use rustfs_ecstore::error::is_err_object_not_found;
+use rustfs_ecstore::error::is_err_version_not_found;
+use rustfs_ecstore::set_disk::MAX_PARTS_COUNT;
+use rustfs_ecstore::store_api::ObjectInfo;
+use rustfs_filemeta::ReplicationStatusType;
+use rustfs_filemeta::ReplicationType;
+use rustfs_filemeta::VersionPurgeStatusType;
+use rustfs_s3select_api::object_store::bytes_stream;
+use rustfs_s3select_api::query::Context;
+use rustfs_s3select_api::query::Query;
+use rustfs_s3select_query::get_global_db;
+
 // use rustfs_ecstore::store_api::RESERVED_METADATA_PREFIX;
 use base64::{Engine, engine::general_purpose::STANDARD as BASE64_STANDARD};
 use futures::StreamExt;
@@ -49,16 +76,10 @@ use rustfs_ecstore::bucket::tagging::decode_tags;
 use rustfs_ecstore::bucket::tagging::encode_tags;
 use rustfs_ecstore::bucket::utils::serialize;
 use rustfs_ecstore::bucket::versioning_sys::BucketVersioningSys;
-use rustfs_ecstore::cmd::bucket_replication::ReplicationStatusType;
-use rustfs_ecstore::cmd::bucket_replication::ReplicationType;
-use rustfs_ecstore::cmd::bucket_replication::get_must_replicate_options;
-use rustfs_ecstore::cmd::bucket_replication::must_replicate;
-use rustfs_ecstore::cmd::bucket_replication::schedule_replication;
 use rustfs_ecstore::compress::MIN_COMPRESSIBLE_SIZE;
 use rustfs_ecstore::compress::is_compressible;
 use rustfs_ecstore::error::StorageError;
 use rustfs_ecstore::new_object_layer_fn;
-use rustfs_ecstore::set_disk::MAX_PARTS_COUNT;
 use rustfs_ecstore::set_disk::{DEFAULT_READ_BUFFER_SIZE, is_valid_storage_class};
 use rustfs_ecstore::store_api::BucketOptions;
 use rustfs_ecstore::store_api::CompletePart;
@@ -72,8 +93,6 @@ use rustfs_ecstore::store_api::ObjectToDelete;
 use rustfs_ecstore::store_api::PutObjReader;
 use rustfs_ecstore::store_api::StorageAPI;
 use rustfs_filemeta::fileinfo::ObjectPartInfo;
-use rustfs_filemeta::headers::RESERVED_METADATA_PREFIX_LOWER;
-use rustfs_filemeta::headers::{AMZ_DECODED_CONTENT_LENGTH, AMZ_OBJECT_TAGGING};
 use rustfs_kms::DataKey;
 use rustfs_kms::service_manager::get_global_encryption_service;
 use rustfs_kms::types::{EncryptionMetadata, ObjectEncryptionContext};
@@ -88,13 +107,13 @@ use rustfs_rio::HashReader;
 use rustfs_rio::Reader;
 use rustfs_rio::WarpReader;
 use rustfs_rio::{DecryptReader, EncryptReader, HardLimitReader};
-use rustfs_s3select_api::object_store::bytes_stream;
-use rustfs_s3select_api::query::Context;
-use rustfs_s3select_api::query::Query;
-use rustfs_s3select_query::get_global_db;
 use rustfs_targets::EventName;
 use rustfs_targets::arn::{TargetID, TargetIDError};
 use rustfs_utils::CompressionAlgorithm;
+use rustfs_utils::http::AMZ_BUCKET_REPLICATION_STATUS;
+use rustfs_utils::http::headers::RESERVED_METADATA_PREFIX_LOWER;
+use rustfs_utils::http::headers::{AMZ_DECODED_CONTENT_LENGTH, AMZ_OBJECT_TAGGING};
+use rustfs_utils::path::is_dir_object;
 use rustfs_utils::path::path_join_buf;
 use rustfs_zip::CompressionFormat;
 use s3s::S3;
@@ -188,7 +207,7 @@ async fn create_managed_encryption_material(
     let (data_key, encrypted_data_key) = service
         .create_data_key(&kms_key_candidate, &context)
         .await
-        .map_err(|e| ApiError::from(StorageError::other(format!("Failed to create data key: {}", e))))?;
+        .map_err(|e| ApiError::from(StorageError::other(format!("Failed to create data key: {e}"))))?;
 
     let metadata = EncryptionMetadata {
         algorithm: algorithm_str.to_string(),
@@ -227,7 +246,7 @@ async fn decrypt_managed_encryption_key(
     let parsed = service
         .headers_to_metadata(metadata)
-        .map_err(|e| ApiError::from(StorageError::other(format!("Failed to parse encryption metadata: {}", e))))?;
+        .map_err(|e| ApiError::from(StorageError::other(format!("Failed to parse encryption metadata: {e}"))))?;
 
     if parsed.iv.len() != 12 {
         return Err(ApiError::from(StorageError::other("Invalid encryption nonce length; expected 12 bytes")));
@@ -237,7 +256,7 @@ async fn decrypt_managed_encryption_key(
     let data_key = service
         .decrypt_data_key(&parsed.encrypted_data_key, &context)
         .await
-        .map_err(|e| ApiError::from(StorageError::other(format!("Failed to decrypt data key: {}", e))))?;
+        .map_err(|e| ApiError::from(StorageError::other(format!("Failed to decrypt data key: {e}"))))?;
 
     let key_bytes = data_key.plaintext_key;
     let mut nonce = [0u8; 12];
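The import shuffle above is the visible surface of the refactor: everything replication-related moves out of the deleted `ecstore::cmd` tree into its new homes. For code that tracked the old paths, the mapping shown in the hunks is:

    // status and type enums now live in rustfs_filemeta
    use rustfs_filemeta::{ReplicationStatusType, ReplicationType, VersionPurgeStatusType};
    // decision helpers and schedulers now live under bucket::replication
    use rustfs_ecstore::bucket::replication::{
        check_replicate_delete, get_must_replicate_options, must_replicate,
        schedule_replication, schedule_replication_delete,
    };
    // shared header names moved from rustfs_filemeta::headers into rustfs_utils
    use rustfs_utils::http::headers::{AMZ_DECODED_CONTENT_LENGTH, AMZ_OBJECT_TAGGING, RESERVED_METADATA_PREFIX_LOWER};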
@@ -967,67 +986,120 @@ impl S3 for FS {
 
     /// Delete an object
     #[tracing::instrument(level = "debug", skip(self, req))]
-    async fn delete_object(&self, req: S3Request<DeleteObjectInput>) -> S3Result<S3Response<DeleteObjectOutput>> {
+    async fn delete_object(&self, mut req: S3Request<DeleteObjectInput>) -> S3Result<S3Response<DeleteObjectOutput>> {
         let DeleteObjectInput {
             bucket, key, version_id, ..
         } = req.input.clone();
 
+        let replica = req
+            .headers
+            .get(AMZ_BUCKET_REPLICATION_STATUS)
+            .map(|v| v.to_str().unwrap_or_default() == ReplicationStatusType::Replica.as_str())
+            .unwrap_or_default();
+
+        if replica {
+            authorize_request(&mut req, Action::S3Action(S3Action::ReplicateDeleteAction)).await?;
+        }
+
         let metadata = extract_metadata(&req.headers);
 
-        let opts: ObjectOptions = del_opts(&bucket, &key, version_id, &req.headers, metadata)
+        let mut opts: ObjectOptions = del_opts(&bucket, &key, version_id, &req.headers, metadata)
             .await
             .map_err(ApiError::from)?;
 
-        let version_id = opts.version_id.as_ref().map(|v| Uuid::parse_str(v).ok()).unwrap_or_default();
-        let dobj = ObjectToDelete {
-            object_name: key.clone(),
-            version_id,
-        };
+        // TODO: check object lock
 
-        let objects: Vec<ObjectToDelete> = vec![dobj];
+        let lock_cfg = BucketObjectLockSys::get(&bucket).await;
+        if lock_cfg.is_some() && opts.delete_prefix {
+            return Err(S3Error::with_message(
+                S3ErrorCode::Custom("force-delete is forbidden on Object Locking enabled buckets".into()),
+                "force-delete is forbidden on Object Locking enabled buckets",
+            ));
+        }
+
+        // let mut vid = opts.version_id.clone();
+
+        if replica {
+            opts.set_replica_status(ReplicationStatusType::Replica);
+
+            // if opts.version_purge_status().is_empty() {
+            //     vid = None;
+            // }
+        }
 
         let Some(store) = new_object_layer_fn() else {
             return Err(S3Error::with_message(S3ErrorCode::InternalError, "Not init".to_string()));
         };
 
-        let (dobjs, _errs) = store.delete_objects(&bucket, objects, opts).await.map_err(ApiError::from)?;
-        // TODO: let errors;
+        let obj_info = {
+            match store.delete_object(&bucket, &key, opts).await {
+                Ok(obj) => obj,
+                Err(err) => {
+                    if is_err_bucket_not_found(&err) {
+                        return Err(S3Error::with_message(S3ErrorCode::NoSuchBucket, "Bucket not found".to_string()));
+                    }
 
-        let (delete_marker, version_id) = {
-            if let Some((a, b)) = dobjs
-                .iter()
-                .map(|v| {
-                    let delete_marker = { if v.delete_marker { Some(true) } else { None } };
+                    if is_err_object_not_found(&err) || is_err_version_not_found(&err) {
+                        // TODO: send event
 
-                    let version_id = v.version_id.clone();
+                        return Ok(S3Response::with_status(DeleteObjectOutput::default(), StatusCode::NO_CONTENT));
+                    }
 
-                    (delete_marker, version_id)
-                })
-                .next()
-            {
-                (a, b)
-            } else {
-                (None, None)
+                    return Err(ApiError::from(err).into());
+                }
             }
         };
-        let del_version_id = version_id.as_ref().map(|v| v.to_string()).unwrap_or_default();
+
+        if obj_info.name.is_empty() {
+            return Ok(S3Response::with_status(DeleteObjectOutput::default(), StatusCode::NO_CONTENT));
+        }
+
+        if obj_info.replication_status == ReplicationStatusType::Replica
+            || obj_info.version_purge_status == VersionPurgeStatusType::Pending
+        {
+            schedule_replication_delete(DeletedObjectReplicationInfo {
+                delete_object: rustfs_ecstore::store_api::DeletedObject {
+                    delete_marker: obj_info.delete_marker,
+                    delete_marker_version_id: if obj_info.delete_marker { obj_info.version_id } else { None },
+                    object_name: key.clone(),
+                    version_id: if obj_info.delete_marker { None } else { obj_info.version_id },
+                    delete_marker_mtime: obj_info.mod_time,
+                    replication_state: Some(obj_info.replication_state()),
+                    ..Default::default()
+                },
+                bucket: bucket.clone(),
+                event_type: REPLICATE_INCOMING_DELETE.to_string(),
+                ..Default::default()
+            })
+            .await;
+        }
+
+        let delete_marker = obj_info.delete_marker;
+        let version_id = obj_info.version_id;
+
         let output = DeleteObjectOutput {
-            delete_marker,
-            version_id,
+            delete_marker: Some(delete_marker),
+            version_id: version_id.map(|v| v.to_string()),
             ..Default::default()
         };
 
+        let event_name = if delete_marker {
+            EventName::ObjectRemovedDeleteMarkerCreated
+        } else {
+            EventName::ObjectRemovedDelete
+        };
+
         let event_args = rustfs_notify::event::EventArgs {
-            event_name: EventName::ObjectRemovedDelete,
+            event_name,
             bucket_name: bucket.clone(),
             object: rustfs_ecstore::store_api::ObjectInfo {
-                name: key,
-                bucket,
+                name: key.clone(),
+                bucket: bucket.clone(),
                 ..Default::default()
             },
             req_params: rustfs_utils::extract_req_params_header(&req.headers),
             resp_elements: rustfs_utils::extract_resp_elements(&S3Response::new(DeleteBucketOutput {})),
-            version_id: del_version_id,
+            version_id: version_id.map(|v| v.to_string()).unwrap_or_default(),
             host: rustfs_utils::get_request_host(&req.headers),
             user_agent: rustfs_utils::get_request_user_agent(&req.headers),
         };
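Condensed from the hunk above: a delete is queued for the remote site in one of two shapes. Deleting the latest version creates a delete marker, so the marker is replicated; deleting a specific version purges it, so the purge is replicated. The envelope handed to the scheduler encodes which case applies through where the version id travels:

    let info = DeletedObjectReplicationInfo {
        delete_object: rustfs_ecstore::store_api::DeletedObject {
            delete_marker: obj_info.delete_marker,
            // marker case: the version id rides in the marker slot...
            delete_marker_version_id: if obj_info.delete_marker { obj_info.version_id } else { None },
            // ...purge case: it stays in the version slot
            version_id: if obj_info.delete_marker { None } else { obj_info.version_id },
            object_name: key.clone(),
            replication_state: Some(obj_info.replication_state()),
            ..Default::default()
        },
        bucket: bucket.clone(),
        event_type: REPLICATE_INCOMING_DELETE.to_string(),
        ..Default::default()
    };
    schedule_replication_delete(info).await;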
@@ -1043,54 +1115,228 @@ impl S3 for FS {
     /// Delete multiple objects
     #[tracing::instrument(level = "debug", skip(self, req))]
     async fn delete_objects(&self, req: S3Request<DeleteObjectsInput>) -> S3Result<S3Response<DeleteObjectsOutput>> {
-        // info!("delete_objects args {:?}", req.input);
-
         let DeleteObjectsInput { bucket, delete, .. } = req.input;
 
-        let objects: Vec<ObjectToDelete> = delete
-            .objects
-            .iter()
-            .map(|v| {
-                let version_id = v.version_id.as_ref().map(|v| Uuid::parse_str(v).ok()).unwrap_or_default();
-                ObjectToDelete {
+        if delete.objects.is_empty() || delete.objects.len() > 1000 {
+            return Err(S3Error::with_message(
+                S3ErrorCode::InvalidArgument,
+                "No objects to delete or too many objects to delete".to_string(),
+            ));
+        }
+
+        let replicate_deletes = has_replication_rules(
+            &bucket,
+            &delete
+                .objects
+                .iter()
+                .map(|v| ObjectToDelete {
                     object_name: v.key.clone(),
-                    version_id,
-                }
-            })
-            .collect();
+                    ..Default::default()
+                })
+                .collect::<Vec<_>>(),
+        )
+        .await;
 
         let Some(store) = new_object_layer_fn() else {
             return Err(S3Error::with_message(S3ErrorCode::InternalError, "Not init".to_string()));
         };
 
-        let metadata = extract_metadata(&req.headers);
+        let has_lock_enable = BucketObjectLockSys::get(&bucket).await.is_some();
 
-        let opts: ObjectOptions = del_opts(&bucket, "", None, &req.headers, metadata)
-            .await
-            .map_err(ApiError::from)?;
+        let version_cfg = BucketVersioningSys::get(&bucket).await.unwrap_or_default();
 
-        let (dobjs, errs) = store.delete_objects(&bucket, objects, opts).await.map_err(ApiError::from)?;
+        #[derive(Default, Clone)]
+        struct DeleteResult {
+            delete_object: Option<DeletedObject>,
+            error: Option<Error>,
+        }
 
-        let deleted = dobjs
+        let mut delete_results = vec![DeleteResult::default(); delete.objects.len()];
+
+        let mut object_to_delete = Vec::new();
+        let mut object_to_delete_index = HashMap::new();
+
+        for (idx, object) in delete.objects.iter().enumerate() {
+            // TODO: check auth
+            if let Some(version_id) = object.version_id.clone() {
+                let _vid = match Uuid::parse_str(&version_id) {
+                    Ok(v) => v,
+                    Err(err) => {
+                        delete_results[idx].error = Some(Error {
+                            code: Some("NoSuchVersion".to_string()),
+                            key: Some(object.key.clone()),
+                            message: Some(err.to_string()),
+                            version_id: Some(version_id),
+                        });
+
+                        continue;
+                    }
+                };
+            };
+
+            let mut object = ObjectToDelete {
+                object_name: object.key.clone(),
+                version_id: object.version_id.clone().map(|v| Uuid::parse_str(&v).unwrap()),
+                ..Default::default()
+            };
+
+            let opts = ObjectOptions {
+                version_id: object.version_id.map(|v| v.to_string()),
+                versioned: version_cfg.prefix_enabled(&object.object_name),
+                version_suspended: version_cfg.suspended(),
+                ..Default::default()
+            };
+
+            let mut goi = ObjectInfo::default();
+            let mut gerr = None;
+
+            if replicate_deletes || object.version_id.is_some() && has_lock_enable {
+                (goi, gerr) = match store.get_object_info(&bucket, &object.object_name, &opts).await {
+                    Ok(res) => (res, None),
+                    Err(e) => (ObjectInfo::default(), Some(e.to_string())),
+                };
+            }
+
+            if is_dir_object(&object.object_name) && object.version_id.is_none() {
+                object.version_id = Some(Uuid::nil());
+            }
+
+            if replicate_deletes {
+                let dsc = check_replicate_delete(
+                    &bucket,
+                    &ObjectToDelete {
+                        object_name: object.object_name.clone(),
+                        version_id: object.version_id,
+                        ..Default::default()
+                    },
+                    &goi,
+                    &opts,
+                    gerr.clone(),
+                )
+                .await;
+                if dsc.replicate_any() {
+                    if object.version_id.is_some() {
+                        object.version_purge_status = Some(VersionPurgeStatusType::Pending);
+                        object.version_purge_statuses = dsc.pending_status();
+                    } else {
+                        object.delete_marker_replication_status = dsc.pending_status();
+                    }
+                    object.replicate_decision_str = Some(dsc.to_string());
+                }
+            }
+
+            // TODO: Retention
+            object_to_delete_index.insert(object.object_name.clone(), idx);
+            object_to_delete.push(object);
+        }
+
+        let (mut dobjs, errs) = {
+            store
+                .delete_objects(
+                    &bucket,
+                    object_to_delete.clone(),
+                    ObjectOptions {
+                        version_suspended: version_cfg.suspended(),
+                        ..Default::default()
+                    },
+                )
+                .await
+        };
+
+        if is_all_buckets_not_found(
+            &errs
+                .iter()
+                .map(|v| v.as_ref().map(|v| v.clone().into()))
+                .collect::<Vec<Option<DiskError>>>() as &[Option<DiskError>],
+        ) {
+            return Err(S3Error::with_message(S3ErrorCode::NoSuchBucket, "Bucket not found".to_string()));
+        }
+
+        for (i, err) in errs.into_iter().enumerate() {
+            let obj = dobjs[i].clone();
+
+            // let replication_state = obj.replication_state.clone().unwrap_or_default();
+
+            // let obj_to_del = ObjectToDelete {
+            //     object_name: decode_dir_object(dobjs[i].object_name.as_str()),
+            //     version_id: obj.version_id,
+            //     delete_marker_replication_status: replication_state.replication_status_internal.clone(),
+            //     version_purge_status: Some(obj.version_purge_status()),
+            //     version_purge_statuses: replication_state.version_purge_status_internal.clone(),
+            //     replicate_decision_str: Some(replication_state.replicate_decision_str.clone()),
+            // };
+
+            let Some(didx) = object_to_delete_index.get(&obj.object_name) else {
+                continue;
+            };
+
+            if err.is_none()
+                || err
+                    .clone()
+                    .is_some_and(|v| is_err_object_not_found(&v) || is_err_version_not_found(&v))
+            {
+                if replicate_deletes {
+                    dobjs[i].replication_state = Some(object_to_delete[i].replication_state());
+                }
+                delete_results[*didx].delete_object = Some(dobjs[i].clone());
+                continue;
+            }
+
+            if let Some(err) = err {
+                delete_results[*didx].error = Some(Error {
+                    code: Some(err.to_string()),
+                    key: Some(object_to_delete[i].object_name.clone()),
+                    message: Some(err.to_string()),
+                    version_id: object_to_delete[i].version_id.map(|v| v.to_string()),
+                });
+            }
+        }
+
+        let deleted = delete_results
             .iter()
+            .filter_map(|v| v.delete_object.clone())
             .map(|v| DeletedObject {
                 delete_marker: { if v.delete_marker { Some(true) } else { None } },
-                delete_marker_version_id: v.delete_marker_version_id.clone(),
+                delete_marker_version_id: v.delete_marker_version_id.map(|v| v.to_string()),
                 key: Some(v.object_name.clone()),
-                version_id: v.version_id.clone(),
+                version_id: if is_dir_object(v.object_name.as_str()) && v.version_id == Some(Uuid::nil()) {
+                    None
+                } else {
+                    v.version_id.map(|v| v.to_string())
+                },
             })
             .collect();
 
-        // TODO: let errors;
-        for err in errs.iter().flatten() {
-            warn!("delete_objects err {:?}", err);
-        }
+        let errors = delete_results.iter().filter_map(|v| v.error.clone()).collect::<Vec<_>>();
 
         let output = DeleteObjectsOutput {
             deleted: Some(deleted),
-            // errors,
+            errors: Some(errors),
             ..Default::default()
         };
 
+        for dobjs in delete_results.iter() {
+            if let Some(dobj) = &dobjs.delete_object {
+                if replicate_deletes
+                    && (dobj.delete_marker_replication_status() == ReplicationStatusType::Pending
+                        || dobj.version_purge_status() == VersionPurgeStatusType::Pending)
+                {
+                    let mut dobj = dobj.clone();
+                    if is_dir_object(dobj.object_name.as_str()) && dobj.version_id.is_none() {
+                        dobj.version_id = Some(Uuid::nil());
+                    }
+
+                    let deleted_object = DeletedObjectReplicationInfo {
+                        delete_object: dobj,
+                        bucket: bucket.clone(),
+                        event_type: REPLICATE_INCOMING_DELETE.to_string(),
+                        ..Default::default()
+                    };
+                    schedule_replication_delete(deleted_object).await;
+                }
+            }
+        }
+
         // Asynchronous call will not block the response of the current request
         tokio::spawn(async move {
             for dobj in dobjs {
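The batch path reports results positionally: slot `i` of the response answers entry `i` of the request, even though validation failures short-circuit and the store may return results in a different order. The bookkeeping, restated from the hunk above:

    #[derive(Default, Clone)]
    struct DeleteResult {
        delete_object: Option<DeletedObject>, // success slot
        error: Option<Error>,                 // failure slot (s3s dto Error)
    }

    // one slot per request entry, filled in whatever order results arrive
    let mut delete_results = vec![DeleteResult::default(); delete.objects.len()];
    // object name -> original slot, so results from the batched store call
    // can be written back into the right position
    let mut object_to_delete_index = HashMap::new();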
@@ -1330,7 +1576,7 @@ impl S3 for FS {
                 // Decode the base64 key
                 let key_bytes = BASE64_STANDARD
                     .decode(sse_key)
-                    .map_err(|e| ApiError::from(StorageError::other(format!("Invalid SSE-C key: {}", e))))?;
+                    .map_err(|e| ApiError::from(StorageError::other(format!("Invalid SSE-C key: {e}"))))?;
 
                 // Verify key length (should be 32 bytes for AES-256)
                 if key_bytes.len() != 32 {
@@ -1349,7 +1595,7 @@ impl S3 for FS {
 
                 // Generate the same deterministic nonce from object key
                 let mut nonce = [0u8; 12];
-                let nonce_source = format!("{}-{}", bucket, key);
+                let nonce_source = format!("{bucket}-{key}");
                 let nonce_hash = md5::compute(nonce_source.as_bytes());
                 nonce.copy_from_slice(&nonce_hash.0[..12]);
 
@@ -1999,7 +2245,7 @@ impl S3 for FS {
         // Decode the base64 key
         let key_bytes = BASE64_STANDARD
             .decode(sse_key)
-            .map_err(|e| ApiError::from(StorageError::other(format!("Invalid SSE-C key: {}", e))))?;
+            .map_err(|e| ApiError::from(StorageError::other(format!("Invalid SSE-C key: {e}"))))?;
 
         // Verify key length (should be 32 bytes for AES-256)
         if key_bytes.len() != 32 {
@@ -2025,7 +2271,7 @@ impl S3 for FS {
 
         // Generate a deterministic nonce from object key for consistency
         let mut nonce = [0u8; 12];
-        let nonce_source = format!("{}-{}", bucket, key);
+        let nonce_source = format!("{bucket}-{key}");
         let nonce_hash = md5::compute(nonce_source.as_bytes());
         nonce.copy_from_slice(&nonce_hash.0[..12]);
 
@@ -2070,17 +2316,17 @@ impl S3 for FS {
             .map_err(ApiError::from)?;
 
         let repoptions =
-            get_must_replicate_options(&mt2, "", ReplicationStatusType::Unknown, ReplicationType::ObjectReplicationType, &opts);
+            get_must_replicate_options(&mt2, "".to_string(), ReplicationStatusType::Empty, ReplicationType::Object, opts.clone());
+
+        let dsc = must_replicate(&bucket, &key, repoptions).await;
 
-        let dsc = must_replicate(&bucket, &key, &repoptions).await;
-        // warn!("dsc {}", &dsc.replicate_any().clone());
         if dsc.replicate_any() {
             let k = format!("{}{}", RESERVED_METADATA_PREFIX_LOWER, "replication-timestamp");
             let now: DateTime<Utc> = Utc::now();
             let formatted_time = now.to_rfc3339();
             opts.user_defined.insert(k, formatted_time);
             let k = format!("{}{}", RESERVED_METADATA_PREFIX_LOWER, "replication-status");
-            opts.user_defined.insert(k, dsc.pending_status());
+            opts.user_defined.insert(k, dsc.pending_status().unwrap_or_default());
         }
 
         let obj_info = store
@@ -2091,13 +2337,12 @@ impl S3 for FS {
         let e_tag = obj_info.etag.clone();
 
         let repoptions =
-            get_must_replicate_options(&mt2, "", ReplicationStatusType::Unknown, ReplicationType::ObjectReplicationType, &opts);
+            get_must_replicate_options(&mt2, "".to_string(), ReplicationStatusType::Empty, ReplicationType::Object, opts);
 
-        let dsc = must_replicate(&bucket, &key, &repoptions).await;
+        let dsc = must_replicate(&bucket, &key, repoptions).await;
 
         if dsc.replicate_any() {
-            let objectlayer = new_object_layer_fn();
-            schedule_replication(obj_info, objectlayer.unwrap(), dsc, 1).await;
+            schedule_replication(obj_info, store, dsc, ReplicationType::Object).await;
         }
 
         let output = PutObjectOutput {
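On the write path the replication decision brackets the store call: once before the write, to stamp the object's user-defined metadata with a Pending status, and once after, to queue the actual copy. Condensed under the same names as above; the rationale in the comment is an inference, not stated by the patch:

    let repoptions =
        get_must_replicate_options(&mt2, "".to_string(), ReplicationStatusType::Empty, ReplicationType::Object, opts.clone());
    let dsc = must_replicate(&bucket, &key, repoptions).await;
    if dsc.replicate_any() {
        // stamped before the write, presumably so the resyncer can later find
        // objects that were written but never queued
        let k = format!("{}{}", RESERVED_METADATA_PREFIX_LOWER, "replication-status");
        opts.user_defined.insert(k, dsc.pending_status().unwrap_or_default());
    }
    // ...and after a successful write:
    // schedule_replication(obj_info, store, dsc, ReplicationType::Object).await;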
@@ -2853,6 +3098,7 @@ impl S3 for FS {
         );
 
         let obj_info = store
+            .clone()
             .complete_multipart_upload(&bucket, &key, &upload_id, uploaded_parts, opts)
             .await
             .map_err(ApiError::from)?;
@@ -2879,14 +3125,13 @@ impl S3 for FS {
         let mt2 = HashMap::new();
 
         let repoptions =
-            get_must_replicate_options(&mt2, "", ReplicationStatusType::Unknown, ReplicationType::ObjectReplicationType, opts);
+            get_must_replicate_options(&mt2, "".to_string(), ReplicationStatusType::Empty, ReplicationType::Object, opts.clone());
 
-        let dsc = must_replicate(&bucket, &key, &repoptions).await;
+        let dsc = must_replicate(&bucket, &key, repoptions).await;
 
         if dsc.replicate_any() {
             warn!("need multipart replication");
-            let objectlayer = new_object_layer_fn();
-            schedule_replication(obj_info, objectlayer.unwrap(), dsc, 1).await;
+            schedule_replication(obj_info, store, dsc, ReplicationType::Object).await;
         }
 
         tracing::info!(
             "TDD: About to return S3Response with output: SSE={:?}, KMS={:?}",
@@ -4371,6 +4616,22 @@ pub(crate) fn process_lambda_configurations(
     }
 }
 
+pub(crate) async fn has_replication_rules(bucket: &str, objects: &[ObjectToDelete]) -> bool {
+    let (cfg, _created) = match get_replication_config(bucket).await {
+        Ok(replication_config) => replication_config,
+        Err(_err) => {
+            return false;
+        }
+    };
+
+    for object in objects {
+        if cfg.has_active_rules(&object.object_name, true) {
+            return true;
+        }
+    }
+    false
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/rustfs/src/storage/options.rs b/rustfs/src/storage/options.rs
index 82032b2a..1493d4b8 100644
--- a/rustfs/src/storage/options.rs
+++ b/rustfs/src/storage/options.rs
@@ -18,10 +18,13 @@ use rustfs_ecstore::error::Result;
 use rustfs_ecstore::error::StorageError;
 use rustfs_ecstore::store_api::{HTTPPreconditions, HTTPRangeSpec, ObjectOptions};
+use rustfs_utils::http::RUSTFS_BUCKET_REPLICATION_DELETE_MARKER;
+use rustfs_utils::http::RUSTFS_BUCKET_SOURCE_VERSION_ID;
 use rustfs_utils::path::is_dir_object;
 use s3s::{S3Result, s3_error};
 use std::collections::HashMap;
 use std::sync::LazyLock;
+use tracing::error;
 use uuid::Uuid;
 
 /// Creates options for deleting an object in a bucket.
@@ -35,22 +38,32 @@ pub async fn del_opts(
     let versioned = BucketVersioningSys::prefix_enabled(bucket, object).await;
     let version_suspended = BucketVersioningSys::suspended(bucket).await;
 
-    // TODO: delete_prefix
+    let vid = if vid.is_none() {
+        headers
+            .get(RUSTFS_BUCKET_SOURCE_VERSION_ID)
+            .map(|v| v.to_str().unwrap().to_owned())
+    } else {
+        vid
+    };
 
     let vid = vid.map(|v| v.as_str().trim().to_owned());
 
     if let Some(ref id) = vid {
-        if let Err(_err) = Uuid::parse_str(id.as_str()) {
+        if let Err(err) = Uuid::parse_str(id.as_str()) {
+            error!("del_opts: invalid version id: {} error: {}", id, err);
             return Err(StorageError::InvalidVersionID(bucket.to_owned(), object.to_owned(), id.clone()));
         }
 
         if !versioned {
+            error!("del_opts: object not versioned: {}", object);
             return Err(StorageError::InvalidArgument(bucket.to_owned(), object.to_owned(), id.clone()));
         }
     }
 
-    let mut opts = put_opts_from_headers(headers, metadata.clone())
-        .map_err(|err| StorageError::InvalidArgument(bucket.to_owned(), object.to_owned(), err.to_string()))?;
+    let mut opts = put_opts_from_headers(headers, metadata.clone()).map_err(|err| {
+        error!("del_opts: invalid argument: {} error: {}", object, err);
+        StorageError::InvalidArgument(bucket.to_owned(), object.to_owned(), err.to_string())
+    })?;
 
     opts.version_id = {
         if is_dir_object(object) && vid.is_none() {
@@ -62,6 +75,11 @@ pub async fn del_opts(
     opts.version_suspended = version_suspended;
     opts.versioned = versioned;
 
+    opts.delete_marker = headers
+        .get(RUSTFS_BUCKET_REPLICATION_DELETE_MARKER)
+        .map(|v| v.to_str().unwrap() == "true")
+        .unwrap_or_default();
+
     Ok(opts)
 }
 
@@ -144,6 +162,14 @@ pub async fn put_opts(
     let versioned = BucketVersioningSys::prefix_enabled(bucket, object).await;
     let version_suspended = BucketVersioningSys::prefix_suspended(bucket, object).await;
 
+    let vid = if vid.is_none() {
+        headers
+            .get(RUSTFS_BUCKET_SOURCE_VERSION_ID)
+            .map(|v| v.to_str().unwrap().to_owned())
+    } else {
+        vid
+    };
+
     let vid = vid.map(|v| v.as_str().trim().to_owned());
 
     if let Some(ref id) = vid {