From cd1e244c68885dacafeb83ab1773e5a7eb566c7a Mon Sep 17 00:00:00 2001 From: weisd Date: Mon, 20 Oct 2025 23:46:13 +0800 Subject: [PATCH] Refactor: Introduce content checksums and improve multipart/object metadata handling (#671) * feat: adapt to s3s typed etag support * refactor: move replication struct to rustfs_filemeta, fix filemeta transition bug * add head_object checksum, filter object metadata output * fix multipart checksum * fix multipart checksum * add content md5,sha256 check * fix test * fix cargo --------- Co-authored-by: overtrue --- Cargo.lock | 14 + Cargo.toml | 1 + .../ahm/tests/lifecycle_integration_test.rs | 4 +- crates/audit/tests/config_parsing_test.rs | 14 +- crates/audit/tests/integration_test.rs | 4 +- crates/audit/tests/performance_test.rs | 58 +- crates/audit/tests/system_integration_test.rs | 28 +- crates/ecstore/Cargo.toml | 1 + crates/ecstore/src/bucket/replication/mod.rs | 2 - .../bucket/replication/replication_pool.rs | 16 +- .../replication/replication_resyncer.rs | 81 +- .../bucket/replication/replication_type.rs | 470 ------- crates/ecstore/src/client/api_put_object.rs | 4 +- .../src/client/api_put_object_multipart.rs | 5 +- .../src/client/api_put_object_streaming.rs | 5 +- crates/ecstore/src/client/api_remove.rs | 3 +- crates/ecstore/src/client/api_s3_datatypes.rs | 4 +- crates/ecstore/src/client/checksum.rs | 351 ++++++ crates/ecstore/src/client/mod.rs | 1 + crates/ecstore/src/client/transition_api.rs | 2 +- crates/ecstore/src/client/utils.rs | 8 + crates/ecstore/src/erasure_coding/decode.rs | 4 + crates/ecstore/src/erasure_coding/encode.rs | 4 +- crates/ecstore/src/erasure_coding/erasure.rs | 8 +- crates/ecstore/src/lib.rs | 2 +- crates/ecstore/src/pools.rs | 4 +- crates/ecstore/src/rebalance.rs | 3 +- crates/ecstore/src/set_disk.rs | 197 +-- crates/ecstore/src/store.rs | 5 +- crates/ecstore/src/store_api.rs | 68 +- crates/filemeta/Cargo.toml | 2 + crates/filemeta/src/fileinfo.rs | 4 +- crates/filemeta/src/filemeta.rs | 172 ++- crates/filemeta/src/replication.rs | 396 ++++++ crates/iam/src/manager.rs | 6 +- crates/iam/src/sys.rs | 18 +- crates/rio/Cargo.toml | 10 +- crates/rio/src/checksum.rs | 1101 +++++++++++++++++ crates/rio/src/errors.rs | 73 ++ crates/rio/src/etag.rs | 77 +- crates/rio/src/etag_reader.rs | 14 +- crates/rio/src/hash_reader.rs | 312 ++++- crates/rio/src/lib.rs | 5 + crates/signer/Cargo.toml | 1 + crates/signer/src/request_signature_v2.rs | 8 +- crates/utils/src/crypto.rs | 8 +- crates/utils/src/http/headers.rs | 1 + crates/utils/src/notify/net.rs | 2 +- rustfs/Cargo.toml | 3 +- rustfs/src/admin/handlers/kms_dynamic.rs | 18 +- rustfs/src/admin/handlers/kms_keys.rs | 10 +- rustfs/src/admin/handlers/sts.rs | 6 +- rustfs/src/server/http.rs | 7 +- rustfs/src/storage/ecfs.rs | 475 ++++++- rustfs/src/storage/options.rs | 37 +- scripts/run.sh | 4 +- 56 files changed, 3331 insertions(+), 810 deletions(-) delete mode 100644 crates/ecstore/src/bucket/replication/replication_type.rs create mode 100644 crates/ecstore/src/client/checksum.rs create mode 100644 crates/rio/src/checksum.rs create mode 100644 crates/rio/src/errors.rs diff --git a/Cargo.lock b/Cargo.lock index 61133274..a89dbbf0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6280,6 +6280,7 @@ dependencies = [ "axum-extra", "axum-server", "base64 0.22.1", + "base64-simd", "bytes", "chrono", "clap", @@ -6288,6 +6289,7 @@ dependencies = [ "flatbuffers", "futures", "futures-util", + "hex-simd", "http 1.3.1", "http-body 1.0.1", "hyper 1.7.0", @@ -6475,6 +6477,7 @@ dependencies = [ 
"aws-sdk-s3", "aws-smithy-types", "base64 0.22.1", + "base64-simd", "byteorder", "bytes", "bytesize", @@ -6549,6 +6552,8 @@ dependencies = [ "bytes", "crc32fast", "criterion", + "lazy_static", + "regex", "rmp", "rmp-serde", "rustfs-utils", @@ -6765,17 +6770,25 @@ name = "rustfs-rio" version = "0.0.5" dependencies = [ "aes-gcm", + "base64 0.22.1", + "base64-simd", "bytes", "crc32fast", + "crc64fast-nvme", "futures", + "hex-simd", "http 1.3.1", "md-5", "pin-project-lite", "rand 0.9.2", "reqwest", "rustfs-utils", + "s3s", "serde", "serde_json", + "sha1 0.10.6", + "sha2 0.10.9", + "thiserror 2.0.17", "tokio", "tokio-test", "tokio-util", @@ -6827,6 +6840,7 @@ dependencies = [ name = "rustfs-signer" version = "0.0.5" dependencies = [ + "base64-simd", "bytes", "http 1.3.1", "hyper 1.7.0", diff --git a/Cargo.toml b/Cargo.toml index 003616b1..97a3e642 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -121,6 +121,7 @@ chrono = { version = "0.4.42", features = ["serde"] } clap = { version = "4.5.49", features = ["derive", "env"] } const-str = { version = "0.7.0", features = ["std", "proc"] } crc32fast = "1.5.0" +crc64fast-nvme = "1.2.0" criterion = { version = "0.7", features = ["html_reports"] } crossbeam-queue = "0.3.12" datafusion = "50.2.0" diff --git a/crates/ahm/tests/lifecycle_integration_test.rs b/crates/ahm/tests/lifecycle_integration_test.rs index 68d4aa64..11485ee1 100644 --- a/crates/ahm/tests/lifecycle_integration_test.rs +++ b/crates/ahm/tests/lifecycle_integration_test.rs @@ -343,7 +343,7 @@ mod serial_tests { set_bucket_lifecycle(bucket_name.as_str()) .await .expect("Failed to set lifecycle configuration"); - println!("✅ Lifecycle configuration set for bucket: {}", bucket_name); + println!("✅ Lifecycle configuration set for bucket: {bucket_name}"); // Verify lifecycle configuration was set match rustfs_ecstore::bucket::metadata_sys::get(bucket_name.as_str()).await { @@ -477,7 +477,7 @@ mod serial_tests { set_bucket_lifecycle_deletemarker(bucket_name.as_str()) .await .expect("Failed to set lifecycle configuration"); - println!("✅ Lifecycle configuration set for bucket: {}", bucket_name); + println!("✅ Lifecycle configuration set for bucket: {bucket_name}"); // Verify lifecycle configuration was set match rustfs_ecstore::bucket::metadata_sys::get(bucket_name.as_str()).await { diff --git a/crates/audit/tests/config_parsing_test.rs b/crates/audit/tests/config_parsing_test.rs index 6daa4cc3..6b8edceb 100644 --- a/crates/audit/tests/config_parsing_test.rs +++ b/crates/audit/tests/config_parsing_test.rs @@ -81,8 +81,8 @@ fn test_config_section_names() { fn test_environment_variable_parsing() { // Test environment variable prefix patterns let env_prefix = "RUSTFS_"; - let audit_webhook_prefix = format!("{}AUDIT_WEBHOOK_", env_prefix); - let audit_mqtt_prefix = format!("{}AUDIT_MQTT_", env_prefix); + let audit_webhook_prefix = format!("{env_prefix}AUDIT_WEBHOOK_"); + let audit_mqtt_prefix = format!("{env_prefix}AUDIT_MQTT_"); assert_eq!(audit_webhook_prefix, "RUSTFS_AUDIT_WEBHOOK_"); assert_eq!(audit_mqtt_prefix, "RUSTFS_AUDIT_MQTT_"); @@ -141,13 +141,13 @@ fn test_duration_parsing_formats() { let result = parse_duration_test(input); match (result, expected_seconds) { (Some(duration), Some(expected)) => { - assert_eq!(duration.as_secs(), expected, "Failed for input: {}", input); + assert_eq!(duration.as_secs(), expected, "Failed for input: {input}"); } (None, None) => { // Both None, test passes } _ => { - panic!("Mismatch for input: {}, got: {:?}, expected: {:?}", input, result, expected_seconds); 
+ panic!("Mismatch for input: {input}, got: {result:?}, expected: {expected_seconds:?}"); } } } @@ -188,13 +188,13 @@ fn test_url_validation() { for url_str in valid_urls { let result = Url::parse(url_str); - assert!(result.is_ok(), "Valid URL should parse: {}", url_str); + assert!(result.is_ok(), "Valid URL should parse: {url_str}"); } for url_str in &invalid_urls[..3] { // Skip the ftp one as it's technically valid let result = Url::parse(url_str); - assert!(result.is_err(), "Invalid URL should not parse: {}", url_str); + assert!(result.is_err(), "Invalid URL should not parse: {url_str}"); } } @@ -214,6 +214,6 @@ fn test_qos_parsing() { 0..=2 => Some(q), _ => None, }); - assert_eq!(result, expected, "Failed for QoS input: {}", input); + assert_eq!(result, expected, "Failed for QoS input: {input}"); } } diff --git a/crates/audit/tests/integration_test.rs b/crates/audit/tests/integration_test.rs index 4fdba224..c0e2384a 100644 --- a/crates/audit/tests/integration_test.rs +++ b/crates/audit/tests/integration_test.rs @@ -57,7 +57,7 @@ async fn test_config_parsing_webhook() { } Err(e) => { // Other errors might indicate parsing issues - println!("Unexpected error: {}", e); + println!("Unexpected error: {e}"); } Ok(_) => { // Unexpected success in test environment without server storage @@ -103,6 +103,6 @@ fn test_enable_value_parsing() { for (input, expected) in test_cases { let result = matches!(input.to_lowercase().as_str(), "1" | "on" | "true" | "yes"); - assert_eq!(result, expected, "Failed for input: {}", input); + assert_eq!(result, expected, "Failed for input: {input}"); } } diff --git a/crates/audit/tests/performance_test.rs b/crates/audit/tests/performance_test.rs index acf03924..6921d21f 100644 --- a/crates/audit/tests/performance_test.rs +++ b/crates/audit/tests/performance_test.rs @@ -32,10 +32,10 @@ async fn test_audit_system_startup_performance() { let _result = timeout(Duration::from_secs(5), system.start(config)).await; let elapsed = start.elapsed(); - println!("Audit system startup took: {:?}", elapsed); + println!("Audit system startup took: {elapsed:?}"); // Should complete within 5 seconds - assert!(elapsed < Duration::from_secs(5), "Startup took too long: {:?}", elapsed); + assert!(elapsed < Duration::from_secs(5), "Startup took too long: {elapsed:?}"); // Clean up let _ = system.close().await; @@ -54,8 +54,8 @@ async fn test_concurrent_target_creation() { for i in 1..=5 { let mut kvs = rustfs_ecstore::config::KVS::new(); kvs.insert("enable".to_string(), "on".to_string()); - kvs.insert("endpoint".to_string(), format!("http://localhost:302{}/webhook", i)); - webhook_section.insert(format!("instance_{}", i), kvs); + kvs.insert("endpoint".to_string(), format!("http://localhost:302{i}/webhook")); + webhook_section.insert(format!("instance_{i}"), kvs); } config.0.insert("audit_webhook".to_string(), webhook_section); @@ -66,10 +66,10 @@ async fn test_concurrent_target_creation() { let result = registry.create_targets_from_config(&config).await; let elapsed = start.elapsed(); - println!("Concurrent target creation took: {:?}", elapsed); + println!("Concurrent target creation took: {elapsed:?}"); // Should complete quickly even with multiple targets - assert!(elapsed < Duration::from_secs(10), "Target creation took too long: {:?}", elapsed); + assert!(elapsed < Duration::from_secs(10), "Target creation took too long: {elapsed:?}"); // Verify it fails with expected error (server not initialized) match result { @@ -77,7 +77,7 @@ async fn test_concurrent_target_creation() { // 
Expected in test environment } Err(e) => { - println!("Unexpected error during concurrent creation: {}", e); + println!("Unexpected error during concurrent creation: {e}"); } Ok(_) => { println!("Unexpected success in test environment"); @@ -93,7 +93,7 @@ async fn test_audit_log_dispatch_performance() { let config = rustfs_ecstore::config::Config(HashMap::new()); let start_result = system.start(config).await; if start_result.is_err() { - println!("AuditSystem failed to start: {:?}", start_result); + println!("AuditSystem failed to start: {start_result:?}"); return; // or assert!(false, "AuditSystem failed to start"); } @@ -104,14 +104,14 @@ async fn test_audit_log_dispatch_performance() { let id = 1; let mut req_header = HashMap::new(); - req_header.insert("authorization".to_string(), format!("Bearer test-token-{}", id)); + req_header.insert("authorization".to_string(), format!("Bearer test-token-{id}")); req_header.insert("content-type".to_string(), "application/octet-stream".to_string()); let mut resp_header = HashMap::new(); resp_header.insert("x-response".to_string(), "ok".to_string()); let mut tags = HashMap::new(); - tags.insert(format!("tag-{}", id), json!("sample")); + tags.insert(format!("tag-{id}"), json!("sample")); let mut req_query = HashMap::new(); req_query.insert("id".to_string(), id.to_string()); @@ -119,7 +119,7 @@ let api_details = ApiDetails { name: Some("PutObject".to_string()), bucket: Some("test-bucket".to_string()), - object: Some(format!("test-object-{}", id)), + object: Some(format!("test-object-{id}")), status: Some("success".to_string()), status_code: Some(200), input_bytes: Some(1024), @@ -134,7 +134,7 @@ // Create sample audit log entry let audit_entry = AuditEntry { version: "1".to_string(), - deployment_id: Some(format!("test-deployment-{}", id)), + deployment_id: Some(format!("test-deployment-{id}")), site_name: Some("test-site".to_string()), time: Utc::now(), event: EventName::ObjectCreatedPut, @@ -142,9 +142,9 @@ trigger: "api".to_string(), api: api_details, remote_host: Some("127.0.0.1".to_string()), - request_id: Some(format!("test-request-{}", id)), + request_id: Some(format!("test-request-{id}")), user_agent: Some("test-agent".to_string()), - req_path: Some(format!("/test-bucket/test-object-{}", id)), + req_path: Some(format!("/test-bucket/test-object-{id}")), req_host: Some("test-host".to_string()), req_node: Some("node-1".to_string()), req_claims: None, @@ -152,8 +152,8 @@ req_header: Some(req_header), resp_header: Some(resp_header), tags: Some(tags), - access_key: Some(format!("AKIA{}", id)), - parent_user: Some(format!("parent-{}", id)), + access_key: Some(format!("AKIA{id}")), + parent_user: Some(format!("parent-{id}")), error: None, }; @@ -163,10 +163,10 @@ let result = system.dispatch(Arc::new(audit_entry)).await; let elapsed = start.elapsed(); - println!("Audit log dispatch took: {:?}", elapsed); + println!("Audit log dispatch took: {elapsed:?}"); // Should be very fast (sub-millisecond for no targets) - assert!(elapsed < Duration::from_millis(100), "Dispatch took too long: {:?}", elapsed); + assert!(elapsed < Duration::from_millis(100), "Dispatch took too long: {elapsed:?}"); // Should succeed even with no targets assert!(result.is_ok(), "Dispatch should succeed with no targets"); @@ -226,10 +226,10 @@ 
fn test_event_name_mask_performance() { } let elapsed = start.elapsed(); - println!("Event mask calculation (5000 ops) took: {:?}", elapsed); + println!("Event mask calculation (5000 ops) took: {elapsed:?}"); // Should be very fast - assert!(elapsed < Duration::from_millis(100), "Mask calculation too slow: {:?}", elapsed); + assert!(elapsed < Duration::from_millis(100), "Mask calculation too slow: {elapsed:?}"); } #[test] @@ -254,10 +254,10 @@ fn test_event_name_expansion_performance() { } let elapsed = start.elapsed(); - println!("Event expansion (4000 ops) took: {:?}", elapsed); + println!("Event expansion (4000 ops) took: {elapsed:?}"); // Should be very fast - assert!(elapsed < Duration::from_millis(100), "Expansion too slow: {:?}", elapsed); + assert!(elapsed < Duration::from_millis(100), "Expansion too slow: {elapsed:?}"); } #[tokio::test] @@ -274,10 +274,10 @@ async fn test_registry_operations_performance() { } let elapsed = start.elapsed(); - println!("Registry operations (2000 ops) took: {:?}", elapsed); + println!("Registry operations (2000 ops) took: {elapsed:?}"); // Should be very fast for empty registry - assert!(elapsed < Duration::from_millis(100), "Registry ops too slow: {:?}", elapsed); + assert!(elapsed < Duration::from_millis(100), "Registry ops too slow: {elapsed:?}"); } // Performance requirements validation @@ -294,7 +294,7 @@ fn test_performance_requirements() { // Simulate processing 3000 events worth of operations for i in 0..3000 { // Simulate event name parsing and processing - let _event_id = format!("s3:ObjectCreated:Put_{}", i); + let _event_id = format!("s3:ObjectCreated:Put_{i}"); let _timestamp = chrono::Utc::now().to_rfc3339(); // Simulate basic audit entry creation overhead @@ -305,16 +305,16 @@ fn test_performance_requirements() { let elapsed = start.elapsed(); let eps = 3000.0 / elapsed.as_secs_f64(); - println!("Simulated 3000 events in {:?} ({:.0} EPS)", elapsed, eps); + println!("Simulated 3000 events in {elapsed:?} ({eps:.0} EPS)"); // Our core processing should easily handle 3k EPS worth of CPU overhead // The actual EPS limit will be determined by network I/O to targets - assert!(eps > 10000.0, "Core processing too slow for 3k EPS target: {} EPS", eps); + assert!(eps > 10000.0, "Core processing too slow for 3k EPS target: {eps} EPS"); // P99 latency requirement: < 30ms // For core processing, we should be much faster than this let avg_latency = elapsed / 3000; - println!("Average processing latency: {:?}", avg_latency); + println!("Average processing latency: {avg_latency:?}"); - assert!(avg_latency < Duration::from_millis(1), "Processing latency too high: {:?}", avg_latency); + assert!(avg_latency < Duration::from_millis(1), "Processing latency too high: {avg_latency:?}"); } diff --git a/crates/audit/tests/system_integration_test.rs b/crates/audit/tests/system_integration_test.rs index bd135e86..9948d898 100644 --- a/crates/audit/tests/system_integration_test.rs +++ b/crates/audit/tests/system_integration_test.rs @@ -52,7 +52,7 @@ async fn test_complete_audit_system_lifecycle() { assert_eq!(system.get_state().await, system::AuditSystemState::Running); } Err(e) => { - panic!("Unexpected error: {}", e); + panic!("Unexpected error: {e}"); } } @@ -103,7 +103,7 @@ async fn test_audit_log_dispatch_with_no_targets() { // Also acceptable since system not running } Err(e) => { - panic!("Unexpected error: {}", e); + panic!("Unexpected error: {e}"); } } } @@ -172,7 +172,7 @@ async fn test_config_parsing_with_multiple_instances() { // Expected - parsing 
worked but save failed } Err(e) => { - println!("Config parsing error: {}", e); + println!("Config parsing error: {e}"); // Other errors might indicate parsing issues, but not necessarily failures } Ok(_) => { @@ -261,7 +261,7 @@ async fn test_concurrent_operations() { let (i, state, is_running) = task.await.expect("Task should complete"); assert_eq!(state, system::AuditSystemState::Stopped); assert!(!is_running); - println!("Task {} completed successfully", i); + println!("Task {i} completed successfully"); } } @@ -295,8 +295,8 @@ async fn test_performance_under_load() { } let elapsed = start.elapsed(); - println!("100 concurrent dispatches took: {:?}", elapsed); - println!("Successes: {}, Errors: {}", success_count, error_count); + println!("100 concurrent dispatches took: {elapsed:?}"); + println!("Successes: {success_count}, Errors: {error_count}"); // Should complete reasonably quickly assert!(elapsed < Duration::from_secs(5), "Concurrent operations took too long"); @@ -318,14 +318,14 @@ fn create_sample_audit_entry_with_id(id: u32) -> AuditEntry { use std::collections::HashMap; let mut req_header = HashMap::new(); - req_header.insert("authorization".to_string(), format!("Bearer test-token-{}", id)); + req_header.insert("authorization".to_string(), format!("Bearer test-token-{id}")); req_header.insert("content-type".to_string(), "application/octet-stream".to_string()); let mut resp_header = HashMap::new(); resp_header.insert("x-response".to_string(), "ok".to_string()); let mut tags = HashMap::new(); - tags.insert(format!("tag-{}", id), json!("sample")); + tags.insert(format!("tag-{id}"), json!("sample")); let mut req_query = HashMap::new(); req_query.insert("id".to_string(), id.to_string()); @@ -333,7 +333,7 @@ fn create_sample_audit_entry_with_id(id: u32) -> AuditEntry { let api_details = ApiDetails { name: Some("PutObject".to_string()), bucket: Some("test-bucket".to_string()), - object: Some(format!("test-object-{}", id)), + object: Some(format!("test-object-{id}")), status: Some("success".to_string()), status_code: Some(200), input_bytes: Some(1024), @@ -348,7 +348,7 @@ fn create_sample_audit_entry_with_id(id: u32) -> AuditEntry { AuditEntry { version: "1".to_string(), - deployment_id: Some(format!("test-deployment-{}", id)), + deployment_id: Some(format!("test-deployment-{id}")), site_name: Some("test-site".to_string()), time: Utc::now(), event: EventName::ObjectCreatedPut, @@ -356,9 +356,9 @@ fn create_sample_audit_entry_with_id(id: u32) -> AuditEntry { trigger: "api".to_string(), api: api_details, remote_host: Some("127.0.0.1".to_string()), - request_id: Some(format!("test-request-{}", id)), + request_id: Some(format!("test-request-{id}")), user_agent: Some("test-agent".to_string()), - req_path: Some(format!("/test-bucket/test-object-{}", id)), + req_path: Some(format!("/test-bucket/test-object-{id}")), req_host: Some("test-host".to_string()), req_node: Some("node-1".to_string()), req_claims: None, @@ -366,8 +366,8 @@ fn create_sample_audit_entry_with_id(id: u32) -> AuditEntry { req_header: Some(req_header), resp_header: Some(resp_header), tags: Some(tags), - access_key: Some(format!("AKIA{}", id)), - parent_user: Some(format!("parent-{}", id)), + access_key: Some(format!("AKIA{id}")), + parent_user: Some(format!("parent-{id}")), error: None, } } diff --git a/crates/ecstore/Cargo.toml b/crates/ecstore/Cargo.toml index 399a7ff6..eb5fbd34 100644 --- a/crates/ecstore/Cargo.toml +++ b/crates/ecstore/Cargo.toml @@ -101,6 +101,7 @@ aws-credential-types = { workspace = true } 
aws-smithy-types = { workspace = true } parking_lot = { workspace = true } moka = { workspace = true } +base64-simd.workspace = true [target.'cfg(not(windows))'.dependencies] nix = { workspace = true } diff --git a/crates/ecstore/src/bucket/replication/mod.rs b/crates/ecstore/src/bucket/replication/mod.rs index a8a9baa8..01fb71ef 100644 --- a/crates/ecstore/src/bucket/replication/mod.rs +++ b/crates/ecstore/src/bucket/replication/mod.rs @@ -17,12 +17,10 @@ pub mod datatypes; mod replication_pool; mod replication_resyncer; mod replication_state; -mod replication_type; mod rule; pub use config::*; pub use datatypes::*; pub use replication_pool::*; pub use replication_resyncer::*; -pub use replication_type::*; pub use rule::*; diff --git a/crates/ecstore/src/bucket/replication/replication_pool.rs b/crates/ecstore/src/bucket/replication/replication_pool.rs index 313282b5..84c3a6ae 100644 --- a/crates/ecstore/src/bucket/replication/replication_pool.rs +++ b/crates/ecstore/src/bucket/replication/replication_pool.rs @@ -1,9 +1,4 @@ use crate::StorageAPI; -use crate::bucket::replication::MrfReplicateEntry; -use crate::bucket::replication::ReplicateDecision; -use crate::bucket::replication::ReplicateObjectInfo; -use crate::bucket::replication::ReplicationWorkerOperation; -use crate::bucket::replication::ResyncDecision; use crate::bucket::replication::ResyncOpts; use crate::bucket::replication::ResyncStatusType; use crate::bucket::replication::replicate_delete; @@ -18,16 +13,21 @@ use crate::bucket::replication::replication_resyncer::{ BucketReplicationResyncStatus, DeletedObjectReplicationInfo, ReplicationResyncer, }; use crate::bucket::replication::replication_state::ReplicationStats; -use crate::bucket::replication::replication_statuses_map; -use crate::bucket::replication::version_purge_statuses_map; use crate::config::com::read_config; use crate::error::Error as EcstoreError; use crate::store_api::ObjectInfo; use lazy_static::lazy_static; +use rustfs_filemeta::MrfReplicateEntry; +use rustfs_filemeta::ReplicateDecision; +use rustfs_filemeta::ReplicateObjectInfo; use rustfs_filemeta::ReplicatedTargetInfo; use rustfs_filemeta::ReplicationStatusType; use rustfs_filemeta::ReplicationType; +use rustfs_filemeta::ReplicationWorkerOperation; +use rustfs_filemeta::ResyncDecision; +use rustfs_filemeta::replication_statuses_map; +use rustfs_filemeta::version_purge_statuses_map; use rustfs_utils::http::RESERVED_METADATA_PREFIX_LOWER; use time::OffsetDateTime; use time::format_description::well_known::Rfc3339; @@ -996,7 +996,7 @@ pub async fn schedule_replication(oi: ObjectInfo, o: Arc, dsc: target_purge_statuses: purge_statuses, replication_timestamp: tm, user_tags: oi.user_tags, - checksum: vec![], + checksum: None, retry_count: 0, event_type: "".to_string(), existing_obj_resync: ResyncDecision::default(), diff --git a/crates/ecstore/src/bucket/replication/replication_resyncer.rs b/crates/ecstore/src/bucket/replication/replication_resyncer.rs index e3a4b90d..5f89b742 100644 --- a/crates/ecstore/src/bucket/replication/replication_resyncer.rs +++ b/crates/ecstore/src/bucket/replication/replication_resyncer.rs @@ -2,12 +2,8 @@ use crate::bucket::bucket_target_sys::{ AdvancedPutOptions, BucketTargetSys, PutObjectOptions, PutObjectPartOptions, RemoveObjectOptions, TargetClient, }; use crate::bucket::metadata_sys; -use crate::bucket::replication::{MrfReplicateEntry, ReplicationWorkerOperation, ResyncStatusType}; -use crate::bucket::replication::{ - ObjectOpts, REPLICATE_EXISTING, REPLICATE_EXISTING_DELETE, 
REPLICATION_RESET, ReplicateObjectInfo, - ReplicationConfigurationExt as _, ResyncTargetDecision, get_replication_state, parse_replicate_decision, - replication_statuses_map, target_reset_header, version_purge_statuses_map, -}; +use crate::bucket::replication::ResyncStatusType; +use crate::bucket::replication::{ObjectOpts, ReplicationConfigurationExt as _}; use crate::bucket::tagging::decode_tags_to_map; use crate::bucket::target::BucketTargets; use crate::bucket::versioning_sys::BucketVersioningSys; @@ -29,14 +25,17 @@ use byteorder::ByteOrder; use futures::future::join_all; use http::HeaderMap; +use regex::Regex; use rustfs_filemeta::{ - ReplicatedInfos, ReplicatedTargetInfo, ReplicationAction, ReplicationState, ReplicationStatusType, ReplicationType, - VersionPurgeStatusType, + MrfReplicateEntry, REPLICATE_EXISTING, REPLICATE_EXISTING_DELETE, REPLICATION_RESET, ReplicateDecision, ReplicateObjectInfo, + ReplicateTargetDecision, ReplicatedInfos, ReplicatedTargetInfo, ReplicationAction, ReplicationState, ReplicationStatusType, + ReplicationType, ReplicationWorkerOperation, ResyncDecision, ResyncTargetDecision, VersionPurgeStatusType, + get_replication_state, parse_replicate_decision, replication_statuses_map, target_reset_header, version_purge_statuses_map, }; use rustfs_utils::http::{ AMZ_BUCKET_REPLICATION_STATUS, AMZ_OBJECT_TAGGING, AMZ_TAGGING_DIRECTIVE, CONTENT_ENCODING, HeaderExt as _, - RESERVED_METADATA_PREFIX, RESERVED_METADATA_PREFIX_LOWER, RUSTFS_REPLICATION_AUTUAL_OBJECT_SIZE, SSEC_ALGORITHM_HEADER, - SSEC_KEY_HEADER, SSEC_KEY_MD5_HEADER, headers, + RESERVED_METADATA_PREFIX, RESERVED_METADATA_PREFIX_LOWER, RUSTFS_REPLICATION_AUTUAL_OBJECT_SIZE, + RUSTFS_REPLICATION_RESET_STATUS, SSEC_ALGORITHM_HEADER, SSEC_KEY_HEADER, SSEC_KEY_MD5_HEADER, headers, }; use rustfs_utils::path::path_join_buf; use rustfs_utils::string::strings_has_prefix_fold; @@ -56,9 +55,6 @@ use tokio::time::Duration as TokioDuration; use tokio_util::sync::CancellationToken; use tracing::{error, info, warn}; -use super::replication_type::{ReplicateDecision, ReplicateTargetDecision, ResyncDecision}; -use regex::Regex; - const REPLICATION_DIR: &str = ".replication"; const RESYNC_FILE_NAME: &str = "resync.bin"; const RESYNC_META_FORMAT: u16 = 1; @@ -663,7 +659,7 @@ pub async fn get_heal_replicate_object_info(oi: &ObjectInfo, rcfg: &ReplicationC replication_timestamp: None, ssec: false, // TODO: add ssec support user_tags: oi.user_tags.clone(), - checksum: Vec::new(), + checksum: oi.checksum.clone(), retry_count: 0, } } @@ -849,7 +845,7 @@ impl ReplicationConfig { { resync_decision.targets.insert( decision.arn.clone(), - ResyncTargetDecision::resync_target( + resync_target( &oi, &target.arn, &target.reset_id, @@ -864,6 +860,59 @@ impl ReplicationConfig { } } +pub fn resync_target( + oi: &ObjectInfo, + arn: &str, + reset_id: &str, + reset_before_date: Option<OffsetDateTime>, + status: ReplicationStatusType, +) -> ResyncTargetDecision { + let rs = oi + .user_defined + .get(target_reset_header(arn).as_str()) + .or(oi.user_defined.get(RUSTFS_REPLICATION_RESET_STATUS)) + .map(|s| s.to_string()); + + let mut dec = ResyncTargetDecision::default(); + + let mod_time = oi.mod_time.unwrap_or(OffsetDateTime::UNIX_EPOCH); + + if rs.is_none() { + let reset_before_date = reset_before_date.unwrap_or(OffsetDateTime::UNIX_EPOCH); + if !reset_id.is_empty() && mod_time < reset_before_date { + dec.replicate = true; + return dec; + } + + dec.replicate = status == ReplicationStatusType::Empty; + + return dec; + } + + if reset_id.is_empty() || 
reset_before_date.is_none() { + return dec; + } + + let rs = rs.unwrap(); + let reset_before_date = reset_before_date.unwrap(); + + let parts: Vec<&str> = rs.splitn(2, ';').collect(); + + if parts.len() != 2 { + return dec; + } + + let new_reset = parts[0] == reset_id; + + if !new_reset && status == ReplicationStatusType::Completed { + return dec; + } + + dec.replicate = new_reset && mod_time < reset_before_date; + + dec +} + pub struct MustReplicateOptions { meta: HashMap, status: ReplicationStatusType, @@ -933,7 +982,7 @@ pub async fn check_replicate_delete( let rcfg = match get_replication_config(bucket).await { Ok(Some(config)) => config, Ok(None) => { - warn!("No replication config found for bucket: {}", bucket); + // warn!("No replication config found for bucket: {}", bucket); return ReplicateDecision::default(); } Err(err) => { diff --git a/crates/ecstore/src/bucket/replication/replication_type.rs b/crates/ecstore/src/bucket/replication/replication_type.rs deleted file mode 100644 index c474d508..00000000 --- a/crates/ecstore/src/bucket/replication/replication_type.rs +++ /dev/null @@ -1,470 +0,0 @@ -// Copyright 2024 RustFS Team -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use crate::error::{Error, Result}; -use crate::store_api::ObjectInfo; - -use regex::Regex; - -use rustfs_filemeta::VersionPurgeStatusType; -use rustfs_filemeta::{ReplicatedInfos, ReplicationType}; -use rustfs_filemeta::{ReplicationState, ReplicationStatusType}; -use rustfs_utils::http::RESERVED_METADATA_PREFIX_LOWER; -use rustfs_utils::http::RUSTFS_REPLICATION_RESET_STATUS; -use serde::{Deserialize, Serialize}; -use std::any::Any; -use std::collections::HashMap; -use std::fmt; -use time::OffsetDateTime; -use uuid::Uuid; - -pub const REPLICATION_RESET: &str = "replication-reset"; -pub const REPLICATION_STATUS: &str = "replication-status"; - -// ReplicateQueued - replication being queued trail -pub const REPLICATE_QUEUED: &str = "replicate:queue"; - -// ReplicateExisting - audit trail for existing objects replication -pub const REPLICATE_EXISTING: &str = "replicate:existing"; -// ReplicateExistingDelete - audit trail for delete replication triggered for existing delete markers -pub const REPLICATE_EXISTING_DELETE: &str = "replicate:existing:delete"; - -// ReplicateMRF - audit trail for replication from Most Recent Failures (MRF) queue -pub const REPLICATE_MRF: &str = "replicate:mrf"; -// ReplicateIncoming - audit trail of inline replication -pub const REPLICATE_INCOMING: &str = "replicate:incoming"; -// ReplicateIncomingDelete - audit trail of inline replication of deletes. -pub const REPLICATE_INCOMING_DELETE: &str = "replicate:incoming:delete"; - -// ReplicateHeal - audit trail for healing of failed/pending replications -pub const REPLICATE_HEAL: &str = "replicate:heal"; -// ReplicateHealDelete - audit trail of healing of failed/pending delete replications. 
-pub const REPLICATE_HEAL_DELETE: &str = "replicate:heal:delete"; - -#[derive(Serialize, Deserialize, Debug)] -pub struct MrfReplicateEntry { - #[serde(rename = "bucket")] - pub bucket: String, - - #[serde(rename = "object")] - pub object: String, - - #[serde(skip_serializing, skip_deserializing)] - pub version_id: Option, - - #[serde(rename = "retryCount")] - pub retry_count: i32, - - #[serde(skip_serializing, skip_deserializing)] - pub size: i64, -} - -pub trait ReplicationWorkerOperation: Any + Send + Sync { - fn to_mrf_entry(&self) -> MrfReplicateEntry; - fn as_any(&self) -> &dyn Any; - fn get_bucket(&self) -> &str; - fn get_object(&self) -> &str; - fn get_size(&self) -> i64; - fn is_delete_marker(&self) -> bool; - fn get_op_type(&self) -> ReplicationType; -} - -#[derive(Debug, Clone, Serialize, Deserialize, Default)] -pub struct ReplicateTargetDecision { - pub replicate: bool, - pub synchronous: bool, - pub arn: String, - pub id: String, -} - -impl ReplicateTargetDecision { - pub fn new(arn: String, replicate: bool, sync: bool) -> Self { - Self { - replicate, - synchronous: sync, - arn, - id: String::new(), - } - } -} - -impl fmt::Display for ReplicateTargetDecision { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{};{};{};{}", self.replicate, self.synchronous, self.arn, self.id) - } -} - -/// ReplicateDecision represents replication decision for each target -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ReplicateDecision { - pub targets_map: HashMap, -} - -impl ReplicateDecision { - pub fn new() -> Self { - Self { - targets_map: HashMap::new(), - } - } - - /// Returns true if at least one target qualifies for replication - pub fn replicate_any(&self) -> bool { - self.targets_map.values().any(|t| t.replicate) - } - - /// Returns true if at least one target qualifies for synchronous replication - pub fn is_synchronous(&self) -> bool { - self.targets_map.values().any(|t| t.synchronous) - } - - /// Updates ReplicateDecision with target's replication decision - pub fn set(&mut self, target: ReplicateTargetDecision) { - self.targets_map.insert(target.arn.clone(), target); - } - - /// Returns a stringified representation of internal replication status with all targets marked as `PENDING` - pub fn pending_status(&self) -> Option { - let mut result = String::new(); - for target in self.targets_map.values() { - if target.replicate { - result.push_str(&format!("{}={};", target.arn, ReplicationStatusType::Pending.as_str())); - } - } - if result.is_empty() { None } else { Some(result) } - } -} - -impl fmt::Display for ReplicateDecision { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut result = String::new(); - for (key, value) in &self.targets_map { - result.push_str(&format!("{key}={value},")); - } - write!(f, "{}", result.trim_end_matches(',')) - } -} - -impl Default for ReplicateDecision { - fn default() -> Self { - Self::new() - } -} - -// parse k-v pairs of target ARN to stringified ReplicateTargetDecision delimited by ',' into a -// ReplicateDecision struct -pub fn parse_replicate_decision(_bucket: &str, s: &str) -> Result { - let mut decision = ReplicateDecision::new(); - - if s.is_empty() { - return Ok(decision); - } - - for p in s.split(',') { - if p.is_empty() { - continue; - } - - let slc = p.split('=').collect::>(); - if slc.len() != 2 { - return Err(Error::other(format!("invalid replicate decision format: {s}"))); - } - - let tgt_str = slc[1].trim_matches('"'); - let tgt = tgt_str.split(';').collect::>(); - if tgt.len() 
!= 4 { - return Err(Error::other(format!("invalid replicate decision format: {s}"))); - } - - let tgt = ReplicateTargetDecision { - replicate: tgt[0] == "true", - synchronous: tgt[1] == "true", - arn: tgt[2].to_string(), - id: tgt[3].to_string(), - }; - decision.targets_map.insert(slc[0].to_string(), tgt); - } - - Ok(decision) - - // r = ReplicateDecision{ - // targetsMap: make(map[string]replicateTargetDecision), - // } - // if len(s) == 0 { - // return - // } - // for _, p := range strings.Split(s, ",") { - // if p == "" { - // continue - // } - // slc := strings.Split(p, "=") - // if len(slc) != 2 { - // return r, errInvalidReplicateDecisionFormat - // } - // tgtStr := strings.TrimSuffix(strings.TrimPrefix(slc[1], `"`), `"`) - // tgt := strings.Split(tgtStr, ";") - // if len(tgt) != 4 { - // return r, errInvalidReplicateDecisionFormat - // } - // r.targetsMap[slc[0]] = replicateTargetDecision{Replicate: tgt[0] == "true", Synchronous: tgt[1] == "true", Arn: tgt[2], ID: tgt[3]} - // } -} - -#[derive(Debug, Clone, Serialize, Deserialize, Default)] -pub struct ResyncTargetDecision { - pub replicate: bool, - pub reset_id: String, - pub reset_before_date: Option, -} - -pub fn target_reset_header(arn: &str) -> String { - format!("{RESERVED_METADATA_PREFIX_LOWER}{REPLICATION_RESET}-{arn}") -} - -impl ResyncTargetDecision { - pub fn resync_target( - oi: &ObjectInfo, - arn: &str, - reset_id: &str, - reset_before_date: Option, - status: ReplicationStatusType, - ) -> Self { - let rs = oi - .user_defined - .get(target_reset_header(arn).as_str()) - .or(oi.user_defined.get(RUSTFS_REPLICATION_RESET_STATUS)) - .map(|s| s.to_string()); - - let mut dec = Self::default(); - - let mod_time = oi.mod_time.unwrap_or(OffsetDateTime::UNIX_EPOCH); - - if rs.is_none() { - let reset_before_date = reset_before_date.unwrap_or(OffsetDateTime::UNIX_EPOCH); - if !reset_id.is_empty() && mod_time < reset_before_date { - dec.replicate = true; - return dec; - } - - dec.replicate = status == ReplicationStatusType::Empty; - - return dec; - } - - if reset_id.is_empty() || reset_before_date.is_none() { - return dec; - } - - let rs = rs.unwrap(); - let reset_before_date = reset_before_date.unwrap(); - - let parts: Vec<&str> = rs.splitn(2, ';').collect(); - - if parts.len() != 2 { - return dec; - } - - let new_reset = parts[0] == reset_id; - - if !new_reset && status == ReplicationStatusType::Completed { - return dec; - } - - dec.replicate = new_reset && mod_time < reset_before_date; - - dec - } -} - -/// ResyncDecision is a struct representing a map with target's individual resync decisions -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ResyncDecision { - pub targets: HashMap, -} - -impl ResyncDecision { - pub fn new() -> Self { - Self { targets: HashMap::new() } - } - - /// Returns true if no targets with resync decision present - pub fn is_empty(&self) -> bool { - self.targets.is_empty() - } - - pub fn must_resync(&self) -> bool { - self.targets.values().any(|v| v.replicate) - } - - pub fn must_resync_target(&self, tgt_arn: &str) -> bool { - self.targets.get(tgt_arn).map(|v| v.replicate).unwrap_or(false) - } -} - -impl Default for ResyncDecision { - fn default() -> Self { - Self::new() - } -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ReplicateObjectInfo { - pub name: String, - pub size: i64, - pub actual_size: i64, - pub bucket: String, - pub version_id: Option, - pub etag: Option, - pub mod_time: Option, - pub replication_status: ReplicationStatusType, - pub replication_status_internal: 
Option, - pub delete_marker: bool, - pub version_purge_status_internal: Option, - pub version_purge_status: VersionPurgeStatusType, - pub replication_state: Option, - pub op_type: ReplicationType, - pub event_type: String, - pub dsc: ReplicateDecision, - pub existing_obj_resync: ResyncDecision, - pub target_statuses: HashMap, - pub target_purge_statuses: HashMap, - pub replication_timestamp: Option, - pub ssec: bool, - pub user_tags: String, - pub checksum: Vec, - pub retry_count: u32, -} - -impl ReplicationWorkerOperation for ReplicateObjectInfo { - fn as_any(&self) -> &dyn Any { - self - } - - fn to_mrf_entry(&self) -> MrfReplicateEntry { - MrfReplicateEntry { - bucket: self.bucket.clone(), - object: self.name.clone(), - version_id: self.version_id, - retry_count: self.retry_count as i32, - size: self.size, - } - } - - fn get_bucket(&self) -> &str { - &self.bucket - } - - fn get_object(&self) -> &str { - &self.name - } - - fn get_size(&self) -> i64 { - self.size - } - - fn is_delete_marker(&self) -> bool { - self.delete_marker - } - - fn get_op_type(&self) -> ReplicationType { - self.op_type - } -} - -lazy_static::lazy_static! { - static ref REPL_STATUS_REGEX: Regex = Regex::new(r"([^=].*?)=([^,].*?);").unwrap(); -} - -impl ReplicateObjectInfo { - /// Returns replication status of a target - pub fn target_replication_status(&self, arn: &str) -> ReplicationStatusType { - let binding = self.replication_status_internal.clone().unwrap_or_default(); - let captures = REPL_STATUS_REGEX.captures_iter(&binding); - for cap in captures { - if cap.len() == 3 && &cap[1] == arn { - return ReplicationStatusType::from(&cap[2]); - } - } - ReplicationStatusType::default() - } - - /// Returns the relevant info needed by MRF - pub fn to_mrf_entry(&self) -> MrfReplicateEntry { - MrfReplicateEntry { - bucket: self.bucket.clone(), - object: self.name.clone(), - version_id: self.version_id, - retry_count: self.retry_count as i32, - size: self.size, - } - } -} - -// constructs a replication status map from string representation -pub fn replication_statuses_map(s: &str) -> HashMap { - let mut targets = HashMap::new(); - let rep_stat_matches = REPL_STATUS_REGEX.captures_iter(s).map(|c| c.extract()); - for (_, [arn, status]) in rep_stat_matches { - if arn.is_empty() { - continue; - } - let status = ReplicationStatusType::from(status); - targets.insert(arn.to_string(), status); - } - targets -} - -// constructs a version purge status map from string representation -pub fn version_purge_statuses_map(s: &str) -> HashMap { - let mut targets = HashMap::new(); - let purge_status_matches = REPL_STATUS_REGEX.captures_iter(s).map(|c| c.extract()); - for (_, [arn, status]) in purge_status_matches { - if arn.is_empty() { - continue; - } - let status = VersionPurgeStatusType::from(status); - targets.insert(arn.to_string(), status); - } - targets -} - -pub fn get_replication_state(rinfos: &ReplicatedInfos, prev_state: &ReplicationState, _vid: Option) -> ReplicationState { - let reset_status_map: Vec<(String, String)> = rinfos - .targets - .iter() - .filter(|v| !v.resync_timestamp.is_empty()) - .map(|t| (target_reset_header(t.arn.as_str()), t.resync_timestamp.clone())) - .collect(); - - let repl_statuses = rinfos.replication_status_internal(); - let vpurge_statuses = rinfos.version_purge_status_internal(); - - let mut reset_statuses_map = prev_state.reset_statuses_map.clone(); - for (key, value) in reset_status_map { - reset_statuses_map.insert(key, value); - } - - ReplicationState { - replicate_decision_str: 
prev_state.replicate_decision_str.clone(), - reset_statuses_map, - replica_timestamp: prev_state.replica_timestamp, - replica_status: prev_state.replica_status.clone(), - targets: replication_statuses_map(&repl_statuses.clone().unwrap_or_default()), - replication_status_internal: repl_statuses, - replication_timestamp: rinfos.replication_timestamp, - purge_targets: version_purge_statuses_map(&vpurge_statuses.clone().unwrap_or_default()), - version_purge_status_internal: vpurge_statuses, - - ..Default::default() - } -} diff --git a/crates/ecstore/src/client/api_put_object.rs b/crates/ecstore/src/client/api_put_object.rs index 19f38895..b4e1c207 100644 --- a/crates/ecstore/src/client/api_put_object.rs +++ b/crates/ecstore/src/client/api_put_object.rs @@ -30,7 +30,8 @@ use s3s::header::{ X_AMZ_STORAGE_CLASS, X_AMZ_WEBSITE_REDIRECT_LOCATION, }; //use crate::disk::{BufferReader, Reader}; -use crate::checksum::ChecksumMode; +use crate::client::checksum::ChecksumMode; +use crate::client::utils::base64_encode; use crate::client::{ api_error_response::{err_entity_too_large, err_invalid_argument}, api_put_object_common::optimal_part_info, @@ -41,7 +42,6 @@ use crate::client::{ transition_api::{ReaderImpl, TransitionClient, UploadInfo}, utils::{is_amz_header, is_minio_header, is_rustfs_header, is_standard_header, is_storageclass_header}, }; -use rustfs_utils::crypto::base64_encode; #[derive(Debug, Clone)] pub struct AdvancedPutOptions { diff --git a/crates/ecstore/src/client/api_put_object_multipart.rs b/crates/ecstore/src/client/api_put_object_multipart.rs index 84ba5810..54c09858 100644 --- a/crates/ecstore/src/client/api_put_object_multipart.rs +++ b/crates/ecstore/src/client/api_put_object_multipart.rs @@ -25,7 +25,8 @@ use time::OffsetDateTime; use tracing::warn; use uuid::Uuid; -use crate::checksum::ChecksumMode; +use crate::client::checksum::ChecksumMode; +use crate::client::utils::base64_encode; use crate::client::{ api_error_response::{ err_entity_too_large, err_entity_too_small, err_invalid_argument, http_resp_to_error_response, to_error_response, @@ -38,7 +39,7 @@ use crate::client::{ constants::{ISO8601_DATEFORMAT, MAX_PART_SIZE, MAX_SINGLE_PUT_OBJECT_SIZE}, transition_api::{ReaderImpl, RequestMetadata, TransitionClient, UploadInfo}, }; -use rustfs_utils::{crypto::base64_encode, path::trim_etag}; +use rustfs_utils::path::trim_etag; use s3s::header::{X_AMZ_EXPIRATION, X_AMZ_VERSION_ID}; impl TransitionClient { diff --git a/crates/ecstore/src/client/api_put_object_streaming.rs b/crates/ecstore/src/client/api_put_object_streaming.rs index ca985eca..eb052d7c 100644 --- a/crates/ecstore/src/client/api_put_object_streaming.rs +++ b/crates/ecstore/src/client/api_put_object_streaming.rs @@ -29,7 +29,7 @@ use tokio_util::sync::CancellationToken; use tracing::warn; use uuid::Uuid; -use crate::checksum::{ChecksumMode, add_auto_checksum_headers, apply_auto_checksum}; +use crate::client::checksum::{ChecksumMode, add_auto_checksum_headers, apply_auto_checksum}; use crate::client::{ api_error_response::{err_invalid_argument, err_unexpected_eof, http_resp_to_error_response}, api_put_object::PutObjectOptions, @@ -40,7 +40,8 @@ use crate::client::{ transition_api::{ReaderImpl, RequestMetadata, TransitionClient, UploadInfo}, }; -use rustfs_utils::{crypto::base64_encode, path::trim_etag}; +use crate::client::utils::base64_encode; +use rustfs_utils::path::trim_etag; use s3s::header::{X_AMZ_EXPIRATION, X_AMZ_VERSION_ID}; pub struct UploadedPartRes { diff --git a/crates/ecstore/src/client/api_remove.rs 
b/crates/ecstore/src/client/api_remove.rs index 80188721..beacd550 100644 --- a/crates/ecstore/src/client/api_remove.rs +++ b/crates/ecstore/src/client/api_remove.rs @@ -20,7 +20,7 @@ use bytes::Bytes; use http::{HeaderMap, HeaderValue, Method, StatusCode}; -use rustfs_utils::{HashAlgorithm, crypto::base64_encode}; +use rustfs_utils::HashAlgorithm; use s3s::S3ErrorCode; use s3s::dto::ReplicationStatus; use s3s::header::X_AMZ_BYPASS_GOVERNANCE_RETENTION; @@ -29,6 +29,7 @@ use std::{collections::HashMap, sync::Arc}; use time::OffsetDateTime; use tokio::sync::mpsc::{self, Receiver, Sender}; +use crate::client::utils::base64_encode; use crate::client::{ api_error_response::{ErrorResponse, http_resp_to_error_response, to_error_response}, transition_api::{ReaderImpl, RequestMetadata, TransitionClient}, diff --git a/crates/ecstore/src/client/api_s3_datatypes.rs b/crates/ecstore/src/client/api_s3_datatypes.rs index cd92b3ec..865ce63a 100644 --- a/crates/ecstore/src/client/api_s3_datatypes.rs +++ b/crates/ecstore/src/client/api_s3_datatypes.rs @@ -23,9 +23,9 @@ use serde::{Deserialize, Serialize}; use std::collections::HashMap; use time::OffsetDateTime; -use crate::checksum::ChecksumMode; +use crate::client::checksum::ChecksumMode; use crate::client::transition_api::ObjectMultipartInfo; -use rustfs_utils::crypto::base64_decode; +use crate::client::utils::base64_decode; use super::transition_api; diff --git a/crates/ecstore/src/client/checksum.rs b/crates/ecstore/src/client/checksum.rs new file mode 100644 index 00000000..5a725f97 --- /dev/null +++ b/crates/ecstore/src/client/checksum.rs @@ -0,0 +1,351 @@ +#![allow(clippy::map_entry)] +// Copyright 2024 RustFS Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#![allow(unused_imports)] +#![allow(unused_variables)] +#![allow(unused_mut)] +#![allow(unused_assignments)] +#![allow(unused_must_use)] +#![allow(clippy::all)] + +use lazy_static::lazy_static; +use rustfs_checksums::ChecksumAlgorithm; +use std::collections::HashMap; + +use crate::client::utils::base64_decode; +use crate::client::utils::base64_encode; +use crate::client::{api_put_object::PutObjectOptions, api_s3_datatypes::ObjectPart}; +use crate::{disk::DiskAPI, store_api::GetObjectReader}; +use s3s::header::{ + X_AMZ_CHECKSUM_ALGORITHM, X_AMZ_CHECKSUM_CRC32, X_AMZ_CHECKSUM_CRC32C, X_AMZ_CHECKSUM_SHA1, X_AMZ_CHECKSUM_SHA256, +}; + +use enumset::{EnumSet, EnumSetType, enum_set}; + +#[derive(Debug, EnumSetType, Default)] +#[enumset(repr = "u8")] +pub enum ChecksumMode { + #[default] + ChecksumNone, + ChecksumSHA256, + ChecksumSHA1, + ChecksumCRC32, + ChecksumCRC32C, + ChecksumCRC64NVME, + ChecksumFullObject, +} + +lazy_static! 
{ + static ref C_ChecksumMask: EnumSet<ChecksumMode> = { + let mut s = EnumSet::all(); + s.remove(ChecksumMode::ChecksumFullObject); + s + }; + static ref C_ChecksumFullObjectCRC32: EnumSet<ChecksumMode> = + enum_set!(ChecksumMode::ChecksumCRC32 | ChecksumMode::ChecksumFullObject); + static ref C_ChecksumFullObjectCRC32C: EnumSet<ChecksumMode> = + enum_set!(ChecksumMode::ChecksumCRC32C | ChecksumMode::ChecksumFullObject); +} +const AMZ_CHECKSUM_CRC64NVME: &str = "x-amz-checksum-crc64nvme"; + +impl ChecksumMode { + //pub const CRC64_NVME_POLYNOMIAL: i64 = 0xad93d23594c93659; + + pub fn base(&self) -> ChecksumMode { + let s = EnumSet::from(*self).intersection(*C_ChecksumMask); + match s.as_u8() { + 1_u8 => ChecksumMode::ChecksumNone, + 2_u8 => ChecksumMode::ChecksumSHA256, + 4_u8 => ChecksumMode::ChecksumSHA1, + 8_u8 => ChecksumMode::ChecksumCRC32, + 16_u8 => ChecksumMode::ChecksumCRC32C, + 32_u8 => ChecksumMode::ChecksumCRC64NVME, + _ => panic!("enum err."), + } + } + + pub fn is(&self, t: ChecksumMode) -> bool { + *self & t == t + } + + pub fn key(&self) -> String { + //match c & checksumMask { + match self { + ChecksumMode::ChecksumCRC32 => { + return X_AMZ_CHECKSUM_CRC32.to_string(); + } + ChecksumMode::ChecksumCRC32C => { + return X_AMZ_CHECKSUM_CRC32C.to_string(); + } + ChecksumMode::ChecksumSHA1 => { + return X_AMZ_CHECKSUM_SHA1.to_string(); + } + ChecksumMode::ChecksumSHA256 => { + return X_AMZ_CHECKSUM_SHA256.to_string(); + } + ChecksumMode::ChecksumCRC64NVME => { + return AMZ_CHECKSUM_CRC64NVME.to_string(); + } + _ => { + return "".to_string(); + } + } + } + + pub fn can_composite(&self) -> bool { + let s = EnumSet::from(*self).intersection(*C_ChecksumMask); + match s.as_u8() { + 2_u8 => true, + 4_u8 => true, + 8_u8 => true, + 16_u8 => true, + _ => false, + } + } + + pub fn can_merge_crc(&self) -> bool { + let s = EnumSet::from(*self).intersection(*C_ChecksumMask); + match s.as_u8() { + 8_u8 => true, + 16_u8 => true, + 32_u8 => true, + _ => false, + } + } + + pub fn full_object_requested(&self) -> bool { + let s = EnumSet::from(*self).intersection(*C_ChecksumMask); + match s.as_u8() { + //C_ChecksumFullObjectCRC32 as u8 => true, + //C_ChecksumFullObjectCRC32C as u8 => true, + 32_u8 => true, + _ => false, + } + } + + pub fn key_capitalized(&self) -> String { + self.key() + } + + pub fn raw_byte_len(&self) -> usize { + let u = EnumSet::from(*self).intersection(*C_ChecksumMask).as_u8(); + if u == ChecksumMode::ChecksumCRC32 as u8 || u == ChecksumMode::ChecksumCRC32C as u8 { + 4 + } else if u == ChecksumMode::ChecksumSHA1 as u8 { + use sha1::Digest; + sha1::Sha1::output_size() as usize + } else if u == ChecksumMode::ChecksumSHA256 as u8 { + use sha2::Digest; + sha2::Sha256::output_size() as usize + } else if u == ChecksumMode::ChecksumCRC64NVME as u8 { + 8 + } else { + 0 + } + } + + pub fn hasher(&self) -> Result, std::io::Error> { + match /*C_ChecksumMask & **/self { + ChecksumMode::ChecksumCRC32 => { + return Ok(ChecksumAlgorithm::Crc32.into_impl()); + } + ChecksumMode::ChecksumCRC32C => { + return Ok(ChecksumAlgorithm::Crc32c.into_impl()); + } + ChecksumMode::ChecksumSHA1 => { + return Ok(ChecksumAlgorithm::Sha1.into_impl()); + } + ChecksumMode::ChecksumSHA256 => { + return Ok(ChecksumAlgorithm::Sha256.into_impl()); + } + ChecksumMode::ChecksumCRC64NVME => { + return Ok(ChecksumAlgorithm::Crc64Nvme.into_impl()); + } + _ => return Err(std::io::Error::other("unsupported checksum type")), + } + } + + pub fn is_set(&self) -> bool { + let s = EnumSet::from(*self).intersection(*C_ChecksumMask); + s.len() == 1 + } + + pub fn 
set_default(&mut self, t: ChecksumMode) { + if !self.is_set() { + *self = t; + } + } + + pub fn encode_to_string(&self, b: &[u8]) -> Result<String, std::io::Error> { + if !self.is_set() { + return Ok("".to_string()); + } + let mut h = self.hasher()?; + h.update(b); + let hash = h.finalize(); + Ok(base64_encode(hash.as_ref())) + } + + pub fn to_string(&self) -> String { + //match c & checksumMask { + match self { + ChecksumMode::ChecksumCRC32 => { + return "CRC32".to_string(); + } + ChecksumMode::ChecksumCRC32C => { + return "CRC32C".to_string(); + } + ChecksumMode::ChecksumSHA1 => { + return "SHA1".to_string(); + } + ChecksumMode::ChecksumSHA256 => { + return "SHA256".to_string(); + } + ChecksumMode::ChecksumNone => { + return "".to_string(); + } + ChecksumMode::ChecksumCRC64NVME => { + return "CRC64NVME".to_string(); + } + _ => { + return "".to_string(); + } + } + } + + // pub fn check_sum_reader(&self, r: GetObjectReader) -> Result<Checksum, std::io::Error> { + // let mut h = self.hasher()?; + // Ok(Checksum::new(self.clone(), h.sum().as_bytes())) + // } + + // pub fn check_sum_bytes(&self, b: &[u8]) -> Result<Checksum, std::io::Error> { + // let mut h = self.hasher()?; + // Ok(Checksum::new(self.clone(), h.sum().as_bytes())) + // } + + pub fn composite_checksum(&self, p: &mut [ObjectPart]) -> Result<Checksum, std::io::Error> { + if !self.can_composite() { + return Err(std::io::Error::other("cannot do composite checksum")); + } + p.sort_by(|i, j| { + if i.part_num < j.part_num { + std::cmp::Ordering::Less + } else if i.part_num > j.part_num { + std::cmp::Ordering::Greater + } else { + std::cmp::Ordering::Equal + } + }); + let c = self.base(); + let crc_bytes = Vec::<u8>::with_capacity(p.len() * self.raw_byte_len() as usize); + let mut h = self.hasher()?; + h.update(crc_bytes.as_ref()); + let hash = h.finalize(); + Ok(Checksum { + checksum_type: self.clone(), + r: hash.as_ref().to_vec(), + computed: false, + }) + } + + pub fn full_object_checksum(&self, p: &mut [ObjectPart]) -> Result<Checksum, std::io::Error> { + todo!(); + } +} + +#[derive(Default)] +pub struct Checksum { + checksum_type: ChecksumMode, + r: Vec<u8>, + computed: bool, +} + +#[allow(dead_code)] +impl Checksum { + fn new(t: ChecksumMode, b: &[u8]) -> Checksum { + if t.is_set() && b.len() == t.raw_byte_len() { + return Checksum { + checksum_type: t, + r: b.to_vec(), + computed: false, + }; + } + Checksum::default() + } + + #[allow(dead_code)] + fn new_checksum_string(t: ChecksumMode, s: &str) -> Result<Checksum, std::io::Error> { + let b = match base64_decode(s.as_bytes()) { + Ok(b) => b, + Err(err) => return Err(std::io::Error::other(err.to_string())), + }; + if t.is_set() && b.len() == t.raw_byte_len() { + return Ok(Checksum { + checksum_type: t, + r: b, + computed: false, + }); + } + Ok(Checksum::default()) + } + + fn is_set(&self) -> bool { + self.checksum_type.is_set() && self.r.len() == self.checksum_type.raw_byte_len() + } + + fn encoded(&self) -> String { + if !self.is_set() { + return "".to_string(); + } + base64_encode(&self.r) + } + + #[allow(dead_code)] + fn raw(&self) -> Option<Vec<u8>> { + if !self.is_set() { + return None; + } + Some(self.r.clone()) + } +} + +pub fn add_auto_checksum_headers(opts: &mut PutObjectOptions) { + opts.user_metadata + .insert("X-Amz-Checksum-Algorithm".to_string(), opts.auto_checksum.to_string()); + if opts.auto_checksum.full_object_requested() { + opts.user_metadata + .insert("X-Amz-Checksum-Type".to_string(), "FULL_OBJECT".to_string()); + } +} + +pub fn apply_auto_checksum(opts: &mut PutObjectOptions, all_parts: &mut [ObjectPart]) -> Result<(), std::io::Error> { + if opts.auto_checksum.can_composite() && 
!opts.auto_checksum.is(ChecksumMode::ChecksumFullObject) { + let crc = opts.auto_checksum.composite_checksum(all_parts)?; + opts.user_metadata = { + let mut hm = HashMap::new(); + hm.insert(opts.auto_checksum.key(), crc.encoded()); + hm + } + } else if opts.auto_checksum.can_merge_crc() { + let crc = opts.auto_checksum.full_object_checksum(all_parts)?; + opts.user_metadata = { + let mut hm = HashMap::new(); + hm.insert(opts.auto_checksum.key_capitalized(), crc.encoded()); + hm.insert("X-Amz-Checksum-Type".to_string(), "FULL_OBJECT".to_string()); + hm + } + } + + Ok(()) +} diff --git a/crates/ecstore/src/client/mod.rs b/crates/ecstore/src/client/mod.rs index 9a87b475..c3c9e237 100644 --- a/crates/ecstore/src/client/mod.rs +++ b/crates/ecstore/src/client/mod.rs @@ -30,6 +30,7 @@ pub mod api_restore; pub mod api_s3_datatypes; pub mod api_stat; pub mod bucket_cache; +pub mod checksum; pub mod constants; pub mod credentials; pub mod object_api_utils; diff --git a/crates/ecstore/src/client/transition_api.rs b/crates/ecstore/src/client/transition_api.rs index 14b6f85f..6d1fd70c 100644 --- a/crates/ecstore/src/client/transition_api.rs +++ b/crates/ecstore/src/client/transition_api.rs @@ -61,7 +61,7 @@ use crate::client::{ constants::{UNSIGNED_PAYLOAD, UNSIGNED_PAYLOAD_TRAILER}, credentials::{CredContext, Credentials, SignatureType, Static}, }; -use crate::{checksum::ChecksumMode, store_api::GetObjectReader}; +use crate::{client::checksum::ChecksumMode, store_api::GetObjectReader}; use rustfs_rio::HashReader; use rustfs_utils::{ net::get_endpoint_url, diff --git a/crates/ecstore/src/client/utils.rs b/crates/ecstore/src/client/utils.rs index ee7d020e..234783ab 100644 --- a/crates/ecstore/src/client/utils.rs +++ b/crates/ecstore/src/client/utils.rs @@ -90,3 +90,11 @@ pub fn is_rustfs_header(header_key: &str) -> bool { pub fn is_minio_header(header_key: &str) -> bool { header_key.to_lowercase().starts_with("x-minio-") } + +pub fn base64_encode(input: &[u8]) -> String { + base64_simd::URL_SAFE_NO_PAD.encode_to_string(input) } + +pub fn base64_decode(input: &[u8]) -> Result<Vec<u8>, base64_simd::Error> { + base64_simd::URL_SAFE_NO_PAD.decode_to_vec(input) +} diff --git a/crates/ecstore/src/erasure_coding/decode.rs b/crates/ecstore/src/erasure_coding/decode.rs index ef032ddb..ea92582b 100644 --- a/crates/ecstore/src/erasure_coding/decode.rs +++ b/crates/ecstore/src/erasure_coding/decode.rs @@ -301,6 +301,10 @@ impl Erasure { written += n; } + if ret_err.is_some() { + return (written, ret_err); + } + if written < length { ret_err = Some(Error::LessData.into()); } diff --git a/crates/ecstore/src/erasure_coding/encode.rs b/crates/ecstore/src/erasure_coding/encode.rs index 90d6e973..766c96ca 100644 --- a/crates/ecstore/src/erasure_coding/encode.rs +++ b/crates/ecstore/src/erasure_coding/encode.rs @@ -145,7 +145,9 @@ impl Erasure { return Err(std::io::Error::other(format!("Failed to send encoded data : {err}"))); } } - Ok(_) => break, + Ok(_) => { + break; + } Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => { break; } diff --git a/crates/ecstore/src/erasure_coding/erasure.rs b/crates/ecstore/src/erasure_coding/erasure.rs index 21a2e06d..ad6829d5 100644 --- a/crates/ecstore/src/erasure_coding/erasure.rs +++ b/crates/ecstore/src/erasure_coding/erasure.rs @@ -468,15 +468,21 @@ impl Erasure { let mut buf = vec![0u8; block_size]; match rustfs_utils::read_full(&mut *reader, &mut buf).await { Ok(n) if n > 0 => { + warn!("encode_stream_callback_async read n={}", n); total += n; let res = 
self.encode_data(&buf[..n]); on_block(res).await? } - Ok(_) => break, + Ok(_) => { + warn!("encode_stream_callback_async read unexpected ok"); + break; + } Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => { + warn!("encode_stream_callback_async read unexpected eof"); break; } Err(e) => { + warn!("encode_stream_callback_async read error={:?}", e); on_block(Err(e)).await?; break; } diff --git a/crates/ecstore/src/lib.rs b/crates/ecstore/src/lib.rs index b28ce0cb..3194f2b8 100644 --- a/crates/ecstore/src/lib.rs +++ b/crates/ecstore/src/lib.rs @@ -44,7 +44,7 @@ mod store_init; pub mod store_list_objects; pub mod store_utils; -pub mod checksum; +// pub mod checksum; pub mod client; pub mod event; pub mod event_notification; diff --git a/crates/ecstore/src/pools.rs b/crates/ecstore/src/pools.rs index a879ba8b..bdf8fb0b 100644 --- a/crates/ecstore/src/pools.rs +++ b/crates/ecstore/src/pools.rs @@ -1262,6 +1262,8 @@ impl ECStore { parts[i] = CompletePart { part_num: pi.part_num, etag: pi.etag, + + ..Default::default() }; } @@ -1289,7 +1291,7 @@ impl ECStore { } let reader = BufReader::new(rd.stream); - let hrd = HashReader::new(Box::new(WarpReader::new(reader)), object_info.size, object_info.size, None, false)?; + let hrd = HashReader::new(Box::new(WarpReader::new(reader)), object_info.size, object_info.size, None, None, false)?; let mut data = PutObjReader::new(hrd); if let Err(err) = self diff --git a/crates/ecstore/src/rebalance.rs b/crates/ecstore/src/rebalance.rs index 40b53c59..0de0eb6b 100644 --- a/crates/ecstore/src/rebalance.rs +++ b/crates/ecstore/src/rebalance.rs @@ -979,6 +979,7 @@ impl ECStore { parts[i] = CompletePart { part_num: pi.part_num, etag: pi.etag, + ..Default::default() }; } @@ -1005,7 +1006,7 @@ impl ECStore { } let reader = BufReader::new(rd.stream); - let hrd = HashReader::new(Box::new(WarpReader::new(reader)), object_info.size, object_info.size, None, false)?; + let hrd = HashReader::new(Box::new(WarpReader::new(reader)), object_info.size, object_info.size, None, None, false)?; let mut data = PutObjReader::new(hrd); if let Err(err) = self diff --git a/crates/ecstore/src/set_disk.rs b/crates/ecstore/src/set_disk.rs index 6ef8751a..4803f52a 100644 --- a/crates/ecstore/src/set_disk.rs +++ b/crates/ecstore/src/set_disk.rs @@ -72,13 +72,13 @@ use rustfs_filemeta::{ }; use rustfs_lock::fast_lock::types::LockResult; use rustfs_madmin::heal_commands::{HealDriveInfo, HealResultItem}; -use rustfs_rio::{EtagResolvable, HashReader, TryGetIndex as _, WarpReader}; +use rustfs_rio::{EtagResolvable, HashReader, HashReaderMut, TryGetIndex as _, WarpReader}; use rustfs_utils::http::headers::AMZ_OBJECT_TAGGING; use rustfs_utils::http::headers::AMZ_STORAGE_CLASS; use rustfs_utils::http::headers::RESERVED_METADATA_PREFIX_LOWER; use rustfs_utils::{ HashAlgorithm, - crypto::{base64_decode, base64_encode, hex}, + crypto::hex, path::{SLASH_SEPARATOR, encode_dir_object, has_suffix, path_join_buf}, }; use rustfs_workers::workers::Workers; @@ -158,10 +158,7 @@ impl SetDisks { LockResult::Conflict { current_owner, current_mode, - } => format!( - "{mode} lock conflicted on {bucket}/{object}: held by {current_owner} as {:?}", - current_mode - ), + } => format!("{mode} lock conflicted on {bucket}/{object}: held by {current_owner} as {current_mode:?}"), LockResult::Acquired => format!("unexpected lock state while acquiring {mode} lock on {bucket}/{object}"), } } @@ -922,9 +919,8 @@ impl SetDisks { } fn get_upload_id_dir(bucket: &str, object: &str, upload_id: &str) -> String { - // 
warn!("get_upload_id_dir upload_id {:?}", upload_id); - - let upload_uuid = base64_decode(upload_id.as_bytes()) + let upload_uuid = base64_simd::URL_SAFE_NO_PAD + .decode_to_vec(upload_id.as_bytes()) .and_then(|v| { String::from_utf8(v).map_or(Ok(upload_id.to_owned()), |v| { let parts: Vec<_> = v.splitn(2, '.').collect(); @@ -2950,6 +2946,7 @@ impl SetDisks { part.mod_time, part.actual_size, part.index.clone(), + part.checksums.clone(), ); if is_inline_buffer { if let Some(writer) = writers[index].take() { @@ -3528,9 +3525,9 @@ impl ObjectIO for SetDisks { // } if object_info.size == 0 { - if let Some(rs) = range { - let _ = rs.get_offset_length(object_info.size)?; - } + // if let Some(rs) = range { + // let _ = rs.get_offset_length(object_info.size)?; + // } let reader = GetObjectReader { stream: Box::new(Cursor::new(Vec::new())), @@ -3712,7 +3709,7 @@ impl ObjectIO for SetDisks { let stream = mem::replace( &mut data.stream, - HashReader::new(Box::new(WarpReader::new(Cursor::new(Vec::new()))), 0, 0, None, false)?, + HashReader::new(Box::new(WarpReader::new(Cursor::new(Vec::new()))), 0, 0, None, None, false)?, ); let (reader, w_size) = match Arc::new(erasure).encode(stream, &mut writers, write_quorum).await { @@ -3729,7 +3726,12 @@ impl ObjectIO for SetDisks { // } if (w_size as i64) < data.size() { - return Err(Error::other("put_object write size < data.size()")); + warn!("put_object write size < data.size(), w_size={}, data.size={}", w_size, data.size()); + return Err(Error::other(format!( + "put_object write size < data.size(), w_size={}, data.size={}", + w_size, + data.size() + ))); } if user_defined.contains_key(&format!("{RESERVED_METADATA_PREFIX_LOWER}compression")) { @@ -3756,31 +3758,42 @@ impl ObjectIO for SetDisks { } } + if fi.checksum.is_none() { + if let Some(content_hash) = data.as_hash_reader().content_hash() { + fi.checksum = Some(content_hash.to_bytes(&[])); + } + } + if let Some(sc) = user_defined.get(AMZ_STORAGE_CLASS) { if sc == storageclass::STANDARD { let _ = user_defined.remove(AMZ_STORAGE_CLASS); } } - let now = OffsetDateTime::now_utc(); + let mod_time = if let Some(mod_time) = opts.mod_time { + Some(mod_time) + } else { + Some(OffsetDateTime::now_utc()) + }; - for (i, fi) in parts_metadatas.iter_mut().enumerate() { - fi.metadata = user_defined.clone(); + for (i, pfi) in parts_metadatas.iter_mut().enumerate() { + pfi.metadata = user_defined.clone(); if is_inline_buffer { if let Some(writer) = writers[i].take() { - fi.data = Some(writer.into_inline_data().map(bytes::Bytes::from).unwrap_or_default()); + pfi.data = Some(writer.into_inline_data().map(bytes::Bytes::from).unwrap_or_default()); } - fi.set_inline_data(); + pfi.set_inline_data(); } - fi.mod_time = Some(now); - fi.size = w_size as i64; - fi.versioned = opts.versioned || opts.version_suspended; - fi.add_object_part(1, etag.clone(), w_size, fi.mod_time, actual_size, index_op.clone()); + pfi.mod_time = mod_time; + pfi.size = w_size as i64; + pfi.versioned = opts.versioned || opts.version_suspended; + pfi.add_object_part(1, etag.clone(), w_size, mod_time, actual_size, index_op.clone(), None); + pfi.checksum = fi.checksum.clone(); if opts.data_movement { - fi.set_data_moved(); + pfi.set_data_moved(); } } @@ -3815,7 +3828,8 @@ impl ObjectIO for SetDisks { fi.replication_state_internal = Some(opts.put_replication_state()); - // TODO: version support + fi.is_latest = true; + Ok(ObjectInfo::from_file_info(&fi, bucket, object, opts.versioned || opts.version_suspended)) } } @@ -4430,8 +4444,6 @@ impl StorageAPI for 
SetDisks { .await .map_err(|e| to_object_err(e, vec![bucket, object]))?; - // warn!("get object_info fi {:?}", &fi); - let oi = ObjectInfo::from_file_info(&fi, bucket, object, opts.versioned || opts.version_suspended); Ok(oi) @@ -4759,6 +4771,11 @@ impl StorageAPI for SetDisks { uploaded_parts.push(CompletePart { part_num: p_info.part_num, etag: p_info.etag, + checksum_crc32: None, + checksum_crc32c: None, + checksum_sha1: None, + checksum_sha256: None, + checksum_crc64nvme: None, }); } if let Err(err) = self.complete_multipart_upload(bucket, object, &res.upload_id, uploaded_parts, &ObjectOptions { @@ -4834,64 +4851,24 @@ impl StorageAPI for SetDisks { let write_quorum = fi.write_quorum(self.default_write_quorum()); - let disks = self.disks.read().await; + if let Some(checksum) = fi.metadata.get(rustfs_rio::RUSTFS_MULTIPART_CHECKSUM) + && !checksum.is_empty() + && data + .as_hash_reader() + .content_crc_type() + .is_none_or(|v| v.to_string() != *checksum) + { + return Err(Error::other(format!("checksum mismatch: {checksum}"))); + } + + let disks = self.disks.read().await.clone(); - let disks = disks.clone(); let shuffle_disks = Self::shuffle_disks(&disks, &fi.erasure.distribution); let part_suffix = format!("part.{part_id}"); let tmp_part = format!("{}x{}", Uuid::new_v4(), OffsetDateTime::now_utc().unix_timestamp()); let tmp_part_path = Arc::new(format!("{tmp_part}/{part_suffix}")); - // let mut writers = Vec::with_capacity(disks.len()); - // let erasure = Erasure::new(fi.erasure.data_blocks, fi.erasure.parity_blocks, fi.erasure.block_size); - // let shared_size = erasure.shard_size(erasure.block_size); - - // let futures = disks.iter().map(|disk| { - // let disk = disk.clone(); - // let tmp_part_path = tmp_part_path.clone(); - // tokio::spawn(async move { - // if let Some(disk) = disk { - // // let writer = disk.append_file(RUSTFS_META_TMP_BUCKET, &tmp_part_path).await?; - // // let filewriter = disk - // // .create_file("", RUSTFS_META_TMP_BUCKET, &tmp_part_path, data.content_length) - // // .await?; - // match new_bitrot_filewriter( - // disk.clone(), - // RUSTFS_META_TMP_BUCKET, - // &tmp_part_path, - // false, - // DEFAULT_BITROT_ALGO, - // shared_size, - // ) - // .await - // { - // Ok(writer) => Ok(Some(writer)), - // Err(e) => Err(e), - // } - // } else { - // Ok(None) - // } - // }) - // }); - // for x in join_all(futures).await { - // let x = x??; - // writers.push(x); - // } - - // let erasure = Erasure::new(fi.erasure.data_blocks, fi.erasure.parity_blocks, fi.erasure.block_size); - - // let stream = replace(&mut data.stream, Box::new(empty())); - // let etag_stream = EtagReader::new(stream); - - // let (w_size, mut etag) = Arc::new(erasure) - // .encode(etag_stream, &mut writers, data.content_length, write_quorum) - // .await?; - - // if let Err(err) = close_bitrot_writers(&mut writers).await { - // error!("close_bitrot_writers err {:?}", err); - // } - let erasure = erasure_coding::Erasure::new(fi.erasure.data_blocks, fi.erasure.parity_blocks, fi.erasure.block_size); let mut writers = Vec::with_capacity(shuffle_disks.len()); @@ -4944,7 +4921,7 @@ impl StorageAPI for SetDisks { let stream = mem::replace( &mut data.stream, - HashReader::new(Box::new(WarpReader::new(Cursor::new(Vec::new()))), 0, 0, None, false)?, + HashReader::new(Box::new(WarpReader::new(Cursor::new(Vec::new()))), 0, 0, None, None, false)?, ); let (reader, w_size) = Arc::new(erasure).encode(stream, &mut writers, write_quorum).await?; // TODO: 出错,删除临时目录 @@ -4952,7 +4929,12 @@ impl StorageAPI for SetDisks { let _ 
= mem::replace(&mut data.stream, reader); if (w_size as i64) < data.size() { - return Err(Error::other("put_object_part write size < data.size()")); + warn!("put_object_part write size < data.size(), w_size={}, data.size={}", w_size, data.size()); + return Err(Error::other(format!( + "put_object_part write size < data.size(), w_size={}, data.size={}", + w_size, + data.size() + ))); } let index_op = data.stream.try_get_index().map(|v| v.clone().into_vec()); @@ -5227,7 +5209,8 @@ impl StorageAPI for SetDisks { uploads.push(MultipartInfo { bucket: bucket.to_owned(), object: object.to_owned(), - upload_id: base64_encode(format!("{}.{}", get_global_deployment_id().unwrap_or_default(), upload_id).as_bytes()), + upload_id: base64_simd::URL_SAFE_NO_PAD + .encode_to_string(format!("{}.{}", get_global_deployment_id().unwrap_or_default(), upload_id).as_bytes()), initiated: Some(start_time), ..Default::default() }); @@ -5348,6 +5331,14 @@ impl StorageAPI for SetDisks { } } + if let Some(checksum) = &opts.want_checksum { + user_defined.insert(rustfs_rio::RUSTFS_MULTIPART_CHECKSUM.to_string(), checksum.checksum_type.to_string()); + user_defined.insert( + rustfs_rio::RUSTFS_MULTIPART_CHECKSUM_TYPE.to_string(), + checksum.checksum_type.obj_type().to_string(), + ); + } + let (shuffle_disks, mut parts_metadatas) = Self::shuffle_disks_and_parts_metadata(&disks, &parts_metadata, &fi); let mod_time = opts.mod_time.unwrap_or(OffsetDateTime::now_utc()); @@ -5362,7 +5353,8 @@ impl StorageAPI for SetDisks { let upload_uuid = format!("{}x{}", Uuid::new_v4(), mod_time.unix_timestamp_nanos()); - let upload_id = base64_encode(format!("{}.{}", get_global_deployment_id().unwrap_or_default(), upload_uuid).as_bytes()); + let upload_id = base64_simd::URL_SAFE_NO_PAD + .encode_to_string(format!("{}.{}", get_global_deployment_id().unwrap_or_default(), upload_uuid).as_bytes()); let upload_path = Self::get_upload_id_dir(bucket, object, upload_uuid.as_str()); @@ -5379,7 +5371,11 @@ impl StorageAPI for SetDisks { // evalDisks - Ok(MultipartUploadResult { upload_id }) + Ok(MultipartUploadResult { + upload_id, + checksum_algo: user_defined.get(rustfs_rio::RUSTFS_MULTIPART_CHECKSUM).cloned(), + checksum_type: user_defined.get(rustfs_rio::RUSTFS_MULTIPART_CHECKSUM_TYPE).cloned(), + }) } #[tracing::instrument(skip(self))] @@ -5467,6 +5463,25 @@ impl StorageAPI for SetDisks { return Err(Error::other("part result number err")); } + if let Some(cs) = fi.metadata.get(rustfs_rio::RUSTFS_MULTIPART_CHECKSUM) { + let Some(checksum_type) = fi.metadata.get(rustfs_rio::RUSTFS_MULTIPART_CHECKSUM_TYPE) else { + return Err(Error::other("checksum type not found")); + }; + + if opts.want_checksum.is_some() + && !opts.want_checksum.as_ref().is_some_and(|v| { + v.checksum_type + .is(rustfs_rio::ChecksumType::from_string_with_obj_type(cs, checksum_type)) + }) + { + return Err(Error::other(format!( + "checksum type mismatch, got {:?}, want {:?}", + opts.want_checksum.as_ref().unwrap(), + rustfs_rio::ChecksumType::from_string_with_obj_type(cs, checksum_type) + ))); + } + } + for (i, part) in object_parts.iter().enumerate() { if let Some(err) = &part.error { error!("complete_multipart_upload part error: {:?}", &err); @@ -5487,6 +5502,7 @@ impl StorageAPI for SetDisks { part.mod_time, part.actual_size, part.index.clone(), + part.checksums.clone(), ); } @@ -6422,10 +6438,20 @@ mod tests { CompletePart { part_num: 1, etag: Some("d41d8cd98f00b204e9800998ecf8427e".to_string()), + checksum_crc32: None, + checksum_crc32c: None, + checksum_sha1: None, + 
checksum_sha256: None, + checksum_crc64nvme: None, }, CompletePart { part_num: 2, etag: Some("098f6bcd4621d373cade4e832627b4f6".to_string()), + checksum_crc32: None, + checksum_crc32c: None, + checksum_sha1: None, + checksum_sha256: None, + checksum_crc64nvme: None, }, ]; @@ -6442,6 +6468,11 @@ mod tests { let single_part = vec![CompletePart { part_num: 1, etag: Some("d41d8cd98f00b204e9800998ecf8427e".to_string()), + checksum_crc32: None, + checksum_crc32c: None, + checksum_sha1: None, + checksum_sha256: None, + checksum_crc64nvme: None, }]; let single_result = get_complete_multipart_md5(&single_part); assert!(single_result.ends_with("-1")); diff --git a/crates/ecstore/src/store.rs b/crates/ecstore/src/store.rs index 41a15b2a..0e3a1554 100644 --- a/crates/ecstore/src/store.rs +++ b/crates/ecstore/src/store.rs @@ -59,7 +59,6 @@ use rustfs_common::globals::{GLOBAL_Local_Node_Name, GLOBAL_Rustfs_Host, GLOBAL_ use rustfs_common::heal_channel::{HealItemType, HealOpts}; use rustfs_filemeta::FileInfo; use rustfs_madmin::heal_commands::HealResultItem; -use rustfs_utils::crypto::base64_decode; use rustfs_utils::path::{SLASH_SEPARATOR, decode_dir_object, encode_dir_object, path_join_buf}; use s3s::dto::{BucketVersioningStatus, ObjectLockConfiguration, ObjectLockEnabled, VersioningConfiguration}; use std::cmp::Ordering; @@ -2421,7 +2420,7 @@ fn check_list_multipart_args( } } - if let Err(_e) = base64_decode(upload_id_marker.as_bytes()) { + if let Err(_e) = base64_simd::URL_SAFE_NO_PAD.decode_to_vec(upload_id_marker.as_bytes()) { return Err(StorageError::MalformedUploadID(upload_id_marker.to_owned())); } } @@ -2448,7 +2447,7 @@ fn check_new_multipart_args(bucket: &str, object: &str) -> Result<()> { } fn check_multipart_object_args(bucket: &str, object: &str, upload_id: &str) -> Result<()> { - if let Err(e) = base64_decode(upload_id.as_bytes()) { + if let Err(e) = base64_simd::URL_SAFE_NO_PAD.decode_to_vec(upload_id.as_bytes()) { return Err(StorageError::MalformedUploadID(format!("{bucket}/{object}-{upload_id},err:{e}"))); }; check_object_args(bucket, object) diff --git a/crates/ecstore/src/store_api.rs b/crates/ecstore/src/store_api.rs index b619d387..5f22b47e 100644 --- a/crates/ecstore/src/store_api.rs +++ b/crates/ecstore/src/store_api.rs @@ -13,9 +13,6 @@ // limitations under the License. 
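For reference, the multipart upload id round trip above reduces to base64_simd's URL-safe, no-padding alphabet over a "{deployment_id}.{upload_uuid}" payload. A minimal sketch follows; the helper names and placeholder inputs are illustrative and not part of this patch.

// Sketch only: mirrors how upload ids are minted and later validated here.
// deployment_id and upload_uuid are placeholder inputs, not real values.
fn encode_upload_id(deployment_id: &str, upload_uuid: &str) -> String {
    base64_simd::URL_SAFE_NO_PAD.encode_to_string(format!("{deployment_id}.{upload_uuid}").as_bytes())
}

fn upload_id_decodes(upload_id: &str) -> bool {
    // check_multipart_object_args only requires that the id is valid base64.
    base64_simd::URL_SAFE_NO_PAD.decode_to_vec(upload_id.as_bytes()).is_ok()
}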
use crate::bucket::metadata_sys::get_versioning_config; -use crate::bucket::replication::REPLICATION_RESET; -use crate::bucket::replication::REPLICATION_STATUS; -use crate::bucket::replication::{ReplicateDecision, replication_statuses_map, version_purge_statuses_map}; use crate::bucket::versioning::VersioningApi as _; use crate::disk::DiskStore; use crate::error::{Error, Result}; @@ -25,12 +22,15 @@ use crate::{ bucket::lifecycle::lifecycle::ExpirationOptions, bucket::lifecycle::{bucket_lifecycle_ops::TransitionedObject, lifecycle::TransitionOptions}, }; +use bytes::Bytes; use http::{HeaderMap, HeaderValue}; use rustfs_common::heal_channel::HealOpts; use rustfs_filemeta::{ - FileInfo, MetaCacheEntriesSorted, ObjectPartInfo, ReplicationState, ReplicationStatusType, VersionPurgeStatusType, + FileInfo, MetaCacheEntriesSorted, ObjectPartInfo, REPLICATION_RESET, REPLICATION_STATUS, ReplicateDecision, ReplicationState, + ReplicationStatusType, VersionPurgeStatusType, replication_statuses_map, version_purge_statuses_map, }; use rustfs_madmin::heal_commands::HealResultItem; +use rustfs_rio::Checksum; use rustfs_rio::{DecompressReader, HashReader, LimitReader, WarpReader}; use rustfs_utils::CompressionAlgorithm; use rustfs_utils::http::headers::{AMZ_OBJECT_TAGGING, RESERVED_METADATA_PREFIX_LOWER}; @@ -92,11 +92,28 @@ impl PutObjReader { PutObjReader { stream } } + pub fn as_hash_reader(&self) -> &HashReader { + &self.stream + } + pub fn from_vec(data: Vec) -> Self { + use sha2::{Digest, Sha256}; let content_length = data.len() as i64; + let sha256hex = if content_length > 0 { + Some(hex_simd::encode_to_string(Sha256::digest(&data), hex_simd::AsciiCase::Lower)) + } else { + None + }; PutObjReader { - stream: HashReader::new(Box::new(WarpReader::new(Cursor::new(data))), content_length, content_length, None, false) - .unwrap(), + stream: HashReader::new( + Box::new(WarpReader::new(Cursor::new(data))), + content_length, + content_length, + None, + sha256hex, + false, + ) + .unwrap(), } } @@ -374,6 +391,8 @@ pub struct ObjectOptions { pub lifecycle_audit_event: LcAuditEvent, pub eval_metadata: Option>, + + pub want_checksum: Option, } impl ObjectOptions { @@ -456,6 +475,8 @@ pub struct BucketInfo { #[derive(Debug, Default, Clone)] pub struct MultipartUploadResult { pub upload_id: String, + pub checksum_algo: Option, + pub checksum_type: Option, } #[derive(Debug, Default, Clone)] @@ -471,13 +492,24 @@ pub struct PartInfo { pub struct CompletePart { pub part_num: usize, pub etag: Option, + // pub size: Option, + pub checksum_crc32: Option, + pub checksum_crc32c: Option, + pub checksum_sha1: Option, + pub checksum_sha256: Option, + pub checksum_crc64nvme: Option, } impl From for CompletePart { fn from(value: s3s::dto::CompletedPart) -> Self { Self { part_num: value.part_number.unwrap_or_default() as usize, - etag: value.e_tag.map(|e| e.value().to_owned()), + etag: value.e_tag.map(|v| v.value().to_owned()), + checksum_crc32: value.checksum_crc32, + checksum_crc32c: value.checksum_crc32c, + checksum_sha1: value.checksum_sha1, + checksum_sha256: value.checksum_sha256, + checksum_crc64nvme: value.checksum_crc64nvme, } } } @@ -517,7 +549,7 @@ pub struct ObjectInfo { pub version_purge_status_internal: Option, pub version_purge_status: VersionPurgeStatusType, pub replication_decision: String, - pub checksum: Vec, + pub checksum: Option, } impl Clone for ObjectInfo { @@ -554,7 +586,7 @@ impl Clone for ObjectInfo { version_purge_status_internal: self.version_purge_status_internal.clone(), version_purge_status: 
self.version_purge_status.clone(), replication_decision: self.replication_decision.clone(), - checksum: Default::default(), + checksum: self.checksum.clone(), expires: self.expires, } } @@ -694,6 +726,7 @@ impl ObjectInfo { inlined, user_defined: metadata, transitioned_object, + checksum: fi.checksum.clone(), ..Default::default() } } @@ -884,6 +917,23 @@ impl ObjectInfo { ..Default::default() } } + + pub fn decrypt_checksums(&self, part: usize, _headers: &HeaderMap) -> Result<(HashMap, bool)> { + if part > 0 { + if let Some(checksums) = self.parts.iter().find(|p| p.number == part).and_then(|p| p.checksums.clone()) { + return Ok((checksums, true)); + } + } + + // TODO: decrypt checksums + + if let Some(data) = &self.checksum { + let (checksums, is_multipart) = rustfs_rio::read_checksums(data.as_ref(), 0); + return Ok((checksums, is_multipart)); + } + + Ok((HashMap::new(), false)) + } } #[derive(Debug, Default)] diff --git a/crates/filemeta/Cargo.toml b/crates/filemeta/Cargo.toml index 5c7a3589..453a7f2e 100644 --- a/crates/filemeta/Cargo.toml +++ b/crates/filemeta/Cargo.toml @@ -40,6 +40,8 @@ byteorder = { workspace = true } tracing.workspace = true thiserror.workspace = true s3s.workspace = true +lazy_static.workspace = true +regex.workspace = true [dev-dependencies] criterion = { workspace = true } diff --git a/crates/filemeta/src/fileinfo.rs b/crates/filemeta/src/fileinfo.rs index b6fefe5d..4646ff1f 100644 --- a/crates/filemeta/src/fileinfo.rs +++ b/crates/filemeta/src/fileinfo.rs @@ -284,6 +284,7 @@ impl FileInfo { Ok(t) } + #[allow(clippy::too_many_arguments)] pub fn add_object_part( &mut self, num: usize, @@ -292,6 +293,7 @@ impl FileInfo { mod_time: Option, actual_size: i64, index: Option, + checksums: Option>, ) { let part = ObjectPartInfo { etag, @@ -300,7 +302,7 @@ impl FileInfo { mod_time, actual_size, index, - checksums: None, + checksums, error: None, }; diff --git a/crates/filemeta/src/filemeta.rs b/crates/filemeta/src/filemeta.rs index 37fb1542..a3a1cd48 100644 --- a/crates/filemeta/src/filemeta.rs +++ b/crates/filemeta/src/filemeta.rs @@ -15,9 +15,12 @@ use crate::error::{Error, Result}; use crate::fileinfo::{ErasureAlgo, ErasureInfo, FileInfo, FileInfoVersions, ObjectPartInfo, RawFileInfo}; use crate::filemeta_inline::InlineData; -use crate::{ReplicationStatusType, VersionPurgeStatusType}; +use crate::{ + ReplicationState, ReplicationStatusType, VersionPurgeStatusType, replication_statuses_map, version_purge_statuses_map, +}; use byteorder::ByteOrder; use bytes::Bytes; +use rustfs_utils::http::AMZ_BUCKET_REPLICATION_STATUS; use rustfs_utils::http::headers::{ self, AMZ_META_UNENCRYPTED_CONTENT_LENGTH, AMZ_META_UNENCRYPTED_CONTENT_MD5, AMZ_STORAGE_CLASS, RESERVED_METADATA_PREFIX, RESERVED_METADATA_PREFIX_LOWER, VERSION_PURGE_STATUS_KEY, @@ -30,6 +33,7 @@ use std::hash::Hasher; use std::io::{Read, Write}; use std::{collections::HashMap, io::Cursor}; use time::OffsetDateTime; +use time::format_description::well_known::Rfc3339; use tokio::io::AsyncRead; use tracing::error; use uuid::Uuid; @@ -1742,7 +1746,25 @@ impl MetaObject { } } - // todo: ReplicationState,Delete + let replication_state_internal = get_internal_replication_state(&metadata); + + let mut deleted = false; + + if let Some(v) = replication_state_internal.as_ref() { + if !v.composite_version_purge_status().is_empty() { + deleted = true; + } + + let st = v.composite_replication_status(); + if !st.is_empty() { + metadata.insert(AMZ_BUCKET_REPLICATION_STATUS.to_string(), st.to_string()); + } + } + + let checksum = 
self + .meta_sys + .get(format!("{RESERVED_METADATA_PREFIX_LOWER}crc").as_str()) + .map(|v| Bytes::from(v.clone())); let erasure = ErasureInfo { algorithm: self.erasure_algorithm.to_string(), @@ -1754,6 +1776,26 @@ impl MetaObject { ..Default::default() }; + let transition_status = self + .meta_sys + .get(format!("{RESERVED_METADATA_PREFIX_LOWER}{TRANSITION_STATUS}").as_str()) + .map(|v| String::from_utf8_lossy(v).to_string()) + .unwrap_or_default(); + let transitioned_objname = self + .meta_sys + .get(format!("{RESERVED_METADATA_PREFIX_LOWER}{TRANSITIONED_OBJECTNAME}").as_str()) + .map(|v| String::from_utf8_lossy(v).to_string()) + .unwrap_or_default(); + let transition_version_id = self + .meta_sys + .get(format!("{RESERVED_METADATA_PREFIX_LOWER}{TRANSITIONED_VERSION_ID}").as_str()) + .map(|v| Uuid::from_slice(v.as_slice()).unwrap_or_default()); + let transition_tier = self + .meta_sys + .get(format!("{RESERVED_METADATA_PREFIX_LOWER}{TRANSITION_TIER}").as_str()) + .map(|v| String::from_utf8_lossy(v).to_string()) + .unwrap_or_default(); + FileInfo { version_id, erasure, @@ -1764,6 +1806,13 @@ impl MetaObject { volume: volume.to_string(), parts, metadata, + replication_state_internal, + deleted, + checksum, + transition_status, + transitioned_objname, + transition_version_id, + transition_tier, ..Default::default() } } @@ -1904,6 +1953,38 @@ impl From for MetaObject { } } + if !value.transition_status.is_empty() { + meta_sys.insert( + format!("{RESERVED_METADATA_PREFIX_LOWER}{TRANSITION_STATUS}"), + value.transition_status.as_bytes().to_vec(), + ); + } + + if !value.transitioned_objname.is_empty() { + meta_sys.insert( + format!("{RESERVED_METADATA_PREFIX_LOWER}{TRANSITIONED_OBJECTNAME}"), + value.transitioned_objname.as_bytes().to_vec(), + ); + } + + if let Some(vid) = &value.transition_version_id { + meta_sys.insert( + format!("{RESERVED_METADATA_PREFIX_LOWER}{TRANSITIONED_VERSION_ID}"), + vid.as_bytes().to_vec(), + ); + } + + if !value.transition_tier.is_empty() { + meta_sys.insert( + format!("{RESERVED_METADATA_PREFIX_LOWER}{TRANSITION_TIER}"), + value.transition_tier.as_bytes().to_vec(), + ); + } + + if let Some(content_hash) = value.checksum { + meta_sys.insert(format!("{RESERVED_METADATA_PREFIX_LOWER}crc"), content_hash.to_vec()); + } + Self { version_id: value.version_id, data_dir: value.data_dir, @@ -1927,6 +2008,50 @@ impl From for MetaObject { } } +fn get_internal_replication_state(metadata: &HashMap) -> Option { + let mut rs = ReplicationState::default(); + let mut has = false; + + for (k, v) in metadata.iter() { + if k == VERSION_PURGE_STATUS_KEY { + rs.version_purge_status_internal = Some(v.clone()); + rs.purge_targets = version_purge_statuses_map(v.as_str()); + has = true; + continue; + } + + if let Some(sub_key) = k.strip_prefix(RESERVED_METADATA_PREFIX_LOWER) { + match sub_key { + "replica-timestamp" => { + has = true; + rs.replica_timestamp = Some(OffsetDateTime::parse(v, &Rfc3339).unwrap_or(OffsetDateTime::UNIX_EPOCH)); + } + "replica-status" => { + has = true; + rs.replica_status = ReplicationStatusType::from(v.as_str()); + } + "replication-timestamp" => { + has = true; + rs.replication_timestamp = Some(OffsetDateTime::parse(v, &Rfc3339).unwrap_or(OffsetDateTime::UNIX_EPOCH)) + } + "replication-status" => { + has = true; + rs.replication_status_internal = Some(v.clone()); + rs.targets = replication_statuses_map(v.as_str()); + } + _ => { + if let Some(arn) = sub_key.strip_prefix("replication-reset-") { + has = true; + rs.reset_statuses_map.insert(arn.to_string(), 
v.clone()); + } + } + } + } + } + + if has { Some(rs) } else { None } +} + #[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq)] pub struct MetaDeleteMarker { #[serde(rename = "ID")] @@ -1939,24 +2064,51 @@ pub struct MetaDeleteMarker { impl MetaDeleteMarker { pub fn free_version(&self) -> bool { - self.meta_sys.contains_key(FREE_VERSION_META_HEADER) + self.meta_sys + .contains_key(format!("{RESERVED_METADATA_PREFIX_LOWER}{FREE_VERSION}").as_str()) } pub fn into_fileinfo(&self, volume: &str, path: &str, _all_parts: bool) -> FileInfo { - let metadata = self.meta_sys.clone(); + let metadata = self + .meta_sys + .clone() + .into_iter() + .map(|(k, v)| (k, String::from_utf8_lossy(&v).to_string())) + .collect(); + let replication_state_internal = get_internal_replication_state(&metadata); - FileInfo { + let mut fi = FileInfo { version_id: self.version_id.filter(|&vid| !vid.is_nil()), name: path.to_string(), volume: volume.to_string(), deleted: true, mod_time: self.mod_time, - metadata: metadata - .into_iter() - .map(|(k, v)| (k, String::from_utf8_lossy(&v).to_string())) - .collect(), + metadata, + replication_state_internal, ..Default::default() + }; + + if self.free_version() { + fi.set_tier_free_version(); + fi.transition_tier = self + .meta_sys + .get(format!("{RESERVED_METADATA_PREFIX_LOWER}{TRANSITION_TIER}").as_str()) + .map(|v| String::from_utf8_lossy(v).to_string()) + .unwrap_or_default(); + + fi.transitioned_objname = self + .meta_sys + .get(format!("{RESERVED_METADATA_PREFIX_LOWER}{TRANSITIONED_OBJECTNAME}").as_str()) + .map(|v| String::from_utf8_lossy(v).to_string()) + .unwrap_or_default(); + + fi.transition_version_id = self + .meta_sys + .get(format!("{RESERVED_METADATA_PREFIX_LOWER}{TRANSITIONED_VERSION_ID}").as_str()) + .map(|v| Uuid::from_slice(v.as_slice()).unwrap_or_default()); } + + fi } pub fn unmarshal_msg(&mut self, buf: &[u8]) -> Result { @@ -2160,8 +2312,6 @@ pub enum Flags { InlineData = 1 << 2, } -const FREE_VERSION_META_HEADER: &str = "free-version"; - // mergeXLV2Versions pub fn merge_file_meta_versions( mut quorum: usize, diff --git a/crates/filemeta/src/replication.rs b/crates/filemeta/src/replication.rs index 47eb7add..e81cc60c 100644 --- a/crates/filemeta/src/replication.rs +++ b/crates/filemeta/src/replication.rs @@ -1,8 +1,36 @@ +use bytes::Bytes; use core::fmt; +use regex::Regex; +use rustfs_utils::http::RESERVED_METADATA_PREFIX_LOWER; use serde::{Deserialize, Serialize}; +use std::any::Any; use std::collections::HashMap; use std::time::Duration; use time::OffsetDateTime; +use uuid::Uuid; + +pub const REPLICATION_RESET: &str = "replication-reset"; +pub const REPLICATION_STATUS: &str = "replication-status"; + +// ReplicateQueued - replication being queued trail +pub const REPLICATE_QUEUED: &str = "replicate:queue"; + +// ReplicateExisting - audit trail for existing objects replication +pub const REPLICATE_EXISTING: &str = "replicate:existing"; +// ReplicateExistingDelete - audit trail for delete replication triggered for existing delete markers +pub const REPLICATE_EXISTING_DELETE: &str = "replicate:existing:delete"; + +// ReplicateMRF - audit trail for replication from Most Recent Failures (MRF) queue +pub const REPLICATE_MRF: &str = "replicate:mrf"; +// ReplicateIncoming - audit trail of inline replication +pub const REPLICATE_INCOMING: &str = "replicate:incoming"; +// ReplicateIncomingDelete - audit trail of inline replication of deletes. 
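The reserved replication metadata that get_internal_replication_state consumes stores per-target status as a concatenation of "arn=STATUS;" pairs, which replication_statuses_map (defined in replication.rs further down in this patch) splits back out. A minimal sketch, assuming the rustfs_filemeta re-exports introduced by this patch; the ARNs below are made-up examples.

// Sketch only: made-up ARNs, parsed with the helper added in this patch.
use rustfs_filemeta::{ReplicationStatusType, replication_statuses_map};

fn example_internal_status_parse() {
    let internal = "arn:rustfs:replication::target-1:bucket=COMPLETED;arn:rustfs:replication::target-2:bucket=PENDING;";
    let targets = replication_statuses_map(internal);
    assert_eq!(targets.len(), 2);
    assert_eq!(
        targets.get("arn:rustfs:replication::target-1:bucket"),
        Some(&ReplicationStatusType::from("COMPLETED"))
    );
}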
+pub const REPLICATE_INCOMING_DELETE: &str = "replicate:incoming:delete"; + +// ReplicateHeal - audit trail for healing of failed/pending replications +pub const REPLICATE_HEAL: &str = "replicate:heal"; +// ReplicateHealDelete - audit trail of healing of failed/pending delete replications. +pub const REPLICATE_HEAL_DELETE: &str = "replicate:heal:delete"; /// StatusType of Replication for x-amz-replication-status header #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default, Hash)] @@ -492,3 +520,371 @@ impl ReplicatedInfos { ReplicationAction::None } } + +#[derive(Serialize, Deserialize, Debug)] +pub struct MrfReplicateEntry { + #[serde(rename = "bucket")] + pub bucket: String, + + #[serde(rename = "object")] + pub object: String, + + #[serde(skip_serializing, skip_deserializing)] + pub version_id: Option, + + #[serde(rename = "retryCount")] + pub retry_count: i32, + + #[serde(skip_serializing, skip_deserializing)] + pub size: i64, +} + +pub trait ReplicationWorkerOperation: Any + Send + Sync { + fn to_mrf_entry(&self) -> MrfReplicateEntry; + fn as_any(&self) -> &dyn Any; + fn get_bucket(&self) -> &str; + fn get_object(&self) -> &str; + fn get_size(&self) -> i64; + fn is_delete_marker(&self) -> bool; + fn get_op_type(&self) -> ReplicationType; +} + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct ReplicateTargetDecision { + pub replicate: bool, + pub synchronous: bool, + pub arn: String, + pub id: String, +} + +impl ReplicateTargetDecision { + pub fn new(arn: String, replicate: bool, sync: bool) -> Self { + Self { + replicate, + synchronous: sync, + arn, + id: String::new(), + } + } +} + +impl fmt::Display for ReplicateTargetDecision { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{};{};{};{}", self.replicate, self.synchronous, self.arn, self.id) + } +} + +/// ReplicateDecision represents replication decision for each target +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ReplicateDecision { + pub targets_map: HashMap, +} + +impl ReplicateDecision { + pub fn new() -> Self { + Self { + targets_map: HashMap::new(), + } + } + + /// Returns true if at least one target qualifies for replication + pub fn replicate_any(&self) -> bool { + self.targets_map.values().any(|t| t.replicate) + } + + /// Returns true if at least one target qualifies for synchronous replication + pub fn is_synchronous(&self) -> bool { + self.targets_map.values().any(|t| t.synchronous) + } + + /// Updates ReplicateDecision with target's replication decision + pub fn set(&mut self, target: ReplicateTargetDecision) { + self.targets_map.insert(target.arn.clone(), target); + } + + /// Returns a stringified representation of internal replication status with all targets marked as `PENDING` + pub fn pending_status(&self) -> Option { + let mut result = String::new(); + for target in self.targets_map.values() { + if target.replicate { + result.push_str(&format!("{}={};", target.arn, ReplicationStatusType::Pending.as_str())); + } + } + if result.is_empty() { None } else { Some(result) } + } +} + +impl fmt::Display for ReplicateDecision { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut result = String::new(); + for (key, value) in &self.targets_map { + result.push_str(&format!("{key}={value},")); + } + write!(f, "{}", result.trim_end_matches(',')) + } +} + +impl Default for ReplicateDecision { + fn default() -> Self { + Self::new() + } +} + +// parse k-v pairs of target ARN to stringified ReplicateTargetDecision delimited by ',' into a 
+// ReplicateDecision struct +pub fn parse_replicate_decision(_bucket: &str, s: &str) -> std::io::Result { + let mut decision = ReplicateDecision::new(); + + if s.is_empty() { + return Ok(decision); + } + + for p in s.split(',') { + if p.is_empty() { + continue; + } + + let slc = p.split('=').collect::>(); + if slc.len() != 2 { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + format!("invalid replicate decision format: {s}"), + )); + } + + let tgt_str = slc[1].trim_matches('"'); + let tgt = tgt_str.split(';').collect::>(); + if tgt.len() != 4 { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + format!("invalid replicate decision format: {s}"), + )); + } + + let tgt = ReplicateTargetDecision { + replicate: tgt[0] == "true", + synchronous: tgt[1] == "true", + arn: tgt[2].to_string(), + id: tgt[3].to_string(), + }; + decision.targets_map.insert(slc[0].to_string(), tgt); + } + + Ok(decision) + + // r = ReplicateDecision{ + // targetsMap: make(map[string]replicateTargetDecision), + // } + // if len(s) == 0 { + // return + // } + // for _, p := range strings.Split(s, ",") { + // if p == "" { + // continue + // } + // slc := strings.Split(p, "=") + // if len(slc) != 2 { + // return r, errInvalidReplicateDecisionFormat + // } + // tgtStr := strings.TrimSuffix(strings.TrimPrefix(slc[1], `"`), `"`) + // tgt := strings.Split(tgtStr, ";") + // if len(tgt) != 4 { + // return r, errInvalidReplicateDecisionFormat + // } + // r.targetsMap[slc[0]] = replicateTargetDecision{Replicate: tgt[0] == "true", Synchronous: tgt[1] == "true", Arn: tgt[2], ID: tgt[3]} + // } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ReplicateObjectInfo { + pub name: String, + pub size: i64, + pub actual_size: i64, + pub bucket: String, + pub version_id: Option, + pub etag: Option, + pub mod_time: Option, + pub replication_status: ReplicationStatusType, + pub replication_status_internal: Option, + pub delete_marker: bool, + pub version_purge_status_internal: Option, + pub version_purge_status: VersionPurgeStatusType, + pub replication_state: Option, + pub op_type: ReplicationType, + pub event_type: String, + pub dsc: ReplicateDecision, + pub existing_obj_resync: ResyncDecision, + pub target_statuses: HashMap, + pub target_purge_statuses: HashMap, + pub replication_timestamp: Option, + pub ssec: bool, + pub user_tags: String, + pub checksum: Option, + pub retry_count: u32, +} + +impl ReplicationWorkerOperation for ReplicateObjectInfo { + fn as_any(&self) -> &dyn Any { + self + } + + fn to_mrf_entry(&self) -> MrfReplicateEntry { + MrfReplicateEntry { + bucket: self.bucket.clone(), + object: self.name.clone(), + version_id: self.version_id, + retry_count: self.retry_count as i32, + size: self.size, + } + } + + fn get_bucket(&self) -> &str { + &self.bucket + } + + fn get_object(&self) -> &str { + &self.name + } + + fn get_size(&self) -> i64 { + self.size + } + + fn is_delete_marker(&self) -> bool { + self.delete_marker + } + + fn get_op_type(&self) -> ReplicationType { + self.op_type + } +} + +lazy_static::lazy_static! 
{ + static ref REPL_STATUS_REGEX: Regex = Regex::new(r"([^=].*?)=([^,].*?);").unwrap(); +} + +impl ReplicateObjectInfo { + /// Returns replication status of a target + pub fn target_replication_status(&self, arn: &str) -> ReplicationStatusType { + let binding = self.replication_status_internal.clone().unwrap_or_default(); + let captures = REPL_STATUS_REGEX.captures_iter(&binding); + for cap in captures { + if cap.len() == 3 && &cap[1] == arn { + return ReplicationStatusType::from(&cap[2]); + } + } + ReplicationStatusType::default() + } + + /// Returns the relevant info needed by MRF + pub fn to_mrf_entry(&self) -> MrfReplicateEntry { + MrfReplicateEntry { + bucket: self.bucket.clone(), + object: self.name.clone(), + version_id: self.version_id, + retry_count: self.retry_count as i32, + size: self.size, + } + } +} + +// constructs a replication status map from string representation +pub fn replication_statuses_map(s: &str) -> HashMap { + let mut targets = HashMap::new(); + let rep_stat_matches = REPL_STATUS_REGEX.captures_iter(s).map(|c| c.extract()); + for (_, [arn, status]) in rep_stat_matches { + if arn.is_empty() { + continue; + } + let status = ReplicationStatusType::from(status); + targets.insert(arn.to_string(), status); + } + targets +} + +// constructs a version purge status map from string representation +pub fn version_purge_statuses_map(s: &str) -> HashMap { + let mut targets = HashMap::new(); + let purge_status_matches = REPL_STATUS_REGEX.captures_iter(s).map(|c| c.extract()); + for (_, [arn, status]) in purge_status_matches { + if arn.is_empty() { + continue; + } + let status = VersionPurgeStatusType::from(status); + targets.insert(arn.to_string(), status); + } + targets +} + +pub fn get_replication_state(rinfos: &ReplicatedInfos, prev_state: &ReplicationState, _vid: Option) -> ReplicationState { + let reset_status_map: Vec<(String, String)> = rinfos + .targets + .iter() + .filter(|v| !v.resync_timestamp.is_empty()) + .map(|t| (target_reset_header(t.arn.as_str()), t.resync_timestamp.clone())) + .collect(); + + let repl_statuses = rinfos.replication_status_internal(); + let vpurge_statuses = rinfos.version_purge_status_internal(); + + let mut reset_statuses_map = prev_state.reset_statuses_map.clone(); + for (key, value) in reset_status_map { + reset_statuses_map.insert(key, value); + } + + ReplicationState { + replicate_decision_str: prev_state.replicate_decision_str.clone(), + reset_statuses_map, + replica_timestamp: prev_state.replica_timestamp, + replica_status: prev_state.replica_status.clone(), + targets: replication_statuses_map(&repl_statuses.clone().unwrap_or_default()), + replication_status_internal: repl_statuses, + replication_timestamp: rinfos.replication_timestamp, + purge_targets: version_purge_statuses_map(&vpurge_statuses.clone().unwrap_or_default()), + version_purge_status_internal: vpurge_statuses, + + ..Default::default() + } +} + +pub fn target_reset_header(arn: &str) -> String { + format!("{RESERVED_METADATA_PREFIX_LOWER}{REPLICATION_RESET}-{arn}") +} + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct ResyncTargetDecision { + pub replicate: bool, + pub reset_id: String, + pub reset_before_date: Option, +} + +/// ResyncDecision is a struct representing a map with target's individual resync decisions +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ResyncDecision { + pub targets: HashMap, +} + +impl ResyncDecision { + pub fn new() -> Self { + Self { targets: HashMap::new() } + } + + /// Returns true if no targets with resync 
decision present + pub fn is_empty(&self) -> bool { + self.targets.is_empty() + } + + pub fn must_resync(&self) -> bool { + self.targets.values().any(|v| v.replicate) + } + + pub fn must_resync_target(&self, tgt_arn: &str) -> bool { + self.targets.get(tgt_arn).map(|v| v.replicate).unwrap_or(false) + } +} + +impl Default for ResyncDecision { + fn default() -> Self { + Self::new() + } +} diff --git a/crates/iam/src/manager.rs b/crates/iam/src/manager.rs index 3223b034..b3cbba96 100644 --- a/crates/iam/src/manager.rs +++ b/crates/iam/src/manager.rs @@ -33,7 +33,6 @@ use rustfs_policy::{ EMBEDDED_POLICY_TYPE, INHERITED_POLICY_TYPE, Policy, PolicyDoc, default::DEFAULT_POLICIES, iam_policy_claim_name_sa, }, }; -use rustfs_utils::crypto::base64_encode; use rustfs_utils::path::path_join_buf; use serde::{Deserialize, Serialize}; use serde_json::Value; @@ -555,7 +554,10 @@ where return Err(Error::PolicyTooLarge); } - m.insert(SESSION_POLICY_NAME.to_owned(), Value::String(base64_encode(&policy_buf))); + m.insert( + SESSION_POLICY_NAME.to_owned(), + Value::String(base64_simd::URL_SAFE_NO_PAD.encode_to_string(&policy_buf)), + ); m.insert(iam_policy_claim_name_sa(), Value::String(EMBEDDED_POLICY_TYPE.to_owned())); } } diff --git a/crates/iam/src/sys.rs b/crates/iam/src/sys.rs index b59663ab..10a2804a 100644 --- a/crates/iam/src/sys.rs +++ b/crates/iam/src/sys.rs @@ -37,7 +37,6 @@ use rustfs_policy::auth::{ use rustfs_policy::policy::Args; use rustfs_policy::policy::opa; use rustfs_policy::policy::{EMBEDDED_POLICY_TYPE, INHERITED_POLICY_TYPE, Policy, PolicyDoc, iam_policy_claim_name_sa}; -use rustfs_utils::crypto::{base64_decode, base64_encode}; use serde_json::Value; use serde_json::json; use std::collections::HashMap; @@ -363,7 +362,10 @@ impl IamSys { m.insert("parent".to_owned(), Value::String(parent_user.to_owned())); if !policy_buf.is_empty() { - m.insert(SESSION_POLICY_NAME.to_owned(), Value::String(base64_encode(&policy_buf))); + m.insert( + SESSION_POLICY_NAME.to_owned(), + Value::String(base64_simd::URL_SAFE_NO_PAD.encode_to_string(&policy_buf)), + ); m.insert(iam_policy_claim_name_sa(), Value::String(EMBEDDED_POLICY_TYPE.to_owned())); } else { m.insert(iam_policy_claim_name_sa(), Value::String(INHERITED_POLICY_TYPE.to_owned())); @@ -456,7 +458,9 @@ impl IamSys { let op_sp = claims.get(SESSION_POLICY_NAME); if let (Some(pt), Some(sp)) = (op_pt, op_sp) { if pt == EMBEDDED_POLICY_TYPE { - let policy = serde_json::from_slice(&base64_decode(sp.as_str().unwrap_or_default().as_bytes())?)?; + let policy = serde_json::from_slice( + &base64_simd::URL_SAFE_NO_PAD.decode_to_vec(sp.as_str().unwrap_or_default().as_bytes())?, + )?; return Ok((sa, Some(policy))); } } @@ -515,7 +519,9 @@ impl IamSys { let op_sp = claims.get(SESSION_POLICY_NAME); if let (Some(pt), Some(sp)) = (op_pt, op_sp) { if pt == EMBEDDED_POLICY_TYPE { - let policy = serde_json::from_slice(&base64_decode(sp.as_str().unwrap_or_default().as_bytes())?)?; + let policy = serde_json::from_slice( + &base64_simd::URL_SAFE_NO_PAD.decode_to_vec(sp.as_str().unwrap_or_default().as_bytes())?, + )?; return Ok((sa, Some(policy))); } } @@ -906,7 +912,9 @@ pub fn get_claims_from_token_with_secret(token: &str, secret: &str) -> Result bool { + if t == Self::NONE { + return self == Self::NONE; + } + (self.0 & t.0) == t.0 + } + + /// Get the base checksum type (without flags) + pub fn base(self) -> ChecksumType { + ChecksumType(self.0 & Self::BASE_TYPE_MASK) + } + + /// Get the header key for this checksum type + pub fn key(self) -> Option<&'static str> { + 
match self.base() { + Self::CRC32 => Some("x-amz-checksum-crc32"), + Self::CRC32C => Some("x-amz-checksum-crc32c"), + Self::SHA1 => Some("x-amz-checksum-sha1"), + Self::SHA256 => Some("x-amz-checksum-sha256"), + Self::CRC64_NVME => Some("x-amz-checksum-crc64nvme"), + _ => None, + } + } + + /// Get the size of the raw (unencoded) checksum in bytes + pub fn raw_byte_len(self) -> usize { + match self.base() { + Self::CRC32 | Self::CRC32C => 4, + Self::SHA1 => 20, + Self::SHA256 => SHA256_SIZE, + Self::CRC64_NVME => 8, + _ => 0, + } + } + + /// Check if the checksum type is set and valid + pub fn is_set(self) -> bool { + !self.is(Self::INVALID) && !self.base().is(Self::NONE) + } + + /// Check if this checksum type can be merged + pub fn can_merge(self) -> bool { + self.is(Self::CRC64_NVME) || self.is(Self::CRC32C) || self.is(Self::CRC32) + } + + /// Create a hasher for this checksum type + pub fn hasher(self) -> Option> { + match self.base() { + Self::CRC32 => Some(Box::new(Crc32IeeeHasher::new())), + Self::CRC32C => Some(Box::new(Crc32CastagnoliHasher::new())), + Self::SHA1 => Some(Box::new(Sha1Hasher::new())), + Self::SHA256 => Some(Box::new(Sha256Hasher::new())), + Self::CRC64_NVME => Some(Box::new(Crc64NvmeHasher::new())), + _ => None, + } + } + + /// Check if checksum is trailing + pub fn trailing(self) -> bool { + self.is(Self::TRAILING) + } + + /// Check if full object checksum was requested + pub fn full_object_requested(self) -> bool { + (self.0 & Self::FULL_OBJECT.0) == Self::FULL_OBJECT.0 || self.is(Self::CRC64_NVME) + } + + /// Get object type string for x-amz-checksum-type header + pub fn obj_type(self) -> &'static str { + if self.full_object_requested() { + "FULL_OBJECT" + } else if self.is_set() { + "COMPOSITE" + } else { + "" + } + } + + pub fn from_header(headers: &HeaderMap) -> Self { + Self::from_string_with_obj_type( + headers + .get("x-amz-checksum-algorithm") + .and_then(|v| v.to_str().ok()) + .unwrap_or(""), + headers.get("x-amz-checksum-type").and_then(|v| v.to_str().ok()).unwrap_or(""), + ) + } + + /// Create checksum type from string algorithm + pub fn from_string(alg: &str) -> Self { + Self::from_string_with_obj_type(alg, "") + } + + /// Create checksum type from algorithm and object type + pub fn from_string_with_obj_type(alg: &str, obj_type: &str) -> Self { + let full = match obj_type { + "FULL_OBJECT" => Self::FULL_OBJECT, + "COMPOSITE" | "" => Self::NONE, + _ => return Self::INVALID, + }; + + match alg.to_uppercase().as_str() { + "CRC32" => ChecksumType(Self::CRC32.0 | full.0), + "CRC32C" => ChecksumType(Self::CRC32C.0 | full.0), + "SHA1" => { + if full != Self::NONE { + return Self::INVALID; + } + Self::SHA1 + } + "SHA256" => { + if full != Self::NONE { + return Self::INVALID; + } + Self::SHA256 + } + "CRC64NVME" => { + // AWS seems to ignore full value and just assume it + Self::CRC64_NVME + } + "" => { + if full != Self::NONE { + return Self::INVALID; + } + Self::NONE + } + _ => Self::INVALID, + } + } +} + +impl std::fmt::Display for ChecksumType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self.base() { + Self::CRC32 => write!(f, "CRC32"), + Self::CRC32C => write!(f, "CRC32C"), + Self::SHA1 => write!(f, "SHA1"), + Self::SHA256 => write!(f, "SHA256"), + Self::CRC64_NVME => write!(f, "CRC64NVME"), + Self::NONE => write!(f, ""), + _ => write!(f, "invalid"), + } + } +} + +/// Base checksum types list +pub const BASE_CHECKSUM_TYPES: &[ChecksumType] = &[ + ChecksumType::SHA256, + ChecksumType::SHA1, + ChecksumType::CRC32, + 
ChecksumType::CRC64_NVME, + ChecksumType::CRC32C, +]; + +/// Checksum structure containing type and encoded value +#[derive(Debug, Clone, PartialEq, Default)] +pub struct Checksum { + pub checksum_type: ChecksumType, + pub encoded: String, + pub raw: Vec, + pub want_parts: i32, +} + +impl Checksum { + /// Create a new checksum from data + pub fn new_from_data(checksum_type: ChecksumType, data: &[u8]) -> Option { + if !checksum_type.is_set() { + return None; + } + + let mut hasher = checksum_type.hasher()?; + hasher.write_all(data).ok()?; + let raw = hasher.finalize(); + let encoded = general_purpose::STANDARD.encode(&raw); + + let checksum = Checksum { + checksum_type, + encoded, + raw, + want_parts: 0, + }; + + if checksum.valid() { Some(checksum) } else { None } + } + + /// Create a new checksum from algorithm string and base64 value + pub fn new_from_string(alg: &str, value: &str) -> Option { + Self::new_with_type(ChecksumType::from_string(alg), value) + } + + /// Create a new checksum with specific type and value + pub fn new_with_type(mut checksum_type: ChecksumType, value: &str) -> Option { + if !checksum_type.is_set() { + return None; + } + + let mut want_parts = 0; + let value_string; + + // Handle multipart format (value-parts) + if value.contains('-') { + let parts: Vec<&str> = value.split('-').collect(); + if parts.len() != 2 { + return None; + } + value_string = parts[0].to_string(); + want_parts = parts[1].parse().ok()?; + checksum_type = ChecksumType(checksum_type.0 | ChecksumType::MULTIPART.0); + } else { + value_string = value.to_string(); + } + // let raw = base64_simd::URL_SAFE_NO_PAD.decode_to_vec(&value_string).ok()?; + let raw = general_purpose::STANDARD.decode(&value_string).ok()?; + + let checksum = Checksum { + checksum_type, + encoded: value_string, + raw, + want_parts, + }; + + if checksum.valid() { Some(checksum) } else { None } + } + + /// Check if checksum is valid + pub fn valid(&self) -> bool { + if self.checksum_type == ChecksumType::INVALID { + return false; + } + if self.encoded.is_empty() || self.checksum_type.trailing() { + return self.checksum_type.is(ChecksumType::NONE) || self.checksum_type.trailing(); + } + self.checksum_type.raw_byte_len() == self.raw.len() + } + + /// Check if content matches this checksum + pub fn matches(&self, content: &[u8], parts: i32) -> Result<(), ChecksumMismatch> { + if self.encoded.is_empty() { + return Ok(()); + } + + let mut hasher = self.checksum_type.hasher().ok_or_else(|| ChecksumMismatch { + want: self.encoded.clone(), + got: "no hasher available".to_string(), + })?; + + hasher.write_all(content).map_err(|_| ChecksumMismatch { + want: self.encoded.clone(), + got: "write error".to_string(), + })?; + + let sum = hasher.finalize(); + + if self.want_parts > 0 && self.want_parts != parts { + return Err(ChecksumMismatch { + want: format!("{}-{}", self.encoded, self.want_parts), + got: format!("{}-{}", general_purpose::STANDARD.encode(&sum), parts), + }); + } + + if sum != self.raw { + return Err(ChecksumMismatch { + want: self.encoded.clone(), + got: general_purpose::STANDARD.encode(&sum), + }); + } + + Ok(()) + } + + /// Convert checksum to map representation + pub fn as_map(&self) -> Option> { + if !self.valid() { + return None; + } + let mut map = HashMap::new(); + map.insert(self.checksum_type.to_string(), self.encoded.clone()); + Some(map) + } + + pub fn to_bytes(&self, parts: &[u8]) -> Bytes { + self.append_to(Vec::new(), parts).into() + } + + /// Append checksum to byte buffer + pub fn append_to(&self, mut buffer: 
Vec, parts: &[u8]) -> Vec { + // Encode checksum type as varint + let mut type_bytes = Vec::new(); + encode_varint(&mut type_bytes, self.checksum_type.0 as u64); + buffer.extend_from_slice(&type_bytes); + + // Remove trailing flag when serializing + let crc = self.raw.clone(); + if self.checksum_type.trailing() { + // When serializing, we don't care if it was trailing + } + + if crc.len() != self.checksum_type.raw_byte_len() { + return buffer; + } + + buffer.extend_from_slice(&crc); + + if self.checksum_type.is(ChecksumType::MULTIPART) { + let mut checksums = 0; + if self.want_parts > 0 && !self.checksum_type.is(ChecksumType::INCLUDES_MULTIPART) { + checksums = self.want_parts; + } + + // Ensure we don't divide by 0 + let raw_len = self.checksum_type.raw_byte_len(); + if raw_len == 0 || parts.len() % raw_len != 0 { + checksums = 0; + } else if !parts.is_empty() { + checksums = (parts.len() / raw_len) as i32; + } + + let parts_to_append = if self.checksum_type.is(ChecksumType::INCLUDES_MULTIPART) { + parts + } else { + &[] + }; + + let mut checksums_bytes = Vec::new(); + encode_varint(&mut checksums_bytes, checksums as u64); + buffer.extend_from_slice(&checksums_bytes); + + if !parts_to_append.is_empty() { + buffer.extend_from_slice(parts_to_append); + } + } + + buffer + } + + /// Add a part checksum into the current checksum, as if the content of each was appended. + /// The size of the content that produced the second checksum must be provided. + /// Not all checksum types can be merged, use the can_merge method to check. + /// Checksum types must match. + pub fn add_part(&mut self, other: &Checksum, size: i64) -> Result<(), String> { + if !other.checksum_type.can_merge() { + return Err("checksum type cannot be merged".to_string()); + } + + if size == 0 { + return Ok(()); + } + + if !self.checksum_type.is(other.checksum_type.base()) { + return Err(format!( + "checksum type does not match got {} and {}", + self.checksum_type, other.checksum_type + )); + } + + // If never set, just add first checksum + if self.raw.is_empty() { + self.raw = other.raw.clone(); + self.encoded = other.encoded.clone(); + return Ok(()); + } + + if !self.valid() { + return Err("invalid base checksum".to_string()); + } + + if !other.valid() { + return Err("invalid part checksum".to_string()); + } + + match self.checksum_type.base() { + ChecksumType::CRC32 => { + let crc1 = u32::from_be_bytes([self.raw[0], self.raw[1], self.raw[2], self.raw[3]]); + let crc2 = u32::from_be_bytes([other.raw[0], other.raw[1], other.raw[2], other.raw[3]]); + let combined = crc32_combine(0xEDB88320, crc1, crc2, size); // IEEE polynomial + self.raw = combined.to_be_bytes().to_vec(); + } + ChecksumType::CRC32C => { + let crc1 = u32::from_be_bytes([self.raw[0], self.raw[1], self.raw[2], self.raw[3]]); + let crc2 = u32::from_be_bytes([other.raw[0], other.raw[1], other.raw[2], other.raw[3]]); + let combined = crc32_combine(0x82F63B78, crc1, crc2, size); // Castagnoli polynomial + self.raw = combined.to_be_bytes().to_vec(); + } + ChecksumType::CRC64_NVME => { + let crc1 = u64::from_be_bytes([ + self.raw[0], + self.raw[1], + self.raw[2], + self.raw[3], + self.raw[4], + self.raw[5], + self.raw[6], + self.raw[7], + ]); + let crc2 = u64::from_be_bytes([ + other.raw[0], + other.raw[1], + other.raw[2], + other.raw[3], + other.raw[4], + other.raw[5], + other.raw[6], + other.raw[7], + ]); + let combined = crc64_combine(CRC64_NVME_POLYNOMIAL.reverse_bits(), crc1, crc2, size); + self.raw = combined.to_be_bytes().to_vec(); + } + _ => { + return 
Err(format!("unknown checksum type: {}", self.checksum_type)); + } + } + + self.encoded = general_purpose::STANDARD.encode(&self.raw); + Ok(()) + } +} + +/// Get content checksum from headers +pub fn get_content_checksum(headers: &HeaderMap) -> Result, std::io::Error> { + // Check for trailing checksums + if let Some(trailer_header) = headers.get("x-amz-trailer") { + let mut result = None; + let trailer_str = trailer_header + .to_str() + .map_err(|_| std::io::Error::new(std::io::ErrorKind::InvalidData, "Invalid header value"))?; + let trailing_headers: Vec<&str> = trailer_str.split(',').map(|s| s.trim()).collect(); + + for header in trailing_headers { + let mut duplicates = false; + for &checksum_type in crate::checksum::BASE_CHECKSUM_TYPES { + if let Some(key) = checksum_type.key() { + if header.eq_ignore_ascii_case(key) { + duplicates = result.is_some(); + result = Some(Checksum { + checksum_type: ChecksumType(checksum_type.0 | ChecksumType::TRAILING.0), + encoded: String::new(), + raw: Vec::new(), + want_parts: 0, + }); + } + } + } + if duplicates { + return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, "Invalid checksum")); + } + } + + if let Some(mut res) = result { + match headers.get("x-amz-checksum-type").and_then(|v| v.to_str().ok()) { + Some("FULL_OBJECT") => { + if !res.checksum_type.can_merge() { + return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, "Invalid checksum")); + } + res.checksum_type = ChecksumType(res.checksum_type.0 | ChecksumType::FULL_OBJECT.0); + } + Some("COMPOSITE") | Some("") | None => {} + _ => return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, "Invalid checksum")), + } + return Ok(Some(res)); + } + } + + let (checksum_type, value) = get_content_checksum_direct(headers); + if checksum_type == ChecksumType::NONE { + if value.is_empty() { + return Ok(None); + } + return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, "Invalid checksum")); + } + + let checksum = Checksum::new_with_type(checksum_type, &value); + Ok(checksum) +} + +/// Get content checksum type and value directly from headers +fn get_content_checksum_direct(headers: &HeaderMap) -> (ChecksumType, String) { + let mut checksum_type = ChecksumType::NONE; + + if let Some(alg) = headers.get("x-amz-checksum-algorithm").and_then(|v| v.to_str().ok()) { + checksum_type = ChecksumType::from_string_with_obj_type( + alg, + headers.get("x-amz-checksum-type").and_then(|s| s.to_str().ok()).unwrap_or(""), + ); + + if headers.get("x-amz-checksum-type").and_then(|v| v.to_str().ok()) == Some("FULL_OBJECT") { + if !checksum_type.can_merge() { + return (ChecksumType::INVALID, String::new()); + } + checksum_type = ChecksumType(checksum_type.0 | ChecksumType::FULL_OBJECT.0); + } + + if checksum_type.is_set() { + if let Some(key) = checksum_type.key() { + if let Some(value) = headers.get(key).and_then(|v| v.to_str().ok()) { + return (checksum_type, value.to_string()); + } else { + return (ChecksumType::NONE, String::new()); + } + } + } + return (checksum_type, String::new()); + } + + // Check individual checksum headers + for &ct in crate::checksum::BASE_CHECKSUM_TYPES { + if let Some(key) = ct.key() { + if let Some(value) = headers.get(key).and_then(|v| v.to_str().ok()) { + // If already set, invalid + if checksum_type != ChecksumType::NONE { + return (ChecksumType::INVALID, String::new()); + } + checksum_type = ct; + + if headers.get("x-amz-checksum-type").and_then(|v| v.to_str().ok()) == Some("FULL_OBJECT") { + if !checksum_type.can_merge() { + return 
(ChecksumType::INVALID, String::new()); + } + checksum_type = ChecksumType(checksum_type.0 | ChecksumType::FULL_OBJECT.0); + } + return (checksum_type, value.to_string()); + } + } + } + + (checksum_type, String::new()) +} + +/// Trait for checksum hashers +pub trait ChecksumHasher: Write + Send + Sync { + fn finalize(&mut self) -> Vec; + fn reset(&mut self); +} + +/// CRC32 IEEE hasher +pub struct Crc32IeeeHasher { + hasher: Crc32Hasher, +} + +impl Default for Crc32IeeeHasher { + fn default() -> Self { + Self::new() + } +} + +impl Crc32IeeeHasher { + pub fn new() -> Self { + Self { + hasher: Crc32Hasher::new(), + } + } +} + +impl Write for Crc32IeeeHasher { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + self.hasher.update(buf); + Ok(buf.len()) + } + + fn flush(&mut self) -> std::io::Result<()> { + Ok(()) + } +} + +impl ChecksumHasher for Crc32IeeeHasher { + fn finalize(&mut self) -> Vec { + self.hasher.clone().finalize().to_be_bytes().to_vec() + } + + fn reset(&mut self) { + self.hasher = Crc32Hasher::new(); + } +} + +/// CRC32 Castagnoli hasher +pub struct Crc32CastagnoliHasher { + hasher: crc32fast::Hasher, +} + +impl Default for Crc32CastagnoliHasher { + fn default() -> Self { + Self::new() + } +} + +impl Crc32CastagnoliHasher { + pub fn new() -> Self { + Self { + hasher: crc32fast::Hasher::new_with_initial(0), + } + } +} + +impl Write for Crc32CastagnoliHasher { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + self.hasher.update(buf); + Ok(buf.len()) + } + + fn flush(&mut self) -> std::io::Result<()> { + Ok(()) + } +} + +impl ChecksumHasher for Crc32CastagnoliHasher { + fn finalize(&mut self) -> Vec { + self.hasher.clone().finalize().to_be_bytes().to_vec() + } + + fn reset(&mut self) { + self.hasher = crc32fast::Hasher::new_with_initial(0); + } +} + +/// SHA1 hasher +pub struct Sha1Hasher { + hasher: Sha1, +} + +impl Default for Sha1Hasher { + fn default() -> Self { + Self::new() + } +} + +impl Sha1Hasher { + pub fn new() -> Self { + Self { hasher: Sha1::new() } + } +} + +impl Write for Sha1Hasher { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + self.hasher.update(buf); + Ok(buf.len()) + } + + fn flush(&mut self) -> std::io::Result<()> { + Ok(()) + } +} + +impl ChecksumHasher for Sha1Hasher { + fn finalize(&mut self) -> Vec { + self.hasher.clone().finalize().to_vec() + } + + fn reset(&mut self) { + self.hasher = Sha1::new(); + } +} + +/// SHA256 hasher +pub struct Sha256Hasher { + hasher: Sha256, +} + +impl Default for Sha256Hasher { + fn default() -> Self { + Self::new() + } +} + +impl Sha256Hasher { + pub fn new() -> Self { + Self { hasher: Sha256::new() } + } +} + +impl Write for Sha256Hasher { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + self.hasher.update(buf); + Ok(buf.len()) + } + + fn flush(&mut self) -> std::io::Result<()> { + Ok(()) + } +} + +impl ChecksumHasher for Sha256Hasher { + fn finalize(&mut self) -> Vec { + self.hasher.clone().finalize().to_vec() + } + + fn reset(&mut self) { + self.hasher = Sha256::new(); + } +} + +/// CRC64 NVME hasher +#[derive(Default)] +pub struct Crc64NvmeHasher { + hasher: crc64fast_nvme::Digest, +} + +impl Crc64NvmeHasher { + pub fn new() -> Self { + Self { + hasher: Default::default(), + } + } +} + +impl Write for Crc64NvmeHasher { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + self.hasher.write(buf); + Ok(buf.len()) + } + + fn flush(&mut self) -> std::io::Result<()> { + Ok(()) + } +} + +impl ChecksumHasher for Crc64NvmeHasher { + fn finalize(&mut self) -> Vec { + 
self.hasher.sum64().to_be_bytes().to_vec() + } + + fn reset(&mut self) { + self.hasher = Default::default(); + } +} + +/// Encode unsigned integer as varint +fn encode_varint(buf: &mut Vec, mut value: u64) { + while value >= 0x80 { + buf.push((value as u8) | 0x80); + value >>= 7; + } + buf.push(value as u8); +} + +/// Decode varint from buffer +pub fn decode_varint(buf: &[u8]) -> Option<(u64, usize)> { + let mut result = 0u64; + let mut shift = 0; + let mut pos = 0; + + for &byte in buf { + pos += 1; + result |= ((byte & 0x7F) as u64) << shift; + + if byte & 0x80 == 0 { + return Some((result, pos)); + } + + shift += 7; + if shift >= 64 { + return None; // Overflow + } + } + + None // Incomplete varint +} + +/// Read checksums from byte buffer +pub fn read_checksums(mut buf: &[u8], part: i32) -> (HashMap, bool) { + let mut result = HashMap::new(); + let mut is_multipart = false; + + while !buf.is_empty() { + let (checksum_type_val, n) = match decode_varint(buf) { + Some((val, n)) => (val, n), + None => break, + }; + buf = &buf[n..]; + + let checksum_type = ChecksumType(checksum_type_val as u32); + let length = checksum_type.raw_byte_len(); + + if length == 0 || buf.len() < length { + break; + } + + let checksum_bytes = &buf[..length]; + buf = &buf[length..]; + let mut checksum_str = general_purpose::STANDARD.encode(checksum_bytes); + + if checksum_type.is(ChecksumType::MULTIPART) { + is_multipart = true; + let (parts_count, n) = match decode_varint(buf) { + Some((val, n)) => (val, n), + None => break, + }; + buf = &buf[n..]; + + if !checksum_type.full_object_requested() { + checksum_str = format!("{checksum_str}-{parts_count}"); + } else if part <= 0 { + result.insert("x-amz-checksum-type".to_string(), "FULL_OBJECT".to_string()); + } + + if part > 0 { + checksum_str.clear(); + } + + if checksum_type.is(ChecksumType::INCLUDES_MULTIPART) { + let want_len = parts_count as usize * length; + if buf.len() < want_len { + break; + } + + // Read part checksum + if part > 0 && (part as u64) <= parts_count { + let offset = ((part - 1) as usize) * length; + let part_checksum = &buf[offset..offset + length]; + checksum_str = general_purpose::STANDARD.encode(part_checksum); + } + buf = &buf[want_len..]; + } + } else if part > 1 { + // For non-multipart, checksum is part 1 + checksum_str.clear(); + } + + if !checksum_str.is_empty() { + result.insert(checksum_type.to_string(), checksum_str); + } + } + + (result, is_multipart) +} + +/// Read all part checksums from buffer +pub fn read_part_checksums(mut buf: &[u8]) -> Vec> { + let mut result = Vec::new(); + + while !buf.is_empty() { + let (checksum_type_val, n) = match decode_varint(buf) { + Some((val, n)) => (val, n), + None => break, + }; + buf = &buf[n..]; + + let checksum_type = ChecksumType(checksum_type_val as u32); + let length = checksum_type.raw_byte_len(); + + if length == 0 || buf.len() < length { + break; + } + + // Skip main checksum + buf = &buf[length..]; + + let (parts_count, n) = match decode_varint(buf) { + Some((val, n)) => (val, n), + None => break, + }; + buf = &buf[n..]; + + if !checksum_type.is(ChecksumType::INCLUDES_MULTIPART) { + continue; + } + + if result.is_empty() { + result.resize(parts_count as usize, HashMap::new()); + } + + for part_checksum in result.iter_mut() { + if buf.len() < length { + break; + } + + let checksum_bytes = &buf[..length]; + buf = &buf[length..]; + let checksum_str = general_purpose::STANDARD.encode(checksum_bytes); + + part_checksum.insert(checksum_type.to_string(), checksum_str); + } + } + + result +} + 
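The serialized checksum blob that read_checksums and read_part_checksums walk above is laid out as a varint checksum-type tag, the raw checksum bytes, and, for multipart checksums, a varint part count optionally followed by the concatenated per-part raw checksums. As a minimal standalone sketch (mirroring the encode_varint/decode_varint helpers above; the values in main are illustrative only), the LEB128-style varint round-trip used for those tags and counts looks like this:

fn encode_varint(buf: &mut Vec<u8>, mut value: u64) {
    // Emit 7 bits per byte, setting the high bit while more bytes follow.
    while value >= 0x80 {
        buf.push((value as u8) | 0x80);
        value >>= 7;
    }
    buf.push(value as u8);
}

fn decode_varint(buf: &[u8]) -> Option<(u64, usize)> {
    let (mut result, mut shift, mut pos) = (0u64, 0u32, 0usize);
    for &byte in buf {
        pos += 1;
        result |= ((byte & 0x7F) as u64) << shift;
        if byte & 0x80 == 0 {
            return Some((result, pos)); // decoded value and bytes consumed
        }
        shift += 7;
        if shift >= 64 {
            return None; // overflow
        }
    }
    None // incomplete varint
}

fn main() {
    let mut buf = Vec::new();
    encode_varint(&mut buf, 300); // e.g. a part count
    assert_eq!(buf, vec![0xAC, 0x02]);
    assert_eq!(decode_varint(&buf), Some((300, 2)));
}

Values below 0x80 occupy a single byte, so the common case of a small part count adds only one byte to the serialized metadata.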
+/// CRC64 NVME polynomial constant
+const CRC64_NVME_POLYNOMIAL: u64 = 0xad93d23594c93659;
+
+/// GF(2) matrix multiplication: row i of `mat` is applied when bit i of `vec` is set
+fn gf2_matrix_times(mat: &[u64], mut vec: u64) -> u64 {
+    let mut sum = 0u64;
+
+    for &m in mat {
+        if vec == 0 {
+            break;
+        }
+        if vec & 1 != 0 {
+            sum ^= m;
+        }
+        vec >>= 1;
+    }
+    sum
+}
+
+/// Square a GF(2) matrix
+fn gf2_matrix_square(square: &mut [u64], mat: &[u64]) {
+    if square.len() != mat.len() {
+        panic!("square matrix size mismatch");
+    }
+
+    for (i, &m) in mat.iter().enumerate() {
+        square[i] = gf2_matrix_times(mat, m);
+    }
+}
+
+/// Combine two CRC32 values
+///
+/// Returns the combined CRC-32 hash value of the two passed CRC-32
+/// hash values crc1 and crc2. poly represents the generator polynomial
+/// and len2 specifies the byte length that the crc2 hash covers.
+fn crc32_combine(poly: u32, crc1: u32, crc2: u32, len2: i64) -> u32 {
+    // Degenerate case (also disallow negative lengths)
+    if len2 <= 0 {
+        return crc1;
+    }
+
+    let mut even = [0u64; 32]; // even-power-of-two zeros operator
+    let mut odd = [0u64; 32]; // odd-power-of-two zeros operator
+
+    // Put operator for one zero bit in odd
+    odd[0] = poly as u64; // CRC-32 polynomial
+    let mut row = 1u64;
+    for (_i, odd_val) in odd.iter_mut().enumerate().skip(1) {
+        *odd_val = row;
+        row <<= 1;
+    }
+
+    // Put operator for two zero bits in even
+    gf2_matrix_square(&mut even, &odd);
+
+    // Put operator for four zero bits in odd
+    gf2_matrix_square(&mut odd, &even);
+
+    // Apply len2 zeros to crc1 (first square will put the operator for one
+    // zero byte, eight zero bits, in even)
+    let mut crc1n = crc1 as u64;
+    let mut len2 = len2;
+
+    loop {
+        // Apply zeros operator for this bit of len2
+        gf2_matrix_square(&mut even, &odd);
+        if len2 & 1 != 0 {
+            crc1n = gf2_matrix_times(&even, crc1n);
+        }
+        len2 >>= 1;
+
+        // If no more bits set, then done
+        if len2 == 0 {
+            break;
+        }
+
+        // Another iteration of the loop with odd and even swapped
+        gf2_matrix_square(&mut odd, &even);
+        if len2 & 1 != 0 {
+            crc1n = gf2_matrix_times(&odd, crc1n);
+        }
+        len2 >>= 1;
+
+        // If no more bits set, then done
+        if len2 == 0 {
+            break;
+        }
+    }
+
+    // Return combined crc
+    crc1n ^= crc2 as u64;
+    crc1n as u32
+}
+
+/// Combine two CRC64 values
+fn crc64_combine(poly: u64, crc1: u64, crc2: u64, len2: i64) -> u64 {
+    // Degenerate case (also disallow negative lengths)
+    if len2 <= 0 {
+        return crc1;
+    }
+
+    let mut even = [0u64; 64]; // even-power-of-two zeros operator
+    let mut odd = [0u64; 64]; // odd-power-of-two zeros operator
+
+    // Put operator for one zero bit in odd
+    odd[0] = poly; // CRC-64 polynomial
+    let mut row = 1u64;
+    for (_i, odd_val) in odd.iter_mut().enumerate().skip(1) {
+        *odd_val = row;
+        row <<= 1;
+    }
+
+    // Put operator for two zero bits in even
+    gf2_matrix_square(&mut even, &odd);
+
+    // Put operator for four zero bits in odd
+    gf2_matrix_square(&mut odd, &even);
+
+    // Apply len2 zeros to crc1 (first square will put the operator for one
+    // zero byte, eight zero bits, in even)
+    let mut crc1n = crc1;
+    let mut len2 = len2;
+
+    loop {
+        // Apply zeros operator for this bit of len2
+        gf2_matrix_square(&mut even, &odd);
+        if len2 & 1 != 0 {
+            crc1n = gf2_matrix_times(&even, crc1n);
+        }
+        len2 >>= 1;
+
+        // If no more bits set, then done
+        if len2 == 0 {
+            break;
+        }
+
+        // Another iteration of the loop with odd and even swapped
+        gf2_matrix_square(&mut odd, &even);
+        if len2 & 1 != 0 {
+            crc1n = gf2_matrix_times(&odd, crc1n);
+        }
+ len2 >>= 1; + + // If no more bits set, then done + if len2 == 0 { + break; + } + } + + // Return combined crc + crc1n ^ crc2 +} diff --git a/crates/rio/src/errors.rs b/crates/rio/src/errors.rs new file mode 100644 index 00000000..7be06b96 --- /dev/null +++ b/crates/rio/src/errors.rs @@ -0,0 +1,73 @@ +// Copyright 2024 RustFS Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use thiserror::Error; + +/// SHA256 mismatch error - when content SHA256 does not match what was sent from client +#[derive(Error, Debug, Clone, PartialEq)] +#[error("Bad sha256: Expected {expected_sha256} does not match calculated {calculated_sha256}")] +pub struct Sha256Mismatch { + pub expected_sha256: String, + pub calculated_sha256: String, +} + +/// Bad digest error - Content-MD5 you specified did not match what we received +#[derive(Error, Debug, Clone, PartialEq)] +#[error("Bad digest: Expected {expected_md5} does not match calculated {calculated_md5}")] +pub struct BadDigest { + pub expected_md5: String, + pub calculated_md5: String, +} + +/// Size too small error - reader size too small +#[derive(Error, Debug, Clone, PartialEq)] +#[error("Size small: got {got}, want {want}")] +pub struct SizeTooSmall { + pub want: i64, + pub got: i64, +} + +/// Size too large error - reader size too large +#[derive(Error, Debug, Clone, PartialEq)] +#[error("Size large: got {got}, want {want}")] +pub struct SizeTooLarge { + pub want: i64, + pub got: i64, +} + +/// Size mismatch error +#[derive(Error, Debug, Clone, PartialEq)] +#[error("Size mismatch: got {got}, want {want}")] +pub struct SizeMismatch { + pub want: i64, + pub got: i64, +} + +/// Checksum mismatch error - when content checksum does not match what was sent from client +#[derive(Error, Debug, Clone, PartialEq)] +#[error("Bad checksum: Want {want} does not match calculated {got}")] +pub struct ChecksumMismatch { + pub want: String, + pub got: String, +} + +/// Invalid checksum error +#[derive(Error, Debug, Clone, PartialEq)] +#[error("invalid checksum")] +pub struct InvalidChecksum; + +/// Check if an error is a checksum mismatch +pub fn is_checksum_mismatch(err: &(dyn std::error::Error + 'static)) -> bool { + err.downcast_ref::().is_some() +} diff --git a/crates/rio/src/etag.rs b/crates/rio/src/etag.rs index bebcacd1..90428a4b 100644 --- a/crates/rio/src/etag.rs +++ b/crates/rio/src/etag.rs @@ -51,6 +51,7 @@ mod tests { use crate::{CompressReader, EncryptReader, EtagReader, HashReader}; use crate::{WarpReader, resolve_etag_generic}; + use md5::Md5; use rustfs_utils::compress::CompressionAlgorithm; use std::io::Cursor; use tokio::io::BufReader; @@ -72,7 +73,7 @@ mod tests { let reader = BufReader::new(Cursor::new(&data[..])); let reader = Box::new(WarpReader::new(reader)); let mut hash_reader = - HashReader::new(reader, data.len() as i64, data.len() as i64, Some("hash_etag".to_string()), false).unwrap(); + HashReader::new(reader, data.len() as i64, data.len() as i64, Some("hash_etag".to_string()), None, false).unwrap(); // Test HashReader ETag 
resolution assert_eq!(resolve_etag_generic(&mut hash_reader), Some("hash_etag".to_string())); @@ -105,20 +106,30 @@ mod tests { assert_eq!(resolve_etag_generic(&mut encrypt_reader), Some("encrypt_etag".to_string())); } - #[test] - fn test_complex_nesting() { + #[tokio::test] + async fn test_complex_nesting() { + use md5::Digest; + use tokio::io::AsyncReadExt; let data = b"test data for complex nesting"; + + let mut hasher = Md5::new(); + hasher.update(data); + let etag = hasher.finalize(); + let etag_hex = hex_simd::encode_to_string(etag, hex_simd::AsciiCase::Lower); + let reader = BufReader::new(Cursor::new(&data[..])); let reader = Box::new(WarpReader::new(reader)); // Create a complex nested structure: CompressReader>>> - let etag_reader = EtagReader::new(reader, Some("nested_etag".to_string())); + let etag_reader = EtagReader::new(reader, Some(etag_hex.clone())); let key = [0u8; 32]; let nonce = [0u8; 12]; let encrypt_reader = EncryptReader::new(etag_reader, key, nonce); let mut compress_reader = CompressReader::new(encrypt_reader, CompressionAlgorithm::Gzip); + compress_reader.read_to_end(&mut Vec::new()).await.unwrap(); + // Test that nested structure can resolve ETag - assert_eq!(resolve_etag_generic(&mut compress_reader), Some("nested_etag".to_string())); + assert_eq!(resolve_etag_generic(&mut compress_reader), Some(etag_hex)); } #[test] @@ -127,51 +138,80 @@ mod tests { let reader = BufReader::new(Cursor::new(&data[..])); let reader = Box::new(WarpReader::new(reader)); // Create nested structure: CompressReader>> - let hash_reader = - HashReader::new(reader, data.len() as i64, data.len() as i64, Some("hash_nested_etag".to_string()), false).unwrap(); + let hash_reader = HashReader::new( + reader, + data.len() as i64, + data.len() as i64, + Some("hash_nested_etag".to_string()), + None, + false, + ) + .unwrap(); let mut compress_reader = CompressReader::new(hash_reader, CompressionAlgorithm::Deflate); // Test that nested HashReader can be resolved assert_eq!(resolve_etag_generic(&mut compress_reader), Some("hash_nested_etag".to_string())); } - #[test] - fn test_comprehensive_etag_extraction() { + #[tokio::test] + async fn test_comprehensive_etag_extraction() { + use md5::Digest; + use tokio::io::AsyncReadExt; println!("🔍 Testing comprehensive ETag extraction with real reader types..."); // Test 1: Simple EtagReader let data1 = b"simple test"; + let mut hasher = Md5::new(); + hasher.update(data1); + let etag = hasher.finalize(); + let etag_hex = hex_simd::encode_to_string(etag, hex_simd::AsciiCase::Lower); let reader1 = BufReader::new(Cursor::new(&data1[..])); let reader1 = Box::new(WarpReader::new(reader1)); - let mut etag_reader = EtagReader::new(reader1, Some("simple_etag".to_string())); - assert_eq!(resolve_etag_generic(&mut etag_reader), Some("simple_etag".to_string())); + let mut etag_reader = EtagReader::new(reader1, Some(etag_hex.clone())); + etag_reader.read_to_end(&mut Vec::new()).await.unwrap(); + assert_eq!(resolve_etag_generic(&mut etag_reader), Some(etag_hex.clone())); // Test 2: HashReader with ETag let data2 = b"hash test"; + let mut hasher = Md5::new(); + hasher.update(data2); + let etag = hasher.finalize(); + let etag_hex = hex_simd::encode_to_string(etag, hex_simd::AsciiCase::Lower); let reader2 = BufReader::new(Cursor::new(&data2[..])); let reader2 = Box::new(WarpReader::new(reader2)); let mut hash_reader = - HashReader::new(reader2, data2.len() as i64, data2.len() as i64, Some("hash_etag".to_string()), false).unwrap(); - assert_eq!(resolve_etag_generic(&mut 
hash_reader), Some("hash_etag".to_string())); + HashReader::new(reader2, data2.len() as i64, data2.len() as i64, Some(etag_hex.clone()), None, false).unwrap(); + hash_reader.read_to_end(&mut Vec::new()).await.unwrap(); + assert_eq!(resolve_etag_generic(&mut hash_reader), Some(etag_hex.clone())); // Test 3: Single wrapper - CompressReader let data3 = b"compress test"; + let mut hasher = Md5::new(); + hasher.update(data3); + let etag = hasher.finalize(); + let etag_hex = hex_simd::encode_to_string(etag, hex_simd::AsciiCase::Lower); let reader3 = BufReader::new(Cursor::new(&data3[..])); let reader3 = Box::new(WarpReader::new(reader3)); - let etag_reader3 = EtagReader::new(reader3, Some("compress_wrapped_etag".to_string())); + let etag_reader3 = EtagReader::new(reader3, Some(etag_hex.clone())); let mut compress_reader = CompressReader::new(etag_reader3, CompressionAlgorithm::Zstd); - assert_eq!(resolve_etag_generic(&mut compress_reader), Some("compress_wrapped_etag".to_string())); + compress_reader.read_to_end(&mut Vec::new()).await.unwrap(); + assert_eq!(resolve_etag_generic(&mut compress_reader), Some(etag_hex.clone())); // Test 4: Double wrapper - CompressReader> let data4 = b"double wrap test"; + let mut hasher = Md5::new(); + hasher.update(data4); + let etag = hasher.finalize(); + let etag_hex = hex_simd::encode_to_string(etag, hex_simd::AsciiCase::Lower); let reader4 = BufReader::new(Cursor::new(&data4[..])); let reader4 = Box::new(WarpReader::new(reader4)); - let etag_reader4 = EtagReader::new(reader4, Some("double_wrapped_etag".to_string())); + let etag_reader4 = EtagReader::new(reader4, Some(etag_hex.clone())); let key = [1u8; 32]; let nonce = [1u8; 12]; let encrypt_reader4 = EncryptReader::new(etag_reader4, key, nonce); let mut compress_reader4 = CompressReader::new(encrypt_reader4, CompressionAlgorithm::Gzip); - assert_eq!(resolve_etag_generic(&mut compress_reader4), Some("double_wrapped_etag".to_string())); + compress_reader4.read_to_end(&mut Vec::new()).await.unwrap(); + assert_eq!(resolve_etag_generic(&mut compress_reader4), Some(etag_hex.clone())); println!("✅ All ETag extraction methods work correctly!"); println!("✅ Trait-based approach handles recursive unwrapping!"); @@ -195,6 +235,7 @@ mod tests { data.len() as i64, data.len() as i64, Some("real_world_etag".to_string()), + None, false, ) .unwrap(); @@ -239,7 +280,7 @@ mod tests { let data = b"no etag test"; let reader = BufReader::new(Cursor::new(&data[..])); let reader = Box::new(WarpReader::new(reader)); - let mut hash_reader_no_etag = HashReader::new(reader, data.len() as i64, data.len() as i64, None, false).unwrap(); + let mut hash_reader_no_etag = HashReader::new(reader, data.len() as i64, data.len() as i64, None, None, false).unwrap(); assert_eq!(resolve_etag_generic(&mut hash_reader_no_etag), None); // Test with EtagReader that has None etag diff --git a/crates/rio/src/etag_reader.rs b/crates/rio/src/etag_reader.rs index 187b5ad7..77f2de84 100644 --- a/crates/rio/src/etag_reader.rs +++ b/crates/rio/src/etag_reader.rs @@ -19,6 +19,7 @@ use pin_project_lite::pin_project; use std::pin::Pin; use std::task::{Context, Poll}; use tokio::io::{AsyncRead, ReadBuf}; +use tracing::error; pin_project! { pub struct EtagReader { @@ -43,7 +44,8 @@ impl EtagReader { /// Get the final md5 value (etag) as a hex string, only compute once. /// Can be called multiple times, always returns the same result after finished. 
pub fn get_etag(&mut self) -> String { - format!("{:x}", self.md5.clone().finalize()) + let etag = self.md5.clone().finalize().to_vec(); + hex_simd::encode_to_string(etag, hex_simd::AsciiCase::Lower) } } @@ -60,8 +62,10 @@ impl AsyncRead for EtagReader { // EOF *this.finished = true; if let Some(checksum) = this.checksum { - let etag = format!("{:x}", this.md5.clone().finalize()); - if *checksum != etag { + let etag = this.md5.clone().finalize().to_vec(); + let etag_hex = hex_simd::encode_to_string(etag, hex_simd::AsciiCase::Lower); + if *checksum != etag_hex { + error!("Checksum mismatch, expected={:?}, actual={:?}", checksum, etag_hex); return Poll::Ready(Err(std::io::Error::new(std::io::ErrorKind::InvalidData, "Checksum mismatch"))); } } @@ -214,7 +218,7 @@ mod tests { let data = b"checksum test data"; let mut hasher = Md5::new(); hasher.update(data); - let expected = format!("{:x}", hasher.finalize()); + let expected = hex_simd::encode_to_string(hasher.finalize(), hex_simd::AsciiCase::Lower); let reader = BufReader::new(&data[..]); let reader = Box::new(WarpReader::new(reader)); let mut etag_reader = EtagReader::new(reader, Some(expected.clone())); @@ -233,7 +237,7 @@ mod tests { let wrong_checksum = "deadbeefdeadbeefdeadbeefdeadbeef".to_string(); let reader = BufReader::new(&data[..]); let reader = Box::new(WarpReader::new(reader)); - let mut etag_reader = EtagReader::new(reader, Some(wrong_checksum)); + let mut etag_reader = EtagReader::new(reader, Some(wrong_checksum.clone())); let mut buf = Vec::new(); // Verification failed, should return InvalidData error diff --git a/crates/rio/src/hash_reader.rs b/crates/rio/src/hash_reader.rs index a694b0ae..dbfdbaf0 100644 --- a/crates/rio/src/hash_reader.rs +++ b/crates/rio/src/hash_reader.rs @@ -50,7 +50,7 @@ //! let diskable_md5 = false; //! //! // Method 1: Simple creation (recommended for most cases) -//! let hash_reader = HashReader::new(reader, size, actual_size, etag.clone(), diskable_md5).unwrap(); +//! let hash_reader = HashReader::new(reader, size, actual_size, etag.clone(), None, diskable_md5).unwrap(); //! //! // Method 2: With manual wrapping to recreate original logic //! let reader2 = BufReader::new(Cursor::new(&data[..])); @@ -71,7 +71,7 @@ //! // No wrapping needed //! reader2 //! }; -//! let hash_reader2 = HashReader::new(wrapped_reader, size, actual_size, etag, diskable_md5).unwrap(); +//! let hash_reader2 = HashReader::new(wrapped_reader, size, actual_size, etag.clone(), None, diskable_md5).unwrap(); //! # }); //! ``` //! @@ -88,28 +88,43 @@ //! # tokio_test::block_on(async { //! let data = b"test"; //! let reader = BufReader::new(Cursor::new(&data[..])); -//! let hash_reader = HashReader::new(Box::new(WarpReader::new(reader)), 4, 4, None, false).unwrap(); +//! let hash_reader = HashReader::new(Box::new(WarpReader::new(reader)), 4, 4, None, None,false).unwrap(); //! //! // Check if a type is a HashReader //! assert!(hash_reader.is_hash_reader()); //! //! // Use new for compatibility (though it's simpler to use new() directly) //! let reader2 = BufReader::new(Cursor::new(&data[..])); -//! let result = HashReader::new(Box::new(WarpReader::new(reader2)), 4, 4, None, false); +//! let result = HashReader::new(Box::new(WarpReader::new(reader2)), 4, 4, None, None, false); //! assert!(result.is_ok()); //! # }); //! 
``` +use crate::Checksum; +use crate::ChecksumHasher; +use crate::ChecksumType; +use crate::Sha256Hasher; +use crate::compress_index::{Index, TryGetIndex}; +use crate::get_content_checksum; +use crate::{EtagReader, EtagResolvable, HardLimitReader, HashReaderDetector, Reader, WarpReader}; +use base64::Engine; +use base64::engine::general_purpose; +use http::HeaderMap; use pin_project_lite::pin_project; +use s3s::TrailingHeaders; +use std::collections::HashMap; +use std::io::Cursor; +use std::io::Write; +use std::mem; use std::pin::Pin; use std::task::{Context, Poll}; use tokio::io::{AsyncRead, ReadBuf}; - -use crate::compress_index::{Index, TryGetIndex}; -use crate::{EtagReader, EtagResolvable, HardLimitReader, HashReaderDetector, Reader}; +use tracing::error; /// Trait for mutable operations on HashReader pub trait HashReaderMut { + fn into_inner(self) -> Box; + fn take_inner(&mut self) -> Box; fn bytes_read(&self) -> u64; fn checksum(&self) -> &Option; fn set_checksum(&mut self, checksum: Option); @@ -117,6 +132,10 @@ pub trait HashReaderMut { fn set_size(&mut self, size: i64); fn actual_size(&self) -> i64; fn set_actual_size(&mut self, actual_size: i64); + fn content_hash(&self) -> &Option; + fn content_sha256(&self) -> &Option; + fn get_trailer(&self) -> Option<&TrailingHeaders>; + fn set_trailer(&mut self, trailer: Option); } pin_project! { @@ -129,7 +148,14 @@ pin_project! { pub actual_size: i64, pub diskable_md5: bool, bytes_read: u64, - // TODO: content_hash + content_hash: Option, + content_hasher: Option>, + content_sha256: Option, + content_sha256_hasher: Option, + checksum_on_finish: bool, + + trailer_s3s: Option, + } } @@ -139,7 +165,8 @@ impl HashReader { mut inner: Box, size: i64, actual_size: i64, - md5: Option, + md5hex: Option, + sha256hex: Option, diskable_md5: bool, ) -> std::io::Result { // Check if it's already a HashReader and update its parameters @@ -152,7 +179,7 @@ impl HashReader { } if let Some(checksum) = existing_hash_reader.checksum() { - if let Some(ref md5) = md5 { + if let Some(ref md5) = md5hex { if checksum != md5 { return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, "HashReader checksum mismatch")); } @@ -166,7 +193,7 @@ impl HashReader { )); } - existing_hash_reader.set_checksum(md5.clone()); + existing_hash_reader.set_checksum(md5hex.clone()); if existing_hash_reader.size() < 0 && size >= 0 { existing_hash_reader.set_size(size); @@ -176,13 +203,29 @@ impl HashReader { existing_hash_reader.set_actual_size(actual_size); } + let size = existing_hash_reader.size(); + let actual_size = existing_hash_reader.actual_size(); + let content_hash = existing_hash_reader.content_hash().clone(); + let content_hasher = existing_hash_reader + .content_hash() + .clone() + .map(|hash| hash.checksum_type.hasher().unwrap()); + let content_sha256 = existing_hash_reader.content_sha256().clone(); + let content_sha256_hasher = existing_hash_reader.content_sha256().clone().map(|_| Sha256Hasher::new()); + let inner = existing_hash_reader.take_inner(); return Ok(Self { inner, size, - checksum: md5, + checksum: md5hex.clone(), actual_size, diskable_md5, bytes_read: 0, + content_sha256, + content_sha256_hasher, + content_hash, + content_hasher, + checksum_on_finish: false, + trailer_s3s: existing_hash_reader.get_trailer().cloned(), }); } @@ -190,23 +233,33 @@ impl HashReader { let hr = HardLimitReader::new(inner, size); inner = Box::new(hr); if !diskable_md5 && !inner.is_hash_reader() { - let er = EtagReader::new(inner, md5.clone()); + let er = EtagReader::new(inner, 
md5hex.clone()); inner = Box::new(er); } } else if !diskable_md5 { - let er = EtagReader::new(inner, md5.clone()); + let er = EtagReader::new(inner, md5hex.clone()); inner = Box::new(er); } Ok(Self { inner, size, - checksum: md5, + checksum: md5hex, actual_size, diskable_md5, bytes_read: 0, + content_hash: None, + content_hasher: None, + content_sha256: sha256hex.clone(), + content_sha256_hasher: sha256hex.clone().map(|_| Sha256Hasher::new()), + checksum_on_finish: false, + trailer_s3s: None, }) } + pub fn into_inner(self) -> Box { + self.inner + } + /// Update HashReader parameters pub fn update_params(&mut self, size: i64, actual_size: i64, etag: Option) { if self.size < 0 && size >= 0 { @@ -228,9 +281,112 @@ impl HashReader { pub fn actual_size(&self) -> i64 { self.actual_size } + + pub fn add_checksum_from_s3s( + &mut self, + headers: &HeaderMap, + trailing_headers: Option, + ignore_value: bool, + ) -> Result<(), std::io::Error> { + let cs = get_content_checksum(headers)?; + + if ignore_value { + return Ok(()); + } + + if let Some(checksum) = cs { + if checksum.checksum_type.trailing() { + self.trailer_s3s = trailing_headers.clone(); + } + + self.content_hash = Some(checksum.clone()); + + return self.add_non_trailing_checksum(Some(checksum), ignore_value); + } + + Ok(()) + } + + pub fn add_checksum_no_trailer(&mut self, header: &HeaderMap, ignore_value: bool) -> Result<(), std::io::Error> { + let cs = get_content_checksum(header)?; + + if let Some(checksum) = cs { + self.content_hash = Some(checksum.clone()); + + return self.add_non_trailing_checksum(Some(checksum), ignore_value); + } + Ok(()) + } + + pub fn add_non_trailing_checksum(&mut self, checksum: Option, ignore_value: bool) -> Result<(), std::io::Error> { + if let Some(checksum) = checksum { + self.content_hash = Some(checksum.clone()); + + if ignore_value { + return Ok(()); + } + + if let Some(hasher) = checksum.checksum_type.hasher() { + self.content_hasher = Some(hasher); + } else { + return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, "Invalid checksum type")); + } + } + Ok(()) + } + + pub fn checksum(&self) -> Option { + if self + .content_hash + .as_ref() + .is_none_or(|v| !v.checksum_type.is_set() || !v.valid()) + { + return None; + } + self.content_hash.clone() + } + pub fn content_crc_type(&self) -> Option { + self.content_hash.as_ref().map(|v| v.checksum_type) + } + + pub fn content_crc(&self) -> HashMap { + let mut map = HashMap::new(); + if let Some(checksum) = self.content_hash.as_ref() { + if !checksum.valid() || checksum.checksum_type.is(ChecksumType::NONE) { + return map; + } + + if checksum.checksum_type.trailing() { + if let Some(trailer) = self.trailer_s3s.as_ref() { + if let Some(Some(checksum_str)) = trailer.read(|headers| { + headers + .get(checksum.checksum_type.to_string()) + .and_then(|value| value.to_str().ok().map(|s| s.to_string())) + }) { + map.insert(checksum.checksum_type.to_string(), checksum_str); + } + } + return map; + } + + map.insert(checksum.checksum_type.to_string(), checksum.encoded.clone()); + + return map; + } + map + } } impl HashReaderMut for HashReader { + fn into_inner(self) -> Box { + self.inner + } + + fn take_inner(&mut self) -> Box { + // Replace inner with an empty reader to move it out safely while keeping self valid + mem::replace(&mut self.inner, Box::new(WarpReader::new(Cursor::new(Vec::new())))) + } + fn bytes_read(&self) -> u64 { self.bytes_read } @@ -258,22 +414,105 @@ impl HashReaderMut for HashReader { fn set_actual_size(&mut self, actual_size: i64) { 
self.actual_size = actual_size; } + + fn content_hash(&self) -> &Option { + &self.content_hash + } + + fn content_sha256(&self) -> &Option { + &self.content_sha256 + } + + fn get_trailer(&self) -> Option<&TrailingHeaders> { + self.trailer_s3s.as_ref() + } + + fn set_trailer(&mut self, trailer: Option) { + self.trailer_s3s = trailer; + } } impl AsyncRead for HashReader { fn poll_read(self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut ReadBuf<'_>) -> Poll> { let this = self.project(); - let poll = this.inner.poll_read(cx, buf); - if let Poll::Ready(Ok(())) = &poll { - let filled = buf.filled().len(); - *this.bytes_read += filled as u64; - if filled == 0 { - // EOF - // TODO: check content_hash + let before = buf.filled().len(); + match this.inner.poll_read(cx, buf) { + Poll::Pending => Poll::Pending, + Poll::Ready(Ok(())) => { + let data = &buf.filled()[before..]; + let filled = data.len(); + + *this.bytes_read += filled as u64; + + if filled > 0 { + // Update SHA256 hasher + if let Some(hasher) = this.content_sha256_hasher { + if let Err(e) = hasher.write_all(data) { + error!("SHA256 hasher write error, error={:?}", e); + return Poll::Ready(Err(std::io::Error::other(e))); + } + } + + // Update content hasher + if let Some(hasher) = this.content_hasher { + if let Err(e) = hasher.write_all(data) { + return Poll::Ready(Err(std::io::Error::other(e))); + } + } + } + + if filled == 0 && !*this.checksum_on_finish { + // check SHA256 + if let (Some(hasher), Some(expected_sha256)) = (this.content_sha256_hasher, this.content_sha256) { + let sha256 = hex_simd::encode_to_string(hasher.finalize(), hex_simd::AsciiCase::Lower); + if sha256 != *expected_sha256 { + error!("SHA256 mismatch, expected={:?}, actual={:?}", expected_sha256, sha256); + return Poll::Ready(Err(std::io::Error::new(std::io::ErrorKind::InvalidData, "SHA256 mismatch"))); + } + } + + // check content hasher + if let (Some(hasher), Some(expected_content_hash)) = (this.content_hasher, this.content_hash) { + if expected_content_hash.checksum_type.trailing() { + if let Some(trailer) = this.trailer_s3s.as_ref() { + if let Some(Some(checksum_str)) = trailer.read(|headers| { + expected_content_hash.checksum_type.key().and_then(|key| { + headers.get(key).and_then(|value| value.to_str().ok().map(|s| s.to_string())) + }) + }) { + expected_content_hash.encoded = checksum_str; + expected_content_hash.raw = general_purpose::STANDARD + .decode(&expected_content_hash.encoded) + .map_err(|_| std::io::Error::other("Invalid base64 checksum"))?; + + if expected_content_hash.raw.is_empty() { + return Poll::Ready(Err(std::io::Error::other("Content hash mismatch"))); + } + } + } + } else { + let content_hash = hasher.finalize(); + if content_hash != expected_content_hash.raw { + error!( + "Content hash mismatch, expected={:?}, actual={:?}", + hex_simd::encode_to_string(&expected_content_hash.raw, hex_simd::AsciiCase::Lower), + hex_simd::encode_to_string(content_hash, hex_simd::AsciiCase::Lower) + ); + return Poll::Ready(Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "Content hash mismatch", + ))); + } + } + } + + *this.checksum_on_finish = true; + } + Poll::Ready(Ok(())) } + Poll::Ready(Err(e)) => Poll::Ready(Err(e)), } - poll } } @@ -323,7 +562,7 @@ mod tests { // Test 1: Simple creation let reader1 = BufReader::new(Cursor::new(&data[..])); let reader1 = Box::new(WarpReader::new(reader1)); - let hash_reader1 = HashReader::new(reader1, size, actual_size, etag.clone(), false).unwrap(); + let hash_reader1 = HashReader::new(reader1, size, 
actual_size, etag.clone(), None, false).unwrap(); assert_eq!(hash_reader1.size(), size); assert_eq!(hash_reader1.actual_size(), actual_size); @@ -332,7 +571,7 @@ mod tests { let reader2 = Box::new(WarpReader::new(reader2)); let hard_limit = HardLimitReader::new(reader2, size); let hard_limit = Box::new(hard_limit); - let hash_reader2 = HashReader::new(hard_limit, size, actual_size, etag.clone(), false).unwrap(); + let hash_reader2 = HashReader::new(hard_limit, size, actual_size, etag.clone(), None, false).unwrap(); assert_eq!(hash_reader2.size(), size); assert_eq!(hash_reader2.actual_size(), actual_size); @@ -341,7 +580,7 @@ mod tests { let reader3 = Box::new(WarpReader::new(reader3)); let etag_reader = EtagReader::new(reader3, etag.clone()); let etag_reader = Box::new(etag_reader); - let hash_reader3 = HashReader::new(etag_reader, size, actual_size, etag.clone(), false).unwrap(); + let hash_reader3 = HashReader::new(etag_reader, size, actual_size, etag.clone(), None, false).unwrap(); assert_eq!(hash_reader3.size(), size); assert_eq!(hash_reader3.actual_size(), actual_size); } @@ -351,7 +590,7 @@ mod tests { let data = b"hello hashreader"; let reader = BufReader::new(Cursor::new(&data[..])); let reader = Box::new(WarpReader::new(reader)); - let mut hash_reader = HashReader::new(reader, data.len() as i64, data.len() as i64, None, false).unwrap(); + let mut hash_reader = HashReader::new(reader, data.len() as i64, data.len() as i64, None, None, false).unwrap(); let mut buf = Vec::new(); let _ = hash_reader.read_to_end(&mut buf).await.unwrap(); // Since we removed EtagReader integration, etag might be None @@ -365,7 +604,7 @@ mod tests { let data = b"no etag"; let reader = BufReader::new(Cursor::new(&data[..])); let reader = Box::new(WarpReader::new(reader)); - let mut hash_reader = HashReader::new(reader, data.len() as i64, data.len() as i64, None, true).unwrap(); + let mut hash_reader = HashReader::new(reader, data.len() as i64, data.len() as i64, None, None, true).unwrap(); let mut buf = Vec::new(); let _ = hash_reader.read_to_end(&mut buf).await.unwrap(); // Etag should be None when diskable_md5 is true @@ -381,10 +620,17 @@ mod tests { let reader = Box::new(WarpReader::new(reader)); // Create a HashReader first let hash_reader = - HashReader::new(reader, data.len() as i64, data.len() as i64, Some("test_etag".to_string()), false).unwrap(); + HashReader::new(reader, data.len() as i64, data.len() as i64, Some("test_etag".to_string()), None, false).unwrap(); let hash_reader = Box::new(WarpReader::new(hash_reader)); // Now try to create another HashReader from the existing one using new - let result = HashReader::new(hash_reader, data.len() as i64, data.len() as i64, Some("test_etag".to_string()), false); + let result = HashReader::new( + hash_reader, + data.len() as i64, + data.len() as i64, + Some("test_etag".to_string()), + None, + false, + ); assert!(result.is_ok()); let final_reader = result.unwrap(); @@ -422,7 +668,7 @@ mod tests { let reader = Box::new(WarpReader::new(reader)); // Create HashReader - let mut hr = HashReader::new(reader, size, actual_size, Some(expected.clone()), false).unwrap(); + let mut hr = HashReader::new(reader, size, actual_size, Some(expected.clone()), None, false).unwrap(); // If compression is enabled, compress data first let compressed_data = if is_compress { @@ -518,7 +764,7 @@ mod tests { let reader = BufReader::new(Cursor::new(data.clone())); let reader = Box::new(WarpReader::new(reader)); - let hash_reader = HashReader::new(reader, data.len() as i64, 
data.len() as i64, None, false).unwrap(); + let hash_reader = HashReader::new(reader, data.len() as i64, data.len() as i64, None, None, false).unwrap(); // Test compression let compress_reader = CompressReader::new(hash_reader, CompressionAlgorithm::Gzip); @@ -564,7 +810,7 @@ mod tests { let reader = BufReader::new(Cursor::new(data.clone())); let reader = Box::new(WarpReader::new(reader)); - let hash_reader = HashReader::new(reader, data.len() as i64, data.len() as i64, None, false).unwrap(); + let hash_reader = HashReader::new(reader, data.len() as i64, data.len() as i64, None, None, false).unwrap(); // Compress let compress_reader = CompressReader::new(hash_reader, algorithm); diff --git a/crates/rio/src/lib.rs b/crates/rio/src/lib.rs index aeb73e60..2d6738e4 100644 --- a/crates/rio/src/lib.rs +++ b/crates/rio/src/lib.rs @@ -34,6 +34,11 @@ pub use hardlimit_reader::HardLimitReader; mod hash_reader; pub use hash_reader::*; +mod checksum; +pub use checksum::*; + +mod errors; +pub use errors::*; pub mod reader; pub use reader::WarpReader; diff --git a/crates/signer/Cargo.toml b/crates/signer/Cargo.toml index 6613ab6e..646ed353 100644 --- a/crates/signer/Cargo.toml +++ b/crates/signer/Cargo.toml @@ -34,6 +34,7 @@ hyper.workspace = true serde_urlencoded.workspace = true rustfs-utils = { workspace = true, features = ["full"] } s3s.workspace = true +base64-simd.workspace = true [dev-dependencies] diff --git a/crates/signer/src/request_signature_v2.rs b/crates/signer/src/request_signature_v2.rs index 17666705..64c4407e 100644 --- a/crates/signer/src/request_signature_v2.rs +++ b/crates/signer/src/request_signature_v2.rs @@ -20,7 +20,7 @@ use std::fmt::Write; use time::{OffsetDateTime, format_description}; use super::utils::get_host_addr; -use rustfs_utils::crypto::{base64_encode, hex, hmac_sha1}; +use rustfs_utils::crypto::{hex, hmac_sha1}; use s3s::Body; const _SIGN_V4_ALGORITHM: &str = "AWS4-HMAC-SHA256"; @@ -111,7 +111,11 @@ pub fn sign_v2( } let auth_header = format!("{SIGN_V2_ALGORITHM} {access_key_id}:"); - let auth_header = format!("{}{}", auth_header, base64_encode(&hmac_sha1(secret_access_key, string_to_sign))); + let auth_header = format!( + "{}{}", + auth_header, + base64_simd::URL_SAFE_NO_PAD.encode_to_string(hmac_sha1(secret_access_key, string_to_sign)) + ); headers.insert("Authorization", auth_header.parse().unwrap()); diff --git a/crates/utils/src/crypto.rs b/crates/utils/src/crypto.rs index d29011a2..720d01cf 100644 --- a/crates/utils/src/crypto.rs +++ b/crates/utils/src/crypto.rs @@ -17,11 +17,11 @@ use std::mem::MaybeUninit; use hex_simd::{AsOut, AsciiCase}; use hyper::body::Bytes; -pub fn base64_encode(input: &[u8]) -> String { +pub fn base64_encode_url_safe_no_pad(input: &[u8]) -> String { base64_simd::URL_SAFE_NO_PAD.encode_to_string(input) } -pub fn base64_decode(input: &[u8]) -> Result, base64_simd::Error> { +pub fn base64_decode_url_safe_no_pad(input: &[u8]) -> Result, base64_simd::Error> { base64_simd::URL_SAFE_NO_PAD.decode_to_vec(input) } @@ -89,11 +89,11 @@ pub fn hex_sha256_chunk(chunk: &[Bytes], f: impl FnOnce(&str) -> R) -> R { fn test_base64_encoding_decoding() { let original_uuid_timestamp = "c0194290-d911-45cb-8e12-79ec563f46a8x1735460504394878000"; - let encoded_string = base64_encode(original_uuid_timestamp.as_bytes()); + let encoded_string = base64_encode_url_safe_no_pad(original_uuid_timestamp.as_bytes()); println!("Encoded: {}", &encoded_string); - let decoded_bytes = base64_decode(encoded_string.clone().as_bytes()).unwrap(); + let decoded_bytes = 
base64_decode_url_safe_no_pad(encoded_string.clone().as_bytes()).unwrap(); let decoded_string = String::from_utf8(decoded_bytes).unwrap(); assert_eq!(decoded_string, original_uuid_timestamp) diff --git a/crates/utils/src/http/headers.rs b/crates/utils/src/http/headers.rs index afe55497..82cd5efa 100644 --- a/crates/utils/src/http/headers.rs +++ b/crates/utils/src/http/headers.rs @@ -176,6 +176,7 @@ pub const RUSTFS_BUCKET_REPLICATION_DELETE_MARKER: &str = "X-Rustfs-Source-Delet pub const RUSTFS_BUCKET_REPLICATION_PROXY_REQUEST: &str = "X-Rustfs-Source-Proxy-Request"; pub const RUSTFS_BUCKET_REPLICATION_REQUEST: &str = "X-Rustfs-Source-Replication-Request"; pub const RUSTFS_BUCKET_REPLICATION_CHECK: &str = "X-Rustfs-Source-Replication-Check"; +pub const RUSTFS_BUCKET_REPLICATION_SSEC_CHECKSUM: &str = "X-Rustfs-Source-Replication-Ssec-Crc"; // SSEC encryption header constants pub const SSEC_ALGORITHM_HEADER: &str = "x-amz-server-side-encryption-customer-algorithm"; diff --git a/crates/utils/src/notify/net.rs b/crates/utils/src/notify/net.rs index 32fcbac8..807d9442 100644 --- a/crates/utils/src/notify/net.rs +++ b/crates/utils/src/notify/net.rs @@ -186,7 +186,7 @@ impl std::fmt::Display for ParsedURL { s.pop(); } - write!(f, "{}", s) + write!(f, "{s}") } } diff --git a/rustfs/Cargo.toml b/rustfs/Cargo.toml index f0e4436e..f57ec0ed 100644 --- a/rustfs/Cargo.toml +++ b/rustfs/Cargo.toml @@ -120,7 +120,8 @@ url = { workspace = true } urlencoding = { workspace = true } uuid = { workspace = true } zip = { workspace = true } - +base64-simd.workspace = true +hex-simd.workspace = true [target.'cfg(any(target_os = "macos", target_os = "freebsd", target_os = "netbsd", target_os = "openbsd"))'.dependencies] sysctl = { workspace = true } diff --git a/rustfs/src/admin/handlers/kms_dynamic.rs b/rustfs/src/admin/handlers/kms_dynamic.rs index 7b027ad1..57b4e595 100644 --- a/rustfs/src/admin/handlers/kms_dynamic.rs +++ b/rustfs/src/admin/handlers/kms_dynamic.rs @@ -65,7 +65,7 @@ impl Operation for ConfigureKmsHandler { Ok(req) => req, Err(e) => { error!("Invalid JSON in configure request: {}", e); - return Ok(S3Response::new((StatusCode::BAD_REQUEST, Body::from(format!("Invalid JSON: {}", e))))); + return Ok(S3Response::new((StatusCode::BAD_REQUEST, Body::from(format!("Invalid JSON: {e}"))))); } } }; @@ -92,7 +92,7 @@ impl Operation for ConfigureKmsHandler { (true, "KMS configured successfully".to_string(), status) } Err(e) => { - let error_msg = format!("Failed to configure KMS: {}", e); + let error_msg = format!("Failed to configure KMS: {e}"); error!("{}", error_msg); let status = service_manager.get_status().await; (false, error_msg, status) @@ -155,7 +155,7 @@ impl Operation for StartKmsHandler { Ok(req) => req, Err(e) => { error!("Invalid JSON in start request: {}", e); - return Ok(S3Response::new((StatusCode::BAD_REQUEST, Body::from(format!("Invalid JSON: {}", e))))); + return Ok(S3Response::new((StatusCode::BAD_REQUEST, Body::from(format!("Invalid JSON: {e}"))))); } } }; @@ -205,14 +205,14 @@ impl Operation for StartKmsHandler { (true, "KMS service restarted successfully".to_string(), status) } Err(e) => { - let error_msg = format!("Failed to restart KMS service: {}", e); + let error_msg = format!("Failed to restart KMS service: {e}"); error!("{}", error_msg); let status = service_manager.get_status().await; (false, error_msg, status) } }, Err(e) => { - let error_msg = format!("Failed to stop KMS service for restart: {}", e); + let error_msg = format!("Failed to stop KMS service for restart: {e}"); 
error!("{}", error_msg); let status = service_manager.get_status().await; (false, error_msg, status) @@ -227,7 +227,7 @@ impl Operation for StartKmsHandler { (true, "KMS service started successfully".to_string(), status) } Err(e) => { - let error_msg = format!("Failed to start KMS service: {}", e); + let error_msg = format!("Failed to start KMS service: {e}"); error!("{}", error_msg); let status = service_manager.get_status().await; (false, error_msg, status) @@ -296,7 +296,7 @@ impl Operation for StopKmsHandler { (true, "KMS service stopped successfully".to_string(), status) } Err(e) => { - let error_msg = format!("Failed to stop KMS service: {}", e); + let error_msg = format!("Failed to stop KMS service: {e}"); error!("{}", error_msg); let status = service_manager.get_status().await; (false, error_msg, status) @@ -436,7 +436,7 @@ impl Operation for ReconfigureKmsHandler { Ok(req) => req, Err(e) => { error!("Invalid JSON in reconfigure request: {}", e); - return Ok(S3Response::new((StatusCode::BAD_REQUEST, Body::from(format!("Invalid JSON: {}", e))))); + return Ok(S3Response::new((StatusCode::BAD_REQUEST, Body::from(format!("Invalid JSON: {e}"))))); } } }; @@ -463,7 +463,7 @@ impl Operation for ReconfigureKmsHandler { (true, "KMS reconfigured and restarted successfully".to_string(), status) } Err(e) => { - let error_msg = format!("Failed to reconfigure KMS: {}", e); + let error_msg = format!("Failed to reconfigure KMS: {e}"); error!("{}", error_msg); let status = service_manager.get_status().await; (false, error_msg, status) diff --git a/rustfs/src/admin/handlers/kms_keys.rs b/rustfs/src/admin/handlers/kms_keys.rs index 32eaaff9..3b52841a 100644 --- a/rustfs/src/admin/handlers/kms_keys.rs +++ b/rustfs/src/admin/handlers/kms_keys.rs @@ -160,7 +160,7 @@ impl Operation for CreateKmsKeyHandler { error!("Failed to create KMS key: {}", e); let response = CreateKmsKeyResponse { success: false, - message: format!("Failed to create key: {}", e), + message: format!("Failed to create key: {e}"), key_id: "".to_string(), key_metadata: None, }; @@ -310,7 +310,7 @@ impl Operation for DeleteKmsKeyHandler { }; let response = DeleteKmsKeyResponse { success: false, - message: format!("Failed to delete key: {}", e), + message: format!("Failed to delete key: {e}"), key_id: request.key_id, deletion_date: None, }; @@ -442,7 +442,7 @@ impl Operation for CancelKmsKeyDeletionHandler { error!("Failed to cancel deletion for KMS key {}: {}", request.key_id, e); let response = CancelKmsKeyDeletionResponse { success: false, - message: format!("Failed to cancel key deletion: {}", e), + message: format!("Failed to cancel key deletion: {e}"), key_id: request.key_id, key_metadata: None, }; @@ -554,7 +554,7 @@ impl Operation for ListKmsKeysHandler { error!("Failed to list KMS keys: {}", e); let response = ListKmsKeysResponse { success: false, - message: format!("Failed to list keys: {}", e), + message: format!("Failed to list keys: {e}"), keys: vec![], truncated: false, next_marker: None, @@ -671,7 +671,7 @@ impl Operation for DescribeKmsKeyHandler { let response = DescribeKmsKeyResponse { success: false, - message: format!("Failed to describe key: {}", e), + message: format!("Failed to describe key: {e}"), key_metadata: None, }; diff --git a/rustfs/src/admin/handlers/sts.rs b/rustfs/src/admin/handlers/sts.rs index 5d487df4..90a3a050 100644 --- a/rustfs/src/admin/handlers/sts.rs +++ b/rustfs/src/admin/handlers/sts.rs @@ -21,7 +21,6 @@ use matchit::Params; use rustfs_ecstore::bucket::utils::serialize; use 
rustfs_iam::{manager::get_token_signing_key, sys::SESSION_POLICY_NAME}; use rustfs_policy::{auth::get_new_credentials_with_metadata, policy::Policy}; -use rustfs_utils::crypto::base64_encode; use s3s::{ Body, S3Error, S3ErrorCode, S3Request, S3Response, S3Result, dto::{AssumeRoleOutput, Credentials, Timestamp}, @@ -175,7 +174,10 @@ pub fn populate_session_policy(claims: &mut HashMap, policy: &str return Err(s3_error!(InvalidRequest, "policy too large")); } - claims.insert(SESSION_POLICY_NAME.to_string(), serde_json::Value::String(base64_encode(&policy_buf))); + claims.insert( + SESSION_POLICY_NAME.to_string(), + serde_json::Value::String(base64_simd::URL_SAFE_NO_PAD.encode_to_string(&policy_buf)), + ); } Ok(()) diff --git a/rustfs/src/server/http.rs b/rustfs/src/server/http.rs index a916c45d..93090abd 100644 --- a/rustfs/src/server/http.rs +++ b/rustfs/src/server/http.rs @@ -160,7 +160,7 @@ pub async fn start_http_server( let api_endpoints = format!("http://{local_ip}:{server_port}"); let localhost_endpoint = format!("http://127.0.0.1:{server_port}"); info!(" API: {} {}", api_endpoints, localhost_endpoint); - println!(" API: {} {}", api_endpoints, localhost_endpoint); + println!(" API: {api_endpoints} {localhost_endpoint}"); info!(" RootUser: {}", opt.access_key.clone()); info!(" RootPass: {}", opt.secret_key.clone()); if DEFAULT_ACCESS_KEY.eq(&opt.access_key) && DEFAULT_SECRET_KEY.eq(&opt.secret_key) { @@ -675,9 +675,8 @@ pub(crate) fn get_tokio_runtime_builder() -> tokio::runtime::Builder { builder.thread_name(thread_name.clone()); println!( "Starting Tokio runtime with configured parameters:\n\ - worker_threads: {}, max_blocking_threads: {}, thread_stack_size: {}, thread_keep_alive: {}, \ - global_queue_interval: {}, thread_name: {}", - worker_threads, max_blocking_threads, thread_stack_size, thread_keep_alive, global_queue_interval, thread_name + worker_threads: {worker_threads}, max_blocking_threads: {max_blocking_threads}, thread_stack_size: {thread_stack_size}, thread_keep_alive: {thread_keep_alive}, \ + global_queue_interval: {global_queue_interval}, thread_name: {thread_name}" ); builder } diff --git a/rustfs/src/storage/ecfs.rs b/rustfs/src/storage/ecfs.rs index 759e37c6..fdd3c96c 100644 --- a/rustfs/src/storage/ecfs.rs +++ b/rustfs/src/storage/ecfs.rs @@ -41,8 +41,8 @@ use rustfs_ecstore::{ object_lock::objectlock_sys::BucketObjectLockSys, policy_sys::PolicySys, replication::{ - DeletedObjectReplicationInfo, REPLICATE_INCOMING_DELETE, ReplicationConfigurationExt, check_replicate_delete, - get_must_replicate_options, must_replicate, schedule_replication, schedule_replication_delete, + DeletedObjectReplicationInfo, ReplicationConfigurationExt, check_replicate_delete, get_must_replicate_options, + must_replicate, schedule_replication, schedule_replication_delete, }, tagging::{decode_tags, encode_tags}, utils::serialize, @@ -71,6 +71,7 @@ use rustfs_ecstore::{ // RESERVED_METADATA_PREFIX, }, }; +use rustfs_filemeta::REPLICATE_INCOMING_DELETE; use rustfs_filemeta::{ReplicationStatusType, ReplicationType, VersionPurgeStatusType, fileinfo::ObjectPartInfo}; use rustfs_kms::{ DataKey, @@ -95,6 +96,10 @@ use rustfs_targets::{ EventName, arn::{TargetID, TargetIDError}, }; +use rustfs_utils::http::{ + AMZ_CHECKSUM_MODE, AMZ_CHECKSUM_TYPE, AMZ_CONTENT_SHA256, AMZ_META_UNENCRYPTED_CONTENT_LENGTH, + AMZ_META_UNENCRYPTED_CONTENT_MD5, +}; use rustfs_utils::{ CompressionAlgorithm, http::{ @@ -104,6 +109,7 @@ use rustfs_utils::{ path::{is_dir_object, path_join_buf}, }; use 
rustfs_zip::CompressionFormat; +use s3s::header::{X_AMZ_OBJECT_LOCK_MODE, X_AMZ_OBJECT_LOCK_RETAIN_UNTIL_DATE}; use s3s::{S3, S3Error, S3ErrorCode, S3Request, S3Response, S3Result, dto::*, s3_error}; use std::{ collections::HashMap, @@ -341,19 +347,36 @@ impl FS { } async fn put_object_extract(&self, req: S3Request) -> S3Result> { + let input = req.input; + let PutObjectInput { body, bucket, key, version_id, + content_length, + content_md5, .. - } = req.input; + } = input; + let event_version_id = version_id; let Some(body) = body else { return Err(s3_error!(IncompleteBody)) }; let body = StreamReader::new(body.map(|f| f.map_err(|e| std::io::Error::other(e.to_string())))); - // let etag_stream = EtagReader::new(body); + let size = match content_length { + Some(c) => c, + None => { + if let Some(val) = req.headers.get(AMZ_DECODED_CONTENT_LENGTH) { + match atoi::atoi::(val.as_bytes()) { + Some(x) => x, + None => return Err(s3_error!(UnexpectedContent)), + } + } else { + return Err(s3_error!(UnexpectedContent)); + } + } + }; let Some(ext) = Path::new(&key).extension().and_then(|s| s.to_str()) else { return Err(s3_error!(InvalidArgument, "key extension not found")); @@ -361,8 +384,34 @@ impl FS { let ext = ext.to_owned(); + let md5hex = if let Some(base64_md5) = content_md5 { + let md5 = base64_simd::STANDARD + .decode_to_vec(base64_md5.as_bytes()) + .map_err(|e| ApiError::from(StorageError::other(format!("Invalid content MD5: {e}"))))?; + Some(hex_simd::encode_to_string(&md5, hex_simd::AsciiCase::Lower)) + } else { + None + }; + + let sha256hex = req.headers.get(AMZ_CONTENT_SHA256).and_then(|v| { + v.to_str() + .ok() + .filter(|&v| v != "UNSIGNED-PAYLOAD" && v != "STREAMING-UNSIGNED-PAYLOAD-TRAILER") + .map(|v| v.to_string()) + }); + + let actual_size = size; + + let reader: Box = Box::new(WarpReader::new(body)); + + let mut hreader = HashReader::new(reader, size, actual_size, md5hex, sha256hex, false).map_err(ApiError::from)?; + + if let Err(err) = hreader.add_checksum_from_s3s(&req.headers, req.trailing_headers.clone(), false) { + return Err(ApiError::from(StorageError::other(format!("add_checksum error={err:?}"))).into()); + } + // TODO: support zip - let decoder = CompressionFormat::from_extension(&ext).get_decoder(body).map_err(|e| { + let decoder = CompressionFormat::from_extension(&ext).get_decoder(hreader).map_err(|e| { error!("get_decoder err {:?}", e); s3_error!(InvalidArgument, "get_decoder err") })?; @@ -423,13 +472,13 @@ impl FS { ); metadata.insert(format!("{RESERVED_METADATA_PREFIX_LOWER}actual-size",), size.to_string()); - let hrd = HashReader::new(reader, size, actual_size, None, false).map_err(ApiError::from)?; + let hrd = HashReader::new(reader, size, actual_size, None, None, false).map_err(ApiError::from)?; reader = Box::new(CompressReader::new(hrd, CompressionAlgorithm::default())); size = -1; } - let hrd = HashReader::new(reader, size, actual_size, None, false).map_err(ApiError::from)?; + let hrd = HashReader::new(reader, size, actual_size, None, None, false).map_err(ApiError::from)?; let mut reader = PutObjReader::new(hrd); let _obj_info = store @@ -479,9 +528,51 @@ impl FS { // Err(e) => error!("Decompression failed: {}", e), // } + let mut checksum_crc32 = input.checksum_crc32; + let mut checksum_crc32c = input.checksum_crc32c; + let mut checksum_sha1 = input.checksum_sha1; + let mut checksum_sha256 = input.checksum_sha256; + let mut checksum_crc64nvme = input.checksum_crc64nvme; + + if let Some(alg) = &input.checksum_algorithm { + if let Some(Some(checksum_str)) = 
req.trailing_headers.as_ref().map(|trailer| { + let key = match alg.as_str() { + ChecksumAlgorithm::CRC32 => rustfs_rio::ChecksumType::CRC32.key(), + ChecksumAlgorithm::CRC32C => rustfs_rio::ChecksumType::CRC32C.key(), + ChecksumAlgorithm::SHA1 => rustfs_rio::ChecksumType::SHA1.key(), + ChecksumAlgorithm::SHA256 => rustfs_rio::ChecksumType::SHA256.key(), + ChecksumAlgorithm::CRC64NVME => rustfs_rio::ChecksumType::CRC64_NVME.key(), + _ => return None, + }; + trailer.read(|headers| { + headers + .get(key.unwrap_or_default()) + .and_then(|value| value.to_str().ok().map(|s| s.to_string())) + }) + }) { + match alg.as_str() { + ChecksumAlgorithm::CRC32 => checksum_crc32 = checksum_str, + ChecksumAlgorithm::CRC32C => checksum_crc32c = checksum_str, + ChecksumAlgorithm::SHA1 => checksum_sha1 = checksum_str, + ChecksumAlgorithm::SHA256 => checksum_sha256 = checksum_str, + ChecksumAlgorithm::CRC64NVME => checksum_crc64nvme = checksum_str, + _ => (), + } + } + } + + warn!( + "put object extract checksum_crc32={checksum_crc32:?}, checksum_crc32c={checksum_crc32c:?}, checksum_sha1={checksum_sha1:?}, checksum_sha256={checksum_sha256:?}, checksum_crc64nvme={checksum_crc64nvme:?}", + ); + // TODO: etag let output = PutObjectOutput { - // e_tag: Some(etag_stream.etag().await), + // e_tag: hreader.try_resolve_etag().map(|v| ETag::Strong(v)), + checksum_crc32, + checksum_crc32c, + checksum_sha1, + checksum_sha256, + checksum_crc64nvme, ..Default::default() }; Ok(S3Response::new(output)) @@ -682,7 +773,7 @@ impl S3 for FS { .remove(&format!("{RESERVED_METADATA_PREFIX_LOWER}compression-size")); } - let mut reader = HashReader::new(reader, length, actual_size, None, false).map_err(ApiError::from)?; + let mut reader = HashReader::new(reader, length, actual_size, None, None, false).map_err(ApiError::from)?; if let Some(ref sse_alg) = effective_sse { if is_managed_sse(sse_alg) { @@ -702,7 +793,7 @@ impl S3 for FS { effective_kms_key_id = Some(kms_key_used.clone()); let encrypt_reader = EncryptReader::new(reader, key_bytes, nonce); - reader = HashReader::new(Box::new(encrypt_reader), -1, actual_size, None, false).map_err(ApiError::from)?; + reader = HashReader::new(Box::new(encrypt_reader), -1, actual_size, None, None, false).map_err(ApiError::from)?; } } @@ -1471,7 +1562,7 @@ impl S3 for FS { let mut content_length = info.size; - let content_range = if let Some(rs) = rs { + let content_range = if let Some(rs) = &rs { let total_size = info.get_actual_size().map_err(ApiError::from)?; let (start, length) = rs.get_offset_length(total_size).map_err(ApiError::from)?; content_length = length; @@ -1654,6 +1745,42 @@ impl S3 for FS { .cloned(); let ssekms_key_id = info.user_defined.get("x-amz-server-side-encryption-aws-kms-key-id").cloned(); + let mut checksum_crc32 = None; + let mut checksum_crc32c = None; + let mut checksum_sha1 = None; + let mut checksum_sha256 = None; + let mut checksum_crc64nvme = None; + let mut checksum_type = None; + + // checksum + if let Some(checksum_mode) = req.headers.get(AMZ_CHECKSUM_MODE) + && checksum_mode.to_str().unwrap_or_default() == "ENABLED" + && rs.is_none() + { + let (checksums, _is_multipart) = + info.decrypt_checksums(opts.part_number.unwrap_or(0), &req.headers) + .map_err(|e| { + error!("decrypt_checksums error: {}", e); + ApiError::from(e) + })?; + + for (key, checksum) in checksums { + if key == AMZ_CHECKSUM_TYPE { + checksum_type = Some(ChecksumType::from(checksum)); + continue; + } + + match rustfs_rio::ChecksumType::from_string(key.as_str()) { + 
rustfs_rio::ChecksumType::CRC32 => checksum_crc32 = Some(checksum), + rustfs_rio::ChecksumType::CRC32C => checksum_crc32c = Some(checksum), + rustfs_rio::ChecksumType::SHA1 => checksum_sha1 = Some(checksum), + rustfs_rio::ChecksumType::SHA256 => checksum_sha256 = Some(checksum), + rustfs_rio::ChecksumType::CRC64_NVME => checksum_crc64nvme = Some(checksum), + _ => (), + } + } + } + let output = GetObjectOutput { body, content_length: Some(response_content_length), @@ -1662,11 +1789,17 @@ impl S3 for FS { accept_ranges: Some("bytes".to_string()), content_range, e_tag: info.etag.map(|etag| to_s3s_etag(&etag)), - metadata: Some(info.user_defined), + metadata: filter_object_metadata(&info.user_defined), server_side_encryption, sse_customer_algorithm, sse_customer_key_md5, ssekms_key_id, + checksum_crc32, + checksum_crc32c, + checksum_sha1, + checksum_sha256, + checksum_crc64nvme, + checksum_type, ..Default::default() }; @@ -1757,7 +1890,6 @@ impl S3 for FS { let info = store.get_object_info(&bucket, &key, &opts).await.map_err(ApiError::from)?; - // warn!("head_object info {:?}", &info); let event_info = info.clone(); let content_type = { if let Some(content_type) = &info.content_type { @@ -1777,7 +1909,10 @@ impl S3 for FS { // TODO: range download - let content_length = info.get_actual_size().map_err(ApiError::from)?; + let content_length = info.get_actual_size().map_err(|e| { + error!("get_actual_size error: {}", e); + ApiError::from(e) + })?; let metadata_map = info.user_defined.clone(); let server_side_encryption = metadata_map @@ -1789,19 +1924,57 @@ impl S3 for FS { let sse_customer_key_md5 = metadata_map.get("x-amz-server-side-encryption-customer-key-md5").cloned(); let ssekms_key_id = metadata_map.get("x-amz-server-side-encryption-aws-kms-key-id").cloned(); - let metadata = metadata_map; + let mut checksum_crc32 = None; + let mut checksum_crc32c = None; + let mut checksum_sha1 = None; + let mut checksum_sha256 = None; + let mut checksum_crc64nvme = None; + let mut checksum_type = None; + + // checksum + if let Some(checksum_mode) = req.headers.get(AMZ_CHECKSUM_MODE) + && checksum_mode.to_str().unwrap_or_default() == "ENABLED" + && rs.is_none() + { + let (checksums, _is_multipart) = info + .decrypt_checksums(opts.part_number.unwrap_or(0), &req.headers) + .map_err(ApiError::from)?; + + warn!("get object metadata checksums: {:?}", checksums); + for (key, checksum) in checksums { + if key == AMZ_CHECKSUM_TYPE { + checksum_type = Some(ChecksumType::from(checksum)); + continue; + } + + match rustfs_rio::ChecksumType::from_string(key.as_str()) { + rustfs_rio::ChecksumType::CRC32 => checksum_crc32 = Some(checksum), + rustfs_rio::ChecksumType::CRC32C => checksum_crc32c = Some(checksum), + rustfs_rio::ChecksumType::SHA1 => checksum_sha1 = Some(checksum), + rustfs_rio::ChecksumType::SHA256 => checksum_sha256 = Some(checksum), + rustfs_rio::ChecksumType::CRC64_NVME => checksum_crc64nvme = Some(checksum), + _ => (), + } + } + } let output = HeadObjectOutput { content_length: Some(content_length), content_type, last_modified, e_tag: info.etag.map(|etag| to_s3s_etag(&etag)), - metadata: Some(metadata), + metadata: filter_object_metadata(&metadata_map), version_id: info.version_id.map(|v| v.to_string()), server_side_encryption, sse_customer_algorithm, sse_customer_key_md5, ssekms_key_id, + checksum_crc32, + checksum_crc32c, + checksum_sha1, + checksum_sha256, + checksum_crc64nvme, + checksum_type, // metadata: object_metadata, ..Default::default() }; @@ -2105,6 +2278,7 @@ impl S3 for FS { 
sse_customer_key, sse_customer_key_md5, ssekms_key_id, + content_md5, .. } = input; @@ -2171,7 +2345,7 @@ impl S3 for FS { extract_metadata_from_mime_with_object_name(&req.headers, &mut metadata, Some(&key)); if let Some(tags) = tagging { - metadata.insert(AMZ_OBJECT_TAGGING.to_owned(), tags); + metadata.insert(AMZ_OBJECT_TAGGING.to_owned(), tags.to_string()); } // TDD: Store effective SSE information in metadata for GET responses @@ -2192,10 +2366,30 @@ impl S3 for FS { metadata.insert("x-amz-server-side-encryption-aws-kms-key-id".to_string(), kms_key_id.clone()); } + let mut opts: ObjectOptions = put_opts(&bucket, &key, version_id.clone(), &req.headers, metadata.clone()) + .await + .map_err(ApiError::from)?; + let mut reader: Box = Box::new(WarpReader::new(body)); let actual_size = size; + let mut md5hex = if let Some(base64_md5) = content_md5 { + let md5 = base64_simd::STANDARD + .decode_to_vec(base64_md5.as_bytes()) + .map_err(|e| ApiError::from(StorageError::other(format!("Invalid content MD5: {e}"))))?; + Some(hex_simd::encode_to_string(&md5, hex_simd::AsciiCase::Lower)) + } else { + None + }; + + let mut sha256hex = req.headers.get(AMZ_CONTENT_SHA256).and_then(|v| { + v.to_str() + .ok() + .filter(|&v| v != "UNSIGNED-PAYLOAD" && v != "STREAMING-UNSIGNED-PAYLOAD-TRAILER") + .map(|v| v.to_string()) + }); + if is_compressible(&req.headers, &key) && size > MIN_COMPRESSIBLE_SIZE as i64 { metadata.insert( format!("{RESERVED_METADATA_PREFIX_LOWER}compression"), @@ -2203,14 +2397,29 @@ impl S3 for FS { ); metadata.insert(format!("{RESERVED_METADATA_PREFIX_LOWER}actual-size",), size.to_string()); - let hrd = HashReader::new(reader, size as i64, size as i64, None, false).map_err(ApiError::from)?; + let mut hrd = HashReader::new(reader, size as i64, size as i64, md5hex, sha256hex, false).map_err(ApiError::from)?; + + if let Err(err) = hrd.add_checksum_from_s3s(&req.headers, req.trailing_headers.clone(), false) { + return Err(ApiError::from(StorageError::other(format!("add_checksum error={err:?}"))).into()); + } + + opts.want_checksum = hrd.checksum(); reader = Box::new(CompressReader::new(hrd, CompressionAlgorithm::default())); size = -1; + md5hex = None; + sha256hex = None; } - // TODO: md5 check - let mut reader = HashReader::new(reader, size, actual_size, None, false).map_err(ApiError::from)?; + let mut reader = HashReader::new(reader, size, actual_size, md5hex, sha256hex, false).map_err(ApiError::from)?; + + if size >= 0 { + if let Err(err) = reader.add_checksum_from_s3s(&req.headers, req.trailing_headers.clone(), false) { + return Err(ApiError::from(StorageError::other(format!("add_checksum error={err:?}"))).into()); + } + + opts.want_checksum = reader.checksum(); + } // Apply SSE-C encryption if customer provided key if let (Some(_), Some(sse_key), Some(sse_key_md5_provided)) = @@ -2251,7 +2460,7 @@ impl S3 for FS { // Apply encryption let encrypt_reader = EncryptReader::new(reader, key_array, nonce); - reader = HashReader::new(Box::new(encrypt_reader), -1, actual_size, None, false).map_err(ApiError::from)?; + reader = HashReader::new(Box::new(encrypt_reader), -1, actual_size, None, None, false).map_err(ApiError::from)?; } // Apply managed SSE (SSE-S3 or SSE-KMS) when requested @@ -2275,20 +2484,16 @@ impl S3 for FS { effective_kms_key_id = Some(kms_key_used.clone()); let encrypt_reader = EncryptReader::new(reader, key_bytes, nonce); - reader = HashReader::new(Box::new(encrypt_reader), -1, actual_size, None, false).map_err(ApiError::from)?; + reader = + 
HashReader::new(Box::new(encrypt_reader), -1, actual_size, None, None, false).map_err(ApiError::from)?; } } } let mut reader = PutObjReader::new(reader); - let mt = metadata.clone(); let mt2 = metadata.clone(); - let mut opts: ObjectOptions = put_opts(&bucket, &key, version_id, &req.headers, mt) - .await - .map_err(ApiError::from)?; - let repoptions = get_must_replicate_options(&mt2, "".to_string(), ReplicationStatusType::Empty, ReplicationType::Object, opts.clone()); @@ -2319,18 +2524,56 @@ impl S3 for FS { schedule_replication(obj_info, store, dsc, ReplicationType::Object).await; } + let mut checksum_crc32 = input.checksum_crc32; + let mut checksum_crc32c = input.checksum_crc32c; + let mut checksum_sha1 = input.checksum_sha1; + let mut checksum_sha256 = input.checksum_sha256; + let mut checksum_crc64nvme = input.checksum_crc64nvme; + + if let Some(alg) = &input.checksum_algorithm { + if let Some(Some(checksum_str)) = req.trailing_headers.as_ref().map(|trailer| { + let key = match alg.as_str() { + ChecksumAlgorithm::CRC32 => rustfs_rio::ChecksumType::CRC32.key(), + ChecksumAlgorithm::CRC32C => rustfs_rio::ChecksumType::CRC32C.key(), + ChecksumAlgorithm::SHA1 => rustfs_rio::ChecksumType::SHA1.key(), + ChecksumAlgorithm::SHA256 => rustfs_rio::ChecksumType::SHA256.key(), + ChecksumAlgorithm::CRC64NVME => rustfs_rio::ChecksumType::CRC64_NVME.key(), + _ => return None, + }; + trailer.read(|headers| { + headers + .get(key.unwrap_or_default()) + .and_then(|value| value.to_str().ok().map(|s| s.to_string())) + }) + }) { + match alg.as_str() { + ChecksumAlgorithm::CRC32 => checksum_crc32 = checksum_str, + ChecksumAlgorithm::CRC32C => checksum_crc32c = checksum_str, + ChecksumAlgorithm::SHA1 => checksum_sha1 = checksum_str, + ChecksumAlgorithm::SHA256 => checksum_sha256 = checksum_str, + ChecksumAlgorithm::CRC64NVME => checksum_crc64nvme = checksum_str, + _ => (), + } + } + } + let output = PutObjectOutput { e_tag, server_side_encryption: effective_sse, // TDD: Return effective encryption config - sse_customer_algorithm, - sse_customer_key_md5, + sse_customer_algorithm: sse_customer_algorithm.clone(), + sse_customer_key_md5: sse_customer_key_md5.clone(), ssekms_key_id: effective_kms_key_id, // TDD: Return effective KMS key ID + checksum_crc32, + checksum_crc32c, + checksum_sha1, + checksum_sha256, + checksum_crc64nvme, ..Default::default() }; let event_args = rustfs_notify::event::EventArgs { event_name: EventName::ObjectCreatedPut, - bucket_name: bucket, + bucket_name: bucket.clone(), object: event_info, req_params: rustfs_utils::extract_req_params_header(&req.headers), resp_elements: rustfs_utils::extract_resp_elements(&S3Response::new(output.clone())), @@ -2460,14 +2703,29 @@ impl S3 for FS { ); } - let opts: ObjectOptions = put_opts(&bucket, &key, version_id, &req.headers, metadata) + let mut opts: ObjectOptions = put_opts(&bucket, &key, version_id, &req.headers, metadata) .await .map_err(ApiError::from)?; - let MultipartUploadResult { upload_id, .. 
} = store + let checksum_type = rustfs_rio::ChecksumType::from_header(&req.headers); + if checksum_type.is(rustfs_rio::ChecksumType::INVALID) { + return Err(s3_error!(InvalidArgument, "Invalid checksum type")); + } else if checksum_type.is_set() && !checksum_type.is(rustfs_rio::ChecksumType::TRAILING) { + opts.want_checksum = Some(rustfs_rio::Checksum { + checksum_type, + ..Default::default() + }); + } + + let MultipartUploadResult { + upload_id, + checksum_algo, + checksum_type, + } = store .new_multipart_upload(&bucket, &key, &opts) .await .map_err(ApiError::from)?; + let object_name = key.clone(); let bucket_name = bucket.clone(); let output = CreateMultipartUploadOutput { @@ -2477,6 +2735,8 @@ impl S3 for FS { server_side_encryption: effective_sse, // TDD: Return effective encryption config sse_customer_algorithm, ssekms_key_id: effective_kms_key_id, // TDD: Return effective KMS key ID + checksum_algorithm: checksum_algo.map(ChecksumAlgorithm::from), + checksum_type: checksum_type.map(ChecksumType::from), ..Default::default() }; @@ -2509,6 +2769,7 @@ impl S3 for FS { #[instrument(level = "debug", skip(self, req))] async fn upload_part(&self, req: S3Request) -> S3Result> { + let input = req.input; let UploadPartInput { body, bucket, @@ -2521,7 +2782,7 @@ impl S3 for FS { sse_customer_key_md5: _sse_customer_key_md5, // content_md5, .. - } = req.input; + } = input; let part_id = part_number as usize; @@ -2648,19 +2909,46 @@ impl S3 for FS { } */ + let mut md5hex = if let Some(base64_md5) = input.content_md5 { + let md5 = base64_simd::STANDARD + .decode_to_vec(base64_md5.as_bytes()) + .map_err(|e| ApiError::from(StorageError::other(format!("Invalid content MD5: {e}"))))?; + Some(hex_simd::encode_to_string(&md5, hex_simd::AsciiCase::Lower)) + } else { + None + }; + + let mut sha256hex = req.headers.get(AMZ_CONTENT_SHA256).and_then(|v| { + v.to_str() + .ok() + .filter(|&v| v != "UNSIGNED-PAYLOAD" && v != "STREAMING-UNSIGNED-PAYLOAD-TRAILER") + .map(|v| v.to_string()) + }); + if is_compressible { - let hrd = HashReader::new(reader, size, actual_size, None, false).map_err(ApiError::from)?; + let mut hrd = HashReader::new(reader, size, actual_size, md5hex, sha256hex, false).map_err(ApiError::from)?; + + if let Err(err) = hrd.add_checksum_from_s3s(&req.headers, req.trailing_headers.clone(), false) { + return Err(ApiError::from(StorageError::other(format!("add_checksum error={err:?}"))).into()); + } + let compress_reader = CompressReader::new(hrd, CompressionAlgorithm::default()); reader = Box::new(compress_reader); size = -1; + md5hex = None; + sha256hex = None; } - let mut reader = HashReader::new(reader, size, actual_size, None, false).map_err(ApiError::from)?; + let mut reader = HashReader::new(reader, size, actual_size, md5hex, sha256hex, false).map_err(ApiError::from)?; + + if let Err(err) = reader.add_checksum_from_s3s(&req.headers, req.trailing_headers.clone(), size < 0) { + return Err(ApiError::from(StorageError::other(format!("add_checksum error={err:?}"))).into()); + } if let Some((key_bytes, base_nonce, _)) = decrypt_managed_encryption_key(&bucket, &key, &fi.user_defined).await? 
{ let part_nonce = derive_part_nonce(base_nonce, part_id); let encrypt_reader = EncryptReader::new(reader, key_bytes, part_nonce); - reader = HashReader::new(Box::new(encrypt_reader), -1, actual_size, None, false).map_err(ApiError::from)?; + reader = HashReader::new(Box::new(encrypt_reader), -1, actual_size, None, None, false).map_err(ApiError::from)?; } let mut reader = PutObjReader::new(reader); @@ -2670,7 +2958,45 @@ impl S3 for FS { .await .map_err(ApiError::from)?; + let mut checksum_crc32 = input.checksum_crc32; + let mut checksum_crc32c = input.checksum_crc32c; + let mut checksum_sha1 = input.checksum_sha1; + let mut checksum_sha256 = input.checksum_sha256; + let mut checksum_crc64nvme = input.checksum_crc64nvme; + + if let Some(alg) = &input.checksum_algorithm { + if let Some(Some(checksum_str)) = req.trailing_headers.as_ref().map(|trailer| { + let key = match alg.as_str() { + ChecksumAlgorithm::CRC32 => rustfs_rio::ChecksumType::CRC32.key(), + ChecksumAlgorithm::CRC32C => rustfs_rio::ChecksumType::CRC32C.key(), + ChecksumAlgorithm::SHA1 => rustfs_rio::ChecksumType::SHA1.key(), + ChecksumAlgorithm::SHA256 => rustfs_rio::ChecksumType::SHA256.key(), + ChecksumAlgorithm::CRC64NVME => rustfs_rio::ChecksumType::CRC64_NVME.key(), + _ => return None, + }; + trailer.read(|headers| { + headers + .get(key.unwrap_or_default()) + .and_then(|value| value.to_str().ok().map(|s| s.to_string())) + }) + }) { + match alg.as_str() { + ChecksumAlgorithm::CRC32 => checksum_crc32 = checksum_str, + ChecksumAlgorithm::CRC32C => checksum_crc32c = checksum_str, + ChecksumAlgorithm::SHA1 => checksum_sha1 = checksum_str, + ChecksumAlgorithm::SHA256 => checksum_sha256 = checksum_str, + ChecksumAlgorithm::CRC64NVME => checksum_crc64nvme = checksum_str, + _ => (), + } + } + } + let output = UploadPartOutput { + checksum_crc32, + checksum_crc32c, + checksum_sha1, + checksum_sha256, + checksum_crc64nvme, e_tag: info.etag.map(|etag| to_s3s_etag(&etag)), ..Default::default() }; @@ -2828,17 +3154,17 @@ impl S3 for FS { let mut size = length; if is_compressible { - let hrd = HashReader::new(reader, size, actual_size, None, false).map_err(ApiError::from)?; + let hrd = HashReader::new(reader, size, actual_size, None, None, false).map_err(ApiError::from)?; reader = Box::new(CompressReader::new(hrd, CompressionAlgorithm::default())); size = -1; } - let mut reader = HashReader::new(reader, size, actual_size, None, false).map_err(ApiError::from)?; + let mut reader = HashReader::new(reader, size, actual_size, None, None, false).map_err(ApiError::from)?; if let Some((key_bytes, base_nonce, _)) = decrypt_managed_encryption_key(&bucket, &key, &mp_info.user_defined).await? { let part_nonce = derive_part_nonce(base_nonce, part_id); let encrypt_reader = EncryptReader::new(reader, key_bytes, part_nonce); - reader = HashReader::new(Box::new(encrypt_reader), -1, actual_size, None, false).map_err(ApiError::from)?; + reader = HashReader::new(Box::new(encrypt_reader), -1, actual_size, None, None, false).map_err(ApiError::from)?; } let mut reader = PutObjReader::new(reader); @@ -3030,6 +3356,13 @@ impl S3 for FS { uploaded_parts.push(CompletePart::from(part)); } + // is part number sorted? 
+ if !uploaded_parts.is_sorted_by_key(|p| p.part_num) { + return Err(s3_error!(InvalidPart, "Part numbers must be sorted")); + } + + // TODO: check object lock + let Some(store) = new_object_layer_fn() else { return Err(S3Error::with_message(S3ErrorCode::InternalError, "Not init".to_string())); }; @@ -3039,6 +3372,7 @@ impl S3 for FS { "TDD: Attempting to get multipart info for bucket={}, key={}, upload_id={}", bucket, key, upload_id ); + let multipart_info = store .get_multipart_info(&bucket, &key, &upload_id, &ObjectOptions::default()) .await @@ -3078,6 +3412,35 @@ impl S3 for FS { "TDD: Creating output with SSE: {:?}, KMS Key: {:?}", server_side_encryption, ssekms_key_id ); + + let mut checksum_crc32 = None; + let mut checksum_crc32c = None; + let mut checksum_sha1 = None; + let mut checksum_sha256 = None; + let mut checksum_crc64nvme = None; + let mut checksum_type = None; + + // checksum + let (checksums, _is_multipart) = obj_info + .decrypt_checksums(opts.part_number.unwrap_or(0), &req.headers) + .map_err(ApiError::from)?; + + for (key, checksum) in checksums { + if key == AMZ_CHECKSUM_TYPE { + checksum_type = Some(ChecksumType::from(checksum)); + continue; + } + + match rustfs_rio::ChecksumType::from_string(key.as_str()) { + rustfs_rio::ChecksumType::CRC32 => checksum_crc32 = Some(checksum), + rustfs_rio::ChecksumType::CRC32C => checksum_crc32c = Some(checksum), + rustfs_rio::ChecksumType::SHA1 => checksum_sha1 = Some(checksum), + rustfs_rio::ChecksumType::SHA256 => checksum_sha256 = Some(checksum), + rustfs_rio::ChecksumType::CRC64_NVME => checksum_crc64nvme = Some(checksum), + _ => (), + } + } + let output = CompleteMultipartUploadOutput { bucket: Some(bucket.clone()), key: Some(key.clone()), @@ -3085,6 +3448,12 @@ impl S3 for FS { location: Some("us-east-1".to_string()), server_side_encryption, // TDD: Return encryption info ssekms_key_id, // TDD: Return KMS key ID if present + checksum_crc32, + checksum_crc32c, + checksum_sha1, + checksum_sha256, + checksum_crc64nvme, + checksum_type, ..Default::default() }; info!( @@ -4600,6 +4969,34 @@ pub(crate) async fn has_replication_rules(bucket: &str, objects: &[ObjectToDelet false } +fn filter_object_metadata(metadata: &HashMap) -> Option> { + let mut filtered_metadata = HashMap::new(); + for (k, v) in metadata { + if k.starts_with(RESERVED_METADATA_PREFIX_LOWER) { + continue; + } + if v.is_empty() && (k == &X_AMZ_OBJECT_LOCK_MODE.to_string() || k == &X_AMZ_OBJECT_LOCK_RETAIN_UNTIL_DATE.to_string()) { + continue; + } + + if k == AMZ_META_UNENCRYPTED_CONTENT_MD5 || k == AMZ_META_UNENCRYPTED_CONTENT_LENGTH { + continue; + } + + // let lower_key = k.to_ascii_lowercase(); + // if lower_key.starts_with("x-amz-meta-") || lower_key.starts_with("x-rustfs-meta-") { + // filtered_metadata.insert(lower_key, v.to_string()); + // } + + filtered_metadata.insert(k.clone(), v.clone()); + } + if filtered_metadata.is_empty() { + None + } else { + Some(filtered_metadata) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/rustfs/src/storage/options.rs b/rustfs/src/storage/options.rs index 1493d4b8..80061b38 100644 --- a/rustfs/src/storage/options.rs +++ b/rustfs/src/storage/options.rs @@ -18,7 +18,10 @@ use rustfs_ecstore::error::Result; use rustfs_ecstore::error::StorageError; use rustfs_ecstore::store_api::{HTTPPreconditions, HTTPRangeSpec, ObjectOptions}; +use rustfs_utils::http::RESERVED_METADATA_PREFIX_LOWER; use rustfs_utils::http::RUSTFS_BUCKET_REPLICATION_DELETE_MARKER; +use rustfs_utils::http::RUSTFS_BUCKET_REPLICATION_REQUEST; +use 
rustfs_utils::http::RUSTFS_BUCKET_REPLICATION_SSEC_CHECKSUM; use rustfs_utils::http::RUSTFS_BUCKET_SOURCE_VERSION_ID; use rustfs_utils::path::is_dir_object; use s3s::{S3Result, s3_error}; @@ -125,14 +128,14 @@ pub async fn get_opts( Ok(opts) } -fn fill_conditional_writes_opts_from_header(headers: &HeaderMap, opts: &mut ObjectOptions) -> Result<()> { +fn fill_conditional_writes_opts_from_header(headers: &HeaderMap, opts: &mut ObjectOptions) -> std::io::Result<()> { if headers.contains_key("If-None-Match") || headers.contains_key("If-Match") { let mut preconditions = HTTPPreconditions::default(); if let Some(if_none_match) = headers.get("If-None-Match") { preconditions.if_none_match = Some( if_none_match .to_str() - .map_err(|_| StorageError::other("Invalid If-None-Match header"))? + .map_err(|_| std::io::Error::other("Invalid If-None-Match header"))? .to_string(), ); } @@ -140,7 +143,7 @@ fn fill_conditional_writes_opts_from_header(headers: &HeaderMap, op preconditions.if_match = Some( if_match .to_str() - .map_err(|_| StorageError::other("Invalid If-Match header"))? + .map_err(|_| std::io::Error::other("Invalid If-Match header"))? .to_string(), ); } @@ -200,8 +203,32 @@ pub async fn put_opts( Ok(opts) } -pub fn get_complete_multipart_upload_opts(headers: &HeaderMap) -> Result { - let mut opts = ObjectOptions::default(); +pub fn get_complete_multipart_upload_opts(headers: &HeaderMap) -> std::io::Result { + let mut user_defined = HashMap::new(); + + let mut replication_request = false; + if let Some(v) = headers.get(RUSTFS_BUCKET_REPLICATION_REQUEST) { + user_defined.insert( + format!("{RESERVED_METADATA_PREFIX_LOWER}Actual-Object-Size"), + v.to_str().unwrap_or_default().to_owned(), + ); + replication_request = true; + } + + if let Some(v) = headers.get(RUSTFS_BUCKET_REPLICATION_SSEC_CHECKSUM) { + user_defined.insert( + RUSTFS_BUCKET_REPLICATION_SSEC_CHECKSUM.to_string(), + v.to_str().unwrap_or_default().to_owned(), + ); + } + + let mut opts = ObjectOptions { + want_checksum: rustfs_rio::get_content_checksum(headers)?, + user_defined, + replication_request, + ..Default::default() + }; + fill_conditional_writes_opts_from_header(headers, &mut opts)?; Ok(opts) } diff --git a/scripts/run.sh b/scripts/run.sh index eb1a01fa..a5448ab8 100755 --- a/scripts/run.sh +++ b/scripts/run.sh @@ -59,7 +59,7 @@ export RUSTFS_EXTERNAL_ADDRESS=":9000" #export RUSTFS_OBS_SERVICE_NAME=rustfs # Service name #export RUSTFS_OBS_SERVICE_VERSION=0.1.0 # Service version export RUSTFS_OBS_ENVIRONMENT=develop # Environment name -export RUSTFS_OBS_LOGGER_LEVEL=info # Log level, supports trace, debug, info, warn, error +export RUSTFS_OBS_LOGGER_LEVEL=debug # Log level, supports trace, debug, info, warn, error export RUSTFS_OBS_LOCAL_LOGGING_ENABLED=true # Whether to enable local logging export RUSTFS_OBS_LOG_DIRECTORY="$current_dir/deploy/logs" # Log directory export RUSTFS_OBS_LOG_ROTATION_TIME="hour" # Log rotation time unit, can be "second", "minute", "hour", "day" @@ -102,7 +102,7 @@ export RUSTFS_NOTIFY_WEBHOOK_QUEUE_DIR_MASTER="$current_dir/deploy/logs/notify" export RUSTFS_NS_SCANNER_INTERVAL=60 # Object scanning interval in seconds # exportRUSTFS_SKIP_BACKGROUND_TASK=true -export RUSTFS_COMPRESSION_ENABLED=true # Whether to enable compression +# export RUSTFS_COMPRESSION_ENABLED=true # Whether to enable compression #export RUSTFS_REGION="us-east-1"
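Note: the trailing-header checksum lookup introduced by this patch is duplicated verbatim in put_object_extract, put_object and upload_part. A follow-up could hoist it into a shared helper; a minimal sketch is below. The helper name is hypothetical and not part of this patch; it only reuses the s3s::dto::ChecksumAlgorithm constants and the rustfs_rio::ChecksumType::key() API already exercised in the hunks above, and it operates on the HeaderMap yielded by the existing trailer.read(..) closure.

use http::HeaderMap;
use s3s::dto::ChecksumAlgorithm;

// Hypothetical helper (illustrative only): resolve the checksum value the client
// sent in the trailing headers for the requested algorithm, mirroring the lookup
// repeated in put_object_extract, put_object and upload_part above.
fn trailing_checksum(alg: &ChecksumAlgorithm, headers: &HeaderMap) -> Option<String> {
    let key = match alg.as_str() {
        ChecksumAlgorithm::CRC32 => rustfs_rio::ChecksumType::CRC32.key(),
        ChecksumAlgorithm::CRC32C => rustfs_rio::ChecksumType::CRC32C.key(),
        ChecksumAlgorithm::SHA1 => rustfs_rio::ChecksumType::SHA1.key(),
        ChecksumAlgorithm::SHA256 => rustfs_rio::ChecksumType::SHA256.key(),
        ChecksumAlgorithm::CRC64NVME => rustfs_rio::ChecksumType::CRC64_NVME.key(),
        _ => return None,
    };
    headers
        .get(key.unwrap_or_default())
        .and_then(|value| value.to_str().ok().map(|s| s.to_string()))
}

With such a helper, each call site could reduce to something like
checksum_crc32 = trailing_checksum(alg, &headers).or(checksum_crc32) inside the
existing trailer.read(..) closure, keeping the per-algorithm fallback behavior unchanged.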