// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use rustfs_ahm::heal::manager::HealConfig;
use rustfs_ahm::scanner::{
    Scanner,
    data_scanner::ScanMode,
    node_scanner::{LoadLevel, NodeScanner, NodeScannerConfig},
};
use rustfs_ecstore::{
    StorageAPI,
    disk::endpoint::Endpoint,
    endpoints::{EndpointServerPools, Endpoints, PoolEndpoints},
    store::ECStore,
    store_api::{MakeBucketOptions, ObjectIO, PutObjReader},
};
use serial_test::serial;
use std::{fs, net::SocketAddr, sync::Arc, sync::OnceLock, time::Duration};
use tempfile::TempDir;
use tokio_util::sync::CancellationToken;

// Global test environment cache to avoid repeated initialization
static GLOBAL_TEST_ENV: OnceLock<(Vec<std::path::PathBuf>, Arc<ECStore>)> = OnceLock::new();
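/// Build a shared test environment: four disk directories under `test_dir`, a
/// single-pool `ECStore` on `127.0.0.1:{port}`, and an initialized bucket
/// metadata system. The result is cached in `GLOBAL_TEST_ENV`, so once any test
/// has initialized it, later calls return the cached store and ignore their
/// `test_dir`/`port` arguments.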
async fn prepare_test_env(test_dir: Option<&str>, port: Option<u16>) -> (Vec<std::path::PathBuf>, Arc<ECStore>) {
    // Check if global environment is already initialized
    if let Some((disk_paths, ecstore)) = GLOBAL_TEST_ENV.get() {
        return (disk_paths.clone(), ecstore.clone());
    }

    // create temp dir as 4 disks
    let test_base_dir = test_dir.unwrap_or("/tmp/rustfs_ahm_optimized_test");
    let temp_dir = std::path::PathBuf::from(test_base_dir);
    if temp_dir.exists() {
        fs::remove_dir_all(&temp_dir).unwrap();
    }
    fs::create_dir_all(&temp_dir).unwrap();

    // create 4 disk dirs
    let disk_paths = vec![
        temp_dir.join("disk1"),
        temp_dir.join("disk2"),
        temp_dir.join("disk3"),
        temp_dir.join("disk4"),
    ];

    for disk_path in &disk_paths {
        fs::create_dir_all(disk_path).unwrap();
    }

    // create EndpointServerPools
    let mut endpoints = Vec::new();
    for (i, disk_path) in disk_paths.iter().enumerate() {
        let mut endpoint = Endpoint::try_from(disk_path.to_str().unwrap()).unwrap();
        // set correct index
        endpoint.set_pool_index(0);
        endpoint.set_set_index(0);
        endpoint.set_disk_index(i);
        endpoints.push(endpoint);
    }

    let pool_endpoints = PoolEndpoints {
        legacy: false,
        set_count: 1,
        drives_per_set: 4,
        endpoints: Endpoints::from(endpoints),
        cmd_line: "test".to_string(),
        platform: format!("OS: {} | Arch: {}", std::env::consts::OS, std::env::consts::ARCH),
    };

    let endpoint_pools = EndpointServerPools(vec![pool_endpoints]);

    // format disks
    rustfs_ecstore::store::init_local_disks(endpoint_pools.clone()).await.unwrap();

    // create ECStore with dynamic port
    let port = port.unwrap_or(9000);
    let server_addr: SocketAddr = format!("127.0.0.1:{port}").parse().unwrap();
    let ecstore = ECStore::new(server_addr, endpoint_pools, CancellationToken::new())
        .await
        .unwrap();

    // init bucket metadata system
    let buckets_list = ecstore
        .list_bucket(&rustfs_ecstore::store_api::BucketOptions {
            no_metadata: true,
            ..Default::default()
        })
        .await
        .unwrap();
    let buckets = buckets_list.into_iter().map(|v| v.name).collect();
    rustfs_ecstore::bucket::metadata_sys::init_bucket_metadata_sys(ecstore.clone(), buckets).await;

    // Store in global cache
    let _ = GLOBAL_TEST_ENV.set((disk_paths.clone(), ecstore.clone()));

    (disk_paths, ecstore)
}
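/// Basic smoke test: create a bucket and object, run a scan cycle, simulate
/// corruption by deleting the object's data on disk1, and verify the scanner
/// still completes and reports metrics.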
#[tokio::test(flavor = "multi_thread")]
#[ignore = "Please run it manually."]
#[serial]
async fn test_optimized_scanner_basic_functionality() {
    const TEST_DIR_BASIC: &str = "/tmp/rustfs_ahm_optimized_test_basic";
    let (disk_paths, ecstore) = prepare_test_env(Some(TEST_DIR_BASIC), Some(9101)).await;

    // create some test data
    let bucket_name = "test-bucket";
    let object_name = "test-object";
    let test_data = b"Hello, Optimized RustFS!";

    // create bucket and verify
    let bucket_opts = MakeBucketOptions::default();
    ecstore
        .make_bucket(bucket_name, &bucket_opts)
        .await
        .expect("make_bucket failed");

    // check bucket really exists
    let buckets = ecstore
        .list_bucket(&rustfs_ecstore::store_api::BucketOptions::default())
        .await
        .unwrap();
    assert!(buckets.iter().any(|b| b.name == bucket_name), "bucket not found after creation");

    // write object
    let mut put_reader = PutObjReader::from_vec(test_data.to_vec());
    let object_opts = rustfs_ecstore::store_api::ObjectOptions::default();
    ecstore
        .put_object(bucket_name, object_name, &mut put_reader, &object_opts)
        .await
        .expect("put_object failed");

    // create optimized Scanner and test basic functionality
    let scanner = Scanner::new(None, None);

    // Test 1: Normal scan - verify object is found
    println!("=== Test 1: Optimized Normal scan ===");
    let scan_result = scanner.scan_cycle().await;
    assert!(scan_result.is_ok(), "Optimized normal scan should succeed");
    let _metrics = scanner.get_metrics().await;
    // Note: The optimized scanner may not immediately show scanned objects as it works differently
    println!("Optimized normal scan completed successfully");

    // Test 2: Simulate disk corruption - delete object data from disk1
    println!("=== Test 2: Optimized corruption handling ===");
    let disk1_bucket_path = disk_paths[0].join(bucket_name);
    let disk1_object_path = disk1_bucket_path.join(object_name);

    // Try to delete the object file from disk1 (simulate corruption)
    // Note: This might fail if ECStore is actively using the file
    match fs::remove_dir_all(&disk1_object_path) {
        Ok(_) => {
            println!("Successfully deleted object from disk1: {disk1_object_path:?}");

            // Verify deletion by checking if the directory still exists
            if disk1_object_path.exists() {
                println!("WARNING: Directory still exists after deletion: {disk1_object_path:?}");
            } else {
                println!("Confirmed: Directory was successfully deleted");
            }
        }
        Err(e) => {
            println!("Could not delete object from disk1 (file may be in use): {disk1_object_path:?} - {e}");
            // This is expected behavior - ECStore might be holding file handles
        }
    }

    // Scan again - should still complete (even with missing data)
    let scan_result_after_corruption = scanner.scan_cycle().await;
    println!("Optimized scan after corruption result: {scan_result_after_corruption:?}");

    // Scanner should handle missing data gracefully
    assert!(
        scan_result_after_corruption.is_ok(),
        "Optimized scanner should handle missing data gracefully"
    );

    // Test 3: Test metrics collection
    println!("=== Test 3: Optimized metrics collection ===");
    let final_metrics = scanner.get_metrics().await;
    println!("Optimized final metrics: {final_metrics:?}");

    // Verify metrics are available (even if different from legacy scanner)
    assert!(final_metrics.last_activity.is_some(), "Should have scan activity");

    // clean up temp dir
    let temp_dir = std::path::PathBuf::from(TEST_DIR_BASIC);
    if let Err(e) = fs::remove_dir_all(&temp_dir) {
        eprintln!("Warning: Failed to clean up temp directory {temp_dir:?}: {e}");
    }
}
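/// Verify that data-usage statistics can be enabled and collected across scan
/// cycles while more objects are written to the bucket.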
#[tokio::test(flavor = "multi_thread")]
#[ignore = "Please run it manually."]
#[serial]
async fn test_optimized_scanner_usage_stats() {
    const TEST_DIR_USAGE_STATS: &str = "/tmp/rustfs_ahm_optimized_test_usage_stats";
    let (_, ecstore) = prepare_test_env(Some(TEST_DIR_USAGE_STATS), Some(9102)).await;

    // prepare test bucket and object
    let bucket = "test-bucket-optimized";
    ecstore.make_bucket(bucket, &Default::default()).await.unwrap();
    let mut pr = PutObjReader::from_vec(b"hello optimized".to_vec());
    ecstore
        .put_object(bucket, "obj1", &mut pr, &Default::default())
        .await
        .unwrap();

    let scanner = Scanner::new(None, None);

    // enable statistics
    scanner.set_config_enable_data_usage_stats(true).await;

    // first scan and get statistics
    scanner.scan_cycle().await.unwrap();
    let du_initial = scanner.get_data_usage_info().await.unwrap();
    // Note: Optimized scanner may work differently, so we're less strict about counts
    println!("Initial data usage: {du_initial:?}");

    // write 3 more objects and get statistics again
    for size in [1024, 2048, 4096] {
        let name = format!("obj_{size}");
        let mut pr = PutObjReader::from_vec(vec![b'x'; size]);
        ecstore.put_object(bucket, &name, &mut pr, &Default::default()).await.unwrap();
    }

    scanner.scan_cycle().await.unwrap();
    let du_after = scanner.get_data_usage_info().await.unwrap();
    println!("Data usage after adding objects: {du_after:?}");
    // The optimized scanner should at least not crash and should return a valid
    // data-usage snapshot; counts may differ from the legacy scanner, so keep the
    // check loose.
    assert!(
        du_after.buckets_count >= du_initial.buckets_count,
        "buckets_count should not decrease after writing more objects"
    );
    // clean up temp dir
    let _ = std::fs::remove_dir_all(std::path::Path::new(TEST_DIR_USAGE_STATS));
}
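/// Remove a bucket directory from one disk to simulate a missing volume and
/// check that a scan cycle with healing enabled still completes.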
#[tokio::test(flavor = "multi_thread")]
#[ignore = "Please run it manually."]
#[serial]
async fn test_optimized_volume_healing_functionality() {
    const TEST_DIR_VOLUME_HEAL: &str = "/tmp/rustfs_ahm_optimized_test_volume_heal";
    let (disk_paths, ecstore) = prepare_test_env(Some(TEST_DIR_VOLUME_HEAL), Some(9103)).await;

    // Create test buckets
    let bucket1 = "test-bucket-1-opt";
    let bucket2 = "test-bucket-2-opt";

    ecstore.make_bucket(bucket1, &Default::default()).await.unwrap();
    ecstore.make_bucket(bucket2, &Default::default()).await.unwrap();

    // Add some test objects
    let mut pr1 = PutObjReader::from_vec(b"test data 1 optimized".to_vec());
    ecstore
        .put_object(bucket1, "obj1", &mut pr1, &Default::default())
        .await
        .unwrap();

    let mut pr2 = PutObjReader::from_vec(b"test data 2 optimized".to_vec());
    ecstore
        .put_object(bucket2, "obj2", &mut pr2, &Default::default())
        .await
        .unwrap();

    // Simulate missing bucket on one disk by removing bucket directory
    let disk1_bucket1_path = disk_paths[0].join(bucket1);
    if disk1_bucket1_path.exists() {
        println!("Removing bucket directory to simulate missing volume: {disk1_bucket1_path:?}");
        match fs::remove_dir_all(&disk1_bucket1_path) {
            Ok(_) => println!("Successfully removed bucket directory from disk 0"),
            Err(e) => println!("Failed to remove bucket directory: {e}"),
        }
    }

    // Create optimized scanner
    let scanner = Scanner::new(None, None);

    // Enable healing in config
    scanner.set_config_enable_healing(true).await;

    println!("=== Testing optimized volume healing functionality ===");

    // Run scan cycle which should detect missing volume
    let scan_result = scanner.scan_cycle().await;
    assert!(scan_result.is_ok(), "Optimized scan cycle should succeed");

    // Get metrics to verify scan completed
    let metrics = scanner.get_metrics().await;
    println!("Optimized volume healing detection test completed successfully");
    println!("Optimized scan metrics: {metrics:?}");

    // Clean up
    let _ = std::fs::remove_dir_all(std::path::Path::new(TEST_DIR_VOLUME_HEAL));
}
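/// Rough performance check: time two consecutive scan cycles over a small set
/// of objects and require the first to finish within 30 seconds.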
#[tokio::test(flavor = "multi_thread")]
#[ignore = "Please run it manually."]
#[serial]
async fn test_optimized_performance_characteristics() {
    const TEST_DIR_PERF: &str = "/tmp/rustfs_ahm_optimized_test_perf";
    let (_, ecstore) = prepare_test_env(Some(TEST_DIR_PERF), Some(9104)).await;

    // Create test bucket with multiple objects
    let bucket_name = "performance-test-bucket";
    ecstore.make_bucket(bucket_name, &Default::default()).await.unwrap();

    // Create several test objects
    for i in 0..10 {
        let object_name = format!("perf-object-{i}");
        let test_data = vec![b'A' + (i % 26) as u8; 1024 * (i + 1)]; // Variable size objects
        let mut put_reader = PutObjReader::from_vec(test_data);
        let object_opts = rustfs_ecstore::store_api::ObjectOptions::default();
        ecstore
            .put_object(bucket_name, &object_name, &mut put_reader, &object_opts)
            .await
            .unwrap_or_else(|_| panic!("Failed to create object {object_name}"));
    }

    // Create optimized scanner
    let scanner = Scanner::new(None, None);

    // Test performance characteristics
    println!("=== Testing optimized scanner performance ===");

    // Measure scan time
    let start_time = std::time::Instant::now();
    let scan_result = scanner.scan_cycle().await;
    let scan_duration = start_time.elapsed();

    println!("Optimized scan completed in: {scan_duration:?}");
    assert!(scan_result.is_ok(), "Performance scan should succeed");

    // Verify the scan was reasonably fast (should be faster than old concurrent scanner)
    // Note: This is a rough check - in practice, optimized scanner should be much faster
    assert!(
        scan_duration < Duration::from_secs(30),
        "Optimized scan should complete within 30 seconds"
    );

    // Test memory usage is reasonable (indirect test through successful completion)
    let metrics = scanner.get_metrics().await;
    println!("Performance test metrics: {metrics:?}");

    // Test that multiple scans don't degrade performance significantly
    let start_time2 = std::time::Instant::now();
    let _scan_result2 = scanner.scan_cycle().await;
    let scan_duration2 = start_time2.elapsed();

    println!("Second optimized scan completed in: {scan_duration2:?}");

    // Second scan should be similar or faster due to caching
    let performance_ratio = scan_duration2.as_millis() as f64 / scan_duration.as_millis() as f64;
    println!("Performance ratio (second/first): {performance_ratio:.2}");

    // Clean up
    let _ = std::fs::remove_dir_all(std::path::Path::new(TEST_DIR_PERF));
}
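/// Drive the `NodeScanner`'s IO monitor with simulated business load at each
/// `LoadLevel` and verify the throttler's pause/delay decisions scale with load.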
#[tokio::test(flavor = "multi_thread")]
#[ignore = "Please run it manually."]
#[serial]
async fn test_optimized_load_balancing_and_throttling() {
    let temp_dir = TempDir::new().unwrap();

    // Create a node scanner with optimized configuration
    let config = NodeScannerConfig {
        data_dir: temp_dir.path().to_path_buf(),
        enable_smart_scheduling: true,
        scan_interval: Duration::from_millis(100), // Fast for testing
        disk_scan_delay: Duration::from_millis(50),
        ..Default::default()
    };

    let node_scanner = NodeScanner::new("test-optimized-node".to_string(), config);

    // Initialize the scanner
    node_scanner.initialize_stats().await.unwrap();

    let io_monitor = node_scanner.get_io_monitor();
    let throttler = node_scanner.get_io_throttler();

    // Start IO monitoring
    io_monitor.start().await.expect("Failed to start IO monitor");

    // Test load balancing scenarios
    let load_scenarios = vec![
        (LoadLevel::Low, 10, 100, 0, 5), // (load level, latency, qps, error rate, connections)
        (LoadLevel::Medium, 30, 300, 10, 20),
        (LoadLevel::High, 80, 800, 50, 50),
        (LoadLevel::Critical, 200, 1200, 100, 100),
    ];

    for (expected_level, latency, qps, error_rate, connections) in load_scenarios {
        println!("Testing load scenario: {expected_level:?}");

        // Update business metrics to simulate load
        node_scanner
            .update_business_metrics(latency, qps, error_rate, connections)
            .await;

        // Wait for monitoring system to respond
        tokio::time::sleep(Duration::from_millis(500)).await;

        // Get current load level
        let current_level = io_monitor.get_business_load_level().await;
        println!("Detected load level: {current_level:?}");

        // Get throttling decision
        let _current_metrics = io_monitor.get_current_metrics().await;
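        // Build a synthetic IO metrics snapshot from the simulated load so the
        // throttler has concrete IOPS/latency/CPU numbers to base its decision on.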
        let metrics_snapshot = rustfs_ahm::scanner::io_throttler::MetricsSnapshot {
            iops: 100 + qps / 10,
            latency,
            cpu_usage: std::cmp::min(50 + (qps / 20) as u8, 100),
            memory_usage: 40,
        };

        let decision = throttler.make_throttle_decision(current_level, Some(metrics_snapshot)).await;

        println!(
            "Throttle decision: should_pause={}, delay={:?}",
            decision.should_pause, decision.suggested_delay
        );

        // Verify throttling behavior
        match current_level {
            LoadLevel::Critical => {
                assert!(decision.should_pause, "Critical load should trigger pause");
            }
            LoadLevel::High => {
                assert!(
                    decision.suggested_delay > Duration::from_millis(1000),
                    "High load should suggest significant delay"
                );
            }
            _ => {
                // Lower loads should have reasonable delays
                assert!(
                    decision.suggested_delay < Duration::from_secs(5),
                    "Lower loads should not have excessive delays"
                );
            }
        }
    }

    io_monitor.stop().await;

    println!("Optimized load balancing and throttling test completed successfully");
}
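/// Write a 20MB multi-part object, delete its `part.1` file from one disk, and
/// verify that a deep scan with healing enabled still completes and reports
/// heal statistics despite the missing part.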
#[tokio::test(flavor = "multi_thread")]
#[ignore = "Please run it manually."]
#[serial]
async fn test_optimized_scanner_detect_missing_data_parts() {
    const TEST_DIR_MISSING_PARTS: &str = "/tmp/rustfs_ahm_optimized_test_missing_parts";
    let (disk_paths, ecstore) = prepare_test_env(Some(TEST_DIR_MISSING_PARTS), Some(9105)).await;

    // Create test bucket
    let bucket_name = "test-bucket-parts-opt";
    let object_name = "large-object-20mb-opt";

    ecstore.make_bucket(bucket_name, &Default::default()).await.unwrap();

    // Create a 20MB object to ensure it has multiple parts
    let large_data = vec![b'A'; 20 * 1024 * 1024]; // 20MB of 'A' characters
    let mut put_reader = PutObjReader::from_vec(large_data);
    let object_opts = rustfs_ecstore::store_api::ObjectOptions::default();

    println!("=== Creating 20MB object ===");
    ecstore
        .put_object(bucket_name, object_name, &mut put_reader, &object_opts)
        .await
        .expect("put_object failed for large object");

    // Verify object was created and get its info
    let obj_info = ecstore
        .get_object_info(bucket_name, object_name, &object_opts)
        .await
        .expect("get_object_info failed");

    println!(
        "Object info: size={}, parts={}, inlined={}",
        obj_info.size,
        obj_info.parts.len(),
        obj_info.inlined
    );
    assert!(!obj_info.inlined, "20MB object should not be inlined");
    println!("Object has {} parts", obj_info.parts.len());

    // Create HealManager and optimized Scanner
    let heal_storage = Arc::new(rustfs_ahm::heal::storage::ECStoreHealStorage::new(ecstore.clone()));
    let heal_config = HealConfig {
        enable_auto_heal: true,
        heal_interval: Duration::from_millis(100),
        max_concurrent_heals: 4,
        task_timeout: Duration::from_secs(300),
        queue_size: 1000,
    };
    let heal_manager = Arc::new(rustfs_ahm::heal::HealManager::new(heal_storage, Some(heal_config)));
    heal_manager.start().await.unwrap();
    let scanner = Scanner::new(None, Some(heal_manager.clone()));

    // Enable healing to detect missing parts
    scanner.set_config_enable_healing(true).await;
    scanner.set_config_scan_mode(ScanMode::Deep).await;

    println!("=== Initial scan (all parts present) ===");
    let initial_scan = scanner.scan_cycle().await;
    assert!(initial_scan.is_ok(), "Initial scan should succeed");

    let initial_metrics = scanner.get_metrics().await;
    println!("Initial scan metrics: objects_scanned={}", initial_metrics.objects_scanned);

    // Simulate data part loss by deleting part files from some disks
    println!("=== Simulating data part loss ===");
    let mut deleted_parts = 0;
    let mut deleted_part_paths = Vec::new();
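    // On each disk the object lives under `<disk>/<bucket>/<object>/`, holding an
    // `xl.meta` file plus a data directory (UUID) with the `part.N` files; removing
    // `part.1` from one disk leaves a missing shard for the deep scan to find.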
    for (disk_idx, disk_path) in disk_paths.iter().enumerate() {
        if disk_idx > 0 {
            // Only delete from first disk
            break;
        }
        let bucket_path = disk_path.join(bucket_name);
        let object_path = bucket_path.join(object_name);

        if !object_path.exists() {
            continue;
        }

        // Find the data directory (UUID)
        if let Ok(entries) = fs::read_dir(&object_path) {
            for entry in entries.flatten() {
                let entry_path = entry.path();
                if entry_path.is_dir() {
                    // This is likely the data_dir; look for part files inside
                    let part_file_path = entry_path.join("part.1");
                    if part_file_path.exists() {
                        match fs::remove_file(&part_file_path) {
                            Ok(_) => {
                                println!("Deleted part file: {part_file_path:?}");
                                deleted_part_paths.push(part_file_path);
                                deleted_parts += 1;
                            }
                            Err(e) => {
                                println!("Failed to delete part file {part_file_path:?}: {e}");
                            }
                        }
                    }
                }
            }
        }
    }

    println!("Deleted {deleted_parts} part files to simulate data loss");

    // Scan again to detect missing parts
    println!("=== Scan after data deletion (should detect missing data) ===");
    let scan_after_deletion = scanner.scan_cycle().await;

    // Wait a bit for the heal manager to process
    tokio::time::sleep(Duration::from_millis(500)).await;

    // Check heal statistics
    let heal_stats = heal_manager.get_statistics().await;
    println!("Heal statistics:");
    println!(" - total_tasks: {}", heal_stats.total_tasks);
    println!(" - successful_tasks: {}", heal_stats.successful_tasks);
    println!(" - failed_tasks: {}", heal_stats.failed_tasks);

    // Get scanner metrics
    let final_metrics = scanner.get_metrics().await;
    println!("Scanner metrics after deletion scan:");
    println!(" - objects_scanned: {}", final_metrics.objects_scanned);

    // The optimized scanner should handle missing data gracefully
    match scan_after_deletion {
        Ok(_) => {
            println!("Optimized scanner completed successfully despite missing data");
        }
        Err(e) => {
            println!("Optimized scanner detected errors (acceptable): {e}");
        }
    }

    println!("=== Test completed ===");
    println!("Optimized scanner successfully handled missing data scenario");

    // Clean up
    let _ = std::fs::remove_dir_all(std::path::Path::new(TEST_DIR_MISSING_PARTS));
}
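/// Delete `xl.meta` files from two of the four disks for an object and verify
/// that a deep scan with healing enabled handles the missing metadata gracefully.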
#[tokio::test(flavor = "multi_thread")]
#[ignore = "Please run it manually."]
#[serial]
async fn test_optimized_scanner_detect_missing_xl_meta() {
    const TEST_DIR_MISSING_META: &str = "/tmp/rustfs_ahm_optimized_test_missing_meta";
    let (disk_paths, ecstore) = prepare_test_env(Some(TEST_DIR_MISSING_META), Some(9106)).await;

    // Create test bucket
    let bucket_name = "test-bucket-meta-opt";
    let object_name = "test-object-meta-opt";

    ecstore.make_bucket(bucket_name, &Default::default()).await.unwrap();

    // Create a test object
    let test_data = vec![b'B'; 5 * 1024 * 1024]; // 5MB of 'B' characters
    let mut put_reader = PutObjReader::from_vec(test_data);
    let object_opts = rustfs_ecstore::store_api::ObjectOptions::default();

    println!("=== Creating test object ===");
    ecstore
        .put_object(bucket_name, object_name, &mut put_reader, &object_opts)
        .await
        .expect("put_object failed");

    // Create HealManager and optimized Scanner
    let heal_storage = Arc::new(rustfs_ahm::heal::storage::ECStoreHealStorage::new(ecstore.clone()));
    let heal_config = HealConfig {
        enable_auto_heal: true,
        heal_interval: Duration::from_millis(100),
        max_concurrent_heals: 4,
        task_timeout: Duration::from_secs(300),
        queue_size: 1000,
    };
    let heal_manager = Arc::new(rustfs_ahm::heal::HealManager::new(heal_storage, Some(heal_config)));
    heal_manager.start().await.unwrap();
    let scanner = Scanner::new(None, Some(heal_manager.clone()));

    // Enable healing to detect missing metadata
    scanner.set_config_enable_healing(true).await;
    scanner.set_config_scan_mode(ScanMode::Deep).await;

    println!("=== Initial scan (all metadata present) ===");
    let initial_scan = scanner.scan_cycle().await;
    assert!(initial_scan.is_ok(), "Initial scan should succeed");

    // Simulate xl.meta file loss by deleting xl.meta files from some disks
    println!("=== Simulating xl.meta file loss ===");
    let mut deleted_meta_files = 0;
    let mut deleted_meta_paths = Vec::new();

    for (disk_idx, disk_path) in disk_paths.iter().enumerate() {
        if disk_idx >= 2 {
            // Only delete from first two disks to ensure some copies remain
            break;
        }
        let bucket_path = disk_path.join(bucket_name);
        let object_path = bucket_path.join(object_name);

        if !object_path.exists() {
            continue;
        }

        // Delete xl.meta file
        let xl_meta_path = object_path.join("xl.meta");
        if xl_meta_path.exists() {
            match fs::remove_file(&xl_meta_path) {
                Ok(_) => {
                    println!("Deleted xl.meta file: {xl_meta_path:?}");
                    deleted_meta_paths.push(xl_meta_path);
                    deleted_meta_files += 1;
                }
                Err(e) => {
                    println!("Failed to delete xl.meta file {xl_meta_path:?}: {e}");
                }
            }
        }
    }

    println!("Deleted {deleted_meta_files} xl.meta files to simulate metadata loss");

    // Scan again to detect missing metadata
    println!("=== Scan after xl.meta deletion ===");
    let scan_after_deletion = scanner.scan_cycle().await;

    // Wait for heal manager to process
    tokio::time::sleep(Duration::from_millis(1000)).await;

    // Check heal statistics
    let final_heal_stats = heal_manager.get_statistics().await;
    println!("Final heal statistics:");
    println!(" - total_tasks: {}", final_heal_stats.total_tasks);
    println!(" - successful_tasks: {}", final_heal_stats.successful_tasks);
    println!(" - failed_tasks: {}", final_heal_stats.failed_tasks);
    // The optimized scanner should handle missing metadata gracefully
    match scan_after_deletion {
        Ok(_) => {
            println!("Optimized scanner completed successfully despite missing metadata");
        }
        Err(e) => {
            println!("Optimized scanner detected errors (acceptable): {e}");
        }
    }

    println!("=== Test completed ===");
    println!("Optimized scanner successfully handled missing xl.meta scenario");

    // Clean up
    let _ = std::fs::remove_dir_all(std::path::Path::new(TEST_DIR_MISSING_META));
}
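/// Scan a bucket of healthy objects twice and check, leniently, that they do
/// not produce unnecessary heal tasks or corruption reports.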
#[tokio::test(flavor = "multi_thread")]
#[ignore = "Please run it manually."]
#[serial]
async fn test_optimized_scanner_healthy_objects_not_marked_corrupted() {
    const TEST_DIR_HEALTHY: &str = "/tmp/rustfs_ahm_optimized_test_healthy_objects";
    let (_, ecstore) = prepare_test_env(Some(TEST_DIR_HEALTHY), Some(9107)).await;

    // Create heal manager for this test
    let heal_config = HealConfig::default();
    let heal_storage = Arc::new(rustfs_ahm::heal::storage::ECStoreHealStorage::new(ecstore.clone()));
    let heal_manager = Arc::new(rustfs_ahm::heal::manager::HealManager::new(heal_storage, Some(heal_config)));
    heal_manager.start().await.unwrap();

    // Create optimized scanner with healing enabled
    let scanner = Scanner::new(None, Some(heal_manager.clone()));
    scanner.set_config_enable_healing(true).await;
    scanner.set_config_scan_mode(ScanMode::Deep).await;

    // Create test bucket and multiple healthy objects
    let bucket_name = "healthy-test-bucket-opt";
    let bucket_opts = MakeBucketOptions::default();
    ecstore.make_bucket(bucket_name, &bucket_opts).await.unwrap();

    // Create multiple test objects with different sizes
    let test_objects = vec![
        ("small-object-opt", b"Small test data optimized".to_vec()),
        ("medium-object-opt", vec![42u8; 1024]),  // 1KB
        ("large-object-opt", vec![123u8; 10240]), // 10KB
    ];

    let object_opts = rustfs_ecstore::store_api::ObjectOptions::default();

    // Write all test objects
    for (object_name, test_data) in &test_objects {
        let mut put_reader = PutObjReader::from_vec(test_data.clone());
        ecstore
            .put_object(bucket_name, object_name, &mut put_reader, &object_opts)
            .await
            .expect("Failed to put test object");
        println!("Created test object: {object_name} (size: {} bytes)", test_data.len());
    }

    // Wait a moment for objects to be fully written
    tokio::time::sleep(Duration::from_millis(100)).await;

    // Get initial heal statistics
    let initial_heal_stats = heal_manager.get_statistics().await;
    println!("Initial heal statistics:");
    println!(" - total_tasks: {}", initial_heal_stats.total_tasks);

    // Perform initial scan on healthy objects
    println!("=== Scanning healthy objects ===");
    let scan_result = scanner.scan_cycle().await;
    assert!(scan_result.is_ok(), "Scan of healthy objects should succeed");

    // Wait for any potential heal tasks to be processed
    tokio::time::sleep(Duration::from_millis(1000)).await;

    // Get scanner metrics after scanning
    let metrics = scanner.get_metrics().await;
    println!("Optimized scanner metrics after scanning healthy objects:");
    println!(" - objects_scanned: {}", metrics.objects_scanned);
    println!(" - healthy_objects: {}", metrics.healthy_objects);
    println!(" - corrupted_objects: {}", metrics.corrupted_objects);

    // Get heal statistics after scanning
    let post_scan_heal_stats = heal_manager.get_statistics().await;
    println!("Heal statistics after scanning healthy objects:");
    println!(" - total_tasks: {}", post_scan_heal_stats.total_tasks);
    println!(" - successful_tasks: {}", post_scan_heal_stats.successful_tasks);
    println!(" - failed_tasks: {}", post_scan_heal_stats.failed_tasks);
    // Healthy objects should not trigger unnecessary heal tasks (checked leniently below)
    let heal_tasks_created = post_scan_heal_stats.total_tasks - initial_heal_stats.total_tasks;
    if heal_tasks_created > 0 {
        println!("WARNING: {heal_tasks_created} heal tasks were created for healthy objects");
        // For optimized scanner, we're more lenient as it may work differently
        println!("Note: Optimized scanner may have different behavior than legacy scanner");
    } else {
        println!("✓ No heal tasks created for healthy objects - optimized scanner working correctly");
    }

    // Perform a second scan to ensure consistency
    println!("=== Second scan to verify consistency ===");
    let second_scan_result = scanner.scan_cycle().await;
    assert!(second_scan_result.is_ok(), "Second scan should also succeed");

    let second_metrics = scanner.get_metrics().await;
    let _final_heal_stats = heal_manager.get_statistics().await;

    println!("Second scan metrics:");
    println!(" - objects_scanned: {}", second_metrics.objects_scanned);

    println!("=== Test completed successfully ===");
    println!("✓ Optimized scanner handled healthy objects correctly");
    println!("✓ No false positive corruption detection");
    println!("✓ Objects remain accessible after scanning");

    // Clean up
    let _ = std::fs::remove_dir_all(std::path::Path::new(TEST_DIR_HEALTHY));
}