From ab752458ce431c6397175d167beee2ea00507d3e Mon Sep 17 00:00:00 2001
From: weisd
Date: Mon, 5 Jan 2026 15:57:15 +0800
Subject: [PATCH] Fix Path Traversal and Enhance Object Validation (#1387)

---
 crates/ecstore/src/bucket/utils.rs | 421 ++++++++++++++++++++++-
 crates/ecstore/src/disk/disk_store.rs | 25 +-
 crates/ecstore/src/disk/error.rs | 6 +
 crates/ecstore/src/disk/local.rs | 307 +++++++++++------
 crates/ecstore/src/store.rs | 271 +--------------
 crates/ecstore/src/store_list_objects.rs | 2 +-
 crates/utils/src/path.rs | 329 +++++++++++++++++-
 rustfs/src/admin/handlers.rs | 2 +-
 8 files changed, 967 insertions(+), 396 deletions(-)

diff --git a/crates/ecstore/src/bucket/utils.rs b/crates/ecstore/src/bucket/utils.rs
index 98e0b119..a012798b 100644
--- a/crates/ecstore/src/bucket/utils.rs
+++ b/crates/ecstore/src/bucket/utils.rs
@@ -13,7 +13,8 @@
 // limitations under the License.
 
 use crate::disk::RUSTFS_META_BUCKET;
-use crate::error::{Error, Result};
+use crate::error::{Error, Result, StorageError};
+use rustfs_utils::path::SLASH_SEPARATOR;
 use s3s::xml;
 
 pub fn is_meta_bucketname(name: &str) -> bool {
@@ -21,6 +22,7 @@
 }
 
 use regex::Regex;
+use tracing::instrument;
 
 lazy_static::lazy_static! {
     static ref VALID_BUCKET_NAME: Regex = Regex::new(r"^[A-Za-z0-9][A-Za-z0-9\.\-\_\:]{1,61}[A-Za-z0-9]$").unwrap();
@@ -113,3 +115,420 @@ pub fn serialize<T: serde::Serialize>(val: &T) -> xml::SerResult<Vec<u8>> {
     }
     Ok(buf)
 }
+
+pub fn has_bad_path_component(path: &str) -> bool {
+    let n = path.len();
+    if n > 32 << 10 {
+        // At 32K we are beyond reasonable.
+        return true;
+    }
+
+    let bytes = path.as_bytes();
+    let mut i = 0;
+
+    // Skip leading slashes (for the sake of Windows, '\' counts as a separator as well)
+    while i < n && (bytes[i] == b'/' || bytes[i] == b'\\') {
+        i += 1;
+    }
+
+    while i < n {
+        // Find the next segment
+        let start = i;
+        while i < n && bytes[i] != b'/' && bytes[i] != b'\\' {
+            i += 1;
+        }
+
+        // Trim whitespace from the segment
+        let mut segment_start = start;
+        let mut segment_end = i;
+
+        while segment_start < segment_end && bytes[segment_start].is_ascii_whitespace() {
+            segment_start += 1;
+        }
+        while segment_end > segment_start && bytes[segment_end - 1].is_ascii_whitespace() {
+            segment_end -= 1;
+        }
+
+        // Check for ".." or "."
+        match segment_end - segment_start {
+            2 if segment_start + 1 < n && bytes[segment_start] == b'.' && bytes[segment_start + 1] == b'.' => {
+                return true;
+            }
+            1 if bytes[segment_start] == b'.' => {
+                return true;
+            }
+            _ => {}
+        }
+
+        if i < n {
+            i += 1;
+        }
+    }
+
+    false
+}
+
+pub fn is_valid_object_prefix(object: &str) -> bool {
+    if has_bad_path_component(object) {
+        return false;
+    }
+
+    if !object.is_char_boundary(0) || std::str::from_utf8(object.as_bytes()).is_err() {
+        return false;
+    }
+
+    if object.contains("//") {
+        return false;
+    }
+
+    // This is valid for AWS S3, but it will never
+    // work with file systems; we reject it here
+    // to return "object name invalid" rather than
+    // a cryptic error from the file system.
+    !object.contains('\0')
+}
+
+pub fn is_valid_object_name(object: &str) -> bool {
+    // An object name must be non-empty, must not end with a slash, and must be a valid prefix
+    if object.is_empty() {
+        return false;
+    }
+
+    if object.ends_with(SLASH_SEPARATOR) {
+        return false;
+    }
+
+    is_valid_object_prefix(object)
+}
+
+pub fn check_object_name_for_length_and_slash(bucket: &str, object: &str) -> Result<()> {
+    if object.len() > 1024 {
+        return Err(StorageError::ObjectNameTooLong(bucket.to_owned(), object.to_owned()));
+    }
+
+    if object.starts_with(SLASH_SEPARATOR) {
+        return Err(StorageError::ObjectNamePrefixAsSlash(bucket.to_owned(), object.to_owned()));
+    }
+
+    #[cfg(target_os = "windows")]
+    {
+        if object.contains(':')
+            || object.contains('*')
+            || object.contains('?')
+            || object.contains('"')
+            || object.contains('|')
+            || object.contains('<')
+            || object.contains('>')
+            // || object.contains('\\')
+        {
+            return Err(StorageError::ObjectNameInvalid(bucket.to_owned(), object.to_owned()));
+        }
+    }
+
+    Ok(())
+}
+
+pub fn check_copy_obj_args(bucket: &str, object: &str) -> Result<()> {
+    check_bucket_and_object_names(bucket, object)
+}
+
+pub fn check_get_obj_args(bucket: &str, object: &str) -> Result<()> {
+    check_bucket_and_object_names(bucket, object)
+}
+
+pub fn check_del_obj_args(bucket: &str, object: &str) -> Result<()> {
+    check_bucket_and_object_names(bucket, object)
+}
+
+pub fn check_bucket_and_object_names(bucket: &str, object: &str) -> Result<()> {
+    if !is_meta_bucketname(bucket) && check_valid_bucket_name_strict(bucket).is_err() {
+        return Err(StorageError::BucketNameInvalid(bucket.to_string()));
+    }
+
+    if object.is_empty() {
+        return Err(StorageError::ObjectNameInvalid(bucket.to_string(), object.to_string()));
+    }
+
+    if !is_valid_object_prefix(object) {
+        return Err(StorageError::ObjectNameInvalid(bucket.to_string(), object.to_string()));
+    }
+
+    // if cfg!(target_os = "windows") && object.contains('\\') {
+    //     return Err(StorageError::ObjectNameInvalid(bucket.to_string(), object.to_string()));
+    // }
+
+    Ok(())
+}
+
+pub fn check_list_objs_args(bucket: &str, prefix: &str, _marker: &Option<String>) -> Result<()> {
+    if !is_meta_bucketname(bucket) && check_valid_bucket_name_strict(bucket).is_err() {
+        return Err(StorageError::BucketNameInvalid(bucket.to_string()));
+    }
+
+    if !is_valid_object_prefix(prefix) {
+        return Err(StorageError::ObjectNameInvalid(bucket.to_string(), prefix.to_string()));
+    }
+
+    Ok(())
+}
+
+pub fn check_list_multipart_args(
+    bucket: &str,
+    prefix: &str,
+    key_marker: &Option<String>,
+    upload_id_marker: &Option<String>,
+    _delimiter: &Option<String>,
+) -> Result<()> {
+    check_list_objs_args(bucket, prefix, key_marker)?;
+
+    if let Some(upload_id_marker) = upload_id_marker {
+        if let Some(key_marker) = key_marker
+            && key_marker.ends_with('/')
+        {
+            return Err(StorageError::InvalidUploadIDKeyCombination(
+                upload_id_marker.to_string(),
+                key_marker.to_string(),
+            ));
+        }
+
+        if let Err(_e) = base64_simd::URL_SAFE_NO_PAD.decode_to_vec(upload_id_marker.as_bytes()) {
+            return Err(StorageError::MalformedUploadID(upload_id_marker.to_owned()));
+        }
+    }
+
+    Ok(())
+}
+
+pub fn check_object_args(bucket: &str, object: &str) -> Result<()> {
+    if !is_meta_bucketname(bucket) && check_valid_bucket_name_strict(bucket).is_err() {
+        return Err(StorageError::BucketNameInvalid(bucket.to_string()));
+    }
+
+    check_object_name_for_length_and_slash(bucket, object)?;
+
+    if !is_valid_object_name(object) {
+        return Err(StorageError::ObjectNameInvalid(bucket.to_string(), object.to_string()));
+    }
+
+    Ok(())
+}
+
+pub fn check_new_multipart_args(bucket: 
&str, object: &str) -> Result<()> { + check_object_args(bucket, object) +} + +pub fn check_multipart_object_args(bucket: &str, object: &str, upload_id: &str) -> Result<()> { + if let Err(e) = base64_simd::URL_SAFE_NO_PAD.decode_to_vec(upload_id.as_bytes()) { + return Err(StorageError::MalformedUploadID(format!("{bucket}/{object}-{upload_id},err:{e}"))); + }; + check_object_args(bucket, object) +} + +pub fn check_put_object_part_args(bucket: &str, object: &str, upload_id: &str) -> Result<()> { + check_multipart_object_args(bucket, object, upload_id) +} + +pub fn check_list_parts_args(bucket: &str, object: &str, upload_id: &str) -> Result<()> { + check_multipart_object_args(bucket, object, upload_id) +} + +pub fn check_complete_multipart_args(bucket: &str, object: &str, upload_id: &str) -> Result<()> { + check_multipart_object_args(bucket, object, upload_id) +} + +pub fn check_abort_multipart_args(bucket: &str, object: &str, upload_id: &str) -> Result<()> { + check_multipart_object_args(bucket, object, upload_id) +} + +#[instrument(level = "debug")] +pub fn check_put_object_args(bucket: &str, object: &str) -> Result<()> { + if !is_meta_bucketname(bucket) && check_valid_bucket_name_strict(bucket).is_err() { + return Err(StorageError::BucketNameInvalid(bucket.to_string())); + } + + check_object_name_for_length_and_slash(bucket, object)?; + + if object.is_empty() || !is_valid_object_prefix(object) { + return Err(StorageError::ObjectNameInvalid(bucket.to_string(), object.to_string())); + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + // Test validation functions + #[test] + fn test_is_valid_object_name() { + // Valid cases + assert!(is_valid_object_name("valid-object-name")); + assert!(is_valid_object_name("object/with/slashes")); + assert!(is_valid_object_name("object with spaces")); + assert!(is_valid_object_name("object_with_underscores")); + assert!(is_valid_object_name("object.with.dots")); + assert!(is_valid_object_name("single")); + assert!(is_valid_object_name("file.txt")); + assert!(is_valid_object_name("path/to/file.txt")); + assert!(is_valid_object_name("a/b/c/d/e/f")); + assert!(is_valid_object_name("object-123")); + assert!(is_valid_object_name("object(1)")); + assert!(is_valid_object_name("object[1]")); + assert!(is_valid_object_name("object@domain.com")); + + // Invalid cases - empty string + assert!(!is_valid_object_name("")); + + // Invalid cases - ends with slash (object names cannot end with slash) + assert!(!is_valid_object_name("object/")); + assert!(!is_valid_object_name("path/to/file/")); + assert!(!is_valid_object_name("ends/with/slash/")); + + // Invalid cases - bad path components (inherited from is_valid_object_prefix) + assert!(!is_valid_object_name(".")); + assert!(!is_valid_object_name("..")); + assert!(!is_valid_object_name("object/..")); + assert!(!is_valid_object_name("object/.")); + assert!(!is_valid_object_name("../object")); + assert!(!is_valid_object_name("./object")); + assert!(!is_valid_object_name("path/../other")); + assert!(!is_valid_object_name("path/./other")); + assert!(!is_valid_object_name("a/../b/../c")); + assert!(!is_valid_object_name("a/./b/./c")); + + // Invalid cases - double slashes + assert!(!is_valid_object_name("object//with//double//slashes")); + assert!(!is_valid_object_name("//leading/double/slash")); + assert!(!is_valid_object_name("trailing/double/slash//")); + + // Invalid cases - null characters + assert!(!is_valid_object_name("object\x00with\x00null")); + assert!(!is_valid_object_name("object\x00")); + 
assert!(!is_valid_object_name("\x00object"));
+
+        // Invalid cases - overly long path (>32KB)
+        let long_path = "a/".repeat(16385); // 16385 * 2 = 32770 bytes, over 32KB (32768)
+        assert!(!is_valid_object_name(&long_path));
+
+        // Valid cases - prefixes that are valid for object names too
+        assert!(is_valid_object_name("prefix"));
+        assert!(is_valid_object_name("deep/nested/object"));
+        assert!(is_valid_object_name("normal_object"));
+    }
+
+    #[test]
+    fn test_is_valid_object_prefix() {
+        // Valid cases
+        assert!(is_valid_object_prefix("valid-prefix"));
+        assert!(is_valid_object_prefix(""));
+        assert!(is_valid_object_prefix("prefix/with/slashes"));
+        assert!(is_valid_object_prefix("prefix/"));
+        assert!(is_valid_object_prefix("deep/nested/prefix/"));
+        assert!(is_valid_object_prefix("normal-prefix"));
+        assert!(is_valid_object_prefix("prefix_with_underscores"));
+        assert!(is_valid_object_prefix("prefix.with.dots"));
+
+        // Invalid cases - bad path components
+        assert!(!is_valid_object_prefix("."));
+        assert!(!is_valid_object_prefix(".."));
+        assert!(!is_valid_object_prefix("prefix/.."));
+        assert!(!is_valid_object_prefix("prefix/."));
+        assert!(!is_valid_object_prefix("../prefix"));
+        assert!(!is_valid_object_prefix("./prefix"));
+        assert!(!is_valid_object_prefix("prefix/../other"));
+        assert!(!is_valid_object_prefix("prefix/./other"));
+        assert!(!is_valid_object_prefix("a/../b/../c"));
+        assert!(!is_valid_object_prefix("a/./b/./c"));
+
+        // Invalid cases - double slashes
+        assert!(!is_valid_object_prefix("prefix//with//double//slashes"));
+        assert!(!is_valid_object_prefix("//leading/double/slash"));
+        assert!(!is_valid_object_prefix("trailing/double/slash//"));
+
+        // Invalid cases - null characters
+        assert!(!is_valid_object_prefix("prefix\x00with\x00null"));
+        assert!(!is_valid_object_prefix("prefix\x00"));
+        assert!(!is_valid_object_prefix("\x00prefix"));
+
+        // Invalid cases - overly long path (>32KB)
+        let long_path = "a/".repeat(16385); // 16385 * 2 = 32770 bytes, over 32KB (32768)
+        assert!(!is_valid_object_prefix(&long_path));
+    }
+
+    #[test]
+    fn test_check_bucket_and_object_names() {
+        // Valid names
+        assert!(check_bucket_and_object_names("valid-bucket", "valid-object").is_ok());
+
+        // Invalid bucket names
+        assert!(check_bucket_and_object_names("", "valid-object").is_err());
+        assert!(check_bucket_and_object_names("INVALID", "valid-object").is_err());
+
+        // Invalid object names
+        assert!(check_bucket_and_object_names("valid-bucket", "").is_err());
+    }
+
+    #[test]
+    fn test_check_list_objs_args() {
+        assert!(check_list_objs_args("valid-bucket", "", &None).is_ok());
+        assert!(check_list_objs_args("", "", &None).is_err());
+        assert!(check_list_objs_args("INVALID", "", &None).is_err());
+    }
+
+    #[test]
+    fn test_check_multipart_args() {
+        assert!(check_new_multipart_args("valid-bucket", "valid-object").is_ok());
+        assert!(check_new_multipart_args("", "valid-object").is_err());
+        assert!(check_new_multipart_args("valid-bucket", "").is_err());
+
+        // Use a valid base64-encoded upload_id
+        let valid_upload_id = "dXBsb2FkLWlk"; // base64 encoded "upload-id"
+        assert!(check_multipart_object_args("valid-bucket", "valid-object", valid_upload_id).is_ok());
+        assert!(check_multipart_object_args("", "valid-object", valid_upload_id).is_err());
+        assert!(check_multipart_object_args("valid-bucket", "", valid_upload_id).is_err());
+        // An empty string is valid base64 (it decodes to an empty vec) and the function
+        // performs no explicit empty-upload_id check, so this call succeeds
+        
assert!(check_multipart_object_args("valid-bucket", "valid-object", "").is_ok()); + assert!(check_multipart_object_args("valid-bucket", "valid-object", "invalid-base64!").is_err()); + } + + #[test] + fn test_validation_functions_comprehensive() { + // Test object name validation edge cases + assert!(!is_valid_object_name("")); + assert!(is_valid_object_name("a")); + assert!(is_valid_object_name("test.txt")); + assert!(is_valid_object_name("folder/file.txt")); + assert!(is_valid_object_name("very-long-object-name-with-many-characters")); + + // Test prefix validation + assert!(is_valid_object_prefix("")); + assert!(is_valid_object_prefix("prefix")); + assert!(is_valid_object_prefix("prefix/")); + assert!(is_valid_object_prefix("deep/nested/prefix/")); + } + + #[test] + fn test_argument_validation_comprehensive() { + // Test bucket and object name validation + assert!(check_bucket_and_object_names("test-bucket", "test-object").is_ok()); + assert!(check_bucket_and_object_names("test-bucket", "folder/test-object").is_ok()); + + // Test list objects arguments + assert!(check_list_objs_args("test-bucket", "prefix", &Some("marker".to_string())).is_ok()); + assert!(check_list_objs_args("test-bucket", "", &None).is_ok()); + + // Test multipart upload arguments with valid base64 upload_id + let valid_upload_id = "dXBsb2FkLWlk"; // base64 encoded "upload-id" + assert!(check_put_object_part_args("test-bucket", "test-object", valid_upload_id).is_ok()); + assert!(check_list_parts_args("test-bucket", "test-object", valid_upload_id).is_ok()); + assert!(check_complete_multipart_args("test-bucket", "test-object", valid_upload_id).is_ok()); + assert!(check_abort_multipart_args("test-bucket", "test-object", valid_upload_id).is_ok()); + + // Test put object arguments + assert!(check_put_object_args("test-bucket", "test-object").is_ok()); + assert!(check_put_object_args("", "test-object").is_err()); + assert!(check_put_object_args("test-bucket", "").is_err()); + } +} diff --git a/crates/ecstore/src/disk/disk_store.rs b/crates/ecstore/src/disk/disk_store.rs index 3ccd8c7d..0f8fbf4d 100644 --- a/crates/ecstore/src/disk/disk_store.rs +++ b/crates/ecstore/src/disk/disk_store.rs @@ -30,7 +30,7 @@ use std::{ }; use tokio::{sync::RwLock, time}; use tokio_util::sync::CancellationToken; -use tracing::{debug, info, warn}; +use tracing::{info, warn}; use uuid::Uuid; /// Disk health status constants @@ -44,7 +44,6 @@ pub const SKIP_IF_SUCCESS_BEFORE: Duration = Duration::from_secs(5); pub const CHECK_TIMEOUT_DURATION: Duration = Duration::from_secs(5); lazy_static::lazy_static! 
{
-    static ref TEST_OBJ: String = format!("health-check-{}", Uuid::new_v4());
     static ref TEST_DATA: Bytes = Bytes::from(vec![42u8; 2048]);
     static ref TEST_BUCKET: String = ".rustfs.sys/tmp".to_string();
 }
@@ -256,8 +255,9 @@ impl LocalDiskWrapper {
 
                     tokio::time::sleep(Duration::from_secs(1)).await;
 
-                    debug!("health check: performing health check");
-                    if Self::perform_health_check(disk.clone(), &TEST_BUCKET, &TEST_OBJ, &TEST_DATA, true, CHECK_TIMEOUT_DURATION).await.is_err() && health.swap_ok_to_faulty() {
+
+                    let test_obj = format!("health-check-{}", Uuid::new_v4());
+                    if Self::perform_health_check(disk.clone(), &TEST_BUCKET, &test_obj, &TEST_DATA, true, CHECK_TIMEOUT_DURATION).await.is_err() && health.swap_ok_to_faulty() {
                         // Health check failed, disk is considered faulty
                         health.increment_waiting(); // Balance the increment from failed operation
@@ -326,7 +326,7 @@ impl LocalDiskWrapper {
             Ok(result) => match result {
                 Ok(()) => Ok(()),
                 Err(e) => {
-                    debug!("health check: failed: {:?}", e);
+                    warn!("health check: failed: {:?}", e);
 
                     if e == DiskError::FaultyDisk {
                         return Err(e);
@@ -359,7 +359,8 @@ impl LocalDiskWrapper {
                 return;
             }
 
-            match Self::perform_health_check(disk.clone(), &TEST_BUCKET, &TEST_OBJ, &TEST_DATA, false, CHECK_TIMEOUT_DURATION).await {
+            let test_obj = format!("health-check-{}", Uuid::new_v4());
+            match Self::perform_health_check(disk.clone(), &TEST_BUCKET, &test_obj, &TEST_DATA, false, CHECK_TIMEOUT_DURATION).await {
                 Ok(_) => {
                     info!("Disk {} is back online", disk.to_string());
                     health.set_ok();
@@ -484,11 +485,15 @@ impl DiskAPI for LocalDiskWrapper {
             return false;
         };
 
-        let Some(current_disk_id) = *self.disk_id.read().await else {
-            return false;
-        };
+        // If the disk_id is already set, just compare it with the given one
+        if let Some(current_disk_id) = *self.disk_id.read().await {
+            return current_disk_id == disk_id;
+        } else {
+            // if disk_id is not set, update the disk_id
+            let _ = self.set_disk_id_internal(Some(disk_id)).await;
+        }
 
-        current_disk_id == disk_id
+        return true;
     }
 
     fn is_local(&self) -> bool {
diff --git a/crates/ecstore/src/disk/error.rs b/crates/ecstore/src/disk/error.rs
index 9fb5f81c..ebe9df4f 100644
--- a/crates/ecstore/src/disk/error.rs
+++ b/crates/ecstore/src/disk/error.rs
@@ -145,6 +145,9 @@ pub enum DiskError {
 
     #[error("timeout")]
     Timeout,
+
+    #[error("invalid path")]
+    InvalidPath,
 }
 
 impl DiskError {
@@ -373,6 +376,7 @@ impl Clone for DiskError {
             DiskError::ShortWrite => DiskError::ShortWrite,
             DiskError::SourceStalled => DiskError::SourceStalled,
             DiskError::Timeout => DiskError::Timeout,
+            DiskError::InvalidPath => DiskError::InvalidPath,
         }
     }
 }
@@ -421,6 +425,7 @@ impl DiskError {
             DiskError::ShortWrite => 0x27,
             DiskError::SourceStalled => 0x28,
             DiskError::Timeout => 0x29,
+            DiskError::InvalidPath => 0x2A,
         }
     }
 
@@ -467,6 +472,7 @@ impl DiskError {
             0x27 => Some(DiskError::ShortWrite),
             0x28 => Some(DiskError::SourceStalled),
             0x29 => Some(DiskError::Timeout),
+            0x2A => Some(DiskError::InvalidPath),
             _ => None,
         }
     }
diff --git a/crates/ecstore/src/disk/local.rs b/crates/ecstore/src/disk/local.rs
index e622f41f..7f2c5c48 100644
--- a/crates/ecstore/src/disk/local.rs
+++ b/crates/ecstore/src/disk/local.rs
@@ -372,7 +372,7 @@ impl LocalDisk {
         };
 
         // Normalize path components to avoid filesystem calls
-        let normalized = self.normalize_path_components(&abs_path);
+        let normalized = normalize_path_components(abs_path.as_path());
 
         // Cache the result
         {
@@ -393,57 +393,39 @@ impl LocalDisk {
         Ok(normalized)
     }
 
-    // Lightweight path normalization without filesystem calls
-    fn 
normalize_path_components(&self, path: &Path) -> PathBuf { - let mut result = PathBuf::new(); - - for component in path.components() { - match component { - std::path::Component::Normal(name) => { - result.push(name); - } - std::path::Component::ParentDir => { - result.pop(); - } - std::path::Component::CurDir => { - // Ignore current directory components - } - std::path::Component::RootDir => { - result.push(component); - } - std::path::Component::Prefix(_prefix) => { - result.push(component); - } - } - } - - result - } - - // Highly optimized object path generation + // Get the absolute path of an object pub fn get_object_path(&self, bucket: &str, key: &str) -> Result { // For high-frequency paths, use faster string concatenation let cache_key = if key.is_empty() { bucket.to_string() } else { - // Use with_capacity to pre-allocate, reducing memory reallocations - let mut path_str = String::with_capacity(bucket.len() + key.len() + 1); - path_str.push_str(bucket); - path_str.push('/'); - path_str.push_str(key); - path_str + path_join_buf(&[bucket, key]) }; - // Fast path: directly calculate based on root, avoiding cache lookup overhead for simple cases - Ok(self.root.join(&cache_key)) + let path = self.root.join(cache_key); + self.check_valid_path(&path)?; + Ok(path) } + // Get the absolute path of a bucket pub fn get_bucket_path(&self, bucket: &str) -> Result { - Ok(self.root.join(bucket)) + let bucket_path = self.root.join(bucket); + self.check_valid_path(&bucket_path)?; + Ok(bucket_path) + } + + // Check if a path is valid + fn check_valid_path>(&self, path: P) -> Result<()> { + let path = normalize_path_components(path); + if path.starts_with(&self.root) { + Ok(()) + } else { + Err(DiskError::InvalidPath) + } } // Batch path generation with single lock acquisition - pub fn get_object_paths_batch(&self, requests: &[(String, String)]) -> Result> { + fn get_object_paths_batch(&self, requests: &[(String, String)]) -> Result> { let mut results = Vec::with_capacity(requests.len()); let mut cache_misses = Vec::new(); @@ -451,7 +433,7 @@ impl LocalDisk { { let cache = self.path_cache.read(); for (i, (bucket, key)) in requests.iter().enumerate() { - let cache_key = format!("{bucket}/{key}"); + let cache_key = path_join_buf(&[bucket, key]); if let Some(cached_path) = cache.get(&cache_key) { results.push((i, cached_path.clone())); } else { @@ -484,12 +466,12 @@ impl LocalDisk { } // Optimized metadata reading with caching - pub async fn read_metadata_cached(&self, path: PathBuf) -> Result> { + async fn read_metadata_cached(&self, path: PathBuf) -> Result> { read_metadata_cached(path).await } // Smart prefetching for related files - pub async fn read_version_with_prefetch( + async fn read_version_with_prefetch( &self, volume: &str, path: &str, @@ -513,7 +495,7 @@ impl LocalDisk { } // Batch metadata reading for multiple objects - pub async fn read_metadata_batch(&self, requests: Vec<(String, String)>) -> Result>>> { + async fn read_metadata_batch(&self, requests: Vec<(String, String)>) -> Result>>> { let paths: Vec = requests .iter() .map(|(bucket, key)| self.get_object_path(bucket, &format!("{}/{}", key, super::STORAGE_FORMAT_FILE))) @@ -542,9 +524,7 @@ impl LocalDisk { // }) // } - #[allow(unreachable_code)] - #[allow(unused_variables)] - pub async fn move_to_trash(&self, delete_path: &PathBuf, recursive: bool, immediate_purge: bool) -> Result<()> { + async fn move_to_trash(&self, delete_path: &PathBuf, recursive: bool, immediate_purge: bool) -> Result<()> { // if recursive { // 
remove_all_std(delete_path).map_err(to_volume_error)?; // } else { @@ -600,7 +580,7 @@ impl LocalDisk { #[tracing::instrument(level = "debug", skip(self))] #[async_recursion::async_recursion] - pub async fn delete_file( + async fn delete_file( &self, base_path: &PathBuf, delete_path: &PathBuf, @@ -673,7 +653,7 @@ impl LocalDisk { return Err(DiskError::FileNotFound); } - let meta_path = file_path.as_ref().join(Path::new(STORAGE_FORMAT_FILE)); + let meta_path = path_join(&[file_path.as_ref(), Path::new(STORAGE_FORMAT_FILE)]); let res = { if read_data { @@ -853,11 +833,11 @@ impl LocalDisk { async fn write_all_meta(&self, volume: &str, path: &str, buf: &[u8], sync: bool) -> Result<()> { let volume_dir = self.get_bucket_path(volume)?; - let file_path = volume_dir.join(Path::new(&path)); + let file_path = self.get_object_path(volume, path)?; check_path_length(file_path.to_string_lossy().as_ref())?; let tmp_volume_dir = self.get_bucket_path(super::RUSTFS_META_TMP_BUCKET)?; - let tmp_file_path = tmp_volume_dir.join(Path::new(Uuid::new_v4().to_string().as_str())); + let tmp_file_path = self.get_object_path(super::RUSTFS_META_TMP_BUCKET, Uuid::new_v4().to_string().as_str())?; self.write_all_internal(&tmp_file_path, InternalBuf::Ref(buf), sync, &tmp_volume_dir) .await?; @@ -881,22 +861,15 @@ impl LocalDisk { // write_all_private with check_path_length #[tracing::instrument(level = "debug", skip_all)] - pub async fn write_all_private(&self, volume: &str, path: &str, buf: Bytes, sync: bool, skip_parent: &Path) -> Result<()> { - let volume_dir = self.get_bucket_path(volume)?; - let file_path = volume_dir.join(Path::new(&path)); + async fn write_all_private(&self, volume: &str, path: &str, buf: Bytes, sync: bool, skip_parent: &Path) -> Result<()> { + let file_path = self.get_object_path(volume, path)?; check_path_length(file_path.to_string_lossy().as_ref())?; self.write_all_internal(&file_path, InternalBuf::Owned(buf), sync, skip_parent) .await } // write_all_internal do write file - pub async fn write_all_internal( - &self, - file_path: &Path, - data: InternalBuf<'_>, - sync: bool, - skip_parent: &Path, - ) -> Result<()> { + async fn write_all_internal(&self, file_path: &Path, data: InternalBuf<'_>, sync: bool, skip_parent: &Path) -> Result<()> { let flags = O_CREATE | O_WRONLY | O_TRUNC; let mut f = { @@ -1214,7 +1187,7 @@ fn is_root_path(path: impl AsRef) -> bool { } // Filter std::io::ErrorKind::NotFound -pub async fn read_file_exists(path: impl AsRef) -> Result<(Bytes, Option)> { +async fn read_file_exists(path: impl AsRef) -> Result<(Bytes, Option)> { let p = path.as_ref(); let (data, meta) = match read_file_all(&p).await { Ok((data, meta)) => (data, Some(meta)), @@ -1235,7 +1208,7 @@ pub async fn read_file_exists(path: impl AsRef) -> Result<(Bytes, Option) -> Result<(Bytes, Metadata)> { +async fn read_file_all(path: impl AsRef) -> Result<(Bytes, Metadata)> { let p = path.as_ref(); let meta = read_file_metadata(&path).await?; @@ -1244,7 +1217,7 @@ pub async fn read_file_all(path: impl AsRef) -> Result<(Bytes, Metadata)> Ok((data.into(), meta)) } -pub async fn read_file_metadata(p: impl AsRef) -> Result { +async fn read_file_metadata(p: impl AsRef) -> Result { let meta = fs::metadata(&p).await.map_err(to_file_error)?; Ok(meta) @@ -1267,6 +1240,34 @@ fn skip_access_checks(p: impl AsRef) -> bool { false } +// Lightweight path normalization without filesystem calls +fn normalize_path_components(path: impl AsRef) -> PathBuf { + let path = path.as_ref(); + let mut result = PathBuf::new(); + + for 
component in path.components() { + match component { + std::path::Component::Normal(name) => { + result.push(name); + } + std::path::Component::ParentDir => { + result.pop(); + } + std::path::Component::CurDir => { + // Ignore current directory components + } + std::path::Component::RootDir => { + result.push(component); + } + std::path::Component::Prefix(_prefix) => { + result.push(component); + } + } + } + + result +} + #[async_trait::async_trait] impl DiskAPI for LocalDisk { #[tracing::instrument(skip(self))] @@ -1403,8 +1404,9 @@ impl DiskAPI for LocalDisk { return Ok(format_info.data.clone()); } } - // TOFIX: + let p = self.get_object_path(volume, path)?; + let (data, _) = read_file_all(&p).await?; Ok(data) @@ -1424,7 +1426,8 @@ impl DiskAPI for LocalDisk { return Err(to_access_error(e, DiskError::VolumeAccessDenied).into()); } - let file_path = volume_dir.join(Path::new(&path)); + let file_path = self.get_object_path(volume, path)?; + check_path_length(file_path.to_string_lossy().to_string().as_str())?; self.delete_file(&volume_dir, &file_path, opt.recursive, opt.immediate) @@ -1449,10 +1452,15 @@ impl DiskAPI for LocalDisk { let erasure = &fi.erasure; for (i, part) in fi.parts.iter().enumerate() { let checksum_info = erasure.get_checksum_info(part.number); - let part_path = Path::new(&volume_dir) - .join(path) - .join(fi.data_dir.map_or("".to_string(), |dir| dir.to_string())) - .join(format!("part.{}", part.number)); + let part_path = self.get_object_path( + volume, + path_join_buf(&[ + path, + &fi.data_dir.map_or("".to_string(), |dir| dir.to_string()), + &format!("part.{}", part.number), + ]) + .as_str(), + )?; let err = self .bitrot_verify( &part_path, @@ -1494,9 +1502,14 @@ impl DiskAPI for LocalDisk { .unwrap_or_default(); if let Err(err) = access( - volume_dir - .clone() - .join(path.parent().unwrap_or(Path::new("")).join(format!("part.{num}"))), + self.get_object_path( + bucket, + path_join_buf(&[ + path.parent().unwrap_or(Path::new("")).to_string_lossy().as_ref(), + &format!("part.{num}"), + ]) + .as_str(), + )?, ) .await { @@ -1509,7 +1522,7 @@ impl DiskAPI for LocalDisk { } let data = match self - .read_all_data(bucket, volume_dir.clone(), volume_dir.clone().join(path)) + .read_all_data(bucket, volume_dir.clone(), self.get_object_path(bucket, path.to_string_lossy().as_ref())?) 
.await { Ok(data) => data, @@ -1542,18 +1555,24 @@ impl DiskAPI for LocalDisk { #[tracing::instrument(skip(self))] async fn check_parts(&self, volume: &str, path: &str, fi: &FileInfo) -> Result { let volume_dir = self.get_bucket_path(volume)?; - check_path_length(volume_dir.join(path).to_string_lossy().as_ref())?; + let file_path = self.get_object_path(volume, path)?; + check_path_length(file_path.to_string_lossy().as_ref())?; let mut resp = CheckPartsResp { results: vec![0; fi.parts.len()], }; for (i, part) in fi.parts.iter().enumerate() { - let file_path = Path::new(&volume_dir) - .join(path) - .join(fi.data_dir.map_or("".to_string(), |dir| dir.to_string())) - .join(format!("part.{}", part.number)); + let part_path = self.get_object_path( + volume, + path_join_buf(&[ + path, + &fi.data_dir.map_or("".to_string(), |dir| dir.to_string()), + &format!("part.{}", part.number), + ]) + .as_str(), + )?; - match lstat(&file_path).await { + match lstat(&part_path).await { Ok(st) => { if st.is_dir() { resp.results[i] = CHECK_PART_FILE_NOT_FOUND; @@ -1611,8 +1630,8 @@ impl DiskAPI for LocalDisk { return Err(DiskError::FileAccessDenied); } - let src_file_path = src_volume_dir.join(Path::new(src_path)); - let dst_file_path = dst_volume_dir.join(Path::new(dst_path)); + let src_file_path = self.get_object_path(src_volume, src_path)?; + let dst_file_path = self.get_object_path(dst_volume, dst_path)?; // warn!("rename_part src_file_path:{:?}, dst_file_path:{:?}", &src_file_path, &dst_file_path); @@ -1673,11 +1692,11 @@ impl DiskAPI for LocalDisk { return Err(Error::from(DiskError::FileAccessDenied)); } - let src_file_path = src_volume_dir.join(Path::new(&src_path)); - check_path_length(src_file_path.to_string_lossy().to_string().as_str())?; + let src_file_path = self.get_object_path(src_volume, src_path)?; + check_path_length(src_file_path.to_string_lossy().as_ref())?; - let dst_file_path = dst_volume_dir.join(Path::new(&dst_path)); - check_path_length(dst_file_path.to_string_lossy().to_string().as_str())?; + let dst_file_path = self.get_object_path(dst_volume, dst_path)?; + check_path_length(dst_file_path.to_string_lossy().as_ref())?; if src_is_dir { let meta_op = match lstat(&src_file_path).await { @@ -1722,8 +1741,8 @@ impl DiskAPI for LocalDisk { } let volume_dir = self.get_bucket_path(volume)?; - let file_path = volume_dir.join(Path::new(&path)); - check_path_length(file_path.to_string_lossy().to_string().as_str())?; + let file_path = self.get_object_path(volume, path)?; + check_path_length(file_path.to_string_lossy().as_ref())?; // TODO: writeAllDirect io.copy // info!("file_path: {:?}", file_path); @@ -1749,8 +1768,8 @@ impl DiskAPI for LocalDisk { .map_err(|e| to_access_error(e, DiskError::VolumeAccessDenied))?; } - let file_path = volume_dir.join(Path::new(&path)); - check_path_length(file_path.to_string_lossy().to_string().as_str())?; + let file_path = self.get_object_path(volume, path)?; + check_path_length(file_path.to_string_lossy().as_ref())?; let f = self.open_file(file_path, O_CREATE | O_APPEND | O_WRONLY, volume_dir).await?; @@ -1768,8 +1787,8 @@ impl DiskAPI for LocalDisk { .map_err(|e| to_access_error(e, DiskError::VolumeAccessDenied))?; } - let file_path = volume_dir.join(Path::new(&path)); - check_path_length(file_path.to_string_lossy().to_string().as_str())?; + let file_path = self.get_object_path(volume, path)?; + check_path_length(file_path.to_string_lossy().as_ref())?; let f = self.open_file(file_path, O_RDONLY, volume_dir).await?; @@ -1785,8 +1804,8 @@ impl DiskAPI for LocalDisk { 
.map_err(|e| to_access_error(e, DiskError::VolumeAccessDenied))?; } - let file_path = volume_dir.join(Path::new(&path)); - check_path_length(file_path.to_string_lossy().to_string().as_str())?; + let file_path = self.get_object_path(volume, path)?; + check_path_length(file_path.to_string_lossy().as_ref())?; let mut f = self.open_file(file_path, O_RDONLY, volume_dir).await?; @@ -1819,7 +1838,7 @@ impl DiskAPI for LocalDisk { } let volume_dir = self.get_bucket_path(volume)?; - let dir_path_abs = volume_dir.join(Path::new(&dir_path.trim_start_matches(SLASH_SEPARATOR))); + let dir_path_abs = self.get_object_path(volume, dir_path.trim_start_matches(SLASH_SEPARATOR))?; let entries = match os::read_dir(&dir_path_abs, count).await { Ok(res) => res, @@ -1923,8 +1942,8 @@ impl DiskAPI for LocalDisk { } // xl.meta path - let src_file_path = src_volume_dir.join(Path::new(format!("{}/{}", &src_path, STORAGE_FORMAT_FILE).as_str())); - let dst_file_path = dst_volume_dir.join(Path::new(format!("{}/{}", &dst_path, STORAGE_FORMAT_FILE).as_str())); + let src_file_path = self.get_object_path(src_volume, format!("{}/{}", &src_path, STORAGE_FORMAT_FILE).as_str())?; + let dst_file_path = self.get_object_path(dst_volume, format!("{}/{}", &dst_path, STORAGE_FORMAT_FILE).as_str())?; // data_dir path let has_data_dir_path = { @@ -1938,12 +1957,14 @@ impl DiskAPI for LocalDisk { }; if let Some(data_dir) = has_data_dir { - let src_data_path = src_volume_dir.join(Path::new( + let src_data_path = self.get_object_path( + src_volume, rustfs_utils::path::retain_slash(format!("{}/{}", &src_path, data_dir).as_str()).as_str(), - )); - let dst_data_path = dst_volume_dir.join(Path::new( + )?; + let dst_data_path = self.get_object_path( + dst_volume, rustfs_utils::path::retain_slash(format!("{}/{}", &dst_path, data_dir).as_str()).as_str(), - )); + )?; Some((src_data_path, dst_data_path)) } else { @@ -2146,7 +2167,7 @@ impl DiskAPI for LocalDisk { } for path in paths.iter() { - let file_path = volume_dir.join(Path::new(path)); + let file_path = self.get_object_path(volume, path)?; check_path_length(file_path.to_string_lossy().as_ref())?; @@ -2159,8 +2180,7 @@ impl DiskAPI for LocalDisk { #[tracing::instrument(skip(self))] async fn update_metadata(&self, volume: &str, path: &str, fi: FileInfo, opts: &UpdateMetadataOpts) -> Result<()> { if !fi.metadata.is_empty() { - let volume_dir = self.get_bucket_path(volume)?; - let file_path = volume_dir.join(Path::new(&path)); + let file_path = self.get_object_path(volume, path)?; check_path_length(file_path.to_string_lossy().as_ref())?; @@ -2273,11 +2293,11 @@ impl DiskAPI for LocalDisk { let volume_dir = self.get_bucket_path(volume)?; - let file_path = volume_dir.join(Path::new(&path)); + let file_path = self.get_object_path(volume, path)?; check_path_length(file_path.to_string_lossy().as_ref())?; - let xl_path = file_path.join(Path::new(STORAGE_FORMAT_FILE)); + let xl_path = path_join(&[file_path.as_path(), Path::new(STORAGE_FORMAT_FILE)]); let buf = match self.read_all_data(volume, &volume_dir, &xl_path).await { Ok(res) => res, Err(err) => { @@ -2304,7 +2324,7 @@ impl DiskAPI for LocalDisk { let vid = fi.version_id.unwrap_or_default(); let _ = meta.data.remove(vec![vid, uuid])?; - let old_path = file_path.join(Path::new(uuid.to_string().as_str())); + let old_path = path_join(&[file_path.as_path(), Path::new(uuid.to_string().as_str())]); check_path_length(old_path.to_string_lossy().as_ref())?; if let Err(err) = self.move_to_trash(&old_path, true, false).await @@ -2326,9 +2346,14 @@ impl 
DiskAPI for LocalDisk { if let Some(old_data_dir) = opts.old_data_dir && opts.undo_write { - let src_path = - file_path.join(Path::new(format!("{old_data_dir}{SLASH_SEPARATOR}{STORAGE_FORMAT_FILE_BACKUP}").as_str())); - let dst_path = file_path.join(Path::new(format!("{path}{SLASH_SEPARATOR}{STORAGE_FORMAT_FILE}").as_str())); + let src_path = path_join(&[ + file_path.as_path(), + Path::new(format!("{old_data_dir}{SLASH_SEPARATOR}{STORAGE_FORMAT_FILE_BACKUP}").as_str()), + ]); + let dst_path = path_join(&[ + file_path.as_path(), + Path::new(format!("{path}{SLASH_SEPARATOR}{STORAGE_FORMAT_FILE}").as_str()), + ]); return rename_all(src_path, dst_path, file_path).await; } @@ -2480,7 +2505,7 @@ mod test { RUSTFS_META_BUCKET, ]; - let paths: Vec<_> = vols.iter().map(|v| Path::new(v).join("test")).collect(); + let paths: Vec<_> = vols.iter().map(|v| path_join(&[Path::new(v), Path::new("test")])).collect(); for p in paths.iter() { assert!(skip_access_checks(p.to_str().unwrap())); @@ -2782,4 +2807,66 @@ mod test { #[cfg(not(windows))] assert!(!is_root_path("\\")); } + + #[test] + fn test_normalize_path_components() { + // Test basic relative path + assert_eq!(normalize_path_components("a/b/c"), PathBuf::from("a/b/c")); + + // Test path with current directory components (should be ignored) + assert_eq!(normalize_path_components("a/./b/./c"), PathBuf::from("a/b/c")); + + // Test path with parent directory components + assert_eq!(normalize_path_components("a/b/../c"), PathBuf::from("a/c")); + + // Test path with multiple parent directory components + assert_eq!(normalize_path_components("a/b/c/../../d"), PathBuf::from("a/d")); + + // Test path that goes beyond root + assert_eq!(normalize_path_components("a/../../../b"), PathBuf::from("b")); + + // Test absolute path + assert_eq!(normalize_path_components("/a/b/c"), PathBuf::from("/a/b/c")); + + // Test absolute path with parent components + assert_eq!(normalize_path_components("/a/b/../c"), PathBuf::from("/a/c")); + + // Test complex path with mixed components + assert_eq!(normalize_path_components("a/./b/../c/./d/../e"), PathBuf::from("a/c/e")); + + // Test path with only current directory + assert_eq!(normalize_path_components("."), PathBuf::from("")); + + // Test path with only parent directory + assert_eq!(normalize_path_components(".."), PathBuf::from("")); + + // Test path with multiple current directories + assert_eq!(normalize_path_components("./././a"), PathBuf::from("a")); + + // Test path with multiple parent directories + assert_eq!(normalize_path_components("../../a"), PathBuf::from("a")); + + // Test empty path + assert_eq!(normalize_path_components(""), PathBuf::from("")); + + // Test path starting with current directory + assert_eq!(normalize_path_components("./a/b"), PathBuf::from("a/b")); + + // Test path starting with parent directory + assert_eq!(normalize_path_components("../a/b"), PathBuf::from("a/b")); + + // Test complex case with multiple levels of parent navigation + assert_eq!(normalize_path_components("a/b/c/../../../d/e/f/../../g"), PathBuf::from("d/g")); + + // Test path that completely cancels out + assert_eq!(normalize_path_components("a/b/../../../c/d/../../.."), PathBuf::from("")); + + // Test Windows-style paths (if applicable) + #[cfg(windows)] + { + assert_eq!(normalize_path_components("C:\\a\\b\\c"), PathBuf::from("C:\\a\\b\\c")); + + assert_eq!(normalize_path_components("C:\\a\\..\\b"), PathBuf::from("C:\\b")); + } + } } diff --git a/crates/ecstore/src/store.rs b/crates/ecstore/src/store.rs index 
4d4ed7a4..c22aa4fa 100644 --- a/crates/ecstore/src/store.rs +++ b/crates/ecstore/src/store.rs @@ -16,6 +16,17 @@ use crate::bucket::lifecycle::bucket_lifecycle_ops::init_background_expiry; use crate::bucket::metadata_sys::{self, set_bucket_metadata}; +use crate::bucket::utils::check_abort_multipart_args; +use crate::bucket::utils::check_complete_multipart_args; +use crate::bucket::utils::check_copy_obj_args; +use crate::bucket::utils::check_del_obj_args; +use crate::bucket::utils::check_get_obj_args; +use crate::bucket::utils::check_list_multipart_args; +use crate::bucket::utils::check_list_parts_args; +use crate::bucket::utils::check_new_multipart_args; +use crate::bucket::utils::check_object_args; +use crate::bucket::utils::check_put_object_args; +use crate::bucket::utils::check_put_object_part_args; use crate::bucket::utils::{check_valid_bucket_name, check_valid_bucket_name_strict, is_meta_bucketname}; use crate::config::GLOBAL_STORAGE_CLASS; use crate::config::storageclass; @@ -59,7 +70,7 @@ use rustfs_common::heal_channel::{HealItemType, HealOpts}; use rustfs_common::{GLOBAL_LOCAL_NODE_NAME, GLOBAL_RUSTFS_HOST, GLOBAL_RUSTFS_PORT}; use rustfs_filemeta::FileInfo; use rustfs_madmin::heal_commands::HealResultItem; -use rustfs_utils::path::{SLASH_SEPARATOR, decode_dir_object, encode_dir_object, path_join_buf}; +use rustfs_utils::path::{decode_dir_object, encode_dir_object, path_join_buf}; use s3s::dto::{BucketVersioningStatus, ObjectLockConfiguration, ObjectLockEnabled, VersioningConfiguration}; use std::cmp::Ordering; use std::net::SocketAddr; @@ -2341,172 +2352,6 @@ async fn init_local_peer(endpoint_pools: &EndpointServerPools, host: &String, po *GLOBAL_LOCAL_NODE_NAME.write().await = peer_set[0].clone(); } -pub fn is_valid_object_prefix(_object: &str) -> bool { - // Implement object prefix validation - // !object.is_empty() // Placeholder - // FIXME: TODO: - true -} - -fn is_valid_object_name(object: &str) -> bool { - // Implement object name validation - !object.is_empty() // Placeholder -} - -fn check_object_name_for_length_and_slash(bucket: &str, object: &str) -> Result<()> { - if object.len() > 1024 { - return Err(StorageError::ObjectNameTooLong(bucket.to_owned(), object.to_owned())); - } - - if object.starts_with(SLASH_SEPARATOR) { - return Err(StorageError::ObjectNamePrefixAsSlash(bucket.to_owned(), object.to_owned())); - } - - #[cfg(target_os = "windows")] - { - if object.contains(':') - || object.contains('*') - || object.contains('?') - || object.contains('"') - || object.contains('|') - || object.contains('<') - || object.contains('>') - // || object.contains('\\') - { - return Err(StorageError::ObjectNameInvalid(bucket.to_owned(), object.to_owned())); - } - } - - Ok(()) -} - -fn check_copy_obj_args(bucket: &str, object: &str) -> Result<()> { - check_bucket_and_object_names(bucket, object) -} - -fn check_get_obj_args(bucket: &str, object: &str) -> Result<()> { - check_bucket_and_object_names(bucket, object) -} - -fn check_del_obj_args(bucket: &str, object: &str) -> Result<()> { - check_bucket_and_object_names(bucket, object) -} - -fn check_bucket_and_object_names(bucket: &str, object: &str) -> Result<()> { - if !is_meta_bucketname(bucket) && check_valid_bucket_name_strict(bucket).is_err() { - return Err(StorageError::BucketNameInvalid(bucket.to_string())); - } - - if object.is_empty() { - return Err(StorageError::ObjectNameInvalid(bucket.to_string(), object.to_string())); - } - - if !is_valid_object_prefix(object) { - return 
Err(StorageError::ObjectNameInvalid(bucket.to_string(), object.to_string())); - } - - // if cfg!(target_os = "windows") && object.contains('\\') { - // return Err(StorageError::ObjectNameInvalid(bucket.to_string(), object.to_string())); - // } - - Ok(()) -} - -pub fn check_list_objs_args(bucket: &str, prefix: &str, _marker: &Option) -> Result<()> { - if !is_meta_bucketname(bucket) && check_valid_bucket_name_strict(bucket).is_err() { - return Err(StorageError::BucketNameInvalid(bucket.to_string())); - } - - if !is_valid_object_prefix(prefix) { - return Err(StorageError::ObjectNameInvalid(bucket.to_string(), prefix.to_string())); - } - - Ok(()) -} - -fn check_list_multipart_args( - bucket: &str, - prefix: &str, - key_marker: &Option, - upload_id_marker: &Option, - _delimiter: &Option, -) -> Result<()> { - check_list_objs_args(bucket, prefix, key_marker)?; - - if let Some(upload_id_marker) = upload_id_marker { - if let Some(key_marker) = key_marker - && key_marker.ends_with('/') - { - return Err(StorageError::InvalidUploadIDKeyCombination( - upload_id_marker.to_string(), - key_marker.to_string(), - )); - } - - if let Err(_e) = base64_simd::URL_SAFE_NO_PAD.decode_to_vec(upload_id_marker.as_bytes()) { - return Err(StorageError::MalformedUploadID(upload_id_marker.to_owned())); - } - } - - Ok(()) -} - -fn check_object_args(bucket: &str, object: &str) -> Result<()> { - if !is_meta_bucketname(bucket) && check_valid_bucket_name_strict(bucket).is_err() { - return Err(StorageError::BucketNameInvalid(bucket.to_string())); - } - - check_object_name_for_length_and_slash(bucket, object)?; - - if !is_valid_object_name(object) { - return Err(StorageError::ObjectNameInvalid(bucket.to_string(), object.to_string())); - } - - Ok(()) -} - -fn check_new_multipart_args(bucket: &str, object: &str) -> Result<()> { - check_object_args(bucket, object) -} - -fn check_multipart_object_args(bucket: &str, object: &str, upload_id: &str) -> Result<()> { - if let Err(e) = base64_simd::URL_SAFE_NO_PAD.decode_to_vec(upload_id.as_bytes()) { - return Err(StorageError::MalformedUploadID(format!("{bucket}/{object}-{upload_id},err:{e}"))); - }; - check_object_args(bucket, object) -} - -fn check_put_object_part_args(bucket: &str, object: &str, upload_id: &str) -> Result<()> { - check_multipart_object_args(bucket, object, upload_id) -} - -fn check_list_parts_args(bucket: &str, object: &str, upload_id: &str) -> Result<()> { - check_multipart_object_args(bucket, object, upload_id) -} - -fn check_complete_multipart_args(bucket: &str, object: &str, upload_id: &str) -> Result<()> { - check_multipart_object_args(bucket, object, upload_id) -} - -fn check_abort_multipart_args(bucket: &str, object: &str, upload_id: &str) -> Result<()> { - check_multipart_object_args(bucket, object, upload_id) -} - -#[instrument(level = "debug")] -fn check_put_object_args(bucket: &str, object: &str) -> Result<()> { - if !is_meta_bucketname(bucket) && check_valid_bucket_name_strict(bucket).is_err() { - return Err(StorageError::BucketNameInvalid(bucket.to_string())); - } - - check_object_name_for_length_and_slash(bucket, object)?; - - if object.is_empty() || !is_valid_object_prefix(object) { - return Err(StorageError::ObjectNameInvalid(bucket.to_string(), object.to_string())); - } - - Ok(()) -} - pub async fn get_disk_infos(disks: &[Option]) -> Vec> { let opts = &DiskInfoOptions::default(); let mut res = vec![None; disks.len()]; @@ -2622,59 +2467,6 @@ pub async fn has_space_for(dis: &[Option], size: i64) -> Result mod tests { use super::*; - // Test validation 
functions - #[test] - fn test_is_valid_object_name() { - assert!(is_valid_object_name("valid-object-name")); - assert!(!is_valid_object_name("")); - assert!(is_valid_object_name("object/with/slashes")); - assert!(is_valid_object_name("object with spaces")); - } - - #[test] - fn test_is_valid_object_prefix() { - assert!(is_valid_object_prefix("valid-prefix")); - assert!(is_valid_object_prefix("")); - assert!(is_valid_object_prefix("prefix/with/slashes")); - } - - #[test] - fn test_check_bucket_and_object_names() { - // Valid names - assert!(check_bucket_and_object_names("valid-bucket", "valid-object").is_ok()); - - // Invalid bucket names - assert!(check_bucket_and_object_names("", "valid-object").is_err()); - assert!(check_bucket_and_object_names("INVALID", "valid-object").is_err()); - - // Invalid object names - assert!(check_bucket_and_object_names("valid-bucket", "").is_err()); - } - - #[test] - fn test_check_list_objs_args() { - assert!(check_list_objs_args("valid-bucket", "", &None).is_ok()); - assert!(check_list_objs_args("", "", &None).is_err()); - assert!(check_list_objs_args("INVALID", "", &None).is_err()); - } - - #[test] - fn test_check_multipart_args() { - assert!(check_new_multipart_args("valid-bucket", "valid-object").is_ok()); - assert!(check_new_multipart_args("", "valid-object").is_err()); - assert!(check_new_multipart_args("valid-bucket", "").is_err()); - - // Use valid base64 encoded upload_id - let valid_upload_id = "dXBsb2FkLWlk"; // base64 encoded "upload-id" - assert!(check_multipart_object_args("valid-bucket", "valid-object", valid_upload_id).is_ok()); - assert!(check_multipart_object_args("", "valid-object", valid_upload_id).is_err()); - assert!(check_multipart_object_args("valid-bucket", "", valid_upload_id).is_err()); - // Empty string is valid base64 (decodes to empty vec), so this should pass bucket/object validation - // but fail on empty upload_id check in the function logic - assert!(check_multipart_object_args("valid-bucket", "valid-object", "").is_ok()); - assert!(check_multipart_object_args("valid-bucket", "valid-object", "invalid-base64!").is_err()); - } - #[tokio::test] async fn test_get_disk_infos() { let disks = vec![None, None]; // Empty disks for testing @@ -2768,43 +2560,4 @@ mod tests { } assert_eq!(count, 1); } - - #[test] - fn test_validation_functions_comprehensive() { - // Test object name validation edge cases - assert!(!is_valid_object_name("")); - assert!(is_valid_object_name("a")); - assert!(is_valid_object_name("test.txt")); - assert!(is_valid_object_name("folder/file.txt")); - assert!(is_valid_object_name("very-long-object-name-with-many-characters")); - - // Test prefix validation - assert!(is_valid_object_prefix("")); - assert!(is_valid_object_prefix("prefix")); - assert!(is_valid_object_prefix("prefix/")); - assert!(is_valid_object_prefix("deep/nested/prefix/")); - } - - #[test] - fn test_argument_validation_comprehensive() { - // Test bucket and object name validation - assert!(check_bucket_and_object_names("test-bucket", "test-object").is_ok()); - assert!(check_bucket_and_object_names("test-bucket", "folder/test-object").is_ok()); - - // Test list objects arguments - assert!(check_list_objs_args("test-bucket", "prefix", &Some("marker".to_string())).is_ok()); - assert!(check_list_objs_args("test-bucket", "", &None).is_ok()); - - // Test multipart upload arguments with valid base64 upload_id - let valid_upload_id = "dXBsb2FkLWlk"; // base64 encoded "upload-id" - assert!(check_put_object_part_args("test-bucket", "test-object", 
valid_upload_id).is_ok()); - assert!(check_list_parts_args("test-bucket", "test-object", valid_upload_id).is_ok()); - assert!(check_complete_multipart_args("test-bucket", "test-object", valid_upload_id).is_ok()); - assert!(check_abort_multipart_args("test-bucket", "test-object", valid_upload_id).is_ok()); - - // Test put object arguments - assert!(check_put_object_args("test-bucket", "test-object").is_ok()); - assert!(check_put_object_args("", "test-object").is_err()); - assert!(check_put_object_args("test-bucket", "").is_err()); - } } diff --git a/crates/ecstore/src/store_list_objects.rs b/crates/ecstore/src/store_list_objects.rs index a1977969..c28a4c42 100644 --- a/crates/ecstore/src/store_list_objects.rs +++ b/crates/ecstore/src/store_list_objects.rs @@ -14,6 +14,7 @@ use crate::StorageAPI; use crate::bucket::metadata_sys::get_versioning_config; +use crate::bucket::utils::check_list_objs_args; use crate::bucket::versioning::VersioningApi; use crate::cache_value::metacache_set::{ListPathRawOptions, list_path_raw}; use crate::disk::error::DiskError; @@ -22,7 +23,6 @@ use crate::error::{ Error, Result, StorageError, is_all_not_found, is_all_volume_not_found, is_err_bucket_not_found, to_object_err, }; use crate::set_disk::SetDisks; -use crate::store::check_list_objs_args; use crate::store_api::{ ListObjectVersionsInfo, ListObjectsInfo, ObjectInfo, ObjectInfoOrErr, ObjectOptions, WalkOptions, WalkVersionsSortOrder, }; diff --git a/crates/utils/src/path.rs b/crates/utils/src/path.rs index e263033c..0eb7c9f7 100644 --- a/crates/utils/src/path.rs +++ b/crates/utils/src/path.rs @@ -74,18 +74,21 @@ pub fn has_prefix(s: &str, prefix: &str) -> bool { s.starts_with(prefix) } -pub fn path_join(elem: &[PathBuf]) -> PathBuf { - let mut joined_path = PathBuf::new(); - - for path in elem { - joined_path.push(path); - } - - joined_path +pub fn path_join>(elem: &[P]) -> PathBuf { + path_join_buf( + elem.iter() + .map(|p| p.as_ref().to_string_lossy().into_owned()) + .collect::>() + .iter() + .map(|s| s.as_str()) + .collect::>() + .as_slice(), + ) + .into() } pub fn path_join_buf(elements: &[&str]) -> String { - let trailing_slash = !elements.is_empty() && elements.last().unwrap().ends_with(SLASH_SEPARATOR); + let trailing_slash = !elements.is_empty() && elements.last().is_some_and(|last| last.ends_with(SLASH_SEPARATOR)); let mut dst = String::new(); let mut added = 0; @@ -100,14 +103,87 @@ pub fn path_join_buf(elements: &[&str]) -> String { } } - let result = dst.to_string(); - let cpath = Path::new(&result).components().collect::(); - let clean_path = cpath.to_string_lossy(); + if path_needs_clean(dst.as_bytes()) { + let mut clean_path = clean(&dst); + if trailing_slash { + clean_path.push_str(SLASH_SEPARATOR); + } + return clean_path; + } if trailing_slash { - return format!("{clean_path}{SLASH_SEPARATOR}"); + dst.push_str(SLASH_SEPARATOR); } - clean_path.to_string() + + dst +} + +/// path_needs_clean returns whether path cleaning may change the path. +/// Will detect all cases that will be cleaned, +/// but may produce false positives on non-trivial paths. +fn path_needs_clean(path: &[u8]) -> bool { + if path.is_empty() { + return true; + } + + let rooted = path[0] == b'/'; + let n = path.len(); + + let (mut r, mut w) = if rooted { (1, 1) } else { (0, 0) }; + + while r < n { + match path[r] { + b if b > 127 => { + // Non ascii. + return true; + } + b'/' => { + // multiple / elements + return true; + } + b'.' => { + if r + 1 == n || path[r + 1] == b'/' { + // . element - assume it has to be cleaned. 
+ return true; + } + if r + 1 < n && path[r + 1] == b'.' && (r + 2 == n || path[r + 2] == b'/') { + // .. element: remove to last / - assume it has to be cleaned. + return true; + } + // Handle single dot case + if r + 1 == n { + // . element - assume it has to be cleaned. + return true; + } + // Copy the dot + w += 1; + r += 1; + } + _ => { + // real path element. + // add slash if needed + if (rooted && w != 1) || (!rooted && w != 0) { + w += 1; + } + // copy element + while r < n && path[r] != b'/' { + w += 1; + r += 1; + } + // allow one slash, not at end + if r < n - 1 && path[r] == b'/' { + r += 1; + } + } + } + } + + // Turn empty string into "." + if w == 0 { + return true; + } + + false } pub fn path_to_bucket_object_with_base_path(bash_path: &str, path: &str) -> (String, String) { @@ -345,4 +421,229 @@ mod tests { assert_eq!(clean("/abc/def/../../.."), "/"); assert_eq!(clean("abc/def/../../../ghi/jkl/../../../mno"), "../../mno"); } + + #[test] + fn test_path_needs_clean() { + struct PathTest { + path: &'static str, + result: &'static str, + } + + let cleantests = vec![ + // Already clean + PathTest { path: "", result: "." }, + PathTest { + path: "abc", + result: "abc", + }, + PathTest { + path: "abc/def", + result: "abc/def", + }, + PathTest { + path: "a/b/c", + result: "a/b/c", + }, + PathTest { path: ".", result: "." }, + PathTest { + path: "..", + result: "..", + }, + PathTest { + path: "../..", + result: "../..", + }, + PathTest { + path: "../../abc", + result: "../../abc", + }, + PathTest { + path: "/abc", + result: "/abc", + }, + PathTest { + path: "/abc/def", + result: "/abc/def", + }, + PathTest { path: "/", result: "/" }, + // Remove trailing slash + PathTest { + path: "abc/", + result: "abc", + }, + PathTest { + path: "abc/def/", + result: "abc/def", + }, + PathTest { + path: "a/b/c/", + result: "a/b/c", + }, + PathTest { path: "./", result: "." }, + PathTest { + path: "../", + result: "..", + }, + PathTest { + path: "../../", + result: "../..", + }, + PathTest { + path: "/abc/", + result: "/abc", + }, + // Remove doubled slash + PathTest { + path: "abc//def//ghi", + result: "abc/def/ghi", + }, + PathTest { + path: "//abc", + result: "/abc", + }, + PathTest { + path: "///abc", + result: "/abc", + }, + PathTest { + path: "//abc//", + result: "/abc", + }, + PathTest { + path: "abc//", + result: "abc", + }, + // Remove . elements + PathTest { + path: "abc/./def", + result: "abc/def", + }, + PathTest { + path: "/./abc/def", + result: "/abc/def", + }, + PathTest { + path: "abc/.", + result: "abc", + }, + // Remove .. 
elements + PathTest { + path: "abc/def/ghi/../jkl", + result: "abc/def/jkl", + }, + PathTest { + path: "abc/def/../ghi/../jkl", + result: "abc/jkl", + }, + PathTest { + path: "abc/def/..", + result: "abc", + }, + PathTest { + path: "abc/def/../..", + result: ".", + }, + PathTest { + path: "/abc/def/../..", + result: "/", + }, + PathTest { + path: "abc/def/../../..", + result: "..", + }, + PathTest { + path: "/abc/def/../../..", + result: "/", + }, + PathTest { + path: "abc/def/../../../ghi/jkl/../../../mno", + result: "../../mno", + }, + // Combinations + PathTest { + path: "abc/./../def", + result: "def", + }, + PathTest { + path: "abc//./../def", + result: "def", + }, + PathTest { + path: "abc/../../././../def", + result: "../../def", + }, + ]; + + for test in cleantests { + let want = test.path != test.result; + let got = path_needs_clean(test.path.as_bytes()); + if want && !got { + panic!("input: {:?}, want {}, got {}", test.path, want, got); + } + + assert_eq!(clean(test.path), test.result); + } + } + + #[test] + fn test_path_join() { + // Test empty input + let result = path_join::<&str>(&[]); + assert_eq!(result, PathBuf::from(".")); + + // Test single path + let result = path_join(&[PathBuf::from("abc")]); + assert_eq!(result, PathBuf::from("abc")); + + // Test single absolute path + let result = path_join(&[PathBuf::from("/abc")]); + assert_eq!(result, PathBuf::from("/abc")); + + // Test multiple relative paths + let result = path_join(&[PathBuf::from("a"), PathBuf::from("b"), PathBuf::from("c")]); + assert_eq!(result, PathBuf::from("a/b/c")); + + // Test absolute path with relative paths + let result = path_join(&[PathBuf::from("/a"), PathBuf::from("b"), PathBuf::from("c")]); + assert_eq!(result, PathBuf::from("/a/b/c")); + + // Test paths with dots + let result = path_join(&[PathBuf::from("a"), PathBuf::from("."), PathBuf::from("b")]); + assert_eq!(result, PathBuf::from("a/b")); + + // Test paths with double dots + let result = path_join(&[ + PathBuf::from("a"), + PathBuf::from("b"), + PathBuf::from(".."), + PathBuf::from("c"), + ]); + assert_eq!(result, PathBuf::from("a/c")); + + // Test paths that need cleaning + let result = path_join(&[PathBuf::from("a//b"), PathBuf::from("c")]); + assert_eq!(result, PathBuf::from("a/b/c")); + + // Test trailing slash preservation + let result = path_join(&[PathBuf::from("a"), PathBuf::from("b/")]); + assert_eq!(result, PathBuf::from("a/b/")); + + // Test empty path in middle + let result = path_join(&[PathBuf::from("a"), PathBuf::from(""), PathBuf::from("b")]); + assert_eq!(result, PathBuf::from("a/b")); + + // Test multiple absolute paths (should concatenate) + let result = path_join(&[PathBuf::from("/a"), PathBuf::from("/b"), PathBuf::from("c")]); + assert_eq!(result, PathBuf::from("/a/b/c")); + + // Test complex case with various path elements + let result = path_join(&[ + PathBuf::from("a"), + PathBuf::from(".."), + PathBuf::from("b"), + PathBuf::from("."), + PathBuf::from("c"), + ]); + assert_eq!(result, PathBuf::from("b/c")); + } } diff --git a/rustfs/src/admin/handlers.rs b/rustfs/src/admin/handlers.rs index d3126882..3f1a5614 100644 --- a/rustfs/src/admin/handlers.rs +++ b/rustfs/src/admin/handlers.rs @@ -32,6 +32,7 @@ use rustfs_ecstore::bucket::bucket_target_sys::BucketTargetSys; use rustfs_ecstore::bucket::metadata::BUCKET_TARGETS_FILE; use rustfs_ecstore::bucket::metadata_sys; use rustfs_ecstore::bucket::target::BucketTarget; +use rustfs_ecstore::bucket::utils::is_valid_object_prefix; use 
rustfs_ecstore::bucket::versioning_sys::BucketVersioningSys; use rustfs_ecstore::data_usage::{ aggregate_local_snapshots, compute_bucket_usage, load_data_usage_from_backend, store_data_usage_in_backend, @@ -41,7 +42,6 @@ use rustfs_ecstore::global::global_rustfs_port; use rustfs_ecstore::metrics_realtime::{CollectMetricsOpts, MetricType, collect_local_metrics}; use rustfs_ecstore::new_object_layer_fn; use rustfs_ecstore::pools::{get_total_usable_capacity, get_total_usable_capacity_free}; -use rustfs_ecstore::store::is_valid_object_prefix; use rustfs_ecstore::store_api::BucketOptions; use rustfs_ecstore::store_api::StorageAPI; use rustfs_ecstore::store_utils::is_reserved_or_invalid_bucket;
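
Reviewer note (illustrative, not part of the patch): the traversal check added in crates/ecstore/src/bucket/utils.rs flags any path segment that trims down to "." or "..", and is_valid_object_name/is_valid_object_prefix additionally reject empty names, object names ending in a slash, doubled slashes, and NUL bytes. A minimal sketch of the intended behavior, using only the public functions introduced above (the wrapper function name is hypothetical):

// Sketch only; exercises the validators introduced in bucket/utils.rs.
use crate::bucket::utils::{has_bad_path_component, is_valid_object_name, is_valid_object_prefix};

fn traversal_examples() {
    // Dot segments are treated as hostile, even when padded with whitespace.
    assert!(has_bad_path_component("a/../b"));
    assert!(has_bad_path_component("a/ .. /b"));
    assert!(has_bad_path_component("./a"));
    // "..b" is an ordinary segment, not a traversal.
    assert!(!has_bad_path_component("a/..b/c"));

    // A slash-terminated name is a valid prefix but never a valid object name.
    assert!(is_valid_object_prefix("path/to/dir/"));
    assert!(!is_valid_object_name("path/to/dir/"));
    assert!(is_valid_object_name("path/to/file.txt"));
}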
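
Reviewer note (illustrative, not part of the patch): in crates/ecstore/src/disk/local.rs, get_object_path and get_bucket_path now funnel every computed path through normalize_path_components and then check_valid_path, which returns DiskError::InvalidPath whenever the normalized result escapes the disk root. The sketch below restates that containment logic standalone; the normalize function, the /data/disk1 root, and main are hypothetical stand-ins for the LocalDisk methods:

use std::path::{Component, Path, PathBuf};

// Simplified restatement of normalize_path_components from local.rs.
fn normalize(path: &Path) -> PathBuf {
    let mut out = PathBuf::new();
    for component in path.components() {
        match component {
            Component::Normal(name) => out.push(name),
            // ".." can only pop what was already pushed; it never climbs past "/".
            Component::ParentDir => {
                out.pop();
            }
            // "." contributes nothing.
            Component::CurDir => {}
            Component::RootDir | Component::Prefix(_) => out.push(component),
        }
    }
    out
}

fn main() {
    let root = PathBuf::from("/data/disk1");

    // A traversal attempt normalizes to a path outside the root...
    let evil = normalize(&root.join("bucket/../../../etc/passwd"));
    assert_eq!(evil, PathBuf::from("/etc/passwd"));
    // ...so a check_valid_path-style containment test rejects it.
    assert!(!evil.starts_with(&root));

    // A well-formed object path stays under the root and passes.
    let good = normalize(&root.join("bucket/object/xl.meta"));
    assert!(good.starts_with(&root));
}

Resolving ".." structurally rather than through the filesystem keeps the hot path free of syscalls and also works for paths that do not exist yet.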
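
Reviewer note (illustrative, not part of the patch): the crates/utils/src/path.rs rewrite makes path_join_buf call the allocating clean() only when the byte-level scan path_needs_clean reports that cleaning could change the result. The scan is deliberately conservative: the test table above only asserts that it never returns false for a path clean() would alter, and a false positive merely costs one redundant clean() call. Expected input/output pairs, assuming the unchanged join loop (elided from the hunk) inserts a single separator between non-empty elements:

// Sketch of the patched path_join_buf's observable behavior.
use rustfs_utils::path::path_join_buf;

fn main() {
    // Already-clean input takes the new fast path unchanged.
    assert_eq!(path_join_buf(&["bucket", "a/b"]), "bucket/a/b");
    // Doubled slashes and dot-dot segments trigger the clean() slow path.
    assert_eq!(path_join_buf(&["bucket", "a//b", "../c"]), "bucket/a/c");
    // A trailing slash on the last element is preserved on both paths.
    assert_eq!(path_join_buf(&["bucket", "dir/"]), "bucket/dir/");
}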