From 136db7e0c922d2c496551e4c4103a03658fd53da Mon Sep 17 00:00:00 2001 From: 0xdx2 Date: Sat, 27 Dec 2025 22:18:16 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20add=20function=20to=20extract=20user-de?= =?UTF-8?q?fined=20metadata=20keys=20and=20integrat=E2=80=A6=20(#1281)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 0xdx2 Signed-off-by: houseme Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: houseme --- crates/ecstore/src/store_utils.rs | 3 +- crates/utils/src/lib.rs | 2 + crates/utils/src/obj/metadata.rs | 268 ++++++++++++++++++++++++++++++ crates/utils/src/obj/mod.rs | 16 ++ rustfs/src/storage/ecfs.rs | 14 +- 5 files changed, 299 insertions(+), 4 deletions(-) create mode 100644 crates/utils/src/obj/metadata.rs create mode 100644 crates/utils/src/obj/mod.rs diff --git a/crates/ecstore/src/store_utils.rs b/crates/ecstore/src/store_utils.rs index ea9e8379..a3b1f68f 100644 --- a/crates/ecstore/src/store_utils.rs +++ b/crates/ecstore/src/store_utils.rs @@ -15,8 +15,7 @@ use crate::config::storageclass::STANDARD; use crate::disk::RUSTFS_META_BUCKET; use regex::Regex; -use rustfs_utils::http::headers::AMZ_OBJECT_TAGGING; -use rustfs_utils::http::headers::AMZ_STORAGE_CLASS; +use rustfs_utils::http::headers::{AMZ_OBJECT_TAGGING, AMZ_STORAGE_CLASS}; use std::collections::HashMap; use std::io::{Error, Result}; diff --git a/crates/utils/src/lib.rs b/crates/utils/src/lib.rs index c08dc80a..cb23e249 100644 --- a/crates/utils/src/lib.rs +++ b/crates/utils/src/lib.rs @@ -84,3 +84,5 @@ pub use notify::*; mod envs; pub use envs::*; + +pub mod obj; diff --git a/crates/utils/src/obj/metadata.rs b/crates/utils/src/obj/metadata.rs new file mode 100644 index 00000000..2cbda85b --- /dev/null +++ b/crates/utils/src/obj/metadata.rs @@ -0,0 +1,268 @@ +// Copyright 2024 RustFS Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in 
compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::http::{RESERVED_METADATA_PREFIX_LOWER, is_minio_header, is_rustfs_header}; +use std::collections::HashMap; + +/// Extract user-defined metadata keys from object metadata. +/// +/// This function filters out system-level metadata and returns only user-defined keys. +/// +/// Excluded keys include: +/// - S3 standard headers: content-type, cache-control, content-encoding, content-disposition, +/// content-language, expires +/// - x-amz-* headers (except user metadata with x-amz-meta- prefix which are stripped) +/// - x-rustfs-internal-* headers (system internal metadata) +/// - Storage/replication system keys: x-amz-storage-class, x-amz-tagging, x-amz-replication-status +/// - Object metadata: etag, md5Sum, last-modified +/// +/// # Arguments +/// * `metadata` - The complete metadata HashMap from ObjectInfo.user_defined +/// +/// # Returns +/// A new HashMap containing only user-defined metadata entries. Keys that use +/// the user-metadata prefix (for example `x-amz-meta-`) are returned with that +/// prefix stripped. +/// +/// Note: The keys in the returned map may therefore differ from the keys in +/// the input `metadata` map and cannot be used directly to remove entries +/// from `metadata`. If you need to identify which original keys to remove, +/// consider using an in-place filtering approach or returning the original +/// keys instead. 
+///
+/// # Example
+/// ```
+/// use std::collections::HashMap;
+/// use rustfs_utils::obj::extract_user_defined_metadata;
+///
+/// let mut metadata = HashMap::new();
+/// metadata.insert("content-type".to_string(), "application/json".to_string());
+/// metadata.insert("x-minio-key".to_string(), "application/json".to_string());
+/// metadata.insert("x-amz-grant-sse".to_string(), "application/json".to_string());
+/// metadata.insert("x-amz-meta-user-key".to_string(), "user-value".to_string());
+/// metadata.insert("my-custom-key".to_string(), "custom-value".to_string());
+///
+/// let user_keys = extract_user_defined_metadata(&metadata);
+/// assert_eq!(user_keys.len(), 2);
+/// assert_eq!(user_keys.get("user-key"), Some(&"user-value".to_string()));
+/// assert_eq!(user_keys.get("my-custom-key"), Some(&"custom-value".to_string()));
+/// ```
+pub fn extract_user_defined_metadata(metadata: &HashMap<String, String>) -> HashMap<String, String> {
+    let mut user_metadata = HashMap::new();
+
+    let system_headers = [
+        "content-type",
+        "cache-control",
+        "content-encoding",
+        "content-disposition",
+        "content-language",
+        "expires",
+        "content-length",
+        "content-md5",
+        "content-range",
+        "last-modified",
+        "etag",
+        "md5sum",
+        "date",
+    ];
+
+    for (key, value) in metadata {
+        let lower_key = key.to_ascii_lowercase();
+
+        if lower_key.starts_with(RESERVED_METADATA_PREFIX_LOWER) {
+            continue;
+        }
+
+        if system_headers.contains(&lower_key.as_str()) {
+            continue;
+        }
+
+        if let Some(user_key) = lower_key.strip_prefix("x-amz-meta-") {
+            if !user_key.is_empty() {
+                user_metadata.insert(user_key.to_string(), value.clone());
+            }
+            continue;
+        }
+
+        // Check if it's x-rustfs-meta-* and extract the (lowercased) user key
+        if let Some(user_key) = lower_key.strip_prefix("x-rustfs-meta-") {
+            if !user_key.is_empty() {
+                user_metadata.insert(user_key.to_string(), value.clone());
+            }
+            continue;
+        }
+
+        // Skip other x-amz-* headers
+        if lower_key.starts_with("x-amz-") {
+            continue;
+        }
+
+        // Skip other RustFS headers (x-rustfs-replication-*, etc.); match on the lowercased key
+        if is_rustfs_header(&lower_key) {
+            continue;
+        }
+
+        // Skip MinIO headers (compatibility); match on the lowercased key
+        if is_minio_header(&lower_key) {
+            continue;
+        }
+
+        // All other keys are considered user-defined and keep their original spelling
+        user_metadata.insert(key.clone(), value.clone());
+    }
+
+    user_metadata
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_extract_user_defined_metadata_basic() {
+        let mut metadata = HashMap::new();
+        metadata.insert("my-key".to_string(), "my-value".to_string());
+        metadata.insert("custom-header".to_string(), "custom-value".to_string());
+
+        let user_metadata = extract_user_defined_metadata(&metadata);
+
+        assert_eq!(user_metadata.len(), 2);
+        assert_eq!(user_metadata.get("my-key"), Some(&"my-value".to_string()));
+        assert_eq!(user_metadata.get("custom-header"), Some(&"custom-value".to_string()));
+    }
+
+    #[test]
+    fn test_extract_user_defined_metadata_exclude_system_headers() {
+        let mut metadata = HashMap::new();
+        metadata.insert("content-type".to_string(), "application/json".to_string());
+        metadata.insert("cache-control".to_string(), "no-cache".to_string());
+        metadata.insert("content-encoding".to_string(), "gzip".to_string());
+        metadata.insert("content-disposition".to_string(), "attachment".to_string());
+        metadata.insert("content-language".to_string(), "en-US".to_string());
+        metadata.insert("expires".to_string(), "Wed, 21 Oct 2015 07:28:00 GMT".to_string());
+        metadata.insert("etag".to_string(), "abc123".to_string());
+        metadata.insert("last-modified".to_string(), "Tue, 20 Oct 2015 07:28:00 GMT".to_string());
+        metadata.insert("my-key".to_string(), "my-value".to_string());
+
+        let user_metadata = extract_user_defined_metadata(&metadata);
+
+        assert_eq!(user_metadata.len(), 1);
+        assert_eq!(user_metadata.get("my-key"), Some(&"my-value".to_string()));
+        assert!(!user_metadata.contains_key("content-type"));
+        assert!(!user_metadata.contains_key("cache-control"));
+        assert!(!user_metadata.contains_key("etag"));
+    }
+
+    #[test]
+    fn test_extract_user_defined_metadata_strip_amz_meta_prefix() {
+        let mut metadata = HashMap::new();
+        metadata.insert("x-amz-meta-user-id".to_string(), "12345".to_string());
+        metadata.insert("x-amz-meta-project".to_string(), "test-project".to_string());
+        metadata.insert("x-amz-storage-class".to_string(), "STANDARD".to_string());
+        metadata.insert("x-amz-tagging".to_string(), "key=value".to_string());
+        metadata.insert("x-amz-replication-status".to_string(), "COMPLETED".to_string());
+
+        let user_metadata = extract_user_defined_metadata(&metadata);
+
+        assert_eq!(user_metadata.len(), 2);
+        assert_eq!(user_metadata.get("user-id"), Some(&"12345".to_string()));
+        assert_eq!(user_metadata.get("project"), Some(&"test-project".to_string()));
+        assert!(!user_metadata.contains_key("x-amz-meta-user-id"));
+        assert!(!user_metadata.contains_key("x-amz-storage-class"));
+        assert!(!user_metadata.contains_key("x-amz-tagging"));
+    }
+
+    #[test]
+    fn test_extract_user_defined_metadata_exclude_rustfs_internal() {
+        let mut metadata: HashMap<String, String> = HashMap::new();
+        metadata.insert("x-rustfs-internal-healing".to_string(), "true".to_string());
+        metadata.insert("x-rustfs-internal-data-mov".to_string(), "value".to_string());
+        metadata.insert("X-RustFS-Internal-purgestatus".to_string(), "status".to_string());
+        metadata.insert("x-rustfs-meta-custom".to_string(), "custom-value".to_string());
+        metadata.insert("my-key".to_string(), "my-value".to_string());
+
+        let user_metadata = extract_user_defined_metadata(&metadata);
+
+        assert_eq!(user_metadata.len(), 2);
+        assert_eq!(user_metadata.get("custom"), Some(&"custom-value".to_string()));
+        assert_eq!(user_metadata.get("my-key"), Some(&"my-value".to_string()));
+        assert!(!user_metadata.contains_key("x-rustfs-internal-healing"));
+        assert!(!user_metadata.contains_key("x-rustfs-internal-data-mov"));
+    }
+
+    #[test]
+    fn test_extract_user_defined_metadata_exclude_minio_headers() {
+        let mut metadata = HashMap::new();
+        metadata.insert("x-minio-custom".to_string(), "minio-value".to_string());
+        metadata.insert("x-minio-internal".to_string(), "internal".to_string());
+        metadata.insert("my-key".to_string(), "my-value".to_string());
+
+        let user_metadata = extract_user_defined_metadata(&metadata);
+
+        assert_eq!(user_metadata.len(), 1);
+        assert_eq!(user_metadata.get("my-key"), Some(&"my-value".to_string()));
+        assert!(!user_metadata.contains_key("x-minio-custom"));
+    }
+
+    #[test]
+    fn test_extract_user_defined_metadata_mixed() {
+        let mut metadata = HashMap::new();
+        // System headers
+        metadata.insert("content-type".to_string(), "application/json".to_string());
+        metadata.insert("cache-control".to_string(), "no-cache".to_string());
+        // AMZ headers
+        metadata.insert("x-amz-meta-version".to_string(), "1.0".to_string());
+        metadata.insert("x-amz-storage-class".to_string(), "STANDARD".to_string());
+        // RustFS internal
+        metadata.insert("x-rustfs-internal-healing".to_string(), "true".to_string());
+        metadata.insert("x-rustfs-meta-source".to_string(), "upload".to_string());
+        // User defined
+        metadata.insert("my-custom-key".to_string(), "custom-value".to_string());
+        metadata.insert("another-key".to_string(), "another-value".to_string());
+
+        let user_metadata = extract_user_defined_metadata(&metadata);
+
+        assert_eq!(user_metadata.len(), 4);
+        assert_eq!(user_metadata.get("version"), Some(&"1.0".to_string()));
+        assert_eq!(user_metadata.get("source"), Some(&"upload".to_string()));
+        assert_eq!(user_metadata.get("my-custom-key"), Some(&"custom-value".to_string()));
+        assert_eq!(user_metadata.get("another-key"), Some(&"another-value".to_string()));
+        assert!(!user_metadata.contains_key("content-type"));
+        assert!(!user_metadata.contains_key("x-amz-storage-class"));
+        assert!(!user_metadata.contains_key("x-rustfs-internal-healing"));
+    }
+
+    #[test]
+    fn test_extract_user_defined_metadata_empty() {
+        let metadata = HashMap::new();
+        let user_metadata = extract_user_defined_metadata(&metadata);
+        assert!(user_metadata.is_empty());
+    }
+
+    #[test]
+    fn test_extract_user_defined_metadata_case_insensitive() {
+        let mut metadata = HashMap::new();
+        metadata.insert("Content-Type".to_string(), "application/json".to_string());
+        metadata.insert("CACHE-CONTROL".to_string(), "no-cache".to_string());
+        metadata.insert("X-Amz-Meta-UserId".to_string(), "12345".to_string());
+        metadata.insert("My-Custom-Key".to_string(), "value".to_string());
+
+        let user_metadata = extract_user_defined_metadata(&metadata);
+
+        assert_eq!(user_metadata.len(), 2);
+        assert_eq!(user_metadata.get("userid"), Some(&"12345".to_string()));
+        assert_eq!(user_metadata.get("My-Custom-Key"), Some(&"value".to_string()));
+        assert!(!user_metadata.contains_key("Content-Type"));
+    }
+}
diff --git a/crates/utils/src/obj/mod.rs b/crates/utils/src/obj/mod.rs
new file mode 100644
index 00000000..c7d09fab
--- /dev/null
+++ b/crates/utils/src/obj/mod.rs
@@ -0,0 +1,16 @@
+// Copyright 2024 RustFS Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +mod metadata; +pub use metadata::*; diff --git a/rustfs/src/storage/ecfs.rs b/rustfs/src/storage/ecfs.rs index b4949454..a1d4e770 100644 --- a/rustfs/src/storage/ecfs.rs +++ b/rustfs/src/storage/ecfs.rs @@ -119,6 +119,7 @@ use rustfs_utils::{ RESERVED_METADATA_PREFIX_LOWER, }, }, + obj::extract_user_defined_metadata, path::{is_dir_object, path_join_buf}, }; use rustfs_zip::CompressionFormat; @@ -813,6 +814,8 @@ impl S3 for FS { sse_customer_algorithm, sse_customer_key, sse_customer_key_md5, + metadata_directive, + metadata, .. } = req.input.clone(); let (src_bucket, src_key, version_id) = match copy_source { @@ -1001,7 +1004,6 @@ impl S3 for FS { src_info.put_object_reader = Some(PutObjReader::new(reader)); // check quota - // TODO: src metadata for (k, v) in compress_metadata { src_info.user_defined.insert(k, v); @@ -1020,7 +1022,15 @@ impl S3 for FS { .insert("x-amz-server-side-encryption-customer-key-md5".to_string(), sse_md5.clone()); } - // TODO: src tags + if metadata_directive.as_ref().map(|d| d.as_str()) == Some(MetadataDirective::REPLACE) { + let src_user_defined = extract_user_defined_metadata(&src_info.user_defined); + src_user_defined.keys().for_each(|k| { + src_info.user_defined.remove(k); + }); + if let Some(metadata) = metadata { + src_info.user_defined.extend(metadata); + } + } let oi = store .copy_object(&src_bucket, &src_key, &bucket, &key, &mut src_info, &src_opts, &dst_opts)