fix: restore s3 compatibility regressions and CI coverage (#1793)

This commit is contained in:
安正超
2026-02-13 12:26:52 +08:00
committed by GitHub
parent 921cfb849c
commit 2fc36bb52e
5 changed files with 291 additions and 25 deletions

View File

@@ -19,7 +19,6 @@ on:
branches: [ main ]
paths-ignore:
- "**.md"
- "**.txt"
- "docs/**"
- "deploy/**"
- "scripts/dev_*.sh"
@@ -39,7 +38,6 @@ on:
branches: [ main ]
paths-ignore:
- "**.md"
- "**.txt"
- "docs/**"
- "deploy/**"
- "scripts/dev_*.sh"
@@ -180,3 +178,43 @@ jobs:
name: e2e-test-logs-${{ github.run_number }}
path: /tmp/rustfs.log
retention-days: 3
s3-implemented-tests:
name: S3 Implemented Tests
needs: skip-check
if: needs.skip-check.outputs.should_skip != 'true'
runs-on: ubicloud-standard-4
timeout-minutes: 60
steps:
- name: Checkout repository
uses: actions/checkout@v6
- name: Setup Rust environment
uses: ./.github/actions/setup
with:
rust-version: stable
cache-shared-key: ci-s3tests-${{ hashFiles('**/Cargo.lock') }}
cache-save-if: ${{ github.ref == 'refs/heads/main' }}
github-token: ${{ secrets.GITHUB_TOKEN }}
- name: Build debug binary
run: |
touch rustfs/build.rs
cargo build -p rustfs --bins --jobs 2
- name: Run implemented s3-tests
run: |
DEPLOY_MODE=binary \
RUSTFS_BINARY=./target/debug/rustfs \
TEST_MODE=single \
MAXFAIL=1 \
./scripts/s3-tests/run.sh
- name: Upload s3 test artifacts
if: always()
uses: actions/upload-artifact@v6
with:
name: s3tests-implemented-${{ github.run_number }}
path: artifacts/s3tests-single/**
if-no-files-found: ignore
retention-days: 3

View File

@@ -976,6 +976,7 @@ impl LocalDisk {
opts: &WalkDirOptions,
out: &mut MetacacheWriter<W>,
objs_returned: &mut i32,
emit_current_object: bool,
) -> Result<()>
where
W: AsyncWrite + Unpin + Send,
@@ -1036,6 +1037,7 @@ impl LocalDisk {
let bucket = opts.bucket.as_str();
let mut dir_objes = HashSet::new();
let mut object_data_dirs = HashSet::new();
// First-level filtering
for item in entries.iter_mut() {
@@ -1080,21 +1082,41 @@ impl LocalDisk {
let name = entry.trim_end_matches(SLASH_SEPARATOR);
let name = decode_dir_object(format!("{}/{}", &current, &name).as_str());
// if opts.limit > 0
// && let Ok(meta) = FileMeta::load(&metadata)
// && !meta.all_hidden(true)
// {
*objs_returned += 1;
// }
if emit_current_object {
// if opts.limit > 0
// && let Ok(meta) = FileMeta::load(&metadata)
// && !meta.all_hidden(true)
// {
*objs_returned += 1;
// }
out.write_obj(&MetaCacheEntry {
name: name.clone(),
metadata: metadata.to_vec(),
..Default::default()
})
.await?;
out.write_obj(&MetaCacheEntry {
name: name.clone(),
metadata: metadata.to_vec(),
..Default::default()
})
.await?;
}
return Ok(());
// Keep scanning this directory so nested children like
// "foo/bar/xyzzy" are still discoverable when "foo/bar" exists.
if opts.recursive
&& let Ok(fi) = get_file_info(
&metadata,
bucket,
&name,
"",
FileInfoOpts {
data: false,
include_free_versions: false,
},
)
&& let Some(data_dir) = fi.data_dir
{
object_data_dirs.insert(data_dir.to_string());
}
continue;
}
}
@@ -1121,7 +1143,13 @@ impl LocalDisk {
continue;
}
// Skip object data directories. They are internal storage layout, not user objects.
if opts.recursive && object_data_dirs.contains(entry) {
continue;
}
let name = path_join_buf(&[current.as_str(), entry.as_str()]);
let object_dir_name = name.clone();
while let Some(pop) = dir_stack.last().cloned()
&& pop < name
@@ -1133,7 +1161,7 @@ impl LocalDisk {
.await?;
if opts.recursive
&& let Err(er) = Box::pin(self.scan_dir(pop, prefix.clone(), opts, out, objs_returned)).await
&& let Err(er) = Box::pin(self.scan_dir(pop, prefix.clone(), opts, out, objs_returned, true)).await
{
error!("scan_dir err {:?}", er);
}
@@ -1172,6 +1200,22 @@ impl LocalDisk {
// {
*objs_returned += 1;
// }
// This object directory can also contain nested children objects.
// Recurse, but do not emit the current object again.
if opts.recursive
&& let Err(er) = Box::pin(self.scan_dir(
format!("{object_dir_name}{SLASH_SEPARATOR}"),
prefix.clone(),
opts,
out,
objs_returned,
false,
))
.await
{
warn!("scan_dir err {:?}", &er);
}
}
Err(err) => {
if err == Error::FileNotFound || err == Error::IsNotRegular {
@@ -1200,7 +1244,7 @@ impl LocalDisk {
.await?;
if opts.recursive
&& let Err(er) = Box::pin(self.scan_dir(dir, prefix.clone(), opts, out, objs_returned)).await
&& let Err(er) = Box::pin(self.scan_dir(dir, prefix.clone(), opts, out, objs_returned, true)).await
{
warn!("scan_dir err {:?}", &er);
}
@@ -1958,6 +2002,7 @@ impl DiskAPI for LocalDisk {
&opts,
&mut out,
&mut objs_returned,
true,
)
.await?;
@@ -2635,6 +2680,7 @@ async fn get_disk_info(drive_path: PathBuf) -> Result<(rustfs_utils::os::DiskInf
#[cfg(test)]
mod test {
use super::*;
use rustfs_filemeta::MetacacheReader;
#[tokio::test]
async fn test_skip_access_checks() {
@@ -2934,6 +2980,134 @@ mod test {
let _ = fs::remove_file(test_file).await;
}
#[tokio::test]
async fn test_scan_dir_lists_nested_child_when_parent_has_xlmeta() {
    // Unique scratch directory so concurrent test runs never collide.
    let root = format!("./test_scan_dir_nested_{}", uuid::Uuid::new_v4());
    fs::create_dir_all(&root).await.unwrap();

    let endpoint = Endpoint::try_from(root.as_str()).unwrap();
    let disk = LocalDisk::new(&endpoint, false).await.unwrap();

    let bucket = "test-volume";
    disk.make_volume(bucket).await.unwrap();

    // Write xl.meta for the parent object "foo/bar" and for a nested child
    // object "foo/bar/xyzzy" that lives underneath the parent's directory.
    for (key, payload) in [
        ("foo/bar/xl.meta", &b"parent-object"[..]),
        ("foo/bar/xyzzy/xl.meta", &b"child-object"[..]),
    ] {
        let meta_path = disk.get_object_path(bucket, key).unwrap();
        if let Some(dir) = meta_path.parent() {
            fs::create_dir_all(dir).await.unwrap();
        }
        fs::write(&meta_path, payload).await.unwrap();
    }

    let (rd, mut wr) = tokio::io::duplex(16 * 1024);
    let mut writer = MetacacheWriter::new(&mut wr);
    let mut returned = 0;
    let opts = WalkDirOptions {
        bucket: bucket.to_string(),
        base_dir: "foo/bar".to_string(),
        recursive: true,
        ..Default::default()
    };

    // Scan starting at the parent object's prefix; both parent and child
    // must be emitted.
    disk.scan_dir("foo/bar".to_string(), String::new(), &opts, &mut writer, &mut returned, true)
        .await
        .unwrap();
    writer.close().await.unwrap();

    let mut reader = MetacacheReader::new(rd);
    let entries = reader.read_all().await.unwrap();
    let object_names: Vec<String> = entries
        .iter()
        .filter(|entry| !entry.metadata.is_empty())
        .map(|entry| entry.name.clone())
        .collect();

    assert!(
        object_names.iter().any(|name| name == "foo/bar" || name == "foo/bar/"),
        "expected parent object in scan result, got: {:?}",
        object_names
    );
    assert!(
        object_names
            .iter()
            .any(|name| name == "foo/bar/xyzzy" || name == "foo/bar/xyzzy/"),
        "expected nested child object in scan result, got: {:?}",
        object_names
    );

    let _ = fs::remove_dir_all(&root).await;
}
#[tokio::test]
async fn test_scan_dir_lists_parent_and_child_from_bucket_root() {
    // Unique scratch directory so concurrent test runs never collide.
    let root = format!("./test_scan_dir_root_nested_{}", uuid::Uuid::new_v4());
    fs::create_dir_all(&root).await.unwrap();

    let endpoint = Endpoint::try_from(root.as_str()).unwrap();
    let disk = LocalDisk::new(&endpoint, false).await.unwrap();

    let bucket = "test-volume";
    disk.make_volume(bucket).await.unwrap();

    // Write xl.meta for the parent object "foo/bar" and for a nested child
    // object "foo/bar/xyzzy" beneath it.
    for (key, payload) in [
        ("foo/bar/xl.meta", &b"parent-object"[..]),
        ("foo/bar/xyzzy/xl.meta", &b"child-object"[..]),
    ] {
        let meta_path = disk.get_object_path(bucket, key).unwrap();
        if let Some(dir) = meta_path.parent() {
            fs::create_dir_all(dir).await.unwrap();
        }
        fs::write(&meta_path, payload).await.unwrap();
    }

    let (rd, mut wr) = tokio::io::duplex(16 * 1024);
    let mut writer = MetacacheWriter::new(&mut wr);
    let mut returned = 0;
    let opts = WalkDirOptions {
        bucket: bucket.to_string(),
        base_dir: String::new(),
        recursive: true,
        ..Default::default()
    };

    // Scan from the bucket root this time; both objects must still appear.
    disk.scan_dir(String::new(), String::new(), &opts, &mut writer, &mut returned, true)
        .await
        .unwrap();
    writer.close().await.unwrap();

    let mut reader = MetacacheReader::new(rd);
    let entries = reader.read_all().await.unwrap();
    // Normalize names: drop any leading/trailing slashes before comparing.
    let object_names: Vec<String> = entries
        .iter()
        .filter(|entry| !entry.metadata.is_empty())
        .map(|entry| entry.name.trim_start_matches('/').trim_end_matches('/').to_string())
        .collect();

    assert!(
        object_names.iter().any(|name| name == "foo/bar"),
        "expected parent object in root scan result, got: {:?}",
        object_names
    );
    assert!(
        object_names.iter().any(|name| name == "foo/bar/xyzzy"),
        "expected child object in root scan result, got: {:?}",
        object_names
    );

    let _ = fs::remove_dir_all(&root).await;
}
#[test]
fn test_is_root_path() {
// Unix root path

View File

@@ -4265,7 +4265,9 @@ impl S3 for FS {
return Err(s3_error!(MalformedPolicy));
}
let data = serde_json::to_vec(&cfg).map_err(|e| s3_error!(InternalError, "parse policy failed {:?}", e))?;
// Preserve the original JSON text so GetBucketPolicy can return byte-for-byte content.
// s3-tests expects exact string round-trip equality.
let data = policy.into_bytes();
metadata_sys::update(&bucket, BUCKET_POLICY_CONFIG, data)
.await

View File

@@ -27,6 +27,7 @@ mod tests {
use rustfs_config::MI_B;
use rustfs_ecstore::set_disk::DEFAULT_READ_BUFFER_SIZE;
use rustfs_ecstore::store_api::ObjectInfo;
use rustfs_policy::policy::{BucketPolicy, Validator};
use rustfs_utils::http::{AMZ_OBJECT_LOCK_LEGAL_HOLD_LOWER, RESERVED_METADATA_PREFIX_LOWER};
use rustfs_zip::CompressionFormat;
use s3s::dto::{
@@ -1012,6 +1013,34 @@ mod tests {
assert_eq!(filtered_version_marker.unwrap(), "null");
}
#[test]
fn test_bucket_policy_round_trip_preserves_original_json_text() {
    // Raw policy text exactly as a client would send it in PutBucketPolicy.
    let policy = r#"{
    "Version": "2012-10-17",
    "Statement": [{
        "Effect": "Allow",
        "Principal": {"AWS": "*"},
        "Action": "s3:ListBucket",
        "Resource": [
            "arn:aws:s3:::example-bucket",
            "arn:aws:s3:::example-bucket/*"
        ]
    }]
}"#;

    // The raw text must parse into a valid BucketPolicy.
    let parsed: BucketPolicy = serde_json::from_str(policy).unwrap();
    assert!(parsed.is_valid().is_ok());

    // Normalized serialization can differ (for example, Action becomes an array),
    // which is exactly why the raw bytes must be what gets stored.
    assert_ne!(serde_json::to_string(&parsed).unwrap(), policy);

    // Simulate storing the raw bytes and reading them back: the round trip
    // must be byte-for-byte identical for GetBucketPolicy.
    let stored: Vec<u8> = policy.as_bytes().to_vec();
    assert_eq!(String::from_utf8(stored).unwrap(), policy);
}
#[test]
fn test_matches_origin_pattern_exact_match() {
// Test exact match

View File

@@ -392,13 +392,9 @@ pub fn extract_metadata_from_mime_with_object_name(
pub(crate) fn filter_object_metadata(metadata: &HashMap<String, String>) -> Option<HashMap<String, String>> {
// HTTP headers that should NOT be returned in the Metadata field.
// These headers are returned as separate response headers, not user metadata.
//
// Note: content-type and content-disposition are intentionally NOT excluded here.
// They remain in the filtered metadata so they can continue to be exposed via
// x-amz-meta-* style user metadata for backward compatibility, while the HEAD
// implementation also mirrors their values into the standard Content-Type and
// Content-Disposition response headers where appropriate.
const EXCLUDED_HEADERS: &[&str] = &[
"content-type",
"content-disposition",
"content-encoding",
"content-language",
"cache-control",
@@ -437,7 +433,6 @@ pub(crate) fn filter_object_metadata(metadata: &HashMap<String, String>) -> Opti
}
// Skip excluded HTTP headers (they are returned as separate headers, not metadata)
// Note: content-type and content-disposition are NOT excluded and will be treated as user metadata
if EXCLUDED_HEADERS.contains(&lower_key.as_str()) {
continue;
}
@@ -1209,6 +1204,34 @@ mod tests {
assert_eq!(metadata.get("content-type"), Some(&"custom/type".to_string()));
}
#[test]
fn test_filter_object_metadata_excludes_standard_headers() {
    // Mix standard HTTP/S3 headers with one genuine user metadata key.
    let metadata: HashMap<String, String> = [
        ("content-type", "application/octet-stream"),
        ("content-disposition", "inline"),
        ("cache-control", "no-cache"),
        ("x-amz-storage-class", "STANDARD"),
        ("custom-key", "custom-value"),
    ]
    .into_iter()
    .map(|(k, v)| (k.to_string(), v.to_string()))
    .collect();

    let filtered = filter_object_metadata(&metadata).unwrap();

    // Only the user-defined key survives filtering.
    assert_eq!(filtered.len(), 1);
    assert_eq!(filtered.get("custom-key"), Some(&"custom-value".to_string()));
    for excluded in ["content-type", "content-disposition", "cache-control", "x-amz-storage-class"] {
        assert!(!filtered.contains_key(excluded));
    }
}
#[test]
fn test_filter_object_metadata_returns_none_for_only_content_type() {
    // A map holding nothing but content-type carries no user metadata at all.
    let metadata = HashMap::from([("content-type".to_string(), "application/octet-stream".to_string())]);
    let filtered = filter_object_metadata(&metadata);
    assert!(filtered.is_none(), "content-type must not be exposed as user metadata");
}
#[test]
fn test_detect_content_type_from_object_name() {
// Test Parquet files (our custom handling)