Compare commits


2 Commits

Author SHA1 Message Date
houseme 705cc0c9f6 Merge branch 'main' of github.com:rustfs/rustfs into feature/metric-1205 2025-12-21 17:56:06 +08:00
houseme 6273b138f6 upgrade mio version to 1.1.1 2025-12-05 14:55:17 +08:00
104 changed files with 3569 additions and 4176 deletions

View File

@@ -40,7 +40,7 @@ env:
jobs:
security-audit:
name: Security Audit
runs-on: ubicloud-standard-2
runs-on: ubicloud-standard-4
timeout-minutes: 15
steps:
- name: Checkout repository
@@ -65,7 +65,7 @@ jobs:
dependency-review:
name: Dependency Review
runs-on: ubicloud-standard-2
runs-on: ubicloud-standard-4
if: github.event_name == 'pull_request'
permissions:
contents: read

View File

@@ -83,7 +83,7 @@ jobs:
# Build strategy check - determine build type based on trigger
build-check:
name: Build Strategy Check
runs-on: ubicloud-standard-2
runs-on: ubicloud-standard-4
outputs:
should_build: ${{ steps.check.outputs.should_build }}
build_type: ${{ steps.check.outputs.build_type }}
@@ -167,19 +167,19 @@ jobs:
matrix:
include:
# Linux builds
- os: ubicloud-standard-2
- os: ubicloud-standard-4
target: x86_64-unknown-linux-musl
cross: false
platform: linux
- os: ubicloud-standard-2
- os: ubicloud-standard-4
target: aarch64-unknown-linux-musl
cross: true
platform: linux
- os: ubicloud-standard-2
- os: ubicloud-standard-4
target: x86_64-unknown-linux-gnu
cross: false
platform: linux
- os: ubicloud-standard-2
- os: ubicloud-standard-4
target: aarch64-unknown-linux-gnu
cross: true
platform: linux
@@ -532,7 +532,7 @@ jobs:
name: Build Summary
needs: [ build-check, build-rustfs ]
if: always() && needs.build-check.outputs.should_build == 'true'
runs-on: ubicloud-standard-2
runs-on: ubicloud-standard-4
steps:
- name: Build completion summary
shell: bash
@@ -584,7 +584,7 @@ jobs:
name: Create GitHub Release
needs: [ build-check, build-rustfs ]
if: startsWith(github.ref, 'refs/tags/') && needs.build-check.outputs.build_type != 'development'
runs-on: ubicloud-standard-2
runs-on: ubicloud-standard-4
permissions:
contents: write
outputs:
@@ -670,7 +670,7 @@ jobs:
name: Upload Release Assets
needs: [ build-check, build-rustfs, create-release ]
if: startsWith(github.ref, 'refs/tags/') && needs.build-check.outputs.build_type != 'development'
runs-on: ubicloud-standard-2
runs-on: ubicloud-standard-4
permissions:
contents: write
actions: read
@@ -751,7 +751,7 @@ jobs:
name: Update Latest Version
needs: [ build-check, upload-release-assets ]
if: startsWith(github.ref, 'refs/tags/')
runs-on: ubicloud-standard-2
runs-on: ubicloud-standard-4
steps:
- name: Update latest.json
env:
@@ -801,7 +801,7 @@ jobs:
name: Publish Release
needs: [ build-check, create-release, upload-release-assets ]
if: startsWith(github.ref, 'refs/tags/') && needs.build-check.outputs.build_type != 'development'
runs-on: ubicloud-standard-2
runs-on: ubicloud-standard-4
permissions:
contents: write
steps:

View File

@@ -69,7 +69,7 @@ concurrency:
env:
CARGO_TERM_COLOR: always
RUST_BACKTRACE: 1
CARGO_BUILD_JOBS: 2
CARGO_BUILD_JOBS: 8
jobs:
@@ -78,7 +78,7 @@ jobs:
permissions:
actions: write
contents: read
runs-on: ubicloud-standard-2
runs-on: ubicloud-standard-4
outputs:
should_skip: ${{ steps.skip_check.outputs.should_skip }}
steps:
@@ -93,7 +93,7 @@ jobs:
typos:
name: Typos
runs-on: ubicloud-standard-2
runs-on: ubicloud-standard-4
steps:
- uses: actions/checkout@v6
- uses: dtolnay/rust-toolchain@stable
@@ -136,7 +136,7 @@ jobs:
name: End-to-End Tests
needs: skip-check
if: needs.skip-check.outputs.should_skip != 'true'
runs-on: ubicloud-standard-2
runs-on: ubicloud-standard-4
timeout-minutes: 30
steps:
- name: Checkout repository
@@ -166,7 +166,7 @@ jobs:
run: |
touch rustfs/build.rs
# Limit concurrency to prevent OOM
cargo build -p rustfs --bins --jobs 2
cargo build -p rustfs --bins --jobs 4
- name: Run end-to-end tests
run: |

View File

@@ -72,7 +72,7 @@ jobs:
# Check if we should build Docker images
build-check:
name: Docker Build Check
runs-on: ubicloud-standard-2
runs-on: ubicloud-standard-4
outputs:
should_build: ${{ steps.check.outputs.should_build }}
should_push: ${{ steps.check.outputs.should_push }}
@@ -264,7 +264,7 @@ jobs:
name: Build Docker Images
needs: build-check
if: needs.build-check.outputs.should_build == 'true'
runs-on: ubicloud-standard-2
runs-on: ubicloud-standard-4
timeout-minutes: 60
steps:
- name: Checkout repository
@@ -404,7 +404,7 @@ jobs:
name: Docker Build Summary
needs: [ build-check, build-docker ]
if: always() && needs.build-check.outputs.should_build == 'true'
runs-on: ubicloud-standard-2
runs-on: ubicloud-standard-4
steps:
- name: Docker build completion summary
run: |

View File

@@ -37,7 +37,7 @@ env:
jobs:
mint-single:
runs-on: ubicloud-standard-2
runs-on: ubicloud-standard-4
timeout-minutes: 40
steps:
- name: Checkout
@@ -114,7 +114,7 @@ jobs:
mint-multi:
if: github.event_name == 'workflow_dispatch' && github.event.inputs.run-multi == 'true'
needs: mint-single
runs-on: ubicloud-standard-2
runs-on: ubicloud-standard-4
timeout-minutes: 60
steps:
- name: Checkout

View File

@@ -58,7 +58,7 @@ defaults:
jobs:
s3tests-single:
if: github.event.inputs.test-mode == 'single'
runs-on: ubicloud-standard-2
runs-on: ubicloud-standard-4
timeout-minutes: 120
steps:
- uses: actions/checkout@v6
@@ -212,7 +212,7 @@ jobs:
s3tests-multi:
if: github.event_name == 'workflow_dispatch' && github.event.inputs.test-mode == 'multi'
runs-on: ubicloud-standard-2
runs-on: ubicloud-standard-4
timeout-minutes: 150
steps:
- uses: actions/checkout@v6

View File

@@ -27,7 +27,7 @@ env:
jobs:
build-helm-package:
runs-on: ubicloud-standard-2
runs-on: ubicloud-standard-4
# Only run on successful builds triggered by tag pushes (version format: x.y.z or x.y.z-suffix)
if: |
github.event.workflow_run.conclusion == 'success' &&
@@ -63,7 +63,7 @@ jobs:
retention-days: 1
publish-helm-package:
runs-on: ubicloud-standard-2
runs-on: ubicloud-standard-4
needs: [ build-helm-package ]
steps:

View File

@@ -40,7 +40,7 @@ env:
jobs:
performance-profile:
name: Performance Profiling
runs-on: ubicloud-standard-2
runs-on: ubicloud-standard-4
timeout-minutes: 30
steps:
- name: Checkout repository
@@ -115,7 +115,7 @@ jobs:
benchmark:
name: Benchmark Tests
runs-on: ubicloud-standard-2
runs-on: ubicloud-standard-4
timeout-minutes: 45
steps:
- name: Checkout repository

Cargo.lock (generated)
View File

@@ -216,12 +216,9 @@ dependencies = [
[[package]]
name = "arc-swap"
version = "1.8.0"
version = "1.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51d03449bb8ca2cc2ef70869af31463d1ae5ccc8fa3e334b307203fbf815207e"
dependencies = [
"rustversion",
]
checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457"
[[package]]
name = "argon2"
@@ -518,9 +515,9 @@ dependencies = [
[[package]]
name = "async-lock"
version = "3.4.2"
version = "3.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "290f7f2596bd5b78a9fec8088ccd89180d7f9f55b94b0576823bbbdc72ee8311"
checksum = "5fd03604047cee9b6ce9de9f70c6cd540a0520c813cbd49bae61f33ab80ed1dc"
dependencies = [
"event-listener",
"event-listener-strategy",
@@ -694,9 +691,9 @@ dependencies = [
[[package]]
name = "aws-sdk-s3"
version = "1.119.0"
version = "1.118.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d65fddc3844f902dfe1864acb8494db5f9342015ee3ab7890270d36fbd2e01c"
checksum = "d3e6b7079f85d9ea9a70643c9f89f50db70f5ada868fa9cfe08c1ffdf51abc13"
dependencies = [
"aws-credential-types",
"aws-runtime",
@@ -945,9 +942,9 @@ dependencies = [
[[package]]
name = "aws-smithy-runtime"
version = "1.9.5"
version = "1.9.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a392db6c583ea4a912538afb86b7be7c5d8887d91604f50eb55c262ee1b4a5f5"
checksum = "65fda37911905ea4d3141a01364bc5509a0f32ae3f3b22d6e330c0abfb62d247"
dependencies = [
"aws-smithy-async",
"aws-smithy-http",
@@ -4846,9 +4843,9 @@ dependencies = [
[[package]]
name = "libz-rs-sys"
version = "0.5.5"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c10501e7805cee23da17c7790e59df2870c0d4043ec6d03f67d31e2b53e77415"
checksum = "15413ef615ad868d4d65dce091cb233b229419c7c0c4bcaa746c0901c49ff39c"
dependencies = [
"zlib-rs",
]
@@ -6728,9 +6725,9 @@ checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2"
[[package]]
name = "reqwest"
version = "0.12.28"
version = "0.12.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147"
checksum = "3b4c14b2d9afca6a60277086b0cc6a6ae0b568f6f7916c943a8cdc79f8be240f"
dependencies = [
"base64",
"bytes",
@@ -6854,19 +6851,22 @@ dependencies = [
[[package]]
name = "rmp"
version = "0.8.15"
version = "0.8.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ba8be72d372b2c9b35542551678538b562e7cf86c3315773cae48dfbfe7790c"
checksum = "228ed7c16fa39782c3b3468e974aec2795e9089153cd08ee2e9aefb3613334c4"
dependencies = [
"byteorder",
"num-traits",
"paste",
]
[[package]]
name = "rmp-serde"
version = "1.3.1"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72f81bee8c8ef9b577d1681a70ebbc962c232461e397b22c208c43c04b67a155"
checksum = "52e599a477cf9840e92f2cde9a7189e67b42c57532749bf90aea6ec10facd4db"
dependencies = [
"byteorder",
"rmp",
"serde",
]
@@ -7040,7 +7040,6 @@ dependencies = [
"hex-simd",
"http 1.4.0",
"http-body 1.0.1",
"http-body-util",
"hyper 1.8.1",
"hyper-util",
"jemalloc_pprof",
@@ -7303,7 +7302,6 @@ dependencies = [
"tonic",
"tower",
"tracing",
"tracing-subscriber",
"url",
"urlencoding",
"uuid",
@@ -7439,7 +7437,6 @@ dependencies = [
name = "rustfs-notify"
version = "0.0.5"
dependencies = [
"arc-swap",
"async-trait",
"axum",
"chrono",
@@ -7658,7 +7655,6 @@ dependencies = [
"hmac 0.13.0-rc.3",
"http 1.4.0",
"hyper 1.8.1",
"ipnet",
"libc",
"local-ip-address",
"lz4",
@@ -7755,9 +7751,9 @@ dependencies = [
[[package]]
name = "rustix"
version = "1.1.3"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34"
checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e"
dependencies = [
"bitflags 2.10.0",
"errno",
@@ -7872,8 +7868,8 @@ checksum = "62049b2877bf12821e8f9ad256ee38fdc31db7387ec2d3b3f403024de2034aea"
[[package]]
name = "s3s"
version = "0.13.0-alpha"
source = "git+https://github.com/s3s-project/s3s.git?branch=main#f6198bbf49abe60066fe47cbbefcb7078863b3e9"
version = "0.12.0-rc.6"
source = "git+https://github.com/s3s-project/s3s.git?branch=main#df2434d7ad2f0b774e68f25cae90c053dcb84f24"
dependencies = [
"arrayvec",
"async-trait",
@@ -8125,15 +8121,15 @@ dependencies = [
[[package]]
name = "serde_json"
version = "1.0.147"
version = "1.0.145"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6af14725505314343e673e9ecb7cd7e8a36aa9791eb936235a3567cc31447ae4"
checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c"
dependencies = [
"itoa",
"memchr",
"ryu",
"serde",
"serde_core",
"zmij",
]
[[package]]
@@ -8853,14 +8849,14 @@ dependencies = [
[[package]]
name = "tempfile"
version = "3.24.0"
version = "3.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c"
checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16"
dependencies = [
"fastrand",
"getrandom 0.3.4",
"once_cell",
"rustix 1.1.3",
"rustix 1.1.2",
"windows-sys 0.61.2",
]
@@ -10250,7 +10246,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32e45ad4206f6d2479085147f02bc2ef834ac85886624a23575ae137c8aa8156"
dependencies = [
"libc",
"rustix 1.1.3",
"rustix 1.1.2",
]
[[package]]
@@ -10427,15 +10423,9 @@ dependencies = [
[[package]]
name = "zlib-rs"
version = "0.5.5"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40990edd51aae2c2b6907af74ffb635029d5788228222c4bb811e9351c0caad3"
[[package]]
name = "zmij"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e404bcd8afdaf006e529269d3e85a743f9480c3cef60034d77860d02964f3ba"
checksum = "51f936044d677be1a1168fae1d03b583a285a5dd9d8cbf7b24c23aa1fc775235"
[[package]]
name = "zopfli"

View File

@@ -109,8 +109,7 @@ hyper-rustls = { version = "0.27.7", default-features = false, features = ["nati
hyper-util = { version = "0.1.19", features = ["tokio", "server-auto", "server-graceful"] }
http = "1.4.0"
http-body = "1.0.1"
http-body-util = "0.1.3"
reqwest = { version = "0.12.28", default-features = false, features = ["rustls-tls-webpki-roots", "charset", "http2", "system-proxy", "stream", "json", "blocking"] }
reqwest = { version = "0.12.26", default-features = false, features = ["rustls-tls-webpki-roots", "charset", "http2", "system-proxy", "stream", "json", "blocking"] }
socket2 = "0.6.1"
tokio = { version = "1.48.0", features = ["fs", "rt-multi-thread"] }
tokio-rustls = { version = "0.26.4", default-features = false, features = ["logging", "tls12", "ring"] }
@@ -122,7 +121,6 @@ tonic-prost = { version = "0.14.2" }
tonic-prost-build = { version = "0.14.2" }
tower = { version = "0.5.2", features = ["timeout"] }
tower-http = { version = "0.6.8", features = ["cors"] }
ipnet = "2.11.0"
# Serialization and Data Formats
bytes = { version = "1.11.0", features = ["serde"] }
@@ -133,10 +131,10 @@ form_urlencoded = "1.2.2"
prost = "0.14.1"
quick-xml = "0.38.4"
rmcp = { version = "0.12.0" }
rmp = { version = "0.8.15" }
rmp-serde = { version = "1.3.1" }
rmp = { version = "0.8.14" }
rmp-serde = { version = "1.3.0" }
serde = { version = "1.0.228", features = ["derive"] }
serde_json = { version = "1.0.147", features = ["raw_value"] }
serde_json = { version = "1.0.145", features = ["raw_value"] }
serde_urlencoded = "0.7.1"
schemars = "1.1.0"
@@ -165,13 +163,13 @@ time = { version = "0.3.44", features = ["std", "parsing", "formatting", "macros
# Utilities and Tools
anyhow = "1.0.100"
arc-swap = "1.8.0"
arc-swap = "1.7.1"
astral-tokio-tar = "0.5.6"
atoi = "2.0.0"
atomic_enum = "0.3.0"
aws-config = { version = "1.8.12" }
aws-credential-types = { version = "1.2.11" }
aws-sdk-s3 = { version = "1.119.0", default-features = false, features = ["sigv4a", "rustls", "rt-tokio"] }
aws-sdk-s3 = { version = "1.117.0", default-features = false, features = ["sigv4a", "rustls", "rt-tokio"] }
aws-smithy-types = { version = "1.3.5" }
base64 = "0.22.1"
base64-simd = "0.8.0"
@@ -224,7 +222,7 @@ regex = { version = "1.12.2" }
rumqttc = { version = "0.25.1" }
rust-embed = { version = "8.9.0" }
rustc-hash = { version = "2.1.1" }
s3s = { version = "0.13.0-alpha", features = ["minio"], git = "https://github.com/s3s-project/s3s.git", branch = "main" }
s3s = { version = "0.12.0-rc.6", features = ["minio"], git = "https://github.com/s3s-project/s3s.git", branch = "main" }
serial_test = "3.2.0"
shadow-rs = { version = "1.5.0", default-features = false }
siphasher = "1.0.1"
@@ -237,7 +235,7 @@ strum = { version = "0.27.2", features = ["derive"] }
sysctl = "0.7.1"
sysinfo = "0.37.2"
temp-env = "0.3.6"
tempfile = "3.24.0"
tempfile = "3.23.0"
test-case = "3.3.1"
thiserror = "2.0.17"
tracing = { version = "0.1.44" }

View File

@@ -9,53 +9,30 @@ CONTAINER_NAME ?= rustfs-dev
DOCKERFILE_PRODUCTION = Dockerfile
DOCKERFILE_SOURCE = Dockerfile.source
# Fatal check
# Checks all required dependencies and exits with error if not found
# (e.g., cargo, rustfmt)
check-%:
@command -v $* >/dev/null 2>&1 || { \
echo >&2 "❌ '$*' is not installed."; \
exit 1; \
}
# Warning-only check
# Checks for optional dependencies and issues a warning if not found
# (e.g., cargo-nextest for enhanced testing)
warn-%:
@command -v $* >/dev/null 2>&1 || { \
echo >&2 "⚠️ '$*' is not installed."; \
}
# For checking dependencies use check-<dep-name> or warn-<dep-name>
.PHONY: core-deps fmt-deps test-deps
core-deps: check-cargo
fmt-deps: check-rustfmt
test-deps: warn-cargo-nextest
# Code quality and formatting targets
.PHONY: fmt
fmt: core-deps fmt-deps
fmt:
@echo "🔧 Formatting code..."
cargo fmt --all
.PHONY: fmt-check
fmt-check: core-deps fmt-deps
fmt-check:
@echo "📝 Checking code formatting..."
cargo fmt --all --check
.PHONY: clippy
clippy: core-deps
clippy:
@echo "🔍 Running clippy checks..."
cargo clippy --fix --allow-dirty
cargo clippy --all-targets --all-features -- -D warnings
.PHONY: check
check: core-deps
check:
@echo "🔨 Running compilation check..."
cargo check --all-targets
.PHONY: test
test: core-deps test-deps
test:
@echo "🧪 Running tests..."
@if command -v cargo-nextest >/dev/null 2>&1; then \
cargo nextest run --all --exclude e2e_test; \
@@ -65,16 +42,16 @@ test: core-deps test-deps
fi
cargo test --all --doc
.PHONY: pre-commit
pre-commit: fmt clippy check test
@echo "✅ All pre-commit checks passed!"
.PHONY: setup-hooks
setup-hooks:
@echo "🔧 Setting up git hooks..."
chmod +x .git/hooks/pre-commit
@echo "✅ Git hooks setup complete!"
.PHONY: pre-commit
pre-commit: fmt clippy check test
@echo "✅ All pre-commit checks passed!"
.PHONY: e2e-server
e2e-server:
sh $(shell pwd)/scripts/run.sh
@@ -209,6 +186,8 @@ docker-dev-push:
--push \
.
# Local production builds using direct buildx (alternative to docker-buildx.sh)
.PHONY: docker-buildx-production-local
docker-buildx-production-local:
@@ -268,6 +247,8 @@ dev-env-stop:
.PHONY: dev-env-restart
dev-env-restart: dev-env-stop dev-env-start
# ========================================================================================
# Build Utilities
# ========================================================================================

View File

@@ -174,7 +174,7 @@ nix run
### Accessing RustFS
5. **Access the Console**: Open your web browser and navigate to `http://localhost:9001` to access the RustFS console.
5. **Access the Console**: Open your web browser and navigate to `http://localhost:9000` to access the RustFS console.
* Default credentials: `rustfsadmin` / `rustfsadmin`
6. **Create a Bucket**: Use the console to create a new bucket for your objects.
7. **Upload Objects**: You can upload files directly through the console or use S3-compatible APIs/clients to interact with your RustFS instance.

View File

@@ -468,17 +468,14 @@ impl HealManager {
let active_heals = self.active_heals.clone();
let cancel_token = self.cancel_token.clone();
let storage = self.storage.clone();
let mut duration = {
let config = config.read().await;
config.heal_interval
};
if duration < Duration::from_secs(1) {
duration = Duration::from_secs(1);
}
info!("start_auto_disk_scanner: Starting auto disk scanner with interval: {:?}", duration);
info!(
"start_auto_disk_scanner: Starting auto disk scanner with interval: {:?}",
config.read().await.heal_interval
);
tokio::spawn(async move {
let mut interval = interval(duration);
let mut interval = interval(config.read().await.heal_interval);
loop {
tokio::select! {
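
One side of this hunk reads `heal_interval` once, clamps it to at least one second, and passes the clamped value to `interval`; the other re-reads the config lock and uses the raw value. A minimal sketch of the clamp-then-tick pattern (config plumbing omitted); the clamp matters because `tokio::time::interval` panics on a zero period:

```rust
use tokio::time::{interval, Duration};

// Sketch: clamp a configured heal interval before building the ticker.
// `tokio::time::interval` panics if the period is zero, so an unvalidated
// config value must never reach it directly.
async fn run_scanner(configured: Duration) {
    let duration = configured.max(Duration::from_secs(1));
    let mut ticker = interval(duration);
    // The first tick completes immediately; later ticks wait `duration`.
    ticker.tick().await;
}

#[tokio::main]
async fn main() {
    run_scanner(Duration::from_millis(0)).await; // clamped to 1s, no panic
}
```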

View File

@@ -30,7 +30,7 @@ use rustfs_ecstore::{
bucket::versioning::VersioningApi,
bucket::versioning_sys::BucketVersioningSys,
data_usage::{aggregate_local_snapshots, compute_bucket_usage, store_data_usage_in_backend},
disk::{DiskAPI, DiskStore, RUSTFS_META_BUCKET, WalkDirOptions},
disk::{Disk, DiskAPI, DiskStore, RUSTFS_META_BUCKET, WalkDirOptions},
set_disk::SetDisks,
store_api::ObjectInfo,
};
@@ -1977,7 +1977,7 @@ impl Scanner {
} else {
// Apply lifecycle actions
if let Some(lifecycle_config) = &lifecycle_config {
if disk.is_local() {
if let Disk::Local(_local_disk) = &**disk {
let vcfg = BucketVersioningSys::get(bucket).await.ok();
let mut scanner_item = ScannerItem {
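
The two sides of this hunk detect a local disk differently: a boolean `is_local()` check versus an `if let` on the `Disk::Local` variant, which also binds the inner value. A simplified stand-in showing the difference:

```rust
// Sketch with stand-in types: matching on the variant binds the payload,
// which a plain boolean check cannot do.
#[allow(dead_code)]
enum Disk {
    Local(String),
    Remote(String),
}

impl Disk {
    fn is_local(&self) -> bool {
        matches!(self, Disk::Local(_))
    }
}

fn main() {
    let disk = Disk::Local("/data/disk1".to_string());
    assert!(disk.is_local()); // answers only "is it local?"
    if let Disk::Local(path) = &disk {
        assert_eq!(path, "/data/disk1"); // also yields the local payload
    }
}
```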

View File

@@ -21,11 +21,10 @@ use rustfs_ecstore::bucket::metadata_sys::{BucketMetadataSys, GLOBAL_BucketMetad
use rustfs_ecstore::endpoints::EndpointServerPools;
use rustfs_ecstore::store::ECStore;
use rustfs_ecstore::store_api::{ObjectIO, PutObjReader, StorageAPI};
use std::sync::{Arc, Once};
use std::sync::Arc;
use tempfile::TempDir;
use tokio::sync::RwLock;
use tokio_util::sync::CancellationToken;
use tracing::Level;
/// Build a minimal single-node ECStore over a temp directory and populate objects.
async fn create_store_with_objects(count: usize) -> (TempDir, std::sync::Arc<ECStore>) {
@@ -75,22 +74,8 @@ async fn create_store_with_objects(count: usize) -> (TempDir, std::sync::Arc<ECS
(temp_dir, store)
}
static INIT: Once = Once::new();
fn init_tracing(filter_level: Level) {
INIT.call_once(|| {
let _ = tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_max_level(filter_level)
.with_timer(tracing_subscriber::fmt::time::UtcTime::rfc_3339())
.with_thread_names(true)
.try_init();
});
}
#[tokio::test]
async fn fallback_builds_full_counts_over_100_objects() {
init_tracing(Level::ERROR);
let (_tmp, store) = create_store_with_objects(1000).await;
let scanner = Scanner::new(None, None);

View File

@@ -38,13 +38,9 @@ use walkdir::WalkDir;
static GLOBAL_ENV: OnceLock<(Vec<PathBuf>, Arc<ECStore>, Arc<ECStoreHealStorage>)> = OnceLock::new();
static INIT: Once = Once::new();
pub fn init_tracing() {
fn init_tracing() {
INIT.call_once(|| {
let _ = tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::UtcTime::rfc_3339())
.with_thread_names(true)
.try_init();
let _ = tracing_subscriber::fmt::try_init();
});
}
@@ -360,7 +356,7 @@ mod serial_tests {
// Create heal manager with faster interval
let cfg = HealConfig {
heal_interval: Duration::from_secs(1),
heal_interval: Duration::from_secs(2),
..Default::default()
};
let heal_manager = HealManager::new(heal_storage.clone(), Some(cfg));

View File

@@ -60,9 +60,8 @@ impl TargetFactory for WebhookTargetFactory {
let endpoint = config
.lookup(WEBHOOK_ENDPOINT)
.ok_or_else(|| TargetError::Configuration("Missing webhook endpoint".to_string()))?;
let parsed_endpoint = endpoint.trim();
let endpoint_url = Url::parse(parsed_endpoint)
.map_err(|e| TargetError::Configuration(format!("Invalid endpoint URL: {e} (value: '{parsed_endpoint}')")))?;
let endpoint_url = Url::parse(&endpoint)
.map_err(|e| TargetError::Configuration(format!("Invalid endpoint URL: {e} (value: '{endpoint}')")))?;
let args = WebhookArgs {
enable: true, // If we are here, it's already enabled.
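
The change above normalizes the endpoint with `trim()` before URL validation, so neither the parsed value nor the error message carries stray whitespace from the config source. A small sketch of the lookup-then-normalize shape (error type simplified to `String`):

```rust
// Sketch: trim a config value before validating it, and echo the offending
// value in the error so misconfigured endpoints are easy to spot in logs.
fn webhook_endpoint(raw: Option<&str>) -> Result<String, String> {
    let value = raw.ok_or_else(|| "Missing webhook endpoint".to_string())?;
    let value = value.trim();
    if value.is_empty() {
        return Err(format!("Invalid endpoint URL (value: '{value}')"));
    }
    Ok(value.to_string())
}

fn main() {
    let ok = webhook_endpoint(Some(" http://example.com/hook \n")).unwrap();
    assert_eq!(ok, "http://example.com/hook");
    assert!(webhook_endpoint(None).is_err());
}
```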

View File

@@ -19,10 +19,6 @@ pub mod globals;
pub mod heal_channel;
pub mod last_minute;
pub mod metrics;
mod readiness;
pub use globals::*;
pub use readiness::{GlobalReadiness, SystemStage};
// is ','
pub static DEFAULT_DELIMITER: u8 = 44;

View File

@@ -1,136 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::atomic::{AtomicU8, Ordering};
/// Represents the various stages of system startup
#[repr(u8)]
pub enum SystemStage {
Booting = 0,
StorageReady = 1, // Disks online, Quorum met
IamReady = 2, // Users and Policies loaded into cache
FullReady = 3, // System ready to serve all traffic
}
/// Global readiness tracker for the service
/// This struct uses atomic operations to track the readiness status of various components
/// of the service in a thread-safe manner.
pub struct GlobalReadiness {
status: AtomicU8,
}
impl Default for GlobalReadiness {
fn default() -> Self {
Self::new()
}
}
impl GlobalReadiness {
/// Create a new GlobalReadiness instance with initial status as Starting
/// # Returns
/// A new instance of GlobalReadiness
pub fn new() -> Self {
Self {
status: AtomicU8::new(SystemStage::Booting as u8),
}
}
/// Update the system to a new stage
///
/// # Arguments
/// * `step` - The SystemStage step to mark as ready
pub fn mark_stage(&self, step: SystemStage) {
self.status.fetch_max(step as u8, Ordering::SeqCst);
}
/// Check if the service is fully ready
/// # Returns
/// `true` if the service is fully ready, `false` otherwise
pub fn is_ready(&self) -> bool {
self.status.load(Ordering::SeqCst) == SystemStage::FullReady as u8
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::sync::Arc;
use std::thread;
#[test]
fn test_initial_state() {
let readiness = GlobalReadiness::new();
assert!(!readiness.is_ready());
assert_eq!(readiness.status.load(Ordering::SeqCst), SystemStage::Booting as u8);
}
#[test]
fn test_mark_stage_progression() {
let readiness = GlobalReadiness::new();
readiness.mark_stage(SystemStage::StorageReady);
assert!(!readiness.is_ready());
assert_eq!(readiness.status.load(Ordering::SeqCst), SystemStage::StorageReady as u8);
readiness.mark_stage(SystemStage::IamReady);
assert!(!readiness.is_ready());
assert_eq!(readiness.status.load(Ordering::SeqCst), SystemStage::IamReady as u8);
readiness.mark_stage(SystemStage::FullReady);
assert!(readiness.is_ready());
}
#[test]
fn test_no_regression() {
let readiness = GlobalReadiness::new();
readiness.mark_stage(SystemStage::FullReady);
readiness.mark_stage(SystemStage::IamReady); // Should not regress
assert!(readiness.is_ready());
}
#[test]
fn test_concurrent_marking() {
let readiness = Arc::new(GlobalReadiness::new());
let mut handles = vec![];
for _ in 0..10 {
let r = Arc::clone(&readiness);
handles.push(thread::spawn(move || {
r.mark_stage(SystemStage::StorageReady);
r.mark_stage(SystemStage::IamReady);
r.mark_stage(SystemStage::FullReady);
}));
}
for h in handles {
h.join().unwrap();
}
assert!(readiness.is_ready());
}
#[test]
fn test_is_ready_only_at_full_ready() {
let readiness = GlobalReadiness::new();
assert!(!readiness.is_ready());
readiness.mark_stage(SystemStage::StorageReady);
assert!(!readiness.is_ready());
readiness.mark_stage(SystemStage::IamReady);
assert!(!readiness.is_ready());
readiness.mark_stage(SystemStage::FullReady);
assert!(readiness.is_ready());
}
}
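
The removed tracker's no-regression property comes entirely from `fetch_max`: an update can only raise the stored stage, which is what `test_no_regression` exercises. A standalone sketch of just that atomic idea:

```rust
use std::sync::atomic::{AtomicU8, Ordering};

// Sketch: `fetch_max` makes stage transitions monotonic, so a late or
// out-of-order mark can never move the system backwards.
fn main() {
    let status = AtomicU8::new(0); // Booting
    status.fetch_max(3, Ordering::SeqCst); // FullReady
    status.fetch_max(2, Ordering::SeqCst); // IamReady arrives late
    assert_eq!(status.load(Ordering::SeqCst), 3); // still FullReady
}
```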

View File

@@ -39,10 +39,3 @@ pub const DEFAULT_MAX_IO_EVENTS_PER_TICK: usize = 1024;
/// Event polling default (Tokio default 61)
pub const DEFAULT_EVENT_INTERVAL: u32 = 61;
pub const DEFAULT_RNG_SEED: Option<u64> = None; // None means random
/// Threshold for small object seek support in megabytes.
///
/// When an object is smaller than this size, rustfs will provide seek support.
///
/// Default is set to 10MB.
pub const DEFAULT_OBJECT_SEEK_SUPPORT_THRESHOLD: usize = 10 * 1024 * 1024;
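
Per the removed doc comment, objects smaller than the threshold get seek support. A one-function sketch of how such a constant gates the decision (`supports_seek` is a hypothetical helper, not an API from the codebase):

```rust
// Sketch: the removed constant as a size gate. `supports_seek` is a
// hypothetical helper illustrating the documented rule.
const DEFAULT_OBJECT_SEEK_SUPPORT_THRESHOLD: usize = 10 * 1024 * 1024; // 10 MB

fn supports_seek(object_size: usize) -> bool {
    object_size < DEFAULT_OBJECT_SEEK_SUPPORT_THRESHOLD
}

fn main() {
    assert!(supports_seek(4 * 1024 * 1024)); // 4 MB object: seekable
    assert!(!supports_seek(64 * 1024 * 1024)); // 64 MB object: not seekable
}
```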

View File

@@ -51,18 +51,6 @@ pub const ENV_NOTIFY_TARGET_STREAM_CONCURRENCY: &str = "RUSTFS_NOTIFY_TARGET_STR
/// Adjust this value based on your system's capabilities and expected load.
pub const DEFAULT_NOTIFY_TARGET_STREAM_CONCURRENCY: usize = 20;
/// Name of the environment variable that configures send concurrency.
/// Controls how many send operations are processed in parallel by the notification system.
/// Defaults to [`DEFAULT_NOTIFY_SEND_CONCURRENCY`] if not set.
/// Example: `RUSTFS_NOTIFY_SEND_CONCURRENCY=64`.
pub const ENV_NOTIFY_SEND_CONCURRENCY: &str = "RUSTFS_NOTIFY_SEND_CONCURRENCY";
/// Default concurrency for send operations in the notification system
/// This value is used if the environment variable `RUSTFS_NOTIFY_SEND_CONCURRENCY` is not set.
/// It defines how many send operations can be processed in parallel by the notification system at any given time.
/// Adjust this value based on your system's capabilities and expected load.
pub const DEFAULT_NOTIFY_SEND_CONCURRENCY: usize = 64;
#[allow(dead_code)]
pub const NOTIFY_SUB_SYSTEMS: &[&str] = &[NOTIFY_MQTT_SUB_SYS, NOTIFY_WEBHOOK_SUB_SYS];
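
The removed doc comments describe the usual env-override pattern: use `RUSTFS_NOTIFY_SEND_CONCURRENCY` when set and parseable, otherwise fall back to the default. A sketch of that lookup (`send_concurrency` is a hypothetical helper):

```rust
// Sketch: env override with a compiled-in default, as the removed doc
// comments describe. `send_concurrency` is a hypothetical helper.
const ENV_NOTIFY_SEND_CONCURRENCY: &str = "RUSTFS_NOTIFY_SEND_CONCURRENCY";
const DEFAULT_NOTIFY_SEND_CONCURRENCY: usize = 64;

fn send_concurrency() -> usize {
    std::env::var(ENV_NOTIFY_SEND_CONCURRENCY)
        .ok()
        .and_then(|v| v.parse().ok())
        .unwrap_or(DEFAULT_NOTIFY_SEND_CONCURRENCY)
}

fn main() {
    // With the variable unset, the default applies.
    assert_eq!(send_concurrency(), 64);
}
```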

View File

@@ -1,138 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Test for HeadObject on deleted objects with versioning enabled
//!
//! This test reproduces the issue where getting a deleted object returns
//! 200 OK instead of 404 NoSuchKey when versioning is enabled.
#![cfg(test)]
use aws_config::meta::region::RegionProviderChain;
use aws_sdk_s3::Client;
use aws_sdk_s3::config::{Credentials, Region};
use aws_sdk_s3::error::SdkError;
use aws_sdk_s3::types::{BucketVersioningStatus, VersioningConfiguration};
use bytes::Bytes;
use serial_test::serial;
use std::error::Error;
use tracing::info;
const ENDPOINT: &str = "http://localhost:9000";
const ACCESS_KEY: &str = "rustfsadmin";
const SECRET_KEY: &str = "rustfsadmin";
const BUCKET: &str = "test-head-deleted-versioning-bucket";
async fn create_aws_s3_client() -> Result<Client, Box<dyn Error>> {
let region_provider = RegionProviderChain::default_provider().or_else(Region::new("us-east-1"));
let shared_config = aws_config::defaults(aws_config::BehaviorVersion::latest())
.region(region_provider)
.credentials_provider(Credentials::new(ACCESS_KEY, SECRET_KEY, None, None, "static"))
.endpoint_url(ENDPOINT)
.load()
.await;
let client = Client::from_conf(
aws_sdk_s3::Config::from(&shared_config)
.to_builder()
.force_path_style(true)
.build(),
);
Ok(client)
}
/// Setup test bucket, creating it if it doesn't exist, and enable versioning
async fn setup_test_bucket(client: &Client) -> Result<(), Box<dyn Error>> {
match client.create_bucket().bucket(BUCKET).send().await {
Ok(_) => {}
Err(SdkError::ServiceError(e)) => {
let e = e.into_err();
let error_code = e.meta().code().unwrap_or("");
if !error_code.eq("BucketAlreadyExists") && !error_code.eq("BucketAlreadyOwnedByYou") {
return Err(e.into());
}
}
Err(e) => {
return Err(e.into());
}
}
// Enable versioning
client
.put_bucket_versioning()
.bucket(BUCKET)
.versioning_configuration(
VersioningConfiguration::builder()
.status(BucketVersioningStatus::Enabled)
.build(),
)
.send()
.await?;
Ok(())
}
/// Test that HeadObject on a deleted object returns NoSuchKey when versioning is enabled
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_head_deleted_object_versioning_returns_nosuchkey() -> Result<(), Box<dyn std::error::Error>> {
let _ = tracing_subscriber::fmt()
.with_max_level(tracing::Level::INFO)
.with_test_writer()
.try_init();
info!("🧪 Starting test_head_deleted_object_versioning_returns_nosuchkey");
let client = create_aws_s3_client().await?;
setup_test_bucket(&client).await?;
let key = "test-head-deleted-versioning.txt";
let content = b"Test content for HeadObject with versioning";
// Upload and verify
client
.put_object()
.bucket(BUCKET)
.key(key)
.body(Bytes::from_static(content).into())
.send()
.await?;
// Delete the object (creates a delete marker)
client.delete_object().bucket(BUCKET).key(key).send().await?;
// Try to head the deleted object (latest version is delete marker)
let head_result = client.head_object().bucket(BUCKET).key(key).send().await;
assert!(head_result.is_err(), "HeadObject on deleted object should return an error");
match head_result.unwrap_err() {
SdkError::ServiceError(service_err) => {
let s3_err = service_err.into_err();
assert!(
s3_err.meta().code() == Some("NoSuchKey")
|| s3_err.meta().code() == Some("NotFound")
|| s3_err.meta().code() == Some("404"),
"Error should be NoSuchKey or NotFound, got: {s3_err:?}"
);
info!("✅ HeadObject correctly returns NoSuchKey/NotFound");
}
other_err => {
panic!("Expected ServiceError but got: {other_err:?}");
}
}
Ok(())
}

View File

@@ -14,7 +14,6 @@
mod conditional_writes;
mod get_deleted_object_test;
mod head_deleted_object_versioning_test;
mod lifecycle;
mod lock;
mod node_interact_test;

View File

@@ -113,7 +113,6 @@ faster-hex = { workspace = true }
tokio = { workspace = true, features = ["rt-multi-thread", "macros"] }
criterion = { workspace = true, features = ["html_reports"] }
temp-env = { workspace = true }
tracing-subscriber = { workspace = true }
[build-dependencies]
shadow-rs = { workspace = true, features = ["build", "metadata"] }

View File

@@ -23,7 +23,7 @@ use crate::{
};
use crate::data_usage::load_data_usage_cache;
use rustfs_common::{GLOBAL_LOCAL_NODE_NAME, heal_channel::DriveState};
use rustfs_common::{globals::GLOBAL_LOCAL_NODE_NAME, heal_channel::DriveState};
use rustfs_madmin::{
BackendDisks, Disk, ErasureSetInfo, ITEM_INITIALIZING, ITEM_OFFLINE, ITEM_ONLINE, InfoMessage, ServerProperties,
};

View File

@@ -16,7 +16,7 @@ use crate::disk::error::DiskError;
use crate::disk::{self, DiskAPI, DiskStore, WalkDirOptions};
use futures::future::join_all;
use rustfs_filemeta::{MetaCacheEntries, MetaCacheEntry, MetacacheReader, is_io_eof};
use std::{future::Future, pin::Pin};
use std::{future::Future, pin::Pin, sync::Arc};
use tokio::spawn;
use tokio_util::sync::CancellationToken;
use tracing::{error, info, warn};
@@ -71,14 +71,14 @@ pub async fn list_path_raw(rx: CancellationToken, opts: ListPathRawOptions) -> d
let mut jobs: Vec<tokio::task::JoinHandle<std::result::Result<(), DiskError>>> = Vec::new();
let mut readers = Vec::with_capacity(opts.disks.len());
let fds = opts.fallback_disks.iter().flatten().cloned().collect::<Vec<_>>();
let fds = Arc::new(opts.fallback_disks.clone());
let cancel_rx = CancellationToken::new();
for disk in opts.disks.iter() {
let opdisk = disk.clone();
let opts_clone = opts.clone();
let mut fds_clone = fds.clone();
let fds_clone = fds.clone();
let cancel_rx_clone = cancel_rx.clone();
let (rd, mut wr) = tokio::io::duplex(64);
readers.push(MetacacheReader::new(rd));
@@ -113,20 +113,21 @@ pub async fn list_path_raw(rx: CancellationToken, opts: ListPathRawOptions) -> d
}
while need_fallback {
let disk_op = {
if fds_clone.is_empty() {
None
} else {
let disk = fds_clone.remove(0);
if disk.is_online().await { Some(disk.clone()) } else { None }
// warn!("list_path_raw: while need_fallback start");
let disk = match fds_clone.iter().find(|d| d.is_some()) {
Some(d) => {
if let Some(disk) = d.clone() {
disk
} else {
warn!("list_path_raw: fallback disk is none");
break;
}
}
None => {
warn!("list_path_raw: fallback disk is none2");
break;
}
};
let Some(disk) = disk_op else {
warn!("list_path_raw: fallback disk is none");
break;
};
match disk
.as_ref()
.walk_dir(
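
One side of this hunk flattens and clones the fallback-disk list for every task and consumes it with `remove(0)`; the other shares a single `Arc`-wrapped list and scans for the first present entry. A minimal sketch of the `Arc` sharing pattern (threads and `String`s stand in for tasks and disk stores):

```rust
use std::sync::Arc;

// Sketch: share one fallback list across workers via Arc; each worker clones
// a pointer, not the Vec, and picks the first Some(_) entry.
fn main() {
    let fallback: Arc<Vec<Option<String>>> = Arc::new(vec![None, Some("disk-2".to_string())]);
    let handles: Vec<_> = (0..3)
        .map(|_| {
            let fds = Arc::clone(&fallback);
            std::thread::spawn(move || fds.iter().flatten().next().cloned())
        })
        .collect();
    for handle in handles {
        assert_eq!(handle.join().unwrap(), Some("disk-2".to_string()));
    }
}
```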

View File

@@ -0,0 +1,350 @@
#![allow(clippy::map_entry)]
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#![allow(unused_imports)]
#![allow(unused_variables)]
#![allow(unused_mut)]
#![allow(unused_assignments)]
#![allow(unused_must_use)]
#![allow(clippy::all)]
use lazy_static::lazy_static;
use rustfs_checksums::ChecksumAlgorithm;
use std::collections::HashMap;
use crate::client::{api_put_object::PutObjectOptions, api_s3_datatypes::ObjectPart};
use crate::{disk::DiskAPI, store_api::GetObjectReader};
use rustfs_utils::crypto::{base64_decode, base64_encode};
use s3s::header::{
X_AMZ_CHECKSUM_ALGORITHM, X_AMZ_CHECKSUM_CRC32, X_AMZ_CHECKSUM_CRC32C, X_AMZ_CHECKSUM_SHA1, X_AMZ_CHECKSUM_SHA256,
};
use enumset::{EnumSet, EnumSetType, enum_set};
#[derive(Debug, EnumSetType, Default)]
#[enumset(repr = "u8")]
pub enum ChecksumMode {
#[default]
ChecksumNone,
ChecksumSHA256,
ChecksumSHA1,
ChecksumCRC32,
ChecksumCRC32C,
ChecksumCRC64NVME,
ChecksumFullObject,
}
lazy_static! {
static ref C_ChecksumMask: EnumSet<ChecksumMode> = {
let mut s = EnumSet::all();
s.remove(ChecksumMode::ChecksumFullObject);
s
};
static ref C_ChecksumFullObjectCRC32: EnumSet<ChecksumMode> =
enum_set!(ChecksumMode::ChecksumCRC32 | ChecksumMode::ChecksumFullObject);
static ref C_ChecksumFullObjectCRC32C: EnumSet<ChecksumMode> =
enum_set!(ChecksumMode::ChecksumCRC32C | ChecksumMode::ChecksumFullObject);
}
const AMZ_CHECKSUM_CRC64NVME: &str = "x-amz-checksum-crc64nvme";
impl ChecksumMode {
//pub const CRC64_NVME_POLYNOMIAL: i64 = 0xad93d23594c93659;
pub fn base(&self) -> ChecksumMode {
let s = EnumSet::from(*self).intersection(*C_ChecksumMask);
match s.as_u8() {
1_u8 => ChecksumMode::ChecksumNone,
2_u8 => ChecksumMode::ChecksumSHA256,
4_u8 => ChecksumMode::ChecksumSHA1,
8_u8 => ChecksumMode::ChecksumCRC32,
16_u8 => ChecksumMode::ChecksumCRC32C,
32_u8 => ChecksumMode::ChecksumCRC64NVME,
_ => panic!("enum err."),
}
}
pub fn is(&self, t: ChecksumMode) -> bool {
*self & t == t
}
pub fn key(&self) -> String {
//match c & checksumMask {
match self {
ChecksumMode::ChecksumCRC32 => {
return X_AMZ_CHECKSUM_CRC32.to_string();
}
ChecksumMode::ChecksumCRC32C => {
return X_AMZ_CHECKSUM_CRC32C.to_string();
}
ChecksumMode::ChecksumSHA1 => {
return X_AMZ_CHECKSUM_SHA1.to_string();
}
ChecksumMode::ChecksumSHA256 => {
return X_AMZ_CHECKSUM_SHA256.to_string();
}
ChecksumMode::ChecksumCRC64NVME => {
return AMZ_CHECKSUM_CRC64NVME.to_string();
}
_ => {
return "".to_string();
}
}
}
pub fn can_composite(&self) -> bool {
let s = EnumSet::from(*self).intersection(*C_ChecksumMask);
match s.as_u8() {
2_u8 => true,
4_u8 => true,
8_u8 => true,
16_u8 => true,
_ => false,
}
}
pub fn can_merge_crc(&self) -> bool {
let s = EnumSet::from(*self).intersection(*C_ChecksumMask);
match s.as_u8() {
8_u8 => true,
16_u8 => true,
32_u8 => true,
_ => false,
}
}
pub fn full_object_requested(&self) -> bool {
let s = EnumSet::from(*self).intersection(*C_ChecksumMask);
match s.as_u8() {
//C_ChecksumFullObjectCRC32 as u8 => true,
//C_ChecksumFullObjectCRC32C as u8 => true,
32_u8 => true,
_ => false,
}
}
pub fn key_capitalized(&self) -> String {
self.key()
}
pub fn raw_byte_len(&self) -> usize {
let u = EnumSet::from(*self).intersection(*C_ChecksumMask).as_u8();
if u == ChecksumMode::ChecksumCRC32 as u8 || u == ChecksumMode::ChecksumCRC32C as u8 {
4
} else if u == ChecksumMode::ChecksumSHA1 as u8 {
use sha1::Digest;
sha1::Sha1::output_size() as usize
} else if u == ChecksumMode::ChecksumSHA256 as u8 {
use sha2::Digest;
sha2::Sha256::output_size() as usize
} else if u == ChecksumMode::ChecksumCRC64NVME as u8 {
8
} else {
0
}
}
pub fn hasher(&self) -> Result<Box<dyn rustfs_checksums::http::HttpChecksum>, std::io::Error> {
match /*C_ChecksumMask & **/self {
ChecksumMode::ChecksumCRC32 => {
return Ok(ChecksumAlgorithm::Crc32.into_impl());
}
ChecksumMode::ChecksumCRC32C => {
return Ok(ChecksumAlgorithm::Crc32c.into_impl());
}
ChecksumMode::ChecksumSHA1 => {
return Ok(ChecksumAlgorithm::Sha1.into_impl());
}
ChecksumMode::ChecksumSHA256 => {
return Ok(ChecksumAlgorithm::Sha256.into_impl());
}
ChecksumMode::ChecksumCRC64NVME => {
return Ok(ChecksumAlgorithm::Crc64Nvme.into_impl());
}
_ => return Err(std::io::Error::other("unsupported checksum type")),
}
}
pub fn is_set(&self) -> bool {
let s = EnumSet::from(*self).intersection(*C_ChecksumMask);
s.len() == 1
}
pub fn set_default(&mut self, t: ChecksumMode) {
if !self.is_set() {
*self = t;
}
}
pub fn encode_to_string(&self, b: &[u8]) -> Result<String, std::io::Error> {
if !self.is_set() {
return Ok("".to_string());
}
let mut h = self.hasher()?;
h.update(b);
let hash = h.finalize();
Ok(base64_encode(hash.as_ref()))
}
pub fn to_string(&self) -> String {
//match c & checksumMask {
match self {
ChecksumMode::ChecksumCRC32 => {
return "CRC32".to_string();
}
ChecksumMode::ChecksumCRC32C => {
return "CRC32C".to_string();
}
ChecksumMode::ChecksumSHA1 => {
return "SHA1".to_string();
}
ChecksumMode::ChecksumSHA256 => {
return "SHA256".to_string();
}
ChecksumMode::ChecksumNone => {
return "".to_string();
}
ChecksumMode::ChecksumCRC64NVME => {
return "CRC64NVME".to_string();
}
_ => {
return "<invalid>".to_string();
}
}
}
// pub fn check_sum_reader(&self, r: GetObjectReader) -> Result<Checksum, std::io::Error> {
// let mut h = self.hasher()?;
// Ok(Checksum::new(self.clone(), h.sum().as_bytes()))
// }
// pub fn check_sum_bytes(&self, b: &[u8]) -> Result<Checksum, std::io::Error> {
// let mut h = self.hasher()?;
// Ok(Checksum::new(self.clone(), h.sum().as_bytes()))
// }
pub fn composite_checksum(&self, p: &mut [ObjectPart]) -> Result<Checksum, std::io::Error> {
if !self.can_composite() {
return Err(std::io::Error::other("cannot do composite checksum"));
}
p.sort_by(|i, j| {
if i.part_num < j.part_num {
std::cmp::Ordering::Less
} else if i.part_num > j.part_num {
std::cmp::Ordering::Greater
} else {
std::cmp::Ordering::Equal
}
});
let c = self.base();
let crc_bytes = Vec::<u8>::with_capacity(p.len() * self.raw_byte_len() as usize);
let mut h = self.hasher()?;
h.update(crc_bytes.as_ref());
let hash = h.finalize();
Ok(Checksum {
checksum_type: self.clone(),
r: hash.as_ref().to_vec(),
computed: false,
})
}
pub fn full_object_checksum(&self, p: &mut [ObjectPart]) -> Result<Checksum, std::io::Error> {
todo!();
}
}
#[derive(Default)]
pub struct Checksum {
checksum_type: ChecksumMode,
r: Vec<u8>,
computed: bool,
}
#[allow(dead_code)]
impl Checksum {
fn new(t: ChecksumMode, b: &[u8]) -> Checksum {
if t.is_set() && b.len() == t.raw_byte_len() {
return Checksum {
checksum_type: t,
r: b.to_vec(),
computed: false,
};
}
Checksum::default()
}
#[allow(dead_code)]
fn new_checksum_string(t: ChecksumMode, s: &str) -> Result<Checksum, std::io::Error> {
let b = match base64_decode(s.as_bytes()) {
Ok(b) => b,
Err(err) => return Err(std::io::Error::other(err.to_string())),
};
if t.is_set() && b.len() == t.raw_byte_len() {
return Ok(Checksum {
checksum_type: t,
r: b,
computed: false,
});
}
Ok(Checksum::default())
}
fn is_set(&self) -> bool {
self.checksum_type.is_set() && self.r.len() == self.checksum_type.raw_byte_len()
}
fn encoded(&self) -> String {
if !self.is_set() {
return "".to_string();
}
base64_encode(&self.r)
}
#[allow(dead_code)]
fn raw(&self) -> Option<Vec<u8>> {
if !self.is_set() {
return None;
}
Some(self.r.clone())
}
}
pub fn add_auto_checksum_headers(opts: &mut PutObjectOptions) {
opts.user_metadata
.insert("X-Amz-Checksum-Algorithm".to_string(), opts.auto_checksum.to_string());
if opts.auto_checksum.full_object_requested() {
opts.user_metadata
.insert("X-Amz-Checksum-Type".to_string(), "FULL_OBJECT".to_string());
}
}
pub fn apply_auto_checksum(opts: &mut PutObjectOptions, all_parts: &mut [ObjectPart]) -> Result<(), std::io::Error> {
if opts.auto_checksum.can_composite() && !opts.auto_checksum.is(ChecksumMode::ChecksumFullObject) {
let crc = opts.auto_checksum.composite_checksum(all_parts)?;
opts.user_metadata = {
let mut hm = HashMap::new();
hm.insert(opts.auto_checksum.key(), crc.encoded());
hm
}
} else if opts.auto_checksum.can_merge_crc() {
let crc = opts.auto_checksum.full_object_checksum(all_parts)?;
opts.user_metadata = {
let mut hm = HashMap::new();
hm.insert(opts.auto_checksum.key_capitalized(), crc.encoded());
hm.insert("X-Amz-Checksum-Type".to_string(), "FULL_OBJECT".to_string());
hm
}
}
Ok(())
}
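
`base()`, `can_composite()`, and `full_object_requested()` above all work by intersecting the flag set with a mask that excludes the `ChecksumFullObject` modifier. A compact sketch of that EnumSet masking trick with stand-in variants:

```rust
use enumset::{enum_set, EnumSet, EnumSetType};

// Sketch: checksum kind and a FullObject modifier share one u8 bitset;
// intersecting with a mask that omits the modifier recovers the base kind.
#[derive(EnumSetType, Debug)]
#[enumset(repr = "u8")]
enum Mode {
    None,
    Sha256,
    Crc32,
    Crc32c,
    FullObject,
}

fn main() {
    let mask: EnumSet<Mode> = EnumSet::all() - Mode::FullObject;
    let full_crc32c = enum_set!(Mode::Crc32c | Mode::FullObject);
    assert_eq!(full_crc32c & mask, enum_set!(Mode::Crc32c)); // modifier dropped
    assert!(full_crc32c.contains(Mode::FullObject)); // modifier still queryable
}
```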

View File

@@ -0,0 +1,270 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// use crate::error::StdError;
// use bytes::Bytes;
// use futures::pin_mut;
// use futures::stream::{Stream, StreamExt};
// use std::future::Future;
// use std::pin::Pin;
// use std::task::{Context, Poll};
// use transform_stream::AsyncTryStream;
// pub type SyncBoxFuture<'a, T> = Pin<Box<dyn Future<Output = T> + Send + Sync + 'a>>;
// pub struct ChunkedStream<'a> {
// /// inner
// inner: AsyncTryStream<Bytes, StdError, SyncBoxFuture<'a, Result<(), StdError>>>,
// remaining_length: usize,
// }
// impl<'a> ChunkedStream<'a> {
// pub fn new<S>(body: S, content_length: usize, chunk_size: usize, need_padding: bool) -> Self
// where
// S: Stream<Item = Result<Bytes, StdError>> + Send + Sync + 'a,
// {
// let inner = AsyncTryStream::<_, _, SyncBoxFuture<'a, Result<(), StdError>>>::new(|mut y| {
// #[allow(clippy::shadow_same)] // necessary for `pin_mut!`
// Box::pin(async move {
// pin_mut!(body);
// // Data left over from the previous call
// let mut prev_bytes = Bytes::new();
// let mut read_size = 0;
// loop {
// let data: Vec<Bytes> = {
// // Read a fixed-size chunk
// match Self::read_data(body.as_mut(), prev_bytes, chunk_size).await {
// None => break,
// Some(Err(e)) => return Err(e),
// Some(Ok((data, remaining_bytes))) => {
// // debug!(
// // "content_length:{},read_size:{}, read_data data:{}, remaining_bytes: {} ",
// // content_length,
// // read_size,
// // data.len(),
// // remaining_bytes.len()
// // );
// prev_bytes = remaining_bytes;
// data
// }
// }
// };
// for bytes in data {
// read_size += bytes.len();
// // debug!("read_size {}, content_length {}", read_size, content_length,);
// y.yield_ok(bytes).await;
// }
// if read_size + prev_bytes.len() >= content_length {
// // debug!(
// // "Finished reading: read_size:{} + prev_bytes.len({}) == content_length {}",
// // read_size,
// // prev_bytes.len(),
// // content_length,
// // );
// // Pad with zeros?
// if !need_padding {
// y.yield_ok(prev_bytes).await;
// break;
// }
// let mut bytes = vec![0u8; chunk_size];
// let (left, _) = bytes.split_at_mut(prev_bytes.len());
// left.copy_from_slice(&prev_bytes);
// y.yield_ok(Bytes::from(bytes)).await;
// break;
// }
// }
// // debug!("chunked stream exit");
// Ok(())
// })
// });
// Self {
// inner,
// remaining_length: content_length,
// }
// }
// /// read data and return remaining bytes
// async fn read_data<S>(
// mut body: Pin<&mut S>,
// prev_bytes: Bytes,
// data_size: usize,
// ) -> Option<Result<(Vec<Bytes>, Bytes), StdError>>
// where
// S: Stream<Item = Result<Bytes, StdError>> + Send,
// {
// let mut bytes_buffer = Vec::new();
// // Run only once
// let mut push_data_bytes = |mut bytes: Bytes| {
// // debug!("read from body {} split per {}, prev_bytes: {}", bytes.len(), data_size, prev_bytes.len());
// if bytes.is_empty() {
// return None;
// }
// if data_size == 0 {
// return Some(bytes);
// }
// // Merge with the previous data
// if !prev_bytes.is_empty() {
// let need_size = data_size.wrapping_sub(prev_bytes.len());
// // debug!(
// // "Previous leftover {}, take {} now, total: {}",
// // prev_bytes.len(),
// // need_size,
// // prev_bytes.len() + need_size
// // );
// if bytes.len() >= need_size {
// let data = bytes.split_to(need_size);
// let mut combined = Vec::new();
// combined.extend_from_slice(&prev_bytes);
// combined.extend_from_slice(&data);
// // debug!(
// // "Fetched more bytes than needed: {}, merged result {}, remaining bytes {}",
// // need_size,
// // combined.len(),
// // bytes.len(),
// // );
// bytes_buffer.push(Bytes::from(combined));
// } else {
// let mut combined = Vec::new();
// combined.extend_from_slice(&prev_bytes);
// combined.extend_from_slice(&bytes);
// // debug!(
// // "Fetched fewer bytes than needed: {}, merged result {}, remaining bytes {}, return immediately",
// // need_size,
// // combined.len(),
// // bytes.len(),
// // );
// return Some(Bytes::from(combined));
// }
// }
// // If the fetched data exceeds the chunk, slice the required size
// if data_size <= bytes.len() {
// let n = bytes.len() / data_size;
// for _ in 0..n {
// let data = bytes.split_to(data_size);
// // println!("bytes_buffer.push: {}, remaining: {}", data.len(), bytes.len());
// bytes_buffer.push(data);
// }
// Some(bytes)
// } else {
// // Insufficient data
// Some(bytes)
// }
// };
// // Remaining data
// let remaining_bytes = 'outer: {
// // // Exit if the previous data was sufficient
// // if let Some(remaining_bytes) = push_data_bytes(prev_bytes) {
// // println!("Consuming leftovers");
// // break 'outer remaining_bytes;
// // }
// loop {
// match body.next().await? {
// Err(e) => return Some(Err(e)),
// Ok(bytes) => {
// if let Some(remaining_bytes) = push_data_bytes(bytes) {
// break 'outer remaining_bytes;
// }
// }
// }
// }
// };
// Some(Ok((bytes_buffer, remaining_bytes)))
// }
// fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Result<Bytes, StdError>>> {
// let ans = Pin::new(&mut self.inner).poll_next(cx);
// if let Poll::Ready(Some(Ok(ref bytes))) = ans {
// self.remaining_length = self.remaining_length.saturating_sub(bytes.len());
// }
// ans
// }
// // pub fn exact_remaining_length(&self) -> usize {
// // self.remaining_length
// // }
// }
// impl Stream for ChunkedStream<'_> {
// type Item = Result<Bytes, StdError>;
// fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
// self.poll(cx)
// }
// fn size_hint(&self) -> (usize, Option<usize>) {
// (0, None)
// }
// }
// #[cfg(test)]
// mod test {
// use super::*;
// #[tokio::test]
// async fn test_chunked_stream() {
// let chunk_size = 4;
// let data1 = vec![1u8; 7777]; // 65536
// let data2 = vec![1u8; 7777]; // 65536
// let content_length = data1.len() + data2.len();
// let chunk1 = Bytes::from(data1);
// let chunk2 = Bytes::from(data2);
// let chunk_results: Vec<Result<Bytes, _>> = vec![Ok(chunk1), Ok(chunk2)];
// let stream = futures::stream::iter(chunk_results);
// let mut chunked_stream = ChunkedStream::new(stream, content_length, chunk_size, true);
// loop {
// let ans1 = chunked_stream.next().await;
// if ans1.is_none() {
// break;
// }
// let bytes = ans1.unwrap().unwrap();
// assert!(bytes.len() == chunk_size)
// }
// // assert_eq!(ans1.unwrap(), chunk1_data.as_slice());
// }
// }

View File

@@ -0,0 +1,59 @@
#![allow(clippy::map_entry)]
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::{collections::HashMap, sync::Arc};
use crate::{
disk::{
error::{is_unformatted_disk, DiskError},
format::{DistributionAlgoVersion, FormatV3},
new_disk, DiskAPI, DiskInfo, DiskOption, DiskStore,
},
store_api::{
BucketInfo, BucketOptions, CompletePart, DeleteBucketOptions, DeletedObject, GetObjectReader, HTTPRangeSpec,
ListMultipartsInfo, ListObjectVersionsInfo, ListObjectsV2Info, MakeBucketOptions, MultipartInfo, MultipartUploadResult,
ObjectIO, ObjectInfo, ObjectOptions, ObjectToDelete, PartInfo, PutObjReader, StorageAPI,
},
credentials::{Credentials, SignatureType,},
api_put_object_multipart::UploadPartParams,
};
use http::HeaderMap;
use tokio_util::sync::CancellationToken;
use tracing::warn;
use tracing::{error, info};
use url::Url;
struct HookReader {
source: GetObjectReader,
hook: GetObjectReader,
}
impl HookReader {
pub fn new(source: GetObjectReader, hook: GetObjectReader) -> HookReader {
HookReader {
source,
hook,
}
}
fn seek(&self, offset: i64, whence: i64) -> Result<i64> {
todo!();
}
fn read(&self, b: &[u8]) -> Result<i64> {
todo!();
}
}
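
The new `HookReader` stubs `seek` and `read` with `todo!()`. Going by the name, a hook reader usually forwards reads from `source` while mirroring progress into `hook`; a synchronous sketch of that assumed tee semantics with std-io stand-ins:

```rust
use std::io::{self, Read, Write};

// Sketch (assumed semantics, not the crate's API): forward reads from
// `source` and mirror every byte into `hook`, e.g. for progress accounting.
struct HookReader<R: Read, W: Write> {
    source: R,
    hook: W,
}

impl<R: Read, W: Write> Read for HookReader<R, W> {
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let n = self.source.read(buf)?;
        self.hook.write_all(&buf[..n])?;
        Ok(n)
    }
}

fn main() -> io::Result<()> {
    let mut mirrored = Vec::new();
    let mut reader = HookReader { source: &b"hello"[..], hook: &mut mirrored };
    let mut out = String::new();
    reader.read_to_string(&mut out)?;
    assert_eq!(out, "hello");
    assert_eq!(mirrored, b"hello");
    Ok(())
}
```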

View File

@@ -1,770 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::disk::{
CheckPartsResp, DeleteOptions, DiskAPI, DiskError, DiskInfo, DiskInfoOptions, DiskLocation, Endpoint, Error,
FileInfoVersions, ReadMultipleReq, ReadMultipleResp, ReadOptions, RenameDataResp, Result, UpdateMetadataOpts, VolumeInfo,
WalkDirOptions, local::LocalDisk,
};
use bytes::Bytes;
use rustfs_filemeta::{FileInfo, ObjectPartInfo, RawFileInfo};
use rustfs_utils::string::parse_bool_with_default;
use std::{
path::PathBuf,
sync::{
Arc,
atomic::{AtomicI64, AtomicU32, Ordering},
},
time::Duration,
};
use tokio::{sync::RwLock, time};
use tokio_util::sync::CancellationToken;
use tracing::{debug, info, warn};
use uuid::Uuid;
/// Disk health status constants
const DISK_HEALTH_OK: u32 = 0;
const DISK_HEALTH_FAULTY: u32 = 1;
pub const ENV_RUSTFS_DRIVE_ACTIVE_MONITORING: &str = "RUSTFS_DRIVE_ACTIVE_MONITORING";
pub const ENV_RUSTFS_DRIVE_MAX_TIMEOUT_DURATION: &str = "RUSTFS_DRIVE_MAX_TIMEOUT_DURATION";
pub const CHECK_EVERY: Duration = Duration::from_secs(15);
pub const SKIP_IF_SUCCESS_BEFORE: Duration = Duration::from_secs(5);
pub const CHECK_TIMEOUT_DURATION: Duration = Duration::from_secs(5);
lazy_static::lazy_static! {
static ref TEST_OBJ: String = format!("health-check-{}", Uuid::new_v4());
static ref TEST_DATA: Bytes = Bytes::from(vec![42u8; 2048]);
static ref TEST_BUCKET: String = ".rustfs.sys/tmp".to_string();
}
pub fn get_max_timeout_duration() -> Duration {
std::env::var(ENV_RUSTFS_DRIVE_MAX_TIMEOUT_DURATION)
.map(|v| Duration::from_secs(v.parse::<u64>().unwrap_or(30)))
.unwrap_or(Duration::from_secs(30))
}
/// DiskHealthTracker tracks the health status of a disk.
/// Similar to Go's diskHealthTracker.
#[derive(Debug)]
pub struct DiskHealthTracker {
/// Atomic timestamp of last successful operation
pub last_success: AtomicI64,
/// Atomic timestamp of last operation start
pub last_started: AtomicI64,
/// Atomic disk status (OK or Faulty)
pub status: AtomicU32,
/// Atomic number of waiting operations
pub waiting: AtomicU32,
}
impl DiskHealthTracker {
/// Create a new disk health tracker
pub fn new() -> Self {
let now = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_nanos() as i64;
Self {
last_success: AtomicI64::new(now),
last_started: AtomicI64::new(now),
status: AtomicU32::new(DISK_HEALTH_OK),
waiting: AtomicU32::new(0),
}
}
/// Log a successful operation
pub fn log_success(&self) {
let now = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_nanos() as i64;
self.last_success.store(now, Ordering::Relaxed);
}
/// Check if disk is faulty
pub fn is_faulty(&self) -> bool {
self.status.load(Ordering::Relaxed) == DISK_HEALTH_FAULTY
}
/// Set disk as faulty
pub fn set_faulty(&self) {
self.status.store(DISK_HEALTH_FAULTY, Ordering::Relaxed);
}
/// Set disk as OK
pub fn set_ok(&self) {
self.status.store(DISK_HEALTH_OK, Ordering::Relaxed);
}
pub fn swap_ok_to_faulty(&self) -> bool {
self.status
.compare_exchange(DISK_HEALTH_OK, DISK_HEALTH_FAULTY, Ordering::Relaxed, Ordering::Relaxed)
.is_ok()
}
/// Increment waiting operations counter
pub fn increment_waiting(&self) {
self.waiting.fetch_add(1, Ordering::Relaxed);
}
/// Decrement waiting operations counter
pub fn decrement_waiting(&self) {
self.waiting.fetch_sub(1, Ordering::Relaxed);
}
/// Get waiting operations count
pub fn waiting_count(&self) -> u32 {
self.waiting.load(Ordering::Relaxed)
}
/// Get last success timestamp
pub fn last_success(&self) -> i64 {
self.last_success.load(Ordering::Relaxed)
}
}
impl Default for DiskHealthTracker {
fn default() -> Self {
Self::new()
}
}
/// Health check context key for tracking disk operations
#[derive(Debug, Clone)]
struct HealthDiskCtxKey;
#[derive(Debug)]
struct HealthDiskCtxValue {
last_success: Arc<AtomicI64>,
}
impl HealthDiskCtxValue {
fn log_success(&self) {
let now = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_nanos() as i64;
self.last_success.store(now, Ordering::Relaxed);
}
}
/// LocalDiskWrapper wraps a DiskStore with health tracking capabilities.
/// This is similar to Go's xlStorageDiskIDCheck.
#[derive(Debug, Clone)]
pub struct LocalDiskWrapper {
/// The underlying disk store
disk: Arc<LocalDisk>,
/// Health tracker
health: Arc<DiskHealthTracker>,
/// Whether health checking is enabled
health_check: bool,
/// Cancellation token for monitoring tasks
cancel_token: CancellationToken,
/// Disk ID for stale checking
disk_id: Arc<RwLock<Option<Uuid>>>,
}
impl LocalDiskWrapper {
/// Create a new LocalDiskWrapper
pub fn new(disk: Arc<LocalDisk>, health_check: bool) -> Self {
// Health checking is enabled only when both the constructor flag and the
// RUSTFS_DRIVE_ACTIVE_MONITORING env var (which defaults to true) allow it.
let env_health_check = std::env::var(ENV_RUSTFS_DRIVE_ACTIVE_MONITORING)
.map(|v| parse_bool_with_default(&v, true))
.unwrap_or(true);
let ret = Self {
disk,
health: Arc::new(DiskHealthTracker::new()),
health_check: health_check && env_health_check,
cancel_token: CancellationToken::new(),
disk_id: Arc::new(RwLock::new(None)),
};
ret.start_monitoring();
ret
}
pub fn get_disk(&self) -> Arc<LocalDisk> {
self.disk.clone()
}
/// Start the disk monitoring if health_check is enabled
pub fn start_monitoring(&self) {
if self.health_check {
let health = Arc::clone(&self.health);
let cancel_token = self.cancel_token.clone();
let disk = Arc::clone(&self.disk);
tokio::spawn(async move {
Self::monitor_disk_writable(disk, health, cancel_token).await;
});
}
}
/// Stop the disk monitoring
pub async fn stop_monitoring(&self) {
self.cancel_token.cancel();
}
/// Monitor disk writability periodically
async fn monitor_disk_writable(disk: Arc<LocalDisk>, health: Arc<DiskHealthTracker>, cancel_token: CancellationToken) {
// TODO: config interval
let mut interval = time::interval(CHECK_EVERY);
loop {
tokio::select! {
_ = cancel_token.cancelled() => {
return;
}
_ = interval.tick() => {
if cancel_token.is_cancelled() {
return;
}
if health.status.load(Ordering::Relaxed) != DISK_HEALTH_OK {
continue;
}
let last_success_nanos = health.last_success.load(Ordering::Relaxed);
let elapsed = Duration::from_nanos(
(std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_nanos() as i64 - last_success_nanos) as u64
);
if elapsed < SKIP_IF_SUCCESS_BEFORE {
continue;
}
tokio::time::sleep(Duration::from_secs(1)).await;
debug!("health check: performing health check");
if Self::perform_health_check(disk.clone(), &TEST_BUCKET, &TEST_OBJ, &TEST_DATA, true, CHECK_TIMEOUT_DURATION).await.is_err() && health.swap_ok_to_faulty() {
// Health check failed, disk is considered faulty
// Hold a waiting slot while the disk is offline; monitor_disk_status
// releases it once the disk comes back online.
health.increment_waiting();
let health_clone = Arc::clone(&health);
let disk_clone = disk.clone();
let cancel_clone = cancel_token.clone();
tokio::spawn(async move {
Self::monitor_disk_status(disk_clone, health_clone, cancel_clone).await;
});
}
}
}
}
}
/// Perform a health check by writing and reading a test file
async fn perform_health_check(
disk: Arc<LocalDisk>,
test_bucket: &str,
test_filename: &str,
test_data: &Bytes,
check_faulty_only: bool,
timeout_duration: Duration,
) -> Result<()> {
// Perform health check with timeout
let health_check_result = tokio::time::timeout(timeout_duration, async {
// Try to write test data
disk.write_all(test_bucket, test_filename, test_data.clone()).await?;
// Try to read back the data
let read_data = disk.read_all(test_bucket, test_filename).await?;
// Verify data integrity
if read_data.len() != test_data.len() {
warn!(
"health check: test file data length mismatch: expected {} bytes, got {}",
test_data.len(),
read_data.len()
);
if check_faulty_only {
return Ok(());
}
return Err(DiskError::FaultyDisk);
}
// Clean up
disk.delete(
test_bucket,
test_filename,
DeleteOptions {
recursive: false,
immediate: false,
undo_write: false,
old_data_dir: None,
},
)
.await?;
Ok(())
})
.await;
match health_check_result {
Ok(result) => match result {
Ok(()) => Ok(()),
Err(e) => {
debug!("health check: failed: {:?}", e);
if e == DiskError::FaultyDisk {
return Err(e);
}
if check_faulty_only { Ok(()) } else { Err(e) }
}
},
Err(_) => {
// Timeout occurred
warn!("health check: timeout after {:?}", timeout_duration);
Err(DiskError::FaultyDisk)
}
}
}
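// Illustrative note (not part of this diff): with check_faulty_only = true (the
// periodic writability probe), only a hard FaultyDisk error or a timeout marks
// the disk faulty; other I/O errors are tolerated. The recovery path below
// passes false, so any failure keeps the disk offline.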
/// Monitor disk status and try to bring it back online
async fn monitor_disk_status(disk: Arc<LocalDisk>, health: Arc<DiskHealthTracker>, cancel_token: CancellationToken) {
// Recheck faulty disks more aggressively; this intentionally shadows the
// module-level CHECK_EVERY (15s) with a shorter 5s interval.
const CHECK_EVERY: Duration = Duration::from_secs(5);
let mut interval = time::interval(CHECK_EVERY);
loop {
tokio::select! {
_ = cancel_token.cancelled() => {
return;
}
_ = interval.tick() => {
if cancel_token.is_cancelled() {
return;
}
match Self::perform_health_check(disk.clone(), &TEST_BUCKET, &TEST_OBJ, &TEST_DATA, false, CHECK_TIMEOUT_DURATION).await {
Ok(_) => {
info!("Disk {} is back online", disk.to_string());
health.set_ok();
health.decrement_waiting();
return;
}
Err(e) => {
warn!("Disk {} still faulty: {:?}", disk.to_string(), e);
}
}
}
}
}
}
async fn check_id(&self, want_id: Option<Uuid>) -> Result<()> {
if want_id.is_none() {
return Ok(());
}
let stored_disk_id = self.disk.get_disk_id().await?;
if stored_disk_id != want_id {
return Err(Error::other(format!("Disk ID mismatch wanted {:?}, got {:?}", want_id, stored_disk_id)));
}
Ok(())
}
/// Check if disk ID is stale
async fn check_disk_stale(&self) -> Result<()> {
let Some(current_disk_id) = *self.disk_id.read().await else {
return Ok(());
};
let stored_disk_id = match self.disk.get_disk_id().await? {
Some(id) => id,
None => return Ok(()), // Empty disk ID is allowed during initialization
};
if current_disk_id != stored_disk_id {
return Err(DiskError::DiskNotFound);
}
Ok(())
}
/// Set the disk ID
pub async fn set_disk_id_internal(&self, id: Option<Uuid>) -> Result<()> {
let mut disk_id = self.disk_id.write().await;
*disk_id = id;
Ok(())
}
/// Get the current disk ID
pub async fn get_current_disk_id(&self) -> Option<Uuid> {
*self.disk_id.read().await
}
/// Wrap a disk operation with health tracking: reject faulty or stale disks
/// up front, record start/success timestamps, and enforce the given timeout
/// (Duration::ZERO runs the operation without one).
pub async fn track_disk_health<T, F, Fut>(&self, operation: F, timeout_duration: Duration) -> Result<T>
where
F: FnOnce() -> Fut,
Fut: std::future::Future<Output = Result<T>>,
{
// Check if disk is faulty
if self.health.is_faulty() {
warn!("disk {} health is faulty, returning error", self.to_string());
return Err(DiskError::FaultyDisk);
}
// Check if disk is stale
self.check_disk_stale().await?;
// Record operation start
let now = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_nanos() as i64;
self.health.last_started.store(now, Ordering::Relaxed);
self.health.increment_waiting();
if timeout_duration == Duration::ZERO {
let result = operation().await;
self.health.decrement_waiting();
if result.is_ok() {
self.health.log_success();
}
return result;
}
// Execute the operation with timeout
let result = tokio::time::timeout(timeout_duration, operation()).await;
match result {
Ok(operation_result) => {
// Log success and decrement waiting counter
if operation_result.is_ok() {
self.health.log_success();
}
self.health.decrement_waiting();
operation_result
}
Err(_) => {
// Timeout occurred; release the waiting slot and surface a timeout error
self.health.decrement_waiting();
warn!("disk operation timeout after {:?}", timeout_duration);
Err(DiskError::other(format!("disk operation timeout after {:?}", timeout_duration)))
}
}
}
}
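// Illustrative sketch (not part of this diff; bucket and path names are
// hypothetical): every DiskAPI method below funnels through track_disk_health,
// which rejects faulty or stale disks up front, counts the in-flight operation,
// and applies the configured timeout.
//
//     // Implicitly tracked and timed out:
//     let data = wrapper.read_all(".rustfs.sys", "config/settings.json").await?;
//     // Or wrap a custom sequence explicitly (Duration::ZERO = no timeout):
//     wrapper
//         .track_disk_health(|| async { wrapper.get_disk().read_all("bucket", "path").await }, Duration::ZERO)
//         .await?;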
#[async_trait::async_trait]
impl DiskAPI for LocalDiskWrapper {
fn to_string(&self) -> String {
self.disk.to_string()
}
async fn is_online(&self) -> bool {
let Ok(Some(disk_id)) = self.disk.get_disk_id().await else {
return false;
};
let Some(current_disk_id) = *self.disk_id.read().await else {
return false;
};
current_disk_id == disk_id
}
fn is_local(&self) -> bool {
self.disk.is_local()
}
fn host_name(&self) -> String {
self.disk.host_name()
}
fn endpoint(&self) -> Endpoint {
self.disk.endpoint()
}
async fn close(&self) -> Result<()> {
self.stop_monitoring().await;
self.disk.close().await
}
async fn get_disk_id(&self) -> Result<Option<Uuid>> {
self.disk.get_disk_id().await
}
async fn set_disk_id(&self, id: Option<Uuid>) -> Result<()> {
self.set_disk_id_internal(id).await
}
fn path(&self) -> PathBuf {
self.disk.path()
}
fn get_disk_location(&self) -> DiskLocation {
self.disk.get_disk_location()
}
async fn disk_info(&self, opts: &DiskInfoOptions) -> Result<DiskInfo> {
if opts.noop && opts.metrics {
let mut info = DiskInfo::default();
// Add health metrics
info.metrics.total_waiting = self.health.waiting_count();
if self.health.is_faulty() {
return Err(DiskError::FaultyDisk);
}
return Ok(info);
}
if self.health.is_faulty() {
return Err(DiskError::FaultyDisk);
}
let result = self.disk.disk_info(opts).await?;
if let Some(current_disk_id) = *self.disk_id.read().await
&& Some(current_disk_id) != result.id
{
return Err(DiskError::DiskNotFound);
};
Ok(result)
}
async fn make_volume(&self, volume: &str) -> Result<()> {
self.track_disk_health(|| async { self.disk.make_volume(volume).await }, get_max_timeout_duration())
.await
}
async fn make_volumes(&self, volumes: Vec<&str>) -> Result<()> {
self.track_disk_health(|| async { self.disk.make_volumes(volumes).await }, get_max_timeout_duration())
.await
}
async fn list_volumes(&self) -> Result<Vec<VolumeInfo>> {
self.track_disk_health(|| async { self.disk.list_volumes().await }, Duration::ZERO)
.await
}
async fn stat_volume(&self, volume: &str) -> Result<VolumeInfo> {
self.track_disk_health(|| async { self.disk.stat_volume(volume).await }, get_max_timeout_duration())
.await
}
async fn delete_volume(&self, volume: &str) -> Result<()> {
self.track_disk_health(|| async { self.disk.delete_volume(volume).await }, Duration::ZERO)
.await
}
async fn walk_dir<W: tokio::io::AsyncWrite + Unpin + Send>(&self, opts: WalkDirOptions, wr: &mut W) -> Result<()> {
self.track_disk_health(|| async { self.disk.walk_dir(opts, wr).await }, Duration::ZERO)
.await
}
async fn delete_version(
&self,
volume: &str,
path: &str,
fi: FileInfo,
force_del_marker: bool,
opts: DeleteOptions,
) -> Result<()> {
self.track_disk_health(
|| async { self.disk.delete_version(volume, path, fi, force_del_marker, opts).await },
get_max_timeout_duration(),
)
.await
}
async fn delete_versions(&self, volume: &str, versions: Vec<FileInfoVersions>, opts: DeleteOptions) -> Vec<Option<Error>> {
// Check if disk is faulty before proceeding
if self.health.is_faulty() {
return vec![Some(DiskError::FaultyDisk); versions.len()];
}
// Check if disk is stale
if let Err(e) = self.check_disk_stale().await {
return vec![Some(e); versions.len()];
}
// Record operation start
let now = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_nanos() as i64;
self.health.last_started.store(now, Ordering::Relaxed);
self.health.increment_waiting();
// Execute the operation
let result = self.disk.delete_versions(volume, versions, opts).await;
self.health.decrement_waiting();
let has_err = result.iter().any(|e| e.is_some());
if !has_err {
// All deletions succeeded; record the success
self.health.log_success();
}
result
}
async fn delete_paths(&self, volume: &str, paths: &[String]) -> Result<()> {
self.track_disk_health(|| async { self.disk.delete_paths(volume, paths).await }, get_max_timeout_duration())
.await
}
async fn write_metadata(&self, org_volume: &str, volume: &str, path: &str, fi: FileInfo) -> Result<()> {
self.track_disk_health(
|| async { self.disk.write_metadata(org_volume, volume, path, fi).await },
get_max_timeout_duration(),
)
.await
}
async fn update_metadata(&self, volume: &str, path: &str, fi: FileInfo, opts: &UpdateMetadataOpts) -> Result<()> {
self.track_disk_health(
|| async { self.disk.update_metadata(volume, path, fi, opts).await },
get_max_timeout_duration(),
)
.await
}
async fn read_version(
&self,
org_volume: &str,
volume: &str,
path: &str,
version_id: &str,
opts: &ReadOptions,
) -> Result<FileInfo> {
self.track_disk_health(
|| async { self.disk.read_version(org_volume, volume, path, version_id, opts).await },
get_max_timeout_duration(),
)
.await
}
async fn read_xl(&self, volume: &str, path: &str, read_data: bool) -> Result<RawFileInfo> {
self.track_disk_health(|| async { self.disk.read_xl(volume, path, read_data).await }, get_max_timeout_duration())
.await
}
async fn rename_data(
&self,
src_volume: &str,
src_path: &str,
fi: FileInfo,
dst_volume: &str,
dst_path: &str,
) -> Result<RenameDataResp> {
self.track_disk_health(
|| async { self.disk.rename_data(src_volume, src_path, fi, dst_volume, dst_path).await },
get_max_timeout_duration(),
)
.await
}
async fn list_dir(&self, origvolume: &str, volume: &str, dir_path: &str, count: i32) -> Result<Vec<String>> {
self.track_disk_health(
|| async { self.disk.list_dir(origvolume, volume, dir_path, count).await },
get_max_timeout_duration(),
)
.await
}
async fn read_file(&self, volume: &str, path: &str) -> Result<crate::disk::FileReader> {
self.track_disk_health(|| async { self.disk.read_file(volume, path).await }, get_max_timeout_duration())
.await
}
async fn read_file_stream(&self, volume: &str, path: &str, offset: usize, length: usize) -> Result<crate::disk::FileReader> {
self.track_disk_health(
|| async { self.disk.read_file_stream(volume, path, offset, length).await },
get_max_timeout_duration(),
)
.await
}
async fn append_file(&self, volume: &str, path: &str) -> Result<crate::disk::FileWriter> {
self.track_disk_health(|| async { self.disk.append_file(volume, path).await }, Duration::ZERO)
.await
}
async fn create_file(&self, origvolume: &str, volume: &str, path: &str, file_size: i64) -> Result<crate::disk::FileWriter> {
self.track_disk_health(
|| async { self.disk.create_file(origvolume, volume, path, file_size).await },
Duration::ZERO,
)
.await
}
async fn rename_file(&self, src_volume: &str, src_path: &str, dst_volume: &str, dst_path: &str) -> Result<()> {
self.track_disk_health(
|| async { self.disk.rename_file(src_volume, src_path, dst_volume, dst_path).await },
get_max_timeout_duration(),
)
.await
}
async fn rename_part(&self, src_volume: &str, src_path: &str, dst_volume: &str, dst_path: &str, meta: Bytes) -> Result<()> {
self.track_disk_health(
|| async { self.disk.rename_part(src_volume, src_path, dst_volume, dst_path, meta).await },
get_max_timeout_duration(),
)
.await
}
async fn delete(&self, volume: &str, path: &str, opt: DeleteOptions) -> Result<()> {
self.track_disk_health(|| async { self.disk.delete(volume, path, opt).await }, get_max_timeout_duration())
.await
}
async fn verify_file(&self, volume: &str, path: &str, fi: &FileInfo) -> Result<CheckPartsResp> {
self.track_disk_health(|| async { self.disk.verify_file(volume, path, fi).await }, Duration::ZERO)
.await
}
async fn check_parts(&self, volume: &str, path: &str, fi: &FileInfo) -> Result<CheckPartsResp> {
self.track_disk_health(|| async { self.disk.check_parts(volume, path, fi).await }, Duration::ZERO)
.await
}
async fn read_parts(&self, bucket: &str, paths: &[String]) -> Result<Vec<ObjectPartInfo>> {
self.track_disk_health(|| async { self.disk.read_parts(bucket, paths).await }, Duration::ZERO)
.await
}
async fn read_multiple(&self, req: ReadMultipleReq) -> Result<Vec<ReadMultipleResp>> {
self.track_disk_health(|| async { self.disk.read_multiple(req).await }, Duration::ZERO)
.await
}
async fn write_all(&self, volume: &str, path: &str, data: Bytes) -> Result<()> {
self.track_disk_health(|| async { self.disk.write_all(volume, path, data).await }, get_max_timeout_duration())
.await
}
async fn read_all(&self, volume: &str, path: &str) -> Result<Bytes> {
self.track_disk_health(|| async { self.disk.read_all(volume, path).await }, get_max_timeout_duration())
.await
}
}
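// Illustrative sketch (not part of this diff; `ep` is a hypothetical Endpoint):
// wiring a LocalDisk into the health-checked wrapper. `new` spawns the
// writability monitor only when both the flag and RUSTFS_DRIVE_ACTIVE_MONITORING
// allow it.
//
//     let disk = LocalDisk::new(&ep, false).await?;
//     let wrapped = LocalDiskWrapper::new(Arc::new(disk), true);
//     assert!(!wrapped.is_online().await); // offline until a disk ID is set
//     wrapped.stop_monitoring().await;     // cancels the background task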

View File

@@ -69,7 +69,7 @@ use tokio::sync::RwLock;
use tracing::{debug, error, info, warn};
use uuid::Uuid;
#[derive(Debug, Clone)]
#[derive(Debug)]
pub struct FormatInfo {
pub id: Option<Uuid>,
pub data: Bytes,
@@ -77,6 +77,16 @@ pub struct FormatInfo {
pub last_check: Option<OffsetDateTime>,
}
impl FormatInfo {
pub fn last_check_valid(&self) -> bool {
let now = OffsetDateTime::now_utc();
self.file_info.is_some()
&& self.id.is_some()
&& self.last_check.is_some()
&& (now.unix_timestamp() - self.last_check.unwrap().unix_timestamp() <= 1)
}
}
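// Illustrative note (not part of this diff): last_check_valid acts as a
// one-second cache for get_disk_id, letting hot paths skip re-reading
// format.json; a check older than one second forces a fresh stat of the file.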
/// A helper enum to handle internal buffer types for writing data.
pub enum InternalBuf<'a> {
Ref(&'a [u8]),
@@ -175,7 +185,7 @@ impl LocalDisk {
};
let root_clone = root.clone();
let update_fn: UpdateFn<DiskInfo> = Box::new(move || {
let disk_id = id;
let disk_id = id.map_or("".to_string(), |id| id.to_string());
let root = root_clone.clone();
Box::pin(async move {
match get_disk_info(root.clone()).await {
@@ -190,7 +200,7 @@ impl LocalDisk {
minor: info.minor,
fs_type: info.fstype,
root_disk: root,
id: disk_id,
id: disk_id.to_string(),
..Default::default()
};
// if root {
@@ -1285,7 +1295,7 @@ impl DiskAPI for LocalDisk {
}
#[tracing::instrument(skip(self))]
async fn is_online(&self) -> bool {
true
self.check_format_json().await.is_ok()
}
#[tracing::instrument(skip(self))]
@@ -1332,40 +1342,24 @@ impl DiskAPI for LocalDisk {
#[tracing::instrument(level = "debug", skip(self))]
async fn get_disk_id(&self) -> Result<Option<Uuid>> {
let format_info = {
let format_info = self.format_info.read().await;
format_info.clone()
};
let mut format_info = self.format_info.write().await;
let id = format_info.id;
// if format_info.last_check_valid() {
// return Ok(id);
// }
if format_info.file_info.is_some() && id.is_some() {
// check last check time
if let Some(last_check) = format_info.last_check {
if last_check.unix_timestamp() + 1 < OffsetDateTime::now_utc().unix_timestamp() {
return Ok(id);
}
}
if format_info.last_check_valid() {
return Ok(id);
}
let file_meta = self.check_format_json().await?;
if let Some(file_info) = &format_info.file_info {
if super::fs::same_file(&file_meta, file_info) {
let mut format_info = self.format_info.write().await;
format_info.last_check = Some(OffsetDateTime::now_utc());
drop(format_info);
return Ok(id);
}
}
debug!("get_disk_id: read format.json");
let b = fs::read(&self.format_path).await.map_err(to_unformatted_disk_error)?;
let fm = FormatV3::try_from(b.as_slice()).map_err(|e| {
@@ -1381,19 +1375,20 @@ impl DiskAPI for LocalDisk {
return Err(DiskError::InconsistentDisk);
}
let mut format_info = self.format_info.write().await;
format_info.id = Some(disk_id);
format_info.file_info = Some(file_meta);
format_info.data = b.into();
format_info.last_check = Some(OffsetDateTime::now_utc());
drop(format_info);
Ok(Some(disk_id))
}
#[tracing::instrument(skip(self))]
async fn set_disk_id(&self, _id: Option<Uuid>) -> Result<()> {
async fn set_disk_id(&self, id: Option<Uuid>) -> Result<()> {
// No setup is required locally
// TODO: add check_id_store
let mut format_info = self.format_info.write().await;
format_info.id = id;
Ok(())
}
@@ -2443,10 +2438,6 @@ impl DiskAPI for LocalDisk {
info.endpoint = self.endpoint.to_string();
info.scanning = self.scanning.load(Ordering::SeqCst) == 1;
if info.id.is_none() {
info.id = self.get_disk_id().await.unwrap_or(None);
}
Ok(info)
}
}
@@ -2714,6 +2705,39 @@ mod test {
}
}
#[tokio::test]
async fn test_format_info_last_check_valid() {
let now = OffsetDateTime::now_utc();
// Valid format info
let valid_format_info = FormatInfo {
id: Some(Uuid::new_v4()),
data: vec![1, 2, 3].into(),
file_info: Some(fs::metadata("../../../..").await.unwrap()),
last_check: Some(now),
};
assert!(valid_format_info.last_check_valid());
// Invalid format info (missing id)
let invalid_format_info = FormatInfo {
id: None,
data: vec![1, 2, 3].into(),
file_info: Some(fs::metadata("../../../..").await.unwrap()),
last_check: Some(now),
};
assert!(!invalid_format_info.last_check_valid());
// Invalid format info (old timestamp)
let old_time = OffsetDateTime::now_utc() - time::Duration::seconds(10);
let old_format_info = FormatInfo {
id: Some(Uuid::new_v4()),
data: vec![1, 2, 3].into(),
file_info: Some(fs::metadata("../../../..").await.unwrap()),
last_check: Some(old_time),
};
assert!(!old_format_info.last_check_valid());
}
#[tokio::test]
async fn test_read_file_exists() {
let test_file = "./test_read_exists.txt";

View File

@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
pub mod disk_store;
pub mod endpoint;
pub mod error;
pub mod error_conv;
@@ -31,7 +30,6 @@ pub const FORMAT_CONFIG_FILE: &str = "format.json";
pub const STORAGE_FORMAT_FILE: &str = "xl.meta";
pub const STORAGE_FORMAT_FILE_BACKUP: &str = "xl.meta.bkp";
use crate::disk::disk_store::LocalDiskWrapper;
use crate::rpc::RemoteDisk;
use bytes::Bytes;
use endpoint::Endpoint;
@@ -53,7 +51,7 @@ pub type FileWriter = Box<dyn AsyncWrite + Send + Sync + Unpin>;
#[derive(Debug)]
pub enum Disk {
Local(Box<LocalDiskWrapper>),
Local(Box<LocalDisk>),
Remote(Box<RemoteDisk>),
}
@@ -400,7 +398,7 @@ impl DiskAPI for Disk {
pub async fn new_disk(ep: &Endpoint, opt: &DiskOption) -> Result<DiskStore> {
if ep.is_local {
let s = LocalDisk::new(ep, opt.cleanup).await?;
Ok(Arc::new(Disk::Local(Box::new(LocalDiskWrapper::new(Arc::new(s), opt.health_check)))))
Ok(Arc::new(Disk::Local(Box::new(s))))
} else {
let remote_disk = RemoteDisk::new(ep, opt).await?;
Ok(Arc::new(Disk::Remote(Box::new(remote_disk))))
@@ -536,7 +534,7 @@ pub struct DiskInfo {
pub scanning: bool,
pub endpoint: String,
pub mount_path: String,
pub id: Option<Uuid>,
pub id: String,
pub rotational: bool,
pub metrics: DiskMetrics,
pub error: String,
@@ -1017,7 +1015,7 @@ mod tests {
let endpoint = Endpoint::try_from(test_dir).unwrap();
let local_disk = LocalDisk::new(&endpoint, false).await.unwrap();
let disk = Disk::Local(Box::new(LocalDiskWrapper::new(Arc::new(local_disk), false)));
let disk = Disk::Local(Box::new(local_disk));
// Test basic methods
assert!(disk.is_local());

View File

@@ -0,0 +1,586 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::bitrot::{BitrotReader, BitrotWriter};
use crate::disk::error::{Error, Result};
use crate::disk::error_reduce::{reduce_write_quorum_errs, OBJECT_OP_IGNORED_ERRS};
use crate::io::Etag;
use bytes::{Bytes, BytesMut};
use futures::future::join_all;
use reed_solomon_erasure::galois_8::ReedSolomon;
use smallvec::SmallVec;
use std::any::Any;
use std::io::ErrorKind;
use std::sync::Arc;
use tokio::io::{AsyncRead, AsyncWrite};
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tokio::sync::mpsc;
use tracing::{error, info, warn};
use uuid::Uuid;
use crate::disk::error::DiskError;
#[derive(Default)]
pub struct Erasure {
data_shards: usize,
parity_shards: usize,
encoder: Option<ReedSolomon>,
pub block_size: usize,
_id: Uuid,
_buf: Vec<u8>,
}
impl Erasure {
pub fn new(data_shards: usize, parity_shards: usize, block_size: usize) -> Self {
// debug!(
// "Erasure new data_shards {},parity_shards {} block_size {} ",
// data_shards, parity_shards, block_size
// );
let mut encoder = None;
if parity_shards > 0 {
encoder = Some(ReedSolomon::new(data_shards, parity_shards).unwrap());
}
Erasure {
data_shards,
parity_shards,
block_size,
encoder,
_id: Uuid::new_v4(),
_buf: vec![0u8; block_size],
}
}
#[tracing::instrument(level = "info", skip(self, reader, writers))]
pub async fn encode<S>(
self: Arc<Self>,
mut reader: S,
writers: &mut [Option<BitrotWriter>],
// block_size: usize,
total_size: usize,
write_quorum: usize,
) -> Result<(usize, String)>
where
S: AsyncRead + Etag + Unpin + Send + 'static,
{
let (tx, mut rx) = mpsc::channel(5);
let task = tokio::spawn(async move {
let mut buf = vec![0u8; self.block_size];
let mut total: usize = 0;
loop {
if total_size > 0 {
let new_len = {
let remain = total_size - total;
if remain > self.block_size { self.block_size } else { remain }
};
if new_len == 0 && total > 0 {
break;
}
buf.resize(new_len, 0u8);
match reader.read_exact(&mut buf).await {
Ok(res) => res,
Err(e) => {
if let ErrorKind::UnexpectedEof = e.kind() {
break;
} else {
return Err(e.into());
}
}
};
total += buf.len();
}
let blocks = Arc::new(Box::pin(self.clone().encode_data(&buf)?));
let _ = tx.send(blocks).await;
if total_size == 0 {
break;
}
}
let etag = reader.etag().await;
Ok((total, etag))
});
while let Some(blocks) = rx.recv().await {
let write_futures = writers.iter_mut().enumerate().map(|(i, w_op)| {
let i_inner = i;
let blocks_inner = blocks.clone();
async move {
if let Some(w) = w_op {
w.write(blocks_inner[i_inner].clone()).await.err()
} else {
Some(DiskError::DiskNotFound)
}
}
});
let errs = join_all(write_futures).await;
let none_count = errs.iter().filter(|&x| x.is_none()).count();
if none_count >= write_quorum {
if total_size == 0 {
break;
}
continue;
}
if let Some(err) = reduce_write_quorum_errs(&errs, OBJECT_OP_IGNORED_ERRS, write_quorum) {
warn!("Erasure encode errs {:?}", &errs);
return Err(err);
}
}
task.await?
}
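// Illustrative note (not part of this diff): encode above is a two-stage
// pipeline. The spawned producer reads block_size chunks and EC-encodes each
// one; the consumer fans every block's shards out to the bitrot writers and
// proceeds as long as at least write_quorum writers succeed.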
pub async fn decode<W>(
&self,
writer: &mut W,
readers: Vec<Option<BitrotReader>>,
offset: usize,
length: usize,
total_length: usize,
) -> (usize, Option<Error>)
where
W: AsyncWriteExt + Send + Unpin + 'static,
{
if length == 0 {
return (0, None);
}
let mut reader = ShardReader::new(readers, self, offset, total_length);
// debug!("ShardReader {:?}", &reader);
let start_block = offset / self.block_size;
let end_block = (offset + length) / self.block_size;
// debug!("decode block from {} to {}", start_block, end_block);
let mut bytes_written = 0;
for block_idx in start_block..=end_block {
let (block_offset, block_length) = if start_block == end_block {
(offset % self.block_size, length)
} else if block_idx == start_block {
let block_offset = offset % self.block_size;
(block_offset, self.block_size - block_offset)
} else if block_idx == end_block {
(0, (offset + length) % self.block_size)
} else {
(0, self.block_size)
};
if block_length == 0 {
// debug!("block_length == 0 break");
break;
}
// debug!("decode {} block_offset {},block_length {} ", block_idx, block_offset, block_length);
let mut bufs = match reader.read().await {
Ok(bufs) => bufs,
Err(err) => return (bytes_written, Some(err)),
};
if self.parity_shards > 0 {
if let Err(err) = self.decode_data(&mut bufs) {
return (bytes_written, Some(err));
}
}
let written_n = match self
.write_data_blocks(writer, bufs, self.data_shards, block_offset, block_length)
.await
{
Ok(n) => n,
Err(err) => {
error!("write_data_blocks err {:?}", &err);
return (bytes_written, Some(err));
}
};
bytes_written += written_n;
// debug!("decode {} written_n {}, total_written: {} ", block_idx, written_n, bytes_written);
}
if bytes_written != length {
// debug!("bytes_written != length: {} != {} ", bytes_written, length);
return (bytes_written, Some(Error::other("erasure decode less data")));
}
(bytes_written, None)
}
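// Worked example (illustrative, not part of this diff): with block_size = 1 MiB,
// offset = 1.5 MiB and length = 1 MiB, decode touches blocks 1..=2. Block 1
// yields (block_offset = 0.5 MiB, block_length = 0.5 MiB) and block 2 yields
// (block_offset = 0, block_length = 0.5 MiB), which together cover exactly the
// requested length.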
async fn write_data_blocks<W>(
&self,
writer: &mut W,
bufs: Vec<Option<Vec<u8>>>,
data_blocks: usize,
offset: usize,
length: usize,
) -> Result<usize>
where
W: AsyncWrite + Send + Unpin + 'static,
{
if bufs.len() < data_blocks {
return Err(Error::other("read bufs not match data_blocks"));
}
let data_len: usize = bufs
.iter()
.take(data_blocks)
.filter(|v| v.is_some())
.map(|v| v.as_ref().unwrap().len())
.sum();
if data_len < length {
return Err(Error::other(format!("write_data_blocks data_len < length {} < {}", data_len, length)));
}
let mut offset = offset;
// debug!("write_data_blocks offset {}, length {}", offset, length);
let mut write = length;
let mut total_written = 0;
for opt_buf in bufs.iter().take(data_blocks) {
let buf = opt_buf.as_ref().unwrap();
if offset >= buf.len() {
offset -= buf.len();
continue;
}
let buf = &buf[offset..];
offset = 0;
// debug!("write_data_blocks write buf len {}", buf.len());
if write < buf.len() {
let buf = &buf[..write];
// debug!("write_data_blocks write buf less len {}", buf.len());
writer.write_all(buf).await?;
// debug!("write_data_blocks write done len {}", buf.len());
total_written += buf.len();
break;
}
writer.write_all(buf).await?;
let n = buf.len();
// debug!("write_data_blocks write done len {}", n);
write -= n;
total_written += n;
}
Ok(total_written)
}
pub fn total_shard_count(&self) -> usize {
self.data_shards + self.parity_shards
}
#[tracing::instrument(level = "info", skip_all, fields(data_len=data.len()))]
pub fn encode_data(self: Arc<Self>, data: &[u8]) -> Result<Vec<Bytes>> {
let (shard_size, total_size) = self.need_size(data.len());
// Allocate a single buffer large enough to hold every shard
let mut data_buffer = BytesMut::with_capacity(total_size);
// Copy the source data
data_buffer.extend_from_slice(data);
data_buffer.resize(total_size, 0u8);
{
// Perform EC encoding; the results go into data_buffer
let data_slices: SmallVec<[&mut [u8]; 16]> = data_buffer.chunks_exact_mut(shard_size).collect();
// Only perform EC encoding when parity shards are present
if self.parity_shards > 0 {
self.encoder.as_ref().unwrap().encode(data_slices).map_err(Error::other)?;
}
}
// Zero-copy shards: every shard references data_buffer
let mut data_buffer = data_buffer.freeze();
let mut shards = Vec::with_capacity(self.total_shard_count());
for _ in 0..self.total_shard_count() {
let shard = data_buffer.split_to(shard_size);
shards.push(shard);
}
Ok(shards)
}
pub fn decode_data(&self, shards: &mut [Option<Vec<u8>>]) -> Result<()> {
if self.parity_shards > 0 {
self.encoder.as_ref().unwrap().reconstruct(shards).map_err(Error::other)?;
}
Ok(())
}
// The length per shard and the total required length
fn need_size(&self, data_size: usize) -> (usize, usize) {
let shard_size = self.shard_size(data_size);
(shard_size, shard_size * (self.total_shard_count()))
}
// Compute each shard size
pub fn shard_size(&self, data_size: usize) -> usize {
data_size.div_ceil(self.data_shards)
}
// Returns the final on-disk size of each shard file for an object of total_size bytes.
pub fn shard_file_size(&self, total_size: usize) -> usize {
if total_size == 0 {
return 0;
}
let num_shards = total_size / self.block_size;
let last_block_size = total_size % self.block_size;
let last_shard_size = last_block_size.div_ceil(self.data_shards);
num_shards * self.shard_size(self.block_size) + last_shard_size
// When writing, EC pads the data so the last shard length should match
// if last_block_size != 0 {
// num_shards += 1
// }
// num_shards * self.shard_size(self.block_size)
}
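// Worked example (illustrative, not part of this diff): with data_shards = 4,
// block_size = 1 MiB and total_size = 2.5 MiB, the two full blocks contribute
// 2 * shard_size(1 MiB) = 2 * 262144 bytes and the trailing 512 KiB block
// contributes ceil(524288 / 4) = 131072 bytes, so each shard file holds
// 655360 bytes.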
// Returns the shard-file offset up to which a read must extend, clamped to the shard file size.
pub fn shard_file_offset(&self, start_offset: usize, length: usize, total_length: usize) -> usize {
let shard_size = self.shard_size(self.block_size);
let shard_file_size = self.shard_file_size(total_length);
let end_shard = (start_offset + length) / self.block_size;
let mut till_offset = end_shard * shard_size + shard_size;
if till_offset > shard_file_size {
till_offset = shard_file_size;
}
till_offset
}
pub async fn heal(
&self,
writers: &mut [Option<BitrotWriter>],
readers: Vec<Option<BitrotReader>>,
total_length: usize,
_prefer: &[bool],
) -> Result<()> {
info!(
"Erasure heal, writers len: {}, readers len: {}, total_length: {}",
writers.len(),
readers.len(),
total_length
);
if writers.len() != self.parity_shards + self.data_shards {
return Err(Error::other("invalid argument"));
}
let mut reader = ShardReader::new(readers, self, 0, total_length);
let start_block = 0;
let mut end_block = total_length / self.block_size;
if total_length % self.block_size != 0 {
end_block += 1;
}
let mut errs = Vec::new();
for _ in start_block..end_block {
let mut bufs = reader.read().await?;
if self.parity_shards > 0 {
self.encoder.as_ref().unwrap().reconstruct(&mut bufs).map_err(Error::other)?;
}
let shards = bufs.into_iter().flatten().map(Bytes::from).collect::<Vec<_>>();
if shards.len() != self.parity_shards + self.data_shards {
return Err(Error::other("can not reconstruct data"));
}
for (i, w) in writers.iter_mut().enumerate() {
if w.is_none() {
continue;
}
match w.as_mut().unwrap().write(shards[i].clone()).await {
Ok(_) => {}
Err(e) => {
info!("write failed, err: {:?}", e);
errs.push(e);
}
}
}
}
if !errs.is_empty() {
return Err(errs[0].clone().into());
}
Ok(())
}
}
#[async_trait::async_trait]
pub trait Writer {
fn as_any(&self) -> &dyn Any;
async fn write(&mut self, buf: Bytes) -> Result<()>;
async fn close(&mut self) -> Result<()> {
Ok(())
}
}
#[async_trait::async_trait]
pub trait ReadAt {
async fn read_at(&mut self, offset: usize, length: usize) -> Result<(Vec<u8>, usize)>;
}
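// Illustrative sketch (not part of this diff; `MemReader` is hypothetical): any
// readable backend can participate in shard reads by implementing ReadAt.
//
//     struct MemReader(Vec<u8>);
//
//     #[async_trait::async_trait]
//     impl ReadAt for MemReader {
//         async fn read_at(&mut self, offset: usize, length: usize) -> Result<(Vec<u8>, usize)> {
//             let end = (offset + length).min(self.0.len());
//             let buf = self.0[offset.min(end)..end].to_vec();
//             let n = buf.len();
//             Ok((buf, n))
//         }
//     }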
pub struct ShardReader {
readers: Vec<Option<BitrotReader>>, // Per-disk shard readers
data_block_count: usize, // Number of data shards
parity_block_count: usize, // Number of parity shards
shard_size: usize, // Bytes read from each shard per block
shard_file_size: usize, // Total size of each shard file
offset: usize, // Current read offset within each shard file
}
impl ShardReader {
pub fn new(readers: Vec<Option<BitrotReader>>, ec: &Erasure, offset: usize, total_length: usize) -> Self {
Self {
readers,
data_block_count: ec.data_shards,
parity_block_count: ec.parity_shards,
shard_size: ec.shard_size(ec.block_size),
shard_file_size: ec.shard_file_size(total_length),
offset: (offset / ec.block_size) * ec.shard_size(ec.block_size),
}
}
pub async fn read(&mut self) -> Result<Vec<Option<Vec<u8>>>> {
// let mut disks = self.readers;
let reader_length = self.readers.len();
// Length of the block to read
let mut read_length = self.shard_size;
if self.offset + read_length > self.shard_file_size {
read_length = self.shard_file_size - self.offset
}
if read_length == 0 {
return Ok(vec![None; reader_length]);
}
// debug!("shard reader read offset {}, shard_size {}", self.offset, read_length);
let mut futures = Vec::with_capacity(reader_length);
let mut errors = Vec::with_capacity(reader_length);
let mut ress = Vec::with_capacity(reader_length);
for disk in self.readers.iter_mut() {
// if disk.is_none() {
// ress.push(None);
// errors.push(Some(Error::new(DiskError::DiskNotFound)));
// continue;
// }
// let disk: &mut BitrotReader = disk.as_mut().unwrap();
let offset = self.offset;
futures.push(async move {
if let Some(disk) = disk {
disk.read_at(offset, read_length).await
} else {
Err(DiskError::DiskNotFound)
}
});
}
let results = join_all(futures).await;
for result in results {
match result {
Ok((res, _)) => {
ress.push(Some(res));
errors.push(None);
}
Err(e) => {
ress.push(None);
errors.push(Some(e));
}
}
}
if !self.can_decode(&ress) {
warn!("ec decode read ress {:?}", &ress);
warn!("ec decode read errors {:?}", &errors);
return Err(Error::other("shard reader read failed"));
}
self.offset += self.shard_size;
Ok(ress)
}
fn can_decode(&self, bufs: &[Option<Vec<u8>>]) -> bool {
let c = bufs.iter().filter(|v| v.is_some()).count();
if self.parity_block_count > 0 {
c >= self.data_block_count
} else {
c == self.data_block_count
}
}
}
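// Illustrative note (not part of this diff): can_decode counts the successful
// reads. With 4 data + 2 parity shards, any 4 of the 6 reads suffice to
// reconstruct a block; with parity disabled, every data shard must be present,
// since nothing can be rebuilt.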
// fn shards_to_option_shards<T: Clone>(shards: &[Vec<T>]) -> Vec<Option<Vec<T>>> {
// let mut result = Vec::with_capacity(shards.len());
// for v in shards.iter() {
// let inner: Vec<T> = v.clone();
// result.push(Some(inner));
// }
// result
// }
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_erasure() {
let data_shards = 3;
let parity_shards = 2;
let data: &[u8] = &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
let ec = Erasure::new(data_shards, parity_shards, 1);
let shards = Arc::new(ec).encode_data(data).unwrap();
println!("shards:{:?}", shards);
let mut s: Vec<_> = shards
.iter()
.map(|d| if d.is_empty() { None } else { Some(d.to_vec()) })
.collect();
// let mut s = shards_to_option_shards(&shards);
// s[0] = None;
s[4] = None;
s[3] = None;
println!("sss:{:?}", &s);
let ec = Erasure::new(data_shards, parity_shards, 1);
ec.decode_data(&mut s).unwrap();
// ec.encoder.reconstruct(&mut s).unwrap();
println!("sss:{:?}", &s);
}
}

View File

@@ -20,6 +20,7 @@ pub mod batch_processor;
pub mod bitrot;
pub mod bucket;
pub mod cache_value;
mod chunk_stream;
pub mod compress;
pub mod config;
pub mod data_usage;

View File

@@ -19,7 +19,11 @@ use crate::{
// utils::os::get_drive_stats,
};
use chrono::Utc;
use rustfs_common::{GLOBAL_LOCAL_NODE_NAME, GLOBAL_RUSTFS_ADDR, heal_channel::DriveState, metrics::global_metrics};
use rustfs_common::{
globals::{GLOBAL_LOCAL_NODE_NAME, GLOBAL_RUSTFS_ADDR},
heal_channel::DriveState,
metrics::global_metrics,
};
use rustfs_madmin::metrics::{DiskIOStats, DiskMetric, RealtimeMetrics};
use rustfs_utils::os::get_drive_stats;
use serde::{Deserialize, Serialize};

View File

@@ -13,18 +13,14 @@
// limitations under the License.
use crate::bucket::metadata_sys;
use crate::disk::error::DiskError;
use crate::disk::error::{Error, Result};
use crate::disk::error_reduce::{BUCKET_OP_IGNORED_ERRS, is_all_buckets_not_found, reduce_write_quorum_errs};
use crate::disk::{DiskAPI, DiskStore, disk_store::get_max_timeout_duration};
use crate::disk::{DiskAPI, DiskStore};
use crate::global::GLOBAL_LOCAL_DISK_MAP;
use crate::store::all_local_disk;
use crate::store_utils::is_reserved_or_invalid_bucket;
use crate::{
disk::{
self, VolumeInfo,
disk_store::{CHECK_EVERY, CHECK_TIMEOUT_DURATION, DiskHealthTracker},
},
disk::{self, VolumeInfo},
endpoints::{EndpointServerPools, Node},
store_api::{BucketInfo, BucketOptions, DeleteBucketOptions, MakeBucketOptions},
};
@@ -36,11 +32,10 @@ use rustfs_protos::node_service_time_out_client;
use rustfs_protos::proto_gen::node_service::{
DeleteBucketRequest, GetBucketInfoRequest, HealBucketRequest, ListBucketRequest, MakeBucketRequest,
};
use std::{collections::HashMap, fmt::Debug, sync::Arc, time::Duration};
use tokio::{net::TcpStream, sync::RwLock, time};
use tokio_util::sync::CancellationToken;
use std::{collections::HashMap, fmt::Debug, sync::Arc};
use tokio::sync::RwLock;
use tonic::Request;
use tracing::{debug, info, warn};
use tracing::info;
type Client = Arc<Box<dyn PeerS3Client>>;
@@ -564,160 +559,16 @@ pub struct RemotePeerS3Client {
pub node: Option<Node>,
pub pools: Option<Vec<usize>>,
addr: String,
/// Health tracker for connection monitoring
health: Arc<DiskHealthTracker>,
/// Cancellation token for monitoring tasks
cancel_token: CancellationToken,
}
impl RemotePeerS3Client {
pub fn new(node: Option<Node>, pools: Option<Vec<usize>>) -> Self {
let addr = node.as_ref().map(|v| v.url.to_string()).unwrap_or_default().to_string();
let client = Self {
node,
pools,
addr,
health: Arc::new(DiskHealthTracker::new()),
cancel_token: CancellationToken::new(),
};
// Start health monitoring
client.start_health_monitoring();
client
Self { node, pools, addr }
}
pub fn get_addr(&self) -> String {
self.addr.clone()
}
/// Start health monitoring for the remote peer
fn start_health_monitoring(&self) {
let health = Arc::clone(&self.health);
let cancel_token = self.cancel_token.clone();
let addr = self.addr.clone();
tokio::spawn(async move {
Self::monitor_remote_peer_health(addr, health, cancel_token).await;
});
}
/// Monitor remote peer health periodically
async fn monitor_remote_peer_health(addr: String, health: Arc<DiskHealthTracker>, cancel_token: CancellationToken) {
let mut interval = time::interval(CHECK_EVERY);
loop {
tokio::select! {
_ = cancel_token.cancelled() => {
debug!("Health monitoring cancelled for remote peer: {}", addr);
return;
}
_ = interval.tick() => {
if cancel_token.is_cancelled() {
return;
}
// Skip health check if peer is already marked as faulty
if health.is_faulty() {
continue;
}
// Perform basic connectivity check
if Self::perform_connectivity_check(&addr).await.is_err() && health.swap_ok_to_faulty() {
warn!("Remote peer health check failed for {}: marking as faulty", addr);
// Start recovery monitoring
let health_clone = Arc::clone(&health);
let addr_clone = addr.clone();
let cancel_clone = cancel_token.clone();
tokio::spawn(async move {
Self::monitor_remote_peer_recovery(addr_clone, health_clone, cancel_clone).await;
});
}
}
}
}
}
/// Monitor remote peer recovery and mark as healthy when recovered
async fn monitor_remote_peer_recovery(addr: String, health: Arc<DiskHealthTracker>, cancel_token: CancellationToken) {
let mut interval = time::interval(Duration::from_secs(5)); // Check every 5 seconds
loop {
tokio::select! {
_ = cancel_token.cancelled() => {
return;
}
_ = interval.tick() => {
if Self::perform_connectivity_check(&addr).await.is_ok() {
info!("Remote peer recovered: {}", addr);
health.set_ok();
return;
}
}
}
}
}
/// Perform basic connectivity check for remote peer
async fn perform_connectivity_check(addr: &str) -> Result<()> {
use tokio::time::timeout;
let url = url::Url::parse(addr).map_err(|e| Error::other(format!("Invalid URL: {}", e)))?;
let Some(host) = url.host_str() else {
return Err(Error::other("No host in URL".to_string()));
};
let port = url.port_or_known_default().unwrap_or(80);
// Try to establish TCP connection
match timeout(CHECK_TIMEOUT_DURATION, TcpStream::connect((host, port))).await {
Ok(Ok(_)) => Ok(()),
_ => Err(Error::other(format!("Cannot connect to {}:{}", host, port))),
}
}
/// Execute operation with timeout and health tracking
async fn execute_with_timeout<T, F, Fut>(&self, operation: F, timeout_duration: Duration) -> Result<T>
where
F: FnOnce() -> Fut,
Fut: std::future::Future<Output = Result<T>>,
{
// Check if peer is faulty
if self.health.is_faulty() {
return Err(DiskError::FaultyDisk);
}
// Record operation start
let now = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_nanos() as i64;
self.health.last_started.store(now, std::sync::atomic::Ordering::Relaxed);
self.health.increment_waiting();
// Execute operation with timeout
let result = time::timeout(timeout_duration, operation()).await;
match result {
Ok(operation_result) => {
// Log success and decrement waiting counter
if operation_result.is_ok() {
self.health.log_success();
}
self.health.decrement_waiting();
operation_result
}
Err(_) => {
// Timeout occurred, mark peer as potentially faulty
self.health.decrement_waiting();
warn!("Remote peer operation timeout after {:?}", timeout_duration);
Err(Error::other(format!("Remote peer operation timeout after {:?}", timeout_duration)))
}
}
}
}
#[async_trait]
@@ -727,145 +578,115 @@ impl PeerS3Client for RemotePeerS3Client {
}
async fn heal_bucket(&self, bucket: &str, opts: &HealOpts) -> Result<HealResultItem> {
self.execute_with_timeout(
|| async {
let options: String = serde_json::to_string(opts)?;
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| Error::other(format!("can not get client, err: {err}")))?;
let request = Request::new(HealBucketRequest {
bucket: bucket.to_string(),
options,
});
let response = client.heal_bucket(request).await?.into_inner();
if !response.success {
return if let Some(err) = response.error {
Err(err.into())
} else {
Err(Error::other(""))
};
}
let options: String = serde_json::to_string(opts)?;
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| Error::other(format!("can not get client, err: {err}")))?;
let request = Request::new(HealBucketRequest {
bucket: bucket.to_string(),
options,
});
let response = client.heal_bucket(request).await?.into_inner();
if !response.success {
return if let Some(err) = response.error {
Err(err.into())
} else {
Err(Error::other(""))
};
}
Ok(HealResultItem {
heal_item_type: HealItemType::Bucket.to_string(),
bucket: bucket.to_string(),
set_count: 0,
..Default::default()
})
},
get_max_timeout_duration(),
)
.await
Ok(HealResultItem {
heal_item_type: HealItemType::Bucket.to_string(),
bucket: bucket.to_string(),
set_count: 0,
..Default::default()
})
}
async fn list_bucket(&self, opts: &BucketOptions) -> Result<Vec<BucketInfo>> {
self.execute_with_timeout(
|| async {
let options = serde_json::to_string(opts)?;
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| Error::other(format!("can not get client, err: {err}")))?;
let request = Request::new(ListBucketRequest { options });
let response = client.list_bucket(request).await?.into_inner();
if !response.success {
return if let Some(err) = response.error {
Err(err.into())
} else {
Err(Error::other(""))
};
}
let bucket_infos = response
.bucket_infos
.into_iter()
.filter_map(|json_str| serde_json::from_str::<BucketInfo>(&json_str).ok())
.collect();
let options = serde_json::to_string(opts)?;
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| Error::other(format!("can not get client, err: {err}")))?;
let request = Request::new(ListBucketRequest { options });
let response = client.list_bucket(request).await?.into_inner();
if !response.success {
return if let Some(err) = response.error {
Err(err.into())
} else {
Err(Error::other(""))
};
}
let bucket_infos = response
.bucket_infos
.into_iter()
.filter_map(|json_str| serde_json::from_str::<BucketInfo>(&json_str).ok())
.collect();
Ok(bucket_infos)
},
get_max_timeout_duration(),
)
.await
Ok(bucket_infos)
}
async fn make_bucket(&self, bucket: &str, opts: &MakeBucketOptions) -> Result<()> {
self.execute_with_timeout(
|| async {
let options = serde_json::to_string(opts)?;
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| Error::other(format!("can not get client, err: {err}")))?;
let request = Request::new(MakeBucketRequest {
name: bucket.to_string(),
options,
});
let response = client.make_bucket(request).await?.into_inner();
let options = serde_json::to_string(opts)?;
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| Error::other(format!("can not get client, err: {err}")))?;
let request = Request::new(MakeBucketRequest {
name: bucket.to_string(),
options,
});
let response = client.make_bucket(request).await?.into_inner();
// TODO: deal with error
if !response.success {
return if let Some(err) = response.error {
Err(err.into())
} else {
Err(Error::other(""))
};
}
// TODO: deal with error
if !response.success {
return if let Some(err) = response.error {
Err(err.into())
} else {
Err(Error::other(""))
};
}
Ok(())
},
get_max_timeout_duration(),
)
.await
Ok(())
}
async fn get_bucket_info(&self, bucket: &str, opts: &BucketOptions) -> Result<BucketInfo> {
self.execute_with_timeout(
|| async {
let options = serde_json::to_string(opts)?;
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| Error::other(format!("can not get client, err: {err}")))?;
let request = Request::new(GetBucketInfoRequest {
bucket: bucket.to_string(),
options,
});
let response = client.get_bucket_info(request).await?.into_inner();
if !response.success {
return if let Some(err) = response.error {
Err(err.into())
} else {
Err(Error::other(""))
};
}
let bucket_info = serde_json::from_str::<BucketInfo>(&response.bucket_info)?;
let options = serde_json::to_string(opts)?;
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| Error::other(format!("can not get client, err: {err}")))?;
let request = Request::new(GetBucketInfoRequest {
bucket: bucket.to_string(),
options,
});
let response = client.get_bucket_info(request).await?.into_inner();
if !response.success {
return if let Some(err) = response.error {
Err(err.into())
} else {
Err(Error::other(""))
};
}
let bucket_info = serde_json::from_str::<BucketInfo>(&response.bucket_info)?;
Ok(bucket_info)
},
get_max_timeout_duration(),
)
.await
Ok(bucket_info)
}
async fn delete_bucket(&self, bucket: &str, _opts: &DeleteBucketOptions) -> Result<()> {
self.execute_with_timeout(
|| async {
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| Error::other(format!("can not get client, err: {err}")))?;
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| Error::other(format!("can not get client, err: {err}")))?;
let request = Request::new(DeleteBucketRequest {
bucket: bucket.to_string(),
});
let response = client.delete_bucket(request).await?.into_inner();
if !response.success {
return if let Some(err) = response.error {
Err(err.into())
} else {
Err(Error::other(""))
};
}
let request = Request::new(DeleteBucketRequest {
bucket: bucket.to_string(),
});
let response = client.delete_bucket(request).await?.into_inner();
if !response.success {
return if let Some(err) = response.error {
Err(err.into())
} else {
Err(Error::other(""))
};
}
Ok(())
},
get_max_timeout_duration(),
)
.await
Ok(())
}
}

File diff suppressed because it is too large

View File

@@ -174,56 +174,56 @@ impl SetDisks {
})
}
// async fn cached_disk_health(&self, index: usize) -> Option<bool> {
// let cache = self.disk_health_cache.read().await;
// cache
// .get(index)
// .and_then(|entry| entry.as_ref().and_then(|state| state.cached_value()))
// }
async fn cached_disk_health(&self, index: usize) -> Option<bool> {
let cache = self.disk_health_cache.read().await;
cache
.get(index)
.and_then(|entry| entry.as_ref().and_then(|state| state.cached_value()))
}
// async fn update_disk_health(&self, index: usize, online: bool) {
// let mut cache = self.disk_health_cache.write().await;
// if cache.len() <= index {
// cache.resize(index + 1, None);
// }
// cache[index] = Some(DiskHealthEntry {
// last_check: Instant::now(),
// online,
// });
// }
async fn update_disk_health(&self, index: usize, online: bool) {
let mut cache = self.disk_health_cache.write().await;
if cache.len() <= index {
cache.resize(index + 1, None);
}
cache[index] = Some(DiskHealthEntry {
last_check: Instant::now(),
online,
});
}
// async fn is_disk_online_cached(&self, index: usize, disk: &DiskStore) -> bool {
// if let Some(online) = self.cached_disk_health(index).await {
// return online;
// }
async fn is_disk_online_cached(&self, index: usize, disk: &DiskStore) -> bool {
if let Some(online) = self.cached_disk_health(index).await {
return online;
}
// let disk_clone = disk.clone();
// let online = timeout(DISK_ONLINE_TIMEOUT, async move { disk_clone.is_online().await })
// .await
// .unwrap_or(false);
// self.update_disk_health(index, online).await;
// online
// }
let disk_clone = disk.clone();
let online = timeout(DISK_ONLINE_TIMEOUT, async move { disk_clone.is_online().await })
.await
.unwrap_or(false);
self.update_disk_health(index, online).await;
online
}
// async fn filter_online_disks(&self, disks: Vec<Option<DiskStore>>) -> (Vec<Option<DiskStore>>, usize) {
// let mut filtered = Vec::with_capacity(disks.len());
// let mut online_count = 0;
async fn filter_online_disks(&self, disks: Vec<Option<DiskStore>>) -> (Vec<Option<DiskStore>>, usize) {
let mut filtered = Vec::with_capacity(disks.len());
let mut online_count = 0;
// for (idx, disk) in disks.into_iter().enumerate() {
// if let Some(disk_store) = disk {
// if self.is_disk_online_cached(idx, &disk_store).await {
// filtered.push(Some(disk_store));
// online_count += 1;
// } else {
// filtered.push(None);
// }
// } else {
// filtered.push(None);
// }
// }
for (idx, disk) in disks.into_iter().enumerate() {
if let Some(disk_store) = disk {
if self.is_disk_online_cached(idx, &disk_store).await {
filtered.push(Some(disk_store));
online_count += 1;
} else {
filtered.push(None);
}
} else {
filtered.push(None);
}
}
// (filtered, online_count)
// }
(filtered, online_count)
}
fn format_lock_error(&self, bucket: &str, object: &str, mode: &str, err: &LockResult) -> String {
match err {
LockResult::Timeout => {
@@ -259,28 +259,9 @@ impl SetDisks {
}
async fn get_online_disks(&self) -> Vec<Option<DiskStore>> {
let mut disks = self.get_disks_internal().await;
// TODO: diskinfo filter online
let mut new_disk = Vec::with_capacity(disks.len());
for disk in disks.iter() {
if let Some(d) = disk {
if d.is_online().await {
new_disk.push(disk.clone());
}
}
}
let mut rng = rand::rng();
disks.shuffle(&mut rng);
new_disk
// let disks = self.get_disks_internal().await;
// let (filtered, _) = self.filter_online_disks(disks).await;
// filtered.into_iter().filter(|disk| disk.is_some()).collect()
let disks = self.get_disks_internal().await;
let (filtered, _) = self.filter_online_disks(disks).await;
filtered.into_iter().filter(|disk| disk.is_some()).collect()
}
async fn get_online_local_disks(&self) -> Vec<Option<DiskStore>> {
let mut disks = self.get_online_disks().await;
@@ -1486,9 +1467,7 @@ impl SetDisks {
let object = object.clone();
let version_id = version_id.clone();
tokio::spawn(async move {
if let Some(disk) = disk
&& disk.is_online().await
{
if let Some(disk) = disk {
if version_id.is_empty() {
match disk.read_xl(&bucket, &object, read_data).await {
Ok(info) => {
@@ -1820,14 +1799,14 @@ impl SetDisks {
}
pub async fn renew_disk(&self, ep: &Endpoint) {
debug!("renew_disk: start {:?}", ep);
debug!("renew_disk start {:?}", ep);
let (new_disk, fm) = match Self::connect_endpoint(ep).await {
Ok(res) => res,
Err(e) => {
warn!("renew_disk: connect_endpoint err {:?}", &e);
warn!("connect_endpoint err {:?}", &e);
if ep.is_local && e == DiskError::UnformattedDisk {
info!("renew_disk unformatteddisk will trigger heal_disk, {:?}", ep);
info!("unformatteddisk will trigger heal_disk, {:?}", ep);
let set_disk_id = format!("pool_{}_set_{}", ep.pool_idx, ep.set_idx);
let _ = send_heal_disk(set_disk_id, Some(HealChannelPriority::Normal)).await;
}
@@ -1838,7 +1817,7 @@ impl SetDisks {
let (set_idx, disk_idx) = match self.find_disk_index(&fm) {
Ok(res) => res,
Err(e) => {
warn!("renew_disk: find_disk_index err {:?}", e);
warn!("find_disk_index err {:?}", e);
return;
}
};
@@ -1858,7 +1837,7 @@ impl SetDisks {
}
}
debug!("renew_disk: update {:?}", fm.erasure.this);
debug!("renew_disk update {:?}", fm.erasure.this);
let mut disk_lock = self.disks.write().await;
disk_lock[disk_idx] = Some(new_disk);
@@ -3072,7 +3051,7 @@ impl SetDisks {
for (index, disk) in latest_disks.iter().enumerate() {
if let Some(outdated_disk) = &out_dated_disks[index] {
info!(disk_index = index, "Creating writer for outdated disk");
let writer = match create_bitrot_writer(
let writer = create_bitrot_writer(
is_inline_buffer,
Some(outdated_disk),
RUSTFS_META_TMP_BUCKET,
@@ -3081,19 +3060,7 @@ impl SetDisks {
erasure.shard_size(),
HashAlgorithm::HighwayHash256,
)
.await
{
Ok(writer) => writer,
Err(err) => {
warn!(
"create_bitrot_writer disk {}, err {:?}, skipping operation",
outdated_disk.to_string(),
err
);
writers.push(None);
continue;
}
};
.await?;
writers.push(Some(writer));
} else {
info!(disk_index = index, "Skipping writer (disk not outdated)");
@@ -3823,8 +3790,8 @@ impl ObjectIO for SetDisks {
#[tracing::instrument(level = "debug", skip(self, data,))]
async fn put_object(&self, bucket: &str, object: &str, data: &mut PutObjReader, opts: &ObjectOptions) -> Result<ObjectInfo> {
let disks = self.get_disks_internal().await;
// let (disks, filtered_online) = self.filter_online_disks(disks_snapshot).await;
let disks_snapshot = self.get_disks_internal().await;
let (disks, filtered_online) = self.filter_online_disks(disks_snapshot).await;
// Acquire per-object exclusive lock via RAII guard. It auto-releases asynchronously on drop.
let _object_lock_guard = if !opts.no_lock {
@@ -3865,13 +3832,13 @@ impl ObjectIO for SetDisks {
write_quorum += 1
}
// if filtered_online < write_quorum {
// warn!(
// "online disk snapshot {} below write quorum {} for {}/{}; returning erasure write quorum error",
// filtered_online, write_quorum, bucket, object
// );
// return Err(to_object_err(Error::ErasureWriteQuorum, vec![bucket, object]));
// }
if filtered_online < write_quorum {
warn!(
"online disk snapshot {} below write quorum {} for {}/{}; returning erasure write quorum error",
filtered_online, write_quorum, bucket, object
);
return Err(to_object_err(Error::ErasureWriteQuorum, vec![bucket, object]));
}
let mut fi = FileInfo::new([bucket, object].join("/").as_str(), data_drives, parity_drives);
@@ -3910,10 +3877,8 @@ impl ObjectIO for SetDisks {
let mut writers = Vec::with_capacity(shuffle_disks.len());
let mut errors = Vec::with_capacity(shuffle_disks.len());
for disk_op in shuffle_disks.iter() {
if let Some(disk) = disk_op
&& disk.is_online().await
{
let writer = match create_bitrot_writer(
if let Some(disk) = disk_op {
let writer = create_bitrot_writer(
is_inline_buffer,
Some(disk),
RUSTFS_META_TMP_BUCKET,
@@ -3922,16 +3887,29 @@ impl ObjectIO for SetDisks {
erasure.shard_size(),
HashAlgorithm::HighwayHash256,
)
.await
{
Ok(writer) => writer,
Err(err) => {
warn!("create_bitrot_writer disk {}, err {:?}, skipping operation", disk.to_string(), err);
errors.push(Some(err));
writers.push(None);
continue;
}
};
.await?;
// let writer = if is_inline_buffer {
// BitrotWriter::new(
// Writer::from_cursor(Cursor::new(Vec::new())),
// erasure.shard_size(),
// HashAlgorithm::HighwayHash256,
// )
// } else {
// let f = match disk
// .create_file("", RUSTFS_META_TMP_BUCKET, &tmp_object, erasure.shard_file_size(data.content_length))
// .await
// {
// Ok(f) => f,
// Err(e) => {
// errors.push(Some(e));
// writers.push(None);
// continue;
// }
// };
// BitrotWriter::new(Writer::from_tokio_writer(f), erasure.shard_size(), HashAlgorithm::HighwayHash256)
// };
writers.push(Some(writer));
errors.push(None);
@@ -4094,7 +4072,7 @@ impl StorageAPI for SetDisks {
async fn local_storage_info(&self) -> rustfs_madmin::StorageInfo {
let disks = self.get_disks_internal().await;
let mut local_disks: Vec<Option<DiskStore>> = Vec::new();
let mut local_disks: Vec<Option<Arc<disk::Disk>>> = Vec::new();
let mut local_endpoints = Vec::new();
for (i, ep) in self.set_endpoints.iter().enumerate() {
@@ -4930,7 +4908,9 @@ impl StorageAPI for SetDisks {
for disk in disks.iter() {
if let Some(disk) = disk {
continue;
if disk.is_online().await {
continue;
}
}
let _ = self.add_partial(bucket, object, opts.version_id.as_ref().expect("err")).await;
break;
@@ -5149,16 +5129,16 @@ impl StorageAPI for SetDisks {
return Err(Error::other(format!("checksum mismatch: {checksum}")));
}
let disks = self.get_disks_internal().await;
// let (disks, filtered_online) = self.filter_online_disks(disks_snapshot).await;
let disks_snapshot = self.get_disks_internal().await;
let (disks, filtered_online) = self.filter_online_disks(disks_snapshot).await;
// if filtered_online < write_quorum {
// warn!(
// "online disk snapshot {} below write quorum {} for multipart {}/{}; returning erasure write quorum error",
// filtered_online, write_quorum, bucket, object
// );
// return Err(to_object_err(Error::ErasureWriteQuorum, vec![bucket, object]));
// }
if filtered_online < write_quorum {
warn!(
"online disk snapshot {} below write quorum {} for multipart {}/{}; returning erasure write quorum error",
filtered_online, write_quorum, bucket, object
);
return Err(to_object_err(Error::ErasureWriteQuorum, vec![bucket, object]));
}
let shuffle_disks = Self::shuffle_disks(&disks, &fi.erasure.distribution);
@@ -5172,7 +5152,7 @@ impl StorageAPI for SetDisks {
let mut errors = Vec::with_capacity(shuffle_disks.len());
for disk_op in shuffle_disks.iter() {
if let Some(disk) = disk_op {
let writer = match create_bitrot_writer(
let writer = create_bitrot_writer(
false,
Some(disk),
RUSTFS_META_TMP_BUCKET,
@@ -5181,16 +5161,23 @@ impl StorageAPI for SetDisks {
erasure.shard_size(),
HashAlgorithm::HighwayHash256,
)
.await
{
Ok(writer) => writer,
Err(err) => {
warn!("create_bitrot_writer disk {}, err {:?}, skipping operation", disk.to_string(), err);
errors.push(Some(err));
writers.push(None);
continue;
}
};
.await?;
// let writer = {
// let f = match disk
// .create_file("", RUSTFS_META_TMP_BUCKET, &tmp_part_path, erasure.shard_file_size(data.content_length))
// .await
// {
// Ok(f) => f,
// Err(e) => {
// errors.push(Some(e));
// writers.push(None);
// continue;
// }
// };
// BitrotWriter::new(Writer::from_tokio_writer(f), erasure.shard_size(), HashAlgorithm::HighwayHash256)
// };
writers.push(Some(writer));
errors.push(None);
@@ -6782,7 +6769,7 @@ async fn get_disks_info(disks: &[Option<DiskStore>], eps: &[Endpoint]) -> Vec<ru
healing: res.healing,
scanning: res.scanning,
uuid: res.id.map_or("".to_string(), |id| id.to_string()),
uuid: res.id.clone(),
major: res.major as u32,
minor: res.minor as u32,
model: None,

View File

@@ -40,7 +40,7 @@ use futures::future::join_all;
use http::HeaderMap;
use rustfs_common::heal_channel::HealOpts;
use rustfs_common::{
GLOBAL_LOCAL_NODE_NAME,
globals::GLOBAL_LOCAL_NODE_NAME,
heal_channel::{DriveState, HealItemType},
};
use rustfs_filemeta::FileInfo;
@@ -255,7 +255,7 @@ impl Sets {
self.connect_disks().await;
// TODO: config interval
let mut interval = tokio::time::interval(Duration::from_secs(15));
let mut interval = tokio::time::interval(Duration::from_secs(15 * 3));
loop {
tokio::select! {
_= interval.tick()=>{

View File

@@ -55,8 +55,8 @@ use futures::future::join_all;
use http::HeaderMap;
use lazy_static::lazy_static;
use rand::Rng as _;
use rustfs_common::globals::{GLOBAL_LOCAL_NODE_NAME, GLOBAL_RUSTFS_HOST, GLOBAL_RUSTFS_PORT};
use rustfs_common::heal_channel::{HealItemType, HealOpts};
use rustfs_common::{GLOBAL_LOCAL_NODE_NAME, GLOBAL_RUSTFS_HOST, GLOBAL_RUSTFS_PORT};
use rustfs_filemeta::FileInfo;
use rustfs_madmin::heal_commands::HealResultItem;
use rustfs_utils::path::{SLASH_SEPARATOR, decode_dir_object, encode_dir_object, path_join_buf};

View File

@@ -265,10 +265,7 @@ pub async fn load_format_erasure(disk: &DiskStore, heal: bool) -> disk::error::R
.map_err(|e| match e {
DiskError::FileNotFound => DiskError::UnformattedDisk,
DiskError::DiskNotFound => DiskError::UnformattedDisk,
_ => {
warn!("load_format_erasure err: {:?} {:?}", disk.to_string(), e);
e
}
_ => e,
})?;
let mut fm = FormatV3::try_from(data.as_ref())?;
@@ -315,18 +312,17 @@ async fn save_format_file_all(disks: &[Option<DiskStore>], formats: &[Option<For
}
pub async fn save_format_file(disk: &Option<DiskStore>, format: &Option<FormatV3>) -> disk::error::Result<()> {
let Some(disk) = disk else {
if disk.is_none() {
return Err(DiskError::DiskNotFound);
};
}
let Some(format) = format else {
return Err(DiskError::other("format is none"));
};
let format = format.as_ref().unwrap();
let json_data = format.to_json()?;
let tmpfile = Uuid::new_v4().to_string();
let disk = disk.as_ref().unwrap();
disk.write_all(RUSTFS_META_BUCKET, tmpfile.as_str(), json_data.into_bytes().into())
.await?;
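Both variants of the guard above are behaviorally equivalent; a minimal standalone sketch of the `let ... else` form, with illustrative types:
fn require_disk(disk: Option<&str>) -> Result<&str, String> {
// The else branch must diverge: here it returns early when the Option is None.
let Some(disk) = disk else {
return Err("disk not found".to_string());
};
Ok(disk)
}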

View File

@@ -0,0 +1,231 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#![allow(unused_imports)]
#![allow(unused_variables)]
#![allow(unused_mut)]
#![allow(unused_assignments)]
#![allow(unused_must_use)]
#![allow(clippy::all)]
use std::collections::HashMap;
use std::sync::Arc;
use azure_core::http::{Body, ClientOptions, RequestContent};
use azure_storage::StorageCredentials;
use azure_storage_blobs::prelude::*;
use crate::client::{
admin_handler_utils::AdminError,
api_put_object::PutObjectOptions,
transition_api::{Options, ReadCloser, ReaderImpl},
};
use crate::tier::{
tier_config::TierAzure,
warm_backend::{WarmBackend, WarmBackendGetOpts},
};
use tracing::warn;
const MAX_MULTIPART_PUT_OBJECT_SIZE: i64 = 1024 * 1024 * 1024 * 1024 * 5;
const MAX_PARTS_COUNT: i64 = 10000;
const _MAX_PART_SIZE: i64 = 1024 * 1024 * 1024 * 5;
const MIN_PART_SIZE: i64 = 1024 * 1024 * 128;
pub struct WarmBackendAzure {
pub client: Arc<BlobServiceClient>,
pub bucket: String,
pub prefix: String,
pub storage_class: String,
}
impl WarmBackendAzure {
pub async fn new(conf: &TierAzure, tier: &str) -> Result<Self, std::io::Error> {
if conf.access_key == "" || conf.secret_key == "" {
return Err(std::io::Error::other("both access and secret keys are required"));
}
if conf.bucket == "" {
return Err(std::io::Error::other("no bucket name was provided"));
}
let creds = StorageCredentials::access_key(conf.access_key.clone(), conf.secret_key.clone());
let client = ClientBuilder::new(conf.access_key.clone(), creds)
//.endpoint(conf.endpoint)
.blob_service_client();
let client = Arc::new(client);
Ok(Self {
client,
bucket: conf.bucket.clone(),
prefix: conf.prefix.strip_suffix("/").unwrap_or(&conf.prefix).to_owned(),
storage_class: "".to_string(),
})
}
/*pub fn tier(&self) -> *blob.AccessTier {
if self.storage_class == "" {
return None;
}
for t in blob.PossibleAccessTierValues() {
if strings.EqualFold(self.storage_class, t) {
return &t
}
}
None
}*/
pub fn get_dest(&self, object: &str) -> String {
if self.prefix.is_empty() {
object.to_string()
} else {
format!("{}/{}", self.prefix, object)
}
}
}
#[async_trait::async_trait]
impl WarmBackend for WarmBackendAzure {
async fn put_with_meta(
&self,
object: &str,
r: ReaderImpl,
length: i64,
meta: HashMap<String, String>,
) -> Result<String, std::io::Error> {
let part_size = length;
let client = self.client.clone();
let container_client = client.container_client(self.bucket.clone());
let blob_client = container_client.blob_client(self.get_dest(object));
/*let res = blob_client
.upload(
RequestContent::from(match r {
ReaderImpl::Body(content_body) => content_body.to_vec(),
ReaderImpl::ObjectBody(mut content_body) => content_body.read_all().await?,
}),
false,
length as u64,
None,
)
.await
else {
return Err(std::io::Error::other("upload error"));
};*/
let Ok(res) = blob_client
.put_block_blob(match r {
ReaderImpl::Body(content_body) => content_body.to_vec(),
ReaderImpl::ObjectBody(mut content_body) => content_body.read_all().await?,
})
.content_type("text/plain")
.into_future()
.await
else {
return Err(std::io::Error::other("put_block_blob error"));
};
//self.ToObjectError(err, object)
Ok(res.request_id.to_string())
}
async fn put(&self, object: &str, r: ReaderImpl, length: i64) -> Result<String, std::io::Error> {
self.put_with_meta(object, r, length, HashMap::new()).await
}
async fn get(&self, object: &str, rv: &str, opts: WarmBackendGetOpts) -> Result<ReadCloser, std::io::Error> {
let client = self.client.clone();
let container_client = client.container_client(self.bucket.clone());
let blob_client = container_client.blob_client(self.get_dest(object));
blob_client.get();
todo!();
}
async fn remove(&self, object: &str, rv: &str) -> Result<(), std::io::Error> {
let client = self.client.clone();
let container_client = client.container_client(self.bucket.clone());
let blob_client = container_client.blob_client(self.get_dest(object));
blob_client.delete();
todo!();
}
async fn in_use(&self) -> Result<bool, std::io::Error> {
/*let result = self.client
.list_objects_v2(&self.bucket, &self.prefix, "", "", SLASH_SEPARATOR, 1)
.await?;
Ok(result.common_prefixes.len() > 0 || result.contents.len() > 0)*/
Ok(false)
}
}
/*fn azure_to_object_error(err: Error, params: Vec<String>) -> Option<error> {
if err == nil {
return nil
}
bucket := ""
object := ""
if len(params) >= 1 {
bucket = params[0]
}
if len(params) == 2 {
object = params[1]
}
azureErr, ok := err.(*azcore.ResponseError)
if !ok {
// We don't interpret non-Azure errors, as Azure errors
// carry a StatusCode that helps convert them to object errors.
return err
}
serviceCode := azureErr.ErrorCode
statusCode := azureErr.StatusCode
azureCodesToObjectError(err, serviceCode, statusCode, bucket, object)
}*/
/*fn azure_codes_to_object_error(err: Error, service_code: String, status_code: i32, bucket: String, object: String) -> Option<Error> {
switch serviceCode {
case "ContainerNotFound", "ContainerBeingDeleted":
err = BucketNotFound{Bucket: bucket}
case "ContainerAlreadyExists":
err = BucketExists{Bucket: bucket}
case "InvalidResourceName":
err = BucketNameInvalid{Bucket: bucket}
case "RequestBodyTooLarge":
err = PartTooBig{}
case "InvalidMetadata":
err = UnsupportedMetadata{}
case "BlobAccessTierNotSupportedForAccountType":
err = NotImplemented{}
case "OutOfRangeInput":
err = ObjectNameInvalid{
Bucket: bucket,
Object: object,
}
default:
switch statusCode {
case http.StatusNotFound:
if object != "" {
err = ObjectNotFound{
Bucket: bucket,
Object: object,
}
} else {
err = BucketNotFound{Bucket: bucket}
}
case http.StatusBadRequest:
err = BucketNameInvalid{Bucket: bucket}
}
}
return err
}*/
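A hedged Rust rendering of the commented-out mapping above; `ObjectError` and its variants are hypothetical stand-ins for illustration, not the crate's real error types:
enum ObjectError {
BucketNotFound { bucket: String },
BucketExists { bucket: String },
BucketNameInvalid { bucket: String },
ObjectNotFound { bucket: String, object: String },
PartTooBig,
Unhandled(String),
}
fn azure_code_to_object_error(service_code: &str, status: u16, bucket: &str, object: &str) -> ObjectError {
match service_code {
"ContainerNotFound" | "ContainerBeingDeleted" => ObjectError::BucketNotFound { bucket: bucket.into() },
"ContainerAlreadyExists" => ObjectError::BucketExists { bucket: bucket.into() },
"InvalidResourceName" => ObjectError::BucketNameInvalid { bucket: bucket.into() },
"RequestBodyTooLarge" => ObjectError::PartTooBig,
_ => match status {
404 if !object.is_empty() => ObjectError::ObjectNotFound { bucket: bucket.into(), object: object.into() },
404 => ObjectError::BucketNotFound { bucket: bucket.into() },
400 => ObjectError::BucketNameInvalid { bucket: bucket.into() },
_ => ObjectError::Unhandled(service_code.to_string()),
},
}
}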

View File

@@ -0,0 +1,52 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
pub const AMZ_META_UNENCRYPTED_CONTENT_LENGTH: &str = "X-Amz-Meta-X-Amz-Unencrypted-Content-Length";
pub const AMZ_META_UNENCRYPTED_CONTENT_MD5: &str = "X-Amz-Meta-X-Amz-Unencrypted-Content-Md5";
pub const AMZ_STORAGE_CLASS: &str = "x-amz-storage-class";
pub const RESERVED_METADATA_PREFIX: &str = "X-RustFS-Internal-";
pub const RESERVED_METADATA_PREFIX_LOWER: &str = "x-rustfs-internal-";
pub const RUSTFS_HEALING: &str = "X-Rustfs-Internal-healing";
// pub const RUSTFS_DATA_MOVE: &str = "X-Rustfs-Internal-data-mov";
// pub const X_RUSTFS_INLINE_DATA: &str = "x-rustfs-inline-data";
pub const VERSION_PURGE_STATUS_KEY: &str = "X-Rustfs-Internal-purgestatus";
pub const X_RUSTFS_HEALING: &str = "X-Rustfs-Internal-healing";
pub const X_RUSTFS_DATA_MOV: &str = "X-Rustfs-Internal-data-mov";
pub const AMZ_OBJECT_TAGGING: &str = "X-Amz-Tagging";
pub const AMZ_BUCKET_REPLICATION_STATUS: &str = "X-Amz-Replication-Status";
pub const AMZ_DECODED_CONTENT_LENGTH: &str = "X-Amz-Decoded-Content-Length";
pub const RUSTFS_DATA_MOVE: &str = "X-Rustfs-Internal-data-mov";
// Server-side encryption headers
pub const AMZ_SERVER_SIDE_ENCRYPTION: &str = "x-amz-server-side-encryption";
pub const AMZ_SERVER_SIDE_ENCRYPTION_AWS_KMS_KEY_ID: &str = "x-amz-server-side-encryption-aws-kms-key-id";
pub const AMZ_SERVER_SIDE_ENCRYPTION_CONTEXT: &str = "x-amz-server-side-encryption-context";
pub const AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_ALGORITHM: &str = "x-amz-server-side-encryption-customer-algorithm";
pub const AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY: &str = "x-amz-server-side-encryption-customer-key";
pub const AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY_MD5: &str = "x-amz-server-side-encryption-customer-key-md5";
// SSE-C copy source headers
pub const AMZ_COPY_SOURCE_SERVER_SIDE_ENCRYPTION_CUSTOMER_ALGORITHM: &str =
"x-amz-copy-source-server-side-encryption-customer-algorithm";
pub const AMZ_COPY_SOURCE_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY: &str = "x-amz-copy-source-server-side-encryption-customer-key";
pub const AMZ_COPY_SOURCE_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY_MD5: &str =
"x-amz-copy-source-server-side-encryption-customer-key-md5";

View File

@@ -109,9 +109,6 @@ pub enum Error {
#[error("io error: {0}")]
Io(std::io::Error),
#[error("system already initialized")]
IamSysAlreadyInitialized,
}
impl PartialEq for Error {
@@ -165,7 +162,6 @@ impl Clone for Error {
Error::PolicyTooLarge => Error::PolicyTooLarge,
Error::ConfigNotFound => Error::ConfigNotFound,
Error::Io(e) => Error::Io(std::io::Error::new(e.kind(), e.to_string())),
Error::IamSysAlreadyInitialized => Error::IamSysAlreadyInitialized,
}
}
}
@@ -230,7 +226,6 @@ impl From<rustfs_policy::error::Error> for Error {
rustfs_policy::error::Error::StringError(s) => Error::StringError(s),
rustfs_policy::error::Error::CryptoError(e) => Error::CryptoError(e),
rustfs_policy::error::Error::ErrCredMalformed => Error::ErrCredMalformed,
rustfs_policy::error::Error::IamSysAlreadyInitialized => Error::IamSysAlreadyInitialized,
}
}
}

View File

@@ -18,58 +18,30 @@ use rustfs_ecstore::store::ECStore;
use std::sync::{Arc, OnceLock};
use store::object::ObjectStore;
use sys::IamSys;
use tracing::{error, info, instrument};
use tracing::{debug, instrument};
pub mod cache;
pub mod error;
pub mod manager;
pub mod store;
pub mod sys;
pub mod utils;
pub mod sys;
static IAM_SYS: OnceLock<Arc<IamSys<ObjectStore>>> = OnceLock::new();
#[instrument(skip(ecstore))]
pub async fn init_iam_sys(ecstore: Arc<ECStore>) -> Result<()> {
if IAM_SYS.get().is_some() {
info!("IAM system already initialized, skipping.");
return Ok(());
}
debug!("init iam system");
let s = IamCache::new(ObjectStore::new(ecstore).await).await;
info!("Starting IAM system initialization sequence...");
// 1. Create the persistent storage adapter
let storage_adapter = ObjectStore::new(ecstore);
// 2. Create the cache manager.
// The `new` method now performs a blocking initial load from disk.
let cache_manager = IamCache::new(storage_adapter).await;
// 3. Construct the system interface
let iam_instance = Arc::new(IamSys::new(cache_manager));
// 4. Securely set the global singleton
if IAM_SYS.set(iam_instance).is_err() {
error!("Critical: Race condition detected during IAM initialization!");
return Err(Error::IamSysAlreadyInitialized);
}
info!("IAM system initialization completed successfully.");
IAM_SYS.get_or_init(move || IamSys::new(s).into());
Ok(())
}
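Both sides of this diff follow the standard `OnceLock` singleton pattern; a self-contained sketch with a placeholder payload type:
use std::sync::{Arc, OnceLock};
static SYS: OnceLock<Arc<String>> = OnceLock::new();
fn init(value: String) -> Result<(), &'static str> {
// set() fails if another caller won the race, which is how double-init is detected.
SYS.set(Arc::new(value)).map_err(|_| "already initialized")
}
fn get() -> Result<Arc<String>, &'static str> {
SYS.get().cloned().ok_or("not initialized")
}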
#[inline]
pub fn get() -> Result<Arc<IamSys<ObjectStore>>> {
let sys = IAM_SYS.get().map(Arc::clone).ok_or(Error::IamSysNotInitialized)?;
// Double-check the internal readiness state. The OnceLock is only set
// after initialization and data loading complete, so this is a defensive
// guard to ensure callers never operate on a partially initialized system.
if !sys.is_ready() {
return Err(Error::IamSysNotInitialized);
}
Ok(sys)
IAM_SYS.get().map(Arc::clone).ok_or(Error::IamSysNotInitialized)
}
pub fn get_global_iam_sys() -> Option<Arc<IamSys<ObjectStore>>> {

View File

@@ -37,7 +37,6 @@ use rustfs_policy::{
use rustfs_utils::path::path_join_buf;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::sync::atomic::AtomicU8;
use std::{
collections::{HashMap, HashSet},
sync::{
@@ -77,19 +76,9 @@ fn get_iam_format_file_path() -> String {
path_join_buf(&[&IAM_CONFIG_PREFIX, IAM_FORMAT_FILE])
}
#[repr(u8)]
#[derive(Debug, PartialEq)]
pub enum IamState {
Uninitialized = 0,
Loading = 1,
Ready = 2,
Error = 3,
}
pub struct IamCache<T> {
pub cache: Cache,
pub api: T,
pub state: Arc<AtomicU8>,
pub loading: Arc<AtomicBool>,
pub roles: HashMap<ARN, Vec<String>>,
pub send_chan: Sender<i64>,
@@ -100,19 +89,12 @@ impl<T> IamCache<T>
where
T: Store,
{
/// Create a new IAM system instance
/// # Arguments
/// * `api` - The storage backend implementing the Store trait
///
/// # Returns
/// An Arc-wrapped instance of IamCache
pub(crate) async fn new(api: T) -> Arc<Self> {
let (sender, receiver) = mpsc::channel::<i64>(100);
let sys = Arc::new(Self {
api,
cache: Cache::default(),
state: Arc::new(AtomicU8::new(IamState::Uninitialized as u8)),
loading: Arc::new(AtomicBool::new(false)),
send_chan: sender,
roles: HashMap::new(),
@@ -123,32 +105,10 @@ where
sys
}
/// Initialize the IAM system
async fn init(self: Arc<Self>, receiver: Receiver<i64>) -> Result<()> {
self.state.store(IamState::Loading as u8, Ordering::SeqCst);
// Ensure the IAM format file is persisted first
self.clone().save_iam_formatter().await?;
self.clone().load().await?;
// Critical: Load all existing users/policies into memory cache
const MAX_RETRIES: usize = 3;
for attempt in 0..MAX_RETRIES {
if let Err(e) = self.clone().load().await {
if attempt == MAX_RETRIES - 1 {
self.state.store(IamState::Error as u8, Ordering::SeqCst);
error!("IAM fail to load initial data after {} attempts: {:?}", MAX_RETRIES, e);
return Err(e);
} else {
warn!("IAM load failed, retrying... attempt {}", attempt + 1);
tokio::time::sleep(Duration::from_secs(1)).await;
}
} else {
break;
}
}
self.state.store(IamState::Ready as u8, Ordering::SeqCst);
info!("IAM System successfully initialized and marked as READY");
// Background ticker for synchronization
// Check if environment variable is set
let skip_background_task = std::env::var("RUSTFS_SKIP_BACKGROUND_TASK").is_ok();
@@ -192,11 +152,6 @@ where
Ok(())
}
/// Check if IAM system is ready
pub fn is_ready(&self) -> bool {
self.state.load(Ordering::SeqCst) == IamState::Ready as u8
}
async fn _notify(&self) {
self.send_chan.send(OffsetDateTime::now_utc().unix_timestamp()).await.unwrap();
}

View File

@@ -38,7 +38,7 @@ use std::sync::LazyLock;
use std::{collections::HashMap, sync::Arc};
use tokio::sync::mpsc::{self, Sender};
use tokio_util::sync::CancellationToken;
use tracing::{debug, error, info, warn};
use tracing::{info, warn};
pub static IAM_CONFIG_PREFIX: LazyLock<String> = LazyLock::new(|| format!("{RUSTFS_CONFIG_PREFIX}/iam"));
pub static IAM_CONFIG_USERS_PREFIX: LazyLock<String> = LazyLock::new(|| format!("{RUSTFS_CONFIG_PREFIX}/iam/users/"));
@@ -120,18 +120,52 @@ fn split_path(s: &str, last_index: bool) -> (&str, &str) {
#[derive(Clone)]
pub struct ObjectStore {
object_api: Arc<ECStore>,
prev_cred: Option<rustfs_policy::auth::Credentials>,
}
impl ObjectStore {
const BUCKET_NAME: &'static str = ".rustfs.sys";
const PREV_CRED_FILE: &'static str = "config/iam/prev_cred.json";
pub fn new(object_api: Arc<ECStore>) -> Self {
Self { object_api }
/// Load previous credentials from persistent storage in .rustfs.sys bucket
async fn load_prev_cred(object_api: Arc<ECStore>) -> Option<rustfs_policy::auth::Credentials> {
match read_config(object_api, Self::PREV_CRED_FILE).await {
Ok(data) => serde_json::from_slice::<rustfs_policy::auth::Credentials>(&data).ok(),
Err(_) => None,
}
}
fn decrypt_data(data: &[u8]) -> Result<Vec<u8>> {
let de = rustfs_crypto::decrypt_data(get_global_action_cred().unwrap_or_default().secret_key.as_bytes(), data)?;
Ok(de)
/// Save previous credentials to persistent storage in .rustfs.sys bucket
async fn save_prev_cred(object_api: Arc<ECStore>, cred: &Option<rustfs_policy::auth::Credentials>) -> Result<()> {
match cred {
Some(c) => {
let data = serde_json::to_vec(c).map_err(|e| Error::other(format!("Failed to serialize cred: {}", e)))?;
save_config(object_api, Self::PREV_CRED_FILE, data)
.await
.map_err(|e| Error::other(format!("Failed to write cred to storage: {}", e)))
}
None => {
// If no credentials, remove the config
match delete_config(object_api, Self::PREV_CRED_FILE).await {
Ok(_) => Ok(()),
Err(e) => {
// Ignore ConfigNotFound error when trying to delete non-existent config
if matches!(e, rustfs_ecstore::error::StorageError::ConfigNotFound) {
Ok(())
} else {
Err(Error::other(format!("Failed to delete cred from storage: {}", e)))
}
}
}
}
}
}
pub async fn new(object_api: Arc<ECStore>) -> Self {
// Load previous credentials from persistent storage in .rustfs.sys bucket
let prev_cred = Self::load_prev_cred(object_api.clone()).await.or_else(get_global_action_cred);
Self { object_api, prev_cred }
}
fn encrypt_data(data: &[u8]) -> Result<Vec<u8>> {
@@ -139,10 +173,65 @@ impl ObjectStore {
Ok(en)
}
/// Decrypt data with credential fallback mechanism
/// First tries current credentials, then falls back to previous credentials if available
async fn decrypt_fallback(&self, data: &[u8], path: &str) -> Result<Vec<u8>> {
let current_cred = get_global_action_cred().unwrap_or_default();
// Try current credentials first
match rustfs_crypto::decrypt_data(current_cred.secret_key.as_bytes(), data) {
Ok(decrypted) => {
// Update persistent storage with current credentials for consistency
let _ = Self::save_prev_cred(self.object_api.clone(), &Some(current_cred)).await;
Ok(decrypted)
}
Err(_) => {
// Current credentials failed, try previous credentials
if let Some(ref prev_cred) = self.prev_cred {
match rustfs_crypto::decrypt_data(prev_cred.secret_key.as_bytes(), data) {
Ok(prev_decrypted) => {
warn!("Decryption succeeded with previous credentials, path: {}", path);
// Re-encrypt with current credentials
match rustfs_crypto::encrypt_data(current_cred.secret_key.as_bytes(), &prev_decrypted) {
Ok(re_encrypted) => {
let _ = save_config(self.object_api.clone(), path, re_encrypted).await;
}
Err(e) => {
warn!("Failed to re-encrypt with current credentials: {}, path: {}", e, path);
}
}
// Update persistent storage with current credentials
let _ = Self::save_prev_cred(self.object_api.clone(), &Some(current_cred)).await;
Ok(prev_decrypted)
}
Err(_) => {
// Both attempts failed
warn!("Decryption failed with both current and previous credentials, deleting config: {}", path);
let _ = self.delete_iam_config(path).await;
Err(Error::ConfigNotFound)
}
}
} else {
// No previous credentials available
warn!(
"Decryption failed with current credentials and no previous credentials available, deleting config: {}",
path
);
let _ = self.delete_iam_config(path).await;
Err(Error::ConfigNotFound)
}
}
}
}
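Stripped of its storage side effects, the fallback above reduces to a small key-rotation pattern; a sketch with a hypothetical `decrypt` closure, where re-encryption and persistence are left to the caller:
fn decrypt_with_rotation(
data: &[u8],
current_key: &[u8],
previous_key: Option<&[u8]>,
decrypt: impl Fn(&[u8], &[u8]) -> Result<Vec<u8>, String>,
) -> Result<(Vec<u8>, bool), String> {
match decrypt(current_key, data) {
Ok(plain) => Ok((plain, false)), // current key worked, nothing to migrate
Err(_) => {
let prev = previous_key.ok_or_else(|| "no previous key available".to_string())?;
let plain = decrypt(prev, data)?;
Ok((plain, true)) // caller should re-encrypt with the current key
}
}
}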
async fn load_iamconfig_bytes_with_metadata(&self, path: impl AsRef<str> + Send) -> Result<(Vec<u8>, ObjectInfo)> {
let (data, obj) = read_config_with_metadata(self.object_api.clone(), path.as_ref(), &ObjectOptions::default()).await?;
Ok((Self::decrypt_data(&data)?, obj))
let decrypted_data = self.decrypt_fallback(&data, path.as_ref()).await?;
Ok((decrypted_data, obj))
}
async fn list_iam_config_items(&self, prefix: &str, ctx: CancellationToken, sender: Sender<StringOrErr>) {
@@ -341,27 +430,6 @@ impl ObjectStore {
Ok(policies)
}
/// Checks if the underlying ECStore is ready for metadata operations.
/// This prevents silent failures during the storage boot-up phase.
///
/// Performs a lightweight probe by attempting to read a known configuration object.
/// If the object is not found, it indicates the storage metadata is not ready.
/// The upper-level caller should handle retries if needed.
async fn check_storage_readiness(&self) -> Result<()> {
// Probe path for a fixed object under the IAM root prefix.
// If it doesn't exist, the system bucket or metadata is not ready.
let probe_path = format!("{}/format.json", *IAM_CONFIG_PREFIX);
match read_config(self.object_api.clone(), &probe_path).await {
Ok(_) => Ok(()),
Err(rustfs_ecstore::error::StorageError::ConfigNotFound) => Err(Error::other(format!(
"Storage metadata not ready: probe object '{}' not found (expected IAM config to be initialized)",
probe_path
))),
Err(e) => Err(e.into()),
}
}
// async fn load_policy(&self, name: &str) -> Result<PolicyDoc> {
// let mut policy = self
// .load_iam_config::<PolicyDoc>(&format!("config/iam/policies/{name}/policy.json"))
@@ -407,62 +475,17 @@ impl Store for ObjectStore {
async fn load_iam_config<Item: DeserializeOwned>(&self, path: impl AsRef<str> + Send) -> Result<Item> {
let mut data = read_config(self.object_api.clone(), path.as_ref()).await?;
data = match Self::decrypt_data(&data) {
Ok(v) => v,
Err(err) => {
warn!("delete the config file when decrypt failed failed: {}, path: {}", err, path.as_ref());
// delete the config file when decrypt failed
let _ = self.delete_iam_config(path.as_ref()).await;
return Err(Error::ConfigNotFound);
}
};
data = self.decrypt_fallback(&data, path.as_ref()).await?;
Ok(serde_json::from_slice(&data)?)
}
/// Saves IAM configuration with a retry mechanism on failure.
///
/// Attempts to save the IAM configuration up to 5 times if the storage layer is not ready,
/// using exponential backoff between attempts (starting at 200ms, doubling each retry).
///
/// # Arguments
///
/// * `item` - The IAM configuration item to save, must implement `Serialize` and `Send`.
/// * `path` - The path where the configuration will be saved.
///
/// # Returns
///
/// * `Result<()>` - `Ok(())` on success, or an `Error` if all attempts fail.
#[tracing::instrument(level = "debug", skip(self, item, path))]
async fn save_iam_config<Item: Serialize + Send>(&self, item: Item, path: impl AsRef<str> + Send) -> Result<()> {
let mut data = serde_json::to_vec(&item)?;
data = Self::encrypt_data(&data)?;
let mut attempts = 0;
let max_attempts = 5;
let path_ref = path.as_ref();
loop {
match save_config(self.object_api.clone(), path_ref, data.clone()).await {
Ok(_) => {
debug!("Successfully saved IAM config to {}", path_ref);
return Ok(());
}
Err(e) if attempts < max_attempts => {
attempts += 1;
// Exponential backoff: 200ms, 400ms, 800ms...
let wait_ms = 200 * (1 << (attempts - 1));
warn!(
"Storage layer not ready for IAM write (attempt {}/{}). Retrying in {}ms. Path: {}, Error: {:?}",
attempts, max_attempts, wait_ms, path_ref, e
);
tokio::time::sleep(std::time::Duration::from_millis(wait_ms)).await;
}
Err(e) => {
error!("Final failure saving IAM config to {}: {:?}", path_ref, e);
return Err(e.into());
}
}
}
save_config(self.object_api.clone(), path.as_ref(), data).await?;
Ok(())
}
async fn delete_iam_config(&self, path: impl AsRef<str> + Send) -> Result<()> {
delete_config(self.object_api.clone(), path.as_ref()).await?;
@@ -476,16 +499,8 @@ impl Store for ObjectStore {
user_identity: UserIdentity,
_ttl: Option<usize>,
) -> Result<()> {
// Pre-check storage health
self.check_storage_readiness().await?;
let path = get_user_identity_path(name, user_type);
debug!("Saving IAM identity to path: {}", path);
self.save_iam_config(user_identity, path).await.map_err(|e| {
error!("ObjectStore save failure for {}: {:?}", name, e);
e
})
self.save_iam_config(user_identity, get_user_identity_path(name, user_type))
.await
}
async fn delete_user_identity(&self, name: &str, user_type: UserType) -> Result<()> {
self.delete_iam_config(get_user_identity_path(name, user_type))

View File

@@ -67,13 +67,6 @@ pub struct IamSys<T> {
}
impl<T: Store> IamSys<T> {
/// Create a new IamSys instance with the given IamCache store
///
/// # Arguments
/// * `store` - An Arc to the IamCache instance
///
/// # Returns
/// A new instance of IamSys
pub fn new(store: Arc<IamCache<T>>) -> Self {
tokio::spawn(async move {
match opa::lookup_config().await {
@@ -94,11 +87,6 @@ impl<T: Store> IamSys<T> {
roles_map: HashMap::new(),
}
}
/// Check if the IamSys has a watcher configured
///
/// # Returns
/// `true` if a watcher is configured, `false` otherwise
pub fn has_watcher(&self) -> bool {
self.store.api.has_watcher()
}
@@ -871,11 +859,6 @@ impl<T: Store> IamSys<T> {
self.get_combined_policy(&policies).await.is_allowed(args).await
}
/// Check if the underlying store is ready
pub fn is_ready(&self) -> bool {
self.store.is_ready()
}
}
fn is_allowed_by_session_policy(args: &Args<'_>) -> (bool, bool) {

View File

@@ -0,0 +1,325 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Benchmarks comparing fast lock vs old lock performance
#[cfg(test)]
#[allow(dead_code)] // Temporarily disable benchmark tests
mod benchmarks {
use super::super::*;
use std::sync::Arc;
use std::time::{Duration, Instant};
use tokio::task;
/// Benchmark single-threaded lock operations
#[tokio::test]
async fn bench_single_threaded_fast_locks() {
let manager = Arc::new(FastObjectLockManager::new());
let iterations = 10000;
// Warm up
for i in 0..100 {
let _guard = manager
.acquire_write_lock("bucket", &format!("warm_{}", i), "owner")
.await
.unwrap();
}
// Benchmark write locks
let start = Instant::now();
for i in 0..iterations {
let _guard = manager
.acquire_write_lock("bucket", &format!("object_{}", i), "owner")
.await
.unwrap();
}
let duration = start.elapsed();
println!("Fast locks: {} write locks in {:?}", iterations, duration);
println!("Average: {:?} per lock", duration / iterations);
let metrics = manager.get_metrics();
println!("Fast path rate: {:.2}%", metrics.shard_metrics.fast_path_rate() * 100.0);
// Should be much faster than old implementation
assert!(duration.as_millis() < 1000, "Should complete 10k locks in <1s");
assert!(metrics.shard_metrics.fast_path_rate() > 0.95, "Should have >95% fast path rate");
}
/// Benchmark concurrent lock operations
#[tokio::test]
async fn bench_concurrent_fast_locks() {
let manager = Arc::new(FastObjectLockManager::new());
let concurrent_tasks = 100;
let iterations_per_task = 100;
let start = Instant::now();
let mut handles = Vec::new();
for task_id in 0..concurrent_tasks {
let manager_clone = manager.clone();
let handle = task::spawn(async move {
for i in 0..iterations_per_task {
let object_name = format!("obj_{}_{}", task_id, i);
let _guard = manager_clone
.acquire_write_lock("bucket", &object_name, &format!("owner_{}", task_id))
.await
.unwrap();
// Simulate some work
tokio::task::yield_now().await;
}
});
handles.push(handle);
}
// Wait for all tasks
for handle in handles {
handle.await.unwrap();
}
let duration = start.elapsed();
let total_ops = concurrent_tasks * iterations_per_task;
println!("Concurrent fast locks: {} operations across {} tasks in {:?}",
total_ops, concurrent_tasks, duration);
println!("Throughput: {:.2} ops/sec", total_ops as f64 / duration.as_secs_f64());
let metrics = manager.get_metrics();
println!("Fast path rate: {:.2}%", metrics.shard_metrics.fast_path_rate() * 100.0);
println!("Contention events: {}", metrics.shard_metrics.contention_events);
// Should maintain high throughput even with concurrency
assert!(duration.as_millis() < 5000, "Should complete concurrent ops in <5s");
}
/// Benchmark contended lock operations
#[tokio::test]
async fn bench_contended_locks() {
let manager = Arc::new(FastObjectLockManager::new());
let concurrent_tasks = 50;
let shared_objects = 10; // High contention on few objects
let iterations_per_task = 50;
let start = Instant::now();
let mut handles = Vec::new();
for task_id in 0..concurrent_tasks {
let manager_clone = manager.clone();
let handle = task::spawn(async move {
for i in 0..iterations_per_task {
let object_name = format!("shared_{}", i % shared_objects);
// Mix of read and write operations
if i % 3 == 0 {
// Write operation
if let Ok(_guard) = manager_clone
.acquire_write_lock("bucket", &object_name, &format!("owner_{}", task_id))
.await
{
tokio::task::yield_now().await;
}
} else {
// Read operation
if let Ok(_guard) = manager_clone
.acquire_read_lock("bucket", &object_name, &format!("owner_{}", task_id))
.await
{
tokio::task::yield_now().await;
}
}
}
});
handles.push(handle);
}
// Wait for all tasks
for handle in handles {
handle.await.unwrap();
}
let duration = start.elapsed();
println!("Contended locks: {} tasks on {} objects in {:?}",
concurrent_tasks, shared_objects, duration);
let metrics = manager.get_metrics();
println!("Total acquisitions: {}", metrics.shard_metrics.total_acquisitions());
println!("Fast path rate: {:.2}%", metrics.shard_metrics.fast_path_rate() * 100.0);
println!("Average wait time: {:?}", metrics.shard_metrics.avg_wait_time());
println!("Timeout rate: {:.2}%", metrics.shard_metrics.timeout_rate() * 100.0);
// Even with contention, should maintain reasonable performance
assert!(metrics.shard_metrics.timeout_rate() < 0.1, "Should have <10% timeout rate");
assert!(metrics.shard_metrics.avg_wait_time() < Duration::from_millis(100), "Avg wait should be <100ms");
}
/// Benchmark batch operations
#[tokio::test]
async fn bench_batch_operations() {
let manager = FastObjectLockManager::new();
let batch_sizes = vec![10, 50, 100, 500];
for batch_size in batch_sizes {
// Create batch request
let mut batch = BatchLockRequest::new("batch_owner");
for i in 0..batch_size {
batch = batch.add_write_lock("bucket", &format!("batch_obj_{}", i));
}
let start = Instant::now();
let result = manager.acquire_locks_batch(batch).await;
let duration = start.elapsed();
assert!(result.all_acquired, "Batch should succeed");
println!("Batch size {}: {:?} ({:.2} μs per lock)",
batch_size,
duration,
duration.as_micros() as f64 / batch_size as f64);
// Batch should be much faster than individual acquisitions
assert!(duration.as_millis() < batch_size as u128 / 10,
"Batch should be 10x+ faster than individual locks");
}
}
/// Benchmark version-specific locks
#[tokio::test]
async fn bench_versioned_locks() {
let manager = Arc::new(FastObjectLockManager::new());
let objects = 100;
let versions_per_object = 10;
let start = Instant::now();
let mut handles = Vec::new();
for obj_id in 0..objects {
let manager_clone = manager.clone();
let handle = task::spawn(async move {
for version in 0..versions_per_object {
let _guard = manager_clone
.acquire_write_lock_versioned(
"bucket",
&format!("obj_{}", obj_id),
&format!("v{}", version),
"version_owner"
)
.await
.unwrap();
}
});
handles.push(handle);
}
for handle in handles {
handle.await.unwrap();
}
let duration = start.elapsed();
let total_ops = objects * versions_per_object;
println!("Versioned locks: {} version locks in {:?}", total_ops, duration);
println!("Throughput: {:.2} locks/sec", total_ops as f64 / duration.as_secs_f64());
let metrics = manager.get_metrics();
println!("Fast path rate: {:.2}%", metrics.shard_metrics.fast_path_rate() * 100.0);
// Versioned locks should not interfere with each other
assert!(metrics.shard_metrics.fast_path_rate() > 0.9, "Should maintain high fast path rate");
}
/// Compare with theoretical maximum performance
#[tokio::test]
async fn bench_theoretical_maximum() {
let manager = Arc::new(FastObjectLockManager::new());
let iterations = 100000;
// Measure pure fast path performance (no contention)
let start = Instant::now();
for i in 0..iterations {
let _guard = manager
.acquire_write_lock("bucket", &format!("unique_{}", i), "owner")
.await
.unwrap();
}
let duration = start.elapsed();
println!("Theoretical maximum: {} unique locks in {:?}", iterations, duration);
println!("Rate: {:.2} locks/sec", iterations as f64 / duration.as_secs_f64());
println!("Latency: {:?} per lock", duration / iterations);
let metrics = manager.get_metrics();
println!("Fast path rate: {:.2}%", metrics.shard_metrics.fast_path_rate() * 100.0);
// Should achieve very high performance with no contention
assert!(metrics.shard_metrics.fast_path_rate() > 0.99, "Should be nearly 100% fast path");
assert!(duration.as_secs_f64() / (iterations as f64) < 0.0001, "Should be <100μs per lock");
}
/// Performance regression test
#[tokio::test]
async fn performance_regression_test() {
let manager = Arc::new(FastObjectLockManager::new());
// This test ensures we maintain performance targets
let test_cases = vec![
("single_thread", 1, 10000),
("low_contention", 10, 1000),
("high_contention", 100, 100),
];
for (test_name, threads, ops_per_thread) in test_cases {
let start = Instant::now();
let mut handles = Vec::new();
for thread_id in 0..threads {
let manager_clone = manager.clone();
let handle = task::spawn(async move {
for op_id in 0..ops_per_thread {
let object = if threads == 1 {
format!("obj_{}_{}", thread_id, op_id)
} else {
format!("obj_{}", op_id % 100) // Create contention
};
let owner = format!("owner_{}", thread_id);
let _guard = manager_clone
.acquire_write_lock("bucket", object, owner)
.await
.unwrap();
}
});
handles.push(handle);
}
for handle in handles {
handle.await.unwrap();
}
let duration = start.elapsed();
let total_ops = threads * ops_per_thread;
let ops_per_sec = total_ops as f64 / duration.as_secs_f64();
println!("{}: {:.2} ops/sec", test_name, ops_per_sec);
// Performance targets (adjust based on requirements)
match test_name {
"single_thread" => assert!(ops_per_sec > 50000.0, "Single thread should exceed 50k ops/sec"),
"low_contention" => assert!(ops_per_sec > 20000.0, "Low contention should exceed 20k ops/sec"),
"high_contention" => assert!(ops_per_sec > 5000.0, "High contention should exceed 5k ops/sec"),
_ => {}
}
}
}
}
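A tiny helper in the spirit of the throughput numbers printed above (illustrative, not part of the module):
use std::time::Instant;
async fn ops_per_sec<F, Fut>(iterations: u32, mut op: F) -> f64
where
F: FnMut(u32) -> Fut,
Fut: std::future::Future<Output = ()>,
{
let start = Instant::now();
for i in 0..iterations {
op(i).await;
}
iterations as f64 / start.elapsed().as_secs_f64()
}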

View File

@@ -37,6 +37,9 @@ pub mod shard;
pub mod state;
pub mod types;
// #[cfg(test)]
// pub mod benchmarks; // Temporarily disabled due to compilation issues
// Re-export main types
pub use disabled_manager::DisabledLockManager;
pub use guard::FastLockGuard;

View File

@@ -12,6 +12,4 @@ WORKDIR /app
COPY --from=builder /build/target/release/rustfs-mcp /app/
RUN apt-get update && apt-get install -y ca-certificates && update-ca-certificates
ENTRYPOINT ["/app/rustfs-mcp"]
ENTRYPOINT ["/app/rustfs-mcp"]

View File

@@ -30,7 +30,6 @@ rustfs-config = { workspace = true, features = ["notify", "constants"] }
rustfs-ecstore = { workspace = true }
rustfs-targets = { workspace = true }
rustfs-utils = { workspace = true }
arc-swap = { workspace = true }
async-trait = { workspace = true }
chrono = { workspace = true, features = ["serde"] }
futures = { workspace = true }

View File

@@ -60,9 +60,8 @@ impl TargetFactory for WebhookTargetFactory {
let endpoint = config
.lookup(WEBHOOK_ENDPOINT)
.ok_or_else(|| TargetError::Configuration("Missing webhook endpoint".to_string()))?;
let parsed_endpoint = endpoint.trim();
let endpoint_url = Url::parse(parsed_endpoint)
.map_err(|e| TargetError::Configuration(format!("Invalid endpoint URL: {e} (value: '{parsed_endpoint}')")))?;
let endpoint_url = Url::parse(&endpoint)
.map_err(|e| TargetError::Configuration(format!("Invalid endpoint URL: {e} (value: '{endpoint}')")))?;
let args = WebhookArgs {
enable: true, // If we are here, it's already enabled.

View File

@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::notification_system_subscriber::NotificationSystemSubscriberView;
use crate::{
Event, error::NotificationError, notifier::EventNotifier, registry::TargetRegistry, rules::BucketNotificationConfig, stream,
};
@@ -105,8 +104,6 @@ pub struct NotificationSystem {
concurrency_limiter: Arc<Semaphore>,
/// Monitoring indicators
metrics: Arc<NotificationMetrics>,
/// Subscriber view
subscriber_view: NotificationSystemSubscriberView,
}
impl NotificationSystem {
@@ -115,7 +112,6 @@ impl NotificationSystem {
let concurrency_limiter =
rustfs_utils::get_env_usize(ENV_NOTIFY_TARGET_STREAM_CONCURRENCY, DEFAULT_NOTIFY_TARGET_STREAM_CONCURRENCY);
NotificationSystem {
subscriber_view: NotificationSystemSubscriberView::new(),
notifier: Arc::new(EventNotifier::new()),
registry: Arc::new(TargetRegistry::new()),
config: Arc::new(RwLock::new(config)),
@@ -192,11 +188,8 @@ impl NotificationSystem {
}
/// Checks if there are active subscribers for the given bucket and event name.
pub async fn has_subscriber(&self, bucket: &str, event: &EventName) -> bool {
if !self.subscriber_view.has_subscriber(bucket, event) {
return false;
}
self.notifier.has_subscriber(bucket, event).await
pub async fn has_subscriber(&self, bucket: &str, event_name: &EventName) -> bool {
self.notifier.has_subscriber(bucket, event_name).await
}
async fn update_config_and_reload<F>(&self, mut modifier: F) -> Result<(), NotificationError>
@@ -243,18 +236,15 @@ impl NotificationSystem {
pub async fn remove_target(&self, target_id: &TargetID, target_type: &str) -> Result<(), NotificationError> {
info!("Attempting to remove target: {}", target_id);
let ttype = target_type.to_lowercase();
let tname = target_id.name.to_lowercase();
self.update_config_and_reload(|config| {
let mut changed = false;
if let Some(targets_of_type) = config.0.get_mut(&ttype) {
if targets_of_type.remove(&tname).is_some() {
if let Some(targets_of_type) = config.0.get_mut(target_type) {
if targets_of_type.remove(&target_id.name).is_some() {
info!("Removed target {} from configuration", target_id);
changed = true;
}
if targets_of_type.is_empty() {
config.0.remove(&ttype);
config.0.remove(target_type);
}
}
if !changed {
@@ -279,24 +269,20 @@ impl NotificationSystem {
/// If the target configuration is invalid, it returns Err(NotificationError::Configuration).
pub async fn set_target_config(&self, target_type: &str, target_name: &str, kvs: KVS) -> Result<(), NotificationError> {
info!("Setting config for target {} of type {}", target_name, target_type);
let ttype = target_type.to_lowercase();
let tname = target_name.to_lowercase();
self.update_config_and_reload(|config| {
config.0.entry(ttype.clone()).or_default().insert(tname.clone(), kvs.clone());
config
.0
.entry(target_type.to_lowercase())
.or_default()
.insert(target_name.to_lowercase(), kvs.clone());
true // The configuration is always modified
})
.await
}
/// Removes all notification configurations for a bucket.
/// If the configuration is successfully removed, the entire notification system will be automatically reloaded.
///
/// # Arguments
/// * `bucket` - The name of the bucket whose notification configuration is to be removed.
///
pub async fn remove_bucket_notification_config(&self, bucket: &str) {
self.subscriber_view.clear_bucket(bucket);
self.notifier.remove_rules_map(bucket).await;
pub async fn remove_bucket_notification_config(&self, bucket_name: &str) {
self.notifier.remove_rules_map(bucket_name).await;
}
/// Removes a Target configuration.
@@ -313,28 +299,11 @@ impl NotificationSystem {
/// If the target configuration does not exist, it returns Ok(()) without making any changes.
pub async fn remove_target_config(&self, target_type: &str, target_name: &str) -> Result<(), NotificationError> {
info!("Removing config for target {} of type {}", target_name, target_type);
let ttype = target_type.to_lowercase();
let tname = target_name.to_lowercase();
let target_id = TargetID {
id: tname.clone(),
name: ttype.clone(),
};
// Deletion is prohibited if bucket rules refer to it
if self.notifier.is_target_bound_to_any_bucket(&target_id).await {
return Err(NotificationError::Configuration(format!(
"Target is still bound to bucket rules and deletion is prohibited: type={} name={}",
ttype, tname
)));
}
let config_result = self
.update_config_and_reload(|config| {
let mut changed = false;
if let Some(targets) = config.0.get_mut(&ttype) {
if targets.remove(&tname).is_some() {
if let Some(targets) = config.0.get_mut(&target_type.to_lowercase()) {
if targets.remove(&target_name.to_lowercase()).is_some() {
changed = true;
}
if targets.is_empty() {
@@ -350,6 +319,8 @@ impl NotificationSystem {
.await;
if config_result.is_ok() {
let target_id = TargetID::new(target_name.to_string(), target_type.to_string());
// Remove from target list
let target_list = self.notifier.target_list();
let mut target_list_guard = target_list.write().await;
@@ -387,9 +358,6 @@ impl NotificationSystem {
let _ = cancel_tx.send(()).await;
}
// Clear the target_list so that reload performs a full rebuild (avoids a stale target_list keeping its old length or leftover entries)
self.notifier.remove_all_bucket_targets().await;
// Update the config
self.update_config(new_config.clone()).await;
@@ -420,16 +388,15 @@ impl NotificationSystem {
// Clone the target's store and the target itself for the stream task
let store_clone = store.boxed_clone();
// let target_box = target.clone_dyn();
let target_arc = Arc::from(target.clone_dyn());
let target_box = target.clone_dyn();
let target_arc = Arc::from(target_box);
// Add a reference to the monitoring metrics
let metrics = self.metrics.clone();
let semaphore = self.concurrency_limiter.clone();
// Encapsulated enhanced version of start_event_stream
let cancel_tx = self.enhanced_start_event_stream(
store_clone,
target_arc,
self.metrics.clone(),
self.concurrency_limiter.clone(),
);
let cancel_tx = self.enhanced_start_event_stream(store_clone, target_arc, metrics, semaphore);
// Start event stream processing and save cancel sender
// let cancel_tx = start_event_stream(store_clone, target_clone);
@@ -456,18 +423,17 @@ impl NotificationSystem {
/// Loads the bucket notification configuration
pub async fn load_bucket_notification_config(
&self,
bucket: &str,
cfg: &BucketNotificationConfig,
bucket_name: &str,
config: &BucketNotificationConfig,
) -> Result<(), NotificationError> {
self.subscriber_view.apply_bucket_config(bucket, cfg);
let arn_list = self.notifier.get_arn_list(&cfg.region).await;
let arn_list = self.notifier.get_arn_list(&config.region).await;
if arn_list.is_empty() {
return Err(NotificationError::Configuration("No targets configured".to_string()));
}
info!("Available ARNs: {:?}", arn_list);
// Validate the configuration against the available ARNs
if let Err(e) = cfg.validate(&cfg.region, &arn_list) {
debug!("Bucket notification config validation failed for region {}: {}", &cfg.region, e);
if let Err(e) = config.validate(&config.region, &arn_list) {
debug!("Bucket notification config validation failed for region {}: {}", &config.region, e);
if !e.to_string().contains("ARN not found") {
return Err(NotificationError::BucketNotification(e.to_string()));
} else {
@@ -475,9 +441,9 @@ impl NotificationSystem {
}
}
let rules_map = cfg.get_rules_map();
self.notifier.add_rules_map(bucket, rules_map.clone()).await;
info!("Loaded notification config for bucket: {}", bucket);
let rules_map = config.get_rules_map();
self.notifier.add_rules_map(bucket_name, rules_map.clone()).await;
info!("Loaded notification config for bucket: {}", bucket_name);
Ok(())
}

View File

@@ -23,7 +23,6 @@ mod event;
pub mod factory;
mod global;
pub mod integration;
mod notification_system_subscriber;
pub mod notifier;
pub mod registry;
pub mod rules;

View File

@@ -1,74 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::BucketNotificationConfig;
use crate::rules::{BucketRulesSnapshot, DynRulesContainer, SubscriberIndex};
use rustfs_targets::EventName;
/// NotificationSystemSubscriberView - Provides an interface to manage and query
/// the subscription status of buckets in the notification system.
#[derive(Debug)]
pub struct NotificationSystemSubscriberView {
index: SubscriberIndex,
}
impl NotificationSystemSubscriberView {
/// Creates a new NotificationSystemSubscriberView with an empty SubscriberIndex.
///
/// Returns a new instance of NotificationSystemSubscriberView.
pub fn new() -> Self {
Self {
index: SubscriberIndex::default(),
}
}
/// Checks if a bucket has any subscribers for a specific event.
/// This is a quick check using the event mask in the snapshot.
///
/// # Arguments
/// * `bucket` - The name of the bucket to check.
/// * `event` - The event name to check for subscriptions.
///
/// Returns `true` if there are subscribers for the event, `false` otherwise.
#[inline]
pub fn has_subscriber(&self, bucket: &str, event: &EventName) -> bool {
self.index.has_subscriber(bucket, event)
}
/// Builds and atomically replaces a bucket's subscription snapshot from the configuration.
///
/// Core principle: masks and rules are calculated and stored together in the same update.
///
/// # Arguments
/// * `bucket` - The name of the bucket to update.
/// * `cfg` - The bucket notification configuration to compile into a snapshot.
pub fn apply_bucket_config(&self, bucket: &str, cfg: &BucketNotificationConfig) {
// It is recommended to merge the compile steps into one function so the mask and rules share the same origin.
let snapshot: BucketRulesSnapshot<DynRulesContainer> = cfg.compile_snapshot();
// Debug assertion to prevent inconsistencies from creeping in when the compile logic is modified in the future.
snapshot.debug_assert_mask_consistent();
self.index.store_snapshot(bucket, snapshot);
}
/// Clears a bucket's subscription snapshot.
///
/// # Arguments
/// * `bucket` - The name of the bucket to clear.
#[inline]
pub fn clear_bucket(&self, bucket: &str) {
self.index.clear_bucket(bucket);
}
}
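The mask-based fast path this file describes can be distilled to a few lines; a sketch with a plain `u64` bitmask standing in for the crate's event-mask and snapshot types:
use std::collections::HashMap;
use std::sync::RwLock;
#[derive(Default)]
struct MaskIndex {
// bucket -> bitmask of subscribed event kinds
masks: RwLock<HashMap<String, u64>>,
}
impl MaskIndex {
fn store_mask(&self, bucket: &str, mask: u64) {
self.masks.write().unwrap().insert(bucket.to_string(), mask);
}
fn has_subscriber(&self, bucket: &str, event_bit: u64) -> bool {
self.masks.read().unwrap().get(bucket).map(|m| m & event_bit != 0).unwrap_or(false)
}
}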

View File

@@ -14,21 +14,19 @@
use crate::{error::NotificationError, event::Event, rules::RulesMap};
use hashbrown::HashMap;
use rustfs_config::notify::{DEFAULT_NOTIFY_SEND_CONCURRENCY, ENV_NOTIFY_SEND_CONCURRENCY};
use rustfs_targets::EventName;
use rustfs_targets::Target;
use rustfs_targets::arn::TargetID;
use rustfs_targets::target::EntityTarget;
use starshard::AsyncShardedHashMap;
use std::sync::Arc;
use tokio::sync::{RwLock, Semaphore};
use tokio::sync::RwLock;
use tracing::{debug, error, info, instrument, warn};
/// Manages event notification to targets based on rules
pub struct EventNotifier {
target_list: Arc<RwLock<TargetList>>,
bucket_rules_map: Arc<AsyncShardedHashMap<String, RulesMap, rustc_hash::FxBuildHasher>>,
send_limiter: Arc<Semaphore>,
}
impl Default for EventNotifier {
@@ -39,41 +37,16 @@ impl Default for EventNotifier {
impl EventNotifier {
/// Creates a new EventNotifier
///
/// # Returns
/// Returns a new instance of EventNotifier.
pub fn new() -> Self {
let max_inflight = rustfs_utils::get_env_usize(ENV_NOTIFY_SEND_CONCURRENCY, DEFAULT_NOTIFY_SEND_CONCURRENCY);
EventNotifier {
target_list: Arc::new(RwLock::new(TargetList::new())),
bucket_rules_map: Arc::new(AsyncShardedHashMap::new(0)),
send_limiter: Arc::new(Semaphore::new(max_inflight)),
}
}
/// Checks whether a TargetID is still referenced by any bucket's rules.
///
/// # Arguments
/// * `target_id` - The TargetID to check.
///
/// # Returns
/// Returns `true` if the TargetID is bound to any bucket, otherwise `false`.
pub async fn is_target_bound_to_any_bucket(&self, target_id: &TargetID) -> bool {
// `AsyncShardedHashMap::iter()`: Traverse (bucket_name, rules_map)
let items = self.bucket_rules_map.iter().await;
for (_bucket, rules_map) in items {
if rules_map.contains_target_id(target_id) {
return true;
}
}
false
}
/// Returns a reference to the target list
/// This method provides access to the target list for external use.
///
/// # Returns
/// Returns an `Arc<RwLock<TargetList>>` representing the target list.
pub fn target_list(&self) -> Arc<RwLock<TargetList>> {
Arc::clone(&self.target_list)
}
@@ -81,23 +54,17 @@ impl EventNotifier {
/// Removes all notification rules for a bucket
///
/// # Arguments
/// * `bucket` - The name of the bucket for which to remove rules
/// * `bucket_name` - The name of the bucket for which to remove rules
///
/// This method removes all rules associated with the specified bucket name.
/// It will log a message indicating the removal of rules.
pub async fn remove_rules_map(&self, bucket: &str) {
if self.bucket_rules_map.remove(&bucket.to_string()).await.is_some() {
info!("Removed all notification rules for bucket: {}", bucket);
pub async fn remove_rules_map(&self, bucket_name: &str) {
if self.bucket_rules_map.remove(&bucket_name.to_string()).await.is_some() {
info!("Removed all notification rules for bucket: {}", bucket_name);
}
}
/// Returns a list of ARNs for the registered targets
///
/// # Arguments
/// * `region` - The region to use for generating the ARNs
///
/// # Returns
/// Returns a vector of strings representing the ARNs of the registered targets
pub async fn get_arn_list(&self, region: &str) -> Vec<String> {
let target_list_guard = self.target_list.read().await;
target_list_guard
@@ -108,37 +75,24 @@ impl EventNotifier {
}
/// Adds a rules map for a bucket
///
/// # Arguments
/// * `bucket` - The name of the bucket for which to add the rules map
/// * `rules_map` - The rules map to add for the bucket
pub async fn add_rules_map(&self, bucket: &str, rules_map: RulesMap) {
pub async fn add_rules_map(&self, bucket_name: &str, rules_map: RulesMap) {
if rules_map.is_empty() {
self.bucket_rules_map.remove(&bucket.to_string()).await;
self.bucket_rules_map.remove(&bucket_name.to_string()).await;
} else {
self.bucket_rules_map.insert(bucket.to_string(), rules_map).await;
self.bucket_rules_map.insert(bucket_name.to_string(), rules_map).await;
}
info!("Added rules for bucket: {}", bucket);
info!("Added rules for bucket: {}", bucket_name);
}
/// Gets the rules map for a specific bucket.
///
/// # Arguments
/// * `bucket` - The name of the bucket for which to get the rules map
///
/// # Returns
/// Returns `Some(RulesMap)` if rules exist for the bucket, otherwise returns `None`.
pub async fn get_rules_map(&self, bucket: &str) -> Option<RulesMap> {
self.bucket_rules_map.get(&bucket.to_string()).await
pub async fn get_rules_map(&self, bucket_name: &str) -> Option<RulesMap> {
self.bucket_rules_map.get(&bucket_name.to_string()).await
}
/// Removes notification rules for a bucket
///
/// # Arguments
/// * `bucket` - The name of the bucket for which to remove notification rules
pub async fn remove_notification(&self, bucket: &str) {
self.bucket_rules_map.remove(&bucket.to_string()).await;
info!("Removed notification rules for bucket: {}", bucket);
pub async fn remove_notification(&self, bucket_name: &str) {
self.bucket_rules_map.remove(&bucket_name.to_string()).await;
info!("Removed notification rules for bucket: {}", bucket_name);
}
/// Removes all targets
@@ -171,87 +125,69 @@ impl EventNotifier {
}
/// Sends an event to the appropriate targets based on the bucket rules
///
/// # Arguments
/// * `event` - The event to send
#[instrument(skip_all)]
pub async fn send(&self, event: Arc<Event>) {
let bucket_name = &event.s3.bucket.name;
let object_key = &event.s3.object.key;
let event_name = event.event_name;
if let Some(rules) = self.bucket_rules_map.get(bucket_name).await {
let target_ids = rules.match_rules(event_name, object_key);
if target_ids.is_empty() {
debug!("No matching targets for event in bucket: {}", bucket_name);
return;
}
let target_ids_len = target_ids.len();
let mut handles = vec![];
let Some(rules) = self.bucket_rules_map.get(bucket_name).await else {
debug!("No rules found for bucket: {}", bucket_name);
return;
};
let target_ids = rules.match_rules(event_name, object_key);
if target_ids.is_empty() {
debug!("No matching targets for event in bucket: {}", bucket_name);
return;
}
let target_ids_len = target_ids.len();
let mut handles = vec![];
// Use scope to limit the borrow scope of target_list
let target_list_guard = self.target_list.read().await;
info!("Sending event to targets: {:?}", target_ids);
for target_id in target_ids {
// `get` now returns Option<Arc<dyn Target + Send + Sync>>
if let Some(target_arc) = target_list_guard.get(&target_id) {
// Clone the Arc<dyn Target> stored in target_list to move it into an asynchronous task
// target_arc is already Arc, clone it for the async task
let target_for_task = target_arc.clone();
let limiter = self.send_limiter.clone();
let event_clone = event.clone();
let target_name_for_task = target_for_task.name(); // Get the name before generating the task
debug!("Preparing to send event to target: {}", target_name_for_task);
// Use cloned data in closures to avoid borrowing conflicts
// Create an EntityTarget from the event
let entity_target: Arc<EntityTarget<Event>> = Arc::new(EntityTarget {
object_name: object_key.to_string(),
bucket_name: bucket_name.to_string(),
event_name,
data: event_clone.as_ref().clone(),
});
let handle = tokio::spawn(async move {
let _permit = match limiter.acquire_owned().await {
Ok(p) => p,
Err(e) => {
error!("Failed to acquire send permit for target {}: {}", target_name_for_task, e);
return;
}
};
if let Err(e) = target_for_task.save(entity_target.clone()).await {
error!("Failed to send event to target {}: {}", target_name_for_task, e);
// Use scope to limit the borrow scope of target_list
{
let target_list_guard = self.target_list.read().await;
info!("Sending event to targets: {:?}", target_ids);
for target_id in target_ids {
// `get` now returns Option<Arc<dyn Target + Send + Sync>>
if let Some(target_arc) = target_list_guard.get(&target_id) {
// Clone the Arc<dyn Target> stored in target_list to move it into an asynchronous task
// target_arc is already Arc, clone it for the async task
let cloned_target_for_task = target_arc.clone();
let event_clone = event.clone();
let target_name_for_task = cloned_target_for_task.name(); // Get the name before generating the task
debug!("Preparing to send event to target: {}", target_name_for_task);
// Use cloned data in closures to avoid borrowing conflicts
// Create an EntityTarget from the event
let entity_target: Arc<EntityTarget<Event>> = Arc::new(EntityTarget {
object_name: object_key.to_string(),
bucket_name: bucket_name.to_string(),
event_name,
data: event_clone.clone().as_ref().clone(),
});
let handle = tokio::spawn(async move {
if let Err(e) = cloned_target_for_task.save(entity_target.clone()).await {
error!("Failed to send event to target {}: {}", target_name_for_task, e);
} else {
debug!("Successfully saved event to target {}", target_name_for_task);
}
});
handles.push(handle);
} else {
debug!("Successfully saved event to target {}", target_name_for_task);
warn!("Target ID {:?} found in rules but not in target list.", target_id);
}
});
handles.push(handle);
} else {
warn!("Target ID {:?} found in rules but not in target list.", target_id);
}
// target_list is automatically released here
}
}
// target_list is automatically released here
drop(target_list_guard);
// Wait for all tasks to be completed
for handle in handles {
if let Err(e) = handle.await {
error!("Task for sending/saving event failed: {}", e);
// Wait for all tasks to be completed
for handle in handles {
if let Err(e) = handle.await {
error!("Task for sending/saving event failed: {}", e);
}
}
info!("Event processing initiated for {} targets for bucket: {}", target_ids_len, bucket_name);
} else {
debug!("No rules found for bucket: {}", bucket_name);
}
info!("Event processing initiated for {} targets for bucket: {}", target_ids_len, bucket_name);
}
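For reference, the fan-out pattern in `send` (spawn one task per matched target, throttle with a semaphore as the old path did, then join all handles) reduces to the minimal sketch below. The target names and string payload are illustrative stand-ins, not the crate's real `Target` trait.

```
use std::sync::Arc;
use tokio::sync::Semaphore;

// Illustrative stand-in for delivering one event to one target.
async fn deliver(target: Arc<String>, payload: Arc<String>) {
    println!("sent {payload} to {target}");
}

#[tokio::main]
async fn main() {
    let limiter = Arc::new(Semaphore::new(4)); // cap concurrent sends
    let payload = Arc::new("event".to_string());
    let mut handles = Vec::new();

    for name in ["webhook", "mqtt", "kafka"] {
        let target = Arc::new(name.to_string());
        let payload = payload.clone();
        let limiter = limiter.clone();
        handles.push(tokio::spawn(async move {
            // Hold a permit for the duration of the send.
            let _permit = limiter.acquire_owned().await.expect("semaphore closed");
            deliver(target, payload).await;
        }));
    }

    // Join all tasks; an Err here means the task panicked.
    for handle in handles {
        if let Err(e) = handle.await {
            eprintln!("send task failed: {e}");
        }
    }
}
```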
/// Initializes the targets for buckets
///
/// # Arguments
/// * `targets_to_init` - A vector of boxed targets to initialize
///
/// # Returns
/// Returns `Ok(())` if initialization is successful, otherwise returns a `NotificationError`.
#[instrument(skip(self, targets_to_init))]
pub async fn init_bucket_targets(
&self,
@@ -282,7 +218,6 @@ impl EventNotifier {
/// A thread-safe list of targets
pub struct TargetList {
/// Map of TargetID to Target
targets: HashMap<TargetID, Arc<dyn Target<Event> + Send + Sync>>,
}
@@ -299,12 +234,6 @@ impl TargetList {
}
/// Adds a target to the list
///
/// # Arguments
/// * `target` - The target to add
///
/// # Returns
/// Returns `Ok(())` if the target was added successfully, or a `NotificationError` if an error occurred.
pub fn add(&mut self, target: Arc<dyn Target<Event> + Send + Sync>) -> Result<(), NotificationError> {
let id = target.id();
if self.targets.contains_key(&id) {
@@ -322,12 +251,6 @@ impl TargetList {
/// Removes a target by ID. Note: This does not stop its associated event stream.
/// Stream cancellation should be handled by EventNotifier.
///
/// # Arguments
/// * `id` - The ID of the target to remove
///
/// # Returns
/// Returns the removed target if it existed, otherwise `None`.
pub async fn remove_target_only(&mut self, id: &TargetID) -> Option<Arc<dyn Target<Event> + Send + Sync>> {
if let Some(target_arc) = self.targets.remove(id) {
if let Err(e) = target_arc.close().await {
@@ -355,12 +278,6 @@ impl TargetList {
}
/// Returns a target by ID
///
/// # Arguments
/// * `id` - The ID of the target to retrieve
///
/// # Returns
/// Returns the target if it exists, otherwise `None`.
pub fn get(&self, id: &TargetID) -> Option<Arc<dyn Target<Event> + Send + Sync>> {
self.targets.get(id).cloned()
}
@@ -375,7 +292,7 @@ impl TargetList {
self.targets.len()
}
/// is_empty can be derived from len()
// is_empty can be derived from len()
pub fn is_empty(&self) -> bool {
self.targets.is_empty()
}


@@ -15,60 +15,13 @@
use super::rules_map::RulesMap;
use super::xml_config::ParseConfigError as BucketNotificationConfigError;
use crate::rules::NotificationConfiguration;
use crate::rules::subscriber_snapshot::{BucketRulesSnapshot, DynRulesContainer, RuleEvents, RulesContainer};
use crate::rules::pattern_rules;
use crate::rules::target_id_set;
use hashbrown::HashMap;
use rustfs_targets::EventName;
use rustfs_targets::arn::TargetID;
use serde::{Deserialize, Serialize};
use std::io::Read;
use std::sync::Arc;
/// A "rule view", only used for snapshot mask/consistency verification.
/// The view is generated per single event so that the event_mask calculation stays reliable and simple.
#[derive(Debug)]
struct RuleView {
events: Vec<EventName>,
}
impl RuleEvents for RuleView {
fn subscribed_events(&self) -> &[EventName] {
&self.events
}
}
/// Adapt RulesMap to RulesContainer.
/// Key point: The items returned by iter_rules are &dyn RuleEvents, so a RuleView list is cached in the container.
#[derive(Debug)]
struct CompiledRules {
// Keep the RulesMap (useful later for more complex checks at snapshot read time)
#[allow(dead_code)]
rules_map: RulesMap,
// for RulesContainer::iter_rules
rule_views: Vec<RuleView>,
}
impl CompiledRules {
fn from_rules_map(rules_map: &RulesMap) -> Self {
let mut rule_views = Vec::new();
for ev in rules_map.iter_events() {
rule_views.push(RuleView { events: vec![ev] });
}
Self {
rules_map: rules_map.clone(),
rule_views,
}
}
}
impl RulesContainer for CompiledRules {
type Rule = dyn RuleEvents;
fn iter_rules<'a>(&'a self) -> Box<dyn Iterator<Item = &'a Self::Rule> + 'a> {
// Key: Convert &RuleView into &dyn RuleEvents
Box::new(self.rule_views.iter().map(|v| v as &dyn RuleEvents))
}
}
/// Configuration for bucket notifications.
/// This struct now holds the parsed and validated rules in the new RulesMap format.
@@ -166,26 +119,11 @@ impl BucketNotificationConfig {
pub fn set_region(&mut self, region: &str) {
self.region = region.to_string();
}
}
/// Compiles the current BucketNotificationConfig into a BucketRulesSnapshot.
/// This involves transforming the rules into a format suitable for runtime use,
/// and calculating the event mask based on the subscribed events of the rules.
///
/// # Returns
/// A BucketRulesSnapshot containing the compiled rules and event mask.
pub fn compile_snapshot(&self) -> BucketRulesSnapshot<DynRulesContainer> {
// 1) Generate container from RulesMap
let compiled = CompiledRules::from_rules_map(self.get_rules_map());
let rules: Arc<DynRulesContainer> = Arc::new(compiled) as Arc<DynRulesContainer>;
// 2) Calculate event_mask
let mut mask = 0u64;
for rule in rules.iter_rules() {
for ev in rule.subscribed_events() {
mask |= ev.mask();
}
}
BucketRulesSnapshot { event_mask: mask, rules }
// Add a helper to PatternRules if not already present
impl pattern_rules::PatternRules {
pub fn inner(&self) -> &HashMap<String, target_id_set::TargetIdSet> {
&self.rules
}
}


@@ -12,24 +12,22 @@
// See the License for the specific language governing permissions and
// limitations under the License.
mod config;
pub mod pattern;
mod pattern_rules;
mod rules_map;
mod subscriber_index;
mod subscriber_snapshot;
mod target_id_set;
pub mod pattern_rules;
pub mod rules_map;
pub mod target_id_set;
pub mod xml_config; // For XML structure definition and parsing
// Definition and parsing for BucketNotificationConfig
pub mod config; // Definition and parsing for BucketNotificationConfig
// Re-export key types from submodules for easy access to `crate::rules::TypeName`
// Re-export key types from submodules for external use
pub use config::BucketNotificationConfig;
// Assume that BucketNotificationConfigError is also defined in config.rs
// Or if it is still an alias for xml_config::ParseConfigError , adjust accordingly
pub use xml_config::ParseConfigError as BucketNotificationConfigError;
pub use pattern_rules::PatternRules;
pub use rules_map::RulesMap;
pub use subscriber_index::*;
pub use subscriber_snapshot::*;
pub use target_id_set::TargetIdSet;
pub use xml_config::{NotificationConfiguration, ParseConfigError, ParseConfigError as BucketNotificationConfigError};
pub use xml_config::{NotificationConfiguration, ParseConfigError};


@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::rules::TargetIdSet;
use crate::rules::pattern;
use super::pattern;
use super::target_id_set::TargetIdSet;
use hashbrown::HashMap;
use rayon::prelude::*;
use rustfs_targets::arn::TargetID;
@@ -27,69 +27,31 @@ pub struct PatternRules {
}
impl PatternRules {
/// Create a new, empty PatternRules.
pub fn new() -> Self {
Default::default()
}
/// Add rules: Pattern and Target ID.
/// If the schema already exists, add target_id to the existing TargetIdSet.
///
/// # Arguments
/// * `pattern` - The object name pattern.
/// * `target_id` - The TargetID to associate with the pattern.
pub fn add(&mut self, pattern: String, target_id: TargetID) {
self.rules.entry(pattern).or_default().insert(target_id);
}
/// Checks if there are any rules that match the given object name.
///
/// # Arguments
/// * `object_name` - The object name to match against the patterns.
///
/// # Returns
/// `true` if any pattern matches the object name, otherwise `false`.
pub fn match_simple(&self, object_name: &str) -> bool {
self.rules.keys().any(|p| pattern::match_simple(p, object_name))
}
/// Returns all TargetIDs that match the object name.
///
/// Performance optimization points:
/// 1) Small collections are serialized directly to avoid rayon scheduling/merging overhead
/// 2) When hitting, no longer temporarily allocate TargetIdSet for each rule, but directly extend
///
/// # Arguments
/// * `object_name` - The object name to match against the patterns.
///
/// # Returns
/// A TargetIdSet containing all TargetIDs that match the object name.
pub fn match_targets(&self, object_name: &str) -> TargetIdSet {
let n = self.rules.len();
if n == 0 {
return TargetIdSet::new();
}
// Empirical threshold: the serial path is usually faster below this value (tune after benchmarking)
const PAR_THRESHOLD: usize = 128;
if n < PAR_THRESHOLD {
let mut out = TargetIdSet::new();
for (pattern_str, target_set) in self.rules.iter() {
if pattern::match_simple(pattern_str, object_name) {
out.extend(target_set.iter().cloned());
}
}
return out;
}
// Parallel path: Each thread accumulates a local set and finally merges it to reduce frequent allocations
self.rules
.par_iter()
.fold(TargetIdSet::new, |mut local, (pattern_str, target_set)| {
.filter_map(|(pattern_str, target_set)| {
if pattern::match_simple(pattern_str, object_name) {
local.extend(target_set.iter().cloned());
Some(target_set.iter().cloned().collect::<TargetIdSet>())
} else {
None
}
local
})
.reduce(TargetIdSet::new, |mut acc, set| {
acc.extend(set);
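The serial/parallel split in the old `match_targets` is a general pattern: below a size threshold, a plain loop beats rayon's scheduling and merge overhead. A minimal sketch under assumed types, with prefix matching standing in for the real pattern matcher:

```
use hashbrown::HashSet;
use rayon::prelude::*;

// Collect IDs whose pattern matches, going parallel only for large rule sets.
fn matching_ids(rules: &[(String, u32)], key: &str) -> HashSet<u32> {
    const PAR_THRESHOLD: usize = 128; // empirical; tune with benchmarks

    if rules.len() < PAR_THRESHOLD {
        // Serial path: one output set, no scheduling overhead.
        let mut out = HashSet::new();
        for (pattern, id) in rules {
            if key.starts_with(pattern.as_str()) {
                out.insert(*id);
            }
        }
        return out;
    }

    // Parallel path: per-thread local sets, merged once at the end.
    rules
        .par_iter()
        .fold(HashSet::new, |mut local, (pattern, id)| {
            if key.starts_with(pattern.as_str()) {
                local.insert(*id);
            }
            local
        })
        .reduce(HashSet::new, |mut acc, set| {
            acc.extend(set);
            acc
        })
}

fn main() {
    let rules = vec![("images/".to_string(), 1u32), ("logs/".to_string(), 2)];
    assert_eq!(matching_ids(&rules, "images/cat.png"), HashSet::from_iter([1u32]));
}
```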
@@ -103,11 +65,6 @@ impl PatternRules {
/// Merge another PatternRules.
/// Corresponding to Go's `Rules.Union`.
/// # Arguments
/// * `other` - The PatternRules to merge with.
///
/// # Returns
/// A new PatternRules containing the union of both.
pub fn union(&self, other: &Self) -> Self {
let mut new_rules = self.clone();
for (pattern, their_targets) in &other.rules {
@@ -119,13 +76,6 @@ impl PatternRules {
/// Calculate the difference from another PatternRules.
/// Corresponding to Go's `Rules.Difference`.
/// The result contains only the patterns and TargetIDs that are in `self` but not in `other`.
///
/// # Arguments
/// * `other` - The PatternRules to compare against.
///
/// # Returns
/// A new PatternRules containing the difference.
pub fn difference(&self, other: &Self) -> Self {
let mut result_rules = HashMap::new();
for (pattern, self_targets) in &self.rules {
@@ -144,59 +94,4 @@ impl PatternRules {
}
PatternRules { rules: result_rules }
}
/// Merge another PatternRules into self in place.
/// Corresponding to Go's `Rules.UnionInPlace`.
/// # Arguments
/// * `other` - The PatternRules to merge with.
pub fn union_in_place(&mut self, other: &Self) {
for (pattern, their_targets) in &other.rules {
self.rules
.entry(pattern.clone())
.or_default()
.extend(their_targets.iter().cloned());
}
}
/// Calculate the difference from another PatternRules in place.
/// Corresponding to Go's `Rules.DifferenceInPlace`.
/// The result contains only the patterns and TargetIDs that are in `self` but not in `other`.
/// # Arguments
/// * `other` - The PatternRules to compare against.
pub fn difference_in_place(&mut self, other: &Self) {
self.rules.retain(|pattern, self_targets| {
if let Some(other_targets) = other.rules.get(pattern) {
// Remove other_targets from self_targets
self_targets.retain(|tid| !other_targets.contains(tid));
}
!self_targets.is_empty()
});
}
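A self-contained version of this retain-based in-place difference, with `u32` IDs standing in for `TargetID`:

```
use hashbrown::{HashMap, HashSet};

// Per pattern, drop the other side's targets, then drop patterns left empty.
fn difference_in_place(mine: &mut HashMap<String, HashSet<u32>>, other: &HashMap<String, HashSet<u32>>) {
    mine.retain(|pattern, targets| {
        if let Some(theirs) = other.get(pattern) {
            targets.retain(|t| !theirs.contains(t));
        }
        !targets.is_empty()
    });
}

fn main() {
    let mut a = HashMap::new();
    a.insert("images/*".to_string(), HashSet::from_iter([1u32, 2]));
    let mut b = HashMap::new();
    b.insert("images/*".to_string(), HashSet::from_iter([2u32]));
    difference_in_place(&mut a, &b);
    assert_eq!(a["images/*"], HashSet::from_iter([1u32]));
}
```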
/// Remove a pattern and its associated TargetID set from the PatternRules.
///
/// # Arguments
/// * `pattern` - The pattern to remove.
pub fn remove_pattern(&mut self, pattern: &str) -> bool {
self.rules.remove(pattern).is_some()
}
/// Determine whether the current PatternRules contains the specified TargetID (referenced by any pattern).
///
/// # Parameters
/// * `target_id` - The TargetID to check for existence within the PatternRules
///
/// # Returns
/// * `true` if the TargetID exists in any of the patterns; `false` otherwise.
pub fn contains_target_id(&self, target_id: &TargetID) -> bool {
self.rules.values().any(|set| set.contains(target_id))
}
/// Expose the internal rules for use in scenarios such as BucketNotificationConfig::validate.
///
/// # Returns
/// A reference to the internal HashMap of patterns to TargetIdSets.
pub fn inner(&self) -> &HashMap<String, TargetIdSet> {
&self.rules
}
}


@@ -12,7 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::rules::{PatternRules, TargetIdSet};
use super::pattern_rules::PatternRules;
use super::target_id_set::TargetIdSet;
use hashbrown::HashMap;
use rustfs_targets::EventName;
use rustfs_targets::arn::TargetID;
@@ -30,9 +31,6 @@ pub struct RulesMap {
impl RulesMap {
/// Create a new, empty RulesMap.
///
/// # Returns
/// A new instance of RulesMap with an empty map and a total_events_mask set to 0.
pub fn new() -> Self {
Default::default()
}
@@ -69,12 +67,12 @@ impl RulesMap {
/// Merge another RulesMap.
/// Corresponds to Go's `RulesMap.Add(rulesMap2 RulesMap)`.
///
/// # Parameters
/// * `other_map` - The other RulesMap to be merged into the current one.
pub fn add_map(&mut self, other_map: &Self) {
for (event_name, other_pattern_rules) in &other_map.map {
self.map.entry(*event_name).or_default().union_in_place(other_pattern_rules);
let self_pattern_rules = self.map.entry(*event_name).or_default();
// PatternRules::union returns a new PatternRules; replace the existing one with the merged result
let merged_rules = self_pattern_rules.union(other_pattern_rules);
*self_pattern_rules = merged_rules;
}
// Directly merge two masks.
self.total_events_mask |= other_map.total_events_mask;
@@ -83,14 +81,11 @@ impl RulesMap {
/// Remove another rule defined in the RulesMap from the current RulesMap.
///
/// After the rule is removed, `total_events_mask` is recalculated to ensure its accuracy.
///
/// # Parameters
/// * `other_map` - The other RulesMap containing rules to be removed from the current one.
pub fn remove_map(&mut self, other_map: &Self) {
let mut events_to_remove = Vec::new();
for (event_name, self_pattern_rules) in &mut self.map {
if let Some(other_pattern_rules) = other_map.map.get(event_name) {
self_pattern_rules.difference_in_place(other_pattern_rules);
*self_pattern_rules = self_pattern_rules.difference(other_pattern_rules);
if self_pattern_rules.is_empty() {
events_to_remove.push(*event_name);
}
@@ -107,9 +102,6 @@ impl RulesMap {
///
/// This method uses a bitmask for a quick check of O(1) complexity.
/// `event_name` can be a compound type, such as `ObjectCreatedAll`.
///
/// # Parameters
/// * `event_name` - The event name to check for subscribers.
pub fn has_subscriber(&self, event_name: &EventName) -> bool {
// event_name.mask() will handle compound events correctly
(self.total_events_mask & event_name.mask()) != 0
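The O(1) check works because every specific event owns one bit and compound events OR several bits together. A small sketch with made-up bit assignments (the real values come from `EventName::mask()`):

```
// Hypothetical bit assignments; real masks come from EventName::mask().
const OBJECT_CREATED_PUT: u64 = 1 << 0;
const OBJECT_CREATED_COPY: u64 = 1 << 1;
const OBJECT_CREATED_ALL: u64 = OBJECT_CREATED_PUT | OBJECT_CREATED_COPY;

fn has_subscriber(total_events_mask: u64, event_mask: u64) -> bool {
    (total_events_mask & event_mask) != 0
}

fn main() {
    let total = OBJECT_CREATED_PUT; // bucket subscribed to Put only
    assert!(has_subscriber(total, OBJECT_CREATED_ALL)); // compound mask overlaps
    assert!(!has_subscriber(total, OBJECT_CREATED_COPY));
}
```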
@@ -120,54 +112,39 @@ impl RulesMap {
/// # Notice
/// The `event_name` parameter should be a specific, non-compound event type,
/// because it comes from the `Event` object for an event that actually occurred.
///
/// # Parameters
/// * `event_name` - The specific event name to match against.
/// * `object_key` - The object key to match against the patterns in the rules.
///
/// # Returns
/// * A set of TargetIDs that match the given event and object key.
pub fn match_rules(&self, event_name: EventName, object_key: &str) -> TargetIdSet {
// Use bitmask to quickly determine whether there is a matching rule
if (self.total_events_mask & event_name.mask()) == 0 {
return TargetIdSet::new(); // No matching rules
}
// In Go, RulesMap[eventName] returns empty rules if the key doesn't exist.
// Rust's HashMap::get returns Option, so missing key means no rules.
// Compound events like ObjectCreatedAll are expanded into specific events during add_rule_config.
// Thus, queries should use specific event names.
// If event_name is compound, expansion happens at addition time.
// match_rules assumes event_name is already a specific event for lookup.
// Callers should expand compound events before calling this method.
// First try to directly match the event name
if let Some(pattern_rules) = self.map.get(&event_name) {
let targets = pattern_rules.match_targets(object_key);
if !targets.is_empty() {
return targets;
}
}
// In Go, RulesMap[eventName] is retrieved directly and yields empty Rules when the key is missing.
// Rust's HashMap::get returns Option, so a missing event name simply means there are no rules.
// Compound events (such as ObjectCreatedAll) were already expanded into single events in add_rule_config,
// so queries should use a single event name.
// If event_name is itself a single type, it can be looked up directly;
// compound types are expanded at addition time, mirroring Go's logic.
// match_rules therefore expects an event that is already a single, specific type;
// a caller holding a compound event should expand it before calling this function.
self.map
.get(&event_name)
.map_or_else(TargetIdSet::new, |pr| pr.match_targets(object_key))
}
/// Check if RulesMap is empty.
///
/// # Returns
/// * `true` if there are no rules in the map; `false` otherwise
pub fn is_empty(&self) -> bool {
self.map.is_empty()
}
/// Determine whether the current RulesMap contains the specified TargetID (referenced by any event / pattern).
///
/// # Parameters
/// * `target_id` - The TargetID to check for existence within the RulesMap
///
/// # Returns
/// * `true` if the TargetID exists in any of the PatternRules; `false` otherwise.
pub fn contains_target_id(&self, target_id: &TargetID) -> bool {
self.map.values().any(|pr| pr.contains_target_id(target_id))
}
/// Returns a clone of internal rules for use in scenarios such as BucketNotificationConfig::validate.
///
/// # Returns
/// A reference to the internal HashMap of EventName to PatternRules.
pub fn inner(&self) -> &HashMap<EventName, PatternRules> {
&self.map
}
@@ -183,32 +160,18 @@ impl RulesMap {
}
/// Remove rules and optimize performance
///
/// # Parameters
/// * `event_name` - The EventName from which to remove the rule.
/// * `pattern` - The pattern of the rule to be removed.
#[allow(dead_code)]
pub fn remove_rule(&mut self, event_name: &EventName, pattern: &str) {
let mut remove_event = false;
if let Some(pattern_rules) = self.map.get_mut(event_name) {
pattern_rules.remove_pattern(pattern);
pattern_rules.rules.remove(pattern);
if pattern_rules.is_empty() {
remove_event = true;
self.map.remove(event_name);
}
}
if remove_event {
self.map.remove(event_name);
}
self.recalculate_mask(); // Delay calculation mask
}
/// Batch Delete Rules and Optimize Performance
///
/// # Parameters
/// * `event_names` - A slice of EventNames to be removed.
/// Batch Delete Rules
#[allow(dead_code)]
pub fn remove_rules(&mut self, event_names: &[EventName]) {
for event_name in event_names {
@@ -218,27 +181,9 @@ impl RulesMap {
}
/// Update rules and optimize performance
///
/// # Parameters
/// * `event_name` - The EventName to update.
/// * `pattern` - The pattern of the rule to be updated.
/// * `target_id` - The TargetID to be added.
#[allow(dead_code)]
pub fn update_rule(&mut self, event_name: EventName, pattern: String, target_id: TargetID) {
self.map.entry(event_name).or_default().add(pattern, target_id);
self.total_events_mask |= event_name.mask(); // Update only the relevant bitmask
}
/// Iterate all EventName keys contained in this RulesMap.
///
/// Used by snapshot compilation to compute bucket event_mask.
///
/// # Returns
/// An iterator over all EventName keys in the RulesMap.
#[inline]
pub fn iter_events(&self) -> impl Iterator<Item = EventName> + '_ {
// `inner()` is already used by config.rs, so we reuse it here.
// If the key type is `EventName`, `.copied()` is the cheapest way to return values.
self.inner().keys().copied()
}
}


@@ -1,131 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::rules::{BucketRulesSnapshot, BucketSnapshotRef, DynRulesContainer};
use arc_swap::ArcSwap;
use rustfs_targets::EventName;
use starshard::ShardedHashMap;
use std::fmt;
use std::sync::Arc;
/// A global bucket -> snapshot index.
///
/// Read path: lock-free load (ArcSwap)
/// Write path: atomic replacement after building a new snapshot
pub struct SubscriberIndex {
// Use starshard for sharding to reduce lock contention when the number of buckets is large
inner: ShardedHashMap<String, Arc<ArcSwap<BucketRulesSnapshot<DynRulesContainer>>>>,
// Cache an "empty rule container" for empty snapshots (avoids building every time)
empty_rules: Arc<DynRulesContainer>,
}
/// Implement Debug manually because some fields do not support Debug
impl fmt::Debug for SubscriberIndex {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("SubscriberIndex").finish_non_exhaustive()
}
}
impl SubscriberIndex {
/// Create a new SubscriberIndex.
///
/// # Arguments
/// * `empty_rules` - An Arc to an empty rules container used for empty snapshots
///
/// Returns a new instance of SubscriberIndex.
pub fn new(empty_rules: Arc<DynRulesContainer>) -> Self {
Self {
inner: ShardedHashMap::new(64),
empty_rules,
}
}
/// Get the current snapshot of a bucket.
/// If it does not exist, return empty snapshot.
///
/// # Arguments
/// * `bucket` - The name of the bucket to load.
///
/// Returns the snapshot reference for the specified bucket.
pub fn load_snapshot(&self, bucket: &str) -> BucketSnapshotRef {
match self.inner.get(&bucket.to_string()) {
Some(cell) => cell.load_full(),
None => Arc::new(BucketRulesSnapshot::empty(self.empty_rules.clone())),
}
}
/// Quickly determine whether the bucket has a subscription to an event.
/// Reading the same snapshot keeps this check consistent with the subsequent rule matching.
///
/// # Arguments
/// * `bucket` - The name of the bucket to check.
/// * `event` - The event name to check for subscriptions.
///
/// Returns `true` if there are subscribers for the event, `false` otherwise.
#[inline]
pub fn has_subscriber(&self, bucket: &str, event: &EventName) -> bool {
let snap = self.load_snapshot(bucket);
if snap.event_mask == 0 {
return false;
}
snap.has_event(event)
}
/// Atomically update a bucket's snapshot (full replacement).
///
/// - The caller first builds the complete `BucketRulesSnapshot` (including event\_mask and rules).
/// - This method ensures that the read path will not observe intermediate states.
///
/// # Arguments
/// * `bucket` - The name of the bucket to update.
/// * `new_snapshot` - The new snapshot to store for the bucket.
pub fn store_snapshot(&self, bucket: &str, new_snapshot: BucketRulesSnapshot<DynRulesContainer>) {
let key = bucket.to_string();
let cell = self.inner.get(&key).unwrap_or_else(|| {
// Insert a default cell (empty snapshot)
let init = Arc::new(ArcSwap::from_pointee(BucketRulesSnapshot::empty(self.empty_rules.clone())));
self.inner.insert(key.clone(), init.clone());
init
});
cell.store(Arc::new(new_snapshot));
}
/// Delete the bucket's subscription view (make it empty).
///
/// # Arguments
/// * `bucket` - The name of the bucket to clear.
pub fn clear_bucket(&self, bucket: &str) {
if let Some(cell) = self.inner.get(&bucket.to_string()) {
cell.store(Arc::new(BucketRulesSnapshot::empty(self.empty_rules.clone())));
}
}
}
impl Default for SubscriberIndex {
fn default() -> Self {
// An empty rule container is required; a minimal empty implementation is used here
#[derive(Debug)]
struct EmptyRules;
impl crate::rules::subscriber_snapshot::RulesContainer for EmptyRules {
type Rule = dyn crate::rules::subscriber_snapshot::RuleEvents;
fn iter_rules<'a>(&'a self) -> Box<dyn Iterator<Item = &'a Self::Rule> + 'a> {
Box::new(std::iter::empty())
}
}
Self::new(Arc::new(EmptyRules) as Arc<DynRulesContainer>)
}
}
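The core mechanism of this (now removed) index is one `Arc<ArcSwap<...>>` cell per bucket: readers load without locking, writers swap in a fully built snapshot. A reduced sketch of just that mechanism, with a placeholder `Snapshot` struct:

```
use arc_swap::ArcSwap;
use std::sync::Arc;

#[derive(Debug)]
struct Snapshot {
    event_mask: u64,
}

fn main() {
    // Readers load the current snapshot without taking a lock.
    let cell = Arc::new(ArcSwap::from_pointee(Snapshot { event_mask: 0 }));
    let snap = cell.load_full(); // Arc<Snapshot>: a consistent view
    assert_eq!(snap.event_mask, 0);

    // A writer builds the full replacement, then swaps it in atomically;
    // readers never observe a half-updated snapshot.
    cell.store(Arc::new(Snapshot { event_mask: 0b101 }));
    assert_eq!(cell.load().event_mask, 0b101);
}
```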


@@ -1,117 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use rustfs_targets::EventName;
use std::sync::Arc;
/// Let the rules structure provide "what events it is subscribed to".
/// This way BucketRulesSnapshot does not need to know the internal shape of rules.
pub trait RuleEvents {
fn subscribed_events(&self) -> &[EventName];
}
/// Let the rules container provide the ability to iterate over all rules (abstracting only to the minimum necessary).
pub trait RulesContainer {
type Rule: RuleEvents + ?Sized;
fn iter_rules<'a>(&'a self) -> Box<dyn Iterator<Item = &'a Self::Rule> + 'a>;
/// Fast emptiness check for snapshots (supplies the missing `rules.is_empty()`)
fn is_empty(&self) -> bool {
self.iter_rules().next().is_none()
}
}
/// Represents a bucket's notification subscription view snapshot (immutable).
///
/// - `event_mask`: Quickly determine whether there is a subscription to a certain type of event (bitset/flags).
/// - `rules`: precise rule mapping (prefix/suffix/pattern -> targets).
///
/// The read path only reads this snapshot to ensure consistency.
#[derive(Debug, Clone)]
pub struct BucketRulesSnapshot<R>
where
R: RulesContainer + ?Sized,
{
pub event_mask: u64,
pub rules: Arc<R>,
}
impl<R> BucketRulesSnapshot<R>
where
R: RulesContainer + ?Sized,
{
/// Create an empty snapshot with no subscribed events and no rules.
///
/// # Arguments
/// * `rules` - An Arc to a rules container (can be an empty container).
///
/// # Returns
/// An instance of `BucketRulesSnapshot` with an empty event mask.
#[inline]
pub fn empty(rules: Arc<R>) -> Self {
Self { event_mask: 0, rules }
}
/// Check if the snapshot has any subscribers for the specified event.
///
/// # Arguments
/// * `event` - The event name to check for subscriptions.
///
/// # Returns
/// `true` if there are subscribers for the event, `false` otherwise.
#[inline]
pub fn has_event(&self, event: &EventName) -> bool {
(self.event_mask & event.mask()) != 0
}
/// Check if the snapshot is empty (no subscribed events or rules).
///
/// # Returns
/// `true` if the snapshot is empty, `false` otherwise.
#[inline]
pub fn is_empty(&self) -> bool {
self.event_mask == 0 || self.rules.is_empty()
}
/// [debug] Assert that `event_mask` is consistent with the event declared in `rules`.
///
/// Constraints:
/// - only runs in debug builds (release incurs no cost).
/// - If the rule contains compound events (\*All / Everything), rely on `EventName::mask()` to automatically expand.
#[inline]
pub fn debug_assert_mask_consistent(&self) {
#[cfg(debug_assertions)]
{
let mut recomputed = 0u64;
for rule in self.rules.iter_rules() {
for ev in rule.subscribed_events() {
recomputed |= ev.mask();
}
}
debug_assert!(
recomputed == self.event_mask,
"BucketRulesSnapshot.event_mask inconsistent: stored={:#x}, recomputed={:#x}",
self.event_mask,
recomputed
);
}
}
}
/// Unified trait-object snapshot type (avoids Sized issues and missing generic arguments)
pub type DynRulesContainer = dyn RulesContainer<Rule = dyn RuleEvents> + Send + Sync;
/// Expose Arc form to facilitate sharing.
pub type BucketSnapshotRef = Arc<BucketRulesSnapshot<DynRulesContainer>>;
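The `DynRulesContainer` alias erases the concrete rules type behind two object-safe traits. A miniature of the same shape, with `u64` standing in for `EventName`:

```
trait RuleEvents {
    fn events(&self) -> &[u64];
}

trait RulesContainer {
    type Rule: RuleEvents + ?Sized;
    fn iter_rules<'a>(&'a self) -> Box<dyn Iterator<Item = &'a Self::Rule> + 'a>;
}

struct OneRule(Vec<u64>);
impl RuleEvents for OneRule {
    fn events(&self) -> &[u64] {
        &self.0
    }
}

struct Container(Vec<OneRule>);
impl RulesContainer for Container {
    type Rule = dyn RuleEvents; // erase the concrete rule type

    fn iter_rules<'a>(&'a self) -> Box<dyn Iterator<Item = &'a Self::Rule> + 'a> {
        Box::new(self.0.iter().map(|r| r as &dyn RuleEvents))
    }
}

fn main() {
    let c = Container(vec![OneRule(vec![1, 2])]);
    // Recompute the event mask exactly as debug_assert_mask_consistent does.
    let mask = c.iter_rules().flat_map(|r| r.events().iter().copied()).fold(0u64, |m, e| m | e);
    assert_eq!(mask, 1 | 2);
}
```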


@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::rules::pattern;
use super::pattern;
use hashbrown::HashSet;
use rustfs_targets::EventName;
use rustfs_targets::arn::{ARN, ArnError, TargetIDError};


@@ -13,23 +13,18 @@
// limitations under the License.
use crate::{Event, integration::NotificationMetrics};
use rustfs_targets::{
StoreError, Target, TargetError,
store::{Key, Store},
target::EntityTarget,
};
use rustfs_targets::StoreError;
use rustfs_targets::Target;
use rustfs_targets::TargetError;
use rustfs_targets::store::{Key, Store};
use rustfs_targets::target::EntityTarget;
use std::sync::Arc;
use std::time::{Duration, Instant};
use tokio::sync::{Semaphore, mpsc};
use tokio::time::sleep;
use tracing::{debug, error, info, warn};
/// Streams events from the store to the target with retry logic
///
/// # Arguments
/// - `store`: The event store
/// - `target`: The target to send events to
/// - `cancel_rx`: Receiver to listen for cancellation signals
/// Streams events from the store to the target
pub async fn stream_events(
store: &mut (dyn Store<Event, Error = StoreError, Key = Key> + Send),
target: &dyn Target<Event>,
@@ -72,7 +67,6 @@ pub async fn stream_events(
match target.send_from_store(key.clone()).await {
Ok(_) => {
info!("Successfully sent event for target: {}", target.name());
// send_from_store deletes the event from store on success
success = true;
}
Err(e) => {
@@ -110,13 +104,6 @@ pub async fn stream_events(
}
/// Starts the event streaming process for a target
///
/// # Arguments
/// - `store`: The event store
/// - `target`: The target to send events to
///
/// # Returns
/// A sender to signal cancellation of the event stream
pub fn start_event_stream(
mut store: Box<dyn Store<Event, Error = StoreError, Key = Key> + Send>,
target: Arc<dyn Target<Event> + Send + Sync>,
@@ -132,15 +119,6 @@ pub fn start_event_stream(
}
/// Start event stream with batch processing
///
/// # Arguments
/// - `store`: The event store
/// - `target`: The target to send events to clients
/// - `metrics`: Metrics for monitoring
/// - `semaphore`: Semaphore to limit concurrency
///
/// # Returns
/// A sender to signal cancellation of the event stream
pub fn start_event_stream_with_batching(
mut store: Box<dyn Store<EntityTarget<Event>, Error = StoreError, Key = Key> + Send>,
target: Arc<dyn Target<Event> + Send + Sync>,
@@ -158,16 +136,6 @@ pub fn start_event_stream_with_batching(
}
/// Event stream processing with batch processing
///
/// # Arguments
/// - `store`: The event store
/// - `target`: The target to send events to clients
/// - `cancel_rx`: Receiver to listen for cancellation signals
/// - `metrics`: Metrics for monitoring
/// - `semaphore`: Semaphore to limit concurrency
///
/// # Notes
/// This function processes events in batches to improve efficiency.
pub async fn stream_events_with_batching(
store: &mut (dyn Store<EntityTarget<Event>, Error = StoreError, Key = Key> + Send),
target: &dyn Target<Event>,
@@ -263,17 +231,7 @@ pub async fn stream_events_with_batching(
}
}
/// Processing event batches for targets
/// # Arguments
/// - `batch`: The batch of events to process
/// - `batch_keys`: The corresponding keys of the events in the batch
/// - `target`: The target to send events to clients
/// - `max_retries`: Maximum number of retries for sending an event
/// - `base_delay`: Base delay duration for retries
/// - `metrics`: Metrics for monitoring
/// - `semaphore`: Semaphore to limit concurrency
/// # Notes
/// This function processes a batch of events, sending each event to the target with retry
/// Processing event batches
async fn process_batch(
batch: &mut Vec<EntityTarget<Event>>,
batch_keys: &mut Vec<Key>,
@@ -304,7 +262,6 @@ async fn process_batch(
// Retry logic
while retry_count < max_retries && !success {
// After a successful send, the event is deleted from the store synchronously.
match target.send_from_store(key.clone()).await {
Ok(_) => {
info!("Successfully sent event for target: {}, Key: {}", target.name(), key.to_string());


@@ -39,9 +39,9 @@ use rustfs_config::{
ENV_OBS_LOG_DIRECTORY, ENV_OBS_LOG_FLUSH_MS, ENV_OBS_LOG_MESSAGE_CAPA, ENV_OBS_LOG_POOL_CAPA,
},
};
use rustfs_utils::{get_env_opt_str, get_env_u64, get_env_usize, get_local_ip_with_default};
use rustfs_utils::{get_env_u64, get_env_usize, get_local_ip_with_default};
use smallvec::SmallVec;
use std::{borrow::Cow, fs, io::IsTerminal, time::Duration};
use std::{borrow::Cow, env, fs, io::IsTerminal, time::Duration};
use tracing::info;
use tracing_error::ErrorLayer;
use tracing_opentelemetry::{MetricsLayer, OpenTelemetryLayer};
@@ -574,8 +574,8 @@ pub(crate) fn init_telemetry(config: &OtelConfig) -> Result<OtelGuard, Telemetry
}
// Rule 2: The user has explicitly customized the log directory (determined by whether ENV_OBS_LOG_DIRECTORY is set)
let user_set_log_dir = get_env_opt_str(ENV_OBS_LOG_DIRECTORY);
if user_set_log_dir.filter(|d| !d.is_empty()).is_some() {
let user_set_log_dir = env::var(ENV_OBS_LOG_DIRECTORY).is_ok();
if user_set_log_dir {
return init_file_logging(config, logger_level, is_production);
}


@@ -89,7 +89,6 @@ pub enum Error {
#[error("invalid access_key")]
InvalidAccessKey,
#[error("action not allowed")]
IAMActionNotAllowed,
@@ -107,9 +106,6 @@ pub enum Error {
#[error("io error: {0}")]
Io(std::io::Error),
#[error("system already initialized")]
IamSysAlreadyInitialized,
}
impl Error {


@@ -16,7 +16,7 @@
mod generated;
use proto_gen::node_service::node_service_client::NodeServiceClient;
use rustfs_common::{GLOBAL_CONN_MAP, GLOBAL_ROOT_CERT, evict_connection};
use rustfs_common::globals::{GLOBAL_CONN_MAP, GLOBAL_ROOT_CERT, evict_connection};
use std::{error::Error, time::Duration};
use tonic::{
Request, Status,
@@ -26,11 +26,6 @@ use tonic::{
};
use tracing::{debug, warn};
// Type alias for the complex client type
pub type NodeServiceClientType = NodeServiceClient<
InterceptedService<Channel, Box<dyn Fn(Request<()>) -> Result<Request<()>, Status> + Send + Sync + 'static>>,
>;
pub use generated::*;
// Default 100 MB


@@ -12,9 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt::Display;
use datafusion::{common::DataFusionError, sql::sqlparser::parser::ParserError};
use snafu::{Backtrace, Location, Snafu};
use std::fmt::Display;
pub mod object_store;
pub mod query;


@@ -0,0 +1,13 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


@@ -12,11 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use s3s::dto::SelectObjectContentInput;
use std::sync::Arc;
use s3s::dto::SelectObjectContentInput;
pub mod analyzer;
pub mod ast;
pub mod datasource;
pub mod dispatcher;
pub mod execution;
pub mod function;


@@ -12,17 +12,20 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::query::Context;
use crate::{QueryError, QueryResult, object_store::EcObjectStore};
use std::sync::Arc;
use datafusion::{
execution::{SessionStateBuilder, context::SessionState, runtime_env::RuntimeEnvBuilder},
parquet::data_type::AsBytes,
prelude::SessionContext,
};
use object_store::{ObjectStore, memory::InMemory, path::Path};
use std::sync::Arc;
use tracing::error;
use crate::{QueryError, QueryResult, object_store::EcObjectStore};
use super::Context;
#[derive(Clone)]
pub struct SessionCtx {
_desc: Arc<SessionCtxDesc>,


@@ -65,7 +65,6 @@ tracing = { workspace = true }
transform-stream = { workspace = true, optional = true }
url = { workspace = true, optional = true }
zstd = { workspace = true, optional = true }
ipnet = { workspace = true, optional = true }
[dev-dependencies]
tempfile = { workspace = true }
@@ -93,5 +92,5 @@ hash = ["dep:highway", "dep:md-5", "dep:sha2", "dep:blake3", "dep:serde", "dep:s
os = ["dep:nix", "dep:tempfile", "winapi"] # operating system utilities
integration = [] # integration test features
sys = ["dep:sysinfo"] # system information features
http = ["dep:convert_case", "dep:http", "dep:regex", "dep:ipnet"] # http utilities
http = ["dep:convert_case", "dep:http", "dep:regex"]
full = ["ip", "tls", "net", "io", "hash", "os", "integration", "path", "crypto", "string", "compress", "sys", "notify", "http"] # all features


@@ -13,10 +13,9 @@
// limitations under the License.
use http::HeaderMap;
use ipnet::{IpNet, Ipv4Net, Ipv6Net};
use regex::Regex;
use std::env;
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr};
use std::net::SocketAddr;
use std::str::FromStr;
use std::sync::LazyLock;
@@ -46,100 +45,6 @@ fn is_xff_header_enabled() -> bool {
== "on"
}
/// TrustedProxies holds configuration for validating proxy sources
#[derive(Debug, Clone)]
pub struct TrustedProxies {
/// List of trusted proxy IP networks (CIDR format)
pub cidrs: Vec<IpNet>,
/// Whether to enable proxy validation
pub enable_validation: bool,
/// Maximum allowed proxy chain length
pub max_chain_length: usize,
}
impl TrustedProxies {
/// Create a new TrustedProxies configuration
pub fn new(cidrs: Vec<String>, enable_validation: bool, max_chain_length: usize) -> Self {
let cidrs = cidrs.into_iter().filter_map(|s| s.parse::<IpNet>().ok()).collect();
Self {
cidrs,
enable_validation,
max_chain_length,
}
}
/// Check if an IP address is within the trusted proxy ranges
pub fn is_trusted_proxy(&self, ip: IpAddr) -> bool {
if !self.enable_validation {
return true; // Backward compatibility: trust all when disabled
}
self.cidrs.iter().any(|net| net.contains(&ip))
}
}
impl Default for TrustedProxies {
fn default() -> Self {
Self {
cidrs: vec![],
enable_validation: true,
max_chain_length: 10,
}
}
}
/// Validate if an IP string represents a valid client IP
/// Returns false for private/loopback addresses and invalid formats
fn is_valid_client_ip(ip_str: &str, max_chain_length: usize) -> bool {
// Handle X-Forwarded-For chains
if ip_str.contains(',') {
let parts: Vec<&str> = ip_str.split(',').map(|s| s.trim()).collect();
if parts.len() > max_chain_length {
return false;
}
// Validate each IP in the chain
for part in parts {
if !is_valid_single_ip(part) {
return false;
}
}
return true;
}
is_valid_single_ip(ip_str)
}
/// Validate a single IP address string
fn is_valid_single_ip(ip_str: &str) -> bool {
match ip_str.parse::<IpAddr>() {
Ok(ip) => {
// Reject private and loopback addresses as client IPs
// (they should come from trusted proxies only)
!is_private(ip) && !ip.is_loopback()
}
Err(_) => false,
}
}
/// Check if an IP address is private
///
/// # Arguments
/// * `ip` - The IP address to check
///
/// # Returns
/// A `bool` indicating whether the IP is private
///
fn is_private(ip: IpAddr) -> bool {
match ip {
IpAddr::V4(ipv4) => ipv4.is_private(),
IpAddr::V6(ipv6) => {
// Check if it's in fc00::/7 (Unique Local Address)
let octets = ipv6.octets();
(octets[0] & 0xfe) == 0xfc
}
}
}
/// GetSourceScheme retrieves the scheme from the X-Forwarded-Proto and RFC7239
/// Forwarded headers (in that order).
///
@@ -242,43 +147,18 @@ pub fn get_source_ip_from_headers(headers: &HeaderMap) -> Option<String> {
addr
}
/// GetSourceIPRaw retrieves the IP from the request headers with trusted proxy validation
/// and falls back to peer_addr when necessary.
/// GetSourceIPRaw retrieves the IP from the request headers
/// and falls back to remote_addr when necessary.
/// However, it returns the address without bracketing.
///
/// # Arguments
/// * `headers` - HTTP headers from the request
/// * `peer_addr` - Peer IP address from the connection
/// * `trusted_proxies` - Trusted proxy configuration
/// * `remote_addr` - Remote address as a string
///
/// # Returns
/// A `String` containing the validated source IP address
/// A `String` containing the source IP address
///
pub fn get_source_ip_raw(headers: &HeaderMap, peer_addr: IpAddr, trusted_proxies: &TrustedProxies) -> String {
// If validation is disabled, use legacy behavior for backward compatibility
if !trusted_proxies.enable_validation {
let remote_addr_str = peer_addr.to_string();
return get_source_ip_raw_legacy(headers, &remote_addr_str);
}
// Check if the direct connection is from a trusted proxy
if trusted_proxies.is_trusted_proxy(peer_addr) {
// Trusted proxy: try to get real client IP from headers
if let Some(header_ip) = get_source_ip_from_headers(headers) {
// Validate the IP from headers
if is_valid_client_ip(&header_ip, trusted_proxies.max_chain_length) {
return header_ip;
}
// If header IP is invalid, log warning and fall back to peer
tracing::warn!("Invalid client IP in headers from trusted proxy {}: {}", peer_addr, header_ip);
}
}
// Untrusted source or no valid header IP: use connection peer address
peer_addr.to_string()
}
/// Legacy GetSourceIPRaw for backward compatibility when validation is disabled
fn get_source_ip_raw_legacy(headers: &HeaderMap, remote_addr: &str) -> String {
pub fn get_source_ip_raw(headers: &HeaderMap, remote_addr: &str) -> String {
let addr = get_source_ip_from_headers(headers).unwrap_or_else(|| remote_addr.to_string());
// Default to remote address if headers not set.
@@ -289,20 +169,19 @@ fn get_source_ip_raw_legacy(headers: &HeaderMap, remote_addr: &str) -> String {
}
}
/// GetSourceIP retrieves the IP from the request headers with trusted proxy validation
/// and falls back to peer_addr when necessary.
/// GetSourceIP retrieves the IP from the request headers
/// and falls back to remote_addr when necessary.
/// It brackets IPv6 addresses.
///
/// # Arguments
/// * `headers` - HTTP headers from the request
/// * `peer_addr` - Peer IP address from the connection
/// * `trusted_proxies` - Trusted proxy configuration
/// * `remote_addr` - Remote address as a string
///
/// # Returns
/// A `String` containing the source IP address, with IPv6 addresses bracketed
///
pub fn get_source_ip(headers: &HeaderMap, peer_addr: IpAddr, trusted_proxies: &TrustedProxies) -> String {
let addr = get_source_ip_raw(headers, peer_addr, trusted_proxies);
pub fn get_source_ip(headers: &HeaderMap, remote_addr: &str) -> String {
let addr = get_source_ip_raw(headers, remote_addr);
if addr.contains(':') { format!("[{addr}]") } else { addr }
}
@@ -331,58 +210,18 @@ mod tests {
}
#[test]
fn test_trusted_proxies_validation() {
let trusted_proxies = TrustedProxies::new(vec!["192.168.1.0/24".to_string(), "10.0.0.0/8".to_string()], true, 5);
// Trusted IPs
assert!(trusted_proxies.is_trusted_proxy("192.168.1.1".parse().unwrap()));
assert!(trusted_proxies.is_trusted_proxy("10.1.1.1".parse().unwrap()));
// Untrusted IPs
assert!(!trusted_proxies.is_trusted_proxy("203.0.113.1".parse().unwrap()));
}
#[test]
fn test_get_source_ip_raw_with_trusted_proxy() {
let mut headers = HeaderMap::new();
headers.insert("x-forwarded-for", HeaderValue::from_static("203.0.113.1"));
let trusted_proxies = TrustedProxies::new(vec!["192.168.1.1/32".to_string()], true, 5);
let peer_addr: IpAddr = "192.168.1.1".parse().unwrap();
let result = get_source_ip_raw(&headers, peer_addr, &trusted_proxies);
assert_eq!(result, "203.0.113.1");
}
#[test]
fn test_get_source_ip_raw_with_untrusted_proxy() {
let mut headers = HeaderMap::new();
headers.insert("x-forwarded-for", HeaderValue::from_static("203.0.113.1"));
let trusted_proxies = TrustedProxies::new(vec![], true, 5);
let peer_addr: IpAddr = "203.0.113.2".parse().unwrap();
let result = get_source_ip_raw(&headers, peer_addr, &trusted_proxies);
assert_eq!(result, "203.0.113.2"); // Should use peer_addr
}
#[test]
fn test_get_source_ip_raw_legacy_mode() {
fn test_get_source_ip_raw() {
let headers = create_test_headers();
let trusted_proxies = TrustedProxies::new(vec![], false, 5); // Disabled validation
let peer_addr: IpAddr = "127.0.0.1".parse().unwrap();
let result = get_source_ip_raw(&headers, peer_addr, &trusted_proxies);
assert_eq!(result, "192.168.1.1"); // Should use header IP
let remote_addr = "127.0.0.1:8080";
let result = get_source_ip_raw(&headers, remote_addr);
assert_eq!(result, "192.168.1.1");
}
#[test]
fn test_get_source_ip() {
let headers = create_test_headers();
let trusted_proxies = TrustedProxies::new(vec!["192.168.1.1/32".to_string()], true, 5);
let peer_addr: IpAddr = "192.168.1.1".parse().unwrap();
let result = get_source_ip(&headers, peer_addr, &trusted_proxies);
let remote_addr = "127.0.0.1:8080";
let result = get_source_ip(&headers, remote_addr);
assert_eq!(result, "192.168.1.1");
}
@@ -390,32 +229,8 @@ mod tests {
fn test_get_source_ip_ipv6() {
let mut headers = HeaderMap::new();
headers.insert("x-forwarded-for", HeaderValue::from_static("2001:db8::1"));
let trusted_proxies = TrustedProxies::new(vec!["192.168.1.1/32".to_string()], true, 5);
let peer_addr: IpAddr = "192.168.1.1".parse().unwrap();
let result = get_source_ip(&headers, peer_addr, &trusted_proxies);
let remote_addr = "127.0.0.1:8080";
let result = get_source_ip(&headers, remote_addr);
assert_eq!(result, "[2001:db8::1]");
}
#[test]
fn test_is_valid_client_ip() {
// Valid public IPs
assert!(is_valid_client_ip("203.0.113.1", 5));
assert!(is_valid_client_ip("2001:db8::1", 5));
// Invalid private IPs
assert!(!is_valid_client_ip("192.168.1.1", 5));
assert!(!is_valid_client_ip("10.0.0.1", 5));
assert!(!is_valid_client_ip("127.0.0.1", 5));
// Valid chain
assert!(is_valid_client_ip("203.0.113.1, 198.51.100.1", 5));
// Invalid chain (too long)
assert!(!is_valid_client_ip(
"203.0.113.1, 198.51.100.1, 192.0.2.1, 192.0.2.2, 192.0.2.3, 192.0.2.4",
5
));
}
}


@@ -48,14 +48,6 @@ pub fn parse_bool(str: &str) -> Result<bool> {
}
}
pub fn parse_bool_with_default(str: &str, default: bool) -> bool {
match str {
"1" | "t" | "T" | "true" | "TRUE" | "True" | "on" | "ON" | "On" | "enabled" => true,
"0" | "f" | "F" | "false" | "FALSE" | "False" | "off" | "OFF" | "Off" | "disabled" => false,
_ => default,
}
}
/// Matches a simple pattern against a name using wildcards.
///
/// # Arguments


@@ -37,10 +37,7 @@
src = ./.;
cargoLock = {
lockFile = ./Cargo.lock;
allowBuiltinFetchGit = true;
};
cargoLock.lockFile = ./Cargo.lock;
nativeBuildInputs = with pkgs; [
pkg-config


@@ -44,7 +44,7 @@ RustFS helm chart supports **standalone and distributed mode**. For standalone m
| imageRegistryCredentials.username | string | `""` | The username to pull rustfs image from private registry. |
| ingress.className | string | `"traefik"` | Specify the ingress class, traefik or nginx. |
| ingress.enabled | bool | `true` | |
| ingress.hosts[0].host | string | `"example.rustfs.com"` | |
| ingress.hosts[0].host | string | `"your.rustfs.com"` | |
| ingress.hosts[0].paths[0].path | string | `"/"` | |
| ingress.hosts[0].paths[0].pathType | string | `"ImplementationSpecific"` | |
| ingress.nginxAnnotations."nginx.ingress.kubernetes.io/affinity" | string | `"cookie"` | |
@@ -52,7 +52,6 @@ RustFS helm chart supports **standalone and distributed mode**. For standalone m
| ingress.nginxAnnotations."nginx.ingress.kubernetes.io/session-cookie-hash" | string | `"sha1"` | |
| ingress.nginxAnnotations."nginx.ingress.kubernetes.io/session-cookie-max-age" | string | `"3600"` | |
| ingress.nginxAnnotations."nginx.ingress.kubernetes.io/session-cookie-name" | string | `"rustfs"` | |
| ingress.customAnnotations | dict | `{}` | Customize annotations. |
| ingress.traefikAnnotations."traefik.ingress.kubernetes.io/service.sticky.cookie" | string | `"true"` | |
| ingress.traefikAnnotations."traefik.ingress.kubernetes.io/service.sticky.cookie.httponly" | string | `"true"` | |
| ingress.traefikAnnotations."traefik.ingress.kubernetes.io/service.sticky.cookie.name" | string | `"rustfs"` | |
@@ -60,6 +59,8 @@ RustFS helm chart supports **standalone and distributed mode**. For standalone m
| ingress.traefikAnnotations."traefik.ingress.kubernetes.io/service.sticky.cookie.secure" | string | `"true"` | |
| ingress.tls.enabled | bool | `false` | Enable tls and access rustfs via https. |
| ingress.tls.certManager.enabled | string | `false` | Enable cert manager support to generate certificate automatically. |
| ingress.tls.certManager.issuer.name | string | `false` | The name of cert manager issuer. |
| ingress.tls.certManager.issuer.kind | string | `false` | The kind of cert manager issuer, issuer or cluster-issuer. |
| ingress.tls.crt | string | "" | The content of certificate file. |
| ingress.tls.key | string | "" | The content of key file. |
| livenessProbe.failureThreshold | int | `3` | |
@@ -93,11 +94,9 @@ RustFS helm chart supports **standalone and distributed mode**. For standalone m
| secret.existingSecret | string | `""` | Use existing secret with a credentials. |
| secret.rustfs.access_key | string | `"rustfsadmin"` | RustFS Access Key ID |
| secret.rustfs.secret_key | string | `"rustfsadmin"` | RustFS Secret Key ID |
| service.console_port | int | `9001` | |
| service.ep_port | int | `9000` | |
| service.type | string | `"NodePort"` | |
| service.console.nodePort | int | `32001` | |
| service.console.port | int | `9001` | |
| service.endpoint.nodePort | int | `32000` | |
| service.endpoint.port | int | `9000` | |
| serviceAccount.annotations | object | `{}` | |
| serviceAccount.automount | bool | `true` | |
| serviceAccount.create | bool | `true` | |
@@ -180,12 +179,12 @@ Check the ingress status
```
kubectl -n rustfs get ing
NAME CLASS HOSTS ADDRESS PORTS AGE
rustfs nginx example.rustfs.com 10.43.237.152 80, 443 29m
rustfs nginx your.rustfs.com 10.43.237.152 80, 443 29m
```
Access the rustfs cluster via `https://example.rustfs.com` with the default username and password `rustfsadmin`.
Access the rustfs cluster via `https://your.rustfs.com` with the default username and password `rustfsadmin`.
> Replace the `example.rustfs.com` with your own domain as well as the certificates.
> Replace the `your.rustfs.com` with your own domain as well as the certificates.
# TLS configuration


@@ -104,10 +104,10 @@ Render RUSTFS_VOLUMES
*/}}
{{- define "rustfs.volumes" -}}
{{- if eq (int .Values.replicaCount) 4 }}
{{- printf "http://%s-{0...%d}.%s-headless:%d/data/rustfs{0...%d}" (include "rustfs.fullname" .) (sub (.Values.replicaCount | int) 1) (include "rustfs.fullname" . ) (.Values.service.endpoint.port | int) (sub (.Values.replicaCount | int) 1) }}
{{- printf "http://%s-{0...%d}.%s-headless:%d/data/rustfs{0...%d}" (include "rustfs.fullname" .) (sub (.Values.replicaCount | int) 1) (include "rustfs.fullname" . ) (.Values.service.ep_port | int) (sub (.Values.replicaCount | int) 1) }}
{{- end }}
{{- if eq (int .Values.replicaCount) 16 }}
{{- printf "http://%s-{0...%d}.%s-headless:%d/data" (include "rustfs.fullname" .) (sub (.Values.replicaCount | int) 1) (include "rustfs.fullname" .) (.Values.service.endpoint.port | int) }}
{{- printf "http://%s-{0...%d}.%s-headless:%d/data" (include "rustfs.fullname" .) (sub (.Values.replicaCount | int) 1) (include "rustfs.fullname" .) (.Values.service.ep_port | int) }}
{{- end }}
{{- end }}
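For illustration, assuming `replicaCount: 4`, a release fullname of `rustfs`, and endpoint port `9000`, the first branch renders `http://rustfs-{0...3}.rustfs-headless:9000/data/rustfs{0...3}`.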


@@ -0,0 +1,15 @@
{{- if and .Values.ingress.tls.enabled .Values.ingress.tls.certManager.enabled }}
{{- $host := index .Values.ingress.hosts 0 }}
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: {{ include "rustfs.fullname" . }}-tls
namespace: {{ .Release.Namespace }}
spec:
secretName: {{ .Values.ingress.tls.secretName }}
issuerRef:
name: {{ .Values.ingress.tls.certManager.issuer.name }}
kind: {{ .Values.ingress.tls.certManager.issuer.kind }}
dnsNames:
- {{ $host.host }}
{{- end }}


@@ -57,10 +57,11 @@ spec:
{{- end }}
initContainers:
- name: init-step
image: "{{ .Values.initStep.image.repository }}:{{ .Values.initStep.image.tag }}"
imagePullPolicy: {{ .Values.initStep.image.pullPolicy }}
image: busybox
imagePullPolicy: {{ .Values.image.pullPolicy }}
securityContext:
{{- toYaml .Values.initStep.containerSecurityContext | nindent 12 }}
runAsUser: 0
runAsGroup: 0
command:
- sh
- -c
@@ -82,10 +83,10 @@ spec:
{{- toYaml .Values.containerSecurityContext | nindent 12 }}
{{- end }}
ports:
- name: endpoint
containerPort: {{ .Values.service.endpoint.port }}
- name: console
containerPort: {{ .Values.service.console.port }}
- containerPort: {{ .Values.service.ep_port }}
name: endpoint
- containerPort: {{ .Values.service.console_port }}
name: console
envFrom:
- configMapRef:
name: {{ include "rustfs.fullname" . }}-config


@@ -2,9 +2,12 @@
{{- $secretName := .Values.ingress.tls.secretName }}
{{- $ingressAnnotations := dict }}
{{- if eq .Values.ingress.className "nginx" }}
{{- $ingressAnnotations = .Values.ingress.nginxAnnotations }}
{{- $ingressAnnotations = merge $ingressAnnotations (.Values.ingress.nginxAnnotations | default dict) }}
{{- else if eq .Values.ingress.className "" }}
{{- $ingressAnnotations = .Values.ingress.customAnnotations }}
{{- $ingressAnnotations = merge $ingressAnnotations (.Values.ingress.customAnnotations | default dict) }}
{{- end }}
{{- if .Values.ingress.tls.certManager.enabled }}
{{- $ingressAnnotations = merge $ingressAnnotations (.Values.ingress.certManagerAnnotations | default dict) }}
{{- end }}
apiVersion: networking.k8s.io/v1
kind: Ingress


@@ -42,7 +42,6 @@ spec:
{{- else }}
{}
{{- if .Values.affinity.podAntiAffinity.enabled }}
{{- end }}
podAntiAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
- labelSelector:
@@ -53,6 +52,7 @@ spec:
- {{ include "rustfs.name" . }}
topologyKey: {{ .Values.affinity.podAntiAffinity.topologyKey }}
{{- end }}
{{- end }}
{{- end }}
{{- if .Values.tolerations }}
tolerations:
@@ -68,10 +68,11 @@ spec:
{{- end }}
initContainers:
- name: init-step
image: "{{ .Values.initStep.image.repository }}:{{ .Values.initStep.image.tag }}"
imagePullPolicy: {{ .Values.initStep.image.pullPolicy }}
image: busybox
imagePullPolicy: {{ .Values.image.pullPolicy }}
securityContext:
{{- toYaml .Values.initStep.containerSecurityContext | nindent 12 }}
runAsUser: 0
runAsGroup: 0
env:
- name: REPLICA_COUNT
value: {{ .Values.replicaCount | quote }}
@@ -110,10 +111,13 @@ spec:
{{- toYaml .Values.containerSecurityContext | nindent 12 }}
{{- end }}
ports:
- name: endpoint
containerPort: {{ .Values.service.endpoint.port }}
- name: console
containerPort: {{ .Values.service.console.port }}
- containerPort: {{ .Values.service.ep_port }}
name: endpoint
- containerPort: {{ .Values.service.console_port }}
name: console
env:
- name: REPLICA_COUNT
value: {{ .Values.replicaCount | quote }}
envFrom:
- configMapRef:
name: {{ include "rustfs.fullname" . }}-config

View File

@@ -11,5 +11,5 @@ spec:
- name: wget
image: busybox
command: ['wget']
args: ['-O', '/dev/null', '{{ include "rustfs.fullname" . }}-svc:{{ .Values.service.endpoint.port }}/health']
args: ['-O', '/dev/null', '{{ include "rustfs.fullname" . }}-svc:{{ .Values.service.ep_port }}/health']
restartPolicy: Never

View File

@@ -114,17 +114,23 @@ ingress:
nginx.ingress.kubernetes.io/session-cookie-hash: sha1
nginx.ingress.kubernetes.io/session-cookie-max-age: "3600"
nginx.ingress.kubernetes.io/session-cookie-name: rustfs
certManagerAnnotations:
{} # Specify cert-manager issuer annotations, e.g. cert-manager.io/issuer or cert-manager.io/cluster-issuer.
# cert-manager.io/issuer: "letsencrypt-staging"
customAnnotations: # Specify custom annotations
{} # Customize annotations
hosts:
- host: example.rustfs.com
- host: xmg.rustfs.com
paths:
- path: /
pathType: Prefix
tls:
tls:
enabled: false # Enable tls and access rustfs via https.
certManager:
enabled: false # Enable certmanager to generate certificate for rustfs, default false.
issuer:
name: letsencrypt-staging # Specify cert manager issuer name
kind: Issuer # Specify cert manager issuer kind, Issuer or ClusterIssuer.
secretName: secret-tls
crt: tls.crt
key: tls.key
@@ -177,16 +183,4 @@ storageclass:
dataStorageSize: 256Mi
logStorageSize: 256Mi
# Init container parameters.
initStep:
image:
repository: busybox
pullPolicy: IfNotPresent
tag: "latest"
containerSecurityContext:
runAsUser: 0
runAsGroup: 0
extraManifests: []

View File

@@ -72,7 +72,6 @@ hyper.workspace = true
hyper-util.workspace = true
http.workspace = true
http-body.workspace = true
http-body-util.workspace = true
reqwest = { workspace = true }
socket2 = { workspace = true }
tokio = { workspace = true, features = ["rt-multi-thread", "macros", "net", "signal", "process", "io-util"] }

View File

@@ -1,18 +1,6 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::sync::Arc;
use crate::auth::get_condition_values;
use http::HeaderMap;
use rustfs_iam::store::object::ObjectStore;
use rustfs_iam::sys::IamSys;
@@ -21,8 +9,8 @@ use rustfs_policy::policy::Args;
use rustfs_policy::policy::action::Action;
use s3s::S3Result;
use s3s::s3_error;
use std::collections::HashMap;
use std::sync::Arc;
use crate::auth::get_condition_values;
pub async fn validate_admin_request(
headers: &HeaderMap,

View File

@@ -14,7 +14,6 @@
use crate::config::build;
use crate::license::get_license;
use crate::server::{CONSOLE_PREFIX, FAVICON_PATH, HEALTH_PREFIX, RUSTFS_ADMIN_PREFIX};
use axum::{
Router,
body::Body,
@@ -46,6 +45,9 @@ use tower_http::timeout::TimeoutLayer;
use tower_http::trace::TraceLayer;
use tracing::{debug, error, info, instrument, warn};
pub(crate) const CONSOLE_PREFIX: &str = "/rustfs/console";
const RUSTFS_ADMIN_PREFIX: &str = "/rustfs/admin/v3";
#[derive(RustEmbed)]
#[folder = "$CARGO_MANIFEST_DIR/static"]
struct StaticFiles;
@@ -455,7 +457,7 @@ fn get_console_config_from_env() -> (bool, u32, u64, String) {
/// # Returns:
/// - `true` if the path is for console access, `false` otherwise.
pub fn is_console_path(path: &str) -> bool {
path == FAVICON_PATH || path.starts_with(CONSOLE_PREFIX)
path == "/favicon.ico" || path.starts_with(CONSOLE_PREFIX)
}
/// Setup comprehensive middleware stack with tower-http features
@@ -475,11 +477,11 @@ fn setup_console_middleware_stack(
auth_timeout: u64,
) -> Router {
let mut app = Router::new()
.route(FAVICON_PATH, get(static_handler))
.route("/favicon.ico", get(static_handler))
.route(&format!("{CONSOLE_PREFIX}/license"), get(license_handler))
.route(&format!("{CONSOLE_PREFIX}/config.json"), get(config_handler))
.route(&format!("{CONSOLE_PREFIX}/version"), get(version_handler))
.route(&format!("{CONSOLE_PREFIX}{HEALTH_PREFIX}"), get(health_check).head(health_check))
.route(&format!("{CONSOLE_PREFIX}/health"), get(health_check).head(health_check))
.nest(CONSOLE_PREFIX, Router::new().fallback_service(get(static_handler)))
.fallback_service(get(static_handler));
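With the shared constants inlined, the path check above reduces to a pure string match; a self-contained sketch mirroring the function from this diff:

```rust
// `is_console_path` as shown in the diff, reproduced with literals so the
// example stands alone.
fn is_console_path(path: &str) -> bool {
    path == "/favicon.ico" || path.starts_with("/rustfs/console")
}

fn main() {
    assert!(is_console_path("/favicon.ico"));
    assert!(is_console_path("/rustfs/console/index.html"));
    assert!(!is_console_path("/health"));
    assert!(!is_console_path("/my-bucket/object.txt"));
    println!("console path checks pass");
}
```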

View File

@@ -72,6 +72,7 @@ use tokio_stream::wrappers::ReceiverStream;
use tracing::debug;
use tracing::{error, info, warn};
use url::Host;
// use url::UrlQuery;
pub mod bucket_meta;
pub mod event;

View File

@@ -12,13 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::{
admin::{auth::validate_admin_request, router::Operation},
auth::{check_key_valid, get_session_token},
};
use http::{HeaderMap, StatusCode};
use matchit::Params;
use rustfs_ecstore::rebalance::RebalanceMeta;
use rustfs_ecstore::{
StorageAPI,
error::StorageError,
@@ -38,6 +33,12 @@ use std::time::Duration;
use time::OffsetDateTime;
use tracing::warn;
use crate::{
admin::{auth::validate_admin_request, router::Operation},
auth::{check_key_valid, get_session_token},
};
use rustfs_ecstore::rebalance::RebalanceMeta;
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct RebalanceResp {
pub id: String,

View File

@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::admin::router::Operation;
use http::StatusCode;
use hyper::Uri;
use matchit::Params;
@@ -21,6 +20,8 @@ use rustfs_madmin::service_commands::ServiceTraceOpts;
use s3s::{Body, S3Request, S3Response, S3Result, s3_error};
use tracing::warn;
use crate::admin::router::Operation;
#[allow(dead_code)]
fn extract_trace_options(uri: &Uri) -> S3Result<ServiceTraceOpts> {
let mut st_opts = ServiceTraceOpts::default();

View File

@@ -22,7 +22,6 @@ pub mod utils;
#[cfg(test)]
mod console_test;
use crate::server::{ADMIN_PREFIX, HEALTH_PREFIX, PROFILE_CPU_PATH, PROFILE_MEMORY_PATH};
use handlers::{
GetReplicationMetricsHandler, HealthCheckHandler, IsAdminHandler, ListRemoteTargetHandler, RemoveRemoteTargetHandler,
SetRemoteTargetHandler, bucket_meta,
@@ -38,21 +37,17 @@ use router::{AdminOperation, S3Router};
use rpc::register_rpc_route;
use s3s::route::S3Route;
/// Create admin router
///
/// # Arguments
/// * `console_enabled` - Whether the console is enabled
///
/// # Returns
/// An instance of S3Route for admin operations
const ADMIN_PREFIX: &str = "/rustfs/admin";
// const ADMIN_PREFIX: &str = "/minio/admin";
pub fn make_admin_route(console_enabled: bool) -> std::io::Result<impl S3Route> {
let mut r: S3Router<AdminOperation> = S3Router::new(console_enabled);
// Health check endpoint for monitoring and orchestration
r.insert(Method::GET, HEALTH_PREFIX, AdminOperation(&HealthCheckHandler {}))?;
r.insert(Method::HEAD, HEALTH_PREFIX, AdminOperation(&HealthCheckHandler {}))?;
r.insert(Method::GET, PROFILE_CPU_PATH, AdminOperation(&TriggerProfileCPU {}))?;
r.insert(Method::GET, PROFILE_MEMORY_PATH, AdminOperation(&TriggerProfileMemory {}))?;
r.insert(Method::GET, "/health", AdminOperation(&HealthCheckHandler {}))?;
r.insert(Method::HEAD, "/health", AdminOperation(&HealthCheckHandler {}))?;
r.insert(Method::GET, "/profile/cpu", AdminOperation(&TriggerProfileCPU {}))?;
r.insert(Method::GET, "/profile/memory", AdminOperation(&TriggerProfileMemory {}))?;
// 1
r.insert(Method::POST, "/", AdminOperation(&sts::AssumeRoleHandle {}))?;

View File

@@ -12,9 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::admin::ADMIN_PREFIX;
use crate::admin::console::is_console_path;
use crate::admin::console::make_console_server;
use crate::server::{ADMIN_PREFIX, HEALTH_PREFIX, PROFILE_CPU_PATH, PROFILE_MEMORY_PATH, RPC_PREFIX};
use crate::admin::rpc::RPC_PREFIX;
use hyper::HeaderMap;
use hyper::Method;
use hyper::StatusCode;
@@ -85,12 +86,12 @@ where
fn is_match(&self, method: &Method, uri: &Uri, headers: &HeaderMap, _: &mut Extensions) -> bool {
let path = uri.path();
// Profiling endpoints
if method == Method::GET && (path == PROFILE_CPU_PATH || path == PROFILE_MEMORY_PATH) {
if method == Method::GET && (path == "/profile/cpu" || path == "/profile/memory") {
return true;
}
// Health check
if (method == Method::HEAD || method == Method::GET) && path == HEALTH_PREFIX {
if (method == Method::HEAD || method == Method::GET) && path == "/health" {
return true;
}
@@ -116,12 +117,12 @@ where
let path = req.uri.path();
// Profiling endpoints
if req.method == Method::GET && (path == PROFILE_CPU_PATH || path == PROFILE_MEMORY_PATH) {
if req.method == Method::GET && (path == "/profile/cpu" || path == "/profile/memory") {
return Ok(());
}
// Health check
if (req.method == Method::HEAD || req.method == Method::GET) && path == HEALTH_PREFIX {
if (req.method == Method::HEAD || req.method == Method::GET) && path == "/health" {
return Ok(());
}
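Condensed, the matcher above short-circuits on method plus path before any S3 routing runs; a self-contained sketch of the same predicate, using the literal paths from this diff:

```rust
use hyper::{Method, Uri};

// Fast-path admin matching: profiling endpoints accept GET only, the health
// check accepts GET and HEAD; everything else falls through to S3 routing.
fn is_admin_fast_path(method: &Method, uri: &Uri) -> bool {
    let path = uri.path();
    (method == Method::GET && (path == "/profile/cpu" || path == "/profile/memory"))
        || ((method == Method::HEAD || method == Method::GET) && path == "/health")
}

fn main() {
    let health: Uri = "/health".parse().unwrap();
    assert!(is_admin_fast_path(&Method::HEAD, &health));
    let cpu: Uri = "/profile/cpu".parse().unwrap();
    assert!(!is_admin_fast_path(&Method::POST, &cpu));
}
```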

View File

@@ -15,7 +15,6 @@
use super::router::AdminOperation;
use super::router::Operation;
use super::router::S3Router;
use crate::server::RPC_PREFIX;
use futures::StreamExt;
use http::StatusCode;
use hyper::Method;
@@ -37,6 +36,8 @@ use tokio::io::AsyncWriteExt;
use tokio_util::io::ReaderStream;
use tracing::warn;
pub const RPC_PREFIX: &str = "/rustfs/rpc";
pub fn register_rpc_route(r: &mut S3Router<AdminOperation>) -> std::io::Result<()> {
r.insert(
Method::GET,

View File

@@ -66,7 +66,7 @@ const SIGN_V2_ALGORITHM: &str = "AWS ";
const SIGN_V4_ALGORITHM: &str = "AWS4-HMAC-SHA256";
const STREAMING_CONTENT_SHA256: &str = "STREAMING-AWS4-HMAC-SHA256-PAYLOAD";
const STREAMING_CONTENT_SHA256_TRAILER: &str = "STREAMING-AWS4-HMAC-SHA256-PAYLOAD-TRAILER";
pub(crate) const UNSIGNED_PAYLOAD_TRAILER: &str = "STREAMING-UNSIGNED-PAYLOAD-TRAILER";
pub const UNSIGNED_PAYLOAD_TRAILER: &str = "STREAMING-UNSIGNED-PAYLOAD-TRAILER";
const ACTION_HEADER: &str = "Action";
const AMZ_CREDENTIAL: &str = "X-Amz-Credential";
const AMZ_ACCESS_KEY_ID: &str = "AWSAccessKeyId";

View File

@@ -13,8 +13,7 @@
// limitations under the License.
use crate::storage::ecfs::{process_lambda_configurations, process_queue_configurations, process_topic_configurations};
use crate::{admin, config, version};
use chrono::Datelike;
use crate::{admin, config};
use rustfs_config::{DEFAULT_UPDATE_CHECK, ENV_UPDATE_CHECK};
use rustfs_ecstore::bucket::metadata_sys;
use rustfs_notify::notifier_global;
@@ -24,21 +23,6 @@ use std::env;
use std::io::Error;
use tracing::{debug, error, info, instrument, warn};
#[instrument]
pub(crate) fn print_server_info() {
let current_year = chrono::Utc::now().year();
// Use custom macros to print server information
info!("RustFS Object Storage Server");
info!("Copyright: 2024-{} RustFS, Inc", current_year);
info!("License: Apache-2.0 https://www.apache.org/licenses/LICENSE-2.0");
info!("Version: {}", version::get_version());
info!("Docs: https://rustfs.com/docs/");
}
/// Initialize the asynchronous update check system.
/// This function checks if update checking is enabled via
/// environment variable or default configuration. If enabled,
/// it spawns an asynchronous task to check for updates with a timeout.
pub(crate) fn init_update_check() {
let update_check_enable = env::var(ENV_UPDATE_CHECK)
.unwrap_or_else(|_| DEFAULT_UPDATE_CHECK.to_string())
@@ -86,12 +70,6 @@ pub(crate) fn init_update_check() {
});
}
/// Add existing bucket notification configurations to the global notifier system.
/// This function retrieves notification configurations for each bucket
/// and registers the corresponding event rules with the notifier system.
/// It processes queue, topic, and lambda configurations and maps them to event rules.
/// # Arguments
/// * `buckets` - A vector of bucket names to process
#[instrument(skip_all)]
pub(crate) async fn add_bucket_notification_configuration(buckets: Vec<String>) {
let region_opt = rustfs_ecstore::global::get_global_region();
@@ -150,15 +128,6 @@ pub(crate) async fn add_bucket_notification_configuration(buckets: Vec<String>)
}
/// Initialize KMS system and configure if enabled
///
/// This function initializes the global KMS service manager. If KMS is enabled
/// via command line options, it configures and starts the service accordingly.
/// If not enabled, it attempts to load any persisted KMS configuration from
/// cluster storage and starts the service if found.
/// # Arguments
/// * `opt` - The application configuration options
///
/// Returns `std::io::Result<()>` indicating success or failure
#[instrument(skip(opt))]
pub(crate) async fn init_kms_system(opt: &config::Opt) -> std::io::Result<()> {
// Initialize global KMS service manager (starts in NotConfigured state)

View File

@@ -25,20 +25,19 @@ mod update;
mod version;
// Ensure the correct path for parse_license is imported
use crate::init::{
add_bucket_notification_configuration, init_buffer_profile_system, init_kms_system, init_update_check, print_server_info,
};
use crate::init::{add_bucket_notification_configuration, init_buffer_profile_system, init_kms_system, init_update_check};
use crate::server::{
SHUTDOWN_TIMEOUT, ServiceState, ServiceStateManager, ShutdownSignal, init_cert, init_event_notifier, shutdown_event_notifier,
start_audit_system, start_http_server, stop_audit_system, wait_for_shutdown,
};
use chrono::Datelike;
use clap::Parser;
use license::init_license;
use rustfs_ahm::{
Scanner, create_ahm_services_cancel_token, heal::storage::ECStoreHealStorage, init_heal_manager,
scanner::data_scanner::ScannerConfig, shutdown_ahm_services,
};
use rustfs_common::{GlobalReadiness, SystemStage, set_global_addr};
use rustfs_common::globals::set_global_addr;
use rustfs_ecstore::{
StorageAPI,
bucket::metadata_sys::init_bucket_metadata_sys,
@@ -70,6 +69,25 @@ static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
#[global_allocator]
static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
const LOGO: &str = r#"
░█▀▄░█░█░█▀▀░▀█▀░█▀▀░█▀▀
░█▀▄░█░█░▀▀█░░█░░█▀▀░▀▀█
░▀░▀░▀▀▀░▀▀▀░░▀░░▀░░░▀▀▀
"#;
#[instrument]
fn print_server_info() {
let current_year = chrono::Utc::now().year();
// Use custom macros to print server information
info!("RustFS Object Storage Server");
info!("Copyright: 2024-{} RustFS, Inc", current_year);
info!("License: Apache-2.0 https://www.apache.org/licenses/LICENSE-2.0");
info!("Version: {}", version::get_version());
info!("Docs: https://rustfs.com/docs/");
}
fn main() -> Result<()> {
let runtime = server::get_tokio_runtime_builder()
.build()
@@ -102,7 +120,7 @@ async fn async_main() -> Result<()> {
}
// print startup logo
info!("{}", server::LOGO);
info!("{}", LOGO);
// Initialize performance profiling if enabled
profiling::init_from_env().await;
@@ -125,8 +143,6 @@ async fn async_main() -> Result<()> {
#[instrument(skip(opt))]
async fn run(opt: config::Opt) -> Result<()> {
debug!("opt: {:?}", &opt);
// 1. Initialize global readiness tracker
let readiness = Arc::new(GlobalReadiness::new());
if let Some(region) = &opt.region {
rustfs_ecstore::global::set_global_region(region.clone());
@@ -198,14 +214,14 @@ async fn run(opt: config::Opt) -> Result<()> {
let s3_shutdown_tx = {
let mut s3_opt = opt.clone();
s3_opt.console_enable = false;
let s3_shutdown_tx = start_http_server(&s3_opt, state_manager.clone(), readiness.clone()).await?;
let s3_shutdown_tx = start_http_server(&s3_opt, state_manager.clone()).await?;
Some(s3_shutdown_tx)
};
let console_shutdown_tx = if opt.console_enable && !opt.console_address.is_empty() {
let mut console_opt = opt.clone();
console_opt.address = console_opt.console_address.clone();
let console_shutdown_tx = start_http_server(&console_opt, state_manager.clone(), readiness.clone()).await?;
let console_shutdown_tx = start_http_server(&console_opt, state_manager.clone()).await?;
Some(console_shutdown_tx)
} else {
None
@@ -220,7 +236,6 @@ async fn run(opt: config::Opt) -> Result<()> {
let ctx = CancellationToken::new();
// init store
// 2. Start Storage Engine (ECStore)
let store = ECStore::new(server_addr, endpoint_pools.clone(), ctx.clone())
.await
.inspect_err(|err| {
@@ -228,20 +243,10 @@ async fn run(opt: config::Opt) -> Result<()> {
})?;
ecconfig::init();
// config system configuration
GLOBAL_CONFIG_SYS.init(store.clone()).await?;
// // Initialize global configuration system
let mut retry_count = 0;
while let Err(e) = GLOBAL_CONFIG_SYS.init(store.clone()).await {
error!("GLOBAL_CONFIG_SYS.init failed {:?}", e);
// TODO: check error type
retry_count += 1;
if retry_count > 15 {
return Err(Error::other("GLOBAL_CONFIG_SYS.init failed"));
}
tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;
}
readiness.mark_stage(SystemStage::StorageReady);
// init replication_pool
// init replication_pool
init_background_replication(store.clone()).await;
// Initialize KMS system if enabled
init_kms_system(&opt).await?;
@@ -274,10 +279,7 @@ async fn run(opt: config::Opt) -> Result<()> {
init_bucket_metadata_sys(store.clone(), buckets.clone()).await;
// 3. Initialize IAM System (Blocking load)
// This ensures data is in memory before moving forward
init_iam_sys(store.clone()).await.map_err(Error::other)?;
readiness.mark_stage(SystemStage::IamReady);
add_bucket_notification_configuration(buckets.clone()).await;
@@ -329,15 +331,6 @@ async fn run(opt: config::Opt) -> Result<()> {
init_update_check();
info!(target: "rustfs::main::run","server started successfully at {}", &server_address);
// 4. Mark as Full Ready now that critical components are warm
readiness.mark_stage(SystemStage::FullReady);
println!(
"RustFS server started successfully at {}, current time: {}",
&server_address,
chrono::offset::Utc::now().to_string()
);
// Perform hibernation for 1 second
tokio::time::sleep(SHUTDOWN_TIMEOUT).await;
// listen to the shutdown signal
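The diff drops the bounded retry loop around `GLOBAL_CONFIG_SYS.init` in favor of a single fallible call. For reference, a generic form of the removed pattern might look like this — an illustrative helper under assumed semantics, not a RustFS API:

```rust
use std::time::Duration;

// Retry an async init operation, sleeping 1s between failures and giving up
// after `max_attempts` additional tries (so at most max_attempts + 1 calls).
async fn retry_init<F, Fut, T, E>(max_attempts: u32, mut op: F) -> Result<T, E>
where
    F: FnMut() -> Fut,
    Fut: std::future::Future<Output = Result<T, E>>,
    E: std::fmt::Debug,
{
    let mut attempt = 0;
    loop {
        match op().await {
            Ok(v) => return Ok(v),
            Err(e) if attempt < max_attempts => {
                attempt += 1;
                eprintln!("init failed (attempt {attempt}): {e:?}");
                tokio::time::sleep(Duration::from_secs(1)).await;
            }
            Err(e) => return Err(e),
        }
    }
}

#[tokio::main]
async fn main() {
    let mut tries = 0;
    let result = retry_init(15, || {
        tries += 1;
        let t = tries;
        async move { if t < 3 { Err("not ready") } else { Ok(()) } }
    })
    .await;
    println!("result after {tries} tries: {result:?}");
}
```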

View File

@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use rustfs_common::set_global_root_cert;
use rustfs_common::globals::set_global_root_cert;
use rustfs_config::{RUSTFS_CA_CERT, RUSTFS_PUBLIC_CERT, RUSTFS_TLS_CERT};
use tracing::{debug, info};

View File

@@ -17,7 +17,7 @@ use super::compress::{CompressionConfig, CompressionPredicate};
use crate::admin;
use crate::auth::IAMAuth;
use crate::config;
use crate::server::{ReadinessGateLayer, ServiceState, ServiceStateManager, hybrid::hybrid, layer::RedirectLayer};
use crate::server::{ServiceState, ServiceStateManager, hybrid::hybrid, layer::RedirectLayer};
use crate::storage;
use crate::storage::tonic_service::make_server;
use bytes::Bytes;
@@ -29,7 +29,6 @@ use hyper_util::{
service::TowerToHyperService,
};
use metrics::{counter, histogram};
use rustfs_common::GlobalReadiness;
use rustfs_config::{DEFAULT_ACCESS_KEY, DEFAULT_SECRET_KEY, MI_B, RUSTFS_TLS_CERT, RUSTFS_TLS_KEY};
use rustfs_protos::proto_gen::node_service::node_service_server::NodeServiceServer;
use rustfs_utils::net::parse_and_resolve_address;
@@ -113,7 +112,6 @@ fn get_cors_allowed_origins() -> String {
pub async fn start_http_server(
opt: &config::Opt,
worker_state_manager: ServiceStateManager,
readiness: Arc<GlobalReadiness>,
) -> Result<tokio::sync::broadcast::Sender<()>> {
let server_addr = parse_and_resolve_address(opt.address.as_str()).map_err(Error::other)?;
let server_port = server_addr.port();
@@ -121,26 +119,16 @@ pub async fn start_http_server(
// The listening address and port are obtained from the parameters
let listener = {
let mut server_addr = server_addr;
// Try to create a socket for the address family; if that fails, fallback to IPv4.
let mut socket = match socket2::Socket::new(
let mut socket = socket2::Socket::new(
socket2::Domain::for_address(server_addr),
socket2::Type::STREAM,
Some(socket2::Protocol::TCP),
) {
Ok(s) => s,
Err(e) => {
warn!("Failed to create socket for {:?}: {}, falling back to IPv4", server_addr, e);
let ipv4_addr = SocketAddr::new(std::net::Ipv4Addr::UNSPECIFIED.into(), server_addr.port());
server_addr = ipv4_addr;
socket2::Socket::new(socket2::Domain::IPV4, socket2::Type::STREAM, Some(socket2::Protocol::TCP))?
}
};
)?;
// If address is IPv6 try to enable dual-stack; on failure, switch to IPv4 socket.
if server_addr.is_ipv6() {
if let Err(e) = socket.set_only_v6(false) {
warn!("Failed to set IPV6_V6ONLY=false, attempting IPv4 fallback: {}", e);
warn!("Failed to set IPV6_V6ONLY=false, falling back to IPv4-only: {}", e);
// Fallback to a new IPv4 socket if setting dual-stack fails.
let ipv4_addr = SocketAddr::new(std::net::Ipv4Addr::UNSPECIFIED.into(), server_addr.port());
server_addr = ipv4_addr;
socket = socket2::Socket::new(socket2::Domain::IPV4, socket2::Type::STREAM, Some(socket2::Protocol::TCP))?;
@@ -152,27 +140,8 @@ pub async fn start_http_server(
socket.set_reuse_address(true)?;
// Set the socket to non-blocking before passing it to Tokio.
socket.set_nonblocking(true)?;
// Attempt bind; if bind fails for IPv6, try IPv4 fallback once more.
if let Err(bind_err) = socket.bind(&server_addr.into()) {
warn!("Failed to bind to {}: {}.", server_addr, bind_err);
if server_addr.is_ipv6() {
// Try IPv4 fallback
let ipv4_addr = SocketAddr::new(std::net::Ipv4Addr::UNSPECIFIED.into(), server_addr.port());
server_addr = ipv4_addr;
socket = socket2::Socket::new(socket2::Domain::IPV4, socket2::Type::STREAM, Some(socket2::Protocol::TCP))?;
socket.set_reuse_address(true)?;
socket.set_nonblocking(true)?;
socket.bind(&server_addr.into())?;
// [FIX] Ensure fallback socket is moved to listening state as well.
socket.listen(backlog)?;
} else {
return Err(bind_err);
}
} else {
// Listen on the socket when initial bind succeeded
socket.listen(backlog)?;
}
socket.bind(&server_addr.into())?;
socket.listen(backlog)?;
TcpListener::from_std(socket.into())?
};
@@ -210,7 +179,7 @@ pub async fn start_http_server(
println!("Console WebUI (localhost): {protocol}://127.0.0.1:{server_port}/rustfs/console/index.html",);
} else {
info!(target: "rustfs::main::startup","RustFS API: {api_endpoints} {localhost_endpoint}");
println!("RustFS Http API: {api_endpoints} {localhost_endpoint}");
println!("RustFS API: {api_endpoints} {localhost_endpoint}");
println!("RustFS Start Time: {now_time}");
if DEFAULT_ACCESS_KEY.eq(&opt.access_key) && DEFAULT_SECRET_KEY.eq(&opt.secret_key) {
warn!(
@@ -390,7 +359,6 @@ pub async fn start_http_server(
cors_layer: cors_layer.clone(),
compression_config: compression_config.clone(),
is_console,
readiness: readiness.clone(),
};
process_connection(socket, tls_acceptor.clone(), connection_ctx, graceful.clone());
@@ -493,7 +461,6 @@ struct ConnectionContext {
cors_layer: CorsLayer,
compression_config: CompressionConfig,
is_console: bool,
readiness: Arc<GlobalReadiness>,
}
/// Process a single incoming TCP connection.
@@ -517,7 +484,6 @@ fn process_connection(
cors_layer,
compression_config,
is_console,
readiness,
} = context;
// Build services inside each connected task to avoid passing complex service types across tasks,
@@ -528,9 +494,6 @@ fn process_connection(
let hybrid_service = ServiceBuilder::new()
.layer(SetRequestIdLayer::x_request_id(MakeRequestUuid))
.layer(CatchPanicLayer::new())
// CRITICAL: Insert ReadinessGateLayer before business logic
// This stops requests from hitting IAMAuth or Storage if they are not ready.
.layer(ReadinessGateLayer::new(readiness))
.layer(
TraceLayer::new_for_http()
.make_span_with(|request: &HttpRequest<_>| {
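The simplified listener setup above reduces to: create a socket for the requested address family, try to clear `IPV6_V6ONLY` for dual-stack on IPv6, rebuild as plain IPv4 if that fails, then bind, listen, and hand the socket to Tokio. A self-contained sketch under those assumptions (uses the `socket2` crate):

```rust
use socket2::{Domain, Protocol, Socket, Type};
use std::net::SocketAddr;
use tokio::net::TcpListener;

// Build a TCP listener following the flow in the diff: dual-stack when the
// platform allows it, IPv4-only fallback otherwise.
fn make_listener(mut addr: SocketAddr, backlog: i32) -> std::io::Result<TcpListener> {
    let mut socket = Socket::new(Domain::for_address(addr), Type::STREAM, Some(Protocol::TCP))?;
    if addr.is_ipv6() && socket.set_only_v6(false).is_err() {
        // Dual-stack unsupported: fall back to an IPv4 socket on the same port.
        addr = SocketAddr::new(std::net::Ipv4Addr::UNSPECIFIED.into(), addr.port());
        socket = Socket::new(Domain::IPV4, Type::STREAM, Some(Protocol::TCP))?;
    }
    socket.set_reuse_address(true)?;
    socket.set_nonblocking(true)?; // required before handing the fd to Tokio
    socket.bind(&addr.into())?;
    socket.listen(backlog)?;
    TcpListener::from_std(socket.into())
}

#[tokio::main]
async fn main() -> std::io::Result<()> {
    let listener = make_listener("[::]:9000".parse().unwrap(), 1024)?;
    println!("listening on {}", listener.local_addr()?);
    Ok(())
}
```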

View File

@@ -19,8 +19,6 @@ mod event;
mod http;
mod hybrid;
mod layer;
mod prefix;
mod readiness;
mod runtime;
mod service_state;
@@ -28,8 +26,6 @@ pub(crate) use audit::{start_audit_system, stop_audit_system};
pub(crate) use cert::init_cert;
pub(crate) use event::{init_event_notifier, shutdown_event_notifier};
pub(crate) use http::start_http_server;
pub(crate) use prefix::*;
pub(crate) use readiness::ReadinessGateLayer;
pub(crate) use runtime::get_tokio_runtime_builder;
pub(crate) use service_state::SHUTDOWN_TIMEOUT;
pub(crate) use service_state::ServiceState;

View File

@@ -1,55 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/// Predefined CPU profiling path for RustFS server.
/// This path is used to access CPU profiling data.
pub(crate) const PROFILE_CPU_PATH: &str = "/profile/cpu";
/// This path is used to access memory profiling data.
pub(crate) const PROFILE_MEMORY_PATH: &str = "/profile/memory";
/// Favicon path to handle browser requests for the favicon.
/// This path serves the favicon.ico file.
pub(crate) const FAVICON_PATH: &str = "/favicon.ico";
/// Predefined health check path for RustFS server.
/// This path is used to check the health status of the server.
pub(crate) const HEALTH_PREFIX: &str = "/health";
/// Predefined administrative prefix for RustFS server routes.
/// This prefix is used for endpoints that handle administrative tasks
/// such as configuration, monitoring, and management.
pub(crate) const ADMIN_PREFIX: &str = "/rustfs/admin";
/// Environment variable name for overriding the default
/// administrative prefix path.
pub(crate) const RUSTFS_ADMIN_PREFIX: &str = "/rustfs/admin/v3";
/// Predefined console prefix for RustFS server routes.
/// This prefix is used for endpoints that handle console-related tasks
/// such as user interface and management.
pub(crate) const CONSOLE_PREFIX: &str = "/rustfs/console";
/// Predefined RPC prefix for RustFS server routes.
/// This prefix is used for endpoints that handle remote procedure calls (RPC).
pub(crate) const RPC_PREFIX: &str = "/rustfs/rpc";
/// LOGO art for RustFS server.
pub(crate) const LOGO: &str = r#"
░█▀▄░█░█░█▀▀░▀█▀░█▀▀░█▀▀
░█▀▄░█░█░▀▀█░░█░░█▀▀░▀▀█
░▀░▀░▀▀▀░▀▀▀░░▀░░▀░░░▀▀▀
"#;

Some files were not shown because too many files have changed in this diff.