refactor: NamespaceLock (nslock), AHM→Heal Crate, and Lock/Clippy Fixes (#1664)

Co-authored-by: Copilot <198982749+Copilot@users.noreply.github.com>
Co-authored-by: weisd <2057561+weisd@users.noreply.github.com>
Co-authored-by: houseme <housemecn@gmail.com>
This commit is contained in:
weisd
2026-01-30 13:13:41 +08:00
committed by GitHub
parent 1c085590ca
commit dce117840c
80 changed files with 3787 additions and 16746 deletions

217
Cargo.lock generated
View File

@@ -224,11 +224,11 @@ checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
[[package]]
name = "ar_archive_writer"
version = "0.5.1"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7eb93bbb63b9c227414f6eb3a0adfddca591a8ce1e9b60661bb08969b87e340b"
checksum = "f0c269894b6fe5e9d7ada0cf69b5bf847ff35bc25fc271f08e1d080fce80339a"
dependencies = [
"object",
"object 0.32.2",
]
[[package]]
@@ -868,9 +868,9 @@ dependencies = [
[[package]]
name = "aws-smithy-async"
version = "1.2.8"
version = "1.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9330762ee48c6cecfad2cb37b1506c16c8e858c90638eda2b1a7272b56f88bd5"
checksum = "9ee19095c7c4dda59f1697d028ce704c24b2d33c6718790c7f1d5a3015b4107c"
dependencies = [
"futures-util",
"pin-project-lite",
@@ -899,9 +899,9 @@ dependencies = [
[[package]]
name = "aws-smithy-eventstream"
version = "0.60.15"
version = "0.60.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0810b22ae554f5076c3eabe1fe89b01aee61c354c575789f67e248e83c5f472b"
checksum = "dc12f8b310e38cad85cf3bef45ad236f470717393c613266ce0a89512286b650"
dependencies = [
"aws-smithy-types",
"bytes",
@@ -932,9 +932,9 @@ dependencies = [
[[package]]
name = "aws-smithy-http-client"
version = "1.1.6"
version = "1.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec918f18147cec121cb142a91b0038f66d99bbe903e585dccf871920e90b22ab"
checksum = "59e62db736db19c488966c8d787f52e6270be565727236fd5579eaa301e7bc4a"
dependencies = [
"aws-smithy-async",
"aws-smithy-runtime-api",
@@ -965,18 +965,18 @@ dependencies = [
[[package]]
name = "aws-smithy-observability"
version = "0.2.1"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a700a7702874cd78b85fecdc9f64f3f72eb22fb713791cb445bcfd2a15bc1ecf"
checksum = "ef1fcbefc7ece1d70dcce29e490f269695dfca2d2bacdeaf9e5c3f799e4e6a42"
dependencies = [
"aws-smithy-runtime-api",
]
[[package]]
name = "aws-smithy-query"
version = "0.60.10"
version = "0.60.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adc4a6cdc289a37be7fddb7f4365448187d62c603a40e6d46d13c68e5e81900f"
checksum = "ae5d689cf437eae90460e944a58b5668530d433b4ff85789e69d2f2a556e057d"
dependencies = [
"aws-smithy-types",
"urlencoding",
@@ -1008,9 +1008,9 @@ dependencies = [
[[package]]
name = "aws-smithy-runtime-api"
version = "1.11.0"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c47b1e62accf759b01aba295e40479d1ba8fb77c2a54f0fed861c809ca49761"
checksum = "efce7aaaf59ad53c5412f14fc19b2d5c6ab2c3ec688d272fd31f76ec12f44fb0"
dependencies = [
"aws-smithy-async",
"aws-smithy-types",
@@ -1156,7 +1156,7 @@ dependencies = [
"cfg-if",
"libc",
"miniz_oxide",
"object",
"object 0.37.3",
"rustc-demangle",
"windows-link",
]
@@ -1453,9 +1453,9 @@ dependencies = [
[[package]]
name = "cc"
version = "1.2.54"
version = "1.2.53"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6354c81bbfd62d9cfa9cb3c773c2b7b2a3a482d569de977fd0e961f6e7c00583"
checksum = "755d2fce177175ffca841e9a06afdb2c4ab0f593d53b4dee48147dfaade85932"
dependencies = [
"find-msvc-tools",
"jobserver",
@@ -1592,9 +1592,9 @@ dependencies = [
[[package]]
name = "clap"
version = "4.5.55"
version = "4.5.56"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e34525d5bbbd55da2bb745d34b36121baac88d07619a9a09cfcf4a6c0832785"
checksum = "a75ca66430e33a14957acc24c5077b503e7d374151b2b4b3a10c83b4ceb4be0e"
dependencies = [
"clap_builder",
"clap_derive",
@@ -1602,9 +1602,9 @@ dependencies = [
[[package]]
name = "clap_builder"
version = "4.5.55"
version = "4.5.56"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59a20016a20a3da95bef50ec7238dbd09baeef4311dcdd38ec15aba69812fb61"
checksum = "793207c7fa6300a0608d1080b858e5fdbe713cdc1c8db9fb17777d8a13e63df0"
dependencies = [
"anstream",
"anstyle",
@@ -3257,6 +3257,7 @@ dependencies = [
"serial_test",
"suppaftp",
"tokio",
"tokio-stream",
"tonic",
"tracing",
"tracing-subscriber",
@@ -4011,9 +4012,9 @@ dependencies = [
[[package]]
name = "google-cloud-iam-v1"
version = "1.4.0"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4887dd50f1e7510e9c91b9581313827120f903c489426e15231f367ee191030c"
checksum = "f84b431125034e0928e41e8c117bcbc40b0b55b55464b2e964b26e1ffcb15323"
dependencies = [
"async-trait",
"bytes",
@@ -4022,7 +4023,7 @@ dependencies = [
"google-cloud-type",
"google-cloud-wkt",
"lazy_static",
"reqwest 0.13.1",
"reqwest 0.12.28",
"serde",
"serde_json",
"serde_with",
@@ -4031,9 +4032,9 @@ dependencies = [
[[package]]
name = "google-cloud-longrunning"
version = "1.5.0"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd5ac41700cf35600752386270a744345d550b843b5356feaa75064e97ceb94a"
checksum = "5d0612f4062f42b141b4d050d1a8a2f860e907a548bde28cb82d4fdf0eb346a3"
dependencies = [
"async-trait",
"bytes",
@@ -4042,7 +4043,7 @@ dependencies = [
"google-cloud-rpc",
"google-cloud-wkt",
"lazy_static",
"reqwest 0.13.1",
"reqwest 0.12.28",
"serde",
"serde_json",
"serde_with",
@@ -4051,9 +4052,9 @@ dependencies = [
[[package]]
name = "google-cloud-lro"
version = "1.3.0"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a52a26fb12ea9b67eab358dbb8dc5e22aac666cab947a10d15699ff95f203b87"
checksum = "49747b7b684b804a2d1040c2cdb21238b3d568a41ab9e36c423554509112f61d"
dependencies = [
"google-cloud-gax",
"google-cloud-longrunning",
@@ -4483,9 +4484,9 @@ checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424"
[[package]]
name = "hybrid-array"
version = "0.4.6"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b41fb3dc24fe72c2e3a4685eed55917c2fb228851257f4a8f2d985da9443c3e5"
checksum = "f471e0a81b2f90ffc0cb2f951ae04da57de8baa46fa99112b062a5173a5088d0"
dependencies = [
"typenum",
"zeroize",
@@ -4574,9 +4575,9 @@ dependencies = [
[[package]]
name = "iana-time-zone"
version = "0.1.65"
version = "0.1.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470"
checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb"
dependencies = [
"android_system_properties",
"core-foundation-sys",
@@ -5240,9 +5241,9 @@ dependencies = [
[[package]]
name = "liblzma-sys"
version = "0.4.5"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f2db66f3268487b5033077f266da6777d057949b8f93c8ad82e441df25e6186"
checksum = "01b9596486f6d60c3bbe644c0e1be1aa6ccc472ad630fe8927b456973d7cb736"
dependencies = [
"cc",
"libc",
@@ -5251,9 +5252,9 @@ dependencies = [
[[package]]
name = "libm"
version = "0.2.16"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981"
checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de"
[[package]]
name = "libmimalloc-sys"
@@ -5622,9 +5623,9 @@ checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084"
[[package]]
name = "neli"
version = "0.7.4"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22f9786d56d972959e1408b6a93be6af13b9c1392036c5c1fafa08a1b0c6ee87"
checksum = "e23bebbf3e157c402c4d5ee113233e5e0610cc27453b2f07eefce649c7365dcc"
dependencies = [
"bitflags 2.10.0",
"byteorder",
@@ -5734,12 +5735,9 @@ dependencies = [
[[package]]
name = "notify-types"
version = "2.1.0"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42b8cfee0e339a0337359f3c88165702ac6e600dc01c0cc9579a92d62b08477a"
dependencies = [
"bitflags 2.10.0",
]
checksum = "5e0826a989adedc2a244799e823aece04662b66609d96af8dff7ac6df9a8925d"
[[package]]
name = "ntapi"
@@ -5933,6 +5931,15 @@ dependencies = [
"objc2-core-foundation",
]
[[package]]
name = "object"
version = "0.32.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441"
dependencies = [
"memchr",
]
[[package]]
name = "object"
version = "0.37.3"
@@ -6001,9 +6008,9 @@ checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381"
[[package]]
name = "openssl-probe"
version = "0.2.1"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe"
checksum = "9f50d9b3dabb09ecd771ad0aa242ca6894994c130308ca3d7684634df8037391"
[[package]]
name = "opentelemetry"
@@ -6557,9 +6564,9 @@ dependencies = [
[[package]]
name = "pkcs8"
version = "0.11.0-rc.10"
version = "0.11.0-rc.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b226d2cc389763951db8869584fd800cbbe2962bf454e2edeb5172b31ee99774"
checksum = "77089aec8290d0b7bb01b671b091095cf1937670725af4fd73d47249f03b12c0"
dependencies = [
"der 0.8.0-rc.10",
"spki 0.8.0-rc.4",
@@ -6682,9 +6689,9 @@ checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
[[package]]
name = "ppmd-rust"
version = "1.4.0"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "efca4c95a19a79d1c98f791f10aebd5c1363b473244630bb7dbde1dc98455a24"
checksum = "d558c559f0450f16f2a27a1f017ef38468c1090c9ce63c8e51366232d53717b4"
[[package]]
name = "pprof"
@@ -6787,9 +6794,9 @@ dependencies = [
[[package]]
name = "proc-macro2"
version = "1.0.106"
version = "1.0.105"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
checksum = "535d180e0ecab6268a3e718bb9fd44db66bbbc256257165fc699dadf70d16fe7"
dependencies = [
"unicode-ident",
]
@@ -6947,9 +6954,9 @@ dependencies = [
[[package]]
name = "psm"
version = "0.1.29"
version = "0.1.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fa96cb91275ed31d6da3e983447320c4eb219ac180fa1679a0889ff32861e2d"
checksum = "d11f2fedc3b7dafdc2851bc52f277377c5473d378859be234bc7ebb593144d01"
dependencies = [
"ar_archive_writer",
"cc",
@@ -7072,9 +7079,9 @@ dependencies = [
[[package]]
name = "quote"
version = "1.0.44"
version = "1.0.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4"
checksum = "dc74d9a594b72ae6656596548f56f667211f8a97b3d4c3d467150794690dc40a"
dependencies = [
"proc-macro2",
]
@@ -7514,7 +7521,7 @@ dependencies = [
"crypto-primes",
"digest 0.11.0-rc.5",
"pkcs1",
"pkcs8 0.11.0-rc.10",
"pkcs8 0.11.0-rc.8",
"rand_core 0.10.0-rc-3",
"sha2 0.11.0-rc.3",
"signature 3.0.0-rc.6",
@@ -7745,7 +7752,6 @@ dependencies = [
"russh",
"russh-sftp",
"rust-embed",
"rustfs-ahm",
"rustfs-appauth",
"rustfs-audit",
"rustfs-common",
@@ -7753,6 +7759,7 @@ dependencies = [
"rustfs-credentials",
"rustfs-ecstore",
"rustfs-filemeta",
"rustfs-heal",
"rustfs-iam",
"rustfs-kms",
"rustfs-lock",
@@ -7798,38 +7805,6 @@ dependencies = [
"zip",
]
[[package]]
name = "rustfs-ahm"
version = "0.0.5"
dependencies = [
"anyhow",
"async-trait",
"chrono",
"futures",
"heed",
"rand 0.10.0-rc.6",
"reqwest 0.13.1",
"rustfs-common",
"rustfs-config",
"rustfs-ecstore",
"rustfs-filemeta",
"rustfs-madmin",
"rustfs-utils",
"s3s",
"serde",
"serde_json",
"serial_test",
"tempfile",
"thiserror 2.0.18",
"time",
"tokio",
"tokio-util",
"tracing",
"tracing-subscriber",
"uuid",
"walkdir",
]
[[package]]
name = "rustfs-appauth"
version = "0.0.5"
@@ -8036,6 +8011,38 @@ dependencies = [
"xxhash-rust",
]
[[package]]
name = "rustfs-heal"
version = "0.0.5"
dependencies = [
"anyhow",
"async-trait",
"chrono",
"futures",
"heed",
"rand 0.10.0-rc.6",
"reqwest 0.13.1",
"rustfs-common",
"rustfs-config",
"rustfs-ecstore",
"rustfs-filemeta",
"rustfs-madmin",
"rustfs-utils",
"s3s",
"serde",
"serde_json",
"serial_test",
"tempfile",
"thiserror 2.0.18",
"time",
"tokio",
"tokio-util",
"tracing",
"tracing-subscriber",
"uuid",
"walkdir",
]
[[package]]
name = "rustfs-iam"
version = "0.0.5"
@@ -8616,7 +8623,7 @@ checksum = "a50f4cf475b65d88e057964e0e9bb1f0aa9bbb2036dc65c64596b42932536984"
[[package]]
name = "s3s"
version = "0.13.0-alpha.2"
source = "git+https://github.com/s3s-project/s3s.git?branch=main#3cdb3fe22fe8a1b7fc3f71ead4beacac2683ba7f"
source = "git+https://github.com/s3s-project/s3s.git?branch=main#26ce04f59c14f130c87b23789d0c3723d9429e41"
dependencies = [
"arc-swap",
"arrayvec",
@@ -8790,9 +8797,9 @@ dependencies = [
[[package]]
name = "sec1"
version = "0.8.0-rc.13"
version = "0.8.0-rc.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a2400ed44a13193820aa528a19f376c3843141a8ce96ff34b11104cc79763f2"
checksum = "2568531a8ace88b848310caa98fb2115b151ef924d54aa523e659c21b9d32d71"
dependencies = [
"base16ct 1.0.0",
"hybrid-array",
@@ -9414,7 +9421,7 @@ dependencies = [
"ed25519-dalek 3.0.0-pre.4",
"rand_core 0.10.0-rc-3",
"rsa",
"sec1 0.8.0-rc.13",
"sec1 0.8.0-rc.11",
"sha2 0.11.0-rc.3",
"signature 3.0.0-rc.6",
"ssh-cipher 0.3.0-rc.5",
@@ -10133,9 +10140,9 @@ dependencies = [
[[package]]
name = "tonic-build"
version = "0.14.3"
version = "0.14.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "27aac809edf60b741e2d7db6367214d078856b8a5bff0087e94ff330fb97b6fc"
checksum = "4c40aaccc9f9eccf2cd82ebc111adc13030d23e887244bc9cfa5d1d636049de3"
dependencies = [
"prettyplease",
"proc-macro2",
@@ -10381,9 +10388,9 @@ checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb"
[[package]]
name = "tz-rs"
version = "0.7.2"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b2df019c305771deac375d9657fc75fa394bde69102a870c923e779c3746642"
checksum = "14eff19b8dc1ace5bf7e4d920b2628ae3837f422ff42210cb1567cbf68b5accf"
[[package]]
name = "tzdb"
@@ -11259,18 +11266,18 @@ dependencies = [
[[package]]
name = "zerocopy"
version = "0.8.35"
version = "0.8.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdea86ddd5568519879b8187e1cf04e24fce28f7fe046ceecbce472ff19a2572"
checksum = "668f5168d10b9ee831de31933dc111a459c97ec93225beb307aed970d1372dfd"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.8.35"
version = "0.8.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c15e1b46eff7c6c91195752e0eeed8ef040e391cdece7c25376957d5f15df22"
checksum = "2c7962b26b0a8685668b671ee4b54d007a67d4eaf05fda79ac0ecf41e32270f1"
dependencies = [
"proc-macro2",
"quote",
@@ -11387,9 +11394,9 @@ checksum = "40990edd51aae2c2b6907af74ffb635029d5788228222c4bb811e9351c0caad3"
[[package]]
name = "zmij"
version = "1.0.17"
version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02aae0f83f69aafc94776e879363e9771d7ecbffe2c7fbb6c14c5e00dfe88439"
checksum = "94f63c051f4fe3c1509da62131a678643c5b6fbdc9273b2b79d4378ebda003d2"
[[package]]
name = "zopfli"

View File

@@ -41,7 +41,7 @@ members = [
"crates/utils", # Utility functions and helpers
"crates/workers", # Worker thread pools and task scheduling
"crates/zip", # ZIP file handling and compression
"crates/ahm", # Asynchronous Hash Map for concurrent data structures
"crates/heal", # Erasure set and object healing
"crates/mcp", # MCP server for S3 operations
"crates/kms", # Key Management Service
]
@@ -67,7 +67,7 @@ all = "warn"
[workspace.dependencies]
# RustFS Internal Crates
rustfs = { path = "./rustfs", version = "0.0.5" }
rustfs-ahm = { path = "crates/ahm", version = "0.0.5" }
rustfs-heal = { path = "crates/heal", version = "0.0.5" }
rustfs-appauth = { path = "crates/appauth", version = "0.0.5" }
rustfs-audit = { path = "crates/audit", version = "0.0.5" }
rustfs-checksums = { path = "crates/checksums", version = "0.0.5" }

View File

@@ -1,326 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::scanner::node_scanner::ScanProgress;
use crate::{Error, Result};
use serde::{Deserialize, Serialize};
use std::{
path::{Path, PathBuf},
time::{Duration, SystemTime},
};
use tokio::sync::RwLock;
use tracing::{debug, error, info, warn};
/// Serialized snapshot of scanner progress persisted to disk so a scan can
/// resume after a process restart.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct CheckpointData {
    /// Checkpoint format version (currently 1); loaders reject newer versions.
    pub version: u32,
    /// Wall-clock time this checkpoint was created; checkpoints older than
    /// 24h are rejected on load.
    pub timestamp: SystemTime,
    /// The scan progress being checkpointed.
    pub progress: ScanProgress,
    /// Identifier of the node that wrote the checkpoint; loading on a
    /// different node is rejected.
    pub node_id: String,
    /// Integrity digest over the identity-relevant fields
    /// (see `calculate_checksum`).
    pub checksum: u64,
}
impl CheckpointData {
pub fn new(progress: ScanProgress, node_id: String) -> Self {
let mut checkpoint = Self {
version: 1,
timestamp: SystemTime::now(),
progress,
node_id,
checksum: 0,
};
checkpoint.checksum = checkpoint.calculate_checksum();
checkpoint
}
fn calculate_checksum(&self) -> u64 {
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
let mut hasher = DefaultHasher::new();
self.version.hash(&mut hasher);
self.node_id.hash(&mut hasher);
self.progress.current_cycle.hash(&mut hasher);
self.progress.current_disk_index.hash(&mut hasher);
if let Some(ref bucket) = self.progress.current_bucket {
bucket.hash(&mut hasher);
}
if let Some(ref key) = self.progress.last_scan_key {
key.hash(&mut hasher);
}
hasher.finish()
}
pub fn verify_integrity(&self) -> bool {
let calculated_checksum = self.calculate_checksum();
self.checksum == calculated_checksum
}
}
/// Manages durable persistence of scanner checkpoints: writes are staged to
/// a temp file and renamed into place, with the previous checkpoint kept as
/// a backup for recovery.
pub struct CheckpointManager {
    /// Primary checkpoint file (`scanner_checkpoint_<node>.json`).
    checkpoint_file: PathBuf,
    /// Backup copy of the previous checkpoint (`.backup` extension).
    backup_file: PathBuf,
    /// Staging file written before the rename (`.tmp` extension).
    temp_file: PathBuf,
    /// Minimum interval between saves; earlier saves are silently skipped.
    save_interval: Duration,
    /// Time of the last successful save, used to throttle writes.
    last_save: RwLock<SystemTime>,
    /// Identity of this node, embedded in and validated against checkpoints.
    node_id: String,
}
impl CheckpointManager {
    /// Create a manager for `node_id`, deriving the checkpoint, backup and
    /// temp file paths under `data_dir`.
    ///
    /// Creates `data_dir` if it does not exist; a creation failure is logged
    /// but not propagated (subsequent saves will surface the I/O error).
    pub fn new(node_id: &str, data_dir: &Path) -> Self {
        if !data_dir.exists()
            && let Err(e) = std::fs::create_dir_all(data_dir)
        {
            error!("create data dir failed {:?}: {}", data_dir, e);
        }
        let checkpoint_file = data_dir.join(format!("scanner_checkpoint_{node_id}.json"));
        let backup_file = data_dir.join(format!("scanner_checkpoint_{node_id}.backup"));
        let temp_file = data_dir.join(format!("scanner_checkpoint_{node_id}.tmp"));
        Self {
            checkpoint_file,
            backup_file,
            temp_file,
            // Default throttle between saves: 30 seconds.
            save_interval: Duration::from_secs(30), // 30s
            // UNIX_EPOCH so the very first save is never throttled.
            last_save: RwLock::new(SystemTime::UNIX_EPOCH),
            node_id: node_id.to_string(),
        }
    }

    /// Persist `progress` if at least `save_interval` has elapsed since the
    /// last save; otherwise this is a no-op returning `Ok(())`.
    ///
    /// Write sequence: serialize → write temp file → copy current checkpoint
    /// to backup → rename temp over the main file. The final rename replaces
    /// the checkpoint in one step so readers never observe a partial file.
    pub async fn save_checkpoint(&self, progress: &ScanProgress) -> Result<()> {
        let now = SystemTime::now();
        let last_save = *self.last_save.read().await;
        // Throttle: skip saves that arrive within the configured interval.
        if now.duration_since(last_save).unwrap_or(Duration::ZERO) < self.save_interval {
            return Ok(());
        }
        let checkpoint_data = CheckpointData::new(progress.clone(), self.node_id.clone());
        let json_data = serde_json::to_string_pretty(&checkpoint_data)
            .map_err(|e| Error::Serialization(format!("serialize checkpoint failed: {e}")))?;
        tokio::fs::write(&self.temp_file, json_data)
            .await
            .map_err(|e| Error::IO(format!("write temp checkpoint file failed: {e}")))?;
        // Preserve the previous checkpoint before overwriting it.
        if self.checkpoint_file.exists() {
            tokio::fs::copy(&self.checkpoint_file, &self.backup_file)
                .await
                .map_err(|e| Error::IO(format!("backup checkpoint file failed: {e}")))?;
        }
        tokio::fs::rename(&self.temp_file, &self.checkpoint_file)
            .await
            .map_err(|e| Error::IO(format!("replace checkpoint file failed: {e}")))?;
        *self.last_save.write().await = now;
        debug!(
            "save checkpoint to {:?}, cycle: {}, disk index: {}",
            self.checkpoint_file, checkpoint_data.progress.current_cycle, checkpoint_data.progress.current_disk_index
        );
        Ok(())
    }

    /// Restore scan progress, trying the main checkpoint first and falling
    /// back to the backup. Returns `Ok(None)` when neither file yields a
    /// valid checkpoint, signalling the caller to start a fresh scan.
    pub async fn load_checkpoint(&self) -> Result<Option<ScanProgress>> {
        // first try main checkpoint file
        match self.load_checkpoint_from_file(&self.checkpoint_file).await {
            Ok(checkpoint) => {
                info!(
                    "restore scan progress from main checkpoint file: cycle={}, disk index={}, last scan key={:?}",
                    checkpoint.current_cycle, checkpoint.current_disk_index, checkpoint.last_scan_key
                );
                Ok(Some(checkpoint))
            }
            Err(e) => {
                warn!("main checkpoint file is corrupted or not exists: {}", e);
                // try backup file
                match self.load_checkpoint_from_file(&self.backup_file).await {
                    Ok(checkpoint) => {
                        warn!(
                            "restore scan progress from backup file: cycle={}, disk index={}",
                            checkpoint.current_cycle, checkpoint.current_disk_index
                        );
                        // copy backup file to main checkpoint file
                        // (best effort: failure only logged, progress is
                        // already recovered in memory)
                        if let Err(copy_err) = tokio::fs::copy(&self.backup_file, &self.checkpoint_file).await {
                            warn!("restore main checkpoint file failed: {}", copy_err);
                        }
                        Ok(Some(checkpoint))
                    }
                    Err(backup_e) => {
                        warn!("backup file is corrupted or not exists: {}", backup_e);
                        info!("cannot restore scan progress, will start fresh scan");
                        Ok(None)
                    }
                }
            }
        }
    }

    /// Load and validate a checkpoint from a single file.
    ///
    /// Errors on: missing file, unreadable content, malformed JSON, or any
    /// validation failure (see `validate_checkpoint`).
    async fn load_checkpoint_from_file(&self, file_path: &Path) -> Result<ScanProgress> {
        if !file_path.exists() {
            return Err(Error::NotFound(format!("checkpoint file not exists: {file_path:?}")));
        }
        // read file content
        let content = tokio::fs::read_to_string(file_path)
            .await
            .map_err(|e| Error::IO(format!("read checkpoint file failed: {e}")))?;
        // deserialize
        let checkpoint_data: CheckpointData =
            serde_json::from_str(&content).map_err(|e| Error::Serialization(format!("deserialize checkpoint failed: {e}")))?;
        // validate checkpoint data
        self.validate_checkpoint(&checkpoint_data)?;
        Ok(checkpoint_data.progress)
    }

    /// Validate a deserialized checkpoint: checksum integrity, owning node,
    /// age (≤ 24h), and format version (≤ 1).
    fn validate_checkpoint(&self, checkpoint: &CheckpointData) -> Result<()> {
        // validate data integrity
        if !checkpoint.verify_integrity() {
            return Err(Error::InvalidCheckpoint(
                "checkpoint data verification failed, may be corrupted".to_string(),
            ));
        }
        // validate node id match
        if checkpoint.node_id != self.node_id {
            return Err(Error::InvalidCheckpoint(format!(
                "checkpoint node id not match: expected {}, actual {}",
                self.node_id, checkpoint.node_id
            )));
        }
        let now = SystemTime::now();
        // Duration::MAX on clock drift ensures a non-monotonic timestamp is
        // treated as "too old" rather than accepted.
        let checkpoint_age = now.duration_since(checkpoint.timestamp).unwrap_or(Duration::MAX);
        // checkpoint is too old (more than 24 hours), may be data expired
        if checkpoint_age > Duration::from_secs(24 * 3600) {
            return Err(Error::InvalidCheckpoint(format!("checkpoint data is too old: {checkpoint_age:?}")));
        }
        // validate version compatibility
        if checkpoint.version > 1 {
            return Err(Error::InvalidCheckpoint(format!(
                "unsupported checkpoint version: {}",
                checkpoint.version
            )));
        }
        Ok(())
    }

    /// Delete the main, backup and temp checkpoint files (if present).
    ///
    /// Called when the scanner stops or resets so a later start begins a
    /// fresh scan.
    pub async fn cleanup_checkpoint(&self) -> Result<()> {
        // delete main file
        if self.checkpoint_file.exists() {
            tokio::fs::remove_file(&self.checkpoint_file)
                .await
                .map_err(|e| Error::IO(format!("delete main checkpoint file failed: {e}")))?;
        }
        // delete backup file
        if self.backup_file.exists() {
            tokio::fs::remove_file(&self.backup_file)
                .await
                .map_err(|e| Error::IO(format!("delete backup checkpoint file failed: {e}")))?;
        }
        // delete temp file
        if self.temp_file.exists() {
            tokio::fs::remove_file(&self.temp_file)
                .await
                .map_err(|e| Error::IO(format!("delete temp checkpoint file failed: {e}")))?;
        }
        info!("cleaned up all checkpoint files");
        Ok(())
    }

    /// Summarize the current main checkpoint file (size, timestamps, cycle
    /// position, validity). Returns `Ok(None)` when no checkpoint exists.
    pub async fn get_checkpoint_info(&self) -> Result<Option<CheckpointInfo>> {
        if !self.checkpoint_file.exists() {
            return Ok(None);
        }
        let metadata = tokio::fs::metadata(&self.checkpoint_file)
            .await
            .map_err(|e| Error::IO(format!("get checkpoint file metadata failed: {e}")))?;
        let content = tokio::fs::read_to_string(&self.checkpoint_file)
            .await
            .map_err(|e| Error::IO(format!("read checkpoint file failed: {e}")))?;
        let checkpoint_data: CheckpointData =
            serde_json::from_str(&content).map_err(|e| Error::Serialization(format!("deserialize checkpoint failed: {e}")))?;
        Ok(Some(CheckpointInfo {
            file_size: metadata.len(),
            last_modified: metadata.modified().unwrap_or(SystemTime::UNIX_EPOCH),
            checkpoint_timestamp: checkpoint_data.timestamp,
            current_cycle: checkpoint_data.progress.current_cycle,
            current_disk_index: checkpoint_data.progress.current_disk_index,
            completed_disks_count: checkpoint_data.progress.completed_disks.len(),
            is_valid: checkpoint_data.verify_integrity(),
        }))
    }

    /// force save checkpoint (ignore time interval limit)
    pub async fn force_save_checkpoint(&self, progress: &ScanProgress) -> Result<()> {
        // temporarily reset last save time, force save
        *self.last_save.write().await = SystemTime::UNIX_EPOCH;
        self.save_checkpoint(progress).await
    }

    /// Set the minimum interval between throttled saves.
    /// Requires `&mut self`, so it can only be called before the manager is
    /// shared.
    pub async fn set_save_interval(&mut self, interval: Duration) {
        self.save_interval = interval;
        info!("checkpoint save interval set to: {:?}", interval);
    }
}
/// Read-only summary of the on-disk checkpoint file, produced by
/// `CheckpointManager::get_checkpoint_info`.
#[derive(Debug, Clone)]
pub struct CheckpointInfo {
    /// Checkpoint file size in bytes.
    pub file_size: u64,
    /// File last-modified time from filesystem metadata
    /// (UNIX_EPOCH if unavailable).
    pub last_modified: SystemTime,
    /// Creation timestamp stored inside the checkpoint itself.
    pub checkpoint_timestamp: SystemTime,
    /// Scan cycle recorded in the checkpoint.
    pub current_cycle: u64,
    /// Disk index the scan had reached.
    pub current_disk_index: usize,
    /// Number of disks already fully scanned.
    pub completed_disks_count: usize,
    /// Whether the checkpoint's integrity checksum verified successfully.
    pub is_valid: bool,
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,305 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use serde::{Deserialize, Serialize};
use std::{
collections::HashMap,
sync::atomic::{AtomicU64, Ordering},
time::{Duration, SystemTime},
};
use tracing::info;
/// Scanner metrics snapshot.
///
/// Counter fields are cumulative since server start; derived/rate fields
/// and the per-bucket/per-disk maps are filled in by the scanner after a
/// raw snapshot is taken (see `MetricsCollector::get_metrics`).
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ScannerMetrics {
    /// Total objects scanned since server start
    pub objects_scanned: u64,
    /// Total object versions scanned since server start
    pub versions_scanned: u64,
    /// Total directories scanned since server start
    pub directories_scanned: u64,
    /// Total bucket scans started since server start
    pub bucket_scans_started: u64,
    /// Total bucket scans finished since server start
    pub bucket_scans_finished: u64,
    /// Total objects with health issues found
    pub objects_with_issues: u64,
    /// Total heal tasks queued
    pub heal_tasks_queued: u64,
    /// Total heal tasks completed
    pub heal_tasks_completed: u64,
    /// Total heal tasks failed
    pub heal_tasks_failed: u64,
    /// Total healthy objects found
    pub healthy_objects: u64,
    /// Total corrupted objects found
    pub corrupted_objects: u64,
    /// Last scan activity time (set to snapshot time by the collector)
    pub last_activity: Option<SystemTime>,
    /// Current scan cycle
    pub current_cycle: u64,
    /// Total scan cycles completed
    pub total_cycles: u64,
    /// Current scan duration (populated by the scanner, not the collector)
    pub current_scan_duration: Option<Duration>,
    /// Average scan duration (calculated by the scanner)
    pub avg_scan_duration: Duration,
    /// Objects scanned per second (calculated by the scanner)
    pub objects_per_second: f64,
    /// Buckets scanned per second (calculated by the scanner)
    pub buckets_per_second: f64,
    /// Storage metrics keyed by bucket name (populated by the scanner)
    pub bucket_metrics: HashMap<String, BucketMetrics>,
    /// Disk metrics keyed by disk path (populated by the scanner)
    pub disk_metrics: HashMap<String, DiskMetrics>,
}
/// Per-bucket scan and heal metrics.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct BucketMetrics {
    /// Bucket name
    pub bucket: String,
    /// Total objects in bucket
    pub total_objects: u64,
    /// Total size of objects in bucket (bytes)
    pub total_size: u64,
    /// Objects with health issues
    pub objects_with_issues: u64,
    /// Last scan time (None if the bucket has not been scanned yet)
    pub last_scan_time: Option<SystemTime>,
    /// Duration of the last scan of this bucket
    pub scan_duration: Option<Duration>,
    /// Heal tasks queued for this bucket
    pub heal_tasks_queued: u64,
    /// Heal tasks completed for this bucket
    pub heal_tasks_completed: u64,
    /// Heal tasks failed for this bucket
    pub heal_tasks_failed: u64,
}
/// Per-disk capacity and scan-state metrics.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct DiskMetrics {
    /// Disk path
    pub disk_path: String,
    /// Total disk space (bytes)
    pub total_space: u64,
    /// Used disk space (bytes)
    pub used_space: u64,
    /// Free disk space (bytes)
    pub free_space: u64,
    /// Objects scanned on this disk
    pub objects_scanned: u64,
    /// Objects with issues on this disk
    pub objects_with_issues: u64,
    /// Last scan time (None if the disk has not been scanned yet)
    pub last_scan_time: Option<SystemTime>,
    /// Whether disk is online
    pub is_online: bool,
    /// Whether disk is currently being scanned
    pub is_scanning: bool,
}
/// Thread-safe metrics collector.
///
/// All counters are `AtomicU64`s updated with `Ordering::Relaxed`, so
/// increments are lock-free and cheap; a snapshot taken while counters are
/// being updated is not guaranteed to be mutually consistent across fields.
pub struct MetricsCollector {
    /// Atomic counters for real-time metrics
    objects_scanned: AtomicU64,
    versions_scanned: AtomicU64,
    directories_scanned: AtomicU64,
    bucket_scans_started: AtomicU64,
    bucket_scans_finished: AtomicU64,
    objects_with_issues: AtomicU64,
    heal_tasks_queued: AtomicU64,
    heal_tasks_completed: AtomicU64,
    heal_tasks_failed: AtomicU64,
    // current_cycle is a gauge (stored), the rest are monotonic counters.
    current_cycle: AtomicU64,
    total_cycles: AtomicU64,
    healthy_objects: AtomicU64,
    corrupted_objects: AtomicU64,
}
impl MetricsCollector {
/// Create a new metrics collector
pub fn new() -> Self {
Self {
objects_scanned: AtomicU64::new(0),
versions_scanned: AtomicU64::new(0),
directories_scanned: AtomicU64::new(0),
bucket_scans_started: AtomicU64::new(0),
bucket_scans_finished: AtomicU64::new(0),
objects_with_issues: AtomicU64::new(0),
heal_tasks_queued: AtomicU64::new(0),
heal_tasks_completed: AtomicU64::new(0),
heal_tasks_failed: AtomicU64::new(0),
current_cycle: AtomicU64::new(0),
total_cycles: AtomicU64::new(0),
healthy_objects: AtomicU64::new(0),
corrupted_objects: AtomicU64::new(0),
}
}
/// Increment objects scanned count
pub fn increment_objects_scanned(&self, count: u64) {
self.objects_scanned.fetch_add(count, Ordering::Relaxed);
}
/// Increment versions scanned count
pub fn increment_versions_scanned(&self, count: u64) {
self.versions_scanned.fetch_add(count, Ordering::Relaxed);
}
/// Increment directories scanned count
pub fn increment_directories_scanned(&self, count: u64) {
self.directories_scanned.fetch_add(count, Ordering::Relaxed);
}
/// Increment bucket scans started count
pub fn increment_bucket_scans_started(&self, count: u64) {
self.bucket_scans_started.fetch_add(count, Ordering::Relaxed);
}
/// Increment bucket scans finished count
pub fn increment_bucket_scans_finished(&self, count: u64) {
self.bucket_scans_finished.fetch_add(count, Ordering::Relaxed);
}
/// Increment objects with issues count
pub fn increment_objects_with_issues(&self, count: u64) {
self.objects_with_issues.fetch_add(count, Ordering::Relaxed);
}
/// Increment heal tasks queued count
pub fn increment_heal_tasks_queued(&self, count: u64) {
self.heal_tasks_queued.fetch_add(count, Ordering::Relaxed);
}
/// Increment heal tasks completed count
pub fn increment_heal_tasks_completed(&self, count: u64) {
self.heal_tasks_completed.fetch_add(count, Ordering::Relaxed);
}
/// Increment heal tasks failed count
pub fn increment_heal_tasks_failed(&self, count: u64) {
self.heal_tasks_failed.fetch_add(count, Ordering::Relaxed);
}
/// Set current cycle
pub fn set_current_cycle(&self, cycle: u64) {
self.current_cycle.store(cycle, Ordering::Relaxed);
}
/// Increment total cycles
pub fn increment_total_cycles(&self) {
self.total_cycles.fetch_add(1, Ordering::Relaxed);
}
    /// Increment the healthy-object count by one (relaxed atomic add).
    pub fn increment_healthy_objects(&self) {
        self.healthy_objects.fetch_add(1, Ordering::Relaxed);
    }
    /// Increment the corrupted-object count by one (relaxed atomic add).
    pub fn increment_corrupted_objects(&self) {
        self.corrupted_objects.fetch_add(1, Ordering::Relaxed);
    }
/// Get current metrics snapshot
pub fn get_metrics(&self) -> ScannerMetrics {
ScannerMetrics {
objects_scanned: self.objects_scanned.load(Ordering::Relaxed),
versions_scanned: self.versions_scanned.load(Ordering::Relaxed),
directories_scanned: self.directories_scanned.load(Ordering::Relaxed),
bucket_scans_started: self.bucket_scans_started.load(Ordering::Relaxed),
bucket_scans_finished: self.bucket_scans_finished.load(Ordering::Relaxed),
objects_with_issues: self.objects_with_issues.load(Ordering::Relaxed),
heal_tasks_queued: self.heal_tasks_queued.load(Ordering::Relaxed),
heal_tasks_completed: self.heal_tasks_completed.load(Ordering::Relaxed),
heal_tasks_failed: self.heal_tasks_failed.load(Ordering::Relaxed),
healthy_objects: self.healthy_objects.load(Ordering::Relaxed),
corrupted_objects: self.corrupted_objects.load(Ordering::Relaxed),
last_activity: Some(SystemTime::now()),
current_cycle: self.current_cycle.load(Ordering::Relaxed),
total_cycles: self.total_cycles.load(Ordering::Relaxed),
current_scan_duration: None, // Will be set by scanner
avg_scan_duration: Duration::ZERO, // Will be calculated
objects_per_second: 0.0, // Will be calculated
buckets_per_second: 0.0, // Will be calculated
bucket_metrics: HashMap::new(), // Will be populated by scanner
disk_metrics: HashMap::new(), // Will be populated by scanner
}
}
/// Reset all metrics
pub fn reset(&self) {
self.objects_scanned.store(0, Ordering::Relaxed);
self.versions_scanned.store(0, Ordering::Relaxed);
self.directories_scanned.store(0, Ordering::Relaxed);
self.bucket_scans_started.store(0, Ordering::Relaxed);
self.bucket_scans_finished.store(0, Ordering::Relaxed);
self.objects_with_issues.store(0, Ordering::Relaxed);
self.heal_tasks_queued.store(0, Ordering::Relaxed);
self.heal_tasks_completed.store(0, Ordering::Relaxed);
self.heal_tasks_failed.store(0, Ordering::Relaxed);
self.current_cycle.store(0, Ordering::Relaxed);
self.total_cycles.store(0, Ordering::Relaxed);
self.healthy_objects.store(0, Ordering::Relaxed);
self.corrupted_objects.store(0, Ordering::Relaxed);
info!("Scanner metrics reset");
}
}
impl Default for MetricsCollector {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly created collector reports all-zero counters.
    #[test]
    fn test_metrics_collector_creation() {
        let mc = MetricsCollector::new();
        let snapshot = mc.get_metrics();
        assert_eq!(snapshot.objects_scanned, 0);
        assert_eq!(snapshot.versions_scanned, 0);
    }

    /// Increments are reflected in the next snapshot.
    #[test]
    fn test_metrics_increment() {
        let mc = MetricsCollector::new();
        mc.increment_objects_scanned(10);
        mc.increment_versions_scanned(5);
        mc.increment_objects_with_issues(2);
        let snapshot = mc.get_metrics();
        assert_eq!(snapshot.objects_scanned, 10);
        assert_eq!(snapshot.versions_scanned, 5);
        assert_eq!(snapshot.objects_with_issues, 2);
    }

    /// `reset` zeroes previously incremented counters.
    #[test]
    fn test_metrics_reset() {
        let mc = MetricsCollector::new();
        mc.increment_objects_scanned(10);
        mc.reset();
        assert_eq!(mc.get_metrics().objects_scanned, 0);
    }
}

View File

@@ -1,555 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::Result;
use crate::scanner::LoadLevel;
use serde::{Deserialize, Serialize};
use std::{
collections::VecDeque,
sync::{
Arc,
atomic::{AtomicU64, Ordering},
},
time::{Duration, SystemTime},
};
use tokio::sync::RwLock;
use tokio_util::sync::CancellationToken;
use tracing::{debug, error, info, warn};
/// Configuration for the IO monitor.
#[derive(Debug, Clone)]
pub struct IOMonitorConfig {
    /// Interval between metric samples.
    pub monitor_interval: Duration,
    /// How long sampled metrics are retained in the history window.
    pub history_retention: Duration,
    /// Maximum number of samples kept for load evaluation (sliding window).
    pub load_window_size: usize,
    /// Whether to collect real system metrics (false => simulated data).
    pub enable_system_monitoring: bool,
    /// Disk path list (for monitoring specific disks).
    pub disk_paths: Vec<String>,
}
impl Default for IOMonitorConfig {
fn default() -> Self {
Self {
monitor_interval: Duration::from_secs(1), // 1 second monitor interval
history_retention: Duration::from_secs(300), // keep 5 minutes history
load_window_size: 30, // 30 sample points sliding window
enable_system_monitoring: false, // default use simulated data
disk_paths: Vec::new(),
}
}
}
/// One IO monitor sample.
///
/// Latencies are in milliseconds; usage/utilization fields are percentages
/// (0-100); network IO is in Mbps.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IOMetrics {
    /// Time the sample was taken.
    pub timestamp: SystemTime,
    /// Disk IOPS (read + write).
    pub iops: u64,
    /// Read IOPS.
    pub read_iops: u64,
    /// Write IOPS.
    pub write_iops: u64,
    /// Disk queue depth.
    pub queue_depth: u64,
    /// Average latency (milliseconds).
    pub avg_latency: u64,
    /// Read latency (milliseconds).
    pub read_latency: u64,
    /// Write latency (milliseconds).
    pub write_latency: u64,
    /// CPU usage (0-100).
    pub cpu_usage: u8,
    /// Memory usage (0-100).
    pub memory_usage: u8,
    /// Disk utilization (0-100).
    pub disk_utilization: u8,
    /// Network IO (Mbps).
    pub network_io: u64,
}
impl Default for IOMetrics {
fn default() -> Self {
Self {
timestamp: SystemTime::now(),
iops: 0,
read_iops: 0,
write_iops: 0,
queue_depth: 0,
avg_latency: 0,
read_latency: 0,
write_latency: 0,
cpu_usage: 0,
memory_usage: 0,
disk_utilization: 0,
network_io: 0,
}
}
}
/// Accumulated time spent at each load level, plus the transition count.
#[derive(Debug, Clone, Default)]
pub struct LoadLevelStats {
    /// Low load duration (seconds).
    pub low_load_duration: u64,
    /// Medium load duration (seconds).
    pub medium_load_duration: u64,
    /// High load duration (seconds).
    pub high_load_duration: u64,
    /// Critical load duration (seconds).
    pub critical_load_duration: u64,
    /// Number of load-level transitions observed.
    pub load_transitions: u64,
}
/// Advanced IO monitor: periodically samples system/business metrics in a
/// background task and derives a load level from them. Every field is behind
/// an `Arc`, so a handle can be cloned cheaply for the background loop.
pub struct AdvancedIOMonitor {
    /// Configuration.
    config: Arc<RwLock<IOMonitorConfig>>,
    /// Most recent sample.
    current_metrics: Arc<RwLock<IOMetrics>>,
    /// History samples (sliding window).
    history_metrics: Arc<RwLock<VecDeque<IOMetrics>>>,
    /// Current load level.
    current_load_level: Arc<RwLock<LoadLevel>>,
    /// Load level transition history (bounded; see `handle_load_level_change`).
    load_level_history: Arc<RwLock<VecDeque<(SystemTime, LoadLevel)>>>,
    /// Per-level duration statistics.
    load_stats: Arc<RwLock<LoadLevelStats>>,
    /// Business IO metrics (updated by external callers).
    business_metrics: Arc<BusinessIOMetrics>,
    /// Cancellation token that stops the monitoring loop.
    cancel_token: CancellationToken,
}
/// Business IO metrics reported from outside the monitor.
pub struct BusinessIOMetrics {
    /// Business request latency (milliseconds).
    pub request_latency: AtomicU64,
    /// Business request QPS.
    pub request_qps: AtomicU64,
    /// Business error rate (0-10000, i.e. 0.00%-100.00%).
    pub error_rate: AtomicU64,
    /// Active connections.
    pub active_connections: AtomicU64,
    /// Last update time.
    pub last_update: Arc<RwLock<SystemTime>>,
}
impl Default for BusinessIOMetrics {
fn default() -> Self {
Self {
request_latency: AtomicU64::new(0),
request_qps: AtomicU64::new(0),
error_rate: AtomicU64::new(0),
active_connections: AtomicU64::new(0),
last_update: Arc::new(RwLock::new(SystemTime::UNIX_EPOCH)),
}
}
}
impl AdvancedIOMonitor {
    /// Create a new advanced IO monitor from the given configuration.
    /// Nothing is sampled until [`start`](Self::start) is called.
    pub fn new(config: IOMonitorConfig) -> Self {
        Self {
            config: Arc::new(RwLock::new(config)),
            current_metrics: Arc::new(RwLock::new(IOMetrics::default())),
            history_metrics: Arc::new(RwLock::new(VecDeque::new())),
            current_load_level: Arc::new(RwLock::new(LoadLevel::Low)),
            load_level_history: Arc::new(RwLock::new(VecDeque::new())),
            load_stats: Arc::new(RwLock::new(LoadLevelStats::default())),
            business_metrics: Arc::new(BusinessIOMetrics::default()),
            cancel_token: CancellationToken::new(),
        }
    }
    /// Start monitoring: spawn the background sampling loop and return immediately.
    pub async fn start(&self) -> Result<()> {
        info!("start advanced IO monitor");
        let monitor = self.clone_for_background();
        tokio::spawn(async move {
            if let Err(e) = monitor.monitoring_loop().await {
                error!("IO monitoring loop failed: {}", e);
            }
        });
        Ok(())
    }
    /// Stop monitoring: cancels the shared token, making the background loop
    /// exit on its next `select!` round.
    pub async fn stop(&self) {
        info!("stop IO monitor");
        self.cancel_token.cancel();
    }
    /// Background sampling loop: on every tick collect metrics, refresh the
    /// sliding history window, recompute the load level, and record any
    /// load-level transition. Runs until the cancel token fires.
    async fn monitoring_loop(&self) -> Result<()> {
        // The tick interval is captured once here; later changes to
        // `monitor_interval` do not affect an already-running loop.
        let mut interval = {
            let config = self.config.read().await;
            tokio::time::interval(config.monitor_interval)
        };
        let mut last_load_level = LoadLevel::Low;
        let mut load_level_start_time = SystemTime::now();
        loop {
            tokio::select! {
                _ = self.cancel_token.cancelled() => {
                    info!("IO monitoring loop cancelled");
                    break;
                }
                _ = interval.tick() => {
                    // collect system metrics
                    let metrics = self.collect_system_metrics().await;
                    // update current metrics
                    *self.current_metrics.write().await = metrics.clone();
                    // update history metrics
                    self.update_metrics_history(metrics.clone()).await;
                    // calculate load level
                    let new_load_level = self.calculate_load_level(&metrics).await;
                    // on a transition, account the time spent in the previous
                    // level before switching
                    if new_load_level != last_load_level {
                        self.handle_load_level_change(last_load_level, new_load_level, load_level_start_time).await;
                        last_load_level = new_load_level;
                        load_level_start_time = SystemTime::now();
                    }
                    // update current load level
                    *self.current_load_level.write().await = new_load_level;
                    debug!("IO monitor updated: IOPS={}, queue depth={}, latency={}ms, load level={:?}",
                        metrics.iops, metrics.queue_depth, metrics.avg_latency, new_load_level);
                }
            }
        }
        Ok(())
    }
    /// Collect one metrics sample: real system metrics when
    /// `enable_system_monitoring` is set, simulated data otherwise.
    async fn collect_system_metrics(&self) -> IOMetrics {
        let config = self.config.read().await;
        if config.enable_system_monitoring {
            // actual system monitoring implementation
            self.collect_real_system_metrics().await
        } else {
            // simulated data
            self.generate_simulated_metrics().await
        }
    }
    /// Collect real system metrics.
    ///
    /// NOTE(review): currently a placeholder — the /proc files are read but
    /// never parsed, so this returns default (all-zero) metrics with a fresh
    /// timestamp.
    async fn collect_real_system_metrics(&self) -> IOMetrics {
        // TODO: implement actual system metrics collection
        // can use procfs, sysfs or other system API
        let metrics = IOMetrics {
            timestamp: SystemTime::now(),
            ..Default::default()
        };
        // example: read /proc/diskstats
        if let Ok(diskstats) = tokio::fs::read_to_string("/proc/diskstats").await {
            // parse disk stats info
            // here need to implement specific parsing logic
            debug!("read disk stats info: {} bytes", diskstats.len());
        }
        // example: read /proc/stat to get CPU info
        if let Ok(stat) = tokio::fs::read_to_string("/proc/stat").await {
            // parse CPU stats info
            debug!("read CPU stats info: {} bytes", stat.len());
        }
        // example: read /proc/meminfo to get memory info
        if let Ok(meminfo) = tokio::fs::read_to_string("/proc/meminfo").await {
            // parse memory stats info
            debug!("read memory stats info: {} bytes", meminfo.len());
        }
        metrics
    }
    /// Generate simulated metrics (for testing and development): base IOPS and
    /// latency scale with the externally reported business QPS/latency, with
    /// random jitter on top.
    async fn generate_simulated_metrics(&self) -> IOMetrics {
        use rand::Rng;
        let mut rng = rand::rng();
        // get business metrics impact
        let business_latency = self.business_metrics.request_latency.load(Ordering::Relaxed);
        let business_qps = self.business_metrics.request_qps.load(Ordering::Relaxed);
        // generate simulated system metrics based on business load
        let base_iops = 100 + (business_qps / 10);
        let base_latency = 5 + (business_latency / 10);
        IOMetrics {
            timestamp: SystemTime::now(),
            iops: base_iops + rng.random_range(0..50),
            read_iops: (base_iops * 6 / 10) + rng.random_range(0..20),
            write_iops: (base_iops * 4 / 10) + rng.random_range(0..20),
            queue_depth: rng.random_range(1..20),
            avg_latency: base_latency + rng.random_range(0..10),
            read_latency: base_latency + rng.random_range(0..5),
            write_latency: base_latency + rng.random_range(0..15),
            cpu_usage: rng.random_range(10..70),
            memory_usage: rng.random_range(30..80),
            disk_utilization: rng.random_range(20..90),
            network_io: rng.random_range(10..1000),
        }
    }
    /// Append a sample to the history window, dropping entries older than
    /// `history_retention` and trimming to at most `load_window_size` samples.
    async fn update_metrics_history(&self, metrics: IOMetrics) {
        let mut history = self.history_metrics.write().await;
        let config = self.config.read().await;
        // add new metrics
        history.push_back(metrics);
        // clean expired data
        let retention_cutoff = SystemTime::now() - config.history_retention;
        while let Some(front) = history.front() {
            if front.timestamp < retention_cutoff {
                history.pop_front();
            } else {
                break;
            }
        }
        // limit window size
        while history.len() > config.load_window_size {
            history.pop_front();
        }
    }
    /// Calculate the load level from a weighted multi-dimensional score:
    /// IOPS (25%), latency (30%), queue depth (20%), CPU (15%), disk (10%),
    /// plus penalties for degraded business metrics and a rising trend.
    async fn calculate_load_level(&self, metrics: &IOMetrics) -> LoadLevel {
        // multi-dimensional load evaluation algorithm
        let mut load_score = 0u32;
        // IOPS load evaluation (weight: 25%)
        let iops_score = match metrics.iops {
            0..=200 => 0,
            201..=500 => 15,
            501..=1000 => 25,
            _ => 35,
        };
        load_score += iops_score;
        // latency load evaluation (weight: 30%)
        let latency_score = match metrics.avg_latency {
            0..=10 => 0,
            11..=50 => 20,
            51..=100 => 30,
            _ => 40,
        };
        load_score += latency_score;
        // queue depth evaluation (weight: 20%)
        let queue_score = match metrics.queue_depth {
            0..=5 => 0,
            6..=15 => 10,
            16..=30 => 20,
            _ => 25,
        };
        load_score += queue_score;
        // CPU usage evaluation (weight: 15%)
        let cpu_score = match metrics.cpu_usage {
            0..=30 => 0,
            31..=60 => 8,
            61..=80 => 12,
            _ => 15,
        };
        load_score += cpu_score;
        // disk usage evaluation (weight: 10%)
        let disk_score = match metrics.disk_utilization {
            0..=50 => 0,
            51..=75 => 5,
            76..=90 => 8,
            _ => 10,
        };
        load_score += disk_score;
        // business metrics impact
        let business_latency = self.business_metrics.request_latency.load(Ordering::Relaxed);
        let business_error_rate = self.business_metrics.error_rate.load(Ordering::Relaxed);
        if business_latency > 100 {
            load_score += 20; // business latency too high
        }
        if business_error_rate > 100 {
            // > 1%
            load_score += 15; // business error rate too high
        }
        // history trend analysis
        let trend_score = self.calculate_trend_score().await;
        load_score += trend_score;
        // determine load level based on total score
        match load_score {
            0..=30 => LoadLevel::Low,
            31..=60 => LoadLevel::Medium,
            61..=90 => LoadLevel::High,
            _ => LoadLevel::Critical,
        }
    }
    /// Score the recent trend. `recent` is ordered newest-first (built with
    /// `rev().take(5)`), so `recent[i - 1] > recent[i]` means a newer sample
    /// exceeds an older one, i.e. the metric is rising.
    async fn calculate_trend_score(&self) -> u32 {
        let history = self.history_metrics.read().await;
        if history.len() < 5 {
            return 0; // data insufficient, cannot analyze trend
        }
        // analyze trend of last 5 samples
        let recent: Vec<_> = history.iter().rev().take(5).collect();
        // check IOPS rising trend
        let mut iops_trend = 0;
        for i in 1..recent.len() {
            if recent[i - 1].iops > recent[i].iops {
                iops_trend += 1;
            }
        }
        // check latency rising trend
        let mut latency_trend = 0;
        for i in 1..recent.len() {
            if recent[i - 1].avg_latency > recent[i].avg_latency {
                latency_trend += 1;
            }
        }
        // if IOPS and latency are both rising, increase load score
        if iops_trend >= 3 && latency_trend >= 3 {
            15 // obvious rising trend
        } else if iops_trend >= 2 || latency_trend >= 2 {
            5 // slight rising trend
        } else {
            0 // no obvious trend
        }
    }
    /// Handle a load-level transition: add the time spent in `old_level` to
    /// the per-level totals, append the new level to the bounded history, and
    /// log (warning when entering Critical).
    async fn handle_load_level_change(&self, old_level: LoadLevel, new_level: LoadLevel, start_time: SystemTime) {
        let duration = SystemTime::now().duration_since(start_time).unwrap_or(Duration::ZERO);
        // update stats
        {
            let mut stats = self.load_stats.write().await;
            match old_level {
                LoadLevel::Low => stats.low_load_duration += duration.as_secs(),
                LoadLevel::Medium => stats.medium_load_duration += duration.as_secs(),
                LoadLevel::High => stats.high_load_duration += duration.as_secs(),
                LoadLevel::Critical => stats.critical_load_duration += duration.as_secs(),
            }
            stats.load_transitions += 1;
        }
        // update history
        {
            let mut history = self.load_level_history.write().await;
            history.push_back((SystemTime::now(), new_level));
            // keep history record in reasonable range (last 100 transitions)
            while history.len() > 100 {
                history.pop_front();
            }
        }
        info!("load level changed: {:?} -> {:?}, duration: {:?}", old_level, new_level, duration);
        // if enter critical load state, record warning
        if new_level == LoadLevel::Critical {
            warn!("system entered critical load state, Scanner will pause running");
        }
    }
    /// Get the current load level.
    pub async fn get_business_load_level(&self) -> LoadLevel {
        *self.current_load_level.read().await
    }
    /// Get a clone of the most recent metrics sample.
    pub async fn get_current_metrics(&self) -> IOMetrics {
        self.current_metrics.read().await.clone()
    }
    /// Get the full metrics history window (oldest first).
    pub async fn get_history_metrics(&self) -> Vec<IOMetrics> {
        self.history_metrics.read().await.iter().cloned().collect()
    }
    /// Get accumulated per-load-level statistics.
    pub async fn get_load_stats(&self) -> LoadLevelStats {
        self.load_stats.read().await.clone()
    }
    /// Update the externally supplied business IO metrics; these feed both the
    /// simulated metrics generator and the load-level score.
    pub async fn update_business_metrics(&self, latency: u64, qps: u64, error_rate: u64, connections: u64) {
        self.business_metrics.request_latency.store(latency, Ordering::Relaxed);
        self.business_metrics.request_qps.store(qps, Ordering::Relaxed);
        self.business_metrics.error_rate.store(error_rate, Ordering::Relaxed);
        self.business_metrics.active_connections.store(connections, Ordering::Relaxed);
        *self.business_metrics.last_update.write().await = SystemTime::now();
        debug!(
            "update business metrics: latency={}ms, QPS={}, error rate={}‰, connections={}",
            latency, qps, error_rate, connections
        );
    }
    /// Clone a handle for the background task: every field is an `Arc` (or the
    /// token, which clones shared state), so this is a shallow handle clone.
    fn clone_for_background(&self) -> Self {
        Self {
            config: self.config.clone(),
            current_metrics: self.current_metrics.clone(),
            history_metrics: self.history_metrics.clone(),
            current_load_level: self.current_load_level.clone(),
            load_level_history: self.load_level_history.clone(),
            load_stats: self.load_stats.clone(),
            business_metrics: self.business_metrics.clone(),
            cancel_token: self.cancel_token.clone(),
        }
    }
    /// Reset statistics: clears per-level totals, the transition history, and
    /// the metrics window (the current metrics/level are left untouched).
    pub async fn reset_stats(&self) {
        *self.load_stats.write().await = LoadLevelStats::default();
        self.load_level_history.write().await.clear();
        self.history_metrics.write().await.clear();
        info!("IO monitor stats reset");
    }
    /// Get the recorded (timestamp, level) transition history.
    pub async fn get_load_level_history(&self) -> Vec<(SystemTime, LoadLevel)> {
        self.load_level_history.read().await.iter().cloned().collect()
    }
}

View File

@@ -1,499 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::scanner::LoadLevel;
use std::{
sync::{
Arc,
atomic::{AtomicU8, AtomicU64, Ordering},
},
time::{Duration, SystemTime},
};
use tokio::sync::RwLock;
use tracing::{debug, info, warn};
/// Configuration for the IO throttler.
#[derive(Debug, Clone)]
pub struct IOThrottlerConfig {
    /// Max IOPS limit.
    pub max_iops: u64,
    /// Business priority baseline (percentage, 0-100).
    pub base_business_priority: u8,
    /// Scanner minimum delay (milliseconds).
    pub min_scan_delay: u64,
    /// Scanner maximum delay (milliseconds).
    pub max_scan_delay: u64,
    /// Whether dynamic adjustment is enabled.
    pub enable_dynamic_adjustment: bool,
    /// Adjustment response time (seconds).
    pub adjustment_response_time: u64,
}
impl Default for IOThrottlerConfig {
fn default() -> Self {
Self {
max_iops: 1000, // default max 1000 IOPS
base_business_priority: 95, // business priority 95%
min_scan_delay: 5000, // minimum 5s delay
max_scan_delay: 60000, // maximum 60s delay
enable_dynamic_adjustment: true,
adjustment_response_time: 5, // 5 seconds response time
}
}
}
/// Strategy for splitting IO capacity between business traffic and the scanner.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ResourceAllocationStrategy {
    /// Business traffic gets priority (the normal mode).
    BusinessFirst,
    /// Balanced split between business and the scanner.
    Balanced,
    /// Maintenance gets priority (only used in special cases).
    MaintenanceFirst,
}
/// Outcome of one throttling evaluation.
#[derive(Debug, Clone)]
pub struct ThrottleDecision {
    /// Whether scanning should pause entirely.
    pub should_pause: bool,
    /// Suggested delay between scanning operations.
    pub suggested_delay: Duration,
    /// Suggested business/scanner resource split.
    pub resource_allocation: ResourceAllocation,
    /// Human-readable reason for the decision.
    pub reason: String,
}
/// Business/scanner IO split, in percent.
#[derive(Debug, Clone)]
pub struct ResourceAllocation {
    /// Business IO allocation percentage (0-100).
    pub business_percentage: u8,
    /// Scanner IO allocation percentage (0-100).
    pub scanner_percentage: u8,
    /// Strategy that produced this split.
    pub strategy: ResourceAllocationStrategy,
}
/// Enhanced IO throttler.
///
/// Dynamically adjusts the resource usage of the scanner based on real-time
/// system load and business demand, ensuring business IO gets priority
/// protection.
pub struct AdvancedIOThrottler {
    /// Configuration.
    config: Arc<RwLock<IOThrottlerConfig>>,
    /// Current IOPS usage (reserved field).
    #[allow(dead_code)]
    current_iops: Arc<AtomicU64>,
    /// Business priority weight (0-100).
    business_priority: Arc<AtomicU8>,
    /// Scanning operation delay (milliseconds).
    scan_delay: Arc<AtomicU64>,
    /// Resource allocation strategy.
    allocation_strategy: Arc<RwLock<ResourceAllocationStrategy>>,
    /// Bounded throttle decision history (see `record_throttle_decision`).
    throttle_history: Arc<RwLock<Vec<ThrottleRecord>>>,
    /// Last adjustment time (reserved field).
    #[allow(dead_code)]
    last_adjustment: Arc<RwLock<SystemTime>>,
}
/// One archived throttle decision together with its context.
#[derive(Debug, Clone)]
pub struct ThrottleRecord {
    /// When the decision was made.
    pub timestamp: SystemTime,
    /// Load level at decision time.
    pub load_level: LoadLevel,
    /// The decision taken.
    pub decision: ThrottleDecision,
    /// System metrics snapshot at decision time.
    pub metrics_snapshot: MetricsSnapshot,
}
/// Minimal system metrics snapshot attached to throttle records.
#[derive(Debug, Clone)]
pub struct MetricsSnapshot {
    /// IOPS.
    pub iops: u64,
    /// Latency (presumably milliseconds, matching `IOMetrics::avg_latency` — TODO confirm).
    pub latency: u64,
    /// CPU usage (0-100).
    pub cpu_usage: u8,
    /// Memory usage (0-100).
    pub memory_usage: u8,
}
impl AdvancedIOThrottler {
    /// Create a new advanced IO throttler from the given configuration.
    pub fn new(config: IOThrottlerConfig) -> Self {
        Self {
            config: Arc::new(RwLock::new(config)),
            current_iops: Arc::new(AtomicU64::new(0)),
            business_priority: Arc::new(AtomicU8::new(95)),
            scan_delay: Arc::new(AtomicU64::new(5000)),
            allocation_strategy: Arc::new(RwLock::new(ResourceAllocationStrategy::BusinessFirst)),
            throttle_history: Arc::new(RwLock::new(Vec::new())),
            last_adjustment: Arc::new(RwLock::new(SystemTime::UNIX_EPOCH)),
        }
    }
    /// Adjust the scanning delay (and business priority) for the given load
    /// level, returning the delay the scanner should apply.
    pub async fn adjust_for_load_level(&self, load_level: LoadLevel) -> Duration {
        let config = self.config.read().await;
        let delay_ms = match load_level {
            LoadLevel::Low => {
                // low load: use minimum delay
                self.scan_delay.store(config.min_scan_delay, Ordering::Relaxed);
                self.business_priority
                    .store(config.base_business_priority.saturating_sub(5), Ordering::Relaxed);
                config.min_scan_delay
            }
            LoadLevel::Medium => {
                // medium load: increase delay moderately
                let delay = config.min_scan_delay * 5; // 25s with the default 5s base
                self.scan_delay.store(delay, Ordering::Relaxed);
                self.business_priority.store(config.base_business_priority, Ordering::Relaxed);
                delay
            }
            LoadLevel::High => {
                // high load: increase delay significantly
                let delay = config.min_scan_delay * 10; // 50s with the default 5s base
                self.scan_delay.store(delay, Ordering::Relaxed);
                self.business_priority
                    .store(config.base_business_priority.saturating_add(3), Ordering::Relaxed);
                delay
            }
            LoadLevel::Critical => {
                // critical load: maximum delay or pause
                let delay = config.max_scan_delay; // 60s by default
                self.scan_delay.store(delay, Ordering::Relaxed);
                self.business_priority.store(99, Ordering::Relaxed);
                delay
            }
        };
        let duration = Duration::from_millis(delay_ms);
        debug!("Adjust scanning delay based on load level {:?}: {:?}", load_level, duration);
        duration
    }
    /// Create a throttle decision for the given load level; when a metrics
    /// snapshot is supplied the decision is also appended to the history.
    pub async fn make_throttle_decision(&self, load_level: LoadLevel, metrics: Option<MetricsSnapshot>) -> ThrottleDecision {
        let _config = self.config.read().await;
        // Only Critical load pauses the scanner outright.
        let should_pause = matches!(load_level, LoadLevel::Critical);
        let suggested_delay = self.adjust_for_load_level(load_level).await;
        let resource_allocation = self.calculate_resource_allocation(load_level).await;
        let reason = match load_level {
            LoadLevel::Low => "system load is low, scanner can run normally".to_string(),
            LoadLevel::Medium => "system load is moderate, scanner is running at reduced speed".to_string(),
            LoadLevel::High => "system load is high, scanner is running at significantly reduced speed".to_string(),
            LoadLevel::Critical => "system load is too high, scanner is paused".to_string(),
        };
        let decision = ThrottleDecision {
            should_pause,
            suggested_delay,
            resource_allocation,
            reason,
        };
        // record decision history
        if let Some(snapshot) = metrics {
            self.record_throttle_decision(load_level, decision.clone(), snapshot).await;
        }
        decision
    }
    /// Map (strategy, load level) to a business/scanner IO percentage split.
    async fn calculate_resource_allocation(&self, load_level: LoadLevel) -> ResourceAllocation {
        let strategy = *self.allocation_strategy.read().await;
        let (business_pct, scanner_pct) = match (strategy, load_level) {
            (ResourceAllocationStrategy::BusinessFirst, LoadLevel::Low) => (90, 10),
            (ResourceAllocationStrategy::BusinessFirst, LoadLevel::Medium) => (95, 5),
            (ResourceAllocationStrategy::BusinessFirst, LoadLevel::High) => (98, 2),
            (ResourceAllocationStrategy::BusinessFirst, LoadLevel::Critical) => (99, 1),
            (ResourceAllocationStrategy::Balanced, LoadLevel::Low) => (80, 20),
            (ResourceAllocationStrategy::Balanced, LoadLevel::Medium) => (85, 15),
            (ResourceAllocationStrategy::Balanced, LoadLevel::High) => (90, 10),
            (ResourceAllocationStrategy::Balanced, LoadLevel::Critical) => (95, 5),
            (ResourceAllocationStrategy::MaintenanceFirst, _) => (70, 30), // special maintenance mode
        };
        ResourceAllocation {
            business_percentage: business_pct,
            scanner_percentage: scanner_pct,
            strategy,
        }
    }
    /// Check whether scanning should pause; true only at Critical load.
    pub async fn should_pause_scanning(&self, load_level: LoadLevel) -> bool {
        match load_level {
            LoadLevel::Critical => {
                warn!("System load reached critical level, pausing scanner");
                true
            }
            _ => false,
        }
    }
    /// Append a decision record to the bounded (1000-entry) history.
    async fn record_throttle_decision(&self, load_level: LoadLevel, decision: ThrottleDecision, metrics: MetricsSnapshot) {
        let record = ThrottleRecord {
            timestamp: SystemTime::now(),
            load_level,
            decision,
            metrics_snapshot: metrics,
        };
        let mut history = self.throttle_history.write().await;
        history.push(record);
        // keep history record in reasonable range (last 1000 records)
        while history.len() > 1000 {
            history.remove(0);
        }
    }
    /// Set the resource allocation strategy.
    pub async fn set_allocation_strategy(&self, strategy: ResourceAllocationStrategy) {
        *self.allocation_strategy.write().await = strategy;
        info!("Set resource allocation strategy: {:?}", strategy);
    }
    /// Get the current resource allocation.
    ///
    /// NOTE(review): the load level is hard-coded to `Low` here; it should be
    /// supplied by the caller or read from the monitor.
    pub async fn get_current_allocation(&self) -> ResourceAllocation {
        let current_load = LoadLevel::Low; // need to get from external
        self.calculate_resource_allocation(current_load).await
    }
    /// Get a copy of the full throttle decision history.
    pub async fn get_throttle_history(&self) -> Vec<ThrottleRecord> {
        self.throttle_history.read().await.clone()
    }
    /// Aggregate the decision history into summary statistics.
    pub async fn get_throttle_stats(&self) -> ThrottleStats {
        let history = self.throttle_history.read().await;
        let total_decisions = history.len();
        let pause_decisions = history.iter().filter(|r| r.decision.should_pause).count();
        let mut delay_sum = Duration::ZERO;
        for record in history.iter() {
            delay_sum += record.decision.suggested_delay;
        }
        let avg_delay = if total_decisions > 0 {
            delay_sum / total_decisions as u32
        } else {
            Duration::ZERO
        };
        // count by load level
        let low_count = history.iter().filter(|r| r.load_level == LoadLevel::Low).count();
        let medium_count = history.iter().filter(|r| r.load_level == LoadLevel::Medium).count();
        let high_count = history.iter().filter(|r| r.load_level == LoadLevel::High).count();
        let critical_count = history.iter().filter(|r| r.load_level == LoadLevel::Critical).count();
        ThrottleStats {
            total_decisions,
            pause_decisions,
            average_delay: avg_delay,
            load_level_distribution: LoadLevelDistribution {
                low_count,
                medium_count,
                high_count,
                critical_count,
            },
        }
    }
    /// Clear the throttle decision history.
    pub async fn reset_history(&self) {
        self.throttle_history.write().await.clear();
        info!("Reset throttle history");
    }
    /// Replace the throttler configuration.
    pub async fn update_config(&self, new_config: IOThrottlerConfig) {
        *self.config.write().await = new_config;
        info!("Updated IO throttler configuration");
    }
    /// Get the current scanning delay.
    pub fn get_current_scan_delay(&self) -> Duration {
        let delay_ms = self.scan_delay.load(Ordering::Relaxed);
        Duration::from_millis(delay_ms)
    }
    /// Get the current business priority (0-100).
    pub fn get_current_business_priority(&self) -> u8 {
        self.business_priority.load(Ordering::Relaxed)
    }
    /// Simulate a business load pressure test: walks a fixed ramp of load
    /// levels (Low -> Critical -> Low), makes a throttle decision at each step
    /// with synthetic metrics, and sleeps for each step's share of `duration`.
    pub async fn simulate_business_pressure(&self, duration: Duration) -> SimulationResult {
        info!("Start simulating business load pressure test, duration: {:?}", duration);
        let start_time = SystemTime::now();
        let mut simulation_records = Vec::new();
        // simulate different load level changes
        let load_levels = [
            LoadLevel::Low,
            LoadLevel::Medium,
            LoadLevel::High,
            LoadLevel::Critical,
            LoadLevel::High,
            LoadLevel::Medium,
            LoadLevel::Low,
        ];
        let step_duration = duration / load_levels.len() as u32;
        for (i, &load_level) in load_levels.iter().enumerate() {
            let _step_start = SystemTime::now();
            // simulate metrics for this load level
            let metrics = MetricsSnapshot {
                iops: match load_level {
                    LoadLevel::Low => 200,
                    LoadLevel::Medium => 500,
                    LoadLevel::High => 800,
                    LoadLevel::Critical => 1200,
                },
                latency: match load_level {
                    LoadLevel::Low => 10,
                    LoadLevel::Medium => 25,
                    LoadLevel::High => 60,
                    LoadLevel::Critical => 150,
                },
                cpu_usage: match load_level {
                    LoadLevel::Low => 30,
                    LoadLevel::Medium => 50,
                    LoadLevel::High => 75,
                    LoadLevel::Critical => 95,
                },
                memory_usage: match load_level {
                    LoadLevel::Low => 40,
                    LoadLevel::Medium => 60,
                    LoadLevel::High => 80,
                    LoadLevel::Critical => 90,
                },
            };
            let decision = self.make_throttle_decision(load_level, Some(metrics.clone())).await;
            simulation_records.push(SimulationRecord {
                step: i + 1,
                load_level,
                metrics,
                decision: decision.clone(),
                step_duration,
            });
            info!(
                "simulate step {}: load={:?}, delay={:?}, pause={}",
                i + 1,
                load_level,
                decision.suggested_delay,
                decision.should_pause
            );
            // wait for step duration
            tokio::time::sleep(step_duration).await;
        }
        let total_duration = SystemTime::now().duration_since(start_time).unwrap_or(Duration::ZERO);
        SimulationResult {
            total_duration,
            simulation_records,
            final_stats: self.get_throttle_stats().await,
        }
    }
}
/// Summary statistics over the throttle decision history.
#[derive(Debug, Clone)]
pub struct ThrottleStats {
    /// Total number of decisions recorded.
    pub total_decisions: usize,
    /// Number of decisions that paused scanning.
    pub pause_decisions: usize,
    /// Average suggested delay across all decisions.
    pub average_delay: Duration,
    /// Decision counts grouped by load level.
    pub load_level_distribution: LoadLevelDistribution,
}
/// Decision counts per load level.
#[derive(Debug, Clone)]
pub struct LoadLevelDistribution {
    /// Decisions made at low load.
    pub low_count: usize,
    /// Decisions made at medium load.
    pub medium_count: usize,
    /// Decisions made at high load.
    pub high_count: usize,
    /// Decisions made at critical load.
    pub critical_count: usize,
}
/// Result of a pressure-test simulation run.
#[derive(Debug, Clone)]
pub struct SimulationResult {
    /// Total wall-clock duration of the run.
    pub total_duration: Duration,
    /// Per-step simulation records.
    pub simulation_records: Vec<SimulationRecord>,
    /// Throttle statistics at the end of the run.
    pub final_stats: ThrottleStats,
}
/// One step of a pressure-test simulation.
#[derive(Debug, Clone)]
pub struct SimulationRecord {
    /// Step number (1-based).
    pub step: usize,
    /// Simulated load level for this step.
    pub load_level: LoadLevel,
    /// Synthetic metrics used for this step.
    pub metrics: MetricsSnapshot,
    /// Throttle decision produced for this step.
    pub decision: ThrottleDecision,
    /// Planned duration of this step.
    pub step_duration: Duration,
}
impl Default for AdvancedIOThrottler {
fn default() -> Self {
Self::new(IOThrottlerConfig::default())
}
}

View File

@@ -1,269 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::Result;
use rustfs_common::data_usage::SizeSummary;
use rustfs_common::metrics::IlmAction;
use rustfs_ecstore::bucket::{
lifecycle::{
bucket_lifecycle_audit::LcEventSrc,
bucket_lifecycle_ops::{GLOBAL_ExpiryState, apply_lifecycle_action, eval_action_from_lifecycle},
lifecycle,
lifecycle::Lifecycle,
},
metadata_sys::get_object_lock_config,
object_lock::objectlock_sys::{BucketObjectLockSys, check_object_lock_for_deletion},
versioning::VersioningApi,
versioning_sys::BucketVersioningSys,
};
use rustfs_ecstore::store_api::{ObjectInfo, ObjectToDelete};
use rustfs_filemeta::FileInfo;
use s3s::dto::{BucketLifecycleConfiguration as LifecycleConfig, VersioningConfiguration};
use std::sync::{
Arc,
atomic::{AtomicU64, Ordering},
};
use time::OffsetDateTime;
use tracing::info;
// Threshold: an object with at least this many versions is considered to have
// excess versions (see apply_versions_actions; the notification is still TODO).
static SCANNER_EXCESS_OBJECT_VERSIONS: AtomicU64 = AtomicU64::new(100);
// Threshold: cumulative size of all of an object's versions above which it is
// considered oversized (see apply_versions_actions; the notification is still TODO).
static SCANNER_EXCESS_OBJECT_VERSIONS_TOTAL_SIZE: AtomicU64 = AtomicU64::new(1024 * 1024 * 1024 * 1024); // 1 TB
/// Per-object scanner context: carries the bucket's lifecycle and versioning
/// configuration while individual objects (and their versions) are evaluated.
#[derive(Clone)]
pub struct ScannerItem {
    /// Bucket being scanned.
    pub bucket: String,
    /// Object currently under evaluation; empty until the scan loop fills it in.
    pub object_name: String,
    /// Bucket lifecycle configuration, if one is set.
    pub lifecycle: Option<Arc<LifecycleConfig>>,
    /// Bucket versioning configuration, if one is set.
    pub versioning: Option<Arc<VersioningConfiguration>>,
}
impl ScannerItem {
    /// Create a scanner item for `bucket`. `object_name` starts empty and is
    /// filled in per object as the scan proceeds.
    pub fn new(
        bucket: String,
        lifecycle: Option<Arc<LifecycleConfig>>,
        versioning: Option<Arc<VersioningConfiguration>>,
    ) -> Self {
        Self {
            bucket,
            object_name: String::new(),
            lifecycle,
            versioning,
        }
    }

    /// Apply per-version lifecycle handling to `fivs` and return the versions
    /// that survive. Excess-version / excess-size thresholds are checked but
    /// their notifications are not yet implemented.
    pub async fn apply_versions_actions(&self, fivs: &[FileInfo]) -> Result<Vec<ObjectInfo>> {
        let obj_infos = self.apply_newer_noncurrent_version_limit(fivs).await?;
        if obj_infos.len() >= SCANNER_EXCESS_OBJECT_VERSIONS.load(Ordering::SeqCst) as usize {
            // TODO: emit an excess-versions notification/metric.
        }
        // Sum of all surviving version sizes, compared to the excess-size threshold.
        let cumulative_size: i64 = obj_infos.iter().map(|obj_info| obj_info.size).sum();
        if cumulative_size >= SCANNER_EXCESS_OBJECT_VERSIONS_TOTAL_SIZE.load(Ordering::SeqCst) as i64 {
            // TODO: emit an excess-total-size notification/metric.
        }
        Ok(obj_infos)
    }

    /// Enforce the lifecycle "newer noncurrent versions" limit: keep the
    /// current version plus the `lim` newest noncurrent versions, and enqueue
    /// any remaining (unlocked, past-retention) versions for expiry.
    ///
    /// Returns the `ObjectInfo`s for every version that is kept.
    pub async fn apply_newer_noncurrent_version_limit(&self, fivs: &[FileInfo]) -> Result<Vec<ObjectInfo>> {
        let lock_enabled = match BucketObjectLockSys::get(&self.bucket).await {
            Some(rcfg) => rcfg.mode.is_some(),
            None => false,
        };
        // Single fetch of the versioning config. The previous code fetched it
        // twice: the first call's `?` already propagated errors, which made the
        // second call's `Err(_) => false` fallback dead code.
        let vcfg = BucketVersioningSys::get(&self.bucket).await?;
        let versioned = vcfg.versioned(&self.object_name);
        let mut object_infos = Vec::with_capacity(fivs.len());
        // No lifecycle config: every version is kept as-is.
        let Some(lifecycle) = self.lifecycle.as_ref() else {
            for info in fivs.iter() {
                object_infos.push(ObjectInfo::from_file_info(info, &self.bucket, &self.object_name, versioned));
            }
            return Ok(object_infos);
        };
        let event = lifecycle
            .clone()
            .noncurrent_versions_expiration_limit(&lifecycle::ObjectOpts {
                name: self.object_name.clone(),
                ..Default::default()
            })
            .await;
        let lim = event.newer_noncurrent_versions;
        // No limit configured, or not enough versions to exceed it: keep all.
        if lim == 0 || fivs.len() <= lim + 1 {
            for fi in fivs.iter() {
                object_infos.push(ObjectInfo::from_file_info(fi, &self.bucket, &self.object_name, versioned));
            }
            return Ok(object_infos);
        }
        // fivs[0] is the current version; fivs[1..=lim] are the retained
        // noncurrent versions; everything after overflows the limit.
        let overflow_versions = &fivs[lim + 1..];
        for fi in fivs[..lim + 1].iter() {
            object_infos.push(ObjectInfo::from_file_info(fi, &self.bucket, &self.object_name, versioned));
        }
        let mut to_del = Vec::<ObjectToDelete>::with_capacity(overflow_versions.len());
        for fi in overflow_versions.iter() {
            let obj = ObjectInfo::from_file_info(fi, &self.bucket, &self.object_name, versioned);
            // Lifecycle operations should never bypass governance retention.
            if lock_enabled && check_object_lock_for_deletion(&self.bucket, &obj, false).await.is_some() {
                object_infos.push(obj);
                continue;
            }
            // Still inside the noncurrent-days retention window: keep it.
            // NOTE(review): `successor_mod_time` is expected to be present for
            // every noncurrent version; a missing value panics here.
            if OffsetDateTime::now_utc().unix_timestamp()
                < lifecycle::expected_expiry_time(obj.successor_mod_time.expect("err"), event.noncurrent_days as i32)
                    .unix_timestamp()
            {
                object_infos.push(obj);
                continue;
            }
            to_del.push(ObjectToDelete {
                object_name: obj.name,
                version_id: obj.version_id,
                ..Default::default()
            });
        }
        if !to_del.is_empty() {
            let mut expiry_state = GLOBAL_ExpiryState.write().await;
            expiry_state.enqueue_by_newer_noncurrent(&self.bucket, to_del, event).await;
        }
        Ok(object_infos)
    }

    /// Evaluate and apply lifecycle for one object. Returns
    /// `(deleted_all, size)`: `deleted_all` is true when the lifecycle action
    /// removed the object entirely (size then counts as 0).
    pub async fn apply_actions(&mut self, oi: &ObjectInfo, _size_s: &mut SizeSummary) -> (bool, i64) {
        let (action, _size) = self.apply_lifecycle(oi).await;
        info!(
            "apply_actions {} {} {:?} {:?}",
            oi.bucket, oi.name, oi.version_id, oi.user_defined
        );
        // TODO: derive replication status from user metadata and run
        // heal_replication here once replication healing is wired in.
        if action.delete_all() {
            return (true, 0);
        }
        (false, oi.size)
    }

    /// Evaluate the bucket lifecycle against `oi` and apply the resulting
    /// action. Returns the action taken and the size the object should be
    /// accounted at afterwards (0 when it will be removed).
    async fn apply_lifecycle(&mut self, oi: &ObjectInfo) -> (IlmAction, i64) {
        let size = oi.size;
        // No lifecycle configuration: nothing to do, account the full size.
        let Some(lifecycle) = self.lifecycle.as_ref() else {
            info!("apply_lifecycle: No lifecycle config for object: {}", oi.name);
            return (IlmAction::NoneAction, size);
        };
        info!("apply_lifecycle: Lifecycle config exists for object: {}", oi.name);
        // Object-lock config is irrelevant for the internal metadata bucket.
        let (olcfg, rcfg) = if self.bucket != ".minio.sys" {
            (
                get_object_lock_config(&self.bucket).await.ok(),
                None, // FIXME: replication config
            )
        } else {
            (None, None)
        };
        info!("apply_lifecycle: Evaluating lifecycle for object: {}", oi.name);
        let lc_evt = eval_action_from_lifecycle(
            lifecycle,
            olcfg
                .as_ref()
                .and_then(|(c, _)| c.rule.as_ref().and_then(|r| r.default_retention.clone())),
            rcfg.clone(),
            oi, // Pass oi directly
        )
        .await;
        info!("lifecycle: {} Initial scan: {} (action: {:?})", oi.name, lc_evt.action, lc_evt.action);
        // Work out how much of the object's size survives the action.
        let mut new_size = size;
        match lc_evt.action {
            IlmAction::DeleteVersionAction | IlmAction::DeleteAllVersionsAction | IlmAction::DelMarkerDeleteAllVersionsAction => {
                info!("apply_lifecycle: Object {} marked for version deletion, new_size=0", oi.name);
                new_size = 0;
            }
            IlmAction::DeleteAction => {
                info!("apply_lifecycle: Object {} marked for deletion", oi.name);
                // Without versioning, a delete removes the data outright.
                if let Some(vcfg) = &self.versioning {
                    if !vcfg.enabled() {
                        info!("apply_lifecycle: Versioning disabled, setting new_size=0");
                        new_size = 0;
                    }
                } else {
                    info!("apply_lifecycle: No versioning config, setting new_size=0");
                    new_size = 0;
                }
            }
            IlmAction::NoneAction => {
                info!("apply_lifecycle: No action for object {}", oi.name);
            }
            _ => {
                info!("apply_lifecycle: Other action {:?} for object {}", lc_evt.action, oi.name);
            }
        }
        if lc_evt.action != IlmAction::NoneAction {
            info!("apply_lifecycle: Applying lifecycle action {:?} for object {}", lc_evt.action, oi.name);
            apply_lifecycle_action(&lc_evt, &LcEventSrc::Scanner, oi).await;
        } else {
            info!("apply_lifecycle: Skipping lifecycle action for object {} as no action is needed", oi.name);
        }
        (lc_evt.action, new_size)
    }
}

View File

@@ -1,684 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::{Error, Result};
use rustfs_common::data_usage::DiskUsageStatus;
use rustfs_ecstore::data_usage::{
LocalUsageSnapshot, LocalUsageSnapshotMeta, data_usage_state_dir, ensure_data_usage_layout, snapshot_file_name,
write_local_snapshot,
};
use rustfs_ecstore::disk::DiskAPI;
use rustfs_ecstore::store::ECStore;
use rustfs_ecstore::store_api::ObjectInfo;
use rustfs_filemeta::{FileInfo, FileMeta, FileMetaVersion, VersionType};
use serde::{Deserialize, Serialize};
use serde_json::{from_slice, to_vec};
use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::{SystemTime, UNIX_EPOCH};
use tokio::{fs, task};
use tracing::warn;
use walkdir::WalkDir;
const STATE_FILE_EXTENSION: &str = "";
/// Usage accounting for a single object on one local disk, persisted as part
/// of the incremental scan state.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct LocalObjectUsage {
    // Bucket the object belongs to.
    pub bucket: String,
    // Object key (path within the bucket, '/'-joined).
    pub object: String,
    // mtime of the object's xl.meta in nanoseconds since the epoch; used to
    // skip re-parsing unchanged files on the next scan.
    pub last_modified_ns: Option<i128>,
    // Total versions recorded, including delete markers.
    pub versions_count: u64,
    // Number of delete-marker versions among versions_count.
    pub delete_markers_count: u64,
    // Accounted size in bytes (from the first live version seen).
    pub total_size: u64,
    // True when at least one non-delete-marker version exists.
    pub has_live_object: bool,
}
/// Persisted incremental scan state for one disk: last scan time plus the
/// per-object usage keyed by the object's relative path on disk.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
struct IncrementalScanState {
    // When the last scan ran, in nanoseconds since the epoch.
    last_scan_ns: Option<i128>,
    // Relative object path -> its last-known usage.
    objects: HashMap<String, LocalObjectUsage>,
}
/// Everything produced by scanning one disk: the refreshed snapshot, the
/// updated incremental state to persist, per-bucket object records, and the
/// disk's status summary.
struct DiskScanResult {
    snapshot: LocalUsageSnapshot,
    state: IncrementalScanState,
    objects_by_bucket: HashMap<String, Vec<LocalObjectRecord>>,
    status: DiskUsageStatus,
}
/// A scanned object's usage plus, when the xl.meta was (re-)parsed this scan,
/// a synthesized `ObjectInfo`; `object_info` is `None` for unchanged objects
/// carried over from the previous state.
#[derive(Debug, Clone)]
pub struct LocalObjectRecord {
    pub usage: LocalObjectUsage,
    pub object_info: Option<rustfs_ecstore::store_api::ObjectInfo>,
}
/// Aggregated result of scanning every local primary disk: one snapshot and
/// one status per scanned disk, plus all object records grouped by bucket.
#[derive(Debug, Default)]
pub struct LocalScanOutcome {
    pub snapshots: Vec<LocalUsageSnapshot>,
    pub bucket_objects: HashMap<String, Vec<LocalObjectRecord>>,
    pub disk_status: Vec<DiskUsageStatus>,
}
/// Scan all local primary disks and persist refreshed usage snapshots.
///
/// For every erasure set, exactly one local online disk is scanned; the
/// refreshed snapshot and incremental state are written back under that
/// disk's root. Per-disk scan failures are logged and skipped so one bad
/// disk does not abort the sweep, but errors from disk-id lookup, layout
/// creation, state reads, or snapshot writes propagate and abort it.
pub async fn scan_and_persist_local_usage(store: Arc<ECStore>) -> Result<LocalScanOutcome> {
    let mut snapshots = Vec::new();
    let mut bucket_objects: HashMap<String, Vec<LocalObjectRecord>> = HashMap::new();
    let mut disk_status = Vec::new();
    for (pool_idx, pool) in store.pools.iter().enumerate() {
        for set_disks in pool.disk_set.iter() {
            // Clone the disk list out of the read lock so it is not held
            // across the awaits below.
            let disks = {
                let guard = set_disks.disks.read().await;
                guard.clone()
            };
            // Use the first local online disk in the set to avoid missing stats when disk 0 is down
            let mut picked = false;
            for (disk_index, disk_opt) in disks.into_iter().enumerate() {
                let Some(disk) = disk_opt else {
                    continue;
                };
                if !disk.is_local() {
                    continue;
                }
                // Once a disk has been picked, skip the rest of the set.
                if picked {
                    continue;
                }
                // Skip offline disks; keep looking for an online candidate
                if !disk.is_online().await {
                    continue;
                }
                picked = true;
                // NOTE(review): `picked` is set before the disk-id check, so a
                // picked disk without an ID causes the whole set to be skipped
                // rather than falling back to the next disk — confirm intended.
                let disk_id = match disk.get_disk_id().await.map_err(Error::from)? {
                    Some(id) => id.to_string(),
                    None => {
                        warn!("Skipping disk without ID: {}", disk.to_string());
                        continue;
                    }
                };
                let root = disk.path();
                ensure_data_usage_layout(root.as_path()).await.map_err(Error::from)?;
                let meta = LocalUsageSnapshotMeta {
                    disk_id: disk_id.clone(),
                    pool_index: Some(pool_idx),
                    set_index: Some(set_disks.set_index),
                    disk_index: Some(disk_index),
                };
                let state_path = state_file_path(root.as_path(), &disk_id);
                let state = read_scan_state(&state_path).await?;
                let root_clone = root.clone();
                let meta_clone = meta.clone();
                // The walk is blocking filesystem work; run it off the async runtime.
                let handle = task::spawn_blocking(move || scan_disk_blocking(root_clone, meta_clone, state));
                match handle.await {
                    Ok(Ok(result)) => {
                        // Persist snapshot and state, then fold results into the outcome.
                        write_local_snapshot(root.as_path(), &disk_id, &result.snapshot)
                            .await
                            .map_err(Error::from)?;
                        write_scan_state(&state_path, &result.state).await?;
                        snapshots.push(result.snapshot);
                        for (bucket, records) in result.objects_by_bucket {
                            bucket_objects.entry(bucket).or_default().extend(records.into_iter());
                        }
                        disk_status.push(result.status);
                    }
                    Ok(Err(err)) => {
                        warn!("Failed to scan disk {}: {}", disk.to_string(), err);
                    }
                    Err(join_err) => {
                        warn!("Disk scan task panicked for disk {}: {}", disk.to_string(), join_err);
                    }
                }
            }
        }
    }
    Ok(LocalScanOutcome {
        snapshots,
        bucket_objects,
        disk_status,
    })
}
/// Walk one disk root looking for `xl.meta` files and incrementally update
/// the usage state: files whose mtime is unchanged reuse the previous entry,
/// changed/new files are re-parsed, and entries for vanished files are
/// dropped. Returns the fresh snapshot, the updated state, per-bucket object
/// records, and the disk status. Blocking — run via `spawn_blocking`.
fn scan_disk_blocking(root: PathBuf, meta: LocalUsageSnapshotMeta, mut state: IncrementalScanState) -> Result<DiskScanResult> {
    let now = SystemTime::now();
    let now_ns = system_time_to_ns(now);
    // Relative paths seen this walk; state entries not in here are stale.
    let mut visited: HashSet<String> = HashSet::new();
    // Relative paths whose record was (re-)emitted with a parsed ObjectInfo.
    let mut emitted: HashSet<String> = HashSet::new();
    let mut objects_by_bucket: HashMap<String, Vec<LocalObjectRecord>> = HashMap::new();
    let mut status = DiskUsageStatus {
        disk_id: meta.disk_id.clone(),
        pool_index: meta.pool_index,
        set_index: meta.set_index,
        disk_index: meta.disk_index,
        last_update: None,
        snapshot_exists: false,
    };
    for entry in WalkDir::new(&root).follow_links(false).into_iter().filter_map(|res| res.ok()) {
        // Only regular files named exactly "xl.meta" are object metadata.
        if !entry.file_type().is_file() {
            continue;
        }
        if entry.file_name() != "xl.meta" {
            continue;
        }
        let xl_path = entry.path().to_path_buf();
        let Some(object_dir) = xl_path.parent() else {
            continue;
        };
        // Key objects by their path relative to the disk root.
        let Some(rel_path) = object_dir.strip_prefix(&root).ok().map(normalize_path) else {
            continue;
        };
        let mut components = rel_path.split('/');
        let Some(bucket_name) = components.next() else {
            continue;
        };
        // Dot-prefixed top-level dirs are internal (e.g. system buckets); skip.
        if bucket_name.starts_with('.') {
            continue;
        }
        let object_key = components.collect::<Vec<_>>().join("/");
        visited.insert(rel_path.clone());
        let metadata = match std::fs::metadata(&xl_path) {
            Ok(meta) => meta,
            Err(err) => {
                warn!("Failed to read metadata for {xl_path:?}: {err}");
                continue;
            }
        };
        let mtime_ns = metadata.modified().ok().map(system_time_to_ns);
        // Re-parse only when the file is new or its mtime changed.
        let should_parse = match state.objects.get(&rel_path) {
            Some(existing) => existing.last_modified_ns != mtime_ns,
            None => true,
        };
        if should_parse {
            match std::fs::read(&xl_path) {
                Ok(buf) => match FileMeta::load(&buf) {
                    Ok(file_meta) => match compute_object_usage(bucket_name, object_key.as_str(), &file_meta) {
                        Ok(Some(mut record)) => {
                            record.usage.last_modified_ns = mtime_ns;
                            state.objects.insert(rel_path.clone(), record.usage.clone());
                            emitted.insert(rel_path.clone());
                            objects_by_bucket.entry(record.usage.bucket.clone()).or_default().push(record);
                        }
                        // No live versions and no delete markers: drop the entry.
                        Ok(None) => {
                            state.objects.remove(&rel_path);
                        }
                        Err(err) => {
                            warn!("Failed to parse usage from {:?}: {}", xl_path, err);
                        }
                    },
                    Err(err) => {
                        warn!("Failed to decode xl.meta {:?}: {}", xl_path, err);
                    }
                },
                Err(err) => {
                    warn!("Failed to read xl.meta {:?}: {}", xl_path, err);
                }
            }
        }
    }
    // Drop state for objects whose xl.meta disappeared since the last scan.
    state.objects.retain(|key, _| visited.contains(key));
    state.last_scan_ns = Some(now_ns);
    // Unchanged objects were not re-parsed; emit their cached usage without
    // an ObjectInfo so callers still see them.
    for (key, usage) in &state.objects {
        if emitted.contains(key) {
            continue;
        }
        objects_by_bucket
            .entry(usage.bucket.clone())
            .or_default()
            .push(LocalObjectRecord {
                usage: usage.clone(),
                object_info: None,
            });
    }
    let snapshot = build_snapshot(meta, &state.objects, now);
    status.snapshot_exists = true;
    status.last_update = Some(now);
    Ok(DiskScanResult {
        snapshot,
        state,
        objects_by_bucket,
        status,
    })
}
/// Derive usage counters for one object from its parsed `FileMeta`.
///
/// Returns `Ok(None)` when the object has neither live versions nor delete
/// markers (nothing to account). Otherwise returns the usage plus, when any
/// live version exists, a synthesized `ObjectInfo` for the latest version.
fn compute_object_usage(bucket: &str, object: &str, file_meta: &FileMeta) -> Result<Option<LocalObjectRecord>> {
    let mut versions_count = 0u64;
    let mut delete_markers_count = 0u64;
    let mut total_size = 0u64;
    let mut has_live_object = false;
    let mut latest_file_info: Option<FileInfo> = None;
    for shallow in &file_meta.versions {
        match shallow.header.version_type {
            VersionType::Object => {
                let version = match FileMetaVersion::try_from(shallow.meta.as_slice()) {
                    Ok(version) => version,
                    Err(err) => {
                        warn!("Failed to parse file meta version: {}", err);
                        continue;
                    }
                };
                if let Some(obj) = version.object {
                    // NOTE(review): only the FIRST live version's size is
                    // recorded (presumably the latest version, assuming
                    // versions are ordered newest-first) — sizes are not
                    // summed across versions. Confirm intended.
                    if !has_live_object {
                        total_size = obj.size.max(0) as u64;
                    }
                    has_live_object = true;
                    versions_count = versions_count.saturating_add(1);
                    if latest_file_info.is_none()
                        && let Ok(info) = file_meta.into_fileinfo(bucket, object, "", false, false, false)
                    {
                        latest_file_info = Some(info);
                    }
                }
            }
            VersionType::Delete => {
                // Delete markers count as versions but carry no size.
                delete_markers_count = delete_markers_count.saturating_add(1);
                versions_count = versions_count.saturating_add(1);
            }
            _ => {}
        }
    }
    if !has_live_object && delete_markers_count == 0 {
        return Ok(None);
    }
    let object_info = latest_file_info.as_ref().map(|fi| {
        let versioned = fi.version_id.is_some();
        ObjectInfo::from_file_info(fi, bucket, object, versioned)
    });
    Ok(Some(LocalObjectRecord {
        usage: LocalObjectUsage {
            bucket: bucket.to_string(),
            object: object.to_string(),
            // Filled in by the caller from the file's mtime.
            last_modified_ns: None,
            versions_count,
            delete_markers_count,
            total_size,
            has_live_object,
        },
        object_info,
    }))
}
/// Fold per-object usage into a per-bucket snapshot, stamp it with `now`, and
/// recompute the snapshot's totals.
fn build_snapshot(
    meta: LocalUsageSnapshotMeta,
    objects: &HashMap<String, LocalObjectUsage>,
    now: SystemTime,
) -> LocalUsageSnapshot {
    let mut snapshot = LocalUsageSnapshot::new(meta);
    for usage in objects.values() {
        let entry = snapshot.buckets_usage.entry(usage.bucket.clone()).or_default();
        // Saturating adds keep a corrupt counter from wrapping the totals.
        entry.versions_count = entry.versions_count.saturating_add(usage.versions_count);
        entry.delete_markers_count = entry.delete_markers_count.saturating_add(usage.delete_markers_count);
        entry.size = entry.size.saturating_add(usage.total_size);
        // Only objects with at least one live version count as objects.
        if usage.has_live_object {
            entry.objects_count = entry.objects_count.saturating_add(1);
        }
    }
    snapshot.last_update = Some(now);
    snapshot.recompute_totals();
    snapshot
}
/// Render a path as a '/'-joined string, independent of the platform's
/// native separator (state keys must match across scans).
fn normalize_path(path: &Path) -> String {
    let parts: Vec<String> = path
        .iter()
        .map(|part| part.to_string_lossy().into_owned())
        .collect();
    parts.join("/")
}
/// Convert a `SystemTime` to signed nanoseconds since the Unix epoch;
/// times before the epoch come back negative.
fn system_time_to_ns(time: SystemTime) -> i128 {
    match time.duration_since(UNIX_EPOCH) {
        // as_nanos() == secs * 1e9 + subsec_nanos, exactly.
        Ok(after) => after.as_nanos() as i128,
        // duration_since errs for pre-epoch times; the error carries the gap.
        Err(before) => -(before.duration().as_nanos() as i128),
    }
}
/// Path of the incremental scan-state file for `disk_id` under `root`'s
/// data-usage state directory.
fn state_file_path(root: &Path, disk_id: &str) -> PathBuf {
    let file_name = format!("{}{}", snapshot_file_name(disk_id), STATE_FILE_EXTENSION);
    data_usage_state_dir(root).join(file_name)
}
async fn read_scan_state(path: &Path) -> Result<IncrementalScanState> {
match fs::read(path).await {
Ok(bytes) => from_slice(&bytes).map_err(|err| Error::Serialization(err.to_string())),
Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(IncrementalScanState::default()),
Err(err) => Err(err.into()),
}
}
async fn write_scan_state(path: &Path, state: &IncrementalScanState) -> Result<()> {
if let Some(parent) = path.parent() {
fs::create_dir_all(parent).await?;
}
let data = to_vec(state).map_err(|err| Error::Serialization(err.to_string()))?;
fs::write(path, data).await?;
Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;
    use rustfs_filemeta::{ChecksumAlgo, ErasureAlgo, FileMetaShallowVersion, MetaDeleteMarker, MetaObject};
    use std::collections::HashMap;
    use std::fs;
    use tempfile::TempDir;
    use time::OffsetDateTime;
    use uuid::Uuid;
    // Build a FileMeta containing one live object version of the given size,
    // placed at `erasure_index` within the erasure distribution.
    fn build_file_meta_with_object(erasure_index: usize, size: i64) -> FileMeta {
        let mut file_meta = FileMeta::default();
        let meta_object = MetaObject {
            version_id: Some(Uuid::new_v4()),
            data_dir: Some(Uuid::new_v4()),
            erasure_algorithm: ErasureAlgo::ReedSolomon,
            erasure_m: 2,
            erasure_n: 2,
            erasure_block_size: 4096,
            erasure_index,
            erasure_dist: vec![0_u8, 1, 2, 3],
            bitrot_checksum_algo: ChecksumAlgo::HighwayHash,
            part_numbers: vec![1],
            part_etags: vec!["etag".to_string()],
            part_sizes: vec![size as usize],
            part_actual_sizes: vec![size],
            part_indices: Vec::new(),
            size,
            mod_time: Some(OffsetDateTime::now_utc()),
            meta_sys: HashMap::new(),
            meta_user: HashMap::new(),
        };
        let version = FileMetaVersion {
            version_type: VersionType::Object,
            object: Some(meta_object),
            delete_marker: None,
            write_version: 1,
        };
        let shallow = FileMetaShallowVersion::try_from(version).expect("convert version");
        file_meta.versions.push(shallow);
        file_meta
    }
    // Build a FileMeta whose only version is a delete marker (no live data).
    fn build_file_meta_with_delete_marker() -> FileMeta {
        let mut file_meta = FileMeta::default();
        let delete_marker = MetaDeleteMarker {
            version_id: Some(Uuid::new_v4()),
            mod_time: Some(OffsetDateTime::now_utc()),
            meta_sys: HashMap::new(),
        };
        let version = FileMetaVersion {
            version_type: VersionType::Delete,
            object: None,
            delete_marker: Some(delete_marker),
            write_version: 2,
        };
        let shallow = FileMetaShallowVersion::try_from(version).expect("convert delete marker");
        file_meta.versions.push(shallow);
        file_meta
    }
    // A live version on the primary shard yields full usage plus ObjectInfo.
    #[test]
    fn compute_object_usage_primary_disk() {
        let file_meta = build_file_meta_with_object(0, 1024);
        let record = compute_object_usage("bucket", "foo/bar", &file_meta)
            .expect("compute usage")
            .expect("record should exist");
        assert!(record.usage.has_live_object);
        assert_eq!(record.usage.bucket, "bucket");
        assert_eq!(record.usage.object, "foo/bar");
        assert_eq!(record.usage.total_size, 1024);
        assert!(record.object_info.is_some(), "object info should be synthesized");
    }
    // Usage is computed regardless of which erasure shard this disk holds.
    #[test]
    fn compute_object_usage_handles_non_primary_disk() {
        let file_meta = build_file_meta_with_object(1, 2048);
        let record = compute_object_usage("bucket", "obj", &file_meta)
            .expect("compute usage")
            .expect("record should exist for non-primary shard");
        assert!(record.usage.has_live_object);
    }
    // A lone delete marker is still accounted (as a version, not a live object).
    #[test]
    fn compute_object_usage_reports_delete_marker() {
        let file_meta = build_file_meta_with_delete_marker();
        let record = compute_object_usage("bucket", "obj", &file_meta)
            .expect("compute usage")
            .expect("delete marker record");
        assert!(!record.usage.has_live_object);
        assert_eq!(record.usage.delete_markers_count, 1);
        assert_eq!(record.usage.versions_count, 1);
    }
    // build_snapshot folds per-object usage into per-bucket totals.
    #[test]
    fn build_snapshot_accumulates_usage() {
        let mut objects = HashMap::new();
        objects.insert(
            "bucket/a".to_string(),
            LocalObjectUsage {
                bucket: "bucket".to_string(),
                object: "a".to_string(),
                last_modified_ns: None,
                versions_count: 2,
                delete_markers_count: 1,
                total_size: 512,
                has_live_object: true,
            },
        );
        let snapshot = build_snapshot(LocalUsageSnapshotMeta::default(), &objects, SystemTime::now());
        let usage = snapshot.buckets_usage.get("bucket").expect("bucket entry should exist");
        assert_eq!(usage.objects_count, 1);
        assert_eq!(usage.versions_count, 2);
        assert_eq!(usage.delete_markers_count, 1);
        assert_eq!(usage.size, 512);
    }
    // End-to-end incremental behavior: add an object, scan again, delete one,
    // and verify snapshot/state stay consistent at every step.
    #[test]
    fn scan_disk_blocking_handles_incremental_updates() {
        let temp_dir = TempDir::new().expect("create temp dir");
        let root = temp_dir.path();
        let bucket_dir = root.join("bench");
        let object1_dir = bucket_dir.join("obj1");
        fs::create_dir_all(&object1_dir).expect("create first object directory");
        let file_meta = build_file_meta_with_object(0, 1024);
        let bytes = file_meta.marshal_msg().expect("serialize first object");
        fs::write(object1_dir.join("xl.meta"), bytes).expect("write first xl.meta");
        let meta = LocalUsageSnapshotMeta {
            disk_id: "disk-test".to_string(),
            ..Default::default()
        };
        let DiskScanResult {
            snapshot: snapshot1,
            state,
            ..
        } = scan_disk_blocking(root.to_path_buf(), meta.clone(), IncrementalScanState::default()).expect("initial scan succeeds");
        let usage1 = snapshot1.buckets_usage.get("bench").expect("bucket stats recorded");
        assert_eq!(usage1.objects_count, 1);
        assert_eq!(usage1.size, 1024);
        assert_eq!(state.objects.len(), 1);
        // Add a second (nested) object and rescan with the carried-over state.
        let object2_dir = bucket_dir.join("nested").join("obj2");
        fs::create_dir_all(&object2_dir).expect("create second object directory");
        let second_meta = build_file_meta_with_object(0, 2048);
        let bytes = second_meta.marshal_msg().expect("serialize second object");
        fs::write(object2_dir.join("xl.meta"), bytes).expect("write second xl.meta");
        let DiskScanResult {
            snapshot: snapshot2,
            state: state_next,
            ..
        } = scan_disk_blocking(root.to_path_buf(), meta.clone(), state).expect("incremental scan succeeds");
        let usage2 = snapshot2
            .buckets_usage
            .get("bench")
            .expect("bucket stats recorded after addition");
        assert_eq!(usage2.objects_count, 2);
        assert_eq!(usage2.size, 1024 + 2048);
        assert_eq!(state_next.objects.len(), 2);
        // Remove the first object; the next scan must retire its state entry.
        fs::remove_dir_all(&object1_dir).expect("remove first object");
        let DiskScanResult {
            snapshot: snapshot3,
            state: state_final,
            ..
        } = scan_disk_blocking(root.to_path_buf(), meta, state_next).expect("scan after deletion succeeds");
        let usage3 = snapshot3
            .buckets_usage
            .get("bench")
            .expect("bucket stats recorded after deletion");
        assert_eq!(usage3.objects_count, 1);
        assert_eq!(usage3.size, 2048);
        assert_eq!(state_final.objects.len(), 1);
        assert!(
            state_final.objects.keys().all(|path| path.contains("nested")),
            "state should only keep surviving object"
        );
    }
    // State entries whose files no longer exist must be purged on the next scan.
    #[test]
    fn scan_disk_blocking_recovers_from_stale_state_entries() {
        let temp_dir = TempDir::new().expect("create temp dir");
        let root = temp_dir.path();
        let mut stale_state = IncrementalScanState::default();
        stale_state.objects.insert(
            "bench/stale".to_string(),
            LocalObjectUsage {
                bucket: "bench".to_string(),
                object: "stale".to_string(),
                last_modified_ns: Some(42),
                versions_count: 1,
                delete_markers_count: 0,
                total_size: 512,
                has_live_object: true,
            },
        );
        stale_state.last_scan_ns = Some(99);
        let meta = LocalUsageSnapshotMeta {
            disk_id: "disk-test".to_string(),
            ..Default::default()
        };
        let DiskScanResult {
            snapshot, state, status, ..
        } = scan_disk_blocking(root.to_path_buf(), meta, stale_state).expect("scan succeeds");
        assert!(state.objects.is_empty(), "stale entries should be cleared when files disappear");
        assert!(
            snapshot.buckets_usage.is_empty(),
            "no real xl.meta files means bucket usage should stay empty"
        );
        assert!(status.snapshot_exists, "snapshot status should indicate a refresh");
    }
    // Bulk scan: every object counted exactly once, state tracks them all.
    #[test]
    fn scan_disk_blocking_handles_large_volume() {
        const OBJECTS: usize = 256;
        let temp_dir = TempDir::new().expect("create temp dir");
        let root = temp_dir.path();
        let bucket_dir = root.join("bulk");
        for idx in 0..OBJECTS {
            let object_dir = bucket_dir.join(format!("obj-{idx:03}"));
            fs::create_dir_all(&object_dir).expect("create object directory");
            let size = 1024 + idx as i64;
            let file_meta = build_file_meta_with_object(0, size);
            let bytes = file_meta.marshal_msg().expect("serialize file meta");
            fs::write(object_dir.join("xl.meta"), bytes).expect("write xl.meta");
        }
        let meta = LocalUsageSnapshotMeta {
            disk_id: "disk-test".to_string(),
            ..Default::default()
        };
        let DiskScanResult { snapshot, state, .. } =
            scan_disk_blocking(root.to_path_buf(), meta, IncrementalScanState::default()).expect("bulk scan succeeds");
        let bucket_usage = snapshot
            .buckets_usage
            .get("bulk")
            .expect("bucket usage present for bulk scan");
        assert_eq!(bucket_usage.objects_count as usize, OBJECTS, "should count all objects once");
        assert!(
            bucket_usage.size >= (1024 * OBJECTS) as u64,
            "aggregated size should grow with object count"
        );
        assert_eq!(state.objects.len(), OBJECTS, "incremental state tracks every object");
    }
}

View File

@@ -1,430 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::scanner::node_scanner::{BucketStats, DiskStats, LocalScanStats};
use crate::{Error, Result};
use rustfs_common::data_usage::DataUsageInfo;
use serde::{Deserialize, Serialize};
use std::{
path::{Path, PathBuf},
sync::Arc,
sync::atomic::{AtomicU64, Ordering},
time::{Duration, SystemTime},
};
use tokio::sync::RwLock;
use tracing::{debug, error, info, warn};
/// Manages this node's scanner statistics: holds them in memory and persists
/// them to disk atomically (temp file + rename) with a backup copy for
/// crash recovery.
pub struct LocalStatsManager {
    /// node id
    node_id: String,
    /// stats file path (the live JSON file)
    stats_file: PathBuf,
    /// backup file path (copy of the previous stats file, used on load failure)
    backup_file: PathBuf,
    /// temp file path (written first, then renamed over stats_file)
    temp_file: PathBuf,
    /// local stats data
    stats: Arc<RwLock<LocalScanStats>>,
    /// minimum interval between saves (save_stats is a no-op inside it)
    save_interval: Duration,
    /// last save time
    last_save: Arc<RwLock<SystemTime>>,
    /// lock-free stats counters, updated alongside the locked stats
    counters: Arc<StatsCounters>,
}
/// Monotonic scan counters, kept as atomics so hot paths can update them
/// without taking the stats lock.
pub struct StatsCounters {
    /// total scanned objects
    pub total_objects_scanned: AtomicU64,
    /// total healthy objects
    pub total_healthy_objects: AtomicU64,
    /// total corrupted objects
    pub total_corrupted_objects: AtomicU64,
    /// total scanned bytes
    pub total_bytes_scanned: AtomicU64,
    /// total scan errors
    pub total_scan_errors: AtomicU64,
    /// total heal triggered
    pub total_heal_triggered: AtomicU64,
}
impl Default for StatsCounters {
    /// Every counter starts at zero.
    fn default() -> Self {
        let zero = || AtomicU64::new(0);
        Self {
            total_objects_scanned: zero(),
            total_healthy_objects: zero(),
            total_corrupted_objects: zero(),
            total_bytes_scanned: zero(),
            total_scan_errors: zero(),
            total_heal_triggered: zero(),
        }
    }
}
/// Result of scanning a single object on one disk.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScanResultEntry {
    /// object path
    pub object_path: String,
    /// bucket name
    pub bucket_name: String,
    /// object size in bytes
    pub object_size: u64,
    /// whether the object passed the health check
    pub is_healthy: bool,
    /// error message (if any)
    pub error_message: Option<String>,
    /// when the object was scanned
    pub scan_time: SystemTime,
    /// disk the object was scanned on
    pub disk_id: String,
}
/// Results of one complete scan pass over a disk, fed into
/// `LocalStatsManager::update_disk_scan_result`.
#[derive(Debug, Clone)]
pub struct BatchScanResult {
    /// disk id
    pub disk_id: String,
    /// one entry per scanned object
    pub entries: Vec<ScanResultEntry>,
    /// scan start time
    pub scan_start: SystemTime,
    /// scan end time
    pub scan_end: SystemTime,
    /// scan duration
    pub scan_duration: Duration,
}
impl LocalStatsManager {
/// create new local stats manager
pub fn new(node_id: &str, data_dir: &Path) -> Self {
// ensure data directory exists
if !data_dir.exists()
&& let Err(e) = std::fs::create_dir_all(data_dir)
{
error!("create stats data directory failed {:?}: {}", data_dir, e);
}
let stats_file = data_dir.join(format!("scanner_stats_{node_id}.json"));
let backup_file = data_dir.join(format!("scanner_stats_{node_id}.backup"));
let temp_file = data_dir.join(format!("scanner_stats_{node_id}.tmp"));
Self {
node_id: node_id.to_string(),
stats_file,
backup_file,
temp_file,
stats: Arc::new(RwLock::new(LocalScanStats::default())),
save_interval: Duration::from_secs(60), // 60 seconds save once
last_save: Arc::new(RwLock::new(SystemTime::UNIX_EPOCH)),
counters: Arc::new(StatsCounters::default()),
}
}
    /// Load persisted stats into memory. Falls back to the backup file when
    /// the main file is unreadable, and to default (empty) stats when both
    /// fail — this method never returns an error for a corrupt store.
    pub async fn load_stats(&self) -> Result<()> {
        if !self.stats_file.exists() {
            info!("stats data file not exists, will create new stats data");
            return Ok(());
        }
        match self.load_stats_from_file(&self.stats_file).await {
            Ok(stats) => {
                *self.stats.write().await = stats;
                info!("success load local stats data");
                Ok(())
            }
            Err(e) => {
                warn!("load main stats file failed: {}, try backup file", e);
                // Main file unreadable: try the backup written by save_stats.
                match self.load_stats_from_file(&self.backup_file).await {
                    Ok(stats) => {
                        *self.stats.write().await = stats;
                        warn!("restore stats data from backup file");
                        Ok(())
                    }
                    Err(backup_e) => {
                        // Both copies unreadable: keep the in-memory defaults.
                        warn!("backup file also cannot load: {}, will use default stats data", backup_e);
                        Ok(())
                    }
                }
            }
        }
    }
/// load stats data from file
async fn load_stats_from_file(&self, file_path: &Path) -> Result<LocalScanStats> {
let content = tokio::fs::read_to_string(file_path)
.await
.map_err(|e| Error::IO(format!("read stats file failed: {e}")))?;
let stats: LocalScanStats =
serde_json::from_str(&content).map_err(|e| Error::Serialization(format!("deserialize stats data failed: {e}")))?;
Ok(stats)
}
    /// Persist the current stats, throttled to at most once per
    /// `save_interval`. The write is atomic: serialize to the temp file,
    /// back up the existing file, then rename the temp file into place.
    pub async fn save_stats(&self) -> Result<()> {
        let now = SystemTime::now();
        let last_save = *self.last_save.read().await;
        // frequency control — skip silently if we saved too recently
        if now.duration_since(last_save).unwrap_or(Duration::ZERO) < self.save_interval {
            return Ok(());
        }
        // Clone the stats so the lock is not held across file I/O.
        let stats = self.stats.read().await.clone();
        // serialize
        let json_data = serde_json::to_string_pretty(&stats)
            .map_err(|e| Error::Serialization(format!("serialize stats data failed: {e}")))?;
        // atomic write: stage the new content in the temp file first
        tokio::fs::write(&self.temp_file, json_data)
            .await
            .map_err(|e| Error::IO(format!("write temp stats file failed: {e}")))?;
        // backup existing file so load_stats has a fallback copy
        if self.stats_file.exists() {
            tokio::fs::copy(&self.stats_file, &self.backup_file)
                .await
                .map_err(|e| Error::IO(format!("backup stats file failed: {e}")))?;
        }
        // atomic replace
        tokio::fs::rename(&self.temp_file, &self.stats_file)
            .await
            .map_err(|e| Error::IO(format!("replace stats file failed: {e}")))?;
        *self.last_save.write().await = now;
        debug!("save local stats data to {:?}", self.stats_file);
        Ok(())
    }
/// force save stats data
pub async fn force_save_stats(&self) -> Result<()> {
*self.last_save.write().await = SystemTime::UNIX_EPOCH;
self.save_stats().await
}
/// update disk scan result
pub async fn update_disk_scan_result(&self, result: &BatchScanResult) -> Result<()> {
let mut stats = self.stats.write().await;
// update disk stats
let disk_stat = stats.disks_stats.entry(result.disk_id.clone()).or_insert_with(|| DiskStats {
disk_id: result.disk_id.clone(),
..Default::default()
});
let healthy_count = result.entries.iter().filter(|e| e.is_healthy).count() as u64;
let error_count = result.entries.iter().filter(|e| !e.is_healthy).count() as u64;
disk_stat.objects_scanned += result.entries.len() as u64;
disk_stat.errors_count += error_count;
disk_stat.last_scan_time = result.scan_end;
disk_stat.scan_duration = result.scan_duration;
disk_stat.scan_completed = true;
// update overall stats
stats.objects_scanned += result.entries.len() as u64;
stats.healthy_objects += healthy_count;
stats.corrupted_objects += error_count;
stats.last_update = SystemTime::now();
// update bucket stats
for entry in &result.entries {
let _bucket_stat = stats
.buckets_stats
.entry(entry.bucket_name.clone())
.or_insert_with(BucketStats::default);
// TODO: update BucketStats
}
// update atomic counters
self.counters
.total_objects_scanned
.fetch_add(result.entries.len() as u64, Ordering::Relaxed);
self.counters
.total_healthy_objects
.fetch_add(healthy_count, Ordering::Relaxed);
self.counters
.total_corrupted_objects
.fetch_add(error_count, Ordering::Relaxed);
let total_bytes: u64 = result.entries.iter().map(|e| e.object_size).sum();
self.counters.total_bytes_scanned.fetch_add(total_bytes, Ordering::Relaxed);
if error_count > 0 {
self.counters.total_scan_errors.fetch_add(error_count, Ordering::Relaxed);
}
drop(stats);
debug!(
"update disk {} scan result: objects {}, healthy {}, error {}",
result.disk_id,
result.entries.len(),
healthy_count,
error_count
);
Ok(())
}
/// record single object scan result
pub async fn record_object_scan(&self, entry: ScanResultEntry) -> Result<()> {
let result = BatchScanResult {
disk_id: entry.disk_id.clone(),
entries: vec![entry],
scan_start: SystemTime::now(),
scan_end: SystemTime::now(),
scan_duration: Duration::from_millis(0),
};
self.update_disk_scan_result(&result).await
}
/// get local stats data copy
pub async fn get_stats(&self) -> LocalScanStats {
self.stats.read().await.clone()
}
/// get real-time counters
pub fn get_counters(&self) -> Arc<StatsCounters> {
self.counters.clone()
}
/// reset stats data
pub async fn reset_stats(&self) -> Result<()> {
{
let mut stats = self.stats.write().await;
*stats = LocalScanStats::default();
}
// reset counters
self.counters.total_objects_scanned.store(0, Ordering::Relaxed);
self.counters.total_healthy_objects.store(0, Ordering::Relaxed);
self.counters.total_corrupted_objects.store(0, Ordering::Relaxed);
self.counters.total_bytes_scanned.store(0, Ordering::Relaxed);
self.counters.total_scan_errors.store(0, Ordering::Relaxed);
self.counters.total_heal_triggered.store(0, Ordering::Relaxed);
info!("reset local stats data");
Ok(())
}
/// get stats summary
pub async fn get_stats_summary(&self) -> StatsSummary {
let stats = self.stats.read().await;
StatsSummary {
node_id: self.node_id.clone(),
total_objects_scanned: self.counters.total_objects_scanned.load(Ordering::Relaxed),
total_healthy_objects: self.counters.total_healthy_objects.load(Ordering::Relaxed),
total_corrupted_objects: self.counters.total_corrupted_objects.load(Ordering::Relaxed),
total_bytes_scanned: self.counters.total_bytes_scanned.load(Ordering::Relaxed),
total_scan_errors: self.counters.total_scan_errors.load(Ordering::Relaxed),
total_heal_triggered: self.counters.total_heal_triggered.load(Ordering::Relaxed),
total_disks: stats.disks_stats.len(),
total_buckets: stats.buckets_stats.len(),
last_update: stats.last_update,
scan_progress: stats.scan_progress.clone(),
data_usage: stats.data_usage.clone(),
}
}
/// record heal triggered
pub async fn record_heal_triggered(&self, object_path: &str, error_message: &str) {
self.counters.total_heal_triggered.fetch_add(1, Ordering::Relaxed);
info!("record heal triggered: object={}, error={}", object_path, error_message);
}
/// update data usage stats
pub async fn update_data_usage(&self, data_usage: DataUsageInfo) {
let mut stats = self.stats.write().await;
stats.data_usage = data_usage;
stats.last_update = SystemTime::now();
debug!("update data usage stats");
}
/// cleanup stats files
pub async fn cleanup_stats_files(&self) -> Result<()> {
// delete main file
if self.stats_file.exists() {
tokio::fs::remove_file(&self.stats_file)
.await
.map_err(|e| Error::IO(format!("delete stats file failed: {e}")))?;
}
// delete backup file
if self.backup_file.exists() {
tokio::fs::remove_file(&self.backup_file)
.await
.map_err(|e| Error::IO(format!("delete backup stats file failed: {e}")))?;
}
// delete temp file
if self.temp_file.exists() {
tokio::fs::remove_file(&self.temp_file)
.await
.map_err(|e| Error::IO(format!("delete temp stats file failed: {e}")))?;
}
info!("cleanup all stats files");
Ok(())
}
/// set save interval
pub fn set_save_interval(&mut self, interval: Duration) {
self.save_interval = interval;
info!("set stats data save interval to {:?}", interval);
}
}
/// stats summary
///
/// Serializable per-node snapshot of scanner statistics; this is the payload
/// the stats aggregator fetches from each node over HTTP.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StatsSummary {
    /// node id
    pub node_id: String,
    /// total scanned objects
    pub total_objects_scanned: u64,
    /// total healthy objects
    pub total_healthy_objects: u64,
    /// total corrupted objects
    pub total_corrupted_objects: u64,
    /// total scanned bytes
    pub total_bytes_scanned: u64,
    /// total scan errors
    pub total_scan_errors: u64,
    /// total heal triggered
    pub total_heal_triggered: u64,
    /// total disks
    pub total_disks: usize,
    /// total buckets
    pub total_buckets: usize,
    /// last update time
    pub last_update: SystemTime,
    /// scan progress
    pub scan_progress: super::node_scanner::ScanProgress,
    /// data usage snapshot for the node
    pub data_usage: DataUsageInfo,
}

View File

@@ -1,305 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use serde::{Deserialize, Serialize};
use std::{
collections::HashMap,
sync::atomic::{AtomicU64, Ordering},
time::{Duration, SystemTime},
};
use tracing::info;
/// Scanner metrics
///
/// Cumulative, serializable snapshot of scanner activity. The counter fields
/// are totals since server start; the rate/duration fields are left at their
/// placeholders by `MetricsCollector::get_metrics` and filled in by the
/// scanner when it produces a snapshot.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ScannerMetrics {
    /// Total objects scanned since server start
    pub objects_scanned: u64,
    /// Total object versions scanned since server start
    pub versions_scanned: u64,
    /// Total directories scanned since server start
    pub directories_scanned: u64,
    /// Total bucket scans started since server start
    pub bucket_scans_started: u64,
    /// Total bucket scans finished since server start
    pub bucket_scans_finished: u64,
    /// Total objects with health issues found
    pub objects_with_issues: u64,
    /// Total heal tasks queued
    pub heal_tasks_queued: u64,
    /// Total heal tasks completed
    pub heal_tasks_completed: u64,
    /// Total heal tasks failed
    pub heal_tasks_failed: u64,
    /// Total healthy objects found
    pub healthy_objects: u64,
    /// Total corrupted objects found
    pub corrupted_objects: u64,
    /// Last scan activity time
    pub last_activity: Option<SystemTime>,
    /// Current scan cycle
    pub current_cycle: u64,
    /// Total scan cycles completed
    pub total_cycles: u64,
    /// Current scan duration
    pub current_scan_duration: Option<Duration>,
    /// Average scan duration
    pub avg_scan_duration: Duration,
    /// Objects scanned per second
    pub objects_per_second: f64,
    /// Buckets scanned per second
    pub buckets_per_second: f64,
    /// Storage metrics by bucket
    pub bucket_metrics: HashMap<String, BucketMetrics>,
    /// Disk metrics
    pub disk_metrics: HashMap<String, DiskMetrics>,
}
/// Bucket-specific metrics
///
/// Per-bucket slice of the scanner metrics, keyed by bucket name in
/// `ScannerMetrics::bucket_metrics`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct BucketMetrics {
    /// Bucket name
    pub bucket: String,
    /// Total objects in bucket
    pub total_objects: u64,
    /// Total size of objects in bucket (bytes)
    pub total_size: u64,
    /// Objects with health issues
    pub objects_with_issues: u64,
    /// Last scan time
    pub last_scan_time: Option<SystemTime>,
    /// Scan duration
    pub scan_duration: Option<Duration>,
    /// Heal tasks queued for this bucket
    pub heal_tasks_queued: u64,
    /// Heal tasks completed for this bucket
    pub heal_tasks_completed: u64,
    /// Heal tasks failed for this bucket
    pub heal_tasks_failed: u64,
}
/// Disk-specific metrics
///
/// Per-disk slice of the scanner metrics, keyed by disk path in
/// `ScannerMetrics::disk_metrics`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct DiskMetrics {
    /// Disk path
    pub disk_path: String,
    /// Total disk space (bytes)
    pub total_space: u64,
    /// Used disk space (bytes)
    pub used_space: u64,
    /// Free disk space (bytes)
    pub free_space: u64,
    /// Objects scanned on this disk
    pub objects_scanned: u64,
    /// Objects with issues on this disk
    pub objects_with_issues: u64,
    /// Last scan time
    pub last_scan_time: Option<SystemTime>,
    /// Whether disk is online
    pub is_online: bool,
    /// Whether disk is being scanned
    pub is_scanning: bool,
}
/// Thread-safe metrics collector
///
/// Lock-free counterpart of [`ScannerMetrics`]: every field is an `AtomicU64`
/// updated with relaxed ordering and snapshotted via `get_metrics`.
pub struct MetricsCollector {
    /// Atomic counters for real-time metrics
    objects_scanned: AtomicU64,
    versions_scanned: AtomicU64,
    directories_scanned: AtomicU64,
    bucket_scans_started: AtomicU64,
    bucket_scans_finished: AtomicU64,
    objects_with_issues: AtomicU64,
    // Heal pipeline counters.
    heal_tasks_queued: AtomicU64,
    heal_tasks_completed: AtomicU64,
    heal_tasks_failed: AtomicU64,
    // Scan cycle bookkeeping.
    current_cycle: AtomicU64,
    total_cycles: AtomicU64,
    // Object health tallies.
    healthy_objects: AtomicU64,
    corrupted_objects: AtomicU64,
}
impl MetricsCollector {
    /// Create a new metrics collector with every counter at zero.
    pub fn new() -> Self {
        Self {
            objects_scanned: AtomicU64::default(),
            versions_scanned: AtomicU64::default(),
            directories_scanned: AtomicU64::default(),
            bucket_scans_started: AtomicU64::default(),
            bucket_scans_finished: AtomicU64::default(),
            objects_with_issues: AtomicU64::default(),
            heal_tasks_queued: AtomicU64::default(),
            heal_tasks_completed: AtomicU64::default(),
            heal_tasks_failed: AtomicU64::default(),
            current_cycle: AtomicU64::default(),
            total_cycles: AtomicU64::default(),
            healthy_objects: AtomicU64::default(),
            corrupted_objects: AtomicU64::default(),
        }
    }
    /// Add `count` to the objects-scanned counter.
    pub fn increment_objects_scanned(&self, count: u64) {
        self.objects_scanned.fetch_add(count, Ordering::Relaxed);
    }
    /// Add `count` to the versions-scanned counter.
    pub fn increment_versions_scanned(&self, count: u64) {
        self.versions_scanned.fetch_add(count, Ordering::Relaxed);
    }
    /// Add `count` to the directories-scanned counter.
    pub fn increment_directories_scanned(&self, count: u64) {
        self.directories_scanned.fetch_add(count, Ordering::Relaxed);
    }
    /// Add `count` to the bucket-scans-started counter.
    pub fn increment_bucket_scans_started(&self, count: u64) {
        self.bucket_scans_started.fetch_add(count, Ordering::Relaxed);
    }
    /// Add `count` to the bucket-scans-finished counter.
    pub fn increment_bucket_scans_finished(&self, count: u64) {
        self.bucket_scans_finished.fetch_add(count, Ordering::Relaxed);
    }
    /// Add `count` to the objects-with-issues counter.
    pub fn increment_objects_with_issues(&self, count: u64) {
        self.objects_with_issues.fetch_add(count, Ordering::Relaxed);
    }
    /// Add `count` to the heal-tasks-queued counter.
    pub fn increment_heal_tasks_queued(&self, count: u64) {
        self.heal_tasks_queued.fetch_add(count, Ordering::Relaxed);
    }
    /// Add `count` to the heal-tasks-completed counter.
    pub fn increment_heal_tasks_completed(&self, count: u64) {
        self.heal_tasks_completed.fetch_add(count, Ordering::Relaxed);
    }
    /// Add `count` to the heal-tasks-failed counter.
    pub fn increment_heal_tasks_failed(&self, count: u64) {
        self.heal_tasks_failed.fetch_add(count, Ordering::Relaxed);
    }
    /// Overwrite the current scan cycle number.
    pub fn set_current_cycle(&self, cycle: u64) {
        self.current_cycle.store(cycle, Ordering::Relaxed);
    }
    /// Bump the completed-cycles counter by one.
    pub fn increment_total_cycles(&self) {
        self.total_cycles.fetch_add(1, Ordering::Relaxed);
    }
    /// Bump the healthy-objects counter by one.
    pub fn increment_healthy_objects(&self) {
        self.healthy_objects.fetch_add(1, Ordering::Relaxed);
    }
    /// Bump the corrupted-objects counter by one.
    pub fn increment_corrupted_objects(&self) {
        self.corrupted_objects.fetch_add(1, Ordering::Relaxed);
    }
    /// Snapshot all counters into a [`ScannerMetrics`].
    ///
    /// Rates, durations, and per-bucket/per-disk maps are placeholders here;
    /// the scanner populates them after taking the snapshot.
    pub fn get_metrics(&self) -> ScannerMetrics {
        ScannerMetrics {
            objects_scanned: self.objects_scanned.load(Ordering::Relaxed),
            versions_scanned: self.versions_scanned.load(Ordering::Relaxed),
            directories_scanned: self.directories_scanned.load(Ordering::Relaxed),
            bucket_scans_started: self.bucket_scans_started.load(Ordering::Relaxed),
            bucket_scans_finished: self.bucket_scans_finished.load(Ordering::Relaxed),
            objects_with_issues: self.objects_with_issues.load(Ordering::Relaxed),
            heal_tasks_queued: self.heal_tasks_queued.load(Ordering::Relaxed),
            heal_tasks_completed: self.heal_tasks_completed.load(Ordering::Relaxed),
            heal_tasks_failed: self.heal_tasks_failed.load(Ordering::Relaxed),
            healthy_objects: self.healthy_objects.load(Ordering::Relaxed),
            corrupted_objects: self.corrupted_objects.load(Ordering::Relaxed),
            last_activity: Some(SystemTime::now()),
            current_cycle: self.current_cycle.load(Ordering::Relaxed),
            total_cycles: self.total_cycles.load(Ordering::Relaxed),
            current_scan_duration: None,       // Will be set by scanner
            avg_scan_duration: Duration::ZERO, // Will be calculated
            objects_per_second: 0.0,           // Will be calculated
            buckets_per_second: 0.0,           // Will be calculated
            bucket_metrics: HashMap::new(),    // Will be populated by scanner
            disk_metrics: HashMap::new(),      // Will be populated by scanner
        }
    }
    /// Zero every counter.
    pub fn reset(&self) {
        let counters = [
            &self.objects_scanned,
            &self.versions_scanned,
            &self.directories_scanned,
            &self.bucket_scans_started,
            &self.bucket_scans_finished,
            &self.objects_with_issues,
            &self.heal_tasks_queued,
            &self.heal_tasks_completed,
            &self.heal_tasks_failed,
            &self.current_cycle,
            &self.total_cycles,
            &self.healthy_objects,
            &self.corrupted_objects,
        ];
        for counter in counters {
            counter.store(0, Ordering::Relaxed);
        }
        info!("Scanner metrics reset");
    }
}
impl Default for MetricsCollector {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh collector reports all-zero metrics.
    #[test]
    fn test_metrics_collector_creation() {
        let c = MetricsCollector::new();
        let snapshot = c.get_metrics();
        assert_eq!(snapshot.objects_scanned, 0);
        assert_eq!(snapshot.versions_scanned, 0);
    }

    /// Increments are reflected in the next snapshot.
    #[test]
    fn test_metrics_increment() {
        let c = MetricsCollector::new();
        c.increment_objects_scanned(10);
        c.increment_versions_scanned(5);
        c.increment_objects_with_issues(2);
        let snapshot = c.get_metrics();
        assert_eq!(snapshot.objects_scanned, 10);
        assert_eq!(snapshot.versions_scanned, 5);
        assert_eq!(snapshot.objects_with_issues, 2);
    }

    /// `reset` zeroes previously incremented counters.
    #[test]
    fn test_metrics_reset() {
        let c = MetricsCollector::new();
        c.increment_objects_scanned(10);
        c.reset();
        assert_eq!(c.get_metrics().objects_scanned, 0);
    }
}

View File

@@ -1,36 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Scanner submodules.
pub mod checkpoint;
pub mod data_scanner;
pub mod histogram;
pub mod io_monitor;
pub mod io_throttler;
pub mod lifecycle;
pub mod local_scan;
pub mod local_stats;
pub mod metrics;
pub mod node_scanner;
pub mod stats_aggregator;
// Curated re-exports forming the scanner module's public API surface.
pub use checkpoint::{CheckpointData, CheckpointInfo, CheckpointManager};
pub use data_scanner::{ScanMode, Scanner, ScannerConfig, ScannerState};
pub use io_monitor::{AdvancedIOMonitor, IOMetrics, IOMonitorConfig};
pub use io_throttler::{AdvancedIOThrottler, IOThrottlerConfig, MetricsSnapshot, ResourceAllocation, ThrottleDecision};
pub use local_stats::{BatchScanResult, LocalStatsManager, ScanResultEntry, StatsSummary};
pub use metrics::{BucketMetrics, DiskMetrics, MetricsCollector, ScannerMetrics};
pub use node_scanner::{IOMonitor, IOThrottler, LoadLevel, LocalScanStats, NodeScanner, NodeScannerConfig};
pub use stats_aggregator::{
    AggregatedStats, DecentralizedStatsAggregator, DecentralizedStatsAggregatorConfig, NodeClient, NodeInfo,
};

File diff suppressed because it is too large Load Diff

View File

@@ -1,771 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::scanner::{
local_stats::StatsSummary,
node_scanner::{BucketStats, LoadLevel, ScanProgress},
};
use crate::{Error, Result};
use rustfs_common::data_usage::DataUsageInfo;
use serde::{Deserialize, Serialize};
use std::{
collections::HashMap,
sync::Arc,
time::{Duration, SystemTime},
};
use tokio::sync::RwLock;
use tracing::{debug, info, warn};
/// node client config
///
/// Timeouts and retry policy used by [`NodeClient`] when talking to a peer.
#[derive(Debug, Clone)]
pub struct NodeClientConfig {
    /// connect timeout
    pub connect_timeout: Duration,
    /// request timeout
    pub request_timeout: Duration,
    /// retry times
    pub max_retries: u32,
    /// retry interval
    pub retry_interval: Duration,
}

impl Default for NodeClientConfig {
    /// Defaults: 5s connect, 10s request, 3 attempts spaced 1s apart.
    fn default() -> Self {
        NodeClientConfig {
            max_retries: 3,
            retry_interval: Duration::from_secs(1),
            connect_timeout: Duration::from_secs(5),
            request_timeout: Duration::from_secs(10),
        }
    }
}
/// node info
///
/// Identity and liveness record for a peer node participating in stats
/// aggregation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NodeInfo {
    /// node id
    pub node_id: String,
    /// node address
    pub address: String,
    /// node port
    pub port: u16,
    /// is online
    pub is_online: bool,
    /// last heartbeat time
    pub last_heartbeat: SystemTime,
    /// node version
    pub version: String,
}
/// aggregated stats
///
/// Cluster-wide view assembled by summing each node's [`StatsSummary`];
/// totals below are sums over all contributing nodes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AggregatedStats {
    /// aggregation timestamp
    pub aggregation_timestamp: SystemTime,
    /// number of nodes participating in aggregation
    pub node_count: usize,
    /// number of online nodes
    pub online_node_count: usize,
    /// total scanned objects
    pub total_objects_scanned: u64,
    /// total healthy objects
    pub total_healthy_objects: u64,
    /// total corrupted objects
    pub total_corrupted_objects: u64,
    /// total scanned bytes
    pub total_bytes_scanned: u64,
    /// total scan errors
    pub total_scan_errors: u64,
    /// total heal triggered
    pub total_heal_triggered: u64,
    /// total disks
    pub total_disks: usize,
    /// total buckets
    pub total_buckets: usize,
    /// aggregated data usage
    pub aggregated_data_usage: DataUsageInfo,
    /// node summaries
    pub node_summaries: HashMap<String, StatsSummary>,
    /// aggregated bucket stats
    pub aggregated_bucket_stats: HashMap<String, BucketStats>,
    /// aggregated scan progress
    pub scan_progress_summary: ScanProgressSummary,
    /// load level distribution
    pub load_level_distribution: HashMap<LoadLevel, usize>,
}
impl Default for AggregatedStats {
    /// Manual impl rather than `#[derive(Default)]` because the aggregation
    /// timestamp should be the construction time (`SystemTime` has no
    /// `Default`); all other fields start empty/zero.
    fn default() -> Self {
        Self {
            aggregation_timestamp: SystemTime::now(),
            node_count: 0,
            online_node_count: 0,
            total_objects_scanned: 0,
            total_healthy_objects: 0,
            total_corrupted_objects: 0,
            total_bytes_scanned: 0,
            total_scan_errors: 0,
            total_heal_triggered: 0,
            total_disks: 0,
            total_buckets: 0,
            aggregated_data_usage: DataUsageInfo::default(),
            node_summaries: HashMap::new(),
            aggregated_bucket_stats: HashMap::new(),
            scan_progress_summary: ScanProgressSummary::default(),
            load_level_distribution: HashMap::new(),
        }
    }
}
/// scan progress summary
///
/// Progress rollup across all nodes, built during aggregation.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ScanProgressSummary {
    /// average current cycle
    pub average_current_cycle: f64,
    /// total completed disks
    pub total_completed_disks: usize,
    /// total completed buckets
    pub total_completed_buckets: usize,
    /// earliest scan start time across all nodes
    pub earliest_scan_start: Option<SystemTime>,
    /// estimated completion time
    pub estimated_completion: Option<SystemTime>,
    /// node progress
    pub node_progress: HashMap<String, ScanProgress>,
}
/// node client
///
/// responsible for communicating with other nodes, getting stats data
pub struct NodeClient {
    /// node info
    node_info: NodeInfo,
    /// config (timeouts and retry policy)
    config: NodeClientConfig,
    /// HTTP client, built once in `new` with timeouts from `config`
    http_client: reqwest::Client,
}
impl NodeClient {
/// create new node client
pub fn new(node_info: NodeInfo, config: NodeClientConfig) -> Self {
let http_client = reqwest::Client::builder()
.timeout(config.request_timeout)
.connect_timeout(config.connect_timeout)
.build()
.expect("Failed to create HTTP client");
Self {
node_info,
config,
http_client,
}
}
/// get node stats summary
pub async fn get_stats_summary(&self) -> Result<StatsSummary> {
let url = format!("http://{}:{}/internal/scanner/stats", self.node_info.address, self.node_info.port);
for attempt in 1..=self.config.max_retries {
match self.try_get_stats_summary(&url).await {
Ok(summary) => return Ok(summary),
Err(e) => {
warn!("try to get node {} stats failed: {}", self.node_info.node_id, e);
if attempt < self.config.max_retries {
tokio::time::sleep(self.config.retry_interval).await;
}
}
}
}
Err(Error::Other(format!("cannot get stats data from node {}", self.node_info.node_id)))
}
/// try to get stats summary
async fn try_get_stats_summary(&self, url: &str) -> Result<StatsSummary> {
let response = self
.http_client
.get(url)
.send()
.await
.map_err(|e| Error::Other(format!("HTTP request failed: {e}")))?;
if !response.status().is_success() {
return Err(Error::Other(format!("HTTP status error: {}", response.status())));
}
let summary = response
.json::<StatsSummary>()
.await
.map_err(|e| Error::Serialization(format!("deserialize stats data failed: {e}")))?;
Ok(summary)
}
/// check node health status
pub async fn check_health(&self) -> bool {
let url = format!("http://{}:{}/internal/health", self.node_info.address, self.node_info.port);
match self.http_client.get(&url).send().await {
Ok(response) => response.status().is_success(),
Err(_) => false,
}
}
/// get node info
pub fn get_node_info(&self) -> &NodeInfo {
&self.node_info
}
/// update node online status
pub fn update_online_status(&mut self, is_online: bool) {
self.node_info.is_online = is_online;
if is_online {
self.node_info.last_heartbeat = SystemTime::now();
}
}
}
/// decentralized stats aggregator config
///
/// Tuning knobs for [`DecentralizedStatsAggregator`].
#[derive(Debug, Clone)]
pub struct DecentralizedStatsAggregatorConfig {
    /// aggregation interval
    pub aggregation_interval: Duration,
    /// cache ttl
    pub cache_ttl: Duration,
    /// node timeout
    pub node_timeout: Duration,
    /// max concurrent aggregations
    pub max_concurrent_aggregations: usize,
}

impl Default for DecentralizedStatsAggregatorConfig {
    /// Defaults: aggregate every 30s, cache results for 3s, allow nodes 5s to
    /// respond, and query at most 10 nodes concurrently.
    fn default() -> Self {
        DecentralizedStatsAggregatorConfig {
            max_concurrent_aggregations: 10,
            node_timeout: Duration::from_secs(5),
            cache_ttl: Duration::from_secs(3),
            aggregation_interval: Duration::from_secs(30),
        }
    }
}
/// decentralized stats aggregator
///
/// real-time aggregate stats data from all nodes, provide global view
pub struct DecentralizedStatsAggregator {
    /// config
    config: Arc<RwLock<DecentralizedStatsAggregatorConfig>>,
    /// node clients, keyed by node id
    node_clients: Arc<RwLock<HashMap<String, Arc<NodeClient>>>>,
    /// cached aggregated stats (None until the first aggregation)
    cached_stats: Arc<RwLock<Option<AggregatedStats>>>,
    /// cache timestamp; `UNIX_EPOCH` is the sentinel for "never aggregated"
    cache_timestamp: Arc<RwLock<SystemTime>>,
    /// local node stats summary, pushed in via `set_local_stats`
    local_stats_summary: Arc<RwLock<Option<StatsSummary>>>,
}
impl DecentralizedStatsAggregator {
    /// create new decentralized stats aggregator
    ///
    /// `cache_timestamp` starts at `UNIX_EPOCH`, which the cache check treats
    /// as "no aggregation has happened yet".
    pub fn new(config: DecentralizedStatsAggregatorConfig) -> Self {
        Self {
            config: Arc::new(RwLock::new(config)),
            node_clients: Arc::new(RwLock::new(HashMap::new())),
            cached_stats: Arc::new(RwLock::new(None)),
            cache_timestamp: Arc::new(RwLock::new(SystemTime::UNIX_EPOCH)),
            local_stats_summary: Arc::new(RwLock::new(None)),
        }
    }
    /// add node client
    ///
    /// The client is created with default `NodeClientConfig` timeouts.
    pub async fn add_node(&self, node_info: NodeInfo) {
        let client_config = NodeClientConfig::default();
        let client = Arc::new(NodeClient::new(node_info.clone(), client_config));
        self.node_clients.write().await.insert(node_info.node_id.clone(), client);
        info!("add node to aggregator: {}", node_info.node_id);
    }
    /// remove node client
    pub async fn remove_node(&self, node_id: &str) {
        self.node_clients.write().await.remove(node_id);
        info!("remove node from aggregator: {}", node_id);
    }
    /// set local node stats summary
    ///
    /// The local summary is merged alongside remote node summaries during
    /// aggregation.
    pub async fn set_local_stats(&self, stats: StatsSummary) {
        *self.local_stats_summary.write().await = Some(stats);
    }
    /// get aggregated stats data (with cache)
    ///
    /// Returns the cached result when it is younger than `cache_ttl`;
    /// otherwise re-aggregates from all nodes and refreshes the cache.
    pub async fn get_aggregated_stats(&self) -> Result<AggregatedStats> {
        let config = self.config.read().await;
        let cache_ttl = config.cache_ttl;
        drop(config);
        // check cache validity
        let cache_timestamp = *self.cache_timestamp.read().await;
        let now = SystemTime::now();
        debug!(
            "cache check: cache_timestamp={:?}, now={:?}, cache_ttl={:?}",
            cache_timestamp, now, cache_ttl
        );
        // Check cache validity if timestamp is not initial value (UNIX_EPOCH)
        if cache_timestamp != SystemTime::UNIX_EPOCH
            && let Ok(elapsed) = now.duration_since(cache_timestamp)
        {
            if elapsed < cache_ttl {
                if let Some(cached) = self.cached_stats.read().await.as_ref() {
                    debug!("Returning cached aggregated stats, remaining TTL: {:?}", cache_ttl - elapsed);
                    return Ok(cached.clone());
                }
            } else {
                debug!("Cache expired: elapsed={:?} >= ttl={:?}", elapsed, cache_ttl);
            }
        }
        // cache expired, re-aggregate
        info!("cache expired, start re-aggregating stats data");
        let aggregation_timestamp = now;
        let aggregated = self.aggregate_stats_from_all_nodes(aggregation_timestamp).await?;
        // update cache
        *self.cached_stats.write().await = Some(aggregated.clone());
        // Use the time when aggregation completes as cache timestamp to avoid premature expiry during long runs
        *self.cache_timestamp.write().await = SystemTime::now();
        Ok(aggregated)
    }
    /// force refresh aggregated stats (ignore cache)
    pub async fn force_refresh_aggregated_stats(&self) -> Result<AggregatedStats> {
        let now = SystemTime::now();
        let aggregated = self.aggregate_stats_from_all_nodes(now).await?;
        // update cache
        *self.cached_stats.write().await = Some(aggregated.clone());
        // Cache timestamp should reflect completion time rather than aggregation start
        *self.cache_timestamp.write().await = SystemTime::now();
        Ok(aggregated)
    }
    /// aggregate stats data from all nodes
    ///
    /// Fetches every registered node's summary concurrently (bounded by the
    /// `max_concurrent_aggregations` semaphore), merges in the local summary,
    /// and folds everything into one `AggregatedStats`.
    async fn aggregate_stats_from_all_nodes(&self, aggregation_timestamp: SystemTime) -> Result<AggregatedStats> {
        let node_clients = self.node_clients.read().await;
        let config = self.config.read().await;
        // concurrent get stats data from all nodes
        let mut tasks = Vec::new();
        let semaphore = Arc::new(tokio::sync::Semaphore::new(config.max_concurrent_aggregations));
        // add local node stats
        let mut node_summaries = HashMap::new();
        if let Some(local_stats) = self.local_stats_summary.read().await.as_ref() {
            node_summaries.insert(local_stats.node_id.clone(), local_stats.clone());
        }
        // get remote node stats
        for (node_id, client) in node_clients.iter() {
            let client = client.clone();
            let semaphore = semaphore.clone();
            let node_id = node_id.clone();
            let task = tokio::spawn(async move {
                let _permit = match semaphore.acquire().await {
                    Ok(permit) => permit,
                    Err(e) => {
                        warn!("Failed to acquire semaphore for node {}: {}", node_id, e);
                        return None;
                    }
                };
                match client.get_stats_summary().await {
                    Ok(summary) => {
                        debug!("successfully get node {} stats data", node_id);
                        Some((node_id, summary))
                    }
                    Err(e) => {
                        // Unreachable nodes are simply skipped; they lower the
                        // effective node count in the result.
                        warn!("get node {} stats data failed: {}", node_id, e);
                        None
                    }
                }
            });
            tasks.push(task);
        }
        // wait for all tasks to complete
        for task in tasks {
            if let Ok(Some((node_id, summary))) = task.await {
                node_summaries.insert(node_id, summary);
            }
        }
        drop(node_clients);
        drop(config);
        // aggregate stats data
        let aggregated = self.aggregate_node_summaries(node_summaries, aggregation_timestamp).await;
        info!(
            "aggregate stats completed: {} nodes, {} online",
            aggregated.node_count, aggregated.online_node_count
        );
        Ok(aggregated)
    }
    /// aggregate node summaries
    ///
    /// Pure fold of the collected summaries into totals plus a progress
    /// rollup (average cycle, earliest start via `.min()`).
    async fn aggregate_node_summaries(
        &self,
        node_summaries: HashMap<String, StatsSummary>,
        aggregation_timestamp: SystemTime,
    ) -> AggregatedStats {
        let mut aggregated = AggregatedStats {
            aggregation_timestamp,
            node_count: node_summaries.len(),
            online_node_count: node_summaries.len(), // assume all nodes with data are online
            node_summaries: node_summaries.clone(),
            ..Default::default()
        };
        // aggregate numeric stats
        for (node_id, summary) in &node_summaries {
            aggregated.total_objects_scanned += summary.total_objects_scanned;
            aggregated.total_healthy_objects += summary.total_healthy_objects;
            aggregated.total_corrupted_objects += summary.total_corrupted_objects;
            aggregated.total_bytes_scanned += summary.total_bytes_scanned;
            aggregated.total_scan_errors += summary.total_scan_errors;
            aggregated.total_heal_triggered += summary.total_heal_triggered;
            aggregated.total_disks += summary.total_disks;
            aggregated.total_buckets += summary.total_buckets;
            aggregated.aggregated_data_usage.merge(&summary.data_usage);
            // aggregate scan progress
            aggregated
                .scan_progress_summary
                .node_progress
                .insert(node_id.clone(), summary.scan_progress.clone());
            aggregated.scan_progress_summary.total_completed_disks += summary.scan_progress.completed_disks.len();
            aggregated.scan_progress_summary.total_completed_buckets += summary.scan_progress.completed_buckets.len();
        }
        // calculate average scan cycle
        if !node_summaries.is_empty() {
            let total_cycles: u64 = node_summaries.values().map(|s| s.scan_progress.current_cycle).sum();
            aggregated.scan_progress_summary.average_current_cycle = total_cycles as f64 / node_summaries.len() as f64;
        }
        // find earliest scan start time
        aggregated.scan_progress_summary.earliest_scan_start =
            node_summaries.values().map(|s| s.scan_progress.scan_start_time).min();
        // TODO: aggregate bucket stats and data usage
        // here we need to implement it based on the specific BucketStats and DataUsageInfo structure
        aggregated
    }
    /// get nodes health status
    ///
    /// Probes every registered node concurrently; a transport failure maps to
    /// `false` for that node.
    pub async fn get_nodes_health(&self) -> HashMap<String, bool> {
        let node_clients = self.node_clients.read().await;
        let mut health_status = HashMap::new();
        // concurrent check all nodes health status
        let mut tasks = Vec::new();
        for (node_id, client) in node_clients.iter() {
            let client = client.clone();
            let node_id = node_id.clone();
            let task = tokio::spawn(async move {
                let is_healthy = client.check_health().await;
                (node_id, is_healthy)
            });
            tasks.push(task);
        }
        // collect results
        for task in tasks {
            if let Ok((node_id, is_healthy)) = task.await {
                health_status.insert(node_id, is_healthy);
            }
        }
        health_status
    }
    /// get online nodes list
    pub async fn get_online_nodes(&self) -> Vec<String> {
        let health_status = self.get_nodes_health().await;
        health_status
            .into_iter()
            .filter_map(|(node_id, is_healthy)| if is_healthy { Some(node_id) } else { None })
            .collect()
    }
    /// clear cache
    ///
    /// Resets the timestamp to the `UNIX_EPOCH` sentinel so the next
    /// `get_aggregated_stats` call re-aggregates.
    pub async fn clear_cache(&self) {
        *self.cached_stats.write().await = None;
        *self.cache_timestamp.write().await = SystemTime::UNIX_EPOCH;
        info!("clear aggregated stats cache");
    }
    /// get cache status
    ///
    /// NOTE(review): `is_valid` is computed from the timestamp alone and does
    /// not check `has_cached_data` or the `UNIX_EPOCH` sentinel the way
    /// `get_aggregated_stats` does — confirm whether callers rely on that.
    pub async fn get_cache_status(&self) -> CacheStatus {
        let cached_stats = self.cached_stats.read().await;
        let cache_timestamp = *self.cache_timestamp.read().await;
        let config = self.config.read().await;
        let is_valid = if let Ok(elapsed) = SystemTime::now().duration_since(cache_timestamp) {
            elapsed < config.cache_ttl
        } else {
            false
        };
        CacheStatus {
            has_cached_data: cached_stats.is_some(),
            cache_timestamp,
            is_valid,
            ttl: config.cache_ttl,
        }
    }
    /// update config
    ///
    /// Takes effect on the next aggregation/cache check; does not invalidate
    /// the existing cache.
    pub async fn update_config(&self, new_config: DecentralizedStatsAggregatorConfig) {
        *self.config.write().await = new_config;
        info!("update aggregator config");
    }
}
/// Snapshot of the aggregator's cache state, as returned by `get_cache_status`.
#[derive(Debug, Clone)]
pub struct CacheStatus {
    /// Whether an aggregated result is currently cached.
    pub has_cached_data: bool,
    /// When the cached result was produced (`UNIX_EPOCH` if never).
    pub cache_timestamp: SystemTime,
    /// Whether the cached result is still within the configured TTL.
    pub is_valid: bool,
    /// The TTL currently configured for the cache.
    pub ttl: Duration,
}
#[cfg(test)]
mod tests {
    //! Unit tests for the decentralized stats aggregator: verify that
    //! per-node `StatsSummary` values (including `DataUsageInfo`) merge
    //! correctly for the single-node and multi-node cases.
    use super::*;
    use crate::scanner::node_scanner::{BucketScanState, ScanProgress};
    use rustfs_common::data_usage::{BucketUsageInfo, DataUsageInfo};
    use std::collections::{HashMap, HashSet};
    use std::time::Duration;
    /// A single local summary must surface unchanged in the aggregate,
    /// including its per-bucket data usage.
    #[tokio::test]
    async fn aggregated_stats_merge_data_usage() {
        let aggregator = DecentralizedStatsAggregator::new(DecentralizedStatsAggregatorConfig::default());
        let mut data_usage = DataUsageInfo::default();
        let bucket_usage = BucketUsageInfo {
            objects_count: 5,
            size: 1024,
            ..Default::default()
        };
        data_usage.buckets_usage.insert("bucket".to_string(), bucket_usage);
        data_usage.objects_total_count = 5;
        data_usage.objects_total_size = 1024;
        let summary = StatsSummary {
            node_id: "local-node".to_string(),
            total_objects_scanned: 10,
            total_healthy_objects: 9,
            total_corrupted_objects: 1,
            total_bytes_scanned: 2048,
            total_scan_errors: 0,
            total_heal_triggered: 0,
            total_disks: 2,
            total_buckets: 1,
            last_update: SystemTime::now(),
            scan_progress: ScanProgress::default(),
            data_usage: data_usage.clone(),
        };
        aggregator.set_local_stats(summary).await;
        // Wait briefly to ensure async cache writes settle in high-concurrency environments
        tokio::time::sleep(Duration::from_millis(10)).await;
        let aggregated = aggregator.get_aggregated_stats().await.expect("aggregated stats");
        assert_eq!(aggregated.node_count, 1);
        assert!(aggregated.node_summaries.contains_key("local-node"));
        assert_eq!(aggregated.aggregated_data_usage.objects_total_count, 5);
        assert_eq!(
            aggregated
                .aggregated_data_usage
                .buckets_usage
                .get("bucket")
                .expect("bucket usage present")
                .objects_count,
            5
        );
    }
    /// Two node summaries with disjoint buckets must merge additively:
    /// counters sum, and both buckets appear in the aggregated usage.
    #[tokio::test]
    async fn aggregated_stats_merge_multiple_nodes() {
        let aggregator = DecentralizedStatsAggregator::new(DecentralizedStatsAggregatorConfig::default());
        let mut local_usage = DataUsageInfo::default();
        let local_bucket = BucketUsageInfo {
            objects_count: 3,
            versions_count: 3,
            size: 150,
            ..Default::default()
        };
        local_usage.buckets_usage.insert("local-bucket".to_string(), local_bucket);
        local_usage.calculate_totals();
        local_usage.buckets_count = local_usage.buckets_usage.len() as u64;
        local_usage.last_update = Some(SystemTime::now());
        let local_progress = ScanProgress {
            current_cycle: 1,
            completed_disks: {
                let mut set = std::collections::HashSet::new();
                set.insert("disk-local".to_string());
                set
            },
            completed_buckets: {
                let mut map = std::collections::HashMap::new();
                map.insert(
                    "local-bucket".to_string(),
                    BucketScanState {
                        completed: true,
                        last_object_key: Some("obj1".to_string()),
                        objects_scanned: 3,
                        scan_timestamp: SystemTime::now(),
                    },
                );
                map
            },
            ..Default::default()
        };
        let local_summary = StatsSummary {
            node_id: "node-local".to_string(),
            total_objects_scanned: 30,
            total_healthy_objects: 30,
            total_corrupted_objects: 0,
            total_bytes_scanned: 1500,
            total_scan_errors: 0,
            total_heal_triggered: 0,
            total_disks: 1,
            total_buckets: 1,
            last_update: SystemTime::now(),
            scan_progress: local_progress,
            data_usage: local_usage.clone(),
        };
        let mut remote_usage = DataUsageInfo::default();
        let remote_bucket = BucketUsageInfo {
            objects_count: 5,
            versions_count: 5,
            size: 250,
            ..Default::default()
        };
        remote_usage.buckets_usage.insert("remote-bucket".to_string(), remote_bucket);
        remote_usage.calculate_totals();
        remote_usage.buckets_count = remote_usage.buckets_usage.len() as u64;
        remote_usage.last_update = Some(SystemTime::now());
        let remote_progress = ScanProgress {
            current_cycle: 2,
            completed_disks: {
                let mut set = std::collections::HashSet::new();
                set.insert("disk-remote".to_string());
                set
            },
            completed_buckets: {
                let mut map = std::collections::HashMap::new();
                map.insert(
                    "remote-bucket".to_string(),
                    BucketScanState {
                        completed: true,
                        last_object_key: Some("remote-obj".to_string()),
                        objects_scanned: 5,
                        scan_timestamp: SystemTime::now(),
                    },
                );
                map
            },
            ..Default::default()
        };
        let remote_summary = StatsSummary {
            node_id: "node-remote".to_string(),
            total_objects_scanned: 50,
            total_healthy_objects: 48,
            total_corrupted_objects: 2,
            total_bytes_scanned: 2048,
            total_scan_errors: 1,
            total_heal_triggered: 1,
            total_disks: 2,
            total_buckets: 1,
            last_update: SystemTime::now(),
            scan_progress: remote_progress,
            data_usage: remote_usage.clone(),
        };
        let node_summaries: HashMap<_, _> = [
            (local_summary.node_id.clone(), local_summary.clone()),
            (remote_summary.node_id.clone(), remote_summary.clone()),
        ]
        .into_iter()
        .collect();
        let aggregated = aggregator.aggregate_node_summaries(node_summaries, SystemTime::now()).await;
        assert_eq!(aggregated.node_count, 2);
        // 30 (local) + 50 (remote)
        assert_eq!(aggregated.total_objects_scanned, 80);
        assert_eq!(aggregated.total_corrupted_objects, 2);
        // 1 (local) + 2 (remote)
        assert_eq!(aggregated.total_disks, 3);
        assert!(aggregated.node_summaries.contains_key("node-local"));
        assert!(aggregated.node_summaries.contains_key("node-remote"));
        assert_eq!(
            aggregated.aggregated_data_usage.objects_total_count,
            local_usage.objects_total_count + remote_usage.objects_total_count
        );
        assert_eq!(
            aggregated.aggregated_data_usage.objects_total_size,
            local_usage.objects_total_size + remote_usage.objects_total_size
        );
        let mut expected_buckets: HashSet<&str> = HashSet::new();
        expected_buckets.insert("local-bucket");
        expected_buckets.insert("remote-bucket");
        let actual_buckets: HashSet<&str> = aggregated
            .aggregated_data_usage
            .buckets_usage
            .keys()
            .map(|s| s.as_str())
            .collect();
        assert_eq!(expected_buckets, actual_buckets);
    }
}

View File

@@ -1,112 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#![cfg(test)]
use rustfs_ahm::scanner::data_scanner::Scanner;
use rustfs_common::data_usage::DataUsageInfo;
use rustfs_ecstore::GLOBAL_Endpoints;
use rustfs_ecstore::bucket::metadata_sys::{BucketMetadataSys, GLOBAL_BucketMetadataSys};
use rustfs_ecstore::endpoints::EndpointServerPools;
use rustfs_ecstore::store::ECStore;
use rustfs_ecstore::store_api::{ObjectIO, PutObjReader, StorageAPI};
use std::sync::{Arc, Once};
use tempfile::TempDir;
use tokio::sync::RwLock;
use tokio_util::sync::CancellationToken;
use tracing::Level;
/// Build a minimal single-node ECStore over a temp directory and populate objects.
///
/// Seeds the process-global endpoint/object-layer/metadata singletons if they
/// are not already set (other tests may have set them first), creates the
/// `fallback-bucket` bucket, and uploads `count` small objects into it.
/// Returns the `TempDir` guard (keep it alive for the store's lifetime) plus
/// the store itself.
async fn create_store_with_objects(count: usize) -> (TempDir, std::sync::Arc<ECStore>) {
    let temp_dir = TempDir::new().expect("temp dir");
    let root = temp_dir.path().to_string_lossy().to_string();
    // Create endpoints from the temp dir
    let (endpoint_pools, _setup) = EndpointServerPools::from_volumes("127.0.0.1:0", vec![root])
        .await
        .expect("endpoint pools");
    // Seed globals required by metadata sys if not already set
    if GLOBAL_Endpoints.get().is_none() {
        let _ = GLOBAL_Endpoints.set(endpoint_pools.clone());
    }
    // Port 0 lets the OS pick a free port, avoiding clashes between tests.
    let store = ECStore::new("127.0.0.1:0".parse().unwrap(), endpoint_pools, CancellationToken::new())
        .await
        .expect("create store");
    if rustfs_ecstore::global::new_object_layer_fn().is_none() {
        rustfs_ecstore::global::set_object_layer(store.clone()).await;
    }
    // Initialize metadata system before bucket operations
    if GLOBAL_BucketMetadataSys.get().is_none() {
        let mut sys = BucketMetadataSys::new(store.clone());
        sys.init(Vec::new()).await;
        let _ = GLOBAL_BucketMetadataSys.set(Arc::new(RwLock::new(sys)));
    }
    store
        .make_bucket("fallback-bucket", &rustfs_ecstore::store_api::MakeBucketOptions::default())
        .await
        .expect("make bucket");
    // Zero-padded keys (obj-0000, obj-0001, ...) keep listing order deterministic.
    for i in 0..count {
        let key = format!("obj-{i:04}");
        let data = format!("payload-{i}");
        let mut reader = PutObjReader::from_vec(data.into_bytes());
        store
            .put_object("fallback-bucket", &key, &mut reader, &rustfs_ecstore::store_api::ObjectOptions::default())
            .await
            .expect("put object");
    }
    (temp_dir, store)
}
static INIT: Once = Once::new();
/// Install a global tracing subscriber once per test binary, honouring
/// `RUST_LOG` while capping output at `filter_level`.
fn init_tracing(filter_level: Level) {
    INIT.call_once(|| {
        let builder = tracing_subscriber::fmt()
            .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
            .with_max_level(filter_level)
            .with_timer(tracing_subscriber::fmt::time::UtcTime::rfc_3339())
            .with_thread_names(true);
        // try_init: another test may already have installed a subscriber.
        let _ = builder.try_init();
    });
}
/// Exercise the ECStore fallback data-usage builder over a populated store
/// and verify it paginates past internal page limits.
/// NOTE(review): the name says "100 objects" but the test seeds 1000 —
/// presumably to cross a pagination boundary of 100; confirm and consider renaming.
#[tokio::test]
async fn fallback_builds_full_counts_over_100_objects() {
    init_tracing(Level::ERROR);
    let (_tmp, store) = create_store_with_objects(1000).await;
    let scanner = Scanner::new(None, None);
    // Directly call the fallback builder to ensure pagination works.
    let usage: DataUsageInfo = scanner.build_data_usage_from_ecstore(&store).await.expect("fallback usage");
    let bucket = usage.buckets_usage.get("fallback-bucket").expect("bucket usage present");
    assert!(
        usage.objects_total_count >= 1000,
        "total objects should be >=1000, got {}",
        usage.objects_total_count
    );
    assert!(
        bucket.objects_count >= 1000,
        "bucket objects should be >=1000, got {}",
        bucket.objects_count
    );
}

View File

@@ -1,411 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use rustfs_ahm::scanner::{
io_throttler::MetricsSnapshot,
local_stats::StatsSummary,
node_scanner::{LoadLevel, NodeScanner, NodeScannerConfig},
stats_aggregator::{DecentralizedStatsAggregator, DecentralizedStatsAggregatorConfig, NodeInfo},
};
use scanner_optimization_tests::{PerformanceBenchmark, create_test_scanner};
use std::{sync::Arc, time::Duration};
use tempfile::TempDir;
mod scanner_optimization_tests;
/// Smoke test of the scanner lifecycle: stats init, fresh progress at
/// cycle 0, and a checkpoint that can be saved and read back.
#[tokio::test]
async fn test_end_to_end_scanner_lifecycle() {
    let temp_dir = TempDir::new().unwrap();
    let scanner = create_test_scanner(&temp_dir).await;
    scanner.initialize_stats().await.expect("Failed to initialize stats");
    // A freshly initialized scanner has made no scan passes yet.
    let initial_progress = scanner.get_scan_progress().await;
    assert_eq!(initial_progress.current_cycle, 0);
    scanner.force_save_checkpoint().await.expect("Failed to save checkpoint");
    let checkpoint_info = scanner.get_checkpoint_info().await.unwrap();
    assert!(checkpoint_info.is_some());
}
/// Drive four synthetic load scenarios (Low → Critical) through the IO
/// monitor and verify the throttler backs off under High load and pauses
/// under Critical load.
/// NOTE(review): `expected_level` is only printed, never asserted against
/// `current_level` — presumably because the monitor's classification lags;
/// confirm whether an assertion is intended here.
#[tokio::test]
async fn test_load_balancing_and_throttling_integration() {
    let temp_dir = TempDir::new().unwrap();
    let scanner = create_test_scanner(&temp_dir).await;
    let io_monitor = scanner.get_io_monitor();
    let throttler = scanner.get_io_throttler();
    // Start IO monitoring
    io_monitor.start().await.expect("Failed to start IO monitor");
    // Simulate load variation scenarios
    let load_scenarios = vec![
        (LoadLevel::Low, 10, 100, 0, 5), // (load level, latency, QPS, error rate, connections)
        (LoadLevel::Medium, 30, 300, 10, 20),
        (LoadLevel::High, 80, 800, 50, 50),
        (LoadLevel::Critical, 200, 1200, 100, 100),
    ];
    for (expected_level, latency, qps, error_rate, connections) in load_scenarios {
        // Update business metrics
        scanner.update_business_metrics(latency, qps, error_rate, connections).await;
        // Wait for monitoring system response
        tokio::time::sleep(Duration::from_millis(1200)).await;
        // Get current load level
        let current_level = io_monitor.get_business_load_level().await;
        // Get throttling decision
        let metrics_snapshot = MetricsSnapshot {
            iops: 100 + qps / 10,
            latency,
            cpu_usage: std::cmp::min(50 + (qps / 20) as u8, 100),
            memory_usage: 40,
        };
        let decision = throttler.make_throttle_decision(current_level, Some(metrics_snapshot)).await;
        println!(
            "Load scenario test: Expected={:?}, Actual={:?}, Should_pause={}, Delay={:?}",
            expected_level, current_level, decision.should_pause, decision.suggested_delay
        );
        // Verify throttling effect under high load
        if matches!(current_level, LoadLevel::High | LoadLevel::Critical) {
            assert!(decision.suggested_delay > Duration::from_millis(1000));
        }
        if matches!(current_level, LoadLevel::Critical) {
            assert!(decision.should_pause);
        }
    }
    io_monitor.stop().await;
}
/// Simulate a restart: scanner 1 records progress and saves a checkpoint,
/// then scanner 2 (same node id, same data dir) must resume exactly from it.
#[tokio::test]
async fn test_checkpoint_resume_functionality() {
    let temp_dir = TempDir::new().unwrap();
    // Create first scanner instance
    let scanner1 = {
        let config = NodeScannerConfig {
            data_dir: temp_dir.path().to_path_buf(),
            ..Default::default()
        };
        NodeScanner::new("checkpoint-test-node".to_string(), config)
    };
    // Initialize and simulate some scan progress
    scanner1.initialize_stats().await.unwrap();
    // Simulate scan progress
    scanner1
        .update_scan_progress_for_test(3, 1, Some("checkpoint-test-key".to_string()))
        .await;
    // Save checkpoint
    scanner1.force_save_checkpoint().await.unwrap();
    // Stop first scanner
    scanner1.stop().await.unwrap();
    // Create second scanner instance (simulate restart)
    let scanner2 = {
        let config = NodeScannerConfig {
            data_dir: temp_dir.path().to_path_buf(),
            ..Default::default()
        };
        NodeScanner::new("checkpoint-test-node".to_string(), config)
    };
    // Try to recover from checkpoint
    scanner2.start_with_resume().await.unwrap();
    // Verify recovered progress matches what scanner1 recorded (cycle 3, disk 1).
    let recovered_progress = scanner2.get_scan_progress().await;
    assert_eq!(recovered_progress.current_cycle, 3);
    assert_eq!(recovered_progress.current_disk_index, 1);
    assert_eq!(recovered_progress.last_scan_key, Some("checkpoint-test-key".to_string()));
    // Cleanup
    scanner2.cleanup_checkpoint().await.unwrap();
}
/// Aggregation with unreachable remote nodes: only the local summary lands
/// in the aggregate; also verifies the cache path (same timestamp, fast
/// return), force refresh, and cache clearing.
#[tokio::test]
async fn test_distributed_stats_aggregation() {
    // Create decentralized stats aggregator
    let config = DecentralizedStatsAggregatorConfig {
        cache_ttl: Duration::from_secs(10), // Increase cache TTL to ensure cache is valid during test
        node_timeout: Duration::from_millis(500), // Reduce timeout
        ..Default::default()
    };
    let aggregator = DecentralizedStatsAggregator::new(config);
    // Simulate multiple nodes (these nodes don't exist in test environment, will cause connection failures)
    let node_infos = vec![
        NodeInfo {
            node_id: "node-1".to_string(),
            address: "127.0.0.1".to_string(),
            port: 9001,
            is_online: true,
            last_heartbeat: std::time::SystemTime::now(),
            version: "1.0.0".to_string(),
        },
        NodeInfo {
            node_id: "node-2".to_string(),
            address: "127.0.0.1".to_string(),
            port: 9002,
            is_online: true,
            last_heartbeat: std::time::SystemTime::now(),
            version: "1.0.0".to_string(),
        },
    ];
    // Add nodes to aggregator
    for node_info in node_infos {
        aggregator.add_node(node_info).await;
    }
    // Set local statistics (simulate local node)
    let local_stats = StatsSummary {
        node_id: "local-node".to_string(),
        total_objects_scanned: 1000,
        total_healthy_objects: 950,
        total_corrupted_objects: 50,
        total_bytes_scanned: 1024 * 1024 * 100, // 100MB
        total_scan_errors: 5,
        total_heal_triggered: 10,
        total_disks: 4,
        total_buckets: 5,
        last_update: std::time::SystemTime::now(),
        scan_progress: Default::default(),
        data_usage: rustfs_common::data_usage::DataUsageInfo::default(),
    };
    aggregator.set_local_stats(local_stats).await;
    // Get aggregated statistics (remote nodes will fail, but local node should succeed)
    let aggregated = aggregator.get_aggregated_stats().await.unwrap();
    // Verify local node statistics are included
    assert!(aggregated.node_summaries.contains_key("local-node"));
    assert!(aggregated.total_objects_scanned >= 1000);
    // Only local node data due to remote node connection failures
    assert_eq!(aggregated.node_summaries.len(), 1);
    // Test caching mechanism
    let original_timestamp = aggregated.aggregation_timestamp;
    let start_time = std::time::Instant::now();
    let cached_result = aggregator.get_aggregated_stats().await.unwrap();
    let cached_duration = start_time.elapsed();
    // Verify cache is effective: timestamps should be the same
    assert_eq!(original_timestamp, cached_result.aggregation_timestamp);
    // Cached calls should be fast (relaxed to 200ms for test environment)
    assert!(cached_duration < Duration::from_millis(200));
    // Force refresh
    let _refreshed = aggregator.force_refresh_aggregated_stats().await.unwrap();
    // Clear cache
    aggregator.clear_cache().await;
    // Verify cache status
    let cache_status = aggregator.get_cache_status().await;
    assert!(!cache_status.has_cached_data);
}
/// Measure the overhead a running scanner imposes on a synthetic business
/// workload (median of 5 runs, baseline vs. with-scanner) and assert the
/// impact stays below the test-environment threshold of 50%.
#[tokio::test]
async fn test_performance_impact_measurement() {
    let temp_dir = TempDir::new().unwrap();
    let scanner = create_test_scanner(&temp_dir).await;
    // Start performance monitoring
    let io_monitor = scanner.get_io_monitor();
    let _throttler = scanner.get_io_throttler();
    io_monitor.start().await.unwrap();
    // Baseline test: no scanner load - measure multiple times for stability
    const MEASUREMENT_COUNT: usize = 5;
    let mut baseline_measurements = Vec::new();
    for _ in 0..MEASUREMENT_COUNT {
        let duration = measure_workload(10_000, Duration::ZERO).await;
        baseline_measurements.push(duration);
    }
    // Use median to reduce impact of outliers
    baseline_measurements.sort();
    let median_idx = baseline_measurements.len() / 2;
    // Floor at 20ms so the percentage below never divides by a near-zero baseline.
    let baseline_duration = baseline_measurements[median_idx].max(Duration::from_millis(20));
    // Simulate scanner activity
    scanner.update_business_metrics(50, 500, 0, 25).await;
    tokio::time::sleep(Duration::from_millis(200)).await;
    // Performance test: with scanner load - measure multiple times for stability
    let mut scanner_measurements = Vec::new();
    for _ in 0..MEASUREMENT_COUNT {
        let duration = measure_workload(10_000, Duration::ZERO).await;
        scanner_measurements.push(duration);
    }
    scanner_measurements.sort();
    let median_idx = scanner_measurements.len() / 2;
    // Clamp so the "with scanner" figure can never be below baseline (negative overhead).
    let with_scanner_duration = scanner_measurements[median_idx].max(baseline_duration);
    // Calculate performance impact
    let baseline_ns = baseline_duration.as_nanos().max(1) as f64;
    let overhead_duration = with_scanner_duration.saturating_sub(baseline_duration);
    let overhead_ns = overhead_duration.as_nanos() as f64;
    let overhead_ms = (overhead_ns / 1_000_000.0).round() as u64;
    let impact_percentage = (overhead_ns / baseline_ns) * 100.0;
    let benchmark = PerformanceBenchmark {
        _scanner_overhead_ms: overhead_ms,
        business_impact_percentage: impact_percentage,
        _throttle_effectiveness: 95.0, // Simulated value
    };
    println!("Performance impact measurement:");
    println!("  Baseline duration: {baseline_duration:?}");
    println!("  With scanner duration: {with_scanner_duration:?}");
    println!("  Overhead: {overhead_ms} ms");
    println!("  Impact percentage: {impact_percentage:.2}%");
    println!("  Meets optimization goals: {}", benchmark.meets_optimization_goals());
    // Verify optimization target (business impact < 50%)
    // Note: In test environment, allow higher threshold due to system load variability
    // In production, the actual impact should be much lower (< 10%)
    assert!(impact_percentage < 50.0, "Performance impact too high: {impact_percentage:.2}%");
    io_monitor.stop().await;
}
/// Run metric updates, checkpoint saves, and stats reads concurrently
/// against one shared scanner to shake out races; then verify the final
/// state is still coherent.
#[tokio::test]
async fn test_concurrent_scanner_operations() {
    let temp_dir = TempDir::new().unwrap();
    let scanner = Arc::new(create_test_scanner(&temp_dir).await);
    scanner.initialize_stats().await.unwrap();
    // Execute multiple scanner operations concurrently
    let tasks = vec![
        // Task 1: Periodically update business metrics
        {
            let scanner = scanner.clone();
            tokio::spawn(async move {
                for i in 0..10 {
                    scanner.update_business_metrics(10 + i * 5, 100 + i * 10, i, 5 + i).await;
                    tokio::time::sleep(Duration::from_millis(50)).await;
                }
            })
        },
        // Task 2: Periodically save checkpoints
        {
            let scanner = scanner.clone();
            tokio::spawn(async move {
                for _i in 0..5 {
                    // Best-effort: a save racing another save may fail; log, don't panic.
                    if let Err(e) = scanner.force_save_checkpoint().await {
                        eprintln!("Checkpoint save failed: {e}");
                    }
                    tokio::time::sleep(Duration::from_millis(100)).await;
                }
            })
        },
        // Task 3: Periodically get statistics
        {
            let scanner = scanner.clone();
            tokio::spawn(async move {
                for _i in 0..8 {
                    let _summary = scanner.get_stats_summary().await;
                    let _progress = scanner.get_scan_progress().await;
                    tokio::time::sleep(Duration::from_millis(75)).await;
                }
            })
        },
    ];
    // Wait for all tasks to complete
    for task in tasks {
        task.await.unwrap();
    }
    // Verify final state
    let final_stats = scanner.get_stats_summary().await;
    let _final_progress = scanner.get_scan_progress().await;
    assert_eq!(final_stats.node_id, "integration-test-node");
    assert!(final_stats.last_update > std::time::SystemTime::UNIX_EPOCH);
    // Cleanup
    scanner.cleanup_checkpoint().await.unwrap();
}
// Helper function to simulate business workload
/// Burn a small, fixed amount of CPU per iteration, `operations` times,
/// yielding to the runtime every 100 iterations so cooperative tasks run.
async fn simulate_business_workload(operations: usize) {
    for i in 0..operations {
        // Simulate some CPU-intensive operations. black_box keeps the
        // optimizer from const-folding the loop away in release builds,
        // which would turn the measured "workload" into a no-op.
        let result: u64 = (0..100).map(|x| x * x).sum();
        std::hint::black_box(result);
        // Small delay to simulate IO operations
        if i % 100 == 0 {
            tokio::task::yield_now().await;
        }
    }
}
/// Time one run of `simulate_business_workload(operations)`, optionally
/// followed by `extra_delay` of sleep, and return the total elapsed time.
async fn measure_workload(operations: usize, extra_delay: Duration) -> Duration {
    let started_at = std::time::Instant::now();
    simulate_business_workload(operations).await;
    if extra_delay > Duration::ZERO {
        tokio::time::sleep(extra_delay).await;
    }
    started_at.elapsed()
}
/// Corrupt the on-disk checkpoint file and verify the scanner degrades
/// gracefully (error or None, never a panic), then recovers by writing a
/// fresh checkpoint after cleanup.
#[tokio::test]
async fn test_error_recovery_and_resilience() {
    let temp_dir = TempDir::new().unwrap();
    let scanner = create_test_scanner(&temp_dir).await;
    // Test recovery from stats initialization failure
    scanner.initialize_stats().await.unwrap();
    // Test recovery from checkpoint corruption
    scanner.force_save_checkpoint().await.unwrap();
    // Artificially corrupt checkpoint file (by writing invalid data)
    // NOTE(review): the file name must match NodeScanner's checkpoint naming scheme.
    let checkpoint_file = temp_dir.path().join("scanner_checkpoint_integration-test-node.json");
    if checkpoint_file.exists() {
        tokio::fs::write(&checkpoint_file, "invalid json data").await.unwrap();
    }
    // Verify system can gracefully handle corrupted checkpoint
    let checkpoint_info = scanner.get_checkpoint_info().await;
    // Should return error or null value, not crash
    assert!(checkpoint_info.is_err() || checkpoint_info.unwrap().is_none());
    // Clean up corrupted checkpoint
    scanner.cleanup_checkpoint().await.unwrap();
    // Verify ability to recreate valid checkpoint
    scanner.force_save_checkpoint().await.unwrap();
    let new_checkpoint_info = scanner.get_checkpoint_info().await.unwrap();
    assert!(new_checkpoint_info.is_some());
}

View File

@@ -1,508 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use heed::byteorder::BigEndian;
use heed::types::*;
use heed::{BoxedError, BytesDecode, BytesEncode, Database, DatabaseFlags, Env, EnvOpenOptions};
use rustfs_ahm::scanner::local_scan::{self, LocalObjectRecord, LocalScanOutcome};
use rustfs_ecstore::{
disk::endpoint::Endpoint,
endpoints::{EndpointServerPools, Endpoints, PoolEndpoints},
store::ECStore,
store_api::{MakeBucketOptions, ObjectIO, ObjectInfo, ObjectOptions, PutObjReader, StorageAPI},
};
use serial_test::serial;
use std::{
borrow::Cow,
path::PathBuf,
sync::{Arc, Once, OnceLock},
};
//use heed_traits::Comparator;
use time::OffsetDateTime;
use tokio::fs;
use tokio_util::sync::CancellationToken;
use tracing::{debug, info, warn};
use uuid::Uuid;
static GLOBAL_ENV: OnceLock<(Vec<PathBuf>, Arc<ECStore>)> = OnceLock::new();
static INIT: Once = Once::new();
static _LIFECYCLE_EXPIRY_CURRENT_DAYS: i32 = 1;
static _LIFECYCLE_EXPIRY_NONCURRENT_DAYS: i32 = 1;
static _LIFECYCLE_TRANSITION_CURRENT_DAYS: i32 = 1;
static _LIFECYCLE_TRANSITION_NONCURRENT_DAYS: i32 = 1;
static GLOBAL_LMDB_ENV: OnceLock<Env> = OnceLock::new();
static GLOBAL_LMDB_DB: OnceLock<Database<I64<BigEndian>, LifecycleContentCodec>> = OnceLock::new();
/// Install a default tracing subscriber exactly once per test binary;
/// errors (subscriber already set) are deliberately ignored.
fn init_tracing() {
    INIT.call_once(|| {
        let _ = tracing_subscriber::fmt::try_init();
    });
}
/// Test helper: Create test environment with ECStore
///
/// Builds a 4-disk single-pool ECStore under `/tmp`, initializes the bucket
/// metadata system, and opens the LMDB environment/database used by the
/// lifecycle-cache tests. All of this is memoized in process-global
/// `OnceLock`s, so every test after the first gets the same instance.
async fn setup_test_env() -> (Vec<PathBuf>, Arc<ECStore>) {
    init_tracing();
    // Fast path: already initialized, just clone and return
    if let Some((paths, ecstore)) = GLOBAL_ENV.get() {
        return (paths.clone(), ecstore.clone());
    }
    // create temp dir as 4 disks with unique base dir
    let test_base_dir = format!("/tmp/rustfs_ahm_lifecyclecache_test_{}", uuid::Uuid::new_v4());
    let temp_dir = std::path::PathBuf::from(&test_base_dir);
    if temp_dir.exists() {
        fs::remove_dir_all(&temp_dir).await.ok();
    }
    fs::create_dir_all(&temp_dir).await.unwrap();
    // create 4 disk dirs
    let disk_paths = vec![
        temp_dir.join("disk1"),
        temp_dir.join("disk2"),
        temp_dir.join("disk3"),
        temp_dir.join("disk4"),
    ];
    for disk_path in &disk_paths {
        fs::create_dir_all(disk_path).await.unwrap();
    }
    // create EndpointServerPools
    let mut endpoints = Vec::new();
    for (i, disk_path) in disk_paths.iter().enumerate() {
        let mut endpoint = Endpoint::try_from(disk_path.to_str().unwrap()).unwrap();
        // set correct index
        endpoint.set_pool_index(0);
        endpoint.set_set_index(0);
        endpoint.set_disk_index(i);
        endpoints.push(endpoint);
    }
    let pool_endpoints = PoolEndpoints {
        legacy: false,
        set_count: 1,
        drives_per_set: 4,
        endpoints: Endpoints::from(endpoints),
        cmd_line: "test".to_string(),
        platform: format!("OS: {} | Arch: {}", std::env::consts::OS, std::env::consts::ARCH),
    };
    let endpoint_pools = EndpointServerPools(vec![pool_endpoints]);
    // format disks (only first time)
    rustfs_ecstore::store::init_local_disks(endpoint_pools.clone()).await.unwrap();
    // create ECStore with dynamic port 0 (let OS assign) or fixed 9002 if free
    let port = 9002; // for simplicity
    let server_addr: std::net::SocketAddr = format!("127.0.0.1:{port}").parse().unwrap();
    let ecstore = ECStore::new(server_addr, endpoint_pools, CancellationToken::new())
        .await
        .unwrap();
    // init bucket metadata system
    let buckets_list = ecstore
        .list_bucket(&rustfs_ecstore::store_api::BucketOptions {
            no_metadata: true,
            ..Default::default()
        })
        .await
        .unwrap();
    let buckets = buckets_list.into_iter().map(|v| v.name).collect();
    rustfs_ecstore::bucket::metadata_sys::init_bucket_metadata_sys(ecstore.clone(), buckets).await;
    //lmdb env
    // User home directory
    /*if let Ok(home_dir) = env::var("HOME").or_else(|_| env::var("USERPROFILE")) {
        let mut path = PathBuf::from(home_dir);
        path.push(format!(".{DEFAULT_LOG_FILENAME}"));
        path.push(DEFAULT_LOG_DIR);
        if ensure_directory_writable(&path) {
            //return path;
        }
    }*/
    // LMDB env lives in a fixed path and is recreated fresh on every first call.
    let test_lmdb_lifecycle_dir = "/tmp/lmdb_lifecycle".to_string();
    let temp_dir = std::path::PathBuf::from(&test_lmdb_lifecycle_dir);
    if temp_dir.exists() {
        fs::remove_dir_all(&temp_dir).await.ok();
    }
    fs::create_dir_all(&temp_dir).await.unwrap();
    // SAFETY requirement of heed: the directory must exist and not be opened twice.
    let lmdb_env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&test_lmdb_lifecycle_dir).unwrap() };
    let bucket_name = format!("test-lc-cache-{}", "00000");
    let mut wtxn = lmdb_env.write_txn().unwrap();
    // DUP_SORT lets one key (timestamp) hold multiple lifecycle entries.
    let db = match lmdb_env
        .database_options()
        .name(&format!("bucket_{bucket_name}"))
        .types::<I64<BigEndian>, LifecycleContentCodec>()
        .flags(DatabaseFlags::DUP_SORT)
        //.dup_sort_comparator::<>()
        .create(&mut wtxn)
    {
        Ok(db) => db,
        Err(err) => {
            panic!("lmdb error: {err}");
        }
    };
    let _ = wtxn.commit();
    let _ = GLOBAL_LMDB_ENV.set(lmdb_env);
    let _ = GLOBAL_LMDB_DB.set(db);
    // Store in global once lock
    let _ = GLOBAL_ENV.set((disk_paths.clone(), ecstore.clone()));
    (disk_paths, ecstore)
}
/// Test helper: create a bucket with default options on the given store.
#[allow(dead_code)]
async fn create_test_bucket(ecstore: &Arc<ECStore>, bucket_name: &str) {
    // Deref the Arc once so the trait method resolves on ECStore itself.
    let store: &ECStore = ecstore;
    store
        .make_bucket(bucket_name, &Default::default())
        .await
        .expect("Failed to create test bucket");
    info!("Created test bucket: {}", bucket_name);
}
/// Test helper: create a bucket with object-lock and versioning enabled.
async fn create_test_lock_bucket(ecstore: &Arc<ECStore>, bucket_name: &str) {
    let opts = MakeBucketOptions {
        lock_enabled: true,
        versioning_enabled: true,
        ..Default::default()
    };
    // Deref the Arc once so the trait method resolves on ECStore itself.
    let store: &ECStore = ecstore;
    store
        .make_bucket(bucket_name, &opts)
        .await
        .expect("Failed to create test bucket");
    info!("Created test bucket: {}", bucket_name);
}
/// Test helper: upload `data` as `bucket/object` and log the stored size.
async fn upload_test_object(ecstore: &Arc<ECStore>, bucket: &str, object: &str, data: &[u8]) {
    let mut reader = PutObjReader::from_vec(data.to_vec());
    // Deref the Arc once so the trait method resolves on ECStore itself.
    let store: &ECStore = ecstore;
    let object_info = store
        .put_object(bucket, object, &mut reader, &ObjectOptions::default())
        .await
        .expect("Failed to upload test object");
    println!("object_info1: {object_info:?}");
    info!("Uploaded test object: {}/{} ({} bytes)", bucket, object, object_info.size);
}
/// Test helper: true when the object is visible and not a delete marker.
async fn object_exists(ecstore: &Arc<ECStore>, bucket: &str, object: &str) -> bool {
    let store: &ECStore = ecstore;
    store
        .get_object_info(bucket, object, &ObjectOptions::default())
        .await
        .map(|info| !info.delete_marker)
        .unwrap_or(false)
}
/// Convert a unix timestamp in nanoseconds to `OffsetDateTime`, returning
/// `None` when the value falls outside the representable range.
fn ns_to_offset_datetime(ns: i128) -> Option<OffsetDateTime> {
    OffsetDateTime::from_unix_timestamp_nanos(ns).ok()
}
/// Build a minimal `ObjectInfo` from a scanner `LocalObjectRecord`.
/// Only the fields the lifecycle tests inspect are populated; the rest
/// take their `Default` values.
fn convert_record_to_object_info(record: &LocalObjectRecord) -> ObjectInfo {
    let usage = &record.usage;
    ObjectInfo {
        bucket: usage.bucket.clone(),
        name: usage.object.clone(),
        // NOTE(review): `as i64` would wrap for sizes above i64::MAX —
        // assumed unreachable for test fixtures; confirm if reused elsewhere.
        size: usage.total_size as i64,
        // Counts as a delete marker only when no live version remains.
        delete_marker: !usage.has_live_object && usage.delete_markers_count > 0,
        mod_time: usage.last_modified_ns.and_then(ns_to_offset_datetime),
        ..Default::default()
    }
}
/// Build an `ObjectInfo` from raw field values for test fixtures.
///
/// Panics if `version_id` is not a well-formed UUID string — acceptable
/// here because callers pass literals in tests.
#[allow(dead_code)]
fn to_object_info(
    bucket: &str,
    object: &str,
    total_size: i64,
    delete_marker: bool,
    mod_time: OffsetDateTime,
    version_id: &str,
) -> ObjectInfo {
    ObjectInfo {
        bucket: bucket.to_string(),
        name: object.to_string(),
        size: total_size,
        delete_marker,
        mod_time: Some(mod_time),
        version_id: Some(Uuid::parse_str(version_id).unwrap()),
        ..Default::default()
    }
}
/// Kind of lifecycle action a cache entry represents. The discriminant is
/// serialized as a single byte (0..=3, in declaration order) by
/// `LifecycleContentCodec`.
#[derive(Debug, PartialEq, Eq)]
enum LifecycleType {
    ExpiryCurrent,
    ExpiryNoncurrent,
    TransitionCurrent,
    TransitionNoncurrent,
}
/// One lifecycle-cache entry as stored in LMDB. Serialized layout (see
/// `LifecycleContentCodec`): 1-byte version number, 36-byte version id,
/// big-endian i64 unix timestamp, 1-byte `LifecycleType`, then the
/// variable-length object name.
#[derive(Debug, PartialEq, Eq)]
pub struct LifecycleContent {
    // format version of the encoded record
    ver_no: u8,
    // object version id; encoder assumes the canonical 36-char UUID form
    ver_id: String,
    // object modification time; stored at second precision
    mod_time: OffsetDateTime,
    // which lifecycle action this entry represents
    type_: LifecycleType,
    // object key; occupies the remainder of the record
    object_name: String,
}
/// Zero-sized heed codec implementing the byte layout described on
/// `LifecycleContent`.
pub struct LifecycleContentCodec;
impl BytesEncode<'_> for LifecycleContentCodec {
    type EItem = LifecycleContent;
    /// Serialize a `LifecycleContent` as:
    /// `[ver_no: u8][ver_id: UTF-8 bytes][mod_time: i64 BE seconds][type: u8][object_name: UTF-8 bytes]`.
    ///
    /// The original four match arms differed only in the type byte, so the
    /// variant mapping is factored out and the record is written once.
    fn bytes_encode(lcc: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
        // Single byte identifying the lifecycle action (must stay in sync
        // with `bytes_decode`).
        let type_byte: u8 = match lcc.type_ {
            LifecycleType::ExpiryCurrent => 0,
            LifecycleType::ExpiryNoncurrent => 1,
            LifecycleType::TransitionCurrent => 2,
            LifecycleType::TransitionNoncurrent => 3,
        };
        let mod_timestamp_bytes = lcc.mod_time.unix_timestamp().to_be_bytes();
        // Preallocate the exact record size to avoid regrowth.
        let mut output =
            Vec::with_capacity(1 + lcc.ver_id.len() + mod_timestamp_bytes.len() + 1 + lcc.object_name.len());
        output.push(lcc.ver_no);
        output.extend_from_slice(lcc.ver_id.as_bytes());
        output.extend_from_slice(&mod_timestamp_bytes);
        output.push(type_byte);
        output.extend_from_slice(lcc.object_name.as_bytes());
        Ok(Cow::Owned(output))
    }
}
impl<'a> BytesDecode<'a> for LifecycleContentCodec {
    type DItem = LifecycleContent;

    /// Decodes the layout written by `bytes_encode`:
    /// `[ver_no: 1][ver_id: 36][mod_time unix ts: 8, big-endian][type: 1][object_name: rest]`.
    ///
    /// Returns an error — instead of panicking or invoking undefined
    /// behavior — on truncated input, non-UTF-8 text fields, an unknown type
    /// tag, or an out-of-range timestamp.
    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
        use std::mem::size_of;
        // Field boundaries within the encoded value.
        const VER_ID_END: usize = 36 + 1;
        const TS_END: usize = size_of::<i64>() + VER_ID_END;

        let ver_no = match bytes.get(..size_of::<u8>()) {
            Some(bytes) => bytes.try_into().map(u8::from_be_bytes).unwrap(),
            None => return Err("invalid LifecycleContent: cannot extract ver_no".into()),
        };
        // Checked UTF-8 validation: the previous `from_utf8_unchecked` was
        // undefined behavior if the stored bytes were ever corrupted.
        let ver_id = match bytes.get(size_of::<u8>()..VER_ID_END) {
            Some(raw) => match std::str::from_utf8(raw) {
                Ok(s) => s.to_string(),
                Err(_) => return Err("invalid LifecycleContent: cannot extract ver_id".into()),
            },
            None => return Err("invalid LifecycleContent: cannot extract ver_id".into()),
        };
        let mod_timestamp = match bytes.get(VER_ID_END..TS_END) {
            Some(bytes) => bytes.try_into().map(i64::from_be_bytes).unwrap(),
            None => return Err("invalid LifecycleContent: cannot extract mod_time timestamp".into()),
        };
        let type_ = match bytes.get(TS_END) {
            Some(&0) => LifecycleType::ExpiryCurrent,
            Some(&1) => LifecycleType::ExpiryNoncurrent,
            Some(&2) => LifecycleType::TransitionCurrent,
            Some(&3) => LifecycleType::TransitionNoncurrent,
            Some(_) => return Err("invalid LifecycleContent: invalid LifecycleType".into()),
            None => return Err("invalid LifecycleContent: cannot extract LifecycleType".into()),
        };
        let object_name = match bytes.get((TS_END + 1)..) {
            Some(raw) => match std::str::from_utf8(raw) {
                Ok(s) => s.to_string(),
                Err(_) => return Err("invalid LifecycleContent: cannot extract object_name".into()),
            },
            None => return Err("invalid LifecycleContent: cannot extract object_name".into()),
        };
        // Reject out-of-range timestamps instead of panicking mid-decode.
        let mod_time = OffsetDateTime::from_unix_timestamp(mod_timestamp)
            .map_err(|_| "invalid LifecycleContent: mod_time timestamp out of range")?;
        Ok(LifecycleContent {
            ver_no,
            ver_id,
            mod_time,
            type_,
            object_name,
        })
    }
}
mod serial_tests {
    use super::*;

    /// Builds lifecycle-cache entries in LMDB from a local usage scan and
    /// reads them back, exercising `LifecycleContentCodec` end to end.
    /// (Renamed from the typo'd `test_lifecycle_chche_build`.)
    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
    #[serial]
    async fn test_lifecycle_cache_build() {
        let (_disk_paths, ecstore) = setup_test_env().await;
        // Create test bucket and object
        let suffix = uuid::Uuid::new_v4().simple().to_string();
        let bucket_name = format!("test-lc-cache-{}", &suffix[..8]);
        let object_name = "test/object.txt"; // Match the lifecycle rule prefix "test/"
        let test_data = b"Hello, this is test data for lifecycle expiry!";
        create_test_lock_bucket(&ecstore, bucket_name.as_str()).await;
        upload_test_object(&ecstore, bucket_name.as_str(), object_name, test_data).await;
        // Verify object exists initially
        assert!(object_exists(&ecstore, bucket_name.as_str(), object_name).await);
        println!("✅ Object exists before lifecycle processing");
        // Scan local disks; fall back to an empty outcome so the rest of the
        // test still runs (it then simply finds no records for the bucket).
        let scan_outcome = match local_scan::scan_and_persist_local_usage(ecstore.clone()).await {
            Ok(outcome) => outcome,
            Err(err) => {
                warn!("Local usage scan failed: {}", err);
                LocalScanOutcome::default()
            }
        };
        let bucket_objects_map = &scan_outcome.bucket_objects;
        let records = match bucket_objects_map.get(&bucket_name) {
            Some(records) => records,
            None => {
                debug!("No local snapshot entries found for bucket {}; skipping lifecycle/integrity", bucket_name);
                &vec![]
            }
        };
        if let Some(lmdb_env) = GLOBAL_LMDB_ENV.get()
            && let Some(lmdb) = GLOBAL_LMDB_DB.get()
        {
            // First transaction: insert one cache entry per live object,
            // keyed by its expected expiry timestamp.
            let mut wtxn = lmdb_env.write_txn().unwrap();
            for record in records {
                if !record.usage.has_live_object {
                    continue;
                }
                let object_info = convert_record_to_object_info(record);
                println!("object_info2: {object_info:?}");
                let mod_time = object_info.mod_time.unwrap_or(OffsetDateTime::now_utc());
                let expiry_time = rustfs_ecstore::bucket::lifecycle::lifecycle::expected_expiry_time(mod_time, 1);
                // Placeholder keeps the codec's fixed 36-byte ver_id layout
                // when the object has no version id.
                let version_id = if let Some(version_id) = object_info.version_id {
                    version_id.to_string()
                } else {
                    "zzzzzzzz-zzzz-zzzz-zzzz-zzzzzzzzzzzz".to_string()
                };
                lmdb.put(
                    &mut wtxn,
                    &expiry_time.unix_timestamp(),
                    &LifecycleContent {
                        ver_no: 0,
                        ver_id: version_id,
                        mod_time,
                        type_: LifecycleType::TransitionNoncurrent,
                        object_name: object_info.name,
                    },
                )
                .unwrap();
            }
            wtxn.commit().unwrap();
            // Second transaction: iterate everything back out, proving the
            // entries round-trip through the codec.
            let mut wtxn = lmdb_env.write_txn().unwrap();
            let iter = lmdb.iter_mut(&mut wtxn).unwrap();
            for row in iter {
                if let Ok(ref elm) = row {
                    let LifecycleContent {
                        ver_no,
                        ver_id,
                        mod_time,
                        type_,
                        object_name,
                    } = &elm.1;
                    println!("cache row:{ver_no} {ver_id} {mod_time} {type_:?} {object_name}");
                }
                println!("row:{row:?}");
            }
            wtxn.commit().unwrap();
        }
        println!("Lifecycle cache test completed");
    }
}

View File

@@ -1,695 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use rustfs_ahm::scanner::{Scanner, data_scanner::ScannerConfig};
use rustfs_ecstore::{
bucket::metadata::BUCKET_LIFECYCLE_CONFIG,
bucket::metadata_sys,
disk::endpoint::Endpoint,
endpoints::{EndpointServerPools, Endpoints, PoolEndpoints},
global::GLOBAL_TierConfigMgr,
store::ECStore,
store_api::{MakeBucketOptions, ObjectIO, ObjectOptions, PutObjReader, StorageAPI},
tier::tier_config::{TierConfig, TierMinIO, TierType},
};
use serial_test::serial;
use std::{
path::PathBuf,
sync::{Arc, Once, OnceLock},
time::Duration,
};
use tokio::fs;
use tokio_util::sync::CancellationToken;
use tracing::info;
// Process-wide cached test environment (disk paths + ECStore), built once by
// setup_test_env and shared by every test in this file.
static GLOBAL_ENV: OnceLock<(Vec<PathBuf>, Arc<ECStore>)> = OnceLock::new();
// Guards one-time tracing initialization.
static INIT: Once = Once::new();
// Installs the tracing subscriber exactly once; later calls are no-ops and
// an already-installed subscriber is silently tolerated.
fn init_tracing() {
    INIT.call_once(|| {
        let _ = tracing_subscriber::fmt::try_init();
    });
}
/// Test helper: Create test environment with ECStore
///
/// Builds (once per process) a 4-disk, single-pool ECStore under a unique
/// /tmp directory, initializes the bucket metadata system and the background
/// expiry workers, and caches the result in `GLOBAL_ENV` so later tests
/// reuse the same environment.
async fn setup_test_env() -> (Vec<PathBuf>, Arc<ECStore>) {
    init_tracing();
    // Fast path: already initialized, just clone and return
    if let Some((paths, ecstore)) = GLOBAL_ENV.get() {
        return (paths.clone(), ecstore.clone());
    }
    // create temp dir as 4 disks with unique base dir
    let test_base_dir = format!("/tmp/rustfs_ahm_lifecycle_test_{}", uuid::Uuid::new_v4());
    let temp_dir = std::path::PathBuf::from(&test_base_dir);
    if temp_dir.exists() {
        fs::remove_dir_all(&temp_dir).await.ok();
    }
    fs::create_dir_all(&temp_dir).await.unwrap();
    // create 4 disk dirs
    let disk_paths = vec![
        temp_dir.join("disk1"),
        temp_dir.join("disk2"),
        temp_dir.join("disk3"),
        temp_dir.join("disk4"),
    ];
    for disk_path in &disk_paths {
        fs::create_dir_all(disk_path).await.unwrap();
    }
    // create EndpointServerPools
    let mut endpoints = Vec::new();
    for (i, disk_path) in disk_paths.iter().enumerate() {
        let mut endpoint = Endpoint::try_from(disk_path.to_str().unwrap()).unwrap();
        // set correct index
        endpoint.set_pool_index(0);
        endpoint.set_set_index(0);
        endpoint.set_disk_index(i);
        endpoints.push(endpoint);
    }
    // One pool, one erasure set, four drives.
    let pool_endpoints = PoolEndpoints {
        legacy: false,
        set_count: 1,
        drives_per_set: 4,
        endpoints: Endpoints::from(endpoints),
        cmd_line: "test".to_string(),
        platform: format!("OS: {} | Arch: {}", std::env::consts::OS, std::env::consts::ARCH),
    };
    let endpoint_pools = EndpointServerPools(vec![pool_endpoints]);
    // format disks (only first time)
    rustfs_ecstore::store::init_local_disks(endpoint_pools.clone()).await.unwrap();
    // create ECStore with dynamic port 0 (let OS assign) or fixed 9002 if free
    let port = 9002; // for simplicity
    let server_addr: std::net::SocketAddr = format!("127.0.0.1:{port}").parse().unwrap();
    let ecstore = ECStore::new(server_addr, endpoint_pools, CancellationToken::new())
        .await
        .unwrap();
    // init bucket metadata system
    let buckets_list = ecstore
        .list_bucket(&rustfs_ecstore::store_api::BucketOptions {
            no_metadata: true,
            ..Default::default()
        })
        .await
        .unwrap();
    let buckets = buckets_list.into_iter().map(|v| v.name).collect();
    rustfs_ecstore::bucket::metadata_sys::init_bucket_metadata_sys(ecstore.clone(), buckets).await;
    // Initialize background expiry workers
    rustfs_ecstore::bucket::lifecycle::bucket_lifecycle_ops::init_background_expiry(ecstore.clone()).await;
    // Store in global once lock
    let _ = GLOBAL_ENV.set((disk_paths.clone(), ecstore.clone()));
    (disk_paths, ecstore)
}
/// Test helper: Create a test bucket
async fn create_test_bucket(ecstore: &Arc<ECStore>, bucket_name: &str) {
(**ecstore)
.make_bucket(bucket_name, &Default::default())
.await
.expect("Failed to create test bucket");
info!("Created test bucket: {}", bucket_name);
}
/// Test helper: creates a bucket with object lock and versioning enabled,
/// panicking on failure.
async fn create_test_lock_bucket(ecstore: &Arc<ECStore>, bucket_name: &str) {
    let opts = MakeBucketOptions {
        lock_enabled: true,
        versioning_enabled: true,
        ..Default::default()
    };
    (**ecstore)
        .make_bucket(bucket_name, &opts)
        .await
        .expect("Failed to create test bucket");
    info!("Created test bucket: {}", bucket_name);
}
/// Test helper: uploads `data` as `bucket/object`, panicking on failure.
async fn upload_test_object(ecstore: &Arc<ECStore>, bucket: &str, object: &str, data: &[u8]) {
    let opts = ObjectOptions::default();
    let mut reader = PutObjReader::from_vec(data.to_vec());
    let object_info = (**ecstore)
        .put_object(bucket, object, &mut reader, &opts)
        .await
        .expect("Failed to upload test object");
    info!("Uploaded test object: {}/{} ({} bytes)", bucket, object, object_info.size);
}
/// Test helper: installs a lifecycle rule on `bucket_name` that expires
/// objects under the "test/" prefix after 0 days, i.e. immediately, so
/// scanner-driven expiry can be observed in tests.
async fn set_bucket_lifecycle(bucket_name: &str) -> Result<(), Box<dyn std::error::Error>> {
    const LIFECYCLE_XML: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
<LifecycleConfiguration>
    <Rule>
        <ID>test-rule</ID>
        <Status>Enabled</Status>
        <Filter>
            <Prefix>test/</Prefix>
        </Filter>
        <Expiration>
            <Days>0</Days>
        </Expiration>
    </Rule>
</LifecycleConfiguration>"#;
    metadata_sys::update(bucket_name, BUCKET_LIFECYCLE_CONFIG, LIFECYCLE_XML.as_bytes().to_vec()).await?;
    Ok(())
}
/// Test helper: installs a lifecycle rule on `bucket_name` with 0-day expiry
/// for the "test/" prefix plus `ExpiredObjectDeleteMarker`, so expired
/// delete markers are also cleaned up.
async fn set_bucket_lifecycle_deletemarker(bucket_name: &str) -> Result<(), Box<dyn std::error::Error>> {
    const LIFECYCLE_XML: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
<LifecycleConfiguration>
    <Rule>
        <ID>test-rule</ID>
        <Status>Enabled</Status>
        <Filter>
            <Prefix>test/</Prefix>
        </Filter>
        <Expiration>
            <Days>0</Days>
            <ExpiredObjectDeleteMarker>true</ExpiredObjectDeleteMarker>
        </Expiration>
    </Rule>
</LifecycleConfiguration>"#;
    metadata_sys::update(bucket_name, BUCKET_LIFECYCLE_CONFIG, LIFECYCLE_XML.as_bytes().to_vec()).await?;
    Ok(())
}
/// Test helper: installs two lifecycle rules on `bucket_name` — an enabled
/// 0-day transition of "test/" objects to the COLDTIER44 storage class, and
/// a disabled noncurrent-version transition rule.
#[allow(dead_code)]
async fn set_bucket_lifecycle_transition(bucket_name: &str) -> Result<(), Box<dyn std::error::Error>> {
    const LIFECYCLE_XML: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
<LifecycleConfiguration>
    <Rule>
        <ID>test-rule</ID>
        <Status>Enabled</Status>
        <Filter>
            <Prefix>test/</Prefix>
        </Filter>
        <Transition>
            <Days>0</Days>
            <StorageClass>COLDTIER44</StorageClass>
        </Transition>
    </Rule>
    <Rule>
        <ID>test-rule2</ID>
        <Status>Disabled</Status>
        <Filter>
            <Prefix>test/</Prefix>
        </Filter>
        <NoncurrentVersionTransition>
            <NoncurrentDays>0</NoncurrentDays>
            <StorageClass>COLDTIER44</StorageClass>
        </NoncurrentVersionTransition>
    </Rule>
</LifecycleConfiguration>"#;
    metadata_sys::update(bucket_name, BUCKET_LIFECYCLE_CONFIG, LIFECYCLE_XML.as_bytes().to_vec()).await?;
    Ok(())
}
/// Test helper: registers a remote MinIO tier named "COLDTIER44" with the
/// global tier configuration manager; panics if add or save fails.
///
/// `server == 1` selects the public test host, anything else the local one.
#[allow(dead_code)]
async fn create_test_tier(server: u32) {
    let (bucket, endpoint) = if server == 1 {
        ("hello", "http://39.105.198.204:9000")
    } else {
        ("mblock2", "http://127.0.0.1:9020")
    };
    let minio = TierMinIO {
        access_key: "minioadmin".to_string(),
        secret_key: "minioadmin".to_string(),
        bucket: bucket.to_string(),
        endpoint: endpoint.to_string(),
        // Unique prefix per run so repeated tests do not collide remotely.
        prefix: format!("mypre{}/", uuid::Uuid::new_v4()),
        region: "".to_string(),
        ..Default::default()
    };
    let args = TierConfig {
        version: "v1".to_string(),
        tier_type: TierType::MinIO,
        name: "COLDTIER44".to_string(),
        s3: None,
        aliyun: None,
        tencent: None,
        huaweicloud: None,
        azure: None,
        gcs: None,
        r2: None,
        rustfs: None,
        minio: Some(minio),
    };
    let mut tier_config_mgr = GLOBAL_TierConfigMgr.write().await;
    if let Err(err) = tier_config_mgr.add(args, false).await {
        println!("tier_config_mgr add failed, e: {err:?}");
        panic!("tier add failed. {err}");
    }
    if let Err(e) = tier_config_mgr.save().await {
        println!("tier_config_mgr save failed, e: {e:?}");
        panic!("tier save failed");
    }
    println!("Created test tier: COLDTIER44");
}
/// Test helper: true when the object can be looked up and its latest
/// version is not a delete marker; lookup errors count as "absent".
async fn object_exists(ecstore: &Arc<ECStore>, bucket: &str, object: &str) -> bool {
    (**ecstore)
        .get_object_info(bucket, object, &ObjectOptions::default())
        .await
        .map(|info| !info.delete_marker)
        .unwrap_or(false)
}
/// Test helper: whether the object's latest version is a delete marker;
/// panics when the object cannot be looked up at all.
#[allow(dead_code)]
async fn object_is_delete_marker(ecstore: &Arc<ECStore>, bucket: &str, object: &str) -> bool {
    match (**ecstore).get_object_info(bucket, object, &ObjectOptions::default()).await {
        Ok(oi) => {
            println!("oi: {oi:?}");
            oi.delete_marker
        }
        Err(_) => {
            println!("object_is_delete_marker is error");
            panic!("object_is_delete_marker is error");
        }
    }
}
/// Test helper: whether the object has been transitioned to another tier
/// (non-empty transition status); panics when the lookup fails.
#[allow(dead_code)]
async fn object_is_transitioned(ecstore: &Arc<ECStore>, bucket: &str, object: &str) -> bool {
    match (**ecstore).get_object_info(bucket, object, &ObjectOptions::default()).await {
        Ok(oi) => {
            println!("oi: {oi:?}");
            !oi.transitioned_object.status.is_empty()
        }
        Err(_) => {
            println!("object_is_transitioned is error");
            panic!("object_is_transitioned is error");
        }
    }
}
/// Polls every 200 ms until the object disappears (returns true) or the
/// deadline computed from `timeout` passes (returns false).
async fn wait_for_object_absence(ecstore: &Arc<ECStore>, bucket: &str, object: &str, timeout: Duration) -> bool {
    let deadline = tokio::time::Instant::now() + timeout;
    while object_exists(ecstore, bucket, object).await {
        if tokio::time::Instant::now() >= deadline {
            return false;
        }
        tokio::time::sleep(Duration::from_millis(200)).await;
    }
    true
}
mod serial_tests {
use super::*;
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
#[serial]
/// End-to-end check that a 0-day expiry rule removes a freshly uploaded
/// object once the scanner runs; if the background workers have not expired
/// it after several scan cycles, the lifecycle event is applied directly as
/// a fallback before asserting.
async fn test_lifecycle_expiry_basic() {
    let (_disk_paths, ecstore) = setup_test_env().await;
    // Create test bucket and object
    let suffix = uuid::Uuid::new_v4().simple().to_string();
    let bucket_name = format!("test-lc-expiry-basic-{}", &suffix[..8]);
    let object_name = "test/object.txt"; // Match the lifecycle rule prefix "test/"
    let test_data = b"Hello, this is test data for lifecycle expiry!";
    create_test_lock_bucket(&ecstore, bucket_name.as_str()).await;
    upload_test_object(&ecstore, bucket_name.as_str(), object_name, test_data).await;
    // Verify object exists initially
    assert!(object_exists(&ecstore, bucket_name.as_str(), object_name).await);
    println!("✅ Object exists before lifecycle processing");
    // Set lifecycle configuration with very short expiry (0 days = immediate expiry)
    set_bucket_lifecycle(bucket_name.as_str())
        .await
        .expect("Failed to set lifecycle configuration");
    println!("✅ Lifecycle configuration set for bucket: {bucket_name}");
    // Verify lifecycle configuration was set
    match rustfs_ecstore::bucket::metadata_sys::get(bucket_name.as_str()).await {
        Ok(bucket_meta) => {
            assert!(bucket_meta.lifecycle_config.is_some());
            println!("✅ Bucket metadata retrieved successfully");
        }
        Err(e) => {
            println!("❌ Error retrieving bucket metadata: {e:?}");
        }
    }
    // Create scanner with very short intervals for testing
    let scanner_config = ScannerConfig {
        scan_interval: Duration::from_millis(100),
        deep_scan_interval: Duration::from_millis(500),
        max_concurrent_scans: 1,
        ..Default::default()
    };
    let scanner = Scanner::new(Some(scanner_config), None);
    // Start scanner
    scanner.start().await.expect("Failed to start scanner");
    println!("✅ Scanner started");
    // Wait for scanner to process lifecycle rules
    tokio::time::sleep(Duration::from_secs(2)).await;
    // Manually trigger a scan cycle to ensure lifecycle processing
    scanner.scan_cycle().await.expect("Failed to trigger scan cycle");
    println!("✅ Manual scan cycle completed");
    // Retry up to three scan cycles, waiting for the object to disappear.
    let mut expired = false;
    for attempt in 0..3 {
        if attempt > 0 {
            scanner.scan_cycle().await.expect("Failed to trigger scan cycle on retry");
        }
        expired = wait_for_object_absence(&ecstore, bucket_name.as_str(), object_name, Duration::from_secs(5)).await;
        if expired {
            break;
        }
    }
    println!("Object is_delete_marker after lifecycle processing: {}", !expired);
    if !expired {
        // Background expiry lagged: inspect the pending queue, then apply
        // the lifecycle event directly and wait once more.
        let pending = rustfs_ecstore::bucket::lifecycle::bucket_lifecycle_ops::GLOBAL_ExpiryState
            .read()
            .await
            .pending_tasks()
            .await;
        println!("Pending expiry tasks: {pending}");
        if let Ok((lc_config, _)) = rustfs_ecstore::bucket::metadata_sys::get_lifecycle_config(bucket_name.as_str()).await
            && let Ok(object_info) = ecstore
                .get_object_info(bucket_name.as_str(), object_name, &rustfs_ecstore::store_api::ObjectOptions::default())
                .await
        {
            let event = rustfs_ecstore::bucket::lifecycle::bucket_lifecycle_ops::eval_action_from_lifecycle(
                &lc_config,
                None,
                None,
                &object_info,
            )
            .await;
            rustfs_ecstore::bucket::lifecycle::bucket_lifecycle_ops::apply_expiry_on_non_transitioned_objects(
                ecstore.clone(),
                &object_info,
                &event,
                &rustfs_ecstore::bucket::lifecycle::bucket_lifecycle_audit::LcEventSrc::Scanner,
            )
            .await;
            expired = wait_for_object_absence(&ecstore, bucket_name.as_str(), object_name, Duration::from_secs(2)).await;
        }
        if !expired {
            println!("❌ Object was not deleted by lifecycle processing");
        }
    } else {
        println!("✅ Object was successfully deleted by lifecycle processing");
        // Let's try to get object info to see its details
        match ecstore
            .get_object_info(bucket_name.as_str(), object_name, &rustfs_ecstore::store_api::ObjectOptions::default())
            .await
        {
            Ok(obj_info) => {
                println!(
                    "Object info: name={}, size={}, mod_time={:?}",
                    obj_info.name, obj_info.size, obj_info.mod_time
                );
            }
            Err(e) => {
                println!("Error getting object info: {e:?}");
            }
        }
    }
    assert!(expired);
    println!("✅ Object successfully expired");
    // Stop scanner
    let _ = scanner.stop().await;
    println!("✅ Scanner stopped");
    println!("Lifecycle expiry basic test completed");
}
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
#[serial]
//#[ignore]
/// Like `test_lifecycle_expiry_basic`, but the rule also enables
/// `ExpiredObjectDeleteMarker`; verifies the object is gone after scanner
/// processing, with a direct lifecycle-evaluation fallback when background
/// expiry lags.
async fn test_lifecycle_expiry_deletemarker() {
    let (_disk_paths, ecstore) = setup_test_env().await;
    // Create test bucket and object
    let suffix = uuid::Uuid::new_v4().simple().to_string();
    let bucket_name = format!("test-lc-expiry-marker-{}", &suffix[..8]);
    let object_name = "test/object.txt"; // Match the lifecycle rule prefix "test/"
    let test_data = b"Hello, this is test data for lifecycle expiry!";
    create_test_lock_bucket(&ecstore, bucket_name.as_str()).await;
    upload_test_object(&ecstore, bucket_name.as_str(), object_name, test_data).await;
    // Verify object exists initially
    assert!(object_exists(&ecstore, bucket_name.as_str(), object_name).await);
    println!("✅ Object exists before lifecycle processing");
    // Set lifecycle configuration with very short expiry (0 days = immediate expiry)
    set_bucket_lifecycle_deletemarker(bucket_name.as_str())
        .await
        .expect("Failed to set lifecycle configuration");
    println!("✅ Lifecycle configuration set for bucket: {bucket_name}");
    // Verify lifecycle configuration was set
    match rustfs_ecstore::bucket::metadata_sys::get(bucket_name.as_str()).await {
        Ok(bucket_meta) => {
            assert!(bucket_meta.lifecycle_config.is_some());
            println!("✅ Bucket metadata retrieved successfully");
        }
        Err(e) => {
            println!("❌ Error retrieving bucket metadata: {e:?}");
        }
    }
    // Create scanner with very short intervals for testing
    let scanner_config = ScannerConfig {
        scan_interval: Duration::from_millis(100),
        deep_scan_interval: Duration::from_millis(500),
        max_concurrent_scans: 1,
        ..Default::default()
    };
    let scanner = Scanner::new(Some(scanner_config), None);
    // Start scanner
    scanner.start().await.expect("Failed to start scanner");
    println!("✅ Scanner started");
    // Wait for scanner to process lifecycle rules
    tokio::time::sleep(Duration::from_secs(2)).await;
    // Manually trigger a scan cycle to ensure lifecycle processing
    scanner.scan_cycle().await.expect("Failed to trigger scan cycle");
    println!("✅ Manual scan cycle completed");
    // Retry up to three scan cycles, waiting for the object to disappear.
    let mut deleted = false;
    for attempt in 0..3 {
        if attempt > 0 {
            scanner.scan_cycle().await.expect("Failed to trigger scan cycle on retry");
        }
        deleted = wait_for_object_absence(&ecstore, bucket_name.as_str(), object_name, Duration::from_secs(5)).await;
        if deleted {
            break;
        }
    }
    println!("Object exists after lifecycle processing: {}", !deleted);
    if !deleted {
        // Background expiry lagged: apply the lifecycle event directly.
        let pending = rustfs_ecstore::bucket::lifecycle::bucket_lifecycle_ops::GLOBAL_ExpiryState
            .read()
            .await
            .pending_tasks()
            .await;
        println!("Pending expiry tasks: {pending}");
        if let Ok((lc_config, _)) = rustfs_ecstore::bucket::metadata_sys::get_lifecycle_config(bucket_name.as_str()).await
            && let Ok(obj_info) = ecstore
                .get_object_info(bucket_name.as_str(), object_name, &rustfs_ecstore::store_api::ObjectOptions::default())
                .await
        {
            let event = rustfs_ecstore::bucket::lifecycle::bucket_lifecycle_ops::eval_action_from_lifecycle(
                &lc_config, None, None, &obj_info,
            )
            .await;
            rustfs_ecstore::bucket::lifecycle::bucket_lifecycle_ops::apply_expiry_on_non_transitioned_objects(
                ecstore.clone(),
                &obj_info,
                &event,
                &rustfs_ecstore::bucket::lifecycle::bucket_lifecycle_audit::LcEventSrc::Scanner,
            )
            .await;
            deleted = wait_for_object_absence(&ecstore, bucket_name.as_str(), object_name, Duration::from_secs(2)).await;
            if !deleted {
                println!(
                    "Object info: name={}, size={}, mod_time={:?}",
                    obj_info.name, obj_info.size, obj_info.mod_time
                );
            }
        }
        if !deleted {
            println!("❌ Object was not deleted by lifecycle processing");
        }
    } else {
        println!("✅ Object was successfully deleted by lifecycle processing");
    }
    assert!(deleted);
    println!("✅ Object successfully expired");
    // Stop scanner
    let _ = scanner.stop().await;
    println!("✅ Scanner stopped");
    println!("Lifecycle expiry basic test completed");
}
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
#[serial]
#[ignore]
/// Verifies that a 0-day transition rule moves an object to the remote
/// "COLDTIER44" tier. Marked `#[ignore]` — it depends on external MinIO
/// endpoints registered by `create_test_tier`.
async fn test_lifecycle_transition_basic() {
    let (_disk_paths, ecstore) = setup_test_env().await;
    create_test_tier(1).await;
    // Create test bucket and object
    let suffix = uuid::Uuid::new_v4().simple().to_string();
    let bucket_name = format!("test-lc-transition-{}", &suffix[..8]);
    let object_name = "test/object.txt"; // Match the lifecycle rule prefix "test/"
    let test_data = b"Hello, this is test data for lifecycle expiry!";
    //create_test_lock_bucket(&ecstore, bucket_name.as_str()).await;
    create_test_bucket(&ecstore, bucket_name.as_str()).await;
    upload_test_object(&ecstore, bucket_name.as_str(), object_name, test_data).await;
    // Verify object exists initially
    assert!(object_exists(&ecstore, bucket_name.as_str(), object_name).await);
    println!("✅ Object exists before lifecycle processing");
    // Set lifecycle configuration with very short expiry (0 days = immediate expiry)
    set_bucket_lifecycle_transition(bucket_name.as_str())
        .await
        .expect("Failed to set lifecycle configuration");
    println!("✅ Lifecycle configuration set for bucket: {bucket_name}");
    // Verify lifecycle configuration was set
    match rustfs_ecstore::bucket::metadata_sys::get(bucket_name.as_str()).await {
        Ok(bucket_meta) => {
            assert!(bucket_meta.lifecycle_config.is_some());
            println!("✅ Bucket metadata retrieved successfully");
        }
        Err(e) => {
            println!("❌ Error retrieving bucket metadata: {e:?}");
        }
    }
    // Create scanner with very short intervals for testing
    let scanner_config = ScannerConfig {
        scan_interval: Duration::from_millis(100),
        deep_scan_interval: Duration::from_millis(500),
        max_concurrent_scans: 1,
        ..Default::default()
    };
    let scanner = Scanner::new(Some(scanner_config), None);
    // Start scanner
    scanner.start().await.expect("Failed to start scanner");
    println!("✅ Scanner started");
    // Wait for scanner to process lifecycle rules
    tokio::time::sleep(Duration::from_secs(2)).await;
    // Manually trigger a scan cycle to ensure lifecycle processing
    scanner.scan_cycle().await.expect("Failed to trigger scan cycle");
    println!("✅ Manual scan cycle completed");
    // Wait a bit more for background workers to process expiry tasks
    tokio::time::sleep(Duration::from_secs(5)).await;
    // Check if object has been expired (deleted)
    let check_result = object_is_transitioned(&ecstore, &bucket_name, object_name).await;
    println!("Object exists after lifecycle processing: {check_result}");
    if check_result {
        println!("✅ Object was transitioned by lifecycle processing");
        // Let's try to get object info to see its details
        match ecstore
            .get_object_info(bucket_name.as_str(), object_name, &rustfs_ecstore::store_api::ObjectOptions::default())
            .await
        {
            Ok(obj_info) => {
                println!(
                    "Object info: name={}, size={}, mod_time={:?}",
                    obj_info.name, obj_info.size, obj_info.mod_time
                );
                println!("Object info: transitioned_object={:?}", obj_info.transitioned_object);
            }
            Err(e) => {
                println!("Error getting object info: {e:?}");
            }
        }
    } else {
        println!("❌ Object was not transitioned by lifecycle processing");
    }
    assert!(check_result);
    println!("✅ Object successfully transitioned");
    // Stop scanner
    let _ = scanner.stop().await;
    println!("✅ Scanner stopped");
    println!("Lifecycle transition basic test completed");
}
}

View File

@@ -1,817 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use rustfs_ahm::heal::manager::HealConfig;
use rustfs_ahm::scanner::{
Scanner,
data_scanner::ScanMode,
node_scanner::{LoadLevel, NodeScanner, NodeScannerConfig},
};
use rustfs_ecstore::{
StorageAPI,
disk::endpoint::Endpoint,
endpoints::{EndpointServerPools, Endpoints, PoolEndpoints},
store::ECStore,
store_api::{MakeBucketOptions, ObjectIO, PutObjReader},
};
use serial_test::serial;
use std::{fs, net::SocketAddr, sync::Arc, sync::OnceLock, time::Duration};
use tempfile::TempDir;
use tokio_util::sync::CancellationToken;
// Global test environment cache to avoid repeated initialization
static GLOBAL_TEST_ENV: OnceLock<(Vec<std::path::PathBuf>, Arc<ECStore>)> = OnceLock::new();
/// Builds (once per process) a 4-disk, single-pool ECStore rooted at
/// `test_dir` and bound to `port`, initializes the bucket metadata system,
/// and caches the result in `GLOBAL_TEST_ENV`.
///
/// NOTE(review): `test_dir`/`port` are only honored by the first caller;
/// later callers receive the cached environment regardless of arguments.
async fn prepare_test_env(test_dir: Option<&str>, port: Option<u16>) -> (Vec<std::path::PathBuf>, Arc<ECStore>) {
    // Check if global environment is already initialized
    if let Some((disk_paths, ecstore)) = GLOBAL_TEST_ENV.get() {
        return (disk_paths.clone(), ecstore.clone());
    }
    // create temp dir as 4 disks
    let test_base_dir = test_dir.unwrap_or("/tmp/rustfs_ahm_optimized_test");
    let temp_dir = std::path::PathBuf::from(test_base_dir);
    if temp_dir.exists() {
        fs::remove_dir_all(&temp_dir).unwrap();
    }
    fs::create_dir_all(&temp_dir).unwrap();
    // create 4 disk dirs
    let disk_paths = vec![
        temp_dir.join("disk1"),
        temp_dir.join("disk2"),
        temp_dir.join("disk3"),
        temp_dir.join("disk4"),
    ];
    for disk_path in &disk_paths {
        fs::create_dir_all(disk_path).unwrap();
    }
    // create EndpointServerPools
    let mut endpoints = Vec::new();
    for (i, disk_path) in disk_paths.iter().enumerate() {
        let mut endpoint = Endpoint::try_from(disk_path.to_str().unwrap()).unwrap();
        // set correct index
        endpoint.set_pool_index(0);
        endpoint.set_set_index(0);
        endpoint.set_disk_index(i);
        endpoints.push(endpoint);
    }
    // One pool, one erasure set, four drives.
    let pool_endpoints = PoolEndpoints {
        legacy: false,
        set_count: 1,
        drives_per_set: 4,
        endpoints: Endpoints::from(endpoints),
        cmd_line: "test".to_string(),
        platform: format!("OS: {} | Arch: {}", std::env::consts::OS, std::env::consts::ARCH),
    };
    let endpoint_pools = EndpointServerPools(vec![pool_endpoints]);
    // format disks
    rustfs_ecstore::store::init_local_disks(endpoint_pools.clone()).await.unwrap();
    // create ECStore with dynamic port
    let port = port.unwrap_or(9000);
    let server_addr: SocketAddr = format!("127.0.0.1:{port}").parse().unwrap();
    let ecstore = ECStore::new(server_addr, endpoint_pools, CancellationToken::new())
        .await
        .unwrap();
    // init bucket metadata system
    let buckets_list = ecstore
        .list_bucket(&rustfs_ecstore::store_api::BucketOptions {
            no_metadata: true,
            ..Default::default()
        })
        .await
        .unwrap();
    let buckets = buckets_list.into_iter().map(|v| v.name).collect();
    rustfs_ecstore::bucket::metadata_sys::init_bucket_metadata_sys(ecstore.clone(), buckets).await;
    // Store in global cache
    let _ = GLOBAL_TEST_ENV.set((disk_paths.clone(), ecstore.clone()));
    (disk_paths, ecstore)
}
#[tokio::test(flavor = "multi_thread")]
#[ignore = "Please run it manually."]
#[serial]
/// Smoke test for the optimized scanner: scans healthy data, simulates
/// single-disk corruption by deleting one disk's copy of the object, and
/// checks that scanning still succeeds and metrics are reported.
async fn test_optimized_scanner_basic_functionality() {
    const TEST_DIR_BASIC: &str = "/tmp/rustfs_ahm_optimized_test_basic";
    let (disk_paths, ecstore) = prepare_test_env(Some(TEST_DIR_BASIC), Some(9101)).await;
    // create some test data
    let bucket_name = "test-bucket";
    let object_name = "test-object";
    let test_data = b"Hello, Optimized RustFS!";
    // create bucket and verify
    let bucket_opts = MakeBucketOptions::default();
    ecstore
        .make_bucket(bucket_name, &bucket_opts)
        .await
        .expect("make_bucket failed");
    // check bucket really exists
    let buckets = ecstore
        .list_bucket(&rustfs_ecstore::store_api::BucketOptions::default())
        .await
        .unwrap();
    assert!(buckets.iter().any(|b| b.name == bucket_name), "bucket not found after creation");
    // write object
    let mut put_reader = PutObjReader::from_vec(test_data.to_vec());
    let object_opts = rustfs_ecstore::store_api::ObjectOptions::default();
    ecstore
        .put_object(bucket_name, object_name, &mut put_reader, &object_opts)
        .await
        .expect("put_object failed");
    // create optimized Scanner and test basic functionality
    let scanner = Scanner::new(None, None);
    // Test 1: Normal scan - verify object is found
    println!("=== Test 1: Optimized Normal scan ===");
    let scan_result = scanner.scan_cycle().await;
    assert!(scan_result.is_ok(), "Optimized normal scan should succeed");
    let _metrics = scanner.get_metrics().await;
    // Note: The optimized scanner may not immediately show scanned objects as it works differently
    println!("Optimized normal scan completed successfully");
    // Test 2: Simulate disk corruption - delete object data from disk1
    println!("=== Test 2: Optimized corruption handling ===");
    let disk1_bucket_path = disk_paths[0].join(bucket_name);
    let disk1_object_path = disk1_bucket_path.join(object_name);
    // Try to delete the object file from disk1 (simulate corruption)
    // Note: This might fail if ECStore is actively using the file
    match fs::remove_dir_all(&disk1_object_path) {
        Ok(_) => {
            println!("Successfully deleted object from disk1: {disk1_object_path:?}");
            // Verify deletion by checking if the directory still exists
            if disk1_object_path.exists() {
                println!("WARNING: Directory still exists after deletion: {disk1_object_path:?}");
            } else {
                println!("Confirmed: Directory was successfully deleted");
            }
        }
        Err(e) => {
            println!("Could not delete object from disk1 (file may be in use): {disk1_object_path:?} - {e}");
            // This is expected behavior - ECStore might be holding file handles
        }
    }
    // Scan again - should still complete (even with missing data)
    let scan_result_after_corruption = scanner.scan_cycle().await;
    println!("Optimized scan after corruption result: {scan_result_after_corruption:?}");
    // Scanner should handle missing data gracefully
    assert!(
        scan_result_after_corruption.is_ok(),
        "Optimized scanner should handle missing data gracefully"
    );
    // Test 3: Test metrics collection
    println!("=== Test 3: Optimized metrics collection ===");
    let final_metrics = scanner.get_metrics().await;
    println!("Optimized final metrics: {final_metrics:?}");
    // Verify metrics are available (even if different from legacy scanner)
    assert!(final_metrics.last_activity.is_some(), "Should have scan activity");
    // clean up temp dir
    let temp_dir = std::path::PathBuf::from(TEST_DIR_BASIC);
    if let Err(e) = fs::remove_dir_all(&temp_dir) {
        eprintln!("Warning: Failed to clean up temp directory {temp_dir:?}: {e}");
    }
}
#[tokio::test(flavor = "multi_thread")]
#[ignore = "Please run it manually."]
#[serial]
async fn test_optimized_scanner_usage_stats() {
    // Verifies data-usage statistics collection by the optimized scanner:
    // scan once, add more objects, scan again, and check the reported usage
    // never shrinks between cycles (we only added data in between).
    const TEST_DIR_USAGE_STATS: &str = "/tmp/rustfs_ahm_optimized_test_usage_stats";
    let (_, ecstore) = prepare_test_env(Some(TEST_DIR_USAGE_STATS), Some(9102)).await;
    // prepare test bucket and object
    let bucket = "test-bucket-optimized";
    ecstore.make_bucket(bucket, &Default::default()).await.unwrap();
    let mut pr = PutObjReader::from_vec(b"hello optimized".to_vec());
    ecstore
        .put_object(bucket, "obj1", &mut pr, &Default::default())
        .await
        .unwrap();
    let scanner = Scanner::new(None, None);
    // enable statistics
    scanner.set_config_enable_data_usage_stats(true).await;
    // first scan and get statistics
    scanner.scan_cycle().await.unwrap();
    let du_initial = scanner.get_data_usage_info().await.unwrap();
    // Note: Optimized scanner may work differently, so we're less strict about counts
    println!("Initial data usage: {du_initial:?}");
    // write 3 more objects and get statistics again
    for size in [1024, 2048, 4096] {
        let name = format!("obj_{size}");
        let mut pr = PutObjReader::from_vec(vec![b'x'; size]);
        ecstore.put_object(bucket, &name, &mut pr, &Default::default()).await.unwrap();
    }
    scanner.scan_cycle().await.unwrap();
    let du_after = scanner.get_data_usage_info().await.unwrap();
    println!("Data usage after adding objects: {du_after:?}");
    // The second scan must not report fewer buckets than the first; only data
    // was added between cycles. (The previous assertion compared
    // `du_after.buckets_count` with itself — a tautology that verified nothing.)
    assert!(
        du_after.buckets_count >= du_initial.buckets_count,
        "bucket count should not shrink after adding objects"
    );
    // clean up temp dir
    let _ = std::fs::remove_dir_all(std::path::Path::new(TEST_DIR_USAGE_STATS));
}
#[tokio::test(flavor = "multi_thread")]
#[ignore = "Please run it manually."]
#[serial]
async fn test_optimized_volume_healing_functionality() {
    // Exercises the optimized scanner's volume-healing path: create two
    // buckets with one object each, remove one bucket directory from a single
    // disk to simulate a missing volume, then run a scan cycle with healing
    // enabled and confirm it completes.
    const TEST_DIR_VOLUME_HEAL: &str = "/tmp/rustfs_ahm_optimized_test_volume_heal";
    let (disk_paths, ecstore) = prepare_test_env(Some(TEST_DIR_VOLUME_HEAL), Some(9103)).await;
    // Create both test buckets up front.
    let bucket1 = "test-bucket-1-opt";
    let bucket2 = "test-bucket-2-opt";
    for bucket in [bucket1, bucket2] {
        ecstore.make_bucket(bucket, &Default::default()).await.unwrap();
    }
    // Write one small object into each bucket.
    for (bucket, object, payload) in [
        (bucket1, "obj1", b"test data 1 optimized".to_vec()),
        (bucket2, "obj2", b"test data 2 optimized".to_vec()),
    ] {
        let mut reader = PutObjReader::from_vec(payload);
        ecstore
            .put_object(bucket, object, &mut reader, &Default::default())
            .await
            .unwrap();
    }
    // Simulate a missing volume: delete bucket1's directory from disk 0 only.
    let missing_volume_path = disk_paths[0].join(bucket1);
    if missing_volume_path.exists() {
        println!("Removing bucket directory to simulate missing volume: {:?}", missing_volume_path);
        match fs::remove_dir_all(&missing_volume_path) {
            Ok(()) => println!("Successfully removed bucket directory from disk 0"),
            Err(err) => println!("Failed to remove bucket directory: {err}"),
        }
    }
    // Scanner with healing switched on.
    let scanner = Scanner::new(None, None);
    scanner.set_config_enable_healing(true).await;
    println!("=== Testing optimized volume healing functionality ===");
    // The scan cycle should detect the missing volume and still succeed.
    let scan_result = scanner.scan_cycle().await;
    assert!(scan_result.is_ok(), "Optimized scan cycle should succeed");
    // Fetch metrics as evidence the cycle ran to completion.
    let metrics = scanner.get_metrics().await;
    println!("Optimized volume healing detection test completed successfully");
    println!("Optimized scan metrics: {metrics:?}");
    // Clean up
    let _ = std::fs::remove_dir_all(std::path::Path::new(TEST_DIR_VOLUME_HEAL));
}
#[tokio::test(flavor = "multi_thread")]
#[ignore = "Please run it manually."]
#[serial]
async fn test_optimized_performance_characteristics() {
    // Rough performance smoke test: scanning a small bucket must finish within
    // a generous budget, and the relative cost of a second (possibly cached)
    // scan is reported for manual inspection.
    const TEST_DIR_PERF: &str = "/tmp/rustfs_ahm_optimized_test_perf";
    let (_, ecstore) = prepare_test_env(Some(TEST_DIR_PERF), Some(9104)).await;
    // Create test bucket with multiple objects
    let bucket_name = "performance-test-bucket";
    ecstore.make_bucket(bucket_name, &Default::default()).await.unwrap();
    // Create several test objects
    for i in 0..10 {
        let object_name = format!("perf-object-{i}");
        let test_data = vec![b'A' + (i % 26) as u8; 1024 * (i + 1)]; // Variable size objects
        let mut put_reader = PutObjReader::from_vec(test_data);
        let object_opts = rustfs_ecstore::store_api::ObjectOptions::default();
        ecstore
            .put_object(bucket_name, &object_name, &mut put_reader, &object_opts)
            .await
            .unwrap_or_else(|_| panic!("Failed to create object {object_name}"));
    }
    // Create optimized scanner
    let scanner = Scanner::new(None, None);
    println!("=== Testing optimized scanner performance ===");
    // Measure first scan time
    let start_time = std::time::Instant::now();
    let scan_result = scanner.scan_cycle().await;
    let scan_duration = start_time.elapsed();
    println!("Optimized scan completed in: {scan_duration:?}");
    assert!(scan_result.is_ok(), "Performance scan should succeed");
    // Verify the scan was reasonably fast (should be faster than old concurrent scanner)
    // Note: This is a rough check - in practice, optimized scanner should be much faster
    assert!(
        scan_duration < Duration::from_secs(30),
        "Optimized scan should complete within 30 seconds"
    );
    // Test memory usage is reasonable (indirect test through successful completion)
    let metrics = scanner.get_metrics().await;
    println!("Performance test metrics: {metrics:?}");
    // Test that multiple scans don't degrade performance significantly
    let start_time2 = std::time::Instant::now();
    let _scan_result2 = scanner.scan_cycle().await;
    let scan_duration2 = start_time2.elapsed();
    println!("Second optimized scan completed in: {scan_duration2:?}");
    // Second scan should be similar or faster due to caching.
    // Use fractional seconds for the ratio: the previous `as_millis()`-based
    // division produced inf/NaN whenever the first scan finished in under 1 ms.
    let first_secs = scan_duration.as_secs_f64();
    let performance_ratio = if first_secs > 0.0 {
        scan_duration2.as_secs_f64() / first_secs
    } else {
        1.0 // degenerate case: first scan too fast to measure
    };
    println!("Performance ratio (second/first): {performance_ratio:.2}");
    // Clean up
    let _ = std::fs::remove_dir_all(std::path::Path::new(TEST_DIR_PERF));
}
#[tokio::test(flavor = "multi_thread")]
#[ignore = "Please run it manually."]
#[serial]
async fn test_optimized_load_balancing_and_throttling() {
    // Drives the node scanner's IO monitor through four synthetic business
    // load scenarios and verifies the throttler reacts appropriately to the
    // load level it detects for each one.
    let temp_dir = TempDir::new().unwrap();
    // Node scanner with fast timings suitable for a test run.
    let config = NodeScannerConfig {
        data_dir: temp_dir.path().to_path_buf(),
        enable_smart_scheduling: true,
        scan_interval: Duration::from_millis(100), // Fast for testing
        disk_scan_delay: Duration::from_millis(50),
        ..Default::default()
    };
    let node_scanner = NodeScanner::new("test-optimized-node".to_string(), config);
    node_scanner.initialize_stats().await.unwrap();
    let io_monitor = node_scanner.get_io_monitor();
    let throttler = node_scanner.get_io_throttler();
    io_monitor.start().await.expect("Failed to start IO monitor");
    // Each entry: (expected load level, latency, qps, error rate, connections).
    let load_scenarios = [
        (LoadLevel::Low, 10, 100, 0, 5),
        (LoadLevel::Medium, 30, 300, 10, 20),
        (LoadLevel::High, 80, 800, 50, 50),
        (LoadLevel::Critical, 200, 1200, 100, 100),
    ];
    for (expected_level, latency, qps, error_rate, connections) in load_scenarios {
        println!("Testing load scenario: {expected_level:?}");
        // Inject the synthetic business metrics for this scenario.
        node_scanner
            .update_business_metrics(latency, qps, error_rate, connections)
            .await;
        // Give the monitoring loop time to absorb the new metrics.
        tokio::time::sleep(Duration::from_millis(500)).await;
        let observed_level = io_monitor.get_business_load_level().await;
        println!("Detected load level: {:?}", observed_level);
        // Ask the throttler for a decision based on a snapshot derived from
        // the scenario's QPS and latency figures.
        let _current_metrics = io_monitor.get_current_metrics().await;
        let snapshot = rustfs_ahm::scanner::io_throttler::MetricsSnapshot {
            iops: 100 + qps / 10,
            latency,
            cpu_usage: std::cmp::min(50 + (qps / 20) as u8, 100),
            memory_usage: 40,
        };
        let decision = throttler.make_throttle_decision(observed_level, Some(snapshot)).await;
        println!(
            "Throttle decision: should_pause={}, delay={:?}",
            decision.should_pause, decision.suggested_delay
        );
        // Verify throttling behavior for the detected level.
        if observed_level == LoadLevel::Critical {
            assert!(decision.should_pause, "Critical load should trigger pause");
        } else if observed_level == LoadLevel::High {
            assert!(
                decision.suggested_delay > Duration::from_millis(1000),
                "High load should suggest significant delay"
            );
        } else {
            // Lower loads should have reasonable delays
            assert!(
                decision.suggested_delay < Duration::from_secs(5),
                "Lower loads should not have excessive delays"
            );
        }
    }
    io_monitor.stop().await;
    println!("Optimized load balancing and throttling test completed successfully");
}
// End-to-end check that the optimized scanner tolerates lost erasure-coded
// data parts: write a 20MB multi-part object, delete a `part.1` file from the
// first disk, run a deep scan with healing enabled, and verify the scanner
// completes either way (Ok or a reported error are both acceptable).
#[tokio::test(flavor = "multi_thread")]
#[ignore = "Please run it manually."]
#[serial]
async fn test_optimized_scanner_detect_missing_data_parts() {
    const TEST_DIR_MISSING_PARTS: &str = "/tmp/rustfs_ahm_optimized_test_missing_parts";
    let (disk_paths, ecstore) = prepare_test_env(Some(TEST_DIR_MISSING_PARTS), Some(9105)).await;
    // Create test bucket
    let bucket_name = "test-bucket-parts-opt";
    let object_name = "large-object-20mb-opt";
    ecstore.make_bucket(bucket_name, &Default::default()).await.unwrap();
    // Create a 20MB object to ensure it has multiple parts
    let large_data = vec![b'A'; 20 * 1024 * 1024]; // 20MB of 'A' characters
    let mut put_reader = PutObjReader::from_vec(large_data);
    let object_opts = rustfs_ecstore::store_api::ObjectOptions::default();
    println!("=== Creating 20MB object ===");
    ecstore
        .put_object(bucket_name, object_name, &mut put_reader, &object_opts)
        .await
        .expect("put_object failed for large object");
    // Verify object was created and get its info
    let obj_info = ecstore
        .get_object_info(bucket_name, object_name, &object_opts)
        .await
        .expect("get_object_info failed");
    println!(
        "Object info: size={}, parts={}, inlined={}",
        obj_info.size,
        obj_info.parts.len(),
        obj_info.inlined
    );
    // A 20MB object must be stored as separate part files (not inlined in
    // metadata), otherwise deleting part files below would be meaningless.
    assert!(!obj_info.inlined, "20MB object should not be inlined");
    println!("Object has {} parts", obj_info.parts.len());
    // Create HealManager and optimized Scanner
    let heal_storage = Arc::new(rustfs_ahm::heal::storage::ECStoreHealStorage::new(ecstore.clone()));
    let heal_config = HealConfig {
        enable_auto_heal: true,
        heal_interval: Duration::from_millis(100),
        max_concurrent_heals: 4,
        task_timeout: Duration::from_secs(300),
        queue_size: 1000,
    };
    let heal_manager = Arc::new(rustfs_ahm::heal::HealManager::new(heal_storage, Some(heal_config)));
    heal_manager.start().await.unwrap();
    let scanner = Scanner::new(None, Some(heal_manager.clone()));
    // Enable healing to detect missing parts
    scanner.set_config_enable_healing(true).await;
    scanner.set_config_scan_mode(ScanMode::Deep).await;
    println!("=== Initial scan (all parts present) ===");
    let initial_scan = scanner.scan_cycle().await;
    assert!(initial_scan.is_ok(), "Initial scan should succeed");
    let initial_metrics = scanner.get_metrics().await;
    println!("Initial scan metrics: objects_scanned={}", initial_metrics.objects_scanned);
    // Simulate data part loss by deleting part files from some disks
    println!("=== Simulating data part loss ===");
    let mut deleted_parts = 0;
    // Paths are collected here purely for record-keeping; they are not read
    // again after this loop.
    let mut deleted_part_paths = Vec::new();
    for (disk_idx, disk_path) in disk_paths.iter().enumerate() {
        if disk_idx > 0 {
            // Only delete from first disk
            break;
        }
        let bucket_path = disk_path.join(bucket_name);
        let object_path = bucket_path.join(object_name);
        if !object_path.exists() {
            continue;
        }
        // Find the data directory (UUID)
        if let Ok(entries) = fs::read_dir(&object_path) {
            for entry in entries.flatten() {
                let entry_path = entry.path();
                if entry_path.is_dir() {
                    // This is likely the data_dir, look for part files inside
                    let part_file_path = entry_path.join("part.1");
                    if part_file_path.exists() {
                        match fs::remove_file(&part_file_path) {
                            Ok(_) => {
                                println!("Deleted part file: {part_file_path:?}");
                                deleted_part_paths.push(part_file_path);
                                deleted_parts += 1;
                            }
                            Err(e) => {
                                println!("Failed to delete part file {part_file_path:?}: {e}");
                            }
                        }
                    }
                }
            }
        }
    }
    println!("Deleted {deleted_parts} part files to simulate data loss");
    // Scan again to detect missing parts
    println!("=== Scan after data deletion (should detect missing data) ===");
    let scan_after_deletion = scanner.scan_cycle().await;
    // Wait a bit for the heal manager to process
    tokio::time::sleep(Duration::from_millis(500)).await;
    // Check heal statistics
    let heal_stats = heal_manager.get_statistics().await;
    println!("Heal statistics:");
    println!(" - total_tasks: {}", heal_stats.total_tasks);
    println!(" - successful_tasks: {}", heal_stats.successful_tasks);
    println!(" - failed_tasks: {}", heal_stats.failed_tasks);
    // Get scanner metrics
    let final_metrics = scanner.get_metrics().await;
    println!("Scanner metrics after deletion scan:");
    println!(" - objects_scanned: {}", final_metrics.objects_scanned);
    // The optimized scanner should handle missing data gracefully; both
    // outcomes are accepted here, only a panic would fail the test.
    match scan_after_deletion {
        Ok(_) => {
            println!("Optimized scanner completed successfully despite missing data");
        }
        Err(e) => {
            println!("Optimized scanner detected errors (acceptable): {e}");
        }
    }
    println!("=== Test completed ===");
    println!("Optimized scanner successfully handled missing data scenario");
    // Clean up
    let _ = std::fs::remove_dir_all(std::path::Path::new(TEST_DIR_MISSING_PARTS));
}
// End-to-end check that the optimized scanner tolerates missing `xl.meta`
// metadata files: write a 5MB object, delete its `xl.meta` from the first two
// disks (leaving other copies intact), then run a deep scan with healing
// enabled and verify the scanner completes either way.
#[tokio::test(flavor = "multi_thread")]
#[ignore = "Please run it manually."]
#[serial]
async fn test_optimized_scanner_detect_missing_xl_meta() {
    const TEST_DIR_MISSING_META: &str = "/tmp/rustfs_ahm_optimized_test_missing_meta";
    let (disk_paths, ecstore) = prepare_test_env(Some(TEST_DIR_MISSING_META), Some(9106)).await;
    // Create test bucket
    let bucket_name = "test-bucket-meta-opt";
    let object_name = "test-object-meta-opt";
    ecstore.make_bucket(bucket_name, &Default::default()).await.unwrap();
    // Create a test object
    let test_data = vec![b'B'; 5 * 1024 * 1024]; // 5MB of 'B' characters
    let mut put_reader = PutObjReader::from_vec(test_data);
    let object_opts = rustfs_ecstore::store_api::ObjectOptions::default();
    println!("=== Creating test object ===");
    ecstore
        .put_object(bucket_name, object_name, &mut put_reader, &object_opts)
        .await
        .expect("put_object failed");
    // Create HealManager and optimized Scanner
    let heal_storage = Arc::new(rustfs_ahm::heal::storage::ECStoreHealStorage::new(ecstore.clone()));
    let heal_config = HealConfig {
        enable_auto_heal: true,
        heal_interval: Duration::from_millis(100),
        max_concurrent_heals: 4,
        task_timeout: Duration::from_secs(300),
        queue_size: 1000,
    };
    let heal_manager = Arc::new(rustfs_ahm::heal::HealManager::new(heal_storage, Some(heal_config)));
    heal_manager.start().await.unwrap();
    let scanner = Scanner::new(None, Some(heal_manager.clone()));
    // Enable healing to detect missing metadata
    scanner.set_config_enable_healing(true).await;
    scanner.set_config_scan_mode(ScanMode::Deep).await;
    println!("=== Initial scan (all metadata present) ===");
    let initial_scan = scanner.scan_cycle().await;
    assert!(initial_scan.is_ok(), "Initial scan should succeed");
    // Simulate xl.meta file loss by deleting xl.meta files from some disks
    println!("=== Simulating xl.meta file loss ===");
    let mut deleted_meta_files = 0;
    // Paths are collected for record-keeping only; not read again afterwards.
    let mut deleted_meta_paths = Vec::new();
    for (disk_idx, disk_path) in disk_paths.iter().enumerate() {
        if disk_idx >= 2 {
            // Only delete from first two disks to ensure some copies remain
            break;
        }
        let bucket_path = disk_path.join(bucket_name);
        let object_path = bucket_path.join(object_name);
        if !object_path.exists() {
            continue;
        }
        // Delete xl.meta file
        let xl_meta_path = object_path.join("xl.meta");
        if xl_meta_path.exists() {
            match fs::remove_file(&xl_meta_path) {
                Ok(_) => {
                    println!("Deleted xl.meta file: {xl_meta_path:?}");
                    deleted_meta_paths.push(xl_meta_path);
                    deleted_meta_files += 1;
                }
                Err(e) => {
                    println!("Failed to delete xl.meta file {xl_meta_path:?}: {e}");
                }
            }
        }
    }
    println!("Deleted {deleted_meta_files} xl.meta files to simulate metadata loss");
    // Scan again to detect missing metadata
    println!("=== Scan after xl.meta deletion ===");
    let scan_after_deletion = scanner.scan_cycle().await;
    // Wait for heal manager to process
    tokio::time::sleep(Duration::from_millis(1000)).await;
    // Check heal statistics
    let final_heal_stats = heal_manager.get_statistics().await;
    println!("Final heal statistics:");
    println!(" - total_tasks: {}", final_heal_stats.total_tasks);
    println!(" - successful_tasks: {}", final_heal_stats.successful_tasks);
    println!(" - failed_tasks: {}", final_heal_stats.failed_tasks);
    // NOTE(review): `final_heal_stats` is already used by the println!s above,
    // so this no-op binding appears redundant — confirm and consider removing.
    let _ = final_heal_stats; // Use the variable to avoid unused warning
    // The optimized scanner should handle missing metadata gracefully; both
    // outcomes are accepted here, only a panic would fail the test.
    match scan_after_deletion {
        Ok(_) => {
            println!("Optimized scanner completed successfully despite missing metadata");
        }
        Err(e) => {
            println!("Optimized scanner detected errors (acceptable): {e}");
        }
    }
    println!("=== Test completed ===");
    println!("Optimized scanner successfully handled missing xl.meta scenario");
    // Clean up
    let _ = std::fs::remove_dir_all(std::path::Path::new(TEST_DIR_MISSING_META));
}
// Regression-style check for false positives: scanning a set of fully healthy
// objects (various sizes) with healing enabled should not mark anything as
// corrupted, and ideally should not enqueue any heal tasks. A second scan
// verifies the result is consistent across cycles.
#[tokio::test(flavor = "multi_thread")]
#[ignore = "Please run it manually."]
#[serial]
async fn test_optimized_scanner_healthy_objects_not_marked_corrupted() {
    const TEST_DIR_HEALTHY: &str = "/tmp/rustfs_ahm_optimized_test_healthy_objects";
    let (_, ecstore) = prepare_test_env(Some(TEST_DIR_HEALTHY), Some(9107)).await;
    // Create heal manager for this test
    let heal_config = HealConfig::default();
    let heal_storage = Arc::new(rustfs_ahm::heal::storage::ECStoreHealStorage::new(ecstore.clone()));
    let heal_manager = Arc::new(rustfs_ahm::heal::manager::HealManager::new(heal_storage, Some(heal_config)));
    heal_manager.start().await.unwrap();
    // Create optimized scanner with healing enabled
    let scanner = Scanner::new(None, Some(heal_manager.clone()));
    scanner.set_config_enable_healing(true).await;
    scanner.set_config_scan_mode(ScanMode::Deep).await;
    // Create test bucket and multiple healthy objects
    let bucket_name = "healthy-test-bucket-opt";
    let bucket_opts = MakeBucketOptions::default();
    ecstore.make_bucket(bucket_name, &bucket_opts).await.unwrap();
    // Create multiple test objects with different sizes
    let test_objects = vec![
        ("small-object-opt", b"Small test data optimized".to_vec()),
        ("medium-object-opt", vec![42u8; 1024]),  // 1KB
        ("large-object-opt", vec![123u8; 10240]), // 10KB
    ];
    let object_opts = rustfs_ecstore::store_api::ObjectOptions::default();
    // Write all test objects
    for (object_name, test_data) in &test_objects {
        let mut put_reader = PutObjReader::from_vec(test_data.clone());
        ecstore
            .put_object(bucket_name, object_name, &mut put_reader, &object_opts)
            .await
            .expect("Failed to put test object");
        println!("Created test object: {object_name} (size: {} bytes)", test_data.len());
    }
    // Wait a moment for objects to be fully written
    tokio::time::sleep(Duration::from_millis(100)).await;
    // Baseline heal-task count, compared against after the scan below.
    let initial_heal_stats = heal_manager.get_statistics().await;
    println!("Initial heal statistics:");
    println!(" - total_tasks: {}", initial_heal_stats.total_tasks);
    // Perform initial scan on healthy objects
    println!("=== Scanning healthy objects ===");
    let scan_result = scanner.scan_cycle().await;
    assert!(scan_result.is_ok(), "Scan of healthy objects should succeed");
    // Wait for any potential heal tasks to be processed
    tokio::time::sleep(Duration::from_millis(1000)).await;
    // Get scanner metrics after scanning
    let metrics = scanner.get_metrics().await;
    println!("Optimized scanner metrics after scanning healthy objects:");
    println!(" - objects_scanned: {}", metrics.objects_scanned);
    println!(" - healthy_objects: {}", metrics.healthy_objects);
    println!(" - corrupted_objects: {}", metrics.corrupted_objects);
    // Get heal statistics after scanning
    let post_scan_heal_stats = heal_manager.get_statistics().await;
    println!("Heal statistics after scanning healthy objects:");
    println!(" - total_tasks: {}", post_scan_heal_stats.total_tasks);
    println!(" - successful_tasks: {}", post_scan_heal_stats.successful_tasks);
    println!(" - failed_tasks: {}", post_scan_heal_stats.failed_tasks);
    // Critical assertion: healthy objects should not trigger unnecessary heal
    // tasks. This is intentionally a warning rather than a hard assert — the
    // optimized scanner is allowed to differ from the legacy behavior here.
    let heal_tasks_created = post_scan_heal_stats.total_tasks - initial_heal_stats.total_tasks;
    if heal_tasks_created > 0 {
        println!("WARNING: {heal_tasks_created} heal tasks were created for healthy objects");
        // For optimized scanner, we're more lenient as it may work differently
        println!("Note: Optimized scanner may have different behavior than legacy scanner");
    } else {
        println!("✓ No heal tasks created for healthy objects - optimized scanner working correctly");
    }
    // Perform a second scan to ensure consistency
    println!("=== Second scan to verify consistency ===");
    let second_scan_result = scanner.scan_cycle().await;
    assert!(second_scan_result.is_ok(), "Second scan should also succeed");
    let second_metrics = scanner.get_metrics().await;
    let _final_heal_stats = heal_manager.get_statistics().await;
    println!("Second scan metrics:");
    println!(" - objects_scanned: {}", second_metrics.objects_scanned);
    println!("=== Test completed successfully ===");
    println!("✓ Optimized scanner handled healthy objects correctly");
    println!("✓ No false positive corruption detection");
    println!("✓ Objects remain accessible after scanning");
    // Clean up
    let _ = std::fs::remove_dir_all(std::path::Path::new(TEST_DIR_HEALTHY));
}

View File

@@ -1,380 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use rustfs_ahm::scanner::{
checkpoint::{CheckpointData, CheckpointManager},
io_monitor::{AdvancedIOMonitor, IOMonitorConfig},
io_throttler::{AdvancedIOThrottler, IOThrottlerConfig},
local_stats::LocalStatsManager,
node_scanner::{LoadLevel, NodeScanner, NodeScannerConfig, ScanProgress},
stats_aggregator::{DecentralizedStatsAggregator, DecentralizedStatsAggregatorConfig},
};
use std::time::Duration;
use tempfile::TempDir;
#[tokio::test]
async fn test_checkpoint_manager_save_and_load() {
    // Round-trips a scan-progress checkpoint through the manager and verifies
    // the restored snapshot matches what was saved.
    let temp_dir = TempDir::new().unwrap();
    let node_id = "test-node-1";
    let manager = CheckpointManager::new(node_id, temp_dir.path());
    // Progress snapshot with a few recognizable non-default fields.
    let saved = ScanProgress {
        current_cycle: 5,
        current_disk_index: 2,
        last_scan_key: Some("test-object-key".to_string()),
        ..Default::default()
    };
    manager
        .force_save_checkpoint(&saved)
        .await
        .expect("Failed to save checkpoint");
    // Reload and unwrap both the I/O result and the optional checkpoint.
    let restored = manager
        .load_checkpoint()
        .await
        .expect("Failed to load checkpoint")
        .expect("No checkpoint found");
    // Everything we stored must come back unchanged.
    assert_eq!(restored.current_cycle, 5);
    assert_eq!(restored.current_disk_index, 2);
    assert_eq!(restored.last_scan_key.as_deref(), Some("test-object-key"));
}
#[tokio::test]
async fn test_checkpoint_data_integrity() {
    // A freshly-built CheckpointData must pass its own integrity check, and a
    // checkpoint saved through the manager must be loadable afterwards.
    let temp_dir = TempDir::new().unwrap();
    let node_id = "test-node-integrity";
    let manager = CheckpointManager::new(node_id, temp_dir.path());
    let progress = ScanProgress::default();
    // Integrity of the raw checkpoint payload.
    let data = CheckpointData::new(progress.clone(), node_id.to_string());
    assert!(data.verify_integrity());
    // Save, then confirm a load reports a stored checkpoint.
    manager
        .force_save_checkpoint(&progress)
        .await
        .expect("Failed to save checkpoint");
    let reloaded = manager.load_checkpoint().await.expect("Failed to load checkpoint");
    assert!(reloaded.is_some());
}
#[tokio::test]
async fn test_local_stats_manager() {
    // A fresh stats manager reports zeroed counters, and recording a heal
    // trigger increments the matching atomic counter.
    let temp_dir = TempDir::new().unwrap();
    let node_id = "test-stats-node";
    let manager = LocalStatsManager::new(node_id, temp_dir.path());
    manager.load_stats().await.expect("Failed to load stats");
    // Summary starts empty and carries our node id.
    let summary = manager.get_stats_summary().await;
    assert_eq!(summary.node_id, node_id);
    assert_eq!(summary.total_objects_scanned, 0);
    // One recorded heal trigger -> counter reads exactly 1.
    manager
        .record_heal_triggered("test-object", "corruption detected")
        .await;
    let counters = manager.get_counters();
    let heal_count = counters.total_heal_triggered.load(std::sync::atomic::Ordering::Relaxed);
    assert_eq!(heal_count, 1);
}
#[tokio::test]
async fn test_io_monitor_load_level_calculation() {
    // With system monitoring disabled the monitor works from injected
    // business metrics only; after one cycle it must report a valid level.
    let config = IOMonitorConfig {
        enable_system_monitoring: false, // use mock data
        ..Default::default()
    };
    let monitor = AdvancedIOMonitor::new(config);
    monitor.start().await.expect("Failed to start IO monitor");
    // Feed in moderate business metrics.
    monitor.update_business_metrics(50, 100, 0, 10).await;
    // Allow at least one full monitoring cycle to run.
    tokio::time::sleep(Duration::from_millis(1500)).await;
    let level = monitor.get_business_load_level().await;
    // The reported level must be one of the known values.
    let is_known_level = matches!(
        level,
        LoadLevel::Low | LoadLevel::Medium | LoadLevel::High | LoadLevel::Critical
    );
    assert!(is_known_level);
    monitor.stop().await;
}
#[tokio::test]
async fn test_io_throttler_load_adjustment() {
    // Delays must grow strictly with the load level, and only Critical load
    // may pause scanning entirely.
    let throttler = AdvancedIOThrottler::new(IOThrottlerConfig::default());
    // Collect the suggested delay for each level in ascending load order.
    let delays = [
        throttler.adjust_for_load_level(LoadLevel::Low).await,
        throttler.adjust_for_load_level(LoadLevel::Medium).await,
        throttler.adjust_for_load_level(LoadLevel::High).await,
        throttler.adjust_for_load_level(LoadLevel::Critical).await,
    ];
    // Each step up in load yields a strictly longer delay.
    assert!(delays.windows(2).all(|pair| pair[0] < pair[1]));
    // Only the Critical level pauses scanning outright.
    assert!(!throttler.should_pause_scanning(LoadLevel::Low).await);
    assert!(!throttler.should_pause_scanning(LoadLevel::Medium).await);
    assert!(!throttler.should_pause_scanning(LoadLevel::High).await);
    assert!(throttler.should_pause_scanning(LoadLevel::Critical).await);
}
#[tokio::test]
async fn test_throttler_business_pressure_simulation() {
    // Runs a short pressure simulation and sanity-checks the results: records
    // were produced, the run took at least the requested time, decisions were
    // made, and both extreme load levels were exercised.
    let throttler = AdvancedIOThrottler::default();
    let requested = Duration::from_millis(500);
    let result = throttler.simulate_business_pressure(requested).await;
    assert!(!result.simulation_records.is_empty());
    assert!(result.total_duration >= requested);
    assert!(result.final_stats.total_decisions > 0);
    // The simulation must have visited at least the lowest and highest levels.
    let seen: std::collections::HashSet<_> = result.simulation_records.iter().map(|r| r.load_level).collect();
    assert!(seen.contains(&LoadLevel::Low));
    assert!(seen.contains(&LoadLevel::Critical));
}
#[tokio::test]
async fn test_node_scanner_creation_and_config() {
    // Constructs a scanner from an explicit config and verifies its identity
    // is reflected both directly and through the stats summary.
    let temp_dir = TempDir::new().unwrap();
    let node_id = "test-scanner-node".to_string();
    let scanner = NodeScanner::new(
        node_id.clone(),
        NodeScannerConfig {
            scan_interval: Duration::from_secs(30),
            disk_scan_delay: Duration::from_secs(5),
            enable_smart_scheduling: true,
            enable_checkpoint: true,
            data_dir: temp_dir.path().to_path_buf(),
            ..Default::default()
        },
    );
    // The scanner reports the id it was constructed with.
    assert_eq!(scanner.node_id(), &node_id);
    scanner.initialize_stats().await.expect("Failed to initialize stats");
    // The stats summary carries the same node id.
    let summary = scanner.get_stats_summary().await;
    assert_eq!(summary.node_id, node_id);
}
#[tokio::test]
async fn test_decentralized_stats_aggregator() {
    // Verifies the aggregator's cache behavior: a second read within the TTL
    // is served from cache (identical timestamp, faster), and a read after
    // the TTL expires refreshes the data (newer timestamp).
    let aggregator = DecentralizedStatsAggregator::new(DecentralizedStatsAggregatorConfig {
        cache_ttl: Duration::from_millis(100), // short cache ttl for testing
        ..Default::default()
    });
    // First read: triggers a real aggregation.
    let first_start = std::time::Instant::now();
    let first = aggregator
        .get_aggregated_stats()
        .await
        .expect("Failed to get aggregated stats");
    let first_elapsed = first_start.elapsed();
    // Second read: should be a cache hit, hence faster and identical.
    let second_start = std::time::Instant::now();
    let second = aggregator.get_aggregated_stats().await.expect("Failed to get cached stats");
    let second_elapsed = second_start.elapsed();
    assert!(second_elapsed < first_elapsed);
    assert_eq!(first.aggregation_timestamp, second.aggregation_timestamp);
    // Let the cache expire, then a third read must refresh the data.
    tokio::time::sleep(Duration::from_millis(150)).await;
    let third = aggregator
        .get_aggregated_stats()
        .await
        .expect("Failed to get refreshed stats");
    assert!(third.aggregation_timestamp > first.aggregation_timestamp);
}
#[tokio::test]
async fn test_scanner_performance_impact() {
    // Simulates a heavy business workload, feeds it to the IO monitor, and
    // verifies the throttler responds with delays proportional to the detected
    // load level — i.e. the scanner's performance impact stays controlled.
    let temp_dir = TempDir::new().unwrap();
    let node_id = "performance-test-node".to_string();
    let config = NodeScannerConfig {
        scan_interval: Duration::from_millis(100), // fast scan for testing
        disk_scan_delay: Duration::from_millis(10),
        data_dir: temp_dir.path().to_path_buf(),
        ..Default::default()
    };
    let scanner = NodeScanner::new(node_id, config);
    // update business metrics for high load
    scanner.update_business_metrics(1500, 3000, 500, 800).await;
    // get io monitor and throttler
    let io_monitor = scanner.get_io_monitor();
    let throttler = scanner.get_io_throttler();
    // start io monitor
    io_monitor.start().await.expect("Failed to start IO monitor");
    // wait for monitor system to stabilize and trigger throttling - increase wait time
    tokio::time::sleep(Duration::from_millis(1000)).await;
    // simulate some io operations to trigger throttling mechanism
    for _ in 0..10 {
        let _current_metrics = io_monitor.get_current_metrics().await;
        let metrics_snapshot = rustfs_ahm::scanner::io_throttler::MetricsSnapshot {
            iops: 1000,
            latency: 100,
            cpu_usage: 80,
            memory_usage: 70,
        };
        let load_level = io_monitor.get_business_load_level().await;
        let _decision = throttler.make_throttle_decision(load_level, Some(metrics_snapshot)).await;
        tokio::time::sleep(Duration::from_millis(50)).await;
    }
    // check if load level is correctly responded
    let load_level = io_monitor.get_business_load_level().await;
    // in high load, scanner should automatically adjust
    let throttle_stats = throttler.get_throttle_stats().await;
    println!("Performance test results:");
    println!(" Load level: {load_level:?}");
    println!(" Throttle decisions: {}", throttle_stats.total_decisions);
    println!(" Average delay: {:?}", throttle_stats.average_delay);
    // verify performance impact control - if load is high enough, there should be throttling delay
    if load_level != LoadLevel::Low {
        assert!(throttle_stats.average_delay > Duration::from_millis(0));
    } else {
        // Low load: there is nothing meaningful to assert. The previous
        // `average_delay >= Duration::from_millis(0)` check was a tautology
        // (a Duration can never be negative), so it was dropped rather than
        // kept as false coverage.
        println!(" Low load detected; no throttling delay expected");
    }
    io_monitor.stop().await;
}
#[tokio::test]
async fn test_checkpoint_recovery_resilience() {
    // Exercise the full checkpoint lifecycle: empty load -> save -> recover -> cleanup.
    let temp_dir = TempDir::new().unwrap();
    let checkpoint_manager = CheckpointManager::new("resilience-test-node", temp_dir.path());

    // A freshly created manager has nothing persisted yet.
    assert!(checkpoint_manager.load_checkpoint().await.unwrap().is_none());

    // Persist a checkpoint carrying distinctive, recognizable values.
    let progress = ScanProgress {
        current_cycle: 10,
        current_disk_index: 3,
        last_scan_key: Some("recovery-test-key".to_string()),
        ..Default::default()
    };
    checkpoint_manager
        .force_save_checkpoint(&progress)
        .await
        .expect("Failed to save checkpoint");

    // Reload and confirm the persisted fields round-trip intact.
    let recovered = checkpoint_manager
        .load_checkpoint()
        .await
        .expect("Failed to load checkpoint")
        .expect("No checkpoint recovered");
    assert_eq!(recovered.current_cycle, 10);
    assert_eq!(recovered.current_disk_index, 3);

    // Removing the checkpoint leaves the store empty again.
    checkpoint_manager
        .cleanup_checkpoint()
        .await
        .expect("Failed to cleanup checkpoint");
    assert!(checkpoint_manager.load_checkpoint().await.unwrap().is_none());
}
/// Build a `NodeScanner` wired to a short scan cadence, suitable for
/// integration tests that cannot afford production scan intervals.
pub async fn create_test_scanner(temp_dir: &TempDir) -> NodeScanner {
    let node_id = "integration-test-node".to_string();
    let config = NodeScannerConfig {
        data_dir: temp_dir.path().to_path_buf(),
        scan_interval: Duration::from_millis(50),
        disk_scan_delay: Duration::from_millis(10),
        ..Default::default()
    };
    NodeScanner::new(node_id, config)
}
/// Aggregated results of a scanner performance run.
pub struct PerformanceBenchmark {
    pub _scanner_overhead_ms: u64,
    pub business_impact_percentage: f64,
    pub _throttle_effectiveness: f64,
}

impl PerformanceBenchmark {
    /// True when the measured business impact stays strictly under the 10% budget.
    pub fn meets_optimization_goals(&self) -> bool {
        let budget_percent = 10.0;
        self.business_impact_percentage < budget_percent
    }
}

View File

@@ -436,7 +436,7 @@ impl DataUsageEntry {
self.obj_sizes.add(summary.total_size as u64);
self.obj_versions.add(summary.versions as u64);
let replication_stats = self.replication_stats.get_or_insert(ReplicationAllStats::default());
let replication_stats = self.replication_stats.get_or_insert_with(ReplicationAllStats::default);
replication_stats.replica_size += summary.replica_size as u64;
replication_stats.replica_count += summary.replica_count as u64;

View File

@@ -35,6 +35,7 @@ serde.workspace = true
serde_json.workspace = true
tonic = { workspace = true }
tokio = { workspace = true }
tokio-stream = { workspace = true }
url.workspace = true
rustfs-madmin.workspace = true
rustfs-filemeta.workspace = true

View File

@@ -0,0 +1,316 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Used by test_distributed_lock_4_nodes_grpc in lock.rs
#![allow(dead_code)]
use async_trait::async_trait;
use rustfs_ecstore::rpc::node_service_time_out_client_no_auth;
use rustfs_lock::{
LockClient, LockError, LockId, LockInfo, LockRequest, LockResponse, LockStats, LockStatus, LockType, Result,
types::{LockMetadata, LockPriority},
};
use rustfs_protos::proto_gen::node_service::{GenerallyLockRequest, PingRequest};
use tonic::Request;
use tracing::{info, warn};
/// gRPC lock client that talks to a node service without authentication.
/// Test-only counterpart of `RemoteClient`, built on the no-auth channel.
#[derive(Debug, Clone)]
pub struct GrpcLockClient {
    addr: String,
}

impl GrpcLockClient {
    /// Create a client pointed at the given endpoint address.
    pub fn new(endpoint: String) -> Self {
        GrpcLockClient { addr: endpoint }
    }

    /// Open a fresh no-auth channel to the configured endpoint.
    async fn get_client(
        &self,
    ) -> Result<
        rustfs_protos::proto_gen::node_service::node_service_client::NodeServiceClient<
            tonic::service::interceptor::InterceptedService<tonic::transport::Channel, rustfs_ecstore::rpc::TonicInterceptor>,
        >,
    > {
        match node_service_time_out_client_no_auth(&self.addr).await {
            Ok(client) => Ok(client),
            Err(err) => Err(LockError::internal(format!("can not get client, err: {err}"))),
        }
    }

    /// Build the minimal `LockRequest` needed for unlock/refresh-style calls,
    /// where only the lock id is meaningful to the server.
    fn create_unlock_request(lock_id: &LockId) -> LockRequest {
        LockRequest {
            resource: lock_id.resource.clone(),
            lock_id: lock_id.clone(),
            lock_type: LockType::Exclusive, // ignored by the server for unlock
            owner: String::new(),           // server resolves the owner via lock_id
            acquire_timeout: std::time::Duration::from_secs(30),
            ttl: std::time::Duration::from_secs(300),
            metadata: LockMetadata::default(),
            priority: LockPriority::Normal,
            deadlock_detection: false,
        }
    }
}
#[async_trait]
impl LockClient for GrpcLockClient {
async fn acquire_lock(&self, request: &LockRequest) -> Result<LockResponse> {
info!("grpc acquire_lock for {}", request.resource);
let mut client = self.get_client().await?;
let req = Request::new(GenerallyLockRequest {
args: serde_json::to_string(&request)
.map_err(|e| LockError::internal(format!("Failed to serialize request: {e}")))?,
});
let resp = client
.lock(req)
.await
.map_err(|e| LockError::internal(e.to_string()))?
.into_inner();
// Check for explicit error first
if let Some(error_info) = resp.error_info {
return Err(LockError::internal(error_info));
}
// Check if the lock acquisition was successful
if resp.success {
// Try to deserialize lock_info from response
let lock_info = if let Some(lock_info_json) = resp.lock_info {
match serde_json::from_str::<LockInfo>(&lock_info_json) {
Ok(info) => info,
Err(e) => {
// If deserialization fails, fall back to constructing from request
warn!("Failed to deserialize lock_info from response: {}, using request data", e);
LockInfo {
id: request.lock_id.clone(),
resource: request.resource.clone(),
lock_type: request.lock_type,
status: LockStatus::Acquired,
owner: request.owner.clone(),
acquired_at: std::time::SystemTime::now(),
expires_at: std::time::SystemTime::now() + request.ttl,
last_refreshed: std::time::SystemTime::now(),
metadata: request.metadata.clone(),
priority: request.priority,
wait_start_time: None,
}
}
}
} else {
// If lock_info is not provided, construct from request
LockInfo {
id: request.lock_id.clone(),
resource: request.resource.clone(),
lock_type: request.lock_type,
status: LockStatus::Acquired,
owner: request.owner.clone(),
acquired_at: std::time::SystemTime::now(),
expires_at: std::time::SystemTime::now() + request.ttl,
last_refreshed: std::time::SystemTime::now(),
metadata: request.metadata.clone(),
priority: request.priority,
wait_start_time: None,
}
};
Ok(LockResponse::success(lock_info, std::time::Duration::ZERO))
} else {
// Lock acquisition failed
Ok(LockResponse::failure(
"Lock acquisition failed on remote server".to_string(),
std::time::Duration::ZERO,
))
}
}
async fn release(&self, lock_id: &LockId) -> Result<bool> {
info!("grpc release for {}", lock_id);
let unlock_request = Self::create_unlock_request(lock_id);
let request_string = serde_json::to_string(&unlock_request)
.map_err(|e| LockError::internal(format!("Failed to serialize request: {e}")))?;
let mut client = self.get_client().await?;
let req = Request::new(GenerallyLockRequest {
args: request_string.clone(),
});
let resp = client
.un_lock(req)
.await
.map_err(|e| LockError::internal(e.to_string()))?
.into_inner();
if let Some(error_info) = resp.error_info {
return Err(LockError::internal(error_info));
}
Ok(resp.success)
}
async fn refresh(&self, lock_id: &LockId) -> Result<bool> {
info!("grpc refresh for {}", lock_id);
let refresh_request = Self::create_unlock_request(lock_id);
let mut client = self.get_client().await?;
let req = Request::new(GenerallyLockRequest {
args: serde_json::to_string(&refresh_request)
.map_err(|e| LockError::internal(format!("Failed to serialize request: {e}")))?,
});
let resp = client
.refresh(req)
.await
.map_err(|e| LockError::internal(e.to_string()))?
.into_inner();
if let Some(error_info) = resp.error_info {
return Err(LockError::internal(error_info));
}
Ok(resp.success)
}
async fn force_release(&self, lock_id: &LockId) -> Result<bool> {
info!("grpc force_release for {}", lock_id);
let force_request = Self::create_unlock_request(lock_id);
let mut client = self.get_client().await?;
let req = Request::new(GenerallyLockRequest {
args: serde_json::to_string(&force_request)
.map_err(|e| LockError::internal(format!("Failed to serialize request: {e}")))?,
});
let resp = client
.force_un_lock(req)
.await
.map_err(|e| LockError::internal(e.to_string()))?
.into_inner();
if let Some(error_info) = resp.error_info {
return Err(LockError::internal(error_info));
}
Ok(resp.success)
}
async fn check_status(&self, lock_id: &LockId) -> Result<Option<LockInfo>> {
info!("grpc check_status for {}", lock_id);
// Since there's no direct status query in the gRPC service,
// we attempt a non-blocking lock acquisition to check if the resource is available
let status_request = Self::create_unlock_request(lock_id);
let mut client = self.get_client().await?;
// Try to acquire a very short-lived lock to test availability
let req = Request::new(GenerallyLockRequest {
args: serde_json::to_string(&status_request)
.map_err(|e| LockError::internal(format!("Failed to serialize request: {e}")))?,
});
// Try exclusive lock first with very short timeout
let resp = client.lock(req).await;
match resp {
Ok(response) => {
let resp = response.into_inner();
if resp.success {
// If we successfully acquired the lock, the resource was free
// Immediately release it
let release_req = Request::new(GenerallyLockRequest {
args: serde_json::to_string(&status_request)
.map_err(|e| LockError::internal(format!("Failed to serialize request: {e}")))?,
});
let _ = client.un_lock(release_req).await; // Best effort release
// Return None since no one was holding the lock
Ok(None)
} else {
// Lock acquisition failed, meaning someone is holding it
// We can't determine the exact details remotely, so return a generic status
Ok(Some(LockInfo {
id: lock_id.clone(),
resource: lock_id.resource.clone(),
lock_type: LockType::Exclusive, // We can't know the exact type
status: LockStatus::Acquired,
owner: "unknown".to_string(), // Remote client can't determine owner
acquired_at: std::time::SystemTime::now(),
expires_at: std::time::SystemTime::now() + std::time::Duration::from_secs(3600),
last_refreshed: std::time::SystemTime::now(),
metadata: LockMetadata::default(),
priority: LockPriority::Normal,
wait_start_time: None,
}))
}
}
Err(_) => {
// Communication error or lock is held
Ok(Some(LockInfo {
id: lock_id.clone(),
resource: lock_id.resource.clone(),
lock_type: LockType::Exclusive,
status: LockStatus::Acquired,
owner: "unknown".to_string(),
acquired_at: std::time::SystemTime::now(),
expires_at: std::time::SystemTime::now() + std::time::Duration::from_secs(3600),
last_refreshed: std::time::SystemTime::now(),
metadata: LockMetadata::default(),
priority: LockPriority::Normal,
wait_start_time: None,
}))
}
}
}
async fn get_stats(&self) -> Result<LockStats> {
info!("grpc get_stats from {}", self.addr);
// Since there's no direct statistics endpoint in the gRPC service,
// we return basic stats indicating this is a remote client
let stats = LockStats {
last_updated: std::time::SystemTime::now(),
..Default::default()
};
Ok(stats)
}
async fn close(&self) -> Result<()> {
Ok(())
}
async fn is_online(&self) -> bool {
// Use Ping interface to test if remote service is online
let mut client = match self.get_client().await {
Ok(client) => client,
Err(_) => {
info!("grpc client {} connection failed", self.addr);
return false;
}
};
let ping_req = Request::new(PingRequest {
version: 1,
body: bytes::Bytes::new(),
});
match client.ping(ping_req).await {
Ok(_) => {
info!("grpc client {} is online", self.addr);
true
}
Err(_) => {
info!("grpc client {} ping failed", self.addr);
false
}
}
}
async fn is_local(&self) -> bool {
false
}
}

View File

@@ -0,0 +1,711 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Used by test_distributed_lock_4_nodes_grpc in lock.rs
#![allow(dead_code)]
use bytes::Bytes;
use futures::Stream;
use rustfs_lock::{LockClient, LockRequest};
use rustfs_protos::{
models::PingBodyBuilder,
proto_gen::node_service::{
GenerallyLockRequest, GenerallyLockResponse, PingRequest, PingResponse, node_service_server::NodeService,
},
};
use std::pin::Pin;
use std::sync::Arc;
use tokio::net::TcpListener;
use tokio_stream::wrappers::TcpListenerStream;
use tonic::{Request, Response, Status};
use tracing::debug;
/// Boxed, pinned stream of gRPC responses used by the streaming RPC stubs below.
type ResponseStream<T> = Pin<Box<dyn Stream<Item = Result<T, Status>> + Send>>;
/// Lock-only `NodeService` implementation.
/// Serves the lock/unlock/refresh RPCs against a real gRPC transport for
/// distributed-lock tests; every other RPC returns `unimplemented`.
#[derive(Debug)]
pub struct MinimalLockNodeService {
    lock_client: Arc<dyn LockClient>,
}

impl MinimalLockNodeService {
    /// Wrap the given lock client in a gRPC-servable facade.
    pub fn new(lock_client: Arc<dyn LockClient>) -> Self {
        MinimalLockNodeService { lock_client }
    }
}
#[tonic::async_trait]
impl NodeService for MinimalLockNodeService {
async fn ping(&self, _request: Request<PingRequest>) -> Result<Response<PingResponse>, Status> {
debug!("MinimalLockNodeService: PING");
let mut fbb = flatbuffers::FlatBufferBuilder::new();
let payload = fbb.create_vector(b"pong");
let mut builder = PingBodyBuilder::new(&mut fbb);
builder.add_payload(payload);
let root = builder.finish();
fbb.finish(root, None);
let finished_data = fbb.finished_data();
Ok(Response::new(PingResponse {
version: 1,
body: Bytes::copy_from_slice(finished_data),
}))
}
async fn lock(&self, request: Request<GenerallyLockRequest>) -> Result<Response<GenerallyLockResponse>, Status> {
let request = request.into_inner();
let args: LockRequest = match serde_json::from_str(&request.args) {
Ok(args) => args,
Err(err) => {
return Ok(Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not decode args, err: {err}")),
lock_info: None,
}));
}
};
match self.lock_client.acquire_lock(&args).await {
Ok(result) => {
let lock_info_json = result.lock_info.as_ref().and_then(|info| serde_json::to_string(info).ok());
Ok(Response::new(GenerallyLockResponse {
success: result.success,
error_info: None,
lock_info: lock_info_json,
}))
}
Err(err) => Ok(Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!(
"can not lock, resource: {0}, owner: {1}, err: {2}",
args.resource, args.owner, err
)),
lock_info: None,
})),
}
}
async fn un_lock(&self, request: Request<GenerallyLockRequest>) -> Result<Response<GenerallyLockResponse>, Status> {
let request = request.into_inner();
let args: LockRequest = match serde_json::from_str(&request.args) {
Ok(args) => args,
Err(err) => {
return Ok(Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not decode args, err: {err}")),
lock_info: None,
}));
}
};
match self.lock_client.release(&args.lock_id).await {
Ok(success) => Ok(Response::new(GenerallyLockResponse {
success,
error_info: None,
lock_info: None,
})),
Err(err) => Ok(Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!(
"can not unlock, resource: {0}, owner: {1}, err: {2}",
args.resource, args.owner, err
)),
lock_info: None,
})),
}
}
async fn force_un_lock(&self, request: Request<GenerallyLockRequest>) -> Result<Response<GenerallyLockResponse>, Status> {
let request = request.into_inner();
let args: LockRequest = match serde_json::from_str(&request.args) {
Ok(args) => args,
Err(err) => {
return Ok(Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not decode args, err: {err}")),
lock_info: None,
}));
}
};
match self.lock_client.force_release(&args.lock_id).await {
Ok(success) => Ok(Response::new(GenerallyLockResponse {
success,
error_info: None,
lock_info: None,
})),
Err(err) => Ok(Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!(
"can not force_unlock, resource: {0}, owner: {1}, err: {2}",
args.resource, args.owner, err
)),
lock_info: None,
})),
}
}
async fn refresh(&self, request: Request<GenerallyLockRequest>) -> Result<Response<GenerallyLockResponse>, Status> {
let request = request.into_inner();
let args: LockRequest = match serde_json::from_str(&request.args) {
Ok(args) => args,
Err(err) => {
return Ok(Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not decode args, err: {err}")),
lock_info: None,
}));
}
};
match self.lock_client.refresh(&args.lock_id).await {
Ok(success) => Ok(Response::new(GenerallyLockResponse {
success,
error_info: None,
lock_info: None,
})),
Err(err) => Ok(Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not refresh, err: {err}")),
lock_info: None,
})),
}
}
// All other methods return unimplemented
async fn heal_bucket(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::HealBucketRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::HealBucketResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn list_bucket(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::ListBucketRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::ListBucketResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn make_bucket(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::MakeBucketRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::MakeBucketResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn get_bucket_info(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::GetBucketInfoRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::GetBucketInfoResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn delete_bucket(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::DeleteBucketRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::DeleteBucketResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn read_all(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::ReadAllRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::ReadAllResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn write_all(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::WriteAllRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::WriteAllResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn delete(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::DeleteRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::DeleteResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn verify_file(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::VerifyFileRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::VerifyFileResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn read_parts(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::ReadPartsRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::ReadPartsResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn check_parts(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::CheckPartsRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::CheckPartsResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn rename_part(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::RenamePartRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::RenamePartResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn rename_file(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::RenameFileRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::RenameFileResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn write(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::WriteRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::WriteResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
type WriteStreamStream = ResponseStream<rustfs_protos::proto_gen::node_service::WriteResponse>;
async fn write_stream(
&self,
_request: Request<tonic::Streaming<rustfs_protos::proto_gen::node_service::WriteRequest>>,
) -> Result<Response<Self::WriteStreamStream>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
type ReadAtStream = ResponseStream<rustfs_protos::proto_gen::node_service::ReadAtResponse>;
async fn read_at(
&self,
_request: Request<tonic::Streaming<rustfs_protos::proto_gen::node_service::ReadAtRequest>>,
) -> Result<Response<Self::ReadAtStream>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn list_dir(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::ListDirRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::ListDirResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
type WalkDirStream = ResponseStream<rustfs_protos::proto_gen::node_service::WalkDirResponse>;
async fn walk_dir(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::WalkDirRequest>,
) -> Result<Response<Self::WalkDirStream>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn rename_data(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::RenameDataRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::RenameDataResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn make_volumes(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::MakeVolumesRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::MakeVolumesResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn make_volume(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::MakeVolumeRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::MakeVolumeResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn list_volumes(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::ListVolumesRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::ListVolumesResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn stat_volume(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::StatVolumeRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::StatVolumeResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn delete_paths(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::DeletePathsRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::DeletePathsResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn update_metadata(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::UpdateMetadataRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::UpdateMetadataResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn read_metadata(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::ReadMetadataRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::ReadMetadataResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn write_metadata(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::WriteMetadataRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::WriteMetadataResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn read_version(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::ReadVersionRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::ReadVersionResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn read_xl(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::ReadXlRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::ReadXlResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn delete_version(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::DeleteVersionRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::DeleteVersionResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn delete_versions(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::DeleteVersionsRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::DeleteVersionsResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn read_multiple(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::ReadMultipleRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::ReadMultipleResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn delete_volume(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::DeleteVolumeRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::DeleteVolumeResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn disk_info(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::DiskInfoRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::DiskInfoResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn local_storage_info(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::LocalStorageInfoRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::LocalStorageInfoResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn server_info(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::ServerInfoRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::ServerInfoResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn get_cpus(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::GetCpusRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::GetCpusResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn get_net_info(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::GetNetInfoRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::GetNetInfoResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn get_partitions(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::GetPartitionsRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::GetPartitionsResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn get_os_info(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::GetOsInfoRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::GetOsInfoResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn get_se_linux_info(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::GetSeLinuxInfoRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::GetSeLinuxInfoResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn get_sys_config(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::GetSysConfigRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::GetSysConfigResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn get_sys_errors(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::GetSysErrorsRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::GetSysErrorsResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn get_mem_info(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::GetMemInfoRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::GetMemInfoResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn get_proc_info(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::GetProcInfoRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::GetProcInfoResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn load_bucket_metadata(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::LoadBucketMetadataRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::LoadBucketMetadataResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn load_policy(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::LoadPolicyRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::LoadPolicyResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn load_group(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::LoadGroupRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::LoadGroupResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn load_policy_mapping(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::LoadPolicyMappingRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::LoadPolicyMappingResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn load_rebalance_meta(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::LoadRebalanceMetaRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::LoadRebalanceMetaResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn get_metrics(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::GetMetricsRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::GetMetricsResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn start_profiling(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::StartProfilingRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::StartProfilingResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn download_profile_data(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::DownloadProfileDataRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::DownloadProfileDataResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn get_bucket_stats(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::GetBucketStatsDataRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::GetBucketStatsDataResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn get_sr_metrics(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::GetSrMetricsDataRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::GetSrMetricsDataResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
async fn get_all_bucket_stats(
&self,
_request: Request<rustfs_protos::proto_gen::node_service::GetAllBucketStatsRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::GetAllBucketStatsResponse>, Status> {
Err(Status::unimplemented("lock-only test server"))
}
// Not exercised by the lock-only test server; always answers Unimplemented.
async fn delete_bucket_metadata(
    &self,
    _request: Request<rustfs_protos::proto_gen::node_service::DeleteBucketMetadataRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::DeleteBucketMetadataResponse>, Status> {
    Err(Status::unimplemented("lock-only test server"))
}
// Not exercised by the lock-only test server; always answers Unimplemented.
async fn delete_policy(
    &self,
    _request: Request<rustfs_protos::proto_gen::node_service::DeletePolicyRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::DeletePolicyResponse>, Status> {
    Err(Status::unimplemented("lock-only test server"))
}
// Not exercised by the lock-only test server; always answers Unimplemented.
async fn delete_user(
    &self,
    _request: Request<rustfs_protos::proto_gen::node_service::DeleteUserRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::DeleteUserResponse>, Status> {
    Err(Status::unimplemented("lock-only test server"))
}
// Not exercised by the lock-only test server; always answers Unimplemented.
async fn delete_service_account(
    &self,
    _request: Request<rustfs_protos::proto_gen::node_service::DeleteServiceAccountRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::DeleteServiceAccountResponse>, Status> {
    Err(Status::unimplemented("lock-only test server"))
}
// Not exercised by the lock-only test server; always answers Unimplemented.
async fn load_user(
    &self,
    _request: Request<rustfs_protos::proto_gen::node_service::LoadUserRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::LoadUserResponse>, Status> {
    Err(Status::unimplemented("lock-only test server"))
}
// Not exercised by the lock-only test server; always answers Unimplemented.
async fn load_service_account(
    &self,
    _request: Request<rustfs_protos::proto_gen::node_service::LoadServiceAccountRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::LoadServiceAccountResponse>, Status> {
    Err(Status::unimplemented("lock-only test server"))
}
// Not exercised by the lock-only test server; always answers Unimplemented.
async fn reload_site_replication_config(
    &self,
    _request: Request<rustfs_protos::proto_gen::node_service::ReloadSiteReplicationConfigRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::ReloadSiteReplicationConfigResponse>, Status> {
    Err(Status::unimplemented("lock-only test server"))
}
// Not exercised by the lock-only test server; always answers Unimplemented.
async fn signal_service(
    &self,
    _request: Request<rustfs_protos::proto_gen::node_service::SignalServiceRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::SignalServiceResponse>, Status> {
    Err(Status::unimplemented("lock-only test server"))
}
// Not exercised by the lock-only test server; always answers Unimplemented.
async fn background_heal_status(
    &self,
    _request: Request<rustfs_protos::proto_gen::node_service::BackgroundHealStatusRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::BackgroundHealStatusResponse>, Status> {
    Err(Status::unimplemented("lock-only test server"))
}
// Not exercised by the lock-only test server; always answers Unimplemented.
async fn get_metacache_listing(
    &self,
    _request: Request<rustfs_protos::proto_gen::node_service::GetMetacacheListingRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::GetMetacacheListingResponse>, Status> {
    Err(Status::unimplemented("lock-only test server"))
}
// Not exercised by the lock-only test server; always answers Unimplemented.
async fn update_metacache_listing(
    &self,
    _request: Request<rustfs_protos::proto_gen::node_service::UpdateMetacacheListingRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::UpdateMetacacheListingResponse>, Status> {
    Err(Status::unimplemented("lock-only test server"))
}
// Not exercised by the lock-only test server; always answers Unimplemented.
async fn reload_pool_meta(
    &self,
    _request: Request<rustfs_protos::proto_gen::node_service::ReloadPoolMetaRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::ReloadPoolMetaResponse>, Status> {
    Err(Status::unimplemented("lock-only test server"))
}
// Not exercised by the lock-only test server; always answers Unimplemented.
async fn stop_rebalance(
    &self,
    _request: Request<rustfs_protos::proto_gen::node_service::StopRebalanceRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::StopRebalanceResponse>, Status> {
    Err(Status::unimplemented("lock-only test server"))
}
// Not exercised by the lock-only test server; always answers Unimplemented.
async fn load_transition_tier_config(
    &self,
    _request: Request<rustfs_protos::proto_gen::node_service::LoadTransitionTierConfigRequest>,
) -> Result<Response<rustfs_protos::proto_gen::node_service::LoadTransitionTierConfigResponse>, Status> {
    Err(Status::unimplemented("lock-only test server"))
}
}
/// Spawn a gRPC lock server on a random port.
///
/// Binds to `127.0.0.1:0` (OS-assigned port) and serves the lock-only
/// `NodeService` backed by `lock_client` on a background tokio task.
///
/// Returns the server URL (`http://127.0.0.1:<port>`) for clients to dial
/// plus the `JoinHandle` of the serving task; abort or drop the handle's
/// task to shut the server down.
///
/// # Errors
/// Fails if the listener cannot be bound or its local address resolved.
pub async fn spawn_lock_server(
    lock_client: Arc<dyn LockClient>,
) -> std::result::Result<(String, tokio::task::JoinHandle<()>), Box<dyn std::error::Error>> {
    let listener = TcpListener::bind("127.0.0.1:0").await?;
    let addr = listener.local_addr()?;
    let addr_str = format!("http://127.0.0.1:{}", addr.port());

    let service = MinimalLockNodeService::new(lock_client);
    let server = tonic::transport::Server::builder()
        .add_service(rustfs_protos::proto_gen::node_service::node_service_server::NodeServiceServer::new(
            service,
        ))
        .serve_with_incoming(TcpListenerStream::new(listener));

    // Drive the server in the background; surface fatal errors on stderr so a
    // failed spawn is visible in test output. (clippy: inline format args.)
    let handle = tokio::spawn(async move {
        if let Err(e) = server.await {
            eprintln!("gRPC server error: {e}");
        }
    });

    Ok((addr_str, handle))
}

View File

@@ -13,780 +13,118 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use async_trait::async_trait;
use rustfs_ecstore::disk::endpoint::Endpoint;
use rustfs_ecstore::rpc::RemoteClient;
use rustfs_lock::client::{LockClient, local::LocalClient};
use rustfs_lock::types::{LockInfo, LockResponse, LockStats};
use rustfs_lock::{LockId, LockMetadata, LockPriority, LockType};
use rustfs_lock::{LockRequest, NamespaceLock, NamespaceLockManager};
use rustfs_protos::proto_gen::node_service::GenerallyLockRequest;
use serial_test::serial;
use std::{collections::HashMap, error::Error, sync::Arc, time::Duration};
use tokio::time::sleep;
use tonic::Request;
use url::Url;
// Address of the running RustFS cluster required by the `#[ignore]`d e2e tests.
const CLUSTER_ADDR: &str = "http://localhost:9000";
/// Build the endpoint list for the single-node test cluster at `CLUSTER_ADDR`.
fn get_cluster_endpoints() -> Vec<Endpoint> {
    let endpoint = Endpoint {
        url: Url::parse(CLUSTER_ADDR).unwrap(),
        is_local: false,
        pool_idx: 0,
        set_idx: 0,
        disk_idx: 0,
    };
    vec![endpoint]
}
/// Deduplicate `endpoints` by host:port (every local endpoint collapses onto
/// one `"local"` entry) and build one `LockClient` per unique endpoint:
/// a `LocalClient` for local endpoints, a `RemoteClient` dialing the
/// endpoint URL otherwise.
async fn create_unique_clients(endpoints: &[Endpoint]) -> Result<Vec<Arc<dyn LockClient>>, Box<dyn Error>> {
    let mut unique_endpoints: HashMap<String, &Endpoint> = HashMap::new();
    for endpoint in endpoints {
        if endpoint.is_local {
            // All local endpoints share one key so only one LocalClient is built.
            unique_endpoints.insert("local".to_string(), endpoint);
        } else {
            let host_port = format!(
                "{}:{}",
                endpoint.url.host_str().unwrap_or("localhost"),
                endpoint.url.port().unwrap_or(9000)
            );
            unique_endpoints.insert(host_port, endpoint);
        }
    }

    // The keys were only needed for deduplication; consume the values directly
    // instead of destructuring `(_key, endpoint)` pairs.
    let mut clients = Vec::with_capacity(unique_endpoints.len());
    for endpoint in unique_endpoints.into_values() {
        if endpoint.is_local {
            clients.push(Arc::new(LocalClient::new()) as Arc<dyn LockClient>);
        } else {
            clients.push(Arc::new(RemoteClient::new(endpoint.url.to_string())) as Arc<dyn LockClient>);
        }
    }
    Ok(clients)
}
use super::{grpc_lock_client::GrpcLockClient, grpc_lock_server::spawn_lock_server};
use rustfs_lock::{GlobalLockManager, NamespaceLock, ObjectKey, client::local::LocalClient};
use std::sync::Arc;
use std::time::Duration;
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_guard_drop_releases_exclusive_lock_local() -> Result<(), Box<dyn Error>> {
// Single local client; no external server required
let client: Arc<dyn LockClient> = Arc::new(LocalClient::new());
let ns_lock = NamespaceLock::with_clients("e2e_guard_local".to_string(), vec![client]);
async fn test_distributed_lock_4_nodes_grpc() {
// Spawn 4 gRPC lock servers, each with its own GlobalLockManager
let manager1 = Arc::new(GlobalLockManager::new());
let manager2 = Arc::new(GlobalLockManager::new());
let manager3 = Arc::new(GlobalLockManager::new());
let manager4 = Arc::new(GlobalLockManager::new());
// Acquire exclusive guard
let g1 = ns_lock
.lock_guard("guard_exclusive", "owner1", Duration::from_millis(100), Duration::from_secs(5))
.await?;
assert!(g1.is_some(), "first guard acquisition should succeed");
let client1: Arc<dyn rustfs_lock::LockClient> = Arc::new(LocalClient::with_manager(manager1));
let client2: Arc<dyn rustfs_lock::LockClient> = Arc::new(LocalClient::with_manager(manager2));
let client3: Arc<dyn rustfs_lock::LockClient> = Arc::new(LocalClient::with_manager(manager3));
let client4: Arc<dyn rustfs_lock::LockClient> = Arc::new(LocalClient::with_manager(manager4));
// While g1 is alive, second exclusive acquisition should fail
let g2 = ns_lock
.lock_guard("guard_exclusive", "owner2", Duration::from_millis(50), Duration::from_secs(5))
.await?;
assert!(g2.is_none(), "second guard acquisition should fail while first is held");
// Spawn 4 gRPC servers on random ports
let (addr1, handle1) = spawn_lock_server(client1).await.expect("Failed to spawn server 1");
let (addr2, handle2) = spawn_lock_server(client2).await.expect("Failed to spawn server 2");
let (addr3, handle3) = spawn_lock_server(client3).await.expect("Failed to spawn server 3");
let (addr4, handle4) = spawn_lock_server(client4).await.expect("Failed to spawn server 4");
// Drop first guard to trigger background release
drop(g1);
// Give the background unlock worker a short moment to process
sleep(Duration::from_millis(80)).await;
// Give servers a moment to start
tokio::time::sleep(Duration::from_millis(100)).await;
// Now acquisition should succeed
let g3 = ns_lock
.lock_guard("guard_exclusive", "owner2", Duration::from_millis(100), Duration::from_secs(5))
.await?;
assert!(g3.is_some(), "acquisition should succeed after guard drop releases the lock");
drop(g3);
// Create 4 gRPC clients (no auth)
let grpc_client1: Arc<dyn rustfs_lock::LockClient> = Arc::new(GrpcLockClient::new(addr1));
let grpc_client2: Arc<dyn rustfs_lock::LockClient> = Arc::new(GrpcLockClient::new(addr2));
let grpc_client3: Arc<dyn rustfs_lock::LockClient> = Arc::new(GrpcLockClient::new(addr3));
let grpc_client4: Arc<dyn rustfs_lock::LockClient> = Arc::new(GrpcLockClient::new(addr4));
Ok(())
}
let clients = vec![grpc_client1, grpc_client2, grpc_client3, grpc_client4];
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_guard_shared_then_write_after_drop() -> Result<(), Box<dyn Error>> {
// Two shared read guards should coexist; write should be blocked until they drop
let client: Arc<dyn LockClient> = Arc::new(LocalClient::new());
let ns_lock = NamespaceLock::with_clients("e2e_guard_rw".to_string(), vec![client]);
// Create NamespaceLock with 4 clients and quorum=3
let lock = NamespaceLock::with_clients_and_quorum("grpc-4-node".to_string(), clients, 3);
assert_eq!(lock.namespace(), "grpc-4-node");
// Acquire two read guards
let r1 = ns_lock
.rlock_guard("rw_resource", "reader1", Duration::from_millis(100), Duration::from_secs(5))
.await?;
let r2 = ns_lock
.rlock_guard("rw_resource", "reader2", Duration::from_millis(100), Duration::from_secs(5))
.await?;
assert!(r1.is_some() && r2.is_some(), "both read guards should be acquired");
// Attempt write while readers hold the lock should fail
let w_fail = ns_lock
.lock_guard("rw_resource", "writer", Duration::from_millis(50), Duration::from_secs(5))
.await?;
assert!(w_fail.is_none(), "write should be blocked when read guards are active");
// Drop read guards to release
drop(r1);
drop(r2);
sleep(Duration::from_millis(80)).await;
// Now write should succeed
let w_ok = ns_lock
.lock_guard("rw_resource", "writer", Duration::from_millis(150), Duration::from_secs(5))
.await?;
assert!(w_ok.is_some(), "write should succeed after read guards are dropped");
drop(w_ok);
Ok(())
}
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_lock_unlock_rpc() -> Result<(), Box<dyn Error>> {
let args = LockRequest {
lock_id: LockId::new_deterministic("dandan"),
resource: "dandan".to_string(),
lock_type: LockType::Exclusive,
owner: "dd".to_string(),
acquire_timeout: Duration::from_secs(30),
ttl: Duration::from_secs(30),
metadata: LockMetadata::default(),
priority: LockPriority::Normal,
deadlock_detection: false,
let resource = ObjectKey {
bucket: Arc::from("test-bucket"),
object: Arc::from("test-object"),
version: None,
};
let args = serde_json::to_string(&args)?;
let mut client = RemoteClient::new(CLUSTER_ADDR.to_string()).get_client().await?;
println!("got client");
let request = Request::new(GenerallyLockRequest { args: args.clone() });
// Test 1: Owner A acquires write lock successfully
let mut guard_a = lock
.get_write_lock(resource.clone(), "owner-a", Duration::from_secs(5))
.await
.expect("Owner A should acquire write lock");
println!("start request");
let response = client.lock(request).await?.into_inner();
println!("request ended");
if let Some(error_info) = response.error_info {
panic!("can not get lock: {error_info}");
}
let request = Request::new(GenerallyLockRequest { args });
let response = client.un_lock(request).await?.into_inner();
if let Some(error_info) = response.error_info {
panic!("can not get un_lock: {error_info}");
}
Ok(())
}
/// Mock client that simulates remote node failures
///
/// Wraps an in-process `LocalClient` and, depending on the flags, injects
/// failures into acquire and/or release operations so tests can exercise
/// quorum and rollback paths without a real remote node.
#[derive(Debug)]
struct FailingMockClient {
    // Delegate that handles every operation not configured to fail.
    local_client: Arc<dyn LockClient>,
    // When true, acquire_* return a failure response and is_online() is false.
    should_fail_acquire: bool,
    // When true, release() returns an internal error.
    should_fail_release: bool,
}
impl FailingMockClient {
fn new(should_fail_acquire: bool, should_fail_release: bool) -> Self {
Self {
local_client: Arc::new(LocalClient::new()),
should_fail_acquire,
should_fail_release,
// Verify it's a Standard guard (DistributedLock path)
match &guard_a {
rustfs_lock::NamespaceLockGuard::Standard(_) => {
// Expected for distributed lock
}
rustfs_lock::NamespaceLockGuard::Fast(_) => {
panic!("Expected Standard guard for distributed lock");
}
}
}
#[async_trait]
impl LockClient for FailingMockClient {
async fn acquire_exclusive(&self, request: &LockRequest) -> rustfs_lock::error::Result<LockResponse> {
if self.should_fail_acquire {
// Simulate network timeout or remote node failure
return Ok(LockResponse::failure("Simulated remote node failure", Duration::from_millis(100)));
}
self.local_client.acquire_exclusive(request).await
}
async fn acquire_shared(&self, request: &LockRequest) -> rustfs_lock::error::Result<LockResponse> {
if self.should_fail_acquire {
return Ok(LockResponse::failure("Simulated remote node failure", Duration::from_millis(100)));
}
self.local_client.acquire_shared(request).await
}
async fn release(&self, lock_id: &LockId) -> rustfs_lock::error::Result<bool> {
if self.should_fail_release {
return Err(rustfs_lock::error::LockError::internal("Simulated release failure"));
}
self.local_client.release(lock_id).await
}
async fn refresh(&self, lock_id: &LockId) -> rustfs_lock::error::Result<bool> {
self.local_client.refresh(lock_id).await
}
async fn force_release(&self, lock_id: &LockId) -> rustfs_lock::error::Result<bool> {
self.local_client.force_release(lock_id).await
}
async fn check_status(&self, lock_id: &LockId) -> rustfs_lock::error::Result<Option<LockInfo>> {
self.local_client.check_status(lock_id).await
}
async fn get_stats(&self) -> rustfs_lock::error::Result<LockStats> {
self.local_client.get_stats().await
}
async fn close(&self) -> rustfs_lock::error::Result<()> {
self.local_client.close().await
}
async fn is_online(&self) -> bool {
if self.should_fail_acquire {
return false; // Simulate offline node
}
true // Simulate online node
}
async fn is_local(&self) -> bool {
false // Simulate remote client
}
}
/// 2PC acquire across a two-node cluster (one healthy local node plus one
/// remote that always fails to acquire): the overall acquisition must fail
/// and the provisional lock taken on the local node must be rolled back.
#[tokio::test]
#[serial]
async fn test_transactional_lock_with_remote_failure() -> Result<(), Box<dyn Error>> {
    println!("🧪 Testing transactional lock with simulated remote node failure");
    // Create a two-node cluster: one local (success) + one remote (failure)
    let local_client: Arc<dyn LockClient> = Arc::new(LocalClient::new());
    let failing_remote_client: Arc<dyn LockClient> = Arc::new(FailingMockClient::new(true, false));
    let clients = vec![local_client, failing_remote_client];
    let ns_lock = NamespaceLock::with_clients("test_transactional".to_string(), clients);
    let resource = "critical_resource".to_string();
    // Test single lock operation with 2PC
    println!("📝 Testing single lock with remote failure...");
    let request = LockRequest::new(&resource, LockType::Exclusive, "test_owner").with_ttl(Duration::from_secs(30));
    let response = ns_lock.acquire_lock(&request).await?;
    // Should fail because quorum (2/2) is not met due to remote failure
    assert!(!response.success, "Lock should fail due to remote node failure");
    println!("✅ Single lock correctly failed due to remote node failure");
    // Verify no locks are left behind on the local node
    // (a fresh LocalClient observes the same state because LocalClient
    // instances share a global lock map)
    let local_client_direct = LocalClient::new();
    let lock_id = LockId::new_deterministic(&ns_lock.get_resource_key(&resource));
    let lock_status = local_client_direct.check_status(&lock_id).await?;
    assert!(lock_status.is_none(), "No lock should remain on local node after rollback");
    println!("✅ Verified rollback: no locks left on local node");
    Ok(())
}
/// Batch variant of the remote-failure test: a multi-resource `lock_batch`
/// against a cluster containing a failing remote node must fail as a whole,
/// leaving no lock behind on any of the requested resources.
#[tokio::test]
#[serial]
async fn test_transactional_batch_lock_with_mixed_failures() -> Result<(), Box<dyn Error>> {
    println!("🧪 Testing transactional batch lock with mixed node failures");
    // Create a cluster with different failure patterns
    let local_client: Arc<dyn LockClient> = Arc::new(LocalClient::new());
    let failing_remote_client: Arc<dyn LockClient> = Arc::new(FailingMockClient::new(true, false));
    let clients = vec![local_client, failing_remote_client];
    let ns_lock = NamespaceLock::with_clients("test_batch_transactional".to_string(), clients);
    let resources = vec!["resource_1".to_string(), "resource_2".to_string(), "resource_3".to_string()];
    println!("📝 Testing batch lock with remote failure...");
    let result = ns_lock
        .lock_batch(&resources, "batch_owner", Duration::from_millis(100), Duration::from_secs(30))
        .await?;
    // Should fail because remote node cannot acquire locks
    assert!(!result, "Batch lock should fail due to remote node failure");
    println!("✅ Batch lock correctly failed due to remote node failure");
    // Verify no locks are left behind on any resource
    let local_client_direct = LocalClient::new();
    for resource in &resources {
        let lock_id = LockId::new_deterministic(&ns_lock.get_resource_key(resource));
        let lock_status = local_client_direct.check_status(&lock_id).await?;
        assert!(lock_status.is_none(), "No lock should remain for resource: {resource}");
    }
    println!("✅ Verified rollback: no locks left on any resource");
    Ok(())
}
/// Three-node cluster where two nodes succeed and one fails: even with a
/// numeric quorum of 2 met, the implementation demands all nodes succeed for
/// consistency, so the acquisition must still fail and roll back.
#[tokio::test]
#[serial]
async fn test_transactional_lock_with_quorum_success() -> Result<(), Box<dyn Error>> {
    println!("🧪 Testing transactional lock with quorum success");
    // Create a three-node cluster where 2 succeed and 1 fails (quorum = 2 automatically)
    let local_client1: Arc<dyn LockClient> = Arc::new(LocalClient::new());
    let local_client2: Arc<dyn LockClient> = Arc::new(LocalClient::new());
    let failing_remote_client: Arc<dyn LockClient> = Arc::new(FailingMockClient::new(true, false));
    let clients = vec![local_client1, local_client2, failing_remote_client];
    let ns_lock = NamespaceLock::with_clients("test_quorum".to_string(), clients);
    let resource = "quorum_resource".to_string();
    println!("📝 Testing lock with automatic quorum=2, 2 success + 1 failure...");
    let request = LockRequest::new(&resource, LockType::Exclusive, "quorum_owner").with_ttl(Duration::from_secs(30));
    let response = ns_lock.acquire_lock(&request).await?;
    // Should fail because we require all nodes to succeed for consistency
    // (even though quorum is met, the implementation requires all nodes for consistency)
    assert!(!response.success, "Lock should fail due to consistency requirement");
    println!("✅ Lock correctly failed due to consistency requirement (partial success rolled back)");
    Ok(())
}
/// Rollback path when release fails: acquisition is expected to fail due to
/// contention (both clients ultimately hit the same shared global lock map),
/// and no residual lock may survive the rollback.
#[tokio::test]
#[serial]
async fn test_transactional_lock_rollback_on_release_failure() -> Result<(), Box<dyn Error>> {
    println!("🧪 Testing rollback behavior when release fails");
    // Create clients where acquire succeeds but release fails
    let local_client: Arc<dyn LockClient> = Arc::new(LocalClient::new());
    let failing_release_client: Arc<dyn LockClient> = Arc::new(FailingMockClient::new(false, true));
    let clients = vec![local_client, failing_release_client];
    let ns_lock = NamespaceLock::with_clients("test_release_failure".to_string(), clients);
    let resource = "release_test_resource".to_string();
    println!("📝 Testing lock acquisition with release failure handling...");
    let request = LockRequest::new(&resource, LockType::Exclusive, "test_owner").with_ttl(Duration::from_secs(30));
    // This should fail because both LocalClient instances share the same global lock map
    // The first client (LocalClient) will acquire the lock, but the second client
    // (FailingMockClient's internal LocalClient) will fail to acquire the same resource
    let response = ns_lock.acquire_lock(&request).await?;
    // The operation should fail due to lock contention between the two LocalClient instances
    assert!(
        !response.success,
        "Lock should fail due to lock contention between LocalClient instances sharing global lock map"
    );
    println!("✅ Lock correctly failed due to lock contention (both clients use same global lock map)");
    // Verify no locks are left behind after rollback
    let local_client_direct = LocalClient::new();
    let lock_id = LockId::new_deterministic(&ns_lock.get_resource_key(&resource));
    let lock_status = local_client_direct.check_status(&lock_id).await?;
    assert!(lock_status.is_none(), "No lock should remain after rollback");
    println!("✅ Verified rollback: no locks left after failed acquisition");
    Ok(())
}
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_lock_unlock_ns_lock() -> Result<(), Box<dyn Error>> {
let endpoints = get_cluster_endpoints();
let clients = create_unique_clients(&endpoints).await?;
let ns_lock = NamespaceLock::with_clients("test".to_string(), clients);
let resources = vec!["foo".to_string()];
let result = ns_lock
.lock_batch(&resources, "dandan", Duration::from_secs(5), Duration::from_secs(10))
// Test 2: Owner B tries to acquire write lock while A holds it - should fail
// Since all 4 backends are holding locks from owner-a, owner-b cannot acquire on any backend
// This means 0 successes < quorum(3), so acquisition should fail
let result_b = lock
.get_write_lock(resource.clone(), "owner-b", Duration::from_millis(100))
.await;
match &result {
Ok(success) => println!("Lock result: {success}"),
Err(e) => println!("Lock error: {e}"),
}
let result = result?;
assert!(result, "Lock should succeed, but got: {result}");
ns_lock.unlock_batch(&resources, "dandan").await?;
Ok(())
}
assert!(result_b.is_err(), "Owner B should fail to acquire lock while owner A holds it");
/// Mutual exclusion on one resource: owner1 locks, owner2 is rejected while
/// the lock is held, and succeeds only after owner1 unlocks.
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_concurrent_lock_attempts() -> Result<(), Box<dyn Error>> {
    let endpoints = get_cluster_endpoints();
    let clients = create_unique_clients(&endpoints).await?;
    let ns_lock = NamespaceLock::with_clients("test".to_string(), clients);
    let resource = vec!["concurrent_resource".to_string()];
    // First lock should succeed
    println!("Attempting first lock...");
    let result1 = ns_lock
        .lock_batch(&resource, "owner1", Duration::from_secs(5), Duration::from_secs(10))
        .await?;
    println!("First lock result: {result1}");
    assert!(result1, "First lock should succeed");
    // Second lock should fail (resource already locked)
    println!("Attempting second lock...");
    let result2 = ns_lock
        .lock_batch(&resource, "owner2", Duration::from_secs(1), Duration::from_secs(10))
        .await?;
    println!("Second lock result: {result2}");
    assert!(!result2, "Second lock should fail");
    // Unlock by first owner
    println!("Unlocking first lock...");
    ns_lock.unlock_batch(&resource, "owner1").await?;
    println!("First lock unlocked");
    // Now second owner should be able to lock
    println!("Attempting third lock...");
    let result3 = ns_lock
        .lock_batch(&resource, "owner2", Duration::from_secs(5), Duration::from_secs(10))
        .await?;
    println!("Third lock result: {result3}");
    assert!(result3, "Lock should succeed after unlock");
    // Clean up
    println!("Cleaning up...");
    ns_lock.unlock_batch(&resource, "owner2").await?;
    println!("Test completed");
    Ok(())
}
/// Shared (read) locks from different owners must coexist; an exclusive
/// (write) lock is rejected while readers hold the resource and succeeds
/// only once both are released.
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_read_write_lock_compatibility() -> Result<(), Box<dyn Error>> {
    let endpoints = get_cluster_endpoints();
    let clients = create_unique_clients(&endpoints).await?;
    let ns_lock = NamespaceLock::with_clients("test_rw".to_string(), clients);

    let resource = vec!["rw_resource".to_string()];
    let ttl = Duration::from_secs(10);

    // Two distinct readers take shared locks; both must succeed.
    let first_reader = ns_lock
        .rlock_batch(&resource, "reader1", Duration::from_secs(5), ttl)
        .await?;
    assert!(first_reader, "First read lock should succeed");

    let second_reader = ns_lock
        .rlock_batch(&resource, "reader2", Duration::from_secs(5), ttl)
        .await?;
    assert!(second_reader, "Second read lock should succeed");

    // A writer must be rejected while shared locks are outstanding.
    let blocked_writer = ns_lock
        .lock_batch(&resource, "writer1", Duration::from_secs(1), ttl)
        .await?;
    assert!(!blocked_writer, "Write lock should fail when read locks are held");

    // Drop both shared locks; the writer can then proceed.
    ns_lock.runlock_batch(&resource, "reader1").await?;
    ns_lock.runlock_batch(&resource, "reader2").await?;

    let unblocked_writer = ns_lock
        .lock_batch(&resource, "writer1", Duration::from_secs(5), ttl)
        .await?;
    assert!(unblocked_writer, "Write lock should succeed after read locks released");

    // Clean up the write lock.
    ns_lock.unlock_batch(&resource, "writer1").await?;
    Ok(())
}
/// A lock taken with a 1-second TTL must expire on its own, letting a second
/// owner acquire the same resource without an explicit unlock from the first.
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_lock_timeout() -> Result<(), Box<dyn Error>> {
    let endpoints = get_cluster_endpoints();
    let clients = create_unique_clients(&endpoints).await?;
    let ns_lock = NamespaceLock::with_clients("test_timeout".to_string(), clients);

    let resource = vec!["timeout_resource".to_string()];

    // owner1 takes the lock with a short (1s) TTL.
    let acquired = ns_lock
        .lock_batch(&resource, "owner1", Duration::from_secs(2), Duration::from_secs(1))
        .await?;
    assert!(acquired, "First lock should succeed");

    // Let the TTL lapse with generous margin.
    sleep(Duration::from_secs(5)).await;

    // The expired lock must no longer block a different owner.
    let reacquired = ns_lock
        .lock_batch(&resource, "owner2", Duration::from_secs(5), Duration::from_secs(1))
        .await?;
    assert!(reacquired, "Lock should succeed after timeout");

    // Clean up owner2's lock.
    ns_lock.unlock_batch(&resource, "owner2").await?;
    Ok(())
}
/// Batch-lock three resources under one owner, verify a second owner cannot
/// take any member of the batch, then confirm the member becomes available
/// after the batch unlock.
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_batch_lock_operations() -> Result<(), Box<dyn Error>> {
    let endpoints = get_cluster_endpoints();
    let clients = create_unique_clients(&endpoints).await?;
    let ns_lock = NamespaceLock::with_clients("test_batch".to_string(), clients);

    let resources = vec![
        "batch_resource1".to_string(),
        "batch_resource2".to_string(),
        "batch_resource3".to_string(),
    ];

    // Acquire every resource in a single batch call.
    let batch_acquired = ns_lock
        .lock_batch(&resources, "batch_owner", Duration::from_secs(5), Duration::from_secs(10))
        .await?;
    assert!(batch_acquired, "Batch lock should succeed");

    // A different owner must be rejected on any resource inside the batch.
    let single_resource = vec!["batch_resource2".to_string()];
    let conflicting = ns_lock
        .lock_batch(&single_resource, "other_owner", Duration::from_secs(1), Duration::from_secs(10))
        .await?;
    assert!(!conflicting, "Lock should fail for already locked resource");

    // Releasing the batch frees all members for other owners.
    ns_lock.unlock_batch(&resources, "batch_owner").await?;
    let after_release = ns_lock
        .lock_batch(&single_resource, "other_owner", Duration::from_secs(5), Duration::from_secs(10))
        .await?;
    assert!(after_release, "Lock should succeed after batch unlock");

    // Clean up the remaining single-resource lock.
    ns_lock.unlock_batch(&single_resource, "other_owner").await?;
    Ok(())
}
/// The same resource name locked in two different namespaces must not
/// conflict: each namespace maintains its own key space.
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_multiple_namespaces() -> Result<(), Box<dyn Error>> {
    let endpoints = get_cluster_endpoints();
    let clients = create_unique_clients(&endpoints).await?;
    let ns_lock1 = NamespaceLock::with_clients("namespace1".to_string(), clients.clone());
    let ns_lock2 = NamespaceLock::with_clients("namespace2".to_string(), clients);

    let resource = vec!["shared_resource".to_string()];

    // Both acquisitions target the same resource name but distinct namespaces,
    // so both must succeed.
    let ns1_locked = ns_lock1
        .lock_batch(&resource, "owner1", Duration::from_secs(5), Duration::from_secs(10))
        .await?;
    assert!(ns1_locked, "Lock in namespace1 should succeed");

    let ns2_locked = ns_lock2
        .lock_batch(&resource, "owner2", Duration::from_secs(5), Duration::from_secs(10))
        .await?;
    assert!(ns2_locked, "Lock in namespace2 should succeed");

    // Release both.
    ns_lock1.unlock_batch(&resource, "owner1").await?;
    ns_lock2.unlock_batch(&resource, "owner2").await?;
    Ok(())
}
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_rpc_read_lock() -> Result<(), Box<dyn Error>> {
let args = LockRequest {
lock_id: LockId::new_deterministic("read_resource"),
resource: "read_resource".to_string(),
lock_type: LockType::Shared,
owner: "reader1".to_string(),
acquire_timeout: Duration::from_secs(30),
ttl: Duration::from_secs(30),
metadata: LockMetadata::default(),
priority: LockPriority::Normal,
deadlock_detection: false,
};
let args_str = serde_json::to_string(&args)?;
let mut client = RemoteClient::new(CLUSTER_ADDR.to_string()).get_client().await?;
// First read lock
let request = Request::new(GenerallyLockRequest { args: args_str.clone() });
let response = client.r_lock(request).await?.into_inner();
if let Some(error_info) = response.error_info {
panic!("can not get read lock: {error_info}");
// Verify the error is a timeout or quorum failure
if let Err(err) = result_b {
let err_str = err.to_string().to_lowercase();
assert!(
err_str.contains("timeout") || err_str.contains("quorum") || err_str.contains("not reached"),
"Error should be timeout or quorum related, got: {}",
err
);
}
// Second read lock with different owner should also succeed
let args2 = LockRequest {
lock_id: LockId::new_deterministic("read_resource"),
resource: "read_resource".to_string(),
lock_type: LockType::Shared,
owner: "reader2".to_string(),
acquire_timeout: Duration::from_secs(30),
ttl: Duration::from_secs(30),
metadata: LockMetadata::default(),
priority: LockPriority::Normal,
deadlock_detection: false,
};
let args2_str = serde_json::to_string(&args2)?;
let request2 = Request::new(GenerallyLockRequest { args: args2_str });
let response2 = client.r_lock(request2).await?.into_inner();
if let Some(error_info) = response2.error_info {
panic!("can not get second read lock: {error_info}");
// Test 3: Release owner A's lock
assert!(guard_a.release(), "Should release guard_a successfully");
assert!(guard_a.is_released(), "Guard A should be marked as released");
// Test 4: Owner B should now be able to acquire the lock
let guard_b = lock
.get_write_lock(resource.clone(), "owner-b", Duration::from_secs(5))
.await
.expect("Owner B should acquire write lock after A releases");
match &guard_b {
rustfs_lock::NamespaceLockGuard::Standard(_) => {
// Expected for distributed lock
}
rustfs_lock::NamespaceLockGuard::Fast(_) => {
panic!("Expected Standard guard for distributed lock");
}
}
// Unlock both
let request = Request::new(GenerallyLockRequest { args: args_str });
let response = client.r_un_lock(request).await?.into_inner();
if let Some(error_info) = response.error_info {
panic!("can not unlock read lock: {error_info}");
}
// Test 5: Verify health check shows 4 nodes
let health = lock.get_health().await;
assert_eq!(health.node_id, "grpc-4-node");
assert_eq!(health.total_nodes, 4);
assert_eq!(health.connected_nodes, 4);
assert_eq!(health.status, rustfs_lock::types::HealthStatus::Healthy);
Ok(())
}
/// Acquire → refresh → unlock round-trip over the raw RPC client; the
/// refresh call must report success while the lock is held.
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_lock_refresh() -> Result<(), Box<dyn Error>> {
    let lock_request = LockRequest {
        lock_id: LockId::new_deterministic("refresh_resource"),
        resource: "refresh_resource".to_string(),
        lock_type: LockType::Exclusive,
        owner: "refresh_owner".to_string(),
        acquire_timeout: Duration::from_secs(30),
        ttl: Duration::from_secs(30),
        metadata: LockMetadata::default(),
        priority: LockPriority::Normal,
        deadlock_detection: false,
    };
    let payload = serde_json::to_string(&lock_request)?;
    let mut rpc = RemoteClient::new(CLUSTER_ADDR.to_string()).get_client().await?;

    // Acquire the exclusive lock.
    let reply = rpc
        .lock(Request::new(GenerallyLockRequest { args: payload.clone() }))
        .await?
        .into_inner();
    if let Some(error_info) = reply.error_info {
        panic!("can not get lock: {error_info}");
    }

    // Refresh it while held; the server must acknowledge success.
    let reply = rpc
        .refresh(Request::new(GenerallyLockRequest { args: payload.clone() }))
        .await?
        .into_inner();
    if let Some(error_info) = reply.error_info {
        panic!("can not refresh lock: {error_info}");
    }
    assert!(reply.success, "Lock refresh should succeed");

    // Release the lock.
    let reply = rpc
        .un_lock(Request::new(GenerallyLockRequest { args: payload }))
        .await?
        .into_inner();
    if let Some(error_info) = reply.error_info {
        panic!("can not unlock: {error_info}");
    }
    Ok(())
}
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_force_unlock() -> Result<(), Box<dyn Error>> {
    // Helper: build an exclusive lock request on "force_resource" for the given owner.
    let build_request = |owner: &str| LockRequest {
        lock_id: LockId::new_deterministic("force_resource"),
        resource: "force_resource".to_string(),
        lock_type: LockType::Exclusive,
        owner: owner.to_string(),
        acquire_timeout: Duration::from_secs(30),
        ttl: Duration::from_secs(30),
        metadata: LockMetadata::default(),
        priority: LockPriority::Normal,
        deadlock_detection: false,
    };
    let mut client = RemoteClient::new(CLUSTER_ADDR.to_string()).get_client().await?;

    // Acquire the lock as "force_owner".
    let lock_args = serde_json::to_string(&build_request("force_owner"))?;
    let lock_resp = client
        .lock(Request::new(GenerallyLockRequest { args: lock_args }))
        .await?
        .into_inner();
    if let Some(error_info) = lock_resp.error_info {
        panic!("can not get lock: {error_info}");
    }

    // Force-unlock as a different owner ("admin"): force release must succeed
    // regardless of who originally acquired the lock.
    let force_args = serde_json::to_string(&build_request("admin"))?;
    let force_resp = client
        .force_un_lock(Request::new(GenerallyLockRequest { args: force_args }))
        .await?
        .into_inner();
    if let Some(error_info) = force_resp.error_info {
        panic!("can not force unlock: {error_info}");
    }
    assert!(force_resp.success, "Force unlock should succeed");
    Ok(())
}
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_global_lock_map_sharing() -> Result<(), Box<dyn Error>> {
let endpoints = get_cluster_endpoints();
let clients = create_unique_clients(&endpoints).await?;
let ns_lock1 = NamespaceLock::with_clients("global_test".to_string(), clients.clone());
let ns_lock2 = NamespaceLock::with_clients("global_test".to_string(), clients);
let resource = vec!["global_test_resource".to_string()];
// First instance acquires lock
println!("First lock map attempting to acquire lock...");
let result1 = ns_lock1
.lock_batch(&resource, "owner1", std::time::Duration::from_secs(5), std::time::Duration::from_secs(10))
.await?;
println!("First lock result: {result1}");
assert!(result1, "First lock should succeed");
// Second instance should fail to acquire the same lock
println!("Second lock map attempting to acquire lock...");
let result2 = ns_lock2
.lock_batch(&resource, "owner2", std::time::Duration::from_secs(1), std::time::Duration::from_secs(10))
.await?;
println!("Second lock result: {result2}");
assert!(!result2, "Second lock should fail because resource is already locked");
// Release lock from first instance
println!("First lock map releasing lock...");
ns_lock1.unlock_batch(&resource, "owner1").await?;
// Now second instance should be able to acquire lock
println!("Second lock map attempting to acquire lock again...");
let result3 = ns_lock2
.lock_batch(&resource, "owner2", std::time::Duration::from_secs(5), std::time::Duration::from_secs(10))
.await?;
println!("Third lock result: {result3}");
assert!(result3, "Lock should succeed after first lock is released");
// Clean up
ns_lock2.unlock_batch(&resource, "owner2").await?;
Ok(())
// Cleanup
drop(guard_b);
// Shutdown servers
handle1.abort();
handle2.abort();
handle3.abort();
handle4.abort();
}

View File

@@ -14,6 +14,8 @@
mod conditional_writes;
mod get_deleted_object_test;
mod grpc_lock_client;
mod grpc_lock_server;
mod head_deleted_object_versioning_test;
mod lifecycle;
mod lock;

View File

@@ -2813,6 +2813,11 @@ mod test {
let disk_info = disk.disk_info(&disk_info_opts).await.unwrap();
// Basic checks on disk info
// Note: On macOS and some other Unix systems, fs_type may be empty
// because statvfs does not provide filesystem type information.
// This is a platform limitation, not a bug.
#[cfg(not(target_os = "macos"))]
assert!(!disk_info.fs_type.is_empty(), "fs_type should not be empty on this platform");
assert!(disk_info.total > 0);
assert!(disk_info.free <= disk_info.total);
assert!(!disk_info.mount_path.is_empty());

View File

@@ -21,6 +21,7 @@ use crate::{
tier::tier::TierConfigMgr,
};
use lazy_static::lazy_static;
use rustfs_lock::client::LockClient;
use std::{
collections::HashMap,
sync::{Arc, OnceLock},
@@ -55,6 +56,8 @@ lazy_static! {
pub static ref GLOBAL_LocalNodeNameHex: String = rustfs_utils::crypto::hex(GLOBAL_LocalNodeName.as_bytes());
pub static ref GLOBAL_NodeNamesHex: HashMap<String, ()> = HashMap::new();
pub static ref GLOBAL_REGION: OnceLock<String> = OnceLock::new();
pub static ref GLOBAL_LOCAL_LOCK_CLIENT: OnceLock<Arc<dyn rustfs_lock::client::LockClient>> = OnceLock::new();
pub static ref GLOBAL_LOCK_CLIENTS: OnceLock<HashMap<String, Arc<dyn LockClient>>> = OnceLock::new();
}
/// Global cancellation token for background services (data scanner and auto heal)
@@ -275,3 +278,51 @@ pub fn shutdown_background_services() {
cancel_token.cancel();
}
}
/// Set the global lock client (first LocalClient created)
///
/// # Arguments
/// * `client` - The LockClient instance to set globally
///
/// # Returns
/// * `Ok(())` if successful
/// * `Err(Arc<dyn LockClient>)` if setting fails (client already set)
///
pub fn set_global_lock_client(
client: Arc<dyn rustfs_lock::client::LockClient>,
) -> Result<(), Arc<dyn rustfs_lock::client::LockClient>> {
GLOBAL_LOCAL_LOCK_CLIENT.set(client)
}
/// Get the global lock client
///
/// # Returns
/// * `Option<Arc<dyn LockClient>>` - The global lock client, if set
///
pub fn get_global_lock_client() -> Option<Arc<dyn rustfs_lock::client::LockClient>> {
GLOBAL_LOCAL_LOCK_CLIENT.get().cloned()
}
/// Set the global lock clients map
///
/// Stores the per-node lock clients in the `GLOBAL_LOCK_CLIENTS` `OnceLock`;
/// this can succeed at most once for the lifetime of the process.
///
/// # Arguments
/// * `clients` - The HashMap of lock clients to set globally
///
/// # Returns
/// * `Ok(())` if successful
/// * `Err(HashMap<String, Arc<dyn LockClient>>)` if setting fails (clients already set);
///   the rejected map is handed back to the caller
///
pub fn set_global_lock_clients(
    clients: HashMap<String, Arc<dyn LockClient>>,
) -> Result<(), HashMap<String, Arc<dyn LockClient>>> {
    GLOBAL_LOCK_CLIENTS.set(clients)
}
/// Get the global lock clients map
///
/// Returns a `'static` borrow of the map rather than a clone, since the
/// `OnceLock` keeps it alive for the remainder of the process.
///
/// # Returns
/// * `Option<&HashMap<String, Arc<dyn LockClient>>>` - The global lock clients map, if set
///
pub fn get_global_lock_clients() -> Option<&'static HashMap<String, Arc<dyn LockClient>>> {
    GLOBAL_LOCK_CLIENTS.get()
}

View File

@@ -56,6 +56,7 @@ pub mod tier;
pub use global::new_object_layer_fn;
pub use global::set_global_endpoints;
pub use global::update_erasure_type;
pub use global::{get_global_lock_client, get_global_lock_clients, set_global_lock_client, set_global_lock_clients};
pub use global::GLOBAL_Endpoints;
pub use store_api::StorageAPI;

View File

@@ -20,53 +20,33 @@ use rustfs_lock::{
};
use rustfs_protos::proto_gen::node_service::node_service_client::NodeServiceClient;
use rustfs_protos::proto_gen::node_service::{GenerallyLockRequest, PingRequest};
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::RwLock;
use tonic::Request;
use tonic::service::interceptor::InterceptedService;
use tonic::transport::Channel;
use tracing::info;
use tracing::{info, warn};
/// Remote lock client implementation
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct RemoteClient {
addr: String,
// Track active locks with their original owner information
active_locks: Arc<RwLock<HashMap<LockId, String>>>, // lock_id -> owner
}
impl Clone for RemoteClient {
fn clone(&self) -> Self {
Self {
addr: self.addr.clone(),
active_locks: self.active_locks.clone(),
}
}
}
impl RemoteClient {
pub fn new(endpoint: String) -> Self {
Self {
addr: endpoint,
active_locks: Arc::new(RwLock::new(HashMap::new())),
}
Self { addr: endpoint }
}
pub fn from_url(url: url::Url) -> Self {
Self {
addr: url.to_string(),
active_locks: Arc::new(RwLock::new(HashMap::new())),
}
Self { addr: url.to_string() }
}
/// Create a minimal LockRequest for unlock operations
fn create_unlock_request(&self, lock_id: &LockId, owner: &str) -> LockRequest {
/// Create a minimal LockRequest for unlock operations using only lock_id
fn create_unlock_request(lock_id: &LockId) -> LockRequest {
LockRequest {
lock_id: lock_id.clone(),
resource: lock_id.resource.clone(),
lock_type: LockType::Exclusive, // Type doesn't matter for unlock
owner: owner.to_string(),
owner: String::new(), // Owner not needed, server uses lock_id
acquire_timeout: std::time::Duration::from_secs(30),
ttl: std::time::Duration::from_secs(300),
metadata: LockMetadata::default(),
@@ -84,13 +64,14 @@ impl RemoteClient {
#[async_trait]
impl LockClient for RemoteClient {
async fn acquire_exclusive(&self, request: &LockRequest) -> Result<LockResponse> {
async fn acquire_lock(&self, request: &LockRequest) -> Result<LockResponse> {
info!("remote acquire_exclusive for {}", request.resource);
let mut client = self.get_client().await?;
let req = Request::new(GenerallyLockRequest {
args: serde_json::to_string(&request)
.map_err(|e| LockError::internal(format!("Failed to serialize request: {e}")))?,
});
let resp = client
.lock(req)
.await
@@ -104,11 +85,30 @@ impl LockClient for RemoteClient {
// Check if the lock acquisition was successful
if resp.success {
// Save the lock information for later release
let mut locks = self.active_locks.write().await;
locks.insert(request.lock_id.clone(), request.owner.clone());
Ok(LockResponse::success(
// Try to deserialize lock_info from response
let lock_info = if let Some(lock_info_json) = resp.lock_info {
match serde_json::from_str::<LockInfo>(&lock_info_json) {
Ok(info) => info,
Err(e) => {
// If deserialization fails, fall back to constructing from request
warn!("Failed to deserialize lock_info from response: {}, using request data", e);
LockInfo {
id: request.lock_id.clone(),
resource: request.resource.clone(),
lock_type: request.lock_type,
status: LockStatus::Acquired,
owner: request.owner.clone(),
acquired_at: std::time::SystemTime::now(),
expires_at: std::time::SystemTime::now() + request.ttl,
last_refreshed: std::time::SystemTime::now(),
metadata: request.metadata.clone(),
priority: request.priority,
wait_start_time: None,
}
}
}
} else {
// If lock_info is not provided, construct from request
LockInfo {
id: request.lock_id.clone(),
resource: request.resource.clone(),
@@ -121,9 +121,10 @@ impl LockClient for RemoteClient {
metadata: request.metadata.clone(),
priority: request.priority,
wait_start_time: None,
},
std::time::Duration::ZERO,
))
}
};
Ok(LockResponse::success(lock_info, std::time::Duration::ZERO))
} else {
// Lock acquisition failed
Ok(LockResponse::failure(
@@ -133,109 +134,28 @@ impl LockClient for RemoteClient {
}
}
async fn acquire_shared(&self, request: &LockRequest) -> Result<LockResponse> {
info!("remote acquire_shared for {}", request.resource);
let mut client = self.get_client().await?;
let req = Request::new(GenerallyLockRequest {
args: serde_json::to_string(&request)
.map_err(|e| LockError::internal(format!("Failed to serialize request: {e}")))?,
});
let resp = client
.r_lock(req)
.await
.map_err(|e| LockError::internal(e.to_string()))?
.into_inner();
// Check for explicit error first
if let Some(error_info) = resp.error_info {
return Err(LockError::internal(error_info));
}
// Check if the lock acquisition was successful
if resp.success {
// Save the lock information for later release
let mut locks = self.active_locks.write().await;
locks.insert(request.lock_id.clone(), request.owner.clone());
Ok(LockResponse::success(
LockInfo {
id: request.lock_id.clone(),
resource: request.resource.clone(),
lock_type: request.lock_type,
status: LockStatus::Acquired,
owner: request.owner.clone(),
acquired_at: std::time::SystemTime::now(),
expires_at: std::time::SystemTime::now() + request.ttl,
last_refreshed: std::time::SystemTime::now(),
metadata: request.metadata.clone(),
priority: request.priority,
wait_start_time: None,
},
std::time::Duration::ZERO,
))
} else {
// Lock acquisition failed
Ok(LockResponse::failure(
"Shared lock acquisition failed on remote server".to_string(),
std::time::Duration::ZERO,
))
}
}
async fn release(&self, lock_id: &LockId) -> Result<bool> {
info!("remote release for {}", lock_id);
// Get the original owner for this lock
let owner = {
let locks = self.active_locks.read().await;
locks.get(lock_id).cloned().unwrap_or_else(|| "remote".to_string())
};
let unlock_request = self.create_unlock_request(lock_id, &owner);
let unlock_request = Self::create_unlock_request(lock_id);
let request_string = serde_json::to_string(&unlock_request)
.map_err(|e| LockError::internal(format!("Failed to serialize request: {e}")))?;
let mut client = self.get_client().await?;
// Try UnLock first (for exclusive locks)
let req = Request::new(GenerallyLockRequest {
args: request_string.clone(),
});
let resp = client.un_lock(req).await;
let success = if resp.is_err() {
// If that fails, try RUnLock (for shared locks)
let req = Request::new(GenerallyLockRequest { args: request_string });
let resp = client
.r_un_lock(req)
.await
.map_err(|e| LockError::internal(e.to_string()))?
.into_inner();
if let Some(error_info) = resp.error_info {
return Err(LockError::internal(error_info));
}
resp.success
} else {
let resp = resp.map_err(|e| LockError::internal(e.to_string()))?.into_inner();
if let Some(error_info) = resp.error_info {
return Err(LockError::internal(error_info));
}
resp.success
};
// Remove the lock from our tracking if successful
if success {
let mut locks = self.active_locks.write().await;
locks.remove(lock_id);
let req = Request::new(GenerallyLockRequest { args: request_string });
let resp = client
.un_lock(req)
.await
.map_err(|e| LockError::internal(e.to_string()))?
.into_inner();
if let Some(error_info) = resp.error_info {
return Err(LockError::internal(error_info));
}
Ok(success)
Ok(resp.success)
}
async fn refresh(&self, lock_id: &LockId) -> Result<bool> {
info!("remote refresh for {}", lock_id);
let refresh_request = self.create_unlock_request(lock_id, "remote");
let refresh_request = Self::create_unlock_request(lock_id);
let mut client = self.get_client().await?;
let req = Request::new(GenerallyLockRequest {
args: serde_json::to_string(&refresh_request)
@@ -254,7 +174,7 @@ impl LockClient for RemoteClient {
async fn force_release(&self, lock_id: &LockId) -> Result<bool> {
info!("remote force_release for {}", lock_id);
let force_request = self.create_unlock_request(lock_id, "remote");
let force_request = Self::create_unlock_request(lock_id);
let mut client = self.get_client().await?;
let req = Request::new(GenerallyLockRequest {
args: serde_json::to_string(&force_request)
@@ -276,7 +196,7 @@ impl LockClient for RemoteClient {
// Since there's no direct status query in the gRPC service,
// we attempt a non-blocking lock acquisition to check if the resource is available
let status_request = self.create_unlock_request(lock_id, "remote");
let status_request = Self::create_unlock_request(lock_id);
let mut client = self.get_client().await?;
// Try to acquire a very short-lived lock to test availability
@@ -307,7 +227,7 @@ impl LockClient for RemoteClient {
// We can't determine the exact details remotely, so return a generic status
Ok(Some(LockInfo {
id: lock_id.clone(),
resource: lock_id.as_str().to_string(),
resource: lock_id.resource.clone(),
lock_type: LockType::Exclusive, // We can't know the exact type
status: LockStatus::Acquired,
owner: "unknown".to_string(), // Remote client can't determine owner
@@ -324,7 +244,7 @@ impl LockClient for RemoteClient {
// Communication error or lock is held
Ok(Some(LockInfo {
id: lock_id.clone(),
resource: lock_id.as_str().to_string(),
resource: lock_id.resource.clone(),
lock_type: LockType::Exclusive,
status: LockStatus::Acquired,
owner: "unknown".to_string(),

View File

@@ -73,8 +73,10 @@ use rustfs_filemeta::{
FileInfo, FileMeta, FileMetaShallowVersion, MetaCacheEntries, MetaCacheEntry, MetadataResolutionParams, ObjectPartInfo,
RawFileInfo, ReplicationStatusType, VersionPurgeStatusType, file_info_from_raw, merge_file_meta_versions,
};
use rustfs_lock::FastLockGuard;
use rustfs_lock::LockClient;
use rustfs_lock::fast_lock::types::LockResult;
use rustfs_lock::local_lock::LocalLock;
use rustfs_lock::{FastLockGuard, NamespaceLock, NamespaceLockGuard, NamespaceLockWrapper, ObjectKey};
use rustfs_madmin::heal_commands::{HealDriveInfo, HealResultItem};
use rustfs_rio::{EtagResolvable, HashReader, HashReaderMut, TryGetIndex as _, WarpReader};
use rustfs_utils::http::RUSTFS_BUCKET_REPLICATION_SSEC_CHECKSUM;
@@ -118,9 +120,14 @@ pub const MAX_PARTS_COUNT: usize = 10000;
const DISK_ONLINE_TIMEOUT: Duration = Duration::from_secs(1);
const DISK_HEALTH_CACHE_TTL: Duration = Duration::from_millis(750);
/// Get lock acquire timeout from environment variable RUSTFS_LOCK_ACQUIRE_TIMEOUT (in seconds)
/// Defaults to 5 seconds if not set or invalid
/// (doc corrected: the previous comment claimed 30 seconds, but the code default is 5)
fn get_lock_acquire_timeout() -> Duration {
    Duration::from_secs(rustfs_utils::get_env_u64("RUSTFS_LOCK_ACQUIRE_TIMEOUT", 5))
}
#[derive(Clone, Debug)]
pub struct SetDisks {
pub fast_lock_manager: Arc<rustfs_lock::FastObjectLockManager>,
pub locker_owner: String,
pub disks: Arc<RwLock<Vec<Option<DiskStore>>>>,
pub set_endpoints: Vec<Endpoint>,
@@ -130,6 +137,7 @@ pub struct SetDisks {
pub pool_index: usize,
pub format: FormatV3,
disk_health_cache: Arc<RwLock<Vec<Option<DiskHealthEntry>>>>,
pub lockers: Vec<Arc<dyn LockClient>>,
}
#[derive(Clone, Debug)]
@@ -151,7 +159,6 @@ impl DiskHealthEntry {
impl SetDisks {
#[allow(clippy::too_many_arguments)]
pub async fn new(
fast_lock_manager: Arc<rustfs_lock::FastObjectLockManager>,
locker_owner: String,
disks: Arc<RwLock<Vec<Option<DiskStore>>>>,
set_drive_count: usize,
@@ -160,9 +167,9 @@ impl SetDisks {
pool_index: usize,
set_endpoints: Vec<Endpoint>,
format: FormatV3,
lockers: Vec<Arc<dyn LockClient>>,
) -> Arc<Self> {
Arc::new(SetDisks {
fast_lock_manager,
locker_owner,
disks,
set_drive_count,
@@ -172,6 +179,7 @@ impl SetDisks {
format,
set_endpoints,
disk_health_cache: Arc::new(RwLock::new(Vec::new())),
lockers,
})
}
@@ -237,6 +245,27 @@ impl SetDisks {
LockResult::Acquired => format!("unexpected lock state while acquiring {mode} lock on {bucket}/{object}"),
}
}
/// Render a lock-acquisition failure into a human-readable message for logs/errors.
///
/// Timeouts and ownership conflicts get specific messages; any other
/// `LockError` variant falls through to a generic description carrying
/// the raw error text.
///
/// Fix: inline the `err` capture in the fallback `format!` for consistency
/// with the other inline-captured arguments (clippy `uninlined_format_args`).
fn format_lock_error_from_error(
    &self,
    bucket: &str,
    object: &str,
    mode: &str,
    err: &rustfs_lock::error::LockError,
) -> String {
    match err {
        rustfs_lock::error::LockError::Timeout { .. } => {
            format!(
                "ns_loc: {mode} lock acquisition timed out on {bucket}/{object} (owner={})",
                self.locker_owner
            )
        }
        rustfs_lock::error::LockError::AlreadyLocked { owner, .. } => {
            format!("ns_loc: {mode} lock conflicted on {bucket}/{object}: held by {owner}")
        }
        _ => format!("ns_loc: {mode} lock acquisition failed on {bucket}/{object}: {err}"),
    }
}
async fn get_disks_internal(&self) -> Vec<Option<DiskStore>> {
let rl = self.disks.read().await;
@@ -602,8 +631,10 @@ impl SetDisks {
Ok(())
}
#[tracing::instrument(skip(disks))]
async fn cleanup_multipart_path(disks: &[Option<DiskStore>], paths: &[String]) {
#[tracing::instrument(skip(self))]
async fn cleanup_multipart_path(&self, paths: &[String]) {
let disks = self.get_disks_internal().await;
let mut errs = Vec::with_capacity(disks.len());
// Use improved simple batch processor instead of join_all for better performance
@@ -831,7 +862,9 @@ impl SetDisks {
}
#[tracing::instrument(skip(disks, meta))]
#[allow(clippy::too_many_arguments)]
async fn rename_part(
&self,
disks: &[Option<DiskStore>],
src_bucket: &str,
src_object: &str,
@@ -878,7 +911,8 @@ impl SetDisks {
if let Some(err) = reduce_write_quorum_errs(&errs, OBJECT_OP_IGNORED_ERRS, write_quorum) {
warn!("rename_part errs {:?}", &errs);
Self::cleanup_multipart_path(disks, &[dst_object.to_string(), format!("{dst_object}.meta")]).await;
self.cleanup_multipart_path(&[dst_object.to_string(), format!("{dst_object}.meta")])
.await;
return Err(err);
}
@@ -2627,63 +2661,15 @@ impl SetDisks {
..Default::default()
};
let _write_lock_guard = if !opts.no_lock {
info!("Acquiring write lock for object: {}, owner: {}", object, self.locker_owner);
// let fast_lock_guard = self.new_ns_lock(bucket, object).await?;
// Some(fast_lock_guard)
// Check if lock is already held
let key = rustfs_lock::fast_lock::types::ObjectKey::new(bucket, object);
let mut reuse_existing_lock = false;
if let Some(lock_info) = self.fast_lock_manager.get_lock_info(&key) {
if lock_info.owner.as_ref() == self.locker_owner.as_str()
&& matches!(lock_info.mode, rustfs_lock::fast_lock::types::LockMode::Exclusive)
{
reuse_existing_lock = true;
debug!("Reusing existing exclusive lock for object {} held by {}", object, self.locker_owner);
} else {
warn!("Lock already exists for object {}: {:?}", object, lock_info);
}
} else {
info!("No existing lock found for object {}", object);
}
if reuse_existing_lock {
None
} else {
let mut lock_result = None;
for i in 0..3 {
let start_time = Instant::now();
match self
.fast_lock_manager
.acquire_write_lock(bucket, object, self.locker_owner.as_str())
.await
{
Ok(res) => {
let elapsed = start_time.elapsed();
info!(duration = ?elapsed, attempt = i + 1, "Write lock acquired");
lock_result = Some(res);
break;
}
Err(e) => {
let elapsed = start_time.elapsed();
info!(error = ?e, attempt = i + 1, duration = ?elapsed, "Lock acquisition failed, retrying");
if i < 2 {
tokio::time::sleep(Duration::from_millis(50 * (i as u64 + 1))).await;
} else {
let message = self.format_lock_error(bucket, object, "write", &e);
error!("Failed to acquire write lock after retries: {}", message);
return Err(DiskError::other(message));
}
}
}
}
lock_result
}
let write_lock_guard = if !opts.no_lock {
let ns_lock = self.new_ns_lock(bucket, object).await?;
Some(ns_lock.get_write_lock(get_lock_acquire_timeout()).await.map_err(|e| {
StorageError::other(format!(
"Failed to acquire write lock: {}",
self.format_lock_error_from_error(bucket, object, "write", &e)
))
})?)
} else {
info!("Skipping lock acquisition (no_lock=true)");
None
};
@@ -3330,11 +3316,15 @@ impl SetDisks {
remove: bool,
) -> Result<(HealResultItem, Option<DiskError>)> {
let _write_lock_guard = self
.fast_lock_manager
.acquire_write_lock(bucket, object, self.locker_owner.as_str())
.new_ns_lock(bucket, object)
.await?
.get_write_lock(get_lock_acquire_timeout())
.await
.map_err(|e| {
let message = self.format_lock_error(bucket, object, "write", &e);
let message = format!(
"Failed to acquire write lock: {}",
self.format_lock_error_from_error(bucket, object, "write", &e)
);
DiskError::other(message)
})?;
@@ -3632,10 +3622,16 @@ impl ObjectIO for SetDisks {
// Acquire a shared read-lock early to protect read consistency
let read_lock_guard = if !opts.no_lock {
Some(
self.fast_lock_manager
.acquire_read_lock(bucket, object, self.locker_owner.as_str())
self.new_ns_lock(bucket, object)
.await?
.get_read_lock(get_lock_acquire_timeout())
.await
.map_err(|e| Error::other(self.format_lock_error(bucket, object, "read", &e)))?,
.map_err(|e| {
Error::other(format!(
"Failed to acquire read lock: {}",
self.format_lock_error_from_error(bucket, object, "read", &e)
))
})?,
)
} else {
None
@@ -3718,24 +3714,23 @@ impl ObjectIO for SetDisks {
#[tracing::instrument(level = "debug", skip(self, data,))]
async fn put_object(&self, bucket: &str, object: &str, data: &mut PutObjReader, opts: &ObjectOptions) -> Result<ObjectInfo> {
let disks = self.get_disks_internal().await;
// let (disks, filtered_online) = self.filter_online_disks(disks_snapshot).await;
// Acquire per-object exclusive lock via RAII guard. It auto-releases asynchronously on drop.
let _object_lock_guard = if !opts.no_lock {
Some(
self.fast_lock_manager
.acquire_write_lock(bucket, object, self.locker_owner.as_str())
.await
.map_err(|e| Error::other(self.format_lock_error(bucket, object, "write", &e)))?,
)
} else {
None
};
let mut object_lock_guard = None;
if let Some(http_preconditions) = opts.http_preconditions.clone()
&& let Some(err) = self.check_write_precondition(bucket, object, opts).await
{
return Err(err);
if let Some(http_preconditions) = opts.http_preconditions.clone() {
if !opts.no_lock {
let ns_lock = self.new_ns_lock(bucket, object).await?;
object_lock_guard = Some(ns_lock.get_write_lock(get_lock_acquire_timeout()).await.map_err(|e| {
StorageError::other(format!(
"Failed to acquire write lock: {}",
self.format_lock_error_from_error(bucket, object, "write", &e)
))
})?);
}
if let Some(err) = self.check_write_precondition(bucket, object, opts).await {
return Err(err);
}
}
let mut user_defined = opts.user_defined.clone();
@@ -3937,6 +3932,16 @@ impl ObjectIO for SetDisks {
drop(writers); // drop writers to close all files, this is to prevent FileAccessDenied errors when renaming data
if !opts.no_lock && object_lock_guard.is_none() {
let ns_lock = self.new_ns_lock(bucket, object).await?;
object_lock_guard = Some(ns_lock.get_write_lock(get_lock_acquire_timeout()).await.map_err(|e| {
StorageError::other(format!(
"Failed to acquire write lock: {}",
self.format_lock_error_from_error(bucket, object, "write", &e)
))
})?);
}
let (online_disks, _, op_old_dir) = Self::rename_data(
&shuffle_disks,
RUSTFS_META_TMP_BUCKET,
@@ -3953,6 +3958,8 @@ impl ObjectIO for SetDisks {
.await?;
}
drop(object_lock_guard); // drop object lock guard to release the lock
self.delete_all(RUSTFS_META_TMP_BUCKET, &tmp_dir).await?;
for (i, op_disk) in online_disks.iter().enumerate() {
@@ -3975,11 +3982,30 @@ impl ObjectIO for SetDisks {
#[async_trait::async_trait]
impl StorageAPI for SetDisks {
#[tracing::instrument(skip(self))]
async fn new_ns_lock(&self, bucket: &str, object: &str) -> Result<FastLockGuard> {
self.fast_lock_manager
.acquire_write_lock(bucket, object, self.locker_owner.as_str())
.await
.map_err(|e| Error::other(self.format_lock_error(bucket, object, "write", &e)))
async fn new_ns_lock(&self, bucket: &str, object: &str) -> Result<NamespaceLockWrapper> {
let set_lock = if is_dist_erasure().await {
// Calculate quorum based on lockers count (majority)
let lockers_count = self.lockers.len();
let write_quorum = if lockers_count > 1 { (lockers_count / 2) + 1 } else { 1 };
NamespaceLock::with_clients_and_quorum(
format!("set-{}-{}", self.pool_index, self.set_index),
self.lockers.clone(),
write_quorum,
)
} else {
NamespaceLock::Local(LocalLock::new(
format!("set-{}-{}", self.pool_index, self.set_index),
Arc::new(rustfs_lock::GlobalLockManager::new()),
))
};
let resource = ObjectKey {
bucket: Arc::from(bucket),
object: Arc::from(object),
version: None,
};
Ok(NamespaceLockWrapper::new(set_lock, resource, self.locker_owner.clone()))
}
#[tracing::instrument(skip(self))]
@@ -4041,10 +4067,16 @@ impl StorageAPI for SetDisks {
// Guard lock for source object metadata update
let _lock_guard = self
.fast_lock_manager
.acquire_write_lock(src_bucket, src_object, self.locker_owner.as_str())
.new_ns_lock(src_bucket, src_object)
.await?
.get_write_lock(get_lock_acquire_timeout())
.await
.map_err(|e| Error::other(self.format_lock_error(src_bucket, src_object, "write", &e)))?;
.map_err(|e| {
Error::other(format!(
"Failed to acquire write lock: {}",
self.format_lock_error_from_error(src_bucket, src_object, "write", &e)
))
})?;
let disks = self.get_disks_internal().await;
@@ -4139,12 +4171,6 @@ impl StorageAPI for SetDisks {
}
#[tracing::instrument(skip(self))]
async fn delete_object_version(&self, bucket: &str, object: &str, fi: &FileInfo, force_del_marker: bool) -> Result<()> {
// // Guard lock for single object delete-version
// let _lock_guard = self
// .fast_lock_manager
// .acquire_write_lock("", object, self.locker_owner.as_str())
// .await
// .map_err(|_| Error::other("can not get lock. please retry".to_string()))?;
let disks = self.get_disks(0, 0).await?;
let write_quorum = disks.len() / 2 + 1;
@@ -4206,34 +4232,41 @@ impl StorageAPI for SetDisks {
let mut unique_objects: HashSet<String> = HashSet::new();
for dobj in &objects {
if unique_objects.insert(dobj.object_name.clone()) {
batch = batch.add_write_lock(bucket, dobj.object_name.clone());
batch = batch.add_write_lock(rustfs_lock::ObjectKey::new(bucket, dobj.object_name.clone()));
}
}
let batch_result = self.fast_lock_manager.acquire_locks_batch(batch).await;
let locked_objects: HashSet<String> = batch_result
.successful_locks
.iter()
.map(|key| key.object.as_ref().to_string())
.collect();
let _lock_guards = batch_result.guards;
let mut failed_map = HashMap::new();
let mut batch_guards = Vec::with_capacity(batch.requests.len());
let failed_map: HashMap<(String, String), LockResult> = batch_result
.failed_locks
.into_iter()
.map(|(key, err)| ((key.bucket.as_ref().to_string(), key.object.as_ref().to_string()), err))
.collect();
let mut locked_objects = HashSet::new();
for req in batch.requests.iter() {
let ns_lock = match self.new_ns_lock(req.key.bucket.as_ref(), req.key.object.as_ref()).await {
Ok(ns_lock) => ns_lock,
Err(e) => {
failed_map.insert((req.key.bucket.as_ref().to_string(), req.key.object.as_ref().to_string()), e.to_string());
continue;
}
};
let _lock_guard = match ns_lock.get_write_lock(get_lock_acquire_timeout()).await {
Ok(lock_guard) => lock_guard,
Err(e) => {
failed_map.insert((req.key.bucket.as_ref().to_string(), req.key.object.as_ref().to_string()), e.to_string());
continue;
}
};
batch_guards.push(_lock_guard);
locked_objects.insert(req.key.object.as_ref().to_string());
}
// Mark failures for objects that could not be locked
for (i, dobj) in objects.iter().enumerate() {
if let Some(err) = failed_map.get(&(bucket.to_string(), dobj.object_name.clone())) {
let message = self.format_lock_error(bucket, dobj.object_name.as_str(), "write", err);
del_errs[i] = Some(Error::other(message));
del_errs[i] = Some(Error::other(err.to_string()));
}
}
// let mut del_fvers = Vec::with_capacity(objects.len());
let ver_cfg = BucketVersioningSys::get(bucket).await.unwrap_or_default();
let mut vers_map: HashMap<&String, FileInfoVersions> = HashMap::new();
@@ -4398,10 +4431,16 @@ impl StorageAPI for SetDisks {
// Guard lock for single object delete
let _lock_guard = if !opts.delete_prefix {
Some(
self.fast_lock_manager
.acquire_write_lock(bucket, object, self.locker_owner.as_str())
self.new_ns_lock(bucket, object)
.await?
.get_write_lock(get_lock_acquire_timeout())
.await
.map_err(|e| Error::other(self.format_lock_error(bucket, object, "write", &e)))?,
.map_err(|e| {
Error::other(format!(
"Failed to acquire write lock: {}",
self.format_lock_error_from_error(bucket, object, "write", &e)
))
})?,
)
} else {
None
@@ -4578,10 +4617,16 @@ impl StorageAPI for SetDisks {
// Acquire a shared read-lock to protect consistency during info fetch
let _read_lock_guard = if !opts.no_lock {
Some(
self.fast_lock_manager
.acquire_read_lock(bucket, object, self.locker_owner.as_str())
self.new_ns_lock(bucket, object)
.await?
.get_read_lock(get_lock_acquire_timeout())
.await
.map_err(|e| Error::other(self.format_lock_error(bucket, object, "read", &e)))?,
.map_err(|e| {
Error::other(format!(
"Failed to acquire read lock: {}",
self.format_lock_error_from_error(bucket, object, "read", &e)
))
})?,
)
} else {
None
@@ -4628,10 +4673,16 @@ impl StorageAPI for SetDisks {
// Guard lock for metadata update
let _lock_guard = if !opts.no_lock {
Some(
self.fast_lock_manager
.acquire_write_lock(bucket, object, self.locker_owner.as_str())
self.new_ns_lock(bucket, object)
.await?
.get_write_lock(get_lock_acquire_timeout())
.await
.map_err(|e| Error::other(self.format_lock_error(bucket, object, "write", &e)))?,
.map_err(|e| {
Error::other(format!(
"Failed to acquire write lock: {}",
self.format_lock_error_from_error(bucket, object, "write", &e)
))
})?,
)
} else {
None
@@ -4640,8 +4691,8 @@ impl StorageAPI for SetDisks {
let disks = self.get_disks_internal().await;
let (metas, errs) = {
if let Some(vid) = opts.version_id.as_ref() {
Self::read_all_fileinfo(&disks, "", bucket, object, vid.as_str(), false, false).await?
if let Some(version_id) = &opts.version_id {
Self::read_all_fileinfo(&disks, "", bucket, object, version_id.to_string().as_str(), false, false).await?
} else {
Self::read_all_xl(&disks, bucket, object, false, false).await
}
@@ -5156,16 +5207,17 @@ impl StorageAPI for SetDisks {
drop(writers); // drop writers to close all files
let part_path = format!("{}/{}/{}", upload_id_path, fi.data_dir.unwrap_or_default(), part_suffix);
let _ = Self::rename_part(
&disks,
RUSTFS_META_TMP_BUCKET,
&tmp_part_path,
RUSTFS_META_MULTIPART_BUCKET,
&part_path,
part_info_buff.into(),
write_quorum,
)
.await?;
let _ = self
.rename_part(
&disks,
RUSTFS_META_TMP_BUCKET,
&tmp_part_path,
RUSTFS_META_MULTIPART_BUCKET,
&part_path,
part_info_buff.into(),
write_quorum,
)
.await?;
let ret: PartInfo = PartInfo {
etag: Some(etag.clone()),
@@ -5453,6 +5505,24 @@ impl StorageAPI for SetDisks {
#[tracing::instrument(skip(self))]
async fn new_multipart_upload(&self, bucket: &str, object: &str, opts: &ObjectOptions) -> Result<MultipartUploadResult> {
if let Some(http_preconditions) = opts.http_preconditions.clone() {
let object_lock_guard = if !opts.no_lock {
let ns_lock = self.new_ns_lock(bucket, object).await?;
Some(ns_lock.get_write_lock(get_lock_acquire_timeout()).await.map_err(|e| {
StorageError::other(format!(
"Failed to acquire write lock: {}",
self.format_lock_error_from_error(bucket, object, "write", &e)
))
})?)
} else {
None
};
if let Some(err) = self.check_write_precondition(bucket, object, opts).await {
return Err(err);
}
}
let disks = self.disks.read().await;
let disks = disks.clone();
@@ -5608,16 +5678,24 @@ impl StorageAPI for SetDisks {
uploaded_parts: Vec<CompletePart>,
opts: &ObjectOptions,
) -> Result<ObjectInfo> {
let _object_lock_guard = if !opts.no_lock {
Some(
self.fast_lock_manager
.acquire_write_lock(bucket, object, self.locker_owner.as_str())
.await
.map_err(|e| Error::other(self.format_lock_error(bucket, object, "write", &e)))?,
)
} else {
None
};
let mut object_lock_guard = None;
// Acquire per-object exclusive lock via RAII guard. It auto-releases asynchronously on drop.
if let Some(http_preconditions) = opts.http_preconditions.clone() {
if !opts.no_lock {
let ns_lock = self.new_ns_lock(bucket, object).await?;
object_lock_guard = Some(ns_lock.get_write_lock(get_lock_acquire_timeout()).await.map_err(|e| {
StorageError::other(format!(
"Failed to acquire write lock: {}",
self.format_lock_error_from_error(bucket, object, "write", &e)
))
})?);
}
if let Some(err) = self.check_write_precondition(bucket, object, opts).await {
return Err(err);
}
}
let (mut fi, files_metas) = self.check_upload_id_exists(bucket, object, upload_id, true).await?;
let upload_id_path = Self::get_upload_id_dir(bucket, object, upload_id);
@@ -5629,25 +5707,6 @@ impl StorageAPI for SetDisks {
let disks = disks.clone();
// let disks = Self::shuffle_disks(&disks, &fi.erasure.distribution);
// Acquire per-object exclusive lock via RAII guard. It auto-releases asynchronously on drop.
if let Some(http_preconditions) = opts.http_preconditions.clone() {
// if !opts.no_lock {
// let guard_opt = self
// .namespace_lock
// .lock_guard(object, &self.locker_owner, Duration::from_secs(5), Duration::from_secs(10))
// .await?;
// if guard_opt.is_none() {
// return Err(Error::other("can not get lock. please retry".to_string()));
// }
// _object_lock_guard = guard_opt;
// }
if let Some(err) = self.check_write_precondition(bucket, object, opts).await {
return Err(err);
}
}
let part_path = format!("{}/{}/", upload_id_path, fi.data_dir.unwrap_or(Uuid::nil()));
let part_meta_paths = uploaded_parts
@@ -5671,11 +5730,16 @@ impl StorageAPI for SetDisks {
return Err(Error::other("checksum type not found"));
};
checksum_type = rustfs_rio::ChecksumType::from_string_with_obj_type(cs, ct);
if let Some(want) = opts.want_checksum.as_ref()
&& !want.checksum_type.is(checksum_type)
if let Some(want) = &opts.want_checksum
&& !want
.checksum_type
.is(rustfs_rio::ChecksumType::from_string_with_obj_type(cs, ct))
{
return Err(Error::other(format!("checksum type mismatch, got {:?}, want {:?}", want, checksum_type)));
return Err(Error::other(format!(
"checksum type mismatch, got {:?}, want {:?}",
want,
rustfs_rio::ChecksumType::from_string_with_obj_type(cs, ct)
)));
}
}
@@ -5962,11 +6026,18 @@ impl StorageAPI for SetDisks {
}
}
{
let disks = self.get_disks_internal().await;
Self::cleanup_multipart_path(&disks, &parts).await;
if !opts.no_lock && object_lock_guard.is_none() {
let ns_lock = self.new_ns_lock(bucket, object).await?;
object_lock_guard = Some(ns_lock.get_write_lock(get_lock_acquire_timeout()).await.map_err(|e| {
StorageError::other(format!(
"Failed to acquire write lock: {}",
self.format_lock_error_from_error(bucket, object, "write", &e)
))
})?);
}
self.cleanup_multipart_path(&parts).await;
let (online_disks, versions, op_old_dir) = Self::rename_data(
&shuffle_disks,
RUSTFS_META_MULTIPART_BUCKET,
@@ -5982,6 +6053,9 @@ impl StorageAPI for SetDisks {
self.commit_rename_data_dir(&shuffle_disks, bucket, object, &old_dir.to_string(), write_quorum)
.await?;
}
drop(object_lock_guard); // drop object lock guard to release the lock
if let Some(versions) = versions {
let _ =
rustfs_common::heal_channel::send_heal_request(rustfs_common::heal_channel::create_heal_request_with_options(
@@ -6048,79 +6122,14 @@ impl StorageAPI for SetDisks {
version_id: &str,
opts: &HealOpts,
) -> Result<(HealResultItem, Option<Error>)> {
// let mut effective_object = object.to_string();
//
// // Optimization: Only attempt correction if the name looks suspicious (quotes or URL encoded)
// // and the original object does NOT exist.
// let has_quotes = (effective_object.starts_with('\'') && effective_object.ends_with('\''))
// || (effective_object.starts_with('"') && effective_object.ends_with('"'));
// let has_percent = effective_object.contains('%');
//
// if has_quotes || has_percent {
// let disks = self.disks.read().await;
// // 1. Check if the original object exists (lightweight check)
// let (_, errs) = Self::read_all_fileinfo(&disks, "", bucket, &effective_object, version_id, false, false).await?;
//
// if DiskError::is_all_not_found(&errs) {
// // Original not found. Try candidates.
// let mut candidates = Vec::new();
//
// // Candidate 1: URL Decoded (Priority for web access issues)
// if has_percent {
// if let Ok(decoded) = urlencoding::decode(&effective_object) {
// if decoded != effective_object {
// candidates.push(decoded.to_string());
// }
// }
// }
//
// // Candidate 2: Quote Stripped (For shell copy-paste issues)
// if has_quotes && effective_object.len() >= 2 {
// candidates.push(effective_object[1..effective_object.len() - 1].to_string());
// }
//
// // Check candidates
// for candidate in candidates {
// let (_, errs_cand) =
// Self::read_all_fileinfo(&disks, "", bucket, &candidate, version_id, false, false).await?;
//
// if !DiskError::is_all_not_found(&errs_cand) {
// info!(
// "Heal request for object '{}' failed (not found). Auto-corrected to '{}'.",
// effective_object, candidate
// );
// effective_object = candidate;
// break; // Found a match, stop searching
// }
// }
// }
// }
// let object = effective_object.as_str();
let _write_lock_guard = if !opts.no_lock {
let key = rustfs_lock::fast_lock::types::ObjectKey::new(bucket, object);
let mut skip_lock = false;
if let Some(lock_info) = self.fast_lock_manager.get_lock_info(&key)
&& lock_info.owner.as_ref() == self.locker_owner.as_str()
&& matches!(lock_info.mode, rustfs_lock::fast_lock::types::LockMode::Exclusive)
{
debug!(
"Reusing existing exclusive lock for heal operation on {}/{} held by {}",
bucket, object, self.locker_owner
);
skip_lock = true;
}
if skip_lock {
None
} else {
info!(?opts, "Starting heal_object");
Some(
self.fast_lock_manager
.acquire_write_lock(bucket, object, self.locker_owner.as_str())
.await
.map_err(|e| Error::other(self.format_lock_error(bucket, object, "write", &e)))?,
)
}
let ns_lock = self.new_ns_lock(bucket, object).await?;
Some(ns_lock.get_write_lock(get_lock_acquire_timeout()).await.map_err(|e| {
StorageError::other(format!(
"Failed to acquire write lock: {}",
self.format_lock_error_from_error(bucket, object, "write", &e)
))
})?)
} else {
None
};

View File

@@ -25,7 +25,7 @@ use crate::{
},
endpoints::{Endpoints, PoolEndpoints},
error::StorageError,
global::{GLOBAL_LOCAL_DISK_SET_DRIVES, is_dist_erasure},
global::{GLOBAL_LOCAL_DISK_SET_DRIVES, get_global_lock_clients, is_dist_erasure},
set_disk::SetDisks,
store_api::{
BucketInfo, BucketOptions, CompletePart, DeleteBucketOptions, DeletedObject, GetObjectReader, HTTPRangeSpec,
@@ -42,7 +42,8 @@ use rustfs_common::{
heal_channel::{DriveState, HealItemType},
};
use rustfs_filemeta::FileInfo;
use rustfs_lock::FastLockGuard;
use rustfs_lock::NamespaceLockWrapper;
use rustfs_lock::client::LockClient;
use rustfs_madmin::heal_commands::{HealDriveInfo, HealResultItem};
use rustfs_utils::{crc_hash, path::path_join_buf, sip_hash};
use std::{collections::HashMap, sync::Arc};
@@ -91,35 +92,30 @@ impl Sets {
let set_count = fm.erasure.sets.len();
let set_drive_count = fm.erasure.sets[0].len();
let mut unique: Vec<Vec<String>> = (0..set_count).map(|_| vec![]).collect();
for (idx, endpoint) in endpoints.endpoints.as_ref().iter().enumerate() {
let set_idx = idx / set_drive_count;
if endpoint.is_local && !unique[set_idx].contains(&"local".to_string()) {
unique[set_idx].push("local".to_string());
}
if !endpoint.is_local {
let host_port = format!("{}:{}", endpoint.url.host_str().unwrap(), endpoint.url.port().unwrap());
if !unique[set_idx].contains(&host_port) {
unique[set_idx].push(host_port);
}
}
}
let mut disk_set = Vec::with_capacity(set_count);
// Create fast lock manager for high performance
let fast_lock_manager = Arc::new(rustfs_lock::FastObjectLockManager::new());
// Get lock clients from global storage
let lock_clients = get_global_lock_clients();
for i in 0..set_count {
let mut set_drive = Vec::with_capacity(set_drive_count);
let mut set_endpoints = Vec::with_capacity(set_drive_count);
let mut set_lock_clients: HashMap<String, Arc<dyn LockClient>> = HashMap::new();
for j in 0..set_drive_count {
let idx = i * set_drive_count + j;
let mut disk = disks[idx].clone();
let endpoint = endpoints.endpoints.as_ref()[idx].clone();
if let Some(lock_clients_map) = lock_clients {
let host_port = endpoint.host_port();
if let Some(lock_client) = lock_clients_map.get(&host_port)
&& !set_lock_clients.contains_key(&host_port)
{
set_lock_clients.insert(host_port, lock_client.clone());
}
}
set_endpoints.push(endpoint);
if disk.is_none() {
@@ -163,11 +159,8 @@ impl Sets {
}
}
// Note: write_quorum was used for the old lock system, no longer needed with FastLock
let _write_quorum = set_drive_count - parity_count;
let lockers = set_lock_clients.values().cloned().collect::<Vec<Arc<dyn LockClient>>>();
let set_disks = SetDisks::new(
fast_lock_manager.clone(),
GLOBAL_LOCAL_NODE_NAME.read().await.to_string(),
Arc::new(RwLock::new(set_drive)),
set_drive_count,
@@ -176,6 +169,7 @@ impl Sets {
pool_idx,
set_endpoints,
fm.clone(),
lockers,
)
.await;
@@ -365,7 +359,7 @@ impl ObjectIO for Sets {
#[async_trait::async_trait]
impl StorageAPI for Sets {
#[tracing::instrument(skip(self))]
async fn new_ns_lock(&self, bucket: &str, object: &str) -> Result<FastLockGuard> {
async fn new_ns_lock(&self, bucket: &str, object: &str) -> Result<NamespaceLockWrapper> {
self.disk_set[0].new_ns_lock(bucket, object).await
}
#[tracing::instrument(skip(self))]

View File

@@ -45,6 +45,7 @@ use crate::global::{
use crate::notification_sys::get_global_notification_sys;
use crate::pools::PoolMeta;
use crate::rebalance::RebalanceMeta;
use crate::rpc::RemoteClient;
use crate::store_api::{
ListMultipartsInfo, ListObjectVersionsInfo, ListPartsInfo, MultipartInfo, ObjectIO, ObjectInfoOrErr, WalkOptions,
};
@@ -69,7 +70,7 @@ use rand::Rng as _;
use rustfs_common::heal_channel::{HealItemType, HealOpts};
use rustfs_common::{GLOBAL_LOCAL_NODE_NAME, GLOBAL_RUSTFS_HOST, GLOBAL_RUSTFS_PORT};
use rustfs_filemeta::FileInfo;
use rustfs_lock::FastLockGuard;
use rustfs_lock::{LocalClient, LockClient, NamespaceLockWrapper};
use rustfs_madmin::heal_commands::HealResultItem;
use rustfs_utils::path::{decode_dir_object, encode_dir_object, path_join_buf};
use s3s::dto::{BucketVersioningStatus, ObjectLockConfiguration, ObjectLockEnabled, VersioningConfiguration};
@@ -1125,6 +1126,45 @@ pub async fn init_local_disks(endpoint_pools: EndpointServerPools) -> Result<()>
Ok(())
}
/// create unique lock clients for the endpoints and store them globally
pub fn init_lock_clients(endpoint_pools: EndpointServerPools) {
let mut unique_endpoints: HashMap<String, &Endpoint> = HashMap::new();
for pool_eps in endpoint_pools.as_ref().iter() {
for ep in pool_eps.endpoints.as_ref().iter() {
unique_endpoints.insert(ep.host_port(), ep);
}
}
let mut clients = HashMap::new();
let mut first_local_client_set = false;
for (key, endpoint) in unique_endpoints {
if endpoint.is_local {
let local_client = Arc::new(LocalClient::new()) as Arc<dyn LockClient>;
// Store the first LocalClient globally for use by other modules
if !first_local_client_set {
if let Err(e) = crate::global::set_global_lock_client(local_client.clone()) {
// If already set, ignore the error (another thread may have set it)
warn!("set_global_lock_client error: {:?}", e);
} else {
first_local_client_set = true;
}
}
clients.insert(key, local_client);
} else {
clients.insert(key, Arc::new(RemoteClient::new(endpoint.url.to_string())) as Arc<dyn LockClient>);
}
}
// Store the lock clients map globally
if crate::global::set_global_lock_clients(clients).is_err() {
error!("init_lock_clients: error setting lock clients");
}
}
#[derive(Debug, Default)]
struct PoolErr {
index: Option<usize>,
@@ -1245,7 +1285,7 @@ lazy_static! {
#[async_trait::async_trait]
impl StorageAPI for ECStore {
#[instrument(skip(self))]
async fn new_ns_lock(&self, bucket: &str, object: &str) -> Result<FastLockGuard> {
async fn new_ns_lock(&self, bucket: &str, object: &str) -> Result<NamespaceLockWrapper> {
self.pools[0].new_ns_lock(bucket, object).await
}
#[instrument(skip(self))]
@@ -2352,20 +2392,10 @@ impl StorageAPI for ECStore {
let mut futures = Vec::with_capacity(self.pools.len());
for pool in self.pools.iter() {
//TODO: IsSuspended
if self.is_suspended(pool.pool_idx).await {
continue;
}
futures.push(pool.heal_object(bucket, &object, version_id, opts));
// futures.push(async move {
// match pool.heal_object(bucket, &object, version_id, opts).await {
// Ok((mut result, err)) => {
// result.object = utils::path::decode_dir_object(&result.object);
// results.write().await.insert(idx, result);
// errs.write().await[idx] = err;
// }
// Err(err) => {
// errs.write().await[idx] = Some(err);
// }
// }
// });
}
let results = join_all(futures).await;

View File

@@ -31,7 +31,7 @@ use rustfs_filemeta::{
ReplicationStatusType, RestoreStatusOps as _, VersionPurgeStatusType, parse_restore_obj_status, replication_statuses_map,
version_purge_statuses_map,
};
use rustfs_lock::FastLockGuard;
use rustfs_lock::NamespaceLockWrapper;
use rustfs_madmin::heal_commands::HealResultItem;
use rustfs_rio::Checksum;
use rustfs_rio::{DecompressReader, HashReader, LimitReader, WarpReader};
@@ -1349,10 +1349,7 @@ pub trait ObjectIO: Send + Sync + Debug + 'static {
#[async_trait::async_trait]
#[allow(clippy::too_many_arguments)]
pub trait StorageAPI: ObjectIO + Debug {
// NewNSLock TODO:
async fn new_ns_lock(&self, bucket: &str, object: &str) -> Result<FastLockGuard>;
// Shutdown TODO:
// NSScanner TODO:
async fn new_ns_lock(&self, bucket: &str, object: &str) -> Result<NamespaceLockWrapper>;
async fn backend_info(&self) -> rustfs_madmin::BackendInfo;
async fn storage_info(&self) -> rustfs_madmin::StorageInfo;

View File

@@ -617,7 +617,7 @@ impl FileMeta {
// delete_version deletes version, returns data_dir
#[tracing::instrument(skip(self))]
pub fn delete_version(&mut self, fi: &FileInfo) -> Result<Option<Uuid>> {
let vid = fi.version_id.or(Some(Uuid::nil()));
let vid = Some(fi.version_id.unwrap_or(Uuid::nil()));
let mut ventry = FileMetaVersion::default();
if fi.deleted {
@@ -1725,13 +1725,9 @@ impl From<FileMetaVersion> for FileMetaVersionHeader {
f
};
let (ec_n, ec_m) = if value.version_type == VersionType::Object {
value
.object
.as_ref()
.map_or((0, 0), |o| (o.erasure_n as u8, o.erasure_m as u8))
} else {
(0, 0)
let (ec_n, ec_m) = match (value.version_type == VersionType::Object, value.object.as_ref()) {
(true, Some(obj)) => (obj.erasure_n as u8, obj.erasure_m as u8),
_ => (0, 0),
};
Self {

View File

@@ -1,15 +1,29 @@
# Copyright 2024 RustFS Team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
[package]
name = "rustfs-ahm"
name = "rustfs-heal"
version.workspace = true
edition.workspace = true
authors = ["RustFS Team"]
license.workspace = true
description = "RustFS AHM (Automatic Health Management) Scanner"
description = "RustFS erasure set and object healing"
repository.workspace = true
rust-version.workspace = true
homepage.workspace = true
documentation = "https://docs.rs/rustfs-ahm/latest/rustfs_ahm/"
keywords = ["RustFS", "AHM", "health-management", "scanner", "Minio"]
documentation = "https://docs.rs/rustfs-heal/latest/rustfs_heal/"
keywords = ["RustFS", "heal", "erasure-coding", "Minio"]
categories = ["web-programming", "development-tools", "filesystem"]
[dependencies]

View File

@@ -14,9 +14,9 @@
use thiserror::Error;
/// Custom error type for AHM operations
/// Custom error type for heal operations
/// This enum defines various error variants that can occur during
/// the execution of AHM-related tasks, such as I/O errors, storage errors,
/// the execution of heal-related tasks, such as I/O errors, storage errors,
/// configuration errors, and specific errors related to healing operations.
#[derive(Debug, Error)]
pub enum Error {
@@ -41,13 +41,6 @@ pub enum Error {
#[error(transparent)]
Anyhow(#[from] anyhow::Error),
// Scanner
#[error("Scanner error: {0}")]
Scanner(String),
#[error("Metrics error: {0}")]
Metrics(String),
#[error("Serialization error: {0}")]
Serialization(String),
@@ -60,7 +53,6 @@ pub enum Error {
#[error("Invalid checkpoint: {0}")]
InvalidCheckpoint(String),
// Heal
#[error("Heal task not found: {task_id}")]
TaskNotFound { task_id: String },
@@ -89,9 +81,7 @@ pub enum Error {
ProgressTrackingFailed { message: String },
}
/// A specialized Result type for AHM operations
///This type is a convenient alias for results returned by functions in the AHM crate,
/// using the custom Error type defined above.
/// A specialized Result type for heal operations
pub type Result<T, E = Error> = std::result::Result<T, E>;
impl Error {

View File

@@ -245,7 +245,7 @@ impl ErasureSetHealer {
resume_manager: &ResumeManager,
checkpoint_manager: &CheckpointManager,
) -> Result<()> {
info!(target: "rustfs:ahm:heal_bucket_with_resume" ,"Starting heal for bucket from object index {}", current_object_index);
info!(target: "rustfs:heal:heal_bucket_with_resume" ,"Starting heal for bucket from object index {}", current_object_index);
// 1. get bucket info
let _bucket_info = match self.storage.get_bucket_info(bucket).await? {
@@ -301,7 +301,7 @@ impl ErasureSetHealer {
if !object_exists {
info!(
target: "rustfs:ahm:heal_bucket_with_resume" ,"Object {}/{} no longer exists, skipping heal (likely deleted intentionally)",
target: "rustfs:heal:heal_bucket_with_resume" ,"Object {}/{} no longer exists, skipping heal (likely deleted intentionally)",
bucket, object
);
checkpoint_manager.add_processed_object(object.clone()).await?;

View File

@@ -472,8 +472,8 @@ impl HealManager {
let config = config.read().await;
config.heal_interval
};
if duration < Duration::from_secs(1) {
duration = Duration::from_secs(1);
if duration < Duration::from_secs(10) {
duration = Duration::from_secs(10);
}
info!("start_auto_disk_scanner: Starting auto disk scanner with interval: {:?}", duration);

View File

@@ -14,38 +14,36 @@
mod error;
pub mod heal;
pub mod scanner;
pub use error::{Error, Result};
pub use heal::{HealManager, HealOptions, HealPriority, HealRequest, HealType, channel::HealChannelProcessor};
pub use scanner::Scanner;
use std::sync::{Arc, OnceLock};
use tokio_util::sync::CancellationToken;
use tracing::{error, info};
// Global cancellation token for AHM services (scanner and other background tasks)
// Global cancellation token for heal and related services
static GLOBAL_AHM_SERVICES_CANCEL_TOKEN: OnceLock<CancellationToken> = OnceLock::new();
/// Initialize the global AHM services cancellation token
/// Initialize the global heal services cancellation token
pub fn init_ahm_services_cancel_token(cancel_token: CancellationToken) -> Result<()> {
GLOBAL_AHM_SERVICES_CANCEL_TOKEN
.set(cancel_token)
.map_err(|_| Error::Config("AHM services cancel token already initialized".to_string()))
.map_err(|_| Error::Config("Heal services cancel token already initialized".to_string()))
}
/// Get the global AHM services cancellation token
/// Get the global heal services cancellation token
pub fn get_ahm_services_cancel_token() -> Option<&'static CancellationToken> {
GLOBAL_AHM_SERVICES_CANCEL_TOKEN.get()
}
/// Create and initialize the global AHM services cancellation token
/// Create and initialize the global heal services cancellation token
pub fn create_ahm_services_cancel_token() -> CancellationToken {
let cancel_token = CancellationToken::new();
init_ahm_services_cancel_token(cancel_token.clone()).expect("AHM services cancel token already initialized");
init_ahm_services_cancel_token(cancel_token.clone()).expect("Heal services cancel token already initialized");
cancel_token
}
/// Shutdown all AHM services gracefully
/// Shutdown all heal services gracefully
pub fn shutdown_ahm_services() {
if let Some(cancel_token) = GLOBAL_AHM_SERVICES_CANCEL_TOKEN.get() {
cancel_token.cancel();

View File

@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use rustfs_ahm::heal::{
use rustfs_heal::heal::{
event::{HealEvent, Severity},
task::{HealPriority, HealType},
utils,
@@ -73,7 +73,7 @@ fn test_heal_event_object_corruption() {
bucket: "test-bucket".to_string(),
object: "test-object".to_string(),
version_id: None,
corruption_type: rustfs_ahm::heal::event::CorruptionType::DataCorruption,
corruption_type: rustfs_heal::heal::event::CorruptionType::DataCorruption,
severity: Severity::High,
};
@@ -119,7 +119,7 @@ fn test_format_set_disk_id_from_i32_valid() {
#[test]
fn test_resume_state_timestamp_handling() {
use rustfs_ahm::heal::resume::ResumeState;
use rustfs_heal::heal::resume::ResumeState;
// Test that ResumeState creation doesn't panic even if system time is before epoch
// This is a theoretical test - in practice, system time should never be before epoch
@@ -139,7 +139,7 @@ fn test_resume_state_timestamp_handling() {
#[test]
fn test_resume_checkpoint_timestamp_handling() {
use rustfs_ahm::heal::resume::ResumeCheckpoint;
use rustfs_heal::heal::resume::ResumeCheckpoint;
// Test that ResumeCheckpoint creation doesn't panic
let checkpoint = ResumeCheckpoint::new("test-task".to_string());
@@ -162,8 +162,8 @@ fn test_path_to_str_helper() {
#[test]
fn test_heal_task_status_atomic_update() {
use rustfs_ahm::heal::storage::HealStorageAPI;
use rustfs_ahm::heal::task::{HealOptions, HealRequest, HealTask, HealTaskStatus};
use rustfs_heal::heal::storage::HealStorageAPI;
use rustfs_heal::heal::task::{HealOptions, HealRequest, HealTask, HealTaskStatus};
use std::sync::Arc;
// Mock storage for testing
@@ -174,49 +174,49 @@ fn test_heal_task_status_atomic_update() {
&self,
_bucket: &str,
_object: &str,
) -> rustfs_ahm::Result<Option<rustfs_ecstore::store_api::ObjectInfo>> {
) -> rustfs_heal::Result<Option<rustfs_ecstore::store_api::ObjectInfo>> {
Ok(None)
}
async fn get_object_data(&self, _bucket: &str, _object: &str) -> rustfs_ahm::Result<Option<Vec<u8>>> {
async fn get_object_data(&self, _bucket: &str, _object: &str) -> rustfs_heal::Result<Option<Vec<u8>>> {
Ok(None)
}
async fn put_object_data(&self, _bucket: &str, _object: &str, _data: &[u8]) -> rustfs_ahm::Result<()> {
async fn put_object_data(&self, _bucket: &str, _object: &str, _data: &[u8]) -> rustfs_heal::Result<()> {
Ok(())
}
async fn delete_object(&self, _bucket: &str, _object: &str) -> rustfs_ahm::Result<()> {
async fn delete_object(&self, _bucket: &str, _object: &str) -> rustfs_heal::Result<()> {
Ok(())
}
async fn verify_object_integrity(&self, _bucket: &str, _object: &str) -> rustfs_ahm::Result<bool> {
async fn verify_object_integrity(&self, _bucket: &str, _object: &str) -> rustfs_heal::Result<bool> {
Ok(true)
}
async fn ec_decode_rebuild(&self, _bucket: &str, _object: &str) -> rustfs_ahm::Result<Vec<u8>> {
async fn ec_decode_rebuild(&self, _bucket: &str, _object: &str) -> rustfs_heal::Result<Vec<u8>> {
Ok(vec![])
}
async fn get_disk_status(
&self,
_endpoint: &rustfs_ecstore::disk::endpoint::Endpoint,
) -> rustfs_ahm::Result<rustfs_ahm::heal::storage::DiskStatus> {
Ok(rustfs_ahm::heal::storage::DiskStatus::Ok)
) -> rustfs_heal::Result<rustfs_heal::heal::storage::DiskStatus> {
Ok(rustfs_heal::heal::storage::DiskStatus::Ok)
}
async fn format_disk(&self, _endpoint: &rustfs_ecstore::disk::endpoint::Endpoint) -> rustfs_ahm::Result<()> {
async fn format_disk(&self, _endpoint: &rustfs_ecstore::disk::endpoint::Endpoint) -> rustfs_heal::Result<()> {
Ok(())
}
async fn get_bucket_info(&self, _bucket: &str) -> rustfs_ahm::Result<Option<rustfs_ecstore::store_api::BucketInfo>> {
async fn get_bucket_info(&self, _bucket: &str) -> rustfs_heal::Result<Option<rustfs_ecstore::store_api::BucketInfo>> {
Ok(None)
}
async fn heal_bucket_metadata(&self, _bucket: &str) -> rustfs_ahm::Result<()> {
async fn heal_bucket_metadata(&self, _bucket: &str) -> rustfs_heal::Result<()> {
Ok(())
}
async fn list_buckets(&self) -> rustfs_ahm::Result<Vec<rustfs_ecstore::store_api::BucketInfo>> {
async fn list_buckets(&self) -> rustfs_heal::Result<Vec<rustfs_ecstore::store_api::BucketInfo>> {
Ok(vec![])
}
async fn object_exists(&self, _bucket: &str, _object: &str) -> rustfs_ahm::Result<bool> {
async fn object_exists(&self, _bucket: &str, _object: &str) -> rustfs_heal::Result<bool> {
Ok(false)
}
async fn get_object_size(&self, _bucket: &str, _object: &str) -> rustfs_ahm::Result<Option<u64>> {
async fn get_object_size(&self, _bucket: &str, _object: &str) -> rustfs_heal::Result<Option<u64>> {
Ok(None)
}
async fn get_object_checksum(&self, _bucket: &str, _object: &str) -> rustfs_ahm::Result<Option<String>> {
async fn get_object_checksum(&self, _bucket: &str, _object: &str) -> rustfs_heal::Result<Option<String>> {
Ok(None)
}
async fn heal_object(
@@ -225,23 +225,23 @@ fn test_heal_task_status_atomic_update() {
_object: &str,
_version_id: Option<&str>,
_opts: &rustfs_common::heal_channel::HealOpts,
) -> rustfs_ahm::Result<(rustfs_madmin::heal_commands::HealResultItem, Option<rustfs_ahm::Error>)> {
) -> rustfs_heal::Result<(rustfs_madmin::heal_commands::HealResultItem, Option<rustfs_heal::Error>)> {
Ok((rustfs_madmin::heal_commands::HealResultItem::default(), None))
}
async fn heal_bucket(
&self,
_bucket: &str,
_opts: &rustfs_common::heal_channel::HealOpts,
) -> rustfs_ahm::Result<rustfs_madmin::heal_commands::HealResultItem> {
) -> rustfs_heal::Result<rustfs_madmin::heal_commands::HealResultItem> {
Ok(rustfs_madmin::heal_commands::HealResultItem::default())
}
async fn heal_format(
&self,
_dry_run: bool,
) -> rustfs_ahm::Result<(rustfs_madmin::heal_commands::HealResultItem, Option<rustfs_ahm::Error>)> {
) -> rustfs_heal::Result<(rustfs_madmin::heal_commands::HealResultItem, Option<rustfs_heal::Error>)> {
Ok((rustfs_madmin::heal_commands::HealResultItem::default(), None))
}
async fn list_objects_for_heal(&self, _bucket: &str, _prefix: &str) -> rustfs_ahm::Result<Vec<String>> {
async fn list_objects_for_heal(&self, _bucket: &str, _prefix: &str) -> rustfs_heal::Result<Vec<String>> {
Ok(vec![])
}
async fn list_objects_for_heal_page(
@@ -249,11 +249,11 @@ fn test_heal_task_status_atomic_update() {
_bucket: &str,
_prefix: &str,
_continuation_token: Option<&str>,
) -> rustfs_ahm::Result<(Vec<String>, Option<String>, bool)> {
) -> rustfs_heal::Result<(Vec<String>, Option<String>, bool)> {
Ok((vec![], None, false))
}
async fn get_disk_for_resume(&self, _set_disk_id: &str) -> rustfs_ahm::Result<rustfs_ecstore::disk::DiskStore> {
Err(rustfs_ahm::Error::other("Not implemented in mock"))
async fn get_disk_for_resume(&self, _set_disk_id: &str) -> rustfs_heal::Result<rustfs_ecstore::disk::DiskStore> {
Err(rustfs_heal::Error::other("Not implemented in mock"))
}
}

View File

@@ -12,11 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use rustfs_ahm::heal::{
manager::{HealConfig, HealManager},
storage::{ECStoreHealStorage, HealStorageAPI},
task::{HealOptions, HealPriority, HealRequest, HealTaskStatus, HealType},
};
use rustfs_common::heal_channel::{HealOpts, HealScanMode};
use rustfs_ecstore::{
disk::endpoint::Endpoint,
@@ -24,6 +19,11 @@ use rustfs_ecstore::{
store::ECStore,
store_api::{ObjectIO, ObjectOptions, PutObjReader, StorageAPI},
};
use rustfs_heal::heal::{
manager::{HealConfig, HealManager},
storage::{ECStoreHealStorage, HealStorageAPI},
task::{HealOptions, HealPriority, HealRequest, HealTaskStatus, HealType},
};
use serial_test::serial;
use std::{
path::PathBuf,
@@ -58,7 +58,7 @@ async fn setup_test_env() -> (Vec<PathBuf>, Arc<ECStore>, Arc<ECStoreHealStorage
}
// create temp dir as 4 disks with unique base dir
let test_base_dir = format!("/tmp/rustfs_ahm_heal_test_{}", uuid::Uuid::new_v4());
let test_base_dir = format!("/tmp/rustfs_heal_heal_test_{}", uuid::Uuid::new_v4());
let temp_dir = std::path::PathBuf::from(&test_base_dir);
if temp_dir.exists() {
fs::remove_dir_all(&temp_dir).await.ok();

View File

@@ -1,43 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Example demonstrating environment variable control of lock system
use rustfs_lock::{LockManager, get_global_lock_manager};
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let manager = get_global_lock_manager();

    // Report whether the lock subsystem is currently active.
    let status = if manager.is_disabled() { "DISABLED" } else { "ENABLED" };
    println!("Lock system status: {}", status);

    // Show how the controlling environment variable is configured.
    if let Ok(value) = std::env::var("RUSTFS_ENABLE_LOCKS") {
        println!("RUSTFS_ENABLE_LOCKS set to: {value}");
    } else {
        println!("RUSTFS_ENABLE_LOCKS not set (defaults to enabled)");
    }

    // Exercise one read-lock acquisition to demonstrate the configured behaviour.
    match manager.acquire_read_lock("test-bucket", "test-object", "test-owner").await {
        Ok(guard) => {
            println!("Lock acquired successfully! Disabled: {}", guard.is_disabled());
        }
        Err(e) => {
            println!("Failed to acquire lock: {e:?}");
        }
    }

    println!("Environment control example completed");
    Ok(())
}

View File

@@ -13,6 +13,7 @@
// limitations under the License.
use std::collections::HashMap;
use std::hash::{Hash, Hasher};
use std::sync::Arc;
use tokio::sync::RwLock;
@@ -21,23 +22,69 @@ use crate::{
LockResponse, LockStats, LockStatus, LockType, Result,
};
/// Local lock client using FastLock
#[derive(Debug, Clone)]
/// Default shard count for guard storage (must be power of 2)
const DEFAULT_GUARD_SHARD_COUNT: usize = 64;
/// Local lock client using FastLock with sharded guard storage for better concurrency
#[derive(Debug)]
pub struct LocalClient {
guard_storage: Arc<RwLock<HashMap<LockId, FastLockGuard>>>,
/// Sharded guard storage to reduce lock contention
guard_storage: Vec<Arc<RwLock<HashMap<LockId, FastLockGuard>>>>,
/// Mask for fast shard index calculation (shard_count - 1)
shard_mask: usize,
/// Optional lock manager (if None, uses global singleton)
manager: Option<Arc<GlobalLockManager>>,
}
impl LocalClient {
/// Create new local client
/// Create new local client with default shard count
pub fn new() -> Self {
Self::with_shard_count(DEFAULT_GUARD_SHARD_COUNT)
}
/// Create new local client with custom shard count
/// Shard count must be a power of 2 for efficient masking
pub fn with_shard_count(shard_count: usize) -> Self {
assert!(shard_count.is_power_of_two(), "Shard count must be power of 2");
let guard_storage: Vec<Arc<RwLock<HashMap<LockId, FastLockGuard>>>> =
(0..shard_count).map(|_| Arc::new(RwLock::new(HashMap::new()))).collect();
Self {
guard_storage: Arc::new(RwLock::new(HashMap::new())),
guard_storage,
shard_mask: shard_count - 1,
manager: None,
}
}
/// Get the global lock manager
/// Create new local client with a specific lock manager
/// This allows simulating multi-node environments where each node has its own lock backend
pub fn with_manager(manager: Arc<GlobalLockManager>) -> Self {
Self {
guard_storage: (0..DEFAULT_GUARD_SHARD_COUNT)
.map(|_| Arc::new(RwLock::new(HashMap::new())))
.collect(),
shard_mask: DEFAULT_GUARD_SHARD_COUNT - 1,
manager: Some(manager),
}
}
/// Get the lock manager (injected manager if available, otherwise global singleton)
pub fn get_lock_manager(&self) -> Arc<GlobalLockManager> {
crate::get_global_lock_manager()
self.manager.clone().unwrap_or_else(crate::get_global_lock_manager)
}
/// Map a lock ID to a shard index.
///
/// Hashes the whole `LockId` with the standard `DefaultHasher` and masks the
/// result; because the shard count is a power of two, the mask is equivalent
/// to `hash % shard_count` without a division.
fn get_shard_index(&self, lock_id: &LockId) -> usize {
    let mut hasher = std::collections::hash_map::DefaultHasher::new();
    lock_id.hash(&mut hasher);
    // shard_mask == shard_count - 1, so the AND keeps the index in range.
    (hasher.finish() as usize) & self.shard_mask
}
/// Return the guard-storage shard responsible for `lock_id`.
///
/// The same lock ID always hashes to the same shard, so store/lookup/remove
/// for a given guard all go through one `RwLock` rather than a global one.
fn get_shard(&self, lock_id: &LockId) -> &Arc<RwLock<HashMap<LockId, FastLockGuard>>> {
    let index = self.get_shard_index(lock_id);
    &self.guard_storage[index]
}
}
@@ -49,67 +96,30 @@ impl Default for LocalClient {
#[async_trait::async_trait]
impl LockClient for LocalClient {
async fn acquire_exclusive(&self, request: &LockRequest) -> Result<LockResponse> {
async fn acquire_lock(&self, request: &LockRequest) -> Result<LockResponse> {
let lock_manager = self.get_lock_manager();
let lock_request = crate::ObjectLockRequest::new_write("", request.resource.clone(), request.owner.clone())
.with_acquire_timeout(request.acquire_timeout);
let lock_request = match request.lock_type {
LockType::Exclusive => crate::ObjectLockRequest::new_write(request.resource.clone(), request.owner.clone())
.with_acquire_timeout(request.acquire_timeout),
LockType::Shared => crate::ObjectLockRequest::new_read(request.resource.clone(), request.owner.clone())
.with_acquire_timeout(request.acquire_timeout),
};
match lock_manager.acquire_lock(lock_request).await {
Ok(guard) => {
let lock_id = LockId::new_deterministic(&request.resource);
let lock_id = LockId::new_unique(&request.resource);
// Store guard for later release
let mut guards = self.guard_storage.write().await;
guards.insert(lock_id.clone(), guard);
{
let shard = self.get_shard(&lock_id);
let mut guards = shard.write().await;
guards.insert(lock_id.clone(), guard);
}
let lock_info = LockInfo {
id: lock_id,
resource: request.resource.clone(),
lock_type: LockType::Exclusive,
status: crate::types::LockStatus::Acquired,
owner: request.owner.clone(),
acquired_at: std::time::SystemTime::now(),
expires_at: std::time::SystemTime::now() + request.ttl,
last_refreshed: std::time::SystemTime::now(),
metadata: request.metadata.clone(),
priority: request.priority,
wait_start_time: None,
};
Ok(LockResponse::success(lock_info, std::time::Duration::ZERO))
}
Err(crate::fast_lock::LockResult::Timeout) => {
Ok(LockResponse::failure("Lock acquisition timeout", request.acquire_timeout))
}
Err(crate::fast_lock::LockResult::Conflict {
current_owner,
current_mode,
}) => Ok(LockResponse::failure(
format!("Lock conflict: resource held by {current_owner} in {current_mode:?} mode"),
std::time::Duration::ZERO,
)),
Err(crate::fast_lock::LockResult::Acquired) => {
unreachable!("Acquired should not be an error")
}
}
}
async fn acquire_shared(&self, request: &LockRequest) -> Result<LockResponse> {
let lock_manager = self.get_lock_manager();
let lock_request = crate::ObjectLockRequest::new_read("", request.resource.clone(), request.owner.clone())
.with_acquire_timeout(request.acquire_timeout);
match lock_manager.acquire_lock(lock_request).await {
Ok(guard) => {
let lock_id = LockId::new_deterministic(&request.resource);
// Store guard for later release
let mut guards = self.guard_storage.write().await;
guards.insert(lock_id.clone(), guard);
let lock_info = LockInfo {
id: lock_id,
resource: request.resource.clone(),
lock_type: LockType::Shared,
lock_type: request.lock_type,
status: crate::types::LockStatus::Acquired,
owner: request.owner.clone(),
acquired_at: std::time::SystemTime::now(),
@@ -138,7 +148,8 @@ impl LockClient for LocalClient {
}
async fn release(&self, lock_id: &LockId) -> Result<bool> {
let mut guards = self.guard_storage.write().await;
let shard = self.get_shard(lock_id);
let mut guards = shard.write().await;
if let Some(guard) = guards.remove(lock_id) {
// Guard automatically releases the lock when dropped
drop(guard);
@@ -159,7 +170,8 @@ impl LockClient for LocalClient {
}
async fn check_status(&self, lock_id: &LockId) -> Result<Option<LockInfo>> {
let guards = self.guard_storage.read().await;
let shard = self.get_shard(lock_id);
let guards = shard.read().await;
if let Some(guard) = guards.get(lock_id) {
// We have an active guard for this lock
let lock_type = match guard.mode() {
@@ -200,181 +212,3 @@ impl LockClient for LocalClient {
true
}
}
// Unit tests for LocalClient: single-client acquire/release, read/write
// exclusion, and cross-client mutual exclusion through the shared lock
// manager. Resource names embed a fresh UUID so parallel test runs never
// contend on the same key.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::LockType;

    // Exclusive acquisition succeeds on an uncontended resource.
    #[tokio::test]
    async fn test_local_client_acquire_exclusive() {
        let client = LocalClient::new();
        let resource_name = format!("test-resource-exclusive-{}", uuid::Uuid::new_v4());
        let request = LockRequest::new(&resource_name, LockType::Exclusive, "test-owner")
            .with_acquire_timeout(std::time::Duration::from_secs(30));
        let response = client.acquire_exclusive(&request).await.unwrap();
        assert!(response.is_success());
        // Clean up
        if let Some(lock_info) = response.lock_info() {
            let _ = client.release(&lock_info.id).await;
        }
    }

    // Shared acquisition succeeds on an uncontended resource.
    #[tokio::test]
    async fn test_local_client_acquire_shared() {
        let client = LocalClient::new();
        let resource_name = format!("test-resource-shared-{}", uuid::Uuid::new_v4());
        let request = LockRequest::new(&resource_name, LockType::Shared, "test-owner")
            .with_acquire_timeout(std::time::Duration::from_secs(30));
        let response = client.acquire_shared(&request).await.unwrap();
        assert!(response.is_success());
        // Clean up
        if let Some(lock_info) = response.lock_info() {
            let _ = client.release(&lock_info.id).await;
        }
    }

    // A held lock can be released via the id returned in the response.
    #[tokio::test]
    async fn test_local_client_release() {
        let client = LocalClient::new();
        let resource_name = format!("test-resource-release-{}", uuid::Uuid::new_v4());
        // First acquire a lock
        let request = LockRequest::new(&resource_name, LockType::Exclusive, "test-owner")
            .with_acquire_timeout(std::time::Duration::from_secs(30));
        let response = client.acquire_exclusive(&request).await.unwrap();
        assert!(response.is_success());
        // Get the lock ID from the response
        if let Some(lock_info) = response.lock_info() {
            let result = client.release(&lock_info.id).await.unwrap();
            assert!(result);
        } else {
            panic!("No lock info in response");
        }
    }

    // LocalClient always reports itself as a local (in-process) client.
    #[tokio::test]
    async fn test_local_client_is_local() {
        let client = LocalClient::new();
        assert!(client.is_local().await);
    }

    // An exclusive holder blocks shared acquisition until it releases.
    // Short timeouts keep the expected-failure paths fast.
    #[tokio::test]
    async fn test_local_client_read_write_lock_exclusion() {
        let client = LocalClient::new();
        let resource_name = format!("test-resource-exclusion-{}", uuid::Uuid::new_v4());
        // First, acquire an exclusive lock
        let exclusive_request = LockRequest::new(&resource_name, LockType::Exclusive, "exclusive-owner")
            .with_acquire_timeout(std::time::Duration::from_millis(10));
        let exclusive_response = client.acquire_exclusive(&exclusive_request).await.unwrap();
        assert!(exclusive_response.is_success());
        // Try to acquire a shared lock on the same resource - should fail
        let shared_request = LockRequest::new(&resource_name, LockType::Shared, "shared-owner")
            .with_acquire_timeout(std::time::Duration::from_millis(10));
        let shared_response = client.acquire_shared(&shared_request).await.unwrap();
        assert!(!shared_response.is_success(), "Shared lock should fail when exclusive lock exists");
        // Clean up exclusive lock
        if let Some(exclusive_info) = exclusive_response.lock_info() {
            let _ = client.release(&exclusive_info.id).await;
        }
        // Now shared lock should succeed
        let shared_request2 = LockRequest::new(&resource_name, LockType::Shared, "shared-owner")
            .with_acquire_timeout(std::time::Duration::from_millis(10));
        let shared_response2 = client.acquire_shared(&shared_request2).await.unwrap();
        assert!(
            shared_response2.is_success(),
            "Shared lock should succeed after exclusive lock is released"
        );
        // Clean up
        if let Some(shared_info) = shared_response2.lock_info() {
            let _ = client.release(&shared_info.id).await;
        }
    }

    // The lock type reported by check_status matches the type requested.
    #[tokio::test]
    async fn test_local_client_read_write_lock_distinction() {
        let client = LocalClient::new();
        let resource_name = format!("test-resource-rw-{}", uuid::Uuid::new_v4());
        // Test exclusive lock
        let exclusive_request = LockRequest::new(&resource_name, LockType::Exclusive, "exclusive-owner")
            .with_acquire_timeout(std::time::Duration::from_secs(30));
        let exclusive_response = client.acquire_exclusive(&exclusive_request).await.unwrap();
        assert!(exclusive_response.is_success());
        if let Some(exclusive_info) = exclusive_response.lock_info() {
            assert_eq!(exclusive_info.lock_type, LockType::Exclusive);
            // Check status should return correct lock type
            let status = client.check_status(&exclusive_info.id).await.unwrap();
            assert!(status.is_some());
            assert_eq!(status.unwrap().lock_type, LockType::Exclusive);
            // Release exclusive lock
            let result = client.release(&exclusive_info.id).await.unwrap();
            assert!(result);
        }
        // Test shared lock
        let shared_request = LockRequest::new(&resource_name, LockType::Shared, "shared-owner")
            .with_acquire_timeout(std::time::Duration::from_secs(30));
        let shared_response = client.acquire_shared(&shared_request).await.unwrap();
        assert!(shared_response.is_success());
        if let Some(shared_info) = shared_response.lock_info() {
            assert_eq!(shared_info.lock_type, LockType::Shared);
            // Check status should return correct lock type
            let status = client.check_status(&shared_info.id).await.unwrap();
            assert!(status.is_some());
            assert_eq!(status.unwrap().lock_type, LockType::Shared);
            // Release shared lock
            let result = client.release(&shared_info.id).await.unwrap();
            assert!(result);
        }
    }

    // Two LocalClient instances contend through the shared backend:
    // exclusive locks are mutually exclusive across clients.
    #[tokio::test]
    async fn test_multiple_local_clients_exclusive_mutex() {
        let client1 = LocalClient::new();
        let client2 = LocalClient::new();
        let resource_name = format!("test-multi-client-mutex-{}", uuid::Uuid::new_v4());
        // client1 acquire exclusive lock
        let req1 = LockRequest::new(&resource_name, LockType::Exclusive, "owner1")
            .with_acquire_timeout(std::time::Duration::from_millis(50));
        let resp1 = client1.acquire_exclusive(&req1).await.unwrap();
        assert!(resp1.is_success(), "client1 should acquire exclusive lock");
        // client2 try to acquire exclusive lock, should fail
        let req2 = LockRequest::new(&resource_name, LockType::Exclusive, "owner2")
            .with_acquire_timeout(std::time::Duration::from_millis(50));
        let resp2 = client2.acquire_exclusive(&req2).await.unwrap();
        assert!(!resp2.is_success(), "client2 should not acquire exclusive lock while client1 holds it");
        // client1 release lock
        if let Some(lock_info) = resp1.lock_info() {
            let _ = client1.release(&lock_info.id).await;
        }
        // client2 try again, should succeed
        let resp3 = client2.acquire_exclusive(&req2).await.unwrap();
        assert!(resp3.is_success(), "client2 should acquire exclusive lock after client1 releases it");
        // clean up
        if let Some(lock_info) = resp3.lock_info() {
            let _ = client2.release(&lock_info.id).await;
        }
    }
}

View File

@@ -15,26 +15,15 @@
pub mod local;
// pub mod remote;
use crate::{LockId, LockInfo, LockRequest, LockResponse, LockStats, LockType, Result};
use crate::{LockId, LockInfo, LockRequest, LockResponse, LockStats, Result};
use async_trait::async_trait;
use std::sync::Arc;
/// Lock client trait
#[async_trait]
pub trait LockClient: Send + Sync + std::fmt::Debug {
/// Acquire exclusive lock
async fn acquire_exclusive(&self, request: &LockRequest) -> Result<LockResponse>;
/// Acquire shared lock
async fn acquire_shared(&self, request: &LockRequest) -> Result<LockResponse>;
/// Acquire lock (generic method)
async fn acquire_lock(&self, request: &LockRequest) -> Result<LockResponse> {
match request.lock_type {
LockType::Exclusive => self.acquire_exclusive(request).await,
LockType::Shared => self.acquire_shared(request).await,
}
}
async fn acquire_lock(&self, request: &LockRequest) -> Result<LockResponse>;
/// Release lock
async fn release(&self, lock_id: &LockId) -> Result<bool>;
@@ -75,36 +64,3 @@ impl ClientFactory {
// Arc::new(remote::RemoteClient::new(endpoint))
// }
}
// Smoke test for the ClientFactory-produced local client: acquire,
// status check, and release through the generic LockClient trait surface.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::LockType;

    #[tokio::test]
    async fn test_local_client_basic_operations() {
        let client = ClientFactory::create_local();
        let request = LockRequest::new("test-resource", LockType::Exclusive, "test-owner");
        // Test lock acquisition
        let response = client.acquire_exclusive(&request).await;
        assert!(response.is_ok());
        // Only exercise status/release when acquisition actually succeeded;
        // a failed-but-Ok response carries no lock_info to work with.
        if let Ok(response) = response
            && response.success
        {
            let lock_info = response.lock_info.unwrap();
            // Test status check
            let status = client.check_status(&lock_info.id).await;
            assert!(status.is_ok());
            assert!(status.unwrap().is_some());
            // Test lock release
            let released = client.release(&lock_info.id).await;
            assert!(released.is_ok());
            assert!(released.unwrap());
        }
    }
}

View File

@@ -0,0 +1,367 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::{
ObjectKey,
client::LockClient,
error::{LockError, Result},
types::{LockId, LockInfo, LockRequest, LockResponse, LockStatus, LockType},
};
use std::sync::{Arc, LazyLock};
use std::time::Duration;
use tokio::sync::mpsc;
use tracing::warn;
use uuid::Uuid;
/// Build a fresh, globally unique aggregate lock ID that stands in for a set
/// of per-client locks on the same resource.
fn generate_aggregate_lock_id(resource: &ObjectKey) -> LockId {
    let uuid = Uuid::new_v4().to_string();
    LockId {
        resource: resource.clone(),
        uuid,
    }
}
/// Work item handed to the background unlock worker.
#[derive(Debug, Clone)]
struct UnlockJob {
    /// Entries to release: each (LockId, client) pair will be released independently.
    entries: Vec<(LockId, Arc<dyn LockClient>)>,
}
/// Handle to the global background unlock worker; holds the sending side of
/// the job channel the worker drains.
#[derive(Debug)]
struct UnlockRuntime {
    tx: mpsc::Sender<UnlockJob>,
}
// Global unlock runtime with background worker.
//
// Lazily initialized on first use: the worker task is spawned inside the
// LazyLock closure, so a Tokio runtime must be current the first time any
// guard schedules a release. Jobs are processed one at a time; the entries
// within a job are released sequentially and best-effort (failures are only
// logged, never retried).
static UNLOCK_RUNTIME: LazyLock<UnlockRuntime> = LazyLock::new(|| {
    // Larger buffer to reduce contention during bursts
    let (tx, mut rx) = mpsc::channel::<UnlockJob>(8192);
    // Spawn background worker when first used; assumes a Tokio runtime is available
    tokio::spawn(async move {
        // Worker exits when all senders are dropped (recv() returns None).
        while let Some(job) = rx.recv().await {
            // Best-effort release across all (LockId, client) entries.
            let mut any_ok = false;
            for (lock_id, client) in job.entries.into_iter() {
                if client.release(&lock_id).await.unwrap_or(false) {
                    any_ok = true;
                }
            }
            if !any_ok {
                tracing::warn!("DistributedLockGuard background release failed for one or more entries");
            } else {
                tracing::debug!("DistributedLockGuard background released one or more entries");
            }
        }
    });
    UnlockRuntime { tx }
});
/// A RAII guard for distributed locks that releases the lock asynchronously when dropped.
///
/// Release is scheduled on a global background worker (see `UNLOCK_RUNTIME`),
/// so dropping the guard never blocks the current task.
#[derive(Debug)]
pub struct DistributedLockGuard {
    /// The public-facing lock id. For multi-client scenarios this is typically
    /// an aggregate id; for single-client it is the only id.
    lock_id: LockId,
    /// All underlying (LockId, client) entries that should be released when the
    /// guard is dropped.
    entries: Vec<(LockId, Arc<dyn LockClient>)>,
    /// If true, Drop will not try to release (used if user manually released).
    disarmed: bool,
}
impl DistributedLockGuard {
    /// Create a new guard.
    ///
    /// - `lock_id` is the id returned to the caller (`lock_id()`).
    /// - `entries` is the full list of underlying (LockId, client) pairs
    ///   that should be released when this guard is dropped.
    pub(crate) fn new(lock_id: LockId, entries: Vec<(LockId, Arc<dyn LockClient>)>) -> Self {
        Self {
            lock_id,
            entries,
            disarmed: false,
        }
    }

    /// Get the lock id associated with this guard.
    pub fn lock_id(&self) -> &LockId {
        &self.lock_id
    }

    /// Manually disarm the guard so dropping it won't release the lock.
    /// Call this if you explicitly released the lock elsewhere.
    pub fn disarm(&mut self) {
        self.disarmed = true;
    }

    /// Check if the guard has been disarmed (lock already released).
    pub fn is_disarmed(&self) -> bool {
        self.disarmed
    }

    /// Manually release the lock early.
    ///
    /// Schedules a release job on the background worker and then disarms the
    /// guard to prevent double-release on drop. Returns `true` once release
    /// has been scheduled (or the lock was already released); the network
    /// release itself is asynchronous and best-effort, so `true` does not
    /// guarantee the remote side has processed it yet.
    pub fn release(&mut self) -> bool {
        if self.disarmed {
            // Lock was already released, return true to indicate lock is in released state
            return true;
        }
        let job = UnlockJob {
            entries: self.entries.clone(),
        };
        // Try a non-blocking send to avoid panics (release() is also called
        // from Drop, where blocking or panicking would be unacceptable).
        if let Err(err) = UNLOCK_RUNTIME.tx.try_send(job) {
            // Channel full or closed; best-effort fallback: spawn a detached task
            let entries = self.entries.clone();
            tracing::warn!(
                "DistributedLockGuard channel send failed ({}), spawning fallback unlock task for {} entries",
                err,
                entries.len()
            );
            // If runtime is not available, this will panic; but in RustFS we are inside Tokio contexts.
            let handle = tokio::spawn(async move {
                let futures_iter = entries
                    .into_iter()
                    .map(|(lock_id, client)| async move { client.release(&lock_id).await.unwrap_or(false) });
                let _ = futures::future::join_all(futures_iter).await;
            });
            // Explicitly drop the JoinHandle to acknowledge detaching the task.
            drop(handle);
        }
        // Disarm to prevent double-release on drop. Both paths above merely
        // schedule the release, so scheduling is always reported as success
        // (the previous `success` local was `true` on both branches).
        self.disarmed = true;
        true
    }
}
impl Drop for DistributedLockGuard {
    fn drop(&mut self) {
        // Delegate to release(): it is a no-op when the guard was already
        // disarmed, and otherwise schedules an asynchronous, best-effort
        // release on the background worker without blocking this thread.
        // Setting disarmed inside release() is harmless here since we're
        // dropping anyway.
        let _ = self.release();
    }
}
/// Distributed lock handler for distributed use cases.
///
/// Uses quorum-based acquisition across `clients` and maps the resulting
/// per-client locks to a single aggregate lock ID exposed to callers.
#[derive(Debug)]
pub struct DistributedLock {
    /// Lock clients for this namespace
    clients: Vec<Arc<dyn LockClient>>,
    /// Namespace identifier
    namespace: String,
    /// Quorum size for operations (majority for distributed);
    /// clamped to [1, clients.len()] by `new`.
    quorum: usize,
}
impl DistributedLock {
    /// Create new distributed lock.
    ///
    /// With zero or one client the quorum is forced to 1; otherwise the
    /// requested `quorum` is clamped to [1, clients.len()].
    pub fn new(namespace: String, clients: Vec<Arc<dyn LockClient>>, quorum: usize) -> Self {
        let q = if clients.len() <= 1 {
            1
        } else {
            quorum.clamp(1, clients.len())
        };
        Self {
            clients,
            namespace,
            quorum: q,
        }
    }

    /// Get namespace identifier.
    pub fn namespace(&self) -> &str {
        &self.namespace
    }

    /// Get resource key for this namespace ("namespace:resource").
    pub fn get_resource_key(&self, resource: &ObjectKey) -> String {
        format!("{}:{}", self.namespace, resource)
    }

    /// Get clients (for health check and stats).
    pub(crate) fn clients(&self) -> &[Arc<dyn LockClient>] {
        &self.clients
    }

    /// Acquire a lock and return a RAII guard.
    ///
    /// Returns `Ok(Some(guard))` on success, `Ok(None)` on timeout or other
    /// non-quorum failure (caller decides how to surface it), and
    /// `Err(QuorumNotReached)` when the failure message indicates a quorum
    /// miss.
    pub(crate) async fn acquire_guard(&self, request: &LockRequest) -> Result<Option<DistributedLockGuard>> {
        if self.clients.is_empty() {
            return Err(LockError::internal("No lock clients available"));
        }
        let (resp, individual_locks) = self.acquire_lock_quorum(request).await?;
        if resp.success {
            // Use aggregate lock_id from LockResponse's LockInfo
            // The aggregate id is what we expose to callers; individual_locks carries
            // the real (LockId, client) pairs that must be released.
            let aggregate_lock_id = resp
                .lock_info
                .as_ref()
                .map(|info| info.id.clone())
                .unwrap_or_else(|| LockId::new_unique(&request.resource));
            Ok(Some(DistributedLockGuard::new(aggregate_lock_id, individual_locks)))
        } else {
            // Check if it's a timeout or quorum failure.
            // NOTE(review): failure classification is by substring match on the
            // error text produced in acquire_lock_quorum — keep the two in sync.
            if let Some(error_msg) = &resp.error {
                warn!("acquire_lock_quorum error: {}", error_msg);
                if error_msg.contains("quorum") {
                    // This is a quorum failure - return appropriate error
                    // Extract achieved count from error message or use individual_locks.len()
                    // NOTE(review): on the quorum-failure path acquire_lock_quorum
                    // returns an empty Vec (locks were rolled back), so `achieved`
                    // is always 0 here — confirm this is the intended report.
                    let achieved = individual_locks.len();
                    Err(LockError::QuorumNotReached {
                        required: self.quorum,
                        achieved,
                    })
                } else if error_msg.contains("timeout") || resp.wait_time >= request.acquire_timeout {
                    // This is a timeout - return None so caller can convert to timeout error
                    Ok(None)
                } else {
                    // Other failure - return None for backward compatibility
                    Ok(None)
                }
            } else {
                Ok(None)
            }
        }
    }

    /// Convenience: acquire exclusive lock as a guard.
    pub async fn lock_guard(
        &self,
        resource: ObjectKey,
        owner: &str,
        timeout: Duration,
        ttl: Duration,
    ) -> Result<Option<DistributedLockGuard>> {
        let req = LockRequest::new(resource, LockType::Exclusive, owner)
            .with_acquire_timeout(timeout)
            .with_ttl(ttl);
        self.acquire_guard(&req).await
    }

    /// Convenience: acquire shared lock as a guard.
    pub async fn rlock_guard(
        &self,
        resource: ObjectKey,
        owner: &str,
        timeout: Duration,
        ttl: Duration,
    ) -> Result<Option<DistributedLockGuard>> {
        let req = LockRequest::new(resource, LockType::Shared, owner)
            .with_acquire_timeout(timeout)
            .with_ttl(ttl);
        self.acquire_guard(&req).await
    }

    /// Quorum-based lock acquisition: success if at least `self.quorum` clients succeed.
    /// Collects all individual lock_ids from successful clients and creates an aggregate lock_id.
    /// Returns the LockResponse with aggregate lock_id and individual lock mappings.
    /// On quorum failure, every lock that was acquired is rolled back and the
    /// returned mapping vector is empty.
    async fn acquire_lock_quorum(&self, request: &LockRequest) -> Result<(LockResponse, Vec<(LockId, Arc<dyn LockClient>)>)> {
        // Fire the acquisition at every client concurrently; the index tags
        // each result back to its client.
        let futs: Vec<_> = self
            .clients
            .iter()
            .enumerate()
            .map(|(idx, client)| async move { (idx, client.acquire_lock(request).await) })
            .collect();
        let results = futures::future::join_all(futs).await;
        // Store all individual lock_ids and their corresponding clients
        let mut individual_locks: Vec<(LockId, Arc<dyn LockClient>)> = Vec::new();
        for (idx, result) in results {
            match result {
                Ok(resp) => {
                    if resp.success {
                        // Collect individual lock_id and client for each successful acquisition
                        if let Some(lock_info) = &resp.lock_info
                            && idx < self.clients.len()
                        {
                            // Save the individual lock_id returned by each client
                            individual_locks.push((lock_info.id.clone(), self.clients[idx].clone()));
                        }
                    } else {
                        tracing::warn!(
                            "Failed to acquire lock on client from response: {}, error: {}",
                            idx,
                            resp.error.unwrap_or_else(|| "unknown error".to_string())
                        );
                    }
                }
                Err(e) => {
                    tracing::warn!("Failed to acquire lock on client {}: {}", idx, e);
                }
            }
        }
        if individual_locks.len() >= self.quorum {
            // Generate a new aggregate lock_id for multiple client locks
            let aggregate_lock_id = generate_aggregate_lock_id(&request.resource);
            tracing::debug!(
                "Generated aggregate lock_id {} for {} individual locks on resource {}",
                aggregate_lock_id,
                individual_locks.len(),
                request.resource
            );
            let resp = LockResponse::success(
                LockInfo {
                    id: aggregate_lock_id,
                    resource: request.resource.clone(),
                    lock_type: request.lock_type,
                    status: LockStatus::Acquired,
                    owner: request.owner.clone(),
                    acquired_at: std::time::SystemTime::now(),
                    expires_at: std::time::SystemTime::now() + request.ttl,
                    last_refreshed: std::time::SystemTime::now(),
                    metadata: request.metadata.clone(),
                    priority: request.priority,
                    wait_start_time: None,
                },
                Duration::ZERO,
            );
            Ok((resp, individual_locks))
        } else {
            // Rollback: release all locks that were successfully acquired
            let rollback_count = individual_locks.len();
            for (individual_lock_id, client) in individual_locks {
                if let Err(e) = client.release(&individual_lock_id).await {
                    tracing::warn!("Failed to rollback lock {} on client: {}", individual_lock_id, e);
                }
            }
            // The word "quorum" in this message is what acquire_guard keys on.
            let resp = LockResponse::failure(
                format!("Failed to acquire quorum: {}/{} required", rollback_count, self.quorum),
                Duration::ZERO,
            );
            Ok((resp, Vec::new()))
        }
    }
}

View File

@@ -51,51 +51,22 @@ impl DisabledLockManager {
}
/// Always succeeds - returns a no-op guard
pub async fn acquire_read_lock(
&self,
bucket: impl Into<Arc<str>>,
object: impl Into<Arc<str>>,
owner: impl Into<Arc<str>>,
) -> Result<FastLockGuard, LockResult> {
let request = ObjectLockRequest::new_read(bucket, object, owner);
pub async fn acquire_read_lock(&self, key: ObjectKey, owner: impl Into<Arc<str>>) -> Result<FastLockGuard, LockResult> {
let request = ObjectLockRequest::new_read(key, owner);
self.acquire_lock(request).await
}
/// Always succeeds - returns a no-op guard
pub async fn acquire_read_lock_versioned(
&self,
bucket: impl Into<Arc<str>>,
object: impl Into<Arc<str>>,
version: impl Into<Arc<str>>,
key: ObjectKey,
owner: impl Into<Arc<str>>,
) -> Result<FastLockGuard, LockResult> {
let request = ObjectLockRequest::new_read(bucket, object, owner).with_version(version);
let request = ObjectLockRequest::new_write(key, owner);
self.acquire_lock(request).await
}
/// Always succeeds - returns a no-op guard
pub async fn acquire_write_lock(
&self,
bucket: impl Into<Arc<str>>,
object: impl Into<Arc<str>>,
owner: impl Into<Arc<str>>,
) -> Result<FastLockGuard, LockResult> {
let request = ObjectLockRequest::new_write(bucket, object, owner);
self.acquire_lock(request).await
}
/// Always succeeds - returns a no-op guard
pub async fn acquire_write_lock_versioned(
&self,
bucket: impl Into<Arc<str>>,
object: impl Into<Arc<str>>,
version: impl Into<Arc<str>>,
owner: impl Into<Arc<str>>,
) -> Result<FastLockGuard, LockResult> {
let request = ObjectLockRequest::new_write(bucket, object, owner).with_version(version);
self.acquire_lock(request).await
}
/// Always succeeds - all locks acquired
pub async fn acquire_locks_batch(&self, batch_request: BatchLockRequest) -> BatchLockResult {
let successful_locks: Vec<ObjectKey> = batch_request.requests.iter().map(|req| req.key.clone()).collect();
@@ -161,42 +132,12 @@ impl LockManager for DisabledLockManager {
self.acquire_lock(request).await
}
async fn acquire_read_lock(
&self,
bucket: impl Into<Arc<str>> + Send,
object: impl Into<Arc<str>> + Send,
owner: impl Into<Arc<str>> + Send,
) -> Result<FastLockGuard, LockResult> {
self.acquire_read_lock(bucket, object, owner).await
async fn acquire_read_lock(&self, key: ObjectKey, owner: impl Into<Arc<str>> + Send) -> Result<FastLockGuard, LockResult> {
self.acquire_read_lock(key, owner).await
}
async fn acquire_read_lock_versioned(
&self,
bucket: impl Into<Arc<str>> + Send,
object: impl Into<Arc<str>> + Send,
version: impl Into<Arc<str>> + Send,
owner: impl Into<Arc<str>> + Send,
) -> Result<FastLockGuard, LockResult> {
self.acquire_read_lock_versioned(bucket, object, version, owner).await
}
async fn acquire_write_lock(
&self,
bucket: impl Into<Arc<str>> + Send,
object: impl Into<Arc<str>> + Send,
owner: impl Into<Arc<str>> + Send,
) -> Result<FastLockGuard, LockResult> {
self.acquire_write_lock(bucket, object, owner).await
}
async fn acquire_write_lock_versioned(
&self,
bucket: impl Into<Arc<str>> + Send,
object: impl Into<Arc<str>> + Send,
version: impl Into<Arc<str>> + Send,
owner: impl Into<Arc<str>> + Send,
) -> Result<FastLockGuard, LockResult> {
self.acquire_write_lock_versioned(bucket, object, version, owner).await
async fn acquire_write_lock(&self, key: ObjectKey, owner: impl Into<Arc<str>> + Send) -> Result<FastLockGuard, LockResult> {
self.acquire_write_lock(key, owner).await
}
async fn acquire_locks_batch(&self, batch_request: BatchLockRequest) -> BatchLockResult {
@@ -235,63 +176,3 @@ impl LockManager for DisabledLockManager {
true
}
}
// Tests for DisabledLockManager: every operation must succeed immediately
// and report the disabled/no-op state (empty metrics, zero cleanups).
#[cfg(test)]
mod tests {
    use super::*;

    #[tokio::test]
    async fn test_disabled_manager_basic_operations() {
        let manager = DisabledLockManager::new();
        // All operations should succeed immediately
        let read_guard = manager
            .acquire_read_lock("bucket", "object", "owner1")
            .await
            .expect("Disabled manager should always succeed");
        let write_guard = manager
            .acquire_write_lock("bucket", "object", "owner2")
            .await
            .expect("Disabled manager should always succeed");
        // Guards should indicate they are disabled
        assert!(read_guard.is_disabled());
        assert!(write_guard.is_disabled());
    }

    #[tokio::test]
    async fn test_disabled_manager_batch_operations() {
        let manager = DisabledLockManager::new();
        let batch = BatchLockRequest::new("owner")
            .add_read_lock("bucket", "obj1")
            .add_write_lock("bucket", "obj2")
            .with_all_or_nothing(true);
        let result = manager.acquire_locks_batch(batch).await;
        // Even with all_or_nothing, the disabled manager grants everything.
        assert!(result.all_acquired);
        assert_eq!(result.successful_locks.len(), 2);
        assert!(result.failed_locks.is_empty());
    }

    #[tokio::test]
    async fn test_disabled_manager_metrics() {
        let manager = DisabledLockManager::new();
        // Metrics should indicate empty/disabled state
        let metrics = manager.get_metrics();
        assert!(metrics.is_empty());
        assert_eq!(manager.total_lock_count(), 0);
        assert!(manager.get_pool_stats().is_empty());
    }

    #[tokio::test]
    async fn test_disabled_manager_cleanup() {
        let manager = DisabledLockManager::new();
        // Cleanup should be no-op
        assert_eq!(manager.cleanup_expired().await, 0);
        assert_eq!(manager.cleanup_expired_traditional().await, 0);
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,255 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Example integration of FastObjectLockManager in set_disk.rs
// This shows how to replace the current slow lock system
use crate::fast_lock::{BatchLockRequest, FastObjectLockManager, ObjectLockRequest};
use std::sync::Arc;
use std::time::Duration;
/// Example integration into SetDisks structure
pub struct SetDisksWithFastLock {
    /// Replace the old namespace_lock with fast lock manager
    pub fast_lock_manager: Arc<FastObjectLockManager>,
    // Owner identity stamped onto every lock acquired by this set.
    pub locker_owner: String,
    // ... other fields remain the same
}
impl SetDisksWithFastLock {
    /// Example: Replace get_object_reader with fast locking.
    ///
    /// Takes a shared lock (version-specific when `version` is provided) and
    /// holds it for the duration of the read via RAII.
    pub async fn get_object_reader_fast(
        &self,
        bucket: &str,
        object: &str,
        version: Option<&str>,
        // ... other parameters
    ) -> Result<(), Box<dyn std::error::Error>> {
        let owner = self.locker_owner.as_str();
        // Fast path: try to take the read lock immediately.
        let _read_guard = match version {
            // Version-specific lock
            Some(v) => self
                .fast_lock_manager
                .acquire_read_lock_versioned(bucket, object, v, owner)
                .await
                .map_err(|_| "Lock acquisition failed")?,
            // Latest-version lock
            None => self
                .fast_lock_manager
                .acquire_read_lock(bucket, object, owner)
                .await
                .map_err(|_| "Lock acquisition failed")?,
        };
        // Critical section: read the object.
        // The lock is released automatically when `_read_guard` drops.
        // ... actual read operation logic
        Ok(())
    }

    /// Example: Replace put_object with fast locking.
    ///
    /// Acquires an exclusive write lock with both an acquire deadline and a
    /// hold (lease) timeout before entering the critical section.
    pub async fn put_object_fast(
        &self,
        bucket: &str,
        object: &str,
        version: Option<&str>,
        // ... other parameters
    ) -> Result<(), Box<dyn std::error::Error>> {
        let mut request = ObjectLockRequest::new_write(bucket, object, self.locker_owner.as_str())
            .with_acquire_timeout(Duration::from_secs(5))
            .with_lock_timeout(Duration::from_secs(30));
        if let Some(v) = version {
            request = request.with_version(v);
        }
        let _write_guard = self
            .fast_lock_manager
            .acquire_lock(request)
            .await
            .map_err(|_| "Lock acquisition failed")?;
        // Critical section: write the object.
        // ... actual write operation logic
        Ok(())
        // Lock released automatically when `_write_guard` drops.
    }

    /// Example: Replace delete_objects with batch fast locking.
    ///
    /// Locks every target atomically (all-or-nothing) before deleting any of
    /// them; requests are sorted internally to prevent deadlocks.
    pub async fn delete_objects_fast(
        &self,
        bucket: &str,
        objects: Vec<(&str, Option<&str>)>, // (object_name, version)
    ) -> Result<Vec<String>, Box<dyn std::error::Error>> {
        let owner = self.locker_owner.as_str();
        // Either every lock is taken or the whole call fails.
        let mut batch = BatchLockRequest::new(owner).with_all_or_nothing(true);
        for (name, version) in &objects {
            let mut req = ObjectLockRequest::new_write(bucket, *name, owner);
            if let Some(v) = version {
                req = req.with_version(*v);
            }
            batch.requests.push(req);
        }

        let outcome = self.fast_lock_manager.acquire_locks_batch(batch).await;
        if !outcome.all_acquired {
            return Err("Failed to acquire all locks for batch delete".into());
        }

        // Critical section: delete all objects.
        // ... actual delete operation logic
        let deleted: Vec<String> = objects.into_iter().map(|(name, _version)| name.to_string()).collect();

        // All locks released automatically when the batch guards drop.
        Ok(deleted)
    }

    /// Example: Health check integration.
    pub fn get_lock_health(&self) -> crate::fast_lock::metrics::AggregatedMetrics {
        self.fast_lock_manager.get_metrics()
    }

    /// Example: Cleanup integration.
    pub async fn cleanup_expired_locks(&self) -> usize {
        self.fast_lock_manager.cleanup_expired().await
    }
}
/// Performance comparison demonstration
pub mod performance_comparison {
    use super::*;
    use std::time::Instant;

    /// Times 1000 exclusive acquisitions and prints shard-level metrics.
    pub async fn benchmark_fast_vs_old() {
        let manager = Arc::new(FastObjectLockManager::new());
        let owner = "benchmark_owner";

        // Time 1000 write-lock acquisitions through the fast path.
        let started = Instant::now();
        let mut held = Vec::new();
        for idx in 0..1000 {
            let guard = manager
                .acquire_write_lock("bucket", format!("object_{idx}"), owner)
                .await
                .expect("Failed to acquire fast lock");
            held.push(guard);
        }
        let fast_duration = started.elapsed();
        println!("Fast lock: 1000 acquisitions in {fast_duration:?}");

        // Release everything at once.
        drop(held);

        // Report the metrics gathered during the run.
        let metrics = manager.get_metrics();
        println!("Fast path rate: {:.2}%", metrics.shard_metrics.fast_path_rate() * 100.0);
        println!("Average wait time: {:?}", metrics.shard_metrics.avg_wait_time());
        println!("Total operations/sec: {:.2}", metrics.ops_per_second());
    }
}
/// Migration guide from old to new system
pub mod migration_guide {
    /*
    Step-by-step migration from old lock system:

    1. Replace namespace_lock field:
       OLD: pub namespace_lock: Arc<rustfs_lock::NamespaceLock>
       NEW: pub fast_lock_manager: Arc<FastObjectLockManager>

    2. Replace lock acquisition:
       OLD: self.namespace_lock.lock_guard(object, &self.locker_owner, timeout, ttl).await?
       NEW: self.fast_lock_manager.acquire_write_lock(bucket, object, &self.locker_owner).await?

    3. Replace read lock acquisition:
       OLD: self.namespace_lock.rlock_guard(object, &self.locker_owner, timeout, ttl).await?
       NEW: self.fast_lock_manager.acquire_read_lock(bucket, object, &self.locker_owner).await?

    4. Add version support where needed:
       NEW: self.fast_lock_manager.acquire_write_lock_versioned(bucket, object, version, owner).await?

    5. Replace batch operations:
       OLD: Multiple individual lock_guard calls in loop
       NEW: Single BatchLockRequest with all objects

    6. Remove manual lock release (RAII handles it automatically):
       OLD: guard.disarm() or explicit release
       NEW: Just let guard go out of scope

    Expected performance improvements (as claimed by the fast_lock design;
    verify with the performance_comparison benchmark on your workload):
    - 10-50x faster lock acquisition
    - 90%+ fast path success rate
    - Sub-millisecond lock operations
    - No deadlock issues with batch operations
    - Automatic cleanup and monitoring
    */
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises the three example operations end to end.
    #[tokio::test]
    async fn test_integration_example() {
        let set_disks = SetDisksWithFastLock {
            fast_lock_manager: Arc::new(FastObjectLockManager::new()),
            locker_owner: "test_owner".to_string(),
        };

        // Read path.
        assert!(set_disks.get_object_reader_fast("bucket", "object", None).await.is_ok());

        // Write path (versioned).
        assert!(set_disks.put_object_fast("bucket", "object", Some("v1")).await.is_ok());

        // Batch delete path.
        let targets = vec![("obj1", None), ("obj2", Some("v1"))];
        assert!(set_disks.delete_objects_fast("bucket", targets).await.is_ok());
    }

    /// Distinct versions of the same object must lock independently.
    #[tokio::test]
    async fn test_version_locking() {
        let manager = Arc::new(FastObjectLockManager::new());

        let guard_v1 = manager
            .acquire_write_lock_versioned("bucket", "object", "v1", "owner1")
            .await
            .expect("Failed to lock v1");
        let guard_v2 = manager
            .acquire_write_lock_versioned("bucket", "object", "v2", "owner2")
            .await
            .expect("Failed to lock v2");

        // Both locks should coexist.
        assert!(!guard_v1.is_released());
        assert!(!guard_v2.is_released());
    }
}

View File

@@ -1,166 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Integration tests for performance optimizations
#[cfg(test)]
mod tests {
    use crate::fast_lock::FastObjectLockManager;
    use tokio::time::Duration;

    // Verifies that lock-state objects are recycled through the per-shard
    // pools: many acquire/release cycles must register pool hits or misses.
    #[tokio::test]
    async fn test_object_pool_integration() {
        let manager = FastObjectLockManager::new();
        // Create many locks to test pool efficiency
        let mut guards = Vec::new();
        for i in 0..100 {
            let bucket = format!("test-bucket-{}", i % 10); // Reuse some bucket names
            let object = format!("test-object-{i}");
            let guard = manager
                .acquire_write_lock(bucket.as_str(), object.as_str(), "test-owner")
                .await
                .expect("Failed to acquire lock");
            guards.push(guard);
        }
        // Drop all guards to return objects to pool
        drop(guards);
        // Wait a moment for cleanup
        tokio::time::sleep(Duration::from_millis(100)).await;
        // Get pool statistics from all shards
        let pool_stats = manager.get_pool_stats();
        // Each entry is a (hits, misses, releases, pool_size) tuple; sum them
        // across all shards before computing the hit rate.
        let (hits, misses, releases, pool_size) = pool_stats.iter().fold((0, 0, 0, 0), |acc, stats| {
            (acc.0 + stats.0, acc.1 + stats.1, acc.2 + stats.2, acc.3 + stats.3)
        });
        let hit_rate = if hits + misses > 0 {
            hits as f64 / (hits + misses) as f64
        } else {
            0.0
        };
        println!("Pool stats - Hits: {hits}, Misses: {misses}, Releases: {releases}, Pool size: {pool_size}");
        println!("Hit rate: {:.2}%", hit_rate * 100.0);
        // We should see some pool activity
        assert!(hits + misses > 0, "Pool should have been used");
    }

    // Read locks on different objects — and compatible read locks on the same
    // object — should both complete within the 10ms fast-path budget.
    // NOTE(review): thresholds assume an unloaded test machine.
    #[tokio::test]
    async fn test_optimized_notification_system() {
        let manager = FastObjectLockManager::new();
        // Test that notifications work by measuring timing
        let start = std::time::Instant::now();
        // Acquire two read locks on different objects (should be fast)
        let guard1 = manager
            .acquire_read_lock("bucket", "object1", "reader1")
            .await
            .expect("Failed to acquire first read lock");
        let guard2 = manager
            .acquire_read_lock("bucket", "object2", "reader2")
            .await
            .expect("Failed to acquire second read lock");
        let duration = start.elapsed();
        println!("Two read locks on different objects took: {duration:?}");
        // Should be very fast since no contention
        assert!(duration < Duration::from_millis(10), "Read locks should be fast with no contention");
        drop(guard1);
        drop(guard2);
        // Test same object contention
        let start = std::time::Instant::now();
        let guard1 = manager
            .acquire_read_lock("bucket", "same-object", "reader1")
            .await
            .expect("Failed to acquire first read lock on same object");
        let guard2 = manager
            .acquire_read_lock("bucket", "same-object", "reader2")
            .await
            .expect("Failed to acquire second read lock on same object");
        let duration = start.elapsed();
        println!("Two read locks on same object took: {duration:?}");
        // Should still be fast since read locks are compatible
        assert!(duration < Duration::from_millis(10), "Compatible read locks should be fast");
        drop(guard1);
        drop(guard2);
    }

    // Both the first and a subsequent shared acquisition on one object should
    // hit the fast path (sub-10ms each).
    #[tokio::test]
    async fn test_fast_path_optimization() {
        let manager = FastObjectLockManager::new();
        // First acquisition should be fast path
        let start = std::time::Instant::now();
        let guard1 = manager
            .acquire_read_lock("bucket", "object", "reader1")
            .await
            .expect("Failed to acquire first read lock");
        let first_duration = start.elapsed();
        // Second read lock should also be fast path
        let start = std::time::Instant::now();
        let guard2 = manager
            .acquire_read_lock("bucket", "object", "reader2")
            .await
            .expect("Failed to acquire second read lock");
        let second_duration = start.elapsed();
        println!("First lock: {first_duration:?}, Second lock: {second_duration:?}");
        // Both should be very fast (sub-millisecond typically)
        assert!(first_duration < Duration::from_millis(10));
        assert!(second_duration < Duration::from_millis(10));
        drop(guard1);
        drop(guard2);
    }

    // A mixed read/write batch over three uncontended keys must fully
    // succeed and finish within 100ms.
    #[tokio::test]
    async fn test_batch_operations_optimization() {
        let manager = FastObjectLockManager::new();
        // Test batch operation with sorted keys
        let batch = crate::fast_lock::BatchLockRequest::new("batch-owner")
            .add_read_lock("bucket", "obj1")
            .add_read_lock("bucket", "obj2")
            .add_write_lock("bucket", "obj3")
            .with_all_or_nothing(false);
        let start = std::time::Instant::now();
        let result = manager.acquire_locks_batch(batch).await;
        let duration = start.elapsed();
        println!("Batch operation took: {duration:?}");
        assert!(result.all_acquired, "All locks should be acquired");
        assert_eq!(result.successful_locks.len(), 3);
        assert!(result.failed_locks.is_empty());
        // Batch should be reasonably fast
        assert!(duration < Duration::from_millis(100));
    }
}

View File

@@ -77,100 +77,54 @@ impl FastObjectLockManager {
}
/// Acquire shared (read) lock
pub async fn acquire_read_lock(
&self,
bucket: impl Into<Arc<str>>,
object: impl Into<Arc<str>>,
owner: impl Into<Arc<str>>,
) -> Result<FastLockGuard, LockResult> {
let request = ObjectLockRequest::new_read(bucket, object, owner);
pub async fn acquire_read_lock(&self, key: ObjectKey, owner: impl Into<Arc<str>>) -> Result<FastLockGuard, LockResult> {
let request = ObjectLockRequest::new_read(key, owner);
self.acquire_lock(request).await
}
/// Acquire shared (read) lock for specific version
pub async fn acquire_read_lock_versioned(
&self,
bucket: impl Into<Arc<str>>,
object: impl Into<Arc<str>>,
version: impl Into<Arc<str>>,
owner: impl Into<Arc<str>>,
) -> Result<FastLockGuard, LockResult> {
let request = ObjectLockRequest::new_read(bucket, object, owner).with_version(version);
self.acquire_lock(request).await
}
/// Acquire exclusive (write) lock
pub async fn acquire_write_lock(
&self,
bucket: impl Into<Arc<str>>,
object: impl Into<Arc<str>>,
owner: impl Into<Arc<str>>,
) -> Result<FastLockGuard, LockResult> {
// let bucket = bucket.into();
// let object = object.into();
// let owner = owner.into();
// error!("acquire_write_lock: bucket={:?}, object={:?}, owner={:?}", bucket, object, owner);
let request = ObjectLockRequest::new_write(bucket, object, owner);
self.acquire_lock(request).await
}
/// Acquire exclusive (write) lock for specific version
pub async fn acquire_write_lock_versioned(
&self,
bucket: impl Into<Arc<str>>,
object: impl Into<Arc<str>>,
version: impl Into<Arc<str>>,
owner: impl Into<Arc<str>>,
) -> Result<FastLockGuard, LockResult> {
let request = ObjectLockRequest::new_write(bucket, object, owner).with_version(version);
pub async fn acquire_write_lock(&self, key: ObjectKey, owner: impl Into<Arc<str>>) -> Result<FastLockGuard, LockResult> {
let request = ObjectLockRequest::new_write(key, owner);
self.acquire_lock(request).await
}
/// Acquire high-priority read lock - optimized for database queries
pub async fn acquire_high_priority_read_lock(
&self,
bucket: impl Into<Arc<str>>,
object: impl Into<Arc<str>>,
key: ObjectKey,
owner: impl Into<Arc<str>>,
) -> Result<FastLockGuard, LockResult> {
let request =
ObjectLockRequest::new_read(bucket, object, owner).with_priority(crate::fast_lock::types::LockPriority::High);
let request = ObjectLockRequest::new_read(key, owner).with_priority(crate::fast_lock::types::LockPriority::High);
self.acquire_lock(request).await
}
/// Acquire high-priority write lock - optimized for database queries
pub async fn acquire_high_priority_write_lock(
&self,
bucket: impl Into<Arc<str>>,
object: impl Into<Arc<str>>,
key: ObjectKey,
owner: impl Into<Arc<str>>,
) -> Result<FastLockGuard, LockResult> {
let request =
ObjectLockRequest::new_write(bucket, object, owner).with_priority(crate::fast_lock::types::LockPriority::High);
let request = ObjectLockRequest::new_write(key, owner).with_priority(crate::fast_lock::types::LockPriority::High);
self.acquire_lock(request).await
}
/// Acquire critical priority read lock - for system operations
pub async fn acquire_critical_read_lock(
&self,
bucket: impl Into<Arc<str>>,
object: impl Into<Arc<str>>,
key: ObjectKey,
owner: impl Into<Arc<str>>,
) -> Result<FastLockGuard, LockResult> {
let request =
ObjectLockRequest::new_read(bucket, object, owner).with_priority(crate::fast_lock::types::LockPriority::Critical);
let request = ObjectLockRequest::new_read(key, owner).with_priority(crate::fast_lock::types::LockPriority::Critical);
self.acquire_lock(request).await
}
/// Acquire critical priority write lock - for system operations
pub async fn acquire_critical_write_lock(
&self,
bucket: impl Into<Arc<str>>,
object: impl Into<Arc<str>>,
key: ObjectKey,
owner: impl Into<Arc<str>>,
) -> Result<FastLockGuard, LockResult> {
let request =
ObjectLockRequest::new_write(bucket, object, owner).with_priority(crate::fast_lock::types::LockPriority::Critical);
let request = ObjectLockRequest::new_write(key, owner).with_priority(crate::fast_lock::types::LockPriority::Critical);
self.acquire_lock(request).await
}
@@ -440,42 +394,12 @@ impl LockManager for FastObjectLockManager {
self.acquire_lock(request).await
}
async fn acquire_read_lock(
&self,
bucket: impl Into<Arc<str>> + Send,
object: impl Into<Arc<str>> + Send,
owner: impl Into<Arc<str>> + Send,
) -> Result<FastLockGuard, LockResult> {
self.acquire_read_lock(bucket, object, owner).await
async fn acquire_read_lock(&self, key: ObjectKey, owner: impl Into<Arc<str>> + Send) -> Result<FastLockGuard, LockResult> {
self.acquire_read_lock(key, owner).await
}
async fn acquire_read_lock_versioned(
&self,
bucket: impl Into<Arc<str>> + Send,
object: impl Into<Arc<str>> + Send,
version: impl Into<Arc<str>> + Send,
owner: impl Into<Arc<str>> + Send,
) -> Result<FastLockGuard, LockResult> {
self.acquire_read_lock_versioned(bucket, object, version, owner).await
}
async fn acquire_write_lock(
&self,
bucket: impl Into<Arc<str>> + Send,
object: impl Into<Arc<str>> + Send,
owner: impl Into<Arc<str>> + Send,
) -> Result<FastLockGuard, LockResult> {
self.acquire_write_lock(bucket, object, owner).await
}
async fn acquire_write_lock_versioned(
&self,
bucket: impl Into<Arc<str>> + Send,
object: impl Into<Arc<str>> + Send,
version: impl Into<Arc<str>> + Send,
owner: impl Into<Arc<str>> + Send,
) -> Result<FastLockGuard, LockResult> {
self.acquire_write_lock_versioned(bucket, object, version, owner).await
async fn acquire_write_lock(&self, key: ObjectKey, owner: impl Into<Arc<str>> + Send) -> Result<FastLockGuard, LockResult> {
self.acquire_write_lock(key, owner).await
}
async fn acquire_locks_batch(&self, batch_request: BatchLockRequest) -> BatchLockResult {
@@ -514,146 +438,3 @@ impl LockManager for FastObjectLockManager {
false
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use tokio::time::Duration;

    // Basic shared/exclusive acquisition and release on one key.
    #[tokio::test]
    async fn test_manager_basic_operations() {
        let manager = FastObjectLockManager::new();
        // Test read lock
        let read_guard = manager
            .acquire_read_lock("bucket", "object", "owner1")
            .await
            .expect("Failed to acquire read lock");
        // Should be able to acquire another read lock
        let read_guard2 = manager
            .acquire_read_lock("bucket", "object", "owner2")
            .await
            .expect("Failed to acquire second read lock");
        drop(read_guard);
        drop(read_guard2);
        // Test write lock
        let write_guard = manager
            .acquire_write_lock("bucket", "object", "owner1")
            .await
            .expect("Failed to acquire write lock");
        drop(write_guard);
    }

    // An exclusive lock must block readers until released.
    #[tokio::test]
    async fn test_manager_contention() {
        let manager = Arc::new(FastObjectLockManager::new());
        // Acquire write lock
        let write_guard = manager
            .acquire_write_lock("bucket", "object", "owner1")
            .await
            .expect("Failed to acquire write lock");
        // Try to acquire read lock (should timeout)
        let manager_clone = manager.clone();
        let read_result =
            tokio::time::timeout(Duration::from_millis(100), manager_clone.acquire_read_lock("bucket", "object", "owner2")).await;
        assert!(read_result.is_err()); // Should timeout
        drop(write_guard);
        // Now read lock should succeed
        let read_guard = manager
            .acquire_read_lock("bucket", "object", "owner2")
            .await
            .expect("Failed to acquire read lock after write lock released");
        drop(read_guard);
    }

    // Different versions of the same object lock independently.
    #[tokio::test]
    async fn test_versioned_locks() {
        let manager = FastObjectLockManager::new();
        // Acquire lock on version v1
        let v1_guard = manager
            .acquire_write_lock_versioned("bucket", "object", "v1", "owner1")
            .await
            .expect("Failed to acquire v1 lock");
        // Should be able to acquire lock on version v2 simultaneously
        let v2_guard = manager
            .acquire_write_lock_versioned("bucket", "object", "v2", "owner2")
            .await
            .expect("Failed to acquire v2 lock");
        drop(v1_guard);
        drop(v2_guard);
    }

    // All-or-nothing batch over two uncontended keys must fully succeed.
    #[tokio::test]
    async fn test_batch_operations() {
        let manager = FastObjectLockManager::new();
        let batch = BatchLockRequest::new("owner")
            .add_read_lock("bucket", "obj1")
            .add_write_lock("bucket", "obj2")
            .with_all_or_nothing(true);
        let result = manager.acquire_locks_batch(batch).await;
        assert!(result.all_acquired);
        assert_eq!(result.successful_locks.len(), 2);
        assert!(result.failed_locks.is_empty());
    }

    // Metrics counters advance after lock activity.
    #[tokio::test]
    async fn test_metrics() {
        let manager = FastObjectLockManager::new();
        // Perform some operations
        let _guard1 = manager.acquire_read_lock("bucket", "obj1", "owner").await.unwrap();
        let _guard2 = manager.acquire_write_lock("bucket", "obj2", "owner").await.unwrap();
        let metrics = manager.get_metrics();
        assert!(metrics.shard_metrics.total_acquisitions() > 0);
        assert!(metrics.shard_metrics.fast_path_rate() > 0.0);
    }

    // Idle lock entries are reclaimed once max_idle_time has elapsed.
    #[tokio::test]
    async fn test_cleanup() {
        let config = LockConfig {
            max_idle_time: Duration::from_secs(1), // Use 1 second for easier testing
            ..Default::default()
        };
        let manager = FastObjectLockManager::with_config(config);
        // Acquire and release some locks
        {
            let _guard = manager.acquire_read_lock("bucket", "obj1", "owner1").await.unwrap();
            let _guard2 = manager.acquire_read_lock("bucket", "obj2", "owner2").await.unwrap();
        } // Locks are released here
        // Check lock count before cleanup
        let count_before = manager.total_lock_count();
        assert!(count_before >= 2, "Should have at least 2 locks before cleanup");
        // Wait for idle timeout
        tokio::time::sleep(Duration::from_secs(2)).await;
        // Force cleanup with traditional method to ensure cleanup for testing
        let cleaned = manager.cleanup_expired_traditional().await;
        let count_after = manager.total_lock_count();
        // The test should pass if cleanup works at all
        assert!(
            cleaned > 0 || count_after < count_before,
            "Cleanup should either clean locks or they should be cleaned by other means"
        );
    }
}

View File

@@ -31,38 +31,10 @@ pub trait LockManager: Send + Sync {
async fn acquire_lock(&self, request: ObjectLockRequest) -> Result<FastLockGuard, LockResult>;
/// Acquire shared (read) lock
async fn acquire_read_lock(
&self,
bucket: impl Into<Arc<str>> + Send,
object: impl Into<Arc<str>> + Send,
owner: impl Into<Arc<str>> + Send,
) -> Result<FastLockGuard, LockResult>;
/// Acquire shared (read) lock for specific version
async fn acquire_read_lock_versioned(
&self,
bucket: impl Into<Arc<str>> + Send,
object: impl Into<Arc<str>> + Send,
version: impl Into<Arc<str>> + Send,
owner: impl Into<Arc<str>> + Send,
) -> Result<FastLockGuard, LockResult>;
async fn acquire_read_lock(&self, key: ObjectKey, owner: impl Into<Arc<str>> + Send) -> Result<FastLockGuard, LockResult>;
/// Acquire exclusive (write) lock
async fn acquire_write_lock(
&self,
bucket: impl Into<Arc<str>> + Send,
object: impl Into<Arc<str>> + Send,
owner: impl Into<Arc<str>> + Send,
) -> Result<FastLockGuard, LockResult>;
/// Acquire exclusive (write) lock for specific version
async fn acquire_write_lock_versioned(
&self,
bucket: impl Into<Arc<str>> + Send,
object: impl Into<Arc<str>> + Send,
version: impl Into<Arc<str>> + Send,
owner: impl Into<Arc<str>> + Send,
) -> Result<FastLockGuard, LockResult>;
async fn acquire_write_lock(&self, key: ObjectKey, owner: impl Into<Arc<str>> + Send) -> Result<FastLockGuard, LockResult>;
/// Acquire multiple locks atomically
async fn acquire_locks_batch(&self, batch_request: BatchLockRequest) -> BatchLockResult;

View File

@@ -26,8 +26,6 @@
pub mod disabled_manager;
pub mod guard;
pub mod integration_example;
pub mod integration_test;
pub mod manager;
pub mod manager_trait;
pub mod metrics;
@@ -37,6 +35,9 @@ pub mod shard;
pub mod state;
pub mod types;
#[cfg(test)]
mod tests;
// Re-export main types
pub use disabled_manager::DisabledLockManager;
pub use guard::FastLockGuard;
@@ -45,19 +46,19 @@ pub use manager_trait::LockManager;
use std::time::Duration;
pub use types::*;
/// Default RustFS specific timeouts in seconds
pub(crate) const DEFAULT_RUSTFS_MAX_ACQUIRE_TIMEOUT: u64 = 120;
/// Maximum acquire timeout in seconds (for slow storage / high contention; override via env)
pub(crate) const DEFAULT_RUSTFS_MAX_ACQUIRE_TIMEOUT: u64 = 60;
/// Default RustFS acquire timeout in seconds
pub(crate) const DEFAULT_RUSTFS_ACQUIRE_TIMEOUT: u64 = 60;
/// Default acquire timeout in seconds (how long to wait for a lock before giving up)
pub(crate) const DEFAULT_RUSTFS_ACQUIRE_TIMEOUT: u64 = 10;
/// Default shard count (must be power of 2)
pub const DEFAULT_SHARD_COUNT: usize = 1024;
/// Default lock timeout
/// Default lock timeout (lease TTL; lock is released if not refreshed within this duration)
pub const DEFAULT_LOCK_TIMEOUT: Duration = Duration::from_secs(30);
/// Default acquire timeout - increased for network block storage workloads (e.g., Hetzner Ceph)
/// Default acquire timeout - common value for local/low-latency; use env to increase for slow storage
pub const DEFAULT_ACQUIRE_TIMEOUT: Duration = Duration::from_secs(DEFAULT_RUSTFS_ACQUIRE_TIMEOUT);
/// Maximum acquire timeout for high-load scenarios

View File

@@ -759,13 +759,17 @@ mod tests {
let shard = LockShard::new(0);
// First acquire a lock that will block the batch operation
let blocking_request = ObjectLockRequest::new_write("bucket", "obj1", "blocking_owner");
let blocking_request = ObjectLockRequest::new_write(ObjectKey::new("bucket", "obj1"), "blocking_owner")
.with_acquire_timeout(Duration::from_secs(1));
shard.acquire_lock(&blocking_request).await.unwrap();
// Now try a batch operation that should fail and clean up properly
// Use short acquire timeout so the test fails fast when obj1 is already locked
// (default is 60s which would make this test very slow)
let requests = vec![
ObjectLockRequest::new_read("bucket", "obj2", "batch_owner"), // This should succeed
ObjectLockRequest::new_write("bucket", "obj1", "batch_owner"), // This should fail due to existing lock
ObjectLockRequest::new_read(ObjectKey::new("bucket", "obj2"), "batch_owner")
.with_acquire_timeout(Duration::from_millis(100)), // This should succeed
ObjectLockRequest::new_write(ObjectKey::new("bucket", "obj1"), "batch_owner")
.with_acquire_timeout(Duration::from_millis(100)), // This should fail due to existing lock
];
let result = shard.acquire_locks_batch(requests, true).await;

View File

@@ -0,0 +1,532 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#[cfg(test)]
mod fast_lock_tests {
use crate::fast_lock::FastObjectLockManager;
use crate::fast_lock::types::{LockConfig, LockMode, LockPriority, LockResult, ObjectKey, ObjectLockRequest};
use std::sync::Arc;
use std::time::Duration;
use tokio::time::sleep;
/// Helper function to create a test lock manager
fn create_test_manager() -> FastObjectLockManager {
    // A small shard count keeps test fixtures light; the timeouts are the
    // values the tests below rely on.
    FastObjectLockManager::with_config(LockConfig {
        shard_count: 4, // Use smaller shard count for tests
        default_lock_timeout: Duration::from_secs(30),
        default_acquire_timeout: Duration::from_secs(5),
        ..LockConfig::default()
    })
}
#[tokio::test]
async fn test_basic_write_lock_acquire_release() {
    let manager = create_test_manager();
    let key = ObjectKey::new("test-bucket", "test-object");
    let owner: Arc<str> = Arc::from("test-owner");

    // Take the exclusive lock and check the guard reflects it.
    let mut guard = manager
        .acquire_write_lock(key.clone(), owner.clone())
        .await
        .expect("Should acquire write lock");
    assert_eq!(guard.key(), &key);
    assert_eq!(guard.owner(), &owner);
    assert_eq!(guard.mode(), LockMode::Exclusive);
    assert!(!guard.is_released());

    // Explicit release flips the guard state.
    assert!(guard.release(), "Should release lock successfully");
    assert!(guard.is_released(), "Guard should be marked as released");

    // The key is free again, so a second acquisition succeeds.
    let reacquired = manager
        .acquire_write_lock(key.clone(), owner.clone())
        .await
        .expect("Should acquire write lock again after release");
    drop(reacquired);
}
#[tokio::test]
async fn test_basic_read_lock_acquire_release() {
    let manager = create_test_manager();
    let key = ObjectKey::new("test-bucket", "test-object");
    let owner: Arc<str> = Arc::from("test-owner");

    // Take a shared lock and check the guard metadata.
    let mut guard = manager
        .acquire_read_lock(key.clone(), owner.clone())
        .await
        .expect("Should acquire read lock");
    assert_eq!(guard.key(), &key);
    assert_eq!(guard.owner(), &owner);
    assert_eq!(guard.mode(), LockMode::Shared);
    assert!(!guard.is_released());

    // Explicit release flips the guard state.
    assert!(guard.release(), "Should release lock successfully");
    assert!(guard.is_released(), "Guard should be marked as released");
}
#[tokio::test]
async fn test_lock_auto_release_on_drop() {
    let manager = create_test_manager();
    let key = ObjectKey::new("test-bucket", "test-object");
    let first_owner: Arc<str> = Arc::from("owner1");
    let second_owner: Arc<str> = Arc::from("owner2");

    // Scope the first guard so dropping it auto-releases the lock.
    {
        let guard = manager
            .acquire_write_lock(key.clone(), first_owner.clone())
            .await
            .expect("Should acquire write lock");
        assert!(!guard.is_released());
        // Guard is dropped here.
    }

    // Give the drop-time cleanup a moment to run.
    sleep(Duration::from_millis(10)).await;

    // A different owner can now take the same key.
    let guard = manager
        .acquire_write_lock(key.clone(), second_owner.clone())
        .await
        .expect("Should acquire write lock after previous guard dropped");
    drop(guard);
}
#[tokio::test]
async fn test_multiple_read_locks() {
    let manager = create_test_manager();
    let key = ObjectKey::new("test-bucket", "test-object");
    let owner1: Arc<str> = Arc::from("owner1");
    let owner2: Arc<str> = Arc::from("owner2");
    let owner3: Arc<str> = Arc::from("owner3");

    // Three distinct owners may hold shared locks on the same key at once.
    let mut first = manager
        .acquire_read_lock(key.clone(), owner1.clone())
        .await
        .expect("Should acquire first read lock");
    let mut second = manager
        .acquire_read_lock(key.clone(), owner2.clone())
        .await
        .expect("Should acquire second read lock");
    let mut third = manager
        .acquire_read_lock(key.clone(), owner3.clone())
        .await
        .expect("Should acquire third read lock");

    // Every guard reports shared mode.
    assert_eq!(first.mode(), LockMode::Shared);
    assert_eq!(second.mode(), LockMode::Shared);
    assert_eq!(third.mode(), LockMode::Shared);

    // And each releases cleanly.
    assert!(first.release());
    assert!(second.release());
    assert!(third.release());
}
#[tokio::test]
async fn test_write_lock_excludes_read_lock() {
    let manager = create_test_manager();
    let key = ObjectKey::new("test-bucket", "test-object");
    let writer: Arc<str> = Arc::from("writer");
    let reader: Arc<str> = Arc::from("reader");

    // Hold the exclusive lock...
    let mut write_guard = manager
        .acquire_write_lock(key.clone(), writer.clone())
        .await
        .expect("Should acquire write lock");

    // ...so a shared request with a short deadline must time out.
    let blocked_read =
        ObjectLockRequest::new_read(key.clone(), reader.clone()).with_acquire_timeout(Duration::from_millis(100));
    let outcome = manager.acquire_lock(blocked_read).await;
    assert!(
        matches!(outcome, Err(LockResult::Timeout)),
        "Read lock should timeout when write lock is held"
    );

    // Releasing the writer unblocks readers.
    assert!(write_guard.release());
    let mut read_guard = manager
        .acquire_read_lock(key.clone(), reader.clone())
        .await
        .expect("Should acquire read lock after write lock released");
    assert!(read_guard.release());
}
#[tokio::test]
async fn test_read_lock_excludes_write_lock() {
    let manager = create_test_manager();
    let key = ObjectKey::new("test-bucket", "test-object");
    let reader: Arc<str> = Arc::from("reader");
    let writer: Arc<str> = Arc::from("writer");

    // Hold a shared lock...
    let mut read_guard = manager
        .acquire_read_lock(key.clone(), reader.clone())
        .await
        .expect("Should acquire read lock");

    // ...so an exclusive request with a short deadline must time out.
    let blocked_write =
        ObjectLockRequest::new_write(key.clone(), writer.clone()).with_acquire_timeout(Duration::from_millis(100));
    let outcome = manager.acquire_lock(blocked_write).await;
    assert!(
        matches!(outcome, Err(LockResult::Timeout)),
        "Write lock should timeout when read lock is held"
    );

    // Releasing the reader unblocks the writer.
    assert!(read_guard.release());
    let mut write_guard = manager
        .acquire_write_lock(key.clone(), writer.clone())
        .await
        .expect("Should acquire write lock after read lock released");
    assert!(write_guard.release());
}
#[tokio::test]
async fn test_write_lock_excludes_write_lock() {
let manager = create_test_manager();
let key = ObjectKey::new("test-bucket", "test-object");
let owner1: Arc<str> = Arc::from("owner1");
let owner2: Arc<str> = Arc::from("owner2");
// Acquire first write lock
let mut guard1 = manager
.acquire_write_lock(key.clone(), owner1.clone())
.await
.expect("Should acquire first write lock");
// Try to acquire second write lock - should timeout
let request2 = ObjectLockRequest::new_write(key.clone(), owner2.clone()).with_acquire_timeout(Duration::from_millis(100));
let result = manager.acquire_lock(request2).await;
assert!(
matches!(result, Err(LockResult::Timeout)),
"Second write lock should timeout when first write lock is held"
);
// Release first lock
assert!(guard1.release());
// Now second write lock should succeed
let mut guard2 = manager
.acquire_write_lock(key.clone(), owner2.clone())
.await
.expect("Should acquire second write lock after first released");
assert!(guard2.release());
}
#[tokio::test]
async fn test_same_owner_reentrant_write_lock() {
let manager = create_test_manager();
let key = ObjectKey::new("test-bucket", "test-object");
let owner: Arc<str> = Arc::from("owner");
// Acquire first write lock
let mut guard1 = manager
.acquire_write_lock(key.clone(), owner.clone())
.await
.expect("Should acquire first write lock");
// Same owner trying to acquire again - should timeout (not reentrant)
let request2 = ObjectLockRequest::new_write(key.clone(), owner.clone()).with_acquire_timeout(Duration::from_millis(100));
let result = manager.acquire_lock(request2).await;
assert!(
matches!(result, Err(LockResult::Timeout)),
"Same owner should not be able to acquire lock again (not reentrant)"
);
assert!(guard1.release());
}
#[tokio::test]
async fn test_different_keys_no_conflict() {
let manager = create_test_manager();
let key1 = ObjectKey::new("bucket1", "object1");
let key2 = ObjectKey::new("bucket2", "object2");
let owner: Arc<str> = Arc::from("owner");
// Acquire locks on different keys simultaneously
let mut guard1 = manager
.acquire_write_lock(key1.clone(), owner.clone())
.await
.expect("Should acquire lock on key1");
let mut guard2 = manager
.acquire_write_lock(key2.clone(), owner.clone())
.await
.expect("Should acquire lock on key2");
// Both should be valid
assert_eq!(guard1.key(), &key1);
assert_eq!(guard2.key(), &key2);
assert!(guard1.release());
assert!(guard2.release());
}
#[tokio::test]
async fn test_versioned_keys() {
let manager = create_test_manager();
let base_key = ObjectKey::new("bucket", "object");
let versioned_key = ObjectKey::with_version("bucket", "object", "v1");
let owner: Arc<str> = Arc::from("owner");
// Acquire lock on base key
let mut guard1 = manager
.acquire_write_lock(base_key.clone(), owner.clone())
.await
.expect("Should acquire lock on base key");
// Should be able to acquire lock on versioned key (different keys)
let mut guard2 = manager
.acquire_write_lock(versioned_key.clone(), owner.clone())
.await
.expect("Should acquire lock on versioned key");
assert_eq!(guard1.key(), &base_key);
assert_eq!(guard2.key(), &versioned_key);
assert!(guard1.release());
assert!(guard2.release());
}
#[tokio::test]
async fn test_concurrent_read_locks() {
let manager = Arc::new(create_test_manager());
let key = ObjectKey::new("test-bucket", "test-object");
let num_readers = 10;
let mut handles = Vec::new();
// Spawn multiple readers
for i in 0..num_readers {
let manager = manager.clone();
let key = key.clone();
let owner: Arc<str> = Arc::from(format!("reader-{}", i));
let handle = tokio::spawn(async move {
let mut guard = manager.acquire_read_lock(key, owner).await.expect("Should acquire read lock");
// Hold lock for a bit
sleep(Duration::from_millis(10)).await;
assert!(guard.release());
});
handles.push(handle);
}
// Wait for all readers
for handle in handles {
handle.await.expect("Reader task should complete");
}
}
#[tokio::test]
async fn test_concurrent_write_lock_contention() {
let manager = Arc::new(create_test_manager());
let key = ObjectKey::new("test-bucket", "test-object");
let num_writers = 5;
let mut handles = Vec::new();
// Spawn multiple writers - they should serialize
for i in 0..num_writers {
let manager = manager.clone();
let key = key.clone();
let owner: Arc<str> = Arc::from(format!("writer-{}", i));
let handle = tokio::spawn(async move {
let mut guard = manager
.acquire_write_lock(key, owner)
.await
.expect("Should acquire write lock");
// Hold lock for a bit
sleep(Duration::from_millis(10)).await;
assert!(guard.release());
});
handles.push(handle);
}
// Wait for all writers - they should complete sequentially
for handle in handles {
handle.await.expect("Writer task should complete");
}
}
#[tokio::test]
async fn test_lock_timeout() {
let manager = create_test_manager();
let key = ObjectKey::new("test-bucket", "test-object");
let owner1: Arc<str> = Arc::from("owner1");
let owner2: Arc<str> = Arc::from("owner2");
// Acquire first lock
let mut guard1 = manager
.acquire_write_lock(key.clone(), owner1.clone())
.await
.expect("Should acquire first lock");
// Try to acquire with short timeout - should timeout
let request = ObjectLockRequest::new_write(key.clone(), owner2.clone()).with_acquire_timeout(Duration::from_millis(50));
let result = manager.acquire_lock(request).await;
assert!(matches!(result, Err(LockResult::Timeout)), "Should timeout when lock is held");
assert!(guard1.release());
}
#[tokio::test]
async fn test_lock_priority() {
let manager = create_test_manager();
let key = ObjectKey::new("test-bucket", "test-object");
let normal_owner: Arc<str> = Arc::from("normal");
let high_owner: Arc<str> = Arc::from("high");
// Acquire normal priority lock
let normal_request = ObjectLockRequest::new_write(key.clone(), normal_owner.clone())
.with_priority(LockPriority::Normal)
.with_acquire_timeout(Duration::from_secs(1));
let mut normal_guard = manager
.acquire_lock(normal_request)
.await
.expect("Should acquire normal priority lock");
// Try high priority lock - should still timeout (write locks are exclusive)
let high_request = ObjectLockRequest::new_write(key.clone(), high_owner.clone())
.with_priority(LockPriority::High)
.with_acquire_timeout(Duration::from_millis(100));
let result = manager.acquire_lock(high_request).await;
assert!(
matches!(result, Err(LockResult::Timeout)),
"High priority write lock should still timeout when normal write lock is held"
);
assert!(normal_guard.release());
}
#[tokio::test]
async fn test_double_release() {
let manager = create_test_manager();
let key = ObjectKey::new("test-bucket", "test-object");
let owner: Arc<str> = Arc::from("owner");
let mut guard = manager
.acquire_write_lock(key.clone(), owner.clone())
.await
.expect("Should acquire lock");
// First release should succeed
assert!(guard.release(), "First release should succeed");
assert!(guard.is_released(), "Guard should be marked as released");
// Second release should fail
assert!(!guard.release(), "Second release should fail");
}
#[tokio::test]
async fn test_lock_info() {
let manager = create_test_manager();
let key = ObjectKey::new("test-bucket", "test-object");
let owner: Arc<str> = Arc::from("owner");
let mut guard = manager
.acquire_write_lock(key.clone(), owner.clone())
.await
.expect("Should acquire lock");
// Get lock info
let lock_info = guard.lock_info();
assert!(lock_info.is_some(), "Should have lock info");
if let Some(info) = lock_info {
assert_eq!(info.key, key);
assert_eq!(info.mode, LockMode::Exclusive);
assert_eq!(info.owner, owner);
}
// Release lock
assert!(guard.release());
// Lock info should be None after release
let lock_info_after = guard.lock_info();
assert!(lock_info_after.is_none(), "Lock info should be None after release");
}
#[tokio::test]
async fn test_read_write_mixed_scenario() {
let manager = create_test_manager();
let key = ObjectKey::new("test-bucket", "test-object");
let reader1: Arc<str> = Arc::from("reader1");
let reader2: Arc<str> = Arc::from("reader2");
let writer: Arc<str> = Arc::from("writer");
// Acquire two read locks
let mut read_guard1 = manager
.acquire_read_lock(key.clone(), reader1.clone())
.await
.expect("Should acquire first read lock");
let mut read_guard2 = manager
.acquire_read_lock(key.clone(), reader2.clone())
.await
.expect("Should acquire second read lock");
// Writer should timeout
let write_request =
ObjectLockRequest::new_write(key.clone(), writer.clone()).with_acquire_timeout(Duration::from_millis(100));
let result = manager.acquire_lock(write_request).await;
assert!(
matches!(result, Err(LockResult::Timeout)),
"Write lock should timeout when read locks are held"
);
// Release one read lock
assert!(read_guard1.release());
// Writer should still timeout (other read lock still held)
let write_request2 =
ObjectLockRequest::new_write(key.clone(), writer.clone()).with_acquire_timeout(Duration::from_millis(100));
let result2 = manager.acquire_lock(write_request2).await;
assert!(
matches!(result2, Err(LockResult::Timeout)),
"Write lock should still timeout when read lock is held"
);
// Release second read lock
assert!(read_guard2.release());
// Now writer should succeed
let mut write_guard = manager
.acquire_write_lock(key.clone(), writer.clone())
.await
.expect("Should acquire write lock after all read locks released");
assert!(write_guard.release());
}
}

View File

@@ -13,7 +13,7 @@
// limitations under the License.
use crate::fast_lock::guard::FastLockGuard;
use serde::{Deserialize, Serialize};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use smartstring::SmartString;
use std::hash::{Hash, Hasher};
use std::sync::Arc;
@@ -28,6 +28,88 @@ pub struct ObjectKey {
pub version: Option<Arc<str>>, // None means latest version
}
impl Serialize for ObjectKey {
    /// Serializes `ObjectKey` as a 3-field struct so the `Arc<str>` fields
    /// are written as plain strings and the optional version as a nullable
    /// string (this must stay in sync with the manual `Deserialize` impl).
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        use serde::ser::SerializeStruct;

        let mut st = serializer.serialize_struct("ObjectKey", 3)?;
        st.serialize_field("bucket", self.bucket.as_ref())?;
        st.serialize_field("object", self.object.as_ref())?;
        // `as_deref` borrows the inner Arc<str> as Option<&str>.
        st.serialize_field("version", &self.version.as_deref())?;
        st.end()
    }
}
impl<'de> Deserialize<'de> for ObjectKey {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
use serde::de::{self, MapAccess, Visitor};
use std::fmt;
#[derive(Deserialize)]
#[serde(field_identifier, rename_all = "lowercase")]
enum Field {
Bucket,
Object,
Version,
}
struct ObjectKeyVisitor;
impl<'de> Visitor<'de> for ObjectKeyVisitor {
type Value = ObjectKey;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("struct ObjectKey")
}
fn visit_map<V>(self, mut map: V) -> Result<ObjectKey, V::Error>
where
V: MapAccess<'de>,
{
let mut bucket = None;
let mut object = None;
let mut version = None;
while let Some(key) = map.next_key()? {
match key {
Field::Bucket => {
if bucket.is_some() {
return Err(de::Error::duplicate_field("bucket"));
}
let s: String = map.next_value()?;
bucket = Some(Arc::from(s));
}
Field::Object => {
if object.is_some() {
return Err(de::Error::duplicate_field("object"));
}
let s: String = map.next_value()?;
object = Some(Arc::from(s));
}
Field::Version => {
if version.is_some() {
return Err(de::Error::duplicate_field("version"));
}
let opt: Option<String> = map.next_value()?;
version = opt.map(Arc::from);
}
}
}
let bucket = bucket.ok_or_else(|| de::Error::missing_field("bucket"))?;
let object = object.ok_or_else(|| de::Error::missing_field("object"))?;
Ok(ObjectKey { bucket, object, version })
}
}
const FIELDS: &[&str] = &["bucket", "object", "version"];
deserializer.deserialize_struct("ObjectKey", FIELDS, ObjectKeyVisitor)
}
}
impl ObjectKey {
pub fn new(bucket: impl Into<Arc<str>>, object: impl Into<Arc<str>>) -> Self {
Self {
@@ -198,9 +280,9 @@ pub struct ObjectLockRequest {
}
impl ObjectLockRequest {
pub fn new_read(bucket: impl Into<Arc<str>>, object: impl Into<Arc<str>>, owner: impl Into<Arc<str>>) -> Self {
pub fn new_read(key: ObjectKey, owner: impl Into<Arc<str>>) -> Self {
Self {
key: ObjectKey::new(bucket, object),
key,
mode: LockMode::Shared,
owner: owner.into(),
acquire_timeout: crate::fast_lock::DEFAULT_ACQUIRE_TIMEOUT,
@@ -209,9 +291,9 @@ impl ObjectLockRequest {
}
}
pub fn new_write(bucket: impl Into<Arc<str>>, object: impl Into<Arc<str>>, owner: impl Into<Arc<str>>) -> Self {
pub fn new_write(key: ObjectKey, owner: impl Into<Arc<str>>) -> Self {
Self {
key: ObjectKey::new(bucket, object),
key,
mode: LockMode::Exclusive,
owner: owner.into(),
acquire_timeout: crate::fast_lock::DEFAULT_ACQUIRE_TIMEOUT,
@@ -317,15 +399,13 @@ impl BatchLockRequest {
}
}
pub fn add_read_lock(mut self, bucket: impl Into<Arc<str>>, object: impl Into<Arc<str>>) -> Self {
self.requests
.push(ObjectLockRequest::new_read(bucket, object, self.owner.clone()));
pub fn add_read_lock(mut self, key: ObjectKey) -> Self {
self.requests.push(ObjectLockRequest::new_read(key, self.owner.clone()));
self
}
pub fn add_write_lock(mut self, bucket: impl Into<Arc<str>>, object: impl Into<Arc<str>>) -> Self {
self.requests
.push(ObjectLockRequest::new_write(bucket, object, self.owner.clone()));
pub fn add_write_lock(mut self, key: ObjectKey) -> Self {
self.requests.push(ObjectLockRequest::new_write(key, self.owner.clone()));
self
}
@@ -366,7 +446,7 @@ mod tests {
#[test]
fn test_lock_request() {
let req = ObjectLockRequest::new_read("bucket", "object", "owner")
let req = ObjectLockRequest::new_read(ObjectKey::new("bucket", "object"), "owner")
.with_version("v1")
.with_priority(LockPriority::High);
@@ -378,8 +458,8 @@ mod tests {
#[test]
fn test_batch_request() {
let batch = BatchLockRequest::new("owner")
.add_read_lock("bucket", "obj1")
.add_write_lock("bucket", "obj2");
.add_read_lock(ObjectKey::new("bucket", "obj1"))
.add_write_lock(ObjectKey::new("bucket", "obj2"));
assert_eq!(batch.requests.len(), 2);
assert_eq!(batch.requests[0].mode, LockMode::Shared);

View File

@@ -1,117 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::{LockClient, LockId};
use std::sync::{Arc, LazyLock};
use tokio::sync::mpsc;
#[derive(Debug, Clone)]
struct UnlockJob {
lock_id: LockId,
clients: Vec<Arc<dyn LockClient>>, // cloned Arcs; cheap and shares state
}
#[derive(Debug)]
struct UnlockRuntime {
tx: mpsc::Sender<UnlockJob>,
}
// Global unlock runtime with background worker
static UNLOCK_RUNTIME: LazyLock<UnlockRuntime> = LazyLock::new(|| {
// Larger buffer to reduce contention during bursts
let (tx, mut rx) = mpsc::channel::<UnlockJob>(8192);
// Spawn background worker when first used; assumes a Tokio runtime is available
tokio::spawn(async move {
while let Some(job) = rx.recv().await {
// Best-effort release across clients; try all, success if any succeeds
let mut any_ok = false;
let lock_id = job.lock_id.clone();
for client in job.clients.into_iter() {
if client.release(&lock_id).await.unwrap_or(false) {
any_ok = true;
}
}
if !any_ok {
tracing::warn!("LockGuard background release failed for {}", lock_id);
} else {
tracing::debug!("LockGuard background released {}", lock_id);
}
}
});
UnlockRuntime { tx }
});
/// A RAII guard that releases the lock asynchronously when dropped.
#[derive(Debug)]
pub struct LockGuard {
lock_id: LockId,
clients: Vec<Arc<dyn LockClient>>,
/// If true, Drop will not try to release (used if user manually released).
disarmed: bool,
}
impl LockGuard {
pub(crate) fn new(lock_id: LockId, clients: Vec<Arc<dyn LockClient>>) -> Self {
Self {
lock_id,
clients,
disarmed: false,
}
}
/// Get the lock id associated with this guard
pub fn lock_id(&self) -> &LockId {
&self.lock_id
}
/// Manually disarm the guard so dropping it won't release the lock.
/// Call this if you explicitly released the lock elsewhere.
pub fn disarm(&mut self) {
self.disarmed = true;
}
}
impl Drop for LockGuard {
fn drop(&mut self) {
if self.disarmed {
return;
}
let job = UnlockJob {
lock_id: self.lock_id.clone(),
clients: self.clients.clone(),
};
// Try a non-blocking send to avoid panics in Drop
if let Err(err) = UNLOCK_RUNTIME.tx.try_send(job) {
// Channel full or closed; best-effort fallback: spawn a detached task
let lock_id = self.lock_id.clone();
let clients = self.clients.clone();
tracing::warn!("LockGuard channel send failed ({}), spawning fallback unlock task for {}", err, lock_id);
// If runtime is not available, this will panic; but in RustFS we are inside Tokio contexts.
let handle = tokio::spawn(async move {
let futures_iter = clients.into_iter().map(|client| {
let id = lock_id.clone();
async move { client.release(&id).await.unwrap_or(false) }
});
let _ = futures::future::join_all(futures_iter).await;
});
// Explicitly drop the JoinHandle to acknowledge detaching the task.
drop(handle);
}
}
}

View File

@@ -17,6 +17,8 @@
// ============================================================================
// Application Layer Modules
pub mod distributed_lock;
pub mod local_lock;
pub mod namespace;
// Abstraction Layer Modules
@@ -27,7 +29,6 @@ pub mod fast_lock;
// Core Modules
pub mod error;
pub mod guard;
pub mod types;
// ============================================================================
@@ -38,6 +39,7 @@ pub mod types;
pub use crate::{
// Client interfaces
client::{LockClient, local::LocalClient},
distributed_lock::DistributedLockGuard,
// Error types
error::{LockError, Result},
// Fast Lock System exports
@@ -45,9 +47,8 @@ pub use crate::{
BatchLockRequest, BatchLockResult, DisabledLockManager, FastLockGuard, FastObjectLockManager, LockManager, LockMode,
LockResult, ObjectKey, ObjectLockInfo, ObjectLockRequest, metrics::AggregatedMetrics,
},
guard::LockGuard,
// Main components
namespace::{NamespaceLock, NamespaceLockManager},
namespace::{NamespaceLock, NamespaceLockGuard, NamespaceLockWrapper},
// Core types
types::{
HealthInfo, HealthStatus, LockId, LockInfo, LockMetadata, LockPriority, LockRequest, LockResponse, LockStats, LockStatus,
@@ -81,6 +82,7 @@ use std::sync::Arc;
use std::sync::OnceLock;
/// Enum wrapper for different lock manager implementations
#[derive(Debug)]
pub enum GlobalLockManager {
Enabled(Arc<FastObjectLockManager>),
Disabled(DisabledLockManager),
@@ -157,51 +159,23 @@ impl LockManager for GlobalLockManager {
async fn acquire_read_lock(
&self,
bucket: impl Into<Arc<str>> + Send,
object: impl Into<Arc<str>> + Send,
key: ObjectKey,
owner: impl Into<Arc<str>> + Send,
) -> std::result::Result<FastLockGuard, LockResult> {
match self {
Self::Enabled(manager) => manager.acquire_read_lock(bucket, object, owner).await,
Self::Disabled(manager) => manager.acquire_read_lock(bucket, object, owner).await,
}
}
async fn acquire_read_lock_versioned(
&self,
bucket: impl Into<Arc<str>> + Send,
object: impl Into<Arc<str>> + Send,
version: impl Into<Arc<str>> + Send,
owner: impl Into<Arc<str>> + Send,
) -> std::result::Result<FastLockGuard, LockResult> {
match self {
Self::Enabled(manager) => manager.acquire_read_lock_versioned(bucket, object, version, owner).await,
Self::Disabled(manager) => manager.acquire_read_lock_versioned(bucket, object, version, owner).await,
Self::Enabled(manager) => manager.acquire_read_lock(key, owner).await,
Self::Disabled(manager) => manager.acquire_read_lock(key, owner).await,
}
}
async fn acquire_write_lock(
&self,
bucket: impl Into<Arc<str>> + Send,
object: impl Into<Arc<str>> + Send,
key: ObjectKey,
owner: impl Into<Arc<str>> + Send,
) -> std::result::Result<FastLockGuard, LockResult> {
match self {
Self::Enabled(manager) => manager.acquire_write_lock(bucket, object, owner).await,
Self::Disabled(manager) => manager.acquire_write_lock(bucket, object, owner).await,
}
}
async fn acquire_write_lock_versioned(
&self,
bucket: impl Into<Arc<str>> + Send,
object: impl Into<Arc<str>> + Send,
version: impl Into<Arc<str>> + Send,
owner: impl Into<Arc<str>> + Send,
) -> std::result::Result<FastLockGuard, LockResult> {
match self {
Self::Enabled(manager) => manager.acquire_write_lock_versioned(bucket, object, version, owner).await,
Self::Disabled(manager) => manager.acquire_write_lock_versioned(bucket, object, version, owner).await,
Self::Enabled(manager) => manager.acquire_write_lock(key, owner).await,
Self::Disabled(manager) => manager.acquire_write_lock(key, owner).await,
}
}
@@ -290,98 +264,3 @@ pub fn get_global_fast_lock_manager() -> Arc<FastObjectLockManager> {
panic!("Cannot get FastObjectLockManager when locks are disabled. Use get_global_lock_manager() instead.");
})
}
// ============================================================================
// Convenience Functions
// ============================================================================
/// Create a new namespace lock
pub fn create_namespace_lock(namespace: String, _distributed: bool) -> NamespaceLock {
// The distributed behavior is now determined by the type of clients added to the NamespaceLock
// This function just creates an empty NamespaceLock
NamespaceLock::new(namespace)
}
#[cfg(test)]
mod tests {
use super::*;
#[tokio::test]
async fn test_global_lock_manager_basic() {
let manager = get_global_lock_manager();
// Should be able to acquire locks
let guard = manager.acquire_read_lock("bucket", "object", "owner").await;
assert!(guard.is_ok());
// Test metrics
let _metrics = manager.get_metrics();
// Even if locks are disabled, metrics should be available (empty or real)
// shard_count is usize so always >= 0
}
#[tokio::test]
async fn test_disabled_manager_direct() {
let manager = DisabledLockManager::new();
// All operations should succeed immediately
let guard = manager.acquire_read_lock("bucket", "object", "owner").await;
assert!(guard.is_ok());
assert!(guard.unwrap().is_disabled());
// Metrics should be empty
let metrics = manager.get_metrics();
assert!(metrics.is_empty());
assert_eq!(manager.total_lock_count(), 0);
}
#[tokio::test]
async fn test_enabled_manager_direct() {
let manager = FastObjectLockManager::new();
// Operations should work normally
let guard = manager.acquire_read_lock("bucket", "object", "owner").await;
assert!(guard.is_ok());
assert!(!guard.unwrap().is_disabled());
// Should have real metrics
let _metrics = manager.get_metrics();
// Note: total_lock_count might be > 0 due to previous lock acquisition
}
#[tokio::test]
async fn test_global_manager_enum_wrapper() {
// Test the GlobalLockManager enum directly
let enabled_manager = GlobalLockManager::Enabled(Arc::new(FastObjectLockManager::new()));
let disabled_manager = GlobalLockManager::Disabled(DisabledLockManager::new());
assert!(!enabled_manager.is_disabled());
assert!(disabled_manager.is_disabled());
// Test trait methods work for both
let enabled_guard = enabled_manager.acquire_read_lock("bucket", "obj", "owner").await;
let disabled_guard = disabled_manager.acquire_read_lock("bucket", "obj", "owner").await;
assert!(enabled_guard.is_ok());
assert!(disabled_guard.is_ok());
assert!(!enabled_guard.unwrap().is_disabled());
assert!(disabled_guard.unwrap().is_disabled());
}
#[tokio::test]
async fn test_batch_operations_work() {
let manager = get_global_lock_manager();
let batch = BatchLockRequest::new("owner")
.add_read_lock("bucket", "obj1")
.add_write_lock("bucket", "obj2");
let result = manager.acquire_locks_batch(batch).await;
// Should succeed regardless of whether locks are enabled or disabled
assert!(result.all_acquired);
assert_eq!(result.successful_locks.len(), 2);
assert!(result.failed_locks.is_empty());
}
}

View File

@@ -0,0 +1,110 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::{
GlobalLockManager, ObjectKey,
error::Result,
fast_lock::{FastLockGuard, LockManager, LockMode, ObjectLockRequest},
types::{LockPriority, LockRequest, LockType},
};
use std::sync::Arc;
use std::time::Duration;
/// Local lock handler using GlobalLockManager
/// Directly uses FastObjectLockManager for high-performance local locking.
///
/// Acts as the single-node counterpart to the distributed lock path: all
/// acquisitions are delegated to the shared [`GlobalLockManager`].
#[derive(Debug)]
pub struct LocalLock {
    /// Global lock manager for fast local locks
    manager: Arc<GlobalLockManager>,
    /// Namespace identifier
    namespace: String,
}
impl LocalLock {
    /// Create new local lock bound to the given namespace and manager.
    pub fn new(namespace: String, manager: Arc<GlobalLockManager>) -> Self {
        Self { namespace, manager }
    }

    /// Namespace identifier this handler was created with.
    pub fn namespace(&self) -> &str {
        &self.namespace
    }

    /// Build the namespaced key string for a resource.
    // NOTE(review): relies on ObjectKey's Display impl for the formatting —
    // the exact rendered form is defined there.
    pub fn get_resource_key(&self, resource: &ObjectKey) -> String {
        format!("{}:{}", self.namespace, resource)
    }

    /// Acquire a lock and return a RAII guard.
    ///
    /// Translates the generic `LockRequest` into the fast-lock request type
    /// and delegates to the global manager. Acquisition failure (e.g. a
    /// timeout) is reported as `Ok(None)`, not as an error.
    pub(crate) async fn acquire_guard(&self, request: &LockRequest) -> Result<Option<FastLockGuard>> {
        // Map the generic lock type onto the fast-lock mode.
        let mode = match request.lock_type {
            LockType::Exclusive => LockMode::Exclusive,
            LockType::Shared => LockMode::Shared,
        };
        // Bridge the two LockPriority enums (types:: vs fast_lock::types::).
        let priority = match request.priority {
            LockPriority::Low => crate::fast_lock::types::LockPriority::Low,
            LockPriority::Normal => crate::fast_lock::types::LockPriority::Normal,
            LockPriority::High => crate::fast_lock::types::LockPriority::High,
            LockPriority::Critical => crate::fast_lock::types::LockPriority::Critical,
        };
        let owner: Arc<str> = request.owner.clone().into();
        let fast_request = ObjectLockRequest {
            key: request.resource.clone(),
            mode,
            owner,
            acquire_timeout: request.acquire_timeout,
            lock_timeout: request.ttl,
            priority,
        };
        // Any acquisition error is flattened to `None`.
        Ok(self.manager.as_ref().acquire_lock(fast_request).await.ok())
    }

    /// Convenience: acquire exclusive lock as a guard
    pub async fn lock_guard(
        &self,
        resource: ObjectKey,
        owner: &str,
        timeout: Duration,
        ttl: Duration,
    ) -> Result<Option<FastLockGuard>> {
        let request = LockRequest::new(resource, LockType::Exclusive, owner)
            .with_acquire_timeout(timeout)
            .with_ttl(ttl);
        self.acquire_guard(&request).await
    }

    /// Convenience: acquire shared lock as a guard
    pub async fn rlock_guard(
        &self,
        resource: ObjectKey,
        owner: &str,
        timeout: Duration,
        ttl: Duration,
    ) -> Result<Option<FastLockGuard>> {
        let request = LockRequest::new(resource, LockType::Shared, owner)
            .with_acquire_timeout(timeout)
            .with_ttl(ttl);
        self.acquire_guard(&request).await
    }
}

View File

@@ -1,586 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use async_trait::async_trait;
use std::sync::Arc;
use std::time::Duration;
use crate::{
client::LockClient,
error::{LockError, Result},
guard::LockGuard,
types::{LockId, LockInfo, LockRequest, LockResponse, LockStatus, LockType},
};
/// Namespace lock for managing locks by resource namespaces
#[derive(Debug)]
pub struct NamespaceLock {
/// Lock clients for this namespace
clients: Vec<Arc<dyn LockClient>>,
/// Namespace identifier
namespace: String,
/// Quorum size for operations (1 for local, majority for distributed)
quorum: usize,
}
impl NamespaceLock {
/// Create new namespace lock
pub fn new(namespace: String) -> Self {
Self {
clients: Vec::new(),
namespace,
quorum: 1,
}
}
/// Create namespace lock with clients
pub fn with_clients(namespace: String, clients: Vec<Arc<dyn LockClient>>) -> Self {
let quorum = if clients.len() > 1 {
// For multiple clients (distributed mode), require majority
(clients.len() / 2) + 1
} else {
// For single client (local mode), only need 1
1
};
Self {
clients,
namespace,
quorum,
}
}
/// Create namespace lock with clients and an explicit quorum size.
/// Quorum will be clamped into [1, clients.len()]. For single client, quorum is always 1.
pub fn with_clients_and_quorum(namespace: String, clients: Vec<Arc<dyn LockClient>>, quorum: usize) -> Self {
let q = if clients.len() <= 1 {
1
} else {
quorum.clamp(1, clients.len())
};
Self {
clients,
namespace,
quorum: q,
}
}
/// Create namespace lock with client (compatibility)
pub fn with_client(client: Arc<dyn LockClient>) -> Self {
Self::with_clients("default".to_string(), vec![client])
}
/// Get namespace identifier
pub fn namespace(&self) -> &str {
&self.namespace
}
/// Get resource key for this namespace
pub fn get_resource_key(&self, resource: &str) -> String {
format!("{}:{}", self.namespace, resource)
}
/// Acquire lock using clients with transactional semantics (all-or-nothing)
pub async fn acquire_lock(&self, request: &LockRequest) -> Result<LockResponse> {
if self.clients.is_empty() {
return Err(LockError::internal("No lock clients available"));
}
// For single client, use it directly
if self.clients.len() == 1 {
return self.clients[0].acquire_lock(request).await;
}
// Quorum-based acquisition for distributed mode
let (resp, _idxs) = self.acquire_lock_quorum(request).await?;
Ok(resp)
}
/// Acquire a lock and return a RAII guard that will release asynchronously on Drop.
/// This is a thin wrapper around `acquire_lock` and will only create a guard when acquisition succeeds.
pub async fn acquire_guard(&self, request: &LockRequest) -> Result<Option<LockGuard>> {
if self.clients.is_empty() {
return Err(LockError::internal("No lock clients available"));
}
if self.clients.len() == 1 {
let resp = self.clients[0].acquire_lock(request).await?;
if resp.success {
return Ok(Some(LockGuard::new(
LockId::new_deterministic(&request.resource),
vec![self.clients[0].clone()],
)));
}
return Ok(None);
}
let (resp, idxs) = self.acquire_lock_quorum(request).await?;
if resp.success {
let subset: Vec<_> = idxs.into_iter().filter_map(|i| self.clients.get(i).cloned()).collect();
Ok(Some(LockGuard::new(LockId::new_deterministic(&request.resource), subset)))
} else {
Ok(None)
}
}
/// Convenience: acquire exclusive lock as a guard
pub async fn lock_guard(&self, resource: &str, owner: &str, timeout: Duration, ttl: Duration) -> Result<Option<LockGuard>> {
let req = LockRequest::new(self.get_resource_key(resource), LockType::Exclusive, owner)
.with_acquire_timeout(timeout)
.with_ttl(ttl);
self.acquire_guard(&req).await
}
/// Convenience: acquire shared lock as a guard
pub async fn rlock_guard(&self, resource: &str, owner: &str, timeout: Duration, ttl: Duration) -> Result<Option<LockGuard>> {
let req = LockRequest::new(self.get_resource_key(resource), LockType::Shared, owner)
.with_acquire_timeout(timeout)
.with_ttl(ttl);
self.acquire_guard(&req).await
}
/// Quorum-based lock acquisition: success if at least `self.quorum` clients succeed.
/// Returns the LockResponse and the indices of clients that acquired the lock.
///
/// On quorum failure, any partial acquisitions are rolled back (best effort)
/// before the failure response is returned, so no client is left holding the
/// lock for a request that "failed".
async fn acquire_lock_quorum(&self, request: &LockRequest) -> Result<(LockResponse, Vec<usize>)> {
    // Fan out to all clients concurrently; each future carries its index so we
    // can identify which clients succeeded after join_all.
    let futs: Vec<_> = self
        .clients
        .iter()
        .enumerate()
        .map(|(idx, client)| async move { (idx, client.acquire_lock(request).await) })
        .collect();
    let results = futures::future::join_all(futs).await;
    let mut successful_clients = Vec::new();
    for (idx, res) in results {
        // Transport errors (Err) and refusals (!success) both count as misses.
        if let Ok(resp) = res
            && resp.success
        {
            successful_clients.push(idx);
        }
    }
    if successful_clients.len() >= self.quorum {
        // Quorum reached: synthesize a LockInfo describing the acquisition.
        let resp = LockResponse::success(
            LockInfo {
                id: LockId::new_deterministic(&request.resource),
                resource: request.resource.clone(),
                lock_type: request.lock_type,
                status: LockStatus::Acquired,
                owner: request.owner.clone(),
                acquired_at: std::time::SystemTime::now(),
                expires_at: std::time::SystemTime::now() + request.ttl,
                last_refreshed: std::time::SystemTime::now(),
                metadata: request.metadata.clone(),
                priority: request.priority,
                wait_start_time: None,
            },
            Duration::ZERO,
        );
        Ok((resp, successful_clients))
    } else {
        // Quorum missed: undo the partial acquisitions so other owners are not
        // blocked by a failed attempt.
        if !successful_clients.is_empty() {
            self.rollback_acquisitions(request, &successful_clients).await;
        }
        let resp = LockResponse::failure(
            format!("Failed to acquire quorum: {}/{} required", successful_clients.len(), self.quorum),
            Duration::ZERO,
        );
        Ok((resp, Vec::new()))
    }
}
/// Best-effort rollback: release the lock on every client that had acquired it.
/// Individual release failures are logged and ignored so one unreachable
/// client does not block the others from rolling back.
async fn rollback_acquisitions(&self, request: &LockRequest, client_indices: &[usize]) {
    let lock_id = LockId::new_deterministic(&request.resource);
    let tasks: Vec<_> = client_indices
        .iter()
        .filter_map(|&i| self.clients.get(i))
        .map(|client| {
            let id = &lock_id;
            async move {
                if let Err(err) = client.release(id).await {
                    tracing::warn!("Failed to rollback lock on client: {}", err);
                }
            }
        })
        .collect();
    futures::future::join_all(tasks).await;
    tracing::info!(
        "Rolled back {} lock acquisitions for resource: {}",
        client_indices.len(),
        request.resource
    );
}
/// Release a lock across the configured clients.
///
/// With a single client the client's result is returned directly; with
/// multiple clients the release is fanned out concurrently and considered
/// successful if at least one client released the lock.
pub async fn release_lock(&self, lock_id: &LockId) -> Result<bool> {
    if self.clients.is_empty() {
        return Err(LockError::internal("No lock clients available"));
    }
    // Single-client fast path.
    if let [only] = self.clients.as_slice() {
        return only.release(lock_id).await;
    }
    // Fan out to every client; errors are treated as "not released".
    let tasks = self.clients.iter().map(|client| {
        let id = lock_id.clone();
        async move { client.release(&id).await }
    });
    let results = futures::future::join_all(tasks).await;
    let released = results.into_iter().flatten().filter(|&ok| ok).count();
    Ok(released > 0)
}
/// Build a health snapshot for this namespace.
///
/// Status is `Healthy` when at least one client answers its online probe,
/// `Degraded` otherwise (including when no clients are configured).
pub async fn get_health(&self) -> crate::types::HealthInfo {
    let lock_stats = self.get_stats().await;
    let mut health = crate::types::HealthInfo {
        node_id: self.namespace.clone(),
        lock_stats,
        ..Default::default()
    };
    // Probe each client sequentially and count the reachable ones.
    let mut connected_clients = 0;
    for client in &self.clients {
        if client.is_online().await {
            connected_clients += 1;
        }
    }
    health.connected_nodes = connected_clients;
    health.total_nodes = self.clients.len();
    health.status = match connected_clients {
        0 => crate::types::HealthStatus::Degraded,
        _ => crate::types::HealthStatus::Healthy,
    };
    health
}
/// Aggregate lock statistics across all clients, skipping any client whose
/// stats query fails.
pub async fn get_stats(&self) -> crate::types::LockStats {
    let mut aggregated = crate::types::LockStats::default();
    for client in &self.clients {
        let Ok(client_stats) = client.get_stats().await else {
            // Unreachable client: contribute nothing rather than failing.
            continue;
        };
        aggregated.successful_acquires += client_stats.successful_acquires;
        aggregated.failed_acquires += client_stats.failed_acquires;
    }
    aggregated
}
}
impl Default for NamespaceLock {
fn default() -> Self {
Self::new("default".to_string())
}
}
/// Namespace lock manager trait
///
/// Batch operations over multiple resources. Acquisition is transactional
/// (all-or-nothing, see the implementation's rollback behaviour); release is
/// best effort.
#[async_trait]
pub trait NamespaceLockManager: Send + Sync {
    /// Batch get write lock. Returns `Ok(true)` only when every resource was
    /// locked; `Ok(false)` indicates a conflict (no locks are left held).
    async fn lock_batch(&self, resources: &[String], owner: &str, timeout: Duration, ttl: Duration) -> Result<bool>;
    /// Batch release write lock (best effort; individual failures are logged).
    async fn unlock_batch(&self, resources: &[String], owner: &str) -> Result<()>;
    /// Batch get read lock. Same all-or-nothing semantics as `lock_batch`.
    async fn rlock_batch(&self, resources: &[String], owner: &str, timeout: Duration, ttl: Duration) -> Result<bool>;
    /// Batch release read lock (best effort; individual failures are logged).
    async fn runlock_batch(&self, resources: &[String], owner: &str) -> Result<()>;
}
#[async_trait]
impl NamespaceLockManager for NamespaceLock {
    /// Transactional batch write lock: either every resource is locked or none.
    /// On any per-resource failure — including a hard error from the backend —
    /// all previously acquired locks are rolled back before returning.
    async fn lock_batch(&self, resources: &[String], owner: &str, timeout: Duration, ttl: Duration) -> Result<bool> {
        if self.clients.is_empty() {
            return Err(LockError::internal("No lock clients available"));
        }
        // Transactional batch lock: all resources must be locked or none
        let mut acquired_resources = Vec::new();
        for resource in resources {
            let namespaced_resource = self.get_resource_key(resource);
            let request = LockRequest::new(&namespaced_resource, LockType::Exclusive, owner)
                .with_acquire_timeout(timeout)
                .with_ttl(ttl);
            // Roll back on hard errors too: propagating `Err` without cleanup
            // would leak every lock acquired earlier in this batch.
            let response = match self.acquire_lock(&request).await {
                Ok(resp) => resp,
                Err(e) => {
                    self.rollback_batch_locks(&acquired_resources, owner).await;
                    return Err(e);
                }
            };
            if response.success {
                acquired_resources.push(namespaced_resource);
            } else {
                // Rollback all previously acquired locks
                self.rollback_batch_locks(&acquired_resources, owner).await;
                return Ok(false);
            }
        }
        Ok(true)
    }

    /// Best-effort batch release of write locks; failures are logged per resource.
    async fn unlock_batch(&self, resources: &[String], _owner: &str) -> Result<()> {
        if self.clients.is_empty() {
            return Err(LockError::internal("No lock clients available"));
        }
        // Release all locks (best effort)
        let release_futures: Vec<_> = resources
            .iter()
            .map(|resource| {
                let namespaced_resource = self.get_resource_key(resource);
                let lock_id = LockId::new_deterministic(&namespaced_resource);
                async move {
                    if let Err(e) = self.release_lock(&lock_id).await {
                        tracing::warn!("Failed to release lock for resource {}: {}", resource, e);
                    }
                }
            })
            .collect();
        futures::future::join_all(release_futures).await;
        Ok(())
    }

    /// Transactional batch read lock: same all-or-nothing semantics as `lock_batch`.
    async fn rlock_batch(&self, resources: &[String], owner: &str, timeout: Duration, ttl: Duration) -> Result<bool> {
        if self.clients.is_empty() {
            return Err(LockError::internal("No lock clients available"));
        }
        // Transactional batch read lock: all resources must be locked or none
        let mut acquired_resources = Vec::new();
        for resource in resources {
            let namespaced_resource = self.get_resource_key(resource);
            let request = LockRequest::new(&namespaced_resource, LockType::Shared, owner)
                .with_acquire_timeout(timeout)
                .with_ttl(ttl);
            // Roll back on hard errors as well (see lock_batch).
            let response = match self.acquire_lock(&request).await {
                Ok(resp) => resp,
                Err(e) => {
                    self.rollback_batch_locks(&acquired_resources, owner).await;
                    return Err(e);
                }
            };
            if response.success {
                acquired_resources.push(namespaced_resource);
            } else {
                // Rollback all previously acquired read locks
                self.rollback_batch_locks(&acquired_resources, owner).await;
                return Ok(false);
            }
        }
        Ok(true)
    }

    /// Best-effort batch release of read locks; failures are logged per resource.
    async fn runlock_batch(&self, resources: &[String], _owner: &str) -> Result<()> {
        if self.clients.is_empty() {
            return Err(LockError::internal("No lock clients available"));
        }
        // Release all read locks (best effort)
        let release_futures: Vec<_> = resources
            .iter()
            .map(|resource| {
                let namespaced_resource = self.get_resource_key(resource);
                let lock_id = LockId::new_deterministic(&namespaced_resource);
                async move {
                    if let Err(e) = self.release_lock(&lock_id).await {
                        tracing::warn!("Failed to release read lock for resource {}: {}", resource, e);
                    }
                }
            })
            .collect();
        futures::future::join_all(release_futures).await;
        Ok(())
    }
}
impl NamespaceLock {
    /// Best-effort rollback of a partially completed batch acquisition:
    /// releases every resource in `acquired_resources` concurrently, logging
    /// (and ignoring) individual release failures.
    async fn rollback_batch_locks(&self, acquired_resources: &[String], _owner: &str) {
        let tasks = acquired_resources.iter().map(|resource| {
            let lock_id = LockId::new_deterministic(resource);
            async move {
                if let Err(e) = self.release_lock(&lock_id).await {
                    tracing::warn!("Failed to rollback lock for resource {}: {}", resource, e);
                }
            }
        });
        futures::future::join_all(tasks).await;
        tracing::info!("Rolled back {} batch lock acquisitions", acquired_resources.len());
    }
}
#[cfg(test)]
mod tests {
    use crate::LocalClient;
    use super::*;

    // Batch lock/unlock round-trip against a single local client.
    #[tokio::test]
    async fn test_namespace_lock_local() {
        let ns_lock = NamespaceLock::with_client(Arc::new(LocalClient::new()));
        let resources = vec!["test1".to_string(), "test2".to_string()];
        // Test batch lock
        let result = ns_lock
            .lock_batch(&resources, "test_owner", Duration::from_millis(100), Duration::from_secs(10))
            .await;
        assert!(result.is_ok());
        assert!(result.unwrap());
        // Test batch unlock
        let result = ns_lock.unlock_batch(&resources, "test_owner").await;
        assert!(result.is_ok());
    }

    // Verifies that dropping a guard eventually releases the lock so the same
    // resource can be re-acquired.
    #[tokio::test]
    async fn test_guard_acquire_and_drop_release() {
        let ns_lock = NamespaceLock::with_client(Arc::new(LocalClient::new()));
        // Acquire guard
        let guard = ns_lock
            .lock_guard("guard-resource", "owner", Duration::from_millis(100), Duration::from_secs(5))
            .await
            .unwrap();
        assert!(guard.is_some());
        let lock_id = guard.as_ref().unwrap().lock_id().clone();
        // Drop guard to trigger background release
        drop(guard);
        // Give background worker a moment to process
        tokio::time::sleep(Duration::from_millis(50)).await;
        // Re-acquire should succeed (previous lock released)
        let req = LockRequest::new(&lock_id.resource, LockType::Exclusive, "owner").with_ttl(Duration::from_secs(2));
        let resp = ns_lock.acquire_lock(&req).await.unwrap();
        assert!(resp.success);
        // Cleanup
        let _ = ns_lock.release_lock(&LockId::new_deterministic(&lock_id.resource)).await;
    }

    // A namespace with zero clients must report Degraded health.
    #[tokio::test]
    async fn test_connection_health() {
        let local_lock = NamespaceLock::new("test-namespace".to_string());
        let health = local_lock.get_health().await;
        assert_eq!(health.status, crate::types::HealthStatus::Degraded); // No clients
    }

    // Constructor stores the namespace verbatim.
    #[tokio::test]
    async fn test_namespace_lock_creation() {
        let ns_lock = NamespaceLock::new("test-namespace".to_string());
        assert_eq!(ns_lock.namespace(), "test-namespace");
    }

    // `with_client` defaults the namespace and registers exactly one local client.
    #[tokio::test]
    async fn test_namespace_lock_new_local() {
        let ns_lock = NamespaceLock::with_client(Arc::new(LocalClient::new()));
        assert_eq!(ns_lock.namespace(), "default");
        assert_eq!(ns_lock.clients.len(), 1);
        assert!(ns_lock.clients[0].is_local().await);
        // Test that it can perform lock operations
        let resources = vec!["test-resource".to_string()];
        let result = ns_lock
            .lock_batch(&resources, "test-owner", Duration::from_millis(100), Duration::from_secs(10))
            .await;
        assert!(result.is_ok());
        assert!(result.unwrap());
    }

    // Resource keys are "<namespace>:<resource>".
    #[tokio::test]
    async fn test_namespace_lock_resource_key() {
        let ns_lock = NamespaceLock::new("test-namespace".to_string());
        // Test resource key generation
        let resource_key = ns_lock.get_resource_key("test-resource");
        assert_eq!(resource_key, "test-namespace:test-resource");
    }

    // A conflict mid-batch must fail the whole batch and roll back the locks
    // acquired before the conflict.
    #[tokio::test]
    async fn test_transactional_batch_lock() {
        let ns_lock = NamespaceLock::with_client(Arc::new(LocalClient::new()));
        let resources = vec!["resource1".to_string(), "resource2".to_string(), "resource3".to_string()];
        // First, acquire one of the resources to simulate conflict
        let conflicting_request = LockRequest::new(ns_lock.get_resource_key("resource2"), LockType::Exclusive, "other_owner")
            .with_ttl(Duration::from_secs(10));
        let response = ns_lock.acquire_lock(&conflicting_request).await.unwrap();
        assert!(response.success);
        // Now try batch lock - should fail and rollback
        let result = ns_lock
            .lock_batch(&resources, "test_owner", Duration::from_millis(10), Duration::from_secs(5))
            .await;
        assert!(result.is_ok());
        assert!(!result.unwrap()); // Should fail due to conflict
        // Verify that no locks were left behind (all rolled back)
        for resource in &resources {
            if resource != "resource2" {
                // Skip the one we intentionally locked
                let check_request = LockRequest::new(ns_lock.get_resource_key(resource), LockType::Exclusive, "verify_owner")
                    .with_ttl(Duration::from_secs(1));
                let check_response = ns_lock.acquire_lock(&check_request).await.unwrap();
                assert!(check_response.success, "Resource {resource} should be available after rollback");
                // Clean up
                let lock_id = LockId::new_deterministic(&ns_lock.get_resource_key(resource));
                let _ = ns_lock.release_lock(&lock_id).await;
            }
        }
    }

    // Two clients with quorum=1: a shared lock must be granted.
    #[tokio::test]
    async fn test_distributed_lock_consistency() {
        // Create a namespace with multiple local clients to simulate distributed scenario
        let client1: Arc<dyn LockClient> = Arc::new(LocalClient::new());
        let client2: Arc<dyn LockClient> = Arc::new(LocalClient::new());
        let clients = vec![client1, client2];
        // LocalClient shares a global in-memory map. For exclusive locks, only one can acquire at a time.
        // In real distributed setups the quorum should be tied to EC write quorum. Here we use quorum=1 for success.
        let ns_lock = NamespaceLock::with_clients_and_quorum("test-namespace".to_string(), clients, 1);
        let request = LockRequest::new("test-resource", LockType::Shared, "test_owner").with_ttl(Duration::from_secs(2));
        // This should succeed only if ALL clients can acquire the lock
        let response = ns_lock.acquire_lock(&request).await.unwrap();
        // Since we're using separate LocalClient instances, they don't share state
        // so this test demonstrates the consistency check
        assert!(response.success); // Either all succeed or rollback happens
    }
}

View File

@@ -0,0 +1,336 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::{
ObjectKey,
client::LockClient,
distributed_lock::{DistributedLock, DistributedLockGuard},
error::Result,
fast_lock::FastLockGuard,
local_lock::LocalLock,
types::{LockId, LockRequest},
};
use std::sync::Arc;
use std::time::Duration;
#[cfg(test)]
mod tests;
/// Unified guard for namespace locks.
///
/// Wraps either a [`DistributedLockGuard`] (distributed path) or a
/// [`FastLockGuard`] (local `GlobalLockManager` path) behind one type so
/// callers can hold a guard without caring which backend produced it.
#[derive(Debug)]
pub enum NamespaceLockGuard {
    /// Standard guard for Distributed locks
    Standard(DistributedLockGuard),
    /// Fast guard for Local locks using GlobalLockManager
    Fast(FastLockGuard),
}
/// Wrapper for NamespaceLock that provides convenient lock acquisition methods.
/// This wrapper holds the lock instance and resource information for easy lock acquisition.
#[derive(Debug)]
pub struct NamespaceLockWrapper {
    // Underlying namespace lock (distributed or local backend).
    lock: NamespaceLock,
    // The fixed resource every acquisition through this wrapper targets.
    resource: ObjectKey,
    // Owner identity passed on every acquisition.
    owner: String,
}
impl NamespaceLockWrapper {
/// Create a new wrapper with the lock, resource, and owner
pub fn new(lock: NamespaceLock, resource: ObjectKey, owner: String) -> Self {
Self { lock, resource, owner }
}
/// Acquire write lock (exclusive lock) with timeout
/// Returns the guard if acquisition succeeds, or an error if it fails
pub async fn get_write_lock(&self, timeout: Duration) -> std::result::Result<NamespaceLockGuard, crate::error::LockError> {
self.lock.get_write_lock(self.resource.clone(), &self.owner, timeout).await
}
/// Acquire read lock (shared lock) with timeout
/// Returns the guard if acquisition succeeds, or an error if it fails
pub async fn get_read_lock(&self, timeout: Duration) -> std::result::Result<NamespaceLockGuard, crate::error::LockError> {
self.lock.get_read_lock(self.resource.clone(), &self.owner, timeout).await
}
}
impl NamespaceLockGuard {
    /// Lock ID, available only for `Standard` (distributed) guards.
    pub fn lock_id(&self) -> Option<&LockId> {
        if let Self::Standard(guard) = self {
            Some(guard.lock_id())
        } else {
            None
        }
    }

    /// Object key, available only for `Fast` (local) guards.
    pub fn key(&self) -> Option<&ObjectKey> {
        if let Self::Fast(guard) = self {
            Some(guard.key())
        } else {
            None
        }
    }

    /// Manually release the lock before Drop.
    pub fn release(&mut self) -> bool {
        match self {
            // DistributedLockGuard::release() actually releases the lock and then disarms
            Self::Standard(guard) => guard.release(),
            Self::Fast(guard) => guard.release(),
        }
    }

    /// Whether the lock has already been released.
    pub fn is_released(&self) -> bool {
        match self {
            // A disarmed standard guard indicates the lock was released.
            Self::Standard(guard) => guard.is_disarmed(),
            Self::Fast(guard) => guard.is_released(),
        }
    }
}
/// Namespace lock for managing locks by resource namespaces.
///
/// Two backends are supported: a quorum-based [`DistributedLock`] over one or
/// more `LockClient`s, and a [`LocalLock`] backed by `GlobalLockManager` for
/// single-process use.
#[derive(Debug)]
pub enum NamespaceLock {
    /// Distributed lock (distributed use case)
    Distributed(DistributedLock),
    /// Local lock using GlobalLockManager (high-performance local locking)
    Local(LocalLock),
}
impl NamespaceLock {
    /// Create new namespace lock with single client (local use case).
    /// Uses DistributedLock with quorum=1 for single client.
    pub fn new(namespace: String, client: Arc<dyn LockClient>) -> Self {
        Self::Distributed(DistributedLock::new(namespace, vec![client], 1))
    }

    /// Create namespace lock with client (compatibility).
    /// Uses DistributedLock with quorum=1 under the "default" namespace.
    pub fn with_client(client: Arc<dyn LockClient>) -> Self {
        Self::Distributed(DistributedLock::new("default".to_string(), vec![client], 1))
    }

    /// Create namespace lock with GlobalLockManager (high-performance local locking).
    pub fn with_local_manager(namespace: String, manager: Arc<crate::GlobalLockManager>) -> Self {
        Self::Local(LocalLock::new(namespace, manager))
    }

    /// Create namespace lock with clients.
    /// Multiple clients use a simple-majority quorum `(n / 2) + 1`;
    /// a single client uses quorum=1.
    pub fn with_clients(namespace: String, clients: Vec<Arc<dyn LockClient>>) -> Self {
        // Multiple clients: use DistributedLock with majority quorum
        let quorum = if clients.len() > 1 { (clients.len() / 2) + 1 } else { 1 };
        Self::Distributed(DistributedLock::new(namespace, clients, quorum))
    }

    /// Create namespace lock with clients and an explicit quorum size.
    /// Quorum will be clamped into [1, clients.len()].
    pub fn with_clients_and_quorum(namespace: String, clients: Vec<Arc<dyn LockClient>>, quorum: usize) -> Self {
        // Enforce the documented clamp so an out-of-range value (0, or more
        // than the client count) cannot make acquisition trivially impossible.
        // `max(1)` keeps the clamp range valid even for an empty client list.
        let quorum = quorum.clamp(1, clients.len().max(1));
        Self::Distributed(DistributedLock::new(namespace, clients, quorum))
    }

    /// Get namespace identifier.
    pub fn namespace(&self) -> &str {
        match self {
            Self::Distributed(lock) => lock.namespace(),
            Self::Local(lock) => lock.namespace(),
        }
    }

    /// Get the namespaced key for a resource.
    pub fn get_resource_key(&self, resource: &ObjectKey) -> String {
        match self {
            Self::Distributed(lock) => lock.get_resource_key(resource),
            Self::Local(lock) => lock.get_resource_key(resource),
        }
    }

    /// Acquire a lock and return a RAII guard that will release asynchronously on Drop.
    /// This is a thin wrapper around the backend's `acquire_guard` and only creates a
    /// guard when acquisition succeeds (`Ok(None)` means refused, not an error).
    pub async fn acquire_guard(&self, request: &LockRequest) -> Result<Option<NamespaceLockGuard>> {
        match self {
            Self::Distributed(lock) => lock
                .acquire_guard(request)
                .await
                .map(|opt| opt.map(NamespaceLockGuard::Standard)),
            Self::Local(lock) => lock.acquire_guard(request).await.map(|opt| opt.map(NamespaceLockGuard::Fast)),
        }
    }

    /// Convenience: acquire exclusive lock as a guard.
    pub async fn lock_guard(
        &self,
        resource: ObjectKey,
        owner: &str,
        timeout: Duration,
        ttl: Duration,
    ) -> Result<Option<NamespaceLockGuard>> {
        match self {
            Self::Distributed(lock) => lock
                .lock_guard(resource, owner, timeout, ttl)
                .await
                .map(|opt| opt.map(NamespaceLockGuard::Standard)),
            Self::Local(lock) => lock
                .lock_guard(resource, owner, timeout, ttl)
                .await
                .map(|opt| opt.map(NamespaceLockGuard::Fast)),
        }
    }

    /// Convenience: acquire shared lock as a guard.
    pub async fn rlock_guard(
        &self,
        resource: ObjectKey,
        owner: &str,
        timeout: Duration,
        ttl: Duration,
    ) -> Result<Option<NamespaceLockGuard>> {
        match self {
            Self::Distributed(lock) => lock
                .rlock_guard(resource, owner, timeout, ttl)
                .await
                .map(|opt| opt.map(NamespaceLockGuard::Standard)),
            Self::Local(lock) => lock
                .rlock_guard(resource, owner, timeout, ttl)
                .await
                .map(|opt| opt.map(NamespaceLockGuard::Fast)),
        }
    }

    /// Acquire write lock (exclusive lock) with timeout.
    ///
    /// # Errors
    /// Returns `LockError::timeout` when no guard could be obtained within
    /// `timeout`, or the backend's error otherwise.
    pub async fn get_write_lock(
        &self,
        resource: ObjectKey,
        owner: &str,
        timeout: Duration,
    ) -> std::result::Result<NamespaceLockGuard, crate::error::LockError> {
        let ttl = crate::fast_lock::DEFAULT_LOCK_TIMEOUT;
        let resource_str = resource.to_string();
        match self.lock_guard(resource, owner, timeout, ttl).await {
            Ok(Some(guard)) => Ok(guard),
            Ok(None) => {
                // None can mean timeout or other failure - check if it's a quorum error
                // For distributed locks, quorum errors are already converted to LockError::QuorumNotReached
                // So if we get None here, it's likely a timeout
                Err(crate::error::LockError::timeout(resource_str, timeout))
            }
            Err(e) => Err(e),
        }
    }

    /// Acquire read lock (shared lock) with timeout.
    ///
    /// # Errors
    /// Returns `LockError::timeout` when no guard could be obtained within
    /// `timeout`, or the backend's error otherwise.
    pub async fn get_read_lock(
        &self,
        resource: ObjectKey,
        owner: &str,
        timeout: Duration,
    ) -> std::result::Result<NamespaceLockGuard, crate::error::LockError> {
        let ttl = crate::fast_lock::DEFAULT_LOCK_TIMEOUT;
        let resource_str = resource.to_string();
        match self.rlock_guard(resource, owner, timeout, ttl).await {
            Ok(Some(guard)) => Ok(guard),
            Ok(None) => Err(crate::error::LockError::timeout(resource_str, timeout)),
            Err(e) => Err(e),
        }
    }

    /// Get health information.
    pub async fn get_health(&self) -> crate::types::HealthInfo {
        let lock_stats = self.get_stats().await;
        let namespace = self.namespace().to_string();
        let mut health = crate::types::HealthInfo {
            node_id: namespace,
            lock_stats,
            ..Default::default()
        };
        match self {
            Self::Distributed(lock) => {
                // Check client status - parallelize async calls for better performance
                let clients = lock.clients();
                let client_checks: Vec<_> = clients.iter().map(|client| client.is_online()).collect();
                let results = futures::future::join_all(client_checks).await;
                let connected_clients = results.iter().filter(|&&online| online).count();
                // NOTE(review): the quorum is recomputed with the majority formula here;
                // if this lock was built via `with_clients_and_quorum` with a different
                // value, this health verdict may disagree with the acquisition quorum —
                // confirm against DistributedLock's stored configuration.
                let quorum = if clients.len() > 1 { (clients.len() / 2) + 1 } else { 1 };
                health.status = if connected_clients >= quorum {
                    crate::types::HealthStatus::Healthy
                } else {
                    crate::types::HealthStatus::Degraded
                };
                health.connected_nodes = connected_clients;
                health.total_nodes = clients.len();
            }
            Self::Local(_) => {
                // Local locks are always healthy (they use GlobalLockManager which is always available)
                health.status = crate::types::HealthStatus::Healthy;
                health.connected_nodes = 1;
                health.total_nodes = 1;
            }
        }
        health
    }

    /// Get namespace statistics.
    pub async fn get_stats(&self) -> crate::types::LockStats {
        let mut stats = crate::types::LockStats::default();
        match self {
            Self::Distributed(lock) => {
                // Parallelize stats collection for better performance
                let stats_futures: Vec<_> = lock.clients().iter().map(|client| client.get_stats()).collect();
                let results = futures::future::join_all(stats_futures).await;
                for result in results {
                    match result {
                        Ok(client_stats) => {
                            stats.successful_acquires += client_stats.successful_acquires;
                            stats.failed_acquires += client_stats.failed_acquires;
                        }
                        Err(e) => {
                            tracing::debug!("Failed to get stats from client: {}", e);
                        }
                    }
                }
            }
            Self::Local(_) => {
                // Local locks use GlobalLockManager which doesn't expose detailed stats
                // Stats are tracked internally but not exposed through the same interface
                // We leave stats at default (0) for now
            }
        }
        stats
    }
}
impl Default for NamespaceLock {
fn default() -> Self {
use crate::client::ClientFactory;
Self::new("default".to_string(), ClientFactory::create_local())
}
}

View File

@@ -0,0 +1,370 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use super::*;
use crate::GlobalLockManager;
use crate::client::{ClientFactory, local::LocalClient};
use crate::types::LockType;
use std::sync::Arc;
use std::time::Duration;
/// Build an unversioned `ObjectKey` for tests from a bucket/object pair.
fn create_test_object_key(bucket: &str, object: &str) -> ObjectKey {
    ObjectKey {
        bucket: Arc::from(bucket),
        object: Arc::from(object),
        version: None,
    }
}
// `new` stores the provided namespace verbatim.
#[tokio::test]
async fn test_namespace_lock_new() {
    let client = ClientFactory::create_local();
    let lock = NamespaceLock::new("test-namespace".to_string(), client);
    assert_eq!(lock.namespace(), "test-namespace");
}

// `with_client` falls back to the "default" namespace.
#[tokio::test]
async fn test_namespace_lock_with_client() {
    let client = ClientFactory::create_local();
    let lock = NamespaceLock::with_client(client);
    assert_eq!(lock.namespace(), "default");
}

// `with_local_manager` builds the Local variant with the given namespace.
#[tokio::test]
async fn test_namespace_lock_with_local_manager() {
    let manager = Arc::new(GlobalLockManager::new());
    let lock = NamespaceLock::with_local_manager("local-ns".to_string(), manager);
    assert_eq!(lock.namespace(), "local-ns");
}

// `with_clients` accepts multiple clients and keeps the namespace.
#[tokio::test]
async fn test_namespace_lock_with_clients() {
    let clients = vec![ClientFactory::create_local(), ClientFactory::create_local()];
    let lock = NamespaceLock::with_clients("multi-client".to_string(), clients);
    assert_eq!(lock.namespace(), "multi-client");
}

// Resource keys embed the namespace, bucket and object names.
#[tokio::test]
async fn test_namespace_lock_get_resource_key() {
    let client = ClientFactory::create_local();
    let lock = NamespaceLock::new("test-ns".to_string(), client);
    let resource = create_test_object_key("bucket", "object");
    let key = lock.get_resource_key(&resource);
    assert!(key.contains("test-ns"));
    assert!(key.contains("bucket"));
    assert!(key.contains("object"));
}
// Local acquisition must yield a Fast guard keyed to the requested resource,
// releasable exactly once.
#[tokio::test]
async fn test_namespace_lock_acquire_guard_local() {
    let manager = Arc::new(GlobalLockManager::new());
    let lock = NamespaceLock::with_local_manager("test-local".to_string(), manager);
    let resource = create_test_object_key("bucket", "object");
    let request = LockRequest::new(resource.clone(), LockType::Exclusive, "owner1")
        .with_acquire_timeout(Duration::from_secs(5))
        .with_ttl(Duration::from_secs(30));
    let guard_opt = lock.acquire_guard(&request).await.unwrap();
    assert!(guard_opt.is_some());
    if let Some(NamespaceLockGuard::Fast(guard)) = guard_opt {
        assert_eq!(guard.key(), &resource);
        assert!(!guard.is_released());
        // Test release
        let mut guard = guard;
        assert!(guard.release());
        assert!(guard.is_released());
    } else {
        panic!("Expected Fast guard");
    }
}

// `get_write_lock` on the Local backend returns a live Fast guard.
#[tokio::test]
async fn test_namespace_lock_get_write_lock_local() {
    let manager = Arc::new(GlobalLockManager::new());
    let lock = NamespaceLock::with_local_manager("test-write".to_string(), manager);
    let resource = create_test_object_key("bucket", "object");
    let guard = lock
        .get_write_lock(resource.clone(), "owner1", Duration::from_secs(5))
        .await
        .unwrap();
    match guard {
        NamespaceLockGuard::Fast(guard) => {
            assert_eq!(guard.key(), &resource);
            assert!(!guard.is_released());
        }
        NamespaceLockGuard::Standard(_) => {
            panic!("Expected Fast guard for local lock");
        }
    }
}

// `get_read_lock` on the Local backend also returns a live Fast guard.
#[tokio::test]
async fn test_namespace_lock_get_read_lock_local() {
    let manager = Arc::new(GlobalLockManager::new());
    let lock = NamespaceLock::with_local_manager("test-read".to_string(), manager);
    let resource = create_test_object_key("bucket", "object");
    let guard = lock
        .get_read_lock(resource.clone(), "owner1", Duration::from_secs(5))
        .await
        .unwrap();
    match guard {
        NamespaceLockGuard::Fast(guard) => {
            assert_eq!(guard.key(), &resource);
            assert!(!guard.is_released());
        }
        NamespaceLockGuard::Standard(_) => {
            panic!("Expected Fast guard for local lock");
        }
    }
}

// Manual release flips the guard's released flag.
#[tokio::test]
async fn test_namespace_lock_guard_release() {
    let manager = Arc::new(GlobalLockManager::new());
    let lock = NamespaceLock::with_local_manager("test-release".to_string(), manager);
    let resource = create_test_object_key("bucket", "object");
    let mut guard = lock.get_write_lock(resource, "owner1", Duration::from_secs(5)).await.unwrap();
    assert!(!guard.is_released());
    assert!(guard.release());
    assert!(guard.is_released());
}
// The wrapper forwards to its bound resource/owner and yields a Fast guard.
#[tokio::test]
async fn test_namespace_lock_wrapper() {
    let manager = Arc::new(GlobalLockManager::new());
    let lock = NamespaceLock::with_local_manager("wrapper-test".to_string(), manager);
    let resource = create_test_object_key("bucket", "object");
    let wrapper = NamespaceLockWrapper::new(lock, resource.clone(), "owner1".to_string());
    let guard = wrapper.get_write_lock(Duration::from_secs(5)).await.unwrap();
    match guard {
        NamespaceLockGuard::Fast(guard) => {
            assert_eq!(guard.key(), &resource);
        }
        _ => panic!("Expected Fast guard"),
    }
}

// Local backend always reports a single healthy node.
#[tokio::test]
async fn test_namespace_lock_get_health_local() {
    let manager = Arc::new(GlobalLockManager::new());
    let lock = NamespaceLock::with_local_manager("health-test".to_string(), manager);
    let health = lock.get_health().await;
    assert_eq!(health.node_id, "health-test");
    assert_eq!(health.status, crate::types::HealthStatus::Healthy);
    assert_eq!(health.connected_nodes, 1);
    assert_eq!(health.total_nodes, 1);
}

// Local backend exposes no per-client stats, so counters stay at zero.
#[tokio::test]
async fn test_namespace_lock_get_stats_local() {
    let manager = Arc::new(GlobalLockManager::new());
    let lock = NamespaceLock::with_local_manager("stats-test".to_string(), manager);
    let stats = lock.get_stats().await;
    // Local locks don't expose detailed stats, so defaults should be 0
    assert_eq!(stats.successful_acquires, 0);
    assert_eq!(stats.failed_acquires, 0);
}

// `Default` uses the "default" namespace.
#[tokio::test]
async fn test_namespace_lock_default() {
    let lock = NamespaceLock::default();
    assert_eq!(lock.namespace(), "default");
}

// Standard guards expose a non-empty deterministic lock id.
#[tokio::test]
async fn test_namespace_lock_guard_lock_id() {
    let client = ClientFactory::create_local();
    let lock = NamespaceLock::new("test-id".to_string(), client);
    let resource = create_test_object_key("bucket", "object");
    let request = LockRequest::new(resource, LockType::Exclusive, "owner1")
        .with_acquire_timeout(Duration::from_secs(5))
        .with_ttl(Duration::from_secs(30));
    if let Some(NamespaceLockGuard::Standard(guard)) = lock.acquire_guard(&request).await.unwrap() {
        // lock_id() returns &LockId, not Option, so we just check it's not empty
        let lock_id = guard.lock_id();
        assert!(!lock_id.uuid.is_empty());
    }
}
// End-to-end exercise of the distributed path with three independent backends:
// mutual exclusion between owners, release-then-reacquire, and health reporting.
#[tokio::test]
async fn test_namespace_lock_distributed_multi_node_simulation() {
    // Simulate a 3-node distributed environment where each node has its own lock backend
    let manager1 = Arc::new(GlobalLockManager::new());
    let manager2 = Arc::new(GlobalLockManager::new());
    let manager3 = Arc::new(GlobalLockManager::new());
    // Create 3 clients, each bound to its own manager (simulating independent nodes)
    let client1: Arc<dyn LockClient> = Arc::new(LocalClient::with_manager(manager1));
    let client2: Arc<dyn LockClient> = Arc::new(LocalClient::with_manager(manager2));
    let client3: Arc<dyn LockClient> = Arc::new(LocalClient::with_manager(manager3));
    let clients = vec![client1, client2, client3];
    // Create NamespaceLock with 3 clients (quorum will be 2)
    let lock = NamespaceLock::with_clients("multi-node".to_string(), clients);
    assert_eq!(lock.namespace(), "multi-node");
    let resource = create_test_object_key("test-bucket", "test-object");
    // Test 1: Owner A acquires write lock successfully
    let mut guard_a = lock
        .get_write_lock(resource.clone(), "owner-a", Duration::from_secs(5))
        .await
        .expect("Owner A should acquire write lock");
    // Verify it's a Standard guard (DistributedLock path)
    match &guard_a {
        NamespaceLockGuard::Standard(_) => {
            // Expected for distributed lock
        }
        NamespaceLockGuard::Fast(_) => {
            panic!("Expected Standard guard for distributed lock");
        }
    }
    // Test 2: Owner B tries to acquire write lock while A holds it - should fail
    // Since all 3 backends are holding locks from owner-a, owner-b cannot acquire on any backend
    // This means 0 successes < quorum(2), so acquisition should fail
    let result_b = lock
        .get_write_lock(resource.clone(), "owner-b", Duration::from_millis(100))
        .await;
    assert!(result_b.is_err(), "Owner B should fail to acquire lock while owner A holds it");
    // Verify the error is a timeout or quorum failure (since quorum cannot be reached)
    if let Err(err) = result_b {
        // The error should indicate timeout or quorum failure
        let err_str = err.to_string().to_lowercase();
        assert!(
            err_str.contains("timeout") || err_str.contains("quorum") || err_str.contains("not reached"),
            "Error should be timeout or quorum related, got: {}",
            err
        );
    }
    // Test 3: Release owner A's lock
    assert!(guard_a.release(), "Should release guard_a successfully");
    assert!(guard_a.is_released(), "Guard A should be marked as released");
    // Test 4: Owner B should now be able to acquire the lock
    let guard_b = lock
        .get_write_lock(resource.clone(), "owner-b", Duration::from_secs(5))
        .await
        .expect("Owner B should acquire write lock after A releases");
    match &guard_b {
        NamespaceLockGuard::Standard(_) => {
            // Expected for distributed lock
        }
        NamespaceLockGuard::Fast(_) => {
            panic!("Expected Standard guard for distributed lock");
        }
    }
    // Test 5: Verify health check shows 3 nodes
    let health = lock.get_health().await;
    assert_eq!(health.node_id, "multi-node");
    assert_eq!(health.total_nodes, 3);
    assert_eq!(health.connected_nodes, 3);
    assert_eq!(health.status, crate::types::HealthStatus::Healthy);
    // Cleanup
    drop(guard_b);
}
#[tokio::test]
async fn test_namespace_lock_distributed_with_clients_and_quorum() {
    // Mirrors the multi-node simulation above, but constructs the lock via
    // `with_clients_and_quorum` with an explicit quorum of 2 (the same value
    // `with_clients` would derive for a 3-node cluster).
    //
    // Scenario:
    //   1. owner-a takes the write lock (Standard/distributed guard expected)
    //   2. owner-b contends while owner-a holds it -> must fail (quorum unreachable)
    //   3. owner-a releases
    //   4. owner-b now succeeds
    //   5. health reports 3/3 connected, Healthy

    // Each client is bound to its own GlobalLockManager, simulating three
    // independent nodes.
    let backends: Vec<Arc<dyn LockClient>> = (0..3)
        .map(|_| Arc::new(LocalClient::with_manager(Arc::new(GlobalLockManager::new()))) as Arc<dyn LockClient>)
        .collect();

    // Explicit quorum = 2 out of 3 backends.
    let ns_lock = NamespaceLock::with_clients_and_quorum("multi-node".to_string(), backends, 2);
    assert_eq!(ns_lock.namespace(), "multi-node");

    let key = create_test_object_key("test-bucket", "test-object");

    // Step 1: owner-a acquires the write lock and gets the distributed guard.
    let mut writer_a = ns_lock
        .get_write_lock(key.clone(), "owner-a", Duration::from_secs(5))
        .await
        .expect("Owner A should acquire write lock");
    assert!(
        matches!(&writer_a, NamespaceLockGuard::Standard(_)),
        "Expected Standard guard for distributed lock"
    );

    // Step 2: owner-b contends while owner-a holds the lock; with all three
    // backends held by owner-a, 0 successes < quorum(2), so this must fail.
    let contend_result = ns_lock
        .get_write_lock(key.clone(), "owner-b", Duration::from_millis(100))
        .await;
    let contend_err = match contend_result {
        Err(e) => e,
        Ok(_) => panic!("Owner B should fail to acquire lock while owner A holds it"),
    };
    // The failure must be timeout- or quorum-related.
    let msg = contend_err.to_string().to_lowercase();
    assert!(
        msg.contains("timeout") || msg.contains("quorum") || msg.contains("not reached"),
        "Error should be timeout or quorum related, got: {}",
        contend_err
    );

    // Step 3: owner-a releases its guard.
    assert!(writer_a.release(), "Should release guard_a successfully");
    assert!(writer_a.is_released(), "Guard A should be marked as released");

    // Step 4: with the lock free again, owner-b succeeds.
    let writer_b = ns_lock
        .get_write_lock(key.clone(), "owner-b", Duration::from_secs(5))
        .await
        .expect("Owner B should acquire write lock after A releases");
    assert!(
        matches!(&writer_b, NamespaceLockGuard::Standard(_)),
        "Expected Standard guard for distributed lock"
    );

    // Step 5: health check reflects all three nodes connected and healthy.
    let health = ns_lock.get_health().await;
    assert_eq!(health.node_id, "multi-node");
    assert_eq!(health.total_nodes, 3);
    assert_eq!(health.connected_nodes, 3);
    assert_eq!(health.status, crate::types::HealthStatus::Healthy);

    drop(writer_b);
}

View File

@@ -16,6 +16,8 @@ use serde::{Deserialize, Serialize};
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use uuid::Uuid;
use crate::ObjectKey;
/// Lock type enumeration
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum LockType {
@@ -56,7 +58,7 @@ pub struct LockInfo {
/// Unique identifier for the lock
pub id: LockId,
/// Resource path
pub resource: String,
pub resource: ObjectKey,
/// Lock type
pub lock_type: LockType,
/// Lock status
@@ -102,56 +104,27 @@ impl LockInfo {
/// Lock ID type
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct LockId {
pub resource: String,
pub resource: ObjectKey,
pub uuid: String,
}
impl LockId {
/// Generate new lock ID for a resource
pub fn new(resource: &str) -> Self {
pub fn new(resource: ObjectKey) -> Self {
Self {
resource: resource.to_string(),
resource,
uuid: Uuid::new_v4().to_string(),
}
}
/// Generate deterministic lock ID for a resource (same resource = same ID)
pub fn new_deterministic(resource: &str) -> Self {
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
let mut hasher = DefaultHasher::new();
resource.hash(&mut hasher);
let hash = hasher.finish();
/// Generate unique lock ID for a resource
/// Each call generates a different ID, even for the same resource
pub fn new_unique(resource: &ObjectKey) -> Self {
// Use UUID v4 (random) to ensure uniqueness
// Each call generates a new unique ID regardless of the resource
Self {
resource: resource.to_string(),
uuid: format!("{hash:016x}"),
}
}
/// Create lock ID from resource and uuid
pub fn from_parts(resource: impl Into<String>, uuid: impl Into<String>) -> Self {
Self {
resource: resource.into(),
uuid: uuid.into(),
}
}
/// Create lock ID from string (for compatibility, expects "resource:uuid")
pub fn from_string(id: impl Into<String>) -> Self {
let s = id.into();
if let Some((resource, uuid)) = s.split_once(":") {
Self {
resource: resource.to_string(),
uuid: uuid.to_string(),
}
} else {
// fallback: treat as uuid only
Self {
resource: "unknown".to_string(),
uuid: s,
}
resource: resource.clone(),
uuid: Uuid::new_v4().to_string(),
}
}
@@ -163,7 +136,7 @@ impl LockId {
impl Default for LockId {
fn default() -> Self {
Self::new("default")
Self::new(ObjectKey::new("default", "default"))
}
}
@@ -237,7 +210,7 @@ pub struct LockRequest {
/// Lock ID
pub lock_id: LockId,
/// Resource path
pub resource: String,
pub resource: ObjectKey,
/// Lock type
pub lock_type: LockType,
/// Lock owner
@@ -256,11 +229,10 @@ pub struct LockRequest {
impl LockRequest {
/// Create new lock request
pub fn new(resource: impl Into<String>, lock_type: LockType, owner: impl Into<String>) -> Self {
let resource_str = resource.into();
pub fn new(resource: ObjectKey, lock_type: LockType, owner: impl Into<String>) -> Self {
Self {
lock_id: LockId::new_deterministic(&resource_str),
resource: resource_str,
lock_id: LockId::new_unique(&resource),
resource,
lock_type,
owner: owner.into(),
acquire_timeout: Duration::from_secs(10), // Default 10 seconds to acquire
@@ -611,95 +583,3 @@ impl WaitQueueItem {
self.wait_start_time.elapsed().unwrap_or_default()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_lock_id() {
let id1 = LockId::new("test-resource");
let id2 = LockId::new("test-resource");
assert_ne!(id1, id2);
let id3 = LockId::from_string("test-resource:test-uuid");
assert_eq!(id3.as_str(), "test-resource:test-uuid");
}
#[test]
fn test_lock_metadata() {
let metadata = LockMetadata::new()
.with_client_info("test-client")
.with_operation_id("test-op")
.with_priority(1)
.with_tag("key", "value");
assert_eq!(metadata.client_info, Some("test-client".to_string()));
assert_eq!(metadata.operation_id, Some("test-op".to_string()));
assert_eq!(metadata.priority, Some(1));
assert_eq!(metadata.tags.get("key"), Some(&"value".to_string()));
}
#[test]
fn test_lock_request() {
let request = LockRequest::new("test-resource", LockType::Exclusive, "test-owner")
.with_acquire_timeout(Duration::from_secs(60))
.with_priority(LockPriority::High)
.with_deadlock_detection(true);
assert_eq!(request.resource, "test-resource");
assert_eq!(request.lock_type, LockType::Exclusive);
assert_eq!(request.owner, "test-owner");
assert_eq!(request.acquire_timeout, Duration::from_secs(60));
assert_eq!(request.priority, LockPriority::High);
assert!(request.deadlock_detection);
}
#[test]
fn test_lock_response() {
let lock_info = LockInfo {
id: LockId::new("test-resource"),
resource: "test".to_string(),
lock_type: LockType::Exclusive,
status: LockStatus::Acquired,
owner: "test".to_string(),
acquired_at: SystemTime::now(),
expires_at: SystemTime::now() + Duration::from_secs(30),
last_refreshed: SystemTime::now(),
metadata: LockMetadata::default(),
priority: LockPriority::Normal,
wait_start_time: None,
};
let success = LockResponse::success(lock_info.clone(), Duration::ZERO);
assert!(success.is_success());
let failure = LockResponse::failure("error", Duration::ZERO);
assert!(failure.is_failure());
let waiting = LockResponse::waiting(Duration::ZERO, 1);
assert!(waiting.is_waiting());
}
#[test]
fn test_timestamp_conversion() {
let now = SystemTime::now();
let timestamp = system_time_to_timestamp(now);
let converted = timestamp_to_system_time(timestamp);
// Allow for small time differences
let diff = now.duration_since(converted).unwrap_or(Duration::ZERO);
assert!(diff < Duration::from_secs(1));
}
#[test]
fn test_serialization() {
let request = LockRequest::new("test", LockType::Exclusive, "owner");
let serialized = serde_json::to_string(&request).unwrap();
let deserialized: LockRequest = serde_json::from_str(&serialized).unwrap();
assert_eq!(request.resource, deserialized.resource);
assert_eq!(request.lock_type, deserialized.lock_type);
assert_eq!(request.owner, deserialized.owner);
}
}

View File

@@ -638,6 +638,9 @@ pub struct GenerallyLockResponse {
pub success: bool,
#[prost(string, optional, tag = "2")]
pub error_info: ::core::option::Option<::prost::alloc::string::String>,
/// JSON serialized LockInfo
#[prost(string, optional, tag = "3")]
pub lock_info: ::core::option::Option<::prost::alloc::string::String>,
}
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct Mss {
@@ -1707,36 +1710,6 @@ pub mod node_service_client {
.insert(GrpcMethod::new("node_service.NodeService", "UnLock"));
self.inner.unary(req, path, codec).await
}
pub async fn r_lock(
&mut self,
request: impl tonic::IntoRequest<super::GenerallyLockRequest>,
) -> std::result::Result<tonic::Response<super::GenerallyLockResponse>, tonic::Status> {
self.inner
.ready()
.await
.map_err(|e| tonic::Status::unknown(format!("Service was not ready: {}", e.into())))?;
let codec = tonic_prost::ProstCodec::default();
let path = http::uri::PathAndQuery::from_static("/node_service.NodeService/RLock");
let mut req = request.into_request();
req.extensions_mut()
.insert(GrpcMethod::new("node_service.NodeService", "RLock"));
self.inner.unary(req, path, codec).await
}
pub async fn r_un_lock(
&mut self,
request: impl tonic::IntoRequest<super::GenerallyLockRequest>,
) -> std::result::Result<tonic::Response<super::GenerallyLockResponse>, tonic::Status> {
self.inner
.ready()
.await
.map_err(|e| tonic::Status::unknown(format!("Service was not ready: {}", e.into())))?;
let codec = tonic_prost::ProstCodec::default();
let path = http::uri::PathAndQuery::from_static("/node_service.NodeService/RUnLock");
let mut req = request.into_request();
req.extensions_mut()
.insert(GrpcMethod::new("node_service.NodeService", "RUnLock"));
self.inner.unary(req, path, codec).await
}
pub async fn force_un_lock(
&mut self,
request: impl tonic::IntoRequest<super::GenerallyLockRequest>,
@@ -2480,14 +2453,6 @@ pub mod node_service_server {
&self,
request: tonic::Request<super::GenerallyLockRequest>,
) -> std::result::Result<tonic::Response<super::GenerallyLockResponse>, tonic::Status>;
async fn r_lock(
&self,
request: tonic::Request<super::GenerallyLockRequest>,
) -> std::result::Result<tonic::Response<super::GenerallyLockResponse>, tonic::Status>;
async fn r_un_lock(
&self,
request: tonic::Request<super::GenerallyLockRequest>,
) -> std::result::Result<tonic::Response<super::GenerallyLockResponse>, tonic::Status>;
async fn force_un_lock(
&self,
request: tonic::Request<super::GenerallyLockRequest>,
@@ -3752,62 +3717,6 @@ pub mod node_service_server {
};
Box::pin(fut)
}
"/node_service.NodeService/RLock" => {
#[allow(non_camel_case_types)]
struct RLockSvc<T: NodeService>(pub Arc<T>);
impl<T: NodeService> tonic::server::UnaryService<super::GenerallyLockRequest> for RLockSvc<T> {
type Response = super::GenerallyLockResponse;
type Future = BoxFuture<tonic::Response<Self::Response>, tonic::Status>;
fn call(&mut self, request: tonic::Request<super::GenerallyLockRequest>) -> Self::Future {
let inner = Arc::clone(&self.0);
let fut = async move { <T as NodeService>::r_lock(&inner, request).await };
Box::pin(fut)
}
}
let accept_compression_encodings = self.accept_compression_encodings;
let send_compression_encodings = self.send_compression_encodings;
let max_decoding_message_size = self.max_decoding_message_size;
let max_encoding_message_size = self.max_encoding_message_size;
let inner = self.inner.clone();
let fut = async move {
let method = RLockSvc(inner);
let codec = tonic_prost::ProstCodec::default();
let mut grpc = tonic::server::Grpc::new(codec)
.apply_compression_config(accept_compression_encodings, send_compression_encodings)
.apply_max_message_size_config(max_decoding_message_size, max_encoding_message_size);
let res = grpc.unary(method, req).await;
Ok(res)
};
Box::pin(fut)
}
"/node_service.NodeService/RUnLock" => {
#[allow(non_camel_case_types)]
struct RUnLockSvc<T: NodeService>(pub Arc<T>);
impl<T: NodeService> tonic::server::UnaryService<super::GenerallyLockRequest> for RUnLockSvc<T> {
type Response = super::GenerallyLockResponse;
type Future = BoxFuture<tonic::Response<Self::Response>, tonic::Status>;
fn call(&mut self, request: tonic::Request<super::GenerallyLockRequest>) -> Self::Future {
let inner = Arc::clone(&self.0);
let fut = async move { <T as NodeService>::r_un_lock(&inner, request).await };
Box::pin(fut)
}
}
let accept_compression_encodings = self.accept_compression_encodings;
let send_compression_encodings = self.send_compression_encodings;
let max_decoding_message_size = self.max_decoding_message_size;
let max_encoding_message_size = self.max_encoding_message_size;
let inner = self.inner.clone();
let fut = async move {
let method = RUnLockSvc(inner);
let codec = tonic_prost::ProstCodec::default();
let mut grpc = tonic::server::Grpc::new(codec)
.apply_compression_config(accept_compression_encodings, send_compression_encodings)
.apply_max_message_size_config(max_decoding_message_size, max_encoding_message_size);
let res = grpc.unary(method, req).await;
Ok(res)
};
Box::pin(fut)
}
"/node_service.NodeService/ForceUnLock" => {
#[allow(non_camel_case_types)]
struct ForceUnLockSvc<T: NodeService>(pub Arc<T>);

View File

@@ -445,6 +445,7 @@ message GenerallyLockRequest {
message GenerallyLockResponse {
bool success = 1;
optional string error_info = 2;
optional string lock_info = 3; // JSON serialized LockInfo
}
message Mss {
@@ -813,8 +814,6 @@ service NodeService {
rpc Lock(GenerallyLockRequest) returns (GenerallyLockResponse) {};
rpc UnLock(GenerallyLockRequest) returns (GenerallyLockResponse) {};
rpc RLock(GenerallyLockRequest) returns (GenerallyLockResponse) {};
rpc RUnLock(GenerallyLockRequest) returns (GenerallyLockResponse) {};
rpc ForceUnLock(GenerallyLockRequest) returns (GenerallyLockResponse) {};
rpc Refresh(GenerallyLockRequest) returns (GenerallyLockResponse) {};

View File

@@ -521,7 +521,7 @@ impl DataUsageEntry {
self.obj_sizes.add(summary.total_size as u64);
self.obj_versions.add(summary.versions as u64);
let replication_stats = self.replication_stats.get_or_insert(ReplicationAllStats::default());
let replication_stats = self.replication_stats.get_or_insert_with(ReplicationAllStats::default);
replication_stats.replica_size += summary.replica_size as u64;
replication_stats.replica_count += summary.replica_count as u64;

View File

@@ -125,12 +125,28 @@ pub async fn save_background_heal_info(storeapi: Arc<ECStore>, info: BackgroundH
}
}
/// Get lock acquire timeout from environment variable RUSTFS_LOCK_ACQUIRE_TIMEOUT (in seconds)
/// Defaults to 5 seconds if not set or invalid
/// For distributed environments with multiple nodes, a longer timeout may be needed
fn get_lock_acquire_timeout() -> Duration {
Duration::from_secs(rustfs_utils::get_env_u64("RUSTFS_LOCK_ACQUIRE_TIMEOUT", 5))
}
pub async fn run_data_scanner(ctx: CancellationToken, storeapi: Arc<ECStore>) -> Result<(), ScannerError> {
// TODO: leader lock
// Acquire leader lock (write lock) to ensure only one scanner runs
let _guard = match storeapi.new_ns_lock(RUSTFS_META_BUCKET, "leader.lock").await {
Ok(guard) => guard,
Ok(ns_lock) => match ns_lock.get_write_lock(get_lock_acquire_timeout()).await {
Ok(guard) => {
debug!("run_data_scanner: acquired leader write lock");
guard
}
Err(e) => {
debug!("run_data_scanner: other node is running, failed to acquire leader write lock: {:?}", e);
return Ok(());
}
},
Err(e) => {
error!("run_data_scanner: other node is running, failed to acquire leader lock: {e}");
error!("run_data_scanner: failed to create namespace lock: {e}");
return Ok(());
}
};
@@ -223,6 +239,9 @@ pub async fn run_data_scanner(ctx: CancellationToken, storeapi: Arc<ECStore>) ->
}
global_metrics().set_cycle(None).await;
debug!("Data scanner done");
Ok(())
}

View File

@@ -39,7 +39,7 @@ workspace = true
[dependencies]
# RustFS Internal Crates
rustfs-ahm = { workspace = true }
rustfs-heal = { workspace = true }
rustfs-appauth = { workspace = true }
rustfs-audit = { workspace = true }
rustfs-common = { workspace = true }

View File

@@ -36,9 +36,9 @@ use crate::server::{
};
use clap::Parser;
use license::init_license;
use rustfs_ahm::{create_ahm_services_cancel_token, heal::storage::ECStoreHealStorage, init_heal_manager, shutdown_ahm_services};
use rustfs_common::{GlobalReadiness, SystemStage, set_global_addr};
use rustfs_credentials::init_global_action_credentials;
use rustfs_ecstore::store::init_lock_clients;
use rustfs_ecstore::{
StorageAPI,
bucket::metadata_sys::init_bucket_metadata_sys,
@@ -54,6 +54,9 @@ use rustfs_ecstore::{
store_api::BucketOptions,
update_erasure_type,
};
use rustfs_heal::{
create_ahm_services_cancel_token, heal::storage::ECStoreHealStorage, init_heal_manager, shutdown_ahm_services,
};
use rustfs_iam::init_iam_sys;
use rustfs_obs::{init_obs, set_global_guard};
use rustfs_scanner::init_data_scanner;
@@ -183,6 +186,14 @@ async fn run(opt: config::Opt) -> Result<()> {
.await
.map_err(Error::other)?;
set_global_endpoints(endpoint_pools.as_ref().clone());
update_erasure_type(setup_type).await;
// Initialize the local disk
init_local_disks(endpoint_pools.clone()).await.map_err(Error::other)?;
// Initialize the lock clients
init_lock_clients(endpoint_pools.clone());
for (i, eps) in endpoint_pools.as_ref().iter().enumerate() {
info!(
target: "rustfs::main::run",
@@ -236,12 +247,6 @@ async fn run(opt: config::Opt) -> Result<()> {
None
};
set_global_endpoints(endpoint_pools.as_ref().clone());
update_erasure_type(setup_type).await;
// Initialize the local disk
init_local_disks(endpoint_pools.clone()).await.map_err(Error::other)?;
let ctx = CancellationToken::new();
// init store

View File

@@ -111,6 +111,8 @@ use rustfs_targets::{
EventName,
arn::{ARN, TargetIDError},
};
use rustfs_utils::http::RUSTFS_FORCE_DELETE;
use rustfs_utils::string::parse_bool;
use rustfs_utils::{
CompressionAlgorithm, extract_params_header, extract_resp_elements, get_request_host, get_request_port,
get_request_user_agent,
@@ -1357,19 +1359,37 @@ impl S3 for FS {
/// Delete a bucket
#[instrument(level = "debug", skip(self, req))]
async fn delete_bucket(&self, req: S3Request<DeleteBucketInput>) -> S3Result<S3Response<DeleteBucketOutput>> {
async fn delete_bucket(&self, mut req: S3Request<DeleteBucketInput>) -> S3Result<S3Response<DeleteBucketOutput>> {
let helper = OperationHelper::new(&req, EventName::BucketRemoved, "s3:DeleteBucket");
let input = req.input;
let input = req.input.clone();
// TODO: DeleteBucketInput doesn't have force parameter?
let Some(store) = new_object_layer_fn() else {
return Err(S3Error::with_message(S3ErrorCode::InternalError, "Not init".to_string()));
};
// get value from header, support mc style
let force_str = req
.headers
.get(RUSTFS_FORCE_DELETE)
.map(|v| v.to_str().unwrap_or_default())
.unwrap_or(
req.headers
.get("x-minio-force-delete")
.map(|v| v.to_str().unwrap_or_default())
.unwrap_or_default(),
);
let force = parse_bool(force_str).unwrap_or_default();
if force {
authorize_request(&mut req, Action::S3Action(S3Action::ForceDeleteBucketAction)).await?;
}
store
.delete_bucket(
&input.bucket,
&DeleteBucketOptions {
force: false,
force,
..Default::default()
},
)
@@ -4504,8 +4524,6 @@ impl S3 for FS {
sse_customer_key_md5,
ssekms_key_id,
content_md5,
if_match,
if_none_match,
..
} = input;
@@ -4540,46 +4558,6 @@ impl S3 for FS {
}
}
if if_match.is_some() || if_none_match.is_some() {
let Some(store) = new_object_layer_fn() else {
return Err(S3Error::with_message(S3ErrorCode::InternalError, "Not init".to_string()));
};
match store.get_object_info(&bucket, &key, &ObjectOptions::default()).await {
Ok(info) => {
if !info.delete_marker {
if let Some(ifmatch) = if_match
&& let Some(strong_etag) = ifmatch.into_etag()
&& info
.etag
.as_ref()
.is_some_and(|etag| ETag::Strong(etag.clone()) != strong_etag)
{
return Err(s3_error!(PreconditionFailed));
}
if let Some(ifnonematch) = if_none_match
&& let Some(strong_etag) = ifnonematch.into_etag()
&& info
.etag
.as_ref()
.is_some_and(|etag| ETag::Strong(etag.clone()) == strong_etag)
{
return Err(s3_error!(PreconditionFailed));
}
}
}
Err(err) => {
if !is_err_object_not_found(&err) && !is_err_version_not_found(&err) {
return Err(ApiError::from(err).into());
}
if if_match.is_some() && (is_err_object_not_found(&err) || is_err_version_not_found(&err)) {
return Err(ApiError::from(err).into());
}
}
}
}
let Some(body) = body else { return Err(s3_error!(IncompleteBody)) };
let mut size = match content_length {

View File

@@ -24,6 +24,7 @@ use rustfs_ecstore::{
DeleteOptions, DiskAPI, DiskInfoOptions, DiskStore, FileInfoVersions, ReadMultipleReq, ReadOptions, UpdateMetadataOpts,
error::DiskError,
},
get_global_lock_client,
metrics_realtime::{CollectMetricsOpts, MetricType, collect_local_metrics},
new_object_layer_fn,
rpc::{LocalPeerS3Client, PeerS3Client},
@@ -74,16 +75,11 @@ type ResponseStream<T> = Pin<Box<dyn Stream<Item = Result<T, Status>> + Send>>;
#[derive(Debug)]
pub struct NodeService {
local_peer: LocalPeerS3Client,
lock_manager: Arc<rustfs_lock::LocalClient>,
}
pub fn make_server() -> NodeService {
let local_peer = LocalPeerS3Client::new(None, None);
let lock_manager = Arc::new(rustfs_lock::LocalClient::new());
NodeService {
local_peer,
lock_manager,
}
NodeService { local_peer }
}
impl NodeService {
@@ -94,6 +90,12 @@ impl NodeService {
async fn all_disk(&self) -> Vec<String> {
all_local_disk_path().await
}
/// Get the global lock client, returning an error if not initialized
fn get_lock_client(&self) -> Result<Arc<dyn LockClient>, Status> {
get_global_lock_client()
.ok_or_else(|| Status::internal("Lock client not initialized. Please ensure storage is initialized first."))
}
}
#[tonic::async_trait]
@@ -1458,21 +1460,29 @@ impl Node for NodeService {
return Ok(Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not decode args, err: {err}")),
lock_info: None,
}));
}
};
match self.lock_manager.acquire_exclusive(&args).await {
Ok(result) => Ok(Response::new(GenerallyLockResponse {
success: result.success,
error_info: None,
})),
let lock_client = self.get_lock_client()?;
match lock_client.acquire_lock(&args).await {
Ok(result) => {
// Serialize lock_info if available
let lock_info_json = result.lock_info.as_ref().and_then(|info| serde_json::to_string(info).ok());
Ok(Response::new(GenerallyLockResponse {
success: result.success,
error_info: None,
lock_info: lock_info_json,
}))
}
Err(err) => Ok(Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!(
"can not lock, resource: {0}, owner: {1}, err: {2}",
args.resource, args.owner, err
)),
lock_info: None,
})),
}
}
@@ -1485,14 +1495,17 @@ impl Node for NodeService {
return Ok(Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not decode args, err: {err}")),
lock_info: None,
}));
}
};
match self.lock_manager.release(&args.lock_id).await {
let lock_client = self.get_lock_client()?;
match lock_client.release(&args.lock_id).await {
Ok(_) => Ok(Response::new(GenerallyLockResponse {
success: true,
error_info: None,
lock_info: None,
})),
Err(err) => Ok(Response::new(GenerallyLockResponse {
success: false,
@@ -1500,60 +1513,7 @@ impl Node for NodeService {
"can not unlock, resource: {0}, owner: {1}, err: {2}",
args.resource, args.owner, err
)),
})),
}
}
async fn r_lock(&self, request: Request<GenerallyLockRequest>) -> Result<Response<GenerallyLockResponse>, Status> {
let request = request.into_inner();
let args: LockRequest = match serde_json::from_str(&request.args) {
Ok(args) => args,
Err(err) => {
return Ok(Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not decode args, err: {err}")),
}));
}
};
match self.lock_manager.acquire_shared(&args).await {
Ok(result) => Ok(Response::new(GenerallyLockResponse {
success: result.success,
error_info: None,
})),
Err(err) => Ok(Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!(
"can not rlock, resource: {0}, owner: {1}, err: {2}",
args.resource, args.owner, err
)),
})),
}
}
async fn r_un_lock(&self, request: Request<GenerallyLockRequest>) -> Result<Response<GenerallyLockResponse>, Status> {
let request = request.into_inner();
let args: LockRequest = match serde_json::from_str(&request.args) {
Ok(args) => args,
Err(err) => {
return Ok(Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not decode args, err: {err}")),
}));
}
};
match self.lock_manager.release(&args.lock_id).await {
Ok(_) => Ok(Response::new(GenerallyLockResponse {
success: true,
error_info: None,
})),
Err(err) => Ok(Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!(
"can not runlock, resource: {0}, owner: {1}, err: {2}",
args.resource, args.owner, err
)),
lock_info: None,
})),
}
}
@@ -1566,14 +1526,17 @@ impl Node for NodeService {
return Ok(Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not decode args, err: {err}")),
lock_info: None,
}));
}
};
match self.lock_manager.release(&args.lock_id).await {
let lock_client = self.get_lock_client()?;
match lock_client.release(&args.lock_id).await {
Ok(_) => Ok(Response::new(GenerallyLockResponse {
success: true,
error_info: None,
lock_info: None,
})),
Err(err) => Ok(Response::new(GenerallyLockResponse {
success: false,
@@ -1581,6 +1544,7 @@ impl Node for NodeService {
"can not force_unlock, resource: {0}, owner: {1}, err: {2}",
args.resource, args.owner, err
)),
lock_info: None,
})),
}
}
@@ -1593,6 +1557,7 @@ impl Node for NodeService {
return Ok(Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not decode args, err: {err}")),
lock_info: None,
}));
}
};
@@ -1600,6 +1565,7 @@ impl Node for NodeService {
Ok(Response::new(GenerallyLockResponse {
success: true,
error_info: None,
lock_info: None,
}))
}
@@ -3209,38 +3175,6 @@ mod tests {
assert!(unlock_response.error_info.is_some());
}
#[tokio::test]
async fn test_r_lock_invalid_args() {
let service = create_test_node_service();
let request = Request::new(GenerallyLockRequest {
args: "invalid json".to_string(),
});
let response = service.r_lock(request).await;
assert!(response.is_ok());
let rlock_response = response.unwrap().into_inner();
assert!(!rlock_response.success);
assert!(rlock_response.error_info.is_some());
}
#[tokio::test]
async fn test_r_un_lock_invalid_args() {
let service = create_test_node_service();
let request = Request::new(GenerallyLockRequest {
args: "invalid json".to_string(),
});
let response = service.r_un_lock(request).await;
assert!(response.is_ok());
let runlock_response = response.unwrap().into_inner();
assert!(!runlock_response.success);
assert!(runlock_response.error_info.is_some());
}
#[tokio::test]
async fn test_force_un_lock_invalid_args() {
let service = create_test_node_service();