Compare commits

...

64 Commits

Author SHA1 Message Date
weisd
56fd8132e9 fix: #303 returns empty when querying an empty or non-existent dir (#304) 2025-07-28 16:17:40 +08:00
guojidan
35daa74430 Merge pull request #302 from guojidan/lock
Lock: add transactional
2025-07-28 12:00:44 +08:00
junxiang Mu
dc156fb4cd Fix: clippy
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-28 11:38:42 +08:00
junxiang Mu
de905a878c Cargo: use workspace dependencies
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-28 11:02:40 +08:00
junxiang Mu
f3252f989b Test: Add e2e test case for lock transactional
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-28 11:00:10 +08:00
junxiang Mu
01a2afca9a lock: Add transactional
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-28 10:59:43 +08:00
guojidan
a4fe68ad21 Merge pull request #301 from guojidan/improve-sql
s3Select: add unit test case
2025-07-28 09:56:10 +08:00
junxiang Mu
c03f86b23c s3Select: add unit test case
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-28 09:19:47 +08:00
guojidan
5667f324ae Merge pull request #297 from guojidan/improve-sql
Test: Add e2e_test case for sql && add script for e2e_test
2025-07-25 17:16:41 +08:00
junxiang Mu
bcd806796f Test: add test script for e2e
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-25 16:52:06 +08:00
junxiang Mu
612404c47f Test: add e2e_test for s3select
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-25 15:07:44 +08:00
guojidan
85388262b3 Merge pull request #294 from guojidan/improve-sql
Refactor: DatabaseManagerSystem as global
2025-07-25 08:33:54 +08:00
junxiang Mu
25a4503285 fix: fmt
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-25 08:18:14 +08:00
安正超
526c4d5a61 refactor: optimize the build workflow, unify latest-file handling, and simplify artifact upload (#293) 2025-07-25 01:10:04 +08:00
junxiang Mu
addc964d56 Refactor: DatabaseManagerSystem as global
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 17:12:51 +08:00
loverustfs
371119f733 Modify Dockerfile: switch from GNU to MUSL 2025-07-24 16:36:15 +08:00
guojidan
021abc0398 Merge pull request #292 from guojidan/Arc
Chore: remove dirty file(cache.rs)
2025-07-24 16:32:20 +08:00
junxiang Mu
0672b6dd3e Chore: remove dirty file(cache.rs)
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 14:57:48 +08:00
guojidan
1372dc2857 Merge pull request #288 from guojidan/scanner
Refactor: Scanner
2025-07-24 14:42:54 +08:00
houseme
77bc9af109 Update Cargo.toml 2025-07-24 14:14:12 +08:00
junxiang Mu
91b1c84430 rebase
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:18:05 +08:00
junxiang Mu
b667927216 fix fmt
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:14:28 +08:00
junxiang Mu
29795fac51 fix Cargo.toml
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:14:28 +08:00
junxiang Mu
2ce7e01f55 Chore: remove dirty file(heal)
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:14:27 +08:00
junxiang Mu
4fefd63a5b rebase
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:14:05 +08:00
junxiang Mu
2a8c46874d fix: auto heal when xl.meta is lost
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:14:05 +08:00
junxiang Mu
b8b5511b68 fix: heal when a data part is lost
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:14:05 +08:00
junxiang Mu
bdaee228db fix(ahm): adjust test expectations for missing xl.meta recovery scenario
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:14:05 +08:00
junxiang Mu
d562620e99 fix: implement uses_data_dir method
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:14:05 +08:00
junxiang Mu
69b0c828c9 fix: scanner add heal bucket
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:14:05 +08:00
junxiang Mu
2bfd1efb9b Fix: add heal_manager into scanner when the scanner starts
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:14:05 +08:00
junxiang Mu
0854e6b921 Chore: rename init_heal_manager_with_channel
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:14:05 +08:00
junxiang Mu
b907f4e61b refactor(ahm): remove obsolete scanner/data_usage.rs after data usage refactor
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:14:05 +08:00
junxiang Mu
6ec568459c chore: update admin handlers, lockfile, and minor fixes
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:14:05 +08:00
junxiang Mu
ea210d52dc refactor(heal): unify heal request interface, add disk field, update ahm/ecstore/common for erasure set healing
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:14:03 +08:00
junxiang Mu
3d3c6e4e06 chore(protos): update proto definitions, remove ns_scanner, fix codegen and formatting
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:12:49 +08:00
junxiang Mu
e7d0a8d4b9 feat: integrate global metrics system into AHM scanner
- Add global metrics system to common crate for cross-module usage
- Integrate global metrics collection into AHM scanner operations
- Update ECStore to use common metrics system instead of local implementation
- Add chrono dependency to AHM crate for timestamp handling
- Re-export IlmAction from common metrics in ECStore lifecycle module
- Update scanner methods to use global metrics for cycle, disk, and volume scans
- Maintain backward compatibility with local metrics collector
- Fix clippy warnings and ensure proper code formatting

This change enables unified metrics collection across the entire RustFS system,
allowing better monitoring and observability of scanner operations.

Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:12:49 +08:00
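The global metrics integration described in this commit lends itself to a small illustration. The sketch below is an assumption-heavy outline rather than the actual rustfs-common API: the struct name, counter fields, and `global_metrics()` accessor are invented for illustration; only the idea of one process-wide collector shared by the AHM scanner and ECStore comes from the commit message.

```rust
// Illustrative sketch only: a process-wide metrics handle of the kind the
// commit above adds to the common crate. All names and fields are assumptions.
use std::sync::OnceLock;
use std::sync::atomic::{AtomicU64, Ordering};

#[derive(Default)]
pub struct ScannerMetrics {
    pub cycles_completed: AtomicU64,
    pub disks_scanned: AtomicU64,
    pub volumes_scanned: AtomicU64,
}

static GLOBAL_METRICS: OnceLock<ScannerMetrics> = OnceLock::new();

/// Single accessor so the AHM scanner and ECStore record into one collector.
pub fn global_metrics() -> &'static ScannerMetrics {
    GLOBAL_METRICS.get_or_init(ScannerMetrics::default)
}

/// Example call site in a scanner loop (hypothetical).
pub fn record_disk_scan() {
    global_metrics().disks_scanned.fetch_add(1, Ordering::Relaxed);
}
```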
junxiang Mu
7d3b2b774c fix heal disk
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:12:49 +08:00
junxiang Mu
aed8f52423 refactor: integrate disk healing into erasure set healing
- Remove HealType::Disk and related disk-specific healing methods
- Integrate disk format healing into heal_erasure_set with include_format_heal option
- Update auto disk scanner to use ErasureSet heal type instead of Disk heal
- Fix disk status change event handling to use ErasureSet heal requests
- Add proper bucket list retrieval for auto healing scenarios
- Update data scanner to submit ErasureSet heal tasks for offline disks
- Remove duplicate healing logic between Disk and ErasureSet types
- Ensure all healing operations go through unified ErasureSet healing path
2025-07-24 12:12:49 +08:00
junxiang Mu
c49414f6ac fix: resolve test conflicts and improve data scanner functionality
- Fix multi-threaded test conflicts in AHM heal integration tests
- Remove global environment sharing to prevent test state pollution
- Fix test_all_disk_method by clearing global disk map before test
- Improve data scanner and cache value implementations
- Update dependencies and resolve clippy warnings

Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:12:49 +08:00
junxiang Mu
8e766b90cd feat: implement heal channel mechanism for admin-ahm communication
- Add global unbounded channel in common crate for heal requests
- Implement channel processor in ahm to handle heal commands
- Add Start/Query/Cancel commands support via channel
- Integrate heal manager initialization in main.rs
- Replace direct MRF calls with channel-based heal requests in ecstore
- Support advanced heal options including pool_index and set_index
- Enable admin handlers to send heal requests via channel
2025-07-24 12:12:49 +08:00
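As a reading aid, here is a minimal sketch of the global unbounded heal channel this commit describes. Only `HealChannelCommand`, `HealChannelRequest`, and the Start/Query/Cancel variants are taken from the HealChannelProcessor diff further down; the `init_heal_channel`/`send_heal_command` helpers, the `OnceLock` storage, and the trimmed request fields are assumptions.

```rust
use std::sync::OnceLock;
use tokio::sync::mpsc::{self, UnboundedReceiver, UnboundedSender};

// Command and request shapes mirror what the HealChannelProcessor below consumes;
// the request struct is trimmed to a few fields for illustration.
pub enum HealChannelCommand {
    Start(HealChannelRequest),
    Query { heal_path: String, client_token: String },
    Cancel { heal_path: String },
}

pub struct HealChannelRequest {
    pub id: String,
    pub bucket: String,
    pub object_prefix: Option<String>,
    pub disk: Option<String>,
}

// Hypothetical global sender living in the common crate.
static HEAL_TX: OnceLock<UnboundedSender<HealChannelCommand>> = OnceLock::new();

/// Create the channel once at startup; the receiver goes to the ahm processor.
pub fn init_heal_channel() -> UnboundedReceiver<HealChannelCommand> {
    let (tx, rx) = mpsc::unbounded_channel();
    let _ = HEAL_TX.set(tx);
    rx
}

/// Used by admin handlers and ecstore instead of direct MRF calls.
pub fn send_heal_command(cmd: HealChannelCommand) -> Result<(), String> {
    HEAL_TX
        .get()
        .ok_or_else(|| "heal channel not initialized".to_string())?
        .send(cmd)
        .map_err(|e| e.to_string())
}
```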
junxiang Mu
3409cd8dff feat(ahm): add HealingTracker support & complete fresh-disk healing
• Introduce ecstore HealingTracker into ahm crate; load/init/save tracker
• Re-implement heal_fresh_disk to use heal_erasure_set with tracker
• Enhance auto-disk scanner: detect unformatted disks via get_disk_id()
• Remove DataUsageCache handling for now
• Refactor imports & types, clean up duplicate constants
2025-07-24 12:12:49 +08:00
junxiang Mu
f4973a681c feat: implement complete ahm heal system with ecstore integration
- Add comprehensive heal storage API with ECStore integration
- Implement heal object, bucket, disk, metadata, and EC decode operations
- Add heal task management with progress tracking and statistics
- Optimize heal manager by removing unnecessary workers
- Add integration tests for core heal functionality (heal_object, heal_bucket, heal_format)
- Integrate with ecstore's native heal commands for actual repair operations

Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:12:49 +08:00
junxiang Mu
4fb3d187d0 feat: implement heal subsystem for automatic data repair
- Add heal module with core types (HealType, HealRequest, HealTask)
- Implement HealManager for task scheduling and execution
- Add HealStorageAPI trait and ECStoreHealStorage implementation
- Integrate heal capabilities into scanner for automatic repair
- Support multiple heal types: object, bucket, disk, metadata, MRF, EC decode
- Add progress tracking and event system for heal operations
- Merge heal and scanner error types for unified error handling
- Include comprehensive logging and metrics for heal operations

Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:12:49 +08:00
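The core types this commit lists can be reconstructed, roughly, from the HealChannelProcessor diff at the end of this compare. The sketch below keeps only the variants and fields visible there; the `Option<usize>` index types, the absence of derives, and the remaining variants (disk, metadata, MRF, EC decode) are assumptions.

```rust
use std::time::Duration;
use rustfs_common::heal_channel::HealScanMode; // re-used by HealOptions below

pub enum HealType {
    Object { bucket: String, object: String, version_id: Option<String> },
    Bucket { bucket: String },
    ErasureSet { buckets: Vec<String>, set_disk_id: String },
    // ... disk, metadata, MRF and EC-decode variants per the commit message
}

pub enum HealPriority { Low, Normal, High, Urgent }

pub struct HealOptions {
    pub scan_mode: HealScanMode,
    pub remove_corrupted: bool,
    pub recreate_missing: bool,
    pub update_parity: bool,
    pub recursive: bool,
    pub dry_run: bool,
    pub timeout: Option<Duration>,
    pub pool_index: Option<usize>, // index types assumed
    pub set_index: Option<usize>,
}

pub struct HealRequest {
    pub heal_type: HealType,
    pub options: HealOptions,
    pub priority: HealPriority,
}

impl HealRequest {
    pub fn new(heal_type: HealType, options: HealOptions, priority: HealPriority) -> Self {
        Self { heal_type, options, priority }
    }
}
```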
dandan
0aff736efd Chore: fix ref and fix comment
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:12:49 +08:00
dandan
2aa7a631ef feat: refactor scanner module and add data usage statistics
- Move scanner code to scanner/ subdirectory for better organization
- Add data usage statistics collection and persistence
- Implement histogram support for size and version distribution
- Add global cancel token management for scanner operations
- Integrate scanner with ECStore for comprehensive data analysis
- Update error handling and improve test isolation
- Add data usage API endpoints and backend integration

Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:12:49 +08:00
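Since this commit mentions histogram support for size and version distribution, a compact sketch of a size histogram follows. The bucket boundaries, names, and API are assumptions for illustration, not the crate's actual implementation.

```rust
/// Upper bounds (exclusive) for each size bucket; one extra bucket catches the rest.
/// Boundaries here are illustrative, not the crate's real ones.
pub const SIZE_BUCKETS: &[(&str, u64)] = &[
    ("LESS_THAN_1_KiB", 1 << 10),
    ("1_KiB_TO_1_MiB", 1 << 20),
    ("1_MiB_TO_10_MiB", 10 << 20),
    ("10_MiB_TO_64_MiB", 64 << 20),
    ("64_MiB_TO_128_MiB", 128 << 20),
];

pub struct SizeHistogram {
    counts: Vec<u64>,
}

impl SizeHistogram {
    pub fn new() -> Self {
        Self { counts: vec![0; SIZE_BUCKETS.len() + 1] }
    }

    /// Record one object of `size` bytes in the first bucket whose bound it is under.
    pub fn add(&mut self, size: u64) {
        let idx = SIZE_BUCKETS
            .iter()
            .position(|(_, upper)| size < *upper)
            .unwrap_or(SIZE_BUCKETS.len());
        self.counts[idx] += 1;
    }

    /// Named (bucket, count) pairs for reporting.
    pub fn totals(&self) -> Vec<(&'static str, u64)> {
        SIZE_BUCKETS
            .iter()
            .map(|(name, _)| *name)
            .chain(std::iter::once("GREATER_THAN_128_MiB"))
            .zip(self.counts.iter().copied())
            .collect()
    }
}
```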
dandan
b40ef147a9 refactor: step 2
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:12:49 +08:00
junxiang Mu
1f11a3167b fix: Refactor heal and scanner design
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:12:49 +08:00
guojidan
18b0134ddf Merge pull request #290 from guojidan/feat/complete-lock-implementation
refactor: reimplement lock
2025-07-24 12:11:19 +08:00
junxiang Mu
b48a5fdc94 fix fmt
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 11:52:57 +08:00
junxiang Mu
168a07a670 add api into ecstore
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 11:52:57 +08:00
junxiang Mu
cad005bc21 refactor(lock): unify NamespaceLock client model and LockRequest API
- Refactor NamespaceLock to use a unified client vector and quorum mechanism,
  removing legacy local/distributed lock split and related code.
- Update LockRequest to split timeout into acquire_timeout and ttl, and add
  builder methods for both.
- Adjust all batch lock APIs to accept ttl and use new LockRequest fields.
- Update all affected tests and documentation for the new API.

Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 11:52:57 +08:00
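For orientation, a hedged sketch of the reworked LockRequest follows. The split of the old timeout into `acquire_timeout` and `ttl`, and the presence of builder methods for both, come from the commit message; every other field, the defaults, and the method names are assumptions.

```rust
use std::time::Duration;

pub struct LockRequest {
    pub resource: String,
    pub owner: String,
    pub acquire_timeout: Duration, // how long to wait to obtain the lock
    pub ttl: Duration,             // how long the lock is held before it expires
}

impl LockRequest {
    pub fn new(resource: impl Into<String>, owner: impl Into<String>) -> Self {
        Self {
            resource: resource.into(),
            owner: owner.into(),
            acquire_timeout: Duration::from_secs(5), // assumed default
            ttl: Duration::from_secs(30),            // assumed default
        }
    }

    pub fn with_acquire_timeout(mut self, timeout: Duration) -> Self {
        self.acquire_timeout = timeout;
        self
    }

    pub fn with_ttl(mut self, ttl: Duration) -> Self {
        self.ttl = ttl;
        self
    }
}
```

Per the commit's third bullet, the batch lock APIs would then take a ttl argument and build requests this way.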
root
dc44cde081 tmp
Signed-off-by: root <root@PC.localdomain>
2025-07-24 11:52:57 +08:00
dandan
4ccdeb9d2a refactor(lock): restructure lock crate, remove unused modules and clarify directory layout
- Remove unused core/rwlock.rs and manager/ modules (ManagerFactory, LifecycleManager, NamespaceManager)
- Move all lock-related code into crates/lock/src with clear submodules: client, core, utils, etc.
- Ensure only necessary files and APIs are exposed, improve maintainability
- No functional logic change, pure structure and cleanup refactor

Signed-off-by: dandan <dandan@dandandeMac-Studio.local>
2025-07-24 11:52:55 +08:00
dependabot[bot]
1b48934f47 build(deps): bump the dependencies group with 5 updates (#289)
Bumps the dependencies group with 5 updates:

| Package | From | To |
| --- | --- | --- |
| [hyper-util](https://github.com/hyperium/hyper-util) | `0.1.15` | `0.1.16` |
| [rand](https://github.com/rust-random/rand) | `0.9.1` | `0.9.2` |
| [serde_json](https://github.com/serde-rs/json) | `1.0.140` | `1.0.141` |
| [strum](https://github.com/Peternator7/strum) | `0.27.1` | `0.27.2` |
| [sysinfo](https://github.com/GuillaumeGomez/sysinfo) | `0.36.0` | `0.36.1` |


Updates `hyper-util` from 0.1.15 to 0.1.16
- [Release notes](https://github.com/hyperium/hyper-util/releases)
- [Changelog](https://github.com/hyperium/hyper-util/blob/master/CHANGELOG.md)
- [Commits](https://github.com/hyperium/hyper-util/compare/v0.1.15...v0.1.16)

Updates `rand` from 0.9.1 to 0.9.2
- [Release notes](https://github.com/rust-random/rand/releases)
- [Changelog](https://github.com/rust-random/rand/blob/master/CHANGELOG.md)
- [Commits](https://github.com/rust-random/rand/compare/rand_core-0.9.1...rand_core-0.9.2)

Updates `serde_json` from 1.0.140 to 1.0.141
- [Release notes](https://github.com/serde-rs/json/releases)
- [Commits](https://github.com/serde-rs/json/compare/v1.0.140...v1.0.141)

Updates `strum` from 0.27.1 to 0.27.2
- [Release notes](https://github.com/Peternator7/strum/releases)
- [Changelog](https://github.com/Peternator7/strum/blob/master/CHANGELOG.md)
- [Commits](https://github.com/Peternator7/strum/compare/v0.27.1...v0.27.2)

Updates `sysinfo` from 0.36.0 to 0.36.1
- [Changelog](https://github.com/GuillaumeGomez/sysinfo/blob/master/CHANGELOG.md)
- [Commits](https://github.com/GuillaumeGomez/sysinfo/compare/v0.36.0...v0.36.1)

---
updated-dependencies:
- dependency-name: hyper-util
  dependency-version: 0.1.16
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: dependencies
- dependency-name: rand
  dependency-version: 0.9.2
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: dependencies
- dependency-name: serde_json
  dependency-version: 1.0.141
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: dependencies
- dependency-name: strum
  dependency-version: 0.27.2
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: dependencies
- dependency-name: sysinfo
  dependency-version: 0.36.1
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: dependencies
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-07-24 11:50:52 +08:00
zhangwenlong
25fa645184 add rustfs.spec for rustfs (#103)
add support for loongarch64
2025-07-24 11:39:09 +08:00
Marco Orlandin
3a3bb880f2 fix: update link in README.md leading to a 404 error (#285) 2025-07-24 09:15:04 +08:00
安正超
affe27298c fix: improve Windows build support and CI/CD workflow (#283)
- Fix Windows zip command issue by using PowerShell Compress-Archive
- Add Windows support for OSS upload with ossutil
- Replace Chinese comments with English in build.yml
- Fix bash syntax error in package_zip function
- Improve code formatting and consistency
- Update various configuration files for better cross-platform support

Resolves Windows build failures in GitHub Actions.
2025-07-22 23:55:57 +08:00
shiro.lee
629db6218e fix: the issue where preview fails when the path length exceeds 255 characters (#280) 2025-07-22 22:10:57 +08:00
安正超
aa1a3ce4e8 feat: add cargo clippy --fix --allow-dirty to pre-commit command (#282)
Resolves #277

- Add --fix flag to automatically fix clippy warnings
- Add --allow-dirty flag to run on dirty Git trees
- Improves code quality in pre-commit workflow
2025-07-22 22:10:53 +08:00
houseme
693db59fcc fix 2025-07-21 20:45:59 +08:00
houseme
0a7df4ef26 fix 2025-07-21 19:03:15 +08:00
houseme
9dcdc44718 fix 2025-07-21 18:03:01 +08:00
houseme
2a0c618f8b fix: windows build 2025-07-21 17:45:56 +08:00
105 changed files with 14694 additions and 11017 deletions

View File

@@ -27,7 +27,7 @@ services:
ports:
- "9000:9000" # Map port 9001 of the host to port 9000 of the container
volumes:
- ../../target/x86_64-unknown-linux-musl/release/rustfs:/app/rustfs
- ../../target/x86_64-unknown-linux-gnu/release/rustfs:/app/rustfs
command: "/app/rustfs"
node1:
@@ -44,7 +44,7 @@ services:
ports:
- "9001:9000" # Map port 9002 of the host to port 9000 of the container
volumes:
- ../../target/x86_64-unknown-linux-musl/release/rustfs:/app/rustfs
- ../../target/x86_64-unknown-linux-gnu/release/rustfs:/app/rustfs
command: "/app/rustfs"
node2:
@@ -61,7 +61,7 @@ services:
ports:
- "9002:9000" # Map port 9003 of the host to port 9000 of the container
volumes:
- ../../target/x86_64-unknown-linux-musl/release/rustfs:/app/rustfs
- ../../target/x86_64-unknown-linux-gnu/release/rustfs:/app/rustfs
command: "/app/rustfs"
node3:
@@ -78,5 +78,5 @@ services:
ports:
- "9003:9000" # Map port 9004 of the host to port 9000 of the container
volumes:
- ../../target/x86_64-unknown-linux-musl/release/rustfs:/app/rustfs
- ../../target/x86_64-unknown-linux-gnu/release/rustfs:/app/rustfs
command: "/app/rustfs"

View File

@@ -194,10 +194,10 @@ jobs:
target: x86_64-pc-windows-msvc
cross: false
platform: windows
- os: windows-latest
target: aarch64-pc-windows-msvc
cross: true
platform: windows
#- os: windows-latest
# target: aarch64-pc-windows-msvc
# cross: true
# platform: windows
steps:
- name: Checkout repository
uses: actions/checkout@v4
@@ -215,6 +215,7 @@ jobs:
install-cross-tools: ${{ matrix.cross }}
- name: Download static console assets
shell: bash
run: |
mkdir -p ./rustfs/static
if [[ "${{ matrix.platform }}" == "windows" ]]; then
@@ -240,6 +241,7 @@ jobs:
fi
- name: Build RustFS
shell: bash
run: |
# Force rebuild by touching build.rs
touch rustfs/build.rs
@@ -268,30 +270,55 @@ jobs:
# Extract platform and arch from target
TARGET="${{ matrix.target }}"
PLATFORM="${{ matrix.platform }}"
# Map target to architecture
# Map target to architecture and variant
case "$TARGET" in
*x86_64*musl*)
ARCH="x86_64"
VARIANT="musl"
;;
*x86_64*gnu*)
ARCH="x86_64"
VARIANT="gnu"
;;
*x86_64*)
ARCH="x86_64"
VARIANT=""
;;
*aarch64*musl*|*arm64*musl*)
ARCH="aarch64"
VARIANT="musl"
;;
*aarch64*gnu*|*arm64*gnu*)
ARCH="aarch64"
VARIANT="gnu"
;;
*aarch64*|*arm64*)
ARCH="aarch64"
VARIANT=""
;;
*armv7*)
ARCH="armv7"
VARIANT=""
;;
*)
ARCH="unknown"
VARIANT=""
;;
esac
# Generate package name based on build type
if [[ "$BUILD_TYPE" == "development" ]]; then
# Development build: rustfs-${platform}-${arch}-dev-${short_sha}.zip
PACKAGE_NAME="rustfs-${PLATFORM}-${ARCH}-dev-${SHORT_SHA}"
if [[ -n "$VARIANT" ]]; then
ARCH_WITH_VARIANT="${ARCH}-${VARIANT}"
else
# Release/Prerelease build: rustfs-${platform}-${arch}-v${version}.zip
PACKAGE_NAME="rustfs-${PLATFORM}-${ARCH}-v${VERSION}"
ARCH_WITH_VARIANT="${ARCH}"
fi
if [[ "$BUILD_TYPE" == "development" ]]; then
# Development build: rustfs-${platform}-${arch}-${variant}-dev-${short_sha}.zip
PACKAGE_NAME="rustfs-${PLATFORM}-${ARCH_WITH_VARIANT}-dev-${SHORT_SHA}"
else
# Release/Prerelease build: rustfs-${platform}-${arch}-${variant}-v${version}.zip
PACKAGE_NAME="rustfs-${PLATFORM}-${ARCH_WITH_VARIANT}-v${VERSION}"
fi
# Create zip packages for all platforms
@@ -303,23 +330,119 @@ jobs:
fi
cd target/${{ matrix.target }}/release
zip "../../../${PACKAGE_NAME}.zip" rustfs
# Determine the binary name based on platform
if [[ "${{ matrix.platform }}" == "windows" ]]; then
BINARY_NAME="rustfs.exe"
else
BINARY_NAME="rustfs"
fi
# Verify the binary exists before packaging
if [[ ! -f "$BINARY_NAME" ]]; then
echo "❌ Binary $BINARY_NAME not found in $(pwd)"
if [[ "${{ matrix.platform }}" == "windows" ]]; then
dir
else
ls -la
fi
exit 1
fi
# Universal packaging function
package_zip() {
local src=$1
local dst=$2
if [[ "${{ matrix.platform }}" == "windows" ]]; then
# Windows uses PowerShell Compress-Archive
powershell -Command "Compress-Archive -Path '$src' -DestinationPath '$dst' -Force"
elif command -v zip &> /dev/null; then
# Unix systems use zip command
zip "$dst" "$src"
else
echo "❌ No zip utility available"
exit 1
fi
}
# Create the zip package
echo "Start packaging: $BINARY_NAME -> ../../../${PACKAGE_NAME}.zip"
package_zip "$BINARY_NAME" "../../../${PACKAGE_NAME}.zip"
cd ../../..
# Verify the package was created
if [[ -f "${PACKAGE_NAME}.zip" ]]; then
echo "✅ Package created successfully: ${PACKAGE_NAME}.zip"
if [[ "${{ matrix.platform }}" == "windows" ]]; then
dir
else
ls -lh ${PACKAGE_NAME}.zip
fi
else
echo "❌ Failed to create package: ${PACKAGE_NAME}.zip"
exit 1
fi
# Create latest version files right after the main package
LATEST_FILES=""
if [[ "$BUILD_TYPE" == "release" ]] || [[ "$BUILD_TYPE" == "prerelease" ]]; then
# Create latest version filename
# Convert from rustfs-linux-x86_64-musl-v1.0.0 to rustfs-linux-x86_64-musl-latest
LATEST_FILE="${PACKAGE_NAME%-v*}-latest.zip"
echo "🔄 Creating latest version: ${PACKAGE_NAME}.zip -> $LATEST_FILE"
cp "${PACKAGE_NAME}.zip" "$LATEST_FILE"
if [[ -f "$LATEST_FILE" ]]; then
echo "✅ Latest version created: $LATEST_FILE"
LATEST_FILES="$LATEST_FILE"
fi
elif [[ "$BUILD_TYPE" == "development" ]]; then
# Development builds (only main branch triggers development builds)
# Create main-latest version filename
# Convert from rustfs-linux-x86_64-dev-abc123 to rustfs-linux-x86_64-main-latest
MAIN_LATEST_FILE="${PACKAGE_NAME%-dev-*}-main-latest.zip"
echo "🔄 Creating main-latest version: ${PACKAGE_NAME}.zip -> $MAIN_LATEST_FILE"
cp "${PACKAGE_NAME}.zip" "$MAIN_LATEST_FILE"
if [[ -f "$MAIN_LATEST_FILE" ]]; then
echo "✅ Main-latest version created: $MAIN_LATEST_FILE"
LATEST_FILES="$MAIN_LATEST_FILE"
# Also create a generic main-latest for Docker builds (Linux only)
if [[ "${{ matrix.platform }}" == "linux" ]]; then
DOCKER_MAIN_LATEST_FILE="rustfs-linux-${ARCH_WITH_VARIANT}-main-latest.zip"
echo "🔄 Creating Docker main-latest version: ${PACKAGE_NAME}.zip -> $DOCKER_MAIN_LATEST_FILE"
cp "${PACKAGE_NAME}.zip" "$DOCKER_MAIN_LATEST_FILE"
if [[ -f "$DOCKER_MAIN_LATEST_FILE" ]]; then
echo "✅ Docker main-latest version created: $DOCKER_MAIN_LATEST_FILE"
LATEST_FILES="$LATEST_FILES $DOCKER_MAIN_LATEST_FILE"
fi
fi
fi
fi
echo "package_name=${PACKAGE_NAME}" >> $GITHUB_OUTPUT
echo "package_file=${PACKAGE_NAME}.zip" >> $GITHUB_OUTPUT
echo "latest_files=${LATEST_FILES}" >> $GITHUB_OUTPUT
echo "build_type=${BUILD_TYPE}" >> $GITHUB_OUTPUT
echo "version=${VERSION}" >> $GITHUB_OUTPUT
echo "📦 Package created: ${PACKAGE_NAME}.zip"
if [[ -n "$LATEST_FILES" ]]; then
echo "📦 Latest files created: $LATEST_FILES"
fi
echo "🔧 Build type: ${BUILD_TYPE}"
echo "📊 Version: ${VERSION}"
- name: Upload artifacts
- name: Upload to GitHub artifacts
uses: actions/upload-artifact@v4
with:
name: ${{ steps.package.outputs.package_name }}
path: ${{ steps.package.outputs.package_file }}
path: "rustfs-*.zip"
retention-days: ${{ startsWith(github.ref, 'refs/tags/') && 30 || 7 }}
- name: Upload to Aliyun OSS
@@ -329,6 +452,7 @@ jobs:
OSS_ACCESS_KEY_SECRET: ${{ secrets.ALICLOUDOSS_KEY_SECRET }}
OSS_REGION: cn-beijing
OSS_ENDPOINT: https://oss-cn-beijing.aliyuncs.com
shell: bash
run: |
BUILD_TYPE="${{ needs.build-check.outputs.build_type }}"
@@ -367,6 +491,16 @@ jobs:
chmod +x /usr/local/bin/ossutil
OSSUTIL_BIN=ossutil
;;
windows)
OSSUTIL_ZIP="ossutil-${OSSUTIL_VERSION}-windows-amd64.zip"
OSSUTIL_DIR="ossutil-${OSSUTIL_VERSION}-windows-amd64"
curl -o "$OSSUTIL_ZIP" "https://gosspublic.alicdn.com/ossutil/v2/${OSSUTIL_VERSION}/${OSSUTIL_ZIP}"
unzip "$OSSUTIL_ZIP"
mv "${OSSUTIL_DIR}/ossutil.exe" ./ossutil.exe
rm -rf "$OSSUTIL_DIR" "$OSSUTIL_ZIP"
OSSUTIL_BIN=./ossutil.exe
;;
esac
# Determine upload path based on build type
@@ -378,72 +512,15 @@ jobs:
echo "📤 Uploading release build to OSS release directory"
fi
# Upload the package file to OSS
echo "Uploading ${{ steps.package.outputs.package_file }} to $OSS_PATH..."
$OSSUTIL_BIN cp "${{ steps.package.outputs.package_file }}" "$OSS_PATH" --force
# For release and prerelease builds, also create a latest version
if [[ "$BUILD_TYPE" == "release" ]] || [[ "$BUILD_TYPE" == "prerelease" ]]; then
# Extract platform and arch from package name
PACKAGE_NAME="${{ steps.package.outputs.package_name }}"
# Create latest version filename
# Convert from rustfs-linux-x86_64-v1.0.0 to rustfs-linux-x86_64-latest
LATEST_FILE="${PACKAGE_NAME%-v*}-latest.zip"
# Copy the original file to latest version
cp "${{ steps.package.outputs.package_file }}" "$LATEST_FILE"
# Upload the latest version
echo "Uploading latest version: $LATEST_FILE to $OSS_PATH..."
$OSSUTIL_BIN cp "$LATEST_FILE" "$OSS_PATH" --force
echo "✅ Latest version uploaded: $LATEST_FILE"
fi
# For development builds, create dev-latest version
if [[ "$BUILD_TYPE" == "development" ]]; then
# Extract platform and arch from package name
PACKAGE_NAME="${{ steps.package.outputs.package_name }}"
# Create dev-latest version filename
# Convert from rustfs-linux-x86_64-dev-abc123 to rustfs-linux-x86_64-dev-latest
DEV_LATEST_FILE="${PACKAGE_NAME%-*}-latest.zip"
# Copy the original file to dev-latest version
cp "${{ steps.package.outputs.package_file }}" "$DEV_LATEST_FILE"
# Upload the dev-latest version
echo "Uploading dev-latest version: $DEV_LATEST_FILE to $OSS_PATH..."
$OSSUTIL_BIN cp "$DEV_LATEST_FILE" "$OSS_PATH" --force
echo "✅ Dev-latest version uploaded: $DEV_LATEST_FILE"
# For main branch builds, also create a main-latest version
if [[ "${{ github.ref }}" == "refs/heads/main" ]]; then
# Create main-latest version filename
# Convert from rustfs-linux-x86_64-dev-abc123 to rustfs-linux-x86_64-main-latest
MAIN_LATEST_FILE="${PACKAGE_NAME%-dev-*}-main-latest.zip"
# Copy the original file to main-latest version
cp "${{ steps.package.outputs.package_file }}" "$MAIN_LATEST_FILE"
# Upload the main-latest version
echo "Uploading main-latest version: $MAIN_LATEST_FILE to $OSS_PATH..."
$OSSUTIL_BIN cp "$MAIN_LATEST_FILE" "$OSS_PATH" --force
echo "✅ Main-latest version uploaded: $MAIN_LATEST_FILE"
# Also create a generic main-latest for Docker builds
if [[ "${{ matrix.platform }}" == "linux" ]]; then
DOCKER_MAIN_LATEST_FILE="rustfs-linux-${{ matrix.target == 'x86_64-unknown-linux-musl' && 'x86_64' || 'aarch64' }}-main-latest.zip"
cp "${{ steps.package.outputs.package_file }}" "$DOCKER_MAIN_LATEST_FILE"
$OSSUTIL_BIN cp "$DOCKER_MAIN_LATEST_FILE" "$OSS_PATH" --force
echo "✅ Docker main-latest version uploaded: $DOCKER_MAIN_LATEST_FILE"
fi
# Upload all rustfs zip files to OSS using glob pattern
echo "📤 Uploading all rustfs-*.zip files to $OSS_PATH..."
for zip_file in rustfs-*.zip; do
if [[ -f "$zip_file" ]]; then
echo "Uploading: $zip_file to $OSS_PATH..."
$OSSUTIL_BIN cp "$zip_file" "$OSS_PATH" --force
echo "✅ Uploaded: $zip_file"
fi
fi
done
echo "✅ Upload completed successfully"
@@ -455,6 +532,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Build completion summary
shell: bash
run: |
BUILD_TYPE="${{ needs.build-check.outputs.build_type }}"
VERSION="${{ needs.build-check.outputs.version }}"
@@ -519,6 +597,7 @@ jobs:
id: create
env:
GH_TOKEN: ${{ github.token }}
shell: bash
run: |
TAG="${{ needs.build-check.outputs.version }}"
VERSION="${{ needs.build-check.outputs.version }}"
@@ -605,13 +684,14 @@ jobs:
- name: Prepare release assets
id: prepare
shell: bash
run: |
VERSION="${{ needs.build-check.outputs.version }}"
TAG="${{ needs.build-check.outputs.version }}"
mkdir -p ./release-assets
# Copy and verify artifacts
# Copy and verify artifacts (including latest files created during build)
ASSETS_COUNT=0
for file in ./artifacts/*.zip; do
if [[ -f "$file" ]]; then
@@ -627,7 +707,7 @@ jobs:
cd ./release-assets
# Generate checksums
# Generate checksums for all files (including latest versions)
if ls *.zip >/dev/null 2>&1; then
sha256sum *.zip > SHA256SUMS
sha512sum *.zip > SHA512SUMS
@@ -642,11 +722,12 @@ jobs:
echo "📦 Prepared assets:"
ls -la
echo "🔢 Asset count: $ASSETS_COUNT"
echo "🔢 Total asset count: $ASSETS_COUNT"
- name: Upload to GitHub Release
env:
GH_TOKEN: ${{ github.token }}
shell: bash
run: |
TAG="${{ needs.build-check.outputs.version }}"
@@ -675,6 +756,7 @@ jobs:
OSS_ACCESS_KEY_SECRET: ${{ secrets.ALICLOUDOSS_KEY_SECRET }}
OSS_REGION: cn-beijing
OSS_ENDPOINT: https://oss-cn-beijing.aliyuncs.com
shell: bash
run: |
if [[ -z "$OSS_ACCESS_KEY_ID" ]]; then
echo "⚠️ OSS credentials not available, skipping latest.json update"
@@ -726,6 +808,7 @@ jobs:
- name: Update release notes and publish
env:
GH_TOKEN: ${{ github.token }}
shell: bash
run: |
TAG="${{ needs.build-check.outputs.version }}"
VERSION="${{ needs.build-check.outputs.version }}"

308
Cargo.lock generated
View File

@@ -449,7 +449,7 @@ dependencies = [
"enumflags2",
"futures-channel",
"futures-util",
"rand 0.9.1",
"rand 0.9.2",
"raw-window-handle 0.6.2",
"serde",
"serde_repr",
@@ -504,9 +504,9 @@ dependencies = [
[[package]]
name = "async-io"
version = "2.4.1"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1237c0ae75a0f3765f58910ff9cdd0a12eeb39ab2f4c7de23262f337f0aacbb3"
checksum = "19634d6336019ef220f09fd31168ce5c184b295cbf80345437cc36094ef223ca"
dependencies = [
"async-lock",
"cfg-if",
@@ -517,8 +517,7 @@ dependencies = [
"polling",
"rustix 1.0.8",
"slab",
"tracing",
"windows-sys 0.59.0",
"windows-sys 0.60.2",
]
[[package]]
@@ -534,9 +533,9 @@ dependencies = [
[[package]]
name = "async-process"
version = "2.3.1"
version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cde3f4e40e6021d7acffc90095cbd6dc54cb593903d1de5832f435eb274b85dc"
checksum = "65daa13722ad51e6ab1a1b9c01299142bc75135b337923cfa10e79bbbd669f00"
dependencies = [
"async-channel",
"async-io",
@@ -548,7 +547,6 @@ dependencies = [
"event-listener",
"futures-lite",
"rustix 1.0.8",
"tracing",
]
[[package]]
@@ -564,9 +562,9 @@ dependencies = [
[[package]]
name = "async-signal"
version = "0.2.11"
version = "0.2.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7605a4e50d4b06df3898d5a70bf5fde51ed9059b0434b73105193bc27acce0d"
checksum = "f567af260ef69e1d52c2b560ce0ea230763e6fbb9214a85d768760a920e3e3c1"
dependencies = [
"async-io",
"async-lock",
@@ -577,7 +575,7 @@ dependencies = [
"rustix 1.0.8",
"signal-hook-registry",
"slab",
"windows-sys 0.59.0",
"windows-sys 0.60.2",
]
[[package]]
@@ -674,6 +672,36 @@ version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
[[package]]
name = "aws-config"
version = "1.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0baa720ebadea158c5bda642ac444a2af0cdf7bb66b46d1e4533de5d1f449d0"
dependencies = [
"aws-credential-types",
"aws-runtime",
"aws-sdk-sso",
"aws-sdk-ssooidc",
"aws-sdk-sts",
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-json",
"aws-smithy-runtime",
"aws-smithy-runtime-api",
"aws-smithy-types",
"aws-types",
"bytes",
"fastrand",
"hex",
"http 1.3.1",
"ring",
"time",
"tokio",
"tracing",
"url",
"zeroize",
]
[[package]]
name = "aws-credential-types"
version = "1.2.4"
@@ -688,9 +716,9 @@ dependencies = [
[[package]]
name = "aws-lc-rs"
version = "1.13.2"
version = "1.13.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08b5d4e069cbc868041a64bd68dc8cb39a0d79585cd6c5a24caa8c2d622121be"
checksum = "5c953fe1ba023e6b7730c0d4b031d06f267f23a46167dcbd40316644b10a17ba"
dependencies = [
"aws-lc-sys",
"zeroize",
@@ -736,9 +764,9 @@ dependencies = [
[[package]]
name = "aws-sdk-s3"
version = "1.98.0"
version = "1.99.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "029e89cae7e628553643aecb3a3f054a0a0912ff0fd1f5d6a0b4fda421dce64b"
checksum = "b2d64d68c93000d5792b2a25fbeaafb90985fa80a1c8adfe93f24fb271296f5f"
dependencies = [
"aws-credential-types",
"aws-runtime",
@@ -760,7 +788,7 @@ dependencies = [
"http 0.2.12",
"http 1.3.1",
"http-body 0.4.6",
"lru",
"lru 0.12.5",
"percent-encoding",
"regex-lite",
"sha2 0.10.9",
@@ -768,6 +796,73 @@ dependencies = [
"url",
]
[[package]]
name = "aws-sdk-sso"
version = "1.77.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "18f2f37fea82468fe3f5a059542c05392ef680c4f7f00e0db02df8b6e5c7d0c6"
dependencies = [
"aws-credential-types",
"aws-runtime",
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-json",
"aws-smithy-runtime",
"aws-smithy-runtime-api",
"aws-smithy-types",
"aws-types",
"bytes",
"fastrand",
"http 0.2.12",
"regex-lite",
"tracing",
]
[[package]]
name = "aws-sdk-ssooidc"
version = "1.78.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ecb4f6eada20e0193450cd48b12ed05e1e66baac86f39160191651b932f2b7d9"
dependencies = [
"aws-credential-types",
"aws-runtime",
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-json",
"aws-smithy-runtime",
"aws-smithy-runtime-api",
"aws-smithy-types",
"aws-types",
"bytes",
"fastrand",
"http 0.2.12",
"regex-lite",
"tracing",
]
[[package]]
name = "aws-sdk-sts"
version = "1.79.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "317377afba3498fca4948c5d32b399ef9a5ad35561a1e8a6f2ac7273dabf802d"
dependencies = [
"aws-credential-types",
"aws-runtime",
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-json",
"aws-smithy-query",
"aws-smithy-runtime",
"aws-smithy-runtime-api",
"aws-smithy-types",
"aws-smithy-xml",
"aws-types",
"fastrand",
"http 0.2.12",
"regex-lite",
"tracing",
]
[[package]]
name = "aws-sigv4"
version = "1.3.3"
@@ -907,10 +1002,20 @@ dependencies = [
]
[[package]]
name = "aws-smithy-runtime"
version = "1.8.4"
name = "aws-smithy-query"
version = "0.60.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3aaec682eb189e43c8a19c3dab2fe54590ad5f2cc2d26ab27608a20f2acf81c"
checksum = "f2fbd61ceb3fe8a1cb7352e42689cec5335833cd9f94103a61e98f9bb61c64bb"
dependencies = [
"aws-smithy-types",
"urlencoding",
]
[[package]]
name = "aws-smithy-runtime"
version = "1.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "660f70d9d8af6876b4c9aa8dcb0dbaf0f89b04ee9a4455bea1b4ba03b15f26f6"
dependencies = [
"aws-smithy-async",
"aws-smithy-http",
@@ -932,9 +1037,9 @@ dependencies = [
[[package]]
name = "aws-smithy-runtime-api"
version = "1.8.3"
version = "1.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9852b9226cb60b78ce9369022c0df678af1cac231c882d5da97a0c4e03be6e67"
checksum = "38280ac228bc479f347fcfccf4bf4d22d68f3bb4629685cb591cabd856567bbc"
dependencies = [
"aws-smithy-async",
"aws-smithy-types",
@@ -984,9 +1089,9 @@ dependencies = [
[[package]]
name = "aws-types"
version = "1.3.7"
version = "1.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a322fec39e4df22777ed3ad8ea868ac2f94cd15e1a55f6ee8d8d6305057689a"
checksum = "b069d19bf01e46298eaedd7c6f283fe565a59263e53eebec945f3e6398f42390"
dependencies = [
"aws-credential-types",
"aws-smithy-async",
@@ -1479,9 +1584,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "cc"
version = "1.2.29"
version = "1.2.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c1599538de2394445747c8cf7935946e3cc27e9625f889d979bfb2aaf569362"
checksum = "deec109607ca693028562ed836a5f1c4b8bd77755c4e132fc5ce11b0b6211ae7"
dependencies = [
"jobserver",
"libc",
@@ -2008,7 +2113,7 @@ dependencies = [
"crc",
"digest 0.10.7",
"libc",
"rand 0.9.1",
"rand 0.9.2",
"regex",
]
@@ -3468,6 +3573,9 @@ checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813"
name = "e2e_test"
version = "0.0.5"
dependencies = [
"async-trait",
"aws-config",
"aws-sdk-s3",
"bytes",
"flatbuffers 25.2.10",
"futures",
@@ -3479,6 +3587,7 @@ dependencies = [
"rustfs-protos",
"serde",
"serde_json",
"serial_test",
"tokio",
"tonic",
"url",
@@ -4705,9 +4814,9 @@ dependencies = [
[[package]]
name = "hyper-util"
version = "0.1.15"
version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f66d5bd4c6f02bf0542fad85d626775bab9258cf795a4256dcaf3161114d1df"
checksum = "8d9b05277c7e8da2c93a568989bb6207bef0112e8d17df7a6eda4a3cf143bc5e"
dependencies = [
"base64 0.22.1",
"bytes",
@@ -4721,7 +4830,7 @@ dependencies = [
"libc",
"percent-encoding",
"pin-project-lite",
"socket2 0.5.10",
"socket2 0.6.0",
"system-configuration",
"tokio",
"tower-service",
@@ -4957,9 +5066,9 @@ checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02"
[[package]]
name = "io-uring"
version = "0.7.8"
version = "0.7.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b86e202f00093dcba4275d4636b93ef9dd75d025ae560d2521b45ea28ab49013"
checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4"
dependencies = [
"bitflags 2.9.1",
"cfg-if",
@@ -5361,13 +5470,13 @@ checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de"
[[package]]
name = "libredox"
version = "0.1.4"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1580801010e535496706ba011c15f8532df6b42297d2e471fec38ceadd8c0638"
checksum = "4488594b9328dee448adb906d8b126d9b7deb7cf5c22161ee591610bb1be83c0"
dependencies = [
"bitflags 2.9.1",
"libc",
"redox_syscall 0.5.13",
"redox_syscall 0.5.15",
]
[[package]]
@@ -5484,6 +5593,15 @@ dependencies = [
"hashbrown 0.15.4",
]
[[package]]
name = "lru"
version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "86ea4e65087ff52f3862caff188d489f1fab49a0cb09e01b2e3f1a617b10aaed"
dependencies = [
"hashbrown 0.15.4",
]
[[package]]
name = "lru-slab"
version = "0.1.2"
@@ -6463,7 +6581,7 @@ dependencies = [
"futures-util",
"opentelemetry",
"percent-encoding",
"rand 0.9.1",
"rand 0.9.2",
"serde_json",
"thiserror 2.0.12",
"tokio",
@@ -6567,7 +6685,7 @@ checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5"
dependencies = [
"cfg-if",
"libc",
"redox_syscall 0.5.13",
"redox_syscall 0.5.15",
"smallvec",
"windows-targets 0.52.6",
]
@@ -6946,17 +7064,16 @@ dependencies = [
[[package]]
name = "polling"
version = "3.8.0"
version = "3.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b53a684391ad002dd6a596ceb6c74fd004fdce75f4be2e3f615068abbea5fd50"
checksum = "8ee9b2fa7a4517d2c91ff5bc6c297a427a96749d15f98fcdbb22c05571a4d4b7"
dependencies = [
"cfg-if",
"concurrent-queue",
"hermit-abi",
"pin-project-lite",
"rustix 1.0.8",
"tracing",
"windows-sys 0.59.0",
"windows-sys 0.60.2",
]
[[package]]
@@ -7224,7 +7341,7 @@ dependencies = [
"bytes",
"getrandom 0.3.3",
"lru-slab",
"rand 0.9.1",
"rand 0.9.2",
"ring",
"rustc-hash 2.1.1",
"rustls 0.23.29",
@@ -7292,9 +7409,9 @@ dependencies = [
[[package]]
name = "rand"
version = "0.9.1"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97"
checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
dependencies = [
"rand_chacha 0.9.0",
"rand_core 0.9.3",
@@ -7474,9 +7591,9 @@ dependencies = [
[[package]]
name = "redox_syscall"
version = "0.5.13"
version = "0.5.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d04b7d0ee6b4a0207a0a7adb104d23ecb0b47d6beae7152d0fa34b692b29fd6"
checksum = "7e8af0dde094006011e6a740d4879319439489813bd0bcdc7d821beaeeff48ec"
dependencies = [
"bitflags 2.9.1",
]
@@ -7923,8 +8040,10 @@ dependencies = [
"anyhow",
"async-trait",
"bytes",
"chrono",
"futures",
"lazy_static",
"once_cell",
"rmp-serde",
"rustfs-common",
"rustfs-ecstore",
@@ -7934,14 +8053,18 @@ dependencies = [
"rustfs-utils",
"serde",
"serde_json",
"serial_test",
"tempfile",
"thiserror 2.0.12",
"time",
"tokio",
"tokio-test",
"tokio-util",
"tracing",
"tracing-subscriber",
"url",
"uuid",
"walkdir",
]
[[package]]
@@ -7958,8 +8081,18 @@ dependencies = [
name = "rustfs-common"
version = "0.0.5"
dependencies = [
"async-trait",
"chrono",
"lazy_static",
"path-clean",
"rmp-serde",
"rustfs-filemeta",
"rustfs-madmin",
"s3s",
"serde",
"tokio",
"tonic",
"uuid",
]
[[package]]
@@ -7981,7 +8114,7 @@ dependencies = [
"chacha20poly1305",
"jsonwebtoken",
"pbkdf2",
"rand 0.9.1",
"rand 0.9.2",
"serde_json",
"sha2 0.10.9",
"test-case",
@@ -8021,7 +8154,7 @@ dependencies = [
"path-absolutize",
"path-clean",
"pin-project-lite",
"rand 0.9.1",
"rand 0.9.2",
"reed-solomon-simd",
"regex",
"reqwest",
@@ -8114,7 +8247,7 @@ dependencies = [
"base64-simd",
"futures",
"jsonwebtoken",
"rand 0.9.1",
"rand 0.9.2",
"rustfs-crypto",
"rustfs-ecstore",
"rustfs-madmin",
@@ -8133,10 +8266,16 @@ name = "rustfs-lock"
version = "0.0.5"
dependencies = [
"async-trait",
"rand 0.9.1",
"bytes",
"futures",
"lazy_static",
"lru 0.16.0",
"once_cell",
"rand 0.9.2",
"rustfs-protos",
"serde",
"serde_json",
"thiserror 2.0.12",
"tokio",
"tonic",
"tracing",
@@ -8225,7 +8364,7 @@ dependencies = [
"base64-simd",
"ipnetwork",
"jsonwebtoken",
"rand 0.9.1",
"rand 0.9.2",
"regex",
"rustfs-crypto",
"serde",
@@ -8259,7 +8398,7 @@ dependencies = [
"http 1.3.1",
"md-5",
"pin-project-lite",
"rand 0.9.1",
"rand 0.9.2",
"reqwest",
"rustfs-utils",
"serde",
@@ -8344,7 +8483,7 @@ dependencies = [
"bytes",
"http 1.3.1",
"hyper 1.6.0",
"rand 0.9.1",
"rand 0.9.2",
"rustfs-utils",
"s3s",
"serde",
@@ -8375,7 +8514,7 @@ dependencies = [
"md-5",
"netif",
"nix 0.30.1",
"rand 0.9.1",
"rand 0.9.2",
"regex",
"rustfs-config",
"rustls 0.23.29",
@@ -8653,6 +8792,15 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "scc"
version = "2.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22b2d775fb28f245817589471dd49c5edf64237f4a19d10ce9a92ff4651a27f4"
dependencies = [
"sdd",
]
[[package]]
name = "schannel"
version = "0.1.27"
@@ -8684,6 +8832,12 @@ dependencies = [
"untrusted",
]
[[package]]
name = "sdd"
version = "3.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "490dcfcbfef26be6800d11870ff2df8774fa6e86d047e3e8c8a76b25655e41ca"
[[package]]
name = "sec1"
version = "0.3.0"
@@ -8865,9 +9019,9 @@ dependencies = [
[[package]]
name = "serde_json"
version = "1.0.140"
version = "1.0.141"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373"
checksum = "30b9eff21ebe718216c6ec64e1d9ac57087aad11efc64e32002bce4a0d4c03d3"
dependencies = [
"itoa 1.0.15",
"memchr",
@@ -8941,6 +9095,31 @@ dependencies = [
"unsafe-libyaml",
]
[[package]]
name = "serial_test"
version = "3.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b258109f244e1d6891bf1053a55d63a5cd4f8f4c30cf9a1280989f80e7a1fa9"
dependencies = [
"futures",
"log",
"once_cell",
"parking_lot",
"scc",
"serial_test_derive",
]
[[package]]
name = "serial_test_derive"
version = "3.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d69265a08751de7844521fd15003ae0a888e035773ba05695c5c759a6f89eef"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.104",
]
[[package]]
name = "server_fn"
version = "0.6.15"
@@ -9408,23 +9587,22 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "strum"
version = "0.27.1"
version = "0.27.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f64def088c51c9510a8579e3c5d67c65349dcf755e5479ad3d010aa6454e2c32"
checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf"
dependencies = [
"strum_macros",
]
[[package]]
name = "strum_macros"
version = "0.27.1"
version = "0.27.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c77a8c5abcaf0f9ce05d62342b7d298c346515365c36b673df4ebe3ced01fde8"
checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7"
dependencies = [
"heck 0.5.0",
"proc-macro2",
"quote",
"rustversion",
"syn 2.0.104",
]
@@ -9570,9 +9748,9 @@ dependencies = [
[[package]]
name = "sysinfo"
version = "0.36.0"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aab138f5c1bb35231de19049060a87977ad23e04f2303e953bc5c2947ac7dec4"
checksum = "252800745060e7b9ffb7b2badbd8b31cfa4aa2e61af879d0a3bf2a317c20217d"
dependencies = [
"libc",
"memchr",
@@ -10533,7 +10711,7 @@ checksum = "3cf4199d1e5d15ddd86a694e4d0dffa9c323ce759fea589f00fef9d81cc1931d"
dependencies = [
"getrandom 0.3.3",
"js-sys",
"rand 0.9.1",
"rand 0.9.2",
"serde",
"uuid-macro-internal",
"wasm-bindgen",
@@ -10905,9 +11083,9 @@ dependencies = [
[[package]]
name = "webpki-roots"
version = "1.0.1"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8782dd5a41a24eed3a4f40b606249b3e236ca61adf1f25ea4d45c73de122b502"
checksum = "7e8983c3ab33d6fb807cfcdad2491c4ea8cbc8ed839181c7dfd9c67c83e261b2"
dependencies = [
"rustls-pki-types",
]

View File

@@ -130,7 +130,7 @@ hex-simd = "0.8.0"
highway = { version = "1.3.0" }
hmac = "0.12.1"
hyper = "1.6.0"
hyper-util = { version = "0.1.15", features = [
hyper-util = { version = "0.1.16", features = [
"tokio",
"server-auto",
"server-graceful",
@@ -148,6 +148,7 @@ keyring = { version = "3.6.2", features = [
] }
lazy_static = "1.5.0"
libsystemd = { version = "0.7.2" }
lru = "0.16"
local-ip-address = "0.6.5"
lz4 = "1.28.1"
matchit = "0.8.4"
@@ -183,7 +184,7 @@ percent-encoding = "2.3.1"
pin-project-lite = "0.2.16"
prost = "0.13.5"
quick-xml = "0.38.0"
rand = "0.9.1"
rand = "0.9.2"
rdkafka = { version = "0.38.0", features = ["tokio"] }
reed-solomon-simd = { version = "3.0.1" }
regex = { version = "1.11.1" }
@@ -213,7 +214,7 @@ rustls-pemfile = "2.2.0"
s3s = { version = "0.12.0-minio-preview.2" }
shadow-rs = { version = "1.2.0", default-features = false }
serde = { version = "1.0.219", features = ["derive"] }
serde_json = { version = "1.0.140", features = ["raw_value"] }
serde_json = { version = "1.0.141", features = ["raw_value"] }
serde-xml-rs = "0.8.1"
serde_urlencoded = "0.7.1"
sha1 = "0.10.6"
@@ -223,8 +224,8 @@ smallvec = { version = "1.15.1", features = ["serde"] }
snafu = "0.8.6"
snap = "1.1.1"
socket2 = "0.6.0"
strum = { version = "0.27.1", features = ["derive"] }
sysinfo = "0.36.0"
strum = { version = "0.27.2", features = ["derive"] }
sysinfo = "0.36.1"
sysctl = "0.6.0"
tempfile = "3.20.0"
temp-env = "0.3.6"

View File

@@ -13,12 +13,18 @@ RUN apk add --no-cache ca-certificates curl unzip
# Create build directory
WORKDIR /build
# Detect architecture and download corresponding binary
RUN case "${TARGETARCH}" in \
amd64) ARCH="x86_64" ;; \
arm64) ARCH="aarch64" ;; \
*) echo "Unsupported architecture: ${TARGETARCH}" >&2 && exit 1 ;; \
esac && \
# Set architecture-specific variables
RUN if [ "$TARGETARCH" = "amd64" ]; then \
echo "x86_64-musl" > /tmp/arch; \
elif [ "$TARGETARCH" = "arm64" ]; then \
echo "aarch64-musl" > /tmp/arch; \
else \
echo "unsupported" > /tmp/arch; \
fi
RUN ARCH=$(cat /tmp/arch) && \
if [ "$ARCH" = "unsupported" ]; then \
echo "Unsupported architecture: $TARGETARCH" && exit 1; \
fi && \
if [ "${RELEASE}" = "latest" ]; then \
VERSION="latest"; \
else \

View File

@@ -23,7 +23,7 @@ fmt-check:
.PHONY: clippy
clippy:
@echo "🔍 Running clippy checks..."
cargo clippy --all-targets --all-features -- -D warnings
cargo clippy --all-targets --all-features --fix --allow-dirty -- -D warnings
.PHONY: check
check:
@@ -90,6 +90,18 @@ build-gnu:
@echo "💡 On macOS/Windows, use 'make build-docker' or 'make docker-dev' instead"
./build-rustfs.sh --platform x86_64-unknown-linux-gnu
.PHONY: build-musl-arm64
build-musl-arm64:
@echo "🔨 Building rustfs for aarch64-unknown-linux-musl..."
@echo "💡 On macOS/Windows, use 'make build-docker' or 'make docker-dev' instead"
./build-rustfs.sh --platform aarch64-unknown-linux-musl
.PHONY: build-gnu-arm64
build-gnu-arm64:
@echo "🔨 Building rustfs for aarch64-unknown-linux-gnu..."
@echo "💡 On macOS/Windows, use 'make build-docker' or 'make docker-dev' instead"
./build-rustfs.sh --platform aarch64-unknown-linux-gnu
.PHONY: deploy-dev
deploy-dev: build-musl
@echo "🚀 Deploying to dev server: $${IP}"
@@ -248,10 +260,14 @@ build-cross-all:
@echo "💡 On macOS/Windows, use 'make docker-dev' for reliable multi-arch builds"
@echo "🔨 Generating protobuf code..."
cargo run --bin gproto || true
@echo "🔨 Building x86_64-unknown-linux-musl..."
./build-rustfs.sh --platform x86_64-unknown-linux-musl
@echo "🔨 Building x86_64-unknown-linux-gnu..."
./build-rustfs.sh --platform x86_64-unknown-linux-gnu
@echo "🔨 Building aarch64-unknown-linux-gnu..."
./build-rustfs.sh --platform aarch64-unknown-linux-gnu
@echo "🔨 Building x86_64-unknown-linux-musl..."
./build-rustfs.sh --platform x86_64-unknown-linux-musl
@echo "🔨 Building aarch64-unknown-linux-musl..."
./build-rustfs.sh --platform aarch64-unknown-linux-musl
@echo "✅ All architectures built successfully!"
# ========================================================================================
@@ -265,8 +281,10 @@ help-build:
@echo "🚀 本地构建 (推荐使用):"
@echo " make build # 构建 RustFS 二进制文件 (默认包含 console)"
@echo " make build-dev # 开发模式构建"
@echo " make build-musl # 构建 musl 版本"
@echo " make build-gnu # 构建 GNU 版本"
@echo " make build-musl # 构建 x86_64 musl 版本"
@echo " make build-gnu # 构建 x86_64 GNU 版本"
@echo " make build-musl-arm64 # 构建 aarch64 musl 版本"
@echo " make build-gnu-arm64 # 构建 aarch64 GNU 版本"
@echo ""
@echo "🐳 Docker 构建:"
@echo " make build-docker # 使用 Docker 容器构建"
@@ -281,7 +299,7 @@ help-build:
@echo " ./build-rustfs.sh --force-console-update # 强制更新 console 资源"
@echo " ./build-rustfs.sh --dev # 开发模式构建"
@echo " ./build-rustfs.sh --sign # 签名二进制文件"
@echo " ./build-rustfs.sh --platform x86_64-unknown-linux-musl # 指定目标平台"
@echo " ./build-rustfs.sh --platform x86_64-unknown-linux-gnu # 指定目标平台"
@echo " ./build-rustfs.sh --skip-verification # 跳过二进制验证"
@echo ""
@echo "💡 build-rustfs.sh 脚本提供了更多选项、智能检测和二进制验证功能"

View File

@@ -11,8 +11,8 @@
</p>
<p align="center">
<a href="https://docs.rustfs.com/en/introduction.html">Getting Started</a>
· <a href="https://docs.rustfs.com/en/">Docs</a>
<a href="https://docs.rustfs.com/introduction.html">Getting Started</a>
· <a href="https://docs.rustfs.com/">Docs</a>
· <a href="https://github.com/rustfs/rustfs/issues">Bug reports</a>
· <a href="https://github.com/rustfs/rustfs/discussions">Discussions</a>
</p>

View File

@@ -21,13 +21,17 @@ detect_platform() {
"linux")
case "$arch" in
"x86_64")
echo "x86_64-unknown-linux-musl"
# Default to GNU for better compatibility
echo "x86_64-unknown-linux-gnu"
;;
"aarch64"|"arm64")
echo "aarch64-unknown-linux-musl"
echo "aarch64-unknown-linux-gnu"
;;
"armv7l")
echo "armv7-unknown-linux-musleabihf"
echo "armv7-unknown-linux-gnueabihf"
;;
"loongarch64")
echo "loongarch64-unknown-linux-musl"
;;
*)
echo "unknown-platform"
@@ -119,6 +123,17 @@ usage() {
echo " -o, --output-dir DIR Output directory (default: target/release)"
echo " -b, --binary-name NAME Binary name (default: rustfs)"
echo " -p, --platform TARGET Target platform (default: auto-detect)"
echo " Supported platforms:"
echo " x86_64-unknown-linux-gnu"
echo " aarch64-unknown-linux-gnu"
echo " armv7-unknown-linux-gnueabihf"
echo " x86_64-unknown-linux-musl"
echo " aarch64-unknown-linux-musl"
echo " armv7-unknown-linux-musleabihf"
echo " x86_64-apple-darwin"
echo " aarch64-apple-darwin"
echo " x86_64-pc-windows-msvc"
echo " aarch64-pc-windows-msvc"
echo " --dev Build in dev mode"
echo " --sign Sign binaries after build"
echo " --with-console Download console static assets (default)"
@@ -385,7 +400,7 @@ build_binary() {
fi
else
# Native compilation
build_cmd="cargo build"
build_cmd="RUSTFLAGS=-Clink-arg=-lm cargo build"
fi
if [ "$BUILD_TYPE" = "release" ]; then

View File

@@ -34,8 +34,14 @@ url = { workspace = true }
rustfs-lock = { workspace = true }
lazy_static = { workspace = true }
chrono = { workspace = true }
[dev-dependencies]
rmp-serde = { workspace = true }
tokio-test = { workspace = true }
serde_json = { workspace = true }
serial_test = "3.2.0"
once_cell = { workspace = true }
tracing-subscriber = { workspace = true }
walkdir = "2.5.0"
tempfile = { workspace = true }

View File

@@ -14,30 +14,79 @@
use thiserror::Error;
/// Unified error type for RustFS AHM / Heal / Scanner
#[derive(Debug, Error)]
pub enum Error {
// General
#[error("I/O error: {0}")]
Io(#[from] std::io::Error),
#[error("Storage error: {0}")]
Storage(#[from] rustfs_ecstore::error::Error),
#[error("Disk error: {0}")]
Disk(#[from] rustfs_ecstore::disk::error::DiskError),
#[error("Configuration error: {0}")]
Config(String),
#[error("Heal configuration error: {message}")]
ConfigurationError { message: String },
#[error("Other error: {0}")]
Other(String),
#[error(transparent)]
Anyhow(#[from] anyhow::Error),
// Scanner-related
#[error("Scanner error: {0}")]
Scanner(String),
#[error("Metrics error: {0}")]
Metrics(String),
#[error(transparent)]
Other(#[from] anyhow::Error),
// Heal-related
#[error("Heal task not found: {task_id}")]
TaskNotFound { task_id: String },
#[error("Heal task already exists: {task_id}")]
TaskAlreadyExists { task_id: String },
#[error("Heal manager is not running")]
ManagerNotRunning,
#[error("Heal task execution failed: {message}")]
TaskExecutionFailed { message: String },
#[error("Invalid heal type: {heal_type}")]
InvalidHealType { heal_type: String },
#[error("Heal task cancelled")]
TaskCancelled,
#[error("Heal task timeout")]
TaskTimeout,
#[error("Heal event processing failed: {message}")]
EventProcessingFailed { message: String },
#[error("Heal progress tracking failed: {message}")]
ProgressTrackingFailed { message: String },
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
// Implement conversion from ahm::Error to std::io::Error for use in main.rs
impl Error {
pub fn other<E>(error: E) -> Self
where
E: Into<Box<dyn std::error::Error + Send + Sync>>,
{
Error::Other(error.into().to_string())
}
}
// Optional: conversion to/from std::io::Error
impl From<Error> for std::io::Error {
fn from(err: Error) -> Self {
std::io::Error::other(err)

View File

@@ -0,0 +1,233 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::error::Result;
use crate::heal::{
manager::HealManager,
task::{HealOptions, HealPriority, HealRequest, HealType},
};
use rustfs_common::heal_channel::{
HealChannelCommand, HealChannelPriority, HealChannelReceiver, HealChannelRequest, HealChannelResponse, HealScanMode,
};
use std::sync::Arc;
use tokio::sync::mpsc;
use tracing::{error, info};
/// Heal channel processor
pub struct HealChannelProcessor {
/// Heal manager
heal_manager: Arc<HealManager>,
/// Response sender
response_sender: mpsc::UnboundedSender<HealChannelResponse>,
/// Response receiver
response_receiver: mpsc::UnboundedReceiver<HealChannelResponse>,
}
impl HealChannelProcessor {
/// Create new HealChannelProcessor
pub fn new(heal_manager: Arc<HealManager>) -> Self {
let (response_tx, response_rx) = mpsc::unbounded_channel();
Self {
heal_manager,
response_sender: response_tx,
response_receiver: response_rx,
}
}
/// Start processing heal channel requests
pub async fn start(&mut self, mut receiver: HealChannelReceiver) -> Result<()> {
info!("Starting heal channel processor");
loop {
tokio::select! {
command = receiver.recv() => {
match command {
Some(command) => {
if let Err(e) = self.process_command(command).await {
error!("Failed to process heal command: {}", e);
}
}
None => {
info!("Heal channel receiver closed, stopping processor");
break;
}
}
}
response = self.response_receiver.recv() => {
if let Some(response) = response {
// Handle response if needed
info!("Received heal response for request: {}", response.request_id);
}
}
}
}
info!("Heal channel processor stopped");
Ok(())
}
/// Process heal command
async fn process_command(&self, command: HealChannelCommand) -> Result<()> {
match command {
HealChannelCommand::Start(request) => self.process_start_request(request).await,
HealChannelCommand::Query { heal_path, client_token } => self.process_query_request(heal_path, client_token).await,
HealChannelCommand::Cancel { heal_path } => self.process_cancel_request(heal_path).await,
}
}
/// Process start request
async fn process_start_request(&self, request: HealChannelRequest) -> Result<()> {
info!("Processing heal start request: {} for bucket: {}", request.id, request.bucket);
// Convert channel request to heal request
let heal_request = self.convert_to_heal_request(request.clone())?;
// Submit to heal manager
match self.heal_manager.submit_heal_request(heal_request).await {
Ok(task_id) => {
info!("Successfully submitted heal request: {} as task: {}", request.id, task_id);
// Send success response
let response = HealChannelResponse {
request_id: request.id,
success: true,
data: Some(format!("Task ID: {task_id}").into_bytes()),
error: None,
};
if let Err(e) = self.response_sender.send(response) {
error!("Failed to send heal response: {}", e);
}
}
Err(e) => {
error!("Failed to submit heal request: {} - {}", request.id, e);
// Send error response
let response = HealChannelResponse {
request_id: request.id,
success: false,
data: None,
error: Some(e.to_string()),
};
if let Err(e) = self.response_sender.send(response) {
error!("Failed to send heal error response: {}", e);
}
}
}
Ok(())
}
/// Process query request
async fn process_query_request(&self, heal_path: String, client_token: String) -> Result<()> {
info!("Processing heal query request for path: {}", heal_path);
// TODO: Implement query logic based on heal_path and client_token
// For now, return a placeholder response
let response = HealChannelResponse {
request_id: client_token,
success: true,
data: Some(format!("Query result for path: {heal_path}").into_bytes()),
error: None,
};
if let Err(e) = self.response_sender.send(response) {
error!("Failed to send query response: {}", e);
}
Ok(())
}
/// Process cancel request
async fn process_cancel_request(&self, heal_path: String) -> Result<()> {
info!("Processing heal cancel request for path: {}", heal_path);
// TODO: Implement cancel logic based on heal_path
// For now, return a placeholder response
let response = HealChannelResponse {
request_id: heal_path.clone(),
success: true,
data: Some(format!("Cancel request for path: {heal_path}").into_bytes()),
error: None,
};
if let Err(e) = self.response_sender.send(response) {
error!("Failed to send cancel response: {}", e);
}
Ok(())
}
/// Convert channel request to heal request
fn convert_to_heal_request(&self, request: HealChannelRequest) -> Result<HealRequest> {
let heal_type = if let Some(disk_id) = &request.disk {
HealType::ErasureSet {
buckets: vec![],
set_disk_id: disk_id.clone(),
}
} else if let Some(prefix) = &request.object_prefix {
if !prefix.is_empty() {
HealType::Object {
bucket: request.bucket.clone(),
object: prefix.clone(),
version_id: None,
}
} else {
HealType::Bucket {
bucket: request.bucket.clone(),
}
}
} else {
HealType::Bucket {
bucket: request.bucket.clone(),
}
};
let priority = match request.priority {
HealChannelPriority::Low => HealPriority::Low,
HealChannelPriority::Normal => HealPriority::Normal,
HealChannelPriority::High => HealPriority::High,
HealChannelPriority::Critical => HealPriority::Urgent,
};
// Build HealOptions with all available fields
let mut options = HealOptions {
scan_mode: request.scan_mode.unwrap_or(HealScanMode::Normal),
remove_corrupted: request.remove_corrupted.unwrap_or(false),
recreate_missing: request.recreate_missing.unwrap_or(true),
update_parity: request.update_parity.unwrap_or(true),
recursive: request.recursive.unwrap_or(false),
dry_run: request.dry_run.unwrap_or(false),
timeout: request.timeout_seconds.map(std::time::Duration::from_secs),
pool_index: request.pool_index,
set_index: request.set_index,
};
// Apply force_start overrides
if request.force_start {
options.remove_corrupted = true;
options.recreate_missing = true;
options.update_parity = true;
}
Ok(HealRequest::new(heal_type, options, priority))
}
/// Get response sender for external use
pub fn get_response_sender(&self) -> mpsc::UnboundedSender<HealChannelResponse> {
self.response_sender.clone()
}
}
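
A minimal wiring sketch (not part of the diff): it assumes the admin layer already owns a HealChannelReceiver and an Arc<HealManager>; the processor simply bridges the two until the sending side of the channel closes.

use std::sync::Arc;
use rustfs_common::heal_channel::HealChannelReceiver;
use crate::heal::{channel::HealChannelProcessor, manager::HealManager};

// Hypothetical helper: drive the processor until the channel is closed.
async fn run_heal_channel(heal_manager: Arc<HealManager>, receiver: HealChannelReceiver) -> crate::error::Result<()> {
    let mut processor = HealChannelProcessor::new(heal_manager);
    processor.start(receiver).await
}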

View File

@@ -0,0 +1,456 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::error::{Error, Result};
use crate::heal::{
progress::HealProgress,
resume::{CheckpointManager, ResumeManager, ResumeUtils},
storage::HealStorageAPI,
};
use futures::future::join_all;
use rustfs_common::heal_channel::{HealOpts, HealScanMode};
use rustfs_ecstore::disk::DiskStore;
use std::sync::Arc;
use tokio::sync::RwLock;
use tracing::{error, info, warn};
/// Erasure Set Healer
pub struct ErasureSetHealer {
storage: Arc<dyn HealStorageAPI>,
progress: Arc<RwLock<HealProgress>>,
cancel_token: tokio_util::sync::CancellationToken,
disk: DiskStore,
}
impl ErasureSetHealer {
pub fn new(
storage: Arc<dyn HealStorageAPI>,
progress: Arc<RwLock<HealProgress>>,
cancel_token: tokio_util::sync::CancellationToken,
disk: DiskStore,
) -> Self {
Self {
storage,
progress,
cancel_token,
disk,
}
}
/// execute erasure set heal with resume
pub async fn heal_erasure_set(&self, buckets: &[String], set_disk_id: &str) -> Result<()> {
info!("Starting erasure set heal for {} buckets on set disk {}", buckets.len(), set_disk_id);
// 1. generate or get task id
let task_id = self.get_or_create_task_id(set_disk_id).await?;
// 2. initialize the resume state, or restore it if one already exists
let (resume_manager, checkpoint_manager) = self.initialize_resume_state(&task_id, buckets).await?;
// 3. execute heal with resume
let result = self
.execute_heal_with_resume(buckets, &resume_manager, &checkpoint_manager)
.await;
// 4. cleanup resume state
if result.is_ok() {
if let Err(e) = resume_manager.cleanup().await {
warn!("Failed to cleanup resume state: {}", e);
}
if let Err(e) = checkpoint_manager.cleanup().await {
warn!("Failed to cleanup checkpoint: {}", e);
}
}
result
}
/// get or create task id
async fn get_or_create_task_id(&self, _set_disk_id: &str) -> Result<String> {
// check if there are resumable tasks
let resumable_tasks = ResumeUtils::get_resumable_tasks(&self.disk).await?;
for task_id in resumable_tasks {
if ResumeUtils::can_resume_task(&self.disk, &task_id).await {
info!("Found resumable task: {}", task_id);
return Ok(task_id);
}
}
// create new task id
let task_id = ResumeUtils::generate_task_id();
info!("Created new heal task: {}", task_id);
Ok(task_id)
}
/// initialize the resume state, or restore an existing one
async fn initialize_resume_state(&self, task_id: &str, buckets: &[String]) -> Result<(ResumeManager, CheckpointManager)> {
// check if resume state exists
if ResumeManager::has_resume_state(&self.disk, task_id).await {
info!("Loading existing resume state for task: {}", task_id);
let resume_manager = ResumeManager::load_from_disk(self.disk.clone(), task_id).await?;
let checkpoint_manager = if CheckpointManager::has_checkpoint(&self.disk, task_id).await {
CheckpointManager::load_from_disk(self.disk.clone(), task_id).await?
} else {
CheckpointManager::new(self.disk.clone(), task_id.to_string()).await?
};
Ok((resume_manager, checkpoint_manager))
} else {
info!("Creating new resume state for task: {}", task_id);
let resume_manager =
ResumeManager::new(self.disk.clone(), task_id.to_string(), "erasure_set".to_string(), buckets.to_vec()).await?;
let checkpoint_manager = CheckpointManager::new(self.disk.clone(), task_id.to_string()).await?;
Ok((resume_manager, checkpoint_manager))
}
}
/// execute heal with resume
async fn execute_heal_with_resume(
&self,
buckets: &[String],
resume_manager: &ResumeManager,
checkpoint_manager: &CheckpointManager,
) -> Result<()> {
// 1. get current state
let state = resume_manager.get_state().await;
let checkpoint = checkpoint_manager.get_checkpoint().await;
info!(
"Resuming from bucket {} object {}",
checkpoint.current_bucket_index, checkpoint.current_object_index
);
// 2. initialize progress
self.initialize_progress(buckets, &state).await;
// 3. continue from checkpoint
let current_bucket_index = checkpoint.current_bucket_index;
let mut current_object_index = checkpoint.current_object_index;
let mut processed_objects = state.processed_objects;
let mut successful_objects = state.successful_objects;
let mut failed_objects = state.failed_objects;
let mut skipped_objects = state.skipped_objects;
// 4. process remaining buckets
for (bucket_idx, bucket) in buckets.iter().enumerate().skip(current_bucket_index) {
// check if completed
if state.completed_buckets.contains(bucket) {
continue;
}
// update current bucket
resume_manager.set_current_item(Some(bucket.clone()), None).await?;
// process objects in bucket
let bucket_result = self
.heal_bucket_with_resume(
bucket,
&mut current_object_index,
&mut processed_objects,
&mut successful_objects,
&mut failed_objects,
&mut skipped_objects,
resume_manager,
checkpoint_manager,
)
.await;
// update checkpoint position
checkpoint_manager.update_position(bucket_idx, current_object_index).await?;
// update progress
resume_manager
.update_progress(processed_objects, successful_objects, failed_objects, skipped_objects)
.await?;
// check cancel status
if self.cancel_token.is_cancelled() {
info!("Heal task cancelled");
return Err(Error::TaskCancelled);
}
// process bucket result
match bucket_result {
Ok(_) => {
resume_manager.complete_bucket(bucket).await?;
info!("Completed heal for bucket: {}", bucket);
}
Err(e) => {
error!("Failed to heal bucket {}: {}", bucket, e);
// continue to next bucket, do not interrupt the whole process
}
}
// reset object index
current_object_index = 0;
}
// 5. mark task completed
resume_manager.mark_completed().await?;
info!("Erasure set heal completed successfully");
Ok(())
}
/// heal single bucket with resume
#[allow(clippy::too_many_arguments)]
async fn heal_bucket_with_resume(
&self,
bucket: &str,
current_object_index: &mut usize,
processed_objects: &mut u64,
successful_objects: &mut u64,
failed_objects: &mut u64,
_skipped_objects: &mut u64,
resume_manager: &ResumeManager,
checkpoint_manager: &CheckpointManager,
) -> Result<()> {
info!("Starting heal for bucket: {} from object index {}", bucket, current_object_index);
// 1. get bucket info
let _bucket_info = match self.storage.get_bucket_info(bucket).await? {
Some(info) => info,
None => {
warn!("Bucket {} not found, skipping", bucket);
return Ok(());
}
};
// 2. get objects to heal
let objects = self.storage.list_objects_for_heal(bucket, "").await?;
// 3. continue from checkpoint
for (obj_idx, object) in objects.iter().enumerate().skip(*current_object_index) {
// check if already processed
if checkpoint_manager.get_checkpoint().await.processed_objects.contains(object) {
continue;
}
// update current object
resume_manager
.set_current_item(Some(bucket.to_string()), Some(object.clone()))
.await?;
// heal object
let heal_opts = HealOpts {
scan_mode: HealScanMode::Normal,
remove: true,
recreate: true,
..Default::default()
};
match self.storage.heal_object(bucket, object, None, &heal_opts).await {
Ok((_result, None)) => {
*successful_objects += 1;
checkpoint_manager.add_processed_object(object.clone()).await?;
info!("Successfully healed object {}/{}", bucket, object);
}
Ok((_, Some(err))) => {
*failed_objects += 1;
checkpoint_manager.add_failed_object(object.clone()).await?;
warn!("Failed to heal object {}/{}: {}", bucket, object, err);
}
Err(err) => {
*failed_objects += 1;
checkpoint_manager.add_failed_object(object.clone()).await?;
warn!("Error healing object {}/{}: {}", bucket, object, err);
}
}
*processed_objects += 1;
*current_object_index = obj_idx + 1;
// check cancel status
if self.cancel_token.is_cancelled() {
info!("Heal task cancelled during object processing");
return Err(Error::TaskCancelled);
}
// save checkpoint periodically
if obj_idx % 100 == 0 {
checkpoint_manager.update_position(0, *current_object_index).await?;
}
}
Ok(())
}
/// initialize progress tracking
async fn initialize_progress(&self, _buckets: &[String], state: &crate::heal::resume::ResumeState) {
let mut progress = self.progress.write().await;
progress.objects_scanned = state.total_objects;
progress.objects_healed = state.successful_objects;
progress.objects_failed = state.failed_objects;
progress.bytes_processed = 0; // set to 0 for now, can be extended later
progress.set_current_object(state.current_object.clone());
}
/// heal all buckets concurrently
#[allow(dead_code)]
async fn heal_buckets_concurrently(&self, buckets: &[String]) -> Vec<Result<()>> {
// use semaphore to control concurrency, avoid too many concurrent healings
let semaphore = Arc::new(tokio::sync::Semaphore::new(4)); // max 4 concurrent healings
let heal_futures = buckets.iter().map(|bucket| {
let bucket = bucket.clone();
let storage = self.storage.clone();
let progress = self.progress.clone();
let semaphore = semaphore.clone();
let cancel_token = self.cancel_token.clone();
async move {
let _permit = semaphore.acquire().await.unwrap();
if cancel_token.is_cancelled() {
return Err(Error::TaskCancelled);
}
Self::heal_single_bucket(&storage, &bucket, &progress).await
}
});
// use join_all to process concurrently
join_all(heal_futures).await
}
/// heal single bucket
#[allow(dead_code)]
async fn heal_single_bucket(
storage: &Arc<dyn HealStorageAPI>,
bucket: &str,
progress: &Arc<RwLock<HealProgress>>,
) -> Result<()> {
info!("Starting heal for bucket: {}", bucket);
// 1. get bucket info
let _bucket_info = match storage.get_bucket_info(bucket).await? {
Some(info) => info,
None => {
warn!("Bucket {} not found, skipping", bucket);
return Ok(());
}
};
// 2. get objects to heal
let objects = storage.list_objects_for_heal(bucket, "").await?;
// 3. update progress
{
let mut p = progress.write().await;
p.objects_scanned += objects.len() as u64;
}
// 4. heal objects concurrently
let heal_opts = HealOpts {
scan_mode: HealScanMode::Normal,
remove: true, // remove corrupted data
recreate: true, // recreate missing data
..Default::default()
};
let object_results = Self::heal_objects_concurrently(storage, bucket, &objects, &heal_opts, progress).await;
// 5. count results
let (success_count, failure_count) = object_results
.into_iter()
.fold((0, 0), |(success, failure), result| match result {
Ok(_) => (success + 1, failure),
Err(_) => (success, failure + 1),
});
// 6. update progress
{
let mut p = progress.write().await;
p.objects_healed += success_count;
p.objects_failed += failure_count;
p.set_current_object(Some(format!("completed bucket: {bucket}")));
}
info!(
"Completed heal for bucket {}: {} success, {} failures",
bucket, success_count, failure_count
);
Ok(())
}
/// heal objects concurrently
#[allow(dead_code)]
async fn heal_objects_concurrently(
storage: &Arc<dyn HealStorageAPI>,
bucket: &str,
objects: &[String],
heal_opts: &HealOpts,
_progress: &Arc<RwLock<HealProgress>>,
) -> Vec<Result<()>> {
// use semaphore to control object healing concurrency
let semaphore = Arc::new(tokio::sync::Semaphore::new(8)); // max 8 concurrent object healings
let heal_futures = objects.iter().map(|object| {
let object = object.clone();
let bucket = bucket.to_string();
let storage = storage.clone();
let heal_opts = *heal_opts;
let semaphore = semaphore.clone();
async move {
let _permit = semaphore.acquire().await.unwrap();
match storage.heal_object(&bucket, &object, None, &heal_opts).await {
Ok((_result, None)) => {
info!("Successfully healed object {}/{}", bucket, object);
Ok(())
}
Ok((_, Some(err))) => {
warn!("Failed to heal object {}/{}: {}", bucket, object, err);
Err(Error::other(err))
}
Err(err) => {
warn!("Error healing object {}/{}: {}", bucket, object, err);
Err(err)
}
}
}
});
join_all(heal_futures).await
}
/// process results
#[allow(dead_code)]
async fn process_results(&self, results: Vec<Result<()>>) -> Result<()> {
let (success_count, failure_count): (usize, usize) =
results.into_iter().fold((0, 0), |(success, failure), result| match result {
Ok(_) => (success + 1, failure),
Err(_) => (success, failure + 1),
});
let total = success_count + failure_count;
info!("Erasure set heal completed: {}/{} buckets successful", success_count, total);
if failure_count > 0 {
warn!("{} buckets failed to heal", failure_count);
return Err(Error::other(format!("{failure_count} buckets failed to heal")));
}
Ok(())
}
}
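
A usage sketch under stated assumptions: the caller already holds a HealStorageAPI handle and the DiskStore backing the erasure set being healed, and the bucket list was gathered beforehand; the set id string is a placeholder.

use std::sync::Arc;
use tokio::sync::RwLock;
use tokio_util::sync::CancellationToken;
use rustfs_ecstore::disk::DiskStore;
use crate::error::Result;
use crate::heal::{erasure_healer::ErasureSetHealer, progress::HealProgress, storage::HealStorageAPI};

// Hypothetical entry point: heal every bucket on one erasure set, with resume support.
async fn heal_set(storage: Arc<dyn HealStorageAPI>, disk: DiskStore, buckets: Vec<String>) -> Result<()> {
    let progress = Arc::new(RwLock::new(HealProgress::new()));
    let cancel = CancellationToken::new();
    let healer = ErasureSetHealer::new(storage, progress, cancel, disk);
    // "pool_0_set_0" is a placeholder set id in the format used by the manager's auto scanner.
    healer.heal_erasure_set(&buckets, "pool_0_set_0").await
}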

View File

@@ -0,0 +1,359 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::heal::task::{HealOptions, HealPriority, HealRequest, HealType};
use rustfs_ecstore::disk::endpoint::Endpoint;
use serde::{Deserialize, Serialize};
use std::time::SystemTime;
/// Corruption type
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum CorruptionType {
/// Data corruption
DataCorruption,
/// Metadata corruption
MetadataCorruption,
/// Partial corruption
PartialCorruption,
/// Complete corruption
CompleteCorruption,
}
/// Severity level
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub enum Severity {
/// Low severity
Low = 0,
/// Medium severity
Medium = 1,
/// High severity
High = 2,
/// Critical severity
Critical = 3,
}
/// Heal event
#[derive(Debug, Clone)]
pub enum HealEvent {
/// Object corruption event
ObjectCorruption {
bucket: String,
object: String,
version_id: Option<String>,
corruption_type: CorruptionType,
severity: Severity,
},
/// Object missing event
ObjectMissing {
bucket: String,
object: String,
version_id: Option<String>,
expected_locations: Vec<usize>,
available_locations: Vec<usize>,
},
/// Metadata corruption event
MetadataCorruption {
bucket: String,
object: String,
corruption_type: CorruptionType,
},
/// Disk status change event
DiskStatusChange {
endpoint: Endpoint,
old_status: String,
new_status: String,
},
/// EC decode failure event
ECDecodeFailure {
bucket: String,
object: String,
version_id: Option<String>,
missing_shards: Vec<usize>,
available_shards: Vec<usize>,
},
/// Checksum mismatch event
ChecksumMismatch {
bucket: String,
object: String,
version_id: Option<String>,
expected_checksum: String,
actual_checksum: String,
},
/// Bucket metadata corruption event
BucketMetadataCorruption {
bucket: String,
corruption_type: CorruptionType,
},
/// MRF metadata corruption event
MRFMetadataCorruption {
meta_path: String,
corruption_type: CorruptionType,
},
}
impl HealEvent {
/// Convert HealEvent to HealRequest
pub fn to_heal_request(&self) -> HealRequest {
match self {
HealEvent::ObjectCorruption {
bucket,
object,
version_id,
severity,
..
} => HealRequest::new(
HealType::Object {
bucket: bucket.clone(),
object: object.clone(),
version_id: version_id.clone(),
},
HealOptions::default(),
Self::severity_to_priority(severity),
),
HealEvent::ObjectMissing {
bucket,
object,
version_id,
..
} => HealRequest::new(
HealType::Object {
bucket: bucket.clone(),
object: object.clone(),
version_id: version_id.clone(),
},
HealOptions::default(),
HealPriority::High,
),
HealEvent::MetadataCorruption { bucket, object, .. } => HealRequest::new(
HealType::Metadata {
bucket: bucket.clone(),
object: object.clone(),
},
HealOptions::default(),
HealPriority::High,
),
HealEvent::DiskStatusChange { endpoint, .. } => {
// Convert disk status change to erasure set heal
// Note: this requires access to storage to fetch the bucket list, which is not available here.
// The caller must populate the bucket list, or it has to be retrieved elsewhere.
HealRequest::new(
HealType::ErasureSet {
buckets: vec![], // Empty bucket list - caller should populate this
set_disk_id: format!("{}_{}", endpoint.pool_idx, endpoint.set_idx),
},
HealOptions::default(),
HealPriority::High,
)
}
HealEvent::ECDecodeFailure {
bucket,
object,
version_id,
..
} => HealRequest::new(
HealType::ECDecode {
bucket: bucket.clone(),
object: object.clone(),
version_id: version_id.clone(),
},
HealOptions::default(),
HealPriority::Urgent,
),
HealEvent::ChecksumMismatch {
bucket,
object,
version_id,
..
} => HealRequest::new(
HealType::Object {
bucket: bucket.clone(),
object: object.clone(),
version_id: version_id.clone(),
},
HealOptions::default(),
HealPriority::High,
),
HealEvent::BucketMetadataCorruption { bucket, .. } => {
HealRequest::new(HealType::Bucket { bucket: bucket.clone() }, HealOptions::default(), HealPriority::High)
}
HealEvent::MRFMetadataCorruption { meta_path, .. } => HealRequest::new(
HealType::MRF {
meta_path: meta_path.clone(),
},
HealOptions::default(),
HealPriority::High,
),
}
}
/// Convert severity to priority
fn severity_to_priority(severity: &Severity) -> HealPriority {
match severity {
Severity::Low => HealPriority::Low,
Severity::Medium => HealPriority::Normal,
Severity::High => HealPriority::High,
Severity::Critical => HealPriority::Urgent,
}
}
/// Get event description
pub fn description(&self) -> String {
match self {
HealEvent::ObjectCorruption {
bucket,
object,
corruption_type,
..
} => {
format!("Object corruption detected: {bucket}/{object} - {corruption_type:?}")
}
HealEvent::ObjectMissing { bucket, object, .. } => {
format!("Object missing: {bucket}/{object}")
}
HealEvent::MetadataCorruption {
bucket,
object,
corruption_type,
..
} => {
format!("Metadata corruption: {bucket}/{object} - {corruption_type:?}")
}
HealEvent::DiskStatusChange {
endpoint,
old_status,
new_status,
..
} => {
format!("Disk status changed: {endpoint:?} {old_status} -> {new_status}")
}
HealEvent::ECDecodeFailure {
bucket,
object,
missing_shards,
..
} => {
format!("EC decode failure: {bucket}/{object} - missing shards: {missing_shards:?}")
}
HealEvent::ChecksumMismatch {
bucket,
object,
expected_checksum,
actual_checksum,
..
} => {
format!("Checksum mismatch: {bucket}/{object} - expected: {expected_checksum}, actual: {actual_checksum}")
}
HealEvent::BucketMetadataCorruption {
bucket, corruption_type, ..
} => {
format!("Bucket metadata corruption: {bucket} - {corruption_type:?}")
}
HealEvent::MRFMetadataCorruption {
meta_path,
corruption_type,
..
} => {
format!("MRF metadata corruption: {meta_path} - {corruption_type:?}")
}
}
}
/// Get event severity
pub fn severity(&self) -> Severity {
match self {
HealEvent::ObjectCorruption { severity, .. } => severity.clone(),
HealEvent::ObjectMissing { .. } => Severity::High,
HealEvent::MetadataCorruption { .. } => Severity::High,
HealEvent::DiskStatusChange { .. } => Severity::High,
HealEvent::ECDecodeFailure { .. } => Severity::Critical,
HealEvent::ChecksumMismatch { .. } => Severity::High,
HealEvent::BucketMetadataCorruption { .. } => Severity::High,
HealEvent::MRFMetadataCorruption { .. } => Severity::High,
}
}
/// Get event timestamp
pub fn timestamp(&self) -> SystemTime {
SystemTime::now()
}
}
/// Heal event handler
pub struct HealEventHandler {
/// Event queue
events: Vec<HealEvent>,
/// Maximum number of events
max_events: usize,
}
impl HealEventHandler {
pub fn new(max_events: usize) -> Self {
Self {
events: Vec::new(),
max_events,
}
}
/// Add event
pub fn add_event(&mut self, event: HealEvent) {
if self.events.len() >= self.max_events {
// Remove oldest event
self.events.remove(0);
}
self.events.push(event);
}
/// Get all events
pub fn get_events(&self) -> &[HealEvent] {
&self.events
}
/// Clear events
pub fn clear_events(&mut self) {
self.events.clear();
}
/// Get event count
pub fn event_count(&self) -> usize {
self.events.len()
}
/// Filter events by severity
pub fn filter_by_severity(&self, min_severity: Severity) -> Vec<&HealEvent> {
self.events.iter().filter(|event| event.severity() >= min_severity).collect()
}
/// Filter events by type
pub fn filter_by_type(&self, event_type: &str) -> Vec<&HealEvent> {
self.events
.iter()
.filter(|event| match event {
HealEvent::ObjectCorruption { .. } => event_type == "ObjectCorruption",
HealEvent::ObjectMissing { .. } => event_type == "ObjectMissing",
HealEvent::MetadataCorruption { .. } => event_type == "MetadataCorruption",
HealEvent::DiskStatusChange { .. } => event_type == "DiskStatusChange",
HealEvent::ECDecodeFailure { .. } => event_type == "ECDecodeFailure",
HealEvent::ChecksumMismatch { .. } => event_type == "ChecksumMismatch",
HealEvent::BucketMetadataCorruption { .. } => event_type == "BucketMetadataCorruption",
HealEvent::MRFMetadataCorruption { .. } => event_type == "MRFMetadataCorruption",
})
.collect()
}
}
impl Default for HealEventHandler {
fn default() -> Self {
Self::new(1000)
}
}
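
A short sketch of the event-to-request flow using only the types above; the bucket and object names are made up for illustration.

use crate::heal::event::{CorruptionType, HealEvent, HealEventHandler, Severity};

fn record_corruption() {
    let mut handler = HealEventHandler::default(); // keeps at most 1000 events
    let event = HealEvent::ObjectCorruption {
        bucket: "photos".to_string(),
        object: "2024/01/cat.jpg".to_string(),
        version_id: None,
        corruption_type: CorruptionType::DataCorruption,
        severity: Severity::High,
    };
    // High severity maps to HealPriority::High in to_heal_request().
    let request = event.to_heal_request();
    handler.add_event(event);
    // Only events at High severity or above are returned here.
    let urgent = handler.filter_by_severity(Severity::High);
    let _ = (request, urgent);
}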

View File

@@ -0,0 +1,422 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::error::{Error, Result};
use crate::heal::{
progress::{HealProgress, HealStatistics},
storage::HealStorageAPI,
task::{HealOptions, HealPriority, HealRequest, HealTask, HealTaskStatus, HealType},
};
use rustfs_ecstore::disk::DiskAPI;
use rustfs_ecstore::disk::error::DiskError;
use rustfs_ecstore::global::GLOBAL_LOCAL_DISK_MAP;
use std::{
collections::{HashMap, VecDeque},
sync::Arc,
time::{Duration, SystemTime},
};
use tokio::{
sync::{Mutex, RwLock},
time::interval,
};
use tokio_util::sync::CancellationToken;
use tracing::{error, info, warn};
/// Heal config
#[derive(Debug, Clone)]
pub struct HealConfig {
/// Whether to enable auto heal
pub enable_auto_heal: bool,
/// Heal interval
pub heal_interval: Duration,
/// Maximum concurrent heal tasks
pub max_concurrent_heals: usize,
/// Task timeout
pub task_timeout: Duration,
/// Queue size
pub queue_size: usize,
}
impl Default for HealConfig {
fn default() -> Self {
Self {
enable_auto_heal: true,
heal_interval: Duration::from_secs(10), // 10 seconds
max_concurrent_heals: 4,
task_timeout: Duration::from_secs(300), // 5 minutes
queue_size: 1000,
}
}
}
/// Heal state
#[derive(Debug, Default)]
pub struct HealState {
/// Whether running
pub is_running: bool,
/// Current heal cycle
pub current_cycle: u64,
/// Last heal time
pub last_heal_time: Option<SystemTime>,
/// Total healed objects
pub total_healed_objects: u64,
/// Total heal failures
pub total_heal_failures: u64,
/// Current active heal tasks
pub active_heal_count: usize,
}
/// Heal manager
pub struct HealManager {
/// Heal config
config: Arc<RwLock<HealConfig>>,
/// Heal state
state: Arc<RwLock<HealState>>,
/// Active heal tasks
active_heals: Arc<Mutex<HashMap<String, Arc<HealTask>>>>,
/// Heal queue
heal_queue: Arc<Mutex<VecDeque<HealRequest>>>,
/// Storage layer interface
storage: Arc<dyn HealStorageAPI>,
/// Cancel token
cancel_token: CancellationToken,
/// Statistics
statistics: Arc<RwLock<HealStatistics>>,
}
impl HealManager {
/// Create new HealManager
pub fn new(storage: Arc<dyn HealStorageAPI>, config: Option<HealConfig>) -> Self {
let config = config.unwrap_or_default();
Self {
config: Arc::new(RwLock::new(config)),
state: Arc::new(RwLock::new(HealState::default())),
active_heals: Arc::new(Mutex::new(HashMap::new())),
heal_queue: Arc::new(Mutex::new(VecDeque::new())),
storage,
cancel_token: CancellationToken::new(),
statistics: Arc::new(RwLock::new(HealStatistics::new())),
}
}
/// Start HealManager
pub async fn start(&self) -> Result<()> {
let mut state = self.state.write().await;
if state.is_running {
warn!("HealManager is already running");
return Ok(());
}
state.is_running = true;
drop(state);
info!("Starting HealManager");
// start scheduler
self.start_scheduler().await?;
// start auto disk scanner
self.start_auto_disk_scanner().await?;
info!("HealManager started successfully");
Ok(())
}
/// Stop HealManager
pub async fn stop(&self) -> Result<()> {
info!("Stopping HealManager");
// cancel all tasks
self.cancel_token.cancel();
// wait for all tasks to complete
let mut active_heals = self.active_heals.lock().await;
for task in active_heals.values() {
if let Err(e) = task.cancel().await {
warn!("Failed to cancel task {}: {}", task.id, e);
}
}
active_heals.clear();
// update state
let mut state = self.state.write().await;
state.is_running = false;
info!("HealManager stopped successfully");
Ok(())
}
/// Submit heal request
pub async fn submit_heal_request(&self, request: HealRequest) -> Result<String> {
let config = self.config.read().await;
let mut queue = self.heal_queue.lock().await;
if queue.len() >= config.queue_size {
return Err(Error::ConfigurationError {
message: "Heal queue is full".to_string(),
});
}
let request_id = request.id.clone();
queue.push_back(request);
drop(queue);
info!("Submitted heal request: {}", request_id);
Ok(request_id)
}
/// Get task status
pub async fn get_task_status(&self, task_id: &str) -> Result<HealTaskStatus> {
let active_heals = self.active_heals.lock().await;
if let Some(task) = active_heals.get(task_id) {
Ok(task.get_status().await)
} else {
Err(Error::TaskNotFound {
task_id: task_id.to_string(),
})
}
}
/// Get active tasks count
pub async fn get_active_tasks_count(&self) -> usize {
self.active_heals.lock().await.len()
}
/// Get task progress
pub async fn get_task_progress(&self, task_id: &str) -> Result<HealProgress> {
let active_heals = self.active_heals.lock().await;
if let Some(task) = active_heals.get(task_id) {
Ok(task.get_progress().await)
} else {
Err(Error::TaskNotFound {
task_id: task_id.to_string(),
})
}
}
/// Cancel task
pub async fn cancel_task(&self, task_id: &str) -> Result<()> {
let mut active_heals = self.active_heals.lock().await;
if let Some(task) = active_heals.get(task_id) {
task.cancel().await?;
active_heals.remove(task_id);
info!("Cancelled heal task: {}", task_id);
Ok(())
} else {
Err(Error::TaskNotFound {
task_id: task_id.to_string(),
})
}
}
/// Get statistics
pub async fn get_statistics(&self) -> HealStatistics {
self.statistics.read().await.clone()
}
/// Get active task count
pub async fn get_active_task_count(&self) -> usize {
let active_heals = self.active_heals.lock().await;
active_heals.len()
}
/// Get queue length
pub async fn get_queue_length(&self) -> usize {
let queue = self.heal_queue.lock().await;
queue.len()
}
/// Start scheduler
async fn start_scheduler(&self) -> Result<()> {
let config = self.config.clone();
let heal_queue = self.heal_queue.clone();
let active_heals = self.active_heals.clone();
let cancel_token = self.cancel_token.clone();
let statistics = self.statistics.clone();
let storage = self.storage.clone();
tokio::spawn(async move {
let mut interval = interval(config.read().await.heal_interval);
loop {
tokio::select! {
_ = cancel_token.cancelled() => {
info!("Heal scheduler received shutdown signal");
break;
}
_ = interval.tick() => {
Self::process_heal_queue(&heal_queue, &active_heals, &config, &statistics, &storage).await;
}
}
}
});
Ok(())
}
/// Start background task to auto scan local disks and enqueue erasure set heal requests
async fn start_auto_disk_scanner(&self) -> Result<()> {
let config = self.config.clone();
let heal_queue = self.heal_queue.clone();
let active_heals = self.active_heals.clone();
let cancel_token = self.cancel_token.clone();
let storage = self.storage.clone();
tokio::spawn(async move {
let mut interval = interval(config.read().await.heal_interval);
loop {
tokio::select! {
_ = cancel_token.cancelled() => {
info!("Auto disk scanner received shutdown signal");
break;
}
_ = interval.tick() => {
// Build list of endpoints that need healing
let mut endpoints = Vec::new();
for (_, disk_opt) in GLOBAL_LOCAL_DISK_MAP.read().await.iter() {
if let Some(disk) = disk_opt {
// detect unformatted disk via get_disk_id()
if let Err(err) = disk.get_disk_id().await {
if err == DiskError::UnformattedDisk {
endpoints.push(disk.endpoint());
continue;
}
}
}
}
if endpoints.is_empty() {
continue;
}
// Get bucket list for erasure set healing
let buckets = match storage.list_buckets().await {
Ok(buckets) => buckets.iter().map(|b| b.name.clone()).collect::<Vec<String>>(),
Err(e) => {
error!("Failed to get bucket list for auto healing: {}", e);
continue;
}
};
// Create erasure set heal requests for each endpoint
for ep in endpoints {
// compute the set id once so the dedup checks below and the enqueued
// request use the same identifier
let set_disk_id = format!("pool_{}_set_{}", ep.pool_idx, ep.set_idx);
// skip if already queued or healing
let mut skip = false;
{
let queue = heal_queue.lock().await;
if queue.iter().any(|req| {
matches!(&req.heal_type,
crate::heal::task::HealType::ErasureSet { set_disk_id: id, .. } if id == &set_disk_id)
}) {
skip = true;
}
}
if !skip {
let active = active_heals.lock().await;
if active.values().any(|task| {
matches!(&task.heal_type,
crate::heal::task::HealType::ErasureSet { set_disk_id: id, .. } if id == &set_disk_id)
}) {
skip = true;
}
}
if skip {
continue;
}
// enqueue erasure set heal request for this disk
let req = HealRequest::new(
HealType::ErasureSet {
buckets: buckets.clone(),
set_disk_id: set_disk_id.clone()
},
HealOptions::default(),
HealPriority::Normal,
);
let mut queue = heal_queue.lock().await;
queue.push_back(req);
info!("Enqueued auto erasure set heal for endpoint: {} (set_disk_id: {})", ep, set_disk_id);
}
}
}
}
});
Ok(())
}
/// Process heal queue
async fn process_heal_queue(
heal_queue: &Arc<Mutex<VecDeque<HealRequest>>>,
active_heals: &Arc<Mutex<HashMap<String, Arc<HealTask>>>>,
config: &Arc<RwLock<HealConfig>>,
statistics: &Arc<RwLock<HealStatistics>>,
storage: &Arc<dyn HealStorageAPI>,
) {
let config = config.read().await;
let mut active_heals_guard = active_heals.lock().await;
// check if new heal tasks can be started
if active_heals_guard.len() >= config.max_concurrent_heals {
return;
}
let mut queue = heal_queue.lock().await;
if let Some(request) = queue.pop_front() {
let task = Arc::new(HealTask::from_request(request, storage.clone()));
let task_id = task.id.clone();
active_heals_guard.insert(task_id.clone(), task.clone());
drop(active_heals_guard);
let active_heals_clone = active_heals.clone();
let statistics_clone = statistics.clone();
// start heal task
tokio::spawn(async move {
info!("Starting heal task: {}", task_id);
let result = task.execute().await;
match result {
Ok(_) => {
info!("Heal task completed successfully: {}", task_id);
}
Err(e) => {
error!("Heal task failed: {} - {}", task_id, e);
}
}
let mut active_heals_guard = active_heals_clone.lock().await;
if let Some(completed_task) = active_heals_guard.remove(&task_id) {
// update statistics
let mut stats = statistics_clone.write().await;
match completed_task.get_status().await {
HealTaskStatus::Completed => {
stats.update_task_completion(true);
}
_ => {
stats.update_task_completion(false);
}
}
stats.update_running_tasks(active_heals_guard.len() as u64);
}
});
// update statistics
let mut stats = statistics.write().await;
stats.total_tasks += 1;
}
}
}
impl std::fmt::Debug for HealManager {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("HealManager")
.field("config", &"<config>")
.field("state", &"<state>")
.field("active_heals_count", &"<active_heals>")
.field("queue_length", &"<queue>")
.finish()
}
}
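
A start-and-submit sketch; the HealStorageAPI implementation is assumed to come from the server's ECStore wiring and is not shown in this diff.

use std::sync::Arc;
use crate::error::Result;
use crate::heal::{
    manager::{HealConfig, HealManager},
    storage::HealStorageAPI,
    task::{HealOptions, HealPriority, HealRequest, HealType},
};

async fn start_and_submit(storage: Arc<dyn HealStorageAPI>) -> Result<String> {
    let manager = HealManager::new(storage, Some(HealConfig::default()));
    manager.start().await?; // spawns the scheduler and the auto disk scanner
    let request = HealRequest::new(
        HealType::Bucket { bucket: "photos".to_string() },
        HealOptions::default(),
        HealPriority::Normal,
    );
    // Returns the request id; task status can later be polled via get_task_status().
    manager.submit_heal_request(request).await
}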

View File

@@ -12,8 +12,16 @@
// See the License for the specific language governing permissions and
// limitations under the License.
pub const ERR_IGNORE_FILE_CONTRIB: &str = "ignore this file's contribution toward data-usage";
pub const ERR_SKIP_FILE: &str = "skip this file";
pub const ERR_HEAL_STOP_SIGNALLED: &str = "heal stop signaled";
pub const ERR_HEAL_IDLE_TIMEOUT: &str = "healing results were not consumed for too long";
pub const ERR_RETRY_HEALING: &str = "some items failed to heal, we will retry healing this drive again";
pub mod channel;
pub mod erasure_healer;
pub mod event;
pub mod manager;
pub mod progress;
pub mod resume;
pub mod storage;
pub mod task;
pub use erasure_healer::ErasureSetHealer;
pub use manager::HealManager;
pub use resume::{CheckpointManager, ResumeCheckpoint, ResumeManager, ResumeState, ResumeUtils};
pub use task::{HealOptions, HealPriority, HealRequest, HealTask, HealType};

View File

@@ -0,0 +1,148 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use serde::{Deserialize, Serialize};
use std::time::SystemTime;
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct HealProgress {
/// Objects scanned
pub objects_scanned: u64,
/// Objects healed
pub objects_healed: u64,
/// Objects failed
pub objects_failed: u64,
/// Bytes processed
pub bytes_processed: u64,
/// Current object
pub current_object: Option<String>,
/// Progress percentage
pub progress_percentage: f64,
/// Start time
pub start_time: Option<SystemTime>,
/// Last update time
pub last_update_time: Option<SystemTime>,
/// Estimated completion time
pub estimated_completion_time: Option<SystemTime>,
}
impl HealProgress {
pub fn new() -> Self {
Self {
start_time: Some(SystemTime::now()),
last_update_time: Some(SystemTime::now()),
..Default::default()
}
}
pub fn update_progress(&mut self, scanned: u64, healed: u64, failed: u64, bytes: u64) {
self.objects_scanned = scanned;
self.objects_healed = healed;
self.objects_failed = failed;
self.bytes_processed = bytes;
self.last_update_time = Some(SystemTime::now());
// calculate progress percentage
let total = scanned + healed + failed;
if total > 0 {
self.progress_percentage = (healed as f64 / total as f64) * 100.0;
}
}
pub fn set_current_object(&mut self, object: Option<String>) {
self.current_object = object;
self.last_update_time = Some(SystemTime::now());
}
pub fn is_completed(&self) -> bool {
self.progress_percentage >= 100.0
|| self.objects_scanned > 0 && self.objects_healed + self.objects_failed >= self.objects_scanned
}
pub fn get_success_rate(&self) -> f64 {
let total = self.objects_healed + self.objects_failed;
if total > 0 {
(self.objects_healed as f64 / total as f64) * 100.0
} else {
0.0
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealStatistics {
/// Total heal tasks
pub total_tasks: u64,
/// Successful tasks
pub successful_tasks: u64,
/// Failed tasks
pub failed_tasks: u64,
/// Running tasks
pub running_tasks: u64,
/// Total healed objects
pub total_objects_healed: u64,
/// Total healed bytes
pub total_bytes_healed: u64,
/// Last update time
pub last_update_time: SystemTime,
}
impl Default for HealStatistics {
fn default() -> Self {
Self::new()
}
}
impl HealStatistics {
pub fn new() -> Self {
Self {
total_tasks: 0,
successful_tasks: 0,
failed_tasks: 0,
running_tasks: 0,
total_objects_healed: 0,
total_bytes_healed: 0,
last_update_time: SystemTime::now(),
}
}
pub fn update_task_completion(&mut self, success: bool) {
if success {
self.successful_tasks += 1;
} else {
self.failed_tasks += 1;
}
self.last_update_time = SystemTime::now();
}
pub fn update_running_tasks(&mut self, count: u64) {
self.running_tasks = count;
self.last_update_time = SystemTime::now();
}
pub fn add_healed_objects(&mut self, count: u64, bytes: u64) {
self.total_objects_healed += count;
self.total_bytes_healed += bytes;
self.last_update_time = SystemTime::now();
}
pub fn get_success_rate(&self) -> f64 {
let total = self.successful_tasks + self.failed_tasks;
if total > 0 {
(self.successful_tasks as f64 / total as f64) * 100.0
} else {
0.0
}
}
}
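
A small worked example of the two trackers above; the numbers are arbitrary.

use crate::heal::progress::{HealProgress, HealStatistics};

fn track_one_run() {
    let mut progress = HealProgress::new();
    progress.set_current_object(Some("photos/2024/01/cat.jpg".to_string()));
    // 100 scanned, 95 healed, 5 failed, 1 MiB processed
    progress.update_progress(100, 95, 5, 1024 * 1024);
    assert!(progress.is_completed()); // healed + failed >= scanned
    assert!((progress.get_success_rate() - 95.0).abs() < 1e-9);

    let mut stats = HealStatistics::new();
    stats.update_task_completion(true);
    stats.add_healed_objects(95, 1024 * 1024);
    assert_eq!(stats.get_success_rate(), 100.0); // 1 successful task, 0 failed
}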

View File

@@ -0,0 +1,696 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::error::{Error, Result};
use rustfs_ecstore::disk::{BUCKET_META_PREFIX, DiskAPI, DiskStore, RUSTFS_META_BUCKET};
use serde::{Deserialize, Serialize};
use std::path::Path;
use std::sync::Arc;
use std::time::{SystemTime, UNIX_EPOCH};
use tokio::sync::RwLock;
use tracing::{debug, info, warn};
use uuid::Uuid;
/// resume state file constants
const RESUME_STATE_FILE: &str = "ahm_resume_state.json";
const RESUME_PROGRESS_FILE: &str = "ahm_progress.json";
const RESUME_CHECKPOINT_FILE: &str = "ahm_checkpoint.json";
/// resume state
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResumeState {
/// task id
pub task_id: String,
/// task type
pub task_type: String,
/// start time
pub start_time: u64,
/// last update time
pub last_update: u64,
/// completed
pub completed: bool,
/// total objects
pub total_objects: u64,
/// processed objects
pub processed_objects: u64,
/// successful objects
pub successful_objects: u64,
/// failed objects
pub failed_objects: u64,
/// skipped objects
pub skipped_objects: u64,
/// current bucket
pub current_bucket: Option<String>,
/// current object
pub current_object: Option<String>,
/// completed buckets
pub completed_buckets: Vec<String>,
/// pending buckets
pub pending_buckets: Vec<String>,
/// error message
pub error_message: Option<String>,
/// retry count
pub retry_count: u32,
/// max retries
pub max_retries: u32,
}
impl ResumeState {
pub fn new(task_id: String, task_type: String, buckets: Vec<String>) -> Self {
Self {
task_id,
task_type,
start_time: SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs(),
last_update: SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs(),
completed: false,
total_objects: 0,
processed_objects: 0,
successful_objects: 0,
failed_objects: 0,
skipped_objects: 0,
current_bucket: None,
current_object: None,
completed_buckets: Vec::new(),
pending_buckets: buckets,
error_message: None,
retry_count: 0,
max_retries: 3,
}
}
pub fn update_progress(&mut self, processed: u64, successful: u64, failed: u64, skipped: u64) {
self.processed_objects = processed;
self.successful_objects = successful;
self.failed_objects = failed;
self.skipped_objects = skipped;
self.last_update = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs();
}
pub fn set_current_item(&mut self, bucket: Option<String>, object: Option<String>) {
self.current_bucket = bucket;
self.current_object = object;
self.last_update = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs();
}
pub fn complete_bucket(&mut self, bucket: &str) {
if !self.completed_buckets.contains(&bucket.to_string()) {
self.completed_buckets.push(bucket.to_string());
}
if let Some(pos) = self.pending_buckets.iter().position(|b| b == bucket) {
self.pending_buckets.remove(pos);
}
self.last_update = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs();
}
pub fn mark_completed(&mut self) {
self.completed = true;
self.last_update = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs();
}
pub fn set_error(&mut self, error: String) {
self.error_message = Some(error);
self.last_update = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs();
}
pub fn increment_retry(&mut self) {
self.retry_count += 1;
self.last_update = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs();
}
pub fn can_retry(&self) -> bool {
self.retry_count < self.max_retries
}
pub fn get_progress_percentage(&self) -> f64 {
if self.total_objects == 0 {
return 0.0;
}
(self.processed_objects as f64 / self.total_objects as f64) * 100.0
}
pub fn get_success_rate(&self) -> f64 {
let total = self.successful_objects + self.failed_objects;
if total == 0 {
return 0.0;
}
(self.successful_objects as f64 / total as f64) * 100.0
}
}
/// resume manager
pub struct ResumeManager {
disk: DiskStore,
state: Arc<RwLock<ResumeState>>,
}
impl ResumeManager {
/// create new resume manager
pub async fn new(disk: DiskStore, task_id: String, task_type: String, buckets: Vec<String>) -> Result<Self> {
let state = ResumeState::new(task_id, task_type, buckets);
let manager = Self {
disk,
state: Arc::new(RwLock::new(state)),
};
// save initial state
manager.save_state().await?;
Ok(manager)
}
/// load resume state from disk
pub async fn load_from_disk(disk: DiskStore, task_id: &str) -> Result<Self> {
let state_data = Self::read_state_file(&disk, task_id).await?;
let state: ResumeState = serde_json::from_slice(&state_data).map_err(|e| Error::TaskExecutionFailed {
message: format!("Failed to deserialize resume state: {e}"),
})?;
Ok(Self {
disk,
state: Arc::new(RwLock::new(state)),
})
}
/// check if resume state exists
pub async fn has_resume_state(disk: &DiskStore, task_id: &str) -> bool {
let file_path = Path::new(BUCKET_META_PREFIX).join(format!("{task_id}_{RESUME_STATE_FILE}"));
match disk.read_all(RUSTFS_META_BUCKET, file_path.to_str().unwrap()).await {
Ok(data) => !data.is_empty(),
Err(_) => false,
}
}
/// get current state
pub async fn get_state(&self) -> ResumeState {
self.state.read().await.clone()
}
/// update progress
pub async fn update_progress(&self, processed: u64, successful: u64, failed: u64, skipped: u64) -> Result<()> {
let mut state = self.state.write().await;
state.update_progress(processed, successful, failed, skipped);
drop(state);
self.save_state().await
}
/// set current item
pub async fn set_current_item(&self, bucket: Option<String>, object: Option<String>) -> Result<()> {
let mut state = self.state.write().await;
state.set_current_item(bucket, object);
drop(state);
self.save_state().await
}
/// complete bucket
pub async fn complete_bucket(&self, bucket: &str) -> Result<()> {
let mut state = self.state.write().await;
state.complete_bucket(bucket);
drop(state);
self.save_state().await
}
/// mark task completed
pub async fn mark_completed(&self) -> Result<()> {
let mut state = self.state.write().await;
state.mark_completed();
drop(state);
self.save_state().await
}
/// set error message
pub async fn set_error(&self, error: String) -> Result<()> {
let mut state = self.state.write().await;
state.set_error(error);
drop(state);
self.save_state().await
}
/// increment retry count
pub async fn increment_retry(&self) -> Result<()> {
let mut state = self.state.write().await;
state.increment_retry();
drop(state);
self.save_state().await
}
/// cleanup resume state
pub async fn cleanup(&self) -> Result<()> {
let state = self.state.read().await;
let task_id = &state.task_id;
// delete state files
let state_file = Path::new(BUCKET_META_PREFIX).join(format!("{task_id}_{RESUME_STATE_FILE}"));
let progress_file = Path::new(BUCKET_META_PREFIX).join(format!("{task_id}_{RESUME_PROGRESS_FILE}"));
let checkpoint_file = Path::new(BUCKET_META_PREFIX).join(format!("{task_id}_{RESUME_CHECKPOINT_FILE}"));
// ignore delete errors, files may not exist
let _ = self
.disk
.delete(RUSTFS_META_BUCKET, state_file.to_str().unwrap(), Default::default())
.await;
let _ = self
.disk
.delete(RUSTFS_META_BUCKET, progress_file.to_str().unwrap(), Default::default())
.await;
let _ = self
.disk
.delete(RUSTFS_META_BUCKET, checkpoint_file.to_str().unwrap(), Default::default())
.await;
info!("Cleaned up resume state for task: {}", task_id);
Ok(())
}
/// save state to disk
async fn save_state(&self) -> Result<()> {
let state = self.state.read().await;
let state_data = serde_json::to_vec(&*state).map_err(|e| Error::TaskExecutionFailed {
message: format!("Failed to serialize resume state: {e}"),
})?;
let file_path = Path::new(BUCKET_META_PREFIX).join(format!("{}_{}", state.task_id, RESUME_STATE_FILE));
self.disk
.write_all(RUSTFS_META_BUCKET, file_path.to_str().unwrap(), state_data.into())
.await
.map_err(|e| Error::TaskExecutionFailed {
message: format!("Failed to save resume state: {e}"),
})?;
debug!("Saved resume state for task: {}", state.task_id);
Ok(())
}
/// read state file from disk
async fn read_state_file(disk: &DiskStore, task_id: &str) -> Result<Vec<u8>> {
let file_path = Path::new(BUCKET_META_PREFIX).join(format!("{task_id}_{RESUME_STATE_FILE}"));
disk.read_all(RUSTFS_META_BUCKET, file_path.to_str().unwrap())
.await
.map(|bytes| bytes.to_vec())
.map_err(|e| Error::TaskExecutionFailed {
message: format!("Failed to read resume state file: {e}"),
})
}
}
/// resume checkpoint
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResumeCheckpoint {
/// task id
pub task_id: String,
/// checkpoint time
pub checkpoint_time: u64,
/// current bucket index
pub current_bucket_index: usize,
/// current object index
pub current_object_index: usize,
/// processed objects
pub processed_objects: Vec<String>,
/// failed objects
pub failed_objects: Vec<String>,
/// skipped objects
pub skipped_objects: Vec<String>,
}
impl ResumeCheckpoint {
pub fn new(task_id: String) -> Self {
Self {
task_id,
checkpoint_time: SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs(),
current_bucket_index: 0,
current_object_index: 0,
processed_objects: Vec::new(),
failed_objects: Vec::new(),
skipped_objects: Vec::new(),
}
}
pub fn update_position(&mut self, bucket_index: usize, object_index: usize) {
self.current_bucket_index = bucket_index;
self.current_object_index = object_index;
self.checkpoint_time = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs();
}
pub fn add_processed_object(&mut self, object: String) {
if !self.processed_objects.contains(&object) {
self.processed_objects.push(object);
}
}
pub fn add_failed_object(&mut self, object: String) {
if !self.failed_objects.contains(&object) {
self.failed_objects.push(object);
}
}
pub fn add_skipped_object(&mut self, object: String) {
if !self.skipped_objects.contains(&object) {
self.skipped_objects.push(object);
}
}
}
/// resume checkpoint manager
pub struct CheckpointManager {
disk: DiskStore,
checkpoint: Arc<RwLock<ResumeCheckpoint>>,
}
impl CheckpointManager {
/// create new checkpoint manager
pub async fn new(disk: DiskStore, task_id: String) -> Result<Self> {
let checkpoint = ResumeCheckpoint::new(task_id);
let manager = Self {
disk,
checkpoint: Arc::new(RwLock::new(checkpoint)),
};
// save initial checkpoint
manager.save_checkpoint().await?;
Ok(manager)
}
/// load checkpoint from disk
pub async fn load_from_disk(disk: DiskStore, task_id: &str) -> Result<Self> {
let checkpoint_data = Self::read_checkpoint_file(&disk, task_id).await?;
let checkpoint: ResumeCheckpoint = serde_json::from_slice(&checkpoint_data).map_err(|e| Error::TaskExecutionFailed {
message: format!("Failed to deserialize checkpoint: {e}"),
})?;
Ok(Self {
disk,
checkpoint: Arc::new(RwLock::new(checkpoint)),
})
}
/// check if checkpoint exists
pub async fn has_checkpoint(disk: &DiskStore, task_id: &str) -> bool {
let file_path = Path::new(BUCKET_META_PREFIX).join(format!("{task_id}_{RESUME_CHECKPOINT_FILE}"));
match disk.read_all(RUSTFS_META_BUCKET, file_path.to_str().unwrap()).await {
Ok(data) => !data.is_empty(),
Err(_) => false,
}
}
/// get current checkpoint
pub async fn get_checkpoint(&self) -> ResumeCheckpoint {
self.checkpoint.read().await.clone()
}
/// update position
pub async fn update_position(&self, bucket_index: usize, object_index: usize) -> Result<()> {
let mut checkpoint = self.checkpoint.write().await;
checkpoint.update_position(bucket_index, object_index);
drop(checkpoint);
self.save_checkpoint().await
}
/// add processed object
pub async fn add_processed_object(&self, object: String) -> Result<()> {
let mut checkpoint = self.checkpoint.write().await;
checkpoint.add_processed_object(object);
drop(checkpoint);
self.save_checkpoint().await
}
/// add failed object
pub async fn add_failed_object(&self, object: String) -> Result<()> {
let mut checkpoint = self.checkpoint.write().await;
checkpoint.add_failed_object(object);
drop(checkpoint);
self.save_checkpoint().await
}
/// add skipped object
pub async fn add_skipped_object(&self, object: String) -> Result<()> {
let mut checkpoint = self.checkpoint.write().await;
checkpoint.add_skipped_object(object);
drop(checkpoint);
self.save_checkpoint().await
}
/// cleanup checkpoint
pub async fn cleanup(&self) -> Result<()> {
let checkpoint = self.checkpoint.read().await;
let task_id = &checkpoint.task_id;
let checkpoint_file = Path::new(BUCKET_META_PREFIX).join(format!("{task_id}_{RESUME_CHECKPOINT_FILE}"));
let _ = self
.disk
.delete(RUSTFS_META_BUCKET, checkpoint_file.to_str().unwrap(), Default::default())
.await;
info!("Cleaned up checkpoint for task: {}", task_id);
Ok(())
}
/// save checkpoint to disk
async fn save_checkpoint(&self) -> Result<()> {
let checkpoint = self.checkpoint.read().await;
let checkpoint_data = serde_json::to_vec(&*checkpoint).map_err(|e| Error::TaskExecutionFailed {
message: format!("Failed to serialize checkpoint: {e}"),
})?;
let file_path = Path::new(BUCKET_META_PREFIX).join(format!("{}_{}", checkpoint.task_id, RESUME_CHECKPOINT_FILE));
self.disk
.write_all(RUSTFS_META_BUCKET, file_path.to_str().unwrap(), checkpoint_data.into())
.await
.map_err(|e| Error::TaskExecutionFailed {
message: format!("Failed to save checkpoint: {e}"),
})?;
debug!("Saved checkpoint for task: {}", checkpoint.task_id);
Ok(())
}
/// read checkpoint file from disk
async fn read_checkpoint_file(disk: &DiskStore, task_id: &str) -> Result<Vec<u8>> {
let file_path = Path::new(BUCKET_META_PREFIX).join(format!("{task_id}_{RESUME_CHECKPOINT_FILE}"));
disk.read_all(RUSTFS_META_BUCKET, file_path.to_str().unwrap())
.await
.map(|bytes| bytes.to_vec())
.map_err(|e| Error::TaskExecutionFailed {
message: format!("Failed to read checkpoint file: {e}"),
})
}
}
/// resume utils
pub struct ResumeUtils;
impl ResumeUtils {
/// generate unique task id
pub fn generate_task_id() -> String {
Uuid::new_v4().to_string()
}
/// check if task can be resumed
pub async fn can_resume_task(disk: &DiskStore, task_id: &str) -> bool {
ResumeManager::has_resume_state(disk, task_id).await
}
/// get all resumable task ids
pub async fn get_resumable_tasks(disk: &DiskStore) -> Result<Vec<String>> {
// List all files in the buckets metadata directory
let entries = match disk.list_dir("", RUSTFS_META_BUCKET, BUCKET_META_PREFIX, -1).await {
Ok(entries) => entries,
Err(e) => {
debug!("Failed to list resume state files: {}", e);
return Ok(Vec::new());
}
};
let mut task_ids = Vec::new();
// Filter files that end with ahm_resume_state.json and extract task IDs
for entry in entries {
if entry.ends_with(&format!("_{RESUME_STATE_FILE}")) {
// Extract task ID from filename: {task_id}_ahm_resume_state.json
if let Some(task_id) = entry.strip_suffix(&format!("_{RESUME_STATE_FILE}")) {
if !task_id.is_empty() {
task_ids.push(task_id.to_string());
}
}
}
}
debug!("Found {} resumable tasks: {:?}", task_ids.len(), task_ids);
Ok(task_ids)
}
/// cleanup expired resume states
pub async fn cleanup_expired_states(disk: &DiskStore, max_age_hours: u64) -> Result<()> {
let task_ids = Self::get_resumable_tasks(disk).await?;
let current_time = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs();
for task_id in task_ids {
if let Ok(resume_manager) = ResumeManager::load_from_disk(disk.clone(), &task_id).await {
let state = resume_manager.get_state().await;
let age_hours = (current_time - state.last_update) / 3600;
if age_hours > max_age_hours {
info!("Cleaning up expired resume state for task: {} (age: {} hours)", task_id, age_hours);
if let Err(e) = resume_manager.cleanup().await {
warn!("Failed to cleanup expired resume state for task {}: {}", task_id, e);
}
}
}
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
#[tokio::test]
async fn test_resume_state_creation() {
let task_id = ResumeUtils::generate_task_id();
let buckets = vec!["bucket1".to_string(), "bucket2".to_string()];
let state = ResumeState::new(task_id.clone(), "erasure_set".to_string(), buckets);
assert_eq!(state.task_id, task_id);
assert_eq!(state.task_type, "erasure_set");
assert!(!state.completed);
assert_eq!(state.processed_objects, 0);
assert_eq!(state.pending_buckets.len(), 2);
}
#[tokio::test]
async fn test_resume_state_progress() {
let task_id = ResumeUtils::generate_task_id();
let buckets = vec!["bucket1".to_string()];
let mut state = ResumeState::new(task_id, "erasure_set".to_string(), buckets);
state.update_progress(10, 8, 1, 1);
assert_eq!(state.processed_objects, 10);
assert_eq!(state.successful_objects, 8);
assert_eq!(state.failed_objects, 1);
assert_eq!(state.skipped_objects, 1);
let progress = state.get_progress_percentage();
assert_eq!(progress, 0.0); // total_objects is 0
state.total_objects = 100;
let progress = state.get_progress_percentage();
assert_eq!(progress, 10.0);
}
#[tokio::test]
async fn test_resume_state_bucket_completion() {
let task_id = ResumeUtils::generate_task_id();
let buckets = vec!["bucket1".to_string(), "bucket2".to_string()];
let mut state = ResumeState::new(task_id, "erasure_set".to_string(), buckets);
assert_eq!(state.pending_buckets.len(), 2);
assert_eq!(state.completed_buckets.len(), 0);
state.complete_bucket("bucket1");
assert_eq!(state.pending_buckets.len(), 1);
assert_eq!(state.completed_buckets.len(), 1);
assert!(state.completed_buckets.contains(&"bucket1".to_string()));
}
#[tokio::test]
async fn test_resume_utils() {
let task_id1 = ResumeUtils::generate_task_id();
let task_id2 = ResumeUtils::generate_task_id();
assert_ne!(task_id1, task_id2);
assert_eq!(task_id1.len(), 36); // UUID length
assert_eq!(task_id2.len(), 36);
}
#[tokio::test]
async fn test_get_resumable_tasks_integration() {
use rustfs_ecstore::disk::{DiskOption, endpoint::Endpoint, new_disk};
use tempfile::TempDir;
// Create a temporary directory for testing
let temp_dir = TempDir::new().unwrap();
let disk_path = temp_dir.path().join("test_disk");
std::fs::create_dir_all(&disk_path).unwrap();
// Create a local disk for testing
let endpoint = Endpoint::try_from(disk_path.to_string_lossy().as_ref()).unwrap();
let disk_option = DiskOption {
cleanup: false,
health_check: false,
};
let disk = new_disk(&endpoint, &disk_option).await.unwrap();
// Create necessary directories first (ignore if already exist)
let _ = disk.make_volume(RUSTFS_META_BUCKET).await;
let _ = disk.make_volume(&format!("{RUSTFS_META_BUCKET}/{BUCKET_META_PREFIX}")).await;
// Create some test resume state files
let task_ids = vec![
"test-task-1".to_string(),
"test-task-2".to_string(),
"test-task-3".to_string(),
];
// Save resume state files for each task
for task_id in &task_ids {
let state = ResumeState::new(
task_id.clone(),
"erasure_set".to_string(),
vec!["bucket1".to_string(), "bucket2".to_string()],
);
let state_data = serde_json::to_vec(&state).unwrap();
let file_path = format!("{BUCKET_META_PREFIX}/{task_id}_{RESUME_STATE_FILE}");
disk.write_all(RUSTFS_META_BUCKET, &file_path, state_data.into())
.await
.unwrap();
}
// Also create some non-resume state files to test filtering
let non_resume_files = vec![
"other_file.txt",
"task4_ahm_checkpoint.json",
"task5_ahm_progress.json",
"_ahm_resume_state.json", // Invalid: empty task ID
];
for file_name in non_resume_files {
let file_path = format!("{BUCKET_META_PREFIX}/{file_name}");
disk.write_all(RUSTFS_META_BUCKET, &file_path, b"test data".to_vec().into())
.await
.unwrap();
}
// Now call get_resumable_tasks to see if it finds the correct files
let found_task_ids = ResumeUtils::get_resumable_tasks(&disk).await.unwrap();
// Verify that only the valid resume state files are found
assert_eq!(found_task_ids.len(), 3);
for task_id in &task_ids {
assert!(found_task_ids.contains(task_id), "Task ID {task_id} not found");
}
// Verify that invalid files are not included
assert!(!found_task_ids.contains(&"".to_string()));
assert!(!found_task_ids.contains(&"task4".to_string()));
assert!(!found_task_ids.contains(&"task5".to_string()));
// Clean up
temp_dir.close().unwrap();
}
}
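For orientation, a minimal usage sketch of the resume helpers defined above, assuming an already-constructed DiskStore handle; the 24-hour retention value and the helper function name are illustrative and not part of the commit:
// Illustrative only: prune stale resume state, then report what can still be resumed.
async fn report_resumable_tasks(disk: &DiskStore) -> Result<()> {
// Drop resume states older than 24 hours (retention value chosen for the example).
ResumeUtils::cleanup_expired_states(disk, 24).await?;
for task_id in ResumeUtils::get_resumable_tasks(disk).await? {
if ResumeUtils::can_resume_task(disk, &task_id).await {
let resume_manager = ResumeManager::load_from_disk(disk.clone(), &task_id).await?;
let state = resume_manager.get_state().await;
info!("task {} resumable at {:.1}% progress", task_id, state.get_progress_percentage());
}
}
Ok(())
}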

View File

@@ -0,0 +1,506 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::error::{Error, Result};
use async_trait::async_trait;
use rustfs_common::heal_channel::{HealOpts, HealScanMode};
use rustfs_ecstore::{
disk::{DiskStore, endpoint::Endpoint},
store::ECStore,
store_api::{BucketInfo, ObjectIO, StorageAPI},
};
use rustfs_madmin::heal_commands::HealResultItem;
use std::sync::Arc;
use tracing::{debug, error, info, warn};
/// Disk status for heal operations
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DiskStatus {
/// Ok
Ok,
/// Offline
Offline,
/// Corrupt
Corrupt,
/// Missing
Missing,
/// Permission denied
PermissionDenied,
/// Faulty
Faulty,
/// Root mount
RootMount,
/// Unknown
Unknown,
/// Unformatted
Unformatted,
}
/// Heal storage layer interface
#[async_trait]
pub trait HealStorageAPI: Send + Sync {
/// Get object meta
async fn get_object_meta(&self, bucket: &str, object: &str) -> Result<Option<rustfs_ecstore::store_api::ObjectInfo>>;
/// Get object data
async fn get_object_data(&self, bucket: &str, object: &str) -> Result<Option<Vec<u8>>>;
/// Put object data
async fn put_object_data(&self, bucket: &str, object: &str, data: &[u8]) -> Result<()>;
/// Delete object
async fn delete_object(&self, bucket: &str, object: &str) -> Result<()>;
/// Check object integrity
async fn verify_object_integrity(&self, bucket: &str, object: &str) -> Result<bool>;
/// EC decode rebuild
async fn ec_decode_rebuild(&self, bucket: &str, object: &str) -> Result<Vec<u8>>;
/// Get disk status
async fn get_disk_status(&self, endpoint: &Endpoint) -> Result<DiskStatus>;
/// Format disk
async fn format_disk(&self, endpoint: &Endpoint) -> Result<()>;
/// Get bucket info
async fn get_bucket_info(&self, bucket: &str) -> Result<Option<BucketInfo>>;
/// Fix bucket metadata
async fn heal_bucket_metadata(&self, bucket: &str) -> Result<()>;
/// Get all buckets
async fn list_buckets(&self) -> Result<Vec<BucketInfo>>;
/// Check object exists
async fn object_exists(&self, bucket: &str, object: &str) -> Result<bool>;
/// Get object size
async fn get_object_size(&self, bucket: &str, object: &str) -> Result<Option<u64>>;
/// Get object checksum
async fn get_object_checksum(&self, bucket: &str, object: &str) -> Result<Option<String>>;
/// Heal object using ecstore
async fn heal_object(
&self,
bucket: &str,
object: &str,
version_id: Option<&str>,
opts: &HealOpts,
) -> Result<(HealResultItem, Option<Error>)>;
/// Heal bucket using ecstore
async fn heal_bucket(&self, bucket: &str, opts: &HealOpts) -> Result<HealResultItem>;
/// Heal format using ecstore
async fn heal_format(&self, dry_run: bool) -> Result<(HealResultItem, Option<Error>)>;
/// List objects for healing
async fn list_objects_for_heal(&self, bucket: &str, prefix: &str) -> Result<Vec<String>>;
/// Get disk for resume functionality
async fn get_disk_for_resume(&self, set_disk_id: &str) -> Result<DiskStore>;
}
/// ECStore Heal storage layer implementation
pub struct ECStoreHealStorage {
ecstore: Arc<ECStore>,
}
impl ECStoreHealStorage {
pub fn new(ecstore: Arc<ECStore>) -> Self {
Self { ecstore }
}
}
#[async_trait]
impl HealStorageAPI for ECStoreHealStorage {
async fn get_object_meta(&self, bucket: &str, object: &str) -> Result<Option<rustfs_ecstore::store_api::ObjectInfo>> {
debug!("Getting object meta: {}/{}", bucket, object);
match self.ecstore.get_object_info(bucket, object, &Default::default()).await {
Ok(info) => Ok(Some(info)),
Err(e) => {
error!("Failed to get object meta: {}/{} - {}", bucket, object, e);
Err(Error::other(e))
}
}
}
async fn get_object_data(&self, bucket: &str, object: &str) -> Result<Option<Vec<u8>>> {
debug!("Getting object data: {}/{}", bucket, object);
match (*self.ecstore)
.get_object_reader(bucket, object, None, Default::default(), &Default::default())
.await
{
Ok(mut reader) => match reader.read_all().await {
Ok(data) => Ok(Some(data)),
Err(e) => {
error!("Failed to read object data: {}/{} - {}", bucket, object, e);
Err(Error::other(e))
}
},
Err(e) => {
error!("Failed to get object: {}/{} - {}", bucket, object, e);
Err(Error::other(e))
}
}
}
async fn put_object_data(&self, bucket: &str, object: &str, data: &[u8]) -> Result<()> {
debug!("Putting object data: {}/{} ({} bytes)", bucket, object, data.len());
let mut reader = rustfs_ecstore::store_api::PutObjReader::from_vec(data.to_vec());
match (*self.ecstore)
.put_object(bucket, object, &mut reader, &Default::default())
.await
{
Ok(_) => {
info!("Successfully put object: {}/{}", bucket, object);
Ok(())
}
Err(e) => {
error!("Failed to put object: {}/{} - {}", bucket, object, e);
Err(Error::other(e))
}
}
}
async fn delete_object(&self, bucket: &str, object: &str) -> Result<()> {
debug!("Deleting object: {}/{}", bucket, object);
match self.ecstore.delete_object(bucket, object, Default::default()).await {
Ok(_) => {
info!("Successfully deleted object: {}/{}", bucket, object);
Ok(())
}
Err(e) => {
error!("Failed to delete object: {}/{} - {}", bucket, object, e);
Err(Error::other(e))
}
}
}
async fn verify_object_integrity(&self, bucket: &str, object: &str) -> Result<bool> {
debug!("Verifying object integrity: {}/{}", bucket, object);
// Try to get object info and data to verify integrity
match self.get_object_meta(bucket, object).await? {
Some(obj_info) => {
// Check if object has valid metadata
if obj_info.size < 0 {
warn!("Object has invalid size: {}/{}", bucket, object);
return Ok(false);
}
// Try to read object data to verify it's accessible
match self.get_object_data(bucket, object).await {
Ok(Some(_)) => {
info!("Object integrity check passed: {}/{}", bucket, object);
Ok(true)
}
Ok(None) => {
warn!("Object data not found: {}/{}", bucket, object);
Ok(false)
}
Err(_) => {
warn!("Object data read failed: {}/{}", bucket, object);
Ok(false)
}
}
}
None => {
warn!("Object metadata not found: {}/{}", bucket, object);
Ok(false)
}
}
}
async fn ec_decode_rebuild(&self, bucket: &str, object: &str) -> Result<Vec<u8>> {
debug!("EC decode rebuild: {}/{}", bucket, object);
// Use ecstore's heal_object to rebuild the object
let heal_opts = HealOpts {
recursive: false,
dry_run: false,
remove: false,
recreate: true,
scan_mode: HealScanMode::Deep,
update_parity: true,
no_lock: false,
pool: None,
set: None,
};
match self.heal_object(bucket, object, None, &heal_opts).await {
Ok((_result, error)) => {
if error.is_some() {
return Err(Error::TaskExecutionFailed {
message: format!("Heal failed: {error:?}"),
});
}
// After healing, try to read the object data
match self.get_object_data(bucket, object).await? {
Some(data) => {
info!("EC decode rebuild successful: {}/{} ({} bytes)", bucket, object, data.len());
Ok(data)
}
None => {
error!("Object not found after heal: {}/{}", bucket, object);
Err(Error::TaskExecutionFailed {
message: format!("Object not found after heal: {bucket}/{object}"),
})
}
}
}
Err(e) => {
error!("Heal operation failed: {}/{} - {}", bucket, object, e);
Err(e)
}
}
}
async fn get_disk_status(&self, endpoint: &Endpoint) -> Result<DiskStatus> {
debug!("Getting disk status: {:?}", endpoint);
// TODO: implement disk status check using ecstore
// For now, return Ok status
info!("Disk status check: {:?} - OK", endpoint);
Ok(DiskStatus::Ok)
}
async fn format_disk(&self, endpoint: &Endpoint) -> Result<()> {
debug!("Formatting disk: {:?}", endpoint);
// Use ecstore's heal_format
match self.heal_format(false).await {
Ok((_, error)) => {
if error.is_some() {
return Err(Error::other(format!("Format failed: {error:?}")));
}
info!("Successfully formatted disk: {:?}", endpoint);
Ok(())
}
Err(e) => {
error!("Failed to format disk: {:?} - {}", endpoint, e);
Err(e)
}
}
}
async fn get_bucket_info(&self, bucket: &str) -> Result<Option<BucketInfo>> {
debug!("Getting bucket info: {}", bucket);
match self.ecstore.get_bucket_info(bucket, &Default::default()).await {
Ok(info) => Ok(Some(info)),
Err(e) => {
error!("Failed to get bucket info: {} - {}", bucket, e);
Err(Error::other(e))
}
}
}
async fn heal_bucket_metadata(&self, bucket: &str) -> Result<()> {
debug!("Healing bucket metadata: {}", bucket);
let heal_opts = HealOpts {
recursive: true,
dry_run: false,
remove: false,
recreate: false,
scan_mode: HealScanMode::Normal,
update_parity: false,
no_lock: false,
pool: None,
set: None,
};
match self.heal_bucket(bucket, &heal_opts).await {
Ok(_) => {
info!("Successfully healed bucket metadata: {}", bucket);
Ok(())
}
Err(e) => {
error!("Failed to heal bucket metadata: {} - {}", bucket, e);
Err(e)
}
}
}
async fn list_buckets(&self) -> Result<Vec<BucketInfo>> {
debug!("Listing buckets");
match self.ecstore.list_bucket(&Default::default()).await {
Ok(buckets) => Ok(buckets),
Err(e) => {
error!("Failed to list buckets: {}", e);
Err(Error::other(e))
}
}
}
async fn object_exists(&self, bucket: &str, object: &str) -> Result<bool> {
debug!("Checking object exists: {}/{}", bucket, object);
match self.get_object_meta(bucket, object).await {
Ok(Some(_)) => Ok(true),
Ok(None) => Ok(false),
Err(_) => Ok(false),
}
}
async fn get_object_size(&self, bucket: &str, object: &str) -> Result<Option<u64>> {
debug!("Getting object size: {}/{}", bucket, object);
match self.get_object_meta(bucket, object).await {
Ok(Some(obj_info)) => Ok(Some(obj_info.size as u64)),
Ok(None) => Ok(None),
Err(e) => Err(e),
}
}
async fn get_object_checksum(&self, bucket: &str, object: &str) -> Result<Option<String>> {
debug!("Getting object checksum: {}/{}", bucket, object);
match self.get_object_meta(bucket, object).await {
Ok(Some(obj_info)) => {
// Convert checksum bytes to hex string
let checksum = obj_info.checksum.iter().map(|b| format!("{b:02x}")).collect::<String>();
Ok(Some(checksum))
}
Ok(None) => Ok(None),
Err(e) => Err(e),
}
}
async fn heal_object(
&self,
bucket: &str,
object: &str,
version_id: Option<&str>,
opts: &HealOpts,
) -> Result<(HealResultItem, Option<Error>)> {
debug!("Healing object: {}/{}", bucket, object);
let version_id_str = version_id.unwrap_or("");
match self.ecstore.heal_object(bucket, object, version_id_str, opts).await {
Ok((result, ecstore_error)) => {
let error = ecstore_error.map(Error::other);
info!("Heal object completed: {}/{} - result: {:?}, error: {:?}", bucket, object, result, error);
Ok((result, error))
}
Err(e) => {
error!("Heal object failed: {}/{} - {}", bucket, object, e);
Err(Error::other(e))
}
}
}
async fn heal_bucket(&self, bucket: &str, opts: &HealOpts) -> Result<HealResultItem> {
debug!("Healing bucket: {}", bucket);
match self.ecstore.heal_bucket(bucket, opts).await {
Ok(result) => {
info!("Heal bucket completed: {} - result: {:?}", bucket, result);
Ok(result)
}
Err(e) => {
error!("Heal bucket failed: {} - {}", bucket, e);
Err(Error::other(e))
}
}
}
async fn heal_format(&self, dry_run: bool) -> Result<(HealResultItem, Option<Error>)> {
debug!("Healing format (dry_run: {})", dry_run);
match self.ecstore.heal_format(dry_run).await {
Ok((result, ecstore_error)) => {
let error = ecstore_error.map(Error::other);
info!("Heal format completed - result: {:?}, error: {:?}", result, error);
Ok((result, error))
}
Err(e) => {
error!("Heal format failed: {}", e);
Err(Error::other(e))
}
}
}
async fn list_objects_for_heal(&self, bucket: &str, prefix: &str) -> Result<Vec<String>> {
debug!("Listing objects for heal: {}/{}", bucket, prefix);
// Use list_objects_v2 to get objects
match self
.ecstore
.clone()
.list_objects_v2(bucket, prefix, None, None, 1000, false, None)
.await
{
Ok(list_info) => {
let objects: Vec<String> = list_info.objects.into_iter().map(|obj| obj.name).collect();
info!("Found {} objects for heal in {}/{}", objects.len(), bucket, prefix);
Ok(objects)
}
Err(e) => {
error!("Failed to list objects for heal: {}/{} - {}", bucket, prefix, e);
Err(Error::other(e))
}
}
}
async fn get_disk_for_resume(&self, set_disk_id: &str) -> Result<DiskStore> {
debug!("Getting disk for resume: {}", set_disk_id);
// Parse set_disk_id to extract pool and set indices
// Format: "pool_{pool_idx}_set_{set_idx}"
let parts: Vec<&str> = set_disk_id.split('_').collect();
if parts.len() != 4 || parts[0] != "pool" || parts[2] != "set" {
return Err(Error::TaskExecutionFailed {
message: format!("Invalid set_disk_id format: {set_disk_id}"),
});
}
let pool_idx: usize = parts[1].parse().map_err(|_| Error::TaskExecutionFailed {
message: format!("Invalid pool index in set_disk_id: {set_disk_id}"),
})?;
let set_idx: usize = parts[3].parse().map_err(|_| Error::TaskExecutionFailed {
message: format!("Invalid set index in set_disk_id: {set_disk_id}"),
})?;
// Get the first available disk from the set
let disks = self
.ecstore
.get_disks(pool_idx, set_idx)
.await
.map_err(|e| Error::TaskExecutionFailed {
message: format!("Failed to get disks for pool {pool_idx} set {set_idx}: {e}"),
})?;
// Find the first available disk
if let Some(disk_store) = disks.into_iter().flatten().next() {
info!("Found disk for resume: {:?}", disk_store);
return Ok(disk_store);
}
Err(Error::TaskExecutionFailed {
message: format!("No available disk found for set_disk_id: {set_disk_id}"),
})
}
}
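For reference, a small sketch of how the pool_{pool_idx}_set_{set_idx} convention parsed by get_disk_for_resume could be exercised; the identifier value and the helper function are placeholders, not part of the commit:
// Illustrative only: resolve a disk for pool 0 / set 0 via the set_disk_id convention above.
async fn pick_resume_disk(storage: &ECStoreHealStorage) -> Result<DiskStore> {
storage.get_disk_for_resume("pool_0_set_0").await
}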

855
crates/ahm/src/heal/task.rs Normal file
View File

@@ -0,0 +1,855 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::error::{Error, Result};
use crate::heal::ErasureSetHealer;
use crate::heal::{progress::HealProgress, storage::HealStorageAPI};
use rustfs_common::heal_channel::{HealOpts, HealScanMode};
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use std::time::{Duration, SystemTime};
use tokio::sync::RwLock;
use tracing::{error, info, warn};
use uuid::Uuid;
/// Heal type
#[derive(Debug, Clone)]
pub enum HealType {
/// Object heal
Object {
bucket: String,
object: String,
version_id: Option<String>,
},
/// Bucket heal
Bucket { bucket: String },
/// Erasure Set heal (includes disk format repair)
ErasureSet { buckets: Vec<String>, set_disk_id: String },
/// Metadata heal
Metadata { bucket: String, object: String },
/// MRF heal
MRF { meta_path: String },
/// EC decode heal
ECDecode {
bucket: String,
object: String,
version_id: Option<String>,
},
}
/// Heal priority
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub enum HealPriority {
/// Low priority
Low = 0,
/// Normal priority
Normal = 1,
/// High priority
High = 2,
/// Urgent priority
Urgent = 3,
}
impl Default for HealPriority {
fn default() -> Self {
Self::Normal
}
}
/// Heal options
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealOptions {
/// Scan mode
pub scan_mode: HealScanMode,
/// Whether to remove corrupted data
pub remove_corrupted: bool,
/// Whether to recreate
pub recreate_missing: bool,
/// Whether to update parity
pub update_parity: bool,
/// Whether to recursively process
pub recursive: bool,
/// Whether to dry run
pub dry_run: bool,
/// Timeout
pub timeout: Option<Duration>,
/// pool index
pub pool_index: Option<usize>,
/// set index
pub set_index: Option<usize>,
}
impl Default for HealOptions {
fn default() -> Self {
Self {
scan_mode: HealScanMode::Normal,
remove_corrupted: false,
recreate_missing: true,
update_parity: true,
recursive: false,
dry_run: false,
timeout: Some(Duration::from_secs(300)), // 5 minutes default timeout
pool_index: None,
set_index: None,
}
}
}
/// Heal task status
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum HealTaskStatus {
/// Pending
Pending,
/// Running
Running,
/// Completed
Completed,
/// Failed
Failed { error: String },
/// Cancelled
Cancelled,
/// Timeout
Timeout,
}
/// Heal request
#[derive(Debug, Clone)]
pub struct HealRequest {
/// Request ID
pub id: String,
/// Heal type
pub heal_type: HealType,
/// Heal options
pub options: HealOptions,
/// Priority
pub priority: HealPriority,
/// Created time
pub created_at: SystemTime,
}
impl HealRequest {
pub fn new(heal_type: HealType, options: HealOptions, priority: HealPriority) -> Self {
Self {
id: Uuid::new_v4().to_string(),
heal_type,
options,
priority,
created_at: SystemTime::now(),
}
}
pub fn object(bucket: String, object: String, version_id: Option<String>) -> Self {
Self::new(
HealType::Object {
bucket,
object,
version_id,
},
HealOptions::default(),
HealPriority::Normal,
)
}
pub fn bucket(bucket: String) -> Self {
Self::new(HealType::Bucket { bucket }, HealOptions::default(), HealPriority::Normal)
}
pub fn metadata(bucket: String, object: String) -> Self {
Self::new(HealType::Metadata { bucket, object }, HealOptions::default(), HealPriority::High)
}
pub fn ec_decode(bucket: String, object: String, version_id: Option<String>) -> Self {
Self::new(
HealType::ECDecode {
bucket,
object,
version_id,
},
HealOptions::default(),
HealPriority::Urgent,
)
}
}
/// Heal task
pub struct HealTask {
/// Task ID
pub id: String,
/// Heal type
pub heal_type: HealType,
/// Heal options
pub options: HealOptions,
/// Task status
pub status: Arc<RwLock<HealTaskStatus>>,
/// Progress tracking
pub progress: Arc<RwLock<HealProgress>>,
/// Created time
pub created_at: SystemTime,
/// Started time
pub started_at: Arc<RwLock<Option<SystemTime>>>,
/// Completed time
pub completed_at: Arc<RwLock<Option<SystemTime>>>,
/// Cancel token
pub cancel_token: tokio_util::sync::CancellationToken,
/// Storage layer interface
pub storage: Arc<dyn HealStorageAPI>,
}
impl HealTask {
pub fn from_request(request: HealRequest, storage: Arc<dyn HealStorageAPI>) -> Self {
Self {
id: request.id,
heal_type: request.heal_type,
options: request.options,
status: Arc::new(RwLock::new(HealTaskStatus::Pending)),
progress: Arc::new(RwLock::new(HealProgress::new())),
created_at: request.created_at,
started_at: Arc::new(RwLock::new(None)),
completed_at: Arc::new(RwLock::new(None)),
cancel_token: tokio_util::sync::CancellationToken::new(),
storage,
}
}
pub async fn execute(&self) -> Result<()> {
// update status to running
{
let mut status = self.status.write().await;
*status = HealTaskStatus::Running;
}
{
let mut started_at = self.started_at.write().await;
*started_at = Some(SystemTime::now());
}
info!("Starting heal task: {} with type: {:?}", self.id, self.heal_type);
let result = match &self.heal_type {
HealType::Object {
bucket,
object,
version_id,
} => self.heal_object(bucket, object, version_id.as_deref()).await,
HealType::Bucket { bucket } => self.heal_bucket(bucket).await,
HealType::Metadata { bucket, object } => self.heal_metadata(bucket, object).await,
HealType::MRF { meta_path } => self.heal_mrf(meta_path).await,
HealType::ECDecode {
bucket,
object,
version_id,
} => self.heal_ec_decode(bucket, object, version_id.as_deref()).await,
HealType::ErasureSet { buckets, set_disk_id } => self.heal_erasure_set(buckets.clone(), set_disk_id.clone()).await,
};
// update completed time and status
{
let mut completed_at = self.completed_at.write().await;
*completed_at = Some(SystemTime::now());
}
match &result {
Ok(_) => {
let mut status = self.status.write().await;
*status = HealTaskStatus::Completed;
info!("Heal task completed successfully: {}", self.id);
}
Err(e) => {
let mut status = self.status.write().await;
*status = HealTaskStatus::Failed { error: e.to_string() };
error!("Heal task failed: {} with error: {}", self.id, e);
}
}
result
}
pub async fn cancel(&self) -> Result<()> {
self.cancel_token.cancel();
let mut status = self.status.write().await;
*status = HealTaskStatus::Cancelled;
info!("Heal task cancelled: {}", self.id);
Ok(())
}
pub async fn get_status(&self) -> HealTaskStatus {
self.status.read().await.clone()
}
pub async fn get_progress(&self) -> HealProgress {
self.progress.read().await.clone()
}
// specific heal implementation method
async fn heal_object(&self, bucket: &str, object: &str, version_id: Option<&str>) -> Result<()> {
info!("Healing object: {}/{}", bucket, object);
// update progress
{
let mut progress = self.progress.write().await;
progress.set_current_object(Some(format!("{bucket}/{object}")));
progress.update_progress(0, 4, 0, 0); // start of heal: 4 steps in total
}
// Step 1: Check if object exists and get metadata
info!("Step 1: Checking object existence and metadata");
let object_exists = self.storage.object_exists(bucket, object).await?;
if !object_exists {
warn!("Object does not exist: {}/{}", bucket, object);
if self.options.recreate_missing {
info!("Attempting to recreate missing object: {}/{}", bucket, object);
return self.recreate_missing_object(bucket, object, version_id).await;
} else {
return Err(Error::TaskExecutionFailed {
message: format!("Object not found: {bucket}/{object}"),
});
}
}
{
let mut progress = self.progress.write().await;
progress.update_progress(1, 3, 0, 0);
}
// Step 2: directly call ecstore to perform heal
info!("Step 2: Performing heal using ecstore");
let heal_opts = HealOpts {
recursive: self.options.recursive,
dry_run: self.options.dry_run,
remove: self.options.remove_corrupted,
recreate: self.options.recreate_missing,
scan_mode: self.options.scan_mode,
update_parity: self.options.update_parity,
no_lock: false,
pool: self.options.pool_index,
set: self.options.set_index,
};
match self.storage.heal_object(bucket, object, version_id, &heal_opts).await {
Ok((result, error)) => {
if let Some(e) = error {
error!("Heal operation failed: {}/{} - {}", bucket, object, e);
// If heal failed and remove_corrupted is enabled, delete the corrupted object
if self.options.remove_corrupted {
warn!("Removing corrupted object: {}/{}", bucket, object);
if !self.options.dry_run {
self.storage.delete_object(bucket, object).await?;
info!("Successfully deleted corrupted object: {}/{}", bucket, object);
} else {
info!("Dry run mode - would delete corrupted object: {}/{}", bucket, object);
}
}
{
let mut progress = self.progress.write().await;
progress.update_progress(3, 3, 0, 0);
}
return Err(Error::TaskExecutionFailed {
message: format!("Failed to heal object {bucket}/{object}: {e}"),
});
}
// Step 3: Verify heal result
info!("Step 3: Verifying heal result");
let object_size = result.object_size as u64;
info!(
"Heal completed successfully: {}/{} ({} bytes, {} drives healed)",
bucket,
object,
object_size,
result.after.drives.len()
);
{
let mut progress = self.progress.write().await;
progress.update_progress(3, 3, object_size, object_size);
}
Ok(())
}
Err(e) => {
error!("Heal operation failed: {}/{} - {}", bucket, object, e);
// If heal failed and remove_corrupted is enabled, delete the corrupted object
if self.options.remove_corrupted {
warn!("Removing corrupted object: {}/{}", bucket, object);
if !self.options.dry_run {
self.storage.delete_object(bucket, object).await?;
info!("Successfully deleted corrupted object: {}/{}", bucket, object);
} else {
info!("Dry run mode - would delete corrupted object: {}/{}", bucket, object);
}
}
{
let mut progress = self.progress.write().await;
progress.update_progress(3, 3, 0, 0);
}
Err(Error::TaskExecutionFailed {
message: format!("Failed to heal object {bucket}/{object}: {e}"),
})
}
}
}
/// Recreate missing object (for EC decode scenarios)
async fn recreate_missing_object(&self, bucket: &str, object: &str, version_id: Option<&str>) -> Result<()> {
info!("Attempting to recreate missing object: {}/{}", bucket, object);
// Use ecstore's heal_object with recreate option
let heal_opts = HealOpts {
recursive: false,
dry_run: self.options.dry_run,
remove: false,
recreate: true,
scan_mode: HealScanMode::Deep,
update_parity: true,
no_lock: false,
pool: None,
set: None,
};
match self.storage.heal_object(bucket, object, version_id, &heal_opts).await {
Ok((result, error)) => {
if let Some(e) = error {
error!("Failed to recreate missing object: {}/{} - {}", bucket, object, e);
return Err(Error::TaskExecutionFailed {
message: format!("Failed to recreate missing object {bucket}/{object}: {e}"),
});
}
let object_size = result.object_size as u64;
info!("Successfully recreated missing object: {}/{} ({} bytes)", bucket, object, object_size);
{
let mut progress = self.progress.write().await;
progress.update_progress(4, 4, object_size, object_size);
}
Ok(())
}
Err(e) => {
error!("Failed to recreate missing object: {}/{} - {}", bucket, object, e);
Err(Error::TaskExecutionFailed {
message: format!("Failed to recreate missing object {bucket}/{object}: {e}"),
})
}
}
}
async fn heal_bucket(&self, bucket: &str) -> Result<()> {
info!("Healing bucket: {}", bucket);
// update progress
{
let mut progress = self.progress.write().await;
progress.set_current_object(Some(format!("bucket: {bucket}")));
progress.update_progress(0, 3, 0, 0);
}
// Step 1: Check if bucket exists
info!("Step 1: Checking bucket existence");
let bucket_exists = self.storage.get_bucket_info(bucket).await?.is_some();
if !bucket_exists {
warn!("Bucket does not exist: {}", bucket);
return Err(Error::TaskExecutionFailed {
message: format!("Bucket not found: {bucket}"),
});
}
{
let mut progress = self.progress.write().await;
progress.update_progress(1, 3, 0, 0);
}
// Step 2: Perform bucket heal using ecstore
info!("Step 2: Performing bucket heal using ecstore");
let heal_opts = HealOpts {
recursive: self.options.recursive,
dry_run: self.options.dry_run,
remove: self.options.remove_corrupted,
recreate: self.options.recreate_missing,
scan_mode: self.options.scan_mode,
update_parity: self.options.update_parity,
no_lock: false,
pool: self.options.pool_index,
set: self.options.set_index,
};
match self.storage.heal_bucket(bucket, &heal_opts).await {
Ok(result) => {
info!("Bucket heal completed successfully: {} ({} drives)", bucket, result.after.drives.len());
{
let mut progress = self.progress.write().await;
progress.update_progress(3, 3, 0, 0);
}
Ok(())
}
Err(e) => {
error!("Bucket heal failed: {} - {}", bucket, e);
{
let mut progress = self.progress.write().await;
progress.update_progress(3, 3, 0, 0);
}
Err(Error::TaskExecutionFailed {
message: format!("Failed to heal bucket {bucket}: {e}"),
})
}
}
}
async fn heal_metadata(&self, bucket: &str, object: &str) -> Result<()> {
info!("Healing metadata: {}/{}", bucket, object);
// update progress
{
let mut progress = self.progress.write().await;
progress.set_current_object(Some(format!("metadata: {bucket}/{object}")));
progress.update_progress(0, 3, 0, 0);
}
// Step 1: Check if object exists
info!("Step 1: Checking object existence");
let object_exists = self.storage.object_exists(bucket, object).await?;
if !object_exists {
warn!("Object does not exist: {}/{}", bucket, object);
return Err(Error::TaskExecutionFailed {
message: format!("Object not found: {bucket}/{object}"),
});
}
{
let mut progress = self.progress.write().await;
progress.update_progress(1, 3, 0, 0);
}
// Step 2: Perform metadata heal using ecstore
info!("Step 2: Performing metadata heal using ecstore");
let heal_opts = HealOpts {
recursive: false,
dry_run: self.options.dry_run,
remove: false,
recreate: false,
scan_mode: HealScanMode::Deep,
update_parity: false,
no_lock: false,
pool: self.options.pool_index,
set: self.options.set_index,
};
match self.storage.heal_object(bucket, object, None, &heal_opts).await {
Ok((result, error)) => {
if let Some(e) = error {
error!("Metadata heal failed: {}/{} - {}", bucket, object, e);
{
let mut progress = self.progress.write().await;
progress.update_progress(3, 3, 0, 0);
}
return Err(Error::TaskExecutionFailed {
message: format!("Failed to heal metadata {bucket}/{object}: {e}"),
});
}
info!(
"Metadata heal completed successfully: {}/{} ({} drives)",
bucket,
object,
result.after.drives.len()
);
{
let mut progress = self.progress.write().await;
progress.update_progress(3, 3, 0, 0);
}
Ok(())
}
Err(e) => {
error!("Metadata heal failed: {}/{} - {}", bucket, object, e);
{
let mut progress = self.progress.write().await;
progress.update_progress(3, 3, 0, 0);
}
Err(Error::TaskExecutionFailed {
message: format!("Failed to heal metadata {bucket}/{object}: {e}"),
})
}
}
}
async fn heal_mrf(&self, meta_path: &str) -> Result<()> {
info!("Healing MRF: {}", meta_path);
// update progress
{
let mut progress = self.progress.write().await;
progress.set_current_object(Some(format!("mrf: {meta_path}")));
progress.update_progress(0, 2, 0, 0);
}
// Parse meta_path to extract bucket and object
let parts: Vec<&str> = meta_path.split('/').collect();
if parts.len() < 2 {
return Err(Error::TaskExecutionFailed {
message: format!("Invalid meta path format: {meta_path}"),
});
}
let bucket = parts[0];
let object = parts[1..].join("/");
// Step 1: Perform MRF heal using ecstore
info!("Step 1: Performing MRF heal using ecstore");
let heal_opts = HealOpts {
recursive: true,
dry_run: self.options.dry_run,
remove: self.options.remove_corrupted,
recreate: self.options.recreate_missing,
scan_mode: HealScanMode::Deep,
update_parity: true,
no_lock: false,
pool: None,
set: None,
};
match self.storage.heal_object(bucket, &object, None, &heal_opts).await {
Ok((result, error)) => {
if let Some(e) = error {
error!("MRF heal failed: {} - {}", meta_path, e);
{
let mut progress = self.progress.write().await;
progress.update_progress(2, 2, 0, 0);
}
return Err(Error::TaskExecutionFailed {
message: format!("Failed to heal MRF {meta_path}: {e}"),
});
}
info!("MRF heal completed successfully: {} ({} drives)", meta_path, result.after.drives.len());
{
let mut progress = self.progress.write().await;
progress.update_progress(2, 2, 0, 0);
}
Ok(())
}
Err(e) => {
error!("MRF heal failed: {} - {}", meta_path, e);
{
let mut progress = self.progress.write().await;
progress.update_progress(2, 2, 0, 0);
}
Err(Error::TaskExecutionFailed {
message: format!("Failed to heal MRF {meta_path}: {e}"),
})
}
}
}
async fn heal_ec_decode(&self, bucket: &str, object: &str, version_id: Option<&str>) -> Result<()> {
info!("Healing EC decode: {}/{}", bucket, object);
// update progress
{
let mut progress = self.progress.write().await;
progress.set_current_object(Some(format!("ec_decode: {bucket}/{object}")));
progress.update_progress(0, 3, 0, 0);
}
// Step 1: Check if object exists
info!("Step 1: Checking object existence");
let object_exists = self.storage.object_exists(bucket, object).await?;
if !object_exists {
warn!("Object does not exist: {}/{}", bucket, object);
return Err(Error::TaskExecutionFailed {
message: format!("Object not found: {bucket}/{object}"),
});
}
{
let mut progress = self.progress.write().await;
progress.update_progress(1, 3, 0, 0);
}
// Step 2: Perform EC decode heal using ecstore
info!("Step 2: Performing EC decode heal using ecstore");
let heal_opts = HealOpts {
recursive: false,
dry_run: self.options.dry_run,
remove: false,
recreate: true,
scan_mode: HealScanMode::Deep,
update_parity: true,
no_lock: false,
pool: None,
set: None,
};
match self.storage.heal_object(bucket, object, version_id, &heal_opts).await {
Ok((result, error)) => {
if let Some(e) = error {
error!("EC decode heal failed: {}/{} - {}", bucket, object, e);
{
let mut progress = self.progress.write().await;
progress.update_progress(3, 3, 0, 0);
}
return Err(Error::TaskExecutionFailed {
message: format!("Failed to heal EC decode {bucket}/{object}: {e}"),
});
}
let object_size = result.object_size as u64;
info!(
"EC decode heal completed successfully: {}/{} ({} bytes, {} drives)",
bucket,
object,
object_size,
result.after.drives.len()
);
{
let mut progress = self.progress.write().await;
progress.update_progress(3, 3, object_size, object_size);
}
Ok(())
}
Err(e) => {
error!("EC decode heal failed: {}/{} - {}", bucket, object, e);
{
let mut progress = self.progress.write().await;
progress.update_progress(3, 3, 0, 0);
}
Err(Error::TaskExecutionFailed {
message: format!("Failed to heal EC decode {bucket}/{object}: {e}"),
})
}
}
}
async fn heal_erasure_set(&self, buckets: Vec<String>, set_disk_id: String) -> Result<()> {
info!("Healing Erasure Set: {} ({} buckets)", set_disk_id, buckets.len());
// update progress
{
let mut progress = self.progress.write().await;
progress.set_current_object(Some(format!("erasure_set: {} ({} buckets)", set_disk_id, buckets.len())));
progress.update_progress(0, 4, 0, 0);
}
let buckets = if buckets.is_empty() {
info!("No buckets specified, listing all buckets");
let bucket_infos = self.storage.list_buckets().await?;
bucket_infos.into_iter().map(|info| info.name).collect()
} else {
buckets
};
// Step 1: Perform disk format heal using ecstore
info!("Step 1: Performing disk format heal using ecstore");
match self.storage.heal_format(self.options.dry_run).await {
Ok((result, error)) => {
if let Some(e) = error {
error!("Disk format heal failed: {} - {}", set_disk_id, e);
{
let mut progress = self.progress.write().await;
progress.update_progress(4, 4, 0, 0);
}
return Err(Error::TaskExecutionFailed {
message: format!("Failed to heal disk format for {set_disk_id}: {e}"),
});
}
info!(
"Disk format heal completed successfully: {} ({} drives)",
set_disk_id,
result.after.drives.len()
);
}
Err(e) => {
error!("Disk format heal failed: {} - {}", set_disk_id, e);
{
let mut progress = self.progress.write().await;
progress.update_progress(4, 4, 0, 0);
}
return Err(Error::TaskExecutionFailed {
message: format!("Failed to heal disk format for {set_disk_id}: {e}"),
});
}
}
{
let mut progress = self.progress.write().await;
progress.update_progress(1, 4, 0, 0);
}
// Step 2: Get disk for resume functionality
info!("Step 2: Getting disk for resume functionality");
let disk = self.storage.get_disk_for_resume(&set_disk_id).await?;
{
let mut progress = self.progress.write().await;
progress.update_progress(2, 4, 0, 0);
}
// Step 3: Heal bucket structure, then create the erasure set healer with resume support
for bucket in buckets.iter() {
if let Err(err) = self.heal_bucket(bucket).await {
warn!("Failed to heal bucket structure for {}: {}", bucket, err);
}
}
info!("Step 3: Creating erasure set healer with resume support");
let erasure_healer = ErasureSetHealer::new(self.storage.clone(), self.progress.clone(), self.cancel_token.clone(), disk);
{
let mut progress = self.progress.write().await;
progress.update_progress(3, 4, 0, 0);
}
// Step 4: Execute erasure set heal with resume
info!("Step 4: Executing erasure set heal with resume");
let result = erasure_healer.heal_erasure_set(&buckets, &set_disk_id).await;
{
let mut progress = self.progress.write().await;
progress.update_progress(4, 4, 0, 0);
}
match result {
Ok(_) => {
info!("Erasure set heal completed successfully: {} ({} buckets)", set_disk_id, buckets.len());
Ok(())
}
Err(e) => {
error!("Erasure set heal failed: {} - {}", set_disk_id, e);
Err(Error::TaskExecutionFailed {
message: format!("Failed to heal erasure set {set_disk_id}: {e}"),
})
}
}
}
}
impl std::fmt::Debug for HealTask {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("HealTask")
.field("id", &self.id)
.field("heal_type", &self.heal_type)
.field("options", &self.options)
.field("created_at", &self.created_at)
.finish()
}
}
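As a rough illustration of the task API introduced above, a sketch of healing a single object end to end; the bucket and object names and the driver function are hypothetical:
// Illustrative only: build a request for one object and run it as a heal task.
async fn heal_one_object(storage: Arc<dyn HealStorageAPI>) -> Result<()> {
let request = HealRequest::object("my-bucket".to_string(), "path/to/object".to_string(), None);
let task = HealTask::from_request(request, storage);
task.execute().await?;
info!("final status: {:?}", task.get_status().await);
Ok(())
}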

View File

@@ -12,17 +12,17 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::OnceLock;
use std::sync::{Arc, OnceLock};
use tokio_util::sync::CancellationToken;
use tracing::{error, info};
pub mod error;
pub mod heal;
pub mod scanner;
pub use error::{Error, Result};
pub use scanner::{
BucketTargetUsageInfo, BucketUsageInfo, DataUsageInfo, Scanner, ScannerMetrics, load_data_usage_from_backend,
store_data_usage_in_backend,
};
pub use heal::{HealManager, HealOptions, HealPriority, HealRequest, HealType, channel::HealChannelProcessor};
pub use scanner::Scanner;
// Global cancellation token for AHM services (scanner and other background tasks)
static GLOBAL_AHM_SERVICES_CANCEL_TOKEN: OnceLock<CancellationToken> = OnceLock::new();
@@ -52,3 +52,61 @@ pub fn shutdown_ahm_services() {
cancel_token.cancel();
}
}
/// Global heal manager instance
static GLOBAL_HEAL_MANAGER: OnceLock<Arc<HealManager>> = OnceLock::new();
/// Global heal channel processor instance
static GLOBAL_HEAL_CHANNEL_PROCESSOR: OnceLock<Arc<tokio::sync::Mutex<HealChannelProcessor>>> = OnceLock::new();
/// Initialize and start heal manager with channel processor
pub async fn init_heal_manager(
storage: Arc<dyn heal::storage::HealStorageAPI>,
config: Option<heal::manager::HealConfig>,
) -> Result<Arc<HealManager>> {
// Create heal manager
let heal_manager = Arc::new(HealManager::new(storage, config));
// Start heal manager
heal_manager.start().await?;
// Store global instance
GLOBAL_HEAL_MANAGER
.set(heal_manager.clone())
.map_err(|_| Error::Config("Heal manager already initialized".to_string()))?;
// Initialize heal channel
let channel_receiver = rustfs_common::heal_channel::init_heal_channel();
// Create channel processor
let channel_processor = HealChannelProcessor::new(heal_manager.clone());
// Store channel processor instance first
GLOBAL_HEAL_CHANNEL_PROCESSOR
.set(Arc::new(tokio::sync::Mutex::new(channel_processor)))
.map_err(|_| Error::Config("Heal channel processor already initialized".to_string()))?;
// Start channel processor in background
let receiver = channel_receiver;
tokio::spawn(async move {
if let Some(processor_guard) = GLOBAL_HEAL_CHANNEL_PROCESSOR.get() {
let mut processor = processor_guard.lock().await;
if let Err(e) = processor.start(receiver).await {
error!("Heal channel processor failed: {}", e);
}
}
});
info!("Heal manager with channel processor initialized successfully");
Ok(heal_manager)
}
/// Get global heal manager instance
pub fn get_heal_manager() -> Option<&'static Arc<HealManager>> {
GLOBAL_HEAL_MANAGER.get()
}
/// Get global heal channel processor instance
pub fn get_heal_channel_processor() -> Option<&'static Arc<tokio::sync::Mutex<HealChannelProcessor>>> {
GLOBAL_HEAL_CHANNEL_PROCESSOR.get()
}
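A hedged sketch of how this initializer might be wired at startup, assuming the storage adapter lives at heal::storage::ECStoreHealStorage (module path inferred from the files above); everything here is illustrative:
// Illustrative startup wiring (module paths assumed): adapt ECStore and start the global heal manager.
async fn start_heal_services(ecstore: Arc<rustfs_ecstore::store::ECStore>) -> Result<()> {
let storage: Arc<dyn heal::storage::HealStorageAPI> = Arc::new(heal::storage::ECStoreHealStorage::new(ecstore));
let _manager = init_heal_manager(storage, None).await?;
assert!(get_heal_manager().is_some());
Ok(())
}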

File diff suppressed because it is too large

View File

@@ -1,671 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::{collections::HashMap, sync::Arc, time::SystemTime};
use rustfs_ecstore::{bucket::metadata_sys::get_replication_config, config::com::read_config, store::ECStore};
use rustfs_utils::path::SLASH_SEPARATOR;
use serde::{Deserialize, Serialize};
use tracing::{error, info, warn};
use crate::error::{Error, Result};
// Data usage storage constants
pub const DATA_USAGE_ROOT: &str = SLASH_SEPARATOR;
const DATA_USAGE_OBJ_NAME: &str = ".usage.json";
const DATA_USAGE_BLOOM_NAME: &str = ".bloomcycle.bin";
pub const DATA_USAGE_CACHE_NAME: &str = ".usage-cache.bin";
// Data usage storage paths
lazy_static::lazy_static! {
pub static ref DATA_USAGE_BUCKET: String = format!("{}{}{}",
rustfs_ecstore::disk::RUSTFS_META_BUCKET,
SLASH_SEPARATOR,
rustfs_ecstore::disk::BUCKET_META_PREFIX
);
pub static ref DATA_USAGE_OBJ_NAME_PATH: String = format!("{}{}{}",
rustfs_ecstore::disk::BUCKET_META_PREFIX,
SLASH_SEPARATOR,
DATA_USAGE_OBJ_NAME
);
pub static ref DATA_USAGE_BLOOM_NAME_PATH: String = format!("{}{}{}",
rustfs_ecstore::disk::BUCKET_META_PREFIX,
SLASH_SEPARATOR,
DATA_USAGE_BLOOM_NAME
);
}
/// Bucket target usage info provides replication statistics
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct BucketTargetUsageInfo {
pub replication_pending_size: u64,
pub replication_failed_size: u64,
pub replicated_size: u64,
pub replica_size: u64,
pub replication_pending_count: u64,
pub replication_failed_count: u64,
pub replicated_count: u64,
}
/// Bucket usage info provides bucket-level statistics
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct BucketUsageInfo {
pub size: u64,
// The following five fields, suffixed with V1, are kept for backward compatibility
// Total size for objects that have not yet been replicated
pub replication_pending_size_v1: u64,
// Total size for objects that have witnessed one or more failures and will be retried
pub replication_failed_size_v1: u64,
// Total size for objects that have been replicated to destination
pub replicated_size_v1: u64,
// Total number of objects pending replication
pub replication_pending_count_v1: u64,
// Total number of objects that failed replication
pub replication_failed_count_v1: u64,
pub objects_count: u64,
pub object_size_histogram: HashMap<String, u64>,
pub object_versions_histogram: HashMap<String, u64>,
pub versions_count: u64,
pub delete_markers_count: u64,
pub replica_size: u64,
pub replica_count: u64,
pub replication_info: HashMap<String, BucketTargetUsageInfo>,
}
/// DataUsageInfo represents data usage stats of the underlying storage
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct DataUsageInfo {
/// Total capacity
pub total_capacity: u64,
/// Total used capacity
pub total_used_capacity: u64,
/// Total free capacity
pub total_free_capacity: u64,
/// LastUpdate is the timestamp of when the data usage info was last updated
pub last_update: Option<SystemTime>,
/// Objects total count across all buckets
pub objects_total_count: u64,
/// Versions total count across all buckets
pub versions_total_count: u64,
/// Delete markers total count across all buckets
pub delete_markers_total_count: u64,
/// Objects total size across all buckets
pub objects_total_size: u64,
/// Replication info across all buckets
pub replication_info: HashMap<String, BucketTargetUsageInfo>,
/// Total number of buckets in this cluster
pub buckets_count: u64,
/// Buckets usage info provides following information across all buckets
pub buckets_usage: HashMap<String, BucketUsageInfo>,
/// Deprecated; kept here for backward compatibility reasons
pub bucket_sizes: HashMap<String, u64>,
}
/// Size summary for a single object or group of objects
#[derive(Debug, Default, Clone)]
pub struct SizeSummary {
/// Total size
pub total_size: usize,
/// Number of versions
pub versions: usize,
/// Number of delete markers
pub delete_markers: usize,
/// Replicated size
pub replicated_size: usize,
/// Replicated count
pub replicated_count: usize,
/// Pending size
pub pending_size: usize,
/// Failed size
pub failed_size: usize,
/// Replica size
pub replica_size: usize,
/// Replica count
pub replica_count: usize,
/// Pending count
pub pending_count: usize,
/// Failed count
pub failed_count: usize,
/// Replication target stats
pub repl_target_stats: HashMap<String, ReplTargetSizeSummary>,
}
/// Replication target size summary
#[derive(Debug, Default, Clone)]
pub struct ReplTargetSizeSummary {
/// Replicated size
pub replicated_size: usize,
/// Replicated count
pub replicated_count: usize,
/// Pending size
pub pending_size: usize,
/// Failed size
pub failed_size: usize,
/// Pending count
pub pending_count: usize,
/// Failed count
pub failed_count: usize,
}
impl DataUsageInfo {
/// Create a new DataUsageInfo
pub fn new() -> Self {
Self::default()
}
/// Add object metadata to data usage statistics
pub fn add_object(&mut self, object_path: &str, meta_object: &rustfs_filemeta::MetaObject) {
// This method is kept for backward compatibility
// For accurate version counting, use add_object_from_file_meta instead
let bucket_name = match self.extract_bucket_from_path(object_path) {
Ok(name) => name,
Err(_) => return,
};
// Update bucket statistics
if let Some(bucket_usage) = self.buckets_usage.get_mut(&bucket_name) {
bucket_usage.size += meta_object.size as u64;
bucket_usage.objects_count += 1;
bucket_usage.versions_count += 1; // Simplified: assume 1 version per object
// Update size histogram
let total_size = meta_object.size as u64;
let size_ranges = [
("0-1KB", 0, 1024),
("1KB-1MB", 1024, 1024 * 1024),
("1MB-10MB", 1024 * 1024, 10 * 1024 * 1024),
("10MB-100MB", 10 * 1024 * 1024, 100 * 1024 * 1024),
("100MB-1GB", 100 * 1024 * 1024, 1024 * 1024 * 1024),
("1GB+", 1024 * 1024 * 1024, u64::MAX),
];
for (range_name, min_size, max_size) in size_ranges {
if total_size >= min_size && total_size < max_size {
*bucket_usage.object_size_histogram.entry(range_name.to_string()).or_insert(0) += 1;
break;
}
}
// Update version histogram (simplified - count as single version)
*bucket_usage
.object_versions_histogram
.entry("SINGLE_VERSION".to_string())
.or_insert(0) += 1;
} else {
// Create new bucket usage
let mut bucket_usage = BucketUsageInfo {
size: meta_object.size as u64,
objects_count: 1,
versions_count: 1,
..Default::default()
};
bucket_usage.object_size_histogram.insert("0-1KB".to_string(), 1);
bucket_usage.object_versions_histogram.insert("SINGLE_VERSION".to_string(), 1);
self.buckets_usage.insert(bucket_name, bucket_usage);
}
// Update global statistics
self.objects_total_size += meta_object.size as u64;
self.objects_total_count += 1;
self.versions_total_count += 1;
}
/// Add object from FileMeta for accurate version counting
pub fn add_object_from_file_meta(&mut self, object_path: &str, file_meta: &rustfs_filemeta::FileMeta) {
let bucket_name = match self.extract_bucket_from_path(object_path) {
Ok(name) => name,
Err(_) => return,
};
// Calculate accurate statistics from all versions
let mut total_size = 0u64;
let mut versions_count = 0u64;
let mut delete_markers_count = 0u64;
let mut latest_object_size = 0u64;
// Process all versions to get accurate counts
for version in &file_meta.versions {
match rustfs_filemeta::FileMetaVersion::try_from(version.clone()) {
Ok(ver) => {
if let Some(obj) = ver.object {
total_size += obj.size as u64;
versions_count += 1;
latest_object_size = obj.size as u64; // Keep track of latest object size
} else if ver.delete_marker.is_some() {
delete_markers_count += 1;
}
}
Err(_) => {
// Skip invalid versions
continue;
}
}
}
// Update bucket statistics
if let Some(bucket_usage) = self.buckets_usage.get_mut(&bucket_name) {
bucket_usage.size += total_size;
bucket_usage.objects_count += 1;
bucket_usage.versions_count += versions_count;
bucket_usage.delete_markers_count += delete_markers_count;
// Update size histogram based on latest object size
let size_ranges = [
("0-1KB", 0, 1024),
("1KB-1MB", 1024, 1024 * 1024),
("1MB-10MB", 1024 * 1024, 10 * 1024 * 1024),
("10MB-100MB", 10 * 1024 * 1024, 100 * 1024 * 1024),
("100MB-1GB", 100 * 1024 * 1024, 1024 * 1024 * 1024),
("1GB+", 1024 * 1024 * 1024, u64::MAX),
];
for (range_name, min_size, max_size) in size_ranges {
if latest_object_size >= min_size && latest_object_size < max_size {
*bucket_usage.object_size_histogram.entry(range_name.to_string()).or_insert(0) += 1;
break;
}
}
// Update version histogram based on actual version count
let version_ranges = [
("1", 1, 1),
("2-5", 2, 5),
("6-10", 6, 10),
("11-50", 11, 50),
("51-100", 51, 100),
("100+", 101, usize::MAX),
];
for (range_name, min_versions, max_versions) in version_ranges {
if versions_count as usize >= min_versions && versions_count as usize <= max_versions {
*bucket_usage
.object_versions_histogram
.entry(range_name.to_string())
.or_insert(0) += 1;
break;
}
}
} else {
// Create new bucket usage
let mut bucket_usage = BucketUsageInfo {
size: total_size,
objects_count: 1,
versions_count,
delete_markers_count,
..Default::default()
};
// Set size histogram
let size_ranges = [
("0-1KB", 0, 1024),
("1KB-1MB", 1024, 1024 * 1024),
("1MB-10MB", 1024 * 1024, 10 * 1024 * 1024),
("10MB-100MB", 10 * 1024 * 1024, 100 * 1024 * 1024),
("100MB-1GB", 100 * 1024 * 1024, 1024 * 1024 * 1024),
("1GB+", 1024 * 1024 * 1024, u64::MAX),
];
for (range_name, min_size, max_size) in size_ranges {
if latest_object_size >= min_size && latest_object_size < max_size {
bucket_usage.object_size_histogram.insert(range_name.to_string(), 1);
break;
}
}
// Set version histogram
let version_ranges = [
("1", 1, 1),
("2-5", 2, 5),
("6-10", 6, 10),
("11-50", 11, 50),
("51-100", 51, 100),
("100+", 101, usize::MAX),
];
for (range_name, min_versions, max_versions) in version_ranges {
if versions_count as usize >= min_versions && versions_count as usize <= max_versions {
bucket_usage.object_versions_histogram.insert(range_name.to_string(), 1);
break;
}
}
self.buckets_usage.insert(bucket_name, bucket_usage);
// Update buckets count when adding new bucket
self.buckets_count = self.buckets_usage.len() as u64;
}
// Update global statistics
self.objects_total_size += total_size;
self.objects_total_count += 1;
self.versions_total_count += versions_count;
self.delete_markers_total_count += delete_markers_count;
}
/// Extract bucket name from object path
fn extract_bucket_from_path(&self, object_path: &str) -> Result<String> {
let parts: Vec<&str> = object_path.split('/').collect();
if parts.is_empty() {
return Err(Error::Scanner("Invalid object path: empty".to_string()));
}
Ok(parts[0].to_string())
}
/// Update capacity information
pub fn update_capacity(&mut self, total: u64, used: u64, free: u64) {
self.total_capacity = total;
self.total_used_capacity = used;
self.total_free_capacity = free;
self.last_update = Some(SystemTime::now());
}
/// Add bucket usage info
pub fn add_bucket_usage(&mut self, bucket: String, usage: BucketUsageInfo) {
self.buckets_usage.insert(bucket.clone(), usage);
self.buckets_count = self.buckets_usage.len() as u64;
self.last_update = Some(SystemTime::now());
}
/// Get bucket usage info
pub fn get_bucket_usage(&self, bucket: &str) -> Option<&BucketUsageInfo> {
self.buckets_usage.get(bucket)
}
/// Calculate total statistics from all buckets
pub fn calculate_totals(&mut self) {
self.objects_total_count = 0;
self.versions_total_count = 0;
self.delete_markers_total_count = 0;
self.objects_total_size = 0;
for usage in self.buckets_usage.values() {
self.objects_total_count += usage.objects_count;
self.versions_total_count += usage.versions_count;
self.delete_markers_total_count += usage.delete_markers_count;
self.objects_total_size += usage.size;
}
}
/// Merge another DataUsageInfo into this one
pub fn merge(&mut self, other: &DataUsageInfo) {
// Merge bucket usage
for (bucket, usage) in &other.buckets_usage {
if let Some(existing) = self.buckets_usage.get_mut(bucket) {
existing.merge(usage);
} else {
self.buckets_usage.insert(bucket.clone(), usage.clone());
}
}
// Recalculate totals
self.calculate_totals();
// Ensure buckets_count stays consistent with buckets_usage
self.buckets_count = self.buckets_usage.len() as u64;
// Update last update time
if let Some(other_update) = other.last_update {
if self.last_update.is_none() || other_update > self.last_update.unwrap() {
self.last_update = Some(other_update);
}
}
}
}
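Since this module is removed by the commit, purely for the record: a tiny sketch of how the merge API above was meant to aggregate per-node reports; the helper function is illustrative:
// Illustrative only: fold several DataUsageInfo reports into a cluster-wide total.
fn aggregate_usage(reports: &[DataUsageInfo]) -> DataUsageInfo {
let mut total = DataUsageInfo::new();
for report in reports {
total.merge(report); // merge() also recalculates the global totals
}
total
}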
impl BucketUsageInfo {
/// Create a new BucketUsageInfo
pub fn new() -> Self {
Self::default()
}
/// Add size summary to this bucket usage
pub fn add_size_summary(&mut self, summary: &SizeSummary) {
self.size += summary.total_size as u64;
self.versions_count += summary.versions as u64;
self.delete_markers_count += summary.delete_markers as u64;
self.replica_size += summary.replica_size as u64;
self.replica_count += summary.replica_count as u64;
}
/// Merge another BucketUsageInfo into this one
pub fn merge(&mut self, other: &BucketUsageInfo) {
self.size += other.size;
self.objects_count += other.objects_count;
self.versions_count += other.versions_count;
self.delete_markers_count += other.delete_markers_count;
self.replica_size += other.replica_size;
self.replica_count += other.replica_count;
// Merge histograms
for (key, value) in &other.object_size_histogram {
*self.object_size_histogram.entry(key.clone()).or_insert(0) += value;
}
for (key, value) in &other.object_versions_histogram {
*self.object_versions_histogram.entry(key.clone()).or_insert(0) += value;
}
// Merge replication info
for (target, info) in &other.replication_info {
let entry = self.replication_info.entry(target.clone()).or_default();
entry.replicated_size += info.replicated_size;
entry.replica_size += info.replica_size;
entry.replication_pending_size += info.replication_pending_size;
entry.replication_failed_size += info.replication_failed_size;
entry.replication_pending_count += info.replication_pending_count;
entry.replication_failed_count += info.replication_failed_count;
entry.replicated_count += info.replicated_count;
}
// Merge backward compatibility fields
self.replication_pending_size_v1 += other.replication_pending_size_v1;
self.replication_failed_size_v1 += other.replication_failed_size_v1;
self.replicated_size_v1 += other.replicated_size_v1;
self.replication_pending_count_v1 += other.replication_pending_count_v1;
self.replication_failed_count_v1 += other.replication_failed_count_v1;
}
}
impl SizeSummary {
/// Create a new SizeSummary
pub fn new() -> Self {
Self::default()
}
/// Add another SizeSummary to this one
pub fn add(&mut self, other: &SizeSummary) {
self.total_size += other.total_size;
self.versions += other.versions;
self.delete_markers += other.delete_markers;
self.replicated_size += other.replicated_size;
self.replicated_count += other.replicated_count;
self.pending_size += other.pending_size;
self.failed_size += other.failed_size;
self.replica_size += other.replica_size;
self.replica_count += other.replica_count;
self.pending_count += other.pending_count;
self.failed_count += other.failed_count;
// Merge replication target stats
for (target, stats) in &other.repl_target_stats {
let entry = self.repl_target_stats.entry(target.clone()).or_default();
entry.replicated_size += stats.replicated_size;
entry.replicated_count += stats.replicated_count;
entry.pending_size += stats.pending_size;
entry.failed_size += stats.failed_size;
entry.pending_count += stats.pending_count;
entry.failed_count += stats.failed_count;
}
}
}
/// Store data usage info to backend storage
pub async fn store_data_usage_in_backend(data_usage_info: DataUsageInfo, store: Arc<ECStore>) -> Result<()> {
let data =
serde_json::to_vec(&data_usage_info).map_err(|e| Error::Config(format!("Failed to serialize data usage info: {e}")))?;
// Save to backend using the same mechanism as original code
rustfs_ecstore::config::com::save_config(store, &DATA_USAGE_OBJ_NAME_PATH, data)
.await
.map_err(Error::Storage)?;
Ok(())
}
/// Load data usage info from backend storage
pub async fn load_data_usage_from_backend(store: Arc<ECStore>) -> Result<DataUsageInfo> {
let buf = match read_config(store, &DATA_USAGE_OBJ_NAME_PATH).await {
Ok(data) => data,
Err(e) => {
error!("Failed to read data usage info from backend: {}", e);
if e == rustfs_ecstore::error::Error::ConfigNotFound {
return Ok(DataUsageInfo::default());
}
return Err(Error::Storage(e));
}
};
let mut data_usage_info: DataUsageInfo =
serde_json::from_slice(&buf).map_err(|e| Error::Config(format!("Failed to deserialize data usage info: {e}")))?;
warn!("Loaded data usage info from backend {:?}", &data_usage_info);
// Handle backward compatibility like original code
if data_usage_info.buckets_usage.is_empty() {
data_usage_info.buckets_usage = data_usage_info
.bucket_sizes
.iter()
.map(|(bucket, &size)| {
(
bucket.clone(),
BucketUsageInfo {
size,
..Default::default()
},
)
})
.collect();
}
if data_usage_info.bucket_sizes.is_empty() {
data_usage_info.bucket_sizes = data_usage_info
.buckets_usage
.iter()
.map(|(bucket, bui)| (bucket.clone(), bui.size))
.collect();
}
for (bucket, bui) in &data_usage_info.buckets_usage {
if bui.replicated_size_v1 > 0
|| bui.replication_failed_count_v1 > 0
|| bui.replication_failed_size_v1 > 0
|| bui.replication_pending_count_v1 > 0
{
if let Ok((cfg, _)) = get_replication_config(bucket).await {
if !cfg.role.is_empty() {
data_usage_info.replication_info.insert(
cfg.role.clone(),
BucketTargetUsageInfo {
replication_failed_size: bui.replication_failed_size_v1,
replication_failed_count: bui.replication_failed_count_v1,
replicated_size: bui.replicated_size_v1,
replication_pending_count: bui.replication_pending_count_v1,
replication_pending_size: bui.replication_pending_size_v1,
..Default::default()
},
);
}
}
}
}
Ok(data_usage_info)
}
/// Example function showing how to use AHM data usage functionality
/// This demonstrates the integration pattern for DataUsageInfoHandler
pub async fn example_data_usage_integration() -> Result<()> {
// Get the global storage instance
let Some(store) = rustfs_ecstore::new_object_layer_fn() else {
return Err(Error::Config("Storage not initialized".to_string()));
};
// Load data usage from backend (this replaces the original load_data_usage_from_backend)
let data_usage = load_data_usage_from_backend(store).await?;
info!(
"Loaded data usage info: {} buckets, {} total objects",
data_usage.buckets_count, data_usage.objects_total_count
);
// Example: Store updated data usage back to backend
// This would typically be called by the scanner after collecting new statistics
// store_data_usage_in_backend(data_usage, store).await?;
Ok(())
}
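/// Illustrative sketch (not part of the original patch): shows how a scanner pass could fold
/// per-object `SizeSummary` values into a `DataUsageInfo` and persist it with the helper above.
/// Only names defined in this file are used; the real scan loop and error handling are elided.
pub async fn example_persist_scan_results(store: Arc<ECStore>) -> Result<()> {
    let mut usage = DataUsageInfo::default();

    // Pretend a single 4 KiB object with one version was scanned in "example-bucket".
    let mut summary = SizeSummary::new();
    summary.total_size = 4096;
    summary.versions = 1;

    let mut bucket_usage = BucketUsageInfo::new();
    bucket_usage.add_size_summary(&summary);
    bucket_usage.objects_count += 1;

    usage.buckets_usage.insert("example-bucket".to_string(), bucket_usage);
    usage.buckets_count = usage.buckets_usage.len() as u64;
    usage.calculate_totals();

    // At the end of a cycle the scanner would persist the aggregated snapshot.
    store_data_usage_in_backend(usage, store).await
}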
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_data_usage_info_creation() {
let mut info = DataUsageInfo::new();
info.update_capacity(1000, 500, 500);
assert_eq!(info.total_capacity, 1000);
assert_eq!(info.total_used_capacity, 500);
assert_eq!(info.total_free_capacity, 500);
assert!(info.last_update.is_some());
}
#[test]
fn test_bucket_usage_info_merge() {
let mut usage1 = BucketUsageInfo::new();
usage1.size = 100;
usage1.objects_count = 10;
usage1.versions_count = 5;
let mut usage2 = BucketUsageInfo::new();
usage2.size = 200;
usage2.objects_count = 20;
usage2.versions_count = 10;
usage1.merge(&usage2);
assert_eq!(usage1.size, 300);
assert_eq!(usage1.objects_count, 30);
assert_eq!(usage1.versions_count, 15);
}
#[test]
fn test_size_summary_add() {
let mut summary1 = SizeSummary::new();
summary1.total_size = 100;
summary1.versions = 5;
let mut summary2 = SizeSummary::new();
summary2.total_size = 200;
summary2.versions = 10;
summary1.add(&summary2);
assert_eq!(summary1.total_size, 300);
assert_eq!(summary1.versions, 15);
}
}

View File

@@ -12,197 +12,258 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::{
collections::HashMap,
sync::atomic::{AtomicU64, Ordering},
time::{Duration, SystemTime},
};
/// Size interval for object size histogram
#[derive(Debug, Clone)]
pub struct SizeInterval {
pub start: u64,
pub end: u64,
pub name: &'static str,
use serde::{Deserialize, Serialize};
use tracing::info;
/// Scanner metrics
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ScannerMetrics {
/// Total objects scanned since server start
pub objects_scanned: u64,
/// Total object versions scanned since server start
pub versions_scanned: u64,
/// Total directories scanned since server start
pub directories_scanned: u64,
/// Total bucket scans started since server start
pub bucket_scans_started: u64,
/// Total bucket scans finished since server start
pub bucket_scans_finished: u64,
/// Total objects with health issues found
pub objects_with_issues: u64,
/// Total heal tasks queued
pub heal_tasks_queued: u64,
/// Total heal tasks completed
pub heal_tasks_completed: u64,
/// Total heal tasks failed
pub heal_tasks_failed: u64,
/// Total healthy objects found
pub healthy_objects: u64,
/// Total corrupted objects found
pub corrupted_objects: u64,
/// Last scan activity time
pub last_activity: Option<SystemTime>,
/// Current scan cycle
pub current_cycle: u64,
/// Total scan cycles completed
pub total_cycles: u64,
/// Current scan duration
pub current_scan_duration: Option<Duration>,
/// Average scan duration
pub avg_scan_duration: Duration,
/// Objects scanned per second
pub objects_per_second: f64,
/// Buckets scanned per second
pub buckets_per_second: f64,
/// Storage metrics by bucket
pub bucket_metrics: HashMap<String, BucketMetrics>,
/// Disk metrics
pub disk_metrics: HashMap<String, DiskMetrics>,
}
/// Version interval for object versions histogram
#[derive(Debug, Clone)]
pub struct VersionInterval {
pub start: u64,
pub end: u64,
pub name: &'static str,
/// Bucket-specific metrics
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct BucketMetrics {
/// Bucket name
pub bucket: String,
/// Total objects in bucket
pub total_objects: u64,
/// Total size of objects in bucket (bytes)
pub total_size: u64,
/// Objects with health issues
pub objects_with_issues: u64,
/// Last scan time
pub last_scan_time: Option<SystemTime>,
/// Scan duration
pub scan_duration: Option<Duration>,
/// Heal tasks queued for this bucket
pub heal_tasks_queued: u64,
/// Heal tasks completed for this bucket
pub heal_tasks_completed: u64,
/// Heal tasks failed for this bucket
pub heal_tasks_failed: u64,
}
/// Object size histogram intervals
pub const OBJECTS_HISTOGRAM_INTERVALS: &[SizeInterval] = &[
SizeInterval {
start: 0,
end: 1024 - 1,
name: "LESS_THAN_1_KiB",
},
SizeInterval {
start: 1024,
end: 1024 * 1024 - 1,
name: "1_KiB_TO_1_MiB",
},
SizeInterval {
start: 1024 * 1024,
end: 10 * 1024 * 1024 - 1,
name: "1_MiB_TO_10_MiB",
},
SizeInterval {
start: 10 * 1024 * 1024,
end: 64 * 1024 * 1024 - 1,
name: "10_MiB_TO_64_MiB",
},
SizeInterval {
start: 64 * 1024 * 1024,
end: 128 * 1024 * 1024 - 1,
name: "64_MiB_TO_128_MiB",
},
SizeInterval {
start: 128 * 1024 * 1024,
end: 512 * 1024 * 1024 - 1,
name: "128_MiB_TO_512_MiB",
},
SizeInterval {
start: 512 * 1024 * 1024,
end: u64::MAX,
name: "MORE_THAN_512_MiB",
},
];
/// Object version count histogram intervals
pub const OBJECTS_VERSION_COUNT_INTERVALS: &[VersionInterval] = &[
VersionInterval {
start: 1,
end: 1,
name: "1_VERSION",
},
VersionInterval {
start: 2,
end: 10,
name: "2_TO_10_VERSIONS",
},
VersionInterval {
start: 11,
end: 100,
name: "11_TO_100_VERSIONS",
},
VersionInterval {
start: 101,
end: 1000,
name: "101_TO_1000_VERSIONS",
},
VersionInterval {
start: 1001,
end: u64::MAX,
name: "MORE_THAN_1000_VERSIONS",
},
];
/// Size histogram for object size distribution
#[derive(Debug, Clone, Default)]
pub struct SizeHistogram {
counts: Vec<u64>,
/// Disk-specific metrics
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct DiskMetrics {
/// Disk path
pub disk_path: String,
/// Total disk space (bytes)
pub total_space: u64,
/// Used disk space (bytes)
pub used_space: u64,
/// Free disk space (bytes)
pub free_space: u64,
/// Objects scanned on this disk
pub objects_scanned: u64,
/// Objects with issues on this disk
pub objects_with_issues: u64,
/// Last scan time
pub last_scan_time: Option<SystemTime>,
/// Whether disk is online
pub is_online: bool,
/// Whether disk is being scanned
pub is_scanning: bool,
}
/// Versions histogram for object version count distribution
#[derive(Debug, Clone, Default)]
pub struct VersionsHistogram {
counts: Vec<u64>,
/// Thread-safe metrics collector
pub struct MetricsCollector {
/// Atomic counters for real-time metrics
objects_scanned: AtomicU64,
versions_scanned: AtomicU64,
directories_scanned: AtomicU64,
bucket_scans_started: AtomicU64,
bucket_scans_finished: AtomicU64,
objects_with_issues: AtomicU64,
heal_tasks_queued: AtomicU64,
heal_tasks_completed: AtomicU64,
heal_tasks_failed: AtomicU64,
current_cycle: AtomicU64,
total_cycles: AtomicU64,
healthy_objects: AtomicU64,
corrupted_objects: AtomicU64,
}
impl SizeHistogram {
/// Create a new size histogram
impl MetricsCollector {
/// Create a new metrics collector
pub fn new() -> Self {
Self {
counts: vec![0; OBJECTS_HISTOGRAM_INTERVALS.len()],
objects_scanned: AtomicU64::new(0),
versions_scanned: AtomicU64::new(0),
directories_scanned: AtomicU64::new(0),
bucket_scans_started: AtomicU64::new(0),
bucket_scans_finished: AtomicU64::new(0),
objects_with_issues: AtomicU64::new(0),
heal_tasks_queued: AtomicU64::new(0),
heal_tasks_completed: AtomicU64::new(0),
heal_tasks_failed: AtomicU64::new(0),
current_cycle: AtomicU64::new(0),
total_cycles: AtomicU64::new(0),
healthy_objects: AtomicU64::new(0),
corrupted_objects: AtomicU64::new(0),
}
}
/// Add a size to the histogram
pub fn add(&mut self, size: u64) {
for (idx, interval) in OBJECTS_HISTOGRAM_INTERVALS.iter().enumerate() {
if size >= interval.start && size <= interval.end {
self.counts[idx] += 1;
break;
}
/// Increment objects scanned count
pub fn increment_objects_scanned(&self, count: u64) {
self.objects_scanned.fetch_add(count, Ordering::Relaxed);
}
/// Increment versions scanned count
pub fn increment_versions_scanned(&self, count: u64) {
self.versions_scanned.fetch_add(count, Ordering::Relaxed);
}
/// Increment directories scanned count
pub fn increment_directories_scanned(&self, count: u64) {
self.directories_scanned.fetch_add(count, Ordering::Relaxed);
}
/// Increment bucket scans started count
pub fn increment_bucket_scans_started(&self, count: u64) {
self.bucket_scans_started.fetch_add(count, Ordering::Relaxed);
}
/// Increment bucket scans finished count
pub fn increment_bucket_scans_finished(&self, count: u64) {
self.bucket_scans_finished.fetch_add(count, Ordering::Relaxed);
}
/// Increment objects with issues count
pub fn increment_objects_with_issues(&self, count: u64) {
self.objects_with_issues.fetch_add(count, Ordering::Relaxed);
}
/// Increment heal tasks queued count
pub fn increment_heal_tasks_queued(&self, count: u64) {
self.heal_tasks_queued.fetch_add(count, Ordering::Relaxed);
}
/// Increment heal tasks completed count
pub fn increment_heal_tasks_completed(&self, count: u64) {
self.heal_tasks_completed.fetch_add(count, Ordering::Relaxed);
}
/// Increment heal tasks failed count
pub fn increment_heal_tasks_failed(&self, count: u64) {
self.heal_tasks_failed.fetch_add(count, Ordering::Relaxed);
}
/// Set current cycle
pub fn set_current_cycle(&self, cycle: u64) {
self.current_cycle.store(cycle, Ordering::Relaxed);
}
/// Increment total cycles
pub fn increment_total_cycles(&self) {
self.total_cycles.fetch_add(1, Ordering::Relaxed);
}
/// Increment healthy objects count
pub fn increment_healthy_objects(&self) {
self.healthy_objects.fetch_add(1, Ordering::Relaxed);
}
/// Increment corrupted objects count
pub fn increment_corrupted_objects(&self) {
self.corrupted_objects.fetch_add(1, Ordering::Relaxed);
}
/// Get current metrics snapshot
pub fn get_metrics(&self) -> ScannerMetrics {
ScannerMetrics {
objects_scanned: self.objects_scanned.load(Ordering::Relaxed),
versions_scanned: self.versions_scanned.load(Ordering::Relaxed),
directories_scanned: self.directories_scanned.load(Ordering::Relaxed),
bucket_scans_started: self.bucket_scans_started.load(Ordering::Relaxed),
bucket_scans_finished: self.bucket_scans_finished.load(Ordering::Relaxed),
objects_with_issues: self.objects_with_issues.load(Ordering::Relaxed),
heal_tasks_queued: self.heal_tasks_queued.load(Ordering::Relaxed),
heal_tasks_completed: self.heal_tasks_completed.load(Ordering::Relaxed),
heal_tasks_failed: self.heal_tasks_failed.load(Ordering::Relaxed),
healthy_objects: self.healthy_objects.load(Ordering::Relaxed),
corrupted_objects: self.corrupted_objects.load(Ordering::Relaxed),
last_activity: Some(SystemTime::now()),
current_cycle: self.current_cycle.load(Ordering::Relaxed),
total_cycles: self.total_cycles.load(Ordering::Relaxed),
current_scan_duration: None, // Will be set by scanner
avg_scan_duration: Duration::ZERO, // Will be calculated
objects_per_second: 0.0, // Will be calculated
buckets_per_second: 0.0, // Will be calculated
bucket_metrics: HashMap::new(), // Will be populated by scanner
disk_metrics: HashMap::new(), // Will be populated by scanner
}
}
/// Get the histogram as a map
pub fn to_map(&self) -> HashMap<String, u64> {
let mut result = HashMap::new();
for (idx, count) in self.counts.iter().enumerate() {
let interval = &OBJECTS_HISTOGRAM_INTERVALS[idx];
result.insert(interval.name.to_string(), *count);
}
result
}
/// Reset all metrics
pub fn reset(&self) {
self.objects_scanned.store(0, Ordering::Relaxed);
self.versions_scanned.store(0, Ordering::Relaxed);
self.directories_scanned.store(0, Ordering::Relaxed);
self.bucket_scans_started.store(0, Ordering::Relaxed);
self.bucket_scans_finished.store(0, Ordering::Relaxed);
self.objects_with_issues.store(0, Ordering::Relaxed);
self.heal_tasks_queued.store(0, Ordering::Relaxed);
self.heal_tasks_completed.store(0, Ordering::Relaxed);
self.heal_tasks_failed.store(0, Ordering::Relaxed);
self.current_cycle.store(0, Ordering::Relaxed);
self.total_cycles.store(0, Ordering::Relaxed);
self.healthy_objects.store(0, Ordering::Relaxed);
self.corrupted_objects.store(0, Ordering::Relaxed);
/// Merge another histogram into this one
pub fn merge(&mut self, other: &SizeHistogram) {
for (idx, count) in other.counts.iter().enumerate() {
self.counts[idx] += count;
}
}
/// Get total count
pub fn total_count(&self) -> u64 {
self.counts.iter().sum()
}
/// Reset the histogram
pub fn reset(&mut self) {
for count in &mut self.counts {
*count = 0;
}
info!("Scanner metrics reset");
}
}
impl VersionsHistogram {
/// Create a new versions histogram
pub fn new() -> Self {
Self {
counts: vec![0; OBJECTS_VERSION_COUNT_INTERVALS.len()],
}
}
/// Add a version count to the histogram
pub fn add(&mut self, versions: u64) {
for (idx, interval) in OBJECTS_VERSION_COUNT_INTERVALS.iter().enumerate() {
if versions >= interval.start && versions <= interval.end {
self.counts[idx] += 1;
break;
}
}
}
/// Get the histogram as a map
pub fn to_map(&self) -> HashMap<String, u64> {
let mut result = HashMap::new();
for (idx, count) in self.counts.iter().enumerate() {
let interval = &OBJECTS_VERSION_COUNT_INTERVALS[idx];
result.insert(interval.name.to_string(), *count);
}
result
}
/// Merge another histogram into this one
pub fn merge(&mut self, other: &VersionsHistogram) {
for (idx, count) in other.counts.iter().enumerate() {
self.counts[idx] += count;
}
}
/// Get total count
pub fn total_count(&self) -> u64 {
self.counts.iter().sum()
}
/// Reset the histogram
pub fn reset(&mut self) {
for count in &mut self.counts {
*count = 0;
}
impl Default for MetricsCollector {
fn default() -> Self {
Self::new()
}
}
@@ -211,67 +272,35 @@ mod tests {
use super::*;
#[test]
fn test_size_histogram() {
let mut histogram = SizeHistogram::new();
// Add some sizes
histogram.add(512); // LESS_THAN_1_KiB
histogram.add(1024); // 1_KiB_TO_1_MiB
histogram.add(1024 * 1024); // 1_MiB_TO_10_MiB
histogram.add(5 * 1024 * 1024); // 1_MiB_TO_10_MiB
let map = histogram.to_map();
assert_eq!(map.get("LESS_THAN_1_KiB"), Some(&1));
assert_eq!(map.get("1_KiB_TO_1_MiB"), Some(&1));
assert_eq!(map.get("1_MiB_TO_10_MiB"), Some(&2));
assert_eq!(map.get("10_MiB_TO_64_MiB"), Some(&0));
fn test_metrics_collector_creation() {
let collector = MetricsCollector::new();
let metrics = collector.get_metrics();
assert_eq!(metrics.objects_scanned, 0);
assert_eq!(metrics.versions_scanned, 0);
}
#[test]
fn test_versions_histogram() {
let mut histogram = VersionsHistogram::new();
fn test_metrics_increment() {
let collector = MetricsCollector::new();
// Add some version counts
histogram.add(1); // 1_VERSION
histogram.add(5); // 2_TO_10_VERSIONS
histogram.add(50); // 11_TO_100_VERSIONS
histogram.add(500); // 101_TO_1000_VERSIONS
collector.increment_objects_scanned(10);
collector.increment_versions_scanned(5);
collector.increment_objects_with_issues(2);
let map = histogram.to_map();
assert_eq!(map.get("1_VERSION"), Some(&1));
assert_eq!(map.get("2_TO_10_VERSIONS"), Some(&1));
assert_eq!(map.get("11_TO_100_VERSIONS"), Some(&1));
assert_eq!(map.get("101_TO_1000_VERSIONS"), Some(&1));
let metrics = collector.get_metrics();
assert_eq!(metrics.objects_scanned, 10);
assert_eq!(metrics.versions_scanned, 5);
assert_eq!(metrics.objects_with_issues, 2);
}
#[test]
fn test_histogram_merge() {
let mut histogram1 = SizeHistogram::new();
histogram1.add(1024);
histogram1.add(1024 * 1024);
fn test_metrics_reset() {
let collector = MetricsCollector::new();
let mut histogram2 = SizeHistogram::new();
histogram2.add(1024);
histogram2.add(5 * 1024 * 1024);
collector.increment_objects_scanned(10);
collector.reset();
histogram1.merge(&histogram2);
let map = histogram1.to_map();
assert_eq!(map.get("1_KiB_TO_1_MiB"), Some(&2)); // 1 from histogram1 + 1 from histogram2
assert_eq!(map.get("1_MiB_TO_10_MiB"), Some(&2)); // 1 from histogram1 + 1 from histogram2
}
#[test]
fn test_histogram_reset() {
let mut histogram = SizeHistogram::new();
histogram.add(1024);
histogram.add(1024 * 1024);
assert_eq!(histogram.total_count(), 2);
histogram.reset();
assert_eq!(histogram.total_count(), 0);
let metrics = collector.get_metrics();
assert_eq!(metrics.objects_scanned, 0);
}
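// Illustrative sketch (not in the original patch): every counter is an AtomicU64, so a
// single collector can be shared across threads behind an Arc without extra locking.
#[test]
fn test_metrics_concurrent_increment() {
    use std::sync::Arc;

    let collector = Arc::new(MetricsCollector::new());
    let handles: Vec<_> = (0..4)
        .map(|_| {
            let c = Arc::clone(&collector);
            std::thread::spawn(move || c.increment_objects_scanned(10))
        })
        .collect();
    for handle in handles {
        handle.join().unwrap();
    }
    assert_eq!(collector.get_metrics().objects_scanned, 40);
}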
}

View File

@@ -42,6 +42,10 @@ pub struct ScannerMetrics {
pub heal_tasks_completed: u64,
/// Total heal tasks failed
pub heal_tasks_failed: u64,
/// Total healthy objects found
pub healthy_objects: u64,
/// Total corrupted objects found
pub corrupted_objects: u64,
/// Last scan activity time
pub last_activity: Option<SystemTime>,
/// Current scan cycle
@@ -122,6 +126,8 @@ pub struct MetricsCollector {
heal_tasks_failed: AtomicU64,
current_cycle: AtomicU64,
total_cycles: AtomicU64,
healthy_objects: AtomicU64,
corrupted_objects: AtomicU64,
}
impl MetricsCollector {
@@ -139,6 +145,8 @@ impl MetricsCollector {
heal_tasks_failed: AtomicU64::new(0),
current_cycle: AtomicU64::new(0),
total_cycles: AtomicU64::new(0),
healthy_objects: AtomicU64::new(0),
corrupted_objects: AtomicU64::new(0),
}
}
@@ -197,6 +205,16 @@ impl MetricsCollector {
self.total_cycles.fetch_add(1, Ordering::Relaxed);
}
/// Increment healthy objects count
pub fn increment_healthy_objects(&self) {
self.healthy_objects.fetch_add(1, Ordering::Relaxed);
}
/// Increment corrupted objects count
pub fn increment_corrupted_objects(&self) {
self.corrupted_objects.fetch_add(1, Ordering::Relaxed);
}
/// Get current metrics snapshot
pub fn get_metrics(&self) -> ScannerMetrics {
ScannerMetrics {
@@ -209,6 +227,8 @@ impl MetricsCollector {
heal_tasks_queued: self.heal_tasks_queued.load(Ordering::Relaxed),
heal_tasks_completed: self.heal_tasks_completed.load(Ordering::Relaxed),
heal_tasks_failed: self.heal_tasks_failed.load(Ordering::Relaxed),
healthy_objects: self.healthy_objects.load(Ordering::Relaxed),
corrupted_objects: self.corrupted_objects.load(Ordering::Relaxed),
last_activity: Some(SystemTime::now()),
current_cycle: self.current_cycle.load(Ordering::Relaxed),
total_cycles: self.total_cycles.load(Ordering::Relaxed),
@@ -234,6 +254,8 @@ impl MetricsCollector {
self.heal_tasks_failed.store(0, Ordering::Relaxed);
self.current_cycle.store(0, Ordering::Relaxed);
self.total_cycles.store(0, Ordering::Relaxed);
self.healthy_objects.store(0, Ordering::Relaxed);
self.corrupted_objects.store(0, Ordering::Relaxed);
info!("Scanner metrics reset");
}

View File

@@ -13,13 +13,8 @@
// limitations under the License.
pub mod data_scanner;
pub mod data_usage;
pub mod histogram;
pub mod metrics;
// Re-export main types for convenience
pub use data_scanner::Scanner;
pub use data_usage::{
BucketTargetUsageInfo, BucketUsageInfo, DataUsageInfo, load_data_usage_from_backend, store_data_usage_in_backend,
};
pub use metrics::ScannerMetrics;

View File

@@ -0,0 +1,410 @@
use rustfs_ahm::heal::{
manager::{HealConfig, HealManager},
storage::{ECStoreHealStorage, HealStorageAPI},
task::{HealOptions, HealPriority, HealRequest, HealTaskStatus, HealType},
};
use rustfs_common::heal_channel::{HealOpts, HealScanMode};
use rustfs_ecstore::{
disk::endpoint::Endpoint,
endpoints::{EndpointServerPools, Endpoints, PoolEndpoints},
store::ECStore,
store_api::{ObjectIO, ObjectOptions, PutObjReader, StorageAPI},
};
use serial_test::serial;
use std::sync::Once;
use std::sync::OnceLock;
use std::{path::PathBuf, sync::Arc, time::Duration};
use tokio::fs;
use tracing::info;
use walkdir::WalkDir;
static GLOBAL_ENV: OnceLock<(Vec<PathBuf>, Arc<ECStore>, Arc<ECStoreHealStorage>)> = OnceLock::new();
static INIT: Once = Once::new();
fn init_tracing() {
INIT.call_once(|| {
let _ = tracing_subscriber::fmt::try_init();
});
}
/// Test helper: Create test environment with ECStore
async fn setup_test_env() -> (Vec<PathBuf>, Arc<ECStore>, Arc<ECStoreHealStorage>) {
init_tracing();
// Fast path: already initialized, just clone and return
if let Some((paths, ecstore, heal_storage)) = GLOBAL_ENV.get() {
return (paths.clone(), ecstore.clone(), heal_storage.clone());
}
// create temp dir as 4 disks with unique base dir
let test_base_dir = format!("/tmp/rustfs_ahm_heal_test_{}", uuid::Uuid::new_v4());
let temp_dir = std::path::PathBuf::from(&test_base_dir);
if temp_dir.exists() {
fs::remove_dir_all(&temp_dir).await.ok();
}
fs::create_dir_all(&temp_dir).await.unwrap();
// create 4 disk dirs
let disk_paths = vec![
temp_dir.join("disk1"),
temp_dir.join("disk2"),
temp_dir.join("disk3"),
temp_dir.join("disk4"),
];
for disk_path in &disk_paths {
fs::create_dir_all(disk_path).await.unwrap();
}
// create EndpointServerPools
let mut endpoints = Vec::new();
for (i, disk_path) in disk_paths.iter().enumerate() {
let mut endpoint = Endpoint::try_from(disk_path.to_str().unwrap()).unwrap();
// set correct index
endpoint.set_pool_index(0);
endpoint.set_set_index(0);
endpoint.set_disk_index(i);
endpoints.push(endpoint);
}
let pool_endpoints = PoolEndpoints {
legacy: false,
set_count: 1,
drives_per_set: 4,
endpoints: Endpoints::from(endpoints),
cmd_line: "test".to_string(),
platform: format!("OS: {} | Arch: {}", std::env::consts::OS, std::env::consts::ARCH),
};
let endpoint_pools = EndpointServerPools(vec![pool_endpoints]);
// format disks (only first time)
rustfs_ecstore::store::init_local_disks(endpoint_pools.clone()).await.unwrap();
// create ECStore with dynamic port 0 (let OS assign) or fixed 9001 if free
let port = 9001; // for simplicity
let server_addr: std::net::SocketAddr = format!("127.0.0.1:{port}").parse().unwrap();
let ecstore = ECStore::new(server_addr, endpoint_pools).await.unwrap();
// init bucket metadata system
let buckets_list = ecstore
.list_bucket(&rustfs_ecstore::store_api::BucketOptions {
no_metadata: true,
..Default::default()
})
.await
.unwrap();
let buckets = buckets_list.into_iter().map(|v| v.name).collect();
rustfs_ecstore::bucket::metadata_sys::init_bucket_metadata_sys(ecstore.clone(), buckets).await;
// Create heal storage layer
let heal_storage = Arc::new(ECStoreHealStorage::new(ecstore.clone()));
// Store in global once lock
let _ = GLOBAL_ENV.set((disk_paths.clone(), ecstore.clone(), heal_storage.clone()));
(disk_paths, ecstore, heal_storage)
}
/// Test helper: Create a test bucket
async fn create_test_bucket(ecstore: &Arc<ECStore>, bucket_name: &str) {
(**ecstore)
.make_bucket(bucket_name, &Default::default())
.await
.expect("Failed to create test bucket");
info!("Created test bucket: {}", bucket_name);
}
/// Test helper: Upload test object
async fn upload_test_object(ecstore: &Arc<ECStore>, bucket: &str, object: &str, data: &[u8]) {
let mut reader = PutObjReader::from_vec(data.to_vec());
let object_info = (**ecstore)
.put_object(bucket, object, &mut reader, &ObjectOptions::default())
.await
.expect("Failed to upload test object");
info!("Uploaded test object: {}/{} ({} bytes)", bucket, object, object_info.size);
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
#[serial]
async fn test_heal_object_basic() {
let (disk_paths, ecstore, heal_storage) = setup_test_env().await;
// Create test bucket and object
let bucket_name = "test-bucket";
let object_name = "test-object.txt";
let test_data = b"Hello, this is test data for healing!";
create_test_bucket(&ecstore, bucket_name).await;
upload_test_object(&ecstore, bucket_name, object_name, test_data).await;
// ─── 1⃣ delete single data shard file ─────────────────────────────────────
let obj_dir = disk_paths[0].join(bucket_name).join(object_name);
// find part file at depth 2, e.g. .../<uuid>/part.1
let target_part = WalkDir::new(&obj_dir)
.min_depth(2)
.max_depth(2)
.into_iter()
.filter_map(Result::ok)
.find(|e| e.file_type().is_file() && e.file_name().to_str().map(|n| n.starts_with("part.")).unwrap_or(false))
.map(|e| e.into_path())
.expect("Failed to locate part file to delete");
std::fs::remove_file(&target_part).expect("failed to delete part file");
assert!(!target_part.exists());
println!("✅ Deleted shard part file: {target_part:?}");
// Create heal manager with faster interval
let cfg = HealConfig {
heal_interval: Duration::from_millis(1),
..Default::default()
};
let heal_manager = HealManager::new(heal_storage.clone(), Some(cfg));
heal_manager.start().await.unwrap();
// Submit heal request for the object
let heal_request = HealRequest::new(
HealType::Object {
bucket: bucket_name.to_string(),
object: object_name.to_string(),
version_id: None,
},
HealOptions {
dry_run: false,
recursive: false,
remove_corrupted: false,
recreate_missing: true,
scan_mode: HealScanMode::Normal,
update_parity: true,
timeout: Some(Duration::from_secs(300)),
pool_index: None,
set_index: None,
},
HealPriority::Normal,
);
let task_id = heal_manager
.submit_heal_request(heal_request)
.await
.expect("Failed to submit heal request");
info!("Submitted heal request with task ID: {}", task_id);
// Wait for task completion
tokio::time::sleep(tokio::time::Duration::from_secs(8)).await;
// Attempt to fetch task status (might be removed if finished)
match heal_manager.get_task_status(&task_id).await {
Ok(status) => info!("Task status: {:?}", status),
Err(e) => info!("Task status not found (likely completed): {}", e),
}
// ─── 2⃣ verify each part file is restored ───────
assert!(target_part.exists());
info!("Heal object basic test passed");
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
#[serial]
async fn test_heal_bucket_basic() {
let (disk_paths, ecstore, heal_storage) = setup_test_env().await;
// Create test bucket
let bucket_name = "test-bucket-heal";
create_test_bucket(&ecstore, bucket_name).await;
// ─── 1⃣ delete bucket dir on disk ──────────────
let broken_bucket_path = disk_paths[0].join(bucket_name);
assert!(broken_bucket_path.exists(), "bucket dir does not exist on disk");
std::fs::remove_dir_all(&broken_bucket_path).expect("failed to delete bucket dir on disk");
assert!(!broken_bucket_path.exists(), "bucket dir still exists after deletion");
println!("✅ Deleted bucket directory on disk: {broken_bucket_path:?}");
// Create heal manager with faster interval
let cfg = HealConfig {
heal_interval: Duration::from_millis(1),
..Default::default()
};
let heal_manager = HealManager::new(heal_storage.clone(), Some(cfg));
heal_manager.start().await.unwrap();
// Submit heal request for the bucket
let heal_request = HealRequest::new(
HealType::Bucket {
bucket: bucket_name.to_string(),
},
HealOptions {
dry_run: false,
recursive: true,
remove_corrupted: false,
recreate_missing: false,
scan_mode: HealScanMode::Normal,
update_parity: false,
timeout: Some(Duration::from_secs(300)),
pool_index: None,
set_index: None,
},
HealPriority::Normal,
);
let task_id = heal_manager
.submit_heal_request(heal_request)
.await
.expect("Failed to submit bucket heal request");
info!("Submitted bucket heal request with task ID: {}", task_id);
// Wait for task completion
tokio::time::sleep(tokio::time::Duration::from_secs(5)).await;
// Attempt to fetch task status (optional)
if let Ok(status) = heal_manager.get_task_status(&task_id).await {
if status == HealTaskStatus::Completed {
info!("Bucket heal task status: {:?}", status);
} else {
panic!("Bucket heal task status: {status:?}");
}
}
// ─── 2⃣ Verify bucket directory is restored on every disk ───────
assert!(broken_bucket_path.exists(), "bucket dir does not exist on disk");
info!("Heal bucket basic test passed");
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
#[serial]
async fn test_heal_format_basic() {
let (disk_paths, _ecstore, heal_storage) = setup_test_env().await;
// ─── 1⃣ delete format.json on one disk ──────────────
let format_path = disk_paths[0].join(".rustfs.sys").join("format.json");
assert!(format_path.exists(), "format.json does not exist on disk");
std::fs::remove_file(&format_path).expect("failed to delete format.json on disk");
assert!(!format_path.exists(), "format.json still exists after deletion");
println!("✅ Deleted format.json on disk: {format_path:?}");
// Create heal manager with faster interval
let cfg = HealConfig {
heal_interval: Duration::from_secs(2),
..Default::default()
};
let heal_manager = HealManager::new(heal_storage.clone(), Some(cfg));
heal_manager.start().await.unwrap();
// Wait for task completion
tokio::time::sleep(tokio::time::Duration::from_secs(5)).await;
// ─── 2⃣ verify format.json is restored ───────
assert!(format_path.exists(), "format.json does not exist on disk after heal");
info!("Heal format basic test passed");
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
#[serial]
async fn test_heal_format_with_data() {
let (disk_paths, ecstore, heal_storage) = setup_test_env().await;
// Create test bucket and object
let bucket_name = "test-bucket";
let object_name = "test-object.txt";
let test_data = b"Hello, this is test data for healing!";
create_test_bucket(&ecstore, bucket_name).await;
upload_test_object(&ecstore, bucket_name, object_name, test_data).await;
let obj_dir = disk_paths[0].join(bucket_name).join(object_name);
let target_part = WalkDir::new(&obj_dir)
.min_depth(2)
.max_depth(2)
.into_iter()
.filter_map(Result::ok)
.find(|e| e.file_type().is_file() && e.file_name().to_str().map(|n| n.starts_with("part.")).unwrap_or(false))
.map(|e| e.into_path())
.expect("Failed to locate part file to delete");
// ─── 1⃣ wipe all contents on one disk (format.json included) ──────────────
let format_path = disk_paths[0].join(".rustfs.sys").join("format.json");
std::fs::remove_dir_all(&disk_paths[0]).expect("failed to delete all contents under disk_paths[0]");
std::fs::create_dir_all(&disk_paths[0]).expect("failed to recreate disk_paths[0] directory");
println!("✅ Deleted format.json on disk: {:?}", disk_paths[0]);
// Create heal manager with faster interval
let cfg = HealConfig {
heal_interval: Duration::from_secs(2),
..Default::default()
};
let heal_manager = HealManager::new(heal_storage.clone(), Some(cfg));
heal_manager.start().await.unwrap();
// Wait for task completion
tokio::time::sleep(tokio::time::Duration::from_secs(5)).await;
// ─── 2⃣ verify format.json is restored ───────
assert!(format_path.exists(), "format.json does not exist on disk after heal");
// ─── 3⃣ verify each part file is restored ───────
assert!(target_part.exists());
info!("Heal format basic test passed");
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
#[serial]
async fn test_heal_storage_api_direct() {
let (_disk_paths, ecstore, heal_storage) = setup_test_env().await;
// Test direct heal storage API calls
// Test heal_format
let format_result = heal_storage.heal_format(true).await; // dry run
assert!(format_result.is_ok());
info!("Direct heal_format test passed");
// Test heal_bucket
let bucket_name = "test-bucket-direct";
create_test_bucket(&ecstore, bucket_name).await;
let heal_opts = HealOpts {
recursive: true,
dry_run: true,
remove: false,
recreate: false,
scan_mode: HealScanMode::Normal,
update_parity: false,
no_lock: false,
pool: None,
set: None,
};
let bucket_result = heal_storage.heal_bucket(bucket_name, &heal_opts).await;
assert!(bucket_result.is_ok());
info!("Direct heal_bucket test passed");
// Test heal_object
let object_name = "test-object-direct.txt";
let test_data = b"Test data for direct heal API";
upload_test_object(&ecstore, bucket_name, object_name, test_data).await;
let object_heal_opts = HealOpts {
recursive: false,
dry_run: true,
remove: false,
recreate: false,
scan_mode: HealScanMode::Normal,
update_parity: false,
no_lock: false,
pool: None,
set: None,
};
let object_result = heal_storage
.heal_object(bucket_name, object_name, None, &object_heal_opts)
.await;
assert!(object_result.is_ok());
info!("Direct heal_object test passed");
info!("Direct heal storage API test passed");
}

View File

@@ -28,5 +28,15 @@ categories = ["web-programming", "development-tools", "data-structures"]
workspace = true
[dependencies]
tokio.workspace = true
lazy_static = { workspace = true}
tokio = { workspace = true }
tonic = { workspace = true }
uuid = { workspace = true }
chrono = { workspace = true }
rustfs-madmin = { workspace = true }
rustfs-filemeta = { workspace = true }
serde = { workspace = true }
path-clean = { workspace = true }
rmp-serde = { workspace = true }
async-trait = { workspace = true }
s3s = { workspace = true }

File diff suppressed because it is too large

View File

@@ -0,0 +1,427 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use s3s::dto::{BucketLifecycleConfiguration, ExpirationStatus, LifecycleRule, ReplicationConfiguration, ReplicationRuleStatus};
use serde::{Deserialize, Serialize};
use std::{
fmt::{self, Display},
sync::OnceLock,
};
use tokio::sync::mpsc;
use uuid::Uuid;
pub const HEAL_DELETE_DANGLING: bool = true;
pub const RUSTFS_RESERVED_BUCKET: &str = "rustfs";
pub const RUSTFS_RESERVED_BUCKET_PATH: &str = "/rustfs";
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
pub enum HealItemType {
Metadata,
Bucket,
BucketMetadata,
Object,
}
impl HealItemType {
pub fn to_str(&self) -> &str {
match self {
HealItemType::Metadata => "metadata",
HealItemType::Bucket => "bucket",
HealItemType::BucketMetadata => "bucket-metadata",
HealItemType::Object => "object",
}
}
}
impl Display for HealItemType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.to_str())
}
}
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
pub enum DriveState {
Ok,
Offline,
Corrupt,
Missing,
PermissionDenied,
Faulty,
RootMount,
Unknown,
Unformatted, // only returned by disk
}
impl DriveState {
pub fn to_str(&self) -> &str {
match self {
DriveState::Ok => "ok",
DriveState::Offline => "offline",
DriveState::Corrupt => "corrupt",
DriveState::Missing => "missing",
DriveState::PermissionDenied => "permission-denied",
DriveState::Faulty => "faulty",
DriveState::RootMount => "root-mount",
DriveState::Unknown => "unknown",
DriveState::Unformatted => "unformatted",
}
}
}
impl Display for DriveState {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.to_str())
}
}
#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub enum HealScanMode {
Unknown,
Normal,
Deep,
}
impl Default for HealScanMode {
fn default() -> Self {
Self::Normal
}
}
#[derive(Clone, Copy, Debug, Default, Serialize, Deserialize)]
pub struct HealOpts {
pub recursive: bool,
#[serde(rename = "dryRun")]
pub dry_run: bool,
pub remove: bool,
pub recreate: bool,
#[serde(rename = "scanMode")]
pub scan_mode: HealScanMode,
#[serde(rename = "updateParity")]
pub update_parity: bool,
#[serde(rename = "nolock")]
pub no_lock: bool,
pub pool: Option<usize>,
pub set: Option<usize>,
}
/// Heal channel command type
#[derive(Debug, Clone)]
pub enum HealChannelCommand {
/// Start a new heal task
Start(HealChannelRequest),
/// Query heal task status
Query { heal_path: String, client_token: String },
/// Cancel heal task
Cancel { heal_path: String },
}
/// Heal request from admin to ahm
#[derive(Debug, Clone, Default)]
pub struct HealChannelRequest {
/// Unique request ID
pub id: String,
/// Disk ID for heal disk/erasure set task
pub disk: Option<String>,
/// Bucket name
pub bucket: String,
/// Object prefix (optional)
pub object_prefix: Option<String>,
/// Force start heal
pub force_start: bool,
/// Priority
pub priority: HealChannelPriority,
/// Pool index (optional)
pub pool_index: Option<usize>,
/// Set index (optional)
pub set_index: Option<usize>,
/// Scan mode (optional)
pub scan_mode: Option<HealScanMode>,
/// Whether to remove corrupted data
pub remove_corrupted: Option<bool>,
/// Whether to recreate missing data
pub recreate_missing: Option<bool>,
/// Whether to update parity
pub update_parity: Option<bool>,
/// Whether to recursively process
pub recursive: Option<bool>,
/// Whether to dry run
pub dry_run: Option<bool>,
/// Timeout in seconds (optional)
pub timeout_seconds: Option<u64>,
}
/// Heal response from ahm to admin
#[derive(Debug, Clone)]
pub struct HealChannelResponse {
/// Request ID
pub request_id: String,
/// Success status
pub success: bool,
/// Response data (if successful)
pub data: Option<Vec<u8>>,
/// Error message (if failed)
pub error: Option<String>,
}
/// Heal priority
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HealChannelPriority {
/// Low priority
Low,
/// Normal priority
Normal,
/// High priority
High,
/// Critical priority
Critical,
}
impl Default for HealChannelPriority {
fn default() -> Self {
Self::Normal
}
}
/// Heal channel sender
pub type HealChannelSender = mpsc::UnboundedSender<HealChannelCommand>;
/// Heal channel receiver
pub type HealChannelReceiver = mpsc::UnboundedReceiver<HealChannelCommand>;
/// Global heal channel sender
static GLOBAL_HEAL_CHANNEL_SENDER: OnceLock<HealChannelSender> = OnceLock::new();
/// Initialize global heal channel
pub fn init_heal_channel() -> HealChannelReceiver {
let (tx, rx) = mpsc::unbounded_channel();
GLOBAL_HEAL_CHANNEL_SENDER
.set(tx)
.expect("Heal channel sender already initialized");
rx
}
/// Get global heal channel sender
pub fn get_heal_channel_sender() -> Option<&'static HealChannelSender> {
GLOBAL_HEAL_CHANNEL_SENDER.get()
}
/// Send heal command through global channel
pub async fn send_heal_command(command: HealChannelCommand) -> Result<(), String> {
if let Some(sender) = get_heal_channel_sender() {
sender
.send(command)
.map_err(|e| format!("Failed to send heal command: {e}"))?;
Ok(())
} else {
Err("Heal channel not initialized".to_string())
}
}
/// Send heal start request
pub async fn send_heal_request(request: HealChannelRequest) -> Result<(), String> {
send_heal_command(HealChannelCommand::Start(request)).await
}
/// Send heal query request
pub async fn query_heal_status(heal_path: String, client_token: String) -> Result<(), String> {
send_heal_command(HealChannelCommand::Query { heal_path, client_token }).await
}
/// Send heal cancel request
pub async fn cancel_heal_task(heal_path: String) -> Result<(), String> {
send_heal_command(HealChannelCommand::Cancel { heal_path }).await
}
/// Create a new heal request
pub fn create_heal_request(
bucket: String,
object_prefix: Option<String>,
force_start: bool,
priority: Option<HealChannelPriority>,
) -> HealChannelRequest {
HealChannelRequest {
id: Uuid::new_v4().to_string(),
bucket,
object_prefix,
force_start,
priority: priority.unwrap_or_default(),
pool_index: None,
set_index: None,
scan_mode: None,
remove_corrupted: None,
recreate_missing: None,
update_parity: None,
recursive: None,
dry_run: None,
timeout_seconds: None,
disk: None,
}
}
/// Create a new heal request with advanced options
pub fn create_heal_request_with_options(
bucket: String,
object_prefix: Option<String>,
force_start: bool,
priority: Option<HealChannelPriority>,
pool_index: Option<usize>,
set_index: Option<usize>,
) -> HealChannelRequest {
HealChannelRequest {
id: Uuid::new_v4().to_string(),
bucket,
object_prefix,
force_start,
priority: priority.unwrap_or_default(),
pool_index,
set_index,
..Default::default()
}
}
/// Create a heal response
pub fn create_heal_response(
request_id: String,
success: bool,
data: Option<Vec<u8>>,
error: Option<String>,
) -> HealChannelResponse {
HealChannelResponse {
request_id,
success,
data,
error,
}
}
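/// Extracts the effective prefix of a lifecycle rule, checking `rule.prefix` first, then
/// `filter.prefix`, then `filter.and.prefix`; returns an empty string when none is set.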
fn lc_get_prefix(rule: &LifecycleRule) -> String {
if let Some(p) = &rule.prefix {
return p.to_string();
} else if let Some(filter) = &rule.filter {
if let Some(p) = &filter.prefix {
return p.to_string();
} else if let Some(and) = &filter.and {
if let Some(p) = &and.prefix {
return p.to_string();
}
}
}
"".into()
}
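/// Returns true when the lifecycle configuration contains at least one enabled rule whose
/// prefix overlaps `prefix` and which can expire or transition objects (current or
/// noncurrent versions, delete markers included).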
pub fn lc_has_active_rules(config: &BucketLifecycleConfiguration, prefix: &str) -> bool {
if config.rules.is_empty() {
return false;
}
for rule in config.rules.iter() {
if rule.status == ExpirationStatus::from_static(ExpirationStatus::DISABLED) {
continue;
}
let rule_prefix = lc_get_prefix(rule);
if !prefix.is_empty() && !rule_prefix.is_empty() && !prefix.starts_with(&rule_prefix) && !rule_prefix.starts_with(prefix)
{
continue;
}
if let Some(e) = &rule.noncurrent_version_expiration {
if let Some(true) = e.noncurrent_days.map(|d| d > 0) {
return true;
}
if let Some(true) = e.newer_noncurrent_versions.map(|d| d > 0) {
return true;
}
}
if rule.noncurrent_version_transitions.is_some() {
return true;
}
if let Some(true) = rule.expiration.as_ref().map(|e| e.date.is_some()) {
return true;
}
if let Some(true) = rule.expiration.as_ref().map(|e| e.days.is_some()) {
return true;
}
if let Some(Some(true)) = rule.expiration.as_ref().map(|e| e.expired_object_delete_marker) {
return true;
}
if let Some(true) = rule.transitions.as_ref().map(|t| !t.is_empty()) {
return true;
}
if rule.transitions.is_some() {
return true;
}
}
false
}
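/// Returns true when the replication configuration contains at least one enabled rule that
/// applies to `prefix`; with `recursive` set, a rule also matches when its prefix lies
/// below the tested prefix.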
pub fn rep_has_active_rules(config: &ReplicationConfiguration, prefix: &str, recursive: bool) -> bool {
if config.rules.is_empty() {
return false;
}
for rule in config.rules.iter() {
if rule
.status
.eq(&ReplicationRuleStatus::from_static(ReplicationRuleStatus::DISABLED))
{
continue;
}
if !prefix.is_empty() {
if let Some(filter) = &rule.filter {
if let Some(r_prefix) = &filter.prefix {
if !r_prefix.is_empty() {
// incoming prefix must be in rule prefix
if !recursive && !prefix.starts_with(r_prefix) {
continue;
}
// If recursive, skip this rule when neither the rule prefix nor the tested prefix
// is a prefix of the other
if recursive && !r_prefix.starts_with(prefix) && !prefix.starts_with(r_prefix) {
continue;
}
}
}
}
}
return true;
}
false
}
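/// Queues a heal request for a whole erasure-set disk identified by `set_disk_id`,
/// leaving bucket and object-level options unset.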
pub async fn send_heal_disk(set_disk_id: String, priority: Option<HealChannelPriority>) -> Result<(), String> {
let req = HealChannelRequest {
id: Uuid::new_v4().to_string(),
bucket: "".to_string(),
object_prefix: None,
disk: Some(set_disk_id),
force_start: false,
priority: priority.unwrap_or_default(),
pool_index: None,
set_index: None,
scan_mode: None,
remove_corrupted: None,
recreate_missing: None,
update_parity: None,
recursive: None,
dry_run: None,
timeout_seconds: None,
};
send_heal_request(req).await
}
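/// Illustrative sketch (not part of the original patch): typical wiring of the heal channel.
/// The heal manager side drains the receiver returned by `init_heal_channel`, while any other
/// component submits work through the global sender. Assumes a Tokio runtime is running;
/// error handling is elided.
pub async fn example_heal_channel_wiring() -> Result<(), String> {
    // Initialize once at startup; the receiver is handed to the heal manager task.
    let mut rx = init_heal_channel();
    tokio::spawn(async move {
        while let Some(cmd) = rx.recv().await {
            match cmd {
                HealChannelCommand::Start(req) => println!("start heal for bucket {}", req.bucket),
                HealChannelCommand::Query { heal_path, .. } => println!("query heal status for {heal_path}"),
                HealChannelCommand::Cancel { heal_path } => println!("cancel heal task for {heal_path}"),
            }
        }
    });

    // Any caller can now enqueue a heal request through the global sender.
    let req = create_heal_request("example-bucket".to_string(), None, false, Some(HealChannelPriority::High));
    send_heal_request(req).await
}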

View File

@@ -14,8 +14,11 @@
pub mod bucket_stats;
// pub mod error;
pub mod data_usage;
pub mod globals;
pub mod heal_channel;
pub mod last_minute;
pub mod metrics;
// 44 is the ASCII code for ',' (comma)
pub static DEFAULT_DELIMITER: u8 = 44;

View File

@@ -12,14 +12,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use super::data_scanner::CurrentScannerCycle;
use crate::bucket::lifecycle::lifecycle;
use chrono::Utc;
use chrono::{DateTime, Utc};
use lazy_static::lazy_static;
use rustfs_common::last_minute::{AccElem, LastMinuteLatency};
use rustfs_madmin::metrics::ScannerMetrics as M_ScannerMetrics;
use std::{
collections::HashMap,
fmt::Display,
pin::Pin,
sync::{
Arc,
@@ -29,12 +27,58 @@ use std::{
};
use tokio::sync::{Mutex, RwLock};
use crate::last_minute::{AccElem, LastMinuteLatency};
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IlmAction {
NoneAction = 0,
DeleteAction,
DeleteVersionAction,
TransitionAction,
TransitionVersionAction,
DeleteRestoredAction,
DeleteRestoredVersionAction,
DeleteAllVersionsAction,
DelMarkerDeleteAllVersionsAction,
ActionCount,
}
impl IlmAction {
pub fn delete_restored(&self) -> bool {
*self == Self::DeleteRestoredAction || *self == Self::DeleteRestoredVersionAction
}
pub fn delete_versioned(&self) -> bool {
*self == Self::DeleteVersionAction || *self == Self::DeleteRestoredVersionAction
}
pub fn delete_all(&self) -> bool {
*self == Self::DeleteAllVersionsAction || *self == Self::DelMarkerDeleteAllVersionsAction
}
pub fn delete(&self) -> bool {
if self.delete_restored() {
return true;
}
*self == Self::DeleteVersionAction
|| *self == Self::DeleteAction
|| *self == Self::DeleteAllVersionsAction
|| *self == Self::DelMarkerDeleteAllVersionsAction
}
}
impl Display for IlmAction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{self:?}")
}
}
lazy_static! {
pub static ref globalScannerMetrics: Arc<ScannerMetrics> = Arc::new(ScannerMetrics::new());
pub static ref globalMetrics: Arc<Metrics> = Arc::new(Metrics::new());
}
#[derive(Clone, Debug, PartialEq, PartialOrd)]
pub enum ScannerMetric {
pub enum Metric {
// START Realtime metrics, that only records
// last minute latencies and total operation count.
ReadMetadata = 0,
@@ -69,7 +113,7 @@ pub enum ScannerMetric {
Last,
}
impl ScannerMetric {
impl Metric {
/// Convert to string representation for metrics
pub fn as_str(self) -> &'static str {
match self {
@@ -203,7 +247,7 @@ impl CurrentPathTracker {
}
/// Main scanner metrics structure
pub struct ScannerMetrics {
pub struct Metrics {
// All fields must be accessed atomically and aligned.
operations: Vec<AtomicU64>,
latency: Vec<LockedLastMinuteLatency>,
@@ -213,94 +257,102 @@ pub struct ScannerMetrics {
current_paths: Arc<RwLock<HashMap<String, Arc<CurrentPathTracker>>>>,
// Cycle information
cycle_info: Arc<RwLock<Option<CurrentScannerCycle>>>,
cycle_info: Arc<RwLock<Option<CurrentCycle>>>,
}
impl ScannerMetrics {
pub fn new() -> Self {
let operations = (0..ScannerMetric::Last as usize).map(|_| AtomicU64::new(0)).collect();
// This is a placeholder. We'll need to define this struct.
#[derive(Clone, Debug)]
pub struct CurrentCycle {
pub current: u64,
pub cycle_completed: Vec<DateTime<Utc>>,
pub started: DateTime<Utc>,
}
let latency = (0..ScannerMetric::LastRealtime as usize)
impl Metrics {
pub fn new() -> Self {
let operations = (0..Metric::Last as usize).map(|_| AtomicU64::new(0)).collect();
let latency = (0..Metric::LastRealtime as usize)
.map(|_| LockedLastMinuteLatency::new())
.collect();
Self {
operations,
latency,
actions: (0..ScannerMetric::Last as usize).map(|_| AtomicU64::new(0)).collect(),
actions_latency: vec![LockedLastMinuteLatency::default(); ScannerMetric::LastRealtime as usize],
actions: (0..IlmAction::ActionCount as usize).map(|_| AtomicU64::new(0)).collect(),
actions_latency: vec![LockedLastMinuteLatency::default(); IlmAction::ActionCount as usize],
current_paths: Arc::new(RwLock::new(HashMap::new())),
cycle_info: Arc::new(RwLock::new(None)),
}
}
/// Log scanner action with custom metadata - compatible with existing usage
pub fn log(metric: ScannerMetric) -> impl Fn(&HashMap<String, String>) {
pub fn log(metric: Metric) -> impl Fn(&HashMap<String, String>) {
let metric = metric as usize;
let start_time = SystemTime::now();
move |_custom: &HashMap<String, String>| {
let duration = SystemTime::now().duration_since(start_time).unwrap_or_default();
// Update operation count
globalScannerMetrics.operations[metric].fetch_add(1, Ordering::Relaxed);
globalMetrics.operations[metric].fetch_add(1, Ordering::Relaxed);
// Update latency for realtime metrics (spawn async task for this)
if (metric) < ScannerMetric::LastRealtime as usize {
if (metric) < Metric::LastRealtime as usize {
let metric_index = metric;
tokio::spawn(async move {
globalScannerMetrics.latency[metric_index].add(duration).await;
globalMetrics.latency[metric_index].add(duration).await;
});
}
// Log trace metrics
if metric as u8 > ScannerMetric::StartTrace as u8 {
if metric as u8 > Metric::StartTrace as u8 {
//debug!(metric = metric.as_str(), duration_ms = duration.as_millis(), "Scanner trace metric");
}
}
}
/// Time scanner action with size - returns function that takes size
pub fn time_size(metric: ScannerMetric) -> impl Fn(u64) {
pub fn time_size(metric: Metric) -> impl Fn(u64) {
let metric = metric as usize;
let start_time = SystemTime::now();
move |size: u64| {
let duration = SystemTime::now().duration_since(start_time).unwrap_or_default();
// Update operation count
globalScannerMetrics.operations[metric].fetch_add(1, Ordering::Relaxed);
globalMetrics.operations[metric].fetch_add(1, Ordering::Relaxed);
// Update latency for realtime metrics with size (spawn async task)
if (metric) < ScannerMetric::LastRealtime as usize {
if (metric) < Metric::LastRealtime as usize {
let metric_index = metric;
tokio::spawn(async move {
globalScannerMetrics.latency[metric_index].add_size(duration, size).await;
globalMetrics.latency[metric_index].add_size(duration, size).await;
});
}
}
}
/// Time a scanner action - returns a closure to call when done
pub fn time(metric: ScannerMetric) -> impl Fn() {
pub fn time(metric: Metric) -> impl Fn() {
let metric = metric as usize;
let start_time = SystemTime::now();
move || {
let duration = SystemTime::now().duration_since(start_time).unwrap_or_default();
// Update operation count
globalScannerMetrics.operations[metric].fetch_add(1, Ordering::Relaxed);
globalMetrics.operations[metric].fetch_add(1, Ordering::Relaxed);
// Update latency for realtime metrics (spawn async task)
if (metric) < ScannerMetric::LastRealtime as usize {
if (metric) < Metric::LastRealtime as usize {
let metric_index = metric;
tokio::spawn(async move {
globalScannerMetrics.latency[metric_index].add(duration).await;
globalMetrics.latency[metric_index].add(duration).await;
});
}
}
}
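// Illustrative usage (not part of the original patch):
//     let done = Metrics::time(Metric::ReadMetadata);
//     // ... perform the metadata read ...
//     done(); // records one operation plus its last-minute latency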
/// Time N scanner actions - returns function that takes count, then returns completion function
pub fn time_n(metric: ScannerMetric) -> Box<dyn Fn(usize) -> Box<dyn Fn() + Send + Sync> + Send + Sync> {
pub fn time_n(metric: Metric) -> Box<dyn Fn(usize) -> Box<dyn Fn() + Send + Sync> + Send + Sync> {
let metric = metric as usize;
let start_time = SystemTime::now();
Box::new(move |count: usize| {
@@ -308,22 +360,23 @@ impl ScannerMetrics {
let duration = SystemTime::now().duration_since(start_time).unwrap_or_default();
// Update operation count
globalScannerMetrics.operations[metric].fetch_add(count as u64, Ordering::Relaxed);
globalMetrics.operations[metric].fetch_add(count as u64, Ordering::Relaxed);
// Update latency for realtime metrics (spawn async task)
if (metric) < ScannerMetric::LastRealtime as usize {
if (metric) < Metric::LastRealtime as usize {
let metric_index = metric;
tokio::spawn(async move {
globalScannerMetrics.latency[metric_index].add(duration).await;
globalMetrics.latency[metric_index].add(duration).await;
});
}
})
})
}
pub fn time_ilm(a: lifecycle::IlmAction) -> Box<dyn Fn(u64) -> Box<dyn Fn() + Send + Sync> + Send + Sync> {
/// Time ILM action with versions - returns function that takes versions, then returns completion function
pub fn time_ilm(a: IlmAction) -> Box<dyn Fn(u64) -> Box<dyn Fn() + Send + Sync> + Send + Sync> {
let a_clone = a as usize;
if a_clone == lifecycle::IlmAction::NoneAction as usize || a_clone >= lifecycle::IlmAction::ActionCount as usize {
if a_clone == IlmAction::NoneAction as usize || a_clone >= IlmAction::ActionCount as usize {
return Box::new(move |_: u64| Box::new(move || {}));
}
let start = SystemTime::now();
@@ -331,50 +384,50 @@ impl ScannerMetrics {
Box::new(move || {
let duration = SystemTime::now().duration_since(start).unwrap_or(Duration::from_secs(0));
tokio::spawn(async move {
globalScannerMetrics.actions[a_clone].fetch_add(versions, Ordering::Relaxed);
globalScannerMetrics.actions_latency[a_clone].add(duration).await;
globalMetrics.actions[a_clone].fetch_add(versions, Ordering::Relaxed);
globalMetrics.actions_latency[a_clone].add(duration).await;
});
})
})
}
/// Increment time with specific duration
pub async fn inc_time(metric: ScannerMetric, duration: Duration) {
pub async fn inc_time(metric: Metric, duration: Duration) {
let metric = metric as usize;
// Update operation count
globalScannerMetrics.operations[metric].fetch_add(1, Ordering::Relaxed);
globalMetrics.operations[metric].fetch_add(1, Ordering::Relaxed);
// Update latency for realtime metrics
if (metric) < ScannerMetric::LastRealtime as usize {
globalScannerMetrics.latency[metric].add(duration).await;
if (metric) < Metric::LastRealtime as usize {
globalMetrics.latency[metric].add(duration).await;
}
}
/// Get lifetime operation count for a metric
pub fn lifetime(&self, metric: ScannerMetric) -> u64 {
pub fn lifetime(&self, metric: Metric) -> u64 {
let metric = metric as usize;
if (metric) >= ScannerMetric::Last as usize {
if (metric) >= Metric::Last as usize {
return 0;
}
self.operations[metric].load(Ordering::Relaxed)
}
/// Get last minute statistics for a metric
pub async fn last_minute(&self, metric: ScannerMetric) -> AccElem {
pub async fn last_minute(&self, metric: Metric) -> AccElem {
let metric = metric as usize;
if (metric) >= ScannerMetric::LastRealtime as usize {
if (metric) >= Metric::LastRealtime as usize {
return AccElem::default();
}
self.latency[metric].total().await
}
/// Set current cycle information
pub async fn set_cycle(&self, cycle: Option<CurrentScannerCycle>) {
pub async fn set_cycle(&self, cycle: Option<CurrentCycle>) {
*self.cycle_info.write().await = cycle;
}
/// Get current cycle information
pub async fn get_cycle(&self) -> Option<CurrentScannerCycle> {
pub async fn get_cycle(&self) -> Option<CurrentCycle> {
self.cycle_info.read().await.clone()
}
@@ -411,20 +464,20 @@ impl ScannerMetrics {
metrics.active_paths = self.get_current_paths().await;
// Lifetime operations
for i in 0..ScannerMetric::Last as usize {
for i in 0..Metric::Last as usize {
let count = self.operations[i].load(Ordering::Relaxed);
if count > 0 {
if let Some(metric) = ScannerMetric::from_index(i) {
if let Some(metric) = Metric::from_index(i) {
metrics.life_time_ops.insert(metric.as_str().to_string(), count);
}
}
}
// Last minute statistics for realtime metrics
for i in 0..ScannerMetric::LastRealtime as usize {
for i in 0..Metric::LastRealtime as usize {
let last_min = self.latency[i].total().await;
if last_min.n > 0 {
if let Some(_metric) = ScannerMetric::from_index(i) {
if let Some(_metric) = Metric::from_index(i) {
// Convert to madmin TimedAction format if needed
// This would require implementing the conversion
}
@@ -448,11 +501,7 @@ pub fn current_path_updater(disk: &str, initial: &str) -> (UpdateCurrentPathFn,
let tracker_clone = Arc::clone(&tracker);
let disk_clone = disk_name.clone();
tokio::spawn(async move {
globalScannerMetrics
.current_paths
.write()
.await
.insert(disk_clone, tracker_clone);
globalMetrics.current_paths.write().await.insert(disk_clone, tracker_clone);
});
let update_fn = {
@@ -471,7 +520,7 @@ pub fn current_path_updater(disk: &str, initial: &str) -> (UpdateCurrentPathFn,
Arc::new(move || -> Pin<Box<dyn std::future::Future<Output = ()> + Send>> {
let disk_name = disk_name.clone();
Box::pin(async move {
globalScannerMetrics.current_paths.write().await.remove(&disk_name);
globalMetrics.current_paths.write().await.remove(&disk_name);
})
})
};
@@ -479,7 +528,7 @@ pub fn current_path_updater(disk: &str, initial: &str) -> (UpdateCurrentPathFn,
(update_fn, done_fn)
}
impl Default for ScannerMetrics {
impl Default for Metrics {
fn default() -> Self {
Self::new()
}

View File

@@ -38,3 +38,7 @@ url.workspace = true
rustfs-madmin.workspace = true
rustfs-filemeta.workspace = true
bytes.workspace = true
serial_test = "3.2.0"
aws-sdk-s3 = "1.99.0"
aws-config = "1.8.3"
async-trait = { workspace = true }

View File

@@ -13,28 +13,45 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use rustfs_lock::{
drwmutex::Options,
lock_args::LockArgs,
namespace_lock::{NsLockMap, new_nslock},
new_lock_api,
};
use async_trait::async_trait;
use rustfs_ecstore::{disk::endpoint::Endpoint, lock_utils::create_unique_clients};
use rustfs_lock::client::{LockClient, local::LocalClient};
use rustfs_lock::types::{LockInfo, LockResponse, LockStats};
use rustfs_lock::{LockId, LockMetadata, LockPriority, LockType};
use rustfs_lock::{LockRequest, NamespaceLock, NamespaceLockManager};
use rustfs_protos::{node_service_time_out_client, proto_gen::node_service::GenerallyLockRequest};
use serial_test::serial;
use std::{error::Error, sync::Arc, time::Duration};
use tokio::sync::RwLock;
use tokio::time::sleep;
use tonic::Request;
use url::Url;
const CLUSTER_ADDR: &str = "http://localhost:9000";
fn get_cluster_endpoints() -> Vec<Endpoint> {
vec![Endpoint {
url: Url::parse(CLUSTER_ADDR).unwrap(),
is_local: false,
pool_idx: 0,
set_idx: 0,
disk_idx: 0,
}]
}
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_lock_unlock_rpc() -> Result<(), Box<dyn Error>> {
let args = LockArgs {
uid: "1111".to_string(),
resources: vec!["dandan".to_string()],
let args = LockRequest {
lock_id: LockId::new_deterministic("dandan"),
resource: "dandan".to_string(),
lock_type: LockType::Exclusive,
owner: "dd".to_string(),
source: "".to_string(),
quorum: 3,
acquire_timeout: Duration::from_secs(30),
ttl: Duration::from_secs(30),
metadata: LockMetadata::default(),
priority: LockPriority::Normal,
deadlock_detection: false,
};
let args = serde_json::to_string(&args)?;
@@ -58,31 +75,616 @@ async fn test_lock_unlock_rpc() -> Result<(), Box<dyn Error>> {
Ok(())
}
#[tokio::test]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_lock_unlock_ns_lock() -> Result<(), Box<dyn Error>> {
let url = url::Url::parse("http://127.0.0.1:9000/data")?;
let locker = new_lock_api(false, Some(url));
let ns_mutex = Arc::new(RwLock::new(NsLockMap::new(true)));
let ns = new_nslock(
Arc::clone(&ns_mutex),
"local".to_string(),
"dandan".to_string(),
vec!["foo".to_string()],
vec![locker],
)
.await;
assert!(
ns.0.write()
.await
.get_lock(&Options {
timeout: Duration::from_secs(5),
retry_interval: Duration::from_secs(1),
})
.await
.unwrap()
);
/// Mock client that simulates remote node failures
#[derive(Debug)]
struct FailingMockClient {
local_client: Arc<dyn LockClient>,
should_fail_acquire: bool,
should_fail_release: bool,
}
impl FailingMockClient {
fn new(should_fail_acquire: bool, should_fail_release: bool) -> Self {
Self {
local_client: Arc::new(LocalClient::new()),
should_fail_acquire,
should_fail_release,
}
}
}
#[async_trait]
impl LockClient for FailingMockClient {
async fn acquire_exclusive(&self, request: &LockRequest) -> rustfs_lock::error::Result<LockResponse> {
if self.should_fail_acquire {
// Simulate network timeout or remote node failure
return Ok(LockResponse::failure("Simulated remote node failure", Duration::from_millis(100)));
}
self.local_client.acquire_exclusive(request).await
}
async fn acquire_shared(&self, request: &LockRequest) -> rustfs_lock::error::Result<LockResponse> {
if self.should_fail_acquire {
return Ok(LockResponse::failure("Simulated remote node failure", Duration::from_millis(100)));
}
self.local_client.acquire_shared(request).await
}
async fn release(&self, lock_id: &LockId) -> rustfs_lock::error::Result<bool> {
if self.should_fail_release {
return Err(rustfs_lock::error::LockError::internal("Simulated release failure"));
}
self.local_client.release(lock_id).await
}
async fn refresh(&self, lock_id: &LockId) -> rustfs_lock::error::Result<bool> {
self.local_client.refresh(lock_id).await
}
async fn force_release(&self, lock_id: &LockId) -> rustfs_lock::error::Result<bool> {
self.local_client.force_release(lock_id).await
}
async fn check_status(&self, lock_id: &LockId) -> rustfs_lock::error::Result<Option<LockInfo>> {
self.local_client.check_status(lock_id).await
}
async fn get_stats(&self) -> rustfs_lock::error::Result<LockStats> {
self.local_client.get_stats().await
}
async fn close(&self) -> rustfs_lock::error::Result<()> {
self.local_client.close().await
}
async fn is_online(&self) -> bool {
if self.should_fail_acquire {
return false; // Simulate offline node
}
true // Simulate online node
}
async fn is_local(&self) -> bool {
false // Simulate remote client
}
}
#[tokio::test]
#[serial]
async fn test_transactional_lock_with_remote_failure() -> Result<(), Box<dyn Error>> {
println!("🧪 Testing transactional lock with simulated remote node failure");
// Create a two-node cluster: one local (success) + one remote (failure)
let local_client: Arc<dyn LockClient> = Arc::new(LocalClient::new());
let failing_remote_client: Arc<dyn LockClient> = Arc::new(FailingMockClient::new(true, false));
let clients = vec![local_client, failing_remote_client];
let ns_lock = NamespaceLock::with_clients("test_transactional".to_string(), clients);
let resource = "critical_resource".to_string();
// Test single lock operation with 2PC
println!("📝 Testing single lock with remote failure...");
let request = LockRequest::new(&resource, LockType::Exclusive, "test_owner").with_ttl(Duration::from_secs(30));
let response = ns_lock.acquire_lock(&request).await?;
// Should fail because quorum (2/2) is not met due to remote failure
assert!(!response.success, "Lock should fail due to remote node failure");
println!("✅ Single lock correctly failed due to remote node failure");
// Verify no locks are left behind on the local node
let local_client_direct = LocalClient::new();
let lock_id = LockId::new_deterministic(&ns_lock.get_resource_key(&resource));
let lock_status = local_client_direct.check_status(&lock_id).await?;
assert!(lock_status.is_none(), "No lock should remain on local node after rollback");
println!("✅ Verified rollback: no locks left on local node");
Ok(())
}
#[tokio::test]
#[serial]
async fn test_transactional_batch_lock_with_mixed_failures() -> Result<(), Box<dyn Error>> {
println!("🧪 Testing transactional batch lock with mixed node failures");
// Create a cluster with different failure patterns
let local_client: Arc<dyn LockClient> = Arc::new(LocalClient::new());
let failing_remote_client: Arc<dyn LockClient> = Arc::new(FailingMockClient::new(true, false));
let clients = vec![local_client, failing_remote_client];
let ns_lock = NamespaceLock::with_clients("test_batch_transactional".to_string(), clients);
let resources = vec!["resource_1".to_string(), "resource_2".to_string(), "resource_3".to_string()];
println!("📝 Testing batch lock with remote failure...");
let result = ns_lock
.lock_batch(&resources, "batch_owner", Duration::from_millis(100), Duration::from_secs(30))
.await?;
// Should fail because remote node cannot acquire locks
assert!(!result, "Batch lock should fail due to remote node failure");
println!("✅ Batch lock correctly failed due to remote node failure");
// Verify no locks are left behind on any resource
let local_client_direct = LocalClient::new();
for resource in &resources {
let lock_id = LockId::new_deterministic(&ns_lock.get_resource_key(resource));
let lock_status = local_client_direct.check_status(&lock_id).await?;
assert!(lock_status.is_none(), "No lock should remain for resource: {resource}");
}
println!("✅ Verified rollback: no locks left on any resource");
Ok(())
}
#[tokio::test]
#[serial]
async fn test_transactional_lock_with_quorum_success() -> Result<(), Box<dyn Error>> {
println!("🧪 Testing transactional lock with quorum success");
// Create a three-node cluster where 2 succeed and 1 fails (quorum = 2 automatically)
let local_client1: Arc<dyn LockClient> = Arc::new(LocalClient::new());
let local_client2: Arc<dyn LockClient> = Arc::new(LocalClient::new());
let failing_remote_client: Arc<dyn LockClient> = Arc::new(FailingMockClient::new(true, false));
let clients = vec![local_client1, local_client2, failing_remote_client];
let ns_lock = NamespaceLock::with_clients("test_quorum".to_string(), clients);
let resource = "quorum_resource".to_string();
println!("📝 Testing lock with automatic quorum=2, 2 success + 1 failure...");
let request = LockRequest::new(&resource, LockType::Exclusive, "quorum_owner").with_ttl(Duration::from_secs(30));
let response = ns_lock.acquire_lock(&request).await?;
// Should fail because we require all nodes to succeed for consistency
// (even though quorum is met, the implementation requires all nodes for consistency)
assert!(!response.success, "Lock should fail due to consistency requirement");
println!("✅ Lock correctly failed due to consistency requirement (partial success rolled back)");
Ok(())
}
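The transactional tests above assert all-or-nothing semantics: a lock counts as acquired only if every client succeeds, and any partial success is rolled back so no lock is left behind. A minimal sketch of that decision in plain Rust (illustrative only, not the rustfs_lock API):
fn decide_and_rollback(per_node_acquired: &[bool], mut release_node: impl FnMut(usize)) -> bool {
    // Succeed only when every node acquired the lock.
    if per_node_acquired.iter().all(|&ok| ok) {
        return true;
    }
    // Rollback: release every node that did acquire before reporting failure.
    for (idx, &ok) in per_node_acquired.iter().enumerate() {
        if ok {
            release_node(idx);
        }
    }
    false
}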
#[tokio::test]
#[serial]
async fn test_transactional_lock_rollback_on_release_failure() -> Result<(), Box<dyn Error>> {
println!("🧪 Testing rollback behavior when release fails");
// Create clients where acquire succeeds but release fails
let local_client: Arc<dyn LockClient> = Arc::new(LocalClient::new());
let failing_release_client: Arc<dyn LockClient> = Arc::new(FailingMockClient::new(false, true));
let clients = vec![local_client, failing_release_client];
let ns_lock = NamespaceLock::with_clients("test_release_failure".to_string(), clients);
let resource = "release_test_resource".to_string();
println!("📝 Testing lock acquisition with release failure handling...");
let request = LockRequest::new(&resource, LockType::Exclusive, "test_owner").with_ttl(Duration::from_secs(30));
// This should fail because both LocalClient instances share the same global lock map
// The first client (LocalClient) will acquire the lock, but the second client
// (FailingMockClient's internal LocalClient) will fail to acquire the same resource
let response = ns_lock.acquire_lock(&request).await?;
// The operation should fail due to lock contention between the two LocalClient instances
assert!(
!response.success,
"Lock should fail due to lock contention between LocalClient instances sharing global lock map"
);
println!("✅ Lock correctly failed due to lock contention (both clients use same global lock map)");
// Verify no locks are left behind after rollback
let local_client_direct = LocalClient::new();
let lock_id = LockId::new_deterministic(&ns_lock.get_resource_key(&resource));
let lock_status = local_client_direct.check_status(&lock_id).await?;
assert!(lock_status.is_none(), "No lock should remain after rollback");
println!("✅ Verified rollback: no locks left after failed acquisition");
Ok(())
}
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_lock_unlock_ns_lock() -> Result<(), Box<dyn Error>> {
let endpoints = get_cluster_endpoints();
let clients = create_unique_clients(&endpoints).await?;
let ns_lock = NamespaceLock::with_clients("test".to_string(), clients);
let resources = vec!["foo".to_string()];
let result = ns_lock
.lock_batch(&resources, "dandan", Duration::from_secs(5), Duration::from_secs(10))
.await;
match &result {
Ok(success) => println!("Lock result: {success}"),
Err(e) => println!("Lock error: {e}"),
}
let result = result?;
assert!(result, "Lock should succeed, but got: {result}");
ns_lock.unlock_batch(&resources, "dandan").await?;
Ok(())
}
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_concurrent_lock_attempts() -> Result<(), Box<dyn Error>> {
let endpoints = get_cluster_endpoints();
let clients = create_unique_clients(&endpoints).await?;
let ns_lock = NamespaceLock::with_clients("test".to_string(), clients);
let resource = vec!["concurrent_resource".to_string()];
// First lock should succeed
println!("Attempting first lock...");
let result1 = ns_lock
.lock_batch(&resource, "owner1", Duration::from_secs(5), Duration::from_secs(10))
.await?;
println!("First lock result: {result1}");
assert!(result1, "First lock should succeed");
// Second lock should fail (resource already locked)
println!("Attempting second lock...");
let result2 = ns_lock
.lock_batch(&resource, "owner2", Duration::from_secs(1), Duration::from_secs(10))
.await?;
println!("Second lock result: {result2}");
assert!(!result2, "Second lock should fail");
// Unlock by first owner
println!("Unlocking first lock...");
ns_lock.unlock_batch(&resource, "owner1").await?;
println!("First lock unlocked");
// Now second owner should be able to lock
println!("Attempting third lock...");
let result3 = ns_lock
.lock_batch(&resource, "owner2", Duration::from_secs(5), Duration::from_secs(10))
.await?;
println!("Third lock result: {result3}");
assert!(result3, "Lock should succeed after unlock");
// Clean up
println!("Cleaning up...");
ns_lock.unlock_batch(&resource, "owner2").await?;
println!("Test completed");
Ok(())
}
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_read_write_lock_compatibility() -> Result<(), Box<dyn Error>> {
let endpoints = get_cluster_endpoints();
let clients = create_unique_clients(&endpoints).await?;
let ns_lock = NamespaceLock::with_clients("test_rw".to_string(), clients);
let resource = vec!["rw_resource".to_string()];
// First read lock should succeed
let result1 = ns_lock
.rlock_batch(&resource, "reader1", Duration::from_secs(5), Duration::from_secs(10))
.await?;
assert!(result1, "First read lock should succeed");
// Second read lock should also succeed (read locks are compatible)
let result2 = ns_lock
.rlock_batch(&resource, "reader2", Duration::from_secs(5), Duration::from_secs(10))
.await?;
assert!(result2, "Second read lock should succeed");
// Write lock should fail (read locks are held)
let result3 = ns_lock
.lock_batch(&resource, "writer1", Duration::from_secs(1), Duration::from_secs(10))
.await?;
assert!(!result3, "Write lock should fail when read locks are held");
// Release read locks
ns_lock.runlock_batch(&resource, "reader1").await?;
ns_lock.runlock_batch(&resource, "reader2").await?;
// Now write lock should succeed
let result4 = ns_lock
.lock_batch(&resource, "writer1", Duration::from_secs(5), Duration::from_secs(10))
.await?;
assert!(result4, "Write lock should succeed after read locks released");
// Clean up
ns_lock.unlock_batch(&resource, "writer1").await?;
Ok(())
}
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_lock_timeout() -> Result<(), Box<dyn Error>> {
let endpoints = get_cluster_endpoints();
let clients = create_unique_clients(&endpoints).await?;
let ns_lock = NamespaceLock::with_clients("test_timeout".to_string(), clients);
let resource = vec!["timeout_resource".to_string()];
// First lock with short timeout
let result1 = ns_lock
.lock_batch(&resource, "owner1", Duration::from_secs(2), Duration::from_secs(1))
.await?;
assert!(result1, "First lock should succeed");
// Wait for lock to expire
sleep(Duration::from_secs(5)).await;
// Second lock should succeed after timeout
let result2 = ns_lock
.lock_batch(&resource, "owner2", Duration::from_secs(5), Duration::from_secs(1))
.await?;
assert!(result2, "Lock should succeed after timeout");
// Clean up
ns_lock.unlock_batch(&resource, "owner2").await?;
Ok(())
}
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_batch_lock_operations() -> Result<(), Box<dyn Error>> {
let endpoints = get_cluster_endpoints();
let clients = create_unique_clients(&endpoints).await?;
let ns_lock = NamespaceLock::with_clients("test_batch".to_string(), clients);
let resources = vec![
"batch_resource1".to_string(),
"batch_resource2".to_string(),
"batch_resource3".to_string(),
];
// Lock all resources
let result = ns_lock
.lock_batch(&resources, "batch_owner", Duration::from_secs(5), Duration::from_secs(10))
.await?;
assert!(result, "Batch lock should succeed");
// Try to lock one of the resources with different owner - should fail
let single_resource = vec!["batch_resource2".to_string()];
let result2 = ns_lock
.lock_batch(&single_resource, "other_owner", Duration::from_secs(1), Duration::from_secs(10))
.await?;
assert!(!result2, "Lock should fail for already locked resource");
// Unlock all resources
ns_lock.unlock_batch(&resources, "batch_owner").await?;
// Now should be able to lock single resource
let result3 = ns_lock
.lock_batch(&single_resource, "other_owner", Duration::from_secs(5), Duration::from_secs(10))
.await?;
assert!(result3, "Lock should succeed after batch unlock");
// Clean up
ns_lock.unlock_batch(&single_resource, "other_owner").await?;
Ok(())
}
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_multiple_namespaces() -> Result<(), Box<dyn Error>> {
let endpoints = get_cluster_endpoints();
let clients = create_unique_clients(&endpoints).await?;
let ns_lock1 = NamespaceLock::with_clients("namespace1".to_string(), clients.clone());
let ns_lock2 = NamespaceLock::with_clients("namespace2".to_string(), clients);
let resource = vec!["shared_resource".to_string()];
// Lock same resource in different namespaces - both should succeed
let result1 = ns_lock1
.lock_batch(&resource, "owner1", Duration::from_secs(5), Duration::from_secs(10))
.await?;
assert!(result1, "Lock in namespace1 should succeed");
let result2 = ns_lock2
.lock_batch(&resource, "owner2", Duration::from_secs(5), Duration::from_secs(10))
.await?;
assert!(result2, "Lock in namespace2 should succeed");
// Clean up
ns_lock1.unlock_batch(&resource, "owner1").await?;
ns_lock2.unlock_batch(&resource, "owner2").await?;
Ok(())
}
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_rpc_read_lock() -> Result<(), Box<dyn Error>> {
let args = LockRequest {
lock_id: LockId::new_deterministic("read_resource"),
resource: "read_resource".to_string(),
lock_type: LockType::Shared,
owner: "reader1".to_string(),
acquire_timeout: Duration::from_secs(30),
ttl: Duration::from_secs(30),
metadata: LockMetadata::default(),
priority: LockPriority::Normal,
deadlock_detection: false,
};
let args_str = serde_json::to_string(&args)?;
let mut client = node_service_time_out_client(&CLUSTER_ADDR.to_string()).await?;
// First read lock
let request = Request::new(GenerallyLockRequest { args: args_str.clone() });
let response = client.r_lock(request).await?.into_inner();
if let Some(error_info) = response.error_info {
panic!("can not get read lock: {error_info}");
}
// Second read lock with different owner should also succeed
let args2 = LockRequest {
lock_id: LockId::new_deterministic("read_resource"),
resource: "read_resource".to_string(),
lock_type: LockType::Shared,
owner: "reader2".to_string(),
acquire_timeout: Duration::from_secs(30),
ttl: Duration::from_secs(30),
metadata: LockMetadata::default(),
priority: LockPriority::Normal,
deadlock_detection: false,
};
let args2_str = serde_json::to_string(&args2)?;
let request2 = Request::new(GenerallyLockRequest { args: args2_str });
let response2 = client.r_lock(request2).await?.into_inner();
if let Some(error_info) = response2.error_info {
panic!("can not get second read lock: {error_info}");
}
// Unlock both
let request = Request::new(GenerallyLockRequest { args: args_str });
let response = client.r_un_lock(request).await?.into_inner();
if let Some(error_info) = response.error_info {
panic!("can not unlock read lock: {error_info}");
}
Ok(())
}
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_lock_refresh() -> Result<(), Box<dyn Error>> {
let args = LockRequest {
lock_id: LockId::new_deterministic("refresh_resource"),
resource: "refresh_resource".to_string(),
lock_type: LockType::Exclusive,
owner: "refresh_owner".to_string(),
acquire_timeout: Duration::from_secs(30),
ttl: Duration::from_secs(30),
metadata: LockMetadata::default(),
priority: LockPriority::Normal,
deadlock_detection: false,
};
let args_str = serde_json::to_string(&args)?;
let mut client = node_service_time_out_client(&CLUSTER_ADDR.to_string()).await?;
// Acquire lock
let request = Request::new(GenerallyLockRequest { args: args_str.clone() });
let response = client.lock(request).await?.into_inner();
if let Some(error_info) = response.error_info {
panic!("can not get lock: {error_info}");
}
// Refresh lock
let request = Request::new(GenerallyLockRequest { args: args_str.clone() });
let response = client.refresh(request).await?.into_inner();
if let Some(error_info) = response.error_info {
panic!("can not refresh lock: {error_info}");
}
assert!(response.success, "Lock refresh should succeed");
// Unlock
let request = Request::new(GenerallyLockRequest { args: args_str });
let response = client.un_lock(request).await?.into_inner();
if let Some(error_info) = response.error_info {
panic!("can not unlock: {error_info}");
}
Ok(())
}
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_force_unlock() -> Result<(), Box<dyn Error>> {
let args = LockRequest {
lock_id: LockId::new_deterministic("force_resource"),
resource: "force_resource".to_string(),
lock_type: LockType::Exclusive,
owner: "force_owner".to_string(),
acquire_timeout: Duration::from_secs(30),
ttl: Duration::from_secs(30),
metadata: LockMetadata::default(),
priority: LockPriority::Normal,
deadlock_detection: false,
};
let args_str = serde_json::to_string(&args)?;
let mut client = node_service_time_out_client(&CLUSTER_ADDR.to_string()).await?;
// Acquire lock
let request = Request::new(GenerallyLockRequest { args: args_str.clone() });
let response = client.lock(request).await?.into_inner();
if let Some(error_info) = response.error_info {
panic!("can not get lock: {error_info}");
}
// Force unlock (even by different owner)
let force_args = LockRequest {
lock_id: LockId::new_deterministic("force_resource"),
resource: "force_resource".to_string(),
lock_type: LockType::Exclusive,
owner: "admin".to_string(),
acquire_timeout: Duration::from_secs(30),
ttl: Duration::from_secs(30),
metadata: LockMetadata::default(),
priority: LockPriority::Normal,
deadlock_detection: false,
};
let force_args_str = serde_json::to_string(&force_args)?;
let request = Request::new(GenerallyLockRequest { args: force_args_str });
let response = client.force_un_lock(request).await?.into_inner();
if let Some(error_info) = response.error_info {
panic!("can not force unlock: {error_info}");
}
assert!(response.success, "Force unlock should succeed");
Ok(())
}
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_global_lock_map_sharing() -> Result<(), Box<dyn Error>> {
let endpoints = get_cluster_endpoints();
let clients = create_unique_clients(&endpoints).await?;
let ns_lock1 = NamespaceLock::with_clients("global_test".to_string(), clients.clone());
let ns_lock2 = NamespaceLock::with_clients("global_test".to_string(), clients);
let resource = vec!["global_test_resource".to_string()];
// First instance acquires lock
println!("First lock map attempting to acquire lock...");
let result1 = ns_lock1
.lock_batch(&resource, "owner1", std::time::Duration::from_secs(5), std::time::Duration::from_secs(10))
.await?;
println!("First lock result: {result1}");
assert!(result1, "First lock should succeed");
// Second instance should fail to acquire the same lock
println!("Second lock map attempting to acquire lock...");
let result2 = ns_lock2
.lock_batch(&resource, "owner2", std::time::Duration::from_secs(1), std::time::Duration::from_secs(10))
.await?;
println!("Second lock result: {result2}");
assert!(!result2, "Second lock should fail because resource is already locked");
// Release lock from first instance
println!("First lock map releasing lock...");
ns_lock1.unlock_batch(&resource, "owner1").await?;
// Now second instance should be able to acquire lock
println!("Second lock map attempting to acquire lock again...");
let result3 = ns_lock2
.lock_batch(&resource, "owner2", std::time::Duration::from_secs(5), std::time::Duration::from_secs(10))
.await?;
println!("Third lock result: {result3}");
assert!(result3, "Lock should succeed after first lock is released");
// Clean up
ns_lock2.unlock_batch(&resource, "owner2").await?;
ns.0.write().await.un_lock().await.unwrap();
Ok(())
}

View File

@@ -14,3 +14,4 @@
mod lock;
mod node_interact_test;
mod sql;

View File

@@ -0,0 +1,402 @@
#![cfg(test)]
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use aws_config::meta::region::RegionProviderChain;
use aws_sdk_s3::Client;
use aws_sdk_s3::config::{Credentials, Region};
use aws_sdk_s3::types::{
CsvInput, CsvOutput, ExpressionType, FileHeaderInfo, InputSerialization, JsonInput, JsonOutput, JsonType, OutputSerialization,
};
use bytes::Bytes;
use serial_test::serial;
use std::error::Error;
const ENDPOINT: &str = "http://localhost:9000";
const ACCESS_KEY: &str = "rustfsadmin";
const SECRET_KEY: &str = "rustfsadmin";
const BUCKET: &str = "test-sql-bucket";
const CSV_OBJECT: &str = "test-data.csv";
const JSON_OBJECT: &str = "test-data.json";
async fn create_aws_s3_client() -> Result<Client, Box<dyn Error>> {
let region_provider = RegionProviderChain::default_provider().or_else(Region::new("us-east-1"));
let shared_config = aws_config::defaults(aws_config::BehaviorVersion::latest())
.region(region_provider)
.credentials_provider(Credentials::new(ACCESS_KEY, SECRET_KEY, None, None, "static"))
.endpoint_url(ENDPOINT)
.load()
.await;
let client = Client::from_conf(
aws_sdk_s3::Config::from(&shared_config)
.to_builder()
.force_path_style(true) // Important for S3-compatible services
.build(),
);
Ok(client)
}
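A quick way to confirm the endpoint above is reachable before running the ignored tests is a plain head_bucket call with the same client. A minimal sketch (head_bucket is a standard aws-sdk-s3 operation; the helper name is illustrative):
async fn check_endpoint_reachable() -> Result<(), Box<dyn Error>> {
    let client = create_aws_s3_client().await?;
    // head_bucket fails if the server is down or the bucket does not exist yet.
    match client.head_bucket().bucket(BUCKET).send().await {
        Ok(_) => println!("endpoint reachable, bucket {BUCKET} exists"),
        Err(e) => println!("endpoint not ready: {e}"),
    }
    Ok(())
}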
async fn setup_test_bucket(client: &Client) -> Result<(), Box<dyn Error>> {
match client.create_bucket().bucket(BUCKET).send().await {
Ok(_) => {}
Err(e) => {
let error_str = e.to_string();
if !error_str.contains("BucketAlreadyOwnedByYou") && !error_str.contains("BucketAlreadyExists") {
return Err(e.into());
}
}
}
Ok(())
}
async fn upload_test_csv(client: &Client) -> Result<(), Box<dyn Error>> {
let csv_data = "name,age,city\nAlice,30,New York\nBob,25,Los Angeles\nCharlie,35,Chicago\nDiana,28,Boston";
client
.put_object()
.bucket(BUCKET)
.key(CSV_OBJECT)
.body(Bytes::from(csv_data.as_bytes()).into())
.send()
.await?;
Ok(())
}
async fn upload_test_json(client: &Client) -> Result<(), Box<dyn Error>> {
let json_data = r#"{"name":"Alice","age":30,"city":"New York"}
{"name":"Bob","age":25,"city":"Los Angeles"}
{"name":"Charlie","age":35,"city":"Chicago"}
{"name":"Diana","age":28,"city":"Boston"}"#;
client
.put_object()
.bucket(BUCKET)
.key(JSON_OBJECT)
.body(Bytes::from(json_data.as_bytes()).into())
.send()
.await?;
Ok(())
}
async fn process_select_response(
mut event_stream: aws_sdk_s3::operation::select_object_content::SelectObjectContentOutput,
) -> Result<String, Box<dyn Error>> {
let mut total_data = Vec::new();
while let Ok(Some(event)) = event_stream.payload.recv().await {
match event {
aws_sdk_s3::types::SelectObjectContentEventStream::Records(records_event) => {
if let Some(payload) = records_event.payload {
let data = payload.into_inner();
total_data.extend_from_slice(&data);
}
}
aws_sdk_s3::types::SelectObjectContentEventStream::End(_) => {
break;
}
_ => {
// Handle other event types (Stats, Progress, Cont, etc.)
}
}
}
Ok(String::from_utf8(total_data)?)
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_select_object_content_csv_basic() -> Result<(), Box<dyn Error>> {
let client = create_aws_s3_client().await?;
setup_test_bucket(&client).await?;
upload_test_csv(&client).await?;
// Construct SelectObjectContent request - basic query
let sql = "SELECT * FROM S3Object WHERE age > 28";
let csv_input = CsvInput::builder().file_header_info(FileHeaderInfo::Use).build();
let input_serialization = InputSerialization::builder().csv(csv_input).build();
let csv_output = CsvOutput::builder().build();
let output_serialization = OutputSerialization::builder().csv(csv_output).build();
let response = client
.select_object_content()
.bucket(BUCKET)
.key(CSV_OBJECT)
.expression(sql)
.expression_type(ExpressionType::Sql)
.input_serialization(input_serialization)
.output_serialization(output_serialization)
.send()
.await?;
let result_str = process_select_response(response).await?;
println!("CSV Select result: {result_str}");
// Verify results contain records with age > 28
assert!(result_str.contains("Alice,30,New York"));
assert!(result_str.contains("Charlie,35,Chicago"));
assert!(!result_str.contains("Bob,25,Los Angeles"));
assert!(!result_str.contains("Diana,28,Boston"));
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_select_object_content_csv_aggregation() -> Result<(), Box<dyn Error>> {
let client = create_aws_s3_client().await?;
setup_test_bucket(&client).await?;
upload_test_csv(&client).await?;
// Construct aggregation query - use a simpler approach
let sql = "SELECT name, age FROM S3Object WHERE age >= 25";
let csv_input = CsvInput::builder().file_header_info(FileHeaderInfo::Use).build();
let input_serialization = InputSerialization::builder().csv(csv_input).build();
let csv_output = CsvOutput::builder().build();
let output_serialization = OutputSerialization::builder().csv(csv_output).build();
let response = client
.select_object_content()
.bucket(BUCKET)
.key(CSV_OBJECT)
.expression(sql)
.expression_type(ExpressionType::Sql)
.input_serialization(input_serialization)
.output_serialization(output_serialization)
.send()
.await?;
let result_str = process_select_response(response).await?;
println!("CSV Aggregation result: {result_str}");
// Verify query results - should include records with age >= 25
assert!(result_str.contains("Alice"));
assert!(result_str.contains("Bob"));
assert!(result_str.contains("Charlie"));
assert!(result_str.contains("Diana"));
assert!(result_str.contains("30"));
assert!(result_str.contains("25"));
assert!(result_str.contains("35"));
assert!(result_str.contains("28"));
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_select_object_content_json_basic() -> Result<(), Box<dyn Error>> {
let client = create_aws_s3_client().await?;
setup_test_bucket(&client).await?;
upload_test_json(&client).await?;
// Construct JSON query
let sql = "SELECT s.name, s.age FROM S3Object s WHERE s.age > 28";
let json_input = JsonInput::builder().set_type(Some(JsonType::Document)).build();
let input_serialization = InputSerialization::builder().json(json_input).build();
let json_output = JsonOutput::builder().build();
let output_serialization = OutputSerialization::builder().json(json_output).build();
let response = client
.select_object_content()
.bucket(BUCKET)
.key(JSON_OBJECT)
.expression(sql)
.expression_type(ExpressionType::Sql)
.input_serialization(input_serialization)
.output_serialization(output_serialization)
.send()
.await?;
let result_str = process_select_response(response).await?;
println!("JSON Select result: {result_str}");
// Verify JSON query results
assert!(result_str.contains("Alice"));
assert!(result_str.contains("Charlie"));
assert!(result_str.contains("30"));
assert!(result_str.contains("35"));
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_select_object_content_csv_limit() -> Result<(), Box<dyn Error>> {
let client = create_aws_s3_client().await?;
setup_test_bucket(&client).await?;
upload_test_csv(&client).await?;
// Test LIMIT clause
let sql = "SELECT * FROM S3Object LIMIT 2";
let csv_input = CsvInput::builder().file_header_info(FileHeaderInfo::Use).build();
let input_serialization = InputSerialization::builder().csv(csv_input).build();
let csv_output = CsvOutput::builder().build();
let output_serialization = OutputSerialization::builder().csv(csv_output).build();
let response = client
.select_object_content()
.bucket(BUCKET)
.key(CSV_OBJECT)
.expression(sql)
.expression_type(ExpressionType::Sql)
.input_serialization(input_serialization)
.output_serialization(output_serialization)
.send()
.await?;
let result_str = process_select_response(response).await?;
println!("CSV Limit result: {result_str}");
// Verify only first 2 records are returned
let lines: Vec<&str> = result_str.lines().filter(|line| !line.trim().is_empty()).collect();
assert_eq!(lines.len(), 2, "Should return exactly 2 records");
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_select_object_content_csv_order_by() -> Result<(), Box<dyn Error>> {
let client = create_aws_s3_client().await?;
setup_test_bucket(&client).await?;
upload_test_csv(&client).await?;
// Test ORDER BY clause
let sql = "SELECT name, age FROM S3Object ORDER BY age DESC LIMIT 2";
let csv_input = CsvInput::builder().file_header_info(FileHeaderInfo::Use).build();
let input_serialization = InputSerialization::builder().csv(csv_input).build();
let csv_output = CsvOutput::builder().build();
let output_serialization = OutputSerialization::builder().csv(csv_output).build();
let response = client
.select_object_content()
.bucket(BUCKET)
.key(CSV_OBJECT)
.expression(sql)
.expression_type(ExpressionType::Sql)
.input_serialization(input_serialization)
.output_serialization(output_serialization)
.send()
.await?;
let result_str = process_select_response(response).await?;
println!("CSV Order By result: {result_str}");
// Verify ordered by age descending
let lines: Vec<&str> = result_str.lines().filter(|line| !line.trim().is_empty()).collect();
assert!(lines.len() >= 2, "Should return at least 2 records");
// Check that the result contains the highest-age records
assert!(result_str.contains("Charlie,35"));
assert!(result_str.contains("Alice,30"));
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_select_object_content_error_handling() -> Result<(), Box<dyn Error>> {
let client = create_aws_s3_client().await?;
setup_test_bucket(&client).await?;
upload_test_csv(&client).await?;
// Test invalid SQL query
let sql = "SELECT * FROM S3Object WHERE invalid_column > 10";
let csv_input = CsvInput::builder().file_header_info(FileHeaderInfo::Use).build();
let input_serialization = InputSerialization::builder().csv(csv_input).build();
let csv_output = CsvOutput::builder().build();
let output_serialization = OutputSerialization::builder().csv(csv_output).build();
// This query should fail because invalid_column doesn't exist
let result = client
.select_object_content()
.bucket(BUCKET)
.key(CSV_OBJECT)
.expression(sql)
.expression_type(ExpressionType::Sql)
.input_serialization(input_serialization)
.output_serialization(output_serialization)
.send()
.await;
// Verify query fails (expected behavior)
assert!(result.is_err(), "Query with invalid column should fail");
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_select_object_content_nonexistent_object() -> Result<(), Box<dyn Error>> {
let client = create_aws_s3_client().await?;
setup_test_bucket(&client).await?;
// Test query on nonexistent object
let sql = "SELECT * FROM S3Object";
let csv_input = CsvInput::builder().file_header_info(FileHeaderInfo::Use).build();
let input_serialization = InputSerialization::builder().csv(csv_input).build();
let csv_output = CsvOutput::builder().build();
let output_serialization = OutputSerialization::builder().csv(csv_output).build();
let result = client
.select_object_content()
.bucket(BUCKET)
.key("nonexistent.csv")
.expression(sql)
.expression_type(ExpressionType::Sql)
.input_serialization(input_serialization)
.output_serialization(output_serialization)
.send()
.await;
// Verify query fails (expected behavior)
assert!(result.is_err(), "Query on nonexistent object should fail");
Ok(())
}

View File

@@ -12,23 +12,18 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::data_usage::{DATA_USAGE_CACHE_NAME, DATA_USAGE_ROOT, load_data_usage_from_backend};
use crate::error::{Error, Result};
use crate::{
disk::endpoint::Endpoint,
global::{GLOBAL_BOOT_TIME, GLOBAL_Endpoints},
heal::{
data_usage::{DATA_USAGE_CACHE_NAME, DATA_USAGE_ROOT, load_data_usage_from_backend},
data_usage_cache::DataUsageCache,
heal_commands::{DRIVE_STATE_OK, DRIVE_STATE_UNFORMATTED},
},
new_object_layer_fn,
notification_sys::get_global_notification_sys,
store_api::StorageAPI,
};
use rustfs_common::{
// error::{Error, Result},
globals::GLOBAL_Local_Node_Name,
};
use crate::data_usage::load_data_usage_cache;
use rustfs_common::{globals::GLOBAL_Local_Node_Name, heal_channel::DriveState};
use rustfs_madmin::{
BackendDisks, Disk, ErasureSetInfo, ITEM_INITIALIZING, ITEM_OFFLINE, ITEM_ONLINE, InfoMessage, ServerProperties,
};
@@ -318,7 +313,7 @@ fn get_online_offline_disks_stats(disks_info: &[Disk]) -> (BackendDisks, Backend
for disk in disks_info {
let ep = &disk.endpoint;
let state = &disk.state;
if *state != DRIVE_STATE_OK && *state != DRIVE_STATE_UNFORMATTED {
if *state != DriveState::Ok.to_string() && *state != DriveState::Unformatted.to_string() {
*offline_disks.get_mut(ep).unwrap() += 1;
continue;
}
@@ -359,13 +354,13 @@ async fn get_pools_info(all_disks: &[Disk]) -> Result<HashMap<i32, HashMap<i32,
if erasure_set.id == 0 {
erasure_set.id = d.set_index;
if let Ok(cache) = DataUsageCache::load(
if let Ok(cache) = load_data_usage_cache(
&store.pools[d.pool_index as usize].disk_set[d.set_index as usize].clone(),
DATA_USAGE_CACHE_NAME,
)
.await
{
let data_usage_info = cache.dui(DATA_USAGE_ROOT, &[]);
let data_usage_info = cache.dui(DATA_USAGE_ROOT, &Vec::<String>::new());
erasure_set.objects_count = data_usage_info.objects_total_count;
erasure_set.versions_count = data_usage_info.versions_total_count;
erasure_set.delete_markers_count = data_usage_info.delete_markers_total_count;

View File

@@ -22,6 +22,10 @@ use async_channel::{Receiver as A_Receiver, Sender as A_Sender, bounded};
use futures::Future;
use http::HeaderMap;
use lazy_static::lazy_static;
use rustfs_common::data_usage::TierStats;
use rustfs_common::heal_channel::rep_has_active_rules;
use rustfs_common::metrics::{IlmAction, Metrics};
use rustfs_utils::path::encode_dir_object;
use s3s::Body;
use sha2::{Digest, Sha256};
use std::any::Any;
@@ -31,6 +35,7 @@ use std::io::Write;
use std::pin::Pin;
use std::sync::atomic::{AtomicI64, Ordering};
use std::sync::{Arc, Mutex};
use time::OffsetDateTime;
use tokio::select;
use tokio::sync::mpsc::{Receiver, Sender};
use tokio::sync::{RwLock, mpsc};
@@ -41,9 +46,10 @@ use xxhash_rust::xxh64;
//use rustfs_notify::{BucketNotificationConfig, Event, EventName, LogLevel, NotificationError, init_logger};
//use rustfs_notify::{initialize, notification_system};
use super::bucket_lifecycle_audit::{LcAuditEvent, LcEventSrc};
use super::lifecycle::{self, ExpirationOptions, IlmAction, Lifecycle, TransitionOptions};
use super::lifecycle::{self, ExpirationOptions, Lifecycle, TransitionOptions};
use super::tier_last_day_stats::{DailyAllTierStats, LastDayTierStats};
use super::tier_sweeper::{Jentry, delete_object_from_remote_tier};
use crate::bucket::object_lock::objectlock_sys::enforce_retention_for_deletion;
use crate::bucket::{metadata_sys::get_lifecycle_config, versioning_sys::BucketVersioningSys};
use crate::client::object_api_utils::new_getobjectreader;
use crate::error::Error;
@@ -52,16 +58,11 @@ use crate::event::name::EventName;
use crate::event_notification::{EventArgs, send_event};
use crate::global::GLOBAL_LocalNodeName;
use crate::global::{GLOBAL_LifecycleSys, GLOBAL_TierConfigMgr, get_global_deployment_id};
use crate::heal::{
data_scanner::{apply_expiry_on_non_transitioned_objects, apply_expiry_on_transitioned_object},
data_scanner_metric::ScannerMetrics,
data_usage_cache::TierStats,
};
use crate::store::ECStore;
use crate::store_api::StorageAPI;
use crate::store_api::{GetObjectReader, HTTPRangeSpec, ObjectInfo, ObjectOptions, ObjectToDelete};
use crate::tier::warm_backend::WarmBackendGetOpts;
use s3s::dto::BucketLifecycleConfiguration;
use s3s::dto::{BucketLifecycleConfiguration, DefaultRetention, ReplicationConfiguration};
pub type TimeFn = Arc<dyn Fn() -> Pin<Box<dyn Future<Output = ()> + Send>> + Send + Sync + 'static>;
pub type TraceFn =
@@ -631,7 +632,7 @@ pub async fn enqueue_transition_immediate(oi: &ObjectInfo, src: LcEventSrc) {
if !lc.is_none() {
let event = lc.expect("err").eval(&oi.to_lifecycle_opts()).await;
match event.action {
lifecycle::IlmAction::TransitionAction | lifecycle::IlmAction::TransitionVersionAction => {
IlmAction::TransitionAction | IlmAction::TransitionVersionAction => {
if oi.delete_marker || oi.is_dir {
return;
}
@@ -728,7 +729,7 @@ pub fn gen_transition_objname(bucket: &str) -> Result<String, Error> {
}
pub async fn transition_object(api: Arc<ECStore>, oi: &ObjectInfo, lae: LcAuditEvent) -> Result<(), Error> {
let time_ilm = ScannerMetrics::time_ilm(lae.event.action);
let time_ilm = Metrics::time_ilm(lae.event.action);
let opts = ObjectOptions {
transition: TransitionOptions {
@@ -842,3 +843,161 @@ pub struct RestoreObjectRequest {
}
const _MAX_RESTORE_OBJECT_REQUEST_SIZE: i64 = 2 << 20;
pub async fn eval_action_from_lifecycle(
lc: &BucketLifecycleConfiguration,
lr: Option<DefaultRetention>,
rcfg: Option<(ReplicationConfiguration, OffsetDateTime)>,
oi: &ObjectInfo,
) -> lifecycle::Event {
let event = lc.eval(&oi.to_lifecycle_opts()).await;
//if serverDebugLog {
info!("lifecycle: Secondary scan: {}", event.action);
//}
let lock_enabled = if let Some(lr) = lr { lr.mode.is_some() } else { false };
match event.action {
lifecycle::IlmAction::DeleteAllVersionsAction | lifecycle::IlmAction::DelMarkerDeleteAllVersionsAction => {
if lock_enabled {
return lifecycle::Event::default();
}
}
lifecycle::IlmAction::DeleteVersionAction | lifecycle::IlmAction::DeleteRestoredVersionAction => {
if oi.version_id.is_none() {
return lifecycle::Event::default();
}
if lock_enabled && enforce_retention_for_deletion(oi) {
//if serverDebugLog {
if oi.version_id.is_some() {
info!("lifecycle: {} v({}) is locked, not deleting", oi.name, oi.version_id.expect("err"));
} else {
info!("lifecycle: {} is locked, not deleting", oi.name);
}
//}
return lifecycle::Event::default();
}
if let Some(rcfg) = rcfg {
if rep_has_active_rules(&rcfg.0, &oi.name, true) {
return lifecycle::Event::default();
}
}
}
_ => (),
}
event
}
async fn apply_transition_rule(event: &lifecycle::Event, src: &LcEventSrc, oi: &ObjectInfo) -> bool {
if oi.delete_marker || oi.is_dir {
return false;
}
GLOBAL_TransitionState.queue_transition_task(oi, event, src).await;
true
}
pub async fn apply_expiry_on_transitioned_object(
api: Arc<ECStore>,
oi: &ObjectInfo,
lc_event: &lifecycle::Event,
src: &LcEventSrc,
) -> bool {
// let time_ilm = ScannerMetrics::time_ilm(lc_event.action.clone());
if let Err(_err) = expire_transitioned_object(api, oi, lc_event, src).await {
return false;
}
// let _ = time_ilm(1);
true
}
pub async fn apply_expiry_on_non_transitioned_objects(
api: Arc<ECStore>,
oi: &ObjectInfo,
lc_event: &lifecycle::Event,
_src: &LcEventSrc,
) -> bool {
let mut opts = ObjectOptions {
expiration: ExpirationOptions { expire: true },
..Default::default()
};
if lc_event.action.delete_versioned() {
opts.version_id = Some(oi.version_id.expect("err").to_string());
}
opts.versioned = BucketVersioningSys::prefix_enabled(&oi.bucket, &oi.name).await;
opts.version_suspended = BucketVersioningSys::prefix_suspended(&oi.bucket, &oi.name).await;
if lc_event.action.delete_all() {
opts.delete_prefix = true;
opts.delete_prefix_object = true;
}
// let time_ilm = ScannerMetrics::time_ilm(lc_event.action.clone());
let mut dobj = api
.delete_object(&oi.bucket, &encode_dir_object(&oi.name), opts)
.await
.unwrap();
if dobj.name.is_empty() {
dobj = oi.clone();
}
//let tags = LcAuditEvent::new(lc_event.clone(), src.clone()).tags();
//tags["version-id"] = dobj.version_id;
let mut event_name = EventName::ObjectRemovedDelete;
if oi.delete_marker {
event_name = EventName::ObjectRemovedDeleteMarkerCreated;
}
match lc_event.action {
lifecycle::IlmAction::DeleteAllVersionsAction => event_name = EventName::ObjectRemovedDeleteAllVersions,
lifecycle::IlmAction::DelMarkerDeleteAllVersionsAction => event_name = EventName::ILMDelMarkerExpirationDelete,
_ => (),
}
send_event(EventArgs {
event_name: event_name.as_ref().to_string(),
bucket_name: dobj.bucket.clone(),
object: dobj,
user_agent: "Internal: [ILM-Expiry]".to_string(),
host: GLOBAL_LocalNodeName.to_string(),
..Default::default()
});
if lc_event.action != lifecycle::IlmAction::NoneAction {
// let mut num_versions = 1_u64;
// if lc_event.action.delete_all() {
// num_versions = oi.num_versions as u64;
// }
// let _ = time_ilm(num_versions);
}
true
}
async fn apply_expiry_rule(event: &lifecycle::Event, src: &LcEventSrc, oi: &ObjectInfo) -> bool {
let mut expiry_state = GLOBAL_ExpiryState.write().await;
expiry_state.enqueue_by_days(oi, event, src).await;
true
}
pub async fn apply_lifecycle_action(event: &lifecycle::Event, src: &LcEventSrc, oi: &ObjectInfo) -> bool {
let mut success = false;
match event.action {
lifecycle::IlmAction::DeleteVersionAction
| lifecycle::IlmAction::DeleteAction
| lifecycle::IlmAction::DeleteRestoredAction
| lifecycle::IlmAction::DeleteRestoredVersionAction
| lifecycle::IlmAction::DeleteAllVersionsAction
| lifecycle::IlmAction::DelMarkerDeleteAllVersionsAction => {
success = apply_expiry_rule(event, src, oi).await;
}
lifecycle::IlmAction::TransitionAction | lifecycle::IlmAction::TransitionVersionAction => {
success = apply_transition_rule(event, src, oi).await;
}
_ => (),
}
success
}

View File

@@ -43,49 +43,7 @@ const _ERR_XML_NOT_WELL_FORMED: &str =
const ERR_LIFECYCLE_BUCKET_LOCKED: &str =
"ExpiredObjectAllVersions element and DelMarkerExpiration action cannot be used on an retention bucket";
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum IlmAction {
NoneAction = 0,
DeleteAction,
DeleteVersionAction,
TransitionAction,
TransitionVersionAction,
DeleteRestoredAction,
DeleteRestoredVersionAction,
DeleteAllVersionsAction,
DelMarkerDeleteAllVersionsAction,
ActionCount,
}
impl IlmAction {
pub fn delete_restored(&self) -> bool {
*self == Self::DeleteRestoredAction || *self == Self::DeleteRestoredVersionAction
}
pub fn delete_versioned(&self) -> bool {
*self == Self::DeleteVersionAction || *self == Self::DeleteRestoredVersionAction
}
pub fn delete_all(&self) -> bool {
*self == Self::DeleteAllVersionsAction || *self == Self::DelMarkerDeleteAllVersionsAction
}
pub fn delete(&self) -> bool {
if self.delete_restored() {
return true;
}
*self == Self::DeleteVersionAction
|| *self == Self::DeleteAction
|| *self == Self::DeleteAllVersionsAction
|| *self == Self::DelMarkerDeleteAllVersionsAction
}
}
impl Display for IlmAction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{:?}", self)
}
}
pub use rustfs_common::metrics::IlmAction;
#[async_trait::async_trait]
pub trait RuleValidate {

View File

@@ -25,7 +25,7 @@ use std::ops::Sub;
use time::OffsetDateTime;
use tracing::{error, warn};
use crate::heal::data_usage_cache::TierStats;
use rustfs_common::data_usage::TierStats;
pub type DailyAllTierStats = HashMap<String, LastDayTierStats>;

View File

@@ -18,9 +18,9 @@ use crate::bucket::utils::{deserialize, is_meta_bucketname};
use crate::cmd::bucket_targets;
use crate::error::{Error, Result, is_err_bucket_not_found};
use crate::global::{GLOBAL_Endpoints, is_dist_erasure, is_erasure, new_object_layer_fn};
use crate::heal::heal_commands::HealOpts;
use crate::store::ECStore;
use futures::future::join_all;
use rustfs_common::heal_channel::HealOpts;
use rustfs_policy::policy::BucketPolicy;
use s3s::dto::{
BucketLifecycleConfiguration, NotificationConfiguration, ObjectLockConfiguration, ReplicationConfiguration,

View File

@@ -1,137 +0,0 @@
#![allow(unsafe_code)] // TODO: audit unsafe code
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::{
fmt::Debug,
future::Future,
pin::Pin,
ptr,
sync::{
Arc,
atomic::{AtomicPtr, AtomicU64, Ordering},
},
time::{Duration, SystemTime, UNIX_EPOCH},
};
use tokio::{spawn, sync::Mutex};
use std::io::Result;
pub type UpdateFn<T> = Box<dyn Fn() -> Pin<Box<dyn Future<Output = Result<T>> + Send>> + Send + Sync + 'static>;
#[derive(Clone, Debug, Default)]
pub struct Opts {
return_last_good: bool,
no_wait: bool,
}
pub struct Cache<T: Clone + Debug + Send> {
update_fn: UpdateFn<T>,
ttl: Duration,
opts: Opts,
val: AtomicPtr<T>,
last_update_ms: AtomicU64,
updating: Arc<Mutex<bool>>,
}
impl<T: Clone + Debug + Send + 'static> Cache<T> {
pub fn new(update_fn: UpdateFn<T>, ttl: Duration, opts: Opts) -> Self {
let val = AtomicPtr::new(ptr::null_mut());
Self {
update_fn,
ttl,
opts,
val,
last_update_ms: AtomicU64::new(0),
updating: Arc::new(Mutex::new(false)),
}
}
pub async fn get(self: Arc<Self>) -> Result<T> {
let v_ptr = self.val.load(Ordering::SeqCst);
let v = if v_ptr.is_null() {
None
} else {
Some(unsafe { (*v_ptr).clone() })
};
let now = SystemTime::now()
.duration_since(UNIX_EPOCH)
.expect("Time went backwards")
.as_secs();
if now - self.last_update_ms.load(Ordering::SeqCst) < self.ttl.as_secs() {
if let Some(v) = v {
return Ok(v);
}
}
if self.opts.no_wait && v.is_some() && now - self.last_update_ms.load(Ordering::SeqCst) < self.ttl.as_secs() * 2 {
if self.updating.try_lock().is_ok() {
let this = Arc::clone(&self);
spawn(async move {
let _ = this.update().await;
});
}
return Ok(v.unwrap());
}
let _ = self.updating.lock().await;
if let Ok(duration) =
SystemTime::now().duration_since(UNIX_EPOCH + Duration::from_secs(self.last_update_ms.load(Ordering::SeqCst)))
{
if duration < self.ttl {
return Ok(v.unwrap());
}
}
match self.update().await {
Ok(_) => {
let v_ptr = self.val.load(Ordering::SeqCst);
let v = if v_ptr.is_null() {
None
} else {
Some(unsafe { (*v_ptr).clone() })
};
Ok(v.unwrap())
}
Err(err) => Err(err),
}
}
async fn update(&self) -> Result<()> {
match (self.update_fn)().await {
Ok(val) => {
self.val.store(Box::into_raw(Box::new(val)), Ordering::SeqCst);
let now = SystemTime::now()
.duration_since(UNIX_EPOCH)
.expect("Time went backwards")
.as_secs();
self.last_update_ms.store(now, Ordering::SeqCst);
Ok(())
}
Err(err) => {
let v_ptr = self.val.load(Ordering::SeqCst);
if self.opts.return_last_good && !v_ptr.is_null() {
return Ok(());
}
Err(err)
}
}
}
}

View File

@@ -12,5 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.
// pub mod cache;
use std::sync::Arc;
use lazy_static::lazy_static;
use tokio_util::sync::CancellationToken;
pub mod metacache_set;
lazy_static! {
pub static ref LIST_PATH_RAW_CANCEL_TOKEN: Arc<CancellationToken> = Arc::new(CancellationToken::new());
}
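The token above is meant as a process-wide cancel signal for raw list operations: each task clones it and observes it, and a single cancel() stops them all. A minimal sketch of how such a token is usually observed with tokio_util (the worker body is illustrative):
async fn run_until_cancelled(token: tokio_util::sync::CancellationToken) {
    loop {
        tokio::select! {
            // Stop as soon as token.cancel() is called elsewhere.
            _ = token.cancelled() => break,
            _ = tokio::time::sleep(std::time::Duration::from_millis(100)) => {
                // one unit of listing work per tick
            }
        }
    }
}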

View File

@@ -18,7 +18,7 @@ use crate::bucket::versioning::VersioningApi;
use crate::bucket::versioning_sys::BucketVersioningSys;
use crate::store::ECStore;
use crate::store_api::{ObjectOptions, ObjectToDelete};
use rustfs_lock::local_locker::MAX_DELETE_LIST;
use rustfs_lock::MAX_DELETE_LIST;
pub async fn delete_object_versions(api: ECStore, bucket: &str, to_del: &[ObjectToDelete], _lc_event: lifecycle::Event) {
let mut remaining = to_del;

View File

@@ -0,0 +1,297 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::{collections::HashMap, sync::Arc};
use crate::{bucket::metadata_sys::get_replication_config, config::com::read_config, store::ECStore};
use rustfs_common::data_usage::{BucketTargetUsageInfo, DataUsageCache, DataUsageEntry, DataUsageInfo, SizeSummary};
use rustfs_utils::path::SLASH_SEPARATOR;
use tracing::{error, warn};
use crate::error::Error;
// Data usage storage constants
pub const DATA_USAGE_ROOT: &str = SLASH_SEPARATOR;
const DATA_USAGE_OBJ_NAME: &str = ".usage.json";
const DATA_USAGE_BLOOM_NAME: &str = ".bloomcycle.bin";
pub const DATA_USAGE_CACHE_NAME: &str = ".usage-cache.bin";
// Data usage storage paths
lazy_static::lazy_static! {
pub static ref DATA_USAGE_BUCKET: String = format!("{}{}{}",
crate::disk::RUSTFS_META_BUCKET,
SLASH_SEPARATOR,
crate::disk::BUCKET_META_PREFIX
);
pub static ref DATA_USAGE_OBJ_NAME_PATH: String = format!("{}{}{}",
crate::disk::BUCKET_META_PREFIX,
SLASH_SEPARATOR,
DATA_USAGE_OBJ_NAME
);
pub static ref DATA_USAGE_BLOOM_NAME_PATH: String = format!("{}{}{}",
crate::disk::BUCKET_META_PREFIX,
SLASH_SEPARATOR,
DATA_USAGE_BLOOM_NAME
);
}
/// Store data usage info to backend storage
pub async fn store_data_usage_in_backend(data_usage_info: DataUsageInfo, store: Arc<ECStore>) -> Result<(), Error> {
let data =
serde_json::to_vec(&data_usage_info).map_err(|e| Error::other(format!("Failed to serialize data usage info: {e}")))?;
// Save to backend using the same mechanism as original code
crate::config::com::save_config(store, &DATA_USAGE_OBJ_NAME_PATH, data)
.await
.map_err(Error::other)?;
Ok(())
}
/// Load data usage info from backend storage
pub async fn load_data_usage_from_backend(store: Arc<ECStore>) -> Result<DataUsageInfo, Error> {
let buf: Vec<u8> = match read_config(store, &DATA_USAGE_OBJ_NAME_PATH).await {
Ok(data) => data,
Err(e) => {
error!("Failed to read data usage info from backend: {}", e);
if e == crate::error::Error::ConfigNotFound {
return Ok(DataUsageInfo::default());
}
return Err(Error::other(e));
}
};
let mut data_usage_info: DataUsageInfo =
serde_json::from_slice(&buf).map_err(|e| Error::other(format!("Failed to deserialize data usage info: {e}")))?;
warn!("Loaded data usage info from backend {:?}", &data_usage_info);
// Handle backward compatibility like the original code
if data_usage_info.buckets_usage.is_empty() {
data_usage_info.buckets_usage = data_usage_info
.bucket_sizes
.iter()
.map(|(bucket, &size)| {
(
bucket.clone(),
rustfs_common::data_usage::BucketUsageInfo {
size,
..Default::default()
},
)
})
.collect();
}
if data_usage_info.bucket_sizes.is_empty() {
data_usage_info.bucket_sizes = data_usage_info
.buckets_usage
.iter()
.map(|(bucket, bui)| (bucket.clone(), bui.size))
.collect();
}
for (bucket, bui) in &data_usage_info.buckets_usage {
if bui.replicated_size_v1 > 0
|| bui.replication_failed_count_v1 > 0
|| bui.replication_failed_size_v1 > 0
|| bui.replication_pending_count_v1 > 0
{
if let Ok((cfg, _)) = get_replication_config(bucket).await {
if !cfg.role.is_empty() {
data_usage_info.replication_info.insert(
cfg.role.clone(),
BucketTargetUsageInfo {
replication_failed_size: bui.replication_failed_size_v1,
replication_failed_count: bui.replication_failed_count_v1,
replicated_size: bui.replicated_size_v1,
replication_pending_count: bui.replication_pending_count_v1,
replication_pending_size: bui.replication_pending_size_v1,
..Default::default()
},
);
}
}
}
}
Ok(data_usage_info)
}
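// --- Illustrative sketch (not part of the original diff) ---
// Round-trips the aggregated usage snapshot through the backend using the two
// functions above; assumes an already-initialized ECStore handle is available.
#[allow(dead_code)]
async fn example_round_trip_usage(store: Arc<ECStore>) -> Result<(), Error> {
    let snapshot = DataUsageInfo::default();
    store_data_usage_in_backend(snapshot, store.clone()).await?;
    // Reload falls back to the default value when the config object is missing.
    let _reloaded = load_data_usage_from_backend(store).await?;
    Ok(())
}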
/// Create a data usage cache entry from size summary
pub fn create_cache_entry_from_summary(summary: &SizeSummary) -> DataUsageEntry {
let mut entry = DataUsageEntry::default();
entry.add_sizes(summary);
entry
}
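// --- Illustrative sketch (not part of the original diff) ---
// Shows how a scanner SizeSummary folds into a cache entry. The field names
// (total_size, versions, delete_markers) and the accumulation behaviour are
// assumed to mirror the removed heal::data_usage_cache module shown later in
// this diff; everything else is hypothetical test scaffolding.
#[cfg(test)]
mod summary_entry_tests {
    use super::*;

    #[test]
    fn summary_sizes_are_accumulated() {
        let summary = SizeSummary {
            total_size: 4096,
            versions: 2,
            delete_markers: 1,
            ..Default::default()
        };
        let entry = create_cache_entry_from_summary(&summary);
        assert_eq!(entry.size, 4096);
        assert_eq!(entry.versions, 2);
        assert_eq!(entry.delete_markers, 1);
    }
}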
/// Convert data usage cache to DataUsageInfo
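/// Totals come from flattening the entry found at `path`; per-bucket stats are
/// flattened per bucket, and `buckets_count` is the number of direct children of
/// the entry at `path`.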
pub fn cache_to_data_usage_info(cache: &DataUsageCache, path: &str, buckets: &[crate::store_api::BucketInfo]) -> DataUsageInfo {
let e = match cache.find(path) {
Some(e) => e,
None => return DataUsageInfo::default(),
};
let flat = cache.flatten(&e);
let mut buckets_usage = HashMap::new();
for bucket in buckets.iter() {
let e = match cache.find(&bucket.name) {
Some(e) => e,
None => continue,
};
let flat = cache.flatten(&e);
let mut bui = rustfs_common::data_usage::BucketUsageInfo {
size: flat.size as u64,
versions_count: flat.versions as u64,
objects_count: flat.objects as u64,
delete_markers_count: flat.delete_markers as u64,
object_size_histogram: flat.obj_sizes.to_map(),
object_versions_histogram: flat.obj_versions.to_map(),
..Default::default()
};
if let Some(rs) = &flat.replication_stats {
bui.replica_size = rs.replica_size;
bui.replica_count = rs.replica_count;
for (arn, stat) in rs.targets.iter() {
bui.replication_info.insert(
arn.clone(),
BucketTargetUsageInfo {
replication_pending_size: stat.pending_size,
replicated_size: stat.replicated_size,
replication_failed_size: stat.failed_size,
replication_pending_count: stat.pending_count,
replication_failed_count: stat.failed_count,
replicated_count: stat.replicated_count,
..Default::default()
},
);
}
}
buckets_usage.insert(bucket.name.clone(), bui);
}
DataUsageInfo {
last_update: cache.info.last_update,
objects_total_count: flat.objects as u64,
versions_total_count: flat.versions as u64,
delete_markers_total_count: flat.delete_markers as u64,
objects_total_size: flat.size as u64,
buckets_count: e.children.len() as u64,
buckets_usage,
..Default::default()
}
}
// Helper functions for DataUsageCache operations
pub async fn load_data_usage_cache(store: &crate::set_disk::SetDisks, name: &str) -> crate::error::Result<DataUsageCache> {
use crate::disk::{BUCKET_META_PREFIX, RUSTFS_META_BUCKET};
use crate::store_api::{ObjectIO, ObjectOptions};
use http::HeaderMap;
use rand::Rng;
use std::path::Path;
use std::time::Duration;
use tokio::time::sleep;
let mut d = DataUsageCache::default();
let mut retries = 0;
while retries < 5 {
let path = Path::new(BUCKET_META_PREFIX).join(name);
match store
.get_object_reader(
RUSTFS_META_BUCKET,
path.to_str().unwrap(),
None,
HeaderMap::new(),
&ObjectOptions {
no_lock: true,
..Default::default()
},
)
.await
{
Ok(mut reader) => {
if let Ok(info) = DataUsageCache::unmarshal(&reader.read_all().await?) {
d = info
}
break;
}
Err(err) => match err {
crate::error::Error::FileNotFound | crate::error::Error::VolumeNotFound => {
match store
.get_object_reader(
RUSTFS_META_BUCKET,
name,
None,
HeaderMap::new(),
&ObjectOptions {
no_lock: true,
..Default::default()
},
)
.await
{
Ok(mut reader) => {
if let Ok(info) = DataUsageCache::unmarshal(&reader.read_all().await?) {
d = info
}
break;
}
Err(_) => match err {
crate::error::Error::FileNotFound | crate::error::Error::VolumeNotFound => {
break;
}
_ => {}
},
}
}
_ => {
break;
}
},
}
retries += 1;
let dur = {
let mut rng = rand::rng();
rng.random_range(0..1_000)
};
sleep(Duration::from_millis(dur)).await;
}
Ok(d)
}
pub async fn save_data_usage_cache(cache: &DataUsageCache, name: &str) -> crate::error::Result<()> {
use crate::config::com::save_config;
use crate::disk::BUCKET_META_PREFIX;
use crate::new_object_layer_fn;
use std::path::Path;
let Some(store) = new_object_layer_fn() else {
return Err(crate::error::Error::other("errServerNotInitialized"));
};
let buf = cache.marshal_msg().map_err(crate::error::Error::other)?;
let buf_clone = buf.clone();
let store_clone = store.clone();
let name = Path::new(BUCKET_META_PREFIX).join(name).to_string_lossy().to_string();
let name_clone = name.clone();
tokio::spawn(async move {
let _ = save_config(store_clone, &format!("{}{}", &name_clone, ".bkp"), buf_clone).await;
});
save_config(store, &name, buf).await?;
Ok(())
}
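// --- Illustrative sketch (not part of the original diff) ---
// A load/modify/save cycle over the per-set usage cache; assumes the caller
// already holds a SetDisks handle and that DataUsageCacheInfo keeps a `name`
// field as in the removed heal module shown later in this diff.
#[allow(dead_code)]
async fn example_refresh_cache(set_disks: &crate::set_disk::SetDisks) -> crate::error::Result<()> {
    let mut cache = load_data_usage_cache(set_disks, DATA_USAGE_CACHE_NAME).await?;
    cache.info.name = DATA_USAGE_ROOT.to_string();
    save_data_usage_cache(&cache, DATA_USAGE_CACHE_NAME).await?;
    Ok(())
}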

View File

@@ -21,9 +21,6 @@ use super::{
};
use super::{endpoint::Endpoint, error::DiskError, format::FormatV3};
use crate::bucket::metadata_sys::{self};
use crate::bucket::versioning::VersioningApi;
use crate::bucket::versioning_sys::BucketVersioningSys;
use crate::disk::error::FileAccessDeniedWithContext;
use crate::disk::error_conv::{to_access_error, to_file_error, to_unformatted_disk_error, to_volume_error};
use crate::disk::fs::{
@@ -36,16 +33,6 @@ use crate::disk::{
};
use crate::disk::{FileWriter, STORAGE_FORMAT_FILE};
use crate::global::{GLOBAL_IsErasureSD, GLOBAL_RootDiskThreshold};
use crate::heal::data_scanner::{
ScannerItem, ShouldSleepFn, SizeSummary, lc_has_active_rules, rep_has_active_rules, scan_data_folder,
};
use crate::heal::data_scanner_metric::{ScannerMetric, ScannerMetrics};
use crate::heal::data_usage_cache::{DataUsageCache, DataUsageEntry};
use crate::heal::error::{ERR_IGNORE_FILE_CONTRIB, ERR_SKIP_FILE};
use crate::heal::heal_commands::{HealScanMode, HealingTracker};
use crate::heal::heal_ops::HEALING_TRACKER_FILENAME;
use crate::new_object_layer_fn;
use crate::store_api::{ObjectInfo, StorageAPI};
use rustfs_utils::path::{
GLOBAL_DIR_SUFFIX, GLOBAL_DIR_SUFFIX_WITH_SLASH, SLASH_SEPARATOR, clean, decode_dir_object, encode_dir_object, has_suffix,
path_join, path_join_buf,
@@ -55,19 +42,18 @@ use tokio::time::interval;
use crate::erasure_coding::bitrot_verify;
use bytes::Bytes;
use path_absolutize::Absolutize;
use rustfs_common::defer;
use rustfs_filemeta::{
Cache, FileInfo, FileInfoOpts, FileMeta, MetaCacheEntry, MetacacheWriter, ObjectPartInfo, Opts, RawFileInfo, UpdateFn,
get_file_info, read_xl_meta_no_data,
};
use rustfs_utils::HashAlgorithm;
use rustfs_utils::os::get_info;
use std::collections::{HashMap, HashSet};
use std::collections::HashSet;
use std::fmt::Debug;
use std::io::SeekFrom;
use std::sync::Arc;
use std::sync::atomic::{AtomicU32, Ordering};
use std::time::{Duration, SystemTime};
use std::time::Duration;
use std::{
fs::Metadata,
path::{Path, PathBuf},
@@ -76,7 +62,6 @@ use time::OffsetDateTime;
use tokio::fs::{self, File};
use tokio::io::{AsyncReadExt, AsyncSeekExt, AsyncWrite, AsyncWriteExt, ErrorKind};
use tokio::sync::RwLock;
use tokio::sync::mpsc::Sender;
use tracing::{debug, error, info, warn};
use uuid::Uuid;
@@ -1705,6 +1690,15 @@ impl DiskAPI for LocalDisk {
};
out.write_obj(&meta).await?;
objs_returned += 1;
} else {
let fpath =
self.get_object_path(&opts.bucket, path_join_buf(&[opts.base_dir.as_str(), STORAGE_FORMAT_FILE]).as_str())?;
if let Ok(meta) = tokio::fs::metadata(fpath).await
&& meta.is_file()
{
return Err(DiskError::FileNotFound);
}
}
}
@@ -2268,184 +2262,6 @@ impl DiskAPI for LocalDisk {
Ok(info)
}
#[tracing::instrument(level = "info", skip_all)]
async fn ns_scanner(
&self,
cache: &DataUsageCache,
updates: Sender<DataUsageEntry>,
scan_mode: HealScanMode,
we_sleep: ShouldSleepFn,
) -> Result<DataUsageCache> {
self.scanning.fetch_add(1, Ordering::SeqCst);
defer!(|| { self.scanning.fetch_sub(1, Ordering::SeqCst) });
        // must be obtained before the metadata_sys lookups below
let Some(store) = new_object_layer_fn() else {
return Err(Error::other("errServerNotInitialized"));
};
let mut cache = cache.clone();
// Check if the current bucket has a configured lifecycle policy
if let Ok((lc, _)) = metadata_sys::get_lifecycle_config(&cache.info.name).await {
if lc_has_active_rules(&lc, "") {
cache.info.lifecycle = Some(lc);
}
}
// Check if the current bucket has replication configuration
if let Ok((rcfg, _)) = metadata_sys::get_replication_config(&cache.info.name).await {
if rep_has_active_rules(&rcfg, "", true) {
// TODO: globalBucketTargetSys
}
}
let vcfg = BucketVersioningSys::get(&cache.info.name).await.ok();
let loc = self.get_disk_location();
        // TODO: errors here need proper handling
let disks = store
.get_disks(loc.pool_idx.unwrap(), loc.disk_idx.unwrap())
.await
.map_err(|e| Error::other(e.to_string()))?;
let disk = Arc::new(LocalDisk::new(&self.endpoint(), false).await?);
let disk_clone = disk.clone();
cache.info.updates = Some(updates.clone());
let mut data_usage_info = scan_data_folder(
&disks,
disk,
&cache,
Box::new(move |item: &ScannerItem| {
let mut item = item.clone();
let disk = disk_clone.clone();
let vcfg = vcfg.clone();
Box::pin(async move {
if !item.path.ends_with(&format!("{SLASH_SEPARATOR}{STORAGE_FORMAT_FILE}")) {
return Err(Error::other(ERR_SKIP_FILE).into());
}
let stop_fn = ScannerMetrics::log(ScannerMetric::ScanObject);
let mut res = HashMap::new();
let done_sz = ScannerMetrics::time_size(ScannerMetric::ReadMetadata);
let buf = match disk.read_metadata(item.path.clone()).await {
Ok(buf) => buf,
Err(err) => {
res.insert("err".to_string(), err.to_string());
stop_fn(&res);
return Err(Error::other(ERR_SKIP_FILE).into());
}
};
done_sz(buf.len() as u64);
res.insert("metasize".to_string(), buf.len().to_string());
item.transform_meta_dir();
let meta_cache = MetaCacheEntry {
name: item.object_path().to_string_lossy().to_string(),
metadata: buf,
..Default::default()
};
let fivs = match meta_cache.file_info_versions(&item.bucket) {
Ok(fivs) => fivs,
Err(err) => {
res.insert("err".to_string(), err.to_string());
stop_fn(&res);
return Err(Error::other(ERR_SKIP_FILE).into());
}
};
let mut size_s = SizeSummary::default();
let done = ScannerMetrics::time(ScannerMetric::ApplyAll);
let obj_infos = match item.apply_versions_actions(&fivs.versions).await {
Ok(obj_infos) => obj_infos,
Err(err) => {
res.insert("err".to_string(), err.to_string());
stop_fn(&res);
return Err(Error::other(ERR_SKIP_FILE).into());
}
};
let versioned = if let Some(vcfg) = vcfg.as_ref() {
vcfg.versioned(item.object_path().to_str().unwrap_or_default())
} else {
false
};
let mut obj_deleted = false;
for info in obj_infos.iter() {
let done = ScannerMetrics::time(ScannerMetric::ApplyVersion);
let sz: i64;
(obj_deleted, sz) = item.apply_actions(info, &mut size_s).await;
done();
if obj_deleted {
break;
}
let actual_sz = match info.get_actual_size() {
Ok(size) => size,
Err(_) => continue,
};
if info.delete_marker {
size_s.delete_markers += 1;
}
if info.version_id.is_some() && sz == actual_sz {
size_s.versions += 1;
}
size_s.total_size += sz as usize;
if info.delete_marker {
continue;
}
}
for free_version in fivs.free_versions.iter() {
let _obj_info = ObjectInfo::from_file_info(
free_version,
&item.bucket,
&item.object_path().to_string_lossy(),
versioned,
);
let done = ScannerMetrics::time(ScannerMetric::TierObjSweep);
done();
}
// todo: global trace
if obj_deleted {
return Err(Error::other(ERR_IGNORE_FILE_CONTRIB).into());
}
done();
Ok(size_s)
})
}),
scan_mode,
we_sleep,
)
.await?;
data_usage_info.info.last_update = Some(SystemTime::now());
debug!("ns_scanner completed: {data_usage_info:?}");
Ok(data_usage_info)
}
#[tracing::instrument(skip(self))]
async fn healing(&self) -> Option<HealingTracker> {
let healing_file = path_join(&[
self.path(),
PathBuf::from(RUSTFS_META_BUCKET),
PathBuf::from(BUCKET_META_PREFIX),
PathBuf::from(HEALING_TRACKER_FILENAME),
]);
let b = match fs::read(healing_file).await {
Ok(b) => b,
Err(_) => return None,
};
if b.is_empty() {
return None;
}
match HealingTracker::unmarshal_msg(&b) {
Ok(h) => Some(h),
Err(_) => Some(HealingTracker::default()),
}
}
}
async fn get_disk_info(drive_path: PathBuf) -> Result<(rustfs_utils::os::DiskInfo, bool)> {

View File

@@ -30,11 +30,6 @@ pub const FORMAT_CONFIG_FILE: &str = "format.json";
pub const STORAGE_FORMAT_FILE: &str = "xl.meta";
pub const STORAGE_FORMAT_FILE_BACKUP: &str = "xl.meta.bkp";
use crate::heal::{
data_scanner::ShouldSleepFn,
data_usage_cache::{DataUsageCache, DataUsageEntry},
heal_commands::{HealScanMode, HealingTracker},
};
use crate::rpc::RemoteDisk;
use bytes::Bytes;
use endpoint::Endpoint;
@@ -46,10 +41,7 @@ use rustfs_madmin::info_commands::DiskMetrics;
use serde::{Deserialize, Serialize};
use std::{fmt::Debug, path::PathBuf, sync::Arc};
use time::OffsetDateTime;
use tokio::{
io::{AsyncRead, AsyncWrite},
sync::mpsc::Sender,
};
use tokio::io::{AsyncRead, AsyncWrite};
use uuid::Uuid;
pub type DiskStore = Arc<Disk>;
@@ -406,28 +398,6 @@ impl DiskAPI for Disk {
Disk::Remote(remote_disk) => remote_disk.disk_info(opts).await,
}
}
#[tracing::instrument(skip(self, cache, we_sleep, scan_mode))]
async fn ns_scanner(
&self,
cache: &DataUsageCache,
updates: Sender<DataUsageEntry>,
scan_mode: HealScanMode,
we_sleep: ShouldSleepFn,
) -> Result<DataUsageCache> {
match self {
Disk::Local(local_disk) => local_disk.ns_scanner(cache, updates, scan_mode, we_sleep).await,
Disk::Remote(remote_disk) => remote_disk.ns_scanner(cache, updates, scan_mode, we_sleep).await,
}
}
#[tracing::instrument(skip(self))]
async fn healing(&self) -> Option<HealingTracker> {
match self {
Disk::Local(local_disk) => local_disk.healing().await,
Disk::Remote(remote_disk) => remote_disk.healing().await,
}
}
}
pub async fn new_disk(ep: &Endpoint, opt: &DiskOption) -> Result<DiskStore> {
@@ -527,14 +497,6 @@ pub trait DiskAPI: Debug + Send + Sync + 'static {
async fn write_all(&self, volume: &str, path: &str, data: Bytes) -> Result<()>;
async fn read_all(&self, volume: &str, path: &str) -> Result<Bytes>;
async fn disk_info(&self, opts: &DiskInfoOptions) -> Result<DiskInfo>;
async fn ns_scanner(
&self,
cache: &DataUsageCache,
updates: Sender<DataUsageEntry>,
scan_mode: HealScanMode,
we_sleep: ShouldSleepFn,
) -> Result<DataUsageCache>;
async fn healing(&self) -> Option<HealingTracker>;
}
#[derive(Debug, Default, Serialize, Deserialize)]

View File

@@ -49,7 +49,8 @@ pub fn check_path_length(path_name: &str) -> Result<()> {
let mut count = 0usize;
for c in path_name.chars() {
match c {
'/' | '\\' if cfg!(target_os = "windows") => count = 0, // Reset
'/' => count = 0,
'\\' if cfg!(target_os = "windows") => count = 0, // Reset
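            // Note: in the previous single arm `'/' | '\\' if cfg!(target_os = "windows")`
            // the guard applied to both alternatives, so on non-Windows targets '/' never
            // reset the per-component counter; splitting the arms keeps '/' as an
            // unconditional reset and limits the '\\' reset to Windows.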
_ => {
count += 1;
if count > 255 {

View File

@@ -183,6 +183,9 @@ pub enum StorageError {
#[error("Io error: {0}")]
Io(std::io::Error),
#[error("Lock error: {0}")]
Lock(#[from] rustfs_lock::LockError),
}
impl StorageError {
@@ -409,6 +412,7 @@ impl Clone for StorageError {
StorageError::FirstDiskWait => StorageError::FirstDiskWait,
StorageError::TooManyOpenFiles => StorageError::TooManyOpenFiles,
StorageError::NoHealRequired => StorageError::NoHealRequired,
StorageError::Lock(e) => StorageError::Lock(e.clone()),
}
}
}
@@ -471,6 +475,7 @@ impl StorageError {
StorageError::ConfigNotFound => 0x35,
StorageError::TooManyOpenFiles => 0x36,
StorageError::NoHealRequired => 0x37,
StorageError::Lock(_) => 0x38,
}
}
@@ -535,6 +540,7 @@ impl StorageError {
0x35 => Some(StorageError::ConfigNotFound),
0x36 => Some(StorageError::TooManyOpenFiles),
0x37 => Some(StorageError::NoHealRequired),
0x38 => Some(StorageError::Lock(rustfs_lock::LockError::internal("Generic lock error".to_string()))),
_ => None,
}
}

View File

@@ -12,13 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::heal::mrf::MRFState;
use crate::{
bucket::lifecycle::bucket_lifecycle_ops::LifecycleSys,
disk::DiskStore,
endpoints::{EndpointServerPools, PoolEndpoints, SetupType},
event_notification::EventNotifier,
heal::{background_heal_ops::HealRoutine, heal_ops::AllHealState},
store::ECStore,
tier::tier::TierConfigMgr,
};
@@ -51,14 +49,10 @@ pub static ref GLOBAL_LOCAL_DISK_MAP: Arc<RwLock<HashMap<String, Option<DiskStor
pub static ref GLOBAL_LOCAL_DISK_SET_DRIVES: Arc<RwLock<TypeLocalDiskSetDrives>> = Arc::new(RwLock::new(Vec::new()));
pub static ref GLOBAL_Endpoints: OnceLock<EndpointServerPools> = OnceLock::new();
pub static ref GLOBAL_RootDiskThreshold: RwLock<u64> = RwLock::new(0);
pub static ref GLOBAL_BackgroundHealRoutine: Arc<HealRoutine> = HealRoutine::new();
pub static ref GLOBAL_BackgroundHealState: Arc<AllHealState> = AllHealState::new(false);
pub static ref GLOBAL_TierConfigMgr: Arc<RwLock<TierConfigMgr>> = TierConfigMgr::new();
pub static ref GLOBAL_LifecycleSys: Arc<LifecycleSys> = LifecycleSys::new();
pub static ref GLOBAL_EventNotifier: Arc<RwLock<EventNotifier>> = EventNotifier::new();
//pub static ref GLOBAL_RemoteTargetTransport
pub static ref GLOBAL_ALlHealState: Arc<AllHealState> = AllHealState::new(false);
pub static ref GLOBAL_MRFState: Arc<MRFState> = Arc::new(MRFState::new());
static ref globalDeploymentIDPtr: OnceLock<Uuid> = OnceLock::new();
pub static ref GLOBAL_BOOT_TIME: OnceCell<SystemTime> = OnceCell::new();
pub static ref GLOBAL_LocalNodeName: String = "127.0.0.1:9000".to_string();

View File

@@ -1,512 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use futures::future::join_all;
use rustfs_madmin::heal_commands::HealResultItem;
use rustfs_utils::path::{SLASH_SEPARATOR, path_join};
use std::{cmp::Ordering, env, path::PathBuf, sync::Arc, time::Duration};
use tokio::{
spawn,
sync::{
RwLock,
mpsc::{self, Receiver, Sender},
},
time::interval,
};
use tokio_util::sync::CancellationToken;
use tracing::{error, info};
use uuid::Uuid;
use super::{
heal_commands::HealOpts,
heal_ops::{HealSequence, new_bg_heal_sequence},
};
use crate::error::{Error, Result};
use crate::global::{GLOBAL_MRFState, get_background_services_cancel_token};
use crate::heal::error::ERR_RETRY_HEALING;
use crate::heal::heal_commands::{HEAL_ITEM_BUCKET, HealScanMode};
use crate::heal::heal_ops::{BG_HEALING_UUID, HealSource};
use crate::{
config::RUSTFS_CONFIG_PREFIX,
disk::{BUCKET_META_PREFIX, DiskAPI, DiskInfoOptions, RUSTFS_META_BUCKET, endpoint::Endpoint, error::DiskError},
global::{GLOBAL_BackgroundHealRoutine, GLOBAL_BackgroundHealState, GLOBAL_LOCAL_DISK_MAP},
heal::{
data_usage::{DATA_USAGE_CACHE_NAME, DATA_USAGE_ROOT},
data_usage_cache::DataUsageCache,
heal_commands::{init_healing_tracker, load_healing_tracker},
heal_ops::NOP_HEAL,
},
new_object_layer_fn,
store::get_disk_via_endpoint,
store_api::{BucketInfo, BucketOptions, StorageAPI},
};
pub static DEFAULT_MONITOR_NEW_DISK_INTERVAL: Duration = Duration::from_secs(10);
pub async fn init_auto_heal() {
info!("Initializing auto heal background task");
let Some(cancel_token) = get_background_services_cancel_token() else {
error!("Background services cancel token not initialized");
return;
};
init_background_healing().await;
let v = env::var("_RUSTFS_AUTO_DRIVE_HEALING").unwrap_or("on".to_string());
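    // Only the literal value "on" (also the default) enables the local-disk heal
    // monitor below; e.g. _RUSTFS_AUTO_DRIVE_HEALING=off (or any other value) skips it.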
if v == "on" {
info!("start monitor local disks and heal");
GLOBAL_BackgroundHealState
.push_heal_local_disks(&get_local_disks_to_heal().await)
.await;
let cancel_clone = cancel_token.clone();
spawn(async move {
monitor_local_disks_and_heal(cancel_clone).await;
});
}
let cancel_clone = cancel_token.clone();
spawn(async move {
GLOBAL_MRFState.heal_routine_with_cancel(cancel_clone).await;
});
}
async fn init_background_healing() {
let bg_seq = Arc::new(new_bg_heal_sequence());
for _ in 0..GLOBAL_BackgroundHealRoutine.workers {
let bg_seq_clone = bg_seq.clone();
spawn(async {
GLOBAL_BackgroundHealRoutine.add_worker(bg_seq_clone).await;
});
}
let _ = GLOBAL_BackgroundHealState.launch_new_heal_sequence(bg_seq).await;
}
pub async fn get_local_disks_to_heal() -> Vec<Endpoint> {
let mut disks_to_heal = Vec::new();
for (_, disk) in GLOBAL_LOCAL_DISK_MAP.read().await.iter() {
if let Some(disk) = disk {
if let Err(err) = disk.disk_info(&DiskInfoOptions::default()).await {
if err == DiskError::UnformattedDisk {
info!("get_local_disks_to_heal, disk is unformatted: {}", err);
disks_to_heal.push(disk.endpoint());
}
}
let h = disk.healing().await;
if let Some(h) = h {
if !h.finished {
info!("get_local_disks_to_heal, disk healing not finished");
disks_to_heal.push(disk.endpoint());
}
}
}
}
// todo
// if disks_to_heal.len() == GLOBAL_Endpoints.read().await.n {
// }
disks_to_heal
}
async fn monitor_local_disks_and_heal(cancel_token: CancellationToken) {
info!("Auto heal monitor started");
let mut interval = interval(DEFAULT_MONITOR_NEW_DISK_INTERVAL);
loop {
tokio::select! {
_ = cancel_token.cancelled() => {
info!("Auto heal monitor received shutdown signal, exiting gracefully");
break;
}
_ = interval.tick() => {
let heal_disks = GLOBAL_BackgroundHealState.get_heal_local_disk_endpoints().await;
if heal_disks.is_empty() {
info!("heal local disks is empty");
interval.reset();
continue;
}
info!("heal local disks: {:?}", heal_disks);
let store = new_object_layer_fn().expect("errServerNotInitialized");
if let (_result, Some(err)) = store.heal_format(false).await.expect("heal format failed") {
error!("heal local disk format error: {}", err);
if err == Error::NoHealRequired {
} else {
info!("heal format err: {}", err.to_string());
interval.reset();
continue;
}
}
let mut futures = Vec::new();
for disk in heal_disks.into_ref().iter() {
let disk_clone = disk.clone();
let cancel_clone = cancel_token.clone();
futures.push(async move {
let disk_for_cancel = disk_clone.clone();
tokio::select! {
_ = cancel_clone.cancelled() => {
info!("Disk healing task cancelled for disk: {}", disk_for_cancel);
}
_ = async {
GLOBAL_BackgroundHealState
.set_disk_healing_status(disk_clone.clone(), true)
.await;
if heal_fresh_disk(&disk_clone).await.is_err() {
info!("heal_fresh_disk is err");
GLOBAL_BackgroundHealState
.set_disk_healing_status(disk_clone.clone(), false)
.await;
}
GLOBAL_BackgroundHealState.pop_heal_local_disks(&[disk_clone]).await;
} => {}
}
});
}
let _ = join_all(futures).await;
interval.reset();
}
}
}
}
async fn heal_fresh_disk(endpoint: &Endpoint) -> Result<()> {
let (pool_idx, set_idx) = (endpoint.pool_idx as usize, endpoint.set_idx as usize);
let disk = match get_disk_via_endpoint(endpoint).await {
Some(disk) => disk,
None => {
return Err(Error::other(format!(
"Unexpected error disk must be initialized by now after formatting: {endpoint}"
)));
}
};
if let Err(err) = disk.disk_info(&DiskInfoOptions::default()).await {
match err {
DiskError::DriveIsRoot => {
return Ok(());
}
DiskError::UnformattedDisk => {}
_ => {
return Err(err.into());
}
}
}
let mut tracker = match load_healing_tracker(&Some(disk.clone())).await {
Ok(tracker) => tracker,
Err(err) => {
match err {
DiskError::FileNotFound => {
return Ok(());
}
_ => {
info!(
"Unable to load healing tracker on '{}': {}, re-initializing..",
disk.to_string(),
err.to_string()
);
}
}
init_healing_tracker(disk.clone(), &Uuid::new_v4().to_string()).await?
}
};
info!(
"Healing drive '{}' - 'mc admin heal alias/ --verbose' to check the current status.",
endpoint.to_string()
);
let Some(store) = new_object_layer_fn() else {
return Err(Error::other("errServerNotInitialized"));
};
let mut buckets = store.list_bucket(&BucketOptions::default()).await?;
buckets.push(BucketInfo {
name: path_join(&[PathBuf::from(RUSTFS_META_BUCKET), PathBuf::from(RUSTFS_CONFIG_PREFIX)])
.to_string_lossy()
.to_string(),
..Default::default()
});
buckets.push(BucketInfo {
name: path_join(&[PathBuf::from(RUSTFS_META_BUCKET), PathBuf::from(BUCKET_META_PREFIX)])
.to_string_lossy()
.to_string(),
..Default::default()
});
buckets.sort_by(|a, b| {
let a_has_prefix = a.name.starts_with(RUSTFS_META_BUCKET);
let b_has_prefix = b.name.starts_with(RUSTFS_META_BUCKET);
match (a_has_prefix, b_has_prefix) {
(true, false) => Ordering::Less,
(false, true) => Ordering::Greater,
_ => b.created.cmp(&a.created),
}
});
if let Ok(cache) = DataUsageCache::load(&store.pools[pool_idx].disk_set[set_idx], DATA_USAGE_CACHE_NAME).await {
let data_usage_info = cache.dui(DATA_USAGE_ROOT, &Vec::new());
tracker.objects_total_count = data_usage_info.objects_total_count;
tracker.objects_total_size = data_usage_info.objects_total_size;
};
tracker.set_queue_buckets(&buckets).await;
tracker.save().await?;
let tracker = Arc::new(RwLock::new(tracker));
let qb = tracker.read().await.queue_buckets.clone();
store.pools[pool_idx].disk_set[set_idx]
.clone()
.heal_erasure_set(&qb, tracker.clone())
.await?;
let mut tracker_w = tracker.write().await;
if tracker_w.items_failed > 0 && tracker_w.retry_attempts < 4 {
tracker_w.retry_attempts += 1;
tracker_w.reset_healing().await;
if let Err(err) = tracker_w.update().await {
info!("update tracker failed: {}", err.to_string());
}
return Err(Error::other(ERR_RETRY_HEALING));
}
if tracker_w.items_failed > 0 {
info!(
"Healing of drive '{}' is incomplete, retried {} times (healed: {}, skipped: {}, failed: {}).",
disk.to_string(),
tracker_w.retry_attempts,
tracker_w.items_healed,
tracker_w.item_skipped,
tracker_w.items_failed
);
} else if tracker_w.retry_attempts > 0 {
info!(
"Healing of drive '{}' is incomplete, retried {} times (healed: {}, skipped: {}).",
disk.to_string(),
tracker_w.retry_attempts,
tracker_w.items_healed,
tracker_w.item_skipped
);
} else {
info!(
"Healing of drive '{}' is finished (healed: {}, skipped: {}).",
disk.to_string(),
tracker_w.items_healed,
tracker_w.item_skipped
);
}
if tracker_w.heal_id.is_empty() {
if let Err(err) = tracker_w.delete().await {
error!("delete tracker failed: {}", err.to_string());
}
}
let Some(store) = new_object_layer_fn() else {
return Err(Error::other("errServerNotInitialized"));
};
let disks = store.get_disks(pool_idx, set_idx).await?;
for disk in disks.into_iter() {
if disk.is_none() {
continue;
}
let mut tracker = match load_healing_tracker(&disk).await {
Ok(tracker) => tracker,
Err(err) => {
match err {
DiskError::FileNotFound => {}
_ => {
info!("Unable to load healing tracker on '{:?}': {}, re-initializing..", disk, err.to_string());
}
}
continue;
}
};
if tracker.heal_id == tracker_w.heal_id {
tracker.finished = true;
tracker.update().await?;
}
}
Ok(())
}
#[derive(Debug)]
pub struct HealTask {
pub bucket: String,
pub object: String,
pub version_id: String,
pub opts: HealOpts,
pub resp_tx: Option<Sender<HealResult>>,
pub resp_rx: Option<Receiver<HealResult>>,
}
impl HealTask {
pub fn new(bucket: &str, object: &str, version_id: &str, opts: &HealOpts) -> Self {
Self {
bucket: bucket.to_string(),
object: object.to_string(),
version_id: version_id.to_string(),
opts: *opts,
resp_tx: None,
resp_rx: None,
}
}
}
#[derive(Debug)]
pub struct HealResult {
pub result: HealResultItem,
pub err: Option<Error>,
}
pub struct HealRoutine {
pub tasks_tx: Sender<HealTask>,
tasks_rx: RwLock<Receiver<HealTask>>,
workers: usize,
}
impl HealRoutine {
pub fn new() -> Arc<Self> {
let mut workers = num_cpus::get() / 2;
if let Ok(env_heal_workers) = env::var("_RUSTFS_HEAL_WORKERS") {
if let Ok(num_healers) = env_heal_workers.parse::<usize>() {
workers = num_healers;
}
}
if workers == 0 {
workers = 4;
}
let (tx, rx) = mpsc::channel(100);
Arc::new(Self {
tasks_tx: tx,
tasks_rx: RwLock::new(rx),
workers,
})
}
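    // Worker sizing, for illustration: _RUSTFS_HEAL_WORKERS=8 -> 8 workers;
    // unset with 16 logical CPUs -> 8 (cpus / 2); unset with a single CPU -> 0,
    // which is bumped to the 4-worker floor above.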
pub async fn add_worker(&self, bgseq: Arc<HealSequence>) {
loop {
let mut d_res = HealResultItem::default();
let d_err: Option<Error>;
match self.tasks_rx.write().await.recv().await {
Some(task) => {
info!("got task: {:?}", task);
if task.bucket == NOP_HEAL {
d_err = Some(Error::other("skip file"));
} else if task.bucket == SLASH_SEPARATOR {
match heal_disk_format(task.opts).await {
Ok((res, err)) => {
d_res = res;
d_err = err;
}
Err(err) => d_err = Some(err),
}
} else {
let store = new_object_layer_fn().expect("errServerNotInitialized");
if task.object.is_empty() {
match store.heal_bucket(&task.bucket, &task.opts).await {
Ok(res) => {
d_res = res;
d_err = None;
}
Err(err) => d_err = Some(err),
}
} else {
match store
.heal_object(&task.bucket, &task.object, &task.version_id, &task.opts)
.await
{
Ok((res, err)) => {
d_res = res;
d_err = err;
}
Err(err) => d_err = Some(err),
}
}
}
info!("task finished, task: {:?}", task);
if let Some(resp_tx) = task.resp_tx {
let _ = resp_tx
.send(HealResult {
result: d_res,
err: d_err,
})
.await;
} else {
                // when resp_tx is not set, the caller is not waiting, but we
                // still update the relevant metrics on its behalf
if d_err.is_none() {
bgseq.count_healed(d_res.heal_item_type).await;
} else {
bgseq.count_failed(d_res.heal_item_type).await;
}
}
}
None => {
info!("add_worker, tasks_rx was closed, return");
return;
}
}
}
}
}
// pub fn active_listeners() -> Result<usize> {
// }
async fn heal_disk_format(opts: HealOpts) -> Result<(HealResultItem, Option<Error>)> {
let Some(store) = new_object_layer_fn() else {
return Err(Error::other("errServerNotInitialized"));
};
let (res, err) = store.heal_format(opts.dry_run).await?;
    // return any error, ignoring the error returned when disks have
    // already been healed.
if err.is_some() {
return Ok((HealResultItem::default(), err));
}
Ok((res, err))
}
pub(crate) async fn heal_bucket(bucket: &str) -> Result<()> {
let (bg_seq, ok) = GLOBAL_BackgroundHealState.get_heal_sequence_by_token(BG_HEALING_UUID).await;
if ok {
// bg_seq must be Some when ok is true
return bg_seq
.unwrap()
.queue_heal_task(
HealSource {
bucket: bucket.to_string(),
..Default::default()
},
HEAL_ITEM_BUCKET.to_string(),
)
.await;
}
Ok(())
}
pub(crate) async fn heal_object(bucket: &str, object: &str, version_id: &str, scan_mode: HealScanMode) -> Result<()> {
let (bg_seq, ok) = GLOBAL_BackgroundHealState.get_heal_sequence_by_token(BG_HEALING_UUID).await;
if ok {
// bg_seq must be Some when ok is true
return HealSequence::heal_object(bg_seq.unwrap(), bucket, object, version_id, scan_mode).await;
}
Ok(())
}
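// --- Illustrative sketch (not part of the original, now-removed module) ---
// How callers queued background heals through the two helpers above; the bucket
// and object names are hypothetical, and the scan mode is supplied by the caller.
#[allow(dead_code)]
async fn example_queue_heals(scan_mode: HealScanMode) -> Result<()> {
    heal_bucket("demo-bucket").await?;
    heal_object("demo-bucket", "photos/cat.png", "", scan_mode).await?;
    Ok(())
}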

File diff suppressed because it is too large Load Diff

View File

@@ -1,221 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::error::{Error, Result};
use crate::{
bucket::metadata_sys::get_replication_config,
config::com::{read_config, save_config},
disk::{BUCKET_META_PREFIX, RUSTFS_META_BUCKET},
error::to_object_err,
new_object_layer_fn,
store::ECStore,
};
use lazy_static::lazy_static;
use rustfs_utils::path::SLASH_SEPARATOR;
use serde::{Deserialize, Serialize};
use std::{collections::HashMap, sync::Arc, time::SystemTime};
use tokio::sync::mpsc::Receiver;
use tracing::{error, warn};
pub const DATA_USAGE_ROOT: &str = SLASH_SEPARATOR;
const DATA_USAGE_OBJ_NAME: &str = ".usage.json";
const DATA_USAGE_BLOOM_NAME: &str = ".bloomcycle.bin";
pub const DATA_USAGE_CACHE_NAME: &str = ".usage-cache.bin";
lazy_static! {
pub static ref DATA_USAGE_BUCKET: String = format!("{}{}{}", RUSTFS_META_BUCKET, SLASH_SEPARATOR, BUCKET_META_PREFIX);
pub static ref DATA_USAGE_OBJ_NAME_PATH: String = format!("{}{}{}", BUCKET_META_PREFIX, SLASH_SEPARATOR, DATA_USAGE_OBJ_NAME);
pub static ref DATA_USAGE_BLOOM_NAME_PATH: String =
format!("{}{}{}", BUCKET_META_PREFIX, SLASH_SEPARATOR, DATA_USAGE_BLOOM_NAME);
pub static ref BACKGROUND_HEAL_INFO_PATH: String =
format!("{}{}{}", BUCKET_META_PREFIX, SLASH_SEPARATOR, ".background-heal.json");
}
// BucketTargetUsageInfo - bucket target usage info provides
// - replicated size for all objects sent to this target
// - replica size for all objects received from this target
// - replication pending size for all objects pending replication to this target
// - replication failed size for all objects failed replication to this target
// - replica pending count
// - replica failed count
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct BucketTargetUsageInfo {
pub replication_pending_size: u64,
pub replication_failed_size: u64,
pub replicated_size: u64,
pub replica_size: u64,
pub replication_pending_count: u64,
pub replication_failed_count: u64,
pub replicated_count: u64,
}
// BucketUsageInfo - bucket usage info provides
// - total size of the bucket
// - total objects in a bucket
// - object size histogram per bucket
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct BucketUsageInfo {
pub size: u64,
    // The following five fields suffixed with V1 are kept for backward compatibility
    // Total size for objects that have not yet been replicated
pub replication_pending_size_v1: u64,
    // Total size for objects that have witnessed one or more failures and will be retried
pub replication_failed_size_v1: u64,
// Total size for objects that have been replicated to destination
pub replicated_size_v1: u64,
// Total number of objects pending replication
pub replication_pending_count_v1: u64,
// Total number of objects that failed replication
pub replication_failed_count_v1: u64,
pub objects_count: u64,
pub object_size_histogram: HashMap<String, u64>,
pub object_versions_histogram: HashMap<String, u64>,
pub versions_count: u64,
pub delete_markers_count: u64,
pub replica_size: u64,
pub replica_count: u64,
pub replication_info: HashMap<String, BucketTargetUsageInfo>,
}
// DataUsageInfo represents data usage stats of the underlying Object API
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct DataUsageInfo {
pub total_capacity: u64,
pub total_used_capacity: u64,
pub total_free_capacity: u64,
// LastUpdate is the timestamp of when the data usage info was last updated.
// This does not indicate a full scan.
pub last_update: Option<SystemTime>,
// Objects total count across all buckets
pub objects_total_count: u64,
// Versions total count across all buckets
pub versions_total_count: u64,
// Delete markers total count across all buckets
pub delete_markers_total_count: u64,
// Objects total size across all buckets
pub objects_total_size: u64,
pub replication_info: HashMap<String, BucketTargetUsageInfo>,
// Total number of buckets in this cluster
pub buckets_count: u64,
// Buckets usage info provides following information across all buckets
// - total size of the bucket
// - total objects in a bucket
// - object size histogram per bucket
pub buckets_usage: HashMap<String, BucketUsageInfo>,
// Deprecated kept here for backward compatibility reasons.
pub bucket_sizes: HashMap<String, u64>,
// Todo: TierStats
// TierStats contains per-tier stats of all configured remote tiers
}
pub async fn store_data_usage_in_backend(mut rx: Receiver<DataUsageInfo>) {
let Some(store) = new_object_layer_fn() else {
error!("errServerNotInitialized");
return;
};
let mut attempts = 1;
loop {
match rx.recv().await {
Some(data_usage_info) => {
if let Ok(data) = serde_json::to_vec(&data_usage_info) {
if attempts > 10 {
let _ =
save_config(store.clone(), &format!("{}{}", *DATA_USAGE_OBJ_NAME_PATH, ".bkp"), data.clone()).await;
attempts += 1;
}
let _ = save_config(store.clone(), &DATA_USAGE_OBJ_NAME_PATH, data).await;
attempts += 1;
} else {
continue;
}
}
None => {
return;
}
}
}
}
// TODO: cancel ctx
pub async fn load_data_usage_from_backend(store: Arc<ECStore>) -> Result<DataUsageInfo> {
let buf = match read_config(store, &DATA_USAGE_OBJ_NAME_PATH).await {
Ok(data) => data,
Err(e) => {
error!("Failed to read data usage info from backend: {}", e);
if e == Error::ConfigNotFound {
return Ok(DataUsageInfo::default());
}
return Err(to_object_err(e, vec![RUSTFS_META_BUCKET, &DATA_USAGE_OBJ_NAME_PATH]));
}
};
let mut data_usage_info: DataUsageInfo = serde_json::from_slice(&buf)?;
warn!("Loaded data usage info from backend {:?}", &data_usage_info);
if data_usage_info.buckets_usage.is_empty() {
data_usage_info.buckets_usage = data_usage_info
.bucket_sizes
.iter()
.map(|(bucket, &size)| {
(
bucket.clone(),
BucketUsageInfo {
size,
..Default::default()
},
)
})
.collect();
}
if data_usage_info.bucket_sizes.is_empty() {
data_usage_info.bucket_sizes = data_usage_info
.buckets_usage
.iter()
.map(|(bucket, bui)| (bucket.clone(), bui.size))
.collect();
}
for (bucket, bui) in &data_usage_info.buckets_usage {
if bui.replicated_size_v1 > 0
|| bui.replication_failed_count_v1 > 0
|| bui.replication_failed_size_v1 > 0
|| bui.replication_pending_count_v1 > 0
{
if let Ok((cfg, _)) = get_replication_config(bucket).await {
if !cfg.role.is_empty() {
data_usage_info.replication_info.insert(
cfg.role.clone(),
BucketTargetUsageInfo {
replication_failed_size: bui.replication_failed_size_v1,
replication_failed_count: bui.replication_failed_count_v1,
replicated_size: bui.replicated_size_v1,
replication_pending_count: bui.replication_pending_count_v1,
replication_pending_size: bui.replication_pending_size_v1,
..Default::default()
},
);
}
}
}
}
Ok(data_usage_info)
}

View File

@@ -1,928 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::config::com::save_config;
use crate::disk::{BUCKET_META_PREFIX, RUSTFS_META_BUCKET};
use crate::error::{Error, Result};
use crate::new_object_layer_fn;
use crate::set_disk::SetDisks;
use crate::store_api::{BucketInfo, ObjectIO, ObjectOptions};
use bytesize::ByteSize;
use http::HeaderMap;
use path_clean::PathClean;
use rand::Rng;
use rmp_serde::Serializer;
use s3s::dto::{BucketLifecycleConfiguration, ReplicationConfiguration};
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::hash::{DefaultHasher, Hash, Hasher};
use std::path::Path;
use std::time::{Duration, SystemTime};
use tokio::sync::mpsc::Sender;
use tokio::time::sleep;
use super::data_scanner::{DATA_SCANNER_FORCE_COMPACT_AT_FOLDERS, SizeSummary};
use super::data_usage::{BucketTargetUsageInfo, BucketUsageInfo, DataUsageInfo};
// DATA_USAGE_BUCKET_LEN must equal the length of OBJECTS_HISTOGRAM_INTERVALS
pub const DATA_USAGE_BUCKET_LEN: usize = 11;
pub const DATA_USAGE_VERSION_LEN: usize = 7;
pub type DataUsageHashMap = HashSet<String>;
struct ObjectHistogramInterval {
name: &'static str,
start: u64,
end: u64,
}
const OBJECTS_HISTOGRAM_INTERVALS: [ObjectHistogramInterval; DATA_USAGE_BUCKET_LEN] = [
ObjectHistogramInterval {
name: "LESS_THAN_1024_B",
start: 0,
end: ByteSize::kib(1).as_u64() - 1,
},
ObjectHistogramInterval {
name: "BETWEEN_1024_B_AND_64_KB",
start: ByteSize::kib(1).as_u64(),
end: ByteSize::kib(64).as_u64() - 1,
},
ObjectHistogramInterval {
name: "BETWEEN_64_KB_AND_256_KB",
start: ByteSize::kib(64).as_u64(),
end: ByteSize::kib(256).as_u64() - 1,
},
ObjectHistogramInterval {
name: "BETWEEN_256_KB_AND_512_KB",
start: ByteSize::kib(256).as_u64(),
end: ByteSize::kib(512).as_u64() - 1,
},
ObjectHistogramInterval {
name: "BETWEEN_512_KB_AND_1_MB",
start: ByteSize::kib(512).as_u64(),
end: ByteSize::mib(1).as_u64() - 1,
},
ObjectHistogramInterval {
name: "BETWEEN_1024B_AND_1_MB",
start: ByteSize::kib(1).as_u64(),
end: ByteSize::mib(1).as_u64() - 1,
},
ObjectHistogramInterval {
name: "BETWEEN_1_MB_AND_10_MB",
start: ByteSize::mib(1).as_u64(),
end: ByteSize::mib(10).as_u64() - 1,
},
ObjectHistogramInterval {
name: "BETWEEN_10_MB_AND_64_MB",
start: ByteSize::mib(10).as_u64(),
end: ByteSize::mib(64).as_u64() - 1,
},
ObjectHistogramInterval {
name: "BETWEEN_64_MB_AND_128_MB",
start: ByteSize::mib(64).as_u64(),
end: ByteSize::mib(128).as_u64() - 1,
},
ObjectHistogramInterval {
name: "BETWEEN_128_MB_AND_512_MB",
start: ByteSize::mib(128).as_u64(),
end: ByteSize::mib(512).as_u64() - 1,
},
ObjectHistogramInterval {
name: "GREATER_THAN_512_MB",
start: ByteSize::mib(512).as_u64(),
end: u64::MAX,
},
];
const OBJECTS_VERSION_COUNT_INTERVALS: [ObjectHistogramInterval; DATA_USAGE_VERSION_LEN] = [
ObjectHistogramInterval {
name: "UNVERSIONED",
start: 0,
end: 0,
},
ObjectHistogramInterval {
name: "SINGLE_VERSION",
start: 1,
end: 1,
},
ObjectHistogramInterval {
name: "BETWEEN_2_AND_10",
start: 2,
end: 9,
},
ObjectHistogramInterval {
name: "BETWEEN_10_AND_100",
start: 10,
end: 99,
},
ObjectHistogramInterval {
name: "BETWEEN_100_AND_1000",
start: 100,
end: 999,
},
ObjectHistogramInterval {
name: "BETWEEN_1000_AND_10000",
start: 1000,
end: 9999,
},
ObjectHistogramInterval {
name: "GREATER_THAN_10000",
start: 10000,
end: u64::MAX,
},
];
#[derive(Clone, Copy, Default)]
pub struct TierStats {
pub total_size: u64,
pub num_versions: i32,
pub num_objects: i32,
}
impl TierStats {
pub fn add(&self, u: &TierStats) -> TierStats {
TierStats {
total_size: self.total_size + u.total_size,
num_versions: self.num_versions + u.num_versions,
num_objects: self.num_objects + u.num_objects,
}
}
}
struct AllTierStats {
tiers: HashMap<String, TierStats>,
}
impl AllTierStats {
pub fn new() -> Self {
Self { tiers: HashMap::new() }
}
    fn add_sizes(&mut self, tiers: HashMap<String, TierStats>) {
        for (tier, st) in tiers {
            // Start from default stats so a first-seen tier does not panic on a missing key.
            let cur = self.tiers.get(&tier).copied().unwrap_or_default();
            self.tiers.insert(tier, cur.add(&st));
        }
    }
    fn merge(&mut self, other: AllTierStats) {
        for (tier, st) in other.tiers {
            let cur = self.tiers.get(&tier).copied().unwrap_or_default();
            self.tiers.insert(tier, cur.add(&st));
        }
    }
fn populate_stats(&self, stats: &mut HashMap<String, TierStats>) {
for (tier, st) in &self.tiers {
stats.insert(
tier.clone(),
TierStats {
total_size: st.total_size,
num_versions: st.num_versions,
num_objects: st.num_objects,
},
);
}
}
}
// SizeHistogram is a histogram of object sizes.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SizeHistogram(Vec<u64>);
impl Default for SizeHistogram {
fn default() -> Self {
Self(vec![0; DATA_USAGE_BUCKET_LEN])
}
}
impl SizeHistogram {
fn add(&mut self, size: u64) {
for (idx, interval) in OBJECTS_HISTOGRAM_INTERVALS.iter().enumerate() {
if size >= interval.start && size <= interval.end {
self.0[idx] += 1;
break;
}
}
}
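    // to_map note: the sub-1 MiB buckets (1 KiB..64 KiB, 64..256 KiB, 256..512 KiB,
    // 512 KiB..1 MiB) are each reported individually and also summed into the combined
    // "BETWEEN_1024B_AND_1_MB" entry via `spl_count`; the combined interval is listed
    // after its parts, so the sum is complete by the time it is inserted.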
pub fn to_map(&self) -> HashMap<String, u64> {
let mut res = HashMap::new();
let mut spl_count = 0;
for (count, oh) in self.0.iter().zip(OBJECTS_HISTOGRAM_INTERVALS.iter()) {
if ByteSize::kib(1).as_u64() == oh.start && oh.end == ByteSize::mib(1).as_u64() - 1 {
res.insert(oh.name.to_string(), spl_count);
} else if ByteSize::kib(1).as_u64() <= oh.start && oh.end < ByteSize::mib(1).as_u64() {
spl_count += count;
res.insert(oh.name.to_string(), *count);
} else {
res.insert(oh.name.to_string(), *count);
}
}
res
}
}
// VersionsHistogram is a histogram of the number of versions per object.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct VersionsHistogram(Vec<u64>);
impl Default for VersionsHistogram {
fn default() -> Self {
Self(vec![0; DATA_USAGE_VERSION_LEN])
}
}
impl VersionsHistogram {
fn add(&mut self, size: u64) {
for (idx, interval) in OBJECTS_VERSION_COUNT_INTERVALS.iter().enumerate() {
if size >= interval.start && size <= interval.end {
self.0[idx] += 1;
break;
}
}
}
pub fn to_map(&self) -> HashMap<String, u64> {
let mut res = HashMap::new();
for (count, ov) in self.0.iter().zip(OBJECTS_VERSION_COUNT_INTERVALS.iter()) {
res.insert(ov.name.to_string(), *count);
}
res
}
}
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct ReplicationStats {
pub pending_size: u64,
pub replicated_size: u64,
pub failed_size: u64,
pub failed_count: u64,
pub pending_count: u64,
pub missed_threshold_size: u64,
pub after_threshold_size: u64,
pub missed_threshold_count: u64,
pub after_threshold_count: u64,
pub replicated_count: u64,
}
impl ReplicationStats {
pub fn empty(&self) -> bool {
self.replicated_size == 0 && self.failed_size == 0 && self.failed_count == 0
}
}
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct ReplicationAllStats {
pub targets: HashMap<String, ReplicationStats>,
pub replica_size: u64,
pub replica_count: u64,
}
impl ReplicationAllStats {
pub fn empty(&self) -> bool {
if self.replica_size != 0 && self.replica_count != 0 {
return false;
}
for (_, v) in self.targets.iter() {
if !v.empty() {
return false;
}
}
true
}
}
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct DataUsageEntry {
pub children: DataUsageHashMap,
// These fields do not include any children.
pub size: usize,
pub objects: usize,
pub versions: usize,
pub delete_markers: usize,
pub obj_sizes: SizeHistogram,
pub obj_versions: VersionsHistogram,
pub replication_stats: Option<ReplicationAllStats>,
// Todo: tier
// pub all_tier_stats: ,
pub compacted: bool,
}
impl DataUsageEntry {
pub fn add_child(&mut self, hash: &DataUsageHash) {
if self.children.contains(&hash.key()) {
return;
}
self.children.insert(hash.key());
}
pub fn add_sizes(&mut self, summary: &SizeSummary) {
self.size += summary.total_size;
self.versions += summary.versions;
self.delete_markers += summary.delete_markers;
self.obj_sizes.add(summary.total_size as u64);
self.obj_versions.add(summary.versions as u64);
let replication_stats = if self.replication_stats.is_none() {
self.replication_stats = Some(ReplicationAllStats::default());
self.replication_stats.as_mut().unwrap()
} else {
self.replication_stats.as_mut().unwrap()
};
replication_stats.replica_size += summary.replica_size as u64;
replication_stats.replica_count += summary.replica_count as u64;
for (arn, st) in &summary.repl_target_stats {
let tgt_stat = replication_stats
.targets
.entry(arn.to_string())
.or_insert(ReplicationStats::default());
tgt_stat.pending_size += st.pending_size as u64;
tgt_stat.failed_size += st.failed_size as u64;
tgt_stat.replicated_size += st.replicated_size as u64;
tgt_stat.replicated_count += st.replicated_count as u64;
tgt_stat.failed_count += st.failed_count as u64;
tgt_stat.pending_count += st.pending_count as u64;
}
// Todo:: tiers
}
pub fn merge(&mut self, other: &DataUsageEntry) {
self.objects += other.objects;
self.versions += other.versions;
self.delete_markers += other.delete_markers;
self.size += other.size;
if let Some(o_rep) = &other.replication_stats {
if self.replication_stats.is_none() {
self.replication_stats = Some(ReplicationAllStats::default());
}
let s_rep = self.replication_stats.as_mut().unwrap();
s_rep.targets.clear();
s_rep.replica_size += o_rep.replica_size;
s_rep.replica_count += o_rep.replica_count;
for (arn, stat) in o_rep.targets.iter() {
let st = s_rep.targets.entry(arn.clone()).or_default();
*st = ReplicationStats {
pending_size: stat.pending_size + st.pending_size,
failed_size: stat.failed_size + st.failed_size,
replicated_size: stat.replicated_size + st.replicated_size,
pending_count: stat.pending_count + st.pending_count,
failed_count: stat.failed_count + st.failed_count,
replicated_count: stat.replicated_count + st.replicated_count,
..Default::default()
};
}
}
for (i, v) in other.obj_sizes.0.iter().enumerate() {
self.obj_sizes.0[i] += v;
}
for (i, v) in other.obj_versions.0.iter().enumerate() {
self.obj_versions.0[i] += v;
}
// todo: tiers
}
}
#[derive(Clone)]
pub struct DataUsageEntryInfo {
pub name: String,
pub parent: String,
pub entry: DataUsageEntry,
}
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct DataUsageCacheInfo {
pub name: String,
pub next_cycle: u32,
pub last_update: Option<SystemTime>,
pub skip_healing: bool,
#[serde(skip)]
pub lifecycle: Option<BucketLifecycleConfiguration>,
#[serde(skip)]
pub updates: Option<Sender<DataUsageEntry>>,
#[serde(skip)]
pub replication: Option<ReplicationConfiguration>,
}
// impl Default for DataUsageCacheInfo {
// fn default() -> Self {
// Self {
// name: Default::default(),
// next_cycle: Default::default(),
// last_update: SystemTime::now(),
// skip_healing: Default::default(),
// updates: Default::default(),
// replication: Default::default(),
// }
// }
// }
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct DataUsageCache {
pub info: DataUsageCacheInfo,
pub cache: HashMap<String, DataUsageEntry>,
}
impl DataUsageCache {
pub async fn load(store: &SetDisks, name: &str) -> Result<Self> {
let mut d = DataUsageCache::default();
let mut retries = 0;
while retries < 5 {
let path = Path::new(BUCKET_META_PREFIX).join(name);
// warn!("Loading data usage cache from backend: {}", path.display());
match store
.get_object_reader(
RUSTFS_META_BUCKET,
path.to_str().unwrap(),
None,
HeaderMap::new(),
&ObjectOptions {
no_lock: true,
..Default::default()
},
)
.await
{
Ok(mut reader) => {
if let Ok(info) = Self::unmarshal(&reader.read_all().await?) {
d = info
}
break;
}
Err(err) => {
// warn!("Failed to load data usage cache from backend: {}", &err);
match err {
Error::FileNotFound | Error::VolumeNotFound => {
match store
.get_object_reader(
RUSTFS_META_BUCKET,
name,
None,
HeaderMap::new(),
&ObjectOptions {
no_lock: true,
..Default::default()
},
)
.await
{
Ok(mut reader) => {
if let Ok(info) = Self::unmarshal(&reader.read_all().await?) {
d = info
}
break;
}
Err(_) => match err {
Error::FileNotFound | Error::VolumeNotFound => {
break;
}
_ => {}
},
}
}
_ => {
break;
}
}
}
}
retries += 1;
let dur = {
let mut rng = rand::rng();
rng.random_range(0..1_000)
};
sleep(Duration::from_millis(dur)).await;
}
Ok(d)
}
pub async fn save(&self, name: &str) -> Result<()> {
let Some(store) = new_object_layer_fn() else {
return Err(Error::other("errServerNotInitialized"));
};
let buf = self.marshal_msg()?;
let buf_clone = buf.clone();
let store_clone = store.clone();
let name = Path::new(BUCKET_META_PREFIX).join(name).to_string_lossy().to_string();
let name_clone = name.clone();
tokio::spawn(async move {
let _ = save_config(store_clone, &format!("{}{}", &name_clone, ".bkp"), buf_clone).await;
});
save_config(store, &name, buf).await?;
Ok(())
}
pub fn replace(&mut self, path: &str, parent: &str, e: DataUsageEntry) {
let hash = hash_path(path);
self.cache.insert(hash.key(), e);
if !parent.is_empty() {
let phash = hash_path(parent);
let p = {
let p = self.cache.entry(phash.key()).or_default();
p.add_child(&hash);
p.clone()
};
self.cache.insert(phash.key(), p);
}
}
pub fn replace_hashed(&mut self, hash: &DataUsageHash, parent: &Option<DataUsageHash>, e: &DataUsageEntry) {
self.cache.insert(hash.key(), e.clone());
if let Some(parent) = parent {
self.cache.entry(parent.key()).or_default().add_child(hash);
}
}
pub fn find(&self, path: &str) -> Option<DataUsageEntry> {
self.cache.get(&hash_path(path).key()).cloned()
}
pub fn find_children_copy(&mut self, h: DataUsageHash) -> DataUsageHashMap {
self.cache.entry(h.string()).or_default().children.clone()
}
pub fn flatten(&self, root: &DataUsageEntry) -> DataUsageEntry {
let mut root = root.clone();
for id in root.children.clone().iter() {
if let Some(e) = self.cache.get(id) {
let mut e = e.clone();
if !e.children.is_empty() {
e = self.flatten(&e);
}
root.merge(&e);
}
}
root.children.clear();
root
}
pub fn copy_with_children(&mut self, src: &DataUsageCache, hash: &DataUsageHash, parent: &Option<DataUsageHash>) {
if let Some(e) = src.cache.get(&hash.string()) {
self.cache.insert(hash.key(), e.clone());
for ch in e.children.iter() {
if *ch == hash.key() {
return;
}
self.copy_with_children(src, &DataUsageHash(ch.to_string()), &Some(hash.clone()));
}
if let Some(parent) = parent {
let p = self.cache.entry(parent.key()).or_default();
p.add_child(hash);
}
}
}
pub fn delete_recursive(&mut self, hash: &DataUsageHash) {
let mut need_remove = Vec::new();
if let Some(v) = self.cache.get(&hash.string()) {
for child in v.children.iter() {
need_remove.push(child.clone());
}
}
self.cache.remove(&hash.string());
need_remove.iter().for_each(|child| {
self.delete_recursive(&DataUsageHash(child.to_string()));
});
}
pub fn size_recursive(&self, path: &str) -> Option<DataUsageEntry> {
match self.find(path) {
Some(root) => {
if root.children.is_empty() {
return Some(root);
}
let mut flat = self.flatten(&root);
if flat.replication_stats.is_some() && flat.replication_stats.as_ref().unwrap().empty() {
flat.replication_stats = None;
}
Some(flat)
}
None => None,
}
}
pub fn search_parent(&self, hash: &DataUsageHash) -> Option<DataUsageHash> {
let want = hash.key();
if let Some(last_index) = want.rfind('/') {
if let Some(v) = self.find(&want[0..last_index]) {
if v.children.contains(&want) {
let found = hash_path(&want[0..last_index]);
return Some(found);
}
}
}
for (k, v) in self.cache.iter() {
if v.children.contains(&want) {
let found = DataUsageHash(k.clone());
return Some(found);
}
}
None
}
pub fn is_compacted(&self, hash: &DataUsageHash) -> bool {
match self.cache.get(&hash.key()) {
Some(due) => due.compacted,
None => false,
}
}
pub fn force_compact(&mut self, limit: usize) {
if self.cache.len() < limit {
return;
}
let top = hash_path(&self.info.name).key();
let top_e = match self.find(&top) {
Some(e) => e,
None => return,
};
if top_e.children.len() > <u64 as TryInto<usize>>::try_into(DATA_SCANNER_FORCE_COMPACT_AT_FOLDERS).unwrap() {
self.reduce_children_of(&hash_path(&self.info.name), limit, true);
}
if self.cache.len() <= limit {
return;
}
let mut found = HashSet::new();
found.insert(top);
mark(self, &top_e, &mut found);
self.cache.retain(|k, _| {
if !found.contains(k) {
return false;
}
true
});
}
pub fn reduce_children_of(&mut self, path: &DataUsageHash, limit: usize, compact_self: bool) {
let e = match self.cache.get(&path.key()) {
Some(e) => e,
None => return,
};
if e.compacted {
return;
}
if e.children.len() > limit && compact_self {
let mut flat = self.size_recursive(&path.key()).unwrap_or_default();
flat.compacted = true;
self.delete_recursive(path);
self.replace_hashed(path, &None, &flat);
return;
}
let total = self.total_children_rec(&path.key());
if total < limit {
return;
}
let mut leaves = Vec::new();
let mut remove = total - limit;
add(self, path, &mut leaves);
leaves.sort_by(|a, b| a.objects.cmp(&b.objects));
while remove > 0 && !leaves.is_empty() {
let e = leaves.first().unwrap();
let candidate = e.path.clone();
if candidate == *path && !compact_self {
break;
}
let removing = self.total_children_rec(&candidate.key());
let mut flat = match self.size_recursive(&candidate.key()) {
Some(flat) => flat,
None => {
leaves.remove(0);
continue;
}
};
flat.compacted = true;
self.delete_recursive(&candidate);
self.replace_hashed(&candidate, &None, &flat);
remove -= removing;
leaves.remove(0);
}
}
pub fn total_children_rec(&self, path: &str) -> usize {
let root = self.find(path);
if root.is_none() {
return 0;
}
let root = root.unwrap();
if root.children.is_empty() {
return 0;
}
let mut n = root.children.len();
for ch in root.children.iter() {
n += self.total_children_rec(ch);
}
n
}
pub fn merge(&mut self, o: &DataUsageCache) {
let mut existing_root = self.root();
let other_root = o.root();
if existing_root.is_none() && other_root.is_none() {
return;
}
if other_root.is_none() {
return;
}
if existing_root.is_none() {
*self = o.clone();
return;
}
if o.info.last_update.gt(&self.info.last_update) {
self.info.last_update = o.info.last_update;
}
existing_root.as_mut().unwrap().merge(other_root.as_ref().unwrap());
self.cache.insert(hash_path(&self.info.name).key(), existing_root.unwrap());
let e_hash = self.root_hash();
for key in other_root.as_ref().unwrap().children.iter() {
let entry = &o.cache[key];
let flat = o.flatten(entry);
let mut existing = self.cache[key].clone();
existing.merge(&flat);
self.replace_hashed(&DataUsageHash(key.clone()), &Some(e_hash.clone()), &existing);
}
}
pub fn root_hash(&self) -> DataUsageHash {
hash_path(&self.info.name)
}
pub fn root(&self) -> Option<DataUsageEntry> {
self.find(&self.info.name)
}
pub fn dui(&self, path: &str, buckets: &[BucketInfo]) -> DataUsageInfo {
let e = match self.find(path) {
Some(e) => e,
None => return DataUsageInfo::default(),
};
let flat = self.flatten(&e);
DataUsageInfo {
last_update: self.info.last_update,
objects_total_count: flat.objects as u64,
versions_total_count: flat.versions as u64,
delete_markers_total_count: flat.delete_markers as u64,
objects_total_size: flat.size as u64,
buckets_count: e.children.len() as u64,
buckets_usage: self.buckets_usage_info(buckets),
..Default::default()
}
}
pub fn buckets_usage_info(&self, buckets: &[BucketInfo]) -> HashMap<String, BucketUsageInfo> {
let mut dst = HashMap::new();
for bucket in buckets.iter() {
let e = match self.find(&bucket.name) {
Some(e) => e,
None => continue,
};
let flat = self.flatten(&e);
let mut bui = BucketUsageInfo {
size: flat.size as u64,
versions_count: flat.versions as u64,
objects_count: flat.objects as u64,
delete_markers_count: flat.delete_markers as u64,
object_size_histogram: flat.obj_sizes.to_map(),
object_versions_histogram: flat.obj_versions.to_map(),
..Default::default()
};
if let Some(rs) = &flat.replication_stats {
bui.replica_size = rs.replica_size;
bui.replica_count = rs.replica_count;
for (arn, stat) in rs.targets.iter() {
bui.replication_info.insert(
arn.clone(),
BucketTargetUsageInfo {
replication_pending_size: stat.pending_size,
replicated_size: stat.replicated_size,
replication_failed_size: stat.failed_size,
replication_pending_count: stat.pending_count,
replication_failed_count: stat.failed_count,
replicated_count: stat.replicated_count,
..Default::default()
},
);
}
}
dst.insert(bucket.name.clone(), bui);
}
dst
}
pub fn marshal_msg(&self) -> Result<Vec<u8>> {
let mut buf = Vec::new();
self.serialize(&mut Serializer::new(&mut buf))?;
Ok(buf)
}
pub fn unmarshal(buf: &[u8]) -> Result<Self> {
let t: Self = rmp_serde::from_slice(buf)?;
Ok(t)
}
}
#[derive(Default, Clone)]
struct Inner {
objects: usize,
path: DataUsageHash,
}
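// Collect compaction candidates below `path`: entries that still have
// children, together with their recursive object counts.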
fn add(data_usage_cache: &DataUsageCache, path: &DataUsageHash, leaves: &mut Vec<Inner>) {
let e = match data_usage_cache.cache.get(&path.key()) {
Some(e) => e,
None => return,
};
// Only entries that still have children are candidates for compaction.
if e.children.is_empty() {
return;
}
let sz = data_usage_cache.size_recursive(&path.key()).unwrap_or_default();
leaves.push(Inner {
objects: sz.objects,
path: path.clone(),
});
for ch in e.children.iter() {
add(data_usage_cache, &DataUsageHash(ch.clone()), leaves);
}
}
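// Record every hash reachable from `entry` into `found` so that unreachable cache entries can be dropped.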
fn mark(duc: &DataUsageCache, entry: &DataUsageEntry, found: &mut HashSet<String>) {
for k in entry.children.iter() {
found.insert(k.to_string());
if let Some(ch) = duc.cache.get(k) {
mark(duc, ch, found);
}
}
}
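/// Cleaned path string used as a key into the data usage cache.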
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct DataUsageHash(pub String);
impl DataUsageHash {
pub fn string(&self) -> String {
self.0.clone()
}
pub fn key(&self) -> String {
self.0.clone()
}
pub fn mod_(&self, cycle: u32, cycles: u32) -> bool {
if cycles <= 1 {
return cycles == 1;
}
let hash = self.calculate_hash();
hash as u32 % cycles == cycle % cycles
}
pub fn mod_alt(&self, cycle: u32, cycles: u32) -> bool {
if cycles <= 1 {
return cycles == 1;
}
let hash = self.calculate_hash();
(hash >> 32) as u32 % cycles == cycle % cycles
}
fn calculate_hash(&self) -> u64 {
let mut hasher = DefaultHasher::new();
self.0.hash(&mut hasher);
hasher.finish()
}
}
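/// Normalize a path (via `clean`) into a `DataUsageHash` cache key.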
pub fn hash_path(data: &str) -> DataUsageHash {
DataUsageHash(Path::new(&data).clean().to_string_lossy().to_string())
}

@@ -1,544 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::{
collections::{HashMap, HashSet},
path::Path,
time::SystemTime,
};
use crate::{
config::storageclass::{RRS, STANDARD},
disk::{BUCKET_META_PREFIX, DeleteOptions, DiskAPI, DiskStore, RUSTFS_META_BUCKET, error::DiskError, fs::read_file},
global::GLOBAL_BackgroundHealState,
heal::heal_ops::HEALING_TRACKER_FILENAME,
new_object_layer_fn,
store_api::{BucketInfo, StorageAPI},
};
use crate::{disk, error::Result};
use chrono::{DateTime, Utc};
use lazy_static::lazy_static;
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use tokio::sync::RwLock;
use super::{background_heal_ops::get_local_disks_to_heal, heal_ops::BG_HEALING_UUID};
pub type HealScanMode = usize;
pub const HEAL_UNKNOWN_SCAN: HealScanMode = 0;
pub const HEAL_NORMAL_SCAN: HealScanMode = 1;
pub const HEAL_DEEP_SCAN: HealScanMode = 2;
pub const HEAL_ITEM_METADATA: &str = "metadata";
pub const HEAL_ITEM_BUCKET: &str = "bucket";
pub const HEAL_ITEM_BUCKET_METADATA: &str = "bucket-metadata";
pub const HEAL_ITEM_OBJECT: &str = "object";
pub const DRIVE_STATE_OK: &str = "ok";
pub const DRIVE_STATE_OFFLINE: &str = "offline";
pub const DRIVE_STATE_CORRUPT: &str = "corrupt";
pub const DRIVE_STATE_MISSING: &str = "missing";
pub const DRIVE_STATE_PERMISSION: &str = "permission-denied";
pub const DRIVE_STATE_FAULTY: &str = "faulty";
pub const DRIVE_STATE_ROOT_MOUNT: &str = "root-mount";
pub const DRIVE_STATE_UNKNOWN: &str = "unknown";
pub const DRIVE_STATE_UNFORMATTED: &str = "unformatted"; // only returned by disk
lazy_static! {
pub static ref TIME_SENTINEL: OffsetDateTime = OffsetDateTime::from_unix_timestamp(0).unwrap();
}
#[derive(Clone, Copy, Debug, Default, Serialize, Deserialize)]
pub struct HealOpts {
pub recursive: bool,
#[serde(rename = "dryRun")]
pub dry_run: bool,
pub remove: bool,
pub recreate: bool,
#[serde(rename = "scanMode")]
pub scan_mode: HealScanMode,
#[serde(rename = "updateParity")]
pub update_parity: bool,
#[serde(rename = "nolock")]
pub no_lock: bool,
pub pool: Option<usize>,
pub set: Option<usize>,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct HealStartSuccess {
#[serde(rename = "clientToken")]
pub client_token: String,
#[serde(rename = "clientAddress")]
pub client_address: String,
#[serde(rename = "startTime")]
pub start_time: DateTime<Utc>,
}
impl Default for HealStartSuccess {
fn default() -> Self {
Self {
client_token: Default::default(),
client_address: Default::default(),
start_time: Utc::now(),
}
}
}
pub type HealStopSuccess = HealStartSuccess;
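/// Per-drive healing progress, persisted on the drive as `.healing.bin`
/// and mirrored into the global background heal state on save.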
#[derive(Debug, Default, Deserialize, Serialize)]
pub struct HealingTracker {
#[serde(skip_serializing, skip_deserializing)]
pub disk: Option<DiskStore>,
pub id: String,
pub pool_index: Option<usize>,
pub set_index: Option<usize>,
pub disk_index: Option<usize>,
pub path: String,
pub endpoint: String,
pub started: Option<OffsetDateTime>,
pub last_update: Option<SystemTime>,
pub objects_total_count: u64,
pub objects_total_size: u64,
pub items_healed: u64,
pub items_failed: u64,
pub item_skipped: u64,
pub bytes_done: u64,
pub bytes_failed: u64,
pub bytes_skipped: u64,
pub bucket: String,
pub object: String,
pub resume_items_healed: u64,
pub resume_items_failed: u64,
pub resume_items_skipped: u64,
pub resume_bytes_done: u64,
pub resume_bytes_failed: u64,
pub resume_bytes_skipped: u64,
pub queue_buckets: Vec<String>,
pub healed_buckets: Vec<String>,
pub heal_id: String,
pub retry_attempts: u64,
pub finished: bool,
#[serde(skip_serializing, skip_deserializing)]
pub mu: RwLock<bool>,
}
impl HealingTracker {
pub fn marshal_msg(&self) -> disk::error::Result<Vec<u8>> {
Ok(serde_json::to_vec(self)?)
}
pub fn unmarshal_msg(data: &[u8]) -> disk::error::Result<Self> {
Ok(serde_json::from_slice::<HealingTracker>(data)?)
}
pub async fn reset_healing(&mut self) {
let _ = self.mu.write().await;
self.items_healed = 0;
self.items_failed = 0;
self.bytes_done = 0;
self.bytes_failed = 0;
self.resume_items_healed = 0;
self.resume_items_failed = 0;
self.resume_bytes_done = 0;
self.resume_bytes_failed = 0;
self.item_skipped = 0;
self.bytes_skipped = 0;
self.healed_buckets = Vec::new();
self.bucket = String::new();
self.object = String::new();
}
pub async fn get_last_update(&self) -> Option<SystemTime> {
let _ = self.mu.read().await;
self.last_update
}
pub async fn get_bucket(&self) -> String {
let _ = self.mu.read().await;
self.bucket.clone()
}
pub async fn set_bucket(&mut self, bucket: &str) {
let _ = self.mu.write().await;
self.bucket = bucket.to_string();
}
pub async fn get_object(&self) -> String {
let _ = self.mu.read().await;
self.object.clone()
}
pub async fn set_object(&mut self, object: &str) {
let _ = self.mu.write().await;
self.object = object.to_string();
}
pub async fn update_progress(&mut self, success: bool, skipped: bool, by: u64) {
let _ = self.mu.write().await;
if success {
self.items_healed += 1;
self.bytes_done += by;
} else if skipped {
self.item_skipped += 1;
self.bytes_skipped += by;
} else {
self.items_failed += 1;
self.bytes_failed += by;
}
}
pub async fn update(&mut self) -> disk::error::Result<()> {
if let Some(disk) = &self.disk {
if healing(disk.path().to_string_lossy().as_ref()).await?.is_none() {
return Err(DiskError::other(format!("healingTracker: drive {} is not marked as healing", self.id)));
}
let _ = self.mu.write().await;
if self.id.is_empty() || self.pool_index.is_none() || self.set_index.is_none() || self.disk_index.is_none() {
self.id = disk.get_disk_id().await?.map_or("".to_string(), |id| id.to_string());
let disk_location = disk.get_disk_location();
self.pool_index = disk_location.pool_idx;
self.set_index = disk_location.set_idx;
self.disk_index = disk_location.disk_idx;
}
}
self.save().await
}
pub async fn save(&mut self) -> disk::error::Result<()> {
let _ = self.mu.write().await;
if self.pool_index.is_none() || self.set_index.is_none() || self.disk_index.is_none() {
let Some(store) = new_object_layer_fn() else {
return Err(DiskError::other("errServerNotInitialized"));
};
// TODO: check error type
(self.pool_index, self.set_index, self.disk_index) =
store.get_pool_and_set(&self.id).await.map_err(|_| DiskError::DiskNotFound)?;
}
self.last_update = Some(SystemTime::now());
let htracker_bytes = self.marshal_msg()?;
GLOBAL_BackgroundHealState.update_heal_status(self).await;
if let Some(disk) = &self.disk {
let file_path = Path::new(BUCKET_META_PREFIX).join(HEALING_TRACKER_FILENAME);
disk.write_all(RUSTFS_META_BUCKET, file_path.to_str().unwrap(), htracker_bytes.into())
.await?;
}
Ok(())
}
pub async fn delete(&self) -> Result<()> {
if let Some(disk) = &self.disk {
let file_path = Path::new(BUCKET_META_PREFIX).join(HEALING_TRACKER_FILENAME);
disk.delete(
RUSTFS_META_BUCKET,
file_path.to_str().unwrap(),
DeleteOptions {
recursive: false,
immediate: false,
..Default::default()
},
)
.await?;
}
Ok(())
}
pub async fn is_healed(&self, bucket: &str) -> bool {
let _ = self.mu.read().await;
for v in self.healed_buckets.iter() {
if v == bucket {
return true;
}
}
false
}
pub async fn resume(&mut self) {
let _ = self.mu.write().await;
self.items_healed = self.resume_items_healed;
self.items_failed = self.resume_items_failed;
self.item_skipped = self.resume_items_skipped;
self.bytes_done = self.resume_bytes_done;
self.bytes_failed = self.resume_bytes_failed;
self.bytes_skipped = self.resume_bytes_skipped;
}
pub async fn bucket_done(&mut self, bucket: &str) {
let _ = self.mu.write().await;
self.resume_items_healed = self.items_healed;
self.resume_items_failed = self.items_failed;
self.resume_items_skipped = self.item_skipped;
self.resume_bytes_done = self.bytes_done;
self.resume_bytes_failed = self.bytes_failed;
self.resume_bytes_skipped = self.bytes_skipped;
self.healed_buckets.push(bucket.to_string());
self.queue_buckets.retain(|x| x != bucket);
}
pub async fn set_queue_buckets(&mut self, buckets: &[BucketInfo]) {
let _ = self.mu.write().await;
buckets.iter().for_each(|bucket| {
if !self.healed_buckets.contains(&bucket.name) {
self.queue_buckets.push(bucket.name.clone());
}
});
}
pub async fn to_healing_disk(&self) -> rustfs_madmin::HealingDisk {
let _ = self.mu.read().await;
rustfs_madmin::HealingDisk {
id: self.id.clone(),
heal_id: self.heal_id.clone(),
pool_index: self.pool_index,
set_index: self.set_index,
disk_index: self.disk_index,
endpoint: self.endpoint.clone(),
path: self.path.clone(),
started: self.started,
last_update: self.last_update,
retry_attempts: self.retry_attempts,
objects_total_count: self.objects_total_count,
objects_total_size: self.objects_total_size,
items_healed: self.items_healed,
items_failed: self.items_failed,
item_skipped: self.item_skipped,
bytes_done: self.bytes_done,
bytes_failed: self.bytes_failed,
bytes_skipped: self.bytes_skipped,
objects_healed: self.items_healed,
objects_failed: self.items_failed,
bucket: self.bucket.clone(),
object: self.object.clone(),
queue_buckets: self.queue_buckets.clone(),
healed_buckets: self.healed_buckets.clone(),
finished: self.finished,
}
}
}
impl Clone for HealingTracker {
fn clone(&self) -> Self {
Self {
disk: self.disk.clone(),
id: self.id.clone(),
pool_index: self.pool_index,
set_index: self.set_index,
disk_index: self.disk_index,
path: self.path.clone(),
endpoint: self.endpoint.clone(),
started: self.started,
last_update: self.last_update,
objects_total_count: self.objects_total_count,
objects_total_size: self.objects_total_size,
items_healed: self.items_healed,
items_failed: self.items_failed,
item_skipped: self.item_skipped,
bytes_done: self.bytes_done,
bytes_failed: self.bytes_failed,
bytes_skipped: self.bytes_skipped,
bucket: self.bucket.clone(),
object: self.object.clone(),
resume_items_healed: self.resume_items_healed,
resume_items_failed: self.resume_items_failed,
resume_items_skipped: self.resume_items_skipped,
resume_bytes_done: self.resume_bytes_done,
resume_bytes_failed: self.resume_bytes_failed,
resume_bytes_skipped: self.resume_bytes_skipped,
queue_buckets: self.queue_buckets.clone(),
healed_buckets: self.healed_buckets.clone(),
heal_id: self.heal_id.clone(),
retry_attempts: self.retry_attempts,
finished: self.finished,
mu: RwLock::new(false),
}
}
}
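/// Load the healing tracker stored on the given drive and verify that the recorded id matches the drive.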
pub async fn load_healing_tracker(disk: &Option<DiskStore>) -> disk::error::Result<HealingTracker> {
if let Some(disk) = disk {
let disk_id = disk.get_disk_id().await?;
if let Some(disk_id) = disk_id {
let disk_id = disk_id.to_string();
let file_path = Path::new(BUCKET_META_PREFIX).join(HEALING_TRACKER_FILENAME);
let data = disk.read_all(RUSTFS_META_BUCKET, file_path.to_str().unwrap()).await?;
let mut healing_tracker = HealingTracker::unmarshal_msg(&data)?;
if healing_tracker.id != disk_id && !healing_tracker.id.is_empty() {
return Err(DiskError::other(format!(
"loadHealingTracker: drive id mismatch expected {}, got {}",
healing_tracker.id, disk_id
)));
}
healing_tracker.id = disk_id;
healing_tracker.disk = Some(disk.clone());
Ok(healing_tracker)
} else {
Err(DiskError::other("loadHealingTracker: disk not have id"))
}
} else {
Err(DiskError::other("loadHealingTracker: nil drive given"))
}
}
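/// Create a fresh healing tracker for a drive that is about to be healed, stamped with the given heal id.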
pub async fn init_healing_tracker(disk: DiskStore, heal_id: &str) -> disk::error::Result<HealingTracker> {
let disk_location = disk.get_disk_location();
Ok(HealingTracker {
id: disk
.get_disk_id()
.await
.map_or("".to_string(), |id| id.map_or("".to_string(), |id| id.to_string())),
heal_id: heal_id.to_string(),
path: disk.to_string(),
endpoint: disk.endpoint().to_string(),
started: Some(OffsetDateTime::now_utc()),
pool_index: disk_location.pool_idx,
set_index: disk_location.set_idx,
disk_index: disk_location.disk_idx,
disk: Some(disk),
..Default::default()
})
}
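/// Read the healing tracker stored under a drive's local path; returns `Ok(None)` when the tracker file is empty.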
pub async fn healing(drive_path: &str) -> disk::error::Result<Option<HealingTracker>> {
let healing_file = Path::new(drive_path)
.join(RUSTFS_META_BUCKET)
.join(BUCKET_META_PREFIX)
.join(HEALING_TRACKER_FILENAME);
let b = read_file(healing_file).await?;
if b.is_empty() {
return Ok(None);
}
let healing_tracker = HealingTracker::unmarshal_msg(&b)?;
Ok(Some(healing_tracker))
}
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct MRFStatus {
bytes_healed: u64,
items_healed: u64,
}
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct SetStatus {
pub id: String,
pub pool_index: i32,
pub set_index: i32,
pub heal_status: String,
pub heal_priority: String,
pub total_objects: usize,
pub disks: Vec<rustfs_madmin::Disk>,
}
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct BgHealState {
offline_endpoints: Vec<String>,
scanned_items_count: u64,
heal_disks: Vec<String>,
sets: Vec<SetStatus>,
mrf: HashMap<String, MRFStatus>,
scparity: HashMap<String, usize>,
}
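/// Collect this node's background heal status: scanned item count, healing disks,
/// per-set status and storage-class parity.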
pub async fn get_local_background_heal_status() -> (BgHealState, bool) {
let (bg_seq, ok) = GLOBAL_BackgroundHealState.get_heal_sequence_by_token(BG_HEALING_UUID).await;
if !ok {
return (BgHealState::default(), false);
}
let bg_seq = bg_seq.unwrap();
let mut status = BgHealState {
scanned_items_count: bg_seq.get_scanned_items_count().await as u64,
..Default::default()
};
let mut heal_disks_map = HashSet::new();
for ep in get_local_disks_to_heal().await.iter() {
heal_disks_map.insert(ep.to_string());
}
let Some(store) = new_object_layer_fn() else {
let healing = GLOBAL_BackgroundHealState.get_local_healing_disks().await;
for disk in healing.values() {
status.heal_disks.push(disk.endpoint.clone());
}
return (status, true);
};
let si = store.local_storage_info().await;
let mut indexed = HashMap::new();
for disk in si.disks.iter() {
let set_idx = format!("{}-{}", disk.pool_index, disk.set_index);
// indexed.insert(set_idx, disk);
indexed.entry(set_idx).or_insert(Vec::new()).push(disk);
}
for (id, disks) in indexed {
let mut ss = SetStatus {
id,
set_index: disks[0].set_index,
pool_index: disks[0].pool_index,
..Default::default()
};
for disk in disks {
ss.disks.push(disk.clone());
if disk.healing {
ss.heal_status = "healing".to_string();
ss.heal_priority = "high".to_string();
status.heal_disks.push(disk.endpoint.clone());
}
}
ss.disks.sort_by(|a, b| {
if a.pool_index != b.pool_index {
return a.pool_index.cmp(&b.pool_index);
}
if a.set_index != b.set_index {
return a.set_index.cmp(&b.set_index);
}
a.disk_index.cmp(&b.disk_index)
});
status.sets.push(ss);
}
status.sets.sort_by(|a, b| a.id.cmp(&b.id));
let backend_info = store.backend_info().await;
status
.scparity
.insert(STANDARD.to_string(), backend_info.standard_sc_parity.unwrap_or_default());
status
.scparity
.insert(RRS.to_string(), backend_info.rr_sc_parity.unwrap_or_default());
(status, true)
}

@@ -1,842 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use super::{
background_heal_ops::HealTask,
data_scanner::HEAL_DELETE_DANGLING,
error::ERR_SKIP_FILE,
heal_commands::{HEAL_ITEM_BUCKET_METADATA, HealOpts, HealScanMode, HealStopSuccess, HealingTracker},
};
use crate::error::{Error, Result};
use crate::heal::heal_commands::{HEAL_ITEM_BUCKET, HEAL_ITEM_OBJECT};
use crate::store_api::StorageAPI;
use crate::{
config::com::CONFIG_PREFIX,
disk::RUSTFS_META_BUCKET,
global::GLOBAL_BackgroundHealRoutine,
heal::{error::ERR_HEAL_STOP_SIGNALLED, heal_commands::DRIVE_STATE_OK},
};
use crate::{
disk::endpoint::Endpoint,
endpoints::Endpoints,
global::GLOBAL_IsDistErasure,
heal::heal_commands::{HEAL_UNKNOWN_SCAN, HealStartSuccess},
new_object_layer_fn,
};
use chrono::Utc;
use futures::join;
use lazy_static::lazy_static;
use rustfs_filemeta::MetaCacheEntry;
use rustfs_madmin::heal_commands::{HealDriveInfo, HealItemType, HealResultItem};
use rustfs_utils::path::has_prefix;
use rustfs_utils::path::path_join;
use serde::{Deserialize, Serialize};
use std::{
collections::HashMap,
future::Future,
path::PathBuf,
pin::Pin,
sync::Arc,
time::{Duration, SystemTime, UNIX_EPOCH},
};
use tokio::{
select, spawn,
sync::{
RwLock, broadcast,
mpsc::{self, Receiver as M_Receiver, Sender as M_Sender},
watch::{self, Receiver as W_Receiver, Sender as W_Sender},
},
time::{interval, sleep},
};
use tracing::{error, info};
use uuid::Uuid;
type HealStatusSummary = String;
type ItemsMap = HashMap<HealItemType, usize>;
pub type HealEntryFn =
Arc<dyn Fn(String, MetaCacheEntry, HealScanMode) -> Pin<Box<dyn Future<Output = Result<()>> + Send>> + Send + Sync + 'static>;
pub const BG_HEALING_UUID: &str = "0000-0000-0000-0000";
pub const HEALING_TRACKER_FILENAME: &str = ".healing.bin";
const KEEP_HEAL_SEQ_STATE_DURATION: Duration = Duration::from_secs(10 * 60);
const HEAL_NOT_STARTED_STATUS: &str = "not started";
const HEAL_RUNNING_STATUS: &str = "running";
const HEAL_STOPPED_STATUS: &str = "stopped";
const HEAL_FINISHED_STATUS: &str = "finished";
pub const RUSTFS_RESERVED_BUCKET: &str = "rustfs";
pub const RUSTFS_RESERVED_BUCKET_PATH: &str = "/rustfs";
pub const LOGIN_PATH_PREFIX: &str = "/login";
const MAX_UNCONSUMED_HEAL_RESULT_ITEMS: usize = 1000;
const HEAL_UNCONSUMED_TIMEOUT: Duration = Duration::from_secs(24 * 60 * 60);
pub const NOP_HEAL: &str = "";
lazy_static! {}
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct HealSequenceStatus {
pub summary: HealStatusSummary,
pub failure_detail: String,
pub start_time: u64,
pub heal_setting: HealOpts,
pub items: Vec<HealResultItem>,
}
#[derive(Debug, Default)]
pub struct HealSource {
pub bucket: String,
pub object: String,
pub version_id: String,
pub no_wait: bool,
pub opts: Option<HealOpts>,
}
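/// State of a single heal sequence: its scope (bucket/object prefix), settings,
/// progress counters and the queue of results awaiting the client.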
#[derive(Debug)]
pub struct HealSequence {
pub bucket: String,
pub object: String,
pub report_progress: bool,
pub start_time: SystemTime,
pub end_time: Arc<RwLock<SystemTime>>,
pub client_token: String,
pub client_address: String,
pub force_started: bool,
pub setting: HealOpts,
pub current_status: Arc<RwLock<HealSequenceStatus>>,
pub last_sent_result_index: RwLock<usize>,
pub scanned_items_map: RwLock<ItemsMap>,
pub healed_items_map: RwLock<ItemsMap>,
pub heal_failed_items_map: RwLock<ItemsMap>,
pub last_heal_activity: RwLock<SystemTime>,
traverse_and_heal_done_tx: Arc<RwLock<M_Sender<Option<Error>>>>,
traverse_and_heal_done_rx: Arc<RwLock<M_Receiver<Option<Error>>>>,
tx: W_Sender<bool>,
rx: W_Receiver<bool>,
}
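/// Build the implicit background heal sequence, identified by `BG_HEALING_UUID`; it does not report progress.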
pub fn new_bg_heal_sequence() -> HealSequence {
let hs = HealOpts {
remove: HEAL_DELETE_DANGLING,
..Default::default()
};
HealSequence {
start_time: SystemTime::now(),
client_token: BG_HEALING_UUID.to_string(),
bucket: RUSTFS_RESERVED_BUCKET.to_string(),
setting: hs,
current_status: Arc::new(RwLock::new(HealSequenceStatus {
summary: HEAL_NOT_STARTED_STATUS.to_string(),
heal_setting: hs,
..Default::default()
})),
report_progress: false,
scanned_items_map: HashMap::new().into(),
healed_items_map: HashMap::new().into(),
heal_failed_items_map: HashMap::new().into(),
..Default::default()
}
}
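/// Build a client-initiated heal sequence for `bucket`/`obj_prefix` with a freshly generated client token.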
pub fn new_heal_sequence(bucket: &str, obj_prefix: &str, client_addr: &str, hs: HealOpts, force_start: bool) -> HealSequence {
let client_token = Uuid::new_v4().to_string();
let (tx, rx) = mpsc::channel(10);
HealSequence {
bucket: bucket.to_string(),
object: obj_prefix.to_string(),
report_progress: true,
start_time: SystemTime::now(),
client_token,
client_address: client_addr.to_string(),
force_started: force_start,
setting: hs,
current_status: Arc::new(RwLock::new(HealSequenceStatus {
summary: HEAL_NOT_STARTED_STATUS.to_string(),
heal_setting: hs,
..Default::default()
})),
traverse_and_heal_done_tx: Arc::new(RwLock::new(tx)),
traverse_and_heal_done_rx: Arc::new(RwLock::new(rx)),
scanned_items_map: HashMap::new().into(),
healed_items_map: HashMap::new().into(),
heal_failed_items_map: HashMap::new().into(),
..Default::default()
}
}
impl Default for HealSequence {
fn default() -> Self {
let (h_tx, h_rx) = mpsc::channel(1);
let (tx, rx) = watch::channel(false);
Self {
bucket: Default::default(),
object: Default::default(),
report_progress: Default::default(),
start_time: SystemTime::now(),
end_time: Arc::new(RwLock::new(SystemTime::now())),
client_token: Default::default(),
client_address: Default::default(),
force_started: Default::default(),
setting: Default::default(),
current_status: Default::default(),
last_sent_result_index: Default::default(),
scanned_items_map: Default::default(),
healed_items_map: Default::default(),
heal_failed_items_map: Default::default(),
last_heal_activity: RwLock::new(SystemTime::now()),
traverse_and_heal_done_tx: Arc::new(RwLock::new(h_tx)),
traverse_and_heal_done_rx: Arc::new(RwLock::new(h_rx)),
tx,
rx,
}
}
}
impl HealSequence {
pub fn new(bucket: &str, obj_prefix: &str, client_addr: &str, hs: HealOpts, force_start: bool) -> Self {
let client_token = Uuid::new_v4().to_string();
Self {
bucket: bucket.to_string(),
object: obj_prefix.to_string(),
report_progress: true,
client_token,
client_address: client_addr.to_string(),
force_started: force_start,
setting: hs,
current_status: Arc::new(RwLock::new(HealSequenceStatus {
summary: HEAL_NOT_STARTED_STATUS.to_string(),
heal_setting: hs,
..Default::default()
})),
..Default::default()
}
}
}
impl HealSequence {
pub async fn get_scanned_items_count(&self) -> usize {
self.scanned_items_map.read().await.values().sum()
}
async fn _get_scanned_items_map(&self) -> ItemsMap {
self.scanned_items_map.read().await.clone()
}
async fn _get_healed_items_map(&self) -> ItemsMap {
self.healed_items_map.read().await.clone()
}
async fn _get_heal_failed_items_map(&self) -> ItemsMap {
self.heal_failed_items_map.read().await.clone()
}
pub async fn count_failed(&self, heal_type: HealItemType) {
*self.heal_failed_items_map.write().await.entry(heal_type).or_insert(0) += 1;
*self.last_heal_activity.write().await = SystemTime::now();
}
pub async fn count_scanned(&self, heal_type: HealItemType) {
*self.scanned_items_map.write().await.entry(heal_type).or_insert(0) += 1;
*self.last_heal_activity.write().await = SystemTime::now();
}
pub async fn count_healed(&self, heal_type: HealItemType) {
*self.healed_items_map.write().await.entry(heal_type).or_insert(0) += 1;
*self.last_heal_activity.write().await = SystemTime::now();
}
async fn is_quitting(&self) -> bool {
if let Ok(true) = self.rx.has_changed() {
info!("quited");
return true;
}
false
}
async fn has_ended(&self) -> bool {
if self.client_token == *BG_HEALING_UUID {
return false;
}
*(self.end_time.read().await) != self.start_time
}
async fn stop(&self) {
let _ = self.tx.send(true);
}
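/// Append a heal result to the sequence status, waiting while the buffer of unconsumed results is full.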
async fn push_heal_result_item(&self, r: &HealResultItem) -> Result<()> {
let mut r = r.clone();
let mut interval_timer = interval(HEAL_UNCONSUMED_TIMEOUT);
#[allow(unused_assignments)]
let mut items_len = 0;
loop {
{
let current_status_r = self.current_status.read().await;
items_len = current_status_r.items.len();
}
if items_len == MAX_UNCONSUMED_HEAL_RESULT_ITEMS {
select! {
_ = sleep(Duration::from_secs(1)) => {
}
_ = self.is_done() => {
return Err(Error::other("stopped"));
}
_ = interval_timer.tick() => {
return Err(Error::other("timeout"));
}
}
} else {
break;
}
}
let mut current_status_w = self.current_status.write().await;
if items_len > 0 {
r.result_index = 1 + current_status_w.items[items_len - 1].result_index;
} else {
r.result_index = 1 + *self.last_sent_result_index.read().await;
}
current_status_w.items.push(r);
Ok(())
}
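/// Queue a heal task for the background heal routine, update the scanned/healed/failed
/// counters and record the result for this sequence.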
pub async fn queue_heal_task(&self, source: HealSource, heal_type: HealItemType) -> Result<()> {
let mut task = HealTask::new(&source.bucket, &source.object, &source.version_id, &self.setting);
info!("queue_heal_task, {:?}", task);
if let Some(opts) = source.opts {
task.opts = opts;
} else {
task.opts.scan_mode = HEAL_UNKNOWN_SCAN;
}
self.count_scanned(heal_type.clone()).await;
if source.no_wait {
let task_str = format!("{task:?}");
if GLOBAL_BackgroundHealRoutine.tasks_tx.try_send(task).is_ok() {
info!("Task in the queue: {:?}", task_str);
}
return Ok(());
}
let (resp_tx, mut resp_rx) = mpsc::channel(1);
task.resp_tx = Some(resp_tx);
let task_str = format!("{task:?}");
if GLOBAL_BackgroundHealRoutine.tasks_tx.try_send(task).is_ok() {
info!("Task in the queue: {:?}", task_str);
} else {
error!("push task to queue failed");
}
let count_ok_drives = |drivers: &[HealDriveInfo]| {
let mut count = 0;
for drive in drivers.iter() {
if drive.state == DRIVE_STATE_OK {
count += 1;
}
}
count
};
match resp_rx.recv().await {
Some(mut res) => {
if res.err.is_none() {
self.count_healed(heal_type.clone()).await;
} else {
self.count_failed(heal_type.clone()).await;
}
if !self.report_progress {
return if let Some(err) = res.err {
if err.to_string() == ERR_SKIP_FILE {
return Ok(());
}
Err(err)
} else {
Ok(())
};
}
res.result.heal_item_type = heal_type.clone();
if let Some(err) = res.err.as_ref() {
res.result.detail = err.to_string();
}
if res.result.parity_blocks > 0 && res.result.data_blocks > 0 && res.result.data_blocks > res.result.parity_blocks
{
let got = count_ok_drives(&res.result.after.drives);
if got < res.result.parity_blocks {
res.result.detail = format!(
"quorum loss - expected {} minimum, got drive states in OK {}",
res.result.parity_blocks, got
);
}
}
info!("queue_heal_task, HealResult: {:?}", res);
self.push_heal_result_item(&res.result).await
}
None => Ok(()),
}
}
async fn heal_disk_meta(h: Arc<HealSequence>) -> Result<()> {
HealSequence::heal_rustfs_sys_meta(h, CONFIG_PREFIX).await
}
async fn heal_items(h: Arc<HealSequence>, buckets_only: bool) -> Result<()> {
if h.client_token == *BG_HEALING_UUID {
return Ok(());
}
let bucket = h.bucket.clone();
let task1 = Self::heal_disk_meta(h.clone());
let task2 = Self::heal_bucket(h.clone(), &bucket, buckets_only);
let results = join!(task1, task2);
results.0?;
results.1?;
Ok(())
}
async fn traverse_and_heal(h: Arc<HealSequence>) {
let buckets_only = false;
let result = Self::heal_items(h.clone(), buckets_only).await.err();
let _ = h.traverse_and_heal_done_tx.read().await.send(result).await;
}
async fn heal_rustfs_sys_meta(h: Arc<HealSequence>, meta_prefix: &str) -> Result<()> {
info!("heal_rustfs_sys_meta, h: {:?}", h);
let Some(store) = new_object_layer_fn() else {
return Err(Error::other("errServerNotInitialized"));
};
let setting = h.setting;
store
.heal_objects(RUSTFS_META_BUCKET, meta_prefix, &setting, h.clone(), true)
.await
}
async fn is_done(&self) -> bool {
if let Ok(true) = self.rx.has_changed() {
return true;
}
false
}
pub async fn heal_bucket(hs: Arc<HealSequence>, bucket: &str, bucket_only: bool) -> Result<()> {
info!("heal_bucket, hs: {:?}", hs);
let (object, setting) = {
hs.queue_heal_task(
HealSource {
bucket: bucket.to_string(),
..Default::default()
},
HEAL_ITEM_BUCKET.to_string(),
)
.await?;
if bucket_only {
return Ok(());
}
if !hs.setting.recursive {
if !hs.object.is_empty() {
HealSequence::heal_object(hs.clone(), bucket, &hs.object, "", hs.setting.scan_mode).await?;
}
return Ok(());
}
(hs.object.clone(), hs.setting)
};
let Some(store) = new_object_layer_fn() else {
return Err(Error::other("errServerNotInitialized"));
};
store.heal_objects(bucket, &object, &setting, hs.clone(), false).await
}
pub async fn heal_object(
hs: Arc<HealSequence>,
bucket: &str,
object: &str,
version_id: &str,
_scan_mode: HealScanMode,
) -> Result<()> {
info!("heal_object");
if hs.is_quitting().await {
info!("heal_object hs is quitting");
return Err(Error::other(ERR_HEAL_STOP_SIGNALLED));
}
info!("will queue task");
hs.queue_heal_task(
HealSource {
bucket: bucket.to_string(),
object: object.to_string(),
version_id: version_id.to_string(),
opts: Some(hs.setting),
..Default::default()
},
HEAL_ITEM_OBJECT.to_string(),
)
.await?;
Ok(())
}
pub async fn heal_meta_object(
hs: Arc<HealSequence>,
bucket: &str,
object: &str,
version_id: &str,
_scan_mode: HealScanMode,
) -> Result<()> {
if hs.is_quitting().await {
return Err(Error::other(ERR_HEAL_STOP_SIGNALLED));
}
hs.queue_heal_task(
HealSource {
bucket: bucket.to_string(),
object: object.to_string(),
version_id: version_id.to_string(),
..Default::default()
},
HEAL_ITEM_BUCKET_METADATA.to_string(),
)
.await?;
Ok(())
}
}
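/// Run a heal sequence: mark it running, spawn the traversal task and update the
/// summary when it finishes, fails or is stopped.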
pub async fn heal_sequence_start(h: Arc<HealSequence>) {
{
let mut current_status_w = h.current_status.write().await;
current_status_w.summary = HEAL_RUNNING_STATUS.to_string();
current_status_w.start_time = SystemTime::now()
.duration_since(UNIX_EPOCH)
.expect("Time went backwards")
.as_secs();
}
let h_clone = h.clone();
spawn(async move {
HealSequence::traverse_and_heal(h_clone).await;
});
let h_clone_1 = h.clone();
let mut x = h.traverse_and_heal_done_rx.write().await;
select! {
_ = h.is_done() => {
*(h.end_time.write().await) = SystemTime::now();
let mut current_status_w = h.current_status.write().await;
current_status_w.summary = HEAL_FINISHED_STATUS.to_string();
spawn(async move {
let mut rx_w = h_clone_1.traverse_and_heal_done_rx.write().await;
rx_w.recv().await;
});
}
result = x.recv() => {
if let Some(err) = result {
match err {
Some(err) => {
let mut current_status_w = h.current_status.write().await;
current_status_w.summary = HEAL_STOPPED_STATUS.to_string();
current_status_w.failure_detail = err.to_string();
},
None => {
let mut current_status_w = h.current_status.write().await;
current_status_w.summary = HEAL_FINISHED_STATUS.to_string();
}
}
}
}
}
}
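/// Global registry of running heal sequences, per-drive healing trackers and local disks queued for healing.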
#[derive(Debug, Default)]
pub struct AllHealState {
mu: RwLock<bool>,
heal_seq_map: RwLock<HashMap<String, Arc<HealSequence>>>,
heal_local_disks: RwLock<HashMap<Endpoint, bool>>,
heal_status: RwLock<HashMap<String, HealingTracker>>,
}
impl AllHealState {
pub fn new(cleanup: bool) -> Arc<Self> {
let state = Arc::new(AllHealState::default());
let (_, mut rx) = broadcast::channel(1);
if cleanup {
let state_clone = state.clone();
spawn(async move {
loop {
select! {
result = rx.recv() =>{
if let Ok(true) = result {
return;
}
}
_ = sleep(Duration::from_secs(5 * 60)) => {
state_clone.periodic_heal_seqs_clean().await;
}
}
}
});
}
state
}
pub async fn pop_heal_local_disks(&self, heal_local_disks: &[Endpoint]) {
let _ = self.mu.write().await;
self.heal_local_disks.write().await.retain(|k, _| {
if heal_local_disks.contains(k) {
return false;
}
true
});
let heal_local_disks = heal_local_disks.iter().map(|s| s.to_string()).collect::<Vec<_>>();
self.heal_status.write().await.retain(|_, v| {
if heal_local_disks.contains(&v.endpoint) {
return false;
}
true
});
}
pub async fn pop_heal_status_json(&self, heal_path: &str, client_token: &str) -> Result<Vec<u8>> {
match self.get_heal_sequence(heal_path).await {
Some(h) => {
if client_token != h.client_token {
info!("err heal invalid client token");
return Err(Error::other("err heal invalid client token"));
}
let num_items = h.current_status.read().await.items.len();
let mut last_result_index = *h.last_sent_result_index.read().await;
if num_items > 0 {
if let Some(item) = h.current_status.read().await.items.last() {
last_result_index = item.result_index;
}
}
*h.last_sent_result_index.write().await = last_result_index;
let data = h.current_status.read().await.clone();
match serde_json::to_vec(&data) {
Ok(b) => {
h.current_status.write().await.items.clear();
Ok(b)
}
Err(e) => {
h.current_status.write().await.items.clear();
info!("json encode err, e: {}", e);
Err(Error::other(e.to_string()))
}
}
}
None => serde_json::to_vec(&HealSequenceStatus {
summary: HEAL_FINISHED_STATUS.to_string(),
..Default::default()
})
.map_err(|e| {
info!("json encode err, e: {}", e);
Error::other(e.to_string())
}),
}
}
pub async fn update_heal_status(&self, tracker: &HealingTracker) {
let _ = self.mu.write().await;
let _ = tracker.mu.read().await;
self.heal_status.write().await.insert(tracker.id.clone(), tracker.clone());
}
pub async fn get_local_healing_disks(&self) -> HashMap<String, rustfs_madmin::HealingDisk> {
let _ = self.mu.read().await;
let mut dst = HashMap::new();
for v in self.heal_status.read().await.values() {
dst.insert(v.endpoint.clone(), v.to_healing_disk().await);
}
dst
}
pub async fn get_heal_local_disk_endpoints(&self) -> Endpoints {
let _ = self.mu.read().await;
let mut endpoints = Vec::new();
self.heal_local_disks.read().await.iter().for_each(|(k, v)| {
if !v {
endpoints.push(k.clone());
}
});
Endpoints::from(endpoints)
}
pub async fn set_disk_healing_status(&self, ep: Endpoint, healing: bool) {
let _ = self.mu.write().await;
self.heal_local_disks.write().await.insert(ep, healing);
}
pub async fn push_heal_local_disks(&self, heal_local_disks: &[Endpoint]) {
let _ = self.mu.write().await;
for heal_local_disk in heal_local_disks.iter() {
self.heal_local_disks.write().await.insert(heal_local_disk.clone(), false);
}
}
pub async fn periodic_heal_seqs_clean(&self) {
let _ = self.mu.write().await;
let now = SystemTime::now();
let mut keys_to_remove = Vec::new();
for (k, v) in self.heal_seq_map.read().await.iter() {
if v.has_ended().await && now.duration_since(*(v.end_time.read().await)).unwrap() > KEEP_HEAL_SEQ_STATE_DURATION {
keys_to_remove.push(k.clone())
}
}
for key in keys_to_remove.iter() {
self.heal_seq_map.write().await.remove(key);
}
}
pub async fn get_heal_sequence_by_token(&self, token: &str) -> (Option<Arc<HealSequence>>, bool) {
let _ = self.mu.read().await;
for v in self.heal_seq_map.read().await.values() {
if v.client_token == token {
return (Some(v.clone()), true);
}
}
(None, false)
}
pub async fn get_heal_sequence(&self, path: &str) -> Option<Arc<HealSequence>> {
let _ = self.mu.read().await;
self.heal_seq_map.read().await.get(path).cloned()
}
pub async fn stop_heal_sequence(&self, path: &str) -> Result<Vec<u8>> {
let mut hsp = HealStopSuccess::default();
if let Some(he) = self.get_heal_sequence(path).await {
let client_token = he.client_token.clone();
if *GLOBAL_IsDistErasure.read().await {
// TODO: proxy
}
hsp.client_token = client_token;
hsp.client_address = he.client_address.clone();
hsp.start_time = Utc::now();
he.stop().await;
loop {
if he.has_ended().await {
break;
}
sleep(Duration::from_secs(1)).await;
}
let _ = self.mu.write().await;
self.heal_seq_map.write().await.remove(path);
} else {
hsp.client_token = "unknown".to_string();
}
let b = serde_json::to_string(&hsp)?;
Ok(b.as_bytes().to_vec())
}
// launch_new_heal_sequence - launches a background task that performs
// healing according to the `heal_sequence` argument. For each heal
// sequence, state is stored in `heal_seq_map`, which maps the heal path
// to the `HealSequence` holding state about that run.
//
// Heal results are persisted in server memory for
// KEEP_HEAL_SEQ_STATE_DURATION; the periodic cleanup task started in
// `AllHealState::new` removes them after that duration.
pub async fn launch_new_heal_sequence(&self, heal_sequence: Arc<HealSequence>) -> Result<Vec<u8>> {
let path = path_join(&[
PathBuf::from(heal_sequence.bucket.clone()),
PathBuf::from(heal_sequence.object.clone()),
]);
let path_s = path.to_str().unwrap();
if heal_sequence.force_started {
self.stop_heal_sequence(path_s).await?;
} else if let Some(hs) = self.get_heal_sequence(path_s).await {
if !hs.has_ended().await {
return Err(Error::other(format!(
"Heal is already running on the given path (use force-start option to stop and start afresh). The heal was started by IP {} at {:?}, token is {}",
heal_sequence.client_address, heal_sequence.start_time, heal_sequence.client_token
)));
}
}
let _ = self.mu.write().await;
for (k, v) in self.heal_seq_map.read().await.iter() {
if (has_prefix(k, path_s) || has_prefix(path_s, k)) && !v.has_ended().await {
return Err(Error::other(format!(
"The provided heal sequence path overlaps with an existing heal path: {k}"
)));
}
}
self.heal_seq_map
.write()
.await
.insert(path_s.to_string(), heal_sequence.clone());
let client_token = heal_sequence.client_token.clone();
if *GLOBAL_IsDistErasure.read().await {
// TODO: proxy
}
if heal_sequence.client_token == BG_HEALING_UUID {
// For background heal do nothing; no extra task needs to be spawned.
} else {
let heal_sequence_clone = heal_sequence.clone();
spawn(async {
heal_sequence_start(heal_sequence_clone).await;
});
}
let b = serde_json::to_vec(&HealStartSuccess {
client_token,
client_address: heal_sequence.client_address.clone(),
// start_time: Utc::now(),
start_time: heal_sequence.start_time.into(),
})?;
Ok(b)
}
}

@@ -1,183 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::disk::{BUCKET_META_PREFIX, RUSTFS_META_BUCKET};
use crate::heal::background_heal_ops::{heal_bucket, heal_object};
use crate::heal::heal_commands::{HEAL_DEEP_SCAN, HEAL_NORMAL_SCAN};
use chrono::{DateTime, Utc};
use lazy_static::lazy_static;
use regex::Regex;
use rustfs_utils::path::SLASH_SEPARATOR;
use std::ops::Sub;
use std::sync::atomic::{AtomicBool, Ordering};
use std::time::Duration;
use tokio::sync::RwLock;
use tokio::sync::mpsc::{Receiver, Sender};
use tokio::time::sleep;
use tokio_util::sync::CancellationToken;
use tracing::{error, info};
use uuid::Uuid;
pub const MRF_OPS_QUEUE_SIZE: u64 = 100000;
pub const HEAL_DIR: &str = ".heal";
pub const HEAL_MRFMETA_FORMAT: u64 = 1;
pub const HEAL_MRFMETA_VERSION_V1: u64 = 1;
lazy_static! {
pub static ref HEAL_MRF_DIR: String =
format!("{}{}{}{}{}", BUCKET_META_PREFIX, SLASH_SEPARATOR, HEAL_DIR, SLASH_SEPARATOR, "mrf");
static ref PATTERNS: Vec<Regex> = vec![
Regex::new(r"^buckets/.*/.metacache/.*").unwrap(),
Regex::new(r"^tmp/.*").unwrap(),
Regex::new(r"^multipart/.*").unwrap(),
Regex::new(r"^tmp-old/.*").unwrap(),
];
}
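/// A write that only partially succeeded, queued so the MRF heal routine can repair the affected bucket/object/versions.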
#[derive(Default)]
pub struct PartialOperation {
pub bucket: String,
pub object: String,
pub version_id: Option<String>,
pub versions: Vec<u8>,
pub set_index: usize,
pub pool_index: usize,
pub queued: DateTime<Utc>,
pub bitrot_scan: bool,
}
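/// Channel-backed queue of partial operations consumed by the MRF heal routine, plus its shutdown flags.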
pub struct MRFState {
tx: Sender<PartialOperation>,
rx: RwLock<Receiver<PartialOperation>>,
closed: AtomicBool,
closing: AtomicBool,
}
impl Default for MRFState {
fn default() -> Self {
Self::new()
}
}
impl MRFState {
pub fn new() -> MRFState {
let (tx, rx) = tokio::sync::mpsc::channel(MRF_OPS_QUEUE_SIZE as usize);
MRFState {
tx,
rx: RwLock::new(rx),
closed: Default::default(),
closing: Default::default(),
}
}
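/// Queue a partial operation for healing; dropped silently once shutdown has started.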
pub async fn add_partial(&self, op: PartialOperation) {
if self.closed.load(Ordering::SeqCst) || self.closing.load(Ordering::SeqCst) {
return;
}
let _ = self.tx.send(op).await;
}
/// Enhanced heal routine with cancellation support
///
/// This method implements the same healing logic as the original heal_routine,
/// but adds proper cancellation support via CancellationToken.
/// The core logic remains identical to maintain compatibility.
pub async fn heal_routine_with_cancel(&self, cancel_token: CancellationToken) {
info!("MRF heal routine started with cancellation support");
loop {
tokio::select! {
_ = cancel_token.cancelled() => {
info!("MRF heal routine received shutdown signal, exiting gracefully");
break;
}
op_result = async {
let mut rx_guard = self.rx.write().await;
rx_guard.recv().await
} => {
if let Some(op) = op_result {
// Special path filtering (original logic): skip internal/temporary
// objects. `continue` here targets the outer loop, so the whole
// operation is skipped.
if op.bucket == RUSTFS_META_BUCKET
&& PATTERNS.iter().any(|pattern| pattern.is_match(&op.object))
{
continue;
}
// Network reconnection delay (original logic)
let now = Utc::now();
if now.sub(op.queued).num_seconds() < 1 {
tokio::select! {
_ = cancel_token.cancelled() => {
info!("MRF heal routine cancelled during reconnection delay");
break;
}
_ = sleep(Duration::from_secs(1)) => {}
}
}
// Core healing logic (original logic preserved)
let scan_mode = if op.bitrot_scan { HEAL_DEEP_SCAN } else { HEAL_NORMAL_SCAN };
if op.object.is_empty() {
// Heal bucket (original logic)
if let Err(err) = heal_bucket(&op.bucket).await {
error!("heal bucket failed, bucket: {}, err: {:?}", op.bucket, err);
}
} else if op.versions.is_empty() {
// Heal single object (original logic)
if let Err(err) = heal_object(
&op.bucket,
&op.object,
&op.version_id.clone().unwrap_or_default(),
scan_mode
).await {
error!("heal object failed, bucket: {}, object: {}, err: {:?}", op.bucket, op.object, err);
}
} else {
// Heal multiple versions (original logic)
let vers = op.versions.len() / 16;
if vers > 0 {
for i in 0..vers {
// Check for cancellation before each version
if cancel_token.is_cancelled() {
info!("MRF heal routine cancelled during version processing");
return;
}
let start = i * 16;
let end = start + 16;
if let Err(err) = heal_object(
&op.bucket,
&op.object,
&Uuid::from_slice(&op.versions[start..end]).expect("").to_string(),
scan_mode,
).await {
error!("heal object failed, bucket: {}, object: {}, err: {:?}", op.bucket, op.object, err);
}
}
}
}
} else {
info!("MRF heal routine channel closed, exiting");
break;
}
}
}
}
info!("MRF heal routine stopped gracefully");
}
}

@@ -23,13 +23,14 @@ mod chunk_stream;
pub mod cmd;
pub mod compress;
pub mod config;
pub mod data_usage;
pub mod disk;
pub mod disks_layout;
pub mod endpoints;
pub mod erasure_coding;
pub mod error;
pub mod global;
pub mod heal;
pub mod lock_utils;
pub mod metrics_realtime;
pub mod notification_sys;
pub mod pools;

@@ -0,0 +1,136 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::disk::endpoint::Endpoint;
use crate::error::Result;
use rustfs_lock::client::{LockClient, local::LocalClient, remote::RemoteClient};
use std::collections::HashMap;
use std::sync::Arc;
/// Create unique lock clients from endpoints
/// This function creates one client per unique host:port combination
/// to avoid duplicate connections to the same server
pub async fn create_unique_clients(endpoints: &[Endpoint]) -> Result<Vec<Arc<dyn LockClient>>> {
let mut unique_endpoints: HashMap<String, &Endpoint> = HashMap::new();
// Collect unique endpoints based on host:port
for endpoint in endpoints {
if endpoint.is_local {
// For local endpoints, use "local" as the key
unique_endpoints.insert("local".to_string(), endpoint);
} else {
// For remote endpoints, use host:port as the key
let host_port = format!(
"{}:{}",
endpoint.url.host_str().unwrap_or("localhost"),
endpoint.url.port().unwrap_or(9000)
);
unique_endpoints.insert(host_port, endpoint);
}
}
let mut clients = Vec::new();
// Create clients for unique endpoints
for (_key, endpoint) in unique_endpoints {
if endpoint.is_local {
// For local endpoints, create a local lock client
let local_client = LocalClient::new();
clients.push(Arc::new(local_client) as Arc<dyn LockClient>);
} else {
// For remote endpoints, create a remote lock client
let remote_client = RemoteClient::new(endpoint.url.to_string());
clients.push(Arc::new(remote_client) as Arc<dyn LockClient>);
}
}
Ok(clients)
}
#[cfg(test)]
mod tests {
use super::*;
use url::Url;
#[tokio::test]
async fn test_create_unique_clients_local() {
let endpoints = vec![
Endpoint {
url: Url::parse("http://localhost:9000").unwrap(),
is_local: true,
pool_idx: 0,
set_idx: 0,
disk_idx: 0,
},
Endpoint {
url: Url::parse("http://localhost:9000").unwrap(),
is_local: true,
pool_idx: 0,
set_idx: 0,
disk_idx: 1,
},
];
let clients = create_unique_clients(&endpoints).await.unwrap();
// Should only create one client for local endpoints
assert_eq!(clients.len(), 1);
assert!(clients[0].is_local().await);
}
#[tokio::test]
async fn test_create_unique_clients_mixed() {
let endpoints = vec![
Endpoint {
url: Url::parse("http://localhost:9000").unwrap(),
is_local: true,
pool_idx: 0,
set_idx: 0,
disk_idx: 0,
},
Endpoint {
url: Url::parse("http://remote1:9000").unwrap(),
is_local: false,
pool_idx: 0,
set_idx: 0,
disk_idx: 1,
},
Endpoint {
url: Url::parse("http://remote1:9000").unwrap(),
is_local: false,
pool_idx: 0,
set_idx: 0,
disk_idx: 2,
},
Endpoint {
url: Url::parse("http://remote2:9000").unwrap(),
is_local: false,
pool_idx: 0,
set_idx: 0,
disk_idx: 3,
},
];
let clients = create_unique_clients(&endpoints).await.unwrap();
// Should create 3 clients: 1 local + 2 unique remote
assert_eq!(clients.len(), 3);
// Check that we have one local client
let local_count = clients.iter().filter(|c| futures::executor::block_on(c.is_local())).count();
assert_eq!(local_count, 1);
// Check that we have two remote clients
let remote_count = clients.iter().filter(|c| !futures::executor::block_on(c.is_local())).count();
assert_eq!(remote_count, 2);
}
}

@@ -15,7 +15,11 @@
use std::collections::{HashMap, HashSet};
use chrono::Utc;
use rustfs_common::globals::{GLOBAL_Local_Node_Name, GLOBAL_Rustfs_Addr};
use rustfs_common::{
globals::{GLOBAL_Local_Node_Name, GLOBAL_Rustfs_Addr},
heal_channel::DriveState,
metrics::globalMetrics,
};
use rustfs_madmin::metrics::{DiskIOStats, DiskMetric, RealtimeMetrics};
use rustfs_utils::os::get_drive_stats;
use serde::{Deserialize, Serialize};
@@ -23,10 +27,6 @@ use tracing::info;
use crate::{
admin_server_info::get_local_server_property,
heal::{
data_scanner_metric::globalScannerMetrics,
heal_commands::{DRIVE_STATE_OK, DRIVE_STATE_UNFORMATTED},
},
new_object_layer_fn,
store_api::StorageAPI,
// utils::os::get_drive_stats,
@@ -108,7 +108,7 @@ pub async fn collect_local_metrics(types: MetricType, opts: &CollectMetricsOpts)
if types.contains(&MetricType::SCANNER) {
info!("start get scanner metrics");
let metrics = globalScannerMetrics.report().await;
let metrics = globalMetrics.report().await;
real_time_metrics.aggregated.scanner = Some(metrics);
}
@@ -147,7 +147,7 @@ async fn collect_local_disks_metrics(disks: &HashSet<String>) -> HashMap<String,
continue;
}
if d.state != *DRIVE_STATE_OK && d.state != *DRIVE_STATE_UNFORMATTED {
if d.state != DriveState::Ok.to_string() && d.state != DriveState::Unformatted.to_string() {
metrics.insert(
d.endpoint.clone(),
DiskMetric {

@@ -15,6 +15,7 @@
use crate::bucket::versioning_sys::BucketVersioningSys;
use crate::cache_value::metacache_set::{ListPathRawOptions, list_path_raw};
use crate::config::com::{CONFIG_PREFIX, read_config, save_config};
use crate::data_usage::DATA_USAGE_CACHE_NAME;
use crate::disk::error::DiskError;
use crate::disk::{BUCKET_META_PREFIX, RUSTFS_META_BUCKET};
use crate::error::{Error, Result};
@@ -22,8 +23,6 @@ use crate::error::{
StorageError, is_err_bucket_exists, is_err_bucket_not_found, is_err_data_movement_overwrite, is_err_object_not_found,
is_err_version_not_found,
};
use crate::heal::data_usage::DATA_USAGE_CACHE_NAME;
use crate::heal::heal_commands::HealOpts;
use crate::new_object_layer_fn;
use crate::notification_sys::get_global_notification_sys;
use crate::set_disk::SetDisks;
@@ -36,6 +35,7 @@ use futures::future::BoxFuture;
use http::HeaderMap;
use rmp_serde::{Deserializer, Serializer};
use rustfs_common::defer;
use rustfs_common::heal_channel::HealOpts;
use rustfs_filemeta::{MetaCacheEntries, MetaCacheEntry, MetadataResolutionParams};
use rustfs_rio::{HashReader, WarpReader};
use rustfs_utils::path::{SLASH_SEPARATOR, encode_dir_object, path_join};

@@ -22,4 +22,4 @@ pub use http_auth::{build_auth_headers, verify_rpc_signature};
pub use peer_rest_client::PeerRestClient;
pub use peer_s3_client::{LocalPeerS3Client, PeerS3Client, RemotePeerS3Client, S3PeerSys};
pub use remote_disk::RemoteDisk;
pub use tonic_service::make_server;
pub use tonic_service::{NodeService, make_server};

@@ -16,7 +16,6 @@ use crate::error::{Error, Result};
use crate::{
endpoints::EndpointServerPools,
global::is_dist_erasure,
heal::heal_commands::BgHealState,
metrics_realtime::{CollectMetricsOpts, MetricType},
};
use rmp_serde::{Deserializer, Serializer};
@@ -29,13 +28,12 @@ use rustfs_madmin::{
use rustfs_protos::{
node_service_time_out_client,
proto_gen::node_service::{
BackgroundHealStatusRequest, DeleteBucketMetadataRequest, DeletePolicyRequest, DeleteServiceAccountRequest,
DeleteUserRequest, GetCpusRequest, GetMemInfoRequest, GetMetricsRequest, GetNetInfoRequest, GetOsInfoRequest,
GetPartitionsRequest, GetProcInfoRequest, GetSeLinuxInfoRequest, GetSysConfigRequest, GetSysErrorsRequest,
LoadBucketMetadataRequest, LoadGroupRequest, LoadPolicyMappingRequest, LoadPolicyRequest, LoadRebalanceMetaRequest,
LoadServiceAccountRequest, LoadTransitionTierConfigRequest, LoadUserRequest, LocalStorageInfoRequest, Mss,
ReloadPoolMetaRequest, ReloadSiteReplicationConfigRequest, ServerInfoRequest, SignalServiceRequest,
StartProfilingRequest, StopRebalanceRequest,
DeleteBucketMetadataRequest, DeletePolicyRequest, DeleteServiceAccountRequest, DeleteUserRequest, GetCpusRequest,
GetMemInfoRequest, GetMetricsRequest, GetNetInfoRequest, GetOsInfoRequest, GetPartitionsRequest, GetProcInfoRequest,
GetSeLinuxInfoRequest, GetSysConfigRequest, GetSysErrorsRequest, LoadBucketMetadataRequest, LoadGroupRequest,
LoadPolicyMappingRequest, LoadPolicyRequest, LoadRebalanceMetaRequest, LoadServiceAccountRequest,
LoadTransitionTierConfigRequest, LoadUserRequest, LocalStorageInfoRequest, Mss, ReloadPoolMetaRequest,
ReloadSiteReplicationConfigRequest, ServerInfoRequest, SignalServiceRequest, StartProfilingRequest, StopRebalanceRequest,
},
};
use rustfs_utils::XHost;
@@ -601,27 +599,6 @@ impl PeerRestClient {
Ok(())
}
pub async fn background_heal_status(&self) -> Result<BgHealState> {
let mut client = node_service_time_out_client(&self.grid_host)
.await
.map_err(|err| Error::other(err.to_string()))?;
let request = Request::new(BackgroundHealStatusRequest {});
let response = client.background_heal_status(request).await?.into_inner();
if !response.success {
if let Some(msg) = response.error_info {
return Err(Error::other(msg));
}
return Err(Error::other(""));
}
let data = response.bg_heal_state;
let mut buf = Deserializer::new(Cursor::new(data));
let bg_heal_state: BgHealState = Deserialize::deserialize(&mut buf)?;
Ok(bg_heal_state)
}
pub async fn get_metacache_listing(&self) -> Result<()> {
let _client = node_service_time_out_client(&self.grid_host)
.await

@@ -17,10 +17,6 @@ use crate::disk::error::{Error, Result};
use crate::disk::error_reduce::{BUCKET_OP_IGNORED_ERRS, is_all_buckets_not_found, reduce_write_quorum_errs};
use crate::disk::{DiskAPI, DiskStore};
use crate::global::GLOBAL_LOCAL_DISK_MAP;
use crate::heal::heal_commands::{
DRIVE_STATE_CORRUPT, DRIVE_STATE_MISSING, DRIVE_STATE_OFFLINE, DRIVE_STATE_OK, HEAL_ITEM_BUCKET, HealOpts,
};
use crate::heal::heal_ops::RUSTFS_RESERVED_BUCKET;
use crate::store::all_local_disk;
use crate::store_utils::is_reserved_or_invalid_bucket;
use crate::{
@@ -30,6 +26,7 @@ use crate::{
};
use async_trait::async_trait;
use futures::future::join_all;
use rustfs_common::heal_channel::{DriveState, HealItemType, HealOpts, RUSTFS_RESERVED_BUCKET};
use rustfs_madmin::heal_commands::{HealDriveInfo, HealResultItem};
use rustfs_protos::node_service_time_out_client;
use rustfs_protos::proto_gen::node_service::{
@@ -542,7 +539,7 @@ impl PeerS3Client for RemotePeerS3Client {
}
Ok(HealResultItem {
heal_item_type: HEAL_ITEM_BUCKET.to_string(),
heal_item_type: HealItemType::Bucket.to_string(),
bucket: bucket.to_string(),
set_count: 0,
..Default::default()
@@ -651,13 +648,13 @@ pub async fn heal_bucket_local(bucket: &str, opts: &HealOpts) -> Result<HealResu
let disk = match disk {
Some(disk) => disk,
None => {
bs_clone.write().await[index] = DRIVE_STATE_OFFLINE.to_string();
as_clone.write().await[index] = DRIVE_STATE_OFFLINE.to_string();
bs_clone.write().await[index] = DriveState::Offline.to_string();
as_clone.write().await[index] = DriveState::Offline.to_string();
return Some(Error::DiskNotFound);
}
};
bs_clone.write().await[index] = DRIVE_STATE_OK.to_string();
as_clone.write().await[index] = DRIVE_STATE_OK.to_string();
bs_clone.write().await[index] = DriveState::Ok.to_string();
as_clone.write().await[index] = DriveState::Ok.to_string();
if bucket == RUSTFS_RESERVED_BUCKET {
return None;
@@ -667,18 +664,18 @@ pub async fn heal_bucket_local(bucket: &str, opts: &HealOpts) -> Result<HealResu
Ok(_) => None,
Err(err) => match err {
Error::DiskNotFound => {
bs_clone.write().await[index] = DRIVE_STATE_OFFLINE.to_string();
as_clone.write().await[index] = DRIVE_STATE_OFFLINE.to_string();
bs_clone.write().await[index] = DriveState::Offline.to_string();
as_clone.write().await[index] = DriveState::Offline.to_string();
Some(err)
}
Error::VolumeNotFound => {
bs_clone.write().await[index] = DRIVE_STATE_MISSING.to_string();
as_clone.write().await[index] = DRIVE_STATE_MISSING.to_string();
bs_clone.write().await[index] = DriveState::Missing.to_string();
as_clone.write().await[index] = DriveState::Missing.to_string();
Some(err)
}
_ => {
bs_clone.write().await[index] = DRIVE_STATE_CORRUPT.to_string();
as_clone.write().await[index] = DRIVE_STATE_CORRUPT.to_string();
bs_clone.write().await[index] = DriveState::Corrupt.to_string();
as_clone.write().await[index] = DriveState::Corrupt.to_string();
Some(err)
}
},
@@ -687,7 +684,7 @@ pub async fn heal_bucket_local(bucket: &str, opts: &HealOpts) -> Result<HealResu
}
let errs = join_all(futures).await;
let mut res = HealResultItem {
heal_item_type: HEAL_ITEM_BUCKET.to_string(),
heal_item_type: HealItemType::Bucket.to_string(),
bucket: bucket.to_string(),
disk_count: disks.len(),
set_count: 0,
@@ -736,11 +733,11 @@ pub async fn heal_bucket_local(bucket: &str, opts: &HealOpts) -> Result<HealResu
let as_clone = after_state.clone();
let errs_clone = errs.to_vec();
futures.push(async move {
if bs_clone.read().await[idx] == DRIVE_STATE_MISSING {
if bs_clone.read().await[idx] == DriveState::Missing.to_string() {
info!("bucket not find, will recreate");
match disk.as_ref().unwrap().make_volume(&bucket).await {
Ok(_) => {
as_clone.write().await[idx] = DRIVE_STATE_OK.to_string();
as_clone.write().await[idx] = DriveState::Ok.to_string();
return None;
}
Err(err) => {

View File

@@ -21,9 +21,9 @@ use rustfs_protos::{
node_service_time_out_client,
proto_gen::node_service::{
CheckPartsRequest, DeletePathsRequest, DeleteRequest, DeleteVersionRequest, DeleteVersionsRequest, DeleteVolumeRequest,
DiskInfoRequest, ListDirRequest, ListVolumesRequest, MakeVolumeRequest, MakeVolumesRequest, NsScannerRequest,
ReadAllRequest, ReadMultipleRequest, ReadPartsRequest, ReadVersionRequest, ReadXlRequest, RenameDataRequest,
RenameFileRequest, StatVolumeRequest, UpdateMetadataRequest, VerifyFileRequest, WriteAllRequest, WriteMetadataRequest,
DiskInfoRequest, ListDirRequest, ListVolumesRequest, MakeVolumeRequest, MakeVolumesRequest, ReadAllRequest,
ReadMultipleRequest, ReadPartsRequest, ReadVersionRequest, ReadXlRequest, RenameDataRequest, RenameFileRequest,
StatVolumeRequest, UpdateMetadataRequest, VerifyFileRequest, WriteAllRequest, WriteMetadataRequest,
},
};
@@ -32,26 +32,15 @@ use crate::disk::{
ReadMultipleReq, ReadMultipleResp, ReadOptions, RenameDataResp, UpdateMetadataOpts, VolumeInfo, WalkDirOptions,
endpoint::Endpoint,
};
use crate::disk::{FileReader, FileWriter};
use crate::{
disk::error::{Error, Result},
rpc::build_auth_headers,
};
use crate::{
disk::{FileReader, FileWriter},
heal::{
data_scanner::ShouldSleepFn,
data_usage_cache::{DataUsageCache, DataUsageEntry},
heal_commands::{HealScanMode, HealingTracker},
},
};
use rustfs_filemeta::{FileInfo, ObjectPartInfo, RawFileInfo};
use rustfs_protos::proto_gen::node_service::RenamePartRequest;
use rustfs_rio::{HttpReader, HttpWriter};
use tokio::{
io::AsyncWrite,
sync::mpsc::{self, Sender},
};
use tokio_stream::{StreamExt, wrappers::ReceiverStream};
use tokio::io::AsyncWrite;
use tonic::Request;
use tracing::info;
use uuid::Uuid;
@@ -927,55 +916,6 @@ impl DiskAPI for RemoteDisk {
Ok(disk_info)
}
#[tracing::instrument(skip(self, cache, scan_mode, _we_sleep))]
async fn ns_scanner(
&self,
cache: &DataUsageCache,
updates: Sender<DataUsageEntry>,
scan_mode: HealScanMode,
_we_sleep: ShouldSleepFn,
) -> Result<DataUsageCache> {
info!("ns_scanner");
let cache = serde_json::to_string(cache)?;
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| Error::other(format!("can not get client, err: {err}")))?;
let (tx, rx) = mpsc::channel(10);
let in_stream = ReceiverStream::new(rx);
let mut response = client.ns_scanner(in_stream).await?.into_inner();
let request = NsScannerRequest {
disk: self.endpoint.to_string(),
cache,
scan_mode: scan_mode as u64,
};
tx.send(request)
.await
.map_err(|err| Error::other(format!("can not send request, err: {err}")))?;
loop {
match response.next().await {
Some(Ok(resp)) => {
if !resp.update.is_empty() {
let data_usage_cache = serde_json::from_str::<DataUsageEntry>(&resp.update)?;
let _ = updates.send(data_usage_cache).await;
} else if !resp.data_usage_cache.is_empty() {
let data_usage_cache = serde_json::from_str::<DataUsageCache>(&resp.data_usage_cache)?;
return Ok(data_usage_cache);
} else {
return Err(Error::other("scan was interrupted"));
}
}
_ => return Err(Error::other("scan was interrupted")),
}
}
}
#[tracing::instrument(skip(self))]
async fn healing(&self) -> Option<HealingTracker> {
None
}
}
#[cfg(test)]

View File

@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::{collections::HashMap, io::Cursor, pin::Pin};
use std::{collections::HashMap, io::Cursor, pin::Pin, sync::Arc};
// use common::error::Error as EcsError;
use crate::{
@@ -22,25 +22,21 @@ use crate::{
DeleteOptions, DiskAPI, DiskInfoOptions, DiskStore, FileInfoVersions, ReadMultipleReq, ReadOptions, UpdateMetadataOpts,
error::DiskError,
},
heal::{
data_usage_cache::DataUsageCache,
heal_commands::{HealOpts, get_local_background_heal_status},
},
metrics_realtime::{CollectMetricsOpts, MetricType, collect_local_metrics},
new_object_layer_fn,
rpc::{LocalPeerS3Client, PeerS3Client},
store::{all_local_disk_path, find_local_disk},
store_api::{BucketOptions, DeleteBucketOptions, MakeBucketOptions, StorageAPI},
};
use futures::{Stream, StreamExt};
use futures::Stream;
use futures_util::future::join_all;
use rustfs_lock::{GLOBAL_LOCAL_SERVER, Locker, lock_args::LockArgs};
use rustfs_common::globals::GLOBAL_Local_Node_Name;
use rustfs_common::{globals::GLOBAL_Local_Node_Name, heal_channel::HealOpts};
use bytes::Bytes;
use rmp_serde::{Deserializer, Serializer};
use rustfs_filemeta::{FileInfo, MetacacheReader};
use rustfs_lock::{LockClient, LockRequest};
use rustfs_madmin::health::{
get_cpus, get_mem_info, get_os_info, get_partitions, get_proc_info, get_sys_config, get_sys_errors, get_sys_services,
};
@@ -81,11 +77,16 @@ type ResponseStream<T> = Pin<Box<dyn Stream<Item = Result<T, tonic::Status>> + S
#[derive(Debug)]
pub struct NodeService {
local_peer: LocalPeerS3Client,
lock_manager: Arc<rustfs_lock::LocalClient>,
}
pub fn make_server() -> NodeService {
let local_peer = LocalPeerS3Client::new(None, None);
NodeService { local_peer }
let lock_manager = Arc::new(rustfs_lock::LocalClient::new());
NodeService {
local_peer,
lock_manager,
}
}
impl NodeService {
@@ -1434,214 +1435,158 @@ impl Node for NodeService {
}
}
type NsScannerStream = ResponseStream<NsScannerResponse>;
async fn ns_scanner(&self, request: Request<Streaming<NsScannerRequest>>) -> Result<Response<Self::NsScannerStream>, Status> {
info!("ns_scanner");
let mut in_stream = request.into_inner();
let (tx, rx) = mpsc::channel(10);
tokio::spawn(async move {
match in_stream.next().await {
Some(Ok(request)) => {
if let Some(disk) = find_local_disk(&request.disk).await {
let cache = match serde_json::from_str::<DataUsageCache>(&request.cache) {
Ok(cache) => cache,
Err(err) => {
tx.send(Ok(NsScannerResponse {
success: false,
update: "".to_string(),
data_usage_cache: "".to_string(),
error: Some(DiskError::other(format!("decode DataUsageCache failed: {err}")).into()),
}))
.await
.expect("working rx");
return;
}
};
let (updates_tx, mut updates_rx) = mpsc::channel(100);
let tx_clone = tx.clone();
let task = tokio::spawn(async move {
loop {
match updates_rx.recv().await {
Some(update) => {
let update = serde_json::to_string(&update).expect("encode failed");
tx_clone
.send(Ok(NsScannerResponse {
success: true,
update,
data_usage_cache: "".to_string(),
error: None,
}))
.await
.expect("working rx");
}
None => return,
}
}
});
let data_usage_cache = disk.ns_scanner(&cache, updates_tx, request.scan_mode as usize, None).await;
let _ = task.await;
match data_usage_cache {
Ok(data_usage_cache) => {
let data_usage_cache = serde_json::to_string(&data_usage_cache).expect("encode failed");
tx.send(Ok(NsScannerResponse {
success: true,
update: "".to_string(),
data_usage_cache,
error: None,
}))
.await
.expect("working rx");
}
Err(err) => {
tx.send(Ok(NsScannerResponse {
success: false,
update: "".to_string(),
data_usage_cache: "".to_string(),
error: Some(err.into()),
}))
.await
.expect("working rx");
}
}
} else {
tx.send(Ok(NsScannerResponse {
success: false,
update: "".to_string(),
data_usage_cache: "".to_string(),
error: Some(DiskError::other("can not find disk".to_string()).into()),
}))
.await
.expect("working rx");
}
}
_ => todo!(),
}
});
let out_stream = ReceiverStream::new(rx);
Ok(tonic::Response::new(Box::pin(out_stream)))
}
async fn lock(&self, request: Request<GenerallyLockRequest>) -> Result<Response<GenerallyLockResponse>, Status> {
let request = request.into_inner();
match &serde_json::from_str::<LockArgs>(&request.args) {
Ok(args) => match GLOBAL_LOCAL_SERVER.write().await.lock(args).await {
Ok(result) => Ok(tonic::Response::new(GenerallyLockResponse {
success: result,
error_info: None,
})),
Err(err) => Ok(tonic::Response::new(GenerallyLockResponse {
// Parse the request to extract resource and owner
let args: LockRequest = match serde_json::from_str(&request.args) {
Ok(args) => args,
Err(err) => {
return Ok(tonic::Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not lock, args: {args}, err: {err}")),
})),
},
error_info: Some(format!("can not decode args, err: {err}")),
}));
}
};
match self.lock_manager.acquire_exclusive(&args).await {
Ok(result) => Ok(tonic::Response::new(GenerallyLockResponse {
success: result.success,
error_info: None,
})),
Err(err) => Ok(tonic::Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not decode args, err: {err}")),
error_info: Some(format!(
"can not lock, resource: {0}, owner: {1}, err: {2}",
args.resource, args.owner, err
)),
})),
}
}
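For reference, the lock handler above expects args to carry a JSON-serialized LockRequest, the same payload RemoteClient produces later in this comparison. A minimal sketch of building that payload, assuming the LockRequest::new and with_acquire_timeout constructors shown in the lock crate below and an assumed LockType re-export from rustfs_lock:
// Illustrative sketch (not part of this change): build the JSON payload the
// lock handler above decodes. LockRequest::new and with_acquire_timeout are
// shown in the lock crate later in this comparison; the LockType re-export
// path is an assumption.
use std::time::Duration;
use rustfs_lock::{LockRequest, LockType};
fn encode_lock_args() -> serde_json::Result<String> {
    let request = LockRequest::new("test-bucket/test-object", LockType::Exclusive, "node-1")
        .with_acquire_timeout(Duration::from_secs(30));
    // The handler runs serde_json::from_str::<LockRequest>(&request.args)
    // and then calls lock_manager.acquire_exclusive(&args).
    serde_json::to_string(&request)
}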
async fn un_lock(&self, request: Request<GenerallyLockRequest>) -> Result<Response<GenerallyLockResponse>, Status> {
let request = request.into_inner();
match &serde_json::from_str::<LockArgs>(&request.args) {
Ok(args) => match GLOBAL_LOCAL_SERVER.write().await.unlock(args).await {
Ok(result) => Ok(tonic::Response::new(GenerallyLockResponse {
success: result,
error_info: None,
})),
Err(err) => Ok(tonic::Response::new(GenerallyLockResponse {
let args: LockRequest = match serde_json::from_str(&request.args) {
Ok(args) => args,
Err(err) => {
return Ok(tonic::Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not unlock, args: {args}, err: {err}")),
})),
},
error_info: Some(format!("can not decode args, err: {err}")),
}));
}
};
match self.lock_manager.release(&args.lock_id).await {
Ok(_) => Ok(tonic::Response::new(GenerallyLockResponse {
success: true,
error_info: None,
})),
Err(err) => Ok(tonic::Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not decode args, err: {err}")),
error_info: Some(format!(
"can not unlock, resource: {0}, owner: {1}, err: {2}",
args.resource, args.owner, err
)),
})),
}
}
async fn r_lock(&self, request: Request<GenerallyLockRequest>) -> Result<Response<GenerallyLockResponse>, Status> {
let request = request.into_inner();
match &serde_json::from_str::<LockArgs>(&request.args) {
Ok(args) => match GLOBAL_LOCAL_SERVER.write().await.rlock(args).await {
Ok(result) => Ok(tonic::Response::new(GenerallyLockResponse {
success: result,
error_info: None,
})),
Err(err) => Ok(tonic::Response::new(GenerallyLockResponse {
let args: LockRequest = match serde_json::from_str(&request.args) {
Ok(args) => args,
Err(err) => {
return Ok(tonic::Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not rlock, args: {args}, err: {err}")),
})),
},
error_info: Some(format!("can not decode args, err: {err}")),
}));
}
};
match self.lock_manager.acquire_shared(&args).await {
Ok(result) => Ok(tonic::Response::new(GenerallyLockResponse {
success: result.success,
error_info: None,
})),
Err(err) => Ok(tonic::Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not decode args, err: {err}")),
error_info: Some(format!(
"can not rlock, resource: {0}, owner: {1}, err: {2}",
args.resource, args.owner, err
)),
})),
}
}
async fn r_un_lock(&self, request: Request<GenerallyLockRequest>) -> Result<Response<GenerallyLockResponse>, Status> {
let request = request.into_inner();
match &serde_json::from_str::<LockArgs>(&request.args) {
Ok(args) => match GLOBAL_LOCAL_SERVER.write().await.runlock(args).await {
Ok(result) => Ok(tonic::Response::new(GenerallyLockResponse {
success: result,
error_info: None,
})),
Err(err) => Ok(tonic::Response::new(GenerallyLockResponse {
let args: LockRequest = match serde_json::from_str(&request.args) {
Ok(args) => args,
Err(err) => {
return Ok(tonic::Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not runlock, args: {args}, err: {err}")),
})),
},
error_info: Some(format!("can not decode args, err: {err}")),
}));
}
};
match self.lock_manager.release(&args.lock_id).await {
Ok(_) => Ok(tonic::Response::new(GenerallyLockResponse {
success: true,
error_info: None,
})),
Err(err) => Ok(tonic::Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not decode args, err: {err}")),
error_info: Some(format!(
"can not runlock, resource: {0}, owner: {1}, err: {2}",
args.resource, args.owner, err
)),
})),
}
}
async fn force_un_lock(&self, request: Request<GenerallyLockRequest>) -> Result<Response<GenerallyLockResponse>, Status> {
let request = request.into_inner();
match &serde_json::from_str::<LockArgs>(&request.args) {
Ok(args) => match GLOBAL_LOCAL_SERVER.write().await.force_unlock(args).await {
Ok(result) => Ok(tonic::Response::new(GenerallyLockResponse {
success: result,
error_info: None,
})),
Err(err) => Ok(tonic::Response::new(GenerallyLockResponse {
let args: LockRequest = match serde_json::from_str(&request.args) {
Ok(args) => args,
Err(err) => {
return Ok(tonic::Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not force_unlock, args: {args}, err: {err}")),
})),
},
error_info: Some(format!("can not decode args, err: {err}")),
}));
}
};
match self.lock_manager.release(&args.lock_id).await {
Ok(_) => Ok(tonic::Response::new(GenerallyLockResponse {
success: true,
error_info: None,
})),
Err(err) => Ok(tonic::Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not decode args, err: {err}")),
error_info: Some(format!(
"can not force_unlock, resource: {0}, owner: {1}, err: {2}",
args.resource, args.owner, err
)),
})),
}
}
async fn refresh(&self, request: Request<GenerallyLockRequest>) -> Result<Response<GenerallyLockResponse>, Status> {
let request = request.into_inner();
match &serde_json::from_str::<LockArgs>(&request.args) {
Ok(args) => match GLOBAL_LOCAL_SERVER.write().await.refresh(args).await {
Ok(result) => Ok(tonic::Response::new(GenerallyLockResponse {
success: result,
error_info: None,
})),
Err(err) => Ok(tonic::Response::new(GenerallyLockResponse {
let _args: LockRequest = match serde_json::from_str(&request.args) {
Ok(args) => args,
Err(err) => {
return Ok(tonic::Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not refresh, args: {args}, err: {err}")),
})),
},
Err(err) => Ok(tonic::Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not decode args, err: {err}")),
})),
}
error_info: Some(format!("can not decode args, err: {err}")),
}));
}
};
Ok(tonic::Response::new(GenerallyLockResponse {
success: true,
error_info: None,
}))
}
async fn local_storage_info(
@@ -2157,28 +2102,7 @@ impl Node for NodeService {
&self,
_request: Request<BackgroundHealStatusRequest>,
) -> Result<Response<BackgroundHealStatusResponse>, Status> {
let (state, ok) = get_local_background_heal_status().await;
if !ok {
return Ok(tonic::Response::new(BackgroundHealStatusResponse {
success: false,
bg_heal_state: Bytes::new(),
error_info: Some("errServerNotInitialized".to_string()),
}));
}
let mut buf = Vec::new();
if let Err(err) = state.serialize(&mut Serializer::new(&mut buf)) {
return Ok(tonic::Response::new(BackgroundHealStatusResponse {
success: false,
bg_heal_state: Bytes::new(),
error_info: Some(err.to_string()),
}));
}
Ok(tonic::Response::new(BackgroundHealStatusResponse {
success: true,
bg_heal_state: buf.into(),
error_info: None,
}))
todo!()
}
async fn get_metacache_listing(
@@ -3373,20 +3297,6 @@ mod tests {
assert!(!proc_response.proc_info.is_empty());
}
#[tokio::test]
async fn test_background_heal_status() {
let service = create_test_node_service();
let request = Request::new(BackgroundHealStatusRequest {});
let response = service.background_heal_status(request).await;
assert!(response.is_ok());
let heal_response = response.unwrap().into_inner();
// May fail if heal status is not available
assert!(heal_response.success || heal_response.error_info.is_some());
}
#[tokio::test]
async fn test_reload_pool_meta() {
let service = create_test_node_service();
@@ -3629,15 +3539,15 @@ mod tests {
// Note: signal_service test is skipped because it contains todo!() and would panic
#[test]
fn test_node_service_debug() {
#[tokio::test]
async fn test_node_service_debug() {
let service = create_test_node_service();
let debug_str = format!("{service:?}");
assert!(debug_str.contains("NodeService"));
}
#[test]
fn test_node_service_creation() {
#[tokio::test]
async fn test_node_service_creation() {
let service1 = make_server();
let service2 = make_server();
@@ -3646,14 +3556,6 @@ mod tests {
assert!(format!("{service2:?}").contains("NodeService"));
}
#[tokio::test]
async fn test_all_disk_method() {
let service = create_test_node_service();
let disks = service.all_disk().await;
// Should return empty vector in test environment
assert!(disks.is_empty());
}
#[tokio::test]
async fn test_find_disk_method() {
let service = create_test_node_service();

File diff suppressed because it is too large

View File

@@ -28,9 +28,6 @@ use crate::{
endpoints::{Endpoints, PoolEndpoints},
error::StorageError,
global::{GLOBAL_LOCAL_DISK_SET_DRIVES, is_dist_erasure},
heal::heal_commands::{
DRIVE_STATE_CORRUPT, DRIVE_STATE_MISSING, DRIVE_STATE_OFFLINE, DRIVE_STATE_OK, HEAL_ITEM_METADATA, HealOpts,
},
set_disk::SetDisks,
store_api::{
BucketInfo, BucketOptions, CompletePart, DeleteBucketOptions, DeletedObject, GetObjectReader, HTTPRangeSpec,
@@ -41,26 +38,30 @@ use crate::{
};
use futures::future::join_all;
use http::HeaderMap;
use rustfs_common::globals::GLOBAL_Local_Node_Name;
use rustfs_common::heal_channel::HealOpts;
use rustfs_common::{
globals::GLOBAL_Local_Node_Name,
heal_channel::{DriveState, HealItemType},
};
use rustfs_filemeta::FileInfo;
use rustfs_lock::{LockApi, namespace_lock::NsLockMap, new_lock_api};
use rustfs_madmin::heal_commands::{HealDriveInfo, HealResultItem};
use rustfs_utils::{crc_hash, path::path_join_buf, sip_hash};
use tokio::sync::RwLock;
use uuid::Uuid;
use crate::heal::heal_ops::HealSequence;
use tokio::sync::broadcast::{Receiver, Sender};
use tokio::time::Duration;
use tracing::warn;
use tracing::{error, info};
use crate::lock_utils::create_unique_clients;
#[derive(Debug, Clone)]
pub struct Sets {
pub id: Uuid,
// pub sets: Vec<Objects>,
// pub disk_set: Vec<Vec<Option<DiskStore>>>, // [set_count_idx][set_drive_count_idx] = disk_idx
pub lockers: Vec<Vec<LockApi>>,
pub disk_set: Vec<Arc<SetDisks>>, // [set_count_idx][set_drive_count_idx] = disk_idx
pub pool_idx: usize,
pub endpoints: PoolEndpoints,
@@ -93,27 +94,25 @@ impl Sets {
let set_count = fm.erasure.sets.len();
let set_drive_count = fm.erasure.sets[0].len();
let mut unique: Vec<Vec<String>> = vec![vec![]; set_count];
let mut lockers: Vec<Vec<LockApi>> = vec![vec![]; set_count];
endpoints.endpoints.as_ref().iter().enumerate().for_each(|(idx, endpoint)| {
let mut unique: Vec<Vec<String>> = (0..set_count).map(|_| vec![]).collect();
for (idx, endpoint) in endpoints.endpoints.as_ref().iter().enumerate() {
let set_idx = idx / set_drive_count;
if endpoint.is_local && !unique[set_idx].contains(&"local".to_string()) {
unique[set_idx].push("local".to_string());
lockers[set_idx].push(new_lock_api(true, None));
}
if !endpoint.is_local {
let host_port = format!("{}:{}", endpoint.url.host_str().unwrap(), endpoint.url.port().unwrap());
if !unique[set_idx].contains(&host_port) {
unique[set_idx].push(host_port);
lockers[set_idx].push(new_lock_api(false, Some(endpoint.url.clone())));
}
}
});
}
let mut disk_set = Vec::with_capacity(set_count);
for (i, locker) in lockers.iter().enumerate().take(set_count) {
for i in 0..set_count {
let mut set_drive = Vec::with_capacity(set_drive_count);
let mut set_endpoints = Vec::with_capacity(set_drive_count);
for j in 0..set_drive_count {
@@ -121,7 +120,6 @@ impl Sets {
let mut disk = disks[idx].clone();
let endpoint = endpoints.endpoints.as_ref()[idx].clone();
// let endpoint = endpoints.endpoints.as_ref().get(idx).cloned();
set_endpoints.push(endpoint);
if disk.is_none() {
@@ -165,12 +163,13 @@ impl Sets {
}
}
// warn!("sets new set_drive {:?}", &set_drive);
let lock_clients = create_unique_clients(&set_endpoints).await?;
let namespace_lock = rustfs_lock::NamespaceLock::with_clients(format!("set-{i}"), lock_clients);
let set_disks = SetDisks::new(
locker.clone(),
Arc::new(namespace_lock),
GLOBAL_Local_Node_Name.read().await.to_string(),
Arc::new(RwLock::new(NsLockMap::new(is_dist_erasure().await))),
Arc::new(RwLock::new(set_drive)),
set_drive_count,
parity_count,
@@ -190,7 +189,6 @@ impl Sets {
id: fm.id,
// sets: todo!(),
disk_set,
lockers,
pool_idx,
endpoints: endpoints.clone(),
format: fm.clone(),
@@ -543,7 +541,7 @@ impl StorageAPI for Sets {
objects: Vec<ObjectToDelete>,
opts: ObjectOptions,
) -> Result<(Vec<DeletedObject>, Vec<Option<Error>>)> {
// 默认返回值
// Default return value
let mut del_objects = vec![DeletedObject::default(); objects.len()];
let mut del_errs = Vec::with_capacity(objects.len());
@@ -602,7 +600,7 @@ impl StorageAPI for Sets {
// del_errs.extend(errs);
// }
// TODO: 并发
// TODO: Implement concurrency
for (k, v) in set_obj_map {
let disks = self.get_disks(k);
let objs: Vec<ObjectToDelete> = v.iter().map(|v| v.obj.clone()).collect();
@@ -789,7 +787,7 @@ impl StorageAPI for Sets {
Err(err) => return Ok((HealResultItem::default(), Some(err))),
};
let mut res = HealResultItem {
heal_item_type: HEAL_ITEM_METADATA.to_string(),
heal_item_type: HealItemType::Metadata.to_string(),
detail: "disk-format".to_string(),
disk_count: self.set_count * self.set_drive_count,
set_count: self.set_count,
@@ -813,7 +811,6 @@ impl StorageAPI for Sets {
// return Ok((res, Some(Error::new(DiskError::CorruptedFormat))));
// }
let format_op_id = Uuid::new_v4().to_string();
let (new_format_sets, _) = new_heal_format_sets(&ref_format, self.set_count, self.set_drive_count, &formats, &errs);
if !dry_run {
let mut tmp_new_formats = vec![None; self.set_count * self.set_drive_count];
@@ -821,14 +818,14 @@ impl StorageAPI for Sets {
for (j, fm) in set.iter().enumerate() {
if let Some(fm) = fm {
res.after.drives[i * self.set_drive_count + j].uuid = fm.erasure.this.to_string();
res.after.drives[i * self.set_drive_count + j].state = DRIVE_STATE_OK.to_string();
res.after.drives[i * self.set_drive_count + j].state = DriveState::Ok.to_string();
tmp_new_formats[i * self.set_drive_count + j] = Some(fm.clone());
}
}
}
// Save new formats `format.json` on unformatted disks.
for (fm, disk) in tmp_new_formats.iter_mut().zip(disks.iter()) {
if fm.is_some() && disk.is_some() && save_format_file(disk, fm, &format_op_id).await.is_err() {
if fm.is_some() && disk.is_some() && save_format_file(disk, fm).await.is_err() {
let _ = disk.as_ref().unwrap().close().await;
*fm = None;
}
@@ -871,17 +868,6 @@ impl StorageAPI for Sets {
.await
}
#[tracing::instrument(skip(self))]
async fn heal_objects(
&self,
_bucket: &str,
_prefix: &str,
_opts: &HealOpts,
_hs: Arc<HealSequence>,
_is_meta: bool,
) -> Result<()> {
unimplemented!()
}
#[tracing::instrument(skip(self))]
async fn get_pool_and_set(&self, _id: &str) -> Result<(Option<usize>, Option<usize>, Option<usize>)> {
unimplemented!()
}
@@ -889,6 +875,13 @@ impl StorageAPI for Sets {
async fn check_abandoned_parts(&self, _bucket: &str, _object: &str, _opts: &HealOpts) -> Result<()> {
unimplemented!()
}
#[tracing::instrument(skip(self))]
async fn verify_object_integrity(&self, bucket: &str, object: &str, opts: &ObjectOptions) -> Result<()> {
self.get_disks_by_key(object)
.verify_object_integrity(bucket, object, opts)
.await
}
}
async fn _close_storage_disks(disks: &[Option<DiskStore>]) {
@@ -959,17 +952,17 @@ fn formats_to_drives_info(endpoints: &Endpoints, formats: &[Option<FormatV3>], e
for (index, format) in formats.iter().enumerate() {
let drive = endpoints.get_string(index);
let state = if format.is_some() {
DRIVE_STATE_OK
DriveState::Ok.to_string()
} else if let Some(Some(err)) = errs.get(index) {
if *err == DiskError::UnformattedDisk {
DRIVE_STATE_MISSING
DriveState::Missing.to_string()
} else if *err == DiskError::DiskNotFound {
DRIVE_STATE_OFFLINE
DriveState::Offline.to_string()
} else {
DRIVE_STATE_CORRUPT
DriveState::Corrupt.to_string()
}
} else {
DRIVE_STATE_CORRUPT
DriveState::Corrupt.to_string()
};
let uuid = if let Some(format) = format {

View File

@@ -30,11 +30,6 @@ use crate::global::{
GLOBAL_LOCAL_DISK_MAP, GLOBAL_LOCAL_DISK_SET_DRIVES, GLOBAL_TierConfigMgr, get_global_endpoints, is_dist_erasure,
is_erasure_sd, set_global_deployment_id, set_object_layer,
};
use crate::heal::data_usage::{DATA_USAGE_ROOT, DataUsageInfo};
use crate::heal::data_usage_cache::{DataUsageCache, DataUsageCacheInfo};
use crate::heal::heal_commands::{HEAL_ITEM_METADATA, HealOpts, HealScanMode};
use crate::heal::heal_ops::{HealEntryFn, HealSequence};
use crate::new_object_layer_fn;
use crate::notification_sys::get_global_notification_sys;
use crate::pools::PoolMeta;
use crate::rebalance::RebalanceMeta;
@@ -54,13 +49,12 @@ use crate::{
store_init,
};
use futures::future::join_all;
use glob::Pattern;
use http::HeaderMap;
use lazy_static::lazy_static;
use rand::Rng as _;
use rustfs_common::globals::{GLOBAL_Local_Node_Name, GLOBAL_Rustfs_Host, GLOBAL_Rustfs_Port};
use rustfs_common::heal_channel::{HealItemType, HealOpts};
use rustfs_filemeta::FileInfo;
use rustfs_filemeta::MetaCacheEntry;
use rustfs_madmin::heal_commands::HealResultItem;
use rustfs_utils::crypto::base64_decode;
use rustfs_utils::path::{SLASH_SEPARATOR, decode_dir_object, encode_dir_object, path_join_buf};
@@ -73,9 +67,8 @@ use std::time::SystemTime;
use std::{collections::HashMap, sync::Arc, time::Duration};
use time::OffsetDateTime;
use tokio::select;
use tokio::sync::mpsc::Sender;
use tokio::sync::{RwLock, broadcast, mpsc};
use tokio::time::{interval, sleep};
use tokio::sync::{RwLock, broadcast};
use tokio::time::sleep;
use tracing::{debug, info};
use tracing::{error, warn};
use uuid::Uuid;
@@ -811,123 +804,6 @@ impl ECStore {
errs
}
pub async fn ns_scanner(
&self,
updates: Sender<DataUsageInfo>,
want_cycle: usize,
heal_scan_mode: HealScanMode,
) -> Result<()> {
info!("ns_scanner updates - {}", want_cycle);
let all_buckets = self.list_bucket(&BucketOptions::default()).await?;
if all_buckets.is_empty() {
info!("No buckets found");
let _ = updates.send(DataUsageInfo::default()).await;
return Ok(());
}
let mut total_results = 0;
let mut result_index = 0;
self.pools.iter().for_each(|pool| {
total_results += pool.disk_set.len();
});
let results = Arc::new(RwLock::new(vec![DataUsageCache::default(); total_results]));
let (cancel, _) = broadcast::channel(100);
let first_err = Arc::new(RwLock::new(None));
let mut futures = Vec::new();
for pool in self.pools.iter() {
for set in pool.disk_set.iter() {
let index = result_index;
let results_clone = results.clone();
let first_err_clone = first_err.clone();
let cancel_clone = cancel.clone();
let all_buckets_clone = all_buckets.clone();
futures.push(async move {
let (tx, mut rx) = mpsc::channel(1);
let task = tokio::spawn(async move {
loop {
match rx.recv().await {
Some(info) => {
results_clone.write().await[index] = info;
}
None => {
return;
}
}
}
});
if let Err(err) = set
.clone()
.ns_scanner(&all_buckets_clone, want_cycle as u32, tx, heal_scan_mode)
.await
{
let mut f_w = first_err_clone.write().await;
if f_w.is_none() {
*f_w = Some(err);
}
let _ = cancel_clone.send(true);
return;
}
let _ = task.await;
});
result_index += 1;
}
}
let (update_closer_tx, mut update_close_rx) = mpsc::channel(10);
let mut ctx_clone = cancel.subscribe();
let all_buckets_clone = all_buckets.clone();
// New: read the scanner interval from an environment variable (default: 30 seconds)
let ns_scanner_interval_secs = std::env::var("RUSTFS_NS_SCANNER_INTERVAL")
.ok()
.and_then(|v| v.parse::<u64>().ok())
.unwrap_or(30);
// Check whether background tasks should be skipped
let skip_background_task = std::env::var("RUSTFS_SKIP_BACKGROUND_TASK")
.ok()
.and_then(|v| v.parse::<bool>().ok())
.unwrap_or(false);
if skip_background_task {
info!("跳过后台任务执行RUSTFS_SKIP_BACKGROUND_TASK=true");
return Ok(());
}
let task = tokio::spawn(async move {
let mut last_update: Option<SystemTime> = None;
let mut interval = interval(Duration::from_secs(ns_scanner_interval_secs));
let all_merged = Arc::new(RwLock::new(DataUsageCache::default()));
loop {
select! {
_ = ctx_clone.recv() => {
return;
}
_ = update_close_rx.recv() => {
update_scan(all_merged.clone(), results.clone(), &mut last_update, all_buckets_clone.clone(), updates.clone()).await;
return;
}
_ = interval.tick() => {
update_scan(all_merged.clone(), results.clone(), &mut last_update, all_buckets_clone.clone(), updates.clone()).await;
}
}
}
});
let _ = join_all(futures).await;
let mut ctx_closer = cancel.subscribe();
select! {
_ = update_closer_tx.send(true) => {
}
_ = ctx_closer.recv() => {
}
}
let _ = task.await;
if let Some(err) = first_err.read().await.as_ref() {
return Err(err.clone());
}
Ok(())
}
async fn get_latest_object_info_with_idx(
&self,
bucket: &str,
@@ -1068,34 +944,6 @@ impl ECStore {
}
}
#[tracing::instrument(level = "info", skip(all_buckets, updates))]
async fn update_scan(
all_merged: Arc<RwLock<DataUsageCache>>,
results: Arc<RwLock<Vec<DataUsageCache>>>,
last_update: &mut Option<SystemTime>,
all_buckets: Vec<BucketInfo>,
updates: Sender<DataUsageInfo>,
) {
let mut w = all_merged.write().await;
*w = DataUsageCache {
info: DataUsageCacheInfo {
name: DATA_USAGE_ROOT.to_string(),
..Default::default()
},
..Default::default()
};
for info in results.read().await.iter() {
if info.info.last_update.is_none() {
return;
}
w.merge(info);
}
if (last_update.is_none() || w.info.last_update > *last_update) && w.root().is_some() {
let _ = updates.send(w.dui(&w.info.name, &all_buckets)).await;
*last_update = w.info.last_update;
}
}
pub async fn find_local_disk(disk_path: &String) -> Option<DiskStore> {
let disk_map = GLOBAL_LOCAL_DISK_MAP.read().await;
@@ -2237,7 +2085,7 @@ impl StorageAPI for ECStore {
async fn heal_format(&self, dry_run: bool) -> Result<(HealResultItem, Option<Error>)> {
info!("heal_format");
let mut r = HealResultItem {
heal_item_type: HEAL_ITEM_METADATA.to_string(),
heal_item_type: HealItemType::Metadata.to_string(),
detail: "disk-format".to_string(),
..Default::default()
};
@@ -2351,120 +2199,6 @@ impl StorageAPI for ECStore {
Ok((HealResultItem::default(), Some(Error::FileNotFound)))
}
#[tracing::instrument(skip(self))]
async fn heal_objects(
&self,
bucket: &str,
prefix: &str,
opts: &HealOpts,
hs: Arc<HealSequence>,
is_meta: bool,
) -> Result<()> {
info!("heal objects");
let opts_clone = *opts;
let heal_entry: HealEntryFn = Arc::new(move |bucket: String, entry: MetaCacheEntry, scan_mode: HealScanMode| {
let opts_clone = opts_clone;
let hs_clone = hs.clone();
Box::pin(async move {
if entry.is_dir() {
return Ok(());
}
if bucket == RUSTFS_META_BUCKET
&& Pattern::new("buckets/*/.metacache/*")
.map(|p| p.matches(&entry.name))
.unwrap_or(false)
|| Pattern::new("tmp/*").map(|p| p.matches(&entry.name)).unwrap_or(false)
|| Pattern::new("multipart/*").map(|p| p.matches(&entry.name)).unwrap_or(false)
|| Pattern::new("tmp-old/*").map(|p| p.matches(&entry.name)).unwrap_or(false)
{
return Ok(());
}
let fivs = match entry.file_info_versions(&bucket) {
Ok(fivs) => fivs,
Err(_) => {
return if is_meta {
HealSequence::heal_meta_object(hs_clone.clone(), &bucket, &entry.name, "", scan_mode).await
} else {
HealSequence::heal_object(hs_clone.clone(), &bucket, &entry.name, "", scan_mode).await
};
}
};
if opts_clone.remove && !opts_clone.dry_run {
let Some(store) = new_object_layer_fn() else {
return Err(Error::other("errServerNotInitialized"));
};
if let Err(err) = store.check_abandoned_parts(&bucket, &entry.name, &opts_clone).await {
info!("unable to check object {}/{} for abandoned data: {}", bucket, entry.name, err.to_string());
}
}
for version in fivs.versions.iter() {
if is_meta {
if let Err(err) = HealSequence::heal_meta_object(
hs_clone.clone(),
&bucket,
&version.name,
&version.version_id.map(|v| v.to_string()).unwrap_or("".to_string()),
scan_mode,
)
.await
{
match err {
Error::FileNotFound | Error::FileVersionNotFound => {}
_ => {
return Err(err);
}
}
}
} else if let Err(err) = HealSequence::heal_object(
hs_clone.clone(),
&bucket,
&version.name,
&version.version_id.map(|v| v.to_string()).unwrap_or("".to_string()),
scan_mode,
)
.await
{
match err {
Error::FileNotFound | Error::FileVersionNotFound => {}
_ => {
return Err(err);
}
}
}
}
Ok(())
})
});
let mut first_err = None;
for (idx, pool) in self.pools.iter().enumerate() {
if opts.pool.is_some() && opts.pool.unwrap() != idx {
continue;
}
//TODO: IsSuspended
for (idx, set) in pool.disk_set.iter().enumerate() {
if opts.set.is_some() && opts.set.unwrap() != idx {
continue;
}
if let Err(err) = set.list_and_heal(bucket, prefix, opts, heal_entry.clone()).await {
if first_err.is_none() {
first_err = Some(err)
}
}
}
}
if first_err.is_some() {
return Err(first_err.unwrap());
}
Ok(())
}
#[tracing::instrument(skip(self))]
async fn get_pool_and_set(&self, id: &str) -> Result<(Option<usize>, Option<usize>, Option<usize>)> {
for (pool_idx, pool) in self.pools.iter().enumerate() {
@@ -2501,6 +2235,13 @@ impl StorageAPI for ECStore {
Ok(())
}
async fn verify_object_integrity(&self, bucket: &str, object: &str, opts: &ObjectOptions) -> Result<()> {
let mut get_object_reader =
<Self as ObjectIO>::get_object_reader(self, bucket, object, None, HeaderMap::new(), opts).await?;
let _ = get_object_reader.read_all().await?;
Ok(())
}
}
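The verify_object_integrity implementation above simply streams the whole object through get_object_reader and read_all, so any decode failure surfaces to the caller. A minimal usage sketch, assuming ObjectOptions implements Default (as BucketOptions does elsewhere in this diff) and the crate-internal StorageAPI/Result paths:
// Illustrative sketch only: exercising the new StorageAPI::verify_object_integrity.
// `store` stands for any StorageAPI implementation (for example the ECStore above);
// ObjectOptions::default() is assumed to exist.
async fn check_object(store: &impl StorageAPI) -> Result<()> {
    let opts = ObjectOptions::default();
    // Streams the full object; corruption shows up as an Err here.
    store.verify_object_integrity("my-bucket", "path/to/object", &opts).await
}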
async fn init_local_peer(endpoint_pools: &EndpointServerPools, host: &String, port: &String) {

View File

@@ -15,16 +15,16 @@
use crate::bucket::metadata_sys::get_versioning_config;
use crate::bucket::versioning::VersioningApi as _;
use crate::cmd::bucket_replication::{ReplicationStatusType, VersionPurgeStatusType};
use crate::disk::DiskStore;
use crate::error::{Error, Result};
use crate::heal::heal_ops::HealSequence;
use crate::store_utils::clean_metadata;
use crate::{
bucket::lifecycle::bucket_lifecycle_audit::LcAuditEvent,
bucket::lifecycle::lifecycle::ExpirationOptions,
bucket::lifecycle::{bucket_lifecycle_ops::TransitionedObject, lifecycle::TransitionOptions},
};
use crate::{disk::DiskStore, heal::heal_commands::HealOpts};
use http::{HeaderMap, HeaderValue};
use rustfs_common::heal_channel::HealOpts;
use rustfs_filemeta::headers::RESERVED_METADATA_PREFIX_LOWER;
use rustfs_filemeta::{FileInfo, MetaCacheEntriesSorted, ObjectPartInfo, headers::AMZ_OBJECT_TAGGING};
use rustfs_madmin::heal_commands::HealResultItem;
@@ -970,6 +970,7 @@ pub trait StorageAPI: ObjectIO {
// Walk TODO:
async fn get_object_info(&self, bucket: &str, object: &str, opts: &ObjectOptions) -> Result<ObjectInfo>;
async fn verify_object_integrity(&self, bucket: &str, object: &str, opts: &ObjectOptions) -> Result<()>;
async fn copy_object(
&self,
src_bucket: &str,
@@ -1072,8 +1073,8 @@ pub trait StorageAPI: ObjectIO {
version_id: &str,
opts: &HealOpts,
) -> Result<(HealResultItem, Option<Error>)>;
async fn heal_objects(&self, bucket: &str, prefix: &str, opts: &HealOpts, hs: Arc<HealSequence>, is_meta: bool)
-> Result<()>;
// async fn heal_objects(&self, bucket: &str, prefix: &str, opts: &HealOpts, hs: Arc<HealSequence>, is_meta: bool)
// -> Result<()>;
async fn get_pool_and_set(&self, id: &str) -> Result<(Option<usize>, Option<usize>, Option<usize>)>;
async fn check_abandoned_parts(&self, bucket: &str, object: &str, opts: &HealOpts) -> Result<()>;
}

View File

@@ -24,7 +24,6 @@ use crate::{
new_disk,
},
endpoints::Endpoints,
heal::heal_commands::init_healing_tracker,
};
use futures::future::join_all;
use std::collections::{HashMap, hash_map::Entry};
@@ -288,7 +287,7 @@ async fn save_format_file_all(disks: &[Option<DiskStore>], formats: &[Option<For
let mut futures = Vec::with_capacity(disks.len());
for (i, disk) in disks.iter().enumerate() {
futures.push(save_format_file(disk, &formats[i], ""));
futures.push(save_format_file(disk, &formats[i]));
}
let mut errors = Vec::with_capacity(disks.len());
@@ -312,7 +311,7 @@ async fn save_format_file_all(disks: &[Option<DiskStore>], formats: &[Option<For
Ok(())
}
pub async fn save_format_file(disk: &Option<DiskStore>, format: &Option<FormatV3>, heal_id: &str) -> disk::error::Result<()> {
pub async fn save_format_file(disk: &Option<DiskStore>, format: &Option<FormatV3>) -> disk::error::Result<()> {
if disk.is_none() {
return Err(DiskError::DiskNotFound);
}
@@ -331,10 +330,6 @@ pub async fn save_format_file(disk: &Option<DiskStore>, format: &Option<FormatV3
.await?;
disk.set_disk_id(Some(format.erasure.this)).await?;
if !heal_id.is_empty() {
let mut ht = init_healing_tracker(disk.clone(), heal_id).await?;
return ht.save().await;
}
Ok(())
}

View File

@@ -1523,8 +1523,7 @@ impl MetaObject {
}
pub fn uses_data_dir(&self) -> bool {
// TODO: when use inlinedata
true
!self.inlinedata()
}
pub fn inlinedata(&self) -> bool {

View File

@@ -30,6 +30,9 @@ workspace = true
[dependencies]
async-trait.workspace = true
bytes.workspace = true
futures.workspace = true
lazy_static.workspace = true
rustfs-protos.workspace = true
rand.workspace = true
serde.workspace = true
@@ -38,4 +41,7 @@ tokio.workspace = true
tonic.workspace = true
tracing.workspace = true
url.workspace = true
uuid.workspace = true
uuid.workspace = true
thiserror.workspace = true
once_cell.workspace = true
lru.workspace = true

View File

@@ -0,0 +1,366 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use crate::{
client::LockClient,
error::Result,
local::LocalLockMap,
types::{LockId, LockInfo, LockMetadata, LockPriority, LockRequest, LockResponse, LockStats, LockType},
};
/// Local lock client
///
/// Uses global singleton LocalLockMap to ensure all clients access the same lock instance
#[derive(Debug, Clone)]
pub struct LocalClient;
impl LocalClient {
/// Create new local client
pub fn new() -> Self {
Self
}
/// Get global lock map instance
pub fn get_lock_map(&self) -> Arc<LocalLockMap> {
crate::get_global_lock_map()
}
}
impl Default for LocalClient {
fn default() -> Self {
Self::new()
}
}
#[async_trait::async_trait]
impl LockClient for LocalClient {
async fn acquire_exclusive(&self, request: &LockRequest) -> Result<LockResponse> {
let lock_map = self.get_lock_map();
let success = lock_map
.lock_with_ttl_id(request)
.await
.map_err(|e| crate::error::LockError::internal(format!("Lock acquisition failed: {e}")))?;
if success {
let lock_info = LockInfo {
id: crate::types::LockId::new_deterministic(&request.resource),
resource: request.resource.clone(),
lock_type: LockType::Exclusive,
status: crate::types::LockStatus::Acquired,
owner: request.owner.clone(),
acquired_at: std::time::SystemTime::now(),
expires_at: std::time::SystemTime::now() + request.ttl,
last_refreshed: std::time::SystemTime::now(),
metadata: request.metadata.clone(),
priority: request.priority,
wait_start_time: None,
};
Ok(LockResponse::success(lock_info, std::time::Duration::ZERO))
} else {
Ok(LockResponse::failure("Lock acquisition failed".to_string(), std::time::Duration::ZERO))
}
}
async fn acquire_shared(&self, request: &LockRequest) -> Result<LockResponse> {
let lock_map = self.get_lock_map();
let success = lock_map
.rlock_with_ttl_id(request)
.await
.map_err(|e| crate::error::LockError::internal(format!("Shared lock acquisition failed: {e}")))?;
if success {
let lock_info = LockInfo {
id: crate::types::LockId::new_deterministic(&request.resource),
resource: request.resource.clone(),
lock_type: LockType::Shared,
status: crate::types::LockStatus::Acquired,
owner: request.owner.clone(),
acquired_at: std::time::SystemTime::now(),
expires_at: std::time::SystemTime::now() + request.ttl,
last_refreshed: std::time::SystemTime::now(),
metadata: request.metadata.clone(),
priority: request.priority,
wait_start_time: None,
};
Ok(LockResponse::success(lock_info, std::time::Duration::ZERO))
} else {
Ok(LockResponse::failure("Lock acquisition failed".to_string(), std::time::Duration::ZERO))
}
}
async fn release(&self, lock_id: &LockId) -> Result<bool> {
let lock_map = self.get_lock_map();
// Try to release the lock directly by ID
match lock_map.unlock_by_id(lock_id).await {
Ok(()) => Ok(true),
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
// Try as read lock if exclusive unlock failed
match lock_map.runlock_by_id(lock_id).await {
Ok(()) => Ok(true),
Err(_) => Err(crate::error::LockError::internal("Lock ID not found".to_string())),
}
}
Err(e) => Err(crate::error::LockError::internal(format!("Release lock failed: {e}"))),
}
}
async fn refresh(&self, _lock_id: &LockId) -> Result<bool> {
// For local locks, refresh is not needed as they don't expire automatically
Ok(true)
}
async fn force_release(&self, lock_id: &LockId) -> Result<bool> {
self.release(lock_id).await
}
async fn check_status(&self, lock_id: &LockId) -> Result<Option<LockInfo>> {
let lock_map = self.get_lock_map();
// Check if the lock exists in our locks map
let locks_guard = lock_map.locks.read().await;
if let Some(entry) = locks_guard.get(lock_id) {
let entry_guard = entry.read().await;
// Determine lock type and owner based on the entry
if let Some(owner) = &entry_guard.writer {
Ok(Some(LockInfo {
id: lock_id.clone(),
resource: lock_id.resource.clone(),
lock_type: crate::types::LockType::Exclusive,
status: crate::types::LockStatus::Acquired,
owner: owner.clone(),
acquired_at: std::time::SystemTime::now(),
expires_at: std::time::SystemTime::now() + std::time::Duration::from_secs(30),
last_refreshed: std::time::SystemTime::now(),
metadata: LockMetadata::default(),
priority: LockPriority::Normal,
wait_start_time: None,
}))
} else if !entry_guard.readers.is_empty() {
Ok(Some(LockInfo {
id: lock_id.clone(),
resource: lock_id.resource.clone(),
lock_type: crate::types::LockType::Shared,
status: crate::types::LockStatus::Acquired,
owner: entry_guard.readers.iter().next().map(|(k, _)| k.clone()).unwrap_or_default(),
acquired_at: std::time::SystemTime::now(),
expires_at: std::time::SystemTime::now() + std::time::Duration::from_secs(30),
last_refreshed: std::time::SystemTime::now(),
metadata: LockMetadata::default(),
priority: LockPriority::Normal,
wait_start_time: None,
}))
} else {
Ok(None)
}
} else {
Ok(None)
}
}
async fn get_stats(&self) -> Result<LockStats> {
Ok(LockStats::default())
}
async fn close(&self) -> Result<()> {
Ok(())
}
async fn is_online(&self) -> bool {
true
}
async fn is_local(&self) -> bool {
true
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::types::LockType;
#[tokio::test]
async fn test_local_client_acquire_exclusive() {
let client = LocalClient::new();
let resource_name = format!("test-resource-exclusive-{}", uuid::Uuid::new_v4());
let request = LockRequest::new(&resource_name, LockType::Exclusive, "test-owner")
.with_acquire_timeout(std::time::Duration::from_secs(30));
let response = client.acquire_exclusive(&request).await.unwrap();
assert!(response.is_success());
// Clean up
if let Some(lock_info) = response.lock_info() {
let _ = client.release(&lock_info.id).await;
}
}
#[tokio::test]
async fn test_local_client_acquire_shared() {
let client = LocalClient::new();
let resource_name = format!("test-resource-shared-{}", uuid::Uuid::new_v4());
let request = LockRequest::new(&resource_name, LockType::Shared, "test-owner")
.with_acquire_timeout(std::time::Duration::from_secs(30));
let response = client.acquire_shared(&request).await.unwrap();
assert!(response.is_success());
// Clean up
if let Some(lock_info) = response.lock_info() {
let _ = client.release(&lock_info.id).await;
}
}
#[tokio::test]
async fn test_local_client_release() {
let client = LocalClient::new();
let resource_name = format!("test-resource-release-{}", uuid::Uuid::new_v4());
// First acquire a lock
let request = LockRequest::new(&resource_name, LockType::Exclusive, "test-owner")
.with_acquire_timeout(std::time::Duration::from_secs(30));
let response = client.acquire_exclusive(&request).await.unwrap();
assert!(response.is_success());
// Get the lock ID from the response
if let Some(lock_info) = response.lock_info() {
let result = client.release(&lock_info.id).await.unwrap();
assert!(result);
} else {
panic!("No lock info in response");
}
}
#[tokio::test]
async fn test_local_client_is_local() {
let client = LocalClient::new();
assert!(client.is_local().await);
}
#[tokio::test]
async fn test_local_client_read_write_lock_exclusion() {
let client = LocalClient::new();
let resource_name = format!("test-resource-exclusion-{}", uuid::Uuid::new_v4());
// First, acquire an exclusive lock
let exclusive_request = LockRequest::new(&resource_name, LockType::Exclusive, "exclusive-owner")
.with_acquire_timeout(std::time::Duration::from_millis(10));
let exclusive_response = client.acquire_exclusive(&exclusive_request).await.unwrap();
assert!(exclusive_response.is_success());
// Try to acquire a shared lock on the same resource - should fail
let shared_request = LockRequest::new(&resource_name, LockType::Shared, "shared-owner")
.with_acquire_timeout(std::time::Duration::from_millis(10));
let shared_response = client.acquire_shared(&shared_request).await.unwrap();
assert!(!shared_response.is_success(), "Shared lock should fail when exclusive lock exists");
// Clean up exclusive lock
if let Some(exclusive_info) = exclusive_response.lock_info() {
let _ = client.release(&exclusive_info.id).await;
}
// Now shared lock should succeed
let shared_request2 = LockRequest::new(&resource_name, LockType::Shared, "shared-owner")
.with_acquire_timeout(std::time::Duration::from_millis(10));
let shared_response2 = client.acquire_shared(&shared_request2).await.unwrap();
assert!(
shared_response2.is_success(),
"Shared lock should succeed after exclusive lock is released"
);
// Clean up
if let Some(shared_info) = shared_response2.lock_info() {
let _ = client.release(&shared_info.id).await;
}
}
#[tokio::test]
async fn test_local_client_read_write_lock_distinction() {
let client = LocalClient::new();
let resource_name = format!("test-resource-rw-{}", uuid::Uuid::new_v4());
// Test exclusive lock
let exclusive_request = LockRequest::new(&resource_name, LockType::Exclusive, "exclusive-owner")
.with_acquire_timeout(std::time::Duration::from_secs(30));
let exclusive_response = client.acquire_exclusive(&exclusive_request).await.unwrap();
assert!(exclusive_response.is_success());
if let Some(exclusive_info) = exclusive_response.lock_info() {
assert_eq!(exclusive_info.lock_type, LockType::Exclusive);
// Check status should return correct lock type
let status = client.check_status(&exclusive_info.id).await.unwrap();
assert!(status.is_some());
assert_eq!(status.unwrap().lock_type, LockType::Exclusive);
// Release exclusive lock
let result = client.release(&exclusive_info.id).await.unwrap();
assert!(result);
}
// Test shared lock
let shared_request = LockRequest::new(&resource_name, LockType::Shared, "shared-owner")
.with_acquire_timeout(std::time::Duration::from_secs(30));
let shared_response = client.acquire_shared(&shared_request).await.unwrap();
assert!(shared_response.is_success());
if let Some(shared_info) = shared_response.lock_info() {
assert_eq!(shared_info.lock_type, LockType::Shared);
// Check status should return correct lock type
let status = client.check_status(&shared_info.id).await.unwrap();
assert!(status.is_some());
assert_eq!(status.unwrap().lock_type, LockType::Shared);
// Release shared lock
let result = client.release(&shared_info.id).await.unwrap();
assert!(result);
}
}
#[tokio::test]
async fn test_multiple_local_clients_exclusive_mutex() {
let client1 = LocalClient::new();
let client2 = LocalClient::new();
let resource_name = format!("test-multi-client-mutex-{}", uuid::Uuid::new_v4());
// client1 acquire exclusive lock
let req1 = LockRequest::new(&resource_name, LockType::Exclusive, "owner1")
.with_acquire_timeout(std::time::Duration::from_millis(50));
let resp1 = client1.acquire_exclusive(&req1).await.unwrap();
assert!(resp1.is_success(), "client1 should acquire exclusive lock");
// client2 try to acquire exclusive lock, should fail
let req2 = LockRequest::new(&resource_name, LockType::Exclusive, "owner2")
.with_acquire_timeout(std::time::Duration::from_millis(50));
let resp2 = client2.acquire_exclusive(&req2).await.unwrap();
assert!(!resp2.is_success(), "client2 should not acquire exclusive lock while client1 holds it");
// client1 release lock
if let Some(lock_info) = resp1.lock_info() {
let _ = client1.release(&lock_info.id).await;
}
// client2 try again, should succeed
let resp3 = client2.acquire_exclusive(&req2).await.unwrap();
assert!(resp3.is_success(), "client2 should acquire exclusive lock after client1 releases it");
// clean up
if let Some(lock_info) = resp3.lock_info() {
let _ = client2.release(&lock_info.id).await;
}
}
}

View File

@@ -0,0 +1,123 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
pub mod local;
pub mod remote;
use async_trait::async_trait;
use std::sync::Arc;
use crate::{
error::Result,
types::{LockId, LockInfo, LockRequest, LockResponse, LockStats},
};
/// Lock client trait
#[async_trait]
pub trait LockClient: Send + Sync + std::fmt::Debug {
/// Acquire exclusive lock
async fn acquire_exclusive(&self, request: &LockRequest) -> Result<LockResponse>;
/// Acquire shared lock
async fn acquire_shared(&self, request: &LockRequest) -> Result<LockResponse>;
/// Acquire lock (generic method)
async fn acquire_lock(&self, request: &LockRequest) -> Result<LockResponse> {
match request.lock_type {
crate::types::LockType::Exclusive => self.acquire_exclusive(request).await,
crate::types::LockType::Shared => self.acquire_shared(request).await,
}
}
/// Release lock
async fn release(&self, lock_id: &LockId) -> Result<bool>;
/// Refresh lock
async fn refresh(&self, lock_id: &LockId) -> Result<bool>;
/// Force release lock
async fn force_release(&self, lock_id: &LockId) -> Result<bool>;
/// Check lock status
async fn check_status(&self, lock_id: &LockId) -> Result<Option<LockInfo>>;
/// Get statistics
async fn get_stats(&self) -> Result<LockStats>;
/// Close client
async fn close(&self) -> Result<()>;
/// Check if client is online
async fn is_online(&self) -> bool;
/// Check if client is local
async fn is_local(&self) -> bool;
}
/// Client factory
pub struct ClientFactory;
impl ClientFactory {
/// Create local client
pub fn create_local() -> Arc<dyn LockClient> {
Arc::new(local::LocalClient::new())
}
/// Create remote client
pub fn create_remote(endpoint: String) -> Arc<dyn LockClient> {
Arc::new(remote::RemoteClient::new(endpoint))
}
}
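Because acquire_lock has a default implementation that dispatches on request.lock_type, callers can work purely against the trait object returned by the factory. A minimal sketch using only the methods defined above; the resource and owner names are illustrative:
// Illustrative sketch: generic use of LockClient through the factory.
// acquire_lock() dispatches to acquire_exclusive/acquire_shared by lock_type.
async fn lock_and_release(client: std::sync::Arc<dyn LockClient>) -> crate::error::Result<()> {
    let request = crate::types::LockRequest::new("demo-resource", crate::types::LockType::Shared, "demo-owner");
    let response = client.acquire_lock(&request).await?;
    if let Some(info) = response.lock_info() {
        // Release by lock id, as the tests below do.
        client.release(&info.id).await?;
    }
    Ok(())
}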
#[cfg(test)]
mod tests {
use super::*;
use crate::types::LockType;
#[tokio::test]
async fn test_client_factory() {
let local_client = ClientFactory::create_local();
assert!(local_client.is_local().await);
let remote_client = ClientFactory::create_remote("http://localhost:8080".to_string());
assert!(!remote_client.is_local().await);
}
#[tokio::test]
async fn test_local_client_basic_operations() {
let client = ClientFactory::create_local();
let request = crate::types::LockRequest::new("test-resource", LockType::Exclusive, "test-owner");
// Test lock acquisition
let response = client.acquire_exclusive(&request).await;
assert!(response.is_ok());
if let Ok(response) = response {
if response.success {
let lock_info = response.lock_info.unwrap();
// Test status check
let status = client.check_status(&lock_info.id).await;
assert!(status.is_ok());
assert!(status.unwrap().is_some());
// Test lock release
let released = client.release(&lock_info.id).await;
assert!(released.is_ok());
assert!(released.unwrap());
}
}
}
}

View File

@@ -0,0 +1,403 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use async_trait::async_trait;
use rustfs_protos::{
node_service_time_out_client,
proto_gen::node_service::{GenerallyLockRequest, PingRequest},
};
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::RwLock;
use tonic::Request;
use tracing::info;
use crate::{
error::{LockError, Result},
types::{LockId, LockInfo, LockRequest, LockResponse, LockStats},
};
use super::LockClient;
/// Remote lock client implementation
#[derive(Debug)]
pub struct RemoteClient {
addr: String,
// Track active locks with their original owner information
active_locks: Arc<RwLock<HashMap<LockId, String>>>, // lock_id -> owner
}
impl Clone for RemoteClient {
fn clone(&self) -> Self {
Self {
addr: self.addr.clone(),
active_locks: self.active_locks.clone(),
}
}
}
impl RemoteClient {
pub fn new(endpoint: String) -> Self {
Self {
addr: endpoint,
active_locks: Arc::new(RwLock::new(HashMap::new())),
}
}
pub fn from_url(url: url::Url) -> Self {
Self {
addr: url.to_string(),
active_locks: Arc::new(RwLock::new(HashMap::new())),
}
}
/// Create a minimal LockRequest for unlock operations
fn create_unlock_request(&self, lock_id: &LockId, owner: &str) -> LockRequest {
LockRequest {
lock_id: lock_id.clone(),
resource: lock_id.resource.clone(),
lock_type: crate::types::LockType::Exclusive, // Type doesn't matter for unlock
owner: owner.to_string(),
acquire_timeout: std::time::Duration::from_secs(30),
ttl: std::time::Duration::from_secs(300),
metadata: crate::types::LockMetadata::default(),
priority: crate::types::LockPriority::Normal,
deadlock_detection: false,
}
}
}
#[async_trait]
impl LockClient for RemoteClient {
async fn acquire_exclusive(&self, request: &LockRequest) -> Result<LockResponse> {
info!("remote acquire_exclusive for {}", request.resource);
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| LockError::internal(format!("can not get client, err: {err}")))?;
let req = Request::new(GenerallyLockRequest {
args: serde_json::to_string(&request)
.map_err(|e| LockError::internal(format!("Failed to serialize request: {e}")))?,
});
let resp = client
.lock(req)
.await
.map_err(|e| LockError::internal(e.to_string()))?
.into_inner();
// Check for explicit error first
if let Some(error_info) = resp.error_info {
return Err(LockError::internal(error_info));
}
// Check if the lock acquisition was successful
if resp.success {
// Save the lock information for later release
let mut locks = self.active_locks.write().await;
locks.insert(request.lock_id.clone(), request.owner.clone());
Ok(LockResponse::success(
LockInfo {
id: request.lock_id.clone(),
resource: request.resource.clone(),
lock_type: request.lock_type,
status: crate::types::LockStatus::Acquired,
owner: request.owner.clone(),
acquired_at: std::time::SystemTime::now(),
expires_at: std::time::SystemTime::now() + request.ttl,
last_refreshed: std::time::SystemTime::now(),
metadata: request.metadata.clone(),
priority: request.priority,
wait_start_time: None,
},
std::time::Duration::ZERO,
))
} else {
// Lock acquisition failed
Ok(LockResponse::failure(
"Lock acquisition failed on remote server".to_string(),
std::time::Duration::ZERO,
))
}
}
async fn acquire_shared(&self, request: &LockRequest) -> Result<LockResponse> {
info!("remote acquire_shared for {}", request.resource);
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| LockError::internal(format!("can not get client, err: {err}")))?;
let req = Request::new(GenerallyLockRequest {
args: serde_json::to_string(&request)
.map_err(|e| LockError::internal(format!("Failed to serialize request: {e}")))?,
});
let resp = client
.r_lock(req)
.await
.map_err(|e| LockError::internal(e.to_string()))?
.into_inner();
// Check for explicit error first
if let Some(error_info) = resp.error_info {
return Err(LockError::internal(error_info));
}
// Check if the lock acquisition was successful
if resp.success {
// Save the lock information for later release
let mut locks = self.active_locks.write().await;
locks.insert(request.lock_id.clone(), request.owner.clone());
Ok(LockResponse::success(
LockInfo {
id: request.lock_id.clone(),
resource: request.resource.clone(),
lock_type: request.lock_type,
status: crate::types::LockStatus::Acquired,
owner: request.owner.clone(),
acquired_at: std::time::SystemTime::now(),
expires_at: std::time::SystemTime::now() + request.ttl,
last_refreshed: std::time::SystemTime::now(),
metadata: request.metadata.clone(),
priority: request.priority,
wait_start_time: None,
},
std::time::Duration::ZERO,
))
} else {
// Lock acquisition failed
Ok(LockResponse::failure(
"Shared lock acquisition failed on remote server".to_string(),
std::time::Duration::ZERO,
))
}
}
async fn release(&self, lock_id: &LockId) -> Result<bool> {
info!("remote release for {}", lock_id);
// Get the original owner for this lock
let owner = {
let locks = self.active_locks.read().await;
locks.get(lock_id).cloned().unwrap_or_else(|| "remote".to_string())
};
let unlock_request = self.create_unlock_request(lock_id, &owner);
let request_string = serde_json::to_string(&unlock_request)
.map_err(|e| LockError::internal(format!("Failed to serialize request: {e}")))?;
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| LockError::internal(format!("can not get client, err: {err}")))?;
// Try UnLock first (for exclusive locks)
let req = Request::new(GenerallyLockRequest {
args: request_string.clone(),
});
let resp = client.un_lock(req).await;
let success = if resp.is_err() {
// If that fails, try RUnLock (for shared locks)
let req = Request::new(GenerallyLockRequest { args: request_string });
let resp = client
.r_un_lock(req)
.await
.map_err(|e| LockError::internal(e.to_string()))?
.into_inner();
if let Some(error_info) = resp.error_info {
return Err(LockError::internal(error_info));
}
resp.success
} else {
let resp = resp.map_err(|e| LockError::internal(e.to_string()))?.into_inner();
if let Some(error_info) = resp.error_info {
return Err(LockError::internal(error_info));
}
resp.success
};
// Remove the lock from our tracking if successful
if success {
let mut locks = self.active_locks.write().await;
locks.remove(lock_id);
}
Ok(success)
}
async fn refresh(&self, lock_id: &LockId) -> Result<bool> {
info!("remote refresh for {}", lock_id);
let refresh_request = self.create_unlock_request(lock_id, "remote");
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| LockError::internal(format!("can not get client, err: {err}")))?;
let req = Request::new(GenerallyLockRequest {
args: serde_json::to_string(&refresh_request)
.map_err(|e| LockError::internal(format!("Failed to serialize request: {e}")))?,
});
let resp = client
.refresh(req)
.await
.map_err(|e| LockError::internal(e.to_string()))?
.into_inner();
if let Some(error_info) = resp.error_info {
return Err(LockError::internal(error_info));
}
Ok(resp.success)
}
async fn force_release(&self, lock_id: &LockId) -> Result<bool> {
info!("remote force_release for {}", lock_id);
let force_request = self.create_unlock_request(lock_id, "remote");
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| LockError::internal(format!("can not get client, err: {err}")))?;
let req = Request::new(GenerallyLockRequest {
args: serde_json::to_string(&force_request)
.map_err(|e| LockError::internal(format!("Failed to serialize request: {e}")))?,
});
let resp = client
.force_un_lock(req)
.await
.map_err(|e| LockError::internal(e.to_string()))?
.into_inner();
if let Some(error_info) = resp.error_info {
return Err(LockError::internal(error_info));
}
Ok(resp.success)
}
async fn check_status(&self, lock_id: &LockId) -> Result<Option<LockInfo>> {
info!("remote check_status for {}", lock_id);
// Since there's no direct status query in the gRPC service,
// we attempt a non-blocking lock acquisition to check if the resource is available
let status_request = self.create_unlock_request(lock_id, "remote");
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| LockError::internal(format!("can not get client, err: {err}")))?;
// Probe availability by attempting to acquire the lock (using the helper's default timings)
let req = Request::new(GenerallyLockRequest {
args: serde_json::to_string(&status_request)
.map_err(|e| LockError::internal(format!("Failed to serialize request: {e}")))?,
});
// Try an exclusive lock first
let resp = client.lock(req).await;
match resp {
Ok(response) => {
let resp = response.into_inner();
if resp.success {
// If we successfully acquired the lock, the resource was free
// Immediately release it
let release_req = Request::new(GenerallyLockRequest {
args: serde_json::to_string(&status_request)
.map_err(|e| LockError::internal(format!("Failed to serialize request: {e}")))?,
});
let _ = client.un_lock(release_req).await; // Best effort release
// Return None since no one was holding the lock
Ok(None)
} else {
// Lock acquisition failed, meaning someone is holding it
// We can't determine the exact details remotely, so return a generic status
Ok(Some(LockInfo {
id: lock_id.clone(),
resource: lock_id.as_str().to_string(),
lock_type: crate::types::LockType::Exclusive, // We can't know the exact type
status: crate::types::LockStatus::Acquired,
owner: "unknown".to_string(), // Remote client can't determine owner
acquired_at: std::time::SystemTime::now(),
expires_at: std::time::SystemTime::now() + std::time::Duration::from_secs(3600),
last_refreshed: std::time::SystemTime::now(),
metadata: crate::types::LockMetadata::default(),
priority: crate::types::LockPriority::Normal,
wait_start_time: None,
}))
}
}
Err(_) => {
// Communication error or lock is held
Ok(Some(LockInfo {
id: lock_id.clone(),
resource: lock_id.as_str().to_string(),
lock_type: crate::types::LockType::Exclusive,
status: crate::types::LockStatus::Acquired,
owner: "unknown".to_string(),
acquired_at: std::time::SystemTime::now(),
expires_at: std::time::SystemTime::now() + std::time::Duration::from_secs(3600),
last_refreshed: std::time::SystemTime::now(),
metadata: crate::types::LockMetadata::default(),
priority: crate::types::LockPriority::Normal,
wait_start_time: None,
}))
}
}
}
async fn get_stats(&self) -> Result<LockStats> {
info!("remote get_stats from {}", self.addr);
// Since there's no direct statistics endpoint in the gRPC service,
// we return basic stats indicating this is a remote client
let stats = LockStats {
last_updated: std::time::SystemTime::now(),
..Default::default()
};
// We could potentially enhance this by:
// 1. Keeping local counters of operations performed
// 2. Adding a stats gRPC method to the service
// 3. Querying server health endpoints
// For now, return minimal stats indicating remote connectivity
Ok(stats)
}
async fn close(&self) -> Result<()> {
Ok(())
}
async fn is_online(&self) -> bool {
// Use Ping interface to test if remote service is online
let mut client = match node_service_time_out_client(&self.addr).await {
Ok(client) => client,
Err(_) => {
info!("remote client {} connection failed", self.addr);
return false;
}
};
let ping_req = Request::new(PingRequest {
version: 1,
body: bytes::Bytes::new(),
});
match client.ping(ping_req).await {
Ok(_) => {
info!("remote client {} is online", self.addr);
true
}
Err(_) => {
info!("remote client {} ping failed", self.addr);
false
}
}
}
async fn is_local(&self) -> bool {
false
}
}
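Taken together, the client is meant to be driven through the LockClient trait rather than called directly; a minimal sketch of that flow, using the crate-root re-exports shown later in this diff (the crate name rustfs_lock, the endpoint address, and the tokio runtime setup are illustrative assumptions, not part of this change):

// Sketch only: assumes a reachable node service at the given address.
use std::time::Duration;
use rustfs_lock::{LockClient, LockId, LockMetadata, LockPriority, LockRequest, LockType, RemoteClient};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = RemoteClient::new("http://127.0.0.1:9000".to_string());
    let request = LockRequest {
        lock_id: LockId::new_deterministic("buckets/demo/object"),
        resource: "buckets/demo/object".to_string(),
        lock_type: LockType::Exclusive,
        owner: "node-1".to_string(),
        acquire_timeout: Duration::from_secs(5),
        ttl: Duration::from_secs(30),
        metadata: LockMetadata::default(),
        priority: LockPriority::Normal,
        deadlock_detection: false,
    };
    // acquire_exclusive serializes the request and calls the remote lock RPC
    let _response = client.acquire_exclusive(&request).await?;
    // release looks up the tracked owner and calls un_lock, falling back to r_un_lock
    let released = client.release(&request.lock_id).await?;
    println!("released: {released}");
    Ok(())
}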

File diff suppressed because it is too large

crates/lock/src/error.rs (new file, 329 lines)

@@ -0,0 +1,329 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::types::LockId;
use std::time::Duration;
use thiserror::Error;
/// Lock operation related error types
#[derive(Error, Debug)]
pub enum LockError {
/// Lock acquisition timeout
#[error("Lock acquisition timeout for resource '{resource}' after {timeout:?}")]
Timeout { resource: String, timeout: Duration },
/// Resource not found
#[error("Resource not found: {resource}")]
ResourceNotFound { resource: String },
/// Permission denied
#[error("Permission denied: {reason}")]
PermissionDenied { reason: String },
/// Network error
#[error("Network error: {message}")]
Network {
message: String,
#[source]
source: Box<dyn std::error::Error + Send + Sync>,
},
/// Internal error
#[error("Internal error: {message}")]
Internal { message: String },
/// Resource is already locked
#[error("Resource '{resource}' is already locked by {owner}")]
AlreadyLocked { resource: String, owner: String },
/// Invalid lock handle
#[error("Invalid lock handle: {handle_id}")]
InvalidHandle { handle_id: String },
/// Configuration error
#[error("Configuration error: {message}")]
Configuration { message: String },
/// Serialization error
#[error("Serialization error: {message}")]
Serialization {
message: String,
#[source]
source: Box<dyn std::error::Error + Send + Sync>,
},
/// Deserialization error
#[error("Deserialization error: {message}")]
Deserialization {
message: String,
#[source]
source: Box<dyn std::error::Error + Send + Sync>,
},
/// Insufficient nodes for quorum
#[error("Insufficient nodes for quorum: required {required}, available {available}")]
InsufficientNodes { required: usize, available: usize },
/// Quorum not reached
#[error("Quorum not reached: required {required}, achieved {achieved}")]
QuorumNotReached { required: usize, achieved: usize },
/// Queue is full
#[error("Queue is full: {message}")]
QueueFull { message: String },
/// Not the lock owner
#[error("Not the lock owner: lock_id {lock_id}, owner {owner}")]
NotOwner { lock_id: LockId, owner: String },
}
impl Clone for LockError {
fn clone(&self) -> Self {
match self {
LockError::Timeout { resource, timeout } => LockError::Timeout {
resource: resource.clone(),
timeout: *timeout,
},
LockError::ResourceNotFound { resource } => LockError::ResourceNotFound {
resource: resource.clone(),
},
LockError::PermissionDenied { reason } => LockError::PermissionDenied { reason: reason.clone() },
LockError::Network { message, source: _ } => LockError::Network {
message: message.clone(),
source: Box::new(std::io::Error::other(message.clone())),
},
LockError::Internal { message } => LockError::Internal {
message: message.clone(),
},
LockError::AlreadyLocked { resource, owner } => LockError::AlreadyLocked {
resource: resource.clone(),
owner: owner.clone(),
},
LockError::InvalidHandle { handle_id } => LockError::InvalidHandle {
handle_id: handle_id.clone(),
},
LockError::Configuration { message } => LockError::Configuration {
message: message.clone(),
},
LockError::Serialization { message, source: _ } => LockError::Serialization {
message: message.clone(),
source: Box::new(std::io::Error::other(message.clone())),
},
LockError::Deserialization { message, source: _ } => LockError::Deserialization {
message: message.clone(),
source: Box::new(std::io::Error::other(message.clone())),
},
LockError::InsufficientNodes { required, available } => LockError::InsufficientNodes {
required: *required,
available: *available,
},
LockError::QuorumNotReached { required, achieved } => LockError::QuorumNotReached {
required: *required,
achieved: *achieved,
},
LockError::QueueFull { message } => LockError::QueueFull {
message: message.clone(),
},
LockError::NotOwner { lock_id, owner } => LockError::NotOwner {
lock_id: lock_id.clone(),
owner: owner.clone(),
},
}
}
}
impl LockError {
/// Create timeout error
pub fn timeout(resource: impl Into<String>, timeout: Duration) -> Self {
Self::Timeout {
resource: resource.into(),
timeout,
}
}
/// Create resource not found error
pub fn resource_not_found(resource: impl Into<String>) -> Self {
Self::ResourceNotFound {
resource: resource.into(),
}
}
/// Create permission denied error
pub fn permission_denied(reason: impl Into<String>) -> Self {
Self::PermissionDenied { reason: reason.into() }
}
/// Create network error
pub fn network(message: impl Into<String>, source: impl std::error::Error + Send + Sync + 'static) -> Self {
Self::Network {
message: message.into(),
source: Box::new(source),
}
}
/// Create internal error
pub fn internal(message: impl Into<String>) -> Self {
Self::Internal { message: message.into() }
}
/// Create lock already locked error
pub fn already_locked(resource: impl Into<String>, owner: impl Into<String>) -> Self {
Self::AlreadyLocked {
resource: resource.into(),
owner: owner.into(),
}
}
/// Create invalid handle error
pub fn invalid_handle(handle_id: impl Into<String>) -> Self {
Self::InvalidHandle {
handle_id: handle_id.into(),
}
}
/// Create configuration error
pub fn configuration(message: impl Into<String>) -> Self {
Self::Configuration { message: message.into() }
}
/// Create serialization error
pub fn serialization(message: impl Into<String>, source: impl std::error::Error + Send + Sync + 'static) -> Self {
Self::Serialization {
message: message.into(),
source: Box::new(source),
}
}
/// Create deserialization error
pub fn deserialization(message: impl Into<String>, source: impl std::error::Error + Send + Sync + 'static) -> Self {
Self::Deserialization {
message: message.into(),
source: Box::new(source),
}
}
/// Check if it is a retryable error
pub fn is_retryable(&self) -> bool {
matches!(self, Self::Timeout { .. } | Self::Network { .. } | Self::Internal { .. })
}
/// Check if it is a fatal error
pub fn is_fatal(&self) -> bool {
matches!(
self,
Self::ResourceNotFound { .. } | Self::PermissionDenied { .. } | Self::Configuration { .. }
)
}
}
/// Lock operation Result type
pub type Result<T> = std::result::Result<T, LockError>;
/// Convert from std::io::Error
impl From<std::io::Error> for LockError {
fn from(err: std::io::Error) -> Self {
match err.kind() {
std::io::ErrorKind::TimedOut => Self::Internal {
message: "IO timeout".to_string(),
},
std::io::ErrorKind::NotFound => Self::ResourceNotFound {
resource: "unknown".to_string(),
},
std::io::ErrorKind::PermissionDenied => Self::PermissionDenied { reason: err.to_string() },
_ => Self::Internal {
message: err.to_string(),
},
}
}
}
/// Convert from serde_json::Error
impl From<serde_json::Error> for LockError {
fn from(err: serde_json::Error) -> Self {
if err.is_io() {
Self::network("JSON serialization IO error", err)
} else if err.is_syntax() {
Self::deserialization("JSON syntax error", err)
} else if err.is_data() {
Self::deserialization("JSON data error", err)
} else {
Self::serialization("JSON serialization error", err)
}
}
}
/// Convert from tonic::Status
impl From<tonic::Status> for LockError {
fn from(status: tonic::Status) -> Self {
match status.code() {
tonic::Code::DeadlineExceeded => Self::Internal {
message: "gRPC deadline exceeded".to_string(),
},
tonic::Code::NotFound => Self::ResourceNotFound {
resource: "unknown".to_string(),
},
tonic::Code::PermissionDenied => Self::PermissionDenied {
reason: status.message().to_string(),
},
tonic::Code::Unavailable => Self::Network {
message: "gRPC service unavailable".to_string(),
source: Box::new(status),
},
_ => Self::Internal {
message: status.message().to_string(),
},
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_error_creation() {
let timeout_err = LockError::timeout("test-resource", Duration::from_secs(5));
assert!(matches!(timeout_err, LockError::Timeout { .. }));
let not_found_err = LockError::resource_not_found("missing-resource");
assert!(matches!(not_found_err, LockError::ResourceNotFound { .. }));
let permission_err = LockError::permission_denied("insufficient privileges");
assert!(matches!(permission_err, LockError::PermissionDenied { .. }));
}
#[test]
fn test_error_retryable() {
let timeout_err = LockError::timeout("test", Duration::from_secs(1));
assert!(timeout_err.is_retryable());
let network_err = LockError::network("connection failed", std::io::Error::new(std::io::ErrorKind::ConnectionRefused, ""));
assert!(network_err.is_retryable());
let not_found_err = LockError::resource_not_found("test");
assert!(!not_found_err.is_retryable());
}
#[test]
fn test_error_fatal() {
let not_found_err = LockError::resource_not_found("test");
assert!(not_found_err.is_fatal());
let permission_err = LockError::permission_denied("test");
assert!(permission_err.is_fatal());
let timeout_err = LockError::timeout("test", Duration::from_secs(1));
assert!(!timeout_err.is_fatal());
}
}
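The is_retryable / is_fatal split is what callers are expected to branch on when a lock operation fails; a small sketch of a retry helper built on that classification (the backoff, attempt budget, and tokio sleep are illustrative assumptions):

use std::time::Duration;

// Sketch only: Result and LockError are the types defined in this file.
async fn with_retry<F, Fut, T>(mut op: F, max_attempts: usize) -> Result<T>
where
    F: FnMut() -> Fut,
    Fut: std::future::Future<Output = Result<T>>,
{
    let mut attempt = 0;
    loop {
        match op().await {
            Ok(value) => return Ok(value),
            // Timeout / Network / Internal errors are classified as retryable above
            Err(err) if err.is_retryable() && attempt + 1 < max_attempts => {
                attempt += 1;
                tokio::time::sleep(Duration::from_millis(50 * attempt as u64)).await;
            }
            // Fatal errors (or an exhausted budget) are surfaced unchanged
            Err(err) => return Err(err),
        }
    }
}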


@@ -1,4 +1,3 @@
#![allow(dead_code)]
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
@@ -13,115 +12,78 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use async_trait::async_trait;
use local_locker::LocalLocker;
use lock_args::LockArgs;
use remote_client::RemoteClient;
use std::io::Result;
// ============================================================================
// Core Module Declarations
// ============================================================================
// Application Layer Modules
pub mod namespace;
// Abstraction Layer Modules
pub mod client;
// Local Layer Modules
pub mod local;
// Core Modules
pub mod error;
pub mod types;
// ============================================================================
// Public API Exports
// ============================================================================
// Re-export main types for easy access
pub use crate::{
// Client interfaces
client::{LockClient, local::LocalClient, remote::RemoteClient},
// Error types
error::{LockError, Result},
local::LocalLockMap,
// Main components
namespace::{NamespaceLock, NamespaceLockManager},
// Core types
types::{
HealthInfo, HealthStatus, LockId, LockInfo, LockMetadata, LockPriority, LockRequest, LockResponse, LockStats, LockStatus,
LockType,
},
};
// ============================================================================
// Version Information
// ============================================================================
/// Current version of the lock crate
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
/// Build timestamp
pub const BUILD_TIMESTAMP: &str = "unknown";
/// Maximum number of items in delete list
pub const MAX_DELETE_LIST: usize = 1000;
// ============================================================================
// Global Lock Map
// ============================================================================
// Global singleton lock map shared across all lock implementations
use once_cell::sync::OnceCell;
use std::sync::Arc;
use std::sync::LazyLock;
use tokio::sync::RwLock;
pub mod drwmutex;
pub mod local_locker;
pub mod lock_args;
pub mod lrwmutex;
pub mod namespace_lock;
pub mod remote_client;
static GLOBAL_LOCK_MAP: OnceCell<Arc<local::LocalLockMap>> = OnceCell::new();
pub static GLOBAL_LOCAL_SERVER: LazyLock<Arc<RwLock<LocalLocker>>> = LazyLock::new(|| Arc::new(RwLock::new(LocalLocker::new())));
type LockClient = dyn Locker;
#[async_trait]
pub trait Locker {
async fn lock(&mut self, args: &LockArgs) -> Result<bool>;
async fn unlock(&mut self, args: &LockArgs) -> Result<bool>;
async fn rlock(&mut self, args: &LockArgs) -> Result<bool>;
async fn runlock(&mut self, args: &LockArgs) -> Result<bool>;
async fn refresh(&mut self, args: &LockArgs) -> Result<bool>;
async fn force_unlock(&mut self, args: &LockArgs) -> Result<bool>;
async fn close(&self);
async fn is_online(&self) -> bool;
async fn is_local(&self) -> bool;
/// Get the global shared lock map instance
pub fn get_global_lock_map() -> Arc<local::LocalLockMap> {
GLOBAL_LOCK_MAP.get_or_init(|| Arc::new(local::LocalLockMap::new())).clone()
}
#[derive(Debug, Clone)]
pub enum LockApi {
Local,
Remote(RemoteClient),
}
#[async_trait]
impl Locker for LockApi {
async fn lock(&mut self, args: &LockArgs) -> Result<bool> {
match self {
LockApi::Local => GLOBAL_LOCAL_SERVER.write().await.lock(args).await,
LockApi::Remote(r) => r.lock(args).await,
}
}
async fn unlock(&mut self, args: &LockArgs) -> Result<bool> {
match self {
LockApi::Local => GLOBAL_LOCAL_SERVER.write().await.unlock(args).await,
LockApi::Remote(r) => r.unlock(args).await,
}
}
async fn rlock(&mut self, args: &LockArgs) -> Result<bool> {
match self {
LockApi::Local => GLOBAL_LOCAL_SERVER.write().await.rlock(args).await,
LockApi::Remote(r) => r.rlock(args).await,
}
}
async fn runlock(&mut self, args: &LockArgs) -> Result<bool> {
match self {
LockApi::Local => GLOBAL_LOCAL_SERVER.write().await.runlock(args).await,
LockApi::Remote(r) => r.runlock(args).await,
}
}
async fn refresh(&mut self, args: &LockArgs) -> Result<bool> {
match self {
LockApi::Local => GLOBAL_LOCAL_SERVER.write().await.refresh(args).await,
LockApi::Remote(r) => r.refresh(args).await,
}
}
async fn force_unlock(&mut self, args: &LockArgs) -> Result<bool> {
match self {
LockApi::Local => GLOBAL_LOCAL_SERVER.write().await.force_unlock(args).await,
LockApi::Remote(r) => r.force_unlock(args).await,
}
}
async fn close(&self) {
match self {
LockApi::Local => GLOBAL_LOCAL_SERVER.read().await.close().await,
LockApi::Remote(r) => r.close().await,
}
}
async fn is_online(&self) -> bool {
match self {
LockApi::Local => GLOBAL_LOCAL_SERVER.read().await.is_online().await,
LockApi::Remote(r) => r.is_online().await,
}
}
async fn is_local(&self) -> bool {
match self {
LockApi::Local => GLOBAL_LOCAL_SERVER.write().await.is_local().await,
LockApi::Remote(r) => r.is_local().await,
}
}
}
pub fn new_lock_api(is_local: bool, url: Option<url::Url>) -> LockApi {
if is_local {
return LockApi::Local;
}
LockApi::Remote(RemoteClient::new(url.unwrap()))
// ============================================================================
// Convenience Functions
// ============================================================================
/// Create a new namespace lock
pub fn create_namespace_lock(namespace: String, _distributed: bool) -> NamespaceLock {
// The distributed behavior is now determined by the type of clients added to the NamespaceLock
// This function just creates an empty NamespaceLock
NamespaceLock::new(namespace)
}
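For in-process callers, the global map accessor above is the intended entry point; a minimal sketch of pairing it with the LocalLockMap request API defined in local.rs (the crate name rustfs_lock, the resource name, and the timings are illustrative assumptions):

use std::time::Duration;
use rustfs_lock::{get_global_lock_map, LockId, LockMetadata, LockPriority, LockRequest, LockType};

#[tokio::main]
async fn main() -> std::io::Result<()> {
    // One shared LocalLockMap per process, created lazily on first use
    let locks = get_global_lock_map();
    let request = LockRequest {
        lock_id: LockId::new_deterministic("buckets/demo"),
        resource: "buckets/demo".to_string(),
        lock_type: LockType::Exclusive,
        owner: "scanner".to_string(),
        acquire_timeout: Duration::from_millis(100),
        ttl: Duration::from_secs(5),
        metadata: LockMetadata::default(),
        priority: LockPriority::Normal,
        deadlock_detection: false,
    };
    if locks.lock_with_ttl_id(&request).await? {
        // ... guarded work ...
        locks.unlock_by_id_and_owner(&request.lock_id, &request.owner).await?;
    }
    Ok(())
}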

crates/lock/src/local.rs (new file, 925 lines)

@@ -0,0 +1,925 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};
use std::time::{Duration, Instant};
use tokio::sync::RwLock;
use crate::LockRequest;
/// local lock entry
#[derive(Debug)]
pub struct LocalLockEntry {
/// current writer
pub writer: Option<String>,
/// current readers with their lock counts
pub readers: HashMap<String, usize>,
/// lock expiration time
pub expires_at: Option<Instant>,
}
/// local lock map
#[derive(Debug)]
pub struct LocalLockMap {
/// LockId to lock object map
pub locks: Arc<RwLock<HashMap<crate::types::LockId, Arc<RwLock<LocalLockEntry>>>>>,
/// Shutdown flag for background tasks
shutdown: Arc<AtomicBool>,
}
impl Default for LocalLockMap {
fn default() -> Self {
Self::new()
}
}
impl LocalLockMap {
/// create new local lock map
pub fn new() -> Self {
let map = Self {
locks: Arc::new(RwLock::new(HashMap::new())),
shutdown: Arc::new(AtomicBool::new(false)),
};
map.spawn_expiry_task();
map
}
/// spawn expiry task to clean up expired locks
fn spawn_expiry_task(&self) {
let locks = self.locks.clone();
let shutdown = self.shutdown.clone();
tokio::spawn(async move {
let mut interval = tokio::time::interval(Duration::from_secs(1));
loop {
interval.tick().await;
if shutdown.load(Ordering::Relaxed) {
tracing::debug!("Expiry task shutting down");
break;
}
let now = Instant::now();
let mut to_remove = Vec::new();
{
let locks_guard = locks.read().await;
for (key, entry) in locks_guard.iter() {
if let Ok(mut entry_guard) = entry.try_write() {
if let Some(exp) = entry_guard.expires_at {
if exp <= now {
entry_guard.writer = None;
entry_guard.readers.clear();
entry_guard.expires_at = None;
if entry_guard.writer.is_none() && entry_guard.readers.is_empty() {
to_remove.push(key.clone());
}
}
}
}
}
}
if !to_remove.is_empty() {
let mut locks_guard = locks.write().await;
for key in to_remove {
locks_guard.remove(&key);
}
}
}
});
}
/// Acquire a write lock with TTL and an acquisition timeout, driven by a LockRequest
pub async fn lock_with_ttl_id(&self, request: &LockRequest) -> std::io::Result<bool> {
let start = Instant::now();
let expires_at = Some(Instant::now() + request.ttl);
loop {
// get or create lock entry
let entry = {
let mut locks_guard = self.locks.write().await;
locks_guard
.entry(request.lock_id.clone())
.or_insert_with(|| {
Arc::new(RwLock::new(LocalLockEntry {
writer: None,
readers: HashMap::new(),
expires_at: None,
}))
})
.clone()
};
// try to get write lock to modify state
if let Ok(mut entry_guard) = entry.try_write() {
// check expired state
let now = Instant::now();
if let Some(exp) = entry_guard.expires_at {
if exp <= now {
entry_guard.writer = None;
entry_guard.readers.clear();
entry_guard.expires_at = None;
}
}
// check if can get write lock
if entry_guard.writer.is_none() && entry_guard.readers.is_empty() {
entry_guard.writer = Some(request.owner.clone());
entry_guard.expires_at = expires_at;
tracing::debug!("Write lock acquired for resource '{}' by owner '{}'", request.resource, request.owner);
return Ok(true);
}
}
if start.elapsed() >= request.acquire_timeout {
return Ok(false);
}
tokio::time::sleep(Duration::from_millis(10)).await;
}
}
/// Acquire a read lock with TTL and an acquisition timeout, driven by a LockRequest
pub async fn rlock_with_ttl_id(&self, request: &LockRequest) -> std::io::Result<bool> {
let start = Instant::now();
let expires_at = Some(Instant::now() + request.ttl);
loop {
// get or create lock entry
let entry = {
let mut locks_guard = self.locks.write().await;
locks_guard
.entry(request.lock_id.clone())
.or_insert_with(|| {
Arc::new(RwLock::new(LocalLockEntry {
writer: None,
readers: HashMap::new(),
expires_at: None,
}))
})
.clone()
};
// try to get write lock to modify state
if let Ok(mut entry_guard) = entry.try_write() {
// check expired state
let now = Instant::now();
if let Some(exp) = entry_guard.expires_at {
if exp <= now {
entry_guard.writer = None;
entry_guard.readers.clear();
entry_guard.expires_at = None;
}
}
// check if can get read lock
if entry_guard.writer.is_none() {
// increase read lock count
*entry_guard.readers.entry(request.owner.clone()).or_insert(0) += 1;
entry_guard.expires_at = expires_at;
tracing::debug!("Read lock acquired for resource '{}' by owner '{}'", request.resource, request.owner);
return Ok(true);
}
}
if start.elapsed() >= request.acquire_timeout {
return Ok(false);
}
tokio::time::sleep(Duration::from_millis(10)).await;
}
}
/// Unlock by LockId and owner - the owner must be given so the correct holder is released
pub async fn unlock_by_id_and_owner(&self, lock_id: &crate::types::LockId, owner: &str) -> std::io::Result<()> {
println!("Unlocking lock_id: {lock_id:?}, owner: {owner}");
let mut need_remove = false;
{
let locks_guard = self.locks.read().await;
if let Some(entry) = locks_guard.get(lock_id) {
println!("Found lock entry, attempting to acquire write lock...");
match entry.try_write() {
Ok(mut entry_guard) => {
println!("Successfully acquired write lock for unlock");
// try to release write lock
if entry_guard.writer.as_ref() == Some(&owner.to_string()) {
println!("Releasing write lock for owner: {owner}");
entry_guard.writer = None;
}
// try to release read lock
else if let Some(count) = entry_guard.readers.get_mut(owner) {
println!("Releasing read lock for owner: {owner} (count: {count})");
*count -= 1;
if *count == 0 {
entry_guard.readers.remove(owner);
println!("Removed owner {owner} from readers");
}
} else {
println!("Owner {owner} not found in writers or readers");
}
// check if need to remove
if entry_guard.readers.is_empty() && entry_guard.writer.is_none() {
println!("Lock entry is empty, marking for removal");
entry_guard.expires_at = None;
need_remove = true;
} else {
println!(
"Lock entry still has content: writer={:?}, readers={:?}",
entry_guard.writer, entry_guard.readers
);
}
}
Err(_) => {
println!("Failed to acquire write lock for unlock - this is the problem!");
return Err(std::io::Error::new(
std::io::ErrorKind::WouldBlock,
"Failed to acquire write lock for unlock",
));
}
}
} else {
println!("Lock entry not found for lock_id: {lock_id:?}");
}
}
// only here is the entry reference actually dropped, so the map entry can be removed safely
if need_remove {
println!("Removing lock entry from map...");
let mut locks_guard = self.locks.write().await;
let removed = locks_guard.remove(lock_id);
println!("Lock entry removed: {:?}", removed.is_some());
}
println!("Unlock operation completed");
Ok(())
}
/// Unlock by LockId only - best-effort release (kept for compatibility with the old interface, but may pick the wrong holder)
pub async fn unlock_by_id(&self, lock_id: &crate::types::LockId) -> std::io::Result<()> {
let mut need_remove = false;
{
let locks_guard = self.locks.read().await;
if let Some(entry) = locks_guard.get(lock_id) {
if let Ok(mut entry_guard) = entry.try_write() {
// release write lock first
if entry_guard.writer.is_some() {
entry_guard.writer = None;
}
// if no write lock, release first read lock
else if let Some((owner, _)) = entry_guard.readers.iter().next() {
let owner = owner.clone();
if let Some(count) = entry_guard.readers.get_mut(&owner) {
*count -= 1;
if *count == 0 {
entry_guard.readers.remove(&owner);
}
}
}
// if completely idle, clean entry
if entry_guard.readers.is_empty() && entry_guard.writer.is_none() {
entry_guard.expires_at = None;
need_remove = true;
}
}
}
}
if need_remove {
let mut locks_guard = self.locks.write().await;
locks_guard.remove(lock_id);
}
Ok(())
}
/// Release a read lock by LockId and owner - the owner must be given so the correct reader is released
pub async fn runlock_by_id_and_owner(&self, lock_id: &crate::types::LockId, owner: &str) -> std::io::Result<()> {
let mut need_remove = false;
{
let locks_guard = self.locks.read().await;
if let Some(entry) = locks_guard.get(lock_id) {
if let Ok(mut entry_guard) = entry.try_write() {
// release read lock
if let Some(count) = entry_guard.readers.get_mut(owner) {
*count -= 1;
if *count == 0 {
entry_guard.readers.remove(owner);
}
}
// if completely idle, clean entry
if entry_guard.readers.is_empty() && entry_guard.writer.is_none() {
entry_guard.expires_at = None;
need_remove = true;
}
}
}
}
if need_remove {
let mut locks_guard = self.locks.write().await;
locks_guard.remove(lock_id);
}
Ok(())
}
/// Release a read lock by LockId only - best-effort release (compatible with the old interface)
pub async fn runlock_by_id(&self, lock_id: &crate::types::LockId) -> std::io::Result<()> {
let mut need_remove = false;
{
let locks_guard = self.locks.read().await;
if let Some(entry) = locks_guard.get(lock_id) {
if let Ok(mut entry_guard) = entry.try_write() {
// release first read lock
if let Some((owner, _)) = entry_guard.readers.iter().next() {
let owner = owner.clone();
if let Some(count) = entry_guard.readers.get_mut(&owner) {
*count -= 1;
if *count == 0 {
entry_guard.readers.remove(&owner);
}
}
}
// if completely idle, clean entry
if entry_guard.readers.is_empty() && entry_guard.writer.is_none() {
entry_guard.expires_at = None;
need_remove = true;
}
}
}
}
if need_remove {
let mut locks_guard = self.locks.write().await;
locks_guard.remove(lock_id);
}
Ok(())
}
/// check if resource is locked
pub async fn is_locked(&self, resource: &str) -> bool {
let lock_id = crate::types::LockId::new_deterministic(resource);
let locks_guard = self.locks.read().await;
if let Some(entry) = locks_guard.get(&lock_id) {
let entry_guard = entry.read().await;
entry_guard.writer.is_some() || !entry_guard.readers.is_empty()
} else {
false
}
}
/// get lock info for a resource
pub async fn get_lock(&self, resource: &str) -> Option<crate::types::LockInfo> {
let lock_id = crate::types::LockId::new_deterministic(resource);
let locks_guard = self.locks.read().await;
if let Some(entry) = locks_guard.get(&lock_id) {
let entry_guard = entry.read().await;
if let Some(owner) = &entry_guard.writer {
Some(crate::types::LockInfo {
id: lock_id,
resource: resource.to_string(),
lock_type: crate::types::LockType::Exclusive,
status: crate::types::LockStatus::Acquired,
owner: owner.clone(),
acquired_at: std::time::SystemTime::now(),
expires_at: std::time::SystemTime::now() + std::time::Duration::from_secs(30),
last_refreshed: std::time::SystemTime::now(),
metadata: crate::types::LockMetadata::default(),
priority: crate::types::LockPriority::Normal,
wait_start_time: None,
})
} else if !entry_guard.readers.is_empty() {
let owner = entry_guard.readers.keys().next().unwrap().clone();
Some(crate::types::LockInfo {
id: lock_id,
resource: resource.to_string(),
lock_type: crate::types::LockType::Shared,
status: crate::types::LockStatus::Acquired,
owner,
acquired_at: std::time::SystemTime::now(),
expires_at: std::time::SystemTime::now() + std::time::Duration::from_secs(30),
last_refreshed: std::time::SystemTime::now(),
metadata: crate::types::LockMetadata::default(),
priority: crate::types::LockPriority::Normal,
wait_start_time: None,
})
} else {
None
}
} else {
None
}
}
/// get statistics
pub async fn get_stats(&self) -> crate::types::LockStats {
let mut stats = crate::types::LockStats::default();
let locks_guard = self.locks.read().await;
for (_, entry) in locks_guard.iter() {
let entry_guard = entry.read().await;
if entry_guard.writer.is_some() {
stats.exclusive_locks += 1;
}
stats.shared_locks += entry_guard.readers.len();
}
stats.total_locks = stats.exclusive_locks + stats.shared_locks;
stats
}
/// shutdown background tasks
pub async fn shutdown(&self) {
self.shutdown.store(true, Ordering::Relaxed);
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::sync::Arc;
use std::time::Duration;
use tokio::task;
use tokio::time::{sleep, timeout};
/// Test basic write lock operations
#[tokio::test]
async fn test_write_lock_basic() {
let lock_map = LocalLockMap::new();
// create a simple lock request
let request = LockRequest {
lock_id: crate::types::LockId::new_deterministic("test_resource"),
resource: "test_resource".to_string(),
lock_type: crate::types::LockType::Exclusive,
owner: "test_owner".to_string(),
acquire_timeout: Duration::from_millis(100),
ttl: Duration::from_millis(100),
metadata: crate::types::LockMetadata::default(),
priority: crate::types::LockPriority::Normal,
deadlock_detection: false,
};
// try to acquire lock
println!("Attempting to acquire lock...");
let result = lock_map.lock_with_ttl_id(&request).await;
println!("Lock acquisition result: {result:?}");
match result {
Ok(success) => {
if success {
println!("Lock acquired successfully");
// check lock state
let is_locked = lock_map.is_locked("test_resource").await;
println!("Is locked: {is_locked}");
// try to unlock
println!("Attempting to unlock...");
let unlock_result = lock_map.unlock_by_id_and_owner(&request.lock_id, "test_owner").await;
println!("Unlock result: {unlock_result:?}");
// check lock state again
let is_locked_after = lock_map.is_locked("test_resource").await;
println!("Is locked after unlock: {is_locked_after}");
assert!(!is_locked_after, "Should be unlocked after release");
} else {
println!("Lock acquisition failed (timeout)");
}
}
Err(e) => {
println!("Lock acquisition error: {e:?}");
panic!("Lock acquisition failed with error: {e:?}");
}
}
}
/// Test basic read lock operations
#[tokio::test]
async fn test_read_lock_basic() {
let lock_map = LocalLockMap::new();
// Test successful acquisition
let request = LockRequest {
lock_id: crate::types::LockId::new_deterministic("bar"),
resource: "bar".to_string(),
lock_type: crate::types::LockType::Shared,
owner: "reader1".to_string(),
acquire_timeout: Duration::from_millis(100),
ttl: Duration::from_millis(100),
metadata: crate::types::LockMetadata::default(),
priority: crate::types::LockPriority::Normal,
deadlock_detection: false,
};
let ok = lock_map.rlock_with_ttl_id(&request).await.unwrap();
assert!(ok, "Read lock should be successfully acquired");
assert!(lock_map.is_locked("bar").await, "Lock state should be locked");
// Test lock info
let lock_info = lock_map.get_lock("bar").await;
assert!(lock_info.is_some(), "Lock info should exist");
let info = lock_info.unwrap();
assert_eq!(info.owner, "reader1");
assert_eq!(info.lock_type, crate::types::LockType::Shared);
// Test unlock with owner
lock_map.runlock_by_id_and_owner(&request.lock_id, "reader1").await.unwrap();
assert!(!lock_map.is_locked("bar").await, "Should be unlocked after release");
}
/// Test write lock mutual exclusion
#[tokio::test]
async fn test_write_lock_mutex() {
let lock_map = Arc::new(LocalLockMap::new());
// Owner1 acquires write lock
let request1 = LockRequest {
lock_id: crate::types::LockId::new_deterministic("res_mutex_test"),
resource: "res_mutex_test".to_string(),
lock_type: crate::types::LockType::Exclusive,
owner: "owner1".to_string(),
acquire_timeout: Duration::from_millis(100),
ttl: Duration::from_millis(100),
metadata: crate::types::LockMetadata::default(),
priority: crate::types::LockPriority::Normal,
deadlock_detection: false,
};
let ok = lock_map.lock_with_ttl_id(&request1).await.unwrap();
assert!(ok, "First write lock should succeed");
// Owner2 tries to acquire write lock on same resource - should fail due to timeout
let lock_map2 = lock_map.clone();
let request2 = LockRequest {
lock_id: crate::types::LockId::new_deterministic("res_mutex_test"),
resource: "res_mutex_test".to_string(),
lock_type: crate::types::LockType::Exclusive,
owner: "owner2".to_string(),
acquire_timeout: Duration::from_millis(50),
ttl: Duration::from_millis(50),
metadata: crate::types::LockMetadata::default(),
priority: crate::types::LockPriority::Normal,
deadlock_detection: false,
};
let request2_clone = request2.clone();
let result = timeout(Duration::from_millis(100), async move {
lock_map2.lock_with_ttl_id(&request2_clone).await.unwrap()
})
.await;
assert!(result.is_ok(), "Lock attempt should complete");
assert!(!result.unwrap(), "Second write lock should fail due to conflict");
// Release first lock
lock_map.unlock_by_id_and_owner(&request1.lock_id, "owner1").await.unwrap();
// Now owner2 should be able to acquire the lock
let ok = lock_map.lock_with_ttl_id(&request2).await.unwrap();
assert!(ok, "Write lock should succeed after first is released");
lock_map.unlock_by_id_and_owner(&request2.lock_id, "owner2").await.unwrap();
}
/// Test read lock sharing
#[tokio::test]
async fn test_read_lock_sharing() {
let lock_map = LocalLockMap::new();
// Multiple readers should be able to acquire read locks
let request1 = LockRequest {
lock_id: crate::types::LockId::new_deterministic("res_sharing_test"),
resource: "res_sharing_test".to_string(),
lock_type: crate::types::LockType::Shared,
owner: "reader1".to_string(),
acquire_timeout: Duration::from_millis(100),
ttl: Duration::from_millis(100),
metadata: crate::types::LockMetadata::default(),
priority: crate::types::LockPriority::Normal,
deadlock_detection: false,
};
let request2 = LockRequest {
lock_id: crate::types::LockId::new_deterministic("res_sharing_test"),
resource: "res_sharing_test".to_string(),
lock_type: crate::types::LockType::Shared,
owner: "reader2".to_string(),
acquire_timeout: Duration::from_millis(100),
ttl: Duration::from_millis(100),
metadata: crate::types::LockMetadata::default(),
priority: crate::types::LockPriority::Normal,
deadlock_detection: false,
};
let request3 = LockRequest {
lock_id: crate::types::LockId::new_deterministic("res_sharing_test"),
resource: "res_sharing_test".to_string(),
lock_type: crate::types::LockType::Shared,
owner: "reader3".to_string(),
acquire_timeout: Duration::from_millis(100),
ttl: Duration::from_millis(100),
metadata: crate::types::LockMetadata::default(),
priority: crate::types::LockPriority::Normal,
deadlock_detection: false,
};
let ok1 = lock_map.rlock_with_ttl_id(&request1).await.unwrap();
let ok2 = lock_map.rlock_with_ttl_id(&request2).await.unwrap();
let ok3 = lock_map.rlock_with_ttl_id(&request3).await.unwrap();
assert!(ok1 && ok2 && ok3, "All read locks should succeed");
assert!(lock_map.is_locked("res_sharing_test").await, "Resource should be locked");
// Release readers one by one
lock_map.runlock_by_id_and_owner(&request1.lock_id, "reader1").await.unwrap();
assert!(
lock_map.is_locked("res_sharing_test").await,
"Should still be locked with remaining readers"
);
lock_map.runlock_by_id_and_owner(&request2.lock_id, "reader2").await.unwrap();
assert!(lock_map.is_locked("res_sharing_test").await, "Should still be locked with one reader");
lock_map.runlock_by_id_and_owner(&request3.lock_id, "reader3").await.unwrap();
assert!(
!lock_map.is_locked("res_sharing_test").await,
"Should be unlocked when all readers release"
);
}
/// Test read-write lock exclusion
#[tokio::test]
async fn test_read_write_exclusion() {
let lock_map = LocalLockMap::new();
// Reader acquires read lock
let read_request = LockRequest {
lock_id: crate::types::LockId::new_deterministic("res_rw_test"),
resource: "res_rw_test".to_string(),
lock_type: crate::types::LockType::Shared,
owner: "reader1".to_string(),
acquire_timeout: Duration::from_millis(100),
ttl: Duration::from_millis(100),
metadata: crate::types::LockMetadata::default(),
priority: crate::types::LockPriority::Normal,
deadlock_detection: false,
};
let ok = lock_map.rlock_with_ttl_id(&read_request).await.unwrap();
assert!(ok, "Read lock should succeed");
// Writer tries to acquire write lock - should fail
let write_request = LockRequest {
lock_id: crate::types::LockId::new_deterministic("res_rw_test"),
resource: "res_rw_test".to_string(),
lock_type: crate::types::LockType::Exclusive,
owner: "writer1".to_string(),
acquire_timeout: Duration::from_millis(50),
ttl: Duration::from_millis(50),
metadata: crate::types::LockMetadata::default(),
priority: crate::types::LockPriority::Normal,
deadlock_detection: false,
};
let result = timeout(Duration::from_millis(100), async {
lock_map.lock_with_ttl_id(&write_request).await.unwrap()
})
.await;
assert!(result.is_ok(), "Write lock attempt should complete");
assert!(!result.unwrap(), "Write lock should fail when read lock is held");
// Release read lock
lock_map
.runlock_by_id_and_owner(&read_request.lock_id, "reader1")
.await
.unwrap();
// Now writer should be able to acquire the lock with longer TTL
let write_request_long_ttl = LockRequest {
lock_id: crate::types::LockId::new_deterministic("res_rw_test"),
resource: "res_rw_test".to_string(),
lock_type: crate::types::LockType::Exclusive,
owner: "writer1".to_string(),
acquire_timeout: Duration::from_millis(100),
ttl: Duration::from_millis(200), // Longer TTL to prevent expiration during test
metadata: crate::types::LockMetadata::default(),
priority: crate::types::LockPriority::Normal,
deadlock_detection: false,
};
let ok = lock_map.lock_with_ttl_id(&write_request_long_ttl).await.unwrap();
assert!(ok, "Write lock should succeed after read lock is released");
// Reader tries to acquire read lock while write lock is held - should fail
let read_request2 = LockRequest {
lock_id: crate::types::LockId::new_deterministic("res_rw_test"),
resource: "res_rw_test".to_string(),
lock_type: crate::types::LockType::Shared,
owner: "reader2".to_string(),
acquire_timeout: Duration::from_millis(50),
ttl: Duration::from_millis(50),
metadata: crate::types::LockMetadata::default(),
priority: crate::types::LockPriority::Normal,
deadlock_detection: false,
};
let result = timeout(Duration::from_millis(100), async {
lock_map.rlock_with_ttl_id(&read_request2).await.unwrap()
})
.await;
assert!(result.is_ok(), "Read lock attempt should complete");
assert!(!result.unwrap(), "Read lock should fail when write lock is held");
// Release write lock
lock_map
.unlock_by_id_and_owner(&write_request_long_ttl.lock_id, "writer1")
.await
.unwrap();
}
/// Test statistics
#[tokio::test]
async fn test_statistics() {
let lock_map = LocalLockMap::new();
// Initially no locks
let stats = lock_map.get_stats().await;
assert_eq!(stats.total_locks, 0, "Should have no locks initially");
assert_eq!(stats.exclusive_locks, 0, "Should have no exclusive locks initially");
assert_eq!(stats.shared_locks, 0, "Should have no shared locks initially");
// Add some locks
let write_request = LockRequest {
lock_id: crate::types::LockId::new_deterministic("res1_stats_test"),
resource: "res1_stats_test".to_string(),
lock_type: crate::types::LockType::Exclusive,
owner: "owner1".to_string(),
acquire_timeout: Duration::from_millis(100),
ttl: Duration::from_millis(100),
metadata: crate::types::LockMetadata::default(),
priority: crate::types::LockPriority::Normal,
deadlock_detection: false,
};
let read_request1 = LockRequest {
lock_id: crate::types::LockId::new_deterministic("res2_stats_test"),
resource: "res2_stats_test".to_string(),
lock_type: crate::types::LockType::Shared,
owner: "reader1".to_string(),
acquire_timeout: Duration::from_millis(100),
ttl: Duration::from_millis(100),
metadata: crate::types::LockMetadata::default(),
priority: crate::types::LockPriority::Normal,
deadlock_detection: false,
};
let read_request2 = LockRequest {
lock_id: crate::types::LockId::new_deterministic("res2_stats_test"),
resource: "res2_stats_test".to_string(),
lock_type: crate::types::LockType::Shared,
owner: "reader2".to_string(),
acquire_timeout: Duration::from_millis(100),
ttl: Duration::from_millis(100),
metadata: crate::types::LockMetadata::default(),
priority: crate::types::LockPriority::Normal,
deadlock_detection: false,
};
lock_map.lock_with_ttl_id(&write_request).await.unwrap();
lock_map.rlock_with_ttl_id(&read_request1).await.unwrap();
lock_map.rlock_with_ttl_id(&read_request2).await.unwrap();
let stats = lock_map.get_stats().await;
assert_eq!(stats.exclusive_locks, 1, "Should have 1 exclusive lock");
assert_eq!(stats.shared_locks, 2, "Should have 2 shared locks");
assert_eq!(stats.total_locks, 3, "Should have 3 total locks");
// Clean up
lock_map
.unlock_by_id_and_owner(&write_request.lock_id, "owner1")
.await
.unwrap();
lock_map
.runlock_by_id_and_owner(&read_request1.lock_id, "reader1")
.await
.unwrap();
lock_map
.runlock_by_id_and_owner(&read_request2.lock_id, "reader2")
.await
.unwrap();
}
/// Test concurrent access
#[tokio::test]
async fn test_concurrent_access() {
let lock_map = Arc::new(LocalLockMap::new());
let num_tasks = 10;
let num_iterations = 100;
let mut handles = Vec::new();
for i in 0..num_tasks {
let lock_map = lock_map.clone();
let owner = format!("owner{i}");
let handle = task::spawn(async move {
for j in 0..num_iterations {
let resource = format!("resource{}", j % 5);
let request = LockRequest {
lock_id: crate::types::LockId::new_deterministic(&resource),
resource: resource.clone(),
lock_type: if j % 2 == 0 {
crate::types::LockType::Exclusive
} else {
crate::types::LockType::Shared
},
owner: owner.clone(),
acquire_timeout: Duration::from_millis(10),
ttl: Duration::from_millis(10),
metadata: crate::types::LockMetadata::default(),
priority: crate::types::LockPriority::Normal,
deadlock_detection: false,
};
if request.lock_type == crate::types::LockType::Exclusive {
if lock_map.lock_with_ttl_id(&request).await.unwrap() {
sleep(Duration::from_micros(100)).await;
lock_map.unlock_by_id_and_owner(&request.lock_id, &owner).await.unwrap();
}
} else if lock_map.rlock_with_ttl_id(&request).await.unwrap() {
sleep(Duration::from_micros(100)).await;
lock_map.runlock_by_id_and_owner(&request.lock_id, &owner).await.unwrap();
}
}
});
handles.push(handle);
}
for handle in handles {
handle.await.unwrap();
}
// Verify no locks remain
let stats = lock_map.get_stats().await;
assert_eq!(stats.total_locks, 0, "No locks should remain after concurrent access");
}
#[tokio::test]
async fn test_write_lock_timeout_and_reacquire() {
let lock_map = LocalLockMap::new();
// 1. acquire lock
let request = LockRequest {
lock_id: crate::types::LockId::new_deterministic("timeout_resource"),
resource: "timeout_resource".to_string(),
lock_type: crate::types::LockType::Exclusive,
owner: "owner1".to_string(),
acquire_timeout: Duration::from_millis(100),
ttl: Duration::from_millis(200),
metadata: crate::types::LockMetadata::default(),
priority: crate::types::LockPriority::Normal,
deadlock_detection: false,
};
let ok = lock_map.lock_with_ttl_id(&request).await.unwrap();
assert!(ok, "First lock should succeed");
// 2. try to acquire lock again, should fail
let request2 = LockRequest {
lock_id: crate::types::LockId::new_deterministic("timeout_resource"),
resource: "timeout_resource".to_string(),
lock_type: crate::types::LockType::Exclusive,
owner: "owner2".to_string(),
acquire_timeout: Duration::from_millis(100),
ttl: Duration::from_millis(200),
metadata: crate::types::LockMetadata::default(),
priority: crate::types::LockPriority::Normal,
deadlock_detection: false,
};
let ok2 = lock_map.lock_with_ttl_id(&request2).await.unwrap();
assert!(!ok2, "Second lock should fail before timeout");
// 3. wait for TTL to expire
tokio::time::sleep(Duration::from_millis(300)).await;
// 4. try to acquire lock again, should succeed
let ok3 = lock_map.lock_with_ttl_id(&request2).await.unwrap();
assert!(ok3, "Lock should succeed after timeout");
}
}
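One thing the tests above do not exercise is the lifecycle of the background expiry task; a brief sketch of the intended shutdown path (the tokio runtime and the crate name rustfs_lock are illustrative assumptions):

use rustfs_lock::local::LocalLockMap;

#[tokio::main]
async fn main() {
    // new() spawns the 1-second expiry loop that clears expired writers and readers
    let lock_map = LocalLockMap::new();

    // ... serve lock/unlock requests via lock_with_ttl_id / unlock_by_id_and_owner ...

    // Flip the shutdown flag; the expiry task checks it on the next tick and exits
    lock_map.shutdown().await;
}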


@@ -1,427 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use async_trait::async_trait;
use std::io::{Error, Result};
use std::{
collections::HashMap,
time::{Duration, Instant},
};
use crate::{Locker, lock_args::LockArgs};
pub const MAX_DELETE_LIST: usize = 1000;
#[derive(Clone, Debug)]
struct LockRequesterInfo {
name: String,
writer: bool,
uid: String,
time_stamp: Instant,
time_last_refresh: Instant,
source: String,
group: bool,
owner: String,
quorum: usize,
idx: usize,
}
impl Default for LockRequesterInfo {
fn default() -> Self {
Self {
name: Default::default(),
writer: Default::default(),
uid: Default::default(),
time_stamp: Instant::now(),
time_last_refresh: Instant::now(),
source: Default::default(),
group: Default::default(),
owner: Default::default(),
quorum: Default::default(),
idx: Default::default(),
}
}
}
fn is_write_lock(lri: &[LockRequesterInfo]) -> bool {
lri.len() == 1 && lri[0].writer
}
#[derive(Debug, Default)]
pub struct LockStats {
total: usize,
writes: usize,
reads: usize,
}
#[derive(Debug, Default)]
pub struct LocalLocker {
lock_map: HashMap<String, Vec<LockRequesterInfo>>,
lock_uid: HashMap<String, String>,
}
impl LocalLocker {
pub fn new() -> Self {
LocalLocker::default()
}
}
impl LocalLocker {
fn can_take_lock(&self, resource: &[String]) -> bool {
resource.iter().fold(true, |acc, x| !self.lock_map.contains_key(x) && acc)
}
pub fn stats(&self) -> LockStats {
let mut st = LockStats {
total: self.lock_map.len(),
..Default::default()
};
self.lock_map.iter().for_each(|(_, value)| {
if !value.is_empty() {
if value[0].writer {
st.writes += 1;
} else {
st.reads += 1;
}
}
});
st
}
fn dump_lock_map(&mut self) -> HashMap<String, Vec<LockRequesterInfo>> {
let mut lock_copy = HashMap::new();
self.lock_map.iter().for_each(|(key, value)| {
lock_copy.insert(key.to_string(), value.to_vec());
});
lock_copy
}
fn expire_old_locks(&mut self, interval: Duration) {
self.lock_map.iter_mut().for_each(|(_, lris)| {
lris.retain(|lri| {
if Instant::now().duration_since(lri.time_last_refresh) > interval {
let mut key = lri.uid.to_string();
format_uuid(&mut key, &lri.idx);
self.lock_uid.remove(&key);
return false;
}
true
});
});
}
}
#[async_trait]
impl Locker for LocalLocker {
async fn lock(&mut self, args: &LockArgs) -> Result<bool> {
if args.resources.len() > MAX_DELETE_LIST {
return Err(Error::other(format!(
"internal error: LocalLocker.lock called with more than {MAX_DELETE_LIST} resources"
)));
}
if !self.can_take_lock(&args.resources) {
return Ok(false);
}
args.resources.iter().enumerate().for_each(|(idx, resource)| {
self.lock_map.insert(
resource.to_string(),
vec![LockRequesterInfo {
name: resource.to_string(),
writer: true,
source: args.source.to_string(),
owner: args.owner.to_string(),
uid: args.uid.to_string(),
group: args.resources.len() > 1,
quorum: args.quorum,
idx,
..Default::default()
}],
);
let mut uuid = args.uid.to_string();
format_uuid(&mut uuid, &idx);
self.lock_uid.insert(uuid, resource.to_string());
});
Ok(true)
}
async fn unlock(&mut self, args: &LockArgs) -> Result<bool> {
if args.resources.len() > MAX_DELETE_LIST {
return Err(Error::other(format!(
"internal error: LocalLocker.unlock called with more than {MAX_DELETE_LIST} resources"
)));
}
let mut reply = false;
let mut err_info = String::new();
for resource in args.resources.iter() {
match self.lock_map.get_mut(resource) {
Some(lris) => {
if !is_write_lock(lris) {
if err_info.is_empty() {
err_info = format!("unlock attempted on a read locked entity: {resource}");
} else {
err_info.push_str(&format!(", {resource}"));
}
} else {
lris.retain(|lri| {
if lri.uid == args.uid && (args.owner.is_empty() || lri.owner == args.owner) {
let mut key = args.uid.to_string();
format_uuid(&mut key, &lri.idx);
self.lock_uid.remove(&key).unwrap();
reply |= true;
return false;
}
true
});
}
if lris.is_empty() {
self.lock_map.remove(resource);
}
}
None => {
continue;
}
};
}
Ok(reply)
}
async fn rlock(&mut self, args: &LockArgs) -> Result<bool> {
if args.resources.len() != 1 {
return Err(Error::other("internal error: localLocker.RLock called with more than one resource"));
}
let resource = &args.resources[0];
match self.lock_map.get_mut(resource) {
Some(lri) => {
if !is_write_lock(lri) {
lri.push(LockRequesterInfo {
name: resource.to_string(),
writer: false,
source: args.source.to_string(),
owner: args.owner.to_string(),
uid: args.uid.to_string(),
quorum: args.quorum,
..Default::default()
});
} else {
return Ok(false);
}
}
None => {
self.lock_map.insert(
resource.to_string(),
vec![LockRequesterInfo {
name: resource.to_string(),
writer: false,
source: args.source.to_string(),
owner: args.owner.to_string(),
uid: args.uid.to_string(),
quorum: args.quorum,
..Default::default()
}],
);
}
}
let mut uuid = args.uid.to_string();
format_uuid(&mut uuid, &0);
self.lock_uid.insert(uuid, resource.to_string());
Ok(true)
}
async fn runlock(&mut self, args: &LockArgs) -> Result<bool> {
if args.resources.len() != 1 {
return Err(Error::other("internal error: localLocker.RLock called with more than one resource"));
}
let mut reply = false;
let resource = &args.resources[0];
match self.lock_map.get_mut(resource) {
Some(lris) => {
if is_write_lock(lris) {
return Err(Error::other(format!("runlock attempted on a write locked entity: {resource}")));
} else {
lris.retain(|lri| {
if lri.uid == args.uid && (args.owner.is_empty() || lri.owner == args.owner) {
let mut key = args.uid.to_string();
format_uuid(&mut key, &lri.idx);
self.lock_uid.remove(&key).unwrap();
reply |= true;
return false;
}
true
});
}
if lris.is_empty() {
self.lock_map.remove(resource);
}
}
None => {
return Ok(reply);
}
};
Ok(reply)
}
async fn refresh(&mut self, args: &LockArgs) -> Result<bool> {
let mut idx = 0;
let mut key = args.uid.to_string();
format_uuid(&mut key, &idx);
match self.lock_uid.get(&key) {
Some(resource) => {
let mut resource = resource;
loop {
match self.lock_map.get_mut(resource) {
Some(_lris) => {}
None => {
let mut key = args.uid.to_string();
format_uuid(&mut key, &0);
self.lock_uid.remove(&key);
return Ok(idx > 0);
}
}
idx += 1;
let mut key = args.uid.to_string();
format_uuid(&mut key, &idx);
resource = match self.lock_uid.get(&key) {
Some(resource) => resource,
None => return Ok(true),
};
}
}
None => Ok(false),
}
}
// TODO: need add timeout mechanism
async fn force_unlock(&mut self, args: &LockArgs) -> Result<bool> {
if args.uid.is_empty() {
args.resources.iter().for_each(|resource| {
if let Some(lris) = self.lock_map.get(resource) {
lris.iter().for_each(|lri| {
let mut key = lri.uid.to_string();
format_uuid(&mut key, &lri.idx);
self.lock_uid.remove(&key);
});
// Every requester on this resource was force-released above, so drop the map entry itself as well.
self.lock_map.remove(resource);
}
});
return Ok(true);
}
let mut idx = 0;
let mut need_remove_resource = Vec::new();
let mut need_remove_map_id = Vec::new();
let reply = loop {
let mut map_id = args.uid.to_string();
format_uuid(&mut map_id, &idx);
match self.lock_uid.get(&map_id) {
Some(resource) => match self.lock_map.get_mut(resource) {
Some(lris) => {
{
lris.retain(|lri| {
if lri.uid == args.uid && (args.owner.is_empty() || lri.owner == args.owner) {
let mut key = args.uid.to_string();
format_uuid(&mut key, &lri.idx);
need_remove_map_id.push(key);
return false;
}
true
});
}
idx += 1;
if lris.is_empty() {
need_remove_resource.push(resource.to_string());
}
}
None => {
need_remove_map_id.push(map_id);
idx += 1;
continue;
}
},
None => {
break idx > 0;
}
}
};
need_remove_resource.into_iter().for_each(|resource| {
self.lock_map.remove(&resource);
});
need_remove_map_id.into_iter().for_each(|map_id| {
self.lock_uid.remove(&map_id);
});
Ok(reply)
}
async fn close(&self) {}
async fn is_online(&self) -> bool {
true
}
async fn is_local(&self) -> bool {
true
}
}
fn format_uuid(s: &mut String, idx: &usize) {
s.push_str(&idx.to_string());
}
#[cfg(test)]
mod test {
use super::LocalLocker;
use crate::{Locker, lock_args::LockArgs};
use std::io::Result;
use tokio;
#[tokio::test]
async fn test_lock_unlock() -> Result<()> {
let mut local_locker = LocalLocker::new();
let args = LockArgs {
uid: "1111".to_string(),
resources: vec!["dandan".to_string()],
owner: "dd".to_string(),
source: "".to_string(),
quorum: 3,
};
local_locker.lock(&args).await?;
println!("lock local_locker: {local_locker:?} \n");
local_locker.unlock(&args).await?;
println!("unlock local_locker: {local_locker:?}");
Ok(())
}
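// Editor's sketch (not part of this change set): exercises the shared-lock path,
// assuming the same LockArgs shape as the test above. Two readers with different
// uids share the resource via rlock, then runlock releases each of them in turn.
#[tokio::test]
async fn test_rlock_runlock() -> Result<()> {
    let mut local_locker = LocalLocker::new();
    let reader = |uid: &str| LockArgs {
        uid: uid.to_string(),
        resources: vec!["dandan".to_string()],
        owner: "dd".to_string(),
        source: "".to_string(),
        quorum: 3,
    };
    assert!(local_locker.rlock(&reader("r1")).await?);
    assert!(local_locker.rlock(&reader("r2")).await?);
    assert!(local_locker.runlock(&reader("r1")).await?);
    assert!(local_locker.runlock(&reader("r2")).await?);
    Ok(())
}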
}


@@ -1,191 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use rand::Rng;
use std::time::{Duration, Instant};
use tokio::{sync::RwLock, time::sleep};
use tracing::info;
#[derive(Debug, Default)]
pub struct LRWMutex {
id: RwLock<String>,
source: RwLock<String>,
is_write: RwLock<bool>,
reference: RwLock<usize>,
}
impl LRWMutex {
pub async fn lock(&self) -> bool {
let is_write = true;
let id = self.id.read().await.clone();
let source = self.source.read().await.clone();
let timeout = Duration::from_secs(10000);
self.look_loop(
&id, &source, &timeout, // big enough
is_write,
)
.await
}
pub async fn get_lock(&self, id: &str, source: &str, timeout: &Duration) -> bool {
let is_write = true;
self.look_loop(id, source, timeout, is_write).await
}
pub async fn r_lock(&self) -> bool {
let is_write: bool = false;
let id = self.id.read().await.clone();
let source = self.source.read().await.clone();
let timeout = Duration::from_secs(10000);
self.look_loop(
&id, &source, &timeout, // big enough
is_write,
)
.await
}
pub async fn get_r_lock(&self, id: &str, source: &str, timeout: &Duration) -> bool {
let is_write = false;
self.look_loop(id, source, timeout, is_write).await
}
async fn inner_lock(&self, id: &str, source: &str, is_write: bool) -> bool {
*self.id.write().await = id.to_string();
*self.source.write().await = source.to_string();
let mut locked = false;
if is_write {
if *self.reference.read().await == 0 && !*self.is_write.read().await {
*self.reference.write().await = 1;
*self.is_write.write().await = true;
locked = true;
}
} else if !*self.is_write.read().await {
*self.reference.write().await += 1;
locked = true;
}
locked
}
async fn look_loop(&self, id: &str, source: &str, timeout: &Duration, is_write: bool) -> bool {
let start = Instant::now();
loop {
if self.inner_lock(id, source, is_write).await {
return true;
} else {
if Instant::now().duration_since(start) > *timeout {
return false;
}
let sleep_time: u64;
{
let mut rng = rand::rng();
sleep_time = rng.random_range(10..=50);
}
sleep(Duration::from_millis(sleep_time)).await;
}
}
}
pub async fn un_lock(&self) {
let is_write = true;
if !self.unlock(is_write).await {
info!("Trying to un_lock() while no Lock() is active")
}
}
pub async fn un_r_lock(&self) {
let is_write = false;
if !self.unlock(is_write).await {
info!("Trying to un_r_lock() while no Lock() is active")
}
}
async fn unlock(&self, is_write: bool) -> bool {
let mut unlocked = false;
if is_write {
if *self.is_write.read().await && *self.reference.read().await == 1 {
*self.reference.write().await = 0;
*self.is_write.write().await = false;
unlocked = true;
}
} else if !*self.is_write.read().await && *self.reference.read().await > 0 {
*self.reference.write().await -= 1;
unlocked = true;
}
unlocked
}
pub async fn force_un_lock(&self) {
*self.reference.write().await = 0;
*self.is_write.write().await = false;
}
}
#[cfg(test)]
mod test {
use std::{sync::Arc, time::Duration};
use std::io::Result;
use tokio::time::sleep;
use crate::lrwmutex::LRWMutex;
#[tokio::test]
async fn test_lock_unlock() -> Result<()> {
let l_rw_lock = LRWMutex::default();
let id = "foo";
let source = "dandan";
let timeout = Duration::from_secs(5);
assert!(l_rw_lock.get_lock(id, source, &timeout).await);
l_rw_lock.un_lock().await;
l_rw_lock.lock().await;
assert!(!l_rw_lock.get_r_lock(id, source, &timeout).await);
l_rw_lock.un_lock().await;
assert!(l_rw_lock.get_r_lock(id, source, &timeout).await);
Ok(())
}
#[tokio::test]
async fn multi_thread_test() -> Result<()> {
let l_rw_lock = Arc::new(LRWMutex::default());
let id = "foo";
let source = "dandan";
let one_fn = async {
let one = Arc::clone(&l_rw_lock);
let timeout = Duration::from_secs(1);
assert!(one.get_lock(id, source, &timeout).await);
sleep(Duration::from_secs(5)).await;
l_rw_lock.un_lock().await;
};
let two_fn = async {
let two = Arc::clone(&l_rw_lock);
let timeout = Duration::from_secs(2);
assert!(!two.get_r_lock(id, source, &timeout).await);
sleep(Duration::from_secs(5)).await;
assert!(two.get_r_lock(id, source, &timeout).await);
two.un_r_lock().await;
};
tokio::join!(one_fn, two_fn);
Ok(())
}
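// Editor's sketch (not part of this change set): force_un_lock clears all lock
// state unconditionally, so a writer blocked by a leftover read lock can proceed.
#[tokio::test]
async fn force_unlock_test() -> Result<()> {
    let l_rw_lock = LRWMutex::default();
    let timeout = Duration::from_secs(1);
    assert!(l_rw_lock.get_r_lock("foo", "dandan", &timeout).await);
    // A write lock cannot be taken while the read lock is held.
    assert!(!l_rw_lock.get_lock("foo", "dandan", &timeout).await);
    // Drop all state, after which the write lock succeeds.
    l_rw_lock.force_un_lock().await;
    assert!(l_rw_lock.get_lock("foo", "dandan", &timeout).await);
    Ok(())
}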
}


@@ -0,0 +1,516 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use async_trait::async_trait;
use std::sync::Arc;
use std::time::Duration;
use crate::{
client::LockClient,
error::{LockError, Result},
types::{LockId, LockInfo, LockRequest, LockResponse, LockStatus, LockType},
};
/// Namespace lock for managing locks by resource namespaces
#[derive(Debug)]
pub struct NamespaceLock {
/// Lock clients for this namespace
clients: Vec<Arc<dyn LockClient>>,
/// Namespace identifier
namespace: String,
/// Quorum size for operations (1 for local, majority for distributed)
quorum: usize,
}
impl NamespaceLock {
/// Create new namespace lock
pub fn new(namespace: String) -> Self {
Self {
clients: Vec::new(),
namespace,
quorum: 1,
}
}
/// Create namespace lock with clients
pub fn with_clients(namespace: String, clients: Vec<Arc<dyn LockClient>>) -> Self {
let quorum = if clients.len() > 1 {
// For multiple clients (distributed mode), require majority
(clients.len() / 2) + 1
} else {
// For single client (local mode), only need 1
1
};
Self {
clients,
namespace,
quorum,
}
}
/// Create namespace lock with client (compatibility)
pub fn with_client(client: Arc<dyn LockClient>) -> Self {
Self::with_clients("default".to_string(), vec![client])
}
/// Get namespace identifier
pub fn namespace(&self) -> &str {
&self.namespace
}
/// Get resource key for this namespace
pub fn get_resource_key(&self, resource: &str) -> String {
format!("{}:{}", self.namespace, resource)
}
/// Acquire lock using clients with transactional semantics (all-or-nothing)
pub async fn acquire_lock(&self, request: &LockRequest) -> Result<LockResponse> {
if self.clients.is_empty() {
return Err(LockError::internal("No lock clients available"));
}
// For single client, use it directly
if self.clients.len() == 1 {
return self.clients[0].acquire_lock(request).await;
}
// Two-phase commit for distributed lock acquisition
self.acquire_lock_with_2pc(request).await
}
/// Two-phase commit lock acquisition: all nodes must succeed or all fail
async fn acquire_lock_with_2pc(&self, request: &LockRequest) -> Result<LockResponse> {
// Phase 1: Prepare - try to acquire lock on all clients
let futures: Vec<_> = self
.clients
.iter()
.enumerate()
.map(|(idx, client)| async move {
let result = client.acquire_lock(request).await;
(idx, result)
})
.collect();
let results = futures::future::join_all(futures).await;
let mut successful_clients = Vec::new();
let mut failed_clients = Vec::new();
// Collect results
for (idx, result) in results {
match result {
Ok(response) if response.success => {
successful_clients.push(idx);
}
_ => {
failed_clients.push(idx);
}
}
}
// Check if we have enough successful acquisitions for quorum
if successful_clients.len() >= self.quorum {
// Phase 2a: Commit - we have quorum, but need to ensure consistency
// If not all clients succeeded, we need to rollback for consistency
if successful_clients.len() < self.clients.len() {
// Rollback all successful acquisitions to maintain consistency
self.rollback_acquisitions(request, &successful_clients).await;
return Ok(LockResponse::failure(
"Partial success detected, rolled back for consistency".to_string(),
Duration::ZERO,
));
}
// All clients succeeded - lock acquired successfully
Ok(LockResponse::success(
LockInfo {
id: LockId::new_deterministic(&request.resource),
resource: request.resource.clone(),
lock_type: request.lock_type,
status: LockStatus::Acquired,
owner: request.owner.clone(),
acquired_at: std::time::SystemTime::now(),
expires_at: std::time::SystemTime::now() + request.ttl,
last_refreshed: std::time::SystemTime::now(),
metadata: request.metadata.clone(),
priority: request.priority,
wait_start_time: None,
},
Duration::ZERO,
))
} else {
// Phase 2b: Abort - insufficient quorum, rollback any successful acquisitions
if !successful_clients.is_empty() {
self.rollback_acquisitions(request, &successful_clients).await;
}
Ok(LockResponse::failure(
format!("Failed to acquire quorum: {}/{} required", successful_clients.len(), self.quorum),
Duration::ZERO,
))
}
}
/// Rollback lock acquisitions on specified clients
async fn rollback_acquisitions(&self, request: &LockRequest, client_indices: &[usize]) {
let lock_id = LockId::new_deterministic(&request.resource);
let rollback_futures: Vec<_> = client_indices
.iter()
.filter_map(|&idx| self.clients.get(idx))
.map(|client| async {
if let Err(e) = client.release(&lock_id).await {
tracing::warn!("Failed to rollback lock on client: {}", e);
}
})
.collect();
futures::future::join_all(rollback_futures).await;
tracing::info!(
"Rolled back {} lock acquisitions for resource: {}",
client_indices.len(),
request.resource
);
}
/// Release lock using clients
pub async fn release_lock(&self, lock_id: &LockId) -> Result<bool> {
if self.clients.is_empty() {
return Err(LockError::internal("No lock clients available"));
}
// For single client, use it directly
if self.clients.len() == 1 {
return self.clients[0].release(lock_id).await;
}
// For multiple clients, try to release from all clients
let futures: Vec<_> = self
.clients
.iter()
.map(|client| {
let id = lock_id.clone();
async move { client.release(&id).await }
})
.collect();
let results = futures::future::join_all(futures).await;
let successful = results.into_iter().filter_map(|r| r.ok()).filter(|&r| r).count();
// For release, if any succeed, consider it successful
Ok(successful > 0)
}
/// Get health information
pub async fn get_health(&self) -> crate::types::HealthInfo {
let lock_stats = self.get_stats().await;
let mut health = crate::types::HealthInfo {
node_id: self.namespace.clone(),
lock_stats,
..Default::default()
};
// Check client status
let mut connected_clients = 0;
for client in &self.clients {
if client.is_online().await {
connected_clients += 1;
}
}
health.status = if connected_clients > 0 {
crate::types::HealthStatus::Healthy
} else {
crate::types::HealthStatus::Degraded
};
health.connected_nodes = connected_clients;
health.total_nodes = self.clients.len();
health
}
/// Get namespace statistics
pub async fn get_stats(&self) -> crate::types::LockStats {
let mut stats = crate::types::LockStats::default();
// Try to get stats from clients
for client in &self.clients {
if let Ok(client_stats) = client.get_stats().await {
stats.successful_acquires += client_stats.successful_acquires;
stats.failed_acquires += client_stats.failed_acquires;
}
}
stats
}
}
impl Default for NamespaceLock {
fn default() -> Self {
Self::new("default".to_string())
}
}
/// Namespace lock manager trait
#[async_trait]
pub trait NamespaceLockManager: Send + Sync {
/// Acquire write locks for a batch of resources
async fn lock_batch(&self, resources: &[String], owner: &str, timeout: Duration, ttl: Duration) -> Result<bool>;
/// Release write locks for a batch of resources
async fn unlock_batch(&self, resources: &[String], owner: &str) -> Result<()>;
/// Acquire read locks for a batch of resources
async fn rlock_batch(&self, resources: &[String], owner: &str, timeout: Duration, ttl: Duration) -> Result<bool>;
/// Release read locks for a batch of resources
async fn runlock_batch(&self, resources: &[String], owner: &str) -> Result<()>;
}
#[async_trait]
impl NamespaceLockManager for NamespaceLock {
async fn lock_batch(&self, resources: &[String], owner: &str, timeout: Duration, ttl: Duration) -> Result<bool> {
if self.clients.is_empty() {
return Err(LockError::internal("No lock clients available"));
}
// Transactional batch lock: all resources must be locked or none
let mut acquired_resources = Vec::new();
for resource in resources {
let namespaced_resource = self.get_resource_key(resource);
let request = LockRequest::new(&namespaced_resource, LockType::Exclusive, owner)
.with_acquire_timeout(timeout)
.with_ttl(ttl);
let response = self.acquire_lock(&request).await?;
if response.success {
acquired_resources.push(namespaced_resource);
} else {
// Rollback all previously acquired locks
self.rollback_batch_locks(&acquired_resources, owner).await;
return Ok(false);
}
}
Ok(true)
}
async fn unlock_batch(&self, resources: &[String], _owner: &str) -> Result<()> {
if self.clients.is_empty() {
return Err(LockError::internal("No lock clients available"));
}
// Release all locks (best effort)
let release_futures: Vec<_> = resources
.iter()
.map(|resource| {
let namespaced_resource = self.get_resource_key(resource);
let lock_id = LockId::new_deterministic(&namespaced_resource);
async move {
if let Err(e) = self.release_lock(&lock_id).await {
tracing::warn!("Failed to release lock for resource {}: {}", resource, e);
}
}
})
.collect();
futures::future::join_all(release_futures).await;
Ok(())
}
async fn rlock_batch(&self, resources: &[String], owner: &str, timeout: Duration, ttl: Duration) -> Result<bool> {
if self.clients.is_empty() {
return Err(LockError::internal("No lock clients available"));
}
// Transactional batch read lock: all resources must be locked or none
let mut acquired_resources = Vec::new();
for resource in resources {
let namespaced_resource = self.get_resource_key(resource);
let request = LockRequest::new(&namespaced_resource, LockType::Shared, owner)
.with_acquire_timeout(timeout)
.with_ttl(ttl);
let response = self.acquire_lock(&request).await?;
if response.success {
acquired_resources.push(namespaced_resource);
} else {
// Rollback all previously acquired read locks
self.rollback_batch_locks(&acquired_resources, owner).await;
return Ok(false);
}
}
Ok(true)
}
async fn runlock_batch(&self, resources: &[String], _owner: &str) -> Result<()> {
if self.clients.is_empty() {
return Err(LockError::internal("No lock clients available"));
}
// Release all read locks (best effort)
let release_futures: Vec<_> = resources
.iter()
.map(|resource| {
let namespaced_resource = self.get_resource_key(resource);
let lock_id = LockId::new_deterministic(&namespaced_resource);
async move {
if let Err(e) = self.release_lock(&lock_id).await {
tracing::warn!("Failed to release read lock for resource {}: {}", resource, e);
}
}
})
.collect();
futures::future::join_all(release_futures).await;
Ok(())
}
}
impl NamespaceLock {
/// Rollback batch lock acquisitions
async fn rollback_batch_locks(&self, acquired_resources: &[String], _owner: &str) {
let rollback_futures: Vec<_> = acquired_resources
.iter()
.map(|resource| {
let lock_id = LockId::new_deterministic(resource);
async move {
if let Err(e) = self.release_lock(&lock_id).await {
tracing::warn!("Failed to rollback lock for resource {}: {}", resource, e);
}
}
})
.collect();
futures::future::join_all(rollback_futures).await;
tracing::info!("Rolled back {} batch lock acquisitions", acquired_resources.len());
}
}
#[cfg(test)]
mod tests {
use crate::LocalClient;
use super::*;
#[tokio::test]
async fn test_namespace_lock_local() {
let ns_lock = NamespaceLock::with_client(Arc::new(LocalClient::new()));
let resources = vec!["test1".to_string(), "test2".to_string()];
// Test batch lock
let result = ns_lock
.lock_batch(&resources, "test_owner", Duration::from_millis(100), Duration::from_secs(10))
.await;
assert!(result.is_ok());
assert!(result.unwrap());
// Test batch unlock
let result = ns_lock.unlock_batch(&resources, "test_owner").await;
assert!(result.is_ok());
}
#[tokio::test]
async fn test_connection_health() {
let local_lock = NamespaceLock::new("test-namespace".to_string());
let health = local_lock.get_health().await;
assert_eq!(health.status, crate::types::HealthStatus::Degraded); // No clients
}
#[tokio::test]
async fn test_namespace_lock_creation() {
let ns_lock = NamespaceLock::new("test-namespace".to_string());
assert_eq!(ns_lock.namespace(), "test-namespace");
}
#[tokio::test]
async fn test_namespace_lock_new_local() {
let ns_lock = NamespaceLock::with_client(Arc::new(LocalClient::new()));
assert_eq!(ns_lock.namespace(), "default");
assert_eq!(ns_lock.clients.len(), 1);
assert!(ns_lock.clients[0].is_local().await);
// Test that it can perform lock operations
let resources = vec!["test-resource".to_string()];
let result = ns_lock
.lock_batch(&resources, "test-owner", Duration::from_millis(100), Duration::from_secs(10))
.await;
assert!(result.is_ok());
assert!(result.unwrap());
}
#[tokio::test]
async fn test_namespace_lock_resource_key() {
let ns_lock = NamespaceLock::new("test-namespace".to_string());
// Test resource key generation
let resource_key = ns_lock.get_resource_key("test-resource");
assert_eq!(resource_key, "test-namespace:test-resource");
}
#[tokio::test]
async fn test_transactional_batch_lock() {
let ns_lock = NamespaceLock::with_client(Arc::new(LocalClient::new()));
let resources = vec!["resource1".to_string(), "resource2".to_string(), "resource3".to_string()];
// First, acquire one of the resources to simulate conflict
let conflicting_request = LockRequest::new(ns_lock.get_resource_key("resource2"), LockType::Exclusive, "other_owner")
.with_ttl(Duration::from_secs(10));
let response = ns_lock.acquire_lock(&conflicting_request).await.unwrap();
assert!(response.success);
// Now try batch lock - should fail and rollback
let result = ns_lock
.lock_batch(&resources, "test_owner", Duration::from_millis(10), Duration::from_secs(5))
.await;
assert!(result.is_ok());
assert!(!result.unwrap()); // Should fail due to conflict
// Verify that no locks were left behind (all rolled back)
for resource in &resources {
if resource != "resource2" {
// Skip the one we intentionally locked
let check_request = LockRequest::new(ns_lock.get_resource_key(resource), LockType::Exclusive, "verify_owner")
.with_ttl(Duration::from_secs(1));
let check_response = ns_lock.acquire_lock(&check_request).await.unwrap();
assert!(check_response.success, "Resource {resource} should be available after rollback");
// Clean up
let lock_id = LockId::new_deterministic(&ns_lock.get_resource_key(resource));
let _ = ns_lock.release_lock(&lock_id).await;
}
}
}
#[tokio::test]
async fn test_distributed_lock_consistency() {
// Create a namespace with multiple local clients to simulate distributed scenario
let client1: Arc<dyn LockClient> = Arc::new(LocalClient::new());
let client2: Arc<dyn LockClient> = Arc::new(LocalClient::new());
let clients = vec![client1, client2];
let ns_lock = NamespaceLock::with_clients("test-namespace".to_string(), clients);
let request = LockRequest::new("test-resource", LockType::Exclusive, "test_owner").with_ttl(Duration::from_secs(10));
// This should succeed only if ALL clients can acquire the lock
let response = ns_lock.acquire_lock(&request).await.unwrap();
// Since we're using separate LocalClient instances, they don't share state
// so this test demonstrates the consistency check
assert!(response.success); // Either all succeed or rollback happens
}
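// Editor's sketch (not part of this change set): with_clients derives the quorum
// from the client count, a simple majority for multiple clients and 1 for a single client.
#[tokio::test]
async fn test_quorum_sizing() {
    let clients: Vec<Arc<dyn LockClient>> = vec![
        Arc::new(LocalClient::new()),
        Arc::new(LocalClient::new()),
        Arc::new(LocalClient::new()),
    ];
    let ns_lock = NamespaceLock::with_clients("test-namespace".to_string(), clients);
    assert_eq!(ns_lock.quorum, 2); // (3 / 2) + 1
    let single = NamespaceLock::with_client(Arc::new(LocalClient::new()));
    assert_eq!(single.quorum, 1);
}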
}


@@ -1,306 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use async_trait::async_trait;
use std::{collections::HashMap, path::Path, sync::Arc, time::Duration};
use tokio::sync::RwLock;
use uuid::Uuid;
use crate::{
LockApi,
drwmutex::{DRWMutex, Options},
lrwmutex::LRWMutex,
};
use std::io::Result;
pub type RWLockerImpl = Box<dyn RWLocker + Send + Sync>;
#[async_trait]
pub trait RWLocker {
async fn get_lock(&mut self, opts: &Options) -> Result<bool>;
async fn un_lock(&mut self) -> Result<()>;
async fn get_u_lock(&mut self, opts: &Options) -> Result<bool>;
async fn un_r_lock(&mut self) -> Result<()>;
}
#[derive(Debug)]
struct NsLock {
reference: usize,
lock: LRWMutex,
}
#[derive(Debug, Default)]
pub struct NsLockMap {
is_dist_erasure: bool,
lock_map: RwLock<HashMap<String, NsLock>>,
}
impl NsLockMap {
pub fn new(is_dist_erasure: bool) -> Self {
Self {
is_dist_erasure,
..Default::default()
}
}
async fn lock(
&mut self,
volume: &String,
path: &String,
lock_source: &str,
ops_id: &str,
read_lock: bool,
timeout: Duration,
) -> bool {
let resource = Path::new(volume).join(path).to_str().unwrap().to_string();
let mut w_lock_map = self.lock_map.write().await;
let nslk = w_lock_map.entry(resource.clone()).or_insert(NsLock {
reference: 0,
lock: LRWMutex::default(),
});
nslk.reference += 1;
let locked = if read_lock {
nslk.lock.get_r_lock(ops_id, lock_source, &timeout).await
} else {
nslk.lock.get_lock(ops_id, lock_source, &timeout).await
};
if !locked {
nslk.reference -= 1;
if nslk.reference == 0 {
w_lock_map.remove(&resource);
}
}
locked
}
async fn un_lock(&mut self, volume: &String, path: &String, read_lock: bool) {
let resource = Path::new(volume).join(path).to_str().unwrap().to_string();
let mut w_lock_map = self.lock_map.write().await;
if let Some(nslk) = w_lock_map.get_mut(&resource) {
if read_lock {
nslk.lock.un_r_lock().await;
} else {
nslk.lock.un_lock().await;
}
nslk.reference -= 1;
if nslk.reference == 0 {
w_lock_map.remove(&resource);
}
}
}
}
pub struct WrapperLocker(pub Arc<RwLock<RWLockerImpl>>);
impl Drop for WrapperLocker {
fn drop(&mut self) {
let inner = self.0.clone();
tokio::spawn(async move {
let _ = inner.write().await.un_lock().await;
});
}
}
pub async fn new_nslock(
ns: Arc<RwLock<NsLockMap>>,
owner: String,
volume: String,
paths: Vec<String>,
lockers: Vec<LockApi>,
) -> WrapperLocker {
if ns.read().await.is_dist_erasure {
let names = paths
.iter()
.map(|path| Path::new(&volume).join(path).to_str().unwrap().to_string())
.collect();
return WrapperLocker(Arc::new(RwLock::new(Box::new(DistLockInstance::new(owner, names, lockers)))));
}
WrapperLocker(Arc::new(RwLock::new(Box::new(LocalLockInstance::new(ns, volume, paths)))))
}
struct DistLockInstance {
lock: Box<DRWMutex>,
ops_id: String,
}
impl DistLockInstance {
fn new(owner: String, names: Vec<String>, lockers: Vec<LockApi>) -> Self {
let ops_id = Uuid::new_v4().to_string();
Self {
lock: Box::new(DRWMutex::new(owner, names, lockers)),
ops_id,
}
}
}
#[async_trait]
impl RWLocker for DistLockInstance {
async fn get_lock(&mut self, opts: &Options) -> Result<bool> {
let source = "".to_string();
Ok(self.lock.get_lock(&self.ops_id, &source, opts).await)
}
async fn un_lock(&mut self) -> Result<()> {
self.lock.un_lock().await;
Ok(())
}
async fn get_u_lock(&mut self, opts: &Options) -> Result<bool> {
let source = "".to_string();
Ok(self.lock.get_r_lock(&self.ops_id, &source, opts).await)
}
async fn un_r_lock(&mut self) -> Result<()> {
self.lock.un_r_lock().await;
Ok(())
}
}
struct LocalLockInstance {
ns: Arc<RwLock<NsLockMap>>,
volume: String,
paths: Vec<String>,
ops_id: String,
}
impl LocalLockInstance {
fn new(ns: Arc<RwLock<NsLockMap>>, volume: String, paths: Vec<String>) -> Self {
let ops_id = Uuid::new_v4().to_string();
Self {
ns,
volume,
paths,
ops_id,
}
}
}
#[async_trait]
impl RWLocker for LocalLockInstance {
async fn get_lock(&mut self, opts: &Options) -> Result<bool> {
let source = "".to_string();
let read_lock = false;
let mut success = vec![false; self.paths.len()];
for (idx, path) in self.paths.iter().enumerate() {
if !self
.ns
.write()
.await
.lock(&self.volume, path, &source, &self.ops_id, read_lock, opts.timeout)
.await
{
for (i, x) in success.iter().enumerate() {
if *x {
self.ns.write().await.un_lock(&self.volume, &self.paths[i], read_lock).await;
}
}
return Ok(false);
}
success[idx] = true;
}
Ok(true)
}
async fn un_lock(&mut self) -> Result<()> {
let read_lock = false;
for path in self.paths.iter() {
self.ns.write().await.un_lock(&self.volume, path, read_lock).await;
}
Ok(())
}
async fn get_u_lock(&mut self, opts: &Options) -> Result<bool> {
let source = "".to_string();
let read_lock = true;
let mut success = vec![false; self.paths.len()];
for (idx, path) in self.paths.iter().enumerate() {
if !self
.ns
.write()
.await
.lock(&self.volume, path, &source, &self.ops_id, read_lock, opts.timeout)
.await
{
for (i, x) in success.iter().enumerate() {
if *x {
self.ns.write().await.un_lock(&self.volume, &self.paths[i], read_lock).await;
}
}
return Ok(false);
}
success[idx] = true;
}
Ok(true)
}
async fn un_r_lock(&mut self) -> Result<()> {
let read_lock = true;
for path in self.paths.iter() {
self.ns.write().await.un_lock(&self.volume, path, read_lock).await;
}
Ok(())
}
}
#[cfg(test)]
mod test {
use std::{sync::Arc, time::Duration};
use std::io::Result;
use tokio::sync::RwLock;
use crate::{
drwmutex::Options,
namespace_lock::{NsLockMap, new_nslock},
};
#[tokio::test]
async fn test_local_instance() -> Result<()> {
let ns_lock_map = Arc::new(RwLock::new(NsLockMap::default()));
let ns = new_nslock(
Arc::clone(&ns_lock_map),
"local".to_string(),
"test".to_string(),
vec!["foo".to_string()],
Vec::new(),
)
.await;
let result =
ns.0.write()
.await
.get_lock(&Options {
timeout: Duration::from_secs(5),
retry_interval: Duration::from_secs(1),
})
.await?;
assert!(result);
Ok(())
}
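// Editor's sketch (not part of this change set): the same local path can also be
// read-locked through get_u_lock and released with un_r_lock.
#[tokio::test]
async fn test_local_instance_read_lock() -> Result<()> {
    let ns_lock_map = Arc::new(RwLock::new(NsLockMap::default()));
    let ns = new_nslock(
        Arc::clone(&ns_lock_map),
        "local".to_string(),
        "test".to_string(),
        vec!["bar".to_string()],
        Vec::new(),
    )
    .await;
    let opts = Options {
        timeout: Duration::from_secs(5),
        retry_interval: Duration::from_secs(1),
    };
    assert!(ns.0.write().await.get_u_lock(&opts).await?);
    ns.0.write().await.un_r_lock().await?;
    Ok(())
}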
}


@@ -1,147 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::{Locker, lock_args::LockArgs};
use async_trait::async_trait;
use rustfs_protos::{node_service_time_out_client, proto_gen::node_service::GenerallyLockRequest};
use std::io::{Error, Result};
use tonic::Request;
use tracing::info;
#[derive(Debug, Clone)]
pub struct RemoteClient {
addr: String,
}
impl RemoteClient {
pub fn new(url: url::Url) -> Self {
let addr = format!("{}://{}:{}", url.scheme(), url.host_str().unwrap(), url.port().unwrap());
Self { addr }
}
}
#[async_trait]
impl Locker for RemoteClient {
async fn lock(&mut self, args: &LockArgs) -> Result<bool> {
info!("remote lock");
let args = serde_json::to_string(args)?;
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| Error::other(format!("can not get client, err: {err}")))?;
let request = Request::new(GenerallyLockRequest { args });
let response = client.lock(request).await.map_err(Error::other)?.into_inner();
if let Some(error_info) = response.error_info {
return Err(Error::other(error_info));
}
Ok(response.success)
}
async fn unlock(&mut self, args: &LockArgs) -> Result<bool> {
info!("remote unlock");
let args = serde_json::to_string(args)?;
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| Error::other(format!("can not get client, err: {err}")))?;
let request = Request::new(GenerallyLockRequest { args });
let response = client.un_lock(request).await.map_err(Error::other)?.into_inner();
if let Some(error_info) = response.error_info {
return Err(Error::other(error_info));
}
Ok(response.success)
}
async fn rlock(&mut self, args: &LockArgs) -> Result<bool> {
info!("remote rlock");
let args = serde_json::to_string(args)?;
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| Error::other(format!("can not get client, err: {err}")))?;
let request = Request::new(GenerallyLockRequest { args });
let response = client.r_lock(request).await.map_err(Error::other)?.into_inner();
if let Some(error_info) = response.error_info {
return Err(Error::other(error_info));
}
Ok(response.success)
}
async fn runlock(&mut self, args: &LockArgs) -> Result<bool> {
info!("remote runlock");
let args = serde_json::to_string(args)?;
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| Error::other(format!("can not get client, err: {err}")))?;
let request = Request::new(GenerallyLockRequest { args });
let response = client.r_un_lock(request).await.map_err(Error::other)?.into_inner();
if let Some(error_info) = response.error_info {
return Err(Error::other(error_info));
}
Ok(response.success)
}
async fn refresh(&mut self, args: &LockArgs) -> Result<bool> {
info!("remote refresh");
let args = serde_json::to_string(args)?;
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| Error::other(format!("can not get client, err: {err}")))?;
let request = Request::new(GenerallyLockRequest { args });
let response = client.refresh(request).await.map_err(Error::other)?.into_inner();
if let Some(error_info) = response.error_info {
return Err(Error::other(error_info));
}
Ok(response.success)
}
async fn force_unlock(&mut self, args: &LockArgs) -> Result<bool> {
info!("remote force_unlock");
let args = serde_json::to_string(args)?;
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| Error::other(format!("can not get client, err: {err}")))?;
let request = Request::new(GenerallyLockRequest { args });
let response = client.force_un_lock(request).await.map_err(Error::other)?.into_inner();
if let Some(error_info) = response.error_info {
return Err(Error::other(error_info));
}
Ok(response.success)
}
async fn close(&self) {}
async fn is_online(&self) -> bool {
true
}
async fn is_local(&self) -> bool {
false
}
}
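// Editor's sketch (not part of this change set): wiring a RemoteClient to a node
// service endpoint. The address below is hypothetical; the test is ignored because
// it only succeeds when a node service is actually listening there.
#[cfg(test)]
mod sketch {
    use super::*;

    #[tokio::test]
    #[ignore = "requires a running node service"]
    async fn remote_lock_sketch() -> Result<()> {
        let url = url::Url::parse("http://127.0.0.1:9000").map_err(Error::other)?;
        let mut client = RemoteClient::new(url);
        let args = LockArgs {
            uid: "1111".to_string(),
            resources: vec!["bucket/object".to_string()],
            owner: "node-1".to_string(),
            source: "".to_string(),
            quorum: 2,
        };
        assert!(client.lock(&args).await?);
        assert!(client.unlock(&args).await?);
        Ok(())
    }
}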

crates/lock/src/types.rs (new file, 702 lines)

@@ -0,0 +1,702 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use serde::{Deserialize, Serialize};
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use uuid::Uuid;
/// Lock type enumeration
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum LockType {
/// Exclusive lock (write lock)
Exclusive,
/// Shared lock (read lock)
Shared,
}
/// Lock status enumeration
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum LockStatus {
/// Acquired
Acquired,
/// Waiting
Waiting,
/// Released
Released,
/// Expired
Expired,
/// Force released
ForceReleased,
}
/// Lock priority
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default, Serialize, Deserialize)]
pub enum LockPriority {
Low = 1,
#[default]
Normal = 2,
High = 3,
Critical = 4,
}
/// Lock information structure
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LockInfo {
/// Unique identifier for the lock
pub id: LockId,
/// Resource path
pub resource: String,
/// Lock type
pub lock_type: LockType,
/// Lock status
pub status: LockStatus,
/// Lock owner
pub owner: String,
/// Acquisition time
pub acquired_at: SystemTime,
/// Expiration time
pub expires_at: SystemTime,
/// Last refresh time
pub last_refreshed: SystemTime,
/// Lock metadata
pub metadata: LockMetadata,
/// Lock priority
pub priority: LockPriority,
/// Wait start time
pub wait_start_time: Option<SystemTime>,
}
impl LockInfo {
/// Check if the lock has expired
pub fn has_expired(&self) -> bool {
self.expires_at <= SystemTime::now()
}
/// Get remaining time until expiration
pub fn remaining_time(&self) -> Duration {
let now = SystemTime::now();
if self.expires_at > now {
self.expires_at.duration_since(now).unwrap_or(Duration::ZERO)
} else {
Duration::ZERO
}
}
/// Check if the lock is still valid
pub fn is_valid(&self) -> bool {
!self.has_expired() && self.status == LockStatus::Acquired
}
}
/// Lock ID type
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct LockId {
pub resource: String,
pub uuid: String,
}
impl LockId {
/// Generate new lock ID for a resource
pub fn new(resource: &str) -> Self {
Self {
resource: resource.to_string(),
uuid: Uuid::new_v4().to_string(),
}
}
/// Generate deterministic lock ID for a resource (same resource = same ID)
pub fn new_deterministic(resource: &str) -> Self {
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
let mut hasher = DefaultHasher::new();
resource.hash(&mut hasher);
let hash = hasher.finish();
Self {
resource: resource.to_string(),
uuid: format!("{hash:016x}"),
}
}
/// Create lock ID from resource and uuid
pub fn from_parts(resource: impl Into<String>, uuid: impl Into<String>) -> Self {
Self {
resource: resource.into(),
uuid: uuid.into(),
}
}
/// Create lock ID from string (for compatibility, expects "resource:uuid")
pub fn from_string(id: impl Into<String>) -> Self {
let s = id.into();
if let Some((resource, uuid)) = s.split_once(":") {
Self {
resource: resource.to_string(),
uuid: uuid.to_string(),
}
} else {
// fallback: treat as uuid only
Self {
resource: "unknown".to_string(),
uuid: s,
}
}
}
/// Get string representation of lock ID ("resource:uuid")
pub fn as_str(&self) -> String {
format!("{}:{}", self.resource, self.uuid)
}
}
impl Default for LockId {
fn default() -> Self {
Self::new("default")
}
}
impl std::fmt::Display for LockId {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}:{}", self.resource, self.uuid)
}
}
/// Lock metadata structure
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LockMetadata {
/// Client information
pub client_info: Option<String>,
/// Operation ID
pub operation_id: Option<String>,
/// Priority (lower number = higher priority)
pub priority: Option<i32>,
/// Custom tags
pub tags: std::collections::HashMap<String, String>,
/// Creation time
pub created_at: SystemTime,
}
impl Default for LockMetadata {
fn default() -> Self {
Self {
client_info: None,
operation_id: None,
priority: None,
tags: std::collections::HashMap::new(),
created_at: SystemTime::now(),
}
}
}
impl LockMetadata {
/// Create new lock metadata
pub fn new() -> Self {
Self::default()
}
/// Set client information
pub fn with_client_info(mut self, client_info: impl Into<String>) -> Self {
self.client_info = Some(client_info.into());
self
}
/// Set operation ID
pub fn with_operation_id(mut self, operation_id: impl Into<String>) -> Self {
self.operation_id = Some(operation_id.into());
self
}
/// Set priority
pub fn with_priority(mut self, priority: i32) -> Self {
self.priority = Some(priority);
self
}
/// Add tag
pub fn with_tag(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
self.tags.insert(key.into(), value.into());
self
}
}
/// Lock request structure
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LockRequest {
/// Lock ID
pub lock_id: LockId,
/// Resource path
pub resource: String,
/// Lock type
pub lock_type: LockType,
/// Lock owner
pub owner: String,
/// Acquire timeout duration (how long to wait for lock acquisition)
pub acquire_timeout: Duration,
/// Lock TTL (Time To Live - how long the lock remains valid after acquisition)
pub ttl: Duration,
/// Lock metadata
pub metadata: LockMetadata,
/// Lock priority
pub priority: LockPriority,
/// Deadlock detection
pub deadlock_detection: bool,
}
impl LockRequest {
/// Create new lock request
pub fn new(resource: impl Into<String>, lock_type: LockType, owner: impl Into<String>) -> Self {
let resource_str = resource.into();
Self {
lock_id: LockId::new_deterministic(&resource_str),
resource: resource_str,
lock_type,
owner: owner.into(),
acquire_timeout: Duration::from_secs(10), // Default 10 seconds to acquire
ttl: Duration::from_secs(30), // Default 30 seconds lock lifetime
metadata: LockMetadata::default(),
priority: LockPriority::default(),
deadlock_detection: false,
}
}
/// Set acquire timeout (how long to wait for lock acquisition)
pub fn with_acquire_timeout(mut self, timeout: Duration) -> Self {
self.acquire_timeout = timeout;
self
}
/// Set lock TTL (how long the lock remains valid after acquisition)
pub fn with_ttl(mut self, ttl: Duration) -> Self {
self.ttl = ttl;
self
}
/// Set metadata
pub fn with_metadata(mut self, metadata: LockMetadata) -> Self {
self.metadata = metadata;
self
}
/// Set priority
pub fn with_priority(mut self, priority: LockPriority) -> Self {
self.priority = priority;
self
}
/// Set deadlock detection
pub fn with_deadlock_detection(mut self, enabled: bool) -> Self {
self.deadlock_detection = enabled;
self
}
}
/// Lock response structure
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LockResponse {
/// Whether lock acquisition was successful
pub success: bool,
/// Lock information (if successful)
pub lock_info: Option<LockInfo>,
/// Error message (if failed)
pub error: Option<String>,
/// Wait time
pub wait_time: Duration,
/// Position in wait queue
pub position_in_queue: Option<usize>,
}
impl LockResponse {
/// Create success response
pub fn success(lock_info: LockInfo, wait_time: Duration) -> Self {
Self {
success: true,
lock_info: Some(lock_info),
error: None,
wait_time,
position_in_queue: None,
}
}
/// Create failure response
pub fn failure(error: impl Into<String>, wait_time: Duration) -> Self {
Self {
success: false,
lock_info: None,
error: Some(error.into()),
wait_time,
position_in_queue: None,
}
}
/// Create waiting response
pub fn waiting(wait_time: Duration, position: usize) -> Self {
Self {
success: false,
lock_info: None,
error: None,
wait_time,
position_in_queue: Some(position),
}
}
/// Check if response indicates success
pub fn is_success(&self) -> bool {
self.success
}
/// Check if response indicates failure
pub fn is_failure(&self) -> bool {
!self.success && self.error.is_some()
}
/// Check if response indicates waiting
pub fn is_waiting(&self) -> bool {
!self.success && self.position_in_queue.is_some()
}
/// Get lock info
pub fn lock_info(&self) -> Option<&LockInfo> {
self.lock_info.as_ref()
}
}
/// Lock statistics structure
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LockStats {
/// Total number of locks
pub total_locks: usize,
/// Number of exclusive locks
pub exclusive_locks: usize,
/// Number of shared locks
pub shared_locks: usize,
/// Number of waiting locks
pub waiting_locks: usize,
/// Number of deadlock detections
pub deadlock_detections: usize,
/// Number of priority upgrades
pub priority_upgrades: usize,
/// Last update time
pub last_updated: SystemTime,
/// Total releases
pub total_releases: usize,
/// Total hold time
pub total_hold_time: Duration,
/// Average hold time
pub average_hold_time: Duration,
/// Total wait queues
pub total_wait_queues: usize,
/// Queue entries
pub queue_entries: usize,
/// Average wait time
pub avg_wait_time: Duration,
/// Successful acquires
pub successful_acquires: usize,
/// Failed acquires
pub failed_acquires: usize,
}
impl Default for LockStats {
fn default() -> Self {
Self {
total_locks: 0,
exclusive_locks: 0,
shared_locks: 0,
waiting_locks: 0,
deadlock_detections: 0,
priority_upgrades: 0,
last_updated: SystemTime::now(),
total_releases: 0,
total_hold_time: Duration::ZERO,
average_hold_time: Duration::ZERO,
total_wait_queues: 0,
queue_entries: 0,
avg_wait_time: Duration::ZERO,
successful_acquires: 0,
failed_acquires: 0,
}
}
}
/// Node information structure
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NodeInfo {
/// Node ID
pub id: String,
/// Node address
pub address: String,
/// Node status
pub status: NodeStatus,
/// Last heartbeat time
pub last_heartbeat: SystemTime,
/// Node weight
pub weight: f64,
}
/// Node status enumeration
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
pub enum NodeStatus {
/// Online
#[default]
Online,
/// Offline
Offline,
/// Degraded
Degraded,
}
/// Cluster information structure
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ClusterInfo {
/// Cluster ID
pub cluster_id: String,
/// List of nodes
pub nodes: Vec<NodeInfo>,
/// Quorum size
pub quorum: usize,
/// Cluster status
pub status: ClusterStatus,
/// Last update time
pub last_updated: SystemTime,
}
/// Cluster status enumeration
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
pub enum ClusterStatus {
/// Healthy
#[default]
Healthy,
/// Degraded
Degraded,
/// Unhealthy
Unhealthy,
}
/// Health check status
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum HealthStatus {
/// Healthy
Healthy,
/// Degraded
Degraded,
/// Unhealthy
Unhealthy,
}
/// Health check information
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthInfo {
/// Overall status
pub status: HealthStatus,
/// Node ID
pub node_id: String,
/// Last heartbeat time
pub last_heartbeat: SystemTime,
/// Connected nodes count
pub connected_nodes: usize,
/// Total nodes count
pub total_nodes: usize,
/// Lock statistics
pub lock_stats: LockStats,
/// Error message (if any)
pub error_message: Option<String>,
}
impl Default for HealthInfo {
fn default() -> Self {
Self {
status: HealthStatus::Healthy,
node_id: "unknown".to_string(),
last_heartbeat: SystemTime::now(),
connected_nodes: 1,
total_nodes: 1,
lock_stats: LockStats::default(),
error_message: None,
}
}
}
/// Timestamp type alias
pub type Timestamp = u64;
/// Get current timestamp
pub fn current_timestamp() -> Timestamp {
SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs()
}
/// Convert timestamp to system time
pub fn timestamp_to_system_time(timestamp: Timestamp) -> SystemTime {
UNIX_EPOCH + Duration::from_secs(timestamp)
}
/// Convert system time to timestamp
pub fn system_time_to_timestamp(time: SystemTime) -> Timestamp {
time.duration_since(UNIX_EPOCH).unwrap().as_secs()
}
/// Deadlock detection result structure
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeadlockDetectionResult {
/// Whether deadlock exists
pub has_deadlock: bool,
/// Deadlock cycle
pub deadlock_cycle: Vec<String>,
/// Suggested resolution
pub suggested_resolution: Option<String>,
/// Affected resources
pub affected_resources: Vec<String>,
/// Affected owners
pub affected_owners: Vec<String>,
}
/// Wait graph node structure
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WaitGraphNode {
/// Owner
pub owner: String,
/// Resources being waited for
pub waiting_for: Vec<String>,
/// Resources currently held
pub held_resources: Vec<String>,
/// Priority
pub priority: LockPriority,
/// Wait start time
pub wait_start_time: SystemTime,
}
/// Wait queue item structure
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WaitQueueItem {
/// Owner
pub owner: String,
/// Lock type
pub lock_type: LockType,
/// Priority
pub priority: LockPriority,
/// Wait start time
pub wait_start_time: SystemTime,
/// Request time
pub request_time: SystemTime,
}
impl WaitQueueItem {
/// Create new wait queue item
pub fn new(owner: &str, lock_type: LockType, priority: LockPriority) -> Self {
let now = SystemTime::now();
Self {
owner: owner.to_string(),
lock_type,
priority,
wait_start_time: now,
request_time: now,
}
}
/// Get wait duration
pub fn wait_duration(&self) -> Duration {
self.wait_start_time.elapsed().unwrap_or_default()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_lock_id() {
let id1 = LockId::new("test-resource");
let id2 = LockId::new("test-resource");
assert_ne!(id1, id2);
let id3 = LockId::from_string("test-resource:test-uuid");
assert_eq!(id3.as_str(), "test-resource:test-uuid");
}
#[test]
fn test_lock_metadata() {
let metadata = LockMetadata::new()
.with_client_info("test-client")
.with_operation_id("test-op")
.with_priority(1)
.with_tag("key", "value");
assert_eq!(metadata.client_info, Some("test-client".to_string()));
assert_eq!(metadata.operation_id, Some("test-op".to_string()));
assert_eq!(metadata.priority, Some(1));
assert_eq!(metadata.tags.get("key"), Some(&"value".to_string()));
}
#[test]
fn test_lock_request() {
let request = LockRequest::new("test-resource", LockType::Exclusive, "test-owner")
.with_acquire_timeout(Duration::from_secs(60))
.with_priority(LockPriority::High)
.with_deadlock_detection(true);
assert_eq!(request.resource, "test-resource");
assert_eq!(request.lock_type, LockType::Exclusive);
assert_eq!(request.owner, "test-owner");
assert_eq!(request.acquire_timeout, Duration::from_secs(60));
assert_eq!(request.priority, LockPriority::High);
assert!(request.deadlock_detection);
}
#[test]
fn test_lock_response() {
let lock_info = LockInfo {
id: LockId::new("test-resource"),
resource: "test".to_string(),
lock_type: LockType::Exclusive,
status: LockStatus::Acquired,
owner: "test".to_string(),
acquired_at: SystemTime::now(),
expires_at: SystemTime::now() + Duration::from_secs(30),
last_refreshed: SystemTime::now(),
metadata: LockMetadata::default(),
priority: LockPriority::Normal,
wait_start_time: None,
};
let success = LockResponse::success(lock_info.clone(), Duration::ZERO);
assert!(success.is_success());
let failure = LockResponse::failure("error", Duration::ZERO);
assert!(failure.is_failure());
let waiting = LockResponse::waiting(Duration::ZERO, 1);
assert!(waiting.is_waiting());
}
#[test]
fn test_timestamp_conversion() {
let now = SystemTime::now();
let timestamp = system_time_to_timestamp(now);
let converted = timestamp_to_system_time(timestamp);
// Allow for small time differences
let diff = now.duration_since(converted).unwrap();
assert!(diff < Duration::from_secs(1));
}
#[test]
fn test_serialization() {
let request = LockRequest::new("test", LockType::Exclusive, "owner");
let serialized = serde_json::to_string(&request).unwrap();
let deserialized: LockRequest = serde_json::from_str(&serialized).unwrap();
assert_eq!(request.resource, deserialized.resource);
assert_eq!(request.lock_type, deserialized.lock_type);
assert_eq!(request.owner, deserialized.owner);
}
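// Editor's sketch (not part of this change set): new_deterministic hashes the
// resource name, so the same resource always maps to the same LockId.
#[test]
fn test_lock_id_deterministic() {
    let a = LockId::new_deterministic("bucket/object");
    let b = LockId::new_deterministic("bucket/object");
    assert_eq!(a, b);
    assert_ne!(a, LockId::new_deterministic("bucket/other"));
    assert!(a.as_str().starts_with("bucket/object:"));
}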
}


@@ -28,6 +28,12 @@
- Type-safe message definitions
- Code generation for multiple programming languages
## Generate code
```
cargo run --bin gproto
```
## 📚 Documentation
For comprehensive documentation, examples, and usage guides, please visit the main [RustFS repository](https://github.com/rustfs/rustfs).


@@ -604,26 +604,6 @@ pub struct DiskInfoResponse {
#[prost(message, optional, tag = "3")]
pub error: ::core::option::Option<Error>,
}
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct NsScannerRequest {
#[prost(string, tag = "1")]
pub disk: ::prost::alloc::string::String,
#[prost(string, tag = "2")]
pub cache: ::prost::alloc::string::String,
#[prost(uint64, tag = "3")]
pub scan_mode: u64,
}
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct NsScannerResponse {
#[prost(bool, tag = "1")]
pub success: bool,
#[prost(string, tag = "2")]
pub update: ::prost::alloc::string::String,
#[prost(string, tag = "3")]
pub data_usage_cache: ::prost::alloc::string::String,
#[prost(message, optional, tag = "4")]
pub error: ::core::option::Option<Error>,
}
/// lock APIs have the same argument type
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct GenerallyLockRequest {
@@ -1660,21 +1640,6 @@ pub mod node_service_client {
.insert(GrpcMethod::new("node_service.NodeService", "DiskInfo"));
self.inner.unary(req, path, codec).await
}
pub async fn ns_scanner(
&mut self,
request: impl tonic::IntoStreamingRequest<Message = super::NsScannerRequest>,
) -> std::result::Result<tonic::Response<tonic::codec::Streaming<super::NsScannerResponse>>, tonic::Status> {
self.inner
.ready()
.await
.map_err(|e| tonic::Status::unknown(format!("Service was not ready: {}", e.into())))?;
let codec = tonic::codec::ProstCodec::default();
let path = http::uri::PathAndQuery::from_static("/node_service.NodeService/NsScanner");
let mut req = request.into_streaming_request();
req.extensions_mut()
.insert(GrpcMethod::new("node_service.NodeService", "NsScanner"));
self.inner.streaming(req, path, codec).await
}
pub async fn lock(
&mut self,
request: impl tonic::IntoRequest<super::GenerallyLockRequest>,
@@ -2466,14 +2431,6 @@ pub mod node_service_server {
&self,
request: tonic::Request<super::DiskInfoRequest>,
) -> std::result::Result<tonic::Response<super::DiskInfoResponse>, tonic::Status>;
/// Server streaming response type for the NsScanner method.
type NsScannerStream: tonic::codegen::tokio_stream::Stream<Item = std::result::Result<super::NsScannerResponse, tonic::Status>>
+ std::marker::Send
+ 'static;
async fn ns_scanner(
&self,
request: tonic::Request<tonic::Streaming<super::NsScannerRequest>>,
) -> std::result::Result<tonic::Response<Self::NsScannerStream>, tonic::Status>;
async fn lock(
&self,
request: tonic::Request<super::GenerallyLockRequest>,
@@ -3670,35 +3627,6 @@ pub mod node_service_server {
};
Box::pin(fut)
}
"/node_service.NodeService/NsScanner" => {
#[allow(non_camel_case_types)]
struct NsScannerSvc<T: NodeService>(pub Arc<T>);
impl<T: NodeService> tonic::server::StreamingService<super::NsScannerRequest> for NsScannerSvc<T> {
type Response = super::NsScannerResponse;
type ResponseStream = T::NsScannerStream;
type Future = BoxFuture<tonic::Response<Self::ResponseStream>, tonic::Status>;
fn call(&mut self, request: tonic::Request<tonic::Streaming<super::NsScannerRequest>>) -> Self::Future {
let inner = Arc::clone(&self.0);
let fut = async move { <T as NodeService>::ns_scanner(&inner, request).await };
Box::pin(fut)
}
}
let accept_compression_encodings = self.accept_compression_encodings;
let send_compression_encodings = self.send_compression_encodings;
let max_decoding_message_size = self.max_decoding_message_size;
let max_encoding_message_size = self.max_encoding_message_size;
let inner = self.inner.clone();
let fut = async move {
let method = NsScannerSvc(inner);
let codec = tonic::codec::ProstCodec::default();
let mut grpc = tonic::server::Grpc::new(codec)
.apply_compression_config(accept_compression_encodings, send_compression_encodings)
.apply_max_message_size_config(max_decoding_message_size, max_encoding_message_size);
let res = grpc.streaming(method, req).await;
Ok(res)
};
Box::pin(fut)
}
"/node_service.NodeService/Lock" => {
#[allow(non_camel_case_types)]
struct LockSvc<T: NodeService>(pub Arc<T>);


@@ -16,7 +16,7 @@ use std::{cmp, env, fs, io::Write, path::Path, process::Command};
type AnyError = Box<dyn std::error::Error>;
const VERSION_PROTOBUF: Version = Version(30, 2, 0); // 30.2.0
const VERSION_PROTOBUF: Version = Version(27, 2, 0); // 27.2.0
const VERSION_FLATBUFFERS: Version = Version(24, 3, 25); // 24.3.25
/// Build protos if the major version of `flatc` or `protoc` is greater
/// or lesser than the expected version.
@@ -27,7 +27,7 @@ const ENV_FLATC_PATH: &str = "FLATC_PATH";
fn main() -> Result<(), AnyError> {
let version = protobuf_compiler_version()?;
let need_compile = match version.compare_ext(&VERSION_PROTOBUF) {
Ok(cmp::Ordering::Equal) => true,
Ok(cmp::Ordering::Greater) => true,
Ok(_) => {
let version_err = Version::build_error_message(&version, &VERSION_PROTOBUF).unwrap();
println!("cargo:warning=Tool `protoc` {version_err}, skip compiling.");
@@ -47,6 +47,7 @@ fn main() -> Result<(), AnyError> {
// path of proto file
let project_root_dir = env::current_dir()?.join("crates/protos/src");
let proto_dir = project_root_dir.clone();
println!("proto_dir: {proto_dir:?}");
let proto_files = &["node.proto"];
let proto_out_dir = project_root_dir.join("generated").join("proto_gen");
let flatbuffer_out_dir = project_root_dir.join("generated").join("flatbuffers_generated");
@@ -67,12 +68,44 @@ fn main() -> Result<(), AnyError> {
let mut generated_mod_rs = fs::File::create(generated_mod_rs_path)?;
writeln!(&mut generated_mod_rs, "pub mod node_service;")?;
writeln!(
&mut generated_mod_rs,
r#"// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License."#
)?;
generated_mod_rs.flush()?;
let generated_mod_rs_path = project_root_dir.join("generated").join("mod.rs");
let mut generated_mod_rs = fs::File::create(generated_mod_rs_path)?;
writeln!(&mut generated_mod_rs, "#![allow(unused_imports)]")?;
writeln!(
&mut generated_mod_rs,
r#"// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License."#
)?;
writeln!(&mut generated_mod_rs, "#![allow(clippy::all)]")?;
writeln!(&mut generated_mod_rs, "pub mod proto_gen;")?;
generated_mod_rs.flush()?;
@@ -107,7 +140,7 @@ fn compile_flatbuffers_models<P: AsRef<Path>, S: AsRef<str>>(
) -> Result<(), AnyError> {
let version = flatbuffers_compiler_version(flatc_path)?;
let need_compile = match version.compare_ext(&VERSION_FLATBUFFERS) {
Ok(cmp::Ordering::Equal) => true,
Ok(cmp::Ordering::Greater) => true,
Ok(_) => {
let version_err = Version::build_error_message(&version, &VERSION_FLATBUFFERS).unwrap();
println!("cargo:warning=Tool `{flatc_path}` {version_err}, skip compiling.");
@@ -217,7 +250,7 @@ impl Version {
Ok(self.compare_major_version(expected_version))
} else {
match self.compare_major_version(expected_version) {
cmp::Ordering::Equal => Ok(cmp::Ordering::Equal),
cmp::Ordering::Greater => Ok(cmp::Ordering::Greater),
_ => Err(Self::build_error_message(self, expected_version).unwrap()),
}
}
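Taken together, the two build.rs hunks above relax the version gate: a `protoc` or `flatc` whose major version is equal to or newer than the pinned one now triggers code generation, while an older tool only produces a cargo warning and skips it. A minimal sketch of that gating logic, with `should_compile` as an illustrative stand-in for the real `compare_ext`-based check:

use std::cmp::Ordering;

// Illustrative stand-in for the Version::compare_ext flow in build.rs:
// equal or newer major versions compile, older ones warn and skip.
fn should_compile(found_major: u32, required_major: u32) -> bool {
    match found_major.cmp(&required_major) {
        Ordering::Equal | Ordering::Greater => true,
        Ordering::Less => {
            println!("cargo:warning=tool major version {found_major} is older than {required_major}, skip compiling.");
            false
        }
    }
}

fn main() {
    assert!(should_compile(28, 27));  // newer protoc: generate code
    assert!(!should_compile(26, 27)); // older protoc: warn and skip
}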

View File

@@ -423,19 +423,6 @@ message DiskInfoResponse {
optional Error error = 3;
}
message NsScannerRequest {
string disk = 1;
string cache = 2;
uint64 scan_mode = 3;
}
message NsScannerResponse {
bool success = 1;
string update = 2;
string data_usage_cache = 3;
optional Error error = 4;
}
// lock APIs share the same argument type
message GenerallyLockRequest {
string args = 1;
@@ -805,7 +792,6 @@ service NodeService {
rpc ReadMultiple(ReadMultipleRequest) returns (ReadMultipleResponse) {};
rpc DeleteVolume(DeleteVolumeRequest) returns (DeleteVolumeResponse) {};
rpc DiskInfo(DiskInfoRequest) returns (DiskInfoResponse) {};
rpc NsScanner(stream NsScannerRequest) returns (stream NsScannerResponse) {};
/* -------------------------------lock service-------------------------- */

View File

@@ -21,6 +21,9 @@ pub mod object_store;
pub mod query;
pub mod server;
#[cfg(test)]
mod test;
pub type QueryResult<T> = Result<T, QueryError>;
#[derive(Debug, Snafu)]
@@ -90,3 +93,82 @@ impl Display for ResolvedTable {
write!(f, "{table}")
}
}
#[cfg(test)]
mod tests {
use super::*;
use datafusion::common::DataFusionError;
use datafusion::sql::sqlparser::parser::ParserError;
#[test]
fn test_query_error_display() {
let err = QueryError::NotImplemented {
err: "feature X".to_string(),
};
assert_eq!(err.to_string(), "This feature is not implemented: feature X");
let err = QueryError::MultiStatement {
num: 2,
sql: "SELECT 1; SELECT 2;".to_string(),
};
assert_eq!(err.to_string(), "Multi-statement not allow, found num:2, sql:SELECT 1; SELECT 2;");
let err = QueryError::Cancel;
assert_eq!(err.to_string(), "The query has been canceled");
let err = QueryError::FunctionNotExists {
name: "my_func".to_string(),
};
assert_eq!(err.to_string(), "Udf not exists, name:my_func.");
let err = QueryError::StoreError {
e: "connection failed".to_string(),
};
assert_eq!(err.to_string(), "Store Error, e:connection failed.");
}
#[test]
fn test_query_error_from_datafusion_error() {
let df_error = DataFusionError::Plan("invalid plan".to_string());
let query_error: QueryError = df_error.into();
match query_error {
QueryError::Datafusion { source, .. } => {
assert!(source.to_string().contains("invalid plan"));
}
_ => panic!("Expected Datafusion error"),
}
}
#[test]
fn test_query_error_from_parser_error() {
let parser_error = ParserError::ParserError("syntax error".to_string());
let query_error = QueryError::Parser { source: parser_error };
assert!(query_error.to_string().contains("syntax error"));
}
#[test]
fn test_resolved_table() {
let table = ResolvedTable {
table: "my_table".to_string(),
};
assert_eq!(table.table(), "my_table");
assert_eq!(table.to_string(), "my_table");
}
#[test]
fn test_resolved_table_clone_and_eq() {
let table1 = ResolvedTable {
table: "table1".to_string(),
};
let table2 = table1.clone();
let table3 = ResolvedTable {
table: "table2".to_string(),
};
assert_eq!(table1, table2);
assert_ne!(table1, table3);
}
}

View File

@@ -12,12 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
pub mod background_heal_ops;
pub mod data_scanner;
pub mod data_scanner_metric;
pub mod data_usage;
pub mod data_usage_cache;
pub mod error;
pub mod heal_commands;
pub mod heal_ops;
pub mod mrf;
//! Test modules for s3select-api
pub mod query_execution_test;

View File

@@ -0,0 +1,167 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#[cfg(test)]
mod tests {
use crate::query::execution::{DONE, Output, QueryExecution, QueryState, QueryType, RUNNING};
use crate::{QueryError, QueryResult};
use async_trait::async_trait;
#[test]
fn test_query_type_display() {
assert_eq!(format!("{}", QueryType::Batch), "batch");
assert_eq!(format!("{}", QueryType::Stream), "stream");
}
#[test]
fn test_query_type_equality() {
assert_eq!(QueryType::Batch, QueryType::Batch);
assert_ne!(QueryType::Batch, QueryType::Stream);
assert_eq!(QueryType::Stream, QueryType::Stream);
}
#[tokio::test]
async fn test_output_nil_methods() {
let output = Output::Nil(());
let result = output.chunk_result().await;
assert!(result.is_ok(), "Output::Nil result should be Ok");
let output2 = Output::Nil(());
let rows = output2.num_rows().await;
assert_eq!(rows, 0, "Output::Nil should have 0 rows");
let output3 = Output::Nil(());
let affected = output3.affected_rows().await;
assert_eq!(affected, 0, "Output::Nil should have 0 affected rows");
}
#[test]
fn test_query_state_as_ref() {
let accepting = QueryState::ACCEPTING;
assert_eq!(accepting.as_ref(), "ACCEPTING");
let running = QueryState::RUNNING(RUNNING::ANALYZING);
assert_eq!(running.as_ref(), "ANALYZING");
let done = QueryState::DONE(DONE::FINISHED);
assert_eq!(done.as_ref(), "FINISHED");
}
#[test]
fn test_running_state_as_ref() {
assert_eq!(RUNNING::DISPATCHING.as_ref(), "DISPATCHING");
assert_eq!(RUNNING::ANALYZING.as_ref(), "ANALYZING");
assert_eq!(RUNNING::OPTMIZING.as_ref(), "OPTMIZING");
assert_eq!(RUNNING::SCHEDULING.as_ref(), "SCHEDULING");
}
#[test]
fn test_done_state_as_ref() {
assert_eq!(DONE::FINISHED.as_ref(), "FINISHED");
assert_eq!(DONE::FAILED.as_ref(), "FAILED");
assert_eq!(DONE::CANCELLED.as_ref(), "CANCELLED");
}
// Mock implementation for testing
struct MockQueryExecution {
should_succeed: bool,
should_cancel: bool,
}
#[async_trait]
impl QueryExecution for MockQueryExecution {
async fn start(&self) -> QueryResult<Output> {
if self.should_cancel {
return Err(QueryError::Cancel);
}
if self.should_succeed {
Ok(Output::Nil(()))
} else {
Err(QueryError::NotImplemented {
err: "Mock execution failed".to_string(),
})
}
}
fn cancel(&self) -> QueryResult<()> {
Ok(())
}
}
#[tokio::test]
async fn test_mock_query_execution_success() {
let execution = MockQueryExecution {
should_succeed: true,
should_cancel: false,
};
let result = execution.start().await;
assert!(result.is_ok(), "Mock execution should succeed");
if let Ok(Output::Nil(_)) = result {
// Expected result
} else {
panic!("Expected Output::Nil");
}
}
#[tokio::test]
async fn test_mock_query_execution_failure() {
let execution = MockQueryExecution {
should_succeed: false,
should_cancel: false,
};
let result = execution.start().await;
assert!(result.is_err(), "Mock execution should fail");
if let Err(QueryError::NotImplemented { .. }) = result {
// Expected error
} else {
panic!("Expected NotImplemented error");
}
}
#[tokio::test]
async fn test_mock_query_execution_cancel() {
let execution = MockQueryExecution {
should_succeed: false,
should_cancel: true,
};
let result = execution.start().await;
assert!(result.is_err(), "Cancelled execution should fail");
if let Err(QueryError::Cancel) = result {
// Expected cancellation error
} else {
panic!("Expected Cancel error");
}
let cancel_result = execution.cancel();
assert!(cancel_result.is_ok(), "Cancel should succeed");
}
#[test]
fn test_query_execution_default_type() {
let execution = MockQueryExecution {
should_succeed: true,
should_cancel: false,
};
assert_eq!(execution.query_type(), QueryType::Batch);
}
}

View File

@@ -107,27 +107,51 @@ pub async fn make_rustfsms(input: Arc<SelectObjectContentInput>, is_test: bool)
Ok(db_server)
}
pub async fn make_rustfsms_with_components(
input: Arc<SelectObjectContentInput>,
is_test: bool,
func_manager: Arc<SimpleFunctionMetadataManager>,
parser: Arc<DefaultParser>,
query_execution_factory: Arc<SqlQueryExecutionFactory>,
default_table_provider: Arc<BaseTableProvider>,
) -> QueryResult<impl DatabaseManagerSystem> {
// TODO: the session config needs to load the global system config
let session_factory = Arc::new(SessionCtxFactory { is_test });
let query_dispatcher = SimpleQueryDispatcherBuilder::default()
.with_input(input)
.with_func_manager(func_manager)
.with_default_table_provider(default_table_provider)
.with_session_factory(session_factory)
.with_parser(parser)
.with_query_execution_factory(query_execution_factory)
.build()?;
let mut builder = RustFSmsBuilder::default();
let db_server = builder.query_dispatcher(query_dispatcher).build().expect("build db server");
Ok(db_server)
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use datafusion::{arrow::util::pretty, assert_batches_eq};
use rustfs_s3select_api::{
query::{Context, Query},
server::dbms::DatabaseManagerSystem,
};
use rustfs_s3select_api::query::{Context, Query};
use s3s::dto::{
CSVInput, CSVOutput, ExpressionType, FieldDelimiter, FileHeaderInfo, InputSerialization, OutputSerialization,
RecordDelimiter, SelectObjectContentInput, SelectObjectContentRequest,
};
use crate::instance::make_rustfsms;
use crate::get_global_db;
#[tokio::test]
#[ignore]
async fn test_simple_sql() {
let sql = "select * from S3Object";
let input = Arc::new(SelectObjectContentInput {
let input = SelectObjectContentInput {
bucket: "dandan".to_string(),
expected_bucket_owner: None,
key: "test.csv".to_string(),
@@ -151,9 +175,9 @@ mod tests {
request_progress: None,
scan_range: None,
},
});
let db = make_rustfsms(input.clone(), true).await.unwrap();
let query = Query::new(Context { input }, sql.to_string());
};
let db = get_global_db(input.clone(), true).await.unwrap();
let query = Query::new(Context { input: Arc::new(input) }, sql.to_string());
let result = db.execute(&query).await.unwrap();
@@ -184,7 +208,7 @@ mod tests {
#[ignore]
async fn test_func_sql() {
let sql = "SELECT * FROM S3Object s";
let input = Arc::new(SelectObjectContentInput {
let input = SelectObjectContentInput {
bucket: "dandan".to_string(),
expected_bucket_owner: None,
key: "test.csv".to_string(),
@@ -210,9 +234,9 @@ mod tests {
request_progress: None,
scan_range: None,
},
});
let db = make_rustfsms(input.clone(), true).await.unwrap();
let query = Query::new(Context { input }, sql.to_string());
};
let db = get_global_db(input.clone(), true).await.unwrap();
let query = Query::new(Context { input: Arc::new(input) }, sql.to_string());
let result = db.execute(&query).await.unwrap();

View File

@@ -19,3 +19,84 @@ pub mod function;
pub mod instance;
pub mod metadata;
pub mod sql;
#[cfg(test)]
mod test;
use rustfs_s3select_api::{QueryResult, server::dbms::DatabaseManagerSystem};
use s3s::dto::SelectObjectContentInput;
use std::sync::{Arc, LazyLock};
use crate::{
execution::{factory::SqlQueryExecutionFactory, scheduler::local::LocalScheduler},
function::simple_func_manager::SimpleFunctionMetadataManager,
metadata::base_table::BaseTableProvider,
sql::{optimizer::CascadeOptimizerBuilder, parser::DefaultParser},
};
// Global cached components that can be reused across database instances
struct GlobalComponents {
func_manager: Arc<SimpleFunctionMetadataManager>,
parser: Arc<DefaultParser>,
query_execution_factory: Arc<SqlQueryExecutionFactory>,
default_table_provider: Arc<BaseTableProvider>,
}
static GLOBAL_COMPONENTS: LazyLock<GlobalComponents> = LazyLock::new(|| {
let func_manager = Arc::new(SimpleFunctionMetadataManager::default());
let parser = Arc::new(DefaultParser::default());
let optimizer = Arc::new(CascadeOptimizerBuilder::default().build());
let scheduler = Arc::new(LocalScheduler {});
let query_execution_factory = Arc::new(SqlQueryExecutionFactory::new(optimizer, scheduler));
let default_table_provider = Arc::new(BaseTableProvider::default());
GlobalComponents {
func_manager,
parser,
query_execution_factory,
default_table_provider,
}
});
/// Get or create database instance with cached components
pub async fn get_global_db(
input: SelectObjectContentInput,
enable_debug: bool,
) -> QueryResult<Arc<dyn DatabaseManagerSystem + Send + Sync>> {
let components = &*GLOBAL_COMPONENTS;
let db = crate::instance::make_rustfsms_with_components(
Arc::new(input),
enable_debug,
components.func_manager.clone(),
components.parser.clone(),
components.query_execution_factory.clone(),
components.default_table_provider.clone(),
)
.await?;
Ok(Arc::new(db) as Arc<dyn DatabaseManagerSystem + Send + Sync>)
}
/// Create a fresh database instance without using cached components (for testing)
pub async fn create_fresh_db() -> QueryResult<Arc<dyn DatabaseManagerSystem + Send + Sync>> {
// Create a default test input for fresh database creation
let default_input = SelectObjectContentInput {
bucket: "test-bucket".to_string(),
expected_bucket_owner: None,
key: "test.csv".to_string(),
sse_customer_algorithm: None,
sse_customer_key: None,
sse_customer_key_md5: None,
request: s3s::dto::SelectObjectContentRequest {
expression: "SELECT * FROM S3Object".to_string(),
expression_type: s3s::dto::ExpressionType::from_static("SQL"),
input_serialization: s3s::dto::InputSerialization::default(),
output_serialization: s3s::dto::OutputSerialization::default(),
request_progress: None,
scan_range: None,
},
};
let db = crate::instance::make_rustfsms(Arc::new(default_input), true).await?;
Ok(Arc::new(db) as Arc<dyn DatabaseManagerSystem + Send + Sync>)
}
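A minimal usage sketch of the cached entry point, mirroring the tests later in this diff; the `run_select` helper is hypothetical, and the `SelectObjectContentInput` is assumed to be built along the lines of `create_fresh_db` above:

use std::sync::Arc;
use rustfs_s3select_api::QueryResult;
use rustfs_s3select_api::query::{Context, Query};
use s3s::dto::SelectObjectContentInput;

// Hypothetical caller inside this crate: reuses the LazyLock-cached
// parser/optimizer/scheduler instead of rebuilding them per request.
async fn run_select(input: SelectObjectContentInput) -> QueryResult<()> {
    let sql = input.request.expression.clone();
    let db = crate::get_global_db(input.clone(), false).await?;
    let query = Query::new(Context { input: Arc::new(input) }, sql);
    let _handle = db.execute(&query).await?;
    Ok(())
}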

View File

@@ -0,0 +1,247 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#[cfg(test)]
mod error_handling_tests {
use crate::get_global_db;
use rustfs_s3select_api::{
QueryError,
query::{Context, Query},
};
use s3s::dto::{
CSVInput, ExpressionType, FileHeaderInfo, InputSerialization, SelectObjectContentInput, SelectObjectContentRequest,
};
use std::sync::Arc;
fn create_test_input_with_sql(sql: &str) -> SelectObjectContentInput {
SelectObjectContentInput {
bucket: "test-bucket".to_string(),
expected_bucket_owner: None,
key: "test.csv".to_string(),
sse_customer_algorithm: None,
sse_customer_key: None,
sse_customer_key_md5: None,
request: SelectObjectContentRequest {
expression: sql.to_string(),
expression_type: ExpressionType::from_static("SQL"),
input_serialization: InputSerialization {
csv: Some(CSVInput {
file_header_info: Some(FileHeaderInfo::from_static(FileHeaderInfo::USE)),
..Default::default()
}),
..Default::default()
},
output_serialization: s3s::dto::OutputSerialization::default(),
request_progress: None,
scan_range: None,
},
}
}
#[tokio::test]
async fn test_syntax_error_handling() {
let invalid_sqls = vec![
"INVALID SQL",
"SELECT FROM",
"SELECT * FORM S3Object", // typo in FROM
"SELECT * FROM",
"SELECT * FROM S3Object WHERE",
"SELECT COUNT( FROM S3Object", // missing closing parenthesis
];
for sql in invalid_sqls {
let input = create_test_input_with_sql(sql);
let db = get_global_db(input.clone(), true).await.unwrap();
let query = Query::new(Context { input: Arc::new(input) }, sql.to_string());
let result = db.execute(&query).await;
assert!(result.is_err(), "Expected error for SQL: {sql}");
}
}
#[tokio::test]
async fn test_multi_statement_error() {
let multi_statement_sqls = vec![
"SELECT * FROM S3Object; SELECT 1;",
"SELECT 1; SELECT 2; SELECT 3;",
"SELECT * FROM S3Object; DROP TABLE test;",
];
for sql in multi_statement_sqls {
let input = create_test_input_with_sql(sql);
let db = get_global_db(input.clone(), true).await.unwrap();
let query = Query::new(Context { input: Arc::new(input) }, sql.to_string());
let result = db.execute(&query).await;
assert!(result.is_err(), "Expected multi-statement error for SQL: {sql}");
if let Err(QueryError::MultiStatement { num, .. }) = result {
assert!(num >= 2, "Expected at least 2 statements, got: {num}");
}
}
}
#[tokio::test]
async fn test_unsupported_operations() {
let unsupported_sqls = vec![
"INSERT INTO S3Object VALUES (1, 'test')",
"UPDATE S3Object SET name = 'test'",
"DELETE FROM S3Object",
"CREATE TABLE test (id INT)",
"DROP TABLE S3Object",
];
for sql in unsupported_sqls {
let input = create_test_input_with_sql(sql);
let db = get_global_db(input.clone(), true).await.unwrap();
let query = Query::new(Context { input: Arc::new(input) }, sql.to_string());
let result = db.execute(&query).await;
// These should fail with either a syntax error or a not-implemented error
assert!(result.is_err(), "Expected error for unsupported SQL: {sql}");
}
}
#[tokio::test]
async fn test_invalid_column_references() {
let invalid_column_sqls = vec![
"SELECT nonexistent_column FROM S3Object",
"SELECT * FROM S3Object WHERE nonexistent_column = 1",
"SELECT * FROM S3Object ORDER BY nonexistent_column",
"SELECT * FROM S3Object GROUP BY nonexistent_column",
];
for sql in invalid_column_sqls {
let input = create_test_input_with_sql(sql);
let db = get_global_db(input.clone(), true).await.unwrap();
let query = Query::new(Context { input: Arc::new(input) }, sql.to_string());
let result = db.execute(&query).await;
// These might succeed or fail depending on schema inference
// The test verifies that the system handles them gracefully
match result {
Ok(_) => {
// If it succeeds, verify we can get results
let handle = result.unwrap();
let output = handle.result().chunk_result().await;
// Should either succeed with empty results or fail gracefully
let _ = output;
}
Err(_) => {
// Expected to fail - this is acceptable
}
}
}
}
#[tokio::test]
async fn test_complex_query_error_recovery() {
let complex_invalid_sql = r#"
SELECT
name,
age,
INVALID_FUNCTION(salary) as invalid_calc,
department
FROM S3Object
WHERE age > 'invalid_number'
GROUP BY department, nonexistent_column
HAVING COUNT(*) > INVALID_FUNCTION()
ORDER BY invalid_column
"#;
let input = create_test_input_with_sql(complex_invalid_sql);
let db = get_global_db(input.clone(), true).await.unwrap();
let query = Query::new(Context { input: Arc::new(input) }, complex_invalid_sql.to_string());
let result = db.execute(&query).await;
assert!(result.is_err(), "Expected error for complex invalid SQL");
}
#[tokio::test]
async fn test_empty_query() {
let empty_sqls = vec!["", " ", "\n\t \n"];
for sql in empty_sqls {
let input = create_test_input_with_sql(sql);
let db = get_global_db(input.clone(), true).await.unwrap();
let query = Query::new(Context { input: Arc::new(input) }, sql.to_string());
let result = db.execute(&query).await;
// Empty queries might be handled differently by the parser
match result {
Ok(_) => {
// Some parsers might accept empty queries
}
Err(_) => {
// Expected to fail for empty SQL
}
}
}
}
#[tokio::test]
async fn test_very_long_query() {
// Create a very long but valid query
let mut long_sql = "SELECT ".to_string();
for i in 0..1000 {
if i > 0 {
long_sql.push_str(", ");
}
long_sql.push_str(&format!("'column_{i}' as col_{i}"));
}
long_sql.push_str(" FROM S3Object LIMIT 1");
let input = create_test_input_with_sql(&long_sql);
let db = get_global_db(input.clone(), true).await.unwrap();
let query = Query::new(Context { input: Arc::new(input) }, long_sql);
let result = db.execute(&query).await;
// This should either succeed or fail gracefully
match result {
Ok(handle) => {
let output = handle.result().chunk_result().await;
assert!(output.is_ok(), "Query execution should complete successfully");
}
Err(_) => {
// Acceptable to fail due to resource constraints
}
}
}
#[tokio::test]
async fn test_sql_injection_patterns() {
let injection_patterns = vec![
"SELECT * FROM S3Object WHERE name = 'test'; DROP TABLE users; --",
"SELECT * FROM S3Object UNION SELECT * FROM information_schema.tables",
"SELECT * FROM S3Object WHERE 1=1 OR 1=1",
];
for sql in injection_patterns {
let input = create_test_input_with_sql(sql);
let db = get_global_db(input.clone(), true).await.unwrap();
let query = Query::new(Context { input: Arc::new(input) }, sql.to_string());
let result = db.execute(&query).await;
// These should be handled safely - either succeed with limited scope or fail
match result {
Ok(_) => {
// If successful, it should only access S3Object data
}
Err(_) => {
// Expected to fail for security reasons
}
}
}
}
}

View File

@@ -0,0 +1,228 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#[cfg(test)]
mod integration_tests {
use crate::{create_fresh_db, get_global_db, instance::make_rustfsms};
use rustfs_s3select_api::{
QueryError,
query::{Context, Query},
};
use s3s::dto::{
CSVInput, CSVOutput, ExpressionType, FileHeaderInfo, InputSerialization, OutputSerialization, SelectObjectContentInput,
SelectObjectContentRequest,
};
use std::sync::Arc;
fn create_test_input(sql: &str) -> SelectObjectContentInput {
SelectObjectContentInput {
bucket: "test-bucket".to_string(),
expected_bucket_owner: None,
key: "test.csv".to_string(),
sse_customer_algorithm: None,
sse_customer_key: None,
sse_customer_key_md5: None,
request: SelectObjectContentRequest {
expression: sql.to_string(),
expression_type: ExpressionType::from_static("SQL"),
input_serialization: InputSerialization {
csv: Some(CSVInput {
file_header_info: Some(FileHeaderInfo::from_static(FileHeaderInfo::USE)),
..Default::default()
}),
..Default::default()
},
output_serialization: OutputSerialization {
csv: Some(CSVOutput::default()),
..Default::default()
},
request_progress: None,
scan_range: None,
},
}
}
#[tokio::test]
async fn test_database_creation() {
let input = create_test_input("SELECT * FROM S3Object");
let result = make_rustfsms(Arc::new(input), true).await;
assert!(result.is_ok());
}
#[tokio::test]
async fn test_global_db_creation() {
let input = create_test_input("SELECT * FROM S3Object");
let result = get_global_db(input.clone(), true).await;
assert!(result.is_ok());
}
#[tokio::test]
async fn test_fresh_db_creation() {
let result = create_fresh_db().await;
assert!(result.is_ok());
}
#[tokio::test]
async fn test_simple_select_query() {
let sql = "SELECT * FROM S3Object";
let input = create_test_input(sql);
let db = get_global_db(input.clone(), true).await.unwrap();
let query = Query::new(Context { input: Arc::new(input) }, sql.to_string());
let result = db.execute(&query).await;
assert!(result.is_ok());
let query_handle = result.unwrap();
let output = query_handle.result().chunk_result().await;
assert!(output.is_ok());
}
#[tokio::test]
async fn test_select_with_where_clause() {
let sql = "SELECT name, age FROM S3Object WHERE age > 30";
let input = create_test_input(sql);
let db = get_global_db(input.clone(), true).await.unwrap();
let query = Query::new(Context { input: Arc::new(input) }, sql.to_string());
let result = db.execute(&query).await;
assert!(result.is_ok());
}
#[tokio::test]
async fn test_select_with_aggregation() {
let sql = "SELECT department, COUNT(*) as count FROM S3Object GROUP BY department";
let input = create_test_input(sql);
let db = get_global_db(input.clone(), true).await.unwrap();
let query = Query::new(Context { input: Arc::new(input) }, sql.to_string());
let result = db.execute(&query).await;
// Aggregation queries might fail due to lack of actual data, which is acceptable
match result {
Ok(_) => {
// If successful, that's great
}
Err(_) => {
// Expected to fail due to no actual data source
}
}
}
#[tokio::test]
async fn test_invalid_sql_syntax() {
let sql = "INVALID SQL SYNTAX";
let input = create_test_input(sql);
let db = get_global_db(input.clone(), true).await.unwrap();
let query = Query::new(Context { input: Arc::new(input) }, sql.to_string());
let result = db.execute(&query).await;
assert!(result.is_err());
}
#[tokio::test]
async fn test_multi_statement_error() {
let sql = "SELECT * FROM S3Object; SELECT 1;";
let input = create_test_input(sql);
let db = get_global_db(input.clone(), true).await.unwrap();
let query = Query::new(Context { input: Arc::new(input) }, sql.to_string());
let result = db.execute(&query).await;
assert!(result.is_err());
if let Err(QueryError::MultiStatement { num, .. }) = result {
assert_eq!(num, 2);
} else {
panic!("Expected MultiStatement error");
}
}
#[tokio::test]
async fn test_query_state_machine_workflow() {
let sql = "SELECT * FROM S3Object";
let input = create_test_input(sql);
let db = get_global_db(input.clone(), true).await.unwrap();
let query = Query::new(Context { input: Arc::new(input) }, sql.to_string());
// Test state machine creation
let state_machine = db.build_query_state_machine(query.clone()).await;
assert!(state_machine.is_ok());
let state_machine = state_machine.unwrap();
// Test logical plan building
let logical_plan = db.build_logical_plan(state_machine.clone()).await;
assert!(logical_plan.is_ok());
// Test execution if plan exists
if let Ok(Some(plan)) = logical_plan {
let execution_result = db.execute_logical_plan(plan, state_machine).await;
assert!(execution_result.is_ok());
}
}
#[tokio::test]
async fn test_query_with_limit() {
let sql = "SELECT * FROM S3Object LIMIT 5";
let input = create_test_input(sql);
let db = get_global_db(input.clone(), true).await.unwrap();
let query = Query::new(Context { input: Arc::new(input) }, sql.to_string());
let result = db.execute(&query).await;
assert!(result.is_ok());
let query_handle = result.unwrap();
let output = query_handle.result().chunk_result().await.unwrap();
// Verify that we get results (exact count depends on test data)
let total_rows: usize = output.iter().map(|batch| batch.num_rows()).sum();
assert!(total_rows <= 5);
}
#[tokio::test]
async fn test_query_with_order_by() {
let sql = "SELECT name, age FROM S3Object ORDER BY age DESC";
let input = create_test_input(sql);
let db = get_global_db(input.clone(), true).await.unwrap();
let query = Query::new(Context { input: Arc::new(input) }, sql.to_string());
let result = db.execute(&query).await;
assert!(result.is_ok());
}
#[tokio::test]
async fn test_concurrent_queries() {
let sql = "SELECT * FROM S3Object";
let input = create_test_input(sql);
let db = get_global_db(input.clone(), true).await.unwrap();
// Execute multiple queries concurrently
let mut handles = vec![];
for i in 0..3 {
let query = Query::new(
Context {
input: Arc::new(input.clone()),
},
format!("SELECT * FROM S3Object LIMIT {}", i + 1),
);
let db_clone = db.clone();
let handle = tokio::spawn(async move { db_clone.execute(&query).await });
handles.push(handle);
}
// Wait for all queries to complete
for handle in handles {
let result = handle.await.unwrap();
assert!(result.is_ok());
}
}
}

View File

@@ -12,24 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use serde::{Deserialize, Serialize};
use std::fmt::Display;
//! Test modules for s3select-query
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct LockArgs {
pub uid: String,
pub resources: Vec<String>,
pub owner: String,
pub source: String,
pub quorum: usize,
}
impl Display for LockArgs {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"LockArgs[ uid: {}, resources: {:?}, owner: {}, source:{}, quorum: {} ]",
self.uid, self.resources, self.owner, self.source, self.quorum
)
}
}
pub mod error_handling_test;
pub mod integration_test;

rustfs.spec (new file, 58 lines)
View File

@@ -0,0 +1,58 @@
%global _enable_debug_packages 0
%global _empty_manifest_terminate_build 0
Name: rustfs
Version: 1.0.0
Release: alpha.36%{?dist}
Summary: High-performance distributed object storage, a MinIO alternative
License: Apache-2.0
URL: https://github.com/rustfs/rustfs
Source0: https://github.com/rustfs/rustfs/archive/refs/tags/%{version}.tar.gz
BuildRequires: cargo
BuildRequires: rust
BuildRequires: mold
BuildRequires: pango-devel
BuildRequires: cairo-devel
BuildRequires: cairo-gobject-devel
BuildRequires: gdk-pixbuf2-devel
BuildRequires: atk-devel
BuildRequires: gtk3-devel
BuildRequires: libsoup-devel
BuildRequires: cmake
BuildRequires: clang-devel
BuildRequires: webkit2gtk4.1-devel >= 2.40
%description
RustFS is high-performance distributed object storage software built in Rust, one of the most popular programming languages worldwide. Like MinIO, it offers simplicity, S3 compatibility, an open-source codebase, and support for data lakes, AI, and big data. It also carries a friendlier open-source license than many comparable storage systems, being distributed under the Apache License. With Rust as its foundation, RustFS delivers faster performance and safer distributed features for high-performance object storage.
%prep
%autosetup -n %{name}-%{version}-%{release}
%build
# Set the target directory according to the schema
export CMAKE=$(which cmake3)
%ifarch x86_64 || aarch64 || loongarch64
TARGET_DIR="target/%_arch"
PLATFORM=%_arch-unknown-linux-musl
%else
TARGET_DIR="target/unknown"
PLATFORM=unknown-platform
%endif
# Set CARGO_TARGET_DIR and build the project
#CARGO_TARGET_DIR=$TARGET_DIR RUSTFLAGS="-C link-arg=-fuse-ld=mold" cargo build --release --package rustfs
CARGO_TARGET_DIR=$TARGET_DIR RUSTFLAGS="-C link-arg=-fuse-ld=mold" cargo build --release --target $PLATFORM -p rustfs --bins
%install
mkdir -p %buildroot/usr/bin/
install %_builddir/%{name}-%{version}-%{release}/target/%_arch/$PLATFORM/release/rustfs %buildroot/usr/bin/
%files
%license LICENSE
%doc docs
%_bindir/rustfs
%changelog
* Tue Jul 08 2025 Wenlong Zhang <zhangwenlong@loongson.cn>
- Initial RPM package for RustFS 1.0.0-alpha.36

View File

@@ -23,18 +23,15 @@ use http::{HeaderMap, Uri};
use hyper::StatusCode;
use matchit::Params;
use percent_encoding::{AsciiSet, CONTROLS, percent_encode};
use rustfs_common::heal_channel::HealOpts;
use rustfs_ecstore::admin_server_info::get_server_info;
use rustfs_ecstore::bucket::metadata_sys::{self, get_replication_config};
use rustfs_ecstore::bucket::target::BucketTarget;
use rustfs_ecstore::bucket::versioning_sys::BucketVersioningSys;
use rustfs_ecstore::cmd::bucket_targets::{self, GLOBAL_Bucket_Target_Sys};
use rustfs_ecstore::data_usage::load_data_usage_from_backend;
use rustfs_ecstore::error::StorageError;
use rustfs_ecstore::global::GLOBAL_ALlHealState;
use rustfs_ecstore::global::get_global_action_cred;
// use rustfs_ecstore::heal::data_usage::load_data_usage_from_backend;
use rustfs_ecstore::heal::data_usage::load_data_usage_from_backend;
use rustfs_ecstore::heal::heal_commands::HealOpts;
use rustfs_ecstore::heal::heal_ops::new_heal_sequence;
use rustfs_ecstore::metrics_realtime::{CollectMetricsOpts, MetricType, collect_local_metrics};
use rustfs_ecstore::new_object_layer_fn;
use rustfs_ecstore::pools::{get_total_usable_capacity, get_total_usable_capacity_free};
@@ -689,33 +686,20 @@ impl Operation for HealHandler {
}
let heal_path = path_join(&[PathBuf::from(hip.bucket.clone()), PathBuf::from(hip.obj_prefix.clone())]);
if !hip.client_token.is_empty() && !hip.force_start && !hip.force_stop {
match GLOBAL_ALlHealState
.pop_heal_status_json(heal_path.to_str().unwrap_or_default(), &hip.client_token)
.await
{
Ok(b) => {
info!("pop_heal_status_json success");
return Ok(S3Response::new((StatusCode::OK, Body::from(b))));
}
Err(_e) => {
info!("pop_heal_status_json failed");
return Ok(S3Response::new((StatusCode::INTERNAL_SERVER_ERROR, Body::from(vec![]))));
}
}
}
let (tx, mut rx) = mpsc::channel(1);
if hip.force_stop {
if !hip.client_token.is_empty() && !hip.force_start && !hip.force_stop {
// Query heal status
let tx_clone = tx.clone();
let heal_path_str = heal_path.to_str().unwrap_or_default().to_string();
let client_token = hip.client_token.clone();
spawn(async move {
match GLOBAL_ALlHealState
.stop_heal_sequence(heal_path.to_str().unwrap_or_default())
.await
{
Ok(b) => {
match rustfs_common::heal_channel::query_heal_status(heal_path_str, client_token).await {
Ok(_) => {
// TODO: Get actual response from channel
let _ = tx_clone
.send(HealResp {
resp_bytes: b,
resp_bytes: vec![],
..Default::default()
})
.await;
@@ -723,7 +707,32 @@ impl Operation for HealHandler {
Err(e) => {
let _ = tx_clone
.send(HealResp {
_api_err: Some(e),
_api_err: Some(StorageError::other(e)),
..Default::default()
})
.await;
}
}
});
} else if hip.force_stop {
// Cancel heal task
let tx_clone = tx.clone();
let heal_path_str = heal_path.to_str().unwrap_or_default().to_string();
spawn(async move {
match rustfs_common::heal_channel::cancel_heal_task(heal_path_str).await {
Ok(_) => {
// TODO: Get actual response from channel
let _ = tx_clone
.send(HealResp {
resp_bytes: vec![],
..Default::default()
})
.await;
}
Err(e) => {
let _ = tx_clone
.send(HealResp {
_api_err: Some(StorageError::other(e)),
..Default::default()
})
.await;
@@ -731,22 +740,36 @@ impl Operation for HealHandler {
}
});
} else if hip.client_token.is_empty() {
let nh = Arc::new(new_heal_sequence(&hip.bucket, &hip.obj_prefix, "", hip.hs, hip.force_start));
// Use new heal channel mechanism
let tx_clone = tx.clone();
spawn(async move {
match GLOBAL_ALlHealState.launch_new_heal_sequence(nh).await {
Ok(b) => {
// Create heal request through channel
let heal_request = rustfs_common::heal_channel::create_heal_request(
hip.bucket.clone(),
if hip.obj_prefix.is_empty() {
None
} else {
Some(hip.obj_prefix.clone())
},
hip.force_start,
Some(rustfs_common::heal_channel::HealChannelPriority::Normal),
);
match rustfs_common::heal_channel::send_heal_request(heal_request).await {
Ok(_) => {
// Success - send empty response for now
let _ = tx_clone
.send(HealResp {
resp_bytes: b,
resp_bytes: vec![],
..Default::default()
})
.await;
}
Err(e) => {
// Error - send error response
let _ = tx_clone
.send(HealResp {
_api_err: Some(e),
_api_err: Some(StorageError::other(e)),
..Default::default()
})
.await;
@@ -1072,7 +1095,7 @@ impl Operation for RemoveRemoteTargetHandler {
#[cfg(test)]
mod test {
use rustfs_ecstore::heal::heal_commands::HealOpts;
use rustfs_common::heal_channel::HealOpts;
#[ignore] // FIXME: failed in github actions
#[test]
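Condensed, the rewritten HealHandler above routes every admin heal action through `rustfs_common::heal_channel` instead of `GLOBAL_ALlHealState`. A schematic sketch of that three-way dispatch, with the channel functions invoked as in the hunks above (the hypothetical `dispatch_heal` omits the mpsc/HealResp plumbing the real handler uses to report results):

use rustfs_common::heal_channel::{
    HealChannelPriority, cancel_heal_task, create_heal_request, query_heal_status, send_heal_request,
};

// Hypothetical helper mirroring the handler's branching on the parsed
// heal parameters (client_token / force_start / force_stop).
async fn dispatch_heal(
    heal_path: String,
    bucket: String,
    obj_prefix: String,
    client_token: String,
    force_start: bool,
    force_stop: bool,
) {
    if !client_token.is_empty() && !force_start && !force_stop {
        // Query the status of an existing heal sequence.
        let _status = query_heal_status(heal_path, client_token).await;
    } else if force_stop {
        // Cancel a running heal task.
        let _cancelled = cancel_heal_task(heal_path).await;
    } else if client_token.is_empty() {
        // Start a new heal sequence through the channel.
        let request = create_heal_request(
            bucket,
            if obj_prefix.is_empty() { None } else { Some(obj_prefix) },
            force_start,
            Some(HealChannelPriority::Normal),
        );
        let _sent = send_heal_request(request).await;
    }
    // The real handler forwards each Ok/Err outcome back to the HTTP task
    // as a HealResp over an mpsc channel, as shown in the diff above.
}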

View File

@@ -32,6 +32,7 @@ use rpc::register_rpc_route;
use s3s::route::S3Route;
const ADMIN_PREFIX: &str = "/rustfs/admin";
// const ADMIN_PREFIX: &str = "/minio/admin";
pub fn make_admin_route(console_enabled: bool) -> std::io::Result<impl S3Route> {
let mut r: S3Router<AdminOperation> = S3Router::new(console_enabled);

Some files were not shown because too many files have changed in this diff.