Compare commits

...

87 Commits

Author SHA1 Message Date
weisd
56fd8132e9 fix:#303 returns empty when querying an empty or not dir (#304) 2025-07-28 16:17:40 +08:00
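The fix above makes listing behave the same for an empty directory and a missing one. A minimal sketch of that behavior is shown below; the function name and the use of `std::fs` are illustrative assumptions, not the actual RustFS code, which works against its own storage layer.

```rust
use std::{fs, io, path::Path};

/// Return the entries of `dir`, treating a missing directory the same as an empty one.
fn list_dir_or_empty(dir: &Path) -> io::Result<Vec<String>> {
    match fs::read_dir(dir) {
        Ok(entries) => entries
            .map(|e| e.map(|e| e.file_name().to_string_lossy().into_owned()))
            .collect(),
        // A directory that does not exist is reported as an empty listing, not an error.
        Err(e) if e.kind() == io::ErrorKind::NotFound => Ok(Vec::new()),
        Err(e) => Err(e),
    }
}

fn main() -> io::Result<()> {
    println!("{:?}", list_dir_or_empty(Path::new("/definitely/not/there"))?);
    Ok(())
}
```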
guojidan
35daa74430 Merge pull request #302 from guojidan/lock
Lock: add transactional
2025-07-28 12:00:44 +08:00
junxiang Mu
dc156fb4cd Fix: clippy
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-28 11:38:42 +08:00
junxiang Mu
de905a878c Cargo: use workspace dependence
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-28 11:02:40 +08:00
junxiang Mu
f3252f989b Test: Add e2e test case for lock transactional
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-28 11:00:10 +08:00
junxiang Mu
01a2afca9a lock: Add transactional
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-28 10:59:43 +08:00
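The two commits above add a "transactional" mode to the lock crate, but their messages carry no API details. The sketch below only illustrates the generic all-or-nothing pattern such a feature usually implies: either every requested lock is acquired, or any partial acquisitions are rolled back. All names here (`LockTable`, `lock_all`) are hypothetical.

```rust
use std::collections::HashSet;

/// Toy in-memory lock table standing in for the real lock service.
#[derive(Default)]
struct LockTable {
    held: HashSet<String>,
}

impl LockTable {
    fn try_lock(&mut self, key: &str) -> bool {
        self.held.insert(key.to_string()) // false if the key is already held
    }

    fn unlock(&mut self, key: &str) {
        self.held.remove(key);
    }

    /// Transactional acquire: either every key is locked, or none is.
    fn lock_all(&mut self, keys: &[&str]) -> bool {
        let mut acquired = Vec::new();
        for key in keys {
            if self.try_lock(key) {
                acquired.push(*key);
            } else {
                // Roll back everything taken so far before reporting failure.
                for k in acquired {
                    self.unlock(k);
                }
                return false;
            }
        }
        true
    }
}

fn main() {
    let mut table = LockTable::default();
    assert!(table.lock_all(&["bucket/a", "bucket/b"]));
    // Overlaps with "bucket/a": the whole acquisition fails and "bucket/c" is rolled back.
    assert!(!table.lock_all(&["bucket/c", "bucket/a"]));
    assert!(!table.held.contains("bucket/c"));
}
```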
guojidan
a4fe68ad21 Merge pull request #301 from guojidan/improve-sql
s3Select: add unit test case
2025-07-28 09:56:10 +08:00
junxiang Mu
c03f86b23c s3Select: add unit test case
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-28 09:19:47 +08:00
guojidan
5667f324ae Merge pull request #297 from guojidan/improve-sql
Test: Add e2e_test case for sql && add script for e2e_test
2025-07-25 17:16:41 +08:00
junxiang Mu
bcd806796f Test: add test script for e2e
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-25 16:52:06 +08:00
junxiang Mu
612404c47f Test: add e2e_test for s3select
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-25 15:07:44 +08:00
guojidan
85388262b3 Merge pull request #294 from guojidan/improve-sql
Refactor: DatabaseManagerSystem as global
2025-07-25 08:33:54 +08:00
junxiang Mu
25a4503285 fix: fmt
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-25 08:18:14 +08:00
安正超
526c4d5a61 refactor: optimize the build workflow, unify latest-file handling, and simplify artifact uploads (#293) 2025-07-25 01:10:04 +08:00
junxiang Mu
addc964d56 Refactor: DatabaseManagerSystem as global
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 17:12:51 +08:00
loverustfs
371119f733 GNU to MUSL modify Dockerfile 2025-07-24 16:36:15 +08:00
guojidan
021abc0398 Merge pull request #292 from guojidan/Arc
Chore: remove dirty file(cache.rs)
2025-07-24 16:32:20 +08:00
junxiang Mu
0672b6dd3e Chore: remove dirty file(cache.rs)
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 14:57:48 +08:00
guojidan
1372dc2857 Merge pull request #288 from guojidan/scanner
Refactor: Scanner
2025-07-24 14:42:54 +08:00
houseme
77bc9af109 Update Cargo.toml 2025-07-24 14:14:12 +08:00
junxiang Mu
91b1c84430 rebase
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:18:05 +08:00
junxiang Mu
b667927216 fix fmt
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:14:28 +08:00
junxiang Mu
29795fac51 fix Cargo.toml
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:14:28 +08:00
junxiang Mu
2ce7e01f55 Chore: remove dirty file(heal)
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:14:27 +08:00
junxiang Mu
4fefd63a5b rebase
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:14:05 +08:00
junxiang Mu
2a8c46874d fix: auto heal when xl.meta lose
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:14:05 +08:00
junxiang Mu
b8b5511b68 fix: heal data part lose
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:14:05 +08:00
junxiang Mu
bdaee228db fix(ahm): adjust test expectations for missing xl.meta recovery scenario
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:14:05 +08:00
junxiang Mu
d562620e99 fix: implement uses_data_dir method
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:14:05 +08:00
junxiang Mu
69b0c828c9 fix: scanner add heal bucket
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:14:05 +08:00
junxiang Mu
2bfd1efb9b Fix: fix add heal_manager into scanner when scanner start
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:14:05 +08:00
junxiang Mu
0854e6b921 Chore: rename init_heal_manager_with_channel
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:14:05 +08:00
junxiang Mu
b907f4e61b refactor(ahm): remove obsolete scanner/data_usage.rs after data usage refactor
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:14:05 +08:00
junxiang Mu
6ec568459c chore: update admin handlers, lockfile, and minor fixes
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:14:05 +08:00
junxiang Mu
ea210d52dc refactor(heal): unify heal request interface, add disk field, update ahm/ecstore/common for erasure set healing
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:14:03 +08:00
junxiang Mu
3d3c6e4e06 chore(protos): update proto definitions, remove ns_scanner, fix codegen and formatting
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:12:49 +08:00
junxiang Mu
e7d0a8d4b9 feat: integrate global metrics system into AHM scanner
- Add global metrics system to common crate for cross-module usage
- Integrate global metrics collection into AHM scanner operations
- Update ECStore to use common metrics system instead of local implementation
- Add chrono dependency to AHM crate for timestamp handling
- Re-export IlmAction from common metrics in ECStore lifecycle module
- Update scanner methods to use global metrics for cycle, disk, and volume scans
- Maintain backward compatibility with local metrics collector
- Fix clippy warnings and ensure proper code formatting

This change enables unified metrics collection across the entire RustFS system,
allowing better monitoring and observability of scanner operations.

Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:12:49 +08:00
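This commit describes moving metrics into the `common` crate so the scanner, ECStore, and other modules report into one global collector. A minimal sketch of that pattern, using a lazily initialized static, follows; the type and method names (`ScannerMetrics`, `global_metrics`, `record_cycle`) are assumptions for illustration rather than the actual RustFS API.

```rust
use std::sync::OnceLock;
use std::sync::atomic::{AtomicU64, Ordering};

/// Illustrative global metrics collector; fields and names are hypothetical.
#[derive(Default)]
pub struct ScannerMetrics {
    cycles_completed: AtomicU64,
    disks_scanned: AtomicU64,
    volumes_scanned: AtomicU64,
}

static GLOBAL_METRICS: OnceLock<ScannerMetrics> = OnceLock::new();

/// Any crate can reach the same collector without threading it through call sites.
pub fn global_metrics() -> &'static ScannerMetrics {
    GLOBAL_METRICS.get_or_init(ScannerMetrics::default)
}

impl ScannerMetrics {
    pub fn record_cycle(&self) {
        self.cycles_completed.fetch_add(1, Ordering::Relaxed);
    }
    pub fn record_disk_scan(&self) {
        self.disks_scanned.fetch_add(1, Ordering::Relaxed);
    }
    pub fn snapshot(&self) -> (u64, u64, u64) {
        (
            self.cycles_completed.load(Ordering::Relaxed),
            self.disks_scanned.load(Ordering::Relaxed),
            self.volumes_scanned.load(Ordering::Relaxed),
        )
    }
}

fn main() {
    global_metrics().record_cycle();
    global_metrics().record_disk_scan();
    println!("{:?}", global_metrics().snapshot());
}
```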
junxiang Mu
7d3b2b774c fix heal disk
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:12:49 +08:00
junxiang Mu
aed8f52423 refactor: integrate disk healing into erasure set healing
- Remove HealType::Disk and related disk-specific healing methods
- Integrate disk format healing into heal_erasure_set with include_format_heal option
- Update auto disk scanner to use ErasureSet heal type instead of Disk heal
- Fix disk status change event handling to use ErasureSet heal requests
- Add proper bucket list retrieval for auto healing scenarios
- Update data scanner to submit ErasureSet heal tasks for offline disks
- Remove duplicate healing logic between Disk and ErasureSet types
- Ensure all healing operations go through unified ErasureSet healing path
2025-07-24 12:12:49 +08:00
junxiang Mu
c49414f6ac fix: resolve test conflicts and improve data scanner functionality
- Fix multi-threaded test conflicts in AHM heal integration tests
- Remove global environment sharing to prevent test state pollution
- Fix test_all_disk_method by clearing global disk map before test
- Improve data scanner and cache value implementations
- Update dependencies and resolve clippy warnings

Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:12:49 +08:00
junxiang Mu
8e766b90cd feat: implement heal channel mechanism for admin-ahm communication
- Add global unbounded channel in common crate for heal requests
- Implement channel processor in ahm to handle heal commands
- Add Start/Query/Cancel commands support via channel
- Integrate heal manager initialization in main.rs
- Replace direct MRF calls with channel-based heal requests in ecstore
- Support advanced heal options including pool_index and set_index
- Enable admin handlers to send heal requests via channel
2025-07-24 12:12:49 +08:00
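The heal-channel commit describes admin handlers pushing Start/Query/Cancel commands onto a global unbounded channel that a processor in `ahm` drains. A small sketch of that shape with a Tokio unbounded channel follows; the `HealCommand` variants and their fields are assumptions based on the commit message, not the real `common` crate types.

```rust
// Requires tokio = { version = "1", features = ["full"] } in Cargo.toml.
use tokio::sync::mpsc;

/// Illustrative heal commands; the real enum in the `common` crate may differ.
#[derive(Debug)]
enum HealCommand {
    Start { bucket: String, object: Option<String> },
    Query { task_id: u64 },
    Cancel { task_id: u64 },
}

#[tokio::main]
async fn main() {
    // Admin handlers hold the sender; the ahm processor owns the receiver.
    let (tx, mut rx) = mpsc::unbounded_channel::<HealCommand>();

    // Channel processor: drains commands and would dispatch them to the heal manager.
    let processor = tokio::spawn(async move {
        while let Some(cmd) = rx.recv().await {
            match cmd {
                HealCommand::Start { bucket, object } => {
                    println!("start heal: bucket={bucket} object={object:?}");
                }
                HealCommand::Query { task_id } => println!("query task {task_id}"),
                HealCommand::Cancel { task_id } => println!("cancel task {task_id}"),
            }
        }
    });

    // An admin handler (or ecstore, replacing a direct MRF call) sends a request.
    tx.send(HealCommand::Start { bucket: "photos".into(), object: None }).unwrap();
    drop(tx); // closing all senders lets the processor exit
    processor.await.unwrap();
}
```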
junxiang Mu
3409cd8dff feat(ahm): add HealingTracker support & complete fresh-disk healing
• Introduce ecstore HealingTracker into ahm crate; load/init/save tracker
• Re-implement heal_fresh_disk to use heal_erasure_set with tracker
• Enhance auto-disk scanner: detect unformatted disks via get_disk_id()
• Remove DataUsageCache handling for now
• Refactor imports & types, clean up duplicate constants
2025-07-24 12:12:49 +08:00
junxiang Mu
f4973a681c feat: implement complete ahm heal system with ecstore integration
- Add comprehensive heal storage API with ECStore integration
- Implement heal object, bucket, disk, metadata, and EC decode operations
- Add heal task management with progress tracking and statistics
- Optimize heal manager by removing unnecessary workers
- Add integration tests for core heal functionality (heal_object, heal_bucket, heal_format)
- Integrate with ecstore's native heal commands for actual repair operations

Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:12:49 +08:00
junxiang Mu
4fb3d187d0 feat: implement heal subsystem for automatic data repair
- Add heal module with core types (HealType, HealRequest, HealTask)
- Implement HealManager for task scheduling and execution
- Add HealStorageAPI trait and ECStoreHealStorage implementation
- Integrate heal capabilities into scanner for automatic repair
- Support multiple heal types: object, bucket, disk, metadata, MRF, EC decode
- Add progress tracking and event system for heal operations
- Merge heal and scanner error types for unified error handling
- Include comprehensive logging and metrics for heal operations

Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:12:49 +08:00
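This commit names the core types of the heal subsystem: `HealType`, `HealRequest`, `HealTask`, and a `HealManager` that schedules tasks. The sketch below mirrors those names with assumed fields and a deliberately simplified priority queue; the real implementation integrates with ECStore and is far richer.

```rust
/// Heal categories named in the commit; the variant payloads here are assumptions.
#[derive(Debug, Clone)]
enum HealType {
    Object { bucket: String, object: String },
    Bucket { bucket: String },
    Disk { endpoint: String },
    Metadata,
    Mrf,
    EcDecode,
}

#[derive(Debug, Clone)]
struct HealRequest {
    heal_type: HealType,
    priority: u8,
}

#[derive(Debug)]
struct HealTask {
    id: u64,
    request: HealRequest,
    progress: f32, // 0.0..=1.0
}

/// Minimal scheduler: queue tasks, hand out the highest-priority one first.
#[derive(Default)]
struct HealManager {
    next_id: u64,
    queue: Vec<HealTask>,
}

impl HealManager {
    fn submit(&mut self, request: HealRequest) -> u64 {
        self.next_id += 1;
        self.queue.push(HealTask { id: self.next_id, request, progress: 0.0 });
        self.next_id
    }

    fn next_task(&mut self) -> Option<HealTask> {
        self.queue.sort_by_key(|t| std::cmp::Reverse(t.request.priority));
        if self.queue.is_empty() { None } else { Some(self.queue.remove(0)) }
    }
}

fn main() {
    let mut mgr = HealManager::default();
    mgr.submit(HealRequest {
        heal_type: HealType::Object { bucket: "photos".into(), object: "a.jpg".into() },
        priority: 5,
    });
    println!("{:?}", mgr.next_task());
}
```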
dandan
0aff736efd Chore: fix ref and fix comment
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:12:49 +08:00
dandan
2aa7a631ef feat: refactor scanner module and add data usage statistics
- Move scanner code to scanner/ subdirectory for better organization
- Add data usage statistics collection and persistence
- Implement histogram support for size and version distribution
- Add global cancel token management for scanner operations
- Integrate scanner with ECStore for comprehensive data analysis
- Update error handling and improve test isolation
- Add data usage API endpoints and backend integration

Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:12:49 +08:00
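Among other things, this commit adds histogram support for size distribution in the data-usage statistics. Below is a minimal sketch of a bucketed size histogram; the bucket boundaries and type names are assumptions, not the ones RustFS actually uses.

```rust
/// Illustrative size-histogram buckets; the boundaries are assumptions.
const BOUNDS: &[u64] = &[
    1024,               // <= 1 KiB
    1024 * 1024,        // <= 1 MiB
    64 * 1024 * 1024,   // <= 64 MiB
    1024 * 1024 * 1024, // <= 1 GiB; anything larger lands in the overflow bucket
];

#[derive(Default, Debug)]
struct SizeHistogram {
    counts: [u64; 5], // one slot per bound plus an overflow slot
}

impl SizeHistogram {
    fn add(&mut self, size: u64) {
        let idx = BOUNDS.iter().position(|&b| size <= b).unwrap_or(BOUNDS.len());
        self.counts[idx] += 1;
    }
}

fn main() {
    let mut h = SizeHistogram::default();
    for size in [512_u64, 2_048, 10 * 1024 * 1024, 5 * 1024 * 1024 * 1024] {
        h.add(size);
    }
    println!("{h:?}"); // counts per bucket, smallest to largest
}
```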
dandan
b40ef147a9 refact: step 2
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:12:49 +08:00
junxiang Mu
1f11a3167b fix: Refact heal and scanner design
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 12:12:49 +08:00
guojidan
18b0134ddf Merge pull request #290 from guojidan/feat/complete-lock-implementation
refactor: reimplement lock
2025-07-24 12:11:19 +08:00
junxiang Mu
b48a5fdc94 fix fmt
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 11:52:57 +08:00
junxiang Mu
168a07a670 add api into ecstore
Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 11:52:57 +08:00
junxiang Mu
cad005bc21 refactor(lock): unify NamespaceLock client model and LockRequest API
- Refactor NamespaceLock to use a unified client vector and quorum mechanism,
  removing legacy local/distributed lock split and related code.
- Update LockRequest to split timeout into acquire_timeout and ttl, and add
  builder methods for both.
- Adjust all batch lock APIs to accept ttl and use new LockRequest fields.
- Update all affected tests and documentation for the new API.

Signed-off-by: junxiang Mu <1948535941@qq.com>
2025-07-24 11:52:57 +08:00
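The commit above splits the old single timeout of `LockRequest` into `acquire_timeout` (how long to wait for the lock to be granted) and `ttl` (how long the lock is held before it expires), each with a builder method. A sketch of that API shape follows; the defaults and the `resource` field are assumptions.

```rust
use std::time::Duration;

/// Sketch of the split-timeout request described above; defaults are assumed.
#[derive(Debug, Clone)]
struct LockRequest {
    resource: String,
    acquire_timeout: Duration, // how long to wait for the lock to be granted
    ttl: Duration,             // how long the lock is held before it expires
}

impl LockRequest {
    fn new(resource: impl Into<String>) -> Self {
        Self {
            resource: resource.into(),
            acquire_timeout: Duration::from_secs(5),
            ttl: Duration::from_secs(30),
        }
    }

    fn with_acquire_timeout(mut self, d: Duration) -> Self {
        self.acquire_timeout = d;
        self
    }

    fn with_ttl(mut self, d: Duration) -> Self {
        self.ttl = d;
        self
    }
}

fn main() {
    let req = LockRequest::new("bucket/object.txt")
        .with_acquire_timeout(Duration::from_secs(2))
        .with_ttl(Duration::from_secs(60));
    println!("{req:?}");
}
```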
root
dc44cde081 tmp
Signed-off-by: root <root@PC.localdomain>
2025-07-24 11:52:57 +08:00
dandan
4ccdeb9d2a refactor(lock): restructure lock crate, remove unused modules and clarify directory layout
- Remove unused core/rwlock.rs and manager/ modules (ManagerFactory, LifecycleManager, NamespaceManager)
- Move all lock-related code into crates/lock/src with clear submodules: client, core, utils, etc.
- Ensure only necessary files and APIs are exposed, improve maintainability
- No functional logic change, pure structure and cleanup refactor

Signed-off-by: dandan <dandan@dandandeMac-Studio.local>
2025-07-24 11:52:55 +08:00
dependabot[bot]
1b48934f47 build(deps): bump the dependencies group with 5 updates (#289)
Bumps the dependencies group with 5 updates:

| Package | From | To |
| --- | --- | --- |
| [hyper-util](https://github.com/hyperium/hyper-util) | `0.1.15` | `0.1.16` |
| [rand](https://github.com/rust-random/rand) | `0.9.1` | `0.9.2` |
| [serde_json](https://github.com/serde-rs/json) | `1.0.140` | `1.0.141` |
| [strum](https://github.com/Peternator7/strum) | `0.27.1` | `0.27.2` |
| [sysinfo](https://github.com/GuillaumeGomez/sysinfo) | `0.36.0` | `0.36.1` |


Updates `hyper-util` from 0.1.15 to 0.1.16
- [Release notes](https://github.com/hyperium/hyper-util/releases)
- [Changelog](https://github.com/hyperium/hyper-util/blob/master/CHANGELOG.md)
- [Commits](https://github.com/hyperium/hyper-util/compare/v0.1.15...v0.1.16)

Updates `rand` from 0.9.1 to 0.9.2
- [Release notes](https://github.com/rust-random/rand/releases)
- [Changelog](https://github.com/rust-random/rand/blob/master/CHANGELOG.md)
- [Commits](https://github.com/rust-random/rand/compare/rand_core-0.9.1...rand_core-0.9.2)

Updates `serde_json` from 1.0.140 to 1.0.141
- [Release notes](https://github.com/serde-rs/json/releases)
- [Commits](https://github.com/serde-rs/json/compare/v1.0.140...v1.0.141)

Updates `strum` from 0.27.1 to 0.27.2
- [Release notes](https://github.com/Peternator7/strum/releases)
- [Changelog](https://github.com/Peternator7/strum/blob/master/CHANGELOG.md)
- [Commits](https://github.com/Peternator7/strum/compare/v0.27.1...v0.27.2)

Updates `sysinfo` from 0.36.0 to 0.36.1
- [Changelog](https://github.com/GuillaumeGomez/sysinfo/blob/master/CHANGELOG.md)
- [Commits](https://github.com/GuillaumeGomez/sysinfo/compare/v0.36.0...v0.36.1)

---
updated-dependencies:
- dependency-name: hyper-util
  dependency-version: 0.1.16
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: dependencies
- dependency-name: rand
  dependency-version: 0.9.2
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: dependencies
- dependency-name: serde_json
  dependency-version: 1.0.141
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: dependencies
- dependency-name: strum
  dependency-version: 0.27.2
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: dependencies
- dependency-name: sysinfo
  dependency-version: 0.36.1
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: dependencies
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-07-24 11:50:52 +08:00
zhangwenlong
25fa645184 add rustfs.spec for rustfs (#103)
add support on loongarch64
2025-07-24 11:39:09 +08:00
Marco Orlandin
3a3bb880f2 fix: update link in README.md leading to a 404 error (#285) 2025-07-24 09:15:04 +08:00
安正超
affe27298c fix: improve Windows build support and CI/CD workflow (#283)
- Fix Windows zip command issue by using PowerShell Compress-Archive
- Add Windows support for OSS upload with ossutil
- Replace Chinese comments with English in build.yml
- Fix bash syntax error in package_zip function
- Improve code formatting and consistency
- Update various configuration files for better cross-platform support

Resolves Windows build failures in GitHub Actions.
2025-07-22 23:55:57 +08:00
shiro.lee
629db6218e fix: the issue where preview fails when the path length exceeds 255 characters (#280) 2025-07-22 22:10:57 +08:00
安正超
aa1a3ce4e8 feat: add cargo clippy --fix --allow-dirty to pre-commit command (#282)
Resolves #277

- Add --fix flag to automatically fix clippy warnings
- Add --allow-dirty flag to run on dirty Git trees
- Improves code quality in pre-commit workflow
2025-07-22 22:10:53 +08:00
houseme
693db59fcc fix 2025-07-21 20:45:59 +08:00
houseme
0a7df4ef26 fix 2025-07-21 19:03:15 +08:00
houseme
9dcdc44718 fix 2025-07-21 18:03:01 +08:00
houseme
2a0c618f8b fix: windows build 2025-07-21 17:45:56 +08:00
loverustfs
bebd78fbbb Add GNU to build.yml (#275)
* fix unzip error

* fix url change error

fix url change error

* Simplify user experience and integrate console and endpoint

Simplify user experience and integrate console and endpoint

* Add gnu to  build.yml
2025-07-21 16:58:29 +08:00
houseme
3f095e75cb improve code for logger and fix typo (#272) 2025-07-21 15:20:36 +08:00
houseme
f7d30da9e0 fix typo (#267)
* fix typo

* cargo fmt
2025-07-20 00:11:15 +08:00
Chrislearn Young
823d4b6f79 Add typos github actions and fix typos (#265)
* Add typo github actions and fix typos

* cargo fmt
2025-07-19 22:08:50 +08:00
安正超
051ea7786f fix: ossutil install command. (#263) 2025-07-19 18:21:31 +08:00
安正超
42b645e355 fix: robust Dockerfile version logic for v prefix handling (#262)
* fix: robust Dockerfile version logic for v prefix handling

* wip
2025-07-19 15:50:15 +08:00
安正超
f27ee96014 feat: enhance entrypoint and Dockerfiles for flexible volume and permission management (#260)
* feat: enhance entrypoint and Dockerfiles for flexible volume and permission management

  - Support batch mount and permission fix in entrypoint.sh
  - Add coreutils/shadow (alpine) and coreutils/passwd (ubuntu) for UID/GID/ownership
  - Use ENTRYPOINT for unified startup
  - Make local dev and prod Dockerfile behavior consistent
  - Improve security and user experience

  BREAKING CHANGE: entrypoint.sh and Dockerfile now require additional packages for permission management, and support batch volume mount via RUSTFS_VOLUMES.

* chore: update Dockerfile comments to English only

* fix(entrypoint): improve local/remote volume detection and permission logic in entrypoint.sh

* Update entrypoint.sh

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update entrypoint.sh

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update Dockerfile

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-07-19 11:48:46 +08:00
houseme
20cd117aa6 improve code for dockerfile (#256)
* improve code for dockerfile

* Update Dockerfile

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* improve code for file name

* improve code for dockerfile

* fix

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-07-18 15:53:00 +08:00
houseme
fc8931d69f improve code for dockerfile (#253)
* improve code for dockerfile

* Update Dockerfile

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-07-18 11:05:00 +08:00
weisd
0167b2decd fix: optimize RPC connection management and prevent race conditions (#252) 2025-07-18 10:41:00 +08:00
weisd
e67980ff3c Fix/range content length (#251)
* fix:getobject range length
2025-07-17 23:25:21 +08:00
weisd
96760bba5a fix:getobject range length (#250) 2025-07-17 23:14:19 +08:00
overtrue
2501d7d241 fix: remove branch restriction from Docker workflow_run trigger
The Docker workflow was not triggering for tag-based releases because it had
'branches: [main]' restriction in the workflow_run configuration. When pushing
tags, the triggering workflow runs on the tag, not on main branch.

Changes:
- Remove 'branches: [main]' from workflow_run trigger
- Simplify tag detection using github.event.workflow_run context instead of API calls
- Use official workflow_run event properties (head_branch, event) for reliable detection
- Support both 'refs/tags/VERSION' and direct 'VERSION' formats
- Add better logging for debugging workflow trigger issues

This fixes the issue where Docker images were not built for tagged releases.
2025-07-17 08:13:34 +08:00
overtrue
55b84262b5 fix: use GitHub API for reliable tag detection in Docker workflow
- Replace git commands with GitHub API calls for tag detection
- Add proper commit checkout for workflow_run events
- Use gh CLI and curl fallback for better reliability
- Add debug output to help troubleshoot tag detection issues

This should fix the issue where Docker builds were not triggered for tagged releases
due to missing tag information in the workflow_run environment.
2025-07-17 08:01:33 +08:00
overtrue
ce4252eb1a fix: correct Docker workflow trigger logic for tag-based releases
BREAKING CHANGE: Fixed Docker workflow that was incorrectly skipping builds for tagged releases
- Fix logic to detect tag pushes using git refs instead of branch names
- Properly identify tag pushes vs branch pushes using git show-ref
- Support both v-prefixed and bare version formats
- Ensure Docker images are built for all tagged releases including prereleases
2025-07-17 07:46:54 +08:00
overtrue
db708917b4 docs: update .docker/README.md to reflect simplified Makefile commands
- Add new Makefile Commands section with simplified docker-dev* commands
- Update Development Workflow to use new dev-env-* commands
- Update directory structure (remove deleted alpine/ directory)
- Reorganize build instructions to prioritize Makefile over direct scripts
- Add Common Development Tasks section with make help commands
2025-07-17 07:30:13 +08:00
overtrue
8ddb45627d refactor: simplify Docker build commands and fix version matching
- Remove obsolete .docker/alpine/Dockerfile.protoc (superseded by Dockerfile.source)
- Simplify Makefile commands by removing backward compatibility aliases
  * Replace docker-buildx-source* with shorter docker-dev* commands
  * Replace start/stop with explicit dev-env-start/dev-env-stop commands
- Fix Docker workflow version matching logic to correctly distinguish:
  * 1.0.0 vs 1.0.0-alpha.11 (prerelease detection)
  * Support both v1.0.0 and 1.0.0 formats (with/without v prefix)
  * Reorder case patterns to match prereleases before releases

BREAKING CHANGE: Removed legacy command aliases
- Use 'make docker-dev-local' instead of 'make docker-buildx-source-local'
- Use 'make dev-env-start' instead of 'make start'
2025-07-17 07:29:00 +08:00
overtrue
550c225b79 wip 2025-07-17 07:07:02 +08:00
overtrue
0d46b550a8 refactor: merge release workflow into build workflow and clean up
- Merge release logic into build.yml to avoid cross-workflow artifact access issues
- Add release jobs (create-release, upload-release-assets, update-latest-version, publish-release) that run only for tag pushes
- Use standard actions/download-artifact@v4 within the same workflow (no cross-workflow limitations)
- Deprecate standalone release.yml workflow with warning job and confirmation requirement
- Remove references to deleted release-notes-template.md file from both workflows
- Update build summary messages to reflect integrated release process

This resolves the 'Prepare release assets' failure by eliminating the need for cross-workflow artifact access.
2025-07-17 07:06:51 +08:00
overtrue
0693cca1a4 fix: resolve workflow_run artifact access issue in release pipeline
- Replace actions/download-artifact@v4 with GitHub API calls to access artifacts from triggering workflow
- Add proper permissions (contents: read, actions: read) to prepare-assets job
- Handle both workflow_run and workflow_dispatch trigger scenarios
- Fix the root cause: workflow_run events cannot access artifacts from triggering workflows using standard download-artifact action

Fixes the 'Prepare release assets' step failure by implementing cross-workflow artifact access through GitHub API.
2025-07-17 06:58:09 +08:00
安正超
0d9f9e381a refactor: use workflow_run trigger for release workflow to eliminate timing issues (#241)
* fix: use correct tag reference in release workflow wait-for-artifacts step

- Change ref from github.ref to needs.release-check.outputs.tag
- Fix issue where wait-on-check-action receives full git reference (refs/tags/1.0.0-alpha.21)
  instead of clean tag name (1.0.0-alpha.21)
- This resolves timeout errors when waiting for build artifacts during release process

Fixes the release workflow failure for tag 1.0.0-alpha.21

* refactor: use workflow_run trigger for release workflow instead of push

- Replace push trigger with workflow_run to eliminate timing issues
- Release workflow now triggers only after Build workflow completes successfully
- Remove wait-for-artifacts step completely (no longer needed)
- Add should_release condition to control release execution
- Support both tag pushes and manual releases via workflow_dispatch
- Align with docker.yml pattern for better reliability

This completely resolves the release workflow timeout issues by ensuring
build artifacts are always available before the release process starts.

Fixes the fundamental timing issue where release.yml and build.yml
were racing against each other when triggered by the same tag push.
2025-07-17 06:48:09 +08:00
安正超
6c7aa5a7ae fix: use correct tag reference in release workflow wait-for-artifacts step (#240)
- Change ref from github.ref to needs.release-check.outputs.tag
- Fix issue where wait-on-check-action receives full git reference (refs/tags/1.0.0-alpha.21)
  instead of clean tag name (1.0.0-alpha.21)
- This resolves timeout errors when waiting for build artifacts during release process

Fixes the release workflow failure for tag 1.0.0-alpha.21
2025-07-17 06:36:57 +08:00
overtrue
a27d935925 wip 2025-07-17 06:31:25 +08:00
141 changed files with 15921 additions and 11993 deletions


@@ -6,10 +6,10 @@ This directory contains Docker configuration files and supporting infrastructure
```
rustfs/
├── Dockerfile # Production image (Alpine + GitHub Releases)
├── Dockerfile.source # Source build (Debian + cross-compilation)
├── cargo.config.toml # Rust cargo configuration
├── Dockerfile # Production image (Alpine + pre-built binaries)
├── Dockerfile.source # Development image (Debian + source build)
├── docker-buildx.sh # Multi-architecture build script
├── Makefile # Build automation with simplified commands
└── .docker/ # Supporting infrastructure
├── observability/ # Monitoring and observability configs
├── compose/ # Docker Compose configurations
@@ -64,7 +64,11 @@ docker run rustfs/rustfs:main-latest # Main branch latest
### Development Environment
```bash
# Start development container
# Quick setup using Makefile (recommended)
make docker-dev-local # Build development image locally
make dev-env-start # Start development container
# Manual Docker commands
docker run -it -v $(pwd):/workspace -p 9000:9000 rustfs/rustfs:latest-dev
# Build from source locally
@@ -76,9 +80,33 @@ docker-compose up rustfs-dev
## 🏗️ Build Arguments and Scripts
### Using docker-buildx.sh (Recommended)
### Using Makefile Commands (Recommended)
For multi-architecture builds, use the provided script:
The easiest way to build images using simplified commands:
```bash
# Development images (build from source)
make docker-dev-local # Build for local use (single arch)
make docker-dev # Build multi-arch (for CI/CD)
make docker-dev-push REGISTRY=xxx # Build and push to registry
# Production images (using pre-built binaries)
make docker-buildx # Build multi-arch production images
make docker-buildx-push # Build and push production images
make docker-buildx-version VERSION=v1.0.0 # Build specific version
# Development environment
make dev-env-start # Start development container
make dev-env-stop # Stop development container
make dev-env-restart # Restart development container
# Help
make help-docker # Show all Docker-related commands
```
### Using docker-buildx.sh (Advanced)
For direct script usage and advanced scenarios:
```bash
# Build latest version for all architectures
@@ -147,17 +175,51 @@ Architecture is automatically detected during build using Docker's `TARGETARCH`
## 🛠️ Development Workflow
For local development and testing:
### Quick Start with Makefile (Recommended)
```bash
# Quick development setup
docker-compose up rustfs-dev
# 1. Start development environment
make dev-env-start
# Custom source build
docker build -f Dockerfile.source -t rustfs:custom .
# 2. Your development container is now running with:
# - Port 9000 exposed for RustFS
# - Port 9010 exposed for admin console
# - Current directory mounted as /workspace
# 3. Stop when done
make dev-env-stop
```
### Manual Development Setup
```bash
# Build development image from source
make docker-dev-local
# Or use traditional Docker commands
docker build -f Dockerfile.source -t rustfs:dev .
# Run with development tools
docker run -it -v $(pwd):/workspace rustfs:custom bash
docker run -it -v $(pwd):/workspace -p 9000:9000 rustfs:dev bash
# Or use docker-compose for complex setups
docker-compose up rustfs-dev
```
### Common Development Tasks
```bash
# Build and test locally
make build # Build binary natively
make docker-dev-local # Build development Docker image
make test # Run tests
make fmt # Format code
make clippy # Run linter
# Get help
make help # General help
make help-docker # Docker-specific help
make help-build # Build-specific help
```
## 🚀 CI/CD Integration


@@ -1,40 +0,0 @@
FROM alpine:3.18
ENV LANG C.UTF-8
# Install base dependencies
RUN apk add --no-cache \
wget \
git \
curl \
unzip \
gcc \
musl-dev \
pkgconfig \
openssl-dev \
dbus-dev \
wayland-dev \
webkit2gtk-4.1-dev \
build-base \
linux-headers
# install protoc
RUN wget https://github.com/protocolbuffers/protobuf/releases/download/v30.2/protoc-30.2-linux-x86_64.zip \
&& unzip protoc-30.2-linux-x86_64.zip -d protoc3 \
&& mv protoc3/bin/* /usr/local/bin/ && chmod +x /usr/local/bin/protoc \
&& mv protoc3/include/* /usr/local/include/ && rm -rf protoc-30.2-linux-x86_64.zip protoc3
# install flatc
RUN wget https://github.com/google/flatbuffers/releases/download/v24.3.25/Linux.flatc.binary.g++-13.zip \
&& unzip Linux.flatc.binary.g++-13.zip \
&& mv flatc /usr/local/bin/ && chmod +x /usr/local/bin/flatc && rm -rf Linux.flatc.binary.g++-13.zip
# install rust
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
# Set PATH for rust
ENV PATH="/root/.cargo/bin:${PATH}"
COPY .docker/cargo.config.toml /root/.cargo/config.toml
WORKDIR /root/rustfs


@@ -27,7 +27,7 @@ services:
ports:
- "9000:9000" # Map port 9001 of the host to port 9000 of the container
volumes:
- ../../target/x86_64-unknown-linux-musl/release/rustfs:/app/rustfs
- ../../target/x86_64-unknown-linux-gnu/release/rustfs:/app/rustfs
command: "/app/rustfs"
node1:
@@ -44,7 +44,7 @@ services:
ports:
- "9001:9000" # Map port 9002 of the host to port 9000 of the container
volumes:
- ../../target/x86_64-unknown-linux-musl/release/rustfs:/app/rustfs
- ../../target/x86_64-unknown-linux-gnu/release/rustfs:/app/rustfs
command: "/app/rustfs"
node2:
@@ -61,7 +61,7 @@ services:
ports:
- "9002:9000" # Map port 9003 of the host to port 9000 of the container
volumes:
- ../../target/x86_64-unknown-linux-musl/release/rustfs:/app/rustfs
- ../../target/x86_64-unknown-linux-gnu/release/rustfs:/app/rustfs
command: "/app/rustfs"
node3:
@@ -78,5 +78,5 @@ services:
ports:
- "9003:9000" # Map port 9004 of the host to port 9000 of the container
volumes:
- ../../target/x86_64-unknown-linux-musl/release/rustfs:/app/rustfs
- ../../target/x86_64-unknown-linux-gnu/release/rustfs:/app/rustfs
command: "/app/rustfs"


@@ -172,6 +172,14 @@ jobs:
target: aarch64-unknown-linux-musl
cross: true
platform: linux
- os: ubuntu-latest
target: x86_64-unknown-linux-gnu
cross: false
platform: linux
- os: ubuntu-latest
target: aarch64-unknown-linux-gnu
cross: true
platform: linux
# macOS builds
- os: macos-latest
target: aarch64-apple-darwin
@@ -181,15 +189,15 @@ jobs:
target: x86_64-apple-darwin
cross: false
platform: macos
# # Windows builds (temporarily disabled)
# - os: windows-latest
# target: x86_64-pc-windows-msvc
# cross: false
# platform: windows
# - os: windows-latest
# target: aarch64-pc-windows-msvc
# cross: true
# platform: windows
# Windows builds (temporarily disabled)
- os: windows-latest
target: x86_64-pc-windows-msvc
cross: false
platform: windows
#- os: windows-latest
# target: aarch64-pc-windows-msvc
# cross: true
# platform: windows
steps:
- name: Checkout repository
uses: actions/checkout@v4
@@ -207,6 +215,7 @@ jobs:
install-cross-tools: ${{ matrix.cross }}
- name: Download static console assets
shell: bash
run: |
mkdir -p ./rustfs/static
if [[ "${{ matrix.platform }}" == "windows" ]]; then
@@ -232,6 +241,7 @@ jobs:
fi
- name: Build RustFS
shell: bash
run: |
# Force rebuild by touching build.rs
touch rustfs/build.rs
@@ -260,30 +270,55 @@ jobs:
# Extract platform and arch from target
TARGET="${{ matrix.target }}"
PLATFORM="${{ matrix.platform }}"
# Map target to architecture
# Map target to architecture and variant
case "$TARGET" in
*x86_64*musl*)
ARCH="x86_64"
VARIANT="musl"
;;
*x86_64*gnu*)
ARCH="x86_64"
VARIANT="gnu"
;;
*x86_64*)
ARCH="x86_64"
VARIANT=""
;;
*aarch64*musl*|*arm64*musl*)
ARCH="aarch64"
VARIANT="musl"
;;
*aarch64*gnu*|*arm64*gnu*)
ARCH="aarch64"
VARIANT="gnu"
;;
*aarch64*|*arm64*)
ARCH="aarch64"
VARIANT=""
;;
*armv7*)
ARCH="armv7"
VARIANT=""
;;
*)
ARCH="unknown"
VARIANT=""
;;
esac
# Generate package name based on build type
if [[ "$BUILD_TYPE" == "development" ]]; then
# Development build: rustfs-${platform}-${arch}-dev-${short_sha}.zip
PACKAGE_NAME="rustfs-${PLATFORM}-${ARCH}-dev-${SHORT_SHA}"
if [[ -n "$VARIANT" ]]; then
ARCH_WITH_VARIANT="${ARCH}-${VARIANT}"
else
# Release/Prerelease build: rustfs-${platform}-${arch}-v${version}.zip
PACKAGE_NAME="rustfs-${PLATFORM}-${ARCH}-v${VERSION}"
ARCH_WITH_VARIANT="${ARCH}"
fi
if [[ "$BUILD_TYPE" == "development" ]]; then
# Development build: rustfs-${platform}-${arch}-${variant}-dev-${short_sha}.zip
PACKAGE_NAME="rustfs-${PLATFORM}-${ARCH_WITH_VARIANT}-dev-${SHORT_SHA}"
else
# Release/Prerelease build: rustfs-${platform}-${arch}-${variant}-v${version}.zip
PACKAGE_NAME="rustfs-${PLATFORM}-${ARCH_WITH_VARIANT}-v${VERSION}"
fi
# Create zip packages for all platforms
@@ -295,23 +330,119 @@ jobs:
fi
cd target/${{ matrix.target }}/release
zip "../../../${PACKAGE_NAME}.zip" rustfs
# Determine the binary name based on platform
if [[ "${{ matrix.platform }}" == "windows" ]]; then
BINARY_NAME="rustfs.exe"
else
BINARY_NAME="rustfs"
fi
# Verify the binary exists before packaging
if [[ ! -f "$BINARY_NAME" ]]; then
echo "❌ Binary $BINARY_NAME not found in $(pwd)"
if [[ "${{ matrix.platform }}" == "windows" ]]; then
dir
else
ls -la
fi
exit 1
fi
# Universal packaging function
package_zip() {
local src=$1
local dst=$2
if [[ "${{ matrix.platform }}" == "windows" ]]; then
# Windows uses PowerShell Compress-Archive
powershell -Command "Compress-Archive -Path '$src' -DestinationPath '$dst' -Force"
elif command -v zip &> /dev/null; then
# Unix systems use zip command
zip "$dst" "$src"
else
echo "❌ No zip utility available"
exit 1
fi
}
# Create the zip package
echo "Start packaging: $BINARY_NAME -> ../../../${PACKAGE_NAME}.zip"
package_zip "$BINARY_NAME" "../../../${PACKAGE_NAME}.zip"
cd ../../..
# Verify the package was created
if [[ -f "${PACKAGE_NAME}.zip" ]]; then
echo "✅ Package created successfully: ${PACKAGE_NAME}.zip"
if [[ "${{ matrix.platform }}" == "windows" ]]; then
dir
else
ls -lh ${PACKAGE_NAME}.zip
fi
else
echo "❌ Failed to create package: ${PACKAGE_NAME}.zip"
exit 1
fi
# Create latest version files right after the main package
LATEST_FILES=""
if [[ "$BUILD_TYPE" == "release" ]] || [[ "$BUILD_TYPE" == "prerelease" ]]; then
# Create latest version filename
# Convert from rustfs-linux-x86_64-musl-v1.0.0 to rustfs-linux-x86_64-musl-latest
LATEST_FILE="${PACKAGE_NAME%-v*}-latest.zip"
echo "🔄 Creating latest version: ${PACKAGE_NAME}.zip -> $LATEST_FILE"
cp "${PACKAGE_NAME}.zip" "$LATEST_FILE"
if [[ -f "$LATEST_FILE" ]]; then
echo "✅ Latest version created: $LATEST_FILE"
LATEST_FILES="$LATEST_FILE"
fi
elif [[ "$BUILD_TYPE" == "development" ]]; then
# Development builds (only main branch triggers development builds)
# Create main-latest version filename
# Convert from rustfs-linux-x86_64-dev-abc123 to rustfs-linux-x86_64-main-latest
MAIN_LATEST_FILE="${PACKAGE_NAME%-dev-*}-main-latest.zip"
echo "🔄 Creating main-latest version: ${PACKAGE_NAME}.zip -> $MAIN_LATEST_FILE"
cp "${PACKAGE_NAME}.zip" "$MAIN_LATEST_FILE"
if [[ -f "$MAIN_LATEST_FILE" ]]; then
echo "✅ Main-latest version created: $MAIN_LATEST_FILE"
LATEST_FILES="$MAIN_LATEST_FILE"
# Also create a generic main-latest for Docker builds (Linux only)
if [[ "${{ matrix.platform }}" == "linux" ]]; then
DOCKER_MAIN_LATEST_FILE="rustfs-linux-${ARCH_WITH_VARIANT}-main-latest.zip"
echo "🔄 Creating Docker main-latest version: ${PACKAGE_NAME}.zip -> $DOCKER_MAIN_LATEST_FILE"
cp "${PACKAGE_NAME}.zip" "$DOCKER_MAIN_LATEST_FILE"
if [[ -f "$DOCKER_MAIN_LATEST_FILE" ]]; then
echo "✅ Docker main-latest version created: $DOCKER_MAIN_LATEST_FILE"
LATEST_FILES="$LATEST_FILES $DOCKER_MAIN_LATEST_FILE"
fi
fi
fi
fi
echo "package_name=${PACKAGE_NAME}" >> $GITHUB_OUTPUT
echo "package_file=${PACKAGE_NAME}.zip" >> $GITHUB_OUTPUT
echo "latest_files=${LATEST_FILES}" >> $GITHUB_OUTPUT
echo "build_type=${BUILD_TYPE}" >> $GITHUB_OUTPUT
echo "version=${VERSION}" >> $GITHUB_OUTPUT
echo "📦 Package created: ${PACKAGE_NAME}.zip"
if [[ -n "$LATEST_FILES" ]]; then
echo "📦 Latest files created: $LATEST_FILES"
fi
echo "🔧 Build type: ${BUILD_TYPE}"
echo "📊 Version: ${VERSION}"
- name: Upload artifacts
- name: Upload to GitHub artifacts
uses: actions/upload-artifact@v4
with:
name: ${{ steps.package.outputs.package_name }}
path: ${{ steps.package.outputs.package_file }}
path: "rustfs-*.zip"
retention-days: ${{ startsWith(github.ref, 'refs/tags/') && 30 || 7 }}
- name: Upload to Aliyun OSS
@@ -321,6 +452,7 @@ jobs:
OSS_ACCESS_KEY_SECRET: ${{ secrets.ALICLOUDOSS_KEY_SECRET }}
OSS_REGION: cn-beijing
OSS_ENDPOINT: https://oss-cn-beijing.aliyuncs.com
shell: bash
run: |
BUILD_TYPE="${{ needs.build-check.outputs.build_type }}"
@@ -359,6 +491,16 @@ jobs:
chmod +x /usr/local/bin/ossutil
OSSUTIL_BIN=ossutil
;;
windows)
OSSUTIL_ZIP="ossutil-${OSSUTIL_VERSION}-windows-amd64.zip"
OSSUTIL_DIR="ossutil-${OSSUTIL_VERSION}-windows-amd64"
curl -o "$OSSUTIL_ZIP" "https://gosspublic.alicdn.com/ossutil/v2/${OSSUTIL_VERSION}/${OSSUTIL_ZIP}"
unzip "$OSSUTIL_ZIP"
mv "${OSSUTIL_DIR}/ossutil.exe" ./ossutil.exe
rm -rf "$OSSUTIL_DIR" "$OSSUTIL_ZIP"
OSSUTIL_BIN=./ossutil.exe
;;
esac
# Determine upload path based on build type
@@ -370,72 +512,15 @@ jobs:
echo "📤 Uploading release build to OSS release directory"
fi
# Upload the package file to OSS
echo "Uploading ${{ steps.package.outputs.package_file }} to $OSS_PATH..."
$OSSUTIL_BIN cp "${{ steps.package.outputs.package_file }}" "$OSS_PATH" --force
# For release and prerelease builds, also create a latest version
if [[ "$BUILD_TYPE" == "release" ]] || [[ "$BUILD_TYPE" == "prerelease" ]]; then
# Extract platform and arch from package name
PACKAGE_NAME="${{ steps.package.outputs.package_name }}"
# Create latest version filename
# Convert from rustfs-linux-x86_64-v1.0.0 to rustfs-linux-x86_64-latest
LATEST_FILE="${PACKAGE_NAME%-v*}-latest.zip"
# Copy the original file to latest version
cp "${{ steps.package.outputs.package_file }}" "$LATEST_FILE"
# Upload the latest version
echo "Uploading latest version: $LATEST_FILE to $OSS_PATH..."
$OSSUTIL_BIN cp "$LATEST_FILE" "$OSS_PATH" --force
echo "✅ Latest version uploaded: $LATEST_FILE"
fi
# For development builds, create dev-latest version
if [[ "$BUILD_TYPE" == "development" ]]; then
# Extract platform and arch from package name
PACKAGE_NAME="${{ steps.package.outputs.package_name }}"
# Create dev-latest version filename
# Convert from rustfs-linux-x86_64-dev-abc123 to rustfs-linux-x86_64-dev-latest
DEV_LATEST_FILE="${PACKAGE_NAME%-*}-latest.zip"
# Copy the original file to dev-latest version
cp "${{ steps.package.outputs.package_file }}" "$DEV_LATEST_FILE"
# Upload the dev-latest version
echo "Uploading dev-latest version: $DEV_LATEST_FILE to $OSS_PATH..."
$OSSUTIL_BIN cp "$DEV_LATEST_FILE" "$OSS_PATH" --force
echo "✅ Dev-latest version uploaded: $DEV_LATEST_FILE"
# For main branch builds, also create a main-latest version
if [[ "${{ github.ref }}" == "refs/heads/main" ]]; then
# Create main-latest version filename
# Convert from rustfs-linux-x86_64-dev-abc123 to rustfs-linux-x86_64-main-latest
MAIN_LATEST_FILE="${PACKAGE_NAME%-dev-*}-main-latest.zip"
# Copy the original file to main-latest version
cp "${{ steps.package.outputs.package_file }}" "$MAIN_LATEST_FILE"
# Upload the main-latest version
echo "Uploading main-latest version: $MAIN_LATEST_FILE to $OSS_PATH..."
$OSSUTIL_BIN cp "$MAIN_LATEST_FILE" "$OSS_PATH" --force
echo "✅ Main-latest version uploaded: $MAIN_LATEST_FILE"
# Also create a generic main-latest for Docker builds
if [[ "${{ matrix.platform }}" == "linux" ]]; then
DOCKER_MAIN_LATEST_FILE="rustfs-linux-${{ matrix.target == 'x86_64-unknown-linux-musl' && 'x86_64' || 'aarch64' }}-main-latest.zip"
cp "${{ steps.package.outputs.package_file }}" "$DOCKER_MAIN_LATEST_FILE"
$OSSUTIL_BIN cp "$DOCKER_MAIN_LATEST_FILE" "$OSS_PATH" --force
echo "✅ Docker main-latest version uploaded: $DOCKER_MAIN_LATEST_FILE"
fi
# Upload all rustfs zip files to OSS using glob pattern
echo "📤 Uploading all rustfs-*.zip files to $OSS_PATH..."
for zip_file in rustfs-*.zip; do
if [[ -f "$zip_file" ]]; then
echo "Uploading: $zip_file to $OSS_PATH..."
$OSSUTIL_BIN cp "$zip_file" "$OSS_PATH" --force
echo "✅ Uploaded: $zip_file"
fi
fi
done
echo "✅ Upload completed successfully"
@@ -447,6 +532,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Build completion summary
shell: bash
run: |
BUILD_TYPE="${{ needs.build-check.outputs.build_type }}"
VERSION="${{ needs.build-check.outputs.version }}"
@@ -471,12 +557,12 @@ jobs:
"release")
echo "🚀 Release build artifacts have been uploaded to OSS release directory"
echo "✅ This build is ready for production use"
echo "🏷️ GitHub Release will be created automatically by the release workflow"
echo "🏷️ GitHub Release will be created in this workflow"
;;
"prerelease")
echo "🧪 Prerelease build artifacts have been uploaded to OSS release directory"
echo "⚠️ This is a prerelease build - use with caution"
echo "🏷️ GitHub Release will be created automatically by the release workflow"
echo "🏷️ GitHub Release will be created in this workflow"
;;
esac
@@ -489,3 +575,273 @@ jobs:
else
echo "❌ Docker image build will be skipped due to build failure"
fi
# Create GitHub Release (only for tag pushes)
create-release:
name: Create GitHub Release
needs: [build-check, build-rustfs]
if: startsWith(github.ref, 'refs/tags/') && needs.build-check.outputs.build_type != 'development'
runs-on: ubuntu-latest
permissions:
contents: write
outputs:
release_id: ${{ steps.create.outputs.release_id }}
release_url: ${{ steps.create.outputs.release_url }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Create GitHub Release
id: create
env:
GH_TOKEN: ${{ github.token }}
shell: bash
run: |
TAG="${{ needs.build-check.outputs.version }}"
VERSION="${{ needs.build-check.outputs.version }}"
IS_PRERELEASE="${{ needs.build-check.outputs.is_prerelease }}"
BUILD_TYPE="${{ needs.build-check.outputs.build_type }}"
# Determine release type for title
if [[ "$BUILD_TYPE" == "prerelease" ]]; then
if [[ "$TAG" == *"alpha"* ]]; then
RELEASE_TYPE="alpha"
elif [[ "$TAG" == *"beta"* ]]; then
RELEASE_TYPE="beta"
elif [[ "$TAG" == *"rc"* ]]; then
RELEASE_TYPE="rc"
else
RELEASE_TYPE="prerelease"
fi
else
RELEASE_TYPE="release"
fi
# Check if release already exists
if gh release view "$TAG" >/dev/null 2>&1; then
echo "Release $TAG already exists"
RELEASE_ID=$(gh release view "$TAG" --json databaseId --jq '.databaseId')
RELEASE_URL=$(gh release view "$TAG" --json url --jq '.url')
else
# Get release notes from tag message
RELEASE_NOTES=$(git tag -l --format='%(contents)' "${TAG}")
if [[ -z "$RELEASE_NOTES" || "$RELEASE_NOTES" =~ ^[[:space:]]*$ ]]; then
if [[ "$IS_PRERELEASE" == "true" ]]; then
RELEASE_NOTES="Pre-release ${VERSION} (${RELEASE_TYPE})"
else
RELEASE_NOTES="Release ${VERSION}"
fi
fi
# Create release title
if [[ "$IS_PRERELEASE" == "true" ]]; then
TITLE="RustFS $VERSION (${RELEASE_TYPE})"
else
TITLE="RustFS $VERSION"
fi
# Create the release
PRERELEASE_FLAG=""
if [[ "$IS_PRERELEASE" == "true" ]]; then
PRERELEASE_FLAG="--prerelease"
fi
gh release create "$TAG" \
--title "$TITLE" \
--notes "$RELEASE_NOTES" \
$PRERELEASE_FLAG \
--draft
RELEASE_ID=$(gh release view "$TAG" --json databaseId --jq '.databaseId')
RELEASE_URL=$(gh release view "$TAG" --json url --jq '.url')
fi
echo "release_id=$RELEASE_ID" >> $GITHUB_OUTPUT
echo "release_url=$RELEASE_URL" >> $GITHUB_OUTPUT
echo "Created release: $RELEASE_URL"
# Prepare and upload release assets
upload-release-assets:
name: Upload Release Assets
needs: [build-check, build-rustfs, create-release]
if: startsWith(github.ref, 'refs/tags/') && needs.build-check.outputs.build_type != 'development'
runs-on: ubuntu-latest
permissions:
contents: write
actions: read
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Download all build artifacts
uses: actions/download-artifact@v4
with:
path: ./artifacts
pattern: rustfs-*
merge-multiple: true
- name: Prepare release assets
id: prepare
shell: bash
run: |
VERSION="${{ needs.build-check.outputs.version }}"
TAG="${{ needs.build-check.outputs.version }}"
mkdir -p ./release-assets
# Copy and verify artifacts (including latest files created during build)
ASSETS_COUNT=0
for file in ./artifacts/*.zip; do
if [[ -f "$file" ]]; then
cp "$file" ./release-assets/
ASSETS_COUNT=$((ASSETS_COUNT + 1))
fi
done
if [[ $ASSETS_COUNT -eq 0 ]]; then
echo "❌ No artifacts found!"
exit 1
fi
cd ./release-assets
# Generate checksums for all files (including latest versions)
if ls *.zip >/dev/null 2>&1; then
sha256sum *.zip > SHA256SUMS
sha512sum *.zip > SHA512SUMS
fi
# Create signature placeholder files
for file in *.zip; do
echo "# Signature for $file" > "${file}.asc"
echo "# GPG signature will be added in future versions" >> "${file}.asc"
done
echo "📦 Prepared assets:"
ls -la
echo "🔢 Total asset count: $ASSETS_COUNT"
- name: Upload to GitHub Release
env:
GH_TOKEN: ${{ github.token }}
shell: bash
run: |
TAG="${{ needs.build-check.outputs.version }}"
cd ./release-assets
# Upload all files
for file in *; do
if [[ -f "$file" ]]; then
echo "📤 Uploading $file..."
gh release upload "$TAG" "$file" --clobber
fi
done
echo "✅ All assets uploaded successfully"
# Update latest.json for stable releases only
update-latest-version:
name: Update Latest Version
needs: [build-check, upload-release-assets]
if: startsWith(github.ref, 'refs/tags/')
runs-on: ubuntu-latest
steps:
- name: Update latest.json
env:
OSS_ACCESS_KEY_ID: ${{ secrets.ALICLOUDOSS_KEY_ID }}
OSS_ACCESS_KEY_SECRET: ${{ secrets.ALICLOUDOSS_KEY_SECRET }}
OSS_REGION: cn-beijing
OSS_ENDPOINT: https://oss-cn-beijing.aliyuncs.com
shell: bash
run: |
if [[ -z "$OSS_ACCESS_KEY_ID" ]]; then
echo "⚠️ OSS credentials not available, skipping latest.json update"
exit 0
fi
VERSION="${{ needs.build-check.outputs.version }}"
TAG="${{ needs.build-check.outputs.version }}"
# Install ossutil
OSSUTIL_VERSION="2.1.1"
OSSUTIL_ZIP="ossutil-${OSSUTIL_VERSION}-linux-amd64.zip"
OSSUTIL_DIR="ossutil-${OSSUTIL_VERSION}-linux-amd64"
curl -o "$OSSUTIL_ZIP" "https://gosspublic.alicdn.com/ossutil/v2/${OSSUTIL_VERSION}/${OSSUTIL_ZIP}"
unzip "$OSSUTIL_ZIP"
mv "${OSSUTIL_DIR}/ossutil" /usr/local/bin/
rm -rf "$OSSUTIL_DIR" "$OSSUTIL_ZIP"
chmod +x /usr/local/bin/ossutil
# Create latest.json
cat > latest.json << EOF
{
"version": "${VERSION}",
"tag": "${TAG}",
"release_date": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
"release_type": "stable",
"download_url": "https://github.com/${{ github.repository }}/releases/tag/${TAG}"
}
EOF
# Upload to OSS
ossutil cp latest.json oss://rustfs-version/latest.json --force
echo "✅ Updated latest.json for stable release $VERSION"
# Publish release (remove draft status)
publish-release:
name: Publish Release
needs: [build-check, create-release, upload-release-assets]
if: startsWith(github.ref, 'refs/tags/') && needs.build-check.outputs.build_type != 'development'
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Update release notes and publish
env:
GH_TOKEN: ${{ github.token }}
shell: bash
run: |
TAG="${{ needs.build-check.outputs.version }}"
VERSION="${{ needs.build-check.outputs.version }}"
IS_PRERELEASE="${{ needs.build-check.outputs.is_prerelease }}"
BUILD_TYPE="${{ needs.build-check.outputs.build_type }}"
# Determine release type
if [[ "$BUILD_TYPE" == "prerelease" ]]; then
if [[ "$TAG" == *"alpha"* ]]; then
RELEASE_TYPE="alpha"
elif [[ "$TAG" == *"beta"* ]]; then
RELEASE_TYPE="beta"
elif [[ "$TAG" == *"rc"* ]]; then
RELEASE_TYPE="rc"
else
RELEASE_TYPE="prerelease"
fi
else
RELEASE_TYPE="release"
fi
# Get original release notes from tag
ORIGINAL_NOTES=$(git tag -l --format='%(contents)' "${TAG}")
if [[ -z "$ORIGINAL_NOTES" || "$ORIGINAL_NOTES" =~ ^[[:space:]]*$ ]]; then
if [[ "$IS_PRERELEASE" == "true" ]]; then
ORIGINAL_NOTES="Pre-release ${VERSION} (${RELEASE_TYPE})"
else
ORIGINAL_NOTES="Release ${VERSION}"
fi
fi
# Publish the release (remove draft status)
gh release edit "$TAG" --draft=false
echo "🎉 Released $TAG successfully!"
echo "📄 Release URL: ${{ needs.create-release.outputs.release_url }}"


@@ -83,6 +83,16 @@ jobs:
# Never skip release events and tag pushes
do_not_skip: '["workflow_dispatch", "schedule", "merge_group", "release", "push"]'
typos:
name: Typos
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
- name: Typos check with custom config file
uses: crate-ci/typos@master
test-and-lint:
name: Test and Lint
needs: skip-check


@@ -38,7 +38,6 @@ on:
workflow_run:
workflows: ["Build and Release"]
types: [completed]
branches: [main]
# Manual trigger with same parameters for consistency
workflow_dispatch:
inputs:
@@ -83,6 +82,8 @@ jobs:
uses: actions/checkout@v4
with:
fetch-depth: 0
# For workflow_run events, checkout the specific commit that triggered the workflow
ref: ${{ github.event.workflow_run.head_sha || github.sha }}
- name: Check build conditions
id: check
@@ -114,32 +115,62 @@ jobs:
# Use Git to generate consistent short SHA (ensures uniqueness like build.yml)
short_sha=$(git rev-parse --short "${{ github.event.workflow_run.head_sha }}")
# Determine build type based on branch and commit
if [[ "${{ github.event.workflow_run.head_branch }}" == "main" ]]; then
build_type="development"
version="dev-${short_sha}"
# Skip Docker build for development builds
should_build=false
echo "⏭️ Skipping Docker build for development version (main branch)"
elif [[ "${{ github.event.workflow_run.event }}" == "push" ]] && [[ "${{ github.event.workflow_run.head_branch }}" =~ ^refs/tags/ ]]; then
# Tag push - only build for releases and prereleases
tag_name="${{ github.event.workflow_run.head_branch }}"
version="${tag_name#refs/tags/}"
if [[ "$version" == *"alpha"* ]] || [[ "$version" == *"beta"* ]] || [[ "$version" == *"rc"* ]]; then
build_type="prerelease"
is_prerelease=true
echo "🧪 Building Docker image for prerelease: $version"
# Determine build type based on triggering workflow event and ref
triggering_event="${{ github.event.workflow_run.event }}"
head_branch="${{ github.event.workflow_run.head_branch }}"
echo "🔍 Analyzing triggering workflow:"
echo " 📋 Event: $triggering_event"
echo " 🌿 Head branch: $head_branch"
echo " 📎 Head SHA: ${{ github.event.workflow_run.head_sha }}"
# Check if this was triggered by a tag push
if [[ "$triggering_event" == "push" ]]; then
# For tag pushes, head_branch will be like "refs/tags/v1.0.0" or just "v1.0.0"
if [[ "$head_branch" == refs/tags/* ]]; then
# Extract tag name from refs/tags/TAG_NAME
tag_name="${head_branch#refs/tags/}"
version="$tag_name"
elif [[ "$head_branch" =~ ^v?[0-9]+\.[0-9]+\.[0-9]+ ]]; then
# Direct tag name like "v1.0.0" or "1.0.0-alpha.1"
version="$head_branch"
elif [[ "$head_branch" == "main" ]]; then
# Regular branch push to main
build_type="development"
version="dev-${short_sha}"
should_build=false
echo "⏭️ Skipping Docker build for development version (main branch push)"
else
build_type="release"
create_latest=true
echo "🚀 Building Docker image for release: $version"
# Other branch push
build_type="development"
version="dev-${short_sha}"
should_build=false
echo "⏭️ Skipping Docker build for development version (branch: $head_branch)"
fi
# If we extracted a version (tag), determine release type
if [[ -n "$version" ]] && [[ "$version" != "dev-${short_sha}" ]]; then
# Remove 'v' prefix if present for consistent version format
if [[ "$version" == v* ]]; then
version="${version#v}"
fi
if [[ "$version" == *"alpha"* ]] || [[ "$version" == *"beta"* ]] || [[ "$version" == *"rc"* ]]; then
build_type="prerelease"
is_prerelease=true
echo "🧪 Building Docker image for prerelease: $version"
else
build_type="release"
create_latest=true
echo "🚀 Building Docker image for release: $version"
fi
fi
else
# Non-push events
build_type="development"
version="dev-${short_sha}"
# Skip Docker build for development builds
should_build=false
echo "⏭️ Skipping Docker build for development version"
echo "⏭️ Skipping Docker build for development version (event: $triggering_event)"
fi
echo "🔄 Build triggered by workflow_run:"
@@ -169,21 +200,23 @@ jobs:
create_latest=true
echo "🚀 Building with latest stable release version"
;;
v[0-9]*)
build_type="release"
create_latest=true
echo "📦 Building with specific release version: $input_version"
;;
v*alpha*|v*beta*|v*rc*)
# Prerelease versions (must match first, more specific)
v*alpha*|v*beta*|v*rc*|*alpha*|*beta*|*rc*)
build_type="prerelease"
is_prerelease=true
echo "🧪 Building with prerelease version: $input_version"
;;
# Release versions (match after prereleases, more general)
v[0-9]*|[0-9]*.*.*)
build_type="release"
create_latest=true
echo "📦 Building with specific release version: $input_version"
;;
*)
# Invalid version for Docker build
should_build=false
echo "❌ Invalid version for Docker build: $input_version"
echo "⚠️ Only release versions (latest, v1.0.0) and prereleases (v1.0.0-alpha1) are supported"
echo "⚠️ Only release versions (latest, v1.0.0, 1.0.0) and prereleases (v1.0.0-alpha1, 1.0.0-beta2) are supported"
;;
esac
fi


@@ -1,78 +0,0 @@
## RustFS ${VERSION_CLEAN}
${ORIGINAL_NOTES}
---
### 🚀 Quick Download
**Linux (Static Binaries - No Dependencies):**
```bash
# x86_64 (Intel/AMD)
curl -LO https://github.com/rustfs/rustfs/releases/download/${VERSION}/rustfs-x86_64-unknown-linux-musl.zip
unzip rustfs-x86_64-unknown-linux-musl.zip
sudo mv rustfs /usr/local/bin/
# ARM64 (Graviton, Apple Silicon VMs)
curl -LO https://github.com/rustfs/rustfs/releases/download/${VERSION}/rustfs-aarch64-unknown-linux-musl.zip
unzip rustfs-aarch64-unknown-linux-musl.zip
sudo mv rustfs /usr/local/bin/
```
**macOS:**
```bash
# Apple Silicon (M1/M2/M3)
curl -LO https://github.com/rustfs/rustfs/releases/download/${VERSION}/rustfs-aarch64-apple-darwin.zip
unzip rustfs-aarch64-apple-darwin.zip
sudo mv rustfs /usr/local/bin/
# Intel
curl -LO https://github.com/rustfs/rustfs/releases/download/${VERSION}/rustfs-x86_64-apple-darwin.zip
unzip rustfs-x86_64-apple-darwin.zip
sudo mv rustfs /usr/local/bin/
```
### 📁 Available Downloads
| Platform | Architecture | File | Description |
|----------|-------------|------|-------------|
| Linux | x86_64 | `rustfs-x86_64-unknown-linux-musl.zip` | Static binary, no dependencies |
| Linux | ARM64 | `rustfs-aarch64-unknown-linux-musl.zip` | Static binary, no dependencies |
| macOS | Apple Silicon | `rustfs-aarch64-apple-darwin.zip` | Native binary, ZIP archive |
| macOS | Intel | `rustfs-x86_64-apple-darwin.zip` | Native binary, ZIP archive |
### 🔐 Verification
Download checksums and verify your download:
```bash
# Download checksums
curl -LO https://github.com/rustfs/rustfs/releases/download/${VERSION}/SHA256SUMS
# Verify (Linux)
sha256sum -c SHA256SUMS --ignore-missing
# Verify (macOS)
shasum -a 256 -c SHA256SUMS --ignore-missing
```
### 🛠️ System Requirements
- **Linux**: Any distribution with glibc 2.17+ (CentOS 7+, Ubuntu 16.04+)
- **macOS**: 10.15+ (Catalina or later)
- **Windows**: Windows 10 version 1809 or later
### 📚 Documentation
- [Installation Guide](https://github.com/rustfs/rustfs#installation)
- [Quick Start](https://github.com/rustfs/rustfs#quick-start)
- [Configuration](https://github.com/rustfs/rustfs/blob/main/docs/)
- [API Documentation](https://docs.rs/rustfs)
### 🆘 Support
- 🐛 [Report Issues](https://github.com/rustfs/rustfs/issues)
- 💬 [Community Discussions](https://github.com/rustfs/rustfs/discussions)
- 📖 [Documentation](https://github.com/rustfs/rustfs/tree/main/docs)


@@ -1,353 +0,0 @@
# Copyright 2024 RustFS Team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: Release
on:
push:
tags: ["*.*.*"]
workflow_dispatch:
inputs:
tag:
description: "Tag to create release for"
required: true
type: string
env:
CARGO_TERM_COLOR: always
jobs:
# Determine release type
release-check:
name: Release Type Check
runs-on: ubuntu-latest
outputs:
tag: ${{ steps.check.outputs.tag }}
version: ${{ steps.check.outputs.version }}
is_prerelease: ${{ steps.check.outputs.is_prerelease }}
release_type: ${{ steps.check.outputs.release_type }}
steps:
- name: Determine release type
id: check
run: |
if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
TAG="${{ github.event.inputs.tag }}"
else
TAG="${GITHUB_REF#refs/tags/}"
fi
VERSION="${TAG}"
# Check if this is a prerelease
IS_PRERELEASE=false
RELEASE_TYPE="release"
if [[ "$TAG" == *"alpha"* ]] || [[ "$TAG" == *"beta"* ]] || [[ "$TAG" == *"rc"* ]]; then
IS_PRERELEASE=true
if [[ "$TAG" == *"alpha"* ]]; then
RELEASE_TYPE="alpha"
elif [[ "$TAG" == *"beta"* ]]; then
RELEASE_TYPE="beta"
elif [[ "$TAG" == *"rc"* ]]; then
RELEASE_TYPE="rc"
fi
fi
echo "tag=$TAG" >> $GITHUB_OUTPUT
echo "version=$VERSION" >> $GITHUB_OUTPUT
echo "is_prerelease=$IS_PRERELEASE" >> $GITHUB_OUTPUT
echo "release_type=$RELEASE_TYPE" >> $GITHUB_OUTPUT
echo "📦 Release Type: $RELEASE_TYPE"
echo "🏷️ Tag: $TAG"
echo "🔢 Version: $VERSION"
echo "🚀 Is Prerelease: $IS_PRERELEASE"
# Create GitHub Release
create-release:
name: Create GitHub Release
needs: release-check
runs-on: ubuntu-latest
permissions:
contents: write
outputs:
release_id: ${{ steps.create.outputs.release_id }}
release_url: ${{ steps.create.outputs.release_url }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Create GitHub Release
id: create
env:
GH_TOKEN: ${{ github.token }}
run: |
TAG="${{ needs.release-check.outputs.tag }}"
VERSION="${{ needs.release-check.outputs.version }}"
IS_PRERELEASE="${{ needs.release-check.outputs.is_prerelease }}"
RELEASE_TYPE="${{ needs.release-check.outputs.release_type }}"
# Check if release already exists
if gh release view "$TAG" >/dev/null 2>&1; then
echo "Release $TAG already exists"
RELEASE_ID=$(gh release view "$TAG" --json databaseId --jq '.databaseId')
RELEASE_URL=$(gh release view "$TAG" --json url --jq '.url')
else
# Get release notes from tag message
RELEASE_NOTES=$(git tag -l --format='%(contents)' "${TAG}")
if [[ -z "$RELEASE_NOTES" || "$RELEASE_NOTES" =~ ^[[:space:]]*$ ]]; then
if [[ "$IS_PRERELEASE" == "true" ]]; then
RELEASE_NOTES="Pre-release ${VERSION} (${RELEASE_TYPE})"
else
RELEASE_NOTES="Release ${VERSION}"
fi
fi
# Create release title
if [[ "$IS_PRERELEASE" == "true" ]]; then
TITLE="RustFS $VERSION (${RELEASE_TYPE})"
else
TITLE="RustFS $VERSION"
fi
# Create the release
PRERELEASE_FLAG=""
if [[ "$IS_PRERELEASE" == "true" ]]; then
PRERELEASE_FLAG="--prerelease"
fi
gh release create "$TAG" \
--title "$TITLE" \
--notes "$RELEASE_NOTES" \
$PRERELEASE_FLAG \
--draft
RELEASE_ID=$(gh release view "$TAG" --json databaseId --jq '.databaseId')
RELEASE_URL=$(gh release view "$TAG" --json url --jq '.url')
fi
echo "release_id=$RELEASE_ID" >> $GITHUB_OUTPUT
echo "release_url=$RELEASE_URL" >> $GITHUB_OUTPUT
echo "Created release: $RELEASE_URL"
# Wait for build artifacts from build.yml
wait-for-artifacts:
name: Wait for Build Artifacts
needs: release-check
runs-on: ubuntu-latest
steps:
- name: Wait for build workflow
uses: lewagon/wait-on-check-action@v1.3.1
with:
ref: ${{ needs.release-check.outputs.tag }}
check-name: "Build RustFS"
repo-token: ${{ secrets.GITHUB_TOKEN }}
wait-interval: 30
allowed-conclusions: success
# Download and prepare release assets
prepare-assets:
name: Prepare Release Assets
needs: [release-check, wait-for-artifacts]
runs-on: ubuntu-latest
outputs:
assets_prepared: ${{ steps.prepare.outputs.assets_prepared }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Download artifacts from build workflow
uses: actions/download-artifact@v4
with:
path: ./artifacts
pattern: rustfs-*
merge-multiple: true
- name: Prepare release assets
id: prepare
run: |
VERSION="${{ needs.release-check.outputs.version }}"
TAG="${{ needs.release-check.outputs.tag }}"
mkdir -p ./release-assets
# Copy and verify artifacts
ASSETS_COUNT=0
for file in ./artifacts/rustfs-*.zip; do
if [[ -f "$file" ]]; then
cp "$file" ./release-assets/
ASSETS_COUNT=$((ASSETS_COUNT + 1))
fi
done
if [[ $ASSETS_COUNT -eq 0 ]]; then
echo "❌ No artifacts found!"
exit 1
fi
cd ./release-assets
# Generate checksums
if ls *.zip >/dev/null 2>&1; then
sha256sum *.zip > SHA256SUMS
sha512sum *.zip > SHA512SUMS
fi
# TODO: Add GPG signing for signatures
# For now, create placeholder signature files
for file in *.zip; do
echo "# Signature for $file" > "${file}.asc"
echo "# GPG signature will be added in future versions" >> "${file}.asc"
done
echo "assets_prepared=true" >> $GITHUB_OUTPUT
echo "📦 Prepared assets:"
ls -la
echo "🔢 Asset count: $ASSETS_COUNT"
- name: Upload prepared assets
uses: actions/upload-artifact@v4
with:
name: release-assets-${{ needs.release-check.outputs.tag }}
path: ./release-assets/
retention-days: 30
# Upload assets to GitHub Release
upload-assets:
name: Upload Release Assets
needs: [release-check, create-release, prepare-assets]
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- name: Download prepared assets
uses: actions/download-artifact@v4
with:
name: release-assets-${{ needs.release-check.outputs.tag }}
path: ./release-assets
- name: Upload to GitHub Release
env:
GH_TOKEN: ${{ github.token }}
run: |
TAG="${{ needs.release-check.outputs.tag }}"
cd ./release-assets
# Upload all files
for file in *; do
if [[ -f "$file" ]]; then
echo "📤 Uploading $file..."
gh release upload "$TAG" "$file" --clobber
fi
done
echo "✅ All assets uploaded successfully"
# Update latest.json for stable releases only
update-latest:
name: Update Latest Version
needs: [release-check, upload-assets]
if: needs.release-check.outputs.is_prerelease == 'false'
runs-on: ubuntu-latest
steps:
- name: Update latest.json
env:
OSS_ACCESS_KEY_ID: ${{ secrets.ALICLOUDOSS_KEY_ID }}
OSS_ACCESS_KEY_SECRET: ${{ secrets.ALICLOUDOSS_KEY_SECRET }}
run: |
if [[ -z "$OSS_ACCESS_KEY_ID" ]]; then
echo "⚠️ OSS credentials not available, skipping latest.json update"
exit 0
fi
VERSION="${{ needs.release-check.outputs.version }}"
TAG="${{ needs.release-check.outputs.tag }}"
# Install ossutil
OSSUTIL_VERSION="2.1.1"
OSSUTIL_ZIP="ossutil-${OSSUTIL_VERSION}-linux-amd64.zip"
OSSUTIL_DIR="ossutil-${OSSUTIL_VERSION}-linux-amd64"
curl -o "$OSSUTIL_ZIP" "https://gosspublic.alicdn.com/ossutil/v2/${OSSUTIL_VERSION}/${OSSUTIL_ZIP}"
unzip "$OSSUTIL_ZIP"
chmod +x "${OSSUTIL_DIR}/ossutil"
# Create latest.json
cat > latest.json << EOF
{
"version": "${VERSION}",
"tag": "${TAG}",
"release_date": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
"release_type": "stable",
"download_url": "https://github.com/${{ github.repository }}/releases/tag/${TAG}"
}
EOF
# Upload to OSS
./${OSSUTIL_DIR}/ossutil cp latest.json oss://rustfs-version/latest.json --force
echo "✅ Updated latest.json for stable release $VERSION"
# Publish release (remove draft status)
publish-release:
name: Publish Release
needs: [release-check, create-release, upload-assets]
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Update release notes and publish
env:
GH_TOKEN: ${{ github.token }}
run: |
TAG="${{ needs.release-check.outputs.tag }}"
VERSION="${{ needs.release-check.outputs.version }}"
IS_PRERELEASE="${{ needs.release-check.outputs.is_prerelease }}"
RELEASE_TYPE="${{ needs.release-check.outputs.release_type }}"
# Get original release notes from tag
ORIGINAL_NOTES=$(git tag -l --format='%(contents)' "${TAG}")
if [[ -z "$ORIGINAL_NOTES" || "$ORIGINAL_NOTES" =~ ^[[:space:]]*$ ]]; then
if [[ "$IS_PRERELEASE" == "true" ]]; then
ORIGINAL_NOTES="Pre-release ${VERSION} (${RELEASE_TYPE})"
else
ORIGINAL_NOTES="Release ${VERSION}"
fi
fi
# Use release notes template if available
if [[ -f ".github/workflows/release-notes-template.md" ]]; then
# Substitute variables in template
sed -e "s/\${VERSION}/$TAG/g" \
-e "s/\${VERSION_CLEAN}/$VERSION/g" \
-e "s/\${ORIGINAL_NOTES}/$(echo "$ORIGINAL_NOTES" | sed 's/[[\.*^$()+?{|]/\\&/g')/g" \
.github/workflows/release-notes-template.md > enhanced_notes.md
# Update release notes
gh release edit "$TAG" --notes-file enhanced_notes.md
fi
# Publish the release (remove draft status)
gh release edit "$TAG" --draft=false
echo "🎉 Released $TAG successfully!"
echo "📄 Release URL: ${{ needs.create-release.outputs.release_url }}"

Cargo.lock (generated)

File diff suppressed because it is too large.

@@ -108,8 +108,8 @@ cfg-if = "1.0.1"
chacha20poly1305 = { version = "0.10.1" }
chrono = { version = "0.4.41", features = ["serde"] }
clap = { version = "4.5.41", features = ["derive", "env"] }
const-str = { version = "0.6.2", features = ["std", "proc"] }
crc32fast = "1.4.2"
const-str = { version = "0.6.3", features = ["std", "proc"] }
crc32fast = "1.5.0"
criterion = { version = "0.5", features = ["html_reports"] }
dashmap = "6.1.0"
datafusion = "46.0.1"
@@ -130,7 +130,7 @@ hex-simd = "0.8.0"
highway = { version = "1.3.0" }
hmac = "0.12.1"
hyper = "1.6.0"
hyper-util = { version = "0.1.15", features = [
hyper-util = { version = "0.1.16", features = [
"tokio",
"server-auto",
"server-graceful",
@@ -148,6 +148,7 @@ keyring = { version = "3.6.2", features = [
] }
lazy_static = "1.5.0"
libsystemd = { version = "0.7.2" }
lru = "0.16"
local-ip-address = "0.6.5"
lz4 = "1.28.1"
matchit = "0.8.4"
@@ -183,7 +184,7 @@ percent-encoding = "2.3.1"
pin-project-lite = "0.2.16"
prost = "0.13.5"
quick-xml = "0.38.0"
rand = "0.9.1"
rand = "0.9.2"
rdkafka = { version = "0.38.0", features = ["tokio"] }
reed-solomon-simd = { version = "3.0.1" }
regex = { version = "1.11.1" }
@@ -196,7 +197,7 @@ reqwest = { version = "0.12.22", default-features = false, features = [
"json",
"blocking",
] }
rfd = { version = "0.15.3", default-features = false, features = [
rfd = { version = "0.15.4", default-features = false, features = [
"xdg-portal",
"tokio",
] }
@@ -213,7 +214,7 @@ rustls-pemfile = "2.2.0"
s3s = { version = "0.12.0-minio-preview.2" }
shadow-rs = { version = "1.2.0", default-features = false }
serde = { version = "1.0.219", features = ["derive"] }
serde_json = { version = "1.0.140", features = ["raw_value"] }
serde_json = { version = "1.0.141", features = ["raw_value"] }
serde-xml-rs = "0.8.1"
serde_urlencoded = "0.7.1"
sha1 = "0.10.6"
@@ -223,8 +224,8 @@ smallvec = { version = "1.15.1", features = ["serde"] }
snafu = "0.8.6"
snap = "1.1.1"
socket2 = "0.6.0"
strum = { version = "0.27.1", features = ["derive"] }
sysinfo = "0.36.0"
strum = { version = "0.27.2", features = ["derive"] }
sysinfo = "0.36.1"
sysctl = "0.6.0"
tempfile = "3.20.0"
temp-env = "0.3.6"


@@ -1,121 +1,101 @@
# Multi-stage build for RustFS production image
FROM alpine:latest AS build
# Build arguments - use TARGETPLATFORM for consistency with Dockerfile.source
ARG TARGETPLATFORM
ARG BUILDPLATFORM
# Build stage: Download and extract RustFS binary
FROM alpine:3.22 AS build
# Build arguments for platform and release
ARG TARGETARCH
ARG RELEASE=latest
# Install dependencies for downloading and verifying binaries
RUN apk add --no-cache \
ca-certificates \
curl \
bash \
wget \
unzip \
jq
# Install minimal dependencies for downloading and extracting
RUN apk add --no-cache ca-certificates curl unzip
# Create build directory
WORKDIR /build
# Map TARGETPLATFORM to architecture format used in builds
RUN case "${TARGETPLATFORM}" in \
"linux/amd64") ARCH="x86_64" ;; \
"linux/arm64") ARCH="aarch64" ;; \
*) echo "Unsupported platform: ${TARGETPLATFORM}" && exit 1 ;; \
esac && \
echo "ARCH=${ARCH}" > /build/arch.env
# Download rustfs binary from dl.rustfs.com (release channel only)
RUN . /build/arch.env && \
BASE_URL="https://dl.rustfs.com/artifacts/rustfs/release" && \
PLATFORM="linux" && \
if [ "${RELEASE}" = "latest" ]; then \
# Download latest release version \
PACKAGE_NAME="rustfs-${PLATFORM}-${ARCH}-latest.zip"; \
DOWNLOAD_URL="${BASE_URL}/${PACKAGE_NAME}"; \
echo "📥 Downloading latest release build: ${PACKAGE_NAME}"; \
# Set architecture-specific variables
RUN if [ "$TARGETARCH" = "amd64" ]; then \
echo "x86_64-musl" > /tmp/arch; \
elif [ "$TARGETARCH" = "arm64" ]; then \
echo "aarch64-musl" > /tmp/arch; \
else \
# Download specific release version \
PACKAGE_NAME="rustfs-${PLATFORM}-${ARCH}-v${RELEASE}.zip"; \
DOWNLOAD_URL="${BASE_URL}/${PACKAGE_NAME}"; \
echo "📥 Downloading specific release version: ${PACKAGE_NAME}"; \
echo "unsupported" > /tmp/arch; \
fi
RUN ARCH=$(cat /tmp/arch) && \
if [ "$ARCH" = "unsupported" ]; then \
echo "Unsupported architecture: $TARGETARCH" && exit 1; \
fi && \
echo "🔗 Download URL: ${DOWNLOAD_URL}" && \
curl -f -L "${DOWNLOAD_URL}" -o /build/rustfs.zip && \
if [ ! -f /build/rustfs.zip ] || [ ! -s /build/rustfs.zip ]; then \
echo "❌ Failed to download binary package"; \
echo "💡 Make sure the package ${PACKAGE_NAME} exists"; \
echo "🔗 Check: ${DOWNLOAD_URL}"; \
exit 1; \
if [ "${RELEASE}" = "latest" ]; then \
VERSION="latest"; \
else \
VERSION="v${RELEASE#v}"; \
fi && \
unzip /build/rustfs.zip -d /build && \
BASE_URL="https://dl.rustfs.com/artifacts/rustfs/release" && \
PACKAGE_NAME="rustfs-linux-${ARCH}-${VERSION}.zip" && \
DOWNLOAD_URL="${BASE_URL}/${PACKAGE_NAME}" && \
echo "Downloading ${PACKAGE_NAME} from ${DOWNLOAD_URL}" >&2 && \
curl -f -L "${DOWNLOAD_URL}" -o rustfs.zip && \
unzip rustfs.zip -d /build && \
chmod +x /build/rustfs && \
rm /build/rustfs.zip && \
echo "✅ Successfully downloaded and extracted rustfs binary"
rm rustfs.zip || { echo "Failed to download or extract ${PACKAGE_NAME}" >&2; exit 1; }
# Runtime stage
FROM alpine:latest
# Runtime stage: Configure runtime environment
FROM alpine:3.22.1
# Set build arguments and labels
# Build arguments and labels
ARG RELEASE=latest
ARG BUILD_DATE
ARG VCS_REF
LABEL name="RustFS" \
vendor="RustFS Team" \
maintainer="RustFS Team <dev@rustfs.com>" \
version="${RELEASE}" \
release="${RELEASE}" \
build-date="${BUILD_DATE}" \
vcs-ref="${VCS_REF}" \
summary="RustFS is a high-performance distributed object storage system written in Rust, compatible with S3 API." \
description="RustFS is a high-performance distributed object storage software built using Rust. It supports erasure coding storage, multi-tenant management, observability, and other enterprise-level features." \
url="https://rustfs.com" \
license="Apache-2.0"
vendor="RustFS Team" \
maintainer="RustFS Team <dev@rustfs.com>" \
version="${RELEASE}" \
release="${RELEASE}" \
build-date="${BUILD_DATE}" \
vcs-ref="${VCS_REF}" \
summary="High-performance distributed object storage system compatible with S3 API" \
description="RustFS is a distributed object storage system written in Rust, supporting erasure coding, multi-tenant management, and observability." \
url="https://rustfs.com" \
license="Apache-2.0"
# Install runtime dependencies
RUN apk add --no-cache \
ca-certificates \
curl \
tzdata \
bash \
&& addgroup -g 1000 rustfs \
&& adduser -u 1000 -G rustfs -s /bin/sh -D rustfs
RUN echo "https://dl-cdn.alpinelinux.org/alpine/v3.20/community" >> /etc/apk/repositories && \
apk update && \
apk add --no-cache ca-certificates bash gosu coreutils shadow && \
addgroup -g 1000 rustfs && \
adduser -u 1000 -G rustfs -s /bin/bash -D rustfs
# Environment variables
ENV RUSTFS_ACCESS_KEY=rustfsadmin \
# Copy CA certificates and RustFS binary from build stage
COPY --from=build /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/
COPY --from=build /build/rustfs /usr/bin/rustfs
# Copy entry point script
COPY entrypoint.sh /entrypoint.sh
# Set permissions
RUN chmod +x /usr/bin/rustfs /entrypoint.sh && \
mkdir -p /data /logs && \
chown rustfs:rustfs /data /logs && \
chmod 700 /data /logs
# Environment variables (credentials should be set via environment or secrets)
ENV RUSTFS_ADDRESS=:9000 \
RUSTFS_ACCESS_KEY=rustfsadmin \
RUSTFS_SECRET_KEY=rustfsadmin \
RUSTFS_ADDRESS=":9000" \
RUSTFS_CONSOLE_ENABLE=true \
RUSTFS_VOLUMES=/data \
RUST_LOG=warn
# Set permissions for /usr/bin (similar to MinIO's approach)
RUN chmod -R 755 /usr/bin
# Copy CA certificates and binaries from build stage
COPY --from=build /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/
COPY --from=build /build/rustfs /usr/bin/
# Set executable permissions
RUN chmod +x /usr/bin/rustfs
# Create data directory
RUN mkdir -p /data /config && chown -R rustfs:rustfs /data /config
# Switch to non-root user
USER rustfs
# Set working directory
WORKDIR /data
RUST_LOG=warn \
RUSTFS_OBS_LOG_DIRECTORY=/logs \
RUSTFS_SINKS_FILE_PATH=/logs
# Expose port
EXPOSE 9000
# Volumes for data and logs
VOLUME ["/data", "/logs"]
# Volume for data
VOLUME ["/data"]
# Set entry point
ENTRYPOINT ["/entrypoint.sh"]
CMD ["/usr/bin/rustfs"]
# Set entrypoint
ENTRYPOINT ["/usr/bin/rustfs"]


@@ -112,6 +112,8 @@ RUN apt-get update && apt-get install -y \
ca-certificates \
tzdata \
wget \
coreutils \
passwd \
&& rm -rf /var/lib/apt/lists/*
# Create rustfs user and group
@@ -128,6 +130,10 @@ RUN mkdir -p /data/rustfs{0,1,2,3} && \
COPY --from=builder /usr/local/bin/rustfs /app/rustfs
RUN chmod +x /app/rustfs && chown rustfs:rustfs /app/rustfs
# Copy entrypoint script
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
# Switch to non-root user
USER rustfs
@@ -142,9 +148,9 @@ ENV RUSTFS_ACCESS_KEY=rustfsadmin \
RUSTFS_VOLUMES=/data \
RUST_LOG=warn
# Volume for data
VOLUME ["/data"]
# Set default command
# Set entrypoint and default command
ENTRYPOINT ["/entrypoint.sh"]
CMD ["/app/rustfs"]

Makefile

@@ -23,7 +23,7 @@ fmt-check:
.PHONY: clippy
clippy:
@echo "🔍 Running clippy checks..."
cargo clippy --all-targets --all-features -- -D warnings
cargo clippy --all-targets --all-features --fix --allow-dirty -- -D warnings
.PHONY: check
check:
@@ -46,21 +46,6 @@ setup-hooks:
chmod +x .git/hooks/pre-commit
@echo "✅ Git hooks setup complete!"
.PHONY: init-devenv
init-devenv:
$(DOCKER_CLI) build -t $(IMAGE_NAME) -f Dockerfile.source .
$(DOCKER_CLI) stop $(CONTAINER_NAME)
$(DOCKER_CLI) rm $(CONTAINER_NAME)
$(DOCKER_CLI) run -d --name $(CONTAINER_NAME) -p 9010:9010 -p 9000:9000 -v $(shell pwd):/root/s3-rustfs -it $(IMAGE_NAME)
.PHONY: start
start:
$(DOCKER_CLI) start $(CONTAINER_NAME)
.PHONY: stop
stop:
$(DOCKER_CLI) stop $(CONTAINER_NAME)
.PHONY: e2e-server
e2e-server:
sh $(shell pwd)/scripts/run.sh
@@ -80,8 +65,6 @@ build-dev:
@echo "🔨 Building RustFS in development mode..."
./build-rustfs.sh --dev
# Docker-based build (alternative approach)
# Usage: make BUILD_OS=ubuntu22.04 build-docker
# Output: target/ubuntu22.04/release/rustfs
@@ -98,29 +81,45 @@ build-docker:
.PHONY: build-musl
build-musl:
@echo "🔨 Building rustfs for x86_64-unknown-linux-musl..."
@echo "💡 On macOS/Windows, use 'make build-docker' or 'make docker-buildx' instead"
@echo "💡 On macOS/Windows, use 'make build-docker' or 'make docker-dev' instead"
./build-rustfs.sh --platform x86_64-unknown-linux-musl
.PHONY: build-gnu
build-gnu:
@echo "🔨 Building rustfs for x86_64-unknown-linux-gnu..."
@echo "💡 On macOS/Windows, use 'make build-docker' or 'make docker-buildx' instead"
@echo "💡 On macOS/Windows, use 'make build-docker' or 'make docker-dev' instead"
./build-rustfs.sh --platform x86_64-unknown-linux-gnu
.PHONY: build-musl-arm64
build-musl-arm64:
@echo "🔨 Building rustfs for aarch64-unknown-linux-musl..."
@echo "💡 On macOS/Windows, use 'make build-docker' or 'make docker-dev' instead"
./build-rustfs.sh --platform aarch64-unknown-linux-musl
.PHONY: build-gnu-arm64
build-gnu-arm64:
@echo "🔨 Building rustfs for aarch64-unknown-linux-gnu..."
@echo "💡 On macOS/Windows, use 'make build-docker' or 'make docker-dev' instead"
./build-rustfs.sh --platform aarch64-unknown-linux-gnu
.PHONY: deploy-dev
deploy-dev: build-musl
@echo "🚀 Deploying to dev server: $${IP}"
./scripts/dev_deploy.sh $${IP}
# Multi-architecture Docker build targets (NEW: using docker-buildx.sh)
# ========================================================================================
# Docker Multi-Architecture Builds (Primary Methods)
# ========================================================================================
# Production builds using docker-buildx.sh (for CI/CD and production)
.PHONY: docker-buildx
docker-buildx:
@echo "🏗️ Building multi-architecture Docker images with buildx..."
@echo "🏗️ Building multi-architecture production Docker images with buildx..."
./docker-buildx.sh
.PHONY: docker-buildx-push
docker-buildx-push:
@echo "🚀 Building and pushing multi-architecture Docker images with buildx..."
@echo "🚀 Building and pushing multi-architecture production Docker images with buildx..."
./docker-buildx.sh --push
.PHONY: docker-buildx-version
@@ -129,7 +128,7 @@ docker-buildx-version:
echo "❌ 错误: 请指定版本, 例如: make docker-buildx-version VERSION=v1.0.0"; \
exit 1; \
fi
@echo "🏗️ Building multi-architecture Docker images (version: $(VERSION))..."
@echo "🏗️ Building multi-architecture production Docker images (version: $(VERSION))..."
./docker-buildx.sh --release $(VERSION)
.PHONY: docker-buildx-push-version
@@ -138,21 +137,114 @@ docker-buildx-push-version:
echo "❌ 错误: 请指定版本, 例如: make docker-buildx-push-version VERSION=v1.0.0"; \
exit 1; \
fi
@echo "🚀 Building and pushing multi-architecture Docker images (version: $(VERSION))..."
@echo "🚀 Building and pushing multi-architecture production Docker images (version: $(VERSION))..."
./docker-buildx.sh --release $(VERSION) --push
# Development/Source builds using direct buildx commands
.PHONY: docker-dev
docker-dev:
@echo "🏗️ Building multi-architecture development Docker images with buildx..."
@echo "💡 This builds from source code and is intended for local development and testing"
@echo "⚠️ Multi-arch images cannot be loaded locally, use docker-dev-push to push to registry"
$(DOCKER_CLI) buildx build \
--platform linux/amd64,linux/arm64 \
--file $(DOCKERFILE_SOURCE) \
--tag rustfs:source-latest \
--tag rustfs:dev-latest \
.
.PHONY: docker-dev-local
docker-dev-local:
@echo "🏗️ Building single-architecture development Docker image for local use..."
@echo "💡 This builds from source code for the current platform and loads locally"
$(DOCKER_CLI) buildx build \
--file $(DOCKERFILE_SOURCE) \
--tag rustfs:source-latest \
--tag rustfs:dev-latest \
--load \
.
.PHONY: docker-dev-push
docker-dev-push:
@if [ -z "$(REGISTRY)" ]; then \
echo "❌ 错误: 请指定镜像仓库, 例如: make docker-dev-push REGISTRY=ghcr.io/username"; \
exit 1; \
fi
@echo "🚀 Building and pushing multi-architecture development Docker images..."
@echo "💡 推送到仓库: $(REGISTRY)"
$(DOCKER_CLI) buildx build \
--platform linux/amd64,linux/arm64 \
--file $(DOCKERFILE_SOURCE) \
--tag $(REGISTRY)/rustfs:source-latest \
--tag $(REGISTRY)/rustfs:dev-latest \
--push \
.
# Local production builds using direct buildx (alternative to docker-buildx.sh)
.PHONY: docker-buildx-production-local
docker-buildx-production-local:
@echo "🏗️ Building single-architecture production Docker image locally..."
@echo "💡 Alternative to docker-buildx.sh for local testing"
$(DOCKER_CLI) buildx build \
--file $(DOCKERFILE_PRODUCTION) \
--tag rustfs:production-latest \
--tag rustfs:latest \
--load \
--build-arg RELEASE=latest \
.
# ========================================================================================
# Single Architecture Docker Builds (Traditional)
# ========================================================================================
.PHONY: docker-build-production
docker-build-production:
@echo "🏗️ Building production Docker image..."
@echo "🏗️ Building single-architecture production Docker image..."
@echo "💡 Consider using 'make docker-buildx-production-local' for multi-arch support"
$(DOCKER_CLI) build -f $(DOCKERFILE_PRODUCTION) -t rustfs:latest .
.PHONY: docker-build-source
docker-build-source:
@echo "🏗️ Building source Docker image..."
@echo "🏗️ Building single-architecture source Docker image..."
@echo "💡 Consider using 'make docker-dev-local' for multi-arch support"
$(DOCKER_CLI) build -f $(DOCKERFILE_SOURCE) -t rustfs:source .
# ========================================================================================
# Development Environment
# ========================================================================================
.PHONY: dev-env-start
dev-env-start:
@echo "🚀 Starting development environment..."
$(DOCKER_CLI) buildx build \
--file $(DOCKERFILE_SOURCE) \
--tag rustfs:dev \
--load \
.
$(DOCKER_CLI) stop $(CONTAINER_NAME) 2>/dev/null || true
$(DOCKER_CLI) rm $(CONTAINER_NAME) 2>/dev/null || true
$(DOCKER_CLI) run -d --name $(CONTAINER_NAME) \
-p 9010:9010 -p 9000:9000 \
-v $(shell pwd):/workspace \
-it rustfs:dev
.PHONY: dev-env-stop
dev-env-stop:
@echo "🛑 Stopping development environment..."
$(DOCKER_CLI) stop $(CONTAINER_NAME) 2>/dev/null || true
$(DOCKER_CLI) rm $(CONTAINER_NAME) 2>/dev/null || true
.PHONY: dev-env-restart
dev-env-restart: dev-env-stop dev-env-start
# ========================================================================================
# Build Utilities
# ========================================================================================
.PHONY: docker-inspect-multiarch
docker-inspect-multiarch:
@if [ -z "$(IMAGE)" ]; then \
@@ -165,15 +257,23 @@ docker-inspect-multiarch:
.PHONY: build-cross-all
build-cross-all:
@echo "🔧 Building all target architectures..."
@echo "💡 On macOS/Windows, use 'make docker-buildx' for reliable multi-arch builds"
@echo "💡 On macOS/Windows, use 'make docker-dev' for reliable multi-arch builds"
@echo "🔨 Generating protobuf code..."
cargo run --bin gproto || true
@echo "🔨 Building x86_64-unknown-linux-musl..."
./build-rustfs.sh --platform x86_64-unknown-linux-musl
@echo "🔨 Building x86_64-unknown-linux-gnu..."
./build-rustfs.sh --platform x86_64-unknown-linux-gnu
@echo "🔨 Building aarch64-unknown-linux-gnu..."
./build-rustfs.sh --platform aarch64-unknown-linux-gnu
@echo "🔨 Building x86_64-unknown-linux-musl..."
./build-rustfs.sh --platform x86_64-unknown-linux-musl
@echo "🔨 Building aarch64-unknown-linux-musl..."
./build-rustfs.sh --platform aarch64-unknown-linux-musl
@echo "✅ All architectures built successfully!"
# ========================================================================================
# Help and Documentation
# ========================================================================================
.PHONY: help-build
help-build:
@echo "🔨 RustFS 构建帮助:"
@@ -181,8 +281,10 @@ help-build:
@echo "🚀 本地构建 (推荐使用):"
@echo " make build # 构建 RustFS 二进制文件 (默认包含 console)"
@echo " make build-dev # 开发模式构建"
@echo " make build-musl # 构建 musl 版本"
@echo " make build-gnu # 构建 GNU 版本"
@echo " make build-musl # 构建 x86_64 musl 版本"
@echo " make build-gnu # 构建 x86_64 GNU 版本"
@echo " make build-musl-arm64 # 构建 aarch64 musl 版本"
@echo " make build-gnu-arm64 # 构建 aarch64 GNU 版本"
@echo ""
@echo "🐳 Docker 构建:"
@echo " make build-docker # 使用 Docker 容器构建"
@@ -197,7 +299,7 @@ help-build:
@echo " ./build-rustfs.sh --force-console-update # 强制更新 console 资源"
@echo " ./build-rustfs.sh --dev # 开发模式构建"
@echo " ./build-rustfs.sh --sign # 签名二进制文件"
@echo " ./build-rustfs.sh --platform x86_64-unknown-linux-musl # 指定目标平台"
@echo " ./build-rustfs.sh --platform x86_64-unknown-linux-gnu # 指定目标平台"
@echo " ./build-rustfs.sh --skip-verification # 跳过二进制验证"
@echo ""
@echo "💡 build-rustfs.sh 脚本提供了更多选项、智能检测和二进制验证功能"
@@ -206,23 +308,61 @@ help-build:
help-docker:
@echo "🐳 Docker 多架构构建帮助:"
@echo ""
@echo "🚀 推荐使用 (新的 docker-buildx 方式):"
@echo " make docker-buildx # 构建多架构镜像(不推送)"
@echo " make docker-buildx-push # 构建并推送多架构镜像"
@echo "🚀 生产镜像构建 (推荐使用 docker-buildx.sh):"
@echo " make docker-buildx # 构建生产多架构镜像(不推送)"
@echo " make docker-buildx-push # 构建并推送生产多架构镜像"
@echo " make docker-buildx-version VERSION=v1.0.0 # 构建指定版本"
@echo " make docker-buildx-push-version VERSION=v1.0.0 # 构建并推送指定版本"
@echo ""
@echo "🏗️ 单架构构建:"
@echo " make docker-build-production # 构建生产环境镜像"
@echo " make docker-build-source # 构建源码构建镜像"
@echo "🔧 开发/源码镜像构建 (本地开发测试):"
@echo " make docker-dev # 构建开发多架构镜像(无法本地加载)"
@echo " make docker-dev-local # 构建开发单架构镜像(本地加载)"
@echo " make docker-dev-push REGISTRY=xxx # 构建并推送开发镜像"
@echo ""
@echo "🏗️ 本地生产镜像构建 (替代方案):"
@echo " make docker-buildx-production-local # 本地构建生产单架构镜像"
@echo ""
@echo "📦 单架构构建 (传统方式):"
@echo " make docker-build-production # 构建单架构生产镜像"
@echo " make docker-build-source # 构建单架构源码镜像"
@echo ""
@echo "🚀 开发环境管理:"
@echo " make dev-env-start # 启动开发容器环境"
@echo " make dev-env-stop # 停止开发容器环境"
@echo " make dev-env-restart # 重启开发容器环境"
@echo ""
@echo "🔧 辅助工具:"
@echo " make build-cross-all # 构建所有架构的二进制文件"
@echo " make docker-inspect-multiarch IMAGE=xxx # 检查镜像的架构支持"
@echo ""
@echo "📋 环境变量 (在推送时需要设置):"
@echo "📋 环境变量:"
@echo " REGISTRY 镜像仓库地址 (推送时需要)"
@echo " DOCKERHUB_USERNAME Docker Hub 用户名"
@echo " DOCKERHUB_TOKEN Docker Hub 访问令牌"
@echo " GITHUB_TOKEN GitHub 访问令牌"
@echo ""
@echo "💡 更多详情请参考项目根目录的 docker-buildx.sh 脚本"
@echo "💡 建议:"
@echo " - 生产用途: 使用 docker-buildx* 命令 (基于预编译二进制)"
@echo " - 本地开发: 使用 docker-dev* 命令 (从源码构建)"
@echo " - 开发环境: 使用 dev-env-* 命令管理开发容器"
.PHONY: help
help:
@echo "🦀 RustFS Makefile 帮助:"
@echo ""
@echo "📋 主要命令分类:"
@echo " make help-build # 显示构建相关帮助"
@echo " make help-docker # 显示 Docker 相关帮助"
@echo ""
@echo "🔧 代码质量:"
@echo " make fmt # 格式化代码"
@echo " make clippy # 运行 clippy 检查"
@echo " make test # 运行测试"
@echo " make pre-commit # 运行所有预提交检查"
@echo ""
@echo "🚀 快速开始:"
@echo " make build # 构建 RustFS 二进制"
@echo " make docker-dev-local # 构建开发 Docker 镜像(本地)"
@echo " make dev-env-start # 启动开发环境"
@echo ""
@echo "💡 更多帮助请使用 'make help-build' 或 'make help-docker'"


@@ -11,8 +11,8 @@
</p>
<p align="center">
<a href="https://docs.rustfs.com/en/introduction.html">Getting Started</a>
· <a href="https://docs.rustfs.com/en/">Docs</a>
<a href="https://docs.rustfs.com/introduction.html">Getting Started</a>
· <a href="https://docs.rustfs.com/">Docs</a>
· <a href="https://github.com/rustfs/rustfs/issues">Bug reports</a>
· <a href="https://github.com/rustfs/rustfs/discussions">Discussions</a>
</p>

_typos.toml (new file)

@@ -0,0 +1,41 @@
[default]
# # Ignore specific spell checking patterns
# extend-ignore-identifiers-re = [
# # Ignore common patterns in base64 encoding and hash values
# "[A-Za-z0-9+/]{8,}={0,2}", # base64 encoding
# "[A-Fa-f0-9]{8,}", # hexadecimal hash
# "[A-Za-z0-9_-]{20,}", # long random strings
# ]
# # Ignore specific regex patterns in content
# extend-ignore-re = [
# # Ignore hash values and encoded strings (base64 patterns)
# "(?i)[A-Za-z0-9+/]{8,}={0,2}",
# # Ignore long strings in quotes (usually hash or base64)
# '"[A-Za-z0-9+/=_-]{8,}"',
# # Ignore IV values and similar cryptographic strings
# '"[A-Za-z0-9+/=]{12,}"',
# # Ignore cryptographic signatures and keys (including partial strings)
# "[A-Za-z0-9+/]{6,}[A-Za-z0-9+/=]*",
# # Ignore base64-like strings in comments (common in examples)
# "//.*[A-Za-z0-9+/]{8,}[A-Za-z0-9+/=]*",
# ]
extend-ignore-re = [
# Ignore long strings in quotes (usually hash or base64)
'"[A-Za-z0-9+/=_-]{32,}"',
# Ignore IV values and similar cryptographic strings
'"[A-Za-z0-9+/=]{12,}"',
# Ignore cryptographic signatures and keys (including partial strings)
"[A-Za-z0-9+/]{16,}[A-Za-z0-9+/=]*",
]
[default.extend-words]
bui = "bui"
typ = "typ"
clen = "clen"
datas = "datas"
bre = "bre"
abd = "abd"
[files]
extend-exclude = []
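`_typos.toml` is the standard configuration file name for the `typos` spell checker (the `typos-cli` crate); a local check along these lines presumably mirrors what the repository's lint step runs.
```bash
# Install the checker and run it from the repository root,
# where it picks up _typos.toml automatically.
cargo install typos-cli
typos
```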


@@ -21,13 +21,17 @@ detect_platform() {
"linux")
case "$arch" in
"x86_64")
echo "x86_64-unknown-linux-musl"
# Default to GNU for better compatibility
echo "x86_64-unknown-linux-gnu"
;;
"aarch64"|"arm64")
echo "aarch64-unknown-linux-musl"
echo "aarch64-unknown-linux-gnu"
;;
"armv7l")
echo "armv7-unknown-linux-musleabihf"
echo "armv7-unknown-linux-gnueabihf"
;;
"loongarch64")
echo "loongarch64-unknown-linux-musl"
;;
*)
echo "unknown-platform"
@@ -119,6 +123,17 @@ usage() {
echo " -o, --output-dir DIR Output directory (default: target/release)"
echo " -b, --binary-name NAME Binary name (default: rustfs)"
echo " -p, --platform TARGET Target platform (default: auto-detect)"
echo " Supported platforms:"
echo " x86_64-unknown-linux-gnu"
echo " aarch64-unknown-linux-gnu"
echo " armv7-unknown-linux-gnueabihf"
echo " x86_64-unknown-linux-musl"
echo " aarch64-unknown-linux-musl"
echo " armv7-unknown-linux-musleabihf"
echo " x86_64-apple-darwin"
echo " aarch64-apple-darwin"
echo " x86_64-pc-windows-msvc"
echo " aarch64-pc-windows-msvc"
echo " --dev Build in dev mode"
echo " --sign Sign binaries after build"
echo " --with-console Download console static assets (default)"
@@ -385,7 +400,7 @@ build_binary() {
fi
else
# Native compilation
build_cmd="cargo build"
build_cmd="RUSTFLAGS=-Clink-arg=-lm cargo build"
fi
if [ "$BUILD_TYPE" = "release" ]; then


@@ -34,8 +34,14 @@ url = { workspace = true }
rustfs-lock = { workspace = true }
lazy_static = { workspace = true }
chrono = { workspace = true }
[dev-dependencies]
rmp-serde = { workspace = true }
tokio-test = { workspace = true }
serde_json = { workspace = true }
serial_test = "3.2.0"
once_cell = { workspace = true }
tracing-subscriber = { workspace = true }
walkdir = "2.5.0"
tempfile = { workspace = true }


@@ -14,30 +14,79 @@
use thiserror::Error;
/// Unified error type for RustFS AHM/Heal/Scanner
#[derive(Debug, Error)]
pub enum Error {
// General
#[error("I/O error: {0}")]
Io(#[from] std::io::Error),
#[error("Storage error: {0}")]
Storage(#[from] rustfs_ecstore::error::Error),
#[error("Disk error: {0}")]
Disk(#[from] rustfs_ecstore::disk::error::DiskError),
#[error("Configuration error: {0}")]
Config(String),
#[error("Heal configuration error: {message}")]
ConfigurationError { message: String },
#[error("Other error: {0}")]
Other(String),
#[error(transparent)]
Anyhow(#[from] anyhow::Error),
// Scanner-related
#[error("Scanner error: {0}")]
Scanner(String),
#[error("Metrics error: {0}")]
Metrics(String),
#[error(transparent)]
Other(#[from] anyhow::Error),
// Heal-related
#[error("Heal task not found: {task_id}")]
TaskNotFound { task_id: String },
#[error("Heal task already exists: {task_id}")]
TaskAlreadyExists { task_id: String },
#[error("Heal manager is not running")]
ManagerNotRunning,
#[error("Heal task execution failed: {message}")]
TaskExecutionFailed { message: String },
#[error("Invalid heal type: {heal_type}")]
InvalidHealType { heal_type: String },
#[error("Heal task cancelled")]
TaskCancelled,
#[error("Heal task timeout")]
TaskTimeout,
#[error("Heal event processing failed: {message}")]
EventProcessingFailed { message: String },
#[error("Heal progress tracking failed: {message}")]
ProgressTrackingFailed { message: String },
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
// Implement conversion from ahm::Error to std::io::Error for use in main.rs
impl Error {
pub fn other<E>(error: E) -> Self
where
E: Into<Box<dyn std::error::Error + Send + Sync>>,
{
Error::Other(error.into().to_string())
}
}
// Optional: implement conversion to/from std::io::Error
impl From<Error> for std::io::Error {
fn from(err: Error) -> Self {
std::io::Error::other(err)


@@ -0,0 +1,233 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::error::Result;
use crate::heal::{
manager::HealManager,
task::{HealOptions, HealPriority, HealRequest, HealType},
};
use rustfs_common::heal_channel::{
HealChannelCommand, HealChannelPriority, HealChannelReceiver, HealChannelRequest, HealChannelResponse, HealScanMode,
};
use std::sync::Arc;
use tokio::sync::mpsc;
use tracing::{error, info};
/// Heal channel processor
pub struct HealChannelProcessor {
/// Heal manager
heal_manager: Arc<HealManager>,
/// Response sender
response_sender: mpsc::UnboundedSender<HealChannelResponse>,
/// Response receiver
response_receiver: mpsc::UnboundedReceiver<HealChannelResponse>,
}
impl HealChannelProcessor {
/// Create new HealChannelProcessor
pub fn new(heal_manager: Arc<HealManager>) -> Self {
let (response_tx, response_rx) = mpsc::unbounded_channel();
Self {
heal_manager,
response_sender: response_tx,
response_receiver: response_rx,
}
}
/// Start processing heal channel requests
pub async fn start(&mut self, mut receiver: HealChannelReceiver) -> Result<()> {
info!("Starting heal channel processor");
loop {
tokio::select! {
command = receiver.recv() => {
match command {
Some(command) => {
if let Err(e) = self.process_command(command).await {
error!("Failed to process heal command: {}", e);
}
}
None => {
info!("Heal channel receiver closed, stopping processor");
break;
}
}
}
response = self.response_receiver.recv() => {
if let Some(response) = response {
// Handle response if needed
info!("Received heal response for request: {}", response.request_id);
}
}
}
}
info!("Heal channel processor stopped");
Ok(())
}
/// Process heal command
async fn process_command(&self, command: HealChannelCommand) -> Result<()> {
match command {
HealChannelCommand::Start(request) => self.process_start_request(request).await,
HealChannelCommand::Query { heal_path, client_token } => self.process_query_request(heal_path, client_token).await,
HealChannelCommand::Cancel { heal_path } => self.process_cancel_request(heal_path).await,
}
}
/// Process start request
async fn process_start_request(&self, request: HealChannelRequest) -> Result<()> {
info!("Processing heal start request: {} for bucket: {}", request.id, request.bucket);
// Convert channel request to heal request
let heal_request = self.convert_to_heal_request(request.clone())?;
// Submit to heal manager
match self.heal_manager.submit_heal_request(heal_request).await {
Ok(task_id) => {
info!("Successfully submitted heal request: {} as task: {}", request.id, task_id);
// Send success response
let response = HealChannelResponse {
request_id: request.id,
success: true,
data: Some(format!("Task ID: {task_id}").into_bytes()),
error: None,
};
if let Err(e) = self.response_sender.send(response) {
error!("Failed to send heal response: {}", e);
}
}
Err(e) => {
error!("Failed to submit heal request: {} - {}", request.id, e);
// Send error response
let response = HealChannelResponse {
request_id: request.id,
success: false,
data: None,
error: Some(e.to_string()),
};
if let Err(e) = self.response_sender.send(response) {
error!("Failed to send heal error response: {}", e);
}
}
}
Ok(())
}
/// Process query request
async fn process_query_request(&self, heal_path: String, client_token: String) -> Result<()> {
info!("Processing heal query request for path: {}", heal_path);
// TODO: Implement query logic based on heal_path and client_token
// For now, return a placeholder response
let response = HealChannelResponse {
request_id: client_token,
success: true,
data: Some(format!("Query result for path: {heal_path}").into_bytes()),
error: None,
};
if let Err(e) = self.response_sender.send(response) {
error!("Failed to send query response: {}", e);
}
Ok(())
}
/// Process cancel request
async fn process_cancel_request(&self, heal_path: String) -> Result<()> {
info!("Processing heal cancel request for path: {}", heal_path);
// TODO: Implement cancel logic based on heal_path
// For now, return a placeholder response
let response = HealChannelResponse {
request_id: heal_path.clone(),
success: true,
data: Some(format!("Cancel request for path: {heal_path}").into_bytes()),
error: None,
};
if let Err(e) = self.response_sender.send(response) {
error!("Failed to send cancel response: {}", e);
}
Ok(())
}
/// Convert channel request to heal request
fn convert_to_heal_request(&self, request: HealChannelRequest) -> Result<HealRequest> {
let heal_type = if let Some(disk_id) = &request.disk {
HealType::ErasureSet {
buckets: vec![],
set_disk_id: disk_id.clone(),
}
} else if let Some(prefix) = &request.object_prefix {
if !prefix.is_empty() {
HealType::Object {
bucket: request.bucket.clone(),
object: prefix.clone(),
version_id: None,
}
} else {
HealType::Bucket {
bucket: request.bucket.clone(),
}
}
} else {
HealType::Bucket {
bucket: request.bucket.clone(),
}
};
let priority = match request.priority {
HealChannelPriority::Low => HealPriority::Low,
HealChannelPriority::Normal => HealPriority::Normal,
HealChannelPriority::High => HealPriority::High,
HealChannelPriority::Critical => HealPriority::Urgent,
};
// Build HealOptions with all available fields
let mut options = HealOptions {
scan_mode: request.scan_mode.unwrap_or(HealScanMode::Normal),
remove_corrupted: request.remove_corrupted.unwrap_or(false),
recreate_missing: request.recreate_missing.unwrap_or(true),
update_parity: request.update_parity.unwrap_or(true),
recursive: request.recursive.unwrap_or(false),
dry_run: request.dry_run.unwrap_or(false),
timeout: request.timeout_seconds.map(std::time::Duration::from_secs),
pool_index: request.pool_index,
set_index: request.set_index,
};
// Apply force_start overrides
if request.force_start {
options.remove_corrupted = true;
options.recreate_missing = true;
options.update_parity = true;
}
Ok(HealRequest::new(heal_type, options, priority))
}
/// Get response sender for external use
pub fn get_response_sender(&self) -> mpsc::UnboundedSender<HealChannelResponse> {
self.response_sender.clone()
}
}


@@ -0,0 +1,456 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::error::{Error, Result};
use crate::heal::{
progress::HealProgress,
resume::{CheckpointManager, ResumeManager, ResumeUtils},
storage::HealStorageAPI,
};
use futures::future::join_all;
use rustfs_common::heal_channel::{HealOpts, HealScanMode};
use rustfs_ecstore::disk::DiskStore;
use std::sync::Arc;
use tokio::sync::RwLock;
use tracing::{error, info, warn};
/// Erasure Set Healer
pub struct ErasureSetHealer {
storage: Arc<dyn HealStorageAPI>,
progress: Arc<RwLock<HealProgress>>,
cancel_token: tokio_util::sync::CancellationToken,
disk: DiskStore,
}
impl ErasureSetHealer {
pub fn new(
storage: Arc<dyn HealStorageAPI>,
progress: Arc<RwLock<HealProgress>>,
cancel_token: tokio_util::sync::CancellationToken,
disk: DiskStore,
) -> Self {
Self {
storage,
progress,
cancel_token,
disk,
}
}
/// execute erasure set heal with resume
pub async fn heal_erasure_set(&self, buckets: &[String], set_disk_id: &str) -> Result<()> {
info!("Starting erasure set heal for {} buckets on set disk {}", buckets.len(), set_disk_id);
// 1. generate or get task id
let task_id = self.get_or_create_task_id(set_disk_id).await?;
// 2. initialize or resume resume state
let (resume_manager, checkpoint_manager) = self.initialize_resume_state(&task_id, buckets).await?;
// 3. execute heal with resume
let result = self
.execute_heal_with_resume(buckets, &resume_manager, &checkpoint_manager)
.await;
// 4. cleanup resume state
if result.is_ok() {
if let Err(e) = resume_manager.cleanup().await {
warn!("Failed to cleanup resume state: {}", e);
}
if let Err(e) = checkpoint_manager.cleanup().await {
warn!("Failed to cleanup checkpoint: {}", e);
}
}
result
}
/// get or create task id
async fn get_or_create_task_id(&self, _set_disk_id: &str) -> Result<String> {
// check if there are resumable tasks
let resumable_tasks = ResumeUtils::get_resumable_tasks(&self.disk).await?;
for task_id in resumable_tasks {
if ResumeUtils::can_resume_task(&self.disk, &task_id).await {
info!("Found resumable task: {}", task_id);
return Ok(task_id);
}
}
// create new task id
let task_id = ResumeUtils::generate_task_id();
info!("Created new heal task: {}", task_id);
Ok(task_id)
}
/// initialize or resume resume state
async fn initialize_resume_state(&self, task_id: &str, buckets: &[String]) -> Result<(ResumeManager, CheckpointManager)> {
// check if resume state exists
if ResumeManager::has_resume_state(&self.disk, task_id).await {
info!("Loading existing resume state for task: {}", task_id);
let resume_manager = ResumeManager::load_from_disk(self.disk.clone(), task_id).await?;
let checkpoint_manager = if CheckpointManager::has_checkpoint(&self.disk, task_id).await {
CheckpointManager::load_from_disk(self.disk.clone(), task_id).await?
} else {
CheckpointManager::new(self.disk.clone(), task_id.to_string()).await?
};
Ok((resume_manager, checkpoint_manager))
} else {
info!("Creating new resume state for task: {}", task_id);
let resume_manager =
ResumeManager::new(self.disk.clone(), task_id.to_string(), "erasure_set".to_string(), buckets.to_vec()).await?;
let checkpoint_manager = CheckpointManager::new(self.disk.clone(), task_id.to_string()).await?;
Ok((resume_manager, checkpoint_manager))
}
}
/// execute heal with resume
async fn execute_heal_with_resume(
&self,
buckets: &[String],
resume_manager: &ResumeManager,
checkpoint_manager: &CheckpointManager,
) -> Result<()> {
// 1. get current state
let state = resume_manager.get_state().await;
let checkpoint = checkpoint_manager.get_checkpoint().await;
info!(
"Resuming from bucket {} object {}",
checkpoint.current_bucket_index, checkpoint.current_object_index
);
// 2. initialize progress
self.initialize_progress(buckets, &state).await;
// 3. continue from checkpoint
let current_bucket_index = checkpoint.current_bucket_index;
let mut current_object_index = checkpoint.current_object_index;
let mut processed_objects = state.processed_objects;
let mut successful_objects = state.successful_objects;
let mut failed_objects = state.failed_objects;
let mut skipped_objects = state.skipped_objects;
// 4. process remaining buckets
for (bucket_idx, bucket) in buckets.iter().enumerate().skip(current_bucket_index) {
// check if completed
if state.completed_buckets.contains(bucket) {
continue;
}
// update current bucket
resume_manager.set_current_item(Some(bucket.clone()), None).await?;
// process objects in bucket
let bucket_result = self
.heal_bucket_with_resume(
bucket,
&mut current_object_index,
&mut processed_objects,
&mut successful_objects,
&mut failed_objects,
&mut skipped_objects,
resume_manager,
checkpoint_manager,
)
.await;
// update checkpoint position
checkpoint_manager.update_position(bucket_idx, current_object_index).await?;
// update progress
resume_manager
.update_progress(processed_objects, successful_objects, failed_objects, skipped_objects)
.await?;
// check cancel status
if self.cancel_token.is_cancelled() {
info!("Heal task cancelled");
return Err(Error::TaskCancelled);
}
// process bucket result
match bucket_result {
Ok(_) => {
resume_manager.complete_bucket(bucket).await?;
info!("Completed heal for bucket: {}", bucket);
}
Err(e) => {
error!("Failed to heal bucket {}: {}", bucket, e);
// continue to next bucket, do not interrupt the whole process
}
}
// reset object index
current_object_index = 0;
}
// 5. mark task completed
resume_manager.mark_completed().await?;
info!("Erasure set heal completed successfully");
Ok(())
}
/// heal single bucket with resume
#[allow(clippy::too_many_arguments)]
async fn heal_bucket_with_resume(
&self,
bucket: &str,
current_object_index: &mut usize,
processed_objects: &mut u64,
successful_objects: &mut u64,
failed_objects: &mut u64,
_skipped_objects: &mut u64,
resume_manager: &ResumeManager,
checkpoint_manager: &CheckpointManager,
) -> Result<()> {
info!("Starting heal for bucket: {} from object index {}", bucket, current_object_index);
// 1. get bucket info
let _bucket_info = match self.storage.get_bucket_info(bucket).await? {
Some(info) => info,
None => {
warn!("Bucket {} not found, skipping", bucket);
return Ok(());
}
};
// 2. get objects to heal
let objects = self.storage.list_objects_for_heal(bucket, "").await?;
// 3. continue from checkpoint
for (obj_idx, object) in objects.iter().enumerate().skip(*current_object_index) {
// check if already processed
if checkpoint_manager.get_checkpoint().await.processed_objects.contains(object) {
continue;
}
// update current object
resume_manager
.set_current_item(Some(bucket.to_string()), Some(object.clone()))
.await?;
// heal object
let heal_opts = HealOpts {
scan_mode: HealScanMode::Normal,
remove: true,
recreate: true,
..Default::default()
};
match self.storage.heal_object(bucket, object, None, &heal_opts).await {
Ok((_result, None)) => {
*successful_objects += 1;
checkpoint_manager.add_processed_object(object.clone()).await?;
info!("Successfully healed object {}/{}", bucket, object);
}
Ok((_, Some(err))) => {
*failed_objects += 1;
checkpoint_manager.add_failed_object(object.clone()).await?;
warn!("Failed to heal object {}/{}: {}", bucket, object, err);
}
Err(err) => {
*failed_objects += 1;
checkpoint_manager.add_failed_object(object.clone()).await?;
warn!("Error healing object {}/{}: {}", bucket, object, err);
}
}
*processed_objects += 1;
*current_object_index = obj_idx + 1;
// check cancel status
if self.cancel_token.is_cancelled() {
info!("Heal task cancelled during object processing");
return Err(Error::TaskCancelled);
}
// save checkpoint periodically
if obj_idx % 100 == 0 {
checkpoint_manager.update_position(0, *current_object_index).await?;
}
}
Ok(())
}
/// initialize progress tracking
async fn initialize_progress(&self, _buckets: &[String], state: &crate::heal::resume::ResumeState) {
let mut progress = self.progress.write().await;
progress.objects_scanned = state.total_objects;
progress.objects_healed = state.successful_objects;
progress.objects_failed = state.failed_objects;
progress.bytes_processed = 0; // set to 0 for now, can be extended later
progress.set_current_object(state.current_object.clone());
}
/// heal all buckets concurrently
#[allow(dead_code)]
async fn heal_buckets_concurrently(&self, buckets: &[String]) -> Vec<Result<()>> {
// use semaphore to control concurrency, avoid too many concurrent healings
let semaphore = Arc::new(tokio::sync::Semaphore::new(4)); // max 4 concurrent healings
let heal_futures = buckets.iter().map(|bucket| {
let bucket = bucket.clone();
let storage = self.storage.clone();
let progress = self.progress.clone();
let semaphore = semaphore.clone();
let cancel_token = self.cancel_token.clone();
async move {
let _permit = semaphore.acquire().await.unwrap();
if cancel_token.is_cancelled() {
return Err(Error::TaskCancelled);
}
Self::heal_single_bucket(&storage, &bucket, &progress).await
}
});
// use join_all to process concurrently
join_all(heal_futures).await
}
/// heal single bucket
#[allow(dead_code)]
async fn heal_single_bucket(
storage: &Arc<dyn HealStorageAPI>,
bucket: &str,
progress: &Arc<RwLock<HealProgress>>,
) -> Result<()> {
info!("Starting heal for bucket: {}", bucket);
// 1. get bucket info
let _bucket_info = match storage.get_bucket_info(bucket).await? {
Some(info) => info,
None => {
warn!("Bucket {} not found, skipping", bucket);
return Ok(());
}
};
// 2. get objects to heal
let objects = storage.list_objects_for_heal(bucket, "").await?;
// 3. update progress
{
let mut p = progress.write().await;
p.objects_scanned += objects.len() as u64;
}
// 4. heal objects concurrently
let heal_opts = HealOpts {
scan_mode: HealScanMode::Normal,
remove: true, // remove corrupted data
recreate: true, // recreate missing data
..Default::default()
};
let object_results = Self::heal_objects_concurrently(storage, bucket, &objects, &heal_opts, progress).await;
// 5. count results
let (success_count, failure_count) = object_results
.into_iter()
.fold((0, 0), |(success, failure), result| match result {
Ok(_) => (success + 1, failure),
Err(_) => (success, failure + 1),
});
// 6. update progress
{
let mut p = progress.write().await;
p.objects_healed += success_count;
p.objects_failed += failure_count;
p.set_current_object(Some(format!("completed bucket: {bucket}")));
}
info!(
"Completed heal for bucket {}: {} success, {} failures",
bucket, success_count, failure_count
);
Ok(())
}
/// heal objects concurrently
#[allow(dead_code)]
async fn heal_objects_concurrently(
storage: &Arc<dyn HealStorageAPI>,
bucket: &str,
objects: &[String],
heal_opts: &HealOpts,
_progress: &Arc<RwLock<HealProgress>>,
) -> Vec<Result<()>> {
// use semaphore to control object healing concurrency
let semaphore = Arc::new(tokio::sync::Semaphore::new(8)); // max 8 concurrent object healings
let heal_futures = objects.iter().map(|object| {
let object = object.clone();
let bucket = bucket.to_string();
let storage = storage.clone();
let heal_opts = *heal_opts;
let semaphore = semaphore.clone();
async move {
let _permit = semaphore.acquire().await.unwrap();
match storage.heal_object(&bucket, &object, None, &heal_opts).await {
Ok((_result, None)) => {
info!("Successfully healed object {}/{}", bucket, object);
Ok(())
}
Ok((_, Some(err))) => {
warn!("Failed to heal object {}/{}: {}", bucket, object, err);
Err(Error::other(err))
}
Err(err) => {
warn!("Error healing object {}/{}: {}", bucket, object, err);
Err(err)
}
}
}
});
join_all(heal_futures).await
}
/// process results
#[allow(dead_code)]
async fn process_results(&self, results: Vec<Result<()>>) -> Result<()> {
let (success_count, failure_count): (usize, usize) =
results.into_iter().fold((0, 0), |(success, failure), result| match result {
Ok(_) => (success + 1, failure),
Err(_) => (success, failure + 1),
});
let total = success_count + failure_count;
info!("Erasure set heal completed: {}/{} buckets successful", success_count, total);
if failure_count > 0 {
warn!("{} buckets failed to heal", failure_count);
return Err(Error::other(format!("{failure_count} buckets failed to heal")));
}
Ok(())
}
}


@@ -0,0 +1,359 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::heal::task::{HealOptions, HealPriority, HealRequest, HealType};
use rustfs_ecstore::disk::endpoint::Endpoint;
use serde::{Deserialize, Serialize};
use std::time::SystemTime;
/// Corruption type
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum CorruptionType {
/// Data corruption
DataCorruption,
/// Metadata corruption
MetadataCorruption,
/// Partial corruption
PartialCorruption,
/// Complete corruption
CompleteCorruption,
}
/// Severity level
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub enum Severity {
/// Low severity
Low = 0,
/// Medium severity
Medium = 1,
/// High severity
High = 2,
/// Critical severity
Critical = 3,
}
/// Heal event
#[derive(Debug, Clone)]
pub enum HealEvent {
/// Object corruption event
ObjectCorruption {
bucket: String,
object: String,
version_id: Option<String>,
corruption_type: CorruptionType,
severity: Severity,
},
/// Object missing event
ObjectMissing {
bucket: String,
object: String,
version_id: Option<String>,
expected_locations: Vec<usize>,
available_locations: Vec<usize>,
},
/// Metadata corruption event
MetadataCorruption {
bucket: String,
object: String,
corruption_type: CorruptionType,
},
/// Disk status change event
DiskStatusChange {
endpoint: Endpoint,
old_status: String,
new_status: String,
},
/// EC decode failure event
ECDecodeFailure {
bucket: String,
object: String,
version_id: Option<String>,
missing_shards: Vec<usize>,
available_shards: Vec<usize>,
},
/// Checksum mismatch event
ChecksumMismatch {
bucket: String,
object: String,
version_id: Option<String>,
expected_checksum: String,
actual_checksum: String,
},
/// Bucket metadata corruption event
BucketMetadataCorruption {
bucket: String,
corruption_type: CorruptionType,
},
/// MRF metadata corruption event
MRFMetadataCorruption {
meta_path: String,
corruption_type: CorruptionType,
},
}
impl HealEvent {
/// Convert HealEvent to HealRequest
pub fn to_heal_request(&self) -> HealRequest {
match self {
HealEvent::ObjectCorruption {
bucket,
object,
version_id,
severity,
..
} => HealRequest::new(
HealType::Object {
bucket: bucket.clone(),
object: object.clone(),
version_id: version_id.clone(),
},
HealOptions::default(),
Self::severity_to_priority(severity),
),
HealEvent::ObjectMissing {
bucket,
object,
version_id,
..
} => HealRequest::new(
HealType::Object {
bucket: bucket.clone(),
object: object.clone(),
version_id: version_id.clone(),
},
HealOptions::default(),
HealPriority::High,
),
HealEvent::MetadataCorruption { bucket, object, .. } => HealRequest::new(
HealType::Metadata {
bucket: bucket.clone(),
object: object.clone(),
},
HealOptions::default(),
HealPriority::High,
),
HealEvent::DiskStatusChange { endpoint, .. } => {
// Convert disk status change to erasure set heal
// Note: This requires access to storage to get bucket list, which is not available here
// The actual bucket list will need to be provided by the caller or retrieved differently
HealRequest::new(
HealType::ErasureSet {
buckets: vec![], // Empty bucket list - caller should populate this
// must match the "pool_{pool_idx}_set_{set_idx}" format parsed by get_disk_for_resume
set_disk_id: format!("pool_{}_set_{}", endpoint.pool_idx, endpoint.set_idx),
},
HealOptions::default(),
HealPriority::High,
)
}
HealEvent::ECDecodeFailure {
bucket,
object,
version_id,
..
} => HealRequest::new(
HealType::ECDecode {
bucket: bucket.clone(),
object: object.clone(),
version_id: version_id.clone(),
},
HealOptions::default(),
HealPriority::Urgent,
),
HealEvent::ChecksumMismatch {
bucket,
object,
version_id,
..
} => HealRequest::new(
HealType::Object {
bucket: bucket.clone(),
object: object.clone(),
version_id: version_id.clone(),
},
HealOptions::default(),
HealPriority::High,
),
HealEvent::BucketMetadataCorruption { bucket, .. } => {
HealRequest::new(HealType::Bucket { bucket: bucket.clone() }, HealOptions::default(), HealPriority::High)
}
HealEvent::MRFMetadataCorruption { meta_path, .. } => HealRequest::new(
HealType::MRF {
meta_path: meta_path.clone(),
},
HealOptions::default(),
HealPriority::High,
),
}
}
/// Convert severity to priority
fn severity_to_priority(severity: &Severity) -> HealPriority {
match severity {
Severity::Low => HealPriority::Low,
Severity::Medium => HealPriority::Normal,
Severity::High => HealPriority::High,
Severity::Critical => HealPriority::Urgent,
}
}
/// Get event description
pub fn description(&self) -> String {
match self {
HealEvent::ObjectCorruption {
bucket,
object,
corruption_type,
..
} => {
format!("Object corruption detected: {bucket}/{object} - {corruption_type:?}")
}
HealEvent::ObjectMissing { bucket, object, .. } => {
format!("Object missing: {bucket}/{object}")
}
HealEvent::MetadataCorruption {
bucket,
object,
corruption_type,
..
} => {
format!("Metadata corruption: {bucket}/{object} - {corruption_type:?}")
}
HealEvent::DiskStatusChange {
endpoint,
old_status,
new_status,
..
} => {
format!("Disk status changed: {endpoint:?} {old_status} -> {new_status}")
}
HealEvent::ECDecodeFailure {
bucket,
object,
missing_shards,
..
} => {
format!("EC decode failure: {bucket}/{object} - missing shards: {missing_shards:?}")
}
HealEvent::ChecksumMismatch {
bucket,
object,
expected_checksum,
actual_checksum,
..
} => {
format!("Checksum mismatch: {bucket}/{object} - expected: {expected_checksum}, actual: {actual_checksum}")
}
HealEvent::BucketMetadataCorruption {
bucket, corruption_type, ..
} => {
format!("Bucket metadata corruption: {bucket} - {corruption_type:?}")
}
HealEvent::MRFMetadataCorruption {
meta_path,
corruption_type,
..
} => {
format!("MRF metadata corruption: {meta_path} - {corruption_type:?}")
}
}
}
/// Get event severity
pub fn severity(&self) -> Severity {
match self {
HealEvent::ObjectCorruption { severity, .. } => severity.clone(),
HealEvent::ObjectMissing { .. } => Severity::High,
HealEvent::MetadataCorruption { .. } => Severity::High,
HealEvent::DiskStatusChange { .. } => Severity::High,
HealEvent::ECDecodeFailure { .. } => Severity::Critical,
HealEvent::ChecksumMismatch { .. } => Severity::High,
HealEvent::BucketMetadataCorruption { .. } => Severity::High,
HealEvent::MRFMetadataCorruption { .. } => Severity::High,
}
}
/// Get event timestamp (events carry no creation time, so this returns the current time)
pub fn timestamp(&self) -> SystemTime {
SystemTime::now()
}
}
/// Heal event handler
pub struct HealEventHandler {
/// Event queue
events: Vec<HealEvent>,
/// Maximum number of events
max_events: usize,
}
impl HealEventHandler {
pub fn new(max_events: usize) -> Self {
Self {
events: Vec::new(),
max_events,
}
}
/// Add event
pub fn add_event(&mut self, event: HealEvent) {
if self.events.len() >= self.max_events {
// Remove oldest event
self.events.remove(0);
}
self.events.push(event);
}
/// Get all events
pub fn get_events(&self) -> &[HealEvent] {
&self.events
}
/// Clear events
pub fn clear_events(&mut self) {
self.events.clear();
}
/// Get event count
pub fn event_count(&self) -> usize {
self.events.len()
}
/// Filter events by severity
pub fn filter_by_severity(&self, min_severity: Severity) -> Vec<&HealEvent> {
self.events.iter().filter(|event| event.severity() >= min_severity).collect()
}
/// Filter events by type
pub fn filter_by_type(&self, event_type: &str) -> Vec<&HealEvent> {
self.events
.iter()
.filter(|event| match event {
HealEvent::ObjectCorruption { .. } => event_type == "ObjectCorruption",
HealEvent::ObjectMissing { .. } => event_type == "ObjectMissing",
HealEvent::MetadataCorruption { .. } => event_type == "MetadataCorruption",
HealEvent::DiskStatusChange { .. } => event_type == "DiskStatusChange",
HealEvent::ECDecodeFailure { .. } => event_type == "ECDecodeFailure",
HealEvent::ChecksumMismatch { .. } => event_type == "ChecksumMismatch",
HealEvent::BucketMetadataCorruption { .. } => event_type == "BucketMetadataCorruption",
HealEvent::MRFMetadataCorruption { .. } => event_type == "MRFMetadataCorruption",
})
.collect()
}
}
impl Default for HealEventHandler {
fn default() -> Self {
Self::new(1000)
}
}
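A minimal usage sketch of the event types above, showing how a detector might convert an event into a queueable request and buffer it in the handler (the bucket/object values are made up for illustration):
// Hypothetical usage sketch for HealEvent / HealEventHandler.
use crate::heal::event::{CorruptionType, HealEvent, HealEventHandler, Severity};

fn report_corruption(handler: &mut HealEventHandler) {
    let event = HealEvent::ObjectCorruption {
        bucket: "photos".to_string(),
        object: "2024/cat.jpg".to_string(),
        version_id: None,
        corruption_type: CorruptionType::DataCorruption,
        severity: Severity::High,
    };
    // Convert to a queueable request before buffering the event.
    let request = event.to_heal_request();
    println!("{} -> request {}", event.description(), request.id);
    handler.add_event(event);
    // Only keep looking at events at or above High severity.
    let urgent = handler.filter_by_severity(Severity::High);
    println!("{} high-severity events buffered", urgent.len());
}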

View File

@@ -0,0 +1,422 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::error::{Error, Result};
use crate::heal::{
progress::{HealProgress, HealStatistics},
storage::HealStorageAPI,
task::{HealOptions, HealPriority, HealRequest, HealTask, HealTaskStatus, HealType},
};
use rustfs_ecstore::disk::DiskAPI;
use rustfs_ecstore::disk::error::DiskError;
use rustfs_ecstore::global::GLOBAL_LOCAL_DISK_MAP;
use std::{
collections::{HashMap, VecDeque},
sync::Arc,
time::{Duration, SystemTime},
};
use tokio::{
sync::{Mutex, RwLock},
time::interval,
};
use tokio_util::sync::CancellationToken;
use tracing::{error, info, warn};
/// Heal config
#[derive(Debug, Clone)]
pub struct HealConfig {
/// Whether to enable auto heal
pub enable_auto_heal: bool,
/// Heal interval
pub heal_interval: Duration,
/// Maximum concurrent heal tasks
pub max_concurrent_heals: usize,
/// Task timeout
pub task_timeout: Duration,
/// Queue size
pub queue_size: usize,
}
impl Default for HealConfig {
fn default() -> Self {
Self {
enable_auto_heal: true,
heal_interval: Duration::from_secs(10), // 10 seconds
max_concurrent_heals: 4,
task_timeout: Duration::from_secs(300), // 5 minutes
queue_size: 1000,
}
}
}
/// Heal state
#[derive(Debug, Default)]
pub struct HealState {
/// Whether running
pub is_running: bool,
/// Current heal cycle
pub current_cycle: u64,
/// Last heal time
pub last_heal_time: Option<SystemTime>,
/// Total healed objects
pub total_healed_objects: u64,
/// Total heal failures
pub total_heal_failures: u64,
/// Current active heal tasks
pub active_heal_count: usize,
}
/// Heal manager
pub struct HealManager {
/// Heal config
config: Arc<RwLock<HealConfig>>,
/// Heal state
state: Arc<RwLock<HealState>>,
/// Active heal tasks
active_heals: Arc<Mutex<HashMap<String, Arc<HealTask>>>>,
/// Heal queue
heal_queue: Arc<Mutex<VecDeque<HealRequest>>>,
/// Storage layer interface
storage: Arc<dyn HealStorageAPI>,
/// Cancel token
cancel_token: CancellationToken,
/// Statistics
statistics: Arc<RwLock<HealStatistics>>,
}
impl HealManager {
/// Create new HealManager
pub fn new(storage: Arc<dyn HealStorageAPI>, config: Option<HealConfig>) -> Self {
let config = config.unwrap_or_default();
Self {
config: Arc::new(RwLock::new(config)),
state: Arc::new(RwLock::new(HealState::default())),
active_heals: Arc::new(Mutex::new(HashMap::new())),
heal_queue: Arc::new(Mutex::new(VecDeque::new())),
storage,
cancel_token: CancellationToken::new(),
statistics: Arc::new(RwLock::new(HealStatistics::new())),
}
}
/// Start HealManager
pub async fn start(&self) -> Result<()> {
let mut state = self.state.write().await;
if state.is_running {
warn!("HealManager is already running");
return Ok(());
}
state.is_running = true;
drop(state);
info!("Starting HealManager");
// start scheduler
self.start_scheduler().await?;
// start auto disk scanner
self.start_auto_disk_scanner().await?;
info!("HealManager started successfully");
Ok(())
}
/// Stop HealManager
pub async fn stop(&self) -> Result<()> {
info!("Stopping HealManager");
// cancel all tasks
self.cancel_token.cancel();
// wait for all tasks to complete
let mut active_heals = self.active_heals.lock().await;
for task in active_heals.values() {
if let Err(e) = task.cancel().await {
warn!("Failed to cancel task {}: {}", task.id, e);
}
}
active_heals.clear();
// update state
let mut state = self.state.write().await;
state.is_running = false;
info!("HealManager stopped successfully");
Ok(())
}
/// Submit heal request
pub async fn submit_heal_request(&self, request: HealRequest) -> Result<String> {
let config = self.config.read().await;
let mut queue = self.heal_queue.lock().await;
if queue.len() >= config.queue_size {
return Err(Error::ConfigurationError {
message: "Heal queue is full".to_string(),
});
}
let request_id = request.id.clone();
queue.push_back(request);
drop(queue);
info!("Submitted heal request: {}", request_id);
Ok(request_id)
}
/// Get task status
pub async fn get_task_status(&self, task_id: &str) -> Result<HealTaskStatus> {
let active_heals = self.active_heals.lock().await;
if let Some(task) = active_heals.get(task_id) {
Ok(task.get_status().await)
} else {
Err(Error::TaskNotFound {
task_id: task_id.to_string(),
})
}
}
/// Get active tasks count
pub async fn get_active_tasks_count(&self) -> usize {
self.active_heals.lock().await.len()
}
/// Get task progress
pub async fn get_task_progress(&self, task_id: &str) -> Result<HealProgress> {
let active_heals = self.active_heals.lock().await;
if let Some(task) = active_heals.get(task_id) {
Ok(task.get_progress().await)
} else {
Err(Error::TaskNotFound {
task_id: task_id.to_string(),
})
}
}
/// Cancel task
pub async fn cancel_task(&self, task_id: &str) -> Result<()> {
let mut active_heals = self.active_heals.lock().await;
if let Some(task) = active_heals.get(task_id) {
task.cancel().await?;
active_heals.remove(task_id);
info!("Cancelled heal task: {}", task_id);
Ok(())
} else {
Err(Error::TaskNotFound {
task_id: task_id.to_string(),
})
}
}
/// Get statistics
pub async fn get_statistics(&self) -> HealStatistics {
self.statistics.read().await.clone()
}
/// Get active task count
pub async fn get_active_task_count(&self) -> usize {
let active_heals = self.active_heals.lock().await;
active_heals.len()
}
/// Get queue length
pub async fn get_queue_length(&self) -> usize {
let queue = self.heal_queue.lock().await;
queue.len()
}
/// Start scheduler
async fn start_scheduler(&self) -> Result<()> {
let config = self.config.clone();
let heal_queue = self.heal_queue.clone();
let active_heals = self.active_heals.clone();
let cancel_token = self.cancel_token.clone();
let statistics = self.statistics.clone();
let storage = self.storage.clone();
tokio::spawn(async move {
let mut interval = interval(config.read().await.heal_interval);
loop {
tokio::select! {
_ = cancel_token.cancelled() => {
info!("Heal scheduler received shutdown signal");
break;
}
_ = interval.tick() => {
Self::process_heal_queue(&heal_queue, &active_heals, &config, &statistics, &storage).await;
}
}
}
});
Ok(())
}
/// Start background task to auto scan local disks and enqueue erasure set heal requests
async fn start_auto_disk_scanner(&self) -> Result<()> {
let config = self.config.clone();
let heal_queue = self.heal_queue.clone();
let active_heals = self.active_heals.clone();
let cancel_token = self.cancel_token.clone();
let storage = self.storage.clone();
tokio::spawn(async move {
let mut interval = interval(config.read().await.heal_interval);
loop {
tokio::select! {
_ = cancel_token.cancelled() => {
info!("Auto disk scanner received shutdown signal");
break;
}
_ = interval.tick() => {
// Build list of endpoints that need healing
let mut endpoints = Vec::new();
for (_, disk_opt) in GLOBAL_LOCAL_DISK_MAP.read().await.iter() {
if let Some(disk) = disk_opt {
// detect unformatted disk via get_disk_id()
if let Err(err) = disk.get_disk_id().await {
if err == DiskError::UnformattedDisk {
endpoints.push(disk.endpoint());
continue;
}
}
}
}
if endpoints.is_empty() {
continue;
}
// Get bucket list for erasure set healing
let buckets = match storage.list_buckets().await {
Ok(buckets) => buckets.iter().map(|b| b.name.clone()).collect::<Vec<String>>(),
Err(e) => {
error!("Failed to get bucket list for auto healing: {}", e);
continue;
}
};
// Create erasure set heal requests for each endpoint
for ep in endpoints {
// skip if already queued or healing
let mut skip = false;
{
let queue = heal_queue.lock().await;
if queue.iter().any(|req| matches!(&req.heal_type, crate::heal::task::HealType::ErasureSet { set_disk_id, .. } if set_disk_id == &format!("pool_{}_set_{}", ep.pool_idx, ep.set_idx))) {
skip = true;
}
}
if !skip {
let active = active_heals.lock().await;
if active.values().any(|task| matches!(&task.heal_type, crate::heal::task::HealType::ErasureSet { set_disk_id, .. } if set_disk_id == &format!("pool_{}_set_{}", ep.pool_idx, ep.set_idx))) {
skip = true;
}
}
if skip {
continue;
}
// enqueue erasure set heal request for this disk
let set_disk_id = format!("pool_{}_set_{}", ep.pool_idx, ep.set_idx);
let req = HealRequest::new(
HealType::ErasureSet {
buckets: buckets.clone(),
set_disk_id: set_disk_id.clone()
},
HealOptions::default(),
HealPriority::Normal,
);
let mut queue = heal_queue.lock().await;
queue.push_back(req);
info!("Enqueued auto erasure set heal for endpoint: {} (set_disk_id: {})", ep, set_disk_id);
}
}
}
}
});
Ok(())
}
/// Process heal queue
async fn process_heal_queue(
heal_queue: &Arc<Mutex<VecDeque<HealRequest>>>,
active_heals: &Arc<Mutex<HashMap<String, Arc<HealTask>>>>,
config: &Arc<RwLock<HealConfig>>,
statistics: &Arc<RwLock<HealStatistics>>,
storage: &Arc<dyn HealStorageAPI>,
) {
let config = config.read().await;
let mut active_heals_guard = active_heals.lock().await;
// check if new heal tasks can be started
if active_heals_guard.len() >= config.max_concurrent_heals {
return;
}
let mut queue = heal_queue.lock().await;
if let Some(request) = queue.pop_front() {
let task = Arc::new(HealTask::from_request(request, storage.clone()));
let task_id = task.id.clone();
active_heals_guard.insert(task_id.clone(), task.clone());
drop(active_heals_guard);
let active_heals_clone = active_heals.clone();
let statistics_clone = statistics.clone();
// start heal task
tokio::spawn(async move {
info!("Starting heal task: {}", task_id);
let result = task.execute().await;
match result {
Ok(_) => {
info!("Heal task completed successfully: {}", task_id);
}
Err(e) => {
error!("Heal task failed: {} - {}", task_id, e);
}
}
let mut active_heals_guard = active_heals_clone.lock().await;
if let Some(completed_task) = active_heals_guard.remove(&task_id) {
// update statistics
let mut stats = statistics_clone.write().await;
match completed_task.get_status().await {
HealTaskStatus::Completed => {
stats.update_task_completion(true);
}
_ => {
stats.update_task_completion(false);
}
}
stats.update_running_tasks(active_heals_guard.len() as u64);
}
});
// update statistics
let mut stats = statistics.write().await;
stats.total_tasks += 1;
}
}
}
impl std::fmt::Debug for HealManager {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("HealManager")
.field("config", &"<config>")
.field("state", &"<state>")
.field("active_heals_count", &"<active_heals>")
.field("queue_length", &"<queue>")
.finish()
}
}
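Wiring the manager together is mostly plumbing; a hedged sketch of start-up and a single object heal, assuming a `storage` handle (for example an ECStoreHealStorage) is constructed elsewhere:
// Hypothetical wiring sketch; building `storage` is elided because it depends on an initialized ECStore.
use crate::heal::{HealManager, HealRequest, manager::HealConfig, storage::HealStorageAPI};
use std::sync::Arc;
use std::time::Duration;

async fn run_heal(storage: Arc<dyn HealStorageAPI>) -> crate::error::Result<()> {
    let config = HealConfig {
        heal_interval: Duration::from_secs(30),
        max_concurrent_heals: 2,
        ..Default::default()
    };
    let manager = HealManager::new(storage, Some(config));
    manager.start().await?;
    // Queue one object heal and observe the queue.
    let id = manager
        .submit_heal_request(HealRequest::object("photos".into(), "2024/cat.jpg".into(), None))
        .await?;
    println!("submitted {id}, queue length = {}", manager.get_queue_length().await);
    manager.stop().await
}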

View File

@@ -12,8 +12,16 @@
// See the License for the specific language governing permissions and
// limitations under the License.
pub const ERR_IGNORE_FILE_CONTRIB: &str = "ignore this file's contribution toward data-usage";
pub const ERR_SKIP_FILE: &str = "skip this file";
pub const ERR_HEAL_STOP_SIGNALLED: &str = "heal stop signaled";
pub const ERR_HEAL_IDLE_TIMEOUT: &str = "healing results were not consumed for too long";
pub const ERR_RETRY_HEALING: &str = "some items failed to heal, we will retry healing this drive again";
pub mod channel;
pub mod erasure_healer;
pub mod event;
pub mod manager;
pub mod progress;
pub mod resume;
pub mod storage;
pub mod task;
pub use erasure_healer::ErasureSetHealer;
pub use manager::HealManager;
pub use resume::{CheckpointManager, ResumeCheckpoint, ResumeManager, ResumeState, ResumeUtils};
pub use task::{HealOptions, HealPriority, HealRequest, HealTask, HealType};

View File

@@ -0,0 +1,148 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use serde::{Deserialize, Serialize};
use std::time::SystemTime;
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct HealProgress {
/// Objects scanned
pub objects_scanned: u64,
/// Objects healed
pub objects_healed: u64,
/// Objects failed
pub objects_failed: u64,
/// Bytes processed
pub bytes_processed: u64,
/// Current object
pub current_object: Option<String>,
/// Progress percentage
pub progress_percentage: f64,
/// Start time
pub start_time: Option<SystemTime>,
/// Last update time
pub last_update_time: Option<SystemTime>,
/// Estimated completion time
pub estimated_completion_time: Option<SystemTime>,
}
impl HealProgress {
pub fn new() -> Self {
Self {
start_time: Some(SystemTime::now()),
last_update_time: Some(SystemTime::now()),
..Default::default()
}
}
pub fn update_progress(&mut self, scanned: u64, healed: u64, failed: u64, bytes: u64) {
self.objects_scanned = scanned;
self.objects_healed = healed;
self.objects_failed = failed;
self.bytes_processed = bytes;
self.last_update_time = Some(SystemTime::now());
// calculate progress percentage
let total = scanned + healed + failed;
if total > 0 {
self.progress_percentage = (healed as f64 / total as f64) * 100.0;
}
}
pub fn set_current_object(&mut self, object: Option<String>) {
self.current_object = object;
self.last_update_time = Some(SystemTime::now());
}
pub fn is_completed(&self) -> bool {
self.progress_percentage >= 100.0
|| self.objects_scanned > 0 && self.objects_healed + self.objects_failed >= self.objects_scanned
}
pub fn get_success_rate(&self) -> f64 {
let total = self.objects_healed + self.objects_failed;
if total > 0 {
(self.objects_healed as f64 / total as f64) * 100.0
} else {
0.0
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealStatistics {
/// Total heal tasks
pub total_tasks: u64,
/// Successful tasks
pub successful_tasks: u64,
/// Failed tasks
pub failed_tasks: u64,
/// Running tasks
pub running_tasks: u64,
/// Total healed objects
pub total_objects_healed: u64,
/// Total healed bytes
pub total_bytes_healed: u64,
/// Last update time
pub last_update_time: SystemTime,
}
impl Default for HealStatistics {
fn default() -> Self {
Self::new()
}
}
impl HealStatistics {
pub fn new() -> Self {
Self {
total_tasks: 0,
successful_tasks: 0,
failed_tasks: 0,
running_tasks: 0,
total_objects_healed: 0,
total_bytes_healed: 0,
last_update_time: SystemTime::now(),
}
}
pub fn update_task_completion(&mut self, success: bool) {
if success {
self.successful_tasks += 1;
} else {
self.failed_tasks += 1;
}
self.last_update_time = SystemTime::now();
}
pub fn update_running_tasks(&mut self, count: u64) {
self.running_tasks = count;
self.last_update_time = SystemTime::now();
}
pub fn add_healed_objects(&mut self, count: u64, bytes: u64) {
self.total_objects_healed += count;
self.total_bytes_healed += bytes;
self.last_update_time = SystemTime::now();
}
pub fn get_success_rate(&self) -> f64 {
let total = self.successful_tasks + self.failed_tasks;
if total > 0 {
(self.successful_tasks as f64 / total as f64) * 100.0
} else {
0.0
}
}
}
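A short sketch of how a task loop might drive the two progress types above (the counts and byte sizes are illustrative):
// Hypothetical driver for HealProgress / HealStatistics; values are made up.
use crate::heal::progress::{HealProgress, HealStatistics};

fn track() {
    let mut progress = HealProgress::new();
    let mut stats = HealStatistics::new();
    // Pretend we scanned 10 objects: 8 healed, 1 failed, 1 still pending.
    progress.update_progress(10, 8, 1, 4096);
    progress.set_current_object(Some("photos/2024/cat.jpg".to_string()));
    stats.add_healed_objects(8, 4096);
    stats.update_task_completion(progress.is_completed());
    println!(
        "progress {:.1}%, success rate {:.1}%, overall task success {:.1}%",
        progress.progress_percentage,
        progress.get_success_rate(),
        stats.get_success_rate()
    );
}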

View File

@@ -0,0 +1,696 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::error::{Error, Result};
use rustfs_ecstore::disk::{BUCKET_META_PREFIX, DiskAPI, DiskStore, RUSTFS_META_BUCKET};
use serde::{Deserialize, Serialize};
use std::path::Path;
use std::sync::Arc;
use std::time::{SystemTime, UNIX_EPOCH};
use tokio::sync::RwLock;
use tracing::{debug, info, warn};
use uuid::Uuid;
/// resume state file constants
const RESUME_STATE_FILE: &str = "ahm_resume_state.json";
const RESUME_PROGRESS_FILE: &str = "ahm_progress.json";
const RESUME_CHECKPOINT_FILE: &str = "ahm_checkpoint.json";
/// resume state
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResumeState {
/// task id
pub task_id: String,
/// task type
pub task_type: String,
/// start time
pub start_time: u64,
/// last update time
pub last_update: u64,
/// completed
pub completed: bool,
/// total objects
pub total_objects: u64,
/// processed objects
pub processed_objects: u64,
/// successful objects
pub successful_objects: u64,
/// failed objects
pub failed_objects: u64,
/// skipped objects
pub skipped_objects: u64,
/// current bucket
pub current_bucket: Option<String>,
/// current object
pub current_object: Option<String>,
/// completed buckets
pub completed_buckets: Vec<String>,
/// pending buckets
pub pending_buckets: Vec<String>,
/// error message
pub error_message: Option<String>,
/// retry count
pub retry_count: u32,
/// max retries
pub max_retries: u32,
}
impl ResumeState {
pub fn new(task_id: String, task_type: String, buckets: Vec<String>) -> Self {
Self {
task_id,
task_type,
start_time: SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs(),
last_update: SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs(),
completed: false,
total_objects: 0,
processed_objects: 0,
successful_objects: 0,
failed_objects: 0,
skipped_objects: 0,
current_bucket: None,
current_object: None,
completed_buckets: Vec::new(),
pending_buckets: buckets,
error_message: None,
retry_count: 0,
max_retries: 3,
}
}
pub fn update_progress(&mut self, processed: u64, successful: u64, failed: u64, skipped: u64) {
self.processed_objects = processed;
self.successful_objects = successful;
self.failed_objects = failed;
self.skipped_objects = skipped;
self.last_update = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs();
}
pub fn set_current_item(&mut self, bucket: Option<String>, object: Option<String>) {
self.current_bucket = bucket;
self.current_object = object;
self.last_update = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs();
}
pub fn complete_bucket(&mut self, bucket: &str) {
if !self.completed_buckets.contains(&bucket.to_string()) {
self.completed_buckets.push(bucket.to_string());
}
if let Some(pos) = self.pending_buckets.iter().position(|b| b == bucket) {
self.pending_buckets.remove(pos);
}
self.last_update = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs();
}
pub fn mark_completed(&mut self) {
self.completed = true;
self.last_update = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs();
}
pub fn set_error(&mut self, error: String) {
self.error_message = Some(error);
self.last_update = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs();
}
pub fn increment_retry(&mut self) {
self.retry_count += 1;
self.last_update = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs();
}
pub fn can_retry(&self) -> bool {
self.retry_count < self.max_retries
}
pub fn get_progress_percentage(&self) -> f64 {
if self.total_objects == 0 {
return 0.0;
}
(self.processed_objects as f64 / self.total_objects as f64) * 100.0
}
pub fn get_success_rate(&self) -> f64 {
let total = self.successful_objects + self.failed_objects;
if total == 0 {
return 0.0;
}
(self.successful_objects as f64 / total as f64) * 100.0
}
}
/// resume manager
pub struct ResumeManager {
disk: DiskStore,
state: Arc<RwLock<ResumeState>>,
}
impl ResumeManager {
/// create new resume manager
pub async fn new(disk: DiskStore, task_id: String, task_type: String, buckets: Vec<String>) -> Result<Self> {
let state = ResumeState::new(task_id, task_type, buckets);
let manager = Self {
disk,
state: Arc::new(RwLock::new(state)),
};
// save initial state
manager.save_state().await?;
Ok(manager)
}
/// load resume state from disk
pub async fn load_from_disk(disk: DiskStore, task_id: &str) -> Result<Self> {
let state_data = Self::read_state_file(&disk, task_id).await?;
let state: ResumeState = serde_json::from_slice(&state_data).map_err(|e| Error::TaskExecutionFailed {
message: format!("Failed to deserialize resume state: {e}"),
})?;
Ok(Self {
disk,
state: Arc::new(RwLock::new(state)),
})
}
/// check if resume state exists
pub async fn has_resume_state(disk: &DiskStore, task_id: &str) -> bool {
let file_path = Path::new(BUCKET_META_PREFIX).join(format!("{task_id}_{RESUME_STATE_FILE}"));
match disk.read_all(RUSTFS_META_BUCKET, file_path.to_str().unwrap()).await {
Ok(data) => !data.is_empty(),
Err(_) => false,
}
}
/// get current state
pub async fn get_state(&self) -> ResumeState {
self.state.read().await.clone()
}
/// update progress
pub async fn update_progress(&self, processed: u64, successful: u64, failed: u64, skipped: u64) -> Result<()> {
let mut state = self.state.write().await;
state.update_progress(processed, successful, failed, skipped);
drop(state);
self.save_state().await
}
/// set current item
pub async fn set_current_item(&self, bucket: Option<String>, object: Option<String>) -> Result<()> {
let mut state = self.state.write().await;
state.set_current_item(bucket, object);
drop(state);
self.save_state().await
}
/// complete bucket
pub async fn complete_bucket(&self, bucket: &str) -> Result<()> {
let mut state = self.state.write().await;
state.complete_bucket(bucket);
drop(state);
self.save_state().await
}
/// mark task completed
pub async fn mark_completed(&self) -> Result<()> {
let mut state = self.state.write().await;
state.mark_completed();
drop(state);
self.save_state().await
}
/// set error message
pub async fn set_error(&self, error: String) -> Result<()> {
let mut state = self.state.write().await;
state.set_error(error);
drop(state);
self.save_state().await
}
/// increment retry count
pub async fn increment_retry(&self) -> Result<()> {
let mut state = self.state.write().await;
state.increment_retry();
drop(state);
self.save_state().await
}
/// cleanup resume state
pub async fn cleanup(&self) -> Result<()> {
let state = self.state.read().await;
let task_id = &state.task_id;
// delete state files
let state_file = Path::new(BUCKET_META_PREFIX).join(format!("{task_id}_{RESUME_STATE_FILE}"));
let progress_file = Path::new(BUCKET_META_PREFIX).join(format!("{task_id}_{RESUME_PROGRESS_FILE}"));
let checkpoint_file = Path::new(BUCKET_META_PREFIX).join(format!("{task_id}_{RESUME_CHECKPOINT_FILE}"));
// ignore delete errors, files may not exist
let _ = self
.disk
.delete(RUSTFS_META_BUCKET, state_file.to_str().unwrap(), Default::default())
.await;
let _ = self
.disk
.delete(RUSTFS_META_BUCKET, progress_file.to_str().unwrap(), Default::default())
.await;
let _ = self
.disk
.delete(RUSTFS_META_BUCKET, checkpoint_file.to_str().unwrap(), Default::default())
.await;
info!("Cleaned up resume state for task: {}", task_id);
Ok(())
}
/// save state to disk
async fn save_state(&self) -> Result<()> {
let state = self.state.read().await;
let state_data = serde_json::to_vec(&*state).map_err(|e| Error::TaskExecutionFailed {
message: format!("Failed to serialize resume state: {e}"),
})?;
let file_path = Path::new(BUCKET_META_PREFIX).join(format!("{}_{}", state.task_id, RESUME_STATE_FILE));
self.disk
.write_all(RUSTFS_META_BUCKET, file_path.to_str().unwrap(), state_data.into())
.await
.map_err(|e| Error::TaskExecutionFailed {
message: format!("Failed to save resume state: {e}"),
})?;
debug!("Saved resume state for task: {}", state.task_id);
Ok(())
}
/// read state file from disk
async fn read_state_file(disk: &DiskStore, task_id: &str) -> Result<Vec<u8>> {
let file_path = Path::new(BUCKET_META_PREFIX).join(format!("{task_id}_{RESUME_STATE_FILE}"));
disk.read_all(RUSTFS_META_BUCKET, file_path.to_str().unwrap())
.await
.map(|bytes| bytes.to_vec())
.map_err(|e| Error::TaskExecutionFailed {
message: format!("Failed to read resume state file: {e}"),
})
}
}
/// resume checkpoint
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResumeCheckpoint {
/// task id
pub task_id: String,
/// checkpoint time
pub checkpoint_time: u64,
/// current bucket index
pub current_bucket_index: usize,
/// current object index
pub current_object_index: usize,
/// processed objects
pub processed_objects: Vec<String>,
/// failed objects
pub failed_objects: Vec<String>,
/// skipped objects
pub skipped_objects: Vec<String>,
}
impl ResumeCheckpoint {
pub fn new(task_id: String) -> Self {
Self {
task_id,
checkpoint_time: SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs(),
current_bucket_index: 0,
current_object_index: 0,
processed_objects: Vec::new(),
failed_objects: Vec::new(),
skipped_objects: Vec::new(),
}
}
pub fn update_position(&mut self, bucket_index: usize, object_index: usize) {
self.current_bucket_index = bucket_index;
self.current_object_index = object_index;
self.checkpoint_time = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs();
}
pub fn add_processed_object(&mut self, object: String) {
if !self.processed_objects.contains(&object) {
self.processed_objects.push(object);
}
}
pub fn add_failed_object(&mut self, object: String) {
if !self.failed_objects.contains(&object) {
self.failed_objects.push(object);
}
}
pub fn add_skipped_object(&mut self, object: String) {
if !self.skipped_objects.contains(&object) {
self.skipped_objects.push(object);
}
}
}
/// resume checkpoint manager
pub struct CheckpointManager {
disk: DiskStore,
checkpoint: Arc<RwLock<ResumeCheckpoint>>,
}
impl CheckpointManager {
/// create new checkpoint manager
pub async fn new(disk: DiskStore, task_id: String) -> Result<Self> {
let checkpoint = ResumeCheckpoint::new(task_id);
let manager = Self {
disk,
checkpoint: Arc::new(RwLock::new(checkpoint)),
};
// save initial checkpoint
manager.save_checkpoint().await?;
Ok(manager)
}
/// load checkpoint from disk
pub async fn load_from_disk(disk: DiskStore, task_id: &str) -> Result<Self> {
let checkpoint_data = Self::read_checkpoint_file(&disk, task_id).await?;
let checkpoint: ResumeCheckpoint = serde_json::from_slice(&checkpoint_data).map_err(|e| Error::TaskExecutionFailed {
message: format!("Failed to deserialize checkpoint: {e}"),
})?;
Ok(Self {
disk,
checkpoint: Arc::new(RwLock::new(checkpoint)),
})
}
/// check if checkpoint exists
pub async fn has_checkpoint(disk: &DiskStore, task_id: &str) -> bool {
let file_path = Path::new(BUCKET_META_PREFIX).join(format!("{task_id}_{RESUME_CHECKPOINT_FILE}"));
match disk.read_all(RUSTFS_META_BUCKET, file_path.to_str().unwrap()).await {
Ok(data) => !data.is_empty(),
Err(_) => false,
}
}
/// get current checkpoint
pub async fn get_checkpoint(&self) -> ResumeCheckpoint {
self.checkpoint.read().await.clone()
}
/// update position
pub async fn update_position(&self, bucket_index: usize, object_index: usize) -> Result<()> {
let mut checkpoint = self.checkpoint.write().await;
checkpoint.update_position(bucket_index, object_index);
drop(checkpoint);
self.save_checkpoint().await
}
/// add processed object
pub async fn add_processed_object(&self, object: String) -> Result<()> {
let mut checkpoint = self.checkpoint.write().await;
checkpoint.add_processed_object(object);
drop(checkpoint);
self.save_checkpoint().await
}
/// add failed object
pub async fn add_failed_object(&self, object: String) -> Result<()> {
let mut checkpoint = self.checkpoint.write().await;
checkpoint.add_failed_object(object);
drop(checkpoint);
self.save_checkpoint().await
}
/// add skipped object
pub async fn add_skipped_object(&self, object: String) -> Result<()> {
let mut checkpoint = self.checkpoint.write().await;
checkpoint.add_skipped_object(object);
drop(checkpoint);
self.save_checkpoint().await
}
/// cleanup checkpoint
pub async fn cleanup(&self) -> Result<()> {
let checkpoint = self.checkpoint.read().await;
let task_id = &checkpoint.task_id;
let checkpoint_file = Path::new(BUCKET_META_PREFIX).join(format!("{task_id}_{RESUME_CHECKPOINT_FILE}"));
let _ = self
.disk
.delete(RUSTFS_META_BUCKET, checkpoint_file.to_str().unwrap(), Default::default())
.await;
info!("Cleaned up checkpoint for task: {}", task_id);
Ok(())
}
/// save checkpoint to disk
async fn save_checkpoint(&self) -> Result<()> {
let checkpoint = self.checkpoint.read().await;
let checkpoint_data = serde_json::to_vec(&*checkpoint).map_err(|e| Error::TaskExecutionFailed {
message: format!("Failed to serialize checkpoint: {e}"),
})?;
let file_path = Path::new(BUCKET_META_PREFIX).join(format!("{}_{}", checkpoint.task_id, RESUME_CHECKPOINT_FILE));
self.disk
.write_all(RUSTFS_META_BUCKET, file_path.to_str().unwrap(), checkpoint_data.into())
.await
.map_err(|e| Error::TaskExecutionFailed {
message: format!("Failed to save checkpoint: {e}"),
})?;
debug!("Saved checkpoint for task: {}", checkpoint.task_id);
Ok(())
}
/// read checkpoint file from disk
async fn read_checkpoint_file(disk: &DiskStore, task_id: &str) -> Result<Vec<u8>> {
let file_path = Path::new(BUCKET_META_PREFIX).join(format!("{task_id}_{RESUME_CHECKPOINT_FILE}"));
disk.read_all(RUSTFS_META_BUCKET, file_path.to_str().unwrap())
.await
.map(|bytes| bytes.to_vec())
.map_err(|e| Error::TaskExecutionFailed {
message: format!("Failed to read checkpoint file: {e}"),
})
}
}
/// resume utils
pub struct ResumeUtils;
impl ResumeUtils {
/// generate unique task id
pub fn generate_task_id() -> String {
Uuid::new_v4().to_string()
}
/// check if task can be resumed
pub async fn can_resume_task(disk: &DiskStore, task_id: &str) -> bool {
ResumeManager::has_resume_state(disk, task_id).await
}
/// get all resumable task ids
pub async fn get_resumable_tasks(disk: &DiskStore) -> Result<Vec<String>> {
// List all files in the buckets metadata directory
let entries = match disk.list_dir("", RUSTFS_META_BUCKET, BUCKET_META_PREFIX, -1).await {
Ok(entries) => entries,
Err(e) => {
debug!("Failed to list resume state files: {}", e);
return Ok(Vec::new());
}
};
let mut task_ids = Vec::new();
// Filter files that end with ahm_resume_state.json and extract task IDs
for entry in entries {
if entry.ends_with(&format!("_{RESUME_STATE_FILE}")) {
// Extract task ID from filename: {task_id}_ahm_resume_state.json
if let Some(task_id) = entry.strip_suffix(&format!("_{RESUME_STATE_FILE}")) {
if !task_id.is_empty() {
task_ids.push(task_id.to_string());
}
}
}
}
debug!("Found {} resumable tasks: {:?}", task_ids.len(), task_ids);
Ok(task_ids)
}
/// cleanup expired resume states
pub async fn cleanup_expired_states(disk: &DiskStore, max_age_hours: u64) -> Result<()> {
let task_ids = Self::get_resumable_tasks(disk).await?;
let current_time = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs();
for task_id in task_ids {
if let Ok(resume_manager) = ResumeManager::load_from_disk(disk.clone(), &task_id).await {
let state = resume_manager.get_state().await;
let age_hours = (current_time - state.last_update) / 3600;
if age_hours > max_age_hours {
info!("Cleaning up expired resume state for task: {} (age: {} hours)", task_id, age_hours);
if let Err(e) = resume_manager.cleanup().await {
warn!("Failed to cleanup expired resume state for task {}: {}", task_id, e);
}
}
}
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
#[tokio::test]
async fn test_resume_state_creation() {
let task_id = ResumeUtils::generate_task_id();
let buckets = vec!["bucket1".to_string(), "bucket2".to_string()];
let state = ResumeState::new(task_id.clone(), "erasure_set".to_string(), buckets);
assert_eq!(state.task_id, task_id);
assert_eq!(state.task_type, "erasure_set");
assert!(!state.completed);
assert_eq!(state.processed_objects, 0);
assert_eq!(state.pending_buckets.len(), 2);
}
#[tokio::test]
async fn test_resume_state_progress() {
let task_id = ResumeUtils::generate_task_id();
let buckets = vec!["bucket1".to_string()];
let mut state = ResumeState::new(task_id, "erasure_set".to_string(), buckets);
state.update_progress(10, 8, 1, 1);
assert_eq!(state.processed_objects, 10);
assert_eq!(state.successful_objects, 8);
assert_eq!(state.failed_objects, 1);
assert_eq!(state.skipped_objects, 1);
let progress = state.get_progress_percentage();
assert_eq!(progress, 0.0); // total_objects is 0
state.total_objects = 100;
let progress = state.get_progress_percentage();
assert_eq!(progress, 10.0);
}
#[tokio::test]
async fn test_resume_state_bucket_completion() {
let task_id = ResumeUtils::generate_task_id();
let buckets = vec!["bucket1".to_string(), "bucket2".to_string()];
let mut state = ResumeState::new(task_id, "erasure_set".to_string(), buckets);
assert_eq!(state.pending_buckets.len(), 2);
assert_eq!(state.completed_buckets.len(), 0);
state.complete_bucket("bucket1");
assert_eq!(state.pending_buckets.len(), 1);
assert_eq!(state.completed_buckets.len(), 1);
assert!(state.completed_buckets.contains(&"bucket1".to_string()));
}
#[tokio::test]
async fn test_resume_utils() {
let task_id1 = ResumeUtils::generate_task_id();
let task_id2 = ResumeUtils::generate_task_id();
assert_ne!(task_id1, task_id2);
assert_eq!(task_id1.len(), 36); // UUID length
assert_eq!(task_id2.len(), 36);
}
#[tokio::test]
async fn test_get_resumable_tasks_integration() {
use rustfs_ecstore::disk::{DiskOption, endpoint::Endpoint, new_disk};
use tempfile::TempDir;
// Create a temporary directory for testing
let temp_dir = TempDir::new().unwrap();
let disk_path = temp_dir.path().join("test_disk");
std::fs::create_dir_all(&disk_path).unwrap();
// Create a local disk for testing
let endpoint = Endpoint::try_from(disk_path.to_string_lossy().as_ref()).unwrap();
let disk_option = DiskOption {
cleanup: false,
health_check: false,
};
let disk = new_disk(&endpoint, &disk_option).await.unwrap();
// Create necessary directories first (ignore if already exist)
let _ = disk.make_volume(RUSTFS_META_BUCKET).await;
let _ = disk.make_volume(&format!("{RUSTFS_META_BUCKET}/{BUCKET_META_PREFIX}")).await;
// Create some test resume state files
let task_ids = vec![
"test-task-1".to_string(),
"test-task-2".to_string(),
"test-task-3".to_string(),
];
// Save resume state files for each task
for task_id in &task_ids {
let state = ResumeState::new(
task_id.clone(),
"erasure_set".to_string(),
vec!["bucket1".to_string(), "bucket2".to_string()],
);
let state_data = serde_json::to_vec(&state).unwrap();
let file_path = format!("{BUCKET_META_PREFIX}/{task_id}_{RESUME_STATE_FILE}");
disk.write_all(RUSTFS_META_BUCKET, &file_path, state_data.into())
.await
.unwrap();
}
// Also create some non-resume state files to test filtering
let non_resume_files = vec![
"other_file.txt",
"task4_ahm_checkpoint.json",
"task5_ahm_progress.json",
"_ahm_resume_state.json", // Invalid: empty task ID
];
for file_name in non_resume_files {
let file_path = format!("{BUCKET_META_PREFIX}/{file_name}");
disk.write_all(RUSTFS_META_BUCKET, &file_path, b"test data".to_vec().into())
.await
.unwrap();
}
// Now call get_resumable_tasks to see if it finds the correct files
let found_task_ids = ResumeUtils::get_resumable_tasks(&disk).await.unwrap();
// Verify that only the valid resume state files are found
assert_eq!(found_task_ids.len(), 3);
for task_id in &task_ids {
assert!(found_task_ids.contains(task_id), "Task ID {task_id} not found");
}
// Verify that invalid files are not included
assert!(!found_task_ids.contains(&"".to_string()));
assert!(!found_task_ids.contains(&"task4".to_string()));
assert!(!found_task_ids.contains(&"task5".to_string()));
// Clean up
temp_dir.close().unwrap();
}
}
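Beyond the unit tests, the intended lifecycle is roughly: create a ResumeManager when a task starts (or reload an existing state), persist progress as buckets complete, and clean up once the task finishes. A hedged sketch, assuming a DiskStore is already available:
// Hypothetical resume lifecycle sketch; obtaining `disk` is elided.
use crate::heal::resume::{ResumeManager, ResumeUtils};
use rustfs_ecstore::disk::DiskStore;

async fn heal_with_resume(disk: DiskStore) -> crate::error::Result<()> {
    let task_id = ResumeUtils::generate_task_id();
    let buckets = vec!["photos".to_string(), "logs".to_string()];
    let resume = if ResumeUtils::can_resume_task(&disk, &task_id).await {
        ResumeManager::load_from_disk(disk.clone(), &task_id).await?
    } else {
        ResumeManager::new(disk.clone(), task_id.clone(), "erasure_set".to_string(), buckets).await?
    };
    resume.set_current_item(Some("photos".to_string()), None).await?;
    resume.update_progress(10, 9, 1, 0).await?;
    resume.complete_bucket("photos").await?;
    resume.mark_completed().await?;
    // Drop the persisted state once the task has finished cleanly.
    resume.cleanup().await
}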

View File

@@ -0,0 +1,506 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::error::{Error, Result};
use async_trait::async_trait;
use rustfs_common::heal_channel::{HealOpts, HealScanMode};
use rustfs_ecstore::{
disk::{DiskStore, endpoint::Endpoint},
store::ECStore,
store_api::{BucketInfo, ObjectIO, StorageAPI},
};
use rustfs_madmin::heal_commands::HealResultItem;
use std::sync::Arc;
use tracing::{debug, error, info, warn};
/// Disk status for heal operations
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DiskStatus {
/// Ok
Ok,
/// Offline
Offline,
/// Corrupt
Corrupt,
/// Missing
Missing,
/// Permission denied
PermissionDenied,
/// Faulty
Faulty,
/// Root mount
RootMount,
/// Unknown
Unknown,
/// Unformatted
Unformatted,
}
/// Heal storage layer interface
#[async_trait]
pub trait HealStorageAPI: Send + Sync {
/// Get object meta
async fn get_object_meta(&self, bucket: &str, object: &str) -> Result<Option<rustfs_ecstore::store_api::ObjectInfo>>;
/// Get object data
async fn get_object_data(&self, bucket: &str, object: &str) -> Result<Option<Vec<u8>>>;
/// Put object data
async fn put_object_data(&self, bucket: &str, object: &str, data: &[u8]) -> Result<()>;
/// Delete object
async fn delete_object(&self, bucket: &str, object: &str) -> Result<()>;
/// Check object integrity
async fn verify_object_integrity(&self, bucket: &str, object: &str) -> Result<bool>;
/// EC decode rebuild
async fn ec_decode_rebuild(&self, bucket: &str, object: &str) -> Result<Vec<u8>>;
/// Get disk status
async fn get_disk_status(&self, endpoint: &Endpoint) -> Result<DiskStatus>;
/// Format disk
async fn format_disk(&self, endpoint: &Endpoint) -> Result<()>;
/// Get bucket info
async fn get_bucket_info(&self, bucket: &str) -> Result<Option<BucketInfo>>;
/// Fix bucket metadata
async fn heal_bucket_metadata(&self, bucket: &str) -> Result<()>;
/// Get all buckets
async fn list_buckets(&self) -> Result<Vec<BucketInfo>>;
/// Check object exists
async fn object_exists(&self, bucket: &str, object: &str) -> Result<bool>;
/// Get object size
async fn get_object_size(&self, bucket: &str, object: &str) -> Result<Option<u64>>;
/// Get object checksum
async fn get_object_checksum(&self, bucket: &str, object: &str) -> Result<Option<String>>;
/// Heal object using ecstore
async fn heal_object(
&self,
bucket: &str,
object: &str,
version_id: Option<&str>,
opts: &HealOpts,
) -> Result<(HealResultItem, Option<Error>)>;
/// Heal bucket using ecstore
async fn heal_bucket(&self, bucket: &str, opts: &HealOpts) -> Result<HealResultItem>;
/// Heal format using ecstore
async fn heal_format(&self, dry_run: bool) -> Result<(HealResultItem, Option<Error>)>;
/// List objects for healing
async fn list_objects_for_heal(&self, bucket: &str, prefix: &str) -> Result<Vec<String>>;
/// Get disk for resume functionality
async fn get_disk_for_resume(&self, set_disk_id: &str) -> Result<DiskStore>;
}
/// ECStore Heal storage layer implementation
pub struct ECStoreHealStorage {
ecstore: Arc<ECStore>,
}
impl ECStoreHealStorage {
pub fn new(ecstore: Arc<ECStore>) -> Self {
Self { ecstore }
}
}
#[async_trait]
impl HealStorageAPI for ECStoreHealStorage {
async fn get_object_meta(&self, bucket: &str, object: &str) -> Result<Option<rustfs_ecstore::store_api::ObjectInfo>> {
debug!("Getting object meta: {}/{}", bucket, object);
match self.ecstore.get_object_info(bucket, object, &Default::default()).await {
Ok(info) => Ok(Some(info)),
Err(e) => {
error!("Failed to get object meta: {}/{} - {}", bucket, object, e);
Err(Error::other(e))
}
}
}
async fn get_object_data(&self, bucket: &str, object: &str) -> Result<Option<Vec<u8>>> {
debug!("Getting object data: {}/{}", bucket, object);
match (*self.ecstore)
.get_object_reader(bucket, object, None, Default::default(), &Default::default())
.await
{
Ok(mut reader) => match reader.read_all().await {
Ok(data) => Ok(Some(data)),
Err(e) => {
error!("Failed to read object data: {}/{} - {}", bucket, object, e);
Err(Error::other(e))
}
},
Err(e) => {
error!("Failed to get object: {}/{} - {}", bucket, object, e);
Err(Error::other(e))
}
}
}
async fn put_object_data(&self, bucket: &str, object: &str, data: &[u8]) -> Result<()> {
debug!("Putting object data: {}/{} ({} bytes)", bucket, object, data.len());
let mut reader = rustfs_ecstore::store_api::PutObjReader::from_vec(data.to_vec());
match (*self.ecstore)
.put_object(bucket, object, &mut reader, &Default::default())
.await
{
Ok(_) => {
info!("Successfully put object: {}/{}", bucket, object);
Ok(())
}
Err(e) => {
error!("Failed to put object: {}/{} - {}", bucket, object, e);
Err(Error::other(e))
}
}
}
async fn delete_object(&self, bucket: &str, object: &str) -> Result<()> {
debug!("Deleting object: {}/{}", bucket, object);
match self.ecstore.delete_object(bucket, object, Default::default()).await {
Ok(_) => {
info!("Successfully deleted object: {}/{}", bucket, object);
Ok(())
}
Err(e) => {
error!("Failed to delete object: {}/{} - {}", bucket, object, e);
Err(Error::other(e))
}
}
}
async fn verify_object_integrity(&self, bucket: &str, object: &str) -> Result<bool> {
debug!("Verifying object integrity: {}/{}", bucket, object);
// Try to get object info and data to verify integrity
match self.get_object_meta(bucket, object).await? {
Some(obj_info) => {
// Check if object has valid metadata
if obj_info.size < 0 {
warn!("Object has invalid size: {}/{}", bucket, object);
return Ok(false);
}
// Try to read object data to verify it's accessible
match self.get_object_data(bucket, object).await {
Ok(Some(_)) => {
info!("Object integrity check passed: {}/{}", bucket, object);
Ok(true)
}
Ok(None) => {
warn!("Object data not found: {}/{}", bucket, object);
Ok(false)
}
Err(_) => {
warn!("Object data read failed: {}/{}", bucket, object);
Ok(false)
}
}
}
None => {
warn!("Object metadata not found: {}/{}", bucket, object);
Ok(false)
}
}
}
async fn ec_decode_rebuild(&self, bucket: &str, object: &str) -> Result<Vec<u8>> {
debug!("EC decode rebuild: {}/{}", bucket, object);
// Use ecstore's heal_object to rebuild the object
let heal_opts = HealOpts {
recursive: false,
dry_run: false,
remove: false,
recreate: true,
scan_mode: HealScanMode::Deep,
update_parity: true,
no_lock: false,
pool: None,
set: None,
};
match self.heal_object(bucket, object, None, &heal_opts).await {
Ok((_result, error)) => {
if error.is_some() {
return Err(Error::TaskExecutionFailed {
message: format!("Heal failed: {error:?}"),
});
}
// After healing, try to read the object data
match self.get_object_data(bucket, object).await? {
Some(data) => {
info!("EC decode rebuild successful: {}/{} ({} bytes)", bucket, object, data.len());
Ok(data)
}
None => {
error!("Object not found after heal: {}/{}", bucket, object);
Err(Error::TaskExecutionFailed {
message: format!("Object not found after heal: {bucket}/{object}"),
})
}
}
}
Err(e) => {
error!("Heal operation failed: {}/{} - {}", bucket, object, e);
Err(e)
}
}
}
async fn get_disk_status(&self, endpoint: &Endpoint) -> Result<DiskStatus> {
debug!("Getting disk status: {:?}", endpoint);
// TODO: implement disk status check using ecstore
// For now, return Ok status
info!("Disk status check: {:?} - OK", endpoint);
Ok(DiskStatus::Ok)
}
async fn format_disk(&self, endpoint: &Endpoint) -> Result<()> {
debug!("Formatting disk: {:?}", endpoint);
// Use ecstore's heal_format
match self.heal_format(false).await {
Ok((_, error)) => {
if error.is_some() {
return Err(Error::other(format!("Format failed: {error:?}")));
}
info!("Successfully formatted disk: {:?}", endpoint);
Ok(())
}
Err(e) => {
error!("Failed to format disk: {:?} - {}", endpoint, e);
Err(e)
}
}
}
async fn get_bucket_info(&self, bucket: &str) -> Result<Option<BucketInfo>> {
debug!("Getting bucket info: {}", bucket);
match self.ecstore.get_bucket_info(bucket, &Default::default()).await {
Ok(info) => Ok(Some(info)),
Err(e) => {
error!("Failed to get bucket info: {} - {}", bucket, e);
Err(Error::other(e))
}
}
}
async fn heal_bucket_metadata(&self, bucket: &str) -> Result<()> {
debug!("Healing bucket metadata: {}", bucket);
let heal_opts = HealOpts {
recursive: true,
dry_run: false,
remove: false,
recreate: false,
scan_mode: HealScanMode::Normal,
update_parity: false,
no_lock: false,
pool: None,
set: None,
};
match self.heal_bucket(bucket, &heal_opts).await {
Ok(_) => {
info!("Successfully healed bucket metadata: {}", bucket);
Ok(())
}
Err(e) => {
error!("Failed to heal bucket metadata: {} - {}", bucket, e);
Err(e)
}
}
}
async fn list_buckets(&self) -> Result<Vec<BucketInfo>> {
debug!("Listing buckets");
match self.ecstore.list_bucket(&Default::default()).await {
Ok(buckets) => Ok(buckets),
Err(e) => {
error!("Failed to list buckets: {}", e);
Err(Error::other(e))
}
}
}
async fn object_exists(&self, bucket: &str, object: &str) -> Result<bool> {
debug!("Checking object exists: {}/{}", bucket, object);
match self.get_object_meta(bucket, object).await {
Ok(Some(_)) => Ok(true),
Ok(None) => Ok(false),
Err(_) => Ok(false),
}
}
async fn get_object_size(&self, bucket: &str, object: &str) -> Result<Option<u64>> {
debug!("Getting object size: {}/{}", bucket, object);
match self.get_object_meta(bucket, object).await {
Ok(Some(obj_info)) => Ok(Some(obj_info.size as u64)),
Ok(None) => Ok(None),
Err(e) => Err(e),
}
}
async fn get_object_checksum(&self, bucket: &str, object: &str) -> Result<Option<String>> {
debug!("Getting object checksum: {}/{}", bucket, object);
match self.get_object_meta(bucket, object).await {
Ok(Some(obj_info)) => {
// Convert checksum bytes to hex string
let checksum = obj_info.checksum.iter().map(|b| format!("{b:02x}")).collect::<String>();
Ok(Some(checksum))
}
Ok(None) => Ok(None),
Err(e) => Err(e),
}
}
async fn heal_object(
&self,
bucket: &str,
object: &str,
version_id: Option<&str>,
opts: &HealOpts,
) -> Result<(HealResultItem, Option<Error>)> {
debug!("Healing object: {}/{}", bucket, object);
let version_id_str = version_id.unwrap_or("");
match self.ecstore.heal_object(bucket, object, version_id_str, opts).await {
Ok((result, ecstore_error)) => {
let error = ecstore_error.map(Error::other);
info!("Heal object completed: {}/{} - result: {:?}, error: {:?}", bucket, object, result, error);
Ok((result, error))
}
Err(e) => {
error!("Heal object failed: {}/{} - {}", bucket, object, e);
Err(Error::other(e))
}
}
}
async fn heal_bucket(&self, bucket: &str, opts: &HealOpts) -> Result<HealResultItem> {
debug!("Healing bucket: {}", bucket);
match self.ecstore.heal_bucket(bucket, opts).await {
Ok(result) => {
info!("Heal bucket completed: {} - result: {:?}", bucket, result);
Ok(result)
}
Err(e) => {
error!("Heal bucket failed: {} - {}", bucket, e);
Err(Error::other(e))
}
}
}
async fn heal_format(&self, dry_run: bool) -> Result<(HealResultItem, Option<Error>)> {
debug!("Healing format (dry_run: {})", dry_run);
match self.ecstore.heal_format(dry_run).await {
Ok((result, ecstore_error)) => {
let error = ecstore_error.map(Error::other);
info!("Heal format completed - result: {:?}, error: {:?}", result, error);
Ok((result, error))
}
Err(e) => {
error!("Heal format failed: {}", e);
Err(Error::other(e))
}
}
}
async fn list_objects_for_heal(&self, bucket: &str, prefix: &str) -> Result<Vec<String>> {
debug!("Listing objects for heal: {}/{}", bucket, prefix);
// Use list_objects_v2 to get objects
match self
.ecstore
.clone()
.list_objects_v2(bucket, prefix, None, None, 1000, false, None)
.await
{
Ok(list_info) => {
let objects: Vec<String> = list_info.objects.into_iter().map(|obj| obj.name).collect();
info!("Found {} objects for heal in {}/{}", objects.len(), bucket, prefix);
Ok(objects)
}
Err(e) => {
error!("Failed to list objects for heal: {}/{} - {}", bucket, prefix, e);
Err(Error::other(e))
}
}
}
async fn get_disk_for_resume(&self, set_disk_id: &str) -> Result<DiskStore> {
debug!("Getting disk for resume: {}", set_disk_id);
// Parse set_disk_id to extract pool and set indices
// Format: "pool_{pool_idx}_set_{set_idx}"
let parts: Vec<&str> = set_disk_id.split('_').collect();
if parts.len() != 4 || parts[0] != "pool" || parts[2] != "set" {
return Err(Error::TaskExecutionFailed {
message: format!("Invalid set_disk_id format: {set_disk_id}"),
});
}
let pool_idx: usize = parts[1].parse().map_err(|_| Error::TaskExecutionFailed {
message: format!("Invalid pool index in set_disk_id: {set_disk_id}"),
})?;
let set_idx: usize = parts[3].parse().map_err(|_| Error::TaskExecutionFailed {
message: format!("Invalid set index in set_disk_id: {set_disk_id}"),
})?;
// Get the first available disk from the set
let disks = self
.ecstore
.get_disks(pool_idx, set_idx)
.await
.map_err(|e| Error::TaskExecutionFailed {
message: format!("Failed to get disks for pool {pool_idx} set {set_idx}: {e}"),
})?;
// Find the first available disk
if let Some(disk_store) = disks.into_iter().flatten().next() {
info!("Found disk for resume: {:?}", disk_store);
return Ok(disk_store);
}
Err(Error::TaskExecutionFailed {
message: format!("No available disk found for set_disk_id: {set_disk_id}"),
})
}
}
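Wrapping an existing ECStore yields a concrete HealStorageAPI; a minimal sketch of healing one object through it (the ecstore handle is assumed to be initialized elsewhere, and the dry-run options are illustrative):
// Hypothetical usage of ECStoreHealStorage; obtaining `ecstore` is elided.
use crate::heal::storage::{ECStoreHealStorage, HealStorageAPI};
use rustfs_common::heal_channel::{HealOpts, HealScanMode};
use rustfs_ecstore::store::ECStore;
use std::sync::Arc;

async fn heal_one(ecstore: Arc<ECStore>) -> crate::error::Result<()> {
    let storage = ECStoreHealStorage::new(ecstore);
    let opts = HealOpts {
        recursive: false,
        dry_run: true, // inspect only; flip to false to actually repair
        remove: false,
        recreate: false,
        scan_mode: HealScanMode::Normal,
        update_parity: false,
        no_lock: false,
        pool: None,
        set: None,
    };
    let (result, err) = storage.heal_object("photos", "2024/cat.jpg", None, &opts).await?;
    println!("healed: {:?}, residual error: {:?}", result, err);
    Ok(())
}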

crates/ahm/src/heal/task.rs (new file, 855 lines)
View File

@@ -0,0 +1,855 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::error::{Error, Result};
use crate::heal::ErasureSetHealer;
use crate::heal::{progress::HealProgress, storage::HealStorageAPI};
use rustfs_common::heal_channel::{HealOpts, HealScanMode};
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use std::time::{Duration, SystemTime};
use tokio::sync::RwLock;
use tracing::{error, info, warn};
use uuid::Uuid;
/// Heal type
#[derive(Debug, Clone)]
pub enum HealType {
/// Object heal
Object {
bucket: String,
object: String,
version_id: Option<String>,
},
/// Bucket heal
Bucket { bucket: String },
/// Erasure Set heal (includes disk format repair)
ErasureSet { buckets: Vec<String>, set_disk_id: String },
/// Metadata heal
Metadata { bucket: String, object: String },
/// MRF heal
MRF { meta_path: String },
/// EC decode heal
ECDecode {
bucket: String,
object: String,
version_id: Option<String>,
},
}
/// Heal priority
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub enum HealPriority {
/// Low priority
Low = 0,
/// Normal priority
Normal = 1,
/// High priority
High = 2,
/// Urgent priority
Urgent = 3,
}
impl Default for HealPriority {
fn default() -> Self {
Self::Normal
}
}
/// Heal options
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealOptions {
/// Scan mode
pub scan_mode: HealScanMode,
/// Whether to remove corrupted data
pub remove_corrupted: bool,
/// Whether to recreate
pub recreate_missing: bool,
/// Whether to update parity
pub update_parity: bool,
/// Whether to recursively process
pub recursive: bool,
/// Whether to dry run
pub dry_run: bool,
/// Timeout
pub timeout: Option<Duration>,
/// pool index
pub pool_index: Option<usize>,
/// set index
pub set_index: Option<usize>,
}
impl Default for HealOptions {
fn default() -> Self {
Self {
scan_mode: HealScanMode::Normal,
remove_corrupted: false,
recreate_missing: true,
update_parity: true,
recursive: false,
dry_run: false,
timeout: Some(Duration::from_secs(300)), // 5 minutes default timeout
pool_index: None,
set_index: None,
}
}
}
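A quick sketch of overriding a few HealOptions fields while keeping the remaining defaults via struct-update syntax; assumes it sits next to the definitions above:

#[cfg(test)]
mod heal_options_sketch {
    use super::*;

    #[test]
    fn override_some_defaults() {
        // Deep, recursive dry run; every other field keeps its Default value.
        let opts = HealOptions {
            scan_mode: HealScanMode::Deep,
            recursive: true,
            dry_run: true,
            ..Default::default()
        };
        assert!(opts.recreate_missing);
        assert!(opts.update_parity);
        assert_eq!(opts.timeout, Some(Duration::from_secs(300)));
    }
}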
/// Heal task status
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum HealTaskStatus {
/// Pending
Pending,
/// Running
Running,
/// Completed
Completed,
/// Failed
Failed { error: String },
/// Cancelled
Cancelled,
/// Timeout
Timeout,
}
/// Heal request
#[derive(Debug, Clone)]
pub struct HealRequest {
/// Request ID
pub id: String,
/// Heal type
pub heal_type: HealType,
/// Heal options
pub options: HealOptions,
/// Priority
pub priority: HealPriority,
/// Created time
pub created_at: SystemTime,
}
impl HealRequest {
pub fn new(heal_type: HealType, options: HealOptions, priority: HealPriority) -> Self {
Self {
id: Uuid::new_v4().to_string(),
heal_type,
options,
priority,
created_at: SystemTime::now(),
}
}
pub fn object(bucket: String, object: String, version_id: Option<String>) -> Self {
Self::new(
HealType::Object {
bucket,
object,
version_id,
},
HealOptions::default(),
HealPriority::Normal,
)
}
pub fn bucket(bucket: String) -> Self {
Self::new(HealType::Bucket { bucket }, HealOptions::default(), HealPriority::Normal)
}
pub fn metadata(bucket: String, object: String) -> Self {
Self::new(HealType::Metadata { bucket, object }, HealOptions::default(), HealPriority::High)
}
pub fn ec_decode(bucket: String, object: String, version_id: Option<String>) -> Self {
Self::new(
HealType::ECDecode {
bucket,
object,
version_id,
},
HealOptions::default(),
HealPriority::Urgent,
)
}
}
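The convenience constructors above each pick a default priority for their heal type; a small sketch of how callers typically build requests:

#[cfg(test)]
mod heal_request_sketch {
    use super::*;

    #[test]
    fn convenience_constructors() {
        let obj = HealRequest::object("bucket-a".to_string(), "path/key".to_string(), None);
        assert_eq!(obj.priority, HealPriority::Normal);

        let meta = HealRequest::metadata("bucket-a".to_string(), "path/key".to_string());
        assert_eq!(meta.priority, HealPriority::High);

        let ec = HealRequest::ec_decode("bucket-a".to_string(), "path/key".to_string(), None);
        assert_eq!(ec.priority, HealPriority::Urgent);

        // Every request gets a fresh UUID, so retries remain distinguishable.
        assert_ne!(obj.id, meta.id);
    }
}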
/// Heal task
pub struct HealTask {
/// Task ID
pub id: String,
/// Heal type
pub heal_type: HealType,
/// Heal options
pub options: HealOptions,
/// Task status
pub status: Arc<RwLock<HealTaskStatus>>,
/// Progress tracking
pub progress: Arc<RwLock<HealProgress>>,
/// Created time
pub created_at: SystemTime,
/// Started time
pub started_at: Arc<RwLock<Option<SystemTime>>>,
/// Completed time
pub completed_at: Arc<RwLock<Option<SystemTime>>>,
/// Cancel token
pub cancel_token: tokio_util::sync::CancellationToken,
/// Storage layer interface
pub storage: Arc<dyn HealStorageAPI>,
}
impl HealTask {
pub fn from_request(request: HealRequest, storage: Arc<dyn HealStorageAPI>) -> Self {
Self {
id: request.id,
heal_type: request.heal_type,
options: request.options,
status: Arc::new(RwLock::new(HealTaskStatus::Pending)),
progress: Arc::new(RwLock::new(HealProgress::new())),
created_at: request.created_at,
started_at: Arc::new(RwLock::new(None)),
completed_at: Arc::new(RwLock::new(None)),
cancel_token: tokio_util::sync::CancellationToken::new(),
storage,
}
}
pub async fn execute(&self) -> Result<()> {
// update status to running
{
let mut status = self.status.write().await;
*status = HealTaskStatus::Running;
}
{
let mut started_at = self.started_at.write().await;
*started_at = Some(SystemTime::now());
}
info!("Starting heal task: {} with type: {:?}", self.id, self.heal_type);
let result = match &self.heal_type {
HealType::Object {
bucket,
object,
version_id,
} => self.heal_object(bucket, object, version_id.as_deref()).await,
HealType::Bucket { bucket } => self.heal_bucket(bucket).await,
HealType::Metadata { bucket, object } => self.heal_metadata(bucket, object).await,
HealType::MRF { meta_path } => self.heal_mrf(meta_path).await,
HealType::ECDecode {
bucket,
object,
version_id,
} => self.heal_ec_decode(bucket, object, version_id.as_deref()).await,
HealType::ErasureSet { buckets, set_disk_id } => self.heal_erasure_set(buckets.clone(), set_disk_id.clone()).await,
};
// update completed time and status
{
let mut completed_at = self.completed_at.write().await;
*completed_at = Some(SystemTime::now());
}
match &result {
Ok(_) => {
let mut status = self.status.write().await;
*status = HealTaskStatus::Completed;
info!("Heal task completed successfully: {}", self.id);
}
Err(e) => {
let mut status = self.status.write().await;
*status = HealTaskStatus::Failed { error: e.to_string() };
error!("Heal task failed: {} with error: {}", self.id, e);
}
}
result
}
pub async fn cancel(&self) -> Result<()> {
self.cancel_token.cancel();
let mut status = self.status.write().await;
*status = HealTaskStatus::Cancelled;
info!("Heal task cancelled: {}", self.id);
Ok(())
}
pub async fn get_status(&self) -> HealTaskStatus {
self.status.read().await.clone()
}
pub async fn get_progress(&self) -> HealProgress {
self.progress.read().await.clone()
}
// specific heal implementation method
async fn heal_object(&self, bucket: &str, object: &str, version_id: Option<&str>) -> Result<()> {
info!("Healing object: {}/{}", bucket, object);
// update progress
{
let mut progress = self.progress.write().await;
progress.set_current_object(Some(format!("{bucket}/{object}")));
progress.update_progress(0, 4, 0, 0); // start of heal: 4 steps in total
}
// Step 1: Check if object exists and get metadata
info!("Step 1: Checking object existence and metadata");
let object_exists = self.storage.object_exists(bucket, object).await?;
if !object_exists {
warn!("Object does not exist: {}/{}", bucket, object);
if self.options.recreate_missing {
info!("Attempting to recreate missing object: {}/{}", bucket, object);
return self.recreate_missing_object(bucket, object, version_id).await;
} else {
return Err(Error::TaskExecutionFailed {
message: format!("Object not found: {bucket}/{object}"),
});
}
}
{
let mut progress = self.progress.write().await;
progress.update_progress(1, 3, 0, 0);
}
// Step 2: directly call ecstore to perform heal
info!("Step 2: Performing heal using ecstore");
let heal_opts = HealOpts {
recursive: self.options.recursive,
dry_run: self.options.dry_run,
remove: self.options.remove_corrupted,
recreate: self.options.recreate_missing,
scan_mode: self.options.scan_mode,
update_parity: self.options.update_parity,
no_lock: false,
pool: self.options.pool_index,
set: self.options.set_index,
};
match self.storage.heal_object(bucket, object, version_id, &heal_opts).await {
Ok((result, error)) => {
if let Some(e) = error {
error!("Heal operation failed: {}/{} - {}", bucket, object, e);
// If heal failed and remove_corrupted is enabled, delete the corrupted object
if self.options.remove_corrupted {
warn!("Removing corrupted object: {}/{}", bucket, object);
if !self.options.dry_run {
self.storage.delete_object(bucket, object).await?;
info!("Successfully deleted corrupted object: {}/{}", bucket, object);
} else {
info!("Dry run mode - would delete corrupted object: {}/{}", bucket, object);
}
}
{
let mut progress = self.progress.write().await;
progress.update_progress(3, 3, 0, 0);
}
return Err(Error::TaskExecutionFailed {
message: format!("Failed to heal object {bucket}/{object}: {e}"),
});
}
// Step 3: Verify heal result
info!("Step 3: Verifying heal result");
let object_size = result.object_size as u64;
info!(
"Heal completed successfully: {}/{} ({} bytes, {} drives healed)",
bucket,
object,
object_size,
result.after.drives.len()
);
{
let mut progress = self.progress.write().await;
progress.update_progress(3, 3, object_size, object_size);
}
Ok(())
}
Err(e) => {
error!("Heal operation failed: {}/{} - {}", bucket, object, e);
// If heal failed and remove_corrupted is enabled, delete the corrupted object
if self.options.remove_corrupted {
warn!("Removing corrupted object: {}/{}", bucket, object);
if !self.options.dry_run {
self.storage.delete_object(bucket, object).await?;
info!("Successfully deleted corrupted object: {}/{}", bucket, object);
} else {
info!("Dry run mode - would delete corrupted object: {}/{}", bucket, object);
}
}
{
let mut progress = self.progress.write().await;
progress.update_progress(3, 3, 0, 0);
}
Err(Error::TaskExecutionFailed {
message: format!("Failed to heal object {bucket}/{object}: {e}"),
})
}
}
}
/// Recreate missing object (for EC decode scenarios)
async fn recreate_missing_object(&self, bucket: &str, object: &str, version_id: Option<&str>) -> Result<()> {
info!("Attempting to recreate missing object: {}/{}", bucket, object);
// Use ecstore's heal_object with recreate option
let heal_opts = HealOpts {
recursive: false,
dry_run: self.options.dry_run,
remove: false,
recreate: true,
scan_mode: HealScanMode::Deep,
update_parity: true,
no_lock: false,
pool: None,
set: None,
};
match self.storage.heal_object(bucket, object, version_id, &heal_opts).await {
Ok((result, error)) => {
if let Some(e) = error {
error!("Failed to recreate missing object: {}/{} - {}", bucket, object, e);
return Err(Error::TaskExecutionFailed {
message: format!("Failed to recreate missing object {bucket}/{object}: {e}"),
});
}
let object_size = result.object_size as u64;
info!("Successfully recreated missing object: {}/{} ({} bytes)", bucket, object, object_size);
{
let mut progress = self.progress.write().await;
progress.update_progress(4, 4, object_size, object_size);
}
Ok(())
}
Err(e) => {
error!("Failed to recreate missing object: {}/{} - {}", bucket, object, e);
Err(Error::TaskExecutionFailed {
message: format!("Failed to recreate missing object {bucket}/{object}: {e}"),
})
}
}
}
async fn heal_bucket(&self, bucket: &str) -> Result<()> {
info!("Healing bucket: {}", bucket);
// update progress
{
let mut progress = self.progress.write().await;
progress.set_current_object(Some(format!("bucket: {bucket}")));
progress.update_progress(0, 3, 0, 0);
}
// Step 1: Check if bucket exists
info!("Step 1: Checking bucket existence");
let bucket_exists = self.storage.get_bucket_info(bucket).await?.is_some();
if !bucket_exists {
warn!("Bucket does not exist: {}", bucket);
return Err(Error::TaskExecutionFailed {
message: format!("Bucket not found: {bucket}"),
});
}
{
let mut progress = self.progress.write().await;
progress.update_progress(1, 3, 0, 0);
}
// Step 2: Perform bucket heal using ecstore
info!("Step 2: Performing bucket heal using ecstore");
let heal_opts = HealOpts {
recursive: self.options.recursive,
dry_run: self.options.dry_run,
remove: self.options.remove_corrupted,
recreate: self.options.recreate_missing,
scan_mode: self.options.scan_mode,
update_parity: self.options.update_parity,
no_lock: false,
pool: self.options.pool_index,
set: self.options.set_index,
};
match self.storage.heal_bucket(bucket, &heal_opts).await {
Ok(result) => {
info!("Bucket heal completed successfully: {} ({} drives)", bucket, result.after.drives.len());
{
let mut progress = self.progress.write().await;
progress.update_progress(3, 3, 0, 0);
}
Ok(())
}
Err(e) => {
error!("Bucket heal failed: {} - {}", bucket, e);
{
let mut progress = self.progress.write().await;
progress.update_progress(3, 3, 0, 0);
}
Err(Error::TaskExecutionFailed {
message: format!("Failed to heal bucket {bucket}: {e}"),
})
}
}
}
async fn heal_metadata(&self, bucket: &str, object: &str) -> Result<()> {
info!("Healing metadata: {}/{}", bucket, object);
// update progress
{
let mut progress = self.progress.write().await;
progress.set_current_object(Some(format!("metadata: {bucket}/{object}")));
progress.update_progress(0, 3, 0, 0);
}
// Step 1: Check if object exists
info!("Step 1: Checking object existence");
let object_exists = self.storage.object_exists(bucket, object).await?;
if !object_exists {
warn!("Object does not exist: {}/{}", bucket, object);
return Err(Error::TaskExecutionFailed {
message: format!("Object not found: {bucket}/{object}"),
});
}
{
let mut progress = self.progress.write().await;
progress.update_progress(1, 3, 0, 0);
}
// Step 2: Perform metadata heal using ecstore
info!("Step 2: Performing metadata heal using ecstore");
let heal_opts = HealOpts {
recursive: false,
dry_run: self.options.dry_run,
remove: false,
recreate: false,
scan_mode: HealScanMode::Deep,
update_parity: false,
no_lock: false,
pool: self.options.pool_index,
set: self.options.set_index,
};
match self.storage.heal_object(bucket, object, None, &heal_opts).await {
Ok((result, error)) => {
if let Some(e) = error {
error!("Metadata heal failed: {}/{} - {}", bucket, object, e);
{
let mut progress = self.progress.write().await;
progress.update_progress(3, 3, 0, 0);
}
return Err(Error::TaskExecutionFailed {
message: format!("Failed to heal metadata {bucket}/{object}: {e}"),
});
}
info!(
"Metadata heal completed successfully: {}/{} ({} drives)",
bucket,
object,
result.after.drives.len()
);
{
let mut progress = self.progress.write().await;
progress.update_progress(3, 3, 0, 0);
}
Ok(())
}
Err(e) => {
error!("Metadata heal failed: {}/{} - {}", bucket, object, e);
{
let mut progress = self.progress.write().await;
progress.update_progress(3, 3, 0, 0);
}
Err(Error::TaskExecutionFailed {
message: format!("Failed to heal metadata {bucket}/{object}: {e}"),
})
}
}
}
async fn heal_mrf(&self, meta_path: &str) -> Result<()> {
info!("Healing MRF: {}", meta_path);
// update progress
{
let mut progress = self.progress.write().await;
progress.set_current_object(Some(format!("mrf: {meta_path}")));
progress.update_progress(0, 2, 0, 0);
}
// Parse meta_path to extract bucket and object
let parts: Vec<&str> = meta_path.split('/').collect();
if parts.len() < 2 {
return Err(Error::TaskExecutionFailed {
message: format!("Invalid meta path format: {meta_path}"),
});
}
let bucket = parts[0];
let object = parts[1..].join("/");
// Step 1: Perform MRF heal using ecstore
info!("Step 1: Performing MRF heal using ecstore");
let heal_opts = HealOpts {
recursive: true,
dry_run: self.options.dry_run,
remove: self.options.remove_corrupted,
recreate: self.options.recreate_missing,
scan_mode: HealScanMode::Deep,
update_parity: true,
no_lock: false,
pool: None,
set: None,
};
match self.storage.heal_object(bucket, &object, None, &heal_opts).await {
Ok((result, error)) => {
if let Some(e) = error {
error!("MRF heal failed: {} - {}", meta_path, e);
{
let mut progress = self.progress.write().await;
progress.update_progress(2, 2, 0, 0);
}
return Err(Error::TaskExecutionFailed {
message: format!("Failed to heal MRF {meta_path}: {e}"),
});
}
info!("MRF heal completed successfully: {} ({} drives)", meta_path, result.after.drives.len());
{
let mut progress = self.progress.write().await;
progress.update_progress(2, 2, 0, 0);
}
Ok(())
}
Err(e) => {
error!("MRF heal failed: {} - {}", meta_path, e);
{
let mut progress = self.progress.write().await;
progress.update_progress(2, 2, 0, 0);
}
Err(Error::TaskExecutionFailed {
message: format!("Failed to heal MRF {meta_path}: {e}"),
})
}
}
}
async fn heal_ec_decode(&self, bucket: &str, object: &str, version_id: Option<&str>) -> Result<()> {
info!("Healing EC decode: {}/{}", bucket, object);
// update progress
{
let mut progress = self.progress.write().await;
progress.set_current_object(Some(format!("ec_decode: {bucket}/{object}")));
progress.update_progress(0, 3, 0, 0);
}
// Step 1: Check if object exists
info!("Step 1: Checking object existence");
let object_exists = self.storage.object_exists(bucket, object).await?;
if !object_exists {
warn!("Object does not exist: {}/{}", bucket, object);
return Err(Error::TaskExecutionFailed {
message: format!("Object not found: {bucket}/{object}"),
});
}
{
let mut progress = self.progress.write().await;
progress.update_progress(1, 3, 0, 0);
}
// Step 2: Perform EC decode heal using ecstore
info!("Step 2: Performing EC decode heal using ecstore");
let heal_opts = HealOpts {
recursive: false,
dry_run: self.options.dry_run,
remove: false,
recreate: true,
scan_mode: HealScanMode::Deep,
update_parity: true,
no_lock: false,
pool: None,
set: None,
};
match self.storage.heal_object(bucket, object, version_id, &heal_opts).await {
Ok((result, error)) => {
if let Some(e) = error {
error!("EC decode heal failed: {}/{} - {}", bucket, object, e);
{
let mut progress = self.progress.write().await;
progress.update_progress(3, 3, 0, 0);
}
return Err(Error::TaskExecutionFailed {
message: format!("Failed to heal EC decode {bucket}/{object}: {e}"),
});
}
let object_size = result.object_size as u64;
info!(
"EC decode heal completed successfully: {}/{} ({} bytes, {} drives)",
bucket,
object,
object_size,
result.after.drives.len()
);
{
let mut progress = self.progress.write().await;
progress.update_progress(3, 3, object_size, object_size);
}
Ok(())
}
Err(e) => {
error!("EC decode heal failed: {}/{} - {}", bucket, object, e);
{
let mut progress = self.progress.write().await;
progress.update_progress(3, 3, 0, 0);
}
Err(Error::TaskExecutionFailed {
message: format!("Failed to heal EC decode {bucket}/{object}: {e}"),
})
}
}
}
async fn heal_erasure_set(&self, buckets: Vec<String>, set_disk_id: String) -> Result<()> {
info!("Healing Erasure Set: {} ({} buckets)", set_disk_id, buckets.len());
// update progress
{
let mut progress = self.progress.write().await;
progress.set_current_object(Some(format!("erasure_set: {} ({} buckets)", set_disk_id, buckets.len())));
progress.update_progress(0, 4, 0, 0);
}
let buckets = if buckets.is_empty() {
info!("No buckets specified, listing all buckets");
let bucket_infos = self.storage.list_buckets().await?;
bucket_infos.into_iter().map(|info| info.name).collect()
} else {
buckets
};
// Step 1: Perform disk format heal using ecstore
info!("Step 1: Performing disk format heal using ecstore");
match self.storage.heal_format(self.options.dry_run).await {
Ok((result, error)) => {
if let Some(e) = error {
error!("Disk format heal failed: {} - {}", set_disk_id, e);
{
let mut progress = self.progress.write().await;
progress.update_progress(4, 4, 0, 0);
}
return Err(Error::TaskExecutionFailed {
message: format!("Failed to heal disk format for {set_disk_id}: {e}"),
});
}
info!(
"Disk format heal completed successfully: {} ({} drives)",
set_disk_id,
result.after.drives.len()
);
}
Err(e) => {
error!("Disk format heal failed: {} - {}", set_disk_id, e);
{
let mut progress = self.progress.write().await;
progress.update_progress(4, 4, 0, 0);
}
return Err(Error::TaskExecutionFailed {
message: format!("Failed to heal disk format for {set_disk_id}: {e}"),
});
}
}
{
let mut progress = self.progress.write().await;
progress.update_progress(1, 4, 0, 0);
}
// Step 2: Get disk for resume functionality
info!("Step 2: Getting disk for resume functionality");
let disk = self.storage.get_disk_for_resume(&set_disk_id).await?;
{
let mut progress = self.progress.write().await;
progress.update_progress(2, 4, 0, 0);
}
// Heal bucket structure for each bucket before running the set heal
for bucket in buckets.iter() {
if let Err(err) = self.heal_bucket(bucket).await {
warn!("Failed to heal bucket structure for {}: {}", bucket, err);
}
}
// Step 3: Create erasure set healer with resume support
info!("Step 3: Creating erasure set healer with resume support");
let erasure_healer = ErasureSetHealer::new(self.storage.clone(), self.progress.clone(), self.cancel_token.clone(), disk);
{
let mut progress = self.progress.write().await;
progress.update_progress(3, 4, 0, 0);
}
// Step 4: Execute erasure set heal with resume
info!("Step 4: Executing erasure set heal with resume");
let result = erasure_healer.heal_erasure_set(&buckets, &set_disk_id).await;
{
let mut progress = self.progress.write().await;
progress.update_progress(4, 4, 0, 0);
}
match result {
Ok(_) => {
info!("Erasure set heal completed successfully: {} ({} buckets)", set_disk_id, buckets.len());
Ok(())
}
Err(e) => {
error!("Erasure set heal failed: {} - {}", set_disk_id, e);
Err(Error::TaskExecutionFailed {
message: format!("Failed to heal erasure set {set_disk_id}: {e}"),
})
}
}
}
}
impl std::fmt::Debug for HealTask {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("HealTask")
.field("id", &self.id)
.field("heal_type", &self.heal_type)
.field("options", &self.options)
.field("created_at", &self.created_at)
.finish()
}
}
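A rough sketch of the task lifecycle driven by the code above, assuming an existing HealStorageAPI implementation (for example ECStoreHealStorage) has already been constructed elsewhere:

// Illustrative only; construction of `storage` and error handling are elided.
async fn run_one_heal(storage: std::sync::Arc<dyn HealStorageAPI>) -> Result<()> {
    let request = HealRequest::bucket("bucket-a".to_string());
    let task = HealTask::from_request(request, storage);

    // A clone of the cancel token can be handed to a supervisor that may abort the task.
    let _cancel = task.cancel_token.clone();

    task.execute().await?;
    assert_eq!(task.get_status().await, HealTaskStatus::Completed);
    Ok(())
}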

View File

@@ -12,17 +12,17 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::OnceLock;
use std::sync::{Arc, OnceLock};
use tokio_util::sync::CancellationToken;
use tracing::{error, info};
pub mod error;
pub mod heal;
pub mod scanner;
pub use error::{Error, Result};
pub use scanner::{
BucketTargetUsageInfo, BucketUsageInfo, DataUsageInfo, Scanner, ScannerMetrics, load_data_usage_from_backend,
store_data_usage_in_backend,
};
pub use heal::{HealManager, HealOptions, HealPriority, HealRequest, HealType, channel::HealChannelProcessor};
pub use scanner::Scanner;
// Global cancellation token for AHM services (scanner and other background tasks)
static GLOBAL_AHM_SERVICES_CANCEL_TOKEN: OnceLock<CancellationToken> = OnceLock::new();
@@ -52,3 +52,61 @@ pub fn shutdown_ahm_services() {
cancel_token.cancel();
}
}
/// Global heal manager instance
static GLOBAL_HEAL_MANAGER: OnceLock<Arc<HealManager>> = OnceLock::new();
/// Global heal channel processor instance
static GLOBAL_HEAL_CHANNEL_PROCESSOR: OnceLock<Arc<tokio::sync::Mutex<HealChannelProcessor>>> = OnceLock::new();
/// Initialize and start heal manager with channel processor
pub async fn init_heal_manager(
storage: Arc<dyn heal::storage::HealStorageAPI>,
config: Option<heal::manager::HealConfig>,
) -> Result<Arc<HealManager>> {
// Create heal manager
let heal_manager = Arc::new(HealManager::new(storage, config));
// Start heal manager
heal_manager.start().await?;
// Store global instance
GLOBAL_HEAL_MANAGER
.set(heal_manager.clone())
.map_err(|_| Error::Config("Heal manager already initialized".to_string()))?;
// Initialize heal channel
let channel_receiver = rustfs_common::heal_channel::init_heal_channel();
// Create channel processor
let channel_processor = HealChannelProcessor::new(heal_manager.clone());
// Store channel processor instance first
GLOBAL_HEAL_CHANNEL_PROCESSOR
.set(Arc::new(tokio::sync::Mutex::new(channel_processor)))
.map_err(|_| Error::Config("Heal channel processor already initialized".to_string()))?;
// Start channel processor in background
let receiver = channel_receiver;
tokio::spawn(async move {
if let Some(processor_guard) = GLOBAL_HEAL_CHANNEL_PROCESSOR.get() {
let mut processor = processor_guard.lock().await;
if let Err(e) = processor.start(receiver).await {
error!("Heal channel processor failed: {}", e);
}
}
});
info!("Heal manager with channel processor initialized successfully");
Ok(heal_manager)
}
/// Get global heal manager instance
pub fn get_heal_manager() -> Option<&'static Arc<HealManager>> {
GLOBAL_HEAL_MANAGER.get()
}
/// Get global heal channel processor instance
pub fn get_heal_channel_processor() -> Option<&'static Arc<tokio::sync::Mutex<HealChannelProcessor>>> {
GLOBAL_HEAL_CHANNEL_PROCESSOR.get()
}
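From the consumer side, a rough sketch of wiring this up at startup, assuming a HealStorageAPI implementation (such as ECStoreHealStorage wrapping the ECStore) is already available:

// Illustrative only; storage construction and error handling are elided.
async fn wire_up_heal(storage: std::sync::Arc<dyn rustfs_ahm::heal::storage::HealStorageAPI>) -> rustfs_ahm::Result<()> {
    // Passing None selects the default HealConfig.
    let manager = rustfs_ahm::init_heal_manager(storage, None).await?;

    // Any later component can reach the same instance through the global accessor.
    let same = rustfs_ahm::get_heal_manager().expect("heal manager was initialized above");
    assert!(std::sync::Arc::ptr_eq(&manager, same));
    Ok(())
}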

File diff suppressed because it is too large.

View File

@@ -1,671 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::{collections::HashMap, sync::Arc, time::SystemTime};
use rustfs_ecstore::{bucket::metadata_sys::get_replication_config, config::com::read_config, store::ECStore};
use rustfs_utils::path::SLASH_SEPARATOR;
use serde::{Deserialize, Serialize};
use tracing::{error, info, warn};
use crate::error::{Error, Result};
// Data usage storage constants
pub const DATA_USAGE_ROOT: &str = SLASH_SEPARATOR;
const DATA_USAGE_OBJ_NAME: &str = ".usage.json";
const DATA_USAGE_BLOOM_NAME: &str = ".bloomcycle.bin";
pub const DATA_USAGE_CACHE_NAME: &str = ".usage-cache.bin";
// Data usage storage paths
lazy_static::lazy_static! {
pub static ref DATA_USAGE_BUCKET: String = format!("{}{}{}",
rustfs_ecstore::disk::RUSTFS_META_BUCKET,
SLASH_SEPARATOR,
rustfs_ecstore::disk::BUCKET_META_PREFIX
);
pub static ref DATA_USAGE_OBJ_NAME_PATH: String = format!("{}{}{}",
rustfs_ecstore::disk::BUCKET_META_PREFIX,
SLASH_SEPARATOR,
DATA_USAGE_OBJ_NAME
);
pub static ref DATA_USAGE_BLOOM_NAME_PATH: String = format!("{}{}{}",
rustfs_ecstore::disk::BUCKET_META_PREFIX,
SLASH_SEPARATOR,
DATA_USAGE_BLOOM_NAME
);
}
/// Bucket target usage info provides replication statistics
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct BucketTargetUsageInfo {
pub replication_pending_size: u64,
pub replication_failed_size: u64,
pub replicated_size: u64,
pub replica_size: u64,
pub replication_pending_count: u64,
pub replication_failed_count: u64,
pub replicated_count: u64,
}
/// Bucket usage info provides bucket-level statistics
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct BucketUsageInfo {
pub size: u64,
// The following five fields, suffixed with V1, are kept for backward compatibility
// Total size for objects that have not yet been replicated
pub replication_pending_size_v1: u64,
// Total size for objects that have witnessed one or more failures and will be retried
pub replication_failed_size_v1: u64,
// Total size for objects that have been replicated to destination
pub replicated_size_v1: u64,
// Total number of objects pending replication
pub replication_pending_count_v1: u64,
// Total number of objects that failed replication
pub replication_failed_count_v1: u64,
pub objects_count: u64,
pub object_size_histogram: HashMap<String, u64>,
pub object_versions_histogram: HashMap<String, u64>,
pub versions_count: u64,
pub delete_markers_count: u64,
pub replica_size: u64,
pub replica_count: u64,
pub replication_info: HashMap<String, BucketTargetUsageInfo>,
}
/// DataUsageInfo represents data usage stats of the underlying storage
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct DataUsageInfo {
/// Total capacity
pub total_capacity: u64,
/// Total used capacity
pub total_used_capacity: u64,
/// Total free capacity
pub total_free_capacity: u64,
/// LastUpdate is the timestamp of when the data usage info was last updated
pub last_update: Option<SystemTime>,
/// Objects total count across all buckets
pub objects_total_count: u64,
/// Versions total count across all buckets
pub versions_total_count: u64,
/// Delete markers total count across all buckets
pub delete_markers_total_count: u64,
/// Objects total size across all buckets
pub objects_total_size: u64,
/// Replication info across all buckets
pub replication_info: HashMap<String, BucketTargetUsageInfo>,
/// Total number of buckets in this cluster
pub buckets_count: u64,
/// Per-bucket usage statistics across all buckets
pub buckets_usage: HashMap<String, BucketUsageInfo>,
/// Deprecated; kept here for backward compatibility reasons
pub bucket_sizes: HashMap<String, u64>,
}
/// Size summary for a single object or group of objects
#[derive(Debug, Default, Clone)]
pub struct SizeSummary {
/// Total size
pub total_size: usize,
/// Number of versions
pub versions: usize,
/// Number of delete markers
pub delete_markers: usize,
/// Replicated size
pub replicated_size: usize,
/// Replicated count
pub replicated_count: usize,
/// Pending size
pub pending_size: usize,
/// Failed size
pub failed_size: usize,
/// Replica size
pub replica_size: usize,
/// Replica count
pub replica_count: usize,
/// Pending count
pub pending_count: usize,
/// Failed count
pub failed_count: usize,
/// Replication target stats
pub repl_target_stats: HashMap<String, ReplTargetSizeSummary>,
}
/// Replication target size summary
#[derive(Debug, Default, Clone)]
pub struct ReplTargetSizeSummary {
/// Replicated size
pub replicated_size: usize,
/// Replicated count
pub replicated_count: usize,
/// Pending size
pub pending_size: usize,
/// Failed size
pub failed_size: usize,
/// Pending count
pub pending_count: usize,
/// Failed count
pub failed_count: usize,
}
impl DataUsageInfo {
/// Create a new DataUsageInfo
pub fn new() -> Self {
Self::default()
}
/// Add object metadata to data usage statistics
pub fn add_object(&mut self, object_path: &str, meta_object: &rustfs_filemeta::MetaObject) {
// This method is kept for backward compatibility
// For accurate version counting, use add_object_from_file_meta instead
let bucket_name = match self.extract_bucket_from_path(object_path) {
Ok(name) => name,
Err(_) => return,
};
// Update bucket statistics
if let Some(bucket_usage) = self.buckets_usage.get_mut(&bucket_name) {
bucket_usage.size += meta_object.size as u64;
bucket_usage.objects_count += 1;
bucket_usage.versions_count += 1; // Simplified: assume 1 version per object
// Update size histogram
let total_size = meta_object.size as u64;
let size_ranges = [
("0-1KB", 0, 1024),
("1KB-1MB", 1024, 1024 * 1024),
("1MB-10MB", 1024 * 1024, 10 * 1024 * 1024),
("10MB-100MB", 10 * 1024 * 1024, 100 * 1024 * 1024),
("100MB-1GB", 100 * 1024 * 1024, 1024 * 1024 * 1024),
("1GB+", 1024 * 1024 * 1024, u64::MAX),
];
for (range_name, min_size, max_size) in size_ranges {
if total_size >= min_size && total_size < max_size {
*bucket_usage.object_size_histogram.entry(range_name.to_string()).or_insert(0) += 1;
break;
}
}
// Update version histogram (simplified - count as single version)
*bucket_usage
.object_versions_histogram
.entry("SINGLE_VERSION".to_string())
.or_insert(0) += 1;
} else {
// Create new bucket usage
let mut bucket_usage = BucketUsageInfo {
size: meta_object.size as u64,
objects_count: 1,
versions_count: 1,
..Default::default()
};
bucket_usage.object_size_histogram.insert("0-1KB".to_string(), 1);
bucket_usage.object_versions_histogram.insert("SINGLE_VERSION".to_string(), 1);
self.buckets_usage.insert(bucket_name, bucket_usage);
}
// Update global statistics
self.objects_total_size += meta_object.size as u64;
self.objects_total_count += 1;
self.versions_total_count += 1;
}
/// Add object from FileMeta for accurate version counting
pub fn add_object_from_file_meta(&mut self, object_path: &str, file_meta: &rustfs_filemeta::FileMeta) {
let bucket_name = match self.extract_bucket_from_path(object_path) {
Ok(name) => name,
Err(_) => return,
};
// Calculate accurate statistics from all versions
let mut total_size = 0u64;
let mut versions_count = 0u64;
let mut delete_markers_count = 0u64;
let mut latest_object_size = 0u64;
// Process all versions to get accurate counts
for version in &file_meta.versions {
match rustfs_filemeta::FileMetaVersion::try_from(version.clone()) {
Ok(ver) => {
if let Some(obj) = ver.object {
total_size += obj.size as u64;
versions_count += 1;
latest_object_size = obj.size as u64; // Keep track of latest object size
} else if ver.delete_marker.is_some() {
delete_markers_count += 1;
}
}
Err(_) => {
// Skip invalid versions
continue;
}
}
}
// Update bucket statistics
if let Some(bucket_usage) = self.buckets_usage.get_mut(&bucket_name) {
bucket_usage.size += total_size;
bucket_usage.objects_count += 1;
bucket_usage.versions_count += versions_count;
bucket_usage.delete_markers_count += delete_markers_count;
// Update size histogram based on latest object size
let size_ranges = [
("0-1KB", 0, 1024),
("1KB-1MB", 1024, 1024 * 1024),
("1MB-10MB", 1024 * 1024, 10 * 1024 * 1024),
("10MB-100MB", 10 * 1024 * 1024, 100 * 1024 * 1024),
("100MB-1GB", 100 * 1024 * 1024, 1024 * 1024 * 1024),
("1GB+", 1024 * 1024 * 1024, u64::MAX),
];
for (range_name, min_size, max_size) in size_ranges {
if latest_object_size >= min_size && latest_object_size < max_size {
*bucket_usage.object_size_histogram.entry(range_name.to_string()).or_insert(0) += 1;
break;
}
}
// Update version histogram based on actual version count
let version_ranges = [
("1", 1, 1),
("2-5", 2, 5),
("6-10", 6, 10),
("11-50", 11, 50),
("51-100", 51, 100),
("100+", 101, usize::MAX),
];
for (range_name, min_versions, max_versions) in version_ranges {
if versions_count as usize >= min_versions && versions_count as usize <= max_versions {
*bucket_usage
.object_versions_histogram
.entry(range_name.to_string())
.or_insert(0) += 1;
break;
}
}
} else {
// Create new bucket usage
let mut bucket_usage = BucketUsageInfo {
size: total_size,
objects_count: 1,
versions_count,
delete_markers_count,
..Default::default()
};
// Set size histogram
let size_ranges = [
("0-1KB", 0, 1024),
("1KB-1MB", 1024, 1024 * 1024),
("1MB-10MB", 1024 * 1024, 10 * 1024 * 1024),
("10MB-100MB", 10 * 1024 * 1024, 100 * 1024 * 1024),
("100MB-1GB", 100 * 1024 * 1024, 1024 * 1024 * 1024),
("1GB+", 1024 * 1024 * 1024, u64::MAX),
];
for (range_name, min_size, max_size) in size_ranges {
if latest_object_size >= min_size && latest_object_size < max_size {
bucket_usage.object_size_histogram.insert(range_name.to_string(), 1);
break;
}
}
// Set version histogram
let version_ranges = [
("1", 1, 1),
("2-5", 2, 5),
("6-10", 6, 10),
("11-50", 11, 50),
("51-100", 51, 100),
("100+", 101, usize::MAX),
];
for (range_name, min_versions, max_versions) in version_ranges {
if versions_count as usize >= min_versions && versions_count as usize <= max_versions {
bucket_usage.object_versions_histogram.insert(range_name.to_string(), 1);
break;
}
}
self.buckets_usage.insert(bucket_name, bucket_usage);
// Update buckets count when adding new bucket
self.buckets_count = self.buckets_usage.len() as u64;
}
// Update global statistics
self.objects_total_size += total_size;
self.objects_total_count += 1;
self.versions_total_count += versions_count;
self.delete_markers_total_count += delete_markers_count;
}
/// Extract bucket name from object path
fn extract_bucket_from_path(&self, object_path: &str) -> Result<String> {
let parts: Vec<&str> = object_path.split('/').collect();
if parts.is_empty() {
return Err(Error::Scanner("Invalid object path: empty".to_string()));
}
Ok(parts[0].to_string())
}
/// Update capacity information
pub fn update_capacity(&mut self, total: u64, used: u64, free: u64) {
self.total_capacity = total;
self.total_used_capacity = used;
self.total_free_capacity = free;
self.last_update = Some(SystemTime::now());
}
/// Add bucket usage info
pub fn add_bucket_usage(&mut self, bucket: String, usage: BucketUsageInfo) {
self.buckets_usage.insert(bucket.clone(), usage);
self.buckets_count = self.buckets_usage.len() as u64;
self.last_update = Some(SystemTime::now());
}
/// Get bucket usage info
pub fn get_bucket_usage(&self, bucket: &str) -> Option<&BucketUsageInfo> {
self.buckets_usage.get(bucket)
}
/// Calculate total statistics from all buckets
pub fn calculate_totals(&mut self) {
self.objects_total_count = 0;
self.versions_total_count = 0;
self.delete_markers_total_count = 0;
self.objects_total_size = 0;
for usage in self.buckets_usage.values() {
self.objects_total_count += usage.objects_count;
self.versions_total_count += usage.versions_count;
self.delete_markers_total_count += usage.delete_markers_count;
self.objects_total_size += usage.size;
}
}
/// Merge another DataUsageInfo into this one
pub fn merge(&mut self, other: &DataUsageInfo) {
// Merge bucket usage
for (bucket, usage) in &other.buckets_usage {
if let Some(existing) = self.buckets_usage.get_mut(bucket) {
existing.merge(usage);
} else {
self.buckets_usage.insert(bucket.clone(), usage.clone());
}
}
// Recalculate totals
self.calculate_totals();
// Ensure buckets_count stays consistent with buckets_usage
self.buckets_count = self.buckets_usage.len() as u64;
// Update last update time
if let Some(other_update) = other.last_update {
if self.last_update.is_none() || other_update > self.last_update.unwrap() {
self.last_update = Some(other_update);
}
}
}
}
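For reference, a minimal sketch of how the aggregation methods above compose when usage reported by two nodes is combined; illustrative only:

#[cfg(test)]
mod data_usage_merge_sketch {
    use super::*;

    #[test]
    fn merge_two_nodes() {
        let mut a = DataUsageInfo::new();
        a.add_bucket_usage(
            "photos".to_string(),
            BucketUsageInfo {
                size: 100,
                objects_count: 2,
                ..Default::default()
            },
        );

        let mut b = DataUsageInfo::new();
        b.add_bucket_usage(
            "photos".to_string(),
            BucketUsageInfo {
                size: 50,
                objects_count: 1,
                ..Default::default()
            },
        );
        b.add_bucket_usage(
            "logs".to_string(),
            BucketUsageInfo {
                size: 10,
                objects_count: 1,
                ..Default::default()
            },
        );

        // merge() folds per-bucket stats together and recalculates the totals.
        a.merge(&b);
        assert_eq!(a.buckets_count, 2);
        assert_eq!(a.get_bucket_usage("photos").unwrap().size, 150);
        assert_eq!(a.objects_total_count, 4);
    }
}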
impl BucketUsageInfo {
/// Create a new BucketUsageInfo
pub fn new() -> Self {
Self::default()
}
/// Add size summary to this bucket usage
pub fn add_size_summary(&mut self, summary: &SizeSummary) {
self.size += summary.total_size as u64;
self.versions_count += summary.versions as u64;
self.delete_markers_count += summary.delete_markers as u64;
self.replica_size += summary.replica_size as u64;
self.replica_count += summary.replica_count as u64;
}
/// Merge another BucketUsageInfo into this one
pub fn merge(&mut self, other: &BucketUsageInfo) {
self.size += other.size;
self.objects_count += other.objects_count;
self.versions_count += other.versions_count;
self.delete_markers_count += other.delete_markers_count;
self.replica_size += other.replica_size;
self.replica_count += other.replica_count;
// Merge histograms
for (key, value) in &other.object_size_histogram {
*self.object_size_histogram.entry(key.clone()).or_insert(0) += value;
}
for (key, value) in &other.object_versions_histogram {
*self.object_versions_histogram.entry(key.clone()).or_insert(0) += value;
}
// Merge replication info
for (target, info) in &other.replication_info {
let entry = self.replication_info.entry(target.clone()).or_default();
entry.replicated_size += info.replicated_size;
entry.replica_size += info.replica_size;
entry.replication_pending_size += info.replication_pending_size;
entry.replication_failed_size += info.replication_failed_size;
entry.replication_pending_count += info.replication_pending_count;
entry.replication_failed_count += info.replication_failed_count;
entry.replicated_count += info.replicated_count;
}
// Merge backward compatibility fields
self.replication_pending_size_v1 += other.replication_pending_size_v1;
self.replication_failed_size_v1 += other.replication_failed_size_v1;
self.replicated_size_v1 += other.replicated_size_v1;
self.replication_pending_count_v1 += other.replication_pending_count_v1;
self.replication_failed_count_v1 += other.replication_failed_count_v1;
}
}
impl SizeSummary {
/// Create a new SizeSummary
pub fn new() -> Self {
Self::default()
}
/// Add another SizeSummary to this one
pub fn add(&mut self, other: &SizeSummary) {
self.total_size += other.total_size;
self.versions += other.versions;
self.delete_markers += other.delete_markers;
self.replicated_size += other.replicated_size;
self.replicated_count += other.replicated_count;
self.pending_size += other.pending_size;
self.failed_size += other.failed_size;
self.replica_size += other.replica_size;
self.replica_count += other.replica_count;
self.pending_count += other.pending_count;
self.failed_count += other.failed_count;
// Merge replication target stats
for (target, stats) in &other.repl_target_stats {
let entry = self.repl_target_stats.entry(target.clone()).or_default();
entry.replicated_size += stats.replicated_size;
entry.replicated_count += stats.replicated_count;
entry.pending_size += stats.pending_size;
entry.failed_size += stats.failed_size;
entry.pending_count += stats.pending_count;
entry.failed_count += stats.failed_count;
}
}
}
/// Store data usage info to backend storage
pub async fn store_data_usage_in_backend(data_usage_info: DataUsageInfo, store: Arc<ECStore>) -> Result<()> {
let data =
serde_json::to_vec(&data_usage_info).map_err(|e| Error::Config(format!("Failed to serialize data usage info: {e}")))?;
// Save to backend using the same mechanism as original code
rustfs_ecstore::config::com::save_config(store, &DATA_USAGE_OBJ_NAME_PATH, data)
.await
.map_err(Error::Storage)?;
Ok(())
}
/// Load data usage info from backend storage
pub async fn load_data_usage_from_backend(store: Arc<ECStore>) -> Result<DataUsageInfo> {
let buf = match read_config(store, &DATA_USAGE_OBJ_NAME_PATH).await {
Ok(data) => data,
Err(e) => {
error!("Failed to read data usage info from backend: {}", e);
if e == rustfs_ecstore::error::Error::ConfigNotFound {
return Ok(DataUsageInfo::default());
}
return Err(Error::Storage(e));
}
};
let mut data_usage_info: DataUsageInfo =
serde_json::from_slice(&buf).map_err(|e| Error::Config(format!("Failed to deserialize data usage info: {e}")))?;
warn!("Loaded data usage info from backend {:?}", &data_usage_info);
// Handle backward compatibility like original code
if data_usage_info.buckets_usage.is_empty() {
data_usage_info.buckets_usage = data_usage_info
.bucket_sizes
.iter()
.map(|(bucket, &size)| {
(
bucket.clone(),
BucketUsageInfo {
size,
..Default::default()
},
)
})
.collect();
}
if data_usage_info.bucket_sizes.is_empty() {
data_usage_info.bucket_sizes = data_usage_info
.buckets_usage
.iter()
.map(|(bucket, bui)| (bucket.clone(), bui.size))
.collect();
}
for (bucket, bui) in &data_usage_info.buckets_usage {
if bui.replicated_size_v1 > 0
|| bui.replication_failed_count_v1 > 0
|| bui.replication_failed_size_v1 > 0
|| bui.replication_pending_count_v1 > 0
{
if let Ok((cfg, _)) = get_replication_config(bucket).await {
if !cfg.role.is_empty() {
data_usage_info.replication_info.insert(
cfg.role.clone(),
BucketTargetUsageInfo {
replication_failed_size: bui.replication_failed_size_v1,
replication_failed_count: bui.replication_failed_count_v1,
replicated_size: bui.replicated_size_v1,
replication_pending_count: bui.replication_pending_count_v1,
replication_pending_size: bui.replication_pending_size_v1,
..Default::default()
},
);
}
}
}
}
Ok(data_usage_info)
}
/// Example function showing how to use AHM data usage functionality
/// This demonstrates the integration pattern for DataUsageInfoHandler
pub async fn example_data_usage_integration() -> Result<()> {
// Get the global storage instance
let Some(store) = rustfs_ecstore::new_object_layer_fn() else {
return Err(Error::Config("Storage not initialized".to_string()));
};
// Load data usage from backend (this replaces the original load_data_usage_from_backend)
let data_usage = load_data_usage_from_backend(store).await?;
info!(
"Loaded data usage info: {} buckets, {} total objects",
data_usage.buckets_count, data_usage.objects_total_count
);
// Example: Store updated data usage back to backend
// This would typically be called by the scanner after collecting new statistics
// store_data_usage_in_backend(data_usage, store).await?;
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_data_usage_info_creation() {
let mut info = DataUsageInfo::new();
info.update_capacity(1000, 500, 500);
assert_eq!(info.total_capacity, 1000);
assert_eq!(info.total_used_capacity, 500);
assert_eq!(info.total_free_capacity, 500);
assert!(info.last_update.is_some());
}
#[test]
fn test_bucket_usage_info_merge() {
let mut usage1 = BucketUsageInfo::new();
usage1.size = 100;
usage1.objects_count = 10;
usage1.versions_count = 5;
let mut usage2 = BucketUsageInfo::new();
usage2.size = 200;
usage2.objects_count = 20;
usage2.versions_count = 10;
usage1.merge(&usage2);
assert_eq!(usage1.size, 300);
assert_eq!(usage1.objects_count, 30);
assert_eq!(usage1.versions_count, 15);
}
#[test]
fn test_size_summary_add() {
let mut summary1 = SizeSummary::new();
summary1.total_size = 100;
summary1.versions = 5;
let mut summary2 = SizeSummary::new();
summary2.total_size = 200;
summary2.versions = 10;
summary1.add(&summary2);
assert_eq!(summary1.total_size, 300);
assert_eq!(summary1.versions, 15);
}
}

View File

@@ -12,197 +12,258 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::{
collections::HashMap,
sync::atomic::{AtomicU64, Ordering},
time::{Duration, SystemTime},
};
/// Size interval for object size histogram
#[derive(Debug, Clone)]
pub struct SizeInterval {
pub start: u64,
pub end: u64,
pub name: &'static str,
use serde::{Deserialize, Serialize};
use tracing::info;
/// Scanner metrics
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ScannerMetrics {
/// Total objects scanned since server start
pub objects_scanned: u64,
/// Total object versions scanned since server start
pub versions_scanned: u64,
/// Total directories scanned since server start
pub directories_scanned: u64,
/// Total bucket scans started since server start
pub bucket_scans_started: u64,
/// Total bucket scans finished since server start
pub bucket_scans_finished: u64,
/// Total objects with health issues found
pub objects_with_issues: u64,
/// Total heal tasks queued
pub heal_tasks_queued: u64,
/// Total heal tasks completed
pub heal_tasks_completed: u64,
/// Total heal tasks failed
pub heal_tasks_failed: u64,
/// Total healthy objects found
pub healthy_objects: u64,
/// Total corrupted objects found
pub corrupted_objects: u64,
/// Last scan activity time
pub last_activity: Option<SystemTime>,
/// Current scan cycle
pub current_cycle: u64,
/// Total scan cycles completed
pub total_cycles: u64,
/// Current scan duration
pub current_scan_duration: Option<Duration>,
/// Average scan duration
pub avg_scan_duration: Duration,
/// Objects scanned per second
pub objects_per_second: f64,
/// Buckets scanned per second
pub buckets_per_second: f64,
/// Storage metrics by bucket
pub bucket_metrics: HashMap<String, BucketMetrics>,
/// Disk metrics
pub disk_metrics: HashMap<String, DiskMetrics>,
}
/// Version interval for object versions histogram
#[derive(Debug, Clone)]
pub struct VersionInterval {
pub start: u64,
pub end: u64,
pub name: &'static str,
/// Bucket-specific metrics
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct BucketMetrics {
/// Bucket name
pub bucket: String,
/// Total objects in bucket
pub total_objects: u64,
/// Total size of objects in bucket (bytes)
pub total_size: u64,
/// Objects with health issues
pub objects_with_issues: u64,
/// Last scan time
pub last_scan_time: Option<SystemTime>,
/// Scan duration
pub scan_duration: Option<Duration>,
/// Heal tasks queued for this bucket
pub heal_tasks_queued: u64,
/// Heal tasks completed for this bucket
pub heal_tasks_completed: u64,
/// Heal tasks failed for this bucket
pub heal_tasks_failed: u64,
}
/// Object size histogram intervals
pub const OBJECTS_HISTOGRAM_INTERVALS: &[SizeInterval] = &[
SizeInterval {
start: 0,
end: 1024 - 1,
name: "LESS_THAN_1_KiB",
},
SizeInterval {
start: 1024,
end: 1024 * 1024 - 1,
name: "1_KiB_TO_1_MiB",
},
SizeInterval {
start: 1024 * 1024,
end: 10 * 1024 * 1024 - 1,
name: "1_MiB_TO_10_MiB",
},
SizeInterval {
start: 10 * 1024 * 1024,
end: 64 * 1024 * 1024 - 1,
name: "10_MiB_TO_64_MiB",
},
SizeInterval {
start: 64 * 1024 * 1024,
end: 128 * 1024 * 1024 - 1,
name: "64_MiB_TO_128_MiB",
},
SizeInterval {
start: 128 * 1024 * 1024,
end: 512 * 1024 * 1024 - 1,
name: "128_MiB_TO_512_MiB",
},
SizeInterval {
start: 512 * 1024 * 1024,
end: u64::MAX,
name: "MORE_THAN_512_MiB",
},
];
/// Object version count histogram intervals
pub const OBJECTS_VERSION_COUNT_INTERVALS: &[VersionInterval] = &[
VersionInterval {
start: 1,
end: 1,
name: "1_VERSION",
},
VersionInterval {
start: 2,
end: 10,
name: "2_TO_10_VERSIONS",
},
VersionInterval {
start: 11,
end: 100,
name: "11_TO_100_VERSIONS",
},
VersionInterval {
start: 101,
end: 1000,
name: "101_TO_1000_VERSIONS",
},
VersionInterval {
start: 1001,
end: u64::MAX,
name: "MORE_THAN_1000_VERSIONS",
},
];
/// Size histogram for object size distribution
#[derive(Debug, Clone, Default)]
pub struct SizeHistogram {
counts: Vec<u64>,
/// Disk-specific metrics
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct DiskMetrics {
/// Disk path
pub disk_path: String,
/// Total disk space (bytes)
pub total_space: u64,
/// Used disk space (bytes)
pub used_space: u64,
/// Free disk space (bytes)
pub free_space: u64,
/// Objects scanned on this disk
pub objects_scanned: u64,
/// Objects with issues on this disk
pub objects_with_issues: u64,
/// Last scan time
pub last_scan_time: Option<SystemTime>,
/// Whether disk is online
pub is_online: bool,
/// Whether disk is being scanned
pub is_scanning: bool,
}
/// Versions histogram for object version count distribution
#[derive(Debug, Clone, Default)]
pub struct VersionsHistogram {
counts: Vec<u64>,
/// Thread-safe metrics collector
pub struct MetricsCollector {
/// Atomic counters for real-time metrics
objects_scanned: AtomicU64,
versions_scanned: AtomicU64,
directories_scanned: AtomicU64,
bucket_scans_started: AtomicU64,
bucket_scans_finished: AtomicU64,
objects_with_issues: AtomicU64,
heal_tasks_queued: AtomicU64,
heal_tasks_completed: AtomicU64,
heal_tasks_failed: AtomicU64,
current_cycle: AtomicU64,
total_cycles: AtomicU64,
healthy_objects: AtomicU64,
corrupted_objects: AtomicU64,
}
impl SizeHistogram {
/// Create a new size histogram
impl MetricsCollector {
/// Create a new metrics collector
pub fn new() -> Self {
Self {
counts: vec![0; OBJECTS_HISTOGRAM_INTERVALS.len()],
objects_scanned: AtomicU64::new(0),
versions_scanned: AtomicU64::new(0),
directories_scanned: AtomicU64::new(0),
bucket_scans_started: AtomicU64::new(0),
bucket_scans_finished: AtomicU64::new(0),
objects_with_issues: AtomicU64::new(0),
heal_tasks_queued: AtomicU64::new(0),
heal_tasks_completed: AtomicU64::new(0),
heal_tasks_failed: AtomicU64::new(0),
current_cycle: AtomicU64::new(0),
total_cycles: AtomicU64::new(0),
healthy_objects: AtomicU64::new(0),
corrupted_objects: AtomicU64::new(0),
}
}
/// Add a size to the histogram
pub fn add(&mut self, size: u64) {
for (idx, interval) in OBJECTS_HISTOGRAM_INTERVALS.iter().enumerate() {
if size >= interval.start && size <= interval.end {
self.counts[idx] += 1;
break;
}
/// Increment objects scanned count
pub fn increment_objects_scanned(&self, count: u64) {
self.objects_scanned.fetch_add(count, Ordering::Relaxed);
}
/// Increment versions scanned count
pub fn increment_versions_scanned(&self, count: u64) {
self.versions_scanned.fetch_add(count, Ordering::Relaxed);
}
/// Increment directories scanned count
pub fn increment_directories_scanned(&self, count: u64) {
self.directories_scanned.fetch_add(count, Ordering::Relaxed);
}
/// Increment bucket scans started count
pub fn increment_bucket_scans_started(&self, count: u64) {
self.bucket_scans_started.fetch_add(count, Ordering::Relaxed);
}
/// Increment bucket scans finished count
pub fn increment_bucket_scans_finished(&self, count: u64) {
self.bucket_scans_finished.fetch_add(count, Ordering::Relaxed);
}
/// Increment objects with issues count
pub fn increment_objects_with_issues(&self, count: u64) {
self.objects_with_issues.fetch_add(count, Ordering::Relaxed);
}
/// Increment heal tasks queued count
pub fn increment_heal_tasks_queued(&self, count: u64) {
self.heal_tasks_queued.fetch_add(count, Ordering::Relaxed);
}
/// Increment heal tasks completed count
pub fn increment_heal_tasks_completed(&self, count: u64) {
self.heal_tasks_completed.fetch_add(count, Ordering::Relaxed);
}
/// Increment heal tasks failed count
pub fn increment_heal_tasks_failed(&self, count: u64) {
self.heal_tasks_failed.fetch_add(count, Ordering::Relaxed);
}
/// Set current cycle
pub fn set_current_cycle(&self, cycle: u64) {
self.current_cycle.store(cycle, Ordering::Relaxed);
}
/// Increment total cycles
pub fn increment_total_cycles(&self) {
self.total_cycles.fetch_add(1, Ordering::Relaxed);
}
/// Increment healthy objects count
pub fn increment_healthy_objects(&self) {
self.healthy_objects.fetch_add(1, Ordering::Relaxed);
}
/// Increment corrupted objects count
pub fn increment_corrupted_objects(&self) {
self.corrupted_objects.fetch_add(1, Ordering::Relaxed);
}
/// Get current metrics snapshot
pub fn get_metrics(&self) -> ScannerMetrics {
ScannerMetrics {
objects_scanned: self.objects_scanned.load(Ordering::Relaxed),
versions_scanned: self.versions_scanned.load(Ordering::Relaxed),
directories_scanned: self.directories_scanned.load(Ordering::Relaxed),
bucket_scans_started: self.bucket_scans_started.load(Ordering::Relaxed),
bucket_scans_finished: self.bucket_scans_finished.load(Ordering::Relaxed),
objects_with_issues: self.objects_with_issues.load(Ordering::Relaxed),
heal_tasks_queued: self.heal_tasks_queued.load(Ordering::Relaxed),
heal_tasks_completed: self.heal_tasks_completed.load(Ordering::Relaxed),
heal_tasks_failed: self.heal_tasks_failed.load(Ordering::Relaxed),
healthy_objects: self.healthy_objects.load(Ordering::Relaxed),
corrupted_objects: self.corrupted_objects.load(Ordering::Relaxed),
last_activity: Some(SystemTime::now()),
current_cycle: self.current_cycle.load(Ordering::Relaxed),
total_cycles: self.total_cycles.load(Ordering::Relaxed),
current_scan_duration: None, // Will be set by scanner
avg_scan_duration: Duration::ZERO, // Will be calculated
objects_per_second: 0.0, // Will be calculated
buckets_per_second: 0.0, // Will be calculated
bucket_metrics: HashMap::new(), // Will be populated by scanner
disk_metrics: HashMap::new(), // Will be populated by scanner
}
}
/// Get the histogram as a map
pub fn to_map(&self) -> HashMap<String, u64> {
let mut result = HashMap::new();
for (idx, count) in self.counts.iter().enumerate() {
let interval = &OBJECTS_HISTOGRAM_INTERVALS[idx];
result.insert(interval.name.to_string(), *count);
}
result
}
/// Reset all metrics
pub fn reset(&self) {
self.objects_scanned.store(0, Ordering::Relaxed);
self.versions_scanned.store(0, Ordering::Relaxed);
self.directories_scanned.store(0, Ordering::Relaxed);
self.bucket_scans_started.store(0, Ordering::Relaxed);
self.bucket_scans_finished.store(0, Ordering::Relaxed);
self.objects_with_issues.store(0, Ordering::Relaxed);
self.heal_tasks_queued.store(0, Ordering::Relaxed);
self.heal_tasks_completed.store(0, Ordering::Relaxed);
self.heal_tasks_failed.store(0, Ordering::Relaxed);
self.current_cycle.store(0, Ordering::Relaxed);
self.total_cycles.store(0, Ordering::Relaxed);
self.healthy_objects.store(0, Ordering::Relaxed);
self.corrupted_objects.store(0, Ordering::Relaxed);
/// Merge another histogram into this one
pub fn merge(&mut self, other: &SizeHistogram) {
for (idx, count) in other.counts.iter().enumerate() {
self.counts[idx] += count;
}
}
/// Get total count
pub fn total_count(&self) -> u64 {
self.counts.iter().sum()
}
/// Reset the histogram
pub fn reset(&mut self) {
for count in &mut self.counts {
*count = 0;
}
info!("Scanner metrics reset");
}
}
impl VersionsHistogram {
/// Create a new versions histogram
pub fn new() -> Self {
Self {
counts: vec![0; OBJECTS_VERSION_COUNT_INTERVALS.len()],
}
}
/// Add a version count to the histogram
pub fn add(&mut self, versions: u64) {
for (idx, interval) in OBJECTS_VERSION_COUNT_INTERVALS.iter().enumerate() {
if versions >= interval.start && versions <= interval.end {
self.counts[idx] += 1;
break;
}
}
}
/// Get the histogram as a map
pub fn to_map(&self) -> HashMap<String, u64> {
let mut result = HashMap::new();
for (idx, count) in self.counts.iter().enumerate() {
let interval = &OBJECTS_VERSION_COUNT_INTERVALS[idx];
result.insert(interval.name.to_string(), *count);
}
result
}
/// Merge another histogram into this one
pub fn merge(&mut self, other: &VersionsHistogram) {
for (idx, count) in other.counts.iter().enumerate() {
self.counts[idx] += count;
}
}
/// Get total count
pub fn total_count(&self) -> u64 {
self.counts.iter().sum()
}
/// Reset the histogram
pub fn reset(&mut self) {
for count in &mut self.counts {
*count = 0;
}
impl Default for MetricsCollector {
fn default() -> Self {
Self::new()
}
}
@@ -211,67 +272,35 @@ mod tests {
use super::*;
#[test]
fn test_size_histogram() {
let mut histogram = SizeHistogram::new();
// Add some sizes
histogram.add(512); // LESS_THAN_1_KiB
histogram.add(1024); // 1_KiB_TO_1_MiB
histogram.add(1024 * 1024); // 1_MiB_TO_10_MiB
histogram.add(5 * 1024 * 1024); // 1_MiB_TO_10_MiB
let map = histogram.to_map();
assert_eq!(map.get("LESS_THAN_1_KiB"), Some(&1));
assert_eq!(map.get("1_KiB_TO_1_MiB"), Some(&1));
assert_eq!(map.get("1_MiB_TO_10_MiB"), Some(&2));
assert_eq!(map.get("10_MiB_TO_64_MiB"), Some(&0));
}
#[test]
fn test_versions_histogram() {
let mut histogram = VersionsHistogram::new();
// Add some version counts
histogram.add(1); // 1_VERSION
histogram.add(5); // 2_TO_10_VERSIONS
histogram.add(50); // 11_TO_100_VERSIONS
histogram.add(500); // 101_TO_1000_VERSIONS
let map = histogram.to_map();
assert_eq!(map.get("1_VERSION"), Some(&1));
assert_eq!(map.get("2_TO_10_VERSIONS"), Some(&1));
assert_eq!(map.get("11_TO_100_VERSIONS"), Some(&1));
assert_eq!(map.get("101_TO_1000_VERSIONS"), Some(&1));
}
#[test]
fn test_histogram_merge() {
let mut histogram1 = SizeHistogram::new();
histogram1.add(1024);
histogram1.add(1024 * 1024);
let mut histogram2 = SizeHistogram::new();
histogram2.add(1024);
histogram2.add(5 * 1024 * 1024);
histogram1.merge(&histogram2);
let map = histogram1.to_map();
assert_eq!(map.get("1_KiB_TO_1_MiB"), Some(&2)); // 1 from histogram1 + 1 from histogram2
assert_eq!(map.get("1_MiB_TO_10_MiB"), Some(&2)); // 1 from histogram1 + 1 from histogram2
}
#[test]
fn test_histogram_reset() {
let mut histogram = SizeHistogram::new();
histogram.add(1024);
histogram.add(1024 * 1024);
assert_eq!(histogram.total_count(), 2);
histogram.reset();
assert_eq!(histogram.total_count(), 0);
}
#[test]
fn test_metrics_collector_creation() {
let collector = MetricsCollector::new();
let metrics = collector.get_metrics();
assert_eq!(metrics.objects_scanned, 0);
assert_eq!(metrics.versions_scanned, 0);
}
#[test]
fn test_metrics_increment() {
let collector = MetricsCollector::new();
collector.increment_objects_scanned(10);
collector.increment_versions_scanned(5);
collector.increment_objects_with_issues(2);
let metrics = collector.get_metrics();
assert_eq!(metrics.objects_scanned, 10);
assert_eq!(metrics.versions_scanned, 5);
assert_eq!(metrics.objects_with_issues, 2);
}
#[test]
fn test_metrics_reset() {
let collector = MetricsCollector::new();
collector.increment_objects_scanned(10);
collector.reset();
let metrics = collector.get_metrics();
assert_eq!(metrics.objects_scanned, 0);
}
}

View File

@@ -42,6 +42,10 @@ pub struct ScannerMetrics {
pub heal_tasks_completed: u64,
/// Total heal tasks failed
pub heal_tasks_failed: u64,
/// Total healthy objects found
pub healthy_objects: u64,
/// Total corrupted objects found
pub corrupted_objects: u64,
/// Last scan activity time
pub last_activity: Option<SystemTime>,
/// Current scan cycle
@@ -122,6 +126,8 @@ pub struct MetricsCollector {
heal_tasks_failed: AtomicU64,
current_cycle: AtomicU64,
total_cycles: AtomicU64,
healthy_objects: AtomicU64,
corrupted_objects: AtomicU64,
}
impl MetricsCollector {
@@ -139,6 +145,8 @@ impl MetricsCollector {
heal_tasks_failed: AtomicU64::new(0),
current_cycle: AtomicU64::new(0),
total_cycles: AtomicU64::new(0),
healthy_objects: AtomicU64::new(0),
corrupted_objects: AtomicU64::new(0),
}
}
@@ -197,6 +205,16 @@ impl MetricsCollector {
self.total_cycles.fetch_add(1, Ordering::Relaxed);
}
/// Increment healthy objects count
pub fn increment_healthy_objects(&self) {
self.healthy_objects.fetch_add(1, Ordering::Relaxed);
}
/// Increment corrupted objects count
pub fn increment_corrupted_objects(&self) {
self.corrupted_objects.fetch_add(1, Ordering::Relaxed);
}
/// Get current metrics snapshot
pub fn get_metrics(&self) -> ScannerMetrics {
ScannerMetrics {
@@ -209,6 +227,8 @@ impl MetricsCollector {
heal_tasks_queued: self.heal_tasks_queued.load(Ordering::Relaxed),
heal_tasks_completed: self.heal_tasks_completed.load(Ordering::Relaxed),
heal_tasks_failed: self.heal_tasks_failed.load(Ordering::Relaxed),
healthy_objects: self.healthy_objects.load(Ordering::Relaxed),
corrupted_objects: self.corrupted_objects.load(Ordering::Relaxed),
last_activity: Some(SystemTime::now()),
current_cycle: self.current_cycle.load(Ordering::Relaxed),
total_cycles: self.total_cycles.load(Ordering::Relaxed),
@@ -234,6 +254,8 @@ impl MetricsCollector {
self.heal_tasks_failed.store(0, Ordering::Relaxed);
self.current_cycle.store(0, Ordering::Relaxed);
self.total_cycles.store(0, Ordering::Relaxed);
self.healthy_objects.store(0, Ordering::Relaxed);
self.corrupted_objects.store(0, Ordering::Relaxed);
info!("Scanner metrics reset");
}
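A short sketch of feeding scan results into the new counters; the `(object, healthy)` pairs stand in for whatever the scanner actually produces, and only the `increment_*` methods and `get_metrics` come from the diff above:
// Hypothetical driver loop around the new healthy/corrupted counters.
fn record_scan_results(collector: &MetricsCollector, results: &[(String, bool)]) {
    for (_object, healthy) in results {
        if *healthy {
            collector.increment_healthy_objects();
        } else {
            collector.increment_corrupted_objects();
        }
    }
    let snapshot = collector.get_metrics();
    println!("healthy={} corrupted={}", snapshot.healthy_objects, snapshot.corrupted_objects);
}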

View File

@@ -13,13 +13,8 @@
// limitations under the License.
pub mod data_scanner;
pub mod data_usage;
pub mod histogram;
pub mod metrics;
// Re-export main types for convenience
pub use data_scanner::Scanner;
pub use data_usage::{
BucketTargetUsageInfo, BucketUsageInfo, DataUsageInfo, load_data_usage_from_backend, store_data_usage_in_backend,
};
pub use metrics::ScannerMetrics;
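With `histogram` dropped from the re-exports, downstream code would import the remaining types roughly like this; the `rustfs_ahm::scanner` path is an assumption for illustration:
// Assumed module path; only the re-exported names come from the declaration above.
use rustfs_ahm::scanner::{Scanner, ScannerMetrics};
use rustfs_ahm::scanner::data_usage::{DataUsageInfo, load_data_usage_from_backend};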

View File

@@ -0,0 +1,410 @@
use rustfs_ahm::heal::{
manager::{HealConfig, HealManager},
storage::{ECStoreHealStorage, HealStorageAPI},
task::{HealOptions, HealPriority, HealRequest, HealTaskStatus, HealType},
};
use rustfs_common::heal_channel::{HealOpts, HealScanMode};
use rustfs_ecstore::{
disk::endpoint::Endpoint,
endpoints::{EndpointServerPools, Endpoints, PoolEndpoints},
store::ECStore,
store_api::{ObjectIO, ObjectOptions, PutObjReader, StorageAPI},
};
use serial_test::serial;
use std::sync::Once;
use std::sync::OnceLock;
use std::{path::PathBuf, sync::Arc, time::Duration};
use tokio::fs;
use tracing::info;
use walkdir::WalkDir;
static GLOBAL_ENV: OnceLock<(Vec<PathBuf>, Arc<ECStore>, Arc<ECStoreHealStorage>)> = OnceLock::new();
static INIT: Once = Once::new();
fn init_tracing() {
INIT.call_once(|| {
let _ = tracing_subscriber::fmt::try_init();
});
}
/// Test helper: Create test environment with ECStore
async fn setup_test_env() -> (Vec<PathBuf>, Arc<ECStore>, Arc<ECStoreHealStorage>) {
init_tracing();
// Fast path: already initialized, just clone and return
if let Some((paths, ecstore, heal_storage)) = GLOBAL_ENV.get() {
return (paths.clone(), ecstore.clone(), heal_storage.clone());
}
// create temp dir as 4 disks with unique base dir
let test_base_dir = format!("/tmp/rustfs_ahm_heal_test_{}", uuid::Uuid::new_v4());
let temp_dir = std::path::PathBuf::from(&test_base_dir);
if temp_dir.exists() {
fs::remove_dir_all(&temp_dir).await.ok();
}
fs::create_dir_all(&temp_dir).await.unwrap();
// create 4 disk dirs
let disk_paths = vec![
temp_dir.join("disk1"),
temp_dir.join("disk2"),
temp_dir.join("disk3"),
temp_dir.join("disk4"),
];
for disk_path in &disk_paths {
fs::create_dir_all(disk_path).await.unwrap();
}
// create EndpointServerPools
let mut endpoints = Vec::new();
for (i, disk_path) in disk_paths.iter().enumerate() {
let mut endpoint = Endpoint::try_from(disk_path.to_str().unwrap()).unwrap();
// set correct index
endpoint.set_pool_index(0);
endpoint.set_set_index(0);
endpoint.set_disk_index(i);
endpoints.push(endpoint);
}
let pool_endpoints = PoolEndpoints {
legacy: false,
set_count: 1,
drives_per_set: 4,
endpoints: Endpoints::from(endpoints),
cmd_line: "test".to_string(),
platform: format!("OS: {} | Arch: {}", std::env::consts::OS, std::env::consts::ARCH),
};
let endpoint_pools = EndpointServerPools(vec![pool_endpoints]);
// format disks (only first time)
rustfs_ecstore::store::init_local_disks(endpoint_pools.clone()).await.unwrap();
// create ECStore with dynamic port 0 (let OS assign) or fixed 9001 if free
let port = 9001; // for simplicity
let server_addr: std::net::SocketAddr = format!("127.0.0.1:{port}").parse().unwrap();
let ecstore = ECStore::new(server_addr, endpoint_pools).await.unwrap();
// init bucket metadata system
let buckets_list = ecstore
.list_bucket(&rustfs_ecstore::store_api::BucketOptions {
no_metadata: true,
..Default::default()
})
.await
.unwrap();
let buckets = buckets_list.into_iter().map(|v| v.name).collect();
rustfs_ecstore::bucket::metadata_sys::init_bucket_metadata_sys(ecstore.clone(), buckets).await;
// Create heal storage layer
let heal_storage = Arc::new(ECStoreHealStorage::new(ecstore.clone()));
// Store in global once lock
let _ = GLOBAL_ENV.set((disk_paths.clone(), ecstore.clone(), heal_storage.clone()));
(disk_paths, ecstore, heal_storage)
}
/// Test helper: Create a test bucket
async fn create_test_bucket(ecstore: &Arc<ECStore>, bucket_name: &str) {
(**ecstore)
.make_bucket(bucket_name, &Default::default())
.await
.expect("Failed to create test bucket");
info!("Created test bucket: {}", bucket_name);
}
/// Test helper: Upload test object
async fn upload_test_object(ecstore: &Arc<ECStore>, bucket: &str, object: &str, data: &[u8]) {
let mut reader = PutObjReader::from_vec(data.to_vec());
let object_info = (**ecstore)
.put_object(bucket, object, &mut reader, &ObjectOptions::default())
.await
.expect("Failed to upload test object");
info!("Uploaded test object: {}/{} ({} bytes)", bucket, object, object_info.size);
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
#[serial]
async fn test_heal_object_basic() {
let (disk_paths, ecstore, heal_storage) = setup_test_env().await;
// Create test bucket and object
let bucket_name = "test-bucket";
let object_name = "test-object.txt";
let test_data = b"Hello, this is test data for healing!";
create_test_bucket(&ecstore, bucket_name).await;
upload_test_object(&ecstore, bucket_name, object_name, test_data).await;
// ─── 1⃣ delete single data shard file ─────────────────────────────────────
let obj_dir = disk_paths[0].join(bucket_name).join(object_name);
// find part file at depth 2, e.g. .../<uuid>/part.1
let target_part = WalkDir::new(&obj_dir)
.min_depth(2)
.max_depth(2)
.into_iter()
.filter_map(Result::ok)
.find(|e| e.file_type().is_file() && e.file_name().to_str().map(|n| n.starts_with("part.")).unwrap_or(false))
.map(|e| e.into_path())
.expect("Failed to locate part file to delete");
std::fs::remove_file(&target_part).expect("failed to delete part file");
assert!(!target_part.exists());
println!("✅ Deleted shard part file: {target_part:?}");
// Create heal manager with faster interval
let cfg = HealConfig {
heal_interval: Duration::from_millis(1),
..Default::default()
};
let heal_manager = HealManager::new(heal_storage.clone(), Some(cfg));
heal_manager.start().await.unwrap();
// Submit heal request for the object
let heal_request = HealRequest::new(
HealType::Object {
bucket: bucket_name.to_string(),
object: object_name.to_string(),
version_id: None,
},
HealOptions {
dry_run: false,
recursive: false,
remove_corrupted: false,
recreate_missing: true,
scan_mode: HealScanMode::Normal,
update_parity: true,
timeout: Some(Duration::from_secs(300)),
pool_index: None,
set_index: None,
},
HealPriority::Normal,
);
let task_id = heal_manager
.submit_heal_request(heal_request)
.await
.expect("Failed to submit heal request");
info!("Submitted heal request with task ID: {}", task_id);
// Wait for task completion
tokio::time::sleep(tokio::time::Duration::from_secs(8)).await;
// Attempt to fetch task status (might be removed if finished)
match heal_manager.get_task_status(&task_id).await {
Ok(status) => info!("Task status: {:?}", status),
Err(e) => info!("Task status not found (likely completed): {}", e),
}
// ─── 2⃣ verify each part file is restored ───────
assert!(target_part.exists());
info!("Heal object basic test passed");
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
#[serial]
async fn test_heal_bucket_basic() {
let (disk_paths, ecstore, heal_storage) = setup_test_env().await;
// Create test bucket
let bucket_name = "test-bucket-heal";
create_test_bucket(&ecstore, bucket_name).await;
// ─── 1⃣ delete bucket dir on disk ──────────────
let broken_bucket_path = disk_paths[0].join(bucket_name);
assert!(broken_bucket_path.exists(), "bucket dir does not exist on disk");
std::fs::remove_dir_all(&broken_bucket_path).expect("failed to delete bucket dir on disk");
assert!(!broken_bucket_path.exists(), "bucket dir still exists after deletion");
println!("✅ Deleted bucket directory on disk: {broken_bucket_path:?}");
// Create heal manager with faster interval
let cfg = HealConfig {
heal_interval: Duration::from_millis(1),
..Default::default()
};
let heal_manager = HealManager::new(heal_storage.clone(), Some(cfg));
heal_manager.start().await.unwrap();
// Submit heal request for the bucket
let heal_request = HealRequest::new(
HealType::Bucket {
bucket: bucket_name.to_string(),
},
HealOptions {
dry_run: false,
recursive: true,
remove_corrupted: false,
recreate_missing: false,
scan_mode: HealScanMode::Normal,
update_parity: false,
timeout: Some(Duration::from_secs(300)),
pool_index: None,
set_index: None,
},
HealPriority::Normal,
);
let task_id = heal_manager
.submit_heal_request(heal_request)
.await
.expect("Failed to submit bucket heal request");
info!("Submitted bucket heal request with task ID: {}", task_id);
// Wait for task completion
tokio::time::sleep(tokio::time::Duration::from_secs(5)).await;
// Attempt to fetch task status (optional)
if let Ok(status) = heal_manager.get_task_status(&task_id).await {
if status == HealTaskStatus::Completed {
info!("Bucket heal task status: {:?}", status);
} else {
panic!("Bucket heal task status: {status:?}");
}
}
// ─── 2⃣ Verify bucket directory is restored on every disk ───────
assert!(broken_bucket_path.exists(), "bucket dir does not exist on disk");
info!("Heal bucket basic test passed");
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
#[serial]
async fn test_heal_format_basic() {
let (disk_paths, _ecstore, heal_storage) = setup_test_env().await;
// ─── 1⃣ delete format.json on one disk ──────────────
let format_path = disk_paths[0].join(".rustfs.sys").join("format.json");
assert!(format_path.exists(), "format.json does not exist on disk");
std::fs::remove_file(&format_path).expect("failed to delete format.json on disk");
assert!(!format_path.exists(), "format.json still exists after deletion");
println!("✅ Deleted format.json on disk: {format_path:?}");
// Create heal manager with faster interval
let cfg = HealConfig {
heal_interval: Duration::from_secs(2),
..Default::default()
};
let heal_manager = HealManager::new(heal_storage.clone(), Some(cfg));
heal_manager.start().await.unwrap();
// Wait for task completion
tokio::time::sleep(tokio::time::Duration::from_secs(5)).await;
// ─── 2⃣ verify format.json is restored ───────
assert!(format_path.exists(), "format.json does not exist on disk after heal");
info!("Heal format basic test passed");
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
#[serial]
async fn test_heal_format_with_data() {
let (disk_paths, ecstore, heal_storage) = setup_test_env().await;
// Create test bucket and object
let bucket_name = "test-bucket";
let object_name = "test-object.txt";
let test_data = b"Hello, this is test data for healing!";
create_test_bucket(&ecstore, bucket_name).await;
upload_test_object(&ecstore, bucket_name, object_name, test_data).await;
let obj_dir = disk_paths[0].join(bucket_name).join(object_name);
let target_part = WalkDir::new(&obj_dir)
.min_depth(2)
.max_depth(2)
.into_iter()
.filter_map(Result::ok)
.find(|e| e.file_type().is_file() && e.file_name().to_str().map(|n| n.starts_with("part.")).unwrap_or(false))
.map(|e| e.into_path())
.expect("Failed to locate part file to delete");
// ─── 1⃣ delete format.json on one disk ──────────────
let format_path = disk_paths[0].join(".rustfs.sys").join("format.json");
std::fs::remove_dir_all(&disk_paths[0]).expect("failed to delete all contents under disk_paths[0]");
std::fs::create_dir_all(&disk_paths[0]).expect("failed to recreate disk_paths[0] directory");
println!("✅ Deleted format.json on disk: {:?}", disk_paths[0]);
// Create heal manager with faster interval
let cfg = HealConfig {
heal_interval: Duration::from_secs(2),
..Default::default()
};
let heal_manager = HealManager::new(heal_storage.clone(), Some(cfg));
heal_manager.start().await.unwrap();
// Wait for task completion
tokio::time::sleep(tokio::time::Duration::from_secs(5)).await;
// ─── 2⃣ verify format.json is restored ───────
assert!(format_path.exists(), "format.json does not exist on disk after heal");
// ─── 3 verify each part file is restored ───────
assert!(target_part.exists());
info!("Heal format basic test passed");
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
#[serial]
async fn test_heal_storage_api_direct() {
let (_disk_paths, ecstore, heal_storage) = setup_test_env().await;
// Test direct heal storage API calls
// Test heal_format
let format_result = heal_storage.heal_format(true).await; // dry run
assert!(format_result.is_ok());
info!("Direct heal_format test passed");
// Test heal_bucket
let bucket_name = "test-bucket-direct";
create_test_bucket(&ecstore, bucket_name).await;
let heal_opts = HealOpts {
recursive: true,
dry_run: true,
remove: false,
recreate: false,
scan_mode: HealScanMode::Normal,
update_parity: false,
no_lock: false,
pool: None,
set: None,
};
let bucket_result = heal_storage.heal_bucket(bucket_name, &heal_opts).await;
assert!(bucket_result.is_ok());
info!("Direct heal_bucket test passed");
// Test heal_object
let object_name = "test-object-direct.txt";
let test_data = b"Test data for direct heal API";
upload_test_object(&ecstore, bucket_name, object_name, test_data).await;
let object_heal_opts = HealOpts {
recursive: false,
dry_run: true,
remove: false,
recreate: false,
scan_mode: HealScanMode::Normal,
update_parity: false,
no_lock: false,
pool: None,
set: None,
};
let object_result = heal_storage
.heal_object(bucket_name, object_name, None, &object_heal_opts)
.await;
assert!(object_result.is_ok());
info!("Direct heal_object test passed");
info!("Direct heal storage API test passed");
}

View File

@@ -28,5 +28,15 @@ categories = ["web-programming", "development-tools", "data-structures"]
workspace = true
[dependencies]
tokio.workspace = true
lazy_static = { workspace = true }
tokio = { workspace = true }
tonic = { workspace = true }
uuid = { workspace = true }
chrono = { workspace = true }
rustfs-madmin = { workspace = true }
rustfs-filemeta = { workspace = true }
serde = { workspace = true }
path-clean = { workspace = true }
rmp-serde = { workspace = true }
async-trait = { workspace = true }
s3s = { workspace = true }

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,427 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use s3s::dto::{BucketLifecycleConfiguration, ExpirationStatus, LifecycleRule, ReplicationConfiguration, ReplicationRuleStatus};
use serde::{Deserialize, Serialize};
use std::{
fmt::{self, Display},
sync::OnceLock,
};
use tokio::sync::mpsc;
use uuid::Uuid;
pub const HEAL_DELETE_DANGLING: bool = true;
pub const RUSTFS_RESERVED_BUCKET: &str = "rustfs";
pub const RUSTFS_RESERVED_BUCKET_PATH: &str = "/rustfs";
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
pub enum HealItemType {
Metadata,
Bucket,
BucketMetadata,
Object,
}
impl HealItemType {
pub fn to_str(&self) -> &str {
match self {
HealItemType::Metadata => "metadata",
HealItemType::Bucket => "bucket",
HealItemType::BucketMetadata => "bucket-metadata",
HealItemType::Object => "object",
}
}
}
impl Display for HealItemType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.to_str())
}
}
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
pub enum DriveState {
Ok,
Offline,
Corrupt,
Missing,
PermissionDenied,
Faulty,
RootMount,
Unknown,
Unformatted, // only returned by disk
}
impl DriveState {
pub fn to_str(&self) -> &str {
match self {
DriveState::Ok => "ok",
DriveState::Offline => "offline",
DriveState::Corrupt => "corrupt",
DriveState::Missing => "missing",
DriveState::PermissionDenied => "permission-denied",
DriveState::Faulty => "faulty",
DriveState::RootMount => "root-mount",
DriveState::Unknown => "unknown",
DriveState::Unformatted => "unformatted",
}
}
}
impl Display for DriveState {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.to_str())
}
}
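Both enums route `Display` through `to_str`, so formatting yields the lowercase tokens; a quick sketch:
fn heal_enum_display_sketch() {
    // Display output matches the to_str mappings above.
    assert_eq!(HealItemType::BucketMetadata.to_string(), "bucket-metadata");
    assert_eq!(DriveState::PermissionDenied.to_string(), "permission-denied");
    assert_eq!(format!("state={}", DriveState::Ok), "state=ok");
}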
#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub enum HealScanMode {
Unknown,
Normal,
Deep,
}
impl Default for HealScanMode {
fn default() -> Self {
Self::Normal
}
}
#[derive(Clone, Copy, Debug, Default, Serialize, Deserialize)]
pub struct HealOpts {
pub recursive: bool,
#[serde(rename = "dryRun")]
pub dry_run: bool,
pub remove: bool,
pub recreate: bool,
#[serde(rename = "scanMode")]
pub scan_mode: HealScanMode,
#[serde(rename = "updateParity")]
pub update_parity: bool,
#[serde(rename = "nolock")]
pub no_lock: bool,
pub pool: Option<usize>,
pub set: Option<usize>,
}
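Because several fields carry `#[serde(rename = ...)]`, a `HealOpts` value serializes with the camelCase keys shown in the attributes; a sketch assuming `serde_json` is the serializer:
// serde_json is assumed to be available here.
fn heal_opts_json_sketch() {
    let opts = HealOpts {
        recursive: true,
        dry_run: true,
        scan_mode: HealScanMode::Deep,
        ..Default::default()
    };
    let json = serde_json::to_string(&opts).expect("HealOpts should serialize");
    // Keys follow the rename attributes: "dryRun", "scanMode", "updateParity", "nolock".
    assert!(json.contains("\"dryRun\":true"));
    assert!(json.contains("\"scanMode\":\"Deep\""));
}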
/// Heal channel command type
#[derive(Debug, Clone)]
pub enum HealChannelCommand {
/// Start a new heal task
Start(HealChannelRequest),
/// Query heal task status
Query { heal_path: String, client_token: String },
/// Cancel heal task
Cancel { heal_path: String },
}
/// Heal request from admin to ahm
#[derive(Debug, Clone, Default)]
pub struct HealChannelRequest {
/// Unique request ID
pub id: String,
/// Disk ID for heal disk/erasure set task
pub disk: Option<String>,
/// Bucket name
pub bucket: String,
/// Object prefix (optional)
pub object_prefix: Option<String>,
/// Force start heal
pub force_start: bool,
/// Priority
pub priority: HealChannelPriority,
/// Pool index (optional)
pub pool_index: Option<usize>,
/// Set index (optional)
pub set_index: Option<usize>,
/// Scan mode (optional)
pub scan_mode: Option<HealScanMode>,
/// Whether to remove corrupted data
pub remove_corrupted: Option<bool>,
/// Whether to recreate missing data
pub recreate_missing: Option<bool>,
/// Whether to update parity
pub update_parity: Option<bool>,
/// Whether to recursively process
pub recursive: Option<bool>,
/// Whether to dry run
pub dry_run: Option<bool>,
/// Timeout in seconds (optional)
pub timeout_seconds: Option<u64>,
}
/// Heal response from ahm to admin
#[derive(Debug, Clone)]
pub struct HealChannelResponse {
/// Request ID
pub request_id: String,
/// Success status
pub success: bool,
/// Response data (if successful)
pub data: Option<Vec<u8>>,
/// Error message (if failed)
pub error: Option<String>,
}
/// Heal priority
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HealChannelPriority {
/// Low priority
Low,
/// Normal priority
Normal,
/// High priority
High,
/// Critical priority
Critical,
}
impl Default for HealChannelPriority {
fn default() -> Self {
Self::Normal
}
}
/// Heal channel sender
pub type HealChannelSender = mpsc::UnboundedSender<HealChannelCommand>;
/// Heal channel receiver
pub type HealChannelReceiver = mpsc::UnboundedReceiver<HealChannelCommand>;
/// Global heal channel sender
static GLOBAL_HEAL_CHANNEL_SENDER: OnceLock<HealChannelSender> = OnceLock::new();
/// Initialize global heal channel
pub fn init_heal_channel() -> HealChannelReceiver {
let (tx, rx) = mpsc::unbounded_channel();
GLOBAL_HEAL_CHANNEL_SENDER
.set(tx)
.expect("Heal channel sender already initialized");
rx
}
/// Get global heal channel sender
pub fn get_heal_channel_sender() -> Option<&'static HealChannelSender> {
GLOBAL_HEAL_CHANNEL_SENDER.get()
}
/// Send heal command through global channel
pub async fn send_heal_command(command: HealChannelCommand) -> Result<(), String> {
if let Some(sender) = get_heal_channel_sender() {
sender
.send(command)
.map_err(|e| format!("Failed to send heal command: {e}"))?;
Ok(())
} else {
Err("Heal channel not initialized".to_string())
}
}
/// Send heal start request
pub async fn send_heal_request(request: HealChannelRequest) -> Result<(), String> {
send_heal_command(HealChannelCommand::Start(request)).await
}
/// Send heal query request
pub async fn query_heal_status(heal_path: String, client_token: String) -> Result<(), String> {
send_heal_command(HealChannelCommand::Query { heal_path, client_token }).await
}
/// Send heal cancel request
pub async fn cancel_heal_task(heal_path: String) -> Result<(), String> {
send_heal_command(HealChannelCommand::Cancel { heal_path }).await
}
/// Create a new heal request
pub fn create_heal_request(
bucket: String,
object_prefix: Option<String>,
force_start: bool,
priority: Option<HealChannelPriority>,
) -> HealChannelRequest {
HealChannelRequest {
id: Uuid::new_v4().to_string(),
bucket,
object_prefix,
force_start,
priority: priority.unwrap_or_default(),
pool_index: None,
set_index: None,
scan_mode: None,
remove_corrupted: None,
recreate_missing: None,
update_parity: None,
recursive: None,
dry_run: None,
timeout_seconds: None,
disk: None,
}
}
/// Create a new heal request with advanced options
pub fn create_heal_request_with_options(
bucket: String,
object_prefix: Option<String>,
force_start: bool,
priority: Option<HealChannelPriority>,
pool_index: Option<usize>,
set_index: Option<usize>,
) -> HealChannelRequest {
HealChannelRequest {
id: Uuid::new_v4().to_string(),
bucket,
object_prefix,
force_start,
priority: priority.unwrap_or_default(),
pool_index,
set_index,
..Default::default()
}
}
/// Create a heal response
pub fn create_heal_response(
request_id: String,
success: bool,
data: Option<Vec<u8>>,
error: Option<String>,
) -> HealChannelResponse {
HealChannelResponse {
request_id,
success,
data,
error,
}
}
fn lc_get_prefix(rule: &LifecycleRule) -> String {
if let Some(p) = &rule.prefix {
return p.to_string();
} else if let Some(filter) = &rule.filter {
if let Some(p) = &filter.prefix {
return p.to_string();
} else if let Some(and) = &filter.and {
if let Some(p) = &and.prefix {
return p.to_string();
}
}
}
"".into()
}
pub fn lc_has_active_rules(config: &BucketLifecycleConfiguration, prefix: &str) -> bool {
if config.rules.is_empty() {
return false;
}
for rule in config.rules.iter() {
if rule.status == ExpirationStatus::from_static(ExpirationStatus::DISABLED) {
continue;
}
let rule_prefix = lc_get_prefix(rule);
if !prefix.is_empty() && !rule_prefix.is_empty() && !prefix.starts_with(&rule_prefix) && !rule_prefix.starts_with(prefix)
{
continue;
}
if let Some(e) = &rule.noncurrent_version_expiration {
if let Some(true) = e.noncurrent_days.map(|d| d > 0) {
return true;
}
if let Some(true) = e.newer_noncurrent_versions.map(|d| d > 0) {
return true;
}
}
if rule.noncurrent_version_transitions.is_some() {
return true;
}
if let Some(true) = rule.expiration.as_ref().map(|e| e.date.is_some()) {
return true;
}
if let Some(true) = rule.expiration.as_ref().map(|e| e.days.is_some()) {
return true;
}
if let Some(Some(true)) = rule.expiration.as_ref().map(|e| e.expired_object_delete_marker) {
return true;
}
if let Some(true) = rule.transitions.as_ref().map(|t| !t.is_empty()) {
return true;
}
if rule.transitions.is_some() {
return true;
}
}
false
}
pub fn rep_has_active_rules(config: &ReplicationConfiguration, prefix: &str, recursive: bool) -> bool {
if config.rules.is_empty() {
return false;
}
for rule in config.rules.iter() {
if rule
.status
.eq(&ReplicationRuleStatus::from_static(ReplicationRuleStatus::DISABLED))
{
continue;
}
if !prefix.is_empty() {
if let Some(filter) = &rule.filter {
if let Some(r_prefix) = &filter.prefix {
if !r_prefix.is_empty() {
// incoming prefix must be in rule prefix
if !recursive && !prefix.starts_with(r_prefix) {
continue;
}
// If recursive, skip this rule when it matches neither the tested prefix nor any prefix level below it
if recursive && !r_prefix.starts_with(prefix) && !prefix.starts_with(r_prefix) {
continue;
}
}
}
}
}
return true;
}
false
}
pub async fn send_heal_disk(set_disk_id: String, priority: Option<HealChannelPriority>) -> Result<(), String> {
let req = HealChannelRequest {
id: Uuid::new_v4().to_string(),
bucket: "".to_string(),
object_prefix: None,
disk: Some(set_disk_id),
force_start: false,
priority: priority.unwrap_or_default(),
pool_index: None,
set_index: None,
scan_mode: None,
remove_corrupted: None,
recreate_missing: None,
update_parity: None,
recursive: None,
dry_run: None,
timeout_seconds: None,
};
send_heal_request(req).await
}
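A minimal wiring sketch for the channel API above: initialize the global channel once, hand the receiver to a consumer task, and push requests from the admin side; the consumer body here is illustrative only:
// Wiring sketch; must run inside a Tokio runtime.
async fn heal_channel_wiring_sketch() {
    let mut rx = init_heal_channel();
    tokio::spawn(async move {
        while let Some(cmd) = rx.recv().await {
            match cmd {
                HealChannelCommand::Start(req) => println!("start heal for bucket {}", req.bucket),
                HealChannelCommand::Query { heal_path, .. } => println!("query {heal_path}"),
                HealChannelCommand::Cancel { heal_path } => println!("cancel {heal_path}"),
            }
        }
    });
    let req = create_heal_request("my-bucket".to_string(), None, false, Some(HealChannelPriority::High));
    send_heal_request(req).await.expect("heal channel should be initialized");
}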

View File

@@ -14,8 +14,11 @@
pub mod bucket_stats;
// pub mod error;
pub mod data_usage;
pub mod globals;
pub mod heal_channel;
pub mod last_minute;
pub mod metrics;
// The default delimiter is ',' (ASCII 44)
pub static DEFAULT_DELIMITER: u8 = 44;

View File

@@ -12,14 +12,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use super::data_scanner::CurrentScannerCycle;
use crate::bucket::lifecycle::lifecycle;
use chrono::Utc;
use chrono::{DateTime, Utc};
use lazy_static::lazy_static;
use rustfs_common::last_minute::{AccElem, LastMinuteLatency};
use rustfs_madmin::metrics::ScannerMetrics as M_ScannerMetrics;
use std::{
collections::HashMap,
fmt::Display,
pin::Pin,
sync::{
Arc,
@@ -29,12 +27,58 @@ use std::{
};
use tokio::sync::{Mutex, RwLock};
use crate::last_minute::{AccElem, LastMinuteLatency};
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IlmAction {
NoneAction = 0,
DeleteAction,
DeleteVersionAction,
TransitionAction,
TransitionVersionAction,
DeleteRestoredAction,
DeleteRestoredVersionAction,
DeleteAllVersionsAction,
DelMarkerDeleteAllVersionsAction,
ActionCount,
}
impl IlmAction {
pub fn delete_restored(&self) -> bool {
*self == Self::DeleteRestoredAction || *self == Self::DeleteRestoredVersionAction
}
pub fn delete_versioned(&self) -> bool {
*self == Self::DeleteVersionAction || *self == Self::DeleteRestoredVersionAction
}
pub fn delete_all(&self) -> bool {
*self == Self::DeleteAllVersionsAction || *self == Self::DelMarkerDeleteAllVersionsAction
}
pub fn delete(&self) -> bool {
if self.delete_restored() {
return true;
}
*self == Self::DeleteVersionAction
|| *self == Self::DeleteAction
|| *self == Self::DeleteAllVersionsAction
|| *self == Self::DelMarkerDeleteAllVersionsAction
}
}
impl Display for IlmAction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{self:?}")
}
}
lazy_static! {
pub static ref globalScannerMetrics: Arc<ScannerMetrics> = Arc::new(ScannerMetrics::new());
pub static ref globalMetrics: Arc<Metrics> = Arc::new(Metrics::new());
}
#[derive(Clone, Debug, PartialEq, PartialOrd)]
pub enum ScannerMetric {
pub enum Metric {
// START Realtime metrics, that only records
// last minute latencies and total operation count.
ReadMetadata = 0,
@@ -69,7 +113,7 @@ pub enum ScannerMetric {
Last,
}
impl ScannerMetric {
impl Metric {
/// Convert to string representation for metrics
pub fn as_str(self) -> &'static str {
match self {
@@ -203,7 +247,7 @@ impl CurrentPathTracker {
}
/// Main scanner metrics structure
pub struct ScannerMetrics {
pub struct Metrics {
// All fields must be accessed atomically and aligned.
operations: Vec<AtomicU64>,
latency: Vec<LockedLastMinuteLatency>,
@@ -213,94 +257,102 @@ pub struct ScannerMetrics {
current_paths: Arc<RwLock<HashMap<String, Arc<CurrentPathTracker>>>>,
// Cycle information
cycle_info: Arc<RwLock<Option<CurrentScannerCycle>>>,
cycle_info: Arc<RwLock<Option<CurrentCycle>>>,
}
impl ScannerMetrics {
pub fn new() -> Self {
let operations = (0..ScannerMetric::Last as usize).map(|_| AtomicU64::new(0)).collect();
// This is a placeholder. We'll need to define this struct.
#[derive(Clone, Debug)]
pub struct CurrentCycle {
pub current: u64,
pub cycle_completed: Vec<DateTime<Utc>>,
pub started: DateTime<Utc>,
}
let latency = (0..ScannerMetric::LastRealtime as usize)
impl Metrics {
pub fn new() -> Self {
let operations = (0..Metric::Last as usize).map(|_| AtomicU64::new(0)).collect();
let latency = (0..Metric::LastRealtime as usize)
.map(|_| LockedLastMinuteLatency::new())
.collect();
Self {
operations,
latency,
actions: (0..ScannerMetric::Last as usize).map(|_| AtomicU64::new(0)).collect(),
actions_latency: vec![LockedLastMinuteLatency::default(); ScannerMetric::LastRealtime as usize],
actions: (0..IlmAction::ActionCount as usize).map(|_| AtomicU64::new(0)).collect(),
actions_latency: vec![LockedLastMinuteLatency::default(); IlmAction::ActionCount as usize],
current_paths: Arc::new(RwLock::new(HashMap::new())),
cycle_info: Arc::new(RwLock::new(None)),
}
}
/// Log scanner action with custom metadata - compatible with existing usage
pub fn log(metric: ScannerMetric) -> impl Fn(&HashMap<String, String>) {
pub fn log(metric: Metric) -> impl Fn(&HashMap<String, String>) {
let metric = metric as usize;
let start_time = SystemTime::now();
move |_custom: &HashMap<String, String>| {
let duration = SystemTime::now().duration_since(start_time).unwrap_or_default();
// Update operation count
globalScannerMetrics.operations[metric].fetch_add(1, Ordering::Relaxed);
globalMetrics.operations[metric].fetch_add(1, Ordering::Relaxed);
// Update latency for realtime metrics (spawn async task for this)
if (metric) < ScannerMetric::LastRealtime as usize {
if (metric) < Metric::LastRealtime as usize {
let metric_index = metric;
tokio::spawn(async move {
globalScannerMetrics.latency[metric_index].add(duration).await;
globalMetrics.latency[metric_index].add(duration).await;
});
}
// Log trace metrics
if metric as u8 > ScannerMetric::StartTrace as u8 {
if metric as u8 > Metric::StartTrace as u8 {
//debug!(metric = metric.as_str(), duration_ms = duration.as_millis(), "Scanner trace metric");
}
}
}
/// Time scanner action with size - returns function that takes size
pub fn time_size(metric: ScannerMetric) -> impl Fn(u64) {
pub fn time_size(metric: Metric) -> impl Fn(u64) {
let metric = metric as usize;
let start_time = SystemTime::now();
move |size: u64| {
let duration = SystemTime::now().duration_since(start_time).unwrap_or_default();
// Update operation count
globalScannerMetrics.operations[metric].fetch_add(1, Ordering::Relaxed);
globalMetrics.operations[metric].fetch_add(1, Ordering::Relaxed);
// Update latency for realtime metrics with size (spawn async task)
if (metric) < ScannerMetric::LastRealtime as usize {
if (metric) < Metric::LastRealtime as usize {
let metric_index = metric;
tokio::spawn(async move {
globalScannerMetrics.latency[metric_index].add_size(duration, size).await;
globalMetrics.latency[metric_index].add_size(duration, size).await;
});
}
}
}
/// Time a scanner action - returns a closure to call when done
pub fn time(metric: ScannerMetric) -> impl Fn() {
pub fn time(metric: Metric) -> impl Fn() {
let metric = metric as usize;
let start_time = SystemTime::now();
move || {
let duration = SystemTime::now().duration_since(start_time).unwrap_or_default();
// Update operation count
globalScannerMetrics.operations[metric].fetch_add(1, Ordering::Relaxed);
globalMetrics.operations[metric].fetch_add(1, Ordering::Relaxed);
// Update latency for realtime metrics (spawn async task)
if (metric) < ScannerMetric::LastRealtime as usize {
if (metric) < Metric::LastRealtime as usize {
let metric_index = metric;
tokio::spawn(async move {
globalScannerMetrics.latency[metric_index].add(duration).await;
globalMetrics.latency[metric_index].add(duration).await;
});
}
}
}
/// Time N scanner actions - returns function that takes count, then returns completion function
pub fn time_n(metric: ScannerMetric) -> Box<dyn Fn(usize) -> Box<dyn Fn() + Send + Sync> + Send + Sync> {
pub fn time_n(metric: Metric) -> Box<dyn Fn(usize) -> Box<dyn Fn() + Send + Sync> + Send + Sync> {
let metric = metric as usize;
let start_time = SystemTime::now();
Box::new(move |count: usize| {
@@ -308,22 +360,23 @@ impl ScannerMetrics {
let duration = SystemTime::now().duration_since(start_time).unwrap_or_default();
// Update operation count
globalScannerMetrics.operations[metric].fetch_add(count as u64, Ordering::Relaxed);
globalMetrics.operations[metric].fetch_add(count as u64, Ordering::Relaxed);
// Update latency for realtime metrics (spawn async task)
if (metric) < ScannerMetric::LastRealtime as usize {
if (metric) < Metric::LastRealtime as usize {
let metric_index = metric;
tokio::spawn(async move {
globalScannerMetrics.latency[metric_index].add(duration).await;
globalMetrics.latency[metric_index].add(duration).await;
});
}
})
})
}
pub fn time_ilm(a: lifecycle::IlmAction) -> Box<dyn Fn(u64) -> Box<dyn Fn() + Send + Sync> + Send + Sync> {
/// Time ILM action with versions - returns function that takes versions, then returns completion function
pub fn time_ilm(a: IlmAction) -> Box<dyn Fn(u64) -> Box<dyn Fn() + Send + Sync> + Send + Sync> {
let a_clone = a as usize;
if a_clone == lifecycle::IlmAction::NoneAction as usize || a_clone >= lifecycle::IlmAction::ActionCount as usize {
if a_clone == IlmAction::NoneAction as usize || a_clone >= IlmAction::ActionCount as usize {
return Box::new(move |_: u64| Box::new(move || {}));
}
let start = SystemTime::now();
@@ -331,50 +384,50 @@ impl ScannerMetrics {
Box::new(move || {
let duration = SystemTime::now().duration_since(start).unwrap_or(Duration::from_secs(0));
tokio::spawn(async move {
globalScannerMetrics.actions[a_clone].fetch_add(versions, Ordering::Relaxed);
globalScannerMetrics.actions_latency[a_clone].add(duration).await;
globalMetrics.actions[a_clone].fetch_add(versions, Ordering::Relaxed);
globalMetrics.actions_latency[a_clone].add(duration).await;
});
})
})
}
/// Increment time with specific duration
pub async fn inc_time(metric: ScannerMetric, duration: Duration) {
pub async fn inc_time(metric: Metric, duration: Duration) {
let metric = metric as usize;
// Update operation count
globalScannerMetrics.operations[metric].fetch_add(1, Ordering::Relaxed);
globalMetrics.operations[metric].fetch_add(1, Ordering::Relaxed);
// Update latency for realtime metrics
if (metric) < ScannerMetric::LastRealtime as usize {
globalScannerMetrics.latency[metric].add(duration).await;
if (metric) < Metric::LastRealtime as usize {
globalMetrics.latency[metric].add(duration).await;
}
}
/// Get lifetime operation count for a metric
pub fn lifetime(&self, metric: ScannerMetric) -> u64 {
pub fn lifetime(&self, metric: Metric) -> u64 {
let metric = metric as usize;
if (metric) >= ScannerMetric::Last as usize {
if (metric) >= Metric::Last as usize {
return 0;
}
self.operations[metric].load(Ordering::Relaxed)
}
/// Get last minute statistics for a metric
pub async fn last_minute(&self, metric: ScannerMetric) -> AccElem {
pub async fn last_minute(&self, metric: Metric) -> AccElem {
let metric = metric as usize;
if (metric) >= ScannerMetric::LastRealtime as usize {
if (metric) >= Metric::LastRealtime as usize {
return AccElem::default();
}
self.latency[metric].total().await
}
/// Set current cycle information
pub async fn set_cycle(&self, cycle: Option<CurrentScannerCycle>) {
pub async fn set_cycle(&self, cycle: Option<CurrentCycle>) {
*self.cycle_info.write().await = cycle;
}
/// Get current cycle information
pub async fn get_cycle(&self) -> Option<CurrentScannerCycle> {
pub async fn get_cycle(&self) -> Option<CurrentCycle> {
self.cycle_info.read().await.clone()
}
@@ -411,20 +464,20 @@ impl ScannerMetrics {
metrics.active_paths = self.get_current_paths().await;
// Lifetime operations
for i in 0..ScannerMetric::Last as usize {
for i in 0..Metric::Last as usize {
let count = self.operations[i].load(Ordering::Relaxed);
if count > 0 {
if let Some(metric) = ScannerMetric::from_index(i) {
if let Some(metric) = Metric::from_index(i) {
metrics.life_time_ops.insert(metric.as_str().to_string(), count);
}
}
}
// Last minute statistics for realtime metrics
for i in 0..ScannerMetric::LastRealtime as usize {
for i in 0..Metric::LastRealtime as usize {
let last_min = self.latency[i].total().await;
if last_min.n > 0 {
if let Some(_metric) = ScannerMetric::from_index(i) {
if let Some(_metric) = Metric::from_index(i) {
// Convert to madmin TimedAction format if needed
// This would require implementing the conversion
}
@@ -448,11 +501,7 @@ pub fn current_path_updater(disk: &str, initial: &str) -> (UpdateCurrentPathFn,
let tracker_clone = Arc::clone(&tracker);
let disk_clone = disk_name.clone();
tokio::spawn(async move {
globalScannerMetrics
.current_paths
.write()
.await
.insert(disk_clone, tracker_clone);
globalMetrics.current_paths.write().await.insert(disk_clone, tracker_clone);
});
let update_fn = {
@@ -471,7 +520,7 @@ pub fn current_path_updater(disk: &str, initial: &str) -> (UpdateCurrentPathFn,
Arc::new(move || -> Pin<Box<dyn std::future::Future<Output = ()> + Send>> {
let disk_name = disk_name.clone();
Box::pin(async move {
globalScannerMetrics.current_paths.write().await.remove(&disk_name);
globalMetrics.current_paths.write().await.remove(&disk_name);
})
})
};
@@ -479,7 +528,7 @@ pub fn current_path_updater(disk: &str, initial: &str) -> (UpdateCurrentPathFn,
(update_fn, done_fn)
}
impl Default for ScannerMetrics {
impl Default for Metrics {
fn default() -> Self {
Self::new()
}
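Call-site shape after the `ScannerMetrics` to `Metrics` rename: `time` returns a closure that records the elapsed latency when invoked, and `time_ilm` returns a closure taking the number of affected versions; a sketch, assuming a Tokio runtime is running (latency updates go through `tokio::spawn`):
// Sketch of the timing helpers shown above.
fn metrics_timing_sketch() {
    let done = Metrics::time(Metric::ReadMetadata);
    // ... perform the metadata read ...
    done(); // records one operation plus its latency against Metric::ReadMetadata

    let per_versions = Metrics::time_ilm(IlmAction::DeleteAction);
    let finish = per_versions(3); // three versions affected by this ILM action
    finish();
}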

View File

@@ -108,14 +108,26 @@ pub const DEFAULT_CONSOLE_ADDRESS: &str = concat!(":", DEFAULT_CONSOLE_PORT);
/// It is used to store the logs of the application.
/// Default value: rustfs.log
/// Environment variable: RUSTFS_OBSERVABILITY_LOG_FILENAME
pub const DEFAULT_LOG_FILENAME: &str = "rustfs.log";
pub const DEFAULT_LOG_FILENAME: &str = "rustfs";
/// Default OBS log filename for rustfs
/// This is the default log filename for OBS.
/// It is used to store the logs of the application.
/// Default value: rustfs.log
pub const DEFAULT_OBS_LOG_FILENAME: &str = concat!(DEFAULT_LOG_FILENAME, ".log");
/// Default sink file log file for rustfs
/// This is the default sink file log file for rustfs.
/// It is used to store the logs of the application.
/// Default value: rustfs-sink.log
pub const DEFAULT_SINK_FILE_LOG_FILE: &str = concat!(DEFAULT_LOG_FILENAME, "-sink.log");
/// Default log directory for rustfs
/// This is the default log directory for rustfs.
/// It is used to store the logs of the application.
/// Default value: logs
/// Environment variable: RUSTFS_OBSERVABILITY_LOG_DIRECTORY
pub const DEFAULT_LOG_DIR: &str = "deploy/logs";
pub const DEFAULT_LOG_DIR: &str = "/logs";
/// Default log rotation size mb for rustfs
/// This is the default log rotation size for rustfs.

View File

@@ -33,11 +33,11 @@ pub fn decrypt_data(password: &[u8], data: &[u8]) -> Result<Vec<u8>, crate::Erro
match id {
ID::Argon2idChaCHa20Poly1305 => {
let key = id.get_key(password, salt)?;
decryp(ChaCha20Poly1305::new_from_slice(&key)?, nonce, data)
decrypt(ChaCha20Poly1305::new_from_slice(&key)?, nonce, data)
}
_ => {
let key = id.get_key(password, salt)?;
decryp(Aes256Gcm::new_from_slice(&key)?, nonce, data)
decrypt(Aes256Gcm::new_from_slice(&key)?, nonce, data)
}
}
}
@@ -135,7 +135,7 @@ pub fn decrypt_data(password: &[u8], data: &[u8]) -> Result<Vec<u8>, crate::Erro
#[cfg(any(test, feature = "crypto"))]
#[inline]
fn decryp<T: aes_gcm::aead::Aead>(stream: T, nonce: &[u8], data: &[u8]) -> Result<Vec<u8>, crate::Error> {
fn decrypt<T: aes_gcm::aead::Aead>(stream: T, nonce: &[u8], data: &[u8]) -> Result<Vec<u8>, crate::Error> {
use crate::error::Error;
stream
.decrypt(aes_gcm::Nonce::from_slice(nonce), data)
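With the typo fixed (`decryp` to `decrypt`), the public entry point remains `decrypt_data`; a hedged round-trip sketch, where `encrypt_data` is assumed to be the matching helper in this crate and is not shown in this diff:
// Round-trip sketch; encrypt_data is an assumption about the sibling API.
fn crypto_roundtrip_sketch() -> Result<(), crate::Error> {
    let password = b"correct horse battery staple";
    let ciphertext = encrypt_data(password, b"payload")?; // assumed counterpart
    let plaintext = decrypt_data(password, &ciphertext)?;
    assert_eq!(plaintext, b"payload");
    Ok(())
}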

View File

@@ -38,3 +38,7 @@ url.workspace = true
rustfs-madmin.workspace = true
rustfs-filemeta.workspace = true
bytes.workspace = true
serial_test = "3.2.0"
aws-sdk-s3 = "1.99.0"
aws-config = "1.8.3"
async-trait = { workspace = true }

View File

@@ -13,28 +13,45 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use rustfs_lock::{
drwmutex::Options,
lock_args::LockArgs,
namespace_lock::{NsLockMap, new_nslock},
new_lock_api,
};
use async_trait::async_trait;
use rustfs_ecstore::{disk::endpoint::Endpoint, lock_utils::create_unique_clients};
use rustfs_lock::client::{LockClient, local::LocalClient};
use rustfs_lock::types::{LockInfo, LockResponse, LockStats};
use rustfs_lock::{LockId, LockMetadata, LockPriority, LockType};
use rustfs_lock::{LockRequest, NamespaceLock, NamespaceLockManager};
use rustfs_protos::{node_service_time_out_client, proto_gen::node_service::GenerallyLockRequest};
use serial_test::serial;
use std::{error::Error, sync::Arc, time::Duration};
use tokio::sync::RwLock;
use tokio::time::sleep;
use tonic::Request;
use url::Url;
const CLUSTER_ADDR: &str = "http://localhost:9000";
fn get_cluster_endpoints() -> Vec<Endpoint> {
vec![Endpoint {
url: Url::parse(CLUSTER_ADDR).unwrap(),
is_local: false,
pool_idx: 0,
set_idx: 0,
disk_idx: 0,
}]
}
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_lock_unlock_rpc() -> Result<(), Box<dyn Error>> {
let args = LockArgs {
uid: "1111".to_string(),
resources: vec!["dandan".to_string()],
let args = LockRequest {
lock_id: LockId::new_deterministic("dandan"),
resource: "dandan".to_string(),
lock_type: LockType::Exclusive,
owner: "dd".to_string(),
source: "".to_string(),
quorum: 3,
acquire_timeout: Duration::from_secs(30),
ttl: Duration::from_secs(30),
metadata: LockMetadata::default(),
priority: LockPriority::Normal,
deadlock_detection: false,
};
let args = serde_json::to_string(&args)?;
@@ -58,31 +75,616 @@ async fn test_lock_unlock_rpc() -> Result<(), Box<dyn Error>> {
Ok(())
}
#[tokio::test]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_lock_unlock_ns_lock() -> Result<(), Box<dyn Error>> {
let url = url::Url::parse("http://127.0.0.1:9000/data")?;
let locker = new_lock_api(false, Some(url));
let ns_mutex = Arc::new(RwLock::new(NsLockMap::new(true)));
let ns = new_nslock(
Arc::clone(&ns_mutex),
"local".to_string(),
"dandan".to_string(),
vec!["foo".to_string()],
vec![locker],
)
.await;
assert!(
ns.0.write()
.await
.get_lock(&Options {
timeout: Duration::from_secs(5),
retry_interval: Duration::from_secs(1),
})
.await
.unwrap()
);
/// Mock client that simulates remote node failures
#[derive(Debug)]
struct FailingMockClient {
local_client: Arc<dyn LockClient>,
should_fail_acquire: bool,
should_fail_release: bool,
}
impl FailingMockClient {
fn new(should_fail_acquire: bool, should_fail_release: bool) -> Self {
Self {
local_client: Arc::new(LocalClient::new()),
should_fail_acquire,
should_fail_release,
}
}
}
#[async_trait]
impl LockClient for FailingMockClient {
async fn acquire_exclusive(&self, request: &LockRequest) -> rustfs_lock::error::Result<LockResponse> {
if self.should_fail_acquire {
// Simulate network timeout or remote node failure
return Ok(LockResponse::failure("Simulated remote node failure", Duration::from_millis(100)));
}
self.local_client.acquire_exclusive(request).await
}
async fn acquire_shared(&self, request: &LockRequest) -> rustfs_lock::error::Result<LockResponse> {
if self.should_fail_acquire {
return Ok(LockResponse::failure("Simulated remote node failure", Duration::from_millis(100)));
}
self.local_client.acquire_shared(request).await
}
async fn release(&self, lock_id: &LockId) -> rustfs_lock::error::Result<bool> {
if self.should_fail_release {
return Err(rustfs_lock::error::LockError::internal("Simulated release failure"));
}
self.local_client.release(lock_id).await
}
async fn refresh(&self, lock_id: &LockId) -> rustfs_lock::error::Result<bool> {
self.local_client.refresh(lock_id).await
}
async fn force_release(&self, lock_id: &LockId) -> rustfs_lock::error::Result<bool> {
self.local_client.force_release(lock_id).await
}
async fn check_status(&self, lock_id: &LockId) -> rustfs_lock::error::Result<Option<LockInfo>> {
self.local_client.check_status(lock_id).await
}
async fn get_stats(&self) -> rustfs_lock::error::Result<LockStats> {
self.local_client.get_stats().await
}
async fn close(&self) -> rustfs_lock::error::Result<()> {
self.local_client.close().await
}
async fn is_online(&self) -> bool {
if self.should_fail_acquire {
return false; // Simulate offline node
}
true // Simulate online node
}
async fn is_local(&self) -> bool {
false // Simulate remote client
}
}
#[tokio::test]
#[serial]
async fn test_transactional_lock_with_remote_failure() -> Result<(), Box<dyn Error>> {
println!("🧪 Testing transactional lock with simulated remote node failure");
// Create a two-node cluster: one local (success) + one remote (failure)
let local_client: Arc<dyn LockClient> = Arc::new(LocalClient::new());
let failing_remote_client: Arc<dyn LockClient> = Arc::new(FailingMockClient::new(true, false));
let clients = vec![local_client, failing_remote_client];
let ns_lock = NamespaceLock::with_clients("test_transactional".to_string(), clients);
let resource = "critical_resource".to_string();
// Test single lock operation with 2PC
println!("📝 Testing single lock with remote failure...");
let request = LockRequest::new(&resource, LockType::Exclusive, "test_owner").with_ttl(Duration::from_secs(30));
let response = ns_lock.acquire_lock(&request).await?;
// Should fail because quorum (2/2) is not met due to remote failure
assert!(!response.success, "Lock should fail due to remote node failure");
println!("✅ Single lock correctly failed due to remote node failure");
// Verify no locks are left behind on the local node
let local_client_direct = LocalClient::new();
let lock_id = LockId::new_deterministic(&ns_lock.get_resource_key(&resource));
let lock_status = local_client_direct.check_status(&lock_id).await?;
assert!(lock_status.is_none(), "No lock should remain on local node after rollback");
println!("✅ Verified rollback: no locks left on local node");
Ok(())
}
#[tokio::test]
#[serial]
async fn test_transactional_batch_lock_with_mixed_failures() -> Result<(), Box<dyn Error>> {
println!("🧪 Testing transactional batch lock with mixed node failures");
// Create a cluster with different failure patterns
let local_client: Arc<dyn LockClient> = Arc::new(LocalClient::new());
let failing_remote_client: Arc<dyn LockClient> = Arc::new(FailingMockClient::new(true, false));
let clients = vec![local_client, failing_remote_client];
let ns_lock = NamespaceLock::with_clients("test_batch_transactional".to_string(), clients);
let resources = vec!["resource_1".to_string(), "resource_2".to_string(), "resource_3".to_string()];
println!("📝 Testing batch lock with remote failure...");
let result = ns_lock
.lock_batch(&resources, "batch_owner", Duration::from_millis(100), Duration::from_secs(30))
.await?;
// Should fail because remote node cannot acquire locks
assert!(!result, "Batch lock should fail due to remote node failure");
println!("✅ Batch lock correctly failed due to remote node failure");
// Verify no locks are left behind on any resource
let local_client_direct = LocalClient::new();
for resource in &resources {
let lock_id = LockId::new_deterministic(&ns_lock.get_resource_key(resource));
let lock_status = local_client_direct.check_status(&lock_id).await?;
assert!(lock_status.is_none(), "No lock should remain for resource: {resource}");
}
println!("✅ Verified rollback: no locks left on any resource");
Ok(())
}
#[tokio::test]
#[serial]
async fn test_transactional_lock_with_quorum_success() -> Result<(), Box<dyn Error>> {
println!("🧪 Testing transactional lock with quorum success");
// Create a three-node cluster where 2 succeed and 1 fails (quorum = 2 automatically)
let local_client1: Arc<dyn LockClient> = Arc::new(LocalClient::new());
let local_client2: Arc<dyn LockClient> = Arc::new(LocalClient::new());
let failing_remote_client: Arc<dyn LockClient> = Arc::new(FailingMockClient::new(true, false));
let clients = vec![local_client1, local_client2, failing_remote_client];
let ns_lock = NamespaceLock::with_clients("test_quorum".to_string(), clients);
let resource = "quorum_resource".to_string();
println!("📝 Testing lock with automatic quorum=2, 2 success + 1 failure...");
let request = LockRequest::new(&resource, LockType::Exclusive, "quorum_owner").with_ttl(Duration::from_secs(30));
let response = ns_lock.acquire_lock(&request).await?;
// Should fail because we require all nodes to succeed for consistency
// (even though quorum is met, the implementation requires all nodes for consistency)
assert!(!response.success, "Lock should fail due to consistency requirement");
println!("✅ Lock correctly failed due to consistency requirement (partial success rolled back)");
Ok(())
}
#[tokio::test]
#[serial]
async fn test_transactional_lock_rollback_on_release_failure() -> Result<(), Box<dyn Error>> {
println!("🧪 Testing rollback behavior when release fails");
// Create clients where acquire succeeds but release fails
let local_client: Arc<dyn LockClient> = Arc::new(LocalClient::new());
let failing_release_client: Arc<dyn LockClient> = Arc::new(FailingMockClient::new(false, true));
let clients = vec![local_client, failing_release_client];
let ns_lock = NamespaceLock::with_clients("test_release_failure".to_string(), clients);
let resource = "release_test_resource".to_string();
println!("📝 Testing lock acquisition with release failure handling...");
let request = LockRequest::new(&resource, LockType::Exclusive, "test_owner").with_ttl(Duration::from_secs(30));
// This should fail because both LocalClient instances share the same global lock map
// The first client (LocalClient) will acquire the lock, but the second client
// (FailingMockClient's internal LocalClient) will fail to acquire the same resource
let response = ns_lock.acquire_lock(&request).await?;
// The operation should fail due to lock contention between the two LocalClient instances
assert!(
!response.success,
"Lock should fail due to lock contention between LocalClient instances sharing global lock map"
);
println!("✅ Lock correctly failed due to lock contention (both clients use same global lock map)");
// Verify no locks are left behind after rollback
let local_client_direct = LocalClient::new();
let lock_id = LockId::new_deterministic(&ns_lock.get_resource_key(&resource));
let lock_status = local_client_direct.check_status(&lock_id).await?;
assert!(lock_status.is_none(), "No lock should remain after rollback");
println!("✅ Verified rollback: no locks left after failed acquisition");
Ok(())
}
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_lock_unlock_ns_lock() -> Result<(), Box<dyn Error>> {
let endpoints = get_cluster_endpoints();
let clients = create_unique_clients(&endpoints).await?;
let ns_lock = NamespaceLock::with_clients("test".to_string(), clients);
let resources = vec!["foo".to_string()];
let result = ns_lock
.lock_batch(&resources, "dandan", Duration::from_secs(5), Duration::from_secs(10))
.await;
match &result {
Ok(success) => println!("Lock result: {success}"),
Err(e) => println!("Lock error: {e}"),
}
let result = result?;
assert!(result, "Lock should succeed, but got: {result}");
ns_lock.unlock_batch(&resources, "dandan").await?;
Ok(())
}
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_concurrent_lock_attempts() -> Result<(), Box<dyn Error>> {
let endpoints = get_cluster_endpoints();
let clients = create_unique_clients(&endpoints).await?;
let ns_lock = NamespaceLock::with_clients("test".to_string(), clients);
let resource = vec!["concurrent_resource".to_string()];
// First lock should succeed
println!("Attempting first lock...");
let result1 = ns_lock
.lock_batch(&resource, "owner1", Duration::from_secs(5), Duration::from_secs(10))
.await?;
println!("First lock result: {result1}");
assert!(result1, "First lock should succeed");
// Second lock should fail (resource already locked)
println!("Attempting second lock...");
let result2 = ns_lock
.lock_batch(&resource, "owner2", Duration::from_secs(1), Duration::from_secs(10))
.await?;
println!("Second lock result: {result2}");
assert!(!result2, "Second lock should fail");
// Unlock by first owner
println!("Unlocking first lock...");
ns_lock.unlock_batch(&resource, "owner1").await?;
println!("First lock unlocked");
// Now second owner should be able to lock
println!("Attempting third lock...");
let result3 = ns_lock
.lock_batch(&resource, "owner2", Duration::from_secs(5), Duration::from_secs(10))
.await?;
println!("Third lock result: {result3}");
assert!(result3, "Lock should succeed after unlock");
// Clean up
println!("Cleaning up...");
ns_lock.unlock_batch(&resource, "owner2").await?;
println!("Test completed");
Ok(())
}
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_read_write_lock_compatibility() -> Result<(), Box<dyn Error>> {
let endpoints = get_cluster_endpoints();
let clients = create_unique_clients(&endpoints).await?;
let ns_lock = NamespaceLock::with_clients("test_rw".to_string(), clients);
let resource = vec!["rw_resource".to_string()];
// First read lock should succeed
let result1 = ns_lock
.rlock_batch(&resource, "reader1", Duration::from_secs(5), Duration::from_secs(10))
.await?;
assert!(result1, "First read lock should succeed");
// Second read lock should also succeed (read locks are compatible)
let result2 = ns_lock
.rlock_batch(&resource, "reader2", Duration::from_secs(5), Duration::from_secs(10))
.await?;
assert!(result2, "Second read lock should succeed");
// Write lock should fail (read locks are held)
let result3 = ns_lock
.lock_batch(&resource, "writer1", Duration::from_secs(1), Duration::from_secs(10))
.await?;
assert!(!result3, "Write lock should fail when read locks are held");
// Release read locks
ns_lock.runlock_batch(&resource, "reader1").await?;
ns_lock.runlock_batch(&resource, "reader2").await?;
// Now write lock should succeed
let result4 = ns_lock
.lock_batch(&resource, "writer1", Duration::from_secs(5), Duration::from_secs(10))
.await?;
assert!(result4, "Write lock should succeed after read locks released");
// Clean up
ns_lock.unlock_batch(&resource, "writer1").await?;
Ok(())
}
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_lock_timeout() -> Result<(), Box<dyn Error>> {
let endpoints = get_cluster_endpoints();
let clients = create_unique_clients(&endpoints).await?;
let ns_lock = NamespaceLock::with_clients("test_timeout".to_string(), clients);
let resource = vec!["timeout_resource".to_string()];
// First lock with short timeout
let result1 = ns_lock
.lock_batch(&resource, "owner1", Duration::from_secs(2), Duration::from_secs(1))
.await?;
assert!(result1, "First lock should succeed");
// Wait for lock to expire
sleep(Duration::from_secs(5)).await;
// Second lock should succeed after timeout
let result2 = ns_lock
.lock_batch(&resource, "owner2", Duration::from_secs(5), Duration::from_secs(1))
.await?;
assert!(result2, "Lock should succeed after timeout");
// Clean up
ns_lock.unlock_batch(&resource, "owner2").await?;
Ok(())
}
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_batch_lock_operations() -> Result<(), Box<dyn Error>> {
let endpoints = get_cluster_endpoints();
let clients = create_unique_clients(&endpoints).await?;
let ns_lock = NamespaceLock::with_clients("test_batch".to_string(), clients);
let resources = vec![
"batch_resource1".to_string(),
"batch_resource2".to_string(),
"batch_resource3".to_string(),
];
// Lock all resources
let result = ns_lock
.lock_batch(&resources, "batch_owner", Duration::from_secs(5), Duration::from_secs(10))
.await?;
assert!(result, "Batch lock should succeed");
// Try to lock one of the resources with different owner - should fail
let single_resource = vec!["batch_resource2".to_string()];
let result2 = ns_lock
.lock_batch(&single_resource, "other_owner", Duration::from_secs(1), Duration::from_secs(10))
.await?;
assert!(!result2, "Lock should fail for already locked resource");
// Unlock all resources
ns_lock.unlock_batch(&resources, "batch_owner").await?;
// Now should be able to lock single resource
let result3 = ns_lock
.lock_batch(&single_resource, "other_owner", Duration::from_secs(5), Duration::from_secs(10))
.await?;
assert!(result3, "Lock should succeed after batch unlock");
// Clean up
ns_lock.unlock_batch(&single_resource, "other_owner").await?;
Ok(())
}
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_multiple_namespaces() -> Result<(), Box<dyn Error>> {
let endpoints = get_cluster_endpoints();
let clients = create_unique_clients(&endpoints).await?;
let ns_lock1 = NamespaceLock::with_clients("namespace1".to_string(), clients.clone());
let ns_lock2 = NamespaceLock::with_clients("namespace2".to_string(), clients);
let resource = vec!["shared_resource".to_string()];
// Lock same resource in different namespaces - both should succeed
let result1 = ns_lock1
.lock_batch(&resource, "owner1", Duration::from_secs(5), Duration::from_secs(10))
.await?;
assert!(result1, "Lock in namespace1 should succeed");
let result2 = ns_lock2
.lock_batch(&resource, "owner2", Duration::from_secs(5), Duration::from_secs(10))
.await?;
assert!(result2, "Lock in namespace2 should succeed");
// Clean up
ns_lock1.unlock_batch(&resource, "owner1").await?;
ns_lock2.unlock_batch(&resource, "owner2").await?;
Ok(())
}
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_rpc_read_lock() -> Result<(), Box<dyn Error>> {
let args = LockRequest {
lock_id: LockId::new_deterministic("read_resource"),
resource: "read_resource".to_string(),
lock_type: LockType::Shared,
owner: "reader1".to_string(),
acquire_timeout: Duration::from_secs(30),
ttl: Duration::from_secs(30),
metadata: LockMetadata::default(),
priority: LockPriority::Normal,
deadlock_detection: false,
};
let args_str = serde_json::to_string(&args)?;
let mut client = node_service_time_out_client(&CLUSTER_ADDR.to_string()).await?;
// First read lock
let request = Request::new(GenerallyLockRequest { args: args_str.clone() });
let response = client.r_lock(request).await?.into_inner();
if let Some(error_info) = response.error_info {
panic!("can not get read lock: {error_info}");
}
// Second read lock with different owner should also succeed
let args2 = LockRequest {
lock_id: LockId::new_deterministic("read_resource"),
resource: "read_resource".to_string(),
lock_type: LockType::Shared,
owner: "reader2".to_string(),
acquire_timeout: Duration::from_secs(30),
ttl: Duration::from_secs(30),
metadata: LockMetadata::default(),
priority: LockPriority::Normal,
deadlock_detection: false,
};
let args2_str = serde_json::to_string(&args2)?;
let request2 = Request::new(GenerallyLockRequest { args: args2_str });
let response2 = client.r_lock(request2).await?.into_inner();
if let Some(error_info) = response2.error_info {
panic!("can not get second read lock: {error_info}");
}
// Unlock both
let request = Request::new(GenerallyLockRequest { args: args_str });
let response = client.r_un_lock(request).await?.into_inner();
if let Some(error_info) = response.error_info {
panic!("can not unlock read lock: {error_info}");
}
Ok(())
}
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_lock_refresh() -> Result<(), Box<dyn Error>> {
let args = LockRequest {
lock_id: LockId::new_deterministic("refresh_resource"),
resource: "refresh_resource".to_string(),
lock_type: LockType::Exclusive,
owner: "refresh_owner".to_string(),
acquire_timeout: Duration::from_secs(30),
ttl: Duration::from_secs(30),
metadata: LockMetadata::default(),
priority: LockPriority::Normal,
deadlock_detection: false,
};
let args_str = serde_json::to_string(&args)?;
let mut client = node_service_time_out_client(&CLUSTER_ADDR.to_string()).await?;
// Acquire lock
let request = Request::new(GenerallyLockRequest { args: args_str.clone() });
let response = client.lock(request).await?.into_inner();
if let Some(error_info) = response.error_info {
panic!("can not get lock: {error_info}");
}
// Refresh lock
let request = Request::new(GenerallyLockRequest { args: args_str.clone() });
let response = client.refresh(request).await?.into_inner();
if let Some(error_info) = response.error_info {
panic!("can not refresh lock: {error_info}");
}
assert!(response.success, "Lock refresh should succeed");
// Unlock
let request = Request::new(GenerallyLockRequest { args: args_str });
let response = client.un_lock(request).await?.into_inner();
if let Some(error_info) = response.error_info {
panic!("can not unlock: {error_info}");
}
Ok(())
}
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_force_unlock() -> Result<(), Box<dyn Error>> {
let args = LockRequest {
lock_id: LockId::new_deterministic("force_resource"),
resource: "force_resource".to_string(),
lock_type: LockType::Exclusive,
owner: "force_owner".to_string(),
acquire_timeout: Duration::from_secs(30),
ttl: Duration::from_secs(30),
metadata: LockMetadata::default(),
priority: LockPriority::Normal,
deadlock_detection: false,
};
let args_str = serde_json::to_string(&args)?;
let mut client = node_service_time_out_client(&CLUSTER_ADDR.to_string()).await?;
// Acquire lock
let request = Request::new(GenerallyLockRequest { args: args_str.clone() });
let response = client.lock(request).await?.into_inner();
if let Some(error_info) = response.error_info {
panic!("can not get lock: {error_info}");
}
// Force unlock (even by different owner)
let force_args = LockRequest {
lock_id: LockId::new_deterministic("force_resource"),
resource: "force_resource".to_string(),
lock_type: LockType::Exclusive,
owner: "admin".to_string(),
acquire_timeout: Duration::from_secs(30),
ttl: Duration::from_secs(30),
metadata: LockMetadata::default(),
priority: LockPriority::Normal,
deadlock_detection: false,
};
let force_args_str = serde_json::to_string(&force_args)?;
let request = Request::new(GenerallyLockRequest { args: force_args_str });
let response = client.force_un_lock(request).await?.into_inner();
if let Some(error_info) = response.error_info {
panic!("can not force unlock: {error_info}");
}
assert!(response.success, "Force unlock should succeed");
Ok(())
}
#[tokio::test]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_global_lock_map_sharing() -> Result<(), Box<dyn Error>> {
let endpoints = get_cluster_endpoints();
let clients = create_unique_clients(&endpoints).await?;
let ns_lock1 = NamespaceLock::with_clients("global_test".to_string(), clients.clone());
let ns_lock2 = NamespaceLock::with_clients("global_test".to_string(), clients);
let resource = vec!["global_test_resource".to_string()];
// First instance acquires lock
println!("First lock map attempting to acquire lock...");
let result1 = ns_lock1
.lock_batch(&resource, "owner1", std::time::Duration::from_secs(5), std::time::Duration::from_secs(10))
.await?;
println!("First lock result: {result1}");
assert!(result1, "First lock should succeed");
// Second instance should fail to acquire the same lock
println!("Second lock map attempting to acquire lock...");
let result2 = ns_lock2
.lock_batch(&resource, "owner2", std::time::Duration::from_secs(1), std::time::Duration::from_secs(10))
.await?;
println!("Second lock result: {result2}");
assert!(!result2, "Second lock should fail because resource is already locked");
// Release lock from first instance
println!("First lock map releasing lock...");
ns_lock1.unlock_batch(&resource, "owner1").await?;
// Now second instance should be able to acquire lock
println!("Second lock map attempting to acquire lock again...");
let result3 = ns_lock2
.lock_batch(&resource, "owner2", std::time::Duration::from_secs(5), std::time::Duration::from_secs(10))
.await?;
println!("Third lock result: {result3}");
assert!(result3, "Lock should succeed after first lock is released");
// Clean up
ns_lock2.unlock_batch(&resource, "owner2").await?;
Ok(())
}
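// Illustrative sketch (an assumption, not part of the committed tests): a helper that
// pairs lock_batch with unlock_batch so the release always runs, mirroring the manual
// clean-up steps in the tests above. It assumes the same NamespaceLock API those tests
// exercise; the name `with_batch_lock` is hypothetical.
async fn with_batch_lock<F, Fut, T>(
    ns_lock: &NamespaceLock,
    resources: &[String],
    owner: &str,
    ttl: Duration,
    body: F,
) -> Result<Option<T>, Box<dyn Error>>
where
    F: FnOnce() -> Fut,
    Fut: std::future::Future<Output = T>,
{
    // Give up quickly if any resource in the batch is already held.
    if !ns_lock.lock_batch(resources, owner, Duration::from_secs(1), ttl).await? {
        return Ok(None);
    }
    let out = body().await;
    // Always release what was acquired.
    ns_lock.unlock_batch(resources, owner).await?;
    Ok(Some(out))
}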

View File

@@ -14,3 +14,4 @@
mod lock;
mod node_interact_test;
mod sql;

View File

@@ -0,0 +1,402 @@
#![cfg(test)]
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use aws_config::meta::region::RegionProviderChain;
use aws_sdk_s3::Client;
use aws_sdk_s3::config::{Credentials, Region};
use aws_sdk_s3::types::{
CsvInput, CsvOutput, ExpressionType, FileHeaderInfo, InputSerialization, JsonInput, JsonOutput, JsonType, OutputSerialization,
};
use bytes::Bytes;
use serial_test::serial;
use std::error::Error;
const ENDPOINT: &str = "http://localhost:9000";
const ACCESS_KEY: &str = "rustfsadmin";
const SECRET_KEY: &str = "rustfsadmin";
const BUCKET: &str = "test-sql-bucket";
const CSV_OBJECT: &str = "test-data.csv";
const JSON_OBJECT: &str = "test-data.json";
async fn create_aws_s3_client() -> Result<Client, Box<dyn Error>> {
let region_provider = RegionProviderChain::default_provider().or_else(Region::new("us-east-1"));
let shared_config = aws_config::defaults(aws_config::BehaviorVersion::latest())
.region(region_provider)
.credentials_provider(Credentials::new(ACCESS_KEY, SECRET_KEY, None, None, "static"))
.endpoint_url(ENDPOINT)
.load()
.await;
let client = Client::from_conf(
aws_sdk_s3::Config::from(&shared_config)
.to_builder()
.force_path_style(true) // Important for S3-compatible services
.build(),
);
Ok(client)
}
async fn setup_test_bucket(client: &Client) -> Result<(), Box<dyn Error>> {
match client.create_bucket().bucket(BUCKET).send().await {
Ok(_) => {}
Err(e) => {
let error_str = e.to_string();
if !error_str.contains("BucketAlreadyOwnedByYou") && !error_str.contains("BucketAlreadyExists") {
return Err(e.into());
}
}
}
Ok(())
}
async fn upload_test_csv(client: &Client) -> Result<(), Box<dyn Error>> {
let csv_data = "name,age,city\nAlice,30,New York\nBob,25,Los Angeles\nCharlie,35,Chicago\nDiana,28,Boston";
client
.put_object()
.bucket(BUCKET)
.key(CSV_OBJECT)
.body(Bytes::from(csv_data.as_bytes()).into())
.send()
.await?;
Ok(())
}
async fn upload_test_json(client: &Client) -> Result<(), Box<dyn Error>> {
let json_data = r#"{"name":"Alice","age":30,"city":"New York"}
{"name":"Bob","age":25,"city":"Los Angeles"}
{"name":"Charlie","age":35,"city":"Chicago"}
{"name":"Diana","age":28,"city":"Boston"}"#;
client
.put_object()
.bucket(BUCKET)
.key(JSON_OBJECT)
.body(Bytes::from(json_data.as_bytes()).into())
.send()
.await?;
Ok(())
}
async fn process_select_response(
mut event_stream: aws_sdk_s3::operation::select_object_content::SelectObjectContentOutput,
) -> Result<String, Box<dyn Error>> {
let mut total_data = Vec::new();
while let Ok(Some(event)) = event_stream.payload.recv().await {
match event {
aws_sdk_s3::types::SelectObjectContentEventStream::Records(records_event) => {
if let Some(payload) = records_event.payload {
let data = payload.into_inner();
total_data.extend_from_slice(&data);
}
}
aws_sdk_s3::types::SelectObjectContentEventStream::End(_) => {
break;
}
_ => {
// Handle other event types (Stats, Progress, Cont, etc.)
}
}
}
Ok(String::from_utf8(total_data)?)
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_select_object_content_csv_basic() -> Result<(), Box<dyn Error>> {
let client = create_aws_s3_client().await?;
setup_test_bucket(&client).await?;
upload_test_csv(&client).await?;
// Construct SelectObjectContent request - basic query
let sql = "SELECT * FROM S3Object WHERE age > 28";
let csv_input = CsvInput::builder().file_header_info(FileHeaderInfo::Use).build();
let input_serialization = InputSerialization::builder().csv(csv_input).build();
let csv_output = CsvOutput::builder().build();
let output_serialization = OutputSerialization::builder().csv(csv_output).build();
let response = client
.select_object_content()
.bucket(BUCKET)
.key(CSV_OBJECT)
.expression(sql)
.expression_type(ExpressionType::Sql)
.input_serialization(input_serialization)
.output_serialization(output_serialization)
.send()
.await?;
let result_str = process_select_response(response).await?;
println!("CSV Select result: {result_str}");
// Verify results contain records with age > 28
assert!(result_str.contains("Alice,30,New York"));
assert!(result_str.contains("Charlie,35,Chicago"));
assert!(!result_str.contains("Bob,25,Los Angeles"));
assert!(!result_str.contains("Diana,28,Boston"));
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_select_object_content_csv_aggregation() -> Result<(), Box<dyn Error>> {
let client = create_aws_s3_client().await?;
setup_test_bucket(&client).await?;
upload_test_csv(&client).await?;
// Construct aggregation query - use simpler approach
let sql = "SELECT name, age FROM S3Object WHERE age >= 25";
let csv_input = CsvInput::builder().file_header_info(FileHeaderInfo::Use).build();
let input_serialization = InputSerialization::builder().csv(csv_input).build();
let csv_output = CsvOutput::builder().build();
let output_serialization = OutputSerialization::builder().csv(csv_output).build();
let response = client
.select_object_content()
.bucket(BUCKET)
.key(CSV_OBJECT)
.expression(sql)
.expression_type(ExpressionType::Sql)
.input_serialization(input_serialization)
.output_serialization(output_serialization)
.send()
.await?;
let result_str = process_select_response(response).await?;
println!("CSV Aggregation result: {result_str}");
// Verify query results - should include records with age >= 25
assert!(result_str.contains("Alice"));
assert!(result_str.contains("Bob"));
assert!(result_str.contains("Charlie"));
assert!(result_str.contains("Diana"));
assert!(result_str.contains("30"));
assert!(result_str.contains("25"));
assert!(result_str.contains("35"));
assert!(result_str.contains("28"));
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_select_object_content_json_basic() -> Result<(), Box<dyn Error>> {
let client = create_aws_s3_client().await?;
setup_test_bucket(&client).await?;
upload_test_json(&client).await?;
// Construct JSON query
let sql = "SELECT s.name, s.age FROM S3Object s WHERE s.age > 28";
let json_input = JsonInput::builder().set_type(Some(JsonType::Document)).build();
let input_serialization = InputSerialization::builder().json(json_input).build();
let json_output = JsonOutput::builder().build();
let output_serialization = OutputSerialization::builder().json(json_output).build();
let response = client
.select_object_content()
.bucket(BUCKET)
.key(JSON_OBJECT)
.expression(sql)
.expression_type(ExpressionType::Sql)
.input_serialization(input_serialization)
.output_serialization(output_serialization)
.send()
.await?;
let result_str = process_select_response(response).await?;
println!("JSON Select result: {result_str}");
// Verify JSON query results
assert!(result_str.contains("Alice"));
assert!(result_str.contains("Charlie"));
assert!(result_str.contains("30"));
assert!(result_str.contains("35"));
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_select_object_content_csv_limit() -> Result<(), Box<dyn Error>> {
let client = create_aws_s3_client().await?;
setup_test_bucket(&client).await?;
upload_test_csv(&client).await?;
// Test LIMIT clause
let sql = "SELECT * FROM S3Object LIMIT 2";
let csv_input = CsvInput::builder().file_header_info(FileHeaderInfo::Use).build();
let input_serialization = InputSerialization::builder().csv(csv_input).build();
let csv_output = CsvOutput::builder().build();
let output_serialization = OutputSerialization::builder().csv(csv_output).build();
let response = client
.select_object_content()
.bucket(BUCKET)
.key(CSV_OBJECT)
.expression(sql)
.expression_type(ExpressionType::Sql)
.input_serialization(input_serialization)
.output_serialization(output_serialization)
.send()
.await?;
let result_str = process_select_response(response).await?;
println!("CSV Limit result: {result_str}");
// Verify only first 2 records are returned
let lines: Vec<&str> = result_str.lines().filter(|line| !line.trim().is_empty()).collect();
assert_eq!(lines.len(), 2, "Should return exactly 2 records");
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_select_object_content_csv_order_by() -> Result<(), Box<dyn Error>> {
let client = create_aws_s3_client().await?;
setup_test_bucket(&client).await?;
upload_test_csv(&client).await?;
// Test ORDER BY clause
let sql = "SELECT name, age FROM S3Object ORDER BY age DESC LIMIT 2";
let csv_input = CsvInput::builder().file_header_info(FileHeaderInfo::Use).build();
let input_serialization = InputSerialization::builder().csv(csv_input).build();
let csv_output = CsvOutput::builder().build();
let output_serialization = OutputSerialization::builder().csv(csv_output).build();
let response = client
.select_object_content()
.bucket(BUCKET)
.key(CSV_OBJECT)
.expression(sql)
.expression_type(ExpressionType::Sql)
.input_serialization(input_serialization)
.output_serialization(output_serialization)
.send()
.await?;
let result_str = process_select_response(response).await?;
println!("CSV Order By result: {result_str}");
// Verify ordered by age descending
let lines: Vec<&str> = result_str.lines().filter(|line| !line.trim().is_empty()).collect();
assert!(lines.len() >= 2, "Should return at least 2 records");
// Check if contains highest age records
assert!(result_str.contains("Charlie,35"));
assert!(result_str.contains("Alice,30"));
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_select_object_content_error_handling() -> Result<(), Box<dyn Error>> {
let client = create_aws_s3_client().await?;
setup_test_bucket(&client).await?;
upload_test_csv(&client).await?;
// Test invalid SQL query
let sql = "SELECT * FROM S3Object WHERE invalid_column > 10";
let csv_input = CsvInput::builder().file_header_info(FileHeaderInfo::Use).build();
let input_serialization = InputSerialization::builder().csv(csv_input).build();
let csv_output = CsvOutput::builder().build();
let output_serialization = OutputSerialization::builder().csv(csv_output).build();
// This query should fail because invalid_column doesn't exist
let result = client
.select_object_content()
.bucket(BUCKET)
.key(CSV_OBJECT)
.expression(sql)
.expression_type(ExpressionType::Sql)
.input_serialization(input_serialization)
.output_serialization(output_serialization)
.send()
.await;
// Verify query fails (expected behavior)
assert!(result.is_err(), "Query with invalid column should fail");
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
#[serial]
#[ignore = "requires running RustFS server at localhost:9000"]
async fn test_select_object_content_nonexistent_object() -> Result<(), Box<dyn Error>> {
let client = create_aws_s3_client().await?;
setup_test_bucket(&client).await?;
// Test query on nonexistent object
let sql = "SELECT * FROM S3Object";
let csv_input = CsvInput::builder().file_header_info(FileHeaderInfo::Use).build();
let input_serialization = InputSerialization::builder().csv(csv_input).build();
let csv_output = CsvOutput::builder().build();
let output_serialization = OutputSerialization::builder().csv(csv_output).build();
let result = client
.select_object_content()
.bucket(BUCKET)
.key("nonexistent.csv")
.expression(sql)
.expression_type(ExpressionType::Sql)
.input_serialization(input_serialization)
.output_serialization(output_serialization)
.send()
.await;
// Verify query fails (expected behavior)
assert!(result.is_err(), "Query on nonexistent object should fail");
Ok(())
}
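// The tests in this file are all #[ignore]d, so they only run when requested explicitly,
// e.g. `cargo test -- --ignored` (or a single test by name) against a RustFS server
// listening on localhost:9000 with the rustfsadmin/rustfsadmin credentials used above.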

View File

@@ -12,23 +12,18 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::data_usage::{DATA_USAGE_CACHE_NAME, DATA_USAGE_ROOT, load_data_usage_from_backend};
use crate::error::{Error, Result};
use crate::{
disk::endpoint::Endpoint,
global::{GLOBAL_BOOT_TIME, GLOBAL_Endpoints},
heal::{
data_usage::{DATA_USAGE_CACHE_NAME, DATA_USAGE_ROOT, load_data_usage_from_backend},
data_usage_cache::DataUsageCache,
heal_commands::{DRIVE_STATE_OK, DRIVE_STATE_UNFORMATTED},
},
new_object_layer_fn,
notification_sys::get_global_notification_sys,
store_api::StorageAPI,
};
use rustfs_common::{
// error::{Error, Result},
globals::GLOBAL_Local_Node_Name,
};
use crate::data_usage::load_data_usage_cache;
use rustfs_common::{globals::GLOBAL_Local_Node_Name, heal_channel::DriveState};
use rustfs_madmin::{
BackendDisks, Disk, ErasureSetInfo, ITEM_INITIALIZING, ITEM_OFFLINE, ITEM_ONLINE, InfoMessage, ServerProperties,
};
@@ -253,7 +248,7 @@ pub async fn get_server_info(get_pools: bool) -> InfoMessage {
warn!("load_data_usage_from_backend end {:?}", after3 - after2);
let backen_info = store.clone().backend_info().await;
let backend_info = store.clone().backend_info().await;
let after4 = OffsetDateTime::now_utc();
@@ -272,10 +267,10 @@ pub async fn get_server_info(get_pools: bool) -> InfoMessage {
backend_type: rustfs_madmin::BackendType::ErasureType,
online_disks: online_disks.sum(),
offline_disks: offline_disks.sum(),
standard_sc_parity: backen_info.standard_sc_parity,
rr_sc_parity: backen_info.rr_sc_parity,
total_sets: backen_info.total_sets,
drives_per_set: backen_info.drives_per_set,
standard_sc_parity: backend_info.standard_sc_parity,
rr_sc_parity: backend_info.rr_sc_parity,
total_sets: backend_info.total_sets,
drives_per_set: backend_info.drives_per_set,
};
if get_pools {
pools = get_pools_info(&all_disks).await.unwrap_or_default();
@@ -318,7 +313,7 @@ fn get_online_offline_disks_stats(disks_info: &[Disk]) -> (BackendDisks, Backend
for disk in disks_info {
let ep = &disk.endpoint;
let state = &disk.state;
if *state != DRIVE_STATE_OK && *state != DRIVE_STATE_UNFORMATTED {
if *state != DriveState::Ok.to_string() && *state != DriveState::Unformatted.to_string() {
*offline_disks.get_mut(ep).unwrap() += 1;
continue;
}
@@ -359,13 +354,13 @@ async fn get_pools_info(all_disks: &[Disk]) -> Result<HashMap<i32, HashMap<i32,
if erasure_set.id == 0 {
erasure_set.id = d.set_index;
if let Ok(cache) = DataUsageCache::load(
if let Ok(cache) = load_data_usage_cache(
&store.pools[d.pool_index as usize].disk_set[d.set_index as usize].clone(),
DATA_USAGE_CACHE_NAME,
)
.await
{
let data_usage_info = cache.dui(DATA_USAGE_ROOT, &[]);
let data_usage_info = cache.dui(DATA_USAGE_ROOT, &Vec::<String>::new());
erasure_set.objects_count = data_usage_info.objects_total_count;
erasure_set.versions_count = data_usage_info.versions_total_count;
erasure_set.delete_markers_count = data_usage_info.delete_markers_total_count;

View File

@@ -22,6 +22,10 @@ use async_channel::{Receiver as A_Receiver, Sender as A_Sender, bounded};
use futures::Future;
use http::HeaderMap;
use lazy_static::lazy_static;
use rustfs_common::data_usage::TierStats;
use rustfs_common::heal_channel::rep_has_active_rules;
use rustfs_common::metrics::{IlmAction, Metrics};
use rustfs_utils::path::encode_dir_object;
use s3s::Body;
use sha2::{Digest, Sha256};
use std::any::Any;
@@ -31,6 +35,7 @@ use std::io::Write;
use std::pin::Pin;
use std::sync::atomic::{AtomicI64, Ordering};
use std::sync::{Arc, Mutex};
use time::OffsetDateTime;
use tokio::select;
use tokio::sync::mpsc::{Receiver, Sender};
use tokio::sync::{RwLock, mpsc};
@@ -41,9 +46,10 @@ use xxhash_rust::xxh64;
//use rustfs_notify::{BucketNotificationConfig, Event, EventName, LogLevel, NotificationError, init_logger};
//use rustfs_notify::{initialize, notification_system};
use super::bucket_lifecycle_audit::{LcAuditEvent, LcEventSrc};
use super::lifecycle::{self, ExpirationOptions, IlmAction, Lifecycle, TransitionOptions};
use super::lifecycle::{self, ExpirationOptions, Lifecycle, TransitionOptions};
use super::tier_last_day_stats::{DailyAllTierStats, LastDayTierStats};
use super::tier_sweeper::{Jentry, delete_object_from_remote_tier};
use crate::bucket::object_lock::objectlock_sys::enforce_retention_for_deletion;
use crate::bucket::{metadata_sys::get_lifecycle_config, versioning_sys::BucketVersioningSys};
use crate::client::object_api_utils::new_getobjectreader;
use crate::error::Error;
@@ -52,16 +58,11 @@ use crate::event::name::EventName;
use crate::event_notification::{EventArgs, send_event};
use crate::global::GLOBAL_LocalNodeName;
use crate::global::{GLOBAL_LifecycleSys, GLOBAL_TierConfigMgr, get_global_deployment_id};
use crate::heal::{
data_scanner::{apply_expiry_on_non_transitioned_objects, apply_expiry_on_transitioned_object},
data_scanner_metric::ScannerMetrics,
data_usage_cache::TierStats,
};
use crate::store::ECStore;
use crate::store_api::StorageAPI;
use crate::store_api::{GetObjectReader, HTTPRangeSpec, ObjectInfo, ObjectOptions, ObjectToDelete};
use crate::tier::warm_backend::WarmBackendGetOpts;
use s3s::dto::BucketLifecycleConfiguration;
use s3s::dto::{BucketLifecycleConfiguration, DefaultRetention, ReplicationConfiguration};
pub type TimeFn = Arc<dyn Fn() -> Pin<Box<dyn Future<Output = ()> + Send>> + Send + Sync + 'static>;
pub type TraceFn =
@@ -631,7 +632,7 @@ pub async fn enqueue_transition_immediate(oi: &ObjectInfo, src: LcEventSrc) {
if !lc.is_none() {
let event = lc.expect("err").eval(&oi.to_lifecycle_opts()).await;
match event.action {
lifecycle::IlmAction::TransitionAction | lifecycle::IlmAction::TransitionVersionAction => {
IlmAction::TransitionAction | IlmAction::TransitionVersionAction => {
if oi.delete_marker || oi.is_dir {
return;
}
@@ -728,7 +729,7 @@ pub fn gen_transition_objname(bucket: &str) -> Result<String, Error> {
}
pub async fn transition_object(api: Arc<ECStore>, oi: &ObjectInfo, lae: LcAuditEvent) -> Result<(), Error> {
let time_ilm = ScannerMetrics::time_ilm(lae.event.action);
let time_ilm = Metrics::time_ilm(lae.event.action);
let opts = ObjectOptions {
transition: TransitionOptions {
@@ -842,3 +843,161 @@ pub struct RestoreObjectRequest {
}
const _MAX_RESTORE_OBJECT_REQUEST_SIZE: i64 = 2 << 20;
pub async fn eval_action_from_lifecycle(
lc: &BucketLifecycleConfiguration,
lr: Option<DefaultRetention>,
rcfg: Option<(ReplicationConfiguration, OffsetDateTime)>,
oi: &ObjectInfo,
) -> lifecycle::Event {
let event = lc.eval(&oi.to_lifecycle_opts()).await;
//if serverDebugLog {
info!("lifecycle: Secondary scan: {}", event.action);
//}
let lock_enabled = if let Some(lr) = lr { lr.mode.is_some() } else { false };
match event.action {
lifecycle::IlmAction::DeleteAllVersionsAction | lifecycle::IlmAction::DelMarkerDeleteAllVersionsAction => {
if lock_enabled {
return lifecycle::Event::default();
}
}
lifecycle::IlmAction::DeleteVersionAction | lifecycle::IlmAction::DeleteRestoredVersionAction => {
if oi.version_id.is_none() {
return lifecycle::Event::default();
}
if lock_enabled && enforce_retention_for_deletion(oi) {
//if serverDebugLog {
if oi.version_id.is_some() {
info!("lifecycle: {} v({}) is locked, not deleting", oi.name, oi.version_id.expect("err"));
} else {
info!("lifecycle: {} is locked, not deleting", oi.name);
}
//}
return lifecycle::Event::default();
}
if let Some(rcfg) = rcfg {
if rep_has_active_rules(&rcfg.0, &oi.name, true) {
return lifecycle::Event::default();
}
}
}
_ => (),
}
event
}
async fn apply_transition_rule(event: &lifecycle::Event, src: &LcEventSrc, oi: &ObjectInfo) -> bool {
if oi.delete_marker || oi.is_dir {
return false;
}
GLOBAL_TransitionState.queue_transition_task(oi, event, src).await;
true
}
pub async fn apply_expiry_on_transitioned_object(
api: Arc<ECStore>,
oi: &ObjectInfo,
lc_event: &lifecycle::Event,
src: &LcEventSrc,
) -> bool {
// let time_ilm = ScannerMetrics::time_ilm(lc_event.action.clone());
if let Err(_err) = expire_transitioned_object(api, oi, lc_event, src).await {
return false;
}
// let _ = time_ilm(1);
true
}
pub async fn apply_expiry_on_non_transitioned_objects(
api: Arc<ECStore>,
oi: &ObjectInfo,
lc_event: &lifecycle::Event,
_src: &LcEventSrc,
) -> bool {
let mut opts = ObjectOptions {
expiration: ExpirationOptions { expire: true },
..Default::default()
};
if lc_event.action.delete_versioned() {
opts.version_id = Some(oi.version_id.expect("err").to_string());
}
opts.versioned = BucketVersioningSys::prefix_enabled(&oi.bucket, &oi.name).await;
opts.version_suspended = BucketVersioningSys::prefix_suspended(&oi.bucket, &oi.name).await;
if lc_event.action.delete_all() {
opts.delete_prefix = true;
opts.delete_prefix_object = true;
}
// let time_ilm = ScannerMetrics::time_ilm(lc_event.action.clone());
let mut dobj = api
.delete_object(&oi.bucket, &encode_dir_object(&oi.name), opts)
.await
.unwrap();
if dobj.name.is_empty() {
dobj = oi.clone();
}
//let tags = LcAuditEvent::new(lc_event.clone(), src.clone()).tags();
//tags["version-id"] = dobj.version_id;
let mut event_name = EventName::ObjectRemovedDelete;
if oi.delete_marker {
event_name = EventName::ObjectRemovedDeleteMarkerCreated;
}
match lc_event.action {
lifecycle::IlmAction::DeleteAllVersionsAction => event_name = EventName::ObjectRemovedDeleteAllVersions,
lifecycle::IlmAction::DelMarkerDeleteAllVersionsAction => event_name = EventName::ILMDelMarkerExpirationDelete,
_ => (),
}
send_event(EventArgs {
event_name: event_name.as_ref().to_string(),
bucket_name: dobj.bucket.clone(),
object: dobj,
user_agent: "Internal: [ILM-Expiry]".to_string(),
host: GLOBAL_LocalNodeName.to_string(),
..Default::default()
});
if lc_event.action != lifecycle::IlmAction::NoneAction {
// let mut num_versions = 1_u64;
// if lc_event.action.delete_all() {
// num_versions = oi.num_versions as u64;
// }
// let _ = time_ilm(num_versions);
}
true
}
async fn apply_expiry_rule(event: &lifecycle::Event, src: &LcEventSrc, oi: &ObjectInfo) -> bool {
let mut expiry_state = GLOBAL_ExpiryState.write().await;
expiry_state.enqueue_by_days(oi, event, src).await;
true
}
pub async fn apply_lifecycle_action(event: &lifecycle::Event, src: &LcEventSrc, oi: &ObjectInfo) -> bool {
let mut success = false;
match event.action {
lifecycle::IlmAction::DeleteVersionAction
| lifecycle::IlmAction::DeleteAction
| lifecycle::IlmAction::DeleteRestoredAction
| lifecycle::IlmAction::DeleteRestoredVersionAction
| lifecycle::IlmAction::DeleteAllVersionsAction
| lifecycle::IlmAction::DelMarkerDeleteAllVersionsAction => {
success = apply_expiry_rule(event, src, oi).await;
}
lifecycle::IlmAction::TransitionAction | lifecycle::IlmAction::TransitionVersionAction => {
success = apply_transition_rule(event, src, oi).await;
}
_ => (),
}
success
}

View File

@@ -43,49 +43,7 @@ const _ERR_XML_NOT_WELL_FORMED: &str =
const ERR_LIFECYCLE_BUCKET_LOCKED: &str =
"ExpiredObjectAllVersions element and DelMarkerExpiration action cannot be used on an retention bucket";
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum IlmAction {
NoneAction = 0,
DeleteAction,
DeleteVersionAction,
TransitionAction,
TransitionVersionAction,
DeleteRestoredAction,
DeleteRestoredVersionAction,
DeleteAllVersionsAction,
DelMarkerDeleteAllVersionsAction,
ActionCount,
}
impl IlmAction {
pub fn delete_restored(&self) -> bool {
*self == Self::DeleteRestoredAction || *self == Self::DeleteRestoredVersionAction
}
pub fn delete_versioned(&self) -> bool {
*self == Self::DeleteVersionAction || *self == Self::DeleteRestoredVersionAction
}
pub fn delete_all(&self) -> bool {
*self == Self::DeleteAllVersionsAction || *self == Self::DelMarkerDeleteAllVersionsAction
}
pub fn delete(&self) -> bool {
if self.delete_restored() {
return true;
}
*self == Self::DeleteVersionAction
|| *self == Self::DeleteAction
|| *self == Self::DeleteAllVersionsAction
|| *self == Self::DelMarkerDeleteAllVersionsAction
}
}
impl Display for IlmAction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{:?}", self)
}
}
pub use rustfs_common::metrics::IlmAction;
#[async_trait::async_trait]
pub trait RuleValidate {

View File

@@ -25,7 +25,7 @@ use std::ops::Sub;
use time::OffsetDateTime;
use tracing::{error, warn};
use crate::heal::data_usage_cache::TierStats;
use rustfs_common::data_usage::TierStats;
pub type DailyAllTierStats = HashMap<String, LastDayTierStats>;

View File

@@ -18,9 +18,9 @@ use crate::bucket::utils::{deserialize, is_meta_bucketname};
use crate::cmd::bucket_targets;
use crate::error::{Error, Result, is_err_bucket_not_found};
use crate::global::{GLOBAL_Endpoints, is_dist_erasure, is_erasure, new_object_layer_fn};
use crate::heal::heal_commands::HealOpts;
use crate::store::ECStore;
use futures::future::join_all;
use rustfs_common::heal_channel::HealOpts;
use rustfs_policy::policy::BucketPolicy;
use s3s::dto::{
BucketLifecycleConfiguration, NotificationConfiguration, ObjectLockConfiguration, ReplicationConfiguration,

View File

@@ -1,137 +0,0 @@
#![allow(unsafe_code)] // TODO: audit unsafe code
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::{
fmt::Debug,
future::Future,
pin::Pin,
ptr,
sync::{
Arc,
atomic::{AtomicPtr, AtomicU64, Ordering},
},
time::{Duration, SystemTime, UNIX_EPOCH},
};
use tokio::{spawn, sync::Mutex};
use std::io::Result;
pub type UpdateFn<T> = Box<dyn Fn() -> Pin<Box<dyn Future<Output = Result<T>> + Send>> + Send + Sync + 'static>;
#[derive(Clone, Debug, Default)]
pub struct Opts {
return_last_good: bool,
no_wait: bool,
}
pub struct Cache<T: Clone + Debug + Send> {
update_fn: UpdateFn<T>,
ttl: Duration,
opts: Opts,
val: AtomicPtr<T>,
last_update_ms: AtomicU64,
updating: Arc<Mutex<bool>>,
}
impl<T: Clone + Debug + Send + 'static> Cache<T> {
pub fn new(update_fn: UpdateFn<T>, ttl: Duration, opts: Opts) -> Self {
let val = AtomicPtr::new(ptr::null_mut());
Self {
update_fn,
ttl,
opts,
val,
last_update_ms: AtomicU64::new(0),
updating: Arc::new(Mutex::new(false)),
}
}
pub async fn get(self: Arc<Self>) -> Result<T> {
let v_ptr = self.val.load(Ordering::SeqCst);
let v = if v_ptr.is_null() {
None
} else {
Some(unsafe { (*v_ptr).clone() })
};
let now = SystemTime::now()
.duration_since(UNIX_EPOCH)
.expect("Time went backwards")
.as_secs();
if now - self.last_update_ms.load(Ordering::SeqCst) < self.ttl.as_secs() {
if let Some(v) = v {
return Ok(v);
}
}
if self.opts.no_wait && v.is_some() && now - self.last_update_ms.load(Ordering::SeqCst) < self.ttl.as_secs() * 2 {
if self.updating.try_lock().is_ok() {
let this = Arc::clone(&self);
spawn(async move {
let _ = this.update().await;
});
}
return Ok(v.unwrap());
}
let _ = self.updating.lock().await;
if let Ok(duration) =
SystemTime::now().duration_since(UNIX_EPOCH + Duration::from_secs(self.last_update_ms.load(Ordering::SeqCst)))
{
if duration < self.ttl {
return Ok(v.unwrap());
}
}
match self.update().await {
Ok(_) => {
let v_ptr = self.val.load(Ordering::SeqCst);
let v = if v_ptr.is_null() {
None
} else {
Some(unsafe { (*v_ptr).clone() })
};
Ok(v.unwrap())
}
Err(err) => Err(err),
}
}
async fn update(&self) -> Result<()> {
match (self.update_fn)().await {
Ok(val) => {
self.val.store(Box::into_raw(Box::new(val)), Ordering::SeqCst);
let now = SystemTime::now()
.duration_since(UNIX_EPOCH)
.expect("Time went backwards")
.as_secs();
self.last_update_ms.store(now, Ordering::SeqCst);
Ok(())
}
Err(err) => {
let v_ptr = self.val.load(Ordering::SeqCst);
if self.opts.return_last_good && !v_ptr.is_null() {
return Ok(());
}
Err(err)
}
}
}
}

View File

@@ -31,7 +31,7 @@ pub struct ListPathRawOptions {
pub fallback_disks: Vec<Option<DiskStore>>,
pub bucket: String,
pub path: String,
pub recursice: bool,
pub recursive: bool,
pub filter_prefix: Option<String>,
pub forward_to: Option<String>,
pub min_disks: usize,
@@ -52,7 +52,7 @@ impl Clone for ListPathRawOptions {
fallback_disks: self.fallback_disks.clone(),
bucket: self.bucket.clone(),
path: self.path.clone(),
recursice: self.recursice,
recursive: self.recursive,
filter_prefix: self.filter_prefix.clone(),
forward_to: self.forward_to.clone(),
min_disks: self.min_disks,
@@ -85,7 +85,7 @@ pub async fn list_path_raw(mut rx: B_Receiver<bool>, opts: ListPathRawOptions) -
let wakl_opts = WalkDirOptions {
bucket: opts_clone.bucket.clone(),
base_dir: opts_clone.path.clone(),
recursive: opts_clone.recursice,
recursive: opts_clone.recursive,
report_notfound: opts_clone.report_not_found,
filter_prefix: opts_clone.filter_prefix.clone(),
forward_to: opts_clone.forward_to.clone(),
@@ -133,7 +133,7 @@ pub async fn list_path_raw(mut rx: B_Receiver<bool>, opts: ListPathRawOptions) -
WalkDirOptions {
bucket: opts_clone.bucket.clone(),
base_dir: opts_clone.path.clone(),
recursive: opts_clone.recursice,
recursive: opts_clone.recursive,
report_notfound: opts_clone.report_not_found,
filter_prefix: opts_clone.filter_prefix.clone(),
forward_to: opts_clone.forward_to.clone(),

View File

@@ -12,5 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.
// pub mod cache;
use std::sync::Arc;
use lazy_static::lazy_static;
use tokio_util::sync::CancellationToken;
pub mod metacache_set;
lazy_static! {
pub static ref LIST_PATH_RAW_CANCEL_TOKEN: Arc<CancellationToken> = Arc::new(CancellationToken::new());
}

View File

@@ -41,7 +41,7 @@
// pin_mut!(body);
// // 上一次没用完的数据
// let mut prev_bytes = Bytes::new();
// let mut readed_size = 0;
// let mut read_size = 0;
// loop {
// let data: Vec<Bytes> = {
@@ -51,9 +51,9 @@
// Some(Err(e)) => return Err(e),
// Some(Ok((data, remaining_bytes))) => {
// // debug!(
// // "content_length:{},readed_size:{}, read_data data:{}, remaining_bytes: {} ",
// // "content_length:{},read_size:{}, read_data data:{}, remaining_bytes: {} ",
// // content_length,
// // readed_size,
// // read_size,
// // data.len(),
// // remaining_bytes.len()
// // );
@@ -65,15 +65,15 @@
// };
// for bytes in data {
// readed_size += bytes.len();
// // debug!("readed_size {}, content_length {}", readed_size, content_length,);
// read_size += bytes.len();
// // debug!("read_size {}, content_length {}", read_size, content_length,);
// y.yield_ok(bytes).await;
// }
// if readed_size + prev_bytes.len() >= content_length {
// if read_size + prev_bytes.len() >= content_length {
// // debug!(
// // "读完了 readed_size:{} + prev_bytes.len({}) == content_length {}",
// // readed_size,
// // "读完了 read_size:{} + prev_bytes.len({}) == content_length {}",
// // read_size,
// // prev_bytes.len(),
// // content_length,
// // );

View File

@@ -135,7 +135,7 @@ impl Default for PutObjectOptions {
#[allow(dead_code)]
impl PutObjectOptions {
fn set_matche_tag(&mut self, etag: &str) {
fn set_match_tag(&mut self, etag: &str) {
if etag == "*" {
self.custom_header
.insert("If-Match", HeaderValue::from_str("*").expect("err"));
@@ -145,7 +145,7 @@ impl PutObjectOptions {
}
}
fn set_matche_tag_except(&mut self, etag: &str) {
fn set_match_tag_except(&mut self, etag: &str) {
if etag == "*" {
self.custom_header
.insert("If-None-Match", HeaderValue::from_str("*").expect("err"));
@@ -181,7 +181,7 @@ impl PutObjectOptions {
header.insert(
"Expires",
HeaderValue::from_str(&self.expires.format(ISO8601_DATEFORMAT).unwrap()).expect("err"),
); //rustfs invalid heade
); //rustfs invalid header
}
if self.mode.as_str() != "" {

View File

@@ -18,7 +18,7 @@ use crate::bucket::versioning::VersioningApi;
use crate::bucket::versioning_sys::BucketVersioningSys;
use crate::store::ECStore;
use crate::store_api::{ObjectOptions, ObjectToDelete};
use rustfs_lock::local_locker::MAX_DELETE_LIST;
use rustfs_lock::MAX_DELETE_LIST;
pub async fn delete_object_versions(api: ECStore, bucket: &str, to_del: &[ObjectToDelete], _lc_event: lifecycle::Event) {
let mut remaining = to_del;

View File

@@ -2422,7 +2422,7 @@ impl ReplicateObjectInfo {
// let mut arns = Vec::new();
// let mut tgts_map = std::collections::HashSet::new();
// for rule in cfg.rules {
// if rule.status.as_str() == "Disabe" {
// if rule.status.as_str() == "Disable" {
// continue;
// }

View File

@@ -95,7 +95,7 @@ impl ArnTarget {
Self {
client: TargetClient {
bucket,
storage_class: "STANDRD".to_string(),
storage_class: "STANDARD".to_string(),
disable_proxy: false,
health_check_duration: Duration::from_secs(100),
endpoint,
@@ -361,7 +361,7 @@ impl BucketTargetSys {
// // Mocked implementation for obtaining a remote client
// let tcli = TargetClient {
// bucket: _tgt.target_bucket.clone(),
// storage_class: "STANDRD".to_string(),
// storage_class: "STANDARD".to_string(),
// disable_proxy: false,
// health_check_duration: Duration::from_secs(100),
// endpoint: _tgt.endpoint.clone(),
@@ -379,7 +379,7 @@ impl BucketTargetSys {
// // Mocked implementation for obtaining a remote client
// let tcli = TargetClient {
// bucket: _tgt.target_bucket.clone(),
// storage_class: "STANDRD".to_string(),
// storage_class: "STANDARD".to_string(),
// disable_proxy: false,
// health_check_duration: Duration::from_secs(100),
// endpoint: _tgt.endpoint.clone(),
@@ -403,7 +403,7 @@ impl BucketTargetSys {
match store.get_bucket_info(_bucket, &store_api::BucketOptions::default()).await {
Ok(info) => {
println!("Bucket Info: {info:?}");
info.versionning
info.versioning
}
Err(err) => {
eprintln!("Error: {err:?}");
@@ -431,7 +431,7 @@ impl BucketTargetSys {
// {
// Ok(info) => {
// println!("Bucket Info: {:?}", info);
// info.versionning
// info.versioning
// }
// Err(err) => {
// eprintln!("Error: {:?}", err);
@@ -475,8 +475,7 @@ impl BucketTargetSys {
{
Ok(info) => {
println!("Bucket Info: {info:?}");
if !info.versionning {
println!("2222222222 {}", info.versionning);
if !info.versioning {
return Err(SetTargetError::TargetNotVersioned(tgt.target_bucket.to_string()));
}
}

View File

@@ -0,0 +1,297 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::{collections::HashMap, sync::Arc};
use crate::{bucket::metadata_sys::get_replication_config, config::com::read_config, store::ECStore};
use rustfs_common::data_usage::{BucketTargetUsageInfo, DataUsageCache, DataUsageEntry, DataUsageInfo, SizeSummary};
use rustfs_utils::path::SLASH_SEPARATOR;
use tracing::{error, warn};
use crate::error::Error;
// Data usage storage constants
pub const DATA_USAGE_ROOT: &str = SLASH_SEPARATOR;
const DATA_USAGE_OBJ_NAME: &str = ".usage.json";
const DATA_USAGE_BLOOM_NAME: &str = ".bloomcycle.bin";
pub const DATA_USAGE_CACHE_NAME: &str = ".usage-cache.bin";
// Data usage storage paths
lazy_static::lazy_static! {
pub static ref DATA_USAGE_BUCKET: String = format!("{}{}{}",
crate::disk::RUSTFS_META_BUCKET,
SLASH_SEPARATOR,
crate::disk::BUCKET_META_PREFIX
);
pub static ref DATA_USAGE_OBJ_NAME_PATH: String = format!("{}{}{}",
crate::disk::BUCKET_META_PREFIX,
SLASH_SEPARATOR,
DATA_USAGE_OBJ_NAME
);
pub static ref DATA_USAGE_BLOOM_NAME_PATH: String = format!("{}{}{}",
crate::disk::BUCKET_META_PREFIX,
SLASH_SEPARATOR,
DATA_USAGE_BLOOM_NAME
);
}
/// Store data usage info to backend storage
pub async fn store_data_usage_in_backend(data_usage_info: DataUsageInfo, store: Arc<ECStore>) -> Result<(), Error> {
let data =
serde_json::to_vec(&data_usage_info).map_err(|e| Error::other(format!("Failed to serialize data usage info: {e}")))?;
// Save to backend using the same mechanism as original code
crate::config::com::save_config(store, &DATA_USAGE_OBJ_NAME_PATH, data)
.await
.map_err(Error::other)?;
Ok(())
}
/// Load data usage info from backend storage
pub async fn load_data_usage_from_backend(store: Arc<ECStore>) -> Result<DataUsageInfo, Error> {
let buf: Vec<u8> = match read_config(store, &DATA_USAGE_OBJ_NAME_PATH).await {
Ok(data) => data,
Err(e) => {
error!("Failed to read data usage info from backend: {}", e);
if e == crate::error::Error::ConfigNotFound {
return Ok(DataUsageInfo::default());
}
return Err(Error::other(e));
}
};
let mut data_usage_info: DataUsageInfo =
serde_json::from_slice(&buf).map_err(|e| Error::other(format!("Failed to deserialize data usage info: {e}")))?;
warn!("Loaded data usage info from backend {:?}", &data_usage_info);
// Handle backward compatibility like original code
if data_usage_info.buckets_usage.is_empty() {
data_usage_info.buckets_usage = data_usage_info
.bucket_sizes
.iter()
.map(|(bucket, &size)| {
(
bucket.clone(),
rustfs_common::data_usage::BucketUsageInfo {
size,
..Default::default()
},
)
})
.collect();
}
if data_usage_info.bucket_sizes.is_empty() {
data_usage_info.bucket_sizes = data_usage_info
.buckets_usage
.iter()
.map(|(bucket, bui)| (bucket.clone(), bui.size))
.collect();
}
for (bucket, bui) in &data_usage_info.buckets_usage {
if bui.replicated_size_v1 > 0
|| bui.replication_failed_count_v1 > 0
|| bui.replication_failed_size_v1 > 0
|| bui.replication_pending_count_v1 > 0
{
if let Ok((cfg, _)) = get_replication_config(bucket).await {
if !cfg.role.is_empty() {
data_usage_info.replication_info.insert(
cfg.role.clone(),
BucketTargetUsageInfo {
replication_failed_size: bui.replication_failed_size_v1,
replication_failed_count: bui.replication_failed_count_v1,
replicated_size: bui.replicated_size_v1,
replication_pending_count: bui.replication_pending_count_v1,
replication_pending_size: bui.replication_pending_size_v1,
..Default::default()
},
);
}
}
}
}
Ok(data_usage_info)
}
/// Create a data usage cache entry from size summary
pub fn create_cache_entry_from_summary(summary: &SizeSummary) -> DataUsageEntry {
let mut entry = DataUsageEntry::default();
entry.add_sizes(summary);
entry
}
/// Convert data usage cache to DataUsageInfo
pub fn cache_to_data_usage_info(cache: &DataUsageCache, path: &str, buckets: &[crate::store_api::BucketInfo]) -> DataUsageInfo {
let e = match cache.find(path) {
Some(e) => e,
None => return DataUsageInfo::default(),
};
let flat = cache.flatten(&e);
let mut buckets_usage = HashMap::new();
for bucket in buckets.iter() {
let e = match cache.find(&bucket.name) {
Some(e) => e,
None => continue,
};
let flat = cache.flatten(&e);
let mut bui = rustfs_common::data_usage::BucketUsageInfo {
size: flat.size as u64,
versions_count: flat.versions as u64,
objects_count: flat.objects as u64,
delete_markers_count: flat.delete_markers as u64,
object_size_histogram: flat.obj_sizes.to_map(),
object_versions_histogram: flat.obj_versions.to_map(),
..Default::default()
};
if let Some(rs) = &flat.replication_stats {
bui.replica_size = rs.replica_size;
bui.replica_count = rs.replica_count;
for (arn, stat) in rs.targets.iter() {
bui.replication_info.insert(
arn.clone(),
BucketTargetUsageInfo {
replication_pending_size: stat.pending_size,
replicated_size: stat.replicated_size,
replication_failed_size: stat.failed_size,
replication_pending_count: stat.pending_count,
replication_failed_count: stat.failed_count,
replicated_count: stat.replicated_count,
..Default::default()
},
);
}
}
buckets_usage.insert(bucket.name.clone(), bui);
}
DataUsageInfo {
last_update: cache.info.last_update,
objects_total_count: flat.objects as u64,
versions_total_count: flat.versions as u64,
delete_markers_total_count: flat.delete_markers as u64,
objects_total_size: flat.size as u64,
buckets_count: e.children.len() as u64,
buckets_usage,
..Default::default()
}
}
// Helper functions for DataUsageCache operations
pub async fn load_data_usage_cache(store: &crate::set_disk::SetDisks, name: &str) -> crate::error::Result<DataUsageCache> {
use crate::disk::{BUCKET_META_PREFIX, RUSTFS_META_BUCKET};
use crate::store_api::{ObjectIO, ObjectOptions};
use http::HeaderMap;
use rand::Rng;
use std::path::Path;
use std::time::Duration;
use tokio::time::sleep;
let mut d = DataUsageCache::default();
let mut retries = 0;
while retries < 5 {
let path = Path::new(BUCKET_META_PREFIX).join(name);
match store
.get_object_reader(
RUSTFS_META_BUCKET,
path.to_str().unwrap(),
None,
HeaderMap::new(),
&ObjectOptions {
no_lock: true,
..Default::default()
},
)
.await
{
Ok(mut reader) => {
if let Ok(info) = DataUsageCache::unmarshal(&reader.read_all().await?) {
d = info
}
break;
}
Err(err) => match err {
crate::error::Error::FileNotFound | crate::error::Error::VolumeNotFound => {
match store
.get_object_reader(
RUSTFS_META_BUCKET,
name,
None,
HeaderMap::new(),
&ObjectOptions {
no_lock: true,
..Default::default()
},
)
.await
{
Ok(mut reader) => {
if let Ok(info) = DataUsageCache::unmarshal(&reader.read_all().await?) {
d = info
}
break;
}
Err(_) => match err {
crate::error::Error::FileNotFound | crate::error::Error::VolumeNotFound => {
break;
}
_ => {}
},
}
}
_ => {
break;
}
},
}
retries += 1;
let dur = {
let mut rng = rand::rng();
rng.random_range(0..1_000)
};
sleep(Duration::from_millis(dur)).await;
}
Ok(d)
}
pub async fn save_data_usage_cache(cache: &DataUsageCache, name: &str) -> crate::error::Result<()> {
use crate::config::com::save_config;
use crate::disk::BUCKET_META_PREFIX;
use crate::new_object_layer_fn;
use std::path::Path;
let Some(store) = new_object_layer_fn() else {
return Err(crate::error::Error::other("errServerNotInitialized"));
};
let buf = cache.marshal_msg().map_err(crate::error::Error::other)?;
let buf_clone = buf.clone();
let store_clone = store.clone();
let name = Path::new(BUCKET_META_PREFIX).join(name).to_string_lossy().to_string();
let name_clone = name.clone();
tokio::spawn(async move {
let _ = save_config(store_clone, &format!("{}{}", &name_clone, ".bkp"), buf_clone).await;
});
save_config(store, &name, buf).await?;
Ok(())
}
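// Illustrative sketch (an assumption, not part of the committed module): combining the
// backend helpers above into a simple round-trip, given an already-initialized store.
// The function name `refresh_persisted_usage` is hypothetical.
async fn refresh_persisted_usage(store: Arc<ECStore>) -> Result<(), Error> {
    // Read the last persisted snapshot (a default value if none has been written yet).
    let current = load_data_usage_from_backend(store.clone()).await?;
    // Write it back; a real caller would update the counters on `current` first.
    store_data_usage_in_backend(current, store).await
}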

View File

@@ -21,9 +21,6 @@ use super::{
};
use super::{endpoint::Endpoint, error::DiskError, format::FormatV3};
use crate::bucket::metadata_sys::{self};
use crate::bucket::versioning::VersioningApi;
use crate::bucket::versioning_sys::BucketVersioningSys;
use crate::disk::error::FileAccessDeniedWithContext;
use crate::disk::error_conv::{to_access_error, to_file_error, to_unformatted_disk_error, to_volume_error};
use crate::disk::fs::{
@@ -36,16 +33,6 @@ use crate::disk::{
};
use crate::disk::{FileWriter, STORAGE_FORMAT_FILE};
use crate::global::{GLOBAL_IsErasureSD, GLOBAL_RootDiskThreshold};
use crate::heal::data_scanner::{
ScannerItem, ShouldSleepFn, SizeSummary, lc_has_active_rules, rep_has_active_rules, scan_data_folder,
};
use crate::heal::data_scanner_metric::{ScannerMetric, ScannerMetrics};
use crate::heal::data_usage_cache::{DataUsageCache, DataUsageEntry};
use crate::heal::error::{ERR_IGNORE_FILE_CONTRIB, ERR_SKIP_FILE};
use crate::heal::heal_commands::{HealScanMode, HealingTracker};
use crate::heal::heal_ops::HEALING_TRACKER_FILENAME;
use crate::new_object_layer_fn;
use crate::store_api::{ObjectInfo, StorageAPI};
use rustfs_utils::path::{
GLOBAL_DIR_SUFFIX, GLOBAL_DIR_SUFFIX_WITH_SLASH, SLASH_SEPARATOR, clean, decode_dir_object, encode_dir_object, has_suffix,
path_join, path_join_buf,
@@ -55,19 +42,18 @@ use tokio::time::interval;
use crate::erasure_coding::bitrot_verify;
use bytes::Bytes;
use path_absolutize::Absolutize;
use rustfs_common::defer;
use rustfs_filemeta::{
Cache, FileInfo, FileInfoOpts, FileMeta, MetaCacheEntry, MetacacheWriter, ObjectPartInfo, Opts, RawFileInfo, UpdateFn,
get_file_info, read_xl_meta_no_data,
};
use rustfs_utils::HashAlgorithm;
use rustfs_utils::os::get_info;
use std::collections::{HashMap, HashSet};
use std::collections::HashSet;
use std::fmt::Debug;
use std::io::SeekFrom;
use std::sync::Arc;
use std::sync::atomic::{AtomicU32, Ordering};
use std::time::{Duration, SystemTime};
use std::time::Duration;
use std::{
fs::Metadata,
path::{Path, PathBuf},
@@ -76,7 +62,6 @@ use time::OffsetDateTime;
use tokio::fs::{self, File};
use tokio::io::{AsyncReadExt, AsyncSeekExt, AsyncWrite, AsyncWriteExt, ErrorKind};
use tokio::sync::RwLock;
use tokio::sync::mpsc::Sender;
use tracing::{debug, error, info, warn};
use uuid::Uuid;
@@ -563,7 +548,7 @@ impl LocalDisk {
}
async fn read_metadata(&self, file_path: impl AsRef<Path>) -> Result<Vec<u8>> {
// TODO: suport timeout
// TODO: support timeout
let (data, _) = self.read_metadata_with_dmtime(file_path.as_ref()).await?;
Ok(data)
}
@@ -595,7 +580,7 @@ impl LocalDisk {
}
async fn read_all_data(&self, volume: &str, volume_dir: impl AsRef<Path>, file_path: impl AsRef<Path>) -> Result<Vec<u8>> {
// TODO: timeout suport
// TODO: timeout support
let (data, _) = self.read_all_data_with_dmtime(volume, volume_dir, file_path).await?;
Ok(data)
}
@@ -750,7 +735,7 @@ impl LocalDisk {
let mut f = {
if sync {
// TODO: suport sync
// TODO: support sync
self.open_file(file_path, flags, skip_parent).await?
} else {
self.open_file(file_path, flags, skip_parent).await?
@@ -1705,6 +1690,15 @@ impl DiskAPI for LocalDisk {
};
out.write_obj(&meta).await?;
objs_returned += 1;
} else {
let fpath =
self.get_object_path(&opts.bucket, path_join_buf(&[opts.base_dir.as_str(), STORAGE_FORMAT_FILE]).as_str())?;
if let Ok(meta) = tokio::fs::metadata(fpath).await
&& meta.is_file()
{
return Err(DiskError::FileNotFound);
}
}
}
@@ -2268,184 +2262,6 @@ impl DiskAPI for LocalDisk {
Ok(info)
}
#[tracing::instrument(level = "info", skip_all)]
async fn ns_scanner(
&self,
cache: &DataUsageCache,
updates: Sender<DataUsageEntry>,
scan_mode: HealScanMode,
we_sleep: ShouldSleepFn,
) -> Result<DataUsageCache> {
self.scanning.fetch_add(1, Ordering::SeqCst);
defer!(|| { self.scanning.fetch_sub(1, Ordering::SeqCst) });
// must before metadata_sys
let Some(store) = new_object_layer_fn() else {
return Err(Error::other("errServerNotInitialized"));
};
let mut cache = cache.clone();
// Check if the current bucket has a configured lifecycle policy
if let Ok((lc, _)) = metadata_sys::get_lifecycle_config(&cache.info.name).await {
if lc_has_active_rules(&lc, "") {
cache.info.lifecycle = Some(lc);
}
}
// Check if the current bucket has replication configuration
if let Ok((rcfg, _)) = metadata_sys::get_replication_config(&cache.info.name).await {
if rep_has_active_rules(&rcfg, "", true) {
// TODO: globalBucketTargetSys
}
}
let vcfg = BucketVersioningSys::get(&cache.info.name).await.ok();
let loc = self.get_disk_location();
// TODO: errors need to be handled here
let disks = store
.get_disks(loc.pool_idx.unwrap(), loc.disk_idx.unwrap())
.await
.map_err(|e| Error::other(e.to_string()))?;
let disk = Arc::new(LocalDisk::new(&self.endpoint(), false).await?);
let disk_clone = disk.clone();
cache.info.updates = Some(updates.clone());
let mut data_usage_info = scan_data_folder(
&disks,
disk,
&cache,
Box::new(move |item: &ScannerItem| {
let mut item = item.clone();
let disk = disk_clone.clone();
let vcfg = vcfg.clone();
Box::pin(async move {
if !item.path.ends_with(&format!("{SLASH_SEPARATOR}{STORAGE_FORMAT_FILE}")) {
return Err(Error::other(ERR_SKIP_FILE).into());
}
let stop_fn = ScannerMetrics::log(ScannerMetric::ScanObject);
let mut res = HashMap::new();
let done_sz = ScannerMetrics::time_size(ScannerMetric::ReadMetadata);
let buf = match disk.read_metadata(item.path.clone()).await {
Ok(buf) => buf,
Err(err) => {
res.insert("err".to_string(), err.to_string());
stop_fn(&res);
return Err(Error::other(ERR_SKIP_FILE).into());
}
};
done_sz(buf.len() as u64);
res.insert("metasize".to_string(), buf.len().to_string());
item.transform_meda_dir();
let meta_cache = MetaCacheEntry {
name: item.object_path().to_string_lossy().to_string(),
metadata: buf,
..Default::default()
};
let fivs = match meta_cache.file_info_versions(&item.bucket) {
Ok(fivs) => fivs,
Err(err) => {
res.insert("err".to_string(), err.to_string());
stop_fn(&res);
return Err(Error::other(ERR_SKIP_FILE).into());
}
};
let mut size_s = SizeSummary::default();
let done = ScannerMetrics::time(ScannerMetric::ApplyAll);
let obj_infos = match item.apply_versions_actions(&fivs.versions).await {
Ok(obj_infos) => obj_infos,
Err(err) => {
res.insert("err".to_string(), err.to_string());
stop_fn(&res);
return Err(Error::other(ERR_SKIP_FILE).into());
}
};
let versioned = if let Some(vcfg) = vcfg.as_ref() {
vcfg.versioned(item.object_path().to_str().unwrap_or_default())
} else {
false
};
let mut obj_deleted = false;
for info in obj_infos.iter() {
let done = ScannerMetrics::time(ScannerMetric::ApplyVersion);
let sz: i64;
(obj_deleted, sz) = item.apply_actions(info, &mut size_s).await;
done();
if obj_deleted {
break;
}
let actual_sz = match info.get_actual_size() {
Ok(size) => size,
Err(_) => continue,
};
if info.delete_marker {
size_s.delete_markers += 1;
}
if info.version_id.is_some() && sz == actual_sz {
size_s.versions += 1;
}
size_s.total_size += sz as usize;
if info.delete_marker {
continue;
}
}
for free_version in fivs.free_versions.iter() {
let _obj_info = ObjectInfo::from_file_info(
free_version,
&item.bucket,
&item.object_path().to_string_lossy(),
versioned,
);
let done = ScannerMetrics::time(ScannerMetric::TierObjSweep);
done();
}
// todo: global trace
if obj_deleted {
return Err(Error::other(ERR_IGNORE_FILE_CONTRIB).into());
}
done();
Ok(size_s)
})
}),
scan_mode,
we_sleep,
)
.await?;
data_usage_info.info.last_update = Some(SystemTime::now());
debug!("ns_scanner completed: {data_usage_info:?}");
Ok(data_usage_info)
}
#[tracing::instrument(skip(self))]
async fn healing(&self) -> Option<HealingTracker> {
let healing_file = path_join(&[
self.path(),
PathBuf::from(RUSTFS_META_BUCKET),
PathBuf::from(BUCKET_META_PREFIX),
PathBuf::from(HEALING_TRACKER_FILENAME),
]);
let b = match fs::read(healing_file).await {
Ok(b) => b,
Err(_) => return None,
};
if b.is_empty() {
return None;
}
match HealingTracker::unmarshal_msg(&b) {
Ok(h) => Some(h),
Err(_) => Some(HealingTracker::default()),
}
}
}
async fn get_disk_info(drive_path: PathBuf) -> Result<(rustfs_utils::os::DiskInfo, bool)> {


@@ -30,11 +30,6 @@ pub const FORMAT_CONFIG_FILE: &str = "format.json";
pub const STORAGE_FORMAT_FILE: &str = "xl.meta";
pub const STORAGE_FORMAT_FILE_BACKUP: &str = "xl.meta.bkp";
use crate::heal::{
data_scanner::ShouldSleepFn,
data_usage_cache::{DataUsageCache, DataUsageEntry},
heal_commands::{HealScanMode, HealingTracker},
};
use crate::rpc::RemoteDisk;
use bytes::Bytes;
use endpoint::Endpoint;
@@ -46,10 +41,7 @@ use rustfs_madmin::info_commands::DiskMetrics;
use serde::{Deserialize, Serialize};
use std::{fmt::Debug, path::PathBuf, sync::Arc};
use time::OffsetDateTime;
use tokio::{
io::{AsyncRead, AsyncWrite},
sync::mpsc::Sender,
};
use tokio::io::{AsyncRead, AsyncWrite};
use uuid::Uuid;
pub type DiskStore = Arc<Disk>;
@@ -406,28 +398,6 @@ impl DiskAPI for Disk {
Disk::Remote(remote_disk) => remote_disk.disk_info(opts).await,
}
}
#[tracing::instrument(skip(self, cache, we_sleep, scan_mode))]
async fn ns_scanner(
&self,
cache: &DataUsageCache,
updates: Sender<DataUsageEntry>,
scan_mode: HealScanMode,
we_sleep: ShouldSleepFn,
) -> Result<DataUsageCache> {
match self {
Disk::Local(local_disk) => local_disk.ns_scanner(cache, updates, scan_mode, we_sleep).await,
Disk::Remote(remote_disk) => remote_disk.ns_scanner(cache, updates, scan_mode, we_sleep).await,
}
}
#[tracing::instrument(skip(self))]
async fn healing(&self) -> Option<HealingTracker> {
match self {
Disk::Local(local_disk) => local_disk.healing().await,
Disk::Remote(remote_disk) => remote_disk.healing().await,
}
}
}
pub async fn new_disk(ep: &Endpoint, opt: &DiskOption) -> Result<DiskStore> {
@@ -527,14 +497,6 @@ pub trait DiskAPI: Debug + Send + Sync + 'static {
async fn write_all(&self, volume: &str, path: &str, data: Bytes) -> Result<()>;
async fn read_all(&self, volume: &str, path: &str) -> Result<Bytes>;
async fn disk_info(&self, opts: &DiskInfoOptions) -> Result<DiskInfo>;
async fn ns_scanner(
&self,
cache: &DataUsageCache,
updates: Sender<DataUsageEntry>,
scan_mode: HealScanMode,
we_sleep: ShouldSleepFn,
) -> Result<DataUsageCache>;
async fn healing(&self) -> Option<HealingTracker>;
}
#[derive(Debug, Default, Serialize, Deserialize)]


@@ -49,7 +49,8 @@ pub fn check_path_length(path_name: &str) -> Result<()> {
let mut count = 0usize;
for c in path_name.chars() {
match c {
'/' | '\\' if cfg!(target_os = "windows") => count = 0, // Reset
'/' => count = 0,
'\\' if cfg!(target_os = "windows") => count = 0, // Reset
_ => {
count += 1;
if count > 255 {
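The hunk is truncated here by the diff view; for context, a minimal sketch of the whole check with the fixed separator handling, where '/' now resets the per-component counter on every platform and '\\' only on Windows. The error value and return type are assumptions, not the crate's actual ones.

```rust
// Sketch of the fixed component-length check (assumed error type).
fn check_path_length(path_name: &str) -> Result<(), String> {
    let mut count = 0usize;
    for c in path_name.chars() {
        match c {
            '/' => count = 0,                                 // separator on every platform
            '\\' if cfg!(target_os = "windows") => count = 0, // separator only on Windows
            _ => {
                count += 1;
                if count > 255 {
                    // assumed error value; the original returns its own error type
                    return Err(format!("path component too long: {path_name}"));
                }
            }
        }
    }
    Ok(())
}
```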


@@ -308,7 +308,7 @@ impl Erasure {
// EC encode; the result is written into data_buffer
let data_slices: SmallVec<[&mut [u8]; 16]> = data_buffer.chunks_exact_mut(shard_size).collect();
// only run EC when the partiy count is greater than 0
// only run EC when the parity count is greater than 0
if self.parity_shards > 0 {
self.encoder.as_ref().unwrap().encode(data_slices).map_err(Error::other)?;
}


@@ -183,6 +183,9 @@ pub enum StorageError {
#[error("Io error: {0}")]
Io(std::io::Error),
#[error("Lock error: {0}")]
Lock(#[from] rustfs_lock::LockError),
}
impl StorageError {
@@ -409,6 +412,7 @@ impl Clone for StorageError {
StorageError::FirstDiskWait => StorageError::FirstDiskWait,
StorageError::TooManyOpenFiles => StorageError::TooManyOpenFiles,
StorageError::NoHealRequired => StorageError::NoHealRequired,
StorageError::Lock(e) => StorageError::Lock(e.clone()),
}
}
}
@@ -471,6 +475,7 @@ impl StorageError {
StorageError::ConfigNotFound => 0x35,
StorageError::TooManyOpenFiles => 0x36,
StorageError::NoHealRequired => 0x37,
StorageError::Lock(_) => 0x38,
}
}
@@ -535,6 +540,7 @@ impl StorageError {
0x35 => Some(StorageError::ConfigNotFound),
0x36 => Some(StorageError::TooManyOpenFiles),
0x37 => Some(StorageError::NoHealRequired),
0x38 => Some(StorageError::Lock(rustfs_lock::LockError::internal("Generic lock error".to_string()))),
_ => None,
}
}
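Because the new Lock variant is declared with #[from], a rustfs_lock::LockError converts into StorageError automatically; a hedged sketch of a caller (the function name is hypothetical, the conversion and the 0x38 wire code are as shown above):

```rust
// Hypothetical caller: the derived From impl turns rustfs_lock::LockError into
// StorageError::Lock, which serializes to error code 0x38.
fn try_lock_bucket() -> Result<(), StorageError> {
    let err = rustfs_lock::LockError::internal("Generic lock error".to_string());
    Err(StorageError::from(err)) // equivalent to `Err(err.into())` or using `?`
}
```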


@@ -12,13 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::heal::mrf::MRFState;
use crate::{
bucket::lifecycle::bucket_lifecycle_ops::LifecycleSys,
disk::DiskStore,
endpoints::{EndpointServerPools, PoolEndpoints, SetupType},
event_notification::EventNotifier,
heal::{background_heal_ops::HealRoutine, heal_ops::AllHealState},
store::ECStore,
tier::tier::TierConfigMgr,
};
@@ -51,14 +49,10 @@ pub static ref GLOBAL_LOCAL_DISK_MAP: Arc<RwLock<HashMap<String, Option<DiskStor
pub static ref GLOBAL_LOCAL_DISK_SET_DRIVES: Arc<RwLock<TypeLocalDiskSetDrives>> = Arc::new(RwLock::new(Vec::new()));
pub static ref GLOBAL_Endpoints: OnceLock<EndpointServerPools> = OnceLock::new();
pub static ref GLOBAL_RootDiskThreshold: RwLock<u64> = RwLock::new(0);
pub static ref GLOBAL_BackgroundHealRoutine: Arc<HealRoutine> = HealRoutine::new();
pub static ref GLOBAL_BackgroundHealState: Arc<AllHealState> = AllHealState::new(false);
pub static ref GLOBAL_TierConfigMgr: Arc<RwLock<TierConfigMgr>> = TierConfigMgr::new();
pub static ref GLOBAL_LifecycleSys: Arc<LifecycleSys> = LifecycleSys::new();
pub static ref GLOBAL_EventNotifier: Arc<RwLock<EventNotifier>> = EventNotifier::new();
//pub static ref GLOBAL_RemoteTargetTransport
pub static ref GLOBAL_ALlHealState: Arc<AllHealState> = AllHealState::new(false);
pub static ref GLOBAL_MRFState: Arc<MRFState> = Arc::new(MRFState::new());
static ref globalDeploymentIDPtr: OnceLock<Uuid> = OnceLock::new();
pub static ref GLOBAL_BOOT_TIME: OnceCell<SystemTime> = OnceCell::new();
pub static ref GLOBAL_LocalNodeName: String = "127.0.0.1:9000".to_string();


@@ -1,512 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use futures::future::join_all;
use rustfs_madmin::heal_commands::HealResultItem;
use rustfs_utils::path::{SLASH_SEPARATOR, path_join};
use std::{cmp::Ordering, env, path::PathBuf, sync::Arc, time::Duration};
use tokio::{
spawn,
sync::{
RwLock,
mpsc::{self, Receiver, Sender},
},
time::interval,
};
use tokio_util::sync::CancellationToken;
use tracing::{error, info};
use uuid::Uuid;
use super::{
heal_commands::HealOpts,
heal_ops::{HealSequence, new_bg_heal_sequence},
};
use crate::error::{Error, Result};
use crate::global::{GLOBAL_MRFState, get_background_services_cancel_token};
use crate::heal::error::ERR_RETRY_HEALING;
use crate::heal::heal_commands::{HEAL_ITEM_BUCKET, HealScanMode};
use crate::heal::heal_ops::{BG_HEALING_UUID, HealSource};
use crate::{
config::RUSTFS_CONFIG_PREFIX,
disk::{BUCKET_META_PREFIX, DiskAPI, DiskInfoOptions, RUSTFS_META_BUCKET, endpoint::Endpoint, error::DiskError},
global::{GLOBAL_BackgroundHealRoutine, GLOBAL_BackgroundHealState, GLOBAL_LOCAL_DISK_MAP},
heal::{
data_usage::{DATA_USAGE_CACHE_NAME, DATA_USAGE_ROOT},
data_usage_cache::DataUsageCache,
heal_commands::{init_healing_tracker, load_healing_tracker},
heal_ops::NOP_HEAL,
},
new_object_layer_fn,
store::get_disk_via_endpoint,
store_api::{BucketInfo, BucketOptions, StorageAPI},
};
pub static DEFAULT_MONITOR_NEW_DISK_INTERVAL: Duration = Duration::from_secs(10);
pub async fn init_auto_heal() {
info!("Initializing auto heal background task");
let Some(cancel_token) = get_background_services_cancel_token() else {
error!("Background services cancel token not initialized");
return;
};
init_background_healing().await;
let v = env::var("_RUSTFS_AUTO_DRIVE_HEALING").unwrap_or("on".to_string());
if v == "on" {
info!("start monitor local disks and heal");
GLOBAL_BackgroundHealState
.push_heal_local_disks(&get_local_disks_to_heal().await)
.await;
let cancel_clone = cancel_token.clone();
spawn(async move {
monitor_local_disks_and_heal(cancel_clone).await;
});
}
let cancel_clone = cancel_token.clone();
spawn(async move {
GLOBAL_MRFState.heal_routine_with_cancel(cancel_clone).await;
});
}
async fn init_background_healing() {
let bg_seq = Arc::new(new_bg_heal_sequence());
for _ in 0..GLOBAL_BackgroundHealRoutine.workers {
let bg_seq_clone = bg_seq.clone();
spawn(async {
GLOBAL_BackgroundHealRoutine.add_worker(bg_seq_clone).await;
});
}
let _ = GLOBAL_BackgroundHealState.launch_new_heal_sequence(bg_seq).await;
}
pub async fn get_local_disks_to_heal() -> Vec<Endpoint> {
let mut disks_to_heal = Vec::new();
for (_, disk) in GLOBAL_LOCAL_DISK_MAP.read().await.iter() {
if let Some(disk) = disk {
if let Err(err) = disk.disk_info(&DiskInfoOptions::default()).await {
if err == DiskError::UnformattedDisk {
info!("get_local_disks_to_heal, disk is unformatted: {}", err);
disks_to_heal.push(disk.endpoint());
}
}
let h = disk.healing().await;
if let Some(h) = h {
if !h.finished {
info!("get_local_disks_to_heal, disk healing not finished");
disks_to_heal.push(disk.endpoint());
}
}
}
}
// todo
// if disks_to_heal.len() == GLOBAL_Endpoints.read().await.n {
// }
disks_to_heal
}
async fn monitor_local_disks_and_heal(cancel_token: CancellationToken) {
info!("Auto heal monitor started");
let mut interval = interval(DEFAULT_MONITOR_NEW_DISK_INTERVAL);
loop {
tokio::select! {
_ = cancel_token.cancelled() => {
info!("Auto heal monitor received shutdown signal, exiting gracefully");
break;
}
_ = interval.tick() => {
let heal_disks = GLOBAL_BackgroundHealState.get_heal_local_disk_endpoints().await;
if heal_disks.is_empty() {
info!("heal local disks is empty");
interval.reset();
continue;
}
info!("heal local disks: {:?}", heal_disks);
let store = new_object_layer_fn().expect("errServerNotInitialized");
if let (_result, Some(err)) = store.heal_format(false).await.expect("heal format failed") {
error!("heal local disk format error: {}", err);
if err == Error::NoHealRequired {
} else {
info!("heal format err: {}", err.to_string());
interval.reset();
continue;
}
}
let mut futures = Vec::new();
for disk in heal_disks.into_ref().iter() {
let disk_clone = disk.clone();
let cancel_clone = cancel_token.clone();
futures.push(async move {
let disk_for_cancel = disk_clone.clone();
tokio::select! {
_ = cancel_clone.cancelled() => {
info!("Disk healing task cancelled for disk: {}", disk_for_cancel);
}
_ = async {
GLOBAL_BackgroundHealState
.set_disk_healing_status(disk_clone.clone(), true)
.await;
if heal_fresh_disk(&disk_clone).await.is_err() {
info!("heal_fresh_disk is err");
GLOBAL_BackgroundHealState
.set_disk_healing_status(disk_clone.clone(), false)
.await;
}
GLOBAL_BackgroundHealState.pop_heal_local_disks(&[disk_clone]).await;
} => {}
}
});
}
let _ = join_all(futures).await;
interval.reset();
}
}
}
}
async fn heal_fresh_disk(endpoint: &Endpoint) -> Result<()> {
let (pool_idx, set_idx) = (endpoint.pool_idx as usize, endpoint.set_idx as usize);
let disk = match get_disk_via_endpoint(endpoint).await {
Some(disk) => disk,
None => {
return Err(Error::other(format!(
"Unexpected error disk must be initialized by now after formatting: {endpoint}"
)));
}
};
if let Err(err) = disk.disk_info(&DiskInfoOptions::default()).await {
match err {
DiskError::DriveIsRoot => {
return Ok(());
}
DiskError::UnformattedDisk => {}
_ => {
return Err(err.into());
}
}
}
let mut tracker = match load_healing_tracker(&Some(disk.clone())).await {
Ok(tracker) => tracker,
Err(err) => {
match err {
DiskError::FileNotFound => {
return Ok(());
}
_ => {
info!(
"Unable to load healing tracker on '{}': {}, re-initializing..",
disk.to_string(),
err.to_string()
);
}
}
init_healing_tracker(disk.clone(), &Uuid::new_v4().to_string()).await?
}
};
info!(
"Healing drive '{}' - 'mc admin heal alias/ --verbose' to check the current status.",
endpoint.to_string()
);
let Some(store) = new_object_layer_fn() else {
return Err(Error::other("errServerNotInitialized"));
};
let mut buckets = store.list_bucket(&BucketOptions::default()).await?;
buckets.push(BucketInfo {
name: path_join(&[PathBuf::from(RUSTFS_META_BUCKET), PathBuf::from(RUSTFS_CONFIG_PREFIX)])
.to_string_lossy()
.to_string(),
..Default::default()
});
buckets.push(BucketInfo {
name: path_join(&[PathBuf::from(RUSTFS_META_BUCKET), PathBuf::from(BUCKET_META_PREFIX)])
.to_string_lossy()
.to_string(),
..Default::default()
});
buckets.sort_by(|a, b| {
let a_has_prefix = a.name.starts_with(RUSTFS_META_BUCKET);
let b_has_prefix = b.name.starts_with(RUSTFS_META_BUCKET);
match (a_has_prefix, b_has_prefix) {
(true, false) => Ordering::Less,
(false, true) => Ordering::Greater,
_ => b.created.cmp(&a.created),
}
});
if let Ok(cache) = DataUsageCache::load(&store.pools[pool_idx].disk_set[set_idx], DATA_USAGE_CACHE_NAME).await {
let data_usage_info = cache.dui(DATA_USAGE_ROOT, &Vec::new());
tracker.objects_total_count = data_usage_info.objects_total_count;
tracker.objects_total_size = data_usage_info.objects_total_size;
};
tracker.set_queue_buckets(&buckets).await;
tracker.save().await?;
let tracker = Arc::new(RwLock::new(tracker));
let qb = tracker.read().await.queue_buckets.clone();
store.pools[pool_idx].disk_set[set_idx]
.clone()
.heal_erasure_set(&qb, tracker.clone())
.await?;
let mut tracker_w = tracker.write().await;
if tracker_w.items_failed > 0 && tracker_w.retry_attempts < 4 {
tracker_w.retry_attempts += 1;
tracker_w.reset_healing().await;
if let Err(err) = tracker_w.update().await {
info!("update tracker failed: {}", err.to_string());
}
return Err(Error::other(ERR_RETRY_HEALING));
}
if tracker_w.items_failed > 0 {
info!(
"Healing of drive '{}' is incomplete, retried {} times (healed: {}, skipped: {}, failed: {}).",
disk.to_string(),
tracker_w.retry_attempts,
tracker_w.items_healed,
tracker_w.item_skipped,
tracker_w.items_failed
);
} else if tracker_w.retry_attempts > 0 {
info!(
"Healing of drive '{}' is incomplete, retried {} times (healed: {}, skipped: {}).",
disk.to_string(),
tracker_w.retry_attempts,
tracker_w.items_healed,
tracker_w.item_skipped
);
} else {
info!(
"Healing of drive '{}' is finished (healed: {}, skipped: {}).",
disk.to_string(),
tracker_w.items_healed,
tracker_w.item_skipped
);
}
if tracker_w.heal_id.is_empty() {
if let Err(err) = tracker_w.delete().await {
error!("delete tracker failed: {}", err.to_string());
}
}
let Some(store) = new_object_layer_fn() else {
return Err(Error::other("errServerNotInitialized"));
};
let disks = store.get_disks(pool_idx, set_idx).await?;
for disk in disks.into_iter() {
if disk.is_none() {
continue;
}
let mut tracker = match load_healing_tracker(&disk).await {
Ok(tracker) => tracker,
Err(err) => {
match err {
DiskError::FileNotFound => {}
_ => {
info!("Unable to load healing tracker on '{:?}': {}, re-initializing..", disk, err.to_string());
}
}
continue;
}
};
if tracker.heal_id == tracker_w.heal_id {
tracker.finished = true;
tracker.update().await?;
}
}
Ok(())
}
#[derive(Debug)]
pub struct HealTask {
pub bucket: String,
pub object: String,
pub version_id: String,
pub opts: HealOpts,
pub resp_tx: Option<Sender<HealResult>>,
pub resp_rx: Option<Receiver<HealResult>>,
}
impl HealTask {
pub fn new(bucket: &str, object: &str, version_id: &str, opts: &HealOpts) -> Self {
Self {
bucket: bucket.to_string(),
object: object.to_string(),
version_id: version_id.to_string(),
opts: *opts,
resp_tx: None,
resp_rx: None,
}
}
}
#[derive(Debug)]
pub struct HealResult {
pub result: HealResultItem,
pub err: Option<Error>,
}
pub struct HealRoutine {
pub tasks_tx: Sender<HealTask>,
tasks_rx: RwLock<Receiver<HealTask>>,
workers: usize,
}
impl HealRoutine {
pub fn new() -> Arc<Self> {
let mut workers = num_cpus::get() / 2;
if let Ok(env_heal_workers) = env::var("_RUSTFS_HEAL_WORKERS") {
if let Ok(num_healers) = env_heal_workers.parse::<usize>() {
workers = num_healers;
}
}
if workers == 0 {
workers = 4;
}
let (tx, rx) = mpsc::channel(100);
Arc::new(Self {
tasks_tx: tx,
tasks_rx: RwLock::new(rx),
workers,
})
}
pub async fn add_worker(&self, bgseq: Arc<HealSequence>) {
loop {
let mut d_res = HealResultItem::default();
let d_err: Option<Error>;
match self.tasks_rx.write().await.recv().await {
Some(task) => {
info!("got task: {:?}", task);
if task.bucket == NOP_HEAL {
d_err = Some(Error::other("skip file"));
} else if task.bucket == SLASH_SEPARATOR {
match heal_disk_format(task.opts).await {
Ok((res, err)) => {
d_res = res;
d_err = err;
}
Err(err) => d_err = Some(err),
}
} else {
let store = new_object_layer_fn().expect("errServerNotInitialized");
if task.object.is_empty() {
match store.heal_bucket(&task.bucket, &task.opts).await {
Ok(res) => {
d_res = res;
d_err = None;
}
Err(err) => d_err = Some(err),
}
} else {
match store
.heal_object(&task.bucket, &task.object, &task.version_id, &task.opts)
.await
{
Ok((res, err)) => {
d_res = res;
d_err = err;
}
Err(err) => d_err = Some(err),
}
}
}
info!("task finished, task: {:?}", task);
if let Some(resp_tx) = task.resp_tx {
let _ = resp_tx
.send(HealResult {
result: d_res,
err: d_err,
})
.await;
} else {
// when resp_tx is not set the caller is not waiting, but we
// still update the relevant metrics on its behalf
if d_err.is_none() {
bgseq.count_healed(d_res.heal_item_type).await;
} else {
bgseq.count_failed(d_res.heal_item_type).await;
}
}
}
None => {
info!("add_worker, tasks_rx was closed, return");
return;
}
}
}
}
}
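For orientation, a hedged sketch of how callers hand work to this routine: build a HealTask, attach a response channel, and push it into tasks_tx. It assumes the object layer is initialized and a worker is already running via add_worker; the bucket name is made up.

```rust
// Sketch only: queue a bucket heal and wait for the worker's HealResult.
async fn queue_bucket_heal(routine: &HealRoutine) -> Option<HealResult> {
    let (tx, mut rx) = mpsc::channel(1);
    let mut task = HealTask::new("my-bucket", "", "", &HealOpts::default());
    task.resp_tx = Some(tx); // ask the worker to report back instead of only counting metrics
    if routine.tasks_tx.send(task).await.is_err() {
        return None; // channel closed: no worker is listening
    }
    rx.recv().await
}
```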
// pub fn active_listeners() -> Result<usize> {
// }
async fn heal_disk_format(opts: HealOpts) -> Result<(HealResultItem, Option<Error>)> {
let Some(store) = new_object_layer_fn() else {
return Err(Error::other("errServerNotInitialized"));
};
let (res, err) = store.heal_format(opts.dry_run).await?;
// return any error, ignore error returned when disks have
// already healed.
if err.is_some() {
return Ok((HealResultItem::default(), err));
}
Ok((res, err))
}
pub(crate) async fn heal_bucket(bucket: &str) -> Result<()> {
let (bg_seq, ok) = GLOBAL_BackgroundHealState.get_heal_sequence_by_token(BG_HEALING_UUID).await;
if ok {
// bg_seq must be Some when ok is true
return bg_seq
.unwrap()
.queue_heal_task(
HealSource {
bucket: bucket.to_string(),
..Default::default()
},
HEAL_ITEM_BUCKET.to_string(),
)
.await;
}
Ok(())
}
pub(crate) async fn heal_object(bucket: &str, object: &str, version_id: &str, scan_mode: HealScanMode) -> Result<()> {
let (bg_seq, ok) = GLOBAL_BackgroundHealState.get_heal_sequence_by_token(BG_HEALING_UUID).await;
if ok {
// bg_seq must be Some when ok is true
return HealSequence::heal_object(bg_seq.unwrap(), bucket, object, version_id, scan_mode).await;
}
Ok(())
}

File diff suppressed because it is too large


@@ -1,221 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::error::{Error, Result};
use crate::{
bucket::metadata_sys::get_replication_config,
config::com::{read_config, save_config},
disk::{BUCKET_META_PREFIX, RUSTFS_META_BUCKET},
error::to_object_err,
new_object_layer_fn,
store::ECStore,
};
use lazy_static::lazy_static;
use rustfs_utils::path::SLASH_SEPARATOR;
use serde::{Deserialize, Serialize};
use std::{collections::HashMap, sync::Arc, time::SystemTime};
use tokio::sync::mpsc::Receiver;
use tracing::{error, warn};
pub const DATA_USAGE_ROOT: &str = SLASH_SEPARATOR;
const DATA_USAGE_OBJ_NAME: &str = ".usage.json";
const DATA_USAGE_BLOOM_NAME: &str = ".bloomcycle.bin";
pub const DATA_USAGE_CACHE_NAME: &str = ".usage-cache.bin";
lazy_static! {
pub static ref DATA_USAGE_BUCKET: String = format!("{}{}{}", RUSTFS_META_BUCKET, SLASH_SEPARATOR, BUCKET_META_PREFIX);
pub static ref DATA_USAGE_OBJ_NAME_PATH: String = format!("{}{}{}", BUCKET_META_PREFIX, SLASH_SEPARATOR, DATA_USAGE_OBJ_NAME);
pub static ref DATA_USAGE_BLOOM_NAME_PATH: String =
format!("{}{}{}", BUCKET_META_PREFIX, SLASH_SEPARATOR, DATA_USAGE_BLOOM_NAME);
pub static ref BACKGROUND_HEAL_INFO_PATH: String =
format!("{}{}{}", BUCKET_META_PREFIX, SLASH_SEPARATOR, ".background-heal.json");
}
// BucketTargetUsageInfo - bucket target usage info provides
// - replicated size for all objects sent to this target
// - replica size for all objects received from this target
// - replication pending size for all objects pending replication to this target
// - replication failed size for all objects failed replication to this target
// - replica pending count
// - replica failed count
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct BucketTargetUsageInfo {
pub replication_pending_size: u64,
pub replication_failed_size: u64,
pub replicated_size: u64,
pub replica_size: u64,
pub replication_pending_count: u64,
pub replication_failed_count: u64,
pub replicated_count: u64,
}
// BucketUsageInfo - bucket usage info provides
// - total size of the bucket
// - total objects in a bucket
// - object size histogram per bucket
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct BucketUsageInfo {
pub size: u64,
// Following five fields suffixed with V1 are here for backward compatibility
// Total Size for objects that have not yet been replicated
pub replication_pending_size_v1: u64,
// Total size for objects that have witnessed one or more failures and will be retried
pub replication_failed_size_v1: u64,
// Total size for objects that have been replicated to destination
pub replicated_size_v1: u64,
// Total number of objects pending replication
pub replication_pending_count_v1: u64,
// Total number of objects that failed replication
pub replication_failed_count_v1: u64,
pub objects_count: u64,
pub object_size_histogram: HashMap<String, u64>,
pub object_versions_histogram: HashMap<String, u64>,
pub versions_count: u64,
pub delete_markers_count: u64,
pub replica_size: u64,
pub replica_count: u64,
pub replication_info: HashMap<String, BucketTargetUsageInfo>,
}
// DataUsageInfo represents data usage stats of the underlying Object API
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct DataUsageInfo {
pub total_capacity: u64,
pub total_used_capacity: u64,
pub total_free_capacity: u64,
// LastUpdate is the timestamp of when the data usage info was last updated.
// This does not indicate a full scan.
pub last_update: Option<SystemTime>,
// Objects total count across all buckets
pub objects_total_count: u64,
// Versions total count across all buckets
pub versions_total_count: u64,
// Delete markers total count across all buckets
pub delete_markers_total_count: u64,
// Objects total size across all buckets
pub objects_total_size: u64,
pub replication_info: HashMap<String, BucketTargetUsageInfo>,
// Total number of buckets in this cluster
pub buckets_count: u64,
// Buckets usage info provides the following information across all buckets
// - total size of the bucket
// - total objects in a bucket
// - object size histogram per bucket
pub buckets_usage: HashMap<String, BucketUsageInfo>,
// Deprecated kept here for backward compatibility reasons.
pub bucket_sizes: HashMap<String, u64>,
// Todo: TierStats
// TierStats contains per-tier stats of all configured remote tiers
}
pub async fn store_data_usage_in_backend(mut rx: Receiver<DataUsageInfo>) {
let Some(store) = new_object_layer_fn() else {
error!("errServerNotInitialized");
return;
};
let mut attempts = 1;
loop {
match rx.recv().await {
Some(data_usage_info) => {
if let Ok(data) = serde_json::to_vec(&data_usage_info) {
if attempts > 10 {
let _ =
save_config(store.clone(), &format!("{}{}", *DATA_USAGE_OBJ_NAME_PATH, ".bkp"), data.clone()).await;
attempts += 1;
}
let _ = save_config(store.clone(), &DATA_USAGE_OBJ_NAME_PATH, data).await;
attempts += 1;
} else {
continue;
}
}
None => {
return;
}
}
}
}
// TODO: cancel ctx
pub async fn load_data_usage_from_backend(store: Arc<ECStore>) -> Result<DataUsageInfo> {
let buf = match read_config(store, &DATA_USAGE_OBJ_NAME_PATH).await {
Ok(data) => data,
Err(e) => {
error!("Failed to read data usage info from backend: {}", e);
if e == Error::ConfigNotFound {
return Ok(DataUsageInfo::default());
}
return Err(to_object_err(e, vec![RUSTFS_META_BUCKET, &DATA_USAGE_OBJ_NAME_PATH]));
}
};
let mut data_usage_info: DataUsageInfo = serde_json::from_slice(&buf)?;
warn!("Loaded data usage info from backend {:?}", &data_usage_info);
if data_usage_info.buckets_usage.is_empty() {
data_usage_info.buckets_usage = data_usage_info
.bucket_sizes
.iter()
.map(|(bucket, &size)| {
(
bucket.clone(),
BucketUsageInfo {
size,
..Default::default()
},
)
})
.collect();
}
if data_usage_info.bucket_sizes.is_empty() {
data_usage_info.bucket_sizes = data_usage_info
.buckets_usage
.iter()
.map(|(bucket, bui)| (bucket.clone(), bui.size))
.collect();
}
for (bucket, bui) in &data_usage_info.buckets_usage {
if bui.replicated_size_v1 > 0
|| bui.replication_failed_count_v1 > 0
|| bui.replication_failed_size_v1 > 0
|| bui.replication_pending_count_v1 > 0
{
if let Ok((cfg, _)) = get_replication_config(bucket).await {
if !cfg.role.is_empty() {
data_usage_info.replication_info.insert(
cfg.role.clone(),
BucketTargetUsageInfo {
replication_failed_size: bui.replication_failed_size_v1,
replication_failed_count: bui.replication_failed_count_v1,
replicated_size: bui.replicated_size_v1,
replication_pending_count: bui.replication_pending_count_v1,
replication_pending_size: bui.replication_pending_size_v1,
..Default::default()
},
);
}
}
}
}
Ok(data_usage_info)
}


@@ -1,928 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::config::com::save_config;
use crate::disk::{BUCKET_META_PREFIX, RUSTFS_META_BUCKET};
use crate::error::{Error, Result};
use crate::new_object_layer_fn;
use crate::set_disk::SetDisks;
use crate::store_api::{BucketInfo, ObjectIO, ObjectOptions};
use bytesize::ByteSize;
use http::HeaderMap;
use path_clean::PathClean;
use rand::Rng;
use rmp_serde::Serializer;
use s3s::dto::{BucketLifecycleConfiguration, ReplicationConfiguration};
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::hash::{DefaultHasher, Hash, Hasher};
use std::path::Path;
use std::time::{Duration, SystemTime};
use tokio::sync::mpsc::Sender;
use tokio::time::sleep;
use super::data_scanner::{DATA_SCANNER_FORCE_COMPACT_AT_FOLDERS, SizeSummary};
use super::data_usage::{BucketTargetUsageInfo, BucketUsageInfo, DataUsageInfo};
// DATA_USAGE_BUCKET_LEN must be length of ObjectsHistogramIntervals
pub const DATA_USAGE_BUCKET_LEN: usize = 11;
pub const DATA_USAGE_VERSION_LEN: usize = 7;
pub type DataUsageHashMap = HashSet<String>;
struct ObjectHistogramInterval {
name: &'static str,
start: u64,
end: u64,
}
const OBJECTS_HISTOGRAM_INTERVALS: [ObjectHistogramInterval; DATA_USAGE_BUCKET_LEN] = [
ObjectHistogramInterval {
name: "LESS_THAN_1024_B",
start: 0,
end: ByteSize::kib(1).as_u64() - 1,
},
ObjectHistogramInterval {
name: "BETWEEN_1024_B_AND_64_KB",
start: ByteSize::kib(1).as_u64(),
end: ByteSize::kib(64).as_u64() - 1,
},
ObjectHistogramInterval {
name: "BETWEEN_64_KB_AND_256_KB",
start: ByteSize::kib(64).as_u64(),
end: ByteSize::kib(256).as_u64() - 1,
},
ObjectHistogramInterval {
name: "BETWEEN_256_KB_AND_512_KB",
start: ByteSize::kib(256).as_u64(),
end: ByteSize::kib(512).as_u64() - 1,
},
ObjectHistogramInterval {
name: "BETWEEN_512_KB_AND_1_MB",
start: ByteSize::kib(512).as_u64(),
end: ByteSize::mib(1).as_u64() - 1,
},
ObjectHistogramInterval {
name: "BETWEEN_1024B_AND_1_MB",
start: ByteSize::kib(1).as_u64(),
end: ByteSize::mib(1).as_u64() - 1,
},
ObjectHistogramInterval {
name: "BETWEEN_1_MB_AND_10_MB",
start: ByteSize::mib(1).as_u64(),
end: ByteSize::mib(10).as_u64() - 1,
},
ObjectHistogramInterval {
name: "BETWEEN_10_MB_AND_64_MB",
start: ByteSize::mib(10).as_u64(),
end: ByteSize::mib(64).as_u64() - 1,
},
ObjectHistogramInterval {
name: "BETWEEN_64_MB_AND_128_MB",
start: ByteSize::mib(64).as_u64(),
end: ByteSize::mib(128).as_u64() - 1,
},
ObjectHistogramInterval {
name: "BETWEEN_128_MB_AND_512_MB",
start: ByteSize::mib(128).as_u64(),
end: ByteSize::mib(512).as_u64() - 1,
},
ObjectHistogramInterval {
name: "GREATER_THAN_512_MB",
start: ByteSize::mib(512).as_u64(),
end: u64::MAX,
},
];
const OBJECTS_VERSION_COUNT_INTERVALS: [ObjectHistogramInterval; DATA_USAGE_VERSION_LEN] = [
ObjectHistogramInterval {
name: "UNVERSIONED",
start: 0,
end: 0,
},
ObjectHistogramInterval {
name: "SINGLE_VERSION",
start: 1,
end: 1,
},
ObjectHistogramInterval {
name: "BETWEEN_2_AND_10",
start: 2,
end: 9,
},
ObjectHistogramInterval {
name: "BETWEEN_10_AND_100",
start: 10,
end: 99,
},
ObjectHistogramInterval {
name: "BETWEEN_100_AND_1000",
start: 100,
end: 999,
},
ObjectHistogramInterval {
name: "BETWEEN_1000_AND_10000",
start: 1000,
end: 9999,
},
ObjectHistogramInterval {
name: "GREATER_THAN_10000",
start: 10000,
end: u64::MAX,
},
];
#[derive(Clone, Copy, Default)]
pub struct TierStats {
pub total_size: u64,
pub num_versions: i32,
pub num_objects: i32,
}
impl TierStats {
pub fn add(&self, u: &TierStats) -> TierStats {
TierStats {
total_size: self.total_size + u.total_size,
num_versions: self.num_versions + u.num_versions,
num_objects: self.num_objects + u.num_objects,
}
}
}
struct AllTierStats {
tiers: HashMap<String, TierStats>,
}
impl AllTierStats {
pub fn new() -> Self {
Self { tiers: HashMap::new() }
}
fn add_sizes(&mut self, tiers: HashMap<String, TierStats>) {
for (tier, st) in tiers {
self.tiers.insert(tier.clone(), self.tiers[&tier].add(&st));
}
}
fn merge(&mut self, other: AllTierStats) {
for (tier, st) in other.tiers {
self.tiers.insert(tier.clone(), self.tiers[&tier].add(&st));
}
}
fn populate_stats(&self, stats: &mut HashMap<String, TierStats>) {
for (tier, st) in &self.tiers {
stats.insert(
tier.clone(),
TierStats {
total_size: st.total_size,
num_versions: st.num_versions,
num_objects: st.num_objects,
},
);
}
}
}
// sizeHistogram is a size histogram.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SizeHistogram(Vec<u64>);
impl Default for SizeHistogram {
fn default() -> Self {
Self(vec![0; DATA_USAGE_BUCKET_LEN])
}
}
impl SizeHistogram {
fn add(&mut self, size: u64) {
for (idx, interval) in OBJECTS_HISTOGRAM_INTERVALS.iter().enumerate() {
if size >= interval.start && size <= interval.end {
self.0[idx] += 1;
break;
}
}
}
pub fn to_map(&self) -> HashMap<String, u64> {
let mut res = HashMap::new();
let mut spl_count = 0;
for (count, oh) in self.0.iter().zip(OBJECTS_HISTOGRAM_INTERVALS.iter()) {
if ByteSize::kib(1).as_u64() == oh.start && oh.end == ByteSize::mib(1).as_u64() - 1 {
res.insert(oh.name.to_string(), spl_count);
} else if ByteSize::kib(1).as_u64() <= oh.start && oh.end < ByteSize::mib(1).as_u64() {
spl_count += count;
res.insert(oh.name.to_string(), *count);
} else {
res.insert(oh.name.to_string(), *count);
}
}
res
}
}
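A small, module-internal illustration of the histogram above (add() is private to this module; the sizes are arbitrary examples):

```rust
// Record a few object sizes and read the named buckets back out.
fn histogram_example() {
    let mut hist = SizeHistogram::default();
    hist.add(512);                         // LESS_THAN_1024_B
    hist.add(ByteSize::kib(200).as_u64()); // BETWEEN_64_KB_AND_256_KB
    hist.add(ByteSize::mib(700).as_u64()); // GREATER_THAN_512_MB
    let by_name = hist.to_map();
    assert_eq!(by_name["LESS_THAN_1024_B"], 1);
}
```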
// versionsHistogram is a histogram of number of versions in an object.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct VersionsHistogram(Vec<u64>);
impl Default for VersionsHistogram {
fn default() -> Self {
Self(vec![0; DATA_USAGE_VERSION_LEN])
}
}
impl VersionsHistogram {
fn add(&mut self, size: u64) {
for (idx, interval) in OBJECTS_VERSION_COUNT_INTERVALS.iter().enumerate() {
if size >= interval.start && size <= interval.end {
self.0[idx] += 1;
break;
}
}
}
pub fn to_map(&self) -> HashMap<String, u64> {
let mut res = HashMap::new();
for (count, ov) in self.0.iter().zip(OBJECTS_VERSION_COUNT_INTERVALS.iter()) {
res.insert(ov.name.to_string(), *count);
}
res
}
}
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct ReplicationStats {
pub pending_size: u64,
pub replicated_size: u64,
pub failed_size: u64,
pub failed_count: u64,
pub pending_count: u64,
pub missed_threshold_size: u64,
pub after_threshold_size: u64,
pub missed_threshold_count: u64,
pub after_threshold_count: u64,
pub replicated_count: u64,
}
impl ReplicationStats {
pub fn empty(&self) -> bool {
self.replicated_size == 0 && self.failed_size == 0 && self.failed_count == 0
}
}
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct ReplicationAllStats {
pub targets: HashMap<String, ReplicationStats>,
pub replica_size: u64,
pub replica_count: u64,
}
impl ReplicationAllStats {
pub fn empty(&self) -> bool {
if self.replica_size != 0 && self.replica_count != 0 {
return false;
}
for (_, v) in self.targets.iter() {
if !v.empty() {
return false;
}
}
true
}
}
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct DataUsageEntry {
pub children: DataUsageHashMap,
// These fields do not include any children.
pub size: usize,
pub objects: usize,
pub versions: usize,
pub delete_markers: usize,
pub obj_sizes: SizeHistogram,
pub obj_versions: VersionsHistogram,
pub replication_stats: Option<ReplicationAllStats>,
// Todo: tier
// pub all_tier_stats: ,
pub compacted: bool,
}
impl DataUsageEntry {
pub fn add_child(&mut self, hash: &DataUsageHash) {
if self.children.contains(&hash.key()) {
return;
}
self.children.insert(hash.key());
}
pub fn add_sizes(&mut self, summary: &SizeSummary) {
self.size += summary.total_size;
self.versions += summary.versions;
self.delete_markers += summary.delete_markers;
self.obj_sizes.add(summary.total_size as u64);
self.obj_versions.add(summary.versions as u64);
let replication_stats = if self.replication_stats.is_none() {
self.replication_stats = Some(ReplicationAllStats::default());
self.replication_stats.as_mut().unwrap()
} else {
self.replication_stats.as_mut().unwrap()
};
replication_stats.replica_size += summary.replica_size as u64;
replication_stats.replica_count += summary.replica_count as u64;
for (arn, st) in &summary.repl_target_stats {
let tgt_stat = replication_stats
.targets
.entry(arn.to_string())
.or_insert(ReplicationStats::default());
tgt_stat.pending_size += st.pending_size as u64;
tgt_stat.failed_size += st.failed_size as u64;
tgt_stat.replicated_size += st.replicated_size as u64;
tgt_stat.replicated_count += st.replicated_count as u64;
tgt_stat.failed_count += st.failed_count as u64;
tgt_stat.pending_count += st.pending_count as u64;
}
// Todo:: tiers
}
pub fn merge(&mut self, other: &DataUsageEntry) {
self.objects += other.objects;
self.versions += other.versions;
self.delete_markers += other.delete_markers;
self.size += other.size;
if let Some(o_rep) = &other.replication_stats {
if self.replication_stats.is_none() {
self.replication_stats = Some(ReplicationAllStats::default());
}
let s_rep = self.replication_stats.as_mut().unwrap();
s_rep.targets.clear();
s_rep.replica_size += o_rep.replica_size;
s_rep.replica_count += o_rep.replica_count;
for (arn, stat) in o_rep.targets.iter() {
let st = s_rep.targets.entry(arn.clone()).or_default();
*st = ReplicationStats {
pending_size: stat.pending_size + st.pending_size,
failed_size: stat.failed_size + st.failed_size,
replicated_size: stat.replicated_size + st.replicated_size,
pending_count: stat.pending_count + st.pending_count,
failed_count: stat.failed_count + st.failed_count,
replicated_count: stat.replicated_count + st.replicated_count,
..Default::default()
};
}
}
for (i, v) in other.obj_sizes.0.iter().enumerate() {
self.obj_sizes.0[i] += v;
}
for (i, v) in other.obj_versions.0.iter().enumerate() {
self.obj_versions.0[i] += v;
}
// todo: tiers
}
}
#[derive(Clone)]
pub struct DataUsageEntryInfo {
pub name: String,
pub parent: String,
pub entry: DataUsageEntry,
}
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct DataUsageCacheInfo {
pub name: String,
pub next_cycle: u32,
pub last_update: Option<SystemTime>,
pub skip_healing: bool,
#[serde(skip)]
pub lifecycle: Option<BucketLifecycleConfiguration>,
#[serde(skip)]
pub updates: Option<Sender<DataUsageEntry>>,
#[serde(skip)]
pub replication: Option<ReplicationConfiguration>,
}
// impl Default for DataUsageCacheInfo {
// fn default() -> Self {
// Self {
// name: Default::default(),
// next_cycle: Default::default(),
// last_update: SystemTime::now(),
// skip_healing: Default::default(),
// updates: Default::default(),
// replication: Default::default(),
// }
// }
// }
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct DataUsageCache {
pub info: DataUsageCacheInfo,
pub cache: HashMap<String, DataUsageEntry>,
}
impl DataUsageCache {
pub async fn load(store: &SetDisks, name: &str) -> Result<Self> {
let mut d = DataUsageCache::default();
let mut retries = 0;
while retries < 5 {
let path = Path::new(BUCKET_META_PREFIX).join(name);
// warn!("Loading data usage cache from backend: {}", path.display());
match store
.get_object_reader(
RUSTFS_META_BUCKET,
path.to_str().unwrap(),
None,
HeaderMap::new(),
&ObjectOptions {
no_lock: true,
..Default::default()
},
)
.await
{
Ok(mut reader) => {
if let Ok(info) = Self::unmarshal(&reader.read_all().await?) {
d = info
}
break;
}
Err(err) => {
// warn!("Failed to load data usage cache from backend: {}", &err);
match err {
Error::FileNotFound | Error::VolumeNotFound => {
match store
.get_object_reader(
RUSTFS_META_BUCKET,
name,
None,
HeaderMap::new(),
&ObjectOptions {
no_lock: true,
..Default::default()
},
)
.await
{
Ok(mut reader) => {
if let Ok(info) = Self::unmarshal(&reader.read_all().await?) {
d = info
}
break;
}
Err(_) => match err {
Error::FileNotFound | Error::VolumeNotFound => {
break;
}
_ => {}
},
}
}
_ => {
break;
}
}
}
}
retries += 1;
let dur = {
let mut rng = rand::rng();
rng.random_range(0..1_000)
};
sleep(Duration::from_millis(dur)).await;
}
Ok(d)
}
pub async fn save(&self, name: &str) -> Result<()> {
let Some(store) = new_object_layer_fn() else {
return Err(Error::other("errServerNotInitialized"));
};
let buf = self.marshal_msg()?;
let buf_clone = buf.clone();
let store_clone = store.clone();
let name = Path::new(BUCKET_META_PREFIX).join(name).to_string_lossy().to_string();
let name_clone = name.clone();
tokio::spawn(async move {
let _ = save_config(store_clone, &format!("{}{}", &name_clone, ".bkp"), buf_clone).await;
});
save_config(store, &name, buf).await?;
Ok(())
}
pub fn replace(&mut self, path: &str, parent: &str, e: DataUsageEntry) {
let hash = hash_path(path);
self.cache.insert(hash.key(), e);
if !parent.is_empty() {
let phash = hash_path(parent);
let p = {
let p = self.cache.entry(phash.key()).or_default();
p.add_child(&hash);
p.clone()
};
self.cache.insert(phash.key(), p);
}
}
pub fn replace_hashed(&mut self, hash: &DataUsageHash, parent: &Option<DataUsageHash>, e: &DataUsageEntry) {
self.cache.insert(hash.key(), e.clone());
if let Some(parent) = parent {
self.cache.entry(parent.key()).or_default().add_child(hash);
}
}
pub fn find(&self, path: &str) -> Option<DataUsageEntry> {
self.cache.get(&hash_path(path).key()).cloned()
}
pub fn find_children_copy(&mut self, h: DataUsageHash) -> DataUsageHashMap {
self.cache.entry(h.string()).or_default().children.clone()
}
pub fn flatten(&self, root: &DataUsageEntry) -> DataUsageEntry {
let mut root = root.clone();
for id in root.children.clone().iter() {
if let Some(e) = self.cache.get(id) {
let mut e = e.clone();
if !e.children.is_empty() {
e = self.flatten(&e);
}
root.merge(&e);
}
}
root.children.clear();
root
}
pub fn copy_with_children(&mut self, src: &DataUsageCache, hash: &DataUsageHash, parent: &Option<DataUsageHash>) {
if let Some(e) = src.cache.get(&hash.string()) {
self.cache.insert(hash.key(), e.clone());
for ch in e.children.iter() {
if *ch == hash.key() {
return;
}
self.copy_with_children(src, &DataUsageHash(ch.to_string()), &Some(hash.clone()));
}
if let Some(parent) = parent {
let p = self.cache.entry(parent.key()).or_default();
p.add_child(hash);
}
}
}
pub fn delete_recursive(&mut self, hash: &DataUsageHash) {
let mut need_remove = Vec::new();
if let Some(v) = self.cache.get(&hash.string()) {
for child in v.children.iter() {
need_remove.push(child.clone());
}
}
self.cache.remove(&hash.string());
need_remove.iter().for_each(|child| {
self.delete_recursive(&DataUsageHash(child.to_string()));
});
}
pub fn size_recursive(&self, path: &str) -> Option<DataUsageEntry> {
match self.find(path) {
Some(root) => {
if root.children.is_empty() {
return Some(root);
}
let mut flat = self.flatten(&root);
if flat.replication_stats.is_some() && flat.replication_stats.as_ref().unwrap().empty() {
flat.replication_stats = None;
}
Some(flat)
}
None => None,
}
}
pub fn search_parent(&self, hash: &DataUsageHash) -> Option<DataUsageHash> {
let want = hash.key();
if let Some(last_index) = want.rfind('/') {
if let Some(v) = self.find(&want[0..last_index]) {
if v.children.contains(&want) {
let found = hash_path(&want[0..last_index]);
return Some(found);
}
}
}
for (k, v) in self.cache.iter() {
if v.children.contains(&want) {
let found = DataUsageHash(k.clone());
return Some(found);
}
}
None
}
pub fn is_compacted(&self, hash: &DataUsageHash) -> bool {
match self.cache.get(&hash.key()) {
Some(due) => due.compacted,
None => false,
}
}
pub fn force_compact(&mut self, limit: usize) {
if self.cache.len() < limit {
return;
}
let top = hash_path(&self.info.name).key();
let top_e = match self.find(&top) {
Some(e) => e,
None => return,
};
if top_e.children.len() > <u64 as TryInto<usize>>::try_into(DATA_SCANNER_FORCE_COMPACT_AT_FOLDERS).unwrap() {
self.reduce_children_of(&hash_path(&self.info.name), limit, true);
}
if self.cache.len() <= limit {
return;
}
let mut found = HashSet::new();
found.insert(top);
mark(self, &top_e, &mut found);
self.cache.retain(|k, _| {
if !found.contains(k) {
return false;
}
true
});
}
pub fn reduce_children_of(&mut self, path: &DataUsageHash, limit: usize, compact_self: bool) {
let e = match self.cache.get(&path.key()) {
Some(e) => e,
None => return,
};
if e.compacted {
return;
}
if e.children.len() > limit && compact_self {
let mut flat = self.size_recursive(&path.key()).unwrap_or_default();
flat.compacted = true;
self.delete_recursive(path);
self.replace_hashed(path, &None, &flat);
return;
}
let total = self.total_children_rec(&path.key());
if total < limit {
return;
}
let mut leaves = Vec::new();
let mut remove = total - limit;
add(self, path, &mut leaves);
leaves.sort_by(|a, b| a.objects.cmp(&b.objects));
while remove > 0 && !leaves.is_empty() {
let e = leaves.first().unwrap();
let candidate = e.path.clone();
if candidate == *path && !compact_self {
break;
}
let removing = self.total_children_rec(&candidate.key());
let mut flat = match self.size_recursive(&candidate.key()) {
Some(flat) => flat,
None => {
leaves.remove(0);
continue;
}
};
flat.compacted = true;
self.delete_recursive(&candidate);
self.replace_hashed(&candidate, &None, &flat);
remove -= removing;
leaves.remove(0);
}
}
pub fn total_children_rec(&self, path: &str) -> usize {
let root = self.find(path);
if root.is_none() {
return 0;
}
let root = root.unwrap();
if root.children.is_empty() {
return 0;
}
let mut n = root.children.len();
for ch in root.children.iter() {
n += self.total_children_rec(ch);
}
n
}
pub fn merge(&mut self, o: &DataUsageCache) {
let mut existing_root = self.root();
let other_root = o.root();
if existing_root.is_none() && other_root.is_none() {
return;
}
if other_root.is_none() {
return;
}
if existing_root.is_none() {
*self = o.clone();
return;
}
if o.info.last_update.gt(&self.info.last_update) {
self.info.last_update = o.info.last_update;
}
existing_root.as_mut().unwrap().merge(other_root.as_ref().unwrap());
self.cache.insert(hash_path(&self.info.name).key(), existing_root.unwrap());
let e_hash = self.root_hash();
for key in other_root.as_ref().unwrap().children.iter() {
let entry = &o.cache[key];
let flat = o.flatten(entry);
let mut existing = self.cache[key].clone();
existing.merge(&flat);
self.replace_hashed(&DataUsageHash(key.clone()), &Some(e_hash.clone()), &existing);
}
}
pub fn root_hash(&self) -> DataUsageHash {
hash_path(&self.info.name)
}
pub fn root(&self) -> Option<DataUsageEntry> {
self.find(&self.info.name)
}
pub fn dui(&self, path: &str, buckets: &[BucketInfo]) -> DataUsageInfo {
let e = match self.find(path) {
Some(e) => e,
None => return DataUsageInfo::default(),
};
let flat = self.flatten(&e);
DataUsageInfo {
last_update: self.info.last_update,
objects_total_count: flat.objects as u64,
versions_total_count: flat.versions as u64,
delete_markers_total_count: flat.delete_markers as u64,
objects_total_size: flat.size as u64,
buckets_count: e.children.len() as u64,
buckets_usage: self.buckets_usage_info(buckets),
..Default::default()
}
}
pub fn buckets_usage_info(&self, buckets: &[BucketInfo]) -> HashMap<String, BucketUsageInfo> {
let mut dst = HashMap::new();
for bucket in buckets.iter() {
let e = match self.find(&bucket.name) {
Some(e) => e,
None => continue,
};
let flat = self.flatten(&e);
let mut bui = BucketUsageInfo {
size: flat.size as u64,
versions_count: flat.versions as u64,
objects_count: flat.objects as u64,
delete_markers_count: flat.delete_markers as u64,
object_size_histogram: flat.obj_sizes.to_map(),
object_versions_histogram: flat.obj_versions.to_map(),
..Default::default()
};
if let Some(rs) = &flat.replication_stats {
bui.replica_size = rs.replica_size;
bui.replica_count = rs.replica_count;
for (arn, stat) in rs.targets.iter() {
bui.replication_info.insert(
arn.clone(),
BucketTargetUsageInfo {
replication_pending_size: stat.pending_size,
replicated_size: stat.replicated_size,
replication_failed_size: stat.failed_size,
replication_pending_count: stat.pending_count,
replication_failed_count: stat.failed_count,
replicated_count: stat.replicated_count,
..Default::default()
},
);
}
}
dst.insert(bucket.name.clone(), bui);
}
dst
}
pub fn marshal_msg(&self) -> Result<Vec<u8>> {
let mut buf = Vec::new();
self.serialize(&mut Serializer::new(&mut buf))?;
Ok(buf)
}
pub fn unmarshal(buf: &[u8]) -> Result<Self> {
let t: Self = rmp_serde::from_slice(buf)?;
Ok(t)
}
}
#[derive(Default, Clone)]
struct Inner {
objects: usize,
path: DataUsageHash,
}
fn add(data_usage_cache: &DataUsageCache, path: &DataUsageHash, leaves: &mut Vec<Inner>) {
let e = match data_usage_cache.cache.get(&path.key()) {
Some(e) => e,
None => return,
};
if !e.children.is_empty() {
return;
}
let sz = data_usage_cache.size_recursive(&path.key()).unwrap_or_default();
leaves.push(Inner {
objects: sz.objects,
path: path.clone(),
});
for ch in e.children.iter() {
add(data_usage_cache, &DataUsageHash(ch.clone()), leaves);
}
}
fn mark(duc: &DataUsageCache, entry: &DataUsageEntry, found: &mut HashSet<String>) {
for k in entry.children.iter() {
found.insert(k.to_string());
if let Some(ch) = duc.cache.get(k) {
mark(duc, ch, found);
}
}
}
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct DataUsageHash(pub String);
impl DataUsageHash {
pub fn string(&self) -> String {
self.0.clone()
}
pub fn key(&self) -> String {
self.0.clone()
}
pub fn mod_(&self, cycle: u32, cycles: u32) -> bool {
if cycles <= 1 {
return cycles == 1;
}
let hash = self.calculate_hash();
hash as u32 % cycles == cycle % cycles
}
pub fn mod_alt(&self, cycle: u32, cycles: u32) -> bool {
if cycles <= 1 {
return cycles == 1;
}
let hash = self.calculate_hash();
(hash >> 32) as u32 % cycles == cycle % cycles
}
fn calculate_hash(&self) -> u64 {
let mut hasher = DefaultHasher::new();
self.0.hash(&mut hasher);
hasher.finish()
}
}
pub fn hash_path(data: &str) -> DataUsageHash {
DataUsageHash(Path::new(&data).clean().to_string_lossy().to_string())
}
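To tie hash_path, replace and mod_ together, a hedged sketch with made-up bucket and prefix names:

```rust
// hash_path() cleans the path; replace() stores the entry and registers it as a
// child of its (implicitly created) parent entry.
fn cache_linking_example() {
    let mut cache = DataUsageCache::default();
    cache.replace("mybucket/prefix", "mybucket", DataUsageEntry::default());
    let parent = cache.find("mybucket").expect("parent entry is created on demand");
    assert!(parent.children.contains(&hash_path("mybucket/prefix").key()));

    // mod_() deterministically assigns an entry to one of `cycles` scan cycles.
    let _in_cycle_3_of_16 = hash_path("mybucket/prefix").mod_(3, 16);
}
```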


@@ -1,544 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::{
collections::{HashMap, HashSet},
path::Path,
time::SystemTime,
};
use crate::{
config::storageclass::{RRS, STANDARD},
disk::{BUCKET_META_PREFIX, DeleteOptions, DiskAPI, DiskStore, RUSTFS_META_BUCKET, error::DiskError, fs::read_file},
global::GLOBAL_BackgroundHealState,
heal::heal_ops::HEALING_TRACKER_FILENAME,
new_object_layer_fn,
store_api::{BucketInfo, StorageAPI},
};
use crate::{disk, error::Result};
use chrono::{DateTime, Utc};
use lazy_static::lazy_static;
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use tokio::sync::RwLock;
use super::{background_heal_ops::get_local_disks_to_heal, heal_ops::BG_HEALING_UUID};
pub type HealScanMode = usize;
pub const HEAL_UNKNOWN_SCAN: HealScanMode = 0;
pub const HEAL_NORMAL_SCAN: HealScanMode = 1;
pub const HEAL_DEEP_SCAN: HealScanMode = 2;
pub const HEAL_ITEM_METADATA: &str = "metadata";
pub const HEAL_ITEM_BUCKET: &str = "bucket";
pub const HEAL_ITEM_BUCKET_METADATA: &str = "bucket-metadata";
pub const HEAL_ITEM_OBJECT: &str = "object";
pub const DRIVE_STATE_OK: &str = "ok";
pub const DRIVE_STATE_OFFLINE: &str = "offline";
pub const DRIVE_STATE_CORRUPT: &str = "corrupt";
pub const DRIVE_STATE_MISSING: &str = "missing";
pub const DRIVE_STATE_PERMISSION: &str = "permission-denied";
pub const DRIVE_STATE_FAULTY: &str = "faulty";
pub const DRIVE_STATE_ROOT_MOUNT: &str = "root-mount";
pub const DRIVE_STATE_UNKNOWN: &str = "unknown";
pub const DRIVE_STATE_UNFORMATTED: &str = "unformatted"; // only returned by disk
lazy_static! {
pub static ref TIME_SENTINEL: OffsetDateTime = OffsetDateTime::from_unix_timestamp(0).unwrap();
}
#[derive(Clone, Copy, Debug, Default, Serialize, Deserialize)]
pub struct HealOpts {
pub recursive: bool,
#[serde(rename = "dryRun")]
pub dry_run: bool,
pub remove: bool,
pub recreate: bool,
#[serde(rename = "scanMode")]
pub scan_mode: HealScanMode,
#[serde(rename = "updateParity")]
pub update_parity: bool,
#[serde(rename = "nolock")]
pub no_lock: bool,
pub pool: Option<usize>,
pub set: Option<usize>,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct HealStartSuccess {
#[serde(rename = "clientToken")]
pub client_token: String,
#[serde(rename = "clientAddress")]
pub client_address: String,
#[serde(rename = "startTime")]
pub start_time: DateTime<Utc>,
}
impl Default for HealStartSuccess {
fn default() -> Self {
Self {
client_token: Default::default(),
client_address: Default::default(),
start_time: Utc::now(),
}
}
}
pub type HealStopSuccess = HealStartSuccess;
#[derive(Debug, Default, Deserialize, Serialize)]
pub struct HealingTracker {
#[serde(skip_serializing, skip_deserializing)]
pub disk: Option<DiskStore>,
pub id: String,
pub pool_index: Option<usize>,
pub set_index: Option<usize>,
pub disk_index: Option<usize>,
pub path: String,
pub endpoint: String,
pub started: Option<OffsetDateTime>,
pub last_update: Option<SystemTime>,
pub objects_total_count: u64,
pub objects_total_size: u64,
pub items_healed: u64,
pub items_failed: u64,
pub item_skipped: u64,
pub bytes_done: u64,
pub bytes_failed: u64,
pub bytes_skipped: u64,
pub bucket: String,
pub object: String,
pub resume_items_healed: u64,
pub resume_items_failed: u64,
pub resume_items_skipped: u64,
pub resume_bytes_done: u64,
pub resume_bytes_failed: u64,
pub resume_bytes_skipped: u64,
pub queue_buckets: Vec<String>,
pub healed_buckets: Vec<String>,
pub heal_id: String,
pub retry_attempts: u64,
pub finished: bool,
#[serde(skip_serializing, skip_deserializing)]
pub mu: RwLock<bool>,
}
impl HealingTracker {
pub fn marshal_msg(&self) -> disk::error::Result<Vec<u8>> {
Ok(serde_json::to_vec(self)?)
}
pub fn unmarshal_msg(data: &[u8]) -> disk::error::Result<Self> {
Ok(serde_json::from_slice::<HealingTracker>(data)?)
}
pub async fn reset_healing(&mut self) {
let _ = self.mu.write().await;
self.items_healed = 0;
self.items_failed = 0;
self.bytes_done = 0;
self.bytes_failed = 0;
self.resume_items_healed = 0;
self.resume_items_failed = 0;
self.resume_bytes_done = 0;
self.resume_bytes_failed = 0;
self.item_skipped = 0;
self.bytes_skipped = 0;
self.healed_buckets = Vec::new();
self.bucket = String::new();
self.object = String::new();
}
pub async fn get_last_update(&self) -> Option<SystemTime> {
let _ = self.mu.read().await;
self.last_update
}
pub async fn get_bucket(&self) -> String {
let _ = self.mu.read().await;
self.bucket.clone()
}
pub async fn set_bucket(&mut self, bucket: &str) {
let _ = self.mu.write().await;
self.bucket = bucket.to_string();
}
pub async fn get_object(&self) -> String {
let _ = self.mu.read().await;
self.object.clone()
}
pub async fn set_object(&mut self, object: &str) {
let _ = self.mu.write().await;
self.object = object.to_string();
}
pub async fn update_progress(&mut self, success: bool, skipped: bool, by: u64) {
let _ = self.mu.write().await;
if success {
self.items_healed += 1;
self.bytes_done += by;
} else if skipped {
self.item_skipped += 1;
self.bytes_skipped += by;
} else {
self.items_failed += 1;
self.bytes_failed += by;
}
}
pub async fn update(&mut self) -> disk::error::Result<()> {
if let Some(disk) = &self.disk {
if healing(disk.path().to_string_lossy().as_ref()).await?.is_none() {
return Err(DiskError::other(format!("healingTracker: drive {} is not marked as healing", self.id)));
}
let _ = self.mu.write().await;
if self.id.is_empty() || self.pool_index.is_none() || self.set_index.is_none() || self.disk_index.is_none() {
self.id = disk.get_disk_id().await?.map_or("".to_string(), |id| id.to_string());
let disk_location = disk.get_disk_location();
self.pool_index = disk_location.pool_idx;
self.set_index = disk_location.set_idx;
self.disk_index = disk_location.disk_idx;
}
}
self.save().await
}
pub async fn save(&mut self) -> disk::error::Result<()> {
let _ = self.mu.write().await;
if self.pool_index.is_none() || self.set_index.is_none() || self.disk_index.is_none() {
let Some(store) = new_object_layer_fn() else {
return Err(DiskError::other("errServerNotInitialized"));
};
// TODO: check error type
(self.pool_index, self.set_index, self.disk_index) =
store.get_pool_and_set(&self.id).await.map_err(|_| DiskError::DiskNotFound)?;
}
self.last_update = Some(SystemTime::now());
let htracker_bytes = self.marshal_msg()?;
GLOBAL_BackgroundHealState.update_heal_status(self).await;
if let Some(disk) = &self.disk {
let file_path = Path::new(BUCKET_META_PREFIX).join(HEALING_TRACKER_FILENAME);
disk.write_all(RUSTFS_META_BUCKET, file_path.to_str().unwrap(), htracker_bytes.into())
.await?;
}
Ok(())
}
pub async fn delete(&self) -> Result<()> {
if let Some(disk) = &self.disk {
let file_path = Path::new(BUCKET_META_PREFIX).join(HEALING_TRACKER_FILENAME);
disk.delete(
RUSTFS_META_BUCKET,
file_path.to_str().unwrap(),
DeleteOptions {
recursive: false,
immediate: false,
..Default::default()
},
)
.await?;
}
Ok(())
}
pub async fn is_healed(&self, bucket: &str) -> bool {
let _ = self.mu.read().await;
for v in self.healed_buckets.iter() {
if v == bucket {
return true;
}
}
false
}
pub async fn resume(&mut self) {
let _ = self.mu.write().await;
self.items_healed = self.resume_items_healed;
self.items_failed = self.resume_items_failed;
self.item_skipped = self.resume_items_skipped;
self.bytes_done = self.resume_bytes_done;
self.bytes_failed = self.resume_bytes_failed;
self.bytes_skipped = self.resume_bytes_skipped;
}
pub async fn bucket_done(&mut self, bucket: &str) {
let _ = self.mu.write().await;
self.resume_items_healed = self.items_healed;
self.resume_items_failed = self.items_failed;
self.resume_items_skipped = self.item_skipped;
self.resume_bytes_done = self.bytes_done;
self.resume_bytes_failed = self.bytes_failed;
self.resume_bytes_skipped = self.bytes_skipped;
self.healed_buckets.push(bucket.to_string());
self.queue_buckets.retain(|x| x != bucket);
}
pub async fn set_queue_buckets(&mut self, buckets: &[BucketInfo]) {
let _ = self.mu.write().await;
buckets.iter().for_each(|bucket| {
if !self.healed_buckets.contains(&bucket.name) {
self.queue_buckets.push(bucket.name.clone());
}
});
}
pub async fn to_healing_disk(&self) -> rustfs_madmin::HealingDisk {
let _ = self.mu.read().await;
rustfs_madmin::HealingDisk {
id: self.id.clone(),
heal_id: self.heal_id.clone(),
pool_index: self.pool_index,
set_index: self.set_index,
disk_index: self.disk_index,
endpoint: self.endpoint.clone(),
path: self.path.clone(),
started: self.started,
last_update: self.last_update,
retry_attempts: self.retry_attempts,
objects_total_count: self.objects_total_count,
objects_total_size: self.objects_total_size,
items_healed: self.items_healed,
items_failed: self.items_failed,
item_skipped: self.item_skipped,
bytes_done: self.bytes_done,
bytes_failed: self.bytes_failed,
bytes_skipped: self.bytes_skipped,
objects_healed: self.items_healed,
objects_failed: self.items_failed,
bucket: self.bucket.clone(),
object: self.object.clone(),
queue_buckets: self.queue_buckets.clone(),
healed_buckets: self.healed_buckets.clone(),
finished: self.finished,
}
}
}
impl Clone for HealingTracker {
fn clone(&self) -> Self {
Self {
disk: self.disk.clone(),
id: self.id.clone(),
pool_index: self.pool_index,
set_index: self.set_index,
disk_index: self.disk_index,
path: self.path.clone(),
endpoint: self.endpoint.clone(),
started: self.started,
last_update: self.last_update,
objects_total_count: self.objects_total_count,
objects_total_size: self.objects_total_size,
items_healed: self.items_healed,
items_failed: self.items_failed,
item_skipped: self.item_skipped,
bytes_done: self.bytes_done,
bytes_failed: self.bytes_failed,
bytes_skipped: self.bytes_skipped,
bucket: self.bucket.clone(),
object: self.object.clone(),
resume_items_healed: self.resume_items_healed,
resume_items_failed: self.resume_items_failed,
resume_items_skipped: self.resume_items_skipped,
resume_bytes_done: self.resume_bytes_done,
resume_bytes_failed: self.resume_bytes_failed,
resume_bytes_skipped: self.resume_bytes_skipped,
queue_buckets: self.queue_buckets.clone(),
healed_buckets: self.healed_buckets.clone(),
heal_id: self.heal_id.clone(),
retry_attempts: self.retry_attempts,
finished: self.finished,
mu: RwLock::new(false),
}
}
}
pub async fn load_healing_tracker(disk: &Option<DiskStore>) -> disk::error::Result<HealingTracker> {
if let Some(disk) = disk {
let disk_id = disk.get_disk_id().await?;
if let Some(disk_id) = disk_id {
let disk_id = disk_id.to_string();
let file_path = Path::new(BUCKET_META_PREFIX).join(HEALING_TRACKER_FILENAME);
let data = disk.read_all(RUSTFS_META_BUCKET, file_path.to_str().unwrap()).await?;
let mut healing_tracker = HealingTracker::unmarshal_msg(&data)?;
if healing_tracker.id != disk_id && !healing_tracker.id.is_empty() {
return Err(DiskError::other(format!(
"loadHealingTracker: drive id mismatch expected {}, got {}",
healing_tracker.id, disk_id
)));
}
healing_tracker.id = disk_id;
healing_tracker.disk = Some(disk.clone());
Ok(healing_tracker)
} else {
Err(DiskError::other("loadHealingTracker: disk not have id"))
}
} else {
Err(DiskError::other("loadHealingTracker: nil drive given"))
}
}
pub async fn init_healing_tracker(disk: DiskStore, heal_id: &str) -> disk::error::Result<HealingTracker> {
let disk_location = disk.get_disk_location();
Ok(HealingTracker {
id: disk
.get_disk_id()
.await
.map_or("".to_string(), |id| id.map_or("".to_string(), |id| id.to_string())),
heal_id: heal_id.to_string(),
path: disk.to_string(),
endpoint: disk.endpoint().to_string(),
started: Some(OffsetDateTime::now_utc()),
pool_index: disk_location.pool_idx,
set_index: disk_location.set_idx,
disk_index: disk_location.disk_idx,
disk: Some(disk),
..Default::default()
})
}
pub async fn healing(drive_path: &str) -> disk::error::Result<Option<HealingTracker>> {
let healing_file = Path::new(drive_path)
.join(RUSTFS_META_BUCKET)
.join(BUCKET_META_PREFIX)
.join(HEALING_TRACKER_FILENAME);
let b = read_file(healing_file).await?;
if b.is_empty() {
return Ok(None);
}
let healing_tracker = HealingTracker::unmarshal_msg(&b)?;
Ok(Some(healing_tracker))
}
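Taken together, load_healing_tracker, init_healing_tracker and healing() give a load-or-create flow for a drive's tracker. Below is a minimal sketch of that flow; it is illustrative only, not part of the changed files, and the helper name tracker_for_disk is invented here.

// Illustrative sketch: reuse the tracker persisted on a drive if one can be read,
// otherwise initialize a fresh one for the given heal id and persist it via save().
async fn tracker_for_disk(disk: DiskStore, heal_id: &str) -> disk::error::Result<HealingTracker> {
    match load_healing_tracker(&Some(disk.clone())).await {
        Ok(tracker) => Ok(tracker),
        Err(_) => {
            let mut tracker = init_healing_tracker(disk, heal_id).await?;
            tracker.save().await?;
            Ok(tracker)
        }
    }
}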
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct MRFStatus {
bytes_healed: u64,
items_healed: u64,
}
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct SetStatus {
pub id: String,
pub pool_index: i32,
pub set_index: i32,
pub heal_status: String,
pub heal_priority: String,
pub total_objects: usize,
pub disks: Vec<rustfs_madmin::Disk>,
}
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct BgHealState {
offline_endpoints: Vec<String>,
scanned_items_count: u64,
heal_disks: Vec<String>,
sets: Vec<SetStatus>,
mrf: HashMap<String, MRFStatus>,
scparity: HashMap<String, usize>,
}
pub async fn get_local_background_heal_status() -> (BgHealState, bool) {
let (bg_seq, ok) = GLOBAL_BackgroundHealState.get_heal_sequence_by_token(BG_HEALING_UUID).await;
if !ok {
return (BgHealState::default(), false);
}
let bg_seq = bg_seq.unwrap();
let mut status = BgHealState {
scanned_items_count: bg_seq.get_scanned_items_count().await as u64,
..Default::default()
};
let mut heal_disks_map = HashSet::new();
for ep in get_local_disks_to_heal().await.iter() {
heal_disks_map.insert(ep.to_string());
}
let Some(store) = new_object_layer_fn() else {
let healing = GLOBAL_BackgroundHealState.get_local_healing_disks().await;
for disk in healing.values() {
status.heal_disks.push(disk.endpoint.clone());
}
return (status, true);
};
let si = store.local_storage_info().await;
let mut indexed = HashMap::new();
for disk in si.disks.iter() {
let set_idx = format!("{}-{}", disk.pool_index, disk.set_index);
// indexed.insert(set_idx, disk);
indexed.entry(set_idx).or_insert(Vec::new()).push(disk);
}
for (id, disks) in indexed {
let mut ss = SetStatus {
id,
set_index: disks[0].set_index,
pool_index: disks[0].pool_index,
..Default::default()
};
for disk in disks {
ss.disks.push(disk.clone());
if disk.healing {
ss.heal_status = "healing".to_string();
ss.heal_priority = "high".to_string();
status.heal_disks.push(disk.endpoint.clone());
}
}
ss.disks.sort_by(|a, b| {
if a.pool_index != b.pool_index {
return a.pool_index.cmp(&b.pool_index);
}
if a.set_index != b.set_index {
return a.set_index.cmp(&b.set_index);
}
a.disk_index.cmp(&b.disk_index)
});
status.sets.push(ss);
}
status.sets.sort_by(|a, b| a.id.cmp(&b.id));
let backend_info = store.backend_info().await;
status
.scparity
.insert(STANDARD.to_string(), backend_info.standard_sc_parity.unwrap_or_default());
status
.scparity
.insert(RRS.to_string(), backend_info.rr_sc_parity.unwrap_or_default());
(status, true)
}


@@ -1,842 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use super::{
background_heal_ops::HealTask,
data_scanner::HEAL_DELETE_DANGLING,
error::ERR_SKIP_FILE,
heal_commands::{HEAL_ITEM_BUCKET_METADATA, HealOpts, HealScanMode, HealStopSuccess, HealingTracker},
};
use crate::error::{Error, Result};
use crate::heal::heal_commands::{HEAL_ITEM_BUCKET, HEAL_ITEM_OBJECT};
use crate::store_api::StorageAPI;
use crate::{
config::com::CONFIG_PREFIX,
disk::RUSTFS_META_BUCKET,
global::GLOBAL_BackgroundHealRoutine,
heal::{error::ERR_HEAL_STOP_SIGNALLED, heal_commands::DRIVE_STATE_OK},
};
use crate::{
disk::endpoint::Endpoint,
endpoints::Endpoints,
global::GLOBAL_IsDistErasure,
heal::heal_commands::{HEAL_UNKNOWN_SCAN, HealStartSuccess},
new_object_layer_fn,
};
use chrono::Utc;
use futures::join;
use lazy_static::lazy_static;
use rustfs_filemeta::MetaCacheEntry;
use rustfs_madmin::heal_commands::{HealDriveInfo, HealItemType, HealResultItem};
use rustfs_utils::path::has_prefix;
use rustfs_utils::path::path_join;
use serde::{Deserialize, Serialize};
use std::{
collections::HashMap,
future::Future,
path::PathBuf,
pin::Pin,
sync::Arc,
time::{Duration, SystemTime, UNIX_EPOCH},
};
use tokio::{
select, spawn,
sync::{
RwLock, broadcast,
mpsc::{self, Receiver as M_Receiver, Sender as M_Sender},
watch::{self, Receiver as W_Receiver, Sender as W_Sender},
},
time::{interval, sleep},
};
use tracing::{error, info};
use uuid::Uuid;
type HealStatusSummary = String;
type ItemsMap = HashMap<HealItemType, usize>;
pub type HealEntryFn =
Arc<dyn Fn(String, MetaCacheEntry, HealScanMode) -> Pin<Box<dyn Future<Output = Result<()>> + Send>> + Send + Sync + 'static>;
pub const BG_HEALING_UUID: &str = "0000-0000-0000-0000";
pub const HEALING_TRACKER_FILENAME: &str = ".healing.bin";
const KEEP_HEAL_SEQ_STATE_DURATION: Duration = Duration::from_secs(10 * 60);
const HEAL_NOT_STARTED_STATUS: &str = "not started";
const HEAL_RUNNING_STATUS: &str = "running";
const HEAL_STOPPED_STATUS: &str = "stopped";
const HEAL_FINISHED_STATUS: &str = "finished";
pub const RUSTFS_RESERVED_BUCKET: &str = "rustfs";
pub const RUSTFS_RESERVED_BUCKET_PATH: &str = "/rustfs";
pub const LOGIN_PATH_PREFIX: &str = "/login";
const MAX_UNCONSUMED_HEAL_RESULT_ITEMS: usize = 1000;
const HEAL_UNCONSUMED_TIMEOUT: Duration = Duration::from_secs(24 * 60 * 60);
pub const NOP_HEAL: &str = "";
lazy_static! {}
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct HealSequenceStatus {
pub summary: HealStatusSummary,
pub failure_detail: String,
pub start_time: u64,
pub heal_setting: HealOpts,
pub items: Vec<HealResultItem>,
}
#[derive(Debug, Default)]
pub struct HealSource {
pub bucket: String,
pub object: String,
pub version_id: String,
pub no_wait: bool,
pub opts: Option<HealOpts>,
}
#[derive(Debug)]
pub struct HealSequence {
pub bucket: String,
pub object: String,
pub report_progress: bool,
pub start_time: SystemTime,
pub end_time: Arc<RwLock<SystemTime>>,
pub client_token: String,
pub client_address: String,
pub force_started: bool,
pub setting: HealOpts,
pub current_status: Arc<RwLock<HealSequenceStatus>>,
pub last_sent_result_index: RwLock<usize>,
pub scanned_items_map: RwLock<ItemsMap>,
pub healed_items_map: RwLock<ItemsMap>,
pub heal_failed_items_map: RwLock<ItemsMap>,
pub last_heal_activity: RwLock<SystemTime>,
traverse_and_heal_done_tx: Arc<RwLock<M_Sender<Option<Error>>>>,
traverse_and_heal_done_rx: Arc<RwLock<M_Receiver<Option<Error>>>>,
tx: W_Sender<bool>,
rx: W_Receiver<bool>,
}
pub fn new_bg_heal_sequence() -> HealSequence {
let hs = HealOpts {
remove: HEAL_DELETE_DANGLING,
..Default::default()
};
HealSequence {
start_time: SystemTime::now(),
client_token: BG_HEALING_UUID.to_string(),
bucket: RUSTFS_RESERVED_BUCKET.to_string(),
setting: hs,
current_status: Arc::new(RwLock::new(HealSequenceStatus {
summary: HEAL_NOT_STARTED_STATUS.to_string(),
heal_setting: hs,
..Default::default()
})),
report_progress: false,
scanned_items_map: HashMap::new().into(),
healed_items_map: HashMap::new().into(),
heal_failed_items_map: HashMap::new().into(),
..Default::default()
}
}
pub fn new_heal_sequence(bucket: &str, obj_prefix: &str, client_addr: &str, hs: HealOpts, force_start: bool) -> HealSequence {
let client_token = Uuid::new_v4().to_string();
let (tx, rx) = mpsc::channel(10);
HealSequence {
bucket: bucket.to_string(),
object: obj_prefix.to_string(),
report_progress: true,
start_time: SystemTime::now(),
client_token,
client_address: client_addr.to_string(),
force_started: force_start,
setting: hs,
current_status: Arc::new(RwLock::new(HealSequenceStatus {
summary: HEAL_NOT_STARTED_STATUS.to_string(),
heal_setting: hs,
..Default::default()
})),
traverse_and_heal_done_tx: Arc::new(RwLock::new(tx)),
traverse_and_heal_done_rx: Arc::new(RwLock::new(rx)),
scanned_items_map: HashMap::new().into(),
healed_items_map: HashMap::new().into(),
heal_failed_items_map: HashMap::new().into(),
..Default::default()
}
}
impl Default for HealSequence {
fn default() -> Self {
let (h_tx, h_rx) = mpsc::channel(1);
let (tx, rx) = watch::channel(false);
Self {
bucket: Default::default(),
object: Default::default(),
report_progress: Default::default(),
start_time: SystemTime::now(),
end_time: Arc::new(RwLock::new(SystemTime::now())),
client_token: Default::default(),
client_address: Default::default(),
force_started: Default::default(),
setting: Default::default(),
current_status: Default::default(),
last_sent_result_index: Default::default(),
scanned_items_map: Default::default(),
healed_items_map: Default::default(),
heal_failed_items_map: Default::default(),
last_heal_activity: RwLock::new(SystemTime::now()),
traverse_and_heal_done_tx: Arc::new(RwLock::new(h_tx)),
traverse_and_heal_done_rx: Arc::new(RwLock::new(h_rx)),
tx,
rx,
}
}
}
impl HealSequence {
pub fn new(bucket: &str, obj_prefix: &str, client_addr: &str, hs: HealOpts, force_start: bool) -> Self {
let client_token = Uuid::new_v4().to_string();
Self {
bucket: bucket.to_string(),
object: obj_prefix.to_string(),
report_progress: true,
client_token,
client_address: client_addr.to_string(),
force_started: force_start,
setting: hs,
current_status: Arc::new(RwLock::new(HealSequenceStatus {
summary: HEAL_NOT_STARTED_STATUS.to_string(),
heal_setting: hs,
..Default::default()
})),
..Default::default()
}
}
}
impl HealSequence {
pub async fn get_scanned_items_count(&self) -> usize {
self.scanned_items_map.read().await.values().sum()
}
async fn _get_scanned_items_map(&self) -> ItemsMap {
self.scanned_items_map.read().await.clone()
}
async fn _get_healed_items_map(&self) -> ItemsMap {
self.healed_items_map.read().await.clone()
}
async fn _get_heal_failed_items_map(&self) -> ItemsMap {
self.heal_failed_items_map.read().await.clone()
}
pub async fn count_failed(&self, heal_type: HealItemType) {
*self.heal_failed_items_map.write().await.entry(heal_type).or_insert(0) += 1;
*self.last_heal_activity.write().await = SystemTime::now();
}
pub async fn count_scanned(&self, heal_type: HealItemType) {
*self.scanned_items_map.write().await.entry(heal_type).or_insert(0) += 1;
*self.last_heal_activity.write().await = SystemTime::now();
}
pub async fn count_healed(&self, heal_type: HealItemType) {
*self.healed_items_map.write().await.entry(heal_type).or_insert(0) += 1;
*self.last_heal_activity.write().await = SystemTime::now();
}
async fn is_quitting(&self) -> bool {
if let Ok(true) = self.rx.has_changed() {
info!("quited");
return true;
}
false
}
async fn has_ended(&self) -> bool {
if self.client_token == *BG_HEALING_UUID {
return false;
}
*(self.end_time.read().await) != self.start_time
}
async fn stop(&self) {
let _ = self.tx.send(true);
}
async fn push_heal_result_item(&self, r: &HealResultItem) -> Result<()> {
let mut r = r.clone();
let mut interval_timer = interval(HEAL_UNCONSUMED_TIMEOUT);
// A tokio interval's first tick completes immediately; consume it so the
// timeout arm below only fires after HEAL_UNCONSUMED_TIMEOUT of waiting.
interval_timer.tick().await;
#[allow(unused_assignments)]
let mut items_len = 0;
loop {
{
let current_status_r = self.current_status.read().await;
items_len = current_status_r.items.len();
}
if items_len == MAX_UNCONSUMED_HEAL_RESULT_ITEMS {
select! {
_ = sleep(Duration::from_secs(1)) => {
}
_ = self.is_done() => {
return Err(Error::other("stopped"));
}
_ = interval_timer.tick() => {
return Err(Error::other("timeout"));
}
}
} else {
break;
}
}
let mut current_status_w = self.current_status.write().await;
if items_len > 0 {
r.result_index = 1 + current_status_w.items[items_len - 1].result_index;
} else {
r.result_index = 1 + *self.last_sent_result_index.read().await;
}
current_status_w.items.push(r);
Ok(())
}
pub async fn queue_heal_task(&self, source: HealSource, heal_type: HealItemType) -> Result<()> {
let mut task = HealTask::new(&source.bucket, &source.object, &source.version_id, &self.setting);
info!("queue_heal_task, {:?}", task);
if let Some(opts) = source.opts {
task.opts = opts;
} else {
task.opts.scan_mode = HEAL_UNKNOWN_SCAN;
}
self.count_scanned(heal_type.clone()).await;
if source.no_wait {
let task_str = format!("{task:?}");
if GLOBAL_BackgroundHealRoutine.tasks_tx.try_send(task).is_ok() {
info!("Task in the queue: {:?}", task_str);
}
return Ok(());
}
let (resp_tx, mut resp_rx) = mpsc::channel(1);
task.resp_tx = Some(resp_tx);
let task_str = format!("{task:?}");
if GLOBAL_BackgroundHealRoutine.tasks_tx.try_send(task).is_ok() {
info!("Task in the queue: {:?}", task_str);
} else {
error!("push task to queue failed");
}
let count_ok_drives = |drivers: &[HealDriveInfo]| {
let mut count = 0;
for drive in drivers.iter() {
if drive.state == DRIVE_STATE_OK {
count += 1;
}
}
count
};
match resp_rx.recv().await {
Some(mut res) => {
if res.err.is_none() {
self.count_healed(heal_type.clone()).await;
} else {
self.count_failed(heal_type.clone()).await;
}
if !self.report_progress {
return if let Some(err) = res.err {
if err.to_string() == ERR_SKIP_FILE {
return Ok(());
}
Err(err)
} else {
Ok(())
};
}
res.result.heal_item_type = heal_type.clone();
if let Some(err) = res.err.as_ref() {
res.result.detail = err.to_string();
}
if res.result.parity_blocks > 0 && res.result.data_blocks > 0 && res.result.data_blocks > res.result.parity_blocks
{
let got = count_ok_drives(&res.result.after.drives);
if got < res.result.parity_blocks {
res.result.detail = format!(
"quorum loss - expected at least {} drives in OK state, got {}",
res.result.parity_blocks, got
);
}
}
info!("queue_heal_task, HealResult: {:?}", res);
self.push_heal_result_item(&res.result).await
}
None => Ok(()),
}
}
async fn heal_disk_meta(h: Arc<HealSequence>) -> Result<()> {
HealSequence::heal_rustfs_sys_meta(h, CONFIG_PREFIX).await
}
async fn heal_items(h: Arc<HealSequence>, buckets_only: bool) -> Result<()> {
if h.client_token == *BG_HEALING_UUID {
return Ok(());
}
let bucket = h.bucket.clone();
let task1 = Self::heal_disk_meta(h.clone());
let task2 = Self::heal_bucket(h.clone(), &bucket, buckets_only);
let results = join!(task1, task2);
results.0?;
results.1?;
Ok(())
}
async fn traverse_and_heal(h: Arc<HealSequence>) {
let buckets_only = false;
let result = Self::heal_items(h.clone(), buckets_only).await.err();
let _ = h.traverse_and_heal_done_tx.read().await.send(result).await;
}
async fn heal_rustfs_sys_meta(h: Arc<HealSequence>, meta_prefix: &str) -> Result<()> {
info!("heal_rustfs_sys_meta, h: {:?}", h);
let Some(store) = new_object_layer_fn() else {
return Err(Error::other("errServerNotInitialized"));
};
let setting = h.setting;
store
.heal_objects(RUSTFS_META_BUCKET, meta_prefix, &setting, h.clone(), true)
.await
}
async fn is_done(&self) -> bool {
if let Ok(true) = self.rx.has_changed() {
return true;
}
false
}
pub async fn heal_bucket(hs: Arc<HealSequence>, bucket: &str, bucket_only: bool) -> Result<()> {
info!("heal_bucket, hs: {:?}", hs);
let (object, setting) = {
hs.queue_heal_task(
HealSource {
bucket: bucket.to_string(),
..Default::default()
},
HEAL_ITEM_BUCKET.to_string(),
)
.await?;
if bucket_only {
return Ok(());
}
if !hs.setting.recursive {
if !hs.object.is_empty() {
HealSequence::heal_object(hs.clone(), bucket, &hs.object, "", hs.setting.scan_mode).await?;
}
return Ok(());
}
(hs.object.clone(), hs.setting)
};
let Some(store) = new_object_layer_fn() else {
return Err(Error::other("errServerNotInitialized"));
};
store.heal_objects(bucket, &object, &setting, hs.clone(), false).await
}
pub async fn heal_object(
hs: Arc<HealSequence>,
bucket: &str,
object: &str,
version_id: &str,
_scan_mode: HealScanMode,
) -> Result<()> {
info!("heal_object");
if hs.is_quitting().await {
info!("heal_object hs is quitting");
return Err(Error::other(ERR_HEAL_STOP_SIGNALLED));
}
info!("will queue task");
hs.queue_heal_task(
HealSource {
bucket: bucket.to_string(),
object: object.to_string(),
version_id: version_id.to_string(),
opts: Some(hs.setting),
..Default::default()
},
HEAL_ITEM_OBJECT.to_string(),
)
.await?;
Ok(())
}
pub async fn heal_meta_object(
hs: Arc<HealSequence>,
bucket: &str,
object: &str,
version_id: &str,
_scan_mode: HealScanMode,
) -> Result<()> {
if hs.is_quitting().await {
return Err(Error::other(ERR_HEAL_STOP_SIGNALLED));
}
hs.queue_heal_task(
HealSource {
bucket: bucket.to_string(),
object: object.to_string(),
version_id: version_id.to_string(),
..Default::default()
},
HEAL_ITEM_BUCKET_METADATA.to_string(),
)
.await?;
Ok(())
}
}
pub async fn heal_sequence_start(h: Arc<HealSequence>) {
{
let mut current_status_w = h.current_status.write().await;
current_status_w.summary = HEAL_RUNNING_STATUS.to_string();
current_status_w.start_time = SystemTime::now()
.duration_since(UNIX_EPOCH)
.expect("Time went backwards")
.as_secs();
}
let h_clone = h.clone();
spawn(async move {
HealSequence::traverse_and_heal(h_clone).await;
});
let h_clone_1 = h.clone();
let mut x = h.traverse_and_heal_done_rx.write().await;
select! {
_ = h.is_done() => {
*(h.end_time.write().await) = SystemTime::now();
let mut current_status_w = h.current_status.write().await;
current_status_w.summary = HEAL_FINISHED_STATUS.to_string();
spawn(async move {
let mut rx_w = h_clone_1.traverse_and_heal_done_rx.write().await;
rx_w.recv().await;
});
}
result = x.recv() => {
if let Some(err) = result {
match err {
Some(err) => {
let mut current_status_w = h.current_status.write().await;
current_status_w.summary = HEAL_STOPPED_STATUS.to_string();
current_status_w.failure_detail = err.to_string();
},
None => {
let mut current_status_w = h.current_status.write().await;
current_status_w.summary = HEAL_FINISHED_STATUS.to_string();
}
}
}
}
}
}
#[derive(Debug, Default)]
pub struct AllHealState {
mu: RwLock<bool>,
heal_seq_map: RwLock<HashMap<String, Arc<HealSequence>>>,
heal_local_disks: RwLock<HashMap<Endpoint, bool>>,
heal_status: RwLock<HashMap<String, HealingTracker>>,
}
impl AllHealState {
pub fn new(cleanup: bool) -> Arc<Self> {
let state = Arc::new(AllHealState::default());
let (_, mut rx) = broadcast::channel(1);
if cleanup {
let state_clone = state.clone();
spawn(async move {
loop {
select! {
result = rx.recv() =>{
if let Ok(true) = result {
return;
}
}
_ = sleep(Duration::from_secs(5 * 60)) => {
state_clone.periodic_heal_seqs_clean().await;
}
}
}
});
}
state
}
pub async fn pop_heal_local_disks(&self, heal_local_disks: &[Endpoint]) {
let _ = self.mu.write().await;
self.heal_local_disks.write().await.retain(|k, _| {
if heal_local_disks.contains(k) {
return false;
}
true
});
let heal_local_disks = heal_local_disks.iter().map(|s| s.to_string()).collect::<Vec<_>>();
self.heal_status.write().await.retain(|_, v| {
if heal_local_disks.contains(&v.endpoint) {
return false;
}
true
});
}
pub async fn pop_heal_status_json(&self, heal_path: &str, client_token: &str) -> Result<Vec<u8>> {
match self.get_heal_sequence(heal_path).await {
Some(h) => {
if client_token != h.client_token {
info!("err heal invalid client token");
return Err(Error::other("err heal invalid client token"));
}
let num_items = h.current_status.read().await.items.len();
let mut last_result_index = *h.last_sent_result_index.read().await;
if num_items > 0 {
if let Some(item) = h.current_status.read().await.items.last() {
last_result_index = item.result_index;
}
}
*h.last_sent_result_index.write().await = last_result_index;
let data = h.current_status.read().await.clone();
match serde_json::to_vec(&data) {
Ok(b) => {
h.current_status.write().await.items.clear();
Ok(b)
}
Err(e) => {
h.current_status.write().await.items.clear();
info!("json encode err, e: {}", e);
Err(Error::other(e.to_string()))
}
}
}
None => serde_json::to_vec(&HealSequenceStatus {
summary: HEAL_FINISHED_STATUS.to_string(),
..Default::default()
})
.map_err(|e| {
info!("json encode err, e: {}", e);
Error::other(e.to_string())
}),
}
}
pub async fn update_heal_status(&self, tracker: &HealingTracker) {
let _ = self.mu.write().await;
let _ = tracker.mu.read().await;
self.heal_status.write().await.insert(tracker.id.clone(), tracker.clone());
}
pub async fn get_local_healing_disks(&self) -> HashMap<String, rustfs_madmin::HealingDisk> {
let _ = self.mu.read().await;
let mut dst = HashMap::new();
for v in self.heal_status.read().await.values() {
dst.insert(v.endpoint.clone(), v.to_healing_disk().await);
}
dst
}
pub async fn get_heal_local_disk_endpoints(&self) -> Endpoints {
let _ = self.mu.read().await;
let mut endpoints = Vec::new();
self.heal_local_disks.read().await.iter().for_each(|(k, v)| {
if !v {
endpoints.push(k.clone());
}
});
Endpoints::from(endpoints)
}
pub async fn set_disk_healing_status(&self, ep: Endpoint, healing: bool) {
let _ = self.mu.write().await;
self.heal_local_disks.write().await.insert(ep, healing);
}
pub async fn push_heal_local_disks(&self, heal_local_disks: &[Endpoint]) {
let _ = self.mu.write().await;
for heal_local_disk in heal_local_disks.iter() {
self.heal_local_disks.write().await.insert(heal_local_disk.clone(), false);
}
}
pub async fn periodic_heal_seqs_clean(&self) {
let _ = self.mu.write().await;
let now = SystemTime::now();
let mut keys_to_remove = Vec::new();
for (k, v) in self.heal_seq_map.read().await.iter() {
if v.has_ended().await && now.duration_since(*(v.end_time.read().await)).unwrap() > KEEP_HEAL_SEQ_STATE_DURATION {
keys_to_remove.push(k.clone())
}
}
for key in keys_to_remove.iter() {
self.heal_seq_map.write().await.remove(key);
}
}
pub async fn get_heal_sequence_by_token(&self, token: &str) -> (Option<Arc<HealSequence>>, bool) {
let _ = self.mu.read().await;
for v in self.heal_seq_map.read().await.values() {
if v.client_token == token {
return (Some(v.clone()), true);
}
}
(None, false)
}
pub async fn get_heal_sequence(&self, path: &str) -> Option<Arc<HealSequence>> {
let _ = self.mu.read().await;
self.heal_seq_map.read().await.get(path).cloned()
}
pub async fn stop_heal_sequence(&self, path: &str) -> Result<Vec<u8>> {
let mut hsp = HealStopSuccess::default();
if let Some(he) = self.get_heal_sequence(path).await {
let client_token = he.client_token.clone();
if *GLOBAL_IsDistErasure.read().await {
// TODO: proxy
}
hsp.client_token = client_token;
hsp.client_address = he.client_address.clone();
hsp.start_time = Utc::now();
he.stop().await;
loop {
if he.has_ended().await {
break;
}
sleep(Duration::from_secs(1)).await;
}
let _ = self.mu.write().await;
self.heal_seq_map.write().await.remove(path);
} else {
hsp.client_token = "unknown".to_string();
}
let b = serde_json::to_string(&hsp)?;
Ok(b.as_bytes().to_vec())
}
// launch_new_heal_sequence - launches a background routine that performs
// healing according to the HealSequence argument. For each heal
// sequence, state is stored in the global AllHealState, which is a
// map from the heal path to the `HealSequence` holding the state of
// that heal sequence.
//
// Heal results are persisted in server memory for
// KEEP_HEAL_SEQ_STATE_DURATION. This function also launches a
// background routine to clean up heal results after the
// aforementioned duration.
pub async fn launch_new_heal_sequence(&self, heal_sequence: Arc<HealSequence>) -> Result<Vec<u8>> {
let path = path_join(&[
PathBuf::from(heal_sequence.bucket.clone()),
PathBuf::from(heal_sequence.object.clone()),
]);
let path_s = path.to_str().unwrap();
if heal_sequence.force_started {
self.stop_heal_sequence(path_s).await?;
} else if let Some(hs) = self.get_heal_sequence(path_s).await {
if !hs.has_ended().await {
return Err(Error::other(format!(
"Heal is already running on the given path (use force-start option to stop and start afresh). The heal was started by IP {} at {:?}, token is {}",
heal_sequence.client_address, heal_sequence.start_time, heal_sequence.client_token
)));
}
}
let _ = self.mu.write().await;
for (k, v) in self.heal_seq_map.read().await.iter() {
if (has_prefix(k, path_s) || has_prefix(path_s, k)) && !v.has_ended().await {
return Err(Error::other(format!(
"The provided heal sequence path overlaps with an existing heal path: {k}"
)));
}
}
self.heal_seq_map
.write()
.await
.insert(path_s.to_string(), heal_sequence.clone());
let client_token = heal_sequence.client_token.clone();
if *GLOBAL_IsDistErasure.read().await {
// TODO: proxy
}
if heal_sequence.client_token == BG_HEALING_UUID {
// For the background heal, do nothing; do not spawn an unnecessary task.
} else {
let heal_sequence_clone = heal_sequence.clone();
spawn(async {
heal_sequence_start(heal_sequence_clone).await;
});
}
let b = serde_json::to_vec(&HealStartSuccess {
client_token,
client_address: heal_sequence.client_address.clone(),
// start_time: Utc::now(),
start_time: heal_sequence.start_time.into(),
})?;
Ok(b)
}
}
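As a usage note for the launch path above: a caller builds a HealSequence and hands it to the global heal state. The following is a minimal sketch under the assumption that the state is reachable as GLOBAL_BackgroundHealState (as it is elsewhere in this crate); start_bucket_heal is a hypothetical wrapper, not part of the diff.

// Illustrative sketch only: heal a bucket prefix and return the serialized
// HealStartSuccess reply produced by launch_new_heal_sequence.
async fn start_bucket_heal(bucket: &str, client_addr: &str) -> Result<Vec<u8>> {
    let opts = HealOpts {
        recursive: true,
        ..Default::default()
    };
    let seq = Arc::new(new_heal_sequence(bucket, "", client_addr, opts, false));
    GLOBAL_BackgroundHealState.launch_new_heal_sequence(seq).await
}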


@@ -1,183 +0,0 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::disk::{BUCKET_META_PREFIX, RUSTFS_META_BUCKET};
use crate::heal::background_heal_ops::{heal_bucket, heal_object};
use crate::heal::heal_commands::{HEAL_DEEP_SCAN, HEAL_NORMAL_SCAN};
use chrono::{DateTime, Utc};
use lazy_static::lazy_static;
use regex::Regex;
use rustfs_utils::path::SLASH_SEPARATOR;
use std::ops::Sub;
use std::sync::atomic::{AtomicBool, Ordering};
use std::time::Duration;
use tokio::sync::RwLock;
use tokio::sync::mpsc::{Receiver, Sender};
use tokio::time::sleep;
use tokio_util::sync::CancellationToken;
use tracing::{error, info};
use uuid::Uuid;
pub const MRF_OPS_QUEUE_SIZE: u64 = 100000;
pub const HEAL_DIR: &str = ".heal";
pub const HEAL_MRFMETA_FORMAT: u64 = 1;
pub const HEAL_MRFMETA_VERSION_V1: u64 = 1;
lazy_static! {
pub static ref HEAL_MRF_DIR: String =
format!("{}{}{}{}{}", BUCKET_META_PREFIX, SLASH_SEPARATOR, HEAL_DIR, SLASH_SEPARATOR, "mrf");
static ref PATTERNS: Vec<Regex> = vec![
Regex::new(r"^buckets/.*/.metacache/.*").unwrap(),
Regex::new(r"^tmp/.*").unwrap(),
Regex::new(r"^multipart/.*").unwrap(),
Regex::new(r"^tmp-old/.*").unwrap(),
];
}
#[derive(Default)]
pub struct PartialOperation {
pub bucket: String,
pub object: String,
pub version_id: Option<String>,
pub versions: Vec<u8>,
pub set_index: usize,
pub pool_index: usize,
pub queued: DateTime<Utc>,
pub bitrot_scan: bool,
}
pub struct MRFState {
tx: Sender<PartialOperation>,
rx: RwLock<Receiver<PartialOperation>>,
closed: AtomicBool,
closing: AtomicBool,
}
impl Default for MRFState {
fn default() -> Self {
Self::new()
}
}
impl MRFState {
pub fn new() -> MRFState {
let (tx, rx) = tokio::sync::mpsc::channel(MRF_OPS_QUEUE_SIZE as usize);
MRFState {
tx,
rx: RwLock::new(rx),
closed: Default::default(),
closing: Default::default(),
}
}
pub async fn add_partial(&self, op: PartialOperation) {
if self.closed.load(Ordering::SeqCst) || self.closing.load(Ordering::SeqCst) {
return;
}
let _ = self.tx.send(op).await;
}
/// Enhanced heal routine with cancellation support
///
/// This method implements the same healing logic as the original heal_routine,
/// but adds proper cancellation support via CancellationToken.
/// The core logic remains identical to maintain compatibility.
pub async fn heal_routine_with_cancel(&self, cancel_token: CancellationToken) {
info!("MRF heal routine started with cancellation support");
loop {
tokio::select! {
_ = cancel_token.cancelled() => {
info!("MRF heal routine received shutdown signal, exiting gracefully");
break;
}
op_result = async {
let mut rx_guard = self.rx.write().await;
rx_guard.recv().await
} => {
if let Some(op) = op_result {
// Special path filtering (original logic): metacache, tmp and multipart
// paths under the meta bucket never need MRF healing, so skip them.
if op.bucket == RUSTFS_META_BUCKET && PATTERNS.iter().any(|p| p.is_match(&op.object)) {
continue; // Skip this operation, continue with next
}
// Network reconnection delay (original logic)
let now = Utc::now();
if now.sub(op.queued).num_seconds() < 1 {
tokio::select! {
_ = cancel_token.cancelled() => {
info!("MRF heal routine cancelled during reconnection delay");
break;
}
_ = sleep(Duration::from_secs(1)) => {}
}
}
// Core healing logic (original logic preserved)
let scan_mode = if op.bitrot_scan { HEAL_DEEP_SCAN } else { HEAL_NORMAL_SCAN };
if op.object.is_empty() {
// Heal bucket (original logic)
if let Err(err) = heal_bucket(&op.bucket).await {
error!("heal bucket failed, bucket: {}, err: {:?}", op.bucket, err);
}
} else if op.versions.is_empty() {
// Heal single object (original logic)
if let Err(err) = heal_object(
&op.bucket,
&op.object,
&op.version_id.clone().unwrap_or_default(),
scan_mode
).await {
error!("heal object failed, bucket: {}, object: {}, err: {:?}", op.bucket, op.object, err);
}
} else {
// Heal multiple versions (original logic)
let vers = op.versions.len() / 16;
if vers > 0 {
for i in 0..vers {
// Check for cancellation before each version
if cancel_token.is_cancelled() {
info!("MRF heal routine cancelled during version processing");
return;
}
let start = i * 16;
let end = start + 16;
if let Err(err) = heal_object(
&op.bucket,
&op.object,
&Uuid::from_slice(&op.versions[start..end]).expect("").to_string(),
scan_mode,
).await {
error!("heal object failed, bucket: {}, object: {}, err: {:?}", op.bucket, op.object, err);
}
}
}
}
} else {
info!("MRF heal routine channel closed, exiting");
break;
}
}
}
}
info!("MRF heal routine stopped gracefully");
}
}
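A minimal wiring sketch for the routine above follows; it is illustrative only, not part of the changed files, and the helper name run_mrf_example plus the bucket/object values are placeholders.

// Illustrative sketch: run the MRF heal loop in the background with graceful
// shutdown, and queue one partial operation for later re-healing.
use std::sync::Arc;

async fn run_mrf_example() {
    let mrf = Arc::new(MRFState::new());
    let cancel = CancellationToken::new();

    let worker = {
        let mrf = mrf.clone();
        let cancel = cancel.clone();
        tokio::spawn(async move { mrf.heal_routine_with_cancel(cancel).await })
    };

    // A failed or partial write is queued; the routine picks the scan mode from
    // `bitrot_scan` and heals the object (or the whole bucket when `object` is empty).
    mrf.add_partial(PartialOperation {
        bucket: "example-bucket".to_string(),
        object: "example/object".to_string(),
        queued: Utc::now(),
        ..Default::default()
    })
    .await;

    // On shutdown, cancel the token and wait for the loop to exit.
    cancel.cancel();
    let _ = worker.await;
}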


@@ -23,13 +23,14 @@ mod chunk_stream;
pub mod cmd;
pub mod compress;
pub mod config;
pub mod data_usage;
pub mod disk;
pub mod disks_layout;
pub mod endpoints;
pub mod erasure_coding;
pub mod error;
pub mod global;
pub mod heal;
pub mod lock_utils;
pub mod metrics_realtime;
pub mod notification_sys;
pub mod pools;


@@ -0,0 +1,136 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::disk::endpoint::Endpoint;
use crate::error::Result;
use rustfs_lock::client::{LockClient, local::LocalClient, remote::RemoteClient};
use std::collections::HashMap;
use std::sync::Arc;
/// Create unique lock clients from endpoints
/// This function creates one client per unique host:port combination
/// to avoid duplicate connections to the same server
pub async fn create_unique_clients(endpoints: &[Endpoint]) -> Result<Vec<Arc<dyn LockClient>>> {
let mut unique_endpoints: HashMap<String, &Endpoint> = HashMap::new();
// Collect unique endpoints based on host:port
for endpoint in endpoints {
if endpoint.is_local {
// For local endpoints, use "local" as the key
unique_endpoints.insert("local".to_string(), endpoint);
} else {
// For remote endpoints, use host:port as the key
let host_port = format!(
"{}:{}",
endpoint.url.host_str().unwrap_or("localhost"),
endpoint.url.port().unwrap_or(9000)
);
unique_endpoints.insert(host_port, endpoint);
}
}
let mut clients = Vec::new();
// Create clients for unique endpoints
for (_key, endpoint) in unique_endpoints {
if endpoint.is_local {
// For local endpoints, create a local lock client
let local_client = LocalClient::new();
clients.push(Arc::new(local_client) as Arc<dyn LockClient>);
} else {
// For remote endpoints, create a remote lock client
let remote_client = RemoteClient::new(endpoint.url.to_string());
clients.push(Arc::new(remote_client) as Arc<dyn LockClient>);
}
}
Ok(clients)
}
#[cfg(test)]
mod tests {
use super::*;
use url::Url;
#[tokio::test]
async fn test_create_unique_clients_local() {
let endpoints = vec![
Endpoint {
url: Url::parse("http://localhost:9000").unwrap(),
is_local: true,
pool_idx: 0,
set_idx: 0,
disk_idx: 0,
},
Endpoint {
url: Url::parse("http://localhost:9000").unwrap(),
is_local: true,
pool_idx: 0,
set_idx: 0,
disk_idx: 1,
},
];
let clients = create_unique_clients(&endpoints).await.unwrap();
// Should only create one client for local endpoints
assert_eq!(clients.len(), 1);
assert!(clients[0].is_local().await);
}
#[tokio::test]
async fn test_create_unique_clients_mixed() {
let endpoints = vec![
Endpoint {
url: Url::parse("http://localhost:9000").unwrap(),
is_local: true,
pool_idx: 0,
set_idx: 0,
disk_idx: 0,
},
Endpoint {
url: Url::parse("http://remote1:9000").unwrap(),
is_local: false,
pool_idx: 0,
set_idx: 0,
disk_idx: 1,
},
Endpoint {
url: Url::parse("http://remote1:9000").unwrap(),
is_local: false,
pool_idx: 0,
set_idx: 0,
disk_idx: 2,
},
Endpoint {
url: Url::parse("http://remote2:9000").unwrap(),
is_local: false,
pool_idx: 0,
set_idx: 0,
disk_idx: 3,
},
];
let clients = create_unique_clients(&endpoints).await.unwrap();
// Should create 3 clients: 1 local + 2 unique remote
assert_eq!(clients.len(), 3);
// Check that we have one local client
let local_count = clients.iter().filter(|c| futures::executor::block_on(c.is_local())).count();
assert_eq!(local_count, 1);
// Check that we have two remote clients
let remote_count = clients.iter().filter(|c| !futures::executor::block_on(c.is_local())).count();
assert_eq!(remote_count, 2);
}
}


@@ -15,7 +15,11 @@
use std::collections::{HashMap, HashSet};
use chrono::Utc;
use rustfs_common::globals::{GLOBAL_Local_Node_Name, GLOBAL_Rustfs_Addr};
use rustfs_common::{
globals::{GLOBAL_Local_Node_Name, GLOBAL_Rustfs_Addr},
heal_channel::DriveState,
metrics::globalMetrics,
};
use rustfs_madmin::metrics::{DiskIOStats, DiskMetric, RealtimeMetrics};
use rustfs_utils::os::get_drive_stats;
use serde::{Deserialize, Serialize};
@@ -23,10 +27,6 @@ use tracing::info;
use crate::{
admin_server_info::get_local_server_property,
heal::{
data_scanner_metric::globalScannerMetrics,
heal_commands::{DRIVE_STATE_OK, DRIVE_STATE_UNFORMATTED},
},
new_object_layer_fn,
store_api::StorageAPI,
// utils::os::get_drive_stats,
@@ -108,7 +108,7 @@ pub async fn collect_local_metrics(types: MetricType, opts: &CollectMetricsOpts)
if types.contains(&MetricType::SCANNER) {
info!("start get scanner metrics");
let metrics = globalScannerMetrics.report().await;
let metrics = globalMetrics.report().await;
real_time_metrics.aggregated.scanner = Some(metrics);
}
@@ -147,7 +147,7 @@ async fn collect_local_disks_metrics(disks: &HashSet<String>) -> HashMap<String,
continue;
}
if d.state != *DRIVE_STATE_OK && d.state != *DRIVE_STATE_UNFORMATTED {
if d.state != DriveState::Ok.to_string() && d.state != DriveState::Unformatted.to_string() {
metrics.insert(
d.endpoint.clone(),
DiskMetric {


@@ -15,6 +15,7 @@
use crate::bucket::versioning_sys::BucketVersioningSys;
use crate::cache_value::metacache_set::{ListPathRawOptions, list_path_raw};
use crate::config::com::{CONFIG_PREFIX, read_config, save_config};
use crate::data_usage::DATA_USAGE_CACHE_NAME;
use crate::disk::error::DiskError;
use crate::disk::{BUCKET_META_PREFIX, RUSTFS_META_BUCKET};
use crate::error::{Error, Result};
@@ -22,8 +23,6 @@ use crate::error::{
StorageError, is_err_bucket_exists, is_err_bucket_not_found, is_err_data_movement_overwrite, is_err_object_not_found,
is_err_version_not_found,
};
use crate::heal::data_usage::DATA_USAGE_CACHE_NAME;
use crate::heal::heal_commands::HealOpts;
use crate::new_object_layer_fn;
use crate::notification_sys::get_global_notification_sys;
use crate::set_disk::SetDisks;
@@ -36,6 +35,7 @@ use futures::future::BoxFuture;
use http::HeaderMap;
use rmp_serde::{Deserializer, Serializer};
use rustfs_common::defer;
use rustfs_common::heal_channel::HealOpts;
use rustfs_filemeta::{MetaCacheEntries, MetaCacheEntry, MetadataResolutionParams};
use rustfs_rio::{HashReader, WarpReader};
use rustfs_utils::path::{SLASH_SEPARATOR, encode_dir_object, path_join};
@@ -1355,7 +1355,7 @@ impl SetDisks {
disks: disks.iter().cloned().map(Some).collect(),
bucket: bucket_info.name.clone(),
path: bucket_info.prefix.clone(),
recursice: true,
recursive: true,
min_disks: listing_quorum,
agreed: Some(Box::new(move |entry: MetaCacheEntry| Box::pin(cb1(entry)))),
partial: Some(Box::new(move |entries: MetaCacheEntries, _: &[Option<DiskError>]| {


@@ -1172,7 +1172,7 @@ impl SetDisks {
ListPathRawOptions {
disks: disks.iter().cloned().map(Some).collect(),
bucket: bucket.clone(),
recursice: true,
recursive: true,
min_disks: listing_quorum,
agreed: Some(Box::new(move |entry: MetaCacheEntry| {
info!("list_objects_to_rebalance: agreed: {:?}", &entry.name);


@@ -22,4 +22,4 @@ pub use http_auth::{build_auth_headers, verify_rpc_signature};
pub use peer_rest_client::PeerRestClient;
pub use peer_s3_client::{LocalPeerS3Client, PeerS3Client, RemotePeerS3Client, S3PeerSys};
pub use remote_disk::RemoteDisk;
pub use tonic_service::make_server;
pub use tonic_service::{NodeService, make_server};


@@ -16,7 +16,6 @@ use crate::error::{Error, Result};
use crate::{
endpoints::EndpointServerPools,
global::is_dist_erasure,
heal::heal_commands::BgHealState,
metrics_realtime::{CollectMetricsOpts, MetricType},
};
use rmp_serde::{Deserializer, Serializer};
@@ -29,13 +28,12 @@ use rustfs_madmin::{
use rustfs_protos::{
node_service_time_out_client,
proto_gen::node_service::{
BackgroundHealStatusRequest, DeleteBucketMetadataRequest, DeletePolicyRequest, DeleteServiceAccountRequest,
DeleteUserRequest, GetCpusRequest, GetMemInfoRequest, GetMetricsRequest, GetNetInfoRequest, GetOsInfoRequest,
GetPartitionsRequest, GetProcInfoRequest, GetSeLinuxInfoRequest, GetSysConfigRequest, GetSysErrorsRequest,
LoadBucketMetadataRequest, LoadGroupRequest, LoadPolicyMappingRequest, LoadPolicyRequest, LoadRebalanceMetaRequest,
LoadServiceAccountRequest, LoadTransitionTierConfigRequest, LoadUserRequest, LocalStorageInfoRequest, Mss,
ReloadPoolMetaRequest, ReloadSiteReplicationConfigRequest, ServerInfoRequest, SignalServiceRequest,
StartProfilingRequest, StopRebalanceRequest,
DeleteBucketMetadataRequest, DeletePolicyRequest, DeleteServiceAccountRequest, DeleteUserRequest, GetCpusRequest,
GetMemInfoRequest, GetMetricsRequest, GetNetInfoRequest, GetOsInfoRequest, GetPartitionsRequest, GetProcInfoRequest,
GetSeLinuxInfoRequest, GetSysConfigRequest, GetSysErrorsRequest, LoadBucketMetadataRequest, LoadGroupRequest,
LoadPolicyMappingRequest, LoadPolicyRequest, LoadRebalanceMetaRequest, LoadServiceAccountRequest,
LoadTransitionTierConfigRequest, LoadUserRequest, LocalStorageInfoRequest, Mss, ReloadPoolMetaRequest,
ReloadSiteReplicationConfigRequest, ServerInfoRequest, SignalServiceRequest, StartProfilingRequest, StopRebalanceRequest,
},
};
use rustfs_utils::XHost;
@@ -601,27 +599,6 @@ impl PeerRestClient {
Ok(())
}
pub async fn background_heal_status(&self) -> Result<BgHealState> {
let mut client = node_service_time_out_client(&self.grid_host)
.await
.map_err(|err| Error::other(err.to_string()))?;
let request = Request::new(BackgroundHealStatusRequest {});
let response = client.background_heal_status(request).await?.into_inner();
if !response.success {
if let Some(msg) = response.error_info {
return Err(Error::other(msg));
}
return Err(Error::other(""));
}
let data = response.bg_heal_state;
let mut buf = Deserializer::new(Cursor::new(data));
let bg_heal_state: BgHealState = Deserialize::deserialize(&mut buf)?;
Ok(bg_heal_state)
}
pub async fn get_metacache_listing(&self) -> Result<()> {
let _client = node_service_time_out_client(&self.grid_host)
.await


@@ -17,10 +17,6 @@ use crate::disk::error::{Error, Result};
use crate::disk::error_reduce::{BUCKET_OP_IGNORED_ERRS, is_all_buckets_not_found, reduce_write_quorum_errs};
use crate::disk::{DiskAPI, DiskStore};
use crate::global::GLOBAL_LOCAL_DISK_MAP;
use crate::heal::heal_commands::{
DRIVE_STATE_CORRUPT, DRIVE_STATE_MISSING, DRIVE_STATE_OFFLINE, DRIVE_STATE_OK, HEAL_ITEM_BUCKET, HealOpts,
};
use crate::heal::heal_ops::RUSTFS_RESERVED_BUCKET;
use crate::store::all_local_disk;
use crate::store_utils::is_reserved_or_invalid_bucket;
use crate::{
@@ -30,6 +26,7 @@ use crate::{
};
use async_trait::async_trait;
use futures::future::join_all;
use rustfs_common::heal_channel::{DriveState, HealItemType, HealOpts, RUSTFS_RESERVED_BUCKET};
use rustfs_madmin::heal_commands::{HealDriveInfo, HealResultItem};
use rustfs_protos::node_service_time_out_client;
use rustfs_protos::proto_gen::node_service::{
@@ -449,7 +446,7 @@ impl PeerS3Client for LocalPeerS3Client {
op.as_ref().map(|v| BucketInfo {
name: v.name.clone(),
created: v.created,
versionning: versioned,
versioning: versioned,
..Default::default()
})
})
@@ -542,7 +539,7 @@ impl PeerS3Client for RemotePeerS3Client {
}
Ok(HealResultItem {
heal_item_type: HEAL_ITEM_BUCKET.to_string(),
heal_item_type: HealItemType::Bucket.to_string(),
bucket: bucket.to_string(),
set_count: 0,
..Default::default()
@@ -651,13 +648,13 @@ pub async fn heal_bucket_local(bucket: &str, opts: &HealOpts) -> Result<HealResu
let disk = match disk {
Some(disk) => disk,
None => {
bs_clone.write().await[index] = DRIVE_STATE_OFFLINE.to_string();
as_clone.write().await[index] = DRIVE_STATE_OFFLINE.to_string();
bs_clone.write().await[index] = DriveState::Offline.to_string();
as_clone.write().await[index] = DriveState::Offline.to_string();
return Some(Error::DiskNotFound);
}
};
bs_clone.write().await[index] = DRIVE_STATE_OK.to_string();
as_clone.write().await[index] = DRIVE_STATE_OK.to_string();
bs_clone.write().await[index] = DriveState::Ok.to_string();
as_clone.write().await[index] = DriveState::Ok.to_string();
if bucket == RUSTFS_RESERVED_BUCKET {
return None;
@@ -667,18 +664,18 @@ pub async fn heal_bucket_local(bucket: &str, opts: &HealOpts) -> Result<HealResu
Ok(_) => None,
Err(err) => match err {
Error::DiskNotFound => {
bs_clone.write().await[index] = DRIVE_STATE_OFFLINE.to_string();
as_clone.write().await[index] = DRIVE_STATE_OFFLINE.to_string();
bs_clone.write().await[index] = DriveState::Offline.to_string();
as_clone.write().await[index] = DriveState::Offline.to_string();
Some(err)
}
Error::VolumeNotFound => {
bs_clone.write().await[index] = DRIVE_STATE_MISSING.to_string();
as_clone.write().await[index] = DRIVE_STATE_MISSING.to_string();
bs_clone.write().await[index] = DriveState::Missing.to_string();
as_clone.write().await[index] = DriveState::Missing.to_string();
Some(err)
}
_ => {
bs_clone.write().await[index] = DRIVE_STATE_CORRUPT.to_string();
as_clone.write().await[index] = DRIVE_STATE_CORRUPT.to_string();
bs_clone.write().await[index] = DriveState::Corrupt.to_string();
as_clone.write().await[index] = DriveState::Corrupt.to_string();
Some(err)
}
},
@@ -687,7 +684,7 @@ pub async fn heal_bucket_local(bucket: &str, opts: &HealOpts) -> Result<HealResu
}
let errs = join_all(futures).await;
let mut res = HealResultItem {
heal_item_type: HEAL_ITEM_BUCKET.to_string(),
heal_item_type: HealItemType::Bucket.to_string(),
bucket: bucket.to_string(),
disk_count: disks.len(),
set_count: 0,
@@ -736,11 +733,11 @@ pub async fn heal_bucket_local(bucket: &str, opts: &HealOpts) -> Result<HealResu
let as_clone = after_state.clone();
let errs_clone = errs.to_vec();
futures.push(async move {
if bs_clone.read().await[idx] == DRIVE_STATE_MISSING {
if bs_clone.read().await[idx] == DriveState::Missing.to_string() {
info!("bucket not find, will recreate");
match disk.as_ref().unwrap().make_volume(&bucket).await {
Ok(_) => {
as_clone.write().await[idx] = DRIVE_STATE_OK.to_string();
as_clone.write().await[idx] = DriveState::Ok.to_string();
return None;
}
Err(err) => {


@@ -21,9 +21,9 @@ use rustfs_protos::{
node_service_time_out_client,
proto_gen::node_service::{
CheckPartsRequest, DeletePathsRequest, DeleteRequest, DeleteVersionRequest, DeleteVersionsRequest, DeleteVolumeRequest,
DiskInfoRequest, ListDirRequest, ListVolumesRequest, MakeVolumeRequest, MakeVolumesRequest, NsScannerRequest,
ReadAllRequest, ReadMultipleRequest, ReadPartsRequest, ReadVersionRequest, ReadXlRequest, RenameDataRequest,
RenameFileRequest, StatVolumeRequest, UpdateMetadataRequest, VerifyFileRequest, WriteAllRequest, WriteMetadataRequest,
DiskInfoRequest, ListDirRequest, ListVolumesRequest, MakeVolumeRequest, MakeVolumesRequest, ReadAllRequest,
ReadMultipleRequest, ReadPartsRequest, ReadVersionRequest, ReadXlRequest, RenameDataRequest, RenameFileRequest,
StatVolumeRequest, UpdateMetadataRequest, VerifyFileRequest, WriteAllRequest, WriteMetadataRequest,
},
};
@@ -32,26 +32,15 @@ use crate::disk::{
ReadMultipleReq, ReadMultipleResp, ReadOptions, RenameDataResp, UpdateMetadataOpts, VolumeInfo, WalkDirOptions,
endpoint::Endpoint,
};
use crate::disk::{FileReader, FileWriter};
use crate::{
disk::error::{Error, Result},
rpc::build_auth_headers,
};
use crate::{
disk::{FileReader, FileWriter},
heal::{
data_scanner::ShouldSleepFn,
data_usage_cache::{DataUsageCache, DataUsageEntry},
heal_commands::{HealScanMode, HealingTracker},
},
};
use rustfs_filemeta::{FileInfo, ObjectPartInfo, RawFileInfo};
use rustfs_protos::proto_gen::node_service::RenamePartRequest;
use rustfs_rio::{HttpReader, HttpWriter};
use tokio::{
io::AsyncWrite,
sync::mpsc::{self, Sender},
};
use tokio_stream::{StreamExt, wrappers::ReceiverStream};
use tokio::io::AsyncWrite;
use tonic::Request;
use tracing::info;
use uuid::Uuid;
@@ -927,55 +916,6 @@ impl DiskAPI for RemoteDisk {
Ok(disk_info)
}
#[tracing::instrument(skip(self, cache, scan_mode, _we_sleep))]
async fn ns_scanner(
&self,
cache: &DataUsageCache,
updates: Sender<DataUsageEntry>,
scan_mode: HealScanMode,
_we_sleep: ShouldSleepFn,
) -> Result<DataUsageCache> {
info!("ns_scanner");
let cache = serde_json::to_string(cache)?;
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| Error::other(format!("can not get client, err: {err}")))?;
let (tx, rx) = mpsc::channel(10);
let in_stream = ReceiverStream::new(rx);
let mut response = client.ns_scanner(in_stream).await?.into_inner();
let request = NsScannerRequest {
disk: self.endpoint.to_string(),
cache,
scan_mode: scan_mode as u64,
};
tx.send(request)
.await
.map_err(|err| Error::other(format!("can not send request, err: {err}")))?;
loop {
match response.next().await {
Some(Ok(resp)) => {
if !resp.update.is_empty() {
let data_usage_cache = serde_json::from_str::<DataUsageEntry>(&resp.update)?;
let _ = updates.send(data_usage_cache).await;
} else if !resp.data_usage_cache.is_empty() {
let data_usage_cache = serde_json::from_str::<DataUsageCache>(&resp.data_usage_cache)?;
return Ok(data_usage_cache);
} else {
return Err(Error::other("scan was interrupted"));
}
}
_ => return Err(Error::other("scan was interrupted")),
}
}
}
#[tracing::instrument(skip(self))]
async fn healing(&self) -> Option<HealingTracker> {
None
}
}
#[cfg(test)]


@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::{collections::HashMap, io::Cursor, pin::Pin};
use std::{collections::HashMap, io::Cursor, pin::Pin, sync::Arc};
// use common::error::Error as EcsError;
use crate::{
@@ -22,25 +22,21 @@ use crate::{
DeleteOptions, DiskAPI, DiskInfoOptions, DiskStore, FileInfoVersions, ReadMultipleReq, ReadOptions, UpdateMetadataOpts,
error::DiskError,
},
heal::{
data_usage_cache::DataUsageCache,
heal_commands::{HealOpts, get_local_background_heal_status},
},
metrics_realtime::{CollectMetricsOpts, MetricType, collect_local_metrics},
new_object_layer_fn,
rpc::{LocalPeerS3Client, PeerS3Client},
store::{all_local_disk_path, find_local_disk},
store_api::{BucketOptions, DeleteBucketOptions, MakeBucketOptions, StorageAPI},
};
use futures::{Stream, StreamExt};
use futures::Stream;
use futures_util::future::join_all;
use rustfs_lock::{GLOBAL_LOCAL_SERVER, Locker, lock_args::LockArgs};
use rustfs_common::globals::GLOBAL_Local_Node_Name;
use rustfs_common::{globals::GLOBAL_Local_Node_Name, heal_channel::HealOpts};
use bytes::Bytes;
use rmp_serde::{Deserializer, Serializer};
use rustfs_filemeta::{FileInfo, MetacacheReader};
use rustfs_lock::{LockClient, LockRequest};
use rustfs_madmin::health::{
get_cpus, get_mem_info, get_os_info, get_partitions, get_proc_info, get_sys_config, get_sys_errors, get_sys_services,
};
@@ -81,11 +77,16 @@ type ResponseStream<T> = Pin<Box<dyn Stream<Item = Result<T, tonic::Status>> + S
#[derive(Debug)]
pub struct NodeService {
local_peer: LocalPeerS3Client,
lock_manager: Arc<rustfs_lock::LocalClient>,
}
pub fn make_server() -> NodeService {
let local_peer = LocalPeerS3Client::new(None, None);
NodeService { local_peer }
let lock_manager = Arc::new(rustfs_lock::LocalClient::new());
NodeService {
local_peer,
lock_manager,
}
}
impl NodeService {
@@ -1434,214 +1435,158 @@ impl Node for NodeService {
}
}
type NsScannerStream = ResponseStream<NsScannerResponse>;
async fn ns_scanner(&self, request: Request<Streaming<NsScannerRequest>>) -> Result<Response<Self::NsScannerStream>, Status> {
info!("ns_scanner");
let mut in_stream = request.into_inner();
let (tx, rx) = mpsc::channel(10);
tokio::spawn(async move {
match in_stream.next().await {
Some(Ok(request)) => {
if let Some(disk) = find_local_disk(&request.disk).await {
let cache = match serde_json::from_str::<DataUsageCache>(&request.cache) {
Ok(cache) => cache,
Err(err) => {
tx.send(Ok(NsScannerResponse {
success: false,
update: "".to_string(),
data_usage_cache: "".to_string(),
error: Some(DiskError::other(format!("decode DataUsageCache failed: {err}")).into()),
}))
.await
.expect("working rx");
return;
}
};
let (updates_tx, mut updates_rx) = mpsc::channel(100);
let tx_clone = tx.clone();
let task = tokio::spawn(async move {
loop {
match updates_rx.recv().await {
Some(update) => {
let update = serde_json::to_string(&update).expect("encode failed");
tx_clone
.send(Ok(NsScannerResponse {
success: true,
update,
data_usage_cache: "".to_string(),
error: None,
}))
.await
.expect("working rx");
}
None => return,
}
}
});
let data_usage_cache = disk.ns_scanner(&cache, updates_tx, request.scan_mode as usize, None).await;
let _ = task.await;
match data_usage_cache {
Ok(data_usage_cache) => {
let data_usage_cache = serde_json::to_string(&data_usage_cache).expect("encode failed");
tx.send(Ok(NsScannerResponse {
success: true,
update: "".to_string(),
data_usage_cache,
error: None,
}))
.await
.expect("working rx");
}
Err(err) => {
tx.send(Ok(NsScannerResponse {
success: false,
update: "".to_string(),
data_usage_cache: "".to_string(),
error: Some(err.into()),
}))
.await
.expect("working rx");
}
}
} else {
tx.send(Ok(NsScannerResponse {
success: false,
update: "".to_string(),
data_usage_cache: "".to_string(),
error: Some(DiskError::other("can not find disk".to_string()).into()),
}))
.await
.expect("working rx");
}
}
_ => todo!(),
}
});
let out_stream = ReceiverStream::new(rx);
Ok(tonic::Response::new(Box::pin(out_stream)))
}
async fn lock(&self, request: Request<GenerallyLockRequest>) -> Result<Response<GenerallyLockResponse>, Status> {
let request = request.into_inner();
match &serde_json::from_str::<LockArgs>(&request.args) {
Ok(args) => match GLOBAL_LOCAL_SERVER.write().await.lock(args).await {
Ok(result) => Ok(tonic::Response::new(GenerallyLockResponse {
success: result,
error_info: None,
})),
Err(err) => Ok(tonic::Response::new(GenerallyLockResponse {
// Parse the request to extract resource and owner
let args: LockRequest = match serde_json::from_str(&request.args) {
Ok(args) => args,
Err(err) => {
return Ok(tonic::Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not lock, args: {args}, err: {err}")),
})),
},
error_info: Some(format!("can not decode args, err: {err}")),
}));
}
};
match self.lock_manager.acquire_exclusive(&args).await {
Ok(result) => Ok(tonic::Response::new(GenerallyLockResponse {
success: result.success,
error_info: None,
})),
Err(err) => Ok(tonic::Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not decode args, err: {err}")),
error_info: Some(format!(
"can not lock, resource: {0}, owner: {1}, err: {2}",
args.resource, args.owner, err
)),
})),
}
}
async fn un_lock(&self, request: Request<GenerallyLockRequest>) -> Result<Response<GenerallyLockResponse>, Status> {
let request = request.into_inner();
match &serde_json::from_str::<LockArgs>(&request.args) {
Ok(args) => match GLOBAL_LOCAL_SERVER.write().await.unlock(args).await {
Ok(result) => Ok(tonic::Response::new(GenerallyLockResponse {
success: result,
error_info: None,
})),
Err(err) => Ok(tonic::Response::new(GenerallyLockResponse {
let args: LockRequest = match serde_json::from_str(&request.args) {
Ok(args) => args,
Err(err) => {
return Ok(tonic::Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not unlock, args: {args}, err: {err}")),
})),
},
error_info: Some(format!("can not decode args, err: {err}")),
}));
}
};
match self.lock_manager.release(&args.lock_id).await {
Ok(_) => Ok(tonic::Response::new(GenerallyLockResponse {
success: true,
error_info: None,
})),
Err(err) => Ok(tonic::Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not decode args, err: {err}")),
error_info: Some(format!(
"can not unlock, resource: {0}, owner: {1}, err: {2}",
args.resource, args.owner, err
)),
})),
}
}
async fn r_lock(&self, request: Request<GenerallyLockRequest>) -> Result<Response<GenerallyLockResponse>, Status> {
let request = request.into_inner();
match &serde_json::from_str::<LockArgs>(&request.args) {
Ok(args) => match GLOBAL_LOCAL_SERVER.write().await.rlock(args).await {
Ok(result) => Ok(tonic::Response::new(GenerallyLockResponse {
success: result,
error_info: None,
})),
Err(err) => Ok(tonic::Response::new(GenerallyLockResponse {
let args: LockRequest = match serde_json::from_str(&request.args) {
Ok(args) => args,
Err(err) => {
return Ok(tonic::Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not rlock, args: {args}, err: {err}")),
})),
},
error_info: Some(format!("can not decode args, err: {err}")),
}));
}
};
match self.lock_manager.acquire_shared(&args).await {
Ok(result) => Ok(tonic::Response::new(GenerallyLockResponse {
success: result.success,
error_info: None,
})),
Err(err) => Ok(tonic::Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not decode args, err: {err}")),
error_info: Some(format!(
"can not rlock, resource: {0}, owner: {1}, err: {2}",
args.resource, args.owner, err
)),
})),
}
}
async fn r_un_lock(&self, request: Request<GenerallyLockRequest>) -> Result<Response<GenerallyLockResponse>, Status> {
let request = request.into_inner();
match &serde_json::from_str::<LockArgs>(&request.args) {
Ok(args) => match GLOBAL_LOCAL_SERVER.write().await.runlock(args).await {
Ok(result) => Ok(tonic::Response::new(GenerallyLockResponse {
success: result,
error_info: None,
})),
Err(err) => Ok(tonic::Response::new(GenerallyLockResponse {
let args: LockRequest = match serde_json::from_str(&request.args) {
Ok(args) => args,
Err(err) => {
return Ok(tonic::Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not runlock, args: {args}, err: {err}")),
})),
},
error_info: Some(format!("can not decode args, err: {err}")),
}));
}
};
match self.lock_manager.release(&args.lock_id).await {
Ok(_) => Ok(tonic::Response::new(GenerallyLockResponse {
success: true,
error_info: None,
})),
Err(err) => Ok(tonic::Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not decode args, err: {err}")),
error_info: Some(format!(
"can not runlock, resource: {0}, owner: {1}, err: {2}",
args.resource, args.owner, err
)),
})),
}
}
async fn force_un_lock(&self, request: Request<GenerallyLockRequest>) -> Result<Response<GenerallyLockResponse>, Status> {
let request = request.into_inner();
match &serde_json::from_str::<LockArgs>(&request.args) {
Ok(args) => match GLOBAL_LOCAL_SERVER.write().await.force_unlock(args).await {
Ok(result) => Ok(tonic::Response::new(GenerallyLockResponse {
success: result,
error_info: None,
})),
Err(err) => Ok(tonic::Response::new(GenerallyLockResponse {
let args: LockRequest = match serde_json::from_str(&request.args) {
Ok(args) => args,
Err(err) => {
return Ok(tonic::Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not force_unlock, args: {args}, err: {err}")),
})),
},
error_info: Some(format!("can not decode args, err: {err}")),
}));
}
};
match self.lock_manager.release(&args.lock_id).await {
Ok(_) => Ok(tonic::Response::new(GenerallyLockResponse {
success: true,
error_info: None,
})),
Err(err) => Ok(tonic::Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not decode args, err: {err}")),
error_info: Some(format!(
"can not force_unlock, resource: {0}, owner: {1}, err: {2}",
args.resource, args.owner, err
)),
})),
}
}
async fn refresh(&self, request: Request<GenerallyLockRequest>) -> Result<Response<GenerallyLockResponse>, Status> {
let request = request.into_inner();
match &serde_json::from_str::<LockArgs>(&request.args) {
Ok(args) => match GLOBAL_LOCAL_SERVER.write().await.refresh(args).await {
Ok(result) => Ok(tonic::Response::new(GenerallyLockResponse {
success: result,
error_info: None,
})),
Err(err) => Ok(tonic::Response::new(GenerallyLockResponse {
let _args: LockRequest = match serde_json::from_str(&request.args) {
Ok(args) => args,
Err(err) => {
return Ok(tonic::Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not refresh, args: {args}, err: {err}")),
})),
},
Err(err) => Ok(tonic::Response::new(GenerallyLockResponse {
success: false,
error_info: Some(format!("can not decode args, err: {err}")),
})),
}
error_info: Some(format!("can not decode args, err: {err}")),
}));
}
};
Ok(tonic::Response::new(GenerallyLockResponse {
success: true,
error_info: None,
}))
}
async fn local_storage_info(
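// Reviewer note (annotation, not part of the diff): the lock handlers above now expect
// `GenerallyLockRequest.args` to carry a JSON-encoded `LockRequest` rather than the old
// `LockArgs`. A minimal client-side sketch, assuming `LockRequest` also derives
// `Serialize` (the handlers deserialize it with serde_json) and that `LockType` is
// re-exported from the crate root (it may live at `rustfs_lock::types::LockType`);
// the resource and owner names are hypothetical:
fn example_encode_lock_args() -> serde_json::Result<String> {
    let req = rustfs_lock::LockRequest::new("bucket/object", rustfs_lock::LockType::Exclusive, "node-1")
        .with_acquire_timeout(std::time::Duration::from_secs(5));
    // The returned JSON string is what a peer places in GenerallyLockRequest.args;
    // the response's `success` / `error_info` fields mirror the handlers above.
    serde_json::to_string(&req)
}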
@@ -2157,28 +2102,7 @@ impl Node for NodeService {
&self,
_request: Request<BackgroundHealStatusRequest>,
) -> Result<Response<BackgroundHealStatusResponse>, Status> {
let (state, ok) = get_local_background_heal_status().await;
if !ok {
return Ok(tonic::Response::new(BackgroundHealStatusResponse {
success: false,
bg_heal_state: Bytes::new(),
error_info: Some("errServerNotInitialized".to_string()),
}));
}
let mut buf = Vec::new();
if let Err(err) = state.serialize(&mut Serializer::new(&mut buf)) {
return Ok(tonic::Response::new(BackgroundHealStatusResponse {
success: false,
bg_heal_state: Bytes::new(),
error_info: Some(err.to_string()),
}));
}
Ok(tonic::Response::new(BackgroundHealStatusResponse {
success: true,
bg_heal_state: buf.into(),
error_info: None,
}))
todo!()
}
async fn get_metacache_listing(
@@ -3373,20 +3297,6 @@ mod tests {
assert!(!proc_response.proc_info.is_empty());
}
#[tokio::test]
async fn test_background_heal_status() {
let service = create_test_node_service();
let request = Request::new(BackgroundHealStatusRequest {});
let response = service.background_heal_status(request).await;
assert!(response.is_ok());
let heal_response = response.unwrap().into_inner();
// May fail if heal status is not available
assert!(heal_response.success || heal_response.error_info.is_some());
}
#[tokio::test]
async fn test_reload_pool_meta() {
let service = create_test_node_service();
@@ -3629,15 +3539,15 @@ mod tests {
// Note: signal_service test is skipped because it contains todo!() and would panic
#[test]
fn test_node_service_debug() {
#[tokio::test]
async fn test_node_service_debug() {
let service = create_test_node_service();
let debug_str = format!("{service:?}");
assert!(debug_str.contains("NodeService"));
}
#[test]
fn test_node_service_creation() {
#[tokio::test]
async fn test_node_service_creation() {
let service1 = make_server();
let service2 = make_server();
@@ -3646,14 +3556,6 @@ mod tests {
assert!(format!("{service2:?}").contains("NodeService"));
}
#[tokio::test]
async fn test_all_disk_method() {
let service = create_test_node_service();
let disks = service.all_disk().await;
// Should return empty vector in test environment
assert!(disks.is_empty());
}
#[tokio::test]
async fn test_find_disk_method() {
let service = create_test_node_service();

File diff suppressed because it is too large.


@@ -28,9 +28,6 @@ use crate::{
endpoints::{Endpoints, PoolEndpoints},
error::StorageError,
global::{GLOBAL_LOCAL_DISK_SET_DRIVES, is_dist_erasure},
heal::heal_commands::{
DRIVE_STATE_CORRUPT, DRIVE_STATE_MISSING, DRIVE_STATE_OFFLINE, DRIVE_STATE_OK, HEAL_ITEM_METADATA, HealOpts,
},
set_disk::SetDisks,
store_api::{
BucketInfo, BucketOptions, CompletePart, DeleteBucketOptions, DeletedObject, GetObjectReader, HTTPRangeSpec,
@@ -41,31 +38,35 @@ use crate::{
};
use futures::future::join_all;
use http::HeaderMap;
use rustfs_common::globals::GLOBAL_Local_Node_Name;
use rustfs_common::heal_channel::HealOpts;
use rustfs_common::{
globals::GLOBAL_Local_Node_Name,
heal_channel::{DriveState, HealItemType},
};
use rustfs_filemeta::FileInfo;
use rustfs_lock::{LockApi, namespace_lock::NsLockMap, new_lock_api};
use rustfs_madmin::heal_commands::{HealDriveInfo, HealResultItem};
use rustfs_utils::{crc_hash, path::path_join_buf, sip_hash};
use tokio::sync::RwLock;
use uuid::Uuid;
use crate::heal::heal_ops::HealSequence;
use tokio::sync::broadcast::{Receiver, Sender};
use tokio::time::Duration;
use tracing::warn;
use tracing::{error, info};
use crate::lock_utils::create_unique_clients;
#[derive(Debug, Clone)]
pub struct Sets {
pub id: Uuid,
// pub sets: Vec<Objects>,
// pub disk_set: Vec<Vec<Option<DiskStore>>>, // [set_count_idx][set_drive_count_idx] = disk_idx
pub lockers: Vec<Vec<LockApi>>,
pub disk_set: Vec<Arc<SetDisks>>, // [set_count_idx][set_drive_count_idx] = disk_idx
pub pool_idx: usize,
pub endpoints: PoolEndpoints,
pub format: FormatV3,
pub partiy_count: usize,
pub parity_count: usize,
pub set_count: usize,
pub set_drive_count: usize,
pub default_parity_count: usize,
@@ -82,38 +83,36 @@ impl Drop for Sets {
}
impl Sets {
#[tracing::instrument(level = "debug", skip(disks, endpoints, fm, pool_idx, partiy_count))]
#[tracing::instrument(level = "debug", skip(disks, endpoints, fm, pool_idx, parity_count))]
pub async fn new(
disks: Vec<Option<DiskStore>>,
endpoints: &PoolEndpoints,
fm: &FormatV3,
pool_idx: usize,
partiy_count: usize,
parity_count: usize,
) -> Result<Arc<Self>> {
let set_count = fm.erasure.sets.len();
let set_drive_count = fm.erasure.sets[0].len();
let mut unique: Vec<Vec<String>> = vec![vec![]; set_count];
let mut lockers: Vec<Vec<LockApi>> = vec![vec![]; set_count];
endpoints.endpoints.as_ref().iter().enumerate().for_each(|(idx, endpoint)| {
let mut unique: Vec<Vec<String>> = (0..set_count).map(|_| vec![]).collect();
for (idx, endpoint) in endpoints.endpoints.as_ref().iter().enumerate() {
let set_idx = idx / set_drive_count;
if endpoint.is_local && !unique[set_idx].contains(&"local".to_string()) {
unique[set_idx].push("local".to_string());
lockers[set_idx].push(new_lock_api(true, None));
}
if !endpoint.is_local {
let host_port = format!("{}:{}", endpoint.url.host_str().unwrap(), endpoint.url.port().unwrap());
if !unique[set_idx].contains(&host_port) {
unique[set_idx].push(host_port);
lockers[set_idx].push(new_lock_api(false, Some(endpoint.url.clone())));
}
}
});
}
let mut disk_set = Vec::with_capacity(set_count);
for (i, locker) in lockers.iter().enumerate().take(set_count) {
for i in 0..set_count {
let mut set_drive = Vec::with_capacity(set_drive_count);
let mut set_endpoints = Vec::with_capacity(set_drive_count);
for j in 0..set_drive_count {
@@ -121,7 +120,6 @@ impl Sets {
let mut disk = disks[idx].clone();
let endpoint = endpoints.endpoints.as_ref()[idx].clone();
// let endpoint = endpoints.endpoints.as_ref().get(idx).cloned();
set_endpoints.push(endpoint);
if disk.is_none() {
@@ -165,15 +163,16 @@ impl Sets {
}
}
// warn!("sets new set_drive {:?}", &set_drive);
let lock_clients = create_unique_clients(&set_endpoints).await?;
let namespace_lock = rustfs_lock::NamespaceLock::with_clients(format!("set-{i}"), lock_clients);
let set_disks = SetDisks::new(
locker.clone(),
Arc::new(namespace_lock),
GLOBAL_Local_Node_Name.read().await.to_string(),
Arc::new(RwLock::new(NsLockMap::new(is_dist_erasure().await))),
Arc::new(RwLock::new(set_drive)),
set_drive_count,
partiy_count,
parity_count,
i,
pool_idx,
set_endpoints,
@@ -190,14 +189,13 @@ impl Sets {
id: fm.id,
// sets: todo!(),
disk_set,
lockers,
pool_idx,
endpoints: endpoints.clone(),
format: fm.clone(),
partiy_count,
parity_count,
set_count,
set_drive_count,
default_parity_count: partiy_count,
default_parity_count: parity_count,
distribution_algo: fm.erasure.distribution_algo.clone(),
exit_signal: Some(tx),
});
@@ -543,7 +541,7 @@ impl StorageAPI for Sets {
objects: Vec<ObjectToDelete>,
opts: ObjectOptions,
) -> Result<(Vec<DeletedObject>, Vec<Option<Error>>)> {
// 默认返回值
// Default return value
let mut del_objects = vec![DeletedObject::default(); objects.len()];
let mut del_errs = Vec::with_capacity(objects.len());
@@ -602,7 +600,7 @@ impl StorageAPI for Sets {
// del_errs.extend(errs);
// }
// TODO: 并发
// TODO: Implement concurrency
for (k, v) in set_obj_map {
let disks = self.get_disks(k);
let objs: Vec<ObjectToDelete> = v.iter().map(|v| v.obj.clone()).collect();
@@ -789,7 +787,7 @@ impl StorageAPI for Sets {
Err(err) => return Ok((HealResultItem::default(), Some(err))),
};
let mut res = HealResultItem {
heal_item_type: HEAL_ITEM_METADATA.to_string(),
heal_item_type: HealItemType::Metadata.to_string(),
detail: "disk-format".to_string(),
disk_count: self.set_count * self.set_drive_count,
set_count: self.set_count,
@@ -813,7 +811,6 @@ impl StorageAPI for Sets {
// return Ok((res, Some(Error::new(DiskError::CorruptedFormat))));
// }
let format_op_id = Uuid::new_v4().to_string();
let (new_format_sets, _) = new_heal_format_sets(&ref_format, self.set_count, self.set_drive_count, &formats, &errs);
if !dry_run {
let mut tmp_new_formats = vec![None; self.set_count * self.set_drive_count];
@@ -821,14 +818,14 @@ impl StorageAPI for Sets {
for (j, fm) in set.iter().enumerate() {
if let Some(fm) = fm {
res.after.drives[i * self.set_drive_count + j].uuid = fm.erasure.this.to_string();
res.after.drives[i * self.set_drive_count + j].state = DRIVE_STATE_OK.to_string();
res.after.drives[i * self.set_drive_count + j].state = DriveState::Ok.to_string();
tmp_new_formats[i * self.set_drive_count + j] = Some(fm.clone());
}
}
}
// Save new formats `format.json` on unformatted disks.
for (fm, disk) in tmp_new_formats.iter_mut().zip(disks.iter()) {
if fm.is_some() && disk.is_some() && save_format_file(disk, fm, &format_op_id).await.is_err() {
if fm.is_some() && disk.is_some() && save_format_file(disk, fm).await.is_err() {
let _ = disk.as_ref().unwrap().close().await;
*fm = None;
}
@@ -871,17 +868,6 @@ impl StorageAPI for Sets {
.await
}
#[tracing::instrument(skip(self))]
async fn heal_objects(
&self,
_bucket: &str,
_prefix: &str,
_opts: &HealOpts,
_hs: Arc<HealSequence>,
_is_meta: bool,
) -> Result<()> {
unimplemented!()
}
#[tracing::instrument(skip(self))]
async fn get_pool_and_set(&self, _id: &str) -> Result<(Option<usize>, Option<usize>, Option<usize>)> {
unimplemented!()
}
@@ -889,6 +875,13 @@ impl StorageAPI for Sets {
async fn check_abandoned_parts(&self, _bucket: &str, _object: &str, _opts: &HealOpts) -> Result<()> {
unimplemented!()
}
#[tracing::instrument(skip(self))]
async fn verify_object_integrity(&self, bucket: &str, object: &str, opts: &ObjectOptions) -> Result<()> {
self.get_disks_by_key(object)
.verify_object_integrity(bucket, object, opts)
.await
}
}
async fn _close_storage_disks(disks: &[Option<DiskStore>]) {
@@ -959,17 +952,17 @@ fn formats_to_drives_info(endpoints: &Endpoints, formats: &[Option<FormatV3>], e
for (index, format) in formats.iter().enumerate() {
let drive = endpoints.get_string(index);
let state = if format.is_some() {
DRIVE_STATE_OK
DriveState::Ok.to_string()
} else if let Some(Some(err)) = errs.get(index) {
if *err == DiskError::UnformattedDisk {
DRIVE_STATE_MISSING
DriveState::Missing.to_string()
} else if *err == DiskError::DiskNotFound {
DRIVE_STATE_OFFLINE
DriveState::Offline.to_string()
} else {
DRIVE_STATE_CORRUPT
DriveState::Corrupt.to_string()
}
} else {
DRIVE_STATE_CORRUPT
DriveState::Corrupt.to_string()
};
let uuid = if let Some(format) = format {


@@ -30,11 +30,6 @@ use crate::global::{
GLOBAL_LOCAL_DISK_MAP, GLOBAL_LOCAL_DISK_SET_DRIVES, GLOBAL_TierConfigMgr, get_global_endpoints, is_dist_erasure,
is_erasure_sd, set_global_deployment_id, set_object_layer,
};
use crate::heal::data_usage::{DATA_USAGE_ROOT, DataUsageInfo};
use crate::heal::data_usage_cache::{DataUsageCache, DataUsageCacheInfo};
use crate::heal::heal_commands::{HEAL_ITEM_METADATA, HealOpts, HealScanMode};
use crate::heal::heal_ops::{HealEntryFn, HealSequence};
use crate::new_object_layer_fn;
use crate::notification_sys::get_global_notification_sys;
use crate::pools::PoolMeta;
use crate::rebalance::RebalanceMeta;
@@ -54,13 +49,12 @@ use crate::{
store_init,
};
use futures::future::join_all;
use glob::Pattern;
use http::HeaderMap;
use lazy_static::lazy_static;
use rand::Rng as _;
use rustfs_common::globals::{GLOBAL_Local_Node_Name, GLOBAL_Rustfs_Host, GLOBAL_Rustfs_Port};
use rustfs_common::heal_channel::{HealItemType, HealOpts};
use rustfs_filemeta::FileInfo;
use rustfs_filemeta::MetaCacheEntry;
use rustfs_madmin::heal_commands::HealResultItem;
use rustfs_utils::crypto::base64_decode;
use rustfs_utils::path::{SLASH_SEPARATOR, decode_dir_object, encode_dir_object, path_join_buf};
@@ -73,9 +67,8 @@ use std::time::SystemTime;
use std::{collections::HashMap, sync::Arc, time::Duration};
use time::OffsetDateTime;
use tokio::select;
use tokio::sync::mpsc::Sender;
use tokio::sync::{RwLock, broadcast, mpsc};
use tokio::time::{interval, sleep};
use tokio::sync::{RwLock, broadcast};
use tokio::time::sleep;
use tracing::{debug, info};
use tracing::{error, warn};
use uuid::Uuid;
@@ -152,7 +145,7 @@ impl ECStore {
common_parity_drives = parity_drives;
}
// validate_parity(partiy_count, pool_eps.drives_per_set)?;
// validate_parity(parity_count, pool_eps.drives_per_set)?;
let (disks, errs) = store_init::init_disks(
&pool_eps.endpoints,
@@ -302,13 +295,13 @@ impl ECStore {
}
let pools = meta.return_resumable_pools();
let mut pool_indeces = Vec::with_capacity(pools.len());
let mut pool_indices = Vec::with_capacity(pools.len());
let endpoints = get_global_endpoints();
for p in pools.iter() {
if let Some(idx) = endpoints.get_pool_idx(&p.cmd_line) {
pool_indeces.push(idx);
pool_indices.push(idx);
} else {
return Err(Error::other(format!(
"unexpected state present for decommission status pool({}) not found",
@@ -317,8 +310,8 @@ impl ECStore {
}
}
if !pool_indeces.is_empty() {
let idx = pool_indeces[0];
if !pool_indices.is_empty() {
let idx = pool_indices[0];
if endpoints.as_ref()[idx].endpoints.as_ref()[0].is_local {
let (_tx, rx) = broadcast::channel(1);
@@ -328,9 +321,9 @@ impl ECStore {
// wait 3 minutes for cluster init
tokio::time::sleep(Duration::from_secs(60 * 3)).await;
if let Err(err) = store.decommission(rx.resubscribe(), pool_indeces.clone()).await {
if let Err(err) = store.decommission(rx.resubscribe(), pool_indices.clone()).await {
if err == StorageError::DecommissionAlreadyRunning {
for i in pool_indeces.iter() {
for i in pool_indices.iter() {
store.do_decommission_in_routine(rx.resubscribe(), *i).await;
}
return;
@@ -417,9 +410,9 @@ impl ECStore {
// // TODO handle errs
// continue;
// }
// let entrys = disks_res.as_ref().unwrap();
// let entries = disks_res.as_ref().unwrap();
// for entry in entrys {
// for entry in entries {
// // warn!("lst_merged entry---- {}", &entry.name);
// if !opts.prefix.is_empty() && !entry.name.starts_with(&opts.prefix) {
@@ -811,123 +804,6 @@ impl ECStore {
errs
}
pub async fn ns_scanner(
&self,
updates: Sender<DataUsageInfo>,
want_cycle: usize,
heal_scan_mode: HealScanMode,
) -> Result<()> {
info!("ns_scanner updates - {}", want_cycle);
let all_buckets = self.list_bucket(&BucketOptions::default()).await?;
if all_buckets.is_empty() {
info!("No buckets found");
let _ = updates.send(DataUsageInfo::default()).await;
return Ok(());
}
let mut total_results = 0;
let mut result_index = 0;
self.pools.iter().for_each(|pool| {
total_results += pool.disk_set.len();
});
let results = Arc::new(RwLock::new(vec![DataUsageCache::default(); total_results]));
let (cancel, _) = broadcast::channel(100);
let first_err = Arc::new(RwLock::new(None));
let mut futures = Vec::new();
for pool in self.pools.iter() {
for set in pool.disk_set.iter() {
let index = result_index;
let results_clone = results.clone();
let first_err_clone = first_err.clone();
let cancel_clone = cancel.clone();
let all_buckets_clone = all_buckets.clone();
futures.push(async move {
let (tx, mut rx) = mpsc::channel(1);
let task = tokio::spawn(async move {
loop {
match rx.recv().await {
Some(info) => {
results_clone.write().await[index] = info;
}
None => {
return;
}
}
}
});
if let Err(err) = set
.clone()
.ns_scanner(&all_buckets_clone, want_cycle as u32, tx, heal_scan_mode)
.await
{
let mut f_w = first_err_clone.write().await;
if f_w.is_none() {
*f_w = Some(err);
}
let _ = cancel_clone.send(true);
return;
}
let _ = task.await;
});
result_index += 1;
}
}
let (update_closer_tx, mut update_close_rx) = mpsc::channel(10);
let mut ctx_clone = cancel.subscribe();
let all_buckets_clone = all_buckets.clone();
// New: read the scanner interval from an environment variable, defaulting to 30 seconds
let ns_scanner_interval_secs = std::env::var("RUSTFS_NS_SCANNER_INTERVAL")
.ok()
.and_then(|v| v.parse::<u64>().ok())
.unwrap_or(30);
// Check whether the background task should be skipped
let skip_background_task = std::env::var("RUSTFS_SKIP_BACKGROUND_TASK")
.ok()
.and_then(|v| v.parse::<bool>().ok())
.unwrap_or(false);
if skip_background_task {
info!("Skipping background task execution: RUSTFS_SKIP_BACKGROUND_TASK=true");
return Ok(());
}
let task = tokio::spawn(async move {
let mut last_update: Option<SystemTime> = None;
let mut interval = interval(Duration::from_secs(ns_scanner_interval_secs));
let all_merged = Arc::new(RwLock::new(DataUsageCache::default()));
loop {
select! {
_ = ctx_clone.recv() => {
return;
}
_ = update_close_rx.recv() => {
update_scan(all_merged.clone(), results.clone(), &mut last_update, all_buckets_clone.clone(), updates.clone()).await;
return;
}
_ = interval.tick() => {
update_scan(all_merged.clone(), results.clone(), &mut last_update, all_buckets_clone.clone(), updates.clone()).await;
}
}
}
});
let _ = join_all(futures).await;
let mut ctx_closer = cancel.subscribe();
select! {
_ = update_closer_tx.send(true) => {
}
_ = ctx_closer.recv() => {
}
}
let _ = task.await;
if let Some(err) = first_err.read().await.as_ref() {
return Err(err.clone());
}
Ok(())
}
async fn get_latest_object_info_with_idx(
&self,
bucket: &str,
@@ -1068,34 +944,6 @@ impl ECStore {
}
}
#[tracing::instrument(level = "info", skip(all_buckets, updates))]
async fn update_scan(
all_merged: Arc<RwLock<DataUsageCache>>,
results: Arc<RwLock<Vec<DataUsageCache>>>,
last_update: &mut Option<SystemTime>,
all_buckets: Vec<BucketInfo>,
updates: Sender<DataUsageInfo>,
) {
let mut w = all_merged.write().await;
*w = DataUsageCache {
info: DataUsageCacheInfo {
name: DATA_USAGE_ROOT.to_string(),
..Default::default()
},
..Default::default()
};
for info in results.read().await.iter() {
if info.info.last_update.is_none() {
return;
}
w.merge(info);
}
if (last_update.is_none() || w.info.last_update > *last_update) && w.root().is_some() {
let _ = updates.send(w.dui(&w.info.name, &all_buckets)).await;
*last_update = w.info.last_update;
}
}
pub async fn find_local_disk(disk_path: &String) -> Option<DiskStore> {
let disk_map = GLOBAL_LOCAL_DISK_MAP.read().await;
@@ -1415,7 +1263,7 @@ impl StorageAPI for ECStore {
if let Ok(sys) = metadata_sys::get(bucket).await {
info.created = Some(sys.created);
info.versionning = sys.versioning();
info.versioning = sys.versioning();
info.object_locking = sys.object_locking();
}
@@ -2237,7 +2085,7 @@ impl StorageAPI for ECStore {
async fn heal_format(&self, dry_run: bool) -> Result<(HealResultItem, Option<Error>)> {
info!("heal_format");
let mut r = HealResultItem {
heal_item_type: HEAL_ITEM_METADATA.to_string(),
heal_item_type: HealItemType::Metadata.to_string(),
detail: "disk-format".to_string(),
..Default::default()
};
@@ -2351,120 +2199,6 @@ impl StorageAPI for ECStore {
Ok((HealResultItem::default(), Some(Error::FileNotFound)))
}
#[tracing::instrument(skip(self))]
async fn heal_objects(
&self,
bucket: &str,
prefix: &str,
opts: &HealOpts,
hs: Arc<HealSequence>,
is_meta: bool,
) -> Result<()> {
info!("heal objects");
let opts_clone = *opts;
let heal_entry: HealEntryFn = Arc::new(move |bucket: String, entry: MetaCacheEntry, scan_mode: HealScanMode| {
let opts_clone = opts_clone;
let hs_clone = hs.clone();
Box::pin(async move {
if entry.is_dir() {
return Ok(());
}
if bucket == RUSTFS_META_BUCKET
&& Pattern::new("buckets/*/.metacache/*")
.map(|p| p.matches(&entry.name))
.unwrap_or(false)
|| Pattern::new("tmp/*").map(|p| p.matches(&entry.name)).unwrap_or(false)
|| Pattern::new("multipart/*").map(|p| p.matches(&entry.name)).unwrap_or(false)
|| Pattern::new("tmp-old/*").map(|p| p.matches(&entry.name)).unwrap_or(false)
{
return Ok(());
}
let fivs = match entry.file_info_versions(&bucket) {
Ok(fivs) => fivs,
Err(_) => {
return if is_meta {
HealSequence::heal_meta_object(hs_clone.clone(), &bucket, &entry.name, "", scan_mode).await
} else {
HealSequence::heal_object(hs_clone.clone(), &bucket, &entry.name, "", scan_mode).await
};
}
};
if opts_clone.remove && !opts_clone.dry_run {
let Some(store) = new_object_layer_fn() else {
return Err(Error::other("errServerNotInitialized"));
};
if let Err(err) = store.check_abandoned_parts(&bucket, &entry.name, &opts_clone).await {
info!("unable to check object {}/{} for abandoned data: {}", bucket, entry.name, err.to_string());
}
}
for version in fivs.versions.iter() {
if is_meta {
if let Err(err) = HealSequence::heal_meta_object(
hs_clone.clone(),
&bucket,
&version.name,
&version.version_id.map(|v| v.to_string()).unwrap_or("".to_string()),
scan_mode,
)
.await
{
match err {
Error::FileNotFound | Error::FileVersionNotFound => {}
_ => {
return Err(err);
}
}
}
} else if let Err(err) = HealSequence::heal_object(
hs_clone.clone(),
&bucket,
&version.name,
&version.version_id.map(|v| v.to_string()).unwrap_or("".to_string()),
scan_mode,
)
.await
{
match err {
Error::FileNotFound | Error::FileVersionNotFound => {}
_ => {
return Err(err);
}
}
}
}
Ok(())
})
});
let mut first_err = None;
for (idx, pool) in self.pools.iter().enumerate() {
if opts.pool.is_some() && opts.pool.unwrap() != idx {
continue;
}
//TODO: IsSuspended
for (idx, set) in pool.disk_set.iter().enumerate() {
if opts.set.is_some() && opts.set.unwrap() != idx {
continue;
}
if let Err(err) = set.list_and_heal(bucket, prefix, opts, heal_entry.clone()).await {
if first_err.is_none() {
first_err = Some(err)
}
}
}
}
if first_err.is_some() {
return Err(first_err.unwrap());
}
Ok(())
}
#[tracing::instrument(skip(self))]
async fn get_pool_and_set(&self, id: &str) -> Result<(Option<usize>, Option<usize>, Option<usize>)> {
for (pool_idx, pool) in self.pools.iter().enumerate() {
@@ -2501,6 +2235,13 @@ impl StorageAPI for ECStore {
Ok(())
}
async fn verify_object_integrity(&self, bucket: &str, object: &str, opts: &ObjectOptions) -> Result<()> {
let mut get_object_reader =
<Self as ObjectIO>::get_object_reader(self, bucket, object, None, HeaderMap::new(), opts).await?;
let _ = get_object_reader.read_all().await?;
Ok(())
}
}
async fn init_local_peer(endpoint_pools: &EndpointServerPools, host: &String, port: &String) {
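// Reviewer note (annotation, not part of the diff): verify_object_integrity reads the
// whole object back through get_object_reader, so any erasure-decode or bitrot error
// surfaces as Err. A minimal caller sketch; the bucket/object names are hypothetical and
// ObjectOptions is assumed to implement Default:
async fn example_verify(store: &ECStore) -> Result<()> {
    store
        .verify_object_integrity("my-bucket", "photos/2024/cat.png", &ObjectOptions::default())
        .await
}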


@@ -15,16 +15,16 @@
use crate::bucket::metadata_sys::get_versioning_config;
use crate::bucket::versioning::VersioningApi as _;
use crate::cmd::bucket_replication::{ReplicationStatusType, VersionPurgeStatusType};
use crate::disk::DiskStore;
use crate::error::{Error, Result};
use crate::heal::heal_ops::HealSequence;
use crate::store_utils::clean_metadata;
use crate::{
bucket::lifecycle::bucket_lifecycle_audit::LcAuditEvent,
bucket::lifecycle::lifecycle::ExpirationOptions,
bucket::lifecycle::{bucket_lifecycle_ops::TransitionedObject, lifecycle::TransitionOptions},
};
use crate::{disk::DiskStore, heal::heal_commands::HealOpts};
use http::{HeaderMap, HeaderValue};
use rustfs_common::heal_channel::HealOpts;
use rustfs_filemeta::headers::RESERVED_METADATA_PREFIX_LOWER;
use rustfs_filemeta::{FileInfo, MetaCacheEntriesSorted, ObjectPartInfo, headers::AMZ_OBJECT_TAGGING};
use rustfs_madmin::heal_commands::HealResultItem;
@@ -276,7 +276,10 @@ impl HTTPRangeSpec {
return Ok(range_length);
}
Err(Error::other("range value invaild"))
Err(Error::other(format!(
"range value invalid: start={}, end={}, expected start <= end and end >= -1",
self.start, self.end
)))
}
}
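// Reviewer note (annotation, not part of the diff): worked examples for the range check above.
//   start = 0,   end = 99  -> valid (bytes 0-99)
//   start = 500, end = -1  -> valid (end = -1 denotes an open-ended range)
//   start = 10,  end = 5   -> invalid, now reported as
//   "range value invalid: start=10, end=5, expected start <= end and end >= -1"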
@@ -336,7 +339,7 @@ pub struct BucketInfo {
pub name: String,
pub created: Option<OffsetDateTime>,
pub deleted: Option<OffsetDateTime>,
pub versionning: bool,
pub versioning: bool,
pub object_locking: bool,
}
@@ -967,6 +970,7 @@ pub trait StorageAPI: ObjectIO {
// Walk TODO:
async fn get_object_info(&self, bucket: &str, object: &str, opts: &ObjectOptions) -> Result<ObjectInfo>;
async fn verify_object_integrity(&self, bucket: &str, object: &str, opts: &ObjectOptions) -> Result<()>;
async fn copy_object(
&self,
src_bucket: &str,
@@ -1069,8 +1073,8 @@ pub trait StorageAPI: ObjectIO {
version_id: &str,
opts: &HealOpts,
) -> Result<(HealResultItem, Option<Error>)>;
async fn heal_objects(&self, bucket: &str, prefix: &str, opts: &HealOpts, hs: Arc<HealSequence>, is_meta: bool)
-> Result<()>;
// async fn heal_objects(&self, bucket: &str, prefix: &str, opts: &HealOpts, hs: Arc<HealSequence>, is_meta: bool)
// -> Result<()>;
async fn get_pool_and_set(&self, id: &str) -> Result<(Option<usize>, Option<usize>, Option<usize>)>;
async fn check_abandoned_parts(&self, bucket: &str, object: &str, opts: &HealOpts) -> Result<()>;
}


@@ -24,7 +24,6 @@ use crate::{
new_disk,
},
endpoints::Endpoints,
heal::heal_commands::init_healing_tracker,
};
use futures::future::join_all;
use std::collections::{HashMap, hash_map::Entry};
@@ -222,7 +221,7 @@ fn check_format_erasure_value(format: &FormatV3) -> Result<()> {
Ok(())
}
// load_format_erasure_all reads all foramt.json
// load_format_erasure_all reads all format.json
pub async fn load_format_erasure_all(disks: &[Option<DiskStore>], heal: bool) -> (Vec<Option<FormatV3>>, Vec<Option<DiskError>>) {
let mut futures = Vec::with_capacity(disks.len());
let mut datas = Vec::with_capacity(disks.len());
@@ -288,7 +287,7 @@ async fn save_format_file_all(disks: &[Option<DiskStore>], formats: &[Option<For
let mut futures = Vec::with_capacity(disks.len());
for (i, disk) in disks.iter().enumerate() {
futures.push(save_format_file(disk, &formats[i], ""));
futures.push(save_format_file(disk, &formats[i]));
}
let mut errors = Vec::with_capacity(disks.len());
@@ -312,7 +311,7 @@ async fn save_format_file_all(disks: &[Option<DiskStore>], formats: &[Option<For
Ok(())
}
pub async fn save_format_file(disk: &Option<DiskStore>, format: &Option<FormatV3>, heal_id: &str) -> disk::error::Result<()> {
pub async fn save_format_file(disk: &Option<DiskStore>, format: &Option<FormatV3>) -> disk::error::Result<()> {
if disk.is_none() {
return Err(DiskError::DiskNotFound);
}
@@ -331,10 +330,6 @@ pub async fn save_format_file(disk: &Option<DiskStore>, format: &Option<FormatV3
.await?;
disk.set_disk_id(Some(format.erasure.this)).await?;
if !heal_id.is_empty() {
let mut ht = init_healing_tracker(disk.clone(), heal_id).await?;
return ht.save().await;
}
Ok(())
}


@@ -776,7 +776,7 @@ impl ECStore {
fallback_disks: fallback_disks.iter().cloned().map(Some).collect(),
bucket: bucket.to_owned(),
path,
recursice: true,
recursive: true,
filter_prefix: Some(filter_prefix),
forward_to: opts.marker.clone(),
min_disks: listing_quorum,
@@ -851,8 +851,8 @@ impl ECStore {
}
};
if let Some(fiter) = opts.filter {
if fiter(&fi) {
if let Some(filter) = opts.filter {
if filter(&fi) {
let item = ObjectInfoOrErr {
item: Some(ObjectInfo::from_file_info(&fi, &bucket, &fi.name, {
if let Some(v) = &vcf { v.versioned(&fi.name) } else { false }
@@ -899,8 +899,8 @@ impl ECStore {
}
for fi in fvs.versions.iter() {
if let Some(fiter) = opts.filter {
if fiter(fi) {
if let Some(filter) = opts.filter {
if filter(fi) {
let item = ObjectInfoOrErr {
item: Some(ObjectInfo::from_file_info(fi, &bucket, &fi.name, {
if let Some(v) = &vcf { v.versioned(&fi.name) } else { false }
@@ -972,7 +972,7 @@ async fn gather_results(
let mut sender = Some(results_tx);
let mut recv = recv;
let mut entrys = Vec::new();
let mut entries = Vec::new();
while let Some(mut entry) = recv.recv().await {
if returned {
continue;
@@ -1009,11 +1009,11 @@ async fn gather_results(
// TODO: Lifecycle
if opts.limit > 0 && entrys.len() >= opts.limit as usize {
if opts.limit > 0 && entries.len() >= opts.limit as usize {
if let Some(tx) = sender {
tx.send(MetaCacheEntriesSortedResult {
entries: Some(MetaCacheEntriesSorted {
o: MetaCacheEntries(entrys.clone()),
o: MetaCacheEntries(entries.clone()),
..Default::default()
}),
err: None,
@@ -1027,15 +1027,15 @@ async fn gather_results(
continue;
}
entrys.push(Some(entry));
// entrys.push(entry);
entries.push(Some(entry));
// entries.push(entry);
}
// finish not full, return eof
if let Some(tx) = sender {
tx.send(MetaCacheEntriesSortedResult {
entries: Some(MetaCacheEntriesSorted {
o: MetaCacheEntries(entrys.clone()),
o: MetaCacheEntries(entries.clone()),
..Default::default()
}),
err: Some(Error::Unexpected.into()),
@@ -1125,10 +1125,10 @@ async fn merge_entry_channels(
if path::clean(&best_entry.name) == path::clean(&other_entry.name) {
let dir_matches = best_entry.is_dir() && other_entry.is_dir();
let suffix_matche =
let suffix_matches =
best_entry.name.ends_with(SLASH_SEPARATOR) == other_entry.name.ends_with(SLASH_SEPARATOR);
if dir_matches && suffix_matche {
if dir_matches && suffix_matches {
to_merge.push(other_idx);
continue;
}
@@ -1286,7 +1286,7 @@ impl SetDisks {
fallback_disks: fallback_disks.iter().cloned().map(Some).collect(),
bucket: opts.bucket,
path: opts.base_dir,
recursice: opts.recursive,
recursive: opts.recursive,
filter_prefix: opts.filter_prefix,
forward_to: opts.marker,
min_disks: listing_quorum,


@@ -215,7 +215,7 @@ pub struct FileInfo {
impl FileInfo {
pub fn new(object: &str, data_blocks: usize, parity_blocks: usize) -> Self {
let indexs = {
let indices = {
let cardinality = data_blocks + parity_blocks;
let mut nums = vec![0; cardinality];
let key_crc = crc32fast::hash(object.as_bytes());
@@ -233,7 +233,7 @@ impl FileInfo {
data_blocks,
parity_blocks,
block_size: BLOCK_SIZE_V2,
distribution: indexs,
distribution: indices,
..Default::default()
},
..Default::default()


@@ -702,7 +702,7 @@ impl FileMeta {
})
}
pub fn lastest_mod_time(&self) -> Option<OffsetDateTime> {
pub fn latest_mod_time(&self) -> Option<OffsetDateTime> {
if self.versions.is_empty() {
return None;
}
@@ -1523,8 +1523,7 @@ impl MetaObject {
}
pub fn uses_data_dir(&self) -> bool {
// TODO: when use inlinedata
true
!self.inlinedata()
}
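// Reviewer note (annotation, not part of the diff): an object whose payload is inlined
// into xl.meta (inlinedata() == true) now reports that it has no separate data
// directory, so callers can skip the per-version data dir, e.g.:
//     if meta_object.uses_data_dir() {
//         // remove or heal <object>/<data_dir>/ in addition to xl.meta
//     }
// (meta_object is a hypothetical &MetaObject value.)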
pub fn inlinedata(&self) -> bool {
@@ -1762,7 +1761,7 @@ impl MetaDeleteMarker {
// self.meta_sys = Some(map);
// }
// name => return Err(Error::other(format!("not suport field name {name}"))),
// name => return Err(Error::other(format!("not support field name {name}"))),
// }
// }
@@ -1962,32 +1961,32 @@ pub fn merge_file_meta_versions(
n_versions += 1;
}
} else {
let mut lastest_count = 0;
let mut latest_count = 0;
for (i, ver) in tops.iter().enumerate() {
if ver.header == latest.header {
lastest_count += 1;
latest_count += 1;
continue;
}
if i == 0 || ver.header.sorts_before(&latest.header) {
if i == 0 || lastest_count == 0 {
lastest_count = 1;
if i == 0 || latest_count == 0 {
latest_count = 1;
} else if !strict && ver.header.matches_not_strict(&latest.header) {
lastest_count += 1;
latest_count += 1;
} else {
lastest_count = 1;
latest_count = 1;
}
latest = ver.clone();
continue;
}
// Mismatch, but older.
if lastest_count > 0 && !strict && ver.header.matches_not_strict(&latest.header) {
lastest_count += 1;
if latest_count > 0 && !strict && ver.header.matches_not_strict(&latest.header) {
latest_count += 1;
continue;
}
if lastest_count > 0 && ver.header.version_id == latest.header.version_id {
if latest_count > 0 && ver.header.version_id == latest.header.version_id {
let mut x: HashMap<FileMetaVersionHeader, usize> = HashMap::new();
for a in tops.iter() {
if a.header.version_id != ver.header.version_id {
@@ -1999,12 +1998,12 @@ pub fn merge_file_meta_versions(
}
*x.entry(a_clone.header).or_insert(1) += 1;
}
lastest_count = 0;
latest_count = 0;
for (k, v) in x.iter() {
if *v < lastest_count {
if *v < latest_count {
continue;
}
if *v == lastest_count && latest.header.sorts_before(k) {
if *v == latest_count && latest.header.sorts_before(k) {
continue;
}
tops.iter().for_each(|a| {
@@ -2017,12 +2016,12 @@ pub fn merge_file_meta_versions(
}
});
lastest_count = *v;
latest_count = *v;
}
break;
}
}
if lastest_count >= quorum {
if latest_count >= quorum {
if !latest.header.free_version() {
n_versions += 1;
}

View File

@@ -221,7 +221,7 @@ impl MetaCacheEntry {
};
if self_vers.versions.len() != other_vers.versions.len() {
match self_vers.lastest_mod_time().cmp(&other_vers.lastest_mod_time()) {
match self_vers.latest_mod_time().cmp(&other_vers.latest_mod_time()) {
Ordering::Greater => return (Some(self.clone()), false),
Ordering::Less => return (Some(other.clone()), false),
_ => {}


@@ -90,7 +90,7 @@ where
T: Store,
{
pub(crate) async fn new(api: T) -> Arc<Self> {
let (sender, reciver) = mpsc::channel::<i64>(100);
let (sender, receiver) = mpsc::channel::<i64>(100);
let sys = Arc::new(Self {
api,
@@ -101,11 +101,11 @@ where
last_timestamp: AtomicI64::new(0),
});
sys.clone().init(reciver).await.unwrap();
sys.clone().init(receiver).await.unwrap();
sys
}
async fn init(self: Arc<Self>, reciver: Receiver<i64>) -> Result<()> {
async fn init(self: Arc<Self>, receiver: Receiver<i64>) -> Result<()> {
self.clone().save_iam_formatter().await?;
self.clone().load().await?;
@@ -118,7 +118,7 @@ where
let s = Arc::clone(&self);
async move {
let ticker = tokio::time::interval(Duration::from_secs(120));
tokio::pin!(ticker, reciver);
tokio::pin!(ticker, receiver);
loop {
select! {
_ = ticker.tick() => {
@@ -127,13 +127,13 @@ where
error!("iam load err {:?}", err);
}
},
i = reciver.recv() => {
info!("iam load reciver");
i = receiver.recv() => {
info!("iam load receiver");
match i {
Some(t) => {
let last = s.last_timestamp.load(Ordering::Relaxed);
if last <= t {
info!("iam load reciver load");
info!("iam load receiver load");
if let Err(err) =s.clone().load().await{
error!("iam load err {:?}", err);
}
@@ -814,7 +814,7 @@ where
let mp = MappedPolicy::new(policy);
let (_, combined_policy_stmt) = filter_policies(&self.cache, &mp.policies, "temp");
if combined_policy_stmt.is_empty() {
return Err(Error::other(format!("need poliy not found {}", IamError::NoSuchPolicy)));
return Err(Error::other(format!("Required policy not found: {}", IamError::NoSuchPolicy)));
}
self.api
@@ -987,7 +987,7 @@ where
_ => auth::ACCOUNT_OFF,
}
};
let user_entiry = UserIdentity::from(Credentials {
let user_entry = UserIdentity::from(Credentials {
access_key: access_key.to_string(),
secret_key: args.secret_key.to_string(),
status: status.to_owned(),
@@ -995,10 +995,10 @@ where
});
self.api
.save_user_identity(access_key, UserType::Reg, user_entiry.clone(), None)
.save_user_identity(access_key, UserType::Reg, user_entry.clone(), None)
.await?;
self.update_user_with_claims(access_key, user_entiry)?;
self.update_user_with_claims(access_key, user_entry)?;
Ok(OffsetDateTime::now_utc())
}
@@ -1104,7 +1104,7 @@ where
}
};
let user_entiry = UserIdentity::from(Credentials {
let user_entry = UserIdentity::from(Credentials {
access_key: access_key.to_string(),
secret_key: u.credentials.secret_key.clone(),
status: status.to_owned(),
@@ -1112,10 +1112,10 @@ where
});
self.api
.save_user_identity(access_key, UserType::Reg, user_entiry.clone(), None)
.save_user_identity(access_key, UserType::Reg, user_entry.clone(), None)
.await?;
self.update_user_with_claims(access_key, user_entiry)?;
self.update_user_with_claims(access_key, user_entry)?;
Ok(OffsetDateTime::now_utc())
}


@@ -62,8 +62,12 @@ pub trait Store: Clone + Send + Sync + 'static {
is_group: bool,
m: &mut HashMap<String, MappedPolicy>,
) -> Result<()>;
async fn load_mapped_policys(&self, user_type: UserType, is_group: bool, m: &mut HashMap<String, MappedPolicy>)
-> Result<()>;
async fn load_mapped_policies(
&self,
user_type: UserType,
is_group: bool,
m: &mut HashMap<String, MappedPolicy>,
) -> Result<()>;
async fn load_all(&self, cache: &Cache) -> Result<()>;
}


@@ -656,7 +656,7 @@ impl Store for ObjectStore {
Ok(())
}
async fn load_mapped_policys(
async fn load_mapped_policies(
&self,
user_type: UserType,
is_group: bool,


@@ -124,13 +124,13 @@ impl<T: Store> IamSys<T> {
})
}
pub async fn load_mapped_policys(
pub async fn load_mapped_policies(
&self,
user_type: UserType,
is_group: bool,
m: &mut HashMap<String, MappedPolicy>,
) -> Result<()> {
self.store.api.load_mapped_policys(user_type, is_group, m).await
self.store.api.load_mapped_policies(user_type, is_group, m).await
}
pub async fn list_polices(&self, bucket_name: &str) -> Result<HashMap<String, Policy>> {


@@ -30,6 +30,9 @@ workspace = true
[dependencies]
async-trait.workspace = true
bytes.workspace = true
futures.workspace = true
lazy_static.workspace = true
rustfs-protos.workspace = true
rand.workspace = true
serde.workspace = true
@@ -38,4 +41,7 @@ tokio.workspace = true
tonic.workspace = true
tracing.workspace = true
url.workspace = true
uuid.workspace = true
uuid.workspace = true
thiserror.workspace = true
once_cell.workspace = true
lru.workspace = true


@@ -0,0 +1,366 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use crate::{
client::LockClient,
error::Result,
local::LocalLockMap,
types::{LockId, LockInfo, LockMetadata, LockPriority, LockRequest, LockResponse, LockStats, LockType},
};
/// Local lock client
///
/// Uses global singleton LocalLockMap to ensure all clients access the same lock instance
#[derive(Debug, Clone)]
pub struct LocalClient;
impl LocalClient {
/// Create new local client
pub fn new() -> Self {
Self
}
/// Get global lock map instance
pub fn get_lock_map(&self) -> Arc<LocalLockMap> {
crate::get_global_lock_map()
}
}
impl Default for LocalClient {
fn default() -> Self {
Self::new()
}
}
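// Reviewer note (annotation, not part of the diff): because get_lock_map() returns the
// process-wide singleton, independent LocalClient values contend for the same locks,
// which is what the multi-client test at the bottom of this file relies on:
//     let a = LocalClient::new();
//     let b = LocalClient::new();
//     assert!(Arc::ptr_eq(&a.get_lock_map(), &b.get_lock_map())); // assumes the global is a single shared Arc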
#[async_trait::async_trait]
impl LockClient for LocalClient {
async fn acquire_exclusive(&self, request: &LockRequest) -> Result<LockResponse> {
let lock_map = self.get_lock_map();
let success = lock_map
.lock_with_ttl_id(request)
.await
.map_err(|e| crate::error::LockError::internal(format!("Lock acquisition failed: {e}")))?;
if success {
let lock_info = LockInfo {
id: crate::types::LockId::new_deterministic(&request.resource),
resource: request.resource.clone(),
lock_type: LockType::Exclusive,
status: crate::types::LockStatus::Acquired,
owner: request.owner.clone(),
acquired_at: std::time::SystemTime::now(),
expires_at: std::time::SystemTime::now() + request.ttl,
last_refreshed: std::time::SystemTime::now(),
metadata: request.metadata.clone(),
priority: request.priority,
wait_start_time: None,
};
Ok(LockResponse::success(lock_info, std::time::Duration::ZERO))
} else {
Ok(LockResponse::failure("Lock acquisition failed".to_string(), std::time::Duration::ZERO))
}
}
async fn acquire_shared(&self, request: &LockRequest) -> Result<LockResponse> {
let lock_map = self.get_lock_map();
let success = lock_map
.rlock_with_ttl_id(request)
.await
.map_err(|e| crate::error::LockError::internal(format!("Shared lock acquisition failed: {e}")))?;
if success {
let lock_info = LockInfo {
id: crate::types::LockId::new_deterministic(&request.resource),
resource: request.resource.clone(),
lock_type: LockType::Shared,
status: crate::types::LockStatus::Acquired,
owner: request.owner.clone(),
acquired_at: std::time::SystemTime::now(),
expires_at: std::time::SystemTime::now() + request.ttl,
last_refreshed: std::time::SystemTime::now(),
metadata: request.metadata.clone(),
priority: request.priority,
wait_start_time: None,
};
Ok(LockResponse::success(lock_info, std::time::Duration::ZERO))
} else {
Ok(LockResponse::failure("Lock acquisition failed".to_string(), std::time::Duration::ZERO))
}
}
async fn release(&self, lock_id: &LockId) -> Result<bool> {
let lock_map = self.get_lock_map();
// Try to release the lock directly by ID
match lock_map.unlock_by_id(lock_id).await {
Ok(()) => Ok(true),
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
// Try as read lock if exclusive unlock failed
match lock_map.runlock_by_id(lock_id).await {
Ok(()) => Ok(true),
Err(_) => Err(crate::error::LockError::internal("Lock ID not found".to_string())),
}
}
Err(e) => Err(crate::error::LockError::internal(format!("Release lock failed: {e}"))),
}
}
async fn refresh(&self, _lock_id: &LockId) -> Result<bool> {
// For local locks, refresh is not needed as they don't expire automatically
Ok(true)
}
async fn force_release(&self, lock_id: &LockId) -> Result<bool> {
self.release(lock_id).await
}
async fn check_status(&self, lock_id: &LockId) -> Result<Option<LockInfo>> {
let lock_map = self.get_lock_map();
// Check if the lock exists in our locks map
let locks_guard = lock_map.locks.read().await;
if let Some(entry) = locks_guard.get(lock_id) {
let entry_guard = entry.read().await;
// Determine lock type and owner based on the entry
if let Some(owner) = &entry_guard.writer {
Ok(Some(LockInfo {
id: lock_id.clone(),
resource: lock_id.resource.clone(),
lock_type: crate::types::LockType::Exclusive,
status: crate::types::LockStatus::Acquired,
owner: owner.clone(),
acquired_at: std::time::SystemTime::now(),
expires_at: std::time::SystemTime::now() + std::time::Duration::from_secs(30),
last_refreshed: std::time::SystemTime::now(),
metadata: LockMetadata::default(),
priority: LockPriority::Normal,
wait_start_time: None,
}))
} else if !entry_guard.readers.is_empty() {
Ok(Some(LockInfo {
id: lock_id.clone(),
resource: lock_id.resource.clone(),
lock_type: crate::types::LockType::Shared,
status: crate::types::LockStatus::Acquired,
owner: entry_guard.readers.iter().next().map(|(k, _)| k.clone()).unwrap_or_default(),
acquired_at: std::time::SystemTime::now(),
expires_at: std::time::SystemTime::now() + std::time::Duration::from_secs(30),
last_refreshed: std::time::SystemTime::now(),
metadata: LockMetadata::default(),
priority: LockPriority::Normal,
wait_start_time: None,
}))
} else {
Ok(None)
}
} else {
Ok(None)
}
}
async fn get_stats(&self) -> Result<LockStats> {
Ok(LockStats::default())
}
async fn close(&self) -> Result<()> {
Ok(())
}
async fn is_online(&self) -> bool {
true
}
async fn is_local(&self) -> bool {
true
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::types::LockType;
#[tokio::test]
async fn test_local_client_acquire_exclusive() {
let client = LocalClient::new();
let resource_name = format!("test-resource-exclusive-{}", uuid::Uuid::new_v4());
let request = LockRequest::new(&resource_name, LockType::Exclusive, "test-owner")
.with_acquire_timeout(std::time::Duration::from_secs(30));
let response = client.acquire_exclusive(&request).await.unwrap();
assert!(response.is_success());
// Clean up
if let Some(lock_info) = response.lock_info() {
let _ = client.release(&lock_info.id).await;
}
}
#[tokio::test]
async fn test_local_client_acquire_shared() {
let client = LocalClient::new();
let resource_name = format!("test-resource-shared-{}", uuid::Uuid::new_v4());
let request = LockRequest::new(&resource_name, LockType::Shared, "test-owner")
.with_acquire_timeout(std::time::Duration::from_secs(30));
let response = client.acquire_shared(&request).await.unwrap();
assert!(response.is_success());
// Clean up
if let Some(lock_info) = response.lock_info() {
let _ = client.release(&lock_info.id).await;
}
}
#[tokio::test]
async fn test_local_client_release() {
let client = LocalClient::new();
let resource_name = format!("test-resource-release-{}", uuid::Uuid::new_v4());
// First acquire a lock
let request = LockRequest::new(&resource_name, LockType::Exclusive, "test-owner")
.with_acquire_timeout(std::time::Duration::from_secs(30));
let response = client.acquire_exclusive(&request).await.unwrap();
assert!(response.is_success());
// Get the lock ID from the response
if let Some(lock_info) = response.lock_info() {
let result = client.release(&lock_info.id).await.unwrap();
assert!(result);
} else {
panic!("No lock info in response");
}
}
#[tokio::test]
async fn test_local_client_is_local() {
let client = LocalClient::new();
assert!(client.is_local().await);
}
#[tokio::test]
async fn test_local_client_read_write_lock_exclusion() {
let client = LocalClient::new();
let resource_name = format!("test-resource-exclusion-{}", uuid::Uuid::new_v4());
// First, acquire an exclusive lock
let exclusive_request = LockRequest::new(&resource_name, LockType::Exclusive, "exclusive-owner")
.with_acquire_timeout(std::time::Duration::from_millis(10));
let exclusive_response = client.acquire_exclusive(&exclusive_request).await.unwrap();
assert!(exclusive_response.is_success());
// Try to acquire a shared lock on the same resource - should fail
let shared_request = LockRequest::new(&resource_name, LockType::Shared, "shared-owner")
.with_acquire_timeout(std::time::Duration::from_millis(10));
let shared_response = client.acquire_shared(&shared_request).await.unwrap();
assert!(!shared_response.is_success(), "Shared lock should fail when exclusive lock exists");
// Clean up exclusive lock
if let Some(exclusive_info) = exclusive_response.lock_info() {
let _ = client.release(&exclusive_info.id).await;
}
// Now shared lock should succeed
let shared_request2 = LockRequest::new(&resource_name, LockType::Shared, "shared-owner")
.with_acquire_timeout(std::time::Duration::from_millis(10));
let shared_response2 = client.acquire_shared(&shared_request2).await.unwrap();
assert!(
shared_response2.is_success(),
"Shared lock should succeed after exclusive lock is released"
);
// Clean up
if let Some(shared_info) = shared_response2.lock_info() {
let _ = client.release(&shared_info.id).await;
}
}
#[tokio::test]
async fn test_local_client_read_write_lock_distinction() {
let client = LocalClient::new();
let resource_name = format!("test-resource-rw-{}", uuid::Uuid::new_v4());
// Test exclusive lock
let exclusive_request = LockRequest::new(&resource_name, LockType::Exclusive, "exclusive-owner")
.with_acquire_timeout(std::time::Duration::from_secs(30));
let exclusive_response = client.acquire_exclusive(&exclusive_request).await.unwrap();
assert!(exclusive_response.is_success());
if let Some(exclusive_info) = exclusive_response.lock_info() {
assert_eq!(exclusive_info.lock_type, LockType::Exclusive);
// Check status should return correct lock type
let status = client.check_status(&exclusive_info.id).await.unwrap();
assert!(status.is_some());
assert_eq!(status.unwrap().lock_type, LockType::Exclusive);
// Release exclusive lock
let result = client.release(&exclusive_info.id).await.unwrap();
assert!(result);
}
// Test shared lock
let shared_request = LockRequest::new(&resource_name, LockType::Shared, "shared-owner")
.with_acquire_timeout(std::time::Duration::from_secs(30));
let shared_response = client.acquire_shared(&shared_request).await.unwrap();
assert!(shared_response.is_success());
if let Some(shared_info) = shared_response.lock_info() {
assert_eq!(shared_info.lock_type, LockType::Shared);
// Check status should return correct lock type
let status = client.check_status(&shared_info.id).await.unwrap();
assert!(status.is_some());
assert_eq!(status.unwrap().lock_type, LockType::Shared);
// Release shared lock
let result = client.release(&shared_info.id).await.unwrap();
assert!(result);
}
}
#[tokio::test]
async fn test_multiple_local_clients_exclusive_mutex() {
let client1 = LocalClient::new();
let client2 = LocalClient::new();
let resource_name = format!("test-multi-client-mutex-{}", uuid::Uuid::new_v4());
// client1 acquire exclusive lock
let req1 = LockRequest::new(&resource_name, LockType::Exclusive, "owner1")
.with_acquire_timeout(std::time::Duration::from_millis(50));
let resp1 = client1.acquire_exclusive(&req1).await.unwrap();
assert!(resp1.is_success(), "client1 should acquire exclusive lock");
// client2 try to acquire exclusive lock, should fail
let req2 = LockRequest::new(&resource_name, LockType::Exclusive, "owner2")
.with_acquire_timeout(std::time::Duration::from_millis(50));
let resp2 = client2.acquire_exclusive(&req2).await.unwrap();
assert!(!resp2.is_success(), "client2 should not acquire exclusive lock while client1 holds it");
// client1 release lock
if let Some(lock_info) = resp1.lock_info() {
let _ = client1.release(&lock_info.id).await;
}
// client2 try again, should succeed
let resp3 = client2.acquire_exclusive(&req2).await.unwrap();
assert!(resp3.is_success(), "client2 should acquire exclusive lock after client1 releases it");
// clean up
if let Some(lock_info) = resp3.lock_info() {
let _ = client2.release(&lock_info.id).await;
}
}
}


@@ -0,0 +1,123 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
pub mod local;
pub mod remote;
use async_trait::async_trait;
use std::sync::Arc;
use crate::{
error::Result,
types::{LockId, LockInfo, LockRequest, LockResponse, LockStats},
};
/// Lock client trait
#[async_trait]
pub trait LockClient: Send + Sync + std::fmt::Debug {
/// Acquire exclusive lock
async fn acquire_exclusive(&self, request: &LockRequest) -> Result<LockResponse>;
/// Acquire shared lock
async fn acquire_shared(&self, request: &LockRequest) -> Result<LockResponse>;
/// Acquire lock (generic method)
async fn acquire_lock(&self, request: &LockRequest) -> Result<LockResponse> {
match request.lock_type {
crate::types::LockType::Exclusive => self.acquire_exclusive(request).await,
crate::types::LockType::Shared => self.acquire_shared(request).await,
}
}
/// Release lock
async fn release(&self, lock_id: &LockId) -> Result<bool>;
/// Refresh lock
async fn refresh(&self, lock_id: &LockId) -> Result<bool>;
/// Force release lock
async fn force_release(&self, lock_id: &LockId) -> Result<bool>;
/// Check lock status
async fn check_status(&self, lock_id: &LockId) -> Result<Option<LockInfo>>;
/// Get statistics
async fn get_stats(&self) -> Result<LockStats>;
/// Close client
async fn close(&self) -> Result<()>;
/// Check if client is online
async fn is_online(&self) -> bool;
/// Check if client is local
async fn is_local(&self) -> bool;
}
/// Client factory
pub struct ClientFactory;
impl ClientFactory {
/// Create local client
pub fn create_local() -> Arc<dyn LockClient> {
Arc::new(local::LocalClient::new())
}
/// Create remote client
pub fn create_remote(endpoint: String) -> Arc<dyn LockClient> {
Arc::new(remote::RemoteClient::new(endpoint))
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::types::LockType;
#[tokio::test]
async fn test_client_factory() {
let local_client = ClientFactory::create_local();
assert!(local_client.is_local().await);
let remote_client = ClientFactory::create_remote("http://localhost:8080".to_string());
assert!(!remote_client.is_local().await);
}
#[tokio::test]
async fn test_local_client_basic_operations() {
let client = ClientFactory::create_local();
let request = crate::types::LockRequest::new("test-resource", LockType::Exclusive, "test-owner");
// Test lock acquisition
let response = client.acquire_exclusive(&request).await;
assert!(response.is_ok());
if let Ok(response) = response {
if response.success {
let lock_info = response.lock_info.unwrap();
// Test status check
let status = client.check_status(&lock_info.id).await;
assert!(status.is_ok());
assert!(status.unwrap().is_some());
// Test lock release
let released = client.release(&lock_info.id).await;
assert!(released.is_ok());
assert!(released.unwrap());
}
}
}
}
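
The LockClient trait keeps callers agnostic about where a lock actually lives: ClientFactory returns Arc<dyn LockClient>, so the same code path can drive a LocalClient or a RemoteClient, and the default acquire_lock method dispatches on the request's lock_type. Below is a hedged sketch of a helper written against the trait object; the with_lock name and its shape are illustrative assumptions, not part of this diff.

use std::sync::Arc;

// Illustrative helper: acquire through the generic entry point, do work, then release.
// Assumes LockClient, LockRequest, and the crate's Result type are in scope.
async fn with_lock(client: Arc<dyn LockClient>, request: &LockRequest) -> Result<bool> {
    let resp = client.acquire_lock(request).await?; // exclusive or shared, per request.lock_type
    if let Some(info) = resp.lock_info() {
        // ... work performed while holding the lock ...
        return client.release(&info.id).await;
    }
    Ok(false) // acquisition was not successful; nothing to release
}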

View File

@@ -0,0 +1,403 @@
// Copyright 2024 RustFS Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use async_trait::async_trait;
use rustfs_protos::{
node_service_time_out_client,
proto_gen::node_service::{GenerallyLockRequest, PingRequest},
};
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::RwLock;
use tonic::Request;
use tracing::info;
use crate::{
error::{LockError, Result},
types::{LockId, LockInfo, LockRequest, LockResponse, LockStats},
};
use super::LockClient;
/// Remote lock client implementation
#[derive(Debug)]
pub struct RemoteClient {
addr: String,
// Track active locks with their original owner information
active_locks: Arc<RwLock<HashMap<LockId, String>>>, // lock_id -> owner
}
impl Clone for RemoteClient {
fn clone(&self) -> Self {
Self {
addr: self.addr.clone(),
active_locks: self.active_locks.clone(),
}
}
}
impl RemoteClient {
pub fn new(endpoint: String) -> Self {
Self {
addr: endpoint,
active_locks: Arc::new(RwLock::new(HashMap::new())),
}
}
pub fn from_url(url: url::Url) -> Self {
Self {
addr: url.to_string(),
active_locks: Arc::new(RwLock::new(HashMap::new())),
}
}
/// Create a minimal LockRequest used for unlock, refresh, force-release, and status-probe calls
fn create_unlock_request(&self, lock_id: &LockId, owner: &str) -> LockRequest {
LockRequest {
lock_id: lock_id.clone(),
resource: lock_id.resource.clone(),
lock_type: crate::types::LockType::Exclusive, // Type doesn't matter for unlock
owner: owner.to_string(),
acquire_timeout: std::time::Duration::from_secs(30),
ttl: std::time::Duration::from_secs(300),
metadata: crate::types::LockMetadata::default(),
priority: crate::types::LockPriority::Normal,
deadlock_detection: false,
}
}
}
#[async_trait]
impl LockClient for RemoteClient {
async fn acquire_exclusive(&self, request: &LockRequest) -> Result<LockResponse> {
info!("remote acquire_exclusive for {}", request.resource);
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| LockError::internal(format!("can not get client, err: {err}")))?;
let req = Request::new(GenerallyLockRequest {
args: serde_json::to_string(&request)
.map_err(|e| LockError::internal(format!("Failed to serialize request: {e}")))?,
});
let resp = client
.lock(req)
.await
.map_err(|e| LockError::internal(e.to_string()))?
.into_inner();
// Check for explicit error first
if let Some(error_info) = resp.error_info {
return Err(LockError::internal(error_info));
}
// Check if the lock acquisition was successful
if resp.success {
// Save the lock information for later release
let mut locks = self.active_locks.write().await;
locks.insert(request.lock_id.clone(), request.owner.clone());
Ok(LockResponse::success(
LockInfo {
id: request.lock_id.clone(),
resource: request.resource.clone(),
lock_type: request.lock_type,
status: crate::types::LockStatus::Acquired,
owner: request.owner.clone(),
acquired_at: std::time::SystemTime::now(),
expires_at: std::time::SystemTime::now() + request.ttl,
last_refreshed: std::time::SystemTime::now(),
metadata: request.metadata.clone(),
priority: request.priority,
wait_start_time: None,
},
std::time::Duration::ZERO,
))
} else {
// Lock acquisition failed
Ok(LockResponse::failure(
"Lock acquisition failed on remote server".to_string(),
std::time::Duration::ZERO,
))
}
}
async fn acquire_shared(&self, request: &LockRequest) -> Result<LockResponse> {
info!("remote acquire_shared for {}", request.resource);
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| LockError::internal(format!("can not get client, err: {err}")))?;
let req = Request::new(GenerallyLockRequest {
args: serde_json::to_string(&request)
.map_err(|e| LockError::internal(format!("Failed to serialize request: {e}")))?,
});
let resp = client
.r_lock(req)
.await
.map_err(|e| LockError::internal(e.to_string()))?
.into_inner();
// Check for explicit error first
if let Some(error_info) = resp.error_info {
return Err(LockError::internal(error_info));
}
// Check if the lock acquisition was successful
if resp.success {
// Save the lock information for later release
let mut locks = self.active_locks.write().await;
locks.insert(request.lock_id.clone(), request.owner.clone());
Ok(LockResponse::success(
LockInfo {
id: request.lock_id.clone(),
resource: request.resource.clone(),
lock_type: request.lock_type,
status: crate::types::LockStatus::Acquired,
owner: request.owner.clone(),
acquired_at: std::time::SystemTime::now(),
expires_at: std::time::SystemTime::now() + request.ttl,
last_refreshed: std::time::SystemTime::now(),
metadata: request.metadata.clone(),
priority: request.priority,
wait_start_time: None,
},
std::time::Duration::ZERO,
))
} else {
// Lock acquisition failed
Ok(LockResponse::failure(
"Shared lock acquisition failed on remote server".to_string(),
std::time::Duration::ZERO,
))
}
}
async fn release(&self, lock_id: &LockId) -> Result<bool> {
info!("remote release for {}", lock_id);
// Get the original owner for this lock
let owner = {
let locks = self.active_locks.read().await;
locks.get(lock_id).cloned().unwrap_or_else(|| "remote".to_string())
};
let unlock_request = self.create_unlock_request(lock_id, &owner);
let request_string = serde_json::to_string(&unlock_request)
.map_err(|e| LockError::internal(format!("Failed to serialize request: {e}")))?;
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| LockError::internal(format!("can not get client, err: {err}")))?;
// Try UnLock first (for exclusive locks)
let req = Request::new(GenerallyLockRequest {
args: request_string.clone(),
});
let resp = client.un_lock(req).await;
let success = if resp.is_err() {
// If that fails, try RUnLock (for shared locks)
let req = Request::new(GenerallyLockRequest { args: request_string });
let resp = client
.r_un_lock(req)
.await
.map_err(|e| LockError::internal(e.to_string()))?
.into_inner();
if let Some(error_info) = resp.error_info {
return Err(LockError::internal(error_info));
}
resp.success
} else {
let resp = resp.map_err(|e| LockError::internal(e.to_string()))?.into_inner();
if let Some(error_info) = resp.error_info {
return Err(LockError::internal(error_info));
}
resp.success
};
// Remove the lock from our tracking if successful
if success {
let mut locks = self.active_locks.write().await;
locks.remove(lock_id);
}
Ok(success)
}
async fn refresh(&self, lock_id: &LockId) -> Result<bool> {
info!("remote refresh for {}", lock_id);
let refresh_request = self.create_unlock_request(lock_id, "remote");
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| LockError::internal(format!("can not get client, err: {err}")))?;
let req = Request::new(GenerallyLockRequest {
args: serde_json::to_string(&refresh_request)
.map_err(|e| LockError::internal(format!("Failed to serialize request: {e}")))?,
});
let resp = client
.refresh(req)
.await
.map_err(|e| LockError::internal(e.to_string()))?
.into_inner();
if let Some(error_info) = resp.error_info {
return Err(LockError::internal(error_info));
}
Ok(resp.success)
}
async fn force_release(&self, lock_id: &LockId) -> Result<bool> {
info!("remote force_release for {}", lock_id);
let force_request = self.create_unlock_request(lock_id, "remote");
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| LockError::internal(format!("can not get client, err: {err}")))?;
let req = Request::new(GenerallyLockRequest {
args: serde_json::to_string(&force_request)
.map_err(|e| LockError::internal(format!("Failed to serialize request: {e}")))?,
});
let resp = client
.force_un_lock(req)
.await
.map_err(|e| LockError::internal(e.to_string()))?
.into_inner();
if let Some(error_info) = resp.error_info {
return Err(LockError::internal(error_info));
}
Ok(resp.success)
}
async fn check_status(&self, lock_id: &LockId) -> Result<Option<LockInfo>> {
info!("remote check_status for {}", lock_id);
// Since there's no direct status query in the gRPC service,
// we probe by attempting to acquire the lock and checking whether that succeeds
let status_request = self.create_unlock_request(lock_id, "remote");
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| LockError::internal(format!("can not get client, err: {err}")))?;
// Attempt to acquire the lock to test availability; if it succeeds we release it immediately
let req = Request::new(GenerallyLockRequest {
args: serde_json::to_string(&status_request)
.map_err(|e| LockError::internal(format!("Failed to serialize request: {e}")))?,
});
// Try the exclusive lock; success means no one was holding the resource
let resp = client.lock(req).await;
match resp {
Ok(response) => {
let resp = response.into_inner();
if resp.success {
// If we successfully acquired the lock, the resource was free
// Immediately release it
let release_req = Request::new(GenerallyLockRequest {
args: serde_json::to_string(&status_request)
.map_err(|e| LockError::internal(format!("Failed to serialize request: {e}")))?,
});
let _ = client.un_lock(release_req).await; // Best effort release
// Return None since no one was holding the lock
Ok(None)
} else {
// Lock acquisition failed, meaning someone is holding it
// We can't determine the exact details remotely, so return a generic status
Ok(Some(LockInfo {
id: lock_id.clone(),
resource: lock_id.as_str().to_string(),
lock_type: crate::types::LockType::Exclusive, // We can't know the exact type
status: crate::types::LockStatus::Acquired,
owner: "unknown".to_string(), // Remote client can't determine owner
acquired_at: std::time::SystemTime::now(),
expires_at: std::time::SystemTime::now() + std::time::Duration::from_secs(3600),
last_refreshed: std::time::SystemTime::now(),
metadata: crate::types::LockMetadata::default(),
priority: crate::types::LockPriority::Normal,
wait_start_time: None,
}))
}
}
Err(_) => {
// Communication error or lock is held
Ok(Some(LockInfo {
id: lock_id.clone(),
resource: lock_id.as_str().to_string(),
lock_type: crate::types::LockType::Exclusive,
status: crate::types::LockStatus::Acquired,
owner: "unknown".to_string(),
acquired_at: std::time::SystemTime::now(),
expires_at: std::time::SystemTime::now() + std::time::Duration::from_secs(3600),
last_refreshed: std::time::SystemTime::now(),
metadata: crate::types::LockMetadata::default(),
priority: crate::types::LockPriority::Normal,
wait_start_time: None,
}))
}
}
}
async fn get_stats(&self) -> Result<LockStats> {
info!("remote get_stats from {}", self.addr);
// Since there's no direct statistics endpoint in the gRPC service,
// we return basic stats indicating this is a remote client
let stats = LockStats {
last_updated: std::time::SystemTime::now(),
..Default::default()
};
// We could potentially enhance this by:
// 1. Keeping local counters of operations performed
// 2. Adding a stats gRPC method to the service
// 3. Querying server health endpoints
// For now, return minimal stats indicating remote connectivity
Ok(stats)
}
async fn close(&self) -> Result<()> {
Ok(())
}
async fn is_online(&self) -> bool {
// Use Ping interface to test if remote service is online
let mut client = match node_service_time_out_client(&self.addr).await {
Ok(client) => client,
Err(_) => {
info!("remote client {} connection failed", self.addr);
return false;
}
};
let ping_req = Request::new(PingRequest {
version: 1,
body: bytes::Bytes::new(),
});
match client.ping(ping_req).await {
Ok(_) => {
info!("remote client {} is online", self.addr);
true
}
Err(_) => {
info!("remote client {} ping failed", self.addr);
false
}
}
}
async fn is_local(&self) -> bool {
false
}
}
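
RemoteClient serializes each LockRequest to JSON, sends it through the node gRPC service, and records lock_id -> owner in active_locks so that release can rebuild an unlock request under the original owner; release tries un_lock first and falls back to r_un_lock for shared locks. Below is a hedged usage sketch under those assumptions, with a placeholder endpoint and condensed error handling; it is not part of this diff.

// Illustrative only: acquire an exclusive lock on a remote node and release it.
async fn remote_lock_roundtrip(endpoint: String, request: &LockRequest) -> Result<()> {
    let client = RemoteClient::new(endpoint); // e.g. "http://localhost:8080" as in the factory test
    if !client.is_online().await {
        return Err(LockError::internal("remote lock service unreachable".to_string()));
    }
    let resp = client.acquire_exclusive(request).await?;
    if let Some(info) = resp.lock_info() {
        // The client remembered request.owner for this lock id, so release() can
        // rebuild the unlock request without the caller passing the owner again.
        client.release(&info.id).await?;
    }
    Ok(())
}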

File diff suppressed because it is too large

Some files were not shown because too many files have changed in this diff.