From 443947e1acf4230fe692120f9a792b74f1099cd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=AE=89=E6=AD=A3=E8=B6=85?= Date: Wed, 17 Dec 2025 21:50:03 +0800 Subject: [PATCH 01/26] fix: improve S3 API compatibility for ListObjects operations (#1173) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 安正超 Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: houseme Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .github/s3tests/README.md | 103 ++++++++++++ .github/s3tests/s3tests.conf | 6 +- .github/workflows/e2e-s3tests.yml | 252 +++++++++++++++++++++--------- rustfs/src/storage/ecfs.rs | 223 +++++++++++++++++++++++--- 4 files changed, 488 insertions(+), 96 deletions(-) create mode 100644 .github/s3tests/README.md diff --git a/.github/s3tests/README.md b/.github/s3tests/README.md new file mode 100644 index 00000000..af61ed25 --- /dev/null +++ b/.github/s3tests/README.md @@ -0,0 +1,103 @@ +# S3 Compatibility Tests Configuration + +This directory contains the configuration for running [Ceph S3 compatibility tests](https://github.com/ceph/s3-tests) against RustFS. + +## Configuration File + +The `s3tests.conf` file is based on the official `s3tests.conf.SAMPLE` from the ceph/s3-tests repository. It uses environment variable substitution via `envsubst` to configure the endpoint and credentials. + +### Key Configuration Points + +- **Host**: Set via `${S3_HOST}` environment variable (e.g., `rustfs-single` for single-node, `lb` for multi-node) +- **Port**: 9000 (standard RustFS port) +- **Credentials**: Uses `${S3_ACCESS_KEY}` and `${S3_SECRET_KEY}` from workflow environment +- **TLS**: Disabled (`is_secure = False`) + +## Test Execution Strategy + +### Network Connectivity Fix + +Tests run inside a Docker container on the `rustfs-net` network, which allows them to resolve and connect to the RustFS container hostnames. This fixes the "Temporary failure in name resolution" error that occurred when tests ran on the GitHub runner host. + +### Performance Optimizations + +1. **Parallel Execution**: Uses `pytest-xdist` with `-n 4` to run tests in parallel across 4 workers +2. **Load Distribution**: Uses `--dist=loadgroup` to distribute test groups across workers +3. **Fail-Fast**: Uses `--maxfail=50` to stop after 50 failures, saving time on catastrophic failures + +### Feature Filtering + +Tests are filtered using pytest markers (`-m`) to skip features not yet supported by RustFS: + +- `lifecycle` - Bucket lifecycle policies +- `versioning` - Object versioning +- `s3website` - Static website hosting +- `bucket_logging` - Bucket logging +- `encryption` / `sse_s3` - Server-side encryption +- `cloud_transition` / `cloud_restore` - Cloud storage transitions +- `lifecycle_expiration` / `lifecycle_transition` - Lifecycle operations + +This filtering: +1. Reduces test execution time significantly (from 1+ hour to ~10-15 minutes) +2. Focuses on features RustFS currently supports +3. 
Avoids hundreds of expected failures + +## Running Tests Locally + +### Single-Node Test + +```bash +# Set credentials +export S3_ACCESS_KEY=rustfsadmin +export S3_SECRET_KEY=rustfsadmin + +# Start RustFS container +docker run -d --name rustfs-single \ + --network rustfs-net \ + -e RUSTFS_ADDRESS=0.0.0.0:9000 \ + -e RUSTFS_ACCESS_KEY=$S3_ACCESS_KEY \ + -e RUSTFS_SECRET_KEY=$S3_SECRET_KEY \ + -e RUSTFS_VOLUMES="/data/rustfs0 /data/rustfs1 /data/rustfs2 /data/rustfs3" \ + rustfs-ci + +# Generate config +export S3_HOST=rustfs-single +envsubst < .github/s3tests/s3tests.conf > /tmp/s3tests.conf + +# Run tests +docker run --rm \ + --network rustfs-net \ + -v /tmp/s3tests.conf:/etc/s3tests.conf:ro \ + python:3.12-slim \ + bash -c ' + apt-get update -qq && apt-get install -y -qq git + git clone --depth 1 https://github.com/ceph/s3-tests.git /s3-tests + cd /s3-tests + pip install -q -r requirements.txt pytest-xdist + S3TEST_CONF=/etc/s3tests.conf pytest -v -n 4 \ + s3tests/functional/test_s3.py \ + -m "not lifecycle and not versioning and not s3website and not bucket_logging and not encryption and not sse_s3" + ' +``` + +## Test Results Interpretation + +- **PASSED**: Test succeeded, feature works correctly +- **FAILED**: Test failed, indicates a potential bug or incompatibility +- **ERROR**: Test setup failed (e.g., network issues, missing dependencies) +- **SKIPPED**: Test skipped due to marker filtering + +## Adding New Feature Support + +When adding support for a new S3 feature to RustFS: + +1. Remove the corresponding marker from the filter in `.github/workflows/e2e-s3tests.yml` +2. Run the tests to verify compatibility +3. Fix any failing tests +4. Update this README to reflect the newly supported feature + +## References + +- [Ceph S3 Tests Repository](https://github.com/ceph/s3-tests) +- [S3 API Compatibility](https://docs.aws.amazon.com/AmazonS3/latest/API/) +- [pytest-xdist Documentation](https://pytest-xdist.readthedocs.io/) diff --git a/.github/s3tests/s3tests.conf b/.github/s3tests/s3tests.conf index 72df037f..c7f8acf3 100644 --- a/.github/s3tests/s3tests.conf +++ b/.github/s3tests/s3tests.conf @@ -75,11 +75,11 @@ email = alt@rustfs.local # alt user_id user_id = rustfsalt -# alt AWS access key - same credentials for RustFS single-user mode -access_key = ${S3_ACCESS_KEY} +# alt AWS access key (must be different from s3 main for many tests) +access_key = ${S3_ALT_ACCESS_KEY} # alt AWS secret key -secret_key = ${S3_SECRET_KEY} +secret_key = ${S3_ALT_SECRET_KEY} #[s3 cloud] ## to run the testcases with "cloud_transition" for transition diff --git a/.github/workflows/e2e-s3tests.yml b/.github/workflows/e2e-s3tests.yml index 08e05475..bea59750 100644 --- a/.github/workflows/e2e-s3tests.yml +++ b/.github/workflows/e2e-s3tests.yml @@ -1,25 +1,39 @@ name: e2e-s3tests on: - push: - branches: [main] - paths: - - ".github/workflows/e2e-s3tests.yml" - - ".github/s3tests/**" - - "Dockerfile.source" - - "entrypoint.sh" - - "rustfs/**" - - "crates/**" workflow_dispatch: inputs: - run-multi: - description: "Run multi-node s3-tests as well" + test-mode: + description: "Test mode to run" + required: true + type: choice + default: "single" + options: + - single + - multi + xdist: + description: "Enable pytest-xdist (parallel). '0' to disable." 
required: false - default: "false" + default: "0" + maxfail: + description: "Stop after N failures (debug friendly)" + required: false + default: "1" + markexpr: + description: "pytest -m expression (feature filters)" + required: false + default: "not lifecycle and not versioning and not s3website and not bucket_logging and not encryption" env: + # main user S3_ACCESS_KEY: rustfsadmin S3_SECRET_KEY: rustfsadmin + # alt user (must be different from main for many s3-tests) + S3_ALT_ACCESS_KEY: rustfsalt + S3_ALT_SECRET_KEY: rustfsalt + + S3_REGION: us-east-1 + RUST_LOG: info PLATFORM: linux/amd64 @@ -29,18 +43,21 @@ defaults: jobs: s3tests-single: + if: github.event.inputs.test-mode == 'single' runs-on: ubuntu-latest - timeout-minutes: 45 + timeout-minutes: 120 steps: - uses: actions/checkout@v4 - name: Enable buildx uses: docker/setup-buildx-action@v3 - - name: Build RustFS image (source) + - name: Build RustFS image (source, cached) run: | DOCKER_BUILDKIT=1 docker buildx build --load \ --platform ${PLATFORM} \ + --cache-from type=gha \ + --cache-to type=gha,mode=max \ -t rustfs-ci \ -f Dockerfile.source . @@ -54,6 +71,7 @@ jobs: run: | docker run -d --name rustfs-single \ --network rustfs-net \ + -p 9000:9000 \ -e RUSTFS_ADDRESS=0.0.0.0:9000 \ -e RUSTFS_ACCESS_KEY=$S3_ACCESS_KEY \ -e RUSTFS_SECRET_KEY=$S3_SECRET_KEY \ @@ -63,9 +81,8 @@ jobs: - name: Wait for RustFS ready run: | - for i in {1..30}; do - if docker run --rm --network rustfs-net curlimages/curl:latest \ - -sf http://rustfs-single:9000/health >/dev/null 2>&1; then + for i in {1..60}; do + if curl -sf http://127.0.0.1:9000/health >/dev/null 2>&1; then echo "RustFS is ready" exit 0 fi @@ -75,11 +92,53 @@ jobs: docker logs rustfs-single || true exit 1 fi + sleep 2 done - echo "Health check failed; container is running, proceeding with caution" >&2 + echo "Health check timed out" >&2 docker logs rustfs-single || true + exit 1 + + - name: Generate s3tests config + run: | + export S3_HOST=127.0.0.1 + envsubst < .github/s3tests/s3tests.conf > s3tests.conf + + - name: Provision s3-tests alt user (required by suite) + run: | + python3 -m pip install --user --upgrade pip awscurl + export PATH="$HOME/.local/bin:$PATH" + + # Admin API requires AWS SigV4 signing. awscurl is used by RustFS codebase as well. + awscurl \ + --service s3 \ + --region "${S3_REGION}" \ + --access_key "${S3_ACCESS_KEY}" \ + --secret_key "${S3_SECRET_KEY}" \ + -X PUT \ + -H 'Content-Type: application/json' \ + -d '{"secretKey":"'"${S3_ALT_SECRET_KEY}"'","status":"enabled","policy":"readwrite"}' \ + "http://127.0.0.1:9000/rustfs/admin/v3/add-user?accessKey=${S3_ALT_ACCESS_KEY}" + + # Explicitly attach built-in policy via policy mapping. + # s3-tests relies on alt client being able to ListBuckets during setup cleanup. + awscurl \ + --service s3 \ + --region "${S3_REGION}" \ + --access_key "${S3_ACCESS_KEY}" \ + --secret_key "${S3_SECRET_KEY}" \ + -X PUT \ + "http://127.0.0.1:9000/rustfs/admin/v3/set-user-or-group-policy?policyName=readwrite&userOrGroup=${S3_ALT_ACCESS_KEY}&isGroup=false" + + # Sanity check: alt user can list buckets (should not be AccessDenied). 
+ awscurl \ + --service s3 \ + --region "${S3_REGION}" \ + --access_key "${S3_ALT_ACCESS_KEY}" \ + --secret_key "${S3_ALT_SECRET_KEY}" \ + -X GET \ + "http://127.0.0.1:9000/" >/dev/null - name: Prepare s3-tests run: | @@ -87,67 +146,72 @@ jobs: export PATH="$HOME/.local/bin:$PATH" git clone --depth 1 https://github.com/ceph/s3-tests.git s3-tests - - name: Generate s3tests config - run: | - export S3_HOST=rustfs-single - envsubst < .github/s3tests/s3tests.conf > s3tests.conf - echo "Generated s3tests.conf:" - cat s3tests.conf - - - name: Run ceph s3-tests (S3-compatible subset) + - name: Run ceph s3-tests (debug friendly) run: | export PATH="$HOME/.local/bin:$PATH" mkdir -p artifacts/s3tests-single + cd s3-tests - # Check available test directories - echo "Available test directories:" - ls -la s3tests*/functional/ 2>/dev/null || echo "No s3tests directories found" + set -o pipefail - # Use s3tests_boto3 if available, fallback to s3tests - if [ -f "s3tests_boto3/functional/test_s3.py" ]; then - TEST_FILE="s3tests_boto3/functional/test_s3.py" - else - TEST_FILE="s3tests/functional/test_s3.py" + MAXFAIL="${{ github.event.inputs.maxfail }}" + if [ -z "$MAXFAIL" ]; then MAXFAIL="1"; fi + + MARKEXPR="${{ github.event.inputs.markexpr }}" + if [ -z "$MARKEXPR" ]; then MARKEXPR="not lifecycle and not versioning and not s3website and not bucket_logging and not encryption"; fi + + XDIST="${{ github.event.inputs.xdist }}" + if [ -z "$XDIST" ]; then XDIST="0"; fi + XDIST_ARGS="" + if [ "$XDIST" != "0" ]; then + # Add pytest-xdist to requirements.txt so tox installs it inside + # its virtualenv. Installing outside tox does NOT work. + echo "pytest-xdist" >> requirements.txt + XDIST_ARGS="-n $XDIST --dist=loadgroup" fi - echo "Using test file: $TEST_FILE" + # Run tests from s3tests/functional (boto2+boto3 combined directory). S3TEST_CONF=${GITHUB_WORKSPACE}/s3tests.conf \ tox -- \ - -v \ - --tb=short \ + -vv -ra --showlocals --tb=long \ + --maxfail="$MAXFAIL" \ --junitxml=${GITHUB_WORKSPACE}/artifacts/s3tests-single/junit.xml \ - "$TEST_FILE" \ - -k 'not lifecycle and not versioning and not website and not logging and not encryption' + $XDIST_ARGS \ + s3tests/functional/test_s3.py \ + -m "$MARKEXPR" \ + 2>&1 | tee ${GITHUB_WORKSPACE}/artifacts/s3tests-single/pytest.log - name: Collect RustFS logs if: always() run: | mkdir -p artifacts/rustfs-single docker logs rustfs-single > artifacts/rustfs-single/rustfs.log 2>&1 || true + docker inspect rustfs-single > artifacts/rustfs-single/inspect.json || true - name: Upload artifacts - if: always() + if: always() && env.ACT != 'true' uses: actions/upload-artifact@v4 with: name: s3tests-single path: artifacts/** s3tests-multi: - if: github.event_name == 'workflow_dispatch' && github.event.inputs.run-multi == 'true' - needs: s3tests-single + if: github.event_name == 'workflow_dispatch' && github.event.inputs.test-mode == 'multi' runs-on: ubuntu-latest - timeout-minutes: 60 + timeout-minutes: 150 steps: - uses: actions/checkout@v4 - name: Enable buildx uses: docker/setup-buildx-action@v3 - - name: Build RustFS image (source) + - name: Build RustFS image (source, cached) run: | DOCKER_BUILDKIT=1 docker buildx build --load \ --platform ${PLATFORM} \ + --cache-from type=gha \ + --cache-to type=gha,mode=max \ -t rustfs-ci \ -f Dockerfile.source . 
@@ -241,9 +305,8 @@ jobs: - name: Wait for LB ready run: | - for i in {1..60}; do - if docker run --rm --network rustfs-net curlimages/curl \ - -sf http://lb:9000/health >/dev/null 2>&1; then + for i in {1..90}; do + if curl -sf http://127.0.0.1:9000/health >/dev/null 2>&1; then echo "Load balancer is ready" exit 0 fi @@ -255,32 +318,81 @@ jobs: - name: Generate s3tests config run: | - export S3_HOST=lb + export S3_HOST=127.0.0.1 envsubst < .github/s3tests/s3tests.conf > s3tests.conf - echo "Generated s3tests.conf:" - cat s3tests.conf - - name: Run ceph s3-tests (multi, S3-compatible subset) + - name: Provision s3-tests alt user (required by suite) run: | + python3 -m pip install --user --upgrade pip awscurl + export PATH="$HOME/.local/bin:$PATH" + + awscurl \ + --service s3 \ + --region "${S3_REGION}" \ + --access_key "${S3_ACCESS_KEY}" \ + --secret_key "${S3_SECRET_KEY}" \ + -X PUT \ + -H 'Content-Type: application/json' \ + -d '{"secretKey":"'"${S3_ALT_SECRET_KEY}"'","status":"enabled","policy":"readwrite"}' \ + "http://127.0.0.1:9000/rustfs/admin/v3/add-user?accessKey=${S3_ALT_ACCESS_KEY}" + + awscurl \ + --service s3 \ + --region "${S3_REGION}" \ + --access_key "${S3_ACCESS_KEY}" \ + --secret_key "${S3_SECRET_KEY}" \ + -X PUT \ + "http://127.0.0.1:9000/rustfs/admin/v3/set-user-or-group-policy?policyName=readwrite&userOrGroup=${S3_ALT_ACCESS_KEY}&isGroup=false" + + awscurl \ + --service s3 \ + --region "${S3_REGION}" \ + --access_key "${S3_ALT_ACCESS_KEY}" \ + --secret_key "${S3_ALT_SECRET_KEY}" \ + -X GET \ + "http://127.0.0.1:9000/" >/dev/null + + - name: Prepare s3-tests + run: | + python3 -m pip install --user --upgrade pip tox + export PATH="$HOME/.local/bin:$PATH" + git clone --depth 1 https://github.com/ceph/s3-tests.git s3-tests + + - name: Run ceph s3-tests (multi, debug friendly) + run: | + export PATH="$HOME/.local/bin:$PATH" mkdir -p artifacts/s3tests-multi - docker run --rm --network rustfs-net \ - --platform ${PLATFORM} \ - -e S3TEST_CONF=/tmp/s3tests.conf \ - -v ${GITHUB_WORKSPACE}/s3tests.conf:/tmp/s3tests.conf:ro \ - -v ${GITHUB_WORKSPACE}/artifacts/s3tests-multi:/mnt/logs \ - quay.io/ceph/s3-tests:latest \ - bash -c ' - if [ -f "s3tests_boto3/functional/test_s3.py" ]; then - TEST_FILE="s3tests_boto3/functional/test_s3.py" - else - TEST_FILE="s3tests/functional/test_s3.py" - fi - echo "Using test file: $TEST_FILE" - pytest -v --tb=short \ - --junitxml=/mnt/logs/junit.xml \ - "$TEST_FILE" \ - -k "not lifecycle and not versioning and not website and not logging and not encryption" - ' + + cd s3-tests + + set -o pipefail + + MAXFAIL="${{ github.event.inputs.maxfail }}" + if [ -z "$MAXFAIL" ]; then MAXFAIL="1"; fi + + MARKEXPR="${{ github.event.inputs.markexpr }}" + if [ -z "$MARKEXPR" ]; then MARKEXPR="not lifecycle and not versioning and not s3website and not bucket_logging and not encryption"; fi + + XDIST="${{ github.event.inputs.xdist }}" + if [ -z "$XDIST" ]; then XDIST="0"; fi + XDIST_ARGS="" + if [ "$XDIST" != "0" ]; then + # Add pytest-xdist to requirements.txt so tox installs it inside + # its virtualenv. Installing outside tox does NOT work. + echo "pytest-xdist" >> requirements.txt + XDIST_ARGS="-n $XDIST --dist=loadgroup" + fi + + # Run tests from s3tests/functional (boto2+boto3 combined directory). 
+ S3TEST_CONF=${GITHUB_WORKSPACE}/s3tests.conf \ + tox -- \ + -vv -ra --showlocals --tb=long \ + --maxfail="$MAXFAIL" \ + --junitxml=${GITHUB_WORKSPACE}/artifacts/s3tests-multi/junit.xml \ + $XDIST_ARGS \ + s3tests/functional/test_s3.py \ + -m "$MARKEXPR" \ + 2>&1 | tee ${GITHUB_WORKSPACE}/artifacts/s3tests-multi/pytest.log - name: Collect logs if: always() @@ -289,7 +401,7 @@ jobs: docker compose -f compose.yml logs --no-color > artifacts/cluster/cluster.log 2>&1 || true - name: Upload artifacts - if: always() + if: always() && env.ACT != 'true' uses: actions/upload-artifact@v4 with: name: s3tests-multi diff --git a/rustfs/src/storage/ecfs.rs b/rustfs/src/storage/ecfs.rs index da069ad9..42ce4a01 100644 --- a/rustfs/src/storage/ecfs.rs +++ b/rustfs/src/storage/ecfs.rs @@ -139,6 +139,7 @@ use tokio_stream::wrappers::ReceiverStream; use tokio_tar::Archive; use tokio_util::io::{ReaderStream, StreamReader}; use tracing::{debug, error, info, instrument, warn}; +use urlencoding::encode; use uuid::Uuid; macro_rules! try_ { @@ -793,6 +794,9 @@ impl S3 for FS { key, server_side_encryption: requested_sse, ssekms_key_id: requested_kms_key_id, + sse_customer_algorithm, + sse_customer_key, + sse_customer_key_md5, .. } = req.input.clone(); let (src_bucket, src_key, version_id) = match copy_source { @@ -940,6 +944,44 @@ impl S3 for FS { } } + // Apply SSE-C encryption if customer-provided key is specified + if let (Some(sse_alg), Some(sse_key), Some(sse_md5)) = (&sse_customer_algorithm, &sse_customer_key, &sse_customer_key_md5) + { + if sse_alg.as_str() == "AES256" { + let key_bytes = BASE64_STANDARD.decode(sse_key.as_str()).map_err(|e| { + error!("Failed to decode SSE-C key: {}", e); + ApiError::from(StorageError::other("Invalid SSE-C key")) + })?; + + if key_bytes.len() != 32 { + return Err(ApiError::from(StorageError::other("SSE-C key must be 32 bytes")).into()); + } + + let computed_md5 = BASE64_STANDARD.encode(md5::compute(&key_bytes).0); + if computed_md5 != sse_md5.as_str() { + return Err(ApiError::from(StorageError::other("SSE-C key MD5 mismatch")).into()); + } + + // Store original size before encryption + src_info + .user_defined + .insert("x-amz-server-side-encryption-customer-original-size".to_string(), actual_size.to_string()); + + // SAFETY: The length of `key_bytes` is checked to be 32 bytes above, + // so this conversion cannot fail. 
+ let key_array: [u8; 32] = key_bytes.try_into().expect("key length already checked"); + // Generate deterministic nonce from bucket-key + let nonce_source = format!("{bucket}-{key}"); + let nonce_hash = md5::compute(nonce_source.as_bytes()); + let nonce: [u8; 12] = nonce_hash.0[..12] + .try_into() + .expect("MD5 hash is always 16 bytes; taking first 12 bytes for nonce is safe"); + + let encrypt_reader = EncryptReader::new(reader, key_array, nonce); + reader = HashReader::new(Box::new(encrypt_reader), -1, actual_size, None, None, false).map_err(ApiError::from)?; + } + } + src_info.put_object_reader = Some(PutObjReader::new(reader)); // check quota @@ -949,6 +991,19 @@ impl S3 for FS { src_info.user_defined.insert(k, v); } + // Store SSE-C metadata for GET responses + if let Some(ref sse_alg) = sse_customer_algorithm { + src_info.user_defined.insert( + "x-amz-server-side-encryption-customer-algorithm".to_string(), + sse_alg.as_str().to_string(), + ); + } + if let Some(ref sse_md5) = sse_customer_key_md5 { + src_info + .user_defined + .insert("x-amz-server-side-encryption-customer-key-md5".to_string(), sse_md5.clone()); + } + // TODO: src tags let oi = store @@ -979,6 +1034,8 @@ impl S3 for FS { copy_object_result: Some(copy_object_result), server_side_encryption: effective_sse, ssekms_key_id: effective_kms_key_id, + sse_customer_algorithm, + sse_customer_key_md5, ..Default::default() }; @@ -2037,8 +2094,8 @@ impl S3 for FS { let mut key_array = [0u8; 32]; key_array.copy_from_slice(&key_bytes[..32]); - // Verify MD5 hash of the key matches what we expect - let computed_md5 = format!("{:x}", md5::compute(&key_bytes)); + // Verify MD5 hash of the key matches what the client claims + let computed_md5 = BASE64_STANDARD.encode(md5::compute(&key_bytes).0); if computed_md5 != *sse_key_md5_provided { return Err(ApiError::from(StorageError::other("SSE-C key MD5 mismatch")).into()); } @@ -2605,16 +2662,52 @@ impl S3 for FS { async fn list_objects(&self, req: S3Request) -> S3Result> { let v2_resp = self.list_objects_v2(req.map_input(Into::into)).await?; - Ok(v2_resp.map_output(|v2| ListObjectsOutput { - contents: v2.contents, - delimiter: v2.delimiter, - encoding_type: v2.encoding_type, - name: v2.name, - prefix: v2.prefix, - max_keys: v2.max_keys, - common_prefixes: v2.common_prefixes, - is_truncated: v2.is_truncated, - ..Default::default() + Ok(v2_resp.map_output(|v2| { + // For ListObjects (v1) API, NextMarker should be the last item returned when truncated + // When both Contents and CommonPrefixes are present, NextMarker should be the + // lexicographically last item (either last key or last prefix) + let next_marker = if v2.is_truncated.unwrap_or(false) { + let last_key = v2 + .contents + .as_ref() + .and_then(|contents| contents.last()) + .and_then(|obj| obj.key.as_ref()) + .cloned(); + + let last_prefix = v2 + .common_prefixes + .as_ref() + .and_then(|prefixes| prefixes.last()) + .and_then(|prefix| prefix.prefix.as_ref()) + .cloned(); + + // NextMarker should be the lexicographically last item + // This matches Ceph S3 behavior used by s3-tests + match (last_key, last_prefix) { + (Some(k), Some(p)) => { + // Return the lexicographically greater one + if k > p { Some(k) } else { Some(p) } + } + (Some(k), None) => Some(k), + (None, Some(p)) => Some(p), + (None, None) => None, + } + } else { + None + }; + + ListObjectsOutput { + contents: v2.contents, + delimiter: v2.delimiter, + encoding_type: v2.encoding_type, + name: v2.name, + prefix: v2.prefix, + max_keys: v2.max_keys, + common_prefixes: 
v2.common_prefixes, + is_truncated: v2.is_truncated, + next_marker, + ..Default::default() + } })) } @@ -2625,6 +2718,7 @@ impl S3 for FS { bucket, continuation_token, delimiter, + encoding_type, fetch_owner, max_keys, prefix, @@ -2687,13 +2781,31 @@ impl S3 for FS { // warn!("object_infos objects {:?}", object_infos.objects); + // Apply URL encoding if encoding_type is "url" + // Note: S3 URL encoding should encode special characters but preserve path separators (/) + let should_encode = encoding_type.as_ref().map(|e| e.as_str() == "url").unwrap_or(false); + + // Helper function to encode S3 keys/prefixes (preserving /) + // S3 URL encoding encodes special characters but keeps '/' unencoded + let encode_s3_name = |name: &str| -> String { + name.split('/') + .map(|part| encode(part).to_string()) + .collect::>() + .join("/") + }; + let objects: Vec = object_infos .objects .iter() .filter(|v| !v.name.is_empty()) .map(|v| { + let key = if should_encode { + encode_s3_name(&v.name) + } else { + v.name.to_owned() + }; let mut obj = Object { - key: Some(v.name.to_owned()), + key: Some(key), last_modified: v.mod_time.map(Timestamp::from), size: Some(v.get_actual_size().unwrap_or_default()), e_tag: v.etag.clone().map(|etag| to_s3s_etag(&etag)), @@ -2711,14 +2823,18 @@ impl S3 for FS { }) .collect(); - let key_count = objects.len() as i32; - - let common_prefixes = object_infos + let common_prefixes: Vec = object_infos .prefixes .into_iter() - .map(|v| CommonPrefix { prefix: Some(v) }) + .map(|v| { + let prefix = if should_encode { encode_s3_name(&v) } else { v }; + CommonPrefix { prefix: Some(prefix) } + }) .collect(); + // KeyCount should include both objects and common prefixes per S3 API spec + let key_count = (objects.len() + common_prefixes.len()) as i32; + // Encode next_continuation_token to base64 let next_continuation_token = object_infos .next_continuation_token @@ -2732,6 +2848,7 @@ impl S3 for FS { max_keys: Some(max_keys), contents: Some(objects), delimiter, + encoding_type: encoding_type.clone(), name: Some(bucket), prefix: Some(prefix), common_prefixes: Some(common_prefixes), @@ -2779,7 +2896,7 @@ impl S3 for FS { key: Some(v.name.to_owned()), last_modified: v.mod_time.map(Timestamp::from), size: Some(v.size), - version_id: v.version_id.map(|v| v.to_string()), + version_id: Some(v.version_id.map(|v| v.to_string()).unwrap_or_else(|| "null".to_string())), is_latest: Some(v.is_latest), e_tag: v.etag.clone().map(|etag| to_s3s_etag(&etag)), storage_class: v.storage_class.clone().map(ObjectVersionStorageClass::from), @@ -2802,13 +2919,17 @@ impl S3 for FS { .filter(|o| o.delete_marker) .map(|o| DeleteMarkerEntry { key: Some(o.name.clone()), - version_id: o.version_id.map(|v| v.to_string()), + version_id: Some(o.version_id.map(|v| v.to_string()).unwrap_or_else(|| "null".to_string())), is_latest: Some(o.is_latest), last_modified: o.mod_time.map(Timestamp::from), ..Default::default() }) .collect::>(); + // Only set next_version_id_marker if it has a value, per AWS S3 API spec + // boto3 expects it to be a string or omitted, not None + let next_version_id_marker = object_infos.next_version_idmarker.filter(|v| !v.is_empty()); + let output = ListObjectVersionsOutput { is_truncated: Some(object_infos.is_truncated), max_keys: Some(key_count), @@ -2818,6 +2939,8 @@ impl S3 for FS { common_prefixes: Some(common_prefixes), versions: Some(objects), delete_markers: Some(delete_markers), + next_key_marker: object_infos.next_marker, + next_version_id_marker, ..Default::default() }; @@ -3077,8 +3200,8 @@ 
impl S3 for FS { let mut key_array = [0u8; 32]; key_array.copy_from_slice(&key_bytes[..32]); - // Verify MD5 hash of the key - let computed_md5 = format!("{:x}", md5::compute(&key_bytes)); + // Verify MD5 hash of the key matches what the client claims + let computed_md5 = BASE64_STANDARD.encode(md5::compute(&key_bytes).0); if computed_md5 != *sse_key_md5_provided { return Err(ApiError::from(StorageError::other("SSE-C key MD5 mismatch")).into()); } @@ -3514,8 +3637,8 @@ impl S3 for FS { let mut key_array = [0u8; 32]; key_array.copy_from_slice(&key_bytes[..32]); - // Verify MD5 hash of the key - let computed_md5 = format!("{:x}", md5::compute(&key_bytes)); + // Verify MD5 hash of the key matches what the client claims + let computed_md5 = BASE64_STANDARD.encode(md5::compute(&key_bytes).0); if computed_md5 != *sse_key_md5_provided { return Err(ApiError::from(StorageError::other("SSE-C key MD5 mismatch")).into()); } @@ -5626,6 +5749,60 @@ mod tests { // and various dependencies that make unit testing challenging. For comprehensive testing // of S3 operations, integration tests would be more appropriate. + #[test] + fn test_list_objects_v2_key_count_includes_prefixes() { + // Test that KeyCount calculation includes both objects and common prefixes + // This verifies the fix for S3 API compatibility where KeyCount should equal + // the sum of Contents and CommonPrefixes lengths + + // Simulate the calculation logic from list_objects_v2 + let objects_count = 3_usize; + let common_prefixes_count = 2_usize; + + // KeyCount should include both objects and common prefixes per S3 API spec + let key_count = (objects_count + common_prefixes_count) as i32; + + assert_eq!(key_count, 5); + + // Edge cases: verify calculation logic + let no_objects = 0_usize; + let no_prefixes = 0_usize; + assert_eq!((no_objects + no_prefixes) as i32, 0); + + let one_object = 1_usize; + assert_eq!((one_object + no_prefixes) as i32, 1); + + let one_prefix = 1_usize; + assert_eq!((no_objects + one_prefix) as i32, 1); + } + + #[test] + fn test_s3_url_encoding_preserves_slash() { + // Test that S3 URL encoding preserves path separators (/) + // This verifies the encoding logic for EncodingType=url parameter + + use urlencoding::encode; + + // Helper function matching the implementation + let encode_s3_name = |name: &str| -> String { + name.split('/') + .map(|part| encode(part).to_string()) + .collect::>() + .join("/") + }; + + // Test cases from s3-tests + assert_eq!(encode_s3_name("asdf+b"), "asdf%2Bb"); + assert_eq!(encode_s3_name("foo+1/bar"), "foo%2B1/bar"); + assert_eq!(encode_s3_name("foo/"), "foo/"); + assert_eq!(encode_s3_name("quux ab/"), "quux%20ab/"); + + // Edge cases + assert_eq!(encode_s3_name("normal/key"), "normal/key"); + assert_eq!(encode_s3_name("key+with+plus"), "key%2Bwith%2Bplus"); + assert_eq!(encode_s3_name("key with spaces"), "key%20with%20spaces"); + } + #[test] fn test_s3_error_scenarios() { // Test that we can create expected S3 errors for common validation cases From 46557cddd13e4da94bc7b0ab142b6bc445e75385 Mon Sep 17 00:00:00 2001 From: Muhammed Hussain Karimi Date: Thu, 18 Dec 2025 15:43:24 +0330 Subject: [PATCH 02/26] :technologist: Improve shebang compatibility (#1180) Signed-off-by: Muhammed Hussain Karimi --- .envrc | 1 + .gitignore | 1 + build-rustfs.sh | 2 +- crates/ecstore/run_benchmarks.sh | 2 +- docker-buildx.sh | 2 +- docs/console-separation.md | 2 +- docs/examples/docker/docker-quickstart.sh | 2 +- docs/examples/docker/enhanced-docker-deployment.sh | 2 +- 
docs/examples/docker/enhanced-security-deployment.sh | 2 +- docs/examples/mnmd/test-deployment.sh | 2 +- scripts/dev_deploy.sh | 2 +- scripts/dev_rustfs.sh | 2 +- scripts/e2e-run.sh | 3 ++- scripts/install-flatc.sh | 2 +- scripts/install-protoc.sh | 2 +- scripts/notify.sh | 3 ++- scripts/run.sh | 4 +++- scripts/run_e2e_tests.sh | 2 +- scripts/run_scanner_benchmarks.sh | 2 +- scripts/setup-test-binaries.sh | 2 +- scripts/test.sh | 2 +- scripts/test/delete_xldir.sh | 2 +- scripts/test/delete_xldir_simple.sh | 2 +- 23 files changed, 27 insertions(+), 21 deletions(-) create mode 100644 .envrc diff --git a/.envrc b/.envrc new file mode 100644 index 00000000..8392d159 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use flake \ No newline at end of file diff --git a/.gitignore b/.gitignore index f4be8260..c5218d5f 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ .DS_Store .idea .vscode +.direnv/ /test /logs /data diff --git a/build-rustfs.sh b/build-rustfs.sh index 651ef735..51e2383c 100755 --- a/build-rustfs.sh +++ b/build-rustfs.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # RustFS Binary Build Script # This script compiles RustFS binaries for different platforms and architectures diff --git a/crates/ecstore/run_benchmarks.sh b/crates/ecstore/run_benchmarks.sh index cf6988e0..7e5266c3 100755 --- a/crates/ecstore/run_benchmarks.sh +++ b/crates/ecstore/run_benchmarks.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Copyright 2024 RustFS Team # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/docker-buildx.sh b/docker-buildx.sh index d5770078..ed19c077 100755 --- a/docker-buildx.sh +++ b/docker-buildx.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -e diff --git a/docs/console-separation.md b/docs/console-separation.md index 8b6b3861..7795b4fd 100644 --- a/docs/console-separation.md +++ b/docs/console-separation.md @@ -1068,7 +1068,7 @@ curl http://localhost:9001/health #### Docker Migration Example ```bash -#!/bin/bash +#!/usr/bin/env bash # migrate-docker.sh # Stop old container diff --git a/docs/examples/docker/docker-quickstart.sh b/docs/examples/docker/docker-quickstart.sh index 03ceb78a..a83da686 100755 --- a/docs/examples/docker/docker-quickstart.sh +++ b/docs/examples/docker/docker-quickstart.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # RustFS Docker Quick Start Script # This script provides easy deployment commands for different scenarios diff --git a/docs/examples/docker/enhanced-docker-deployment.sh b/docs/examples/docker/enhanced-docker-deployment.sh index 0baefda4..aa6f5ee8 100755 --- a/docs/examples/docker/enhanced-docker-deployment.sh +++ b/docs/examples/docker/enhanced-docker-deployment.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # RustFS Enhanced Docker Deployment Examples # This script demonstrates various deployment scenarios for RustFS with console separation diff --git a/docs/examples/docker/enhanced-security-deployment.sh b/docs/examples/docker/enhanced-security-deployment.sh index d5c2aa33..63c401ae 100755 --- a/docs/examples/docker/enhanced-security-deployment.sh +++ b/docs/examples/docker/enhanced-security-deployment.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # RustFS Enhanced Security Deployment Script # This script demonstrates production-ready deployment with enhanced security features diff --git a/docs/examples/mnmd/test-deployment.sh b/docs/examples/mnmd/test-deployment.sh index 89c3b9e3..5433632a 100755 --- a/docs/examples/mnmd/test-deployment.sh +++ 
b/docs/examples/mnmd/test-deployment.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Copyright 2024 RustFS Team # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/scripts/dev_deploy.sh b/scripts/dev_deploy.sh index 23da85a0..c73b9ce1 100755 --- a/scripts/dev_deploy.sh +++ b/scripts/dev_deploy.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Copyright 2024 RustFS Team # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/scripts/dev_rustfs.sh b/scripts/dev_rustfs.sh index 11ce4389..7a69e1e2 100644 --- a/scripts/dev_rustfs.sh +++ b/scripts/dev_rustfs.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Copyright 2024 RustFS Team # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/scripts/e2e-run.sh b/scripts/e2e-run.sh index 9127fd0c..b518c598 100755 --- a/scripts/e2e-run.sh +++ b/scripts/e2e-run.sh @@ -1,4 +1,5 @@ -#!/bin/bash -ex +#!/usr/bin/env bash +set -ex # Copyright 2024 RustFS Team # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/scripts/install-flatc.sh b/scripts/install-flatc.sh index 1f95a9cc..b787b8a4 100755 --- a/scripts/install-flatc.sh +++ b/scripts/install-flatc.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Install flatc 25.9.23 on macOS set -e diff --git a/scripts/install-protoc.sh b/scripts/install-protoc.sh index dfb52a0a..3d85cf21 100755 --- a/scripts/install-protoc.sh +++ b/scripts/install-protoc.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Install protoc 33.1 on macOS set -e diff --git a/scripts/notify.sh b/scripts/notify.sh index 49aedaf7..1acbcea2 100755 --- a/scripts/notify.sh +++ b/scripts/notify.sh @@ -1,4 +1,5 @@ -#!/bin/bash -e +#!/usr/bin/env bash +set -e # Copyright 2024 RustFS Team # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/scripts/run.sh b/scripts/run.sh index 0dc3a32a..d3e99945 100755 --- a/scripts/run.sh +++ b/scripts/run.sh @@ -1,4 +1,6 @@ -#!/bin/bash -e +#!/usr/bin/env bash +set -e + # Copyright 2024 RustFS Team # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/scripts/run_e2e_tests.sh b/scripts/run_e2e_tests.sh index c9e0894d..754782f1 100755 --- a/scripts/run_e2e_tests.sh +++ b/scripts/run_e2e_tests.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # E2E Test Runner Script # Automatically starts RustFS instance, runs tests, and cleans up diff --git a/scripts/run_scanner_benchmarks.sh b/scripts/run_scanner_benchmarks.sh index bbf68530..dce92f2b 100755 --- a/scripts/run_scanner_benchmarks.sh +++ b/scripts/run_scanner_benchmarks.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Scanner performance benchmark runner # Usage: ./scripts/run_scanner_benchmarks.sh [test_type] [quick] diff --git a/scripts/setup-test-binaries.sh b/scripts/setup-test-binaries.sh index f3f01662..fa2389b0 100755 --- a/scripts/setup-test-binaries.sh +++ b/scripts/setup-test-binaries.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Setup test binaries for Docker build testing # This script creates temporary binary files for testing Docker build process diff --git a/scripts/test.sh b/scripts/test.sh index b4e1c68a..cca9e750 100755 --- a/scripts/test.sh +++ b/scripts/test.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Copyright 2024 RustFS Team # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/scripts/test/delete_xldir.sh b/scripts/test/delete_xldir.sh index 8b6896cd..ad422668 100755 --- a/scripts/test/delete_xldir.sh 
+++ b/scripts/test/delete_xldir.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Delete all directories ending with __XLDIR__ in the specified path diff --git a/scripts/test/delete_xldir_simple.sh b/scripts/test/delete_xldir_simple.sh index 04d4406e..493e88e6 100755 --- a/scripts/test/delete_xldir_simple.sh +++ b/scripts/test/delete_xldir_simple.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Simple version: Delete all directories ending with __XLDIR__ in the specified path From a0b2f5a2320a20383b900d78e06450601519c106 Mon Sep 17 00:00:00 2001 From: loverustfs Date: Thu, 18 Dec 2025 22:23:25 +0800 Subject: [PATCH 03/26] self-host self-host Signed-off-by: loverustfs --- .github/workflows/ci.yml | 41 +++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index af1e0024..af5731c8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -62,17 +62,25 @@ on: permissions: contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + env: CARGO_TERM_COLOR: always RUST_BACKTRACE: 1 + + CARGO_BUILD_JOBS: 4 jobs: + skip-check: name: Skip Duplicate Actions permissions: actions: write contents: read - runs-on: ubuntu-latest + runs-on: ubuntu-latest outputs: should_skip: ${{ steps.skip_check.outputs.should_skip }} steps: @@ -83,13 +91,12 @@ jobs: concurrent_skipping: "same_content_newer" cancel_others: true paths_ignore: '["*.md", "docs/**", "deploy/**"]' - # Never skip release events and tag pushes do_not_skip: '["workflow_dispatch", "schedule", "merge_group", "release", "push"]' - typos: name: Typos - runs-on: ubuntu-latest + + runs-on: [self-hosted, linux, x64] steps: - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@stable @@ -100,11 +107,12 @@ jobs: name: Test and Lint needs: skip-check if: needs.skip-check.outputs.should_skip != 'true' - runs-on: ubuntu-latest + + runs-on: [self-hosted, linux, x64] timeout-minutes: 60 steps: - - name: Delete huge unnecessary tools folder - run: rm -rf /opt/hostedtoolcache + + - name: Checkout repository uses: actions/checkout@v6 @@ -112,11 +120,17 @@ jobs: uses: ./.github/actions/setup with: rust-version: stable + cache-shared-key: ci-test-${{ hashFiles('**/Cargo.lock') }} github-token: ${{ secrets.GITHUB_TOKEN }} cache-save-if: ${{ github.ref == 'refs/heads/main' }} + + - name: Install cargo-nextest + uses: taiki-e/install-action@nextest + - name: Run tests + run: | cargo nextest run --all --exclude e2e_test cargo test --all --doc @@ -131,12 +145,20 @@ jobs: name: End-to-End Tests needs: skip-check if: needs.skip-check.outputs.should_skip != 'true' - runs-on: ubuntu-latest + + runs-on: [self-hosted, linux, x64] timeout-minutes: 30 steps: - name: Checkout repository uses: actions/checkout@v6 + + - name: Clean up previous test run + run: | + rm -rf /tmp/rustfs + rm -f /tmp/rustfs.log + # 如果有 docker 容器残留,也建议清理 + - name: Setup Rust environment uses: ./.github/actions/setup with: @@ -155,7 +177,8 @@ jobs: - name: Build debug binary run: | touch rustfs/build.rs - cargo build -p rustfs --bins + # 限制并发,防止 build --bins 导致 OOM + cargo build -p rustfs --bins --jobs 4 - name: Run end-to-end tests run: | From 1d111464f9e77c7209bb55cfbcc6719313c20493 Mon Sep 17 00:00:00 2001 From: loverustfs Date: Fri, 19 Dec 2025 09:15:26 +0800 Subject: [PATCH 04/26] Return to GitHub hosting Return to GitHub hosting Signed-off-by: loverustfs --- .github/workflows/ci.yml | 28 ++++++++-------------------- 1 file 
changed, 8 insertions(+), 20 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index af5731c8..2aa60f5c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -4,7 +4,7 @@ # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -62,7 +62,6 @@ on: permissions: contents: read - concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true @@ -70,7 +69,6 @@ concurrency: env: CARGO_TERM_COLOR: always RUST_BACKTRACE: 1 - CARGO_BUILD_JOBS: 4 jobs: @@ -95,10 +93,9 @@ jobs: typos: name: Typos - - runs-on: [self-hosted, linux, x64] + runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@stable - name: Typos check with custom config file uses: crate-ci/typos@master @@ -107,30 +104,24 @@ jobs: name: Test and Lint needs: skip-check if: needs.skip-check.outputs.should_skip != 'true' - - runs-on: [self-hosted, linux, x64] + runs-on: ubuntu-latest timeout-minutes: 60 steps: - - - name: Checkout repository - uses: actions/checkout@v6 + uses: actions/checkout@v4 - name: Setup Rust environment uses: ./.github/actions/setup with: rust-version: stable - cache-shared-key: ci-test-${{ hashFiles('**/Cargo.lock') }} github-token: ${{ secrets.GITHUB_TOKEN }} cache-save-if: ${{ github.ref == 'refs/heads/main' }} - - name: Install cargo-nextest uses: taiki-e/install-action@nextest - name: Run tests - run: | cargo nextest run --all --exclude e2e_test cargo test --all --doc @@ -145,19 +136,16 @@ jobs: name: End-to-End Tests needs: skip-check if: needs.skip-check.outputs.should_skip != 'true' - - runs-on: [self-hosted, linux, x64] + runs-on: ubuntu-latest timeout-minutes: 30 steps: - name: Checkout repository - uses: actions/checkout@v6 - + uses: actions/checkout@v4 - name: Clean up previous test run run: | rm -rf /tmp/rustfs rm -f /tmp/rustfs.log - # 如果有 docker 容器残留,也建议清理 - name: Setup Rust environment uses: ./.github/actions/setup @@ -177,7 +165,7 @@ jobs: - name: Build debug binary run: | touch rustfs/build.rs - # 限制并发,防止 build --bins 导致 OOM + # Limit concurrency to prevent OOM cargo build -p rustfs --bins --jobs 4 - name: Run end-to-end tests From 889c67f359e8f23a59c745847b4d0ad5a8969a3b Mon Sep 17 00:00:00 2001 From: loverustfs Date: Fri, 19 Dec 2025 09:42:21 +0800 Subject: [PATCH 05/26] Modify to ubicloud --- .github/workflows/audit.yml | 4 ++-- .github/workflows/build.yml | 20 ++++++++++---------- .github/workflows/ci.yml | 8 ++++---- .github/workflows/docker.yml | 6 +++--- .github/workflows/e2e-mint.yml | 4 ++-- .github/workflows/e2e-s3tests.yml | 4 ++-- .github/workflows/helm-package.yml | 4 ++-- .github/workflows/issue-translator.yml | 2 +- .github/workflows/performance.yml | 4 ++-- 9 files changed, 28 insertions(+), 28 deletions(-) diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml index 23635a1c..661ef05a 100644 --- a/.github/workflows/audit.yml +++ b/.github/workflows/audit.yml @@ -40,7 +40,7 @@ env: jobs: security-audit: name: Security Audit - runs-on: ubuntu-latest + runs-on: ubicloud-standard-4 timeout-minutes: 15 steps: - name: Checkout repository @@ -65,7 +65,7 @@ jobs: dependency-review: name: Dependency Review - runs-on: ubuntu-latest + runs-on: 
ubicloud-standard-4 if: github.event_name == 'pull_request' permissions: contents: read diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5690d541..c692dffb 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -83,7 +83,7 @@ jobs: # Build strategy check - determine build type based on trigger build-check: name: Build Strategy Check - runs-on: ubuntu-latest + runs-on: ubicloud-standard-4 outputs: should_build: ${{ steps.check.outputs.should_build }} build_type: ${{ steps.check.outputs.build_type }} @@ -167,19 +167,19 @@ jobs: matrix: include: # Linux builds - - os: ubuntu-latest + - os: ubicloud-standard-4 target: x86_64-unknown-linux-musl cross: false platform: linux - - os: ubuntu-latest + - os: ubicloud-standard-4 target: aarch64-unknown-linux-musl cross: true platform: linux - - os: ubuntu-latest + - os: ubicloud-standard-4 target: x86_64-unknown-linux-gnu cross: false platform: linux - - os: ubuntu-latest + - os: ubicloud-standard-4 target: aarch64-unknown-linux-gnu cross: true platform: linux @@ -532,7 +532,7 @@ jobs: name: Build Summary needs: [ build-check, build-rustfs ] if: always() && needs.build-check.outputs.should_build == 'true' - runs-on: ubuntu-latest + runs-on: ubicloud-standard-4 steps: - name: Build completion summary shell: bash @@ -584,7 +584,7 @@ jobs: name: Create GitHub Release needs: [ build-check, build-rustfs ] if: startsWith(github.ref, 'refs/tags/') && needs.build-check.outputs.build_type != 'development' - runs-on: ubuntu-latest + runs-on: ubicloud-standard-4 permissions: contents: write outputs: @@ -670,7 +670,7 @@ jobs: name: Upload Release Assets needs: [ build-check, build-rustfs, create-release ] if: startsWith(github.ref, 'refs/tags/') && needs.build-check.outputs.build_type != 'development' - runs-on: ubuntu-latest + runs-on: ubicloud-standard-4 permissions: contents: write actions: read @@ -751,7 +751,7 @@ jobs: name: Update Latest Version needs: [ build-check, upload-release-assets ] if: startsWith(github.ref, 'refs/tags/') - runs-on: ubuntu-latest + runs-on: ubicloud-standard-4 steps: - name: Update latest.json env: @@ -801,7 +801,7 @@ jobs: name: Publish Release needs: [ build-check, create-release, upload-release-assets ] if: startsWith(github.ref, 'refs/tags/') && needs.build-check.outputs.build_type != 'development' - runs-on: ubuntu-latest + runs-on: ubicloud-standard-4 permissions: contents: write steps: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2aa60f5c..9d36100c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -78,7 +78,7 @@ jobs: permissions: actions: write contents: read - runs-on: ubuntu-latest + runs-on: ubicloud-standard-4 outputs: should_skip: ${{ steps.skip_check.outputs.should_skip }} steps: @@ -93,7 +93,7 @@ jobs: typos: name: Typos - runs-on: ubuntu-latest + runs-on: ubicloud-standard-4 steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@stable @@ -104,7 +104,7 @@ jobs: name: Test and Lint needs: skip-check if: needs.skip-check.outputs.should_skip != 'true' - runs-on: ubuntu-latest + runs-on: ubicloud-standard-4 timeout-minutes: 60 steps: - name: Checkout repository @@ -136,7 +136,7 @@ jobs: name: End-to-End Tests needs: skip-check if: needs.skip-check.outputs.should_skip != 'true' - runs-on: ubuntu-latest + runs-on: ubicloud-standard-4 timeout-minutes: 30 steps: - name: Checkout repository diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 263c946d..37d41b50 100644 --- 
a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -72,7 +72,7 @@ jobs: # Check if we should build Docker images build-check: name: Docker Build Check - runs-on: ubuntu-latest + runs-on: ubicloud-standard-4 outputs: should_build: ${{ steps.check.outputs.should_build }} should_push: ${{ steps.check.outputs.should_push }} @@ -264,7 +264,7 @@ jobs: name: Build Docker Images needs: build-check if: needs.build-check.outputs.should_build == 'true' - runs-on: ubuntu-latest + runs-on: ubicloud-standard-4 timeout-minutes: 60 steps: - name: Checkout repository @@ -404,7 +404,7 @@ jobs: name: Docker Build Summary needs: [ build-check, build-docker ] if: always() && needs.build-check.outputs.should_build == 'true' - runs-on: ubuntu-latest + runs-on: ubicloud-standard-4 steps: - name: Docker build completion summary run: | diff --git a/.github/workflows/e2e-mint.yml b/.github/workflows/e2e-mint.yml index 7be4086a..0baf7f49 100644 --- a/.github/workflows/e2e-mint.yml +++ b/.github/workflows/e2e-mint.yml @@ -23,7 +23,7 @@ env: jobs: mint-single: - runs-on: ubuntu-latest + runs-on: ubicloud-standard-4 timeout-minutes: 40 steps: - name: Checkout @@ -100,7 +100,7 @@ jobs: mint-multi: if: github.event_name == 'workflow_dispatch' && github.event.inputs.run-multi == 'true' needs: mint-single - runs-on: ubuntu-latest + runs-on: ubicloud-standard-4 timeout-minutes: 60 steps: - name: Checkout diff --git a/.github/workflows/e2e-s3tests.yml b/.github/workflows/e2e-s3tests.yml index bea59750..dcf99bf8 100644 --- a/.github/workflows/e2e-s3tests.yml +++ b/.github/workflows/e2e-s3tests.yml @@ -44,7 +44,7 @@ defaults: jobs: s3tests-single: if: github.event.inputs.test-mode == 'single' - runs-on: ubuntu-latest + runs-on: ubicloud-standard-4 timeout-minutes: 120 steps: - uses: actions/checkout@v4 @@ -198,7 +198,7 @@ jobs: s3tests-multi: if: github.event_name == 'workflow_dispatch' && github.event.inputs.test-mode == 'multi' - runs-on: ubuntu-latest + runs-on: ubicloud-standard-4 timeout-minutes: 150 steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/helm-package.yml b/.github/workflows/helm-package.yml index 5a231c88..ca9aec56 100644 --- a/.github/workflows/helm-package.yml +++ b/.github/workflows/helm-package.yml @@ -27,7 +27,7 @@ env: jobs: build-helm-package: - runs-on: ubuntu-latest + runs-on: ubicloud-standard-4 # Only run on successful builds triggered by tag pushes (version format: x.y.z or x.y.z-suffix) if: | github.event.workflow_run.conclusion == 'success' && @@ -63,7 +63,7 @@ jobs: retention-days: 1 publish-helm-package: - runs-on: ubuntu-latest + runs-on: ubicloud-standard-4 needs: [ build-helm-package ] steps: diff --git a/.github/workflows/issue-translator.yml b/.github/workflows/issue-translator.yml index 0cb805d4..b3c9d206 100644 --- a/.github/workflows/issue-translator.yml +++ b/.github/workflows/issue-translator.yml @@ -25,7 +25,7 @@ permissions: jobs: build: - runs-on: ubuntu-latest + runs-on: ubicloud-standard-4 steps: - uses: usthe/issues-translate-action@v2.7 with: diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index 5ea7c4e2..c2b2ea6f 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -40,7 +40,7 @@ env: jobs: performance-profile: name: Performance Profiling - runs-on: ubuntu-latest + runs-on: ubicloud-standard-4 timeout-minutes: 30 steps: - name: Checkout repository @@ -115,7 +115,7 @@ jobs: benchmark: name: Benchmark Tests - runs-on: ubuntu-latest + runs-on: ubicloud-standard-4 
timeout-minutes: 45 steps: - name: Checkout repository From 10579530521773d4d786ae5742d80971a9e8e47b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=94=90=E5=B0=8F=E9=B8=AD?= Date: Fri, 19 Dec 2025 10:15:52 +0800 Subject: [PATCH 06/26] fix: Remove the compression check that has already been handled by `tower-http::CompressionLayer`. (#1190) Co-authored-by: houseme Co-authored-by: loverustfs --- rustfs/src/server/compress.rs | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/rustfs/src/server/compress.rs b/rustfs/src/server/compress.rs index da7a3616..9276869f 100644 --- a/rustfs/src/server/compress.rs +++ b/rustfs/src/server/compress.rs @@ -226,14 +226,19 @@ impl Default for CompressionConfig { /// - Only compresses responses that match configured file extensions OR MIME types /// - Respects minimum file size threshold /// - Always skips error responses (4xx, 5xx) to avoid Content-Length issues -/// - Skips already encoded responses (Content-Encoding header present) /// /// # Design Philosophy /// Unlike the previous blacklist approach, this whitelist approach: /// 1. Only compresses explicitly configured content types /// 2. Preserves Content-Length for all other responses (better browser UX) /// 3. Aligns with MinIO's opt-in compression behavior -/// 4. Avoids double compression by checking Content-Encoding header +/// +/// # Note on tower-http Integration +/// The `tower-http::CompressionLayer` automatically handles: +/// - Skipping responses with `Content-Encoding` header (already compressed) +/// - Skipping responses with `Content-Range` header (Range requests) +/// +/// These checks are performed before calling this predicate, so we don't need to check them here. /// /// # Extension Matching /// File extension matching works by extracting the filename from the @@ -273,19 +278,8 @@ impl Predicate for CompressionPredicate { return false; } - // Skip if content is already encoded (e.g., gzip, br, deflate, zstd) - // Re-compressing already compressed content provides no benefit and may cause issues - if let Some(content_encoding) = response.headers().get(http::header::CONTENT_ENCODING) { - if let Ok(encoding) = content_encoding.to_str() { - let encoding_lower = encoding.to_lowercase(); - // Check for common compression encodings - // "identity" means no encoding, so we can still compress - if encoding_lower != "identity" && !encoding_lower.is_empty() { - debug!("Skipping compression for already encoded response: Content-Encoding={}", encoding); - return false; - } - } - } + // Note: CONTENT_ENCODING and CONTENT_RANGE checks are handled by tower-http's + // CompressionLayer before calling this predicate, so we don't need to check them here. 
// Check Content-Length header for minimum size threshold if let Some(content_length) = response.headers().get(http::header::CONTENT_LENGTH) { From 4abfc9f554cb708c9fe1d909b1ee154f5662ec57 Mon Sep 17 00:00:00 2001 From: houseme Date: Fri, 19 Dec 2025 12:07:07 +0800 Subject: [PATCH 07/26] Fix/fix event 1216 (#1191) Signed-off-by: loverustfs Co-authored-by: loverustfs --- .github/workflows/ci.yml | 10 +- .github/workflows/e2e-mint.yml | 20 +- .github/workflows/e2e-s3tests.yml | 18 +- .gitignore | 3 + Cargo.lock | 163 ++++-- Cargo.toml | 6 +- crates/audit/Cargo.toml | 1 + crates/audit/src/factory.rs | 223 ++++++++ crates/audit/src/lib.rs | 1 + crates/audit/src/registry.rs | 506 ++++++++---------- crates/audit/src/system.rs | 91 +++- crates/audit/tests/integration_test.rs | 4 +- crates/audit/tests/performance_test.rs | 4 +- crates/audit/tests/system_integration_test.rs | 4 +- crates/common/src/globals.rs | 18 +- crates/config/src/audit/mod.rs | 2 +- crates/config/src/constants/env.rs | 3 +- crates/config/src/notify/mod.rs | 24 +- crates/config/src/notify/store.rs | 4 +- crates/ecstore/src/admin_server_info.rs | 4 +- crates/ecstore/src/config/audit.rs | 6 +- crates/ecstore/src/config/notify.rs | 6 +- crates/ecstore/src/metrics_realtime.rs | 6 +- crates/ecstore/src/sets.rs | 4 +- crates/ecstore/src/store.rs | 12 +- crates/notify/Cargo.toml | 1 + crates/notify/examples/webhook.rs | 13 +- crates/notify/src/factory.rs | 12 +- crates/notify/src/integration.rs | 14 +- crates/notify/src/registry.rs | 22 +- crates/protos/src/lib.rs | 6 +- crates/targets/src/event_name.rs | 2 +- crates/targets/src/target/mqtt.rs | 17 +- crates/targets/src/target/webhook.rs | 13 +- rustfs/src/main.rs | 3 +- rustfs/src/server/audit.rs | 5 +- rustfs/src/storage/ecfs.rs | 1 + rustfs/src/storage/tonic_service.rs | 14 +- scripts/run.sh | 53 +- 39 files changed, 828 insertions(+), 491 deletions(-) create mode 100644 crates/audit/src/factory.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9d36100c..3c7e7662 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -69,7 +69,7 @@ concurrency: env: CARGO_TERM_COLOR: always RUST_BACKTRACE: 1 - CARGO_BUILD_JOBS: 4 + CARGO_BUILD_JOBS: 8 jobs: @@ -78,7 +78,7 @@ jobs: permissions: actions: write contents: read - runs-on: ubicloud-standard-4 + runs-on: ubicloud-standard-4 outputs: should_skip: ${{ steps.skip_check.outputs.should_skip }} steps: @@ -95,7 +95,7 @@ jobs: name: Typos runs-on: ubicloud-standard-4 steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@stable - name: Typos check with custom config file uses: crate-ci/typos@master @@ -108,7 +108,7 @@ jobs: timeout-minutes: 60 steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Setup Rust environment uses: ./.github/actions/setup @@ -140,7 +140,7 @@ jobs: timeout-minutes: 30 steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Clean up previous test run run: | diff --git a/.github/workflows/e2e-mint.yml b/.github/workflows/e2e-mint.yml index 0baf7f49..5923cfde 100644 --- a/.github/workflows/e2e-mint.yml +++ b/.github/workflows/e2e-mint.yml @@ -1,8 +1,22 @@ +# Copyright 2024 RustFS Team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + name: e2e-mint on: push: - branches: [main] + branches: [ main ] paths: - ".github/workflows/e2e-mint.yml" - "Dockerfile.source" @@ -27,7 +41,7 @@ jobs: timeout-minutes: 40 steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Enable buildx uses: docker/setup-buildx-action@v3 @@ -104,7 +118,7 @@ jobs: timeout-minutes: 60 steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Enable buildx uses: docker/setup-buildx-action@v3 diff --git a/.github/workflows/e2e-s3tests.yml b/.github/workflows/e2e-s3tests.yml index dcf99bf8..e29d13aa 100644 --- a/.github/workflows/e2e-s3tests.yml +++ b/.github/workflows/e2e-s3tests.yml @@ -1,3 +1,17 @@ +# Copyright 2024 RustFS Team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + name: e2e-s3tests on: @@ -47,7 +61,7 @@ jobs: runs-on: ubicloud-standard-4 timeout-minutes: 120 steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Enable buildx uses: docker/setup-buildx-action@v3 @@ -201,7 +215,7 @@ jobs: runs-on: ubicloud-standard-4 timeout-minutes: 150 steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Enable buildx uses: docker/setup-buildx-action@v3 diff --git a/.gitignore b/.gitignore index c5218d5f..d0139ca6 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,6 @@ deploy/logs/*.log.* /s3-tests-local/ /s3tests.conf /s3tests.conf.* +*.events +*.audit +*.snappy \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index c88576d7..7ada333c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -644,9 +644,9 @@ dependencies = [ [[package]] name = "aws-lc-rs" -version = "1.15.1" +version = "1.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b5ce75405893cd713f9ab8e297d8e438f624dde7d706108285f7e17a25a180f" +checksum = "6a88aab2464f1f25453baa7a07c84c5b7684e274054ba06817f382357f77a288" dependencies = [ "aws-lc-sys", "zeroize", @@ -654,9 +654,9 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.34.0" +version = "0.35.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "179c3777a8b5e70e90ea426114ffc565b2c1a9f82f6c4a0c5a34aa6ef5e781b6" +checksum = "b45afffdee1e7c9126814751f88dddc747f41d91da16c9551a0f1e8a11e788a1" dependencies = [ "cc", "cmake", @@ -914,9 +914,9 @@ dependencies = [ [[package]] name = "aws-smithy-json" -version = "0.61.8" +version = "0.61.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6864c190cbb8e30cf4b77b2c8f3b6dfffa697a09b7218d2f7cd3d4c4065a9f7" +checksum = "49fa1213db31ac95288d981476f78d05d9cbb0353d22cdf3472cc05bb02f6551" 
dependencies = [ "aws-smithy-types", ] @@ -942,9 +942,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.9.5" +version = "1.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a392db6c583ea4a912538afb86b7be7c5d8887d91604f50eb55c262ee1b4a5f5" +checksum = "65fda37911905ea4d3141a01364bc5509a0f32ae3f3b22d6e330c0abfb62d247" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -1337,9 +1337,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.19.0" +version = "3.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" +checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" [[package]] name = "bytemuck" @@ -1633,9 +1633,9 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" [[package]] name = "cmake" -version = "0.1.56" +version = "0.1.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b042e5d8a74ae91bb0961acd039822472ec99f8ab0948cbf6d1369588f8be586" +checksum = "75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d" dependencies = [ "cc", ] @@ -2082,6 +2082,16 @@ dependencies = [ "darling_macro 0.21.3", ] +[[package]] +name = "darling" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d" +dependencies = [ + "darling_core 0.23.0", + "darling_macro 0.23.0", +] + [[package]] name = "darling_core" version = "0.14.4" @@ -2124,6 +2134,19 @@ dependencies = [ "syn 2.0.111", ] +[[package]] +name = "darling_core" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9865a50f7c335f53564bb694ef660825eb8610e0a53d3e11bf1b0d3df31e03b0" +dependencies = [ + "ident_case", + "proc-macro2", + "quote", + "strsim 0.11.1", + "syn 2.0.111", +] + [[package]] name = "darling_macro" version = "0.14.4" @@ -2157,6 +2180,17 @@ dependencies = [ "syn 2.0.111", ] +[[package]] +name = "darling_macro" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" +dependencies = [ + "darling_core 0.23.0", + "quote", + "syn 2.0.111", +] + [[package]] name = "dashmap" version = "6.1.0" @@ -2997,7 +3031,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -3267,7 +3301,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -4292,7 +4326,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.5.10", + "socket2 0.6.1", "system-configuration", "tokio", "tower-service", @@ -4312,7 +4346,7 @@ dependencies = [ "js-sys", "log", "wasm-bindgen", - "windows-core", + "windows-core 0.62.2", ] [[package]] @@ -4572,7 +4606,7 @@ checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" dependencies = [ "hermit-abi", "libc", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -4797,13 +4831,13 @@ dependencies = [ [[package]] name = "libredox" -version = "0.1.10" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "416f7e718bdb06000964960ffa43b4335ad4012ae8b99060261aa4a8088d5ccb" 
+checksum = "df15f6eac291ed1cf25865b1ee60399f57e7c227e7f51bdbd4c5270396a9ed50" dependencies = [ "bitflags 2.10.0", "libc", - "redox_syscall", + "redox_syscall 0.6.0", ] [[package]] @@ -5260,7 +5294,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -5698,7 +5732,7 @@ checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" dependencies = [ "cfg-if", "libc", - "redox_syscall", + "redox_syscall 0.5.18", "smallvec", "windows-link 0.2.1", ] @@ -5758,9 +5792,9 @@ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" [[package]] name = "pastey" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57d6c094ee800037dff99e02cab0eaf3142826586742a270ab3d7a62656bd27a" +checksum = "b867cad97c0791bbd3aaa6472142568c6c9e8f71937e98379f584cfb0cf35bec" [[package]] name = "path-absolutize" @@ -6187,7 +6221,7 @@ version = "3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" dependencies = [ - "toml_edit 0.23.9", + "toml_edit 0.23.10+spec-1.0.0", ] [[package]] @@ -6422,7 +6456,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls 0.23.35", - "socket2 0.5.10", + "socket2 0.6.1", "thiserror 2.0.17", "tokio", "tracing", @@ -6459,9 +6493,9 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.5.10", + "socket2 0.6.1", "tracing", - "windows-sys 0.52.0", + "windows-sys 0.60.2", ] [[package]] @@ -6614,6 +6648,15 @@ dependencies = [ "bitflags 2.10.0", ] +[[package]] +name = "redox_syscall" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec96166dafa0886eb81fe1c0a388bece180fbef2135f97c1e2cf8302e74b43b5" +dependencies = [ + "bitflags 2.10.0", +] + [[package]] name = "redox_users" version = "0.5.2" @@ -6791,9 +6834,9 @@ dependencies = [ [[package]] name = "rmcp" -version = "0.11.0" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5df440eaa43f8573491ed4a5899719b6d29099500774abba12214a095a4083ed" +checksum = "528d42f8176e6e5e71ea69182b17d1d0a19a6b3b894b564678b74cd7cab13cfa" dependencies = [ "async-trait", "base64", @@ -6813,11 +6856,11 @@ dependencies = [ [[package]] name = "rmcp-macros" -version = "0.11.0" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ef03779cccab8337dd8617c53fce5c98ec21794febc397531555472ca28f8c3" +checksum = "e3f81daaa494eb8e985c9462f7d6ce1ab05e5299f48aafd76cdd3d8b060e6f59" dependencies = [ - "darling 0.21.3", + "darling 0.23.0", "proc-macro2", "quote", "serde_json", @@ -7126,6 +7169,7 @@ dependencies = [ name = "rustfs-audit" version = "0.0.5" dependencies = [ + "async-trait", "chrono", "const-str", "futures", @@ -7732,7 +7776,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.11.0", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -7786,9 +7830,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.13.1" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "708c0f9d5f54ba0272468c1d306a52c495b31fa155e91bc25371e6df7996908c" +checksum = "21e6f2ab2928ca4291b86736a8bd920a277a399bba1589409d72154ff87c1282" dependencies = [ "web-time", "zeroize", @@ -8852,7 +8896,7 
@@ dependencies = [ "getrandom 0.3.4", "once_cell", "rustix 1.1.2", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -9165,9 +9209,9 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.7.3" +version = "0.7.5+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2cdb639ebbc97961c51720f858597f7f24c4fc295327923af55b74c3c724533" +checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347" dependencies = [ "serde_core", ] @@ -9188,21 +9232,21 @@ dependencies = [ [[package]] name = "toml_edit" -version = "0.23.9" +version = "0.23.10+spec-1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d7cbc3b4b49633d57a0509303158ca50de80ae32c265093b24c414705807832" +checksum = "84c8b9f757e028cee9fa244aea147aab2a9ec09d5325a9b01e0a49730c2b5269" dependencies = [ "indexmap 2.12.1", - "toml_datetime 0.7.3", + "toml_datetime 0.7.5+spec-1.1.0", "toml_parser", "winnow", ] [[package]] name = "toml_parser" -version = "1.0.4" +version = "1.0.6+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0cbe268d35bdb4bb5a56a2de88d0ad0eb70af5384a99d648cd4b3d04039800e" +checksum = "a3198b4b0a8e11f09dd03e133c0280504d0801269e9afa46362ffde1cbeebf44" dependencies = [ "winnow", ] @@ -9342,9 +9386,9 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" -version = "0.1.43" +version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d15d90a0b5c19378952d479dc858407149d7bb45a14de0142f6c534b16fc647" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" dependencies = [ "log", "pin-project-lite", @@ -9377,9 +9421,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.35" +version = "0.1.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a04e24fab5c89c6a36eb8558c9656f30d81de51dfa4d3b45f26b21d61fa0a6c" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" dependencies = [ "once_cell", "valuable", @@ -9868,7 +9912,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -9884,7 +9928,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893" dependencies = [ "windows-collections", - "windows-core", + "windows-core 0.61.2", "windows-future", "windows-link 0.1.3", "windows-numerics", @@ -9896,7 +9940,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8" dependencies = [ - "windows-core", + "windows-core 0.61.2", ] [[package]] @@ -9912,13 +9956,26 @@ dependencies = [ "windows-strings 0.4.2", ] +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link 0.2.1", + "windows-result 0.4.1", + "windows-strings 0.5.1", +] + [[package]] name = "windows-future" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e" dependencies = [ - "windows-core", + "windows-core 0.61.2", "windows-link 0.1.3", "windows-threading", ] @@ -9963,7 +10020,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1" dependencies = [ - "windows-core", + "windows-core 0.61.2", "windows-link 0.1.3", ] diff --git a/Cargo.toml b/Cargo.toml index 33f24fe5..a93368d1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -130,7 +130,7 @@ flatbuffers = "25.9.23" form_urlencoded = "1.2.2" prost = "0.14.1" quick-xml = "0.38.4" -rmcp = { version = "0.11.0" } +rmcp = { version = "0.12.0" } rmp = { version = "0.8.14" } rmp-serde = { version = "1.3.0" } serde = { version = "1.0.228", features = ["derive"] } @@ -150,7 +150,7 @@ pbkdf2 = "0.13.0-rc.5" rsa = { version = "0.10.0-rc.10" } rustls = { version = "0.23.35", features = ["ring", "logging", "std", "tls12"], default-features = false } rustls-pemfile = "2.2.0" -rustls-pki-types = "1.13.1" +rustls-pki-types = "1.13.2" sha1 = "0.11.0-rc.3" sha2 = "0.11.0-rc.3" subtle = "2.6" @@ -238,7 +238,7 @@ temp-env = "0.3.6" tempfile = "3.23.0" test-case = "3.3.1" thiserror = "2.0.17" -tracing = { version = "0.1.43" } +tracing = { version = "0.1.44" } tracing-appender = "0.2.4" tracing-error = "0.2.1" tracing-opentelemetry = "0.32.0" diff --git a/crates/audit/Cargo.toml b/crates/audit/Cargo.toml index 414e05fc..ae97033e 100644 --- a/crates/audit/Cargo.toml +++ b/crates/audit/Cargo.toml @@ -29,6 +29,7 @@ categories = ["web-programming", "development-tools", "asynchronous", "api-bindi rustfs-targets = { workspace = true } rustfs-config = { workspace = true, features = ["audit", "constants"] } rustfs-ecstore = { workspace = true } +async-trait = { workspace = true } chrono = { workspace = true } const-str = { workspace = true } futures = { workspace = true } diff --git a/crates/audit/src/factory.rs b/crates/audit/src/factory.rs new file mode 100644 index 00000000..ea8cd9b9 --- /dev/null +++ b/crates/audit/src/factory.rs @@ -0,0 +1,223 @@ +// Copyright 2024 RustFS Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+use crate::AuditEntry;
+use async_trait::async_trait;
+use hashbrown::HashSet;
+use rumqttc::QoS;
+use rustfs_config::audit::{AUDIT_MQTT_KEYS, AUDIT_WEBHOOK_KEYS, ENV_AUDIT_MQTT_KEYS, ENV_AUDIT_WEBHOOK_KEYS};
+use rustfs_config::{
+    AUDIT_DEFAULT_DIR, DEFAULT_LIMIT, MQTT_BROKER, MQTT_KEEP_ALIVE_INTERVAL, MQTT_PASSWORD, MQTT_QOS, MQTT_QUEUE_DIR,
+    MQTT_QUEUE_LIMIT, MQTT_RECONNECT_INTERVAL, MQTT_TOPIC, MQTT_USERNAME, WEBHOOK_AUTH_TOKEN, WEBHOOK_CLIENT_CERT,
+    WEBHOOK_CLIENT_KEY, WEBHOOK_ENDPOINT, WEBHOOK_QUEUE_DIR, WEBHOOK_QUEUE_LIMIT,
+};
+use rustfs_ecstore::config::KVS;
+use rustfs_targets::{
+    Target,
+    error::TargetError,
+    target::{mqtt::MQTTArgs, webhook::WebhookArgs},
+};
+use std::time::Duration;
+use tracing::{debug, warn};
+use url::Url;
+
+/// Trait for creating targets from configuration
+#[async_trait]
+pub trait TargetFactory: Send + Sync {
+    /// Creates a target from configuration
+    async fn create_target(&self, id: String, config: &KVS) -> Result<Box<dyn Target<AuditEntry> + Send + Sync>, TargetError>;
+
+    /// Validates target configuration
+    fn validate_config(&self, id: &str, config: &KVS) -> Result<(), TargetError>;
+
+    /// Returns a set of valid configuration field names for this target type.
+    /// This is used to filter environment variables.
+    fn get_valid_fields(&self) -> HashSet<String>;
+
+    /// Returns a set of valid configuration env field names for this target type.
+    /// This is used to filter environment variables.
+    fn get_valid_env_fields(&self) -> HashSet<String>;
+}
+
+/// Factory for creating Webhook targets
+pub struct WebhookTargetFactory;
+
+#[async_trait]
+impl TargetFactory for WebhookTargetFactory {
+    async fn create_target(&self, id: String, config: &KVS) -> Result<Box<dyn Target<AuditEntry> + Send + Sync>, TargetError> {
+        // All config values are now read directly from the merged `config` KVS.
+        let endpoint = config
+            .lookup(WEBHOOK_ENDPOINT)
+            .ok_or_else(|| TargetError::Configuration("Missing webhook endpoint".to_string()))?;
+        let endpoint_url = Url::parse(&endpoint)
+            .map_err(|e| TargetError::Configuration(format!("Invalid endpoint URL: {e} (value: '{endpoint}')")))?;
+
+        let args = WebhookArgs {
+            enable: true, // If we are here, it's already enabled.
+            endpoint: endpoint_url,
+            auth_token: config.lookup(WEBHOOK_AUTH_TOKEN).unwrap_or_default(),
+            queue_dir: config.lookup(WEBHOOK_QUEUE_DIR).unwrap_or(AUDIT_DEFAULT_DIR.to_string()),
+            queue_limit: config
+                .lookup(WEBHOOK_QUEUE_LIMIT)
+                .and_then(|v| v.parse::<u64>().ok())
+                .unwrap_or(DEFAULT_LIMIT),
+            client_cert: config.lookup(WEBHOOK_CLIENT_CERT).unwrap_or_default(),
+            client_key: config.lookup(WEBHOOK_CLIENT_KEY).unwrap_or_default(),
+            target_type: rustfs_targets::target::TargetType::AuditLog,
+        };
+
+        let target = rustfs_targets::target::webhook::WebhookTarget::new(id, args)?;
+        Ok(Box::new(target))
+    }
+
+    fn validate_config(&self, _id: &str, config: &KVS) -> Result<(), TargetError> {
+        // Validation also uses the merged `config` KVS directly.
+        let endpoint = config
+            .lookup(WEBHOOK_ENDPOINT)
+            .ok_or_else(|| TargetError::Configuration("Missing webhook endpoint".to_string()))?;
+        debug!("endpoint: {}", endpoint);
+        let parsed_endpoint = endpoint.trim();
+        Url::parse(parsed_endpoint)
+            .map_err(|e| TargetError::Configuration(format!("Invalid endpoint URL: {e} (value: '{parsed_endpoint}')")))?;
+
+        let client_cert = config.lookup(WEBHOOK_CLIENT_CERT).unwrap_or_default();
+        let client_key = config.lookup(WEBHOOK_CLIENT_KEY).unwrap_or_default();
+
+        if client_cert.is_empty() != client_key.is_empty() {
+            return Err(TargetError::Configuration(
+                "Both client_cert and client_key must be specified together".to_string(),
+            ));
+        }
+
+        let queue_dir = config.lookup(WEBHOOK_QUEUE_DIR).unwrap_or(AUDIT_DEFAULT_DIR.to_string());
+        if !queue_dir.is_empty() && !std::path::Path::new(&queue_dir).is_absolute() {
+            return Err(TargetError::Configuration("Webhook queue directory must be an absolute path".to_string()));
+        }
+
+        Ok(())
+    }
+
+    fn get_valid_fields(&self) -> HashSet<String> {
+        AUDIT_WEBHOOK_KEYS.iter().map(|s| s.to_string()).collect()
+    }
+
+    fn get_valid_env_fields(&self) -> HashSet<String> {
+        ENV_AUDIT_WEBHOOK_KEYS.iter().map(|s| s.to_string()).collect()
+    }
+}
+
+/// Factory for creating MQTT targets
+pub struct MQTTTargetFactory;
+
+#[async_trait]
+impl TargetFactory for MQTTTargetFactory {
+    async fn create_target(&self, id: String, config: &KVS) -> Result<Box<dyn Target<AuditEntry> + Send + Sync>, TargetError> {
+        let broker = config
+            .lookup(MQTT_BROKER)
+            .ok_or_else(|| TargetError::Configuration("Missing MQTT broker".to_string()))?;
+        let broker_url = Url::parse(&broker)
+            .map_err(|e| TargetError::Configuration(format!("Invalid broker URL: {e} (value: '{broker}')")))?;
+
+        let topic = config
+            .lookup(MQTT_TOPIC)
+            .ok_or_else(|| TargetError::Configuration("Missing MQTT topic".to_string()))?;
+
+        let args = MQTTArgs {
+            enable: true, // Assumed enabled.
+            broker: broker_url,
+            topic,
+            qos: config
+                .lookup(MQTT_QOS)
+                .and_then(|v| v.parse::<u8>().ok())
+                .map(|q| match q {
+                    0 => QoS::AtMostOnce,
+                    1 => QoS::AtLeastOnce,
+                    2 => QoS::ExactlyOnce,
+                    _ => QoS::AtLeastOnce,
+                })
+                .unwrap_or(QoS::AtLeastOnce),
+            username: config.lookup(MQTT_USERNAME).unwrap_or_default(),
+            password: config.lookup(MQTT_PASSWORD).unwrap_or_default(),
+            max_reconnect_interval: config
+                .lookup(MQTT_RECONNECT_INTERVAL)
+                .and_then(|v| v.parse::<u64>().ok())
+                .map(Duration::from_secs)
+                .unwrap_or_else(|| Duration::from_secs(5)),
+            keep_alive: config
+                .lookup(MQTT_KEEP_ALIVE_INTERVAL)
+                .and_then(|v| v.parse::<u64>().ok())
+                .map(Duration::from_secs)
+                .unwrap_or_else(|| Duration::from_secs(30)),
+            queue_dir: config.lookup(MQTT_QUEUE_DIR).unwrap_or(AUDIT_DEFAULT_DIR.to_string()),
+            queue_limit: config
+                .lookup(MQTT_QUEUE_LIMIT)
+                .and_then(|v| v.parse::<u64>().ok())
+                .unwrap_or(DEFAULT_LIMIT),
+            target_type: rustfs_targets::target::TargetType::AuditLog,
+        };
+
+        let target = rustfs_targets::target::mqtt::MQTTTarget::new(id, args)?;
+        Ok(Box::new(target))
+    }
+
+    fn validate_config(&self, _id: &str, config: &KVS) -> Result<(), TargetError> {
+        let broker = config
+            .lookup(MQTT_BROKER)
+            .ok_or_else(|| TargetError::Configuration("Missing MQTT broker".to_string()))?;
+        let url = Url::parse(&broker)
+            .map_err(|e| TargetError::Configuration(format!("Invalid broker URL: {e} (value: '{broker}')")))?;
+
+        match url.scheme() {
+            "tcp" | "ssl" | "ws" | "wss" | "mqtt" | "mqtts" => {}
+            _ => {
+                return Err(TargetError::Configuration("Unsupported broker URL scheme".to_string()));
+            }
+        }
+
+        if config.lookup(MQTT_TOPIC).is_none() {
+            return Err(TargetError::Configuration("Missing MQTT topic".to_string()));
+        }
+
+        if let Some(qos_str) = config.lookup(MQTT_QOS) {
+            let qos = qos_str
+                .parse::<u8>()
+                .map_err(|_| TargetError::Configuration("Invalid QoS value".to_string()))?;
+            if qos > 2 {
+                return Err(TargetError::Configuration("QoS must be 0, 1, or 2".to_string()));
+            }
+        }
+
+        let queue_dir = config.lookup(MQTT_QUEUE_DIR).unwrap_or_default();
+        if !queue_dir.is_empty() {
+            if !std::path::Path::new(&queue_dir).is_absolute() {
+                return Err(TargetError::Configuration("MQTT queue directory must be an absolute path".to_string()));
+            }
+            if let Some(qos_str) = config.lookup(MQTT_QOS) {
+                if qos_str == "0" {
+                    warn!("Using queue_dir with QoS 0 may result in event loss");
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    fn get_valid_fields(&self) -> HashSet<String> {
+        AUDIT_MQTT_KEYS.iter().map(|s| s.to_string()).collect()
+    }
+
+    fn get_valid_env_fields(&self) -> HashSet<String> {
+        ENV_AUDIT_MQTT_KEYS.iter().map(|s| s.to_string()).collect()
+    }
+}
diff --git a/crates/audit/src/lib.rs b/crates/audit/src/lib.rs
index 8207bc23..7cca0063 100644
--- a/crates/audit/src/lib.rs
+++ b/crates/audit/src/lib.rs
@@ -20,6 +20,7 @@
 pub mod entity;
 pub mod error;
+pub mod factory;
 pub mod global;
 pub mod observability;
 pub mod registry;
diff --git a/crates/audit/src/registry.rs b/crates/audit/src/registry.rs
index 30aa325a..c73b300a 100644
--- a/crates/audit/src/registry.rs
+++ b/crates/audit/src/registry.rs
@@ -12,29 +12,26 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
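For reviewers, a minimal sketch (not part of this patch) of how the `TargetFactory` API introduced in `crates/audit/src/factory.rs` is meant to be driven. The instance id `primary`, the endpoint value, and the queue directory are illustrative; the key names come from the `rustfs_config` constants imported by factory.rs, and the validate-then-create flow mirrors `AuditRegistry::create_target` below.

```rust
use rustfs_audit::factory::{TargetFactory, WebhookTargetFactory};
use rustfs_config::{WEBHOOK_ENDPOINT, WEBHOOK_QUEUE_DIR};
use rustfs_ecstore::config::KVS;
use rustfs_targets::error::TargetError;

async fn create_example_webhook_target() -> Result<(), TargetError> {
    // Build a merged KVS the way AuditRegistry would hand it to the factory.
    let mut cfg = KVS::new();
    cfg.insert(WEBHOOK_ENDPOINT.to_string(), "http://127.0.0.1:8080/audit".to_string());
    cfg.insert(WEBHOOK_QUEUE_DIR.to_string(), "/opt/rustfs/audit".to_string());

    let factory = WebhookTargetFactory;
    // Mirrors AuditRegistry::create_target: validate the config, then build the target.
    factory.validate_config("primary", &cfg)?;
    let _target = factory.create_target("primary".to_string(), &cfg).await?;
    Ok(())
}
```

The same flow applies to `MQTTTargetFactory`, only with the MQTT key constants.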
-use crate::{AuditEntry, AuditError, AuditResult}; -use futures::{StreamExt, stream::FuturesUnordered}; +use crate::{ + AuditEntry, AuditError, AuditResult, + factory::{MQTTTargetFactory, TargetFactory, WebhookTargetFactory}, +}; +use futures::StreamExt; +use futures::stream::FuturesUnordered; use hashbrown::{HashMap, HashSet}; -use rustfs_config::{ - DEFAULT_DELIMITER, ENABLE_KEY, ENV_PREFIX, MQTT_BROKER, MQTT_KEEP_ALIVE_INTERVAL, MQTT_PASSWORD, MQTT_QOS, MQTT_QUEUE_DIR, - MQTT_QUEUE_LIMIT, MQTT_RECONNECT_INTERVAL, MQTT_TOPIC, MQTT_USERNAME, WEBHOOK_AUTH_TOKEN, WEBHOOK_BATCH_SIZE, - WEBHOOK_CLIENT_CERT, WEBHOOK_CLIENT_KEY, WEBHOOK_ENDPOINT, WEBHOOK_HTTP_TIMEOUT, WEBHOOK_MAX_RETRY, WEBHOOK_QUEUE_DIR, - WEBHOOK_QUEUE_LIMIT, WEBHOOK_RETRY_INTERVAL, audit::AUDIT_ROUTE_PREFIX, -}; +use rustfs_config::{DEFAULT_DELIMITER, ENABLE_KEY, ENV_PREFIX, EnableState, audit::AUDIT_ROUTE_PREFIX}; use rustfs_ecstore::config::{Config, KVS}; -use rustfs_targets::{ - Target, TargetError, - target::{ChannelTargetType, TargetType, mqtt::MQTTArgs, webhook::WebhookArgs}, -}; +use rustfs_targets::{Target, TargetError, target::ChannelTargetType}; +use std::str::FromStr; use std::sync::Arc; -use std::time::Duration; use tracing::{debug, error, info, warn}; -use url::Url; /// Registry for managing audit targets pub struct AuditRegistry { /// Storage for created targets targets: HashMap + Send + Sync>>, + /// Factories for creating targets + factories: HashMap>, } impl Default for AuditRegistry { @@ -46,162 +43,207 @@ impl Default for AuditRegistry { impl AuditRegistry { /// Creates a new AuditRegistry pub fn new() -> Self { - Self { targets: HashMap::new() } + let mut registry = AuditRegistry { + factories: HashMap::new(), + targets: HashMap::new(), + }; + + // Register built-in factories + registry.register(ChannelTargetType::Webhook.as_str(), Box::new(WebhookTargetFactory)); + registry.register(ChannelTargetType::Mqtt.as_str(), Box::new(MQTTTargetFactory)); + + registry } - /// Creates all audit targets from system configuration and environment variables. + /// Registers a new factory for a target type + /// + /// # Arguments + /// * `target_type` - The type of the target (e.g., "webhook", "mqtt"). + /// * `factory` - The factory instance to create targets of this type. + pub fn register(&mut self, target_type: &str, factory: Box) { + self.factories.insert(target_type.to_string(), factory); + } + + /// Creates a target of the specified type with the given ID and configuration + /// + /// # Arguments + /// * `target_type` - The type of the target (e.g., "webhook", "mqtt"). + /// * `id` - The identifier for the target instance. + /// * `config` - The configuration key-value store for the target. + /// + /// # Returns + /// * `Result + Send + Sync>, TargetError>` - The created target or an error. + pub async fn create_target( + &self, + target_type: &str, + id: String, + config: &KVS, + ) -> Result + Send + Sync>, TargetError> { + let factory = self + .factories + .get(target_type) + .ok_or_else(|| TargetError::Configuration(format!("Unknown target type: {target_type}")))?; + + // Validate configuration before creating target + factory.validate_config(&id, config)?; + + // Create target + factory.create_target(id, config).await + } + + /// Creates all targets from a configuration + /// Create all notification targets from system configuration and environment variables. /// This method processes the creation of each target concurrently as follows: - /// 1. Iterate through supported target types (webhook, mqtt). - /// 2. 
For each type, resolve its configuration from file and environment variables. + /// 1. Iterate through all registered target types (e.g. webhooks, mqtt). + /// 2. For each type, resolve its configuration in the configuration file and environment variables. /// 3. Identify all target instance IDs that need to be created. - /// 4. Merge configurations with precedence: ENV > file instance > file default. - /// 5. Create async tasks for enabled instances. - /// 6. Execute tasks concurrently and collect successful targets. - /// 7. Persist successful configurations back to system storage. - pub async fn create_targets_from_config( - &mut self, + /// 4. Combine the default configuration, file configuration, and environment variable configuration for each instance. + /// 5. If the instance is enabled, create an asynchronous task for it to instantiate. + /// 6. Concurrency executes all creation tasks and collects results. + pub async fn create_audit_targets_from_config( + &self, config: &Config, ) -> AuditResult + Send + Sync>>> { // Collect only environment variables with the relevant prefix to reduce memory usage let all_env: Vec<(String, String)> = std::env::vars().filter(|(key, _)| key.starts_with(ENV_PREFIX)).collect(); - // A collection of asynchronous tasks for concurrently executing target creation let mut tasks = FuturesUnordered::new(); - // let final_config = config.clone(); - + // let final_config = config.clone(); // Clone a configuration for aggregating the final result // Record the defaults for each segment so that the segment can eventually be rebuilt let mut section_defaults: HashMap = HashMap::new(); - - // Supported target types for audit - let target_types = vec![ChannelTargetType::Webhook.as_str(), ChannelTargetType::Mqtt.as_str()]; - - // 1. Traverse all target types and process them - for target_type in target_types { - let span = tracing::Span::current(); - span.record("target_type", target_type); - info!(target_type = %target_type, "Starting audit target type processing"); + // 1. Traverse all registered plants and process them by target type + for (target_type, factory) in &self.factories { + tracing::Span::current().record("target_type", target_type.as_str()); + info!("Start working on target types..."); // 2. Prepare the configuration source + // 2.1. Get the configuration segment in the file, e.g. 'audit_webhook' let section_name = format!("{AUDIT_ROUTE_PREFIX}{target_type}").to_lowercase(); let file_configs = config.0.get(§ion_name).cloned().unwrap_or_default(); + // 2.2. Get the default configuration for that type let default_cfg = file_configs.get(DEFAULT_DELIMITER).cloned().unwrap_or_default(); - debug!(?default_cfg, "Retrieved default configuration"); + debug!(?default_cfg, "Get the default configuration"); // Save defaults for eventual write back section_defaults.insert(section_name.clone(), default_cfg.clone()); - // Get valid fields for the target type - let valid_fields = match target_type { - "webhook" => get_webhook_valid_fields(), - "mqtt" => get_mqtt_valid_fields(), - _ => { - warn!(target_type = %target_type, "Unknown target type, skipping"); - continue; - } - }; - debug!(?valid_fields, "Retrieved valid configuration fields"); + // *** Optimization point 1: Get all legitimate fields of the current target type *** + let valid_fields = factory.get_valid_fields(); + debug!(?valid_fields, "Get the legitimate configuration fields"); // 3. 
Resolve instance IDs and configuration overrides from environment variables let mut instance_ids_from_env = HashSet::new(); - let mut env_overrides: HashMap> = HashMap::new(); - - for (env_key, env_value) in &all_env { - let audit_prefix = format!("{ENV_PREFIX}{AUDIT_ROUTE_PREFIX}{target_type}").to_uppercase(); - if !env_key.starts_with(&audit_prefix) { - continue; - } - - let suffix = &env_key[audit_prefix.len()..]; - if suffix.is_empty() { - continue; - } - - // Parse field and instance from suffix (FIELD_INSTANCE or FIELD) - let (field_name, instance_id) = if let Some(last_underscore) = suffix.rfind('_') { - let potential_field = &suffix[1..last_underscore]; // Skip leading _ - let potential_instance = &suffix[last_underscore + 1..]; - - // Check if the part before the last underscore is a valid field - if valid_fields.contains(&potential_field.to_lowercase()) { - (potential_field.to_lowercase(), potential_instance.to_lowercase()) - } else { - // Treat the entire suffix as field name with default instance - (suffix[1..].to_lowercase(), DEFAULT_DELIMITER.to_string()) + // 3.1. Instance discovery: Based on the '..._ENABLE_INSTANCEID' format + let enable_prefix = + format!("{ENV_PREFIX}{AUDIT_ROUTE_PREFIX}{target_type}{DEFAULT_DELIMITER}{ENABLE_KEY}{DEFAULT_DELIMITER}") + .to_uppercase(); + for (key, value) in &all_env { + if EnableState::from_str(value).ok().map(|s| s.is_enabled()).unwrap_or(false) { + if let Some(id) = key.strip_prefix(&enable_prefix) { + if !id.is_empty() { + instance_ids_from_env.insert(id.to_lowercase()); + } } - } else { - // No underscore, treat as field with default instance - (suffix[1..].to_lowercase(), DEFAULT_DELIMITER.to_string()) - }; - - if valid_fields.contains(&field_name) { - if instance_id != DEFAULT_DELIMITER { - instance_ids_from_env.insert(instance_id.clone()); - } - env_overrides - .entry(instance_id) - .or_default() - .insert(field_name, env_value.clone()); - } else { - debug!( - env_key = %env_key, - field_name = %field_name, - "Ignoring environment variable field not found in valid fields for target type {}", - target_type - ); } } - debug!(?env_overrides, "Completed environment variable analysis"); + + // 3.2. Parse all relevant environment variable configurations + // 3.2.1. Build environment variable prefixes such as 'RUSTFS_AUDIT_WEBHOOK_' + let env_prefix = format!("{ENV_PREFIX}{AUDIT_ROUTE_PREFIX}{target_type}{DEFAULT_DELIMITER}").to_uppercase(); + // 3.2.2. 
'env_overrides' is used to store configurations parsed from environment variables in the format: {instance id -> {field -> value}} + let mut env_overrides: HashMap> = HashMap::new(); + for (key, value) in &all_env { + if let Some(rest) = key.strip_prefix(&env_prefix) { + // Use rsplitn to split from the right side to properly extract the INSTANCE_ID at the end + // Format: _ or + let mut parts = rest.rsplitn(2, DEFAULT_DELIMITER); + + // The first part from the right is INSTANCE_ID + let instance_id_part = parts.next().unwrap_or(DEFAULT_DELIMITER); + // The remaining part is FIELD_NAME + let field_name_part = parts.next(); + + let (field_name, instance_id) = match field_name_part { + // Case 1: The format is _ + // e.g., rest = "ENDPOINT_PRIMARY" -> field_name="ENDPOINT", instance_id="PRIMARY" + Some(field) => (field.to_lowercase(), instance_id_part.to_lowercase()), + // Case 2: The format is (without INSTANCE_ID) + // e.g., rest = "ENABLE" -> field_name="ENABLE", instance_id="" (Universal configuration `_ DEFAULT_DELIMITER`) + None => (instance_id_part.to_lowercase(), DEFAULT_DELIMITER.to_string()), + }; + + // *** Optimization point 2: Verify whether the parsed field_name is legal *** + if !field_name.is_empty() && valid_fields.contains(&field_name) { + debug!( + instance_id = %if instance_id.is_empty() { DEFAULT_DELIMITER } else { &instance_id }, + %field_name, + %value, + "Parsing to environment variables" + ); + env_overrides + .entry(instance_id) + .or_default() + .insert(field_name, value.clone()); + } else { + // Ignore illegal field names + warn!( + field_name = %field_name, + "Ignore environment variable fields, not found in the list of valid fields for target type {}", + target_type + ); + } + } + } + debug!(?env_overrides, "Complete the environment variable analysis"); // 4. Determine all instance IDs that need to be processed let mut all_instance_ids: HashSet = file_configs.keys().filter(|k| *k != DEFAULT_DELIMITER).cloned().collect(); all_instance_ids.extend(instance_ids_from_env); - debug!(?all_instance_ids, "Determined all instance IDs"); + debug!(?all_instance_ids, "Determine all instance IDs"); // 5. Merge configurations and create tasks for each instance for id in all_instance_ids { - // 5.1. Merge configuration, priority: Environment variables > File instance > File default + // 5.1. Merge configuration, priority: Environment variables > File instance configuration > File default configuration let mut merged_config = default_cfg.clone(); - - // Apply file instance configuration if available + // Instance-specific configuration in application files if let Some(file_instance_cfg) = file_configs.get(&id) { merged_config.extend(file_instance_cfg.clone()); } - - // Apply environment variable overrides + // Application instance-specific environment variable configuration if let Some(env_instance_cfg) = env_overrides.get(&id) { + // Convert HashMap to KVS let mut kvs_from_env = KVS::new(); for (k, v) in env_instance_cfg { kvs_from_env.insert(k.clone(), v.clone()); } merged_config.extend(kvs_from_env); } - debug!(instance_id = %id, ?merged_config, "Completed configuration merge"); + debug!(instance_id = %id, ?merged_config, "Complete configuration merge"); // 5.2. 
Check if the instance is enabled let enabled = merged_config .lookup(ENABLE_KEY) - .map(|v| parse_enable_value(&v)) + .map(|v| { + EnableState::from_str(v.as_str()) + .ok() + .map(|s| s.is_enabled()) + .unwrap_or(false) + }) .unwrap_or(false); if enabled { - info!(instance_id = %id, "Creating audit target"); - - // Create task for concurrent execution - let target_type_clone = target_type.to_string(); - let id_clone = id.clone(); - let merged_config_arc = Arc::new(merged_config.clone()); - let task = tokio::spawn(async move { - let result = create_audit_target(&target_type_clone, &id_clone, &merged_config_arc).await; - (target_type_clone, id_clone, result, merged_config_arc) + info!(instance_id = %id, "Target is enabled, ready to create a task"); + // 5.3. Create asynchronous tasks for enabled instances + let target_type_clone = target_type.clone(); + let tid = id.clone(); + let merged_config_arc = Arc::new(merged_config); + tasks.push(async move { + let result = factory.create_target(tid.clone(), &merged_config_arc).await; + (target_type_clone, tid, result, Arc::clone(&merged_config_arc)) }); - - tasks.push(task); - - // Update final config with successful instance - // final_config.0.entry(section_name.clone()).or_default().insert(id, merged_config); } else { - info!(instance_id = %id, "Skipping disabled audit target, will be removed from final configuration"); + info!(instance_id = %id, "Skip the disabled target and will be removed from the final configuration"); // Remove disabled target from final configuration // final_config.0.entry(section_name.clone()).or_default().remove(&id); } @@ -211,30 +253,28 @@ impl AuditRegistry { // 6. Concurrently execute all creation tasks and collect results let mut successful_targets = Vec::new(); let mut successful_configs = Vec::new(); - while let Some(task_result) = tasks.next().await { - match task_result { - Ok((target_type, id, result, kvs_arc)) => match result { - Ok(target) => { - info!(target_type = %target_type, instance_id = %id, "Created audit target successfully"); - successful_targets.push(target); - successful_configs.push((target_type, id, kvs_arc)); - } - Err(e) => { - error!(target_type = %target_type, instance_id = %id, error = %e, "Failed to create audit target"); - } - }, + while let Some((target_type, id, result, final_config)) = tasks.next().await { + match result { + Ok(target) => { + info!(target_type = %target_type, instance_id = %id, "Create a target successfully"); + successful_targets.push(target); + successful_configs.push((target_type, id, final_config)); + } Err(e) => { - error!(error = %e, "Task execution failed"); + error!(target_type = %target_type, instance_id = %id, error = %e, "Failed to create a target"); } } } - // Rebuild in pieces based on "default items + successful instances" and overwrite writeback to ensure that deleted/disabled instances will not be "resurrected" + // 7. 
Aggregate new configuration and write back to system configuration if !successful_configs.is_empty() || !section_defaults.is_empty() { - info!("Prepare to rebuild and save target configurations to the system configuration..."); + info!( + "Prepare to update {} successfully created target configurations to the system configuration...", + successful_configs.len() + ); - // Aggregate successful instances into segments let mut successes_by_section: HashMap> = HashMap::new(); + for (target_type, id, kvs) in successful_configs { let section_name = format!("{AUDIT_ROUTE_PREFIX}{target_type}").to_lowercase(); successes_by_section @@ -244,76 +284,99 @@ impl AuditRegistry { } let mut new_config = config.clone(); - // Collection of segments that need to be processed: Collect all segments where default items exist or where successful instances exist let mut sections: HashSet = HashSet::new(); sections.extend(section_defaults.keys().cloned()); sections.extend(successes_by_section.keys().cloned()); - for section_name in sections { + for section in sections { let mut section_map: std::collections::HashMap = std::collections::HashMap::new(); - - // The default entry (if present) is written back to `_` - if let Some(default_cfg) = section_defaults.get(§ion_name) { - if !default_cfg.is_empty() { - section_map.insert(DEFAULT_DELIMITER.to_string(), default_cfg.clone()); + // Add default item + if let Some(default_kvs) = section_defaults.get(§ion) { + if !default_kvs.is_empty() { + section_map.insert(DEFAULT_DELIMITER.to_string(), default_kvs.clone()); } } - // Successful instance write back - if let Some(instances) = successes_by_section.get(§ion_name) { + // Add successful instance item + if let Some(instances) = successes_by_section.get(§ion) { for (id, kvs) in instances { section_map.insert(id.clone(), kvs.clone()); } } - // Empty segments are removed and non-empty segments are replaced as a whole. + // Empty breaks are removed and non-empty breaks are replaced entirely. if section_map.is_empty() { - new_config.0.remove(§ion_name); + new_config.0.remove(§ion); } else { - new_config.0.insert(section_name, section_map); + new_config.0.insert(section, section_map); } } - // 7. Save the new configuration to the system - let Some(store) = rustfs_ecstore::new_object_layer_fn() else { + let Some(store) = rustfs_ecstore::global::new_object_layer_fn() else { return Err(AuditError::StorageNotAvailable( "Failed to save target configuration: server storage not initialized".to_string(), )); }; match rustfs_ecstore::config::com::save_server_config(store, &new_config).await { - Ok(_) => info!("New audit configuration saved to system successfully"), + Ok(_) => { + info!("The new configuration was saved to the system successfully.") + } Err(e) => { - error!(error = %e, "Failed to save new audit configuration"); + error!("Failed to save the new configuration: {}", e); return Err(AuditError::SaveConfig(Box::new(e))); } } } + + info!(count = successful_targets.len(), "All target processing completed"); Ok(successful_targets) } /// Adds a target to the registry + /// + /// # Arguments + /// * `id` - The identifier for the target. + /// * `target` - The target instance to be added. pub fn add_target(&mut self, id: String, target: Box + Send + Sync>) { self.targets.insert(id, target); } /// Removes a target from the registry + /// + /// # Arguments + /// * `id` - The identifier for the target to be removed. + /// + /// # Returns + /// * `Option + Send + Sync>>` - The removed target if it existed. 
pub fn remove_target(&mut self, id: &str) -> Option + Send + Sync>> { self.targets.remove(id) } /// Gets a target from the registry + /// + /// # Arguments + /// * `id` - The identifier for the target to be retrieved. + /// + /// # Returns + /// * `Option<&(dyn Target + Send + Sync)>` - The target if it exists. pub fn get_target(&self, id: &str) -> Option<&(dyn Target + Send + Sync)> { self.targets.get(id).map(|t| t.as_ref()) } /// Lists all target IDs + /// + /// # Returns + /// * `Vec` - A vector of all target IDs in the registry. pub fn list_targets(&self) -> Vec { self.targets.keys().cloned().collect() } /// Closes all targets and clears the registry + /// + /// # Returns + /// * `AuditResult<()>` - Result indicating success or failure. pub async fn close_all(&mut self) -> AuditResult<()> { let mut errors = Vec::new(); @@ -331,152 +394,3 @@ impl AuditRegistry { Ok(()) } } - -/// Creates an audit target based on type and configuration -async fn create_audit_target( - target_type: &str, - id: &str, - config: &KVS, -) -> Result + Send + Sync>, TargetError> { - match target_type { - val if val == ChannelTargetType::Webhook.as_str() => { - let args = parse_webhook_args(id, config)?; - let target = rustfs_targets::target::webhook::WebhookTarget::new(id.to_string(), args)?; - Ok(Box::new(target)) - } - val if val == ChannelTargetType::Mqtt.as_str() => { - let args = parse_mqtt_args(id, config)?; - let target = rustfs_targets::target::mqtt::MQTTTarget::new(id.to_string(), args)?; - Ok(Box::new(target)) - } - _ => Err(TargetError::Configuration(format!("Unknown target type: {target_type}"))), - } -} - -/// Gets valid field names for webhook configuration -fn get_webhook_valid_fields() -> HashSet { - vec![ - ENABLE_KEY.to_string(), - WEBHOOK_ENDPOINT.to_string(), - WEBHOOK_AUTH_TOKEN.to_string(), - WEBHOOK_CLIENT_CERT.to_string(), - WEBHOOK_CLIENT_KEY.to_string(), - WEBHOOK_BATCH_SIZE.to_string(), - WEBHOOK_QUEUE_LIMIT.to_string(), - WEBHOOK_QUEUE_DIR.to_string(), - WEBHOOK_MAX_RETRY.to_string(), - WEBHOOK_RETRY_INTERVAL.to_string(), - WEBHOOK_HTTP_TIMEOUT.to_string(), - ] - .into_iter() - .collect() -} - -/// Gets valid field names for MQTT configuration -fn get_mqtt_valid_fields() -> HashSet { - vec![ - ENABLE_KEY.to_string(), - MQTT_BROKER.to_string(), - MQTT_TOPIC.to_string(), - MQTT_USERNAME.to_string(), - MQTT_PASSWORD.to_string(), - MQTT_QOS.to_string(), - MQTT_KEEP_ALIVE_INTERVAL.to_string(), - MQTT_RECONNECT_INTERVAL.to_string(), - MQTT_QUEUE_DIR.to_string(), - MQTT_QUEUE_LIMIT.to_string(), - ] - .into_iter() - .collect() -} - -/// Parses webhook arguments from KVS configuration -fn parse_webhook_args(_id: &str, config: &KVS) -> Result { - let endpoint = config - .lookup(WEBHOOK_ENDPOINT) - .filter(|s| !s.is_empty()) - .ok_or_else(|| TargetError::Configuration("webhook endpoint is required".to_string()))?; - - let endpoint_url = - Url::parse(&endpoint).map_err(|e| TargetError::Configuration(format!("invalid webhook endpoint URL: {e}")))?; - - let args = WebhookArgs { - enable: true, // Already validated as enabled - endpoint: endpoint_url, - auth_token: config.lookup(WEBHOOK_AUTH_TOKEN).unwrap_or_default(), - queue_dir: config.lookup(WEBHOOK_QUEUE_DIR).unwrap_or_default(), - queue_limit: config - .lookup(WEBHOOK_QUEUE_LIMIT) - .and_then(|s| s.parse().ok()) - .unwrap_or(100000), - client_cert: config.lookup(WEBHOOK_CLIENT_CERT).unwrap_or_default(), - client_key: config.lookup(WEBHOOK_CLIENT_KEY).unwrap_or_default(), - target_type: TargetType::AuditLog, - }; - - args.validate()?; - 
Ok(args) -} - -/// Parses MQTT arguments from KVS configuration -fn parse_mqtt_args(_id: &str, config: &KVS) -> Result { - let broker = config - .lookup(MQTT_BROKER) - .filter(|s| !s.is_empty()) - .ok_or_else(|| TargetError::Configuration("MQTT broker is required".to_string()))?; - - let broker_url = Url::parse(&broker).map_err(|e| TargetError::Configuration(format!("invalid MQTT broker URL: {e}")))?; - - let topic = config - .lookup(MQTT_TOPIC) - .filter(|s| !s.is_empty()) - .ok_or_else(|| TargetError::Configuration("MQTT topic is required".to_string()))?; - - let qos = config - .lookup(MQTT_QOS) - .and_then(|s| s.parse::().ok()) - .and_then(|q| match q { - 0 => Some(rumqttc::QoS::AtMostOnce), - 1 => Some(rumqttc::QoS::AtLeastOnce), - 2 => Some(rumqttc::QoS::ExactlyOnce), - _ => None, - }) - .unwrap_or(rumqttc::QoS::AtLeastOnce); - - let args = MQTTArgs { - enable: true, // Already validated as enabled - broker: broker_url, - topic, - qos, - username: config.lookup(MQTT_USERNAME).unwrap_or_default(), - password: config.lookup(MQTT_PASSWORD).unwrap_or_default(), - max_reconnect_interval: parse_duration(&config.lookup(MQTT_RECONNECT_INTERVAL).unwrap_or_else(|| "5s".to_string())) - .unwrap_or(Duration::from_secs(5)), - keep_alive: parse_duration(&config.lookup(MQTT_KEEP_ALIVE_INTERVAL).unwrap_or_else(|| "60s".to_string())) - .unwrap_or(Duration::from_secs(60)), - queue_dir: config.lookup(MQTT_QUEUE_DIR).unwrap_or_default(), - queue_limit: config.lookup(MQTT_QUEUE_LIMIT).and_then(|s| s.parse().ok()).unwrap_or(100000), - target_type: TargetType::AuditLog, - }; - - args.validate()?; - Ok(args) -} - -/// Parses enable value from string -fn parse_enable_value(value: &str) -> bool { - matches!(value.to_lowercase().as_str(), "1" | "on" | "true" | "yes") -} - -/// Parses duration from string (e.g., "3s", "5m") -fn parse_duration(s: &str) -> Option { - if let Some(stripped) = s.strip_suffix('s') { - stripped.parse::().ok().map(Duration::from_secs) - } else if let Some(stripped) = s.strip_suffix('m') { - stripped.parse::().ok().map(|m| Duration::from_secs(m * 60)) - } else if let Some(stripped) = s.strip_suffix("ms") { - stripped.parse::().ok().map(Duration::from_millis) - } else { - s.parse::().ok().map(Duration::from_secs) - } -} diff --git a/crates/audit/src/system.rs b/crates/audit/src/system.rs index cbfd2d51..ad80ffe9 100644 --- a/crates/audit/src/system.rs +++ b/crates/audit/src/system.rs @@ -58,6 +58,12 @@ impl AuditSystem { } /// Starts the audit system with the given configuration + /// + /// # Arguments + /// * `config` - The configuration to use for starting the audit system + /// + /// # Returns + /// * `AuditResult<()>` - Result indicating success or failure pub async fn start(&self, config: Config) -> AuditResult<()> { let state = self.state.write().await; @@ -87,7 +93,7 @@ impl AuditSystem { // Create targets from configuration let mut registry = self.registry.lock().await; - match registry.create_targets_from_config(&config).await { + match registry.create_audit_targets_from_config(&config).await { Ok(targets) => { if targets.is_empty() { info!("No enabled audit targets found, keeping audit system stopped"); @@ -143,6 +149,9 @@ impl AuditSystem { } /// Pauses the audit system + /// + /// # Returns + /// * `AuditResult<()>` - Result indicating success or failure pub async fn pause(&self) -> AuditResult<()> { let mut state = self.state.write().await; @@ -161,6 +170,9 @@ impl AuditSystem { } /// Resumes the audit system + /// + /// # Returns + /// * `AuditResult<()>` - Result indicating 
success or failure pub async fn resume(&self) -> AuditResult<()> { let mut state = self.state.write().await; @@ -179,6 +191,9 @@ impl AuditSystem { } /// Stops the audit system and closes all targets + /// + /// # Returns + /// * `AuditResult<()>` - Result indicating success or failure pub async fn close(&self) -> AuditResult<()> { let mut state = self.state.write().await; @@ -223,11 +238,20 @@ impl AuditSystem { } /// Checks if the audit system is running + /// + /// # Returns + /// * `bool` - True if running, false otherwise pub async fn is_running(&self) -> bool { matches!(*self.state.read().await, AuditSystemState::Running) } /// Dispatches an audit log entry to all active targets + /// + /// # Arguments + /// * `entry` - The audit log entry to dispatch + /// + /// # Returns + /// * `AuditResult<()>` - Result indicating success or failure pub async fn dispatch(&self, entry: Arc) -> AuditResult<()> { let start_time = std::time::Instant::now(); @@ -319,6 +343,13 @@ impl AuditSystem { Ok(()) } + /// Dispatches a batch of audit log entries to all active targets + /// + /// # Arguments + /// * `entries` - A vector of audit log entries to dispatch + /// + /// # Returns + /// * `AuditResult<()>` - Result indicating success or failure pub async fn dispatch_batch(&self, entries: Vec>) -> AuditResult<()> { let start_time = std::time::Instant::now(); @@ -386,7 +417,13 @@ impl AuditSystem { Ok(()) } - // New: Audit flow background tasks, based on send_from_store, including retries and exponential backoffs + /// Starts the audit stream processing for a target with batching and retry logic + /// # Arguments + /// * `store` - The store from which to read audit entries + /// * `target` - The target to which audit entries will be sent + /// + /// This function spawns a background task that continuously reads audit entries from the provided store + /// and attempts to send them to the specified target. 
It implements retry logic with exponential backoff fn start_audit_stream_with_batching( &self, store: Box, Error = StoreError, Key = Key> + Send>, @@ -462,6 +499,12 @@ impl AuditSystem { } /// Enables a specific target + /// + /// # Arguments + /// * `target_id` - The ID of the target to enable + /// + /// # Returns + /// * `AuditResult<()>` - Result indicating success or failure pub async fn enable_target(&self, target_id: &str) -> AuditResult<()> { // This would require storing enabled/disabled state per target // For now, just check if target exists @@ -475,6 +518,12 @@ impl AuditSystem { } /// Disables a specific target + /// + /// # Arguments + /// * `target_id` - The ID of the target to disable + /// + /// # Returns + /// * `AuditResult<()>` - Result indicating success or failure pub async fn disable_target(&self, target_id: &str) -> AuditResult<()> { // This would require storing enabled/disabled state per target // For now, just check if target exists @@ -488,6 +537,12 @@ impl AuditSystem { } /// Removes a target from the system + /// + /// # Arguments + /// * `target_id` - The ID of the target to remove + /// + /// # Returns + /// * `AuditResult<()>` - Result indicating success or failure pub async fn remove_target(&self, target_id: &str) -> AuditResult<()> { let mut registry = self.registry.lock().await; if let Some(target) = registry.remove_target(target_id) { @@ -502,6 +557,13 @@ impl AuditSystem { } /// Updates or inserts a target + /// + /// # Arguments + /// * `target_id` - The ID of the target to upsert + /// * `target` - The target instance to insert or update + /// + /// # Returns + /// * `AuditResult<()>` - Result indicating success or failure pub async fn upsert_target(&self, target_id: String, target: Box + Send + Sync>) -> AuditResult<()> { let mut registry = self.registry.lock().await; @@ -523,18 +585,33 @@ impl AuditSystem { } /// Lists all targets + /// + /// # Returns + /// * `Vec` - List of target IDs pub async fn list_targets(&self) -> Vec { let registry = self.registry.lock().await; registry.list_targets() } /// Gets information about a specific target + /// + /// # Arguments + /// * `target_id` - The ID of the target to retrieve + /// + /// # Returns + /// * `Option` - Target ID if found pub async fn get_target(&self, target_id: &str) -> Option { let registry = self.registry.lock().await; registry.get_target(target_id).map(|target| target.id().to_string()) } /// Reloads configuration and updates targets + /// + /// # Arguments + /// * `new_config` - The new configuration to load + /// + /// # Returns + /// * `AuditResult<()>` - Result indicating success or failure pub async fn reload_config(&self, new_config: Config) -> AuditResult<()> { info!("Reloading audit system configuration"); @@ -554,7 +631,7 @@ impl AuditSystem { } // Create new targets from updated configuration - match registry.create_targets_from_config(&new_config).await { + match registry.create_audit_targets_from_config(&new_config).await { Ok(targets) => { info!(target_count = targets.len(), "Reloaded audit targets successfully"); @@ -594,16 +671,22 @@ impl AuditSystem { } /// Gets current audit system metrics + /// + /// # Returns + /// * `AuditMetricsReport` - Current metrics report pub async fn get_metrics(&self) -> observability::AuditMetricsReport { observability::get_metrics_report().await } /// Validates system performance against requirements + /// + /// # Returns + /// * `PerformanceValidation` - Performance validation results pub async fn validate_performance(&self) -> 
observability::PerformanceValidation { observability::validate_performance().await } - /// Resets all metrics + /// Resets all metrics to initial state pub async fn reset_metrics(&self) { observability::reset_metrics().await; } diff --git a/crates/audit/tests/integration_test.rs b/crates/audit/tests/integration_test.rs index d889c84e..f2ef342e 100644 --- a/crates/audit/tests/integration_test.rs +++ b/crates/audit/tests/integration_test.rs @@ -43,11 +43,11 @@ async fn test_config_parsing_webhook() { audit_webhook_section.insert("_".to_string(), default_kvs); config.0.insert("audit_webhook".to_string(), audit_webhook_section); - let mut registry = AuditRegistry::new(); + let registry = AuditRegistry::new(); // This should not fail even if server storage is not initialized // as it's an integration test - let result = registry.create_targets_from_config(&config).await; + let result = registry.create_audit_targets_from_config(&config).await; // We expect this to fail due to server storage not being initialized // but the parsing should work correctly diff --git a/crates/audit/tests/performance_test.rs b/crates/audit/tests/performance_test.rs index 4080c47b..b96e92eb 100644 --- a/crates/audit/tests/performance_test.rs +++ b/crates/audit/tests/performance_test.rs @@ -44,7 +44,7 @@ async fn test_audit_system_startup_performance() { #[tokio::test] async fn test_concurrent_target_creation() { // Test that multiple targets can be created concurrently - let mut registry = AuditRegistry::new(); + let registry = AuditRegistry::new(); // Create config with multiple webhook instances let mut config = rustfs_ecstore::config::Config(std::collections::HashMap::new()); @@ -63,7 +63,7 @@ async fn test_concurrent_target_creation() { let start = Instant::now(); // This will fail due to server storage not being initialized, but we can measure timing - let result = registry.create_targets_from_config(&config).await; + let result = registry.create_audit_targets_from_config(&config).await; let elapsed = start.elapsed(); println!("Concurrent target creation took: {elapsed:?}"); diff --git a/crates/audit/tests/system_integration_test.rs b/crates/audit/tests/system_integration_test.rs index 267a9fc1..d60c6f18 100644 --- a/crates/audit/tests/system_integration_test.rs +++ b/crates/audit/tests/system_integration_test.rs @@ -135,7 +135,7 @@ async fn test_global_audit_functions() { #[tokio::test] async fn test_config_parsing_with_multiple_instances() { - let mut registry = AuditRegistry::new(); + let registry = AuditRegistry::new(); // Create config with multiple webhook instances let mut config = Config(HashMap::new()); @@ -164,7 +164,7 @@ async fn test_config_parsing_with_multiple_instances() { config.0.insert("audit_webhook".to_string(), webhook_section); // Try to create targets from config - let result = registry.create_targets_from_config(&config).await; + let result = registry.create_audit_targets_from_config(&config).await; // Should fail due to server storage not initialized, but parsing should work match result { diff --git a/crates/common/src/globals.rs b/crates/common/src/globals.rs index 141003a2..6bcc7e29 100644 --- a/crates/common/src/globals.rs +++ b/crates/common/src/globals.rs @@ -19,21 +19,21 @@ use std::sync::LazyLock; use tokio::sync::RwLock; use tonic::transport::Channel; -pub static GLOBAL_Local_Node_Name: LazyLock> = LazyLock::new(|| RwLock::new("".to_string())); -pub static GLOBAL_Rustfs_Host: LazyLock> = LazyLock::new(|| RwLock::new("".to_string())); -pub static GLOBAL_Rustfs_Port: LazyLock> = 
LazyLock::new(|| RwLock::new("9000".to_string())); -pub static GLOBAL_Rustfs_Addr: LazyLock> = LazyLock::new(|| RwLock::new("".to_string())); -pub static GLOBAL_Conn_Map: LazyLock>> = LazyLock::new(|| RwLock::new(HashMap::new())); +pub static GLOBAL_LOCAL_NODE_NAME: LazyLock> = LazyLock::new(|| RwLock::new("".to_string())); +pub static GLOBAL_RUSTFS_HOST: LazyLock> = LazyLock::new(|| RwLock::new("".to_string())); +pub static GLOBAL_RUSTFS_PORT: LazyLock> = LazyLock::new(|| RwLock::new("9000".to_string())); +pub static GLOBAL_RUSTFS_ADDR: LazyLock> = LazyLock::new(|| RwLock::new("".to_string())); +pub static GLOBAL_CONN_MAP: LazyLock>> = LazyLock::new(|| RwLock::new(HashMap::new())); pub async fn set_global_addr(addr: &str) { - *GLOBAL_Rustfs_Addr.write().await = addr.to_string(); + *GLOBAL_RUSTFS_ADDR.write().await = addr.to_string(); } /// Evict a stale/dead connection from the global connection cache. /// This is critical for cluster recovery when a node dies unexpectedly (e.g., power-off). /// By removing the cached connection, subsequent requests will establish a fresh connection. pub async fn evict_connection(addr: &str) { - let removed = GLOBAL_Conn_Map.write().await.remove(addr); + let removed = GLOBAL_CONN_MAP.write().await.remove(addr); if removed.is_some() { tracing::warn!("Evicted stale connection from cache: {}", addr); } @@ -41,12 +41,12 @@ pub async fn evict_connection(addr: &str) { /// Check if a connection exists in the cache for the given address. pub async fn has_cached_connection(addr: &str) -> bool { - GLOBAL_Conn_Map.read().await.contains_key(addr) + GLOBAL_CONN_MAP.read().await.contains_key(addr) } /// Clear all cached connections. Useful for full cluster reset/recovery. pub async fn clear_all_connections() { - let mut map = GLOBAL_Conn_Map.write().await; + let mut map = GLOBAL_CONN_MAP.write().await; let count = map.len(); map.clear(); if count > 0 { diff --git a/crates/config/src/audit/mod.rs b/crates/config/src/audit/mod.rs index 92a57212..793845ff 100644 --- a/crates/config/src/audit/mod.rs +++ b/crates/config/src/audit/mod.rs @@ -29,7 +29,7 @@ pub const AUDIT_PREFIX: &str = "audit"; pub const AUDIT_ROUTE_PREFIX: &str = const_str::concat!(AUDIT_PREFIX, DEFAULT_DELIMITER); pub const AUDIT_WEBHOOK_SUB_SYS: &str = "audit_webhook"; -pub const AUDIT_MQTT_SUB_SYS: &str = "mqtt_webhook"; +pub const AUDIT_MQTT_SUB_SYS: &str = "audit_mqtt"; pub const AUDIT_STORE_EXTENSION: &str = ".audit"; #[allow(dead_code)] diff --git a/crates/config/src/constants/env.rs b/crates/config/src/constants/env.rs index e78c2b90..84116ba5 100644 --- a/crates/config/src/constants/env.rs +++ b/crates/config/src/constants/env.rs @@ -16,7 +16,8 @@ pub const DEFAULT_DELIMITER: &str = "_"; pub const ENV_PREFIX: &str = "RUSTFS_"; pub const ENV_WORD_DELIMITER: &str = "_"; -pub const DEFAULT_DIR: &str = "/opt/rustfs/events"; // Default directory for event store +pub const EVENT_DEFAULT_DIR: &str = "/opt/rustfs/events"; // Default directory for event store +pub const AUDIT_DEFAULT_DIR: &str = "/opt/rustfs/audit"; // Default directory for audit store pub const DEFAULT_LIMIT: u64 = 100000; // Default store limit /// Standard config keys and values. 
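As a worked example of how the constants touched above compose into the environment variables scanned by the audit registry, here is a standalone sketch. The local consts restate values defined in this patch (`ENV_PREFIX`, `DEFAULT_DELIMITER`, and `AUDIT_ROUTE_PREFIX = AUDIT_PREFIX + DEFAULT_DELIMITER`); the `ENABLE`/`ENDPOINT` names in the comments assume the corresponding key constants resolve to `enable` and `endpoint`, which is not shown here.

```rust
// Restated from crates/config for illustration only.
const ENV_PREFIX: &str = "RUSTFS_";
const AUDIT_ROUTE_PREFIX: &str = "audit_";
const DEFAULT_DELIMITER: &str = "_";

/// Builds the per-target-type prefix the same way AuditRegistry does.
fn env_prefix_for(target_type: &str) -> String {
    format!("{ENV_PREFIX}{AUDIT_ROUTE_PREFIX}{target_type}{DEFAULT_DELIMITER}").to_uppercase()
}

fn main() {
    // e.g. RUSTFS_AUDIT_WEBHOOK_ENABLE_PRIMARY=on would enable instance "primary",
    // and RUSTFS_AUDIT_WEBHOOK_ENDPOINT_PRIMARY=... would override its endpoint.
    assert_eq!(env_prefix_for("webhook"), "RUSTFS_AUDIT_WEBHOOK_");
}
```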
diff --git a/crates/config/src/notify/mod.rs b/crates/config/src/notify/mod.rs index 91a78de4..6abb2bf8 100644 --- a/crates/config/src/notify/mod.rs +++ b/crates/config/src/notify/mod.rs @@ -24,13 +24,33 @@ pub use webhook::*; use crate::DEFAULT_DELIMITER; -// --- Configuration Constants --- +/// Default target identifier for notifications, +/// Used in notification system when no specific target is provided, +/// Represents the default target stream or endpoint for notifications when no specific target is provided. pub const DEFAULT_TARGET: &str = "1"; - +/// Notification prefix for routing and identification, +/// Used in notification system, +/// This prefix is utilized in constructing routes and identifiers related to notifications within the system. pub const NOTIFY_PREFIX: &str = "notify"; +/// Notification route prefix combining the notification prefix and default delimiter +/// Combines the notification prefix with the default delimiter +/// Used in notification system for defining routes related to notifications. +/// Example: "notify:/" pub const NOTIFY_ROUTE_PREFIX: &str = const_str::concat!(NOTIFY_PREFIX, DEFAULT_DELIMITER); +/// Name of the environment variable that configures target stream concurrency. +/// Controls how many target streams are processed in parallel by the notification system. +/// Defaults to [`DEFAULT_NOTIFY_TARGET_STREAM_CONCURRENCY`] if not set. +/// Example: `RUSTFS_NOTIFY_TARGET_STREAM_CONCURRENCY=20`. +pub const ENV_NOTIFY_TARGET_STREAM_CONCURRENCY: &str = "RUSTFS_NOTIFY_TARGET_STREAM_CONCURRENCY"; + +/// Default concurrency for target stream processing in the notification system +/// This value is used if the environment variable `RUSTFS_NOTIFY_TARGET_STREAM_CONCURRENCY` is not set. +/// It defines how many target streams can be processed in parallel by the notification system at any given time. +/// Adjust this value based on your system's capabilities and expected load. 
+pub const DEFAULT_NOTIFY_TARGET_STREAM_CONCURRENCY: usize = 20; + #[allow(dead_code)] pub const NOTIFY_SUB_SYSTEMS: &[&str] = &[NOTIFY_MQTT_SUB_SYS, NOTIFY_WEBHOOK_SUB_SYS]; diff --git a/crates/config/src/notify/store.rs b/crates/config/src/notify/store.rs index ed838b05..3dab3de2 100644 --- a/crates/config/src/notify/store.rs +++ b/crates/config/src/notify/store.rs @@ -15,5 +15,5 @@ pub const DEFAULT_EXT: &str = ".unknown"; // Default file extension pub const COMPRESS_EXT: &str = ".snappy"; // Extension for compressed files -/// STORE_EXTENSION - file extension of an event file in store -pub const STORE_EXTENSION: &str = ".event"; +/// NOTIFY_STORE_EXTENSION - file extension of an event file in store +pub const NOTIFY_STORE_EXTENSION: &str = ".event"; diff --git a/crates/ecstore/src/admin_server_info.rs b/crates/ecstore/src/admin_server_info.rs index 7917004c..9117f8c0 100644 --- a/crates/ecstore/src/admin_server_info.rs +++ b/crates/ecstore/src/admin_server_info.rs @@ -23,7 +23,7 @@ use crate::{ }; use crate::data_usage::load_data_usage_cache; -use rustfs_common::{globals::GLOBAL_Local_Node_Name, heal_channel::DriveState}; +use rustfs_common::{globals::GLOBAL_LOCAL_NODE_NAME, heal_channel::DriveState}; use rustfs_madmin::{ BackendDisks, Disk, ErasureSetInfo, ITEM_INITIALIZING, ITEM_OFFLINE, ITEM_ONLINE, InfoMessage, ServerProperties, }; @@ -128,7 +128,7 @@ async fn is_server_resolvable(endpoint: &Endpoint) -> Result<()> { } pub async fn get_local_server_property() -> ServerProperties { - let addr = GLOBAL_Local_Node_Name.read().await.clone(); + let addr = GLOBAL_LOCAL_NODE_NAME.read().await.clone(); let mut pool_numbers = HashSet::new(); let mut network = HashMap::new(); diff --git a/crates/ecstore/src/config/audit.rs b/crates/ecstore/src/config/audit.rs index afbab13b..f0c86403 100644 --- a/crates/ecstore/src/config/audit.rs +++ b/crates/ecstore/src/config/audit.rs @@ -14,7 +14,7 @@ use crate::config::{KV, KVS}; use rustfs_config::{ - COMMENT_KEY, DEFAULT_DIR, DEFAULT_LIMIT, ENABLE_KEY, EnableState, MQTT_BROKER, MQTT_KEEP_ALIVE_INTERVAL, MQTT_PASSWORD, + COMMENT_KEY, DEFAULT_LIMIT, ENABLE_KEY, EVENT_DEFAULT_DIR, EnableState, MQTT_BROKER, MQTT_KEEP_ALIVE_INTERVAL, MQTT_PASSWORD, MQTT_QOS, MQTT_QUEUE_DIR, MQTT_QUEUE_LIMIT, MQTT_RECONNECT_INTERVAL, MQTT_TOPIC, MQTT_USERNAME, WEBHOOK_AUTH_TOKEN, WEBHOOK_BATCH_SIZE, WEBHOOK_CLIENT_CERT, WEBHOOK_CLIENT_KEY, WEBHOOK_ENDPOINT, WEBHOOK_HTTP_TIMEOUT, WEBHOOK_MAX_RETRY, WEBHOOK_QUEUE_DIR, WEBHOOK_QUEUE_LIMIT, WEBHOOK_RETRY_INTERVAL, @@ -63,7 +63,7 @@ pub static DEFAULT_AUDIT_WEBHOOK_KVS: LazyLock = LazyLock::new(|| { }, KV { key: WEBHOOK_QUEUE_DIR.to_owned(), - value: DEFAULT_DIR.to_owned(), + value: EVENT_DEFAULT_DIR.to_owned(), hidden_if_empty: false, }, KV { @@ -131,7 +131,7 @@ pub static DEFAULT_AUDIT_MQTT_KVS: LazyLock = LazyLock::new(|| { }, KV { key: MQTT_QUEUE_DIR.to_owned(), - value: DEFAULT_DIR.to_owned(), + value: EVENT_DEFAULT_DIR.to_owned(), hidden_if_empty: false, }, KV { diff --git a/crates/ecstore/src/config/notify.rs b/crates/ecstore/src/config/notify.rs index 74157f52..c9ebf3ba 100644 --- a/crates/ecstore/src/config/notify.rs +++ b/crates/ecstore/src/config/notify.rs @@ -14,7 +14,7 @@ use crate::config::{KV, KVS}; use rustfs_config::{ - COMMENT_KEY, DEFAULT_DIR, DEFAULT_LIMIT, ENABLE_KEY, EnableState, MQTT_BROKER, MQTT_KEEP_ALIVE_INTERVAL, MQTT_PASSWORD, + COMMENT_KEY, DEFAULT_LIMIT, ENABLE_KEY, EVENT_DEFAULT_DIR, EnableState, MQTT_BROKER, MQTT_KEEP_ALIVE_INTERVAL, MQTT_PASSWORD, MQTT_QOS, MQTT_QUEUE_DIR, MQTT_QUEUE_LIMIT, 
MQTT_RECONNECT_INTERVAL, MQTT_TOPIC, MQTT_USERNAME, WEBHOOK_AUTH_TOKEN, WEBHOOK_CLIENT_CERT, WEBHOOK_CLIENT_KEY, WEBHOOK_ENDPOINT, WEBHOOK_QUEUE_DIR, WEBHOOK_QUEUE_LIMIT, }; @@ -47,7 +47,7 @@ pub static DEFAULT_NOTIFY_WEBHOOK_KVS: LazyLock = LazyLock::new(|| { }, KV { key: WEBHOOK_QUEUE_DIR.to_owned(), - value: DEFAULT_DIR.to_owned(), + value: EVENT_DEFAULT_DIR.to_owned(), hidden_if_empty: false, }, KV { @@ -114,7 +114,7 @@ pub static DEFAULT_NOTIFY_MQTT_KVS: LazyLock = LazyLock::new(|| { }, KV { key: MQTT_QUEUE_DIR.to_owned(), - value: DEFAULT_DIR.to_owned(), + value: EVENT_DEFAULT_DIR.to_owned(), hidden_if_empty: false, }, KV { diff --git a/crates/ecstore/src/metrics_realtime.rs b/crates/ecstore/src/metrics_realtime.rs index a0f711e1..4d938a48 100644 --- a/crates/ecstore/src/metrics_realtime.rs +++ b/crates/ecstore/src/metrics_realtime.rs @@ -20,7 +20,7 @@ use crate::{ }; use chrono::Utc; use rustfs_common::{ - globals::{GLOBAL_Local_Node_Name, GLOBAL_Rustfs_Addr}, + globals::{GLOBAL_LOCAL_NODE_NAME, GLOBAL_RUSTFS_ADDR}, heal_channel::DriveState, metrics::global_metrics, }; @@ -86,7 +86,7 @@ pub async fn collect_local_metrics(types: MetricType, opts: &CollectMetricsOpts) return real_time_metrics; } - let mut by_host_name = GLOBAL_Rustfs_Addr.read().await.clone(); + let mut by_host_name = GLOBAL_RUSTFS_ADDR.read().await.clone(); if !opts.hosts.is_empty() { let server = get_local_server_property().await; if opts.hosts.contains(&server.endpoint) { @@ -95,7 +95,7 @@ pub async fn collect_local_metrics(types: MetricType, opts: &CollectMetricsOpts) return real_time_metrics; } } - let local_node_name = GLOBAL_Local_Node_Name.read().await.clone(); + let local_node_name = GLOBAL_LOCAL_NODE_NAME.read().await.clone(); if by_host_name.starts_with(":") && !local_node_name.starts_with(":") { by_host_name = local_node_name; } diff --git a/crates/ecstore/src/sets.rs b/crates/ecstore/src/sets.rs index 976fcd56..d96e8aa4 100644 --- a/crates/ecstore/src/sets.rs +++ b/crates/ecstore/src/sets.rs @@ -40,7 +40,7 @@ use futures::future::join_all; use http::HeaderMap; use rustfs_common::heal_channel::HealOpts; use rustfs_common::{ - globals::GLOBAL_Local_Node_Name, + globals::GLOBAL_LOCAL_NODE_NAME, heal_channel::{DriveState, HealItemType}, }; use rustfs_filemeta::FileInfo; @@ -170,7 +170,7 @@ impl Sets { let set_disks = SetDisks::new( fast_lock_manager.clone(), - GLOBAL_Local_Node_Name.read().await.to_string(), + GLOBAL_LOCAL_NODE_NAME.read().await.to_string(), Arc::new(RwLock::new(set_drive)), set_drive_count, parity_count, diff --git a/crates/ecstore/src/store.rs b/crates/ecstore/src/store.rs index 3097a9e2..2259e5b5 100644 --- a/crates/ecstore/src/store.rs +++ b/crates/ecstore/src/store.rs @@ -55,7 +55,7 @@ use futures::future::join_all; use http::HeaderMap; use lazy_static::lazy_static; use rand::Rng as _; -use rustfs_common::globals::{GLOBAL_Local_Node_Name, GLOBAL_Rustfs_Host, GLOBAL_Rustfs_Port}; +use rustfs_common::globals::{GLOBAL_LOCAL_NODE_NAME, GLOBAL_RUSTFS_HOST, GLOBAL_RUSTFS_PORT}; use rustfs_common::heal_channel::{HealItemType, HealOpts}; use rustfs_filemeta::FileInfo; use rustfs_madmin::heal_commands::HealResultItem; @@ -127,11 +127,11 @@ impl ECStore { info!("ECStore new address: {}", address.to_string()); let mut host = address.ip().to_string(); if host.is_empty() { - host = GLOBAL_Rustfs_Host.read().await.to_string() + host = GLOBAL_RUSTFS_HOST.read().await.to_string() } let mut port = address.port().to_string(); if port.is_empty() { - port = GLOBAL_Rustfs_Port.read().await.to_string() + port 
= GLOBAL_RUSTFS_PORT.read().await.to_string() } info!("ECStore new host: {}, port: {}", host, port); init_local_peer(&endpoint_pools, &host, &port).await; @@ -2329,15 +2329,15 @@ async fn init_local_peer(endpoint_pools: &EndpointServerPools, host: &String, po if peer_set.is_empty() { if !host.is_empty() { - *GLOBAL_Local_Node_Name.write().await = format!("{host}:{port}"); + *GLOBAL_LOCAL_NODE_NAME.write().await = format!("{host}:{port}"); return; } - *GLOBAL_Local_Node_Name.write().await = format!("127.0.0.1:{port}"); + *GLOBAL_LOCAL_NODE_NAME.write().await = format!("127.0.0.1:{port}"); return; } - *GLOBAL_Local_Node_Name.write().await = peer_set[0].clone(); + *GLOBAL_LOCAL_NODE_NAME.write().await = peer_set[0].clone(); } pub fn is_valid_object_prefix(_object: &str) -> bool { diff --git a/crates/notify/Cargo.toml b/crates/notify/Cargo.toml index 707c5bd2..0f02b70a 100644 --- a/crates/notify/Cargo.toml +++ b/crates/notify/Cargo.toml @@ -29,6 +29,7 @@ documentation = "https://docs.rs/rustfs-notify/latest/rustfs_notify/" rustfs-config = { workspace = true, features = ["notify", "constants"] } rustfs-ecstore = { workspace = true } rustfs-targets = { workspace = true } +rustfs-utils = { workspace = true } async-trait = { workspace = true } chrono = { workspace = true, features = ["serde"] } futures = { workspace = true } diff --git a/crates/notify/examples/webhook.rs b/crates/notify/examples/webhook.rs index b0f47dc9..e7d81c94 100644 --- a/crates/notify/examples/webhook.rs +++ b/crates/notify/examples/webhook.rs @@ -110,20 +110,21 @@ async fn reset_webhook_count(Query(params): Query, headers: HeaderM let reason = params.reason.unwrap_or_else(|| "Reason not provided".to_string()); println!("Reset webhook count, reason: {reason}"); - + let time_now = chrono::offset::Utc::now().to_string(); for header in headers { let (key, value) = header; - println!("Header: {key:?}: {value:?}"); + println!("Header: {key:?}: {value:?}, time: {time_now}"); } println!("Reset webhook count printed headers"); // Reset the counter to 0 WEBHOOK_COUNT.store(0, Ordering::SeqCst); println!("Webhook count has been reset to 0."); + let time_now = chrono::offset::Utc::now().to_string(); Response::builder() .header("Foo", "Bar") .status(StatusCode::OK) - .body(format!("Webhook count reset successfully current_count:{current_count}")) + .body(format!("Webhook count reset successfully current_count:{current_count},time: {time_now}")) .unwrap() } @@ -167,7 +168,11 @@ async fn receive_webhook(Json(payload): Json) -> StatusCode { serde_json::to_string_pretty(&payload).unwrap() ); WEBHOOK_COUNT.fetch_add(1, Ordering::SeqCst); - println!("Total webhook requests received: {}", WEBHOOK_COUNT.load(Ordering::SeqCst)); + println!( + "Total webhook requests received: {} , Time: {}", + WEBHOOK_COUNT.load(Ordering::SeqCst), + chrono::offset::Utc::now() + ); StatusCode::OK } diff --git a/crates/notify/src/factory.rs b/crates/notify/src/factory.rs index 84cf1be6..e15f5c5d 100644 --- a/crates/notify/src/factory.rs +++ b/crates/notify/src/factory.rs @@ -18,9 +18,9 @@ use hashbrown::HashSet; use rumqttc::QoS; use rustfs_config::notify::{ENV_NOTIFY_MQTT_KEYS, ENV_NOTIFY_WEBHOOK_KEYS, NOTIFY_MQTT_KEYS, NOTIFY_WEBHOOK_KEYS}; use rustfs_config::{ - DEFAULT_DIR, DEFAULT_LIMIT, MQTT_BROKER, MQTT_KEEP_ALIVE_INTERVAL, MQTT_PASSWORD, MQTT_QOS, MQTT_QUEUE_DIR, MQTT_QUEUE_LIMIT, - MQTT_RECONNECT_INTERVAL, MQTT_TOPIC, MQTT_USERNAME, WEBHOOK_AUTH_TOKEN, WEBHOOK_CLIENT_CERT, WEBHOOK_CLIENT_KEY, - WEBHOOK_ENDPOINT, WEBHOOK_QUEUE_DIR, WEBHOOK_QUEUE_LIMIT, + 
DEFAULT_LIMIT, EVENT_DEFAULT_DIR, MQTT_BROKER, MQTT_KEEP_ALIVE_INTERVAL, MQTT_PASSWORD, MQTT_QOS, MQTT_QUEUE_DIR, + MQTT_QUEUE_LIMIT, MQTT_RECONNECT_INTERVAL, MQTT_TOPIC, MQTT_USERNAME, WEBHOOK_AUTH_TOKEN, WEBHOOK_CLIENT_CERT, + WEBHOOK_CLIENT_KEY, WEBHOOK_ENDPOINT, WEBHOOK_QUEUE_DIR, WEBHOOK_QUEUE_LIMIT, }; use rustfs_ecstore::config::KVS; use rustfs_targets::{ @@ -67,7 +67,7 @@ impl TargetFactory for WebhookTargetFactory { enable: true, // If we are here, it's already enabled. endpoint: endpoint_url, auth_token: config.lookup(WEBHOOK_AUTH_TOKEN).unwrap_or_default(), - queue_dir: config.lookup(WEBHOOK_QUEUE_DIR).unwrap_or(DEFAULT_DIR.to_string()), + queue_dir: config.lookup(WEBHOOK_QUEUE_DIR).unwrap_or(EVENT_DEFAULT_DIR.to_string()), queue_limit: config .lookup(WEBHOOK_QUEUE_LIMIT) .and_then(|v| v.parse::().ok()) @@ -100,7 +100,7 @@ impl TargetFactory for WebhookTargetFactory { )); } - let queue_dir = config.lookup(WEBHOOK_QUEUE_DIR).unwrap_or(DEFAULT_DIR.to_string()); + let queue_dir = config.lookup(WEBHOOK_QUEUE_DIR).unwrap_or(EVENT_DEFAULT_DIR.to_string()); if !queue_dir.is_empty() && !std::path::Path::new(&queue_dir).is_absolute() { return Err(TargetError::Configuration("Webhook queue directory must be an absolute path".to_string())); } @@ -159,7 +159,7 @@ impl TargetFactory for MQTTTargetFactory { .and_then(|v| v.parse::().ok()) .map(Duration::from_secs) .unwrap_or_else(|| Duration::from_secs(30)), - queue_dir: config.lookup(MQTT_QUEUE_DIR).unwrap_or(DEFAULT_DIR.to_string()), + queue_dir: config.lookup(MQTT_QUEUE_DIR).unwrap_or(EVENT_DEFAULT_DIR.to_string()), queue_limit: config .lookup(MQTT_QUEUE_LIMIT) .and_then(|v| v.parse::().ok()) diff --git a/crates/notify/src/integration.rs b/crates/notify/src/integration.rs index 4afa0145..dc50857d 100644 --- a/crates/notify/src/integration.rs +++ b/crates/notify/src/integration.rs @@ -16,6 +16,7 @@ use crate::{ Event, error::NotificationError, notifier::EventNotifier, registry::TargetRegistry, rules::BucketNotificationConfig, stream, }; use hashbrown::HashMap; +use rustfs_config::notify::{DEFAULT_NOTIFY_TARGET_STREAM_CONCURRENCY, ENV_NOTIFY_TARGET_STREAM_CONCURRENCY}; use rustfs_ecstore::config::{Config, KVS}; use rustfs_targets::EventName; use rustfs_targets::arn::TargetID; @@ -108,17 +109,14 @@ pub struct NotificationSystem { impl NotificationSystem { /// Creates a new NotificationSystem pub fn new(config: Config) -> Self { + let concurrency_limiter = + rustfs_utils::get_env_usize(ENV_NOTIFY_TARGET_STREAM_CONCURRENCY, DEFAULT_NOTIFY_TARGET_STREAM_CONCURRENCY); NotificationSystem { notifier: Arc::new(EventNotifier::new()), registry: Arc::new(TargetRegistry::new()), config: Arc::new(RwLock::new(config)), stream_cancellers: Arc::new(RwLock::new(HashMap::new())), - concurrency_limiter: Arc::new(Semaphore::new( - std::env::var("RUSTFS_TARGET_STREAM_CONCURRENCY") - .ok() - .and_then(|s| s.parse().ok()) - .unwrap_or(20), - )), // Limit the maximum number of concurrent processing events to 20 + concurrency_limiter: Arc::new(Semaphore::new(concurrency_limiter)), // Limit the maximum number of concurrent processing events to 20 metrics: Arc::new(NotificationMetrics::new()), } } @@ -269,9 +267,9 @@ impl NotificationSystem { self.update_config_and_reload(|config| { config .0 - .entry(target_type.to_string()) + .entry(target_type.to_lowercase()) .or_default() - .insert(target_name.to_string(), kvs.clone()); + .insert(target_name.to_lowercase(), kvs.clone()); true // The configuration is always modified }) .await diff --git 
a/crates/notify/src/registry.rs b/crates/notify/src/registry.rs index 9d649793..cdf3aa11 100644 --- a/crates/notify/src/registry.rs +++ b/crates/notify/src/registry.rs @@ -16,9 +16,11 @@ use crate::Event; use crate::factory::{MQTTTargetFactory, TargetFactory, WebhookTargetFactory}; use futures::stream::{FuturesUnordered, StreamExt}; use hashbrown::{HashMap, HashSet}; -use rustfs_config::{DEFAULT_DELIMITER, ENABLE_KEY, ENV_PREFIX, notify::NOTIFY_ROUTE_PREFIX}; +use rustfs_config::{DEFAULT_DELIMITER, ENABLE_KEY, ENV_PREFIX, EnableState, notify::NOTIFY_ROUTE_PREFIX}; use rustfs_ecstore::config::{Config, KVS}; use rustfs_targets::{Target, TargetError, target::ChannelTargetType}; +use std::str::FromStr; +use std::sync::Arc; use tracing::{debug, error, info, warn}; /// Registry for managing target factories @@ -117,11 +119,7 @@ impl TargetRegistry { format!("{ENV_PREFIX}{NOTIFY_ROUTE_PREFIX}{target_type}{DEFAULT_DELIMITER}{ENABLE_KEY}{DEFAULT_DELIMITER}") .to_uppercase(); for (key, value) in &all_env { - if value.eq_ignore_ascii_case(rustfs_config::EnableState::One.as_str()) - || value.eq_ignore_ascii_case(rustfs_config::EnableState::On.as_str()) - || value.eq_ignore_ascii_case(rustfs_config::EnableState::True.as_str()) - || value.eq_ignore_ascii_case(rustfs_config::EnableState::Yes.as_str()) - { + if EnableState::from_str(value).ok().map(|s| s.is_enabled()).unwrap_or(false) { if let Some(id) = key.strip_prefix(&enable_prefix) { if !id.is_empty() { instance_ids_from_env.insert(id.to_lowercase()); @@ -208,10 +206,10 @@ impl TargetRegistry { let enabled = merged_config .lookup(ENABLE_KEY) .map(|v| { - v.eq_ignore_ascii_case(rustfs_config::EnableState::One.as_str()) - || v.eq_ignore_ascii_case(rustfs_config::EnableState::On.as_str()) - || v.eq_ignore_ascii_case(rustfs_config::EnableState::True.as_str()) - || v.eq_ignore_ascii_case(rustfs_config::EnableState::Yes.as_str()) + EnableState::from_str(v.as_str()) + .ok() + .map(|s| s.is_enabled()) + .unwrap_or(false) }) .unwrap_or(false); @@ -220,10 +218,10 @@ impl TargetRegistry { // 5.3. 
Create asynchronous tasks for enabled instances let target_type_clone = target_type.clone(); let tid = id.clone(); - let merged_config_arc = std::sync::Arc::new(merged_config); + let merged_config_arc = Arc::new(merged_config); tasks.push(async move { let result = factory.create_target(tid.clone(), &merged_config_arc).await; - (target_type_clone, tid, result, std::sync::Arc::clone(&merged_config_arc)) + (target_type_clone, tid, result, Arc::clone(&merged_config_arc)) }); } else { info!(instance_id = %id, "Skip the disabled target and will be removed from the final configuration"); diff --git a/crates/protos/src/lib.rs b/crates/protos/src/lib.rs index 4242a76f..305d67a5 100644 --- a/crates/protos/src/lib.rs +++ b/crates/protos/src/lib.rs @@ -19,7 +19,7 @@ use std::{error::Error, time::Duration}; pub use generated::*; use proto_gen::node_service::node_service_client::NodeServiceClient; -use rustfs_common::globals::{GLOBAL_Conn_Map, evict_connection}; +use rustfs_common::globals::{GLOBAL_CONN_MAP, evict_connection}; use tonic::{ Request, Status, metadata::MetadataValue, @@ -74,7 +74,7 @@ async fn create_new_channel(addr: &str) -> Result> { // Cache the new connection { - GLOBAL_Conn_Map.write().await.insert(addr.to_string(), channel.clone()); + GLOBAL_CONN_MAP.write().await.insert(addr.to_string(), channel.clone()); } debug!("Successfully created and cached gRPC channel to: {}", addr); @@ -111,7 +111,7 @@ pub async fn node_service_time_out_client( let token: MetadataValue<_> = "rustfs rpc".parse()?; // Try to get cached channel - let cached_channel = { GLOBAL_Conn_Map.read().await.get(addr).cloned() }; + let cached_channel = { GLOBAL_CONN_MAP.read().await.get(addr).cloned() }; let channel = match cached_channel { Some(channel) => { diff --git a/crates/targets/src/event_name.rs b/crates/targets/src/event_name.rs index 49df020f..6df8d3f8 100644 --- a/crates/targets/src/event_name.rs +++ b/crates/targets/src/event_name.rs @@ -353,7 +353,7 @@ mod tests { let deserialized = serde_json::from_str::(invalid_str); assert!(deserialized.is_err(), "Deserialization should fail for invalid event name"); - // empty string should be successful only serialization + // Serializing EventName::Everything produces an empty string, but deserializing an empty string should fail. let event_name = EventName::Everything; let serialized_str = "\"\""; let serialized = serde_json::to_string(&event_name); diff --git a/crates/targets/src/target/mqtt.rs b/crates/targets/src/target/mqtt.rs index 45b73e5e..61cb93c0 100644 --- a/crates/targets/src/target/mqtt.rs +++ b/crates/targets/src/target/mqtt.rs @@ -12,12 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use crate::store::Key; -use crate::target::{ChannelTargetType, EntityTarget, TargetType}; -use crate::{StoreError, Target, TargetLog, arn::TargetID, error::TargetError, store::Store}; +use crate::{ + StoreError, Target, TargetLog, + arn::TargetID, + error::TargetError, + store::{Key, QueueStore, Store}, + target::{ChannelTargetType, EntityTarget, TargetType}, +}; use async_trait::async_trait; -use rumqttc::{AsyncClient, EventLoop, MqttOptions, Outgoing, Packet, QoS}; -use rumqttc::{ConnectionError, mqttbytes::Error as MqttBytesError}; +use rumqttc::{AsyncClient, ConnectionError, EventLoop, MqttOptions, Outgoing, Packet, QoS, mqttbytes::Error as MqttBytesError}; use serde::Serialize; use serde::de::DeserializeOwned; use std::sync::Arc; @@ -130,10 +133,10 @@ where debug!(target_id = %target_id, path = %specific_queue_path.display(), "Initializing queue store for MQTT target"); let extension = match args.target_type { TargetType::AuditLog => rustfs_config::audit::AUDIT_STORE_EXTENSION, - TargetType::NotifyEvent => rustfs_config::notify::STORE_EXTENSION, + TargetType::NotifyEvent => rustfs_config::notify::NOTIFY_STORE_EXTENSION, }; - let store = crate::store::QueueStore::>::new(specific_queue_path, args.queue_limit, extension); + let store = QueueStore::>::new(specific_queue_path, args.queue_limit, extension); if let Err(e) = store.open() { error!( target_id = %target_id, diff --git a/crates/targets/src/target/webhook.rs b/crates/targets/src/target/webhook.rs index d2de20e9..c9564274 100644 --- a/crates/targets/src/target/webhook.rs +++ b/crates/targets/src/target/webhook.rs @@ -12,16 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::target::{ChannelTargetType, EntityTarget, TargetType}; use crate::{ StoreError, Target, TargetLog, arn::TargetID, error::TargetError, - store::{Key, Store}, + store::{Key, QueueStore, Store}, + target::{ChannelTargetType, EntityTarget, TargetType}, }; use async_trait::async_trait; use reqwest::{Client, StatusCode, Url}; -use rustfs_config::notify::STORE_EXTENSION; +use rustfs_config::audit::AUDIT_STORE_EXTENSION; +use rustfs_config::notify::NOTIFY_STORE_EXTENSION; use serde::Serialize; use serde::de::DeserializeOwned; use std::{ @@ -155,11 +156,11 @@ where PathBuf::from(&args.queue_dir).join(format!("rustfs-{}-{}", ChannelTargetType::Webhook.as_str(), target_id.id)); let extension = match args.target_type { - TargetType::AuditLog => rustfs_config::audit::AUDIT_STORE_EXTENSION, - TargetType::NotifyEvent => STORE_EXTENSION, + TargetType::AuditLog => AUDIT_STORE_EXTENSION, + TargetType::NotifyEvent => NOTIFY_STORE_EXTENSION, }; - let store = crate::store::QueueStore::>::new(queue_dir, args.queue_limit, extension); + let store = QueueStore::>::new(queue_dir, args.queue_limit, extension); if let Err(e) = store.open() { error!("Failed to open store for Webhook target {}: {}", target_id.id, e); diff --git a/rustfs/src/main.rs b/rustfs/src/main.rs index bdc93286..d62777bb 100644 --- a/rustfs/src/main.rs +++ b/rustfs/src/main.rs @@ -16,9 +16,8 @@ mod admin; mod auth; mod config; mod error; -// mod grpc; mod init; -pub mod license; +mod license; mod profiling; mod server; mod storage; diff --git a/rustfs/src/server/audit.rs b/rustfs/src/server/audit.rs index 2a81af15..144f7446 100644 --- a/rustfs/src/server/audit.rs +++ b/rustfs/src/server/audit.rs @@ -12,8 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use rustfs_audit::system::AuditSystemState; -use rustfs_audit::{AuditError, AuditResult, audit_system, init_audit_system}; +use rustfs_audit::{AuditError, AuditResult, audit_system, init_audit_system, system::AuditSystemState}; use rustfs_config::DEFAULT_DELIMITER; use rustfs_ecstore::config::GLOBAL_SERVER_CONFIG; use tracing::{info, warn}; @@ -69,7 +68,9 @@ pub(crate) async fn start_audit_system() -> AuditResult<()> { mqtt_config.is_some(), webhook_config.is_some() ); + // 3. Initialize and start the audit system let system = init_audit_system(); + // Check if the audit system is already running let state = system.get_state().await; if state == AuditSystemState::Running { warn!( diff --git a/rustfs/src/storage/ecfs.rs b/rustfs/src/storage/ecfs.rs index 42ce4a01..e12eb958 100644 --- a/rustfs/src/storage/ecfs.rs +++ b/rustfs/src/storage/ecfs.rs @@ -5122,6 +5122,7 @@ impl S3 for FS { let (clear_result, event_rules) = tokio::join!(clear_rules, parse_rules); clear_result.map_err(|e| s3_error!(InternalError, "Failed to clear rules: {e}"))?; + warn!("notify event rules: {:?}", &event_rules); // Add a new notification rule notifier_global::add_event_specific_rules(&bucket, ®ion, &event_rules) diff --git a/rustfs/src/storage/tonic_service.rs b/rustfs/src/storage/tonic_service.rs index eebe1c74..5ca8ab22 100644 --- a/rustfs/src/storage/tonic_service.rs +++ b/rustfs/src/storage/tonic_service.rs @@ -16,7 +16,7 @@ use bytes::Bytes; use futures::Stream; use futures_util::future::join_all; use rmp_serde::{Deserializer, Serializer}; -use rustfs_common::{globals::GLOBAL_Local_Node_Name, heal_channel::HealOpts}; +use rustfs_common::{globals::GLOBAL_LOCAL_NODE_NAME, heal_channel::HealOpts}; use rustfs_ecstore::{ admin_server_info::get_local_server_property, bucket::{metadata::load_bucket_metadata, metadata_sys}, @@ -1646,7 +1646,7 @@ impl Node for NodeService { } async fn get_net_info(&self, _request: Request) -> Result, Status> { - let addr = GLOBAL_Local_Node_Name.read().await.clone(); + let addr = GLOBAL_LOCAL_NODE_NAME.read().await.clone(); let info = get_net_info(&addr, ""); let mut buf = Vec::new(); if let Err(err) = info.serialize(&mut Serializer::new(&mut buf)) { @@ -1701,7 +1701,7 @@ impl Node for NodeService { &self, _request: Request, ) -> Result, Status> { - let addr = GLOBAL_Local_Node_Name.read().await.clone(); + let addr = GLOBAL_LOCAL_NODE_NAME.read().await.clone(); let info = get_sys_services(&addr); let mut buf = Vec::new(); if let Err(err) = info.serialize(&mut Serializer::new(&mut buf)) { @@ -1719,7 +1719,7 @@ impl Node for NodeService { } async fn get_sys_config(&self, _request: Request) -> Result, Status> { - let addr = GLOBAL_Local_Node_Name.read().await.clone(); + let addr = GLOBAL_LOCAL_NODE_NAME.read().await.clone(); let info = get_sys_config(&addr); let mut buf = Vec::new(); if let Err(err) = info.serialize(&mut Serializer::new(&mut buf)) { @@ -1737,7 +1737,7 @@ impl Node for NodeService { } async fn get_sys_errors(&self, _request: Request) -> Result, Status> { - let addr = GLOBAL_Local_Node_Name.read().await.clone(); + let addr = GLOBAL_LOCAL_NODE_NAME.read().await.clone(); let info = get_sys_errors(&addr); let mut buf = Vec::new(); if let Err(err) = info.serialize(&mut Serializer::new(&mut buf)) { @@ -1755,7 +1755,7 @@ impl Node for NodeService { } async fn get_mem_info(&self, _request: Request) -> Result, Status> { - let addr = GLOBAL_Local_Node_Name.read().await.clone(); + let addr = GLOBAL_LOCAL_NODE_NAME.read().await.clone(); let info = get_mem_info(&addr); let mut buf = 
Vec::new(); if let Err(err) = info.serialize(&mut Serializer::new(&mut buf)) { @@ -1798,7 +1798,7 @@ impl Node for NodeService { } async fn get_proc_info(&self, _request: Request) -> Result, Status> { - let addr = GLOBAL_Local_Node_Name.read().await.clone(); + let addr = GLOBAL_LOCAL_NODE_NAME.read().await.clone(); let info = get_proc_info(&addr); let mut buf = Vec::new(); if let Err(err) = info.serialize(&mut Serializer::new(&mut buf)) { diff --git a/scripts/run.sh b/scripts/run.sh index d3e99945..762215c6 100755 --- a/scripts/run.sh +++ b/scripts/run.sh @@ -36,7 +36,7 @@ mkdir -p ./target/volume/test{1..4} if [ -z "$RUST_LOG" ]; then export RUST_BACKTRACE=1 - export RUST_LOG="rustfs=debug,ecstore=info,s3s=debug,iam=info" + export RUST_LOG="rustfs=debug,ecstore=info,s3s=debug,iam=info,notify=info" fi # export RUSTFS_ERASURE_SET_DRIVE_COUNT=5 @@ -90,30 +90,30 @@ export OTEL_INSTRUMENTATION_VERSION="0.1.1" export OTEL_INSTRUMENTATION_SCHEMA_URL="https://opentelemetry.io/schemas/1.31.0" export OTEL_INSTRUMENTATION_ATTRIBUTES="env=production" -# notify -export RUSTFS_NOTIFY_WEBHOOK_ENABLE="on" # Whether to enable webhook notification -export RUSTFS_NOTIFY_WEBHOOK_ENDPOINT="http://[::]:3020/webhook" # Webhook notification address -export RUSTFS_NOTIFY_WEBHOOK_QUEUE_DIR="$current_dir/deploy/logs/notify" - -export RUSTFS_NOTIFY_WEBHOOK_ENABLE_PRIMARY="on" # Whether to enable webhook notification -export RUSTFS_NOTIFY_WEBHOOK_ENDPOINT_PRIMARY="http://[::]:3020/webhook" # Webhook notification address -export RUSTFS_NOTIFY_WEBHOOK_QUEUE_DIR_PRIMARY="$current_dir/deploy/logs/notify" - -export RUSTFS_NOTIFY_WEBHOOK_ENABLE_MASTER="on" # Whether to enable webhook notification -export RUSTFS_NOTIFY_WEBHOOK_ENDPOINT_MASTER="http://[::]:3020/webhook" # Webhook notification address -export RUSTFS_NOTIFY_WEBHOOK_QUEUE_DIR_MASTER="$current_dir/deploy/logs/notify" - -export RUSTFS_AUDIT_WEBHOOK_ENABLE="on" # Whether to enable webhook audit -export RUSTFS_AUDIT_WEBHOOK_ENDPOINT="http://[::]:3020/webhook" # Webhook audit address -export RUSTFS_AUDIT_WEBHOOK_QUEUE_DIR="$current_dir/deploy/logs/audit" - -export RUSTFS_AUDIT_WEBHOOK_ENABLE_PRIMARY="on" # Whether to enable webhook audit -export RUSTFS_AUDIT_WEBHOOK_ENDPOINT_PRIMARY="http://[::]:3020/webhook" # Webhook audit address -export RUSTFS_AUDIT_WEBHOOK_QUEUE_DIR_PRIMARY="$current_dir/deploy/logs/audit" - -export RUSTFS_AUDIT_WEBHOOK_ENABLE_MASTER="on" # Whether to enable webhook audit -export RUSTFS_AUDIT_WEBHOOK_ENDPOINT_MASTER="http://[::]:3020/webhook" # Webhook audit address -export RUSTFS_AUDIT_WEBHOOK_QUEUE_DIR_MASTER="$current_dir/deploy/logs/audit" +## notify +#export RUSTFS_NOTIFY_WEBHOOK_ENABLE="on" # Whether to enable webhook notification +#export RUSTFS_NOTIFY_WEBHOOK_ENDPOINT="http://127.0.0.1:3020/webhook" # Webhook notification address +#export RUSTFS_NOTIFY_WEBHOOK_QUEUE_DIR="$current_dir/deploy/logs/notify" +# +#export RUSTFS_NOTIFY_WEBHOOK_ENABLE_PRIMARY="on" # Whether to enable webhook notification +#export RUSTFS_NOTIFY_WEBHOOK_ENDPOINT_PRIMARY="http://127.0.0.1:3020/webhook" # Webhook notification address +#export RUSTFS_NOTIFY_WEBHOOK_QUEUE_DIR_PRIMARY="$current_dir/deploy/logs/notify" +# +#export RUSTFS_NOTIFY_WEBHOOK_ENABLE_MASTER="on" # Whether to enable webhook notification +#export RUSTFS_NOTIFY_WEBHOOK_ENDPOINT_MASTER="http://127.0.0.1:3020/webhook" # Webhook notification address +#export RUSTFS_NOTIFY_WEBHOOK_QUEUE_DIR_MASTER="$current_dir/deploy/logs/notify" +# +#export RUSTFS_AUDIT_WEBHOOK_ENABLE="on" # Whether to enable 
webhook audit +#export RUSTFS_AUDIT_WEBHOOK_ENDPOINT="http://127.0.0.1:3020/webhook" # Webhook audit address +#export RUSTFS_AUDIT_WEBHOOK_QUEUE_DIR="$current_dir/deploy/logs/audit" +# +#export RUSTFS_AUDIT_WEBHOOK_ENABLE_PRIMARY="on" # Whether to enable webhook audit +#export RUSTFS_AUDIT_WEBHOOK_ENDPOINT_PRIMARY="http://127.0.0.1:3020/webhook" # Webhook audit address +#export RUSTFS_AUDIT_WEBHOOK_QUEUE_DIR_PRIMARY="$current_dir/deploy/logs/audit" +# +#export RUSTFS_AUDIT_WEBHOOK_ENABLE_MASTER="on" # Whether to enable webhook audit +#export RUSTFS_AUDIT_WEBHOOK_ENDPOINT_MASTER="http://127.0.0.1:3020/webhook" # Webhook audit address +#export RUSTFS_AUDIT_WEBHOOK_QUEUE_DIR_MASTER="$current_dir/deploy/logs/audit" # export RUSTFS_POLICY_PLUGIN_URL="http://localhost:8181/v1/data/rustfs/authz/allow" # The URL of the OPA system # export RUSTFS_POLICY_PLUGIN_AUTH_TOKEN="your-opa-token" # The authentication token for the OPA system is optional @@ -211,5 +211,4 @@ fi # To run in release mode, use the following line #cargo run --profile release --bin rustfs # To run in debug mode, use the following line -cargo run --bin rustfs - +cargo run --bin rustfs \ No newline at end of file From 3eafeb0ff014ae1d8b1b0e6ff5b3526e28cacba9 Mon Sep 17 00:00:00 2001 From: loverustfs Date: Fri, 19 Dec 2025 13:01:17 +0800 Subject: [PATCH 08/26] Modify to accelerate --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c692dffb..b01de7df 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -454,7 +454,7 @@ jobs: OSS_ACCESS_KEY_ID: ${{ secrets.ALICLOUDOSS_KEY_ID }} OSS_ACCESS_KEY_SECRET: ${{ secrets.ALICLOUDOSS_KEY_SECRET }} OSS_REGION: cn-beijing - OSS_ENDPOINT: https://oss-cn-beijing.aliyuncs.com + OSS_ENDPOINT: https://oss-accelerate.aliyuncs.com shell: bash run: | BUILD_TYPE="${{ needs.build-check.outputs.build_type }}" @@ -758,7 +758,7 @@ jobs: OSS_ACCESS_KEY_ID: ${{ secrets.ALICLOUDOSS_KEY_ID }} OSS_ACCESS_KEY_SECRET: ${{ secrets.ALICLOUDOSS_KEY_SECRET }} OSS_REGION: cn-beijing - OSS_ENDPOINT: https://oss-cn-beijing.aliyuncs.com + OSS_ENDPOINT: https://oss-accelerate.aliyuncs.com shell: bash run: | if [[ -z "$OSS_ACCESS_KEY_ID" ]]; then From 61f4d307b58f8ff61b556788b86253433e638f4a Mon Sep 17 00:00:00 2001 From: loverustfs Date: Fri, 19 Dec 2025 14:57:19 +0800 Subject: [PATCH 09/26] Modify latest version tips to console --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b01de7df..a70e6aab 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -758,7 +758,7 @@ jobs: OSS_ACCESS_KEY_ID: ${{ secrets.ALICLOUDOSS_KEY_ID }} OSS_ACCESS_KEY_SECRET: ${{ secrets.ALICLOUDOSS_KEY_SECRET }} OSS_REGION: cn-beijing - OSS_ENDPOINT: https://oss-accelerate.aliyuncs.com + OSS_ENDPOINT: https://oss-cn-beijing.aliyuncs.com shell: bash run: | if [[ -z "$OSS_ACCESS_KEY_ID" ]]; then From abe8a50b5a3bb75ef10381a7cfa13efb04fa8302 Mon Sep 17 00:00:00 2001 From: majinghe <42570491+majinghe@users.noreply.github.com> Date: Fri, 19 Dec 2025 21:50:23 +0800 Subject: [PATCH 10/26] add cert manager and ingress annotations support (#1206) --- README.md | 2 +- helm/README.md | 11 ++++----- helm/rustfs/Chart.yaml | 2 +- helm/rustfs/templates/certificate.yml | 15 +++++++++++++ helm/rustfs/templates/ingress.yaml | 20 ++++++++++++----- helm/rustfs/templates/secret-tls.yaml | 6 ++--- 
helm/rustfs/values.yaml | 32 ++++++++++++++++----------- 7 files changed, 59 insertions(+), 29 deletions(-) create mode 100644 helm/rustfs/templates/certificate.yml diff --git a/README.md b/README.md index 30788f2d..a5e0dca4 100644 --- a/README.md +++ b/README.md @@ -103,7 +103,7 @@ The RustFS container runs as a non-root user `rustfs` (UID `10001`). If you run docker run -d -p 9000:9000 -p 9001:9001 -v $(pwd)/data:/data -v $(pwd)/logs:/logs rustfs/rustfs:latest # Using specific version - docker run -d -p 9000:9000 -p 9001:9001 -v $(pwd)/data:/data -v $(pwd)/logs:/logs rustfs/rustfs:1.0.0.alpha.68 + docker run -d -p 9000:9000 -p 9001:9001 -v $(pwd)/data:/data -v $(pwd)/logs:/logs rustfs/rustfs:1.0.0-alpha.76 ``` You can also use Docker Compose. Using the `docker-compose.yml` file in the root directory: diff --git a/helm/README.md b/helm/README.md index 0dcb4329..3ff09825 100644 --- a/helm/README.md +++ b/helm/README.md @@ -52,13 +52,17 @@ RustFS helm chart supports **standalone and distributed mode**. For standalone m | ingress.nginxAnnotations."nginx.ingress.kubernetes.io/session-cookie-hash" | string | `"sha1"` | | | ingress.nginxAnnotations."nginx.ingress.kubernetes.io/session-cookie-max-age" | string | `"3600"` | | | ingress.nginxAnnotations."nginx.ingress.kubernetes.io/session-cookie-name" | string | `"rustfs"` | | -| ingress.tls[0].hosts[0] | string | `"your.rustfs.com"` | | -| ingress.tls[0].secretName | string | `"rustfs-tls"` | | | ingress.traefikAnnotations."traefik.ingress.kubernetes.io/service.sticky.cookie" | string | `"true"` | | | ingress.traefikAnnotations."traefik.ingress.kubernetes.io/service.sticky.cookie.httponly" | string | `"true"` | | | ingress.traefikAnnotations."traefik.ingress.kubernetes.io/service.sticky.cookie.name" | string | `"rustfs"` | | | ingress.traefikAnnotations."traefik.ingress.kubernetes.io/service.sticky.cookie.samesite" | string | `"none"` | | | ingress.traefikAnnotations."traefik.ingress.kubernetes.io/service.sticky.cookie.secure" | string | `"true"` | | +| ingress.tls.enabled | bool | `false` | Enable tls and access rustfs via https. | +| ingress.tls.certManager.enabled | string | `false` | Enable cert manager support to generate certificate automatically. | +| ingress.tls.certManager.issuer.name | string | `false` | The name of cert manager issuer. | +| ingress.tls.certManager.issuer.kind | string | `false` | The kind of cert manager issuer, issuer or cluster-issuer. | +| ingress.tls.crt | string | "" | The content of certificate file. | +| ingress.tls.key | string | "" | The content of key file. | | livenessProbe.failureThreshold | int | `3` | | | livenessProbe.httpGet.path | string | `"/health"` | | | livenessProbe.httpGet.port | string | `"endpoint"` | | @@ -100,9 +104,6 @@ RustFS helm chart supports **standalone and distributed mode**. For standalone m | storageclass.dataStorageSize | string | `"256Mi"` | The storage size for data PVC. | | storageclass.logStorageSize | string | `"256Mi"` | The storage size for logs PVC. | | storageclass.name | string | `"local-path"` | The name for StorageClass. | -| tls.crt | string | `"tls.crt"` | | -| tls.enabled | bool | `false` | | -| tls.key | string | `"tls.key"` | | | tolerations | list | `[]` | | --- diff --git a/helm/rustfs/Chart.yaml b/helm/rustfs/Chart.yaml index 2cc92efa..68118e54 100644 --- a/helm/rustfs/Chart.yaml +++ b/helm/rustfs/Chart.yaml @@ -15,7 +15,7 @@ type: application # This is the chart version. 
This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 1.0.3 +version: 0.0.76 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to diff --git a/helm/rustfs/templates/certificate.yml b/helm/rustfs/templates/certificate.yml new file mode 100644 index 00000000..7eaf6a33 --- /dev/null +++ b/helm/rustfs/templates/certificate.yml @@ -0,0 +1,15 @@ +{{- if and .Values.ingress.tls.enabled .Values.ingress.tls.certManager.enabled }} +{{- $host := index .Values.ingress.hosts 0 }} +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: {{ include "rustfs.fullname" . }}-tls + namespace: {{ .Release.Namespace }} +spec: + secretName: {{ .Values.ingress.tls.secretName }} + issuerRef: + name: {{ .Values.ingress.tls.certManager.issuer.name }} + kind: {{ .Values.ingress.tls.certManager.issuer.kind }} + dnsNames: + - {{ $host.host }} +{{- end }} diff --git a/helm/rustfs/templates/ingress.yaml b/helm/rustfs/templates/ingress.yaml index 47197a98..cc505cfb 100644 --- a/helm/rustfs/templates/ingress.yaml +++ b/helm/rustfs/templates/ingress.yaml @@ -1,4 +1,14 @@ {{- if .Values.ingress.enabled -}} +{{- $secretName := .Values.ingress.tls.secretName }} +{{- $ingressAnnotations := dict }} +{{- if eq .Values.ingress.className "nginx" }} + {{- $ingressAnnotations = merge $ingressAnnotations (.Values.ingress.nginxAnnotations | default dict) }} +{{- else if eq .Values.ingress.className "" }} + {{- $ingressAnnotations = merge $ingressAnnotations (.Values.ingress.customAnnoations | default dict) }} +{{- end }} +{{- if .Values.ingress.tls.certManager.enabled }} + {{- $ingressAnnotations = merge $ingressAnnotations (.Values.ingress.certManagerAnnotations | default dict) }} +{{- end }} apiVersion: networking.k8s.io/v1 kind: Ingress metadata: @@ -8,25 +18,23 @@ metadata: {{- with .Values.commonLabels }} {{- toYaml . | nindent 4 }} {{- end }} - {{- if eq .Values.ingress.className "nginx" }} - {{- with .Values.ingress.nginxAnnotations }} + {{- with $ingressAnnotations }} annotations: {{- toYaml . | nindent 4 }} {{- end }} - {{- end }} spec: {{- with .Values.ingress.className }} ingressClassName: {{ . }} {{- end }} - {{- if .Values.tls.enabled }} + {{- if .Values.ingress.tls.enabled }} tls: - {{- range .Values.ingress.tls }} + {{- range .Values.ingress.hosts }} - hosts: {{- range .hosts }} - {{ . 
| quote }} {{- end }} - secretName: {{ .secretName }} {{- end }} + secretName: {{ $secretName }} {{- end }} rules: {{- range .Values.ingress.hosts }} diff --git a/helm/rustfs/templates/secret-tls.yaml b/helm/rustfs/templates/secret-tls.yaml index 6941d623..28b50600 100644 --- a/helm/rustfs/templates/secret-tls.yaml +++ b/helm/rustfs/templates/secret-tls.yaml @@ -1,4 +1,4 @@ -{{- if .Values.tls.enabled }} +{{- if and .Values.ingress.tls.enabled (not .Values.ingress.tls.certManager.enabled) }} apiVersion: v1 kind: Secret metadata: @@ -7,6 +7,6 @@ metadata: {{- toYaml .Values.commonLabels | nindent 4 }} type: kubernetes.io/tls data: - tls.crt : {{ .Values.tls.crt | b64enc | quote }} - tls.key : {{ .Values.tls.key | b64enc | quote }} + tls.crt : {{ .Values.ingress.tls.crt | b64enc | quote }} + tls.key : {{ .Values.ingress.tls.key | b64enc | quote }} {{- end }} diff --git a/helm/rustfs/values.yaml b/helm/rustfs/values.yaml index 6ed5baa7..0d78346c 100644 --- a/helm/rustfs/values.yaml +++ b/helm/rustfs/values.yaml @@ -11,7 +11,7 @@ image: # This sets the pull policy for images. pullPolicy: IfNotPresent # Overrides the image tag whose default is the chart appVersion. - tag: "latest" + tag: "1.0.0-alpha.73" # This is for the secrets for pulling an image from a private repository more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ imagePullSecrets: [] @@ -97,7 +97,7 @@ service: # This block is for setting up the ingress for more information can be found here: https://kubernetes.io/docs/concepts/services-networking/ingress/ ingress: enabled: true - className: "traefik" # Specify the classname, traefik or nginx. Different classname has different annotations for session sticky. + className: "nginx" # Specify the classname, traefik or nginx. Different classname has different annotations for session sticky. traefikAnnotations: traefik.ingress.kubernetes.io/service.sticky.cookie: "true" traefik.ingress.kubernetes.io/service.sticky.cookie.httponly: "true" @@ -110,20 +110,26 @@ ingress: nginx.ingress.kubernetes.io/session-cookie-hash: sha1 nginx.ingress.kubernetes.io/session-cookie-max-age: "3600" nginx.ingress.kubernetes.io/session-cookie-name: rustfs + certManagerAnnotations: + {} # Specify cert manager issuer annotations,cert-manager.io/issuer or cert-manager.io/cluster-issuer. + # cert-manager.io/issuer: "letsencrypt-staging" + customAnnotations: # Specify custom annotations + {} # Customize annotations hosts: - - host: your.rustfs.com + - host: xmg.rustfs.com paths: - path: / - pathType: ImplementationSpecific - tls: - - secretName: rustfs-tls - hosts: - - your.rustfs.com - -tls: - enabled: false - crt: tls.crt - key: tls.key + pathType: Prefix + tls: + enabled: false # Enable tls and access rustfs via https. + certManager: + enabled: false # Enable certmanager to generate certificate for rustfs, default false. + issuer: + name: letsencrypt-staging # Specify cert manager issuer name + kind: Issuer # Specify cert manager issuer kind, Issuer or ClusterIssuer. 
+ secretName: secret-tls + crt: tls.crt + key: tls.key resources: # We usually recommend not to specify default resources and to leave this as a conscious From 8e0aeb4fdcafe297c2047bd86b0d1ac99a14c96f Mon Sep 17 00:00:00 2001 From: loverustfs Date: Fri, 19 Dec 2025 23:22:45 +0800 Subject: [PATCH 11/26] Optimize ci ubicloud (#1208) --- .github/actions/setup/action.yml | 26 +++++ .github/workflows/build.yml | 27 +++-- .github/workflows/ci.yml | 30 ++++-- .github/workflows/docker.yml | 172 +++++++++++++++++++++++-------- 4 files changed, 197 insertions(+), 58 deletions(-) diff --git a/.github/actions/setup/action.yml b/.github/actions/setup/action.yml index 7a2171b9..ca80dc79 100644 --- a/.github/actions/setup/action.yml +++ b/.github/actions/setup/action.yml @@ -55,6 +55,32 @@ runs: pkg-config \ libssl-dev + - name: Install mold linker (Linux) + if: runner.os == 'Linux' + shell: bash + run: | + # Install mold for faster linking + MOLD_VERSION="2.34.1" + ARCH=$(uname -m) + + if [[ "$ARCH" == "x86_64" ]]; then + MOLD_ARCH="x86_64" + elif [[ "$ARCH" == "aarch64" ]]; then + MOLD_ARCH="aarch64" + else + echo "Unsupported architecture: $ARCH" + exit 0 + fi + + curl -L "https://github.com/rui314/mold/releases/download/v${MOLD_VERSION}/mold-${MOLD_VERSION}-${MOLD_ARCH}-linux.tar.gz" | tar xzf - + sudo cp mold-${MOLD_VERSION}-${MOLD_ARCH}-linux/bin/mold /usr/local/bin/ + sudo mkdir -p /usr/local/libexec + sudo cp mold-${MOLD_VERSION}-${MOLD_ARCH}-linux/libexec/mold /usr/local/libexec/ || true + rm -rf mold-${MOLD_VERSION}-${MOLD_ARCH}-linux + + # Verify installation + mold --version || echo "mold installation verification failed" + - name: Install protoc uses: arduino/setup-protoc@v3 with: diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a70e6aab..dc66fff9 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -166,23 +166,28 @@ jobs: fail-fast: false matrix: include: - # Linux builds + # Linux x86_64 builds on x86 runners - os: ubicloud-standard-4 target: x86_64-unknown-linux-musl cross: false platform: linux - - os: ubicloud-standard-4 - target: aarch64-unknown-linux-musl - cross: true - platform: linux + arch: x86_64 - os: ubicloud-standard-4 target: x86_64-unknown-linux-gnu cross: false platform: linux - - os: ubicloud-standard-4 - target: aarch64-unknown-linux-gnu - cross: true + arch: x86_64 + # Linux aarch64 builds on ARM runners (native compilation) + - os: ubicloud-standard-4-arm + target: aarch64-unknown-linux-musl + cross: false platform: linux + arch: aarch64 + - os: ubicloud-standard-4-arm + target: aarch64-unknown-linux-gnu + cross: false + platform: linux + arch: aarch64 # macOS builds - os: macos-latest target: aarch64-apple-darwin @@ -212,7 +217,7 @@ jobs: with: rust-version: stable target: ${{ matrix.target }} - cache-shared-key: build-${{ matrix.target }}-${{ hashFiles('**/Cargo.lock') }} + cache-shared-key: build-${{ matrix.arch }}-${{ matrix.target }}-${{ hashFiles('**/Cargo.lock') }} github-token: ${{ secrets.GITHUB_TOKEN }} cache-save-if: ${{ github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') }} install-cross-tools: ${{ matrix.cross }} @@ -259,6 +264,10 @@ jobs: cargo zigbuild --release --target ${{ matrix.target }} -p rustfs --bins fi else + # Native compilation - use mold linker on Linux for faster linking + if [[ "${{ matrix.platform }}" == "linux" ]]; then + export RUSTFLAGS="${RUSTFLAGS} -C link-arg=-fuse-ld=mold" + fi cargo build --release --target ${{ matrix.target }} -p rustfs --bins fi diff --git 
a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3c7e7662..ca5f1104 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -101,11 +101,19 @@ jobs: uses: crate-ci/typos@master test-and-lint: - name: Test and Lint + name: Test and Lint (${{ matrix.arch }}) needs: skip-check if: needs.skip-check.outputs.should_skip != 'true' - runs-on: ubicloud-standard-4 + runs-on: ${{ matrix.runner }} timeout-minutes: 60 + strategy: + fail-fast: false + matrix: + include: + - arch: x86_64 + runner: ubicloud-standard-4 + - arch: aarch64 + runner: ubicloud-standard-4-arm steps: - name: Checkout repository uses: actions/checkout@v6 @@ -114,7 +122,7 @@ jobs: uses: ./.github/actions/setup with: rust-version: stable - cache-shared-key: ci-test-${{ hashFiles('**/Cargo.lock') }} + cache-shared-key: ci-test-${{ matrix.arch }}-${{ hashFiles('**/Cargo.lock') }} github-token: ${{ secrets.GITHUB_TOKEN }} cache-save-if: ${{ github.ref == 'refs/heads/main' }} @@ -133,17 +141,25 @@ jobs: run: cargo clippy --all-targets --all-features -- -D warnings e2e-tests: - name: End-to-End Tests + name: End-to-End Tests (${{ matrix.arch }}) needs: skip-check if: needs.skip-check.outputs.should_skip != 'true' - runs-on: ubicloud-standard-4 + runs-on: ${{ matrix.runner }} timeout-minutes: 30 + strategy: + fail-fast: false + matrix: + include: + - arch: x86_64 + runner: ubicloud-standard-4 + - arch: aarch64 + runner: ubicloud-standard-4-arm steps: - name: Checkout repository uses: actions/checkout@v6 - name: Clean up previous test run - run: | + run: |matrix.arch }}-${{ rm -rf /tmp/rustfs rm -f /tmp/rustfs.log @@ -169,7 +185,7 @@ jobs: cargo build -p rustfs --bins --jobs 4 - name: Run end-to-end tests - run: | + run: |matrix.arch }}-${{ s3s-e2e --version ./scripts/e2e-run.sh ./target/debug/rustfs /tmp/rustfs diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 37d41b50..308a1185 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -258,37 +258,21 @@ jobs: # Build multi-arch Docker images # Strategy: Build images using pre-built binaries from dl.rustfs.com - # Supports both release and dev channel binaries based on build context + # Optimization: Build each architecture on its native runner to avoid QEMU overhead # Only runs when should_build is true (which includes workflow success check) - build-docker: - name: Build Docker Images + + # Prepare metadata for both builds + prepare-metadata: + name: Prepare Docker Metadata needs: build-check if: needs.build-check.outputs.should_build == 'true' runs-on: ubicloud-standard-4 - timeout-minutes: 60 + outputs: + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + docker_release: ${{ steps.meta.outputs.docker_release }} + docker_channel: ${{ steps.meta.outputs.docker_channel }} steps: - - name: Checkout repository - uses: actions/checkout@v6 - - - name: Login to Docker Hub - uses: docker/login-action@v3 - with: - username: ${{ env.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - # - name: Login to GitHub Container Registry - # uses: docker/login-action@v3 - # with: - # registry: ghcr.io - # username: ${{ github.actor }} - # password: ${{ secrets.GITHUB_TOKEN }} - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - name: Extract metadata and generate tags id: meta run: | @@ -368,41 +352,143 @@ jobs: echo "📋 Build type: $BUILD_TYPE" echo "🔖 Version: $VERSION" - - name: Build and push 
Docker image + # Build amd64 image on x86 runner (native build) + build-docker-amd64: + name: Build Docker Image (amd64) + needs: [build-check, prepare-metadata] + if: needs.build-check.outputs.should_build == 'true' + runs-on: ubicloud-standard-4 + timeout-minutes: 30 + outputs: + digest: ${{ steps.build.outputs.digest }} + image_name: ${{ steps.build.outputs.imageid }} + steps: + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ env.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Build and push (amd64) + id: build uses: docker/build-push-action@v6 with: context: . file: Dockerfile - platforms: ${{ env.DOCKER_PLATFORMS }} + platforms: linux/amd64 push: ${{ needs.build-check.outputs.should_push == 'true' }} - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} + labels: ${{ needs.prepare-metadata.outputs.labels }} cache-from: | - type=gha,scope=docker-binary + type=gha,scope=docker-amd64 cache-to: | - type=gha,mode=max,scope=docker-binary + type=gha,mode=max,scope=docker-amd64 build-args: | BUILDTIME=$(date -u +'%Y-%m-%dT%H:%M:%SZ') VERSION=${{ needs.build-check.outputs.version }} BUILD_TYPE=${{ needs.build-check.outputs.build_type }} REVISION=${{ github.sha }} - RELEASE=${{ steps.meta.outputs.docker_release }} - CHANNEL=${{ steps.meta.outputs.docker_channel }} + RELEASE=${{ needs.prepare-metadata.outputs.docker_release }} + CHANNEL=${{ needs.prepare-metadata.outputs.docker_channel }} BUILDKIT_INLINE_CACHE=1 - # Enable advanced BuildKit features for better performance provenance: false sbom: false - # Add retry mechanism by splitting the build process - no-cache: false - pull: true + outputs: type=image,name=${{ env.REGISTRY_DOCKERHUB }},push-by-digest=true,name-canonical=true,push=${{ needs.build-check.outputs.should_push == 'true' }} - # Note: Manifest creation is no longer needed as we only build one variant - # Multi-arch manifests are automatically created by docker/build-push-action + # Build arm64 image on ARM runner (native build) + build-docker-arm64: + name: Build Docker Image (arm64) + needs: [build-check, prepare-metadata] + if: needs.build-check.outputs.should_build == 'true' + runs-on: ubicloud-standard-4-arm + timeout-minutes: 30 + outputs: + digest: ${{ steps.build.outputs.digest }} + image_name: ${{ steps.build.outputs.imageid }} + steps: + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ env.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Build and push (arm64) + id: build + uses: docker/build-push-action@v6 + with: + context: . 
+ file: Dockerfile + platforms: linux/arm64 + push: ${{ needs.build-check.outputs.should_push == 'true' }} + labels: ${{ needs.prepare-metadata.outputs.labels }} + cache-from: | + type=gha,scope=docker-arm64 + cache-to: | + type=gha,mode=max,scope=docker-arm64 + build-args: | + BUILDTIME=$(date -u +'%Y-%m-%dT%H:%M:%SZ') + VERSION=${{ needs.build-check.outputs.version }} + BUILD_TYPE=${{ needs.build-check.outputs.build_type }} + REVISION=${{ github.sha }} + RELEASE=${{ needs.prepare-metadata.outputs.docker_release }} + CHANNEL=${{ needs.prepare-metadata.outputs.docker_channel }} + BUILDKIT_INLINE_CACHE=1 + provenance: false + sbom: false + outputs: type=image,name=${{ env.REGISTRY_DOCKERHUB }},push-by-digest=true,name-canonical=true,push=${{ needs.build-check.outputs.should_push == 'true' }} + + # Merge manifests to create multi-arch image + merge-manifests: + name: Create Multi-Arch Manifest + needs: [build-check, prepare-metadata, build-docker-amd64, build-docker-arm64] + if: needs.build-check.outputs.should_build == 'true' && needs.build-check.outputs.should_push == 'true' + runs-on: ubicloud-standard-4 + steps: + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ env.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Create and push multi-arch manifest + run: | + TAGS="${{ needs.prepare-metadata.outputs.tags }}" + + echo "🐳 Creating multi-arch manifest for tags:" + echo "$TAGS" | tr ',' '\n' | sed 's/^/ - /' + + # Convert comma-separated tags to array + IFS=',' read -ra TAG_ARRAY <<< "$TAGS" + + # Create manifest for each tag + for TAG in "${TAG_ARRAY[@]}"; do + echo "Creating manifest for: $TAG" + docker buildx imagetools create \ + -t "$TAG" \ + "${{ env.REGISTRY_DOCKERHUB }}@${{ needs.build-docker-amd64.outputs.digest }}" \ + "${{ env.REGISTRY_DOCKERHUB }}@${{ needs.build-docker-arm64.outputs.digest }}" + done + + echo "✅ Multi-arch manifest created and pushed successfully" # Docker build summary docker-summary: name: Docker Build Summary - needs: [ build-check, build-docker ] + needs: [ build-check, prepare-metadata, build-docker-amd64, build-docker-arm64, merge-manifests ] if: always() && needs.build-check.outputs.should_build == 'true' runs-on: ubicloud-standard-4 steps: @@ -415,7 +501,9 @@ jobs: echo "🐳 Docker build completed successfully!" 
echo "📦 Build type: $BUILD_TYPE" echo "🔢 Version: $VERSION" - echo "🚀 Strategy: Images using pre-built binaries (release channel only)" + echo "🚀 Strategy: Native builds on each architecture (no QEMU overhead)" + echo " - amd64: Built on x86 runner" + echo " - arm64: Built on ARM runner" echo "" case "$BUILD_TYPE" in From 8dd3e8b5348f06cae8e2cfa7257c8073faf41e00 Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Sat, 20 Dec 2025 01:31:09 +0800 Subject: [PATCH 12/26] fix: decode form-urlencoded object names in webhook/mqtt Key field (#1210) Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: houseme <4829346+houseme@users.noreply.github.com> Co-authored-by: houseme --- .github/workflows/ci.yml | 12 +++---- crates/targets/src/store.rs | 2 +- crates/targets/src/target/mod.rs | 27 ++++++++++++++ crates/targets/src/target/mqtt.rs | 5 ++- crates/targets/src/target/webhook.rs | 53 ++++++++++++++++++++++++++-- crates/utils/Cargo.toml | 2 +- 6 files changed, 87 insertions(+), 14 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ca5f1104..f73d6156 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -112,8 +112,8 @@ jobs: include: - arch: x86_64 runner: ubicloud-standard-4 - - arch: aarch64 - runner: ubicloud-standard-4-arm + # - arch: aarch64 + # runner: ubicloud-standard-4-arm steps: - name: Checkout repository uses: actions/checkout@v6 @@ -152,14 +152,14 @@ jobs: include: - arch: x86_64 runner: ubicloud-standard-4 - - arch: aarch64 - runner: ubicloud-standard-4-arm + # - arch: aarch64 + # runner: ubicloud-standard-4-arm steps: - name: Checkout repository uses: actions/checkout@v6 - name: Clean up previous test run - run: |matrix.arch }}-${{ + run: | rm -rf /tmp/rustfs rm -f /tmp/rustfs.log @@ -185,7 +185,7 @@ jobs: cargo build -p rustfs --bins --jobs 4 - name: Run end-to-end tests - run: |matrix.arch }}-${{ + run: | s3s-e2e --version ./scripts/e2e-run.sh ./target/debug/rustfs /tmp/rustfs diff --git a/crates/targets/src/store.rs b/crates/targets/src/store.rs index 139e32be..be15a9b4 100644 --- a/crates/targets/src/store.rs +++ b/crates/targets/src/store.rs @@ -312,7 +312,7 @@ where compress: true, }; - let data = serde_json::to_vec(&item).map_err(|e| StoreError::Serialization(e.to_string()))?; + let data = serde_json::to_vec(&*item).map_err(|e| StoreError::Serialization(e.to_string()))?; self.write_file(&key, &data)?; Ok(key) diff --git a/crates/targets/src/target/mod.rs b/crates/targets/src/target/mod.rs index 627fa8d0..876f186b 100644 --- a/crates/targets/src/target/mod.rs +++ b/crates/targets/src/target/mod.rs @@ -159,3 +159,30 @@ impl std::fmt::Display for TargetType { } } } + +/// Decodes a form-urlencoded object name to its original form. +/// +/// This function properly handles form-urlencoded strings where spaces are +/// represented as `+` symbols. It first replaces `+` with spaces, then +/// performs standard percent-decoding. 
+/// +/// # Arguments +/// * `encoded` - The form-urlencoded string to decode +/// +/// # Returns +/// The decoded string, or an error if decoding fails +/// +/// # Example +/// ``` +/// use rustfs_targets::target::decode_object_name; +/// +/// let encoded = "greeting+file+%282%29.csv"; +/// let decoded = decode_object_name(encoded).unwrap(); +/// assert_eq!(decoded, "greeting file (2).csv"); +/// ``` +pub fn decode_object_name(encoded: &str) -> Result { + let replaced = encoded.replace("+", " "); + urlencoding::decode(&replaced) + .map(|s| s.into_owned()) + .map_err(|e| TargetError::Encoding(format!("Failed to decode object key: {e}"))) +} diff --git a/crates/targets/src/target/mqtt.rs b/crates/targets/src/target/mqtt.rs index 61cb93c0..9de8ac94 100644 --- a/crates/targets/src/target/mqtt.rs +++ b/crates/targets/src/target/mqtt.rs @@ -32,7 +32,6 @@ use std::{ use tokio::sync::{Mutex, OnceCell, mpsc}; use tracing::{debug, error, info, instrument, trace, warn}; use url::Url; -use urlencoding; const DEFAULT_CONNECTION_TIMEOUT: Duration = Duration::from_secs(15); const EVENT_LOOP_POLL_TIMEOUT: Duration = Duration::from_secs(10); // For initial connection check in task @@ -258,8 +257,8 @@ where .as_ref() .ok_or_else(|| TargetError::Configuration("MQTT client not initialized".to_string()))?; - let object_name = urlencoding::decode(&event.object_name) - .map_err(|e| TargetError::Encoding(format!("Failed to decode object key: {e}")))?; + // Decode form-urlencoded object name + let object_name = crate::target::decode_object_name(&event.object_name)?; let key = format!("{}/{}", event.bucket_name, object_name); diff --git a/crates/targets/src/target/webhook.rs b/crates/targets/src/target/webhook.rs index c9564274..5c505e3b 100644 --- a/crates/targets/src/target/webhook.rs +++ b/crates/targets/src/target/webhook.rs @@ -36,7 +36,6 @@ use std::{ use tokio::net::lookup_host; use tokio::sync::mpsc; use tracing::{debug, error, info, instrument}; -use urlencoding; /// Arguments for configuring a Webhook target #[derive(Debug, Clone)] @@ -221,8 +220,8 @@ where async fn send(&self, event: &EntityTarget) -> Result<(), TargetError> { info!("Webhook Sending event to webhook target: {}", self.id); - let object_name = urlencoding::decode(&event.object_name) - .map_err(|e| TargetError::Encoding(format!("Failed to decode object key: {e}")))?; + // Decode form-urlencoded object name + let object_name = crate::target::decode_object_name(&event.object_name)?; let key = format!("{}/{}", event.bucket_name, object_name); @@ -421,3 +420,51 @@ where self.args.enable } } + +#[cfg(test)] +mod tests { + use crate::target::decode_object_name; + use url::form_urlencoded; + + #[test] + fn test_decode_object_name_with_spaces() { + // Test case from the issue: "greeting file (2).csv" + let object_name = "greeting file (2).csv"; + + // Simulate what event.rs does: form-urlencoded encoding (spaces become +) + let form_encoded = form_urlencoded::byte_serialize(object_name.as_bytes()).collect::(); + assert_eq!(form_encoded, "greeting+file+%282%29.csv"); + + // Test the decode_object_name helper function + let decoded = decode_object_name(&form_encoded).unwrap(); + assert_eq!(decoded, object_name); + assert!(!decoded.contains('+'), "Decoded string should not contain + symbols"); + } + + #[test] + fn test_decode_object_name_with_special_chars() { + // Test with various special characters + let test_cases = vec![ + ("folder/greeting file (2).csv", "folder%2Fgreeting+file+%282%29.csv"), + ("test file.txt", "test+file.txt"), + ("my file 
(copy).pdf", "my+file+%28copy%29.pdf"), + ("file with spaces and (parentheses).doc", "file+with+spaces+and+%28parentheses%29.doc"), + ]; + + for (original, form_encoded) in test_cases { + // Test the decode_object_name helper function + let decoded = decode_object_name(form_encoded).unwrap(); + assert_eq!(decoded, original, "Failed to decode: {}", form_encoded); + } + } + + #[test] + fn test_decode_object_name_without_spaces() { + // Test that files without spaces still work correctly + let object_name = "simple-file.txt"; + let form_encoded = form_urlencoded::byte_serialize(object_name.as_bytes()).collect::(); + + let decoded = decode_object_name(&form_encoded).unwrap(); + assert_eq!(decoded, object_name); + } +} diff --git a/crates/utils/Cargo.toml b/crates/utils/Cargo.toml index 5a0bd187..9b05e84e 100644 --- a/crates/utils/Cargo.toml +++ b/crates/utils/Cargo.toml @@ -84,7 +84,7 @@ tls = ["dep:rustls", "dep:rustls-pemfile", "dep:rustls-pki-types"] # tls charac net = ["ip", "dep:url", "dep:netif", "dep:futures", "dep:transform-stream", "dep:bytes", "dep:s3s", "dep:hyper", "dep:thiserror", "dep:tokio"] # network features with DNS resolver io = ["dep:tokio"] path = [] -notify = ["dep:hyper", "dep:s3s", "dep:hashbrown", "dep:thiserror", "dep:serde", "dep:libc"] # file system notification features +notify = ["dep:hyper", "dep:s3s", "dep:hashbrown", "dep:thiserror", "dep:serde", "dep:libc", "dep:url", "dep:regex"] # file system notification features compress = ["dep:flate2", "dep:brotli", "dep:snap", "dep:lz4", "dep:zstd"] string = ["dep:regex", "dep:rand"] crypto = ["dep:base64-simd", "dep:hex-simd", "dep:hmac", "dep:hyper", "dep:sha1"] From 1e35edf079b8690fa0d2e0d7b34198f0f9218e56 Mon Sep 17 00:00:00 2001 From: loverustfs Date: Sat, 20 Dec 2025 07:50:49 +0800 Subject: [PATCH 13/26] chore(ci): restore workflows before 8e0aeb4 (#1212) --- .github/actions/setup/action.yml | 26 ----- .github/workflows/build.yml | 25 ++--- .github/workflows/ci.yml | 26 +---- .github/workflows/docker.yml | 172 ++++++++----------------------- 4 files changed, 55 insertions(+), 194 deletions(-) diff --git a/.github/actions/setup/action.yml b/.github/actions/setup/action.yml index ca80dc79..7a2171b9 100644 --- a/.github/actions/setup/action.yml +++ b/.github/actions/setup/action.yml @@ -55,32 +55,6 @@ runs: pkg-config \ libssl-dev - - name: Install mold linker (Linux) - if: runner.os == 'Linux' - shell: bash - run: | - # Install mold for faster linking - MOLD_VERSION="2.34.1" - ARCH=$(uname -m) - - if [[ "$ARCH" == "x86_64" ]]; then - MOLD_ARCH="x86_64" - elif [[ "$ARCH" == "aarch64" ]]; then - MOLD_ARCH="aarch64" - else - echo "Unsupported architecture: $ARCH" - exit 0 - fi - - curl -L "https://github.com/rui314/mold/releases/download/v${MOLD_VERSION}/mold-${MOLD_VERSION}-${MOLD_ARCH}-linux.tar.gz" | tar xzf - - sudo cp mold-${MOLD_VERSION}-${MOLD_ARCH}-linux/bin/mold /usr/local/bin/ - sudo mkdir -p /usr/local/libexec - sudo cp mold-${MOLD_VERSION}-${MOLD_ARCH}-linux/libexec/mold /usr/local/libexec/ || true - rm -rf mold-${MOLD_VERSION}-${MOLD_ARCH}-linux - - # Verify installation - mold --version || echo "mold installation verification failed" - - name: Install protoc uses: arduino/setup-protoc@v3 with: diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index dc66fff9..a70e6aab 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -166,28 +166,23 @@ jobs: fail-fast: false matrix: include: - # Linux x86_64 builds on x86 runners + # Linux builds - os: ubicloud-standard-4 
target: x86_64-unknown-linux-musl cross: false platform: linux - arch: x86_64 + - os: ubicloud-standard-4 + target: aarch64-unknown-linux-musl + cross: true + platform: linux - os: ubicloud-standard-4 target: x86_64-unknown-linux-gnu cross: false platform: linux - arch: x86_64 - # Linux aarch64 builds on ARM runners (native compilation) - - os: ubicloud-standard-4-arm - target: aarch64-unknown-linux-musl - cross: false - platform: linux - arch: aarch64 - - os: ubicloud-standard-4-arm + - os: ubicloud-standard-4 target: aarch64-unknown-linux-gnu - cross: false + cross: true platform: linux - arch: aarch64 # macOS builds - os: macos-latest target: aarch64-apple-darwin @@ -217,7 +212,7 @@ jobs: with: rust-version: stable target: ${{ matrix.target }} - cache-shared-key: build-${{ matrix.arch }}-${{ matrix.target }}-${{ hashFiles('**/Cargo.lock') }} + cache-shared-key: build-${{ matrix.target }}-${{ hashFiles('**/Cargo.lock') }} github-token: ${{ secrets.GITHUB_TOKEN }} cache-save-if: ${{ github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') }} install-cross-tools: ${{ matrix.cross }} @@ -264,10 +259,6 @@ jobs: cargo zigbuild --release --target ${{ matrix.target }} -p rustfs --bins fi else - # Native compilation - use mold linker on Linux for faster linking - if [[ "${{ matrix.platform }}" == "linux" ]]; then - export RUSTFLAGS="${RUSTFLAGS} -C link-arg=-fuse-ld=mold" - fi cargo build --release --target ${{ matrix.target }} -p rustfs --bins fi diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f73d6156..3c7e7662 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -101,19 +101,11 @@ jobs: uses: crate-ci/typos@master test-and-lint: - name: Test and Lint (${{ matrix.arch }}) + name: Test and Lint needs: skip-check if: needs.skip-check.outputs.should_skip != 'true' - runs-on: ${{ matrix.runner }} + runs-on: ubicloud-standard-4 timeout-minutes: 60 - strategy: - fail-fast: false - matrix: - include: - - arch: x86_64 - runner: ubicloud-standard-4 - # - arch: aarch64 - # runner: ubicloud-standard-4-arm steps: - name: Checkout repository uses: actions/checkout@v6 @@ -122,7 +114,7 @@ jobs: uses: ./.github/actions/setup with: rust-version: stable - cache-shared-key: ci-test-${{ matrix.arch }}-${{ hashFiles('**/Cargo.lock') }} + cache-shared-key: ci-test-${{ hashFiles('**/Cargo.lock') }} github-token: ${{ secrets.GITHUB_TOKEN }} cache-save-if: ${{ github.ref == 'refs/heads/main' }} @@ -141,19 +133,11 @@ jobs: run: cargo clippy --all-targets --all-features -- -D warnings e2e-tests: - name: End-to-End Tests (${{ matrix.arch }}) + name: End-to-End Tests needs: skip-check if: needs.skip-check.outputs.should_skip != 'true' - runs-on: ${{ matrix.runner }} + runs-on: ubicloud-standard-4 timeout-minutes: 30 - strategy: - fail-fast: false - matrix: - include: - - arch: x86_64 - runner: ubicloud-standard-4 - # - arch: aarch64 - # runner: ubicloud-standard-4-arm steps: - name: Checkout repository uses: actions/checkout@v6 diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 308a1185..37d41b50 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -258,21 +258,37 @@ jobs: # Build multi-arch Docker images # Strategy: Build images using pre-built binaries from dl.rustfs.com - # Optimization: Build each architecture on its native runner to avoid QEMU overhead + # Supports both release and dev channel binaries based on build context # Only runs when should_build is true (which includes workflow success check) - - # 
Prepare metadata for both builds - prepare-metadata: - name: Prepare Docker Metadata + build-docker: + name: Build Docker Images needs: build-check if: needs.build-check.outputs.should_build == 'true' runs-on: ubicloud-standard-4 - outputs: - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - docker_release: ${{ steps.meta.outputs.docker_release }} - docker_channel: ${{ steps.meta.outputs.docker_channel }} + timeout-minutes: 60 steps: + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ env.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + # - name: Login to GitHub Container Registry + # uses: docker/login-action@v3 + # with: + # registry: ghcr.io + # username: ${{ github.actor }} + # password: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Extract metadata and generate tags id: meta run: | @@ -352,143 +368,41 @@ jobs: echo "📋 Build type: $BUILD_TYPE" echo "🔖 Version: $VERSION" - # Build amd64 image on x86 runner (native build) - build-docker-amd64: - name: Build Docker Image (amd64) - needs: [build-check, prepare-metadata] - if: needs.build-check.outputs.should_build == 'true' - runs-on: ubicloud-standard-4 - timeout-minutes: 30 - outputs: - digest: ${{ steps.build.outputs.digest }} - image_name: ${{ steps.build.outputs.imageid }} - steps: - - name: Checkout repository - uses: actions/checkout@v6 - - - name: Login to Docker Hub - uses: docker/login-action@v3 - with: - username: ${{ env.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Build and push (amd64) - id: build + - name: Build and push Docker image uses: docker/build-push-action@v6 with: context: . 
file: Dockerfile - platforms: linux/amd64 + platforms: ${{ env.DOCKER_PLATFORMS }} push: ${{ needs.build-check.outputs.should_push == 'true' }} - labels: ${{ needs.prepare-metadata.outputs.labels }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} cache-from: | - type=gha,scope=docker-amd64 + type=gha,scope=docker-binary cache-to: | - type=gha,mode=max,scope=docker-amd64 + type=gha,mode=max,scope=docker-binary build-args: | BUILDTIME=$(date -u +'%Y-%m-%dT%H:%M:%SZ') VERSION=${{ needs.build-check.outputs.version }} BUILD_TYPE=${{ needs.build-check.outputs.build_type }} REVISION=${{ github.sha }} - RELEASE=${{ needs.prepare-metadata.outputs.docker_release }} - CHANNEL=${{ needs.prepare-metadata.outputs.docker_channel }} + RELEASE=${{ steps.meta.outputs.docker_release }} + CHANNEL=${{ steps.meta.outputs.docker_channel }} BUILDKIT_INLINE_CACHE=1 + # Enable advanced BuildKit features for better performance provenance: false sbom: false - outputs: type=image,name=${{ env.REGISTRY_DOCKERHUB }},push-by-digest=true,name-canonical=true,push=${{ needs.build-check.outputs.should_push == 'true' }} + # Add retry mechanism by splitting the build process + no-cache: false + pull: true - # Build arm64 image on ARM runner (native build) - build-docker-arm64: - name: Build Docker Image (arm64) - needs: [build-check, prepare-metadata] - if: needs.build-check.outputs.should_build == 'true' - runs-on: ubicloud-standard-4-arm - timeout-minutes: 30 - outputs: - digest: ${{ steps.build.outputs.digest }} - image_name: ${{ steps.build.outputs.imageid }} - steps: - - name: Checkout repository - uses: actions/checkout@v6 - - - name: Login to Docker Hub - uses: docker/login-action@v3 - with: - username: ${{ env.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Build and push (arm64) - id: build - uses: docker/build-push-action@v6 - with: - context: . 
- file: Dockerfile - platforms: linux/arm64 - push: ${{ needs.build-check.outputs.should_push == 'true' }} - labels: ${{ needs.prepare-metadata.outputs.labels }} - cache-from: | - type=gha,scope=docker-arm64 - cache-to: | - type=gha,mode=max,scope=docker-arm64 - build-args: | - BUILDTIME=$(date -u +'%Y-%m-%dT%H:%M:%SZ') - VERSION=${{ needs.build-check.outputs.version }} - BUILD_TYPE=${{ needs.build-check.outputs.build_type }} - REVISION=${{ github.sha }} - RELEASE=${{ needs.prepare-metadata.outputs.docker_release }} - CHANNEL=${{ needs.prepare-metadata.outputs.docker_channel }} - BUILDKIT_INLINE_CACHE=1 - provenance: false - sbom: false - outputs: type=image,name=${{ env.REGISTRY_DOCKERHUB }},push-by-digest=true,name-canonical=true,push=${{ needs.build-check.outputs.should_push == 'true' }} - - # Merge manifests to create multi-arch image - merge-manifests: - name: Create Multi-Arch Manifest - needs: [build-check, prepare-metadata, build-docker-amd64, build-docker-arm64] - if: needs.build-check.outputs.should_build == 'true' && needs.build-check.outputs.should_push == 'true' - runs-on: ubicloud-standard-4 - steps: - - name: Login to Docker Hub - uses: docker/login-action@v3 - with: - username: ${{ env.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Create and push multi-arch manifest - run: | - TAGS="${{ needs.prepare-metadata.outputs.tags }}" - - echo "🐳 Creating multi-arch manifest for tags:" - echo "$TAGS" | tr ',' '\n' | sed 's/^/ - /' - - # Convert comma-separated tags to array - IFS=',' read -ra TAG_ARRAY <<< "$TAGS" - - # Create manifest for each tag - for TAG in "${TAG_ARRAY[@]}"; do - echo "Creating manifest for: $TAG" - docker buildx imagetools create \ - -t "$TAG" \ - "${{ env.REGISTRY_DOCKERHUB }}@${{ needs.build-docker-amd64.outputs.digest }}" \ - "${{ env.REGISTRY_DOCKERHUB }}@${{ needs.build-docker-arm64.outputs.digest }}" - done - - echo "✅ Multi-arch manifest created and pushed successfully" + # Note: Manifest creation is no longer needed as we only build one variant + # Multi-arch manifests are automatically created by docker/build-push-action # Docker build summary docker-summary: name: Docker Build Summary - needs: [ build-check, prepare-metadata, build-docker-amd64, build-docker-arm64, merge-manifests ] + needs: [ build-check, build-docker ] if: always() && needs.build-check.outputs.should_build == 'true' runs-on: ubicloud-standard-4 steps: @@ -501,9 +415,7 @@ jobs: echo "🐳 Docker build completed successfully!" 
echo "📦 Build type: $BUILD_TYPE" echo "🔢 Version: $VERSION" - echo "🚀 Strategy: Native builds on each architecture (no QEMU overhead)" - echo " - amd64: Built on x86 runner" - echo " - arm64: Built on ARM runner" + echo "🚀 Strategy: Images using pre-built binaries (release channel only)" echo "" case "$BUILD_TYPE" in From b5535083ded4e642e78d9f19097f1d2808f7269b Mon Sep 17 00:00:00 2001 From: yxrxy <1532529704@qq.com> Date: Sat, 20 Dec 2025 19:15:49 +0800 Subject: [PATCH 14/26] =?UTF-8?q?fix(iam):=20store=20previous=20credential?= =?UTF-8?q?s=20in=20.rustfs.sys=20bucket=20to=20preserv=E2=80=A6=20(#1213)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/iam/src/lib.rs | 2 +- crates/iam/src/store/object.rs | 111 ++++++++++++++++++++++++++++----- 2 files changed, 97 insertions(+), 16 deletions(-) diff --git a/crates/iam/src/lib.rs b/crates/iam/src/lib.rs index ebefb72f..592695d6 100644 --- a/crates/iam/src/lib.rs +++ b/crates/iam/src/lib.rs @@ -33,7 +33,7 @@ static IAM_SYS: OnceLock>> = OnceLock::new(); #[instrument(skip(ecstore))] pub async fn init_iam_sys(ecstore: Arc) -> Result<()> { debug!("init iam system"); - let s = IamCache::new(ObjectStore::new(ecstore)).await; + let s = IamCache::new(ObjectStore::new(ecstore).await).await; IAM_SYS.get_or_init(move || IamSys::new(s).into()); Ok(()) diff --git a/crates/iam/src/store/object.rs b/crates/iam/src/store/object.rs index 0390587c..05f2f3d3 100644 --- a/crates/iam/src/store/object.rs +++ b/crates/iam/src/store/object.rs @@ -120,18 +120,52 @@ fn split_path(s: &str, last_index: bool) -> (&str, &str) { #[derive(Clone)] pub struct ObjectStore { object_api: Arc, + prev_cred: Option, } impl ObjectStore { const BUCKET_NAME: &'static str = ".rustfs.sys"; + const PREV_CRED_FILE: &'static str = "config/iam/prev_cred.json"; - pub fn new(object_api: Arc) -> Self { - Self { object_api } + /// Load previous credentials from persistent storage in .rustfs.sys bucket + async fn load_prev_cred(object_api: Arc) -> Option { + match read_config(object_api, Self::PREV_CRED_FILE).await { + Ok(data) => serde_json::from_slice::(&data).ok(), + Err(_) => None, + } } - fn decrypt_data(data: &[u8]) -> Result> { - let de = rustfs_crypto::decrypt_data(get_global_action_cred().unwrap_or_default().secret_key.as_bytes(), data)?; - Ok(de) + /// Save previous credentials to persistent storage in .rustfs.sys bucket + async fn save_prev_cred(object_api: Arc, cred: &Option) -> Result<()> { + match cred { + Some(c) => { + let data = serde_json::to_vec(c).map_err(|e| Error::other(format!("Failed to serialize cred: {}", e)))?; + save_config(object_api, Self::PREV_CRED_FILE, data) + .await + .map_err(|e| Error::other(format!("Failed to write cred to storage: {}", e))) + } + None => { + // If no credentials, remove the config + match delete_config(object_api, Self::PREV_CRED_FILE).await { + Ok(_) => Ok(()), + Err(e) => { + // Ignore ConfigNotFound error when trying to delete non-existent config + if matches!(e, rustfs_ecstore::error::StorageError::ConfigNotFound) { + Ok(()) + } else { + Err(Error::other(format!("Failed to delete cred from storage: {}", e))) + } + } + } + } + } + } + + pub async fn new(object_api: Arc) -> Self { + // Load previous credentials from persistent storage in .rustfs.sys bucket + let prev_cred = Self::load_prev_cred(object_api.clone()).await.or_else(get_global_action_cred); + + Self { object_api, prev_cred } } fn encrypt_data(data: &[u8]) -> Result> { @@ -139,10 +173,65 @@ impl ObjectStore { Ok(en) } + /// 
Decrypt data with credential fallback mechanism + /// First tries current credentials, then falls back to previous credentials if available + async fn decrypt_fallback(&self, data: &[u8], path: &str) -> Result> { + let current_cred = get_global_action_cred().unwrap_or_default(); + + // Try current credentials first + match rustfs_crypto::decrypt_data(current_cred.secret_key.as_bytes(), data) { + Ok(decrypted) => { + // Update persistent storage with current credentials for consistency + let _ = Self::save_prev_cred(self.object_api.clone(), &Some(current_cred)).await; + Ok(decrypted) + } + Err(_) => { + // Current credentials failed, try previous credentials + if let Some(ref prev_cred) = self.prev_cred { + match rustfs_crypto::decrypt_data(prev_cred.secret_key.as_bytes(), data) { + Ok(prev_decrypted) => { + warn!("Decryption succeeded with previous credentials, path: {}", path); + + // Re-encrypt with current credentials + match rustfs_crypto::encrypt_data(current_cred.secret_key.as_bytes(), &prev_decrypted) { + Ok(re_encrypted) => { + let _ = save_config(self.object_api.clone(), path, re_encrypted).await; + } + Err(e) => { + warn!("Failed to re-encrypt with current credentials: {}, path: {}", e, path); + } + } + + // Update persistent storage with current credentials + let _ = Self::save_prev_cred(self.object_api.clone(), &Some(current_cred)).await; + Ok(prev_decrypted) + } + Err(_) => { + // Both attempts failed + warn!("Decryption failed with both current and previous credentials, deleting config: {}", path); + let _ = self.delete_iam_config(path).await; + Err(Error::ConfigNotFound) + } + } + } else { + // No previous credentials available + warn!( + "Decryption failed with current credentials and no previous credentials available, deleting config: {}", + path + ); + let _ = self.delete_iam_config(path).await; + Err(Error::ConfigNotFound) + } + } + } + } + async fn load_iamconfig_bytes_with_metadata(&self, path: impl AsRef + Send) -> Result<(Vec, ObjectInfo)> { let (data, obj) = read_config_with_metadata(self.object_api.clone(), path.as_ref(), &ObjectOptions::default()).await?; - Ok((Self::decrypt_data(&data)?, obj)) + let decrypted_data = self.decrypt_fallback(&data, path.as_ref()).await?; + + Ok((decrypted_data, obj)) } async fn list_iam_config_items(&self, prefix: &str, ctx: CancellationToken, sender: Sender) { @@ -386,15 +475,7 @@ impl Store for ObjectStore { async fn load_iam_config(&self, path: impl AsRef + Send) -> Result { let mut data = read_config(self.object_api.clone(), path.as_ref()).await?; - data = match Self::decrypt_data(&data) { - Ok(v) => v, - Err(err) => { - warn!("delete the config file when decrypt failed failed: {}, path: {}", err, path.as_ref()); - // delete the config file when decrypt failed - let _ = self.delete_iam_config(path.as_ref()).await; - return Err(Error::ConfigNotFound); - } - }; + data = self.decrypt_fallback(&data, path.as_ref()).await?; Ok(serde_json::from_slice(&data)?) 
} From cc31e88c91e4704eb683a0a441ffe172b89f1e96 Mon Sep 17 00:00:00 2001 From: GatewayJ <835269233@qq.com> Date: Sat, 20 Dec 2025 20:25:52 +0800 Subject: [PATCH 15/26] fix: expiration time (#1215) --- crates/policy/src/auth/credentials.rs | 3 +-- rustfs/src/admin/router.rs | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/crates/policy/src/auth/credentials.rs b/crates/policy/src/auth/credentials.rs index 4cbe8707..9813f6c4 100644 --- a/crates/policy/src/auth/credentials.rs +++ b/crates/policy/src/auth/credentials.rs @@ -20,7 +20,6 @@ use serde::{Deserialize, Serialize}; use serde_json::{Value, json}; use std::collections::HashMap; use time::OffsetDateTime; -use time::macros::offset; use tracing::warn; const ACCESS_KEY_MIN_LEN: usize = 3; @@ -231,7 +230,7 @@ pub fn create_new_credentials_with_metadata( let expiration = { if let Some(v) = claims.get("exp") { if let Some(expiry) = v.as_i64() { - Some(OffsetDateTime::from_unix_timestamp(expiry)?.to_offset(offset!(+8))) + Some(OffsetDateTime::from_unix_timestamp(expiry)?) } else { None } diff --git a/rustfs/src/admin/router.rs b/rustfs/src/admin/router.rs index a28bf29e..fd3c3306 100644 --- a/rustfs/src/admin/router.rs +++ b/rustfs/src/admin/router.rs @@ -101,7 +101,7 @@ where && headers .get(header::CONTENT_TYPE) .and_then(|v| v.to_str().ok()) - .map(|ct| ct.split(';').next().unwrap_or("").trim()) + .map(|ct| ct.split(';').next().unwrap_or("").trim().to_lowercase()) .map(|ct| ct == "application/x-www-form-urlencoded") .unwrap_or(false) { From 20ea5910495f8f4e76cde6861cefe572ccdcc108 Mon Sep 17 00:00:00 2001 From: majinghe <42570491+majinghe@users.noreply.github.com> Date: Sat, 20 Dec 2025 22:02:21 +0800 Subject: [PATCH 16/26] add custom nodeport support (#1217) --- helm/rustfs/templates/ingress.yaml | 6 ++---- helm/rustfs/templates/service.yaml | 31 +++++++++++++++++++----------- helm/rustfs/values.yaml | 10 +++++++--- 3 files changed, 29 insertions(+), 18 deletions(-) diff --git a/helm/rustfs/templates/ingress.yaml b/helm/rustfs/templates/ingress.yaml index cc505cfb..bbb7b9d7 100644 --- a/helm/rustfs/templates/ingress.yaml +++ b/helm/rustfs/templates/ingress.yaml @@ -28,12 +28,10 @@ spec: {{- end }} {{- if .Values.ingress.tls.enabled }} tls: - {{- range .Values.ingress.hosts }} - hosts: - {{- range .hosts }} - - {{ . | quote }} + {{- range .Values.ingress.hosts }} + - {{ .host | quote }} {{- end }} - {{- end }} secretName: {{ $secretName }} {{- end }} rules: diff --git a/helm/rustfs/templates/service.yaml b/helm/rustfs/templates/service.yaml index e49894f2..347383ab 100644 --- a/helm/rustfs/templates/service.yaml +++ b/helm/rustfs/templates/service.yaml @@ -13,15 +13,16 @@ spec: clusterIP: None publishNotReadyAddresses: true ports: - - port: {{ .Values.service.ep_port }} - name: endpoint - - port: {{ .Values.service.console_port }} - name: console + - name: endpoint + port: {{ .Values.service.endpoint.port }} + - name: console + port: {{ .Values.service.console.port }} selector: {{- include "rustfs.selectorLabels" . | nindent 4 }} {{- end }} --- +{{- $serviceType := .Values.service.type }} apiVersion: v1 kind: Service metadata: @@ -40,19 +41,27 @@ metadata: {{- toYaml . 
| nindent 4 }} {{- end }} spec: - {{- if .Values.ingress.enabled }} + {{- if eq $serviceType "ClusterIP" }} type: ClusterIP - {{- else }} - type: {{ .Values.service.type }} + {{- else if eq $serviceType "NodePort" }} + type: NodePort sessionAffinity: ClientIP sessionAffinityConfig: clientIP: timeoutSeconds: 10800 {{- end }} ports: - - port: {{ .Values.service.ep_port }} - name: endpoint - - port: {{ .Values.service.console_port }} - name: console + - name: endpoint + port: {{ .Values.service.endpoint.port }} + targetPort: {{ .Values.service.endpoint.port }} + {{- if eq $serviceType "NodePort" }} + nodePort: {{ .Values.service.endpoint.nodePort }} + {{- end }} + - name: console + port: {{ .Values.service.console.port }} + targetPort: {{ .Values.service.console.port }} + {{- if eq $serviceType "NodePort" }} + nodePort: {{ .Values.service.console.nodePort }} + {{- end }} selector: {{- include "rustfs.selectorLabels" . | nindent 4 }} diff --git a/helm/rustfs/values.yaml b/helm/rustfs/values.yaml index 0d78346c..4e669a72 100644 --- a/helm/rustfs/values.yaml +++ b/helm/rustfs/values.yaml @@ -90,9 +90,13 @@ containerSecurityContext: runAsNonRoot: true service: - type: NodePort - ep_port: 9000 - console_port: 9001 + type: ClusterIP + endpoint: + port: 9000 + nodePort: 32000 + console: + port: 9001 + nodePort: 32001 # This block is for setting up the ingress for more information can be found here: https://kubernetes.io/docs/concepts/services-networking/ingress/ ingress: From 3bd96bcf108720194da74cfad4ace38c755f7ce9 Mon Sep 17 00:00:00 2001 From: yxrxy <1532529704@qq.com> Date: Sun, 21 Dec 2025 12:43:48 +0800 Subject: [PATCH 17/26] fix: resolve event target deletion issue (#1219) --- crates/notify/src/integration.rs | 47 ++++++++++++++++++++++---------- crates/notify/src/notifier.rs | 9 ++++++ 2 files changed, 41 insertions(+), 15 deletions(-) diff --git a/crates/notify/src/integration.rs b/crates/notify/src/integration.rs index dc50857d..790d43f9 100644 --- a/crates/notify/src/integration.rs +++ b/crates/notify/src/integration.rs @@ -212,6 +212,11 @@ impl NotificationSystem { return Ok(()); } + // Save the modified configuration to storage + rustfs_ecstore::config::com::save_server_config(store, &new_config) + .await + .map_err(|e| NotificationError::SaveConfig(e.to_string()))?; + info!("Configuration updated. Reloading system..."); self.reload_config(new_config).await } @@ -294,23 +299,35 @@ impl NotificationSystem { /// If the target configuration does not exist, it returns Ok(()) without making any changes. 
pub async fn remove_target_config(&self, target_type: &str, target_name: &str) -> Result<(), NotificationError> { info!("Removing config for target {} of type {}", target_name, target_type); - self.update_config_and_reload(|config| { - let mut changed = false; - if let Some(targets) = config.0.get_mut(&target_type.to_lowercase()) { - if targets.remove(&target_name.to_lowercase()).is_some() { - changed = true; + let config_result = self + .update_config_and_reload(|config| { + let mut changed = false; + if let Some(targets) = config.0.get_mut(&target_type.to_lowercase()) { + if targets.remove(&target_name.to_lowercase()).is_some() { + changed = true; + } + if targets.is_empty() { + config.0.remove(target_type); + } } - if targets.is_empty() { - config.0.remove(target_type); + if !changed { + info!("Target {} of type {} not found, no changes made.", target_name, target_type); } - } - if !changed { - info!("Target {} of type {} not found, no changes made.", target_name, target_type); - } - debug!("Config after remove: {:?}", config); - changed - }) - .await + debug!("Config after remove: {:?}", config); + changed + }) + .await; + + if config_result.is_ok() { + let target_id = TargetID::new(target_name.to_string(), target_type.to_string()); + + // Remove from target list + let target_list = self.notifier.target_list(); + let mut target_list_guard = target_list.write().await; + let _ = target_list_guard.remove_target_only(&target_id).await; + } + + config_result } /// Enhanced event stream startup function, including monitoring and concurrency control diff --git a/crates/notify/src/notifier.rs b/crates/notify/src/notifier.rs index b570fd6f..10aa5767 100644 --- a/crates/notify/src/notifier.rs +++ b/crates/notify/src/notifier.rs @@ -195,6 +195,10 @@ impl EventNotifier { ) -> Result<(), NotificationError> { // Currently active, simpler logic let mut target_list_guard = self.target_list.write().await; //Gets a write lock for the TargetList + + // Clear existing targets first - rebuild from scratch to ensure consistency with new configuration + target_list_guard.clear(); + for target_boxed in targets_to_init { // Traverse the incoming Box debug!("init bucket target: {}", target_boxed.name()); @@ -240,6 +244,11 @@ impl TargetList { Ok(()) } + /// Clears all targets from the list + pub fn clear(&mut self) { + self.targets.clear(); + } + /// Removes a target by ID. Note: This does not stop its associated event stream. /// Stream cancellation should be handled by EventNotifier. 
pub async fn remove_target_only(&mut self, id: &TargetID) -> Option + Send + Sync>> { From f3a1431fa57ea8c2103bd2da67e9c86919ffe88f Mon Sep 17 00:00:00 2001 From: loverustfs Date: Sun, 21 Dec 2025 16:11:55 +0800 Subject: [PATCH 18/26] fix: resolve TLS handshake failure in inter-node communication (#1201) (#1222) Co-authored-by: houseme --- Cargo.lock | 109 +++++++------------- Cargo.toml | 12 +-- crates/common/src/globals.rs | 5 + crates/config/src/constants/app.rs | 24 +++++ crates/config/src/constants/tls.rs | 22 ++++ crates/protos/src/lib.rs | 49 +++++++-- crates/utils/src/certs.rs | 14 +-- rustfs/src/main.rs | 17 +-- rustfs/src/server/cert.rs | 160 +++++++++++++++++++++++++++++ rustfs/src/server/mod.rs | 2 + scripts/run.sh | 3 + 11 files changed, 313 insertions(+), 104 deletions(-) create mode 100644 rustfs/src/server/cert.rs diff --git a/Cargo.lock b/Cargo.lock index 7ada333c..4f7d153b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1032,9 +1032,9 @@ dependencies = [ [[package]] name = "axum" -version = "0.8.7" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b098575ebe77cb6d14fc7f32749631a6e44edbef6b796f89b020e99ba20d425" +checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8" dependencies = [ "axum-core", "bytes", @@ -1084,9 +1084,9 @@ dependencies = [ [[package]] name = "axum-extra" -version = "0.12.2" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbfe9f610fe4e99cf0cfcd03ccf8c63c28c616fe714d80475ef731f3b13dd21b" +checksum = "6dfbd6109d91702d55fc56df06aae7ed85c465a7a451db6c0e54a4b9ca5983d1" dependencies = [ "axum", "axum-core", @@ -1434,31 +1434,14 @@ dependencies = [ "serde_core", ] -[[package]] -name = "cargo-util-schemas" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dc1a6f7b5651af85774ae5a34b4e8be397d9cf4bc063b7e6dbd99a841837830" -dependencies = [ - "semver", - "serde", - "serde-untagged", - "serde-value", - "thiserror 2.0.17", - "toml", - "unicode-xid", - "url", -] - [[package]] name = "cargo_metadata" -version = "0.22.0" +version = "0.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c3f56c207c76c07652489840ff98687dcf213de178ac0974660d6fefeaf5ec6" +checksum = "ef987d17b0a113becdd19d3d0022d04d7ef41f9efe4f3fb63ac44ba61df3ade9" dependencies = [ "camino", "cargo-platform", - "cargo-util-schemas", "semver", "serde", "serde_json", @@ -1473,9 +1456,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.49" +version = "1.2.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90583009037521a116abf44494efecd645ba48b6622457080f080b85544e2215" +checksum = "9f50d563227a1c37cc0a263f64eca3334388c01c5e4c4861a9def205c614383c" dependencies = [ "find-msvc-tools", "jobserver", @@ -1576,7 +1559,7 @@ version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" dependencies = [ - "crypto-common 0.1.6", + "crypto-common 0.1.7", "inout 0.1.4", ] @@ -1798,9 +1781,9 @@ dependencies = [ [[package]] name = "crc" -version = "3.4.0" +version = "3.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5eb8a2a1cd12ab0d987a5d5e825195d372001a4094a0376319d5a0ad71c1ba0d" +checksum = "9710d3b3739c2e349eb44fe848ad0b7c8cb1e42bd87ee49371df2f7acaf3e675" dependencies = [ "crc-catalog", ] @@ 
-1965,9 +1948,9 @@ dependencies = [ [[package]] name = "crypto-common" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" dependencies = [ "generic-array", "typenum", @@ -2997,7 +2980,7 @@ checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer 0.10.4", "const-oid 0.9.6", - "crypto-common 0.1.6", + "crypto-common 0.1.7", "subtle", ] @@ -3405,9 +3388,9 @@ checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" [[package]] name = "flatbuffers" -version = "25.9.23" +version = "25.12.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09b6620799e7340ebd9968d2e0708eb82cf1971e9a16821e2091b6d6e475eed5" +checksum = "35f6839d7b3b98adde531effaf34f0c2badc6f4735d26fe74709d8e513a96ef3" dependencies = [ "bitflags 2.10.0", "rustc_version", @@ -3607,9 +3590,9 @@ dependencies = [ [[package]] name = "generic-array" -version = "0.14.9" +version = "0.14.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bb6743198531e02858aeaea5398fcc883e71851fcbcb5a2f773e2fb6cb1edf2" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" dependencies = [ "typenum", "version_check", @@ -4641,9 +4624,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.15" +version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +checksum = "7ee5b5339afb4c41626dde77b7a611bd4f2c202b897852b4bcf5d03eddc61010" [[package]] name = "jemalloc_pprof" @@ -4972,9 +4955,9 @@ dependencies = [ [[package]] name = "lzma-rust2" -version = "0.13.0" +version = "0.15.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c60a23ffb90d527e23192f1246b14746e2f7f071cb84476dd879071696c18a4a" +checksum = "48172246aa7c3ea28e423295dd1ca2589a24617cc4e588bb8cfe177cb2c54d95" dependencies = [ "crc", "sha2 0.10.9", @@ -5134,9 +5117,9 @@ dependencies = [ [[package]] name = "moka" -version = "0.12.11" +version = "0.12.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8261cd88c312e0004c1d51baad2980c66528dfdb2bee62003e643a4d8f86b077" +checksum = "a3dec6bd31b08944e08b58fd99373893a6c17054d6f3ea5006cc894f4f4eee2a" dependencies = [ "async-lock", "crossbeam-channel", @@ -5147,7 +5130,6 @@ dependencies = [ "futures-util", "parking_lot", "portable-atomic", - "rustc_version", "smallvec", "tagptr", "uuid", @@ -5281,9 +5263,9 @@ checksum = "5e0826a989adedc2a244799e823aece04662b66609d96af8dff7ac6df9a8925d" [[package]] name = "ntapi" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4" +checksum = "c70f219e21142367c70c0b30c6a9e3a14d55b4d12a204d897fbec83a0363f081" dependencies = [ "winapi", ] @@ -6113,9 +6095,9 @@ dependencies = [ [[package]] name = "portable-atomic" -version = "1.11.1" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" +checksum = "f59e70c4aef1e55797c2e8fd94a4f2a973fc972cfde0e0b05f683667b0cd39dd" [[package]] name = "potential_utf" @@ -7879,9 +7861,9 @@ checksum = 
"b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "ryu" -version = "1.0.20" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +checksum = "62049b2877bf12821e8f9ad256ee38fdc31db7387ec2d3b3f403024de2034aea" [[package]] name = "s3s" @@ -8096,28 +8078,6 @@ dependencies = [ "serde_derive", ] -[[package]] -name = "serde-untagged" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9faf48a4a2d2693be24c6289dbe26552776eb7737074e6722891fadbe6c5058" -dependencies = [ - "erased-serde", - "serde", - "serde_core", - "typeid", -] - -[[package]] -name = "serde-value" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3a1a3341211875ef120e117ea7fd5228530ae7e7036a779fdc9117be6b3282c" -dependencies = [ - "ordered-float", - "serde", -] - [[package]] name = "serde_core" version = "1.0.228" @@ -8315,9 +8275,9 @@ dependencies = [ [[package]] name = "shadow-rs" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72d18183cef626bce22836103349c7050d73db799be0171386b80947d157ae32" +checksum = "ff351910f271e7065781b6b4f0f43cb515d474d812f31176a0246d9058e47d5d" dependencies = [ "cargo_metadata", "const_format", @@ -10434,9 +10394,9 @@ dependencies = [ [[package]] name = "zip" -version = "6.0.0" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb2a05c7c36fde6c09b08576c9f7fb4cda705990f73b58fe011abf7dfb24168b" +checksum = "bdd8a47718a4ee5fe78e07667cd36f3de80e7c2bfe727c7074245ffc7303c037" dependencies = [ "aes 0.8.4", "arbitrary", @@ -10445,6 +10405,7 @@ dependencies = [ "crc32fast", "deflate64", "flate2", + "generic-array", "getrandom 0.3.4", "hmac 0.12.1", "indexmap 2.12.1", diff --git a/Cargo.toml b/Cargo.toml index a93368d1..6f0d3a32 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -97,8 +97,8 @@ async-channel = "2.5.0" async-compression = { version = "0.4.19" } async-recursion = "1.1.1" async-trait = "0.1.89" -axum = "0.8.7" -axum-extra = "0.12.2" +axum = "0.8.8" +axum-extra = "0.12.3" axum-server = { version = "0.8.0", features = ["tls-rustls-no-provider"], default-features = false } futures = "0.3.31" futures-core = "0.3.31" @@ -126,7 +126,7 @@ tower-http = { version = "0.6.8", features = ["cors"] } bytes = { version = "1.11.0", features = ["serde"] } bytesize = "2.3.1" byteorder = "1.5.0" -flatbuffers = "25.9.23" +flatbuffers = "25.12.19" form_urlencoded = "1.2.2" prost = "0.14.1" quick-xml = "0.38.4" @@ -203,7 +203,7 @@ matchit = "0.9.0" md-5 = "0.11.0-rc.3" md5 = "0.8.0" mime_guess = "2.0.5" -moka = { version = "0.12.11", features = ["future"] } +moka = { version = "0.12.12", features = ["future"] } netif = "0.1.6" nix = { version = "0.30.1", features = ["fs"] } nu-ansi-term = "0.50.3" @@ -224,7 +224,7 @@ rust-embed = { version = "8.9.0" } rustc-hash = { version = "2.1.1" } s3s = { version = "0.12.0-rc.6", features = ["minio"], git = "https://github.com/s3s-project/s3s.git", branch = "main" } serial_test = "3.2.0" -shadow-rs = { version = "1.4.0", default-features = false } +shadow-rs = { version = "1.5.0", default-features = false } siphasher = "1.0.1" smallvec = { version = "1.15.1", features = ["serde"] } smartstring = "1.0.1" @@ -252,7 +252,7 @@ walkdir = "2.5.0" wildmatch = { version = "2.6.1", features = ["serde"] } winapi = { version = "0.3.9" } xxhash-rust = { 
version = "0.8.15", features = ["xxh64", "xxh3"] } -zip = "6.0.0" +zip = "7.0.0" zstd = "0.13.3" # Observability and Metrics diff --git a/crates/common/src/globals.rs b/crates/common/src/globals.rs index 6bcc7e29..e0f6a38a 100644 --- a/crates/common/src/globals.rs +++ b/crates/common/src/globals.rs @@ -24,11 +24,16 @@ pub static GLOBAL_RUSTFS_HOST: LazyLock> = LazyLock::new(|| RwLoc pub static GLOBAL_RUSTFS_PORT: LazyLock> = LazyLock::new(|| RwLock::new("9000".to_string())); pub static GLOBAL_RUSTFS_ADDR: LazyLock> = LazyLock::new(|| RwLock::new("".to_string())); pub static GLOBAL_CONN_MAP: LazyLock>> = LazyLock::new(|| RwLock::new(HashMap::new())); +pub static GLOBAL_ROOT_CERT: LazyLock>>> = LazyLock::new(|| RwLock::new(None)); pub async fn set_global_addr(addr: &str) { *GLOBAL_RUSTFS_ADDR.write().await = addr.to_string(); } +pub async fn set_global_root_cert(cert: Vec) { + *GLOBAL_ROOT_CERT.write().await = Some(cert); +} + /// Evict a stale/dead connection from the global connection cache. /// This is critical for cluster recovery when a node dies unexpectedly (e.g., power-off). /// By removing the cached connection, subsequent requests will establish a fresh connection. diff --git a/crates/config/src/constants/app.rs b/crates/config/src/constants/app.rs index f62b6407..0610319e 100644 --- a/crates/config/src/constants/app.rs +++ b/crates/config/src/constants/app.rs @@ -89,6 +89,30 @@ pub const RUSTFS_TLS_KEY: &str = "rustfs_key.pem"; /// This is the default cert for TLS. pub const RUSTFS_TLS_CERT: &str = "rustfs_cert.pem"; +/// Default public certificate filename for rustfs +/// This is the default public certificate filename for rustfs. +/// It is used to store the public certificate of the application. +/// Default value: public.crt +pub const RUSTFS_PUBLIC_CERT: &str = "public.crt"; + +/// Default CA certificate filename for rustfs +/// This is the default CA certificate filename for rustfs. +/// It is used to store the CA certificate of the application. +/// Default value: ca.crt +pub const RUSTFS_CA_CERT: &str = "ca.crt"; + +/// Default HTTP prefix for rustfs +/// This is the default HTTP prefix for rustfs. +/// It is used to identify HTTP URLs. +/// Default value: http:// +pub const RUSTFS_HTTP_PREFIX: &str = "http://"; + +/// Default HTTPS prefix for rustfs +/// This is the default HTTPS prefix for rustfs. +/// It is used to identify HTTPS URLs. +/// Default value: https:// +pub const RUSTFS_HTTPS_PREFIX: &str = "https://"; + /// Default port for rustfs /// This is the default port for rustfs. /// This is used to bind the server to a specific port. diff --git a/crates/config/src/constants/tls.rs b/crates/config/src/constants/tls.rs index cfda42e2..6cbebcd4 100644 --- a/crates/config/src/constants/tls.rs +++ b/crates/config/src/constants/tls.rs @@ -12,4 +12,26 @@ // See the License for the specific language governing permissions and // limitations under the License. +/// TLS related environment variable names and default values +/// Environment variable to enable TLS key logging +/// When set to "1", RustFS will log TLS keys to the specified file for debugging purposes. +/// By default, this is disabled. +/// To enable, set the environment variable RUSTFS_TLS_KEYLOG=1 pub const ENV_TLS_KEYLOG: &str = "RUSTFS_TLS_KEYLOG"; + +/// Default value for TLS key logging +/// By default, RustFS does not log TLS keys. 
+/// To change this behavior, set the environment variable RUSTFS_TLS_KEYLOG=1 +pub const DEFAULT_TLS_KEYLOG: bool = false; + +/// Environment variable to trust system CA certificates +/// When set to "1", RustFS will trust system CA certificates in addition to any +/// custom CA certificates provided in the configuration. +/// By default, this is disabled. +/// To enable, set the environment variable RUSTFS_TRUST_SYSTEM_CA=1 +pub const ENV_TRUST_SYSTEM_CA: &str = "RUSTFS_TRUST_SYSTEM_CA"; + +/// Default value for trusting system CA certificates +/// By default, RustFS does not trust system CA certificates. +/// To change this behavior, set the environment variable RUSTFS_TRUST_SYSTEM_CA=1 +pub const DEFAULT_TRUST_SYSTEM_CA: bool = false; diff --git a/crates/protos/src/lib.rs b/crates/protos/src/lib.rs index 305d67a5..9b3a2aa4 100644 --- a/crates/protos/src/lib.rs +++ b/crates/protos/src/lib.rs @@ -15,19 +15,19 @@ #[allow(unsafe_code)] mod generated; -use std::{error::Error, time::Duration}; - -pub use generated::*; use proto_gen::node_service::node_service_client::NodeServiceClient; -use rustfs_common::globals::{GLOBAL_CONN_MAP, evict_connection}; +use rustfs_common::globals::{GLOBAL_CONN_MAP, GLOBAL_ROOT_CERT, evict_connection}; +use std::{error::Error, time::Duration}; use tonic::{ Request, Status, metadata::MetadataValue, service::interceptor::InterceptedService, - transport::{Channel, Endpoint}, + transport::{Certificate, Channel, ClientTlsConfig, Endpoint}, }; use tracing::{debug, warn}; +pub use generated::*; + // Default 100 MB pub const DEFAULT_GRPC_SERVER_MESSAGE_LEN: usize = 100 * 1024 * 1024; @@ -46,6 +46,12 @@ const HTTP2_KEEPALIVE_TIMEOUT_SECS: u64 = 3; /// Overall RPC timeout - maximum time for any single RPC operation const RPC_TIMEOUT_SECS: u64 = 30; +/// Default HTTPS prefix for rustfs +/// This is the default HTTPS prefix for rustfs. +/// It is used to identify HTTPS URLs. +/// Default value: https:// +const RUSTFS_HTTPS_PREFIX: &str = "https://"; + /// Creates a new gRPC channel with optimized keepalive settings for cluster resilience. /// /// This function is designed to detect dead peers quickly: @@ -56,7 +62,7 @@ const RPC_TIMEOUT_SECS: u64 = 30; async fn create_new_channel(addr: &str) -> Result> { debug!("Creating new gRPC channel to: {}", addr); - let connector = Endpoint::from_shared(addr.to_string())? + let mut connector = Endpoint::from_shared(addr.to_string())? // Fast connection timeout for dead peer detection .connect_timeout(Duration::from_secs(CONNECT_TIMEOUT_SECS)) // TCP-level keepalive - OS will probe connection @@ -70,6 +76,37 @@ async fn create_new_channel(addr: &str) -> Result> { // Overall timeout for any RPC - fail fast on unresponsive peers .timeout(Duration::from_secs(RPC_TIMEOUT_SECS)); + let root_cert = GLOBAL_ROOT_CERT.read().await; + if addr.starts_with(RUSTFS_HTTPS_PREFIX) { + if let Some(cert_pem) = root_cert.as_ref() { + let ca = Certificate::from_pem(cert_pem); + // Derive the hostname from the HTTPS URL for TLS hostname verification. + let domain = addr + .trim_start_matches(RUSTFS_HTTPS_PREFIX) + .split('/') + .next() + .unwrap_or("") + .split(':') + .next() + .unwrap_or(""); + let tls = if !domain.is_empty() { + ClientTlsConfig::new().ca_certificate(ca).domain_name(domain) + } else { + // Fallback: configure TLS without explicit domain if parsing fails. 
+ ClientTlsConfig::new().ca_certificate(ca) + }; + connector = connector.tls_config(tls)?; + debug!("Configured TLS with custom root certificate for: {}", addr); + } else { + debug!("Using system root certificates for TLS: {}", addr); + } + } else { + // Custom root certificates are configured but will be ignored for non-HTTPS addresses. + if root_cert.is_some() { + warn!("Custom root certificates are configured but not used because the address does not use HTTPS: {addr}"); + } + } + let channel = connector.connect().await?; // Cache the new connection diff --git a/crates/utils/src/certs.rs b/crates/utils/src/certs.rs index 24657f7a..463874ed 100644 --- a/crates/utils/src/certs.rs +++ b/crates/utils/src/certs.rs @@ -21,7 +21,7 @@ use std::collections::HashMap; use std::io::Error; use std::path::Path; use std::sync::Arc; -use std::{env, fs, io}; +use std::{fs, io}; use tracing::{debug, warn}; /// Load public certificate from file. @@ -243,17 +243,7 @@ pub fn create_multi_cert_resolver( /// * A boolean indicating whether TLS key logging is enabled based on the `RUSTFS_TLS_KEYLOG` environment variable. /// pub fn tls_key_log() -> bool { - env::var("RUSTFS_TLS_KEYLOG") - .map(|v| { - let v = v.trim(); - v.eq_ignore_ascii_case("1") - || v.eq_ignore_ascii_case("on") - || v.eq_ignore_ascii_case("true") - || v.eq_ignore_ascii_case("yes") - || v.eq_ignore_ascii_case("enabled") - || v.eq_ignore_ascii_case("t") - }) - .unwrap_or(false) + crate::get_env_bool(rustfs_config::ENV_TLS_KEYLOG, rustfs_config::DEFAULT_TLS_KEYLOG) } #[cfg(test)] diff --git a/rustfs/src/main.rs b/rustfs/src/main.rs index d62777bb..fbc946cb 100644 --- a/rustfs/src/main.rs +++ b/rustfs/src/main.rs @@ -27,7 +27,7 @@ mod version; // Ensure the correct path for parse_license is imported use crate::init::{add_bucket_notification_configuration, init_buffer_profile_system, init_kms_system, init_update_check}; use crate::server::{ - SHUTDOWN_TIMEOUT, ServiceState, ServiceStateManager, ShutdownSignal, init_event_notifier, shutdown_event_notifier, + SHUTDOWN_TIMEOUT, ServiceState, ServiceStateManager, ShutdownSignal, init_cert, init_event_notifier, shutdown_event_notifier, start_audit_system, start_http_server, stop_audit_system, wait_for_shutdown, }; use chrono::Datelike; @@ -38,19 +38,19 @@ use rustfs_ahm::{ scanner::data_scanner::ScannerConfig, shutdown_ahm_services, }; use rustfs_common::globals::set_global_addr; -use rustfs_ecstore::bucket::metadata_sys::init_bucket_metadata_sys; -use rustfs_ecstore::bucket::replication::{GLOBAL_REPLICATION_POOL, init_background_replication}; -use rustfs_ecstore::config as ecconfig; -use rustfs_ecstore::config::GLOBAL_CONFIG_SYS; -use rustfs_ecstore::store_api::BucketOptions; use rustfs_ecstore::{ StorageAPI, + bucket::metadata_sys::init_bucket_metadata_sys, + bucket::replication::{GLOBAL_REPLICATION_POOL, init_background_replication}, + config as ecconfig, + config::GLOBAL_CONFIG_SYS, endpoints::EndpointServerPools, global::{set_global_rustfs_port, shutdown_background_services}, notification_sys::new_global_notification_sys, set_global_endpoints, store::ECStore, store::init_local_disks, + store_api::BucketOptions, update_erasure_type, }; use rustfs_iam::init_iam_sys; @@ -125,6 +125,11 @@ async fn async_main() -> Result<()> { // Initialize performance profiling if enabled profiling::init_from_env().await; + // Initialize TLS if a certificate path is provided + if let Some(tls_path) = &opt.tls_path { + init_cert(tls_path).await + } + // Run parameters match run(opt).await { Ok(_) => Ok(()), diff 
--git a/rustfs/src/server/cert.rs b/rustfs/src/server/cert.rs new file mode 100644 index 00000000..6dba5c05 --- /dev/null +++ b/rustfs/src/server/cert.rs @@ -0,0 +1,160 @@ +// Copyright 2024 RustFS Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use rustfs_common::globals::set_global_root_cert; +use rustfs_config::{RUSTFS_CA_CERT, RUSTFS_PUBLIC_CERT, RUSTFS_TLS_CERT}; +use tracing::{debug, info}; + +/// Initialize TLS certificates for inter-node communication. +/// This function attempts to load certificates from the specified `tls_path`. +/// It looks for `rustfs_cert.pem`, `public.crt`, and `ca.crt` files. +/// Additionally, it tries to load system root certificates from common locations +/// to ensure trust for public CAs when mixing self-signed and public certificates. +/// If any certificates are found, they are set as the global root certificates. +pub(crate) async fn init_cert(tls_path: &str) { + let mut cert_data = Vec::new(); + + // Try rustfs_cert.pem (custom cert name) + walk_dir(std::path::PathBuf::from(tls_path), RUSTFS_TLS_CERT, &mut cert_data).await; + + // Try public.crt (common CA name) + let public_cert_path = std::path::Path::new(tls_path).join(RUSTFS_PUBLIC_CERT); + load_cert_file(public_cert_path.to_str().unwrap_or_default(), &mut cert_data, "CA certificate").await; + + // Try ca.crt (common CA name) + let ca_cert_path = std::path::Path::new(tls_path).join(RUSTFS_CA_CERT); + load_cert_file(ca_cert_path.to_str().unwrap_or_default(), &mut cert_data, "CA certificate").await; + + let trust_system_ca = rustfs_utils::get_env_bool(rustfs_config::ENV_TRUST_SYSTEM_CA, rustfs_config::DEFAULT_TRUST_SYSTEM_CA); + if !trust_system_ca { + // Attempt to load system root certificates to maintain trust for public CAs + // This is important when mixing self-signed internal certs with public external certs + let system_ca_paths = [ + "/etc/ssl/certs/ca-certificates.crt", // Debian/Ubuntu/Alpine + "/etc/pki/tls/certs/ca-bundle.crt", // Fedora/RHEL/CentOS + "/etc/ssl/ca-bundle.pem", // OpenSUSE + "/etc/pki/tls/cacert.pem", // OpenELEC + "/etc/ssl/cert.pem", // macOS/FreeBSD + "/usr/local/etc/openssl/cert.pem", // macOS/Homebrew OpenSSL + "/usr/local/share/certs/ca-root-nss.crt", // FreeBSD + "/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem", // RHEL + "/usr/share/pki/ca-trust-legacy/ca-bundle.legacy.crt", // RHEL legacy + ]; + + let mut system_cert_loaded = false; + for path in system_ca_paths { + if load_cert_file(path, &mut cert_data, "system root certificates").await { + system_cert_loaded = true; + info!("Loaded system root certificates from {}", path); + break; // Stop after finding the first valid bundle + } + } + + if !system_cert_loaded { + debug!("Could not find system root certificates in common locations."); + } + } else { + info!("Loading system root certificates disabled via RUSTFS_TRUST_SYSTEM_CA"); + } + if !cert_data.is_empty() { + set_global_root_cert(cert_data).await; + info!("Configured custom root certificates for inter-node communication"); + } +} + 
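+// The certificates collected by `init_cert` are only published through
+// `set_global_root_cert`; they are read back in `create_new_channel`
+// (crates/protos/src/lib.rs), where the bundle becomes the root CA of the
+// tonic `ClientTlsConfig` used when dialing https:// peers.
+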
+/// Helper function to load a certificate file and append to cert_data. +/// Returns true if the file was successfully loaded. +async fn load_cert_file(path: &str, cert_data: &mut Vec, desc: &str) -> bool { + if tokio::fs::metadata(path).await.is_ok() { + if let Ok(data) = tokio::fs::read(path).await { + cert_data.extend(data); + cert_data.push(b'\n'); + info!("Loaded {} from {}", desc, path); + true + } else { + debug!("Failed to read {} from {}", desc, path); + false + } + } else { + debug!("{} file not found at {}", desc, path); + false + } +} + +/// Load the certificate file if its name matches `cert_name`. +/// If it matches, the certificate data is appended to `cert_data`. +/// +/// # Parameters +/// - `entry`: The directory entry to check. +/// - `cert_name`: The name of the certificate file to match. +/// - `cert_data`: A mutable vector to append loaded certificate data. +async fn load_if_matches(entry: &tokio::fs::DirEntry, cert_name: &str, cert_data: &mut Vec) { + let fname = entry.file_name().to_string_lossy().to_string(); + if fname == cert_name { + let p = entry.path(); + load_cert_file(&p.to_string_lossy(), cert_data, "certificate").await; + } +} + +/// Search the directory at `path` and one level of subdirectories to find and load +/// certificates matching `cert_name`. Loaded certificate data is appended to +/// `cert_data`. +/// # Parameters +/// - `path`: The starting directory path to search for certificates. +/// - `cert_name`: The name of the certificate file to look for. +/// - `cert_data`: A mutable vector to append loaded certificate data. +async fn walk_dir(path: std::path::PathBuf, cert_name: &str, cert_data: &mut Vec) { + if let Ok(mut rd) = tokio::fs::read_dir(&path).await { + while let Ok(Some(entry)) = rd.next_entry().await { + if let Ok(ft) = entry.file_type().await { + if ft.is_file() { + load_if_matches(&entry, cert_name, cert_data).await; + } else if ft.is_dir() { + // Only check direct subdirectories, no deeper recursion + if let Ok(mut sub_rd) = tokio::fs::read_dir(&entry.path()).await { + while let Ok(Some(sub_entry)) = sub_rd.next_entry().await { + if let Ok(sub_ft) = sub_entry.file_type().await { + if sub_ft.is_file() { + load_if_matches(&sub_entry, cert_name, cert_data).await; + } + // Ignore subdirectories and symlinks in subdirs to limit to one level + } + } + } + } else if ft.is_symlink() { + // Follow symlink and treat target as file or directory, but limit to one level + if let Ok(meta) = tokio::fs::metadata(&entry.path()).await { + if meta.is_file() { + load_if_matches(&entry, cert_name, cert_data).await; + } else if meta.is_dir() { + // Treat as directory but only check its direct contents + if let Ok(mut sub_rd) = tokio::fs::read_dir(&entry.path()).await { + while let Ok(Some(sub_entry)) = sub_rd.next_entry().await { + if let Ok(sub_ft) = sub_entry.file_type().await { + if sub_ft.is_file() { + load_if_matches(&sub_entry, cert_name, cert_data).await; + } + // Ignore deeper levels + } + } + } + } + } + } + } + } + } else { + debug!("Certificate directory not found: {}", path.display()); + } +} diff --git a/rustfs/src/server/mod.rs b/rustfs/src/server/mod.rs index df6b04a5..630f6f94 100644 --- a/rustfs/src/server/mod.rs +++ b/rustfs/src/server/mod.rs @@ -13,6 +13,7 @@ // limitations under the License. 
mod audit; +mod cert; mod compress; mod event; mod http; @@ -22,6 +23,7 @@ mod runtime; mod service_state; pub(crate) use audit::{start_audit_system, stop_audit_system}; +pub(crate) use cert::init_cert; pub(crate) use event::{init_event_notifier, shutdown_event_notifier}; pub(crate) use http::start_http_server; pub(crate) use runtime::get_tokio_runtime_builder; diff --git a/scripts/run.sh b/scripts/run.sh index 762215c6..6c268c37 100755 --- a/scripts/run.sh +++ b/scripts/run.sh @@ -183,6 +183,9 @@ export RUSTFS_ENABLE_PROFILING=false # Heal configuration queue size export RUSTFS_HEAL_QUEUE_SIZE=10000 +# rustfs trust system CA certificates +export RUSTFS_TRUST_SYSTEM_CA=true + if [ -n "$1" ]; then export RUSTFS_VOLUMES="$1" fi From 3e2252e4bb504b84e6c91a34707e60f252a1a8d9 Mon Sep 17 00:00:00 2001 From: 0xdx2 Date: Sun, 21 Dec 2025 17:54:23 +0800 Subject: [PATCH 19/26] =?UTF-8?q?fix(config):Update=20argument=20parsing?= =?UTF-8?q?=20for=20volumes=20and=20server=5Fdomains=20to=20support=20del?= =?UTF-8?q?=E2=80=A6=20(#1209)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: houseme Co-authored-by: houseme Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- Cargo.lock | 1 + rustfs/Cargo.toml | 1 + rustfs/src/config/config_test.rs | 457 +++++++++++++++++++++++++++++++ rustfs/src/config/mod.rs | 15 +- 4 files changed, 472 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4f7d153b..3b2d43a8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7081,6 +7081,7 @@ dependencies = [ "serde", "serde_json", "serde_urlencoded", + "serial_test", "shadow-rs", "socket2 0.6.1", "subtle", diff --git a/rustfs/Cargo.toml b/rustfs/Cargo.toml index e4c685eb..e54a52fd 100644 --- a/rustfs/Cargo.toml +++ b/rustfs/Cargo.toml @@ -144,6 +144,7 @@ pprof = { workspace = true } [dev-dependencies] uuid = { workspace = true, features = ["v4"] } +serial_test = { workspace = true } [build-dependencies] http.workspace = true diff --git a/rustfs/src/config/config_test.rs b/rustfs/src/config/config_test.rs index 1f875fae..4e449b04 100644 --- a/rustfs/src/config/config_test.rs +++ b/rustfs/src/config/config_test.rs @@ -13,9 +13,48 @@ // limitations under the License. #[cfg(test)] +#[allow(unsafe_op_in_unsafe_fn)] mod tests { use crate::config::Opt; use clap::Parser; + use rustfs_ecstore::disks_layout::DisksLayout; + use serial_test::serial; + use std::env; + + /// Helper function to run test with environment variable set. + /// Automatically cleans up the environment variable after the test. + /// + /// # Safety + /// This function uses unsafe env::set_var and env::remove_var. + /// Tests using this helper must be marked with #[serial] to avoid race conditions. + #[allow(unsafe_code)] + fn with_env_var(key: &str, value: &str, test_fn: F) + where + F: FnOnce(), + { + unsafe { + env::set_var(key, value); + } + // Ensure cleanup happens even if test panics + let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(test_fn)); + unsafe { + env::remove_var(key); + } + // Re-panic if the test failed + if let Err(e) = result { + std::panic::resume_unwind(e); + } + } + + /// Helper to parse volumes and verify the layout. 
+ fn verify_layout(volumes: &[T], verify_fn: F) + where + T: AsRef, + F: FnOnce(&DisksLayout), + { + let layout = DisksLayout::from_volumes(volumes).expect("Failed to parse volumes"); + verify_fn(&layout); + } #[test] fn test_default_console_configuration() { @@ -66,4 +105,422 @@ mod tests { assert_eq!(endpoint_port, 9000); assert_eq!(console_port, 9001); } + + #[test] + fn test_volumes_and_disk_layout_parsing() { + use rustfs_ecstore::disks_layout::DisksLayout; + + // Test case 1: Single volume path + let args = vec!["rustfs", "/data/vol1"]; + let opt = Opt::parse_from(args); + assert_eq!(opt.volumes.len(), 1); + assert_eq!(opt.volumes[0], "/data/vol1"); + + let layout = DisksLayout::from_volumes(&opt.volumes).expect("Failed to parse single volume"); + assert!(!layout.is_empty_layout()); + assert!(layout.is_single_drive_layout()); + assert_eq!(layout.get_single_drive_layout(), "/data/vol1"); + + // Test case 2: Multiple volume paths (space-separated via env) + let args = vec!["rustfs", "/data/vol1", "/data/vol2", "/data/vol3", "/data/vol4"]; + let opt = Opt::parse_from(args); + assert_eq!(opt.volumes.len(), 4); + + let layout = DisksLayout::from_volumes(&opt.volumes).expect("Failed to parse multiple volumes"); + assert!(!layout.is_empty_layout()); + assert!(!layout.is_single_drive_layout()); + assert_eq!(layout.get_set_count(0), 1); + assert_eq!(layout.get_drives_per_set(0), 4); + + // Test case 3: Ellipses pattern - simple range + let args = vec!["rustfs", "/data/vol{1...4}"]; + let opt = Opt::parse_from(args); + assert_eq!(opt.volumes.len(), 1); + assert_eq!(opt.volumes[0], "/data/vol{1...4}"); + + let layout = DisksLayout::from_volumes(&opt.volumes).expect("Failed to parse ellipses pattern"); + assert!(!layout.is_empty_layout()); + assert_eq!(layout.get_set_count(0), 1); + assert_eq!(layout.get_drives_per_set(0), 4); + + // Test case 4: Ellipses pattern - larger range that creates multiple sets + let args = vec!["rustfs", "/data/vol{1...16}"]; + let opt = Opt::parse_from(args); + let layout = DisksLayout::from_volumes(&opt.volumes).expect("Failed to parse ellipses with multiple sets"); + assert!(!layout.is_empty_layout()); + assert_eq!(layout.get_drives_per_set(0), 16); + + // Test case 5: Distributed setup pattern + let args = vec!["rustfs", "http://server{1...4}/data/vol{1...4}"]; + let opt = Opt::parse_from(args); + let layout = DisksLayout::from_volumes(&opt.volumes).expect("Failed to parse distributed pattern"); + assert!(!layout.is_empty_layout()); + assert_eq!(layout.get_drives_per_set(0), 16); + + // Test case 6: Multiple pools (legacy: false) + let args = vec!["rustfs", "http://server1/data{1...4}", "http://server2/data{1...4}"]; + let opt = Opt::parse_from(args); + assert_eq!(opt.volumes.len(), 2); + let layout = DisksLayout::from_volumes(&opt.volumes).expect("Failed to parse multiple pools"); + assert!(!layout.legacy); + assert_eq!(layout.pools.len(), 2); + + // Test case 7: Minimum valid drives for erasure coding (2 drives minimum) + let args = vec!["rustfs", "/data/vol1", "/data/vol2"]; + let opt = Opt::parse_from(args); + let layout = DisksLayout::from_volumes(&opt.volumes).expect("Should succeed with 2 drives"); + assert_eq!(layout.get_drives_per_set(0), 2); + + // Test case 8: Invalid - single drive not enough for erasure coding + let args = vec!["rustfs", "/data/vol1"]; + let opt = Opt::parse_from(args); + // Single drive is special case and should succeed for single drive layout + let layout = DisksLayout::from_volumes(&opt.volumes).expect("Single drive should work"); 
+ assert!(layout.is_single_drive_layout()); + + // Test case 9: Command line with both address and volumes + let args = vec![ + "rustfs", + "/data/vol{1...8}", + "--address", + ":9000", + "--console-address", + ":9001", + ]; + let opt = Opt::parse_from(args); + assert_eq!(opt.volumes.len(), 1); + assert_eq!(opt.address, ":9000"); + assert_eq!(opt.console_address, ":9001"); + + let layout = DisksLayout::from_volumes(&opt.volumes).expect("Failed to parse with address args"); + assert!(!layout.is_empty_layout()); + assert_eq!(layout.get_drives_per_set(0), 8); + + // Test case 10: Multiple ellipses in single argument - nested pattern + let args = vec!["rustfs", "/data{0...3}/vol{0...4}"]; + let opt = Opt::parse_from(args); + assert_eq!(opt.volumes.len(), 1); + assert_eq!(opt.volumes[0], "/data{0...3}/vol{0...4}"); + + let layout = DisksLayout::from_volumes(&opt.volumes).expect("Failed to parse nested ellipses pattern"); + assert!(!layout.is_empty_layout()); + // 4 data dirs * 5 vols = 20 drives + let total_drives = layout.get_set_count(0) * layout.get_drives_per_set(0); + assert_eq!(total_drives, 20, "Expected 20 drives from /data{{0...3}}/vol{{0...4}}"); + + // Test case 11: Multiple pools with nested ellipses patterns + let args = vec!["rustfs", "/data{0...3}/vol{0...4}", "/data{4...7}/vol{0...4}"]; + let opt = Opt::parse_from(args); + assert_eq!(opt.volumes.len(), 2); + + let layout = DisksLayout::from_volumes(&opt.volumes).expect("Failed to parse multiple pools with nested patterns"); + assert!(!layout.legacy); + assert_eq!(layout.pools.len(), 2); + + // Each pool should have 20 drives (4 * 5) + let pool0_drives = layout.get_set_count(0) * layout.get_drives_per_set(0); + let pool1_drives = layout.get_set_count(1) * layout.get_drives_per_set(1); + assert_eq!(pool0_drives, 20, "Pool 0 should have 20 drives"); + assert_eq!(pool1_drives, 20, "Pool 1 should have 20 drives"); + + // Test case 11: Complex distributed pattern with multiple ellipses + let args = vec!["rustfs", "http://server{1...2}.local/disk{1...8}"]; + let opt = Opt::parse_from(args); + let layout = DisksLayout::from_volumes(&opt.volumes).expect("Failed to parse distributed nested pattern"); + assert!(!layout.is_empty_layout()); + // 2 servers * 8 disks = 16 drives + let total_drives = layout.get_set_count(0) * layout.get_drives_per_set(0); + assert_eq!(total_drives, 16, "Expected 16 drives from server{{1...2}}/disk{{1...8}}"); + + // Test case 12: Zero-padded patterns + let args = vec!["rustfs", "/data/vol{01...16}"]; + let opt = Opt::parse_from(args); + let layout = DisksLayout::from_volumes(&opt.volumes).expect("Failed to parse zero-padded pattern"); + assert!(!layout.is_empty_layout()); + assert_eq!(layout.get_drives_per_set(0), 16); + } + + /// Test environment variable parsing for volumes. + /// Uses #[serial] to avoid concurrent env var modifications. 
+ #[test] + #[serial] + #[allow(unsafe_code)] + fn test_rustfs_volumes_env_variable() { + // Test case 1: Single volume via environment variable + with_env_var("RUSTFS_VOLUMES", "/data/vol1", || { + let args = vec!["rustfs"]; + let opt = Opt::parse_from(args); + assert_eq!(opt.volumes.len(), 1); + assert_eq!(opt.volumes[0], "/data/vol1"); + + let layout = DisksLayout::from_volumes(&opt.volumes).expect("Failed to parse single volume from env"); + assert!(layout.is_single_drive_layout()); + }); + + // Test case 2: Multiple volumes via environment variable (space-separated) + with_env_var("RUSTFS_VOLUMES", "/data/vol1 /data/vol2 /data/vol3 /data/vol4", || { + let args = vec!["rustfs"]; + let opt = Opt::parse_from(args); + assert_eq!(opt.volumes.len(), 4); + assert_eq!(opt.volumes[0], "/data/vol1"); + assert_eq!(opt.volumes[1], "/data/vol2"); + assert_eq!(opt.volumes[2], "/data/vol3"); + assert_eq!(opt.volumes[3], "/data/vol4"); + + verify_layout(&opt.volumes, |layout| { + assert!(!layout.is_single_drive_layout()); + assert_eq!(layout.get_drives_per_set(0), 4); + }); + }); + + // Test case 3: Ellipses pattern via environment variable + with_env_var("RUSTFS_VOLUMES", "/data/vol{1...4}", || { + let args = vec!["rustfs"]; + let opt = Opt::parse_from(args); + assert_eq!(opt.volumes.len(), 1); + assert_eq!(opt.volumes[0], "/data/vol{1...4}"); + + verify_layout(&opt.volumes, |layout| { + assert_eq!(layout.get_drives_per_set(0), 4); + }); + }); + + // Test case 4: Larger range with ellipses + with_env_var("RUSTFS_VOLUMES", "/data/vol{1...16}", || { + let args = vec!["rustfs"]; + let opt = Opt::parse_from(args); + verify_layout(&opt.volumes, |layout| { + assert_eq!(layout.get_drives_per_set(0), 16); + }); + }); + + // Test case 5: Distributed setup pattern + with_env_var("RUSTFS_VOLUMES", "http://server{1...4}/data/vol{1...4}", || { + let args = vec!["rustfs"]; + let opt = Opt::parse_from(args); + verify_layout(&opt.volumes, |layout| { + assert_eq!(layout.get_drives_per_set(0), 16); + }); + }); + + // Test case 6: Multiple pools via environment variable (space-separated) + with_env_var("RUSTFS_VOLUMES", "http://server1/data{1...4} http://server2/data{1...4}", || { + let args = vec!["rustfs"]; + let opt = Opt::parse_from(args); + assert_eq!(opt.volumes.len(), 2); + verify_layout(&opt.volumes, |layout| { + assert!(!layout.legacy); + assert_eq!(layout.pools.len(), 2); + }); + }); + + // Test case 7: Nested ellipses pattern + with_env_var("RUSTFS_VOLUMES", "/data{0...3}/vol{0...4}", || { + let args = vec!["rustfs"]; + let opt = Opt::parse_from(args); + assert_eq!(opt.volumes.len(), 1); + assert_eq!(opt.volumes[0], "/data{0...3}/vol{0...4}"); + + verify_layout(&opt.volumes, |layout| { + let total_drives = layout.get_set_count(0) * layout.get_drives_per_set(0); + assert_eq!(total_drives, 20, "Expected 20 drives from /data{{0...3}}/vol{{0...4}}"); + }); + }); + + // Test case 8: Multiple pools with nested ellipses + with_env_var("RUSTFS_VOLUMES", "/data{0...3}/vol{0...4} /data{4...7}/vol{0...4}", || { + let args = vec!["rustfs"]; + let opt = Opt::parse_from(args); + assert_eq!(opt.volumes.len(), 2); + + verify_layout(&opt.volumes, |layout| { + assert_eq!(layout.pools.len(), 2); + let pool0_drives = layout.get_set_count(0) * layout.get_drives_per_set(0); + let pool1_drives = layout.get_set_count(1) * layout.get_drives_per_set(1); + assert_eq!(pool0_drives, 20, "Pool 0 should have 20 drives"); + assert_eq!(pool1_drives, 20, "Pool 1 should have 20 drives"); + }); + }); + + // Test case 9: Complex distributed 
pattern with multiple ellipses + with_env_var("RUSTFS_VOLUMES", "http://server{1...2}.local/disk{1...8}", || { + let args = vec!["rustfs"]; + let opt = Opt::parse_from(args); + verify_layout(&opt.volumes, |layout| { + let total_drives = layout.get_set_count(0) * layout.get_drives_per_set(0); + assert_eq!(total_drives, 16, "Expected 16 drives from server{{1...2}}/disk{{1...8}}"); + }); + }); + + // Test case 10: Zero-padded patterns + with_env_var("RUSTFS_VOLUMES", "/data/vol{01...16}", || { + let args = vec!["rustfs"]; + let opt = Opt::parse_from(args); + verify_layout(&opt.volumes, |layout| { + assert_eq!(layout.get_drives_per_set(0), 16); + }); + }); + + // Test case 11: Environment variable with additional CLI options + with_env_var("RUSTFS_VOLUMES", "/data/vol{1...8}", || { + let args = vec!["rustfs", "--address", ":9000", "--console-address", ":9001"]; + let opt = Opt::parse_from(args); + assert_eq!(opt.volumes.len(), 1); + assert_eq!(opt.address, ":9000"); + assert_eq!(opt.console_address, ":9001"); + + verify_layout(&opt.volumes, |layout| { + assert_eq!(layout.get_drives_per_set(0), 8); + }); + }); + + // Test case 12: Command line argument overrides environment variable + with_env_var("RUSTFS_VOLUMES", "/data/vol1", || { + let args = vec!["rustfs", "/override/vol1"]; + let opt = Opt::parse_from(args); + assert_eq!(opt.volumes.len(), 1); + // CLI argument should override environment variable + assert_eq!(opt.volumes[0], "/override/vol1"); + }); + } + + /// Test boundary cases for path parsing. + /// NOTE: Current implementation uses space as delimiter, + /// which means paths with spaces are NOT supported. + #[test] + #[serial] + #[allow(unsafe_code)] + fn test_volumes_boundary_cases() { + // Test case 1: Paths with spaces are not properly supported (known limitation) + // This test documents the current behavior - space-separated paths will be split + with_env_var("RUSTFS_VOLUMES", "/data/my disk/vol1", || { + let args = vec!["rustfs"]; + let opt = Opt::try_parse_from(args).expect("Failed to parse with spaces in path"); + // Current behavior: space causes split into 2 volumes + assert_eq!(opt.volumes.len(), 2, "Paths with spaces are split (known limitation)"); + assert_eq!(opt.volumes[0], "/data/my"); + assert_eq!(opt.volumes[1], "disk/vol1"); + }); + + // Test case 2: Empty environment variable causes parsing failure + // because volumes is required and NonEmptyStringValueParser filters empty strings + with_env_var("RUSTFS_VOLUMES", "", || { + let args = vec!["rustfs"]; + let result = Opt::try_parse_from(args); + // Should fail because no volumes provided (empty string filtered out) + assert!(result.is_err(), "Empty RUSTFS_VOLUMES should fail parsing (required field)"); + }); + + // Test case 2b: Multiple consecutive spaces create empty strings during splitting + // This causes parsing to fail because volumes is required and empty strings are invalid + with_env_var("RUSTFS_VOLUMES", "/data/vol1 /data/vol2", || { + let args = vec!["rustfs"]; + let result = Opt::try_parse_from(args); + // Should fail because double space creates an empty element + assert!(result.is_err(), "Multiple consecutive spaces should cause parsing failure"); + }); + + // Test case 3: Very long path with ellipses (stress test) + // Note: Large drive counts may be automatically split into multiple sets + let long_path = format!("/very/long/path/structure/with/many/directories/vol{{1...{}}}", 100); + with_env_var("RUSTFS_VOLUMES", &long_path, || { + let args = vec!["rustfs"]; + let opt = 
Opt::try_parse_from(args).expect("Failed to parse with long ellipses path"); + verify_layout(&opt.volumes, |layout| { + // Total drives should be 100, but may be distributed across sets + let total_drives = layout.get_set_count(0) * layout.get_drives_per_set(0); + assert_eq!(total_drives, 100, "Total drives should be 100"); + }); + }); + } + + /// Test error handling for invalid ellipses patterns. + #[test] + fn test_invalid_ellipses_patterns() { + // Test case 1: Invalid ellipses format (letters instead of numbers) + let args = vec!["rustfs", "/data/vol{a...z}"]; + let opt = Opt::parse_from(args); + let result = DisksLayout::from_volumes(&opt.volumes); + assert!(result.is_err(), "Invalid ellipses pattern with letters should fail"); + + // Test case 2: Reversed range (larger to smaller) + let args = vec!["rustfs", "/data/vol{10...1}"]; + let opt = Opt::parse_from(args); + let result = DisksLayout::from_volumes(&opt.volumes); + // Depending on implementation, this may succeed with 0 drives or fail + // Document actual behavior + if let Ok(layout) = result { + assert!( + layout.is_empty_layout() || layout.get_drives_per_set(0) == 0, + "Reversed range should result in empty layout" + ); + } + } + + #[test] + fn test_server_domains_parsing() { + // Test case 1: server domains without ports + let args = vec![ + "rustfs", + "/data/vol1", + "--server-domains", + "example.com,api.example.com,cdn.example.com", + ]; + let opt = Opt::parse_from(args); + + assert_eq!(opt.server_domains.len(), 3); + assert_eq!(opt.server_domains[0], "example.com"); + assert_eq!(opt.server_domains[1], "api.example.com"); + assert_eq!(opt.server_domains[2], "cdn.example.com"); + + // Test case 2: server domains with ports + let args = vec![ + "rustfs", + "/data/vol1", + "--server-domains", + "example.com:9000,api.example.com:8080,cdn.example.com:443", + ]; + let opt = Opt::parse_from(args); + + assert_eq!(opt.server_domains.len(), 3); + assert_eq!(opt.server_domains[0], "example.com:9000"); + assert_eq!(opt.server_domains[1], "api.example.com:8080"); + assert_eq!(opt.server_domains[2], "cdn.example.com:443"); + + // Test case 3: mixed server domains (with and without ports) + let args = vec![ + "rustfs", + "/data/vol1", + "--server-domains", + "example.com,api.example.com:9000,cdn.example.com,storage.example.com:8443", + ]; + let opt = Opt::parse_from(args); + + assert_eq!(opt.server_domains.len(), 4); + assert_eq!(opt.server_domains[0], "example.com"); + assert_eq!(opt.server_domains[1], "api.example.com:9000"); + assert_eq!(opt.server_domains[2], "cdn.example.com"); + assert_eq!(opt.server_domains[3], "storage.example.com:8443"); + + // Test case 4: single domain with port + let args = vec!["rustfs", "/data/vol1", "--server-domains", "example.com:9000"]; + let opt = Opt::parse_from(args); + + assert_eq!(opt.server_domains.len(), 1); + assert_eq!(opt.server_domains[0], "example.com:9000"); + + // Test case 5: localhost with different ports + let args = vec![ + "rustfs", + "/data/vol1", + "--server-domains", + "localhost:9000,127.0.0.1:9000,localhost", + ]; + let opt = Opt::parse_from(args); + + assert_eq!(opt.server_domains.len(), 3); + assert_eq!(opt.server_domains[0], "localhost:9000"); + assert_eq!(opt.server_domains[1], "127.0.0.1:9000"); + assert_eq!(opt.server_domains[2], "localhost"); + } } diff --git a/rustfs/src/config/mod.rs b/rustfs/src/config/mod.rs index 1e553d89..14923522 100644 --- a/rustfs/src/config/mod.rs +++ b/rustfs/src/config/mod.rs @@ -13,6 +13,7 @@ // limitations under the License. 
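The drive counts asserted in the tests above follow from cartesian expansion of the `{a...b}` ranges: `/data{0...3}/vol{0...4}` expands to 4 * 5 = 20 endpoints before they are grouped into erasure sets. The sketch below is a simplified, self-contained illustration of that expansion, not the actual `DisksLayout` parser, which also handles zero-padded ranges such as `{01...16}`, set sizing, and multiple pools:

```rust
/// Expand every `{a...b}` range in `pattern` into concrete endpoints.
/// Simplified illustration only: assumes well-formed non-negative integer
/// ranges and drops zero-padding, which the real parser preserves.
fn expand(pattern: &str) -> Vec<String> {
    if let Some(open) = pattern.find('{') {
        // Locate the matching closing brace and the `...` separator.
        let close = pattern[open..]
            .find('}')
            .map(|i| open + i)
            .expect("unclosed brace");
        let inner = &pattern[open + 1..close];
        let (lo, hi) = inner.split_once("...").expect("range must use ...");
        let (lo, hi): (u64, u64) = (lo.parse().expect("number"), hi.parse().expect("number"));

        let mut out = Vec::new();
        for n in lo..=hi {
            let candidate = format!("{}{}{}", &pattern[..open], n, &pattern[close + 1..]);
            // Recurse so nested patterns such as /data{0...3}/vol{0...4} expand fully.
            out.extend(expand(&candidate));
        }
        out
    } else {
        vec![pattern.to_string()]
    }
}

fn main() {
    assert_eq!(expand("/data/vol{1...4}").len(), 4);
    assert_eq!(expand("/data{0...3}/vol{0...4}").len(), 20);
    assert_eq!(expand("http://server{1...2}.local/disk{1...8}").len(), 16);
    println!("{:?}", expand("/data/vol{1...4}"));
}
```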
use clap::Parser; +use clap::builder::NonEmptyStringValueParser; use const_str::concat; use std::string::ToString; shadow_rs::shadow!(build); @@ -50,7 +51,12 @@ const LONG_VERSION: &str = concat!( #[command(version = SHORT_VERSION, long_version = LONG_VERSION)] pub struct Opt { /// DIR points to a directory on a filesystem. - #[arg(required = true, env = "RUSTFS_VOLUMES")] + #[arg( + required = true, + env = "RUSTFS_VOLUMES", + value_delimiter = ' ', + value_parser = NonEmptyStringValueParser::new() + )] pub volumes: Vec, /// bind to a specific ADDRESS:PORT, ADDRESS can be an IP or hostname @@ -58,7 +64,12 @@ pub struct Opt { pub address: String, /// Domain name used for virtual-hosted-style requests. - #[arg(long, env = "RUSTFS_SERVER_DOMAINS")] + #[arg( + long, + env = "RUSTFS_SERVER_DOMAINS", + value_delimiter = ',', + value_parser = NonEmptyStringValueParser::new() + )] pub server_domains: Vec, /// Access key used for authentication. From 958f054123aea17456174c2c095dfe6745071efb Mon Sep 17 00:00:00 2001 From: loverustfs Date: Sun, 21 Dec 2025 23:43:12 +0800 Subject: [PATCH 20/26] ci: update all workflows to use ubicloud-standard-2 runner --- .github/workflows/audit.yml | 4 ++-- .github/workflows/build.yml | 20 ++++++++++---------- .github/workflows/ci.yml | 8 ++++---- .github/workflows/docker.yml | 6 +++--- .github/workflows/e2e-mint.yml | 4 ++-- .github/workflows/e2e-s3tests.yml | 4 ++-- .github/workflows/helm-package.yml | 4 ++-- .github/workflows/performance.yml | 4 ++-- 8 files changed, 27 insertions(+), 27 deletions(-) diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml index 661ef05a..d54bbfef 100644 --- a/.github/workflows/audit.yml +++ b/.github/workflows/audit.yml @@ -40,7 +40,7 @@ env: jobs: security-audit: name: Security Audit - runs-on: ubicloud-standard-4 + runs-on: ubicloud-standard-2 timeout-minutes: 15 steps: - name: Checkout repository @@ -65,7 +65,7 @@ jobs: dependency-review: name: Dependency Review - runs-on: ubicloud-standard-4 + runs-on: ubicloud-standard-2 if: github.event_name == 'pull_request' permissions: contents: read diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a70e6aab..7390d7c8 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -83,7 +83,7 @@ jobs: # Build strategy check - determine build type based on trigger build-check: name: Build Strategy Check - runs-on: ubicloud-standard-4 + runs-on: ubicloud-standard-2 outputs: should_build: ${{ steps.check.outputs.should_build }} build_type: ${{ steps.check.outputs.build_type }} @@ -167,19 +167,19 @@ jobs: matrix: include: # Linux builds - - os: ubicloud-standard-4 + - os: ubicloud-standard-2 target: x86_64-unknown-linux-musl cross: false platform: linux - - os: ubicloud-standard-4 + - os: ubicloud-standard-2 target: aarch64-unknown-linux-musl cross: true platform: linux - - os: ubicloud-standard-4 + - os: ubicloud-standard-2 target: x86_64-unknown-linux-gnu cross: false platform: linux - - os: ubicloud-standard-4 + - os: ubicloud-standard-2 target: aarch64-unknown-linux-gnu cross: true platform: linux @@ -532,7 +532,7 @@ jobs: name: Build Summary needs: [ build-check, build-rustfs ] if: always() && needs.build-check.outputs.should_build == 'true' - runs-on: ubicloud-standard-4 + runs-on: ubicloud-standard-2 steps: - name: Build completion summary shell: bash @@ -584,7 +584,7 @@ jobs: name: Create GitHub Release needs: [ build-check, build-rustfs ] if: startsWith(github.ref, 'refs/tags/') && needs.build-check.outputs.build_type != 
'development' - runs-on: ubicloud-standard-4 + runs-on: ubicloud-standard-2 permissions: contents: write outputs: @@ -670,7 +670,7 @@ jobs: name: Upload Release Assets needs: [ build-check, build-rustfs, create-release ] if: startsWith(github.ref, 'refs/tags/') && needs.build-check.outputs.build_type != 'development' - runs-on: ubicloud-standard-4 + runs-on: ubicloud-standard-2 permissions: contents: write actions: read @@ -751,7 +751,7 @@ jobs: name: Update Latest Version needs: [ build-check, upload-release-assets ] if: startsWith(github.ref, 'refs/tags/') - runs-on: ubicloud-standard-4 + runs-on: ubicloud-standard-2 steps: - name: Update latest.json env: @@ -801,7 +801,7 @@ jobs: name: Publish Release needs: [ build-check, create-release, upload-release-assets ] if: startsWith(github.ref, 'refs/tags/') && needs.build-check.outputs.build_type != 'development' - runs-on: ubicloud-standard-4 + runs-on: ubicloud-standard-2 permissions: contents: write steps: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3c7e7662..9dc5da56 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -78,7 +78,7 @@ jobs: permissions: actions: write contents: read - runs-on: ubicloud-standard-4 + runs-on: ubicloud-standard-2 outputs: should_skip: ${{ steps.skip_check.outputs.should_skip }} steps: @@ -93,7 +93,7 @@ jobs: typos: name: Typos - runs-on: ubicloud-standard-4 + runs-on: ubicloud-standard-2 steps: - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@stable @@ -104,7 +104,7 @@ jobs: name: Test and Lint needs: skip-check if: needs.skip-check.outputs.should_skip != 'true' - runs-on: ubicloud-standard-4 + runs-on: ubicloud-standard-2 timeout-minutes: 60 steps: - name: Checkout repository @@ -136,7 +136,7 @@ jobs: name: End-to-End Tests needs: skip-check if: needs.skip-check.outputs.should_skip != 'true' - runs-on: ubicloud-standard-4 + runs-on: ubicloud-standard-2 timeout-minutes: 30 steps: - name: Checkout repository diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 37d41b50..383dcd57 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -72,7 +72,7 @@ jobs: # Check if we should build Docker images build-check: name: Docker Build Check - runs-on: ubicloud-standard-4 + runs-on: ubicloud-standard-2 outputs: should_build: ${{ steps.check.outputs.should_build }} should_push: ${{ steps.check.outputs.should_push }} @@ -264,7 +264,7 @@ jobs: name: Build Docker Images needs: build-check if: needs.build-check.outputs.should_build == 'true' - runs-on: ubicloud-standard-4 + runs-on: ubicloud-standard-2 timeout-minutes: 60 steps: - name: Checkout repository @@ -404,7 +404,7 @@ jobs: name: Docker Build Summary needs: [ build-check, build-docker ] if: always() && needs.build-check.outputs.should_build == 'true' - runs-on: ubicloud-standard-4 + runs-on: ubicloud-standard-2 steps: - name: Docker build completion summary run: | diff --git a/.github/workflows/e2e-mint.yml b/.github/workflows/e2e-mint.yml index 5923cfde..a9de46f7 100644 --- a/.github/workflows/e2e-mint.yml +++ b/.github/workflows/e2e-mint.yml @@ -37,7 +37,7 @@ env: jobs: mint-single: - runs-on: ubicloud-standard-4 + runs-on: ubicloud-standard-2 timeout-minutes: 40 steps: - name: Checkout @@ -114,7 +114,7 @@ jobs: mint-multi: if: github.event_name == 'workflow_dispatch' && github.event.inputs.run-multi == 'true' needs: mint-single - runs-on: ubicloud-standard-4 + runs-on: ubicloud-standard-2 timeout-minutes: 60 steps: - name: Checkout diff --git 
a/.github/workflows/e2e-s3tests.yml b/.github/workflows/e2e-s3tests.yml index e29d13aa..e23e3a94 100644 --- a/.github/workflows/e2e-s3tests.yml +++ b/.github/workflows/e2e-s3tests.yml @@ -58,7 +58,7 @@ defaults: jobs: s3tests-single: if: github.event.inputs.test-mode == 'single' - runs-on: ubicloud-standard-4 + runs-on: ubicloud-standard-2 timeout-minutes: 120 steps: - uses: actions/checkout@v6 @@ -212,7 +212,7 @@ jobs: s3tests-multi: if: github.event_name == 'workflow_dispatch' && github.event.inputs.test-mode == 'multi' - runs-on: ubicloud-standard-4 + runs-on: ubicloud-standard-2 timeout-minutes: 150 steps: - uses: actions/checkout@v6 diff --git a/.github/workflows/helm-package.yml b/.github/workflows/helm-package.yml index ca9aec56..954d7c41 100644 --- a/.github/workflows/helm-package.yml +++ b/.github/workflows/helm-package.yml @@ -27,7 +27,7 @@ env: jobs: build-helm-package: - runs-on: ubicloud-standard-4 + runs-on: ubicloud-standard-2 # Only run on successful builds triggered by tag pushes (version format: x.y.z or x.y.z-suffix) if: | github.event.workflow_run.conclusion == 'success' && @@ -63,7 +63,7 @@ jobs: retention-days: 1 publish-helm-package: - runs-on: ubicloud-standard-4 + runs-on: ubicloud-standard-2 needs: [ build-helm-package ] steps: diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index c2b2ea6f..954fd000 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -40,7 +40,7 @@ env: jobs: performance-profile: name: Performance Profiling - runs-on: ubicloud-standard-4 + runs-on: ubicloud-standard-2 timeout-minutes: 30 steps: - name: Checkout repository @@ -115,7 +115,7 @@ jobs: benchmark: name: Benchmark Tests - runs-on: ubicloud-standard-4 + runs-on: ubicloud-standard-2 timeout-minutes: 45 steps: - name: Checkout repository From 1c51e204ab0d1f5dd81d0ec92e2ccaa9dee1abf0 Mon Sep 17 00:00:00 2001 From: loverustfs Date: Sun, 21 Dec 2025 23:54:40 +0800 Subject: [PATCH 21/26] ci: reduce cargo build jobs to 2 for standard-2 runner --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9dc5da56..3a285ba6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -69,7 +69,7 @@ concurrency: env: CARGO_TERM_COLOR: always RUST_BACKTRACE: 1 - CARGO_BUILD_JOBS: 8 + CARGO_BUILD_JOBS: 2 jobs: @@ -166,7 +166,7 @@ jobs: run: | touch rustfs/build.rs # Limit concurrency to prevent OOM - cargo build -p rustfs --bins --jobs 4 + cargo build -p rustfs --bins --jobs 2 - name: Run end-to-end tests run: | From 08f1a31f3f16276a3caca62bf3d028490cbb4c37 Mon Sep 17 00:00:00 2001 From: houseme Date: Mon, 22 Dec 2025 00:57:05 +0800 Subject: [PATCH 22/26] Fix notification event stream cleanup, add bounded send concurrency, and reduce overhead (#1224) --- .github/workflows/ci.yml | 2 +- Cargo.lock | 1 + crates/audit/src/factory.rs | 5 +- crates/config/src/notify/mod.rs | 12 + crates/notify/Cargo.toml | 1 + crates/notify/src/factory.rs | 5 +- crates/notify/src/integration.rs | 96 +++++--- crates/notify/src/lib.rs | 1 + .../src/notification_system_subscriber.rs | 74 ++++++ crates/notify/src/notifier.rs | 217 ++++++++++++------ crates/notify/src/rules/config.rs | 78 ++++++- crates/notify/src/rules/mod.rs | 18 +- crates/notify/src/rules/pattern_rules.rs | 117 +++++++++- crates/notify/src/rules/rules_map.rs | 107 ++++++--- crates/notify/src/rules/subscriber_index.rs | 131 +++++++++++ .../notify/src/rules/subscriber_snapshot.rs | 
117 ++++++++++ crates/notify/src/rules/xml_config.rs | 2 +- crates/notify/src/stream.rs | 57 ++++- crates/obs/src/telemetry.rs | 8 +- rustfs/src/server/http.rs | 41 +++- 20 files changed, 921 insertions(+), 169 deletions(-) create mode 100644 crates/notify/src/notification_system_subscriber.rs create mode 100644 crates/notify/src/rules/subscriber_index.rs create mode 100644 crates/notify/src/rules/subscriber_snapshot.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3a285ba6..ae3a308c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -104,7 +104,7 @@ jobs: name: Test and Lint needs: skip-check if: needs.skip-check.outputs.should_skip != 'true' - runs-on: ubicloud-standard-2 + runs-on: ubicloud-standard-4 timeout-minutes: 60 steps: - name: Checkout repository diff --git a/Cargo.lock b/Cargo.lock index 3b2d43a8..08641555 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7437,6 +7437,7 @@ dependencies = [ name = "rustfs-notify" version = "0.0.5" dependencies = [ + "arc-swap", "async-trait", "axum", "chrono", diff --git a/crates/audit/src/factory.rs b/crates/audit/src/factory.rs index ea8cd9b9..9beded31 100644 --- a/crates/audit/src/factory.rs +++ b/crates/audit/src/factory.rs @@ -60,8 +60,9 @@ impl TargetFactory for WebhookTargetFactory { let endpoint = config .lookup(WEBHOOK_ENDPOINT) .ok_or_else(|| TargetError::Configuration("Missing webhook endpoint".to_string()))?; - let endpoint_url = Url::parse(&endpoint) - .map_err(|e| TargetError::Configuration(format!("Invalid endpoint URL: {e} (value: '{endpoint}')")))?; + let parsed_endpoint = endpoint.trim(); + let endpoint_url = Url::parse(parsed_endpoint) + .map_err(|e| TargetError::Configuration(format!("Invalid endpoint URL: {e} (value: '{parsed_endpoint}')")))?; let args = WebhookArgs { enable: true, // If we are here, it's already enabled. diff --git a/crates/config/src/notify/mod.rs b/crates/config/src/notify/mod.rs index 6abb2bf8..59e6493f 100644 --- a/crates/config/src/notify/mod.rs +++ b/crates/config/src/notify/mod.rs @@ -51,6 +51,18 @@ pub const ENV_NOTIFY_TARGET_STREAM_CONCURRENCY: &str = "RUSTFS_NOTIFY_TARGET_STR /// Adjust this value based on your system's capabilities and expected load. pub const DEFAULT_NOTIFY_TARGET_STREAM_CONCURRENCY: usize = 20; +/// Name of the environment variable that configures send concurrency. +/// Controls how many send operations are processed in parallel by the notification system. +/// Defaults to [`DEFAULT_NOTIFY_SEND_CONCURRENCY`] if not set. +/// Example: `RUSTFS_NOTIFY_SEND_CONCURRENCY=64`. +pub const ENV_NOTIFY_SEND_CONCURRENCY: &str = "RUSTFS_NOTIFY_SEND_CONCURRENCY"; + +/// Default concurrency for send operations in the notification system +/// This value is used if the environment variable `RUSTFS_NOTIFY_SEND_CONCURRENCY` is not set. +/// It defines how many send operations can be processed in parallel by the notification system at any given time. +/// Adjust this value based on your system's capabilities and expected load. 
+pub const DEFAULT_NOTIFY_SEND_CONCURRENCY: usize = 64; + #[allow(dead_code)] pub const NOTIFY_SUB_SYSTEMS: &[&str] = &[NOTIFY_MQTT_SUB_SYS, NOTIFY_WEBHOOK_SUB_SYS]; diff --git a/crates/notify/Cargo.toml b/crates/notify/Cargo.toml index 0f02b70a..a4626675 100644 --- a/crates/notify/Cargo.toml +++ b/crates/notify/Cargo.toml @@ -30,6 +30,7 @@ rustfs-config = { workspace = true, features = ["notify", "constants"] } rustfs-ecstore = { workspace = true } rustfs-targets = { workspace = true } rustfs-utils = { workspace = true } +arc-swap = { workspace = true } async-trait = { workspace = true } chrono = { workspace = true, features = ["serde"] } futures = { workspace = true } diff --git a/crates/notify/src/factory.rs b/crates/notify/src/factory.rs index e15f5c5d..fb4d6312 100644 --- a/crates/notify/src/factory.rs +++ b/crates/notify/src/factory.rs @@ -60,8 +60,9 @@ impl TargetFactory for WebhookTargetFactory { let endpoint = config .lookup(WEBHOOK_ENDPOINT) .ok_or_else(|| TargetError::Configuration("Missing webhook endpoint".to_string()))?; - let endpoint_url = Url::parse(&endpoint) - .map_err(|e| TargetError::Configuration(format!("Invalid endpoint URL: {e} (value: '{endpoint}')")))?; + let parsed_endpoint = endpoint.trim(); + let endpoint_url = Url::parse(parsed_endpoint) + .map_err(|e| TargetError::Configuration(format!("Invalid endpoint URL: {e} (value: '{parsed_endpoint}')")))?; let args = WebhookArgs { enable: true, // If we are here, it's already enabled. diff --git a/crates/notify/src/integration.rs b/crates/notify/src/integration.rs index 790d43f9..ddce7560 100644 --- a/crates/notify/src/integration.rs +++ b/crates/notify/src/integration.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use crate::notification_system_subscriber::NotificationSystemSubscriberView; use crate::{ Event, error::NotificationError, notifier::EventNotifier, registry::TargetRegistry, rules::BucketNotificationConfig, stream, }; @@ -104,6 +105,8 @@ pub struct NotificationSystem { concurrency_limiter: Arc, /// Monitoring indicators metrics: Arc, + /// Subscriber view + subscriber_view: NotificationSystemSubscriberView, } impl NotificationSystem { @@ -112,6 +115,7 @@ impl NotificationSystem { let concurrency_limiter = rustfs_utils::get_env_usize(ENV_NOTIFY_TARGET_STREAM_CONCURRENCY, DEFAULT_NOTIFY_TARGET_STREAM_CONCURRENCY); NotificationSystem { + subscriber_view: NotificationSystemSubscriberView::new(), notifier: Arc::new(EventNotifier::new()), registry: Arc::new(TargetRegistry::new()), config: Arc::new(RwLock::new(config)), @@ -188,8 +192,11 @@ impl NotificationSystem { } /// Checks if there are active subscribers for the given bucket and event name. 
- pub async fn has_subscriber(&self, bucket: &str, event_name: &EventName) -> bool { - self.notifier.has_subscriber(bucket, event_name).await + pub async fn has_subscriber(&self, bucket: &str, event: &EventName) -> bool { + if !self.subscriber_view.has_subscriber(bucket, event) { + return false; + } + self.notifier.has_subscriber(bucket, event).await } async fn update_config_and_reload(&self, mut modifier: F) -> Result<(), NotificationError> @@ -236,15 +243,18 @@ impl NotificationSystem { pub async fn remove_target(&self, target_id: &TargetID, target_type: &str) -> Result<(), NotificationError> { info!("Attempting to remove target: {}", target_id); + let ttype = target_type.to_lowercase(); + let tname = target_id.name.to_lowercase(); + self.update_config_and_reload(|config| { let mut changed = false; - if let Some(targets_of_type) = config.0.get_mut(target_type) { - if targets_of_type.remove(&target_id.name).is_some() { + if let Some(targets_of_type) = config.0.get_mut(&ttype) { + if targets_of_type.remove(&tname).is_some() { info!("Removed target {} from configuration", target_id); changed = true; } if targets_of_type.is_empty() { - config.0.remove(target_type); + config.0.remove(&ttype); } } if !changed { @@ -269,20 +279,24 @@ impl NotificationSystem { /// If the target configuration is invalid, it returns Err(NotificationError::Configuration). pub async fn set_target_config(&self, target_type: &str, target_name: &str, kvs: KVS) -> Result<(), NotificationError> { info!("Setting config for target {} of type {}", target_name, target_type); + let ttype = target_type.to_lowercase(); + let tname = target_name.to_lowercase(); self.update_config_and_reload(|config| { - config - .0 - .entry(target_type.to_lowercase()) - .or_default() - .insert(target_name.to_lowercase(), kvs.clone()); + config.0.entry(ttype.clone()).or_default().insert(tname.clone(), kvs.clone()); true // The configuration is always modified }) .await } /// Removes all notification configurations for a bucket. - pub async fn remove_bucket_notification_config(&self, bucket_name: &str) { - self.notifier.remove_rules_map(bucket_name).await; + /// If the configuration is successfully removed, the entire notification system will be automatically reloaded. + /// + /// # Arguments + /// * `bucket` - The name of the bucket whose notification configuration is to be removed. + /// + pub async fn remove_bucket_notification_config(&self, bucket: &str) { + self.subscriber_view.clear_bucket(bucket); + self.notifier.remove_rules_map(bucket).await; } /// Removes a Target configuration. @@ -299,11 +313,28 @@ impl NotificationSystem { /// If the target configuration does not exist, it returns Ok(()) without making any changes. 
pub async fn remove_target_config(&self, target_type: &str, target_name: &str) -> Result<(), NotificationError> { info!("Removing config for target {} of type {}", target_name, target_type); + + let ttype = target_type.to_lowercase(); + let tname = target_name.to_lowercase(); + + let target_id = TargetID { + id: tname.clone(), + name: ttype.clone(), + }; + + // Deletion is prohibited if bucket rules refer to it + if self.notifier.is_target_bound_to_any_bucket(&target_id).await { + return Err(NotificationError::Configuration(format!( + "Target is still bound to bucket rules and deletion is prohibited: type={} name={}", + ttype, tname + ))); + } + let config_result = self .update_config_and_reload(|config| { let mut changed = false; - if let Some(targets) = config.0.get_mut(&target_type.to_lowercase()) { - if targets.remove(&target_name.to_lowercase()).is_some() { + if let Some(targets) = config.0.get_mut(&ttype) { + if targets.remove(&tname).is_some() { changed = true; } if targets.is_empty() { @@ -319,8 +350,6 @@ impl NotificationSystem { .await; if config_result.is_ok() { - let target_id = TargetID::new(target_name.to_string(), target_type.to_string()); - // Remove from target list let target_list = self.notifier.target_list(); let mut target_list_guard = target_list.write().await; @@ -358,6 +387,9 @@ impl NotificationSystem { let _ = cancel_tx.send(()).await; } + // Clear the target_list and ensure that reload is a replacement reconstruction (solve the target_list len unchanged/residual problem) + self.notifier.remove_all_bucket_targets().await; + // Update the config self.update_config(new_config.clone()).await; @@ -388,15 +420,16 @@ impl NotificationSystem { // The storage of the cloned target and the target itself let store_clone = store.boxed_clone(); - let target_box = target.clone_dyn(); - let target_arc = Arc::from(target_box); - - // Add a reference to the monitoring metrics - let metrics = self.metrics.clone(); - let semaphore = self.concurrency_limiter.clone(); + // let target_box = target.clone_dyn(); + let target_arc = Arc::from(target.clone_dyn()); // Encapsulated enhanced version of start_event_stream - let cancel_tx = self.enhanced_start_event_stream(store_clone, target_arc, metrics, semaphore); + let cancel_tx = self.enhanced_start_event_stream( + store_clone, + target_arc, + self.metrics.clone(), + self.concurrency_limiter.clone(), + ); // Start event stream processing and save cancel sender // let cancel_tx = start_event_stream(store_clone, target_clone); @@ -423,17 +456,18 @@ impl NotificationSystem { /// Loads the bucket notification configuration pub async fn load_bucket_notification_config( &self, - bucket_name: &str, - config: &BucketNotificationConfig, + bucket: &str, + cfg: &BucketNotificationConfig, ) -> Result<(), NotificationError> { - let arn_list = self.notifier.get_arn_list(&config.region).await; + self.subscriber_view.apply_bucket_config(bucket, cfg); + let arn_list = self.notifier.get_arn_list(&cfg.region).await; if arn_list.is_empty() { return Err(NotificationError::Configuration("No targets configured".to_string())); } info!("Available ARNs: {:?}", arn_list); // Validate the configuration against the available ARNs - if let Err(e) = config.validate(&config.region, &arn_list) { - debug!("Bucket notification config validation region:{} failed: {}", &config.region, e); + if let Err(e) = cfg.validate(&cfg.region, &arn_list) { + debug!("Bucket notification config validation region:{} failed: {}", &cfg.region, e); if !e.to_string().contains("ARN not found") { 
return Err(NotificationError::BucketNotification(e.to_string())); } else { @@ -441,9 +475,9 @@ impl NotificationSystem { } } - let rules_map = config.get_rules_map(); - self.notifier.add_rules_map(bucket_name, rules_map.clone()).await; - info!("Loaded notification config for bucket: {}", bucket_name); + let rules_map = cfg.get_rules_map(); + self.notifier.add_rules_map(bucket, rules_map.clone()).await; + info!("Loaded notification config for bucket: {}", bucket); Ok(()) } diff --git a/crates/notify/src/lib.rs b/crates/notify/src/lib.rs index cc514dbe..4181e4d0 100644 --- a/crates/notify/src/lib.rs +++ b/crates/notify/src/lib.rs @@ -23,6 +23,7 @@ mod event; pub mod factory; mod global; pub mod integration; +mod notification_system_subscriber; pub mod notifier; pub mod registry; pub mod rules; diff --git a/crates/notify/src/notification_system_subscriber.rs b/crates/notify/src/notification_system_subscriber.rs new file mode 100644 index 00000000..11014fb5 --- /dev/null +++ b/crates/notify/src/notification_system_subscriber.rs @@ -0,0 +1,74 @@ +// Copyright 2024 RustFS Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::BucketNotificationConfig; +use crate::rules::{BucketRulesSnapshot, DynRulesContainer, SubscriberIndex}; +use rustfs_targets::EventName; + +/// NotificationSystemSubscriberView - Provides an interface to manage and query +/// the subscription status of buckets in the notification system. +#[derive(Debug)] +pub struct NotificationSystemSubscriberView { + index: SubscriberIndex, +} + +impl NotificationSystemSubscriberView { + /// Creates a new NotificationSystemSubscriberView with an empty SubscriberIndex. + /// + /// Returns a new instance of NotificationSystemSubscriberView. + pub fn new() -> Self { + Self { + index: SubscriberIndex::default(), + } + } + + /// Checks if a bucket has any subscribers for a specific event. + /// This is a quick check using the event mask in the snapshot. + /// + /// # Arguments + /// * `bucket` - The name of the bucket to check. + /// * `event` - The event name to check for subscriptions. + /// + /// Returns `true` if there are subscribers for the event, `false` otherwise. + #[inline] + pub fn has_subscriber(&self, bucket: &str, event: &EventName) -> bool { + self.index.has_subscriber(bucket, event) + } + + /// Builds and atomically replaces a bucket's subscription snapshot from the configuration. + /// + /// Core principle: masks and rules are calculated and stored together in the same update. + /// + /// # Arguments + /// * `bucket` - The name of the bucket to update. + /// * `cfg` - The bucket notification configuration to compile into a snapshot. + pub fn apply_bucket_config(&self, bucket: &str, cfg: &BucketNotificationConfig) { + // *It is recommended to merge compile into one function to ensure the same origin. + let snapshot: BucketRulesSnapshot = cfg.compile_snapshot(); + + // *debug to prevent inconsistencies from being introduced when modifying the compile logic in the future. 
+ snapshot.debug_assert_mask_consistent(); + + self.index.store_snapshot(bucket, snapshot); + } + + /// Clears a bucket's subscription snapshot. + /// + /// #Arguments + /// * `bucket` - The name of the bucket to clear. + #[inline] + pub fn clear_bucket(&self, bucket: &str) { + self.index.clear_bucket(bucket); + } +} diff --git a/crates/notify/src/notifier.rs b/crates/notify/src/notifier.rs index 10aa5767..78beda9c 100644 --- a/crates/notify/src/notifier.rs +++ b/crates/notify/src/notifier.rs @@ -14,19 +14,21 @@ use crate::{error::NotificationError, event::Event, rules::RulesMap}; use hashbrown::HashMap; +use rustfs_config::notify::{DEFAULT_NOTIFY_SEND_CONCURRENCY, ENV_NOTIFY_SEND_CONCURRENCY}; use rustfs_targets::EventName; use rustfs_targets::Target; use rustfs_targets::arn::TargetID; use rustfs_targets::target::EntityTarget; use starshard::AsyncShardedHashMap; use std::sync::Arc; -use tokio::sync::RwLock; +use tokio::sync::{RwLock, Semaphore}; use tracing::{debug, error, info, instrument, warn}; /// Manages event notification to targets based on rules pub struct EventNotifier { target_list: Arc>, bucket_rules_map: Arc>, + send_limiter: Arc, } impl Default for EventNotifier { @@ -37,16 +39,41 @@ impl Default for EventNotifier { impl EventNotifier { /// Creates a new EventNotifier + /// + /// # Returns + /// Returns a new instance of EventNotifier. pub fn new() -> Self { + let max_inflight = rustfs_utils::get_env_usize(ENV_NOTIFY_SEND_CONCURRENCY, DEFAULT_NOTIFY_SEND_CONCURRENCY); EventNotifier { target_list: Arc::new(RwLock::new(TargetList::new())), bucket_rules_map: Arc::new(AsyncShardedHashMap::new(0)), + send_limiter: Arc::new(Semaphore::new(max_inflight)), } } + /// Checks whether a TargetID is still referenced by any bucket's rules. + /// + /// # Arguments + /// * `target_id` - The TargetID to check. + /// + /// # Returns + /// Returns `true` if the TargetID is bound to any bucket, otherwise `false`. + pub async fn is_target_bound_to_any_bucket(&self, target_id: &TargetID) -> bool { + // `AsyncShardedHashMap::iter()`: Traverse (bucket_name, rules_map) + let items = self.bucket_rules_map.iter().await; + for (_bucket, rules_map) in items { + if rules_map.contains_target_id(target_id) { + return true; + } + } + false + } + /// Returns a reference to the target list /// This method provides access to the target list for external use. /// + /// # Returns + /// Returns an `Arc>` representing the target list. pub fn target_list(&self) -> Arc> { Arc::clone(&self.target_list) } @@ -54,17 +81,23 @@ impl EventNotifier { /// Removes all notification rules for a bucket /// /// # Arguments - /// * `bucket_name` - The name of the bucket for which to remove rules + /// * `bucket` - The name of the bucket for which to remove rules /// /// This method removes all rules associated with the specified bucket name. /// It will log a message indicating the removal of rules. 
- pub async fn remove_rules_map(&self, bucket_name: &str) { - if self.bucket_rules_map.remove(&bucket_name.to_string()).await.is_some() { - info!("Removed all notification rules for bucket: {}", bucket_name); + pub async fn remove_rules_map(&self, bucket: &str) { + if self.bucket_rules_map.remove(&bucket.to_string()).await.is_some() { + info!("Removed all notification rules for bucket: {}", bucket); } } /// Returns a list of ARNs for the registered targets + /// + /// # Arguments + /// * `region` - The region to use for generating the ARNs + /// + /// # Returns + /// Returns a vector of strings representing the ARNs of the registered targets pub async fn get_arn_list(&self, region: &str) -> Vec { let target_list_guard = self.target_list.read().await; target_list_guard @@ -75,24 +108,37 @@ impl EventNotifier { } /// Adds a rules map for a bucket - pub async fn add_rules_map(&self, bucket_name: &str, rules_map: RulesMap) { + /// + /// # Arguments + /// * `bucket` - The name of the bucket for which to add the rules map + /// * `rules_map` - The rules map to add for the bucket + pub async fn add_rules_map(&self, bucket: &str, rules_map: RulesMap) { if rules_map.is_empty() { - self.bucket_rules_map.remove(&bucket_name.to_string()).await; + self.bucket_rules_map.remove(&bucket.to_string()).await; } else { - self.bucket_rules_map.insert(bucket_name.to_string(), rules_map).await; + self.bucket_rules_map.insert(bucket.to_string(), rules_map).await; } - info!("Added rules for bucket: {}", bucket_name); + info!("Added rules for bucket: {}", bucket); } /// Gets the rules map for a specific bucket. - pub async fn get_rules_map(&self, bucket_name: &str) -> Option { - self.bucket_rules_map.get(&bucket_name.to_string()).await + /// + /// # Arguments + /// * `bucket` - The name of the bucket for which to get the rules map + /// + /// # Returns + /// Returns `Some(RulesMap)` if rules exist for the bucket, otherwise returns `None`. 
+ pub async fn get_rules_map(&self, bucket: &str) -> Option { + self.bucket_rules_map.get(&bucket.to_string()).await } /// Removes notification rules for a bucket - pub async fn remove_notification(&self, bucket_name: &str) { - self.bucket_rules_map.remove(&bucket_name.to_string()).await; - info!("Removed notification rules for bucket: {}", bucket_name); + /// + /// # Arguments + /// * `bucket` - The name of the bucket for which to remove notification rules + pub async fn remove_notification(&self, bucket: &str) { + self.bucket_rules_map.remove(&bucket.to_string()).await; + info!("Removed notification rules for bucket: {}", bucket); } /// Removes all targets @@ -125,69 +171,87 @@ impl EventNotifier { } /// Sends an event to the appropriate targets based on the bucket rules + /// + /// # Arguments + /// * `event` - The event to send #[instrument(skip_all)] pub async fn send(&self, event: Arc) { let bucket_name = &event.s3.bucket.name; let object_key = &event.s3.object.key; let event_name = event.event_name; - if let Some(rules) = self.bucket_rules_map.get(bucket_name).await { - let target_ids = rules.match_rules(event_name, object_key); - if target_ids.is_empty() { - debug!("No matching targets for event in bucket: {}", bucket_name); - return; - } - let target_ids_len = target_ids.len(); - let mut handles = vec![]; - // Use scope to limit the borrow scope of target_list - { - let target_list_guard = self.target_list.read().await; - info!("Sending event to targets: {:?}", target_ids); - for target_id in target_ids { - // `get` now returns Option> - if let Some(target_arc) = target_list_guard.get(&target_id) { - // Clone an Arc> (which is where target_list is stored) to move into an asynchronous task - // target_arc is already Arc, clone it for the async task - let cloned_target_for_task = target_arc.clone(); - let event_clone = event.clone(); - let target_name_for_task = cloned_target_for_task.name(); // Get the name before generating the task - debug!("Preparing to send event to target: {}", target_name_for_task); - // Use cloned data in closures to avoid borrowing conflicts - // Create an EntityTarget from the event - let entity_target: Arc> = Arc::new(EntityTarget { - object_name: object_key.to_string(), - bucket_name: bucket_name.to_string(), - event_name, - data: event_clone.clone().as_ref().clone(), - }); - let handle = tokio::spawn(async move { - if let Err(e) = cloned_target_for_task.save(entity_target.clone()).await { - error!("Failed to send event to target {}: {}", target_name_for_task, e); - } else { - debug!("Successfully saved event to target {}", target_name_for_task); - } - }); - handles.push(handle); - } else { - warn!("Target ID {:?} found in rules but not in target list.", target_id); - } - } - // target_list is automatically released here - } - - // Wait for all tasks to be completed - for handle in handles { - if let Err(e) = handle.await { - error!("Task for sending/saving event failed: {}", e); - } - } - info!("Event processing initiated for {} targets for bucket: {}", target_ids_len, bucket_name); - } else { + let Some(rules) = self.bucket_rules_map.get(bucket_name).await else { debug!("No rules found for bucket: {}", bucket_name); + return; + }; + + let target_ids = rules.match_rules(event_name, object_key); + if target_ids.is_empty() { + debug!("No matching targets for event in bucket: {}", bucket_name); + return; } + let target_ids_len = target_ids.len(); + let mut handles = vec![]; + + // Use scope to limit the borrow scope of target_list + let target_list_guard = 
self.target_list.read().await; + info!("Sending event to targets: {:?}", target_ids); + for target_id in target_ids { + // `get` now returns Option> + if let Some(target_arc) = target_list_guard.get(&target_id) { + // Clone an Arc> (which is where target_list is stored) to move into an asynchronous task + // target_arc is already Arc, clone it for the async task + let target_for_task = target_arc.clone(); + let limiter = self.send_limiter.clone(); + let event_clone = event.clone(); + let target_name_for_task = target_for_task.name(); // Get the name before generating the task + debug!("Preparing to send event to target: {}", target_name_for_task); + // Use cloned data in closures to avoid borrowing conflicts + // Create an EntityTarget from the event + let entity_target: Arc> = Arc::new(EntityTarget { + object_name: object_key.to_string(), + bucket_name: bucket_name.to_string(), + event_name, + data: event_clone.as_ref().clone(), + }); + let handle = tokio::spawn(async move { + let _permit = match limiter.acquire_owned().await { + Ok(p) => p, + Err(e) => { + error!("Failed to acquire send permit for target {}: {}", target_name_for_task, e); + return; + } + }; + if let Err(e) = target_for_task.save(entity_target.clone()).await { + error!("Failed to send event to target {}: {}", target_name_for_task, e); + } else { + debug!("Successfully saved event to target {}", target_name_for_task); + } + }); + handles.push(handle); + } else { + warn!("Target ID {:?} found in rules but not in target list.", target_id); + } + } + // target_list is automatically released here + drop(target_list_guard); + + // Wait for all tasks to be completed + for handle in handles { + if let Err(e) = handle.await { + error!("Task for sending/saving event failed: {}", e); + } + } + info!("Event processing initiated for {} targets for bucket: {}", target_ids_len, bucket_name); } /// Initializes the targets for buckets + /// + /// # Arguments + /// * `targets_to_init` - A vector of boxed targets to initialize + /// + /// # Returns + /// Returns `Ok(())` if initialization is successful, otherwise returns a `NotificationError`. #[instrument(skip(self, targets_to_init))] pub async fn init_bucket_targets( &self, @@ -218,6 +282,7 @@ impl EventNotifier { /// A thread-safe list of targets pub struct TargetList { + /// Map of TargetID to Target targets: HashMap + Send + Sync>>, } @@ -234,6 +299,12 @@ impl TargetList { } /// Adds a target to the list + /// + /// # Arguments + /// * `target` - The target to add + /// + /// # Returns + /// Returns `Ok(())` if the target was added successfully, or a `NotificationError` if an error occurred. pub fn add(&mut self, target: Arc + Send + Sync>) -> Result<(), NotificationError> { let id = target.id(); if self.targets.contains_key(&id) { @@ -251,6 +322,12 @@ impl TargetList { /// Removes a target by ID. Note: This does not stop its associated event stream. /// Stream cancellation should be handled by EventNotifier. + /// + /// # Arguments + /// * `id` - The ID of the target to remove + /// + /// # Returns + /// Returns the removed target if it existed, otherwise `None`. pub async fn remove_target_only(&mut self, id: &TargetID) -> Option + Send + Sync>> { if let Some(target_arc) = self.targets.remove(id) { if let Err(e) = target_arc.close().await { @@ -278,6 +355,12 @@ impl TargetList { } /// Returns a target by ID + /// + /// # Arguments + /// * `id` - The ID of the target to retrieve + /// + /// # Returns + /// Returns the target if it exists, otherwise `None`. 
pub fn get(&self, id: &TargetID) -> Option + Send + Sync>> { self.targets.get(id).cloned() } @@ -292,7 +375,7 @@ impl TargetList { self.targets.len() } - // is_empty can be derived from len() + /// is_empty can be derived from len() pub fn is_empty(&self) -> bool { self.targets.is_empty() } diff --git a/crates/notify/src/rules/config.rs b/crates/notify/src/rules/config.rs index 5be48e8d..607e6aa0 100644 --- a/crates/notify/src/rules/config.rs +++ b/crates/notify/src/rules/config.rs @@ -15,13 +15,60 @@ use super::rules_map::RulesMap; use super::xml_config::ParseConfigError as BucketNotificationConfigError; use crate::rules::NotificationConfiguration; -use crate::rules::pattern_rules; -use crate::rules::target_id_set; -use hashbrown::HashMap; +use crate::rules::subscriber_snapshot::{BucketRulesSnapshot, DynRulesContainer, RuleEvents, RulesContainer}; use rustfs_targets::EventName; use rustfs_targets::arn::TargetID; use serde::{Deserialize, Serialize}; use std::io::Read; +use std::sync::Arc; + +/// A "rule view", only used for snapshot mask/consistency verification. +/// Here we choose to generate the view by "single event" to ensure that event_mask calculation is reliable and simple. +#[derive(Debug)] +struct RuleView { + events: Vec, +} + +impl RuleEvents for RuleView { + fn subscribed_events(&self) -> &[EventName] { + &self.events + } +} + +/// Adapt RulesMap to RulesContainer. +/// Key point: The items returned by iter_rules are &dyn RuleEvents, so a RuleView list is cached in the container. +#[derive(Debug)] +struct CompiledRules { + // Keep RulesMap (can be used later if you want to make more complex judgments during the snapshot reading phase) + #[allow(dead_code)] + rules_map: RulesMap, + // for RulesContainer::iter_rules + rule_views: Vec, +} + +impl CompiledRules { + fn from_rules_map(rules_map: &RulesMap) -> Self { + let mut rule_views = Vec::new(); + + for ev in rules_map.iter_events() { + rule_views.push(RuleView { events: vec![ev] }); + } + + Self { + rules_map: rules_map.clone(), + rule_views, + } + } +} + +impl RulesContainer for CompiledRules { + type Rule = dyn RuleEvents; + + fn iter_rules<'a>(&'a self) -> Box + 'a> { + // Key: Convert &RuleView into &dyn RuleEvents + Box::new(self.rule_views.iter().map(|v| v as &dyn RuleEvents)) + } +} /// Configuration for bucket notifications. /// This struct now holds the parsed and validated rules in the new RulesMap format. @@ -119,11 +166,26 @@ impl BucketNotificationConfig { pub fn set_region(&mut self, region: &str) { self.region = region.to_string(); } -} -// Add a helper to PatternRules if not already present -impl pattern_rules::PatternRules { - pub fn inner(&self) -> &HashMap { - &self.rules + /// Compiles the current BucketNotificationConfig into a BucketRulesSnapshot. + /// This involves transforming the rules into a format suitable for runtime use, + /// and calculating the event mask based on the subscribed events of the rules. + /// + /// # Returns + /// A BucketRulesSnapshot containing the compiled rules and event mask. 
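
// A minimal sketch of the event-mask idea described above: the snapshot ORs the
// mask bit of every subscribed event into one u64, so membership becomes a single
// AND. The bit values below are hypothetical; the real ones come from
// `EventName::mask()`.
const OBJECT_CREATED_PUT: u64 = 1 << 0; // hypothetical bit
const OBJECT_REMOVED_DELETE: u64 = 1 << 1; // hypothetical bit

fn build_mask(subscribed: &[u64]) -> u64 {
    // OR together the mask of every subscribed event, as the snapshot does.
    subscribed.iter().fold(0u64, |acc, m| acc | m)
}

fn has_subscriber(mask: u64, event_bit: u64) -> bool {
    // O(1) membership test, mirroring the snapshot's event check.
    (mask & event_bit) != 0
}

fn main() {
    let mask = build_mask(&[OBJECT_CREATED_PUT]);
    assert!(has_subscriber(mask, OBJECT_CREATED_PUT));
    assert!(!has_subscriber(mask, OBJECT_REMOVED_DELETE));
}
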
+ pub fn compile_snapshot(&self) -> BucketRulesSnapshot { + // 1) Generate container from RulesMap + let compiled = CompiledRules::from_rules_map(self.get_rules_map()); + let rules: Arc = Arc::new(compiled) as Arc; + + // 2) Calculate event_mask + let mut mask = 0u64; + for rule in rules.iter_rules() { + for ev in rule.subscribed_events() { + mask |= ev.mask(); + } + } + + BucketRulesSnapshot { event_mask: mask, rules } } } diff --git a/crates/notify/src/rules/mod.rs b/crates/notify/src/rules/mod.rs index 69b141f4..b976ddd9 100644 --- a/crates/notify/src/rules/mod.rs +++ b/crates/notify/src/rules/mod.rs @@ -12,22 +12,24 @@ // See the License for the specific language governing permissions and // limitations under the License. +mod config; pub mod pattern; -pub mod pattern_rules; -pub mod rules_map; -pub mod target_id_set; +mod pattern_rules; +mod rules_map; +mod subscriber_index; +mod subscriber_snapshot; +mod target_id_set; pub mod xml_config; // For XML structure definition and parsing - -pub mod config; // Definition and parsing for BucketNotificationConfig +// Definition and parsing for BucketNotificationConfig // Re-export key types from submodules for easy access to `crate::rules::TypeName` // Re-export key types from submodules for external use pub use config::BucketNotificationConfig; // Assume that BucketNotificationConfigError is also defined in config.rs // Or if it is still an alias for xml_config::ParseConfigError , adjust accordingly -pub use xml_config::ParseConfigError as BucketNotificationConfigError; - pub use pattern_rules::PatternRules; pub use rules_map::RulesMap; +pub use subscriber_index::*; +pub use subscriber_snapshot::*; pub use target_id_set::TargetIdSet; -pub use xml_config::{NotificationConfiguration, ParseConfigError}; +pub use xml_config::{NotificationConfiguration, ParseConfigError, ParseConfigError as BucketNotificationConfigError}; diff --git a/crates/notify/src/rules/pattern_rules.rs b/crates/notify/src/rules/pattern_rules.rs index 20b0fe93..06b31f07 100644 --- a/crates/notify/src/rules/pattern_rules.rs +++ b/crates/notify/src/rules/pattern_rules.rs @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -use super::pattern; -use super::target_id_set::TargetIdSet; +use crate::rules::TargetIdSet; +use crate::rules::pattern; use hashbrown::HashMap; use rayon::prelude::*; use rustfs_targets::arn::TargetID; @@ -27,31 +27,69 @@ pub struct PatternRules { } impl PatternRules { + /// Create a new, empty PatternRules. pub fn new() -> Self { Default::default() } /// Add rules: Pattern and Target ID. /// If the schema already exists, add target_id to the existing TargetIdSet. + /// + /// # Arguments + /// * `pattern` - The object name pattern. + /// * `target_id` - The TargetID to associate with the pattern. pub fn add(&mut self, pattern: String, target_id: TargetID) { self.rules.entry(pattern).or_default().insert(target_id); } /// Checks if there are any rules that match the given object name. + /// + /// # Arguments + /// * `object_name` - The object name to match against the patterns. + /// + /// # Returns + /// `true` if any pattern matches the object name, otherwise `false`. pub fn match_simple(&self, object_name: &str) -> bool { self.rules.keys().any(|p| pattern::match_simple(p, object_name)) } /// Returns all TargetIDs that match the object name. 
+ /// + /// Performance optimization points: + /// 1) Small collections are serialized directly to avoid rayon scheduling/merging overhead + /// 2) When hitting, no longer temporarily allocate TargetIdSet for each rule, but directly extend + /// + /// # Arguments + /// * `object_name` - The object name to match against the patterns. + /// + /// # Returns + /// A TargetIdSet containing all TargetIDs that match the object name. pub fn match_targets(&self, object_name: &str) -> TargetIdSet { + let n = self.rules.len(); + if n == 0 { + return TargetIdSet::new(); + } + + // Experience Threshold: Serial is usually faster below this value (can be adjusted after benchmarking) + const PAR_THRESHOLD: usize = 128; + + if n < PAR_THRESHOLD { + let mut out = TargetIdSet::new(); + for (pattern_str, target_set) in self.rules.iter() { + if pattern::match_simple(pattern_str, object_name) { + out.extend(target_set.iter().cloned()); + } + } + return out; + } + // Parallel path: Each thread accumulates a local set and finally merges it to reduce frequent allocations self.rules .par_iter() - .filter_map(|(pattern_str, target_set)| { + .fold(TargetIdSet::new, |mut local, (pattern_str, target_set)| { if pattern::match_simple(pattern_str, object_name) { - Some(target_set.iter().cloned().collect::()) - } else { - None + local.extend(target_set.iter().cloned()); } + local }) .reduce(TargetIdSet::new, |mut acc, set| { acc.extend(set); @@ -65,6 +103,11 @@ impl PatternRules { /// Merge another PatternRules. /// Corresponding to Go's `Rules.Union`. + /// # Arguments + /// * `other` - The PatternRules to merge with. + /// + /// # Returns + /// A new PatternRules containing the union of both. pub fn union(&self, other: &Self) -> Self { let mut new_rules = self.clone(); for (pattern, their_targets) in &other.rules { @@ -76,6 +119,13 @@ impl PatternRules { /// Calculate the difference from another PatternRules. /// Corresponding to Go's `Rules.Difference`. + /// The result contains only the patterns and TargetIDs that are in `self` but not in `other`. + /// + /// # Arguments + /// * `other` - The PatternRules to compare against. + /// + /// # Returns + /// A new PatternRules containing the difference. pub fn difference(&self, other: &Self) -> Self { let mut result_rules = HashMap::new(); for (pattern, self_targets) in &self.rules { @@ -94,4 +144,59 @@ impl PatternRules { } PatternRules { rules: result_rules } } + + /// Merge another PatternRules into self in place. + /// Corresponding to Go's `Rules.UnionInPlace`. + /// # Arguments + /// * `other` - The PatternRules to merge with. + pub fn union_in_place(&mut self, other: &Self) { + for (pattern, their_targets) in &other.rules { + self.rules + .entry(pattern.clone()) + .or_default() + .extend(their_targets.iter().cloned()); + } + } + + /// Calculate the difference from another PatternRules in place. + /// Corresponding to Go's `Rules.DifferenceInPlace`. + /// The result contains only the patterns and TargetIDs that are in `self` but not in `other`. + /// # Arguments + /// * `other` - The PatternRules to compare against. + pub fn difference_in_place(&mut self, other: &Self) { + self.rules.retain(|pattern, self_targets| { + if let Some(other_targets) = other.rules.get(pattern) { + // Remove other_targets from self_targets + self_targets.retain(|tid| !other_targets.contains(tid)); + } + !self_targets.is_empty() + }); + } + + /// Remove a pattern and its associated TargetID set from the PatternRules. + /// + /// # Arguments + /// * `pattern` - The pattern to remove. 
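
// A standalone sketch of the fold/reduce accumulation used by `match_targets`
// above: each rayon worker folds matches into a local set, and the local sets are
// merged once at the end instead of allocating a set per matching rule.
// `starts_with` stands in for `pattern::match_simple`, and std collections stand
// in for the crate's hashbrown-based `TargetIdSet`.
use rayon::prelude::*;
use std::collections::HashSet;

fn matching_ids(rules: &[(String, Vec<u32>)], object_name: &str) -> HashSet<u32> {
    rules
        .par_iter()
        .fold(HashSet::new, |mut local, (pattern, ids)| {
            if object_name.starts_with(pattern.as_str()) {
                local.extend(ids.iter().copied());
            }
            local
        })
        .reduce(HashSet::new, |mut acc, set| {
            acc.extend(set);
            acc
        })
}
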
+ pub fn remove_pattern(&mut self, pattern: &str) -> bool { + self.rules.remove(pattern).is_some() + } + + /// Determine whether the current PatternRules contains the specified TargetID (referenced by any pattern). + /// + /// # Parameters + /// * `target_id` - The TargetID to check for existence within the PatternRules + /// + /// # Returns + /// * `true` if the TargetID exists in any of the patterns; `false` otherwise. + pub fn contains_target_id(&self, target_id: &TargetID) -> bool { + self.rules.values().any(|set| set.contains(target_id)) + } + + /// Expose the internal rules for use in scenarios such as BucketNotificationConfig::validate. + /// + /// # Returns + /// A reference to the internal HashMap of patterns to TargetIdSets. + pub fn inner(&self) -> &HashMap { + &self.rules + } } diff --git a/crates/notify/src/rules/rules_map.rs b/crates/notify/src/rules/rules_map.rs index 59bb9c6c..c0f29675 100644 --- a/crates/notify/src/rules/rules_map.rs +++ b/crates/notify/src/rules/rules_map.rs @@ -12,8 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use super::pattern_rules::PatternRules; -use super::target_id_set::TargetIdSet; +use crate::rules::{PatternRules, TargetIdSet}; use hashbrown::HashMap; use rustfs_targets::EventName; use rustfs_targets::arn::TargetID; @@ -31,6 +30,9 @@ pub struct RulesMap { impl RulesMap { /// Create a new, empty RulesMap. + /// + /// # Returns + /// A new instance of RulesMap with an empty map and a total_events_mask set to 0. pub fn new() -> Self { Default::default() } @@ -67,12 +69,12 @@ impl RulesMap { /// Merge another RulesMap. /// `RulesMap.Add(rulesMap2 RulesMap) corresponding to Go + /// + /// # Parameters + /// * `other_map` - The other RulesMap to be merged into the current one. pub fn add_map(&mut self, other_map: &Self) { for (event_name, other_pattern_rules) in &other_map.map { - let self_pattern_rules = self.map.entry(*event_name).or_default(); - // PatternRules::union Returns the new PatternRules, we need to modify the existing ones - let merged_rules = self_pattern_rules.union(other_pattern_rules); - *self_pattern_rules = merged_rules; + self.map.entry(*event_name).or_default().union_in_place(other_pattern_rules); } // Directly merge two masks. self.total_events_mask |= other_map.total_events_mask; @@ -81,11 +83,14 @@ impl RulesMap { /// Remove another rule defined in the RulesMap from the current RulesMap. /// /// After the rule is removed, `total_events_mask` is recalculated to ensure its accuracy. + /// + /// # Parameters + /// * `other_map` - The other RulesMap containing rules to be removed from the current one. pub fn remove_map(&mut self, other_map: &Self) { let mut events_to_remove = Vec::new(); for (event_name, self_pattern_rules) in &mut self.map { if let Some(other_pattern_rules) = other_map.map.get(event_name) { - *self_pattern_rules = self_pattern_rules.difference(other_pattern_rules); + self_pattern_rules.difference_in_place(other_pattern_rules); if self_pattern_rules.is_empty() { events_to_remove.push(*event_name); } @@ -102,6 +107,9 @@ impl RulesMap { /// /// This method uses a bitmask for a quick check of O(1) complexity. /// `event_name` can be a compound type, such as `ObjectCreatedAll`. + /// + /// # Parameters + /// * `event_name` - The event name to check for subscribers. 
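
// A small sketch of the in-place set difference implemented by
// `difference_in_place` above, with std collections standing in for the crate's
// hashbrown map and `TargetIdSet`: drop the target ids that `other` also holds for
// the same pattern, then drop patterns that end up empty.
use std::collections::{HashMap, HashSet};

fn difference_in_place(rules: &mut HashMap<String, HashSet<u32>>, other: &HashMap<String, HashSet<u32>>) {
    rules.retain(|pattern, targets| {
        if let Some(theirs) = other.get(pattern) {
            targets.retain(|t| !theirs.contains(t));
        }
        !targets.is_empty()
    });
}
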
pub fn has_subscriber(&self, event_name: &EventName) -> bool { // event_name.mask() will handle compound events correctly (self.total_events_mask & event_name.mask()) != 0 @@ -112,39 +120,54 @@ impl RulesMap { /// # Notice /// The `event_name` parameter should be a specific, non-compound event type. /// Because this is taken from the `Event` object that actually occurs. + /// + /// # Parameters + /// * `event_name` - The specific event name to match against. + /// * `object_key` - The object key to match against the patterns in the rules. + /// + /// # Returns + /// * A set of TargetIDs that match the given event and object key. pub fn match_rules(&self, event_name: EventName, object_key: &str) -> TargetIdSet { // Use bitmask to quickly determine whether there is a matching rule if (self.total_events_mask & event_name.mask()) == 0 { return TargetIdSet::new(); // No matching rules } - // First try to directly match the event name - if let Some(pattern_rules) = self.map.get(&event_name) { - let targets = pattern_rules.match_targets(object_key); - if !targets.is_empty() { - return targets; - } - } - // Go's RulesMap[eventName] is directly retrieved, and if it does not exist, it is empty Rules. - // Rust's HashMap::get returns Option. If the event name does not exist, there is no rule. - // Compound events (such as ObjectCreatedAll) have been expanded as a single event when add_rule_config. - // Therefore, a single event name should be used when querying. - // If event_name itself is a single type, look it up directly. - // If event_name is a compound type, Go's logic is expanded when added. - // Here match_rules should receive events that may already be single. - // If the caller passes in a compound event, it should expand itself or handle this function first. - // Assume that event_name is already a specific event that can be used for searching. + // In Go, RulesMap[eventName] returns empty rules if the key doesn't exist. + // Rust's HashMap::get returns Option, so missing key means no rules. + // Compound events like ObjectCreatedAll are expanded into specific events during add_rule_config. + // Thus, queries should use specific event names. + // If event_name is compound, expansion happens at addition time. + // match_rules assumes event_name is already a specific event for lookup. + // Callers should expand compound events before calling this method. self.map .get(&event_name) .map_or_else(TargetIdSet::new, |pr| pr.match_targets(object_key)) } /// Check if RulesMap is empty. + /// + /// # Returns + /// * `true` if there are no rules in the map; `false` otherwise pub fn is_empty(&self) -> bool { self.map.is_empty() } + /// Determine whether the current RulesMap contains the specified TargetID (referenced by any event / pattern). + /// + /// # Parameters + /// * `target_id` - The TargetID to check for existence within the RulesMap + /// + /// # Returns + /// * `true` if the TargetID exists in any of the PatternRules; `false` otherwise. + pub fn contains_target_id(&self, target_id: &TargetID) -> bool { + self.map.values().any(|pr| pr.contains_target_id(target_id)) + } + /// Returns a clone of internal rules for use in scenarios such as BucketNotificationConfig::validate. + /// + /// # Returns + /// A reference to the internal HashMap of EventName to PatternRules. pub fn inner(&self) -> &HashMap { &self.map } @@ -160,18 +183,32 @@ impl RulesMap { } /// Remove rules and optimize performance + /// + /// # Parameters + /// * `event_name` - The EventName from which to remove the rule. 
+ /// * `pattern` - The pattern of the rule to be removed. #[allow(dead_code)] pub fn remove_rule(&mut self, event_name: &EventName, pattern: &str) { + let mut remove_event = false; + if let Some(pattern_rules) = self.map.get_mut(event_name) { - pattern_rules.rules.remove(pattern); + pattern_rules.remove_pattern(pattern); if pattern_rules.is_empty() { - self.map.remove(event_name); + remove_event = true; } } + + if remove_event { + self.map.remove(event_name); + } + self.recalculate_mask(); // Delay calculation mask } - /// Batch Delete Rules + /// Batch Delete Rules and Optimize Performance + /// + /// # Parameters + /// * `event_names` - A slice of EventNames to be removed. #[allow(dead_code)] pub fn remove_rules(&mut self, event_names: &[EventName]) { for event_name in event_names { @@ -181,9 +218,27 @@ impl RulesMap { } /// Update rules and optimize performance + /// + /// # Parameters + /// * `event_name` - The EventName to update. + /// * `pattern` - The pattern of the rule to be updated. + /// * `target_id` - The TargetID to be added. #[allow(dead_code)] pub fn update_rule(&mut self, event_name: EventName, pattern: String, target_id: TargetID) { self.map.entry(event_name).or_default().add(pattern, target_id); self.total_events_mask |= event_name.mask(); // Update only the relevant bitmask } + + /// Iterate all EventName keys contained in this RulesMap. + /// + /// Used by snapshot compilation to compute bucket event_mask. + /// + /// # Returns + /// An iterator over all EventName keys in the RulesMap. + #[inline] + pub fn iter_events(&self) -> impl Iterator + '_ { + // `inner()` is already used by config.rs, so we reuse it here. + // If the key type is `EventName`, `.copied()` is the cheapest way to return values. + self.inner().keys().copied() + } } diff --git a/crates/notify/src/rules/subscriber_index.rs b/crates/notify/src/rules/subscriber_index.rs new file mode 100644 index 00000000..205bc58a --- /dev/null +++ b/crates/notify/src/rules/subscriber_index.rs @@ -0,0 +1,131 @@ +// Copyright 2024 RustFS Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::rules::{BucketRulesSnapshot, BucketSnapshotRef, DynRulesContainer}; +use arc_swap::ArcSwap; +use rustfs_targets::EventName; +use starshard::ShardedHashMap; +use std::fmt; +use std::sync::Arc; + +/// A global bucket -> snapshot index. +/// +/// Read path: lock-free load (ArcSwap) +/// Write path: atomic replacement after building a new snapshot +pub struct SubscriberIndex { + // Use starshard for sharding to reduce lock competition when the number of buckets is large + inner: ShardedHashMap>>>, + // Cache an "empty rule container" for empty snapshots (avoids building every time) + empty_rules: Arc, +} + +/// Avoid deriving fields that do not support Debug +impl fmt::Debug for SubscriberIndex { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("SubscriberIndex").finish_non_exhaustive() + } +} + +impl SubscriberIndex { + /// Create a new SubscriberIndex. 
+ /// + /// # Arguments + /// * `empty_rules` - An Arc to an empty rules container used for empty snapshots + /// + /// Returns a new instance of SubscriberIndex. + pub fn new(empty_rules: Arc) -> Self { + Self { + inner: ShardedHashMap::new(64), + empty_rules, + } + } + + /// Get the current snapshot of a bucket. + /// If it does not exist, return empty snapshot. + /// + /// # Arguments + /// * `bucket` - The name of the bucket to load. + /// + /// Returns the snapshot reference for the specified bucket. + pub fn load_snapshot(&self, bucket: &str) -> BucketSnapshotRef { + match self.inner.get(&bucket.to_string()) { + Some(cell) => cell.load_full(), + None => Arc::new(BucketRulesSnapshot::empty(self.empty_rules.clone())), + } + } + + /// Quickly determine whether the bucket has a subscription to an event. + /// This judgment can be consistent with subsequent rule matching when reading the same snapshot. + /// + /// # Arguments + /// * `bucket` - The name of the bucket to check. + /// * `event` - The event name to check for subscriptions. + /// + /// Returns `true` if there are subscribers for the event, `false` otherwise. + #[inline] + pub fn has_subscriber(&self, bucket: &str, event: &EventName) -> bool { + let snap = self.load_snapshot(bucket); + if snap.event_mask == 0 { + return false; + } + snap.has_event(event) + } + + /// Atomically update a bucket's snapshot (whole package replacement). + /// + /// - The caller first builds the complete `BucketRulesSnapshot` (including event\_mask and rules). + /// - This method ensures that the read path will not observe intermediate states. + /// + /// # Arguments + /// * `bucket` - The name of the bucket to update. + /// * `new_snapshot` - The new snapshot to store for the bucket. + pub fn store_snapshot(&self, bucket: &str, new_snapshot: BucketRulesSnapshot) { + let key = bucket.to_string(); + + let cell = self.inner.get(&key).unwrap_or_else(|| { + // Insert a default cell (empty snapshot) + let init = Arc::new(ArcSwap::from_pointee(BucketRulesSnapshot::empty(self.empty_rules.clone()))); + self.inner.insert(key.clone(), init.clone()); + init + }); + + cell.store(Arc::new(new_snapshot)); + } + + /// Delete the bucket's subscription view (make it empty). + /// + /// # Arguments + /// * `bucket` - The name of the bucket to clear. + pub fn clear_bucket(&self, bucket: &str) { + if let Some(cell) = self.inner.get(&bucket.to_string()) { + cell.store(Arc::new(BucketRulesSnapshot::empty(self.empty_rules.clone()))); + } + } +} + +impl Default for SubscriberIndex { + fn default() -> Self { + // An available empty rule container is required; here it is implemented using minimal empty + #[derive(Debug)] + struct EmptyRules; + impl crate::rules::subscriber_snapshot::RulesContainer for EmptyRules { + type Rule = dyn crate::rules::subscriber_snapshot::RuleEvents; + fn iter_rules<'a>(&'a self) -> Box + 'a> { + Box::new(std::iter::empty()) + } + } + + Self::new(Arc::new(EmptyRules) as Arc) + } +} diff --git a/crates/notify/src/rules/subscriber_snapshot.rs b/crates/notify/src/rules/subscriber_snapshot.rs new file mode 100644 index 00000000..4eed5d28 --- /dev/null +++ b/crates/notify/src/rules/subscriber_snapshot.rs @@ -0,0 +1,117 @@ +// Copyright 2024 RustFS Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use rustfs_targets::EventName; +use std::sync::Arc; + +/// Let the rules structure provide "what events it is subscribed to". +/// This way BucketRulesSnapshot does not need to know the internal shape of rules. +pub trait RuleEvents { + fn subscribed_events(&self) -> &[EventName]; +} + +/// Let the rules container provide the ability to iterate over all rules (abstracting only to the minimum necessary). +pub trait RulesContainer { + type Rule: RuleEvents + ?Sized; + fn iter_rules<'a>(&'a self) -> Box + 'a>; + + /// Fast empty judgment for snapshots (fix missing `rules.is_empty()`) + fn is_empty(&self) -> bool { + self.iter_rules().next().is_none() + } +} + +/// Represents a bucket's notification subscription view snapshot (immutable). +/// +/// - `event_mask`: Quickly determine whether there is a subscription to a certain type of event (bitset/flags). +/// - `rules`: precise rule mapping (prefix/suffix/pattern -> targets). +/// +/// The read path only reads this snapshot to ensure consistency. +#[derive(Debug, Clone)] +pub struct BucketRulesSnapshot +where + R: RulesContainer + ?Sized, +{ + pub event_mask: u64, + pub rules: Arc, +} + +impl BucketRulesSnapshot +where + R: RulesContainer + ?Sized, +{ + /// Create an empty snapshot with no subscribed events and no rules. + /// + /// # Arguments + /// * `rules` - An Arc to a rules container (can be an empty container). + /// + /// # Returns + /// An instance of `BucketRulesSnapshot` with an empty event mask. + #[inline] + pub fn empty(rules: Arc) -> Self { + Self { event_mask: 0, rules } + } + + /// Check if the snapshot has any subscribers for the specified event. + /// + /// # Arguments + /// * `event` - The event name to check for subscriptions. + /// + /// # Returns + /// `true` if there are subscribers for the event, `false` otherwise. + #[inline] + pub fn has_event(&self, event: &EventName) -> bool { + (self.event_mask & event.mask()) != 0 + } + + /// Check if the snapshot is empty (no subscribed events or rules). + /// + /// # Returns + /// `true` if the snapshot is empty, `false` otherwise. + #[inline] + pub fn is_empty(&self) -> bool { + self.event_mask == 0 || self.rules.is_empty() + } + + /// [debug] Assert that `event_mask` is consistent with the event declared in `rules`. + /// + /// Constraints: + /// - only runs in debug builds (release incurs no cost). + /// - If the rule contains compound events (\*All / Everything), rely on `EventName::mask()` to automatically expand. + #[inline] + pub fn debug_assert_mask_consistent(&self) { + #[cfg(debug_assertions)] + { + let mut recomputed = 0u64; + for rule in self.rules.iter_rules() { + for ev in rule.subscribed_events() { + recomputed |= ev.mask(); + } + } + + debug_assert!( + recomputed == self.event_mask, + "BucketRulesSnapshot.event_mask inconsistent: stored={:#x}, recomputed={:#x}", + self.event_mask, + recomputed + ); + } + } +} + +/// Unify trait-object snapshot types (fix Sized / missing generic arguments) +pub type DynRulesContainer = dyn RulesContainer + Send + Sync; + +/// Expose Arc form to facilitate sharing. 
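
// A minimal sketch of the lock-free read / atomic replace pattern behind
// `SubscriberIndex`, using the same `arc_swap` calls as the code above;
// `Snapshot` here is an illustrative stand-in for `BucketRulesSnapshot`.
use arc_swap::ArcSwap;
use std::sync::Arc;

#[derive(Debug, Default)]
struct Snapshot {
    event_mask: u64,
}

fn main() {
    let cell = ArcSwap::from_pointee(Snapshot::default());

    // Read path: an atomic Arc load, no lock and no intermediate state observed.
    let snap: Arc<Snapshot> = cell.load_full();
    assert_eq!(snap.event_mask, 0);

    // Write path: build the complete new snapshot first, then swap it in.
    cell.store(Arc::new(Snapshot { event_mask: 0b10 }));
    assert_eq!(cell.load_full().event_mask, 0b10);
}
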
+pub type BucketSnapshotRef = Arc>; diff --git a/crates/notify/src/rules/xml_config.rs b/crates/notify/src/rules/xml_config.rs index 134f0db2..698167d6 100644 --- a/crates/notify/src/rules/xml_config.rs +++ b/crates/notify/src/rules/xml_config.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use super::pattern; +use crate::rules::pattern; use hashbrown::HashSet; use rustfs_targets::EventName; use rustfs_targets::arn::{ARN, ArnError, TargetIDError}; diff --git a/crates/notify/src/stream.rs b/crates/notify/src/stream.rs index 9b37c13b..8c70d3c2 100644 --- a/crates/notify/src/stream.rs +++ b/crates/notify/src/stream.rs @@ -13,18 +13,23 @@ // limitations under the License. use crate::{Event, integration::NotificationMetrics}; -use rustfs_targets::StoreError; -use rustfs_targets::Target; -use rustfs_targets::TargetError; -use rustfs_targets::store::{Key, Store}; -use rustfs_targets::target::EntityTarget; +use rustfs_targets::{ + StoreError, Target, TargetError, + store::{Key, Store}, + target::EntityTarget, +}; use std::sync::Arc; use std::time::{Duration, Instant}; use tokio::sync::{Semaphore, mpsc}; use tokio::time::sleep; use tracing::{debug, error, info, warn}; -/// Streams events from the store to the target +/// Streams events from the store to the target with retry logic +/// +/// # Arguments +/// - `store`: The event store +/// - `target`: The target to send events to +/// - `cancel_rx`: Receiver to listen for cancellation signals pub async fn stream_events( store: &mut (dyn Store + Send), target: &dyn Target, @@ -67,6 +72,7 @@ pub async fn stream_events( match target.send_from_store(key.clone()).await { Ok(_) => { info!("Successfully sent event for target: {}", target.name()); + // send_from_store deletes the event from store on success success = true; } Err(e) => { @@ -104,6 +110,13 @@ pub async fn stream_events( } /// Starts the event streaming process for a target +/// +/// # Arguments +/// - `store`: The event store +/// - `target`: The target to send events to +/// +/// # Returns +/// A sender to signal cancellation of the event stream pub fn start_event_stream( mut store: Box + Send>, target: Arc + Send + Sync>, @@ -119,6 +132,15 @@ pub fn start_event_stream( } /// Start event stream with batch processing +/// +/// # Arguments +/// - `store`: The event store +/// - `target`: The target to send events to clients +/// - `metrics`: Metrics for monitoring +/// - `semaphore`: Semaphore to limit concurrency +/// +/// # Returns +/// A sender to signal cancellation of the event stream pub fn start_event_stream_with_batching( mut store: Box, Error = StoreError, Key = Key> + Send>, target: Arc + Send + Sync>, @@ -136,6 +158,16 @@ pub fn start_event_stream_with_batching( } /// Event stream processing with batch processing +/// +/// # Arguments +/// - `store`: The event store +/// - `target`: The target to send events to clients +/// - `cancel_rx`: Receiver to listen for cancellation signals +/// - `metrics`: Metrics for monitoring +/// - `semaphore`: Semaphore to limit concurrency +/// +/// # Notes +/// This function processes events in batches to improve efficiency. 
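
// A self-contained sketch of the bounded retry loop that `stream_events` and
// `process_batch` apply around `send_from_store`. The doubling backoff is an
// illustrative choice; the actual delay policy is whatever the code in this diff
// derives from `base_delay` and `max_retries`.
use std::time::Duration;

async fn send_with_retry<F, Fut, E>(mut send: F, max_retries: u32, base_delay: Duration) -> Result<(), E>
where
    F: FnMut() -> Fut,
    Fut: std::future::Future<Output = Result<(), E>>,
{
    let mut attempt = 0;
    loop {
        match send().await {
            Ok(()) => return Ok(()),
            Err(err) if attempt + 1 >= max_retries => return Err(err),
            Err(_) => {
                attempt += 1;
                // Back off before the next attempt (doubling is a sketch choice).
                tokio::time::sleep(base_delay * 2u32.pow(attempt - 1)).await;
            }
        }
    }
}
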
pub async fn stream_events_with_batching( store: &mut (dyn Store, Error = StoreError, Key = Key> + Send), target: &dyn Target, @@ -231,7 +263,17 @@ pub async fn stream_events_with_batching( } } -/// Processing event batches +/// Processing event batches for targets +/// # Arguments +/// - `batch`: The batch of events to process +/// - `batch_keys`: The corresponding keys of the events in the batch +/// - `target`: The target to send events to clients +/// - `max_retries`: Maximum number of retries for sending an event +/// - `base_delay`: Base delay duration for retries +/// - `metrics`: Metrics for monitoring +/// - `semaphore`: Semaphore to limit concurrency +/// # Notes +/// This function processes a batch of events, sending each event to the target with retry async fn process_batch( batch: &mut Vec>, batch_keys: &mut Vec, @@ -262,6 +304,7 @@ async fn process_batch( // Retry logic while retry_count < max_retries && !success { + // After sending successfully, the event in the storage is deleted synchronously. match target.send_from_store(key.clone()).await { Ok(_) => { info!("Successfully sent event for target: {}, Key: {}", target.name(), key.to_string()); diff --git a/crates/obs/src/telemetry.rs b/crates/obs/src/telemetry.rs index e2c5baf7..2aa2642c 100644 --- a/crates/obs/src/telemetry.rs +++ b/crates/obs/src/telemetry.rs @@ -39,9 +39,9 @@ use rustfs_config::{ ENV_OBS_LOG_DIRECTORY, ENV_OBS_LOG_FLUSH_MS, ENV_OBS_LOG_MESSAGE_CAPA, ENV_OBS_LOG_POOL_CAPA, }, }; -use rustfs_utils::{get_env_u64, get_env_usize, get_local_ip_with_default}; +use rustfs_utils::{get_env_opt_str, get_env_u64, get_env_usize, get_local_ip_with_default}; use smallvec::SmallVec; -use std::{borrow::Cow, env, fs, io::IsTerminal, time::Duration}; +use std::{borrow::Cow, fs, io::IsTerminal, time::Duration}; use tracing::info; use tracing_error::ErrorLayer; use tracing_opentelemetry::{MetricsLayer, OpenTelemetryLayer}; @@ -574,8 +574,8 @@ pub(crate) fn init_telemetry(config: &OtelConfig) -> Result s, + Err(e) => { + warn!("Failed to create socket for {:?}: {}, falling back to IPv4", server_addr, e); + let ipv4_addr = SocketAddr::new(std::net::Ipv4Addr::UNSPECIFIED.into(), server_addr.port()); + server_addr = ipv4_addr; + socket2::Socket::new(socket2::Domain::IPV4, socket2::Type::STREAM, Some(socket2::Protocol::TCP))? + } + }; + // If address is IPv6 try to enable dual-stack; on failure, switch to IPv4 socket. if server_addr.is_ipv6() { if let Err(e) = socket.set_only_v6(false) { - warn!("Failed to set IPV6_V6ONLY=false, falling back to IPv4-only: {}", e); - // Fallback to a new IPv4 socket if setting dual-stack fails. + warn!("Failed to set IPV6_V6ONLY=false, attempting IPv4 fallback: {}", e); let ipv4_addr = SocketAddr::new(std::net::Ipv4Addr::UNSPECIFIED.into(), server_addr.port()); server_addr = ipv4_addr; socket = socket2::Socket::new(socket2::Domain::IPV4, socket2::Type::STREAM, Some(socket2::Protocol::TCP))?; @@ -140,8 +150,27 @@ pub async fn start_http_server( socket.set_reuse_address(true)?; // Set the socket to non-blocking before passing it to Tokio. socket.set_nonblocking(true)?; - socket.bind(&server_addr.into())?; - socket.listen(backlog)?; + + // Attempt bind; if bind fails for IPv6, try IPv4 fallback once more. 
+ if let Err(bind_err) = socket.bind(&server_addr.into()) { + warn!("Failed to bind to {}: {}.", server_addr, bind_err); + if server_addr.is_ipv6() { + // Try IPv4 fallback + let ipv4_addr = SocketAddr::new(std::net::Ipv4Addr::UNSPECIFIED.into(), server_addr.port()); + server_addr = ipv4_addr; + socket = socket2::Socket::new(socket2::Domain::IPV4, socket2::Type::STREAM, Some(socket2::Protocol::TCP))?; + socket.set_reuse_address(true)?; + socket.set_nonblocking(true)?; + socket.bind(&server_addr.into())?; + // [FIX] Ensure fallback socket is moved to listening state as well. + socket.listen(backlog)?; + } else { + return Err(bind_err); + } + } else { + // Listen on the socket when initial bind succeeded + socket.listen(backlog)?; + } TcpListener::from_std(socket.into())? }; From 80cfb4feab9c023a7a5e83ae7108f42d70353358 Mon Sep 17 00:00:00 2001 From: weisd Date: Mon, 22 Dec 2025 17:15:19 +0800 Subject: [PATCH 23/26] Add Disk Timeout and Health Check Functionality (#1196) Signed-off-by: weisd Co-authored-by: loverustfs --- Cargo.lock | 1 + crates/ahm/src/heal/manager.rs | 15 +- crates/ahm/src/scanner/data_scanner.rs | 4 +- crates/ahm/tests/data_usage_fallback_test.rs | 17 +- crates/ahm/tests/heal_integration_test.rs | 10 +- crates/ecstore/Cargo.toml | 1 + .../ecstore/src/cache_value/metacache_set.rs | 31 +- crates/ecstore/src/disk/disk_store.rs | 770 ++++++++++++ crates/ecstore/src/disk/local.rs | 84 +- crates/ecstore/src/disk/mod.rs | 10 +- crates/ecstore/src/rpc/peer_s3_client.rs | 375 ++++-- crates/ecstore/src/rpc/remote_disk.rs | 1064 +++++++++++------ crates/ecstore/src/set_disk.rs | 255 ++-- crates/ecstore/src/sets.rs | 2 +- crates/ecstore/src/store_init.rs | 14 +- crates/protos/src/lib.rs | 5 + crates/utils/src/string.rs | 8 + 17 files changed, 2017 insertions(+), 649 deletions(-) create mode 100644 crates/ecstore/src/disk/disk_store.rs diff --git a/Cargo.lock b/Cargo.lock index 08641555..ef5e6041 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7302,6 +7302,7 @@ dependencies = [ "tonic", "tower", "tracing", + "tracing-subscriber", "url", "urlencoding", "uuid", diff --git a/crates/ahm/src/heal/manager.rs b/crates/ahm/src/heal/manager.rs index 39c5f8fd..4e287e38 100644 --- a/crates/ahm/src/heal/manager.rs +++ b/crates/ahm/src/heal/manager.rs @@ -468,14 +468,17 @@ impl HealManager { let active_heals = self.active_heals.clone(); let cancel_token = self.cancel_token.clone(); let storage = self.storage.clone(); - - info!( - "start_auto_disk_scanner: Starting auto disk scanner with interval: {:?}", - config.read().await.heal_interval - ); + let mut duration = { + let config = config.read().await; + config.heal_interval + }; + if duration < Duration::from_secs(1) { + duration = Duration::from_secs(1); + } + info!("start_auto_disk_scanner: Starting auto disk scanner with interval: {:?}", duration); tokio::spawn(async move { - let mut interval = interval(config.read().await.heal_interval); + let mut interval = interval(duration); loop { tokio::select! 
{ diff --git a/crates/ahm/src/scanner/data_scanner.rs b/crates/ahm/src/scanner/data_scanner.rs index 93ea5fec..eaf85255 100644 --- a/crates/ahm/src/scanner/data_scanner.rs +++ b/crates/ahm/src/scanner/data_scanner.rs @@ -30,7 +30,7 @@ use rustfs_ecstore::{ bucket::versioning::VersioningApi, bucket::versioning_sys::BucketVersioningSys, data_usage::{aggregate_local_snapshots, compute_bucket_usage, store_data_usage_in_backend}, - disk::{Disk, DiskAPI, DiskStore, RUSTFS_META_BUCKET, WalkDirOptions}, + disk::{DiskAPI, DiskStore, RUSTFS_META_BUCKET, WalkDirOptions}, set_disk::SetDisks, store_api::ObjectInfo, }; @@ -1977,7 +1977,7 @@ impl Scanner { } else { // Apply lifecycle actions if let Some(lifecycle_config) = &lifecycle_config { - if let Disk::Local(_local_disk) = &**disk { + if disk.is_local() { let vcfg = BucketVersioningSys::get(bucket).await.ok(); let mut scanner_item = ScannerItem { diff --git a/crates/ahm/tests/data_usage_fallback_test.rs b/crates/ahm/tests/data_usage_fallback_test.rs index 48fd5457..03a7cfe5 100644 --- a/crates/ahm/tests/data_usage_fallback_test.rs +++ b/crates/ahm/tests/data_usage_fallback_test.rs @@ -21,10 +21,11 @@ use rustfs_ecstore::bucket::metadata_sys::{BucketMetadataSys, GLOBAL_BucketMetad use rustfs_ecstore::endpoints::EndpointServerPools; use rustfs_ecstore::store::ECStore; use rustfs_ecstore::store_api::{ObjectIO, PutObjReader, StorageAPI}; -use std::sync::Arc; +use std::sync::{Arc, Once}; use tempfile::TempDir; use tokio::sync::RwLock; use tokio_util::sync::CancellationToken; +use tracing::Level; /// Build a minimal single-node ECStore over a temp directory and populate objects. async fn create_store_with_objects(count: usize) -> (TempDir, std::sync::Arc) { @@ -74,8 +75,22 @@ async fn create_store_with_objects(count: usize) -> (TempDir, std::sync::Arc, Arc, Arc)> = OnceLock::new(); static INIT: Once = Once::new(); -fn init_tracing() { +pub fn init_tracing() { INIT.call_once(|| { - let _ = tracing_subscriber::fmt::try_init(); + let _ = tracing_subscriber::fmt() + .with_env_filter(tracing_subscriber::EnvFilter::from_default_env()) + .with_timer(tracing_subscriber::fmt::time::UtcTime::rfc_3339()) + .with_thread_names(true) + .try_init(); }); } @@ -356,7 +360,7 @@ mod serial_tests { // Create heal manager with faster interval let cfg = HealConfig { - heal_interval: Duration::from_secs(2), + heal_interval: Duration::from_secs(1), ..Default::default() }; let heal_manager = HealManager::new(heal_storage.clone(), Some(cfg)); diff --git a/crates/ecstore/Cargo.toml b/crates/ecstore/Cargo.toml index b2cfda4d..bd021c19 100644 --- a/crates/ecstore/Cargo.toml +++ b/crates/ecstore/Cargo.toml @@ -113,6 +113,7 @@ faster-hex = { workspace = true } tokio = { workspace = true, features = ["rt-multi-thread", "macros"] } criterion = { workspace = true, features = ["html_reports"] } temp-env = { workspace = true } +tracing-subscriber = { workspace = true } [build-dependencies] shadow-rs = { workspace = true, features = ["build", "metadata"] } diff --git a/crates/ecstore/src/cache_value/metacache_set.rs b/crates/ecstore/src/cache_value/metacache_set.rs index 621ffea7..b71b2c30 100644 --- a/crates/ecstore/src/cache_value/metacache_set.rs +++ b/crates/ecstore/src/cache_value/metacache_set.rs @@ -16,7 +16,7 @@ use crate::disk::error::DiskError; use crate::disk::{self, DiskAPI, DiskStore, WalkDirOptions}; use futures::future::join_all; use rustfs_filemeta::{MetaCacheEntries, MetaCacheEntry, MetacacheReader, is_io_eof}; -use std::{future::Future, pin::Pin, sync::Arc}; +use 
std::{future::Future, pin::Pin}; use tokio::spawn; use tokio_util::sync::CancellationToken; use tracing::{error, info, warn}; @@ -71,14 +71,14 @@ pub async fn list_path_raw(rx: CancellationToken, opts: ListPathRawOptions) -> d let mut jobs: Vec>> = Vec::new(); let mut readers = Vec::with_capacity(opts.disks.len()); - let fds = Arc::new(opts.fallback_disks.clone()); + let fds = opts.fallback_disks.iter().flatten().cloned().collect::>(); let cancel_rx = CancellationToken::new(); for disk in opts.disks.iter() { let opdisk = disk.clone(); let opts_clone = opts.clone(); - let fds_clone = fds.clone(); + let mut fds_clone = fds.clone(); let cancel_rx_clone = cancel_rx.clone(); let (rd, mut wr) = tokio::io::duplex(64); readers.push(MetacacheReader::new(rd)); @@ -113,21 +113,20 @@ pub async fn list_path_raw(rx: CancellationToken, opts: ListPathRawOptions) -> d } while need_fallback { - // warn!("list_path_raw: while need_fallback start"); - let disk = match fds_clone.iter().find(|d| d.is_some()) { - Some(d) => { - if let Some(disk) = d.clone() { - disk - } else { - warn!("list_path_raw: fallback disk is none"); - break; - } - } - None => { - warn!("list_path_raw: fallback disk is none2"); - break; + let disk_op = { + if fds_clone.is_empty() { + None + } else { + let disk = fds_clone.remove(0); + if disk.is_online().await { Some(disk.clone()) } else { None } } }; + + let Some(disk) = disk_op else { + warn!("list_path_raw: fallback disk is none"); + break; + }; + match disk .as_ref() .walk_dir( diff --git a/crates/ecstore/src/disk/disk_store.rs b/crates/ecstore/src/disk/disk_store.rs new file mode 100644 index 00000000..3ccd8c7d --- /dev/null +++ b/crates/ecstore/src/disk/disk_store.rs @@ -0,0 +1,770 @@ +// Copyright 2024 RustFS Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::disk::{ + CheckPartsResp, DeleteOptions, DiskAPI, DiskError, DiskInfo, DiskInfoOptions, DiskLocation, Endpoint, Error, + FileInfoVersions, ReadMultipleReq, ReadMultipleResp, ReadOptions, RenameDataResp, Result, UpdateMetadataOpts, VolumeInfo, + WalkDirOptions, local::LocalDisk, +}; +use bytes::Bytes; +use rustfs_filemeta::{FileInfo, ObjectPartInfo, RawFileInfo}; +use rustfs_utils::string::parse_bool_with_default; +use std::{ + path::PathBuf, + sync::{ + Arc, + atomic::{AtomicI64, AtomicU32, Ordering}, + }, + time::Duration, +}; +use tokio::{sync::RwLock, time}; +use tokio_util::sync::CancellationToken; +use tracing::{debug, info, warn}; +use uuid::Uuid; + +/// Disk health status constants +const DISK_HEALTH_OK: u32 = 0; +const DISK_HEALTH_FAULTY: u32 = 1; + +pub const ENV_RUSTFS_DRIVE_ACTIVE_MONITORING: &str = "RUSTFS_DRIVE_ACTIVE_MONITORING"; +pub const ENV_RUSTFS_DRIVE_MAX_TIMEOUT_DURATION: &str = "RUSTFS_DRIVE_MAX_TIMEOUT_DURATION"; +pub const CHECK_EVERY: Duration = Duration::from_secs(15); +pub const SKIP_IF_SUCCESS_BEFORE: Duration = Duration::from_secs(5); +pub const CHECK_TIMEOUT_DURATION: Duration = Duration::from_secs(5); + +lazy_static::lazy_static! 
{ + static ref TEST_OBJ: String = format!("health-check-{}", Uuid::new_v4()); + static ref TEST_DATA: Bytes = Bytes::from(vec![42u8; 2048]); + static ref TEST_BUCKET: String = ".rustfs.sys/tmp".to_string(); +} + +pub fn get_max_timeout_duration() -> Duration { + std::env::var(ENV_RUSTFS_DRIVE_MAX_TIMEOUT_DURATION) + .map(|v| Duration::from_secs(v.parse::().unwrap_or(30))) + .unwrap_or(Duration::from_secs(30)) +} + +/// DiskHealthTracker tracks the health status of a disk. +/// Similar to Go's diskHealthTracker. +#[derive(Debug)] +pub struct DiskHealthTracker { + /// Atomic timestamp of last successful operation + pub last_success: AtomicI64, + /// Atomic timestamp of last operation start + pub last_started: AtomicI64, + /// Atomic disk status (OK or Faulty) + pub status: AtomicU32, + /// Atomic number of waiting operations + pub waiting: AtomicU32, +} + +impl DiskHealthTracker { + /// Create a new disk health tracker + pub fn new() -> Self { + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_nanos() as i64; + + Self { + last_success: AtomicI64::new(now), + last_started: AtomicI64::new(now), + status: AtomicU32::new(DISK_HEALTH_OK), + waiting: AtomicU32::new(0), + } + } + + /// Log a successful operation + pub fn log_success(&self) { + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_nanos() as i64; + self.last_success.store(now, Ordering::Relaxed); + } + + /// Check if disk is faulty + pub fn is_faulty(&self) -> bool { + self.status.load(Ordering::Relaxed) == DISK_HEALTH_FAULTY + } + + /// Set disk as faulty + pub fn set_faulty(&self) { + self.status.store(DISK_HEALTH_FAULTY, Ordering::Relaxed); + } + + /// Set disk as OK + pub fn set_ok(&self) { + self.status.store(DISK_HEALTH_OK, Ordering::Relaxed); + } + + pub fn swap_ok_to_faulty(&self) -> bool { + self.status + .compare_exchange(DISK_HEALTH_OK, DISK_HEALTH_FAULTY, Ordering::Relaxed, Ordering::Relaxed) + .is_ok() + } + + /// Increment waiting operations counter + pub fn increment_waiting(&self) { + self.waiting.fetch_add(1, Ordering::Relaxed); + } + + /// Decrement waiting operations counter + pub fn decrement_waiting(&self) { + self.waiting.fetch_sub(1, Ordering::Relaxed); + } + + /// Get waiting operations count + pub fn waiting_count(&self) -> u32 { + self.waiting.load(Ordering::Relaxed) + } + + /// Get last success timestamp + pub fn last_success(&self) -> i64 { + self.last_success.load(Ordering::Relaxed) + } +} + +impl Default for DiskHealthTracker { + fn default() -> Self { + Self::new() + } +} + +/// Health check context key for tracking disk operations +#[derive(Debug, Clone)] +struct HealthDiskCtxKey; + +#[derive(Debug)] +struct HealthDiskCtxValue { + last_success: Arc, +} + +impl HealthDiskCtxValue { + fn log_success(&self) { + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_nanos() as i64; + self.last_success.store(now, Ordering::Relaxed); + } +} + +/// LocalDiskWrapper wraps a DiskStore with health tracking capabilities. +/// This is similar to Go's xlStorageDiskIDCheck. 
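
// A small sketch of the one-shot OK -> FAULTY transition implemented by
// `swap_ok_to_faulty` above: `compare_exchange` guarantees that only the first
// caller wins the flip, so at most one recovery monitor is spawned per fault.
use std::sync::atomic::{AtomicU32, Ordering};

const OK: u32 = 0;
const FAULTY: u32 = 1;

fn main() {
    let status = AtomicU32::new(OK);
    let first = status.compare_exchange(OK, FAULTY, Ordering::Relaxed, Ordering::Relaxed).is_ok();
    let second = status.compare_exchange(OK, FAULTY, Ordering::Relaxed, Ordering::Relaxed).is_ok();
    assert!(first);   // this caller flipped the status
    assert!(!second); // already FAULTY, so the second caller does not win
}
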
+#[derive(Debug, Clone)] +pub struct LocalDiskWrapper { + /// The underlying disk store + disk: Arc, + /// Health tracker + health: Arc, + /// Whether health checking is enabled + health_check: bool, + /// Cancellation token for monitoring tasks + cancel_token: CancellationToken, + /// Disk ID for stale checking + disk_id: Arc>>, +} + +impl LocalDiskWrapper { + /// Create a new LocalDiskWrapper + pub fn new(disk: Arc, health_check: bool) -> Self { + // Check environment variable for health check override + // Default to true if not set, but only enable if both param and env are true + let env_health_check = std::env::var(ENV_RUSTFS_DRIVE_ACTIVE_MONITORING) + .map(|v| parse_bool_with_default(&v, true)) + .unwrap_or(true); + + let ret = Self { + disk, + health: Arc::new(DiskHealthTracker::new()), + health_check: health_check && env_health_check, + cancel_token: CancellationToken::new(), + disk_id: Arc::new(RwLock::new(None)), + }; + + ret.start_monitoring(); + + ret + } + + pub fn get_disk(&self) -> Arc { + self.disk.clone() + } + + /// Start the disk monitoring if health_check is enabled + pub fn start_monitoring(&self) { + if self.health_check { + let health = Arc::clone(&self.health); + let cancel_token = self.cancel_token.clone(); + let disk = Arc::clone(&self.disk); + + tokio::spawn(async move { + Self::monitor_disk_writable(disk, health, cancel_token).await; + }); + } + } + + /// Stop the disk monitoring + pub async fn stop_monitoring(&self) { + self.cancel_token.cancel(); + } + + /// Monitor disk writability periodically + async fn monitor_disk_writable(disk: Arc, health: Arc, cancel_token: CancellationToken) { + // TODO: config interval + + let mut interval = time::interval(CHECK_EVERY); + + loop { + tokio::select! { + _ = cancel_token.cancelled() => { + return; + } + _ = interval.tick() => { + if cancel_token.is_cancelled() { + return; + } + + if health.status.load(Ordering::Relaxed) != DISK_HEALTH_OK { + continue; + } + + let last_success_nanos = health.last_success.load(Ordering::Relaxed); + let elapsed = Duration::from_nanos( + (std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_nanos() as i64 - last_success_nanos) as u64 + ); + + if elapsed < SKIP_IF_SUCCESS_BEFORE { + continue; + } + + tokio::time::sleep(Duration::from_secs(1)).await; + + + debug!("health check: performing health check"); + if Self::perform_health_check(disk.clone(), &TEST_BUCKET, &TEST_OBJ, &TEST_DATA, true, CHECK_TIMEOUT_DURATION).await.is_err() && health.swap_ok_to_faulty() { + // Health check failed, disk is considered faulty + + health.increment_waiting(); // Balance the increment from failed operation + + let health_clone = Arc::clone(&health); + let disk_clone = disk.clone(); + let cancel_clone = cancel_token.clone(); + + tokio::spawn(async move { + Self::monitor_disk_status(disk_clone, health_clone, cancel_clone).await; + }); + } + } + } + } + } + + /// Perform a health check by writing and reading a test file + async fn perform_health_check( + disk: Arc, + test_bucket: &str, + test_filename: &str, + test_data: &Bytes, + check_faulty_only: bool, + timeout_duration: Duration, + ) -> Result<()> { + // Perform health check with timeout + let health_check_result = tokio::time::timeout(timeout_duration, async { + // Try to write test data + disk.write_all(test_bucket, test_filename, test_data.clone()).await?; + + // Try to read back the data + let read_data = disk.read_all(test_bucket, test_filename).await?; + + // Verify data integrity + if read_data.len() != 
test_data.len() { + warn!( + "health check: test file data length mismatch: expected {} bytes, got {}", + test_data.len(), + read_data.len() + ); + if check_faulty_only { + return Ok(()); + } + return Err(DiskError::FaultyDisk); + } + + // Clean up + disk.delete( + test_bucket, + test_filename, + DeleteOptions { + recursive: false, + immediate: false, + undo_write: false, + old_data_dir: None, + }, + ) + .await?; + + Ok(()) + }) + .await; + + match health_check_result { + Ok(result) => match result { + Ok(()) => Ok(()), + Err(e) => { + debug!("health check: failed: {:?}", e); + + if e == DiskError::FaultyDisk { + return Err(e); + } + + if check_faulty_only { Ok(()) } else { Err(e) } + } + }, + Err(_) => { + // Timeout occurred + warn!("health check: timeout after {:?}", timeout_duration); + Err(DiskError::FaultyDisk) + } + } + } + + /// Monitor disk status and try to bring it back online + async fn monitor_disk_status(disk: Arc, health: Arc, cancel_token: CancellationToken) { + const CHECK_EVERY: Duration = Duration::from_secs(5); + + let mut interval = time::interval(CHECK_EVERY); + + loop { + tokio::select! { + _ = cancel_token.cancelled() => { + return; + } + _ = interval.tick() => { + if cancel_token.is_cancelled() { + return; + } + + match Self::perform_health_check(disk.clone(), &TEST_BUCKET, &TEST_OBJ, &TEST_DATA, false, CHECK_TIMEOUT_DURATION).await { + Ok(_) => { + info!("Disk {} is back online", disk.to_string()); + health.set_ok(); + health.decrement_waiting(); + return; + } + Err(e) => { + warn!("Disk {} still faulty: {:?}", disk.to_string(), e); + } + } + } + } + } + } + + async fn check_id(&self, want_id: Option) -> Result<()> { + if want_id.is_none() { + return Ok(()); + } + + let stored_disk_id = self.disk.get_disk_id().await?; + + if stored_disk_id != want_id { + return Err(Error::other(format!("Disk ID mismatch wanted {:?}, got {:?}", want_id, stored_disk_id))); + } + + Ok(()) + } + + /// Check if disk ID is stale + async fn check_disk_stale(&self) -> Result<()> { + let Some(current_disk_id) = *self.disk_id.read().await else { + return Ok(()); + }; + + let stored_disk_id = match self.disk.get_disk_id().await? { + Some(id) => id, + None => return Ok(()), // Empty disk ID is allowed during initialization + }; + + if current_disk_id != stored_disk_id { + return Err(DiskError::DiskNotFound); + } + + Ok(()) + } + + /// Set the disk ID + pub async fn set_disk_id_internal(&self, id: Option) -> Result<()> { + let mut disk_id = self.disk_id.write().await; + *disk_id = id; + Ok(()) + } + + /// Get the current disk ID + pub async fn get_current_disk_id(&self) -> Option { + *self.disk_id.read().await + } + + /// Track disk health for an operation. + /// This method should wrap disk operations to ensure health checking. 
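
// A minimal sketch of the timeout wrapping performed by `track_disk_health`
// below: `Duration::ZERO` means "no timeout", otherwise the disk call races a
// timer and an expired timer is reported as an error. The error value returned on
// timeout is supplied by the caller here purely for illustration.
use std::time::Duration;

async fn with_timeout<T, E, Fut>(fut: Fut, limit: Duration, on_timeout: E) -> Result<T, E>
where
    Fut: std::future::Future<Output = Result<T, E>>,
{
    if limit == Duration::ZERO {
        return fut.await;
    }
    match tokio::time::timeout(limit, fut).await {
        Ok(result) => result,
        Err(_elapsed) => Err(on_timeout),
    }
}
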
+ pub async fn track_disk_health(&self, operation: F, timeout_duration: Duration) -> Result + where + F: FnOnce() -> Fut, + Fut: std::future::Future>, + { + // Check if disk is faulty + if self.health.is_faulty() { + warn!("disk {} health is faulty, returning error", self.to_string()); + return Err(DiskError::FaultyDisk); + } + + // Check if disk is stale + self.check_disk_stale().await?; + + // Record operation start + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_nanos() as i64; + self.health.last_started.store(now, Ordering::Relaxed); + self.health.increment_waiting(); + + if timeout_duration == Duration::ZERO { + let result = operation().await; + self.health.decrement_waiting(); + if result.is_ok() { + self.health.log_success(); + } + return result; + } + // Execute the operation with timeout + let result = tokio::time::timeout(timeout_duration, operation()).await; + + match result { + Ok(operation_result) => { + // Log success and decrement waiting counter + if operation_result.is_ok() { + self.health.log_success(); + } + self.health.decrement_waiting(); + operation_result + } + Err(_) => { + // Timeout occurred, mark disk as potentially faulty and decrement waiting counter + self.health.decrement_waiting(); + warn!("disk operation timeout after {:?}", timeout_duration); + Err(DiskError::other(format!("disk operation timeout after {:?}", timeout_duration))) + } + } + } +} + +#[async_trait::async_trait] +impl DiskAPI for LocalDiskWrapper { + fn to_string(&self) -> String { + self.disk.to_string() + } + + async fn is_online(&self) -> bool { + let Ok(Some(disk_id)) = self.disk.get_disk_id().await else { + return false; + }; + + let Some(current_disk_id) = *self.disk_id.read().await else { + return false; + }; + + current_disk_id == disk_id + } + + fn is_local(&self) -> bool { + self.disk.is_local() + } + + fn host_name(&self) -> String { + self.disk.host_name() + } + + fn endpoint(&self) -> Endpoint { + self.disk.endpoint() + } + + async fn close(&self) -> Result<()> { + self.stop_monitoring().await; + self.disk.close().await + } + + async fn get_disk_id(&self) -> Result> { + self.disk.get_disk_id().await + } + + async fn set_disk_id(&self, id: Option) -> Result<()> { + self.set_disk_id_internal(id).await + } + + fn path(&self) -> PathBuf { + self.disk.path() + } + + fn get_disk_location(&self) -> DiskLocation { + self.disk.get_disk_location() + } + + async fn disk_info(&self, opts: &DiskInfoOptions) -> Result { + if opts.noop && opts.metrics { + let mut info = DiskInfo::default(); + // Add health metrics + info.metrics.total_waiting = self.health.waiting_count(); + if self.health.is_faulty() { + return Err(DiskError::FaultyDisk); + } + return Ok(info); + } + + if self.health.is_faulty() { + return Err(DiskError::FaultyDisk); + } + + let result = self.disk.disk_info(opts).await?; + + if let Some(current_disk_id) = *self.disk_id.read().await + && Some(current_disk_id) != result.id + { + return Err(DiskError::DiskNotFound); + }; + + Ok(result) + } + + async fn make_volume(&self, volume: &str) -> Result<()> { + self.track_disk_health(|| async { self.disk.make_volume(volume).await }, get_max_timeout_duration()) + .await + } + + async fn make_volumes(&self, volumes: Vec<&str>) -> Result<()> { + self.track_disk_health(|| async { self.disk.make_volumes(volumes).await }, get_max_timeout_duration()) + .await + } + + async fn list_volumes(&self) -> Result> { + self.track_disk_health(|| async { self.disk.list_volumes().await }, Duration::ZERO) + 
.await + } + + async fn stat_volume(&self, volume: &str) -> Result { + self.track_disk_health(|| async { self.disk.stat_volume(volume).await }, get_max_timeout_duration()) + .await + } + + async fn delete_volume(&self, volume: &str) -> Result<()> { + self.track_disk_health(|| async { self.disk.delete_volume(volume).await }, Duration::ZERO) + .await + } + + async fn walk_dir(&self, opts: WalkDirOptions, wr: &mut W) -> Result<()> { + self.track_disk_health(|| async { self.disk.walk_dir(opts, wr).await }, Duration::ZERO) + .await + } + + async fn delete_version( + &self, + volume: &str, + path: &str, + fi: FileInfo, + force_del_marker: bool, + opts: DeleteOptions, + ) -> Result<()> { + self.track_disk_health( + || async { self.disk.delete_version(volume, path, fi, force_del_marker, opts).await }, + get_max_timeout_duration(), + ) + .await + } + + async fn delete_versions(&self, volume: &str, versions: Vec, opts: DeleteOptions) -> Vec> { + // Check if disk is faulty before proceeding + if self.health.is_faulty() { + return vec![Some(DiskError::FaultyDisk); versions.len()]; + } + + // Check if disk is stale + if let Err(e) = self.check_disk_stale().await { + return vec![Some(e); versions.len()]; + } + + // Record operation start + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_nanos() as i64; + self.health.last_started.store(now, Ordering::Relaxed); + self.health.increment_waiting(); + + // Execute the operation + let result = self.disk.delete_versions(volume, versions, opts).await; + + self.health.decrement_waiting(); + let has_err = result.iter().any(|e| e.is_some()); + if !has_err { + // Log success and decrement waiting counter + self.health.log_success(); + } + + result + } + + async fn delete_paths(&self, volume: &str, paths: &[String]) -> Result<()> { + self.track_disk_health(|| async { self.disk.delete_paths(volume, paths).await }, get_max_timeout_duration()) + .await + } + + async fn write_metadata(&self, org_volume: &str, volume: &str, path: &str, fi: FileInfo) -> Result<()> { + self.track_disk_health( + || async { self.disk.write_metadata(org_volume, volume, path, fi).await }, + get_max_timeout_duration(), + ) + .await + } + + async fn update_metadata(&self, volume: &str, path: &str, fi: FileInfo, opts: &UpdateMetadataOpts) -> Result<()> { + self.track_disk_health( + || async { self.disk.update_metadata(volume, path, fi, opts).await }, + get_max_timeout_duration(), + ) + .await + } + + async fn read_version( + &self, + org_volume: &str, + volume: &str, + path: &str, + version_id: &str, + opts: &ReadOptions, + ) -> Result { + self.track_disk_health( + || async { self.disk.read_version(org_volume, volume, path, version_id, opts).await }, + get_max_timeout_duration(), + ) + .await + } + + async fn read_xl(&self, volume: &str, path: &str, read_data: bool) -> Result { + self.track_disk_health(|| async { self.disk.read_xl(volume, path, read_data).await }, get_max_timeout_duration()) + .await + } + + async fn rename_data( + &self, + src_volume: &str, + src_path: &str, + fi: FileInfo, + dst_volume: &str, + dst_path: &str, + ) -> Result { + self.track_disk_health( + || async { self.disk.rename_data(src_volume, src_path, fi, dst_volume, dst_path).await }, + get_max_timeout_duration(), + ) + .await + } + + async fn list_dir(&self, origvolume: &str, volume: &str, dir_path: &str, count: i32) -> Result> { + self.track_disk_health( + || async { self.disk.list_dir(origvolume, volume, dir_path, count).await }, + get_max_timeout_duration(), + ) + 
.await + } + + async fn read_file(&self, volume: &str, path: &str) -> Result { + self.track_disk_health(|| async { self.disk.read_file(volume, path).await }, get_max_timeout_duration()) + .await + } + + async fn read_file_stream(&self, volume: &str, path: &str, offset: usize, length: usize) -> Result { + self.track_disk_health( + || async { self.disk.read_file_stream(volume, path, offset, length).await }, + get_max_timeout_duration(), + ) + .await + } + + async fn append_file(&self, volume: &str, path: &str) -> Result { + self.track_disk_health(|| async { self.disk.append_file(volume, path).await }, Duration::ZERO) + .await + } + + async fn create_file(&self, origvolume: &str, volume: &str, path: &str, file_size: i64) -> Result { + self.track_disk_health( + || async { self.disk.create_file(origvolume, volume, path, file_size).await }, + Duration::ZERO, + ) + .await + } + + async fn rename_file(&self, src_volume: &str, src_path: &str, dst_volume: &str, dst_path: &str) -> Result<()> { + self.track_disk_health( + || async { self.disk.rename_file(src_volume, src_path, dst_volume, dst_path).await }, + get_max_timeout_duration(), + ) + .await + } + + async fn rename_part(&self, src_volume: &str, src_path: &str, dst_volume: &str, dst_path: &str, meta: Bytes) -> Result<()> { + self.track_disk_health( + || async { self.disk.rename_part(src_volume, src_path, dst_volume, dst_path, meta).await }, + get_max_timeout_duration(), + ) + .await + } + + async fn delete(&self, volume: &str, path: &str, opt: DeleteOptions) -> Result<()> { + self.track_disk_health(|| async { self.disk.delete(volume, path, opt).await }, get_max_timeout_duration()) + .await + } + + async fn verify_file(&self, volume: &str, path: &str, fi: &FileInfo) -> Result { + self.track_disk_health(|| async { self.disk.verify_file(volume, path, fi).await }, Duration::ZERO) + .await + } + + async fn check_parts(&self, volume: &str, path: &str, fi: &FileInfo) -> Result { + self.track_disk_health(|| async { self.disk.check_parts(volume, path, fi).await }, Duration::ZERO) + .await + } + + async fn read_parts(&self, bucket: &str, paths: &[String]) -> Result> { + self.track_disk_health(|| async { self.disk.read_parts(bucket, paths).await }, Duration::ZERO) + .await + } + + async fn read_multiple(&self, req: ReadMultipleReq) -> Result> { + self.track_disk_health(|| async { self.disk.read_multiple(req).await }, Duration::ZERO) + .await + } + + async fn write_all(&self, volume: &str, path: &str, data: Bytes) -> Result<()> { + self.track_disk_health(|| async { self.disk.write_all(volume, path, data).await }, get_max_timeout_duration()) + .await + } + + async fn read_all(&self, volume: &str, path: &str) -> Result { + self.track_disk_health(|| async { self.disk.read_all(volume, path).await }, get_max_timeout_duration()) + .await + } +} diff --git a/crates/ecstore/src/disk/local.rs b/crates/ecstore/src/disk/local.rs index 5ed851e6..42444f7e 100644 --- a/crates/ecstore/src/disk/local.rs +++ b/crates/ecstore/src/disk/local.rs @@ -69,7 +69,7 @@ use tokio::sync::RwLock; use tracing::{debug, error, info, warn}; use uuid::Uuid; -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct FormatInfo { pub id: Option, pub data: Bytes, @@ -77,16 +77,6 @@ pub struct FormatInfo { pub last_check: Option, } -impl FormatInfo { - pub fn last_check_valid(&self) -> bool { - let now = OffsetDateTime::now_utc(); - self.file_info.is_some() - && self.id.is_some() - && self.last_check.is_some() - && (now.unix_timestamp() - self.last_check.unwrap().unix_timestamp() <= 1) - } -} - /// 
A helper enum to handle internal buffer types for writing data. pub enum InternalBuf<'a> { Ref(&'a [u8]), @@ -185,7 +175,7 @@ impl LocalDisk { }; let root_clone = root.clone(); let update_fn: UpdateFn = Box::new(move || { - let disk_id = id.map_or("".to_string(), |id| id.to_string()); + let disk_id = id; let root = root_clone.clone(); Box::pin(async move { match get_disk_info(root.clone()).await { @@ -200,7 +190,7 @@ impl LocalDisk { minor: info.minor, fs_type: info.fstype, root_disk: root, - id: disk_id.to_string(), + id: disk_id, ..Default::default() }; // if root { @@ -1295,7 +1285,7 @@ impl DiskAPI for LocalDisk { } #[tracing::instrument(skip(self))] async fn is_online(&self) -> bool { - self.check_format_json().await.is_ok() + true } #[tracing::instrument(skip(self))] @@ -1342,24 +1332,40 @@ impl DiskAPI for LocalDisk { #[tracing::instrument(level = "debug", skip(self))] async fn get_disk_id(&self) -> Result> { - let mut format_info = self.format_info.write().await; + let format_info = { + let format_info = self.format_info.read().await; + format_info.clone() + }; let id = format_info.id; - if format_info.last_check_valid() { - return Ok(id); + // if format_info.last_check_valid() { + // return Ok(id); + // } + + if format_info.file_info.is_some() && id.is_some() { + // check last check time + if let Some(last_check) = format_info.last_check { + if last_check.unix_timestamp() + 1 < OffsetDateTime::now_utc().unix_timestamp() { + return Ok(id); + } + } } let file_meta = self.check_format_json().await?; if let Some(file_info) = &format_info.file_info { if super::fs::same_file(&file_meta, file_info) { + let mut format_info = self.format_info.write().await; format_info.last_check = Some(OffsetDateTime::now_utc()); + drop(format_info); return Ok(id); } } + debug!("get_disk_id: read format.json"); + let b = fs::read(&self.format_path).await.map_err(to_unformatted_disk_error)?; let fm = FormatV3::try_from(b.as_slice()).map_err(|e| { @@ -1375,20 +1381,19 @@ impl DiskAPI for LocalDisk { return Err(DiskError::InconsistentDisk); } + let mut format_info = self.format_info.write().await; format_info.id = Some(disk_id); format_info.file_info = Some(file_meta); format_info.data = b.into(); format_info.last_check = Some(OffsetDateTime::now_utc()); + drop(format_info); Ok(Some(disk_id)) } #[tracing::instrument(skip(self))] - async fn set_disk_id(&self, id: Option) -> Result<()> { + async fn set_disk_id(&self, _id: Option) -> Result<()> { // No setup is required locally - // TODO: add check_id_store - let mut format_info = self.format_info.write().await; - format_info.id = id; Ok(()) } @@ -2438,6 +2443,10 @@ impl DiskAPI for LocalDisk { info.endpoint = self.endpoint.to_string(); info.scanning = self.scanning.load(Ordering::SeqCst) == 1; + if info.id.is_none() { + info.id = self.get_disk_id().await.unwrap_or(None); + } + Ok(info) } } @@ -2705,39 +2714,6 @@ mod test { } } - #[tokio::test] - async fn test_format_info_last_check_valid() { - let now = OffsetDateTime::now_utc(); - - // Valid format info - let valid_format_info = FormatInfo { - id: Some(Uuid::new_v4()), - data: vec![1, 2, 3].into(), - file_info: Some(fs::metadata("../../../..").await.unwrap()), - last_check: Some(now), - }; - assert!(valid_format_info.last_check_valid()); - - // Invalid format info (missing id) - let invalid_format_info = FormatInfo { - id: None, - data: vec![1, 2, 3].into(), - file_info: Some(fs::metadata("../../../..").await.unwrap()), - last_check: Some(now), - }; - assert!(!invalid_format_info.last_check_valid()); - - // 
Invalid format info (old timestamp) - let old_time = OffsetDateTime::now_utc() - time::Duration::seconds(10); - let old_format_info = FormatInfo { - id: Some(Uuid::new_v4()), - data: vec![1, 2, 3].into(), - file_info: Some(fs::metadata("../../../..").await.unwrap()), - last_check: Some(old_time), - }; - assert!(!old_format_info.last_check_valid()); - } - #[tokio::test] async fn test_read_file_exists() { let test_file = "./test_read_exists.txt"; diff --git a/crates/ecstore/src/disk/mod.rs b/crates/ecstore/src/disk/mod.rs index 5f419380..fdba21c9 100644 --- a/crates/ecstore/src/disk/mod.rs +++ b/crates/ecstore/src/disk/mod.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +pub mod disk_store; pub mod endpoint; pub mod error; pub mod error_conv; @@ -30,6 +31,7 @@ pub const FORMAT_CONFIG_FILE: &str = "format.json"; pub const STORAGE_FORMAT_FILE: &str = "xl.meta"; pub const STORAGE_FORMAT_FILE_BACKUP: &str = "xl.meta.bkp"; +use crate::disk::disk_store::LocalDiskWrapper; use crate::rpc::RemoteDisk; use bytes::Bytes; use endpoint::Endpoint; @@ -51,7 +53,7 @@ pub type FileWriter = Box; #[derive(Debug)] pub enum Disk { - Local(Box), + Local(Box), Remote(Box), } @@ -398,7 +400,7 @@ impl DiskAPI for Disk { pub async fn new_disk(ep: &Endpoint, opt: &DiskOption) -> Result { if ep.is_local { let s = LocalDisk::new(ep, opt.cleanup).await?; - Ok(Arc::new(Disk::Local(Box::new(s)))) + Ok(Arc::new(Disk::Local(Box::new(LocalDiskWrapper::new(Arc::new(s), opt.health_check))))) } else { let remote_disk = RemoteDisk::new(ep, opt).await?; Ok(Arc::new(Disk::Remote(Box::new(remote_disk)))) @@ -534,7 +536,7 @@ pub struct DiskInfo { pub scanning: bool, pub endpoint: String, pub mount_path: String, - pub id: String, + pub id: Option, pub rotational: bool, pub metrics: DiskMetrics, pub error: String, @@ -1015,7 +1017,7 @@ mod tests { let endpoint = Endpoint::try_from(test_dir).unwrap(); let local_disk = LocalDisk::new(&endpoint, false).await.unwrap(); - let disk = Disk::Local(Box::new(local_disk)); + let disk = Disk::Local(Box::new(LocalDiskWrapper::new(Arc::new(local_disk), false))); // Test basic methods assert!(disk.is_local()); diff --git a/crates/ecstore/src/rpc/peer_s3_client.rs b/crates/ecstore/src/rpc/peer_s3_client.rs index ac0a035c..fe251a3e 100644 --- a/crates/ecstore/src/rpc/peer_s3_client.rs +++ b/crates/ecstore/src/rpc/peer_s3_client.rs @@ -13,14 +13,18 @@ // limitations under the License. 
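The change above replaces the raw `LocalDisk` inside `Disk::Local` with a `LocalDiskWrapper`, so every local disk is built through the health-tracking wrapper, `LocalDisk::is_online` itself now always returns `true`, and `DiskInfo.id` / `get_disk_id` deal in `Option<Uuid>`. A minimal sketch of the resulting wiring, assuming the crate-internal APIs exactly as they appear in this diff; the helper name, endpoint path, and exact import paths are assumptions, not part of the patch:

```rust
use std::sync::Arc;

use crate::disk::{Disk, DiskAPI, disk_store::LocalDiskWrapper, endpoint::Endpoint, local::LocalDisk};

// Hypothetical helper: builds a health-tracked local disk the same way the
// updated `new_disk` and the mod.rs test in this patch do.
async fn wrapped_local_disk(path: &str) -> crate::disk::error::Result<()> {
    let endpoint = Endpoint::try_from(path).expect("valid local endpoint");
    let local_disk = LocalDisk::new(&endpoint, false).await?;

    // health_check = true lets the wrapper start its background monitor.
    let disk = Disk::Local(Box::new(LocalDiskWrapper::new(Arc::new(local_disk), true)));

    // After this patch the wrapper, not LocalDisk, decides online-ness by
    // comparing its cached id against get_disk_id(), which now yields Option<Uuid>.
    assert!(disk.is_local());
    let _id = disk.get_disk_id().await?;
    Ok(())
}
```

Callers keep using the `DiskAPI` trait unchanged; the wrapper adds the faulty/stale checks and timeouts shown in `track_disk_health` around every delegated call.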
use crate::bucket::metadata_sys; +use crate::disk::error::DiskError; use crate::disk::error::{Error, Result}; use crate::disk::error_reduce::{BUCKET_OP_IGNORED_ERRS, is_all_buckets_not_found, reduce_write_quorum_errs}; -use crate::disk::{DiskAPI, DiskStore}; +use crate::disk::{DiskAPI, DiskStore, disk_store::get_max_timeout_duration}; use crate::global::GLOBAL_LOCAL_DISK_MAP; use crate::store::all_local_disk; use crate::store_utils::is_reserved_or_invalid_bucket; use crate::{ - disk::{self, VolumeInfo}, + disk::{ + self, VolumeInfo, + disk_store::{CHECK_EVERY, CHECK_TIMEOUT_DURATION, DiskHealthTracker}, + }, endpoints::{EndpointServerPools, Node}, store_api::{BucketInfo, BucketOptions, DeleteBucketOptions, MakeBucketOptions}, }; @@ -32,10 +36,11 @@ use rustfs_protos::node_service_time_out_client; use rustfs_protos::proto_gen::node_service::{ DeleteBucketRequest, GetBucketInfoRequest, HealBucketRequest, ListBucketRequest, MakeBucketRequest, }; -use std::{collections::HashMap, fmt::Debug, sync::Arc}; -use tokio::sync::RwLock; +use std::{collections::HashMap, fmt::Debug, sync::Arc, time::Duration}; +use tokio::{net::TcpStream, sync::RwLock, time}; +use tokio_util::sync::CancellationToken; use tonic::Request; -use tracing::info; +use tracing::{debug, info, warn}; type Client = Arc>; @@ -559,16 +564,160 @@ pub struct RemotePeerS3Client { pub node: Option, pub pools: Option>, addr: String, + /// Health tracker for connection monitoring + health: Arc, + /// Cancellation token for monitoring tasks + cancel_token: CancellationToken, } impl RemotePeerS3Client { pub fn new(node: Option, pools: Option>) -> Self { let addr = node.as_ref().map(|v| v.url.to_string()).unwrap_or_default().to_string(); - Self { node, pools, addr } + let client = Self { + node, + pools, + addr, + health: Arc::new(DiskHealthTracker::new()), + cancel_token: CancellationToken::new(), + }; + + // Start health monitoring + client.start_health_monitoring(); + + client } + pub fn get_addr(&self) -> String { self.addr.clone() } + + /// Start health monitoring for the remote peer + fn start_health_monitoring(&self) { + let health = Arc::clone(&self.health); + let cancel_token = self.cancel_token.clone(); + let addr = self.addr.clone(); + + tokio::spawn(async move { + Self::monitor_remote_peer_health(addr, health, cancel_token).await; + }); + } + + /// Monitor remote peer health periodically + async fn monitor_remote_peer_health(addr: String, health: Arc, cancel_token: CancellationToken) { + let mut interval = time::interval(CHECK_EVERY); + + loop { + tokio::select! 
{ + _ = cancel_token.cancelled() => { + debug!("Health monitoring cancelled for remote peer: {}", addr); + return; + } + _ = interval.tick() => { + if cancel_token.is_cancelled() { + return; + } + + // Skip health check if peer is already marked as faulty + if health.is_faulty() { + continue; + } + + // Perform basic connectivity check + if Self::perform_connectivity_check(&addr).await.is_err() && health.swap_ok_to_faulty() { + warn!("Remote peer health check failed for {}: marking as faulty", addr); + + // Start recovery monitoring + let health_clone = Arc::clone(&health); + let addr_clone = addr.clone(); + let cancel_clone = cancel_token.clone(); + + tokio::spawn(async move { + Self::monitor_remote_peer_recovery(addr_clone, health_clone, cancel_clone).await; + }); + } + } + } + } + } + + /// Monitor remote peer recovery and mark as healthy when recovered + async fn monitor_remote_peer_recovery(addr: String, health: Arc, cancel_token: CancellationToken) { + let mut interval = time::interval(Duration::from_secs(5)); // Check every 5 seconds + + loop { + tokio::select! { + _ = cancel_token.cancelled() => { + return; + } + _ = interval.tick() => { + if Self::perform_connectivity_check(&addr).await.is_ok() { + info!("Remote peer recovered: {}", addr); + health.set_ok(); + return; + } + } + } + } + } + + /// Perform basic connectivity check for remote peer + async fn perform_connectivity_check(addr: &str) -> Result<()> { + use tokio::time::timeout; + + let url = url::Url::parse(addr).map_err(|e| Error::other(format!("Invalid URL: {}", e)))?; + + let Some(host) = url.host_str() else { + return Err(Error::other("No host in URL".to_string())); + }; + + let port = url.port_or_known_default().unwrap_or(80); + + // Try to establish TCP connection + match timeout(CHECK_TIMEOUT_DURATION, TcpStream::connect((host, port))).await { + Ok(Ok(_)) => Ok(()), + _ => Err(Error::other(format!("Cannot connect to {}:{}", host, port))), + } + } + + /// Execute operation with timeout and health tracking + async fn execute_with_timeout(&self, operation: F, timeout_duration: Duration) -> Result + where + F: FnOnce() -> Fut, + Fut: std::future::Future>, + { + // Check if peer is faulty + if self.health.is_faulty() { + return Err(DiskError::FaultyDisk); + } + + // Record operation start + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_nanos() as i64; + self.health.last_started.store(now, std::sync::atomic::Ordering::Relaxed); + self.health.increment_waiting(); + + // Execute operation with timeout + let result = time::timeout(timeout_duration, operation()).await; + + match result { + Ok(operation_result) => { + // Log success and decrement waiting counter + if operation_result.is_ok() { + self.health.log_success(); + } + self.health.decrement_waiting(); + operation_result + } + Err(_) => { + // Timeout occurred, mark peer as potentially faulty + self.health.decrement_waiting(); + warn!("Remote peer operation timeout after {:?}", timeout_duration); + Err(Error::other(format!("Remote peer operation timeout after {:?}", timeout_duration))) + } + } + } } #[async_trait] @@ -578,115 +727,145 @@ impl PeerS3Client for RemotePeerS3Client { } async fn heal_bucket(&self, bucket: &str, opts: &HealOpts) -> Result { - let options: String = serde_json::to_string(opts)?; - let mut client = node_service_time_out_client(&self.addr) - .await - .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; - let request = Request::new(HealBucketRequest { - bucket: 
bucket.to_string(), - options, - }); - let response = client.heal_bucket(request).await?.into_inner(); - if !response.success { - return if let Some(err) = response.error { - Err(err.into()) - } else { - Err(Error::other("")) - }; - } + self.execute_with_timeout( + || async { + let options: String = serde_json::to_string(opts)?; + let mut client = node_service_time_out_client(&self.addr) + .await + .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; + let request = Request::new(HealBucketRequest { + bucket: bucket.to_string(), + options, + }); + let response = client.heal_bucket(request).await?.into_inner(); + if !response.success { + return if let Some(err) = response.error { + Err(err.into()) + } else { + Err(Error::other("")) + }; + } - Ok(HealResultItem { - heal_item_type: HealItemType::Bucket.to_string(), - bucket: bucket.to_string(), - set_count: 0, - ..Default::default() - }) + Ok(HealResultItem { + heal_item_type: HealItemType::Bucket.to_string(), + bucket: bucket.to_string(), + set_count: 0, + ..Default::default() + }) + }, + get_max_timeout_duration(), + ) + .await } async fn list_bucket(&self, opts: &BucketOptions) -> Result> { - let options = serde_json::to_string(opts)?; - let mut client = node_service_time_out_client(&self.addr) - .await - .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; - let request = Request::new(ListBucketRequest { options }); - let response = client.list_bucket(request).await?.into_inner(); - if !response.success { - return if let Some(err) = response.error { - Err(err.into()) - } else { - Err(Error::other("")) - }; - } - let bucket_infos = response - .bucket_infos - .into_iter() - .filter_map(|json_str| serde_json::from_str::(&json_str).ok()) - .collect(); + self.execute_with_timeout( + || async { + let options = serde_json::to_string(opts)?; + let mut client = node_service_time_out_client(&self.addr) + .await + .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; + let request = Request::new(ListBucketRequest { options }); + let response = client.list_bucket(request).await?.into_inner(); + if !response.success { + return if let Some(err) = response.error { + Err(err.into()) + } else { + Err(Error::other("")) + }; + } + let bucket_infos = response + .bucket_infos + .into_iter() + .filter_map(|json_str| serde_json::from_str::(&json_str).ok()) + .collect(); - Ok(bucket_infos) + Ok(bucket_infos) + }, + get_max_timeout_duration(), + ) + .await } async fn make_bucket(&self, bucket: &str, opts: &MakeBucketOptions) -> Result<()> { - let options = serde_json::to_string(opts)?; - let mut client = node_service_time_out_client(&self.addr) - .await - .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; - let request = Request::new(MakeBucketRequest { - name: bucket.to_string(), - options, - }); - let response = client.make_bucket(request).await?.into_inner(); + self.execute_with_timeout( + || async { + let options = serde_json::to_string(opts)?; + let mut client = node_service_time_out_client(&self.addr) + .await + .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; + let request = Request::new(MakeBucketRequest { + name: bucket.to_string(), + options, + }); + let response = client.make_bucket(request).await?.into_inner(); - // TODO: deal with error - if !response.success { - return if let Some(err) = response.error { - Err(err.into()) - } else { - Err(Error::other("")) - }; - } + // TODO: deal with error + if !response.success { + return if let Some(err) 
= response.error { + Err(err.into()) + } else { + Err(Error::other("")) + }; + } - Ok(()) + Ok(()) + }, + get_max_timeout_duration(), + ) + .await } async fn get_bucket_info(&self, bucket: &str, opts: &BucketOptions) -> Result { - let options = serde_json::to_string(opts)?; - let mut client = node_service_time_out_client(&self.addr) - .await - .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; - let request = Request::new(GetBucketInfoRequest { - bucket: bucket.to_string(), - options, - }); - let response = client.get_bucket_info(request).await?.into_inner(); - if !response.success { - return if let Some(err) = response.error { - Err(err.into()) - } else { - Err(Error::other("")) - }; - } - let bucket_info = serde_json::from_str::(&response.bucket_info)?; + self.execute_with_timeout( + || async { + let options = serde_json::to_string(opts)?; + let mut client = node_service_time_out_client(&self.addr) + .await + .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; + let request = Request::new(GetBucketInfoRequest { + bucket: bucket.to_string(), + options, + }); + let response = client.get_bucket_info(request).await?.into_inner(); + if !response.success { + return if let Some(err) = response.error { + Err(err.into()) + } else { + Err(Error::other("")) + }; + } + let bucket_info = serde_json::from_str::(&response.bucket_info)?; - Ok(bucket_info) + Ok(bucket_info) + }, + get_max_timeout_duration(), + ) + .await } async fn delete_bucket(&self, bucket: &str, _opts: &DeleteBucketOptions) -> Result<()> { - let mut client = node_service_time_out_client(&self.addr) - .await - .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; + self.execute_with_timeout( + || async { + let mut client = node_service_time_out_client(&self.addr) + .await + .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; - let request = Request::new(DeleteBucketRequest { - bucket: bucket.to_string(), - }); - let response = client.delete_bucket(request).await?.into_inner(); - if !response.success { - return if let Some(err) = response.error { - Err(err.into()) - } else { - Err(Error::other("")) - }; - } + let request = Request::new(DeleteBucketRequest { + bucket: bucket.to_string(), + }); + let response = client.delete_bucket(request).await?.into_inner(); + if !response.success { + return if let Some(err) = response.error { + Err(err.into()) + } else { + Err(Error::other("")) + }; + } - Ok(()) + Ok(()) + }, + get_max_timeout_duration(), + ) + .await } } diff --git a/crates/ecstore/src/rpc/remote_disk.rs b/crates/ecstore/src/rpc/remote_disk.rs index 5e024f0b..175ad3bf 100644 --- a/crates/ecstore/src/rpc/remote_disk.rs +++ b/crates/ecstore/src/rpc/remote_disk.rs @@ -12,7 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
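`RemotePeerS3Client` above reuses the same `DiskHealthTracker` that backs the disk wrappers: a monitor task periodically TCP-probes the peer, a failed probe flips the tracker to faulty and spawns a recovery task, and `execute_with_timeout` short-circuits every RPC with `DiskError::FaultyDisk` while the peer stays faulty. A minimal sketch of that state lifecycle, assuming `swap_ok_to_faulty` returns `true` only for the call that performs the ok-to-faulty transition (which is how it is used above); the function name below is hypothetical:

```rust
use std::sync::Arc;

use crate::disk::disk_store::DiskHealthTracker;

// Sketch only: the ok -> faulty -> ok transitions driven by the monitor and
// recovery tasks, without the actual TCP probing or tokio timers.
fn health_lifecycle_sketch() {
    let health = Arc::new(DiskHealthTracker::new());

    // A failed connectivity probe: only the task that wins this swap
    // logs the warning and spawns the recovery loop.
    if health.swap_ok_to_faulty() {
        assert!(health.is_faulty());
        // ...the recovery loop keeps probing until a connect succeeds...
        health.set_ok();
    }

    // Once recovered, execute_with_timeout stops rejecting operations.
    assert!(!health.is_faulty());
}
```

The `RemoteDisk` changes that follow apply the identical pattern, additionally gated by the `RUSTFS_DRIVE_ACTIVE_MONITORING` environment variable and the `DiskOption::health_check` flag.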
-use std::{path::PathBuf, time::Duration}; +use std::{ + path::PathBuf, + sync::{Arc, atomic::Ordering}, + time::Duration, +}; use bytes::Bytes; use futures::lock::Mutex; @@ -26,13 +30,21 @@ use rustfs_protos::{ StatVolumeRequest, UpdateMetadataRequest, VerifyFileRequest, WriteAllRequest, WriteMetadataRequest, }, }; +use rustfs_utils::string::parse_bool_with_default; +use tokio::time; +use tokio_util::sync::CancellationToken; +use tracing::{debug, info, warn}; use crate::disk::{ CheckPartsResp, DeleteOptions, DiskAPI, DiskInfo, DiskInfoOptions, DiskLocation, DiskOption, FileInfoVersions, ReadMultipleReq, ReadMultipleResp, ReadOptions, RenameDataResp, UpdateMetadataOpts, VolumeInfo, WalkDirOptions, + disk_store::{ + CHECK_EVERY, CHECK_TIMEOUT_DURATION, ENV_RUSTFS_DRIVE_ACTIVE_MONITORING, SKIP_IF_SUCCESS_BEFORE, get_max_timeout_duration, + }, endpoint::Endpoint, }; use crate::disk::{FileReader, FileWriter}; +use crate::disk::{disk_store::DiskHealthTracker, error::DiskError}; use crate::{ disk::error::{Error, Result}, rpc::build_auth_headers, @@ -42,7 +54,6 @@ use rustfs_protos::proto_gen::node_service::RenamePartRequest; use rustfs_rio::{HttpReader, HttpWriter}; use tokio::{io::AsyncWrite, net::TcpStream, time::timeout}; use tonic::Request; -use tracing::{debug, info}; use uuid::Uuid; #[derive(Debug)] @@ -52,12 +63,16 @@ pub struct RemoteDisk { pub url: url::Url, pub root: PathBuf, endpoint: Endpoint, + /// Whether health checking is enabled + health_check: bool, + /// Health tracker for connection monitoring + health: Arc, + /// Cancellation token for monitoring tasks + cancel_token: CancellationToken, } -const REMOTE_DISK_ONLINE_PROBE_TIMEOUT: Duration = Duration::from_millis(750); - impl RemoteDisk { - pub async fn new(ep: &Endpoint, _opt: &DiskOption) -> Result { + pub async fn new(ep: &Endpoint, opt: &DiskOption) -> Result { // let root = fs::canonicalize(ep.url.path()).await?; let root = PathBuf::from(ep.get_file_path()); let addr = if let Some(port) = ep.url.port() { @@ -65,13 +80,184 @@ impl RemoteDisk { } else { format!("{}://{}", ep.url.scheme(), ep.url.host_str().unwrap()) }; - Ok(Self { + + let env_health_check = std::env::var(ENV_RUSTFS_DRIVE_ACTIVE_MONITORING) + .map(|v| parse_bool_with_default(&v, true)) + .unwrap_or(true); + + let disk = Self { id: Mutex::new(None), - addr, + addr: addr.clone(), url: ep.url.clone(), root, endpoint: ep.clone(), - }) + health_check: opt.health_check && env_health_check, + health: Arc::new(DiskHealthTracker::new()), + cancel_token: CancellationToken::new(), + }; + + // Start health monitoring + disk.start_health_monitoring(); + + Ok(disk) + } + + /// Start health monitoring for the remote disk + fn start_health_monitoring(&self) { + if self.health_check { + let health = Arc::clone(&self.health); + let cancel_token = self.cancel_token.clone(); + let addr = self.addr.clone(); + + tokio::spawn(async move { + Self::monitor_remote_disk_health(addr, health, cancel_token).await; + }); + } + } + + /// Monitor remote disk health periodically + async fn monitor_remote_disk_health(addr: String, health: Arc, cancel_token: CancellationToken) { + let mut interval = time::interval(CHECK_EVERY); + + // Perform basic connectivity check + if Self::perform_connectivity_check(&addr).await.is_err() && health.swap_ok_to_faulty() { + warn!("Remote disk health check failed for {}: marking as faulty", addr); + + // Start recovery monitoring + let health_clone = Arc::clone(&health); + let addr_clone = addr.clone(); + let cancel_clone = cancel_token.clone(); + + 
tokio::spawn(async move { + Self::monitor_remote_disk_recovery(addr_clone, health_clone, cancel_clone).await; + }); + } + + loop { + tokio::select! { + _ = cancel_token.cancelled() => { + debug!("Health monitoring cancelled for remote disk: {}", addr); + return; + } + _ = interval.tick() => { + if cancel_token.is_cancelled() { + return; + } + + // Skip health check if disk is already marked as faulty + if health.is_faulty() { + continue; + } + + let last_success_nanos = health.last_success.load(Ordering::Relaxed); + let elapsed = Duration::from_nanos( + (std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_nanos() as i64 - last_success_nanos) as u64 + ); + + if elapsed < SKIP_IF_SUCCESS_BEFORE { + continue; + } + + // Perform basic connectivity check + if Self::perform_connectivity_check(&addr).await.is_err() && health.swap_ok_to_faulty() { + warn!("Remote disk health check failed for {}: marking as faulty", addr); + + // Start recovery monitoring + let health_clone = Arc::clone(&health); + let addr_clone = addr.clone(); + let cancel_clone = cancel_token.clone(); + + tokio::spawn(async move { + Self::monitor_remote_disk_recovery(addr_clone, health_clone, cancel_clone).await; + }); + } + } + } + } + } + + /// Monitor remote disk recovery and mark as healthy when recovered + async fn monitor_remote_disk_recovery(addr: String, health: Arc, cancel_token: CancellationToken) { + let mut interval = time::interval(CHECK_EVERY); + + loop { + tokio::select! { + _ = cancel_token.cancelled() => { + return; + } + _ = interval.tick() => { + if Self::perform_connectivity_check(&addr).await.is_ok() { + info!("Remote disk recovered: {}", addr); + health.set_ok(); + return; + } + } + } + } + } + + /// Perform basic connectivity check for remote disk + async fn perform_connectivity_check(addr: &str) -> Result<()> { + let url = url::Url::parse(addr).map_err(|e| Error::other(format!("Invalid URL: {}", e)))?; + + let Some(host) = url.host_str() else { + return Err(Error::other("No host in URL".to_string())); + }; + + let port = url.port_or_known_default().unwrap_or(80); + + // Try to establish TCP connection + match timeout(CHECK_TIMEOUT_DURATION, TcpStream::connect((host, port))).await { + Ok(Ok(stream)) => { + drop(stream); + Ok(()) + } + _ => Err(Error::other(format!("Cannot connect to {}:{}", host, port))), + } + } + + /// Execute operation with timeout and health tracking + async fn execute_with_timeout(&self, operation: F, timeout_duration: Duration) -> Result + where + F: FnOnce() -> Fut, + Fut: std::future::Future>, + { + // Check if disk is faulty + if self.health.is_faulty() { + warn!("disk {} health is faulty, returning error", self.to_string()); + return Err(DiskError::FaultyDisk); + } + + // Record operation start + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_nanos() as i64; + self.health.last_started.store(now, std::sync::atomic::Ordering::Relaxed); + self.health.increment_waiting(); + + // Execute operation with timeout + let result = time::timeout(timeout_duration, operation()).await; + + match result { + Ok(operation_result) => { + // Log success and decrement waiting counter + if operation_result.is_ok() { + self.health.log_success(); + } + self.health.decrement_waiting(); + operation_result + } + Err(_) => { + // Timeout occurred, mark disk as potentially faulty + self.health.decrement_waiting(); + warn!("Remote disk operation timeout after {:?}", timeout_duration); + Err(Error::other(format!("Remote disk 
operation timeout after {:?}", timeout_duration))) + } + } } } @@ -85,19 +271,8 @@ impl DiskAPI for RemoteDisk { #[tracing::instrument(skip(self))] async fn is_online(&self) -> bool { - let Some(host) = self.endpoint.url.host_str().map(|host| host.to_string()) else { - return false; - }; - - let port = self.endpoint.url.port_or_known_default().unwrap_or(80); - - match timeout(REMOTE_DISK_ONLINE_PROBE_TIMEOUT, TcpStream::connect((host, port))).await { - Ok(Ok(stream)) => { - drop(stream); - true - } - _ => false, - } + // If disk is marked as faulty, consider it offline + !self.health.is_faulty() } #[tracing::instrument(skip(self))] @@ -114,6 +289,7 @@ impl DiskAPI for RemoteDisk { } #[tracing::instrument(skip(self))] async fn close(&self) -> Result<()> { + self.cancel_token.cancel(); Ok(()) } #[tracing::instrument(skip(self))] @@ -164,108 +340,143 @@ impl DiskAPI for RemoteDisk { #[tracing::instrument(skip(self))] async fn make_volume(&self, volume: &str) -> Result<()> { info!("make_volume"); - let mut client = node_service_time_out_client(&self.addr) - .await - .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; - let request = Request::new(MakeVolumeRequest { - disk: self.endpoint.to_string(), - volume: volume.to_string(), - }); - let response = client.make_volume(request).await?.into_inner(); + self.execute_with_timeout( + || async { + let mut client = node_service_time_out_client(&self.addr) + .await + .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; + let request = Request::new(MakeVolumeRequest { + disk: self.endpoint.to_string(), + volume: volume.to_string(), + }); - if !response.success { - return Err(response.error.unwrap_or_default().into()); - } + let response = client.make_volume(request).await?.into_inner(); - Ok(()) + if !response.success { + return Err(response.error.unwrap_or_default().into()); + } + + Ok(()) + }, + get_max_timeout_duration(), + ) + .await } #[tracing::instrument(skip(self))] async fn make_volumes(&self, volumes: Vec<&str>) -> Result<()> { info!("make_volumes"); - let mut client = node_service_time_out_client(&self.addr) - .await - .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; - let request = Request::new(MakeVolumesRequest { - disk: self.endpoint.to_string(), - volumes: volumes.iter().map(|s| (*s).to_string()).collect(), - }); - let response = client.make_volumes(request).await?.into_inner(); + self.execute_with_timeout( + || async { + let mut client = node_service_time_out_client(&self.addr) + .await + .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; + let request = Request::new(MakeVolumesRequest { + disk: self.endpoint.to_string(), + volumes: volumes.iter().map(|s| (*s).to_string()).collect(), + }); - if !response.success { - return Err(response.error.unwrap_or_default().into()); - } + let response = client.make_volumes(request).await?.into_inner(); - Ok(()) + if !response.success { + return Err(response.error.unwrap_or_default().into()); + } + + Ok(()) + }, + get_max_timeout_duration(), + ) + .await } #[tracing::instrument(skip(self))] async fn list_volumes(&self) -> Result> { info!("list_volumes"); - let mut client = node_service_time_out_client(&self.addr) - .await - .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; - let request = Request::new(ListVolumesRequest { - disk: self.endpoint.to_string(), - }); - let response = client.list_volumes(request).await?.into_inner(); + self.execute_with_timeout( + || async { + let mut client 
= node_service_time_out_client(&self.addr) + .await + .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; + let request = Request::new(ListVolumesRequest { + disk: self.endpoint.to_string(), + }); - if !response.success { - return Err(response.error.unwrap_or_default().into()); - } + let response = client.list_volumes(request).await?.into_inner(); - let infos = response - .volume_infos - .into_iter() - .filter_map(|json_str| serde_json::from_str::(&json_str).ok()) - .collect(); + if !response.success { + return Err(response.error.unwrap_or_default().into()); + } - Ok(infos) + let infos = response + .volume_infos + .into_iter() + .filter_map(|json_str| serde_json::from_str::(&json_str).ok()) + .collect(); + + Ok(infos) + }, + Duration::ZERO, + ) + .await } #[tracing::instrument(skip(self))] async fn stat_volume(&self, volume: &str) -> Result { info!("stat_volume"); - let mut client = node_service_time_out_client(&self.addr) - .await - .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; - let request = Request::new(StatVolumeRequest { - disk: self.endpoint.to_string(), - volume: volume.to_string(), - }); - let response = client.stat_volume(request).await?.into_inner(); + self.execute_with_timeout( + || async { + let mut client = node_service_time_out_client(&self.addr) + .await + .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; + let request = Request::new(StatVolumeRequest { + disk: self.endpoint.to_string(), + volume: volume.to_string(), + }); - if !response.success { - return Err(response.error.unwrap_or_default().into()); - } + let response = client.stat_volume(request).await?.into_inner(); - let volume_info = serde_json::from_str::(&response.volume_info)?; + if !response.success { + return Err(response.error.unwrap_or_default().into()); + } - Ok(volume_info) + let volume_info = serde_json::from_str::(&response.volume_info)?; + + Ok(volume_info) + }, + get_max_timeout_duration(), + ) + .await } #[tracing::instrument(skip(self))] async fn delete_volume(&self, volume: &str) -> Result<()> { info!("delete_volume {}/{}", self.endpoint.to_string(), volume); - let mut client = node_service_time_out_client(&self.addr) - .await - .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; - let request = Request::new(DeleteVolumeRequest { - disk: self.endpoint.to_string(), - volume: volume.to_string(), - }); - let response = client.delete_volume(request).await?.into_inner(); + self.execute_with_timeout( + || async { + let mut client = node_service_time_out_client(&self.addr) + .await + .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; + let request = Request::new(DeleteVolumeRequest { + disk: self.endpoint.to_string(), + volume: volume.to_string(), + }); - if !response.success { - return Err(response.error.unwrap_or_default().into()); - } + let response = client.delete_volume(request).await?.into_inner(); - Ok(()) + if !response.success { + return Err(response.error.unwrap_or_default().into()); + } + + Ok(()) + }, + Duration::ZERO, + ) + .await } // // FIXME: TODO: use writer @@ -328,36 +539,47 @@ impl DiskAPI for RemoteDisk { opts: DeleteOptions, ) -> Result<()> { info!("delete_version"); - let file_info = serde_json::to_string(&fi)?; - let opts = serde_json::to_string(&opts)?; - let mut client = node_service_time_out_client(&self.addr) - .await - .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; - let request = Request::new(DeleteVersionRequest { - disk: 
self.endpoint.to_string(), - volume: volume.to_string(), - path: path.to_string(), - file_info, - force_del_marker, - opts, - }); + self.execute_with_timeout( + || async { + let file_info = serde_json::to_string(&fi)?; + let opts = serde_json::to_string(&opts)?; - let response = client.delete_version(request).await?.into_inner(); + let mut client = node_service_time_out_client(&self.addr) + .await + .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; + let request = Request::new(DeleteVersionRequest { + disk: self.endpoint.to_string(), + volume: volume.to_string(), + path: path.to_string(), + file_info, + force_del_marker, + opts, + }); - if !response.success { - return Err(response.error.unwrap_or_default().into()); - } + let response = client.delete_version(request).await?.into_inner(); - // let raw_file_info = serde_json::from_str::(&response.raw_file_info)?; + if !response.success { + return Err(response.error.unwrap_or_default().into()); + } - Ok(()) + // let raw_file_info = serde_json::from_str::(&response.raw_file_info)?; + + Ok(()) + }, + get_max_timeout_duration(), + ) + .await } #[tracing::instrument(skip(self))] async fn delete_versions(&self, volume: &str, versions: Vec, opts: DeleteOptions) -> Vec> { info!("delete_versions"); + if self.health.is_faulty() { + return vec![Some(DiskError::FaultyDisk); versions.len()]; + } + let opts = match serde_json::to_string(&opts) { Ok(opts) => opts, Err(err) => { @@ -401,12 +623,24 @@ impl DiskAPI for RemoteDisk { // TODO: use Error not string - let response = match client.delete_versions(request).await { + let result = self + .execute_with_timeout( + || async { + client + .delete_versions(request) + .await + .map_err(|err| Error::other(format!("delete_versions failed: {err}"))) + }, + get_max_timeout_duration(), + ) + .await; + + let response = match result { Ok(response) => response, Err(err) => { let mut errors = Vec::with_capacity(versions.len()); for _ in 0..versions.len() { - errors.push(Some(Error::other(err.to_string()))); + errors.push(Some(err.clone())); } return errors; } @@ -437,71 +671,91 @@ impl DiskAPI for RemoteDisk { async fn delete_paths(&self, volume: &str, paths: &[String]) -> Result<()> { info!("delete_paths"); let paths = paths.to_owned(); - let mut client = node_service_time_out_client(&self.addr) - .await - .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; - let request = Request::new(DeletePathsRequest { - disk: self.endpoint.to_string(), - volume: volume.to_string(), - paths, - }); - let response = client.delete_paths(request).await?.into_inner(); + self.execute_with_timeout( + || async { + let mut client = node_service_time_out_client(&self.addr) + .await + .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; + let request = Request::new(DeletePathsRequest { + disk: self.endpoint.to_string(), + volume: volume.to_string(), + paths: paths.clone(), + }); - if !response.success { - return Err(response.error.unwrap_or_default().into()); - } + let response = client.delete_paths(request).await?.into_inner(); - Ok(()) + if !response.success { + return Err(response.error.unwrap_or_default().into()); + } + + Ok(()) + }, + get_max_timeout_duration(), + ) + .await } #[tracing::instrument(skip(self))] async fn write_metadata(&self, _org_volume: &str, volume: &str, path: &str, fi: FileInfo) -> Result<()> { info!("write_metadata {}/{}", volume, path); let file_info = serde_json::to_string(&fi)?; - let mut client = node_service_time_out_client(&self.addr) - .await 
- .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; - let request = Request::new(WriteMetadataRequest { - disk: self.endpoint.to_string(), - volume: volume.to_string(), - path: path.to_string(), - file_info, - }); - let response = client.write_metadata(request).await?.into_inner(); + self.execute_with_timeout( + || async { + let mut client = node_service_time_out_client(&self.addr) + .await + .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; + let request = Request::new(WriteMetadataRequest { + disk: self.endpoint.to_string(), + volume: volume.to_string(), + path: path.to_string(), + file_info: file_info.clone(), + }); - if !response.success { - return Err(response.error.unwrap_or_default().into()); - } + let response = client.write_metadata(request).await?.into_inner(); - Ok(()) + if !response.success { + return Err(response.error.unwrap_or_default().into()); + } + + Ok(()) + }, + get_max_timeout_duration(), + ) + .await } #[tracing::instrument(skip(self))] async fn update_metadata(&self, volume: &str, path: &str, fi: FileInfo, opts: &UpdateMetadataOpts) -> Result<()> { info!("update_metadata"); let file_info = serde_json::to_string(&fi)?; - let opts = serde_json::to_string(&opts)?; + let opts_str = serde_json::to_string(&opts)?; - let mut client = node_service_time_out_client(&self.addr) - .await - .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; - let request = Request::new(UpdateMetadataRequest { - disk: self.endpoint.to_string(), - volume: volume.to_string(), - path: path.to_string(), - file_info, - opts, - }); + self.execute_with_timeout( + || async { + let mut client = node_service_time_out_client(&self.addr) + .await + .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; + let request = Request::new(UpdateMetadataRequest { + disk: self.endpoint.to_string(), + volume: volume.to_string(), + path: path.to_string(), + file_info: file_info.clone(), + opts: opts_str.clone(), + }); - let response = client.update_metadata(request).await?.into_inner(); + let response = client.update_metadata(request).await?.into_inner(); - if !response.success { - return Err(response.error.unwrap_or_default().into()); - } + if !response.success { + return Err(response.error.unwrap_or_default().into()); + } - Ok(()) + Ok(()) + }, + get_max_timeout_duration(), + ) + .await } #[tracing::instrument(skip(self))] @@ -514,51 +768,65 @@ impl DiskAPI for RemoteDisk { opts: &ReadOptions, ) -> Result { info!("read_version"); - let opts = serde_json::to_string(opts)?; - let mut client = node_service_time_out_client(&self.addr) - .await - .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; - let request = Request::new(ReadVersionRequest { - disk: self.endpoint.to_string(), - volume: volume.to_string(), - path: path.to_string(), - version_id: version_id.to_string(), - opts, - }); + let opts_str = serde_json::to_string(opts)?; - let response = client.read_version(request).await?.into_inner(); + self.execute_with_timeout( + || async { + let mut client = node_service_time_out_client(&self.addr) + .await + .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; + let request = Request::new(ReadVersionRequest { + disk: self.endpoint.to_string(), + volume: volume.to_string(), + path: path.to_string(), + version_id: version_id.to_string(), + opts: opts_str.clone(), + }); - if !response.success { - return Err(response.error.unwrap_or_default().into()); - } + let response = 
client.read_version(request).await?.into_inner(); - let file_info = serde_json::from_str::(&response.file_info)?; + if !response.success { + return Err(response.error.unwrap_or_default().into()); + } - Ok(file_info) + let file_info = serde_json::from_str::(&response.file_info)?; + + Ok(file_info) + }, + get_max_timeout_duration(), + ) + .await } #[tracing::instrument(level = "debug", skip(self))] async fn read_xl(&self, volume: &str, path: &str, read_data: bool) -> Result { info!("read_xl {}/{}/{}", self.endpoint.to_string(), volume, path); - let mut client = node_service_time_out_client(&self.addr) - .await - .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; - let request = Request::new(ReadXlRequest { - disk: self.endpoint.to_string(), - volume: volume.to_string(), - path: path.to_string(), - read_data, - }); - let response = client.read_xl(request).await?.into_inner(); + self.execute_with_timeout( + || async { + let mut client = node_service_time_out_client(&self.addr) + .await + .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; + let request = Request::new(ReadXlRequest { + disk: self.endpoint.to_string(), + volume: volume.to_string(), + path: path.to_string(), + read_data, + }); - if !response.success { - return Err(response.error.unwrap_or_default().into()); - } + let response = client.read_xl(request).await?.into_inner(); - let raw_file_info = serde_json::from_str::(&response.raw_file_info)?; + if !response.success { + return Err(response.error.unwrap_or_default().into()); + } - Ok(raw_file_info) + let raw_file_info = serde_json::from_str::(&response.raw_file_info)?; + + Ok(raw_file_info) + }, + get_max_timeout_duration(), + ) + .await } #[tracing::instrument(skip(self))] @@ -571,33 +839,45 @@ impl DiskAPI for RemoteDisk { dst_path: &str, ) -> Result { info!("rename_data {}/{}/{}/{}", self.addr, self.endpoint.to_string(), dst_volume, dst_path); - let file_info = serde_json::to_string(&fi)?; - let mut client = node_service_time_out_client(&self.addr) - .await - .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; - let request = Request::new(RenameDataRequest { - disk: self.endpoint.to_string(), - src_volume: src_volume.to_string(), - src_path: src_path.to_string(), - file_info, - dst_volume: dst_volume.to_string(), - dst_path: dst_path.to_string(), - }); - let response = client.rename_data(request).await?.into_inner(); + self.execute_with_timeout( + || async { + let file_info = serde_json::to_string(&fi)?; + let mut client = node_service_time_out_client(&self.addr) + .await + .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; + let request = Request::new(RenameDataRequest { + disk: self.endpoint.to_string(), + src_volume: src_volume.to_string(), + src_path: src_path.to_string(), + file_info, + dst_volume: dst_volume.to_string(), + dst_path: dst_path.to_string(), + }); - if !response.success { - return Err(response.error.unwrap_or_default().into()); - } + let response = client.rename_data(request).await?.into_inner(); - let rename_data_resp = serde_json::from_str::(&response.rename_data_resp)?; + if !response.success { + return Err(response.error.unwrap_or_default().into()); + } - Ok(rename_data_resp) + let rename_data_resp = serde_json::from_str::(&response.rename_data_resp)?; + + Ok(rename_data_resp) + }, + get_max_timeout_duration(), + ) + .await } #[tracing::instrument(skip(self))] async fn list_dir(&self, _origvolume: &str, volume: &str, dir_path: &str, count: i32) -> Result> { 
debug!("list_dir {}/{}", volume, dir_path); + + if self.health.is_faulty() { + return Err(DiskError::FaultyDisk); + } + let mut client = node_service_time_out_client(&self.addr) .await .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; @@ -621,6 +901,10 @@ impl DiskAPI for RemoteDisk { async fn walk_dir(&self, opts: WalkDirOptions, wr: &mut W) -> Result<()> { info!("walk_dir {}", self.endpoint.to_string()); + if self.health.is_faulty() { + return Err(DiskError::FaultyDisk); + } + let url = format!( "{}/rustfs/rpc/walk_dir?disk={}", self.endpoint.grid_host(), @@ -644,6 +928,10 @@ impl DiskAPI for RemoteDisk { async fn read_file(&self, volume: &str, path: &str) -> Result { info!("read_file {}/{}", volume, path); + if self.health.is_faulty() { + return Err(DiskError::FaultyDisk); + } + let url = format!( "{}/rustfs/rpc/read_file_stream?disk={}&volume={}&path={}&offset={}&length={}", self.endpoint.grid_host(), @@ -670,6 +958,11 @@ impl DiskAPI for RemoteDisk { // offset, // length // ); + + if self.health.is_faulty() { + return Err(DiskError::FaultyDisk); + } + let url = format!( "{}/rustfs/rpc/read_file_stream?disk={}&volume={}&path={}&offset={}&length={}", self.endpoint.grid_host(), @@ -690,6 +983,10 @@ impl DiskAPI for RemoteDisk { async fn append_file(&self, volume: &str, path: &str) -> Result { info!("append_file {}/{}", volume, path); + if self.health.is_faulty() { + return Err(DiskError::FaultyDisk); + } + let url = format!( "{}/rustfs/rpc/put_file_stream?disk={}&volume={}&path={}&append={}&size={}", self.endpoint.grid_host(), @@ -716,6 +1013,10 @@ impl DiskAPI for RemoteDisk { // file_size // ); + if self.health.is_faulty() { + return Err(DiskError::FaultyDisk); + } + let url = format!( "{}/rustfs/rpc/put_file_stream?disk={}&volume={}&path={}&append={}&size={}", self.endpoint.grid_host(), @@ -735,216 +1036,282 @@ impl DiskAPI for RemoteDisk { #[tracing::instrument(level = "debug", skip(self))] async fn rename_file(&self, src_volume: &str, src_path: &str, dst_volume: &str, dst_path: &str) -> Result<()> { info!("rename_file"); - let mut client = node_service_time_out_client(&self.addr) - .await - .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; - let request = Request::new(RenameFileRequest { - disk: self.endpoint.to_string(), - src_volume: src_volume.to_string(), - src_path: src_path.to_string(), - dst_volume: dst_volume.to_string(), - dst_path: dst_path.to_string(), - }); - let response = client.rename_file(request).await?.into_inner(); + self.execute_with_timeout( + || async { + let mut client = node_service_time_out_client(&self.addr) + .await + .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; + let request = Request::new(RenameFileRequest { + disk: self.endpoint.to_string(), + src_volume: src_volume.to_string(), + src_path: src_path.to_string(), + dst_volume: dst_volume.to_string(), + dst_path: dst_path.to_string(), + }); - if !response.success { - return Err(response.error.unwrap_or_default().into()); - } + let response = client.rename_file(request).await?.into_inner(); - Ok(()) + if !response.success { + return Err(response.error.unwrap_or_default().into()); + } + + Ok(()) + }, + get_max_timeout_duration(), + ) + .await } #[tracing::instrument(skip(self))] async fn rename_part(&self, src_volume: &str, src_path: &str, dst_volume: &str, dst_path: &str, meta: Bytes) -> Result<()> { info!("rename_part {}/{}", src_volume, src_path); - let mut client = node_service_time_out_client(&self.addr) - .await - 
.map_err(|err| Error::other(format!("can not get client, err: {err}")))?; - let request = Request::new(RenamePartRequest { - disk: self.endpoint.to_string(), - src_volume: src_volume.to_string(), - src_path: src_path.to_string(), - dst_volume: dst_volume.to_string(), - dst_path: dst_path.to_string(), - meta, - }); - let response = client.rename_part(request).await?.into_inner(); + self.execute_with_timeout( + || async { + let mut client = node_service_time_out_client(&self.addr) + .await + .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; + let request = Request::new(RenamePartRequest { + disk: self.endpoint.to_string(), + src_volume: src_volume.to_string(), + src_path: src_path.to_string(), + dst_volume: dst_volume.to_string(), + dst_path: dst_path.to_string(), + meta, + }); - if !response.success { - return Err(response.error.unwrap_or_default().into()); - } + let response = client.rename_part(request).await?.into_inner(); - Ok(()) + if !response.success { + return Err(response.error.unwrap_or_default().into()); + } + + Ok(()) + }, + get_max_timeout_duration(), + ) + .await } #[tracing::instrument(skip(self))] async fn delete(&self, volume: &str, path: &str, opt: DeleteOptions) -> Result<()> { info!("delete {}/{}/{}", self.endpoint.to_string(), volume, path); - let options = serde_json::to_string(&opt)?; - let mut client = node_service_time_out_client(&self.addr) - .await - .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; - let request = Request::new(DeleteRequest { - disk: self.endpoint.to_string(), - volume: volume.to_string(), - path: path.to_string(), - options, - }); - let response = client.delete(request).await?.into_inner(); + self.execute_with_timeout( + || async { + let options = serde_json::to_string(&opt)?; + let mut client = node_service_time_out_client(&self.addr) + .await + .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; + let request = Request::new(DeleteRequest { + disk: self.endpoint.to_string(), + volume: volume.to_string(), + path: path.to_string(), + options, + }); - if !response.success { - return Err(response.error.unwrap_or_default().into()); - } + let response = client.delete(request).await?.into_inner(); - Ok(()) + if !response.success { + return Err(response.error.unwrap_or_default().into()); + } + + Ok(()) + }, + get_max_timeout_duration(), + ) + .await } #[tracing::instrument(skip(self))] async fn verify_file(&self, volume: &str, path: &str, fi: &FileInfo) -> Result { info!("verify_file"); - let file_info = serde_json::to_string(&fi)?; - let mut client = node_service_time_out_client(&self.addr) - .await - .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; - let request = Request::new(VerifyFileRequest { - disk: self.endpoint.to_string(), - volume: volume.to_string(), - path: path.to_string(), - file_info, - }); - let response = client.verify_file(request).await?.into_inner(); + self.execute_with_timeout( + || async { + let file_info = serde_json::to_string(&fi)?; + let mut client = node_service_time_out_client(&self.addr) + .await + .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; + let request = Request::new(VerifyFileRequest { + disk: self.endpoint.to_string(), + volume: volume.to_string(), + path: path.to_string(), + file_info, + }); - if !response.success { - return Err(response.error.unwrap_or_default().into()); - } + let response = client.verify_file(request).await?.into_inner(); - let check_parts_resp = 
serde_json::from_str::<CheckPartsResp>(&response.check_parts_resp)?; + if !response.success { + return Err(response.error.unwrap_or_default().into()); + } - Ok(check_parts_resp) + let check_parts_resp = serde_json::from_str::<CheckPartsResp>(&response.check_parts_resp)?; + + Ok(check_parts_resp) + }, + get_max_timeout_duration(), + ) + .await } #[tracing::instrument(skip(self))] async fn read_parts(&self, bucket: &str, paths: &[String]) -> Result<Vec<ObjectPartInfo>> { - let mut client = node_service_time_out_client(&self.addr) - .await - .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; - let request = Request::new(ReadPartsRequest { - disk: self.endpoint.to_string(), - bucket: bucket.to_string(), - paths: paths.to_vec(), - }); + self.execute_with_timeout( + || async { + let mut client = node_service_time_out_client(&self.addr) + .await + .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; + let request = Request::new(ReadPartsRequest { + disk: self.endpoint.to_string(), + bucket: bucket.to_string(), + paths: paths.to_vec(), + }); - let response = client.read_parts(request).await?.into_inner(); - if !response.success { - return Err(response.error.unwrap_or_default().into()); - } + let response = client.read_parts(request).await?.into_inner(); + if !response.success { + return Err(response.error.unwrap_or_default().into()); + } - let read_parts_resp = rmp_serde::from_slice::<Vec<ObjectPartInfo>>(&response.object_part_infos)?; + let read_parts_resp = rmp_serde::from_slice::<Vec<ObjectPartInfo>>(&response.object_part_infos)?; - Ok(read_parts_resp) + Ok(read_parts_resp) + }, + get_max_timeout_duration(), + ) + .await } #[tracing::instrument(skip(self))] async fn check_parts(&self, volume: &str, path: &str, fi: &FileInfo) -> Result<CheckPartsResp> { info!("check_parts"); - let file_info = serde_json::to_string(&fi)?; - let mut client = node_service_time_out_client(&self.addr) - .await - .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; - let request = Request::new(CheckPartsRequest { - disk: self.endpoint.to_string(), - volume: volume.to_string(), - path: path.to_string(), - file_info, - }); - let response = client.check_parts(request).await?.into_inner(); + self.execute_with_timeout( + || async { + let file_info = serde_json::to_string(&fi)?; + let mut client = node_service_time_out_client(&self.addr) + .await + .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; + let request = Request::new(CheckPartsRequest { + disk: self.endpoint.to_string(), + volume: volume.to_string(), + path: path.to_string(), + file_info, + }); - if !response.success { - return Err(response.error.unwrap_or_default().into()); - } + let response = client.check_parts(request).await?.into_inner(); - let check_parts_resp = serde_json::from_str::<CheckPartsResp>(&response.check_parts_resp)?; + if !response.success { + return Err(response.error.unwrap_or_default().into()); + } - Ok(check_parts_resp) + let check_parts_resp = serde_json::from_str::<CheckPartsResp>(&response.check_parts_resp)?; + + Ok(check_parts_resp) + }, + get_max_timeout_duration(), + ) + .await } #[tracing::instrument(skip(self))] async fn read_multiple(&self, req: ReadMultipleReq) -> Result<Vec<ReadMultipleResp>> { info!("read_multiple {}/{}/{}", self.endpoint.to_string(), req.bucket, req.prefix); - let read_multiple_req = serde_json::to_string(&req)?; - let mut client = node_service_time_out_client(&self.addr) - .await - .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; - let request = Request::new(ReadMultipleRequest { - disk: self.endpoint.to_string(), - read_multiple_req, - }); - let response = 
client.read_multiple(request).await?.into_inner(); + self.execute_with_timeout( + || async { + let read_multiple_req = serde_json::to_string(&req)?; + let mut client = node_service_time_out_client(&self.addr) + .await + .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; + let request = Request::new(ReadMultipleRequest { + disk: self.endpoint.to_string(), + read_multiple_req, + }); - if !response.success { - return Err(response.error.unwrap_or_default().into()); - } + let response = client.read_multiple(request).await?.into_inner(); - let read_multiple_resps = response - .read_multiple_resps - .into_iter() - .filter_map(|json_str| serde_json::from_str::<ReadMultipleResp>(&json_str).ok()) - .collect(); + if !response.success { + return Err(response.error.unwrap_or_default().into()); + } - Ok(read_multiple_resps) + let read_multiple_resps = response + .read_multiple_resps + .into_iter() + .filter_map(|json_str| serde_json::from_str::<ReadMultipleResp>(&json_str).ok()) + .collect(); + + Ok(read_multiple_resps) + }, + get_max_timeout_duration(), + ) + .await } #[tracing::instrument(skip(self))] async fn write_all(&self, volume: &str, path: &str, data: Bytes) -> Result<()> { info!("write_all"); - let mut client = node_service_time_out_client(&self.addr) - .await - .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; - let request = Request::new(WriteAllRequest { - disk: self.endpoint.to_string(), - volume: volume.to_string(), - path: path.to_string(), - data, - }); - let response = client.write_all(request).await?.into_inner(); + self.execute_with_timeout( + || async { + let mut client = node_service_time_out_client(&self.addr) + .await + .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; + let request = Request::new(WriteAllRequest { + disk: self.endpoint.to_string(), + volume: volume.to_string(), + path: path.to_string(), + data, + }); - if !response.success { - return Err(response.error.unwrap_or_default().into()); - } + let response = client.write_all(request).await?.into_inner(); - Ok(()) + if !response.success { + return Err(response.error.unwrap_or_default().into()); + } + + Ok(()) + }, + get_max_timeout_duration(), + ) + .await } #[tracing::instrument(skip(self))] async fn read_all(&self, volume: &str, path: &str) -> Result<Bytes> { info!("read_all {}/{}", volume, path); - let mut client = node_service_time_out_client(&self.addr) - .await - .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; - let request = Request::new(ReadAllRequest { - disk: self.endpoint.to_string(), - volume: volume.to_string(), - path: path.to_string(), - }); - let response = client.read_all(request).await?.into_inner(); + self.execute_with_timeout( + || async { + let mut client = node_service_time_out_client(&self.addr) + .await + .map_err(|err| Error::other(format!("can not get client, err: {err}")))?; + let request = Request::new(ReadAllRequest { + disk: self.endpoint.to_string(), + volume: volume.to_string(), + path: path.to_string(), + }); - if !response.success { - return Err(response.error.unwrap_or_default().into()); - } + let response = client.read_all(request).await?.into_inner(); - Ok(response.data) + if !response.success { + return Err(response.error.unwrap_or_default().into()); + } + + Ok(response.data) + }, + get_max_timeout_duration(), + ) + .await } #[tracing::instrument(skip(self))] async fn disk_info(&self, opts: &DiskInfoOptions) -> Result<DiskInfo> { + if self.health.is_faulty() { + return Err(DiskError::FaultyDisk); + } + let opts = 
serde_json::to_string(&opts)?; let mut client = node_service_time_out_client(&self.addr) .await @@ -969,9 +1336,24 @@ impl DiskAPI for RemoteDisk { #[cfg(test)] mod tests { use super::*; + use std::sync::Once; use tokio::net::TcpListener; + use tracing::Level; use uuid::Uuid; + static INIT: Once = Once::new(); + + fn init_tracing(filter_level: Level) { + INIT.call_once(|| { + let _ = tracing_subscriber::fmt() + .with_env_filter(tracing_subscriber::EnvFilter::from_default_env()) + .with_max_level(filter_level) + .with_timer(tracing_subscriber::fmt::time::UtcTime::rfc_3339()) + .with_thread_names(true) + .try_init(); + }); + } + #[tokio::test] async fn test_remote_disk_creation() { let url = url::Url::parse("http://example.com:9000/path").unwrap(); @@ -1080,6 +1462,8 @@ mod tests { #[tokio::test] async fn test_remote_disk_is_online_detects_missing_listener() { + init_tracing(Level::ERROR); + let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); let addr = listener.local_addr().unwrap(); let ip = addr.ip(); @@ -1098,10 +1482,14 @@ mod tests { let disk_option = DiskOption { cleanup: false, - health_check: false, + health_check: true, }; let remote_disk = RemoteDisk::new(&endpoint, &disk_option).await.unwrap(); + + // wait for health check connect timeout + tokio::time::sleep(Duration::from_secs(6)).await; + assert!(!remote_disk.is_online().await); } diff --git a/crates/ecstore/src/set_disk.rs b/crates/ecstore/src/set_disk.rs index 054934e6..d823f784 100644 --- a/crates/ecstore/src/set_disk.rs +++ b/crates/ecstore/src/set_disk.rs @@ -174,56 +174,56 @@ impl SetDisks { }) } - async fn cached_disk_health(&self, index: usize) -> Option { - let cache = self.disk_health_cache.read().await; - cache - .get(index) - .and_then(|entry| entry.as_ref().and_then(|state| state.cached_value())) - } + // async fn cached_disk_health(&self, index: usize) -> Option { + // let cache = self.disk_health_cache.read().await; + // cache + // .get(index) + // .and_then(|entry| entry.as_ref().and_then(|state| state.cached_value())) + // } - async fn update_disk_health(&self, index: usize, online: bool) { - let mut cache = self.disk_health_cache.write().await; - if cache.len() <= index { - cache.resize(index + 1, None); - } - cache[index] = Some(DiskHealthEntry { - last_check: Instant::now(), - online, - }); - } + // async fn update_disk_health(&self, index: usize, online: bool) { + // let mut cache = self.disk_health_cache.write().await; + // if cache.len() <= index { + // cache.resize(index + 1, None); + // } + // cache[index] = Some(DiskHealthEntry { + // last_check: Instant::now(), + // online, + // }); + // } - async fn is_disk_online_cached(&self, index: usize, disk: &DiskStore) -> bool { - if let Some(online) = self.cached_disk_health(index).await { - return online; - } + // async fn is_disk_online_cached(&self, index: usize, disk: &DiskStore) -> bool { + // if let Some(online) = self.cached_disk_health(index).await { + // return online; + // } - let disk_clone = disk.clone(); - let online = timeout(DISK_ONLINE_TIMEOUT, async move { disk_clone.is_online().await }) - .await - .unwrap_or(false); - self.update_disk_health(index, online).await; - online - } + // let disk_clone = disk.clone(); + // let online = timeout(DISK_ONLINE_TIMEOUT, async move { disk_clone.is_online().await }) + // .await + // .unwrap_or(false); + // self.update_disk_health(index, online).await; + // online + // } - async fn filter_online_disks(&self, disks: Vec>) -> (Vec>, usize) { - let mut filtered = Vec::with_capacity(disks.len()); - 
let mut online_count = 0; + // async fn filter_online_disks(&self, disks: Vec>) -> (Vec>, usize) { + // let mut filtered = Vec::with_capacity(disks.len()); + // let mut online_count = 0; - for (idx, disk) in disks.into_iter().enumerate() { - if let Some(disk_store) = disk { - if self.is_disk_online_cached(idx, &disk_store).await { - filtered.push(Some(disk_store)); - online_count += 1; - } else { - filtered.push(None); - } - } else { - filtered.push(None); - } - } + // for (idx, disk) in disks.into_iter().enumerate() { + // if let Some(disk_store) = disk { + // if self.is_disk_online_cached(idx, &disk_store).await { + // filtered.push(Some(disk_store)); + // online_count += 1; + // } else { + // filtered.push(None); + // } + // } else { + // filtered.push(None); + // } + // } - (filtered, online_count) - } + // (filtered, online_count) + // } fn format_lock_error(&self, bucket: &str, object: &str, mode: &str, err: &LockResult) -> String { match err { LockResult::Timeout => { @@ -259,9 +259,28 @@ impl SetDisks { } async fn get_online_disks(&self) -> Vec> { - let disks = self.get_disks_internal().await; - let (filtered, _) = self.filter_online_disks(disks).await; - filtered.into_iter().filter(|disk| disk.is_some()).collect() + let mut disks = self.get_disks_internal().await; + + // TODO: diskinfo filter online + + let mut new_disk = Vec::with_capacity(disks.len()); + + for disk in disks.iter() { + if let Some(d) = disk { + if d.is_online().await { + new_disk.push(disk.clone()); + } + } + } + + let mut rng = rand::rng(); + + disks.shuffle(&mut rng); + + new_disk + // let disks = self.get_disks_internal().await; + // let (filtered, _) = self.filter_online_disks(disks).await; + // filtered.into_iter().filter(|disk| disk.is_some()).collect() } async fn get_online_local_disks(&self) -> Vec> { let mut disks = self.get_online_disks().await; @@ -1467,7 +1486,9 @@ impl SetDisks { let object = object.clone(); let version_id = version_id.clone(); tokio::spawn(async move { - if let Some(disk) = disk { + if let Some(disk) = disk + && disk.is_online().await + { if version_id.is_empty() { match disk.read_xl(&bucket, &object, read_data).await { Ok(info) => { @@ -1799,14 +1820,14 @@ impl SetDisks { } pub async fn renew_disk(&self, ep: &Endpoint) { - debug!("renew_disk start {:?}", ep); + debug!("renew_disk: start {:?}", ep); let (new_disk, fm) = match Self::connect_endpoint(ep).await { Ok(res) => res, Err(e) => { - warn!("connect_endpoint err {:?}", &e); + warn!("renew_disk: connect_endpoint err {:?}", &e); if ep.is_local && e == DiskError::UnformattedDisk { - info!("unformatteddisk will trigger heal_disk, {:?}", ep); + info!("renew_disk unformatteddisk will trigger heal_disk, {:?}", ep); let set_disk_id = format!("pool_{}_set_{}", ep.pool_idx, ep.set_idx); let _ = send_heal_disk(set_disk_id, Some(HealChannelPriority::Normal)).await; } @@ -1817,7 +1838,7 @@ impl SetDisks { let (set_idx, disk_idx) = match self.find_disk_index(&fm) { Ok(res) => res, Err(e) => { - warn!("find_disk_index err {:?}", e); + warn!("renew_disk: find_disk_index err {:?}", e); return; } }; @@ -1837,7 +1858,7 @@ impl SetDisks { } } - debug!("renew_disk update {:?}", fm.erasure.this); + debug!("renew_disk: update {:?}", fm.erasure.this); let mut disk_lock = self.disks.write().await; disk_lock[disk_idx] = Some(new_disk); @@ -3051,7 +3072,7 @@ impl SetDisks { for (index, disk) in latest_disks.iter().enumerate() { if let Some(outdated_disk) = &out_dated_disks[index] { info!(disk_index = index, "Creating writer for outdated disk"); - let 
writer = create_bitrot_writer( + let writer = match create_bitrot_writer( is_inline_buffer, Some(outdated_disk), RUSTFS_META_TMP_BUCKET, @@ -3060,7 +3081,19 @@ impl SetDisks { erasure.shard_size(), HashAlgorithm::HighwayHash256, ) - .await?; + .await + { + Ok(writer) => writer, + Err(err) => { + warn!( + "create_bitrot_writer disk {}, err {:?}, skipping operation", + outdated_disk.to_string(), + err + ); + writers.push(None); + continue; + } + }; writers.push(Some(writer)); } else { info!(disk_index = index, "Skipping writer (disk not outdated)"); @@ -3790,8 +3823,8 @@ impl ObjectIO for SetDisks { #[tracing::instrument(level = "debug", skip(self, data,))] async fn put_object(&self, bucket: &str, object: &str, data: &mut PutObjReader, opts: &ObjectOptions) -> Result { - let disks_snapshot = self.get_disks_internal().await; - let (disks, filtered_online) = self.filter_online_disks(disks_snapshot).await; + let disks = self.get_disks_internal().await; + // let (disks, filtered_online) = self.filter_online_disks(disks_snapshot).await; // Acquire per-object exclusive lock via RAII guard. It auto-releases asynchronously on drop. let _object_lock_guard = if !opts.no_lock { @@ -3832,13 +3865,13 @@ impl ObjectIO for SetDisks { write_quorum += 1 } - if filtered_online < write_quorum { - warn!( - "online disk snapshot {} below write quorum {} for {}/{}; returning erasure write quorum error", - filtered_online, write_quorum, bucket, object - ); - return Err(to_object_err(Error::ErasureWriteQuorum, vec![bucket, object])); - } + // if filtered_online < write_quorum { + // warn!( + // "online disk snapshot {} below write quorum {} for {}/{}; returning erasure write quorum error", + // filtered_online, write_quorum, bucket, object + // ); + // return Err(to_object_err(Error::ErasureWriteQuorum, vec![bucket, object])); + // } let mut fi = FileInfo::new([bucket, object].join("/").as_str(), data_drives, parity_drives); @@ -3877,8 +3910,10 @@ impl ObjectIO for SetDisks { let mut writers = Vec::with_capacity(shuffle_disks.len()); let mut errors = Vec::with_capacity(shuffle_disks.len()); for disk_op in shuffle_disks.iter() { - if let Some(disk) = disk_op { - let writer = create_bitrot_writer( + if let Some(disk) = disk_op + && disk.is_online().await + { + let writer = match create_bitrot_writer( is_inline_buffer, Some(disk), RUSTFS_META_TMP_BUCKET, @@ -3887,29 +3922,16 @@ impl ObjectIO for SetDisks { erasure.shard_size(), HashAlgorithm::HighwayHash256, ) - .await?; - - // let writer = if is_inline_buffer { - // BitrotWriter::new( - // Writer::from_cursor(Cursor::new(Vec::new())), - // erasure.shard_size(), - // HashAlgorithm::HighwayHash256, - // ) - // } else { - // let f = match disk - // .create_file("", RUSTFS_META_TMP_BUCKET, &tmp_object, erasure.shard_file_size(data.content_length)) - // .await - // { - // Ok(f) => f, - // Err(e) => { - // errors.push(Some(e)); - // writers.push(None); - // continue; - // } - // }; - - // BitrotWriter::new(Writer::from_tokio_writer(f), erasure.shard_size(), HashAlgorithm::HighwayHash256) - // }; + .await + { + Ok(writer) => writer, + Err(err) => { + warn!("create_bitrot_writer disk {}, err {:?}, skipping operation", disk.to_string(), err); + errors.push(Some(err)); + writers.push(None); + continue; + } + }; writers.push(Some(writer)); errors.push(None); @@ -4072,7 +4094,7 @@ impl StorageAPI for SetDisks { async fn local_storage_info(&self) -> rustfs_madmin::StorageInfo { let disks = self.get_disks_internal().await; - let mut local_disks: Vec>> = Vec::new(); + let mut 
local_disks: Vec> = Vec::new(); let mut local_endpoints = Vec::new(); for (i, ep) in self.set_endpoints.iter().enumerate() { @@ -4908,9 +4930,7 @@ impl StorageAPI for SetDisks { for disk in disks.iter() { if let Some(disk) = disk { - if disk.is_online().await { - continue; - } + continue; } let _ = self.add_partial(bucket, object, opts.version_id.as_ref().expect("err")).await; break; @@ -5129,16 +5149,16 @@ impl StorageAPI for SetDisks { return Err(Error::other(format!("checksum mismatch: {checksum}"))); } - let disks_snapshot = self.get_disks_internal().await; - let (disks, filtered_online) = self.filter_online_disks(disks_snapshot).await; + let disks = self.get_disks_internal().await; + // let (disks, filtered_online) = self.filter_online_disks(disks_snapshot).await; - if filtered_online < write_quorum { - warn!( - "online disk snapshot {} below write quorum {} for multipart {}/{}; returning erasure write quorum error", - filtered_online, write_quorum, bucket, object - ); - return Err(to_object_err(Error::ErasureWriteQuorum, vec![bucket, object])); - } + // if filtered_online < write_quorum { + // warn!( + // "online disk snapshot {} below write quorum {} for multipart {}/{}; returning erasure write quorum error", + // filtered_online, write_quorum, bucket, object + // ); + // return Err(to_object_err(Error::ErasureWriteQuorum, vec![bucket, object])); + // } let shuffle_disks = Self::shuffle_disks(&disks, &fi.erasure.distribution); @@ -5152,7 +5172,7 @@ impl StorageAPI for SetDisks { let mut errors = Vec::with_capacity(shuffle_disks.len()); for disk_op in shuffle_disks.iter() { if let Some(disk) = disk_op { - let writer = create_bitrot_writer( + let writer = match create_bitrot_writer( false, Some(disk), RUSTFS_META_TMP_BUCKET, @@ -5161,23 +5181,16 @@ impl StorageAPI for SetDisks { erasure.shard_size(), HashAlgorithm::HighwayHash256, ) - .await?; - - // let writer = { - // let f = match disk - // .create_file("", RUSTFS_META_TMP_BUCKET, &tmp_part_path, erasure.shard_file_size(data.content_length)) - // .await - // { - // Ok(f) => f, - // Err(e) => { - // errors.push(Some(e)); - // writers.push(None); - // continue; - // } - // }; - - // BitrotWriter::new(Writer::from_tokio_writer(f), erasure.shard_size(), HashAlgorithm::HighwayHash256) - // }; + .await + { + Ok(writer) => writer, + Err(err) => { + warn!("create_bitrot_writer disk {}, err {:?}, skipping operation", disk.to_string(), err); + errors.push(Some(err)); + writers.push(None); + continue; + } + }; writers.push(Some(writer)); errors.push(None); @@ -6769,7 +6782,7 @@ async fn get_disks_info(disks: &[Option], eps: &[Endpoint]) -> Vec{ diff --git a/crates/ecstore/src/store_init.rs b/crates/ecstore/src/store_init.rs index 965088d0..437b5218 100644 --- a/crates/ecstore/src/store_init.rs +++ b/crates/ecstore/src/store_init.rs @@ -265,7 +265,10 @@ pub async fn load_format_erasure(disk: &DiskStore, heal: bool) -> disk::error::R .map_err(|e| match e { DiskError::FileNotFound => DiskError::UnformattedDisk, DiskError::DiskNotFound => DiskError::UnformattedDisk, - _ => e, + _ => { + warn!("load_format_erasure err: {:?} {:?}", disk.to_string(), e); + e + } })?; let mut fm = FormatV3::try_from(data.as_ref())?; @@ -312,17 +315,18 @@ async fn save_format_file_all(disks: &[Option], formats: &[Option, format: &Option) -> disk::error::Result<()> { - if disk.is_none() { + let Some(disk) = disk else { return Err(DiskError::DiskNotFound); - } + }; - let format = format.as_ref().unwrap(); + let Some(format) = format else { + return 
Err(DiskError::other("format is none")); + }; let json_data = format.to_json()?; let tmpfile = Uuid::new_v4().to_string(); - let disk = disk.as_ref().unwrap(); disk.write_all(RUSTFS_META_BUCKET, tmpfile.as_str(), json_data.into_bytes().into()) .await?; diff --git a/crates/protos/src/lib.rs b/crates/protos/src/lib.rs index 9b3a2aa4..42fab1f4 100644 --- a/crates/protos/src/lib.rs +++ b/crates/protos/src/lib.rs @@ -26,6 +26,11 @@ use tonic::{ }; use tracing::{debug, warn}; +// Type alias for the complex client type +pub type NodeServiceClientType = NodeServiceClient< + InterceptedService) -> Result, Status> + Send + Sync + 'static>>, +>; + pub use generated::*; // Default 100 MB diff --git a/crates/utils/src/string.rs b/crates/utils/src/string.rs index 42a8e0a6..8d3879d1 100644 --- a/crates/utils/src/string.rs +++ b/crates/utils/src/string.rs @@ -48,6 +48,14 @@ pub fn parse_bool(str: &str) -> Result { } } +pub fn parse_bool_with_default(str: &str, default: bool) -> bool { + match str { + "1" | "t" | "T" | "true" | "TRUE" | "True" | "on" | "ON" | "On" | "enabled" => true, + "0" | "f" | "F" | "false" | "FALSE" | "False" | "off" | "OFF" | "Off" | "disabled" => false, + _ => default, + } +} + /// Matches a simple pattern against a name using wildcards. /// /// # Arguments From f17990f7462165af07574a6f234adeeeca7800ab Mon Sep 17 00:00:00 2001 From: Juri Malinovski Date: Mon, 22 Dec 2025 14:25:23 +0200 Subject: [PATCH 24/26] helm: allow to define additional config variables (#1220) Signed-off-by: Juri Malinovski --- helm/rustfs/templates/statefulset.yaml | 3 --- helm/rustfs/values.yaml | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/helm/rustfs/templates/statefulset.yaml b/helm/rustfs/templates/statefulset.yaml index 432443ff..a9b07b54 100644 --- a/helm/rustfs/templates/statefulset.yaml +++ b/helm/rustfs/templates/statefulset.yaml @@ -115,9 +115,6 @@ spec: name: endpoint - containerPort: {{ .Values.service.console_port }} name: console - env: - - name: REPLICA_COUNT - value: {{ .Values.replicaCount | quote }} envFrom: - configMapRef: name: {{ include "rustfs.fullname" . }}-config diff --git a/helm/rustfs/values.yaml b/helm/rustfs/values.yaml index 4e669a72..898e17cd 100644 --- a/helm/rustfs/values.yaml +++ b/helm/rustfs/values.yaml @@ -124,7 +124,7 @@ ingress: paths: - path: / pathType: Prefix - tls: + tls: enabled: false # Enable tls and access rustfs via https. certManager: enabled: false # Enable certmanager to generate certificate for rustfs, default false. From af5c0b13efa0c663f777de39c58241bd51cbab51 Mon Sep 17 00:00:00 2001 From: loverustfs Date: Mon, 22 Dec 2025 20:43:00 +0800 Subject: [PATCH 25/26] fix: HeadObject returns 404 for deleted objects with versioning enabled (#1229) Co-authored-by: houseme --- .../head_deleted_object_versioning_test.rs | 138 ++++++++++++++++++ crates/e2e_test/src/reliant/mod.rs | 1 + rustfs/src/storage/ecfs.rs | 7 + 3 files changed, 146 insertions(+) create mode 100644 crates/e2e_test/src/reliant/head_deleted_object_versioning_test.rs diff --git a/crates/e2e_test/src/reliant/head_deleted_object_versioning_test.rs b/crates/e2e_test/src/reliant/head_deleted_object_versioning_test.rs new file mode 100644 index 00000000..a4d47175 --- /dev/null +++ b/crates/e2e_test/src/reliant/head_deleted_object_versioning_test.rs @@ -0,0 +1,138 @@ +// Copyright 2024 RustFS Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Test for HeadObject on deleted objects with versioning enabled +//! +//! This test reproduces the issue where getting a deleted object returns +//! 200 OK instead of 404 NoSuchKey when versioning is enabled. + +#![cfg(test)] + +use aws_config::meta::region::RegionProviderChain; +use aws_sdk_s3::Client; +use aws_sdk_s3::config::{Credentials, Region}; +use aws_sdk_s3::error::SdkError; +use aws_sdk_s3::types::{BucketVersioningStatus, VersioningConfiguration}; +use bytes::Bytes; +use serial_test::serial; +use std::error::Error; +use tracing::info; + +const ENDPOINT: &str = "http://localhost:9000"; +const ACCESS_KEY: &str = "rustfsadmin"; +const SECRET_KEY: &str = "rustfsadmin"; +const BUCKET: &str = "test-head-deleted-versioning-bucket"; + +async fn create_aws_s3_client() -> Result<Client, Box<dyn Error>> { + let region_provider = RegionProviderChain::default_provider().or_else(Region::new("us-east-1")); + let shared_config = aws_config::defaults(aws_config::BehaviorVersion::latest()) + .region(region_provider) + .credentials_provider(Credentials::new(ACCESS_KEY, SECRET_KEY, None, None, "static")) + .endpoint_url(ENDPOINT) + .load() + .await; + + let client = Client::from_conf( + aws_sdk_s3::Config::from(&shared_config) + .to_builder() + .force_path_style(true) + .build(), + ); + Ok(client) +} + +/// Setup test bucket, creating it if it doesn't exist, and enable versioning +async fn setup_test_bucket(client: &Client) -> Result<(), Box<dyn Error>> { + match client.create_bucket().bucket(BUCKET).send().await { + Ok(_) => {} + Err(SdkError::ServiceError(e)) => { + let e = e.into_err(); + let error_code = e.meta().code().unwrap_or(""); + if !error_code.eq("BucketAlreadyExists") && !error_code.eq("BucketAlreadyOwnedByYou") { + return Err(e.into()); + } + } + Err(e) => { + return Err(e.into()); + } + } + + // Enable versioning + client + .put_bucket_versioning() + .bucket(BUCKET) + .versioning_configuration( + VersioningConfiguration::builder() + .status(BucketVersioningStatus::Enabled) + .build(), + ) + .send() + .await?; + + Ok(()) +} + +/// Test that HeadObject on a deleted object returns NoSuchKey when versioning is enabled +#[tokio::test] +#[serial] +#[ignore = "requires running RustFS server at localhost:9000"] +async fn test_head_deleted_object_versioning_returns_nosuchkey() -> Result<(), Box<dyn Error>> { + let _ = tracing_subscriber::fmt() + .with_max_level(tracing::Level::INFO) + .with_test_writer() + .try_init(); + + info!("🧪 Starting test_head_deleted_object_versioning_returns_nosuchkey"); + + let client = create_aws_s3_client().await?; + setup_test_bucket(&client).await?; + + let key = "test-head-deleted-versioning.txt"; + let content = b"Test content for HeadObject with versioning"; + + // Upload and verify + client + .put_object() + .bucket(BUCKET) + .key(key) + .body(Bytes::from_static(content).into()) + .send() + .await?; + + // Delete the object (creates a delete marker) + client.delete_object().bucket(BUCKET).key(key).send().await?; + + // Try to head the deleted object (latest version is delete marker) + let head_result = client.head_object().bucket(BUCKET).key(key).send().await; + 
assert!(head_result.is_err(), "HeadObject on deleted object should return an error"); + + match head_result.unwrap_err() { + SdkError::ServiceError(service_err) => { + let s3_err = service_err.into_err(); + assert!( + s3_err.meta().code() == Some("NoSuchKey") + || s3_err.meta().code() == Some("NotFound") + || s3_err.meta().code() == Some("404"), + "Error should be NoSuchKey or NotFound, got: {s3_err:?}" + ); + info!("✅ HeadObject correctly returns NoSuchKey/NotFound"); + } + other_err => { + panic!("Expected ServiceError but got: {other_err:?}"); + } + } + + Ok(()) +} diff --git a/crates/e2e_test/src/reliant/mod.rs b/crates/e2e_test/src/reliant/mod.rs index 83d89906..05a4867b 100644 --- a/crates/e2e_test/src/reliant/mod.rs +++ b/crates/e2e_test/src/reliant/mod.rs @@ -14,6 +14,7 @@ mod conditional_writes; mod get_deleted_object_test; +mod head_deleted_object_versioning_test; mod lifecycle; mod lock; mod node_interact_test; diff --git a/rustfs/src/storage/ecfs.rs b/rustfs/src/storage/ecfs.rs index e12eb958..183ccb26 100644 --- a/rustfs/src/storage/ecfs.rs +++ b/rustfs/src/storage/ecfs.rs @@ -2443,6 +2443,13 @@ impl S3 for FS { let info = store.get_object_info(&bucket, &key, &opts).await.map_err(ApiError::from)?; + if info.delete_marker { + if opts.version_id.is_none() { + return Err(S3Error::new(S3ErrorCode::NoSuchKey)); + } + return Err(S3Error::new(S3ErrorCode::MethodNotAllowed)); + } + if let Some(match_etag) = if_none_match { if let Some(strong_etag) = match_etag.into_etag() { if info From 73c15d6be1796b607d2c8446b6040c19ef07b9d5 Mon Sep 17 00:00:00 2001 From: Ali Mehraji Date: Tue, 23 Dec 2025 04:21:04 +0330 Subject: [PATCH 26/26] Add: rust installation in Makefile (#1188) Signed-off-by: Ali Mehraji Signed-off-by: houseme Co-authored-by: houseme Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- Makefile | 45 ++++++++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/Makefile b/Makefile index 60bfce83..40ac1738 100644 --- a/Makefile +++ b/Makefile @@ -9,30 +9,53 @@ CONTAINER_NAME ?= rustfs-dev DOCKERFILE_PRODUCTION = Dockerfile DOCKERFILE_SOURCE = Dockerfile.source +# Fatal check +# Checks all required dependencies and exits with error if not found +# (e.g., cargo, rustfmt) +check-%: + @command -v $* >/dev/null 2>&1 || { \ + echo >&2 "❌ '$*' is not installed."; \ + exit 1; \ + } + +# Warning-only check +# Checks for optional dependencies and issues a warning if not found +# (e.g., cargo-nextest for enhanced testing) +warn-%: + @command -v $* >/dev/null 2>&1 || { \ + echo >&2 "⚠️ '$*' is not installed."; \ + } + +# For checking dependencies use check- or warn- +.PHONY: core-deps fmt-deps test-deps +core-deps: check-cargo +fmt-deps: check-rustfmt +test-deps: warn-cargo-nextest + # Code quality and formatting targets .PHONY: fmt -fmt: +fmt: core-deps fmt-deps @echo "🔧 Formatting code..." cargo fmt --all .PHONY: fmt-check -fmt-check: +fmt-check: core-deps fmt-deps @echo "📝 Checking code formatting..." cargo fmt --all --check .PHONY: clippy -clippy: +clippy: core-deps @echo "🔍 Running clippy checks..." cargo clippy --fix --allow-dirty cargo clippy --all-targets --all-features -- -D warnings .PHONY: check -check: +check: core-deps @echo "🔨 Running compilation check..." cargo check --all-targets .PHONY: test -test: +test: core-deps test-deps @echo "🧪 Running tests..." 
@if command -v cargo-nextest >/dev/null 2>&1; then \ cargo nextest run --all --exclude e2e_test; \ @@ -42,16 +65,16 @@ test: fi cargo test --all --doc -.PHONY: pre-commit -pre-commit: fmt clippy check test - @echo "✅ All pre-commit checks passed!" - .PHONY: setup-hooks setup-hooks: @echo "🔧 Setting up git hooks..." chmod +x .git/hooks/pre-commit @echo "✅ Git hooks setup complete!" +.PHONY: pre-commit +pre-commit: fmt clippy check test + @echo "✅ All pre-commit checks passed!" + .PHONY: e2e-server e2e-server: sh $(shell pwd)/scripts/run.sh @@ -186,8 +209,6 @@ docker-dev-push: --push \ . - - # Local production builds using direct buildx (alternative to docker-buildx.sh) .PHONY: docker-buildx-production-local docker-buildx-production-local: @@ -247,8 +268,6 @@ dev-env-stop: .PHONY: dev-env-restart dev-env-restart: dev-env-stop dev-env-start - - # ======================================================================================== # Build Utilities # ========================================================================================