From 05032cf887e5b63f0b9f4cc457ae81161320e0b5 Mon Sep 17 00:00:00 2001 From: houseme Date: Wed, 4 Mar 2026 19:22:54 +0800 Subject: [PATCH] chore: update dependencies and workspace resolver (#2073) --- .../compose/docker-compose.observability.yaml | 2 +- .../docker-compose-example-for-rustfs.yml | 270 +++++++++++++++++ .../docker-compose-tempo-ha-override.yml | 62 ++++ .docker/observability/docker-compose.yml | 89 +----- .../provisioning/dashboards/dashboard.yml | 14 + .../provisioning/datasources/datasources.yaml | 14 + .docker/observability/tempo-ha.yaml | 286 ++++++++++++++++++ .docker/observability/tempo.yaml | 54 +++- Cargo.lock | 24 +- Cargo.toml | 6 +- crates/mcp/src/server.rs | 21 +- docker-compose.yml | 2 +- scripts/run.sh | 7 +- 13 files changed, 723 insertions(+), 128 deletions(-) create mode 100644 .docker/observability/docker-compose-example-for-rustfs.yml create mode 100644 .docker/observability/docker-compose-tempo-ha-override.yml create mode 100644 .docker/observability/tempo-ha.yaml diff --git a/.docker/compose/docker-compose.observability.yaml b/.docker/compose/docker-compose.observability.yaml index e6495b4c..9c8c3cd0 100644 --- a/.docker/compose/docker-compose.observability.yaml +++ b/.docker/compose/docker-compose.observability.yaml @@ -104,7 +104,7 @@ services: environment: - TZ=Asia/Shanghai volumes: - - ../../.docker/observability/loki-config.yaml:/etc/loki/local-config.yaml:ro + - ../../.docker/observability/loki.yaml:/etc/loki/local-config.yaml:ro - loki-data:/loki ports: - "3100:3100" diff --git a/.docker/observability/docker-compose-example-for-rustfs.yml b/.docker/observability/docker-compose-example-for-rustfs.yml new file mode 100644 index 00000000..a1cb7ae0 --- /dev/null +++ b/.docker/observability/docker-compose-example-for-rustfs.yml @@ -0,0 +1,270 @@ +# Copyright 2024 RustFS Team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +services: + rustfs: + security_opt: + - "no-new-privileges:true" + image: rustfs/rustfs:latest + container_name: rustfs-server + ports: + - "9000:9000" # S3 API port + - "9001:9001" # Console port + environment: + - RUSTFS_VOLUMES=/data/rustfs + - RUSTFS_ADDRESS=0.0.0.0:9000 + - RUSTFS_CONSOLE_ADDRESS=0.0.0.0:9001 + - RUSTFS_CONSOLE_ENABLE=true + - RUSTFS_CORS_ALLOWED_ORIGINS=* + - RUSTFS_CONSOLE_CORS_ALLOWED_ORIGINS=* + - RUSTFS_ACCESS_KEY=rustfsadmin + - RUSTFS_SECRET_KEY=rustfsadmin + - RUSTFS_OBS_LOGGER_LEVEL=info + - RUSTFS_OBS_ENDPOINT=http://otel-collector:4318 + - RUSTFS_OBS_PROFILING_ENDPOINT=http://pyroscope:4040 + volumes: + - rustfs-data:/data/rustfs + networks: + - otel-network + restart: unless-stopped + healthcheck: + test: + [ + "CMD", + "sh", + "-c", + "curl -f http://127.0.0.1:9000/health && curl -f http://127.0.0.1:9001/rustfs/console/health", + ] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s + depends_on: + otel-collector: + condition: service_started + + rustfs-init: + image: alpine + container_name: rustfs-init + volumes: + - rustfs-data:/data + networks: + - otel-network + command: > + sh -c " + chown -R 10001:10001 /data && + echo 'Volume Permissions fixed' && + exit 0 + " + restart: no + + # --- Tracing --- + + tempo: + image: grafana/tempo:latest + container_name: tempo + command: [ "-config.file=/etc/tempo.yaml" ] + volumes: + - ./tempo.yaml:/etc/tempo.yaml:ro + - tempo-data:/var/tempo + ports: + - "3200:3200" # tempo + - "4317" # otlp grpc + - "4318" # otlp http + networks: + - otel-network + restart: 
unless-stopped + depends_on: + - redpanda + healthcheck: + test: [ "CMD", "wget", "--spider", "-q", "http://localhost:3200/ready" ] + interval: 10s + timeout: 5s + retries: 3 + start_period: 15s + + redpanda: + image: redpandadata/redpanda:latest # for tempo ingest + container_name: redpanda + ports: + - "9092:9092" + networks: + - otel-network + restart: unless-stopped + command: > + redpanda start --overprovisioned + --mode=dev-container + --kafka-addr=PLAINTEXT://0.0.0.0:9092 + --advertise-kafka-addr=PLAINTEXT://redpanda:9092 + + jaeger: + image: jaegertracing/jaeger:latest + container_name: jaeger + environment: + - SPAN_STORAGE_TYPE=badger + - BADGER_EPHEMERAL=false + - BADGER_DIRECTORY_VALUE=/badger/data + - BADGER_DIRECTORY_KEY=/badger/key + - COLLECTOR_OTLP_ENABLED=true + volumes: + - ./jaeger.yaml:/etc/jaeger/config.yml + - jaeger-data:/badger + ports: + - "16686:16686" # Web UI + - "14269:14269" # Admin/Metrics + - "4317" # otlp grpc + - "4318" # otlp http + command: [ "--config", "/etc/jaeger/config.yml" ] + networks: + - otel-network + restart: unless-stopped + healthcheck: + test: [ "CMD", "wget", "--spider", "-q", "http://localhost:14269" ] + interval: 10s + timeout: 5s + retries: 3 + start_period: 15s + + # --- Metrics --- + + prometheus: + image: prom/prometheus:latest + container_name: prometheus + volumes: + - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro + - prometheus-data:/prometheus + ports: + - "9090:9090" + command: + - "--config.file=/etc/prometheus/prometheus.yml" + - "--web.enable-otlp-receiver" # Enable OTLP + - "--web.enable-remote-write-receiver" # Enable remote write + - "--enable-feature=promql-experimental-functions" # Enable info() + - "--storage.tsdb.retention.time=30d" + restart: unless-stopped + networks: + - otel-network + healthcheck: + test: [ "CMD", "wget", "--spider", "-q", "http://localhost:9090/-/healthy" ] + interval: 10s + timeout: 5s + retries: 3 + + # --- Logging --- + + loki: + image: grafana/loki:latest + 
container_name: loki + volumes: + - ./loki.yaml:/etc/loki/loki.yaml:ro + - loki-data:/loki + ports: + - "3100:3100" + command: -config.file=/etc/loki/loki.yaml + networks: + - otel-network + restart: unless-stopped + healthcheck: + test: [ "CMD", "wget", "--spider", "-q", "http://localhost:3100/ready" ] + interval: 15s + timeout: 10s + retries: 5 + start_period: 60s + + # --- Collection --- + + otel-collector: + image: otel/opentelemetry-collector-contrib:latest + volumes: + - ./otel-collector-config.yaml:/etc/otelcol-contrib/config.yaml:ro + ports: + - "1888:1888" # pprof + - "8888:8888" # Prometheus metrics for Collector + - "8889:8889" # Prometheus metrics for application indicators + - "13133:13133" # health check + - "4317:4317" # OTLP gRPC + - "4318:4318" # OTLP HTTP + - "55679:55679" # zpages + networks: + - otel-network + restart: unless-stopped + depends_on: + - tempo + - jaeger + - prometheus + - loki + healthcheck: + test: [ "CMD", "wget", "--spider", "-q", "http://localhost:13133" ] + interval: 10s + timeout: 5s + retries: 3 + + # --- Profiles --- + + pyroscope: + image: grafana/pyroscope:latest + container_name: pyroscope + ports: + - "4040:4040" + command: + - -self-profiling.disable-push=true + networks: + - otel-network + restart: unless-stopped + + # --- Visualization --- + + grafana: + image: grafana/grafana:latest + container_name: grafana + ports: + - "3000:3000" + environment: + - GF_SECURITY_ADMIN_PASSWORD=admin + - GF_SECURITY_ADMIN_USER=admin + volumes: + - ./grafana/provisioning:/etc/grafana/provisioning:ro + - ./grafana/dashboards:/etc/grafana/dashboards:ro + - grafana-data:/var/lib/grafana + networks: + - otel-network + restart: unless-stopped + depends_on: + - prometheus + - tempo + - loki + healthcheck: + test: + [ "CMD", "wget", "--spider", "-q", "http://localhost:3000/api/health" ] + interval: 10s + timeout: 5s + retries: 3 + +volumes: + rustfs-data: + tempo-data: + jaeger-data: + prometheus-data: + loki-data: + grafana-data: + 
+networks: + otel-network: + driver: bridge + name: "network_otel" + ipam: + config: + - subnet: 172.28.0.0/16 + driver_opts: + com.docker.network.enable_ipv6: "true" diff --git a/.docker/observability/docker-compose-tempo-ha-override.yml b/.docker/observability/docker-compose-tempo-ha-override.yml new file mode 100644 index 00000000..20c3dd70 --- /dev/null +++ b/.docker/observability/docker-compose-tempo-ha-override.yml @@ -0,0 +1,62 @@ +# Copyright 2024 RustFS Team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Docker Compose override file for High Availability Tempo setup +# +# Usage: +# docker-compose -f docker-compose-example-for-rustfs.yml \ +# -f docker-compose-tempo-ha-override.yml up + +services: + # Override Tempo to use high-availability configuration + tempo: + volumes: + - ./tempo-ha.yaml:/etc/tempo.yaml:ro + - tempo-data:/var/tempo + ports: + - "3200:3200" # Tempo HTTP + - "4317:4317" # OTLP gRPC + - "4318:4318" # OTLP HTTP + - "7946:7946" # Memberlist + - "14250:14250" # Jaeger gRPC + - "14268:14268" # Jaeger Thrift HTTP + - "9411:9411" # Zipkin + environment: + - TEMPO_MEMBERLIST_BIND_PORT=7946 + healthcheck: + test: [ "CMD", "wget", "--spider", "-q", "http://localhost:3200/ready" ] + interval: 10s + timeout: 5s + retries: 5 + start_period: 30s + depends_on: + - redpanda + +volumes: + tempo-data: + driver: local + driver_opts: + type: tmpfs + device: tmpfs + o: "size=4g" # Allocate 4GB tmpfs for Tempo data (adjust based on your needs) + +# Network configuration remains the same +# networks: +# otel-network: +# driver: bridge +# name: "network_otel" +# ipam: +# config: +# - subnet: 172.28.0.0/16 + diff --git a/.docker/observability/docker-compose.yml b/.docker/observability/docker-compose.yml index d2c88e0a..b6091463 100644 --- a/.docker/observability/docker-compose.yml +++ b/.docker/observability/docker-compose.yml @@ -13,68 +13,13 @@ # limitations under the License. 
services: - rustfs: - security_opt: - - "no-new-privileges:true" - image: rustfs/rustfs:latest - container_name: rustfs-server - ports: - - "9000:9000" # S3 API port - - "9001:9001" # Console port - environment: - - RUSTFS_VOLUMES=/data/rustfs - - RUSTFS_ADDRESS=0.0.0.0:9000 - - RUSTFS_CONSOLE_ADDRESS=0.0.0.0:9001 - - RUSTFS_CONSOLE_ENABLE=true - - RUSTFS_CORS_ALLOWED_ORIGINS=* - - RUSTFS_CONSOLE_CORS_ALLOWED_ORIGINS=* - - RUSTFS_ACCESS_KEY=rustfsadmin - - RUSTFS_SECRET_KEY=rustfsadmin - - RUSTFS_OBS_LOGGER_LEVEL=info - - RUSTFS_OBS_ENDPOINT=http://otel-collector:4318 - - RUSTFS_OBS_PROFILING_ENDPOINT=http://pyroscope:4040 - volumes: - - rustfs-data:/data/rustfs - networks: - - otel-network - restart: unless-stopped - healthcheck: - test: - [ - "CMD", - "sh", - "-c", - "curl -f http://127.0.0.1:9000/health && curl -f http://127.0.0.1:9001/rustfs/console/health", - ] - interval: 30s - timeout: 10s - retries: 3 - start_period: 40s - depends_on: - otel-collector: - condition: service_started - - rustfs-init: - image: alpine - container_name: rustfs-init - volumes: - - rustfs-data:/data - networks: - - otel-network - command: > - sh -c " - chown -R 10001:10001 /data && - echo 'Volume Permissions fixed' && - exit 0 - " - restart: no # --- Tracing --- tempo: image: grafana/tempo:latest container_name: tempo - command: ["-config.file=/etc/tempo.yaml"] + command: [ "-config.file=/etc/tempo.yaml" ] volumes: - ./tempo.yaml:/etc/tempo.yaml:ro - tempo-data:/var/tempo @@ -82,32 +27,17 @@ services: - "3200:3200" # tempo - "4317" # otlp grpc - "4318" # otlp http + - "7946" # memberlist networks: - otel-network restart: unless-stopped - depends_on: - - redpanda healthcheck: - test: ["CMD", "wget", "--spider", "-q", "http://localhost:3200/ready"] + test: [ "CMD", "wget", "--spider", "-q", "http://localhost:3200/ready" ] interval: 10s timeout: 5s retries: 3 start_period: 15s - redpanda: - image: redpandadata/redpanda:latest # for tempo ingest - container_name: redpanda - ports: - - 
"9092:9092" - networks: - - otel-network - restart: unless-stopped - command: > - redpanda start --overprovisioned - --mode=dev-container - --kafka-addr=PLAINTEXT://0.0.0.0:9092 - --advertise-kafka-addr=PLAINTEXT://redpanda:9092 - jaeger: image: jaegertracing/jaeger:latest container_name: jaeger @@ -125,12 +55,12 @@ services: - "14269:14269" # Admin/Metrics - "4317" # otlp grpc - "4318" # otlp http - command: ["--config", "/etc/jaeger/config.yml"] + command: [ "--config", "/etc/jaeger/config.yml" ] networks: - otel-network restart: unless-stopped healthcheck: - test: ["CMD", "wget", "--spider", "-q", "http://localhost:14269"] + test: [ "CMD", "wget", "--spider", "-q", "http://localhost:14269" ] interval: 10s timeout: 5s retries: 3 @@ -156,7 +86,7 @@ services: networks: - otel-network healthcheck: - test: ["CMD", "wget", "--spider", "-q", "http://localhost:9090/-/healthy"] + test: [ "CMD", "wget", "--spider", "-q", "http://localhost:9090/-/healthy" ] interval: 10s timeout: 5s retries: 3 @@ -176,7 +106,7 @@ services: - otel-network restart: unless-stopped healthcheck: - test: ["CMD", "wget", "--spider", "-q", "http://localhost:3100/ready"] + test: [ "CMD", "wget", "--spider", "-q", "http://localhost:3100/ready" ] interval: 15s timeout: 10s retries: 5 @@ -205,7 +135,7 @@ services: - prometheus - loki healthcheck: - test: ["CMD", "wget", "--spider", "-q", "http://localhost:13133"] + test: [ "CMD", "wget", "--spider", "-q", "http://localhost:13133" ] interval: 10s timeout: 5s retries: 3 @@ -246,13 +176,12 @@ services: - loki healthcheck: test: - ["CMD", "wget", "--spider", "-q", "http://localhost:3000/api/health"] + [ "CMD", "wget", "--spider", "-q", "http://localhost:3000/api/health" ] interval: 10s timeout: 5s retries: 3 volumes: - rustfs-data: tempo-data: jaeger-data: prometheus-data: diff --git a/.docker/observability/grafana/provisioning/dashboards/dashboard.yml b/.docker/observability/grafana/provisioning/dashboards/dashboard.yml index eef4519d..4393f59b 100644 
--- a/.docker/observability/grafana/provisioning/dashboards/dashboard.yml +++ b/.docker/observability/grafana/provisioning/dashboards/dashboard.yml @@ -1,3 +1,17 @@ +# Copyright 2024 RustFS Team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + apiVersion: 1 providers: diff --git a/.docker/observability/grafana/provisioning/datasources/datasources.yaml b/.docker/observability/grafana/provisioning/datasources/datasources.yaml index f1ee62f1..8ede0e5a 100644 --- a/.docker/observability/grafana/provisioning/datasources/datasources.yaml +++ b/.docker/observability/grafana/provisioning/datasources/datasources.yaml @@ -1,3 +1,17 @@ +# Copyright 2024 RustFS Team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ apiVersion: 1 datasources: diff --git a/.docker/observability/tempo-ha.yaml b/.docker/observability/tempo-ha.yaml new file mode 100644 index 00000000..67025b79 --- /dev/null +++ b/.docker/observability/tempo-ha.yaml @@ -0,0 +1,286 @@ +# Copyright 2024 RustFS Team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# High Availability Tempo Configuration for docker-compose-example-for-rustfs.yml +# Features: +# - Distributed architecture with multiple components +# - Kafka-based ingestion for fault tolerance +# - Replication factor of 3 for data resilience +# - Query frontend for load balancing +# - Metrics generation from traces +# - WAL for durability + +partition_ring_live_store: true +stream_over_http_enabled: true + +server: + http_listen_port: 3200 + http_server_read_timeout: 30s + http_server_write_timeout: 30s + grpc_server_max_recv_msg_size: 4194304 # 4MB + grpc_server_max_send_msg_size: 4194304 + log_level: info + log_format: json + +# Memberlist configuration for distributed mode +memberlist: + node_name: tempo + bind_port: 7946 + join_members: + - tempo:7946 + retransmit_factor: 4 + node_timeout: 15s + retransmit_interval: 300ms + dead_node_reclaim_time: 30s + +# Distributor configuration - receives traces and routes to ingesters +distributor: + ingester_write_path_enabled: true + kafka_write_path_enabled: true + rate_limit_bytes: 10MB + rate_limit_enabled: true + receivers: + otlp: + protocols: + grpc: + endpoint: "0.0.0.0:4317" + max_concurrent_streams: 0 
+ max_receive_message_size: 4194304 + http: + endpoint: "0.0.0.0:4318" + cors: + allowed_origins: + - "*" + max_age: 86400 + jaeger: + protocols: + grpc: + endpoint: "0.0.0.0:14250" + thrift_http: + endpoint: "0.0.0.0:14268" + zipkin: + endpoint: "0.0.0.0:9411" + ring: + kvstore: + store: memberlist + heartbeat_timeout: 5s + replication_factor: 3 + heartbeat_interval: 5s + +# Ingester configuration - stores traces and querying +ingester: + lifecycler: + address: tempo + ring: + kvstore: + store: memberlist + replication_factor: 3 + max_cache_freshness_per_sec: 10s + heartbeat_interval: 5s + heartbeat_timeout: 5s + num_tokens: 128 + tokens_file_path: /var/tempo/tokens.json + claim_on_rollout: true + trace_idle_period: 20s + max_block_bytes: 10_000_000 + max_block_duration: 10m + chunk_size_bytes: 1_000_000 + chunk_encoding: snappy + wal: + checkpoint_duration: 5s + max_wal_blocks: 4 + metrics: + enabled: true + level: block + target_info_duration: 15m + +# WAL configuration for data durability +wal: + checkpoint_duration: 5s + flush_on_shutdown: true + path: /var/tempo/wal + +# Kafka ingestion configuration - for high throughput scenarios +ingest: + enabled: true + kafka: + brokers: [ redpanda:9092 ] + topic: tempo-ingest + encoding: protobuf + consumer_group: tempo-ingest-consumer + session_timeout: 10s + rebalance_timeout: 1m + partition: auto + verbosity: 2 + +# Query frontend configuration - distributed querying +query_frontend: + compression: gzip + downstream_url: http://localhost:3200 + log_queries_longer_than: 5s + cache_uncompressed_bytes: 100MB + max_outstanding_requests_per_tenant: 100 + max_query_length: 48h + max_query_lookback: 30d + default_result_cache_ttl: 1m + result_cache: + cache: + enable_fifocache: true + default_validity: 1m + rf1_after: "1999-01-01T00:00:00Z" + mcp_server: + enabled: true + +# Querier configuration - queries traces +querier: + frontend_worker: + frontend_address: localhost:3200 + grpc_client_config: + max_recv_msg_size: 
104857600 + max_concurrent_queries: 20 + max_metric_bytes_per_trace: 1MB + +# Query scheduler configuration - for distributed querying +query_scheduler: + use_scheduler_ring: false + +# Metrics generator configuration - generates metrics from traces +metrics_generator: + enabled: true + registry: + enabled: true + external_labels: + source: tempo + cluster: rustfs-docker-ha + environment: production + storage: + path: /var/tempo/generator/wal + remote_write: + - url: http://prometheus:9090/api/v1/write + send_exemplars: true + resource_to_telemetry_conversion: + enabled: true + processor: + batch: + timeout: 10s + send_batch_size: 1024 + memory_limiter: + check_interval: 5s + limit_mib: 512 + spike_limit_mib: 128 + processors: + - span-metrics + - local-blocks + - service-graphs + generate_native_histograms: both + +# Backend worker configuration +backend_worker: + backend_scheduler_addr: localhost:3200 + compaction: + block_retention: 24h + compacted_block_retention: 1h + ring: + kvstore: + store: memberlist + +# Backend scheduler configuration +backend_scheduler: + enabled: true + provider: + compaction: + compaction: + block_retention: 24h + compacted_block_retention: 1h + concurrency: 25 + v2_out_path: /var/tempo/blocks/compaction + +# Storage configuration - local backend with proper retention +storage: + trace: + backend: local + wal: + path: /var/tempo/wal + checkpoint_duration: 5s + flush_on_shutdown: true + local: + path: /var/tempo/blocks + bloom_filter_false_positive: 0.05 + bloom_shift: 4 + index: + downsample_bytes: 1000000 + page_size_bytes: 0 + cache_size_bytes: 0 + pool: + max_workers: 400 + queue_depth: 10000 + +# Compactor configuration - manages block compaction +compactor: + compaction: + block_retention: 168h # 7 days + compacted_block_retention: 1h + concurrency: 25 + v2_out_path: /var/tempo/blocks/compaction + shard_count: 32 + max_block_bytes: 107374182400 # 100GB + max_compaction_objects: 6000000 + max_time_per_tenant: 5m + 
+ block_size_bytes: 107374182400 + ring: + kvstore: + store: memberlist + heartbeat_interval: 5s + heartbeat_timeout: 5s + +# Limits configuration - rate limiting and quotas +limits: + max_traces_per_user: 10000 + max_bytes_per_trace: 10485760 # 10MB + max_search_bytes_per_trace: 0 + forgiving_oversize_traces: true + rate_limit_bytes: 10MB + rate_limit_enabled: true + ingestion_burst_size_bytes: 20MB + ingestion_rate_limit_bytes: 10MB + max_bytes_per_second: 10485760 + metrics_generator_max_active_series: 10000 + metrics_generator_max_churned_series: 10000 + metrics_generator_forta_out_of_order_ttl: 5m + +# Override configuration +overrides: + defaults: + metrics_generator: + processors: + - span-metrics + - local-blocks + - service-graphs + generate_native_histograms: both + max_active_series: 10000 + max_churned_series: 10000 + +# Usage reporting configuration +usage_report: + reporting_enabled: false + +# Tracing configuration for debugging +tracing: + enabled: true + jaeger: + sampler: + name: probabilistic + param: 0.1 + reporter_log_spans: false + diff --git a/.docker/observability/tempo.yaml b/.docker/observability/tempo.yaml index e74c6183..f2ba2096 100644 --- a/.docker/observability/tempo.yaml +++ b/.docker/observability/tempo.yaml @@ -19,9 +19,16 @@ server: http_listen_port: 3200 log_level: info +memberlist: + node_name: tempo + bind_port: 7946 + join_members: + - tempo:7946 + +# Distributor configuration - receives traces and writes directly to ingesters distributor: - ingester_write_path_enabled: false - kafka_write_path_enabled: true + ingester_write_path_enabled: true + kafka_write_path_enabled: false receivers: otlp: protocols: @@ -29,10 +36,21 @@ distributor: endpoint: "tempo:4317" http: endpoint: "tempo:4318" - #log_received_spans: - # enabled: true - # log_discarded_spans: - # enabled: true + ring: + kvstore: + store: memberlist + +# Ingester configuration - receives traces directly from the distributor and stores them +ingester: + lifecycler: + ring: + kvstore: + store: 
memberlist + replication_factor: 1 + tokens_file_path: /var/tempo/tokens.json + trace_idle_period: 10s + max_block_bytes: 1_000_000 + max_block_duration: 5m backend_scheduler: provider: @@ -49,7 +67,8 @@ backend_worker: store: memberlist querier: - query_live_store: true + frontend_worker: + frontend_address: tempo:3200 metrics_generator: registry: @@ -78,17 +97,28 @@ storage: overrides: defaults: metrics_generator: - processors: ["span-metrics", "service-graphs", "local-blocks"] + processors: [ "span-metrics", "service-graphs", "local-blocks" ] generate_native_histograms: both ingest: - enabled: true - kafka: - address: redpanda:9092 - topic: tempo-ingest + enabled: false + # Disabled because using direct ingester write path + # If you want Kafka path, enable this and set: + # kafka: + # brokers: [redpanda:9092] + # topic: tempo-ingest + # encoding: protobuf + # consumer_group: tempo-ingest-consumer block_builder: consume_cycle_duration: 30s +compactor: + compaction: + block_retention: 168h # 7 days + ring: + kvstore: + store: memberlist + usage_report: reporting_enabled: false diff --git a/Cargo.lock b/Cargo.lock index 75b8abef..747e7976 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4580,9 +4580,9 @@ dependencies = [ [[package]] name = "jiff" -version = "0.2.22" +version = "0.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819b44bc7c87d9117eb522f14d46e918add69ff12713c475946b0a29363ed1c2" +checksum = "1a3546dc96b6d42c5f24902af9e2538e82e39ad350b0c766eb3fbf2d8f3d8359" dependencies = [ "jiff-static", "jiff-tzdb-platform", @@ -4595,9 +4595,9 @@ dependencies = [ [[package]] name = "jiff-static" -version = "0.2.22" +version = "0.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "470252db18ecc35fd766c0891b1e3ec6cbbcd62507e85276c01bf75d8e94d4a1" +checksum = "2a8c8b344124222efd714b73bb41f8b5120b27a7cc1c75593a6ff768d9d05aa4" dependencies = [ "proc-macro2", "quote", @@ -4606,9 +4606,9 @@ dependencies = [ 
[[package]] name = "jiff-tzdb" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68971ebff725b9e2ca27a601c5eb38a4c5d64422c4cbab0c535f248087eda5c2" +checksum = "c900ef84826f1338a557697dc8fc601df9ca9af4ac137c7fb61d4c6f2dfd3076" [[package]] name = "jiff-tzdb-platform" @@ -6580,9 +6580,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.44" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] @@ -6982,9 +6982,9 @@ checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422" [[package]] name = "rmcp" -version = "0.17.0" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a0ce46f9101dc911f07e1468084c057839d15b08040d110820c5513312ef56a" +checksum = "d2cb14cb9278a12eae884c9f3c0cfeca2cc28f361211206424a1d7abed95f090" dependencies = [ "async-trait", "base64 0.22.1", @@ -7004,9 +7004,9 @@ dependencies = [ [[package]] name = "rmcp-macros" -version = "0.17.0" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abad6f5f46e220e3bda2fc90fd1ad64c1c2a2bd716d52c845eb5c9c64cda7542" +checksum = "6a02ea81d9482b07e1fe156ac7cf98b6823d51fb84531936a5e1cbb4eec31ad5" dependencies = [ "darling 0.23.0", "proc-macro2", diff --git a/Cargo.toml b/Cargo.toml index 9820f95c..6ceae3a7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -49,7 +49,7 @@ members = [ "crates/workers", # Worker thread pools and task scheduling "crates/zip", # ZIP file handling and compression ] -resolver = "2" +resolver = "3" [workspace.package] edition = "2024" @@ -142,7 +142,7 @@ flatbuffers = "25.12.19" form_urlencoded = "1.2.2" prost = "0.14.3" quick-xml = "0.39.2" -rmcp = { version = "0.17.0" } +rmcp = { version = "1.1.0" } 
rmp = { version = "0.8.15" } rmp-serde = { version = "1.3.1" } serde = { version = "1.0.228", features = ["derive"] } @@ -171,7 +171,7 @@ zeroize = { version = "1.8.2", features = ["derive"] } # Time and Date chrono = { version = "0.4.44", features = ["serde"] } humantime = "2.3.0" -jiff = { version = "0.2.22", features = ["serde"] } +jiff = { version = "0.2.23", features = ["serde"] } time = { version = "0.3.47", features = ["std", "parsing", "formatting", "macros", "serde"] } # Utilities and Tools diff --git a/crates/mcp/src/server.rs b/crates/mcp/src/server.rs index 1b43c7fa..1088761c 100644 --- a/crates/mcp/src/server.rs +++ b/crates/mcp/src/server.rs @@ -16,7 +16,7 @@ use anyhow::Result; use rmcp::{ ErrorData, RoleServer, ServerHandler, handler::server::{router::tool::ToolRouter, wrapper::Parameters}, - model::{Implementation, ProtocolVersion, ServerCapabilities, ServerInfo, ToolsCapability}, + model::{Implementation, ProtocolVersion, ServerCapabilities, ServerInfo}, service::{NotificationContext, RequestContext}, tool, tool_handler, tool_router, }; @@ -604,21 +604,10 @@ impl RustfsMcpServer { #[tool_handler(router = self.tool_router)] impl ServerHandler for RustfsMcpServer { fn get_info(&self) -> ServerInfo { - ServerInfo { - protocol_version: ProtocolVersion::V_2024_11_05, - capabilities: ServerCapabilities { - tools: Some(ToolsCapability { - list_changed: Some(false), - }), - ..Default::default() - }, - instructions: Some("RustFS MCP Server providing S3 operations through Model Context Protocol".into()), - server_info: Implementation { - name: "rustfs-mcp-server".into(), - version: env!("CARGO_PKG_VERSION").into(), - ..Default::default() - }, - } + ServerInfo::new(ServerCapabilities::builder().enable_tools().build()) + .with_instructions("RustFS MCP Server providing S3 operations through Model Context Protocol") + .with_server_info(Implementation::new("rustfs-mcp-server", env!("CARGO_PKG_VERSION"))) + .with_protocol_version(ProtocolVersion::LATEST) } async 
fn ping(&self, _ctx: RequestContext) -> Result<(), ErrorData> { diff --git a/docker-compose.yml b/docker-compose.yml index be2aefa8..2a668070 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -211,7 +211,7 @@ services: environment: - TZ=Asia/Shanghai volumes: - - ./.docker/observability/loki-config.yaml:/etc/loki/local-config.yaml:ro + - ./.docker/observability/loki.yaml:/etc/loki/local-config.yaml:ro - loki_data:/loki ports: - "3100:3100" diff --git a/scripts/run.sh b/scripts/run.sh index d54832d5..e57e34dc 100755 --- a/scripts/run.sh +++ b/scripts/run.sh @@ -41,7 +41,7 @@ fi # export RUSTFS_ERASURE_SET_DRIVE_COUNT=5 -# export RUSTFS_STORAGE_CLASS_INLINE_BLOCK="512 KB" +# export RUSTFS_STORAGE_CLASS_INLINE_BLOCK="512 KB" export RUSTFS_VOLUMES="./target/volume/test{1...4}" # export RUSTFS_VOLUMES="./target/volume/test" @@ -61,14 +61,15 @@ export RUSTFS_CONSOLE_ADDRESS=":9001" #export OTEL_EXPORTER_OTLP_METRICS_ENDPOINT=http://localhost:9090/api/v1/otlp/v1/metrics #export RUSTFS_OBS_LOG_ENDPOINT=http://loki:3100/otlp/v1/logs # OpenTelemetry Collector logs address http://loki:3100/otlp/v1/logs #export OTEL_EXPORTER_OTLP_LOGS_ENDPOINT=http://loki:3100/otlp/v1/logs +#export RUSTFS_OBS_PROFILING_ENDPOINT=http://localhost:4040 # Pyroscope profiling address #export RUSTFS_OBS_USE_STDOUT=true # Whether to use standard output #export RUSTFS_OBS_SAMPLE_RATIO=2.0 # Sample ratio, between 0.0-1.0, 0.0 means no sampling, 1.0 means full sampling #export RUSTFS_OBS_METER_INTERVAL=1 # Sampling interval in seconds #export RUSTFS_OBS_SERVICE_NAME=rustfs # Service name #export RUSTFS_OBS_SERVICE_VERSION=0.1.0 # Service version -export RUSTFS_OBS_ENVIRONMENT=develop # Environment name development, staging, production +export RUSTFS_OBS_ENVIRONMENT=production # Environment name development, staging, production export RUSTFS_OBS_LOGGER_LEVEL=info # Log level, supports trace, debug, info, warn, error -export RUSTFS_OBS_LOG_STDOUT_ENABLED=true # Whether to enable 
local stdout logging +export RUSTFS_OBS_LOG_STDOUT_ENABLED=false # Whether to enable local stdout logging export RUSTFS_OBS_LOG_DIRECTORY="$current_dir/deploy/logs" # Log directory export RUSTFS_OBS_LOG_ROTATION_TIME="minutely" # Log rotation time unit, can be "minutely", "hourly", "daily" export RUSTFS_OBS_LOG_KEEP_FILES=30 # Number of log files to keep