# File: rustfs/.docker/observability/docker-compose.yml
# Snapshot: 2026-02-27 01:21:12 +08:00 (203 lines, 5.3 KiB, YAML)

# Copyright 2024 RustFS Team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Observability stack: tracing (Tempo, Jaeger), metrics (Prometheus),
# logging (Loki), collection (OpenTelemetry Collector) and dashboards (Grafana).
# Every service joins the shared `otel-network` defined at the bottom of the file.
services:
  # --- Tracing ---

  # One-shot helper: hands ownership of the Tempo data directory to UID/GID
  # 10001 (the user the `tempo` service runs as), then exits.
  tempo-init:
    image: busybox:latest
    command: [ "sh", "-c", "chown -R 10001:10001 /var/tempo" ]
    volumes:
      - ./tempo-data:/var/tempo
    user: root
    networks:
      - otel-network
    restart: "no"

  tempo:
    image: grafana/tempo:latest
    user: "10001"
    command: [ "-config.file=/etc/tempo.yaml" ]
    volumes:
      - ./tempo.yaml:/etc/tempo.yaml:ro
      - ./tempo-data:/var/tempo
    ports:
      - "3200:3200" # tempo
      # Container port only: Compose publishes these on ephemeral host ports,
      # so they do not collide with the collector's fixed 4317/4318 mappings.
      - "4317" # otlp grpc
      - "4318" # otlp http
    restart: unless-stopped
    networks:
      - otel-network
    # Start only after the ownership fix on ./tempo-data has finished;
    # otherwise Tempo (UID 10001) can race the chown and fail to write.
    depends_on:
      tempo-init:
        condition: service_completed_successfully
    healthcheck:
      test: [ "CMD-SHELL", "wget --spider -q http://localhost:3200/metrics || exit 1" ]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 40s

  jaeger:
    image: jaegertracing/jaeger:latest
    # NOTE(review): these are Jaeger v1-style settings; confirm the `latest`
    # tag of this image still honors them and still serves port 14269.
    environment:
      - TZ=Asia/Shanghai
      - SPAN_STORAGE_TYPE=badger
      - BADGER_EPHEMERAL=false
      - BADGER_DIRECTORY_VALUE=/badger/data
      - BADGER_DIRECTORY_KEY=/badger/key
      - COLLECTOR_OTLP_ENABLED=true
    volumes:
      - ./jaeger-data:/badger
    ports:
      - "16686:16686" # Web UI
      - "14269:14269" # Admin/Metrics
      # Container port only (ephemeral host port), same reasoning as for tempo.
      - "4317"
      - "4318"
    networks:
      - otel-network
    healthcheck:
      test: [ "CMD-SHELL", "wget --spider -q http://localhost:14269 || exit 1" ]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 20s

  # --- Metrics ---
  prometheus:
    image: prom/prometheus:latest
    environment:
      - TZ=Asia/Shanghai
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - ./prometheus-data:/prometheus
    ports:
      - "9090:9090"
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      # Accept pushed data: OTLP metrics and Prometheus remote-write.
      - '--web.enable-otlp-receiver'
      - '--web.enable-remote-write-receiver'
      - '--enable-feature=promql-experimental-functions'
      # min == max pins TSDB blocks at 2h.
      - '--storage.tsdb.min-block-duration=2h'
      - '--storage.tsdb.max-block-duration=2h'
      - '--log.level=info'
      - '--storage.tsdb.retention.time=30d'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
    restart: unless-stopped
    networks:
      - otel-network
    healthcheck:
      test: [ "CMD-SHELL", "wget --spider -q http://localhost:9090/-/healthy || exit 1" ]
      interval: 10s
      timeout: 5s
      retries: 3

  # --- Logging ---
  loki:
    image: grafana/loki:latest
    environment:
      - TZ=Asia/Shanghai
    volumes:
      - ./loki-config.yaml:/etc/loki/local-config.yaml:ro
      - ./loki-data:/loki
    ports:
      - "3100:3100"
    # List form, consistent with the other services; same single argument.
    command: [ "-config.file=/etc/loki/local-config.yaml" ]
    networks:
      - otel-network
    healthcheck:
      test: [ "CMD-SHELL", "wget --spider -q http://localhost:3100/metrics || exit 1" ]
      interval: 15s
      timeout: 10s
      retries: 5
      start_period: 60s

  # --- Collection ---
  otel-collector:
    image: otel/opentelemetry-collector-contrib:latest
    environment:
      - TZ=Asia/Shanghai
    volumes:
      - ./otel-collector-config.yaml:/etc/otelcol-contrib/config.yaml:ro
    ports:
      - "1888:1888" # pprof
      - "8888:8888" # Prometheus metrics for Collector
      - "8889:8889" # Prometheus metrics for application indicators
      - "13133:13133" # health check
      - "4317:4317" # OTLP gRPC
      - "4318:4318" # OTLP HTTP
      - "55679:55679" # zpages
    networks:
      - otel-network
    # Short form: controls start order only, it does not wait for health.
    depends_on:
      - tempo
      - jaeger
      - prometheus
      - loki
    # NOTE(review): CMD-SHELL needs a shell and wget inside the image;
    # confirm the contrib image actually ships both.
    healthcheck:
      test: [ "CMD-SHELL", "wget --spider -q http://localhost:13133 || exit 1" ]
      interval: 10s
      timeout: 5s
      retries: 3
      start_period: 20s

  # --- Visualization ---
  grafana:
    image: grafana/grafana:latest
    ports:
      - "3000:3000"
    volumes:
      - ./grafana/provisioning:/etc/grafana/provisioning
      - ./grafana/dashboards:/var/lib/grafana/dashboards
      - ./grafana-data:/var/lib/grafana
    environment:
      # Admin credentials default to admin/admin (local use only); export
      # GF_SECURITY_ADMIN_USER / GF_SECURITY_ADMIN_PASSWORD or set them in
      # an .env file to override without editing this file.
      - GF_SECURITY_ADMIN_PASSWORD=${GF_SECURITY_ADMIN_PASSWORD:-admin}
      - GF_SECURITY_ADMIN_USER=${GF_SECURITY_ADMIN_USER:-admin}
      - TZ=Asia/Shanghai
      - GF_INSTALL_PLUGINS=grafana-pyroscope-datasource
      - GF_DASHBOARDS_DEFAULT_HOME_DASHBOARD_PATH=/var/lib/grafana/dashboards/home.json
    restart: unless-stopped
    networks:
      - otel-network
    depends_on:
      - prometheus
      - tempo
      - loki
    healthcheck:
      test: [ "CMD-SHELL", "wget --spider -q http://localhost:3000/api/health || exit 1" ]
      interval: 10s
      timeout: 5s
      retries: 3
# Named volumes declared for the stack, all with default settings.
# The service definitions in this file mount host directories
# (./prometheus-data, ./tempo-data, ...) rather than these volume names.
volumes:
  prometheus-data: {}
  tempo-data: {}
  loki-data: {}
  jaeger-data: {}
  grafana-data: {}
# Single user-defined bridge network shared by every service above.
networks:
  otel-network:
    # Fixed Docker-level name instead of the project-prefixed default.
    name: "network_otel"
    driver: bridge
    driver_opts:
      com.docker.network.enable_ipv6: "true"
    # Static IPv4 address pool for the bridge.
    ipam:
      config:
        - subnet: 172.28.0.0/16