From 564a02f34475aca0f1e879e5cf5aec5ea016fb81 Mon Sep 17 00:00:00 2001 From: houseme Date: Sun, 13 Jul 2025 20:22:46 +0800 Subject: [PATCH] feat(obs, net): Add Tempo service and enable dual-stack listener (#192) This commit introduces two key enhancements: the integration of Grafana Tempo for distributed tracing and the implementation of a dual-stack TCP listener for improved network compatibility. - **Observability**: - Adds the `tempo` service to the `docker-compose.yml` observability stack. - Tempo is configured to collect and store traces, integrating with the existing OpenTelemetry setup. - A custom `tempo-entrypoint.sh` script is included to manage volume permissions on startup. - **Networking**: - Modifies `http.rs` to support dual-stack (IPv4/IPv6) connections on a single socket. - By setting the `IPV6_V6ONLY` socket option to `false`, the server can now accept both IPv6 and IPv4-mapped IPv6 traffic, enhancing cross-platform support. --- .docker/observability/docker-compose.yml | 32 ++++++-- .../observability/grafana-datasources.yaml | 32 ++++++++ .../observability/otel-collector-config.yaml | 6 +- .docker/observability/prometheus.yml | 7 +- .docker/observability/tempo-data/.gitignore | 1 + .docker/observability/tempo-entrypoint.sh | 8 ++ .docker/observability/tempo.yaml | 55 ++++++++++++++ Cargo.lock | 27 +++++++ Cargo.toml | 1 + rustfs/Cargo.toml | 1 + rustfs/src/server/http.rs | 73 ++++++++++++++++++- 11 files changed, 232 insertions(+), 11 deletions(-) create mode 100644 .docker/observability/grafana-datasources.yaml create mode 100644 .docker/observability/tempo-data/.gitignore create mode 100755 .docker/observability/tempo-entrypoint.sh create mode 100644 .docker/observability/tempo.yaml diff --git a/.docker/observability/docker-compose.yml b/.docker/observability/docker-compose.yml index 84f5b933..0c843c6f 100644 --- a/.docker/observability/docker-compose.yml +++ b/.docker/observability/docker-compose.yml @@ -13,6 +13,22 @@ # limitations under the 
License. services: + + tempo: + image: grafana/tempo:latest + #user: root # The container must be started with root to execute chown in the script + #entrypoint: [ "/etc/tempo/entrypoint.sh" ] # Specify a custom entry point + command: [ "-config.file=/etc/tempo.yaml" ] # This is passed as a parameter to the entry point script + volumes: + - ./tempo-entrypoint.sh:/etc/tempo/entrypoint.sh # Mount entry point script + - ./tempo.yaml:/etc/tempo.yaml + - ./tempo-data:/var/tempo + ports: + - "3200:3200" # tempo + - "24317:4317" # otlp grpc + networks: + - otel-network + otel-collector: image: otel/opentelemetry-collector-contrib:0.129.1 environment: @@ -20,13 +36,13 @@ services: volumes: - ./otel-collector-config.yaml:/etc/otelcol-contrib/config.yaml ports: - - 1888:1888 - - 8888:8888 - - 8889:8889 - - 13133:13133 - - 4317:4317 - - 4318:4318 - - 55679:55679 + - "1888:1888" + - "8888:8888" + - "8889:8889" + - "13133:13133" + - "4317:4317" + - "4318:4318" + - "55679:55679" networks: - otel-network jaeger: @@ -64,6 +80,8 @@ services: image: grafana/grafana:12.0.2 ports: - "3000:3000" # Web UI + volumes: + - ./grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml environment: - GF_SECURITY_ADMIN_PASSWORD=admin - TZ=Asia/Shanghai diff --git a/.docker/observability/grafana-datasources.yaml b/.docker/observability/grafana-datasources.yaml new file mode 100644 index 00000000..fca5cff4 --- /dev/null +++ b/.docker/observability/grafana-datasources.yaml @@ -0,0 +1,32 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + uid: prometheus + access: proxy + orgId: 1 + url: http://prometheus:9090 + basicAuth: false + isDefault: false + version: 1 + editable: false + jsonData: + httpMethod: GET + - name: Tempo + type: tempo + access: proxy + orgId: 1 + url: http://tempo:3200 + basicAuth: false + isDefault: true + version: 1 + editable: false + apiVersion: 1 + uid: tempo + jsonData: + httpMethod: GET + serviceMap: + datasourceUid: prometheus 
+ streamingEnabled: + search: true \ No newline at end of file diff --git a/.docker/observability/otel-collector-config.yaml b/.docker/observability/otel-collector-config.yaml index fb813fe3..83042f6e 100644 --- a/.docker/observability/otel-collector-config.yaml +++ b/.docker/observability/otel-collector-config.yaml @@ -33,6 +33,10 @@ exporters: endpoint: "jaeger:4317" # Jaeger 的 OTLP gRPC 端点 tls: insecure: true # 开发环境禁用 TLS,生产环境需配置证书 + otlp/tempo: # OTLP 导出器,用于跟踪数据 + endpoint: "tempo:4317" # tempo 的 OTLP gRPC 端点 + tls: + insecure: true # 开发环境禁用 TLS,生产环境需配置证书 prometheus: # Prometheus 导出器,用于指标数据 endpoint: "0.0.0.0:8889" # Prometheus 刮取端点 namespace: "rustfs" # 指标前缀 @@ -53,7 +57,7 @@ service: traces: receivers: [ otlp ] processors: [ memory_limiter,batch ] - exporters: [ otlp/traces ] + exporters: [ otlp/traces,otlp/tempo ] metrics: receivers: [ otlp ] processors: [ batch ] diff --git a/.docker/observability/prometheus.yml b/.docker/observability/prometheus.yml index 0082481d..8d7526d7 100644 --- a/.docker/observability/prometheus.yml +++ b/.docker/observability/prometheus.yml @@ -18,8 +18,11 @@ global: scrape_configs: - job_name: 'otel-collector' static_configs: - - targets: ['otel-collector:8888'] # 从 Collector 刮取指标 + - targets: [ 'otel-collector:8888' ] # 从 Collector 刮取指标 - job_name: 'otel-metrics' static_configs: - - targets: ['otel-collector:8889'] # 应用指标 + - targets: [ 'otel-collector:8889' ] # 应用指标 + - job_name: 'tempo' + static_configs: + - targets: [ 'tempo:3200' ] diff --git a/.docker/observability/tempo-data/.gitignore b/.docker/observability/tempo-data/.gitignore new file mode 100644 index 00000000..f59ec20a --- /dev/null +++ b/.docker/observability/tempo-data/.gitignore @@ -0,0 +1 @@ +* \ No newline at end of file diff --git a/.docker/observability/tempo-entrypoint.sh b/.docker/observability/tempo-entrypoint.sh new file mode 100755 index 00000000..002c8e1c --- /dev/null +++ b/.docker/observability/tempo-entrypoint.sh @@ -0,0 +1,8 @@ +#!/bin/sh +# Run as 
root to fix directory permissions +chown -R 10001:10001 /var/tempo + +# Use su-exec (a lightweight sudo/gosu alternative, commonly used in Alpine mirroring) +# Switch to user 10001 and execute the original command (CMD) passed to the script +# "$@" represents all parameters passed to this script, i.e. command in docker-compose +exec su-exec 10001:10001 /tempo "$@" \ No newline at end of file diff --git a/.docker/observability/tempo.yaml b/.docker/observability/tempo.yaml new file mode 100644 index 00000000..01d13b8f --- /dev/null +++ b/.docker/observability/tempo.yaml @@ -0,0 +1,55 @@ +stream_over_http_enabled: true +server: + http_listen_port: 3200 + log_level: info + +query_frontend: + search: + duration_slo: 5s + throughput_bytes_slo: 1.073741824e+09 + metadata_slo: + duration_slo: 5s + throughput_bytes_slo: 1.073741824e+09 + trace_by_id: + duration_slo: 5s + +distributor: + receivers: + otlp: + protocols: + grpc: + endpoint: "tempo:4317" + +ingester: + max_block_duration: 5m # cut the headblock when this much time passes. this is being set for demo purposes and should probably be left alone normally + +compactor: + compaction: + block_retention: 1h # overall Tempo trace retention. 
set for demo purposes + +metrics_generator: + registry: + external_labels: + source: tempo + cluster: docker-compose + storage: + path: /var/tempo/generator/wal + remote_write: + - url: http://prometheus:9090/api/v1/write + send_exemplars: true + traces_storage: + path: /var/tempo/generator/traces + +storage: + trace: + backend: local # backend configuration to use + wal: + path: /var/tempo/wal # where to store the wal locally + local: + path: /var/tempo/blocks + +overrides: + defaults: + metrics_generator: + processors: [ service-graphs, span-metrics, local-blocks ] # enables metrics generator + generate_native_histograms: both \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 47e9fedb..51b8bbed 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3540,6 +3540,18 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a3d8a32ae18130a3c84dd492d4215c3d913c3b07c6b63c2eb3eb7ff1101ab7bf" +[[package]] +name = "enum-as-inner" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1e6a265c649f3f5979b601d26f1d05ada116434c87741c9493cb56218f76cbc" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.104", +] + [[package]] name = "enumflags2" version = "0.7.12" @@ -7899,6 +7911,7 @@ dependencies = [ "serde_urlencoded", "shadow-rs", "socket2 0.6.0", + "sysctl", "thiserror 2.0.12", "tikv-jemallocator", "time", @@ -9548,6 +9561,20 @@ dependencies = [ "syn 2.0.104", ] +[[package]] +name = "sysctl" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01198a2debb237c62b6826ec7081082d951f46dbb64b0e8c7649a452230d1dfc" +dependencies = [ + "bitflags 2.9.1", + "byteorder", + "enum-as-inner", + "libc", + "thiserror 1.0.69", + "walkdir", +] + [[package]] name = "sysinfo" version = "0.36.0" diff --git a/Cargo.toml b/Cargo.toml index 64dbb6d4..5d557b11 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -225,6 +225,7 @@ snap = "1.1.1" 
socket2 = "0.6.0" strum = { version = "0.27.1", features = ["derive"] } sysinfo = "0.36.0" +sysctl = "0.6.0" tempfile = "3.20.0" temp-env = "0.3.6" test-case = "3.3.1" diff --git a/rustfs/Cargo.toml b/rustfs/Cargo.toml index c8a562d0..97ea6a07 100644 --- a/rustfs/Cargo.toml +++ b/rustfs/Cargo.toml @@ -81,6 +81,7 @@ serde_json.workspace = true serde_urlencoded = { workspace = true } shadow-rs = { workspace = true, features = ["build", "metadata"] } socket2 = { workspace = true } +sysctl = { workspace = true } thiserror = { workspace = true } tracing.workspace = true time = { workspace = true, features = ["parsing", "formatting", "serde"] } diff --git a/rustfs/src/server/http.rs b/rustfs/src/server/http.rs index 34a20813..1beef881 100644 --- a/rustfs/src/server/http.rs +++ b/rustfs/src/server/http.rs @@ -64,7 +64,37 @@ pub async fn start_http_server( let server_address = server_addr.to_string(); // The listening address and port are obtained from the parameters - let listener = TcpListener::bind(server_address.clone()).await?; + // let listener = TcpListener::bind(server_address.clone()).await?; + + // The listening address and port are obtained from the parameters + let listener = { + let mut server_addr = server_addr; + let mut socket = socket2::Socket::new( + socket2::Domain::for_address(server_addr), + socket2::Type::STREAM, + Some(socket2::Protocol::TCP), + )?; + + if server_addr.is_ipv6() { + if let Err(e) = socket.set_only_v6(false) { + warn!("Failed to set IPV6_V6ONLY=false, falling back to IPv4-only: {}", e); + // Fallback to a new IPv4 socket if setting dual-stack fails. 
+ let ipv4_addr = SocketAddr::new(std::net::Ipv4Addr::UNSPECIFIED.into(), server_addr.port()); + server_addr = ipv4_addr; + socket = socket2::Socket::new(socket2::Domain::IPV4, socket2::Type::STREAM, Some(socket2::Protocol::TCP))?; + } + } + + // Common setup for both IPv4 and successful dual-stack IPv6 + let backlog = get_listen_backlog(); + socket.set_reuse_address(true)?; + // Set the socket to non-blocking before passing it to Tokio. + socket.set_nonblocking(true)?; + socket.bind(&server_addr.into())?; + socket.listen(backlog)?; + TcpListener::from_std(socket.into())? + }; + // Obtain the listener address let local_addr: SocketAddr = listener.local_addr()?; debug!("Listening on: {}", local_addr); @@ -427,3 +457,44 @@ fn check_auth(req: Request<()>) -> std::result::Result, Status> { _ => Err(Status::unauthenticated("No valid auth token")), } } + +/// Determines the listen backlog size. +/// +/// It tries to read the system's maximum connection queue length (`somaxconn`). +/// If reading fails, it falls back to a default value (e.g., 1024). +/// This makes the backlog size adaptive to the system configuration. 
/// Returns the backlog to pass to `listen(2)` for the server socket.
///
/// The system's `somaxconn` limit is used when it can be read and is a positive
/// number; in every other case (unreadable, unparsable, non-positive, or an
/// operating system without a known lookup) the fixed default of 1024 is returned.
/// The function never fails and never panics.
fn get_listen_backlog() -> i32 {
    const DEFAULT_BACKLOG: i32 = 1024;

    /// Best-effort read of the raw `somaxconn` value; `None` when unavailable.
    fn system_somaxconn() -> Option<i32> {
        #[cfg(target_os = "linux")]
        {
            // Linux exposes the limit as a decimal number followed by a newline.
            std::fs::read_to_string("/proc/sys/net/core/somaxconn")
                .ok()
                .and_then(|raw| raw.trim().parse().ok())
        }
        // Only macOS and FreeBSD expose the `kern.ipc.somaxconn` OID. NetBSD and
        // OpenBSD name it `kern.somaxconn`, so querying this key there could never
        // succeed; they now take the default path below instead.
        // NOTE(review): the `sysctl` crate also appears not to target NetBSD/OpenBSD,
        // which would make the previous cfg list a build error there - confirm.
        #[cfg(any(target_os = "macos", target_os = "freebsd"))]
        {
            use sysctl::Sysctl;
            match sysctl::Ctl::new("kern.ipc.somaxconn").and_then(|ctl| ctl.value()) {
                Ok(sysctl::CtlValue::Int(val)) => Some(val),
                _ => None,
            }
        }
        #[cfg(not(any(target_os = "linux", target_os = "macos", target_os = "freebsd")))]
        {
            // Windows and every other operating system: no lookup is attempted.
            None
        }
    }

    // A zero or negative limit is not a usable backlog; treat it like a failed read
    // rather than handing it to `listen`.
    system_somaxconn().filter(|&limit| limit > 0).unwrap_or(DEFAULT_BACKLOG)
}