feat(obs, net): Add Tempo service and enable dual-stack listener (#192)

This commit introduces two key enhancements: the integration of Grafana Tempo for distributed tracing and the implementation of a dual-stack TCP listener for improved network compatibility.

- **Observability**:
  - Adds the `tempo` service to the `docker-compose.yml` observability stack.
  - Tempo is configured to collect and store traces, integrating with the existing OpenTelemetry setup.
  - A custom `tempo-entrypoint.sh` script is included to manage volume permissions on startup.

- **Networking**:
  - Modifies `http.rs` to support dual-stack (IPv4/IPv6) connections on a single socket.
  - By setting the `IPV6_V6ONLY` socket option to `false`, the server can now accept both IPv6 and IPv4-mapped IPv6 traffic, enhancing cross-platform support.
This commit is contained in:
houseme
2025-07-13 20:22:46 +08:00
committed by GitHub
parent 5b582a4234
commit 564a02f344
11 changed files with 232 additions and 11 deletions

View File

@@ -13,6 +13,22 @@
# limitations under the License.
services:
tempo:
image: grafana/tempo:latest
#user: root # The container must be started with root to execute chown in the script
#entrypoint: [ "/etc/tempo/entrypoint.sh" ] # Specify a custom entry point
command: [ "-config.file=/etc/tempo.yaml" ] # This is passed as a parameter to the entry point script
volumes:
- ./tempo-entrypoint.sh:/etc/tempo/entrypoint.sh # Mount entry point script
- ./tempo.yaml:/etc/tempo.yaml
- ./tempo-data:/var/tempo
ports:
- "3200:3200" # tempo
- "24317:4317" # otlp grpc
networks:
- otel-network
otel-collector:
image: otel/opentelemetry-collector-contrib:0.129.1
environment:
@@ -20,13 +36,13 @@ services:
volumes:
- ./otel-collector-config.yaml:/etc/otelcol-contrib/config.yaml
ports:
- 1888:1888
- 8888:8888
- 8889:8889
- 13133:13133
- 4317:4317
- 4318:4318
- 55679:55679
- "1888:1888"
- "8888:8888"
- "8889:8889"
- "13133:13133"
- "4317:4317"
- "4318:4318"
- "55679:55679"
networks:
- otel-network
jaeger:
@@ -64,6 +80,8 @@ services:
image: grafana/grafana:12.0.2
ports:
- "3000:3000" # Web UI
volumes:
- ./grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml
environment:
- GF_SECURITY_ADMIN_PASSWORD=admin
- TZ=Asia/Shanghai

View File

@@ -0,0 +1,32 @@
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
uid: prometheus
access: proxy
orgId: 1
url: http://prometheus:9090
basicAuth: false
isDefault: false
version: 1
editable: false
jsonData:
httpMethod: GET
- name: Tempo
type: tempo
access: proxy
orgId: 1
url: http://tempo:3200
basicAuth: false
isDefault: true
version: 1
editable: false
apiVersion: 1
uid: tempo
jsonData:
httpMethod: GET
serviceMap:
datasourceUid: prometheus
streamingEnabled:
search: true

View File

@@ -33,6 +33,10 @@ exporters:
endpoint: "jaeger:4317" # Jaeger 的 OTLP gRPC 端点
tls:
insecure: true # 开发环境禁用 TLS生产环境需配置证书
otlp/tempo: # OTLP 导出器,用于跟踪数据
endpoint: "tempo:4317" # tempo 的 OTLP gRPC 端点
tls:
insecure: true # 开发环境禁用 TLS生产环境需配置证书
prometheus: # Prometheus 导出器,用于指标数据
endpoint: "0.0.0.0:8889" # Prometheus 刮取端点
namespace: "rustfs" # 指标前缀
@@ -53,7 +57,7 @@ service:
traces:
receivers: [ otlp ]
processors: [ memory_limiter,batch ]
exporters: [ otlp/traces ]
exporters: [ otlp/traces,otlp/tempo ]
metrics:
receivers: [ otlp ]
processors: [ batch ]

View File

@@ -18,8 +18,11 @@ global:
scrape_configs:
- job_name: 'otel-collector'
static_configs:
- targets: ['otel-collector:8888'] # 从 Collector 刮取指标
- targets: [ 'otel-collector:8888' ] # 从 Collector 刮取指标
- job_name: 'otel-metrics'
static_configs:
- targets: ['otel-collector:8889'] # 应用指标
- targets: [ 'otel-collector:8889' ] # 应用指标
- job_name: 'tempo'
static_configs:
- targets: [ 'tempo:3200' ]

View File

@@ -0,0 +1 @@
*

View File

@@ -0,0 +1,8 @@
#!/bin/sh
# Run as root to fix directory permissions
chown -R 10001:10001 /var/tempo
# Use su-exec (a lightweight sudo/gosu alternative, commonly used in Alpine mirroring)
# Switch to user 10001 and execute the original command (CMD) passed to the script
# "$@" represents all parameters passed to this script, i.e. command in docker-compose
exec su-exec 10001:10001 /tempo "$@"

View File

@@ -0,0 +1,55 @@
stream_over_http_enabled: true
server:
http_listen_port: 3200
log_level: info
query_frontend:
search:
duration_slo: 5s
throughput_bytes_slo: 1.073741824e+09
metadata_slo:
duration_slo: 5s
throughput_bytes_slo: 1.073741824e+09
trace_by_id:
duration_slo: 5s
distributor:
receivers:
otlp:
protocols:
grpc:
endpoint: "tempo:4317"
ingester:
max_block_duration: 5m # cut the headblock when this much time passes. this is being set for demo purposes and should probably be left alone normally
compactor:
compaction:
block_retention: 1h # overall Tempo trace retention. set for demo purposes
metrics_generator:
registry:
external_labels:
source: tempo
cluster: docker-compose
storage:
path: /var/tempo/generator/wal
remote_write:
- url: http://prometheus:9090/api/v1/write
send_exemplars: true
traces_storage:
path: /var/tempo/generator/traces
storage:
trace:
backend: local # backend configuration to use
wal:
path: /var/tempo/wal # where to store the wal locally
local:
path: /var/tempo/blocks
overrides:
defaults:
metrics_generator:
processors: [ service-graphs, span-metrics, local-blocks ] # enables metrics generator
generate_native_histograms: both

27
Cargo.lock generated
View File

@@ -3540,6 +3540,18 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3d8a32ae18130a3c84dd492d4215c3d913c3b07c6b63c2eb3eb7ff1101ab7bf"
[[package]]
name = "enum-as-inner"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1e6a265c649f3f5979b601d26f1d05ada116434c87741c9493cb56218f76cbc"
dependencies = [
"heck 0.5.0",
"proc-macro2",
"quote",
"syn 2.0.104",
]
[[package]]
name = "enumflags2"
version = "0.7.12"
@@ -7899,6 +7911,7 @@ dependencies = [
"serde_urlencoded",
"shadow-rs",
"socket2 0.6.0",
"sysctl",
"thiserror 2.0.12",
"tikv-jemallocator",
"time",
@@ -9548,6 +9561,20 @@ dependencies = [
"syn 2.0.104",
]
[[package]]
name = "sysctl"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01198a2debb237c62b6826ec7081082d951f46dbb64b0e8c7649a452230d1dfc"
dependencies = [
"bitflags 2.9.1",
"byteorder",
"enum-as-inner",
"libc",
"thiserror 1.0.69",
"walkdir",
]
[[package]]
name = "sysinfo"
version = "0.36.0"

View File

@@ -225,6 +225,7 @@ snap = "1.1.1"
socket2 = "0.6.0"
strum = { version = "0.27.1", features = ["derive"] }
sysinfo = "0.36.0"
sysctl = "0.6.0"
tempfile = "3.20.0"
temp-env = "0.3.6"
test-case = "3.3.1"

View File

@@ -81,6 +81,7 @@ serde_json.workspace = true
serde_urlencoded = { workspace = true }
shadow-rs = { workspace = true, features = ["build", "metadata"] }
socket2 = { workspace = true }
sysctl = { workspace = true }
thiserror = { workspace = true }
tracing.workspace = true
time = { workspace = true, features = ["parsing", "formatting", "serde"] }

View File

@@ -64,7 +64,37 @@ pub async fn start_http_server(
let server_address = server_addr.to_string();
// The listening address and port are obtained from the parameters
let listener = TcpListener::bind(server_address.clone()).await?;
// let listener = TcpListener::bind(server_address.clone()).await?;
// The listening address and port are obtained from the parameters
let listener = {
let mut server_addr = server_addr;
let mut socket = socket2::Socket::new(
socket2::Domain::for_address(server_addr),
socket2::Type::STREAM,
Some(socket2::Protocol::TCP),
)?;
if server_addr.is_ipv6() {
if let Err(e) = socket.set_only_v6(false) {
warn!("Failed to set IPV6_V6ONLY=false, falling back to IPv4-only: {}", e);
// Fallback to a new IPv4 socket if setting dual-stack fails.
let ipv4_addr = SocketAddr::new(std::net::Ipv4Addr::UNSPECIFIED.into(), server_addr.port());
server_addr = ipv4_addr;
socket = socket2::Socket::new(socket2::Domain::IPV4, socket2::Type::STREAM, Some(socket2::Protocol::TCP))?;
}
}
// Common setup for both IPv4 and successful dual-stack IPv6
let backlog = get_listen_backlog();
socket.set_reuse_address(true)?;
// Set the socket to non-blocking before passing it to Tokio.
socket.set_nonblocking(true)?;
socket.bind(&server_addr.into())?;
socket.listen(backlog)?;
TcpListener::from_std(socket.into())?
};
// Obtain the listener address
let local_addr: SocketAddr = listener.local_addr()?;
debug!("Listening on: {}", local_addr);
@@ -427,3 +457,44 @@ fn check_auth(req: Request<()>) -> std::result::Result<Request<()>, Status> {
_ => Err(Status::unauthenticated("No valid auth token")),
}
}
/// Determines the listen backlog size.
///
/// It tries to read the system's maximum connection queue length (`somaxconn`).
/// If reading fails, it falls back to a default value (e.g., 1024).
/// This makes the backlog size adaptive to the system configuration.
fn get_listen_backlog() -> i32 {
const DEFAULT_BACKLOG: i32 = 1024;
#[cfg(target_os = "linux")]
{
// For Linux, read from /proc/sys/net/core/somaxconn
match std::fs::read_to_string("/proc/sys/net/core/somaxconn") {
Ok(s) => s.trim().parse().unwrap_or(DEFAULT_BACKLOG),
Err(_) => DEFAULT_BACKLOG,
}
}
#[cfg(any(target_os = "macos", target_os = "freebsd", target_os = "netbsd", target_os = "openbsd"))]
{
// For macOS and BSD variants, use sysctl
use sysctl::Sysctl;
match sysctl::Ctl::new("kern.ipc.somaxconn") {
Ok(ctl) => match ctl.value() {
Ok(sysctl::CtlValue::Int(val)) => val,
_ => DEFAULT_BACKLOG,
},
Err(_) => DEFAULT_BACKLOG,
}
}
#[cfg(not(any(
target_os = "linux",
target_os = "macos",
target_os = "freebsd",
target_os = "netbsd",
target_os = "openbsd"
)))]
{
// Fallback for Windows and other operating systems
DEFAULT_BACKLOG
}
}