mirror of
https://github.com/rustfs/rustfs.git
synced 2026-01-17 01:30:33 +00:00
feat(obs, net): Add Tempo service and enable dual-stack listener (#192)
This commit introduces two key enhancements: the integration of Grafana Tempo for distributed tracing and the implementation of a dual-stack TCP listener for improved network compatibility.

- **Observability**:
  - Adds the `tempo` service to the `docker-compose.yml` observability stack.
  - Tempo is configured to collect and store traces, integrating with the existing OpenTelemetry setup.
  - A custom `tempo-entrypoint.sh` script is included to manage volume permissions on startup.
- **Networking**:
  - Modifies `http.rs` to support dual-stack (IPv4/IPv6) connections on a single socket.
  - By setting the `IPV6_V6ONLY` socket option to `false`, the server can now accept both IPv6 and IPv4-mapped IPv6 traffic, enhancing cross-platform support.
This commit is contained in:
@@ -13,6 +13,22 @@
|
||||
# limitations under the License.
|
||||
|
||||
services:
|
||||
|
||||
tempo:
|
||||
image: grafana/tempo:latest
|
||||
#user: root # The container must be started with root to execute chown in the script
|
||||
#entrypoint: [ "/etc/tempo/entrypoint.sh" ] # Specify a custom entry point
|
||||
command: [ "-config.file=/etc/tempo.yaml" ] # This is passed as a parameter to the entry point script
|
||||
volumes:
|
||||
- ./tempo-entrypoint.sh:/etc/tempo/entrypoint.sh # Mount entry point script
|
||||
- ./tempo.yaml:/etc/tempo.yaml
|
||||
- ./tempo-data:/var/tempo
|
||||
ports:
|
||||
- "3200:3200" # tempo
|
||||
- "24317:4317" # otlp grpc
|
||||
networks:
|
||||
- otel-network
|
||||
|
||||
otel-collector:
|
||||
image: otel/opentelemetry-collector-contrib:0.129.1
|
||||
environment:
|
||||
@@ -20,13 +36,13 @@ services:
|
||||
volumes:
|
||||
- ./otel-collector-config.yaml:/etc/otelcol-contrib/config.yaml
|
||||
ports:
|
||||
- 1888:1888
|
||||
- 8888:8888
|
||||
- 8889:8889
|
||||
- 13133:13133
|
||||
- 4317:4317
|
||||
- 4318:4318
|
||||
- 55679:55679
|
||||
- "1888:1888"
|
||||
- "8888:8888"
|
||||
- "8889:8889"
|
||||
- "13133:13133"
|
||||
- "4317:4317"
|
||||
- "4318:4318"
|
||||
- "55679:55679"
|
||||
networks:
|
||||
- otel-network
|
||||
jaeger:
|
||||
@@ -64,6 +80,8 @@ services:
|
||||
image: grafana/grafana:12.0.2
|
||||
ports:
|
||||
- "3000:3000" # Web UI
|
||||
volumes:
|
||||
- ./grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml
|
||||
environment:
|
||||
- GF_SECURITY_ADMIN_PASSWORD=admin
|
||||
- TZ=Asia/Shanghai
|
||||
|
||||
32
.docker/observability/grafana-datasources.yaml
Normal file
32
.docker/observability/grafana-datasources.yaml
Normal file
@@ -0,0 +1,32 @@
|
||||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: Prometheus
|
||||
type: prometheus
|
||||
uid: prometheus
|
||||
access: proxy
|
||||
orgId: 1
|
||||
url: http://prometheus:9090
|
||||
basicAuth: false
|
||||
isDefault: false
|
||||
version: 1
|
||||
editable: false
|
||||
jsonData:
|
||||
httpMethod: GET
|
||||
- name: Tempo
|
||||
type: tempo
|
||||
access: proxy
|
||||
orgId: 1
|
||||
url: http://tempo:3200
|
||||
basicAuth: false
|
||||
isDefault: true
|
||||
version: 1
|
||||
editable: false
|
||||
apiVersion: 1
|
||||
uid: tempo
|
||||
jsonData:
|
||||
httpMethod: GET
|
||||
serviceMap:
|
||||
datasourceUid: prometheus
|
||||
streamingEnabled:
|
||||
search: true
|
||||
@@ -33,6 +33,10 @@ exporters:
|
||||
endpoint: "jaeger:4317" # Jaeger 的 OTLP gRPC 端点
|
||||
tls:
|
||||
insecure: true # 开发环境禁用 TLS,生产环境需配置证书
|
||||
otlp/tempo: # OTLP 导出器,用于跟踪数据
|
||||
endpoint: "tempo:4317" # tempo 的 OTLP gRPC 端点
|
||||
tls:
|
||||
insecure: true # 开发环境禁用 TLS,生产环境需配置证书
|
||||
prometheus: # Prometheus 导出器,用于指标数据
|
||||
endpoint: "0.0.0.0:8889" # Prometheus 刮取端点
|
||||
namespace: "rustfs" # 指标前缀
|
||||
@@ -53,7 +57,7 @@ service:
|
||||
traces:
|
||||
receivers: [ otlp ]
|
||||
processors: [ memory_limiter,batch ]
|
||||
exporters: [ otlp/traces ]
|
||||
exporters: [ otlp/traces,otlp/tempo ]
|
||||
metrics:
|
||||
receivers: [ otlp ]
|
||||
processors: [ batch ]
|
||||
|
||||
@@ -18,8 +18,11 @@ global:
|
||||
scrape_configs:
|
||||
- job_name: 'otel-collector'
|
||||
static_configs:
|
||||
- targets: ['otel-collector:8888'] # 从 Collector 刮取指标
|
||||
- targets: [ 'otel-collector:8888' ] # 从 Collector 刮取指标
|
||||
- job_name: 'otel-metrics'
|
||||
static_configs:
|
||||
- targets: ['otel-collector:8889'] # 应用指标
|
||||
- targets: [ 'otel-collector:8889' ] # 应用指标
|
||||
- job_name: 'tempo'
|
||||
static_configs:
|
||||
- targets: [ 'tempo:3200' ]
|
||||
|
||||
|
||||
1
.docker/observability/tempo-data/.gitignore
vendored
Normal file
1
.docker/observability/tempo-data/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
*
|
||||
8
.docker/observability/tempo-entrypoint.sh
Executable file
8
.docker/observability/tempo-entrypoint.sh
Executable file
@@ -0,0 +1,8 @@
|
||||
#!/bin/sh
|
||||
# Run as root to fix directory permissions
|
||||
chown -R 10001:10001 /var/tempo
|
||||
|
||||
# Use su-exec (a lightweight sudo/gosu alternative, commonly used in Alpine-based images)
|
||||
# Switch to user 10001 and execute the original command (CMD) passed to the script
|
||||
# "$@" represents all parameters passed to this script, i.e. command in docker-compose
|
||||
exec su-exec 10001:10001 /tempo "$@"
|
||||
55
.docker/observability/tempo.yaml
Normal file
55
.docker/observability/tempo.yaml
Normal file
@@ -0,0 +1,55 @@
|
||||
stream_over_http_enabled: true
|
||||
server:
|
||||
http_listen_port: 3200
|
||||
log_level: info
|
||||
|
||||
query_frontend:
|
||||
search:
|
||||
duration_slo: 5s
|
||||
throughput_bytes_slo: 1.073741824e+09
|
||||
metadata_slo:
|
||||
duration_slo: 5s
|
||||
throughput_bytes_slo: 1.073741824e+09
|
||||
trace_by_id:
|
||||
duration_slo: 5s
|
||||
|
||||
distributor:
|
||||
receivers:
|
||||
otlp:
|
||||
protocols:
|
||||
grpc:
|
||||
endpoint: "tempo:4317"
|
||||
|
||||
ingester:
|
||||
max_block_duration: 5m # cut the headblock when this much time passes. this is being set for demo purposes and should probably be left alone normally
|
||||
|
||||
compactor:
|
||||
compaction:
|
||||
block_retention: 1h # overall Tempo trace retention. set for demo purposes
|
||||
|
||||
metrics_generator:
|
||||
registry:
|
||||
external_labels:
|
||||
source: tempo
|
||||
cluster: docker-compose
|
||||
storage:
|
||||
path: /var/tempo/generator/wal
|
||||
remote_write:
|
||||
- url: http://prometheus:9090/api/v1/write
|
||||
send_exemplars: true
|
||||
traces_storage:
|
||||
path: /var/tempo/generator/traces
|
||||
|
||||
storage:
|
||||
trace:
|
||||
backend: local # backend configuration to use
|
||||
wal:
|
||||
path: /var/tempo/wal # where to store the wal locally
|
||||
local:
|
||||
path: /var/tempo/blocks
|
||||
|
||||
overrides:
|
||||
defaults:
|
||||
metrics_generator:
|
||||
processors: [ service-graphs, span-metrics, local-blocks ] # enables metrics generator
|
||||
generate_native_histograms: both
|
||||
27
Cargo.lock
generated
27
Cargo.lock
generated
@@ -3540,6 +3540,18 @@ version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a3d8a32ae18130a3c84dd492d4215c3d913c3b07c6b63c2eb3eb7ff1101ab7bf"
|
||||
|
||||
[[package]]
|
||||
name = "enum-as-inner"
|
||||
version = "0.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a1e6a265c649f3f5979b601d26f1d05ada116434c87741c9493cb56218f76cbc"
|
||||
dependencies = [
|
||||
"heck 0.5.0",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.104",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "enumflags2"
|
||||
version = "0.7.12"
|
||||
@@ -7899,6 +7911,7 @@ dependencies = [
|
||||
"serde_urlencoded",
|
||||
"shadow-rs",
|
||||
"socket2 0.6.0",
|
||||
"sysctl",
|
||||
"thiserror 2.0.12",
|
||||
"tikv-jemallocator",
|
||||
"time",
|
||||
@@ -9548,6 +9561,20 @@ dependencies = [
|
||||
"syn 2.0.104",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sysctl"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "01198a2debb237c62b6826ec7081082d951f46dbb64b0e8c7649a452230d1dfc"
|
||||
dependencies = [
|
||||
"bitflags 2.9.1",
|
||||
"byteorder",
|
||||
"enum-as-inner",
|
||||
"libc",
|
||||
"thiserror 1.0.69",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sysinfo"
|
||||
version = "0.36.0"
|
||||
|
||||
@@ -225,6 +225,7 @@ snap = "1.1.1"
|
||||
socket2 = "0.6.0"
|
||||
strum = { version = "0.27.1", features = ["derive"] }
|
||||
sysinfo = "0.36.0"
|
||||
sysctl = "0.6.0"
|
||||
tempfile = "3.20.0"
|
||||
temp-env = "0.3.6"
|
||||
test-case = "3.3.1"
|
||||
|
||||
@@ -81,6 +81,7 @@ serde_json.workspace = true
|
||||
serde_urlencoded = { workspace = true }
|
||||
shadow-rs = { workspace = true, features = ["build", "metadata"] }
|
||||
socket2 = { workspace = true }
|
||||
sysctl = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
tracing.workspace = true
|
||||
time = { workspace = true, features = ["parsing", "formatting", "serde"] }
|
||||
|
||||
@@ -64,7 +64,37 @@ pub async fn start_http_server(
|
||||
let server_address = server_addr.to_string();
|
||||
|
||||
// The listening address and port are obtained from the parameters
|
||||
let listener = TcpListener::bind(server_address.clone()).await?;
|
||||
// let listener = TcpListener::bind(server_address.clone()).await?;
|
||||
|
||||
// The listening address and port are obtained from the parameters
|
||||
let listener = {
|
||||
let mut server_addr = server_addr;
|
||||
let mut socket = socket2::Socket::new(
|
||||
socket2::Domain::for_address(server_addr),
|
||||
socket2::Type::STREAM,
|
||||
Some(socket2::Protocol::TCP),
|
||||
)?;
|
||||
|
||||
if server_addr.is_ipv6() {
|
||||
if let Err(e) = socket.set_only_v6(false) {
|
||||
warn!("Failed to set IPV6_V6ONLY=false, falling back to IPv4-only: {}", e);
|
||||
// Fallback to a new IPv4 socket if setting dual-stack fails.
|
||||
let ipv4_addr = SocketAddr::new(std::net::Ipv4Addr::UNSPECIFIED.into(), server_addr.port());
|
||||
server_addr = ipv4_addr;
|
||||
socket = socket2::Socket::new(socket2::Domain::IPV4, socket2::Type::STREAM, Some(socket2::Protocol::TCP))?;
|
||||
}
|
||||
}
|
||||
|
||||
// Common setup for both IPv4 and successful dual-stack IPv6
|
||||
let backlog = get_listen_backlog();
|
||||
socket.set_reuse_address(true)?;
|
||||
// Set the socket to non-blocking before passing it to Tokio.
|
||||
socket.set_nonblocking(true)?;
|
||||
socket.bind(&server_addr.into())?;
|
||||
socket.listen(backlog)?;
|
||||
TcpListener::from_std(socket.into())?
|
||||
};
|
||||
|
||||
// Obtain the listener address
|
||||
let local_addr: SocketAddr = listener.local_addr()?;
|
||||
debug!("Listening on: {}", local_addr);
|
||||
@@ -427,3 +457,44 @@ fn check_auth(req: Request<()>) -> std::result::Result<Request<()>, Status> {
|
||||
_ => Err(Status::unauthenticated("No valid auth token")),
|
||||
}
|
||||
}
|
||||
|
||||
/// Picks the backlog value passed to `listen` for the server socket.
///
/// The value follows the operating system's configured connection-queue
/// limit (`somaxconn`) when it can be read, so the backlog adapts to the
/// host's tuning:
///
/// * Linux: parsed from `/proc/sys/net/core/somaxconn`.
/// * macOS, FreeBSD, NetBSD, OpenBSD: read from the `kern.ipc.somaxconn` sysctl.
/// * Every other platform (e.g. Windows): the built-in default.
///
/// Whenever the value cannot be read or parsed, the default of 1024 is returned.
fn get_listen_backlog() -> i32 {
    const DEFAULT_BACKLOG: i32 = 1024;

    #[cfg(target_os = "linux")]
    {
        // The kernel exposes the limit as a decimal number followed by a newline.
        std::fs::read_to_string("/proc/sys/net/core/somaxconn")
            .ok()
            .and_then(|raw| raw.trim().parse::<i32>().ok())
            .unwrap_or(DEFAULT_BACKLOG)
    }
    #[cfg(any(target_os = "macos", target_os = "freebsd", target_os = "netbsd", target_os = "openbsd"))]
    {
        // macOS and the BSD variants publish the limit through sysctl.
        use sysctl::Sysctl;

        let queried = sysctl::Ctl::new("kern.ipc.somaxconn")
            .ok()
            .and_then(|ctl| ctl.value().ok());

        // Only an integer-typed value is usable; anything else falls back to the default.
        match queried {
            Some(sysctl::CtlValue::Int(limit)) => limit,
            _ => DEFAULT_BACKLOG,
        }
    }
    #[cfg(not(any(
        target_os = "linux",
        target_os = "macos",
        target_os = "freebsd",
        target_os = "netbsd",
        target_os = "openbsd"
    )))]
    {
        // Windows and all remaining operating systems use the default.
        DEFAULT_BACKLOG
    }
}
|
||||
|
||||
Reference in New Issue
Block a user