mirror of
https://github.com/rustfs/rustfs.git
synced 2026-03-17 14:24:08 +00:00
feat(obs): add advanced log management configuration (#2016)
Co-authored-by: houseme <housemecn@gmail.com> Co-authored-by: 安正超 <anzhengchao@gmail.com> Co-authored-by: 唐小鸭 <tangtang1251@qq.com>
This commit is contained in:
362
Cargo.lock
generated
362
Cargo.lock
generated
@@ -1724,17 +1724,6 @@ version = "0.8.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
|
||||
|
||||
[[package]]
|
||||
name = "core_affinity"
|
||||
version = "0.8.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a034b3a7b624016c6e13f5df875747cc25f884156aad2abd12b6c46797971342"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"num_cpus",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cpp_demangle"
|
||||
version = "0.5.1"
|
||||
@@ -3300,17 +3289,6 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "erased-serde"
|
||||
version = "0.4.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "89e8918065695684b2b0702da20382d5ae6065cf3327bc2d6436bd49a71ce9f3"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"serde_core",
|
||||
"typeid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "errno"
|
||||
version = "0.3.14"
|
||||
@@ -3440,30 +3418,6 @@ dependencies = [
|
||||
"zlib-rs",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "flexi_logger"
|
||||
version = "0.31.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aea7feddba9b4e83022270d49a58d4a1b3fdad04b34f78cf1ce471f698e42672"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"core_affinity",
|
||||
"crossbeam-channel",
|
||||
"crossbeam-queue",
|
||||
"flate2",
|
||||
"log",
|
||||
"notify-debouncer-mini",
|
||||
"nu-ansi-term",
|
||||
"regex",
|
||||
"serde",
|
||||
"serde_derive",
|
||||
"serde_json",
|
||||
"thiserror 2.0.18",
|
||||
"toml",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "flume"
|
||||
version = "0.11.1"
|
||||
@@ -3508,15 +3462,6 @@ version = "1.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
|
||||
|
||||
[[package]]
|
||||
name = "fsevent-sys"
|
||||
version = "4.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "76ee7a02da4d231650c7cea31349b889be2f45ddb3ef3032d2ec8185f6313fd2"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "futures"
|
||||
version = "0.3.32"
|
||||
@@ -4440,9 +4385,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "inferno"
|
||||
version = "0.12.4"
|
||||
version = "0.12.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d35223c50fdd26419a4ccea2c73be68bd2b29a3d7d6123ffe101c17f4c20a52a"
|
||||
checksum = "20dd69640582458beceefcf045f8de34263d45194999c9a49fcd53e5b503d522"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"clap",
|
||||
@@ -4455,31 +4400,11 @@ dependencies = [
|
||||
"log",
|
||||
"num-format",
|
||||
"once_cell",
|
||||
"quick-xml 0.38.4",
|
||||
"quick-xml 0.39.2",
|
||||
"rgb",
|
||||
"str_stack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "inotify"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f37dccff2791ab604f9babef0ba14fbe0be30bd368dc541e2b08d07c8aa908f3"
|
||||
dependencies = [
|
||||
"bitflags 2.11.0",
|
||||
"inotify-sys",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "inotify-sys"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e05c02b5e89bff3b946cedeca278abc628fe811e604f027c45a8aa3cf793d0eb"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "inout"
|
||||
version = "0.1.4"
|
||||
@@ -4702,26 +4627,6 @@ dependencies = [
|
||||
"simple_asn1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kqueue"
|
||||
version = "1.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "eac30106d7dce88daf4a3fcb4879ea939476d5074a9b7ddd0fb97fa4bed5596a"
|
||||
dependencies = [
|
||||
"kqueue-sys",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kqueue-sys"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ed9625ffda8729b85e45cf04090035ac368927b8cebc34898e7c120f52e4838b"
|
||||
dependencies = [
|
||||
"bitflags 1.3.2",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lazy-regex"
|
||||
version = "3.6.0"
|
||||
@@ -4877,12 +4782,13 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "libredox"
|
||||
version = "0.1.12"
|
||||
version = "0.1.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3d0b95e02c851351f877147b7deea7b1afb1df71b63aa5f8270716e0c5720616"
|
||||
checksum = "1744e39d1d6a9948f4f388969627434e31128196de472883b39f148769bfe30a"
|
||||
dependencies = [
|
||||
"bitflags 2.11.0",
|
||||
"libc",
|
||||
"plain",
|
||||
"redox_syscall 0.7.3",
|
||||
]
|
||||
|
||||
@@ -4979,10 +4885,6 @@ name = "log"
|
||||
version = "0.4.29"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
|
||||
dependencies = [
|
||||
"serde_core",
|
||||
"value-bag",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lru"
|
||||
@@ -5178,7 +5080,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"log",
|
||||
"wasi",
|
||||
"windows-sys 0.61.2",
|
||||
]
|
||||
@@ -5303,45 +5204,6 @@ dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "notify"
|
||||
version = "8.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4d3d07927151ff8575b7087f245456e549fea62edf0ec4e565a5ee50c8402bc3"
|
||||
dependencies = [
|
||||
"bitflags 2.11.0",
|
||||
"fsevent-sys",
|
||||
"inotify",
|
||||
"kqueue",
|
||||
"libc",
|
||||
"log",
|
||||
"mio",
|
||||
"notify-types",
|
||||
"walkdir",
|
||||
"windows-sys 0.60.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "notify-debouncer-mini"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "17849edfaabd9a5fef1c606d99cfc615a8e99f7ac4366406d86c7942a3184cf2"
|
||||
dependencies = [
|
||||
"log",
|
||||
"notify",
|
||||
"notify-types",
|
||||
"tempfile",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "notify-types"
|
||||
version = "2.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "42b8cfee0e339a0337359f3c88165702ac6e600dc01c0cc9579a92d62b08477a"
|
||||
dependencies = [
|
||||
"bitflags 2.11.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ntapi"
|
||||
version = "0.4.3"
|
||||
@@ -6117,6 +5979,12 @@ version = "0.3.32"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
|
||||
|
||||
[[package]]
|
||||
name = "plain"
|
||||
version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6"
|
||||
|
||||
[[package]]
|
||||
name = "plotters"
|
||||
version = "0.3.7"
|
||||
@@ -6241,7 +6109,7 @@ dependencies = [
|
||||
"anyhow",
|
||||
"backtrace",
|
||||
"flate2",
|
||||
"inferno 0.12.4",
|
||||
"inferno 0.12.5",
|
||||
"num",
|
||||
"paste",
|
||||
"prost",
|
||||
@@ -6493,15 +6361,6 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quick-xml"
|
||||
version = "0.38.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quick-xml"
|
||||
version = "0.39.2"
|
||||
@@ -7506,8 +7365,6 @@ dependencies = [
|
||||
name = "rustfs-keystone"
|
||||
version = "0.0.5"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"axum",
|
||||
"bytes",
|
||||
"futures",
|
||||
"http 1.4.0",
|
||||
@@ -7516,7 +7373,6 @@ dependencies = [
|
||||
"hyper",
|
||||
"moka",
|
||||
"reqwest 0.13.2",
|
||||
"rustfs-common",
|
||||
"rustfs-credentials",
|
||||
"rustfs-policy",
|
||||
"serde",
|
||||
@@ -7526,7 +7382,6 @@ dependencies = [
|
||||
"tokio",
|
||||
"tower",
|
||||
"tracing",
|
||||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -7652,9 +7507,9 @@ dependencies = [
|
||||
name = "rustfs-obs"
|
||||
version = "0.0.5"
|
||||
dependencies = [
|
||||
"flexi_logger",
|
||||
"flate2",
|
||||
"glob",
|
||||
"metrics",
|
||||
"nu-ansi-term",
|
||||
"nvml-wrapper",
|
||||
"opentelemetry",
|
||||
"opentelemetry-appender-tracing",
|
||||
@@ -7667,6 +7522,7 @@ dependencies = [
|
||||
"serde",
|
||||
"smallvec",
|
||||
"sysinfo",
|
||||
"tempfile",
|
||||
"thiserror 2.0.18",
|
||||
"tokio",
|
||||
"tracing",
|
||||
@@ -7674,6 +7530,7 @@ dependencies = [
|
||||
"tracing-error",
|
||||
"tracing-opentelemetry",
|
||||
"tracing-subscriber",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -8388,15 +8245,6 @@ dependencies = [
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_fmt"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6e497af288b3b95d067a23a4f749f2861121ffcb2f6d8379310dcda040c345ed"
|
||||
dependencies = [
|
||||
"serde_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.149"
|
||||
@@ -8430,15 +8278,6 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_spanned"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f8bbf91e5a4d6315eee45e704372590b30e260ee83af6639d64557f51b067776"
|
||||
dependencies = [
|
||||
"serde_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_urlencoded"
|
||||
version = "0.7.1"
|
||||
@@ -8683,7 +8522,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9b3b8565691b22d2bdfc066426ed48f837fc0c5f2c8cad8d9718f7f99d6995c1"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"erased-serde 0.3.31",
|
||||
"erased-serde",
|
||||
"rustversion",
|
||||
"serde_core",
|
||||
]
|
||||
@@ -8973,84 +8812,6 @@ dependencies = [
|
||||
"tokio-rustls",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sval"
|
||||
version = "2.17.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c1aaf178a50bbdd86043fce9bf0a5867007d9b382db89d1c96ccae4601ff1ff9"
|
||||
|
||||
[[package]]
|
||||
name = "sval_buffer"
|
||||
version = "2.17.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f89273e48f03807ebf51c4d81c52f28d35ffa18a593edf97e041b52de143df89"
|
||||
dependencies = [
|
||||
"sval",
|
||||
"sval_ref",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sval_dynamic"
|
||||
version = "2.17.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0430f4e18e7eba21a49d10d25a8dec3ce0e044af40b162347e99a8e3c3ced864"
|
||||
dependencies = [
|
||||
"sval",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sval_fmt"
|
||||
version = "2.17.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "835f51b9d7331b9d7fc48fc716c02306fa88c4a076b1573531910c91a525882d"
|
||||
dependencies = [
|
||||
"itoa",
|
||||
"ryu",
|
||||
"sval",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sval_json"
|
||||
version = "2.17.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "13cbfe3ef406ee2366e7e8ab3678426362085fa9eaedf28cb878a967159dced3"
|
||||
dependencies = [
|
||||
"itoa",
|
||||
"ryu",
|
||||
"sval",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sval_nested"
|
||||
version = "2.17.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8b20358af4af787c34321a86618c3cae12eabdd0e9df22cd9dd2c6834214c518"
|
||||
dependencies = [
|
||||
"sval",
|
||||
"sval_buffer",
|
||||
"sval_ref",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sval_ref"
|
||||
version = "2.17.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fb5e500f8eb2efa84f75e7090f7fc43f621b9f8b6cde571c635b3855f97b332a"
|
||||
dependencies = [
|
||||
"sval",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sval_serde"
|
||||
version = "2.17.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ca2032ae39b11dcc6c18d5fbc50a661ea191cac96484c59ccf49b002261ca2c1"
|
||||
dependencies = [
|
||||
"serde_core",
|
||||
"sval",
|
||||
"sval_nested",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "symbolic-common"
|
||||
version = "12.17.2"
|
||||
@@ -9468,45 +9229,6 @@ dependencies = [
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "toml"
|
||||
version = "0.9.12+spec-1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cf92845e79fc2e2def6a5d828f0801e29a2f8acc037becc5ab08595c7d5e9863"
|
||||
dependencies = [
|
||||
"indexmap 2.13.0",
|
||||
"serde_core",
|
||||
"serde_spanned",
|
||||
"toml_datetime",
|
||||
"toml_parser",
|
||||
"toml_writer",
|
||||
"winnow",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "toml_datetime"
|
||||
version = "0.7.5+spec-1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347"
|
||||
dependencies = [
|
||||
"serde_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "toml_parser"
|
||||
version = "1.0.9+spec-1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "702d4415e08923e7e1ef96cd5727c0dfed80b4d2fa25db9647fe5eb6f7c5a4c4"
|
||||
dependencies = [
|
||||
"winnow",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "toml_writer"
|
||||
version = "1.0.6+spec-1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ab16f14aed21ee8bfd8ec22513f7287cd4a91aa92e44edfe2c17ddd004e92607"
|
||||
|
||||
[[package]]
|
||||
name = "tonic"
|
||||
version = "0.14.5"
|
||||
@@ -9775,12 +9497,6 @@ version = "0.12.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8e28f89b80c87b8fb0cf04ab448d5dd0dd0ade2f8891bae878de66a75a28600e"
|
||||
|
||||
[[package]]
|
||||
name = "typeid"
|
||||
version = "1.0.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bc7d623258602320d5c55d1bc22793b57daff0ec7efc270ea7d55ce1d5f5471c"
|
||||
|
||||
[[package]]
|
||||
name = "typenum"
|
||||
version = "1.19.0"
|
||||
@@ -9931,42 +9647,6 @@ version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65"
|
||||
|
||||
[[package]]
|
||||
name = "value-bag"
|
||||
version = "1.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7ba6f5989077681266825251a52748b8c1d8a4ad098cc37e440103d0ea717fc0"
|
||||
dependencies = [
|
||||
"value-bag-serde1",
|
||||
"value-bag-sval2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "value-bag-serde1"
|
||||
version = "1.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "16530907bfe2999a1773ca5900a65101e092c70f642f25cc23ca0c43573262c5"
|
||||
dependencies = [
|
||||
"erased-serde 0.4.9",
|
||||
"serde_core",
|
||||
"serde_fmt",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "value-bag-sval2"
|
||||
version = "1.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d00ae130edd690eaa877e4f40605d534790d1cf1d651e7685bd6a144521b251f"
|
||||
dependencies = [
|
||||
"sval",
|
||||
"sval_buffer",
|
||||
"sval_dynamic",
|
||||
"sval_fmt",
|
||||
"sval_json",
|
||||
"sval_ref",
|
||||
"sval_serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "vaultrs"
|
||||
version = "0.7.4"
|
||||
@@ -10590,12 +10270,6 @@ version = "0.53.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650"
|
||||
|
||||
[[package]]
|
||||
name = "winnow"
|
||||
version = "0.7.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829"
|
||||
|
||||
[[package]]
|
||||
name = "wit-bindgen"
|
||||
version = "0.51.0"
|
||||
|
||||
@@ -200,7 +200,6 @@ derive_builder = "0.20.2"
|
||||
enumset = "1.1.10"
|
||||
faster-hex = "0.10.0"
|
||||
flate2 = "1.1.9"
|
||||
flexi_logger = { version = "0.31.8", features = ["trc", "dont_minimize_extra_stacks", "compress", "kv", "json"] }
|
||||
glob = "0.3.3"
|
||||
google-cloud-storage = "1.8.0"
|
||||
google-cloud-auth = "1.6.0"
|
||||
@@ -219,7 +218,6 @@ md5 = "0.8.0"
|
||||
mime_guess = "2.0.5"
|
||||
moka = { version = "0.12.13", features = ["future"] }
|
||||
netif = "0.1.6"
|
||||
nu-ansi-term = "0.50.3"
|
||||
num_cpus = { version = "1.17.0" }
|
||||
nvml-wrapper = "0.12.0"
|
||||
object_store = "0.12.5"
|
||||
|
||||
@@ -164,7 +164,7 @@ pub const DEFAULT_LOG_FILENAME: &str = "rustfs";
|
||||
/// This is the default log filename for OBS.
|
||||
/// It is used to store the logs of the application.
|
||||
/// Default value: rustfs.log
|
||||
pub const DEFAULT_OBS_LOG_FILENAME: &str = concat!(DEFAULT_LOG_FILENAME, "");
|
||||
pub const DEFAULT_OBS_LOG_FILENAME: &str = concat!(DEFAULT_LOG_FILENAME, ".log");
|
||||
|
||||
/// Default log directory for rustfs
|
||||
/// This is the default log directory for rustfs.
|
||||
@@ -183,9 +183,9 @@ pub const DEFAULT_LOG_ROTATION_SIZE_MB: u64 = 100;
|
||||
/// Default log rotation time for rustfs
|
||||
/// This is the default log rotation time for rustfs.
|
||||
/// It is used to rotate the logs of the application.
|
||||
/// Default value: hour, eg: day,hour,minute,second
|
||||
/// Default value: hour, eg: daily,hourly,minutely
|
||||
/// Environment variable: RUSTFS_OBS_LOG_ROTATION_TIME
|
||||
pub const DEFAULT_LOG_ROTATION_TIME: &str = "hour";
|
||||
pub const DEFAULT_LOG_ROTATION_TIME: &str = "hourly";
|
||||
|
||||
/// Default log keep files for rustfs
|
||||
/// This is the default log keep files for rustfs.
|
||||
|
||||
@@ -41,23 +41,30 @@ pub const ENV_OBS_LOG_ROTATION_SIZE_MB: &str = "RUSTFS_OBS_LOG_ROTATION_SIZE_MB"
|
||||
pub const ENV_OBS_LOG_ROTATION_TIME: &str = "RUSTFS_OBS_LOG_ROTATION_TIME";
|
||||
pub const ENV_OBS_LOG_KEEP_FILES: &str = "RUSTFS_OBS_LOG_KEEP_FILES";
|
||||
|
||||
/// Log pool capacity for async logging
|
||||
pub const ENV_OBS_LOG_POOL_CAPA: &str = "RUSTFS_OBS_LOG_POOL_CAPA";
|
||||
/// Log cleanup related configurations
|
||||
pub const ENV_OBS_LOG_KEEP_COUNT: &str = "RUSTFS_OBS_LOG_KEEP_COUNT";
|
||||
pub const ENV_OBS_LOG_MAX_TOTAL_SIZE_BYTES: &str = "RUSTFS_OBS_LOG_MAX_TOTAL_SIZE_BYTES";
|
||||
pub const ENV_OBS_LOG_MAX_SINGLE_FILE_SIZE_BYTES: &str = "RUSTFS_OBS_LOG_MAX_SINGLE_FILE_SIZE_BYTES";
|
||||
pub const ENV_OBS_LOG_COMPRESS_OLD_FILES: &str = "RUSTFS_OBS_LOG_COMPRESS_OLD_FILES";
|
||||
pub const ENV_OBS_LOG_GZIP_COMPRESSION_LEVEL: &str = "RUSTFS_OBS_LOG_GZIP_COMPRESSION_LEVEL";
|
||||
pub const ENV_OBS_LOG_COMPRESSED_FILE_RETENTION_DAYS: &str = "RUSTFS_OBS_LOG_COMPRESSED_FILE_RETENTION_DAYS";
|
||||
pub const ENV_OBS_LOG_EXCLUDE_PATTERNS: &str = "RUSTFS_OBS_LOG_EXCLUDE_PATTERNS";
|
||||
pub const ENV_OBS_LOG_DELETE_EMPTY_FILES: &str = "RUSTFS_OBS_LOG_DELETE_EMPTY_FILES";
|
||||
pub const ENV_OBS_LOG_MIN_FILE_AGE_SECONDS: &str = "RUSTFS_OBS_LOG_MIN_FILE_AGE_SECONDS";
|
||||
pub const ENV_OBS_LOG_CLEANUP_INTERVAL_SECONDS: &str = "RUSTFS_OBS_LOG_CLEANUP_INTERVAL_SECONDS";
|
||||
pub const ENV_OBS_LOG_DRY_RUN: &str = "RUSTFS_OBS_LOG_DRY_RUN";
|
||||
|
||||
/// Log message capacity for async logging
|
||||
pub const ENV_OBS_LOG_MESSAGE_CAPA: &str = "RUSTFS_OBS_LOG_MESSAGE_CAPA";
|
||||
|
||||
/// Log flush interval in milliseconds for async logging
|
||||
pub const ENV_OBS_LOG_FLUSH_MS: &str = "RUSTFS_OBS_LOG_FLUSH_MS";
|
||||
|
||||
/// Default values for log pool
|
||||
pub const DEFAULT_OBS_LOG_POOL_CAPA: usize = 10240;
|
||||
|
||||
/// Default values for message capacity
|
||||
pub const DEFAULT_OBS_LOG_MESSAGE_CAPA: usize = 32768;
|
||||
|
||||
/// Default values for flush interval in milliseconds
|
||||
pub const DEFAULT_OBS_LOG_FLUSH_MS: u64 = 200;
|
||||
/// Default values for log cleanup
|
||||
pub const DEFAULT_OBS_LOG_KEEP_COUNT: usize = 10;
|
||||
pub const DEFAULT_OBS_LOG_MAX_TOTAL_SIZE_BYTES: u64 = 2 * 1024 * 1024 * 1024; // 2 GiB
|
||||
pub const DEFAULT_OBS_LOG_MAX_SINGLE_FILE_SIZE_BYTES: u64 = 0; // No single file limit
|
||||
pub const DEFAULT_OBS_LOG_COMPRESS_OLD_FILES: bool = true;
|
||||
pub const DEFAULT_OBS_LOG_GZIP_COMPRESSION_LEVEL: u32 = 6;
|
||||
pub const DEFAULT_OBS_LOG_COMPRESSED_FILE_RETENTION_DAYS: u64 = 30;
|
||||
pub const DEFAULT_OBS_LOG_DELETE_EMPTY_FILES: bool = true;
|
||||
pub const DEFAULT_OBS_LOG_MIN_FILE_AGE_SECONDS: u64 = 3600; // 1 hour
|
||||
pub const DEFAULT_OBS_LOG_CLEANUP_INTERVAL_SECONDS: u64 = 6 * 3600; // 6 hours
|
||||
pub const DEFAULT_OBS_LOG_DRY_RUN: bool = false;
|
||||
|
||||
/// Default values for observability configuration
|
||||
// ### Supported Environment Values
|
||||
@@ -96,6 +103,21 @@ mod tests {
|
||||
assert_eq!(ENV_OBS_TRACES_EXPORT_ENABLED, "RUSTFS_OBS_TRACES_EXPORT_ENABLED");
|
||||
assert_eq!(ENV_OBS_METRICS_EXPORT_ENABLED, "RUSTFS_OBS_METRICS_EXPORT_ENABLED");
|
||||
assert_eq!(ENV_OBS_LOGS_EXPORT_ENABLED, "RUSTFS_OBS_LOGS_EXPORT_ENABLED");
|
||||
// Test log cleanup related env keys
|
||||
assert_eq!(ENV_OBS_LOG_KEEP_COUNT, "RUSTFS_OBS_LOG_KEEP_COUNT");
|
||||
assert_eq!(ENV_OBS_LOG_MAX_TOTAL_SIZE_BYTES, "RUSTFS_OBS_LOG_MAX_TOTAL_SIZE_BYTES");
|
||||
assert_eq!(ENV_OBS_LOG_MAX_SINGLE_FILE_SIZE_BYTES, "RUSTFS_OBS_LOG_MAX_SINGLE_FILE_SIZE_BYTES");
|
||||
assert_eq!(ENV_OBS_LOG_COMPRESS_OLD_FILES, "RUSTFS_OBS_LOG_COMPRESS_OLD_FILES");
|
||||
assert_eq!(ENV_OBS_LOG_GZIP_COMPRESSION_LEVEL, "RUSTFS_OBS_LOG_GZIP_COMPRESSION_LEVEL");
|
||||
assert_eq!(
|
||||
ENV_OBS_LOG_COMPRESSED_FILE_RETENTION_DAYS,
|
||||
"RUSTFS_OBS_LOG_COMPRESSED_FILE_RETENTION_DAYS"
|
||||
);
|
||||
assert_eq!(ENV_OBS_LOG_EXCLUDE_PATTERNS, "RUSTFS_OBS_LOG_EXCLUDE_PATTERNS");
|
||||
assert_eq!(ENV_OBS_LOG_DELETE_EMPTY_FILES, "RUSTFS_OBS_LOG_DELETE_EMPTY_FILES");
|
||||
assert_eq!(ENV_OBS_LOG_MIN_FILE_AGE_SECONDS, "RUSTFS_OBS_LOG_MIN_FILE_AGE_SECONDS");
|
||||
assert_eq!(ENV_OBS_LOG_CLEANUP_INTERVAL_SECONDS, "RUSTFS_OBS_LOG_CLEANUP_INTERVAL_SECONDS");
|
||||
assert_eq!(ENV_OBS_LOG_DRY_RUN, "RUSTFS_OBS_LOG_DRY_RUN");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -33,12 +33,9 @@ serde_json = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
time = { workspace = true }
|
||||
uuid = { workspace = true }
|
||||
moka = { workspace = true }
|
||||
rustfs-common = { workspace = true }
|
||||
rustfs-credentials = { workspace = true }
|
||||
rustfs-policy = { workspace = true }
|
||||
anyhow = { workspace = true }
|
||||
# Middleware dependencies
|
||||
tower = { workspace = true }
|
||||
http = { workspace = true }
|
||||
@@ -51,7 +48,6 @@ futures = { workspace = true }
|
||||
[dev-dependencies]
|
||||
tokio = { workspace = true, features = ["test-util"] }
|
||||
tower = { workspace = true, features = ["util"] }
|
||||
axum = { workspace = true }
|
||||
hyper = { workspace = true, features = ["server"] }
|
||||
serde_json = { workspace = true }
|
||||
|
||||
|
||||
@@ -36,9 +36,9 @@ full = ["gpu"]
|
||||
[dependencies]
|
||||
rustfs-config = { workspace = true, features = ["constants", "observability"] }
|
||||
rustfs-utils = { workspace = true, features = ["ip", "path"] }
|
||||
flexi_logger = { workspace = true }
|
||||
flate2 = { workspace = true }
|
||||
glob = { workspace = true }
|
||||
metrics = { workspace = true }
|
||||
nu-ansi-term = { workspace = true }
|
||||
nvml-wrapper = { workspace = true, optional = true }
|
||||
opentelemetry = { workspace = true }
|
||||
opentelemetry-appender-tracing = { workspace = true, features = ["experimental_use_tracing_span_context", "experimental_metadata_attributes"] }
|
||||
@@ -56,7 +56,9 @@ tracing-subscriber = { workspace = true, features = ["registry", "std", "fmt", "
|
||||
tokio = { workspace = true, features = ["sync", "fs", "rt-multi-thread", "rt", "time", "macros"] }
|
||||
sysinfo = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
walkdir = { workspace = true }
|
||||
|
||||
|
||||
[dev-dependencies]
|
||||
tokio = { workspace = true, features = ["full"] }
|
||||
tempfile = { workspace = true }
|
||||
|
||||
@@ -1,82 +1,290 @@
|
||||
[](https://rustfs.com)
|
||||
# rustfs-obs
|
||||
|
||||
# RustFS Obs - Observability & Monitoring
|
||||
|
||||
<p align="center">
|
||||
<strong>Comprehensive observability and monitoring system for RustFS distributed object storage</strong>
|
||||
</p>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://github.com/rustfs/rustfs/actions/workflows/ci.yml"><img alt="CI" src="https://github.com/rustfs/rustfs/actions/workflows/ci.yml/badge.svg" /></a>
|
||||
<a href="https://docs.rustfs.com/">📖 Documentation</a>
|
||||
· <a href="https://github.com/rustfs/rustfs/issues">🐛 Bug Reports</a>
|
||||
· <a href="https://github.com/rustfs/rustfs/discussions">💬 Discussions</a>
|
||||
</p>
|
||||
Observability library for [RustFS](https://github.com/rustfs/rustfs) providing structured JSON
|
||||
logging, distributed tracing, and metrics via OpenTelemetry.
|
||||
|
||||
---
|
||||
|
||||
## 📖 Overview
|
||||
## Features
|
||||
|
||||
**RustFS Obs** provides comprehensive observability and monitoring capabilities for the [RustFS](https://rustfs.com) distributed object storage system. For the complete RustFS experience, please visit the [main RustFS repository](https://github.com/rustfs/rustfs).
|
||||
| Feature | Description |
|
||||
|---------|-------------|
|
||||
| **Structured logging** | JSON-formatted logs via `tracing-subscriber` |
|
||||
| **Rolling-file logging** | Daily / hourly rotation with automatic cleanup |
|
||||
| **Distributed tracing** | OTLP/HTTP export to Jaeger, Tempo, or any OTel collector |
|
||||
| **Metrics** | OTLP/HTTP export, bridged from the `metrics` crate facade |
|
||||
| **Log cleanup** | Background task: size limits, gzip compression, retention policies |
|
||||
| **GPU metrics** *(optional)* | Enable with the `gpu` feature flag |
|
||||
|
||||
## ✨ Features
|
||||
---
|
||||
|
||||
- **Environment-Aware Logging**: Automatically configures logging behavior based on deployment environment
|
||||
- Production: File-only logging (stdout disabled by default for security and log aggregation)
|
||||
- Development/Test: Full logging with stdout support for debugging
|
||||
- OpenTelemetry integration for distributed tracing
|
||||
- Prometheus metrics collection and exposition
|
||||
- Structured logging with configurable levels and rotation
|
||||
- Performance profiling and analytics
|
||||
- Real-time health checks and status monitoring
|
||||
- Custom dashboards and alerting integration
|
||||
- Enhanced error handling and resilience
|
||||
## Quick Start
|
||||
|
||||
## 🚀 Environment-Aware Logging
|
||||
```toml
|
||||
# Cargo.toml
|
||||
[dependencies]
|
||||
rustfs-obs = { version = "0.0.5" }
|
||||
|
||||
The obs module automatically adapts logging behavior based on your deployment environment:
|
||||
|
||||
### Production Environment
|
||||
```bash
|
||||
# Set production environment - disables stdout logging by default
|
||||
export RUSTFS_OBS_ENVIRONMENT=production
|
||||
|
||||
# All logs go to files only (no stdout) for security and log aggregation
|
||||
# Enhanced error handling with clear failure diagnostics
|
||||
# GPU metrics support
|
||||
rustfs-obs = { version = "0.0.5", features = ["gpu"] }
|
||||
```
|
||||
|
||||
### Development/Test Environment
|
||||
```bash
|
||||
# Set development environment - enables stdout logging
|
||||
export RUSTFS_OBS_ENVIRONMENT=development
|
||||
|
||||
# Logs appear both in files and stdout for easier debugging
|
||||
# Full span tracking and verbose error messages
|
||||
```
|
||||
|
||||
### Configuration Override
|
||||
You can always override the environment defaults:
|
||||
```rust
|
||||
use rustfs_obs::OtelConfig;
|
||||
use rustfs_obs::init_obs;
|
||||
|
||||
let config = OtelConfig {
|
||||
endpoint: "".to_string(),
|
||||
use_stdout: Some(true), // Explicit override - forces stdout even in production
|
||||
environment: Some("production".to_string()),
|
||||
..Default::default()
|
||||
};
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
// Build config from environment variables, then initialise all backends.
|
||||
let _guard = init_obs(None).await.expect("failed to initialise observability");
|
||||
|
||||
tracing::info!("RustFS started");
|
||||
|
||||
// _guard is dropped here — all providers are flushed and shut down.
|
||||
}
|
||||
```
|
||||
|
||||
### Supported Environment Values
|
||||
- `production` - Secure file-only logging
|
||||
- `development` - Full debugging with stdout
|
||||
- `test` - Test environment with stdout support
|
||||
- `staging` - Staging environment with stdout support
|
||||
> **Keep `_guard` alive** for the lifetime of your application. Dropping it
|
||||
> triggers an ordered shutdown of every OpenTelemetry provider.
|
||||
|
||||
## 📚 Documentation
|
||||
---
|
||||
|
||||
For comprehensive documentation, examples, and usage guides, please visit the main [RustFS repository](https://github.com/rustfs/rustfs).
|
||||
## Initialisation
|
||||
|
||||
## 📄 License
|
||||
### With an explicit OTLP endpoint
|
||||
|
||||
This project is licensed under the Apache License 2.0 - see the [LICENSE](../../LICENSE) file for details.
|
||||
```rust
|
||||
use rustfs_obs::init_obs;
|
||||
|
||||
let _guard = init_obs(Some("http://otel-collector:4318".to_string()))
|
||||
.await
|
||||
.expect("observability init failed");
|
||||
```
|
||||
|
||||
### With a custom config struct
|
||||
|
||||
```rust
|
||||
use rustfs_obs::{AppConfig, OtelConfig, init_obs_with_config};
|
||||
|
||||
let config = AppConfig::new_with_endpoint(Some("http://localhost:4318".to_string()));
|
||||
let _guard = init_obs_with_config(&config.observability)
|
||||
.await
|
||||
.expect("observability init failed");
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Routing Logic
|
||||
|
||||
The library selects a backend automatically based on configuration:
|
||||
|
||||
```
|
||||
1. Any OTLP endpoint set?
|
||||
└─ YES → Full OTLP/HTTP pipeline (traces + metrics + logs)
|
||||
|
||||
2. RUSTFS_OBS_LOG_DIRECTORY set to a non-empty path?
|
||||
└─ YES → Rolling-file JSON logging
|
||||
+ Stdout mirror enabled if:
|
||||
- RUSTFS_OBS_LOG_STDOUT_ENABLED=true (explicit), OR
|
||||
- RUSTFS_OBS_ENVIRONMENT != "production" (automatic)
|
||||
|
||||
3. Default → Stdout-only JSON logging (all signals)
|
||||
```
|
||||
|
||||
**Key Points:**
|
||||
- When **no log directory** is configured, logs automatically go to **stdout only** (perfect for development)
|
||||
- When a **log directory** is set, logs go to **rolling files** in that directory
|
||||
- In **non-production environments**, stdout is automatically mirrored alongside file logging for visibility
|
||||
- In **production** mode, you must explicitly set `RUSTFS_OBS_LOG_STDOUT_ENABLED=true` to see stdout in addition to files
|
||||
|
||||
---
|
||||
|
||||
## Environment Variables
|
||||
|
||||
All configuration is read from environment variables at startup.
|
||||
|
||||
### OTLP / Export
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `RUSTFS_OBS_ENDPOINT` | _(empty)_ | Root OTLP/HTTP endpoint, e.g. `http://otel-collector:4318` |
|
||||
| `RUSTFS_OBS_TRACE_ENDPOINT` | _(empty)_ | Dedicated trace endpoint (overrides root + `/v1/traces`) |
|
||||
| `RUSTFS_OBS_METRIC_ENDPOINT` | _(empty)_ | Dedicated metrics endpoint |
|
||||
| `RUSTFS_OBS_LOG_ENDPOINT` | _(empty)_ | Dedicated log endpoint |
|
||||
| `RUSTFS_OBS_TRACES_EXPORT_ENABLED` | `true` | Toggle trace export |
|
||||
| `RUSTFS_OBS_METRICS_EXPORT_ENABLED` | `true` | Toggle metrics export |
|
||||
| `RUSTFS_OBS_LOGS_EXPORT_ENABLED` | `true` | Toggle OTLP log export |
|
||||
| `RUSTFS_OBS_USE_STDOUT` | `false` | Mirror all signals to stdout alongside OTLP |
|
||||
| `RUSTFS_OBS_SAMPLE_RATIO` | `0.1` | Trace sampling ratio `0.0`–`1.0` |
|
||||
| `RUSTFS_OBS_METER_INTERVAL` | `15` | Metrics export interval (seconds) |
|
||||
|
||||
### Service identity
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `RUSTFS_OBS_SERVICE_NAME` | `rustfs` | OTel `service.name` |
|
||||
| `RUSTFS_OBS_SERVICE_VERSION` | _(crate version)_ | OTel `service.version` |
|
||||
| `RUSTFS_OBS_ENVIRONMENT` | `development` | Deployment environment (`production`, `development`, …) |
|
||||
|
||||
### Local logging
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `RUSTFS_OBS_LOGGER_LEVEL` | `info` | Log level; `RUST_LOG` syntax supported |
|
||||
| `RUSTFS_OBS_LOG_STDOUT_ENABLED` | `false` | When file logging is active, also mirror to stdout |
|
||||
| `RUSTFS_OBS_LOG_DIRECTORY` | _(empty)_ | **Directory for rolling log files. When empty, logs go to stdout only** |
|
||||
| `RUSTFS_OBS_LOG_FILENAME` | `rustfs` | Base filename for rolling logs (date suffix added automatically) |
|
||||
| `RUSTFS_OBS_LOG_ROTATION_TIME` | `hourly` | Rotation granularity: `minutely`, `hourly`, or `daily` |
|
||||
| `RUSTFS_OBS_LOG_KEEP_FILES` | `30` | Number of rolling files to keep |
|
||||
|
||||
### Log cleanup
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `RUSTFS_OBS_LOG_KEEP_COUNT` | `10` | Minimum files the cleaner must always preserve |
|
||||
| `RUSTFS_OBS_LOG_MAX_TOTAL_SIZE_BYTES` | `2147483648` | Hard cap on total log directory size (2 GiB) |
|
||||
| `RUSTFS_OBS_LOG_MAX_SINGLE_FILE_SIZE_BYTES` | `0` | Per-file size cap; `0` = unlimited |
|
||||
| `RUSTFS_OBS_LOG_COMPRESS_OLD_FILES` | `true` | Gzip-compress files before deleting |
|
||||
| `RUSTFS_OBS_LOG_GZIP_COMPRESSION_LEVEL` | `6` | Gzip level `1` (fastest) – `9` (best) |
|
||||
| `RUSTFS_OBS_LOG_COMPRESSED_FILE_RETENTION_DAYS` | `30` | Delete `.gz` archives older than N days; `0` = keep forever |
|
||||
| `RUSTFS_OBS_LOG_EXCLUDE_PATTERNS` | _(empty)_ | Comma-separated glob patterns to never clean up |
|
||||
| `RUSTFS_OBS_LOG_DELETE_EMPTY_FILES` | `true` | Remove zero-byte files |
|
||||
| `RUSTFS_OBS_LOG_MIN_FILE_AGE_SECONDS` | `3600` | Minimum file age (seconds) before cleanup |
|
||||
| `RUSTFS_OBS_LOG_CLEANUP_INTERVAL_SECONDS` | `21600` | How often the cleanup task runs (6 hours) |
|
||||
| `RUSTFS_OBS_LOG_DRY_RUN` | `false` | Report deletions without actually removing files |
|
||||
|
||||
|
||||
---
|
||||
|
||||
## Examples
|
||||
|
||||
### Stdout-only (development default)
|
||||
|
||||
```bash
|
||||
# No RUSTFS_OBS_LOG_DIRECTORY set → stdout JSON
|
||||
RUSTFS_OBS_LOGGER_LEVEL=debug ./rustfs
|
||||
```
|
||||
|
||||
### Rolling-file logging
|
||||
|
||||
```bash
|
||||
export RUSTFS_OBS_LOG_DIRECTORY=/var/log/rustfs
|
||||
export RUSTFS_OBS_LOGGER_LEVEL=info
|
||||
export RUSTFS_OBS_LOG_KEEP_FILES=30
|
||||
export RUSTFS_OBS_LOG_MAX_TOTAL_SIZE_BYTES=5368709120 # 5 GiB
|
||||
./rustfs
|
||||
```
|
||||
|
||||
### Full OTLP pipeline (production)
|
||||
|
||||
```bash
|
||||
export RUSTFS_OBS_ENDPOINT=http://otel-collector:4318
|
||||
export RUSTFS_OBS_ENVIRONMENT=production
|
||||
export RUSTFS_OBS_SAMPLE_RATIO=0.05 # 5% trace sampling
|
||||
export RUSTFS_OBS_LOG_DIRECTORY=/var/log/rustfs
|
||||
export RUSTFS_OBS_LOG_STDOUT_ENABLED=false
|
||||
./rustfs
|
||||
```
|
||||
|
||||
### Separate per-signal endpoints
|
||||
|
||||
```bash
|
||||
export RUSTFS_OBS_TRACE_ENDPOINT=http://tempo:4318/v1/traces
|
||||
export RUSTFS_OBS_METRIC_ENDPOINT=http://prometheus-otel:4318/v1/metrics
|
||||
export RUSTFS_OBS_LOG_ENDPOINT=http://loki-otel:4318/v1/logs
|
||||
./rustfs
|
||||
```
|
||||
|
||||
### Dry-run cleanup audit
|
||||
|
||||
```bash
|
||||
export RUSTFS_OBS_LOG_DIRECTORY=/var/log/rustfs
|
||||
export RUSTFS_OBS_LOG_DRY_RUN=true
|
||||
./rustfs
|
||||
# Observe log output — no files will actually be deleted.
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Module Structure
|
||||
|
||||
```
|
||||
rustfs-obs/src/
|
||||
├── lib.rs # Crate root; public re-exports
|
||||
├── config.rs # OtelConfig + AppConfig; env-var loading
|
||||
├── error.rs # TelemetryError type
|
||||
├── global.rs # init_obs / init_obs_with_config entry points
|
||||
│
|
||||
├── telemetry/ # Backend initialisation
|
||||
│ ├── mod.rs # init_telemetry routing logic
|
||||
│ ├── guard.rs # OtelGuard RAII lifecycle manager
|
||||
│ ├── filter.rs # EnvFilter construction helpers
|
||||
│ ├── resource.rs # OTel Resource builder
|
||||
│ ├── local.rs # Stdout-only and rolling-file backends
|
||||
│ ├── otel.rs # Full OTLP/HTTP pipeline
|
||||
│ └── recorder.rs # metrics-crate → OTel bridge (Recorder)
|
||||
│
|
||||
├── log_cleanup/ # Background log-file cleanup subsystem
|
||||
│ ├── mod.rs # LogCleaner public API + tests
|
||||
│ ├── types.rs # FileInfo shared type
|
||||
│ ├── scanner.rs # Filesystem discovery
|
||||
│ ├── compress.rs # Gzip compression helper
|
||||
│ └── cleaner.rs # Selection, compression, deletion logic
|
||||
│
|
||||
└── system/ # Host metrics (CPU, memory, disk, GPU)
|
||||
├── mod.rs
|
||||
├── attributes.rs
|
||||
├── collector.rs
|
||||
├── metrics.rs
|
||||
└── gpu.rs # GPU metrics (feature = "gpu")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Using `LogCleaner` Directly
|
||||
|
||||
```rust
|
||||
use std::path::PathBuf;
|
||||
use rustfs_obs::LogCleaner;
|
||||
|
||||
let cleaner = LogCleaner::new(
|
||||
PathBuf::from("/var/log/rustfs"),
|
||||
"rustfs.log.".to_string(), // file_prefix
|
||||
10, // keep_count
|
||||
2 * 1024 * 1024 * 1024, // max_total_size_bytes (2 GiB)
|
||||
0, // max_single_file_size_bytes (unlimited)
|
||||
true, // compress_old_files
|
||||
6, // gzip_compression_level
|
||||
30, // compressed_file_retention_days
|
||||
vec!["current.log".to_string()], // exclude_patterns
|
||||
true, // delete_empty_files
|
||||
3600, // min_file_age_seconds (1 hour)
|
||||
false, // dry_run
|
||||
);
|
||||
|
||||
let (deleted, freed_bytes) = cleaner.cleanup().expect("cleanup failed");
|
||||
println!("Deleted {deleted} files, freed {freed_bytes} bytes");
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Feature Flags
|
||||
|
||||
| Flag | Description |
|
||||
|------|-------------|
|
||||
| _(default)_ | Core logging, tracing, and metrics |
|
||||
| `gpu` | GPU utilisation metrics via `nvml` |
|
||||
| `full` | All features enabled |
|
||||
|
||||
```toml
|
||||
# Enable GPU monitoring
|
||||
rustfs-obs = { version = "0.0.5", features = ["gpu"] }
|
||||
|
||||
# Enable everything
|
||||
rustfs-obs = { version = "0.0.5", features = ["full"] }
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## License
|
||||
|
||||
Apache 2.0 — see [LICENSE](../../LICENSE).
|
||||
|
||||
@@ -12,93 +12,209 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Observability configuration for RustFS.
|
||||
//!
|
||||
//! All configuration is read from environment variables. The canonical list of
|
||||
//! variable names and their defaults lives in `rustfs-config/src/observability/mod.rs`.
|
||||
//!
|
||||
//! Two public structs are provided:
|
||||
//! - [`OtelConfig`] — the primary flat configuration that drives every backend.
|
||||
//! - [`AppConfig`] — a thin wrapper used when the config is embedded inside a
|
||||
//! larger application configuration struct.
|
||||
|
||||
use rustfs_config::observability::{
|
||||
DEFAULT_OBS_ENVIRONMENT_PRODUCTION, ENV_OBS_ENDPOINT, ENV_OBS_ENVIRONMENT, ENV_OBS_LOG_DIRECTORY, ENV_OBS_LOG_ENDPOINT,
|
||||
ENV_OBS_LOG_FILENAME, ENV_OBS_LOG_KEEP_FILES, ENV_OBS_LOG_ROTATION_SIZE_MB, ENV_OBS_LOG_ROTATION_TIME,
|
||||
ENV_OBS_LOG_STDOUT_ENABLED, ENV_OBS_LOGGER_LEVEL, ENV_OBS_LOGS_EXPORT_ENABLED, ENV_OBS_METER_INTERVAL,
|
||||
ENV_OBS_METRIC_ENDPOINT, ENV_OBS_METRICS_EXPORT_ENABLED, ENV_OBS_SAMPLE_RATIO, ENV_OBS_SERVICE_NAME, ENV_OBS_SERVICE_VERSION,
|
||||
ENV_OBS_TRACE_ENDPOINT, ENV_OBS_TRACES_EXPORT_ENABLED, ENV_OBS_USE_STDOUT,
|
||||
DEFAULT_OBS_ENVIRONMENT_PRODUCTION, DEFAULT_OBS_LOG_CLEANUP_INTERVAL_SECONDS, DEFAULT_OBS_LOG_COMPRESS_OLD_FILES,
|
||||
DEFAULT_OBS_LOG_COMPRESSED_FILE_RETENTION_DAYS, DEFAULT_OBS_LOG_DELETE_EMPTY_FILES, DEFAULT_OBS_LOG_DRY_RUN,
|
||||
DEFAULT_OBS_LOG_GZIP_COMPRESSION_LEVEL, DEFAULT_OBS_LOG_KEEP_COUNT, DEFAULT_OBS_LOG_MAX_SINGLE_FILE_SIZE_BYTES,
|
||||
DEFAULT_OBS_LOG_MAX_TOTAL_SIZE_BYTES, DEFAULT_OBS_LOG_MIN_FILE_AGE_SECONDS, ENV_OBS_ENDPOINT, ENV_OBS_ENVIRONMENT,
|
||||
ENV_OBS_LOG_CLEANUP_INTERVAL_SECONDS, ENV_OBS_LOG_COMPRESS_OLD_FILES, ENV_OBS_LOG_COMPRESSED_FILE_RETENTION_DAYS,
|
||||
ENV_OBS_LOG_DELETE_EMPTY_FILES, ENV_OBS_LOG_DIRECTORY, ENV_OBS_LOG_DRY_RUN, ENV_OBS_LOG_ENDPOINT,
|
||||
ENV_OBS_LOG_EXCLUDE_PATTERNS, ENV_OBS_LOG_FILENAME, ENV_OBS_LOG_GZIP_COMPRESSION_LEVEL, ENV_OBS_LOG_KEEP_COUNT,
|
||||
ENV_OBS_LOG_KEEP_FILES, ENV_OBS_LOG_MAX_SINGLE_FILE_SIZE_BYTES, ENV_OBS_LOG_MAX_TOTAL_SIZE_BYTES,
|
||||
ENV_OBS_LOG_MIN_FILE_AGE_SECONDS, ENV_OBS_LOG_ROTATION_TIME, ENV_OBS_LOG_STDOUT_ENABLED, ENV_OBS_LOGGER_LEVEL,
|
||||
ENV_OBS_LOGS_EXPORT_ENABLED, ENV_OBS_METER_INTERVAL, ENV_OBS_METRIC_ENDPOINT, ENV_OBS_METRICS_EXPORT_ENABLED,
|
||||
ENV_OBS_SAMPLE_RATIO, ENV_OBS_SERVICE_NAME, ENV_OBS_SERVICE_VERSION, ENV_OBS_TRACE_ENDPOINT, ENV_OBS_TRACES_EXPORT_ENABLED,
|
||||
ENV_OBS_USE_STDOUT,
|
||||
};
|
||||
use rustfs_config::{
|
||||
APP_NAME, DEFAULT_LOG_KEEP_FILES, DEFAULT_LOG_LEVEL, DEFAULT_LOG_ROTATION_SIZE_MB, DEFAULT_LOG_ROTATION_TIME,
|
||||
DEFAULT_OBS_LOG_FILENAME, DEFAULT_OBS_LOG_STDOUT_ENABLED, DEFAULT_OBS_LOGS_EXPORT_ENABLED,
|
||||
DEFAULT_OBS_METRICS_EXPORT_ENABLED, DEFAULT_OBS_TRACES_EXPORT_ENABLED, ENVIRONMENT, METER_INTERVAL, SAMPLE_RATIO,
|
||||
SERVICE_VERSION, USE_STDOUT,
|
||||
APP_NAME, DEFAULT_LOG_KEEP_FILES, DEFAULT_LOG_LEVEL, DEFAULT_LOG_ROTATION_TIME, DEFAULT_OBS_LOG_FILENAME,
|
||||
DEFAULT_OBS_LOG_STDOUT_ENABLED, DEFAULT_OBS_LOGS_EXPORT_ENABLED, DEFAULT_OBS_METRICS_EXPORT_ENABLED,
|
||||
DEFAULT_OBS_TRACES_EXPORT_ENABLED, ENVIRONMENT, METER_INTERVAL, SAMPLE_RATIO, SERVICE_VERSION, USE_STDOUT,
|
||||
};
|
||||
use rustfs_utils::dirs::get_log_directory_to_string;
|
||||
use rustfs_utils::{get_env_bool, get_env_f64, get_env_opt_str, get_env_str, get_env_u64, get_env_usize};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::env;
|
||||
|
||||
/// Observability: OpenTelemetry configuration
|
||||
/// # Fields
|
||||
/// * `endpoint`: Endpoint for metric collection
|
||||
/// * `use_stdout`: Output to stdout
|
||||
/// * `sample_ratio`: Trace sampling ratio
|
||||
/// * `meter_interval`: Metric collection interval
|
||||
/// * `service_name`: Service name
|
||||
/// * `service_version`: Service version
|
||||
/// * `environment`: Environment
|
||||
/// * `logger_level`: Logger level
|
||||
/// * `local_logging_enabled`: Local logging enabled
|
||||
/// # Added flexi_logger related configurations
|
||||
/// * `log_directory`: Log file directory
|
||||
/// * `log_filename`: The name of the log file
|
||||
/// * `log_rotation_size_mb`: Log file size cut threshold (MB)
|
||||
/// * `log_rotation_time`: Logs are cut by time (Hour,Day,Minute,Second)
|
||||
/// * `log_keep_files`: Number of log files to be retained
|
||||
/// # Returns
|
||||
/// A new instance of OtelConfig
|
||||
/// Full observability configuration used by all telemetry backends.
|
||||
///
|
||||
/// Fields are grouped into three logical sections:
|
||||
///
|
||||
/// ## OpenTelemetry / OTLP export
|
||||
/// Controls whether and where traces, metrics, and logs are exported over the
|
||||
/// wire using the OTLP/HTTP protocol.
|
||||
///
|
||||
/// ## Local logging
|
||||
/// Controls the rolling-file appender: directory, filename, rotation policy,
|
||||
/// and the number of files to retain.
|
||||
///
|
||||
/// ## Log cleanup
|
||||
/// Controls the background cleanup task: size limits, compression, retention
|
||||
/// of compressed archives, exclusion patterns, and dry-run mode.
|
||||
///
|
||||
/// # Design Notes
|
||||
///
|
||||
/// - All fields are `Option<T>` to allow partial configuration via environment
|
||||
/// variables with sensible defaults provided by constants in `rustfs-config`.
|
||||
/// - `log_keep_count` represents the cleaner's minimum retention; `log_keep_files`
|
||||
/// controls the rolling-appender's file limit (both typically set to the same value).
|
||||
///
|
||||
/// # Example
|
||||
/// ```no_run
|
||||
/// use rustfs_obs::OtelConfig;
|
||||
///
|
||||
/// // Build from environment variables (typical production usage).
|
||||
/// let config = OtelConfig::new();
|
||||
///
|
||||
/// // Build with an explicit OTLP endpoint.
|
||||
/// let config = OtelConfig::extract_otel_config_from_env(
|
||||
/// Some("http://otel-collector:4318".to_string())
|
||||
/// );
|
||||
/// ```
|
||||
#[derive(Debug, Deserialize, Serialize, Clone)]
|
||||
pub struct OtelConfig {
|
||||
pub endpoint: String, // Endpoint for otel collection
|
||||
pub trace_endpoint: Option<String>, // Endpoint for trace collection
|
||||
pub metric_endpoint: Option<String>, // Endpoint for metric collection
|
||||
pub log_endpoint: Option<String>, // Endpoint for log collection
|
||||
pub traces_export_enabled: Option<bool>, // Enable/disable trace export
|
||||
pub metrics_export_enabled: Option<bool>, // Enable/disable metric export
|
||||
pub logs_export_enabled: Option<bool>, // Enable/disable log export
|
||||
pub use_stdout: Option<bool>, // Output to stdout
|
||||
pub sample_ratio: Option<f64>, // Trace sampling ratio
|
||||
pub meter_interval: Option<u64>, // Metric collection interval
|
||||
pub service_name: Option<String>, // Service name
|
||||
pub service_version: Option<String>, // Service version
|
||||
pub environment: Option<String>, // Environment
|
||||
pub logger_level: Option<String>, // Logger level
|
||||
pub log_stdout_enabled: Option<bool>, // Stdout logging enabled
|
||||
// Added flexi_logger related configurations
|
||||
pub log_directory: Option<String>, // LOG FILE DIRECTORY
|
||||
pub log_filename: Option<String>, // The name of the log file
|
||||
pub log_rotation_size_mb: Option<u64>, // Log file size cut threshold (MB)
|
||||
pub log_rotation_time: Option<String>, // Logs are cut by time (Hour, Day,Minute, Second)
|
||||
pub log_keep_files: Option<usize>, // Number of log files to be retained
|
||||
// ── OTLP export ──────────────────────────────────────────────────────────
|
||||
/// Root OTLP/HTTP endpoint (e.g. `http://otel-collector:4318`).
|
||||
/// Per-signal endpoints below take precedence when set.
|
||||
pub endpoint: String,
|
||||
/// Dedicated trace endpoint; overrides `endpoint` + `/v1/traces` fallback.
|
||||
pub trace_endpoint: Option<String>,
|
||||
/// Dedicated metrics endpoint; overrides `endpoint` + `/v1/metrics` fallback.
|
||||
pub metric_endpoint: Option<String>,
|
||||
/// Dedicated log endpoint; overrides `endpoint` + `/v1/logs` fallback.
|
||||
pub log_endpoint: Option<String>,
|
||||
/// Whether to export distributed traces (default: `true`).
|
||||
pub traces_export_enabled: Option<bool>,
|
||||
/// Whether to export metrics (default: `true`).
|
||||
pub metrics_export_enabled: Option<bool>,
|
||||
/// Whether to export logs via OTLP (default: `true`).
|
||||
pub logs_export_enabled: Option<bool>,
|
||||
/// **[OTLP-only]** Mirror all signals to stdout in addition to OTLP export.
|
||||
/// Only applies when an OTLP endpoint is configured.
|
||||
pub use_stdout: Option<bool>,
|
||||
/// Fraction of traces to sample, `0.0`–`1.0` (default: `0.1`).
|
||||
pub sample_ratio: Option<f64>,
|
||||
/// Metrics export interval in seconds (default: `15`).
|
||||
pub meter_interval: Option<u64>,
|
||||
/// OTel `service.name` attribute (default: `APP_NAME`).
|
||||
pub service_name: Option<String>,
|
||||
/// OTel `service.version` attribute (default: `SERVICE_VERSION`).
|
||||
pub service_version: Option<String>,
|
||||
/// Deployment environment tag, e.g. `production` or `development`.
|
||||
pub environment: Option<String>,
|
||||
|
||||
// ── Local logging ─────────────────────────────────────────────────────────
|
||||
/// Minimum log level directive (default: `info`).
|
||||
/// Respects `RUST_LOG` syntax when set via environment.
|
||||
pub logger_level: Option<String>,
|
||||
/// When `true`, a stdout JSON layer is always attached regardless of the
|
||||
/// active backend (default: `false` in production, `true` otherwise).
|
||||
pub log_stdout_enabled: Option<bool>,
|
||||
/// Directory where rolling log files are written.
|
||||
/// When absent or empty, logging falls back to stdout-only mode.
|
||||
pub log_directory: Option<String>,
|
||||
/// Base name for log files (without date suffix), e.g. `rustfs`.
|
||||
/// Used for both rolling-appender naming and cleanup scanning.
|
||||
pub log_filename: Option<String>,
|
||||
/// Rotation time granularity: `"hourly"` or `"daily"` (default: `"daily"`).
|
||||
pub log_rotation_time: Option<String>,
|
||||
/// Number of rolling log files to retain (default: `30`).
|
||||
/// The rolling-appender will delete the oldest file when this limit is exceeded.
|
||||
pub log_keep_files: Option<usize>,
|
||||
|
||||
// ── Log cleanup ───────────────────────────────────────────────────────────
|
||||
/// Minimum number of files the cleaner must always preserve.
|
||||
/// Typically set to the same value as `log_keep_files`.
|
||||
pub log_keep_count: Option<usize>,
|
||||
/// Hard ceiling on the total size (bytes) of all log files (default: 2 GiB).
|
||||
pub log_max_total_size_bytes: Option<u64>,
|
||||
/// Per-file size ceiling (bytes); `0` means unlimited (default: `0`).
|
||||
pub log_max_single_file_size_bytes: Option<u64>,
|
||||
/// Compress eligible files with gzip before deletion (default: `true`).
|
||||
pub log_compress_old_files: Option<bool>,
|
||||
/// Gzip compression level `1`–`9` (default: `6`).
|
||||
pub log_gzip_compression_level: Option<u32>,
|
||||
/// Delete compressed archives older than this many days; `0` = keep forever
|
||||
/// (default: `30`).
|
||||
pub log_compressed_file_retention_days: Option<u64>,
|
||||
/// Comma-separated glob patterns for files that must never be cleaned up.
|
||||
pub log_exclude_patterns: Option<String>,
|
||||
/// Delete zero-byte log files during cleanup (default: `true`).
|
||||
pub log_delete_empty_files: Option<bool>,
|
||||
/// A file younger than this many seconds is never touched (default: `3600`).
|
||||
pub log_min_file_age_seconds: Option<u64>,
|
||||
/// How often the background cleanup task runs, in seconds (default: `21600`).
|
||||
pub log_cleanup_interval_seconds: Option<u64>,
|
||||
/// Log what *would* be deleted without actually removing anything
|
||||
/// (default: `false`).
|
||||
pub log_dry_run: Option<bool>,
|
||||
}
|
||||
|
||||
impl OtelConfig {
|
||||
/// Helper function: Extract observable configuration from environment variables
|
||||
/// Build an [`OtelConfig`] from environment variables.
|
||||
///
|
||||
/// The optional `endpoint` argument sets the root OTLP endpoint. If it is
|
||||
/// `None` or an empty string the value is read from the
|
||||
/// `RUSTFS_OBS_ENDPOINT` environment variable instead.
|
||||
///
|
||||
/// When no endpoint is configured at all, `use_stdout` is forced to `true`
|
||||
/// so that logs are still visible during development.
|
||||
///
|
||||
/// # Example
|
||||
/// ```no_run
|
||||
/// use rustfs_obs::OtelConfig;
|
||||
///
|
||||
/// // Read everything from env vars.
|
||||
/// let config = OtelConfig::extract_otel_config_from_env(None);
|
||||
///
|
||||
/// // Override the endpoint programmatically.
|
||||
/// let config = OtelConfig::extract_otel_config_from_env(
|
||||
/// Some("http://localhost:4318".to_string())
|
||||
/// );
|
||||
/// ```
|
||||
pub fn extract_otel_config_from_env(endpoint: Option<String>) -> OtelConfig {
|
||||
let endpoint = if let Some(endpoint) = endpoint {
|
||||
if endpoint.is_empty() {
|
||||
env::var(ENV_OBS_ENDPOINT).unwrap_or_else(|_| "".to_string())
|
||||
} else {
|
||||
endpoint
|
||||
}
|
||||
} else {
|
||||
env::var(ENV_OBS_ENDPOINT).unwrap_or_else(|_| "".to_string())
|
||||
let endpoint = match endpoint {
|
||||
Some(ep) if !ep.is_empty() => ep,
|
||||
_ => env::var(ENV_OBS_ENDPOINT).unwrap_or_default(),
|
||||
};
|
||||
let mut use_stdout = get_env_bool(ENV_OBS_USE_STDOUT, USE_STDOUT);
|
||||
if endpoint.is_empty() {
|
||||
use_stdout = true;
|
||||
}
|
||||
|
||||
// Force stdout when there is no remote endpoint so that operators
|
||||
// always have *some* log output in the default configuration.
|
||||
let use_stdout = if endpoint.is_empty() {
|
||||
true
|
||||
} else {
|
||||
get_env_bool(ENV_OBS_USE_STDOUT, USE_STDOUT)
|
||||
};
|
||||
|
||||
// The canonical log directory is resolved only when explicitly set via
|
||||
// environment variable. When absent or empty, logging falls back to
|
||||
// stdout-only mode (not file-rolling).
|
||||
let log_directory = match std::env::var(ENV_OBS_LOG_DIRECTORY) {
|
||||
Ok(val) if !val.is_empty() => Some(val),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
// `log_keep_files` (legacy) and `log_keep_count` (new) share the same
|
||||
// environment variables but have slightly different semantics.
|
||||
// `log_keep_files` is the rolling-appender retention count; `log_keep_count`
|
||||
// is the cleaner's minimum-keep threshold. Both default to the same value.
|
||||
let log_keep_files = Some(get_env_usize(ENV_OBS_LOG_KEEP_FILES, DEFAULT_LOG_KEEP_FILES));
|
||||
let log_keep_count = Some(get_env_usize(ENV_OBS_LOG_KEEP_COUNT, DEFAULT_OBS_LOG_KEEP_COUNT));
|
||||
|
||||
// `log_rotation_time` drives the rolling-appender rotation period.
|
||||
let log_rotation_time = Some(get_env_str(ENV_OBS_LOG_ROTATION_TIME, DEFAULT_LOG_ROTATION_TIME));
|
||||
|
||||
OtelConfig {
|
||||
// OTLP
|
||||
endpoint,
|
||||
trace_endpoint: get_env_opt_str(ENV_OBS_TRACE_ENDPOINT),
|
||||
metric_endpoint: get_env_opt_str(ENV_OBS_METRIC_ENDPOINT),
|
||||
@@ -112,25 +228,47 @@ impl OtelConfig {
|
||||
service_name: Some(get_env_str(ENV_OBS_SERVICE_NAME, APP_NAME)),
|
||||
service_version: Some(get_env_str(ENV_OBS_SERVICE_VERSION, SERVICE_VERSION)),
|
||||
environment: Some(get_env_str(ENV_OBS_ENVIRONMENT, ENVIRONMENT)),
|
||||
// Local logging
|
||||
logger_level: Some(get_env_str(ENV_OBS_LOGGER_LEVEL, DEFAULT_LOG_LEVEL)),
|
||||
log_stdout_enabled: Some(get_env_bool(ENV_OBS_LOG_STDOUT_ENABLED, DEFAULT_OBS_LOG_STDOUT_ENABLED)),
|
||||
log_directory: Some(get_log_directory_to_string(ENV_OBS_LOG_DIRECTORY)),
|
||||
log_directory,
|
||||
log_filename: Some(get_env_str(ENV_OBS_LOG_FILENAME, DEFAULT_OBS_LOG_FILENAME)),
|
||||
log_rotation_size_mb: Some(get_env_u64(ENV_OBS_LOG_ROTATION_SIZE_MB, DEFAULT_LOG_ROTATION_SIZE_MB)), // Default to 100 MB
|
||||
log_rotation_time: Some(get_env_str(ENV_OBS_LOG_ROTATION_TIME, DEFAULT_LOG_ROTATION_TIME)), // Default to "Hour"
|
||||
log_keep_files: Some(get_env_usize(ENV_OBS_LOG_KEEP_FILES, DEFAULT_LOG_KEEP_FILES)), // Default to keeping 30 log files
|
||||
log_rotation_time,
|
||||
log_keep_files,
|
||||
// Log cleanup
|
||||
log_keep_count,
|
||||
log_max_total_size_bytes: Some(get_env_u64(ENV_OBS_LOG_MAX_TOTAL_SIZE_BYTES, DEFAULT_OBS_LOG_MAX_TOTAL_SIZE_BYTES)),
|
||||
log_max_single_file_size_bytes: Some(get_env_u64(
|
||||
ENV_OBS_LOG_MAX_SINGLE_FILE_SIZE_BYTES,
|
||||
DEFAULT_OBS_LOG_MAX_SINGLE_FILE_SIZE_BYTES,
|
||||
)),
|
||||
log_compress_old_files: Some(get_env_bool(ENV_OBS_LOG_COMPRESS_OLD_FILES, DEFAULT_OBS_LOG_COMPRESS_OLD_FILES)),
|
||||
log_gzip_compression_level: Some(get_env_u64(
|
||||
ENV_OBS_LOG_GZIP_COMPRESSION_LEVEL,
|
||||
DEFAULT_OBS_LOG_GZIP_COMPRESSION_LEVEL as u64,
|
||||
) as u32),
|
||||
log_compressed_file_retention_days: Some(get_env_u64(
|
||||
ENV_OBS_LOG_COMPRESSED_FILE_RETENTION_DAYS,
|
||||
DEFAULT_OBS_LOG_COMPRESSED_FILE_RETENTION_DAYS,
|
||||
)),
|
||||
log_exclude_patterns: get_env_opt_str(ENV_OBS_LOG_EXCLUDE_PATTERNS),
|
||||
log_delete_empty_files: Some(get_env_bool(ENV_OBS_LOG_DELETE_EMPTY_FILES, DEFAULT_OBS_LOG_DELETE_EMPTY_FILES)),
|
||||
log_min_file_age_seconds: Some(get_env_u64(ENV_OBS_LOG_MIN_FILE_AGE_SECONDS, DEFAULT_OBS_LOG_MIN_FILE_AGE_SECONDS)),
|
||||
log_cleanup_interval_seconds: Some(get_env_u64(
|
||||
ENV_OBS_LOG_CLEANUP_INTERVAL_SECONDS,
|
||||
DEFAULT_OBS_LOG_CLEANUP_INTERVAL_SECONDS,
|
||||
)),
|
||||
log_dry_run: Some(get_env_bool(ENV_OBS_LOG_DRY_RUN, DEFAULT_OBS_LOG_DRY_RUN)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new instance of OtelConfig with default values
|
||||
/// Create a new [`OtelConfig`] populated entirely from environment variables.
|
||||
///
|
||||
/// # Returns
|
||||
/// A new instance of OtelConfig
|
||||
/// Equivalent to `OtelConfig::extract_otel_config_from_env(None)`.
|
||||
///
|
||||
/// # Example
|
||||
/// ```no_run
|
||||
/// use rustfs_obs::OtelConfig;
|
||||
///
|
||||
/// let config = OtelConfig::new();
|
||||
/// ```
|
||||
pub fn new() -> Self {
|
||||
@@ -138,26 +276,17 @@ impl OtelConfig {
|
||||
}
|
||||
}
|
||||
|
||||
/// Implement Default trait for OtelConfig
|
||||
/// This allows creating a default instance of OtelConfig using OtelConfig::default()
|
||||
/// which internally calls OtelConfig::new()
|
||||
///
|
||||
/// # Example
|
||||
/// ```no_run
|
||||
/// use rustfs_obs::OtelConfig;
|
||||
///
|
||||
/// let config = OtelConfig::default();
|
||||
/// ```
|
||||
impl Default for OtelConfig {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Overall application configuration
|
||||
/// Add observability configuration
|
||||
/// Top-level application configuration that embeds [`OtelConfig`].
|
||||
///
|
||||
/// Observability: OpenTelemetry configuration
|
||||
/// Use this when the observability config lives inside a larger `AppConfig`
|
||||
/// struct, e.g. when deserialising from a config file that also contains other
|
||||
/// application settings.
|
||||
///
|
||||
/// # Example
|
||||
/// ```
|
||||
@@ -171,23 +300,18 @@ pub struct AppConfig {
|
||||
}
|
||||
|
||||
impl AppConfig {
|
||||
/// Create a new instance of AppConfig with default values
|
||||
///
|
||||
/// # Returns
|
||||
/// A new instance of AppConfig
|
||||
/// Create an [`AppConfig`] with all observability settings read from the
|
||||
/// environment (no explicit endpoint override).
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
observability: OtelConfig::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new instance of AppConfig with specified endpoint
|
||||
/// Create an [`AppConfig`] with an explicit OTLP endpoint.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `endpoint` - An optional string representing the endpoint for metric collection
|
||||
///
|
||||
/// # Returns
|
||||
/// A new instance of AppConfig
|
||||
/// * `endpoint` - Root OTLP/HTTP endpoint URL, or `None` to read from env.
|
||||
///
|
||||
/// # Example
|
||||
/// ```no_run
|
||||
@@ -202,27 +326,16 @@ impl AppConfig {
|
||||
}
|
||||
}
|
||||
|
||||
/// Implement Default trait for AppConfig
|
||||
/// This allows creating a default instance of AppConfig using AppConfig::default()
|
||||
/// which internally calls AppConfig::new()
|
||||
///
|
||||
/// # Example
|
||||
/// ```no_run
|
||||
/// use rustfs_obs::AppConfig;
|
||||
///
|
||||
/// let config = AppConfig::default();
|
||||
/// ```
|
||||
impl Default for AppConfig {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if the current environment is production
|
||||
///
|
||||
/// # Returns
|
||||
/// true if production, false otherwise
|
||||
/// Returns `true` when the current runtime environment is `production`.
|
||||
///
|
||||
/// Reads the `RUSTFS_OBS_ENVIRONMENT` environment variable and compares it
|
||||
/// case-insensitively against the string `"production"`.
|
||||
pub fn is_production_environment() -> bool {
|
||||
get_env_str(ENV_OBS_ENVIRONMENT, ENVIRONMENT).eq_ignore_ascii_case(DEFAULT_OBS_ENVIRONMENT_PRODUCTION)
|
||||
}
|
||||
|
||||
@@ -30,39 +30,39 @@
|
||||
//! # use all functions
|
||||
//! rustfs-obs = { version = "0.1.0", features = ["full"] }
|
||||
//! ```
|
||||
///
|
||||
/// ## Usage
|
||||
///
|
||||
/// ```no_run
|
||||
/// use rustfs_obs::init_obs;
|
||||
///
|
||||
/// # #[tokio::main]
|
||||
/// # async fn main() {
|
||||
/// # let _guard = match init_obs(None).await {
|
||||
/// # Ok(g) => g,
|
||||
/// # Err(e) => {
|
||||
/// # panic!("Failed to initialize observability: {:?}", e);
|
||||
/// # }
|
||||
/// # };
|
||||
/// # // Application logic here
|
||||
/// # {
|
||||
/// # // Simulate some work
|
||||
/// # tokio::time::sleep(std::time::Duration::from_secs(2)).await;
|
||||
/// # println!("Application is running...");
|
||||
/// # }
|
||||
/// # // Guard will be dropped here, flushing telemetry data
|
||||
/// # }
|
||||
/// ```
|
||||
//!
|
||||
//! ## Usage
|
||||
//!
|
||||
//! ```no_run
|
||||
//! use rustfs_obs::init_obs;
|
||||
//!
|
||||
//! # #[tokio::main]
|
||||
//! # async fn main() {
|
||||
//! # let _guard = match init_obs(None).await {
|
||||
//! # Ok(g) => g,
|
||||
//! # Err(e) => {
|
||||
//! # panic!("Failed to initialize observability: {:?}", e);
|
||||
//! # }
|
||||
//! # };
|
||||
//! # // Application logic here
|
||||
//! # {
|
||||
//! # // Simulate some work
|
||||
//! # tokio::time::sleep(std::time::Duration::from_secs(2)).await;
|
||||
//! # println!("Application is running...");
|
||||
//! # }
|
||||
//! # // Guard will be dropped here, flushing telemetry data
|
||||
//! # }
|
||||
//! ```
|
||||
mod config;
|
||||
mod error;
|
||||
mod global;
|
||||
mod recorder;
|
||||
mod log_cleanup;
|
||||
mod system;
|
||||
mod telemetry;
|
||||
|
||||
pub use config::*;
|
||||
pub use error::*;
|
||||
pub use global::*;
|
||||
pub use recorder::*;
|
||||
pub use log_cleanup::*;
|
||||
pub use system::SystemObserver;
|
||||
pub use telemetry::OtelGuard;
|
||||
pub use telemetry::{OtelGuard, Recorder};
|
||||
|
||||
276
crates/obs/src/log_cleanup/cleaner.rs
Normal file
276
crates/obs/src/log_cleanup/cleaner.rs
Normal file
@@ -0,0 +1,276 @@
|
||||
// Copyright 2024 RustFS Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Core log-file cleanup orchestration.
|
||||
//!
|
||||
//! [`LogCleaner`] is the public entry point for the cleanup subsystem.
|
||||
//! Construct it with [`LogCleaner::new`] and call [`LogCleaner::cleanup`]
|
||||
//! periodically (e.g. from a `tokio::spawn`-ed loop).
|
||||
//!
|
||||
//! Internally the cleaner delegates to:
|
||||
//! - [`super::scanner`] — to discover which files exist and which are eligible,
|
||||
//! - [`super::compress`] — to gzip-compress files before they are deleted,
|
||||
//! - [`LogCleaner::select_files_to_delete`] — to apply count / size limits.
|
||||
|
||||
use super::compress::compress_file;
|
||||
use super::scanner::{collect_expired_compressed_files, collect_log_files};
|
||||
use super::types::FileInfo;
|
||||
use std::path::PathBuf;
|
||||
use tracing::{debug, error, info};
|
||||
|
||||
/// Log-file lifecycle manager.
///
/// Holds all cleanup policy parameters and exposes a single
/// [`LogCleaner::cleanup`] method that performs one full cleanup pass.
///
/// # Thread-safety
/// `LogCleaner` is `Send + Sync`. Multiple callers can share a reference
/// (e.g. via `Arc`) and call `cleanup` concurrently without data races,
/// because no state is mutated after construction.
pub struct LogCleaner {
    /// Directory containing the managed log files.
    pub(super) log_dir: PathBuf,
    /// Filename prefix that identifies managed files (e.g. `"rustfs.log."`);
    /// files without this prefix are never touched.
    pub(super) file_prefix: String,
    /// The cleaner will never delete files if doing so would leave fewer than
    /// this many files in the directory.
    pub(super) keep_count: usize,
    /// Hard ceiling on the total bytes of all managed files; `0` = no limit.
    pub(super) max_total_size_bytes: u64,
    /// Hard ceiling on a single file's size; `0` = no per-file limit.
    pub(super) max_single_file_size_bytes: u64,
    /// Compress eligible files with gzip before removing them.
    pub(super) compress_old_files: bool,
    /// Gzip compression level (`1`–`9`, clamped on construction).
    pub(super) gzip_compression_level: u32,
    /// Delete compressed archives older than this many days; `0` = keep forever.
    pub(super) compressed_file_retention_days: u64,
    /// Compiled glob patterns for files that must never be cleaned up.
    /// Invalid patterns supplied to the constructor are dropped silently.
    pub(super) exclude_patterns: Vec<glob::Pattern>,
    /// Delete zero-byte files even when they are younger than `min_file_age_seconds`.
    pub(super) delete_empty_files: bool,
    /// Files younger than this threshold (in seconds) are never touched.
    pub(super) min_file_age_seconds: u64,
    /// When `true`, log what would be done without performing any destructive
    /// filesystem operations.
    pub(super) dry_run: bool,
}
|
||||
|
||||
impl LogCleaner {
|
||||
/// Build a new [`LogCleaner`] with the supplied policy parameters.
|
||||
///
|
||||
/// `exclude_patterns` is a list of glob strings (e.g. `"*.lock"`). Invalid
|
||||
/// glob patterns are silently ignored.
|
||||
///
|
||||
/// `gzip_compression_level` is clamped to the range `[1, 9]`.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn new(
|
||||
log_dir: PathBuf,
|
||||
file_prefix: String,
|
||||
keep_count: usize,
|
||||
max_total_size_bytes: u64,
|
||||
max_single_file_size_bytes: u64,
|
||||
compress_old_files: bool,
|
||||
gzip_compression_level: u32,
|
||||
compressed_file_retention_days: u64,
|
||||
exclude_patterns: Vec<String>,
|
||||
delete_empty_files: bool,
|
||||
min_file_age_seconds: u64,
|
||||
dry_run: bool,
|
||||
) -> Self {
|
||||
let patterns = exclude_patterns
|
||||
.into_iter()
|
||||
.filter_map(|p| glob::Pattern::new(&p).ok())
|
||||
.collect();
|
||||
|
||||
Self {
|
||||
log_dir,
|
||||
file_prefix,
|
||||
keep_count,
|
||||
max_total_size_bytes,
|
||||
max_single_file_size_bytes,
|
||||
compress_old_files,
|
||||
gzip_compression_level: gzip_compression_level.clamp(1, 9),
|
||||
compressed_file_retention_days,
|
||||
exclude_patterns: patterns,
|
||||
delete_empty_files,
|
||||
min_file_age_seconds,
|
||||
dry_run,
|
||||
}
|
||||
}
|
||||
|
||||
/// Perform one full cleanup pass.
|
||||
///
|
||||
/// Steps:
|
||||
/// 1. Scan the log directory for managed files.
|
||||
/// 2. Apply count/size policies to select files for deletion.
|
||||
/// 3. Optionally compress selected files, then delete them.
|
||||
/// 4. Collect and delete expired compressed archives.
|
||||
///
|
||||
/// # Returns
|
||||
/// A tuple `(deleted_count, freed_bytes)` covering all deletions in this
|
||||
/// pass (both regular files and expired compressed archives).
|
||||
///
|
||||
/// # Errors
|
||||
/// Returns an [`std::io::Error`] if the log directory cannot be read.
|
||||
pub fn cleanup(&self) -> Result<(usize, u64), std::io::Error> {
|
||||
if !self.log_dir.exists() {
|
||||
debug!("Log directory does not exist: {:?}", self.log_dir);
|
||||
return Ok((0, 0));
|
||||
}
|
||||
|
||||
let mut total_deleted = 0usize;
|
||||
let mut total_freed = 0u64;
|
||||
|
||||
// ── 1. Discover active log files ──────────────────────────────────────
|
||||
let mut files = collect_log_files(
|
||||
&self.log_dir,
|
||||
&self.file_prefix,
|
||||
&self.exclude_patterns,
|
||||
self.min_file_age_seconds,
|
||||
self.delete_empty_files,
|
||||
self.dry_run,
|
||||
)?;
|
||||
|
||||
if files.is_empty() {
|
||||
debug!("No log files found in directory: {:?}", self.log_dir);
|
||||
} else {
|
||||
files.sort_by_key(|f| f.modified);
|
||||
let total_size: u64 = files.iter().map(|f| f.size).sum();
|
||||
info!(
|
||||
"Found {} log files, total size: {} bytes ({:.2} MB)",
|
||||
files.len(),
|
||||
total_size,
|
||||
total_size as f64 / 1024.0 / 1024.0
|
||||
);
|
||||
|
||||
// ── 2. Select + compress + delete ─────────────────────────────────
|
||||
let to_delete = self.select_files_to_delete(&files, total_size);
|
||||
if !to_delete.is_empty() {
|
||||
let (d, f) = self.compress_and_delete(&to_delete)?;
|
||||
total_deleted += d;
|
||||
total_freed += f;
|
||||
}
|
||||
}
|
||||
|
||||
// ── 3. Remove expired compressed archives ─────────────────────────────
|
||||
let expired_gz = collect_expired_compressed_files(&self.log_dir, &self.file_prefix, self.compressed_file_retention_days)?;
|
||||
if !expired_gz.is_empty() {
|
||||
let (d, f) = self.delete_files(&expired_gz)?;
|
||||
total_deleted += d;
|
||||
total_freed += f;
|
||||
}
|
||||
|
||||
if total_deleted > 0 || total_freed > 0 {
|
||||
info!(
|
||||
"Cleanup completed: deleted {} files, freed {} bytes ({:.2} MB)",
|
||||
total_deleted,
|
||||
total_freed,
|
||||
total_freed as f64 / 1024.0 / 1024.0
|
||||
);
|
||||
}
|
||||
|
||||
Ok((total_deleted, total_freed))
|
||||
}
|
||||
|
||||
// ─── Selection ────────────────────────────────────────────────────────────
|
||||
|
||||
/// Choose which files from `files` (sorted oldest-first) should be deleted.
|
||||
///
|
||||
/// The algorithm respects three constraints in order:
|
||||
/// 1. Always keep at least `keep_count` files.
|
||||
/// 2. Delete old files while the total size exceeds `max_total_size_bytes`.
|
||||
/// 3. Delete any file whose individual size exceeds `max_single_file_size_bytes`.
|
||||
pub(super) fn select_files_to_delete(&self, files: &[FileInfo], total_size: u64) -> Vec<FileInfo> {
|
||||
let mut to_delete = Vec::new();
|
||||
|
||||
if files.len() <= self.keep_count {
|
||||
return to_delete;
|
||||
}
|
||||
|
||||
let mut current_size = total_size;
|
||||
let deletable = files.len() - self.keep_count;
|
||||
|
||||
for (idx, file) in files.iter().enumerate() {
|
||||
if idx >= deletable {
|
||||
break;
|
||||
}
|
||||
|
||||
let over_total = self.max_total_size_bytes > 0 && current_size > self.max_total_size_bytes;
|
||||
let over_single = self.max_single_file_size_bytes > 0 && file.size > self.max_single_file_size_bytes;
|
||||
|
||||
if over_total || over_single {
|
||||
if over_single {
|
||||
debug!(
|
||||
"File exceeds single-file size limit: {:?} ({} > {} bytes)",
|
||||
file.path, file.size, self.max_single_file_size_bytes
|
||||
);
|
||||
}
|
||||
current_size = current_size.saturating_sub(file.size);
|
||||
to_delete.push(file.clone());
|
||||
} else {
|
||||
// Neither limit is breached; stop scanning.
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
to_delete
|
||||
}
|
||||
|
||||
// ─── Compression + deletion ───────────────────────────────────────────────
|
||||
|
||||
/// Optionally compress and then delete the given files.
|
||||
fn compress_and_delete(&self, files: &[FileInfo]) -> Result<(usize, u64), std::io::Error> {
|
||||
if self.compress_old_files {
|
||||
for f in files {
|
||||
if let Err(e) = compress_file(&f.path, self.gzip_compression_level, self.dry_run) {
|
||||
tracing::warn!("Failed to compress {:?}: {}", f.path, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
self.delete_files(files)
|
||||
}
|
||||
|
||||
/// Delete all files in `files`, logging each operation.
|
||||
///
|
||||
/// Errors on individual files are logged but do **not** abort the loop.
|
||||
///
|
||||
/// # Returns
|
||||
/// `(deleted_count, freed_bytes)`.
|
||||
pub(super) fn delete_files(&self, files: &[FileInfo]) -> Result<(usize, u64), std::io::Error> {
|
||||
let mut deleted = 0usize;
|
||||
let mut freed = 0u64;
|
||||
|
||||
for f in files {
|
||||
if self.dry_run {
|
||||
info!("[DRY RUN] Would delete: {:?} ({} bytes)", f.path, f.size);
|
||||
deleted += 1;
|
||||
freed += f.size;
|
||||
} else {
|
||||
match std::fs::remove_file(&f.path) {
|
||||
Ok(()) => {
|
||||
debug!("Deleted: {:?}", f.path);
|
||||
deleted += 1;
|
||||
freed += f.size;
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to delete {:?}: {}", f.path, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok((deleted, freed))
|
||||
}
|
||||
}
|
||||
76
crates/obs/src/log_cleanup/compress.rs
Normal file
76
crates/obs/src/log_cleanup/compress.rs
Normal file
@@ -0,0 +1,76 @@
|
||||
// Copyright 2024 RustFS Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Gzip compression helper for old log files.
|
||||
//!
|
||||
//! Files are compressed in place: `<name>` → `<name>.gz`. The original file
|
||||
//! is **not** deleted here — deletion is handled by the caller after
|
||||
//! compression succeeds.
|
||||
|
||||
use flate2::Compression;
|
||||
use flate2::write::GzEncoder;
|
||||
use std::fs::File;
|
||||
use std::io::{BufReader, BufWriter, Write};
|
||||
use std::path::Path;
|
||||
use tracing::{debug, info};
|
||||
|
||||
/// Compress `path` to `<path>.gz` using gzip.
|
||||
///
|
||||
/// If a `.gz` file for the given path already exists the function returns
|
||||
/// `Ok(())` immediately without overwriting the existing archive.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `path` - Path to the uncompressed log file.
|
||||
/// * `level` - Gzip compression level (`1`–`9`); clamped automatically.
|
||||
/// * `dry_run` - When `true`, log what would be done without writing anything.
|
||||
///
|
||||
/// # Errors
|
||||
/// Propagates any I/O error encountered while opening, reading, writing, or
|
||||
/// flushing files.
|
||||
pub(super) fn compress_file(path: &Path, level: u32, dry_run: bool) -> Result<(), std::io::Error> {
|
||||
let gz_path = path.with_extension("gz");
|
||||
|
||||
if gz_path.exists() {
|
||||
debug!("Compressed file already exists, skipping: {:?}", gz_path);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if dry_run {
|
||||
info!("[DRY RUN] Would compress file: {:?} -> {:?}", path, gz_path);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let input = File::open(path)?;
|
||||
let output = File::create(&gz_path)?;
|
||||
|
||||
let mut reader = BufReader::new(input);
|
||||
let mut writer = BufWriter::new(output);
|
||||
|
||||
let mut encoder = GzEncoder::new(Vec::new(), Compression::new(level.clamp(1, 9)));
|
||||
std::io::copy(&mut reader, &mut encoder)?;
|
||||
let compressed = encoder.finish()?;
|
||||
|
||||
writer.write_all(&compressed)?;
|
||||
writer.flush()?;
|
||||
|
||||
debug!(
|
||||
"Compressed {:?} -> {:?} ({} bytes -> {} bytes)",
|
||||
path,
|
||||
gz_path,
|
||||
std::fs::metadata(path).map(|m| m.len()).unwrap_or(0),
|
||||
compressed.len()
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
178
crates/obs/src/log_cleanup/mod.rs
Normal file
178
crates/obs/src/log_cleanup/mod.rs
Normal file
@@ -0,0 +1,178 @@
|
||||
// Copyright 2024 RustFS Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Log-file cleanup subsystem.
|
||||
//!
|
||||
//! This module provides [`LogCleaner`], a configurable manager that
|
||||
//! periodically removes, compresses, or archives old rolling log files.
|
||||
//!
|
||||
//! ## Sub-modules
|
||||
//!
|
||||
//! | Module | Responsibility |
|
||||
//! |-------------|----------------------------------------------------------|
|
||||
//! | `types` | Shared data types (`FileInfo`) |
|
||||
//! | `scanner` | Filesystem traversal — discovers eligible files |
|
||||
//! | `compress` | Gzip compression helper |
|
||||
//! | `cleaner` | Core orchestration — selection, compression, deletion |
|
||||
//!
|
||||
//! ## Usage
|
||||
//!
|
||||
//! ```no_run
|
||||
//! use std::path::PathBuf;
|
||||
//! use rustfs_obs::LogCleaner;
|
||||
//!
|
||||
//! let cleaner = LogCleaner::new(
|
||||
//! PathBuf::from("/var/log/rustfs"),
|
||||
//! "rustfs.log.".to_string(),
|
||||
//! 10, // keep_count
|
||||
//! 2 * 1024 * 1024 * 1024, // max_total_size_bytes (2 GiB)
|
||||
//! 0, // max_single_file_size_bytes (unlimited)
|
||||
//! true, // compress_old_files
|
||||
//! 6, // gzip_compression_level
|
||||
//! 30, // compressed_file_retention_days
|
||||
//! vec![], // exclude_patterns
|
||||
//! true, // delete_empty_files
|
||||
//! 3600, // min_file_age_seconds (1 hour)
|
||||
//! false, // dry_run
|
||||
//! );
|
||||
//!
|
||||
//! let (deleted, freed_bytes) = cleaner.cleanup().expect("cleanup failed");
|
||||
//! println!("Deleted {deleted} files, freed {freed_bytes} bytes");
|
||||
//! ```
|
||||
|
||||
mod cleaner;
|
||||
mod compress;
|
||||
mod scanner;
|
||||
mod types;
|
||||
|
||||
pub use cleaner::LogCleaner;
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::cleaner::LogCleaner;
    use super::scanner;
    use std::fs::File;
    use std::io::Write;
    use std::path::Path;
    use tempfile::TempDir;

    /// Create `dir/name` containing `size` bytes of filler data.
    fn create_log_file(dir: &Path, name: &str, size: usize) -> std::io::Result<()> {
        let path = dir.join(name);
        let mut f = File::create(path)?;
        f.write_all(&vec![b'X'; size])?;
        f.flush()
    }

    /// Build a cleaner with sensible test defaults (no compression, no age gate).
    fn make_cleaner(dir: std::path::PathBuf, keep: usize, max_bytes: u64) -> LogCleaner {
        LogCleaner::new(
            dir,
            "app.log.".to_string(),
            keep,
            max_bytes,
            0,          // max_single_file_size_bytes
            false,      // compress_old_files
            6,          // gzip_compression_level
            30,         // compressed_file_retention_days
            Vec::new(), // exclude_patterns
            true,       // delete_empty_files
            0,          // min_file_age_seconds (0 = no age gate in tests)
            false,      // dry_run
        )
    }

    #[test]
    fn test_cleanup_removes_oldest_when_over_size() -> std::io::Result<()> {
        let tmp = TempDir::new()?;
        let dir = tmp.path().to_path_buf();

        // Files are created oldest-to-newest; the cleaner sorts by mtime and
        // evicts from the oldest end first.
        create_log_file(&dir, "app.log.2024-01-01", 1024)?;
        create_log_file(&dir, "app.log.2024-01-02", 1024)?;
        create_log_file(&dir, "app.log.2024-01-03", 1024)?;
        create_log_file(&dir, "other.log", 1024)?; // not managed

        // Total managed = 3 072 bytes; limit = 2 048; keep_count = 2 → must delete 1.
        let cleaner = make_cleaner(dir.clone(), 2, 2048);
        let (deleted, freed) = cleaner.cleanup()?;

        assert_eq!(deleted, 1, "should delete exactly one file");
        assert_eq!(freed, 1024);
        Ok(())
    }

    #[test]
    fn test_cleanup_respects_keep_count() -> std::io::Result<()> {
        let tmp = TempDir::new()?;
        let dir = tmp.path().to_path_buf();

        for i in 1..=5 {
            create_log_file(&dir, &format!("app.log.2024-01-0{i}"), 1024)?;
        }

        // No size limit, keep_count = 3 → nothing to delete (5 > 3 but size == 0 limit).
        let cleaner = make_cleaner(dir.clone(), 3, 0);
        let (deleted, _) = cleaner.cleanup()?;
        assert_eq!(deleted, 0, "keep_count prevents deletion when no size limit");
        Ok(())
    }

    #[test]
    fn test_cleanup_ignores_unrelated_files() -> std::io::Result<()> {
        let tmp = TempDir::new()?;
        let dir = tmp.path().to_path_buf();

        create_log_file(&dir, "app.log.2024-01-01", 1024)?;
        create_log_file(&dir, "app.log.2024-01-02", 1024)?;
        create_log_file(&dir, "other.log", 512)?; // different prefix

        let cleaner = make_cleaner(dir.clone(), 1, 512);
        let (deleted, _) = cleaner.cleanup()?;

        // "other.log" must not be counted or deleted.
        assert_eq!(deleted, 1, "only managed files should be deleted");
        Ok(())
    }

    #[test]
    fn test_collect_log_files_counts_correctly() -> std::io::Result<()> {
        let tmp = TempDir::new()?;
        let dir = tmp.path().to_path_buf();

        create_log_file(&dir, "app.log.2024-01-01", 1024)?;
        create_log_file(&dir, "app.log.2024-01-02", 2048)?;
        create_log_file(&dir, "other.log", 512)?;

        // Scanner is exercised directly, bypassing the cleaner policy layer.
        let files = scanner::collect_log_files(&dir, "app.log.", &[], 0, true, false)?;
        assert_eq!(files.len(), 2, "scanner should find exactly 2 managed files");
        Ok(())
    }

    #[test]
    fn test_dry_run_does_not_delete() -> std::io::Result<()> {
        let tmp = TempDir::new()?;
        let dir = tmp.path().to_path_buf();

        create_log_file(&dir, "app.log.2024-01-01", 1024)?;
        create_log_file(&dir, "app.log.2024-01-02", 1024)?;
        create_log_file(&dir, "app.log.2024-01-03", 1024)?;

        // Last positional argument enables dry_run.
        let cleaner = LogCleaner::new(dir.clone(), "app.log.".to_string(), 1, 1024, 0, false, 6, 30, vec![], true, 0, true);
        let (deleted, _freed) = cleaner.cleanup()?;

        // dry_run=true reports deletions but doesn't actually remove files.
        assert!(deleted > 0, "dry_run should report files as deleted");
        assert_eq!(std::fs::read_dir(&dir)?.count(), 3, "no files should actually be removed");
        Ok(())
    }
}
|
||||
197
crates/obs/src/log_cleanup/scanner.rs
Normal file
197
crates/obs/src/log_cleanup/scanner.rs
Normal file
@@ -0,0 +1,197 @@
|
||||
// Copyright 2024 RustFS Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Filesystem scanner for discovering log files eligible for cleanup.
|
||||
//!
|
||||
//! Apart from optionally removing zero-byte files when `delete_empty_files`
//! is enabled, this module performs no destructive operations: it never
//! compresses files and otherwise only reports what it found.
|
||||
|
||||
use super::types::FileInfo;
|
||||
use std::path::Path;
|
||||
use std::time::{Duration, SystemTime};
|
||||
use tracing::debug;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
/// Collect all log files in `log_dir` whose name starts with `file_prefix`.
|
||||
///
|
||||
/// Files that:
|
||||
/// - are already compressed (`.gz` extension),
|
||||
/// - are zero-byte and `delete_empty_files` is `true` (these are handled
|
||||
/// immediately by the caller), or
|
||||
/// - match one of the `exclude_patterns`,
|
||||
/// - were modified more recently than `min_file_age_seconds` seconds ago,
|
||||
///
|
||||
/// are skipped and not returned in the result list.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `log_dir` - Root directory to scan (depth 1 only, no recursion).
|
||||
/// * `file_prefix` - Only filenames starting with this string are considered.
|
||||
/// * `exclude_patterns` - Compiled glob patterns; matching files are skipped.
|
||||
/// * `min_file_age_seconds` - Files younger than this threshold are skipped.
|
||||
/// * `delete_empty_files` - When `true`, zero-byte files trigger an immediate
|
||||
/// delete by the caller before the rest of cleanup runs.
|
||||
pub(super) fn collect_log_files(
|
||||
log_dir: &Path,
|
||||
file_prefix: &str,
|
||||
exclude_patterns: &[glob::Pattern],
|
||||
min_file_age_seconds: u64,
|
||||
delete_empty_files: bool,
|
||||
dry_run: bool,
|
||||
) -> Result<Vec<FileInfo>, std::io::Error> {
|
||||
let mut files = Vec::new();
|
||||
let now = SystemTime::now();
|
||||
|
||||
for entry in WalkDir::new(log_dir)
|
||||
.max_depth(1)
|
||||
.follow_links(false)
|
||||
.into_iter()
|
||||
.filter_map(|e| e.ok())
|
||||
{
|
||||
let path = entry.path();
|
||||
if !path.is_file() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let filename = match path.file_name().and_then(|n| n.to_str()) {
|
||||
Some(f) => f,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
// Only manage files that carry our prefix.
|
||||
if !filename.starts_with(file_prefix) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Compressed files are handled by collect_compressed_files.
|
||||
if filename.ends_with(".gz") {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Honour exclusion patterns.
|
||||
if is_excluded(filename, exclude_patterns) {
|
||||
debug!("Excluding file from cleanup: {:?}", filename);
|
||||
continue;
|
||||
}
|
||||
|
||||
let metadata = match entry.metadata() {
|
||||
Ok(m) => m,
|
||||
Err(_) => continue,
|
||||
};
|
||||
let modified = match metadata.modified() {
|
||||
Ok(t) => t,
|
||||
Err(_) => continue,
|
||||
};
|
||||
let file_size = metadata.len();
|
||||
|
||||
// Delete zero-byte files immediately (outside the normal selection
|
||||
// logic) when the feature is enabled.
|
||||
if file_size == 0 && delete_empty_files {
|
||||
if !dry_run {
|
||||
if let Err(e) = std::fs::remove_file(path) {
|
||||
tracing::warn!("Failed to delete empty file {:?}: {}", path, e);
|
||||
} else {
|
||||
debug!("Deleted empty file: {:?}", path);
|
||||
}
|
||||
} else {
|
||||
tracing::info!("[DRY RUN] Would delete empty file: {:?}", path);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip files that are too young.
|
||||
if let Ok(age) = now.duration_since(modified)
|
||||
&& age.as_secs() < min_file_age_seconds
|
||||
{
|
||||
debug!(
|
||||
"Skipping file (too new): {:?}, age: {}s, min_age: {}s",
|
||||
filename,
|
||||
age.as_secs(),
|
||||
min_file_age_seconds
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
files.push(FileInfo {
|
||||
path: path.to_path_buf(),
|
||||
size: file_size,
|
||||
modified,
|
||||
});
|
||||
}
|
||||
|
||||
Ok(files)
|
||||
}
|
||||
|
||||
/// Collect compressed `.gz` log files whose age exceeds the retention period.
|
||||
///
|
||||
/// When `compressed_file_retention_days` is `0` the function returns immediately
|
||||
/// without collecting anything (files are kept indefinitely).
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `log_dir` - Root directory to scan.
|
||||
/// * `file_prefix` - Only `.gz` files that also start with this prefix are considered.
|
||||
/// * `compressed_file_retention_days` - Files older than this are eligible for
|
||||
/// deletion; `0` means never delete compressed files.
|
||||
pub(super) fn collect_expired_compressed_files(
|
||||
log_dir: &Path,
|
||||
file_prefix: &str,
|
||||
compressed_file_retention_days: u64,
|
||||
) -> Result<Vec<FileInfo>, std::io::Error> {
|
||||
if compressed_file_retention_days == 0 {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let retention = Duration::from_secs(compressed_file_retention_days * 24 * 3600);
|
||||
let now = SystemTime::now();
|
||||
let mut files = Vec::new();
|
||||
|
||||
for entry in WalkDir::new(log_dir)
|
||||
.max_depth(1)
|
||||
.follow_links(false)
|
||||
.into_iter()
|
||||
.filter_map(|e| e.ok())
|
||||
{
|
||||
let path = entry.path();
|
||||
if !path.is_file() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let filename = match path.file_name().and_then(|n| n.to_str()) {
|
||||
Some(f) => f,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
if !filename.starts_with(file_prefix) || !filename.ends_with(".gz") {
|
||||
continue;
|
||||
}
|
||||
|
||||
let Ok(metadata) = entry.metadata() else { continue };
|
||||
let Ok(modified) = metadata.modified() else { continue };
|
||||
let Ok(age) = now.duration_since(modified) else { continue };
|
||||
|
||||
if age > retention {
|
||||
files.push(FileInfo {
|
||||
path: path.to_path_buf(),
|
||||
size: metadata.len(),
|
||||
modified,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Ok(files)
|
||||
}
|
||||
|
||||
/// Returns `true` if `filename` matches any of the compiled exclusion patterns.
|
||||
pub(super) fn is_excluded(filename: &str, patterns: &[glob::Pattern]) -> bool {
|
||||
patterns.iter().any(|p| p.matches(filename))
|
||||
}
|
||||
33
crates/obs/src/log_cleanup/types.rs
Normal file
33
crates/obs/src/log_cleanup/types.rs
Normal file
@@ -0,0 +1,33 @@
|
||||
// Copyright 2024 RustFS Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Shared types used across the log-cleanup sub-modules.
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::time::SystemTime;
|
||||
|
||||
/// Metadata for a single log file discovered by the scanner.
///
/// Carries enough information to make cleanup decisions (sort by age, compare
/// size against limits, etc.) without re-reading filesystem metadata on every
/// operation. Note the snapshot can go stale if the file changes between the
/// scan and the cleanup action.
#[derive(Debug, Clone)]
pub(super) struct FileInfo {
    // Path to the file as discovered by the scanner (derived from the
    // directory walk; absolute if the scanned root was absolute).
    pub path: PathBuf,
    // File size in bytes at the time of discovery.
    pub size: u64,
    // Last-modification timestamp from the filesystem.
    pub modified: SystemTime,
}
|
||||
@@ -1,725 +0,0 @@
|
||||
// Copyright 2024 RustFS Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use crate::config::OtelConfig;
|
||||
use crate::global::OBSERVABILITY_METRIC_ENABLED;
|
||||
use crate::{Recorder, TelemetryError};
|
||||
use flexi_logger::{DeferredNow, Record, WriteMode, WriteMode::AsyncWith, style};
|
||||
use metrics::counter;
|
||||
use nu_ansi_term::Color;
|
||||
use opentelemetry::{KeyValue, global, trace::TracerProvider};
|
||||
use opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge;
|
||||
use opentelemetry_otlp::{Compression, Protocol, WithExportConfig, WithHttpConfig};
|
||||
use opentelemetry_sdk::propagation::TraceContextPropagator;
|
||||
use opentelemetry_sdk::{
|
||||
Resource,
|
||||
logs::SdkLoggerProvider,
|
||||
metrics::{PeriodicReader, SdkMeterProvider},
|
||||
trace::{RandomIdGenerator, Sampler, SdkTracerProvider},
|
||||
};
|
||||
use opentelemetry_semantic_conventions::{
|
||||
SCHEMA_URL,
|
||||
attribute::{DEPLOYMENT_ENVIRONMENT_NAME, NETWORK_LOCAL_ADDRESS, SERVICE_VERSION as OTEL_SERVICE_VERSION},
|
||||
};
|
||||
use rustfs_config::{
|
||||
APP_NAME, DEFAULT_LOG_KEEP_FILES, DEFAULT_LOG_LEVEL, DEFAULT_OBS_LOG_STDOUT_ENABLED, DEFAULT_OBS_LOGS_EXPORT_ENABLED,
|
||||
DEFAULT_OBS_METRICS_EXPORT_ENABLED, DEFAULT_OBS_TRACES_EXPORT_ENABLED, ENVIRONMENT, METER_INTERVAL, SAMPLE_RATIO,
|
||||
SERVICE_VERSION,
|
||||
observability::{
|
||||
DEFAULT_OBS_ENVIRONMENT_PRODUCTION, DEFAULT_OBS_LOG_FLUSH_MS, DEFAULT_OBS_LOG_MESSAGE_CAPA, DEFAULT_OBS_LOG_POOL_CAPA,
|
||||
ENV_OBS_LOG_DIRECTORY, ENV_OBS_LOG_FLUSH_MS, ENV_OBS_LOG_MESSAGE_CAPA, ENV_OBS_LOG_POOL_CAPA,
|
||||
},
|
||||
};
|
||||
use rustfs_utils::{get_env_opt_str, get_env_u64, get_env_usize, get_local_ip_with_default};
|
||||
use smallvec::SmallVec;
|
||||
use std::{borrow::Cow, fs, io::IsTerminal, time::Duration};
|
||||
use tracing::info;
|
||||
use tracing_error::ErrorLayer;
|
||||
use tracing_opentelemetry::{MetricsLayer, OpenTelemetryLayer};
|
||||
use tracing_subscriber::{
|
||||
EnvFilter, Layer,
|
||||
fmt::{format::FmtSpan, time::LocalTime},
|
||||
layer::SubscriberExt,
|
||||
util::SubscriberInitExt,
|
||||
};
|
||||
|
||||
/// A guard object that manages the lifecycle of OpenTelemetry components.
|
||||
///
|
||||
/// This struct holds references to the created OpenTelemetry providers and ensures
|
||||
/// they are properly shut down when the guard is dropped. It implements the RAII
|
||||
/// (Resource Acquisition Is Initialization) pattern for managing telemetry resources.
|
||||
///
|
||||
/// When this guard goes out of scope, it will automatically shut down:
|
||||
/// - The tracer provider (for distributed tracing)
|
||||
/// - The meter provider (for metrics collection)
|
||||
/// - The logger provider (for structured logging)
|
||||
///
|
||||
/// Implement Debug trait correctly, rather than using derive, as some fields may not have implemented Debug
|
||||
pub struct OtelGuard {
    /// OTLP tracer provider; `Some` only when the HTTP observability path set one up.
    tracer_provider: Option<SdkTracerProvider>,
    /// OTLP meter provider; `Some` only when metric export was enabled.
    meter_provider: Option<SdkMeterProvider>,
    /// OTLP logger provider; `Some` only when log export was enabled.
    logger_provider: Option<SdkLoggerProvider>,
    /// Handle for the file-based `flexi_logger` backend; shut down on drop.
    flexi_logger_handles: Option<flexi_logger::LoggerHandle>,
    /// Worker guard that keeps the non-blocking stdout appender thread alive;
    /// dropping it flushes any buffered log lines.
    tracing_guard: Option<tracing_appender::non_blocking::WorkerGuard>,
}
|
||||
|
||||
impl std::fmt::Debug for OtelGuard {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("OtelGuard")
|
||||
.field("tracer_provider", &self.tracer_provider.is_some())
|
||||
.field("meter_provider", &self.meter_provider.is_some())
|
||||
.field("logger_provider", &self.logger_provider.is_some())
|
||||
.field("flexi_logger_handles", &self.flexi_logger_handles.is_some())
|
||||
.field("tracing_guard", &self.tracing_guard.is_some())
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for OtelGuard {
    /// Shut down every live telemetry component in a fixed order:
    /// tracer -> meter -> logger -> flexi_logger handle -> tracing worker guard.
    ///
    /// Errors are reported via `eprintln!`/`println!` because the logging
    /// pipeline itself may already be torn down at this point.
    fn drop(&mut self) {
        // `take()` moves each provider out so shutdown runs at most once.
        if let Some(provider) = self.tracer_provider.take()
            && let Err(err) = provider.shutdown()
        {
            eprintln!("Tracer shutdown error: {err:?}");
        }

        if let Some(provider) = self.meter_provider.take()
            && let Err(err) = provider.shutdown()
        {
            eprintln!("Meter shutdown error: {err:?}");
        }
        if let Some(provider) = self.logger_provider.take()
            && let Err(err) = provider.shutdown()
        {
            eprintln!("Logger shutdown error: {err:?}");
        }

        // Stop the file-logging backend, flushing buffered records.
        if let Some(handle) = self.flexi_logger_handles.take() {
            handle.shutdown();
            println!("flexi_logger shutdown completed");
        }

        // Dropping the worker guard flushes the non-blocking stdout writer.
        if let Some(guard) = self.tracing_guard.take() {
            drop(guard);
            println!("Tracing guard dropped, flushing logs.");
        }
    }
}
|
||||
|
||||
/// create OpenTelemetry Resource
|
||||
fn resource(config: &OtelConfig) -> Resource {
|
||||
Resource::builder()
|
||||
.with_service_name(Cow::Borrowed(config.service_name.as_deref().unwrap_or(APP_NAME)).to_string())
|
||||
.with_schema_url(
|
||||
[
|
||||
KeyValue::new(
|
||||
OTEL_SERVICE_VERSION,
|
||||
Cow::Borrowed(config.service_version.as_deref().unwrap_or(SERVICE_VERSION)).to_string(),
|
||||
),
|
||||
KeyValue::new(
|
||||
DEPLOYMENT_ENVIRONMENT_NAME,
|
||||
Cow::Borrowed(config.environment.as_deref().unwrap_or(ENVIRONMENT)).to_string(),
|
||||
),
|
||||
KeyValue::new(NETWORK_LOCAL_ADDRESS, get_local_ip_with_default()),
|
||||
],
|
||||
SCHEMA_URL,
|
||||
)
|
||||
.build()
|
||||
}
|
||||
|
||||
/// Creates a periodic reader for stdout metrics
|
||||
fn create_periodic_reader(interval: u64) -> PeriodicReader<opentelemetry_stdout::MetricExporter> {
|
||||
PeriodicReader::builder(opentelemetry_stdout::MetricExporter::default())
|
||||
.with_interval(Duration::from_secs(interval))
|
||||
.build()
|
||||
}
|
||||
|
||||
// Read the AsyncWith parameter from the environment variable
|
||||
fn get_env_async_with() -> WriteMode {
|
||||
let pool_capa = get_env_usize(ENV_OBS_LOG_POOL_CAPA, DEFAULT_OBS_LOG_POOL_CAPA);
|
||||
let message_capa = get_env_usize(ENV_OBS_LOG_MESSAGE_CAPA, DEFAULT_OBS_LOG_MESSAGE_CAPA);
|
||||
let flush_ms = get_env_u64(ENV_OBS_LOG_FLUSH_MS, DEFAULT_OBS_LOG_FLUSH_MS);
|
||||
|
||||
AsyncWith {
|
||||
pool_capa,
|
||||
message_capa,
|
||||
flush_interval: Duration::from_millis(flush_ms),
|
||||
}
|
||||
}
|
||||
|
||||
fn build_env_filter(logger_level: &str, default_level: Option<&str>) -> EnvFilter {
|
||||
let level = default_level.unwrap_or(logger_level);
|
||||
let mut filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(level));
|
||||
if !matches!(logger_level, "trace" | "debug") {
|
||||
let directives: SmallVec<[&str; 5]> = smallvec::smallvec!["hyper", "tonic", "h2", "reqwest", "tower"];
|
||||
for directive in directives {
|
||||
filter = filter.add_directive(format!("{directive}=off").parse().unwrap());
|
||||
}
|
||||
}
|
||||
|
||||
filter
|
||||
}
|
||||
|
||||
/// Custom Log Formatter Function - Terminal Output (with Color)
|
||||
#[inline(never)]
|
||||
fn format_with_color(w: &mut dyn std::io::Write, now: &mut DeferredNow, record: &Record) -> Result<(), std::io::Error> {
|
||||
let level = record.level();
|
||||
let level_style = style(level);
|
||||
let binding = std::thread::current();
|
||||
let thread_name = binding.name().unwrap_or("unnamed");
|
||||
let thread_id = format!("{:?}", std::thread::current().id());
|
||||
write!(
|
||||
w,
|
||||
"[{}] {} [{}] [{}:{}] [{}:{}] {}",
|
||||
now.now().format(flexi_logger::TS_DASHES_BLANK_COLONS_DOT_BLANK),
|
||||
level_style.paint(level.to_string()),
|
||||
Color::Magenta.paint(record.target()),
|
||||
Color::Blue.paint(record.file().unwrap_or("unknown")),
|
||||
Color::Blue.paint(record.line().unwrap_or(0).to_string()),
|
||||
Color::Green.paint(thread_name),
|
||||
Color::Green.paint(thread_id),
|
||||
record.args()
|
||||
)
|
||||
}
|
||||
|
||||
/// Custom Log Formatter - File Output (No Color)
|
||||
#[inline(never)]
|
||||
fn format_for_file(w: &mut dyn std::io::Write, now: &mut DeferredNow, record: &Record) -> Result<(), std::io::Error> {
|
||||
let level = record.level();
|
||||
let binding = std::thread::current();
|
||||
let thread_name = binding.name().unwrap_or("unnamed");
|
||||
let thread_id = format!("{:?}", std::thread::current().id());
|
||||
write!(
|
||||
w,
|
||||
"[{}] {} [{}] [{}:{}] [{}:{}] {}",
|
||||
now.now().format(flexi_logger::TS_DASHES_BLANK_COLONS_DOT_BLANK),
|
||||
level,
|
||||
record.target(),
|
||||
record.file().unwrap_or("unknown"),
|
||||
record.line().unwrap_or(0),
|
||||
thread_name,
|
||||
thread_id,
|
||||
record.args()
|
||||
)
|
||||
}
|
||||
|
||||
/// Initialise JSON logging to stdout via a non-blocking appender, including
/// span information.
///
/// The returned [`OtelGuard`] retains the `WorkerGuard` so the background
/// writer thread stays alive and buffered lines are flushed on shutdown
/// (fix: retain WorkerGuard to avoid releasing it right after initialization).
fn init_stdout_logging(_config: &OtelConfig, logger_level: &str, is_production: bool) -> OtelGuard {
    let env_filter = build_env_filter(logger_level, None);
    // Non-blocking writer: log lines go through a channel to a worker thread.
    let (nb, guard) = tracing_appender::non_blocking(std::io::stdout());
    // Only emit ANSI color when stdout is an interactive terminal.
    let enable_color = std::io::stdout().is_terminal();
    let fmt_layer = tracing_subscriber::fmt::layer()
        .with_timer(LocalTime::rfc_3339())
        .with_target(true)
        .with_ansi(enable_color)
        .with_thread_names(true)
        .with_thread_ids(true)
        .with_file(true)
        .with_line_number(true)
        .with_writer(nb)
        .json()
        .with_current_span(true)
        .with_span_list(true)
        // Production logs only span-close events to cut volume; otherwise all
        // span lifecycle events are emitted.
        .with_span_events(if is_production { FmtSpan::CLOSE } else { FmtSpan::FULL });
    tracing_subscriber::registry()
        .with(env_filter)
        .with(ErrorLayer::default())
        .with(fmt_layer)
        .init();

    // No OTLP metrics pipeline in this mode.
    OBSERVABILITY_METRIC_ENABLED.set(false).ok();
    counter!("rustfs.start.total").increment(1);
    info!("Init stdout logging (level: {})", logger_level);
    OtelGuard {
        tracer_provider: None,
        meter_provider: None,
        logger_provider: None,
        flexi_logger_handles: None,
        tracing_guard: Some(guard),
    }
}
|
||||
|
||||
/// File rolling log (size switching + number retained)
|
||||
fn init_file_logging(config: &OtelConfig, logger_level: &str, is_production: bool) -> Result<OtelGuard, TelemetryError> {
|
||||
use flexi_logger::{Age, Cleanup, Criterion, FileSpec, LogSpecification, Naming};
|
||||
|
||||
let service_name = config.service_name.as_deref().unwrap_or(APP_NAME);
|
||||
let default_log_directory = rustfs_utils::dirs::get_log_directory_to_string(ENV_OBS_LOG_DIRECTORY);
|
||||
let log_directory = config.log_directory.as_deref().unwrap_or(default_log_directory.as_str());
|
||||
let log_filename = config.log_filename.as_deref().unwrap_or(service_name);
|
||||
let keep_files = config.log_keep_files.unwrap_or(DEFAULT_LOG_KEEP_FILES);
|
||||
if let Err(e) = fs::create_dir_all(log_directory) {
|
||||
return Err(TelemetryError::Io(e.to_string()));
|
||||
}
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::fs::Permissions;
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
let desired: u32 = 0o755;
|
||||
match fs::metadata(log_directory) {
|
||||
Ok(meta) => {
|
||||
let current = meta.permissions().mode() & 0o777;
|
||||
// Only tighten to 0755 if existing permissions are looser than target, avoid loosening
|
||||
if (current & !desired) != 0 {
|
||||
if let Err(e) = fs::set_permissions(log_directory, Permissions::from_mode(desired)) {
|
||||
return Err(TelemetryError::SetPermissions(format!(
|
||||
"dir='{log_directory}', want={desired:#o}, have={current:#o}, err={e}"
|
||||
)));
|
||||
}
|
||||
// Second verification
|
||||
if let Ok(meta2) = fs::metadata(log_directory) {
|
||||
let after = meta2.permissions().mode() & 0o777;
|
||||
if after != desired {
|
||||
return Err(TelemetryError::SetPermissions(format!(
|
||||
"dir='{log_directory}', want={desired:#o}, after={after:#o}"
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
return Err(TelemetryError::Io(format!("stat '{log_directory}' failed: {e}")));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// parsing level
|
||||
let log_spec = LogSpecification::parse(logger_level)
|
||||
.unwrap_or_else(|_| LogSpecification::parse(DEFAULT_LOG_LEVEL).unwrap_or(LogSpecification::error()));
|
||||
|
||||
// Switch by size (MB), Build log cutting conditions
|
||||
let rotation_criterion = match (config.log_rotation_time.as_deref(), config.log_rotation_size_mb) {
|
||||
// Cut by time and size at the same time
|
||||
(Some(time), Some(size)) => {
|
||||
let age = match time.to_lowercase().as_str() {
|
||||
"hour" => Age::Hour,
|
||||
"day" => Age::Day,
|
||||
"minute" => Age::Minute,
|
||||
"second" => Age::Second,
|
||||
_ => Age::Day, // The default is by day
|
||||
};
|
||||
Criterion::AgeOrSize(age, size * 1024 * 1024) // Convert to bytes
|
||||
}
|
||||
// Cut by time only
|
||||
(Some(time), None) => {
|
||||
let age = match time.to_lowercase().as_str() {
|
||||
"hour" => Age::Hour,
|
||||
"day" => Age::Day,
|
||||
"minute" => Age::Minute,
|
||||
"second" => Age::Second,
|
||||
_ => Age::Day, // The default is by day
|
||||
};
|
||||
Criterion::Age(age)
|
||||
}
|
||||
// Cut by size only
|
||||
(None, Some(size)) => {
|
||||
Criterion::Size(size * 1024 * 1024) // Convert to bytes
|
||||
}
|
||||
// By default, it is cut by the day
|
||||
_ => Criterion::Age(Age::Day),
|
||||
};
|
||||
|
||||
// write mode
|
||||
let write_mode = get_env_async_with();
|
||||
// Build
|
||||
let mut builder = flexi_logger::Logger::try_with_env_or_str(logger_level)
|
||||
.unwrap_or(flexi_logger::Logger::with(log_spec.clone()))
|
||||
.format_for_stderr(format_with_color)
|
||||
.format_for_stdout(format_with_color)
|
||||
.format_for_files(format_for_file)
|
||||
.log_to_file(
|
||||
FileSpec::default()
|
||||
.directory(log_directory)
|
||||
.basename(log_filename)
|
||||
.suppress_timestamp(),
|
||||
)
|
||||
.rotate(rotation_criterion, Naming::TimestampsDirect, Cleanup::KeepLogFiles(keep_files))
|
||||
.write_mode(write_mode)
|
||||
.append()
|
||||
.use_utc();
|
||||
|
||||
// Optional copy to stdout (for local observation)
|
||||
if config.log_stdout_enabled.unwrap_or(DEFAULT_OBS_LOG_STDOUT_ENABLED) || !is_production {
|
||||
builder = builder.duplicate_to_stdout(flexi_logger::Duplicate::All);
|
||||
} else {
|
||||
builder = builder.duplicate_to_stdout(flexi_logger::Duplicate::None);
|
||||
}
|
||||
|
||||
let handle = match builder.start() {
|
||||
Ok(h) => Some(h),
|
||||
Err(e) => {
|
||||
eprintln!("ERROR: start flexi_logger failed: {e}");
|
||||
None
|
||||
}
|
||||
};
|
||||
|
||||
OBSERVABILITY_METRIC_ENABLED.set(false).ok();
|
||||
info!(
|
||||
"Init file logging at '{}', roll size {:?}MB, keep {}",
|
||||
log_directory, config.log_rotation_size_mb, keep_files
|
||||
);
|
||||
|
||||
Ok(OtelGuard {
|
||||
tracer_provider: None,
|
||||
meter_provider: None,
|
||||
logger_provider: None,
|
||||
flexi_logger_handles: handle,
|
||||
tracing_guard: None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Initialise full observability over OTLP/HTTP.
///
/// Each signal (traces, metrics, logs) gets its own endpoint when configured;
/// otherwise the unified `config.endpoint` plus the standard `/v1/{signal}`
/// suffix is used. Each pipeline can be individually disabled via its
/// `*_export_enabled` flag.
///
/// # Errors
/// Returns a [`TelemetryError`] variant when building any OTLP exporter or
/// installing the global metrics recorder fails.
fn init_observability_http(config: &OtelConfig, logger_level: &str, is_production: bool) -> Result<OtelGuard, TelemetryError> {
    // Resources and sampling
    let res = resource(config);
    let service_name = config.service_name.as_deref().unwrap_or(APP_NAME).to_owned();
    // Stdout mirroring defaults to on outside production.
    let use_stdout = config.use_stdout.unwrap_or(!is_production);
    let sample_ratio = config.sample_ratio.unwrap_or(SAMPLE_RATIO);
    // Ratios in [0, 1) sample probabilistically; anything else (incl. 1.0) samples everything.
    let sampler = if (0.0..1.0).contains(&sample_ratio) {
        Sampler::TraceIdRatioBased(sample_ratio)
    } else {
        Sampler::AlwaysOn
    };

    // Endpoint resolution: per-signal endpoint wins, else unified endpoint + suffix.
    let root_ep = config.endpoint.clone(); // owned String

    let trace_ep: String = config
        .trace_endpoint
        .as_deref()
        .filter(|s| !s.is_empty())
        .map(|s| s.to_string())
        .unwrap_or_else(|| format!("{root_ep}/v1/traces"));

    let metric_ep: String = config
        .metric_endpoint
        .as_deref()
        .filter(|s| !s.is_empty())
        .map(|s| s.to_string())
        .unwrap_or_else(|| format!("{root_ep}/v1/metrics"));

    let log_ep: String = config
        .log_endpoint
        .as_deref()
        .filter(|s| !s.is_empty())
        .map(|s| s.to_string())
        .unwrap_or_else(|| format!("{root_ep}/v1/logs"));

    // Tracer (HTTP): batch-exports spans; also registers the global tracer
    // provider and W3C trace-context propagator.
    let tracer_provider = {
        if trace_ep.is_empty() || !config.traces_export_enabled.unwrap_or(DEFAULT_OBS_TRACES_EXPORT_ENABLED) {
            None
        } else {
            let exporter = opentelemetry_otlp::SpanExporter::builder()
                .with_http()
                .with_endpoint(trace_ep.as_str())
                .with_protocol(Protocol::HttpBinary)
                .with_compression(Compression::Gzip)
                .build()
                .map_err(|e| TelemetryError::BuildSpanExporter(e.to_string()))?;

            let mut builder = SdkTracerProvider::builder()
                .with_sampler(sampler)
                .with_id_generator(RandomIdGenerator::default())
                .with_resource(res.clone())
                .with_batch_exporter(exporter);

            if use_stdout {
                builder = builder.with_batch_exporter(opentelemetry_stdout::SpanExporter::default());
            }

            let provider = builder.build();
            global::set_tracer_provider(provider.clone());
            global::set_text_map_propagator(TraceContextPropagator::new());
            Some(provider)
        }
    };

    // Meter (HTTP): periodic export plus the `metrics` crate global recorder.
    let meter_provider = {
        if metric_ep.is_empty() || !config.metrics_export_enabled.unwrap_or(DEFAULT_OBS_METRICS_EXPORT_ENABLED) {
            None
        } else {
            let exporter = opentelemetry_otlp::MetricExporter::builder()
                .with_http()
                .with_endpoint(metric_ep.as_str())
                .with_temporality(opentelemetry_sdk::metrics::Temporality::default())
                .with_protocol(Protocol::HttpBinary)
                .with_compression(Compression::Gzip)
                .build()
                .map_err(|e| TelemetryError::BuildMetricExporter(e.to_string()))?;
            let meter_interval = config.meter_interval.unwrap_or(METER_INTERVAL);

            let (provider, recorder) = Recorder::builder(service_name.clone())
                .with_meter_provider(|b| {
                    let b = b.with_resource(res.clone()).with_reader(
                        PeriodicReader::builder(exporter)
                            .with_interval(Duration::from_secs(meter_interval))
                            .build(),
                    );
                    // Optionally mirror metrics to stdout for local observation.
                    if use_stdout {
                        b.with_reader(create_periodic_reader(meter_interval))
                    } else {
                        b
                    }
                })
                .build();
            global::set_meter_provider(provider.clone());
            metrics::set_global_recorder(recorder).map_err(|e| TelemetryError::InstallMetricsRecorder(e.to_string()))?;
            OBSERVABILITY_METRIC_ENABLED.set(true).ok();
            Some(provider)
        }
    };

    // Logger (HTTP): batch-exports log records over OTLP.
    let logger_provider = {
        if log_ep.is_empty() || !config.logs_export_enabled.unwrap_or(DEFAULT_OBS_LOGS_EXPORT_ENABLED) {
            None
        } else {
            let exporter = opentelemetry_otlp::LogExporter::builder()
                .with_http()
                .with_endpoint(log_ep.as_str())
                .with_protocol(Protocol::HttpBinary)
                .with_compression(Compression::Gzip)
                .build()
                .map_err(|e| TelemetryError::BuildLogExporter(e.to_string()))?;

            let mut builder = SdkLoggerProvider::builder().with_resource(res);
            builder = builder.with_batch_exporter(exporter);
            if use_stdout {
                builder = builder.with_batch_exporter(opentelemetry_stdout::LogExporter::default());
            }
            Some(builder.build())
        }
    };

    // Optional JSON stdout layer for the tracing subscriber.
    let fmt_layer_opt = {
        if config.log_stdout_enabled.unwrap_or(DEFAULT_OBS_LOG_STDOUT_ENABLED) {
            let enable_color = std::io::stdout().is_terminal();
            let mut layer = tracing_subscriber::fmt::layer()
                .with_timer(LocalTime::rfc_3339())
                .with_target(true)
                .with_ansi(enable_color)
                .with_thread_names(true)
                .with_thread_ids(true)
                .with_file(true)
                .with_line_number(true)
                .json()
                .with_current_span(true)
                .with_span_list(true);
            let span_event = if is_production { FmtSpan::CLOSE } else { FmtSpan::FULL };
            layer = layer.with_span_events(span_event);
            Some(layer.with_filter(build_env_filter(logger_level, None)))
        } else {
            None
        }
    };

    // Wire every configured layer into a single registry. Each `Option`
    // layer is a no-op when the corresponding pipeline is disabled.
    let filter = build_env_filter(logger_level, None);
    let otel_bridge = logger_provider
        .as_ref()
        .map(|p| OpenTelemetryTracingBridge::new(p).with_filter(build_env_filter(logger_level, None)));
    let tracer_layer = tracer_provider
        .as_ref()
        .map(|p| OpenTelemetryLayer::new(p.tracer(service_name.to_string())));
    let metrics_layer = meter_provider.as_ref().map(|p| MetricsLayer::new(p.clone()));

    tracing_subscriber::registry()
        .with(filter)
        .with(ErrorLayer::default())
        .with(fmt_layer_opt)
        .with(tracer_layer)
        .with(otel_bridge)
        .with(metrics_layer)
        .init();

    counter!("rustfs.start.total").increment(1);
    info!(
        "Init observability (HTTP): trace='{}', metric='{}', log='{}'",
        trace_ep, metric_ep, log_ep
    );

    Ok(OtelGuard {
        tracer_provider,
        meter_provider,
        logger_provider,
        flexi_logger_handles: None,
        tracing_guard: None,
    })
}
|
||||
|
||||
/// Initialize Telemetry,Entrance: three rules
|
||||
pub(crate) fn init_telemetry(config: &OtelConfig) -> Result<OtelGuard, TelemetryError> {
|
||||
let environment = config.environment.as_deref().unwrap_or(ENVIRONMENT);
|
||||
let is_production = environment.eq_ignore_ascii_case(DEFAULT_OBS_ENVIRONMENT_PRODUCTION);
|
||||
let logger_level = config.logger_level.as_deref().unwrap_or(DEFAULT_LOG_LEVEL);
|
||||
|
||||
// Rule 3: Observability (any endpoint is enabled if it is not empty)
|
||||
let has_obs = !config.endpoint.is_empty()
|
||||
|| config.trace_endpoint.as_deref().map(|s| !s.is_empty()).unwrap_or(false)
|
||||
|| config.metric_endpoint.as_deref().map(|s| !s.is_empty()).unwrap_or(false)
|
||||
|| config.log_endpoint.as_deref().map(|s| !s.is_empty()).unwrap_or(false);
|
||||
|
||||
if has_obs {
|
||||
return init_observability_http(config, logger_level, is_production);
|
||||
}
|
||||
|
||||
// Rule 2: The user has explicitly customized the log directory (determined by whether ENV_OBS_LOG_DIRECTORY is set)
|
||||
let user_set_log_dir = get_env_opt_str(ENV_OBS_LOG_DIRECTORY);
|
||||
if user_set_log_dir.filter(|d| !d.is_empty()).is_some() {
|
||||
return init_file_logging(config, logger_level, is_production);
|
||||
}
|
||||
|
||||
// Rule 1: Default stdout (error level)
|
||||
Ok(init_stdout_logging(config, DEFAULT_LOG_LEVEL, is_production))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use rustfs_config::USE_STDOUT;

    #[test]
    fn test_production_environment_detection() {
        // Test production environment logic: the comparison mirrors the
        // case-insensitive check performed in `init_telemetry`.
        let production_envs = vec!["production", "PRODUCTION", "Production"];

        for env_value in production_envs {
            let is_production = env_value.to_lowercase() == "production";
            assert!(is_production, "Should detect '{env_value}' as production environment");
        }
    }

    #[test]
    fn test_non_production_environment_detection() {
        // Test non-production environment logic
        let non_production_envs = vec!["development", "test", "staging", "dev", "local"];

        for env_value in non_production_envs {
            let is_production = env_value.to_lowercase() == "production";
            assert!(!is_production, "Should not detect '{env_value}' as production environment");
        }
    }

    #[test]
    fn test_stdout_behavior_logic() {
        // Test the stdout behavior logic without environment manipulation.
        // Each case pins the expected `use_stdout` resolution given the
        // environment and the optional config override.
        struct TestCase {
            is_production: bool,
            config_use_stdout: Option<bool>,
            expected_use_stdout: bool,
            description: &'static str,
        }

        let test_cases = vec![
            TestCase {
                is_production: true,
                config_use_stdout: None,
                expected_use_stdout: false,
                description: "Production with no config should disable stdout",
            },
            TestCase {
                is_production: false,
                config_use_stdout: None,
                expected_use_stdout: USE_STDOUT,
                description: "Non-production with no config should use default",
            },
            TestCase {
                is_production: true,
                config_use_stdout: Some(true),
                expected_use_stdout: true,
                description: "Production with explicit true should enable stdout",
            },
            TestCase {
                is_production: true,
                config_use_stdout: Some(false),
                expected_use_stdout: false,
                description: "Production with explicit false should disable stdout",
            },
            TestCase {
                is_production: false,
                config_use_stdout: Some(true),
                expected_use_stdout: true,
                description: "Non-production with explicit true should enable stdout",
            },
        ];

        for case in test_cases {
            // Default: off in production, compile-time constant otherwise.
            let default_use_stdout = if case.is_production { false } else { USE_STDOUT };

            // Explicit config always wins over the default.
            let actual_use_stdout = case.config_use_stdout.unwrap_or(default_use_stdout);

            assert_eq!(actual_use_stdout, case.expected_use_stdout, "Test case failed: {}", case.description);
        }
    }

    #[test]
    fn test_log_level_filter_mapping_logic() {
        // Test the log level mapping logic used in the real implementation
        let test_cases = vec![
            ("trace", "Trace"),
            ("debug", "Debug"),
            ("info", "Info"),
            ("warn", "Warn"),
            ("warning", "Warn"),
            ("error", "Error"),
            ("off", "None"),
            ("invalid_level", "Info"), // Should default to Info
        ];

        for (input_level, expected_variant) in test_cases {
            let filter_variant = match input_level.to_lowercase().as_str() {
                "trace" => "Trace",
                "debug" => "Debug",
                "info" => "Info",
                "warn" | "warning" => "Warn",
                "error" => "Error",
                "off" => "None",
                _ => "Info", // default case
            };

            assert_eq!(
                filter_variant, expected_variant,
                "Log level '{input_level}' should map to '{expected_variant}'"
            );
        }
    }

    #[test]
    fn test_otel_config_environment_defaults() {
        // Test that OtelConfig properly handles environment detection logic
        let config = OtelConfig {
            endpoint: "".to_string(),
            use_stdout: None,
            environment: Some("production".to_string()),
            ..Default::default()
        };

        // Simulate the logic from init_telemetry
        let environment = config.environment.as_deref().unwrap_or(ENVIRONMENT);
        assert_eq!(environment, "production");

        // Test with development environment
        let dev_config = OtelConfig {
            endpoint: "".to_string(),
            use_stdout: None,
            environment: Some("development".to_string()),
            ..Default::default()
        };

        let dev_environment = dev_config.environment.as_deref().unwrap_or(ENVIRONMENT);
        assert_eq!(dev_environment, "development");
    }
}
|
||||
94
crates/obs/src/telemetry/filter.rs
Normal file
94
crates/obs/src/telemetry/filter.rs
Normal file
@@ -0,0 +1,94 @@
|
||||
// Copyright 2024 RustFS Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Log filtering utilities for tracing subscribers.
|
||||
//!
|
||||
//! This module provides helper functions for building `EnvFilter` instances
|
||||
//! used across different logging backends (stdout, file, OpenTelemetry).
|
||||
|
||||
use smallvec::SmallVec;
|
||||
use tracing_subscriber::EnvFilter;
|
||||
|
||||
/// Build an `EnvFilter` from the given log level string.
|
||||
///
|
||||
/// If the `RUST_LOG` environment variable is set, it takes precedence over the
|
||||
/// provided `logger_level`. For non-verbose levels (`info`, `warn`, `error`),
|
||||
/// noisy internal crates (`hyper`, `tonic`, `h2`, `reqwest`, `tower`) are
|
||||
/// automatically silenced to reduce log noise.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `logger_level` - The desired log level string (e.g., `"info"`, `"debug"`).
|
||||
/// * `default_level` - An optional override that replaces `logger_level` as the
|
||||
/// base directive; useful when the caller wants to force a specific level
|
||||
/// regardless of what is stored in config.
|
||||
///
|
||||
/// # Returns
|
||||
/// A configured `EnvFilter` ready to be attached to a `tracing_subscriber` registry.
|
||||
pub(super) fn build_env_filter(logger_level: &str, default_level: Option<&str>) -> EnvFilter {
|
||||
let level = default_level.unwrap_or(logger_level);
|
||||
let mut filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(level));
|
||||
|
||||
// Suppress chatty infrastructure crates unless the operator explicitly
|
||||
// requests trace/debug output.
|
||||
if !matches!(logger_level, "trace" | "debug") {
|
||||
let directives: SmallVec<[&str; 5]> = smallvec::smallvec!["hyper", "tonic", "h2", "reqwest", "tower"];
|
||||
for directive in directives {
|
||||
filter = filter.add_directive(format!("{directive}=off").parse().unwrap());
|
||||
}
|
||||
}
|
||||
|
||||
filter
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_build_env_filter_default_level_overrides() {
        // Ensure that providing a default_level uses it instead of logger_level.
        let filter = build_env_filter("debug", Some("error"));
        // The Debug output uses `LevelFilter::ERROR` for the error level directive.
        let dbg = format!("{filter:?}");
        assert!(
            dbg.contains("LevelFilter::ERROR"),
            "Expected 'LevelFilter::ERROR' in filter debug output: {dbg}"
        );
    }

    #[test]
    fn test_build_env_filter_suppresses_noisy_crates() {
        // For info level, hyper/tonic/etc. should be suppressed with OFF.
        let filter = build_env_filter("info", None);
        let dbg = format!("{filter:?}");
        // The Debug output uses `LevelFilter::OFF` for suppressed crates.
        assert!(
            dbg.contains("LevelFilter::OFF"),
            "Expected 'LevelFilter::OFF' suppression directives in filter: {dbg}"
        );
    }

    #[test]
    fn test_build_env_filter_debug_no_suppression() {
        // For debug level, our code does NOT inject any OFF directives.
        let filter = build_env_filter("debug", None);
        let dbg = format!("{filter:?}");
        // Verify the filter builds without panicking and contains the debug level.
        assert!(!dbg.is_empty());
        assert!(
            dbg.contains("LevelFilter::DEBUG"),
            "Expected 'LevelFilter::DEBUG' in filter debug output: {dbg}"
        );
    }
}
|
||||
103
crates/obs/src/telemetry/guard.rs
Normal file
103
crates/obs/src/telemetry/guard.rs
Normal file
@@ -0,0 +1,103 @@
|
||||
// Copyright 2024 RustFS Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! RAII guard for OpenTelemetry provider lifecycle management.
|
||||
//!
|
||||
//! [`OtelGuard`] holds all runtime resources created during telemetry
|
||||
//! initialisation. Dropping it triggers an ordered shutdown:
|
||||
//!
|
||||
//! 1. Tracer provider — flushes pending spans.
|
||||
//! 2. Meter provider — flushes pending metrics.
|
||||
//! 3. Logger provider — flushes pending log records.
|
||||
//! 4. Cleanup task — aborted to prevent lingering background work.
|
||||
//! 5. Tracing worker guard — flushes buffered log lines written by
|
||||
//! `tracing_appender`.
|
||||
|
||||
use opentelemetry_sdk::{logs::SdkLoggerProvider, metrics::SdkMeterProvider, trace::SdkTracerProvider};
|
||||
|
||||
/// RAII guard that owns all active OpenTelemetry providers and the
/// `tracing_appender` worker guards.
///
/// Construct this via the `init_*` functions in [`crate::telemetry`] rather
/// than directly. The guard must be kept alive for the entire duration of the
/// application — once dropped, all telemetry pipelines are shut down.
pub struct OtelGuard {
    /// Optional tracer provider for distributed tracing.
    pub(crate) tracer_provider: Option<SdkTracerProvider>,
    /// Optional meter provider for metrics collection.
    pub(crate) meter_provider: Option<SdkMeterProvider>,
    /// Optional logger provider for OTLP log export.
    pub(crate) logger_provider: Option<SdkLoggerProvider>,
    /// Worker guard that keeps the non-blocking `tracing_appender` thread
    /// alive. Dropping it blocks until all buffered records are flushed.
    pub(crate) tracing_guard: Option<tracing_appender::non_blocking::WorkerGuard>,
    /// Optional guard for stdout logging; kept separate from `tracing_guard`
    /// to allow independent flushing and shutdown.
    pub(crate) stdout_guard: Option<tracing_appender::non_blocking::WorkerGuard>,
    /// Handle to the background log-cleanup task; aborted on drop.
    pub(crate) cleanup_handle: Option<tokio::task::JoinHandle<()>>,
}
|
||||
|
||||
impl std::fmt::Debug for OtelGuard {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("OtelGuard")
|
||||
.field("tracer_provider", &self.tracer_provider.is_some())
|
||||
.field("meter_provider", &self.meter_provider.is_some())
|
||||
.field("logger_provider", &self.logger_provider.is_some())
|
||||
.field("tracing_guard", &self.tracing_guard.is_some())
|
||||
.field("stdout_guard", &self.stdout_guard.is_some())
|
||||
.field("cleanup_handle", &self.cleanup_handle.is_some())
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for OtelGuard {
|
||||
/// Shut down all telemetry providers in order.
|
||||
///
|
||||
/// Errors during shutdown are printed to `stderr` so they are visible even
|
||||
/// after the tracing subscriber has been torn down.
|
||||
fn drop(&mut self) {
|
||||
if let Some(provider) = self.tracer_provider.take()
|
||||
&& let Err(err) = provider.shutdown()
|
||||
{
|
||||
eprintln!("Tracer shutdown error: {err:?}");
|
||||
}
|
||||
|
||||
if let Some(provider) = self.meter_provider.take()
|
||||
&& let Err(err) = provider.shutdown()
|
||||
{
|
||||
eprintln!("Meter shutdown error: {err:?}");
|
||||
}
|
||||
|
||||
if let Some(provider) = self.logger_provider.take()
|
||||
&& let Err(err) = provider.shutdown()
|
||||
{
|
||||
eprintln!("Logger shutdown error: {err:?}");
|
||||
}
|
||||
|
||||
if let Some(handle) = self.cleanup_handle.take() {
|
||||
handle.abort();
|
||||
eprintln!("Log cleanup task stopped");
|
||||
}
|
||||
|
||||
if let Some(guard) = self.tracing_guard.take() {
|
||||
drop(guard);
|
||||
eprintln!("Tracing guard dropped, flushing logs.");
|
||||
}
|
||||
|
||||
if let Some(guard) = self.stdout_guard.take() {
|
||||
drop(guard);
|
||||
eprintln!("Stdout guard dropped, flushing logs.");
|
||||
}
|
||||
}
|
||||
}
|
||||
385
crates/obs/src/telemetry/local.rs
Normal file
385
crates/obs/src/telemetry/local.rs
Normal file
@@ -0,0 +1,385 @@
|
||||
// Copyright 2024 RustFS Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Local logging backend: stdout-only or file-rolling with optional stdout mirror.
|
||||
//!
|
||||
//! # Behaviour
|
||||
//!
|
||||
//! | Condition | Result |
|
||||
//! |----------------------------------|----------------------------------------------|
|
||||
//! | No log directory configured | JSON logs written to **stdout only** |
|
||||
//! | Log directory configured | JSON logs written to **rolling file**; |
|
||||
//! | | stdout mirror enabled when `log_stdout_enabled` |
|
||||
//! | | is `true` or environment is non-production |
|
||||
//!
|
||||
//! The function [`init_local_logging`] is the single entry point for both
|
||||
//! cases; callers do **not** need to distinguish between stdout and file modes.
|
||||
|
||||
use crate::TelemetryError;
|
||||
use crate::config::OtelConfig;
|
||||
use crate::global::OBSERVABILITY_METRIC_ENABLED;
|
||||
use crate::log_cleanup::LogCleaner;
|
||||
use crate::telemetry::filter::build_env_filter;
|
||||
use metrics::counter;
|
||||
use rustfs_config::observability::{
|
||||
DEFAULT_OBS_LOG_CLEANUP_INTERVAL_SECONDS, DEFAULT_OBS_LOG_COMPRESS_OLD_FILES, DEFAULT_OBS_LOG_COMPRESSED_FILE_RETENTION_DAYS,
|
||||
DEFAULT_OBS_LOG_DELETE_EMPTY_FILES, DEFAULT_OBS_LOG_DRY_RUN, DEFAULT_OBS_LOG_GZIP_COMPRESSION_LEVEL,
|
||||
DEFAULT_OBS_LOG_MAX_SINGLE_FILE_SIZE_BYTES, DEFAULT_OBS_LOG_MAX_TOTAL_SIZE_BYTES, DEFAULT_OBS_LOG_MIN_FILE_AGE_SECONDS,
|
||||
};
|
||||
use rustfs_config::{APP_NAME, DEFAULT_LOG_KEEP_FILES, DEFAULT_LOG_ROTATION_TIME, DEFAULT_OBS_LOG_STDOUT_ENABLED};
|
||||
use std::{fs, io::IsTerminal, time::Duration};
|
||||
use tracing::info;
|
||||
use tracing_error::ErrorLayer;
|
||||
use tracing_subscriber::{
|
||||
fmt::{format::FmtSpan, time::LocalTime},
|
||||
layer::SubscriberExt,
|
||||
util::SubscriberInitExt,
|
||||
};
|
||||
|
||||
use super::guard::OtelGuard;
|
||||
|
||||
/// Initialize local logging (stdout-only or file-rolling).
|
||||
///
|
||||
/// When `log_directory` is empty or `None` in the config the function sets up
|
||||
/// a non-blocking JSON subscriber that writes to **stdout** and returns
|
||||
/// immediately — no file I/O, no cleanup task.
|
||||
///
|
||||
/// When a log directory is provided the function additionally:
|
||||
/// 1. Creates the directory (including on Unix, enforces `0755` permissions).
|
||||
/// 2. Attaches a rolling-file appender (daily or hourly based on
|
||||
/// `log_rotation_time`).
|
||||
/// 3. Optionally mirrors output to stdout based on `log_stdout_enabled`.
|
||||
/// 4. Spawns a background cleanup task that periodically removes or compresses
|
||||
/// old log files according to the cleanup configuration in [`OtelConfig`].
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `config` - Observability configuration, fully populated from environment variables.
|
||||
/// * `logger_level` - Effective log level string (e.g., `"info"`).
|
||||
/// * `is_production` - Whether the runtime environment is production; controls
|
||||
/// span verbosity and stdout mirroring defaults.
|
||||
///
|
||||
/// # Returns
|
||||
/// An [`OtelGuard`] that keeps the `tracing_appender` worker alive and holds
|
||||
/// a handle to the cleanup task (if started). Dropping the guard flushes
|
||||
/// in-flight logs and stops the cleanup task.
|
||||
///
|
||||
/// # Errors
|
||||
/// Returns [`TelemetryError`] if the log directory cannot be created or its
|
||||
/// permissions cannot be set (Unix only).
|
||||
pub(super) fn init_local_logging(
|
||||
config: &OtelConfig,
|
||||
logger_level: &str,
|
||||
is_production: bool,
|
||||
) -> Result<OtelGuard, TelemetryError> {
|
||||
// Determine the effective log directory. An absent or empty value means
|
||||
// stdout-only mode: we skip file setup entirely.
|
||||
let log_dir_str = config.log_directory.as_deref().filter(|s| !s.is_empty());
|
||||
|
||||
if let Some(log_directory) = log_dir_str {
|
||||
init_file_logging_internal(config, log_directory, logger_level, is_production)
|
||||
} else {
|
||||
Ok(init_stdout_only(config, logger_level, is_production))
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Stdout-only ─────────────────────────────────────────────────────────────
|
||||
|
||||
/// Set up a non-blocking stdout JSON subscriber with no file I/O.
///
/// Used when no log directory has been configured. Every log record is
/// formatted as a JSON line with an RFC-3339 local timestamp, thread
/// name/id, file/line information, and current-span plus span-list context.
///
/// # Arguments
/// * `_config` - Unused at the moment; reserved for future configuration.
/// * `logger_level` - Effective log level string.
/// * `is_production` - Controls span event verbosity (`CLOSE` only in
///   production, `FULL` otherwise).
fn init_stdout_only(_config: &OtelConfig, logger_level: &str, is_production: bool) -> OtelGuard {
    let env_filter = build_env_filter(logger_level, None);
    // Non-blocking writer: log lines are handed to a background worker thread;
    // `guard` must outlive the application to flush that buffer on shutdown.
    let (nb, guard) = tracing_appender::non_blocking(std::io::stdout());

    let fmt_layer = tracing_subscriber::fmt::layer()
        .with_timer(LocalTime::rfc_3339())
        .with_target(true)
        // Colour output only when stdout is an interactive terminal.
        .with_ansi(std::io::stdout().is_terminal())
        .with_thread_names(true)
        .with_thread_ids(true)
        .with_file(true)
        .with_line_number(true)
        .with_writer(nb)
        .json()
        .with_current_span(true)
        .with_span_list(true)
        // Production logs only span-close events to cut volume; elsewhere log
        // the full span lifecycle for easier debugging.
        .with_span_events(if is_production { FmtSpan::CLOSE } else { FmtSpan::FULL });

    tracing_subscriber::registry()
        .with(env_filter)
        .with(ErrorLayer::default())
        .with(fmt_layer)
        .init();

    // No OTLP metrics pipeline in stdout-only mode; `.ok()` ignores the error
    // if the flag was already set by an earlier initialisation.
    OBSERVABILITY_METRIC_ENABLED.set(false).ok();
    counter!("rustfs.start.total").increment(1);
    info!("Init stdout logging (level: {})", logger_level);

    // Only the tracing worker guard is populated: no providers, no file
    // appender, no cleanup task in this mode.
    OtelGuard {
        tracer_provider: None,
        meter_provider: None,
        logger_provider: None,
        tracing_guard: Some(guard),
        stdout_guard: None,
        cleanup_handle: None,
    }
}
|
||||
|
||||
// ─── File-rolling ─────────────────────────────────────────────────────────────
|
||||
|
||||
/// Internal implementation for file-based rolling log setup.
|
||||
///
|
||||
/// Called by [`init_local_logging`] when a log directory is present.
|
||||
/// Handles directory creation, permission enforcement (Unix), file appender
|
||||
/// setup, optional stdout mirror, and log-cleanup task spawning.
|
||||
fn init_file_logging_internal(
|
||||
config: &OtelConfig,
|
||||
log_directory: &str,
|
||||
logger_level: &str,
|
||||
is_production: bool,
|
||||
) -> Result<OtelGuard, TelemetryError> {
|
||||
let service_name = config.service_name.as_deref().unwrap_or(APP_NAME);
|
||||
let log_filename = config.log_filename.as_deref().unwrap_or(service_name);
|
||||
let keep_files = config.log_keep_files.unwrap_or(DEFAULT_LOG_KEEP_FILES);
|
||||
|
||||
// ── 1. Ensure the log directory exists ───────────────────────────────────
|
||||
if let Err(e) = fs::create_dir_all(log_directory) {
|
||||
return Err(TelemetryError::Io(e.to_string()));
|
||||
}
|
||||
|
||||
// ── 2. Enforce directory permissions (Unix only) ─────────────────────────
|
||||
#[cfg(unix)]
|
||||
ensure_dir_permissions(log_directory)?;
|
||||
|
||||
// ── 3. Choose rotation strategy ──────────────────────────────────────────
|
||||
// `log_rotation_time` drives the rolling-appender rotation period.
|
||||
let rotation = config
|
||||
.log_rotation_time
|
||||
.as_deref()
|
||||
.unwrap_or(DEFAULT_LOG_ROTATION_TIME)
|
||||
.to_lowercase();
|
||||
use tracing_appender::rolling::{RollingFileAppender, Rotation};
|
||||
let file_appender = {
|
||||
let rotation = match rotation.as_str() {
|
||||
"minutely" => Rotation::MINUTELY,
|
||||
"hourly" => Rotation::HOURLY,
|
||||
_ => Rotation::DAILY,
|
||||
};
|
||||
RollingFileAppender::builder()
|
||||
.rotation(rotation)
|
||||
.filename_suffix(log_filename)
|
||||
.max_log_files(keep_files)
|
||||
.build(log_directory)
|
||||
.expect("failed to initialize rolling file appender")
|
||||
};
|
||||
|
||||
let (non_blocking, guard) = tracing_appender::non_blocking(file_appender);
|
||||
|
||||
// ── 4. Build subscriber layers ────────────────────────────────────────────
|
||||
let env_filter = build_env_filter(logger_level, None);
|
||||
let span_events = if is_production { FmtSpan::CLOSE } else { FmtSpan::FULL };
|
||||
|
||||
// File layer writes JSON without ANSI codes.
|
||||
let file_layer = tracing_subscriber::fmt::layer()
|
||||
.with_timer(LocalTime::rfc_3339())
|
||||
.with_target(true)
|
||||
.with_ansi(false)
|
||||
.with_thread_names(true)
|
||||
.with_thread_ids(true)
|
||||
.with_file(true)
|
||||
.with_line_number(true)
|
||||
.with_writer(non_blocking)
|
||||
.json()
|
||||
.with_current_span(true)
|
||||
.with_span_list(true)
|
||||
.with_span_events(span_events.clone());
|
||||
|
||||
// Optional stdout mirror: enabled explicitly via `log_stdout_enabled`, or
|
||||
// unconditionally in non-production environments.
|
||||
let (stdout_layer, stdout_guard) = if config.log_stdout_enabled.unwrap_or(DEFAULT_OBS_LOG_STDOUT_ENABLED) || !is_production {
|
||||
let (stdout_nb, stdout_guard) = tracing_appender::non_blocking(std::io::stdout());
|
||||
let enable_color = std::io::stdout().is_terminal();
|
||||
(
|
||||
Some(
|
||||
tracing_subscriber::fmt::layer()
|
||||
.with_timer(LocalTime::rfc_3339())
|
||||
.with_target(true)
|
||||
.with_ansi(enable_color)
|
||||
.with_thread_names(true)
|
||||
.with_thread_ids(true)
|
||||
.with_file(true)
|
||||
.with_line_number(true)
|
||||
.with_writer(stdout_nb) // .json()
|
||||
// .with_current_span(true)
|
||||
// .with_span_list(true)
|
||||
.with_span_events(span_events),
|
||||
),
|
||||
Some(stdout_guard),
|
||||
)
|
||||
} else {
|
||||
(None, None)
|
||||
};
|
||||
|
||||
tracing_subscriber::registry()
|
||||
.with(env_filter)
|
||||
.with(ErrorLayer::default())
|
||||
.with(file_layer)
|
||||
.with(stdout_layer)
|
||||
.init();
|
||||
|
||||
OBSERVABILITY_METRIC_ENABLED.set(false).ok();
|
||||
|
||||
// ── 5. Start background cleanup task ─────────────────────────────────────
|
||||
let cleanup_handle = spawn_cleanup_task(config, log_directory, log_filename, keep_files);
|
||||
|
||||
info!(
|
||||
"Init file logging at '{}', rotation: {}, keep {} files",
|
||||
log_directory, rotation, keep_files
|
||||
);
|
||||
|
||||
Ok(OtelGuard {
|
||||
tracer_provider: None,
|
||||
meter_provider: None,
|
||||
logger_provider: None,
|
||||
tracing_guard: Some(guard),
|
||||
stdout_guard,
|
||||
cleanup_handle: Some(cleanup_handle),
|
||||
})
|
||||
}
|
||||
|
||||
// ─── Directory permissions (Unix) ─────────────────────────────────────────────
|
||||
|
||||
/// Ensure the log directory has at most `0755` permissions (Unix only).
|
||||
///
|
||||
/// Tightens permissions to `0755` if the directory is more permissive.
|
||||
/// This prevents world-writable log directories from being a security hazard.
|
||||
/// No-ops if permissions are already `0755` or stricter.
|
||||
#[cfg(unix)]
|
||||
fn ensure_dir_permissions(log_directory: &str) -> Result<(), TelemetryError> {
|
||||
use std::fs::Permissions;
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
|
||||
let desired: u32 = 0o755;
|
||||
match fs::metadata(log_directory) {
|
||||
Ok(meta) => {
|
||||
let current = meta.permissions().mode() & 0o777;
|
||||
// Only tighten to 0755 if existing permissions are looser than target.
|
||||
if (current & !desired) != 0 {
|
||||
if let Err(e) = fs::set_permissions(log_directory, Permissions::from_mode(desired)) {
|
||||
return Err(TelemetryError::SetPermissions(format!(
|
||||
"dir='{log_directory}', want={desired:#o}, have={current:#o}, err={e}"
|
||||
)));
|
||||
}
|
||||
// Second verification pass to confirm the change took effect.
|
||||
if let Ok(meta2) = fs::metadata(log_directory) {
|
||||
let after = meta2.permissions().mode() & 0o777;
|
||||
if after != desired {
|
||||
return Err(TelemetryError::SetPermissions(format!(
|
||||
"dir='{log_directory}', want={desired:#o}, after={after:#o}"
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
Err(e) => Err(TelemetryError::Io(format!("stat '{log_directory}' failed: {e}"))),
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Cleanup task ─────────────────────────────────────────────────────────────
|
||||
|
||||
/// Spawn a background task that periodically cleans up old log files.
///
/// All cleanup parameters are derived from [`OtelConfig`] fields, with
/// sensible defaults when fields are absent. The task runs on the current
/// Tokio runtime and should be aborted (via the returned `JoinHandle`) when
/// the application shuts down.
///
/// # Arguments
/// * `config` - Observability config containing cleanup parameters.
/// * `log_directory` - Directory path of the rolling log files.
/// * `log_filename` - Base filename (used as the file prefix for matching).
/// * `keep_files` - Legacy keep-files count; used as fallback when the new
///   `log_keep_count` field is absent.
///
/// # Returns
/// A [`tokio::task::JoinHandle`] for the spawned cleanup loop.
fn spawn_cleanup_task(
    config: &OtelConfig,
    log_directory: &str,
    log_filename: &str,
    keep_files: usize,
) -> tokio::task::JoinHandle<()> {
    let log_dir = std::path::PathBuf::from(log_directory);
    // `config.log_filename` takes priority; `log_filename` is the caller's
    // already-resolved fallback for the file-matching prefix.
    let file_prefix = config.log_filename.as_deref().unwrap_or(log_filename).to_string();
    let keep_count = config.log_keep_count.unwrap_or(keep_files);
    // NOTE(review): the default total-size budget multiplies the constant by
    // `keep_count` — presumably the constant is a per-file figure so the
    // budget scales with file count; confirm against the constant's docs.
    let max_total_size = config
        .log_max_total_size_bytes
        .unwrap_or(DEFAULT_OBS_LOG_MAX_TOTAL_SIZE_BYTES * keep_count as u64);
    let max_single_file_size = config
        .log_max_single_file_size_bytes
        .unwrap_or(DEFAULT_OBS_LOG_MAX_SINGLE_FILE_SIZE_BYTES);
    let compress = config.log_compress_old_files.unwrap_or(DEFAULT_OBS_LOG_COMPRESS_OLD_FILES);
    let gzip_level = config
        .log_gzip_compression_level
        .unwrap_or(DEFAULT_OBS_LOG_GZIP_COMPRESSION_LEVEL);
    let retention_days = config
        .log_compressed_file_retention_days
        .unwrap_or(DEFAULT_OBS_LOG_COMPRESSED_FILE_RETENTION_DAYS);
    // Comma-separated exclusion list → Vec<String>; absent field → empty vec.
    let exclude_patterns = config
        .log_exclude_patterns
        .as_deref()
        .map(|s| s.split(',').map(|p| p.trim().to_string()).collect())
        .unwrap_or_default();
    let delete_empty = config.log_delete_empty_files.unwrap_or(DEFAULT_OBS_LOG_DELETE_EMPTY_FILES);
    let min_age = config
        .log_min_file_age_seconds
        .unwrap_or(DEFAULT_OBS_LOG_MIN_FILE_AGE_SECONDS);
    let dry_run = config.log_dry_run.unwrap_or(DEFAULT_OBS_LOG_DRY_RUN);
    let cleanup_interval = config
        .log_cleanup_interval_seconds
        .unwrap_or(DEFAULT_OBS_LOG_CLEANUP_INTERVAL_SECONDS);

    // Positional arguments — the order here must match `LogCleaner::new`'s
    // signature exactly; several adjacent parameters share types.
    let cleaner = LogCleaner::new(
        log_dir,
        file_prefix,
        keep_count,
        max_total_size,
        max_single_file_size,
        compress,
        gzip_level,
        retention_days,
        exclude_patterns,
        delete_empty,
        min_age,
        dry_run,
    );

    // Periodic cleanup loop: runs until the handle is aborted by OtelGuard's
    // Drop impl. Failures are logged and the loop continues.
    tokio::spawn(async move {
        let mut interval = tokio::time::interval(Duration::from_secs(cleanup_interval));
        loop {
            interval.tick().await;
            if let Err(e) = cleaner.cleanup() {
                tracing::warn!("Log cleanup failed: {}", e);
            }
        }
    })
}
|
||||
243
crates/obs/src/telemetry/mod.rs
Normal file
243
crates/obs/src/telemetry/mod.rs
Normal file
@@ -0,0 +1,243 @@
|
||||
// Copyright 2024 RustFS Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Telemetry initialisation module for RustFS.
|
||||
//!
|
||||
//! This module is the single entry point for all observability backends.
|
||||
//! Callers should use [`init_telemetry`] and keep the returned [`OtelGuard`]
|
||||
//! alive for the lifetime of the application.
|
||||
//!
|
||||
//! ## Architecture
|
||||
//!
|
||||
//! The module is split into focused sub-modules:
|
||||
//!
|
||||
//! | Sub-module | Responsibility |
|
||||
//! |--------------|---------------------------------------------------------|
|
||||
//! | `guard` | [`OtelGuard`] RAII type for provider lifecycle |
|
||||
//! | `filter` | `EnvFilter` construction helpers |
|
||||
//! | `resource` | OpenTelemetry `Resource` builder |
|
||||
//! | `local` | Local logging: stdout-only **or** rolling-file |
|
||||
//! | `otel` | Full OTLP/HTTP pipeline (traces + metrics + logs) |
|
||||
//!
|
||||
//! ## Routing rules (evaluated in order)
|
||||
//!
|
||||
//! 1. **OpenTelemetry** — if any OTLP endpoint is configured, the full HTTP
|
||||
//! pipeline is initialised via [`otel::init_observability_http`].
|
||||
//! 2. **File logging** — if `RUSTFS_OBS_LOG_DIRECTORY` (or `log_directory` /
|
||||
//! `log_dir` in config) is set to a non-empty value, rolling-file logging is
|
||||
//! initialised together with an optional stdout mirror.
|
||||
//! 3. **Stdout only** — default fallback; no file I/O, no remote export.
|
||||
|
||||
mod filter;
|
||||
mod guard;
|
||||
mod local;
|
||||
mod otel;
|
||||
mod recorder;
|
||||
mod resource;
|
||||
|
||||
use crate::TelemetryError;
|
||||
use crate::config::OtelConfig;
|
||||
pub use guard::OtelGuard;
|
||||
pub use recorder::Recorder;
|
||||
use rustfs_config::observability::ENV_OBS_LOG_DIRECTORY;
|
||||
use rustfs_config::{DEFAULT_LOG_LEVEL, ENVIRONMENT, observability::DEFAULT_OBS_ENVIRONMENT_PRODUCTION};
|
||||
use rustfs_utils::get_env_opt_str;
|
||||
|
||||
/// Initialize the telemetry subsystem according to the provided configuration.
|
||||
///
|
||||
/// Evaluates three routing rules in priority order and delegates to the
|
||||
/// appropriate backend:
|
||||
///
|
||||
/// 1. If any OTLP endpoint is set, initialises the full
|
||||
/// OpenTelemetry HTTP pipeline (traces + metrics + logs).
|
||||
/// 2. If a log directory is explicitly configured via the
|
||||
/// `RUSTFS_OBS_LOG_DIRECTORY` environment variable, initialises
|
||||
/// rolling-file logging with an optional stdout mirror.
|
||||
/// 3. Otherwise, falls back to stdout-only JSON logging.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `config` - Observability configuration, typically built from environment
|
||||
/// variables via [`OtelConfig::extract_otel_config_from_env`].
|
||||
///
|
||||
/// # Returns
|
||||
/// An [`OtelGuard`] that must be kept alive for the duration of the
|
||||
/// application. Dropping it triggers ordered shutdown of all providers.
|
||||
///
|
||||
/// # Errors
|
||||
/// Returns [`TelemetryError`] when a backend fails to initialise (e.g., cannot
|
||||
/// create the log directory, or an OTLP exporter cannot connect).
|
||||
pub(crate) fn init_telemetry(config: &OtelConfig) -> Result<OtelGuard, TelemetryError> {
|
||||
let environment = config.environment.as_deref().unwrap_or(ENVIRONMENT);
|
||||
let is_production = environment.eq_ignore_ascii_case(DEFAULT_OBS_ENVIRONMENT_PRODUCTION);
|
||||
let logger_level = config.logger_level.as_deref().unwrap_or(DEFAULT_LOG_LEVEL);
|
||||
|
||||
// ── Rule 1: OpenTelemetry HTTP pipeline ───────────────────────────────────
|
||||
// Activated when at least one OTLP endpoint is non-empty.
|
||||
let has_obs = !config.endpoint.is_empty()
|
||||
|| config.trace_endpoint.as_deref().map(|s| !s.is_empty()).unwrap_or(false)
|
||||
|| config.metric_endpoint.as_deref().map(|s| !s.is_empty()).unwrap_or(false)
|
||||
|| config.log_endpoint.as_deref().map(|s| !s.is_empty()).unwrap_or(false);
|
||||
|
||||
if has_obs {
|
||||
return otel::init_observability_http(config, logger_level, is_production);
|
||||
}
|
||||
|
||||
// ── Rule 2 & 3: Local logging (file or stdout) ────────────────────────────
|
||||
// `init_local_logging` internally decides between file and stdout mode
|
||||
// based on whether a log directory is configured.
|
||||
//
|
||||
// We check the environment variable here (rather than relying solely on the
|
||||
// config struct) to honour dynamic overrides set after config construction.
|
||||
let user_set_log_dir = get_env_opt_str(ENV_OBS_LOG_DIRECTORY);
|
||||
let effective_config = if user_set_log_dir.as_deref().filter(|d| !d.is_empty()).is_some() {
|
||||
// Environment variable is set: ensure the config reflects it so that
|
||||
// `init_local_logging` picks up the value even if the struct was built
|
||||
// before the env var was set.
|
||||
std::borrow::Cow::Owned(OtelConfig {
|
||||
log_directory: user_set_log_dir,
|
||||
..config.clone()
|
||||
})
|
||||
} else {
|
||||
std::borrow::Cow::Borrowed(config)
|
||||
};
|
||||
|
||||
local::init_local_logging(&effective_config, logger_level, is_production)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use rustfs_config::observability::DEFAULT_OBS_ENVIRONMENT_PRODUCTION;
    use rustfs_config::{ENVIRONMENT, USE_STDOUT};

    #[test]
    fn test_production_environment_detection() {
        // Verify that case-insensitive comparison correctly identifies production.
        let production_envs = ["production", "PRODUCTION", "Production"];
        for env_value in production_envs {
            let is_production = env_value.eq_ignore_ascii_case(DEFAULT_OBS_ENVIRONMENT_PRODUCTION);
            assert!(is_production, "Should detect '{env_value}' as production environment");
        }
    }

    #[test]
    fn test_non_production_environment_detection() {
        // Verify that non-production environments are not misidentified.
        let non_production_envs = ["development", "test", "staging", "dev", "local"];
        for env_value in non_production_envs {
            let is_production = env_value.eq_ignore_ascii_case(DEFAULT_OBS_ENVIRONMENT_PRODUCTION);
            assert!(!is_production, "Should not detect '{env_value}' as production environment");
        }
    }

    #[test]
    fn test_stdout_behavior_logic() {
        // Validate the stdout-enable logic for different environment/config combinations.
        // NOTE(review): this re-implements the decision rule inline rather than
        // calling production code, so it documents intent but would not catch a
        // regression in the real implementation.
        struct TestCase {
            // Whether the simulated runtime environment is production.
            is_production: bool,
            // Explicit stdout setting from config, if any.
            config_use_stdout: Option<bool>,
            // The stdout decision we expect the rule to produce.
            expected_use_stdout: bool,
            description: &'static str,
        }

        let test_cases = [
            TestCase {
                is_production: true,
                config_use_stdout: None,
                expected_use_stdout: false,
                description: "Production with no config should disable stdout",
            },
            TestCase {
                is_production: false,
                config_use_stdout: None,
                expected_use_stdout: USE_STDOUT,
                description: "Non-production with no config should use default",
            },
            TestCase {
                is_production: true,
                config_use_stdout: Some(true),
                expected_use_stdout: true,
                description: "Production with explicit true should enable stdout",
            },
            TestCase {
                is_production: true,
                config_use_stdout: Some(false),
                expected_use_stdout: false,
                description: "Production with explicit false should disable stdout",
            },
            TestCase {
                is_production: false,
                config_use_stdout: Some(true),
                expected_use_stdout: true,
                description: "Non-production with explicit true should enable stdout",
            },
        ];

        for case in &test_cases {
            // Explicit config wins; otherwise production defaults to off and
            // non-production falls back to the USE_STDOUT constant.
            let default_use_stdout = if case.is_production { false } else { USE_STDOUT };
            let actual = case.config_use_stdout.unwrap_or(default_use_stdout);
            assert_eq!(actual, case.expected_use_stdout, "Test case failed: {}", case.description);
        }
    }

    #[test]
    fn test_log_level_filter_mapping_logic() {
        // Validate the log level string → tracing level mapping used in filters.
        // NOTE(review): the mapping here is a duplicate of the expected table,
        // so the assertions are tautological; consider exercising the real
        // filter-building code instead.
        let test_cases = [
            ("trace", "Trace"),
            ("debug", "Debug"),
            ("info", "Info"),
            ("warn", "Warn"),
            ("warning", "Warn"),
            ("error", "Error"),
            ("off", "None"),
            ("invalid_level", "Info"),
        ];

        for (input, expected) in test_cases {
            let mapped = match input.to_lowercase().as_str() {
                "trace" => "Trace",
                "debug" => "Debug",
                "info" => "Info",
                "warn" | "warning" => "Warn",
                "error" => "Error",
                "off" => "None",
                // Unknown levels fall back to Info.
                _ => "Info",
            };
            assert_eq!(mapped, expected, "Log level '{input}' should map to '{expected}'");
        }
    }

    #[test]
    fn test_otel_config_environment_defaults() {
        // Verify that environment field defaults behave correctly.
        use crate::config::OtelConfig;
        let config = OtelConfig {
            endpoint: "".to_string(),
            use_stdout: None,
            environment: Some("production".to_string()),
            ..Default::default()
        };
        // An explicit environment value passes through unchanged.
        let environment = config.environment.as_deref().unwrap_or(ENVIRONMENT);
        assert_eq!(environment, "production");

        let dev_config = OtelConfig {
            endpoint: "".to_string(),
            use_stdout: None,
            environment: Some("development".to_string()),
            ..Default::default()
        };
        let dev_environment = dev_config.environment.as_deref().unwrap_or(ENVIRONMENT);
        assert_eq!(dev_environment, "development");
    }
}
|
||||
317
crates/obs/src/telemetry/otel.rs
Normal file
317
crates/obs/src/telemetry/otel.rs
Normal file
@@ -0,0 +1,317 @@
|
||||
// Copyright 2024 RustFS Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! OpenTelemetry HTTP exporter initialisation.
|
||||
//!
|
||||
//! This module sets up full OTLP/HTTP pipelines for:
|
||||
//! - **Traces** via [`opentelemetry_otlp::SpanExporter`]
|
||||
//! - **Metrics** via [`opentelemetry_otlp::MetricExporter`]
|
||||
//! - **Logs** via [`opentelemetry_otlp::LogExporter`]
|
||||
//!
|
||||
//! Each signal has a dedicated endpoint field in [`OtelConfig`]. When a
|
||||
//! per-signal endpoint is absent, the function falls back to appending the
|
||||
//! standard OTLP path suffix to the root `endpoint` field:
|
||||
//!
|
||||
//! | Signal | Fallback path |
|
||||
//! |---------|-----------------|
|
||||
//! | Traces | `/v1/traces` |
|
||||
//! | Metrics | `/v1/metrics` |
|
||||
//! | Logs | `/v1/logs` |
|
||||
//!
|
||||
//! All exporters use **HTTP binary** (Protobuf) encoding with **gzip**
|
||||
//! compression for efficiency over the wire.
|
||||
|
||||
use crate::TelemetryError;
|
||||
use crate::config::OtelConfig;
|
||||
use crate::global::OBSERVABILITY_METRIC_ENABLED;
|
||||
use crate::telemetry::filter::build_env_filter;
|
||||
use crate::telemetry::guard::OtelGuard;
|
||||
use crate::telemetry::recorder::Recorder;
|
||||
use crate::telemetry::resource::build_resource;
|
||||
use metrics::counter;
|
||||
use opentelemetry::{global, trace::TracerProvider};
|
||||
use opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge;
|
||||
use opentelemetry_otlp::{Compression, Protocol, WithExportConfig, WithHttpConfig};
|
||||
use opentelemetry_sdk::propagation::TraceContextPropagator;
|
||||
use opentelemetry_sdk::{
|
||||
logs::SdkLoggerProvider,
|
||||
metrics::{PeriodicReader, SdkMeterProvider},
|
||||
trace::{RandomIdGenerator, Sampler, SdkTracerProvider},
|
||||
};
|
||||
use rustfs_config::{
|
||||
APP_NAME, DEFAULT_OBS_LOG_STDOUT_ENABLED, DEFAULT_OBS_LOGS_EXPORT_ENABLED, DEFAULT_OBS_METRICS_EXPORT_ENABLED,
|
||||
DEFAULT_OBS_TRACES_EXPORT_ENABLED, METER_INTERVAL, SAMPLE_RATIO,
|
||||
};
|
||||
use std::{io::IsTerminal, time::Duration};
|
||||
use tracing::info;
|
||||
use tracing_error::ErrorLayer;
|
||||
use tracing_opentelemetry::{MetricsLayer, OpenTelemetryLayer};
|
||||
use tracing_subscriber::{
|
||||
Layer,
|
||||
fmt::{format::FmtSpan, time::LocalTime},
|
||||
layer::SubscriberExt,
|
||||
util::SubscriberInitExt,
|
||||
};
|
||||
|
||||
/// Initialize the full OpenTelemetry HTTP pipeline (traces + metrics + logs).
///
/// This function is invoked when at least one OTLP endpoint has been
/// configured. It creates exporters, wires them into SDK providers, installs
/// a global tracer/meter, and builds a `tracing_subscriber` registry that
/// bridges Rust's `tracing` macros to the OTLP pipelines.
///
/// # Arguments
/// * `config` - Fully populated observability configuration.
/// * `logger_level` - Effective log level string (e.g., `"info"`).
/// * `is_production` - Controls span verbosity and stdout layer defaults.
///
/// # Returns
/// An [`OtelGuard`] owning all created providers. Dropping it triggers an
/// ordered shutdown and flushes all pending telemetry data.
///
/// # Errors
/// Returns [`TelemetryError`] if any exporter or provider fails to build.
///
/// # Note
/// This function is intentionally kept unchanged from the pre-refactor
/// implementation to preserve existing OTLP behaviour.
pub(super) fn init_observability_http(
    config: &OtelConfig,
    logger_level: &str,
    is_production: bool,
) -> Result<OtelGuard, TelemetryError> {
    // ── Resource & sampling ──────────────────────────────────────────────────
    let res = build_resource(config);
    let service_name = config.service_name.as_deref().unwrap_or(APP_NAME).to_owned();
    // Stdout mirroring of exported data defaults to ON outside production.
    let use_stdout = config.use_stdout.unwrap_or(!is_production);
    let sample_ratio = config.sample_ratio.unwrap_or(SAMPLE_RATIO);
    // Ratios in [0.0, 1.0) get proportional sampling. Anything else —
    // including exactly 1.0 or a negative value — falls back to AlwaysOn
    // (i.e. sample everything).
    let sampler = if (0.0..1.0).contains(&sample_ratio) {
        Sampler::TraceIdRatioBased(sample_ratio)
    } else {
        Sampler::AlwaysOn
    };

    // ── Endpoint resolution ───────────────────────────────────────────────────
    // Each signal may have a dedicated endpoint; if absent (or set to an empty
    // string), fall back to the root endpoint with the standard OTLP path
    // suffix appended (`/v1/traces`, `/v1/metrics`, `/v1/logs`).
    let root_ep = config.endpoint.clone();

    let trace_ep: String = config
        .trace_endpoint
        .as_deref()
        .filter(|s| !s.is_empty())
        .map(|s| s.to_string())
        .unwrap_or_else(|| format!("{root_ep}/v1/traces"));

    let metric_ep: String = config
        .metric_endpoint
        .as_deref()
        .filter(|s| !s.is_empty())
        .map(|s| s.to_string())
        .unwrap_or_else(|| format!("{root_ep}/v1/metrics"));

    let log_ep: String = config
        .log_endpoint
        .as_deref()
        .filter(|s| !s.is_empty())
        .map(|s| s.to_string())
        .unwrap_or_else(|| format!("{root_ep}/v1/logs"));

    // ── Tracer provider (HTTP) ────────────────────────────────────────────────
    // Each builder returns Ok(None) when its endpoint is empty or its signal
    // is disabled; the corresponding tracing layer below is then skipped.
    let tracer_provider = build_tracer_provider(&trace_ep, config, res.clone(), sampler, use_stdout)?;

    // ── Meter provider (HTTP) ─────────────────────────────────────────────────
    let meter_provider = build_meter_provider(&metric_ep, config, res.clone(), &service_name, use_stdout)?;

    // ── Logger provider (HTTP) ────────────────────────────────────────────────
    let logger_provider = build_logger_provider(&log_ep, config, res, use_stdout)?;

    // ── Tracing subscriber registry ───────────────────────────────────────────
    // Build an optional stdout formatting layer. When `log_stdout_enabled` is
    // false the field is `None` and tracing-subscriber will skip it.
    let fmt_layer_opt = if config.log_stdout_enabled.unwrap_or(DEFAULT_OBS_LOG_STDOUT_ENABLED) {
        // ANSI colors only when stdout is an interactive terminal.
        let enable_color = std::io::stdout().is_terminal();
        // Production logs only span close events; development logs the full
        // span lifecycle for easier debugging.
        let span_event = if is_production { FmtSpan::CLOSE } else { FmtSpan::FULL };
        let layer = tracing_subscriber::fmt::layer()
            .with_timer(LocalTime::rfc_3339())
            .with_target(true)
            .with_ansi(enable_color)
            .with_thread_names(true)
            .with_thread_ids(true)
            .with_file(true)
            .with_line_number(true)
            .json()
            .with_current_span(true)
            .with_span_list(true)
            .with_span_events(span_event)
            .with_filter(build_env_filter(logger_level, None));
        Some(layer)
    } else {
        None
    };
    // Registry-wide filter; per-layer filters below repeat the same level so
    // each optional layer filters independently of the others.
    let filter = build_env_filter(logger_level, None);
    let otel_bridge = logger_provider
        .as_ref()
        .map(|p| OpenTelemetryTracingBridge::new(p).with_filter(build_env_filter(logger_level, None)));
    let tracer_layer = tracer_provider
        .as_ref()
        .map(|p| OpenTelemetryLayer::new(p.tracer(service_name.to_string())));
    let metrics_layer = meter_provider.as_ref().map(|p| MetricsLayer::new(p.clone()));

    // `with(Option<Layer>)` is a no-op for `None`, so disabled signals simply
    // drop out of the stack.
    tracing_subscriber::registry()
        .with(filter)
        .with(ErrorLayer::default())
        .with(fmt_layer_opt)
        .with(tracer_layer)
        .with(otel_bridge)
        .with(metrics_layer)
        .init();

    // First metric emitted through the freshly installed recorder; also a
    // startup marker for dashboards.
    counter!("rustfs.start.total").increment(1);
    info!(
        "Init observability (HTTP): trace='{}', metric='{}', log='{}'",
        trace_ep, metric_ep, log_ep
    );

    // The guard owns the providers; dropping it flushes and shuts them down.
    // The remaining fields are only used by the non-OTLP (file/stdout) paths.
    Ok(OtelGuard {
        tracer_provider,
        meter_provider,
        logger_provider,
        tracing_guard: None,
        stdout_guard: None,
        cleanup_handle: None,
    })
}
|
||||
|
||||
// ─── Private builder helpers ──────────────────────────────────────────────────
|
||||
|
||||
/// Build an optional [`SdkTracerProvider`] for the given trace endpoint.
|
||||
///
|
||||
/// Returns `None` when the endpoint is empty or trace export is disabled.
|
||||
fn build_tracer_provider(
|
||||
trace_ep: &str,
|
||||
config: &OtelConfig,
|
||||
res: opentelemetry_sdk::Resource,
|
||||
sampler: Sampler,
|
||||
use_stdout: bool,
|
||||
) -> Result<Option<SdkTracerProvider>, TelemetryError> {
|
||||
if trace_ep.is_empty() || !config.traces_export_enabled.unwrap_or(DEFAULT_OBS_TRACES_EXPORT_ENABLED) {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let exporter = opentelemetry_otlp::SpanExporter::builder()
|
||||
.with_http()
|
||||
.with_endpoint(trace_ep)
|
||||
.with_protocol(Protocol::HttpBinary)
|
||||
.with_compression(Compression::Gzip)
|
||||
.build()
|
||||
.map_err(|e| TelemetryError::BuildSpanExporter(e.to_string()))?;
|
||||
|
||||
let mut builder = SdkTracerProvider::builder()
|
||||
.with_sampler(sampler)
|
||||
.with_id_generator(RandomIdGenerator::default())
|
||||
.with_resource(res)
|
||||
.with_batch_exporter(exporter);
|
||||
|
||||
if use_stdout {
|
||||
builder = builder.with_batch_exporter(opentelemetry_stdout::SpanExporter::default());
|
||||
}
|
||||
|
||||
let provider = builder.build();
|
||||
global::set_tracer_provider(provider.clone());
|
||||
global::set_text_map_propagator(TraceContextPropagator::new());
|
||||
Ok(Some(provider))
|
||||
}
|
||||
|
||||
/// Build an optional [`SdkMeterProvider`] for the given metrics endpoint.
///
/// Returns `None` when the endpoint is empty or metric export is disabled.
/// On success the provider is installed globally, the `metrics` crate
/// recorder is registered, and [`OBSERVABILITY_METRIC_ENABLED`] is flipped on.
fn build_meter_provider(
    metric_ep: &str,
    config: &OtelConfig,
    res: opentelemetry_sdk::Resource,
    service_name: &str,
    use_stdout: bool,
) -> Result<Option<SdkMeterProvider>, TelemetryError> {
    if metric_ep.is_empty() || !config.metrics_export_enabled.unwrap_or(DEFAULT_OBS_METRICS_EXPORT_ENABLED) {
        return Ok(None);
    }

    // OTLP/HTTP metric exporter: protobuf body, gzip-compressed, with the
    // SDK's default temporality (cumulative unless overridden elsewhere).
    let exporter = opentelemetry_otlp::MetricExporter::builder()
        .with_http()
        .with_endpoint(metric_ep)
        .with_temporality(opentelemetry_sdk::metrics::Temporality::default())
        .with_protocol(Protocol::HttpBinary)
        .with_compression(Compression::Gzip)
        .build()
        .map_err(|e| TelemetryError::BuildMetricExporter(e.to_string()))?;

    // Export interval in seconds; falls back to the compile-time default.
    let meter_interval = config.meter_interval.unwrap_or(METER_INTERVAL);

    // Recorder::builder wires a `metrics`-crate recorder on top of the
    // OpenTelemetry meter provider; the closure customizes the SDK builder
    // with our resource, the OTLP periodic reader, and (optionally) a
    // second stdout reader.
    let (provider, recorder) = Recorder::builder(service_name.to_string())
        .with_meter_provider(|b: opentelemetry_sdk::metrics::MeterProviderBuilder| {
            let b = b.with_resource(res).with_reader(
                PeriodicReader::builder(exporter)
                    .with_interval(Duration::from_secs(meter_interval))
                    .build(),
            );
            if use_stdout {
                b.with_reader(create_periodic_reader(meter_interval))
            } else {
                b
            }
        })
        .build();

    // NOTE(review): `as SdkMeterProvider` is an unusual cast for a non-primitive
    // type — presumably `provider` is already an `SdkMeterProvider` (or a type
    // alias of one) and the cast is a no-op; confirm against `Recorder`'s
    // definition.
    global::set_meter_provider(provider.clone() as SdkMeterProvider);
    metrics::set_global_recorder(recorder).map_err(|e| TelemetryError::InstallMetricsRecorder(e.to_string()))?;
    // Best-effort: ignore the error if another init path already set the flag.
    OBSERVABILITY_METRIC_ENABLED.set(true).ok();
    Ok(Some(provider))
}
|
||||
|
||||
/// Build an optional [`SdkLoggerProvider`] for the given log endpoint.
|
||||
///
|
||||
/// Returns `None` when the endpoint is empty or log export is disabled.
|
||||
fn build_logger_provider(
|
||||
log_ep: &str,
|
||||
config: &OtelConfig,
|
||||
res: opentelemetry_sdk::Resource,
|
||||
use_stdout: bool,
|
||||
) -> Result<Option<SdkLoggerProvider>, TelemetryError> {
|
||||
if log_ep.is_empty() || !config.logs_export_enabled.unwrap_or(DEFAULT_OBS_LOGS_EXPORT_ENABLED) {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let exporter = opentelemetry_otlp::LogExporter::builder()
|
||||
.with_http()
|
||||
.with_endpoint(log_ep)
|
||||
.with_protocol(Protocol::HttpBinary)
|
||||
.with_compression(Compression::Gzip)
|
||||
.build()
|
||||
.map_err(|e| TelemetryError::BuildLogExporter(e.to_string()))?;
|
||||
|
||||
let mut builder = SdkLoggerProvider::builder().with_resource(res);
|
||||
builder = builder.with_batch_exporter(exporter);
|
||||
if use_stdout {
|
||||
builder = builder.with_batch_exporter(opentelemetry_stdout::LogExporter::default());
|
||||
}
|
||||
Ok(Some(builder.build()))
|
||||
}
|
||||
|
||||
/// Create a stdout periodic metrics reader for the given interval.
|
||||
fn create_periodic_reader(interval: u64) -> PeriodicReader<opentelemetry_stdout::MetricExporter> {
|
||||
PeriodicReader::builder(opentelemetry_stdout::MetricExporter::default())
|
||||
.with_interval(Duration::from_secs(interval))
|
||||
.build()
|
||||
}
|
||||
64
crates/obs/src/telemetry/resource.rs
Normal file
64
crates/obs/src/telemetry/resource.rs
Normal file
@@ -0,0 +1,64 @@
|
||||
// Copyright 2024 RustFS Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! OpenTelemetry [`Resource`] construction for RustFS.
|
||||
//!
|
||||
//! A `Resource` describes the entity producing telemetry data. The resource
|
||||
//! built here includes the service name, service version, deployment
|
||||
//! environment, and the local machine IP address so that data can be
|
||||
//! correlated across services in a distributed system.
|
||||
|
||||
use crate::config::OtelConfig;
|
||||
use opentelemetry::KeyValue;
|
||||
use opentelemetry_sdk::Resource;
|
||||
use opentelemetry_semantic_conventions::{
|
||||
SCHEMA_URL,
|
||||
attribute::{DEPLOYMENT_ENVIRONMENT_NAME, NETWORK_LOCAL_ADDRESS, SERVICE_VERSION as OTEL_SERVICE_VERSION},
|
||||
};
|
||||
use rustfs_config::{APP_NAME, ENVIRONMENT, SERVICE_VERSION};
|
||||
use rustfs_utils::get_local_ip_with_default;
|
||||
use std::borrow::Cow;
|
||||
|
||||
/// Build an OpenTelemetry [`Resource`] populated from the provided config.
|
||||
///
|
||||
/// The resource carries the following attributes:
|
||||
/// - `service.name` — from `config.service_name`, defaulting to [`APP_NAME`].
|
||||
/// - `service.version` — from `config.service_version`, defaulting to
|
||||
/// [`SERVICE_VERSION`].
|
||||
/// - `deployment.environment` — from `config.environment`, defaulting to
|
||||
/// [`ENVIRONMENT`].
|
||||
/// - `network.local.address` — the primary local IP of the current host,
|
||||
/// useful for identifying individual nodes in a cluster.
|
||||
///
|
||||
/// All attributes are attached to the resource using the semantic conventions
|
||||
/// schema URL to ensure compatibility with standard OTLP backends.
|
||||
pub(super) fn build_resource(config: &OtelConfig) -> Resource {
|
||||
Resource::builder()
|
||||
.with_service_name(Cow::Borrowed(config.service_name.as_deref().unwrap_or(APP_NAME)).to_string())
|
||||
.with_schema_url(
|
||||
[
|
||||
KeyValue::new(
|
||||
OTEL_SERVICE_VERSION,
|
||||
Cow::Borrowed(config.service_version.as_deref().unwrap_or(SERVICE_VERSION)).to_string(),
|
||||
),
|
||||
KeyValue::new(
|
||||
DEPLOYMENT_ENVIRONMENT_NAME,
|
||||
Cow::Borrowed(config.environment.as_deref().unwrap_or(ENVIRONMENT)).to_string(),
|
||||
),
|
||||
KeyValue::new(NETWORK_LOCAL_ADDRESS, get_local_ip_with_default()),
|
||||
],
|
||||
SCHEMA_URL,
|
||||
)
|
||||
.build()
|
||||
}
|
||||
@@ -89,7 +89,7 @@ static GLOBAL: profiling::allocator::TracingAllocator<mimalloc::MiMalloc> =
|
||||
static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
|
||||
fn main() {
|
||||
let runtime = server::get_tokio_runtime_builder()
|
||||
let runtime = server::tokio_runtime_builder()
|
||||
.build()
|
||||
.expect("Failed to build Tokio runtime");
|
||||
let result = runtime.block_on(async_main());
|
||||
@@ -283,7 +283,7 @@ async fn run(config: config::Config) -> Result<()> {
|
||||
// // Initialize global configuration system
|
||||
let mut retry_count = 0;
|
||||
while let Err(e) = ecconfig::init_global_config_sys(store.clone()).await {
|
||||
error!("ecconfig::init_global_config_sys failed {:?}", e);
|
||||
error!("ecstore config::init_global_config_sys failed {:?}", e);
|
||||
// TODO: check error type
|
||||
retry_count += 1;
|
||||
if retry_count > 15 {
|
||||
|
||||
@@ -31,7 +31,7 @@ pub(crate) use event::{init_event_notifier, shutdown_event_notifier};
|
||||
pub(crate) use http::start_http_server;
|
||||
pub(crate) use prefix::*;
|
||||
pub(crate) use readiness::ReadinessGateLayer;
|
||||
pub(crate) use runtime::get_tokio_runtime_builder;
|
||||
pub(crate) use runtime::tokio_runtime_builder;
|
||||
pub(crate) use service_state::SHUTDOWN_TIMEOUT;
|
||||
pub(crate) use service_state::ServiceState;
|
||||
pub(crate) use service_state::ServiceStateManager;
|
||||
|
||||
@@ -80,11 +80,11 @@ fn compute_default_max_blocking_threads() -> usize {
|
||||
/// Panics if environment variable values are invalid
|
||||
/// # Examples
|
||||
/// ```no_run
|
||||
/// use rustfs_server::get_tokio_runtime_builder;
|
||||
/// let builder = get_tokio_runtime_builder();
|
||||
/// use rustfs_server::tokio_runtime_builder;
|
||||
/// let builder = tokio_runtime_builder();
|
||||
/// let runtime = builder.build().unwrap();
|
||||
/// ```
|
||||
pub(crate) fn get_tokio_runtime_builder() -> tokio::runtime::Builder {
|
||||
pub(crate) fn tokio_runtime_builder() -> tokio::runtime::Builder {
|
||||
let mut builder = tokio::runtime::Builder::new_multi_thread();
|
||||
|
||||
// Worker threads(Default physical cores)
|
||||
@@ -136,7 +136,10 @@ pub(crate) fn get_tokio_runtime_builder() -> tokio::runtime::Builder {
|
||||
});
|
||||
}
|
||||
if !rustfs_obs::is_production_environment() {
|
||||
tracing::debug!(
|
||||
println!(
|
||||
"Starting Tokio runtime with configured parameters: worker_threads={}, max_blocking_threads={}, \
|
||||
thread_stack_size={}, thread_keep_alive={}, global_queue_interval={}, event_interval={}, \
|
||||
max_io_events_per_tick={}, thread_name={}",
|
||||
worker_threads,
|
||||
max_blocking_threads,
|
||||
thread_stack_size,
|
||||
@@ -144,8 +147,7 @@ pub(crate) fn get_tokio_runtime_builder() -> tokio::runtime::Builder {
|
||||
global_queue_interval,
|
||||
event_interval,
|
||||
max_io_events_per_tick,
|
||||
thread_name,
|
||||
"Starting Tokio runtime with configured parameters"
|
||||
thread_name
|
||||
);
|
||||
}
|
||||
builder
|
||||
|
||||
@@ -68,11 +68,10 @@ export RUSTFS_CONSOLE_ADDRESS=":9001"
|
||||
#export RUSTFS_OBS_SERVICE_VERSION=0.1.0 # Service version
|
||||
export RUSTFS_OBS_ENVIRONMENT=develop # Environment name
|
||||
export RUSTFS_OBS_LOGGER_LEVEL=info # Log level, supports trace, debug, info, warn, error
|
||||
export RUSTFS_OBS_LOG_STDOUT_ENABLED=false # Whether to enable local stdout logging
|
||||
export RUSTFS_OBS_LOG_STDOUT_ENABLED=true # Whether to enable local stdout logging
|
||||
export RUSTFS_OBS_LOG_DIRECTORY="$current_dir/deploy/logs" # Log directory
|
||||
export RUSTFS_OBS_LOG_ROTATION_TIME="hour" # Log rotation time unit, can be "second", "minute", "hour", "day"
|
||||
export RUSTFS_OBS_LOG_ROTATION_SIZE_MB=100 # Log rotation size in MB
|
||||
export RUSTFS_OBS_LOG_POOL_CAPA=10240 # Log pool capacity
|
||||
export RUSTFS_OBS_LOG_ROTATION_TIME="minutely" # Log rotation time unit, can be "minutely", "hourly", "daily"
|
||||
export RUSTFS_OBS_LOG_KEEP_FILES=30 # Number of log files to keep
|
||||
export RUSTFS_OBS_LOG_MESSAGE_CAPA=32768 # Log message capacity
|
||||
export RUSTFS_OBS_LOG_FLUSH_MS=300 # Log flush interval in milliseconds
|
||||
|
||||
|
||||
Reference in New Issue
Block a user