diff --git a/Cargo.lock b/Cargo.lock index 3a7d3e00..925847c8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -452,6 +452,17 @@ dependencies = [ "zstd-safe", ] +[[package]] +name = "async-lock" +version = "3.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fd03604047cee9b6ce9de9f70c6cd540a0520c813cbd49bae61f33ab80ed1dc" +dependencies = [ + "event-listener", + "event-listener-strategy", + "pin-project-lite", +] + [[package]] name = "async-recursion" version = "1.1.1" @@ -1732,6 +1743,12 @@ dependencies = [ "itertools 0.13.0", ] +[[package]] +name = "critical-section" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "790eea4361631c5e7d22598ecd5723ff611904e3344ce8720784c93e3d83d40b" + [[package]] name = "crossbeam-channel" version = "0.5.15" @@ -1937,6 +1954,12 @@ dependencies = [ "parking_lot_core", ] +[[package]] +name = "data-encoding" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" + [[package]] name = "datafusion" version = "46.0.1" @@ -2961,6 +2984,20 @@ dependencies = [ "slab", ] +[[package]] +name = "generator" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "605183a538e3e2a9c1038635cc5c2d194e2ee8fd0d1b66b8349fad7dbacce5a2" +dependencies = [ + "cc", + "cfg-if", + "libc", + "log", + "rustversion", + "windows 0.61.3", +] + [[package]] name = "generic-array" version = "0.14.7" @@ -3129,6 +3166,57 @@ dependencies = [ "vsimd", ] +[[package]] +name = "hickory-proto" +version = "0.25.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8a6fe56c0038198998a6f217ca4e7ef3a5e51f46163bd6dd60b5c71ca6c6502" +dependencies = [ + "async-trait", + "bytes", + "cfg-if", + "data-encoding", + "enum-as-inner", + "futures-channel", + "futures-io", + "futures-util", + "idna", + "ipnet", + "once_cell", + "rand 0.9.2", + "ring", + "rustls 0.23.31", + "thiserror 2.0.16", + "tinyvec", + "tokio", + "tokio-rustls 0.26.2", + "tracing", + "url", +] + +[[package]] +name = "hickory-resolver" +version = "0.25.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc62a9a99b0bfb44d2ab95a7208ac952d31060efc16241c87eaf36406fecf87a" +dependencies = [ + "cfg-if", + "futures-util", + "hickory-proto", + "ipconfig", + "moka", + "once_cell", + "parking_lot", + "rand 0.9.2", + "resolv-conf", + "rustls 0.23.31", + "smallvec", + "thiserror 2.0.16", + "tokio", + "tokio-rustls 0.26.2", + "tracing", +] + [[package]] name = "highway" version = "1.3.0" @@ -3565,6 +3653,18 @@ dependencies = [ "libc", ] +[[package]] +name = "ipconfig" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b58db92f96b720de98181bbbe63c831e87005ab460c1bf306eb2622b4707997f" +dependencies = [ + "socket2 0.5.10", + "widestring", + "windows-sys 0.48.0", + "winreg", +] + [[package]] name = "ipnet" version = "2.11.0" @@ -3882,6 +3982,19 @@ dependencies = [ "value-bag", ] +[[package]] +name = "loom" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "419e0dc8046cb947daa77eb95ae174acfbddb7673b4151f56d1eed8e93fbfaca" +dependencies = [ + "cfg-if", + "generator", + "scoped-tls", + "tracing", + "tracing-subscriber", +] + [[package]] name = "lru" version = "0.12.5" @@ -4035,6 +4148,28 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "moka" +version = "0.12.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9321642ca94a4282428e6ea4af8cc2ca4eac48ac7a6a4ea8f33f76d0ce70926" +dependencies = [ + "async-lock", + "crossbeam-channel", + "crossbeam-epoch", + "crossbeam-utils", + "event-listener", + "futures-util", + "loom", + "parking_lot", + "portable-atomic", + "rustc_version", + "smallvec", + "tagptr", + "thiserror 1.0.69", + "uuid", +] + [[package]] name = "multimap" version = "0.10.1" @@ -4404,6 +4539,10 @@ name = "once_cell" version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +dependencies = [ + "critical-section", + "portable-atomic", +] [[package]] name = "once_cell_polyfill" @@ -4865,6 +5004,12 @@ dependencies = [ "universal-hash", ] +[[package]] +name = "portable-atomic" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" + [[package]] name = "potential_utf" version = "0.1.3" @@ -5411,6 +5556,12 @@ dependencies = [ "webpki-roots", ] +[[package]] +name = "resolv-conf" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95325155c684b1c89f7765e30bc1c42e4a6da51ca513615660cb8a62ef9a88e3" + [[package]] name = "rfc6979" version = "0.3.1" @@ -6178,6 +6329,8 @@ dependencies = [ "flate2", "futures", "hex-simd", + "hickory-proto", + "hickory-resolver", "highway", "hmac 0.12.1", "hyper 1.7.0", @@ -6185,6 +6338,7 @@ dependencies = [ "local-ip-address", "lz4", "md-5", + "moka", "netif", "nix 0.30.1", "rand 0.9.2", @@ -6201,6 +6355,7 @@ dependencies = [ "snap", "sysinfo 0.37.0", "tempfile", + "thiserror 2.0.16", "tokio", "tracing", "transform-stream", @@ -6509,6 +6664,12 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "scoped-tls" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" + [[package]] name = "scopeguard" version = "1.2.0" @@ -7244,6 +7405,12 @@ dependencies = [ "libc", ] +[[package]] +name = "tagptr" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" + [[package]] name = "temp-env" version = "0.3.6" @@ -8264,6 +8431,12 @@ dependencies = [ "rustix 0.38.44", ] +[[package]] +name = "widestring" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd7cf3379ca1aac9eea11fba24fd7e315d621f8dfe35c8d7d2be8b793726e07d" + [[package]] name = "wildmatch" version = "2.4.0" @@ -8436,6 +8609,15 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + [[package]] name = "windows-sys" version = "0.52.0" @@ -8463,6 +8645,21 @@ dependencies = [ "windows-targets 0.53.3", ] +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + [[package]] name = "windows-targets" version = "0.52.6" @@ -8505,6 +8702,12 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" @@ -8517,6 +8720,12 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + [[package]] name = "windows_aarch64_msvc" version = "0.52.6" @@ -8529,6 +8738,12 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + [[package]] name = "windows_i686_gnu" version = "0.52.6" @@ -8553,6 +8768,12 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + [[package]] name = "windows_i686_msvc" version = "0.52.6" @@ -8565,6 +8786,12 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + [[package]] name = "windows_x86_64_gnu" version = "0.52.6" @@ -8577,6 +8804,12 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" @@ -8589,6 +8822,12 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + [[package]] name = "windows_x86_64_msvc" version = "0.52.6" @@ -8610,6 +8849,16 @@ dependencies = [ "memchr", ] +[[package]] +name = "winreg" +version = "0.50.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] + [[package]] name = "wit-bindgen" version = "0.45.0" diff --git a/Cargo.toml b/Cargo.toml index c9935b19..cbda4991 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -129,6 +129,8 @@ glob = "0.3.3" hex = "0.4.3" hex-simd = "0.8.0" highway = { version = "1.3.0" } +hickory-proto = "0.25.2" +hickory-resolver = { version = "0.25.2", features = ["tls-ring"] } hmac = "0.12.1" hyper = "1.7.0" hyper-util = { version = "0.1.16", features = [ @@ -149,6 +151,7 @@ lz4 = "1.28.1" matchit = "0.8.4" md-5 = "0.10.6" mime_guess = "2.0.5" +moka = { version = "0.12.10", features = ["future"] } netif = "0.1.6" nix = { version = "0.30.1", features = ["fs"] } nu-ansi-term = "0.50.1" @@ -261,7 +264,6 @@ xxhash-rust = { version = "0.8.15", features = ["xxh64", "xxh3"] } zip = "2.4.2" zstd = "0.13.3" - [workspace.metadata.cargo-shear] ignored = ["rustfs", "rust-i18n", "rustfs-mcp", "rustfs-audit-logger", "tokio-test"] diff --git a/crates/ecstore/src/endpoints.rs b/crates/ecstore/src/endpoints.rs index 2c2f3b15..79226111 100644 --- a/crates/ecstore/src/endpoints.rs +++ b/crates/ecstore/src/endpoints.rs @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -use rustfs_utils::{XHost, check_local_server_addr, get_host_ip, is_local_host}; -use tracing::{instrument, warn}; +use rustfs_utils::{XHost, check_local_server_addr, get_host_ip, get_host_ip_async, is_local_host}; +use tracing::{error, instrument, warn}; use crate::{ disk::endpoint::{Endpoint, EndpointType}, @@ -241,9 +241,20 @@ impl PoolEndpointList { } let host = ep.url.host().unwrap(); - let host_ip_set = host_ip_cache.entry(host.clone()).or_insert({ - get_host_ip(host.clone()).map_err(|e| Error::other(format!("host '{host}' cannot resolve: {e}")))? - }); + let host_ip_set = if let Some(set) = host_ip_cache.get(&host) { + set + } else { + let ips = match get_host_ip(host.clone()) { + Ok(ips) => ips, + Err(e) => { + error!("host {} not found, error:{}", host, e); + get_host_ip_async(host.clone()) + .map_err(|e| Error::other(format!("host '{host}' cannot resolve: {e}")))? + } + }; + host_ip_cache.insert(host.clone(), ips); + host_ip_cache.get(&host).unwrap() + }; let path = ep.get_file_path(); match path_ip_map.entry(path) { diff --git a/crates/utils/Cargo.toml b/crates/utils/Cargo.toml index 26226472..9e99c258 100644 --- a/crates/utils/Cargo.toml +++ b/crates/utils/Cargo.toml @@ -27,44 +27,49 @@ categories = ["web-programming", "development-tools", "cryptography"] [dependencies] base64-simd = { workspace = true, optional = true } blake3 = { workspace = true, optional = true } -crc32fast.workspace = true +brotli = { workspace = true, optional = true } +bytes = { workspace = true, optional = true } +crc32fast = { workspace = true } +flate2 = { workspace = true, optional = true } +futures = { workspace = true, optional = true } hex-simd = { workspace = true, optional = true } highway = { workspace = true, optional = true } +hickory-resolver = { workspace = true, optional = true } +hickory-proto = { workspace = true, optional = true } +hmac = { workspace = true, optional = true } +hyper = { workspace = true, optional = true } +hyper-util = { workspace = true, optional = true } local-ip-address = { workspace = true, optional = true } +lz4 = { workspace = true, optional = true } md-5 = { workspace = true, optional = true } +moka = { workspace = true, optional = true, features = ["future"] } netif = { workspace = true, optional = true } nix = { workspace = true, optional = true } +rand = { workspace = true, optional = true } regex = { workspace = true, optional = true } rustfs-config = { workspace = true, features = ["constants"] } rustls = { workspace = true, optional = true } rustls-pemfile = { workspace = true, optional = true } rustls-pki-types = { workspace = true, optional = true } +s3s = { workspace = true, optional = true } serde = { workspace = true, optional = true } -siphasher = { workspace = true, optional = true } -tempfile = { workspace = true, optional = true } -tokio = { workspace = true, optional = true, features = ["io-util", "macros"] } -tracing = { workspace = true } -url = { workspace = true, optional = true } -flate2 = { workspace = true, optional = true } -brotli = { workspace = true, optional = true } -zstd = { workspace = true, optional = true } -snap = { workspace = true, optional = true } -lz4 = { workspace = true, optional = true } -rand = { workspace = true, optional = true } -futures = { workspace = true, optional = true } -transform-stream = { workspace = true, optional = true } -bytes = { workspace = true, optional = true } -sysinfo = { workspace = true, optional = true } -hyper-util = { workspace = true, optional = true } sha1 = { workspace = true, optional = true } sha2 = { workspace = true, optional = true } -hmac = { workspace = true, optional = true } -s3s = { workspace = true, optional = true } -hyper = { workspace = true, optional = true } +siphasher = { workspace = true, optional = true } +snap = { workspace = true, optional = true } +sysinfo = { workspace = true, optional = true } +tempfile = { workspace = true, optional = true } +thiserror = { workspace = true, optional = true } +tokio = { workspace = true, optional = true, features = ["io-util", "macros"] } +tracing = { workspace = true } +transform-stream = { workspace = true, optional = true } +url = { workspace = true, optional = true } +zstd = { workspace = true, optional = true } [dev-dependencies] tempfile = { workspace = true } rand = { workspace = true } +tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } [target.'cfg(windows)'.dependencies] winapi = { workspace = true, optional = true, features = ["std", "fileapi", "minwindef", "ntdef", "winnt"] } @@ -76,7 +81,7 @@ workspace = true default = ["ip"] # features that are enabled by default ip = ["dep:local-ip-address"] # ip characteristics and their dependencies tls = ["dep:rustls", "dep:rustls-pemfile", "dep:rustls-pki-types"] # tls characteristics and their dependencies -net = ["ip", "dep:url", "dep:netif", "dep:futures", "dep:transform-stream", "dep:bytes", "dep:s3s", "dep:hyper", "dep:hyper-util"] # empty network features +net = ["ip", "dep:url", "dep:netif", "dep:futures", "dep:transform-stream", "dep:bytes", "dep:s3s", "dep:hyper", "dep:hyper-util", "dep:hickory-resolver", "dep:hickory-proto", "dep:moka", "dep:thiserror"] # network features with DNS resolver io = ["dep:tokio"] path = [] notify = ["dep:hyper", "dep:s3s"] # file system notification features diff --git a/crates/utils/src/dns_resolver.rs b/crates/utils/src/dns_resolver.rs new file mode 100644 index 00000000..e952d999 --- /dev/null +++ b/crates/utils/src/dns_resolver.rs @@ -0,0 +1,473 @@ +// Copyright 2024 RustFS Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Layered DNS resolution utility for Kubernetes environments +//! +//! This module provides robust DNS resolution with multiple fallback layers: +//! 1. Local cache (Moka) for previously resolved results +//! 2. System DNS resolver (container/host adaptive) using hickory-resolver +//! 3. Public DNS servers as final fallback (8.8.8.8, 1.1.1.1) using hickory-resolver with TLS +//! +//! The resolver is designed to handle 5-level or deeper domain names that may fail +//! in Kubernetes environments due to CoreDNS configuration, DNS recursion limits, +//! or network-related issues. Uses hickory-resolver for actual DNS queries with TLS support. + +use hickory_resolver::Resolver; +use hickory_resolver::config::ResolverConfig; +use hickory_resolver::name_server::TokioConnectionProvider; +use moka::future::Cache; +use std::net::IpAddr; +use std::sync::OnceLock; +use std::time::Duration; +use tracing::{debug, error, info, instrument, warn}; + +/// Maximum FQDN length according to RFC standards +const MAX_FQDN_LENGTH: usize = 253; +/// Maximum DNS label length according to RFC standards +const MAX_LABEL_LENGTH: usize = 63; +/// Cache entry TTL in seconds +const CACHE_TTL_SECONDS: u64 = 300; // 5 minutes +/// Maximum cache size (number of entries) +const MAX_CACHE_SIZE: u64 = 10000; + +/// DNS resolution error types with detailed context and tracing information +#[derive(Debug, thiserror::Error)] +pub enum DnsError { + #[error("Invalid domain format: {reason}")] + InvalidFormat { reason: String }, + + #[error("Local cache miss for domain: {domain}")] + CacheMiss { domain: String }, + + #[error("System DNS resolution failed for domain: {domain} - {source}")] + SystemDnsFailed { + domain: String, + #[source] + source: Box, + }, + + #[error("Public DNS resolution failed for domain: {domain} - {source}")] + PublicDnsFailed { + domain: String, + #[source] + source: Box, + }, + + #[error( + "All DNS resolution attempts failed for domain: {domain}. Please check your domain spelling, network connectivity, or DNS configuration" + )] + AllAttemptsFailed { domain: String }, + + #[error("DNS resolver initialization failed: {source}")] + InitializationFailed { + #[source] + source: Box, + }, + + #[error("DNS configuration error: {source}")] + ConfigurationError { + #[source] + source: Box, + }, +} + +/// Layered DNS resolver with caching and multiple fallback strategies +pub struct LayeredDnsResolver { + /// Local cache for resolved domains using Moka for high performance + cache: Cache>, + /// System DNS resolver using hickory-resolver with default configuration + system_resolver: Resolver, + /// Public DNS resolver using hickory-resolver with Cloudflare DNS servers + public_resolver: Resolver, +} + +impl LayeredDnsResolver { + /// Create a new layered DNS resolver with automatic DNS configuration detection + #[instrument(skip_all)] + pub async fn new() -> Result { + info!("Initializing layered DNS resolver with hickory-resolver, Moka cache and public DNS fallback"); + + // Create Moka cache with TTL and size limits + let cache = Cache::builder() + .time_to_live(Duration::from_secs(CACHE_TTL_SECONDS)) + .max_capacity(MAX_CACHE_SIZE) + .build(); + + // Create system DNS resolver with default configuration (auto-detects container/host DNS) + let system_resolver = + Resolver::builder_with_config(ResolverConfig::default(), TokioConnectionProvider::default()).build(); + + let mut config = ResolverConfig::cloudflare_tls(); + for ns in ResolverConfig::google_tls().name_servers() { + config.add_name_server(ns.clone()) + } + // Create public DNS resolver using Cloudflare DNS with TLS support + let public_resolver = Resolver::builder_with_config(config, TokioConnectionProvider::default()).build(); + + info!("DNS resolver initialized successfully with hickory-resolver system and Cloudflare TLS public fallback"); + + Ok(Self { + cache, + system_resolver, + public_resolver, + }) + } + + /// Validate domain format according to RFC standards + #[instrument(skip_all, fields(domain = %domain))] + fn validate_domain_format(domain: &str) -> Result<(), DnsError> { + // Check FQDN length + if domain.len() > MAX_FQDN_LENGTH { + return Err(DnsError::InvalidFormat { + reason: format!("FQDN must not exceed {} bytes, got {} bytes", MAX_FQDN_LENGTH, domain.len()), + }); + } + + // Check each label length + for label in domain.split('.') { + if label.len() > MAX_LABEL_LENGTH { + return Err(DnsError::InvalidFormat { + reason: format!( + "Each label must not exceed {} bytes, label '{}' has {} bytes", + MAX_LABEL_LENGTH, + label, + label.len() + ), + }); + } + } + + // Check for empty labels (except trailing dot) + let labels: Vec<&str> = domain.trim_end_matches('.').split('.').collect(); + for label in &labels { + if label.is_empty() { + return Err(DnsError::InvalidFormat { + reason: "Domain contains empty labels".to_string(), + }); + } + } + + Ok(()) + } + + /// Check local cache for resolved domain + #[instrument(skip_all, fields(domain = %domain))] + async fn check_cache(&self, domain: &str) -> Option> { + match self.cache.get(domain).await { + Some(ips) => { + debug!("DNS cache hit for domain: {}, found {} IPs", domain, ips.len()); + Some(ips) + } + None => { + debug!("DNS cache miss for domain: {}", domain); + None + } + } + } + + /// Update local cache with resolved IPs + #[instrument(skip_all, fields(domain = %domain, ip_count = ips.len()))] + async fn update_cache(&self, domain: &str, ips: Vec) { + self.cache.insert(domain.to_string(), ips.clone()).await; + debug!("DNS cache updated for domain: {} with {} IPs", domain, ips.len()); + } + + /// Get cache statistics for monitoring + #[instrument(skip_all)] + pub async fn cache_stats(&self) -> (u64, u64) { + let entry_count = self.cache.entry_count(); + let weighted_size = self.cache.weighted_size(); + debug!("DNS cache stats - entries: {}, weighted_size: {}", entry_count, weighted_size); + (entry_count, weighted_size) + } + + /// Manually invalidate cache entries (useful for testing or forced refresh) + #[instrument(skip_all)] + pub async fn invalidate_cache(&self) { + self.cache.invalidate_all(); + info!("DNS cache invalidated"); + } + + /// Resolve domain using system DNS (cluster/host DNS configuration) with hickory-resolver + #[instrument(skip_all, fields(domain = %domain))] + async fn resolve_with_system_dns(&self, domain: &str) -> Result, DnsError> { + debug!("Attempting system DNS resolution for domain: {} using hickory-resolver", domain); + + match self.system_resolver.lookup_ip(domain).await { + Ok(lookup) => { + let ips: Vec = lookup.iter().collect(); + if !ips.is_empty() { + info!("System DNS resolution successful for domain: {} -> {} IPs", domain, ips.len()); + debug!("System DNS resolved IPs: {:?}", ips); + Ok(ips) + } else { + warn!("System DNS returned empty result for domain: {}", domain); + Err(DnsError::SystemDnsFailed { + domain: domain.to_string(), + source: "No IP addresses found".to_string().into(), + }) + } + } + Err(e) => { + warn!("System DNS resolution failed for domain: {} - {}", domain, e); + Err(DnsError::SystemDnsFailed { + domain: domain.to_string(), + source: Box::new(e), + }) + } + } + } + + /// Resolve domain using public DNS servers (Cloudflare TLS DNS) with hickory-resolver + #[instrument(skip_all, fields(domain = %domain))] + async fn resolve_with_public_dns(&self, domain: &str) -> Result, DnsError> { + debug!( + "Attempting public DNS resolution for domain: {} using hickory-resolver with TLS-enabled Cloudflare DNS", + domain + ); + + match self.public_resolver.lookup_ip(domain).await { + Ok(lookup) => { + let ips: Vec = lookup.iter().collect(); + if !ips.is_empty() { + info!("Public DNS resolution successful for domain: {} -> {} IPs", domain, ips.len()); + debug!("Public DNS resolved IPs: {:?}", ips); + Ok(ips) + } else { + warn!("Public DNS returned empty result for domain: {}", domain); + Err(DnsError::PublicDnsFailed { + domain: domain.to_string(), + source: "No IP addresses found".to_string().into(), + }) + } + } + Err(e) => { + error!("Public DNS resolution failed for domain: {} - {}", domain, e); + Err(DnsError::PublicDnsFailed { + domain: domain.to_string(), + source: Box::new(e), + }) + } + } + } + + /// Resolve domain with layered fallback strategy using hickory-resolver + /// + /// Resolution order with detailed tracing: + /// 1. Local cache (Moka with TTL) + /// 2. System DNS (hickory-resolver with host/container adaptive configuration) + /// 3. Public DNS (hickory-resolver with TLS-enabled Cloudflare DNS fallback) + #[instrument(skip_all, fields(domain = %domain))] + pub async fn resolve(&self, domain: &str) -> Result, DnsError> { + // Validate domain format first + Self::validate_domain_format(domain)?; + + info!("Starting DNS resolution for domain: {}", domain); + + // Step 1: Check local cache + if let Some(ips) = self.check_cache(domain).await { + info!("DNS resolution completed from cache for domain: {} -> {} IPs", domain, ips.len()); + return Ok(ips); + } + + debug!("Local cache miss for domain: {}, attempting system DNS", domain); + + // Step 2: Try system DNS (cluster/host adaptive) + match self.resolve_with_system_dns(domain).await { + Ok(ips) => { + self.update_cache(domain, ips.clone()).await; + info!("DNS resolution completed via system DNS for domain: {} -> {} IPs", domain, ips.len()); + return Ok(ips); + } + Err(system_err) => { + warn!("System DNS failed for domain: {} - {}", domain, system_err); + } + } + + // Step 3: Fallback to public DNS + info!("Falling back to public DNS for domain: {}", domain); + match self.resolve_with_public_dns(domain).await { + Ok(ips) => { + self.update_cache(domain, ips.clone()).await; + info!("DNS resolution completed via public DNS for domain: {} -> {} IPs", domain, ips.len()); + Ok(ips) + } + Err(public_err) => { + error!( + "All DNS resolution attempts failed for domain: {}. System DNS: failed, Public DNS: {}", + domain, public_err + ); + Err(DnsError::AllAttemptsFailed { + domain: domain.to_string(), + }) + } + } + } +} + +/// Global DNS resolver instance +static GLOBAL_DNS_RESOLVER: OnceLock = OnceLock::new(); + +/// Initialize the global DNS resolver +#[instrument] +pub async fn init_global_dns_resolver() -> Result<(), DnsError> { + info!("Initializing global DNS resolver"); + let resolver = LayeredDnsResolver::new().await?; + + match GLOBAL_DNS_RESOLVER.set(resolver) { + Ok(()) => { + info!("Global DNS resolver initialized successfully"); + Ok(()) + } + Err(_) => { + warn!("Global DNS resolver was already initialized"); + Ok(()) + } + } +} + +/// Get the global DNS resolver instance +pub fn get_global_dns_resolver() -> Option<&'static LayeredDnsResolver> { + GLOBAL_DNS_RESOLVER.get() +} + +/// Resolve domain using the global DNS resolver with comprehensive tracing +#[instrument(skip_all, fields(domain = %domain))] +pub async fn resolve_domain(domain: &str) -> Result, DnsError> { + match get_global_dns_resolver() { + Some(resolver) => resolver.resolve(domain).await, + None => Err(DnsError::InitializationFailed { + source: "Global DNS resolver not initialized. Call init_global_dns_resolver() first." + .to_string() + .into(), + }), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_domain_validation() { + // Valid domains + assert!(LayeredDnsResolver::validate_domain_format("example.com").is_ok()); + assert!(LayeredDnsResolver::validate_domain_format("sub.example.com").is_ok()); + assert!(LayeredDnsResolver::validate_domain_format("very.deep.sub.domain.example.com").is_ok()); + + // Invalid domains - too long FQDN + let long_domain = "a".repeat(254); + assert!(LayeredDnsResolver::validate_domain_format(&long_domain).is_err()); + + // Invalid domains - label too long + let long_label = format!("{}.com", "a".repeat(64)); + assert!(LayeredDnsResolver::validate_domain_format(&long_label).is_err()); + + // Invalid domains - empty label + assert!(LayeredDnsResolver::validate_domain_format("example..com").is_err()); + } + + #[tokio::test] + async fn test_cache_functionality() { + let resolver = LayeredDnsResolver::new().await.unwrap(); + + // Test cache miss + assert!(resolver.check_cache("example.com").await.is_none()); + + // Update cache + let test_ips = vec![IpAddr::from([192, 0, 2, 1])]; + resolver.update_cache("example.com", test_ips.clone()).await; + + // Test cache hit + assert_eq!(resolver.check_cache("example.com").await, Some(test_ips)); + + // Test cache stats (note: moka cache might not immediately reflect changes) + let (total, _weighted_size) = resolver.cache_stats().await; + // Cache should have at least the entry we just added (might be 0 due to async nature) + assert!(total <= 1, "Cache should have at most 1 entry, got {}", total); + } + + #[tokio::test] + async fn test_dns_resolution() { + let resolver = LayeredDnsResolver::new().await.unwrap(); + + // Test resolution of a known domain (localhost should always resolve) + match resolver.resolve("localhost").await { + Ok(ips) => { + assert!(!ips.is_empty()); + println!("Resolved localhost to: {:?}", ips); + } + Err(e) => { + // In some test environments, even localhost might fail + // This is acceptable as long as our error handling works + println!("DNS resolution failed (might be expected in test environments): {}", e); + } + } + } + + #[tokio::test] + async fn test_invalid_domain_resolution() { + let resolver = LayeredDnsResolver::new().await.unwrap(); + + // Test resolution of invalid domain + let result = resolver + .resolve("nonexistent.invalid.domain.example.thisdefinitelydoesnotexist") + .await; + assert!(result.is_err()); + + if let Err(e) = result { + println!("Expected error for invalid domain: {}", e); + // Should be AllAttemptsFailed since both system and public DNS should fail + assert!(matches!(e, DnsError::AllAttemptsFailed { .. })); + } + } + + #[tokio::test] + async fn test_cache_invalidation() { + let resolver = LayeredDnsResolver::new().await.unwrap(); + + // Add entry to cache + let test_ips = vec![IpAddr::from([192, 0, 2, 1])]; + resolver.update_cache("test.example.com", test_ips.clone()).await; + + // Verify cache hit + assert_eq!(resolver.check_cache("test.example.com").await, Some(test_ips)); + + // Invalidate cache + resolver.invalidate_cache().await; + + // Verify cache miss after invalidation + assert!(resolver.check_cache("test.example.com").await.is_none()); + } + + #[tokio::test] + async fn test_global_resolver_initialization() { + // Test initialization + assert!(init_global_dns_resolver().await.is_ok()); + + // Test that resolver is available + assert!(get_global_dns_resolver().is_some()); + + // Test domain resolution through global resolver + match resolve_domain("localhost").await { + Ok(ips) => { + assert!(!ips.is_empty()); + println!("Global resolver resolved localhost to: {:?}", ips); + } + Err(e) => { + println!("Global resolver DNS resolution failed (might be expected in test environments): {}", e); + } + } + } +} diff --git a/crates/utils/src/lib.rs b/crates/utils/src/lib.rs index 9fbe3a13..d87569c0 100644 --- a/crates/utils/src/lib.rs +++ b/crates/utils/src/lib.rs @@ -14,11 +14,15 @@ #[cfg(feature = "tls")] pub mod certs; +#[cfg(feature = "net")] +pub mod dns_resolver; #[cfg(feature = "ip")] pub mod ip; #[cfg(feature = "net")] pub mod net; +#[cfg(feature = "net")] +pub use dns_resolver::*; #[cfg(feature = "net")] pub use net::*; diff --git a/crates/utils/src/net.rs b/crates/utils/src/net.rs index e1e2955c..71018420 100644 --- a/crates/utils/src/net.rs +++ b/crates/utils/src/net.rs @@ -86,7 +86,50 @@ pub fn is_local_host(host: Host<&str>, port: u16, local_port: u16) -> std::io::R Ok(is_local_host) } -/// returns IP address of given host. +/// returns IP address of given host using layered DNS resolution. +pub fn get_host_ip_async(host: Host<&str>) -> std::io::Result> { + match host { + Host::Domain(domain) => { + #[cfg(feature = "net")] + { + use crate::dns_resolver::resolve_domain; + let handle = tokio::runtime::Handle::current(); + handle.block_on(async { + match resolve_domain(domain).await { + Ok(ips) => Ok(ips.into_iter().collect()), + Err(e) => Err(std::io::Error::other(format!("DNS resolution failed: {}", e))), + } + }) + } + #[cfg(not(feature = "net"))] + { + // Fallback to standard resolution when DNS resolver is not available + match (domain, 0) + .to_socket_addrs() + .map(|v| v.map(|v| v.ip()).collect::>()) + { + Ok(ips) => Ok(ips), + Err(err) => Err(std::io::Error::other(err)), + } + } + } + Host::Ipv4(ip) => { + let mut set = HashSet::with_capacity(1); + set.insert(IpAddr::V4(ip)); + Ok(set) + } + Host::Ipv6(ip) => { + let mut set = HashSet::with_capacity(1); + set.insert(IpAddr::V6(ip)); + Ok(set) + } + } +} + +/// returns IP address of given host using standard resolution. +/// +/// **Note**: This function uses standard library DNS resolution. +/// For enhanced DNS resolution with Kubernetes support, use `get_host_ip_async()`. pub fn get_host_ip(host: Host<&str>) -> std::io::Result> { match host { Host::Domain(domain) => match (domain, 0) diff --git a/rustfs/src/main.rs b/rustfs/src/main.rs index c75c4083..5bcc56e0 100644 --- a/rustfs/src/main.rs +++ b/rustfs/src/main.rs @@ -56,6 +56,7 @@ use rustfs_iam::init_iam_sys; use rustfs_notify::global::notifier_instance; use rustfs_obs::{init_obs, set_global_guard}; use rustfs_targets::arn::TargetID; +use rustfs_utils::dns_resolver::init_global_dns_resolver; use rustfs_utils::net::parse_and_resolve_address; use s3s::s3_error; use std::io::{Error, Result}; @@ -100,6 +101,12 @@ async fn main() -> Result<()> { #[instrument(skip(opt))] async fn run(opt: config::Opt) -> Result<()> { debug!("opt: {:?}", &opt); + + // Initialize global DNS resolver early for enhanced DNS resolution + if let Err(e) = init_global_dns_resolver().await { + warn!("Failed to initialize global DNS resolver: {}. Using standard DNS resolution.", e); + } + if let Some(region) = &opt.region { rustfs_ecstore::global::set_global_region(region.clone()); }