diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..1a2c18b5 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,122 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +RustFS is a high-performance distributed object storage system built with Rust, providing S3-compatible APIs and advanced features like data lakes, AI, and big data support. It's designed as an alternative to MinIO with better performance and a more business-friendly Apache 2.0 license. + +## Build Commands + +### Primary Build Commands +- `cargo build --release` - Build the main RustFS binary +- `./build-rustfs.sh` - Recommended build script that handles console resources and cross-platform compilation +- `./build-rustfs.sh --dev` - Development build with debug symbols +- `make build` or `just build` - Use Make/Just for standardized builds + +### Platform-Specific Builds +- `./build-rustfs.sh --platform x86_64-unknown-linux-musl` - Build for musl target +- `./build-rustfs.sh --platform aarch64-unknown-linux-gnu` - Build for ARM64 +- `make build-musl` or `just build-musl` - Build musl variant +- `make build-cross-all` - Build all supported architectures + +### Testing Commands +- `cargo test --workspace --exclude e2e_test` - Run unit tests (excluding e2e tests) +- `cargo nextest run --all --exclude e2e_test` - Use nextest if available (faster) +- `cargo test --all --doc` - Run documentation tests +- `make test` or `just test` - Run full test suite + +### Code Quality +- `cargo fmt --all` - Format code +- `cargo clippy --all-targets --all-features -- -D warnings` - Lint code +- `make pre-commit` or `just pre-commit` - Run all quality checks (fmt, clippy, check, test) + +### Docker Build Commands +- `make docker-buildx` - Build multi-architecture production images +- `make docker-dev-local` - Build development image for local use +- `./docker-buildx.sh --push` - Build and push production images + 
+## Architecture Overview + +### Core Components + +**Main Binary (`rustfs/`):** +- Entry point at `rustfs/src/main.rs` +- Core modules: admin, auth, config, server, storage, license management, profiling +- HTTP server with S3-compatible APIs +- Service state management and graceful shutdown +- Parallel service initialization with DNS resolver, bucket metadata, and IAM + +**Key Crates (`crates/`):** +- `ecstore` - Erasure coding storage implementation (core storage layer) +- `iam` - Identity and Access Management +- `madmin` - Management dashboard and admin API interface +- `s3select-api` & `s3select-query` - S3 Select API and query engine +- `config` - Configuration management with notify features +- `crypto` - Cryptography and security features +- `lock` - Distributed locking implementation +- `filemeta` - File metadata management +- `rio` - Rust I/O utilities and abstractions +- `common` - Shared utilities and data structures +- `protos` - Protocol buffer definitions +- `audit-logger` - Audit logging for file operations +- `notify` - Event notification system +- `obs` - Observability utilities +- `workers` - Worker thread pools and task scheduling +- `appauth` - Application authentication and authorization + +### Build System +- Cargo workspace with 25+ crates +- Custom `build-rustfs.sh` script for advanced build options +- Multi-architecture Docker builds via `docker-buildx.sh` +- Both Make and Just task runners supported +- Cross-compilation support for multiple Linux targets + +### Key Dependencies +- `axum` - HTTP framework for S3 API server +- `tokio` - Async runtime +- `s3s` - S3 protocol implementation library +- `datafusion` - For S3 Select query processing +- `hyper`/`hyper-util` - HTTP client/server utilities +- `rustls` - TLS implementation +- `serde`/`serde_json` - Serialization +- `tracing` - Structured logging and observability +- `pprof` - Performance profiling with flamegraph support +- `tikv-jemallocator` - Memory allocator for Linux GNU builds 
+ +### Development Workflow +- Console resources are embedded during build via `rust-embed` +- Protocol buffers are generated via the custom `gproto` binary +- E2E tests in separate crate (`e2e_test`) +- Shadow build for version/metadata embedding +- Support for both GNU and musl libc targets + +### Performance & Observability +- Performance profiling available with `pprof` integration (disabled on Windows) +- Profiling is enabled via environment variables in production +- Built-in observability with OpenTelemetry integration +- Background services (scanner, heal) can be controlled via environment variables: + - `RUSTFS_ENABLE_SCANNER` (default: true) + - `RUSTFS_ENABLE_HEAL` (default: true) + +### Service Architecture +- Service state management with graceful shutdown handling +- Parallel initialization of core systems (DNS, bucket metadata, IAM) +- Event notification system with MQTT and webhook support +- Auto-heal and data scanner for storage integrity +- Jemalloc allocator for Linux GNU targets for better performance + +## Environment Variables +- `RUSTFS_ENABLE_SCANNER` - Enable/disable background data scanner +- `RUSTFS_ENABLE_HEAL` - Enable/disable auto-heal functionality +- Various profiling and observability controls (e.g. `RUSTFS_ENABLE_PROFILING=true` enables CPU profiling; not supported on Windows) + +## Code Style +- Communicate with me in Chinese, but use only English in code files. +- Code that may cause program crashes (such as unwrap/expect) must not be used, except for testing purposes. +- Code that may cause performance issues (such as blocking IO) must not be used, except for testing purposes. +- Code that may cause memory leaks must not be used, except for testing purposes. +- Code that may cause deadlocks must not be used, except for testing purposes. +- Code that may cause undefined behavior must not be used, except for testing purposes. +- Code that may cause panics must not be used, except for testing purposes. +- Code that may cause data races must not be used, except for testing purposes. 
diff --git a/Cargo.lock b/Cargo.lock index a193bcc1..e1b1eeef 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6300,7 +6300,6 @@ dependencies = [ "heapless", "once_cell", "parking_lot", - "pprof", "rustfs-protos", "serde", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index 624535c5..cbda4991 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -176,7 +176,6 @@ opentelemetry-semantic-conventions = { version = "0.30.0", features = [ parking_lot = "0.12.4" path-absolutize = "3.1.1" path-clean = "1.0.1" -pprof = { version = "0.15.0", features = ["flamegraph", "protobuf-codec"] } blake3 = { version = "1.8.2" } pbkdf2 = "0.12.2" percent-encoding = "2.3.2" diff --git a/crates/lock/Cargo.toml b/crates/lock/Cargo.toml index c2ebd58b..3d65b815 100644 --- a/crates/lock/Cargo.toml +++ b/crates/lock/Cargo.toml @@ -47,4 +47,3 @@ smallvec = "1.11" smartstring = "1.0" crossbeam-queue = "0.3" heapless = "0.8" -pprof.workspace = true diff --git a/rustfs/Cargo.toml b/rustfs/Cargo.toml index 4e3c8205..62b71b95 100644 --- a/rustfs/Cargo.toml +++ b/rustfs/Cargo.toml @@ -107,7 +107,6 @@ url = { workspace = true } urlencoding = { workspace = true } uuid = { workspace = true } zip = { workspace = true } -pprof.workspace = true [target.'cfg(any(target_os = "macos", target_os = "freebsd", target_os = "netbsd", target_os = "openbsd"))'.dependencies] sysctl = { workspace = true } @@ -119,6 +118,9 @@ libsystemd.workspace = true [target.'cfg(all(target_os = "linux", target_env = "gnu"))'.dependencies] tikv-jemallocator = "0.6" +[target.'cfg(not(target_os = "windows"))'.dependencies] +pprof = { version = "0.15.0", features = ["flamegraph", "protobuf-codec"] } + [build-dependencies] http.workspace = true futures.workspace = true diff --git a/rustfs/src/admin/handlers.rs b/rustfs/src/admin/handlers.rs index 2898aec6..40d7b872 100644 --- a/rustfs/src/admin/handlers.rs +++ b/rustfs/src/admin/handlers.rs @@ -1238,101 +1238,112 @@ pub struct ProfileHandler {} #[async_trait::async_trait] impl Operation for 
ProfileHandler { async fn call(&self, req: S3Request, _params: Params<'_, '_>) -> S3Result> { - use crate::profiling; - - if !profiling::is_profiler_enabled() { + #[cfg(target_os = "windows")] + { return Ok(S3Response::new(( - StatusCode::SERVICE_UNAVAILABLE, - Body::from("Profiler not enabled. Set RUSTFS_ENABLE_PROFILING=true to enable profiling".to_string()), + StatusCode::NOT_IMPLEMENTED, + Body::from("CPU profiling is not supported on Windows platform".to_string()), ))); } - let queries = extract_query_params(&req.uri); - let seconds = queries.get("seconds").and_then(|s| s.parse::().ok()).unwrap_or(30); - let format = queries.get("format").cloned().unwrap_or_else(|| "protobuf".to_string()); + #[cfg(not(target_os = "windows"))] + { + use crate::profiling; - if seconds > 300 { - return Ok(S3Response::new(( - StatusCode::BAD_REQUEST, - Body::from("Profile duration cannot exceed 300 seconds".to_string()), - ))); - } - - let guard = match profiling::get_profiler_guard() { - Some(guard) => guard, - None => { + if !profiling::is_profiler_enabled() { return Ok(S3Response::new(( StatusCode::SERVICE_UNAVAILABLE, - Body::from("Profiler not initialized".to_string()), + Body::from("Profiler not enabled. 
Set RUSTFS_ENABLE_PROFILING=true to enable profiling".to_string()), ))); } - }; - info!("Starting CPU profile collection for {} seconds", seconds); + let queries = extract_query_params(&req.uri); + let seconds = queries.get("seconds").and_then(|s| s.parse::().ok()).unwrap_or(30); + let format = queries.get("format").cloned().unwrap_or_else(|| "protobuf".to_string()); - tokio::time::sleep(std::time::Duration::from_secs(seconds)).await; - - let guard_lock = match guard.lock() { - Ok(guard) => guard, - Err(_) => { - error!("Failed to acquire profiler guard lock"); + if seconds > 300 { return Ok(S3Response::new(( - StatusCode::INTERNAL_SERVER_ERROR, - Body::from("Failed to acquire profiler lock".to_string()), + StatusCode::BAD_REQUEST, + Body::from("Profile duration cannot exceed 300 seconds".to_string()), ))); } - }; - let report = match guard_lock.report().build() { - Ok(report) => report, - Err(e) => { - error!("Failed to build profiler report: {}", e); - return Ok(S3Response::new(( - StatusCode::INTERNAL_SERVER_ERROR, - Body::from(format!("Failed to build profile report: {}", e)), - ))); - } - }; - - info!("CPU profile collection completed"); - - match format.as_str() { - "protobuf" | "pb" => { - let profile = report.pprof().unwrap(); - let mut body = Vec::new(); - if let Err(e) = profile.write_to_vec(&mut body) { - error!("Failed to serialize protobuf profile: {}", e); + let guard = match profiling::get_profiler_guard() { + Some(guard) => guard, + None => { return Ok(S3Response::new(( - StatusCode::INTERNAL_SERVER_ERROR, - Body::from("Failed to serialize profile".to_string()), + StatusCode::SERVICE_UNAVAILABLE, + Body::from("Profiler not initialized".to_string()), ))); } + }; - let mut headers = HeaderMap::new(); - headers.insert(CONTENT_TYPE, "application/octet-stream".parse().unwrap()); - Ok(S3Response::with_headers((StatusCode::OK, Body::from(body)), headers)) - } - "flamegraph" | "svg" => { - let mut flamegraph_buf = Vec::new(); - match report.flamegraph(&mut 
flamegraph_buf) { - Ok(()) => (), - Err(e) => { - error!("Failed to generate flamegraph: {}", e); + info!("Starting CPU profile collection for {} seconds", seconds); + + tokio::time::sleep(std::time::Duration::from_secs(seconds)).await; + + let guard_lock = match guard.lock() { + Ok(guard) => guard, + Err(_) => { + error!("Failed to acquire profiler guard lock"); + return Ok(S3Response::new(( + StatusCode::INTERNAL_SERVER_ERROR, + Body::from("Failed to acquire profiler lock".to_string()), + ))); + } + }; + + let report = match guard_lock.report().build() { + Ok(report) => report, + Err(e) => { + error!("Failed to build profiler report: {}", e); + return Ok(S3Response::new(( + StatusCode::INTERNAL_SERVER_ERROR, + Body::from(format!("Failed to build profile report: {}", e)), + ))); + } + }; + + info!("CPU profile collection completed"); + + match format.as_str() { + "protobuf" | "pb" => { + let profile = report.pprof().unwrap(); + let mut body = Vec::new(); + if let Err(e) = profile.write_to_vec(&mut body) { + error!("Failed to serialize protobuf profile: {}", e); return Ok(S3Response::new(( StatusCode::INTERNAL_SERVER_ERROR, - Body::from(format!("Failed to generate flamegraph: {}", e)), + Body::from("Failed to serialize profile".to_string()), ))); } - }; - let mut headers = HeaderMap::new(); - headers.insert(CONTENT_TYPE, "image/svg+xml".parse().unwrap()); - Ok(S3Response::with_headers((StatusCode::OK, Body::from(flamegraph_buf)), headers)) + let mut headers = HeaderMap::new(); + headers.insert(CONTENT_TYPE, "application/octet-stream".parse().unwrap()); + Ok(S3Response::with_headers((StatusCode::OK, Body::from(body)), headers)) + } + "flamegraph" | "svg" => { + let mut flamegraph_buf = Vec::new(); + match report.flamegraph(&mut flamegraph_buf) { + Ok(()) => (), + Err(e) => { + error!("Failed to generate flamegraph: {}", e); + return Ok(S3Response::new(( + StatusCode::INTERNAL_SERVER_ERROR, + Body::from(format!("Failed to generate flamegraph: {}", e)), + ))); + } + 
}; + + let mut headers = HeaderMap::new(); + headers.insert(CONTENT_TYPE, "image/svg+xml".parse().unwrap()); + Ok(S3Response::with_headers((StatusCode::OK, Body::from(flamegraph_buf)), headers)) + } + _ => Ok(S3Response::new(( + StatusCode::BAD_REQUEST, + Body::from("Unsupported format. Use 'protobuf' or 'flamegraph'".to_string()), + ))), } - _ => Ok(S3Response::new(( - StatusCode::BAD_REQUEST, - Body::from("Unsupported format. Use 'protobuf' or 'flamegraph'".to_string()), - ))), } } } @@ -1341,23 +1352,35 @@ pub struct ProfileStatusHandler {} #[async_trait::async_trait] impl Operation for ProfileStatusHandler { async fn call(&self, _req: S3Request, _params: Params<'_, '_>) -> S3Result> { - use crate::profiling; use std::collections::HashMap; - let status = if profiling::is_profiler_enabled() { - HashMap::from([ - ("enabled", "true"), - ("status", "running"), - ("supported_formats", "protobuf, flamegraph"), - ("max_duration_seconds", "300"), - ("endpoint", "/rustfs/admin/debug/pprof/profile"), - ]) - } else { - HashMap::from([ - ("enabled", "false"), - ("status", "disabled"), - ("message", "Set RUSTFS_ENABLE_PROFILING=true to enable profiling"), - ]) + #[cfg(target_os = "windows")] + let status = HashMap::from([ + ("enabled", "false"), + ("status", "not_supported"), + ("platform", "windows"), + ("message", "CPU profiling is not supported on Windows platform"), + ]); + + #[cfg(not(target_os = "windows"))] + let status = { + use crate::profiling; + + if profiling::is_profiler_enabled() { + HashMap::from([ + ("enabled", "true"), + ("status", "running"), + ("supported_formats", "protobuf, flamegraph"), + ("max_duration_seconds", "300"), + ("endpoint", "/rustfs/admin/debug/pprof/profile"), + ]) + } else { + HashMap::from([ + ("enabled", "false"), + ("status", "disabled"), + ("message", "Set RUSTFS_ENABLE_PROFILING=true to enable profiling"), + ]) + } }; match serde_json::to_string(&status) { diff --git a/rustfs/src/admin/mod.rs b/rustfs/src/admin/mod.rs index 
da316dad..e020ed34 100644 --- a/rustfs/src/admin/mod.rs +++ b/rustfs/src/admin/mod.rs @@ -214,6 +214,7 @@ pub fn make_admin_route(console_enabled: bool) -> std::io::Result AdminOperation(&RemoveRemoteTargetHandler {}), )?; + // Performance profiling endpoints (available on all platforms, with platform-specific responses) r.insert( Method::GET, format!("{}{}", ADMIN_PREFIX, "/debug/pprof/profile").as_str(), diff --git a/rustfs/src/main.rs b/rustfs/src/main.rs index ecf0f3cb..98a5932e 100644 --- a/rustfs/src/main.rs +++ b/rustfs/src/main.rs @@ -18,6 +18,7 @@ mod config; mod error; // mod grpc; pub mod license; +#[cfg(not(target_os = "windows"))] mod profiling; mod server; mod storage; @@ -96,6 +97,7 @@ async fn main() -> Result<()> { set_global_guard(guard).map_err(Error::other)?; // Initialize performance profiling if enabled + #[cfg(not(target_os = "windows"))] profiling::start_profiling_if_enabled(); // Run parameters