diff --git a/.vscode/launch.json b/.vscode/launch.json index 215cd78e..62da1e91 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -113,6 +113,7 @@ // "RUSTFS_OBS_TRACE_ENDPOINT": "http://127.0.0.1:4318/v1/traces", // jeager otlp http endpoint // "RUSTFS_OBS_METRIC_ENDPOINT": "http://127.0.0.1:4318/v1/metrics", // default otlp http endpoint // "RUSTFS_OBS_LOG_ENDPOINT": "http://127.0.0.1:4318/v1/logs", // default otlp http endpoint + // "RUSTFS_COMPRESS_ENABLE": "true", "RUSTFS_CONSOLE_ADDRESS": "127.0.0.1:9001", "RUSTFS_OBS_LOG_DIRECTORY": "./target/logs", }, diff --git a/crates/config/src/constants/compress.rs b/crates/config/src/constants/compress.rs new file mode 100644 index 00000000..4af04571 --- /dev/null +++ b/crates/config/src/constants/compress.rs @@ -0,0 +1,61 @@ +// Copyright 2024 RustFS Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! HTTP Response Compression Configuration +//! +//! This module provides configuration options for HTTP response compression. +//! By default, compression is disabled (aligned with MinIO behavior). +//! When enabled via `RUSTFS_COMPRESS_ENABLE=on`, compression can be configured +//! to apply only to specific file extensions, MIME types, and minimum file sizes. 
+ +/// Environment variable to enable/disable HTTP response compression +/// Default: off (disabled) +/// Values: on, off, true, false, yes, no, 1, 0 +/// Example: RUSTFS_COMPRESS_ENABLE=on +pub const ENV_COMPRESS_ENABLE: &str = "RUSTFS_COMPRESS_ENABLE"; + +/// Default compression enable state +/// Aligned with MinIO behavior - compression is disabled by default +pub const DEFAULT_COMPRESS_ENABLE: bool = false; + +/// Environment variable for file extensions that should be compressed +/// Comma-separated list of file extensions (with or without leading dot) +/// Default: "" (empty, meaning use MIME type matching only) +/// Example: RUSTFS_COMPRESS_EXTENSIONS=.txt,.log,.csv,.json,.xml,.html,.css,.js +pub const ENV_COMPRESS_EXTENSIONS: &str = "RUSTFS_COMPRESS_EXTENSIONS"; + +/// Default file extensions for compression +/// Empty by default - relies on MIME type matching +pub const DEFAULT_COMPRESS_EXTENSIONS: &str = ""; + +/// Environment variable for MIME types that should be compressed +/// Comma-separated list of MIME types, supports wildcard (*) for subtypes +/// Default: "text/*,application/json,application/xml,application/javascript" +/// Example: RUSTFS_COMPRESS_MIME_TYPES=text/*,application/json,application/xml +pub const ENV_COMPRESS_MIME_TYPES: &str = "RUSTFS_COMPRESS_MIME_TYPES"; + +/// Default MIME types for compression +/// Includes common text-based content types that benefit from compression +pub const DEFAULT_COMPRESS_MIME_TYPES: &str = "text/*,application/json,application/xml,application/javascript"; + +/// Environment variable for minimum file size to apply compression +/// Files smaller than this size will not be compressed +/// Default: 1000 (bytes) +/// Example: RUSTFS_COMPRESS_MIN_SIZE=1000 +pub const ENV_COMPRESS_MIN_SIZE: &str = "RUSTFS_COMPRESS_MIN_SIZE"; + +/// Default minimum file size for compression (in bytes) +/// Files smaller than 1000 bytes typically don't benefit from compression +/// and the compression overhead may outweigh the 
benefits +pub const DEFAULT_COMPRESS_MIN_SIZE: u64 = 1000; diff --git a/crates/config/src/constants/mod.rs b/crates/config/src/constants/mod.rs index 94400961..7f6dbff9 100644 --- a/crates/config/src/constants/mod.rs +++ b/crates/config/src/constants/mod.rs @@ -14,6 +14,7 @@ pub(crate) mod app; pub(crate) mod body_limits; +pub(crate) mod compress; pub(crate) mod console; pub(crate) mod env; pub(crate) mod heal; diff --git a/crates/config/src/lib.rs b/crates/config/src/lib.rs index 1228ae53..9d83800e 100644 --- a/crates/config/src/lib.rs +++ b/crates/config/src/lib.rs @@ -19,6 +19,8 @@ pub use constants::app::*; #[cfg(feature = "constants")] pub use constants::body_limits::*; #[cfg(feature = "constants")] +pub use constants::compress::*; +#[cfg(feature = "constants")] pub use constants::console::*; #[cfg(feature = "constants")] pub use constants::env::*; diff --git a/docs/compression-best-practices.md b/docs/compression-best-practices.md index 77d66ce8..6a10e7db 100644 --- a/docs/compression-best-practices.md +++ b/docs/compression-best-practices.md @@ -3,7 +3,89 @@ ## Overview This document outlines best practices for HTTP response compression in RustFS, based on lessons learned from fixing the -NoSuchKey error response regression (Issue #901). +NoSuchKey error response regression (Issue #901) and the whitelist-based compression redesign (Issue #902). + +## Whitelist-Based Compression (Issue #902) + +### Design Philosophy + +After Issue #901, we identified that the blacklist approach (compress everything except known problematic types) was +still causing issues with browser downloads showing "unknown file size". In Issue #902, we redesigned the compression +system using a **whitelist approach** aligned with MinIO's behavior: + +1. **Compression is disabled by default** - Opt-in rather than opt-out +2. **Only explicitly configured content types are compressed** - Preserves Content-Length for all other responses +3. 
**Fine-grained configuration** - Control via file extensions, MIME types, and size thresholds +4. **Skip already-encoded content** - Avoid double compression + +### Configuration Options + +RustFS provides flexible compression configuration via environment variables and command-line arguments: + +| Environment Variable | CLI Argument | Default | Description | +|---------------------|--------------|---------|-------------| +| `RUSTFS_COMPRESS_ENABLE` | | `false` | Enable/disable compression | +| `RUSTFS_COMPRESS_EXTENSIONS` | | `""` | File extensions to compress (e.g., `.txt,.log,.csv`) | +| `RUSTFS_COMPRESS_MIME_TYPES` | | `text/*,application/json,...` | MIME types to compress (supports wildcards) | +| `RUSTFS_COMPRESS_MIN_SIZE` | | `1000` | Minimum file size (bytes) for compression | + +### Usage Examples + +```bash +# Enable compression for text files and JSON +RUSTFS_COMPRESS_ENABLE=on \ +RUSTFS_COMPRESS_EXTENSIONS=.txt,.log,.csv,.json,.xml \ +RUSTFS_COMPRESS_MIME_TYPES=text/*,application/json,application/xml \ +RUSTFS_COMPRESS_MIN_SIZE=1000 \ +rustfs /data + +# Or using command-line arguments +rustfs /data \ + --compress-enable \ + --compress-extensions ".txt,.log,.csv" \ + --compress-mime-types "text/*,application/json" \ + --compress-min-size 1000 +``` + +### Implementation Details + +The `CompressionPredicate` implements intelligent compression decisions: + +```rust +impl Predicate for CompressionPredicate { + fn should_compress(&self, response: &Response) -> bool { + // 1. Check if compression is enabled + if !self.config.enabled { return false; } + + // 2. Never compress error responses + if status.is_client_error() || status.is_server_error() { return false; } + + // 3. Skip already-encoded content (gzip, br, deflate, etc.) + if has_content_encoding(response) { return false; } + + // 4. Check minimum size threshold + if content_length < self.config.min_size { return false; } + + // 5. 
Check whitelist: extension OR MIME type must match + if matches_extension(response) || matches_mime_type(response) { + return true; + } + + // 6. Default: don't compress (whitelist approach) + false + } +} +``` + +### Benefits of Whitelist Approach + +| Aspect | Blacklist (Old) | Whitelist (New) | +|--------|-----------------|-----------------| +| Default behavior | Compress most content | No compression | +| Content-Length | Often removed | Preserved for unmatched types | +| Browser downloads | "Unknown file size" | Accurate file size shown | +| Configuration | Complex exclusion rules | Simple inclusion rules | +| MinIO compatibility | Different behavior | Aligned behavior | ## Key Principles @@ -38,21 +120,54 @@ if status.is_client_error() || status.is_server_error() { - May actually increase payload size - Adds latency without benefit -**Recommended Threshold**: 256 bytes minimum +**Recommended Threshold**: 1000 bytes minimum (configurable via `RUSTFS_COMPRESS_MIN_SIZE`) **Implementation**: ```rust if let Some(content_length) = response.headers().get(CONTENT_LENGTH) { if let Ok(length) = content_length.to_str()?.parse::<u64>()? { - if length < 256 { + if length < self.config.min_size { return false; // Don't compress small responses } } } ``` -### 3. Maintain Observability +### 3. Skip Already-Encoded Content + +**Rationale**: If the response already has a `Content-Encoding` header (e.g., gzip, br, deflate, zstd), the content +is already compressed.
Re-compressing provides no benefit and may cause issues: + +- Double compression wastes CPU cycles +- May corrupt data or increase size +- Breaks decompression on client side + +**Implementation**: + +```rust +// Skip if content is already encoded (e.g., gzip, br, deflate, zstd) +if let Some(content_encoding) = response.headers().get(CONTENT_ENCODING) { + if let Ok(encoding) = content_encoding.to_str() { + let encoding_lower = encoding.to_lowercase(); + // "identity" means no encoding, so we can still compress + if encoding_lower != "identity" && !encoding_lower.is_empty() { + debug!("Skipping compression for already encoded response: {}", encoding); + return false; + } + } +} +``` + +**Common Content-Encoding Values**: + +- `gzip` - GNU zip compression +- `br` - Brotli compression +- `deflate` - Deflate compression +- `zstd` - Zstandard compression +- `identity` - No encoding (compression allowed) + +### 4. Maintain Observability **Rationale**: Compression decisions can affect debugging and troubleshooting. Always log when compression is skipped. @@ -84,38 +199,58 @@ grep "Skipping compression" logs/rustfs.log | wc -l .layer(CompressionLayer::new()) ``` -**Problem**: Can cause Content-Length mismatches with error responses +**Problem**: Can cause Content-Length mismatches with error responses and browser download issues -### ✅ Using Intelligent Predicates +### ❌ Using Blacklist Approach ```rust -// GOOD - Filter based on status and size -.layer(CompressionLayer::new().compress_when(ShouldCompress)) -``` - -### ❌ Ignoring Content-Length Header - -```rust -// BAD - Only checking status +// BAD - Blacklist approach (compress everything except...) fn should_compress(&self, response: &Response) -> bool { - !response.status().is_client_error() + // Skip images, videos, archives... 
+ if is_already_compressed_type(content_type) { return false; } + true // Compress everything else } ``` -**Problem**: May compress tiny responses unnecessarily +**Problem**: Removes Content-Length for many file types, causing "unknown file size" in browsers -### ✅ Checking Both Status and Size +### ✅ Using Whitelist-Based Predicate ```rust -// GOOD - Multi-criteria decision +// GOOD - Whitelist approach with configurable predicate +.layer(CompressionLayer::new().compress_when(CompressionPredicate::new(config))) +``` + +### ❌ Ignoring Content-Encoding Header + +```rust +// BAD - May double-compress already compressed content fn should_compress<B>(&self, response: &Response<B>) -> bool { - // Check status + matches_mime_type(response) // Missing Content-Encoding check +} +``` + +**Problem**: Double compression wastes CPU and may corrupt data + +### ✅ Comprehensive Checks + +```rust +// GOOD - Multi-criteria whitelist decision +fn should_compress<B>(&self, response: &Response<B>) -> bool { + // 1. Must be enabled + if !self.config.enabled { return false; } + + // 2. Skip error responses if response.status().is_error() { return false; } - // Check size - if get_content_length(response) < 256 { return false; } + // 3. Skip already-encoded content + if has_content_encoding(response) { return false; } - true + // 4. Check minimum size + if get_content_length(response) < self.config.min_size { return false; } + + // 5. Must match whitelist (extension OR MIME type) + matches_extension(response) || matches_mime_type(response) } ``` @@ -224,28 +359,52 @@ async fn test_error_response_not_truncated() { ## Migration Guide +### Migrating from Blacklist to Whitelist Approach + +If you're upgrading from an older RustFS version with blacklist-based compression: + +1. **Compression is now disabled by default** + - Set `RUSTFS_COMPRESS_ENABLE=on` to enable + - This is a behavior change: responses keep their Content-Length by default, and deployments that relied on the previous always-on compression must opt in explicitly + +2.
**Configure your whitelist** + ```bash + # Example: Enable compression for common text formats + RUSTFS_COMPRESS_ENABLE=on + RUSTFS_COMPRESS_EXTENSIONS=.txt,.log,.csv,.json,.xml,.html,.css,.js + RUSTFS_COMPRESS_MIME_TYPES=text/*,application/json,application/xml,application/javascript + RUSTFS_COMPRESS_MIN_SIZE=1000 + ``` + +3. **Verify browser downloads** + - Check that file downloads show accurate file sizes + - Verify Content-Length headers are preserved for non-compressed content + ### Updating Existing Code If you're adding compression to an existing service: -1. **Start Conservative**: Only compress responses > 1KB -2. **Monitor Impact**: Watch CPU and latency metrics -3. **Lower Threshold Gradually**: Test with smaller thresholds -4. **Always Exclude Errors**: Never compress 4xx/5xx +1. **Start with compression disabled** (default) +2. **Define your whitelist**: Identify content types that benefit from compression +3. **Set appropriate thresholds**: Start with 1KB minimum size +4. **Enable and monitor**: Watch CPU, latency, and download behavior ### Rollout Strategy 1. **Stage 1**: Deploy to canary (5% traffic) - Monitor for 24 hours - Check error rates and latency + - Verify browser download behavior 2. **Stage 2**: Expand to 25% traffic - Monitor for 48 hours - Validate compression ratios + - Check Content-Length preservation 3. 
**Stage 3**: Full rollout (100% traffic) - Continue monitoring for 1 week - Document any issues + - Fine-tune whitelist based on actual usage ## Related Documentation @@ -253,13 +412,33 @@ If you're adding compression to an existing service: - [tower-http Compression](https://docs.rs/tower-http/latest/tower_http/compression/) - [HTTP Content-Encoding](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Encoding) +## Architecture + +### Module Structure + +The compression functionality is organized in a dedicated module for maintainability: + +``` +rustfs/src/server/ +├── compress.rs # Compression configuration and predicate +├── http.rs # HTTP server (uses compress module) +└── mod.rs # Module declarations +``` + +### Key Components + +1. **`CompressionConfig`** - Stores compression settings parsed from environment/CLI +2. **`CompressionPredicate`** - Implements `tower_http::compression::predicate::Predicate` +3. **Configuration Constants** - Defined in `crates/config/src/constants/compress.rs` + ## References 1. Issue #901: NoSuchKey error response regression -2. [Google Web Fundamentals - Text Compression](https://web.dev/reduce-network-payloads-using-text-compression/) -3. [AWS Best Practices - Response Compression](https://docs.aws.amazon.com/whitepapers/latest/s3-optimizing-performance-best-practices/) +2. Issue #902: Whitelist-based compression redesign +3. [Google Web Fundamentals - Text Compression](https://web.dev/reduce-network-payloads-using-text-compression/) +4. 
[AWS Best Practices - Response Compression](https://docs.aws.amazon.com/whitepapers/latest/s3-optimizing-performance-best-practices/) --- -**Last Updated**: 2025-11-24 +**Last Updated**: 2025-12-13 **Maintainer**: RustFS Team diff --git a/rustfs/src/server/compress.rs b/rustfs/src/server/compress.rs new file mode 100644 index 00000000..da7a3616 --- /dev/null +++ b/rustfs/src/server/compress.rs @@ -0,0 +1,485 @@ +// Copyright 2024 RustFS Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! HTTP Response Compression Module +//! +//! This module provides configurable HTTP response compression functionality +//! using a whitelist-based approach. Unlike traditional blacklist approaches, +//! this design only compresses explicitly configured content types, which: +//! +//! 1. Preserves Content-Length for all other responses (better browser UX) +//! 2. Aligns with MinIO's opt-in compression behavior +//! 3. Provides fine-grained control over what gets compressed +//! +//! # Configuration +//! +//! Compression can be configured via environment variables or command line options: +//! +//! - `RUSTFS_COMPRESS_ENABLE` - Enable/disable compression (default: off) +//! - `RUSTFS_COMPRESS_EXTENSIONS` - File extensions to compress (e.g., `.txt,.log,.csv`) +//! - `RUSTFS_COMPRESS_MIME_TYPES` - MIME types to compress (e.g., `text/*,application/json`) +//! - `RUSTFS_COMPRESS_MIN_SIZE` - Minimum file size for compression (default: 1000 bytes) +//! +//! # Example +//! 
+//! ```bash +//! RUSTFS_COMPRESS_ENABLE=on \ +//! RUSTFS_COMPRESS_EXTENSIONS=.txt,.log,.csv \ +//! RUSTFS_COMPRESS_MIME_TYPES=text/*,application/json \ +//! RUSTFS_COMPRESS_MIN_SIZE=1000 \ +//! rustfs /data +//! ``` + +use http::Response; +use rustfs_config::{ + DEFAULT_COMPRESS_ENABLE, DEFAULT_COMPRESS_EXTENSIONS, DEFAULT_COMPRESS_MIME_TYPES, DEFAULT_COMPRESS_MIN_SIZE, + ENV_COMPRESS_ENABLE, ENV_COMPRESS_EXTENSIONS, ENV_COMPRESS_MIME_TYPES, ENV_COMPRESS_MIN_SIZE, EnableState, +}; +use std::str::FromStr; +use tower_http::compression::predicate::Predicate; +use tracing::debug; + +/// Configuration for HTTP response compression. +/// +/// This structure holds the whitelist-based compression settings: +/// - File extensions that should be compressed (checked via Content-Disposition header) +/// - MIME types that should be compressed (supports wildcards like `text/*`) +/// - Minimum file size threshold for compression +/// +/// When compression is enabled, only responses matching these criteria will be compressed. +/// This approach aligns with MinIO's behavior where compression is opt-in rather than default. 
+#[derive(Clone, Debug)]
+pub struct CompressionConfig {
+    /// Whether compression is enabled
+    pub enabled: bool,
+    /// File extensions to compress (normalized to lowercase with leading dot)
+    pub extensions: Vec<String>,
+    /// MIME type patterns to compress (supports wildcards like `text/*`)
+    pub mime_patterns: Vec<String>,
+    /// Minimum file size (in bytes) for compression
+    pub min_size: u64,
+}
+
+impl CompressionConfig {
+    /// Create a new compression configuration from environment variables.
+    ///
+    /// Reads the following environment variables:
+    /// - `RUSTFS_COMPRESS_ENABLE` - Enable/disable compression (default: false)
+    /// - `RUSTFS_COMPRESS_EXTENSIONS` - File extensions to compress (default: "")
+    /// - `RUSTFS_COMPRESS_MIME_TYPES` - MIME types to compress (default: "text/*,application/json,...")
+    /// - `RUSTFS_COMPRESS_MIN_SIZE` - Minimum file size for compression (default: 1000)
+    ///
+    /// A variable that is unset falls back to its default. An enable flag or
+    /// minimum size that fails to parse also falls back to its default rather
+    /// than aborting startup. A list variable that is set to an empty string
+    /// yields an empty whitelist for that dimension.
+    pub fn from_env() -> Self {
+        let enabled = std::env::var(ENV_COMPRESS_ENABLE)
+            .ok()
+            .and_then(|v| EnableState::from_str(&v).ok())
+            .map(|state| state.is_enabled())
+            .unwrap_or(DEFAULT_COMPRESS_ENABLE);
+
+        let extensions_str = std::env::var(ENV_COMPRESS_EXTENSIONS).unwrap_or_else(|_| DEFAULT_COMPRESS_EXTENSIONS.to_string());
+        let extensions = Self::parse_extensions(&extensions_str);
+
+        let mime_types_str = std::env::var(ENV_COMPRESS_MIME_TYPES).unwrap_or_else(|_| DEFAULT_COMPRESS_MIME_TYPES.to_string());
+        let mime_patterns = Self::parse_mime_patterns(&mime_types_str);
+
+        // Surrounding whitespace is tolerated so that `RUSTFS_COMPRESS_MIN_SIZE=" 1000"` still parses.
+        let min_size = std::env::var(ENV_COMPRESS_MIN_SIZE)
+            .ok()
+            .and_then(|v| v.trim().parse::<u64>().ok())
+            .unwrap_or(DEFAULT_COMPRESS_MIN_SIZE);
+
+        Self {
+            enabled,
+            extensions,
+            mime_patterns,
+            min_size,
+        }
+    }
+
+    /// Parse a comma-separated extension list into its normalized form.
+    ///
+    /// Each entry is trimmed, lowercased and given a leading dot if it lacks one.
+    /// Empty entries and a bare `.` are dropped; a bare `.` would otherwise match
+    /// every filename that merely ends in a dot.
+    fn parse_extensions(list: &str) -> Vec<String> {
+        list.split(',')
+            .map(|s| {
+                let s = s.trim().to_lowercase();
+                if s.starts_with('.') { s } else { format!(".{s}") }
+            })
+            .filter(|s| s.len() > 1)
+            .collect()
+    }
+
+    /// Parse a comma-separated MIME pattern list: entries are trimmed and
+    /// lowercased, and empty entries are dropped.
+    fn parse_mime_patterns(list: &str) -> Vec<String> {
+        list.split(',')
+            .map(|s| s.trim().to_lowercase())
+            .filter(|s| !s.is_empty())
+            .collect()
+    }
+
+    /// Check if a MIME type matches any of the configured patterns.
+    ///
+    /// `content_type` is lowercased and any parameters after `;` (such as
+    /// `charset`) are ignored. A pattern ending in `/*` matches every subtype
+    /// of that top-level type; any other pattern must match exactly. Patterns
+    /// are compared as stored, so they are expected to be lowercase already.
+    pub(crate) fn matches_mime_type(&self, content_type: &str) -> bool {
+        let ct_lower = content_type.to_lowercase();
+        let main_type = ct_lower.split(';').next().unwrap_or(&ct_lower).trim();
+
+        self.mime_patterns.iter().any(|pattern| match pattern.strip_suffix('*') {
+            // "text/*" -> prefix "text/"
+            Some(prefix) if prefix.ends_with('/') => main_type.starts_with(prefix),
+            _ => main_type == pattern.as_str(),
+        })
+    }
+
+    /// Check if a filename ends with any of the configured extensions.
+    ///
+    /// The comparison lowercases `filename`; the configured extensions are
+    /// compared as stored. Returns `false` when no extensions are configured.
+    pub(crate) fn matches_extension(&self, filename: &str) -> bool {
+        if self.extensions.is_empty() {
+            return false;
+        }
+
+        let filename_lower = filename.to_lowercase();
+        self.extensions.iter().any(|ext| filename_lower.ends_with(ext.as_str()))
+    }
+
+    /// Extract the filename from a Content-Disposition header value.
+    ///
+    /// Handles `filename="example.txt"` and `filename=example.txt`; the parameter
+    /// name is matched case-insensitively. An unquoted value runs to the next
+    /// `;` or the end of the header and is trimmed. The extended `filename*=`
+    /// form is not parsed. Returns `None` when there is no `filename=` parameter
+    /// or when a quoted value has no closing quote.
+    pub(crate) fn extract_filename_from_content_disposition(header_value: &str) -> Option<String> {
+        const KEY: &str = "filename=";
+
+        // ASCII-only lowercasing keeps every byte offset identical to `header_value`.
+        // Full Unicode lowercasing can change the byte length of a character, which
+        // would make the offset found below point at the wrong place in the original.
+        let lower = header_value.to_ascii_lowercase();
+        let start = lower.find(KEY)? + KEY.len();
+        let rest = &header_value[start..];
+
+        match rest.strip_prefix('"') {
+            // Quoted value: everything up to the closing quote.
+            Some(quoted) => quoted.find('"').map(|end| quoted[..end].to_string()),
+            // Unquoted value: everything up to the next parameter.
+            None => {
+                let end = rest.find(';').unwrap_or(rest.len());
+                Some(rest[..end].trim().to_string())
+            }
+        }
+    }
+}
+
+impl Default for CompressionConfig {
+    /// Build the configuration from the compiled-in defaults, using the same
+    /// list normalization as [`CompressionConfig::from_env`].
+    fn default() -> Self {
+        Self {
+            enabled: DEFAULT_COMPRESS_ENABLE,
+            extensions: Self::parse_extensions(DEFAULT_COMPRESS_EXTENSIONS),
+            mime_patterns: Self::parse_mime_patterns(DEFAULT_COMPRESS_MIME_TYPES),
+            min_size: DEFAULT_COMPRESS_MIN_SIZE,
+        }
+    }
+}
+
+/// Predicate to determine if a response should be compressed.
+///
+/// This predicate implements a whitelist-based compression approach:
+/// - Only compresses responses that match configured file extensions OR MIME types
+/// - Respects minimum file size threshold
+/// - Always skips error responses (4xx, 5xx) to avoid Content-Length issues
+/// - Skips already encoded responses (Content-Encoding header present)
+///
+/// # Design Philosophy
+/// Unlike the previous blacklist approach, this whitelist approach:
+/// 1. Only compresses explicitly configured content types
+/// 2. Preserves Content-Length for all other responses (better browser UX)
+/// 3. Aligns with MinIO's opt-in compression behavior
+/// 4. Avoids double compression by checking Content-Encoding header
+///
+/// # Extension Matching
+/// File extension matching works by extracting the filename from the
+/// `Content-Disposition` response header (e.g., `attachment; filename="file.txt"`).
+///
+/// # Performance
+/// This predicate is evaluated per-response and has O(n) complexity where n is
+/// the number of configured extensions/MIME patterns.
+#[derive(Clone, Debug)]
+pub struct CompressionPredicate {
+    config: CompressionConfig,
+}
+
+impl CompressionPredicate {
+    /// Create a new compression predicate with the given configuration
+    pub fn new(config: CompressionConfig) -> Self {
+        Self { config }
+    }
+}
+
+impl Predicate for CompressionPredicate {
+    /// Decide whether `response` should be compressed.
+    ///
+    /// Checks run in order: enabled flag, error status, existing
+    /// `Content-Encoding`, `Content-Length` against `min_size`, then the
+    /// extension and MIME whitelists. A response without a parsable
+    /// `Content-Length` skips the size check and is judged by the whitelists alone.
+    fn should_compress<B>(&self, response: &Response<B>) -> bool
+    where
+        B: http_body::Body,
+    {
+        // If compression is disabled, never compress
+        if !self.config.enabled {
+            return false;
+        }
+
+        let status = response.status();
+
+        // Never compress error responses (4xx and 5xx status codes)
+        // This prevents Content-Length mismatch issues with error responses
+        if status.is_client_error() || status.is_server_error() {
+            debug!("Skipping compression for error response: status={}", status.as_u16());
+            return false;
+        }
+
+        let headers = response.headers();
+
+        // Skip if content is already encoded (e.g., gzip, br, deflate, zstd)
+        // Re-compressing already compressed content provides no benefit and may cause issues
+        if let Some(content_encoding) = headers.get(http::header::CONTENT_ENCODING) {
+            match content_encoding.to_str() {
+                Ok(encoding) => {
+                    let encoding = encoding.trim();
+                    // "identity" means no encoding, so we can still compress
+                    if !encoding.is_empty() && !encoding.eq_ignore_ascii_case("identity") {
+                        debug!("Skipping compression for already encoded response: Content-Encoding={}", encoding);
+                        return false;
+                    }
+                }
+                Err(_) => {
+                    // An encoding we cannot even read is treated as "already encoded".
+                    debug!("Skipping compression: Content-Encoding header is not valid visible ASCII");
+                    return false;
+                }
+            }
+        }
+
+        // Check Content-Length header for minimum size threshold
+        let content_length = headers
+            .get(http::header::CONTENT_LENGTH)
+            .and_then(|v| v.to_str().ok())
+            .and_then(|v| v.parse::<u64>().ok());
+        if let Some(length) = content_length.filter(|&len| len < self.config.min_size) {
+            debug!(
+                "Skipping compression for small response: size={} bytes, min_size={}",
+                length, self.config.min_size
+            );
+            return false;
+        }
+
+        // Check if the response matches configured extension via Content-Disposition
+        let filename = headers
+            .get(http::header::CONTENT_DISPOSITION)
+            .and_then(|v| v.to_str().ok())
+            .and_then(CompressionConfig::extract_filename_from_content_disposition);
+        if let Some(filename) = filename.filter(|name| self.config.matches_extension(name)) {
+            debug!("Compressing response: filename '{}' matches configured extension", filename);
+            return true;
+        }
+
+        // Check if the response matches configured MIME type
+        let content_type = headers.get(http::header::CONTENT_TYPE).and_then(|v| v.to_str().ok());
+        if let Some(ct) = content_type.filter(|ct| self.config.matches_mime_type(ct)) {
+            debug!("Compressing response: Content-Type '{}' matches configured MIME pattern", ct);
+            return true;
+        }
+
+        // Default: don't compress (whitelist approach)
+        debug!("Skipping compression: response does not match any configured extension or MIME type");
+        false
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_compression_config_default() {
+        let config = CompressionConfig::default();
+        assert!(!config.enabled);
+        assert!(config.extensions.is_empty());
+        assert!(!config.mime_patterns.is_empty());
+        assert_eq!(config.min_size, 1000);
+    }
+
+    #[test]
+    fn test_compression_config_mime_matching() {
+        let config = CompressionConfig {
+            enabled: true,
+            extensions: vec![],
+            mime_patterns: vec!["text/*".to_string(), "application/json".to_string()],
+            min_size: 1000,
+        };
+
+        // Test wildcard matching
+        assert!(config.matches_mime_type("text/plain"));
+        assert!(config.matches_mime_type("text/html"));
+        assert!(config.matches_mime_type("text/css"));
+        assert!(config.matches_mime_type("TEXT/PLAIN")); // case insensitive
+
+        // Test exact matching
+        assert!(config.matches_mime_type("application/json"));
+        assert!(config.matches_mime_type("application/json; charset=utf-8"));
+
+        // Test non-matching types
+        assert!(!config.matches_mime_type("image/png"));
+        assert!(!config.matches_mime_type("application/octet-stream"));
+        assert!(!config.matches_mime_type("video/mp4"));
+    }
+
+    #[test]
+    fn test_compression_config_extension_matching() {
+        let config = CompressionConfig {
+            enabled: true,
+            extensions: vec![".txt".to_string(), ".log".to_string(), ".csv".to_string()],
+            mime_patterns: vec![],
+            min_size: 1000,
+        };
+
+        // Test matching extensions
+        assert!(config.matches_extension("file.txt"));
+        assert!(config.matches_extension("path/to/file.log"));
+        assert!(config.matches_extension("data.csv"));
+        assert!(config.matches_extension("FILE.TXT")); // case insensitive
+
+        // Test non-matching extensions
+        assert!(!config.matches_extension("image.png"));
+        assert!(!config.matches_extension("archive.zip"));
+        assert!(!config.matches_extension("document.pdf"));
+    }
+
+    #[test]
+    fn test_extract_filename_from_content_disposition() {
+        // Quoted filename
+        assert_eq!(
+            CompressionConfig::extract_filename_from_content_disposition(r#"attachment; filename="example.txt""#),
+            Some("example.txt".to_string())
+        );
+
+        // Unquoted filename
+        assert_eq!(
+            CompressionConfig::extract_filename_from_content_disposition("attachment; filename=example.log"),
+            Some("example.log".to_string())
+        );
+
+        // Filename with path
+        assert_eq!(
+            CompressionConfig::extract_filename_from_content_disposition(r#"attachment; filename="path/to/file.csv""#),
+            Some("path/to/file.csv".to_string())
+        );
+
+        // Mixed case
+        assert_eq!(
+            CompressionConfig::extract_filename_from_content_disposition(r#"Attachment; FILENAME="test.json""#),
+            Some("test.json".to_string())
+        );
+
+        // No filename
+        assert_eq!(CompressionConfig::extract_filename_from_content_disposition("inline"), None);
+
+        // Quoted value without a closing quote
+        assert_eq!(
+            CompressionConfig::extract_filename_from_content_disposition(r#"attachment; filename="broken.txt"#),
+            None
+        );
+    }
+
+    #[test]
+    fn test_extract_filename_with_non_ascii_prefix() {
+        // `İ` grows by one byte under Unicode lowercasing; the offset of
+        // `filename=` must still line up with the original header value.
+        assert_eq!(
+            CompressionConfig::extract_filename_from_content_disposition("attachment; name=İ; filename=a.txt"),
+            Some("a.txt".to_string())
+        );
+    }
+
+    #[test]
+    fn test_compression_config_from_empty_strings() {
+        // Empty extension and MIME strings must yield empty whitelists
+        let config = CompressionConfig {
+            enabled: true,
+            extensions: CompressionConfig::parse_extensions(""),
+            mime_patterns: CompressionConfig::parse_mime_patterns(""),
+            min_size: 1000,
+        };
+
+        assert!(config.extensions.is_empty());
+        assert!(config.mime_patterns.is_empty());
+        assert!(!config.matches_extension("file.txt"));
+        assert!(!config.matches_mime_type("text/plain"));
+    }
+
+    #[test]
+    fn test_compression_config_extension_normalization() {
+        // Extensions are trimmed, lowercased and given a leading dot;
+        // empty entries and a bare "." are dropped.
+        let extensions = CompressionConfig::parse_extensions("txt, .LOG ,csv,,.");
+        assert_eq!(extensions, vec![".txt", ".log", ".csv"]);
+    }
+
+    #[test]
+    fn test_compression_config_mime_pattern_normalization() {
+        let patterns = CompressionConfig::parse_mime_patterns(" Text/* ,,application/JSON");
+        assert_eq!(patterns, vec!["text/*", "application/json"]);
+    }
+
+    #[test]
+    fn test_compression_predicate_creation() {
+        // Test that CompressionPredicate can be created with various configs
+        let config_disabled = CompressionConfig {
+            enabled: false,
+            extensions: vec![".txt".to_string()],
+            mime_patterns: vec!["text/*".to_string()],
+            min_size: 0,
+        };
+        let predicate = CompressionPredicate::new(config_disabled.clone());
+        assert!(!predicate.config.enabled);
+
+        let config_enabled = CompressionConfig {
+            enabled: true,
+            extensions: vec![".txt".to_string(), ".log".to_string()],
+            mime_patterns: vec!["text/*".to_string(), "application/json".to_string()],
+            min_size: 1000,
+        };
+        let predicate = CompressionPredicate::new(config_enabled.clone());
+        assert!(predicate.config.enabled);
+        assert_eq!(predicate.config.extensions.len(), 2);
+        assert_eq!(predicate.config.mime_patterns.len(), 2);
+        assert_eq!(predicate.config.min_size, 1000);
+    }
+}
diff --git
a/rustfs/src/server/http.rs b/rustfs/src/server/http.rs index 521c2b06..acecdceb 100644 --- a/rustfs/src/server/http.rs +++ b/rustfs/src/server/http.rs @@ -13,6 +13,7 @@ // limitations under the License. // Ensure the correct path for parse_license is imported +use super::compress::{CompressionConfig, CompressionPredicate}; use crate::admin; use crate::auth::IAMAuth; use crate::config; @@ -43,7 +44,7 @@ use tokio_rustls::TlsAcceptor; use tonic::{Request, Status, metadata::MetadataValue}; use tower::ServiceBuilder; use tower_http::catch_panic::CatchPanicLayer; -use tower_http::compression::{CompressionLayer, predicate::Predicate}; +use tower_http::compression::CompressionLayer; use tower_http::cors::{AllowOrigin, Any, CorsLayer}; use tower_http::request_id::{MakeRequestUuid, PropagateRequestIdLayer, SetRequestIdLayer}; use tower_http::trace::TraceLayer; @@ -108,60 +109,6 @@ fn get_cors_allowed_origins() -> String { .unwrap_or(rustfs_config::DEFAULT_CONSOLE_CORS_ALLOWED_ORIGINS.to_string()) } -/// Predicate to determine if a response should be compressed. -/// -/// This predicate implements intelligent compression selection to avoid issues -/// with error responses and small payloads. It excludes: -/// - Client error responses (4xx status codes) - typically small XML/JSON error messages -/// - Server error responses (5xx status codes) - ensures error details are preserved -/// - Very small responses (< 256 bytes) - compression overhead outweighs benefits -/// -/// # Rationale -/// The CompressionLayer can cause Content-Length header mismatches with error responses, -/// particularly when the s3s library generates XML error responses (~119 bytes for NoSuchKey). -/// By excluding these responses from compression, we ensure: -/// 1. Error responses are sent with accurate Content-Length headers -/// 2. Clients receive complete error bodies without truncation -/// 3. 
Small responses avoid compression overhead -/// -/// # Performance -/// This predicate is evaluated per-response and has O(1) complexity. -#[derive(Clone, Copy, Debug)] -struct ShouldCompress; - -impl Predicate for ShouldCompress { - fn should_compress(&self, response: &Response) -> bool - where - B: http_body::Body, - { - let status = response.status(); - - // Never compress error responses (4xx and 5xx status codes) - // This prevents Content-Length mismatch issues with error responses - if status.is_client_error() || status.is_server_error() { - debug!("Skipping compression for error response: status={}", status.as_u16()); - return false; - } - - // Check Content-Length header to avoid compressing very small responses - // Responses smaller than 256 bytes typically don't benefit from compression - // and may actually increase in size due to compression overhead - if let Some(content_length) = response.headers().get(http::header::CONTENT_LENGTH) { - if let Ok(length_str) = content_length.to_str() { - if let Ok(length) = length_str.parse::() { - if length < 256 { - debug!("Skipping compression for small response: size={} bytes", length); - return false; - } - } - } - } - - // Compress successful responses with sufficient size - true - } -} - pub async fn start_http_server( opt: &config::Opt, worker_state_manager: ServiceStateManager, @@ -290,6 +237,17 @@ pub async fn start_http_server( Some(cors_allowed_origins) }; + // Create compression configuration from environment variables + let compression_config = CompressionConfig::from_env(); + if compression_config.enabled { + info!( + "HTTP response compression enabled: extensions={:?}, mime_patterns={:?}, min_size={} bytes", + compression_config.extensions, compression_config.mime_patterns, compression_config.min_size + ); + } else { + debug!("HTTP response compression is disabled"); + } + let is_console = opt.console_enable; tokio::spawn(async move { // Create CORS layer inside the server loop closure @@ -395,15 
+353,15 @@ pub async fn start_http_server( warn!(?err, "Failed to set set_send_buffer_size"); } - process_connection( - socket, - tls_acceptor.clone(), - http_server.clone(), - s3_service.clone(), - graceful.clone(), - cors_layer.clone(), + let connection_ctx = ConnectionContext { + http_server: http_server.clone(), + s3_service: s3_service.clone(), + cors_layer: cors_layer.clone(), + compression_config: compression_config.clone(), is_console, - ); + }; + + process_connection(socket, tls_acceptor.clone(), connection_ctx, graceful.clone()); } worker_state_manager.update(ServiceState::Stopping); @@ -496,6 +454,15 @@ async fn setup_tls_acceptor(tls_path: &str) -> Result> { Ok(None) } +#[derive(Clone)] +struct ConnectionContext { + http_server: Arc>, + s3_service: S3Service, + cors_layer: CorsLayer, + compression_config: CompressionConfig, + is_console: bool, +} + /// Process a single incoming TCP connection. /// /// This function is executed in a new Tokio task and it will: @@ -507,13 +474,18 @@ async fn setup_tls_acceptor(tls_path: &str) -> Result> { fn process_connection( socket: TcpStream, tls_acceptor: Option>, - http_server: Arc>, - s3_service: S3Service, + context: ConnectionContext, graceful: Arc, - cors_layer: CorsLayer, - is_console: bool, ) { tokio::spawn(async move { + let ConnectionContext { + http_server, + s3_service, + cors_layer, + compression_config, + is_console, + } = context; + // Build services inside each connected task to avoid passing complex service types across tasks, // It also ensures that each connection has an independent service instance. 
let rpc_service = NodeServiceServer::with_interceptor(make_server(), check_auth); @@ -577,8 +549,9 @@ fn process_connection( ) .layer(PropagateRequestIdLayer::x_request_id()) .layer(cors_layer) - // Compress responses, but exclude error responses to avoid Content-Length mismatch issues - .layer(CompressionLayer::new().compress_when(ShouldCompress)) + // Compress responses based on whitelist configuration + // Only compresses when enabled and matches configured extensions/MIME types + .layer(CompressionLayer::new().compress_when(CompressionPredicate::new(compression_config))) .option_layer(if is_console { Some(RedirectLayer) } else { None }) .service(service); diff --git a/rustfs/src/server/mod.rs b/rustfs/src/server/mod.rs index 5aee97e3..df6b04a5 100644 --- a/rustfs/src/server/mod.rs +++ b/rustfs/src/server/mod.rs @@ -13,13 +13,13 @@ // limitations under the License. mod audit; +mod compress; +mod event; mod http; mod hybrid; mod layer; -mod service_state; - -mod event; mod runtime; +mod service_state; pub(crate) use audit::{start_audit_system, stop_audit_system}; pub(crate) use event::{init_event_notifier, shutdown_event_notifier}; diff --git a/scripts/run.sh b/scripts/run.sh index 2b75d326..a4329132 100755 --- a/scripts/run.sh +++ b/scripts/run.sh @@ -106,9 +106,53 @@ export RUSTFS_NOTIFY_WEBHOOK_QUEUE_DIR_MASTER="$current_dir/deploy/logs/notify" export RUSTFS_NS_SCANNER_INTERVAL=60 # Object scanning interval in seconds -# exportRUSTFS_SKIP_BACKGROUND_TASK=true +# export RUSTFS_SKIP_BACKGROUND_TASK=true -# export RUSTFS_COMPRESSION_ENABLED=true # Whether to enable compression +# Storage level compression (compression at object storage level) +# export RUSTFS_COMPRESSION_ENABLED=true # Whether to enable storage-level compression for objects + +# HTTP Response Compression (whitelist-based, aligned with MinIO) +# By default, HTTP response compression is DISABLED (aligned with MinIO behavior) +# When enabled, only explicitly configured file types will be compressed 
+# This preserves Content-Length headers for better browser download experience + +# Enable HTTP response compression +# export RUSTFS_COMPRESS_ENABLE=on + +# Example 1: Compress text files and logs +# Suitable for log files, text documents, CSV files +# export RUSTFS_COMPRESS_ENABLE=on +# export RUSTFS_COMPRESS_EXTENSIONS=.txt,.log,.csv +# export RUSTFS_COMPRESS_MIME_TYPES=text/* +# export RUSTFS_COMPRESS_MIN_SIZE=1000 + +# Example 2: Compress JSON and XML API responses +# Suitable for API services that return JSON/XML data +# export RUSTFS_COMPRESS_ENABLE=on +# export RUSTFS_COMPRESS_EXTENSIONS=.json,.xml +# export RUSTFS_COMPRESS_MIME_TYPES=application/json,application/xml +# export RUSTFS_COMPRESS_MIN_SIZE=1000 + +# Example 3: Comprehensive web content compression +# Suitable for web applications (HTML, CSS, JavaScript, JSON) +# export RUSTFS_COMPRESS_ENABLE=on +# export RUSTFS_COMPRESS_EXTENSIONS=.html,.css,.js,.json,.xml,.txt,.svg +# export RUSTFS_COMPRESS_MIME_TYPES=text/*,application/json,application/xml,application/javascript,image/svg+xml +# export RUSTFS_COMPRESS_MIN_SIZE=1000 + +# Example 4: Compress only large text files (minimum 10KB) +# Useful when you want to avoid compression overhead for small files +# export RUSTFS_COMPRESS_ENABLE=on +# export RUSTFS_COMPRESS_EXTENSIONS=.txt,.log +# export RUSTFS_COMPRESS_MIME_TYPES=text/* +# export RUSTFS_COMPRESS_MIN_SIZE=10240 + +# Notes: +# - Only files matching EITHER extensions OR MIME types will be compressed (whitelist approach) +# - Error responses (4xx, 5xx) are never compressed to avoid Content-Length issues +# - Already encoded content (gzip, br, deflate, zstd) is automatically skipped +# - Minimum size threshold prevents compression of small files where overhead > benefit +# - Wildcard patterns supported in MIME types (e.g., text/* matches text/plain, text/html, etc.) #export RUSTFS_REGION="us-east-1"