add reed-solomon-simd

This commit is contained in:
weisd
2025-06-09 18:04:42 +08:00
parent 711cab777f
commit e62947f7b2
5 changed files with 1113 additions and 119 deletions

25
Cargo.lock generated
View File

@@ -3644,6 +3644,7 @@ dependencies = [
"protos",
"rand 0.9.1",
"reed-solomon-erasure",
"reed-solomon-simd",
"regex",
"reqwest",
"rmp",
@@ -3865,6 +3866,12 @@ dependencies = [
"windows-sys 0.59.0",
]
[[package]]
name = "fixedbitset"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80"
[[package]]
name = "fixedbitset"
version = "0.5.7"
@@ -7007,7 +7014,7 @@ version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772"
dependencies = [
"fixedbitset",
"fixedbitset 0.5.7",
"indexmap 2.9.0",
]
@@ -7823,6 +7830,12 @@ dependencies = [
"pkg-config",
]
[[package]]
name = "readme-rustdocifier"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08ad765b21a08b1a8e5cdce052719188a23772bcbefb3c439f0baaf62c56ceac"
[[package]]
name = "recursive"
version = "0.1.1"
@@ -7896,6 +7909,16 @@ dependencies = [
"spin",
]
[[package]]
name = "reed-solomon-simd"
version = "3.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab6badd4f4b9c93832eb3707431e8e7bea282fae96801312f0990d48b030f8c5"
dependencies = [
"fixedbitset 0.4.2",
"readme-rustdocifier",
]
[[package]]
name = "regex"
version = "1.11.1"

View File

@@ -158,6 +158,7 @@ protobuf = "3.7"
rand = "0.9.1"
rdkafka = { version = "0.37.0", features = ["tokio"] }
reed-solomon-erasure = { version = "6.0.0", features = ["simd-accel"] }
reed-solomon-simd = { version = "3.0.0" }
regex = { version = "1.11.1" }
reqwest = { version = "0.12.19", default-features = false, features = [
"rustls-tls",

View File

@@ -10,6 +10,11 @@ rust-version.workspace = true
[lints]
workspace = true
[features]
default = ["reed-solomon-simd"]
reed-solomon-simd = ["dep:reed-solomon-simd"]
reed-solomon-erasure = ["dep:reed-solomon-erasure"]
[dependencies]
rustfs-config = { workspace = true }
async-trait.workspace = true
@@ -35,7 +40,8 @@ http.workspace = true
highway = { workspace = true }
url.workspace = true
uuid = { workspace = true, features = ["v4", "fast-rng", "serde"] }
reed-solomon-erasure = { workspace = true }
reed-solomon-erasure = { version = "6.0.0", features = ["simd-accel"], optional = true }
reed-solomon-simd = { version = "3.0.0", optional = true }
transform-stream = "0.3.1"
lazy_static.workspace = true
lock.workspace = true

109
ecstore/README.md Normal file
View File

@@ -0,0 +1,109 @@
# ECStore - Erasure Coding Storage
ECStore provides erasure coding functionality for the RustFS project, supporting multiple Reed-Solomon implementations for optimal performance and compatibility.
## Reed-Solomon Implementations
### Available Backends
#### `reed-solomon-erasure` (Optional)
- **Stability**: Mature and well-tested implementation
- **Performance**: Good performance with SIMD acceleration when available
- **Compatibility**: Works with any shard size
- **Memory**: Efficient memory usage
- **Use case**: Recommended for production use
#### `reed-solomon-simd` (Default)
- **Performance**: Optimized SIMD implementation for maximum speed
- **Limitations**: Has restrictions on shard sizes (must be >= 64 bytes typically)
- **Memory**: May use more memory for small shards
- **Use case**: Best for large data blocks where performance is critical
### Feature Flags
Configure the Reed-Solomon implementation using Cargo features:
```toml
# Use default implementation (reed-solomon-simd, enabled by the crate's default features)
ecstore = "0.0.1"
# Use SIMD implementation for maximum performance
ecstore = { version = "0.0.1", features = ["reed-solomon-simd"], default-features = false }
# Use traditional implementation explicitly
ecstore = { version = "0.0.1", features = ["reed-solomon-erasure"], default-features = false }
```
### Usage Example
```rust
use ecstore::erasure_coding::Erasure;
// Create erasure coding instance
// 4 data shards, 2 parity shards, 1KB block size
let erasure = Erasure::new(4, 2, 1024);
// Encode data
let data = b"hello world from rustfs erasure coding";
let shards = erasure.encode_data(data)?;
// Simulate loss of one shard
let mut shards_opt: Vec<Option<Vec<u8>>> = shards
.iter()
.map(|b| Some(b.to_vec()))
.collect();
shards_opt[2] = None; // Lose shard 2
// Reconstruct missing data
erasure.decode_data(&mut shards_opt)?;
// Recover original data
let mut recovered = Vec::new();
for shard in shards_opt.iter().take(4) { // Only data shards
recovered.extend_from_slice(shard.as_ref().unwrap());
}
recovered.truncate(data.len());
assert_eq!(&recovered, data);
```
## Performance Considerations
### When to use `reed-solomon-simd`
- Large block sizes (>= 1KB recommended)
- High-throughput scenarios
- CPU-intensive workloads where encoding/decoding is the bottleneck
### When to use `reed-solomon-erasure`
- Small block sizes
- Memory-constrained environments
- General-purpose usage
- Production deployments requiring maximum stability
### Implementation Details
#### `reed-solomon-erasure`
- **Instance Reuse**: The encoder instance is cached and reused across multiple operations
- **Thread Safety**: Thread-safe with interior mutability
- **Memory Efficiency**: Lower memory footprint for small data
#### `reed-solomon-simd`
- **Instance Creation**: New encoder/decoder instances are created for each operation
- **API Design**: The SIMD implementation's API is designed for single-use instances
- **Performance Trade-off**: While instances are created per operation, the SIMD optimizations provide significant performance benefits for large data blocks
- **Optimization**: Future versions may implement instance pooling if the underlying API supports reuse
### Performance Tips
1. **Batch Operations**: When possible, batch multiple small operations into larger blocks
2. **Block Size Optimization**: Use block sizes that are multiples of 64 bytes for SIMD implementations
3. **Memory Allocation**: Pre-allocate buffers when processing multiple blocks
4. **Feature Selection**: Choose the appropriate feature based on your data size and performance requirements
## Cross-Platform Compatibility
Both implementations support:
- x86_64 with SIMD acceleration
- aarch64 (ARM64) with optimizations
- Other architectures with fallback implementations
The `reed-solomon-erasure` implementation provides better cross-platform compatibility and is recommended for most use cases.

File diff suppressed because it is too large Load Diff